diff --git a/block.c b/block.c index 580cb77a70..0ac5b163d2 100644 --- a/block.c +++ b/block.c @@ -87,8 +87,10 @@ static BlockDriverState *bdrv_open_inherit(const char *filename, static bool bdrv_recurse_has_child(BlockDriverState *bs, BlockDriverState *child); -static void bdrv_replace_child_noperm(BdrvChild *child, - BlockDriverState *new_bs); +static void bdrv_child_free(BdrvChild *child); +static void bdrv_replace_child_noperm(BdrvChild **child, + BlockDriverState *new_bs, + bool free_empty_child); static void bdrv_remove_file_or_backing_child(BlockDriverState *bs, BdrvChild *child, Transaction *tran); @@ -1387,6 +1389,8 @@ static void bdrv_child_cb_attach(BdrvChild *child) { BlockDriverState *bs = child->opaque; + QLIST_INSERT_HEAD(&bs->children, child, next); + if (child->role & BDRV_CHILD_COW) { bdrv_backing_attach(child); } @@ -1403,6 +1407,8 @@ static void bdrv_child_cb_detach(BdrvChild *child) } bdrv_unapply_subtree_drain(child, bs); + + QLIST_REMOVE(child, next); } static int bdrv_child_cb_update_filename(BdrvChild *c, BlockDriverState *base, @@ -2250,13 +2256,18 @@ static int bdrv_drv_set_perm(BlockDriverState *bs, uint64_t perm, typedef struct BdrvReplaceChildState { BdrvChild *child; + BdrvChild **childp; BlockDriverState *old_bs; + bool free_empty_child; } BdrvReplaceChildState; static void bdrv_replace_child_commit(void *opaque) { BdrvReplaceChildState *s = opaque; + if (s->free_empty_child && !s->child->bs) { + bdrv_child_free(s->child); + } bdrv_unref(s->old_bs); } @@ -2265,8 +2276,34 @@ static void bdrv_replace_child_abort(void *opaque) BdrvReplaceChildState *s = opaque; BlockDriverState *new_bs = s->child->bs; - /* old_bs reference is transparently moved from @s to @s->child */ - bdrv_replace_child_noperm(s->child, s->old_bs); + /* + * old_bs reference is transparently moved from @s to s->child. + * + * Pass &s->child here instead of s->childp, because: + * (1) s->old_bs must be non-NULL, so bdrv_replace_child_noperm() will not + * modify the BdrvChild * pointer we indirectly pass to it, i.e. it + * will not modify s->child. From that perspective, it does not matter + * whether we pass s->childp or &s->child. + * (2) If new_bs is not NULL, s->childp will be NULL. We then cannot use + * it here. + * (3) If new_bs is NULL, *s->childp will have been NULLed by + * bdrv_replace_child_tran()'s bdrv_replace_child_noperm() call, and we + * must not pass a NULL *s->childp here. + * + * So whether new_bs was NULL or not, we cannot pass s->childp here; and in + * any case, there is no reason to pass it anyway. + */ + bdrv_replace_child_noperm(&s->child, s->old_bs, true); + /* + * The child was pre-existing, so s->old_bs must be non-NULL, and + * s->child thus must not have been freed + */ + assert(s->child != NULL); + if (!new_bs) { + /* As described above, *s->childp was cleared, so restore it */ + assert(s->childp != NULL); + *s->childp = s->child; + } bdrv_unref(new_bs); } @@ -2282,22 +2319,46 @@ static TransactionActionDrv bdrv_replace_child_drv = { * Note: real unref of old_bs is done only on commit. * * The function doesn't update permissions, caller is responsible for this. + * + * (*childp)->bs must not be NULL. + * + * Note that if new_bs == NULL, @childp is stored in a state object attached + * to @tran, so that the old child can be reinstated in the abort handler. + * Therefore, if @new_bs can be NULL, @childp must stay valid until the + * transaction is committed or aborted. + * + * If @free_empty_child is true and @new_bs is NULL, the BdrvChild is + * freed (on commit). @free_empty_child should only be false if the + * caller will free the BDrvChild themselves (which may be important + * if this is in turn called in another transactional context). */ -static void bdrv_replace_child_tran(BdrvChild *child, BlockDriverState *new_bs, - Transaction *tran) +static void bdrv_replace_child_tran(BdrvChild **childp, + BlockDriverState *new_bs, + Transaction *tran, + bool free_empty_child) { BdrvReplaceChildState *s = g_new(BdrvReplaceChildState, 1); *s = (BdrvReplaceChildState) { - .child = child, - .old_bs = child->bs, + .child = *childp, + .childp = new_bs == NULL ? childp : NULL, + .old_bs = (*childp)->bs, + .free_empty_child = free_empty_child, }; tran_add(tran, &bdrv_replace_child_drv, s); + /* The abort handler relies on this */ + assert(s->old_bs != NULL); + if (new_bs) { bdrv_ref(new_bs); } - bdrv_replace_child_noperm(child, new_bs); - /* old_bs reference is transparently moved from @child to @s */ + /* + * Pass free_empty_child=false, we will free the child (if + * necessary) in bdrv_replace_child_commit() (if our + * @free_empty_child parameter was true). + */ + bdrv_replace_child_noperm(childp, new_bs, false); + /* old_bs reference is transparently moved from *childp to @s */ } /* @@ -2668,9 +2729,24 @@ uint64_t bdrv_qapi_perm_to_blk_perm(BlockPermission qapi_perm) return permissions[qapi_perm]; } -static void bdrv_replace_child_noperm(BdrvChild *child, - BlockDriverState *new_bs) +/** + * Replace (*childp)->bs by @new_bs. + * + * If @new_bs is NULL, *childp will be set to NULL, too: BDS parents + * generally cannot handle a BdrvChild with .bs == NULL, so clearing + * BdrvChild.bs should generally immediately be followed by the + * BdrvChild pointer being cleared as well. + * + * If @free_empty_child is true and @new_bs is NULL, the BdrvChild is + * freed. @free_empty_child should only be false if the caller will + * free the BdrvChild themselves (this may be important in a + * transactional context, where it may only be freed on commit). + */ +static void bdrv_replace_child_noperm(BdrvChild **childp, + BlockDriverState *new_bs, + bool free_empty_child) { + BdrvChild *child = *childp; BlockDriverState *old_bs = child->bs; int new_bs_quiesce_counter; int drain_saldo; @@ -2705,6 +2781,9 @@ static void bdrv_replace_child_noperm(BdrvChild *child, } child->bs = new_bs; + if (!new_bs) { + *childp = NULL; + } if (new_bs) { QLIST_INSERT_HEAD(&new_bs->parents, child, next_parent); @@ -2734,21 +2813,25 @@ static void bdrv_replace_child_noperm(BdrvChild *child, bdrv_parent_drained_end_single(child); drain_saldo++; } + + if (free_empty_child && !child->bs) { + bdrv_child_free(child); + } } -static void bdrv_child_free(void *opaque) -{ - BdrvChild *c = opaque; - - g_free(c->name); - g_free(c); -} - -static void bdrv_remove_empty_child(BdrvChild *child) +/** + * Free the given @child. + * + * The child must be empty (i.e. `child->bs == NULL`) and it must be + * unused (i.e. not in a children list). + */ +static void bdrv_child_free(BdrvChild *child) { assert(!child->bs); - QLIST_SAFE_REMOVE(child, next); - bdrv_child_free(child); + assert(!child->next.le_prev); /* not in children list */ + + g_free(child->name); + g_free(child); } typedef struct BdrvAttachChildCommonState { @@ -2763,27 +2846,35 @@ static void bdrv_attach_child_common_abort(void *opaque) BdrvChild *child = *s->child; BlockDriverState *bs = child->bs; - bdrv_replace_child_noperm(child, NULL); + /* + * Pass free_empty_child=false, because we still need the child + * for the AioContext operations on the parent below; those + * BdrvChildClass methods all work on a BdrvChild object, so we + * need to keep it as an empty shell (after this function, it will + * not be attached to any parent, and it will not have a .bs). + */ + bdrv_replace_child_noperm(s->child, NULL, false); if (bdrv_get_aio_context(bs) != s->old_child_ctx) { bdrv_try_set_aio_context(bs, s->old_child_ctx, &error_abort); } if (bdrv_child_get_parent_aio_context(child) != s->old_parent_ctx) { - GSList *ignore = g_slist_prepend(NULL, child); + GSList *ignore; + /* No need to ignore `child`, because it has been detached already */ + ignore = NULL; child->klass->can_set_aio_ctx(child, s->old_parent_ctx, &ignore, &error_abort); g_slist_free(ignore); - ignore = g_slist_prepend(NULL, child); - child->klass->set_aio_ctx(child, s->old_parent_ctx, &ignore); + ignore = NULL; + child->klass->set_aio_ctx(child, s->old_parent_ctx, &ignore); g_slist_free(ignore); } bdrv_unref(bs); - bdrv_remove_empty_child(child); - *s->child = NULL; + bdrv_child_free(child); } static TransactionActionDrv bdrv_attach_child_common_drv = { @@ -2855,13 +2946,15 @@ static int bdrv_attach_child_common(BlockDriverState *child_bs, if (ret < 0) { error_propagate(errp, local_err); - bdrv_remove_empty_child(new_child); + bdrv_child_free(new_child); return ret; } } bdrv_ref(child_bs); - bdrv_replace_child_noperm(new_child, child_bs); + bdrv_replace_child_noperm(&new_child, child_bs, true); + /* child_bs was non-NULL, so new_child must not have been freed */ + assert(new_child != NULL); *child = new_child; @@ -2913,21 +3006,14 @@ static int bdrv_attach_child_noperm(BlockDriverState *parent_bs, return ret; } - QLIST_INSERT_HEAD(&parent_bs->children, *child, next); - /* - * child is removed in bdrv_attach_child_common_abort(), so don't care to - * abort this change separately. - */ - return 0; } -static void bdrv_detach_child(BdrvChild *child) +static void bdrv_detach_child(BdrvChild **childp) { - BlockDriverState *old_bs = child->bs; + BlockDriverState *old_bs = (*childp)->bs; - bdrv_replace_child_noperm(child, NULL); - bdrv_remove_empty_child(child); + bdrv_replace_child_noperm(childp, NULL, true); if (old_bs) { /* @@ -3033,7 +3119,7 @@ void bdrv_root_unref_child(BdrvChild *child) BlockDriverState *child_bs; child_bs = child->bs; - bdrv_detach_child(child); + bdrv_detach_child(&child); bdrv_unref(child_bs); } @@ -4843,6 +4929,7 @@ static bool should_update_child(BdrvChild *c, BlockDriverState *to) typedef struct BdrvRemoveFilterOrCowChild { BdrvChild *child; + BlockDriverState *bs; bool is_backing; } BdrvRemoveFilterOrCowChild; @@ -4851,7 +4938,6 @@ static void bdrv_remove_filter_or_cow_child_abort(void *opaque) BdrvRemoveFilterOrCowChild *s = opaque; BlockDriverState *parent_bs = s->child->opaque; - QLIST_INSERT_HEAD(&parent_bs->children, s->child, next); if (s->is_backing) { parent_bs->backing = s->child; } else { @@ -4873,10 +4959,19 @@ static void bdrv_remove_filter_or_cow_child_commit(void *opaque) bdrv_child_free(s->child); } +static void bdrv_remove_filter_or_cow_child_clean(void *opaque) +{ + BdrvRemoveFilterOrCowChild *s = opaque; + + /* Drop the bs reference after the transaction is done */ + bdrv_unref(s->bs); + g_free(s); +} + static TransactionActionDrv bdrv_remove_filter_or_cow_child_drv = { .abort = bdrv_remove_filter_or_cow_child_abort, .commit = bdrv_remove_filter_or_cow_child_commit, - .clean = g_free, + .clean = bdrv_remove_filter_or_cow_child_clean, }; /* @@ -4887,31 +4982,41 @@ static void bdrv_remove_file_or_backing_child(BlockDriverState *bs, BdrvChild *child, Transaction *tran) { + BdrvChild **childp; BdrvRemoveFilterOrCowChild *s; - assert(child == bs->backing || child == bs->file); - if (!child) { return; } + /* + * Keep a reference to @bs so @childp will stay valid throughout the + * transaction (required by bdrv_replace_child_tran()) + */ + bdrv_ref(bs); + if (child == bs->backing) { + childp = &bs->backing; + } else if (child == bs->file) { + childp = &bs->file; + } else { + g_assert_not_reached(); + } + if (child->bs) { - bdrv_replace_child_tran(child, NULL, tran); + /* + * Pass free_empty_child=false, we will free the child in + * bdrv_remove_filter_or_cow_child_commit() + */ + bdrv_replace_child_tran(childp, NULL, tran, false); } s = g_new(BdrvRemoveFilterOrCowChild, 1); *s = (BdrvRemoveFilterOrCowChild) { .child = child, - .is_backing = (child == bs->backing), + .bs = bs, + .is_backing = (childp == &bs->backing), }; tran_add(tran, &bdrv_remove_filter_or_cow_child_drv, s); - - QLIST_SAFE_REMOVE(child, next); - if (s->is_backing) { - bs->backing = NULL; - } else { - bs->file = NULL; - } } /* @@ -4932,6 +5037,8 @@ static int bdrv_replace_node_noperm(BlockDriverState *from, { BdrvChild *c, *next; + assert(to != NULL); + QLIST_FOREACH_SAFE(c, &from->parents, next_parent, next) { assert(c->bs == from); if (!should_update_child(c, to)) { @@ -4947,7 +5054,12 @@ static int bdrv_replace_node_noperm(BlockDriverState *from, c->name, from->node_name); return -EPERM; } - bdrv_replace_child_tran(c, to, tran); + + /* + * Passing a pointer to the local variable @c is fine here, because + * @to is not NULL, and so &c will not be attached to the transaction. + */ + bdrv_replace_child_tran(&c, to, tran, true); } return 0; @@ -4962,6 +5074,8 @@ static int bdrv_replace_node_noperm(BlockDriverState *from, * * With @detach_subchain=true @to must be in a backing chain of @from. In this * case backing link of the cow-parent of @to is removed. + * + * @to must not be NULL. */ static int bdrv_replace_node_common(BlockDriverState *from, BlockDriverState *to, @@ -4974,6 +5088,8 @@ static int bdrv_replace_node_common(BlockDriverState *from, BlockDriverState *to_cow_parent = NULL; int ret; + assert(to != NULL); + if (detach_subchain) { assert(bdrv_chain_contains(from, to)); assert(from != to); @@ -5029,6 +5145,9 @@ out: return ret; } +/** + * Replace node @from by @to (where neither may be NULL). + */ int bdrv_replace_node(BlockDriverState *from, BlockDriverState *to, Error **errp) { @@ -5096,7 +5215,9 @@ int bdrv_replace_child_bs(BdrvChild *child, BlockDriverState *new_bs, bdrv_drained_begin(old_bs); bdrv_drained_begin(new_bs); - bdrv_replace_child_tran(child, new_bs, tran); + bdrv_replace_child_tran(&child, new_bs, tran, true); + /* @new_bs must have been non-NULL, so @child must not have been freed */ + assert(child != NULL); found = g_hash_table_new(NULL, NULL); refresh_list = bdrv_topological_dfs(refresh_list, found, old_bs); diff --git a/block/file-posix.c b/block/file-posix.c index 7a27c83060..b283093e5b 100644 --- a/block/file-posix.c +++ b/block/file-posix.c @@ -167,6 +167,7 @@ typedef struct BDRVRawState { int page_cache_inconsistent; /* errno from fdatasync failure */ bool has_fallocate; bool needs_alignment; + bool force_alignment; bool drop_cache; bool check_cache_dropped; struct { @@ -351,6 +352,17 @@ static bool dio_byte_aligned(int fd) return false; } +static bool raw_needs_alignment(BlockDriverState *bs) +{ + BDRVRawState *s = bs->opaque; + + if ((bs->open_flags & BDRV_O_NOCACHE) != 0 && !dio_byte_aligned(s->fd)) { + return true; + } + + return s->force_alignment; +} + /* Check if read is allowed with given memory buffer and length. * * This function is used to check O_DIRECT memory buffer and request alignment. @@ -728,9 +740,6 @@ static int raw_open_common(BlockDriverState *bs, QDict *options, s->has_discard = true; s->has_write_zeroes = true; - if ((bs->open_flags & BDRV_O_NOCACHE) != 0 && !dio_byte_aligned(s->fd)) { - s->needs_alignment = true; - } if (fstat(s->fd, &st) < 0) { ret = -errno; @@ -784,9 +793,10 @@ static int raw_open_common(BlockDriverState *bs, QDict *options, * so QEMU makes sure all IO operations on the device are aligned * to sector size, or else FreeBSD will reject them with EINVAL. */ - s->needs_alignment = true; + s->force_alignment = true; } #endif + s->needs_alignment = raw_needs_alignment(bs); #ifdef CONFIG_XFS if (platform_test_xfs_fd(s->fd)) { @@ -1251,7 +1261,9 @@ static void raw_refresh_limits(BlockDriverState *bs, Error **errp) BDRVRawState *s = bs->opaque; struct stat st; + s->needs_alignment = raw_needs_alignment(bs); raw_probe_alignment(bs, s->fd, errp); + bs->bl.min_mem_alignment = s->buf_align; bs->bl.opt_mem_alignment = MAX(s->buf_align, qemu_real_host_page_size); diff --git a/block/stream.c b/block/stream.c index 97bee482dc..e45113aed6 100644 --- a/block/stream.c +++ b/block/stream.c @@ -54,8 +54,8 @@ static int stream_prepare(Job *job) { StreamBlockJob *s = container_of(job, StreamBlockJob, common.job); BlockDriverState *unfiltered_bs = bdrv_skip_filters(s->target_bs); - BlockDriverState *base = bdrv_filter_or_cow_bs(s->above_base); - BlockDriverState *unfiltered_base = bdrv_skip_filters(base); + BlockDriverState *base; + BlockDriverState *unfiltered_base; Error *local_err = NULL; int ret = 0; @@ -63,6 +63,9 @@ static int stream_prepare(Job *job) bdrv_cor_filter_drop(s->cor_filter_bs); s->cor_filter_bs = NULL; + base = bdrv_filter_or_cow_bs(s->above_base); + unfiltered_base = bdrv_skip_filters(base); + if (bdrv_cow_child(unfiltered_bs)) { const char *base_id = NULL, *base_fmt = NULL; if (unfiltered_base) { diff --git a/docs/about/deprecated.rst b/docs/about/deprecated.rst index 600031210d..c03fcf951f 100644 --- a/docs/about/deprecated.rst +++ b/docs/about/deprecated.rst @@ -250,6 +250,20 @@ options are removed in favor of using explicit ``blockdev-create`` and ``blockdev-add`` calls. See :doc:`/interop/live-block-operations` for details. +Incorrectly typed ``device_add`` arguments (since 6.2) +'''''''''''''''''''''''''''''''''''''''''''''''''''''' + +Due to shortcomings in the internal implementation of ``device_add``, QEMU +incorrectly accepts certain invalid arguments: Any object or list arguments are +silently ignored. Other argument types are not checked, but an implicit +conversion happens, so that e.g. string values can be assigned to integer +device properties or vice versa. + +This is a bug in QEMU that will be fixed in the future so that previously +accepted incorrect commands will return an error. Users should make sure that +all arguments passed to ``device_add`` are consistent with the documented +property types. + System accelerators ------------------- diff --git a/include/qemu/transactions.h b/include/qemu/transactions.h index 92c5965235..2f2060acd9 100644 --- a/include/qemu/transactions.h +++ b/include/qemu/transactions.h @@ -31,6 +31,9 @@ * tran_create(), call your "prepare" functions on it, and finally call * tran_abort() or tran_commit() to finalize the transaction by corresponding * finalization actions in reverse order. + * + * The clean() functions registered by the drivers in a transaction are called + * last, after all abort() or commit() functions have been called. */ #ifndef QEMU_TRANSACTIONS_H diff --git a/tests/qemu-iotests/030 b/tests/qemu-iotests/030 index 5fb65b4bef..567bf1da67 100755 --- a/tests/qemu-iotests/030 +++ b/tests/qemu-iotests/030 @@ -251,7 +251,16 @@ class TestParallelOps(iotests.QMPTestCase): speed=1024) self.assert_qmp(result, 'return', {}) - for job in pending_jobs: + # Do this in reverse: After unthrottling them, some jobs may finish + # before we have unthrottled all of them. This will drain their + # subgraph, and this will make jobs above them advance (despite those + # jobs on top being throttled). In the worst case, all jobs below the + # top one are finished before we can unthrottle it, and this makes it + # advance so far that it completes before we can unthrottle it - which + # results in an error. + # Starting from the top (i.e. in reverse) does not have this problem: + # When a job finishes, the ones below it are not advanced. + for job in reversed(pending_jobs): result = self.vm.qmp('block-job-set-speed', device=job, speed=0) self.assert_qmp(result, 'return', {}) diff --git a/tests/qemu-iotests/142 b/tests/qemu-iotests/142 index 69fd10ef51..86d65a2d1a 100755 --- a/tests/qemu-iotests/142 +++ b/tests/qemu-iotests/142 @@ -350,6 +350,35 @@ info block backing-file" echo "$hmp_cmds" | run_qemu -drive "$files","$ids" | grep "Cache" +echo +echo "--- Alignment after changing O_DIRECT ---" +echo + +# Directly test the protocol level: Can unaligned requests succeed even if +# O_DIRECT was only enabled through a reopen and vice versa? + +# Ensure image size is a multiple of the sector size (required for O_DIRECT) +$QEMU_IMG create -f file "$TEST_IMG" 1M | _filter_img_create + +# And write some data (not strictly necessary, but it feels better to actually +# have something to be read) +$QEMU_IO -f file -c 'write 0 4096' "$TEST_IMG" | _filter_qemu_io + +$QEMU_IO --cache=writeback -f file $TEST_IMG <actions, entry, next) { + QSLIST_FOREACH(act, &tran->actions, entry) { if (act->drv->abort) { act->drv->abort(act->opaque); } + } + QSLIST_FOREACH_SAFE(act, &tran->actions, entry, next) { if (act->drv->clean) { act->drv->clean(act->opaque); } @@ -80,11 +82,13 @@ void tran_commit(Transaction *tran) { TransactionAction *act, *next; - QSLIST_FOREACH_SAFE(act, &tran->actions, entry, next) { + QSLIST_FOREACH(act, &tran->actions, entry) { if (act->drv->commit) { act->drv->commit(act->opaque); } + } + QSLIST_FOREACH_SAFE(act, &tran->actions, entry, next) { if (act->drv->clean) { act->drv->clean(act->opaque); }