From d7be5dd19c0df7f76e1b42f0c2cbbabefa1974cb Mon Sep 17 00:00:00 2001
From: Paolo Bonzini
Date: Wed, 12 Sep 2018 19:10:38 +0200
Subject: [PATCH 1/5] aio-posix: fix concurrent access to poll_disable_cnt

It is valid for aio_set_fd_handler to run concurrently with aio_poll.
In that case, poll_disable_cnt can change out from under aio_poll, and
the assertion on poll_disable_cnt can fail in run_poll_handlers.
Therefore, this patch simply checks the counter on every polling
iteration.  There is no particular need for ordering, since the polling
loop is terminated anyway by aio_notify at the end of
aio_set_fd_handler.

Signed-off-by: Paolo Bonzini
Message-Id: <20180912171040.1732-2-pbonzini@redhat.com>
Reviewed-by: Fam Zheng
Signed-off-by: Fam Zheng
---
 util/aio-posix.c | 26 +++++++++++++++-----------
 1 file changed, 15 insertions(+), 11 deletions(-)

diff --git a/util/aio-posix.c b/util/aio-posix.c
index 131ba6b4a8..5c29380575 100644
--- a/util/aio-posix.c
+++ b/util/aio-posix.c
@@ -211,6 +211,7 @@ void aio_set_fd_handler(AioContext *ctx,
     AioHandler *node;
     bool is_new = false;
     bool deleted = false;
+    int poll_disable_change;
 
     qemu_lockcnt_lock(&ctx->list_lock);
 
@@ -244,11 +245,9 @@ void aio_set_fd_handler(AioContext *ctx,
             QLIST_REMOVE(node, node);
             deleted = true;
         }
-
-        if (!node->io_poll) {
-            ctx->poll_disable_cnt--;
-        }
+        poll_disable_change = -!node->io_poll;
     } else {
+        poll_disable_change = !io_poll - (node && !node->io_poll);
         if (node == NULL) {
             /* Alloc and insert if it's not already there */
             node = g_new0(AioHandler, 1);
@@ -257,10 +256,6 @@ void aio_set_fd_handler(AioContext *ctx,
 
             g_source_add_poll(&ctx->source, &node->pfd);
             is_new = true;
-
-            ctx->poll_disable_cnt += !io_poll;
-        } else {
-            ctx->poll_disable_cnt += !io_poll - !node->io_poll;
         }
 
         /* Update handler with latest information */
@@ -274,6 +269,15 @@ void aio_set_fd_handler(AioContext *ctx,
         node->pfd.events |= (io_write ? G_IO_OUT | G_IO_ERR : 0);
     }
 
+    /* No need to order poll_disable_cnt writes against other updates;
+     * the counter is only used to avoid wasting time and latency on
+     * iterated polling when the system call will be ultimately necessary.
+     * Changing handlers is a rare event, and a little wasted polling until
+     * the aio_notify below is not an issue.
+     */
+    atomic_set(&ctx->poll_disable_cnt,
+               atomic_read(&ctx->poll_disable_cnt) + poll_disable_change);
+
     aio_epoll_update(ctx, node, is_new);
     qemu_lockcnt_unlock(&ctx->list_lock);
     aio_notify(ctx);
@@ -525,7 +529,6 @@ static bool run_poll_handlers(AioContext *ctx, int64_t max_ns)
 
     assert(ctx->notify_me);
     assert(qemu_lockcnt_count(&ctx->list_lock) > 0);
-    assert(ctx->poll_disable_cnt == 0);
 
     trace_run_poll_handlers_begin(ctx, max_ns);
 
@@ -533,7 +536,8 @@ static bool run_poll_handlers(AioContext *ctx, int64_t max_ns)
 
     do {
         progress = run_poll_handlers_once(ctx);
-    } while (!progress && qemu_clock_get_ns(QEMU_CLOCK_REALTIME) < end_time);
+    } while (!progress && qemu_clock_get_ns(QEMU_CLOCK_REALTIME) < end_time
+             && !atomic_read(&ctx->poll_disable_cnt));
 
     trace_run_poll_handlers_end(ctx, progress);
 
@@ -552,7 +556,7 @@ static bool run_poll_handlers(AioContext *ctx, int64_t max_ns)
  */
 static bool try_poll_mode(AioContext *ctx, bool blocking)
 {
-    if (blocking && ctx->poll_max_ns && ctx->poll_disable_cnt == 0) {
+    if (blocking && ctx->poll_max_ns && !atomic_read(&ctx->poll_disable_cnt)) {
         /* See qemu_soonest_timeout() uint64_t hack */
         int64_t max_ns = MIN((uint64_t)aio_compute_timeout(ctx),
                              (uint64_t)ctx->poll_ns);
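The poll_disable_change arithmetic above is compact, and the counter update
is deliberately relaxed.  Below is a standalone C demo of the same
bookkeeping, not QEMU code (poll_disable_change() and apply() are invented
names): the change is (new handler disables polling) minus (old handler
disabled polling), and the update is a plain load plus store rather than an
atomic read-modify-write, because writers are already serialized by a lock
(list_lock in QEMU); the concurrent reader in the polling loop only needs
to see some recent value.

/* Standalone demo (not QEMU code) of the poll_disable_change logic. */
#include <stdatomic.h>
#include <stdbool.h>
#include <stdio.h>

static atomic_int poll_disable_cnt;   /* > 0 if some handler cannot poll */

/* A handler disables polling iff it has no io_poll callback.  The change
 * to the counter is (new handler disables) - (old handler disabled). */
static int poll_disable_change(bool had_node, bool old_io_poll,
                               bool new_io_poll)
{
    return !new_io_poll - (had_node && !old_io_poll);
}

static void apply(int change)
{
    /* Writers are serialized elsewhere, so load + store is enough;
     * concurrent readers only need to see *some* recent value. */
    atomic_store(&poll_disable_cnt,
                 atomic_load(&poll_disable_cnt) + change);
}

int main(void)
{
    apply(poll_disable_change(false, false, false)); /* add non-polling: +1 */
    apply(poll_disable_change(true, false, true));   /* gains io_poll:   -1 */
    printf("poll_disable_cnt = %d\n", atomic_load(&poll_disable_cnt));
    return 0;                                        /* prints 0 */
}
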
From e30cffa04d52e35996569f1cfac111be19576bde Mon Sep 17 00:00:00 2001
From: Paolo Bonzini
Date: Wed, 12 Sep 2018 19:10:39 +0200
Subject: [PATCH 2/5] aio-posix: compute timeout before polling

This is a preparation for the next patch, and also a very small
optimization.  Compute the timeout only once, before invoking
try_poll_mode, and adjust it in run_poll_handlers.  The adjustment is
the polling time when polling fails, or zero (non-blocking) if polling
succeeds.

Fixes: 70232b5253a3c4e03ed1ac47ef9246a8ac66c6fa
Signed-off-by: Paolo Bonzini
Message-Id: <20180912171040.1732-3-pbonzini@redhat.com>
Reviewed-by: Fam Zheng
Signed-off-by: Fam Zheng
---
 util/aio-posix.c  | 59 ++++++++++++++++++++++++++++++++++-------------------------
 util/trace-events |  4 ++--
 2 files changed, 36 insertions(+), 27 deletions(-)

diff --git a/util/aio-posix.c b/util/aio-posix.c
index 5c29380575..c54d1ebfb1 100644
--- a/util/aio-posix.c
+++ b/util/aio-posix.c
@@ -490,7 +490,7 @@ static void add_pollfd(AioHandler *node)
     npfd++;
 }
 
-static bool run_poll_handlers_once(AioContext *ctx)
+static bool run_poll_handlers_once(AioContext *ctx, int64_t *timeout)
 {
     bool progress = false;
     AioHandler *node;
@@ -500,6 +500,7 @@ static bool run_poll_handlers_once(AioContext *ctx)
             aio_node_check(ctx, node->is_external) &&
             node->io_poll(node->opaque) &&
             node->opaque != &ctx->notifier) {
+            *timeout = 0;
             progress = true;
         }
 
@@ -522,31 +523,38 @@ static bool run_poll_handlers_once(AioContext *ctx)
  *
  * Returns: true if progress was made, false otherwise
  */
-static bool run_poll_handlers(AioContext *ctx, int64_t max_ns)
+static bool run_poll_handlers(AioContext *ctx, int64_t max_ns, int64_t *timeout)
 {
     bool progress;
-    int64_t end_time;
+    int64_t start_time, elapsed_time;
 
     assert(ctx->notify_me);
     assert(qemu_lockcnt_count(&ctx->list_lock) > 0);
 
-    trace_run_poll_handlers_begin(ctx, max_ns);
-
-    end_time = qemu_clock_get_ns(QEMU_CLOCK_REALTIME) + max_ns;
+    trace_run_poll_handlers_begin(ctx, max_ns, *timeout);
 
+    start_time = qemu_clock_get_ns(QEMU_CLOCK_REALTIME);
     do {
-        progress = run_poll_handlers_once(ctx);
-    } while (!progress && qemu_clock_get_ns(QEMU_CLOCK_REALTIME) < end_time
-             && !atomic_read(&ctx->poll_disable_cnt));
+        progress = run_poll_handlers_once(ctx, timeout);
+        elapsed_time = qemu_clock_get_ns(QEMU_CLOCK_REALTIME) - start_time;
+    } while (!progress && elapsed_time < max_ns &&
+             !atomic_read(&ctx->poll_disable_cnt));
 
-    trace_run_poll_handlers_end(ctx, progress);
+    /* If time has passed with no successful polling, adjust *timeout to
+     * keep the same ending time.
+     */
+    if (*timeout != -1) {
+        *timeout -= MIN(*timeout, elapsed_time);
+    }
 
+    trace_run_poll_handlers_end(ctx, progress, *timeout);
     return progress;
 }
 
 /* try_poll_mode:
  * @ctx: the AioContext
- * @blocking: busy polling is only attempted when blocking is true
+ * @timeout: timeout for blocking wait, computed by the caller and updated if
+ *           polling succeeds.
  *
  * ctx->notify_me must be non-zero so this function can detect aio_notify().
  *
@@ -554,19 +562,16 @@ static bool run_poll_handlers(AioContext *ctx, int64_t max_ns)
  *
  * Returns: true if progress was made, false otherwise
  */
-static bool try_poll_mode(AioContext *ctx, bool blocking)
+static bool try_poll_mode(AioContext *ctx, int64_t *timeout)
 {
-    if (blocking && ctx->poll_max_ns && !atomic_read(&ctx->poll_disable_cnt)) {
-        /* See qemu_soonest_timeout() uint64_t hack */
-        int64_t max_ns = MIN((uint64_t)aio_compute_timeout(ctx),
-                             (uint64_t)ctx->poll_ns);
+    /* See qemu_soonest_timeout() uint64_t hack */
+    int64_t max_ns = MIN((uint64_t)*timeout, (uint64_t)ctx->poll_ns);
 
-        if (max_ns) {
-            poll_set_started(ctx, true);
+    if (max_ns && !atomic_read(&ctx->poll_disable_cnt)) {
+        poll_set_started(ctx, true);
 
-            if (run_poll_handlers(ctx, max_ns)) {
-                return true;
-            }
+        if (run_poll_handlers(ctx, max_ns, timeout)) {
+            return true;
         }
     }
 
@@ -575,7 +580,7 @@ static bool try_poll_mode(AioContext *ctx, bool blocking)
     /* Even if we don't run busy polling, try polling once in case it can make
      * progress and the caller will be able to avoid ppoll(2)/epoll_wait(2).
      */
-    return run_poll_handlers_once(ctx);
+    return run_poll_handlers_once(ctx, timeout);
 }
 
 bool aio_poll(AioContext *ctx, bool blocking)
@@ -605,8 +610,14 @@ bool aio_poll(AioContext *ctx, bool blocking)
         start = qemu_clock_get_ns(QEMU_CLOCK_REALTIME);
     }
 
-    progress = try_poll_mode(ctx, blocking);
-    if (!progress) {
+    timeout = blocking ? aio_compute_timeout(ctx) : 0;
+    progress = try_poll_mode(ctx, &timeout);
+    assert(!(timeout && progress));
+
+    /* If polling is allowed, non-blocking aio_poll does not need the
+     * system call---a single round of run_poll_handlers_once suffices.
+     */
+    if (timeout || atomic_read(&ctx->poll_disable_cnt)) {
         assert(npfd == 0);
 
         /* fill pollfds */
@@ -620,8 +631,6 @@ bool aio_poll(AioContext *ctx, bool blocking)
         }
     }
 
-    timeout = blocking ? aio_compute_timeout(ctx) : 0;
-
     /* wait until next event */
     if (aio_epoll_check_poll(ctx, pollfds, npfd, timeout)) {
         AioHandler epoll_handler;
diff --git a/util/trace-events b/util/trace-events
index 4822434c89..79569b7fdf 100644
--- a/util/trace-events
+++ b/util/trace-events
@@ -1,8 +1,8 @@
 # See docs/devel/tracing.txt for syntax documentation.
 
 # util/aio-posix.c
-run_poll_handlers_begin(void *ctx, int64_t max_ns) "ctx %p max_ns %"PRId64
-run_poll_handlers_end(void *ctx, bool progress) "ctx %p progress %d"
+run_poll_handlers_begin(void *ctx, int64_t max_ns, int64_t timeout) "ctx %p max_ns %"PRId64 " timeout %"PRId64
+run_poll_handlers_end(void *ctx, bool progress, int64_t timeout) "ctx %p progress %d new timeout %"PRId64
 poll_shrink(void *ctx, int64_t old, int64_t new) "ctx %p old %"PRId64" new %"PRId64
 poll_grow(void *ctx, int64_t old, int64_t new) "ctx %p old %"PRId64" new %"PRId64
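The heart of the patch is the timeout bookkeeping in run_poll_handlers.
The following standalone C sketch, not QEMU code, shows the same logic
under simplified assumptions: poll_once() stands in for
run_poll_handlers_once(), the event source is simulated, and the
poll_disable_cnt check is left out since the demo after patch 1 covers it.
The loop busy-polls for at most max_ns; afterwards the elapsed time is
deducted from *timeout, so the blocking wait that follows still honors the
caller's original deadline.

/* Standalone sketch (not QEMU code) of the timeout bookkeeping. */
#define _POSIX_C_SOURCE 199309L
#include <inttypes.h>
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>
#include <time.h>

#define MIN(a, b) ((a) < (b) ? (a) : (b))

static int64_t now_ns(void)
{
    struct timespec ts;
    clock_gettime(CLOCK_MONOTONIC, &ts);
    return (int64_t)ts.tv_sec * 1000000000LL + ts.tv_nsec;
}

/* Pretend an event arrives on the 1000th call; on success, zero *timeout
 * so the caller can skip the blocking system call entirely. */
static bool poll_once(int64_t *timeout)
{
    static int calls;

    if (++calls == 1000) {
        *timeout = 0;
        return true;
    }
    return false;
}

static bool run_poll(int64_t max_ns, int64_t *timeout)
{
    bool progress;
    int64_t start_time = now_ns(), elapsed_time;

    do {
        progress = poll_once(timeout);
        elapsed_time = now_ns() - start_time;
    } while (!progress && elapsed_time < max_ns);

    /* -1 means "block forever" and is left alone, as in the patch. */
    if (*timeout != -1) {
        *timeout -= MIN(*timeout, elapsed_time);
    }
    return progress;
}

int main(void)
{
    int64_t timeout = 5000000;                    /* 5 ms blocking budget */
    bool progress = run_poll(1000000, &timeout);  /* poll for up to 1 ms */

    printf("progress=%d remaining timeout=%" PRId64 " ns\n",
           progress, timeout);
    return 0;
}
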
From cfeb35d6774b2e936046aa9923217818bd160299 Mon Sep 17 00:00:00 2001
From: Paolo Bonzini
Date: Wed, 12 Sep 2018 19:10:40 +0200
Subject: [PATCH 3/5] aio-posix: do skip system call if ctx->notifier polling
 succeeds

Commit 70232b5253 ("aio-posix: Don't count ctx->notifier as progress when
polling", 2018-08-15), by not reporting progress, causes aio_poll to
execute the system call when polling succeeds because of ctx->notifier.
This introduces latency before the call to aio_bh_poll() and negates the
advantages of polling, unfortunately.

The fix builds on the previous patch, separating the effect of polling on
the timeout from the progress reported to aio_poll().  ctx->notifier does
zero the timeout, causing the caller to skip the system call, but it does
not report progress, so that the bug fix of commit 70232b5253 still
stands.

Fixes: 70232b5253a3c4e03ed1ac47ef9246a8ac66c6fa
Signed-off-by: Paolo Bonzini
Message-Id: <20180912171040.1732-4-pbonzini@redhat.com>
Reviewed-by: Fam Zheng
Signed-off-by: Fam Zheng
---
 util/aio-posix.c | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/util/aio-posix.c b/util/aio-posix.c
index c54d1ebfb1..621b3025d8 100644
--- a/util/aio-posix.c
+++ b/util/aio-posix.c
@@ -498,10 +498,11 @@ static bool run_poll_handlers_once(AioContext *ctx, int64_t *timeout)
     QLIST_FOREACH_RCU(node, &ctx->aio_handlers, node) {
         if (!node->deleted && node->io_poll &&
             aio_node_check(ctx, node->is_external) &&
-            node->io_poll(node->opaque) &&
-            node->opaque != &ctx->notifier) {
+            node->io_poll(node->opaque)) {
             *timeout = 0;
-            progress = true;
+            if (node->opaque != &ctx->notifier) {
+                progress = true;
+            }
         }
 
         /* Caller handles freeing deleted nodes. Don't do it here. */
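The distinction the patch draws, zeroing the timeout without reporting
progress, can be shown in isolation.  The sketch below is hypothetical C,
not QEMU code (Handler, notifier_tag, and poll_handlers_once() are
invented names): a successful poll of the context's own notifier still
lets the caller skip the blocking system call, but only other handlers
count as progress, so the fix of commit 70232b5253 is preserved.

/* Hypothetical sketch (not QEMU code) of the patch's control flow. */
#include <stdbool.h>
#include <stdio.h>

typedef struct Handler {
    bool (*io_poll)(void *opaque);
    void *opaque;
} Handler;

static int notifier_tag;               /* stands in for &ctx->notifier */

static bool always_ready(void *opaque) { (void)opaque; return true; }

static bool poll_handlers_once(Handler *handlers, int n, long *timeout)
{
    bool progress = false;

    for (int i = 0; i < n; i++) {
        if (handlers[i].io_poll(handlers[i].opaque)) {
            *timeout = 0;                        /* skip the syscall */
            if (handlers[i].opaque != &notifier_tag) {
                progress = true;                 /* notifier alone is not
                                                  * progress */
            }
        }
    }
    return progress;
}

int main(void)
{
    Handler only_notifier[] = { { always_ready, &notifier_tag } };
    long timeout = 1000;
    bool progress = poll_handlers_once(only_notifier, 1, &timeout);

    /* timeout is zeroed (no ppoll needed) yet progress stays false */
    printf("timeout=%ld progress=%d\n", timeout, progress);
    return 0;
}
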
From b33bd859d12e70757bb2632573c3a1662d967dbf Mon Sep 17 00:00:00 2001
From: Peter Maydell
Date: Mon, 20 Aug 2018 16:55:54 +0100
Subject: [PATCH 4/5] tests/vm: Use -cpu max rather than -cpu host

-cpu max works with any accelerator, so we don't need to reserve it for
the case when KVM is unavailable.  Just use it all the time.
Signed-off-by: Peter Maydell
Message-Id: <20180820155554.23476-1-peter.maydell@linaro.org>
Signed-off-by: Fam Zheng
---
 tests/vm/basevm.py | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/tests/vm/basevm.py b/tests/vm/basevm.py
index 7e58d9e0ca..cafbc6b3a5 100755
--- a/tests/vm/basevm.py
+++ b/tests/vm/basevm.py
@@ -65,6 +65,7 @@ class BaseVM(object):
         self._stdout = self._devnull
         self._args = [ \
             "-nodefaults", "-m", "4G",
+            "-cpu", "max",
             "-netdev", "user,id=vnet,hostfwd=:127.0.0.1:0-:22",
             "-device", "virtio-net-pci,netdev=vnet",
             "-vnc", "127.0.0.1:0,to=20",
@@ -72,11 +73,9 @@ class BaseVM(object):
         if vcpus:
             self._args += ["-smp", str(vcpus)]
         if os.access("/dev/kvm", os.R_OK | os.W_OK):
-            self._args += ["-cpu", "host"]
             self._args += ["-enable-kvm"]
         else:
             logging.info("KVM not available, not using -enable-kvm")
-            self._args += ["-cpu", "max"]
         self._data_args = []
 
     def _download_with_cache(self, url, sha256sum=None):

From 51b3c6b73acae1e3fd3c7d441fc86dd17356695f Mon Sep 17 00:00:00 2001
From: yuchenlin
Date: Thu, 13 Sep 2018 16:29:52 +0800
Subject: [PATCH 5/5] vmdk: align end of file to a sector boundary

There is a rare case in which the size of the last compressed cluster
is larger than the cluster size, which leaves the file unaligned at the
sector boundary.

There are three reasons to fix this.  First, if a vmdk is not aligned
at a sector boundary, there may be undefined behavior; for example,
vbox shows

  VMDK: Compressed image is corrupted 'syno-vm-disk1.vmdk' (VERR_ZIP_CORRUPTED)

when we try to import an ova with an unaligned vmdk.  Second, every
other cluster_sector is aligned to a sector, so the last one should be
too.  Third, it eases reading with sector-based I/Os.

Signed-off-by: yuchenlin
Message-Id: <20180913082952.3675-1-yuchenlin@synology.com>
Reviewed-by: Fam Zheng
Signed-off-by: Fam Zheng
---
 block/vmdk.c | 21 +++++++++++++++++++++
 1 file changed, 21 insertions(+)

diff --git a/block/vmdk.c b/block/vmdk.c
index a9d0084e36..2c9e86d98f 100644
--- a/block/vmdk.c
+++ b/block/vmdk.c
@@ -1698,6 +1698,27 @@ static int coroutine_fn
 vmdk_co_pwritev_compressed(BlockDriverState *bs, uint64_t offset,
                            uint64_t bytes, QEMUIOVector *qiov)
 {
+    if (bytes == 0) {
+        /* The caller will write bytes 0 to signal EOF.
+         * When we receive it, we align EOF to a sector boundary. */
+        BDRVVmdkState *s = bs->opaque;
+        int i, ret;
+        int64_t length;
+
+        for (i = 0; i < s->num_extents; i++) {
+            length = bdrv_getlength(s->extents[i].file->bs);
+            if (length < 0) {
+                return length;
+            }
+            length = QEMU_ALIGN_UP(length, BDRV_SECTOR_SIZE);
+            ret = bdrv_truncate(s->extents[i].file, length,
+                                PREALLOC_MODE_OFF, NULL);
+            if (ret < 0) {
+                return ret;
+            }
+        }
+        return 0;
+    }
     return vmdk_co_pwritev(bs, offset, bytes, qiov, 0);
 }
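The rule the patch applies is the usual round-up-to-sector idiom.  As a
standalone illustration, not QEMU code, the C sketch below does to an
ordinary file what the patch does to each vmdk extent: round the current
length up to the 512-byte sector size (QEMU_ALIGN_UP expanded by hand)
and truncate the file up to that length, which appends zero bytes that a
sector-based reader can consume safely.

/* Standalone sketch (not QEMU code): pad a file to the next 512-byte
 * sector boundary, the way the patch pads each vmdk extent at EOF. */
#include <fcntl.h>
#include <stdint.h>
#include <stdio.h>
#include <sys/stat.h>
#include <unistd.h>

#define SECTOR_SIZE 512

/* Round x up to a multiple of n; n must be a power of two. */
static int64_t align_up(int64_t x, int64_t n)
{
    return (x + n - 1) & ~(n - 1);
}

static int pad_to_sector(const char *path)
{
    struct stat st;
    int fd = open(path, O_WRONLY);

    if (fd < 0 || fstat(fd, &st) < 0) {
        perror(path);
        return -1;
    }
    /* Growing a file with ftruncate appends zero bytes. */
    if (ftruncate(fd, align_up(st.st_size, SECTOR_SIZE)) < 0) {
        perror("ftruncate");
        close(fd);
        return -1;
    }
    return close(fd);
}

int main(int argc, char **argv)
{
    return argc == 2 ? -pad_to_sector(argv[1]) : 1;
}

Run it on a scratch file and its size snaps to the next multiple of 512;
rerunning it is a no-op, just as resending the zero-byte write would be.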