From d38d6de2a1fe209c849e8392f99e214d0a2db319 Mon Sep 17 00:00:00 2001
From: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com>
Date: Tue, 10 Sep 2019 12:03:10 +0300
Subject: [PATCH 1/4] util/ioc.c: try to reassure Coverity about
 qemu_iovec_init_extended

Make it more obvious, that filling qiov corresponds to qiov allocation,
which in turn corresponds to total_niov calculation, based on mid_niov
(not mid_len). Still add an assertion to show that there should be no
difference.

[Added mingw "error: 'mid_iov' may be used uninitialized in this
function" compiler error fix suggested by Vladimir.
--Stefan]

Reported-by: Coverity (CID 1405302)
Signed-off-by: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com>
Message-id: 20190910090310.14032-1-vsementsov@virtuozzo.com
Suggested-by: Peter Maydell <peter.maydell@linaro.org>
Signed-off-by: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com>
Message-Id: <20190910090310.14032-1-vsementsov@virtuozzo.com>
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>

fixup! util/ioc.c: try to reassure Coverity about qemu_iovec_init_extended
---
 util/iov.c | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/util/iov.c b/util/iov.c
index 5059e10431..45ef3043ee 100644
--- a/util/iov.c
+++ b/util/iov.c
@@ -423,7 +423,7 @@ void qemu_iovec_init_extended(
 {
     size_t mid_head, mid_tail;
     int total_niov, mid_niov = 0;
-    struct iovec *p, *mid_iov;
+    struct iovec *p, *mid_iov = NULL;
 
     if (mid_len) {
         mid_iov = qiov_slice(mid_qiov, mid_offset, mid_len,
@@ -446,7 +446,8 @@ void qemu_iovec_init_extended(
         p++;
     }
 
-    if (mid_len) {
+    assert(!mid_niov == !mid_len);
+    if (mid_niov) {
         memcpy(p, mid_iov, mid_niov * sizeof(*p));
         p[0].iov_base = (uint8_t *)p[0].iov_base + mid_head;
         p[0].iov_len -= mid_head;

From 9b92fbcf45ed6ed1daceba8480b0d9e8a327b5ce Mon Sep 17 00:00:00 2001
From: Sergio Lopez <slp@redhat.com>
Date: Mon, 16 Sep 2019 13:24:12 +0200
Subject: [PATCH 2/4] virtio-blk: schedule virtio_notify_config to run on main
 context

virtio_notify_config() needs to acquire the global mutex, which isn't
allowed from an iothread, and may lead to a deadlock like this:

 - main thead
  * Has acquired: qemu_global_mutex.
  * Is trying the acquire: iothread AioContext lock via
    AIO_WAIT_WHILE (after aio_poll).

 - iothread
  * Has acquired: AioContext lock.
  * Is trying to acquire: qemu_global_mutex (via
    virtio_notify_config->prepare_mmio_access).

If virtio_blk_resize() is called from an iothread, schedule
virtio_notify_config() to be run in the main context BH.

[Removed unnecessary newline as suggested by Kevin Wolf
<kwolf@redhat.com>.
--Stefan]

Signed-off-by: Sergio Lopez <slp@redhat.com>
Reviewed-by: Kevin Wolf <kwolf@redhat.com>
Message-id: 20190916112411.21636-1-slp@redhat.com
Message-Id: <20190916112411.21636-1-slp@redhat.com>
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
---
 hw/block/virtio-blk.c | 16 +++++++++++++++-
 1 file changed, 15 insertions(+), 1 deletion(-)

diff --git a/hw/block/virtio-blk.c b/hw/block/virtio-blk.c
index 18851601cb..ed2ddebd2b 100644
--- a/hw/block/virtio-blk.c
+++ b/hw/block/virtio-blk.c
@@ -16,6 +16,7 @@
 #include "qemu/iov.h"
 #include "qemu/module.h"
 #include "qemu/error-report.h"
+#include "qemu/main-loop.h"
 #include "trace.h"
 #include "hw/block/block.h"
 #include "hw/qdev-properties.h"
@@ -1086,11 +1087,24 @@ static int virtio_blk_load_device(VirtIODevice *vdev, QEMUFile *f,
     return 0;
 }
 
+static void virtio_resize_cb(void *opaque)
+{
+    VirtIODevice *vdev = opaque;
+
+    assert(qemu_get_current_aio_context() == qemu_get_aio_context());
+    virtio_notify_config(vdev);
+}
+
 static void virtio_blk_resize(void *opaque)
 {
     VirtIODevice *vdev = VIRTIO_DEVICE(opaque);
 
-    virtio_notify_config(vdev);
+    /*
+     * virtio_notify_config() needs to acquire the global mutex,
+     * so it can't be called from an iothread. Instead, schedule
+     * it to be run in the main context BH.
+     */
+    aio_bh_schedule_oneshot(qemu_get_aio_context(), virtio_resize_cb, vdev);
 }
 
 static const BlockDevOps virtio_block_ops = {

From 8644476e51fa33d7ab637b36b1306d0d4c66177c Mon Sep 17 00:00:00 2001
From: Max Reitz <mreitz@redhat.com>
Date: Tue, 1 Oct 2019 19:48:26 +0200
Subject: [PATCH 3/4] block: Skip COR for inactive nodes

We must not write data to inactive nodes, and a COR is certainly
something we can simply not do without upsetting anyone.  So skip COR
operations on inactive nodes.

Signed-off-by: Max Reitz <mreitz@redhat.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
Message-id: 20191001174827.11081-2-mreitz@redhat.com
Message-Id: <20191001174827.11081-2-mreitz@redhat.com>
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
---
 block/io.c | 41 +++++++++++++++++++++++++++--------------
 1 file changed, 27 insertions(+), 14 deletions(-)

diff --git a/block/io.c b/block/io.c
index f8c3596131..4f9ee97c2b 100644
--- a/block/io.c
+++ b/block/io.c
@@ -1246,11 +1246,18 @@ static int coroutine_fn bdrv_co_do_copy_on_readv(BdrvChild *child,
     int max_transfer = MIN_NON_ZERO(bs->bl.max_transfer,
                                     BDRV_REQUEST_MAX_BYTES);
     unsigned int progress = 0;
+    bool skip_write;
 
     if (!drv) {
         return -ENOMEDIUM;
     }
 
+    /*
+     * Do not write anything when the BDS is inactive.  That is not
+     * allowed, and it would not help.
+     */
+    skip_write = (bs->open_flags & BDRV_O_INACTIVE);
+
     /* FIXME We cannot require callers to have write permissions when all they
      * are doing is a read request. If we did things right, write permissions
      * would be obtained anyway, but internally by the copy-on-read code. As
@@ -1274,23 +1281,29 @@ static int coroutine_fn bdrv_co_do_copy_on_readv(BdrvChild *child,
     while (cluster_bytes) {
         int64_t pnum;
 
-        ret = bdrv_is_allocated(bs, cluster_offset,
-                                MIN(cluster_bytes, max_transfer), &pnum);
-        if (ret < 0) {
-            /* Safe to treat errors in querying allocation as if
-             * unallocated; we'll probably fail again soon on the
-             * read, but at least that will set a decent errno.
-             */
+        if (skip_write) {
+            ret = 1; /* "already allocated", so nothing will be copied */
             pnum = MIN(cluster_bytes, max_transfer);
-        }
+        } else {
+            ret = bdrv_is_allocated(bs, cluster_offset,
+                                    MIN(cluster_bytes, max_transfer), &pnum);
+            if (ret < 0) {
+                /*
+                 * Safe to treat errors in querying allocation as if
+                 * unallocated; we'll probably fail again soon on the
+                 * read, but at least that will set a decent errno.
+                 */
+                pnum = MIN(cluster_bytes, max_transfer);
+            }
 
-        /* Stop at EOF if the image ends in the middle of the cluster */
-        if (ret == 0 && pnum == 0) {
-            assert(progress >= bytes);
-            break;
-        }
+            /* Stop at EOF if the image ends in the middle of the cluster */
+            if (ret == 0 && pnum == 0) {
+                assert(progress >= bytes);
+                break;
+            }
 
-        assert(skip_bytes < pnum);
+            assert(skip_bytes < pnum);
+        }
 
         if (ret <= 0) {
             QEMUIOVector local_qiov;

From 4d804b5305ffb4d5fa414c38d4f1bdfb987c8d0b Mon Sep 17 00:00:00 2001
From: Max Reitz <mreitz@redhat.com>
Date: Tue, 1 Oct 2019 19:48:27 +0200
Subject: [PATCH 4/4] iotests/262: Switch source/dest VM launch order

Launching the destination VM before the source VM gives us a regression
test for HEAD^:

The guest device causes a read from the disk image through
guess_disk_lchs().  This will not work if the first sector (containing
the partition table) is yet unallocated, we use COR, and the node is
inactive.

By launching the source VM before the destination, however, the COR
filter on the source will allocate that area in the image shared between
both VMs, thus the problem will not become apparent.

Switching the launch order causes the sector to still be unallocated
when guess_disk_lchs() runs on the inactive node in the destination VM,
and thus we get our test case.

Signed-off-by: Max Reitz <mreitz@redhat.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
Message-id: 20191001174827.11081-3-mreitz@redhat.com
Message-Id: <20191001174827.11081-3-mreitz@redhat.com>
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
---
 tests/qemu-iotests/262     | 12 ++++++------
 tests/qemu-iotests/262.out |  6 +++---
 2 files changed, 9 insertions(+), 9 deletions(-)

diff --git a/tests/qemu-iotests/262 b/tests/qemu-iotests/262
index 398f63587e..0963daa806 100755
--- a/tests/qemu-iotests/262
+++ b/tests/qemu-iotests/262
@@ -54,12 +54,6 @@ with iotests.FilePath('img') as img_path, \
 
     os.mkfifo(fifo)
 
-    iotests.log('Launching source VM...')
-    add_opts(vm_a)
-    vm_a.launch()
-
-    vm_a.enable_migration_events('A')
-
     iotests.log('Launching destination VM...')
     add_opts(vm_b)
     vm_b.add_incoming("exec: cat '%s'" % (fifo))
@@ -67,6 +61,12 @@ with iotests.FilePath('img') as img_path, \
 
     vm_b.enable_migration_events('B')
 
+    iotests.log('Launching source VM...')
+    add_opts(vm_a)
+    vm_a.launch()
+
+    vm_a.enable_migration_events('A')
+
     iotests.log('Starting migration to B...')
     iotests.log(vm_a.qmp('migrate', uri='exec:cat >%s' % (fifo)))
     with iotests.Timeout(3, 'Migration does not complete'):
diff --git a/tests/qemu-iotests/262.out b/tests/qemu-iotests/262.out
index 5a58e5e9f8..8e04c496c4 100644
--- a/tests/qemu-iotests/262.out
+++ b/tests/qemu-iotests/262.out
@@ -1,9 +1,9 @@
-Launching source VM...
-Enabling migration QMP events on A...
-{"return": {}}
 Launching destination VM...
 Enabling migration QMP events on B...
 {"return": {}}
+Launching source VM...
+Enabling migration QMP events on A...
+{"return": {}}
 Starting migration to B...
 {"return": {}}
 {"data": {"status": "setup"}, "event": "MIGRATION", "timestamp": {"microseconds": "USECS", "seconds": "SECS"}}