Migration pull request

-----BEGIN PGP SIGNATURE----- iQIzBAABCAAdFiEEGJn/jt6/WMzuA0uC9IfvGFhy1yMFAl4lgSEACgkQ9IfvGFhy 1yO8eg/+Ip2mP5s4BfYTPIvWrUK2zNxr1R469EWPW5slKMpXimuTBi4HEMNuMqRm kVD/BPq2wwAPKkNt0AdSJ5URl9Nk2Ip/Mpf1Ov9Su9biqjG6tY5T21LxQg2KvvDv 8fl3nK2sqVqRHuQy/rRkfMdHyBzNaXSG8Y0LADlLJ8ceXDocr/UU7DWxBkidc/SF xUczpuds3SQeHgGDR61aScKF+7qQcXkjpjleQHIKV2aIymj0EqS8qYa8rp/g2XAu 6Jq0spXIt2mI5H1mxOeDN4xIWwvJWlqxbEdnspAmXBfJpt05878C30xCzTW33NiN 7OjCVyYfyi6MnnSL82D3uAcWA7HWmCQGg+laxYHwP+Ltq2FF5aOkk0A5skDKPZBY CLUNUrHHw+RAKTSwIJbBE7noD/dpsKy83jLB5JlzkO8Nz2+kv2zAPxes3Bzk6Zmr nyl2bcnu95pZcpR4gL5a0EpyMyHw/TLrHC4/Y93VTQKqTcr2+lapbwOGvoqzrimp 9EDf6/GXoLcc0govu66P3qlsotdyyxvp90j5v/C4Ew2oWjYhfWHIhWXS48+/ltcS +B2wPFWykY2Wt+aiAlUV+Sucl7DLC83TLnjBts1JpqpEJKaPKgWPIVyVXAwY3U2E Jxacd1viBKnxRZ4XAcqpavc+NhiFVFNafS9yIVvbb3iA8hO9HFQ= =DHw5 -----END PGP SIGNATURE----- Merge remote-tracking branch 'remotes/juanquintela/tags/migration-pull-pull-request' into staging Migration pull request # gpg: Signature made Mon 20 Jan 2020 10:29:53 GMT # gpg: using RSA key 1899FF8EDEBF58CCEE034B82F487EF185872D723 # gpg: Good signature from "Juan Quintela <quintela@redhat.com>" [full] # gpg: aka "Juan Quintela <quintela@trasno.org>" [full] # Primary key fingerprint: 1899 FF8E DEBF 58CC EE03 4B82 F487 EF18 5872 D723 * remotes/juanquintela/tags/migration-pull-pull-request: (29 commits) multifd: Be consistent about using uint64_t migration: Support QLIST migration apic: Use 32bit APIC ID for migration instance ID migration: Change SaveStateEntry.instance_id into uint32_t migration: Define VMSTATE_INSTANCE_ID_ANY Bug #1829242 correction. migration/multifd: fix destroyed mutex access in terminating multifd threads migration/multifd: fix nullptr access in terminating multifd threads migration/multifd: not use multifd during postcopy migration/multifd: clean pages after filling packet migration/postcopy: enable compress during postcopy migration/postcopy: enable random order target page arrival migration/postcopy: set all_zero to true on the first target page migration/postcopy: count target page number to decide the place_needed migration/postcopy: wait for decompress thread in precopy migration/postcopy: reduce memset when it is zero page and matches_target_page_size migration/ram: Yield periodically to the main loop migration: savevm_state_handler_insert: constant-time element insertion migration: add savevm_state_handler_remove() misc: use QEMU_IS_ALIGNED ... Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2020-01-20 10:41:27 +00:00 · 2020-01-20 10:41:27 +00:00 · 26deea0026
parent 7fb38daf25 ddac5cb2d9
commit 26deea0026
27 changed files with 659 additions and 132 deletions
--- a/backends/dbus-vmstate.c
+++ b/backends/dbus-vmstate.c
@ -412,7 +412,8 @@ dbus_vmstate_complete(UserCreatable *uc, Error **errp)
        return;
    }

-    if (vmstate_register(VMSTATE_IF(self), -1, &dbus_vmstate, self) < 0) {
+    if (vmstate_register(VMSTATE_IF(self), VMSTATE_INSTANCE_ID_ANY,
+                         &dbus_vmstate, self) < 0) {
        error_setg(errp, "Failed to register vmstate");
    }
 }
--- a/exec.c
+++ b/exec.c
@ -3895,7 +3895,7 @@ int ram_block_discard_range(RAMBlock *rb, uint64_t start, size_t length)

    uint8_t *host_startaddr = rb->host + start;

-    if ((uintptr_t)host_startaddr & (rb->page_size - 1)) {
+    if (!QEMU_PTR_IS_ALIGNED(host_startaddr, rb->page_size)) {
        error_report("ram_block_discard_range: Unaligned start address: %p",
                     host_startaddr);
        goto err;
@ -3903,7 +3903,7 @@ int ram_block_discard_range(RAMBlock *rb, uint64_t start, size_t length)

    if ((start + length) <= rb->used_length) {
        bool need_madvise, need_fallocate;
-        if (length & (rb->page_size - 1)) {
+        if (!QEMU_IS_ALIGNED(length, rb->page_size)) {
            error_report("ram_block_discard_range: Unaligned length: %zx",
                         length);
            goto err;
--- a/hw/arm/stellaris.c
+++ b/hw/arm/stellaris.c
@ -708,7 +708,7 @@ static int stellaris_sys_init(uint32_t base, qemu_irq irq,
    memory_region_init_io(&s->iomem, NULL, &ssys_ops, s, "ssys", 0x00001000);
    memory_region_add_subregion(get_system_memory(), base, &s->iomem);
    ssys_reset(s);
-    vmstate_register(NULL, -1, &vmstate_stellaris_sys, s);
+    vmstate_register(NULL, VMSTATE_INSTANCE_ID_ANY, &vmstate_stellaris_sys, s);
    return 0;
 }

--- a/hw/core/qdev.c
+++ b/hw/core/qdev.c
@ -879,7 +879,8 @@ static void device_set_realized(Object *obj, bool value, Error **errp)

        if (qdev_get_vmsd(dev)) {
            if (vmstate_register_with_alias_id(VMSTATE_IF(dev),
-                                               -1, qdev_get_vmsd(dev), dev,
+                                               VMSTATE_INSTANCE_ID_ANY,
+                                               qdev_get_vmsd(dev), dev,
                                               dev->instance_id_alias,
                                               dev->alias_required_for_version,
                                               &local_err) < 0) {
--- a/hw/display/ads7846.c
+++ b/hw/display/ads7846.c
@ -154,7 +154,7 @@ static void ads7846_realize(SSISlave *d, Error **errp)

    ads7846_int_update(s);

-    vmstate_register(NULL, -1, &vmstate_ads7846, s);
+    vmstate_register(NULL, VMSTATE_INSTANCE_ID_ANY, &vmstate_ads7846, s);
 }

 static void ads7846_class_init(ObjectClass *klass, void *data)
--- a/hw/i2c/core.c
+++ b/hw/i2c/core.c
@ -61,7 +61,7 @@ I2CBus *i2c_init_bus(DeviceState *parent, const char *name)

    bus = I2C_BUS(qbus_create(TYPE_I2C_BUS, parent, name));
    QLIST_INIT(&bus->current_devs);
-    vmstate_register(NULL, -1, &vmstate_i2c_bus, bus);
+    vmstate_register(NULL, VMSTATE_INSTANCE_ID_ANY, &vmstate_i2c_bus, bus);
    return bus;
 }

--- a/hw/input/stellaris_input.c
+++ b/hw/input/stellaris_input.c
@ -88,5 +88,6 @@ void stellaris_gamepad_init(int n, qemu_irq *irq, const int *keycode)
    }
    s->num_buttons = n;
    qemu_add_kbd_event_handler(stellaris_gamepad_put_key, s);
-    vmstate_register(NULL, -1, &vmstate_stellaris_gamepad, s);
+    vmstate_register(NULL, VMSTATE_INSTANCE_ID_ANY,
+                     &vmstate_stellaris_gamepad, s);
 }
--- a/hw/intc/apic_common.c
+++ b/hw/intc/apic_common.c
@ -268,7 +268,10 @@ static void apic_common_realize(DeviceState *dev, Error **errp)
    APICCommonState *s = APIC_COMMON(dev);
    APICCommonClass *info;
    static DeviceState *vapic;
-    int instance_id = s->id;
+    uint32_t instance_id = s->initial_apic_id;
+
+    /* Normally initial APIC ID should be no more than hundreds */
+    assert(instance_id != VMSTATE_INSTANCE_ID_ANY);

    info = APIC_COMMON_GET_CLASS(s);
    info->realize(dev, errp);
@ -284,7 +287,7 @@ static void apic_common_realize(DeviceState *dev, Error **errp)
    }

    if (s->legacy_instance_id) {
-        instance_id = -1;
+        instance_id = VMSTATE_INSTANCE_ID_ANY;
    }
    vmstate_register_with_alias_id(NULL, instance_id, &vmstate_apic_common,
                                   s, -1, 0, NULL);
--- a/hw/misc/max111x.c
+++ b/hw/misc/max111x.c
@ -146,7 +146,8 @@ static int max111x_init(SSISlave *d, int inputs)
    s->input[7] = 0x80;
    s->com = 0;

-    vmstate_register(VMSTATE_IF(dev), -1, &vmstate_max111x, s);
+    vmstate_register(VMSTATE_IF(dev), VMSTATE_INSTANCE_ID_ANY,
+                     &vmstate_max111x, s);
    return 0;
 }

--- a/hw/net/eepro100.c
+++ b/hw/net/eepro100.c
@ -1874,7 +1874,8 @@ static void e100_nic_realize(PCIDevice *pci_dev, Error **errp)

    s->vmstate = g_memdup(&vmstate_eepro100, sizeof(vmstate_eepro100));
    s->vmstate->name = qemu_get_queue(s->nic)->model;
-    vmstate_register(VMSTATE_IF(&pci_dev->qdev), -1, s->vmstate, s);
+    vmstate_register(VMSTATE_IF(&pci_dev->qdev), VMSTATE_INSTANCE_ID_ANY,
+                     s->vmstate, s);
 }

 static void eepro100_instance_init(Object *obj)
--- a/hw/pci/pci.c
+++ b/hw/pci/pci.c
@ -122,7 +122,7 @@ static void pci_bus_realize(BusState *qbus, Error **errp)
    bus->machine_done.notify = pcibus_machine_done;
    qemu_add_machine_init_done_notifier(&bus->machine_done);

-    vmstate_register(NULL, -1, &vmstate_pcibus, bus);
+    vmstate_register(NULL, VMSTATE_INSTANCE_ID_ANY, &vmstate_pcibus, bus);
 }

 static void pcie_bus_realize(BusState *qbus, Error **errp)
--- a/hw/ppc/spapr.c
+++ b/hw/ppc/spapr.c
@ -2948,7 +2948,7 @@ static void spapr_machine_init(MachineState *machine)
     * interface, this is a legacy from the sPAPREnvironment structure
     * which predated MachineState but had a similar function */
    vmstate_register(NULL, 0, &vmstate_spapr, spapr);
-    register_savevm_live("spapr/htab", -1, 1,
+    register_savevm_live("spapr/htab", VMSTATE_INSTANCE_ID_ANY, 1,
                         &savevm_htab_handlers, spapr);

    qbus_set_hotplug_handler(sysbus_get_default(), OBJECT(machine),
--- a/hw/timer/arm_timer.c
+++ b/hw/timer/arm_timer.c
@ -180,7 +180,7 @@ static arm_timer_state *arm_timer_init(uint32_t freq)
    s->control = TIMER_CTRL_IE;

    s->timer = ptimer_init(arm_timer_tick, s, PTIMER_POLICY_DEFAULT);
-    vmstate_register(NULL, -1, &vmstate_arm_timer, s);
+    vmstate_register(NULL, VMSTATE_INSTANCE_ID_ANY, &vmstate_arm_timer, s);
    return s;
 }

--- a/hw/tpm/tpm_emulator.c
+++ b/hw/tpm/tpm_emulator.c
@ -914,7 +914,8 @@ static void tpm_emulator_inst_init(Object *obj)
    tpm_emu->cur_locty_number = ~0;
    qemu_mutex_init(&tpm_emu->mutex);

-    vmstate_register(NULL, -1, &vmstate_tpm_emulator, obj);
+    vmstate_register(NULL, VMSTATE_INSTANCE_ID_ANY,
+                     &vmstate_tpm_emulator, obj);
 }

 /*
--- a/include/migration/register.h
+++ b/include/migration/register.h
@ -71,7 +71,7 @@ typedef struct SaveVMHandlers {
 } SaveVMHandlers;

 int register_savevm_live(const char *idstr,
-                         int instance_id,
+                         uint32_t instance_id,
                         int version_id,
                         const SaveVMHandlers *ops,
                         void *opaque);
--- a/include/migration/vmstate.h
+++ b/include/migration/vmstate.h
@ -229,6 +229,7 @@ extern const VMStateInfo vmstate_info_tmp;
 extern const VMStateInfo vmstate_info_bitmap;
 extern const VMStateInfo vmstate_info_qtailq;
 extern const VMStateInfo vmstate_info_gtree;
+extern const VMStateInfo vmstate_info_qlist;

 #define type_check_2darray(t1,t2,n,m) ((t1(*)[n][m])0 - (t2*)0)
 /*
@ -798,6 +799,26 @@ extern const VMStateInfo vmstate_info_gtree;
    .offset       = offsetof(_state, _field),                                  \
 }

+/*
+ * For migrating a QLIST
+ * Target QLIST needs be properly initialized.
+ * _type: type of QLIST element
+ * _next: name of QLIST_ENTRY entry field in QLIST element
+ * _vmsd: VMSD for QLIST element
+ * size: size of QLIST element
+ * start: offset of QLIST_ENTRY in QTAILQ element
+ */
+#define VMSTATE_QLIST_V(_field, _state, _version, _vmsd, _type, _next)  \
+{                                                                        \
+    .name         = (stringify(_field)),                                 \
+    .version_id   = (_version),                                          \
+    .vmsd         = &(_vmsd),                                            \
+    .size         = sizeof(_type),                                       \
+    .info         = &vmstate_info_qlist,                                 \
+    .offset       = offsetof(_state, _field),                            \
+    .start        = offsetof(_type, _next),                              \
+}
+
 /* _f : field name
   _f_n : num of elements field_name
   _n : num of elements
@ -1157,8 +1178,10 @@ int vmstate_save_state_v(QEMUFile *f, const VMStateDescription *vmsd,

 bool vmstate_save_needed(const VMStateDescription *vmsd, void *opaque);

+#define  VMSTATE_INSTANCE_ID_ANY  -1
+
 /* Returns: 0 on success, -1 on failure */
-int vmstate_register_with_alias_id(VMStateIf *obj, int instance_id,
+int vmstate_register_with_alias_id(VMStateIf *obj, uint32_t instance_id,
                                   const VMStateDescription *vmsd,
                                   void *base, int alias_id,
                                   int required_for_version,
--- a/include/qemu/queue.h
+++ b/include/qemu/queue.h
@ -501,4 +501,43 @@ union {                                                                 \
        QTAILQ_RAW_TQH_CIRC(head)->tql_prev = QTAILQ_RAW_TQE_CIRC(elm, entry);  \
 } while (/*CONSTCOND*/0)

+#define QLIST_RAW_FIRST(head)                                                  \
+        field_at_offset(head, 0, void *)
+
+#define QLIST_RAW_NEXT(elm, entry)                                             \
+        field_at_offset(elm, entry, void *)
+
+#define QLIST_RAW_PREVIOUS(elm, entry)                                         \
+        field_at_offset(elm, entry + sizeof(void *), void *)
+
+#define QLIST_RAW_FOREACH(elm, head, entry)                                    \
+        for ((elm) = *QLIST_RAW_FIRST(head);                                   \
+             (elm);                                                            \
+             (elm) = *QLIST_RAW_NEXT(elm, entry))
+
+#define QLIST_RAW_INSERT_HEAD(head, elm, entry) do {                           \
+        void *first = *QLIST_RAW_FIRST(head);                                  \
+        *QLIST_RAW_FIRST(head) = elm;                                          \
+        *QLIST_RAW_PREVIOUS(elm, entry) = QLIST_RAW_FIRST(head);               \
+        if (first) {                                                           \
+            *QLIST_RAW_NEXT(elm, entry) = first;                               \
+            *QLIST_RAW_PREVIOUS(first, entry) = QLIST_RAW_NEXT(elm, entry);    \
+        } else {                                                               \
+            *QLIST_RAW_NEXT(elm, entry) = NULL;                                \
+        }                                                                      \
+} while (0)
+
+#define QLIST_RAW_REVERSE(head, elm, entry) do {                               \
+        void *iter = *QLIST_RAW_FIRST(head), *prev = NULL, *next;              \
+        while (iter) {                                                         \
+            next = *QLIST_RAW_NEXT(iter, entry);                               \
+            *QLIST_RAW_PREVIOUS(iter, entry) = QLIST_RAW_NEXT(next, entry);    \
+            *QLIST_RAW_NEXT(iter, entry) = prev;                               \
+            prev = iter;                                                       \
+            iter = next;                                                       \
+        }                                                                      \
+        *QLIST_RAW_FIRST(head) = prev;                                         \
+        *QLIST_RAW_PREVIOUS(prev, entry) = QLIST_RAW_FIRST(head);              \
+} while (0)
+
 #endif /* QEMU_SYS_QUEUE_H */
--- a/migration/migration.c
+++ b/migration/migration.c
@ -1005,17 +1005,6 @@ static bool migrate_caps_check(bool *cap_list,
 #endif

    if (cap_list[MIGRATION_CAPABILITY_POSTCOPY_RAM]) {
-        if (cap_list[MIGRATION_CAPABILITY_COMPRESS]) {
-            /* The decompression threads asynchronously write into RAM
-             * rather than use the atomic copies needed to avoid
-             * userfaulting.  It should be possible to fix the decompression
-             * threads for compatibility in future.
-             */
-            error_setg(errp, "Postcopy is not currently compatible "
-                       "with compression");
-            return false;
-        }
-
        /* This check is reasonably expensive, so only when it's being
         * set the first time, also it's only the destination that needs
         * special support.
@ -1784,6 +1773,7 @@ void qmp_migrate_incoming(const char *uri, Error **errp)
    }
    if (!once) {
        error_setg(errp, "The incoming migration has already been started");
+        return;
    }

    qemu_start_incoming_migration(uri, &local_err);
@ -2035,11 +2025,10 @@ void qmp_migrate_set_downtime(double value, Error **errp)
    }

    value *= 1000; /* Convert to milliseconds */
-    value = MAX(0, MIN(INT64_MAX, value));

    MigrateSetParameters p = {
        .has_downtime_limit = true,
-        .downtime_limit = value,
+        .downtime_limit = (int64_t)value,
    };

    qmp_migrate_set_parameters(&p, errp);
@ -3224,6 +3213,37 @@ void migration_consume_urgent_request(void)
    qemu_sem_wait(&migrate_get_current()->rate_limit_sem);
 }

+/* Returns true if the rate limiting was broken by an urgent request */
+bool migration_rate_limit(void)
+{
+    int64_t now = qemu_clock_get_ms(QEMU_CLOCK_REALTIME);
+    MigrationState *s = migrate_get_current();
+
+    bool urgent = false;
+    migration_update_counters(s, now);
+    if (qemu_file_rate_limit(s->to_dst_file)) {
+        /*
+         * Wait for a delay to do rate limiting OR
+         * something urgent to post the semaphore.
+         */
+        int ms = s->iteration_start_time + BUFFER_DELAY - now;
+        trace_migration_rate_limit_pre(ms);
+        if (qemu_sem_timedwait(&s->rate_limit_sem, ms) == 0) {
+            /*
+             * We were woken by one or more urgent things but
+             * the timedwait will have consumed one of them.
+             * The service routine for the urgent wake will dec
+             * the semaphore itself for each item it consumes,
+             * so add this one we just eat back.
+             */
+            qemu_sem_post(&s->rate_limit_sem);
+            urgent = true;
+        }
+        trace_migration_rate_limit_post(urgent);
+    }
+    return urgent;
+}
+
 /*
 * Master migration thread on the source VM.
 * It drives the migration and pumps the data down the outgoing channel.
@ -3290,8 +3310,6 @@ static void *migration_thread(void *opaque)
    trace_migration_thread_setup_complete();

    while (migration_is_active(s)) {
-        int64_t current_time;
-
        if (urgent || !qemu_file_rate_limit(s->to_dst_file)) {
            MigIterateState iter_state = migration_iteration_run(s);
            if (iter_state == MIG_ITERATE_SKIP) {
@ -3318,29 +3336,7 @@ static void *migration_thread(void *opaque)
            update_iteration_initial_status(s);
        }

-        current_time = qemu_clock_get_ms(QEMU_CLOCK_REALTIME);
-
-        migration_update_counters(s, current_time);
-
-        urgent = false;
-        if (qemu_file_rate_limit(s->to_dst_file)) {
-            /* Wait for a delay to do rate limiting OR
-             * something urgent to post the semaphore.
-             */
-            int ms = s->iteration_start_time + BUFFER_DELAY - current_time;
-            trace_migration_thread_ratelimit_pre(ms);
-            if (qemu_sem_timedwait(&s->rate_limit_sem, ms) == 0) {
-                /* We were worken by one or more urgent things but
-                 * the timedwait will have consumed one of them.
-                 * The service routine for the urgent wake will dec
-                 * the semaphore itself for each item it consumes,
-                 * so add this one we just eat back.
-                 */
-                qemu_sem_post(&s->rate_limit_sem);
-                urgent = true;
-            }
-            trace_migration_thread_ratelimit_post(urgent);
-        }
+        urgent = migration_rate_limit();
    }

    trace_migration_thread_after_loop();
--- a/migration/migration.h
+++ b/migration/migration.h
@ -341,5 +341,6 @@ int foreach_not_ignored_block(RAMBlockIterFunc func, void *opaque);

 void migration_make_urgent_request(void);
 void migration_consume_urgent_request(void);
+bool migration_rate_limit(void);

 #endif
--- a/migration/ram.c
+++ b/migration/ram.c
@ -703,7 +703,7 @@ typedef struct {

 static int multifd_send_initial_packet(MultiFDSendParams *p, Error **errp)
 {
-    MultiFDInit_t msg;
+    MultiFDInit_t msg = {};
    int ret;

    msg.magic = cpu_to_be32(MULTIFD_MAGIC);
@ -803,7 +803,10 @@ static void multifd_send_fill_packet(MultiFDSendParams *p)
    }

    for (i = 0; i < p->pages->used; i++) {
-        packet->offset[i] = cpu_to_be64(p->pages->offset[i]);
+        /* there are architectures where ram_addr_t is 32 bit */
+        uint64_t temp = p->pages->offset[i];
+
+        packet->offset[i] = cpu_to_be64(temp);
    }
 }

@ -877,10 +880,10 @@ static int multifd_recv_unfill_packet(MultiFDRecvParams *p, Error **errp)
    }

    for (i = 0; i < p->pages->used; i++) {
-        ram_addr_t offset = be64_to_cpu(packet->offset[i]);
+        uint64_t offset = be64_to_cpu(packet->offset[i]);

        if (offset > (block->used_length - TARGET_PAGE_SIZE)) {
-            error_setg(errp, "multifd: offset too long " RAM_ADDR_FMT
+            error_setg(errp, "multifd: offset too long %" PRIu64
                       " (max " RAM_ADDR_FMT ")",
                       offset, block->max_length);
            return -1;
@ -900,6 +903,12 @@ struct {
    uint64_t packet_num;
    /* send channels ready */
    QemuSemaphore channels_ready;
+    /*
+     * Have we already run terminate threads.  There is a race when it
+     * happens that we got one error while we are exiting.
+     * We will use atomic operations.  Only valid values are 0 and 1.
+     */
+    int exiting;
 } *multifd_send_state;

 /*
@ -928,6 +937,10 @@ static int multifd_send_pages(RAMState *rs)
    MultiFDPages_t *pages = multifd_send_state->pages;
    uint64_t transferred;

+    if (atomic_read(&multifd_send_state->exiting)) {
+        return -1;
+    }
+
    qemu_sem_wait(&multifd_send_state->channels_ready);
    for (i = next_channel;; i = (i + 1) % migrate_multifd_channels()) {
        p = &multifd_send_state->params[i];
@ -945,10 +958,10 @@ static int multifd_send_pages(RAMState *rs)
        }
        qemu_mutex_unlock(&p->mutex);
    }
-    p->pages->used = 0;
+    assert(!p->pages->used);
+    assert(!p->pages->block);

    p->packet_num = multifd_send_state->packet_num++;
-    p->pages->block = NULL;
    multifd_send_state->pages = p->pages;
    p->pages = pages;
    transferred = ((uint64_t) pages->used) * TARGET_PAGE_SIZE + p->packet_len;
@ -1009,6 +1022,16 @@ static void multifd_send_terminate_threads(Error *err)
        }
    }

+    /*
+     * We don't want to exit each threads twice.  Depending on where
+     * we get the error, or if there are two independent errors in two
+     * threads at the same time, we can end calling this function
+     * twice.
+     */
+    if (atomic_xchg(&multifd_send_state->exiting, 1)) {
+        return;
+    }
+
    for (i = 0; i < migrate_multifd_channels(); i++) {
        MultiFDSendParams *p = &multifd_send_state->params[i];

@ -1033,6 +1056,10 @@ void multifd_save_cleanup(void)
        if (p->running) {
            qemu_thread_join(&p->thread);
        }
+    }
+    for (i = 0; i < migrate_multifd_channels(); i++) {
+        MultiFDSendParams *p = &multifd_send_state->params[i];
+
        socket_send_channel_destroy(p->c);
        p->c = NULL;
        qemu_mutex_destroy(&p->mutex);
@ -1118,6 +1145,10 @@ static void *multifd_send_thread(void *opaque)

    while (true) {
        qemu_sem_wait(&p->sem);
+
+        if (atomic_read(&multifd_send_state->exiting)) {
+            break;
+        }
        qemu_mutex_lock(&p->mutex);

        if (p->pending_job) {
@ -1130,6 +1161,8 @@ static void *multifd_send_thread(void *opaque)
            p->flags = 0;
            p->num_packets++;
            p->num_pages += used;
+            p->pages->used = 0;
+            p->pages->block = NULL;
            qemu_mutex_unlock(&p->mutex);

            trace_multifd_send(p->id, packet_num, used, flags,
@ -1224,6 +1257,7 @@ int multifd_save_setup(void)
    multifd_send_state->params = g_new0(MultiFDSendParams, thread_count);
    multifd_send_state->pages = multifd_pages_init(page_count);
    qemu_sem_init(&multifd_send_state->channels_ready, 0);
+    atomic_set(&multifd_send_state->exiting, 0);

    for (i = 0; i < thread_count; i++) {
        MultiFDSendParams *p = &multifd_send_state->params[i];
@ -1236,7 +1270,7 @@ int multifd_save_setup(void)
        p->id = i;
        p->pages = multifd_pages_init(page_count);
        p->packet_len = sizeof(MultiFDPacket_t)
-                      + sizeof(ram_addr_t) * page_count;
+                      + sizeof(uint64_t) * page_count;
        p->packet = g_malloc0(p->packet_len);
        p->packet->magic = cpu_to_be32(MULTIFD_MAGIC);
        p->packet->version = cpu_to_be32(MULTIFD_VERSION);
@ -1281,7 +1315,9 @@ static void multifd_recv_terminate_threads(Error *err)
           - normal quit, i.e. everything went fine, just finished
           - error quit: We close the channels so the channel threads
             finish the qio_channel_read_all_eof() */
-        qio_channel_shutdown(p->c, QIO_CHANNEL_SHUTDOWN_BOTH, NULL);
+        if (p->c) {
+            qio_channel_shutdown(p->c, QIO_CHANNEL_SHUTDOWN_BOTH, NULL);
+        }
        qemu_mutex_unlock(&p->mutex);
    }
 }
@ -1307,6 +1343,10 @@ int multifd_load_cleanup(Error **errp)
            qemu_sem_post(&p->sem_sync);
            qemu_thread_join(&p->thread);
        }
+    }
+    for (i = 0; i < migrate_multifd_channels(); i++) {
+        MultiFDRecvParams *p = &multifd_recv_state->params[i];
+
        object_unref(OBJECT(p->c));
        p->c = NULL;
        qemu_mutex_destroy(&p->mutex);
@ -1447,7 +1487,7 @@ int multifd_load_setup(void)
        p->id = i;
        p->pages = multifd_pages_init(page_count);
        p->packet_len = sizeof(MultiFDPacket_t)
-                      + sizeof(ram_addr_t) * page_count;
+                      + sizeof(uint64_t) * page_count;
        p->packet = g_malloc0(p->packet_len);
        p->name = g_strdup_printf("multifdrecv_%d", i);
    }
@ -1731,7 +1771,7 @@ static inline bool migration_bitmap_clear_dirty(RAMState *rs,
    if (rb->clear_bmap && clear_bmap_test_and_clear(rb, page)) {
        uint8_t shift = rb->clear_bmap_shift;
        hwaddr size = 1ULL << (TARGET_PAGE_BITS + shift);
-        hwaddr start = (page << TARGET_PAGE_BITS) & (-size);
+        hwaddr start = (((ram_addr_t)page) << TARGET_PAGE_BITS) & (-size);

        /*
         * CLEAR_BITMAP_SHIFT_MIN should always guarantee this... this
@ -1968,7 +2008,7 @@ static void ram_release_pages(const char *rbname, uint64_t offset, int pages)
        return;
    }

-    ram_discard_range(rbname, offset, pages << TARGET_PAGE_BITS);
+    ram_discard_range(rbname, offset, ((ram_addr_t)pages) << TARGET_PAGE_BITS);
 }

 /*
@ -2056,7 +2096,7 @@ static int ram_save_page(RAMState *rs, PageSearchStatus *pss, bool last_stage)
    uint8_t *p;
    bool send_async = true;
    RAMBlock *block = pss->block;
-    ram_addr_t offset = pss->page << TARGET_PAGE_BITS;
+    ram_addr_t offset = ((ram_addr_t)pss->page) << TARGET_PAGE_BITS;
    ram_addr_t current_addr = block->offset + offset;

    p = block->host + offset;
@ -2243,7 +2283,8 @@ static bool find_dirty_block(RAMState *rs, PageSearchStatus *pss, bool *again)
        *again = false;
        return false;
    }
-    if ((pss->page << TARGET_PAGE_BITS) >= pss->block->used_length) {
+    if ((((ram_addr_t)pss->page) << TARGET_PAGE_BITS)
+        >= pss->block->used_length) {
        /* Didn't find anything in this RAM Block */
        pss->page = 0;
        pss->block = QLIST_NEXT_RCU(pss->block, next);
@ -2434,7 +2475,7 @@ int ram_save_queue_pages(const char *rbname, ram_addr_t start, ram_addr_t len)
             * it's the 1st request.
             */
            error_report("ram_save_queue_pages no previous block");
-            goto err;
+            return -1;
        }
    } else {
        ramblock = qemu_ram_block_by_name(rbname);
@ -2442,7 +2483,7 @@ int ram_save_queue_pages(const char *rbname, ram_addr_t start, ram_addr_t len)
        if (!ramblock) {
            /* We shouldn't be asked for a non-existent RAMBlock */
            error_report("ram_save_queue_pages no block '%s'", rbname);
-            goto err;
+            return -1;
        }
        rs->last_req_rb = ramblock;
    }
@ -2451,7 +2492,7 @@ int ram_save_queue_pages(const char *rbname, ram_addr_t start, ram_addr_t len)
        error_report("%s request overrun start=" RAM_ADDR_FMT " len="
                     RAM_ADDR_FMT " blocklen=" RAM_ADDR_FMT,
                     __func__, start, len, ramblock->used_length);
-        goto err;
+        return -1;
    }

    struct RAMSrcPageRequest *new_entry =
@ -2467,9 +2508,6 @@ int ram_save_queue_pages(const char *rbname, ram_addr_t start, ram_addr_t len)
    qemu_mutex_unlock(&rs->src_page_req_mutex);

    return 0;
-
-err:
-    return -1;
 }

 static bool save_page_use_compression(RAMState *rs)
@ -2537,7 +2575,7 @@ static int ram_save_target_page(RAMState *rs, PageSearchStatus *pss,
                                bool last_stage)
 {
    RAMBlock *block = pss->block;
-    ram_addr_t offset = pss->page << TARGET_PAGE_BITS;
+    ram_addr_t offset = ((ram_addr_t)pss->page) << TARGET_PAGE_BITS;
    int res;

    if (control_save_page(rs, block, offset, &res)) {
@ -2563,10 +2601,13 @@ static int ram_save_target_page(RAMState *rs, PageSearchStatus *pss,
    }

    /*
-     * do not use multifd for compression as the first page in the new
-     * block should be posted out before sending the compressed page
+     * Do not use multifd for:
+     * 1. Compression as the first page in the new block should be posted out
+     *    before sending the compressed page
+     * 2. In postcopy as one whole host page should be placed
     */
-    if (!save_page_use_compression(rs) && migrate_use_multifd()) {
+    if (!save_page_use_compression(rs) && migrate_use_multifd()
+        && !migration_in_postcopy()) {
        return ram_save_multifd_page(rs, block, offset);
    }

@ -2617,8 +2658,11 @@ static int ram_save_host_page(RAMState *rs, PageSearchStatus *pss,

        pages += tmppages;
        pss->page++;
+        /* Allow rate limiting to happen in the middle of huge pages */
+        migration_rate_limit();
    } while ((pss->page & (pagesize_bits - 1)) &&
-             offset_in_ramblock(pss->block, pss->page << TARGET_PAGE_BITS));
+             offset_in_ramblock(pss->block,
+                                ((ram_addr_t)pss->page) << TARGET_PAGE_BITS));

    /* The offset we leave with is the last one we looked at */
    pss->page--;
@ -2835,8 +2879,10 @@ void ram_postcopy_migrated_memory_release(MigrationState *ms)

        while (run_start < range) {
            unsigned long run_end = find_next_bit(bitmap, range, run_start + 1);
-            ram_discard_range(block->idstr, run_start << TARGET_PAGE_BITS,
-                              (run_end - run_start) << TARGET_PAGE_BITS);
+            ram_discard_range(block->idstr,
+                              ((ram_addr_t)run_start) << TARGET_PAGE_BITS,
+                              ((ram_addr_t)(run_end - run_start))
+                                << TARGET_PAGE_BITS);
            run_start = find_next_zero_bit(bitmap, range, run_end + 1);
        }
    }
@ -3072,8 +3118,6 @@ int ram_postcopy_send_discard_bitmap(MigrationState *ms)
 */
 int ram_discard_range(const char *rbname, uint64_t start, size_t length)
 {
-    int ret = -1;
-
    trace_ram_discard_range(rbname, start, length);

    RCU_READ_LOCK_GUARD();
@ -3081,7 +3125,7 @@ int ram_discard_range(const char *rbname, uint64_t start, size_t length)

    if (!rb) {
        error_report("ram_discard_range: Failed to find block '%s'", rbname);
-        goto err;
+        return -1;
    }

    /*
@ -3093,10 +3137,7 @@ int ram_discard_range(const char *rbname, uint64_t start, size_t length)
                     length >> qemu_target_page_bits());
    }

-    ret = ram_block_discard_range(rb, start, length);
-
-err:
-    return ret;
+    return ram_block_discard_range(rb, start, length);
 }

 /*
@ -3450,6 +3491,14 @@ static int ram_save_iterate(QEMUFile *f, void *opaque)

            rs->target_page_count += pages;

+            /*
+             * During postcopy, it is necessary to make sure one whole host
+             * page is sent in one chunk.
+             */
+            if (migrate_postcopy_ram()) {
+                flush_compressed_data(rs);
+            }
+
            /*
             * we want to check in the 1st loop, just in case it was the 1st
             * time and we had to sync the dirty bitmap.
@ -4031,8 +4080,9 @@ static int ram_load_postcopy(QEMUFile *f)
    MigrationIncomingState *mis = migration_incoming_get_current();
    /* Temporary page that is later 'placed' */
    void *postcopy_host_page = mis->postcopy_tmp_page;
-    void *last_host = NULL;
+    void *this_host = NULL;
    bool all_zero = false;
+    int target_pages = 0;

    while (!ret && !(flags & RAM_SAVE_FLAG_EOS)) {
        ram_addr_t addr;
@ -4041,6 +4091,7 @@ static int ram_load_postcopy(QEMUFile *f)
        void *place_source = NULL;
        RAMBlock *block = NULL;
        uint8_t ch;
+        int len;

        addr = qemu_get_be64(f);

@ -4058,7 +4109,8 @@ static int ram_load_postcopy(QEMUFile *f)

        trace_ram_load_postcopy_loop((uint64_t)addr, flags);
        place_needed = false;
-        if (flags & (RAM_SAVE_FLAG_ZERO | RAM_SAVE_FLAG_PAGE)) {
+        if (flags & (RAM_SAVE_FLAG_ZERO | RAM_SAVE_FLAG_PAGE |
+                     RAM_SAVE_FLAG_COMPRESS_PAGE)) {
            block = ram_block_from_stream(f, flags);

            host = host_from_ram_block_offset(block, addr);
@ -4067,6 +4119,7 @@ static int ram_load_postcopy(QEMUFile *f)
                ret = -EINVAL;
                break;
            }
+            target_pages++;
            matches_target_page_size = block->page_size == TARGET_PAGE_SIZE;
            /*
             * Postcopy requires that we place whole host pages atomically;
@ -4076,38 +4129,47 @@ static int ram_load_postcopy(QEMUFile *f)
             * that's moved into place later.
             * The migration protocol uses,  possibly smaller, target-pages
             * however the source ensures it always sends all the components
-             * of a host page in order.
+             * of a host page in one chunk.
             */
            page_buffer = postcopy_host_page +
                          ((uintptr_t)host & (block->page_size - 1));
            /* If all TP are zero then we can optimise the place */
-            if (!((uintptr_t)host & (block->page_size - 1))) {
+            if (target_pages == 1) {
                all_zero = true;
+                this_host = (void *)QEMU_ALIGN_DOWN((uintptr_t)host,
+                                                    block->page_size);
            } else {
                /* not the 1st TP within the HP */
-                if (host != (last_host + TARGET_PAGE_SIZE)) {
-                    error_report("Non-sequential target page %p/%p",
-                                  host, last_host);
+                if (QEMU_ALIGN_DOWN((uintptr_t)host, block->page_size) !=
+                    (uintptr_t)this_host) {
+                    error_report("Non-same host page %p/%p",
+                                  host, this_host);
                    ret = -EINVAL;
                    break;
                }
            }

-
            /*
             * If it's the last part of a host page then we place the host
             * page
             */
-            place_needed = (((uintptr_t)host + TARGET_PAGE_SIZE) &
-                                     (block->page_size - 1)) == 0;
+            if (target_pages == (block->page_size / TARGET_PAGE_SIZE)) {
+                place_needed = true;
+                target_pages = 0;
+            }
            place_source = postcopy_host_page;
        }
-        last_host = host;

        switch (flags & ~RAM_SAVE_FLAG_CONTINUE) {
        case RAM_SAVE_FLAG_ZERO:
            ch = qemu_get_byte(f);
-            memset(page_buffer, ch, TARGET_PAGE_SIZE);
+            /*
+             * Can skip to set page_buffer when
+             * this is a zero page and (block->page_size == TARGET_PAGE_SIZE).
+             */
+            if (ch || !matches_target_page_size) {
+                memset(page_buffer, ch, TARGET_PAGE_SIZE);
+            }
            if (ch) {
                all_zero = false;
            }
@ -4131,6 +4193,17 @@ static int ram_load_postcopy(QEMUFile *f)
                                         TARGET_PAGE_SIZE);
            }
            break;
+        case RAM_SAVE_FLAG_COMPRESS_PAGE:
+            all_zero = false;
+            len = qemu_get_be32(f);
+            if (len < 0 || len > compressBound(TARGET_PAGE_SIZE)) {
+                error_report("Invalid compressed data length: %d", len);
+                ret = -EINVAL;
+                break;
+            }
+            decompress_data_with_multi_threads(f, page_buffer, len);
+            break;
+
        case RAM_SAVE_FLAG_EOS:
            /* normal exit */
            multifd_recv_sync_main();
@ -4142,6 +4215,11 @@ static int ram_load_postcopy(QEMUFile *f)
            break;
        }

+        /* Got the whole host page, wait for decompress before placing. */
+        if (place_needed) {
+            ret |= wait_for_decompress_done();
+        }
+
        /* Detect for any possible file errors */
        if (!ret && qemu_file_get_error(f)) {
            ret = qemu_file_get_error(f);
@ -4149,7 +4227,8 @@ static int ram_load_postcopy(QEMUFile *f)

        if (!ret && place_needed) {
            /* This gets called at the last target page in the host page */
-            void *place_dest = host + TARGET_PAGE_SIZE - block->page_size;
+            void *place_dest = (void *)QEMU_ALIGN_DOWN((uintptr_t)host,
+                                                       block->page_size);

            if (all_zero) {
                ret = postcopy_place_page_zero(mis, place_dest,
@ -4201,13 +4280,16 @@ static void colo_flush_ram_cache(void)
        while (block) {
            offset = migration_bitmap_find_dirty(ram_state, block, offset);

-            if (offset << TARGET_PAGE_BITS >= block->used_length) {
+            if (((ram_addr_t)offset) << TARGET_PAGE_BITS
+                >= block->used_length) {
                offset = 0;
                block = QLIST_NEXT_RCU(block, next);
            } else {
                migration_bitmap_clear_dirty(ram_state, block, offset);
-                dst_host = block->host + (offset << TARGET_PAGE_BITS);
-                src_host = block->colo_cache + (offset << TARGET_PAGE_BITS);
+                dst_host = block->host
+                         + (((ram_addr_t)offset) << TARGET_PAGE_BITS);
+                src_host = block->colo_cache
+                         + (((ram_addr_t)offset) << TARGET_PAGE_BITS);
                memcpy(dst_host, src_host, TARGET_PAGE_SIZE);
            }
        }
@ -4227,7 +4309,7 @@ static void colo_flush_ram_cache(void)
 */
 static int ram_load_precopy(QEMUFile *f)
 {
-    int flags = 0, ret = 0, invalid_flags = 0, len = 0;
+    int flags = 0, ret = 0, invalid_flags = 0, len = 0, i = 0;
    /* ADVISE is earlier, it shows the source has the postcopy capability on */
    bool postcopy_advised = postcopy_is_advised();
    if (!migrate_use_compression()) {
@ -4239,6 +4321,17 @@ static int ram_load_precopy(QEMUFile *f)
        void *host = NULL;
        uint8_t ch;

+        /*
+         * Yield periodically to let main loop run, but an iteration of
+         * the main loop is expensive, so do it each some iterations
+         */
+        if ((i & 32767) == 0 && qemu_in_coroutine()) {
+            aio_co_schedule(qemu_get_current_aio_context(),
+                            qemu_coroutine_self());
+            qemu_coroutine_yield();
+        }
+        i++;
+
        addr = qemu_get_be64(f);
        flags = addr & ~TARGET_PAGE_MASK;
        addr &= TARGET_PAGE_MASK;
@ -4385,6 +4478,7 @@ static int ram_load_precopy(QEMUFile *f)
        }
    }

+    ret |= wait_for_decompress_done();
    return ret;
 }

@ -4416,8 +4510,6 @@ static int ram_load(QEMUFile *f, void *opaque, int version_id)
        } else {
            ret = ram_load_precopy(f);
        }
-
-        ret |= wait_for_decompress_done();
    }
    trace_ram_load_complete(ret, seq_iter);

--- a/migration/savevm.c
+++ b/migration/savevm.c
@ -233,7 +233,7 @@ typedef struct CompatEntry {
 typedef struct SaveStateEntry {
    QTAILQ_ENTRY(SaveStateEntry) entry;
    char idstr[256];
-    int instance_id;
+    uint32_t instance_id;
    int alias_id;
    int version_id;
    /* version id read from the stream */
@ -250,6 +250,7 @@ typedef struct SaveStateEntry {

 typedef struct SaveState {
    QTAILQ_HEAD(, SaveStateEntry) handlers;
+    SaveStateEntry *handler_pri_head[MIG_PRI_MAX + 1];
    int global_section_id;
    uint32_t len;
    const char *name;
@ -261,6 +262,7 @@ typedef struct SaveState {

 static SaveState savevm_state = {
    .handlers = QTAILQ_HEAD_INITIALIZER(savevm_state.handlers),
+    .handler_pri_head = { [MIG_PRI_DEFAULT ... MIG_PRI_MAX] = NULL },
    .global_section_id = 0,
 };

@ -665,10 +667,10 @@ void dump_vmstate_json_to_file(FILE *out_file)
    fclose(out_file);
 }

-static int calculate_new_instance_id(const char *idstr)
+static uint32_t calculate_new_instance_id(const char *idstr)
 {
    SaveStateEntry *se;
-    int instance_id = 0;
+    uint32_t instance_id = 0;

    QTAILQ_FOREACH(se, &savevm_state.handlers, entry) {
        if (strcmp(idstr, se->idstr) == 0
@ -676,6 +678,8 @@ static int calculate_new_instance_id(const char *idstr)
            instance_id = se->instance_id + 1;
        }
    }
+    /* Make sure we never loop over without being noticed */
+    assert(instance_id != VMSTATE_INSTANCE_ID_ANY);
    return instance_id;
 }

@ -709,20 +713,43 @@ static void savevm_state_handler_insert(SaveStateEntry *nse)
 {
    MigrationPriority priority = save_state_priority(nse);
    SaveStateEntry *se;
+    int i;

    assert(priority <= MIG_PRI_MAX);

-    QTAILQ_FOREACH(se, &savevm_state.handlers, entry) {
-        if (save_state_priority(se) < priority) {
+    for (i = priority - 1; i >= 0; i--) {
+        se = savevm_state.handler_pri_head[i];
+        if (se != NULL) {
+            assert(save_state_priority(se) < priority);
            break;
        }
    }

-    if (se) {
+    if (i >= 0) {
        QTAILQ_INSERT_BEFORE(se, nse, entry);
    } else {
        QTAILQ_INSERT_TAIL(&savevm_state.handlers, nse, entry);
    }
+
+    if (savevm_state.handler_pri_head[priority] == NULL) {
+        savevm_state.handler_pri_head[priority] = nse;
+    }
+}
+
+static void savevm_state_handler_remove(SaveStateEntry *se)
+{
+    SaveStateEntry *next;
+    MigrationPriority priority = save_state_priority(se);
+
+    if (se == savevm_state.handler_pri_head[priority]) {
+        next = QTAILQ_NEXT(se, entry);
+        if (next != NULL && save_state_priority(next) == priority) {
+            savevm_state.handler_pri_head[priority] = next;
+        } else {
+            savevm_state.handler_pri_head[priority] = NULL;
+        }
+    }
+    QTAILQ_REMOVE(&savevm_state.handlers, se, entry);
 }

 /* TODO: Individual devices generally have very little idea about the rest
@ -730,7 +757,7 @@ static void savevm_state_handler_insert(SaveStateEntry *nse)
   Meanwhile pass -1 as instance_id if you do not already have a clearly
   distinguishing id for all instances of your device class. */
 int register_savevm_live(const char *idstr,
-                         int instance_id,
+                         uint32_t instance_id,
                         int version_id,
                         const SaveVMHandlers *ops,
                         void *opaque)
@ -750,7 +777,7 @@ int register_savevm_live(const char *idstr,

    pstrcat(se->idstr, sizeof(se->idstr), idstr);

-    if (instance_id == -1) {
+    if (instance_id == VMSTATE_INSTANCE_ID_ANY) {
        se->instance_id = calculate_new_instance_id(se->idstr);
    } else {
        se->instance_id = instance_id;
@ -777,14 +804,14 @@ void unregister_savevm(VMStateIf *obj, const char *idstr, void *opaque)

    QTAILQ_FOREACH_SAFE(se, &savevm_state.handlers, entry, new_se) {
        if (strcmp(se->idstr, id) == 0 && se->opaque == opaque) {
-            QTAILQ_REMOVE(&savevm_state.handlers, se, entry);
+            savevm_state_handler_remove(se);
            g_free(se->compat);
            g_free(se);
        }
    }
 }

-int vmstate_register_with_alias_id(VMStateIf *obj, int instance_id,
+int vmstate_register_with_alias_id(VMStateIf *obj, uint32_t instance_id,
                                   const VMStateDescription *vmsd,
                                   void *opaque, int alias_id,
                                   int required_for_version,
@ -817,14 +844,14 @@ int vmstate_register_with_alias_id(VMStateIf *obj, int instance_id,

            se->compat = g_new0(CompatEntry, 1);
            pstrcpy(se->compat->idstr, sizeof(se->compat->idstr), vmsd->name);
-            se->compat->instance_id = instance_id == -1 ?
+            se->compat->instance_id = instance_id == VMSTATE_INSTANCE_ID_ANY ?
                         calculate_compat_instance_id(vmsd->name) : instance_id;
-            instance_id = -1;
+            instance_id = VMSTATE_INSTANCE_ID_ANY;
        }
    }
    pstrcat(se->idstr, sizeof(se->idstr), vmsd->name);

-    if (instance_id == -1) {
+    if (instance_id == VMSTATE_INSTANCE_ID_ANY) {
        se->instance_id = calculate_new_instance_id(se->idstr);
    } else {
        se->instance_id = instance_id;
@ -841,7 +868,7 @@ void vmstate_unregister(VMStateIf *obj, const VMStateDescription *vmsd,

    QTAILQ_FOREACH_SAFE(se, &savevm_state.handlers, entry, new_se) {
        if (se->vmsd == vmsd && se->opaque == opaque) {
-            QTAILQ_REMOVE(&savevm_state.handlers, se, entry);
+            savevm_state_handler_remove(se);
            g_free(se->compat);
            g_free(se);
        }
@ -1600,7 +1627,7 @@ int qemu_save_device_state(QEMUFile *f)
    return qemu_file_get_error(f);
 }

-static SaveStateEntry *find_se(const char *idstr, int instance_id)
+static SaveStateEntry *find_se(const char *idstr, uint32_t instance_id)
 {
    SaveStateEntry *se;

@ -2267,7 +2294,7 @@ qemu_loadvm_section_start_full(QEMUFile *f, MigrationIncomingState *mis)
    /* Find savevm section */
    se = find_se(idstr, instance_id);
    if (se == NULL) {
-        error_report("Unknown savevm section or instance '%s' %d. "
+        error_report("Unknown savevm section or instance '%s' %"PRIu32". "
                     "Make sure that your current VM setup matches your "
                     "saved VM setup, including any hotplugged devices",
                     idstr, instance_id);
@ -2291,7 +2318,7 @@ qemu_loadvm_section_start_full(QEMUFile *f, MigrationIncomingState *mis)

    ret = vmstate_load(f, se);
    if (ret < 0) {
-        error_report("error while loading state for instance 0x%x of"
+        error_report("error while loading state for instance 0x%"PRIx32" of"
                     " device '%s'", instance_id, idstr);
        return ret;
    }
--- a/migration/trace-events
+++ b/migration/trace-events
@ -76,6 +76,11 @@ get_gtree_end(const char *field_name, const char *key_vmsd_name, const char *val
 put_gtree(const char *field_name, const char *key_vmsd_name, const char *val_vmsd_name, uint32_t nnodes) "%s(%s/%s) nnodes=%d"
 put_gtree_end(const char *field_name, const char *key_vmsd_name, const char *val_vmsd_name, int ret) "%s(%s/%s) %d"

+get_qlist(const char *field_name, const char *vmsd_name, int version_id) "%s(%s v%d)"
+get_qlist_end(const char *field_name, const char *vmsd_name) "%s(%s)"
+put_qlist(const char *field_name, const char *vmsd_name, int version_id) "%s(%s v%d)"
+put_qlist_end(const char *field_name, const char *vmsd_name) "%s(%s)"
+
 # qemu-file.c
 qemu_file_fclose(void) ""

@ -138,12 +143,12 @@ migrate_send_rp_recv_bitmap(char *name, int64_t size) "block '%s' size 0x%"PRIi6
 migration_completion_file_err(void) ""
 migration_completion_postcopy_end(void) ""
 migration_completion_postcopy_end_after_complete(void) ""
+migration_rate_limit_pre(int ms) "%d ms"
+migration_rate_limit_post(int urgent) "urgent: %d"
 migration_return_path_end_before(void) ""
 migration_return_path_end_after(int rp_error) "%d"
 migration_thread_after_loop(void) ""
 migration_thread_file_err(void) ""
-migration_thread_ratelimit_pre(int ms) "%d ms"
-migration_thread_ratelimit_post(int urgent) "urgent: %d"
 migration_thread_setup_complete(void) ""
 open_return_path_on_source(void) ""
 open_return_path_on_source_continue(void) ""
--- a/migration/vmstate-types.c
+++ b/migration/vmstate-types.c
@ -843,3 +843,73 @@ const VMStateInfo vmstate_info_gtree = {
    .get  = get_gtree,
    .put  = put_gtree,
 };
+
+static int put_qlist(QEMUFile *f, void *pv, size_t unused_size,
+                     const VMStateField *field, QJSON *vmdesc)
+{
+    const VMStateDescription *vmsd = field->vmsd;
+    /* offset of the QTAILQ entry in a QTAILQ element*/
+    size_t entry_offset = field->start;
+    void *elm;
+    int ret;
+
+    trace_put_qlist(field->name, vmsd->name, vmsd->version_id);
+    QLIST_RAW_FOREACH(elm, pv, entry_offset) {
+        qemu_put_byte(f, true);
+        ret = vmstate_save_state(f, vmsd, elm, vmdesc);
+        if (ret) {
+            error_report("%s: failed to save %s (%d)", field->name,
+                         vmsd->name, ret);
+            return ret;
+        }
+    }
+    qemu_put_byte(f, false);
+    trace_put_qlist_end(field->name, vmsd->name);
+
+    return 0;
+}
+
+static int get_qlist(QEMUFile *f, void *pv, size_t unused_size,
+                     const VMStateField *field)
+{
+    int ret = 0;
+    const VMStateDescription *vmsd = field->vmsd;
+    /* size of a QLIST element */
+    size_t size = field->size;
+    /* offset of the QLIST entry in a QLIST element */
+    size_t entry_offset = field->start;
+    int version_id = field->version_id;
+    void *elm;
+
+    trace_get_qlist(field->name, vmsd->name, vmsd->version_id);
+    if (version_id > vmsd->version_id) {
+        error_report("%s %s",  vmsd->name, "too new");
+        return -EINVAL;
+    }
+    if (version_id < vmsd->minimum_version_id) {
+        error_report("%s %s",  vmsd->name, "too old");
+        return -EINVAL;
+    }
+
+    while (qemu_get_byte(f)) {
+        elm = g_malloc(size);
+        ret = vmstate_load_state(f, vmsd, elm, version_id);
+        if (ret) {
+            error_report("%s: failed to load %s (%d)", field->name,
+                         vmsd->name, ret);
+            g_free(elm);
+            return ret;
+        }
+        QLIST_RAW_INSERT_HEAD(pv, elm, entry_offset);
+    }
+    QLIST_RAW_REVERSE(pv, elm, entry_offset);
+    trace_get_qlist_end(field->name, vmsd->name);
+
+    return ret;
+}
+
+const VMStateInfo vmstate_info_qlist = {
+    .name = "qlist",
+    .get  = get_qlist,
+    .put  = put_qlist,
+};
--- a/stubs/vmstate.c
+++ b/stubs/vmstate.c
@ -4,7 +4,7 @@
 const VMStateDescription vmstate_dummy = {};

 int vmstate_register_with_alias_id(VMStateIf *obj,
-                                   int instance_id,
+                                   uint32_t instance_id,
                                   const VMStateDescription *vmsd,
                                   void *base, int alias_id,
                                   int required_for_version,
--- a/tests/qtest/migration-test.c
+++ b/tests/qtest/migration-test.c
@ -356,6 +356,43 @@ static void migrate_set_parameter_int(QTestState *who, const char *parameter,
    migrate_check_parameter_int(who, parameter, value);
 }

+static char *migrate_get_parameter_str(QTestState *who,
+                                       const char *parameter)
+{
+    QDict *rsp;
+    char *result;
+
+    rsp = wait_command(who, "{ 'execute': 'query-migrate-parameters' }");
+    result = g_strdup(qdict_get_str(rsp, parameter));
+    qobject_unref(rsp);
+    return result;
+}
+
+static void migrate_check_parameter_str(QTestState *who, const char *parameter,
+                                        const char *value)
+{
+    char *result;
+
+    result = migrate_get_parameter_str(who, parameter);
+    g_assert_cmpstr(result, ==, value);
+    g_free(result);
+}
+
+__attribute__((unused))
+static void migrate_set_parameter_str(QTestState *who, const char *parameter,
+                                      const char *value)
+{
+    QDict *rsp;
+
+    rsp = qtest_qmp(who,
+                    "{ 'execute': 'migrate-set-parameters',"
+                    "'arguments': { %s: %s } }",
+                    parameter, value);
+    g_assert(qdict_haskey(rsp, "return"));
+    qobject_unref(rsp);
+    migrate_check_parameter_str(who, parameter, value);
+}
+
 static void migrate_pause(QTestState *who)
 {
    QDict *rsp;
@ -1202,6 +1239,61 @@ static void test_migrate_auto_converge(void)
    test_migrate_end(from, to, true);
 }

+static void test_multifd_tcp(void)
+{
+    MigrateStart *args = migrate_start_new();
+    QTestState *from, *to;
+    QDict *rsp;
+    char *uri;
+
+    if (test_migrate_start(&from, &to, "defer", args)) {
+        return;
+    }
+
+    /*
+     * We want to pick a speed slow enough that the test completes
+     * quickly, but that it doesn't complete precopy even on a slow
+     * machine, so also set the downtime.
+     */
+    /* 1 ms should make it not converge*/
+    migrate_set_parameter_int(from, "downtime-limit", 1);
+    /* 1GB/s */
+    migrate_set_parameter_int(from, "max-bandwidth", 1000000000);
+
+    migrate_set_parameter_int(from, "multifd-channels", 16);
+    migrate_set_parameter_int(to, "multifd-channels", 16);
+
+    migrate_set_capability(from, "multifd", "true");
+    migrate_set_capability(to, "multifd", "true");
+
+    /* Start incoming migration from the 1st socket */
+    rsp = wait_command(to, "{ 'execute': 'migrate-incoming',"
+                           "  'arguments': { 'uri': 'tcp:127.0.0.1:0' }}");
+    qobject_unref(rsp);
+
+    /* Wait for the first serial output from the source */
+    wait_for_serial("src_serial");
+
+    uri = migrate_get_socket_address(to, "socket-address");
+
+    migrate_qmp(from, uri, "{}");
+
+    wait_for_migration_pass(from);
+
+    /* 300ms it should converge */
+    migrate_set_parameter_int(from, "downtime-limit", 300);
+
+    if (!got_stop) {
+        qtest_qmp_eventwait(from, "STOP");
+    }
+    qtest_qmp_eventwait(to, "RESUME");
+
+    wait_for_serial("dest_serial");
+    wait_for_migration_complete(from);
+    test_migrate_end(from, to, true);
+    free(uri);
+}
+
 int main(int argc, char **argv)
 {
    char template[] = "/tmp/migration-test-XXXXXX";
@ -1266,6 +1358,7 @@ int main(int argc, char **argv)
                   test_validate_uuid_dst_not_set);

    qtest_add_func("/migration/auto_converge", test_migrate_auto_converge);
+    qtest_add_func("/migration/multifd/tcp", test_multifd_tcp);

    ret = g_test_run();

--- a/tests/test-vmstate.c
+++ b/tests/test-vmstate.c
@ -926,6 +926,28 @@ static const VMStateDescription vmstate_domain = {
    }
 };

+/* test QLIST Migration */
+
+typedef struct TestQListElement {
+    uint32_t  id;
+    QLIST_ENTRY(TestQListElement) next;
+} TestQListElement;
+
+typedef struct TestQListContainer {
+    uint32_t  id;
+    QLIST_HEAD(, TestQListElement) list;
+} TestQListContainer;
+
+static const VMStateDescription vmstate_qlist_element = {
+    .name = "test/queue list",
+    .version_id = 1,
+    .minimum_version_id = 1,
+    .fields = (VMStateField[]) {
+        VMSTATE_UINT32(id, TestQListElement),
+        VMSTATE_END_OF_LIST()
+    }
+};
+
 static const VMStateDescription vmstate_iommu = {
    .name = "iommu",
    .version_id = 1,
@ -939,6 +961,18 @@ static const VMStateDescription vmstate_iommu = {
    }
 };

+static const VMStateDescription vmstate_container = {
+    .name = "test/container/qlist",
+    .version_id = 1,
+    .minimum_version_id = 1,
+    .fields = (VMStateField[]) {
+        VMSTATE_UINT32(id, TestQListContainer),
+        VMSTATE_QLIST_V(list, TestQListContainer, 1, vmstate_qlist_element,
+                        TestQListElement, next),
+        VMSTATE_END_OF_LIST()
+    }
+};
+
 uint8_t first_domain_dump[] = {
    /* id */
    0x00, 0x0, 0x0, 0x6,
@ -1229,6 +1263,140 @@ static void test_gtree_load_iommu(void)
    qemu_fclose(fload);
 }

+static uint8_t qlist_dump[] = {
+    0x00, 0x00, 0x00, 0x01, /* container id */
+    0x1, /* start of a */
+    0x00, 0x00, 0x00, 0x0a,
+    0x1, /* start of b */
+    0x00, 0x00, 0x0b, 0x00,
+    0x1, /* start of c */
+    0x00, 0x0c, 0x00, 0x00,
+    0x1, /* start of d */
+    0x0d, 0x00, 0x00, 0x00,
+    0x0, /* end of list */
+    QEMU_VM_EOF, /* just to ensure we won't get EOF reported prematurely */
+};
+
+static TestQListContainer *alloc_container(void)
+{
+    TestQListElement *a = g_malloc(sizeof(TestQListElement));
+    TestQListElement *b = g_malloc(sizeof(TestQListElement));
+    TestQListElement *c = g_malloc(sizeof(TestQListElement));
+    TestQListElement *d = g_malloc(sizeof(TestQListElement));
+    TestQListContainer *container = g_malloc(sizeof(TestQListContainer));
+
+    a->id = 0x0a;
+    b->id = 0x0b00;
+    c->id = 0xc0000;
+    d->id = 0xd000000;
+    container->id = 1;
+
+    QLIST_INIT(&container->list);
+    QLIST_INSERT_HEAD(&container->list, d, next);
+    QLIST_INSERT_HEAD(&container->list, c, next);
+    QLIST_INSERT_HEAD(&container->list, b, next);
+    QLIST_INSERT_HEAD(&container->list, a, next);
+    return container;
+}
+
+static void free_container(TestQListContainer *container)
+{
+    TestQListElement *iter, *tmp;
+
+    QLIST_FOREACH_SAFE(iter, &container->list, next, tmp) {
+        QLIST_REMOVE(iter, next);
+        g_free(iter);
+    }
+    g_free(container);
+}
+
+static void compare_containers(TestQListContainer *c1, TestQListContainer *c2)
+{
+    TestQListElement *first_item_c1, *first_item_c2;
+
+    while (!QLIST_EMPTY(&c1->list)) {
+        first_item_c1 = QLIST_FIRST(&c1->list);
+        first_item_c2 = QLIST_FIRST(&c2->list);
+        assert(first_item_c2);
+        assert(first_item_c1->id == first_item_c2->id);
+        QLIST_REMOVE(first_item_c1, next);
+        QLIST_REMOVE(first_item_c2, next);
+        g_free(first_item_c1);
+        g_free(first_item_c2);
+    }
+    assert(QLIST_EMPTY(&c2->list));
+}
+
+/*
+ * Check the prev & next fields are correct by doing list
+ * manipulations on the container. We will do that for both
+ * the source and the destination containers
+ */
+static void manipulate_container(TestQListContainer *c)
+{
+     TestQListElement *prev = NULL, *iter = QLIST_FIRST(&c->list);
+     TestQListElement *elem;
+
+     elem = g_malloc(sizeof(TestQListElement));
+     elem->id = 0x12;
+     QLIST_INSERT_AFTER(iter, elem, next);
+
+     elem = g_malloc(sizeof(TestQListElement));
+     elem->id = 0x13;
+     QLIST_INSERT_HEAD(&c->list, elem, next);
+
+     while (iter) {
+        prev = iter;
+        iter = QLIST_NEXT(iter, next);
+     }
+
+     elem = g_malloc(sizeof(TestQListElement));
+     elem->id = 0x14;
+     QLIST_INSERT_BEFORE(prev, elem, next);
+
+     elem = g_malloc(sizeof(TestQListElement));
+     elem->id = 0x15;
+     QLIST_INSERT_AFTER(prev, elem, next);
+
+     QLIST_REMOVE(prev, next);
+     g_free(prev);
+}
+
+static void test_save_qlist(void)
+{
+    TestQListContainer *container = alloc_container();
+
+    save_vmstate(&vmstate_container, container);
+    compare_vmstate(qlist_dump, sizeof(qlist_dump));
+    free_container(container);
+}
+
+static void test_load_qlist(void)
+{
+    QEMUFile *fsave, *fload;
+    TestQListContainer *orig_container = alloc_container();
+    TestQListContainer *dest_container = g_malloc0(sizeof(TestQListContainer));
+    char eof;
+
+    QLIST_INIT(&dest_container->list);
+
+    fsave = open_test_file(true);
+    qemu_put_buffer(fsave, qlist_dump, sizeof(qlist_dump));
+    g_assert(!qemu_file_get_error(fsave));
+    qemu_fclose(fsave);
+
+    fload = open_test_file(false);
+    vmstate_load_state(fload, &vmstate_container, dest_container, 1);
+    eof = qemu_get_byte(fload);
+    g_assert(!qemu_file_get_error(fload));
+    g_assert_cmpint(eof, ==, QEMU_VM_EOF);
+    manipulate_container(orig_container);
+    manipulate_container(dest_container);
+    compare_containers(orig_container, dest_container);
+    free_container(orig_container);
+    free_container(dest_container);
+}
+
 typedef struct TmpTestStruct {
    TestStruct *parent;
    int64_t diff;
@ -1353,6 +1521,8 @@ int main(int argc, char **argv)
    g_test_add_func("/vmstate/gtree/load/loaddomain", test_gtree_load_domain);
    g_test_add_func("/vmstate/gtree/save/saveiommu", test_gtree_save_iommu);
    g_test_add_func("/vmstate/gtree/load/loadiommu", test_gtree_load_iommu);
+    g_test_add_func("/vmstate/qlist/save/saveqlist", test_save_qlist);
+    g_test_add_func("/vmstate/qlist/load/loadqlist", test_load_qlist);
    g_test_add_func("/vmstate/tmp_struct", test_tmp_struct);
    g_test_run();

--- a/vl.c
+++ b/vl.c
@ -1604,9 +1604,6 @@ static bool main_loop_should_exit(void)
    RunState r;
    ShutdownCause request;

-    if (runstate_check(RUN_STATE_FINISH_MIGRATE)) {
-        return false;
-    }
    if (preconfig_exit_requested) {
        if (runstate_check(RUN_STATE_PRECONFIG)) {
            runstate_set(RUN_STATE_PRELAUNCH);
@ -1635,8 +1632,13 @@ static bool main_loop_should_exit(void)
        pause_all_vcpus();
        qemu_system_reset(request);
        resume_all_vcpus();
+        /*
+         * runstate can change in pause_all_vcpus()
+         * as iothread mutex is unlocked
+         */
        if (!runstate_check(RUN_STATE_RUNNING) &&
-                !runstate_check(RUN_STATE_INMIGRATE)) {
+                !runstate_check(RUN_STATE_INMIGRATE) &&
+                !runstate_check(RUN_STATE_FINISH_MIGRATE)) {
            runstate_set(RUN_STATE_PRELAUNCH);
        }
    }