Merge tag 'for_upstream' of git://git.kernel.org/pub/scm/virt/kvm/mst/qemu into staging

virtio,pci,pc: features,fixes,cleanups

New virtio-mem options.
A vhost-user cleanup.
Control over the SMBIOS entry point type.
Config interrupt support for vdpa.
Fixes and cleanups all over the place.
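
For example, the new knobs could be combined on one command line. This is an
illustrative sketch only: the option names come from the patches below, while
the machine type and memory sizing around them are made up for the example:

  qemu-system-x86_64 \
    -machine q35,smbios-entry-point-type=64 \
    -m 4G,maxmem=8G \
    -object memory-backend-ram,id=vmem0,size=2G,prealloc=off \
    -device virtio-mem-pci,memdev=vmem0,requested-size=1G,prealloc=on,unplugged-inaccessible=auto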

Signed-off-by: Michael S. Tsirkin <mst@redhat.com>

# gpg: Signature made Fri 07 Jan 2022 04:30:41 PM PST
# gpg:                using RSA key 5D09FD0871C8F85B94CA8A0D281F0DB8D28D5469
# gpg:                issuer "mst@redhat.com"
# gpg: Good signature from "Michael S. Tsirkin <mst@kernel.org>" [undefined]
# gpg:                 aka "Michael S. Tsirkin <mst@redhat.com>" [undefined]
# gpg: WARNING: This key is not certified with a trusted signature!
# gpg:          There is no indication that the signature belongs to the owner.
# Primary key fingerprint: 0270 606B 6F3C DF3D 0B17  0970 C350 3912 AFBE 8E67
#      Subkey fingerprint: 5D09 FD08 71C8 F85B 94CA  8A0D 281F 0DB8 D28D 5469

* tag 'for_upstream' of git://git.kernel.org/pub/scm/virt/kvm/mst/qemu: (55 commits)
  tests: acpi: Add updated TPM related tables
  acpi: tpm: Add missing device identification objects
  tests: acpi: prepare for updated TPM related tables
  virtio/vhost-vsock: don't double close vhostfd, remove redundant cleanup
  hw/scsi/vhost-scsi: don't double close vhostfd on error
  hw/scsi/vhost-scsi: don't leak vqs on error
  docs: reSTify virtio-balloon-stats documentation and move to docs/interop
  hw/i386/pc: Add missing property descriptions
  acpihp: simplify acpi_pcihp_disable_root_bus
  tests: acpi: SLIC: update expected blobs
  tests: acpi: add SLIC table test
  tests: acpi: whitelist expected blobs before changing them
  acpi: fix QEMU crash when started with SLIC table
  intel-iommu: correctly check passthrough during translation
  virtio-mem: Set "unplugged-inaccessible=auto" for the 7.0 machine on x86
  virtio-mem: Support VIRTIO_MEM_F_UNPLUGGED_INACCESSIBLE
  linux-headers: sync VIRTIO_MEM_F_UNPLUGGED_INACCESSIBLE
  MAINTAINERS: Add a separate entry for acpi/VIOT tables
  virtio: signal after wrapping packed used_idx
  virtio-mem: Support "prealloc=on" option
  ...

Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
commit d70075373a (staging)
Richard Henderson, 2022-01-07 17:24:24 -08:00
56 changed files with 1209 additions and 500 deletions

@ -1777,6 +1777,13 @@ F: docs/specs/acpi_mem_hotplug.rst
F: docs/specs/acpi_pci_hotplug.rst
F: docs/specs/acpi_hw_reduced_hotplug.rst
ACPI/VIOT
M: Jean-Philippe Brucker <jean-philippe@linaro.org>
R: Ani Sinha <ani@anisinha.ca>
S: Supported
F: hw/acpi/viot.c
F: hw/acpi/viot.h
ACPI/HEST/GHES
R: Dongjiu Geng <gengdongjiu1@gmail.com>
L: qemu-arm@nongnu.org
@ -1925,6 +1932,7 @@ virtio-balloon
M: Michael S. Tsirkin <mst@redhat.com>
M: David Hildenbrand <david@redhat.com>
S: Maintained
F: docs/interop/virtio-balloon-stats.rst
F: hw/virtio/virtio-balloon*.c
F: include/hw/virtio/virtio-balloon.h
F: softmmu/balloon.c

@ -290,13 +290,6 @@ static ssize_t tcp_chr_recv(Chardev *chr, char *buf, size_t len)
NULL);
}
if (ret == QIO_CHANNEL_ERR_BLOCK) {
errno = EAGAIN;
ret = -1;
} else if (ret == -1) {
errno = EIO;
}
if (msgfds_num) {
/* close and clean read_msgfds */
for (i = 0; i < s->read_msgfds_num; i++) {
@ -325,6 +318,13 @@ static ssize_t tcp_chr_recv(Chardev *chr, char *buf, size_t len)
#endif
}
if (ret == QIO_CHANNEL_ERR_BLOCK) {
errno = EAGAIN;
ret = -1;
} else if (ret == -1) {
errno = EIO;
}
return ret;
}
@ -525,6 +525,7 @@ static int tcp_chr_sync_read(Chardev *chr, const uint8_t *buf, int len)
{
SocketChardev *s = SOCKET_CHARDEV(chr);
int size;
int saved_errno;
if (s->state != TCP_CHARDEV_STATE_CONNECTED) {
return 0;
@ -532,6 +533,7 @@ static int tcp_chr_sync_read(Chardev *chr, const uint8_t *buf, int len)
qio_channel_set_blocking(s->ioc, true, NULL);
size = tcp_chr_recv(chr, (void *) buf, len);
saved_errno = errno;
if (s->state != TCP_CHARDEV_STATE_DISCONNECTED) {
qio_channel_set_blocking(s->ioc, false, NULL);
}
@ -540,6 +542,7 @@ static int tcp_chr_sync_read(Chardev *chr, const uint8_t *buf, int len)
tcp_chr_disconnect(chr);
}
errno = saved_errno;
return size;
}
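
Both hunks serve the same goal: the errno the caller sees must be the one from
the failed receive, not whatever a later cleanup call left behind.
tcp_chr_recv() now maps the return value to errno after the msgfds cleanup,
and tcp_chr_sync_read() saves and restores errno around
qio_channel_set_blocking(). A minimal standalone sketch of the pattern in
plain POSIX terms (a hypothetical helper, not QEMU code):

  #include <errno.h>
  #include <fcntl.h>
  #include <unistd.h>

  /* Do one blocking read on a normally non-blocking fd, preserving the
   * read()'s errno across the cleanup fcntl() calls. */
  static ssize_t blocking_read(int fd, void *buf, size_t len)
  {
      int flags = fcntl(fd, F_GETFL);
      ssize_t ret;
      int saved_errno;

      fcntl(fd, F_SETFL, flags & ~O_NONBLOCK);  /* enter blocking mode */
      ret = read(fd, buf, len);
      saved_errno = errno;                      /* capture before cleanup */
      fcntl(fd, F_SETFL, flags);                /* may overwrite errno */

      errno = saved_errno;                      /* restore for the caller */
      return ret;
  }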

@ -22,3 +22,4 @@ are useful for making QEMU interoperate with other software.
vhost-user
vhost-user-gpu
vhost-vdpa
virtio-balloon-stats

@ -1,4 +1,4 @@
virtio balloon memory statistics
Virtio balloon memory statistics
================================
The virtio balloon driver supports guest memory statistics reporting. These
@ -9,10 +9,12 @@ Before querying the available stats, clients first have to enable polling.
This is done by writing a time interval value (in seconds) to the
guest-stats-polling-interval property. This value can be:
> 0 enables polling in the specified interval. If polling is already
> 0
enables polling in the specified interval. If polling is already
enabled, the polling time interval is changed to the new value
0 disables polling. Previous polled statistics are still valid and
0
disables polling. Previous polled statistics are still valid and
can be queried.
Once polling is enabled, the virtio-balloon device in QEMU will start
@ -22,7 +24,7 @@ interval.
To retrieve those stats, clients have to query the guest-stats property,
which will return a dictionary containing:
o A key named 'stats', containing all available stats. If the guest
* A key named 'stats', containing all available stats. If the guest
doesn't support a particular stat, or if it couldn't be retrieved,
its value will be -1. Currently, the following stats are supported:
@ -37,7 +39,7 @@ which will return a dictionary containing:
- stat-htlb-pgalloc
- stat-htlb-pgfail
o A key named last-update, which contains the last stats update
* A key named last-update, which contains the last stats update
timestamp in seconds. Since this timestamp is generated by the host,
a buggy guest can't influence its value. The value is 0 if the guest
has not updated the stats (yet).
@ -61,32 +63,32 @@ It's also important to note the following:
respond to the request the timer will never be re-armed, which has
the same effect as disabling polling
Here are a few examples. QEMU is started with '-device virtio-balloon',
which generates '/machine/peripheral-anon/device[1]' as the QOM path for
Here are a few examples. QEMU is started with ``-device virtio-balloon``,
which generates ``/machine/peripheral-anon/device[1]`` as the QOM path for
the balloon device.
Enable polling with 2 seconds interval:
Enable polling with 2 seconds interval::
{ "execute": "qom-set",
"arguments": { "path": "/machine/peripheral-anon/device[1]",
"property": "guest-stats-polling-interval", "value": 2 } }
{ "execute": "qom-set",
"arguments": { "path": "/machine/peripheral-anon/device[1]",
"property": "guest-stats-polling-interval", "value": 2 } }
{ "return": {} }
{ "return": {} }
Change polling to 10 seconds:
Change polling to 10 seconds::
{ "execute": "qom-set",
"arguments": { "path": "/machine/peripheral-anon/device[1]",
"property": "guest-stats-polling-interval", "value": 10 } }
{ "execute": "qom-set",
"arguments": { "path": "/machine/peripheral-anon/device[1]",
"property": "guest-stats-polling-interval", "value": 10 } }
{ "return": {} }
{ "return": {} }
Get stats:
Get stats::
{ "execute": "qom-get",
"arguments": { "path": "/machine/peripheral-anon/device[1]",
"property": "guest-stats" } }
{
{ "execute": "qom-get",
"arguments": { "path": "/machine/peripheral-anon/device[1]",
"property": "guest-stats" } }
{
"return": {
"stats": {
"stat-swap-out": 0,
@ -98,12 +100,12 @@ Get stats:
},
"last-update": 1358529861
}
}
}
Disable polling:
Disable polling::
{ "execute": "qom-set",
"arguments": { "path": "/machine/peripheral-anon/device[1]",
"property": "stats-polling-interval", "value": 0 } }
{ "execute": "qom-set",
"arguments": { "path": "/machine/peripheral-anon/device[1]",
"property": "stats-polling-interval", "value": 0 } }
{ "return": {} }
{ "return": {} }

@ -345,8 +345,8 @@ int acpi_get_slic_oem(AcpiSlicOem *oem)
struct acpi_table_header *hdr = (void *)(u - sizeof(hdr->_length));
if (memcmp(hdr->sig, "SLIC", 4) == 0) {
oem->id = hdr->oem_id;
oem->table_id = hdr->oem_table_id;
oem->id = g_strndup(hdr->oem_id, 6);
oem->table_id = g_strndup(hdr->oem_table_id, 8);
return 0;
}
}
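
The copies matter because oem_id and oem_table_id in an ACPI table header are
fixed-width fields with no terminating NUL, so treating raw pointers into the
table as C strings reads past the field, and it ties the strings' lifetime to
the underlying table storage, which may move or be freed (the matching
g_free() calls appear in the acpi-build hunk below). A hedged sketch of the
shape involved (abbreviated struct, not the real QEMU definition):

  #include <glib.h>

  struct header_sketch {
      char oem_id[6];        /* fixed width, no trailing NUL */
      char oem_table_id[8];  /* likewise */
  };

  static char *dup_oem_id(const struct header_sketch *hdr)
  {
      /* g_strndup() bounds the copy and appends the NUL the raw field
       * lacks; the caller owns the result and must g_free() it. */
      return g_strndup(hdr->oem_id, sizeof(hdr->oem_id));
  }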

@ -128,20 +128,15 @@ static void acpi_set_pci_info(void)
static void acpi_pcihp_disable_root_bus(void)
{
static bool root_hp_disabled;
Object *host = acpi_get_i386_pci_host();
PCIBus *bus;
if (root_hp_disabled) {
return;
}
bus = PCI_HOST_BRIDGE(host)->bus;
if (bus) {
if (bus && qbus_is_hotpluggable(BUS(bus))) {
/* setting the hotplug handler to NULL makes the bus non-hotpluggable */
qbus_set_hotplug_handler(BUS(bus), NULL);
}
root_hp_disabled = true;
return;
}
@ -491,6 +486,9 @@ static void pci_write(void *opaque, hwaddr addr, uint64_t data,
}
bus = acpi_pcihp_find_hotplug_bus(s, s->hotplug_select);
if (!bus) {
break;
}
QTAILQ_FOREACH_SAFE(kid, &bus->qbus.children, sibling, next) {
Object *o = OBJECT(kid->child);
PCIDevice *dev = PCI_DEVICE(o);

@ -229,6 +229,7 @@ static void acpi_dsdt_add_tpm(Aml *scope, VirtMachineState *vms)
Aml *dev = aml_device("TPM0");
aml_append(dev, aml_name_decl("_HID", aml_string("MSFT0101")));
aml_append(dev, aml_name_decl("_STR", aml_string("TPM 2.0 Device")));
aml_append(dev, aml_name_decl("_UID", aml_int(0)));
Aml *crs = aml_resource_template();

@ -1589,7 +1589,7 @@ static void virt_build_smbios(VirtMachineState *vms)
smbios_set_defaults("QEMU", product,
vmc->smbios_old_sys_ver ? "1.0" : mc->name, false,
true, SMBIOS_ENTRY_POINT_30);
true, SMBIOS_ENTRY_POINT_TYPE_64);
smbios_get_tables(MACHINE(vms), NULL, 0,
&smbios_tables, &smbios_tables_len,

@ -100,7 +100,7 @@ static int vhost_user_blk_handle_config_change(struct vhost_dev *dev)
&local_err);
if (ret < 0) {
error_report_err(local_err);
return -1;
return ret;
}
/* valid for resize only */
@ -252,6 +252,7 @@ static uint64_t vhost_user_blk_get_features(VirtIODevice *vdev,
VHostUserBlk *s = VHOST_USER_BLK(vdev);
/* Turn on pre-defined features */
virtio_add_feature(&features, VIRTIO_BLK_F_SIZE_MAX);
virtio_add_feature(&features, VIRTIO_BLK_F_SEG_MAX);
virtio_add_feature(&features, VIRTIO_BLK_F_GEOMETRY);
virtio_add_feature(&features, VIRTIO_BLK_F_TOPOLOGY);
@ -511,7 +512,7 @@ static void vhost_user_blk_device_realize(DeviceState *dev, Error **errp)
*errp = NULL;
}
ret = vhost_user_blk_realize_connect(s, errp);
} while (ret == -EPROTO && retries--);
} while (ret < 0 && retries--);
if (ret < 0) {
goto virtio_err;

@ -485,6 +485,9 @@ vhost_user_gpu_guest_notifier_pending(VirtIODevice *vdev, int idx)
{
VhostUserGPU *g = VHOST_USER_GPU(vdev);
if (idx == VIRTIO_CONFIG_IRQ_IDX) {
return false;
}
return vhost_virtqueue_pending(&g->vhost->dev, idx);
}
@ -493,6 +496,9 @@ vhost_user_gpu_guest_notifier_mask(VirtIODevice *vdev, int idx, bool mask)
{
VhostUserGPU *g = VHOST_USER_GPU(vdev);
if (idx == VIRTIO_CONFIG_IRQ_IDX) {
return;
}
vhost_virtqueue_mask(&g->vhost->dev, vdev, idx, mask);
}

@ -1812,11 +1812,15 @@ build_dsdt(GArray *table_data, BIOSLinker *linker,
dev = aml_device("TPM");
aml_append(dev, aml_name_decl("_HID",
aml_string("MSFT0101")));
aml_append(dev,
aml_name_decl("_STR",
aml_string("TPM 2.0 Device")));
} else {
dev = aml_device("ISA.TPM");
aml_append(dev, aml_name_decl("_HID",
aml_eisaid("PNP0C31")));
}
aml_append(dev, aml_name_decl("_UID", aml_int(1)));
aml_append(dev, aml_name_decl("_STA", aml_int(0xF)));
crs = aml_resource_template();
@ -1844,12 +1848,15 @@ build_dsdt(GArray *table_data, BIOSLinker *linker,
if (TPM_IS_CRB(tpm)) {
dev = aml_device("TPM");
aml_append(dev, aml_name_decl("_HID", aml_string("MSFT0101")));
aml_append(dev, aml_name_decl("_STR",
aml_string("TPM 2.0 Device")));
crs = aml_resource_template();
aml_append(crs, aml_memory32_fixed(TPM_CRB_ADDR_BASE,
TPM_CRB_ADDR_SIZE, AML_READ_WRITE));
aml_append(dev, aml_name_decl("_CRS", crs));
aml_append(dev, aml_name_decl("_STA", aml_int(0xf)));
aml_append(dev, aml_name_decl("_UID", aml_int(1)));
tpm_build_ppi_acpi(tpm, dev);
@ -2723,6 +2730,8 @@ void acpi_build(AcpiBuildTables *tables, MachineState *machine)
/* Cleanup memory that's no longer used. */
g_array_free(table_offsets, true);
g_free(slic_oem.id);
g_free(slic_oem.table_id);
}
static void acpi_ram_update(MemoryRegion *mr, GArray *data)

@ -1516,11 +1516,29 @@ static int vtd_sync_shadow_page_table(VTDAddressSpace *vtd_as)
* 1st-level translation or 2nd-level translation, it depends
* on PGTT setting.
*/
static bool vtd_dev_pt_enabled(VTDAddressSpace *as)
static bool vtd_dev_pt_enabled(IntelIOMMUState *s, VTDContextEntry *ce)
{
VTDPASIDEntry pe;
int ret;
if (s->root_scalable) {
ret = vtd_ce_get_rid2pasid_entry(s, ce, &pe);
if (ret) {
error_report_once("%s: vtd_ce_get_rid2pasid_entry error: %"PRId32,
__func__, ret);
return false;
}
return (VTD_PE_GET_TYPE(&pe) == VTD_SM_PASID_ENTRY_PT);
}
return (vtd_ce_get_type(ce) == VTD_CONTEXT_TT_PASS_THROUGH);
}
static bool vtd_as_pt_enabled(VTDAddressSpace *as)
{
IntelIOMMUState *s;
VTDContextEntry ce;
VTDPASIDEntry pe;
int ret;
assert(as);
@ -1538,17 +1556,7 @@ static bool vtd_dev_pt_enabled(VTDAddressSpace *as)
return false;
}
if (s->root_scalable) {
ret = vtd_ce_get_rid2pasid_entry(s, &ce, &pe);
if (ret) {
error_report_once("%s: vtd_ce_get_rid2pasid_entry error: %"PRId32,
__func__, ret);
return false;
}
return (VTD_PE_GET_TYPE(&pe) == VTD_SM_PASID_ENTRY_PT);
}
return (vtd_ce_get_type(&ce) == VTD_CONTEXT_TT_PASS_THROUGH);
return vtd_dev_pt_enabled(s, &ce);
}
/* Return whether the device is using IOMMU translation. */
@ -1560,7 +1568,7 @@ static bool vtd_switch_address_space(VTDAddressSpace *as)
assert(as);
use_iommu = as->iommu_state->dmar_enabled && !vtd_dev_pt_enabled(as);
use_iommu = as->iommu_state->dmar_enabled && !vtd_as_pt_enabled(as);
trace_vtd_switch_address_space(pci_bus_num(as->bus),
VTD_PCI_SLOT(as->devfn),
@ -1753,7 +1761,7 @@ static bool vtd_do_iommu_translate(VTDAddressSpace *vtd_as, PCIBus *bus,
* We don't need to translate for pass-through context entries.
* Also, let's ignore IOTLB caching as well for PT devices.
*/
if (vtd_ce_get_type(&ce) == VTD_CONTEXT_TT_PASS_THROUGH) {
if (vtd_dev_pt_enabled(s, &ce)) {
entry->iova = addr & VTD_PAGE_MASK_4K;
entry->translated_addr = entry->iova;
entry->addr_mask = ~VTD_PAGE_MASK_4K;

@ -77,6 +77,7 @@
#include "hw/mem/nvdimm.h"
#include "qapi/error.h"
#include "qapi/qapi-visit-common.h"
#include "qapi/qapi-visit-machine.h"
#include "qapi/visitor.h"
#include "hw/core/cpu.h"
#include "hw/usb.h"
@ -94,7 +95,9 @@
#include "trace.h"
#include CONFIG_DEVICES
GlobalProperty pc_compat_6_2[] = {};
GlobalProperty pc_compat_6_2[] = {
{ "virtio-mem", "unplugged-inaccessible", "off" },
};
const size_t pc_compat_6_2_len = G_N_ELEMENTS(pc_compat_6_2);
GlobalProperty pc_compat_6_1[] = {
@ -1524,6 +1527,23 @@ static void pc_machine_set_default_bus_bypass_iommu(Object *obj, bool value,
pcms->default_bus_bypass_iommu = value;
}
static void pc_machine_get_smbios_ep(Object *obj, Visitor *v, const char *name,
void *opaque, Error **errp)
{
PCMachineState *pcms = PC_MACHINE(obj);
SmbiosEntryPointType smbios_entry_point_type = pcms->smbios_entry_point_type;
visit_type_SmbiosEntryPointType(v, name, &smbios_entry_point_type, errp);
}
static void pc_machine_set_smbios_ep(Object *obj, Visitor *v, const char *name,
void *opaque, Error **errp)
{
PCMachineState *pcms = PC_MACHINE(obj);
visit_type_SmbiosEntryPointType(v, name, &pcms->smbios_entry_point_type, errp);
}
static void pc_machine_get_max_ram_below_4g(Object *obj, Visitor *v,
const char *name, void *opaque,
Error **errp)
@ -1614,6 +1634,8 @@ static void pc_machine_initfn(Object *obj)
pcms->vmport = ON_OFF_AUTO_OFF;
#endif /* CONFIG_VMPORT */
pcms->max_ram_below_4g = 0; /* use default */
pcms->smbios_entry_point_type = SMBIOS_ENTRY_POINT_TYPE_32;
/* acpi build is enabled by default if machine supports it */
pcms->acpi_build_enabled = PC_MACHINE_GET_CLASS(pcms)->has_acpi_build;
pcms->smbus_enabled = true;
@ -1737,15 +1759,23 @@ static void pc_machine_class_init(ObjectClass *oc, void *data)
object_class_property_add_bool(oc, PC_MACHINE_SMBUS,
pc_machine_get_smbus, pc_machine_set_smbus);
object_class_property_set_description(oc, PC_MACHINE_SMBUS,
"Enable/disable system management bus");
object_class_property_add_bool(oc, PC_MACHINE_SATA,
pc_machine_get_sata, pc_machine_set_sata);
object_class_property_set_description(oc, PC_MACHINE_SATA,
"Enable/disable Serial ATA bus");
object_class_property_add_bool(oc, PC_MACHINE_PIT,
pc_machine_get_pit, pc_machine_set_pit);
object_class_property_set_description(oc, PC_MACHINE_PIT,
"Enable/disable Intel 8254 programmable interval timer emulation");
object_class_property_add_bool(oc, "hpet",
pc_machine_get_hpet, pc_machine_set_hpet);
object_class_property_set_description(oc, "hpet",
"Enable/disable high precision event timer emulation");
object_class_property_add_bool(oc, "default-bus-bypass-iommu",
pc_machine_get_default_bus_bypass_iommu,
@ -1756,6 +1786,12 @@ static void pc_machine_class_init(ObjectClass *oc, void *data)
NULL, NULL);
object_class_property_set_description(oc, PC_MACHINE_MAX_FW_SIZE,
"Maximum combined firmware size");
object_class_property_add(oc, PC_MACHINE_SMBIOS_EP, "str",
pc_machine_get_smbios_ep, pc_machine_set_smbios_ep,
NULL, NULL);
object_class_property_set_description(oc, PC_MACHINE_SMBIOS_EP,
"SMBIOS Entry Point type [32, 64]");
}
static const TypeInfo pc_machine_info = {
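
The property is also visible through QOM at runtime (PC_MACHINE_SMBIOS_EP is
the property-name macro; in the tree it expands to "smbios-entry-point-type").
An illustrative QMP exchange; the reply shown assumes the
SMBIOS_ENTRY_POINT_TYPE_32 default installed in pc_machine_initfn() above:

  { "execute": "qom-get",
    "arguments": { "path": "/machine",
                   "property": "smbios-entry-point-type" } }
  { "return": "32" }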

@ -177,7 +177,7 @@ static void pc_init1(MachineState *machine,
smbios_set_defaults("QEMU", "Standard PC (i440FX + PIIX, 1996)",
mc->name, pcmc->smbios_legacy_mode,
pcmc->smbios_uuid_encoded,
SMBIOS_ENTRY_POINT_21);
pcms->smbios_entry_point_type);
}
/* allocate ram and load rom/bios */

@ -200,7 +200,7 @@ static void pc_q35_init(MachineState *machine)
smbios_set_defaults("QEMU", "Standard PC (Q35 + ICH9, 2009)",
mc->name, pcmc->smbios_legacy_mode,
pcmc->smbios_uuid_encoded,
SMBIOS_ENTRY_POINT_21);
pcms->smbios_entry_point_type);
}
/* allocate ram and load rom/bios */

@ -101,3 +101,12 @@ int vhost_net_set_mtu(struct vhost_net *net, uint16_t mtu)
{
return 0;
}
bool vhost_net_config_pending(VHostNetState *net)
{
return false;
}
void vhost_net_config_mask(VHostNetState *net, VirtIODevice *dev, bool mask)
{
}

@ -457,6 +457,15 @@ void vhost_net_virtqueue_mask(VHostNetState *net, VirtIODevice *dev,
vhost_virtqueue_mask(&net->dev, dev, idx, mask);
}
bool vhost_net_config_pending(VHostNetState *net)
{
return vhost_config_pending(&net->dev);
}
void vhost_net_config_mask(VHostNetState *net, VirtIODevice *dev, bool mask)
{
vhost_config_mask(&net->dev, dev, mask);
}
VHostNetState *get_vhost_net(NetClientState *nc)
{
VHostNetState *vhost_net = 0;

@ -3168,6 +3168,9 @@ static bool virtio_net_guest_notifier_pending(VirtIODevice *vdev, int idx)
VirtIONet *n = VIRTIO_NET(vdev);
NetClientState *nc = qemu_get_subqueue(n->nic, vq2q(idx));
assert(n->vhost_started);
if (idx == VIRTIO_CONFIG_IRQ_IDX) {
return vhost_net_config_pending(get_vhost_net(nc->peer));
}
return vhost_net_virtqueue_pending(get_vhost_net(nc->peer), idx);
}
@ -3177,8 +3180,11 @@ static void virtio_net_guest_notifier_mask(VirtIODevice *vdev, int idx,
VirtIONet *n = VIRTIO_NET(vdev);
NetClientState *nc = qemu_get_subqueue(n->nic, vq2q(idx));
assert(n->vhost_started);
vhost_net_virtqueue_mask(get_vhost_net(nc->peer),
vdev, idx, mask);
if (idx == VIRTIO_CONFIG_IRQ_IDX) {
vhost_net_config_mask(get_vhost_net(nc->peer), vdev, mask);
return;
}
vhost_net_virtqueue_mask(get_vhost_net(nc->peer), vdev, idx, mask);
}
static void virtio_net_set_config_size(VirtIONet *n, uint64_t host_features)
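
This is the core of the config-interrupt plumbing: the guest-notifier
mask/pending callbacks may now be invoked with idx == VIRTIO_CONFIG_IRQ_IDX
instead of a virtqueue index. virtio-net forwards that case to the new
vhost_net_config_pending()/vhost_net_config_mask() helpers, while devices
without vhost config-interrupt support (vhost-user-gpu above, vhost-user-fs
and vhost-vsock below) must filter it out before indexing their virtqueue
arrays. In sketch form, with MyDev standing in for a hypothetical device:

  /* Hypothetical vhost device with no config-interrupt support:
   * reject the config IRQ index before touching the vq array. */
  static bool mydev_guest_notifier_pending(VirtIODevice *vdev, int idx)
  {
      MyDev *d = MY_DEV(vdev);

      if (idx == VIRTIO_CONFIG_IRQ_IDX) {
          return false;               /* config IRQ is not vhost-backed */
      }
      return vhost_virtqueue_pending(&d->vhost_dev, idx);
  }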

@ -1390,7 +1390,7 @@ static void pci_update_mappings(PCIDevice *d)
/* now do the real mapping */
if (r->addr != PCI_BAR_UNMAPPED) {
trace_pci_update_mappings_del(d, pci_dev_bus_num(d),
trace_pci_update_mappings_del(d->name, pci_dev_bus_num(d),
PCI_SLOT(d->devfn),
PCI_FUNC(d->devfn),
i, r->addr, r->size);
@ -1398,7 +1398,7 @@ static void pci_update_mappings(PCIDevice *d)
}
r->addr = new_addr;
if (r->addr != PCI_BAR_UNMAPPED) {
trace_pci_update_mappings_add(d, pci_dev_bus_num(d),
trace_pci_update_mappings_add(d->name, pci_dev_bus_num(d),
PCI_SLOT(d->devfn),
PCI_FUNC(d->devfn),
i, r->addr, r->size);
@ -1497,11 +1497,6 @@ static void pci_irq_handler(void *opaque, int irq_num, int level)
pci_change_irq_level(pci_dev, irq_num, change);
}
static inline int pci_intx(PCIDevice *pci_dev)
{
return pci_get_byte(pci_dev->config + PCI_INTERRUPT_PIN) - 1;
}
qemu_irq pci_allocate_irq(PCIDevice *pci_dev)
{
int intx = pci_intx(pci_dev);

@ -79,7 +79,8 @@ void pci_host_config_write_common(PCIDevice *pci_dev, uint32_t addr,
return;
}
trace_pci_cfg_write(pci_dev->name, PCI_SLOT(pci_dev->devfn),
trace_pci_cfg_write(pci_dev->name, pci_dev_bus_num(pci_dev),
PCI_SLOT(pci_dev->devfn),
PCI_FUNC(pci_dev->devfn), addr, val);
pci_dev->config_write(pci_dev, addr, val, MIN(len, limit - addr));
}
@ -104,7 +105,8 @@ uint32_t pci_host_config_read_common(PCIDevice *pci_dev, uint32_t addr,
}
ret = pci_dev->config_read(pci_dev, addr, MIN(len, limit - addr));
trace_pci_cfg_read(pci_dev->name, PCI_SLOT(pci_dev->devfn),
trace_pci_cfg_read(pci_dev->name, pci_dev_bus_num(pci_dev),
PCI_SLOT(pci_dev->devfn),
PCI_FUNC(pci_dev->devfn), addr, ret);
return ret;

@ -774,7 +774,9 @@ void pcie_aer_root_write_config(PCIDevice *dev,
uint32_t root_cmd = pci_get_long(aer_cap + PCI_ERR_ROOT_COMMAND);
/* 6.2.4.1.2 Interrupt Generation */
if (!msix_enabled(dev) && !msi_enabled(dev)) {
pci_set_irq(dev, !!(root_cmd & enabled_cmd));
if (pci_intx(dev) != -1) {
pci_set_irq(dev, !!(root_cmd & enabled_cmd));
}
return;
}
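
The guard pairs with the pci.c hunk above, where pci_intx() loses its
file-local definition (presumably moving to a shared header so pcie_aer.c can
use it). For reference, its body with the semantics spelled out:

  static inline int pci_intx(PCIDevice *pci_dev)
  {
      /* Interrupt Pin config register: 1-4 = INTA-INTD, 0 = no pin.
       * The result is therefore the INTx index, or -1 for "no legacy
       * interrupt", which is the case the new guard skips. */
      return pci_get_byte(pci_dev->config + PCI_INTERRUPT_PIN) - 1;
  }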

@ -1,12 +1,12 @@
# See docs/devel/tracing.rst for syntax documentation.
# pci.c
pci_update_mappings_del(void *d, uint32_t bus, uint32_t slot, uint32_t func, int bar, uint64_t addr, uint64_t size) "d=%p %02x:%02x.%x %d,0x%"PRIx64"+0x%"PRIx64
pci_update_mappings_add(void *d, uint32_t bus, uint32_t slot, uint32_t func, int bar, uint64_t addr, uint64_t size) "d=%p %02x:%02x.%x %d,0x%"PRIx64"+0x%"PRIx64
pci_update_mappings_del(const char *dev, uint32_t bus, uint32_t slot, uint32_t func, int bar, uint64_t addr, uint64_t size) "%s %02x:%02x.%x %d,0x%"PRIx64"+0x%"PRIx64
pci_update_mappings_add(const char *dev, uint32_t bus, uint32_t slot, uint32_t func, int bar, uint64_t addr, uint64_t size) "%s %02x:%02x.%x %d,0x%"PRIx64"+0x%"PRIx64
# pci_host.c
pci_cfg_read(const char *dev, unsigned devid, unsigned fnid, unsigned offs, unsigned val) "%s %02u:%u @0x%x -> 0x%x"
pci_cfg_write(const char *dev, unsigned devid, unsigned fnid, unsigned offs, unsigned val) "%s %02u:%u @0x%x <- 0x%x"
pci_cfg_read(const char *dev, uint32_t bus, uint32_t slot, uint32_t func, unsigned offs, unsigned val) "%s %02x:%02x.%x @0x%x -> 0x%x"
pci_cfg_write(const char *dev, uint32_t bus, uint32_t slot, uint32_t func, unsigned offs, unsigned val) "%s %02x:%02x.%x @0x%x <- 0x%x"
# msix.c
msix_write_config(char *name, bool enabled, bool masked) "dev %s enabled %d masked %d"

@ -170,6 +170,7 @@ static void vhost_scsi_realize(DeviceState *dev, Error **errp)
Error *err = NULL;
int vhostfd = -1;
int ret;
struct vhost_virtqueue *vqs = NULL;
if (!vs->conf.wwpn) {
error_setg(errp, "vhost-scsi: missing wwpn");
@ -213,13 +214,19 @@ static void vhost_scsi_realize(DeviceState *dev, Error **errp)
}
vsc->dev.nvqs = VHOST_SCSI_VQ_NUM_FIXED + vs->conf.num_queues;
vsc->dev.vqs = g_new0(struct vhost_virtqueue, vsc->dev.nvqs);
vqs = g_new0(struct vhost_virtqueue, vsc->dev.nvqs);
vsc->dev.vqs = vqs;
vsc->dev.vq_index = 0;
vsc->dev.backend_features = 0;
ret = vhost_dev_init(&vsc->dev, (void *)(uintptr_t)vhostfd,
VHOST_BACKEND_TYPE_KERNEL, 0, errp);
if (ret < 0) {
/*
* vhost_dev_init calls vhost_dev_cleanup on error, which closes
* vhostfd, don't double close it.
*/
vhostfd = -1;
goto free_vqs;
}
@ -232,7 +239,7 @@ static void vhost_scsi_realize(DeviceState *dev, Error **errp)
return;
free_vqs:
g_free(vsc->dev.vqs);
g_free(vqs);
if (!vsc->migratable) {
migrate_del_blocker(vsc->migration_blocker);
}
@ -240,7 +247,9 @@ static void vhost_scsi_realize(DeviceState *dev, Error **errp)
error_free(vsc->migration_blocker);
virtio_scsi_common_unrealize(dev);
close_fd:
close(vhostfd);
if (vhostfd >= 0) {
close(vhostfd);
}
return;
}

@ -62,7 +62,7 @@ uint8_t *smbios_tables;
size_t smbios_tables_len;
unsigned smbios_table_max;
unsigned smbios_table_cnt;
static SmbiosEntryPointType smbios_ep_type = SMBIOS_ENTRY_POINT_21;
static SmbiosEntryPointType smbios_ep_type = SMBIOS_ENTRY_POINT_TYPE_32;
static SmbiosEntryPoint ep;
@ -432,7 +432,7 @@ static void smbios_validate_table(MachineState *ms)
exit(1);
}
if (smbios_ep_type == SMBIOS_ENTRY_POINT_21 &&
if (smbios_ep_type == SMBIOS_ENTRY_POINT_TYPE_32 &&
smbios_tables_len > SMBIOS_21_MAX_TABLES_LEN) {
error_report("SMBIOS 2.1 table length %zu exceeds %d",
smbios_tables_len, SMBIOS_21_MAX_TABLES_LEN);
@ -927,7 +927,7 @@ void smbios_set_defaults(const char *manufacturer, const char *product,
static void smbios_entry_point_setup(void)
{
switch (smbios_ep_type) {
case SMBIOS_ENTRY_POINT_21:
case SMBIOS_ENTRY_POINT_TYPE_32:
memcpy(ep.ep21.anchor_string, "_SM_", 4);
memcpy(ep.ep21.intermediate_anchor_string, "_DMI_", 5);
ep.ep21.length = sizeof(struct smbios_21_entry_point);
@ -950,7 +950,7 @@ static void smbios_entry_point_setup(void)
ep.ep21.structure_table_address = cpu_to_le32(0);
break;
case SMBIOS_ENTRY_POINT_30:
case SMBIOS_ENTRY_POINT_TYPE_64:
memcpy(ep.ep30.anchor_string, "_SM3_", 5);
ep.ep30.length = sizeof(struct smbios_30_entry_point);
ep.ep30.entry_point_revision = 1;

@ -53,6 +53,7 @@ vhost_vdpa_get_features(void *dev, uint64_t features) "dev: %p features: 0x%"PRI
vhost_vdpa_set_owner(void *dev) "dev: %p"
vhost_vdpa_vq_get_addr(void *dev, void *vq, uint64_t desc_user_addr, uint64_t avail_user_addr, uint64_t used_user_addr) "dev: %p vq: %p desc_user_addr: 0x%"PRIx64" avail_user_addr: 0x%"PRIx64" used_user_addr: 0x%"PRIx64
vhost_vdpa_get_iova_range(void *dev, uint64_t first, uint64_t last) "dev: %p first: 0x%"PRIx64" last: 0x%"PRIx64
vhost_vdpa_set_config_call(void *dev, int fd)"dev: %p fd: %d"
# virtio.c
virtqueue_alloc_element(void *elem, size_t sz, unsigned in_num, unsigned out_num) "elem %p size %zd in_num %u out_num %u"

@ -47,7 +47,7 @@ static int vhost_kernel_cleanup(struct vhost_dev *dev)
assert(dev->vhost_ops->backend_type == VHOST_BACKEND_TYPE_KERNEL);
return close(fd);
return close(fd) < 0 ? -errno : 0;
}
static int vhost_kernel_memslots_limit(struct vhost_dev *dev)
@ -58,7 +58,7 @@ static int vhost_kernel_memslots_limit(struct vhost_dev *dev)
if (g_file_get_contents("/sys/module/vhost/parameters/max_mem_regions",
&s, NULL, NULL)) {
uint64_t val = g_ascii_strtoull(s, NULL, 10);
if (!((val == G_MAXUINT64 || !val) && errno)) {
if (val < INT_MAX && val > 0) {
g_free(s);
return val;
}

@ -161,6 +161,9 @@ static void vuf_guest_notifier_mask(VirtIODevice *vdev, int idx,
{
VHostUserFS *fs = VHOST_USER_FS(vdev);
if (idx == VIRTIO_CONFIG_IRQ_IDX) {
return;
}
vhost_virtqueue_mask(&fs->vhost_dev, vdev, idx, mask);
}
@ -168,6 +171,9 @@ static bool vuf_guest_notifier_pending(VirtIODevice *vdev, int idx)
{
VHostUserFS *fs = VHOST_USER_FS(vdev);
if (idx == VIRTIO_CONFIG_IRQ_IDX) {
return false;
}
return vhost_virtqueue_pending(&fs->vhost_dev, idx);
}

@ -280,9 +280,10 @@ static int vhost_user_read_header(struct vhost_dev *dev, VhostUserMsg *msg)
r = qemu_chr_fe_read_all(chr, p, size);
if (r != size) {
int saved_errno = errno;
error_report("Failed to read msg header. Read %d instead of %d."
" Original request %d.", r, size, msg->hdr.request);
return -1;
return r < 0 ? -saved_errno : -EIO;
}
/* validate received flags */
@ -290,7 +291,7 @@ static int vhost_user_read_header(struct vhost_dev *dev, VhostUserMsg *msg)
error_report("Failed to read msg header."
" Flags 0x%x instead of 0x%x.", msg->hdr.flags,
VHOST_USER_REPLY_MASK | VHOST_USER_VERSION);
return -1;
return -EPROTO;
}
return 0;
@ -314,8 +315,9 @@ static gboolean vhost_user_read_cb(void *do_not_use, GIOCondition condition,
uint8_t *p = (uint8_t *) msg;
int r, size;
if (vhost_user_read_header(dev, msg) < 0) {
data->ret = -1;
r = vhost_user_read_header(dev, msg);
if (r < 0) {
data->ret = r;
goto end;
}
@ -324,7 +326,7 @@ static gboolean vhost_user_read_cb(void *do_not_use, GIOCondition condition,
error_report("Failed to read msg header."
" Size %d exceeds the maximum %zu.", msg->hdr.size,
VHOST_USER_PAYLOAD_SIZE);
data->ret = -1;
data->ret = -EPROTO;
goto end;
}
@ -333,9 +335,10 @@ static gboolean vhost_user_read_cb(void *do_not_use, GIOCondition condition,
size = msg->hdr.size;
r = qemu_chr_fe_read_all(chr, p, size);
if (r != size) {
int saved_errno = errno;
error_report("Failed to read msg payload."
" Read %d instead of %d.", r, msg->hdr.size);
data->ret = -1;
data->ret = r < 0 ? -saved_errno : -EIO;
goto end;
}
}
@ -418,24 +421,26 @@ static int vhost_user_read(struct vhost_dev *dev, VhostUserMsg *msg)
static int process_message_reply(struct vhost_dev *dev,
const VhostUserMsg *msg)
{
int ret;
VhostUserMsg msg_reply;
if ((msg->hdr.flags & VHOST_USER_NEED_REPLY_MASK) == 0) {
return 0;
}
if (vhost_user_read(dev, &msg_reply) < 0) {
return -1;
ret = vhost_user_read(dev, &msg_reply);
if (ret < 0) {
return ret;
}
if (msg_reply.hdr.request != msg->hdr.request) {
error_report("Received unexpected msg type. "
"Expected %d received %d",
msg->hdr.request, msg_reply.hdr.request);
return -1;
return -EPROTO;
}
return msg_reply.payload.u64 ? -1 : 0;
return msg_reply.payload.u64 ? -EIO : 0;
}
static bool vhost_user_one_time_request(VhostUserRequest request)
@ -472,14 +477,15 @@ static int vhost_user_write(struct vhost_dev *dev, VhostUserMsg *msg,
if (qemu_chr_fe_set_msgfds(chr, fds, fd_num) < 0) {
error_report("Failed to set msg fds.");
return -1;
return -EINVAL;
}
ret = qemu_chr_fe_write_all(chr, (const uint8_t *) msg, size);
if (ret != size) {
int saved_errno = errno;
error_report("Failed to write msg."
" Wrote %d instead of %d.", ret, size);
return -1;
return ret < 0 ? -saved_errno : -EIO;
}
return 0;
@ -502,6 +508,7 @@ static int vhost_user_set_log_base(struct vhost_dev *dev, uint64_t base,
size_t fd_num = 0;
bool shmfd = virtio_has_feature(dev->protocol_features,
VHOST_USER_PROTOCOL_F_LOG_SHMFD);
int ret;
VhostUserMsg msg = {
.hdr.request = VHOST_USER_SET_LOG_BASE,
.hdr.flags = VHOST_USER_VERSION,
@ -514,21 +521,23 @@ static int vhost_user_set_log_base(struct vhost_dev *dev, uint64_t base,
fds[fd_num++] = log->fd;
}
if (vhost_user_write(dev, &msg, fds, fd_num) < 0) {
return -1;
ret = vhost_user_write(dev, &msg, fds, fd_num);
if (ret < 0) {
return ret;
}
if (shmfd) {
msg.hdr.size = 0;
if (vhost_user_read(dev, &msg) < 0) {
return -1;
ret = vhost_user_read(dev, &msg);
if (ret < 0) {
return ret;
}
if (msg.hdr.request != VHOST_USER_SET_LOG_BASE) {
error_report("Received unexpected msg type. "
"Expected %d received %d",
VHOST_USER_SET_LOG_BASE, msg.hdr.request);
return -1;
return -EPROTO;
}
}
@ -588,7 +597,7 @@ static int vhost_user_fill_set_mem_table_msg(struct vhost_user *u,
u->region_rb[i] = mr->ram_block;
} else if (*fd_num == VHOST_MEMORY_BASELINE_NREGIONS) {
error_report("Failed preparing vhost-user memory table msg");
return -1;
return -ENOBUFS;
}
vhost_user_fill_msg_region(&region_buffer, reg, offset);
msg->payload.memory.regions[*fd_num] = region_buffer;
@ -604,14 +613,14 @@ static int vhost_user_fill_set_mem_table_msg(struct vhost_user *u,
if (!*fd_num) {
error_report("Failed initializing vhost-user memory map, "
"consider using -object memory-backend-file share=on");
return -1;
return -EINVAL;
}
msg->hdr.size = sizeof(msg->payload.memory.nregions);
msg->hdr.size += sizeof(msg->payload.memory.padding);
msg->hdr.size += *fd_num * sizeof(VhostUserMemoryRegion);
return 1;
return 0;
}
static inline bool reg_equal(struct vhost_memory_region *shadow_reg,
@ -741,8 +750,9 @@ static int send_remove_regions(struct vhost_dev *dev,
vhost_user_fill_msg_region(&region_buffer, shadow_reg, 0);
msg->payload.mem_reg.region = region_buffer;
if (vhost_user_write(dev, msg, &fd, 1) < 0) {
return -1;
ret = vhost_user_write(dev, msg, &fd, 1);
if (ret < 0) {
return ret;
}
if (reply_supported) {
@ -801,15 +811,17 @@ static int send_add_regions(struct vhost_dev *dev,
vhost_user_fill_msg_region(&region_buffer, reg, offset);
msg->payload.mem_reg.region = region_buffer;
if (vhost_user_write(dev, msg, &fd, 1) < 0) {
return -1;
ret = vhost_user_write(dev, msg, &fd, 1);
if (ret < 0) {
return ret;
}
if (track_ramblocks) {
uint64_t reply_gpa;
if (vhost_user_read(dev, &msg_reply) < 0) {
return -1;
ret = vhost_user_read(dev, &msg_reply);
if (ret < 0) {
return ret;
}
reply_gpa = msg_reply.payload.mem_reg.region.guest_phys_addr;
@ -819,7 +831,7 @@ static int send_add_regions(struct vhost_dev *dev,
"Expected %d received %d", __func__,
VHOST_USER_ADD_MEM_REG,
msg_reply.hdr.request);
return -1;
return -EPROTO;
}
/*
@ -830,7 +842,7 @@ static int send_add_regions(struct vhost_dev *dev,
error_report("%s: Unexpected size for postcopy reply "
"%d vs %d", __func__, msg_reply.hdr.size,
msg->hdr.size);
return -1;
return -EPROTO;
}
/* Get the postcopy client base from the backend's reply. */
@ -846,7 +858,7 @@ static int send_add_regions(struct vhost_dev *dev,
"Got guest physical address %" PRIX64 ", expected "
"%" PRIX64, __func__, reply_gpa,
dev->mem->regions[reg_idx].guest_phys_addr);
return -1;
return -EPROTO;
}
} else if (reply_supported) {
ret = process_message_reply(dev, msg);
@ -887,6 +899,7 @@ static int vhost_user_add_remove_regions(struct vhost_dev *dev,
struct scrub_regions rem_reg[VHOST_USER_MAX_RAM_SLOTS];
uint64_t shadow_pcb[VHOST_USER_MAX_RAM_SLOTS] = {};
int nr_add_reg, nr_rem_reg;
int ret;
msg->hdr.size = sizeof(msg->payload.mem_reg);
@ -894,16 +907,20 @@ static int vhost_user_add_remove_regions(struct vhost_dev *dev,
scrub_shadow_regions(dev, add_reg, &nr_add_reg, rem_reg, &nr_rem_reg,
shadow_pcb, track_ramblocks);
if (nr_rem_reg && send_remove_regions(dev, rem_reg, nr_rem_reg, msg,
reply_supported) < 0)
{
goto err;
if (nr_rem_reg) {
ret = send_remove_regions(dev, rem_reg, nr_rem_reg, msg,
reply_supported);
if (ret < 0) {
goto err;
}
}
if (nr_add_reg && send_add_regions(dev, add_reg, nr_add_reg, msg,
shadow_pcb, reply_supported, track_ramblocks) < 0)
{
goto err;
if (nr_add_reg) {
ret = send_add_regions(dev, add_reg, nr_add_reg, msg, shadow_pcb,
reply_supported, track_ramblocks);
if (ret < 0) {
goto err;
}
}
if (track_ramblocks) {
@ -918,8 +935,9 @@ static int vhost_user_add_remove_regions(struct vhost_dev *dev,
msg->hdr.size = sizeof(msg->payload.u64);
msg->payload.u64 = 0; /* OK */
if (vhost_user_write(dev, msg, NULL, 0) < 0) {
return -1;
ret = vhost_user_write(dev, msg, NULL, 0);
if (ret < 0) {
return ret;
}
}
@ -931,7 +949,7 @@ err:
sizeof(uint64_t) * VHOST_USER_MAX_RAM_SLOTS);
}
return -1;
return ret;
}
static int vhost_user_set_mem_table_postcopy(struct vhost_dev *dev,
@ -944,6 +962,7 @@ static int vhost_user_set_mem_table_postcopy(struct vhost_dev *dev,
size_t fd_num = 0;
VhostUserMsg msg_reply;
int region_i, msg_i;
int ret;
VhostUserMsg msg = {
.hdr.flags = VHOST_USER_VERSION,
@ -961,29 +980,32 @@ static int vhost_user_set_mem_table_postcopy(struct vhost_dev *dev,
}
if (config_mem_slots) {
if (vhost_user_add_remove_regions(dev, &msg, reply_supported,
true) < 0) {
return -1;
ret = vhost_user_add_remove_regions(dev, &msg, reply_supported, true);
if (ret < 0) {
return ret;
}
} else {
if (vhost_user_fill_set_mem_table_msg(u, dev, &msg, fds, &fd_num,
true) < 0) {
return -1;
ret = vhost_user_fill_set_mem_table_msg(u, dev, &msg, fds, &fd_num,
true);
if (ret < 0) {
return ret;
}
if (vhost_user_write(dev, &msg, fds, fd_num) < 0) {
return -1;
ret = vhost_user_write(dev, &msg, fds, fd_num);
if (ret < 0) {
return ret;
}
if (vhost_user_read(dev, &msg_reply) < 0) {
return -1;
ret = vhost_user_read(dev, &msg_reply);
if (ret < 0) {
return ret;
}
if (msg_reply.hdr.request != VHOST_USER_SET_MEM_TABLE) {
error_report("%s: Received unexpected msg type."
"Expected %d received %d", __func__,
VHOST_USER_SET_MEM_TABLE, msg_reply.hdr.request);
return -1;
return -EPROTO;
}
/*
@ -994,7 +1016,7 @@ static int vhost_user_set_mem_table_postcopy(struct vhost_dev *dev,
error_report("%s: Unexpected size for postcopy reply "
"%d vs %d", __func__, msg_reply.hdr.size,
msg.hdr.size);
return -1;
return -EPROTO;
}
memset(u->postcopy_client_bases, 0,
@ -1024,7 +1046,7 @@ static int vhost_user_set_mem_table_postcopy(struct vhost_dev *dev,
error_report("%s: postcopy reply not fully consumed "
"%d vs %zd",
__func__, msg_i, fd_num);
return -1;
return -EIO;
}
/*
@ -1035,8 +1057,9 @@ static int vhost_user_set_mem_table_postcopy(struct vhost_dev *dev,
/* TODO: Use this for failure cases as well with a bad value. */
msg.hdr.size = sizeof(msg.payload.u64);
msg.payload.u64 = 0; /* OK */
if (vhost_user_write(dev, &msg, NULL, 0) < 0) {
return -1;
ret = vhost_user_write(dev, &msg, NULL, 0);
if (ret < 0) {
return ret;
}
}
@ -1055,6 +1078,7 @@ static int vhost_user_set_mem_table(struct vhost_dev *dev,
bool config_mem_slots =
virtio_has_feature(dev->protocol_features,
VHOST_USER_PROTOCOL_F_CONFIGURE_MEM_SLOTS);
int ret;
if (do_postcopy) {
/*
@ -1074,17 +1098,20 @@ static int vhost_user_set_mem_table(struct vhost_dev *dev,
}
if (config_mem_slots) {
if (vhost_user_add_remove_regions(dev, &msg, reply_supported,
false) < 0) {
return -1;
ret = vhost_user_add_remove_regions(dev, &msg, reply_supported, false);
if (ret < 0) {
return ret;
}
} else {
if (vhost_user_fill_set_mem_table_msg(u, dev, &msg, fds, &fd_num,
false) < 0) {
return -1;
ret = vhost_user_fill_set_mem_table_msg(u, dev, &msg, fds, &fd_num,
false);
if (ret < 0) {
return ret;
}
if (vhost_user_write(dev, &msg, fds, fd_num) < 0) {
return -1;
ret = vhost_user_write(dev, &msg, fds, fd_num);
if (ret < 0) {
return ret;
}
if (reply_supported) {
@ -1109,14 +1136,10 @@ static int vhost_user_set_vring_endian(struct vhost_dev *dev,
if (!cross_endian) {
error_report("vhost-user trying to send unhandled ioctl");
return -1;
return -ENOTSUP;
}
if (vhost_user_write(dev, &msg, NULL, 0) < 0) {
return -1;
}
return 0;
return vhost_user_write(dev, &msg, NULL, 0);
}
static int vhost_set_vring(struct vhost_dev *dev,
@ -1130,11 +1153,7 @@ static int vhost_set_vring(struct vhost_dev *dev,
.hdr.size = sizeof(msg.payload.state),
};
if (vhost_user_write(dev, &msg, NULL, 0) < 0) {
return -1;
}
return 0;
return vhost_user_write(dev, &msg, NULL, 0);
}
static int vhost_user_set_vring_num(struct vhost_dev *dev,
@ -1182,16 +1201,25 @@ static int vhost_user_set_vring_enable(struct vhost_dev *dev, int enable)
int i;
if (!virtio_has_feature(dev->features, VHOST_USER_F_PROTOCOL_FEATURES)) {
return -1;
return -EINVAL;
}
for (i = 0; i < dev->nvqs; ++i) {
int ret;
struct vhost_vring_state state = {
.index = dev->vq_index + i,
.num = enable,
};
vhost_set_vring(dev, VHOST_USER_SET_VRING_ENABLE, &state);
ret = vhost_set_vring(dev, VHOST_USER_SET_VRING_ENABLE, &state);
if (ret < 0) {
/*
* Restoring the previous state is likely infeasible, as is
* proceeding regardless of the error, so just bail out and hope
* for device-level recovery.
*/
return ret;
}
}
return 0;
@ -1200,6 +1228,7 @@ static int vhost_user_set_vring_enable(struct vhost_dev *dev, int enable)
static int vhost_user_get_vring_base(struct vhost_dev *dev,
struct vhost_vring_state *ring)
{
int ret;
VhostUserMsg msg = {
.hdr.request = VHOST_USER_GET_VRING_BASE,
.hdr.flags = VHOST_USER_VERSION,
@ -1209,23 +1238,25 @@ static int vhost_user_get_vring_base(struct vhost_dev *dev,
vhost_user_host_notifier_remove(dev, ring->index);
if (vhost_user_write(dev, &msg, NULL, 0) < 0) {
return -1;
ret = vhost_user_write(dev, &msg, NULL, 0);
if (ret < 0) {
return ret;
}
if (vhost_user_read(dev, &msg) < 0) {
return -1;
ret = vhost_user_read(dev, &msg);
if (ret < 0) {
return ret;
}
if (msg.hdr.request != VHOST_USER_GET_VRING_BASE) {
error_report("Received unexpected msg type. Expected %d received %d",
VHOST_USER_GET_VRING_BASE, msg.hdr.request);
return -1;
return -EPROTO;
}
if (msg.hdr.size != sizeof(msg.payload.state)) {
error_report("Received bad msg size.");
return -1;
return -EPROTO;
}
*ring = msg.payload.state;
@ -1252,11 +1283,7 @@ static int vhost_set_vring_file(struct vhost_dev *dev,
msg.payload.u64 |= VHOST_USER_VRING_NOFD_MASK;
}
if (vhost_user_write(dev, &msg, fds, fd_num) < 0) {
return -1;
}
return 0;
return vhost_user_write(dev, &msg, fds, fd_num);
}
static int vhost_user_set_vring_kick(struct vhost_dev *dev,
@ -1274,6 +1301,7 @@ static int vhost_user_set_vring_call(struct vhost_dev *dev,
static int vhost_user_get_u64(struct vhost_dev *dev, int request, uint64_t *u64)
{
int ret;
VhostUserMsg msg = {
.hdr.request = request,
.hdr.flags = VHOST_USER_VERSION,
@ -1283,23 +1311,25 @@ static int vhost_user_get_u64(struct vhost_dev *dev, int request, uint64_t *u64)
return 0;
}
if (vhost_user_write(dev, &msg, NULL, 0) < 0) {
return -1;
ret = vhost_user_write(dev, &msg, NULL, 0);
if (ret < 0) {
return ret;
}
if (vhost_user_read(dev, &msg) < 0) {
return -1;
ret = vhost_user_read(dev, &msg);
if (ret < 0) {
return ret;
}
if (msg.hdr.request != request) {
error_report("Received unexpected msg type. Expected %d received %d",
request, msg.hdr.request);
return -1;
return -EPROTO;
}
if (msg.hdr.size != sizeof(msg.payload.u64)) {
error_report("Received bad msg size.");
return -1;
return -EPROTO;
}
*u64 = msg.payload.u64;
@ -1337,6 +1367,7 @@ static int enforce_reply(struct vhost_dev *dev,
static int vhost_user_set_vring_addr(struct vhost_dev *dev,
struct vhost_vring_addr *addr)
{
int ret;
VhostUserMsg msg = {
.hdr.request = VHOST_USER_SET_VRING_ADDR,
.hdr.flags = VHOST_USER_VERSION,
@ -1357,8 +1388,9 @@ static int vhost_user_set_vring_addr(struct vhost_dev *dev,
msg.hdr.flags |= VHOST_USER_NEED_REPLY_MASK;
}
if (vhost_user_write(dev, &msg, NULL, 0) < 0) {
return -1;
ret = vhost_user_write(dev, &msg, NULL, 0);
if (ret < 0) {
return ret;
}
if (wait_for_reply) {
@ -1377,6 +1409,7 @@ static int vhost_user_set_u64(struct vhost_dev *dev, int request, uint64_t u64,
.payload.u64 = u64,
.hdr.size = sizeof(msg.payload.u64),
};
int ret;
if (wait_for_reply) {
bool reply_supported = virtio_has_feature(dev->protocol_features,
@ -1386,8 +1419,9 @@ static int vhost_user_set_u64(struct vhost_dev *dev, int request, uint64_t u64,
}
}
if (vhost_user_write(dev, &msg, NULL, 0) < 0) {
return -1;
ret = vhost_user_write(dev, &msg, NULL, 0);
if (ret < 0) {
return ret;
}
if (wait_for_reply) {
@ -1424,11 +1458,7 @@ static int vhost_user_set_owner(struct vhost_dev *dev)
.hdr.flags = VHOST_USER_VERSION,
};
if (vhost_user_write(dev, &msg, NULL, 0) < 0) {
return -EPROTO;
}
return 0;
return vhost_user_write(dev, &msg, NULL, 0);
}
static int vhost_user_get_max_memslots(struct vhost_dev *dev,
@ -1459,26 +1489,16 @@ static int vhost_user_reset_device(struct vhost_dev *dev)
? VHOST_USER_RESET_DEVICE
: VHOST_USER_RESET_OWNER;
if (vhost_user_write(dev, &msg, NULL, 0) < 0) {
return -1;
}
return 0;
return vhost_user_write(dev, &msg, NULL, 0);
}
static int vhost_user_slave_handle_config_change(struct vhost_dev *dev)
{
int ret = -1;
if (!dev->config_ops) {
return -1;
if (!dev->config_ops || !dev->config_ops->vhost_dev_config_notifier) {
return -ENOSYS;
}
if (dev->config_ops->vhost_dev_config_notifier) {
ret = dev->config_ops->vhost_dev_config_notifier(dev);
}
return ret;
return dev->config_ops->vhost_dev_config_notifier(dev);
}
static int vhost_user_slave_handle_vring_host_notifier(struct vhost_dev *dev,
@ -1497,7 +1517,7 @@ static int vhost_user_slave_handle_vring_host_notifier(struct vhost_dev *dev,
if (!virtio_has_feature(dev->protocol_features,
VHOST_USER_PROTOCOL_F_HOST_NOTIFIER) ||
vdev == NULL || queue_idx >= virtio_get_num_queues(vdev)) {
return -1;
return -EINVAL;
}
n = &user->notifier[queue_idx];
@ -1515,13 +1535,13 @@ static int vhost_user_slave_handle_vring_host_notifier(struct vhost_dev *dev,
/* Sanity check. */
if (area->size != page_size) {
return -1;
return -EINVAL;
}
addr = mmap(NULL, page_size, PROT_READ | PROT_WRITE, MAP_SHARED,
fd, area->offset);
if (addr == MAP_FAILED) {
return -1;
return -EFAULT;
}
name = g_strdup_printf("vhost-user/host-notifier@%p mmaps[%d]",
@ -1534,7 +1554,7 @@ static int vhost_user_slave_handle_vring_host_notifier(struct vhost_dev *dev,
if (virtio_queue_set_host_notifier_mr(vdev, queue_idx, &n->mr, true)) {
object_unparent(OBJECT(&n->mr));
munmap(addr, page_size);
return -1;
return -ENXIO;
}
n->addr = addr;
@ -1664,14 +1684,15 @@ static int vhost_setup_slave_channel(struct vhost_dev *dev)
}
if (socketpair(PF_UNIX, SOCK_STREAM, 0, sv) == -1) {
int saved_errno = errno;
error_report("socketpair() failed");
return -1;
return -saved_errno;
}
ioc = QIO_CHANNEL(qio_channel_socket_new_fd(sv[0], &local_err));
if (!ioc) {
error_report_err(local_err);
return -1;
return -ECONNREFUSED;
}
u->slave_ioc = ioc;
slave_update_read_handler(dev, NULL);
@ -1778,35 +1799,38 @@ static int vhost_user_postcopy_advise(struct vhost_dev *dev, Error **errp)
struct vhost_user *u = dev->opaque;
CharBackend *chr = u->user->chr;
int ufd;
int ret;
VhostUserMsg msg = {
.hdr.request = VHOST_USER_POSTCOPY_ADVISE,
.hdr.flags = VHOST_USER_VERSION,
};
if (vhost_user_write(dev, &msg, NULL, 0) < 0) {
ret = vhost_user_write(dev, &msg, NULL, 0);
if (ret < 0) {
error_setg(errp, "Failed to send postcopy_advise to vhost");
return -1;
return ret;
}
if (vhost_user_read(dev, &msg) < 0) {
ret = vhost_user_read(dev, &msg);
if (ret < 0) {
error_setg(errp, "Failed to get postcopy_advise reply from vhost");
return -1;
return ret;
}
if (msg.hdr.request != VHOST_USER_POSTCOPY_ADVISE) {
error_setg(errp, "Unexpected msg type. Expected %d received %d",
VHOST_USER_POSTCOPY_ADVISE, msg.hdr.request);
return -1;
return -EPROTO;
}
if (msg.hdr.size) {
error_setg(errp, "Received bad msg size.");
return -1;
return -EPROTO;
}
ufd = qemu_chr_fe_get_msgfd(chr);
if (ufd < 0) {
error_setg(errp, "%s: Failed to get ufd", __func__);
return -1;
return -EIO;
}
qemu_set_nonblock(ufd);
@ -1820,7 +1844,7 @@ static int vhost_user_postcopy_advise(struct vhost_dev *dev, Error **errp)
return 0;
#else
error_setg(errp, "Postcopy not supported on non-Linux systems");
return -1;
return -ENOSYS;
#endif
}
@ -1836,10 +1860,13 @@ static int vhost_user_postcopy_listen(struct vhost_dev *dev, Error **errp)
.hdr.flags = VHOST_USER_VERSION | VHOST_USER_NEED_REPLY_MASK,
};
u->postcopy_listen = true;
trace_vhost_user_postcopy_listen();
if (vhost_user_write(dev, &msg, NULL, 0) < 0) {
ret = vhost_user_write(dev, &msg, NULL, 0);
if (ret < 0) {
error_setg(errp, "Failed to send postcopy_listen to vhost");
return -1;
return ret;
}
ret = process_message_reply(dev, &msg);
@ -1864,9 +1891,11 @@ static int vhost_user_postcopy_end(struct vhost_dev *dev, Error **errp)
struct vhost_user *u = dev->opaque;
trace_vhost_user_postcopy_end_entry();
if (vhost_user_write(dev, &msg, NULL, 0) < 0) {
ret = vhost_user_write(dev, &msg, NULL, 0);
if (ret < 0) {
error_setg(errp, "Failed to send postcopy_end to vhost");
return -1;
return ret;
}
ret = process_message_reply(dev, &msg);
@ -2115,7 +2144,7 @@ static int vhost_user_migration_done(struct vhost_dev *dev, char* mac_addr)
return vhost_user_write(dev, &msg, NULL, 0);
}
return -1;
return -ENOTSUP;
}
static bool vhost_user_can_merge(struct vhost_dev *dev,
@ -2136,6 +2165,7 @@ static int vhost_user_net_set_mtu(struct vhost_dev *dev, uint16_t mtu)
VhostUserMsg msg;
bool reply_supported = virtio_has_feature(dev->protocol_features,
VHOST_USER_PROTOCOL_F_REPLY_ACK);
int ret;
if (!(dev->protocol_features & (1ULL << VHOST_USER_PROTOCOL_F_NET_MTU))) {
return 0;
@ -2149,8 +2179,9 @@ static int vhost_user_net_set_mtu(struct vhost_dev *dev, uint16_t mtu)
msg.hdr.flags |= VHOST_USER_NEED_REPLY_MASK;
}
if (vhost_user_write(dev, &msg, NULL, 0) < 0) {
return -1;
ret = vhost_user_write(dev, &msg, NULL, 0);
if (ret < 0) {
return ret;
}
/* If reply_ack supported, slave has to ack specified MTU is valid */
@ -2164,6 +2195,7 @@ static int vhost_user_net_set_mtu(struct vhost_dev *dev, uint16_t mtu)
static int vhost_user_send_device_iotlb_msg(struct vhost_dev *dev,
struct vhost_iotlb_msg *imsg)
{
int ret;
VhostUserMsg msg = {
.hdr.request = VHOST_USER_IOTLB_MSG,
.hdr.size = sizeof(msg.payload.iotlb),
@ -2171,8 +2203,9 @@ static int vhost_user_send_device_iotlb_msg(struct vhost_dev *dev,
.payload.iotlb = *imsg,
};
if (vhost_user_write(dev, &msg, NULL, 0) < 0) {
return -EFAULT;
ret = vhost_user_write(dev, &msg, NULL, 0);
if (ret < 0) {
return ret;
}
return process_message_reply(dev, &msg);
@ -2187,6 +2220,7 @@ static void vhost_user_set_iotlb_callback(struct vhost_dev *dev, int enabled)
static int vhost_user_get_config(struct vhost_dev *dev, uint8_t *config,
uint32_t config_len, Error **errp)
{
int ret;
VhostUserMsg msg = {
.hdr.request = VHOST_USER_GET_CONFIG,
.hdr.flags = VHOST_USER_VERSION,
@ -2203,26 +2237,28 @@ static int vhost_user_get_config(struct vhost_dev *dev, uint8_t *config,
msg.payload.config.offset = 0;
msg.payload.config.size = config_len;
if (vhost_user_write(dev, &msg, NULL, 0) < 0) {
error_setg_errno(errp, EPROTO, "vhost_get_config failed");
return -EPROTO;
ret = vhost_user_write(dev, &msg, NULL, 0);
if (ret < 0) {
error_setg_errno(errp, -ret, "vhost_get_config failed");
return ret;
}
if (vhost_user_read(dev, &msg) < 0) {
error_setg_errno(errp, EPROTO, "vhost_get_config failed");
return -EPROTO;
ret = vhost_user_read(dev, &msg);
if (ret < 0) {
error_setg_errno(errp, -ret, "vhost_get_config failed");
return ret;
}
if (msg.hdr.request != VHOST_USER_GET_CONFIG) {
error_setg(errp,
"Received unexpected msg type. Expected %d received %d",
VHOST_USER_GET_CONFIG, msg.hdr.request);
return -EINVAL;
return -EPROTO;
}
if (msg.hdr.size != VHOST_USER_CONFIG_HDR_SIZE + config_len) {
error_setg(errp, "Received bad msg size.");
return -EINVAL;
return -EPROTO;
}
memcpy(config, msg.payload.config.region, config_len);
@ -2233,6 +2269,7 @@ static int vhost_user_get_config(struct vhost_dev *dev, uint8_t *config,
static int vhost_user_set_config(struct vhost_dev *dev, const uint8_t *data,
uint32_t offset, uint32_t size, uint32_t flags)
{
int ret;
uint8_t *p;
bool reply_supported = virtio_has_feature(dev->protocol_features,
VHOST_USER_PROTOCOL_F_REPLY_ACK);
@ -2245,7 +2282,7 @@ static int vhost_user_set_config(struct vhost_dev *dev, const uint8_t *data,
if (!virtio_has_feature(dev->protocol_features,
VHOST_USER_PROTOCOL_F_CONFIG)) {
return -1;
return -ENOTSUP;
}
if (reply_supported) {
@ -2253,7 +2290,7 @@ static int vhost_user_set_config(struct vhost_dev *dev, const uint8_t *data,
}
if (size > VHOST_USER_MAX_CONFIG_SIZE) {
return -1;
return -EINVAL;
}
msg.payload.config.offset = offset,
@ -2262,8 +2299,9 @@ static int vhost_user_set_config(struct vhost_dev *dev, const uint8_t *data,
p = msg.payload.config.region;
memcpy(p, data, size);
if (vhost_user_write(dev, &msg, NULL, 0) < 0) {
return -1;
ret = vhost_user_write(dev, &msg, NULL, 0);
if (ret < 0) {
return ret;
}
if (reply_supported) {
@ -2277,6 +2315,7 @@ static int vhost_user_crypto_create_session(struct vhost_dev *dev,
void *session_info,
uint64_t *session_id)
{
int ret;
bool crypto_session = virtio_has_feature(dev->protocol_features,
VHOST_USER_PROTOCOL_F_CRYPTO_SESSION);
CryptoDevBackendSymSessionInfo *sess_info = session_info;
@ -2290,7 +2329,7 @@ static int vhost_user_crypto_create_session(struct vhost_dev *dev,
if (!crypto_session) {
error_report("vhost-user trying to send unhandled ioctl");
return -1;
return -ENOTSUP;
}
memcpy(&msg.payload.session.session_setup_data, sess_info,
@ -2303,31 +2342,35 @@ static int vhost_user_crypto_create_session(struct vhost_dev *dev,
memcpy(&msg.payload.session.auth_key, sess_info->auth_key,
sess_info->auth_key_len);
}
if (vhost_user_write(dev, &msg, NULL, 0) < 0) {
error_report("vhost_user_write() return -1, create session failed");
return -1;
ret = vhost_user_write(dev, &msg, NULL, 0);
if (ret < 0) {
error_report("vhost_user_write() return %d, create session failed",
ret);
return ret;
}
if (vhost_user_read(dev, &msg) < 0) {
error_report("vhost_user_read() return -1, create session failed");
return -1;
ret = vhost_user_read(dev, &msg);
if (ret < 0) {
error_report("vhost_user_read() return %d, create session failed",
ret);
return ret;
}
if (msg.hdr.request != VHOST_USER_CREATE_CRYPTO_SESSION) {
error_report("Received unexpected msg type. Expected %d received %d",
VHOST_USER_CREATE_CRYPTO_SESSION, msg.hdr.request);
return -1;
return -EPROTO;
}
if (msg.hdr.size != sizeof(msg.payload.session)) {
error_report("Received bad msg size.");
return -1;
return -EPROTO;
}
if (msg.payload.session.session_id < 0) {
error_report("Bad session id: %" PRId64 "",
msg.payload.session.session_id);
return -1;
return -EINVAL;
}
*session_id = msg.payload.session.session_id;
@ -2337,6 +2380,7 @@ static int vhost_user_crypto_create_session(struct vhost_dev *dev,
static int
vhost_user_crypto_close_session(struct vhost_dev *dev, uint64_t session_id)
{
int ret;
bool crypto_session = virtio_has_feature(dev->protocol_features,
VHOST_USER_PROTOCOL_F_CRYPTO_SESSION);
VhostUserMsg msg = {
@ -2348,12 +2392,14 @@ vhost_user_crypto_close_session(struct vhost_dev *dev, uint64_t session_id)
if (!crypto_session) {
error_report("vhost-user trying to send unhandled ioctl");
return -1;
return -ENOTSUP;
}
if (vhost_user_write(dev, &msg, NULL, 0) < 0) {
error_report("vhost_user_write() return -1, close session failed");
return -1;
ret = vhost_user_write(dev, &msg, NULL, 0);
if (ret < 0) {
error_report("vhost_user_write() return %d, close session failed",
ret);
return ret;
}
return 0;
@ -2375,6 +2421,7 @@ static int vhost_user_get_inflight_fd(struct vhost_dev *dev,
{
void *addr;
int fd;
int ret;
struct vhost_user *u = dev->opaque;
CharBackend *chr = u->user->chr;
VhostUserMsg msg = {
@ -2390,24 +2437,26 @@ static int vhost_user_get_inflight_fd(struct vhost_dev *dev,
return 0;
}
if (vhost_user_write(dev, &msg, NULL, 0) < 0) {
return -1;
ret = vhost_user_write(dev, &msg, NULL, 0);
if (ret < 0) {
return ret;
}
if (vhost_user_read(dev, &msg) < 0) {
return -1;
ret = vhost_user_read(dev, &msg);
if (ret < 0) {
return ret;
}
if (msg.hdr.request != VHOST_USER_GET_INFLIGHT_FD) {
error_report("Received unexpected msg type. "
"Expected %d received %d",
VHOST_USER_GET_INFLIGHT_FD, msg.hdr.request);
return -1;
return -EPROTO;
}
if (msg.hdr.size != sizeof(msg.payload.inflight)) {
error_report("Received bad msg size.");
return -1;
return -EPROTO;
}
if (!msg.payload.inflight.mmap_size) {
@ -2417,7 +2466,7 @@ static int vhost_user_get_inflight_fd(struct vhost_dev *dev,
fd = qemu_chr_fe_get_msgfd(chr);
if (fd < 0) {
error_report("Failed to get mem fd");
return -1;
return -EIO;
}
addr = mmap(0, msg.payload.inflight.mmap_size, PROT_READ | PROT_WRITE,
@ -2426,7 +2475,7 @@ static int vhost_user_get_inflight_fd(struct vhost_dev *dev,
if (addr == MAP_FAILED) {
error_report("Failed to mmap mem fd");
close(fd);
return -1;
return -EFAULT;
}
inflight->addr = addr;
@ -2456,11 +2505,7 @@ static int vhost_user_set_inflight_fd(struct vhost_dev *dev,
return 0;
}
if (vhost_user_write(dev, &msg, &inflight->fd, 1) < 0) {
return -1;
}
return 0;
return vhost_user_write(dev, &msg, &inflight->fd, 1);
}
bool vhost_user_init(VhostUserState *user, CharBackend *chr, Error **errp)
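
Taken together, the hunks in this file convert vhost-user's helpers from the
old "return -1 on any failure" convention to negative errno values: -EPROTO
for malformed replies, -EIO for short reads and writes, and the saved
transport errno where one exists. Call sites now propagate that code instead
of flattening it, for instance (excerpted from the vhost_user_get_vring_base()
hunk above):

  ret = vhost_user_write(dev, &msg, NULL, 0);
  if (ret < 0) {
      return ret;    /* propagate the negative errno to the caller */
  }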

@ -292,18 +292,34 @@ static int vhost_vdpa_call(struct vhost_dev *dev, unsigned long int request,
return ret < 0 ? -errno : ret;
}
static void vhost_vdpa_add_status(struct vhost_dev *dev, uint8_t status)
static int vhost_vdpa_add_status(struct vhost_dev *dev, uint8_t status)
{
uint8_t s;
int ret;
trace_vhost_vdpa_add_status(dev, status);
if (vhost_vdpa_call(dev, VHOST_VDPA_GET_STATUS, &s)) {
return;
ret = vhost_vdpa_call(dev, VHOST_VDPA_GET_STATUS, &s);
if (ret < 0) {
return ret;
}
s |= status;
vhost_vdpa_call(dev, VHOST_VDPA_SET_STATUS, &s);
ret = vhost_vdpa_call(dev, VHOST_VDPA_SET_STATUS, &s);
if (ret < 0) {
return ret;
}
ret = vhost_vdpa_call(dev, VHOST_VDPA_GET_STATUS, &s);
if (ret < 0) {
return ret;
}
if (!(s & status)) {
return -EIO;
}
return 0;
}
static void vhost_vdpa_get_iova_range(struct vhost_vdpa *v)
@ -484,7 +500,7 @@ static int vhost_vdpa_set_mem_table(struct vhost_dev *dev,
}
}
if (mem->padding) {
return -1;
return -EINVAL;
}
return 0;
@ -501,14 +517,11 @@ static int vhost_vdpa_set_features(struct vhost_dev *dev,
trace_vhost_vdpa_set_features(dev, features);
ret = vhost_vdpa_call(dev, VHOST_SET_FEATURES, &features);
uint8_t status = 0;
if (ret) {
return ret;
}
vhost_vdpa_add_status(dev, VIRTIO_CONFIG_S_FEATURES_OK);
vhost_vdpa_call(dev, VHOST_VDPA_GET_STATUS, &status);
return !(status & VIRTIO_CONFIG_S_FEATURES_OK);
return vhost_vdpa_add_status(dev, VIRTIO_CONFIG_S_FEATURES_OK);
}
static int vhost_vdpa_set_backend_cap(struct vhost_dev *dev)
@ -650,12 +663,8 @@ static int vhost_vdpa_dev_start(struct vhost_dev *dev, bool started)
}
if (started) {
uint8_t status = 0;
memory_listener_register(&v->listener, &address_space_memory);
vhost_vdpa_add_status(dev, VIRTIO_CONFIG_S_DRIVER_OK);
vhost_vdpa_call(dev, VHOST_VDPA_GET_STATUS, &status);
return !(status & VIRTIO_CONFIG_S_DRIVER_OK);
return vhost_vdpa_add_status(dev, VIRTIO_CONFIG_S_DRIVER_OK);
} else {
vhost_vdpa_reset_device(dev);
vhost_vdpa_add_status(dev, VIRTIO_CONFIG_S_ACKNOWLEDGE |
@ -725,6 +734,12 @@ static int vhost_vdpa_set_vring_call(struct vhost_dev *dev,
trace_vhost_vdpa_set_vring_call(dev, file->index, file->fd);
return vhost_vdpa_call(dev, VHOST_SET_VRING_CALL, file);
}
static int vhost_vdpa_set_config_call(struct vhost_dev *dev,
int fd)
{
trace_vhost_vdpa_set_config_call(dev, fd);
return vhost_vdpa_call(dev, VHOST_VDPA_SET_CONFIG_CALL, &fd);
}
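VHOST_VDPA_SET_CONFIG_CALL hands the kernel an eventfd to signal when the device's config space changes; QEMU's EventNotifier is a thin wrapper around such an fd. A standalone Linux sketch (not QEMU code) of creating and draining one:

#include <sys/eventfd.h>
#include <stdint.h>
#include <unistd.h>

static int make_call_fd(void)
{
    /* the fd the backend will signal on a config change */
    return eventfd(0, EFD_CLOEXEC | EFD_NONBLOCK);
}

static int drain_call_fd(int fd)
{
    uint64_t cnt;
    /* reading consumes (and returns) the pending event count */
    return read(fd, &cnt, sizeof(cnt)) == sizeof(cnt) ? 0 : -1;
}

Passing fd = -1, as vhost_stop_config_intr() does later in this series, tells the backend to stop signalling.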
static int vhost_vdpa_get_features(struct vhost_dev *dev,
uint64_t *features)
@ -795,4 +810,5 @@ const VhostOps vdpa_ops = {
.vhost_get_device_id = vhost_vdpa_get_device_id,
.vhost_vq_get_addr = vhost_vdpa_vq_get_addr,
.vhost_force_iommu = vhost_vdpa_force_iommu,
.vhost_set_config_call = vhost_vdpa_set_config_call,
};

@ -125,6 +125,9 @@ static void vhost_vsock_common_guest_notifier_mask(VirtIODevice *vdev, int idx,
{
VHostVSockCommon *vvc = VHOST_VSOCK_COMMON(vdev);
if (idx == VIRTIO_CONFIG_IRQ_IDX) {
return;
}
vhost_virtqueue_mask(&vvc->vhost_dev, vdev, idx, mask);
}
@ -133,6 +136,9 @@ static bool vhost_vsock_common_guest_notifier_pending(VirtIODevice *vdev,
{
VHostVSockCommon *vvc = VHOST_VSOCK_COMMON(vdev);
if (idx == VIRTIO_CONFIG_IRQ_IDX) {
return false;
}
return vhost_virtqueue_pending(&vvc->vhost_dev, idx);
}
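Both callbacks now special-case VIRTIO_CONFIG_IRQ_IDX, defined as -1 in the virtio.h hunk below: it is a pseudo-index for the config interrupt, not a real virtqueue, so letting it reach vhost_virtqueue_mask()/vhost_virtqueue_pending() would index the per-queue array out of bounds. A sketch of the dispatch shape (helper names hypothetical):

static void guest_notifier_mask(int idx, bool mask)
{
    if (idx == VIRTIO_CONFIG_IRQ_IDX) {   /* -1: config change interrupt */
        return;                           /* handled via vhost_config_mask() */
    }
    per_queue_mask(idx, mask);            /* idx is now a real queue number */
}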

@ -171,6 +171,10 @@ static void vhost_vsock_device_realize(DeviceState *dev, Error **errp)
ret = vhost_dev_init(&vvc->vhost_dev, (void *)(uintptr_t)vhostfd,
VHOST_BACKEND_TYPE_KERNEL, 0, errp);
if (ret < 0) {
/*
* vhostfd is closed by vhost_dev_cleanup, which is called
* by vhost_dev_init on initialization error.
*/
goto err_virtio;
}
@ -183,15 +187,10 @@ static void vhost_vsock_device_realize(DeviceState *dev, Error **errp)
return;
err_vhost_dev:
vhost_dev_cleanup(&vvc->vhost_dev);
/* vhost_dev_cleanup() closes the vhostfd passed to vhost_dev_init() */
vhostfd = -1;
vhost_dev_cleanup(&vvc->vhost_dev);
err_virtio:
vhost_vsock_common_unrealize(vdev);
if (vhostfd >= 0) {
close(vhostfd);
}
return;
}
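The reordering fixes a double close: vhost_dev_init() on failure and vhost_dev_cleanup() both close vhostfd, so each error path must forget the descriptor once ownership has transferred. The rule as a standalone sketch (function names hypothetical):

static void realize(int fd)
{
    if (dev_init(fd) < 0) {   /* dev_init() closes fd on failure... */
        fd = -1;              /* ...so the common error path must not */
        goto err;
    }
    if (later_step() < 0) {
        dev_cleanup();        /* also closes fd */
        fd = -1;
        goto err;
    }
    return;
err:
    /* only earlier failures, before ownership transferred, close fd here */
    if (fd >= 0) {
        close(fd);
    }
}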
static void vhost_vsock_device_unrealize(DeviceState *dev)

@ -33,11 +33,13 @@
#define _VHOST_DEBUG 1
#ifdef _VHOST_DEBUG
#define VHOST_OPS_DEBUG(fmt, ...) \
do { error_report(fmt ": %s (%d)", ## __VA_ARGS__, \
strerror(errno), errno); } while (0)
#define VHOST_OPS_DEBUG(retval, fmt, ...) \
do { \
error_report(fmt ": %s (%d)", ## __VA_ARGS__, \
strerror(-retval), -retval); \
} while (0)
#else
#define VHOST_OPS_DEBUG(fmt, ...) \
#define VHOST_OPS_DEBUG(retval, fmt, ...) \
do { } while (0)
#endif
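The macro now takes the failing call's negative return value explicitly instead of peeking at the global errno, which is not reliably set by backends such as vhost-user that report errors in message replies rather than via syscalls. Call sites pass the value they already have; for example (mirroring the hunks below):

r = dev->vhost_ops->vhost_set_features(dev, features);
if (r < 0) {
    /* prints e.g. "vhost_set_features failed: Operation not supported (95)" */
    VHOST_OPS_DEBUG(r, "vhost_set_features failed");
}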
@ -297,7 +299,7 @@ static inline void vhost_dev_log_resize(struct vhost_dev *dev, uint64_t size)
releasing the current log, to ensure no logging is lost */
r = dev->vhost_ops->vhost_set_log_base(dev, log_base, log);
if (r < 0) {
VHOST_OPS_DEBUG("vhost_set_log_base failed");
VHOST_OPS_DEBUG(r, "vhost_set_log_base failed");
}
vhost_log_put(dev, true);
@ -550,7 +552,7 @@ static void vhost_commit(MemoryListener *listener)
if (!dev->log_enabled) {
r = dev->vhost_ops->vhost_set_mem_table(dev, dev->mem);
if (r < 0) {
VHOST_OPS_DEBUG("vhost_set_mem_table failed");
VHOST_OPS_DEBUG(r, "vhost_set_mem_table failed");
}
goto out;
}
@ -564,7 +566,7 @@ static void vhost_commit(MemoryListener *listener)
}
r = dev->vhost_ops->vhost_set_mem_table(dev, dev->mem);
if (r < 0) {
VHOST_OPS_DEBUG("vhost_set_mem_table failed");
VHOST_OPS_DEBUG(r, "vhost_set_mem_table failed");
}
/* To log less, can only decrease log size after table update. */
if (dev->log_size > log_size + VHOST_LOG_BUFFER) {
@ -803,8 +805,8 @@ static int vhost_virtqueue_set_addr(struct vhost_dev *dev,
if (dev->vhost_ops->vhost_vq_get_addr) {
r = dev->vhost_ops->vhost_vq_get_addr(dev, &addr, vq);
if (r < 0) {
VHOST_OPS_DEBUG("vhost_vq_get_addr failed");
return -errno;
VHOST_OPS_DEBUG(r, "vhost_vq_get_addr failed");
return r;
}
} else {
addr.desc_user_addr = (uint64_t)(unsigned long)vq->desc;
@ -816,10 +818,9 @@ static int vhost_virtqueue_set_addr(struct vhost_dev *dev,
addr.flags = enable_log ? (1 << VHOST_VRING_F_LOG) : 0;
r = dev->vhost_ops->vhost_set_vring_addr(dev, &addr);
if (r < 0) {
VHOST_OPS_DEBUG("vhost_set_vring_addr failed");
return -errno;
VHOST_OPS_DEBUG(r, "vhost_set_vring_addr failed");
}
return 0;
return r;
}
static int vhost_dev_set_features(struct vhost_dev *dev,
@ -840,19 +841,19 @@ static int vhost_dev_set_features(struct vhost_dev *dev,
}
r = dev->vhost_ops->vhost_set_features(dev, features);
if (r < 0) {
VHOST_OPS_DEBUG("vhost_set_features failed");
VHOST_OPS_DEBUG(r, "vhost_set_features failed");
goto out;
}
if (dev->vhost_ops->vhost_set_backend_cap) {
r = dev->vhost_ops->vhost_set_backend_cap(dev);
if (r < 0) {
VHOST_OPS_DEBUG("vhost_set_backend_cap failed");
VHOST_OPS_DEBUG(r, "vhost_set_backend_cap failed");
goto out;
}
}
out:
return r < 0 ? -errno : 0;
return r;
}
static int vhost_dev_set_log(struct vhost_dev *dev, bool enable_log)
@ -999,22 +1000,17 @@ static int vhost_virtqueue_set_vring_endian_legacy(struct vhost_dev *dev,
bool is_big_endian,
int vhost_vq_index)
{
int r;
struct vhost_vring_state s = {
.index = vhost_vq_index,
.num = is_big_endian
};
if (!dev->vhost_ops->vhost_set_vring_endian(dev, &s)) {
return 0;
r = dev->vhost_ops->vhost_set_vring_endian(dev, &s);
if (r < 0) {
VHOST_OPS_DEBUG(r, "vhost_set_vring_endian failed");
}
VHOST_OPS_DEBUG("vhost_set_vring_endian failed");
if (errno == ENOTTY) {
error_report("vhost does not support cross-endian");
return -ENOSYS;
}
return -errno;
return r;
}
static int vhost_memory_region_lookup(struct vhost_dev *hdev,
@ -1106,15 +1102,15 @@ static int vhost_virtqueue_start(struct vhost_dev *dev,
vq->num = state.num = virtio_queue_get_num(vdev, idx);
r = dev->vhost_ops->vhost_set_vring_num(dev, &state);
if (r) {
VHOST_OPS_DEBUG("vhost_set_vring_num failed");
return -errno;
VHOST_OPS_DEBUG(r, "vhost_set_vring_num failed");
return r;
}
state.num = virtio_queue_get_last_avail_idx(vdev, idx);
r = dev->vhost_ops->vhost_set_vring_base(dev, &state);
if (r) {
VHOST_OPS_DEBUG("vhost_set_vring_base failed");
return -errno;
VHOST_OPS_DEBUG(r, "vhost_set_vring_base failed");
return r;
}
if (vhost_needs_vring_endian(vdev)) {
@ -1122,7 +1118,7 @@ static int vhost_virtqueue_start(struct vhost_dev *dev,
virtio_is_big_endian(vdev),
vhost_vq_index);
if (r) {
return -errno;
return r;
}
}
@ -1150,15 +1146,13 @@ static int vhost_virtqueue_start(struct vhost_dev *dev,
r = vhost_virtqueue_set_addr(dev, vq, vhost_vq_index, dev->log_enabled);
if (r < 0) {
r = -errno;
goto fail_alloc;
}
file.fd = event_notifier_get_fd(virtio_queue_get_host_notifier(vvq));
r = dev->vhost_ops->vhost_set_vring_kick(dev, &file);
if (r) {
VHOST_OPS_DEBUG("vhost_set_vring_kick failed");
r = -errno;
VHOST_OPS_DEBUG(r, "vhost_set_vring_kick failed");
goto fail_kick;
}
@ -1218,7 +1212,7 @@ static void vhost_virtqueue_stop(struct vhost_dev *dev,
r = dev->vhost_ops->vhost_get_vring_base(dev, &state);
if (r < 0) {
VHOST_OPS_DEBUG("vhost VQ %u ring restore failed: %d", idx, r);
VHOST_OPS_DEBUG(r, "vhost VQ %u ring restore failed: %d", idx, r);
/* Connection to the backend is broken, so let's sync internal
* last avail idx to the device used idx.
*/
@ -1274,7 +1268,7 @@ static int vhost_virtqueue_set_busyloop_timeout(struct vhost_dev *dev,
r = dev->vhost_ops->vhost_set_vring_busyloop_timeout(dev, &state);
if (r) {
VHOST_OPS_DEBUG("vhost_set_vring_busyloop_timeout failed");
VHOST_OPS_DEBUG(r, "vhost_set_vring_busyloop_timeout failed");
return r;
}
@ -1296,8 +1290,7 @@ static int vhost_virtqueue_init(struct vhost_dev *dev,
file.fd = event_notifier_get_fd(&vq->masked_notifier);
r = dev->vhost_ops->vhost_set_vring_call(dev, &file);
if (r) {
VHOST_OPS_DEBUG("vhost_set_vring_call failed");
r = -errno;
VHOST_OPS_DEBUG(r, "vhost_set_vring_call failed");
goto fail_call;
}
@ -1557,7 +1550,68 @@ void vhost_virtqueue_mask(struct vhost_dev *hdev, VirtIODevice *vdev, int n,
file.index = hdev->vhost_ops->vhost_get_vq_index(hdev, n);
r = hdev->vhost_ops->vhost_set_vring_call(hdev, &file);
if (r < 0) {
VHOST_OPS_DEBUG("vhost_set_vring_call failed");
VHOST_OPS_DEBUG(r, "vhost_set_vring_call failed");
}
}
bool vhost_config_pending(struct vhost_dev *hdev)
{
assert(hdev->vhost_ops);
if ((hdev->started == false) ||
(hdev->vhost_ops->vhost_set_config_call == NULL)) {
return false;
}
EventNotifier *notifier =
&hdev->vqs[VHOST_QUEUE_NUM_CONFIG_INR].masked_config_notifier;
return event_notifier_test_and_clear(notifier);
}
void vhost_config_mask(struct vhost_dev *hdev, VirtIODevice *vdev, bool mask)
{
int fd;
int r;
EventNotifier *notifier =
&hdev->vqs[VHOST_QUEUE_NUM_CONFIG_INR].masked_config_notifier;
EventNotifier *config_notifier = &vdev->config_notifier;
assert(hdev->vhost_ops);
if ((hdev->started == false) ||
(hdev->vhost_ops->vhost_set_config_call == NULL)) {
return;
}
if (mask) {
assert(vdev->use_guest_notifier_mask);
fd = event_notifier_get_fd(notifier);
} else {
fd = event_notifier_get_fd(config_notifier);
}
r = hdev->vhost_ops->vhost_set_config_call(hdev, fd);
if (r < 0) {
VHOST_OPS_DEBUG(r, "vhost_set_config_call failed");
}
}
static void vhost_stop_config_intr(struct vhost_dev *dev)
{
int fd = -1;
assert(dev->vhost_ops);
if (dev->vhost_ops->vhost_set_config_call) {
dev->vhost_ops->vhost_set_config_call(dev, fd);
}
}
static void vhost_start_config_intr(struct vhost_dev *dev)
{
int r;
assert(dev->vhost_ops);
int fd = event_notifier_get_fd(&dev->vdev->config_notifier);
if (dev->vhost_ops->vhost_set_config_call) {
r = dev->vhost_ops->vhost_set_config_call(dev, fd);
if (!r) {
event_notifier_set(&dev->vdev->config_notifier);
}
}
}
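Taken together, these helpers steer the backend's config interrupt between two eventfds: while masked it signals the parked masked_config_notifier, whose pending state vhost_config_pending() polls above, and while unmasked it signals vdev->config_notifier, which is delivered to the guest. A compressed sketch of the switch (backend_set_call() is a hypothetical stand-in for the vhost_set_config_call backend op):

static void config_mask(bool mask)
{
    int fd = mask ? event_notifier_get_fd(&masked_config_notifier)
                  : event_notifier_get_fd(&vdev->config_notifier);
    backend_set_call(fd);    /* redirect where the backend signals */
}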
@ -1599,7 +1653,7 @@ int vhost_dev_get_config(struct vhost_dev *hdev, uint8_t *config,
}
error_setg(errp, "vhost_get_config not implemented");
return -ENOTSUP;
return -ENOSYS;
}
int vhost_dev_set_config(struct vhost_dev *hdev, const uint8_t *data,
@ -1612,7 +1666,7 @@ int vhost_dev_set_config(struct vhost_dev *hdev, const uint8_t *data,
size, flags);
}
return -1;
return -ENOSYS;
}
void vhost_dev_set_config_notifier(struct vhost_dev *hdev,
@ -1641,7 +1695,7 @@ static int vhost_dev_resize_inflight(struct vhost_inflight *inflight,
if (err) {
error_report_err(err);
return -1;
return -ENOMEM;
}
vhost_dev_free_inflight(inflight);
@ -1674,8 +1728,9 @@ int vhost_dev_load_inflight(struct vhost_inflight *inflight, QEMUFile *f)
}
if (inflight->size != size) {
if (vhost_dev_resize_inflight(inflight, size)) {
return -1;
int ret = vhost_dev_resize_inflight(inflight, size);
if (ret < 0) {
return ret;
}
}
inflight->queue_size = qemu_get_be16(f);
@ -1698,7 +1753,7 @@ int vhost_dev_prepare_inflight(struct vhost_dev *hdev, VirtIODevice *vdev)
r = vhost_dev_set_features(hdev, hdev->log_enabled);
if (r < 0) {
VHOST_OPS_DEBUG("vhost_dev_prepare_inflight failed");
VHOST_OPS_DEBUG(r, "vhost_dev_prepare_inflight failed");
return r;
}
@ -1713,8 +1768,8 @@ int vhost_dev_set_inflight(struct vhost_dev *dev,
if (dev->vhost_ops->vhost_set_inflight_fd && inflight->addr) {
r = dev->vhost_ops->vhost_set_inflight_fd(dev, inflight);
if (r) {
VHOST_OPS_DEBUG("vhost_set_inflight_fd failed");
return -errno;
VHOST_OPS_DEBUG(r, "vhost_set_inflight_fd failed");
return r;
}
}
@ -1729,8 +1784,8 @@ int vhost_dev_get_inflight(struct vhost_dev *dev, uint16_t queue_size,
if (dev->vhost_ops->vhost_get_inflight_fd) {
r = dev->vhost_ops->vhost_get_inflight_fd(dev, queue_size, inflight);
if (r) {
VHOST_OPS_DEBUG("vhost_get_inflight_fd failed");
return -errno;
VHOST_OPS_DEBUG(r, "vhost_get_inflight_fd failed");
return r;
}
}
@ -1759,8 +1814,7 @@ int vhost_dev_start(struct vhost_dev *hdev, VirtIODevice *vdev)
r = hdev->vhost_ops->vhost_set_mem_table(hdev, hdev->mem);
if (r < 0) {
VHOST_OPS_DEBUG("vhost_set_mem_table failed");
r = -errno;
VHOST_OPS_DEBUG(r, "vhost_set_mem_table failed");
goto fail_mem;
}
for (i = 0; i < hdev->nvqs; ++i) {
@ -1773,6 +1827,16 @@ int vhost_dev_start(struct vhost_dev *hdev, VirtIODevice *vdev)
}
}
r = event_notifier_init(
&hdev->vqs[VHOST_QUEUE_NUM_CONFIG_INR].masked_config_notifier, 0);
if (r < 0) {
return r;
}
event_notifier_test_and_clear(
&hdev->vqs[VHOST_QUEUE_NUM_CONFIG_INR].masked_config_notifier);
if (!vdev->use_guest_notifier_mask) {
vhost_config_mask(hdev, vdev, true);
}
if (hdev->log_enabled) {
uint64_t log_base;
@ -1784,8 +1848,7 @@ int vhost_dev_start(struct vhost_dev *hdev, VirtIODevice *vdev)
hdev->log_size ? log_base : 0,
hdev->log);
if (r < 0) {
VHOST_OPS_DEBUG("vhost_set_log_base failed");
r = -errno;
VHOST_OPS_DEBUG(r, "vhost_set_log_base failed");
goto fail_log;
}
}
@ -1806,6 +1869,7 @@ int vhost_dev_start(struct vhost_dev *hdev, VirtIODevice *vdev)
vhost_device_iotlb_miss(hdev, vq->used_phys, true);
}
}
vhost_start_config_intr(hdev);
return 0;
fail_log:
vhost_log_put(hdev, false);
@ -1831,6 +1895,9 @@ void vhost_dev_stop(struct vhost_dev *hdev, VirtIODevice *vdev)
/* should only be called after backend is connected */
assert(hdev->vhost_ops);
event_notifier_test_and_clear(
&hdev->vqs[VHOST_QUEUE_NUM_CONFIG_INR].masked_config_notifier);
event_notifier_test_and_clear(&vdev->config_notifier);
if (hdev->vhost_ops->vhost_dev_start) {
hdev->vhost_ops->vhost_dev_start(hdev, false);
@ -1848,6 +1915,7 @@ void vhost_dev_stop(struct vhost_dev *hdev, VirtIODevice *vdev)
}
memory_listener_unregister(&hdev->iommu_listener);
}
vhost_stop_config_intr(hdev);
vhost_log_put(hdev, true);
hdev->started = false;
hdev->vdev = NULL;
@ -1860,5 +1928,5 @@ int vhost_net_set_backend(struct vhost_dev *hdev,
return hdev->vhost_ops->vhost_net_set_backend(hdev, file);
}
return -1;
return -ENOSYS;
}

@ -948,6 +948,9 @@ static void virtio_crypto_guest_notifier_mask(VirtIODevice *vdev, int idx,
assert(vcrypto->vhost_started);
if (idx == VIRTIO_CONFIG_IRQ_IDX) {
return;
}
cryptodev_vhost_virtqueue_mask(vdev, queue, idx, mask);
}
@ -958,6 +961,9 @@ static bool virtio_crypto_guest_notifier_pending(VirtIODevice *vdev, int idx)
assert(vcrypto->vhost_started);
if (idx == VIRTIO_CONFIG_IRQ_IDX) {
return false;
}
return cryptodev_vhost_virtqueue_pending(vdev, queue, idx);
}

@ -32,6 +32,14 @@
#include CONFIG_DEVICES
#include "trace.h"
/*
* We only had legacy x86 guests that did not support
* VIRTIO_MEM_F_UNPLUGGED_INACCESSIBLE. Other targets don't have legacy guests.
*/
#if defined(TARGET_X86_64) || defined(TARGET_I386)
#define VIRTIO_MEM_HAS_LEGACY_GUESTS
#endif
/*
* Let's not allow blocks smaller than 1 MiB, for example, to keep the tracking
* bitmap small.
@ -110,6 +118,19 @@ static uint64_t virtio_mem_default_block_size(RAMBlock *rb)
return MAX(page_size, VIRTIO_MEM_MIN_BLOCK_SIZE);
}
#if defined(VIRTIO_MEM_HAS_LEGACY_GUESTS)
static bool virtio_mem_has_shared_zeropage(RAMBlock *rb)
{
/*
* We only have a guaranteed shared zeropage on ordinary MAP_PRIVATE
* anonymous RAM. In any other case, reading unplugged *can* populate a
* fresh page, consuming actual memory.
*/
return !qemu_ram_is_shared(rb) && rb->fd < 0 &&
qemu_ram_pagesize(rb) == qemu_real_host_page_size;
}
#endif /* VIRTIO_MEM_HAS_LEGACY_GUESTS */
/*
* Size the usable region bigger than the requested size if possible. Esp.
* Linux guests will only add (aligned) memory blocks in case they fully
@ -429,10 +450,40 @@ static int virtio_mem_set_block_state(VirtIOMEM *vmem, uint64_t start_gpa,
return -EBUSY;
}
virtio_mem_notify_unplug(vmem, offset, size);
} else if (virtio_mem_notify_plug(vmem, offset, size)) {
/* Could be a mapping attempt resulted in memory getting populated. */
ram_block_discard_range(vmem->memdev->mr.ram_block, offset, size);
return -EBUSY;
} else {
int ret = 0;
if (vmem->prealloc) {
void *area = memory_region_get_ram_ptr(&vmem->memdev->mr) + offset;
int fd = memory_region_get_fd(&vmem->memdev->mr);
Error *local_err = NULL;
os_mem_prealloc(fd, area, size, 1, &local_err);
if (local_err) {
static bool warned;
/*
* Warn only once; we don't want to fill the log with these
* warnings.
*/
if (!warned) {
warn_report_err(local_err);
warned = true;
} else {
error_free(local_err);
}
ret = -EBUSY;
}
}
if (!ret) {
ret = virtio_mem_notify_plug(vmem, offset, size);
}
if (ret) {
/* Could be preallocation or a notifier populated memory. */
ram_block_discard_range(vmem->memdev->mr.ram_block, offset, size);
return -EBUSY;
}
}
virtio_mem_set_bitmap(vmem, start_gpa, size, plug);
return 0;
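With prealloc=on, every plug request is backed up front via os_mem_prealloc(), so exhausting the backend (e.g. running out of huge pages) fails the request with -EBUSY instead of killing the guest with SIGBUS on first access later. A hypothetical invocation (backend and sizes purely illustrative):

qemu-system-x86_64 ... \
    -object memory-backend-file,id=mem0,mem-path=/dev/hugepages,size=8G \
    -device virtio-mem-pci,memdev=mem0,requested-size=2G,prealloc=on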
@ -653,15 +704,29 @@ static uint64_t virtio_mem_get_features(VirtIODevice *vdev, uint64_t features,
Error **errp)
{
MachineState *ms = MACHINE(qdev_get_machine());
VirtIOMEM *vmem = VIRTIO_MEM(vdev);
if (ms->numa_state) {
#if defined(CONFIG_ACPI)
virtio_add_feature(&features, VIRTIO_MEM_F_ACPI_PXM);
#endif
}
assert(vmem->unplugged_inaccessible != ON_OFF_AUTO_AUTO);
if (vmem->unplugged_inaccessible == ON_OFF_AUTO_ON) {
virtio_add_feature(&features, VIRTIO_MEM_F_UNPLUGGED_INACCESSIBLE);
}
return features;
}
static int virtio_mem_validate_features(VirtIODevice *vdev)
{
if (virtio_host_has_feature(vdev, VIRTIO_MEM_F_UNPLUGGED_INACCESSIBLE) &&
!virtio_vdev_has_feature(vdev, VIRTIO_MEM_F_UNPLUGGED_INACCESSIBLE)) {
return -EFAULT;
}
return 0;
}
static void virtio_mem_system_reset(void *opaque)
{
VirtIOMEM *vmem = VIRTIO_MEM(opaque);
@ -716,6 +781,29 @@ static void virtio_mem_device_realize(DeviceState *dev, Error **errp)
rb = vmem->memdev->mr.ram_block;
page_size = qemu_ram_pagesize(rb);
#if defined(VIRTIO_MEM_HAS_LEGACY_GUESTS)
switch (vmem->unplugged_inaccessible) {
case ON_OFF_AUTO_AUTO:
if (virtio_mem_has_shared_zeropage(rb)) {
vmem->unplugged_inaccessible = ON_OFF_AUTO_OFF;
} else {
vmem->unplugged_inaccessible = ON_OFF_AUTO_ON;
}
break;
case ON_OFF_AUTO_OFF:
if (!virtio_mem_has_shared_zeropage(rb)) {
warn_report("'%s' property set to 'off' with a memdev that does"
" not support the shared zeropage.",
VIRTIO_MEM_UNPLUGGED_INACCESSIBLE_PROP);
}
break;
default:
break;
}
#else /* VIRTIO_MEM_HAS_LEGACY_GUESTS */
vmem->unplugged_inaccessible = ON_OFF_AUTO_ON;
#endif /* VIRTIO_MEM_HAS_LEGACY_GUESTS */
/*
* If the block size wasn't configured by the user, use a sane default. This
* allows using hugetlbfs backends of any page size without manual
@ -733,7 +821,8 @@ static void virtio_mem_device_realize(DeviceState *dev, Error **errp)
warn_report("'%s' property is smaller than the default block size (%"
PRIx64 " MiB)", VIRTIO_MEM_BLOCK_SIZE_PROP,
virtio_mem_default_block_size(rb) / MiB);
} else if (!QEMU_IS_ALIGNED(vmem->requested_size, vmem->block_size)) {
}
if (!QEMU_IS_ALIGNED(vmem->requested_size, vmem->block_size)) {
error_setg(errp, "'%s' property has to be multiples of '%s' (0x%" PRIx64
")", VIRTIO_MEM_REQUESTED_SIZE_PROP,
VIRTIO_MEM_BLOCK_SIZE_PROP, vmem->block_size);
@ -1107,8 +1196,13 @@ static void virtio_mem_instance_init(Object *obj)
static Property virtio_mem_properties[] = {
DEFINE_PROP_UINT64(VIRTIO_MEM_ADDR_PROP, VirtIOMEM, addr, 0),
DEFINE_PROP_UINT32(VIRTIO_MEM_NODE_PROP, VirtIOMEM, node, 0),
DEFINE_PROP_BOOL(VIRTIO_MEM_PREALLOC_PROP, VirtIOMEM, prealloc, false),
DEFINE_PROP_LINK(VIRTIO_MEM_MEMDEV_PROP, VirtIOMEM, memdev,
TYPE_MEMORY_BACKEND, HostMemoryBackend *),
#if defined(VIRTIO_MEM_HAS_LEGACY_GUESTS)
DEFINE_PROP_ON_OFF_AUTO(VIRTIO_MEM_UNPLUGGED_INACCESSIBLE_PROP, VirtIOMEM,
unplugged_inaccessible, ON_OFF_AUTO_AUTO),
#endif
DEFINE_PROP_END_OF_LIST(),
};
@ -1247,6 +1341,7 @@ static void virtio_mem_class_init(ObjectClass *klass, void *data)
vdc->unrealize = virtio_mem_device_unrealize;
vdc->get_config = virtio_mem_get_config;
vdc->get_features = virtio_mem_get_features;
vdc->validate_features = virtio_mem_validate_features;
vdc->vmsd = &vmstate_virtio_mem_device;
vmc->fill_device_info = virtio_mem_fill_device_info;

@ -673,7 +673,30 @@ static int virtio_mmio_set_guest_notifier(DeviceState *d, int n, bool assign,
return 0;
}
static int virtio_mmio_set_config_guest_notifier(DeviceState *d, bool assign)
{
VirtIOMMIOProxy *proxy = VIRTIO_MMIO(d);
VirtIODevice *vdev = virtio_bus_get_device(&proxy->bus);
VirtioDeviceClass *vdc = VIRTIO_DEVICE_GET_CLASS(vdev);
bool with_irqfd = false;
EventNotifier *notifier = virtio_config_get_guest_notifier(vdev);
int r = 0;
if (assign) {
r = event_notifier_init(notifier, 0);
if (r < 0) {
return r;
}
virtio_config_set_guest_notifier_fd_handler(vdev, assign, with_irqfd);
} else {
virtio_config_set_guest_notifier_fd_handler(vdev, assign, with_irqfd);
event_notifier_cleanup(notifier);
}
if (vdc->guest_notifier_mask && vdev->use_guest_notifier_mask) {
vdc->guest_notifier_mask(vdev, VIRTIO_CONFIG_IRQ_IDX, !assign);
}
return r;
}
static int virtio_mmio_set_guest_notifiers(DeviceState *d, int nvqs,
bool assign)
{
@ -695,6 +718,10 @@ static int virtio_mmio_set_guest_notifiers(DeviceState *d, int nvqs,
goto assign_error;
}
}
r = virtio_mmio_set_config_guest_notifier(d, assign);
if (r < 0) {
goto assign_error;
}
return 0;

@ -677,7 +677,6 @@ static uint32_t virtio_read_config(PCIDevice *pci_dev,
}
static int kvm_virtio_pci_vq_vector_use(VirtIOPCIProxy *proxy,
unsigned int queue_no,
unsigned int vector)
{
VirtIOIRQFD *irqfd = &proxy->vector_irqfd[vector];
@ -704,112 +703,160 @@ static void kvm_virtio_pci_vq_vector_release(VirtIOPCIProxy *proxy,
}
static int kvm_virtio_pci_irqfd_use(VirtIOPCIProxy *proxy,
unsigned int queue_no,
EventNotifier *n,
unsigned int vector)
{
VirtIOIRQFD *irqfd = &proxy->vector_irqfd[vector];
VirtIODevice *vdev = virtio_bus_get_device(&proxy->bus);
VirtQueue *vq = virtio_get_queue(vdev, queue_no);
EventNotifier *n = virtio_queue_get_guest_notifier(vq);
return kvm_irqchip_add_irqfd_notifier_gsi(kvm_state, n, NULL, irqfd->virq);
}
static void kvm_virtio_pci_irqfd_release(VirtIOPCIProxy *proxy,
unsigned int queue_no,
EventNotifier *n,
unsigned int vector)
{
VirtIODevice *vdev = virtio_bus_get_device(&proxy->bus);
VirtQueue *vq = virtio_get_queue(vdev, queue_no);
EventNotifier *n = virtio_queue_get_guest_notifier(vq);
VirtIOIRQFD *irqfd = &proxy->vector_irqfd[vector];
int ret;
ret = kvm_irqchip_remove_irqfd_notifier_gsi(kvm_state, n, irqfd->virq);
assert(ret == 0);
}
static int kvm_virtio_pci_vector_use(VirtIOPCIProxy *proxy, int nvqs)
static int virtio_pci_get_notifier(VirtIOPCIProxy *proxy, int queue_no,
EventNotifier **n, unsigned int *vector)
{
VirtIODevice *vdev = virtio_bus_get_device(&proxy->bus);
VirtQueue *vq;
if (queue_no == VIRTIO_CONFIG_IRQ_IDX) {
*n = virtio_config_get_guest_notifier(vdev);
*vector = vdev->config_vector;
} else {
if (!virtio_queue_get_num(vdev, queue_no)) {
return -1;
}
*vector = virtio_queue_vector(vdev, queue_no);
vq = virtio_get_queue(vdev, queue_no);
*n = virtio_queue_get_guest_notifier(vq);
}
return 0;
}
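This lookup is the pivot of the refactor: every per-vector helper can now be driven either with a real queue number or with VIRTIO_CONFIG_IRQ_IDX and gets the matching notifier/vector pair back. A sketch of a caller:

EventNotifier *n;
unsigned int vector;

if (virtio_pci_get_notifier(proxy, VIRTIO_CONFIG_IRQ_IDX, &n, &vector) == 0) {
    /* n == &vdev->config_notifier, vector == vdev->config_vector */
}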
static int kvm_virtio_pci_vector_use_one(VirtIOPCIProxy *proxy, int queue_no)
{
unsigned int vector;
int ret;
EventNotifier *n;
PCIDevice *dev = &proxy->pci_dev;
VirtIODevice *vdev = virtio_bus_get_device(&proxy->bus);
VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev);
unsigned int vector;
int ret, queue_no;
ret = virtio_pci_get_notifier(proxy, queue_no, &n, &vector);
if (ret < 0) {
return ret;
}
if (vector >= msix_nr_vectors_allocated(dev)) {
return 0;
}
ret = kvm_virtio_pci_vq_vector_use(proxy, vector);
if (ret < 0) {
goto undo;
}
/*
* If guest supports masking, set up irqfd now.
* Otherwise, delay until unmasked in the frontend.
*/
if (vdev->use_guest_notifier_mask && k->guest_notifier_mask) {
ret = kvm_virtio_pci_irqfd_use(proxy, n, vector);
if (ret < 0) {
kvm_virtio_pci_vq_vector_release(proxy, vector);
goto undo;
}
}
return 0;
undo:
vector = virtio_queue_vector(vdev, queue_no);
if (vector >= msix_nr_vectors_allocated(dev)) {
return ret;
}
if (vdev->use_guest_notifier_mask && k->guest_notifier_mask) {
ret = virtio_pci_get_notifier(proxy, queue_no, &n, &vector);
if (ret < 0) {
return ret;
}
kvm_virtio_pci_irqfd_release(proxy, n, vector);
}
return ret;
}
static int kvm_virtio_pci_vector_use(VirtIOPCIProxy *proxy, int nvqs)
{
int queue_no;
int ret = 0;
VirtIODevice *vdev = virtio_bus_get_device(&proxy->bus);
for (queue_no = 0; queue_no < nvqs; queue_no++) {
if (!virtio_queue_get_num(vdev, queue_no)) {
break;
return -1;
}
vector = virtio_queue_vector(vdev, queue_no);
if (vector >= msix_nr_vectors_allocated(dev)) {
continue;
}
ret = kvm_virtio_pci_vq_vector_use(proxy, queue_no, vector);
if (ret < 0) {
goto undo;
}
/* If guest supports masking, set up irqfd now.
* Otherwise, delay until unmasked in the frontend.
*/
if (vdev->use_guest_notifier_mask && k->guest_notifier_mask) {
ret = kvm_virtio_pci_irqfd_use(proxy, queue_no, vector);
if (ret < 0) {
kvm_virtio_pci_vq_vector_release(proxy, vector);
goto undo;
}
}
}
return 0;
undo:
while (--queue_no >= 0) {
vector = virtio_queue_vector(vdev, queue_no);
if (vector >= msix_nr_vectors_allocated(dev)) {
continue;
}
if (vdev->use_guest_notifier_mask && k->guest_notifier_mask) {
kvm_virtio_pci_irqfd_release(proxy, queue_no, vector);
}
kvm_virtio_pci_vq_vector_release(proxy, vector);
ret = kvm_virtio_pci_vector_use_one(proxy, queue_no);
}
return ret;
}
static void kvm_virtio_pci_vector_release(VirtIOPCIProxy *proxy, int nvqs)
static int kvm_virtio_pci_vector_config_use(VirtIOPCIProxy *proxy)
{
return kvm_virtio_pci_vector_use_one(proxy, VIRTIO_CONFIG_IRQ_IDX);
}
static void kvm_virtio_pci_vector_release_one(VirtIOPCIProxy *proxy,
int queue_no)
{
PCIDevice *dev = &proxy->pci_dev;
VirtIODevice *vdev = virtio_bus_get_device(&proxy->bus);
unsigned int vector;
int queue_no;
EventNotifier *n;
int ret;
VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev);
PCIDevice *dev = &proxy->pci_dev;
ret = virtio_pci_get_notifier(proxy, queue_no, &n, &vector);
if (ret < 0) {
return;
}
if (vector >= msix_nr_vectors_allocated(dev)) {
return;
}
if (vdev->use_guest_notifier_mask && k->guest_notifier_mask) {
kvm_virtio_pci_irqfd_release(proxy, n, vector);
}
kvm_virtio_pci_vq_vector_release(proxy, vector);
}
static void kvm_virtio_pci_vector_release(VirtIOPCIProxy *proxy, int nvqs)
{
int queue_no;
VirtIODevice *vdev = virtio_bus_get_device(&proxy->bus);
for (queue_no = 0; queue_no < nvqs; queue_no++) {
if (!virtio_queue_get_num(vdev, queue_no)) {
break;
}
vector = virtio_queue_vector(vdev, queue_no);
if (vector >= msix_nr_vectors_allocated(dev)) {
continue;
}
/* If guest supports masking, clean up irqfd now.
* Otherwise, it was cleaned when masked in the frontend.
*/
if (vdev->use_guest_notifier_mask && k->guest_notifier_mask) {
kvm_virtio_pci_irqfd_release(proxy, queue_no, vector);
}
kvm_virtio_pci_vq_vector_release(proxy, vector);
kvm_virtio_pci_vector_release_one(proxy, queue_no);
}
}
static int virtio_pci_vq_vector_unmask(VirtIOPCIProxy *proxy,
static void kvm_virtio_pci_vector_config_release(VirtIOPCIProxy *proxy)
{
kvm_virtio_pci_vector_release_one(proxy, VIRTIO_CONFIG_IRQ_IDX);
}
static int virtio_pci_one_vector_unmask(VirtIOPCIProxy *proxy,
unsigned int queue_no,
unsigned int vector,
MSIMessage msg)
MSIMessage msg,
EventNotifier *n)
{
VirtIODevice *vdev = virtio_bus_get_device(&proxy->bus);
VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev);
VirtQueue *vq = virtio_get_queue(vdev, queue_no);
EventNotifier *n = virtio_queue_get_guest_notifier(vq);
VirtIOIRQFD *irqfd;
int ret = 0;
@ -836,14 +883,15 @@ static int virtio_pci_vq_vector_unmask(VirtIOPCIProxy *proxy,
event_notifier_set(n);
}
} else {
ret = kvm_virtio_pci_irqfd_use(proxy, queue_no, vector);
ret = kvm_virtio_pci_irqfd_use(proxy, n, vector);
}
return ret;
}
static void virtio_pci_vq_vector_mask(VirtIOPCIProxy *proxy,
static void virtio_pci_one_vector_mask(VirtIOPCIProxy *proxy,
unsigned int queue_no,
unsigned int vector)
unsigned int vector,
EventNotifier *n)
{
VirtIODevice *vdev = virtio_bus_get_device(&proxy->bus);
VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev);
@ -854,7 +902,7 @@ static void virtio_pci_vq_vector_mask(VirtIOPCIProxy *proxy,
if (vdev->use_guest_notifier_mask && k->guest_notifier_mask) {
k->guest_notifier_mask(vdev, queue_no, true);
} else {
kvm_virtio_pci_irqfd_release(proxy, queue_no, vector);
kvm_virtio_pci_irqfd_release(proxy, n, vector);
}
}
@ -864,6 +912,7 @@ static int virtio_pci_vector_unmask(PCIDevice *dev, unsigned vector,
VirtIOPCIProxy *proxy = container_of(dev, VirtIOPCIProxy, pci_dev);
VirtIODevice *vdev = virtio_bus_get_device(&proxy->bus);
VirtQueue *vq = virtio_vector_first_queue(vdev, vector);
EventNotifier *n;
int ret, index, unmasked = 0;
while (vq) {
@ -872,7 +921,8 @@ static int virtio_pci_vector_unmask(PCIDevice *dev, unsigned vector,
break;
}
if (index < proxy->nvqs_with_notifiers) {
ret = virtio_pci_vq_vector_unmask(proxy, index, vector, msg);
n = virtio_queue_get_guest_notifier(vq);
ret = virtio_pci_one_vector_unmask(proxy, index, vector, msg, n);
if (ret < 0) {
goto undo;
}
@ -880,15 +930,24 @@ static int virtio_pci_vector_unmask(PCIDevice *dev, unsigned vector,
}
vq = virtio_vector_next_queue(vq);
}
/* unmask config intr */
n = virtio_config_get_guest_notifier(vdev);
ret = virtio_pci_one_vector_unmask(proxy, VIRTIO_CONFIG_IRQ_IDX, vector,
msg, n);
if (ret < 0) {
goto undo_config;
}
return 0;
undo_config:
n = virtio_config_get_guest_notifier(vdev);
virtio_pci_one_vector_mask(proxy, VIRTIO_CONFIG_IRQ_IDX, vector, n);
undo:
vq = virtio_vector_first_queue(vdev, vector);
while (vq && unmasked >= 0) {
index = virtio_get_queue_index(vq);
if (index < proxy->nvqs_with_notifiers) {
virtio_pci_vq_vector_mask(proxy, index, vector);
n = virtio_queue_get_guest_notifier(vq);
virtio_pci_one_vector_mask(proxy, index, vector, n);
--unmasked;
}
vq = virtio_vector_next_queue(vq);
@ -901,18 +960,22 @@ static void virtio_pci_vector_mask(PCIDevice *dev, unsigned vector)
VirtIOPCIProxy *proxy = container_of(dev, VirtIOPCIProxy, pci_dev);
VirtIODevice *vdev = virtio_bus_get_device(&proxy->bus);
VirtQueue *vq = virtio_vector_first_queue(vdev, vector);
EventNotifier *n;
int index;
while (vq) {
index = virtio_get_queue_index(vq);
n = virtio_queue_get_guest_notifier(vq);
if (!virtio_queue_get_num(vdev, index)) {
break;
}
if (index < proxy->nvqs_with_notifiers) {
virtio_pci_vq_vector_mask(proxy, index, vector);
virtio_pci_one_vector_mask(proxy, index, vector, n);
}
vq = virtio_vector_next_queue(vq);
}
n = virtio_config_get_guest_notifier(vdev);
virtio_pci_one_vector_mask(proxy, VIRTIO_CONFIG_IRQ_IDX, vector, n);
}
static void virtio_pci_vector_poll(PCIDevice *dev,
@ -925,19 +988,17 @@ static void virtio_pci_vector_poll(PCIDevice *dev,
int queue_no;
unsigned int vector;
EventNotifier *notifier;
VirtQueue *vq;
int ret;
for (queue_no = 0; queue_no < proxy->nvqs_with_notifiers; queue_no++) {
if (!virtio_queue_get_num(vdev, queue_no)) {
ret = virtio_pci_get_notifier(proxy, queue_no, &notifier, &vector);
if (ret < 0) {
break;
}
vector = virtio_queue_vector(vdev, queue_no);
if (vector < vector_start || vector >= vector_end ||
!msix_is_masked(dev, vector)) {
continue;
}
vq = virtio_get_queue(vdev, queue_no);
notifier = virtio_queue_get_guest_notifier(vq);
if (k->guest_notifier_pending) {
if (k->guest_notifier_pending(vdev, queue_no)) {
msix_set_pending(dev, vector);
@ -946,6 +1007,34 @@ static void virtio_pci_vector_poll(PCIDevice *dev,
msix_set_pending(dev, vector);
}
}
/* poll the config intr */
ret = virtio_pci_get_notifier(proxy, VIRTIO_CONFIG_IRQ_IDX, &notifier,
&vector);
if (ret < 0) {
return;
}
if (vector < vector_start || vector >= vector_end ||
!msix_is_masked(dev, vector)) {
return;
}
if (k->guest_notifier_pending) {
if (k->guest_notifier_pending(vdev, VIRTIO_CONFIG_IRQ_IDX)) {
msix_set_pending(dev, vector);
}
} else if (event_notifier_test_and_clear(notifier)) {
msix_set_pending(dev, vector);
}
}
void virtio_pci_set_guest_notifier_fd_handler(VirtIODevice *vdev, VirtQueue *vq,
int n, bool assign,
bool with_irqfd)
{
if (n == VIRTIO_CONFIG_IRQ_IDX) {
virtio_config_set_guest_notifier_fd_handler(vdev, assign, with_irqfd);
} else {
virtio_queue_set_guest_notifier_fd_handler(vq, assign, with_irqfd);
}
}
static int virtio_pci_set_guest_notifier(DeviceState *d, int n, bool assign,
@ -954,17 +1043,25 @@ static int virtio_pci_set_guest_notifier(DeviceState *d, int n, bool assign,
VirtIOPCIProxy *proxy = to_virtio_pci_proxy(d);
VirtIODevice *vdev = virtio_bus_get_device(&proxy->bus);
VirtioDeviceClass *vdc = VIRTIO_DEVICE_GET_CLASS(vdev);
VirtQueue *vq = virtio_get_queue(vdev, n);
EventNotifier *notifier = virtio_queue_get_guest_notifier(vq);
VirtQueue *vq = NULL;
EventNotifier *notifier = NULL;
if (n == VIRTIO_CONFIG_IRQ_IDX) {
notifier = virtio_config_get_guest_notifier(vdev);
} else {
vq = virtio_get_queue(vdev, n);
notifier = virtio_queue_get_guest_notifier(vq);
}
if (assign) {
int r = event_notifier_init(notifier, 0);
if (r < 0) {
return r;
}
virtio_queue_set_guest_notifier_fd_handler(vq, true, with_irqfd);
virtio_pci_set_guest_notifier_fd_handler(vdev, vq, n, true, with_irqfd);
} else {
virtio_queue_set_guest_notifier_fd_handler(vq, false, with_irqfd);
virtio_pci_set_guest_notifier_fd_handler(vdev, vq, n, false,
with_irqfd);
event_notifier_cleanup(notifier);
}
@ -1006,6 +1103,7 @@ static int virtio_pci_set_guest_notifiers(DeviceState *d, int nvqs, bool assign)
msix_unset_vector_notifiers(&proxy->pci_dev);
if (proxy->vector_irqfd) {
kvm_virtio_pci_vector_release(proxy, nvqs);
kvm_virtio_pci_vector_config_release(proxy);
g_free(proxy->vector_irqfd);
proxy->vector_irqfd = NULL;
}
@ -1021,7 +1119,11 @@ static int virtio_pci_set_guest_notifiers(DeviceState *d, int nvqs, bool assign)
goto assign_error;
}
}
r = virtio_pci_set_guest_notifier(d, VIRTIO_CONFIG_IRQ_IDX, assign,
with_irqfd);
if (r < 0) {
goto config_assign_error;
}
/* Must set vector notifier after guest notifier has been assigned */
if ((with_irqfd || k->guest_notifier_mask) && assign) {
if (with_irqfd) {
@ -1030,11 +1132,14 @@ static int virtio_pci_set_guest_notifiers(DeviceState *d, int nvqs, bool assign)
msix_nr_vectors_allocated(&proxy->pci_dev));
r = kvm_virtio_pci_vector_use(proxy, nvqs);
if (r < 0) {
goto assign_error;
goto config_assign_error;
}
}
r = msix_set_vector_notifiers(&proxy->pci_dev,
virtio_pci_vector_unmask,
r = kvm_virtio_pci_vector_config_use(proxy);
if (r < 0) {
goto config_error;
}
r = msix_set_vector_notifiers(&proxy->pci_dev, virtio_pci_vector_unmask,
virtio_pci_vector_mask,
virtio_pci_vector_poll);
if (r < 0) {
@ -1049,7 +1154,11 @@ notifiers_error:
assert(assign);
kvm_virtio_pci_vector_release(proxy, nvqs);
}
config_error:
kvm_virtio_pci_vector_config_release(proxy);
config_assign_error:
virtio_pci_set_guest_notifier(d, VIRTIO_CONFIG_IRQ_IDX, !assign,
with_irqfd);
assign_error:
/* We get here on assignment failure. Recover by undoing for VQs 0 .. n. */
assert(assign);

@ -251,5 +251,7 @@ void virtio_pci_types_register(const VirtioPCIDeviceTypeInfo *t);
* @fixed_queues.
*/
unsigned virtio_pci_optimal_num_queues(unsigned fixed_queues);
void virtio_pci_set_guest_notifier_fd_handler(VirtIODevice *vdev, VirtQueue *vq,
int n, bool assign,
bool with_irqfd);
#endif

@ -885,6 +885,7 @@ static void virtqueue_packed_flush(VirtQueue *vq, unsigned int count)
if (vq->used_idx >= vq->vring.num) {
vq->used_idx -= vq->vring.num;
vq->used_wrap_counter ^= 1;
vq->signalled_used_valid = false;
}
}
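The invalidation matters because a packed ring's used_idx is an offset into the ring, not a free-running counter: on wrap it jumps back to a small value, so a signalled_used recorded before the wrap no longer compares meaningfully against it. A rough illustration, assuming a 256-entry ring:

/* before the flush: used_idx = 255, signalled_used = 250 (valid)   */
/* after it wraps:   used_idx = 3,   used_wrap_counter flipped      */
/* 3 vs. 250 is meaningless, so signalled_used_valid = false forces */
/* the next notification check to signal the guest unconditionally. */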
@ -3493,7 +3494,14 @@ static void virtio_queue_guest_notifier_read(EventNotifier *n)
virtio_irq(vq);
}
}
static void virtio_config_guest_notifier_read(EventNotifier *n)
{
VirtIODevice *vdev = container_of(n, VirtIODevice, config_notifier);
if (event_notifier_test_and_clear(n)) {
virtio_notify_config(vdev);
}
}
void virtio_queue_set_guest_notifier_fd_handler(VirtQueue *vq, bool assign,
bool with_irqfd)
{
@ -3510,6 +3518,23 @@ void virtio_queue_set_guest_notifier_fd_handler(VirtQueue *vq, bool assign,
}
}
void virtio_config_set_guest_notifier_fd_handler(VirtIODevice *vdev,
bool assign, bool with_irqfd)
{
EventNotifier *n;
n = &vdev->config_notifier;
if (assign && !with_irqfd) {
event_notifier_set_handler(n, virtio_config_guest_notifier_read);
} else {
event_notifier_set_handler(n, NULL);
}
if (!assign) {
/* Test and clear notifier before closing it, in case poll callback didn't have time to run. */
virtio_config_guest_notifier_read(n);
}
}
EventNotifier *virtio_queue_get_guest_notifier(VirtQueue *vq)
{
return &vq->guest_notifier;
@ -3583,6 +3608,11 @@ EventNotifier *virtio_queue_get_host_notifier(VirtQueue *vq)
return &vq->host_notifier;
}
EventNotifier *virtio_config_get_guest_notifier(VirtIODevice *vdev)
{
return &vdev->config_notifier;
}
void virtio_queue_set_host_notifier_enabled(VirtQueue *vq, bool enabled)
{
vq->host_notifier_enabled = enabled;

@ -1,6 +1,8 @@
#ifndef QEMU_SMBIOS_H
#define QEMU_SMBIOS_H
#include "qapi/qapi-types-machine.h"
/*
* SMBIOS Support
*
@ -23,14 +25,6 @@ struct smbios_phys_mem_area {
uint64_t length;
};
/*
* SMBIOS spec defined tables
*/
typedef enum SmbiosEntryPointType {
SMBIOS_ENTRY_POINT_21,
SMBIOS_ENTRY_POINT_30,
} SmbiosEntryPointType;
/* SMBIOS Entry Point
* There are two types of entry points defined in the SMBIOS specification
* (see below). BIOS must place the entry point(s) at a 16-byte-aligned

@ -13,6 +13,7 @@
#include "hw/hotplug.h"
#include "qom/object.h"
#include "hw/i386/sgx-epc.h"
#include "hw/firmware/smbios.h"
#define HPET_INTCAP "hpet-intcap"
@ -40,6 +41,7 @@ typedef struct PCMachineState {
/* Configuration options: */
uint64_t max_ram_below_4g;
OnOffAuto vmport;
SmbiosEntryPointType smbios_entry_point_type;
bool acpi_build_enabled;
bool smbus_enabled;
@ -63,6 +65,8 @@ typedef struct PCMachineState {
#define PC_MACHINE_SATA "sata"
#define PC_MACHINE_PIT "pit"
#define PC_MACHINE_MAX_FW_SIZE "max-fw-size"
#define PC_MACHINE_SMBIOS_EP "smbios-entry-point-type"
/**
* PCMachineClass:
*

@ -735,6 +735,11 @@ void lsi53c8xx_handle_legacy_cmdline(DeviceState *lsi_dev);
qemu_irq pci_allocate_irq(PCIDevice *pci_dev);
void pci_set_irq(PCIDevice *pci_dev, int level);
static inline int pci_intx(PCIDevice *pci_dev)
{
return pci_get_byte(pci_dev->config + PCI_INTERRUPT_PIN) - 1;
}
static inline void pci_irq_assert(PCIDevice *pci_dev)
{
pci_set_irq(pci_dev, 1);

@ -126,6 +126,8 @@ typedef int (*vhost_get_device_id_op)(struct vhost_dev *dev, uint32_t *dev_id);
typedef bool (*vhost_force_iommu_op)(struct vhost_dev *dev);
typedef int (*vhost_set_config_call_op)(struct vhost_dev *dev,
int fd);
typedef struct VhostOps {
VhostBackendType backend_type;
vhost_backend_init vhost_backend_init;
@ -171,6 +173,7 @@ typedef struct VhostOps {
vhost_vq_get_addr_op vhost_vq_get_addr;
vhost_get_device_id_op vhost_get_device_id;
vhost_force_iommu_op vhost_force_iommu;
vhost_set_config_call_op vhost_set_config_call;
} VhostOps;
int vhost_backend_update_device_iotlb(struct vhost_dev *dev,

@ -29,6 +29,7 @@ struct vhost_virtqueue {
unsigned long long used_phys;
unsigned used_size;
EventNotifier masked_notifier;
EventNotifier masked_config_notifier;
struct vhost_dev *dev;
};
@ -37,6 +38,7 @@ typedef unsigned long vhost_log_chunk_t;
#define VHOST_LOG_BITS (8 * sizeof(vhost_log_chunk_t))
#define VHOST_LOG_CHUNK (VHOST_LOG_PAGE * VHOST_LOG_BITS)
#define VHOST_INVALID_FEATURE_BIT (0xff)
#define VHOST_QUEUE_NUM_CONFIG_INR 0
struct vhost_log {
unsigned long long size;
@ -116,6 +118,8 @@ int vhost_dev_start(struct vhost_dev *hdev, VirtIODevice *vdev);
void vhost_dev_stop(struct vhost_dev *hdev, VirtIODevice *vdev);
int vhost_dev_enable_notifiers(struct vhost_dev *hdev, VirtIODevice *vdev);
void vhost_dev_disable_notifiers(struct vhost_dev *hdev, VirtIODevice *vdev);
bool vhost_config_pending(struct vhost_dev *hdev);
void vhost_config_mask(struct vhost_dev *hdev, VirtIODevice *vdev, bool mask);
/* Test and clear masked event pending status.
* Should be called after unmask to avoid losing events.

@ -30,6 +30,8 @@ OBJECT_DECLARE_TYPE(VirtIOMEM, VirtIOMEMClass,
#define VIRTIO_MEM_REQUESTED_SIZE_PROP "requested-size"
#define VIRTIO_MEM_BLOCK_SIZE_PROP "block-size"
#define VIRTIO_MEM_ADDR_PROP "memaddr"
#define VIRTIO_MEM_UNPLUGGED_INACCESSIBLE_PROP "unplugged-inaccessible"
#define VIRTIO_MEM_PREALLOC_PROP "prealloc"
struct VirtIOMEM {
VirtIODevice parent_obj;
@ -62,6 +64,16 @@ struct VirtIOMEM {
/* block size and alignment */
uint64_t block_size;
/*
* Whether we indicate VIRTIO_MEM_F_UNPLUGGED_INACCESSIBLE to the guest.
* For !x86 targets this will always be "on" and consequently indicate
* VIRTIO_MEM_F_UNPLUGGED_INACCESSIBLE.
*/
OnOffAuto unplugged_inaccessible;
/* whether to prealloc memory when plugging new blocks */
bool prealloc;
/* notifiers to notify when "size" changes */
NotifierList size_change_notifiers;

@ -67,6 +67,9 @@ typedef struct VirtQueueElement
#define VIRTIO_NO_VECTOR 0xffff
/* special index value used internally for config irqs */
#define VIRTIO_CONFIG_IRQ_IDX -1
#define TYPE_VIRTIO_DEVICE "virtio-device"
OBJECT_DECLARE_TYPE(VirtIODevice, VirtioDeviceClass, VIRTIO_DEVICE)
@ -108,6 +111,7 @@ struct VirtIODevice
bool use_guest_notifier_mask;
AddressSpace *dma_as;
QLIST_HEAD(, VirtQueue) *vector_queues;
EventNotifier config_notifier;
};
struct VirtioDeviceClass {
@ -310,11 +314,14 @@ uint16_t virtio_get_queue_index(VirtQueue *vq);
EventNotifier *virtio_queue_get_guest_notifier(VirtQueue *vq);
void virtio_queue_set_guest_notifier_fd_handler(VirtQueue *vq, bool assign,
bool with_irqfd);
void virtio_config_set_guest_notifier_fd_handler(VirtIODevice *vdev,
bool assign, bool with_irqfd);
int virtio_device_start_ioeventfd(VirtIODevice *vdev);
int virtio_device_grab_ioeventfd(VirtIODevice *vdev);
void virtio_device_release_ioeventfd(VirtIODevice *vdev);
bool virtio_device_ioeventfd_enabled(VirtIODevice *vdev);
EventNotifier *virtio_queue_get_host_notifier(VirtQueue *vq);
EventNotifier *virtio_config_get_guest_notifier(VirtIODevice *vdev);
void virtio_queue_set_host_notifier_enabled(VirtQueue *vq, bool enabled);
void virtio_queue_host_notifier_read(EventNotifier *n);
void virtio_queue_aio_set_host_notifier_handler(VirtQueue *vq, AioContext *ctx,

@ -39,6 +39,8 @@ int vhost_net_set_config(struct vhost_net *net, const uint8_t *data,
bool vhost_net_virtqueue_pending(VHostNetState *net, int n);
void vhost_net_virtqueue_mask(VHostNetState *net, VirtIODevice *dev,
int idx, bool mask);
bool vhost_net_config_pending(VHostNetState *net);
void vhost_net_config_mask(VHostNetState *net, VirtIODevice *dev, bool mask);
int vhost_net_notify_migration_done(VHostNetState *net, char* mac_addr);
VHostNetState *get_vhost_net(NetClientState *nc);

@ -471,6 +471,11 @@ static inline void qemu_cleanup_generic_vfree(void *p)
#else
#define QEMU_MADV_REMOVE QEMU_MADV_DONTNEED
#endif
#ifdef MADV_POPULATE_WRITE
#define QEMU_MADV_POPULATE_WRITE MADV_POPULATE_WRITE
#else
#define QEMU_MADV_POPULATE_WRITE QEMU_MADV_INVALID
#endif
#elif defined(CONFIG_POSIX_MADVISE)
@ -484,6 +489,7 @@ static inline void qemu_cleanup_generic_vfree(void *p)
#define QEMU_MADV_HUGEPAGE QEMU_MADV_INVALID
#define QEMU_MADV_NOHUGEPAGE QEMU_MADV_INVALID
#define QEMU_MADV_REMOVE QEMU_MADV_DONTNEED
#define QEMU_MADV_POPULATE_WRITE QEMU_MADV_INVALID
#else /* no-op */
@ -497,6 +503,7 @@ static inline void qemu_cleanup_generic_vfree(void *p)
#define QEMU_MADV_HUGEPAGE QEMU_MADV_INVALID
#define QEMU_MADV_NOHUGEPAGE QEMU_MADV_INVALID
#define QEMU_MADV_REMOVE QEMU_MADV_INVALID
#define QEMU_MADV_POPULATE_WRITE QEMU_MADV_INVALID
#endif
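MADV_POPULATE_WRITE (Linux 5.14+) asks the kernel to prefault a range for writing without user space having to touch every page. A standalone example of the raw flag that the new QEMU_MADV_POPULATE_WRITE macro wraps:

#include <sys/mman.h>
#include <stdio.h>

int main(void)
{
    size_t len = 16 * 4096;
    char *p = mmap(NULL, len, PROT_READ | PROT_WRITE,
                   MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
    if (p == MAP_FAILED) {
        return 1;
    }
#ifdef MADV_POPULATE_WRITE
    if (madvise(p, len, MADV_POPULATE_WRITE)) {
        perror("madvise");   /* EINVAL on kernels without support */
    }
#endif
    munmap(p, len);
    return 0;
}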

@ -68,9 +68,10 @@
* explicitly triggered (VIRTIO_MEM_REQ_UNPLUG).
*
* There are no guarantees what will happen if unplugged memory is
* read/written. Such memory should, in general, not be touched. E.g.,
* even writing might succeed, but the values will simply be discarded at
* random points in time.
* read/written. In general, unplugged memory should not be touched, because
* the resulting action is undefined. There is one exception: without
* VIRTIO_MEM_F_UNPLUGGED_INACCESSIBLE, unplugged memory inside the usable
* region can be read, to simplify creation of memory dumps.
*
* It can happen that the device cannot process a request, because it is
* busy. The device driver has to retry later.
@ -87,6 +88,8 @@
/* node_id is an ACPI PXM and is valid */
#define VIRTIO_MEM_F_ACPI_PXM 0
/* unplugged memory must not be accessed */
#define VIRTIO_MEM_F_UNPLUGGED_INACCESSIBLE 1
/* --- virtio-mem: guest -> host requests --- */

@ -1568,3 +1568,15 @@
{ 'command': 'x-query-usb',
'returns': 'HumanReadableText',
'features': [ 'unstable' ] }
##
# @SmbiosEntryPointType:
#
# @32: SMBIOS version 2.1 (32-bit) Entry Point
#
# @64: SMBIOS version 3.0 (64-bit) Entry Point
#
# Since: 7.0
##
{ 'enum': 'SmbiosEntryPointType',
'data': [ '32', '64' ] }
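The enum backs the machine property registered as PC_MACHINE_SMBIOS_EP ("smbios-entry-point-type") in the pc.h hunk above, making the entry point format user-selectable; a hypothetical invocation:

qemu-system-x86_64 -machine q35,smbios-entry-point-type=64 ...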

@ -352,6 +352,10 @@ static void qemu_init_sigbus(void)
{
struct sigaction action;
/*
* ALERT: when modifying this, take care that SIGBUS forwarding in
* os_mem_prealloc() will continue working as expected.
*/
memset(&action, 0, sizeof(action));
action.sa_flags = SA_SIGINFO;
action.sa_sigaction = sigbus_handler;

@ -1502,6 +1502,20 @@ static void test_acpi_virt_viot(void)
free_test_data(&data);
}
static void test_acpi_q35_slic(void)
{
test_data data = {
.machine = MACHINE_Q35,
.variant = ".slic",
};
test_acpi_one("-acpitable sig=SLIC,oem_id='CRASH ',oem_table_id='ME',"
"oem_rev=00002210,asl_compiler_id='qemu',"
"asl_compiler_rev=00000000,data=/dev/null",
&data);
free_test_data(&data);
}
static void test_oem_fields(test_data *data)
{
int i;
@ -1677,6 +1691,7 @@ int main(int argc, char *argv[])
qtest_add_func("acpi/q35/kvm/dmar", test_acpi_q35_kvm_dmar);
}
qtest_add_func("acpi/q35/viot", test_acpi_q35_viot);
qtest_add_func("acpi/q35/slic", test_acpi_q35_slic);
} else if (strcmp(arch, "aarch64") == 0) {
if (has_tcg) {
qtest_add_func("acpi/virt", test_acpi_virt_tcg);

@ -35,11 +35,13 @@
#include "sysemu/sysemu.h"
#include "trace.h"
#include "qapi/error.h"
#include "qemu/error-report.h"
#include "qemu/sockets.h"
#include "qemu/thread.h"
#include <libgen.h>
#include "qemu/cutils.h"
#include "qemu/compiler.h"
#include "qemu/units.h"
#ifdef CONFIG_LINUX
#include <sys/syscall.h>
@ -73,22 +75,32 @@
#define MAX_MEM_PREALLOC_THREAD_COUNT 16
struct MemsetThread;
typedef struct MemsetContext {
bool all_threads_created;
bool any_thread_failed;
struct MemsetThread *threads;
int num_threads;
} MemsetContext;
struct MemsetThread {
char *addr;
size_t numpages;
size_t hpagesize;
QemuThread pgthread;
sigjmp_buf env;
MemsetContext *context;
};
typedef struct MemsetThread MemsetThread;
static MemsetThread *memset_thread;
static int memset_num_threads;
static bool memset_thread_failed;
/* used by sigbus_handler() */
static MemsetContext *sigbus_memset_context;
struct sigaction sigbus_oldact;
static QemuMutex sigbus_mutex;
static QemuMutex page_mutex;
static QemuCond page_cond;
static bool threads_created_flag;
int qemu_get_thread_id(void)
{
@ -436,22 +448,50 @@ const char *qemu_get_exec_dir(void)
return exec_dir;
}
#ifdef CONFIG_LINUX
static void sigbus_handler(int signal, siginfo_t *siginfo, void *ctx)
#else /* CONFIG_LINUX */
static void sigbus_handler(int signal)
#endif /* CONFIG_LINUX */
{
int i;
if (memset_thread) {
for (i = 0; i < memset_num_threads; i++) {
if (qemu_thread_is_self(&memset_thread[i].pgthread)) {
siglongjmp(memset_thread[i].env, 1);
if (sigbus_memset_context) {
for (i = 0; i < sigbus_memset_context->num_threads; i++) {
MemsetThread *thread = &sigbus_memset_context->threads[i];
if (qemu_thread_is_self(&thread->pgthread)) {
siglongjmp(thread->env, 1);
}
}
}
#ifdef CONFIG_LINUX
/*
* We assume that the MCE SIGBUS handler could have been registered. We
* should never receive BUS_MCEERR_AO on any of our threads, but only on
* the main thread registered for PR_MCE_KILL_EARLY. Further, we should not
* receive BUS_MCEERR_AR triggered by action of other threads on one of
* our threads. So, no need to check for unrelated SIGBUS when seeing one
* for our threads.
*
* We will forward to the MCE handler, which will either handle the SIGBUS
* or reinstall the default SIGBUS handler and reraise the SIGBUS. The
* default SIGBUS handler will crash the process, so we don't care.
*/
if (sigbus_oldact.sa_flags & SA_SIGINFO) {
sigbus_oldact.sa_sigaction(signal, siginfo, ctx);
return;
}
#endif /* CONFIG_LINUX */
warn_report("os_mem_prealloc: unrelated SIGBUS detected and ignored");
}
static void *do_touch_pages(void *arg)
{
MemsetThread *memset_args = (MemsetThread *)arg;
sigset_t set, oldset;
int ret = 0;
/*
* On Linux, the page faults from the loop below can cause mmap_sem
@ -459,7 +499,7 @@ static void *do_touch_pages(void *arg)
* clearing until all threads have been created.
*/
qemu_mutex_lock(&page_mutex);
while(!threads_created_flag){
while (!memset_args->context->all_threads_created) {
qemu_cond_wait(&page_cond, &page_mutex);
}
qemu_mutex_unlock(&page_mutex);
@ -470,7 +510,7 @@ static void *do_touch_pages(void *arg)
pthread_sigmask(SIG_UNBLOCK, &set, &oldset);
if (sigsetjmp(memset_args->env, 1)) {
memset_thread_failed = true;
ret = -EFAULT;
} else {
char *addr = memset_args->addr;
size_t numpages = memset_args->numpages;
@ -484,20 +524,37 @@ static void *do_touch_pages(void *arg)
*
* 'volatile' to stop compiler optimizing this away
* to a no-op
*
* TODO: get a better solution from kernel so we
* don't need to write at all so we don't cause
* wear on the storage backing the region...
*/
*(volatile char *)addr = *addr;
addr += hpagesize;
}
}
pthread_sigmask(SIG_SETMASK, &oldset, NULL);
return NULL;
return (void *)(uintptr_t)ret;
}
static inline int get_memset_num_threads(int smp_cpus)
static void *do_madv_populate_write_pages(void *arg)
{
MemsetThread *memset_args = (MemsetThread *)arg;
const size_t size = memset_args->numpages * memset_args->hpagesize;
char * const addr = memset_args->addr;
int ret = 0;
/* See do_touch_pages(). */
qemu_mutex_lock(&page_mutex);
while (!memset_args->context->all_threads_created) {
qemu_cond_wait(&page_cond, &page_mutex);
}
qemu_mutex_unlock(&page_mutex);
if (size && qemu_madvise(addr, size, QEMU_MADV_POPULATE_WRITE)) {
ret = -errno;
}
return (void *)(uintptr_t)ret;
}
static inline int get_memset_num_threads(size_t hpagesize, size_t numpages,
int smp_cpus)
{
long host_procs = sysconf(_SC_NPROCESSORS_ONLN);
int ret = 1;
@ -505,17 +562,27 @@ static inline int get_memset_num_threads(int smp_cpus)
if (host_procs > 0) {
ret = MIN(MIN(host_procs, MAX_MEM_PREALLOC_THREAD_COUNT), smp_cpus);
}
/* Especially with gigantic pages, don't create more threads than pages. */
ret = MIN(ret, numpages);
/* Don't start threads to prealloc comparatively little memory. */
ret = MIN(ret, MAX(1, hpagesize * numpages / (64 * MiB)));
/* In case sysconf() fails, we fall back to single threaded */
return ret;
}
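A worked example of the two new clamps (numbers illustrative): with host_procs = 16, smp_cpus = 8, hpagesize = 2 MiB and numpages = 16, the base value is MIN(MIN(16, 16), 8) = 8; the per-page clamp leaves MIN(8, 16) = 8; but the total of 2 MiB * 16 = 32 MiB gives MIN(8, MAX(1, 32 MiB / 64 MiB)) = MIN(8, 1) = 1, so such a small preallocation stays single-threaded.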
static bool touch_all_pages(char *area, size_t hpagesize, size_t numpages,
int smp_cpus)
static int touch_all_pages(char *area, size_t hpagesize, size_t numpages,
int smp_cpus, bool use_madv_populate_write)
{
static gsize initialized = 0;
MemsetContext context = {
.num_threads = get_memset_num_threads(hpagesize, numpages, smp_cpus),
};
size_t numpages_per_thread, leftover;
void *(*touch_fn)(void *);
int ret = 0, i = 0;
char *addr = area;
int i = 0;
if (g_once_init_enter(&initialized)) {
qemu_mutex_init(&page_mutex);
@ -523,66 +590,121 @@ static bool touch_all_pages(char *area, size_t hpagesize, size_t numpages,
g_once_init_leave(&initialized, 1);
}
memset_thread_failed = false;
threads_created_flag = false;
memset_num_threads = get_memset_num_threads(smp_cpus);
memset_thread = g_new0(MemsetThread, memset_num_threads);
numpages_per_thread = numpages / memset_num_threads;
leftover = numpages % memset_num_threads;
for (i = 0; i < memset_num_threads; i++) {
memset_thread[i].addr = addr;
memset_thread[i].numpages = numpages_per_thread + (i < leftover);
memset_thread[i].hpagesize = hpagesize;
qemu_thread_create(&memset_thread[i].pgthread, "touch_pages",
do_touch_pages, &memset_thread[i],
if (use_madv_populate_write) {
/* Avoid creating a single thread for MADV_POPULATE_WRITE */
if (context.num_threads == 1) {
if (qemu_madvise(area, hpagesize * numpages,
QEMU_MADV_POPULATE_WRITE)) {
return -errno;
}
return 0;
}
touch_fn = do_madv_populate_write_pages;
} else {
touch_fn = do_touch_pages;
}
context.threads = g_new0(MemsetThread, context.num_threads);
numpages_per_thread = numpages / context.num_threads;
leftover = numpages % context.num_threads;
for (i = 0; i < context.num_threads; i++) {
context.threads[i].addr = addr;
context.threads[i].numpages = numpages_per_thread + (i < leftover);
context.threads[i].hpagesize = hpagesize;
context.threads[i].context = &context;
qemu_thread_create(&context.threads[i].pgthread, "touch_pages",
touch_fn, &context.threads[i],
QEMU_THREAD_JOINABLE);
addr += memset_thread[i].numpages * hpagesize;
addr += context.threads[i].numpages * hpagesize;
}
if (!use_madv_populate_write) {
sigbus_memset_context = &context;
}
qemu_mutex_lock(&page_mutex);
threads_created_flag = true;
context.all_threads_created = true;
qemu_cond_broadcast(&page_cond);
qemu_mutex_unlock(&page_mutex);
for (i = 0; i < memset_num_threads; i++) {
qemu_thread_join(&memset_thread[i].pgthread);
}
g_free(memset_thread);
memset_thread = NULL;
for (i = 0; i < context.num_threads; i++) {
int tmp = (uintptr_t)qemu_thread_join(&context.threads[i].pgthread);
return memset_thread_failed;
if (tmp) {
ret = tmp;
}
}
if (!use_madv_populate_write) {
sigbus_memset_context = NULL;
}
g_free(context.threads);
return ret;
}
static bool madv_populate_write_possible(char *area, size_t pagesize)
{
return !qemu_madvise(area, pagesize, QEMU_MADV_POPULATE_WRITE) ||
errno != EINVAL;
}
void os_mem_prealloc(int fd, char *area, size_t memory, int smp_cpus,
Error **errp)
{
static gsize initialized;
int ret;
struct sigaction act, oldact;
size_t hpagesize = qemu_fd_getpagesize(fd);
size_t numpages = DIV_ROUND_UP(memory, hpagesize);
bool use_madv_populate_write;
struct sigaction act;
memset(&act, 0, sizeof(act));
act.sa_handler = &sigbus_handler;
act.sa_flags = 0;
/*
* Sense on every invocation, as MADV_POPULATE_WRITE cannot be used for
* some special mappings, such as mapping /dev/mem.
*/
use_madv_populate_write = madv_populate_write_possible(area, hpagesize);
ret = sigaction(SIGBUS, &act, &oldact);
if (ret) {
error_setg_errno(errp, errno,
"os_mem_prealloc: failed to install signal handler");
return;
if (!use_madv_populate_write) {
if (g_once_init_enter(&initialized)) {
qemu_mutex_init(&sigbus_mutex);
g_once_init_leave(&initialized, 1);
}
qemu_mutex_lock(&sigbus_mutex);
memset(&act, 0, sizeof(act));
#ifdef CONFIG_LINUX
act.sa_sigaction = &sigbus_handler;
act.sa_flags = SA_SIGINFO;
#else /* CONFIG_LINUX */
act.sa_handler = &sigbus_handler;
act.sa_flags = 0;
#endif /* CONFIG_LINUX */
ret = sigaction(SIGBUS, &act, &sigbus_oldact);
if (ret) {
error_setg_errno(errp, errno,
"os_mem_prealloc: failed to install signal handler");
return;
}
}
/* touch pages simultaneously */
if (touch_all_pages(area, hpagesize, numpages, smp_cpus)) {
error_setg(errp, "os_mem_prealloc: Insufficient free host memory "
"pages available to allocate guest RAM");
ret = touch_all_pages(area, hpagesize, numpages, smp_cpus,
use_madv_populate_write);
if (ret) {
error_setg_errno(errp, -ret,
"os_mem_prealloc: preallocating memory failed");
}
ret = sigaction(SIGBUS, &oldact, NULL);
if (ret) {
/* Terminate QEMU since it can't recover from error */
perror("os_mem_prealloc: failed to reinstall signal handler");
exit(1);
if (!use_madv_populate_write) {
ret = sigaction(SIGBUS, &sigbus_oldact, NULL);
if (ret) {
/* Terminate QEMU since it can't recover from error */
perror("os_mem_prealloc: failed to reinstall signal handler");
exit(1);
}
qemu_mutex_unlock(&sigbus_mutex);
}
}