diff --git a/Makefile.objs b/Makefile.objs index 7f1f0a3ffd..6d5ddcfd3e 100644 --- a/Makefile.objs +++ b/Makefile.objs @@ -153,6 +153,7 @@ trace-events-y += hw/alpha/trace-events trace-events-y += ui/trace-events trace-events-y += audio/trace-events trace-events-y += net/trace-events +trace-events-y += target-i386/trace-events trace-events-y += target-sparc/trace-events trace-events-y += target-s390x/trace-events trace-events-y += target-ppc/trace-events diff --git a/block/mirror.c b/block/mirror.c index 75a5431325..69a1a7cc96 100644 --- a/block/mirror.c +++ b/block/mirror.c @@ -521,9 +521,6 @@ static void mirror_exit(BlockJob *job, void *opaque) block_job_completed(&s->common, data->ret); g_free(data); bdrv_drained_end(src); - if (qemu_get_aio_context() == bdrv_get_aio_context(src)) { - aio_enable_external(iohandler_get_aio_context()); - } bdrv_unref(src); } @@ -793,12 +790,6 @@ immediate_exit: /* Before we switch to target in mirror_exit, make sure data doesn't * change. */ bdrv_drained_begin(bs); - if (qemu_get_aio_context() == bdrv_get_aio_context(bs)) { - /* FIXME: virtio host notifiers run on iohandler_ctx, therefore the - * above bdrv_drained_end isn't enough to quiesce it. This is ugly, we - * need a block layer API change to achieve this. */ - aio_disable_external(iohandler_get_aio_context()); - } block_job_defer_to_main_loop(&s->common, mirror_exit, data); } diff --git a/docs/virtio-migration.txt b/docs/virtio-migration.txt index cf66458b97..98a6b0ffb5 100644 --- a/docs/virtio-migration.txt +++ b/docs/virtio-migration.txt @@ -28,7 +28,8 @@ virtio core virtio transport virtio device ----------- ---------------- ------------- save() function registered - via register_savevm() + via VMState wrapper on + device class virtio_save() <---------- ------> save_config() - save proxy device @@ -63,7 +64,8 @@ virtio core virtio transport virtio device ----------- ---------------- ------------- load() function registered - via register_savevm() + via VMState wrapper on + device class virtio_load() <---------- ------> load_config() - load proxy device diff --git a/hw/9pfs/virtio-9p-device.c b/hw/9pfs/virtio-9p-device.c index 494e85e029..009b43f6d0 100644 --- a/hw/9pfs/virtio-9p-device.c +++ b/hw/9pfs/virtio-9p-device.c @@ -97,14 +97,9 @@ static void virtio_9p_get_config(VirtIODevice *vdev, uint8_t *config) g_free(cfg); } -static void virtio_9p_save(QEMUFile *f, void *opaque) +static int virtio_9p_load(QEMUFile *f, void *opaque, size_t size) { - virtio_save(VIRTIO_DEVICE(opaque), f); -} - -static int virtio_9p_load(QEMUFile *f, void *opaque, int version_id) -{ - return virtio_load(VIRTIO_DEVICE(opaque), f, version_id); + return virtio_load(VIRTIO_DEVICE(opaque), f, 1); } static void virtio_9p_device_realize(DeviceState *dev, Error **errp) @@ -120,7 +115,6 @@ static void virtio_9p_device_realize(DeviceState *dev, Error **errp) v->config_size = sizeof(struct virtio_9p_config) + strlen(s->fsconf.tag); virtio_init(vdev, "virtio-9p", VIRTIO_ID_9P, v->config_size); v->vq = virtio_add_queue(vdev, MAX_REQ, handle_9p_output); - register_savevm(dev, "virtio-9p", -1, 1, virtio_9p_save, virtio_9p_load, v); out: return; @@ -133,7 +127,6 @@ static void virtio_9p_device_unrealize(DeviceState *dev, Error **errp) V9fsState *s = &v->state; virtio_cleanup(vdev); - unregister_savevm(dev, "virtio-9p", v); v9fs_device_unrealize_common(s, errp); } @@ -175,6 +168,8 @@ void virtio_init_iov_from_pdu(V9fsPDU *pdu, struct iovec **piov, /* virtio-9p device */ +VMSTATE_VIRTIO_DEVICE(9p, 1, virtio_9p_load, 
virtio_vmstate_save); + static Property virtio_9p_properties[] = { DEFINE_PROP_STRING("mount_tag", V9fsVirtioState, state.fsconf.tag), DEFINE_PROP_STRING("fsdev", V9fsVirtioState, state.fsconf.fsdev_id), @@ -187,6 +182,7 @@ static void virtio_9p_class_init(ObjectClass *klass, void *data) VirtioDeviceClass *vdc = VIRTIO_DEVICE_CLASS(klass); dc->props = virtio_9p_properties; + dc->vmsd = &vmstate_virtio_9p; set_bit(DEVICE_CATEGORY_STORAGE, dc->categories); vdc->realize = virtio_9p_device_realize; vdc->unrealize = virtio_9p_device_unrealize; diff --git a/hw/alpha/typhoon.c b/hw/alpha/typhoon.c index 97721b535d..883db13f96 100644 --- a/hw/alpha/typhoon.c +++ b/hw/alpha/typhoon.c @@ -824,7 +824,6 @@ PCIBus *typhoon_init(ram_addr_t ram_size, ISABus **isa_bus, int i; dev = qdev_create(NULL, TYPE_TYPHOON_PCI_HOST_BRIDGE); - qdev_init_nofail(dev); s = TYPHOON_PCI_HOST_BRIDGE(dev); phb = PCI_HOST_BRIDGE(dev); @@ -889,6 +888,7 @@ PCIBus *typhoon_init(ram_addr_t ram_size, ISABus **isa_bus, &s->pchip.reg_mem, &s->pchip.reg_io, 0, 64, TYPE_PCI_BUS); phb->bus = b; + qdev_init_nofail(dev); /* Host memory as seen from the PCI side, via the IOMMU. */ memory_region_init_iommu(&s->pchip.iommu, OBJECT(s), &typhoon_iommu_ops, diff --git a/hw/block/virtio-blk.c b/hw/block/virtio-blk.c index 357ff9081e..475a822f5a 100644 --- a/hw/block/virtio-blk.c +++ b/hw/block/virtio-blk.c @@ -798,7 +798,7 @@ static void virtio_blk_set_status(VirtIODevice *vdev, uint8_t status) } } -static void virtio_blk_save(QEMUFile *f, void *opaque) +static void virtio_blk_save(QEMUFile *f, void *opaque, size_t size) { VirtIODevice *vdev = VIRTIO_DEVICE(opaque); @@ -823,15 +823,12 @@ static void virtio_blk_save_device(VirtIODevice *vdev, QEMUFile *f) qemu_put_sbyte(f, 0); } -static int virtio_blk_load(QEMUFile *f, void *opaque, int version_id) +static int virtio_blk_load(QEMUFile *f, void *opaque, size_t size) { VirtIOBlock *s = opaque; VirtIODevice *vdev = VIRTIO_DEVICE(s); - if (version_id != 2) - return -EINVAL; - - return virtio_load(vdev, f, version_id); + return virtio_load(vdev, f, 2); } static int virtio_blk_load_device(VirtIODevice *vdev, QEMUFile *f, @@ -880,7 +877,6 @@ static void virtio_blk_device_realize(DeviceState *dev, Error **errp) VirtIOBlock *s = VIRTIO_BLK(dev); VirtIOBlkConf *conf = &s->conf; Error *err = NULL; - static int virtio_blk_id; unsigned i; if (!conf->conf.blk) { @@ -914,7 +910,7 @@ static void virtio_blk_device_realize(DeviceState *dev, Error **errp) s->sector_mask = (s->conf.conf.logical_block_size / BDRV_SECTOR_SIZE) - 1; for (i = 0; i < conf->num_queues; i++) { - virtio_add_queue(vdev, 128, virtio_blk_handle_output); + virtio_add_queue_aio(vdev, 128, virtio_blk_handle_output); } virtio_blk_data_plane_create(vdev, conf, &s->dataplane, &err); if (err != NULL) { @@ -924,8 +920,6 @@ static void virtio_blk_device_realize(DeviceState *dev, Error **errp) } s->change = qemu_add_vm_change_state_handler(virtio_blk_dma_restart_cb, s); - register_savevm(dev, "virtio-blk", virtio_blk_id++, 2, - virtio_blk_save, virtio_blk_load, s); blk_set_dev_ops(s->blk, &virtio_block_ops, s); blk_set_guest_block_size(s->blk, s->conf.conf.logical_block_size); @@ -940,7 +934,6 @@ static void virtio_blk_device_unrealize(DeviceState *dev, Error **errp) virtio_blk_data_plane_destroy(s->dataplane); s->dataplane = NULL; qemu_del_vm_change_state_handler(s->change); - unregister_savevm(dev, "virtio-blk", s); blockdev_mark_auto_del(s->blk); virtio_cleanup(vdev); } @@ -958,6 +951,8 @@ static void virtio_blk_instance_init(Object *obj) 
DEVICE(obj), NULL); } +VMSTATE_VIRTIO_DEVICE(blk, 2, virtio_blk_load, virtio_blk_save); + static Property virtio_blk_properties[] = { DEFINE_BLOCK_PROPERTIES(VirtIOBlock, conf.conf), DEFINE_BLOCK_ERROR_PROPERTIES(VirtIOBlock, conf.conf), @@ -979,6 +974,7 @@ static void virtio_blk_class_init(ObjectClass *klass, void *data) VirtioDeviceClass *vdc = VIRTIO_DEVICE_CLASS(klass); dc->props = virtio_blk_properties; + dc->vmsd = &vmstate_virtio_blk; set_bit(DEVICE_CATEGORY_STORAGE, dc->categories); vdc->realize = virtio_blk_device_realize; vdc->unrealize = virtio_blk_device_unrealize; diff --git a/hw/char/virtio-serial-bus.c b/hw/char/virtio-serial-bus.c index 6e5de6dec2..db57a38546 100644 --- a/hw/char/virtio-serial-bus.c +++ b/hw/char/virtio-serial-bus.c @@ -594,12 +594,6 @@ static void vser_reset(VirtIODevice *vdev) guest_reset(vser); } -static void virtio_serial_save(QEMUFile *f, void *opaque) -{ - /* The virtio device */ - virtio_save(VIRTIO_DEVICE(opaque), f); -} - static void virtio_serial_save_device(VirtIODevice *vdev, QEMUFile *f) { VirtIOSerial *s = VIRTIO_SERIAL(vdev); @@ -685,7 +679,7 @@ static void virtio_serial_post_load_timer_cb(void *opaque) s->post_load = NULL; } -static int fetch_active_ports_list(QEMUFile *f, int version_id, +static int fetch_active_ports_list(QEMUFile *f, VirtIOSerial *s, uint32_t nr_active_ports) { uint32_t i; @@ -702,6 +696,7 @@ static int fetch_active_ports_list(QEMUFile *f, int version_id, /* Items in struct VirtIOSerialPort */ for (i = 0; i < nr_active_ports; i++) { VirtIOSerialPort *port; + uint32_t elem_popped; uint32_t id; id = qemu_get_be32(f); @@ -714,37 +709,29 @@ static int fetch_active_ports_list(QEMUFile *f, int version_id, s->post_load->connected[i].port = port; s->post_load->connected[i].host_connected = qemu_get_byte(f); - if (version_id > 2) { - uint32_t elem_popped; + qemu_get_be32s(f, &elem_popped); + if (elem_popped) { + qemu_get_be32s(f, &port->iov_idx); + qemu_get_be64s(f, &port->iov_offset); - qemu_get_be32s(f, &elem_popped); - if (elem_popped) { - qemu_get_be32s(f, &port->iov_idx); - qemu_get_be64s(f, &port->iov_offset); + port->elem = + qemu_get_virtqueue_element(f, sizeof(VirtQueueElement)); - port->elem = - qemu_get_virtqueue_element(f, sizeof(VirtQueueElement)); - - /* - * Port was throttled on source machine. Let's - * unthrottle it here so data starts flowing again. - */ - virtio_serial_throttle_port(port, false); - } + /* + * Port was throttled on source machine. Let's + * unthrottle it here so data starts flowing again. 
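+ * (Unthrottling also flushes the element we just restored, so the destination resumes where the source stopped.)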
+ */ + virtio_serial_throttle_port(port, false); } } timer_mod(s->post_load->timer, 1); return 0; } -static int virtio_serial_load(QEMUFile *f, void *opaque, int version_id) +static int virtio_serial_load(QEMUFile *f, void *opaque, size_t size) { - if (version_id > 3) { - return -EINVAL; - } - /* The virtio device */ - return virtio_load(VIRTIO_DEVICE(opaque), f, version_id); + return virtio_load(VIRTIO_DEVICE(opaque), f, 3); } static int virtio_serial_load_device(VirtIODevice *vdev, QEMUFile *f, @@ -756,10 +743,6 @@ static int virtio_serial_load_device(VirtIODevice *vdev, QEMUFile *f, int ret; uint32_t tmp; - if (version_id < 2) { - return 0; - } - /* Unused */ qemu_get_be16s(f, (uint16_t *) &tmp); qemu_get_be16s(f, (uint16_t *) &tmp); @@ -781,7 +764,7 @@ static int virtio_serial_load_device(VirtIODevice *vdev, QEMUFile *f, qemu_get_be32s(f, &nr_active_ports); if (nr_active_ports) { - ret = fetch_active_ports_list(f, version_id, s, nr_active_ports); + ret = fetch_active_ports_list(f, s, nr_active_ports); if (ret) { return ret; } @@ -1049,13 +1032,6 @@ static void virtio_serial_device_realize(DeviceState *dev, Error **errp) vser->post_load = NULL; - /* - * Register for the savevm section with the virtio-console name - * to preserve backward compat - */ - register_savevm(dev, "virtio-console", -1, 3, virtio_serial_save, - virtio_serial_load, vser); - QLIST_INSERT_HEAD(&vserdevices.devices, vser, next); } @@ -1086,8 +1062,6 @@ static void virtio_serial_device_unrealize(DeviceState *dev, Error **errp) QLIST_REMOVE(vser, next); - unregister_savevm(dev, "virtio-console", vser); - g_free(vser->ivqs); g_free(vser->ovqs); g_free(vser->ports_map); @@ -1100,6 +1074,9 @@ static void virtio_serial_device_unrealize(DeviceState *dev, Error **errp) virtio_cleanup(vdev); } +/* Note: 'console' is used for backwards compatibility */ +VMSTATE_VIRTIO_DEVICE(console, 3, virtio_serial_load, virtio_vmstate_save); + static Property virtio_serial_properties[] = { DEFINE_PROP_UINT32("max_ports", VirtIOSerial, serial.max_virtserial_ports, 31), @@ -1115,6 +1092,7 @@ static void virtio_serial_class_init(ObjectClass *klass, void *data) QLIST_INIT(&vserdevices.devices); dc->props = virtio_serial_properties; + dc->vmsd = &vmstate_virtio_console; set_bit(DEVICE_CATEGORY_INPUT, dc->categories); vdc->realize = virtio_serial_device_realize; vdc->unrealize = virtio_serial_device_unrealize; diff --git a/hw/display/virtio-gpu.c b/hw/display/virtio-gpu.c index 929c3c8b8a..7fe6ed8bf0 100644 --- a/hw/display/virtio-gpu.c +++ b/hw/display/virtio-gpu.c @@ -19,6 +19,7 @@ #include "hw/virtio/virtio.h" #include "hw/virtio/virtio-gpu.h" #include "hw/virtio/virtio-bus.h" +#include "migration/migration.h" #include "qemu/log.h" #include "qapi/error.h" @@ -986,12 +987,7 @@ static const VMStateDescription vmstate_virtio_gpu_scanouts = { }, }; -static const VMStateDescription vmstate_virtio_gpu_unmigratable = { - .name = "virtio-gpu-with-virgl", - .unmigratable = 1, -}; - -static void virtio_gpu_save(QEMUFile *f, void *opaque) +static void virtio_gpu_save(QEMUFile *f, void *opaque, size_t size) { VirtIOGPU *g = opaque; VirtIODevice *vdev = VIRTIO_DEVICE(g); @@ -1021,7 +1017,7 @@ static void virtio_gpu_save(QEMUFile *f, void *opaque) vmstate_save_state(f, &vmstate_virtio_gpu_scanouts, g, NULL); } -static int virtio_gpu_load(QEMUFile *f, void *opaque, int version_id) +static int virtio_gpu_load(QEMUFile *f, void *opaque, size_t size) { VirtIOGPU *g = opaque; VirtIODevice *vdev = VIRTIO_DEVICE(g); @@ -1030,11 +1026,7 @@ static int 
virtio_gpu_load(QEMUFile *f, void *opaque, int version_id) uint32_t resource_id, pformat; int i, ret; - if (version_id != VIRTIO_GPU_VM_VERSION) { - return -EINVAL; - } - - ret = virtio_load(vdev, f, version_id); + ret = virtio_load(vdev, f, VIRTIO_GPU_VM_VERSION); if (ret) { return ret; } @@ -1169,10 +1161,17 @@ static void virtio_gpu_device_realize(DeviceState *qdev, Error **errp) } if (virtio_gpu_virgl_enabled(g->conf)) { - vmstate_register(qdev, -1, &vmstate_virtio_gpu_unmigratable, g); - } else { - register_savevm(qdev, "virtio-gpu", -1, VIRTIO_GPU_VM_VERSION, - virtio_gpu_save, virtio_gpu_load, g); + error_setg(&g->migration_blocker, "virgl is not yet migratable"); + migrate_add_blocker(g->migration_blocker); + } +} + +static void virtio_gpu_device_unrealize(DeviceState *qdev, Error **errp) +{ + VirtIOGPU *g = VIRTIO_GPU(qdev); + if (g->migration_blocker) { + migrate_del_blocker(g->migration_blocker); + error_free(g->migration_blocker); } } @@ -1220,6 +1219,9 @@ static void virtio_gpu_reset(VirtIODevice *vdev) #endif } +VMSTATE_VIRTIO_DEVICE(gpu, VIRTIO_GPU_VM_VERSION, virtio_gpu_load, + virtio_gpu_save); + static Property virtio_gpu_properties[] = { DEFINE_PROP_UINT32("max_outputs", VirtIOGPU, conf.max_outputs, 1), #ifdef CONFIG_VIRGL @@ -1237,6 +1239,7 @@ static void virtio_gpu_class_init(ObjectClass *klass, void *data) VirtioDeviceClass *vdc = VIRTIO_DEVICE_CLASS(klass); vdc->realize = virtio_gpu_device_realize; + vdc->unrealize = virtio_gpu_device_unrealize; vdc->get_config = virtio_gpu_get_config; vdc->set_config = virtio_gpu_set_config; vdc->get_features = virtio_gpu_get_features; @@ -1245,6 +1248,7 @@ static void virtio_gpu_class_init(ObjectClass *klass, void *data) vdc->reset = virtio_gpu_reset; dc->props = virtio_gpu_properties; + dc->vmsd = &vmstate_virtio_gpu; } static const TypeInfo virtio_gpu_info = { diff --git a/hw/i386/Makefile.objs b/hw/i386/Makefile.objs index b52d5b8756..90e94ffefd 100644 --- a/hw/i386/Makefile.objs +++ b/hw/i386/Makefile.objs @@ -2,7 +2,7 @@ obj-$(CONFIG_KVM) += kvm/ obj-y += multiboot.o obj-y += pc.o pc_piix.o pc_q35.o obj-y += pc_sysfw.o -obj-y += intel_iommu.o +obj-y += x86-iommu.o intel_iommu.o obj-$(CONFIG_XEN) += ../xenpv/ xen/ obj-y += kvmvapic.o diff --git a/hw/i386/acpi-build.c b/hw/i386/acpi-build.c index fbba461a87..77c40d92e2 100644 --- a/hw/i386/acpi-build.c +++ b/hw/i386/acpi-build.c @@ -52,13 +52,14 @@ #include "hw/i386/ich9.h" #include "hw/pci/pci_bus.h" #include "hw/pci-host/q35.h" -#include "hw/i386/intel_iommu.h" +#include "hw/i386/x86-iommu.h" #include "hw/timer/hpet.h" #include "hw/acpi/aml-build.h" #include "qapi/qmp/qint.h" #include "qom/qom-qobject.h" +#include "hw/i386/x86-iommu.h" #include "hw/acpi/ipmi.h" @@ -80,6 +81,9 @@ #define ACPI_BUILD_DPRINTF(fmt, ...) #endif +/* Default IOAPIC ID */ +#define ACPI_BUILD_IOAPIC_ID 0x0 + typedef struct AcpiMcfgInfo { uint64_t mcfg_base; uint32_t mcfg_size; @@ -383,7 +387,6 @@ build_madt(GArray *table_data, BIOSLinker *linker, PCMachineState *pcms) io_apic = acpi_data_push(table_data, sizeof *io_apic); io_apic->type = ACPI_APIC_IO; io_apic->length = sizeof(*io_apic); -#define ACPI_BUILD_IOAPIC_ID 0x0 io_apic->io_apic_id = ACPI_BUILD_IOAPIC_ID; io_apic->address = cpu_to_le32(IO_APIC_DEFAULT_ADDRESS); io_apic->interrupt = cpu_to_le32(0); @@ -2454,6 +2457,10 @@ build_mcfg_q35(GArray *table_data, BIOSLinker *linker, AcpiMcfgInfo *info) build_header(linker, table_data, (void *)mcfg, sig, len, 1, NULL, NULL); } +/* + * VT-d spec 8.1 DMA Remapping Reporting Structure + * (version Oct. 
2014 or later) + */ static void build_dmar_q35(GArray *table_data, BIOSLinker *linker) { @@ -2461,19 +2468,38 @@ build_dmar_q35(GArray *table_data, BIOSLinker *linker) AcpiTableDmar *dmar; AcpiDmarHardwareUnit *drhd; + uint8_t dmar_flags = 0; + X86IOMMUState *iommu = x86_iommu_get_default(); + AcpiDmarDeviceScope *scope = NULL; + /* Root complex IOAPIC use one path[0] only */ + size_t ioapic_scope_size = sizeof(*scope) + sizeof(scope->path[0]); + + assert(iommu); + if (iommu->intr_supported) { + dmar_flags |= 0x1; /* Flags: 0x1: INT_REMAP */ + } dmar = acpi_data_push(table_data, sizeof(*dmar)); dmar->host_address_width = VTD_HOST_ADDRESS_WIDTH - 1; - dmar->flags = 0; /* No intr_remap for now */ + dmar->flags = dmar_flags; /* DMAR Remapping Hardware Unit Definition structure */ - drhd = acpi_data_push(table_data, sizeof(*drhd)); + drhd = acpi_data_push(table_data, sizeof(*drhd) + ioapic_scope_size); drhd->type = cpu_to_le16(ACPI_DMAR_TYPE_HARDWARE_UNIT); - drhd->length = cpu_to_le16(sizeof(*drhd)); /* No device scope now */ + drhd->length = cpu_to_le16(sizeof(*drhd) + ioapic_scope_size); drhd->flags = ACPI_DMAR_INCLUDE_PCI_ALL; drhd->pci_segment = cpu_to_le16(0); drhd->address = cpu_to_le64(Q35_HOST_BRIDGE_IOMMU_ADDR); + /* Scope definition for the root-complex IOAPIC. See VT-d spec + * 8.3.1 (version Oct. 2014 or later). */ + scope = &drhd->scope[0]; + scope->entry_type = 0x03; /* Type: 0x03 for IOAPIC */ + scope->length = ioapic_scope_size; + scope->enumeration_id = ACPI_BUILD_IOAPIC_ID; + scope->bus = Q35_PSEUDO_BUS_PLATFORM; + scope->path[0] = cpu_to_le16(Q35_PSEUDO_DEVFN_IOAPIC); + build_header(linker, table_data, (void *)(table_data->data + dmar_start), "DMAR", table_data->len - dmar_start, 1, NULL, NULL); } @@ -2539,12 +2565,7 @@ static bool acpi_get_mcfg(AcpiMcfgInfo *mcfg) static bool acpi_has_iommu(void) { - bool ambiguous; - Object *intel_iommu; - - intel_iommu = object_resolve_path_type("", TYPE_INTEL_IOMMU_DEVICE, - &ambiguous); - return intel_iommu && !ambiguous; + return !!x86_iommu_get_default(); } static diff --git a/hw/i386/intel_iommu.c b/hw/i386/intel_iommu.c index 464f2a0518..28c31a2cdf 100644 --- a/hw/i386/intel_iommu.c +++ b/hw/i386/intel_iommu.c @@ -20,18 +20,23 @@ */ #include "qemu/osdep.h" +#include "qemu/error-report.h" #include "hw/sysbus.h" #include "exec/address-spaces.h" #include "intel_iommu_internal.h" #include "hw/pci/pci.h" #include "hw/pci/pci_bus.h" #include "hw/i386/pc.h" +#include "hw/boards.h" +#include "hw/i386/x86-iommu.h" +#include "hw/pci-host/q35.h" +#include "sysemu/kvm.h" /*#define DEBUG_INTEL_IOMMU*/ #ifdef DEBUG_INTEL_IOMMU enum { DEBUG_GENERAL, DEBUG_CSR, DEBUG_INV, DEBUG_MMU, DEBUG_FLOG, - DEBUG_CACHE, + DEBUG_CACHE, DEBUG_IR, }; #define VTD_DBGBIT(x) (1 << DEBUG_##x) static int vtd_dbgflags = VTD_DBGBIT(GENERAL) | VTD_DBGBIT(CSR); @@ -192,7 +197,7 @@ static void vtd_reset_context_cache(IntelIOMMUState *s) VTD_DPRINTF(CACHE, "global context_cache_gen=1"); while (g_hash_table_iter_next (&bus_it, NULL, (void**)&vtd_bus)) { - for (devfn_it = 0; devfn_it < VTD_PCI_DEVFN_MAX; ++devfn_it) { + for (devfn_it = 0; devfn_it < X86_IOMMU_PCI_DEVFN_MAX; ++devfn_it) { vtd_as = vtd_bus->dev_as[devfn_it]; if (!vtd_as) { continue; @@ -901,6 +906,27 @@ static void vtd_root_table_setup(IntelIOMMUState *s) (s->root_extended ? 
"(extended)" : "")); } +static void vtd_iec_notify_all(IntelIOMMUState *s, bool global, + uint32_t index, uint32_t mask) +{ + x86_iommu_iec_notify_all(X86_IOMMU_DEVICE(s), global, index, mask); +} + +static void vtd_interrupt_remap_table_setup(IntelIOMMUState *s) +{ + uint64_t value = 0; + value = vtd_get_quad_raw(s, DMAR_IRTA_REG); + s->intr_size = 1UL << ((value & VTD_IRTA_SIZE_MASK) + 1); + s->intr_root = value & VTD_IRTA_ADDR_MASK; + s->intr_eime = value & VTD_IRTA_EIME; + + /* Notify global invalidation */ + vtd_iec_notify_all(s, true, 0, 0); + + VTD_DPRINTF(CSR, "int remap table addr 0x%"PRIx64 " size %"PRIu32, + s->intr_root, s->intr_size); +} + static void vtd_context_global_invalidate(IntelIOMMUState *s) { s->context_cache_gen++; @@ -964,7 +990,7 @@ static void vtd_context_device_invalidate(IntelIOMMUState *s, vtd_bus = vtd_find_as_from_bus_num(s, VTD_SID_TO_BUS(source_id)); if (vtd_bus) { devfn = VTD_SID_TO_DEVFN(source_id); - for (devfn_it = 0; devfn_it < VTD_PCI_DEVFN_MAX; ++devfn_it) { + for (devfn_it = 0; devfn_it < X86_IOMMU_PCI_DEVFN_MAX; ++devfn_it) { vtd_as = vtd_bus->dev_as[devfn_it]; if (vtd_as && ((devfn_it & mask) == (devfn & mask))) { VTD_DPRINTF(INV, "invalidate context-cahce of devfn 0x%"PRIx16, @@ -1139,6 +1165,16 @@ static void vtd_handle_gcmd_srtp(IntelIOMMUState *s) vtd_set_clear_mask_long(s, DMAR_GSTS_REG, 0, VTD_GSTS_RTPS); } +/* Set Interrupt Remap Table Pointer */ +static void vtd_handle_gcmd_sirtp(IntelIOMMUState *s) +{ + VTD_DPRINTF(CSR, "set Interrupt Remap Table Pointer"); + + vtd_interrupt_remap_table_setup(s); + /* Ok - report back to driver */ + vtd_set_clear_mask_long(s, DMAR_GSTS_REG, 0, VTD_GSTS_IRTPS); +} + /* Handle Translation Enable/Disable */ static void vtd_handle_gcmd_te(IntelIOMMUState *s, bool en) { @@ -1158,6 +1194,22 @@ static void vtd_handle_gcmd_te(IntelIOMMUState *s, bool en) } } +/* Handle Interrupt Remap Enable/Disable */ +static void vtd_handle_gcmd_ire(IntelIOMMUState *s, bool en) +{ + VTD_DPRINTF(CSR, "Interrupt Remap Enable %s", (en ? 
"on" : "off")); + + if (en) { + s->intr_enabled = true; + /* Ok - report back to driver */ + vtd_set_clear_mask_long(s, DMAR_GSTS_REG, 0, VTD_GSTS_IRES); + } else { + s->intr_enabled = false; + /* Ok - report back to driver */ + vtd_set_clear_mask_long(s, DMAR_GSTS_REG, VTD_GSTS_IRES, 0); + } +} + /* Handle write to Global Command Register */ static void vtd_handle_gcmd_write(IntelIOMMUState *s) { @@ -1178,6 +1230,14 @@ static void vtd_handle_gcmd_write(IntelIOMMUState *s) /* Queued Invalidation Enable */ vtd_handle_gcmd_qie(s, val & VTD_GCMD_QIE); } + if (val & VTD_GCMD_SIRTP) { + /* Set/update the interrupt remapping root-table pointer */ + vtd_handle_gcmd_sirtp(s); + } + if (changed & VTD_GCMD_IRE) { + /* Interrupt remap enable/disable */ + vtd_handle_gcmd_ire(s, val & VTD_GCMD_IRE); + } } /* Handle write to Context Command Register */ @@ -1363,6 +1423,21 @@ static bool vtd_process_iotlb_desc(IntelIOMMUState *s, VTDInvDesc *inv_desc) return true; } +static bool vtd_process_inv_iec_desc(IntelIOMMUState *s, + VTDInvDesc *inv_desc) +{ + VTD_DPRINTF(INV, "inv ir glob %d index %d mask %d", + inv_desc->iec.granularity, + inv_desc->iec.index, + inv_desc->iec.index_mask); + + vtd_iec_notify_all(s, !inv_desc->iec.granularity, + inv_desc->iec.index, + inv_desc->iec.index_mask); + + return true; +} + static bool vtd_process_inv_desc(IntelIOMMUState *s) { VTDInvDesc inv_desc; @@ -1402,6 +1477,15 @@ static bool vtd_process_inv_desc(IntelIOMMUState *s) } break; + case VTD_INV_DESC_IEC: + VTD_DPRINTF(INV, "Invalidation Interrupt Entry Cache " + "Descriptor hi 0x%"PRIx64 " lo 0x%"PRIx64, + inv_desc.hi, inv_desc.lo); + if (!vtd_process_inv_iec_desc(s, &inv_desc)) { + return false; + } + break; + default: VTD_DPRINTF(GENERAL, "error: unkonw Invalidation Descriptor type " "hi 0x%"PRIx64 " lo 0x%"PRIx64 " type %"PRIu8, @@ -1830,6 +1914,23 @@ static void vtd_mem_write(void *opaque, hwaddr addr, vtd_update_fsts_ppf(s); break; + case DMAR_IRTA_REG: + VTD_DPRINTF(IR, "DMAR_IRTA_REG write addr 0x%"PRIx64 + ", size %d, val 0x%"PRIx64, addr, size, val); + if (size == 4) { + vtd_set_long(s, addr, val); + } else { + vtd_set_quad(s, addr, val); + } + break; + + case DMAR_IRTA_REG_HI: + VTD_DPRINTF(IR, "DMAR_IRTA_REG_HI write addr 0x%"PRIx64 + ", size %d, val 0x%"PRIx64, addr, size, val); + assert(size == 4); + vtd_set_long(s, addr, val); + break; + default: VTD_DPRINTF(GENERAL, "error: unhandled reg write addr 0x%"PRIx64 ", size %d, val 0x%"PRIx64, addr, size, val); @@ -1907,6 +2008,295 @@ static Property vtd_properties[] = { DEFINE_PROP_END_OF_LIST(), }; +/* Read IRTE entry with specific index */ +static int vtd_irte_get(IntelIOMMUState *iommu, uint16_t index, + VTD_IR_TableEntry *entry, uint16_t sid) +{ + static const uint16_t vtd_svt_mask[VTD_SQ_MAX] = \ + {0xffff, 0xfffb, 0xfff9, 0xfff8}; + dma_addr_t addr = 0x00; + uint16_t mask, source_id; + uint8_t bus, bus_max, bus_min; + + addr = iommu->intr_root + index * sizeof(*entry); + if (dma_memory_read(&address_space_memory, addr, entry, + sizeof(*entry))) { + VTD_DPRINTF(GENERAL, "error: fail to access IR root at 0x%"PRIx64 + " + %"PRIu16, iommu->intr_root, index); + return -VTD_FR_IR_ROOT_INVAL; + } + + if (!entry->irte.present) { + VTD_DPRINTF(GENERAL, "error: present flag not set in IRTE" + " entry index %u value 0x%"PRIx64 " 0x%"PRIx64, + index, le64_to_cpu(entry->data[1]), + le64_to_cpu(entry->data[0])); + return -VTD_FR_IR_ENTRY_P; + } + + if (entry->irte.__reserved_0 || entry->irte.__reserved_1 || + entry->irte.__reserved_2) { + VTD_DPRINTF(GENERAL, "error: 
IRTE entry index %"PRIu16 + " reserved fields non-zero: 0x%"PRIx64 " 0x%"PRIx64, + index, le64_to_cpu(entry->data[1]), + le64_to_cpu(entry->data[0])); + return -VTD_FR_IR_IRTE_RSVD; + } + + if (sid != X86_IOMMU_SID_INVALID) { + /* Validate IRTE SID */ + source_id = le32_to_cpu(entry->irte.source_id); + switch (entry->irte.sid_vtype) { + case VTD_SVT_NONE: + VTD_DPRINTF(IR, "No SID validation for IRTE index %d", index); + break; + + case VTD_SVT_ALL: + mask = vtd_svt_mask[entry->irte.sid_q]; + if ((source_id & mask) != (sid & mask)) { + VTD_DPRINTF(GENERAL, "SID validation for IRTE index " + "%d failed (reqid 0x%04x sid 0x%04x)", index, + sid, source_id); + return -VTD_FR_IR_SID_ERR; + } + break; + + case VTD_SVT_BUS: + bus_max = source_id >> 8; + bus_min = source_id & 0xff; + bus = sid >> 8; + if (bus > bus_max || bus < bus_min) { + VTD_DPRINTF(GENERAL, "SID validation for IRTE index %d " + "failed (bus %d outside %d-%d)", index, bus, + bus_min, bus_max); + return -VTD_FR_IR_SID_ERR; + } + break; + + default: + VTD_DPRINTF(GENERAL, "Invalid SVT bits (0x%x) in IRTE index " + "%d", entry->irte.sid_vtype, index); + /* Take this as verification failure. */ + return -VTD_FR_IR_SID_ERR; + break; + } + } + + return 0; +} + +/* Fetch IRQ information of specific IR index */ +static int vtd_remap_irq_get(IntelIOMMUState *iommu, uint16_t index, + VTDIrq *irq, uint16_t sid) +{ + VTD_IR_TableEntry irte = {}; + int ret = 0; + + ret = vtd_irte_get(iommu, index, &irte, sid); + if (ret) { + return ret; + } + + irq->trigger_mode = irte.irte.trigger_mode; + irq->vector = irte.irte.vector; + irq->delivery_mode = irte.irte.delivery_mode; + irq->dest = le32_to_cpu(irte.irte.dest_id); + if (!iommu->intr_eime) { +#define VTD_IR_APIC_DEST_MASK (0xff00ULL) +#define VTD_IR_APIC_DEST_SHIFT (8) + irq->dest = (irq->dest & VTD_IR_APIC_DEST_MASK) >> + VTD_IR_APIC_DEST_SHIFT; + } + irq->dest_mode = irte.irte.dest_mode; + irq->redir_hint = irte.irte.redir_hint; + + VTD_DPRINTF(IR, "remapping interrupt index %d: trig:%u,vec:%u," + "deliver:%u,dest:%u,dest_mode:%u", index, + irq->trigger_mode, irq->vector, irq->delivery_mode, + irq->dest, irq->dest_mode); + + return 0; +} + +/* Generate one MSI message from VTDIrq info */ +static void vtd_generate_msi_message(VTDIrq *irq, MSIMessage *msg_out) +{ + VTD_MSIMessage msg = {}; + + /* Generate address bits */ + msg.dest_mode = irq->dest_mode; + msg.redir_hint = irq->redir_hint; + msg.dest = irq->dest; + msg.__addr_head = cpu_to_le32(0xfee); + /* Keep this from original MSI address bits */ + msg.__not_used = irq->msi_addr_last_bits; + + /* Generate data bits */ + msg.vector = irq->vector; + msg.delivery_mode = irq->delivery_mode; + msg.level = 1; + msg.trigger_mode = irq->trigger_mode; + + msg_out->address = msg.msi_addr; + msg_out->data = msg.msi_data; +} + +/* Interrupt remapping for MSI/MSI-X entry */ +static int vtd_interrupt_remap_msi(IntelIOMMUState *iommu, + MSIMessage *origin, + MSIMessage *translated, + uint16_t sid) +{ + int ret = 0; + VTD_IR_MSIAddress addr; + uint16_t index; + VTDIrq irq = {}; + + assert(origin && translated); + + if (!iommu || !iommu->intr_enabled) { + goto do_not_translate; + } + + if (origin->address & VTD_MSI_ADDR_HI_MASK) { + VTD_DPRINTF(GENERAL, "error: MSI addr high 32 bits nonzero" + " during interrupt remapping: 0x%"PRIx32, + (uint32_t)((origin->address & VTD_MSI_ADDR_HI_MASK) >> \ + VTD_MSI_ADDR_HI_SHIFT)); + return -VTD_FR_IR_REQ_RSVD; + } + + addr.data = origin->address & VTD_MSI_ADDR_LO_MASK; + if (le16_to_cpu(addr.addr.__head) != 0xfee) { 
+ VTD_DPRINTF(GENERAL, "error: MSI addr low 32 bits invalid: " + "0x%"PRIx32, addr.data); + return -VTD_FR_IR_REQ_RSVD; + } + + /* This is compatible mode. */ + if (addr.addr.int_mode != VTD_IR_INT_FORMAT_REMAP) { + goto do_not_translate; + } + + index = addr.addr.index_h << 15 | le16_to_cpu(addr.addr.index_l); + +#define VTD_IR_MSI_DATA_SUBHANDLE (0x0000ffff) +#define VTD_IR_MSI_DATA_RESERVED (0xffff0000) + + if (addr.addr.sub_valid) { + /* See VT-d spec 5.1.2.2 and 5.1.3 on subhandle */ + index += origin->data & VTD_IR_MSI_DATA_SUBHANDLE; + } + + ret = vtd_remap_irq_get(iommu, index, &irq, sid); + if (ret) { + return ret; + } + + if (addr.addr.sub_valid) { + VTD_DPRINTF(IR, "received MSI interrupt"); + if (origin->data & VTD_IR_MSI_DATA_RESERVED) { + VTD_DPRINTF(GENERAL, "error: MSI data bits non-zero for " + "interrupt remappable entry: 0x%"PRIx32, + origin->data); + return -VTD_FR_IR_REQ_RSVD; + } + } else { + uint8_t vector = origin->data & 0xff; + VTD_DPRINTF(IR, "received IOAPIC interrupt"); + /* IOAPIC entry vector should be aligned with IRTE vector + * (see vt-d spec 5.1.5.1). */ + if (vector != irq.vector) { + VTD_DPRINTF(GENERAL, "IOAPIC vector inconsistent: " + "entry: %d, IRTE: %d, index: %d", + vector, irq.vector, index); + } + } + + /* + * We'd better keep the last two bits, assuming that guest OS + * might modify it. Keep it does not hurt after all. + */ + irq.msi_addr_last_bits = addr.addr.__not_care; + + /* Translate VTDIrq to MSI message */ + vtd_generate_msi_message(&irq, translated); + + VTD_DPRINTF(IR, "mapping MSI 0x%"PRIx64":0x%"PRIx32 " -> " + "0x%"PRIx64":0x%"PRIx32, origin->address, origin->data, + translated->address, translated->data); + return 0; + +do_not_translate: + memcpy(translated, origin, sizeof(*origin)); + return 0; +} + +static int vtd_int_remap(X86IOMMUState *iommu, MSIMessage *src, + MSIMessage *dst, uint16_t sid) +{ + return vtd_interrupt_remap_msi(INTEL_IOMMU_DEVICE(iommu), + src, dst, sid); +} + +static MemTxResult vtd_mem_ir_read(void *opaque, hwaddr addr, + uint64_t *data, unsigned size, + MemTxAttrs attrs) +{ + return MEMTX_OK; +} + +static MemTxResult vtd_mem_ir_write(void *opaque, hwaddr addr, + uint64_t value, unsigned size, + MemTxAttrs attrs) +{ + int ret = 0; + MSIMessage from = {}, to = {}; + uint16_t sid = X86_IOMMU_SID_INVALID; + + from.address = (uint64_t) addr + VTD_INTERRUPT_ADDR_FIRST; + from.data = (uint32_t) value; + + if (!attrs.unspecified) { + /* We have explicit Source ID */ + sid = attrs.requester_id; + } + + ret = vtd_interrupt_remap_msi(opaque, &from, &to, sid); + if (ret) { + /* TODO: report error */ + VTD_DPRINTF(GENERAL, "int remap fail for addr 0x%"PRIx64 + " data 0x%"PRIx32, from.address, from.data); + /* Drop this interrupt */ + return MEMTX_ERROR; + } + + VTD_DPRINTF(IR, "delivering MSI 0x%"PRIx64":0x%"PRIx32 + " for device sid 0x%04x", + to.address, to.data, sid); + + if (dma_memory_write(&address_space_memory, to.address, + &to.data, size)) { + VTD_DPRINTF(GENERAL, "error: fail to write 0x%"PRIx64 + " value 0x%"PRIx32, to.address, to.data); + } + + return MEMTX_OK; +} + +static const MemoryRegionOps vtd_mem_ir_ops = { + .read_with_attrs = vtd_mem_ir_read, + .write_with_attrs = vtd_mem_ir_write, + .endianness = DEVICE_LITTLE_ENDIAN, + .impl = { + .min_access_size = 4, + .max_access_size = 4, + }, + .valid = { + .min_access_size = 4, + .max_access_size = 4, + }, +}; VTDAddressSpace *vtd_find_add_as(IntelIOMMUState *s, PCIBus *bus, int devfn) { @@ -1916,7 +2306,8 @@ VTDAddressSpace *vtd_find_add_as(IntelIOMMUState 
*s, PCIBus *bus, int devfn) if (!vtd_bus) { /* No corresponding free() */ - vtd_bus = g_malloc0(sizeof(VTDBus) + sizeof(VTDAddressSpace *) * VTD_PCI_DEVFN_MAX); + vtd_bus = g_malloc0(sizeof(VTDBus) + sizeof(VTDAddressSpace *) * \ + X86_IOMMU_PCI_DEVFN_MAX); vtd_bus->bus = bus; key = (uintptr_t)bus; g_hash_table_insert(s->vtd_as_by_busptr, &key, vtd_bus); @@ -1933,6 +2324,11 @@ VTDAddressSpace *vtd_find_add_as(IntelIOMMUState *s, PCIBus *bus, int devfn) vtd_dev_as->context_cache_entry.context_cache_gen = 0; memory_region_init_iommu(&vtd_dev_as->iommu, OBJECT(s), &s->iommu_ops, "intel_iommu", UINT64_MAX); + memory_region_init_io(&vtd_dev_as->iommu_ir, OBJECT(s), + &vtd_mem_ir_ops, s, "intel_iommu_ir", + VTD_INTERRUPT_ADDR_SIZE); + memory_region_add_subregion(&vtd_dev_as->iommu, VTD_INTERRUPT_ADDR_FIRST, + &vtd_dev_as->iommu_ir); address_space_init(&vtd_dev_as->as, &vtd_dev_as->iommu, "intel_iommu"); } @@ -1944,6 +2340,8 @@ VTDAddressSpace *vtd_find_add_as(IntelIOMMUState *s, PCIBus *bus, int devfn) */ static void vtd_init(IntelIOMMUState *s) { + X86IOMMUState *x86_iommu = X86_IOMMU_DEVICE(s); + memset(s->csr, 0, DMAR_REG_SIZE); memset(s->wmask, 0, DMAR_REG_SIZE); memset(s->w1cmask, 0, DMAR_REG_SIZE); @@ -1965,6 +2363,10 @@ static void vtd_init(IntelIOMMUState *s) VTD_CAP_SAGAW | VTD_CAP_MAMV | VTD_CAP_PSI | VTD_CAP_SLLPS; s->ecap = VTD_ECAP_QI | VTD_ECAP_IRO; + if (x86_iommu->intr_supported) { + s->ecap |= VTD_ECAP_IR | VTD_ECAP_EIM | VTD_ECAP_MHMV; + } + vtd_reset_context_cache(s); vtd_reset_iotlb(s); @@ -2014,6 +2416,11 @@ static void vtd_init(IntelIOMMUState *s) /* Fault Recording Registers, 128-bit */ vtd_define_quad(s, DMAR_FRCD_REG_0_0, 0, 0, 0); vtd_define_quad(s, DMAR_FRCD_REG_0_2, 0, 0, 0x8000000000000000ULL); + + /* + * Interrupt remapping registers. + */ + vtd_define_quad(s, DMAR_IRTA_REG, 0, 0xfffffffffffff80fULL, 0); } /* Should not reset address_spaces when reset because devices will still use @@ -2032,7 +2439,7 @@ static AddressSpace *vtd_host_dma_iommu(PCIBus *bus, void *opaque, int devfn) IntelIOMMUState *s = opaque; VTDAddressSpace *vtd_as; - assert(0 <= devfn && devfn <= VTD_PCI_DEVFN_MAX); + assert(0 <= devfn && devfn <= X86_IOMMU_PCI_DEVFN_MAX); vtd_as = vtd_find_add_as(s, bus, devfn); return &vtd_as->as; @@ -2040,8 +2447,10 @@ static AddressSpace *vtd_host_dma_iommu(PCIBus *bus, void *opaque, int devfn) static void vtd_realize(DeviceState *dev, Error **errp) { - PCIBus *bus = PC_MACHINE(qdev_get_machine())->bus; + PCMachineState *pcms = PC_MACHINE(qdev_get_machine()); + PCIBus *bus = pcms->bus; IntelIOMMUState *s = INTEL_IOMMU_DEVICE(dev); + X86IOMMUState *x86_iommu = X86_IOMMU_DEVICE(dev); VTD_DPRINTF(GENERAL, ""); memset(s->vtd_as_by_bus_num, 0, sizeof(s->vtd_as_by_bus_num)); @@ -2056,22 +2465,34 @@ static void vtd_realize(DeviceState *dev, Error **errp) vtd_init(s); sysbus_mmio_map(SYS_BUS_DEVICE(s), 0, Q35_HOST_BRIDGE_IOMMU_ADDR); pci_setup_iommu(bus, vtd_host_dma_iommu, dev); + /* Pseudo address space under root PCI bus. 
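+ * The IOAPIC writes its interrupt messages into this address space, so they pass through the IOMMU's interrupt-remapping region.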
*/ + pcms->ioapic_as = vtd_host_dma_iommu(bus, s, Q35_PSEUDO_DEVFN_IOAPIC); + + /* Currently Intel IOMMU IR only support "kernel-irqchip={off|split}" */ + if (x86_iommu->intr_supported && kvm_irqchip_in_kernel() && + !kvm_irqchip_is_split()) { + error_report("Intel Interrupt Remapping cannot work with " + "kernel-irqchip=on, please use 'split|off'."); + exit(1); + } } static void vtd_class_init(ObjectClass *klass, void *data) { DeviceClass *dc = DEVICE_CLASS(klass); + X86IOMMUClass *x86_class = X86_IOMMU_CLASS(klass); dc->reset = vtd_reset; - dc->realize = vtd_realize; dc->vmsd = &vtd_vmstate; dc->props = vtd_properties; dc->hotpluggable = false; + x86_class->realize = vtd_realize; + x86_class->int_remap = vtd_int_remap; } static const TypeInfo vtd_info = { .name = TYPE_INTEL_IOMMU_DEVICE, - .parent = TYPE_SYS_BUS_DEVICE, + .parent = TYPE_X86_IOMMU_DEVICE, .instance_size = sizeof(IntelIOMMUState), .class_init = vtd_class_init, }; diff --git a/hw/i386/intel_iommu_internal.h b/hw/i386/intel_iommu_internal.h index e5f514c6e3..0829a5064f 100644 --- a/hw/i386/intel_iommu_internal.h +++ b/hw/i386/intel_iommu_internal.h @@ -110,6 +110,8 @@ /* Interrupt Address Range */ #define VTD_INTERRUPT_ADDR_FIRST 0xfee00000ULL #define VTD_INTERRUPT_ADDR_LAST 0xfeefffffULL +#define VTD_INTERRUPT_ADDR_SIZE (VTD_INTERRUPT_ADDR_LAST - \ + VTD_INTERRUPT_ADDR_FIRST + 1) /* The shift of source_id in the key of IOTLB hash table */ #define VTD_IOTLB_SID_SHIFT 36 @@ -172,10 +174,19 @@ #define VTD_RTADDR_RTT (1ULL << 11) #define VTD_RTADDR_ADDR_MASK (VTD_HAW_MASK ^ 0xfffULL) +/* IRTA_REG */ +#define VTD_IRTA_ADDR_MASK (VTD_HAW_MASK ^ 0xfffULL) +#define VTD_IRTA_EIME (1ULL << 11) +#define VTD_IRTA_SIZE_MASK (0xfULL) + /* ECAP_REG */ /* (offset >> 4) << 8 */ #define VTD_ECAP_IRO (DMAR_IOTLB_REG_OFFSET << 4) #define VTD_ECAP_QI (1ULL << 1) +/* Interrupt Remapping support */ +#define VTD_ECAP_IR (1ULL << 3) +#define VTD_ECAP_EIM (1ULL << 4) +#define VTD_ECAP_MHMV (15ULL << 20) /* CAP_REG */ /* (offset >> 4) << 24 */ @@ -265,6 +276,19 @@ typedef enum VTDFaultReason { * context-entry. */ VTD_FR_CONTEXT_ENTRY_TT, + + /* Interrupt remapping transition faults */ + VTD_FR_IR_REQ_RSVD = 0x20, /* One or more IR request reserved + * fields set */ + VTD_FR_IR_INDEX_OVER = 0x21, /* Index value greater than max */ + VTD_FR_IR_ENTRY_P = 0x22, /* Present (P) not set in IRTE */ + VTD_FR_IR_ROOT_INVAL = 0x23, /* IR Root table invalid */ + VTD_FR_IR_IRTE_RSVD = 0x24, /* IRTE Rsvd field non-zero with + * Present flag set */ + VTD_FR_IR_REQ_COMPAT = 0x25, /* Encountered compatible IR + * request while disabled */ + VTD_FR_IR_SID_ERR = 0x26, /* Invalid Source-ID */ + /* This is not a normal fault reason. We use this to indicate some faults * that are not referenced by the VT-d specification. * Fault event with such reason should not be recorded. @@ -275,17 +299,35 @@ typedef enum VTDFaultReason { #define VTD_CONTEXT_CACHE_GEN_MAX 0xffffffffUL -/* Queued Invalidation Descriptor */ -struct VTDInvDesc { - uint64_t lo; - uint64_t hi; +/* Interrupt Entry Cache Invalidation Descriptor: VT-d 6.5.2.7. 
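+ * Software issues this descriptor through the invalidation queue to flush cached interrupt-remapping entries after updating the IRT.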
*/ +struct VTDInvDescIEC { + uint32_t type:4; /* Should always be 0x4 */ + uint32_t granularity:1; /* If set, it's global IR invalidation */ + uint32_t resved_1:22; + uint32_t index_mask:5; /* 2^N for continuous int invalidation */ + uint32_t index:16; /* Start index to invalidate */ + uint32_t reserved_2:16; }; -typedef struct VTDInvDesc VTDInvDesc; +typedef struct VTDInvDescIEC VTDInvDescIEC; + +/* Queued Invalidation Descriptor */ +union VTDInvDesc { + struct { + uint64_t lo; + uint64_t hi; + }; + union { + VTDInvDescIEC iec; + }; +}; +typedef union VTDInvDesc VTDInvDesc; /* Masks for struct VTDInvDesc */ #define VTD_INV_DESC_TYPE 0xf #define VTD_INV_DESC_CC 0x1 /* Context-cache Invalidate Desc */ #define VTD_INV_DESC_IOTLB 0x2 +#define VTD_INV_DESC_IEC 0x4 /* Interrupt Entry Cache + Invalidate Descriptor */ #define VTD_INV_DESC_WAIT 0x5 /* Invalidation Wait Descriptor */ #define VTD_INV_DESC_NONE 0 /* Not an Invalidate Descriptor */ diff --git a/hw/i386/kvm/pci-assign.c b/hw/i386/kvm/pci-assign.c index 1a429e5402..8238fbc630 100644 --- a/hw/i386/kvm/pci-assign.c +++ b/hw/i386/kvm/pci-assign.c @@ -974,10 +974,9 @@ static void assigned_dev_update_msi(PCIDevice *pci_dev) } if (ctrl_byte & PCI_MSI_FLAGS_ENABLE) { - MSIMessage msg = msi_get_message(pci_dev, 0); int virq; - virq = kvm_irqchip_add_msi_route(kvm_state, msg, pci_dev); + virq = kvm_irqchip_add_msi_route(kvm_state, 0, pci_dev); if (virq < 0) { perror("assigned_dev_update_msi: kvm_irqchip_add_msi_route"); return; @@ -1016,6 +1015,7 @@ static void assigned_dev_update_msi_msg(PCIDevice *pci_dev) kvm_irqchip_update_msi_route(kvm_state, assigned_dev->msi_virq[0], msi_get_message(pci_dev, 0), pci_dev); + kvm_irqchip_commit_routes(kvm_state); } static bool assigned_dev_msix_masked(MSIXTableEntry *entry) @@ -1042,7 +1042,6 @@ static int assigned_dev_update_msix_mmio(PCIDevice *pci_dev) uint16_t entries_nr = 0; int i, r = 0; MSIXTableEntry *entry = adev->msix_table; - MSIMessage msg; /* Get the usable entry number for allocating */ for (i = 0; i < adev->msix_max; i++, entry++) { @@ -1079,9 +1078,7 @@ static int assigned_dev_update_msix_mmio(PCIDevice *pci_dev) continue; } - msg.address = entry->addr_lo | ((uint64_t)entry->addr_hi << 32); - msg.data = entry->data; - r = kvm_irqchip_add_msi_route(kvm_state, msg, pci_dev); + r = kvm_irqchip_add_msi_route(kvm_state, i, pci_dev); if (r < 0) { return r; } @@ -1606,6 +1603,7 @@ static void assigned_dev_msix_mmio_write(void *opaque, hwaddr addr, if (ret) { error_report("Error updating irq routing entry (%d)", ret); } + kvm_irqchip_commit_routes(kvm_state); } } } diff --git a/hw/i386/pc.c b/hw/i386/pc.c index ac7a4d5a47..9e3c70fb23 100644 --- a/hw/i386/pc.c +++ b/hw/i386/pc.c @@ -1473,6 +1473,9 @@ void pc_memory_init(PCMachineState *pcms, rom_add_option(option_rom[i].name, option_rom[i].bootindex); } pcms->fw_cfg = fw_cfg; + + /* Init default IOAPIC address space */ + pcms->ioapic_as = &address_space_memory; } qemu_irq pc_allocate_cpu_irq(void) diff --git a/hw/i386/trace-events b/hw/i386/trace-events index ea77bc24c3..b4882c1157 100644 --- a/hw/i386/trace-events +++ b/hw/i386/trace-events @@ -10,3 +10,6 @@ xen_pv_mmio_write(uint64_t addr) "WARNING: write to Xen PV Device MMIO space (ad # hw/i386/pc.c mhp_pc_dimm_assigned_slot(int slot) "0x%d" mhp_pc_dimm_assigned_address(uint64_t addr) "0x%"PRIx64 + +# hw/i386/x86-iommu.c +x86_iommu_iec_notify(bool global, uint32_t index, uint32_t mask) "Notify IEC invalidation: global=%d index=%" PRIu32 " mask=%" PRIu32 diff --git a/hw/i386/x86-iommu.c 
b/hw/i386/x86-iommu.c new file mode 100644 index 0000000000..ce26b2a71d --- /dev/null +++ b/hw/i386/x86-iommu.c @@ -0,0 +1,128 @@ +/* + * QEMU emulation of common X86 IOMMU + * + * Copyright (C) 2016 Peter Xu, Red Hat + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + + * You should have received a copy of the GNU General Public License along + * with this program; if not, see . + */ + +#include "qemu/osdep.h" +#include "hw/sysbus.h" +#include "hw/boards.h" +#include "hw/i386/x86-iommu.h" +#include "qemu/error-report.h" +#include "trace.h" + +void x86_iommu_iec_register_notifier(X86IOMMUState *iommu, + iec_notify_fn fn, void *data) +{ + IEC_Notifier *notifier = g_new0(IEC_Notifier, 1); + + notifier->iec_notify = fn; + notifier->private = data; + + QLIST_INSERT_HEAD(&iommu->iec_notifiers, notifier, list); +} + +void x86_iommu_iec_notify_all(X86IOMMUState *iommu, bool global, + uint32_t index, uint32_t mask) +{ + IEC_Notifier *notifier; + + trace_x86_iommu_iec_notify(global, index, mask); + + QLIST_FOREACH(notifier, &iommu->iec_notifiers, list) { + if (notifier->iec_notify) { + notifier->iec_notify(notifier->private, global, + index, mask); + } + } +} + +/* Default X86 IOMMU device */ +static X86IOMMUState *x86_iommu_default = NULL; + +static void x86_iommu_set_default(X86IOMMUState *x86_iommu) +{ + assert(x86_iommu); + + if (x86_iommu_default) { + error_report("QEMU does not support multiple vIOMMUs " + "for x86 yet."); + exit(1); + } + + x86_iommu_default = x86_iommu; +} + +X86IOMMUState *x86_iommu_get_default(void) +{ + return x86_iommu_default; +} + +static void x86_iommu_realize(DeviceState *dev, Error **errp) +{ + X86IOMMUState *x86_iommu = X86_IOMMU_DEVICE(dev); + X86IOMMUClass *x86_class = X86_IOMMU_GET_CLASS(dev); + QLIST_INIT(&x86_iommu->iec_notifiers); + if (x86_class->realize) { + x86_class->realize(dev, errp); + } + x86_iommu_set_default(X86_IOMMU_DEVICE(dev)); +} + +static void x86_iommu_class_init(ObjectClass *klass, void *data) +{ + DeviceClass *dc = DEVICE_CLASS(klass); + dc->realize = x86_iommu_realize; +} + +static bool x86_iommu_intremap_prop_get(Object *o, Error **errp) +{ + X86IOMMUState *s = X86_IOMMU_DEVICE(o); + return s->intr_supported; +} + +static void x86_iommu_intremap_prop_set(Object *o, bool value, Error **errp) +{ + X86IOMMUState *s = X86_IOMMU_DEVICE(o); + s->intr_supported = value; +} + +static void x86_iommu_instance_init(Object *o) +{ + X86IOMMUState *s = X86_IOMMU_DEVICE(o); + + /* By default, do not support IR */ + s->intr_supported = false; + object_property_add_bool(o, "intremap", x86_iommu_intremap_prop_get, + x86_iommu_intremap_prop_set, NULL); +} + +static const TypeInfo x86_iommu_info = { + .name = TYPE_X86_IOMMU_DEVICE, + .parent = TYPE_SYS_BUS_DEVICE, + .instance_init = x86_iommu_instance_init, + .instance_size = sizeof(X86IOMMUState), + .class_init = x86_iommu_class_init, + .class_size = sizeof(X86IOMMUClass), + .abstract = true, +}; + +static void x86_iommu_register_types(void) +{ + type_register_static(&x86_iommu_info); +} + +type_init(x86_iommu_register_types) diff --git 
a/hw/input/virtio-input.c b/hw/input/virtio-input.c index edf69903a6..a87fd6862e 100644 --- a/hw/input/virtio-input.c +++ b/hw/input/virtio-input.c @@ -217,26 +217,14 @@ static void virtio_input_reset(VirtIODevice *vdev) } } -static void virtio_input_save(QEMUFile *f, void *opaque) -{ - VirtIOInput *vinput = opaque; - VirtIODevice *vdev = VIRTIO_DEVICE(vinput); - - virtio_save(vdev, f); -} - -static int virtio_input_load(QEMUFile *f, void *opaque, int version_id) +static int virtio_input_load(QEMUFile *f, void *opaque, size_t size) { VirtIOInput *vinput = opaque; VirtIOInputClass *vic = VIRTIO_INPUT_GET_CLASS(vinput); VirtIODevice *vdev = VIRTIO_DEVICE(vinput); int ret; - if (version_id != VIRTIO_INPUT_VM_VERSION) { - return -EINVAL; - } - - ret = virtio_load(vdev, f, version_id); + ret = virtio_load(vdev, f, VIRTIO_INPUT_VM_VERSION); if (ret) { return ret; } @@ -280,20 +268,14 @@ static void virtio_input_device_realize(DeviceState *dev, Error **errp) vinput->cfg_size); vinput->evt = virtio_add_queue(vdev, 64, virtio_input_handle_evt); vinput->sts = virtio_add_queue(vdev, 64, virtio_input_handle_sts); - - register_savevm(dev, "virtio-input", -1, VIRTIO_INPUT_VM_VERSION, - virtio_input_save, virtio_input_load, vinput); } static void virtio_input_device_unrealize(DeviceState *dev, Error **errp) { VirtIOInputClass *vic = VIRTIO_INPUT_GET_CLASS(dev); VirtIODevice *vdev = VIRTIO_DEVICE(dev); - VirtIOInput *vinput = VIRTIO_INPUT(dev); Error *local_err = NULL; - unregister_savevm(dev, "virtio-input", vinput); - if (vic->unrealize) { vic->unrealize(dev, &local_err); if (local_err) { @@ -304,6 +286,9 @@ static void virtio_input_device_unrealize(DeviceState *dev, Error **errp) virtio_cleanup(vdev); } +VMSTATE_VIRTIO_DEVICE(input, VIRTIO_INPUT_VM_VERSION, virtio_input_load, + virtio_vmstate_save); + static Property virtio_input_properties[] = { DEFINE_PROP_STRING("serial", VirtIOInput, serial), DEFINE_PROP_END_OF_LIST(), @@ -315,6 +300,7 @@ static void virtio_input_class_init(ObjectClass *klass, void *data) VirtioDeviceClass *vdc = VIRTIO_DEVICE_CLASS(klass); dc->props = virtio_input_properties; + dc->vmsd = &vmstate_virtio_input; set_bit(DEVICE_CATEGORY_INPUT, dc->categories); vdc->realize = virtio_input_device_realize; vdc->unrealize = virtio_input_device_unrealize; diff --git a/hw/intc/ioapic.c b/hw/intc/ioapic.c index 273bb0854c..2d3282a864 100644 --- a/hw/intc/ioapic.c +++ b/hw/intc/ioapic.c @@ -29,6 +29,9 @@ #include "hw/i386/ioapic_internal.h" #include "include/hw/pci/msi.h" #include "sysemu/kvm.h" +#include "target-i386/cpu.h" +#include "hw/i386/apic-msidef.h" +#include "hw/i386/x86-iommu.h" //#define DEBUG_IOAPIC @@ -48,16 +51,56 @@ static IOAPICCommonState *ioapics[MAX_IOAPICS]; /* global variable from ioapic_common.c */ extern int ioapic_no; +struct ioapic_entry_info { + /* fields parsed from IOAPIC entries */ + uint8_t masked; + uint8_t trig_mode; + uint16_t dest_idx; + uint8_t dest_mode; + uint8_t delivery_mode; + uint8_t vector; + + /* MSI message generated from above parsed fields */ + uint32_t addr; + uint32_t data; +}; + +static void ioapic_entry_parse(uint64_t entry, struct ioapic_entry_info *info) +{ + memset(info, 0, sizeof(*info)); + info->masked = (entry >> IOAPIC_LVT_MASKED_SHIFT) & 1; + info->trig_mode = (entry >> IOAPIC_LVT_TRIGGER_MODE_SHIFT) & 1; + /* + * By default, this would be dest_id[8] + reserved[8]. When IR + * is enabled, this would be interrupt_index[15] + + * interrupt_format[1]. This field never means anything, but + * only used to generate corresponding MSI. 
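+ * Whatever receives the resulting MSI (e.g. the IOMMU's interrupt-remapping region) is what interprets these bits.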
+ */ + info->dest_idx = (entry >> IOAPIC_LVT_DEST_IDX_SHIFT) & 0xffff; + info->dest_mode = (entry >> IOAPIC_LVT_DEST_MODE_SHIFT) & 1; + info->delivery_mode = (entry >> IOAPIC_LVT_DELIV_MODE_SHIFT) \ + & IOAPIC_DM_MASK; + if (info->delivery_mode == IOAPIC_DM_EXTINT) { + info->vector = pic_read_irq(isa_pic); + } else { + info->vector = entry & IOAPIC_VECTOR_MASK; + } + + info->addr = APIC_DEFAULT_ADDRESS | \ + (info->dest_idx << MSI_ADDR_DEST_IDX_SHIFT) | \ + (info->dest_mode << MSI_ADDR_DEST_MODE_SHIFT); + info->data = (info->vector << MSI_DATA_VECTOR_SHIFT) | \ + (info->trig_mode << MSI_DATA_TRIGGER_SHIFT) | \ + (info->delivery_mode << MSI_DATA_DELIVERY_MODE_SHIFT); +} + static void ioapic_service(IOAPICCommonState *s) { + AddressSpace *ioapic_as = PC_MACHINE(qdev_get_machine())->ioapic_as; + struct ioapic_entry_info info; uint8_t i; - uint8_t trig_mode; - uint8_t vector; - uint8_t delivery_mode; uint32_t mask; uint64_t entry; - uint8_t dest; - uint8_t dest_mode; for (i = 0; i < IOAPIC_NUM_PINS; i++) { mask = 1 << i; @@ -65,26 +108,18 @@ static void ioapic_service(IOAPICCommonState *s) int coalesce = 0; entry = s->ioredtbl[i]; - if (!(entry & IOAPIC_LVT_MASKED)) { - trig_mode = ((entry >> IOAPIC_LVT_TRIGGER_MODE_SHIFT) & 1); - dest = entry >> IOAPIC_LVT_DEST_SHIFT; - dest_mode = (entry >> IOAPIC_LVT_DEST_MODE_SHIFT) & 1; - delivery_mode = - (entry >> IOAPIC_LVT_DELIV_MODE_SHIFT) & IOAPIC_DM_MASK; - if (trig_mode == IOAPIC_TRIGGER_EDGE) { + ioapic_entry_parse(entry, &info); + if (!info.masked) { + if (info.trig_mode == IOAPIC_TRIGGER_EDGE) { s->irr &= ~mask; } else { coalesce = s->ioredtbl[i] & IOAPIC_LVT_REMOTE_IRR; s->ioredtbl[i] |= IOAPIC_LVT_REMOTE_IRR; } - if (delivery_mode == IOAPIC_DM_EXTINT) { - vector = pic_read_irq(isa_pic); - } else { - vector = entry & IOAPIC_VECTOR_MASK; - } + #ifdef CONFIG_KVM if (kvm_irqchip_is_split()) { - if (trig_mode == IOAPIC_TRIGGER_EDGE) { + if (info.trig_mode == IOAPIC_TRIGGER_EDGE) { kvm_set_irq(kvm_state, i, 1); kvm_set_irq(kvm_state, i, 0); } else { @@ -97,8 +132,11 @@ static void ioapic_service(IOAPICCommonState *s) #else (void)coalesce; #endif - apic_deliver_irq(dest, dest_mode, delivery_mode, vector, - trig_mode); + /* No matter whether IR is enabled, we translate + * the IOAPIC message into a MSI one, and its + * address space will decide whether we need a + * translation. 
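+ * Without an IOMMU the address space is plain system memory, so the write is delivered as an ordinary MSI.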
*/ + stl_le_phys(ioapic_as, info.addr, info.data); } } } @@ -149,30 +187,11 @@ static void ioapic_update_kvm_routes(IOAPICCommonState *s) if (kvm_irqchip_is_split()) { for (i = 0; i < IOAPIC_NUM_PINS; i++) { - uint64_t entry = s->ioredtbl[i]; - uint8_t trig_mode; - uint8_t delivery_mode; - uint8_t dest; - uint8_t dest_mode; - uint64_t pin_polarity; MSIMessage msg; - - trig_mode = ((entry >> IOAPIC_LVT_TRIGGER_MODE_SHIFT) & 1); - dest = entry >> IOAPIC_LVT_DEST_SHIFT; - dest_mode = (entry >> IOAPIC_LVT_DEST_MODE_SHIFT) & 1; - pin_polarity = (entry >> IOAPIC_LVT_TRIGGER_MODE_SHIFT) & 1; - delivery_mode = - (entry >> IOAPIC_LVT_DELIV_MODE_SHIFT) & IOAPIC_DM_MASK; - - msg.address = APIC_DEFAULT_ADDRESS; - msg.address |= dest_mode << 2; - msg.address |= dest << 12; - - msg.data = entry & IOAPIC_VECTOR_MASK; - msg.data |= delivery_mode << APIC_DELIVERY_MODE_SHIFT; - msg.data |= pin_polarity << APIC_POLARITY_SHIFT; - msg.data |= trig_mode << APIC_TRIG_MODE_SHIFT; - + struct ioapic_entry_info info; + ioapic_entry_parse(s->ioredtbl[i], &info); + msg.address = info.addr; + msg.data = info.data; kvm_irqchip_update_msi_route(kvm_state, i, msg, NULL); } kvm_irqchip_commit_routes(kvm_state); @@ -180,6 +199,16 @@ static void ioapic_update_kvm_routes(IOAPICCommonState *s) #endif } +#ifdef CONFIG_KVM +static void ioapic_iec_notifier(void *private, bool global, + uint32_t index, uint32_t mask) +{ + IOAPICCommonState *s = (IOAPICCommonState *)private; + /* For simplicity, we just update all the routes */ + ioapic_update_kvm_routes(s); +} +#endif + void ioapic_eoi_broadcast(int vector) { IOAPICCommonState *s; @@ -336,6 +365,24 @@ static const MemoryRegionOps ioapic_io_ops = { .endianness = DEVICE_NATIVE_ENDIAN, }; +static void ioapic_machine_done_notify(Notifier *notifier, void *data) +{ +#ifdef CONFIG_KVM + IOAPICCommonState *s = container_of(notifier, IOAPICCommonState, + machine_done); + + if (kvm_irqchip_is_split()) { + X86IOMMUState *iommu = x86_iommu_get_default(); + if (iommu) { + /* Register this IOAPIC with IOMMU IEC notifier, so that + * when there are IR invalidates, we can be notified to + * update kernel IR cache. 
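+ * (ioapic_iec_notifier simply refreshes all KVM MSI routes for this IOAPIC.)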
*/ + x86_iommu_iec_register_notifier(iommu, ioapic_iec_notifier, s); + } + } +#endif +} + static void ioapic_realize(DeviceState *dev, Error **errp) { IOAPICCommonState *s = IOAPIC_COMMON(dev); @@ -346,6 +393,8 @@ static void ioapic_realize(DeviceState *dev, Error **errp) qdev_init_gpio_in(dev, ioapic_set_irq, IOAPIC_NUM_PINS); ioapics[ioapic_no] = s; + s->machine_done.notify = ioapic_machine_done_notify; + qemu_add_machine_init_done_notifier(&s->machine_done); } static void ioapic_class_init(ObjectClass *klass, void *data) diff --git a/hw/mem/nvdimm.c b/hw/mem/nvdimm.c index 81896c0e84..7895805a23 100644 --- a/hw/mem/nvdimm.c +++ b/hw/mem/nvdimm.c @@ -98,6 +98,7 @@ static void nvdimm_realize(PCDIMMDevice *dimm, Error **errp) "small to contain nvdimm label (0x%" PRIx64 ") and " "aligned PMEM (0x%" PRIx64 ")", path, memory_region_size(mr), nvdimm->label_size, align); + g_free(path); return; } diff --git a/hw/mips/gt64xxx_pci.c b/hw/mips/gt64xxx_pci.c index 3f4523df22..4811843ab6 100644 --- a/hw/mips/gt64xxx_pci.c +++ b/hw/mips/gt64xxx_pci.c @@ -1167,7 +1167,6 @@ PCIBus *gt64120_register(qemu_irq *pic) DeviceState *dev; dev = qdev_create(NULL, TYPE_GT64120_PCI_HOST_BRIDGE); - qdev_init_nofail(dev); d = GT64120_PCI_HOST_BRIDGE(dev); phb = PCI_HOST_BRIDGE(dev); memory_region_init(&d->pci0_mem, OBJECT(dev), "pci0-mem", UINT32_MAX); @@ -1178,6 +1177,7 @@ PCIBus *gt64120_register(qemu_irq *pic) &d->pci0_mem, get_system_io(), PCI_DEVFN(18, 0), 4, TYPE_PCI_BUS); + qdev_init_nofail(dev); memory_region_init_io(&d->ISD_mem, OBJECT(dev), &isd_mem_ops, d, "isd-mem", 0x1000); pci_create_simple(phb->bus, PCI_DEVFN(0, 0), "gt64120_pci"); diff --git a/hw/misc/ivshmem.c b/hw/misc/ivshmem.c index 7e7c843b32..40a2ebca20 100644 --- a/hw/misc/ivshmem.c +++ b/hw/misc/ivshmem.c @@ -322,6 +322,7 @@ static int ivshmem_vector_unmask(PCIDevice *dev, unsigned vector, if (ret < 0) { return ret; } + kvm_irqchip_commit_routes(kvm_state); return kvm_irqchip_add_irqfd_notifier_gsi(kvm_state, n, NULL, v->virq); } @@ -441,13 +442,12 @@ static void ivshmem_add_kvm_msi_virq(IVShmemState *s, int vector, Error **errp) { PCIDevice *pdev = PCI_DEVICE(s); - MSIMessage msg = msix_get_message(pdev, vector); int ret; IVSHMEM_DPRINTF("ivshmem_add_kvm_msi_virq vector:%d\n", vector); assert(!s->msi_vectors[vector].pdev); - ret = kvm_irqchip_add_msi_route(kvm_state, msg, pdev); + ret = kvm_irqchip_add_msi_route(kvm_state, vector, pdev); if (ret < 0) { error_setg(errp, "kvm_irqchip_add_msi_route failed"); return; diff --git a/hw/net/virtio-net.c b/hw/net/virtio-net.c index 56d85062ff..01f1351554 100644 --- a/hw/net/virtio-net.c +++ b/hw/net/virtio-net.c @@ -1492,7 +1492,7 @@ static void virtio_net_set_multiqueue(VirtIONet *n, int multiqueue) virtio_net_set_queues(n); } -static void virtio_net_save(QEMUFile *f, void *opaque) +static void virtio_net_save(QEMUFile *f, void *opaque, size_t size) { VirtIONet *n = opaque; VirtIODevice *vdev = VIRTIO_DEVICE(n); @@ -1538,15 +1538,12 @@ static void virtio_net_save_device(VirtIODevice *vdev, QEMUFile *f) } } -static int virtio_net_load(QEMUFile *f, void *opaque, int version_id) +static int virtio_net_load(QEMUFile *f, void *opaque, size_t size) { VirtIONet *n = opaque; VirtIODevice *vdev = VIRTIO_DEVICE(n); - if (version_id < 2 || version_id > VIRTIO_NET_VM_VERSION) - return -EINVAL; - - return virtio_load(vdev, f, version_id); + return virtio_load(vdev, f, VIRTIO_NET_VM_VERSION); } static int virtio_net_load_device(VirtIODevice *vdev, QEMUFile *f, @@ -1562,68 +1559,49 @@ static int 
virtio_net_load_device(VirtIODevice *vdev, QEMUFile *f, virtio_vdev_has_feature(vdev, VIRTIO_F_VERSION_1)); - if (version_id >= 3) - n->status = qemu_get_be16(f); + n->status = qemu_get_be16(f); - if (version_id >= 4) { - if (version_id < 8) { - n->promisc = qemu_get_be32(f); - n->allmulti = qemu_get_be32(f); - } else { - n->promisc = qemu_get_byte(f); - n->allmulti = qemu_get_byte(f); - } - } - - if (version_id >= 5) { - n->mac_table.in_use = qemu_get_be32(f); - /* MAC_TABLE_ENTRIES may be different from the saved image */ - if (n->mac_table.in_use <= MAC_TABLE_ENTRIES) { - qemu_get_buffer(f, n->mac_table.macs, - n->mac_table.in_use * ETH_ALEN); - } else { - int64_t i; - - /* Overflow detected - can happen if source has a larger MAC table. - * We simply set overflow flag so there's no need to maintain the - * table of addresses, discard them all. - * Note: 64 bit math to avoid integer overflow. - */ - for (i = 0; i < (int64_t)n->mac_table.in_use * ETH_ALEN; ++i) { - qemu_get_byte(f); - } - n->mac_table.multi_overflow = n->mac_table.uni_overflow = 1; - n->mac_table.in_use = 0; + n->promisc = qemu_get_byte(f); + n->allmulti = qemu_get_byte(f); + + n->mac_table.in_use = qemu_get_be32(f); + /* MAC_TABLE_ENTRIES may be different from the saved image */ + if (n->mac_table.in_use <= MAC_TABLE_ENTRIES) { + qemu_get_buffer(f, n->mac_table.macs, + n->mac_table.in_use * ETH_ALEN); + } else { + int64_t i; + + /* Overflow detected - can happen if source has a larger MAC table. + * We simply set overflow flag so there's no need to maintain the + * table of addresses, discard them all. + * Note: 64 bit math to avoid integer overflow. + */ + for (i = 0; i < (int64_t)n->mac_table.in_use * ETH_ALEN; ++i) { + qemu_get_byte(f); } + n->mac_table.multi_overflow = n->mac_table.uni_overflow = 1; + n->mac_table.in_use = 0; } - if (version_id >= 6) - qemu_get_buffer(f, (uint8_t *)n->vlans, MAX_VLAN >> 3); + qemu_get_buffer(f, (uint8_t *)n->vlans, MAX_VLAN >> 3); - if (version_id >= 7) { - if (qemu_get_be32(f) && !peer_has_vnet_hdr(n)) { - error_report("virtio-net: saved image requires vnet_hdr=on"); - return -1; - } + if (qemu_get_be32(f) && !peer_has_vnet_hdr(n)) { + error_report("virtio-net: saved image requires vnet_hdr=on"); + return -1; } - if (version_id >= 9) { - n->mac_table.multi_overflow = qemu_get_byte(f); - n->mac_table.uni_overflow = qemu_get_byte(f); - } + n->mac_table.multi_overflow = qemu_get_byte(f); + n->mac_table.uni_overflow = qemu_get_byte(f); - if (version_id >= 10) { - n->alluni = qemu_get_byte(f); - n->nomulti = qemu_get_byte(f); - n->nouni = qemu_get_byte(f); - n->nobcast = qemu_get_byte(f); - } + n->alluni = qemu_get_byte(f); + n->nomulti = qemu_get_byte(f); + n->nouni = qemu_get_byte(f); + n->nobcast = qemu_get_byte(f); - if (version_id >= 11) { - if (qemu_get_byte(f) && !peer_has_ufo(n)) { - error_report("virtio-net: saved image requires TUN_F_UFO support"); - return -1; - } + if (qemu_get_byte(f) && !peer_has_ufo(n)) { + error_report("virtio-net: saved image requires TUN_F_UFO support"); + return -1; } if (n->max_queues > 1) { @@ -1809,8 +1787,6 @@ static void virtio_net_device_realize(DeviceState *dev, Error **errp) nc->rxfilter_notify_enabled = 1; n->qdev = dev; - register_savevm(dev, "virtio-net", -1, VIRTIO_NET_VM_VERSION, - virtio_net_save, virtio_net_load, n); } static void virtio_net_device_unrealize(DeviceState *dev, Error **errp) @@ -1822,8 +1798,6 @@ static void virtio_net_device_unrealize(DeviceState *dev, Error **errp) /* This will stop vhost backend if appropriate. 
*/ virtio_net_set_status(vdev, 0); - unregister_savevm(dev, "virtio-net", n); - g_free(n->netclient_name); n->netclient_name = NULL; g_free(n->netclient_type); @@ -1858,6 +1832,9 @@ static void virtio_net_instance_init(Object *obj) DEVICE(n), NULL); } +VMSTATE_VIRTIO_DEVICE(net, VIRTIO_NET_VM_VERSION, virtio_net_load, + virtio_net_save); + static Property virtio_net_properties[] = { DEFINE_PROP_BIT("csum", VirtIONet, host_features, VIRTIO_NET_F_CSUM, true), DEFINE_PROP_BIT("guest_csum", VirtIONet, host_features, @@ -1912,6 +1889,7 @@ static void virtio_net_class_init(ObjectClass *klass, void *data) VirtioDeviceClass *vdc = VIRTIO_DEVICE_CLASS(klass); dc->props = virtio_net_properties; + dc->vmsd = &vmstate_virtio_net; set_bit(DEVICE_CATEGORY_NETWORK, dc->categories); vdc->realize = virtio_net_device_realize; vdc->unrealize = virtio_net_device_unrealize; diff --git a/hw/pci-host/apb.c b/hw/pci-host/apb.c index babbbef0c2..16587f8373 100644 --- a/hw/pci-host/apb.c +++ b/hw/pci-host/apb.c @@ -670,6 +670,13 @@ PCIBus *pci_apb_init(hwaddr special_base, /* Ultrasparc PBM main bus */ dev = qdev_create(NULL, TYPE_APB); + d = APB_DEVICE(dev); + phb = PCI_HOST_BRIDGE(dev); + phb->bus = pci_register_bus(DEVICE(phb), "pci", + pci_apb_set_irq, pci_pbm_map_irq, d, + &d->pci_mmio, + get_system_io(), + 0, 32, TYPE_PCI_BUS); qdev_init_nofail(dev); s = SYS_BUS_DEVICE(dev); /* apb_config */ @@ -678,18 +685,10 @@ PCIBus *pci_apb_init(hwaddr special_base, sysbus_mmio_map(s, 1, special_base + 0x1000000ULL); /* pci_ioport */ sysbus_mmio_map(s, 2, special_base + 0x2000000ULL); - d = APB_DEVICE(dev); memory_region_init(&d->pci_mmio, OBJECT(s), "pci-mmio", 0x100000000ULL); memory_region_add_subregion(get_system_memory(), mem_base, &d->pci_mmio); - phb = PCI_HOST_BRIDGE(dev); - phb->bus = pci_register_bus(DEVICE(phb), "pci", - pci_apb_set_irq, pci_pbm_map_irq, d, - &d->pci_mmio, - get_system_io(), - 0, 32, TYPE_PCI_BUS); - *pbm_irqs = d->pbm_irqs; d->ivec_irqs = ivec_irqs; diff --git a/hw/pci-host/grackle.c b/hw/pci-host/grackle.c index 8f91216157..2c8acdaaca 100644 --- a/hw/pci-host/grackle.c +++ b/hw/pci-host/grackle.c @@ -72,7 +72,6 @@ PCIBus *pci_grackle_init(uint32_t base, qemu_irq *pic, GrackleState *d; dev = qdev_create(NULL, TYPE_GRACKLE_PCI_HOST_BRIDGE); - qdev_init_nofail(dev); s = SYS_BUS_DEVICE(dev); phb = PCI_HOST_BRIDGE(dev); d = GRACKLE_PCI_HOST_BRIDGE(dev); @@ -92,6 +91,7 @@ PCIBus *pci_grackle_init(uint32_t base, qemu_irq *pic, 0, 4, TYPE_PCI_BUS); pci_create_simple(phb->bus, 0, "grackle"); + qdev_init_nofail(dev); sysbus_mmio_map(s, 0, base); sysbus_mmio_map(s, 1, base + 0x00200000); diff --git a/hw/pci-host/prep.c b/hw/pci-host/prep.c index 487e32ecbf..5580293f93 100644 --- a/hw/pci-host/prep.c +++ b/hw/pci-host/prep.c @@ -247,6 +247,7 @@ static void raven_pcihost_realizefn(DeviceState *d, Error **errp) memory_region_add_subregion(address_space_mem, 0xbffffff0, &s->pci_intack); /* TODO Remove once realize propagates to child devices. */ + object_property_set_bool(OBJECT(&s->pci_bus), true, "realized", errp); object_property_set_bool(OBJECT(&s->pci_dev), true, "realized", errp); } diff --git a/hw/pci-host/versatile.c b/hw/pci-host/versatile.c index 0792c4501c..467cbb9cb8 100644 --- a/hw/pci-host/versatile.c +++ b/hw/pci-host/versatile.c @@ -455,6 +455,7 @@ static void pci_vpb_realize(DeviceState *dev, Error **errp) } /* TODO Remove once realize propagates to child devices. 
*/ + object_property_set_bool(OBJECT(&s->pci_bus), true, "realized", errp); object_property_set_bool(OBJECT(&s->pci_dev), true, "realized", errp); } diff --git a/hw/pci/pci.c b/hw/pci/pci.c index 149994b815..728c6d4b3b 100644 --- a/hw/pci/pci.c +++ b/hw/pci/pci.c @@ -2596,6 +2596,21 @@ PCIDevice *pci_get_function_0(PCIDevice *pci_dev) } } +MSIMessage pci_get_msi_message(PCIDevice *dev, int vector) +{ + MSIMessage msg; + if (msix_enabled(dev)) { + msg = msix_get_message(dev, vector); + } else if (msi_enabled(dev)) { + msg = msi_get_message(dev, vector); + } else { + /* Should never happen */ + error_report("%s: unknown interrupt type", __func__); + abort(); + } + return msg; +} + static const TypeInfo pci_device_type_info = { .name = TYPE_PCI_DEVICE, .parent = TYPE_DEVICE, diff --git a/hw/scsi/virtio-scsi.c b/hw/scsi/virtio-scsi.c index 722c93e5fc..ce57ef6248 100644 --- a/hw/scsi/virtio-scsi.c +++ b/hw/scsi/virtio-scsi.c @@ -663,22 +663,17 @@ static void virtio_scsi_reset(VirtIODevice *vdev) /* The device does not have anything to save beyond the virtio data. * Request data is saved with callbacks from SCSI devices. */ -static void virtio_scsi_save(QEMUFile *f, void *opaque) +static void virtio_scsi_save(QEMUFile *f, void *opaque, size_t size) { VirtIODevice *vdev = VIRTIO_DEVICE(opaque); virtio_save(vdev, f); } -static int virtio_scsi_load(QEMUFile *f, void *opaque, int version_id) +static int virtio_scsi_load(QEMUFile *f, void *opaque, size_t size) { VirtIODevice *vdev = VIRTIO_DEVICE(opaque); - int ret; - ret = virtio_load(vdev, f, version_id); - if (ret) { - return ret; - } - return 0; + return virtio_load(vdev, f, 1); } void virtio_scsi_push_event(VirtIOSCSI *s, SCSIDevice *dev, @@ -824,8 +819,9 @@ static struct SCSIBusInfo virtio_scsi_scsi_info = { }; void virtio_scsi_common_realize(DeviceState *dev, Error **errp, - HandleOutput ctrl, HandleOutput evt, - HandleOutput cmd) + VirtIOHandleOutput ctrl, + VirtIOHandleOutput evt, + VirtIOHandleOutput cmd) { VirtIODevice *vdev = VIRTIO_DEVICE(dev); VirtIOSCSICommon *s = VIRTIO_SCSI_COMMON(dev); @@ -846,13 +842,10 @@ void virtio_scsi_common_realize(DeviceState *dev, Error **errp, s->sense_size = VIRTIO_SCSI_SENSE_DEFAULT_SIZE; s->cdb_size = VIRTIO_SCSI_CDB_DEFAULT_SIZE; - s->ctrl_vq = virtio_add_queue(vdev, VIRTIO_SCSI_VQ_SIZE, - ctrl); - s->event_vq = virtio_add_queue(vdev, VIRTIO_SCSI_VQ_SIZE, - evt); + s->ctrl_vq = virtio_add_queue_aio(vdev, VIRTIO_SCSI_VQ_SIZE, ctrl); + s->event_vq = virtio_add_queue_aio(vdev, VIRTIO_SCSI_VQ_SIZE, evt); for (i = 0; i < s->conf.num_queues; i++) { - s->cmd_vqs[i] = virtio_add_queue(vdev, VIRTIO_SCSI_VQ_SIZE, - cmd); + s->cmd_vqs[i] = virtio_add_queue_aio(vdev, VIRTIO_SCSI_VQ_SIZE, cmd); } if (s->conf.iothread) { @@ -864,7 +857,6 @@ static void virtio_scsi_device_realize(DeviceState *dev, Error **errp) { VirtIODevice *vdev = VIRTIO_DEVICE(dev); VirtIOSCSI *s = VIRTIO_SCSI(dev); - static int virtio_scsi_id; Error *err = NULL; virtio_scsi_common_realize(dev, &err, virtio_scsi_handle_ctrl, @@ -887,9 +879,6 @@ static void virtio_scsi_device_realize(DeviceState *dev, Error **errp) return; } } - - register_savevm(dev, "virtio-scsi", virtio_scsi_id++, 1, - virtio_scsi_save, virtio_scsi_load, s); } static void virtio_scsi_instance_init(Object *obj) @@ -913,9 +902,6 @@ void virtio_scsi_common_unrealize(DeviceState *dev, Error **errp) static void virtio_scsi_device_unrealize(DeviceState *dev, Error **errp) { - VirtIOSCSI *s = VIRTIO_SCSI(dev); - - unregister_savevm(dev, "virtio-scsi", s); 
virtio_scsi_common_unrealize(dev, errp); } @@ -932,6 +918,8 @@ static Property virtio_scsi_properties[] = { DEFINE_PROP_END_OF_LIST(), }; +VMSTATE_VIRTIO_DEVICE(scsi, 1, virtio_scsi_load, virtio_scsi_save); + static void virtio_scsi_common_class_init(ObjectClass *klass, void *data) { VirtioDeviceClass *vdc = VIRTIO_DEVICE_CLASS(klass); @@ -948,6 +936,7 @@ static void virtio_scsi_class_init(ObjectClass *klass, void *data) HotplugHandlerClass *hc = HOTPLUG_HANDLER_CLASS(klass); dc->props = virtio_scsi_properties; + dc->vmsd = &vmstate_virtio_scsi; set_bit(DEVICE_CATEGORY_STORAGE, dc->categories); vdc->realize = virtio_scsi_device_realize; vdc->unrealize = virtio_scsi_device_unrealize; diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c index c8436a19d6..7bfa17ce38 100644 --- a/hw/vfio/pci.c +++ b/hw/vfio/pci.c @@ -417,11 +417,11 @@ static int vfio_enable_vectors(VFIOPCIDevice *vdev, bool msix) } static void vfio_add_kvm_msi_virq(VFIOPCIDevice *vdev, VFIOMSIVector *vector, - MSIMessage *msg, bool msix) + int vector_n, bool msix) { int virq; - if ((msix && vdev->no_kvm_msix) || (!msix && vdev->no_kvm_msi) || !msg) { + if ((msix && vdev->no_kvm_msix) || (!msix && vdev->no_kvm_msi)) { return; } @@ -429,7 +429,7 @@ static void vfio_add_kvm_msi_virq(VFIOPCIDevice *vdev, VFIOMSIVector *vector, return; } - virq = kvm_irqchip_add_msi_route(kvm_state, *msg, &vdev->pdev); + virq = kvm_irqchip_add_msi_route(kvm_state, vector_n, &vdev->pdev); if (virq < 0) { event_notifier_cleanup(&vector->kvm_interrupt); return; @@ -458,6 +458,7 @@ static void vfio_update_kvm_msi_virq(VFIOMSIVector *vector, MSIMessage msg, PCIDevice *pdev) { kvm_irqchip_update_msi_route(kvm_state, vector->virq, msg, pdev); + kvm_irqchip_commit_routes(kvm_state); } static int vfio_msix_vector_do_use(PCIDevice *pdev, unsigned int nr, @@ -495,7 +496,7 @@ static int vfio_msix_vector_do_use(PCIDevice *pdev, unsigned int nr, vfio_update_kvm_msi_virq(vector, *msg, pdev); } } else { - vfio_add_kvm_msi_virq(vdev, vector, msg, true); + vfio_add_kvm_msi_virq(vdev, vector, nr, true); } /* @@ -639,7 +640,6 @@ retry: for (i = 0; i < vdev->nr_vectors; i++) { VFIOMSIVector *vector = &vdev->msi_vectors[i]; - MSIMessage msg = msi_get_message(&vdev->pdev, i); vector->vdev = vdev; vector->virq = -1; @@ -656,7 +656,7 @@ retry: * Attempt to enable route through KVM irqchip, * default to userspace handling if unavailable. 
*/ - vfio_add_kvm_msi_virq(vdev, vector, &msg, false); + vfio_add_kvm_msi_virq(vdev, vector, i, false); } /* Set interrupt type prior to possible interrupts */ diff --git a/hw/virtio/virtio-balloon.c b/hw/virtio/virtio-balloon.c index 1a22e6d993..5af429a58a 100644 --- a/hw/virtio/virtio-balloon.c +++ b/hw/virtio/virtio-balloon.c @@ -396,11 +396,6 @@ static void virtio_balloon_to_target(void *opaque, ram_addr_t target) trace_virtio_balloon_to_target(target, dev->num_pages); } -static void virtio_balloon_save(QEMUFile *f, void *opaque) -{ - virtio_save(VIRTIO_DEVICE(opaque), f); -} - static void virtio_balloon_save_device(VirtIODevice *vdev, QEMUFile *f) { VirtIOBalloon *s = VIRTIO_BALLOON(vdev); @@ -409,12 +404,9 @@ static void virtio_balloon_save_device(VirtIODevice *vdev, QEMUFile *f) qemu_put_be32(f, s->actual); } -static int virtio_balloon_load(QEMUFile *f, void *opaque, int version_id) +static int virtio_balloon_load(QEMUFile *f, void *opaque, size_t size) { - if (version_id != 1) - return -EINVAL; - - return virtio_load(VIRTIO_DEVICE(opaque), f, version_id); + return virtio_load(VIRTIO_DEVICE(opaque), f, 1); } static int virtio_balloon_load_device(VirtIODevice *vdev, QEMUFile *f, @@ -454,9 +446,6 @@ static void virtio_balloon_device_realize(DeviceState *dev, Error **errp) s->svq = virtio_add_queue(vdev, 128, virtio_balloon_receive_stats); reset_stats(s); - - register_savevm(dev, "virtio-balloon", -1, 1, - virtio_balloon_save, virtio_balloon_load, s); } static void virtio_balloon_device_unrealize(DeviceState *dev, Error **errp) @@ -466,7 +455,6 @@ static void virtio_balloon_device_unrealize(DeviceState *dev, Error **errp) balloon_stats_destroy_timer(s); qemu_remove_balloon_handler(s); - unregister_savevm(dev, "virtio-balloon", s); virtio_cleanup(vdev); } @@ -493,6 +481,8 @@ static void virtio_balloon_instance_init(Object *obj) NULL, s, NULL); } +VMSTATE_VIRTIO_DEVICE(balloon, 1, virtio_balloon_load, virtio_vmstate_save); + static Property virtio_balloon_properties[] = { DEFINE_PROP_BIT("deflate-on-oom", VirtIOBalloon, host_features, VIRTIO_BALLOON_F_DEFLATE_ON_OOM, false), @@ -505,6 +495,7 @@ static void virtio_balloon_class_init(ObjectClass *klass, void *data) VirtioDeviceClass *vdc = VIRTIO_DEVICE_CLASS(klass); dc->props = virtio_balloon_properties; + dc->vmsd = &vmstate_virtio_balloon; set_bit(DEVICE_CATEGORY_MISC, dc->categories); vdc->realize = virtio_balloon_device_realize; vdc->unrealize = virtio_balloon_device_unrealize; diff --git a/hw/virtio/virtio-pci.c b/hw/virtio/virtio-pci.c index 2b34b43060..f0677b73d8 100644 --- a/hw/virtio/virtio-pci.c +++ b/hw/virtio/virtio-pci.c @@ -699,14 +699,13 @@ static uint32_t virtio_read_config(PCIDevice *pci_dev, static int kvm_virtio_pci_vq_vector_use(VirtIOPCIProxy *proxy, unsigned int queue_no, - unsigned int vector, - MSIMessage msg) + unsigned int vector) { VirtIOIRQFD *irqfd = &proxy->vector_irqfd[vector]; int ret; if (irqfd->users == 0) { - ret = kvm_irqchip_add_msi_route(kvm_state, msg, &proxy->pci_dev); + ret = kvm_irqchip_add_msi_route(kvm_state, vector, &proxy->pci_dev); if (ret < 0) { return ret; } @@ -757,7 +756,6 @@ static int kvm_virtio_pci_vector_use(VirtIOPCIProxy *proxy, int nvqs) VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev); unsigned int vector; int ret, queue_no; - MSIMessage msg; for (queue_no = 0; queue_no < nvqs; queue_no++) { if (!virtio_queue_get_num(vdev, queue_no)) { @@ -767,8 +765,7 @@ static int kvm_virtio_pci_vector_use(VirtIOPCIProxy *proxy, int nvqs) if (vector >= msix_nr_vectors_allocated(dev)) { 
continue; } - msg = msix_get_message(dev, vector); - ret = kvm_virtio_pci_vq_vector_use(proxy, queue_no, vector, msg); + ret = kvm_virtio_pci_vq_vector_use(proxy, queue_no, vector); if (ret < 0) { goto undo; } @@ -845,6 +842,7 @@ static int virtio_pci_vq_vector_unmask(VirtIOPCIProxy *proxy, if (ret < 0) { return ret; } + kvm_irqchip_commit_routes(kvm_state); } } diff --git a/hw/virtio/virtio-rng.c b/hw/virtio/virtio-rng.c index 6b991a7642..cd8ca10177 100644 --- a/hw/virtio/virtio-rng.c +++ b/hw/virtio/virtio-rng.c @@ -120,22 +120,12 @@ static uint64_t get_features(VirtIODevice *vdev, uint64_t f, Error **errp) return f; } -static void virtio_rng_save(QEMUFile *f, void *opaque) -{ - VirtIODevice *vdev = opaque; - - virtio_save(vdev, f); -} - -static int virtio_rng_load(QEMUFile *f, void *opaque, int version_id) +static int virtio_rng_load(QEMUFile *f, void *opaque, size_t size) { VirtIORNG *vrng = opaque; int ret; - if (version_id != 1) { - return -EINVAL; - } - ret = virtio_load(VIRTIO_DEVICE(vrng), f, version_id); + ret = virtio_load(VIRTIO_DEVICE(vrng), f, 1); if (ret != 0) { return ret; } @@ -214,8 +204,6 @@ static void virtio_rng_device_realize(DeviceState *dev, Error **errp) vrng->rate_limit_timer = timer_new_ms(QEMU_CLOCK_VIRTUAL, check_rate_limit, vrng); vrng->activate_timer = true; - register_savevm(dev, "virtio-rng", -1, 1, virtio_rng_save, - virtio_rng_load, vrng); } static void virtio_rng_device_unrealize(DeviceState *dev, Error **errp) @@ -225,10 +213,11 @@ static void virtio_rng_device_unrealize(DeviceState *dev, Error **errp) timer_del(vrng->rate_limit_timer); timer_free(vrng->rate_limit_timer); - unregister_savevm(dev, "virtio-rng", vrng); virtio_cleanup(vdev); } +VMSTATE_VIRTIO_DEVICE(rng, 1, virtio_rng_load, virtio_vmstate_save); + static Property virtio_rng_properties[] = { /* Set a default rate limit of 2^47 bytes per minute or roughly 2TB/s. If * you have an entropy source capable of generating more entropy than this @@ -246,6 +235,7 @@ static void virtio_rng_class_init(ObjectClass *klass, void *data) VirtioDeviceClass *vdc = VIRTIO_DEVICE_CLASS(klass); dc->props = virtio_rng_properties; + dc->vmsd = &vmstate_virtio_rng; set_bit(DEVICE_CATEGORY_MISC, dc->categories); vdc->realize = virtio_rng_device_realize; vdc->unrealize = virtio_rng_device_unrealize; diff --git a/hw/virtio/virtio.c b/hw/virtio/virtio.c index 18153d5a39..752b2715d0 100644 --- a/hw/virtio/virtio.c +++ b/hw/virtio/virtio.c @@ -95,8 +95,9 @@ struct VirtQueue int inuse; uint16_t vector; - void (*handle_output)(VirtIODevice *vdev, VirtQueue *vq); - void (*handle_aio_output)(VirtIODevice *vdev, VirtQueue *vq); + VirtIOHandleOutput handle_output; + VirtIOHandleOutput handle_aio_output; + bool use_aio; VirtIODevice *vdev; EventNotifier guest_notifier; EventNotifier host_notifier; @@ -1130,8 +1131,9 @@ void virtio_queue_set_vector(VirtIODevice *vdev, int n, uint16_t vector) } } -VirtQueue *virtio_add_queue(VirtIODevice *vdev, int queue_size, - void (*handle_output)(VirtIODevice *, VirtQueue *)) +static VirtQueue *virtio_add_queue_internal(VirtIODevice *vdev, int queue_size, + VirtIOHandleOutput handle_output, + bool use_aio) { int i; @@ -1148,10 +1150,28 @@ VirtQueue *virtio_add_queue(VirtIODevice *vdev, int queue_size, vdev->vq[i].vring.align = VIRTIO_PCI_VRING_ALIGN; vdev->vq[i].handle_output = handle_output; vdev->vq[i].handle_aio_output = NULL; + vdev->vq[i].use_aio = use_aio; return &vdev->vq[i]; } +/* Add a virt queue and mark AIO. 
+ * An AIO queue will use the AioContext based event interface instead of the + * default IOHandler and EventNotifier interface. + */ +VirtQueue *virtio_add_queue_aio(VirtIODevice *vdev, int queue_size, + VirtIOHandleOutput handle_output) +{ + return virtio_add_queue_internal(vdev, queue_size, handle_output, true); +} + +/* Add a normal virt queue (on the contrary to the AIO version above. */ +VirtQueue *virtio_add_queue(VirtIODevice *vdev, int queue_size, + VirtIOHandleOutput handle_output) +{ + return virtio_add_queue_internal(vdev, queue_size, handle_output, false); +} + void virtio_del_queue(VirtIODevice *vdev, int n) { if (n < 0 || n >= VIRTIO_QUEUE_MAX) { @@ -1444,6 +1464,12 @@ void virtio_save(VirtIODevice *vdev, QEMUFile *f) vmstate_save_state(f, &vmstate_virtio, vdev, NULL); } +/* A wrapper for use as a VMState .put function */ +void virtio_vmstate_save(QEMUFile *f, void *opaque, size_t size) +{ + virtio_save(VIRTIO_DEVICE(opaque), f); +} + static int virtio_set_features_nocheck(VirtIODevice *vdev, uint64_t val) { VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev); @@ -1804,8 +1830,7 @@ static void virtio_queue_host_notifier_aio_read(EventNotifier *n) } void virtio_queue_aio_set_host_notifier_handler(VirtQueue *vq, AioContext *ctx, - void (*handle_output)(VirtIODevice *, - VirtQueue *)) + VirtIOHandleOutput handle_output) { if (handle_output) { vq->handle_aio_output = handle_output; @@ -1831,11 +1856,21 @@ static void virtio_queue_host_notifier_read(EventNotifier *n) void virtio_queue_set_host_notifier_fd_handler(VirtQueue *vq, bool assign, bool set_handler) { + AioContext *ctx = qemu_get_aio_context(); if (assign && set_handler) { - event_notifier_set_handler(&vq->host_notifier, true, + if (vq->use_aio) { + aio_set_event_notifier(ctx, &vq->host_notifier, true, virtio_queue_host_notifier_read); + } else { + event_notifier_set_handler(&vq->host_notifier, true, + virtio_queue_host_notifier_read); + } } else { - event_notifier_set_handler(&vq->host_notifier, true, NULL); + if (vq->use_aio) { + aio_set_event_notifier(ctx, &vq->host_notifier, true, NULL); + } else { + event_notifier_set_handler(&vq->host_notifier, true, NULL); + } } if (!assign) { /* Test and clear notifier before after disabling event, diff --git a/include/hw/acpi/acpi-defs.h b/include/hw/acpi/acpi-defs.h index ea9be0bdb1..41c1d95c4c 100644 --- a/include/hw/acpi/acpi-defs.h +++ b/include/hw/acpi/acpi-defs.h @@ -571,6 +571,18 @@ enum { /* * Sub-structures for DMAR */ + +/* Device scope structure for DRHD. 
*/ +struct AcpiDmarDeviceScope { + uint8_t entry_type; + uint8_t length; + uint16_t reserved; + uint8_t enumeration_id; + uint8_t bus; + uint16_t path[0]; /* list of dev:func pairs */ +} QEMU_PACKED; +typedef struct AcpiDmarDeviceScope AcpiDmarDeviceScope; + /* Type 0: Hardware Unit Definition */ struct AcpiDmarHardwareUnit { uint16_t type; @@ -579,6 +591,7 @@ struct AcpiDmarHardwareUnit { uint8_t reserved; uint16_t pci_segment; /* The PCI Segment associated with this unit */ uint64_t address; /* Base address of remapping hardware register-set */ + AcpiDmarDeviceScope scope[0]; } QEMU_PACKED; typedef struct AcpiDmarHardwareUnit AcpiDmarHardwareUnit; diff --git a/include/hw/i386/apic-msidef.h b/include/hw/i386/apic-msidef.h index 6e2eb71f2f..8b4d4cca55 100644 --- a/include/hw/i386/apic-msidef.h +++ b/include/hw/i386/apic-msidef.h @@ -25,6 +25,7 @@ #define MSI_ADDR_REDIRECTION_SHIFT 3 #define MSI_ADDR_DEST_ID_SHIFT 12 +#define MSI_ADDR_DEST_IDX_SHIFT 4 #define MSI_ADDR_DEST_ID_MASK 0x00ffff0 #endif /* HW_APIC_MSIDEF_H */ diff --git a/include/hw/i386/intel_iommu.h b/include/hw/i386/intel_iommu.h index b024ffa720..a42dbd745a 100644 --- a/include/hw/i386/intel_iommu.h +++ b/include/hw/i386/intel_iommu.h @@ -23,6 +23,10 @@ #define INTEL_IOMMU_H #include "hw/qdev.h" #include "sysemu/dma.h" +#include "hw/i386/x86-iommu.h" +#include "hw/i386/ioapic.h" +#include "hw/pci/msi.h" +#include "hw/sysbus.h" #define TYPE_INTEL_IOMMU_DEVICE "intel-iommu" #define INTEL_IOMMU_DEVICE(obj) \ @@ -34,7 +38,6 @@ #define VTD_PCI_BUS_MAX 256 #define VTD_PCI_SLOT_MAX 32 #define VTD_PCI_FUNC_MAX 8 -#define VTD_PCI_DEVFN_MAX 256 #define VTD_PCI_SLOT(devfn) (((devfn) >> 3) & 0x1f) #define VTD_PCI_FUNC(devfn) ((devfn) & 0x07) #define VTD_SID_TO_BUS(sid) (((sid) >> 8) & 0xff) @@ -44,12 +47,22 @@ #define VTD_HOST_ADDRESS_WIDTH 39 #define VTD_HAW_MASK ((1ULL << VTD_HOST_ADDRESS_WIDTH) - 1) +#define DMAR_REPORT_F_INTR (1) + +#define VTD_MSI_ADDR_HI_MASK (0xffffffff00000000ULL) +#define VTD_MSI_ADDR_HI_SHIFT (32) +#define VTD_MSI_ADDR_LO_MASK (0x00000000ffffffffULL) + typedef struct VTDContextEntry VTDContextEntry; typedef struct VTDContextCacheEntry VTDContextCacheEntry; typedef struct IntelIOMMUState IntelIOMMUState; typedef struct VTDAddressSpace VTDAddressSpace; typedef struct VTDIOTLBEntry VTDIOTLBEntry; typedef struct VTDBus VTDBus; +typedef union VTD_IR_TableEntry VTD_IR_TableEntry; +typedef union VTD_IR_MSIAddress VTD_IR_MSIAddress; +typedef struct VTDIrq VTDIrq; +typedef struct VTD_MSIMessage VTD_MSIMessage; /* Context-Entry */ struct VTDContextEntry { @@ -70,6 +83,7 @@ struct VTDAddressSpace { uint8_t devfn; AddressSpace as; MemoryRegion iommu; + MemoryRegion iommu_ir; /* Interrupt region: 0xfeeXXXXX */ IntelIOMMUState *iommu_state; VTDContextCacheEntry context_cache_entry; }; @@ -88,9 +102,155 @@ struct VTDIOTLBEntry { bool write_flags; }; +/* VT-d Source-ID Qualifier types */ +enum { + VTD_SQ_FULL = 0x00, /* Full SID verification */ + VTD_SQ_IGN_3 = 0x01, /* Ignore bit 3 */ + VTD_SQ_IGN_2_3 = 0x02, /* Ignore bits 2 & 3 */ + VTD_SQ_IGN_1_3 = 0x03, /* Ignore bits 1-3 */ + VTD_SQ_MAX, +}; + +/* VT-d Source Validation Types */ +enum { + VTD_SVT_NONE = 0x00, /* No validation */ + VTD_SVT_ALL = 0x01, /* Do full validation */ + VTD_SVT_BUS = 0x02, /* Validate bus range */ + VTD_SVT_MAX, +}; + +/* Interrupt Remapping Table Entry Definition */ +union VTD_IR_TableEntry { + struct { +#ifdef HOST_WORDS_BIGENDIAN + uint32_t dest_id:32; /* Destination ID */ + uint32_t __reserved_1:8; /* Reserved 1 */ + uint32_t vector:8; /* 
Interrupt Vector */ + uint32_t irte_mode:1; /* IRTE Mode */ + uint32_t __reserved_0:3; /* Reserved 0 */ + uint32_t __avail:4; /* Available spaces for software */ + uint32_t delivery_mode:3; /* Delivery Mode */ + uint32_t trigger_mode:1; /* Trigger Mode */ + uint32_t redir_hint:1; /* Redirection Hint */ + uint32_t dest_mode:1; /* Destination Mode */ + uint32_t fault_disable:1; /* Fault Processing Disable */ + uint32_t present:1; /* Whether entry present/available */ +#else + uint32_t present:1; /* Whether entry present/available */ + uint32_t fault_disable:1; /* Fault Processing Disable */ + uint32_t dest_mode:1; /* Destination Mode */ + uint32_t redir_hint:1; /* Redirection Hint */ + uint32_t trigger_mode:1; /* Trigger Mode */ + uint32_t delivery_mode:3; /* Delivery Mode */ + uint32_t __avail:4; /* Available spaces for software */ + uint32_t __reserved_0:3; /* Reserved 0 */ + uint32_t irte_mode:1; /* IRTE Mode */ + uint32_t vector:8; /* Interrupt Vector */ + uint32_t __reserved_1:8; /* Reserved 1 */ + uint32_t dest_id:32; /* Destination ID */ +#endif + uint16_t source_id:16; /* Source-ID */ +#ifdef HOST_WORDS_BIGENDIAN + uint64_t __reserved_2:44; /* Reserved 2 */ + uint64_t sid_vtype:2; /* Source-ID Validation Type */ + uint64_t sid_q:2; /* Source-ID Qualifier */ +#else + uint64_t sid_q:2; /* Source-ID Qualifier */ + uint64_t sid_vtype:2; /* Source-ID Validation Type */ + uint64_t __reserved_2:44; /* Reserved 2 */ +#endif + } QEMU_PACKED irte; + uint64_t data[2]; +}; + +#define VTD_IR_INT_FORMAT_COMPAT (0) /* Compatible Interrupt */ +#define VTD_IR_INT_FORMAT_REMAP (1) /* Remappable Interrupt */ + +/* Programming format for MSI/MSI-X addresses */ +union VTD_IR_MSIAddress { + struct { +#ifdef HOST_WORDS_BIGENDIAN + uint32_t __head:12; /* Should always be: 0x0fee */ + uint32_t index_l:15; /* Interrupt index bit 14-0 */ + uint32_t int_mode:1; /* Interrupt format */ + uint32_t sub_valid:1; /* SHV: Sub-Handle Valid bit */ + uint32_t index_h:1; /* Interrupt index bit 15 */ + uint32_t __not_care:2; +#else + uint32_t __not_care:2; + uint32_t index_h:1; /* Interrupt index bit 15 */ + uint32_t sub_valid:1; /* SHV: Sub-Handle Valid bit */ + uint32_t int_mode:1; /* Interrupt format */ + uint32_t index_l:15; /* Interrupt index bit 14-0 */ + uint32_t __head:12; /* Should always be: 0x0fee */ +#endif + } QEMU_PACKED addr; + uint32_t data; +}; + +/* Generic IRQ entry information */ +struct VTDIrq { + /* Used by both IOAPIC/MSI interrupt remapping */ + uint8_t trigger_mode; + uint8_t vector; + uint8_t delivery_mode; + uint32_t dest; + uint8_t dest_mode; + + /* only used by MSI interrupt remapping */ + uint8_t redir_hint; + uint8_t msi_addr_last_bits; +}; + +struct VTD_MSIMessage { + union { + struct { +#ifdef HOST_WORDS_BIGENDIAN + uint32_t __addr_head:12; /* 0xfee */ + uint32_t dest:8; + uint32_t __reserved:8; + uint32_t redir_hint:1; + uint32_t dest_mode:1; + uint32_t __not_used:2; +#else + uint32_t __not_used:2; + uint32_t dest_mode:1; + uint32_t redir_hint:1; + uint32_t __reserved:8; + uint32_t dest:8; + uint32_t __addr_head:12; /* 0xfee */ +#endif + uint32_t __addr_hi:32; + } QEMU_PACKED; + uint64_t msi_addr; + }; + union { + struct { +#ifdef HOST_WORDS_BIGENDIAN + uint16_t trigger_mode:1; + uint16_t level:1; + uint16_t __resved:3; + uint16_t delivery_mode:3; + uint16_t vector:8; +#else + uint16_t vector:8; + uint16_t delivery_mode:3; + uint16_t __resved:3; + uint16_t level:1; + uint16_t trigger_mode:1; +#endif + uint16_t __resved1:16; + } QEMU_PACKED; + uint32_t msi_data; + }; +}; + +/* When IR is 
enabled, all MSI/MSI-X data bits should be zero */ +#define VTD_IR_MSI_DATA (0) + /* The iommu (DMAR) device state struct */ struct IntelIOMMUState { - SysBusDevice busdev; + X86IOMMUState x86_iommu; MemoryRegion csrmem; uint8_t csr[DMAR_REG_SIZE]; /* register values */ uint8_t wmask[DMAR_REG_SIZE]; /* R/W bytes */ @@ -123,6 +283,12 @@ struct IntelIOMMUState { MemoryRegionIOMMUOps iommu_ops; GHashTable *vtd_as_by_busptr; /* VTDBus objects indexed by PCIBus* reference */ VTDBus *vtd_as_by_bus_num[VTD_PCI_BUS_MAX]; /* VTDBus objects indexed by bus number */ + + /* interrupt remapping */ + bool intr_enabled; /* Whether guest enabled IR */ + dma_addr_t intr_root; /* Interrupt remapping table pointer */ + uint32_t intr_size; /* Number of IR table entries */ + bool intr_eime; /* Extended interrupt mode enabled */ }; /* Find the VTD Address space associated with the given bus pointer, diff --git a/include/hw/i386/ioapic_internal.h b/include/hw/i386/ioapic_internal.h index 0542aa1131..d89ea1b63b 100644 --- a/include/hw/i386/ioapic_internal.h +++ b/include/hw/i386/ioapic_internal.h @@ -25,12 +25,14 @@ #include "hw/hw.h" #include "exec/memory.h" #include "hw/sysbus.h" +#include "qemu/notify.h" #define MAX_IOAPICS 1 #define IOAPIC_VERSION 0x11 #define IOAPIC_LVT_DEST_SHIFT 56 +#define IOAPIC_LVT_DEST_IDX_SHIFT 48 #define IOAPIC_LVT_MASKED_SHIFT 16 #define IOAPIC_LVT_TRIGGER_MODE_SHIFT 15 #define IOAPIC_LVT_REMOTE_IRR_SHIFT 14 @@ -106,6 +108,7 @@ struct IOAPICCommonState { uint8_t ioregsel; uint32_t irr; uint64_t ioredtbl[IOAPIC_NUM_PINS]; + Notifier machine_done; }; void ioapic_reset_common(DeviceState *dev); diff --git a/include/hw/i386/pc.h b/include/hw/i386/pc.h index bc937b989e..c87c5c1eec 100644 --- a/include/hw/i386/pc.h +++ b/include/hw/i386/pc.h @@ -72,6 +72,10 @@ struct PCMachineState { /* NUMA information: */ uint64_t numa_nodes; uint64_t *node_mem; + + /* Address space used by IOAPIC device. All IOAPIC interrupts + * will be translated to MSI messages in the address space. */ + AddressSpace *ioapic_as; }; #define PC_MACHINE_ACPI_DEVICE_PROP "acpi-device" diff --git a/include/hw/i386/x86-iommu.h b/include/hw/i386/x86-iommu.h new file mode 100644 index 0000000000..c48e8dd597 --- /dev/null +++ b/include/hw/i386/x86-iommu.h @@ -0,0 +1,100 @@ +/* + * Common IOMMU interface for X86 platform + * + * Copyright (C) 2016 Peter Xu, Red Hat + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + + * You should have received a copy of the GNU General Public License along + * with this program; if not, see . 
+ */ + +#ifndef IOMMU_COMMON_H +#define IOMMU_COMMON_H + +#include "hw/sysbus.h" +#include "hw/pci/pci.h" + +#define TYPE_X86_IOMMU_DEVICE ("x86-iommu") +#define X86_IOMMU_DEVICE(obj) \ + OBJECT_CHECK(X86IOMMUState, (obj), TYPE_X86_IOMMU_DEVICE) +#define X86_IOMMU_CLASS(klass) \ + OBJECT_CLASS_CHECK(X86IOMMUClass, (klass), TYPE_X86_IOMMU_DEVICE) +#define X86_IOMMU_GET_CLASS(obj) \ + OBJECT_GET_CLASS(X86IOMMUClass, obj, TYPE_X86_IOMMU_DEVICE) + +#define X86_IOMMU_PCI_DEVFN_MAX 256 +#define X86_IOMMU_SID_INVALID (0xffff) + +typedef struct X86IOMMUState X86IOMMUState; +typedef struct X86IOMMUClass X86IOMMUClass; + +struct X86IOMMUClass { + SysBusDeviceClass parent; + /* Intel/AMD specific realize() hook */ + DeviceRealize realize; + /* MSI-based interrupt remapping */ + int (*int_remap)(X86IOMMUState *iommu, MSIMessage *src, + MSIMessage *dst, uint16_t sid); +}; + +/** + * iec_notify_fn - IEC (Interrupt Entry Cache) notifier hook, + * triggered when IR invalidation happens. + * @private: private data + * @global: whether this is a global IEC invalidation + * @index: IRTE index to invalidate (start from) + * @mask: invalidation mask + */ +typedef void (*iec_notify_fn)(void *private, bool global, + uint32_t index, uint32_t mask); + +struct IEC_Notifier { + iec_notify_fn iec_notify; + void *private; + QLIST_ENTRY(IEC_Notifier) list; +}; +typedef struct IEC_Notifier IEC_Notifier; + +struct X86IOMMUState { + SysBusDevice busdev; + bool intr_supported; /* Whether vIOMMU supports IR */ + QLIST_HEAD(, IEC_Notifier) iec_notifiers; /* IEC notify list */ +}; + +/** + * x86_iommu_get_default - get default IOMMU device + * @return: pointer to default IOMMU device + */ +X86IOMMUState *x86_iommu_get_default(void); + +/** + * x86_iommu_iec_register_notifier - register IEC (Interrupt Entry + * Cache) notifiers + * @iommu: IOMMU device to register + * @fn: IEC notifier hook function + * @data: notifier private data + */ +void x86_iommu_iec_register_notifier(X86IOMMUState *iommu, + iec_notify_fn fn, void *data); + +/** + * x86_iommu_iec_notify_all - Notify IEC invalidations + * @iommu: IOMMU device that sends the notification + * @global: whether this is a global invalidation. If true, @index + * and @mask are undefined. + * @index: starting index of interrupt entry to invalidate + * @mask: index mask for the invalidation + */ +void x86_iommu_iec_notify_all(X86IOMMUState *iommu, bool global, + uint32_t index, uint32_t mask); + +#endif diff --git a/include/hw/pci-host/q35.h b/include/hw/pci-host/q35.h index 0d64032d87..94486fdd37 100644 --- a/include/hw/pci-host/q35.h +++ b/include/hw/pci-host/q35.h @@ -179,4 +179,12 @@ typedef struct Q35PCIHost { uint64_t mch_mcfg_base(void); +/* + * Arbitary but unique BNF number for IOAPIC device. 
+ * + * TODO: make sure there would have no conflict with real PCI bus + */ +#define Q35_PSEUDO_BUS_PLATFORM (0xff) +#define Q35_PSEUDO_DEVFN_IOAPIC (0x00) + #endif /* HW_Q35_H */ diff --git a/include/hw/pci/pci.h b/include/hw/pci/pci.h index 9ed1624f09..74d797d1cf 100644 --- a/include/hw/pci/pci.h +++ b/include/hw/pci/pci.h @@ -805,4 +805,6 @@ extern const VMStateDescription vmstate_pci_device; .offset = vmstate_offset_pointer(_state, _field, PCIDevice), \ } +MSIMessage pci_get_msi_message(PCIDevice *dev, int vector); + #endif diff --git a/include/hw/virtio/virtio-gpu.h b/include/hw/virtio/virtio-gpu.h index 325354f9f3..e4f424ad4a 100644 --- a/include/hw/virtio/virtio-gpu.h +++ b/include/hw/virtio/virtio-gpu.h @@ -118,6 +118,8 @@ typedef struct VirtIOGPU { uint32_t req_3d; uint32_t bytes_3d; } stats; + + Error *migration_blocker; } VirtIOGPU; extern const GraphicHwOps virtio_gpu_ops; diff --git a/include/hw/virtio/virtio-scsi.h b/include/hw/virtio/virtio-scsi.h index 5e3f088f9a..a1e0cfb449 100644 --- a/include/hw/virtio/virtio-scsi.h +++ b/include/hw/virtio/virtio-scsi.h @@ -121,11 +121,9 @@ typedef struct VirtIOSCSIReq { } req; } VirtIOSCSIReq; -typedef void (*HandleOutput)(VirtIODevice *, VirtQueue *); - void virtio_scsi_common_realize(DeviceState *dev, Error **errp, - HandleOutput ctrl, HandleOutput evt, - HandleOutput cmd); + VirtIOHandleOutput ctrl, VirtIOHandleOutput evt, + VirtIOHandleOutput cmd); void virtio_scsi_common_unrealize(DeviceState *dev, Error **errp); void virtio_scsi_handle_event_vq(VirtIOSCSI *s, VirtQueue *vq); diff --git a/include/hw/virtio/virtio.h b/include/hw/virtio/virtio.h index 8a681f56f1..d2490c1975 100644 --- a/include/hw/virtio/virtio.h +++ b/include/hw/virtio/virtio.h @@ -138,9 +138,13 @@ void virtio_cleanup(VirtIODevice *vdev); /* Set the child bus name. 
*/ void virtio_device_set_child_bus_name(VirtIODevice *vdev, char *bus_name); +typedef void (*VirtIOHandleOutput)(VirtIODevice *, VirtQueue *); + VirtQueue *virtio_add_queue(VirtIODevice *vdev, int queue_size, - void (*handle_output)(VirtIODevice *, - VirtQueue *)); + VirtIOHandleOutput handle_output); + +VirtQueue *virtio_add_queue_aio(VirtIODevice *vdev, int queue_size, + VirtIOHandleOutput handle_output); void virtio_del_queue(VirtIODevice *vdev, int n); @@ -167,6 +171,26 @@ bool virtio_should_notify(VirtIODevice *vdev, VirtQueue *vq); void virtio_notify(VirtIODevice *vdev, VirtQueue *vq); void virtio_save(VirtIODevice *vdev, QEMUFile *f); +void virtio_vmstate_save(QEMUFile *f, void *opaque, size_t size); + +#define VMSTATE_VIRTIO_DEVICE(devname, v, getf, putf) \ + static const VMStateDescription vmstate_virtio_ ## devname = { \ + .name = "virtio-" #devname , \ + .minimum_version_id = v, \ + .version_id = v, \ + .fields = (VMStateField[]) { \ + { \ + .name = "virtio", \ + .info = &(const VMStateInfo) {\ + .name = "virtio", \ + .get = getf, \ + .put = putf, \ + }, \ + .flags = VMS_SINGLE, \ + }, \ + VMSTATE_END_OF_LIST() \ + } \ + } int virtio_load(VirtIODevice *vdev, QEMUFile *f, int version_id); diff --git a/include/sysemu/kvm.h b/include/sysemu/kvm.h index ad6f837bb4..c9c243631e 100644 --- a/include/sysemu/kvm.h +++ b/include/sysemu/kvm.h @@ -359,13 +359,18 @@ void kvm_arch_init_irq_routing(KVMState *s); int kvm_arch_fixup_msi_route(struct kvm_irq_routing_entry *route, uint64_t address, uint32_t data, PCIDevice *dev); +/* Notify arch about newly added MSI routes */ +int kvm_arch_add_msi_route_post(struct kvm_irq_routing_entry *route, + int vector, PCIDevice *dev); +/* Notify arch about released MSI routes */ +int kvm_arch_release_virq_post(int virq); + int kvm_arch_msi_data_to_gsi(uint32_t data); int kvm_set_irq(KVMState *s, int irq, int level); int kvm_irqchip_send_msi(KVMState *s, MSIMessage msg); void kvm_irqchip_add_irq_route(KVMState *s, int gsi, int irqchip, int pin); -void kvm_irqchip_commit_routes(KVMState *s); void kvm_put_apic_state(DeviceState *d, struct kvm_lapic_state *kapic); void kvm_get_apic_state(DeviceState *d, struct kvm_lapic_state *kapic); @@ -474,9 +479,21 @@ static inline void cpu_synchronize_post_init(CPUState *cpu) } } -int kvm_irqchip_add_msi_route(KVMState *s, MSIMessage msg, PCIDevice *dev); +/** + * kvm_irqchip_add_msi_route - Add MSI route for specific vector + * @s: KVM state + * @vector: which vector to add. This can be either MSI/MSIX + * vector. The function will automatically detect whether + * MSI/MSIX is enabled, and fetch corresponding MSI + * message. + * @dev: Owner PCI device to add the route. If @dev is specified + * as @NULL, an empty MSI message will be inited. + * @return: virq (>=0) when success, errno (<0) when failed. 
+ */ +int kvm_irqchip_add_msi_route(KVMState *s, int vector, PCIDevice *dev); int kvm_irqchip_update_msi_route(KVMState *s, int virq, MSIMessage msg, PCIDevice *dev); +void kvm_irqchip_commit_routes(KVMState *s); void kvm_irqchip_release_virq(KVMState *s, int virq); int kvm_irqchip_add_adapter_route(KVMState *s, AdapterInfo *adapter); diff --git a/kvm-all.c b/kvm-all.c index a88f917fda..ef81ca532a 100644 --- a/kvm-all.c +++ b/kvm-all.c @@ -25,6 +25,7 @@ #include "qemu/error-report.h" #include "hw/hw.h" #include "hw/pci/msi.h" +#include "hw/pci/msix.h" #include "hw/s390x/adapter.h" #include "exec/gdbstub.h" #include "sysemu/kvm_int.h" @@ -1047,6 +1048,7 @@ void kvm_irqchip_commit_routes(KVMState *s) int ret; s->irq_routes->flags = 0; + trace_kvm_irqchip_commit_routes(); ret = kvm_vm_ioctl(s, KVM_SET_GSI_ROUTING, s->irq_routes); assert(ret == 0); } @@ -1093,8 +1095,6 @@ static int kvm_update_routing_entry(KVMState *s, *entry = *new_entry; - kvm_irqchip_commit_routes(s); - return 0; } @@ -1132,6 +1132,7 @@ void kvm_irqchip_release_virq(KVMState *s, int virq) } } clear_gsi(s, virq); + kvm_arch_release_virq_post(virq); } static unsigned int kvm_hash_msi(uint32_t data) @@ -1237,10 +1238,15 @@ int kvm_irqchip_send_msi(KVMState *s, MSIMessage msg) return kvm_set_irq(s, route->kroute.gsi, 1); } -int kvm_irqchip_add_msi_route(KVMState *s, MSIMessage msg, PCIDevice *dev) +int kvm_irqchip_add_msi_route(KVMState *s, int vector, PCIDevice *dev) { struct kvm_irq_routing_entry kroute = {}; int virq; + MSIMessage msg = {0, 0}; + + if (dev) { + msg = pci_get_msi_message(dev, vector); + } if (kvm_gsi_direct_mapping()) { return kvm_arch_msi_data_to_gsi(msg.data); @@ -1266,7 +1272,10 @@ int kvm_irqchip_add_msi_route(KVMState *s, MSIMessage msg, PCIDevice *dev) return -EINVAL; } + trace_kvm_irqchip_add_msi_route(virq); + kvm_add_routing_entry(s, &kroute); + kvm_arch_add_msi_route_post(&kroute, vector, dev); kvm_irqchip_commit_routes(s); return virq; @@ -1295,6 +1304,8 @@ int kvm_irqchip_update_msi_route(KVMState *s, int virq, MSIMessage msg, return -EINVAL; } + trace_kvm_irqchip_update_msi_route(virq); + return kvm_update_routing_entry(s, &kroute); } @@ -1390,7 +1401,7 @@ int kvm_irqchip_send_msi(KVMState *s, MSIMessage msg) abort(); } -int kvm_irqchip_add_msi_route(KVMState *s, MSIMessage msg) +int kvm_irqchip_add_msi_route(KVMState *s, int vector, PCIDevice *dev) { return -ENOSYS; } diff --git a/kvm-stub.c b/kvm-stub.c index 07c09d1141..64e23f6be0 100644 --- a/kvm-stub.c +++ b/kvm-stub.c @@ -116,7 +116,7 @@ int kvm_on_sigbus(int code, void *addr) } #ifndef CONFIG_USER_ONLY -int kvm_irqchip_add_msi_route(KVMState *s, MSIMessage msg, PCIDevice *dev) +int kvm_irqchip_add_msi_route(KVMState *s, int vector, PCIDevice *dev) { return -ENOSYS; } @@ -135,6 +135,10 @@ int kvm_irqchip_update_msi_route(KVMState *s, int virq, MSIMessage msg, return -ENOSYS; } +void kvm_irqchip_commit_routes(KVMState *s) +{ +} + int kvm_irqchip_add_adapter_route(KVMState *s, AdapterInfo *adapter) { return -ENOSYS; diff --git a/target-arm/kvm.c b/target-arm/kvm.c index 5c2bd7a10b..dbe393c109 100644 --- a/target-arm/kvm.c +++ b/target-arm/kvm.c @@ -622,6 +622,17 @@ int kvm_arch_fixup_msi_route(struct kvm_irq_routing_entry *route, return 0; } +int kvm_arch_add_msi_route_post(struct kvm_irq_routing_entry *route, + int vector, PCIDevice *dev) +{ + return 0; +} + +int kvm_arch_release_virq_post(int virq) +{ + return 0; +} + int kvm_arch_msi_data_to_gsi(uint32_t data) { return (data - 32) & 0xffff; diff --git a/target-i386/kvm.c 
b/target-i386/kvm.c index df28dd254a..9697e16fdd 100644 --- a/target-i386/kvm.c +++ b/target-i386/kvm.c @@ -35,6 +35,8 @@ #include "hw/i386/apic.h" #include "hw/i386/apic_internal.h" #include "hw/i386/apic-msidef.h" +#include "hw/i386/intel_iommu.h" +#include "hw/i386/x86-iommu.h" #include "exec/ioport.h" #include "standard-headers/asm-x86/hyperv.h" @@ -42,6 +44,7 @@ #include "hw/pci/msi.h" #include "migration/migration.h" #include "exec/memattrs.h" +#include "trace.h" //#define DEBUG_KVM @@ -3233,8 +3236,7 @@ void kvm_arch_init_irq_routing(KVMState *s) /* If the ioapic is in QEMU and the lapics are in KVM, reserve MSI routes for signaling interrupts to the local apics. */ for (i = 0; i < IOAPIC_NUM_PINS; i++) { - struct MSIMessage msg = { 0x0, 0x0 }; - if (kvm_irqchip_add_msi_route(s, msg, NULL) < 0) { + if (kvm_irqchip_add_msi_route(s, 0, NULL) < 0) { error_report("Could not enable split IRQ mode."); exit(1); } @@ -3404,6 +3406,109 @@ int kvm_device_msix_deassign(KVMState *s, uint32_t dev_id) int kvm_arch_fixup_msi_route(struct kvm_irq_routing_entry *route, uint64_t address, uint32_t data, PCIDevice *dev) { + X86IOMMUState *iommu = x86_iommu_get_default(); + + if (iommu) { + int ret; + MSIMessage src, dst; + X86IOMMUClass *class = X86_IOMMU_GET_CLASS(iommu); + + src.address = route->u.msi.address_hi; + src.address <<= VTD_MSI_ADDR_HI_SHIFT; + src.address |= route->u.msi.address_lo; + src.data = route->u.msi.data; + + ret = class->int_remap(iommu, &src, &dst, dev ? \ + pci_requester_id(dev) : \ + X86_IOMMU_SID_INVALID); + if (ret) { + trace_kvm_x86_fixup_msi_error(route->gsi); + return 1; + } + + route->u.msi.address_hi = dst.address >> VTD_MSI_ADDR_HI_SHIFT; + route->u.msi.address_lo = dst.address & VTD_MSI_ADDR_LO_MASK; + route->u.msi.data = dst.data; + } + + return 0; +} + +typedef struct MSIRouteEntry MSIRouteEntry; + +struct MSIRouteEntry { + PCIDevice *dev; /* Device pointer */ + int vector; /* MSI/MSIX vector index */ + int virq; /* Virtual IRQ index */ + QLIST_ENTRY(MSIRouteEntry) list; +}; + +/* List of used GSI routes */ +static QLIST_HEAD(, MSIRouteEntry) msi_route_list = \ + QLIST_HEAD_INITIALIZER(msi_route_list); + +static void kvm_update_msi_routes_all(void *private, bool global, + uint32_t index, uint32_t mask) +{ + int cnt = 0; + MSIRouteEntry *entry; + MSIMessage msg; + /* TODO: explicit route update */ + QLIST_FOREACH(entry, &msi_route_list, list) { + cnt++; + msg = pci_get_msi_message(entry->dev, entry->vector); + kvm_irqchip_update_msi_route(kvm_state, entry->virq, + msg, entry->dev); + } + kvm_irqchip_commit_routes(kvm_state); + trace_kvm_x86_update_msi_routes(cnt); +} + +int kvm_arch_add_msi_route_post(struct kvm_irq_routing_entry *route, + int vector, PCIDevice *dev) +{ + static bool notify_list_inited = false; + MSIRouteEntry *entry; + + if (!dev) { + /* These are (possibly) IOAPIC routes only used for split + * kernel irqchip mode, while what we are housekeeping are + * PCI devices only. */ + return 0; + } + + entry = g_new0(MSIRouteEntry, 1); + entry->dev = dev; + entry->vector = vector; + entry->virq = route->gsi; + QLIST_INSERT_HEAD(&msi_route_list, entry, list); + + trace_kvm_x86_add_msi_route(route->gsi); + + if (!notify_list_inited) { + /* For the first time we do add route, add ourselves into + * IOMMU's IEC notify list if needed. 
*/ + X86IOMMUState *iommu = x86_iommu_get_default(); + if (iommu) { + x86_iommu_iec_register_notifier(iommu, + kvm_update_msi_routes_all, + NULL); + } + notify_list_inited = true; + } + return 0; +} + +int kvm_arch_release_virq_post(int virq) +{ + MSIRouteEntry *entry, *next; + QLIST_FOREACH_SAFE(entry, &msi_route_list, list, next) { + if (entry->virq == virq) { + trace_kvm_x86_remove_msi_route(virq); + QLIST_REMOVE(entry, list); + break; + } + } return 0; } diff --git a/target-i386/trace-events b/target-i386/trace-events new file mode 100644 index 0000000000..ccc49e31e8 --- /dev/null +++ b/target-i386/trace-events @@ -0,0 +1,7 @@ +# See docs/trace-events.txt for syntax documentation. + +# target-i386/kvm.c +kvm_x86_fixup_msi_error(uint32_t gsi) "VT-d failed to remap interrupt for GSI %" PRIu32 +kvm_x86_add_msi_route(int virq) "Adding route entry for virq %d" +kvm_x86_remove_msi_route(int virq) "Removing route entry for virq %d" +kvm_x86_update_msi_routes(int num) "Updated %d MSI routes" diff --git a/target-mips/kvm.c b/target-mips/kvm.c index f3f832d498..dcf5fbba0c 100644 --- a/target-mips/kvm.c +++ b/target-mips/kvm.c @@ -1043,6 +1043,17 @@ int kvm_arch_fixup_msi_route(struct kvm_irq_routing_entry *route, return 0; } +int kvm_arch_add_msi_route_post(struct kvm_irq_routing_entry *route, + int vector, PCIDevice *dev) +{ + return 0; +} + +int kvm_arch_release_virq_post(int virq) +{ + return 0; +} + int kvm_arch_msi_data_to_gsi(uint32_t data) { abort(); diff --git a/target-ppc/kvm.c b/target-ppc/kvm.c index 7a8f5559d9..91e6daf4fd 100644 --- a/target-ppc/kvm.c +++ b/target-ppc/kvm.c @@ -2621,6 +2621,17 @@ int kvm_arch_fixup_msi_route(struct kvm_irq_routing_entry *route, return 0; } +int kvm_arch_add_msi_route_post(struct kvm_irq_routing_entry *route, + int vector, PCIDevice *dev) +{ + return 0; +} + +int kvm_arch_release_virq_post(int virq) +{ + return 0; +} + int kvm_arch_msi_data_to_gsi(uint32_t data) { return data & 0xffff; diff --git a/target-s390x/kvm.c b/target-s390x/kvm.c index 2991bff6a4..80ac6215fb 100644 --- a/target-s390x/kvm.c +++ b/target-s390x/kvm.c @@ -2267,6 +2267,17 @@ int kvm_arch_fixup_msi_route(struct kvm_irq_routing_entry *route, return 0; } +int kvm_arch_add_msi_route_post(struct kvm_irq_routing_entry *route, + int vector, PCIDevice *dev) +{ + return 0; +} + +int kvm_arch_release_virq_post(int virq) +{ + return 0; +} + int kvm_arch_msi_data_to_gsi(uint32_t data) { abort(); diff --git a/tests/prom-env-test.c b/tests/prom-env-test.c index 6df57d224b..7a628574c3 100644 --- a/tests/prom-env-test.c +++ b/tests/prom-env-test.c @@ -30,7 +30,7 @@ static void check_guest_memory(void) int i; /* Poll until code has run and modified memory. 
Wait at most 30 seconds */ - for (i = 0; i < 3000; ++i) { + for (i = 0; i < 10000; ++i) { signature = readl(ADDRESS); if (signature == MAGIC) { break; diff --git a/trace-events b/trace-events index 476705996b..52c6a6cccf 100644 --- a/trace-events +++ b/trace-events @@ -118,6 +118,9 @@ kvm_run_exit(int cpu_index, uint32_t reason) "cpu_index %d, reason %d" kvm_device_ioctl(int fd, int type, void *arg) "dev fd %d, type 0x%x, arg %p" kvm_failed_reg_get(uint64_t id, const char *msg) "Warning: Unable to retrieve ONEREG %" PRIu64 " from KVM: %s" kvm_failed_reg_set(uint64_t id, const char *msg) "Warning: Unable to set ONEREG %" PRIu64 " to KVM: %s" +kvm_irqchip_commit_routes(void) "" +kvm_irqchip_add_msi_route(int virq) "Adding MSI route virq=%d" +kvm_irqchip_update_msi_route(int virq) "Updating MSI route virq=%d" # TCG related tracing (mostly disabled by default) # cpu-exec.c