diff --git a/hw/vfio_pci.c b/hw/vfio_pci.c index 0473ae866c..7c27834e06 100644 --- a/hw/vfio_pci.c +++ b/hw/vfio_pci.c @@ -185,6 +185,21 @@ static void vfio_unmask_intx(VFIODevice *vdev) ioctl(vdev->fd, VFIO_DEVICE_SET_IRQS, &irq_set); } +#ifdef CONFIG_KVM /* Unused outside of CONFIG_KVM code */ +static void vfio_mask_intx(VFIODevice *vdev) +{ + struct vfio_irq_set irq_set = { + .argsz = sizeof(irq_set), + .flags = VFIO_IRQ_SET_DATA_NONE | VFIO_IRQ_SET_ACTION_MASK, + .index = VFIO_PCI_INTX_IRQ_INDEX, + .start = 0, + .count = 1, + }; + + ioctl(vdev->fd, VFIO_DEVICE_SET_IRQS, &irq_set); +} +#endif + /* * Disabling BAR mmaping can be slow, but toggling it around INTx can * also be a huge overhead. We try to get the best of both worlds by @@ -248,6 +263,161 @@ static void vfio_eoi(VFIODevice *vdev) vfio_unmask_intx(vdev); } +static void vfio_enable_intx_kvm(VFIODevice *vdev) +{ +#ifdef CONFIG_KVM + struct kvm_irqfd irqfd = { + .fd = event_notifier_get_fd(&vdev->intx.interrupt), + .gsi = vdev->intx.route.irq, + .flags = KVM_IRQFD_FLAG_RESAMPLE, + }; + struct vfio_irq_set *irq_set; + int ret, argsz; + int32_t *pfd; + + if (!kvm_irqchip_in_kernel() || + vdev->intx.route.mode != PCI_INTX_ENABLED || + !kvm_check_extension(kvm_state, KVM_CAP_IRQFD_RESAMPLE)) { + return; + } + + /* Get to a known interrupt state */ + qemu_set_fd_handler(irqfd.fd, NULL, NULL, vdev); + vfio_mask_intx(vdev); + vdev->intx.pending = false; + qemu_set_irq(vdev->pdev.irq[vdev->intx.pin], 0); + + /* Get an eventfd for resample/unmask */ + if (event_notifier_init(&vdev->intx.unmask, 0)) { + error_report("vfio: Error: event_notifier_init failed eoi\n"); + goto fail; + } + + /* KVM triggers it, VFIO listens for it */ + irqfd.resamplefd = event_notifier_get_fd(&vdev->intx.unmask); + + if (kvm_vm_ioctl(kvm_state, KVM_IRQFD, &irqfd)) { + error_report("vfio: Error: Failed to setup resample irqfd: %m\n"); + goto fail_irqfd; + } + + argsz = sizeof(*irq_set) + sizeof(*pfd); + + irq_set = g_malloc0(argsz); + irq_set->argsz = argsz; + irq_set->flags = VFIO_IRQ_SET_DATA_EVENTFD | VFIO_IRQ_SET_ACTION_UNMASK; + irq_set->index = VFIO_PCI_INTX_IRQ_INDEX; + irq_set->start = 0; + irq_set->count = 1; + pfd = (int32_t *)&irq_set->data; + + *pfd = irqfd.resamplefd; + + ret = ioctl(vdev->fd, VFIO_DEVICE_SET_IRQS, irq_set); + g_free(irq_set); + if (ret) { + error_report("vfio: Error: Failed to setup INTx unmask fd: %m\n"); + goto fail_vfio; + } + + /* Let'em rip */ + vfio_unmask_intx(vdev); + + vdev->intx.kvm_accel = true; + + DPRINTF("%s(%04x:%02x:%02x.%x) KVM INTx accel enabled\n", + __func__, vdev->host.domain, vdev->host.bus, + vdev->host.slot, vdev->host.function); + + return; + +fail_vfio: + irqfd.flags = KVM_IRQFD_FLAG_DEASSIGN; + kvm_vm_ioctl(kvm_state, KVM_IRQFD, &irqfd); +fail_irqfd: + event_notifier_cleanup(&vdev->intx.unmask); +fail: + qemu_set_fd_handler(irqfd.fd, vfio_intx_interrupt, NULL, vdev); + vfio_unmask_intx(vdev); +#endif +} + +static void vfio_disable_intx_kvm(VFIODevice *vdev) +{ +#ifdef CONFIG_KVM + struct kvm_irqfd irqfd = { + .fd = event_notifier_get_fd(&vdev->intx.interrupt), + .gsi = vdev->intx.route.irq, + .flags = KVM_IRQFD_FLAG_DEASSIGN, + }; + + if (!vdev->intx.kvm_accel) { + return; + } + + /* + * Get to a known state, hardware masked, QEMU ready to accept new + * interrupts, QEMU IRQ de-asserted. + */ + vfio_mask_intx(vdev); + vdev->intx.pending = false; + qemu_set_irq(vdev->pdev.irq[vdev->intx.pin], 0); + + /* Tell KVM to stop listening for an INTx irqfd */ + if (kvm_vm_ioctl(kvm_state, KVM_IRQFD, &irqfd)) { + error_report("vfio: Error: Failed to disable INTx irqfd: %m\n"); + } + + /* We only need to close the eventfd for VFIO to cleanup the kernel side */ + event_notifier_cleanup(&vdev->intx.unmask); + + /* QEMU starts listening for interrupt events. */ + qemu_set_fd_handler(irqfd.fd, vfio_intx_interrupt, NULL, vdev); + + vdev->intx.kvm_accel = false; + + /* If we've missed an event, let it re-fire through QEMU */ + vfio_unmask_intx(vdev); + + DPRINTF("%s(%04x:%02x:%02x.%x) KVM INTx accel disabled\n", + __func__, vdev->host.domain, vdev->host.bus, + vdev->host.slot, vdev->host.function); +#endif +} + +static void vfio_update_irq(PCIDevice *pdev) +{ + VFIODevice *vdev = DO_UPCAST(VFIODevice, pdev, pdev); + PCIINTxRoute route; + + if (vdev->interrupt != VFIO_INT_INTx) { + return; + } + + route = pci_device_route_intx_to_irq(&vdev->pdev, vdev->intx.pin); + + if (!pci_intx_route_changed(&vdev->intx.route, &route)) { + return; /* Nothing changed */ + } + + DPRINTF("%s(%04x:%02x:%02x.%x) IRQ moved %d -> %d\n", __func__, + vdev->host.domain, vdev->host.bus, vdev->host.slot, + vdev->host.function, vdev->intx.route.irq, route.irq); + + vfio_disable_intx_kvm(vdev); + + vdev->intx.route = route; + + if (route.mode != PCI_INTX_ENABLED) { + return; + } + + vfio_enable_intx_kvm(vdev); + + /* Re-enable the interrupt in cased we missed an EOI */ + vfio_eoi(vdev); +} + static int vfio_enable_intx(VFIODevice *vdev) { uint8_t pin = vfio_pci_read_config(&vdev->pdev, PCI_INTERRUPT_PIN, 1); @@ -262,6 +432,18 @@ static int vfio_enable_intx(VFIODevice *vdev) vfio_disable_interrupts(vdev); vdev->intx.pin = pin - 1; /* Pin A (1) -> irq[0] */ + +#ifdef CONFIG_KVM + /* + * Only conditional to avoid generating error messages on platforms + * where we won't actually use the result anyway. + */ + if (kvm_check_extension(kvm_state, KVM_CAP_IRQFD_RESAMPLE)) { + vdev->intx.route = pci_device_route_intx_to_irq(&vdev->pdev, + vdev->intx.pin); + } +#endif + ret = event_notifier_init(&vdev->intx.interrupt, 0); if (ret) { error_report("vfio: Error: event_notifier_init failed\n"); @@ -290,6 +472,8 @@ static int vfio_enable_intx(VFIODevice *vdev) return -errno; } + vfio_enable_intx_kvm(vdev); + vdev->interrupt = VFIO_INT_INTx; DPRINTF("%s(%04x:%02x:%02x.%x)\n", __func__, vdev->host.domain, @@ -303,6 +487,7 @@ static void vfio_disable_intx(VFIODevice *vdev) int fd; qemu_del_timer(vdev->intx.mmap_timer); + vfio_disable_intx_kvm(vdev); vfio_disable_irqindex(vdev, VFIO_PCI_INTX_IRQ_INDEX); vdev->intx.pending = false; qemu_set_irq(vdev->pdev.irq[vdev->intx.pin], 0); @@ -503,28 +688,6 @@ static void vfio_msix_vector_release(PCIDevice *pdev, unsigned int nr) vector->use = false; } -/* TODO This should move to msi.c */ -static MSIMessage msi_get_msg(PCIDevice *pdev, unsigned int vector) -{ - uint16_t flags = pci_get_word(pdev->config + pdev->msi_cap + PCI_MSI_FLAGS); - bool msi64bit = flags & PCI_MSI_FLAGS_64BIT; - MSIMessage msg; - - if (msi64bit) { - msg.address = pci_get_quad(pdev->config + - pdev->msi_cap + PCI_MSI_ADDRESS_LO); - } else { - msg.address = pci_get_long(pdev->config + - pdev->msi_cap + PCI_MSI_ADDRESS_LO); - } - - msg.data = pci_get_word(pdev->config + pdev->msi_cap + - (msi64bit ? PCI_MSI_DATA_64 : PCI_MSI_DATA_32)); - msg.data += vector; - - return msg; -} - static void vfio_enable_msix(VFIODevice *vdev) { vfio_disable_interrupts(vdev); @@ -563,7 +726,7 @@ retry: error_report("vfio: Error: event_notifier_init failed\n"); } - msg = msi_get_msg(&vdev->pdev, i); + msg = msi_get_message(&vdev->pdev, i); /* * Attempt to enable route through KVM irqchip, @@ -1839,6 +2002,7 @@ static int vfio_initfn(PCIDevice *pdev) if (vfio_pci_read_config(&vdev->pdev, PCI_INTERRUPT_PIN, 1)) { vdev->intx.mmap_timer = qemu_new_timer_ms(vm_clock, vfio_intx_mmap_enable, vdev); + pci_device_set_intx_routing_notifier(&vdev->pdev, vfio_update_irq); ret = vfio_enable_intx(vdev); if (ret) { goto out_teardown; diff --git a/linux-headers/asm-powerpc/kvm_para.h b/linux-headers/asm-powerpc/kvm_para.h index c047a84000..5e04383a1d 100644 --- a/linux-headers/asm-powerpc/kvm_para.h +++ b/linux-headers/asm-powerpc/kvm_para.h @@ -17,8 +17,8 @@ * Authors: Hollis Blanchard */ -#ifndef __POWERPC_KVM_PARA_H__ -#define __POWERPC_KVM_PARA_H__ +#ifndef _UAPI__POWERPC_KVM_PARA_H__ +#define _UAPI__POWERPC_KVM_PARA_H__ #include @@ -87,4 +87,4 @@ struct kvm_vcpu_arch_shared { #define KVM_MAGIC_FEAT_MAS0_TO_SPRG7 (1 << 1) -#endif /* __POWERPC_KVM_PARA_H__ */ +#endif /* _UAPI__POWERPC_KVM_PARA_H__ */ diff --git a/linux-headers/asm-s390/kvm_para.h b/linux-headers/asm-s390/kvm_para.h index 870051f645..ff1f4e7b30 100644 --- a/linux-headers/asm-s390/kvm_para.h +++ b/linux-headers/asm-s390/kvm_para.h @@ -1,5 +1,5 @@ /* - * definition for paravirtual devices on s390 + * User API definitions for paravirtual devices on s390 * * Copyright IBM Corp. 2008 * @@ -9,9 +9,3 @@ * * Author(s): Christian Borntraeger */ - -#ifndef __S390_KVM_PARA_H -#define __S390_KVM_PARA_H - - -#endif /* __S390_KVM_PARA_H */ diff --git a/linux-headers/asm-x86/kvm.h b/linux-headers/asm-x86/kvm.h index 246617efd6..a65ec29e6f 100644 --- a/linux-headers/asm-x86/kvm.h +++ b/linux-headers/asm-x86/kvm.h @@ -9,6 +9,22 @@ #include #include +#define DE_VECTOR 0 +#define DB_VECTOR 1 +#define BP_VECTOR 3 +#define OF_VECTOR 4 +#define BR_VECTOR 5 +#define UD_VECTOR 6 +#define NM_VECTOR 7 +#define DF_VECTOR 8 +#define TS_VECTOR 10 +#define NP_VECTOR 11 +#define SS_VECTOR 12 +#define GP_VECTOR 13 +#define PF_VECTOR 14 +#define MF_VECTOR 16 +#define MC_VECTOR 18 + /* Select x86 specific features in */ #define __KVM_HAVE_PIT #define __KVM_HAVE_IOAPIC @@ -25,6 +41,7 @@ #define __KVM_HAVE_DEBUGREGS #define __KVM_HAVE_XSAVE #define __KVM_HAVE_XCRS +#define __KVM_HAVE_READONLY_MEM /* Architectural interrupt line count. */ #define KVM_NR_INTERRUPTS 256 diff --git a/linux-headers/linux/kvm.h b/linux-headers/linux/kvm.h index 4b9e575dd0..81d2feb7ab 100644 --- a/linux-headers/linux/kvm.h +++ b/linux-headers/linux/kvm.h @@ -101,9 +101,13 @@ struct kvm_userspace_memory_region { __u64 userspace_addr; /* start of the userspace allocated memory */ }; -/* for kvm_memory_region::flags */ -#define KVM_MEM_LOG_DIRTY_PAGES 1UL -#define KVM_MEMSLOT_INVALID (1UL << 1) +/* + * The bit 0 ~ bit 15 of kvm_memory_region::flags are visible for userspace, + * other bits are reserved for kvm internal use which are defined in + * include/linux/kvm_host.h. + */ +#define KVM_MEM_LOG_DIRTY_PAGES (1UL << 0) +#define KVM_MEM_READONLY (1UL << 1) /* for KVM_IRQ_LINE */ struct kvm_irq_level { @@ -618,6 +622,10 @@ struct kvm_ppc_smmu_info { #define KVM_CAP_PPC_GET_SMMU_INFO 78 #define KVM_CAP_S390_COW 79 #define KVM_CAP_PPC_ALLOC_HTAB 80 +#ifdef __KVM_HAVE_READONLY_MEM +#define KVM_CAP_READONLY_MEM 81 +#endif +#define KVM_CAP_IRQFD_RESAMPLE 82 #ifdef KVM_CAP_IRQ_ROUTING @@ -683,12 +691,21 @@ struct kvm_xen_hvm_config { #endif #define KVM_IRQFD_FLAG_DEASSIGN (1 << 0) +/* + * Available with KVM_CAP_IRQFD_RESAMPLE + * + * KVM_IRQFD_FLAG_RESAMPLE indicates resamplefd is valid and specifies + * the irqfd to operate in resampling mode for level triggered interrupt + * emlation. See Documentation/virtual/kvm/api.txt. + */ +#define KVM_IRQFD_FLAG_RESAMPLE (1 << 1) struct kvm_irqfd { __u32 fd; __u32 gsi; __u32 flags; - __u8 pad[20]; + __u32 resamplefd; + __u8 pad[16]; }; struct kvm_clock_data { diff --git a/linux-headers/linux/kvm_para.h b/linux-headers/linux/kvm_para.h index 7bdcf93c1d..cea2c5c72d 100644 --- a/linux-headers/linux/kvm_para.h +++ b/linux-headers/linux/kvm_para.h @@ -1,5 +1,5 @@ -#ifndef __LINUX_KVM_PARA_H -#define __LINUX_KVM_PARA_H +#ifndef _UAPI__LINUX_KVM_PARA_H +#define _UAPI__LINUX_KVM_PARA_H /* * This header file provides a method for making a hypercall to the host @@ -25,4 +25,4 @@ */ #include -#endif /* __LINUX_KVM_PARA_H */ +#endif /* _UAPI__LINUX_KVM_PARA_H */ diff --git a/linux-headers/linux/vfio.h b/linux-headers/linux/vfio.h index f787b727a9..4758d1bfcf 100644 --- a/linux-headers/linux/vfio.h +++ b/linux-headers/linux/vfio.h @@ -8,8 +8,8 @@ * it under the terms of the GNU General Public License version 2 as * published by the Free Software Foundation. */ -#ifndef VFIO_H -#define VFIO_H +#ifndef _UAPIVFIO_H +#define _UAPIVFIO_H #include #include @@ -365,4 +365,4 @@ struct vfio_iommu_type1_dma_unmap { #define VFIO_IOMMU_UNMAP_DMA _IO(VFIO_TYPE, VFIO_BASE + 14) -#endif /* VFIO_H */ +#endif /* _UAPIVFIO_H */ diff --git a/linux-headers/linux/virtio_config.h b/linux-headers/linux/virtio_config.h index 4f51d8f3af..b7cda390fd 100644 --- a/linux-headers/linux/virtio_config.h +++ b/linux-headers/linux/virtio_config.h @@ -1,5 +1,5 @@ -#ifndef _LINUX_VIRTIO_CONFIG_H -#define _LINUX_VIRTIO_CONFIG_H +#ifndef _UAPI_LINUX_VIRTIO_CONFIG_H +#define _UAPI_LINUX_VIRTIO_CONFIG_H /* This header, excluding the #ifdef __KERNEL__ part, is BSD licensed so * anyone can use the definitions to implement compatible drivers/servers. * @@ -51,4 +51,4 @@ * suppressed them? */ #define VIRTIO_F_NOTIFY_ON_EMPTY 24 -#endif /* _LINUX_VIRTIO_CONFIG_H */ +#endif /* _UAPI_LINUX_VIRTIO_CONFIG_H */ diff --git a/linux-headers/linux/virtio_ring.h b/linux-headers/linux/virtio_ring.h index 1b333e2536..921694a084 100644 --- a/linux-headers/linux/virtio_ring.h +++ b/linux-headers/linux/virtio_ring.h @@ -1,5 +1,5 @@ -#ifndef _LINUX_VIRTIO_RING_H -#define _LINUX_VIRTIO_RING_H +#ifndef _UAPI_LINUX_VIRTIO_RING_H +#define _UAPI_LINUX_VIRTIO_RING_H /* An interface for efficient virtio implementation, currently for use by KVM * and lguest, but hopefully others soon. Do NOT change this since it will * break existing servers and clients. @@ -160,4 +160,4 @@ static __inline__ int vring_need_event(__u16 event_idx, __u16 new_idx, __u16 old return (__u16)(new_idx - event_idx - 1) < (__u16)(new_idx - old); } -#endif /* _LINUX_VIRTIO_RING_H */ +#endif /* _UAPI_LINUX_VIRTIO_RING_H */