From df410675e5fad55e056fb505cba3a62cac13c411 Mon Sep 17 00:00:00 2001 From: Alex Williamson Date: Mon, 25 Jun 2012 09:40:39 -0600 Subject: [PATCH 01/15] kvm: Don't abort on kvm_irqchip_add_msi_route() Anyone using these functions has to be prepared that irqchip support may not be present. It shouldn't be up to the core code to determine whether this is a fatal error. Currently code written as: virq = kvm_irqchip_add_msi_route(...) if (virq < 0) { } else { } works on x86 with and without kvm irqchip enabled, works without kvm support compiled in, but aborts() on !x86 with kvm support. Signed-off-by: Alex Williamson Acked-by: Jan Kiszka Signed-off-by: Marcelo Tosatti --- kvm-all.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kvm-all.c b/kvm-all.c index f8e432841f..a0c33b38de 100644 --- a/kvm-all.c +++ b/kvm-all.c @@ -1142,7 +1142,7 @@ int kvm_irqchip_send_msi(KVMState *s, MSIMessage msg) int kvm_irqchip_add_msi_route(KVMState *s, MSIMessage msg) { - abort(); + return -ENOSYS; } static int kvm_irqchip_assign_irqfd(KVMState *s, int fd, int virq, bool assign) From bfee7546df51c08e395dc8a7676a5c7f20186fee Mon Sep 17 00:00:00 2001 From: "Michael S. Tsirkin" Date: Sun, 1 Jul 2012 18:08:30 +0300 Subject: [PATCH 02/15] kvm_pv_eoi: add flag support Support the new PV EOI flag in kvm - it recently got merged into kvm.git. Set by default with -cpu kvm. Set for -cpu qemu by adding +kvm_pv_eoi. Clear by adding -kvm_pv_eoi to -cpu option. Signed-off-by: Michael S. 
Tsirkin Signed-off-by: Marcelo Tosatti --- target-i386/cpu.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/target-i386/cpu.c b/target-i386/cpu.c index 5521709240..b3bcbacd76 100644 --- a/target-i386/cpu.c +++ b/target-i386/cpu.c @@ -77,7 +77,7 @@ static const char *ext3_feature_name[] = { }; static const char *kvm_feature_name[] = { - "kvmclock", "kvm_nopiodelay", "kvm_mmu", "kvmclock", "kvm_asyncpf", NULL, NULL, NULL, + "kvmclock", "kvm_nopiodelay", "kvm_mmu", "kvmclock", "kvm_asyncpf", NULL, "kvm_pv_eoi", NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, From a75b3e0f641a68b7cdd4bbe4ed1b60f963bf2b62 Mon Sep 17 00:00:00 2001 From: "Liu, Jinsong" Date: Tue, 3 Jul 2012 02:35:10 +0800 Subject: [PATCH 03/15] kvm: expose tsc deadline timer feature to guest This patch exposes tsc deadline timer feature to guest if 1). in-kernel irqchip is used, and 2). kvm has emulated tsc deadline timer, and 3). 
the user authorizes exposing the feature via -cpu or +/- tsc-deadline Signed-off-by: Liu, Jinsong Reviewed-by: Eduardo Habkost Acked-by: Jan Kiszka Signed-off-by: Marcelo Tosatti --- target-i386/cpu.h | 1 + target-i386/kvm.c | 5 +++++ 2 files changed, 6 insertions(+) diff --git a/target-i386/cpu.h b/target-i386/cpu.h index f257c972fb..33d221eae4 100644 --- a/target-i386/cpu.h +++ b/target-i386/cpu.h @@ -400,6 +400,7 @@ #define CPUID_EXT_X2APIC (1 << 21) #define CPUID_EXT_MOVBE (1 << 22) #define CPUID_EXT_POPCNT (1 << 23) +#define CPUID_EXT_TSC_DEADLINE_TIMER (1 << 24) #define CPUID_EXT_XSAVE (1 << 26) #define CPUID_EXT_OSXSAVE (1 << 27) #define CPUID_EXT_HYPERVISOR (1 << 31) diff --git a/target-i386/kvm.c b/target-i386/kvm.c index 0d0d8f69d3..52b577fe2b 100644 --- a/target-i386/kvm.c +++ b/target-i386/kvm.c @@ -361,8 +361,13 @@ int kvm_arch_init_vcpu(CPUX86State *env) env->cpuid_features &= kvm_arch_get_supported_cpuid(s, 1, 0, R_EDX); i = env->cpuid_ext_features & CPUID_EXT_HYPERVISOR; + j = env->cpuid_ext_features & CPUID_EXT_TSC_DEADLINE_TIMER; env->cpuid_ext_features &= kvm_arch_get_supported_cpuid(s, 1, 0, R_ECX); env->cpuid_ext_features |= i; + if (j && kvm_irqchip_in_kernel() && + kvm_check_extension(s, KVM_CAP_TSC_DEADLINE_TIMER)) { + env->cpuid_ext_features |= CPUID_EXT_TSC_DEADLINE_TIMER; + } env->cpuid_ext2_features &= kvm_arch_get_supported_cpuid(s, 0x80000001, 0, R_EDX); From 3db3659bf60094657e1465cc809acb09551816ee Mon Sep 17 00:00:00 2001 From: Jan Kiszka Date: Mon, 9 Jul 2012 16:42:30 +0200 Subject: [PATCH 04/15] apic: Resolve potential endless loop around apic_update_irq Commit d96e173769 refactored the reinjection of pending PIC interrupts. However, it missed the potential loop of apic_update_irq -> apic_deliver_pic_intr -> apic_local_deliver -> apic_set_irq -> apic_update_irq that /could/ occur if LINT0 is injected as APIC_DM_FIXED and that vector is currently blocked via TPR. 
Resolve this by reinjecting only where it matters: inside apic_get_interrupt. This function may clear a vector while a PIC-originated reason still exists. Signed-off-by: Jan Kiszka Signed-off-by: Avi Kivity --- hw/apic.c | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/hw/apic.c b/hw/apic.c index 60552df619..e65a35fd7d 100644 --- a/hw/apic.c +++ b/hw/apic.c @@ -363,9 +363,6 @@ static void apic_update_irq(APICCommonState *s) } if (apic_irq_pending(s) > 0) { cpu_interrupt(s->cpu_env, CPU_INTERRUPT_HARD); - } else if (apic_accept_pic_intr(&s->busdev.qdev) && - pic_get_output(isa_pic)) { - apic_deliver_pic_intr(&s->busdev.qdev, 1); } } @@ -560,7 +557,14 @@ int apic_get_interrupt(DeviceState *d) reset_bit(s->irr, intno); set_bit(s->isr, intno); apic_sync_vapic(s, SYNC_TO_VAPIC); + + /* re-inject if there is still a pending PIC interrupt */ + if (apic_accept_pic_intr(&s->busdev.qdev) && pic_get_output(isa_pic)) { + apic_deliver_pic_intr(&s->busdev.qdev, 1); + } + apic_update_irq(s); + return intno; } From a94820ddc36f8c452b37f9dcb323f55ffdbc75f9 Mon Sep 17 00:00:00 2001 From: Jan Kiszka Date: Mon, 9 Jul 2012 16:42:31 +0200 Subject: [PATCH 05/15] apic: Reevaluate pending interrupts on LVT_LINT0 changes When the guest modifies the LVT_LINT0 register, we need to check if some pending PIC interrupt can now be delivered. 
Signed-off-by: Jan Kiszka Signed-off-by: Avi Kivity --- hw/apic.c | 18 ++++++++++++++---- 1 file changed, 14 insertions(+), 4 deletions(-) diff --git a/hw/apic.c b/hw/apic.c index e65a35fd7d..5b8f3e8150 100644 --- a/hw/apic.c +++ b/hw/apic.c @@ -532,6 +532,15 @@ static void apic_deliver(DeviceState *d, uint8_t dest, uint8_t dest_mode, apic_bus_deliver(deliver_bitmask, delivery_mode, vector_num, trigger_mode); } +static bool apic_check_pic(APICCommonState *s) +{ + if (!apic_accept_pic_intr(&s->busdev.qdev) || !pic_get_output(isa_pic)) { + return false; + } + apic_deliver_pic_intr(&s->busdev.qdev, 1); + return true; +} + int apic_get_interrupt(DeviceState *d) { APICCommonState *s = DO_UPCAST(APICCommonState, busdev.qdev, d); @@ -559,9 +568,7 @@ int apic_get_interrupt(DeviceState *d) apic_sync_vapic(s, SYNC_TO_VAPIC); /* re-inject if there is still a pending PIC interrupt */ - if (apic_accept_pic_intr(&s->busdev.qdev) && pic_get_output(isa_pic)) { - apic_deliver_pic_intr(&s->busdev.qdev, 1); - } + apic_check_pic(s); apic_update_irq(s); @@ -804,8 +811,11 @@ static void apic_mem_writel(void *opaque, target_phys_addr_t addr, uint32_t val) { int n = index - 0x32; s->lvt[n] = val; - if (n == APIC_LVT_TIMER) + if (n == APIC_LVT_TIMER) { apic_timer_update(s, qemu_get_clock_ns(vm_clock)); + } else if (n == APIC_LVT_LINT0 && apic_check_pic(s)) { + apic_update_irq(s); + } } break; case 0x38: From 5d62c43a17edaa7f6a88821c9086e6c8e0e5327d Mon Sep 17 00:00:00 2001 From: Jan Kiszka Date: Mon, 9 Jul 2012 16:42:32 +0200 Subject: [PATCH 06/15] apic: Defer interrupt updates to VCPU thread KVM performs TPR raising asynchronously to QEMU, specifically outside QEMU's global lock. When an interrupt is injected into the APIC and TPR is checked to decide if this can be delivered, a stale TPR value may be used, causing spurious interrupts in the end. Fix this by deferring apic_update_irq to the context of the target VCPU. We introduce a new interrupt flag for this, CPU_INTERRUPT_POLL. 
When it is set, the VCPU calls apic_poll_irq before checking for further pending interrupts. To avoid special-casing KVM, we also implement this logic for TCG mode. Signed-off-by: Jan Kiszka Signed-off-by: Avi Kivity --- cpu-exec.c | 6 ++++++ hw/apic.c | 5 ++++- hw/apic.h | 1 + hw/apic_internal.h | 1 - target-i386/cpu.h | 4 +++- target-i386/kvm.c | 4 ++++ 6 files changed, 18 insertions(+), 3 deletions(-) diff --git a/cpu-exec.c b/cpu-exec.c index 08c35f72d4..fc185a4f04 100644 --- a/cpu-exec.c +++ b/cpu-exec.c @@ -288,6 +288,12 @@ int cpu_exec(CPUArchState *env) } #endif #if defined(TARGET_I386) +#if !defined(CONFIG_USER_ONLY) + if (interrupt_request & CPU_INTERRUPT_POLL) { + env->interrupt_request &= ~CPU_INTERRUPT_POLL; + apic_poll_irq(env->apic_state); + } +#endif if (interrupt_request & CPU_INTERRUPT_INIT) { cpu_svm_check_intercept_param(env, SVM_EXIT_INIT, 0); diff --git a/hw/apic.c b/hw/apic.c index 5b8f3e8150..385555eb43 100644 --- a/hw/apic.c +++ b/hw/apic.c @@ -16,6 +16,7 @@ * You should have received a copy of the GNU Lesser General Public * License along with this library; if not, see */ +#include "qemu-thread.h" #include "apic_internal.h" #include "apic.h" #include "ioapic.h" @@ -361,7 +362,9 @@ static void apic_update_irq(APICCommonState *s) if (!(s->spurious_vec & APIC_SV_ENABLE)) { return; } - if (apic_irq_pending(s) > 0) { + if (!qemu_cpu_is_self(s->cpu_env)) { + cpu_interrupt(s->cpu_env, CPU_INTERRUPT_POLL); + } else if (apic_irq_pending(s) > 0) { cpu_interrupt(s->cpu_env, CPU_INTERRUPT_HARD); } } diff --git a/hw/apic.h b/hw/apic.h index 62179cebee..a89542b231 100644 --- a/hw/apic.h +++ b/hw/apic.h @@ -20,6 +20,7 @@ void apic_init_reset(DeviceState *s); void apic_sipi(DeviceState *s); void apic_handle_tpr_access_report(DeviceState *d, target_ulong ip, TPRAccess access); +void apic_poll_irq(DeviceState *d); /* pc.c */ int cpu_is_bsp(CPUX86State *env); diff --git a/hw/apic_internal.h b/hw/apic_internal.h index 60a6a8bdae..4d8ff490ce 100644 --- 
a/hw/apic_internal.h +++ b/hw/apic_internal.h @@ -141,7 +141,6 @@ void apic_report_irq_delivered(int delivered); bool apic_next_timer(APICCommonState *s, int64_t current_time); void apic_enable_tpr_access_reporting(DeviceState *d, bool enable); void apic_enable_vapic(DeviceState *d, target_phys_addr_t paddr); -void apic_poll_irq(DeviceState *d); void vapic_report_tpr_access(DeviceState *dev, void *cpu, target_ulong ip, TPRAccess access); diff --git a/target-i386/cpu.h b/target-i386/cpu.h index 33d221eae4..2a61c810bb 100644 --- a/target-i386/cpu.h +++ b/target-i386/cpu.h @@ -478,6 +478,7 @@ for syscall instruction */ /* i386-specific interrupt pending bits. */ +#define CPU_INTERRUPT_POLL CPU_INTERRUPT_TGT_EXT_1 #define CPU_INTERRUPT_SMI CPU_INTERRUPT_TGT_EXT_2 #define CPU_INTERRUPT_NMI CPU_INTERRUPT_TGT_EXT_3 #define CPU_INTERRUPT_MCE CPU_INTERRUPT_TGT_EXT_4 @@ -1048,7 +1049,8 @@ static inline void cpu_clone_regs(CPUX86State *env, target_ulong newsp) static inline bool cpu_has_work(CPUX86State *env) { - return ((env->interrupt_request & CPU_INTERRUPT_HARD) && + return ((env->interrupt_request & (CPU_INTERRUPT_HARD | + CPU_INTERRUPT_POLL)) && (env->eflags & IF_MASK)) || (env->interrupt_request & (CPU_INTERRUPT_NMI | CPU_INTERRUPT_INIT | diff --git a/target-i386/kvm.c b/target-i386/kvm.c index 52b577fe2b..e53c2f6bdf 100644 --- a/target-i386/kvm.c +++ b/target-i386/kvm.c @@ -1732,6 +1732,10 @@ int kvm_arch_process_async_events(CPUX86State *env) return 0; } + if (env->interrupt_request & CPU_INTERRUPT_POLL) { + env->interrupt_request &= ~CPU_INTERRUPT_POLL; + apic_poll_irq(env->apic_state); + } if (((env->interrupt_request & CPU_INTERRUPT_HARD) && (env->eflags & IF_MASK)) || (env->interrupt_request & CPU_INTERRUPT_NMI)) { From 2ec10b952b40d287037a50387a8b66d9ccc5124b Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Thu, 5 Jul 2012 17:16:22 +0200 Subject: [PATCH 07/15] event_notifier: add event_notifier_set EventNotifier right now cannot be used as an inter-thread 
communication primitive. It only works if something else (the kernel) sets the eventfd. Add a primitive to signal an EventNotifier that another thread is waiting on. Signed-off-by: Paolo Bonzini Signed-off-by: Avi Kivity --- event_notifier.c | 7 +++++++ event_notifier.h | 1 + 2 files changed, 8 insertions(+) diff --git a/event_notifier.c b/event_notifier.c index 0b829813d3..2b210f4b44 100644 --- a/event_notifier.c +++ b/event_notifier.c @@ -38,6 +38,13 @@ int event_notifier_get_fd(EventNotifier *e) return e->fd; } +int event_notifier_set(EventNotifier *e) +{ + uint64_t value = 1; + int r = write(e->fd, &value, sizeof(value)); + return r == sizeof(value); +} + int event_notifier_test_and_clear(EventNotifier *e) { uint64_t value; diff --git a/event_notifier.h b/event_notifier.h index 886222cb36..efca852d10 100644 --- a/event_notifier.h +++ b/event_notifier.h @@ -22,6 +22,7 @@ struct EventNotifier { int event_notifier_init(EventNotifier *, int active); void event_notifier_cleanup(EventNotifier *); int event_notifier_get_fd(EventNotifier *); +int event_notifier_set(EventNotifier *); int event_notifier_test_and_clear(EventNotifier *); int event_notifier_test(EventNotifier *); From ebe7acea532fba4a0ad7aaf74e59bff729867d93 Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Thu, 5 Jul 2012 17:16:23 +0200 Subject: [PATCH 08/15] event_notifier: remove event_notifier_test The function is useless (and unused). 
Signed-off-by: Paolo Bonzini Signed-off-by: Avi Kivity --- event_notifier.c | 15 --------------- event_notifier.h | 1 - 2 files changed, 16 deletions(-) diff --git a/event_notifier.c b/event_notifier.c index 2b210f4b44..c339bfee50 100644 --- a/event_notifier.c +++ b/event_notifier.c @@ -51,18 +51,3 @@ int event_notifier_test_and_clear(EventNotifier *e) int r = read(e->fd, &value, sizeof(value)); return r == sizeof(value); } - -int event_notifier_test(EventNotifier *e) -{ - uint64_t value; - int r = read(e->fd, &value, sizeof(value)); - if (r == sizeof(value)) { - /* restore previous value. */ - int s = write(e->fd, &value, sizeof(value)); - /* never blocks because we use EFD_SEMAPHORE. - * If we didn't we'd get EAGAIN on overflow - * and we'd have to write code to ignore it. */ - assert(s == sizeof(value)); - } - return r == sizeof(value); -} diff --git a/event_notifier.h b/event_notifier.h index efca852d10..9b2edf41a7 100644 --- a/event_notifier.h +++ b/event_notifier.h @@ -24,6 +24,5 @@ void event_notifier_cleanup(EventNotifier *); int event_notifier_get_fd(EventNotifier *); int event_notifier_set(EventNotifier *); int event_notifier_test_and_clear(EventNotifier *); -int event_notifier_test(EventNotifier *); #endif From e80c262be7f821051a6d89b39fe21d951848dece Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Thu, 5 Jul 2012 17:16:24 +0200 Subject: [PATCH 09/15] event_notifier: add event_notifier_init_fd Signed-off-by: Paolo Bonzini Signed-off-by: Avi Kivity --- event_notifier.c | 7 +++++++ event_notifier.h | 3 ++- 2 files changed, 9 insertions(+), 1 deletion(-) diff --git a/event_notifier.c b/event_notifier.c index c339bfee50..99c376c6a3 100644 --- a/event_notifier.c +++ b/event_notifier.c @@ -10,11 +10,18 @@ * See the COPYING file in the top-level directory. 
*/ +#include "qemu-common.h" #include "event_notifier.h" + #ifdef CONFIG_EVENTFD #include #endif +void event_notifier_init_fd(EventNotifier *e, int fd) +{ + e->fd = fd; +} + int event_notifier_init(EventNotifier *e, int active) { #ifdef CONFIG_EVENTFD diff --git a/event_notifier.h b/event_notifier.h index 9b2edf41a7..fd3d9427b5 100644 --- a/event_notifier.h +++ b/event_notifier.h @@ -16,9 +16,10 @@ #include "qemu-common.h" struct EventNotifier { - int fd; + int fd; }; +void event_notifier_init_fd(EventNotifier *, int fd); int event_notifier_init(EventNotifier *, int active); void event_notifier_cleanup(EventNotifier *); int event_notifier_get_fd(EventNotifier *); From 563027cc0c94aa4846c18f9d665a4c90f8c42ba8 Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Thu, 5 Jul 2012 17:16:25 +0200 Subject: [PATCH 10/15] ivshmem: use EventNotifier and memory API All of ivshmem's usage of eventfd now has a corresponding API in EventNotifier. Simplify the code by using it, and also use the memory API consistently to set up and tear down the ioeventfds. Signed-off-by: Paolo Bonzini Signed-off-by: Avi Kivity --- hw/ivshmem.c | 63 +++++++++++++++++++++++++++++----------------------- 1 file changed, 35 insertions(+), 28 deletions(-) diff --git a/hw/ivshmem.c b/hw/ivshmem.c index 05559b639c..3cdbea2133 100644 --- a/hw/ivshmem.c +++ b/hw/ivshmem.c @@ -23,6 +23,7 @@ #include "kvm.h" #include "migration.h" #include "qerror.h" +#include "event_notifier.h" #include #include @@ -45,7 +46,7 @@ typedef struct Peer { int nb_eventfds; - int *eventfds; + EventNotifier *eventfds; } Peer; typedef struct EventfdEntry { @@ -63,7 +64,6 @@ typedef struct IVShmemState { CharDriverState *server_chr; MemoryRegion ivshmem_mmio; - pcibus_t mmio_addr; /* We might need to register the BAR before we actually have the memory. * So prepare a container MemoryRegion for the BAR immediately and * add a subregion when we have the memory. 
@@ -168,7 +168,6 @@ static void ivshmem_io_write(void *opaque, target_phys_addr_t addr, { IVShmemState *s = opaque; - uint64_t write_one = 1; uint16_t dest = val >> 16; uint16_t vector = val & 0xff; @@ -194,12 +193,8 @@ static void ivshmem_io_write(void *opaque, target_phys_addr_t addr, /* check doorbell range */ if (vector < s->peers[dest].nb_eventfds) { - IVSHMEM_DPRINTF("Writing %" PRId64 " to VM %d on vector %d\n", - write_one, dest, vector); - if (write(s->peers[dest].eventfds[vector], - &(write_one), 8) != 8) { - IVSHMEM_DPRINTF("error writing to eventfd\n"); - } + IVSHMEM_DPRINTF("Notifying VM %d on vector %d\n", dest, vector); + event_notifier_set(&s->peers[dest].eventfds[vector]); } break; default: @@ -279,12 +274,13 @@ static void fake_irqfd(void *opaque, const uint8_t *buf, int size) { msix_notify(pdev, entry->vector); } -static CharDriverState* create_eventfd_chr_device(void * opaque, int eventfd, - int vector) +static CharDriverState* create_eventfd_chr_device(void * opaque, EventNotifier *n, + int vector) { /* create a event character device based on the passed eventfd */ IVShmemState *s = opaque; CharDriverState * chr; + int eventfd = event_notifier_get_fd(n); chr = qemu_chr_open_eventfd(eventfd); @@ -347,6 +343,26 @@ static void create_shared_memory_BAR(IVShmemState *s, int fd) { pci_register_bar(&s->dev, 2, PCI_BASE_ADDRESS_SPACE_MEMORY, &s->bar); } +static void ivshmem_add_eventfd(IVShmemState *s, int posn, int i) +{ + memory_region_add_eventfd(&s->ivshmem_mmio, + DOORBELL, + 4, + true, + (posn << 16) | i, + event_notifier_get_fd(&s->peers[posn].eventfds[i])); +} + +static void ivshmem_del_eventfd(IVShmemState *s, int posn, int i) +{ + memory_region_del_eventfd(&s->ivshmem_mmio, + DOORBELL, + 4, + true, + (posn << 16) | i, + event_notifier_get_fd(&s->peers[posn].eventfds[i])); +} + static void close_guest_eventfds(IVShmemState *s, int posn) { int i, guest_curr_max; @@ -354,9 +370,8 @@ static void close_guest_eventfds(IVShmemState *s, int posn) 
guest_curr_max = s->peers[posn].nb_eventfds; for (i = 0; i < guest_curr_max; i++) { - kvm_set_ioeventfd_mmio(s->peers[posn].eventfds[i], - s->mmio_addr + DOORBELL, (posn << 16) | i, 0, 4); - close(s->peers[posn].eventfds[i]); + ivshmem_del_eventfd(s, posn, i); + event_notifier_cleanup(&s->peers[posn].eventfds[i]); } g_free(s->peers[posn].eventfds); @@ -369,12 +384,7 @@ static void setup_ioeventfds(IVShmemState *s) { for (i = 0; i <= s->max_peer; i++) { for (j = 0; j < s->peers[i].nb_eventfds; j++) { - memory_region_add_eventfd(&s->ivshmem_mmio, - DOORBELL, - 4, - true, - (i << 16) | j, - s->peers[i].eventfds[j]); + ivshmem_add_eventfd(s, i, j); } } } @@ -476,14 +486,14 @@ static void ivshmem_read(void *opaque, const uint8_t * buf, int flags) if (guest_max_eventfd == 0) { /* one eventfd per MSI vector */ - s->peers[incoming_posn].eventfds = (int *) g_malloc(s->vectors * - sizeof(int)); + s->peers[incoming_posn].eventfds = g_new(EventNotifier, s->vectors); } /* this is an eventfd for a particular guest VM */ IVSHMEM_DPRINTF("eventfds[%ld][%d] = %d\n", incoming_posn, guest_max_eventfd, incoming_fd); - s->peers[incoming_posn].eventfds[guest_max_eventfd] = incoming_fd; + event_notifier_init_fd(&s->peers[incoming_posn].eventfds[guest_max_eventfd], + incoming_fd); /* increment count for particular guest */ s->peers[incoming_posn].nb_eventfds++; @@ -495,15 +505,12 @@ static void ivshmem_read(void *opaque, const uint8_t * buf, int flags) if (incoming_posn == s->vm_id) { s->eventfd_chr[guest_max_eventfd] = create_eventfd_chr_device(s, - s->peers[s->vm_id].eventfds[guest_max_eventfd], + &s->peers[s->vm_id].eventfds[guest_max_eventfd], guest_max_eventfd); } if (ivshmem_has_feature(s, IVSHMEM_IOEVENTFD)) { - if (kvm_set_ioeventfd_mmio(incoming_fd, s->mmio_addr + DOORBELL, - (incoming_posn << 16) | guest_max_eventfd, 1, 4) < 0) { - fprintf(stderr, "ivshmem: ioeventfd not available\n"); - } + ivshmem_add_eventfd(s, incoming_posn, guest_max_eventfd); } return; From 
b6a1f3a56921c80cd04d8130e713028c7c91edc1 Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Thu, 5 Jul 2012 17:16:26 +0200 Subject: [PATCH 11/15] ivshmem: wrap ivshmem_del_eventfd loops with transaction Signed-off-by: Paolo Bonzini Signed-off-by: Avi Kivity --- hw/ivshmem.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/hw/ivshmem.c b/hw/ivshmem.c index 3cdbea2133..19e164a353 100644 --- a/hw/ivshmem.c +++ b/hw/ivshmem.c @@ -369,8 +369,12 @@ static void close_guest_eventfds(IVShmemState *s, int posn) guest_curr_max = s->peers[posn].nb_eventfds; + memory_region_transaction_begin(); for (i = 0; i < guest_curr_max; i++) { ivshmem_del_eventfd(s, posn, i); + } + memory_region_transaction_commit(); + for (i = 0; i < guest_curr_max; i++) { event_notifier_cleanup(&s->peers[posn].eventfds[i]); } From 753d5e14c4cd9e545242971c5d149fe5da0a5ba1 Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Thu, 5 Jul 2012 17:16:27 +0200 Subject: [PATCH 12/15] memory: pass EventNotifier, not eventfd Under Win32, EventNotifiers will not have event_notifier_get_fd, so we cannot call it in common code such as hw/virtio-pci.c. Pass a pointer to the notifier, and only retrieve the file descriptor in kvm-specific code. 
Signed-off-by: Paolo Bonzini Signed-off-by: Avi Kivity --- exec.c | 8 ++++---- hw/ivshmem.c | 4 ++-- hw/vhost.c | 4 ++-- hw/virtio-pci.c | 4 ++-- hw/xen_pt.c | 2 +- kvm-all.c | 19 +++++++++++++------ memory.c | 18 +++++++++--------- memory.h | 9 +++++---- xen-all.c | 6 ++++-- 9 files changed, 42 insertions(+), 32 deletions(-) diff --git a/exec.c b/exec.c index 8244d54a85..29b5078bbd 100644 --- a/exec.c +++ b/exec.c @@ -3212,13 +3212,13 @@ static void core_log_global_stop(MemoryListener *listener) static void core_eventfd_add(MemoryListener *listener, MemoryRegionSection *section, - bool match_data, uint64_t data, int fd) + bool match_data, uint64_t data, EventNotifier *e) { } static void core_eventfd_del(MemoryListener *listener, MemoryRegionSection *section, - bool match_data, uint64_t data, int fd) + bool match_data, uint64_t data, EventNotifier *e) { } @@ -3278,13 +3278,13 @@ static void io_log_global_stop(MemoryListener *listener) static void io_eventfd_add(MemoryListener *listener, MemoryRegionSection *section, - bool match_data, uint64_t data, int fd) + bool match_data, uint64_t data, EventNotifier *e) { } static void io_eventfd_del(MemoryListener *listener, MemoryRegionSection *section, - bool match_data, uint64_t data, int fd) + bool match_data, uint64_t data, EventNotifier *e) { } diff --git a/hw/ivshmem.c b/hw/ivshmem.c index 19e164a353..bba21c55e2 100644 --- a/hw/ivshmem.c +++ b/hw/ivshmem.c @@ -350,7 +350,7 @@ static void ivshmem_add_eventfd(IVShmemState *s, int posn, int i) 4, true, (posn << 16) | i, - event_notifier_get_fd(&s->peers[posn].eventfds[i])); + &s->peers[posn].eventfds[i]); } static void ivshmem_del_eventfd(IVShmemState *s, int posn, int i) @@ -360,7 +360,7 @@ static void ivshmem_del_eventfd(IVShmemState *s, int posn, int i) 4, true, (posn << 16) | i, - event_notifier_get_fd(&s->peers[posn].eventfds[i])); + &s->peers[posn].eventfds[i]); } static void close_guest_eventfds(IVShmemState *s, int posn) diff --git a/hw/vhost.c b/hw/vhost.c index 
43664e7f4d..0fd8da84e2 100644 --- a/hw/vhost.c +++ b/hw/vhost.c @@ -737,13 +737,13 @@ static void vhost_virtqueue_cleanup(struct vhost_dev *dev, static void vhost_eventfd_add(MemoryListener *listener, MemoryRegionSection *section, - bool match_data, uint64_t data, int fd) + bool match_data, uint64_t data, EventNotifier *e) { } static void vhost_eventfd_del(MemoryListener *listener, MemoryRegionSection *section, - bool match_data, uint64_t data, int fd) + bool match_data, uint64_t data, EventNotifier *e) { } diff --git a/hw/virtio-pci.c b/hw/virtio-pci.c index 9342eed070..a555728b25 100644 --- a/hw/virtio-pci.c +++ b/hw/virtio-pci.c @@ -174,10 +174,10 @@ static int virtio_pci_set_host_notifier_internal(VirtIOPCIProxy *proxy, return r; } memory_region_add_eventfd(&proxy->bar, VIRTIO_PCI_QUEUE_NOTIFY, 2, - true, n, event_notifier_get_fd(notifier)); + true, n, notifier); } else { memory_region_del_eventfd(&proxy->bar, VIRTIO_PCI_QUEUE_NOTIFY, 2, - true, n, event_notifier_get_fd(notifier)); + true, n, notifier); /* Handle the race condition where the guest kicked and we deassigned * before we got around to handling the kick. 
*/ diff --git a/hw/xen_pt.c b/hw/xen_pt.c index 3b6d1867ab..fdf68aa564 100644 --- a/hw/xen_pt.c +++ b/hw/xen_pt.c @@ -634,7 +634,7 @@ static void xen_pt_log_global_fns(MemoryListener *l) } static void xen_pt_eventfd_fns(MemoryListener *l, MemoryRegionSection *s, - bool match_data, uint64_t data, int fd) + bool match_data, uint64_t data, EventNotifier *n) { } diff --git a/kvm-all.c b/kvm-all.c index a0c33b38de..5a386b45f9 100644 --- a/kvm-all.c +++ b/kvm-all.c @@ -32,6 +32,7 @@ #include "bswap.h" #include "memory.h" #include "exec-memory.h" +#include "event_notifier.h" /* This check must be after config-host.h is included */ #ifdef CONFIG_EVENTFD @@ -800,23 +801,29 @@ static void kvm_io_ioeventfd_del(MemoryRegionSection *section, static void kvm_eventfd_add(MemoryListener *listener, MemoryRegionSection *section, - bool match_data, uint64_t data, int fd) + bool match_data, uint64_t data, + EventNotifier *e) { if (section->address_space == get_system_memory()) { - kvm_mem_ioeventfd_add(section, match_data, data, fd); + kvm_mem_ioeventfd_add(section, match_data, data, + event_notifier_get_fd(e)); } else { - kvm_io_ioeventfd_add(section, match_data, data, fd); + kvm_io_ioeventfd_add(section, match_data, data, + event_notifier_get_fd(e)); } } static void kvm_eventfd_del(MemoryListener *listener, MemoryRegionSection *section, - bool match_data, uint64_t data, int fd) + bool match_data, uint64_t data, + EventNotifier *e) { if (section->address_space == get_system_memory()) { - kvm_mem_ioeventfd_del(section, match_data, data, fd); + kvm_mem_ioeventfd_del(section, match_data, data, + event_notifier_get_fd(e)); } else { - kvm_io_ioeventfd_del(section, match_data, data, fd); + kvm_io_ioeventfd_del(section, match_data, data, + event_notifier_get_fd(e)); } } diff --git a/memory.c b/memory.c index aab4a31323..643871bafa 100644 --- a/memory.c +++ b/memory.c @@ -156,7 +156,7 @@ struct MemoryRegionIoeventfd { AddrRange addr; bool match_data; uint64_t data; - int fd; + EventNotifier 
*e; }; static bool memory_region_ioeventfd_before(MemoryRegionIoeventfd a, @@ -181,9 +181,9 @@ static bool memory_region_ioeventfd_before(MemoryRegionIoeventfd a, return false; } } - if (a.fd < b.fd) { + if (a.e < b.e) { return true; - } else if (a.fd > b.fd) { + } else if (a.e > b.e) { return false; } return false; @@ -597,7 +597,7 @@ static void address_space_add_del_ioeventfds(AddressSpace *as, .size = int128_get64(fd->addr.size), }; MEMORY_LISTENER_CALL(eventfd_del, Forward, &section, - fd->match_data, fd->data, fd->fd); + fd->match_data, fd->data, fd->e); ++iold; } else if (inew < fds_new_nb && (iold == fds_old_nb @@ -610,7 +610,7 @@ static void address_space_add_del_ioeventfds(AddressSpace *as, .size = int128_get64(fd->addr.size), }; MEMORY_LISTENER_CALL(eventfd_add, Reverse, &section, - fd->match_data, fd->data, fd->fd); + fd->match_data, fd->data, fd->e); ++inew; } else { ++iold; @@ -1195,14 +1195,14 @@ void memory_region_add_eventfd(MemoryRegion *mr, unsigned size, bool match_data, uint64_t data, - int fd) + EventNotifier *e) { MemoryRegionIoeventfd mrfd = { .addr.start = int128_make64(addr), .addr.size = int128_make64(size), .match_data = match_data, .data = data, - .fd = fd, + .e = e, }; unsigned i; @@ -1225,14 +1225,14 @@ void memory_region_del_eventfd(MemoryRegion *mr, unsigned size, bool match_data, uint64_t data, - int fd) + EventNotifier *e) { MemoryRegionIoeventfd mrfd = { .addr.start = int128_make64(addr), .addr.size = int128_make64(size), .match_data = match_data, .data = data, - .fd = fd, + .e = e, }; unsigned i; diff --git a/memory.h b/memory.h index 740c48e8e5..bd1bbaeabe 100644 --- a/memory.h +++ b/memory.h @@ -198,9 +198,9 @@ struct MemoryListener { void (*log_global_start)(MemoryListener *listener); void (*log_global_stop)(MemoryListener *listener); void (*eventfd_add)(MemoryListener *listener, MemoryRegionSection *section, - bool match_data, uint64_t data, int fd); + bool match_data, uint64_t data, EventNotifier *e); void 
(*eventfd_del)(MemoryListener *listener, MemoryRegionSection *section, - bool match_data, uint64_t data, int fd); + bool match_data, uint64_t data, EventNotifier *e); /* Lower = earlier (during add), later (during del) */ unsigned priority; MemoryRegion *address_space_filter; @@ -541,7 +541,7 @@ void memory_region_add_eventfd(MemoryRegion *mr, unsigned size, bool match_data, uint64_t data, - int fd); + EventNotifier *e); /** * memory_region_del_eventfd: Cancel an eventfd. @@ -561,7 +561,8 @@ void memory_region_del_eventfd(MemoryRegion *mr, unsigned size, bool match_data, uint64_t data, - int fd); + EventNotifier *e); + /** * memory_region_add_subregion: Add a subregion to a container. * diff --git a/xen-all.c b/xen-all.c index 59f232395e..61def2ec8f 100644 --- a/xen-all.c +++ b/xen-all.c @@ -560,13 +560,15 @@ static void xen_log_global_stop(MemoryListener *listener) static void xen_eventfd_add(MemoryListener *listener, MemoryRegionSection *section, - bool match_data, uint64_t data, int fd) + bool match_data, uint64_t data, + EventNotifier *e) { } static void xen_eventfd_del(MemoryListener *listener, MemoryRegionSection *section, - bool match_data, uint64_t data, int fd) + bool match_data, uint64_t data, + EventNotifier *e) { } From 6bf819f0a2a77dc8504651f369293f0efac54fe5 Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Thu, 5 Jul 2012 17:16:28 +0200 Subject: [PATCH 13/15] event_notifier: add event_notifier_set_handler Win32 event notifiers are not file descriptors, so they will not be able to use qemu_set_fd_handler. But even if for now we only have a POSIX version of EventNotifier, we can add a specific function that wraps the call. The wrapper passes the EventNotifier as the opaque value so that it will be used with container_of. 
Signed-off-by: Paolo Bonzini Signed-off-by: Avi Kivity --- event_notifier.c | 7 +++++++ event_notifier.h | 3 +++ 2 files changed, 10 insertions(+) diff --git a/event_notifier.c b/event_notifier.c index 99c376c6a3..2c207e1399 100644 --- a/event_notifier.c +++ b/event_notifier.c @@ -12,6 +12,7 @@ #include "qemu-common.h" #include "event_notifier.h" +#include "qemu-char.h" #ifdef CONFIG_EVENTFD #include @@ -45,6 +46,12 @@ int event_notifier_get_fd(EventNotifier *e) return e->fd; } +int event_notifier_set_handler(EventNotifier *e, + EventNotifierHandler *handler) +{ + return qemu_set_fd_handler(e->fd, (IOHandler *)handler, NULL, e); +} + int event_notifier_set(EventNotifier *e) { uint64_t value = 1; diff --git a/event_notifier.h b/event_notifier.h index fd3d9427b5..f0ec2f2171 100644 --- a/event_notifier.h +++ b/event_notifier.h @@ -19,11 +19,14 @@ struct EventNotifier { int fd; }; +typedef void EventNotifierHandler(EventNotifier *); + void event_notifier_init_fd(EventNotifier *, int fd); int event_notifier_init(EventNotifier *, int active); void event_notifier_cleanup(EventNotifier *); int event_notifier_get_fd(EventNotifier *); int event_notifier_set(EventNotifier *); int event_notifier_test_and_clear(EventNotifier *); +int event_notifier_set_handler(EventNotifier *, EventNotifierHandler *); #endif From b1f416aa8d870fab71030abc9401cfc77b948e8e Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Thu, 5 Jul 2012 17:16:29 +0200 Subject: [PATCH 14/15] virtio: move common ioeventfd handling out of virtio-pci All transports can use the same event handler for the ioeventfd, though the exact setup (address/memory region) will be specific. This lets virtio use event_notifier_set_handler. 
Signed-off-by: Paolo Bonzini Signed-off-by: Avi Kivity --- hw/virtio-pci.c | 36 ++---------------------------------- hw/virtio.c | 22 ++++++++++++++++++++++ hw/virtio.h | 1 + 3 files changed, 25 insertions(+), 34 deletions(-) diff --git a/hw/virtio-pci.c b/hw/virtio-pci.c index a555728b25..36770fd8f7 100644 --- a/hw/virtio-pci.c +++ b/hw/virtio-pci.c @@ -173,46 +173,18 @@ static int virtio_pci_set_host_notifier_internal(VirtIOPCIProxy *proxy, __func__, r); return r; } + virtio_queue_set_host_notifier_fd_handler(vq, true); memory_region_add_eventfd(&proxy->bar, VIRTIO_PCI_QUEUE_NOTIFY, 2, true, n, notifier); } else { memory_region_del_eventfd(&proxy->bar, VIRTIO_PCI_QUEUE_NOTIFY, 2, true, n, notifier); - /* Handle the race condition where the guest kicked and we deassigned - * before we got around to handling the kick. - */ - if (event_notifier_test_and_clear(notifier)) { - virtio_queue_notify_vq(vq); - } - + virtio_queue_set_host_notifier_fd_handler(vq, false); event_notifier_cleanup(notifier); } return r; } -static void virtio_pci_host_notifier_read(void *opaque) -{ - VirtQueue *vq = opaque; - EventNotifier *n = virtio_queue_get_host_notifier(vq); - if (event_notifier_test_and_clear(n)) { - virtio_queue_notify_vq(vq); - } -} - -static void virtio_pci_set_host_notifier_fd_handler(VirtIOPCIProxy *proxy, - int n, bool assign) -{ - VirtQueue *vq = virtio_get_queue(proxy->vdev, n); - EventNotifier *notifier = virtio_queue_get_host_notifier(vq); - if (assign) { - qemu_set_fd_handler(event_notifier_get_fd(notifier), - virtio_pci_host_notifier_read, NULL, vq); - } else { - qemu_set_fd_handler(event_notifier_get_fd(notifier), - NULL, NULL, NULL); - } -} - static void virtio_pci_start_ioeventfd(VirtIOPCIProxy *proxy) { int n, r; @@ -232,8 +204,6 @@ static void virtio_pci_start_ioeventfd(VirtIOPCIProxy *proxy) if (r < 0) { goto assign_error; } - - virtio_pci_set_host_notifier_fd_handler(proxy, n, true); } proxy->ioeventfd_started = true; return; @@ -244,7 +214,6 @@ 
assign_error: continue; } - virtio_pci_set_host_notifier_fd_handler(proxy, n, false); r = virtio_pci_set_host_notifier_internal(proxy, n, false); assert(r >= 0); } @@ -266,7 +235,6 @@ static void virtio_pci_stop_ioeventfd(VirtIOPCIProxy *proxy) continue; } - virtio_pci_set_host_notifier_fd_handler(proxy, n, false); r = virtio_pci_set_host_notifier_internal(proxy, n, false); assert(r >= 0); } diff --git a/hw/virtio.c b/hw/virtio.c index 168abe4864..197edf00f4 100644 --- a/hw/virtio.c +++ b/hw/virtio.c @@ -988,6 +988,28 @@ EventNotifier *virtio_queue_get_guest_notifier(VirtQueue *vq) { return &vq->guest_notifier; } + +static void virtio_queue_host_notifier_read(EventNotifier *n) +{ + VirtQueue *vq = container_of(n, VirtQueue, host_notifier); + if (event_notifier_test_and_clear(n)) { + virtio_queue_notify_vq(vq); + } +} + +void virtio_queue_set_host_notifier_fd_handler(VirtQueue *vq, bool assign) +{ + if (assign) { + event_notifier_set_handler(&vq->host_notifier, + virtio_queue_host_notifier_read); + } else { + event_notifier_set_handler(&vq->host_notifier, NULL); + /* Test and clear notifier after disabling event, + * in case poll callback didn't have time to run.
*/ + virtio_queue_host_notifier_read(&vq->host_notifier); + } +} + EventNotifier *virtio_queue_get_host_notifier(VirtQueue *vq) { return &vq->host_notifier; diff --git a/hw/virtio.h b/hw/virtio.h index 85aabe53d8..294948519c 100644 --- a/hw/virtio.h +++ b/hw/virtio.h @@ -232,6 +232,7 @@ VirtQueue *virtio_get_queue(VirtIODevice *vdev, int n); int virtio_queue_get_id(VirtQueue *vq); EventNotifier *virtio_queue_get_guest_notifier(VirtQueue *vq); EventNotifier *virtio_queue_get_host_notifier(VirtQueue *vq); +void virtio_queue_set_host_notifier_fd_handler(VirtQueue *vq, bool assign); void virtio_queue_notify_vq(VirtQueue *vq); void virtio_irq(VirtQueue *vq); #endif From 15b2bd1847239fe0b4a1041b69a631741d2e273a Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Thu, 5 Jul 2012 17:16:30 +0200 Subject: [PATCH 15/15] virtio: move common irqfd handling out of virtio-pci All transports can use the same event handler for the irqfd, though the exact mechanics of the assignment will be specific. Note that there are three states: handled by the kernel, handled in userspace, disabled. This also lets virtio use event_notifier_set_handler. 
Signed-off-by: Paolo Bonzini Signed-off-by: Avi Kivity --- hw/virtio-pci.c | 37 ++++++++++--------------------------- hw/virtio.c | 24 ++++++++++++++++++++++++ hw/virtio.h | 2 ++ kvm-all.c | 10 ++++++++++ kvm-stub.c | 10 ++++++++++ kvm.h | 2 ++ 6 files changed, 58 insertions(+), 27 deletions(-) diff --git a/hw/virtio-pci.c b/hw/virtio-pci.c index 36770fd8f7..a66c946f48 100644 --- a/hw/virtio-pci.c +++ b/hw/virtio-pci.c @@ -496,25 +496,15 @@ static unsigned virtio_pci_get_features(void *opaque) return proxy->host_features; } -static void virtio_pci_guest_notifier_read(void *opaque) -{ - VirtQueue *vq = opaque; - EventNotifier *n = virtio_queue_get_guest_notifier(vq); - if (event_notifier_test_and_clear(n)) { - virtio_irq(vq); - } -} - static int kvm_virtio_pci_vq_vector_use(VirtIOPCIProxy *proxy, unsigned int queue_no, unsigned int vector, MSIMessage msg) { VirtQueue *vq = virtio_get_queue(proxy->vdev, queue_no); + EventNotifier *n = virtio_queue_get_guest_notifier(vq); VirtIOIRQFD *irqfd = &proxy->vector_irqfd[vector]; - int fd, ret; - - fd = event_notifier_get_fd(virtio_queue_get_guest_notifier(vq)); + int ret; if (irqfd->users == 0) { ret = kvm_irqchip_add_msi_route(kvm_state, msg); @@ -525,7 +515,7 @@ static int kvm_virtio_pci_vq_vector_use(VirtIOPCIProxy *proxy, } irqfd->users++; - ret = kvm_irqchip_add_irqfd(kvm_state, fd, irqfd->virq); + ret = kvm_irqchip_add_irq_notifier(kvm_state, n, irqfd->virq); if (ret < 0) { if (--irqfd->users == 0) { kvm_irqchip_release_virq(kvm_state, irqfd->virq); @@ -533,8 +523,7 @@ static int kvm_virtio_pci_vq_vector_use(VirtIOPCIProxy *proxy, return ret; } - qemu_set_fd_handler(fd, NULL, NULL, NULL); - + virtio_queue_set_guest_notifier_fd_handler(vq, true, true); return 0; } @@ -543,19 +532,18 @@ static void kvm_virtio_pci_vq_vector_release(VirtIOPCIProxy *proxy, unsigned int vector) { VirtQueue *vq = virtio_get_queue(proxy->vdev, queue_no); + EventNotifier *n = virtio_queue_get_guest_notifier(vq); VirtIOIRQFD *irqfd = 
&proxy->vector_irqfd[vector]; - int fd, ret; + int ret; - fd = event_notifier_get_fd(virtio_queue_get_guest_notifier(vq)); - - ret = kvm_irqchip_remove_irqfd(kvm_state, fd, irqfd->virq); + ret = kvm_irqchip_remove_irq_notifier(kvm_state, n, irqfd->virq); assert(ret == 0); if (--irqfd->users == 0) { kvm_irqchip_release_virq(kvm_state, irqfd->virq); } - qemu_set_fd_handler(fd, virtio_pci_guest_notifier_read, NULL, vq); + virtio_queue_set_guest_notifier_fd_handler(vq, true, false); } static int kvm_virtio_pci_vector_use(PCIDevice *dev, unsigned vector, @@ -617,14 +605,9 @@ static int virtio_pci_set_guest_notifier(void *opaque, int n, bool assign) if (r < 0) { return r; } - qemu_set_fd_handler(event_notifier_get_fd(notifier), - virtio_pci_guest_notifier_read, NULL, vq); + virtio_queue_set_guest_notifier_fd_handler(vq, true, false); } else { - qemu_set_fd_handler(event_notifier_get_fd(notifier), - NULL, NULL, NULL); - /* Test and clear notifier before closing it, - * in case poll callback didn't have time to run. */ - virtio_pci_guest_notifier_read(vq); + virtio_queue_set_guest_notifier_fd_handler(vq, false, false); event_notifier_cleanup(notifier); } diff --git a/hw/virtio.c b/hw/virtio.c index 197edf00f4..d146f86f13 100644 --- a/hw/virtio.c +++ b/hw/virtio.c @@ -984,6 +984,30 @@ VirtQueue *virtio_get_queue(VirtIODevice *vdev, int n) return vdev->vq + n; } +static void virtio_queue_guest_notifier_read(EventNotifier *n) +{ + VirtQueue *vq = container_of(n, VirtQueue, guest_notifier); + if (event_notifier_test_and_clear(n)) { + virtio_irq(vq); + } +} + +void virtio_queue_set_guest_notifier_fd_handler(VirtQueue *vq, bool assign, + bool with_irqfd) +{ + if (assign && !with_irqfd) { + event_notifier_set_handler(&vq->guest_notifier, + virtio_queue_guest_notifier_read); + } else { + event_notifier_set_handler(&vq->guest_notifier, NULL); + } + if (!assign) { + /* Test and clear notifier before closing it, + * in case poll callback didn't have time to run. 
*/ + virtio_queue_guest_notifier_read(&vq->guest_notifier); + } +} + EventNotifier *virtio_queue_get_guest_notifier(VirtQueue *vq) { return &vq->guest_notifier; diff --git a/hw/virtio.h b/hw/virtio.h index 294948519c..96f4dbb8d9 100644 --- a/hw/virtio.h +++ b/hw/virtio.h @@ -231,6 +231,8 @@ void virtio_queue_set_last_avail_idx(VirtIODevice *vdev, int n, uint16_t idx); VirtQueue *virtio_get_queue(VirtIODevice *vdev, int n); int virtio_queue_get_id(VirtQueue *vq); EventNotifier *virtio_queue_get_guest_notifier(VirtQueue *vq); +void virtio_queue_set_guest_notifier_fd_handler(VirtQueue *vq, bool assign, + bool with_irqfd); EventNotifier *virtio_queue_get_host_notifier(VirtQueue *vq); void virtio_queue_set_host_notifier_fd_handler(VirtQueue *vq, bool assign); void virtio_queue_notify_vq(VirtQueue *vq); diff --git a/kvm-all.c b/kvm-all.c index 5a386b45f9..add24a14a9 100644 --- a/kvm-all.c +++ b/kvm-all.c @@ -1163,11 +1163,21 @@ int kvm_irqchip_add_irqfd(KVMState *s, int fd, int virq) return kvm_irqchip_assign_irqfd(s, fd, virq, true); } +int kvm_irqchip_add_irq_notifier(KVMState *s, EventNotifier *n, int virq) +{ + return kvm_irqchip_add_irqfd(s, event_notifier_get_fd(n), virq); +} + int kvm_irqchip_remove_irqfd(KVMState *s, int fd, int virq) { return kvm_irqchip_assign_irqfd(s, fd, virq, false); } +int kvm_irqchip_remove_irq_notifier(KVMState *s, EventNotifier *n, int virq) +{ + return kvm_irqchip_remove_irqfd(s, event_notifier_get_fd(n), virq); +} + static int kvm_irqchip_create(KVMState *s) { QemuOptsList *list = qemu_find_opts("machine"); diff --git a/kvm-stub.c b/kvm-stub.c index ec9a36454d..d23b11c020 100644 --- a/kvm-stub.c +++ b/kvm-stub.c @@ -147,7 +147,17 @@ int kvm_irqchip_add_irqfd(KVMState *s, int fd, int virq) return -ENOSYS; } +int kvm_irqchip_add_irq_notifier(KVMState *s, EventNotifier *n, int virq) +{ + return -ENOSYS; +} + int kvm_irqchip_remove_irqfd(KVMState *s, int fd, int virq) { return -ENOSYS; } + +int kvm_irqchip_remove_irq_notifier(KVMState *s, 
EventNotifier *n, int virq) +{ + return -ENOSYS; +} diff --git a/kvm.h b/kvm.h index 9c7b0ea6ae..99003f459a 100644 --- a/kvm.h +++ b/kvm.h @@ -218,4 +218,6 @@ void kvm_irqchip_release_virq(KVMState *s, int virq); int kvm_irqchip_add_irqfd(KVMState *s, int fd, int virq); int kvm_irqchip_remove_irqfd(KVMState *s, int fd, int virq); +int kvm_irqchip_add_irq_notifier(KVMState *s, EventNotifier *n, int virq); +int kvm_irqchip_remove_irq_notifier(KVMState *s, EventNotifier *n, int virq); #endif