kvm: x86: add support for KVM_CAP_SPLIT_IRQCHIP

This patch adds support for split IRQ chip mode. When
KVM_CAP_SPLIT_IRQCHIP is enabled:

    1.) The PIC, PIT, and IOAPIC are implemented in userspace while
    the LAPIC is implemented by KVM.

    2.) The software IOAPIC delivers interrupts to the KVM LAPIC via
    kvm_set_irq. Interrupt delivery is configured via the MSI routing
    table, for which routes are reserved in target-i386/kvm.c then
    configured in hw/intc/ioapic.c

    3.) KVM delivers IOAPIC EOIs via a new exit KVM_EXIT_IOAPIC_EOI,
    which is handled in target-i386/kvm.c and relayed to the software
    IOAPIC via ioapic_eoi_broadcast.

Signed-off-by: Matt Gingell <gingell@google.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
This commit is contained in:
Paolo Bonzini 2015-12-17 17:16:08 +01:00
parent 32c18a2dba
commit 15eafc2e60
11 changed files with 148 additions and 14 deletions

View file

@ -65,6 +65,7 @@
#include "hw/mem/pc-dimm.h" #include "hw/mem/pc-dimm.h"
#include "qapi/visitor.h" #include "qapi/visitor.h"
#include "qapi-visit.h" #include "qapi-visit.h"
#include "qom/cpu.h"
/* debug PC/ISA interrupts */ /* debug PC/ISA interrupts */
//#define DEBUG_IRQ //#define DEBUG_IRQ
@ -1517,7 +1518,7 @@ void pc_basic_device_init(ISABus *isa_bus, qemu_irq *gsi,
qemu_register_boot_set(pc_boot_set, *rtc_state); qemu_register_boot_set(pc_boot_set, *rtc_state);
if (!xen_enabled()) { if (!xen_enabled()) {
if (kvm_irqchip_in_kernel()) { if (kvm_pit_in_kernel()) {
pit = kvm_pit_init(isa_bus, 0x40); pit = kvm_pit_init(isa_bus, 0x40);
} else { } else {
pit = pit_init(isa_bus, 0x40, pit_isa_irq, pit_alt_irq); pit = pit_init(isa_bus, 0x40, pit_isa_irq, pit_alt_irq);
@ -1592,7 +1593,7 @@ void ioapic_init_gsi(GSIState *gsi_state, const char *parent_name)
SysBusDevice *d; SysBusDevice *d;
unsigned int i; unsigned int i;
if (kvm_irqchip_in_kernel()) { if (kvm_ioapic_in_kernel()) {
dev = qdev_create(NULL, "kvm-ioapic"); dev = qdev_create(NULL, "kvm-ioapic");
} else { } else {
dev = qdev_create(NULL, "ioapic"); dev = qdev_create(NULL, "ioapic");

View file

@ -182,7 +182,7 @@ static void pc_init1(MachineState *machine,
} }
gsi_state = g_malloc0(sizeof(*gsi_state)); gsi_state = g_malloc0(sizeof(*gsi_state));
if (kvm_irqchip_in_kernel()) { if (kvm_ioapic_in_kernel()) {
kvm_pc_setup_irq_routing(pci_enabled); kvm_pc_setup_irq_routing(pci_enabled);
gsi = qemu_allocate_irqs(kvm_pc_gsi_handler, gsi_state, gsi = qemu_allocate_irqs(kvm_pc_gsi_handler, gsi_state,
GSI_NUM_PINS); GSI_NUM_PINS);
@ -206,7 +206,7 @@ static void pc_init1(MachineState *machine,
} }
isa_bus_irqs(isa_bus, gsi); isa_bus_irqs(isa_bus, gsi);
if (kvm_irqchip_in_kernel()) { if (kvm_pic_in_kernel()) {
i8259 = kvm_i8259_init(isa_bus); i8259 = kvm_i8259_init(isa_bus);
} else if (xen_enabled()) { } else if (xen_enabled()) {
i8259 = xen_interrupt_controller_init(); i8259 = xen_interrupt_controller_init();

View file

@ -25,6 +25,8 @@
#include "hw/i386/pc.h" #include "hw/i386/pc.h"
#include "hw/i386/ioapic.h" #include "hw/i386/ioapic.h"
#include "hw/i386/ioapic_internal.h" #include "hw/i386/ioapic_internal.h"
#include "include/hw/pci/msi.h"
#include "sysemu/kvm.h"
//#define DEBUG_IOAPIC //#define DEBUG_IOAPIC
@ -35,6 +37,10 @@
#define DPRINTF(fmt, ...) #define DPRINTF(fmt, ...)
#endif #endif
/*
 * Bit offsets used by ioapic_update_kvm_routes() when it assembles the
 * data word of the MSI message that stands in for an IOAPIC pin.
 */
#define APIC_DELIVERY_MODE_SHIFT 8
#define APIC_POLARITY_SHIFT 14
#define APIC_TRIG_MODE_SHIFT 15
static IOAPICCommonState *ioapics[MAX_IOAPICS]; static IOAPICCommonState *ioapics[MAX_IOAPICS];
/* global variable from ioapic_common.c */ /* global variable from ioapic_common.c */
@ -54,6 +60,8 @@ static void ioapic_service(IOAPICCommonState *s)
for (i = 0; i < IOAPIC_NUM_PINS; i++) { for (i = 0; i < IOAPIC_NUM_PINS; i++) {
mask = 1 << i; mask = 1 << i;
if (s->irr & mask) { if (s->irr & mask) {
int coalesce = 0;
entry = s->ioredtbl[i]; entry = s->ioredtbl[i];
if (!(entry & IOAPIC_LVT_MASKED)) { if (!(entry & IOAPIC_LVT_MASKED)) {
trig_mode = ((entry >> IOAPIC_LVT_TRIGGER_MODE_SHIFT) & 1); trig_mode = ((entry >> IOAPIC_LVT_TRIGGER_MODE_SHIFT) & 1);
@ -64,6 +72,7 @@ static void ioapic_service(IOAPICCommonState *s)
if (trig_mode == IOAPIC_TRIGGER_EDGE) { if (trig_mode == IOAPIC_TRIGGER_EDGE) {
s->irr &= ~mask; s->irr &= ~mask;
} else { } else {
coalesce = s->ioredtbl[i] & IOAPIC_LVT_REMOTE_IRR;
s->ioredtbl[i] |= IOAPIC_LVT_REMOTE_IRR; s->ioredtbl[i] |= IOAPIC_LVT_REMOTE_IRR;
} }
if (delivery_mode == IOAPIC_DM_EXTINT) { if (delivery_mode == IOAPIC_DM_EXTINT) {
@ -71,8 +80,23 @@ static void ioapic_service(IOAPICCommonState *s)
} else { } else {
vector = entry & IOAPIC_VECTOR_MASK; vector = entry & IOAPIC_VECTOR_MASK;
} }
apic_deliver_irq(dest, dest_mode, delivery_mode, #ifdef CONFIG_KVM
vector, trig_mode); if (kvm_irqchip_is_split()) {
if (trig_mode == IOAPIC_TRIGGER_EDGE) {
kvm_set_irq(kvm_state, i, 1);
kvm_set_irq(kvm_state, i, 0);
} else {
if (!coalesce) {
kvm_set_irq(kvm_state, i, 1);
}
}
continue;
}
#else
(void)coalesce;
#endif
apic_deliver_irq(dest, dest_mode, delivery_mode, vector,
trig_mode);
} }
} }
} }
@ -116,6 +140,44 @@ static void ioapic_set_irq(void *opaque, int vector, int level)
} }
} }
/*
 * Refresh the KVM MSI routes that mirror the IOAPIC redirection table.
 *
 * Does nothing unless QEMU is built with CONFIG_KVM and the irqchip is in
 * split mode.  Otherwise every pin's redirection entry is translated into
 * an MSI message, installed on the route whose number equals the pin
 * index, and the whole set is committed in one go.
 *
 * @s: IOAPIC whose ioredtbl[] is translated.
 */
static void ioapic_update_kvm_routes(IOAPICCommonState *s)
{
#ifdef CONFIG_KVM
    int pin;

    if (!kvm_irqchip_is_split()) {
        return;
    }

    for (pin = 0; pin < IOAPIC_NUM_PINS; pin++) {
        uint64_t rte = s->ioredtbl[pin];
        uint8_t trigger = (rte >> IOAPIC_LVT_TRIGGER_MODE_SHIFT) & 1;
        uint8_t deliv = (rte >> IOAPIC_LVT_DELIV_MODE_SHIFT) & IOAPIC_DM_MASK;
        uint8_t dest_id = rte >> IOAPIC_LVT_DEST_SHIFT;
        uint8_t dest_is_logical = (rte >> IOAPIC_LVT_DEST_MODE_SHIFT) & 1;
        /*
         * NOTE(review): despite its name this value is extracted with the
         * trigger-mode shift, so it always equals 'trigger'.  Looks like a
         * copy-paste slip -- confirm whether the polarity bit was intended.
         */
        uint64_t polarity = (rte >> IOAPIC_LVT_TRIGGER_MODE_SHIFT) & 1;
        MSIMessage route_msg;

        route_msg.address = APIC_DEFAULT_ADDRESS
                            | (dest_is_logical << 2)
                            | (dest_id << 12);
        route_msg.data = (rte & IOAPIC_VECTOR_MASK)
                         | (deliv << APIC_DELIVERY_MODE_SHIFT)
                         | (polarity << APIC_POLARITY_SHIFT)
                         | (trigger << APIC_TRIG_MODE_SHIFT);

        kvm_irqchip_update_msi_route(kvm_state, pin, route_msg, NULL);
    }

    /* Push all updated routes at once rather than per pin. */
    kvm_irqchip_commit_routes(kvm_state);
#endif
}
void ioapic_eoi_broadcast(int vector) void ioapic_eoi_broadcast(int vector)
{ {
IOAPICCommonState *s; IOAPICCommonState *s;
@ -229,6 +291,8 @@ ioapic_mem_write(void *opaque, hwaddr addr, uint64_t val,
} }
break; break;
} }
ioapic_update_kvm_routes(s);
} }
static const MemoryRegionOps ioapic_io_ops = { static const MemoryRegionOps ioapic_io_ops = {

View file

@ -20,6 +20,19 @@
#define HPET_INTCAP "hpet-intcap" #define HPET_INTCAP "hpet-intcap"
/*
 * Per-device "is it emulated by KVM?" predicates for the PIT, PIC and
 * IOAPIC.  Each one holds only when the irqchip is in the kernel and is
 * not split; without CONFIG_KVM they are constant 0.
 */
#ifndef CONFIG_KVM
#define kvm_pit_in_kernel() 0
#define kvm_pic_in_kernel() 0
#define kvm_ioapic_in_kernel() 0
#else
#define kvm_pit_in_kernel() \
    (kvm_irqchip_in_kernel() && !kvm_irqchip_is_split())
#define kvm_pic_in_kernel() \
    (kvm_irqchip_in_kernel() && !kvm_irqchip_is_split())
#define kvm_ioapic_in_kernel() \
    (kvm_irqchip_in_kernel() && !kvm_irqchip_is_split())
#endif
/** /**
* PCMachineState: * PCMachineState:
* @acpi_dev: link to ACPI PM device that performs ACPI hotplug handling * @acpi_dev: link to ACPI PM device that performs ACPI hotplug handling

View file

@ -174,6 +174,7 @@ extern bool kvm_ioeventfd_any_length_allowed;
#else #else
#define kvm_enabled() (0) #define kvm_enabled() (0)
#define kvm_irqchip_in_kernel() (false) #define kvm_irqchip_in_kernel() (false)
#define kvm_irqchip_is_split() (false)
#define kvm_async_interrupts_enabled() (false) #define kvm_async_interrupts_enabled() (false)
#define kvm_halt_in_kernel() (false) #define kvm_halt_in_kernel() (false)
#define kvm_eventfds_enabled() (false) #define kvm_eventfds_enabled() (false)
@ -317,6 +318,8 @@ MemTxAttrs kvm_arch_post_run(CPUState *cpu, struct kvm_run *run);
int kvm_arch_handle_exit(CPUState *cpu, struct kvm_run *run); int kvm_arch_handle_exit(CPUState *cpu, struct kvm_run *run);
int kvm_arch_handle_ioapic_eoi(CPUState *cpu, struct kvm_run *run);
int kvm_arch_process_async_events(CPUState *cpu); int kvm_arch_process_async_events(CPUState *cpu);
int kvm_arch_get_registers(CPUState *cpu); int kvm_arch_get_registers(CPUState *cpu);
@ -484,6 +487,7 @@ void kvm_init_irq_routing(KVMState *s);
/** /**
* kvm_arch_irqchip_create: * kvm_arch_irqchip_create:
* @KVMState: The KVMState pointer * @KVMState: The KVMState pointer
* @MachineState: The MachineState pointer
* *
* Allow architectures to create an in-kernel irq chip themselves. * Allow architectures to create an in-kernel irq chip themselves.
* *
@ -491,7 +495,7 @@ void kvm_init_irq_routing(KVMState *s);
* 0: irq chip was not created * 0: irq chip was not created
* > 0: irq chip was created * > 0: irq chip was created
*/ */
int kvm_arch_irqchip_create(KVMState *s); int kvm_arch_irqchip_create(MachineState *ms, KVMState *s);
/** /**
* kvm_set_one_reg - set a register value in KVM via KVM_SET_ONE_REG ioctl * kvm_set_one_reg - set a register value in KVM via KVM_SET_ONE_REG ioctl

View file

@ -99,6 +99,7 @@ struct KVMState
KVMState *kvm_state; KVMState *kvm_state;
bool kvm_kernel_irqchip; bool kvm_kernel_irqchip;
bool kvm_split_irqchip;
bool kvm_async_interrupts_allowed; bool kvm_async_interrupts_allowed;
bool kvm_halt_in_kernel_allowed; bool kvm_halt_in_kernel_allowed;
bool kvm_eventfds_allowed; bool kvm_eventfds_allowed;
@ -1430,9 +1431,14 @@ static void kvm_irqchip_create(MachineState *machine, KVMState *s)
/* First probe and see if there's a arch-specific hook to create the /* First probe and see if there's a arch-specific hook to create the
* in-kernel irqchip for us */ * in-kernel irqchip for us */
ret = kvm_arch_irqchip_create(s); ret = kvm_arch_irqchip_create(machine, s);
if (ret == 0) { if (ret == 0) {
ret = kvm_vm_ioctl(s, KVM_CREATE_IRQCHIP); if (machine_kernel_irqchip_split(machine)) {
perror("Split IRQ chip mode not supported.");
exit(1);
} else {
ret = kvm_vm_ioctl(s, KVM_CREATE_IRQCHIP);
}
} }
if (ret < 0) { if (ret < 0) {
fprintf(stderr, "Create kernel irqchip failed: %s\n", strerror(-ret)); fprintf(stderr, "Create kernel irqchip failed: %s\n", strerror(-ret));

View file

@ -1,7 +1,7 @@
#include "qemu-common.h" #include "qemu-common.h"
#include "sysemu/kvm.h" #include "sysemu/kvm.h"
int kvm_arch_irqchip_create(KVMState *s) int kvm_arch_irqchip_create(MachineState *ms, KVMState *s)
{ {
return 0; return 0;
} }

View file

@ -25,6 +25,7 @@
#include "internals.h" #include "internals.h"
#include "hw/arm/arm.h" #include "hw/arm/arm.h"
#include "exec/memattrs.h" #include "exec/memattrs.h"
#include "hw/boards.h"
const KVMCapabilityInfo kvm_arch_required_capabilities[] = { const KVMCapabilityInfo kvm_arch_required_capabilities[] = {
KVM_CAP_LAST_INFO KVM_CAP_LAST_INFO
@ -578,8 +579,13 @@ void kvm_arch_init_irq_routing(KVMState *s)
{ {
} }
int kvm_arch_irqchip_create(KVMState *s) int kvm_arch_irqchip_create(MachineState *ms, KVMState *s)
{ {
if (machine_kernel_irqchip_split(ms)) {
perror("-machine kernel_irqchip=split is not supported on ARM.");
exit(1);
}
/* If we can create the VGIC using the newer device control API, we /* If we can create the VGIC using the newer device control API, we
* let the device do this when it initializes itself, otherwise we * let the device do this when it initializes itself, otherwise we
* fall back to the old API */ * fall back to the old API */

View file

@ -2743,7 +2743,7 @@ static void x86_cpu_apic_create(X86CPU *cpu, Error **errp)
APICCommonState *apic; APICCommonState *apic;
const char *apic_type = "apic"; const char *apic_type = "apic";
if (kvm_irqchip_in_kernel()) { if (kvm_apic_in_kernel()) {
apic_type = "kvm-apic"; apic_type = "kvm-apic";
} else if (xen_enabled()) { } else if (xen_enabled()) {
apic_type = "xen-apic"; apic_type = "xen-apic";

View file

@ -39,6 +39,7 @@
#include "exec/ioport.h" #include "exec/ioport.h"
#include "standard-headers/asm-x86/hyperv.h" #include "standard-headers/asm-x86/hyperv.h"
#include "hw/pci/pci.h" #include "hw/pci/pci.h"
#include "hw/pci/msi.h"
#include "migration/migration.h" #include "migration/migration.h"
#include "exec/memattrs.h" #include "exec/memattrs.h"
@ -2597,7 +2598,7 @@ void kvm_arch_pre_run(CPUState *cpu, struct kvm_run *run)
} }
} }
if (!kvm_irqchip_in_kernel()) { if (!kvm_pic_in_kernel()) {
qemu_mutex_lock_iothread(); qemu_mutex_lock_iothread();
} }
@ -2615,7 +2616,7 @@ void kvm_arch_pre_run(CPUState *cpu, struct kvm_run *run)
} }
} }
if (!kvm_irqchip_in_kernel()) { if (!kvm_pic_in_kernel()) {
/* Try to inject an interrupt if the guest can accept it */ /* Try to inject an interrupt if the guest can accept it */
if (run->ready_for_interrupt_injection && if (run->ready_for_interrupt_injection &&
(cpu->interrupt_request & CPU_INTERRUPT_HARD) && (cpu->interrupt_request & CPU_INTERRUPT_HARD) &&
@ -3017,6 +3018,10 @@ int kvm_arch_handle_exit(CPUState *cs, struct kvm_run *run)
case KVM_EXIT_HYPERV: case KVM_EXIT_HYPERV:
ret = kvm_hv_handle_exit(cpu, &run->hyperv); ret = kvm_hv_handle_exit(cpu, &run->hyperv);
break; break;
case KVM_EXIT_IOAPIC_EOI:
ioapic_eoi_broadcast(run->eoi.vector);
ret = 0;
break;
default: default:
fprintf(stderr, "KVM: unknown exit reason %d\n", run->exit_reason); fprintf(stderr, "KVM: unknown exit reason %d\n", run->exit_reason);
ret = -1; ret = -1;
@ -3051,6 +3056,39 @@ void kvm_arch_init_irq_routing(KVMState *s)
*/ */
kvm_msi_via_irqfd_allowed = true; kvm_msi_via_irqfd_allowed = true;
kvm_gsi_routing_allowed = true; kvm_gsi_routing_allowed = true;
if (kvm_irqchip_is_split()) {
int i;
/* If the ioapic is in QEMU and the lapics are in KVM, reserve
MSI routes for signaling interrupts to the local apics. */
for (i = 0; i < IOAPIC_NUM_PINS; i++) {
struct MSIMessage msg = { 0x0, 0x0 };
if (kvm_irqchip_add_msi_route(s, msg, NULL) < 0) {
error_report("Could not enable split IRQ mode.");
exit(1);
}
}
}
}
/*
 * x86 hook for irqchip creation: turns on split irqchip mode when the
 * machine asks for it.
 *
 * @ms: machine whose kernel_irqchip setting is consulted.
 * @s:  KVM state the capability is enabled on.
 *
 * Returns 0 when split mode was not requested (nothing was created here)
 * and 1 once KVM_CAP_SPLIT_IRQCHIP has been enabled.  If enabling the
 * capability fails, an error is reported and the process exits.
 */
int kvm_arch_irqchip_create(MachineState *ms, KVMState *s)
{
    int ret;

    if (!machine_kernel_irqchip_split(ms)) {
        return 0;
    }

    /*
     * The last argument is passed as IOAPIC_NUM_PINS (same value as the
     * literal 24 used before) so it stays in step with the number of MSI
     * routes kvm_arch_init_irq_routing() reserves for the IOAPIC pins.
     */
    ret = kvm_vm_enable_cap(s, KVM_CAP_SPLIT_IRQCHIP, 0, IOAPIC_NUM_PINS);
    if (ret) {
        /* error_report() terminates the line itself; no "\n" here. */
        error_report("Could not enable split irqchip mode: %s",
                     strerror(-ret));
        exit(1);
    }

    DPRINTF("Enabled KVM_CAP_SPLIT_IRQCHIP\n");
    kvm_split_irqchip = true;
    return 1;
}
/* Classic KVM device assignment interface. Will remain x86 only. */ /* Classic KVM device assignment interface. Will remain x86 only. */

View file

@ -13,6 +13,8 @@
#include "sysemu/kvm.h" #include "sysemu/kvm.h"
/*
 * True whenever KVM provides an irqchip.  Unlike the PIT/PIC/IOAPIC
 * predicates this one does not exclude split mode: per the split-irqchip
 * design the local APIC remains in KVM there.
 */
#define kvm_apic_in_kernel() (kvm_irqchip_in_kernel())
bool kvm_allows_irq0_override(void); bool kvm_allows_irq0_override(void);
bool kvm_has_smm(void); bool kvm_has_smm(void);
void kvm_synchronize_all_tsc(void); void kvm_synchronize_all_tsc(void);