diff --git a/cpus.c b/cpus.c index eb22bd5df1..b182b3d7d9 100644 --- a/cpus.c +++ b/cpus.c @@ -441,8 +441,7 @@ static bool cpu_thread_is_idle(CPUArchState *env) if (env->stopped || !runstate_is_running()) { return true; } - if (!env->halted || qemu_cpu_has_work(env) || - (kvm_enabled() && kvm_irqchip_in_kernel())) { + if (!env->halted || qemu_cpu_has_work(env) || kvm_irqchip_in_kernel()) { return false; } return true; diff --git a/hw/ivshmem.c b/hw/ivshmem.c index b80aa8f1de..df4f50a092 100644 --- a/hw/ivshmem.c +++ b/hw/ivshmem.c @@ -354,8 +354,8 @@ static void close_guest_eventfds(IVShmemState *s, int posn) guest_curr_max = s->peers[posn].nb_eventfds; for (i = 0; i < guest_curr_max; i++) { - kvm_set_ioeventfd_mmio_long(s->peers[posn].eventfds[i], - s->mmio_addr + DOORBELL, (posn << 16) | i, 0); + kvm_set_ioeventfd_mmio(s->peers[posn].eventfds[i], + s->mmio_addr + DOORBELL, (posn << 16) | i, 0, 4); close(s->peers[posn].eventfds[i]); } @@ -500,8 +500,8 @@ static void ivshmem_read(void *opaque, const uint8_t * buf, int flags) } if (ivshmem_has_feature(s, IVSHMEM_IOEVENTFD)) { - if (kvm_set_ioeventfd_mmio_long(incoming_fd, s->mmio_addr + DOORBELL, - (incoming_posn << 16) | guest_max_eventfd, 1) < 0) { + if (kvm_set_ioeventfd_mmio(incoming_fd, s->mmio_addr + DOORBELL, + (incoming_posn << 16) | guest_max_eventfd, 1, 4) < 0) { fprintf(stderr, "ivshmem: ioeventfd not available\n"); } } diff --git a/hw/kvm/clock.c b/hw/kvm/clock.c index 446bd62176..824b978397 100644 --- a/hw/kvm/clock.c +++ b/hw/kvm/clock.c @@ -65,9 +65,25 @@ static void kvmclock_vm_state_change(void *opaque, int running, RunState state) { KVMClockState *s = opaque; + CPUArchState *penv = first_cpu; + int cap_clock_ctrl = kvm_check_extension(kvm_state, KVM_CAP_KVMCLOCK_CTRL); + int ret; if (running) { s->clock_valid = false; + + if (!cap_clock_ctrl) { + return; + } + for (penv = first_cpu; penv != NULL; penv = penv->next_cpu) { + ret = kvm_vcpu_ioctl(penv, KVM_KVMCLOCK_CTRL, 0); + if (ret) { + if (ret != -EINVAL) { + fprintf(stderr, "%s: %s\n", __func__, strerror(-ret)); + } + return; + } + } } } diff --git a/kvm-all.c b/kvm-all.c index ba2cee10f2..b8e9dc69de 100644 --- a/kvm-all.c +++ b/kvm-all.c @@ -75,7 +75,6 @@ struct KVMState #ifdef KVM_CAP_SET_GUEST_DEBUG struct kvm_sw_breakpoint_head kvm_sw_breakpoints; #endif - int pit_in_kernel; int pit_state2; int xsave, xcrs; int many_ioeventfds; @@ -198,11 +197,6 @@ static void kvm_reset_vcpu(void *opaque) kvm_arch_reset_vcpu(env); } -int kvm_pit_in_kernel(void) -{ - return kvm_state->pit_in_kernel; -} - int kvm_init_vcpu(CPUArchState *env) { KVMState *s = kvm_state; @@ -747,10 +741,10 @@ static void kvm_mem_ioeventfd_add(MemoryRegionSection *section, { int r; - assert(match_data && section->size == 4); + assert(match_data && section->size <= 8); - r = kvm_set_ioeventfd_mmio_long(fd, section->offset_within_address_space, - data, true); + r = kvm_set_ioeventfd_mmio(fd, section->offset_within_address_space, + data, true, section->size); if (r < 0) { abort(); } @@ -761,8 +755,8 @@ static void kvm_mem_ioeventfd_del(MemoryRegionSection *section, { int r; - r = kvm_set_ioeventfd_mmio_long(fd, section->offset_within_address_space, - data, false); + r = kvm_set_ioeventfd_mmio(fd, section->offset_within_address_space, + data, false, section->size); if (r < 0) { abort(); } @@ -877,7 +871,7 @@ static void kvm_init_irq_routing(KVMState *s) unsigned int gsi_bits, i; /* Round up so we can search ints using ffs */ - gsi_bits = (gsi_count + 31) / 32; + gsi_bits = ALIGN(gsi_count, 32); s->used_gsi_bitmap = g_malloc0(gsi_bits / 8); s->max_gsi = gsi_bits; @@ -1642,14 +1636,15 @@ int kvm_set_signal_mask(CPUArchState *env, const sigset_t *sigset) return r; } -int kvm_set_ioeventfd_mmio_long(int fd, uint32_t addr, uint32_t val, bool assign) +int kvm_set_ioeventfd_mmio(int fd, uint32_t addr, uint32_t val, bool assign, + uint32_t size) { int ret; struct kvm_ioeventfd iofd; iofd.datamatch = val; iofd.addr = addr; - iofd.len = 4; + iofd.len = size; iofd.flags = KVM_IOEVENTFD_FLAG_DATAMATCH; iofd.fd = fd; diff --git a/kvm-stub.c b/kvm-stub.c index 69a1228756..47c573d6f3 100644 --- a/kvm-stub.c +++ b/kvm-stub.c @@ -16,12 +16,6 @@ #include "gdbstub.h" #include "kvm.h" -int kvm_pit_in_kernel(void) -{ - return 0; -} - - int kvm_init_vcpu(CPUArchState *env) { return -ENOSYS; @@ -120,7 +114,7 @@ int kvm_set_ioeventfd_pio_word(int fd, uint16_t addr, uint16_t val, bool assign) return -ENOSYS; } -int kvm_set_ioeventfd_mmio_long(int fd, uint32_t adr, uint32_t val, bool assign) +int kvm_set_ioeventfd_mmio(int fd, uint32_t adr, uint32_t val, bool assign, uint32_t len) { return -ENOSYS; } diff --git a/kvm.h b/kvm.h index 330f17b1db..4ccae8c0c8 100644 --- a/kvm.h +++ b/kvm.h @@ -83,8 +83,6 @@ int kvm_update_guest_debug(CPUArchState *env, unsigned long reinject_trap); int kvm_set_signal_mask(CPUArchState *env, const sigset_t *sigset); #endif -int kvm_pit_in_kernel(void); - int kvm_on_sigbus_vcpu(CPUArchState *env, int code, void *addr); int kvm_on_sigbus(int code, void *addr); @@ -210,7 +208,8 @@ int kvm_physical_memory_addr_from_host(KVMState *s, void *ram_addr, #endif #endif -int kvm_set_ioeventfd_mmio_long(int fd, uint32_t adr, uint32_t val, bool assign); +int kvm_set_ioeventfd_mmio(int fd, uint32_t adr, uint32_t val, bool assign, + uint32_t size); int kvm_set_ioeventfd_pio_word(int fd, uint16_t adr, uint16_t val, bool assign); #endif diff --git a/linux-headers/asm-powerpc/kvm.h b/linux-headers/asm-powerpc/kvm.h index b921c3f489..1bea4d8ea6 100644 --- a/linux-headers/asm-powerpc/kvm.h +++ b/linux-headers/asm-powerpc/kvm.h @@ -277,6 +277,7 @@ struct kvm_sync_regs { #define KVM_CPU_E500V2 2 #define KVM_CPU_3S_32 3 #define KVM_CPU_3S_64 4 +#define KVM_CPU_E500MC 5 /* for KVM_CAP_SPAPR_TCE */ struct kvm_create_spapr_tce { diff --git a/linux-headers/asm-s390/kvm.h b/linux-headers/asm-s390/kvm.h index 9acbde4af2..96076676e2 100644 --- a/linux-headers/asm-s390/kvm.h +++ b/linux-headers/asm-s390/kvm.h @@ -44,10 +44,12 @@ struct kvm_guest_debug_arch { #define KVM_SYNC_PREFIX (1UL << 0) #define KVM_SYNC_GPRS (1UL << 1) #define KVM_SYNC_ACRS (1UL << 2) +#define KVM_SYNC_CRS (1UL << 3) /* definition of registers in kvm_run */ struct kvm_sync_regs { __u64 prefix; /* prefix register */ __u64 gprs[16]; /* general purpose registers */ __u32 acrs[16]; /* access registers */ + __u64 crs[16]; /* control registers */ }; #endif diff --git a/linux-headers/linux/kvm.h b/linux-headers/linux/kvm.h index f6b53432db..ee7bd9cc32 100644 --- a/linux-headers/linux/kvm.h +++ b/linux-headers/linux/kvm.h @@ -588,6 +588,8 @@ struct kvm_ppc_pvinfo { #define KVM_CAP_TSC_DEADLINE_TIMER 72 #define KVM_CAP_S390_UCONTROL 73 #define KVM_CAP_SYNC_REGS 74 +#define KVM_CAP_PCI_2_3 75 +#define KVM_CAP_KVMCLOCK_CTRL 76 #ifdef KVM_CAP_IRQ_ROUTING @@ -784,6 +786,9 @@ struct kvm_s390_ucas_mapping { /* Available with KVM_CAP_TSC_CONTROL */ #define KVM_SET_TSC_KHZ _IO(KVMIO, 0xa2) #define KVM_GET_TSC_KHZ _IO(KVMIO, 0xa3) +/* Available with KVM_CAP_PCI_2_3 */ +#define KVM_ASSIGN_SET_INTX_MASK _IOW(KVMIO, 0xa4, \ + struct kvm_assigned_pci_dev) /* * ioctls for vcpu fds @@ -855,8 +860,12 @@ struct kvm_s390_ucas_mapping { /* Available with KVM_CAP_ONE_REG */ #define KVM_GET_ONE_REG _IOW(KVMIO, 0xab, struct kvm_one_reg) #define KVM_SET_ONE_REG _IOW(KVMIO, 0xac, struct kvm_one_reg) +/* VM is being stopped by host */ +#define KVM_KVMCLOCK_CTRL _IO(KVMIO, 0xad) #define KVM_DEV_ASSIGN_ENABLE_IOMMU (1 << 0) +#define KVM_DEV_ASSIGN_PCI_2_3 (1 << 1) +#define KVM_DEV_ASSIGN_MASK_INTX (1 << 2) struct kvm_assigned_pci_dev { __u32 assigned_dev_id; diff --git a/scripts/kvm/kvm_flightrecorder b/scripts/kvm/kvm_flightrecorder new file mode 100755 index 0000000000..7fb1c2d1a7 --- /dev/null +++ b/scripts/kvm/kvm_flightrecorder @@ -0,0 +1,126 @@ +#!/usr/bin/env python +# +# KVM Flight Recorder - ring buffer tracing script +# +# Copyright (C) 2012 IBM Corp +# +# Author: Stefan Hajnoczi +# +# This script provides a command-line interface to kvm ftrace and is designed +# to be used as a flight recorder that is always running. To start in-memory +# recording: +# +# sudo kvm_flightrecorder start 8192 # 8 MB per-cpu ring buffers +# +# The per-cpu ring buffer size can be given in KB as an optional argument to +# the 'start' subcommand. +# +# To stop the flight recorder: +# +# sudo kvm_flightrecorder stop +# +# To dump the contents of the flight recorder (this can be done when the +# recorder is stopped or while it is running): +# +# sudo kvm_flightrecorder dump >/path/to/dump.txt +# +# To observe the trace while it is running, use the 'tail' subcommand: +# +# sudo kvm_flightrecorder tail +# +# Note that the flight recorder may impact overall system performance by +# consuming CPU cycles. No disk I/O is performed since the ring buffer holds a +# fixed-size in-memory trace. + +import sys +import os + +tracing_dir = '/sys/kernel/debug/tracing' + +def trace_path(*args): + return os.path.join(tracing_dir, *args) + +def write_file(path, data): + open(path, 'wb').write(data) + +def enable_event(subsystem, event, enable): + write_file(trace_path('events', subsystem, event, 'enable'), '1' if enable else '0') + +def enable_subsystem(subsystem, enable): + write_file(trace_path('events', subsystem, 'enable'), '1' if enable else '0') + +def start_tracing(): + enable_subsystem('kvm', True) + write_file(trace_path('tracing_on'), '1') + +def stop_tracing(): + write_file(trace_path('tracing_on'), '0') + enable_subsystem('kvm', False) + write_file(trace_path('events', 'enable'), '0') + write_file(trace_path('current_tracer'), 'nop') + +def dump_trace(): + tracefile = open(trace_path('trace'), 'r') + try: + lines = True + while lines: + lines = tracefile.readlines(64 * 1024) + sys.stdout.writelines(lines) + except KeyboardInterrupt: + pass + +def tail_trace(): + try: + for line in open(trace_path('trace_pipe'), 'r'): + sys.stdout.write(line) + except KeyboardInterrupt: + pass + +def usage(): + print 'Usage: %s start [buffer_size_kb] | stop | dump | tail' % sys.argv[0] + print 'Control the KVM flight recorder tracing.' + sys.exit(0) + +def main(): + if len(sys.argv) < 2: + usage() + + cmd = sys.argv[1] + if cmd == '--version': + print 'kvm_flightrecorder version 1.0' + sys.exit(0) + + if not os.path.isdir(tracing_dir): + print 'Unable to tracing debugfs directory, try:' + print 'mount -t debugfs none /sys/kernel/debug' + sys.exit(1) + if not os.access(tracing_dir, os.W_OK): + print 'Unable to write to tracing debugfs directory, please run as root' + sys.exit(1) + + if cmd == 'start': + stop_tracing() # clean up first + + if len(sys.argv) == 3: + try: + buffer_size_kb = int(sys.argv[2]) + except ValueError: + print 'Invalid per-cpu trace buffer size in KB' + sys.exit(1) + write_file(trace_path('buffer_size_kb'), str(buffer_size_kb)) + print 'Per-CPU ring buffer size set to %d KB' % buffer_size_kb + + start_tracing() + print 'KVM flight recorder enabled' + elif cmd == 'stop': + stop_tracing() + print 'KVM flight recorder disabled' + elif cmd == 'dump': + dump_trace() + elif cmd == 'tail': + tail_trace() + else: + usage() + +if __name__ == '__main__': + sys.exit(main())