From a096b3a6732f846ec57dc28b47ee9435aa0609bf Mon Sep 17 00:00:00 2001 From: Alexander Graf Date: Fri, 16 May 2014 17:15:21 +0200 Subject: [PATCH 01/11] kvmclock: Ensure time in migration never goes backward When we migrate we ask the kernel about its current belief on what the guest time would be. However, I've seen cases where the kvmclock guest structure indicates a time more recent than the kvm returned time. To make sure we never go backwards, calculate what the guest would have seen as time at the point of migration and use that value instead of the kernel returned one when it's more recent. This bases the view of the kvmclock after migration on the same foundation in host as well as guest. Signed-off-by: Alexander Graf Cc: qemu-stable@nongnu.org Reviewed-by: Marcelo Tosatti Signed-off-by: Paolo Bonzini --- hw/i386/kvm/clock.c | 48 +++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 48 insertions(+) diff --git a/hw/i386/kvm/clock.c b/hw/i386/kvm/clock.c index 892aa025f4..6f4ed28ad4 100644 --- a/hw/i386/kvm/clock.c +++ b/hw/i386/kvm/clock.c @@ -14,6 +14,7 @@ */ #include "qemu-common.h" +#include "qemu/host-utils.h" #include "sysemu/sysemu.h" #include "sysemu/kvm.h" #include "hw/sysbus.h" @@ -34,6 +35,47 @@ typedef struct KVMClockState { bool clock_valid; } KVMClockState; +struct pvclock_vcpu_time_info { + uint32_t version; + uint32_t pad0; + uint64_t tsc_timestamp; + uint64_t system_time; + uint32_t tsc_to_system_mul; + int8_t tsc_shift; + uint8_t flags; + uint8_t pad[2]; +} __attribute__((__packed__)); /* 32 bytes */ + +static uint64_t kvmclock_current_nsec(KVMClockState *s) +{ + CPUState *cpu = first_cpu; + CPUX86State *env = cpu->env_ptr; + hwaddr kvmclock_struct_pa = env->system_time_msr & ~1ULL; + uint64_t migration_tsc = env->tsc; + struct pvclock_vcpu_time_info time; + uint64_t delta; + uint64_t nsec_lo; + uint64_t nsec_hi; + uint64_t nsec; + + if (!(env->system_time_msr & 1ULL)) { + /* KVM clock not active */ + return 0; + } + + cpu_physical_memory_read(kvmclock_struct_pa, &time, sizeof(time)); + + delta = migration_tsc - time.tsc_timestamp; + if (time.tsc_shift < 0) { + delta >>= -time.tsc_shift; + } else { + delta <<= time.tsc_shift; + } + + mulu64(&nsec_lo, &nsec_hi, delta, time.tsc_to_system_mul); + nsec = (nsec_lo >> 32) | (nsec_hi << 32); + return nsec + time.system_time; +} static void kvmclock_vm_state_change(void *opaque, int running, RunState state) @@ -45,9 +87,15 @@ static void kvmclock_vm_state_change(void *opaque, int running, if (running) { struct kvm_clock_data data; + uint64_t time_at_migration = kvmclock_current_nsec(s); s->clock_valid = false; + /* We can't rely on the migrated clock value, just discard it */ + if (time_at_migration) { + s->clock = time_at_migration; + } + data.clock = s->clock; data.flags = 0; ret = kvm_vm_ioctl(kvm_state, KVM_SET_CLOCK, &data); From b763adf1a6b271f17f15ea31fae93d03e980d911 Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Wed, 21 May 2014 12:42:26 +0200 Subject: [PATCH 02/11] kvm_stat: allow choosing between tracepoints and old stats The old stats contain information not available in the tracepoints. By default, keep the old behavior, but allow choosing which set of stats to present, or even both. Inspired by a patch from Marcelo Tosatti. Signed-off-by: Paolo Bonzini --- scripts/kvm/kvm_stat | 60 ++++++++++++++++++++++++++++++-------------- 1 file changed, 41 insertions(+), 19 deletions(-) diff --git a/scripts/kvm/kvm_stat b/scripts/kvm/kvm_stat index 762544b197..d7e97e7488 100755 --- a/scripts/kvm/kvm_stat +++ b/scripts/kvm/kvm_stat @@ -352,8 +352,8 @@ class TracepointProvider(object): return ret class Stats: - def __init__(self, provider, fields = None): - self.provider = provider + def __init__(self, providers, fields = None): + self.providers = providers self.fields_filter = fields self._update() def _update(self): @@ -362,22 +362,25 @@ class Stats: if not self.fields_filter: return True return re.match(self.fields_filter, key) is not None - self.values = dict([(key, None) - for key in provider.fields() - if wanted(key)]) - self.provider.select(self.values.keys()) + self.values = dict() + for d in providers: + provider_fields = [key for key in d.fields() if wanted(key)] + for key in provider_fields: + self.values[key] = None + d.select(provider_fields) def set_fields_filter(self, fields_filter): self.fields_filter = fields_filter self._update() def get(self): - new = self.provider.read() - for key in self.provider.fields(): - oldval = self.values.get(key, (0, 0)) - newval = new[key] - newdelta = None - if oldval is not None: - newdelta = newval - oldval[0] - self.values[key] = (newval, newdelta) + for d in providers: + new = d.read() + for key in d.fields(): + oldval = self.values.get(key, (0, 0)) + newval = new[key] + newdelta = None + if oldval is not None: + newdelta = newval - oldval[0] + self.values[key] = (newval, newdelta) return self.values if not os.access('/sys/kernel/debug', os.F_OK): @@ -487,6 +490,18 @@ options.add_option('-l', '--log', dest = 'log', help = 'run in logging mode (like vmstat)', ) +options.add_option('-t', '--tracepoints', + action = 'store_true', + default = False, + dest = 'tracepoints', + help = 'retrieve statistics from tracepoints', + ) +options.add_option('-d', '--debugfs', + action = 'store_true', + default = False, + dest = 'debugfs', + help = 'retrieve statistics from debugfs', + ) options.add_option('-f', '--fields', action = 'store', default = None, @@ -495,12 +510,19 @@ options.add_option('-f', '--fields', ) (options, args) = options.parse_args(sys.argv) -try: - provider = TracepointProvider() -except: - provider = DebugfsProvider() +providers = [] +if options.tracepoints: + providers.append(TracepointProvider()) +if options.debugfs: + providers.append(DebugfsProvider()) -stats = Stats(provider, fields = options.fields) +if len(providers) == 0: + try: + providers = [TracepointProvider()] + except: + providers = [DebugfsProvider()] + +stats = Stats(providers, fields = options.fields) if options.log: log(stats) From 87446327ccb2e944fe7abc848bab798a0864eb03 Mon Sep 17 00:00:00 2001 From: Kevin O'Connor Date: Tue, 20 May 2014 17:10:24 -0400 Subject: [PATCH 03/11] target-i386: Fix vm86 mode regression introduced in fd460606fd6f. Commit fd460606fd6f moved setting of eflags above calls to cpu_x86_load_seg_cache() in seg_helper.c. Unfortunately, in do_interrupt_protected() this moved the clearing of VM_MASK above a test for it. Fix this regression by storing the value of VM_MASK at the start of do_interrupt_protected(). Signed-off-by: Kevin O'Connor Signed-off-by: Paolo Bonzini --- target-i386/seg_helper.c | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/target-i386/seg_helper.c b/target-i386/seg_helper.c index 3cf862ee60..cc7eadf9e2 100644 --- a/target-i386/seg_helper.c +++ b/target-i386/seg_helper.c @@ -558,6 +558,7 @@ static void do_interrupt_protected(CPUX86State *env, int intno, int is_int, int has_error_code, new_stack, shift; uint32_t e1, e2, offset, ss = 0, esp, ss_e1 = 0, ss_e2 = 0; uint32_t old_eip, sp_mask; + int vm86 = env->eflags & VM_MASK; has_error_code = 0; if (!is_int && !is_hw) { @@ -673,7 +674,7 @@ static void do_interrupt_protected(CPUX86State *env, int intno, int is_int, ssp = get_seg_base(ss_e1, ss_e2); } else if ((e2 & DESC_C_MASK) || dpl == cpl) { /* to same privilege */ - if (env->eflags & VM_MASK) { + if (vm86) { raise_exception_err(env, EXCP0D_GPF, selector & 0xfffc); } new_stack = 0; @@ -694,14 +695,14 @@ static void do_interrupt_protected(CPUX86State *env, int intno, int is_int, #if 0 /* XXX: check that enough room is available */ push_size = 6 + (new_stack << 2) + (has_error_code << 1); - if (env->eflags & VM_MASK) { + if (vm86) { push_size += 8; } push_size <<= shift; #endif if (shift == 1) { if (new_stack) { - if (env->eflags & VM_MASK) { + if (vm86) { PUSHL(ssp, esp, sp_mask, env->segs[R_GS].selector); PUSHL(ssp, esp, sp_mask, env->segs[R_FS].selector); PUSHL(ssp, esp, sp_mask, env->segs[R_DS].selector); @@ -718,7 +719,7 @@ static void do_interrupt_protected(CPUX86State *env, int intno, int is_int, } } else { if (new_stack) { - if (env->eflags & VM_MASK) { + if (vm86) { PUSHW(ssp, esp, sp_mask, env->segs[R_GS].selector); PUSHW(ssp, esp, sp_mask, env->segs[R_FS].selector); PUSHW(ssp, esp, sp_mask, env->segs[R_DS].selector); @@ -742,7 +743,7 @@ static void do_interrupt_protected(CPUX86State *env, int intno, int is_int, env->eflags &= ~(TF_MASK | VM_MASK | RF_MASK | NT_MASK); if (new_stack) { - if (env->eflags & VM_MASK) { + if (vm86) { cpu_x86_load_seg_cache(env, R_ES, 0, 0, 0, 0); cpu_x86_load_seg_cache(env, R_DS, 0, 0, 0, 0); cpu_x86_load_seg_cache(env, R_FS, 0, 0, 0, 0); From b98dbc90950cd4e43ab9b4f8300dbeae6cf8c8cb Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Thu, 15 May 2014 16:07:04 +0200 Subject: [PATCH 04/11] target-i386: fix segment flags for SMM and VM86 mode With the next patch, these need to be correct or VM86 tasks have the wrong CPL. The flags are basically what the Intel VMX documentation say is mandatory for entry into a VM86 guest. For consistency, SMM ought to have the same flags except with CPL=0. Tested-by: Kevin O'Connor Signed-off-by: Paolo Bonzini --- bsd-user/main.c | 2 +- linux-user/main.c | 2 +- target-i386/gdbstub.c | 4 +++- target-i386/seg_helper.c | 11 ++++++++--- target-i386/smm_helper.c | 24 ++++++++++++++++++------ 5 files changed, 31 insertions(+), 12 deletions(-) diff --git a/bsd-user/main.c b/bsd-user/main.c index 4ba61da896..0e8c26c137 100644 --- a/bsd-user/main.c +++ b/bsd-user/main.c @@ -1004,7 +1004,7 @@ int main(int argc, char **argv) #if defined(TARGET_I386) env->cr[0] = CR0_PG_MASK | CR0_WP_MASK | CR0_PE_MASK; - env->hflags |= HF_PE_MASK; + env->hflags |= HF_PE_MASK | HF_CPL_MASK; if (env->features[FEAT_1_EDX] & CPUID_SSE) { env->cr[4] |= CR4_OSFXSR_MASK; env->hflags |= HF_OSFXSR_MASK; diff --git a/linux-user/main.c b/linux-user/main.c index 882186e1a0..3e21024056 100644 --- a/linux-user/main.c +++ b/linux-user/main.c @@ -4052,7 +4052,7 @@ int main(int argc, char **argv, char **envp) #if defined(TARGET_I386) env->cr[0] = CR0_PG_MASK | CR0_WP_MASK | CR0_PE_MASK; - env->hflags |= HF_PE_MASK; + env->hflags |= HF_PE_MASK | HF_CPL_MASK; if (env->features[FEAT_1_EDX] & CPUID_SSE) { env->cr[4] |= CR4_OSFXSR_MASK; env->hflags |= HF_OSFXSR_MASK; diff --git a/target-i386/gdbstub.c b/target-i386/gdbstub.c index d34e5355f7..19fe9adc3f 100644 --- a/target-i386/gdbstub.c +++ b/target-i386/gdbstub.c @@ -127,9 +127,11 @@ static int x86_cpu_gdb_load_seg(X86CPU *cpu, int sreg, uint8_t *mem_buf) target_ulong base; if (!(env->cr[0] & CR0_PE_MASK) || (env->eflags & VM_MASK)) { + int dpl = (env->eflags & VM_MASK) ? 3 : 0; base = selector << 4; limit = 0xffff; - flags = 0; + flags = DESC_P_MASK | DESC_S_MASK | DESC_W_MASK | + DESC_A_MASK | (dpl << DESC_DPL_SHIFT); } else { if (!cpu_x86_get_descr_debug(env, selector, &base, &limit, &flags)) { diff --git a/target-i386/seg_helper.c b/target-i386/seg_helper.c index cc7eadf9e2..6f7efeeaa0 100644 --- a/target-i386/seg_helper.c +++ b/target-i386/seg_helper.c @@ -88,8 +88,10 @@ static inline void load_seg_cache_raw_dt(SegmentCache *sc, uint32_t e1, static inline void load_seg_vm(CPUX86State *env, int seg, int selector) { selector &= 0xffff; - cpu_x86_load_seg_cache(env, seg, selector, - (selector << 4), 0xffff, 0); + + cpu_x86_load_seg_cache(env, seg, selector, (selector << 4), 0xffff, + DESC_P_MASK | DESC_S_MASK | DESC_W_MASK | + DESC_A_MASK | (3 << DESC_DPL_SHIFT)); } static inline void get_ss_esp_from_tss(CPUX86State *env, uint32_t *ss_ptr, @@ -2465,9 +2467,12 @@ void helper_verw(CPUX86State *env, target_ulong selector1) void cpu_x86_load_seg(CPUX86State *env, int seg_reg, int selector) { if (!(env->cr[0] & CR0_PE_MASK) || (env->eflags & VM_MASK)) { + int dpl = (env->eflags & VM_MASK) ? 3 : 0; selector &= 0xffff; cpu_x86_load_seg_cache(env, seg_reg, selector, - (selector << 4), 0xffff, 0); + (selector << 4), 0xffff, + DESC_P_MASK | DESC_S_MASK | DESC_W_MASK | + DESC_A_MASK | (dpl << DESC_DPL_SHIFT)); } else { helper_load_seg(env, seg_reg, selector); } diff --git a/target-i386/smm_helper.c b/target-i386/smm_helper.c index 4841d53b24..517f3b0cb7 100644 --- a/target-i386/smm_helper.c +++ b/target-i386/smm_helper.c @@ -171,12 +171,24 @@ void do_smm_enter(X86CPU *cpu) CC_OP = CC_OP_EFLAGS; cpu_x86_load_seg_cache(env, R_CS, (env->smbase >> 4) & 0xffff, env->smbase, - 0xffffffff, 0); - cpu_x86_load_seg_cache(env, R_DS, 0, 0, 0xffffffff, 0); - cpu_x86_load_seg_cache(env, R_ES, 0, 0, 0xffffffff, 0); - cpu_x86_load_seg_cache(env, R_SS, 0, 0, 0xffffffff, 0); - cpu_x86_load_seg_cache(env, R_FS, 0, 0, 0xffffffff, 0); - cpu_x86_load_seg_cache(env, R_GS, 0, 0, 0xffffffff, 0); + 0xffffffff, + DESC_P_MASK | DESC_S_MASK | DESC_W_MASK | + DESC_A_MASK); + cpu_x86_load_seg_cache(env, R_DS, 0, 0, 0xffffffff, + DESC_P_MASK | DESC_S_MASK | DESC_W_MASK | + DESC_A_MASK); + cpu_x86_load_seg_cache(env, R_ES, 0, 0, 0xffffffff, + DESC_P_MASK | DESC_S_MASK | DESC_W_MASK | + DESC_A_MASK); + cpu_x86_load_seg_cache(env, R_SS, 0, 0, 0xffffffff, + DESC_P_MASK | DESC_S_MASK | DESC_W_MASK | + DESC_A_MASK); + cpu_x86_load_seg_cache(env, R_FS, 0, 0, 0xffffffff, + DESC_P_MASK | DESC_S_MASK | DESC_W_MASK | + DESC_A_MASK); + cpu_x86_load_seg_cache(env, R_GS, 0, 0, 0xffffffff, + DESC_P_MASK | DESC_S_MASK | DESC_W_MASK | + DESC_A_MASK); } void helper_rsm(CPUX86State *env) From d3b5491897456739c6dc21c604ef8bc28e294bfc Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Thu, 15 May 2014 18:19:17 +0200 Subject: [PATCH 05/11] target-i386: rework CPL checks during task switch, preparing for next patch During task switch, all of CS.DPL, CS.RPL, SS.DPL must match (in addition to all the other requirements) and will be the new CPL. So far this worked by carefully setting the CS selector and flags before doing the task switch; but this will not work once we get the CPL from SS.DPL. Temporarily assume that the CPL comes from CS.RPL during task switch to a protected-mode task, until the descriptor of SS is loaded. Tested-by: Kevin O'Connor Signed-off-by: Paolo Bonzini --- target-i386/seg_helper.c | 23 +++++++++-------------- 1 file changed, 9 insertions(+), 14 deletions(-) diff --git a/target-i386/seg_helper.c b/target-i386/seg_helper.c index 6f7efeeaa0..0f00aede08 100644 --- a/target-i386/seg_helper.c +++ b/target-i386/seg_helper.c @@ -135,11 +135,10 @@ static inline void get_ss_esp_from_tss(CPUX86State *env, uint32_t *ss_ptr, } } -/* XXX: merge with load_seg() */ -static void tss_load_seg(CPUX86State *env, int seg_reg, int selector) +static void tss_load_seg(CPUX86State *env, int seg_reg, int selector, int cpl) { uint32_t e1, e2; - int rpl, dpl, cpl; + int rpl, dpl; if ((selector & 0xfffc) != 0) { if (load_segment(env, &e1, &e2, selector) != 0) { @@ -150,18 +149,13 @@ static void tss_load_seg(CPUX86State *env, int seg_reg, int selector) } rpl = selector & 3; dpl = (e2 >> DESC_DPL_SHIFT) & 3; - cpl = env->hflags & HF_CPL_MASK; if (seg_reg == R_CS) { if (!(e2 & DESC_CS_MASK)) { raise_exception_err(env, EXCP0A_TSS, selector & 0xfffc); } - /* XXX: is it correct? */ if (dpl != rpl) { raise_exception_err(env, EXCP0A_TSS, selector & 0xfffc); } - if ((e2 & DESC_C_MASK) && dpl > rpl) { - raise_exception_err(env, EXCP0A_TSS, selector & 0xfffc); - } } else if (seg_reg == R_SS) { /* SS must be writable data */ if ((e2 & DESC_CS_MASK) || !(e2 & DESC_W_MASK)) { @@ -448,12 +442,13 @@ static void switch_tss(CPUX86State *env, int tss_selector, /* load the segments */ if (!(new_eflags & VM_MASK)) { - tss_load_seg(env, R_CS, new_segs[R_CS]); - tss_load_seg(env, R_SS, new_segs[R_SS]); - tss_load_seg(env, R_ES, new_segs[R_ES]); - tss_load_seg(env, R_DS, new_segs[R_DS]); - tss_load_seg(env, R_FS, new_segs[R_FS]); - tss_load_seg(env, R_GS, new_segs[R_GS]); + int cpl = new_segs[R_CS] & 3; + tss_load_seg(env, R_CS, new_segs[R_CS], cpl); + tss_load_seg(env, R_SS, new_segs[R_SS], cpl); + tss_load_seg(env, R_ES, new_segs[R_ES], cpl); + tss_load_seg(env, R_DS, new_segs[R_DS], cpl); + tss_load_seg(env, R_FS, new_segs[R_FS], cpl); + tss_load_seg(env, R_GS, new_segs[R_GS], cpl); } /* check that env->eip is in the CS segment limits */ From 7125c937c97d9ec4a41b3cb6d1b3e805ec53e255 Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Wed, 14 May 2014 10:38:18 +0200 Subject: [PATCH 06/11] target-i386: get CPL from SS.DPL CS.RPL is not equal to the CPL in the few instructions between setting CR0.PE and reloading CS. We get this right in the common case, because writes to CR0 do not modify the CPL, but it would not be enough if an SMI comes exactly during that brief period. Were this to happen, the RSM instruction would erroneously set CPL to the low two bits of the real-mode selector; and if they are not 00, the next instruction fetch cannot access the code segment and causes a triple fault. However, SS.DPL *is* always equal to the CPL. In real processors (AMD only) there is a weird case of SYSRET setting SS.DPL=SS.RPL from the STAR register while forcing CPL=3, but we do not emulate that. Tested-by: Kevin O'Connor Signed-off-by: Paolo Bonzini --- target-i386/cpu.h | 8 +++----- target-i386/kvm.c | 2 +- target-i386/machine.c | 8 ++++++++ 3 files changed, 12 insertions(+), 6 deletions(-) diff --git a/target-i386/cpu.h b/target-i386/cpu.h index e9cbdabc03..65a44d9fe3 100644 --- a/target-i386/cpu.h +++ b/target-i386/cpu.h @@ -986,7 +986,6 @@ static inline void cpu_x86_load_seg_cache(CPUX86State *env, /* update the hidden flags */ { if (seg_reg == R_CS) { - int cpl = selector & 3; #ifdef TARGET_X86_64 if ((env->hflags & HF_LMA_MASK) && (flags & DESC_L_MASK)) { /* long mode */ @@ -996,15 +995,14 @@ static inline void cpu_x86_load_seg_cache(CPUX86State *env, #endif { /* legacy / compatibility case */ - if (!(env->cr[0] & CR0_PE_MASK)) - cpl = 0; - else if (env->eflags & VM_MASK) - cpl = 3; new_hflags = (env->segs[R_CS].flags & DESC_B_MASK) >> (DESC_B_SHIFT - HF_CS32_SHIFT); env->hflags = (env->hflags & ~(HF_CS32_MASK | HF_CS64_MASK)) | new_hflags; } + } + if (seg_reg == R_SS) { + int cpl = (flags >> DESC_DPL_SHIFT) & 3; #if HF_CPL_MASK != 3 #error HF_CPL_MASK is hardcoded #endif diff --git a/target-i386/kvm.c b/target-i386/kvm.c index 0d894ef4aa..3931d4c340 100644 --- a/target-i386/kvm.c +++ b/target-i386/kvm.c @@ -1430,7 +1430,7 @@ static int kvm_get_sregs(X86CPU *cpu) HF_OSFXSR_MASK | HF_LMA_MASK | HF_CS32_MASK | \ HF_SS32_MASK | HF_CS64_MASK | HF_ADDSEG_MASK) - hflags = (env->segs[R_CS].flags >> DESC_DPL_SHIFT) & HF_CPL_MASK; + hflags = (env->segs[R_SS].flags >> DESC_DPL_SHIFT) & HF_CPL_MASK; hflags |= (env->cr[0] & CR0_PE_MASK) << (HF_PE_SHIFT - CR0_PE_SHIFT); hflags |= (env->cr[0] << (HF_MP_SHIFT - CR0_MP_SHIFT)) & (HF_MP_MASK | HF_EM_MASK | HF_TS_MASK); diff --git a/target-i386/machine.c b/target-i386/machine.c index 168cab681b..bdff447786 100644 --- a/target-i386/machine.c +++ b/target-i386/machine.c @@ -312,6 +312,14 @@ static int cpu_post_load(void *opaque, int version_id) env->segs[R_SS].flags &= ~(env->segs[R_SS].flags & DESC_DPL_MASK); } + /* Older versions of QEMU incorrectly used CS.DPL as the CPL when + * running under KVM. This is wrong for conforming code segments. + * Luckily, in our implementation the CPL field of hflags is redundant + * and we can get the right value from the SS descriptor privilege level. + */ + env->hflags &= ~HF_CPL_MASK; + env->hflags |= (env->segs[R_SS].flags >> DESC_DPL_SHIFT) & HF_CPL_MASK; + /* XXX: restore FPU round state */ env->fpstt = (env->fpus_vmstate >> 11) & 7; env->fpus = env->fpus_vmstate & ~0x3800; From 28fb26f19ffa675ac8cc08a355e5b01cc194aa5e Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Wed, 14 May 2014 16:47:48 +0200 Subject: [PATCH 07/11] target-i386: set CC_OP to CC_OP_EFLAGS in cpu_load_eflags There is no reason to keep that out of the function. The comment refers to the disassembler's cc_op state rather than the CPUState field. Reviewed-by: Richard Henderson Signed-off-by: Paolo Bonzini --- target-i386/cpu.h | 5 ++++- target-i386/seg_helper.c | 2 -- target-i386/smm_helper.c | 2 -- target-i386/svm_helper.c | 2 -- 4 files changed, 4 insertions(+), 7 deletions(-) diff --git a/target-i386/cpu.h b/target-i386/cpu.h index 65a44d9fe3..ee410af7ae 100644 --- a/target-i386/cpu.h +++ b/target-i386/cpu.h @@ -1232,11 +1232,14 @@ static inline uint32_t cpu_compute_eflags(CPUX86State *env) return env->eflags | cpu_cc_compute_all(env, CC_OP) | (env->df & DF_MASK); } -/* NOTE: CC_OP must be modified manually to CC_OP_EFLAGS */ +/* NOTE: the translator must set DisasContext.cc_op to CC_OP_EFLAGS + * after generating a call to a helper that uses this. + */ static inline void cpu_load_eflags(CPUX86State *env, int eflags, int update_mask) { CC_SRC = eflags & (CC_O | CC_S | CC_Z | CC_A | CC_P | CC_C); + CC_OP = CC_OP_EFLAGS; env->df = 1 - (2 * ((eflags >> 10) & 1)); env->eflags = (env->eflags & ~update_mask) | (eflags & update_mask) | 0x2; diff --git a/target-i386/seg_helper.c b/target-i386/seg_helper.c index 0f00aede08..0b19a8ce64 100644 --- a/target-i386/seg_helper.c +++ b/target-i386/seg_helper.c @@ -1598,7 +1598,6 @@ void helper_ljmp_protected(CPUX86State *env, int new_cs, target_ulong new_eip, } next_eip = env->eip + next_eip_addend; switch_tss(env, new_cs, e1, e2, SWITCH_TSS_JMP, next_eip); - CC_OP = CC_OP_EFLAGS; break; case 4: /* 286 call gate */ case 12: /* 386 call gate */ @@ -1767,7 +1766,6 @@ void helper_lcall_protected(CPUX86State *env, int new_cs, target_ulong new_eip, raise_exception_err(env, EXCP0D_GPF, new_cs & 0xfffc); } switch_tss(env, new_cs, e1, e2, SWITCH_TSS_CALL, next_eip); - CC_OP = CC_OP_EFLAGS; return; case 4: /* 286 call gate */ case 12: /* 386 call gate */ diff --git a/target-i386/smm_helper.c b/target-i386/smm_helper.c index 517f3b0cb7..1e5f5ce07e 100644 --- a/target-i386/smm_helper.c +++ b/target-i386/smm_helper.c @@ -168,7 +168,6 @@ void do_smm_enter(X86CPU *cpu) CR0_PG_MASK)); cpu_x86_update_cr4(env, 0); env->dr[7] = 0x00000400; - CC_OP = CC_OP_EFLAGS; cpu_x86_load_seg_cache(env, R_CS, (env->smbase >> 4) & 0xffff, env->smbase, 0xffffffff, @@ -308,7 +307,6 @@ void helper_rsm(CPUX86State *env) env->smbase = ldl_phys(cs->as, sm_state + 0x7ef8) & ~0x7fff; } #endif - CC_OP = CC_OP_EFLAGS; env->hflags &= ~HF_SMM_MASK; cpu_smm_update(env); diff --git a/target-i386/svm_helper.c b/target-i386/svm_helper.c index 846eaa5918..8dc6f8ccda 100644 --- a/target-i386/svm_helper.c +++ b/target-i386/svm_helper.c @@ -260,7 +260,6 @@ void helper_vmrun(CPUX86State *env, int aflag, int next_eip_addend) env->vm_vmcb + offsetof(struct vmcb, save.rflags)), ~(CC_O | CC_S | CC_Z | CC_A | CC_P | CC_C | DF_MASK)); - CC_OP = CC_OP_EFLAGS; svm_load_seg_cache(env, env->vm_vmcb + offsetof(struct vmcb, save.es), R_ES); @@ -702,7 +701,6 @@ void helper_vmexit(CPUX86State *env, uint32_t exit_code, uint64_t exit_info_1) save.rflags)), ~(CC_O | CC_S | CC_Z | CC_A | CC_P | CC_C | DF_MASK | VM_MASK)); - CC_OP = CC_OP_EFLAGS; svm_load_seg_cache(env, env->vm_hsave + offsetof(struct vmcb, save.es), R_ES); From 0e1dac6c41f337f997814344a847162968c20c64 Mon Sep 17 00:00:00 2001 From: Eduardo Habkost Date: Fri, 30 May 2014 17:26:22 -0300 Subject: [PATCH 08/11] kvm: Ensure negative return value on kvm_init() error handling path We need to ensure ret < 0 when going through the error path, or QEMU may try to run the half-initialized VM and crash. Signed-off-by: Eduardo Habkost Signed-off-by: Paolo Bonzini --- kvm-all.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/kvm-all.c b/kvm-all.c index a343ede4d4..f7fe9c6cd9 100644 --- a/kvm-all.c +++ b/kvm-all.c @@ -1374,7 +1374,7 @@ int kvm_init(MachineClass *mc) ret = kvm_ioctl(s, KVM_GET_API_VERSION, 0); if (ret < KVM_API_VERSION) { - if (ret > 0) { + if (ret >= 0) { ret = -EINVAL; } fprintf(stderr, "kvm version too old\n"); @@ -1425,6 +1425,7 @@ int kvm_init(MachineClass *mc) if (mc->kvm_type) { type = mc->kvm_type(kvm_type); } else if (kvm_type) { + ret = -EINVAL; fprintf(stderr, "Invalid argument kvm-type=%s\n", kvm_type); goto err; } @@ -1525,6 +1526,7 @@ int kvm_init(MachineClass *mc) return 0; err: + assert(ret < 0); if (s->vmfd >= 0) { close(s->vmfd); } From f522d2acc549dd11f495048330aa5f3f424a7dfa Mon Sep 17 00:00:00 2001 From: Alex Williamson Date: Mon, 2 Jun 2014 11:28:50 -0600 Subject: [PATCH 09/11] kvm: Enable -cpu option to hide KVM The latest Nvidia driver (337.88) specifically checks for KVM as the hypervisor and reports Code 43 for the driver in a Windows guest when found. Removing or changing the KVM signature is sufficient for the driver to load and work. This patch adds an option to easily allow the KVM hypervisor signature to be hidden using '-cpu kvm=off'. We continue to expose KVM via the cpuid value by default. The state of this option does not supercede or replace -enable-kvm or the accel=kvm machine option. This only changes the visibility of KVM to the guest and paravirtual features specifically tied to the KVM cpuid. Signed-off-by: Alex Williamson Signed-off-by: Paolo Bonzini --- target-i386/cpu-qom.h | 1 + target-i386/cpu.c | 1 + target-i386/kvm.c | 28 +++++++++++++++------------- 3 files changed, 17 insertions(+), 13 deletions(-) diff --git a/target-i386/cpu-qom.h b/target-i386/cpu-qom.h index e9b3d577b3..0808cfc67d 100644 --- a/target-i386/cpu-qom.h +++ b/target-i386/cpu-qom.h @@ -87,6 +87,7 @@ typedef struct X86CPU { bool hyperv_time; bool check_cpuid; bool enforce_cpuid; + bool expose_kvm; /* if true the CPUID code directly forward host cache leaves to the guest */ bool cache_info_passthrough; diff --git a/target-i386/cpu.c b/target-i386/cpu.c index 042a48d703..cbf1d979cb 100644 --- a/target-i386/cpu.c +++ b/target-i386/cpu.c @@ -2792,6 +2792,7 @@ static Property x86_cpu_properties[] = { DEFINE_PROP_BOOL("hv-time", X86CPU, hyperv_time, false), DEFINE_PROP_BOOL("check", X86CPU, check_cpuid, false), DEFINE_PROP_BOOL("enforce", X86CPU, enforce_cpuid, false), + DEFINE_PROP_BOOL("kvm", X86CPU, expose_kvm, true), DEFINE_PROP_END_OF_LIST() }; diff --git a/target-i386/kvm.c b/target-i386/kvm.c index 3931d4c340..40cd4b84af 100644 --- a/target-i386/kvm.c +++ b/target-i386/kvm.c @@ -528,23 +528,25 @@ int kvm_arch_init_vcpu(CPUState *cs) has_msr_hv_hypercall = true; } - memcpy(signature, "KVMKVMKVM\0\0\0", 12); - c = &cpuid_data.entries[cpuid_i++]; - c->function = KVM_CPUID_SIGNATURE | kvm_base; - c->eax = 0; - c->ebx = signature[0]; - c->ecx = signature[1]; - c->edx = signature[2]; + if (cpu->expose_kvm) { + memcpy(signature, "KVMKVMKVM\0\0\0", 12); + c = &cpuid_data.entries[cpuid_i++]; + c->function = KVM_CPUID_SIGNATURE | kvm_base; + c->eax = 0; + c->ebx = signature[0]; + c->ecx = signature[1]; + c->edx = signature[2]; - c = &cpuid_data.entries[cpuid_i++]; - c->function = KVM_CPUID_FEATURES | kvm_base; - c->eax = env->features[FEAT_KVM]; + c = &cpuid_data.entries[cpuid_i++]; + c->function = KVM_CPUID_FEATURES | kvm_base; + c->eax = env->features[FEAT_KVM]; - has_msr_async_pf_en = c->eax & (1 << KVM_FEATURE_ASYNC_PF); + has_msr_async_pf_en = c->eax & (1 << KVM_FEATURE_ASYNC_PF); - has_msr_pv_eoi_en = c->eax & (1 << KVM_FEATURE_PV_EOI); + has_msr_pv_eoi_en = c->eax & (1 << KVM_FEATURE_PV_EOI); - has_msr_kvm_steal_time = c->eax & (1 << KVM_FEATURE_STEAL_TIME); + has_msr_kvm_steal_time = c->eax & (1 << KVM_FEATURE_STEAL_TIME); + } cpu_x86_cpuid(env, 0, 0, &limit, &unused, &unused, &unused); From 9b1786829aefb83f37a8f3135e3ea91c56001b56 Mon Sep 17 00:00:00 2001 From: Marcelo Tosatti Date: Tue, 3 Jun 2014 13:34:48 -0300 Subject: [PATCH 10/11] kvmclock: Ensure proper env->tsc value for kvmclock_current_nsec calculation MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Ensure proper env->tsc value for kvmclock_current_nsec calculation. Reported-by: Marcin GibuĊ‚a Cc: qemu-stable@nongnu.org Signed-off-by: Marcelo Tosatti Signed-off-by: Paolo Bonzini --- hw/i386/kvm/clock.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/hw/i386/kvm/clock.c b/hw/i386/kvm/clock.c index 6f4ed28ad4..bef2504389 100644 --- a/hw/i386/kvm/clock.c +++ b/hw/i386/kvm/clock.c @@ -17,6 +17,7 @@ #include "qemu/host-utils.h" #include "sysemu/sysemu.h" #include "sysemu/kvm.h" +#include "sysemu/cpus.h" #include "hw/sysbus.h" #include "hw/kvm/clock.h" @@ -65,6 +66,7 @@ static uint64_t kvmclock_current_nsec(KVMClockState *s) cpu_physical_memory_read(kvmclock_struct_pa, &time, sizeof(time)); + assert(time.tsc_timestamp <= migration_tsc); delta = migration_tsc - time.tsc_timestamp; if (time.tsc_shift < 0) { delta >>= -time.tsc_shift; @@ -123,6 +125,8 @@ static void kvmclock_vm_state_change(void *opaque, int running, if (s->clock_valid) { return; } + + cpu_synchronize_all_states(); ret = kvm_vm_ioctl(kvm_state, KVM_GET_CLOCK, &data); if (ret < 0) { fprintf(stderr, "KVM_GET_CLOCK failed: %s\n", strerror(ret)); From 79b6f2f651d64a122dd647c1456635d5a6a176ac Mon Sep 17 00:00:00 2001 From: Jidong Xiao Date: Tue, 3 Jun 2014 21:10:06 -0400 Subject: [PATCH 11/11] kvm: Fix eax for cpuid leaf 0x40000000 Since Linux kernel 3.5, KVM has documented eax for leaf 0x40000000 to be KVM_CPUID_FEATURES: https://github.com/torvalds/linux/commit/57c22e5f35aa4b9b2fe11f73f3e62bbf9ef36190 But qemu still tries to set it to 0. It would be better to make qemu and kvm consistent. This patch just fixes this issue. Signed-off-by: Jidong Xiao [Include kvm_base in the value. - Paolo] Signed-off-by: Paolo Bonzini --- target-i386/kvm.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/target-i386/kvm.c b/target-i386/kvm.c index 40cd4b84af..4bf0ac9e76 100644 --- a/target-i386/kvm.c +++ b/target-i386/kvm.c @@ -532,7 +532,7 @@ int kvm_arch_init_vcpu(CPUState *cs) memcpy(signature, "KVMKVMKVM\0\0\0", 12); c = &cpuid_data.entries[cpuid_i++]; c->function = KVM_CPUID_SIGNATURE | kvm_base; - c->eax = 0; + c->eax = KVM_CPUID_FEATURES | kvm_base; c->ebx = signature[0]; c->ecx = signature[1]; c->edx = signature[2];