From 5eba5a6632ff4b0aaa8cb6c806fa95758518a28d Mon Sep 17 00:00:00 2001 From: Drew DeVault Date: Wed, 29 Oct 2014 23:52:03 -0600 Subject: [PATCH 01/47] Add bootloader name to multiboot implementation The name is set to "qemu". Signed-off-by: Drew DeVault Signed-off-by: Paolo Bonzini Signed-off-by: Drew DeVault Signed-off-by: Paolo Bonzini --- hw/i386/multiboot.c | 32 +++++++++++++++++++++++++++----- 1 file changed, 27 insertions(+), 5 deletions(-) diff --git a/hw/i386/multiboot.c b/hw/i386/multiboot.c index 985ca1ed84..f86d351b3e 100644 --- a/hw/i386/multiboot.c +++ b/hw/i386/multiboot.c @@ -54,6 +54,7 @@ enum { MBI_MODS_COUNT = 20, MBI_MODS_ADDR = 24, MBI_MMAP_ADDR = 48, + MBI_BOOTLOADER = 64, MBI_SIZE = 88, @@ -74,6 +75,7 @@ enum { MULTIBOOT_FLAGS_CMDLINE = 1 << 2, MULTIBOOT_FLAGS_MODULES = 1 << 3, MULTIBOOT_FLAGS_MMAP = 1 << 6, + MULTIBOOT_FLAGS_BOOTLOADER = 1 << 9, }; typedef struct { @@ -87,6 +89,8 @@ typedef struct { hwaddr offset_mbinfo; /* offset in buffer for cmdlines in bytes */ hwaddr offset_cmdlines; + /* offset in buffer for bootloader name in bytes */ + hwaddr offset_bootloader; /* offset of modules in bytes */ hwaddr offset_mods; /* available slots for mb modules infos */ @@ -95,6 +99,8 @@ typedef struct { int mb_mods_count; } MultibootState; +const char *bootloader_name = "qemu"; + static uint32_t mb_add_cmdline(MultibootState *s, const char *cmdline) { hwaddr p = s->offset_cmdlines; @@ -105,6 +111,16 @@ static uint32_t mb_add_cmdline(MultibootState *s, const char *cmdline) return s->mb_buf_phys + p; } +static uint32_t mb_add_bootloader(MultibootState *s, const char *bootloader) +{ + hwaddr p = s->offset_bootloader; + char *b = (char *)s->mb_buf + p; + + memcpy(b, bootloader, strlen(bootloader) + 1); + s->offset_bootloader += strlen(b) + 1; + return s->mb_buf_phys + p; +} + static void mb_add_mod(MultibootState *s, hwaddr start, hwaddr end, hwaddr cmdline_phys) @@ -241,9 +257,10 @@ int load_multiboot(FWCfgState *fw_cfg, mbs.mb_buf_size = 
TARGET_PAGE_ALIGN(mb_kernel_size); mbs.offset_mbinfo = mbs.mb_buf_size; - /* Calculate space for cmdlines and mb_mods */ + /* Calculate space for cmdlines, bootloader name, and mb_mods */ mbs.mb_buf_size += strlen(kernel_filename) + 1; mbs.mb_buf_size += strlen(kernel_cmdline) + 1; + mbs.mb_buf_size += strlen(bootloader_name) + 1; if (initrd_filename) { const char *r = initrd_filename; mbs.mb_buf_size += strlen(r) + 1; @@ -257,9 +274,11 @@ int load_multiboot(FWCfgState *fw_cfg, mbs.mb_buf_size = TARGET_PAGE_ALIGN(mbs.mb_buf_size); - /* enlarge mb_buf to hold cmdlines and mb-info structs */ - mbs.mb_buf = g_realloc(mbs.mb_buf, mbs.mb_buf_size); - mbs.offset_cmdlines = mbs.offset_mbinfo + mbs.mb_mods_avail * MB_MOD_SIZE; + /* enlarge mb_buf to hold cmdlines, bootloader, mb-info structs */ + mbs.mb_buf = g_realloc(mbs.mb_buf, mbs.mb_buf_size); + mbs.offset_cmdlines = mbs.offset_mbinfo + mbs.mb_mods_avail * MB_MOD_SIZE; + mbs.offset_bootloader = mbs.offset_cmdlines + strlen(kernel_filename) + 1 + + strlen(kernel_cmdline) + 1; if (initrd_filename) { char *next_initrd, not_last; @@ -306,6 +325,8 @@ int load_multiboot(FWCfgState *fw_cfg, kernel_filename, kernel_cmdline); stl_p(bootinfo + MBI_CMDLINE, mb_add_cmdline(&mbs, kcmdline)); + stl_p(bootinfo + MBI_BOOTLOADER, mb_add_bootloader(&mbs, bootloader_name)); + stl_p(bootinfo + MBI_MODS_ADDR, mbs.mb_buf_phys + mbs.offset_mbinfo); stl_p(bootinfo + MBI_MODS_COUNT, mbs.mb_mods_count); /* mods_count */ @@ -314,7 +335,8 @@ int load_multiboot(FWCfgState *fw_cfg, | MULTIBOOT_FLAGS_BOOT_DEVICE | MULTIBOOT_FLAGS_CMDLINE | MULTIBOOT_FLAGS_MODULES - | MULTIBOOT_FLAGS_MMAP); + | MULTIBOOT_FLAGS_MMAP + | MULTIBOOT_FLAGS_BOOTLOADER); stl_p(bootinfo + MBI_BOOT_DEVICE, 0x8000ffff); /* XXX: use the -boot switch? 
*/ stl_p(bootinfo + MBI_MMAP_ADDR, ADDR_E820_MAP); From 9551ea6991cfb7c777f7943ad69b30d0a4fadac3 Mon Sep 17 00:00:00 2001 From: Aurelien Jarno Date: Sat, 21 Jun 2014 00:48:09 +0200 Subject: [PATCH 02/47] target-i386: simplify AES emulation This patch simplifies the AES code, by directly accessing the newly added S-Box, InvS-Box and InvMixColumns tables instead of recreating them by using the AES_Te and AES_Td tables. Signed-off-by: Aurelien Jarno Reviewed-by: Richard Henderson Signed-off-by: Paolo Bonzini --- target-i386/ops_sse.h | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/target-i386/ops_sse.h b/target-i386/ops_sse.h index 886e0a8243..0765073792 100644 --- a/target-i386/ops_sse.h +++ b/target-i386/ops_sse.h @@ -2228,7 +2228,7 @@ void glue(helper_aesdeclast, SUFFIX)(CPUX86State *env, Reg *d, Reg *s) Reg rk = *s; for (i = 0; i < 16; i++) { - d->B(i) = rk.B(i) ^ (AES_Td4[st.B(AES_ishifts[i])] & 0xff); + d->B(i) = rk.B(i) ^ (AES_isbox[st.B(AES_ishifts[i])]); } } @@ -2253,7 +2253,7 @@ void glue(helper_aesenclast, SUFFIX)(CPUX86State *env, Reg *d, Reg *s) Reg rk = *s; for (i = 0; i < 16; i++) { - d->B(i) = rk.B(i) ^ (AES_Te4[st.B(AES_shifts[i])] & 0xff); + d->B(i) = rk.B(i) ^ (AES_sbox[st.B(AES_shifts[i])]); } } @@ -2264,10 +2264,10 @@ void glue(helper_aesimc, SUFFIX)(CPUX86State *env, Reg *d, Reg *s) Reg tmp = *s; for (i = 0 ; i < 4 ; i++) { - d->L(i) = bswap32(AES_Td0[AES_Te4[tmp.B(4*i+0)] & 0xff] ^ - AES_Td1[AES_Te4[tmp.B(4*i+1)] & 0xff] ^ - AES_Td2[AES_Te4[tmp.B(4*i+2)] & 0xff] ^ - AES_Td3[AES_Te4[tmp.B(4*i+3)] & 0xff]); + d->L(i) = bswap32(AES_imc[tmp.B(4*i+0)][0] ^ + AES_imc[tmp.B(4*i+1)][1] ^ + AES_imc[tmp.B(4*i+2)][2] ^ + AES_imc[tmp.B(4*i+3)][3]); } } @@ -2278,8 +2278,8 @@ void glue(helper_aeskeygenassist, SUFFIX)(CPUX86State *env, Reg *d, Reg *s, Reg tmp = *s; for (i = 0 ; i < 4 ; i++) { - d->B(i) = AES_Te4[tmp.B(i + 4)] & 0xff; - d->B(i + 8) = AES_Te4[tmp.B(i + 12)] & 0xff; + d->B(i) = AES_sbox[tmp.B(i + 4)]; + d->B(i + 
8) = AES_sbox[tmp.B(i + 12)]; } d->L(1) = (d->L(0) << 24 | d->L(0) >> 8) ^ ctrl; d->L(3) = (d->L(2) << 24 | d->L(2) >> 8) ^ ctrl; From f41389ae3c54bd5e2040e3f95a2872981c3ed965 Mon Sep 17 00:00:00 2001 From: Eric Auger Date: Fri, 31 Oct 2014 13:38:18 +0000 Subject: [PATCH 03/47] KVM_CAP_IRQFD and KVM_CAP_IRQFD_RESAMPLE checks Compute kvm_irqfds_allowed by checking the KVM_CAP_IRQFD extension. Remove direct settings in architecture specific files. Add a new kvm_resamplefds_allowed variable, initialized by checking the KVM_CAP_IRQFD_RESAMPLE extension. Add a corresponding kvm_resamplefds_enabled() function. A special notice for s390 where KVM_CAP_IRQFD was not immediately advertised when irqfd capability was introduced in the kernel. KVM_CAP_IRQ_ROUTING was advertised instead. This was fixed in "KVM: s390: announce irqfd capability", ebc3226202d5956a5963185222982d435378b899 whereas irqfd support was brought in 84223598778ba08041f4297fda485df83414d57e, "KVM: s390: irq routing for adapter interrupts". Both commits first appear in 3.15 so there should not be any kernel version impacted by this QEMU modification. 
Signed-off-by: Eric Auger Signed-off-by: Paolo Bonzini --- hw/intc/openpic_kvm.c | 1 - hw/intc/xics_kvm.c | 1 - include/sysemu/kvm.h | 10 ++++++++++ kvm-all.c | 7 +++++++ target-i386/kvm.c | 1 - target-s390x/kvm.c | 1 - 6 files changed, 17 insertions(+), 4 deletions(-) diff --git a/hw/intc/openpic_kvm.c b/hw/intc/openpic_kvm.c index 3e2cd189ff..f7cac585a9 100644 --- a/hw/intc/openpic_kvm.c +++ b/hw/intc/openpic_kvm.c @@ -248,7 +248,6 @@ static void kvm_openpic_realize(DeviceState *dev, Error **errp) kvm_irqchip_add_irq_route(kvm_state, i, 0, i); } - kvm_irqfds_allowed = true; kvm_msi_via_irqfd_allowed = true; kvm_gsi_routing_allowed = true; diff --git a/hw/intc/xics_kvm.c b/hw/intc/xics_kvm.c index 20b19e9d4f..c15453f26f 100644 --- a/hw/intc/xics_kvm.c +++ b/hw/intc/xics_kvm.c @@ -448,7 +448,6 @@ static void xics_kvm_realize(DeviceState *dev, Error **errp) } kvm_kernel_irqchip = true; - kvm_irqfds_allowed = true; kvm_msi_via_irqfd_allowed = true; kvm_gsi_direct_mapping = true; diff --git a/include/sysemu/kvm.h b/include/sysemu/kvm.h index 22e42ef236..104cf3535e 100644 --- a/include/sysemu/kvm.h +++ b/include/sysemu/kvm.h @@ -45,6 +45,7 @@ extern bool kvm_async_interrupts_allowed; extern bool kvm_halt_in_kernel_allowed; extern bool kvm_eventfds_allowed; extern bool kvm_irqfds_allowed; +extern bool kvm_resamplefds_allowed; extern bool kvm_msi_via_irqfd_allowed; extern bool kvm_gsi_routing_allowed; extern bool kvm_gsi_direct_mapping; @@ -101,6 +102,15 @@ extern bool kvm_readonly_mem_allowed; */ #define kvm_irqfds_enabled() (kvm_irqfds_allowed) +/** + * kvm_resamplefds_enabled: + * + * Returns: true if we can use resamplefds to inject interrupts into + * a KVM CPU (ie the kernel supports resamplefds and we are running + * with a configuration where it is meaningful to use them). 
+ */ +#define kvm_resamplefds_enabled() (kvm_resamplefds_allowed) + /** * kvm_msi_via_irqfd_enabled: * diff --git a/kvm-all.c b/kvm-all.c index 937bc9d903..c86626f9ff 100644 --- a/kvm-all.c +++ b/kvm-all.c @@ -120,6 +120,7 @@ bool kvm_async_interrupts_allowed; bool kvm_halt_in_kernel_allowed; bool kvm_eventfds_allowed; bool kvm_irqfds_allowed; +bool kvm_resamplefds_allowed; bool kvm_msi_via_irqfd_allowed; bool kvm_gsi_routing_allowed; bool kvm_gsi_direct_mapping; @@ -1584,6 +1585,12 @@ static int kvm_init(MachineState *ms) kvm_eventfds_allowed = (kvm_check_extension(s, KVM_CAP_IOEVENTFD) > 0); + kvm_irqfds_allowed = + (kvm_check_extension(s, KVM_CAP_IRQFD) > 0); + + kvm_resamplefds_allowed = + (kvm_check_extension(s, KVM_CAP_IRQFD_RESAMPLE) > 0); + ret = kvm_arch_init(s); if (ret < 0) { goto err; diff --git a/target-i386/kvm.c b/target-i386/kvm.c index ccf36e8719..3a3dfc4795 100644 --- a/target-i386/kvm.c +++ b/target-i386/kvm.c @@ -2563,7 +2563,6 @@ void kvm_arch_init_irq_routing(KVMState *s) * irqchip, so we can use irqfds, and on x86 we know * we can use msi via irqfd and GSI routing. */ - kvm_irqfds_allowed = true; kvm_msi_via_irqfd_allowed = true; kvm_gsi_routing_allowed = true; } diff --git a/target-s390x/kvm.c b/target-s390x/kvm.c index 50709ba6b5..1b79c4de75 100644 --- a/target-s390x/kvm.c +++ b/target-s390x/kvm.c @@ -1294,7 +1294,6 @@ void kvm_arch_init_irq_routing(KVMState *s) * have to override the common code kvm_halt_in_kernel_allowed setting. 
*/ if (kvm_check_extension(s, KVM_CAP_IRQ_ROUTING)) { - kvm_irqfds_allowed = true; kvm_gsi_routing_allowed = true; kvm_halt_in_kernel_allowed = false; } From 9fc0e2d8ac10c1bb08720b44a4cf9190f1ab4f9e Mon Sep 17 00:00:00 2001 From: Eric Auger Date: Fri, 31 Oct 2014 13:38:19 +0000 Subject: [PATCH 04/47] vfio: use kvm_resamplefds_enabled() Use the kvm_resamplefds_enabled function Signed-off-by: Eric Auger Acked-by: Alex Williamson Signed-off-by: Paolo Bonzini --- hw/misc/vfio.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/hw/misc/vfio.c b/hw/misc/vfio.c index fd318a122d..a315c3acdc 100644 --- a/hw/misc/vfio.c +++ b/hw/misc/vfio.c @@ -406,7 +406,7 @@ static void vfio_enable_intx_kvm(VFIODevice *vdev) if (!VFIO_ALLOW_KVM_INTX || !kvm_irqfds_enabled() || vdev->intx.route.mode != PCI_INTX_ENABLED || - !kvm_check_extension(kvm_state, KVM_CAP_IRQFD_RESAMPLE)) { + !kvm_resamplefds_enabled()) { return; } @@ -568,8 +568,7 @@ static int vfio_enable_intx(VFIODevice *vdev) * Only conditional to avoid generating error messages on platforms * where we won't actually use the result anyway. */ - if (kvm_irqfds_enabled() && - kvm_check_extension(kvm_state, KVM_CAP_IRQFD_RESAMPLE)) { + if (kvm_irqfds_enabled() && kvm_resamplefds_enabled()) { vdev->intx.route = pci_device_route_intx_to_irq(&vdev->pdev, vdev->intx.pin); } From d229b985b504261369f2035936cc147c2606fa92 Mon Sep 17 00:00:00 2001 From: Christian Borntraeger Date: Tue, 14 Oct 2014 11:50:27 +0200 Subject: [PATCH 05/47] valgrind: avoid false positives in KVM_GET_DIRTY_LOG ioctl struct kvm_dirty_log contains padding fields that trigger false positives in valgrind. Let's use a designated initializer to avoid false positives from valgrind/memcheck. 
Signed-off-by: Christian Borntraeger Signed-off-by: Paolo Bonzini --- kvm-all.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kvm-all.c b/kvm-all.c index c86626f9ff..4bfecccf8e 100644 --- a/kvm-all.c +++ b/kvm-all.c @@ -417,7 +417,7 @@ static int kvm_physical_sync_dirty_bitmap(MemoryRegionSection *section) { KVMState *s = kvm_state; unsigned long size, allocated_size = 0; - KVMDirtyLog d; + KVMDirtyLog d = {}; KVMSlot *mem; int ret = 0; hwaddr start_addr = section->offset_within_address_space; From 5e0b7d8869a567d8f535f63510adf8f53ca438d3 Mon Sep 17 00:00:00 2001 From: Christian Borntraeger Date: Tue, 14 Oct 2014 11:55:49 +0200 Subject: [PATCH 06/47] valgrind/i386: avoid false positives on KVM_SET_CLOCK ioctl kvm_clock_data contains pad fields. Let's use a designated initializer to avoid false positives from valgrind/memcheck. Signed-off-by: Christian Borntraeger Signed-off-by: Paolo Bonzini --- hw/i386/kvm/clock.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/hw/i386/kvm/clock.c b/hw/i386/kvm/clock.c index 58be2bda27..efdf165848 100644 --- a/hw/i386/kvm/clock.c +++ b/hw/i386/kvm/clock.c @@ -88,7 +88,7 @@ static void kvmclock_vm_state_change(void *opaque, int running, int ret; if (running) { - struct kvm_clock_data data; + struct kvm_clock_data data = {}; uint64_t time_at_migration = kvmclock_current_nsec(s); s->clock_valid = false; @@ -99,7 +99,6 @@ static void kvmclock_vm_state_change(void *opaque, int running, } data.clock = s->clock; - data.flags = 0; ret = kvm_vm_ioctl(kvm_state, KVM_SET_CLOCK, &data); if (ret < 0) { fprintf(stderr, "KVM_SET_CLOCK failed: %s\n", strerror(ret)); From b0a0551283076c6f3e57cf2bdd525334009b2677 Mon Sep 17 00:00:00 2001 From: Christian Borntraeger Date: Tue, 14 Oct 2014 11:58:34 +0200 Subject: [PATCH 07/47] valgrind/i386: avoid false positives on KVM_SET_PIT ioctl struct kvm_pit_state2 contains pad fields. Let's use a designated initializer to avoid false positives from valgrind/memcheck. 
Signed-off-by: Christian Borntraeger Signed-off-by: Paolo Bonzini --- hw/i386/kvm/i8254.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/hw/i386/kvm/i8254.c b/hw/i386/kvm/i8254.c index 472af811cd..90eea10df7 100644 --- a/hw/i386/kvm/i8254.c +++ b/hw/i386/kvm/i8254.c @@ -138,7 +138,7 @@ static void kvm_pit_get(PITCommonState *pit) static void kvm_pit_put(PITCommonState *pit) { KVMPITState *s = KVM_PIT(pit); - struct kvm_pit_state2 kpit; + struct kvm_pit_state2 kpit = {}; struct kvm_pit_channel_state *kchan; struct PITChannelState *sc; int i, ret; From bdfc8480c50a53d91aa9a513d23a84de0d5fbc86 Mon Sep 17 00:00:00 2001 From: Christian Borntraeger Date: Thu, 30 Oct 2014 09:23:41 +0100 Subject: [PATCH 08/47] valgrind/i386: avoid false positives on KVM_SET_XCRS ioctl struct kvm_xcrs contains padding bytes. Let's use a designated initializer to avoid false positives from valgrind/memcheck. Signed-off-by: Christian Borntraeger Signed-off-by: Paolo Bonzini --- target-i386/kvm.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/target-i386/kvm.c b/target-i386/kvm.c index 3a3dfc4795..42f8def0fa 100644 --- a/target-i386/kvm.c +++ b/target-i386/kvm.c @@ -1085,7 +1085,7 @@ static int kvm_put_xsave(X86CPU *cpu) static int kvm_put_xcrs(X86CPU *cpu) { CPUX86State *env = &cpu->env; - struct kvm_xcrs xcrs; + struct kvm_xcrs xcrs = {}; if (!kvm_has_xcrs()) { return 0; From c7fe4b12984a36b87438080e48aff5e8f6d48ac9 Mon Sep 17 00:00:00 2001 From: Christian Borntraeger Date: Thu, 30 Oct 2014 09:27:34 +0100 Subject: [PATCH 09/47] valgrind/i386: avoid false positives on KVM_SET_MSRS ioctl struct kvm_msrs contains padding bytes. Let's use a designated initializer on the info part to avoid false positives from valgrind/memcheck. Do the same for generic MSRS, the TSC and feature control. We also need to zero out the reserved fields in the entries. We do this in kvm_msr_entry_set as suggested by Paolo. 
This avoids a big memset that a designated initializer on the full structure would do. Signed-off-by: Christian Borntraeger Signed-off-by: Paolo Bonzini --- target-i386/kvm.c | 15 ++++++++++++--- 1 file changed, 12 insertions(+), 3 deletions(-) diff --git a/target-i386/kvm.c b/target-i386/kvm.c index 42f8def0fa..93927bd93b 100644 --- a/target-i386/kvm.c +++ b/target-i386/kvm.c @@ -1152,6 +1152,7 @@ static void kvm_msr_entry_set(struct kvm_msr_entry *entry, uint32_t index, uint64_t value) { entry->index = index; + entry->reserved = 0; entry->data = value; } @@ -1170,7 +1171,9 @@ static int kvm_put_tscdeadline_msr(X86CPU *cpu) kvm_msr_entry_set(&msrs[0], MSR_IA32_TSCDEADLINE, env->tsc_deadline); - msr_data.info.nmsrs = 1; + msr_data.info = (struct kvm_msrs) { + .nmsrs = 1, + }; return kvm_vcpu_ioctl(CPU(cpu), KVM_SET_MSRS, &msr_data); } @@ -1190,7 +1193,11 @@ static int kvm_put_msr_feature_control(X86CPU *cpu) kvm_msr_entry_set(&msr_data.entry, MSR_IA32_FEATURE_CONTROL, cpu->env.msr_ia32_feature_control); - msr_data.info.nmsrs = 1; + + msr_data.info = (struct kvm_msrs) { + .nmsrs = 1, + }; + return kvm_vcpu_ioctl(CPU(cpu), KVM_SET_MSRS, &msr_data); } @@ -1339,7 +1346,9 @@ static int kvm_put_msrs(X86CPU *cpu, int level) } } - msr_data.info.nmsrs = n; + msr_data.info = (struct kvm_msrs) { + .nmsrs = n, + }; return kvm_vcpu_ioctl(CPU(cpu), KVM_SET_MSRS, &msr_data); From d19ae73e987ecc13a89c0830b501341103d06982 Mon Sep 17 00:00:00 2001 From: Christian Borntraeger Date: Thu, 30 Oct 2014 09:33:23 +0100 Subject: [PATCH 10/47] valgrind/i386: avoid false positives on KVM_GET_MSRS ioctl struct kvm_msrs contains a pad field. Let's use a designated initializer on the info part to avoid false positives from valgrind/memcheck. 
Signed-off-by: Christian Borntraeger Signed-off-by: Paolo Bonzini --- target-i386/kvm.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/target-i386/kvm.c b/target-i386/kvm.c index 93927bd93b..1762c2e184 100644 --- a/target-i386/kvm.c +++ b/target-i386/kvm.c @@ -1655,7 +1655,10 @@ static int kvm_get_msrs(X86CPU *cpu) } } - msr_data.info.nmsrs = n; + msr_data.info = (struct kvm_msrs) { + .nmsrs = n, + }; + ret = kvm_vcpu_ioctl(CPU(cpu), KVM_GET_MSRS, &msr_data); if (ret < 0) { return ret; From 076796f8fd27f4d014fe2efb6372f1cdc1df9a41 Mon Sep 17 00:00:00 2001 From: Christian Borntraeger Date: Thu, 30 Oct 2014 09:33:43 +0100 Subject: [PATCH 11/47] valgrind/i386: avoid false positives on KVM_SET_VCPU_EVENTS ioctl struct kvm_vcpu_events contains reserved fields. Let's use a designated initializer to avoid false positives in valgrind. Signed-off-by: Christian Borntraeger Signed-off-by: Paolo Bonzini --- target-i386/kvm.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/target-i386/kvm.c b/target-i386/kvm.c index 1762c2e184..4a2ee6fd32 100644 --- a/target-i386/kvm.c +++ b/target-i386/kvm.c @@ -1884,7 +1884,7 @@ static int kvm_put_apic(X86CPU *cpu) static int kvm_put_vcpu_events(X86CPU *cpu, int level) { CPUX86State *env = &cpu->env; - struct kvm_vcpu_events events; + struct kvm_vcpu_events events = {}; if (!kvm_has_vcpu_events()) { return 0; From e6eef7c221634c942e9f586df84aae623aa06cd5 Mon Sep 17 00:00:00 2001 From: Christian Borntraeger Date: Thu, 30 Oct 2014 10:05:28 +0100 Subject: [PATCH 12/47] valgrind/s390x: avoid false positives on KVM_SET_FPU ioctl struct kvm_fpu contains an alignment padding on s390x. Let's use a designated initializer to avoid false positives from valgrind/memcheck. 
Signed-off-by: Christian Borntraeger Signed-off-by: Paolo Bonzini --- target-s390x/kvm.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/target-s390x/kvm.c b/target-s390x/kvm.c index 1b79c4de75..1dcdaa6662 100644 --- a/target-s390x/kvm.c +++ b/target-s390x/kvm.c @@ -208,7 +208,7 @@ int kvm_arch_put_registers(CPUState *cs, int level) CPUS390XState *env = &cpu->env; struct kvm_sregs sregs; struct kvm_regs regs; - struct kvm_fpu fpu; + struct kvm_fpu fpu = {}; int r; int i; From e9af2fef242ce92f86d3d5c1a94c3199ff1e24c9 Mon Sep 17 00:00:00 2001 From: Christian Borntraeger Date: Thu, 20 Nov 2014 22:10:58 +0100 Subject: [PATCH 13/47] coverity/s390x: avoid false positive in kvm_irqchip_add_adapter_route Paolo Bonzini reported that Coverity reports an uninitialized pad value. Let's use a designated initializer for kvm_irq_routing_entry to avoid this false positive. This is similar to kvm_irqchip_add_msi_route and other users of kvm_irq_routing_entry. Signed-off-by: Christian Borntraeger Signed-off-by: Paolo Bonzini --- kvm-all.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kvm-all.c b/kvm-all.c index 4bfecccf8e..18cc6b4d3d 100644 --- a/kvm-all.c +++ b/kvm-all.c @@ -1277,7 +1277,7 @@ static int kvm_irqchip_assign_irqfd(KVMState *s, int fd, int rfd, int virq, int kvm_irqchip_add_adapter_route(KVMState *s, AdapterInfo *adapter) { - struct kvm_irq_routing_entry kroute; + struct kvm_irq_routing_entry kroute = {}; int virq; if (!kvm_gsi_routing_enabled()) { From 0bb0b2d2fe7f645ddaf1f0ff40ac669c9feb4aa1 Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Mon, 24 Nov 2014 15:54:43 +0100 Subject: [PATCH 14/47] target-i386: add feature flags for CPUID[EAX=0xd,ECX=1] These represent xsave-related capabilities of the processor, and KVM may or may not support them. Add feature bits so that they are considered by "-cpu ...,enforce", and use the new feature word instead of calling kvm_arch_get_supported_cpuid. 
Bit 3 (XSAVES) is not migratables because it requires saving MSR_IA32_XSS. Neither KVM nor any commonly available hardware supports it anyway. Signed-off-by: Paolo Bonzini --- target-i386/cpu.c | 29 ++++++++++++++++++++++++++++- target-i386/cpu.h | 6 ++++++ 2 files changed, 34 insertions(+), 1 deletion(-) diff --git a/target-i386/cpu.c b/target-i386/cpu.c index e9df33e5c3..cf4cf01419 100644 --- a/target-i386/cpu.c +++ b/target-i386/cpu.c @@ -274,6 +274,17 @@ static const char *cpuid_apm_edx_feature_name[] = { NULL, NULL, NULL, NULL, }; +static const char *cpuid_xsave_feature_name[] = { + "xsaveopt", "xsavec", "xgetbv1", "xsaves", + NULL, NULL, NULL, NULL, + NULL, NULL, NULL, NULL, + NULL, NULL, NULL, NULL, + NULL, NULL, NULL, NULL, + NULL, NULL, NULL, NULL, + NULL, NULL, NULL, NULL, + NULL, NULL, NULL, NULL, +}; + #define I486_FEATURES (CPUID_FP87 | CPUID_VME | CPUID_PSE) #define PENTIUM_FEATURES (I486_FEATURES | CPUID_DE | CPUID_TSC | \ CPUID_MSR | CPUID_MCE | CPUID_CX8 | CPUID_MMX | CPUID_APIC) @@ -391,6 +402,14 @@ static FeatureWordInfo feature_word_info[FEATURE_WORDS] = { .tcg_features = TCG_APM_FEATURES, .unmigratable_flags = CPUID_APM_INVTSC, }, + [FEAT_XSAVE] = { + .feat_names = cpuid_xsave_feature_name, + .cpuid_eax = 0xd, + .cpuid_needs_ecx = true, .cpuid_ecx = 1, + .cpuid_reg = R_EAX, + .tcg_features = 0, + .unmigratable_flags = FEAT_XSAVES, + }, }; typedef struct X86RegisterInfo32 { @@ -1018,6 +1037,8 @@ static X86CPUDefinition builtin_x86_defs[] = { CPUID_EXT2_SYSCALL, .features[FEAT_8000_0001_ECX] = CPUID_EXT3_LAHF_LM, + .features[FEAT_XSAVE] = + CPUID_XSAVE_XSAVEOPT, .xlevel = 0x8000000A, .model_id = "Intel Xeon E312xx (Sandy Bridge)", }, @@ -1051,6 +1072,8 @@ static X86CPUDefinition builtin_x86_defs[] = { CPUID_7_0_EBX_HLE | CPUID_7_0_EBX_AVX2 | CPUID_7_0_EBX_SMEP | CPUID_7_0_EBX_BMI2 | CPUID_7_0_EBX_ERMS | CPUID_7_0_EBX_INVPCID | CPUID_7_0_EBX_RTM, + .features[FEAT_XSAVE] = + CPUID_XSAVE_XSAVEOPT, .xlevel = 0x8000000A, .model_id = "Intel Core 
Processor (Haswell)", }, @@ -1085,6 +1108,8 @@ static X86CPUDefinition builtin_x86_defs[] = { CPUID_7_0_EBX_BMI2 | CPUID_7_0_EBX_ERMS | CPUID_7_0_EBX_INVPCID | CPUID_7_0_EBX_RTM | CPUID_7_0_EBX_RDSEED | CPUID_7_0_EBX_ADX | CPUID_7_0_EBX_SMAP, + .features[FEAT_XSAVE] = + CPUID_XSAVE_XSAVEOPT, .xlevel = 0x8000000A, .model_id = "Intel Core Processor (Broadwell)", }, @@ -1202,6 +1227,7 @@ static X86CPUDefinition builtin_x86_defs[] = { CPUID_EXT3_3DNOWPREFETCH | CPUID_EXT3_MISALIGNSSE | CPUID_EXT3_SSE4A | CPUID_EXT3_ABM | CPUID_EXT3_SVM | CPUID_EXT3_LAHF_LM, + /* no xsaveopt! */ .xlevel = 0x8000001A, .model_id = "AMD Opteron 62xx class CPU", }, @@ -1236,6 +1262,7 @@ static X86CPUDefinition builtin_x86_defs[] = { CPUID_EXT3_3DNOWPREFETCH | CPUID_EXT3_MISALIGNSSE | CPUID_EXT3_SSE4A | CPUID_EXT3_ABM | CPUID_EXT3_SVM | CPUID_EXT3_LAHF_LM, + /* no xsaveopt! */ .xlevel = 0x8000001A, .model_id = "AMD Opteron 63xx class CPU", }, @@ -2377,7 +2404,7 @@ void cpu_x86_cpuid(CPUX86State *env, uint32_t index, uint32_t count, *eax |= kvm_mask & (XSTATE_FP | XSTATE_SSE); *ebx = *ecx; } else if (count == 1) { - *eax = kvm_arch_get_supported_cpuid(s, 0xd, 1, R_EAX); + *eax = env->features[FEAT_XSAVE]; } else if (count < ARRAY_SIZE(ext_save_areas)) { const ExtSaveArea *esa = &ext_save_areas[count]; if ((env->features[esa->feature] & esa->bits) == esa->bits && diff --git a/target-i386/cpu.h b/target-i386/cpu.h index 015f5b5276..f9d74c7856 100644 --- a/target-i386/cpu.h +++ b/target-i386/cpu.h @@ -411,6 +411,7 @@ typedef enum FeatureWord { FEAT_C000_0001_EDX, /* CPUID[C000_0001].EDX */ FEAT_KVM, /* CPUID[4000_0001].EAX (KVM_CPUID_FEATURES) */ FEAT_SVM, /* CPUID[8000_000A].EDX */ + FEAT_XSAVE, /* CPUID[EAX=0xd,ECX=1].EAX */ FEATURE_WORDS, } FeatureWord; @@ -571,6 +572,11 @@ typedef uint32_t FeatureWordArray[FEATURE_WORDS]; #define CPUID_7_0_EBX_AVX512ER (1U << 27) /* AVX-512 Exponential and Reciprocal */ #define CPUID_7_0_EBX_AVX512CD (1U << 28) /* AVX-512 Conflict Detection */ +#define 
CPUID_XSAVE_XSAVEOPT (1U << 0) +#define CPUID_XSAVE_XSAVEC (1U << 1) +#define CPUID_XSAVE_XGETBV1 (1U << 2) +#define CPUID_XSAVE_XSAVES (1U << 3) + /* CPUID[0x80000007].EDX flags: */ #define CPUID_APM_INVTSC (1U << 8) From 906b53a2de31a4612e94000f7cfe3a8e4b002f25 Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Wed, 26 Nov 2014 14:33:59 +0100 Subject: [PATCH 15/47] target-mips: kvm: do not use get_clock() Use the external qemu-timer API instead. Signed-off-by: Paolo Bonzini --- target-mips/kvm.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/target-mips/kvm.c b/target-mips/kvm.c index 97fd51a02f..a761ea5b32 100644 --- a/target-mips/kvm.c +++ b/target-mips/kvm.c @@ -439,7 +439,7 @@ static void kvm_mips_update_state(void *opaque, int running, RunState state) } } else { /* Set clock restore time to now */ - count_resume = get_clock(); + count_resume = qemu_clock_get_ns(QEMU_CLOCK_REALTIME); ret = kvm_mips_put_one_reg64(cs, KVM_REG_MIPS_COUNT_RESUME, &count_resume); if (ret < 0) { From 18cd2c17b5370369a886155c001da0a7f54bbcca Mon Sep 17 00:00:00 2001 From: Wanpeng Li Date: Wed, 3 Dec 2014 10:36:23 +0800 Subject: [PATCH 16/47] target-i386: get/set/migrate XSAVES state Add xsaves related definition, it also adds corresponding part to kvm_get/put, and vmstate. 
Signed-off-by: Wanpeng Li Signed-off-by: Paolo Bonzini --- target-i386/cpu.c | 1 - target-i386/cpu.h | 2 ++ target-i386/kvm.c | 15 +++++++++++++++ target-i386/machine.c | 21 +++++++++++++++++++++ 4 files changed, 38 insertions(+), 1 deletion(-) diff --git a/target-i386/cpu.c b/target-i386/cpu.c index cf4cf01419..a2dde11564 100644 --- a/target-i386/cpu.c +++ b/target-i386/cpu.c @@ -408,7 +408,6 @@ static FeatureWordInfo feature_word_info[FEATURE_WORDS] = { .cpuid_needs_ecx = true, .cpuid_ecx = 1, .cpuid_reg = R_EAX, .tcg_features = 0, - .unmigratable_flags = FEAT_XSAVES, }, }; diff --git a/target-i386/cpu.h b/target-i386/cpu.h index f9d74c7856..7e363654b8 100644 --- a/target-i386/cpu.h +++ b/target-i386/cpu.h @@ -389,6 +389,7 @@ #define MSR_VM_HSAVE_PA 0xc0010117 #define MSR_IA32_BNDCFGS 0x00000d90 +#define MSR_IA32_XSS 0x00000da0 #define XSTATE_FP (1ULL << 0) #define XSTATE_SSE (1ULL << 1) @@ -1025,6 +1026,7 @@ typedef struct CPUX86State { uint64_t xstate_bv; uint64_t xcr0; + uint64_t xss; TPRAccess tpr_access_type; } CPUX86State; diff --git a/target-i386/kvm.c b/target-i386/kvm.c index 4a2ee6fd32..60c4475b67 100644 --- a/target-i386/kvm.c +++ b/target-i386/kvm.c @@ -80,6 +80,7 @@ static bool has_msr_hv_hypercall; static bool has_msr_hv_vapic; static bool has_msr_hv_tsc; static bool has_msr_mtrr; +static bool has_msr_xss; static bool has_msr_architectural_pmu; static uint32_t num_architectural_pmu_counters; @@ -826,6 +827,10 @@ static int kvm_get_supported_msrs(KVMState *s) has_msr_bndcfgs = true; continue; } + if (kvm_msr_list->indices[i] == MSR_IA32_XSS) { + has_msr_xss = true; + continue; + } } } @@ -1231,6 +1236,9 @@ static int kvm_put_msrs(X86CPU *cpu, int level) if (has_msr_bndcfgs) { kvm_msr_entry_set(&msrs[n++], MSR_IA32_BNDCFGS, env->msr_bndcfgs); } + if (has_msr_xss) { + kvm_msr_entry_set(&msrs[n++], MSR_IA32_XSS, env->xss); + } #ifdef TARGET_X86_64 if (lm_capable_kernel) { kvm_msr_entry_set(&msrs[n++], MSR_CSTAR, env->cstar); @@ -1579,6 +1587,10 @@ 
static int kvm_get_msrs(X86CPU *cpu) if (has_msr_bndcfgs) { msrs[n++].index = MSR_IA32_BNDCFGS; } + if (has_msr_xss) { + msrs[n++].index = MSR_IA32_XSS; + } + if (!env->tsc_valid) { msrs[n++].index = MSR_IA32_TSC; @@ -1729,6 +1741,9 @@ static int kvm_get_msrs(X86CPU *cpu) case MSR_IA32_BNDCFGS: env->msr_bndcfgs = msrs[i].data; break; + case MSR_IA32_XSS: + env->xss = msrs[i].data; + break; default: if (msrs[i].index >= MSR_MC0_CTL && msrs[i].index < MSR_MC0_CTL + (env->mcg_cap & 0xff) * 4) { diff --git a/target-i386/machine.c b/target-i386/machine.c index 1c13b14352..722d62e471 100644 --- a/target-i386/machine.c +++ b/target-i386/machine.c @@ -687,6 +687,24 @@ static const VMStateDescription vmstate_avx512 = { } }; +static bool xss_needed(void *opaque) +{ + X86CPU *cpu = opaque; + CPUX86State *env = &cpu->env; + + return env->xss != 0; +} + +static const VMStateDescription vmstate_xss = { + .name = "cpu/xss", + .version_id = 1, + .minimum_version_id = 1, + .fields = (VMStateField[]) { + VMSTATE_UINT64(env.xss, X86CPU), + VMSTATE_END_OF_LIST() + } +}; + VMStateDescription vmstate_x86_cpu = { .name = "cpu", .version_id = 12, @@ -832,6 +850,9 @@ VMStateDescription vmstate_x86_cpu = { }, { .vmsd = &vmstate_avx512, .needed = avx512_needed, + }, { + .vmsd = &vmstate_xss, + .needed = xss_needed, } , { /* empty */ } From 18fc80553420a3188336baeeb542e9f2d1ada13b Mon Sep 17 00:00:00 2001 From: Markus Armbruster Date: Thu, 4 Dec 2014 14:46:43 +0100 Subject: [PATCH 17/47] x86: Drop superfluous conditionals around g_free() Signed-off-by: Markus Armbruster Reviewed-by: Eric Blake Signed-off-by: Paolo Bonzini --- hw/i386/pc_sysfw.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/hw/i386/pc_sysfw.c b/hw/i386/pc_sysfw.c index 75913c5b2f..662d99768e 100644 --- a/hw/i386/pc_sysfw.c +++ b/hw/i386/pc_sysfw.c @@ -204,9 +204,7 @@ static void old_pc_system_rom_init(MemoryRegion *rom_memory, bool isapc_ram_fw) fprintf(stderr, "qemu: could not load PC BIOS '%s'\n", 
bios_name); exit(1); } - if (filename) { - g_free(filename); - } + g_free(filename); /* map the last 128KB of the BIOS in ISA space */ isa_bios_size = bios_size; From 4be34d1e2140b6d1be611c4bfa542c54c232520b Mon Sep 17 00:00:00 2001 From: Markus Armbruster Date: Thu, 4 Dec 2014 14:46:44 +0100 Subject: [PATCH 18/47] x86: Fuse g_malloc(); memset() into g_malloc0() Coccinelle semantic patch: @@ expression LHS, SZ; @@ - LHS = g_malloc(SZ); - memset(LHS, 0, SZ); + LHS = g_malloc0(SZ); Signed-off-by: Markus Armbruster Reviewed-by: Eric Blake Signed-off-by: Paolo Bonzini --- target-i386/arch_dump.c | 16 ++++------------ 1 file changed, 4 insertions(+), 12 deletions(-) diff --git a/target-i386/arch_dump.c b/target-i386/arch_dump.c index 0bbed239f8..eccd8031af 100644 --- a/target-i386/arch_dump.c +++ b/target-i386/arch_dump.c @@ -78,9 +78,7 @@ static int x86_64_write_elf64_note(WriteCoreDumpFunction f, descsz = sizeof(x86_64_elf_prstatus); note_size = ((sizeof(Elf64_Nhdr) + 3) / 4 + (name_size + 3) / 4 + (descsz + 3) / 4) * 4; - note = g_malloc(note_size); - - memset(note, 0, note_size); + note = g_malloc0(note_size); note->n_namesz = cpu_to_le32(name_size); note->n_descsz = cpu_to_le32(descsz); note->n_type = cpu_to_le32(NT_PRSTATUS); @@ -159,9 +157,7 @@ static int x86_write_elf64_note(WriteCoreDumpFunction f, CPUX86State *env, descsz = sizeof(x86_elf_prstatus); note_size = ((sizeof(Elf64_Nhdr) + 3) / 4 + (name_size + 3) / 4 + (descsz + 3) / 4) * 4; - note = g_malloc(note_size); - - memset(note, 0, note_size); + note = g_malloc0(note_size); note->n_namesz = cpu_to_le32(name_size); note->n_descsz = cpu_to_le32(descsz); note->n_type = cpu_to_le32(NT_PRSTATUS); @@ -216,9 +212,7 @@ int x86_cpu_write_elf32_note(WriteCoreDumpFunction f, CPUState *cs, descsz = sizeof(x86_elf_prstatus); note_size = ((sizeof(Elf32_Nhdr) + 3) / 4 + (name_size + 3) / 4 + (descsz + 3) / 4) * 4; - note = g_malloc(note_size); - - memset(note, 0, note_size); + note = g_malloc0(note_size); note->n_namesz 
= cpu_to_le32(name_size); note->n_descsz = cpu_to_le32(descsz); note->n_type = cpu_to_le32(NT_PRSTATUS); @@ -345,9 +339,7 @@ static inline int cpu_write_qemu_note(WriteCoreDumpFunction f, } note_size = ((note_head_size + 3) / 4 + (name_size + 3) / 4 + (descsz + 3) / 4) * 4; - note = g_malloc(note_size); - - memset(note, 0, note_size); + note = g_malloc0(note_size); if (type == 0) { note32 = note; note32->n_namesz = cpu_to_le32(name_size); From ab3ad07f89c7f9e03c17c98e1d1a02dbf61c605c Mon Sep 17 00:00:00 2001 From: Markus Armbruster Date: Thu, 4 Dec 2014 14:46:45 +0100 Subject: [PATCH 19/47] x86: Use g_new() & friends where that makes obvious sense g_new(T, n) is neater than g_malloc(sizeof(T) * n). It's also safer, for two reasons. One, it catches multiplication overflowing size_t. Two, it returns T * rather than void *, which lets the compiler catch more type errors. This commit only touches allocations with size arguments of the form sizeof(T). Signed-off-by: Markus Armbruster Reviewed-by: Eric Blake Signed-off-by: Paolo Bonzini --- hw/i386/pc.c | 3 +-- target-i386/kvm.c | 2 +- 2 files changed, 2 insertions(+), 3 deletions(-) diff --git a/hw/i386/pc.c b/hw/i386/pc.c index 8be50a4ad6..60c1d54b6d 100644 --- a/hw/i386/pc.c +++ b/hw/i386/pc.c @@ -601,8 +601,7 @@ int e820_add_entry(uint64_t address, uint64_t length, uint32_t type) } /* new "etc/e820" file -- include ram too */ - e820_table = g_realloc(e820_table, - sizeof(struct e820_entry) * (e820_entries+1)); + e820_table = g_renew(struct e820_entry, e820_table, e820_entries + 1); e820_table[e820_entries].address = cpu_to_le64(address); e820_table[e820_entries].length = cpu_to_le64(length); e820_table[e820_entries].type = cpu_to_le32(type); diff --git a/target-i386/kvm.c b/target-i386/kvm.c index 60c4475b67..8832a02f8a 100644 --- a/target-i386/kvm.c +++ b/target-i386/kvm.c @@ -278,7 +278,7 @@ static void kvm_hwpoison_page_add(ram_addr_t ram_addr) return; } } - page = g_malloc(sizeof(HWPoisonPage)); + page = 
g_new(HWPoisonPage, 1); page->ram_addr = ram_addr; QLIST_INSERT_HEAD(&hwpoison_page_list, page, list); } From e42a92ae640dc22ecb4eb7705ddfe89aeadb92cc Mon Sep 17 00:00:00 2001 From: Markus Armbruster Date: Thu, 4 Dec 2014 14:46:46 +0100 Subject: [PATCH 20/47] x86: Drop some superfluous casts from void * Signed-off-by: Markus Armbruster Reviewed-by: Eric Blake Signed-off-by: Paolo Bonzini --- target-i386/cpu.c | 2 +- target-i386/kvm.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/target-i386/cpu.c b/target-i386/cpu.c index a2dde11564..ef7d71c778 100644 --- a/target-i386/cpu.c +++ b/target-i386/cpu.c @@ -1556,7 +1556,7 @@ static char *x86_cpuid_get_vendor(Object *obj, Error **errp) CPUX86State *env = &cpu->env; char *value; - value = (char *)g_malloc(CPUID_VENDOR_SZ + 1); + value = g_malloc(CPUID_VENDOR_SZ + 1); x86_cpu_vendor_words2str(value, env->cpuid_vendor1, env->cpuid_vendor2, env->cpuid_vendor3); return value; diff --git a/target-i386/kvm.c b/target-i386/kvm.c index 8832a02f8a..f92edfe14a 100644 --- a/target-i386/kvm.c +++ b/target-i386/kvm.c @@ -96,7 +96,7 @@ static struct kvm_cpuid2 *try_get_cpuid(KVMState *s, int max) int r, size; size = sizeof(*cpuid) + max * sizeof(*cpuid->entries); - cpuid = (struct kvm_cpuid2 *)g_malloc0(size); + cpuid = g_malloc0(size); cpuid->nent = max; r = kvm_ioctl(s, KVM_GET_SUPPORTED_CPUID, cpuid); if (r == 0 && cpuid->nent >= max) { From 1c3381af327b5f94a10942a3c25777a57fcdd85e Mon Sep 17 00:00:00 2001 From: Markus Armbruster Date: Thu, 4 Dec 2014 14:12:43 +0100 Subject: [PATCH 21/47] scsi: Drop superfluous conditionals around g_free() Signed-off-by: Markus Armbruster Reviewed-by: Eric Blake Reviewed-by: Fam Zheng Signed-off-by: Paolo Bonzini --- hw/scsi/scsi-generic.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/hw/scsi/scsi-generic.c b/hw/scsi/scsi-generic.c index 6b9e4e1ef9..e53470f85e 100644 --- a/hw/scsi/scsi-generic.c +++ b/hw/scsi/scsi-generic.c @@ -298,8 +298,7 @@ static 
int32_t scsi_send_command(SCSIRequest *req, uint8_t *cmd) #endif if (r->req.cmd.xfer == 0) { - if (r->buf != NULL) - g_free(r->buf); + g_free(r->buf); r->buflen = 0; r->buf = NULL; /* The request is used as the AIO opaque value, so add a ref. */ @@ -314,8 +313,7 @@ static int32_t scsi_send_command(SCSIRequest *req, uint8_t *cmd) } if (r->buflen != r->req.cmd.xfer) { - if (r->buf != NULL) - g_free(r->buf); + g_free(r->buf); r->buf = g_malloc(r->req.cmd.xfer); r->buflen = r->req.cmd.xfer; } From 0bd0adbe5b438cabbf02230dba1be4c209158f57 Mon Sep 17 00:00:00 2001 From: Markus Armbruster Date: Thu, 4 Dec 2014 14:12:44 +0100 Subject: [PATCH 22/47] scsi: Fuse g_malloc(); memset() into g_malloc0() Coccinelle semantic patch: @@ expression LHS, SZ; @@ - LHS = g_malloc(SZ); - memset(LHS, 0, SZ); + LHS = g_malloc0(SZ); Signed-off-by: Markus Armbruster Reviewed-by: Eric Blake Reviewed-by: Fam Zheng Signed-off-by: Paolo Bonzini --- hw/scsi/megasas.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/hw/scsi/megasas.c b/hw/scsi/megasas.c index 604252a198..4852237a79 100644 --- a/hw/scsi/megasas.c +++ b/hw/scsi/megasas.c @@ -1018,8 +1018,7 @@ static int megasas_pd_get_info_submit(SCSIDevice *sdev, int lun, size_t len, resid; if (!cmd->iov_buf) { - cmd->iov_buf = g_malloc(dcmd_size); - memset(cmd->iov_buf, 0, dcmd_size); + cmd->iov_buf = g_malloc0(dcmd_size); info = cmd->iov_buf; info->inquiry_data[0] = 0x7f; /* Force PQual 0x3, PType 0x1f */ info->vpd_page83[0] = 0x7f; @@ -1221,8 +1220,7 @@ static int megasas_ld_get_info_submit(SCSIDevice *sdev, int lun, uint64_t ld_size; if (!cmd->iov_buf) { - cmd->iov_buf = g_malloc(dcmd_size); - memset(cmd->iov_buf, 0x0, dcmd_size); + cmd->iov_buf = g_malloc0(dcmd_size); info = cmd->iov_buf; megasas_setup_inquiry(cdb, 0x83, sizeof(info->vpd_page83)); req = scsi_req_new(sdev, cmd->index, lun, cdb, cmd); From 3c55fe2a13353b3ddf1db51c34ada23d161ee428 Mon Sep 17 00:00:00 2001 From: Markus Armbruster Date: Thu, 4 Dec 2014 
14:12:45 +0100 Subject: [PATCH 23/47] scsi: Use g_new() & friends where that makes obvious sense g_new(T, n) is neater than g_malloc(sizeof(T) * n). It's also safer, for two reasons. One, it catches multiplication overflowing size_t. Two, it returns T * rather than void *, which lets the compiler catch more type errors. This commit only touches allocations with size arguments of the form sizeof(T). Signed-off-by: Markus Armbruster Reviewed-by: Eric Blake Reviewed-by: Fam Zheng Signed-off-by: Paolo Bonzini --- hw/scsi/lsi53c895a.c | 2 +- hw/scsi/virtio-scsi.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/hw/scsi/lsi53c895a.c b/hw/scsi/lsi53c895a.c index d9b4c7ea3c..ec920488d6 100644 --- a/hw/scsi/lsi53c895a.c +++ b/hw/scsi/lsi53c895a.c @@ -781,7 +781,7 @@ static void lsi_do_command(LSIState *s) } assert(s->current == NULL); - s->current = g_malloc0(sizeof(lsi_request)); + s->current = g_new0(lsi_request, 1); s->current->tag = s->select_tag; s->current->req = scsi_req_new(dev, s->current->tag, s->current_lun, buf, s->current); diff --git a/hw/scsi/virtio-scsi.c b/hw/scsi/virtio-scsi.c index ef485508b1..b06dd390d2 100644 --- a/hw/scsi/virtio-scsi.c +++ b/hw/scsi/virtio-scsi.c @@ -829,7 +829,7 @@ void virtio_scsi_common_realize(DeviceState *dev, Error **errp, virtio_cleanup(vdev); return; } - s->cmd_vqs = g_malloc0(s->conf.num_queues * sizeof(VirtQueue *)); + s->cmd_vqs = g_new0(VirtQueue *, s->conf.num_queues); s->sense_size = VIRTIO_SCSI_SENSE_SIZE; s->cdb_size = VIRTIO_SCSI_CDB_SIZE; From f8e1f53334700950eb6691076d51c7d60f1c28f7 Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Wed, 10 Dec 2014 11:16:57 +0100 Subject: [PATCH 24/47] scsi-disk: provide maximum transfer length The QEMU block layer has a limit of INT_MAX bytes per transfer. Expose it in the block limits VPD page for both regular transfers and WRITE SAME. 
Reported-by: Ming Lei Signed-off-by: Paolo Bonzini --- hw/scsi/scsi-disk.c | 27 +++++++++++++++++++++++++++ 1 file changed, 27 insertions(+) diff --git a/hw/scsi/scsi-disk.c b/hw/scsi/scsi-disk.c index 2f75d7d51c..f65618d802 100644 --- a/hw/scsi/scsi-disk.c +++ b/hw/scsi/scsi-disk.c @@ -49,6 +49,7 @@ do { printf("scsi-disk: " fmt , ## __VA_ARGS__); } while (0) #define DEFAULT_DISCARD_GRANULARITY 4096 #define DEFAULT_MAX_UNMAP_SIZE (1 << 30) /* 1 GB */ +#define DEFAULT_MAX_IO_SIZE INT_MAX /* 2 GB - 1 block */ typedef struct SCSIDiskState SCSIDiskState; @@ -79,6 +80,7 @@ struct SCSIDiskState uint64_t port_wwn; uint16_t port_index; uint64_t max_unmap_size; + uint64_t max_io_size; QEMUBH *bh; char *version; char *serial; @@ -635,6 +637,8 @@ static int scsi_disk_emulate_inquiry(SCSIRequest *req, uint8_t *outbuf) s->qdev.conf.opt_io_size / s->qdev.blocksize; unsigned int max_unmap_sectors = s->max_unmap_size / s->qdev.blocksize; + unsigned int max_io_sectors = + s->max_io_size / s->qdev.blocksize; if (s->qdev.type == TYPE_ROM) { DPRINTF("Inquiry (EVPD[%02X] not supported for CDROM\n", @@ -651,6 +655,12 @@ static int scsi_disk_emulate_inquiry(SCSIRequest *req, uint8_t *outbuf) outbuf[6] = (min_io_size >> 8) & 0xff; outbuf[7] = min_io_size & 0xff; + /* maximum transfer length */ + outbuf[8] = (max_io_sectors >> 24) & 0xff; + outbuf[9] = (max_io_sectors >> 16) & 0xff; + outbuf[10] = (max_io_sectors >> 8) & 0xff; + outbuf[11] = max_io_sectors & 0xff; + /* optimal transfer length */ outbuf[12] = (opt_io_size >> 24) & 0xff; outbuf[13] = (opt_io_size >> 16) & 0xff; @@ -674,6 +684,17 @@ static int scsi_disk_emulate_inquiry(SCSIRequest *req, uint8_t *outbuf) outbuf[29] = (unmap_sectors >> 16) & 0xff; outbuf[30] = (unmap_sectors >> 8) & 0xff; outbuf[31] = unmap_sectors & 0xff; + + /* max write same size */ + outbuf[36] = 0; + outbuf[37] = 0; + outbuf[38] = 0; + outbuf[39] = 0; + + outbuf[40] = (max_io_sectors >> 24) & 0xff; + outbuf[41] = (max_io_sectors >> 16) & 0xff; + 
outbuf[42] = (max_io_sectors >> 8) & 0xff; + outbuf[43] = max_io_sectors & 0xff; break; } case 0xb2: /* thin provisioning */ @@ -2579,6 +2600,8 @@ static Property scsi_hd_properties[] = { DEFINE_PROP_UINT16("port_index", SCSIDiskState, port_index, 0), DEFINE_PROP_UINT64("max_unmap_size", SCSIDiskState, max_unmap_size, DEFAULT_MAX_UNMAP_SIZE), + DEFINE_PROP_UINT64("max_io_size", SCSIDiskState, max_io_size, + DEFAULT_MAX_IO_SIZE), DEFINE_BLOCK_CHS_PROPERTIES(SCSIDiskState, qdev.conf), DEFINE_PROP_END_OF_LIST(), }; @@ -2625,6 +2648,8 @@ static Property scsi_cd_properties[] = { DEFINE_PROP_UINT64("wwn", SCSIDiskState, wwn, 0), DEFINE_PROP_UINT64("port_wwn", SCSIDiskState, port_wwn, 0), DEFINE_PROP_UINT16("port_index", SCSIDiskState, port_index, 0), + DEFINE_PROP_UINT64("max_io_size", SCSIDiskState, max_io_size, + DEFAULT_MAX_IO_SIZE), DEFINE_PROP_END_OF_LIST(), }; @@ -2690,6 +2715,8 @@ static Property scsi_disk_properties[] = { DEFINE_PROP_UINT16("port_index", SCSIDiskState, port_index, 0), DEFINE_PROP_UINT64("max_unmap_size", SCSIDiskState, max_unmap_size, DEFAULT_MAX_UNMAP_SIZE), + DEFINE_PROP_UINT64("max_io_size", SCSIDiskState, max_io_size, + DEFAULT_MAX_IO_SIZE), DEFINE_PROP_END_OF_LIST(), }; From b4ac20b4df0d1eaa5d546ccb84751e3e97d257fd Mon Sep 17 00:00:00 2001 From: Pavel Dovgalyuk Date: Wed, 26 Nov 2014 13:38:52 +0300 Subject: [PATCH 25/47] cpu-exec: fix cpu_exec_nocache In icount mode cpu_exec_nocache function is used to execute part of the existing TB. At the end of cpu_exec_nocache newly created TB is deleted. Sometimes io_read function needs to recompile current TB and restart TB lookup and execution. After that tb_find_fast function finds old (bigger) TB again. This TB cannot be executed (because icount is not big enough) and cpu_exec_nocache is called again. Such a loop continues over and over. This patch deletes old TB and avoids finding it in the TB cache. 
Signed-off-by: Pavel Dovgalyuk Signed-off-by: Paolo Bonzini --- cpu-exec.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/cpu-exec.c b/cpu-exec.c index 3913de020b..8830255db3 100644 --- a/cpu-exec.c +++ b/cpu-exec.c @@ -202,13 +202,18 @@ static void cpu_exec_nocache(CPUArchState *env, int max_cycles, { CPUState *cpu = ENV_GET_CPU(env); TranslationBlock *tb; + target_ulong pc = orig_tb->pc; + target_ulong cs_base = orig_tb->cs_base; + uint64_t flags = orig_tb->flags; /* Should never happen. We only end up here when an existing TB is too long. */ if (max_cycles > CF_COUNT_MASK) max_cycles = CF_COUNT_MASK; - tb = tb_gen_code(cpu, orig_tb->pc, orig_tb->cs_base, orig_tb->flags, + /* tb_gen_code can flush our orig_tb, invalidate it now */ + tb_phys_invalidate(orig_tb, -1); + tb = tb_gen_code(cpu, pc, cs_base, flags, max_cycles); cpu->current_tb = tb; /* execute the generated code */ From e511b4d783c47a32420da802104cfb0eb974b22f Mon Sep 17 00:00:00 2001 From: Pavel Dovgalyuk Date: Wed, 26 Nov 2014 13:39:20 +0300 Subject: [PATCH 26/47] cpu-exec: reset exception_index correctly Exception index is reset at every entry into cpu_exec() function. This may cause missing the exceptions while replaying them. This patch moves exception_index reset to the locations where they are processed.
* This delay includes the delay of the last cycle, so @@ -378,6 +377,7 @@ int cpu_exec(CPUArchState *env) if (ret == EXCP_DEBUG) { cpu_handle_debug_exception(env); } + cpu->exception_index = -1; break; } else { #if defined(CONFIG_USER_ONLY) @@ -388,6 +388,7 @@ int cpu_exec(CPUArchState *env) cc->do_interrupt(cpu); #endif ret = cpu->exception_index; + cpu->exception_index = -1; break; #else cc->do_interrupt(cpu); diff --git a/cpus.c b/cpus.c index 0c33458bb1..91119bb864 100644 --- a/cpus.c +++ b/cpus.c @@ -934,6 +934,7 @@ static void *qemu_kvm_cpu_thread_fn(void *arg) qemu_mutex_lock(&qemu_global_mutex); qemu_thread_get_self(cpu->thread); cpu->thread_id = qemu_get_thread_id(); + cpu->exception_index = -1; current_cpu = cpu; r = kvm_init_vcpu(cpu); @@ -974,6 +975,7 @@ static void *qemu_dummy_cpu_thread_fn(void *arg) qemu_mutex_lock_iothread(); qemu_thread_get_self(cpu->thread); cpu->thread_id = qemu_get_thread_id(); + cpu->exception_index = -1; sigemptyset(&waitset); sigaddset(&waitset, SIG_IPI); @@ -1016,6 +1018,7 @@ static void *qemu_tcg_cpu_thread_fn(void *arg) CPU_FOREACH(cpu) { cpu->thread_id = qemu_get_thread_id(); cpu->created = true; + cpu->exception_index = -1; } qemu_cond_signal(&qemu_cpu_cond); From 626cf8f4c6157ed133f0daa89b90d4169060bc97 Mon Sep 17 00:00:00 2001 From: Pavel Dovgalyuk Date: Mon, 8 Dec 2014 10:53:17 +0300 Subject: [PATCH 27/47] icount: set can_do_io outside TB execution This patch sets can_do_io function to allow reading icount within cpu-exec, but outside TB execution. 
Signed-off-by: Pavel Dovgalyuk Signed-off-by: Paolo Bonzini --- cpu-exec.c | 3 +++ cpus.c | 3 +++ 2 files changed, 6 insertions(+) diff --git a/cpu-exec.c b/cpu-exec.c index 4df98567ce..cce80f0c01 100644 --- a/cpu-exec.c +++ b/cpu-exec.c @@ -168,7 +168,9 @@ static inline tcg_target_ulong cpu_tb_exec(CPUState *cpu, uint8_t *tb_ptr) } #endif /* DEBUG_DISAS */ + cpu->can_do_io = 0; next_tb = tcg_qemu_tb_exec(env, tb_ptr); + cpu->can_do_io = 1; trace_exec_tb_exit((void *) (next_tb & ~TB_EXIT_MASK), next_tb & TB_EXIT_MASK); @@ -543,6 +545,7 @@ int cpu_exec(CPUArchState *env) cpu = current_cpu; env = cpu->env_ptr; cc = CPU_GET_CLASS(cpu); + cpu->can_do_io = 1; #ifdef TARGET_I386 x86_cpu = X86_CPU(cpu); #endif diff --git a/cpus.c b/cpus.c index 91119bb864..615d4ae07d 100644 --- a/cpus.c +++ b/cpus.c @@ -935,6 +935,7 @@ static void *qemu_kvm_cpu_thread_fn(void *arg) qemu_thread_get_self(cpu->thread); cpu->thread_id = qemu_get_thread_id(); cpu->exception_index = -1; + cpu->can_do_io = 1; current_cpu = cpu; r = kvm_init_vcpu(cpu); @@ -976,6 +977,7 @@ static void *qemu_dummy_cpu_thread_fn(void *arg) qemu_thread_get_self(cpu->thread); cpu->thread_id = qemu_get_thread_id(); cpu->exception_index = -1; + cpu->can_do_io = 1; sigemptyset(&waitset); sigaddset(&waitset, SIG_IPI); @@ -1019,6 +1021,7 @@ static void *qemu_tcg_cpu_thread_fn(void *arg) cpu->thread_id = qemu_get_thread_id(); cpu->created = true; cpu->exception_index = -1; + cpu->can_do_io = 1; } qemu_cond_signal(&qemu_cpu_cond); From 2a62914bd8209d97e918f30f0de74bec2bf622c4 Mon Sep 17 00:00:00 2001 From: Pavel Dovgalyuk Date: Mon, 8 Dec 2014 10:53:45 +0300 Subject: [PATCH 28/47] icount: introduce cpu_get_icount_raw Separate accessing the instruction counter from the compensation for speed and halting that are introduced by qemu_icount_bias. This introduces new infrastructure used by the record/replay patches. 
Signed-off-by: Pavel Dovgalyuk Signed-off-by: Paolo Bonzini --- cpus.c | 13 ++++++++++--- include/qemu/timer.h | 1 + 2 files changed, 11 insertions(+), 3 deletions(-) diff --git a/cpus.c b/cpus.c index 615d4ae07d..5f8acba15d 100644 --- a/cpus.c +++ b/cpus.c @@ -136,8 +136,7 @@ typedef struct TimersState { static TimersState timers_state; -/* Return the virtual CPU time, based on the instruction counter. */ -static int64_t cpu_get_icount_locked(void) +int64_t cpu_get_icount_raw(void) { int64_t icount; CPUState *cpu = current_cpu; @@ -145,10 +144,18 @@ static int64_t cpu_get_icount_locked(void) icount = timers_state.qemu_icount; if (cpu) { if (!cpu_can_do_io(cpu)) { - fprintf(stderr, "Bad clock read\n"); + fprintf(stderr, "Bad icount read\n"); + exit(1); } icount -= (cpu->icount_decr.u16.low + cpu->icount_extra); } + return icount; +} + +/* Return the virtual CPU time, based on the instruction counter. */ +static int64_t cpu_get_icount_locked(void) +{ + int64_t icount = cpu_get_icount_raw(); return timers_state.qemu_icount_bias + cpu_icount_to_ns(icount); } diff --git a/include/qemu/timer.h b/include/qemu/timer.h index 5f5210d543..3dae414f40 100644 --- a/include/qemu/timer.h +++ b/include/qemu/timer.h @@ -743,6 +743,7 @@ static inline int64_t get_clock(void) #endif /* icount */ +int64_t cpu_get_icount_raw(void); int64_t cpu_get_icount(void); int64_t cpu_get_clock(void); int64_t cpu_get_clock_offset(void); From d8a499f17ee5f05407874f29f69f0e3e3198a853 Mon Sep 17 00:00:00 2001 From: Pavel Dovgalyuk Date: Wed, 26 Nov 2014 13:40:16 +0300 Subject: [PATCH 29/47] cpu-exec: invalidate nocache translation if they are interrupted In this case, QEMU might longjmp out of cpu-exec.c and miss the final cleanup in cpu_exec_nocache. Do this manually through a new compile flag. 
Signed-off-by: Pavel Dovgalyuk Signed-off-by: Paolo Bonzini --- cpu-exec.c | 2 +- include/exec/exec-all.h | 1 + translate-all.c | 6 ++++++ 3 files changed, 8 insertions(+), 1 deletion(-) diff --git a/cpu-exec.c b/cpu-exec.c index cce80f0c01..a4f0effaf4 100644 --- a/cpu-exec.c +++ b/cpu-exec.c @@ -216,7 +216,7 @@ static void cpu_exec_nocache(CPUArchState *env, int max_cycles, /* tb_gen_code can flush our orig_tb, invalidate it now */ tb_phys_invalidate(orig_tb, -1); tb = tb_gen_code(cpu, pc, cs_base, flags, - max_cycles); + max_cycles | CF_NOCACHE); cpu->current_tb = tb; /* execute the generated code */ trace_exec_tb_nocache(tb, tb->pc); diff --git a/include/exec/exec-all.h b/include/exec/exec-all.h index 0844885edd..38a8a09b42 100644 --- a/include/exec/exec-all.h +++ b/include/exec/exec-all.h @@ -145,6 +145,7 @@ struct TranslationBlock { uint16_t cflags; /* compile flags */ #define CF_COUNT_MASK 0x7fff #define CF_LAST_IO 0x8000 /* Last insn may be an IO access. */ +#define CF_NOCACHE 0x10000 /* To be freed after execution */ void *tc_ptr; /* pointer to the translated code */ /* next matching tb for physical address. */ diff --git a/translate-all.c b/translate-all.c index ba5c8403d3..cf05472008 100644 --- a/translate-all.c +++ b/translate-all.c @@ -264,6 +264,12 @@ bool cpu_restore_state(CPUState *cpu, uintptr_t retaddr) tb = tb_find_pc(retaddr); if (tb) { cpu_restore_state_from_tb(cpu, tb, retaddr); + if (tb->cflags & CF_NOCACHE) { + /* one-shot translation, invalidate it immediately */ + cpu->current_tb = NULL; + tb_phys_invalidate(tb, -1); + tb_free(tb); + } return true; } return false; From 4e7fa73ec2516334b58e82f9a5649b1468b1eb7a Mon Sep 17 00:00:00 2001 From: Pavel Dovgalyuk Date: Wed, 26 Nov 2014 13:40:50 +0300 Subject: [PATCH 30/47] timer: introduce new QEMU_CLOCK_VIRTUAL_RT clock This patch introduces new QEMU_CLOCK_VIRTUAL_RT clock, which should be used for icount warping. 
In the next patch, it will be used to avoid a huge icount warp when a virtual machine is stopped for a long time. Signed-off-by: Pavel Dovgalyuk Signed-off-by: Paolo Bonzini --- include/qemu/timer.h | 7 +++++++ qemu-timer.c | 2 ++ 2 files changed, 9 insertions(+) diff --git a/include/qemu/timer.h b/include/qemu/timer.h index 3dae414f40..552487c45f 100644 --- a/include/qemu/timer.h +++ b/include/qemu/timer.h @@ -36,12 +36,19 @@ * is suspended, and it will reflect system time changes the host may * undergo (e.g. due to NTP). The host clock has the same precision as * the virtual clock. + * + * @QEMU_CLOCK_VIRTUAL_RT: realtime clock used for icount warp + * + * Outside icount mode, this clock is the same as @QEMU_CLOCK_VIRTUAL. + * In icount mode, this clock counts nanoseconds while the virtual + * machine is running. */ typedef enum { QEMU_CLOCK_REALTIME = 0, QEMU_CLOCK_VIRTUAL = 1, QEMU_CLOCK_HOST = 2, + QEMU_CLOCK_VIRTUAL_RT = 3, QEMU_CLOCK_MAX } QEMUClockType; diff --git a/qemu-timer.c b/qemu-timer.c index 00a5d35c3f..f4b4b6aa45 100644 --- a/qemu-timer.c +++ b/qemu-timer.c @@ -566,6 +566,8 @@ int64_t qemu_clock_get_ns(QEMUClockType type) notifier_list_notify(&clock->reset_notifiers, &now); } return now; + case QEMU_CLOCK_VIRTUAL_RT: + return cpu_get_clock(); } } From bf2a7ddb0a066c27ed1432b918baa046b6b7dfc5 Mon Sep 17 00:00:00 2001 From: Pavel Dovgalyuk Date: Wed, 26 Nov 2014 13:40:55 +0300 Subject: [PATCH 31/47] cpus: make icount warp behave well with respect to stop/cont This patch makes icount warp use the new QEMU_CLOCK_VIRTUAL_RT clock. This way, icount's QEMU_CLOCK_VIRTUAL will never count time during which the virtual machine is stopped. 
Signed-off-by: Pavel Dovgalyuk Signed-off-by: Paolo Bonzini --- cpus.c | 21 ++++++++++----------- include/qemu/timer.h | 3 ++- 2 files changed, 12 insertions(+), 12 deletions(-) diff --git a/cpus.c b/cpus.c index 5f8acba15d..1b5168a1db 100644 --- a/cpus.c +++ b/cpus.c @@ -352,7 +352,7 @@ static void icount_warp_rt(void *opaque) seqlock_write_lock(&timers_state.vm_clock_seqlock); if (runstate_is_running()) { - int64_t clock = qemu_clock_get_ns(QEMU_CLOCK_REALTIME); + int64_t clock = cpu_get_clock_locked(); int64_t warp_delta; warp_delta = clock - vm_clock_warp_start; @@ -361,9 +361,8 @@ static void icount_warp_rt(void *opaque) * In adaptive mode, do not let QEMU_CLOCK_VIRTUAL run too * far ahead of real time. */ - int64_t cur_time = cpu_get_clock_locked(); int64_t cur_icount = cpu_get_icount_locked(); - int64_t delta = cur_time - cur_icount; + int64_t delta = clock - cur_icount; warp_delta = MIN(warp_delta, delta); } timers_state.qemu_icount_bias += warp_delta; @@ -426,7 +425,7 @@ void qemu_clock_warp(QEMUClockType type) } /* We want to use the earliest deadline from ALL vm_clocks */ - clock = qemu_clock_get_ns(QEMU_CLOCK_REALTIME); + clock = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL_RT); deadline = qemu_clock_deadline_ns_all(QEMU_CLOCK_VIRTUAL); if (deadline < 0) { return; @@ -444,8 +443,8 @@ void qemu_clock_warp(QEMUClockType type) * sleep in icount mode if there is a pending QEMU_CLOCK_VIRTUAL * timer; rather time could just advance to the next QEMU_CLOCK_VIRTUAL * event. Instead, we do stop VCPUs and only advance QEMU_CLOCK_VIRTUAL - * after some e"real" time, (related to the time left until the next - * event) has passed. The QEMU_CLOCK_REALTIME timer will do this. + * after some "real" time, (related to the time left until the next + * event) has passed. The QEMU_CLOCK_VIRTUAL_RT clock will do this. * This avoids that the warps are visible externally; for example, * you will not be sending network packets continuously instead of * every 100ms. 
@@ -519,8 +518,8 @@ void configure_icount(QemuOpts *opts, Error **errp) return; } icount_align_option = qemu_opt_get_bool(opts, "align", false); - icount_warp_timer = timer_new_ns(QEMU_CLOCK_REALTIME, - icount_warp_rt, NULL); + icount_warp_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL_RT, + icount_warp_rt, NULL); if (strcmp(option, "auto") != 0) { errno = 0; icount_time_shift = strtol(option, &rem_str, 0); @@ -544,10 +543,10 @@ void configure_icount(QemuOpts *opts, Error **errp) the virtual time trigger catches emulated time passing too fast. Realtime triggers occur even when idle, so use them less frequently than VM triggers. */ - icount_rt_timer = timer_new_ms(QEMU_CLOCK_REALTIME, - icount_adjust_rt, NULL); + icount_rt_timer = timer_new_ms(QEMU_CLOCK_VIRTUAL_RT, + icount_adjust_rt, NULL); timer_mod(icount_rt_timer, - qemu_clock_get_ms(QEMU_CLOCK_REALTIME) + 1000); + qemu_clock_get_ms(QEMU_CLOCK_VIRTUAL_RT) + 1000); icount_vm_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL, icount_adjust_vm, NULL); timer_mod(icount_vm_timer, diff --git a/include/qemu/timer.h b/include/qemu/timer.h index 552487c45f..d9df0940d9 100644 --- a/include/qemu/timer.h +++ b/include/qemu/timer.h @@ -41,7 +41,8 @@ * * Outside icount mode, this clock is the same as @QEMU_CLOCK_VIRTUAL. * In icount mode, this clock counts nanoseconds while the virtual - * machine is running. + * machine is running. It is used to increase @QEMU_CLOCK_VIRTUAL + * while the CPUs are sleeping and thus not executing instructions. */ typedef enum { From 5b9efc39aee90bbd343793e942bf8f582a0c9e4f Mon Sep 17 00:00:00 2001 From: Pavel Dovgalyuk Date: Wed, 26 Nov 2014 13:39:42 +0300 Subject: [PATCH 32/47] i386: do not cross the pages boundaries in replay mode This patch prevents crossing page boundaries in replay mode, because doing so can cause an exception. Do this only when the boundary is crossed by the first instruction in the block.
If the current instruction has already crossed the boundary, that is fine, because no exception has stopped this code. Signed-off-by: Pavel Dovgalyuk Signed-off-by: Paolo Bonzini --- target-i386/cpu.h | 3 +++ target-i386/translate.c | 14 ++++++++++++++ 2 files changed, 17 insertions(+) diff --git a/target-i386/cpu.h b/target-i386/cpu.h index 7e363654b8..3ecff96325 100644 --- a/target-i386/cpu.h +++ b/target-i386/cpu.h @@ -28,6 +28,9 @@ #define TARGET_LONG_BITS 32 #endif +/* Maximum instruction code size */ +#define TARGET_MAX_INSN_SIZE 16 + /* target supports implicit self modifying code */ #define TARGET_HAS_SMC /* support for self modifying code even if the modified instruction is diff --git a/target-i386/translate.c b/target-i386/translate.c index 782f7d2666..31a9f74467 100644 --- a/target-i386/translate.c +++ b/target-i386/translate.c @@ -8022,6 +8022,20 @@ static inline void gen_intermediate_code_internal(X86CPU *cpu, gen_eob(dc); break; } + /* Do not cross the boundary of the pages in icount mode, + it can cause an exception. Do it only when boundary is + crossed by the first instruction in the block. + If current instruction already crossed the bound - it's ok, + because an exception hasn't stopped this code. + */ + if (use_icount + && ((pc_ptr & TARGET_PAGE_MASK) + != ((pc_ptr + TARGET_MAX_INSN_SIZE - 1) & TARGET_PAGE_MASK) + || (pc_ptr & ~TARGET_PAGE_MASK) == 0)) { + gen_jmp_im(pc_ptr - dc->cs_base); + gen_eob(dc); + break; + } /* if too long translation, stop generation too */ if (tcg_ctx.gen_opc_ptr >= gen_opc_end || (pc_ptr - pc_start) >= (TARGET_PAGE_SIZE - 32) || From 64bbd372f21dac51cbbb6ba4a52fb0ecb21ca159 Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Fri, 5 Dec 2014 10:51:42 +0100 Subject: [PATCH 33/47] pc: add 2.3 machine types The next patch will differentiate them.
Reviewed-by: Eduardo Habkost Signed-off-by: Paolo Bonzini --- hw/i386/pc_piix.c | 29 +++++++++++++++++++++++++---- hw/i386/pc_q35.c | 26 +++++++++++++++++++++++--- 2 files changed, 48 insertions(+), 7 deletions(-) diff --git a/hw/i386/pc_piix.c b/hw/i386/pc_piix.c index 741dffd5f3..ea75f1cdab 100644 --- a/hw/i386/pc_piix.c +++ b/hw/i386/pc_piix.c @@ -303,9 +303,15 @@ static void pc_init_pci(MachineState *machine) pc_init1(machine, 1, 1); } +static void pc_compat_2_2(MachineState *machine) +{ +} + static void pc_compat_2_1(MachineState *machine) { PCMachineState *pcms = PC_MACHINE(machine); + + pc_compat_2_2(machine); smbios_uuid_encoded = false; x86_cpu_compat_set_features("coreduo", FEAT_1_ECX, CPUID_EXT_VMX, 0); x86_cpu_compat_set_features("core2duo", FEAT_1_ECX, CPUID_EXT_VMX, 0); @@ -380,6 +386,12 @@ static void pc_compat_1_2(MachineState *machine) x86_cpu_compat_kvm_no_autoenable(FEAT_KVM, KVM_FEATURE_PV_EOI); } +static void pc_init_pci_2_2(MachineState *machine) +{ + pc_compat_2_2(machine); + pc_init_pci(machine); +} + static void pc_init_pci_2_1(MachineState *machine) { pc_compat_2_1(machine); @@ -473,19 +485,27 @@ static void pc_xen_hvm_init(MachineState *machine) .desc = "Standard PC (i440FX + PIIX, 1996)", \ .hot_add_cpu = pc_hot_add_cpu -#define PC_I440FX_2_2_MACHINE_OPTIONS \ +#define PC_I440FX_2_3_MACHINE_OPTIONS \ PC_I440FX_MACHINE_OPTIONS, \ .default_machine_opts = "firmware=bios-256k.bin", \ .default_display = "std" -static QEMUMachine pc_i440fx_machine_v2_2 = { - PC_I440FX_2_2_MACHINE_OPTIONS, - .name = "pc-i440fx-2.2", +static QEMUMachine pc_i440fx_machine_v2_3 = { + PC_I440FX_2_3_MACHINE_OPTIONS, + .name = "pc-i440fx-2.3", .alias = "pc", .init = pc_init_pci, .is_default = 1, }; +#define PC_I440FX_2_2_MACHINE_OPTIONS PC_I440FX_2_3_MACHINE_OPTIONS + +static QEMUMachine pc_i440fx_machine_v2_2 = { + PC_I440FX_2_2_MACHINE_OPTIONS, + .name = "pc-i440fx-2.2", + .init = pc_init_pci_2_2, +}; + #define PC_I440FX_2_1_MACHINE_OPTIONS \ 
PC_I440FX_MACHINE_OPTIONS, \ .default_machine_opts = "firmware=bios-256k.bin" @@ -923,6 +943,7 @@ static QEMUMachine xenfv_machine = { static void pc_machine_init(void) { + qemu_register_pc_machine(&pc_i440fx_machine_v2_3); qemu_register_pc_machine(&pc_i440fx_machine_v2_2); qemu_register_pc_machine(&pc_i440fx_machine_v2_1); qemu_register_pc_machine(&pc_i440fx_machine_v2_0); diff --git a/hw/i386/pc_q35.c b/hw/i386/pc_q35.c index e9ba1a2735..a40b93943a 100644 --- a/hw/i386/pc_q35.c +++ b/hw/i386/pc_q35.c @@ -282,10 +282,15 @@ static void pc_q35_init(MachineState *machine) } } +static void pc_compat_2_2(MachineState *machine) +{ +} + static void pc_compat_2_1(MachineState *machine) { PCMachineState *pcms = PC_MACHINE(machine); + pc_compat_2_2(machine); pcms->enforce_aligned_dimm = false; smbios_uuid_encoded = false; x86_cpu_compat_set_features("coreduo", FEAT_1_ECX, CPUID_EXT_VMX, 0); @@ -329,6 +334,12 @@ static void pc_compat_1_4(MachineState *machine) x86_cpu_compat_set_features("Westmere", FEAT_1_ECX, 0, CPUID_EXT_PCLMULQDQ); } +static void pc_q35_init_2_2(MachineState *machine) +{ + pc_compat_2_2(machine); + pc_q35_init(machine); +} + static void pc_q35_init_2_1(MachineState *machine) { pc_compat_2_1(machine); @@ -372,16 +383,24 @@ static void pc_q35_init_1_4(MachineState *machine) .hot_add_cpu = pc_hot_add_cpu, \ .units_per_default_bus = 1 -#define PC_Q35_2_2_MACHINE_OPTIONS \ +#define PC_Q35_2_3_MACHINE_OPTIONS \ PC_Q35_MACHINE_OPTIONS, \ .default_machine_opts = "firmware=bios-256k.bin", \ .default_display = "std" +static QEMUMachine pc_q35_machine_v2_3 = { + PC_Q35_2_3_MACHINE_OPTIONS, + .name = "pc-q35-2.3", + .alias = "q35", + .init = pc_q35_init, +}; + +#define PC_Q35_2_2_MACHINE_OPTIONS PC_Q35_2_3_MACHINE_OPTIONS + static QEMUMachine pc_q35_machine_v2_2 = { PC_Q35_2_2_MACHINE_OPTIONS, .name = "pc-q35-2.2", - .alias = "q35", - .init = pc_q35_init, + .init = pc_q35_init_2_2, }; #define PC_Q35_2_1_MACHINE_OPTIONS \ @@ -460,6 +479,7 @@ static QEMUMachine 
pc_q35_machine_v1_4 = { static void pc_q35_machine_init(void) { + qemu_register_pc_machine(&pc_q35_machine_v2_3); qemu_register_pc_machine(&pc_q35_machine_v2_2); qemu_register_pc_machine(&pc_q35_machine_v2_1); qemu_register_pc_machine(&pc_q35_machine_v2_0); From b3a4f0b1a072a467d003755ca0e55c5be38387cb Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Wed, 10 Dec 2014 14:12:41 -0200 Subject: [PATCH 34/47] target-i386: add VME to all CPUs vm86 mode extensions date back to the 486. All models should have them. Signed-off-by: Paolo Bonzini Signed-off-by: Eduardo Habkost Signed-off-by: Paolo Bonzini --- hw/i386/pc_piix.c | 14 ++++++++++++++ hw/i386/pc_q35.c | 14 ++++++++++++++ target-i386/cpu.c | 30 +++++++++++++++--------------- 3 files changed, 43 insertions(+), 15 deletions(-) diff --git a/hw/i386/pc_piix.c b/hw/i386/pc_piix.c index ea75f1cdab..548d99a0fa 100644 --- a/hw/i386/pc_piix.c +++ b/hw/i386/pc_piix.c @@ -305,6 +305,20 @@ static void pc_init_pci(MachineState *machine) static void pc_compat_2_2(MachineState *machine) { + x86_cpu_compat_set_features("kvm64", FEAT_1_EDX, 0, CPUID_VME); + x86_cpu_compat_set_features("kvm32", FEAT_1_EDX, 0, CPUID_VME); + x86_cpu_compat_set_features("Conroe", FEAT_1_EDX, 0, CPUID_VME); + x86_cpu_compat_set_features("Penryn", FEAT_1_EDX, 0, CPUID_VME); + x86_cpu_compat_set_features("Nehalem", FEAT_1_EDX, 0, CPUID_VME); + x86_cpu_compat_set_features("Westmere", FEAT_1_EDX, 0, CPUID_VME); + x86_cpu_compat_set_features("SandyBridge", FEAT_1_EDX, 0, CPUID_VME); + x86_cpu_compat_set_features("Haswell", FEAT_1_EDX, 0, CPUID_VME); + x86_cpu_compat_set_features("Broadwell", FEAT_1_EDX, 0, CPUID_VME); + x86_cpu_compat_set_features("Opteron_G1", FEAT_1_EDX, 0, CPUID_VME); + x86_cpu_compat_set_features("Opteron_G2", FEAT_1_EDX, 0, CPUID_VME); + x86_cpu_compat_set_features("Opteron_G3", FEAT_1_EDX, 0, CPUID_VME); + x86_cpu_compat_set_features("Opteron_G4", FEAT_1_EDX, 0, CPUID_VME); + x86_cpu_compat_set_features("Opteron_G5", FEAT_1_EDX, 0, 
CPUID_VME); } static void pc_compat_2_1(MachineState *machine) diff --git a/hw/i386/pc_q35.c b/hw/i386/pc_q35.c index a40b93943a..1105ef430f 100644 --- a/hw/i386/pc_q35.c +++ b/hw/i386/pc_q35.c @@ -284,6 +284,20 @@ static void pc_q35_init(MachineState *machine) static void pc_compat_2_2(MachineState *machine) { + x86_cpu_compat_set_features("kvm64", FEAT_1_EDX, 0, CPUID_VME); + x86_cpu_compat_set_features("kvm32", FEAT_1_EDX, 0, CPUID_VME); + x86_cpu_compat_set_features("Conroe", FEAT_1_EDX, 0, CPUID_VME); + x86_cpu_compat_set_features("Penryn", FEAT_1_EDX, 0, CPUID_VME); + x86_cpu_compat_set_features("Nehalem", FEAT_1_EDX, 0, CPUID_VME); + x86_cpu_compat_set_features("Westmere", FEAT_1_EDX, 0, CPUID_VME); + x86_cpu_compat_set_features("SandyBridge", FEAT_1_EDX, 0, CPUID_VME); + x86_cpu_compat_set_features("Haswell", FEAT_1_EDX, 0, CPUID_VME); + x86_cpu_compat_set_features("Broadwell", FEAT_1_EDX, 0, CPUID_VME); + x86_cpu_compat_set_features("Opteron_G1", FEAT_1_EDX, 0, CPUID_VME); + x86_cpu_compat_set_features("Opteron_G2", FEAT_1_EDX, 0, CPUID_VME); + x86_cpu_compat_set_features("Opteron_G3", FEAT_1_EDX, 0, CPUID_VME); + x86_cpu_compat_set_features("Opteron_G4", FEAT_1_EDX, 0, CPUID_VME); + x86_cpu_compat_set_features("Opteron_G5", FEAT_1_EDX, 0, CPUID_VME); } static void pc_compat_2_1(MachineState *machine) diff --git a/target-i386/cpu.c b/target-i386/cpu.c index ef7d71c778..136c4570cd 100644 --- a/target-i386/cpu.c +++ b/target-i386/cpu.c @@ -760,9 +760,9 @@ static X86CPUDefinition builtin_x86_defs[] = { .family = 15, .model = 6, .stepping = 1, - /* Missing: CPUID_VME, CPUID_HT */ + /* Missing: CPUID_HT */ .features[FEAT_1_EDX] = - PPRO_FEATURES | + PPRO_FEATURES | CPUID_VME | CPUID_MTRR | CPUID_CLFLUSH | CPUID_MCA | CPUID_PSE36, /* Missing: CPUID_EXT_POPCNT, CPUID_EXT_MONITOR */ @@ -802,7 +802,7 @@ static X86CPUDefinition builtin_x86_defs[] = { .model = 6, .stepping = 1, .features[FEAT_1_EDX] = - PPRO_FEATURES | + PPRO_FEATURES | CPUID_VME | CPUID_MTRR | 
CPUID_CLFLUSH | CPUID_MCA | CPUID_PSE36, .features[FEAT_1_ECX] = CPUID_EXT_SSE3, @@ -928,7 +928,7 @@ static X86CPUDefinition builtin_x86_defs[] = { .model = 15, .stepping = 3, .features[FEAT_1_EDX] = - CPUID_SSE2 | CPUID_SSE | CPUID_FXSR | CPUID_MMX | + CPUID_VME | CPUID_SSE2 | CPUID_SSE | CPUID_FXSR | CPUID_MMX | CPUID_CLFLUSH | CPUID_PSE36 | CPUID_PAT | CPUID_CMOV | CPUID_MCA | CPUID_PGE | CPUID_MTRR | CPUID_SEP | CPUID_APIC | CPUID_CX8 | CPUID_MCE | CPUID_PAE | CPUID_MSR | CPUID_TSC | CPUID_PSE | @@ -950,7 +950,7 @@ static X86CPUDefinition builtin_x86_defs[] = { .model = 23, .stepping = 3, .features[FEAT_1_EDX] = - CPUID_SSE2 | CPUID_SSE | CPUID_FXSR | CPUID_MMX | + CPUID_VME | CPUID_SSE2 | CPUID_SSE | CPUID_FXSR | CPUID_MMX | CPUID_CLFLUSH | CPUID_PSE36 | CPUID_PAT | CPUID_CMOV | CPUID_MCA | CPUID_PGE | CPUID_MTRR | CPUID_SEP | CPUID_APIC | CPUID_CX8 | CPUID_MCE | CPUID_PAE | CPUID_MSR | CPUID_TSC | CPUID_PSE | @@ -973,7 +973,7 @@ static X86CPUDefinition builtin_x86_defs[] = { .model = 26, .stepping = 3, .features[FEAT_1_EDX] = - CPUID_SSE2 | CPUID_SSE | CPUID_FXSR | CPUID_MMX | + CPUID_VME | CPUID_SSE2 | CPUID_SSE | CPUID_FXSR | CPUID_MMX | CPUID_CLFLUSH | CPUID_PSE36 | CPUID_PAT | CPUID_CMOV | CPUID_MCA | CPUID_PGE | CPUID_MTRR | CPUID_SEP | CPUID_APIC | CPUID_CX8 | CPUID_MCE | CPUID_PAE | CPUID_MSR | CPUID_TSC | CPUID_PSE | @@ -996,7 +996,7 @@ static X86CPUDefinition builtin_x86_defs[] = { .model = 44, .stepping = 1, .features[FEAT_1_EDX] = - CPUID_SSE2 | CPUID_SSE | CPUID_FXSR | CPUID_MMX | + CPUID_VME | CPUID_SSE2 | CPUID_SSE | CPUID_FXSR | CPUID_MMX | CPUID_CLFLUSH | CPUID_PSE36 | CPUID_PAT | CPUID_CMOV | CPUID_MCA | CPUID_PGE | CPUID_MTRR | CPUID_SEP | CPUID_APIC | CPUID_CX8 | CPUID_MCE | CPUID_PAE | CPUID_MSR | CPUID_TSC | CPUID_PSE | @@ -1020,7 +1020,7 @@ static X86CPUDefinition builtin_x86_defs[] = { .model = 42, .stepping = 1, .features[FEAT_1_EDX] = - CPUID_SSE2 | CPUID_SSE | CPUID_FXSR | CPUID_MMX | + CPUID_VME | CPUID_SSE2 | CPUID_SSE | CPUID_FXSR 
| CPUID_MMX | CPUID_CLFLUSH | CPUID_PSE36 | CPUID_PAT | CPUID_CMOV | CPUID_MCA | CPUID_PGE | CPUID_MTRR | CPUID_SEP | CPUID_APIC | CPUID_CX8 | CPUID_MCE | CPUID_PAE | CPUID_MSR | CPUID_TSC | CPUID_PSE | @@ -1049,7 +1049,7 @@ static X86CPUDefinition builtin_x86_defs[] = { .model = 60, .stepping = 1, .features[FEAT_1_EDX] = - CPUID_SSE2 | CPUID_SSE | CPUID_FXSR | CPUID_MMX | + CPUID_VME | CPUID_SSE2 | CPUID_SSE | CPUID_FXSR | CPUID_MMX | CPUID_CLFLUSH | CPUID_PSE36 | CPUID_PAT | CPUID_CMOV | CPUID_MCA | CPUID_PGE | CPUID_MTRR | CPUID_SEP | CPUID_APIC | CPUID_CX8 | CPUID_MCE | CPUID_PAE | CPUID_MSR | CPUID_TSC | CPUID_PSE | @@ -1084,7 +1084,7 @@ static X86CPUDefinition builtin_x86_defs[] = { .model = 61, .stepping = 2, .features[FEAT_1_EDX] = - CPUID_SSE2 | CPUID_SSE | CPUID_FXSR | CPUID_MMX | + CPUID_VME | CPUID_SSE2 | CPUID_SSE | CPUID_FXSR | CPUID_MMX | CPUID_CLFLUSH | CPUID_PSE36 | CPUID_PAT | CPUID_CMOV | CPUID_MCA | CPUID_PGE | CPUID_MTRR | CPUID_SEP | CPUID_APIC | CPUID_CX8 | CPUID_MCE | CPUID_PAE | CPUID_MSR | CPUID_TSC | CPUID_PSE | @@ -1120,7 +1120,7 @@ static X86CPUDefinition builtin_x86_defs[] = { .model = 6, .stepping = 1, .features[FEAT_1_EDX] = - CPUID_SSE2 | CPUID_SSE | CPUID_FXSR | CPUID_MMX | + CPUID_VME | CPUID_SSE2 | CPUID_SSE | CPUID_FXSR | CPUID_MMX | CPUID_CLFLUSH | CPUID_PSE36 | CPUID_PAT | CPUID_CMOV | CPUID_MCA | CPUID_PGE | CPUID_MTRR | CPUID_SEP | CPUID_APIC | CPUID_CX8 | CPUID_MCE | CPUID_PAE | CPUID_MSR | CPUID_TSC | CPUID_PSE | @@ -1145,7 +1145,7 @@ static X86CPUDefinition builtin_x86_defs[] = { .model = 6, .stepping = 1, .features[FEAT_1_EDX] = - CPUID_SSE2 | CPUID_SSE | CPUID_FXSR | CPUID_MMX | + CPUID_VME | CPUID_SSE2 | CPUID_SSE | CPUID_FXSR | CPUID_MMX | CPUID_CLFLUSH | CPUID_PSE36 | CPUID_PAT | CPUID_CMOV | CPUID_MCA | CPUID_PGE | CPUID_MTRR | CPUID_SEP | CPUID_APIC | CPUID_CX8 | CPUID_MCE | CPUID_PAE | CPUID_MSR | CPUID_TSC | CPUID_PSE | @@ -1173,7 +1173,7 @@ static X86CPUDefinition builtin_x86_defs[] = { .model = 6, .stepping = 
1, .features[FEAT_1_EDX] = - CPUID_SSE2 | CPUID_SSE | CPUID_FXSR | CPUID_MMX | + CPUID_VME | CPUID_SSE2 | CPUID_SSE | CPUID_FXSR | CPUID_MMX | CPUID_CLFLUSH | CPUID_PSE36 | CPUID_PAT | CPUID_CMOV | CPUID_MCA | CPUID_PGE | CPUID_MTRR | CPUID_SEP | CPUID_APIC | CPUID_CX8 | CPUID_MCE | CPUID_PAE | CPUID_MSR | CPUID_TSC | CPUID_PSE | @@ -1203,7 +1203,7 @@ static X86CPUDefinition builtin_x86_defs[] = { .model = 1, .stepping = 2, .features[FEAT_1_EDX] = - CPUID_SSE2 | CPUID_SSE | CPUID_FXSR | CPUID_MMX | + CPUID_VME | CPUID_SSE2 | CPUID_SSE | CPUID_FXSR | CPUID_MMX | CPUID_CLFLUSH | CPUID_PSE36 | CPUID_PAT | CPUID_CMOV | CPUID_MCA | CPUID_PGE | CPUID_MTRR | CPUID_SEP | CPUID_APIC | CPUID_CX8 | CPUID_MCE | CPUID_PAE | CPUID_MSR | CPUID_TSC | CPUID_PSE | @@ -1238,7 +1238,7 @@ static X86CPUDefinition builtin_x86_defs[] = { .model = 2, .stepping = 0, .features[FEAT_1_EDX] = - CPUID_SSE2 | CPUID_SSE | CPUID_FXSR | CPUID_MMX | + CPUID_VME | CPUID_SSE2 | CPUID_SSE | CPUID_FXSR | CPUID_MMX | CPUID_CLFLUSH | CPUID_PSE36 | CPUID_PAT | CPUID_CMOV | CPUID_MCA | CPUID_PGE | CPUID_MTRR | CPUID_SEP | CPUID_APIC | CPUID_CX8 | CPUID_MCE | CPUID_PAE | CPUID_MSR | CPUID_TSC | CPUID_PSE | From 78a611f1936b3eac8ed78a2be2146a742a85212c Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Fri, 5 Dec 2014 10:52:46 +0100 Subject: [PATCH 35/47] target-i386: add f16c and rdrand to Haswell and Broadwell Both were added in Ivy Bridge (for which we do not have a CPU model yet!). 
Reviewed-by: Eduardo Habkost Signed-off-by: Paolo Bonzini --- hw/i386/pc_piix.c | 4 ++++ hw/i386/pc_q35.c | 4 ++++ target-i386/cpu.c | 4 ++-- 3 files changed, 10 insertions(+), 2 deletions(-) diff --git a/hw/i386/pc_piix.c b/hw/i386/pc_piix.c index 548d99a0fa..7647e34528 100644 --- a/hw/i386/pc_piix.c +++ b/hw/i386/pc_piix.c @@ -319,6 +319,10 @@ static void pc_compat_2_2(MachineState *machine) x86_cpu_compat_set_features("Opteron_G3", FEAT_1_EDX, 0, CPUID_VME); x86_cpu_compat_set_features("Opteron_G4", FEAT_1_EDX, 0, CPUID_VME); x86_cpu_compat_set_features("Opteron_G5", FEAT_1_EDX, 0, CPUID_VME); + x86_cpu_compat_set_features("Haswell", FEAT_1_ECX, 0, CPUID_EXT_F16C); + x86_cpu_compat_set_features("Haswell", FEAT_1_ECX, 0, CPUID_EXT_RDRAND); + x86_cpu_compat_set_features("Broadwell", FEAT_1_ECX, 0, CPUID_EXT_F16C); + x86_cpu_compat_set_features("Broadwell", FEAT_1_ECX, 0, CPUID_EXT_RDRAND); } static void pc_compat_2_1(MachineState *machine) diff --git a/hw/i386/pc_q35.c b/hw/i386/pc_q35.c index 1105ef430f..858e82889d 100644 --- a/hw/i386/pc_q35.c +++ b/hw/i386/pc_q35.c @@ -298,6 +298,10 @@ static void pc_compat_2_2(MachineState *machine) x86_cpu_compat_set_features("Opteron_G3", FEAT_1_EDX, 0, CPUID_VME); x86_cpu_compat_set_features("Opteron_G4", FEAT_1_EDX, 0, CPUID_VME); x86_cpu_compat_set_features("Opteron_G5", FEAT_1_EDX, 0, CPUID_VME); + x86_cpu_compat_set_features("Haswell", FEAT_1_ECX, 0, CPUID_EXT_F16C); + x86_cpu_compat_set_features("Haswell", FEAT_1_ECX, 0, CPUID_EXT_RDRAND); + x86_cpu_compat_set_features("Broadwell", FEAT_1_ECX, 0, CPUID_EXT_F16C); + x86_cpu_compat_set_features("Broadwell", FEAT_1_ECX, 0, CPUID_EXT_RDRAND); } static void pc_compat_2_1(MachineState *machine) diff --git a/target-i386/cpu.c b/target-i386/cpu.c index 136c4570cd..b2bb9a44ca 100644 --- a/target-i386/cpu.c +++ b/target-i386/cpu.c @@ -1060,7 +1060,7 @@ static X86CPUDefinition builtin_x86_defs[] = { CPUID_EXT_SSE41 | CPUID_EXT_CX16 | CPUID_EXT_SSSE3 | CPUID_EXT_PCLMULQDQ | 
CPUID_EXT_SSE3 | CPUID_EXT_TSC_DEADLINE_TIMER | CPUID_EXT_FMA | CPUID_EXT_MOVBE | - CPUID_EXT_PCID, + CPUID_EXT_PCID | CPUID_EXT_F16C | CPUID_EXT_RDRAND, .features[FEAT_8000_0001_EDX] = CPUID_EXT2_LM | CPUID_EXT2_RDTSCP | CPUID_EXT2_NX | CPUID_EXT2_SYSCALL, @@ -1095,7 +1095,7 @@ static X86CPUDefinition builtin_x86_defs[] = { CPUID_EXT_SSE41 | CPUID_EXT_CX16 | CPUID_EXT_SSSE3 | CPUID_EXT_PCLMULQDQ | CPUID_EXT_SSE3 | CPUID_EXT_TSC_DEADLINE_TIMER | CPUID_EXT_FMA | CPUID_EXT_MOVBE | - CPUID_EXT_PCID, + CPUID_EXT_PCID | CPUID_EXT_F16C | CPUID_EXT_RDRAND, .features[FEAT_8000_0001_EDX] = CPUID_EXT2_LM | CPUID_EXT2_RDTSCP | CPUID_EXT2_NX | CPUID_EXT2_SYSCALL, From 2f9ac42acf4602453d5839221df6cc7cabc3355e Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Fri, 5 Dec 2014 10:55:23 +0100 Subject: [PATCH 36/47] target-i386: add Ivy Bridge CPU model Signed-off-by: Paolo Bonzini --- target-i386/cpu.c | 32 ++++++++++++++++++++++++++++++++ 1 file changed, 32 insertions(+) diff --git a/target-i386/cpu.c b/target-i386/cpu.c index b2bb9a44ca..b81ac5cda1 100644 --- a/target-i386/cpu.c +++ b/target-i386/cpu.c @@ -1041,6 +1041,38 @@ static X86CPUDefinition builtin_x86_defs[] = { .xlevel = 0x8000000A, .model_id = "Intel Xeon E312xx (Sandy Bridge)", }, + { + .name = "IvyBridge", + .level = 0xd, + .vendor = CPUID_VENDOR_INTEL, + .family = 6, + .model = 58, + .stepping = 9, + .features[FEAT_1_EDX] = + CPUID_VME | CPUID_SSE2 | CPUID_SSE | CPUID_FXSR | CPUID_MMX | + CPUID_CLFLUSH | CPUID_PSE36 | CPUID_PAT | CPUID_CMOV | CPUID_MCA | + CPUID_PGE | CPUID_MTRR | CPUID_SEP | CPUID_APIC | CPUID_CX8 | + CPUID_MCE | CPUID_PAE | CPUID_MSR | CPUID_TSC | CPUID_PSE | + CPUID_DE | CPUID_FP87, + .features[FEAT_1_ECX] = + CPUID_EXT_AVX | CPUID_EXT_XSAVE | CPUID_EXT_AES | + CPUID_EXT_TSC_DEADLINE_TIMER | CPUID_EXT_POPCNT | + CPUID_EXT_X2APIC | CPUID_EXT_SSE42 | CPUID_EXT_SSE41 | + CPUID_EXT_CX16 | CPUID_EXT_SSSE3 | CPUID_EXT_PCLMULQDQ | + CPUID_EXT_SSE3 | CPUID_EXT_F16C | CPUID_EXT_RDRAND, + 
.features[FEAT_7_0_EBX] = + CPUID_7_0_EBX_FSGSBASE | CPUID_7_0_EBX_SMEP | + CPUID_7_0_EBX_ERMS, + .features[FEAT_8000_0001_EDX] = + CPUID_EXT2_LM | CPUID_EXT2_RDTSCP | CPUID_EXT2_NX | + CPUID_EXT2_SYSCALL, + .features[FEAT_8000_0001_ECX] = + CPUID_EXT3_LAHF_LM, + .features[FEAT_XSAVE] = + CPUID_XSAVE_XSAVEOPT, + .xlevel = 0x8000000A, + .model_id = "Intel Xeon E3-12xx v2 (Ivy Bridge)", + }, { .name = "Haswell", .level = 0xd, From 575a6f4082c45778b93032ef1e7fbea4467b3a2a Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Wed, 10 Dec 2014 16:56:46 +0100 Subject: [PATCH 37/47] kvm/apic: fix 2.2->2.1 migration The wait_for_sipi field is set back to 1 after an INIT, so it was not effective to reset it in kvm_apic_realize. Introduce a reset callback and reset wait_for_sipi there. Reported-by: Igor Mammedov Cc: qemu-stable@nongnu.org Reviewed-by: Dr. David Alan Gilbert Signed-off-by: Paolo Bonzini --- hw/i386/kvm/apic.c | 10 +++++++--- hw/intc/apic_common.c | 5 +++++ include/hw/i386/apic_internal.h | 1 + 3 files changed, 13 insertions(+), 3 deletions(-) diff --git a/hw/i386/kvm/apic.c b/hw/i386/kvm/apic.c index 271e97f86f..5b470562a6 100644 --- a/hw/i386/kvm/apic.c +++ b/hw/i386/kvm/apic.c @@ -171,13 +171,16 @@ static const MemoryRegionOps kvm_apic_io_ops = { .endianness = DEVICE_NATIVE_ENDIAN, }; +static void kvm_apic_reset(APICCommonState *s) +{ + /* Not used by KVM, which uses the CPU mp_state instead. */ + s->wait_for_sipi = 0; +} + static void kvm_apic_realize(DeviceState *dev, Error **errp) { APICCommonState *s = APIC_COMMON(dev); - /* Not used by KVM, which uses the CPU mp_state instead. 
*/ - s->wait_for_sipi = 0; - memory_region_init_io(&s->io_memory, NULL, &kvm_apic_io_ops, s, "kvm-apic-msi", APIC_SPACE_SIZE); @@ -191,6 +194,7 @@ static void kvm_apic_class_init(ObjectClass *klass, void *data) APICCommonClass *k = APIC_COMMON_CLASS(klass); k->realize = kvm_apic_realize; + k->reset = kvm_apic_reset; k->set_base = kvm_apic_set_base; k->set_tpr = kvm_apic_set_tpr; k->get_tpr = kvm_apic_get_tpr; diff --git a/hw/intc/apic_common.c b/hw/intc/apic_common.c index 4e62f25edb..d9bb188c15 100644 --- a/hw/intc/apic_common.c +++ b/hw/intc/apic_common.c @@ -178,6 +178,7 @@ bool apic_next_timer(APICCommonState *s, int64_t current_time) void apic_init_reset(DeviceState *dev) { APICCommonState *s = APIC_COMMON(dev); + APICCommonClass *info = APIC_COMMON_GET_CLASS(s); int i; if (!s) { @@ -206,6 +207,10 @@ void apic_init_reset(DeviceState *dev) timer_del(s->timer); } s->timer_expiry = -1; + + if (info->reset) { + info->reset(s); + } } void apic_designate_bsp(DeviceState *dev) diff --git a/include/hw/i386/apic_internal.h b/include/hw/i386/apic_internal.h index 83e2a42cc1..dc7a89d988 100644 --- a/include/hw/i386/apic_internal.h +++ b/include/hw/i386/apic_internal.h @@ -89,6 +89,7 @@ typedef struct APICCommonClass void (*external_nmi)(APICCommonState *s); void (*pre_save)(APICCommonState *s); void (*post_load)(APICCommonState *s); + void (*reset)(APICCommonState *s); } APICCommonClass; struct APICCommonState { From 269e2358492b674c50160553d037702e916b9f1b Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Thu, 11 Dec 2014 02:17:03 +0100 Subject: [PATCH 38/47] linuxboot: fix loading old kernels Old kernels that used high memory only allowed the initrd to be in the first 896MB of memory. If you load the initrd above, they complain that "initrd extends beyond end of memory". In order to fix this, while not breaking machines with small amounts of memory fixed by cdebec5 (linuxboot: compute initrd loading address, 2014-10-06), we need to distinguish two cases. 
If pc.c placed the initrd at end of memory, use the new algorithm based on the e801 memory map. If instead pc.c placed the initrd at the maximum address specified by the bzImage, leave it there. The only interesting part is that the low-memory info block is now loaded very early, in real mode, and thus the 32-bit address has to be converted into a real mode segment. The initrd address is also patched in the info block before entering real mode, it is simpler that way. This fixes booting the RHEL4.8 32-bit installation image with 1GB of RAM. Cc: qemu-stable@nongnu.org Cc: mst@redhat.com Cc: jsnow@redhat.com Signed-off-by: Paolo Bonzini --- pc-bios/linuxboot.bin | Bin 1024 -> 1024 bytes pc-bios/optionrom/linuxboot.S | 37 +++++++++++++++++++++++++--------- 2 files changed, 27 insertions(+), 10 deletions(-) diff --git a/pc-bios/linuxboot.bin b/pc-bios/linuxboot.bin index 130103fb739228a6869aaf1b174b9d20c13378fc..923d1796fbc58f1a836c160be533d1cf697b7511 100644 GIT binary patch delta 142 zcmZqRXyBNj#iGKj$vjbStpET3 delta 89 zcmV-f0H*(d2!IHXDg?v=$pVossgsWZS(EMo83L>VlL-PJ0%li}!2uqVMgp}0xE7KA v9R$Z_iNlfnG8kuOkAoNj003sV000004= 0x203 + jae 1f // have initrd_max + movl $0x37ffffff, %es:0x22c // else assume 0x37ffffff +1: + + /* Check if using kernel-specified initrd address */ + read_fw FW_CFG_INITRD_ADDR + mov %eax, %edi // (load_kernel wants it in %edi) + read_fw FW_CFG_INITRD_SIZE // find end of initrd + add %edi, %eax + xor %es:0x22c, %eax // if it matches es:0x22c + and $-4096, %eax // (apart from padding for page) + jz load_kernel // then initrd is not at top + // of memory + + /* pc.c placed the initrd at end of memory. Compute a better + * initrd address based on e801 data. 
+ */ mov $0xe801, %ax xor %cx, %cx xor %dx, %dx @@ -107,7 +131,9 @@ copy_kernel: read_fw FW_CFG_INITRD_SIZE subl %eax, %edi andl $-4096, %edi /* EDI = start of initrd */ + movl %edi, %es:0x218 /* put it in the header */ +load_kernel: /* We need to load the kernel into memory we can't access in 16 bit mode, so let's get into 32 bit mode, write the kernel and jump back again. */ @@ -139,19 +165,10 @@ copy_kernel: /* We're now running in 16-bit CS, but 32-bit ES! */ /* Load kernel and initrd */ - pushl %edi read_fw_blob_addr32_edi(FW_CFG_INITRD) read_fw_blob_addr32(FW_CFG_KERNEL) read_fw_blob_addr32(FW_CFG_CMDLINE) - read_fw FW_CFG_SETUP_ADDR - mov %eax, %edi - mov %eax, %ebx - read_fw_blob_addr32_edi(FW_CFG_SETUP) - - /* Update the header with the initrd address we chose above */ - popl %es:0x218(%ebx) - /* And now jump into Linux! */ mov $0, %eax mov %eax, %cr0 From 4e02b0fcf5c97579d0d3261c80c65abcf92870fe Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Fri, 12 Dec 2014 10:17:08 +0100 Subject: [PATCH 39/47] serial: reset thr_ipending on IER writes with THRI=0 This is responsible for failure of migration from 2.2 to 2.1, because thr_ipending is always one in practice. serial.c is setting thr_ipending unconditionally. However, thr_ipending is not used at all if THRI=0, and it will be overwritten again the next time THRE or THRI changes. For that reason, we can set thr_ipending to zero every time THRI is reset. There is disagreement on whether LSR.THRE should be resampled when IER.THRI goes from 1 to 1. This patch does not touch the code, leaving that for QEMU 2.3+. This has no semantic change and is enough to fix migration in the common case where the interrupt is not pending or is reported in IIR. It does not change the migration format, so 2.2.0 -> 2.1 will remain broken but we can fix 2.2.1 -> 2.1 without breaking 2.2.1 <-> 2.2.0.
The case that remains broken (the one in which the subsection is strictly necessary) is when THRE=1, the THRI interrupt has *not* been acknowledged yet, and a higher-priority interrupt comes. In this case, you need the subsection to tell the destination that the lower-priority THRI interrupt is pending. The subsection's breakage of migration, in this case, prevents continuing the VM on the destination with an invalid state. Cc: qemu-stable@nongnu.org Reported-by: Igor Mammedov Reviewed-by: Dr. David Alan Gilbert Signed-off-by: Paolo Bonzini --- hw/char/serial.c | 18 ++++++++++++++++-- 1 file changed, 16 insertions(+), 2 deletions(-) diff --git a/hw/char/serial.c b/hw/char/serial.c index ebcacdc872..8c42d03faf 100644 --- a/hw/char/serial.c +++ b/hw/char/serial.c @@ -350,10 +350,24 @@ static void serial_ioport_write(void *opaque, hwaddr addr, uint64_t val, s->poll_msl = 0; } } - if (s->lsr & UART_LSR_THRE) { + + /* Turning on the THRE interrupt on IER can trigger the interrupt + * if LSR.THRE=1, even if it had been masked before by reading IIR. + * This is not in the datasheet, but Windows relies on it. It is + * unclear if THRE has to be resampled every time THRI becomes + * 1, or only on the rising edge. Bochs does the latter, and Windows + * always toggles IER to all zeroes and back to all ones. But for + * now leave it as it has always been in QEMU. + * + * If IER.THRI is zero, thr_ipending is not used. Set it to zero + * so that the thr_ipending subsection is not migrated.
+ */ + if ((s->ier & UART_IER_THRI) && (s->lsr & UART_LSR_THRE)) { s->thr_ipending = 1; - serial_update_irq(s); + } else { + s->thr_ipending = 0; } + serial_update_irq(s); } break; case 2: From 0d931d706266d6ada3bf22d3afca1afdc8d12fa9 Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Thu, 11 Dec 2014 17:01:39 +0100 Subject: [PATCH 40/47] serial: clean up THRE/TEMT handling - assert TEMT is cleared before sending a character; we'll get one from TSR if tsr_retry > 0, from the FIFO or THR otherwise - assert THRE cleared and FIFO not empty (if enabled) before fetching a character to send. This effectively reverts dffacd46, but the check makes no sense and commit f702e62 (serial: change retry logic to avoid concurrency, 2014-07-11) must have made it unnecessary. The commit message for f702e62 talks about multiple calls to qemu_chr_fe_add_watch triggering s->tsr_retry >= MAX_XMIT_RETRY, but other failures were possible. For example, if you have multiple calls, the subsequent ones will see s->tsr_retry == 0 and will find THRE and/or TEMT on entry. - for clarity, raise THRI immediately after the code sets THRE - check THRE to see if another character has to be sent. This makes the assertions more obvious and also means TEMT has to be set as soon as the loop ends. It makes the loop send both TSR and THR if flow-control happens in non-FIFO mode. Previously, THR would be lost. - clear TEMT together with THRE even in the non-FIFO case The last two items are bugfixes, but they were just found by inspection and do not squash known bugs. Reviewed-by: Dr. 
David Alan Gilbert Signed-off-by: Paolo Bonzini --- hw/char/serial.c | 26 ++++++++++++-------------- 1 file changed, 12 insertions(+), 14 deletions(-) diff --git a/hw/char/serial.c b/hw/char/serial.c index 8c42d03faf..9adb12667d 100644 --- a/hw/char/serial.c +++ b/hw/char/serial.c @@ -224,21 +224,23 @@ static gboolean serial_xmit(GIOChannel *chan, GIOCondition cond, void *opaque) SerialState *s = opaque; do { + assert(!(s->lsr & UART_LSR_TEMT)); if (s->tsr_retry <= 0) { + assert(!(s->lsr & UART_LSR_THRE)); + if (s->fcr & UART_FCR_FE) { - if (fifo8_is_empty(&s->xmit_fifo)) { - return FALSE; - } + assert(!fifo8_is_empty(&s->xmit_fifo)); s->tsr = fifo8_pop(&s->xmit_fifo); if (!s->xmit_fifo.num) { s->lsr |= UART_LSR_THRE; } - } else if ((s->lsr & UART_LSR_THRE)) { - return FALSE; } else { s->tsr = s->thr; s->lsr |= UART_LSR_THRE; - s->lsr &= ~UART_LSR_TEMT; + } + if ((s->lsr & UART_LSR_THRE) && !s->thr_ipending) { + s->thr_ipending = 1; + serial_update_irq(s); } } @@ -256,17 +258,13 @@ static gboolean serial_xmit(GIOChannel *chan, GIOCondition cond, void *opaque) } else { s->tsr_retry = 0; } + /* Transmit another byte if it is already available. It is only possible when FIFO is enabled and not empty. 
*/ - } while ((s->fcr & UART_FCR_FE) && !fifo8_is_empty(&s->xmit_fifo)); + } while (!(s->lsr & UART_LSR_THRE)); s->last_xmit_ts = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL); - - if (s->lsr & UART_LSR_THRE) { - s->lsr |= UART_LSR_TEMT; - s->thr_ipending = 1; - serial_update_irq(s); - } + s->lsr |= UART_LSR_TEMT; return FALSE; } @@ -323,10 +321,10 @@ static void serial_ioport_write(void *opaque, hwaddr addr, uint64_t val, fifo8_pop(&s->xmit_fifo); } fifo8_push(&s->xmit_fifo, s->thr); - s->lsr &= ~UART_LSR_TEMT; } s->thr_ipending = 0; s->lsr &= ~UART_LSR_THRE; + s->lsr &= ~UART_LSR_TEMT; serial_update_irq(s); if (s->tsr_retry <= 0) { serial_xmit(NULL, G_IO_OUT, s); From 023c3a9707d0d9259a1e858cdf7804dd10973fca Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Thu, 11 Dec 2014 19:08:14 +0100 Subject: [PATCH 41/47] serial: update LSR on enabling/disabling FIFOs When the transmit FIFO is emptied or enabled, the transmitter hold register is empty. When it is disabled, it is also emptied and in addition the previous contents of the transmitter hold register are discarded. In either case, the THRE bit in LSR must be set and THRI raised. When the receive FIFO is emptied or enabled, the data ready and break bits must be cleared in LSR. Likewise when the receive FIFO is disabled. Reviewed-by: Dr. 
David Alan Gilbert Signed-off-by: Paolo Bonzini --- hw/char/serial.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/hw/char/serial.c b/hw/char/serial.c index 9adb12667d..76054a542a 100644 --- a/hw/char/serial.c +++ b/hw/char/serial.c @@ -377,12 +377,15 @@ static void serial_ioport_write(void *opaque, hwaddr addr, uint64_t val, /* FIFO clear */ if (val & UART_FCR_RFR) { + s->lsr &= ~(UART_LSR_DR | UART_LSR_BI); timer_del(s->fifo_timeout_timer); s->timeout_ipending = 0; fifo8_reset(&s->recv_fifo); } if (val & UART_FCR_XFR) { + s->lsr |= UART_LSR_THRE; + s->thr_ipending = 1; fifo8_reset(&s->xmit_fifo); } From 1645b8eee558ffe2389a081bf61d08a864c36d2c Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Fri, 12 Dec 2014 11:54:42 +0100 Subject: [PATCH 42/47] serial: only resample THR interrupt on rising edge of IER.THRI There is disagreement on whether LSR.THRE should be resampled when IER.THRI goes from 1 to 1. Bochs only does it if IER.THRI goes from 0 to 1; PCE does it even if IER.THRI is unchanged. But the Windows driver seems to always go from 1 to 0 and back to 1, so do things in agreement with Bochs, because the handling of thr_ipending was reported in 2010 (https://lists.gnu.org/archive/html/qemu-devel/2010-03/msg01914.html) as breaking DR-DOS Plus. 
Reported-by: Roy Tam Signed-off-by: Paolo Bonzini --- hw/char/serial.c | 25 ++++++++++++++++--------- 1 file changed, 16 insertions(+), 9 deletions(-) diff --git a/hw/char/serial.c b/hw/char/serial.c index 76054a542a..6d522ff4f3 100644 --- a/hw/char/serial.c +++ b/hw/char/serial.c @@ -336,10 +336,12 @@ static void serial_ioport_write(void *opaque, hwaddr addr, uint64_t val, s->divider = (s->divider & 0x00ff) | (val << 8); serial_update_parameters(s); } else { + uint8_t changed = (s->ier ^ val) & 0x0f; s->ier = val & 0x0f; /* If the backend device is a real serial port, turn polling of the modem - status lines on physical port on or off depending on UART_IER_MSI state */ - if (s->poll_msl >= 0) { + * status lines on physical port on or off depending on UART_IER_MSI state. + */ + if ((changed & UART_IER_MSI) && s->poll_msl >= 0) { if (s->ier & UART_IER_MSI) { s->poll_msl = 1; serial_update_msl(s); @@ -354,18 +356,23 @@ static void serial_ioport_write(void *opaque, hwaddr addr, uint64_t val, * This is not in the datasheet, but Windows relies on it. It is * unclear if THRE has to be resampled every time THRI becomes * 1, or only on the rising edge. Bochs does the latter, and Windows - * always toggles IER to all zeroes and back to all ones. But for - * now leave it as it has always been in QEMU. + * always toggles IER to all zeroes and back to all ones, so do the + * same. * * If IER.THRI is zero, thr_ipending is not used. Set it to zero * so that the thr_ipending subsection is not migrated. 
*/ - if ((s->ier & UART_IER_THRI) && (s->lsr & UART_LSR_THRE)) { - s->thr_ipending = 1; - } else { - s->thr_ipending = 0; + if (changed & UART_IER_THRI) { + if ((s->ier & UART_IER_THRI) && (s->lsr & UART_LSR_THRE)) { + s->thr_ipending = 1; + } else { + s->thr_ipending = 0; + } + } + + if (changed) { + serial_update_irq(s); } - serial_update_irq(s); } break; case 2: From c7ff8daacf4a669f9ab5a975f33a51ef901b7556 Mon Sep 17 00:00:00 2001 From: Kevin O'Connor Date: Mon, 8 Dec 2014 18:10:34 -0500 Subject: [PATCH 43/47] sdhci: Set a default frequency clock The Linux SDHCI PCI driver will only register the device if there is a clock frequency set. So, set a default frequency of 52Mhz. Signed-off-by: Kevin O'Connor Signed-off-by: Paolo Bonzini --- hw/sd/sdhci.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/hw/sd/sdhci.c b/hw/sd/sdhci.c index b38005003c..9ef99b67d5 100644 --- a/hw/sd/sdhci.c +++ b/hw/sd/sdhci.c @@ -74,10 +74,10 @@ #define SDHC_CAPAB_MAXBLOCKLENGTH 512ul /* Maximum clock frequency for SDclock in MHz * value in range 10-63 MHz, 0 - not defined */ -#define SDHC_CAPAB_BASECLKFREQ 0ul +#define SDHC_CAPAB_BASECLKFREQ 52ul #define SDHC_CAPAB_TOUNIT 1ul /* Timeout clock unit 0 - kHz, 1 - MHz */ /* Timeout clock frequency 1-63, 0 - not defined */ -#define SDHC_CAPAB_TOCLKFREQ 0ul +#define SDHC_CAPAB_TOCLKFREQ 52ul /* Now check all parameters and calculate CAPABILITIES REGISTER value */ #if SDHC_CAPAB_64BITBUS > 1 || SDHC_CAPAB_18V > 1 || SDHC_CAPAB_30V > 1 || \ From d368ba4376b2c1c24175c74b3733b8fe64dbe8a6 Mon Sep 17 00:00:00 2001 From: Kevin O'Connor Date: Mon, 8 Dec 2014 18:10:30 -0500 Subject: [PATCH 44/47] sdhci: Remove class "virtual" methods The SDHCIClass defines a series of class "methods". However, no code in the QEMU tree overrides these methods or even uses them outside of sdhci.c. Remove the virtual methods and replace them with direct calls to the underlying functions. 
This simplifies the process of extending the sdhci code to support PCI devices (which have a different parent class). Signed-off-by: Kevin O'Connor Signed-off-by: Paolo Bonzini --- hw/sd/sdhci.c | 122 +++++++++++++++++--------------------------------- hw/sd/sdhci.h | 24 ---------- 2 files changed, 40 insertions(+), 106 deletions(-) diff --git a/hw/sd/sdhci.c b/hw/sd/sdhci.c index 9ef99b67d5..f834ea1a4a 100644 --- a/hw/sd/sdhci.c +++ b/hw/sd/sdhci.c @@ -198,12 +198,7 @@ static void sdhci_reset(SDHCIState *s) s->stopped_state = sdhc_not_stopped; } -static void sdhci_do_data_transfer(void *opaque) -{ - SDHCIState *s = (SDHCIState *)opaque; - - SDHCI_GET_CLASS(s)->data_transfer(s); -} +static void sdhci_data_transfer(void *opaque); static void sdhci_send_command(SDHCIState *s) { @@ -261,7 +256,7 @@ static void sdhci_send_command(SDHCIState *s) if (s->blksize && (s->cmdreg & SDHC_CMD_DATA_PRESENT)) { s->data_count = 0; - sdhci_do_data_transfer(s); + sdhci_data_transfer(s); } } @@ -367,9 +362,9 @@ static uint32_t sdhci_read_dataport(SDHCIState *s, unsigned size) /* stop at gap request */ (s->stopped_state == sdhc_gap_read && !(s->prnsts & SDHC_DAT_LINE_ACTIVE))) { - SDHCI_GET_CLASS(s)->end_data_transfer(s); + sdhci_end_transfer(s); } else { /* if there are more data, read next block from card */ - SDHCI_GET_CLASS(s)->read_block_from_card(s); + sdhci_read_block_from_card(s); } break; } @@ -410,7 +405,7 @@ static void sdhci_write_block_to_card(SDHCIState *s) if ((s->trnmod & SDHC_TRNS_MULTI) == 0 || ((s->trnmod & SDHC_TRNS_MULTI) && (s->trnmod & SDHC_TRNS_BLK_CNT_EN) && (s->blkcnt == 0))) { - SDHCI_GET_CLASS(s)->end_data_transfer(s); + sdhci_end_transfer(s); } else if (s->norintstsen & SDHC_NISEN_WBUFRDY) { s->norintsts |= SDHC_NIS_WBUFRDY; } @@ -422,7 +417,7 @@ static void sdhci_write_block_to_card(SDHCIState *s) if (s->norintstsen & SDHC_EISEN_BLKGAP) { s->norintsts |= SDHC_EIS_BLKGAP; } - SDHCI_GET_CLASS(s)->end_data_transfer(s); + sdhci_end_transfer(s); } 
sdhci_update_irq(s); @@ -450,7 +445,7 @@ static void sdhci_write_dataport(SDHCIState *s, uint32_t value, unsigned size) s->data_count = 0; s->prnsts &= ~SDHC_SPACE_AVAILABLE; if (s->prnsts & SDHC_DOING_WRITE) { - SDHCI_GET_CLASS(s)->write_block_to_card(s); + sdhci_write_block_to_card(s); } } } @@ -537,7 +532,7 @@ static void sdhci_sdma_transfer_multi_blocks(SDHCIState *s) } if (s->blkcnt == 0) { - SDHCI_GET_CLASS(s)->end_data_transfer(s); + sdhci_end_transfer(s); } else { if (s->norintstsen & SDHC_NISEN_DMA) { s->norintsts |= SDHC_NIS_DMA; @@ -571,7 +566,7 @@ static void sdhci_sdma_transfer_single_block(SDHCIState *s) s->blkcnt--; } - SDHCI_GET_CLASS(s)->end_data_transfer(s); + sdhci_end_transfer(s); } typedef struct ADMADescr { @@ -758,7 +753,7 @@ static void sdhci_do_adma(SDHCIState *s) sdhci_update_irq(s); } - SDHCI_GET_CLASS(s)->end_data_transfer(s); + sdhci_end_transfer(s); return; } @@ -771,9 +766,9 @@ static void sdhci_do_adma(SDHCIState *s) /* Perform data transfer according to controller configuration */ -static void sdhci_data_transfer(SDHCIState *s) +static void sdhci_data_transfer(void *opaque) { - SDHCIClass *k = SDHCI_GET_CLASS(s); + SDHCIState *s = (SDHCIState *)opaque; if (s->trnmod & SDHC_TRNS_DMA) { switch (SDHC_DMA_TYPE(s->hostctl)) { @@ -784,9 +779,9 @@ static void sdhci_data_transfer(SDHCIState *s) } if ((s->blkcnt == 1) || !(s->trnmod & SDHC_TRNS_MULTI)) { - k->do_sdma_single(s); + sdhci_sdma_transfer_single_block(s); } else { - k->do_sdma_multi(s); + sdhci_sdma_transfer_multi_blocks(s); } break; @@ -796,7 +791,7 @@ static void sdhci_data_transfer(SDHCIState *s) break; } - k->do_adma(s); + sdhci_do_adma(s); break; case SDHC_CTRL_ADMA2_32: if (!(s->capareg & SDHC_CAN_DO_ADMA2)) { @@ -804,7 +799,7 @@ static void sdhci_data_transfer(SDHCIState *s) break; } - k->do_adma(s); + sdhci_do_adma(s); break; case SDHC_CTRL_ADMA2_64: if (!(s->capareg & SDHC_CAN_DO_ADMA2) || @@ -813,7 +808,7 @@ static void sdhci_data_transfer(SDHCIState *s) break; } - 
k->do_adma(s); + sdhci_do_adma(s); break; default: ERRPRINT("Unsupported DMA type\n"); @@ -823,11 +818,11 @@ static void sdhci_data_transfer(SDHCIState *s) if ((s->trnmod & SDHC_TRNS_READ) && sd_data_ready(s->card)) { s->prnsts |= SDHC_DOING_READ | SDHC_DATA_INHIBIT | SDHC_DAT_LINE_ACTIVE; - SDHCI_GET_CLASS(s)->read_block_from_card(s); + sdhci_read_block_from_card(s); } else { s->prnsts |= SDHC_DOING_WRITE | SDHC_DAT_LINE_ACTIVE | SDHC_SPACE_AVAILABLE | SDHC_DATA_INHIBIT; - SDHCI_GET_CLASS(s)->write_block_to_card(s); + sdhci_write_block_to_card(s); } } } @@ -858,8 +853,9 @@ sdhci_buff_access_is_sequential(SDHCIState *s, unsigned byte_num) return true; } -static uint32_t sdhci_read(SDHCIState *s, unsigned int offset, unsigned size) +static uint64_t sdhci_read(void *opaque, hwaddr offset, unsigned size) { + SDHCIState *s = (SDHCIState *)opaque; uint32_t ret = 0; switch (offset & ~0x3) { @@ -880,8 +876,8 @@ static uint32_t sdhci_read(SDHCIState *s, unsigned int offset, unsigned size) break; case SDHC_BDATA: if (sdhci_buff_access_is_sequential(s, offset - SDHC_BDATA)) { - ret = SDHCI_GET_CLASS(s)->bdata_read(s, size); - DPRINT_L2("read %ub: addr[0x%04x] -> %u(0x%x)\n", size, offset, + ret = sdhci_read_dataport(s, size); + DPRINT_L2("read %ub: addr[0x%04x] -> %u(0x%x)\n", size, (int)offset, ret, ret); return ret; } @@ -927,13 +923,13 @@ static uint32_t sdhci_read(SDHCIState *s, unsigned int offset, unsigned size) ret = (SD_HOST_SPECv2_VERS << 16) | sdhci_slotint(s); break; default: - ERRPRINT("bad %ub read: addr[0x%04x]\n", size, offset); + ERRPRINT("bad %ub read: addr[0x%04x]\n", size, (int)offset); break; } ret >>= (offset & 0x3) * 8; ret &= (1ULL << (size * 8)) - 1; - DPRINT_L2("read %ub: addr[0x%04x] -> %u(0x%x)\n", size, offset, ret, ret); + DPRINT_L2("read %ub: addr[0x%04x] -> %u(0x%x)\n", size, (int)offset, ret, ret); return ret; } @@ -948,10 +944,10 @@ static inline void sdhci_blkgap_write(SDHCIState *s, uint8_t value) (s->blkgap & SDHC_STOP_AT_GAP_REQ) == 0) { 
if (s->stopped_state == sdhc_gap_read) { s->prnsts |= SDHC_DAT_LINE_ACTIVE | SDHC_DOING_READ; - SDHCI_GET_CLASS(s)->read_block_from_card(s); + sdhci_read_block_from_card(s); } else { s->prnsts |= SDHC_DAT_LINE_ACTIVE | SDHC_DOING_WRITE; - SDHCI_GET_CLASS(s)->write_block_to_card(s); + sdhci_write_block_to_card(s); } s->stopped_state = sdhc_not_stopped; } else if (!s->stopped_state && (value & SDHC_STOP_AT_GAP_REQ)) { @@ -967,7 +963,7 @@ static inline void sdhci_reset_write(SDHCIState *s, uint8_t value) { switch (value) { case SDHC_RESET_ALL: - DEVICE_GET_CLASS(s)->reset(DEVICE(s)); + sdhci_reset(s); break; case SDHC_RESET_CMD: s->prnsts &= ~SDHC_CMD_INHIBIT; @@ -987,10 +983,12 @@ static inline void sdhci_reset_write(SDHCIState *s, uint8_t value) } static void -sdhci_write(SDHCIState *s, unsigned int offset, uint32_t value, unsigned size) +sdhci_write(void *opaque, hwaddr offset, uint64_t val, unsigned size) { + SDHCIState *s = (SDHCIState *)opaque; unsigned shift = 8 * (offset & 0x3); uint32_t mask = ~(((1ULL << (size * 8)) - 1) << shift); + uint32_t value = val; value <<= shift; switch (offset & ~0x3) { @@ -1000,7 +998,7 @@ sdhci_write(SDHCIState *s, unsigned int offset, uint32_t value, unsigned size) /* Writing to last byte of sdmasysad might trigger transfer */ if (!(mask & 0xFF000000) && TRANSFERRING_DATA(s->prnsts) && s->blkcnt && s->blksize && SDHC_DMA_TYPE(s->hostctl) == SDHC_CTRL_SDMA) { - SDHCI_GET_CLASS(s)->do_sdma_multi(s); + sdhci_sdma_transfer_multi_blocks(s); } break; case SDHC_BLKSIZE: @@ -1022,15 +1020,15 @@ sdhci_write(SDHCIState *s, unsigned int offset, uint32_t value, unsigned size) MASKED_WRITE(s->cmdreg, mask >> 16, value >> 16); /* Writing to the upper byte of CMDREG triggers SD command generation */ - if ((mask & 0xFF000000) || !SDHCI_GET_CLASS(s)->can_issue_command(s)) { + if ((mask & 0xFF000000) || !sdhci_can_issue_command(s)) { break; } - SDHCI_GET_CLASS(s)->send_command(s); + sdhci_send_command(s); break; case SDHC_BDATA: if 
(sdhci_buff_access_is_sequential(s, offset - SDHC_BDATA)) { - SDHCI_GET_CLASS(s)->bdata_write(s, value >> shift, size); + sdhci_write_dataport(s, value >> shift, size); } break; case SDHC_HOSTCTL: @@ -1111,32 +1109,16 @@ sdhci_write(SDHCIState *s, unsigned int offset, uint32_t value, unsigned size) break; default: ERRPRINT("bad %ub write offset: addr[0x%04x] <- %u(0x%x)\n", - size, offset, value >> shift, value >> shift); + size, (int)offset, value >> shift, value >> shift); break; } DPRINT_L2("write %ub: addr[0x%04x] <- %u(0x%x)\n", - size, offset, value >> shift, value >> shift); -} - -static uint64_t -sdhci_readfn(void *opaque, hwaddr offset, unsigned size) -{ - SDHCIState *s = (SDHCIState *)opaque; - - return SDHCI_GET_CLASS(s)->mem_read(s, offset, size); -} - -static void -sdhci_writefn(void *opaque, hwaddr off, uint64_t val, unsigned sz) -{ - SDHCIState *s = (SDHCIState *)opaque; - - SDHCI_GET_CLASS(s)->mem_write(s, off, val, sz); + size, (int)offset, value >> shift, value >> shift); } static const MemoryRegionOps sdhci_mmio_ops = { - .read = sdhci_readfn, - .write = sdhci_writefn, + .read = sdhci_read, + .write = sdhci_write, .valid = { .min_access_size = 1, .max_access_size = 4, @@ -1175,7 +1157,7 @@ static void sdhci_initfn(Object *obj) sd_set_cb(s->card, s->ro_cb, s->eject_cb); s->insert_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL, sdhci_raise_insertion_irq, s); - s->transfer_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL, sdhci_do_data_transfer, s); + s->transfer_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL, sdhci_data_transfer, s); } static void sdhci_uninitfn(Object *obj) @@ -1254,36 +1236,13 @@ static void sdhci_realize(DeviceState *dev, Error ** errp) sysbus_init_mmio(sbd, &s->iomem); } -static void sdhci_generic_reset(DeviceState *ds) -{ - SDHCIState *s = SDHCI(ds); - SDHCI_GET_CLASS(s)->reset(s); -} - static void sdhci_class_init(ObjectClass *klass, void *data) { DeviceClass *dc = DEVICE_CLASS(klass); - SDHCIClass *k = SDHCI_CLASS(klass); dc->vmsd = 
&sdhci_vmstate; dc->props = sdhci_properties; - dc->reset = sdhci_generic_reset; dc->realize = sdhci_realize; - - k->reset = sdhci_reset; - k->mem_read = sdhci_read; - k->mem_write = sdhci_write; - k->send_command = sdhci_send_command; - k->can_issue_command = sdhci_can_issue_command; - k->data_transfer = sdhci_data_transfer; - k->end_data_transfer = sdhci_end_transfer; - k->do_sdma_single = sdhci_sdma_transfer_single_block; - k->do_sdma_multi = sdhci_sdma_transfer_multi_blocks; - k->do_adma = sdhci_do_adma; - k->read_block_from_card = sdhci_read_block_from_card; - k->write_block_to_card = sdhci_write_block_to_card; - k->bdata_read = sdhci_read_dataport; - k->bdata_write = sdhci_write_dataport; } static const TypeInfo sdhci_type_info = { @@ -1293,7 +1252,6 @@ static const TypeInfo sdhci_type_info = { .instance_init = sdhci_initfn, .instance_finalize = sdhci_uninitfn, .class_init = sdhci_class_init, - .class_size = sizeof(SDHCIClass) }; static void sdhci_register_types(void) diff --git a/hw/sd/sdhci.h b/hw/sd/sdhci.h index a560c3c93f..9a334ac96f 100644 --- a/hw/sd/sdhci.h +++ b/hw/sd/sdhci.h @@ -279,34 +279,10 @@ typedef struct SDHCIState { /* RO Host Controller Version Register always reads as 0x2401 */ } SDHCIState; -typedef struct SDHCIClass { - SysBusDeviceClass busdev_class; - - void (*reset)(SDHCIState *s); - uint32_t (*mem_read)(SDHCIState *s, unsigned int offset, unsigned size); - void (*mem_write)(SDHCIState *s, unsigned int offset, uint32_t value, - unsigned size); - void (*send_command)(SDHCIState *s); - bool (*can_issue_command)(SDHCIState *s); - void (*data_transfer)(SDHCIState *s); - void (*end_data_transfer)(SDHCIState *s); - void (*do_sdma_single)(SDHCIState *s); - void (*do_sdma_multi)(SDHCIState *s); - void (*do_adma)(SDHCIState *s); - void (*read_block_from_card)(SDHCIState *s); - void (*write_block_to_card)(SDHCIState *s); - uint32_t (*bdata_read)(SDHCIState *s, unsigned size); - void (*bdata_write)(SDHCIState *s, uint32_t value, unsigned size); 
-} SDHCIClass; - extern const VMStateDescription sdhci_vmstate; #define TYPE_SDHCI "generic-sdhci" #define SDHCI(obj) \ OBJECT_CHECK(SDHCIState, (obj), TYPE_SDHCI) -#define SDHCI_CLASS(klass) \ - OBJECT_CLASS_CHECK(SDHCIClass, (klass), TYPE_SDHCI) -#define SDHCI_GET_CLASS(obj) \ - OBJECT_GET_CLASS(SDHCIClass, (obj), TYPE_SDHCI) #endif /* SDHCI_H */ From 7302dcd60bbde1b11c298feb8134a34791f21b21 Mon Sep 17 00:00:00 2001 From: Kevin O'Connor Date: Mon, 8 Dec 2014 18:10:31 -0500 Subject: [PATCH 45/47] sdhci: Add "sysbus" to sdhci QOM types and methods Update the sdhci sysbus QOM types and methods so that sysbus is in their name. This is in preparation for adding PCI versions of these types and methods. Signed-off-by: Kevin O'Connor Signed-off-by: Paolo Bonzini --- hw/sd/sdhci.c | 39 ++++++++++++++++++++++++--------------- hw/sd/sdhci.h | 6 +++--- 2 files changed, 27 insertions(+), 18 deletions(-) diff --git a/hw/sd/sdhci.c b/hw/sd/sdhci.c index f834ea1a4a..d7ca0c057c 100644 --- a/hw/sd/sdhci.c +++ b/hw/sd/sdhci.c @@ -1142,9 +1142,8 @@ static inline unsigned int sdhci_get_fifolen(SDHCIState *s) } } -static void sdhci_initfn(Object *obj) +static void sdhci_initfn(SDHCIState *s) { - SDHCIState *s = SDHCI(obj); DriveInfo *di; di = drive_get_next(IF_SD); @@ -1160,10 +1159,8 @@ static void sdhci_initfn(Object *obj) s->transfer_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL, sdhci_data_transfer, s); } -static void sdhci_uninitfn(Object *obj) +static void sdhci_uninitfn(SDHCIState *s) { - SDHCIState *s = SDHCI(obj); - timer_del(s->insert_timer); timer_free(s->insert_timer); timer_del(s->transfer_timer); @@ -1223,9 +1220,21 @@ static Property sdhci_properties[] = { DEFINE_PROP_END_OF_LIST(), }; -static void sdhci_realize(DeviceState *dev, Error ** errp) +static void sdhci_sysbus_init(Object *obj) { - SDHCIState *s = SDHCI(dev); + SDHCIState *s = SYSBUS_SDHCI(obj); + sdhci_initfn(s); +} + +static void sdhci_sysbus_finalize(Object *obj) +{ + SDHCIState *s = SYSBUS_SDHCI(obj); + 
sdhci_uninitfn(s); +} + +static void sdhci_sysbus_realize(DeviceState *dev, Error ** errp) +{ + SDHCIState *s = SYSBUS_SDHCI(dev); SysBusDevice *sbd = SYS_BUS_DEVICE(dev); s->buf_maxsz = sdhci_get_fifolen(s); @@ -1236,27 +1245,27 @@ static void sdhci_realize(DeviceState *dev, Error ** errp) sysbus_init_mmio(sbd, &s->iomem); } -static void sdhci_class_init(ObjectClass *klass, void *data) +static void sdhci_sysbus_class_init(ObjectClass *klass, void *data) { DeviceClass *dc = DEVICE_CLASS(klass); dc->vmsd = &sdhci_vmstate; dc->props = sdhci_properties; - dc->realize = sdhci_realize; + dc->realize = sdhci_sysbus_realize; } -static const TypeInfo sdhci_type_info = { - .name = TYPE_SDHCI, +static const TypeInfo sdhci_sysbus_info = { + .name = TYPE_SYSBUS_SDHCI, .parent = TYPE_SYS_BUS_DEVICE, .instance_size = sizeof(SDHCIState), - .instance_init = sdhci_initfn, - .instance_finalize = sdhci_uninitfn, - .class_init = sdhci_class_init, + .instance_init = sdhci_sysbus_init, + .instance_finalize = sdhci_sysbus_finalize, + .class_init = sdhci_sysbus_class_init, }; static void sdhci_register_types(void) { - type_register_static(&sdhci_type_info); + type_register_static(&sdhci_sysbus_info); } type_init(sdhci_register_types) diff --git a/hw/sd/sdhci.h b/hw/sd/sdhci.h index 9a334ac96f..9fbf682935 100644 --- a/hw/sd/sdhci.h +++ b/hw/sd/sdhci.h @@ -281,8 +281,8 @@ typedef struct SDHCIState { extern const VMStateDescription sdhci_vmstate; -#define TYPE_SDHCI "generic-sdhci" -#define SDHCI(obj) \ - OBJECT_CHECK(SDHCIState, (obj), TYPE_SDHCI) +#define TYPE_SYSBUS_SDHCI "generic-sdhci" +#define SYSBUS_SDHCI(obj) \ + OBJECT_CHECK(SDHCIState, (obj), TYPE_SYSBUS_SDHCI) #endif /* SDHCI_H */ From ece5e5bfa1377546d5f94e1bb04298e48ce60c1c Mon Sep 17 00:00:00 2001 From: Kevin O'Connor Date: Mon, 8 Dec 2014 18:10:32 -0500 Subject: [PATCH 46/47] sdhci: Define SDHCI PCI ids Signed-off-by: Kevin O'Connor Signed-off-by: Paolo Bonzini --- docs/specs/pci-ids.txt | 2 ++ include/hw/pci/pci.h | 1 + 
include/hw/pci/pci_ids.h | 1 + 3 files changed, 4 insertions(+) diff --git a/docs/specs/pci-ids.txt b/docs/specs/pci-ids.txt index 3c65e1a6ef..9b57d5e8fe 100644 --- a/docs/specs/pci-ids.txt +++ b/docs/specs/pci-ids.txt @@ -44,6 +44,8 @@ PCI devices (other than virtio): 1b36:0002 PCI serial port (16550A) adapter (docs/specs/pci-serial.txt) 1b36:0003 PCI Dual-port 16550A adapter (docs/specs/pci-serial.txt) 1b36:0004 PCI Quad-port 16550A adapter (docs/specs/pci-serial.txt) +1b36:0005 PCI test device (docs/specs/pci-testdev.txt) +1b36:0006 PCI SD Card Host Controller Interface (SDHCI) All these devices are documented in docs/specs. diff --git a/include/hw/pci/pci.h b/include/hw/pci/pci.h index c352c7b3ad..97e4257ac0 100644 --- a/include/hw/pci/pci.h +++ b/include/hw/pci/pci.h @@ -88,6 +88,7 @@ #define PCI_DEVICE_ID_REDHAT_SERIAL2 0x0003 #define PCI_DEVICE_ID_REDHAT_SERIAL4 0x0004 #define PCI_DEVICE_ID_REDHAT_TEST 0x0005 +#define PCI_DEVICE_ID_REDHAT_SDHCI 0x0006 #define PCI_DEVICE_ID_REDHAT_QXL 0x0100 #define FMT_PCIBUS PRIx64 diff --git a/include/hw/pci/pci_ids.h b/include/hw/pci/pci_ids.h index 321d622b78..d7be386849 100644 --- a/include/hw/pci/pci_ids.h +++ b/include/hw/pci/pci_ids.h @@ -31,6 +31,7 @@ #define PCI_CLASS_MEMORY_RAM 0x0500 +#define PCI_CLASS_SYSTEM_SDHCI 0x0805 #define PCI_CLASS_SYSTEM_OTHER 0x0880 #define PCI_CLASS_SERIAL_USB 0x0c03 From 224d10ff5aea9e74a1792fc21188bc9752c43ee9 Mon Sep 17 00:00:00 2001 From: Kevin O'Connor Date: Mon, 8 Dec 2014 18:10:33 -0500 Subject: [PATCH 47/47] sdhci: Support SDHCI devices on PCI Support for PCI devices following the "SD Host Controller Simplified Specification Version 2.00" spec. 
Signed-off-by: Kevin O'Connor Signed-off-by: Paolo Bonzini --- default-configs/pci.mak | 2 ++ hw/sd/sdhci.c | 44 +++++++++++++++++++++++++++++++++++++++++ hw/sd/sdhci.h | 9 ++++++++- 3 files changed, 54 insertions(+), 1 deletion(-) diff --git a/default-configs/pci.mak b/default-configs/pci.mak index 91b1e92da5..a186c39c0e 100644 --- a/default-configs/pci.mak +++ b/default-configs/pci.mak @@ -30,3 +30,5 @@ CONFIG_IPACK=y CONFIG_WDT_IB6300ESB=y CONFIG_PCI_TESTDEV=y CONFIG_NVME_PCI=y +CONFIG_SD=y +CONFIG_SDHCI=y diff --git a/hw/sd/sdhci.c b/hw/sd/sdhci.c index d7ca0c057c..15064d3ec2 100644 --- a/hw/sd/sdhci.c +++ b/hw/sd/sdhci.c @@ -1220,6 +1220,49 @@ static Property sdhci_properties[] = { DEFINE_PROP_END_OF_LIST(), }; +static int sdhci_pci_init(PCIDevice *dev) +{ + SDHCIState *s = PCI_SDHCI(dev); + dev->config[PCI_CLASS_PROG] = 0x01; /* Standard Host supported DMA */ + dev->config[PCI_INTERRUPT_PIN] = 0x01; /* interrupt pin A */ + sdhci_initfn(s); + s->buf_maxsz = sdhci_get_fifolen(s); + s->fifo_buffer = g_malloc0(s->buf_maxsz); + s->irq = pci_allocate_irq(dev); + memory_region_init_io(&s->iomem, OBJECT(s), &sdhci_mmio_ops, s, "sdhci", + SDHC_REGISTERS_MAP_SIZE); + pci_register_bar(dev, 0, 0, &s->iomem); + return 0; +} + +static void sdhci_pci_exit(PCIDevice *dev) +{ + SDHCIState *s = PCI_SDHCI(dev); + sdhci_uninitfn(s); +} + +static void sdhci_pci_class_init(ObjectClass *klass, void *data) +{ + DeviceClass *dc = DEVICE_CLASS(klass); + PCIDeviceClass *k = PCI_DEVICE_CLASS(klass); + + k->init = sdhci_pci_init; + k->exit = sdhci_pci_exit; + k->vendor_id = PCI_VENDOR_ID_REDHAT; + k->device_id = PCI_DEVICE_ID_REDHAT_SDHCI; + k->class_id = PCI_CLASS_SYSTEM_SDHCI; + set_bit(DEVICE_CATEGORY_STORAGE, dc->categories); + dc->vmsd = &sdhci_vmstate; + dc->props = sdhci_properties; +} + +static const TypeInfo sdhci_pci_info = { + .name = TYPE_PCI_SDHCI, + .parent = TYPE_PCI_DEVICE, + .instance_size = sizeof(SDHCIState), + .class_init = sdhci_pci_class_init, +}; + static void 
sdhci_sysbus_init(Object *obj) { SDHCIState *s = SYSBUS_SDHCI(obj); @@ -1265,6 +1308,7 @@ static const TypeInfo sdhci_sysbus_info = { static void sdhci_register_types(void) { + type_register_static(&sdhci_pci_info); type_register_static(&sdhci_sysbus_info); } diff --git a/hw/sd/sdhci.h b/hw/sd/sdhci.h index 9fbf682935..3352d23d68 100644 --- a/hw/sd/sdhci.h +++ b/hw/sd/sdhci.h @@ -26,6 +26,7 @@ #define SDHCI_H #include "qemu-common.h" +#include "hw/pci/pci.h" #include "hw/sysbus.h" #include "hw/sd.h" @@ -232,7 +233,10 @@ enum { /* SD/MMC host controller state */ typedef struct SDHCIState { - SysBusDevice busdev; + union { + PCIDevice pcidev; + SysBusDevice busdev; + }; SDState *card; MemoryRegion iomem; @@ -281,6 +285,9 @@ typedef struct SDHCIState { extern const VMStateDescription sdhci_vmstate; +#define TYPE_PCI_SDHCI "sdhci-pci" +#define PCI_SDHCI(obj) OBJECT_CHECK(SDHCIState, (obj), TYPE_PCI_SDHCI) + #define TYPE_SYSBUS_SDHCI "generic-sdhci" #define SYSBUS_SDHCI(obj) \ OBJECT_CHECK(SDHCIState, (obj), TYPE_SYSBUS_SDHCI)