diff --git a/hw/arm/armsse.c b/hw/arm/armsse.c
index 47d13312dc..b5c614cc3a 100644
--- a/hw/arm/armsse.c
+++ b/hw/arm/armsse.c
@@ -38,6 +38,33 @@ struct ARMSSEInfo {
     bool has_cachectrl;
     bool has_cpusecctrl;
     bool has_cpuid;
+    Property *props;
+};
+
+static Property iotkit_properties[] = {
+    DEFINE_PROP_LINK("memory", ARMSSE, board_memory, TYPE_MEMORY_REGION,
+                     MemoryRegion *),
+    DEFINE_PROP_UINT32("EXP_NUMIRQ", ARMSSE, exp_numirq, 64),
+    DEFINE_PROP_UINT32("MAINCLK", ARMSSE, mainclk_frq, 0),
+    DEFINE_PROP_UINT32("SRAM_ADDR_WIDTH", ARMSSE, sram_addr_width, 15),
+    DEFINE_PROP_UINT32("init-svtor", ARMSSE, init_svtor, 0x10000000),
+    DEFINE_PROP_BOOL("CPU0_FPU", ARMSSE, cpu_fpu[0], true),
+    DEFINE_PROP_BOOL("CPU0_DSP", ARMSSE, cpu_dsp[0], true),
+    DEFINE_PROP_END_OF_LIST()
+};
+
+static Property armsse_properties[] = {
+    DEFINE_PROP_LINK("memory", ARMSSE, board_memory, TYPE_MEMORY_REGION,
+                     MemoryRegion *),
+    DEFINE_PROP_UINT32("EXP_NUMIRQ", ARMSSE, exp_numirq, 64),
+    DEFINE_PROP_UINT32("MAINCLK", ARMSSE, mainclk_frq, 0),
+    DEFINE_PROP_UINT32("SRAM_ADDR_WIDTH", ARMSSE, sram_addr_width, 15),
+    DEFINE_PROP_UINT32("init-svtor", ARMSSE, init_svtor, 0x10000000),
+    DEFINE_PROP_BOOL("CPU0_FPU", ARMSSE, cpu_fpu[0], false),
+    DEFINE_PROP_BOOL("CPU0_DSP", ARMSSE, cpu_dsp[0], false),
+    DEFINE_PROP_BOOL("CPU1_FPU", ARMSSE, cpu_fpu[1], true),
+    DEFINE_PROP_BOOL("CPU1_DSP", ARMSSE, cpu_dsp[1], true),
+    DEFINE_PROP_END_OF_LIST()
 };
 
 static const ARMSSEInfo armsse_variants[] = {
@@ -53,6 +80,7 @@ static const ARMSSEInfo armsse_variants[] = {
         .has_cachectrl = false,
         .has_cpusecctrl = false,
         .has_cpuid = false,
+        .props = iotkit_properties,
     },
     {
         .name = TYPE_SSE200,
@@ -66,6 +94,7 @@ static const ARMSSEInfo armsse_variants[] = {
         .has_cachectrl = true,
         .has_cpusecctrl = true,
         .has_cpuid = true,
+        .props = armsse_properties,
     },
 };
 
@@ -533,6 +562,20 @@ static void armsse_realize(DeviceState *dev, Error **errp)
                 return;
             }
         }
+        if (!s->cpu_fpu[i]) {
+            object_property_set_bool(cpuobj, false, "vfp", &err);
+            if (err) {
+                error_propagate(errp, err);
+                return;
+            }
+        }
+        if (!s->cpu_dsp[i]) {
+            object_property_set_bool(cpuobj, false, "dsp", &err);
+            if (err) {
+                error_propagate(errp, err);
+                return;
+            }
+        }
 
         if (i > 0) {
             memory_region_add_subregion_overlap(&s->cpu_container[i], 0,
@@ -1222,16 +1265,6 @@ static const VMStateDescription armsse_vmstate = {
     }
 };
 
-static Property armsse_properties[] = {
-    DEFINE_PROP_LINK("memory", ARMSSE, board_memory, TYPE_MEMORY_REGION,
-                     MemoryRegion *),
-    DEFINE_PROP_UINT32("EXP_NUMIRQ", ARMSSE, exp_numirq, 64),
-    DEFINE_PROP_UINT32("MAINCLK", ARMSSE, mainclk_frq, 0),
-    DEFINE_PROP_UINT32("SRAM_ADDR_WIDTH", ARMSSE, sram_addr_width, 15),
-    DEFINE_PROP_UINT32("init-svtor", ARMSSE, init_svtor, 0x10000000),
-    DEFINE_PROP_END_OF_LIST()
-};
-
 static void armsse_reset(DeviceState *dev)
 {
     ARMSSE *s = ARMSSE(dev);
@@ -1244,13 +1277,14 @@ static void armsse_class_init(ObjectClass *klass, void *data)
     DeviceClass *dc = DEVICE_CLASS(klass);
     IDAUInterfaceClass *iic = IDAU_INTERFACE_CLASS(klass);
     ARMSSEClass *asc = ARMSSE_CLASS(klass);
+    const ARMSSEInfo *info = data;
 
     dc->realize = armsse_realize;
     dc->vmsd = &armsse_vmstate;
-    dc->props = armsse_properties;
+    dc->props = info->props;
     dc->reset = armsse_reset;
     iic->check = armsse_idau_check;
-    asc->info = data;
+    asc->info = info;
 }
 
 static const TypeInfo armsse_info = {
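The per-variant Property arrays above are what let a single class_init serve both QOM types; a further variant would only need to supply its own list. A sketch, not part of this patch -- the name sse300_properties and its contents are invented for illustration:

    static Property sse300_properties[] = {      /* hypothetical */
        DEFINE_PROP_LINK("memory", ARMSSE, board_memory, TYPE_MEMORY_REGION,
                         MemoryRegion *),
        DEFINE_PROP_UINT32("EXP_NUMIRQ", ARMSSE, exp_numirq, 64),
        DEFINE_PROP_END_OF_LIST()
    };
    /* ...plus a new armsse_variants[] entry with .props = sse300_properties */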
diff --git a/hw/arm/armv7m.c b/hw/arm/armv7m.c
index 1a8a6c8bf9..b9efad6bac 100644
--- a/hw/arm/armv7m.c
+++ b/hw/arm/armv7m.c
@@ -190,6 +190,22 @@ static void armv7m_realize(DeviceState *dev, Error **errp)
             return;
         }
     }
+    if (object_property_find(OBJECT(s->cpu), "vfp", NULL)) {
+        object_property_set_bool(OBJECT(s->cpu), s->vfp,
+                                 "vfp", &err);
+        if (err != NULL) {
+            error_propagate(errp, err);
+            return;
+        }
+    }
+    if (object_property_find(OBJECT(s->cpu), "dsp", NULL)) {
+        object_property_set_bool(OBJECT(s->cpu), s->dsp,
+                                 "dsp", &err);
+        if (err != NULL) {
+            error_propagate(errp, err);
+            return;
+        }
+    }
 
     /*
      * Tell the CPU where the NVIC is; it will fail realize if it doesn't
@@ -260,6 +276,8 @@ static Property armv7m_properties[] = {
     DEFINE_PROP_BOOL("enable-bitband", ARMv7MState, enable_bitband, false),
     DEFINE_PROP_BOOL("start-powered-off", ARMv7MState, start_powered_off,
                      false),
+    DEFINE_PROP_BOOL("vfp", ARMv7MState, vfp, true),
+    DEFINE_PROP_BOOL("dsp", ARMv7MState, dsp, true),
     DEFINE_PROP_END_OF_LIST(),
 };
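Because the container only forwards "vfp"/"dsp" when object_property_find() reports that the CPU actually has them, board code can set the properties unconditionally. A minimal board-side sketch (the embedded ARMv7MState field name is invented for illustration):

    DeviceState *armv7m = DEVICE(&s->armv7m);   /* hypothetical board state */
    qdev_prop_set_bit(armv7m, "vfp", false);    /* model a CPU without an FPU */
    qdev_prop_set_bit(armv7m, "dsp", false);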
diff --git a/hw/arm/boot.c b/hw/arm/boot.c
index 0261fdabab..b2f93f6bef 100644
--- a/hw/arm/boot.c
+++ b/hw/arm/boot.c
@@ -911,6 +911,7 @@ static uint64_t load_aarch64_image(const char *filename, hwaddr mem_base,
                                    hwaddr *entry, AddressSpace *as)
 {
     hwaddr kernel_load_offset = KERNEL64_LOAD_ADDR;
+    uint64_t kernel_size = 0;
     uint8_t *buffer;
     int size;
 
@@ -938,7 +939,10 @@ static uint64_t load_aarch64_image(const char *filename, hwaddr mem_base,
          * is only valid if the image_size is non-zero.
          */
        memcpy(&hdrvals, buffer + ARM64_TEXT_OFFSET_OFFSET, sizeof(hdrvals));
-        if (hdrvals[1] != 0) {
+
+        kernel_size = le64_to_cpu(hdrvals[1]);
+
+        if (kernel_size != 0) {
             kernel_load_offset = le64_to_cpu(hdrvals[0]);
 
             /*
@@ -956,12 +960,21 @@ static uint64_t load_aarch64_image(const char *filename, hwaddr mem_base,
         }
     }
 
+    /*
+     * Kernels before v3.17 don't populate the image_size field, and
+     * raw images have no header. For those our best guess at the size
+     * is the size of the Image file itself.
+     */
+    if (kernel_size == 0) {
+        kernel_size = size;
+    }
+
     *entry = mem_base + kernel_load_offset;
     rom_add_blob_fixed_as(filename, buffer, size, *entry, as);
 
     g_free(buffer);
 
-    return size;
+    return kernel_size;
 }
 
 static void arm_setup_direct_kernel_boot(ARMCPU *cpu,
@@ -977,6 +990,7 @@ static void arm_setup_direct_kernel_boot(ARMCPU *cpu,
     int elf_machine;
     hwaddr entry;
     static const ARMInsnFixup *primary_loader;
+    uint64_t ram_end = info->loader_start + info->ram_size;
 
     if (arm_feature(&cpu->env, ARM_FEATURE_AARCH64)) {
         primary_loader = bootloader_aarch64;
@@ -999,20 +1013,6 @@ static void arm_setup_direct_kernel_boot(ARMCPU *cpu,
     if (info->nb_cpus == 0)
         info->nb_cpus = 1;
 
-    /*
-     * We want to put the initrd far enough into RAM that when the
-     * kernel is uncompressed it will not clobber the initrd. However
-     * on boards without much RAM we must ensure that we still leave
-     * enough room for a decent sized initrd, and on boards with large
-     * amounts of RAM we must avoid the initrd being so far up in RAM
-     * that it is outside lowmem and inaccessible to the kernel.
-     * So for boards with less than 256MB of RAM we put the initrd
-     * halfway into RAM, and for boards with 256MB of RAM or more we put
-     * the initrd at 128MB.
-     */
-    info->initrd_start = info->loader_start +
-        MIN(info->ram_size / 2, 128 * 1024 * 1024);
-
     /* Assume that raw images are linux kernels, and ELF images are not.  */
     kernel_size = arm_load_elf(info, &elf_entry, &elf_low_addr,
                                &elf_high_addr, elf_machine, as);
@@ -1048,27 +1048,59 @@ static void arm_setup_direct_kernel_boot(ARMCPU *cpu,
         /* 32-bit ARM */
         entry = info->loader_start + KERNEL_LOAD_ADDR;
         kernel_size = load_image_targphys_as(info->kernel_filename, entry,
-                                             info->ram_size - KERNEL_LOAD_ADDR,
-                                             as);
+                                             ram_end - KERNEL_LOAD_ADDR, as);
         is_linux = 1;
     }
     if (kernel_size < 0) {
         error_report("could not load kernel '%s'", info->kernel_filename);
         exit(1);
     }
+
+    if (kernel_size > info->ram_size) {
+        error_report("kernel '%s' is too large to fit in RAM "
+                     "(kernel size %d, RAM size %" PRId64 ")",
+                     info->kernel_filename, kernel_size, info->ram_size);
+        exit(1);
+    }
+
     info->entry = entry;
+
+    /*
+     * We want to put the initrd far enough into RAM that when the
+     * kernel is uncompressed it will not clobber the initrd. However
+     * on boards without much RAM we must ensure that we still leave
+     * enough room for a decent sized initrd, and on boards with large
+     * amounts of RAM we must avoid the initrd being so far up in RAM
+     * that it is outside lowmem and inaccessible to the kernel.
+     * So for boards with less than 256MB of RAM we put the initrd
+     * halfway into RAM, and for boards with 256MB of RAM or more we put
+     * the initrd at 128MB.
+     * We also refuse to put the initrd somewhere that will definitely
+     * overlay the kernel we just loaded, though for kernel formats which
+     * don't tell us their exact size (eg self-decompressing 32-bit kernels)
+     * we might still make a bad choice here.
+     */
+    info->initrd_start = info->loader_start +
+        MAX(MIN(info->ram_size / 2, 128 * 1024 * 1024), kernel_size);
+    info->initrd_start = TARGET_PAGE_ALIGN(info->initrd_start);
+
     if (is_linux) {
         uint32_t fixupcontext[FIXUP_MAX];
 
         if (info->initrd_filename) {
+
+            if (info->initrd_start >= ram_end) {
+                error_report("not enough space after kernel to load initrd");
+                exit(1);
+            }
+
             initrd_size = load_ramdisk_as(info->initrd_filename,
                                           info->initrd_start,
-                                          info->ram_size - info->initrd_start,
-                                          as);
+                                          ram_end - info->initrd_start, as);
             if (initrd_size < 0) {
                 initrd_size = load_image_targphys_as(info->initrd_filename,
                                                      info->initrd_start,
-                                                     info->ram_size -
+                                                     ram_end -
                                                      info->initrd_start,
                                                      as);
             }
@@ -1077,6 +1109,12 @@ static void arm_setup_direct_kernel_boot(ARMCPU *cpu,
                              info->initrd_filename);
                 exit(1);
             }
+            if (info->initrd_start + initrd_size > ram_end) {
+                error_report("could not load initrd '%s': "
+                             "too big to fit into RAM after the kernel",
+                             info->initrd_filename);
+                exit(1);
+            }
         } else {
             initrd_size = 0;
         }
@@ -1112,6 +1150,10 @@ static void arm_setup_direct_kernel_boot(ARMCPU *cpu,
             /* Place the DTB after the initrd in memory with alignment. */
             info->dtb_start = QEMU_ALIGN_UP(info->initrd_start + initrd_size,
                                             align);
+            if (info->dtb_start >= ram_end) {
+                error_report("Not enough space for DTB after kernel/initrd");
+                exit(1);
+            }
             fixupcontext[FIXUP_ARGPTR_LO] = info->dtb_start;
             fixupcontext[FIXUP_ARGPTR_HI] = info->dtb_start >> 32;
         } else {
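A worked example of the initrd placement rule above (illustrative numbers only):

    /*
     * 64MB board:              MIN(64MB / 2, 128MB) = 32MB
     *                          -> initrd at loader_start + 32MB
     * 1GB board, small kernel: MIN(1GB / 2, 128MB) = 128MB
     *                          -> initrd at loader_start + 128MB
     * 1GB board, 200MB kernel: MAX(128MB, 200MB) = 200MB, then rounded
     *                          up to the next target page boundary
     */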
diff --git a/hw/arm/musca.c b/hw/arm/musca.c
index 825d80e75a..ddd8842732 100644
--- a/hw/arm/musca.c
+++ b/hw/arm/musca.c
@@ -385,6 +385,14 @@ static void musca_init(MachineState *machine)
     qdev_prop_set_uint32(ssedev, "init-svtor", mmc->init_svtor);
     qdev_prop_set_uint32(ssedev, "SRAM_ADDR_WIDTH", mmc->sram_addr_width);
     qdev_prop_set_uint32(ssedev, "MAINCLK", SYSCLK_FRQ);
+    /*
+     * Musca-A takes the default SSE-200 FPU/DSP settings (ie no for
+     * CPU0 and yes for CPU1); Musca-B1 explicitly enables them for CPU0.
+     */
+    if (mmc->type == MUSCA_B1) {
+        qdev_prop_set_bit(ssedev, "CPU0_FPU", true);
+        qdev_prop_set_bit(ssedev, "CPU0_DSP", true);
+    }
     object_property_set_bool(OBJECT(&mms->sse), true, "realized",
                              &error_fatal);
diff --git a/hw/intc/arm_gicv3_dist.c b/hw/intc/arm_gicv3_dist.c
index 53c55c5729..b65f56f903 100644
--- a/hw/intc/arm_gicv3_dist.c
+++ b/hw/intc/arm_gicv3_dist.c
@@ -378,8 +378,14 @@ static MemTxResult gicd_readl(GICv3State *s, hwaddr offset,
          * ITLinesNumber == (num external irqs / 32) - 1
          */
         int itlinesnumber = ((s->num_irq - GIC_INTERNAL) / 32) - 1;
+        /*
+         * SecurityExtn must be RAZ if GICD_CTLR.DS == 1, and
+         * "security extensions not supported" always implies DS == 1,
+         * so we only need to check the DS bit.
+         */
+        bool sec_extn = !(s->gicd_ctlr & GICD_CTLR_DS);
 
-        *data = (1 << 25) | (1 << 24) | (s->security_extn << 10) |
+        *data = (1 << 25) | (1 << 24) | (sec_extn << 10) |
             (0xf << 19) | itlinesnumber;
         return MEMTX_OK;
     }
@@ -533,7 +539,7 @@ static MemTxResult gicd_readl(GICv3State *s, hwaddr offset,
         }
         return MEMTX_OK;
     }
-    case GICD_IDREGS ... GICD_IDREGS + 0x1f:
+    case GICD_IDREGS ... GICD_IDREGS + 0x2f:
         /* ID registers */
         *data = gicv3_idreg(offset - GICD_IDREGS);
         return MEMTX_OK;
@@ -744,7 +750,7 @@ static MemTxResult gicd_writel(GICv3State *s, hwaddr offset,
         gicd_write_irouter(s, attrs, irq, r);
         return MEMTX_OK;
     }
-    case GICD_IDREGS ... GICD_IDREGS + 0x1f:
+    case GICD_IDREGS ... GICD_IDREGS + 0x2f:
     case GICD_TYPER:
     case GICD_IIDR:
         /* RO registers, ignore the write */
diff --git a/hw/intc/arm_gicv3_redist.c b/hw/intc/arm_gicv3_redist.c
index 3b0ba6de1a..8645220d61 100644
--- a/hw/intc/arm_gicv3_redist.c
+++ b/hw/intc/arm_gicv3_redist.c
@@ -233,7 +233,7 @@ static MemTxResult gicr_readl(GICv3CPUState *cs, hwaddr offset,
         }
         *data = cs->gicr_nsacr;
         return MEMTX_OK;
-    case GICR_IDREGS ... GICR_IDREGS + 0x1f:
+    case GICR_IDREGS ... GICR_IDREGS + 0x2f:
         *data = gicv3_idreg(offset - GICR_IDREGS);
         return MEMTX_OK;
     default:
@@ -363,7 +363,7 @@ static MemTxResult gicr_writel(GICv3CPUState *cs, hwaddr offset,
         return MEMTX_OK;
     case GICR_IIDR:
     case GICR_TYPER:
-    case GICR_IDREGS ... GICR_IDREGS + 0x1f:
+    case GICR_IDREGS ... GICR_IDREGS + 0x2f:
         /* RO registers, ignore the write */
         qemu_log_mask(LOG_GUEST_ERROR,
                       "%s: invalid guest write to RO register at offset "
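The 0x1f -> 0x2f change matters because the GICv3 ID space is 48 bytes; relative to GICD_IDREGS/GICR_IDREGS the layout (per the GICv3 architecture spec) is:

    /*
     * +0x00..+0x0f  PIDR4..PIDR7
     * +0x10..+0x1f  PIDR0..PIDR3
     * +0x20..+0x2f  CIDR0..CIDR3   <- the old "+ 0x1f" bound cut these off
     */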
diff --git a/include/hw/arm/armsse.h b/include/hw/arm/armsse.h
index 81e082cccf..84080c2299 100644
--- a/include/hw/arm/armsse.h
+++ b/include/hw/arm/armsse.h
@@ -50,6 +50,11 @@
  *  address of each SRAM bank (and thus the total amount of internal SRAM)
  * + QOM property "init-svtor" sets the initial value of the CPU SVTOR register
  *  (where it expects to load the PC and SP from the vector table on reset)
+ * + QOM properties "CPU0_FPU", "CPU0_DSP", "CPU1_FPU" and "CPU1_DSP" which
+ *  set whether the CPUs have the FPU and DSP features present. The default
+ *  (matching the hardware) is that for CPU0 in an IoTKit and CPU1 in an
+ *  SSE-200 both are present; CPU0 in an SSE-200 has neither.
+ *  Since the IoTKit has only one CPU, it does not have the CPU1_* properties.
  * + Named GPIO inputs "EXP_IRQ" 0..n are the expansion interrupts for CPU 0,
  *  which are wired to its NVIC lines 32 .. n+32
  * + Named GPIO inputs "EXP_CPU1_IRQ" 0..n are the expansion interrupts for
@@ -208,6 +213,8 @@ typedef struct ARMSSE {
    uint32_t mainclk_frq;
     uint32_t sram_addr_width;
     uint32_t init_svtor;
+    bool cpu_fpu[SSE_MAX_CPUS];
+    bool cpu_dsp[SSE_MAX_CPUS];
 } ARMSSE;
 
 typedef struct ARMSSEInfo ARMSSEInfo;
diff --git a/include/hw/arm/armv7m.h b/include/hw/arm/armv7m.h
index e96a98f809..d2c74d3872 100644
--- a/include/hw/arm/armv7m.h
+++ b/include/hw/arm/armv7m.h
@@ -43,6 +43,8 @@ typedef struct {
  *   devices will be automatically layered on top of this view.)
  * + Property "idau": IDAU interface (forwarded to CPU object)
  * + Property "init-svtor": secure VTOR reset value (forwarded to CPU object)
+ * + Property "vfp": enable VFP (forwarded to CPU object)
+ * + Property "dsp": enable DSP (forwarded to CPU object)
  * + Property "enable-bitband": expose bitbanded IO
  */
 typedef struct ARMv7MState {
@@ -66,6 +68,8 @@ typedef struct ARMv7MState {
     uint32_t init_svtor;
     bool enable_bitband;
     bool start_powered_off;
+    bool vfp;
+    bool dsp;
 } ARMv7MState;
 
 #endif
diff --git a/target/arm/cpu.c b/target/arm/cpu.c
index 2335659a85..376db154f0 100644
--- a/target/arm/cpu.c
+++ b/target/arm/cpu.c
@@ -763,6 +763,15 @@ static Property arm_cpu_cfgend_property =
 static Property arm_cpu_has_pmu_property =
             DEFINE_PROP_BOOL("pmu", ARMCPU, has_pmu, true);
 
+static Property arm_cpu_has_vfp_property =
+            DEFINE_PROP_BOOL("vfp", ARMCPU, has_vfp, true);
+
+static Property arm_cpu_has_neon_property =
+            DEFINE_PROP_BOOL("neon", ARMCPU, has_neon, true);
+
+static Property arm_cpu_has_dsp_property =
+            DEFINE_PROP_BOOL("dsp", ARMCPU, has_dsp, true);
+
 static Property arm_cpu_has_mpu_property =
             DEFINE_PROP_BOOL("has-mpu", ARMCPU, has_mpu, true);
 
@@ -803,6 +812,13 @@ void arm_cpu_post_init(Object *obj)
     if (arm_feature(&cpu->env, ARM_FEATURE_M)) {
         set_feature(&cpu->env, ARM_FEATURE_PMSA);
     }
+    /* Similarly for the VFP feature bits */
+    if (arm_feature(&cpu->env, ARM_FEATURE_VFP4)) {
+        set_feature(&cpu->env, ARM_FEATURE_VFP3);
+    }
+    if (arm_feature(&cpu->env, ARM_FEATURE_VFP3)) {
+        set_feature(&cpu->env, ARM_FEATURE_VFP);
+    }
 
     if (arm_feature(&cpu->env, ARM_FEATURE_CBAR) ||
         arm_feature(&cpu->env, ARM_FEATURE_CBAR_RO)) {
@@ -847,6 +863,33 @@ void arm_cpu_post_init(Object *obj)
                                  &error_abort);
     }
 
+    /*
+     * Allow user to turn off VFP and Neon support, but only for TCG --
+     * KVM does not currently allow us to lie to the guest about its
+     * ID/feature registers, so the guest always sees what the host has.
+     */
+    if (arm_feature(&cpu->env, ARM_FEATURE_VFP)) {
+        cpu->has_vfp = true;
+        if (!kvm_enabled()) {
+            qdev_property_add_static(DEVICE(obj), &arm_cpu_has_vfp_property,
+                                     &error_abort);
+        }
+    }
+
+    if (arm_feature(&cpu->env, ARM_FEATURE_NEON)) {
+        cpu->has_neon = true;
+        if (!kvm_enabled()) {
+            qdev_property_add_static(DEVICE(obj), &arm_cpu_has_neon_property,
+                                     &error_abort);
+        }
+    }
+
+    if (arm_feature(&cpu->env, ARM_FEATURE_M) &&
+        arm_feature(&cpu->env, ARM_FEATURE_THUMB_DSP)) {
+        qdev_property_add_static(DEVICE(obj), &arm_cpu_has_dsp_property,
+                                 &error_abort);
+    }
+
     if (arm_feature(&cpu->env, ARM_FEATURE_PMSA)) {
         qdev_property_add_static(DEVICE(obj), &arm_cpu_has_mpu_property,
                                  &error_abort);
@@ -956,6 +999,136 @@ static void arm_cpu_realizefn(DeviceState *dev, Error **errp)
         return;
     }
 
+    if (arm_feature(env, ARM_FEATURE_AARCH64) &&
+        cpu->has_vfp != cpu->has_neon) {
+        /*
+         * This is an architectural requirement for AArch64; AArch32 is
+         * more flexible and permits VFP-no-Neon and Neon-no-VFP.
+         */
+        error_setg(errp,
+                   "AArch64 CPUs must have both VFP and Neon or neither");
+        return;
+    }
+
+    if (!cpu->has_vfp) {
+        uint64_t t;
+        uint32_t u;
+
+        unset_feature(env, ARM_FEATURE_VFP);
+        unset_feature(env, ARM_FEATURE_VFP3);
+        unset_feature(env, ARM_FEATURE_VFP4);
+
+        t = cpu->isar.id_aa64isar1;
+        t = FIELD_DP64(t, ID_AA64ISAR1, JSCVT, 0);
+        cpu->isar.id_aa64isar1 = t;
+
+        t = cpu->isar.id_aa64pfr0;
+        t = FIELD_DP64(t, ID_AA64PFR0, FP, 0xf);
+        cpu->isar.id_aa64pfr0 = t;
+
+        u = cpu->isar.id_isar6;
+        u = FIELD_DP32(u, ID_ISAR6, JSCVT, 0);
+        cpu->isar.id_isar6 = u;
+
+        u = cpu->isar.mvfr0;
+        u = FIELD_DP32(u, MVFR0, FPSP, 0);
+        u = FIELD_DP32(u, MVFR0, FPDP, 0);
+        u = FIELD_DP32(u, MVFR0, FPTRAP, 0);
+        u = FIELD_DP32(u, MVFR0, FPDIVIDE, 0);
+        u = FIELD_DP32(u, MVFR0, FPSQRT, 0);
+        u = FIELD_DP32(u, MVFR0, FPSHVEC, 0);
+        u = FIELD_DP32(u, MVFR0, FPROUND, 0);
+        cpu->isar.mvfr0 = u;
+
+        u = cpu->isar.mvfr1;
+        u = FIELD_DP32(u, MVFR1, FPFTZ, 0);
+        u = FIELD_DP32(u, MVFR1, FPDNAN, 0);
+        u = FIELD_DP32(u, MVFR1, FPHP, 0);
+        cpu->isar.mvfr1 = u;
+
+        u = cpu->isar.mvfr2;
+        u = FIELD_DP32(u, MVFR2, FPMISC, 0);
+        cpu->isar.mvfr2 = u;
+    }
+
+    if (!cpu->has_neon) {
+        uint64_t t;
+        uint32_t u;
+
+        unset_feature(env, ARM_FEATURE_NEON);
+
+        t = cpu->isar.id_aa64isar0;
+        t = FIELD_DP64(t, ID_AA64ISAR0, DP, 0);
+        cpu->isar.id_aa64isar0 = t;
+
+        t = cpu->isar.id_aa64isar1;
+        t = FIELD_DP64(t, ID_AA64ISAR1, FCMA, 0);
+        cpu->isar.id_aa64isar1 = t;
+
+        t = cpu->isar.id_aa64pfr0;
+        t = FIELD_DP64(t, ID_AA64PFR0, ADVSIMD, 0xf);
+        cpu->isar.id_aa64pfr0 = t;
+
+        u = cpu->isar.id_isar5;
+        u = FIELD_DP32(u, ID_ISAR5, RDM, 0);
+        u = FIELD_DP32(u, ID_ISAR5, VCMA, 0);
+        cpu->isar.id_isar5 = u;
+
+        u = cpu->isar.id_isar6;
+        u = FIELD_DP32(u, ID_ISAR6, DP, 0);
+        u = FIELD_DP32(u, ID_ISAR6, FHM, 0);
+        cpu->isar.id_isar6 = u;
+
+        u = cpu->isar.mvfr1;
+        u = FIELD_DP32(u, MVFR1, SIMDLS, 0);
+        u = FIELD_DP32(u, MVFR1, SIMDINT, 0);
+        u = FIELD_DP32(u, MVFR1, SIMDSP, 0);
+        u = FIELD_DP32(u, MVFR1, SIMDHP, 0);
+        u = FIELD_DP32(u, MVFR1, SIMDFMAC, 0);
+        cpu->isar.mvfr1 = u;
+
+        u = cpu->isar.mvfr2;
+        u = FIELD_DP32(u, MVFR2, SIMDMISC, 0);
+        cpu->isar.mvfr2 = u;
+    }
+
+    if (!cpu->has_neon && !cpu->has_vfp) {
+        uint64_t t;
+        uint32_t u;
+
+        t = cpu->isar.id_aa64isar0;
+        t = FIELD_DP64(t, ID_AA64ISAR0, FHM, 0);
+        cpu->isar.id_aa64isar0 = t;
+
+        t = cpu->isar.id_aa64isar1;
+        t = FIELD_DP64(t, ID_AA64ISAR1, FRINTTS, 0);
+        cpu->isar.id_aa64isar1 = t;
+
+        u = cpu->isar.mvfr0;
+        u = FIELD_DP32(u, MVFR0, SIMDREG, 0);
+        cpu->isar.mvfr0 = u;
+    }
+
+    if (arm_feature(env, ARM_FEATURE_M) && !cpu->has_dsp) {
+        uint32_t u;
+
+        unset_feature(env, ARM_FEATURE_THUMB_DSP);
+
+        u = cpu->isar.id_isar1;
+        u = FIELD_DP32(u, ID_ISAR1, EXTEND, 1);
+        cpu->isar.id_isar1 = u;
+
+        u = cpu->isar.id_isar2;
+        u = FIELD_DP32(u, ID_ISAR2, MULTU, 1);
+        u = FIELD_DP32(u, ID_ISAR2, MULTS, 1);
+        cpu->isar.id_isar2 = u;
+
+        u = cpu->isar.id_isar3;
+        u = FIELD_DP32(u, ID_ISAR3, SIMD, 1);
+        u = FIELD_DP32(u, ID_ISAR3, SATURATE, 0);
+        cpu->isar.id_isar3 = u;
+    }
+
     /* Some features automatically imply others: */
     if (arm_feature(env, ARM_FEATURE_V8)) {
         if (arm_feature(env, ARM_FEATURE_M)) {
@@ -1016,12 +1189,6 @@ static void arm_cpu_realizefn(DeviceState *dev, Error **errp)
     if (arm_feature(env, ARM_FEATURE_V5)) {
         set_feature(env, ARM_FEATURE_V4T);
     }
-    if (arm_feature(env, ARM_FEATURE_VFP4)) {
-        set_feature(env, ARM_FEATURE_VFP3);
-    }
-    if (arm_feature(env, ARM_FEATURE_VFP3)) {
-        set_feature(env, ARM_FEATURE_VFP);
-    }
     if (arm_feature(env, ARM_FEATURE_LPAE)) {
         set_feature(env, ARM_FEATURE_V7MP);
         set_feature(env, ARM_FEATURE_PXN);
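One subtlety in the realize-time squashing above: most ID fields are cleared to 0 ("not implemented"), but ID_AA64PFR0.FP and .AdvSIMD are signed fields where 0xf (i.e. -1) is the "not present" encoding. A minimal illustration of the FIELD_DP64 read-modify-write pattern (register and field names as in target/arm/cpu.h):

    uint64_t t = cpu->isar.id_aa64pfr0;
    t = FIELD_DP64(t, ID_AA64PFR0, FP, 0xf);       /* 0xf == FP not present */
    t = FIELD_DP64(t, ID_AA64PFR0, ADVSIMD, 0xf);  /* likewise for AdvSIMD */
    cpu->isar.id_aa64pfr0 = t;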
diff --git a/target/arm/cpu.h b/target/arm/cpu.h
index 9229862421..f9da672be5 100644
--- a/target/arm/cpu.h
+++ b/target/arm/cpu.h
@@ -786,6 +786,12 @@ struct ARMCPU {
     bool has_el3;
     /* CPU has PMU (Performance Monitor Unit) */
     bool has_pmu;
+    /* CPU has VFP */
+    bool has_vfp;
+    /* CPU has Neon */
+    bool has_neon;
+    /* CPU has M-profile DSP extension */
+    bool has_dsp;
 
     /* CPU has memory protection unit */
     bool has_mpu;
@@ -3382,6 +3388,12 @@ static inline bool isar_feature_aa32_fpshvec(const ARMISARegisters *id)
     return FIELD_EX64(id->mvfr0, MVFR0, FPSHVEC) > 0;
 }
 
+static inline bool isar_feature_aa32_fpdp(const ARMISARegisters *id)
+{
+    /* Return true if CPU supports double precision floating point */
+    return FIELD_EX64(id->mvfr0, MVFR0, FPDP) > 0;
+}
+
 /*
  * We always set the FP and SIMD FP16 fields to indicate identical
  * levels of support (assuming SIMD is implemented at all), so
diff --git a/target/arm/translate-a64.c b/target/arm/translate-a64.c
index ae739f6575..97f4164fbb 100644
--- a/target/arm/translate-a64.c
+++ b/target/arm/translate-a64.c
@@ -6380,38 +6380,6 @@ static void disas_fp_3src(DisasContext *s, uint32_t insn)
     }
 }
 
-/* The imm8 encodes the sign bit, enough bits to represent an exponent in
- * the range 01....1xx to 10....0xx, and the most significant 4 bits of
- * the mantissa; see VFPExpandImm() in the v8 ARM ARM.
- */
-uint64_t vfp_expand_imm(int size, uint8_t imm8)
-{
-    uint64_t imm;
-
-    switch (size) {
-    case MO_64:
-        imm = (extract32(imm8, 7, 1) ? 0x8000 : 0) |
-            (extract32(imm8, 6, 1) ? 0x3fc0 : 0x4000) |
-            extract32(imm8, 0, 6);
-        imm <<= 48;
-        break;
-    case MO_32:
-        imm = (extract32(imm8, 7, 1) ? 0x8000 : 0) |
-            (extract32(imm8, 6, 1) ? 0x3e00 : 0x4000) |
-            (extract32(imm8, 0, 6) << 3);
-        imm <<= 16;
-        break;
-    case MO_16:
-        imm = (extract32(imm8, 7, 1) ? 0x8000 : 0) |
-            (extract32(imm8, 6, 1) ? 0x3000 : 0x4000) |
-            (extract32(imm8, 0, 6) << 6);
-        break;
-    default:
-        g_assert_not_reached();
-    }
-    return imm;
-}
-
 /* Floating point immediate
  *   31  30  29 28       24 23  22  21 20        13 12   10 9    5 4    0
  * +---+---+---+-----------+------+---+------------+-------+------+------+
diff --git a/target/arm/translate-a64.h b/target/arm/translate-a64.h
index 9569bc5963..9ab40872d8 100644
--- a/target/arm/translate-a64.h
+++ b/target/arm/translate-a64.h
@@ -39,7 +39,6 @@ void write_fp_dreg(DisasContext *s, int reg, TCGv_i64 v);
 TCGv_ptr get_fpstatus_ptr(bool);
 bool logic_imm_decode_wmask(uint64_t *result, unsigned int immn,
                             unsigned int imms, unsigned int immr);
-uint64_t vfp_expand_imm(int size, uint8_t imm8);
 bool sve_access_check(DisasContext *s);
 
 /* We should have at some point before trying to access an FP register
diff --git a/target/arm/translate-vfp.inc.c b/target/arm/translate-vfp.inc.c
index 709fc65374..348173d602 100644
--- a/target/arm/translate-vfp.inc.c
+++ b/target/arm/translate-vfp.inc.c
@@ -30,6 +30,39 @@
 #include "decode-vfp.inc.c"
 #include "decode-vfp-uncond.inc.c"
 
+/*
+ * The imm8 encodes the sign bit, enough bits to represent an exponent in
+ * the range 01....1xx to 10....0xx, and the most significant 4 bits of
+ * the mantissa; see VFPExpandImm() in the v8 ARM ARM.
+ */
+uint64_t vfp_expand_imm(int size, uint8_t imm8)
+{
+    uint64_t imm;
+
+    switch (size) {
+    case MO_64:
+        imm = (extract32(imm8, 7, 1) ? 0x8000 : 0) |
+            (extract32(imm8, 6, 1) ? 0x3fc0 : 0x4000) |
+            extract32(imm8, 0, 6);
+        imm <<= 48;
+        break;
+    case MO_32:
+        imm = (extract32(imm8, 7, 1) ? 0x8000 : 0) |
+            (extract32(imm8, 6, 1) ? 0x3e00 : 0x4000) |
+            (extract32(imm8, 0, 6) << 3);
+        imm <<= 16;
+        break;
+    case MO_16:
+        imm = (extract32(imm8, 7, 1) ? 0x8000 : 0) |
+            (extract32(imm8, 6, 1) ? 0x3000 : 0x4000) |
+            (extract32(imm8, 0, 6) << 6);
+        break;
+    default:
+        g_assert_not_reached();
+    }
+    return imm;
+}
+
 /*
  * Return the offset of a 16-bit half of the specified VFP single-precision
  * register. If top is true, returns the top 16 bits; otherwise the bottom
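A quick hand check of the expansion above, using imm8 = 0x70 (sign 0, bit 6 set, mantissa bits 110000):

    /*
     * MO_32: (0x3e00 | (0x30 << 3)) << 16 = 0x3f800000         -> 1.0f
     * MO_64: (0x3fc0 |  0x30)       << 48 = 0x3ff0000000000000 -> 1.0
     */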
@@ -173,6 +206,11 @@ static bool trans_VSEL(DisasContext *s, arg_VSEL *a)
         ((a->vm | a->vn | a->vd) & 0x10)) {
         return false;
     }
+
+    if (dp && !dc_isar_feature(aa32_fpdp, s)) {
+        return false;
+    }
+
     rd = a->vd;
     rn = a->vn;
     rm = a->vm;
@@ -301,6 +339,11 @@ static bool trans_VMINMAXNM(DisasContext *s, arg_VMINMAXNM *a)
         ((a->vm | a->vn | a->vd) & 0x10)) {
         return false;
     }
+
+    if (dp && !dc_isar_feature(aa32_fpdp, s)) {
+        return false;
+    }
+
     rd = a->vd;
     rn = a->vn;
     rm = a->vm;
@@ -382,6 +425,11 @@ static bool trans_VRINT(DisasContext *s, arg_VRINT *a)
         ((a->vm | a->vd) & 0x10)) {
         return false;
     }
+
+    if (dp && !dc_isar_feature(aa32_fpdp, s)) {
+        return false;
+    }
+
     rd = a->vd;
     rm = a->vm;
@@ -440,6 +488,11 @@ static bool trans_VCVT(DisasContext *s, arg_VCVT *a)
     if (dp && !dc_isar_feature(aa32_fp_d32, s) && (a->vm & 0x10)) {
         return false;
     }
+
+    if (dp && !dc_isar_feature(aa32_fpdp, s)) {
+        return false;
+    }
+
     rd = a->vd;
     rm = a->vm;
@@ -835,7 +888,7 @@ static bool trans_VMOV_64_sp(DisasContext *s, arg_VMOV_64_sp *a)
     return true;
 }
 
-static bool trans_VMOV_64_dp(DisasContext *s, arg_VMOV_64_sp *a)
+static bool trans_VMOV_64_dp(DisasContext *s, arg_VMOV_64_dp *a)
 {
     TCGv_i32 tmp;
 
@@ -910,7 +963,7 @@ static bool trans_VLDR_VSTR_sp(DisasContext *s, arg_VLDR_VSTR_sp *a)
     return true;
 }
 
-static bool trans_VLDR_VSTR_dp(DisasContext *s, arg_VLDR_VSTR_sp *a)
+static bool trans_VLDR_VSTR_dp(DisasContext *s, arg_VLDR_VSTR_dp *a)
 {
     uint32_t offset;
     TCGv_i32 addr;
@@ -1268,6 +1321,10 @@ static bool do_vfp_3op_dp(DisasContext *s, VFPGen3OpDPFn *fn,
         return false;
     }
 
+    if (!dc_isar_feature(aa32_fpdp, s)) {
+        return false;
+    }
+
     if (!dc_isar_feature(aa32_fpshvec, s) &&
         (veclen != 0 || s->vec_stride != 0)) {
         return false;
@@ -1413,6 +1470,10 @@ static bool do_vfp_2op_dp(DisasContext *s, VFPGen2OpDPFn *fn, int vd, int vm)
         return false;
     }
 
+    if (!dc_isar_feature(aa32_fpdp, s)) {
+        return false;
+    }
+
     if (!dc_isar_feature(aa32_fpshvec, s) &&
         (veclen != 0 || s->vec_stride != 0)) {
         return false;
@@ -1500,7 +1561,7 @@ static void gen_VMLA_dp(TCGv_i64 vd, TCGv_i64 vn, TCGv_i64 vm, TCGv_ptr fpst)
     tcg_temp_free_i64(tmp);
 }
 
-static bool trans_VMLA_dp(DisasContext *s, arg_VMLA_sp *a)
+static bool trans_VMLA_dp(DisasContext *s, arg_VMLA_dp *a)
 {
     return do_vfp_3op_dp(s, gen_VMLA_dp, a->vd, a->vn, a->vm, true);
 }
@@ -1538,7 +1599,7 @@ static void gen_VMLS_dp(TCGv_i64 vd, TCGv_i64 vn, TCGv_i64 vm, TCGv_ptr fpst)
     tcg_temp_free_i64(tmp);
 }
 
-static bool trans_VMLS_dp(DisasContext *s, arg_VMLS_sp *a)
+static bool trans_VMLS_dp(DisasContext *s, arg_VMLS_dp *a)
 {
     return do_vfp_3op_dp(s, gen_VMLS_dp, a->vd, a->vn, a->vm, true);
 }
@@ -1580,7 +1641,7 @@ static void gen_VNMLS_dp(TCGv_i64 vd, TCGv_i64 vn, TCGv_i64 vm, TCGv_ptr fpst)
     tcg_temp_free_i64(tmp);
 }
 
-static bool trans_VNMLS_dp(DisasContext *s, arg_VNMLS_sp *a)
+static bool trans_VNMLS_dp(DisasContext *s, arg_VNMLS_dp *a)
 {
     return do_vfp_3op_dp(s, gen_VNMLS_dp, a->vd, a->vn, a->vm, true);
 }
@@ -1614,7 +1675,7 @@ static void gen_VNMLA_dp(TCGv_i64 vd, TCGv_i64 vn, TCGv_i64 vm, TCGv_ptr fpst)
     tcg_temp_free_i64(tmp);
 }
 
-static bool trans_VNMLA_dp(DisasContext *s, arg_VNMLA_sp *a)
+static bool trans_VNMLA_dp(DisasContext *s, arg_VNMLA_dp *a)
 {
     return do_vfp_3op_dp(s, gen_VNMLA_dp, a->vd, a->vn, a->vm, true);
 }
@@ -1624,7 +1685,7 @@ static bool trans_VMUL_sp(DisasContext *s, arg_VMUL_sp *a)
     return do_vfp_3op_sp(s, gen_helper_vfp_muls, a->vd, a->vn, a->vm, false);
 }
 
-static bool trans_VMUL_dp(DisasContext *s, arg_VMUL_sp *a)
+static bool trans_VMUL_dp(DisasContext *s, arg_VMUL_dp *a)
 {
     return do_vfp_3op_dp(s, gen_helper_vfp_muld, a->vd, a->vn, a->vm, false);
 }
@@ -1648,7 +1709,7 @@ static void gen_VNMUL_dp(TCGv_i64 vd, TCGv_i64 vn, TCGv_i64 vm, TCGv_ptr fpst)
     gen_helper_vfp_negd(vd, vd);
 }
 
-static bool trans_VNMUL_dp(DisasContext *s, arg_VNMUL_sp *a)
+static bool trans_VNMUL_dp(DisasContext *s, arg_VNMUL_dp *a)
 {
     return do_vfp_3op_dp(s, gen_VNMUL_dp, a->vd, a->vn, a->vm, false);
 }
@@ -1658,7 +1719,7 @@ static bool trans_VADD_sp(DisasContext *s, arg_VADD_sp *a)
     return do_vfp_3op_sp(s, gen_helper_vfp_adds, a->vd, a->vn, a->vm, false);
 }
 
-static bool trans_VADD_dp(DisasContext *s, arg_VADD_sp *a)
+static bool trans_VADD_dp(DisasContext *s, arg_VADD_dp *a)
 {
     return do_vfp_3op_dp(s, gen_helper_vfp_addd, a->vd, a->vn, a->vm, false);
 }
@@ -1668,7 +1729,7 @@ static bool trans_VSUB_sp(DisasContext *s, arg_VSUB_sp *a)
     return do_vfp_3op_sp(s, gen_helper_vfp_subs, a->vd, a->vn, a->vm, false);
 }
 
-static bool trans_VSUB_dp(DisasContext *s, arg_VSUB_sp *a)
+static bool trans_VSUB_dp(DisasContext *s, arg_VSUB_dp *a)
 {
     return do_vfp_3op_dp(s, gen_helper_vfp_subd, a->vd, a->vn, a->vm, false);
 }
@@ -1678,7 +1739,7 @@ static bool trans_VDIV_sp(DisasContext *s, arg_VDIV_sp *a)
     return do_vfp_3op_sp(s, gen_helper_vfp_divs, a->vd, a->vn, a->vm, false);
 }
 
-static bool trans_VDIV_dp(DisasContext *s, arg_VDIV_sp *a)
+static bool trans_VDIV_dp(DisasContext *s, arg_VDIV_dp *a)
 {
     return do_vfp_3op_dp(s, gen_helper_vfp_divd, a->vd, a->vn, a->vm, false);
 }
@@ -1710,6 +1771,10 @@ static bool trans_VFM_sp(DisasContext *s, arg_VFM_sp *a)
         return false;
     }
 
+    if (!dc_isar_feature(aa32_fpdp, s)) {
+        return false;
+    }
+
     if (!vfp_access_check(s)) {
         return true;
     }
@@ -1741,7 +1806,7 @@ static bool trans_VFM_sp(DisasContext *s, arg_VFM_sp *a)
     return true;
 }
 
-static bool trans_VFM_dp(DisasContext *s, arg_VFM_sp *a)
+static bool trans_VFM_dp(DisasContext *s, arg_VFM_dp *a)
 {
     /*
      * VFNMA : fd = muladd(-fd, fn, fm)
@@ -1809,7 +1874,7 @@ static bool trans_VMOV_imm_sp(DisasContext *s, arg_VMOV_imm_sp *a)
     uint32_t delta_d = 0;
     int veclen = s->vec_len;
     TCGv_i32 fd;
-    uint32_t n, i, vd;
+    uint32_t vd;
 
     vd = a->vd;
 
@@ -1836,17 +1901,7 @@ static bool trans_VMOV_imm_sp(DisasContext *s, arg_VMOV_imm_sp *a)
         }
     }
 
-    n = (a->imm4h << 28) & 0x80000000;
-    i = ((a->imm4h << 4) & 0x70) | a->imm4l;
-    if (i & 0x40) {
-        i |= 0x780;
-    } else {
-        i |= 0x800;
-    }
-    n |= i << 19;
-
-    fd = tcg_temp_new_i32();
-    tcg_gen_movi_i32(fd, n);
+    fd = tcg_const_i32(vfp_expand_imm(MO_32, a->imm));
 
     for (;;) {
         neon_store_reg32(fd, vd);
@@ -1869,7 +1924,7 @@ static bool trans_VMOV_imm_dp(DisasContext *s, arg_VMOV_imm_dp *a)
     uint32_t delta_d = 0;
     int veclen = s->vec_len;
     TCGv_i64 fd;
-    uint32_t n, i, vd;
+    uint32_t vd;
 
     vd = a->vd;
 
@@ -1878,6 +1933,10 @@ static bool trans_VMOV_imm_dp(DisasContext *s, arg_VMOV_imm_dp *a)
         return false;
     }
 
+    if (!dc_isar_feature(aa32_fpdp, s)) {
+        return false;
+    }
+
     if (!dc_isar_feature(aa32_fpshvec, s) &&
         (veclen != 0 || s->vec_stride != 0)) {
         return false;
@@ -1901,17 +1960,7 @@ static bool trans_VMOV_imm_dp(DisasContext *s, arg_VMOV_imm_dp *a)
         }
     }
 
-    n = (a->imm4h << 28) & 0x80000000;
-    i = ((a->imm4h << 4) & 0x70) | a->imm4l;
-    if (i & 0x40) {
-        i |= 0x3f80;
-    } else {
-        i |= 0x4000;
-    }
-    n |= i << 16;
-
-    fd = tcg_temp_new_i64();
-    tcg_gen_movi_i64(fd, ((uint64_t)n) << 32);
+    fd = tcg_const_i64(vfp_expand_imm(MO_64, a->imm));
 
     for (;;) {
         neon_store_reg64(fd, vd);
@@ -2028,6 +2077,10 @@ static bool trans_VCMP_dp(DisasContext *s, arg_VCMP_dp *a)
         return false;
     }
 
+    if (!dc_isar_feature(aa32_fpdp, s)) {
+        return false;
+    }
+
     if (!vfp_access_check(s)) {
         return true;
     }
@@ -2097,6 +2150,10 @@ static bool trans_VCVT_f64_f16(DisasContext *s, arg_VCVT_f64_f16 *a)
         return false;
     }
 
+    if (!dc_isar_feature(aa32_fpdp, s)) {
+        return false;
+    }
+
     if (!vfp_access_check(s)) {
         return true;
     }
@@ -2159,6 +2216,10 @@ static bool trans_VCVT_f16_f64(DisasContext *s, arg_VCVT_f16_f64 *a)
         return false;
     }
 
+    if (!dc_isar_feature(aa32_fpdp, s)) {
+        return false;
+    }
+
     if (!vfp_access_check(s)) {
         return true;
     }
@@ -2201,7 +2262,7 @@ static bool trans_VRINTR_sp(DisasContext *s, arg_VRINTR_sp *a)
     return true;
 }
 
-static bool trans_VRINTR_dp(DisasContext *s, arg_VRINTR_sp *a)
+static bool trans_VRINTR_dp(DisasContext *s, arg_VRINTR_dp *a)
 {
     TCGv_ptr fpst;
     TCGv_i64 tmp;
@@ -2215,6 +2276,10 @@ static bool trans_VRINTR_dp(DisasContext *s, arg_VRINTR_sp *a)
         return false;
     }
 
+    if (!dc_isar_feature(aa32_fpdp, s)) {
+        return false;
+    }
+
     if (!vfp_access_check(s)) {
         return true;
     }
@@ -2257,7 +2322,7 @@ static bool trans_VRINTZ_sp(DisasContext *s, arg_VRINTZ_sp *a)
     return true;
 }
 
-static bool trans_VRINTZ_dp(DisasContext *s, arg_VRINTZ_sp *a)
+static bool trans_VRINTZ_dp(DisasContext *s, arg_VRINTZ_dp *a)
 {
     TCGv_ptr fpst;
     TCGv_i64 tmp;
@@ -2272,6 +2337,10 @@ static bool trans_VRINTZ_dp(DisasContext *s, arg_VRINTZ_sp *a)
         return false;
     }
 
+    if (!dc_isar_feature(aa32_fpdp, s)) {
+        return false;
+    }
+
     if (!vfp_access_check(s)) {
         return true;
     }
@@ -2327,6 +2396,10 @@ static bool trans_VRINTX_dp(DisasContext *s, arg_VRINTX_dp *a)
         return false;
     }
 
+    if (!dc_isar_feature(aa32_fpdp, s)) {
+        return false;
+    }
+
     if (!vfp_access_check(s)) {
         return true;
     }
@@ -2351,6 +2424,10 @@ static bool trans_VCVT_sp(DisasContext *s, arg_VCVT_sp *a)
         return false;
     }
 
+    if (!dc_isar_feature(aa32_fpdp, s)) {
+        return false;
+    }
+
     if (!vfp_access_check(s)) {
         return true;
     }
@@ -2375,6 +2452,10 @@ static bool trans_VCVT_dp(DisasContext *s, arg_VCVT_dp *a)
         return false;
     }
 
+    if (!dc_isar_feature(aa32_fpdp, s)) {
+        return false;
+    }
+
     if (!vfp_access_check(s)) {
         return true;
     }
@@ -2425,6 +2506,10 @@ static bool trans_VCVT_int_dp(DisasContext *s, arg_VCVT_int_dp *a)
         return false;
     }
 
+    if (!dc_isar_feature(aa32_fpdp, s)) {
+        return false;
+    }
+
     if (!vfp_access_check(s)) {
         return true;
     }
@@ -2461,6 +2546,10 @@ static bool trans_VJCVT(DisasContext *s, arg_VJCVT *a)
         return false;
     }
 
+    if (!dc_isar_feature(aa32_fpdp, s)) {
+        return false;
+    }
+
     if (!vfp_access_check(s)) {
         return true;
     }
@@ -2550,6 +2639,10 @@ static bool trans_VCVT_fix_dp(DisasContext *s, arg_VCVT_fix_dp *a)
         return false;
    }
 
+    if (!dc_isar_feature(aa32_fpdp, s)) {
+        return false;
+    }
+
     if (!vfp_access_check(s)) {
         return true;
     }
@@ -2642,6 +2735,10 @@ static bool trans_VCVT_dp_int(DisasContext *s, arg_VCVT_dp_int *a)
         return false;
     }
 
+    if (!dc_isar_feature(aa32_fpdp, s)) {
+        return false;
+    }
+
     if (!vfp_access_check(s)) {
         return true;
     }
diff --git a/target/arm/translate.c b/target/arm/translate.c
index c274c8b460..4750b9fa1b 100644
--- a/target/arm/translate.c
+++ b/target/arm/translate.c
@@ -67,10 +67,6 @@ TCGv_i32 cpu_CF, cpu_NF, cpu_VF, cpu_ZF;
 TCGv_i64 cpu_exclusive_addr;
 TCGv_i64 cpu_exclusive_val;
 
-/* FIXME:  These should be removed.  */
-static TCGv_i32 cpu_F0s, cpu_F1s;
-static TCGv_i64 cpu_F0d, cpu_F1d;
-
 #include "exec/gen-icount.h"
 
 static const char * const regnames[] =
@@ -80,6 +76,8 @@ static const char * const regnames[] =
 /* Function prototypes for gen_ functions calling Neon helpers.  */
 typedef void NeonGenThreeOpEnvFn(TCGv_i32, TCGv_env, TCGv_i32,
                                  TCGv_i32, TCGv_i32);
+/* Function prototypes for gen_ functions for fix point conversions */
+typedef void VFPGenFixPointFn(TCGv_i32, TCGv_i32, TCGv_i32, TCGv_ptr);
 
 /* initialize TCG globals.  */
 void arm_translate_init(void)
@@ -1374,75 +1372,6 @@ static TCGv_ptr get_fpstatus_ptr(int neon)
     return statusptr;
 }
 
-static inline void gen_vfp_abs(int dp)
-{
-    if (dp)
-        gen_helper_vfp_absd(cpu_F0d, cpu_F0d);
-    else
-        gen_helper_vfp_abss(cpu_F0s, cpu_F0s);
-}
-
-static inline void gen_vfp_neg(int dp)
-{
-    if (dp)
-        gen_helper_vfp_negd(cpu_F0d, cpu_F0d);
-    else
-        gen_helper_vfp_negs(cpu_F0s, cpu_F0s);
-}
-
-#define VFP_GEN_ITOF(name) \
-static inline void gen_vfp_##name(int dp, int neon) \
-{ \
-    TCGv_ptr statusptr = get_fpstatus_ptr(neon); \
-    if (dp) { \
-        gen_helper_vfp_##name##d(cpu_F0d, cpu_F0s, statusptr); \
-    } else { \
-        gen_helper_vfp_##name##s(cpu_F0s, cpu_F0s, statusptr); \
-    } \
-    tcg_temp_free_ptr(statusptr); \
-}
-
-VFP_GEN_ITOF(uito)
-VFP_GEN_ITOF(sito)
-#undef VFP_GEN_ITOF
-
-#define VFP_GEN_FTOI(name) \
-static inline void gen_vfp_##name(int dp, int neon) \
-{ \
-    TCGv_ptr statusptr = get_fpstatus_ptr(neon); \
-    if (dp) { \
-        gen_helper_vfp_##name##d(cpu_F0s, cpu_F0d, statusptr); \
-    } else { \
-        gen_helper_vfp_##name##s(cpu_F0s, cpu_F0s, statusptr); \
-    } \
-    tcg_temp_free_ptr(statusptr); \
-}
-
-VFP_GEN_FTOI(touiz)
-VFP_GEN_FTOI(tosiz)
-#undef VFP_GEN_FTOI
-
-#define VFP_GEN_FIX(name, round) \
-static inline void gen_vfp_##name(int dp, int shift, int neon) \
-{ \
-    TCGv_i32 tmp_shift = tcg_const_i32(shift); \
-    TCGv_ptr statusptr = get_fpstatus_ptr(neon); \
-    if (dp) { \
-        gen_helper_vfp_##name##d##round(cpu_F0d, cpu_F0d, tmp_shift, \
-                                        statusptr); \
-    } else { \
-        gen_helper_vfp_##name##s##round(cpu_F0s, cpu_F0s, tmp_shift, \
-                                        statusptr); \
-    } \
-    tcg_temp_free_i32(tmp_shift); \
-    tcg_temp_free_ptr(statusptr); \
-}
-VFP_GEN_FIX(tosl, _round_to_zero)
-VFP_GEN_FIX(toul, _round_to_zero)
-VFP_GEN_FIX(slto, )
-VFP_GEN_FIX(ulto, )
-#undef VFP_GEN_FIX
-
 static inline long vfp_reg_offset(bool dp, unsigned reg)
 {
     if (dp) {
@@ -1609,9 +1538,6 @@ static TCGv_ptr vfp_reg_ptr(bool dp, int reg)
     return ret;
 }
 
-#define tcg_gen_ld_f32 tcg_gen_ld_i32
-#define tcg_gen_st_f32 tcg_gen_st_i32
-
 #define ARM_CP_RW_BIT   (1 << 20)
 
 /* Include the VFP decoder */
@@ -4189,16 +4115,6 @@ static const uint8_t neon_3r_sizes[] = {
 #define NEON_2RM_VCVT_SF 62
 #define NEON_2RM_VCVT_UF 63
 
-static int neon_2rm_is_float_op(int op)
-{
-    /* Return true if this neon 2reg-misc op is float-to-float */
-    return (op == NEON_2RM_VABS_F || op == NEON_2RM_VNEG_F ||
-            (op >= NEON_2RM_VRINTN && op <= NEON_2RM_VRINTZ) ||
-            op == NEON_2RM_VRINTM ||
-            (op >= NEON_2RM_VRINTP && op <= NEON_2RM_VCVTMS) ||
-            op >= NEON_2RM_VRECPE_F);
-}
-
 static bool neon_2rm_is_v8_op(int op)
 {
     /* Return true if this neon 2reg-misc op is ARMv8 and up */
@@ -5779,28 +5695,41 @@ static int disas_neon_data_insn(DisasContext *s, uint32_t insn)
                    }
                 } else if (op >= 14) {
                     /* VCVT fixed-point.  */
+                    TCGv_ptr fpst;
+                    TCGv_i32 shiftv;
+                    VFPGenFixPointFn *fn;
+
                     if (!(insn & (1 << 21)) || (q && ((rd | rm) & 1))) {
                         return 1;
                     }
+
+                    if (!(op & 1)) {
+                        if (u) {
+                            fn = gen_helper_vfp_ultos;
+                        } else {
+                            fn = gen_helper_vfp_sltos;
+                        }
+                    } else {
+                        if (u) {
+                            fn = gen_helper_vfp_touls_round_to_zero;
+                        } else {
+                            fn = gen_helper_vfp_tosls_round_to_zero;
+                        }
+                    }
+
                     /* We have already masked out the must-be-1 top bit of imm6,
                      * hence this 32-shift where the ARM ARM has 64-imm6.
                      */
                     shift = 32 - shift;
+                    fpst = get_fpstatus_ptr(1);
+                    shiftv = tcg_const_i32(shift);
                     for (pass = 0; pass < (q ? 4 : 2); pass++) {
-                        tcg_gen_ld_f32(cpu_F0s, cpu_env, neon_reg_offset(rm, pass));
-                        if (!(op & 1)) {
-                            if (u)
-                                gen_vfp_ulto(0, shift, 1);
-                            else
-                                gen_vfp_slto(0, shift, 1);
-                        } else {
-                            if (u)
-                                gen_vfp_toul(0, shift, 1);
-                            else
-                                gen_vfp_tosl(0, shift, 1);
-                        }
-                        tcg_gen_st_f32(cpu_F0s, cpu_env, neon_reg_offset(rd, pass));
+                        TCGv_i32 tmpf = neon_load_reg(rm, pass);
+                        fn(tmpf, tmpf, shiftv, fpst);
+                        neon_store_reg(rd, pass, tmpf);
                     }
+                    tcg_temp_free_ptr(fpst);
+                    tcg_temp_free_i32(shiftv);
                 } else {
                     return 1;
                 }
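The rewrite works because all four helpers selected above share the VFPGenFixPointFn shape (dest, src, fixed-point shift, fp status pointer), which is what lets one function pointer replace the old gen_vfp_* macro family:

    /* All of these match: void fn(TCGv_i32 dest, TCGv_i32 src,
     *                             TCGv_i32 shift, TCGv_ptr fpst);
     *   gen_helper_vfp_sltos, gen_helper_vfp_ultos,
     *   gen_helper_vfp_tosls_round_to_zero, gen_helper_vfp_touls_round_to_zero
     */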
@@ -6489,25 +6418,23 @@ static int disas_neon_data_insn(DisasContext *s, uint32_t insn)
                         q || (rm & 1)) {
                         return 1;
                     }
-                    tmp = tcg_temp_new_i32();
-                    tmp2 = tcg_temp_new_i32();
                     fpst = get_fpstatus_ptr(true);
                     ahp = get_ahp_flag();
-                    tcg_gen_ld_f32(cpu_F0s, cpu_env, neon_reg_offset(rm, 0));
-                    gen_helper_vfp_fcvt_f32_to_f16(tmp, cpu_F0s, fpst, ahp);
-                    tcg_gen_ld_f32(cpu_F0s, cpu_env, neon_reg_offset(rm, 1));
-                    gen_helper_vfp_fcvt_f32_to_f16(tmp2, cpu_F0s, fpst, ahp);
+                    tmp = neon_load_reg(rm, 0);
+                    gen_helper_vfp_fcvt_f32_to_f16(tmp, tmp, fpst, ahp);
+                    tmp2 = neon_load_reg(rm, 1);
+                    gen_helper_vfp_fcvt_f32_to_f16(tmp2, tmp2, fpst, ahp);
                     tcg_gen_shli_i32(tmp2, tmp2, 16);
                     tcg_gen_or_i32(tmp2, tmp2, tmp);
-                    tcg_gen_ld_f32(cpu_F0s, cpu_env, neon_reg_offset(rm, 2));
-                    gen_helper_vfp_fcvt_f32_to_f16(tmp, cpu_F0s, fpst, ahp);
-                    tcg_gen_ld_f32(cpu_F0s, cpu_env, neon_reg_offset(rm, 3));
+                    tcg_temp_free_i32(tmp);
+                    tmp = neon_load_reg(rm, 2);
+                    gen_helper_vfp_fcvt_f32_to_f16(tmp, tmp, fpst, ahp);
+                    tmp3 = neon_load_reg(rm, 3);
                     neon_store_reg(rd, 0, tmp2);
-                    tmp2 = tcg_temp_new_i32();
-                    gen_helper_vfp_fcvt_f32_to_f16(tmp2, cpu_F0s, fpst, ahp);
-                    tcg_gen_shli_i32(tmp2, tmp2, 16);
-                    tcg_gen_or_i32(tmp2, tmp2, tmp);
-                    neon_store_reg(rd, 1, tmp2);
+                    gen_helper_vfp_fcvt_f32_to_f16(tmp3, tmp3, fpst, ahp);
+                    tcg_gen_shli_i32(tmp3, tmp3, 16);
+                    tcg_gen_or_i32(tmp3, tmp3, tmp);
+                    neon_store_reg(rd, 1, tmp3);
                     tcg_temp_free_i32(tmp);
                     tcg_temp_free_i32(ahp);
                     tcg_temp_free_ptr(fpst);
@@ -6527,20 +6454,18 @@ static int disas_neon_data_insn(DisasContext *s, uint32_t insn)
                     tmp = neon_load_reg(rm, 0);
                     tmp2 = neon_load_reg(rm, 1);
                     tcg_gen_ext16u_i32(tmp3, tmp);
-                    gen_helper_vfp_fcvt_f16_to_f32(cpu_F0s, tmp3, fpst, ahp);
-                    tcg_gen_st_f32(cpu_F0s, cpu_env, neon_reg_offset(rd, 0));
-                    tcg_gen_shri_i32(tmp3, tmp, 16);
-                    gen_helper_vfp_fcvt_f16_to_f32(cpu_F0s, tmp3, fpst, ahp);
-                    tcg_gen_st_f32(cpu_F0s, cpu_env, neon_reg_offset(rd, 1));
-                    tcg_temp_free_i32(tmp);
+                    gen_helper_vfp_fcvt_f16_to_f32(tmp3, tmp3, fpst, ahp);
+                    neon_store_reg(rd, 0, tmp3);
+                    tcg_gen_shri_i32(tmp, tmp, 16);
+                    gen_helper_vfp_fcvt_f16_to_f32(tmp, tmp, fpst, ahp);
+                    neon_store_reg(rd, 1, tmp);
+                    tmp3 = tcg_temp_new_i32();
                     tcg_gen_ext16u_i32(tmp3, tmp2);
-                    gen_helper_vfp_fcvt_f16_to_f32(cpu_F0s, tmp3, fpst, ahp);
-                    tcg_gen_st_f32(cpu_F0s, cpu_env, neon_reg_offset(rd, 2));
-                    tcg_gen_shri_i32(tmp3, tmp2, 16);
-                    gen_helper_vfp_fcvt_f16_to_f32(cpu_F0s, tmp3, fpst, ahp);
-                    tcg_gen_st_f32(cpu_F0s, cpu_env, neon_reg_offset(rd, 3));
-                    tcg_temp_free_i32(tmp2);
-                    tcg_temp_free_i32(tmp3);
+                    gen_helper_vfp_fcvt_f16_to_f32(tmp3, tmp3, fpst, ahp);
+                    neon_store_reg(rd, 2, tmp3);
+                    tcg_gen_shri_i32(tmp2, tmp2, 16);
+                    gen_helper_vfp_fcvt_f16_to_f32(tmp2, tmp2, fpst, ahp);
+                    neon_store_reg(rd, 3, tmp2);
                     tcg_temp_free_i32(ahp);
                     tcg_temp_free_ptr(fpst);
                     break;
@@ -6614,13 +6539,7 @@ static int disas_neon_data_insn(DisasContext *s, uint32_t insn)
             default:
             elementwise:
                 for (pass = 0; pass < (q ? 4 : 2); pass++) {
-                    if (neon_2rm_is_float_op(op)) {
-                        tcg_gen_ld_f32(cpu_F0s, cpu_env,
-                                       neon_reg_offset(rm, pass));
-                        tmp = NULL;
-                    } else {
-                        tmp = neon_load_reg(rm, pass);
-                    }
+                    tmp = neon_load_reg(rm, pass);
                     switch (op) {
                     case NEON_2RM_VREV32:
                         switch (size) {
@@ -6761,10 +6680,10 @@ static int disas_neon_data_insn(DisasContext *s, uint32_t insn)
                         break;
                     }
                     case NEON_2RM_VABS_F:
-                        gen_vfp_abs(0);
+                        gen_helper_vfp_abss(tmp, tmp);
                         break;
                     case NEON_2RM_VNEG_F:
-                        gen_vfp_neg(0);
+                        gen_helper_vfp_negs(tmp, tmp);
                         break;
                     case NEON_2RM_VSWP:
                         tmp2 = neon_load_reg(rd, pass);
@@ -6798,7 +6717,7 @@ static int disas_neon_data_insn(DisasContext *s, uint32_t insn)
                        tcg_rmode = tcg_const_i32(arm_rmode_to_sf(rmode));
                         gen_helper_set_neon_rmode(tcg_rmode, tcg_rmode,
                                                   cpu_env);
-                        gen_helper_rints(cpu_F0s, cpu_F0s, fpstatus);
+                        gen_helper_rints(tmp, tmp, fpstatus);
                         gen_helper_set_neon_rmode(tcg_rmode, tcg_rmode,
                                                   cpu_env);
                         tcg_temp_free_ptr(fpstatus);
@@ -6808,7 +6727,7 @@ static int disas_neon_data_insn(DisasContext *s, uint32_t insn)
                     case NEON_2RM_VRINTX:
                     {
                         TCGv_ptr fpstatus = get_fpstatus_ptr(1);
-                        gen_helper_rints_exact(cpu_F0s, cpu_F0s, fpstatus);
+                        gen_helper_rints_exact(tmp, tmp, fpstatus);
                         tcg_temp_free_ptr(fpstatus);
                         break;
                     }
@@ -6832,10 +6751,10 @@ static int disas_neon_data_insn(DisasContext *s, uint32_t insn)
                                                   cpu_env);
 
                         if (is_signed) {
-                            gen_helper_vfp_tosls(cpu_F0s, cpu_F0s,
+                            gen_helper_vfp_tosls(tmp, tmp,
                                                  tcg_shift, fpst);
                         } else {
-                            gen_helper_vfp_touls(cpu_F0s, cpu_F0s,
+                            gen_helper_vfp_touls(tmp, tmp,
                                                  tcg_shift, fpst);
                         }
 
@@ -6863,41 +6782,52 @@ static int disas_neon_data_insn(DisasContext *s, uint32_t insn)
                     case NEON_2RM_VRECPE_F:
                     {
                         TCGv_ptr fpstatus = get_fpstatus_ptr(1);
-                        gen_helper_recpe_f32(cpu_F0s, cpu_F0s, fpstatus);
+                        gen_helper_recpe_f32(tmp, tmp, fpstatus);
                         tcg_temp_free_ptr(fpstatus);
                         break;
                     }
                     case NEON_2RM_VRSQRTE_F:
                     {
                         TCGv_ptr fpstatus = get_fpstatus_ptr(1);
-                        gen_helper_rsqrte_f32(cpu_F0s, cpu_F0s, fpstatus);
+                        gen_helper_rsqrte_f32(tmp, tmp, fpstatus);
                         tcg_temp_free_ptr(fpstatus);
                         break;
                     }
                     case NEON_2RM_VCVT_FS: /* VCVT.F32.S32 */
-                        gen_vfp_sito(0, 1);
+                    {
+                        TCGv_ptr fpstatus = get_fpstatus_ptr(1);
+                        gen_helper_vfp_sitos(tmp, tmp, fpstatus);
+                        tcg_temp_free_ptr(fpstatus);
                         break;
+                    }
                     case NEON_2RM_VCVT_FU: /* VCVT.F32.U32 */
-                        gen_vfp_uito(0, 1);
+                    {
+                        TCGv_ptr fpstatus = get_fpstatus_ptr(1);
+                        gen_helper_vfp_uitos(tmp, tmp, fpstatus);
+                        tcg_temp_free_ptr(fpstatus);
                         break;
+                    }
                     case NEON_2RM_VCVT_SF: /* VCVT.S32.F32 */
-                        gen_vfp_tosiz(0, 1);
+                    {
+                        TCGv_ptr fpstatus = get_fpstatus_ptr(1);
+                        gen_helper_vfp_tosizs(tmp, tmp, fpstatus);
+                        tcg_temp_free_ptr(fpstatus);
                         break;
+                    }
                     case NEON_2RM_VCVT_UF: /* VCVT.U32.F32 */
-                        gen_vfp_touiz(0, 1);
+                    {
+                        TCGv_ptr fpstatus = get_fpstatus_ptr(1);
+                        gen_helper_vfp_touizs(tmp, tmp, fpstatus);
+                        tcg_temp_free_ptr(fpstatus);
                         break;
+                    }
                     default:
                         /* Reserved op values were caught by the
                          * neon_2rm_sizes[] check earlier.
                          */
                         abort();
                     }
-                    if (neon_2rm_is_float_op(op)) {
-                        tcg_gen_st_f32(cpu_F0s, cpu_env,
-                                       neon_reg_offset(rd, pass));
-                    } else {
-                        neon_store_reg(rd, pass, tmp);
-                    }
+                    neon_store_reg(rd, pass, tmp);
                 }
                 break;
             }
@@ -11977,12 +11907,8 @@ static void arm_tr_init_disas_context(DisasContextBase *dcbase, CPUState *cs)
         dc->base.max_insns = MIN(dc->base.max_insns, bound);
     }
 
-    cpu_F0s = tcg_temp_new_i32();
-    cpu_F1s = tcg_temp_new_i32();
-    cpu_F0d = tcg_temp_new_i64();
-    cpu_F1d = tcg_temp_new_i64();
-    cpu_V0 = cpu_F0d;
-    cpu_V1 = cpu_F1d;
+    cpu_V0 = tcg_temp_new_i64();
+    cpu_V1 = tcg_temp_new_i64();
     /* FIXME: cpu_M0 can probably be the same as cpu_V0.  */
     cpu_M0 = tcg_temp_new_i64();
 }
diff --git a/target/arm/translate.h b/target/arm/translate.h
index dc06dce767..bc1617809d 100644
--- a/target/arm/translate.h
+++ b/target/arm/translate.h
@@ -237,6 +237,13 @@ static inline void gen_ss_advance(DisasContext *s)
     }
 }
 
+/*
+ * Given a VFP floating point constant encoded into an 8 bit immediate in an
+ * instruction, expand it to the actual constant value of the specified
+ * size, as per the VFPExpandImm() pseudocode in the Arm ARM.
+ */
+uint64_t vfp_expand_imm(int size, uint8_t imm8);
+
 /* Vector operations shared between ARM and AArch64.  */
 extern const GVecGen3 mla_op[4];
 extern const GVecGen3 mls_op[4];
diff --git a/target/arm/vfp.decode b/target/arm/vfp.decode
index ea24365bb4..a67b3f29ee 100644
--- a/target/arm/vfp.decode
+++ b/target/arm/vfp.decode
@@ -44,6 +44,8 @@
 %vmov_idx_b     21:1 5:2
 %vmov_idx_h     21:1 6:1
 
+%vmov_imm       16:4 0:4
+
 # VMOV scalar to general-purpose register; note that this does
 # include some Neon cases.
 VMOV_to_gp   ---- 1110 u:1 1.        1 .... rt:4 1011 ... 1 0000 \
@@ -152,10 +154,10 @@ VFM_sp       ---- 1110 1.10 .... .... 1010 . o2:1 . 0 .... \
 VFM_dp       ---- 1110 1.10 .... .... 1011 . o2:1 . 0 .... \
              vm=%vm_dp vn=%vn_dp vd=%vd_dp o1=2
 
-VMOV_imm_sp  ---- 1110 1.11 imm4h:4 .... 1010 0000 imm4l:4 \
-             vd=%vd_sp
-VMOV_imm_dp  ---- 1110 1.11 imm4h:4 .... 1011 0000 imm4l:4 \
-             vd=%vd_dp
+VMOV_imm_sp  ---- 1110 1.11 .... .... 1010 0000 .... \
+             vd=%vd_sp imm=%vmov_imm
+VMOV_imm_dp  ---- 1110 1.11 .... .... 1011 0000 .... \
+             vd=%vd_dp imm=%vmov_imm
 
 VMOV_reg_sp  ---- 1110 1.11 0000 .... 1010 01.0 .... \
              vd=%vd_sp vm=%vm_sp
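For reference, the decodetree field syntax used above: a multi-segment field like %vmov_imm 16:4 0:4 concatenates insn[19:16] with insn[3:0], most-significant segment first, so the trans_VMOV_imm_* functions receive the full 8-bit value in a->imm ready to pass to vfp_expand_imm():

    # e.g. an insn with imm4h = 0111 and imm4l = 0000 decodes to
    # a->imm = 0x70, which expands to 1.0 as shown in the example earlier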