target-arm queue:

* support large kernel images in bootloader (by avoiding putting the initrd over the top of them) * correctly disable FPU/DSP in the CPU for the mps2-an521, musca-a boards * arm_gicv3: Fix decoding of ID register range * arm_gicv3: GICD_TYPER.SecurityExtn is RAZ if GICD_CTLR.DS == 1 * some code cleanups following on from the VFP decodetree conversion * Only implement doubles if the FPU supports them (so we now correctly model Cortex-M4, -M33 as single precision only) -----BEGIN PGP SIGNATURE----- iQJNBAABCAA3FiEE4aXFk81BneKOgxXPPCUl7RQ2DN4FAl0HpJ0ZHHBldGVyLm1h eWRlbGxAbGluYXJvLm9yZwAKCRA8JSXtFDYM3sjOD/4tXT0achhuXaDOYhMyIHzJ XK2DSIi0KC2d/c0NRtJX5JCUoWXW1rJGS2JYvknHRyrr77Hyf2HM7ESZLcFldzyp tjA0NJ7VwM2ykkeC13DSgM2AL72ayA+i0y3GjbUL6HefZOC0MMpg+u2sg10pNmcY FajUn4ejLGgsl1OmkG1QDu+hrmY9LEPaOrnTHWy/PGLsBjqJ4fAfOmLmjpCNvZzR WDbcK0AIAtqH/98PthSdsjyecDkVo3JEJld0fnfjoCLfhDKFg6YWi9WBD7QMF2VA 5LOxbrw6kjt1NzAJ1b6S6jImTU3yfGh6luqCRcqAeUPyU7WN/5rWUGInFJAhHtyA tDeHQy2w73o6Onr6Tps+co17YxXr066I9ADmTc0zxDfE2Fc3dC4+b54rNQ6S0URi 7EsXEwAKLRKHpzr8HW8kk7P87DEjxq0WpCiTaV2/p6fMbXhFLwAWexr5z4wzWFAx OMFTo04Aqw9K+ouQ2xhpuA4gPXxExa2EbSj6T1Zmby/iFJf3uXPXgY/Uj4k48P2k J8RMwr9f7BGTuh6F8GhGxXAWMAfTJcVHJ9E+CEPKHN5WAHQimv7X5aQLTK7SI0xE 4ij2JGsE9Drq/g4fLsXB45yYrGLOCPZZ+NpGSIYdkN9/sH+vxAp1OedGERftb7PX CvACBM9VmEYRo/+S1BZu+w== =mGBo -----END PGP SIGNATURE----- Merge remote-tracking branch 'remotes/pmaydell/tags/pull-target-arm-20190617' into staging target-arm queue: * support large kernel images in bootloader (by avoiding putting the initrd over the top of them) * correctly disable FPU/DSP in the CPU for the mps2-an521, musca-a boards * arm_gicv3: Fix decoding of ID register range * arm_gicv3: GICD_TYPER.SecurityExtn is RAZ if GICD_CTLR.DS == 1 * some code cleanups following on from the VFP decodetree conversion * Only implement doubles if the FPU supports them (so we now correctly model Cortex-M4, -M33 as single precision only) # gpg: Signature made Mon 17 Jun 2019 15:33:01 BST # gpg: using RSA key 
E1A5C593CD419DE28E8315CF3C2525ED14360CDE # gpg: issuer "peter.maydell@linaro.org" # gpg: Good signature from "Peter Maydell <peter.maydell@linaro.org>" [ultimate] # gpg: aka "Peter Maydell <pmaydell@gmail.com>" [ultimate] # gpg: aka "Peter Maydell <pmaydell@chiark.greenend.org.uk>" [ultimate] # Primary key fingerprint: E1A5 C593 CD41 9DE2 8E83 15CF 3C25 25ED 1436 0CDE * remotes/pmaydell/tags/pull-target-arm-20190617: (24 commits) target/arm: Only implement doubles if the FPU supports them target/arm: Fix typos in trans function prototypes target/arm: Remove unused cpu_F0s, cpu_F0d, cpu_F1s, cpu_F1d target/arm: Stop using deprecated functions in NEON_2RM_VCVT_F32_F16 target/arm: stop using deprecated functions in NEON_2RM_VCVT_F16_F32 target/arm: Stop using cpu_F0s in Neon VCVT fixed-point ops target/arm: Stop using cpu_F0s for Neon f32/s32 VCVT target/arm: Stop using cpu_F0s for NEON_2RM_VRECPE_F and NEON_2RM_VRSQRTE_F target/arm: Stop using cpu_F0s for NEON_2RM_VCVT[ANPM][US] target/arm: Stop using cpu_F0s for NEON_2RM_VRINT* target/arm: Stop using cpu_F0s for NEON_2RM_VNEG_F target/arm: Stop using cpu_F0s for NEON_2RM_VABS_F target/arm: Use vfp_expand_imm() for AArch32 VFP VMOV_imm target/arm: Move vfp_expand_imm() to translate.[ch] hw/intc/arm_gicv3: GICD_TYPER.SecurityExtn is RAZ if GICD_CTLR.DS == 1 hw/intc/arm_gicv3: Fix decoding of ID register range hw/arm: Correctly disable FPU/DSP for some ARMSSE-based boards hw/arm/armv7m: Forward "vfp" and "dsp" properties to CPU target/arm: Allow M-profile CPUs to disable the DSP extension via CPU property target/arm: Allow VFP and Neon to be disabled via a CPU property ... Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2019-06-17 15:35:21 +01:00 · 2019-06-17 15:35:21 +01:00 · 144ecc7f1a
parent 5d0e569447 1120827fa1
commit 144ecc7f1a
16 changed files with 572 additions and 276 deletions
--- a/hw/arm/armsse.c
+++ b/hw/arm/armsse.c
@ -38,6 +38,33 @@ struct ARMSSEInfo {
    bool has_cachectrl;
    bool has_cpusecctrl;
    bool has_cpuid;
+    Property *props;
+};
+
+static Property iotkit_properties[] = {
+    DEFINE_PROP_LINK("memory", ARMSSE, board_memory, TYPE_MEMORY_REGION,
+                     MemoryRegion *),
+    DEFINE_PROP_UINT32("EXP_NUMIRQ", ARMSSE, exp_numirq, 64),
+    DEFINE_PROP_UINT32("MAINCLK", ARMSSE, mainclk_frq, 0),
+    DEFINE_PROP_UINT32("SRAM_ADDR_WIDTH", ARMSSE, sram_addr_width, 15),
+    DEFINE_PROP_UINT32("init-svtor", ARMSSE, init_svtor, 0x10000000),
+    DEFINE_PROP_BOOL("CPU0_FPU", ARMSSE, cpu_fpu[0], true),
+    DEFINE_PROP_BOOL("CPU0_DSP", ARMSSE, cpu_dsp[0], true),
+    DEFINE_PROP_END_OF_LIST()
+};
+
+static Property armsse_properties[] = {
+    DEFINE_PROP_LINK("memory", ARMSSE, board_memory, TYPE_MEMORY_REGION,
+                     MemoryRegion *),
+    DEFINE_PROP_UINT32("EXP_NUMIRQ", ARMSSE, exp_numirq, 64),
+    DEFINE_PROP_UINT32("MAINCLK", ARMSSE, mainclk_frq, 0),
+    DEFINE_PROP_UINT32("SRAM_ADDR_WIDTH", ARMSSE, sram_addr_width, 15),
+    DEFINE_PROP_UINT32("init-svtor", ARMSSE, init_svtor, 0x10000000),
+    DEFINE_PROP_BOOL("CPU0_FPU", ARMSSE, cpu_fpu[0], false),
+    DEFINE_PROP_BOOL("CPU0_DSP", ARMSSE, cpu_dsp[0], false),
+    DEFINE_PROP_BOOL("CPU1_FPU", ARMSSE, cpu_fpu[1], true),
+    DEFINE_PROP_BOOL("CPU1_DSP", ARMSSE, cpu_dsp[1], true),
+    DEFINE_PROP_END_OF_LIST()
 };

 static const ARMSSEInfo armsse_variants[] = {
@ -53,6 +80,7 @@ static const ARMSSEInfo armsse_variants[] = {
        .has_cachectrl = false,
        .has_cpusecctrl = false,
        .has_cpuid = false,
+        .props = iotkit_properties,
    },
    {
        .name = TYPE_SSE200,
@ -66,6 +94,7 @@ static const ARMSSEInfo armsse_variants[] = {
        .has_cachectrl = true,
        .has_cpusecctrl = true,
        .has_cpuid = true,
+        .props = armsse_properties,
    },
 };

@ -533,6 +562,20 @@ static void armsse_realize(DeviceState *dev, Error **errp)
                return;
            }
        }
+        if (!s->cpu_fpu[i]) {
+            object_property_set_bool(cpuobj, false, "vfp", &err);
+            if (err) {
+                error_propagate(errp, err);
+                return;
+            }
+        }
+        if (!s->cpu_dsp[i]) {
+            object_property_set_bool(cpuobj, false, "dsp", &err);
+            if (err) {
+                error_propagate(errp, err);
+                return;
+            }
+        }

        if (i > 0) {
            memory_region_add_subregion_overlap(&s->cpu_container[i], 0,
@ -1222,16 +1265,6 @@ static const VMStateDescription armsse_vmstate = {
    }
 };

-static Property armsse_properties[] = {
-    DEFINE_PROP_LINK("memory", ARMSSE, board_memory, TYPE_MEMORY_REGION,
-                     MemoryRegion *),
-    DEFINE_PROP_UINT32("EXP_NUMIRQ", ARMSSE, exp_numirq, 64),
-    DEFINE_PROP_UINT32("MAINCLK", ARMSSE, mainclk_frq, 0),
-    DEFINE_PROP_UINT32("SRAM_ADDR_WIDTH", ARMSSE, sram_addr_width, 15),
-    DEFINE_PROP_UINT32("init-svtor", ARMSSE, init_svtor, 0x10000000),
-    DEFINE_PROP_END_OF_LIST()
-};
-
 static void armsse_reset(DeviceState *dev)
 {
    ARMSSE *s = ARMSSE(dev);
@ -1244,13 +1277,14 @@ static void armsse_class_init(ObjectClass *klass, void *data)
    DeviceClass *dc = DEVICE_CLASS(klass);
    IDAUInterfaceClass *iic = IDAU_INTERFACE_CLASS(klass);
    ARMSSEClass *asc = ARMSSE_CLASS(klass);
+    const ARMSSEInfo *info = data;

    dc->realize = armsse_realize;
    dc->vmsd = &armsse_vmstate;
-    dc->props = armsse_properties;
+    dc->props = info->props;
    dc->reset = armsse_reset;
    iic->check = armsse_idau_check;
-    asc->info = data;
+    asc->info = info;
 }

 static const TypeInfo armsse_info = {
--- a/hw/arm/armv7m.c
+++ b/hw/arm/armv7m.c
@ -190,6 +190,22 @@ static void armv7m_realize(DeviceState *dev, Error **errp)
            return;
        }
    }
+    if (object_property_find(OBJECT(s->cpu), "vfp", NULL)) {
+        object_property_set_bool(OBJECT(s->cpu), s->vfp,
+                                 "vfp", &err);
+        if (err != NULL) {
+            error_propagate(errp, err);
+            return;
+        }
+    }
+    if (object_property_find(OBJECT(s->cpu), "dsp", NULL)) {
+        object_property_set_bool(OBJECT(s->cpu), s->dsp,
+                                 "dsp", &err);
+        if (err != NULL) {
+            error_propagate(errp, err);
+            return;
+        }
+    }

    /*
     * Tell the CPU where the NVIC is; it will fail realize if it doesn't
@ -260,6 +276,8 @@ static Property armv7m_properties[] = {
    DEFINE_PROP_BOOL("enable-bitband", ARMv7MState, enable_bitband, false),
    DEFINE_PROP_BOOL("start-powered-off", ARMv7MState, start_powered_off,
                     false),
+    DEFINE_PROP_BOOL("vfp", ARMv7MState, vfp, true),
+    DEFINE_PROP_BOOL("dsp", ARMv7MState, dsp, true),
    DEFINE_PROP_END_OF_LIST(),
 };

--- a/hw/arm/boot.c
+++ b/hw/arm/boot.c
@ -911,6 +911,7 @@ static uint64_t load_aarch64_image(const char *filename, hwaddr mem_base,
                                   hwaddr *entry, AddressSpace *as)
 {
    hwaddr kernel_load_offset = KERNEL64_LOAD_ADDR;
+    uint64_t kernel_size = 0;
    uint8_t *buffer;
    int size;

@ -938,7 +939,10 @@ static uint64_t load_aarch64_image(const char *filename, hwaddr mem_base,
         * is only valid if the image_size is non-zero.
         */
        memcpy(&hdrvals, buffer + ARM64_TEXT_OFFSET_OFFSET, sizeof(hdrvals));
-        if (hdrvals[1] != 0) {
+
+        kernel_size = le64_to_cpu(hdrvals[1]);
+
+        if (kernel_size != 0) {
            kernel_load_offset = le64_to_cpu(hdrvals[0]);

            /*
@ -956,12 +960,21 @@ static uint64_t load_aarch64_image(const char *filename, hwaddr mem_base,
        }
    }

+    /*
+     * Kernels before v3.17 don't populate the image_size field, and
+     * raw images have no header. For those our best guess at the size
+     * is the size of the Image file itself.
+     */
+    if (kernel_size == 0) {
+        kernel_size = size;
+    }
+
    *entry = mem_base + kernel_load_offset;
    rom_add_blob_fixed_as(filename, buffer, size, *entry, as);

    g_free(buffer);

-    return size;
+    return kernel_size;
 }

 static void arm_setup_direct_kernel_boot(ARMCPU *cpu,
@ -977,6 +990,7 @@ static void arm_setup_direct_kernel_boot(ARMCPU *cpu,
    int elf_machine;
    hwaddr entry;
    static const ARMInsnFixup *primary_loader;
+    uint64_t ram_end = info->loader_start + info->ram_size;

    if (arm_feature(&cpu->env, ARM_FEATURE_AARCH64)) {
        primary_loader = bootloader_aarch64;
@ -999,20 +1013,6 @@ static void arm_setup_direct_kernel_boot(ARMCPU *cpu,
    if (info->nb_cpus == 0)
        info->nb_cpus = 1;

-    /*
-     * We want to put the initrd far enough into RAM that when the
-     * kernel is uncompressed it will not clobber the initrd. However
-     * on boards without much RAM we must ensure that we still leave
-     * enough room for a decent sized initrd, and on boards with large
-     * amounts of RAM we must avoid the initrd being so far up in RAM
-     * that it is outside lowmem and inaccessible to the kernel.
-     * So for boards with less  than 256MB of RAM we put the initrd
-     * halfway into RAM, and for boards with 256MB of RAM or more we put
-     * the initrd at 128MB.
-     */
-    info->initrd_start = info->loader_start +
-        MIN(info->ram_size / 2, 128 * 1024 * 1024);
-
    /* Assume that raw images are linux kernels, and ELF images are not.  */
    kernel_size = arm_load_elf(info, &elf_entry, &elf_low_addr,
                               &elf_high_addr, elf_machine, as);
@ -1048,27 +1048,59 @@ static void arm_setup_direct_kernel_boot(ARMCPU *cpu,
        /* 32-bit ARM */
        entry = info->loader_start + KERNEL_LOAD_ADDR;
        kernel_size = load_image_targphys_as(info->kernel_filename, entry,
-                                             info->ram_size - KERNEL_LOAD_ADDR,
-                                             as);
+                                             ram_end - KERNEL_LOAD_ADDR, as);
        is_linux = 1;
    }
    if (kernel_size < 0) {
        error_report("could not load kernel '%s'", info->kernel_filename);
        exit(1);
    }
+
+    if (kernel_size > info->ram_size) {
+        error_report("kernel '%s' is too large to fit in RAM "
+                     "(kernel size %d, RAM size %" PRId64 ")",
+                     info->kernel_filename, kernel_size, info->ram_size);
+        exit(1);
+    }
+
    info->entry = entry;
+
+    /*
+     * We want to put the initrd far enough into RAM that when the
+     * kernel is uncompressed it will not clobber the initrd. However
+     * on boards without much RAM we must ensure that we still leave
+     * enough room for a decent sized initrd, and on boards with large
+     * amounts of RAM we must avoid the initrd being so far up in RAM
+     * that it is outside lowmem and inaccessible to the kernel.
+     * So for boards with less than 256MB of RAM we put the initrd
+     * halfway into RAM, and for boards with 256MB of RAM or more we put
+     * the initrd at 128MB.
+     * We also refuse to put the initrd somewhere that will definitely
+     * overlay the kernel we just loaded, though for kernel formats which
+     * don't tell us their exact size (eg self-decompressing 32-bit kernels)
+     * we might still make a bad choice here.
+     */
+    info->initrd_start = info->loader_start +
+        MAX(MIN(info->ram_size / 2, 128 * 1024 * 1024), kernel_size);
+    info->initrd_start = TARGET_PAGE_ALIGN(info->initrd_start);
+
    if (is_linux) {
        uint32_t fixupcontext[FIXUP_MAX];

        if (info->initrd_filename) {
+
+            if (info->initrd_start >= ram_end) {
+                error_report("not enough space after kernel to load initrd");
+                exit(1);
+            }
+
            initrd_size = load_ramdisk_as(info->initrd_filename,
                                          info->initrd_start,
-                                          info->ram_size - info->initrd_start,
-                                          as);
+                                          ram_end - info->initrd_start, as);
            if (initrd_size < 0) {
                initrd_size = load_image_targphys_as(info->initrd_filename,
                                                     info->initrd_start,
-                                                     info->ram_size -
+                                                     ram_end -
                                                     info->initrd_start,
                                                     as);
            }
@ -1077,6 +1109,11 @@ static void arm_setup_direct_kernel_boot(ARMCPU *cpu,
                             info->initrd_filename);
                exit(1);
            }
+            /*
+             * initrd_start is an absolute guest address (it is based on
+             * loader_start), so the upper bound must be the end of RAM
+             * (ram_end), not the RAM size. Treat overflow as fatal, like
+             * the other placement checks in this function.
+             */
+            if (info->initrd_start + initrd_size > ram_end) {
+                error_report("could not load initrd '%s': "
+                             "too big to fit into RAM after the kernel",
+                             info->initrd_filename);
+                exit(1);
+            }
        } else {
            initrd_size = 0;
        }
@ -1112,6 +1149,10 @@ static void arm_setup_direct_kernel_boot(ARMCPU *cpu,
            /* Place the DTB after the initrd in memory with alignment. */
            info->dtb_start = QEMU_ALIGN_UP(info->initrd_start + initrd_size,
                                           align);
+            if (info->dtb_start >= ram_end) {
+                error_report("Not enough space for DTB after kernel/initrd");
+                exit(1);
+            }
            fixupcontext[FIXUP_ARGPTR_LO] = info->dtb_start;
            fixupcontext[FIXUP_ARGPTR_HI] = info->dtb_start >> 32;
        } else {
--- a/hw/arm/musca.c
+++ b/hw/arm/musca.c
@ -385,6 +385,14 @@ static void musca_init(MachineState *machine)
    qdev_prop_set_uint32(ssedev, "init-svtor", mmc->init_svtor);
    qdev_prop_set_uint32(ssedev, "SRAM_ADDR_WIDTH", mmc->sram_addr_width);
    qdev_prop_set_uint32(ssedev, "MAINCLK", SYSCLK_FRQ);
+    /*
+     * Musca-A takes the default SSE-200 FPU/DSP settings (ie no for
+     * CPU0 and yes for CPU1); Musca-B1 explicitly enables them for CPU0.
+     */
+    if (mmc->type == MUSCA_B1) {
+        qdev_prop_set_bit(ssedev, "CPU0_FPU", true);
+        qdev_prop_set_bit(ssedev, "CPU0_DSP", true);
+    }
    object_property_set_bool(OBJECT(&mms->sse), true, "realized",
                             &error_fatal);

--- a/hw/intc/arm_gicv3_dist.c
+++ b/hw/intc/arm_gicv3_dist.c
@ -378,8 +378,14 @@ static MemTxResult gicd_readl(GICv3State *s, hwaddr offset,
         * ITLinesNumber == (num external irqs / 32) - 1
         */
        int itlinesnumber = ((s->num_irq - GIC_INTERNAL) / 32) - 1;
+        /*
+         * SecurityExtn must be RAZ if GICD_CTLR.DS == 1, and
+         * "security extensions not supported" always implies DS == 1,
+         * so we only need to check the DS bit.
+         */
+        bool sec_extn = !(s->gicd_ctlr & GICD_CTLR_DS);

-        *data = (1 << 25) | (1 << 24) | (s->security_extn << 10) |
+        *data = (1 << 25) | (1 << 24) | (sec_extn << 10) |
            (0xf << 19) | itlinesnumber;
        return MEMTX_OK;
    }
@ -533,7 +539,7 @@ static MemTxResult gicd_readl(GICv3State *s, hwaddr offset,
        }
        return MEMTX_OK;
    }
-    case GICD_IDREGS ... GICD_IDREGS + 0x1f:
+    case GICD_IDREGS ... GICD_IDREGS + 0x2f:
        /* ID registers */
        *data = gicv3_idreg(offset - GICD_IDREGS);
        return MEMTX_OK;
@ -744,7 +750,7 @@ static MemTxResult gicd_writel(GICv3State *s, hwaddr offset,
        gicd_write_irouter(s, attrs, irq, r);
        return MEMTX_OK;
    }
-    case GICD_IDREGS ... GICD_IDREGS + 0x1f:
+    case GICD_IDREGS ... GICD_IDREGS + 0x2f:
    case GICD_TYPER:
    case GICD_IIDR:
        /* RO registers, ignore the write */
--- a/hw/intc/arm_gicv3_redist.c
+++ b/hw/intc/arm_gicv3_redist.c
@ -233,7 +233,7 @@ static MemTxResult gicr_readl(GICv3CPUState *cs, hwaddr offset,
        }
        *data = cs->gicr_nsacr;
        return MEMTX_OK;
-    case GICR_IDREGS ... GICR_IDREGS + 0x1f:
+    case GICR_IDREGS ... GICR_IDREGS + 0x2f:
        *data = gicv3_idreg(offset - GICR_IDREGS);
        return MEMTX_OK;
    default:
@ -363,7 +363,7 @@ static MemTxResult gicr_writel(GICv3CPUState *cs, hwaddr offset,
        return MEMTX_OK;
    case GICR_IIDR:
    case GICR_TYPER:
-    case GICR_IDREGS ... GICR_IDREGS + 0x1f:
+    case GICR_IDREGS ... GICR_IDREGS + 0x2f:
        /* RO registers, ignore the write */
        qemu_log_mask(LOG_GUEST_ERROR,
                      "%s: invalid guest write to RO register at offset "
--- a/include/hw/arm/armsse.h
+++ b/include/hw/arm/armsse.h
@ -50,6 +50,11 @@
 *    address of each SRAM bank (and thus the total amount of internal SRAM)
 *  + QOM property "init-svtor" sets the initial value of the CPU SVTOR register
 *    (where it expects to load the PC and SP from the vector table on reset)
+ *  + QOM properties "CPU0_FPU", "CPU0_DSP", "CPU1_FPU" and "CPU1_DSP" which
+ *    set whether the CPUs have the FPU and DSP features present. The default
+ *    (matching the hardware) is that for CPU0 in an IoTKit and CPU1 in an
+ *    SSE-200 both are present; CPU0 in an SSE-200 has neither.
+ *    Since the IoTKit has only one CPU, it does not have the CPU1_* properties.
 *  + Named GPIO inputs "EXP_IRQ" 0..n are the expansion interrupts for CPU 0,
 *    which are wired to its NVIC lines 32 .. n+32
 *  + Named GPIO inputs "EXP_CPU1_IRQ" 0..n are the expansion interrupts for
@ -208,6 +213,8 @@ typedef struct ARMSSE {
    uint32_t mainclk_frq;
    uint32_t sram_addr_width;
    uint32_t init_svtor;
+    bool cpu_fpu[SSE_MAX_CPUS];
+    bool cpu_dsp[SSE_MAX_CPUS];
 } ARMSSE;

 typedef struct ARMSSEInfo ARMSSEInfo;
--- a/include/hw/arm/armv7m.h
+++ b/include/hw/arm/armv7m.h
@ -43,6 +43,8 @@ typedef struct {
 *   devices will be automatically layered on top of this view.)
 * + Property "idau": IDAU interface (forwarded to CPU object)
 * + Property "init-svtor": secure VTOR reset value (forwarded to CPU object)
+ * + Property "vfp": enable VFP (forwarded to CPU object)
+ * + Property "dsp": enable DSP (forwarded to CPU object)
 * + Property "enable-bitband": expose bitbanded IO
 */
 typedef struct ARMv7MState {
@ -66,6 +68,8 @@ typedef struct ARMv7MState {
    uint32_t init_svtor;
    bool enable_bitband;
    bool start_powered_off;
+    bool vfp;
+    bool dsp;
 } ARMv7MState;

 #endif
--- a/target/arm/cpu.c
+++ b/target/arm/cpu.c
@ -763,6 +763,15 @@ static Property arm_cpu_cfgend_property =
 static Property arm_cpu_has_pmu_property =
            DEFINE_PROP_BOOL("pmu", ARMCPU, has_pmu, true);

+static Property arm_cpu_has_vfp_property =
+            DEFINE_PROP_BOOL("vfp", ARMCPU, has_vfp, true);
+
+static Property arm_cpu_has_neon_property =
+            DEFINE_PROP_BOOL("neon", ARMCPU, has_neon, true);
+
+static Property arm_cpu_has_dsp_property =
+            DEFINE_PROP_BOOL("dsp", ARMCPU, has_dsp, true);
+
 static Property arm_cpu_has_mpu_property =
            DEFINE_PROP_BOOL("has-mpu", ARMCPU, has_mpu, true);

@ -803,6 +812,13 @@ void arm_cpu_post_init(Object *obj)
    if (arm_feature(&cpu->env, ARM_FEATURE_M)) {
        set_feature(&cpu->env, ARM_FEATURE_PMSA);
    }
+    /* Similarly for the VFP feature bits */
+    if (arm_feature(&cpu->env, ARM_FEATURE_VFP4)) {
+        set_feature(&cpu->env, ARM_FEATURE_VFP3);
+    }
+    if (arm_feature(&cpu->env, ARM_FEATURE_VFP3)) {
+        set_feature(&cpu->env, ARM_FEATURE_VFP);
+    }

    if (arm_feature(&cpu->env, ARM_FEATURE_CBAR) ||
        arm_feature(&cpu->env, ARM_FEATURE_CBAR_RO)) {
@ -847,6 +863,33 @@ void arm_cpu_post_init(Object *obj)
                                 &error_abort);
    }

+    /*
+     * Allow user to turn off VFP and Neon support, but only for TCG --
+     * KVM does not currently allow us to lie to the guest about its
+     * ID/feature registers, so the guest always sees what the host has.
+     */
+    if (arm_feature(&cpu->env, ARM_FEATURE_VFP)) {
+        cpu->has_vfp = true;
+        if (!kvm_enabled()) {
+            qdev_property_add_static(DEVICE(obj), &arm_cpu_has_vfp_property,
+                                     &error_abort);
+        }
+    }
+
+    if (arm_feature(&cpu->env, ARM_FEATURE_NEON)) {
+        cpu->has_neon = true;
+        if (!kvm_enabled()) {
+            qdev_property_add_static(DEVICE(obj), &arm_cpu_has_neon_property,
+                                     &error_abort);
+        }
+    }
+
+    if (arm_feature(&cpu->env, ARM_FEATURE_M) &&
+        arm_feature(&cpu->env, ARM_FEATURE_THUMB_DSP)) {
+        qdev_property_add_static(DEVICE(obj), &arm_cpu_has_dsp_property,
+                                 &error_abort);
+    }
+
    if (arm_feature(&cpu->env, ARM_FEATURE_PMSA)) {
        qdev_property_add_static(DEVICE(obj), &arm_cpu_has_mpu_property,
                                 &error_abort);
@ -956,6 +999,136 @@ static void arm_cpu_realizefn(DeviceState *dev, Error **errp)
        return;
    }

+    if (arm_feature(env, ARM_FEATURE_AARCH64) &&
+        cpu->has_vfp != cpu->has_neon) {
+        /*
+         * This is an architectural requirement for AArch64; AArch32 is
+         * more flexible and permits VFP-no-Neon and Neon-no-VFP.
+         */
+        error_setg(errp,
+                   "AArch64 CPUs must have both VFP and Neon or neither");
+        return;
+    }
+
+    if (!cpu->has_vfp) {
+        uint64_t t;
+        uint32_t u;
+
+        unset_feature(env, ARM_FEATURE_VFP);
+        unset_feature(env, ARM_FEATURE_VFP3);
+        unset_feature(env, ARM_FEATURE_VFP4);
+
+        t = cpu->isar.id_aa64isar1;
+        t = FIELD_DP64(t, ID_AA64ISAR1, JSCVT, 0);
+        cpu->isar.id_aa64isar1 = t;
+
+        t = cpu->isar.id_aa64pfr0;
+        t = FIELD_DP64(t, ID_AA64PFR0, FP, 0xf);
+        cpu->isar.id_aa64pfr0 = t;
+
+        u = cpu->isar.id_isar6;
+        u = FIELD_DP32(u, ID_ISAR6, JSCVT, 0);
+        cpu->isar.id_isar6 = u;
+
+        u = cpu->isar.mvfr0;
+        u = FIELD_DP32(u, MVFR0, FPSP, 0);
+        u = FIELD_DP32(u, MVFR0, FPDP, 0);
+        u = FIELD_DP32(u, MVFR0, FPTRAP, 0);
+        u = FIELD_DP32(u, MVFR0, FPDIVIDE, 0);
+        u = FIELD_DP32(u, MVFR0, FPSQRT, 0);
+        u = FIELD_DP32(u, MVFR0, FPSHVEC, 0);
+        u = FIELD_DP32(u, MVFR0, FPROUND, 0);
+        cpu->isar.mvfr0 = u;
+
+        u = cpu->isar.mvfr1;
+        u = FIELD_DP32(u, MVFR1, FPFTZ, 0);
+        u = FIELD_DP32(u, MVFR1, FPDNAN, 0);
+        u = FIELD_DP32(u, MVFR1, FPHP, 0);
+        cpu->isar.mvfr1 = u;
+
+        u = cpu->isar.mvfr2;
+        u = FIELD_DP32(u, MVFR2, FPMISC, 0);
+        cpu->isar.mvfr2 = u;
+    }
+
+    if (!cpu->has_neon) {
+        uint64_t t;
+        uint32_t u;
+
+        unset_feature(env, ARM_FEATURE_NEON);
+
+        t = cpu->isar.id_aa64isar0;
+        t = FIELD_DP64(t, ID_AA64ISAR0, DP, 0);
+        cpu->isar.id_aa64isar0 = t;
+
+        t = cpu->isar.id_aa64isar1;
+        t = FIELD_DP64(t, ID_AA64ISAR1, FCMA, 0);
+        cpu->isar.id_aa64isar1 = t;
+
+        t = cpu->isar.id_aa64pfr0;
+        t = FIELD_DP64(t, ID_AA64PFR0, ADVSIMD, 0xf);
+        cpu->isar.id_aa64pfr0 = t;
+
+        u = cpu->isar.id_isar5;
+        u = FIELD_DP32(u, ID_ISAR5, RDM, 0);
+        u = FIELD_DP32(u, ID_ISAR5, VCMA, 0);
+        cpu->isar.id_isar5 = u;
+
+        u = cpu->isar.id_isar6;
+        u = FIELD_DP32(u, ID_ISAR6, DP, 0);
+        u = FIELD_DP32(u, ID_ISAR6, FHM, 0);
+        cpu->isar.id_isar6 = u;
+
+        u = cpu->isar.mvfr1;
+        u = FIELD_DP32(u, MVFR1, SIMDLS, 0);
+        u = FIELD_DP32(u, MVFR1, SIMDINT, 0);
+        u = FIELD_DP32(u, MVFR1, SIMDSP, 0);
+        u = FIELD_DP32(u, MVFR1, SIMDHP, 0);
+        u = FIELD_DP32(u, MVFR1, SIMDFMAC, 0);
+        cpu->isar.mvfr1 = u;
+
+        u = cpu->isar.mvfr2;
+        u = FIELD_DP32(u, MVFR2, SIMDMISC, 0);
+        cpu->isar.mvfr2 = u;
+    }
+
+    if (!cpu->has_neon && !cpu->has_vfp) {
+        uint64_t t;
+        uint32_t u;
+
+        t = cpu->isar.id_aa64isar0;
+        t = FIELD_DP64(t, ID_AA64ISAR0, FHM, 0);
+        cpu->isar.id_aa64isar0 = t;
+
+        t = cpu->isar.id_aa64isar1;
+        t = FIELD_DP64(t, ID_AA64ISAR1, FRINTTS, 0);
+        cpu->isar.id_aa64isar1 = t;
+
+        u = cpu->isar.mvfr0;
+        u = FIELD_DP32(u, MVFR0, SIMDREG, 0);
+        cpu->isar.mvfr0 = u;
+    }
+
+    if (arm_feature(env, ARM_FEATURE_M) && !cpu->has_dsp) {
+        uint32_t u;
+
+        unset_feature(env, ARM_FEATURE_THUMB_DSP);
+
+        u = cpu->isar.id_isar1;
+        u = FIELD_DP32(u, ID_ISAR1, EXTEND, 1);
+        cpu->isar.id_isar1 = u;
+
+        u = cpu->isar.id_isar2;
+        u = FIELD_DP32(u, ID_ISAR2, MULTU, 1);
+        u = FIELD_DP32(u, ID_ISAR2, MULTS, 1);
+        cpu->isar.id_isar2 = u;
+
+        u = cpu->isar.id_isar3;
+        u = FIELD_DP32(u, ID_ISAR3, SIMD, 1);
+        u = FIELD_DP32(u, ID_ISAR3, SATURATE, 0);
+        cpu->isar.id_isar3 = u;
+    }
+
    /* Some features automatically imply others: */
    if (arm_feature(env, ARM_FEATURE_V8)) {
        if (arm_feature(env, ARM_FEATURE_M)) {
@ -1016,12 +1189,6 @@ static void arm_cpu_realizefn(DeviceState *dev, Error **errp)
    if (arm_feature(env, ARM_FEATURE_V5)) {
        set_feature(env, ARM_FEATURE_V4T);
    }
-    if (arm_feature(env, ARM_FEATURE_VFP4)) {
-        set_feature(env, ARM_FEATURE_VFP3);
-    }
-    if (arm_feature(env, ARM_FEATURE_VFP3)) {
-        set_feature(env, ARM_FEATURE_VFP);
-    }
    if (arm_feature(env, ARM_FEATURE_LPAE)) {
        set_feature(env, ARM_FEATURE_V7MP);
        set_feature(env, ARM_FEATURE_PXN);
--- a/target/arm/cpu.h
+++ b/target/arm/cpu.h
@ -786,6 +786,12 @@ struct ARMCPU {
    bool has_el3;
    /* CPU has PMU (Performance Monitor Unit) */
    bool has_pmu;
+    /* CPU has VFP */
+    bool has_vfp;
+    /* CPU has Neon */
+    bool has_neon;
+    /* CPU has M-profile DSP extension */
+    bool has_dsp;

    /* CPU has memory protection unit */
    bool has_mpu;
@ -3382,6 +3388,12 @@ static inline bool isar_feature_aa32_fpshvec(const ARMISARegisters *id)
    return FIELD_EX64(id->mvfr0, MVFR0, FPSHVEC) > 0;
 }

+static inline bool isar_feature_aa32_fpdp(const ARMISARegisters *id)
+{
+    /* Return true if CPU supports double precision floating point */
+    return FIELD_EX64(id->mvfr0, MVFR0, FPDP) > 0;
+}
+
 /*
 * We always set the FP and SIMD FP16 fields to indicate identical
 * levels of support (assuming SIMD is implemented at all), so
--- a/target/arm/translate-a64.c
+++ b/target/arm/translate-a64.c
@ -6380,38 +6380,6 @@ static void disas_fp_3src(DisasContext *s, uint32_t insn)
    }
 }

-/* The imm8 encodes the sign bit, enough bits to represent an exponent in
- * the range 01....1xx to 10....0xx, and the most significant 4 bits of
- * the mantissa; see VFPExpandImm() in the v8 ARM ARM.
- */
-uint64_t vfp_expand_imm(int size, uint8_t imm8)
-{
-    uint64_t imm;
-
-    switch (size) {
-    case MO_64:
-        imm = (extract32(imm8, 7, 1) ? 0x8000 : 0) |
-            (extract32(imm8, 6, 1) ? 0x3fc0 : 0x4000) |
-            extract32(imm8, 0, 6);
-        imm <<= 48;
-        break;
-    case MO_32:
-        imm = (extract32(imm8, 7, 1) ? 0x8000 : 0) |
-            (extract32(imm8, 6, 1) ? 0x3e00 : 0x4000) |
-            (extract32(imm8, 0, 6) << 3);
-        imm <<= 16;
-        break;
-    case MO_16:
-        imm = (extract32(imm8, 7, 1) ? 0x8000 : 0) |
-            (extract32(imm8, 6, 1) ? 0x3000 : 0x4000) |
-            (extract32(imm8, 0, 6) << 6);
-        break;
-    default:
-        g_assert_not_reached();
-    }
-    return imm;
-}
-
 /* Floating point immediate
 *   31  30  29 28       24 23  22  21 20        13 12   10 9    5 4    0
 * +---+---+---+-----------+------+---+------------+-------+------+------+
--- a/target/arm/translate-a64.h
+++ b/target/arm/translate-a64.h
@ -39,7 +39,6 @@ void write_fp_dreg(DisasContext *s, int reg, TCGv_i64 v);
 TCGv_ptr get_fpstatus_ptr(bool);
 bool logic_imm_decode_wmask(uint64_t *result, unsigned int immn,
                            unsigned int imms, unsigned int immr);
-uint64_t vfp_expand_imm(int size, uint8_t imm8);
 bool sve_access_check(DisasContext *s);

 /* We should have at some point before trying to access an FP register
--- a/target/arm/translate-vfp.inc.c
+++ b/target/arm/translate-vfp.inc.c
@ -30,6 +30,39 @@
 #include "decode-vfp.inc.c"
 #include "decode-vfp-uncond.inc.c"

+/*
+ * The imm8 encodes the sign bit, enough bits to represent an exponent in
+ * the range 01....1xx to 10....0xx, and the most significant 4 bits of
+ * the mantissa; see VFPExpandImm() in the v8 ARM ARM.
+ */
+uint64_t vfp_expand_imm(int size, uint8_t imm8)
+{
+    uint64_t imm;
+
+    switch (size) {
+    case MO_64:
+        imm = (extract32(imm8, 7, 1) ? 0x8000 : 0) |
+            (extract32(imm8, 6, 1) ? 0x3fc0 : 0x4000) |
+            extract32(imm8, 0, 6);
+        imm <<= 48;
+        break;
+    case MO_32:
+        imm = (extract32(imm8, 7, 1) ? 0x8000 : 0) |
+            (extract32(imm8, 6, 1) ? 0x3e00 : 0x4000) |
+            (extract32(imm8, 0, 6) << 3);
+        imm <<= 16;
+        break;
+    case MO_16:
+        imm = (extract32(imm8, 7, 1) ? 0x8000 : 0) |
+            (extract32(imm8, 6, 1) ? 0x3000 : 0x4000) |
+            (extract32(imm8, 0, 6) << 6);
+        break;
+    default:
+        g_assert_not_reached();
+    }
+    return imm;
+}
+
 /*
 * Return the offset of a 16-bit half of the specified VFP single-precision
 * register. If top is true, returns the top 16 bits; otherwise the bottom
@ -173,6 +206,11 @@ static bool trans_VSEL(DisasContext *s, arg_VSEL *a)
        ((a->vm | a->vn | a->vd) & 0x10)) {
        return false;
    }
+
+    if (dp && !dc_isar_feature(aa32_fpdp, s)) {
+        return false;
+    }
+
    rd = a->vd;
    rn = a->vn;
    rm = a->vm;
@ -301,6 +339,11 @@ static bool trans_VMINMAXNM(DisasContext *s, arg_VMINMAXNM *a)
        ((a->vm | a->vn | a->vd) & 0x10)) {
        return false;
    }
+
+    if (dp && !dc_isar_feature(aa32_fpdp, s)) {
+        return false;
+    }
+
    rd = a->vd;
    rn = a->vn;
    rm = a->vm;
@ -382,6 +425,11 @@ static bool trans_VRINT(DisasContext *s, arg_VRINT *a)
        ((a->vm | a->vd) & 0x10)) {
        return false;
    }
+
+    if (dp && !dc_isar_feature(aa32_fpdp, s)) {
+        return false;
+    }
+
    rd = a->vd;
    rm = a->vm;

@ -440,6 +488,11 @@ static bool trans_VCVT(DisasContext *s, arg_VCVT *a)
    if (dp && !dc_isar_feature(aa32_fp_d32, s) && (a->vm & 0x10)) {
        return false;
    }
+
+    if (dp && !dc_isar_feature(aa32_fpdp, s)) {
+        return false;
+    }
+
    rd = a->vd;
    rm = a->vm;

@ -835,7 +888,7 @@ static bool trans_VMOV_64_sp(DisasContext *s, arg_VMOV_64_sp *a)
    return true;
 }

-static bool trans_VMOV_64_dp(DisasContext *s, arg_VMOV_64_sp *a)
+static bool trans_VMOV_64_dp(DisasContext *s, arg_VMOV_64_dp *a)
 {
    TCGv_i32 tmp;

@ -910,7 +963,7 @@ static bool trans_VLDR_VSTR_sp(DisasContext *s, arg_VLDR_VSTR_sp *a)
    return true;
 }

-static bool trans_VLDR_VSTR_dp(DisasContext *s, arg_VLDR_VSTR_sp *a)
+static bool trans_VLDR_VSTR_dp(DisasContext *s, arg_VLDR_VSTR_dp *a)
 {
    uint32_t offset;
    TCGv_i32 addr;
@ -1268,6 +1321,10 @@ static bool do_vfp_3op_dp(DisasContext *s, VFPGen3OpDPFn *fn,
        return false;
    }

+    if (!dc_isar_feature(aa32_fpdp, s)) {
+        return false;
+    }
+
    if (!dc_isar_feature(aa32_fpshvec, s) &&
        (veclen != 0 || s->vec_stride != 0)) {
        return false;
@ -1413,6 +1470,10 @@ static bool do_vfp_2op_dp(DisasContext *s, VFPGen2OpDPFn *fn, int vd, int vm)
        return false;
    }

+    if (!dc_isar_feature(aa32_fpdp, s)) {
+        return false;
+    }
+
    if (!dc_isar_feature(aa32_fpshvec, s) &&
        (veclen != 0 || s->vec_stride != 0)) {
        return false;
@ -1500,7 +1561,7 @@ static void gen_VMLA_dp(TCGv_i64 vd, TCGv_i64 vn, TCGv_i64 vm, TCGv_ptr fpst)
    tcg_temp_free_i64(tmp);
 }

-static bool trans_VMLA_dp(DisasContext *s, arg_VMLA_sp *a)
+static bool trans_VMLA_dp(DisasContext *s, arg_VMLA_dp *a)
 {
    return do_vfp_3op_dp(s, gen_VMLA_dp, a->vd, a->vn, a->vm, true);
 }
@ -1538,7 +1599,7 @@ static void gen_VMLS_dp(TCGv_i64 vd, TCGv_i64 vn, TCGv_i64 vm, TCGv_ptr fpst)
    tcg_temp_free_i64(tmp);
 }

-static bool trans_VMLS_dp(DisasContext *s, arg_VMLS_sp *a)
+static bool trans_VMLS_dp(DisasContext *s, arg_VMLS_dp *a)
 {
    return do_vfp_3op_dp(s, gen_VMLS_dp, a->vd, a->vn, a->vm, true);
 }
@ -1580,7 +1641,7 @@ static void gen_VNMLS_dp(TCGv_i64 vd, TCGv_i64 vn, TCGv_i64 vm, TCGv_ptr fpst)
    tcg_temp_free_i64(tmp);
 }

-static bool trans_VNMLS_dp(DisasContext *s, arg_VNMLS_sp *a)
+static bool trans_VNMLS_dp(DisasContext *s, arg_VNMLS_dp *a)
 {
    return do_vfp_3op_dp(s, gen_VNMLS_dp, a->vd, a->vn, a->vm, true);
 }
@ -1614,7 +1675,7 @@ static void gen_VNMLA_dp(TCGv_i64 vd, TCGv_i64 vn, TCGv_i64 vm, TCGv_ptr fpst)
    tcg_temp_free_i64(tmp);
 }

-static bool trans_VNMLA_dp(DisasContext *s, arg_VNMLA_sp *a)
+static bool trans_VNMLA_dp(DisasContext *s, arg_VNMLA_dp *a)
 {
    return do_vfp_3op_dp(s, gen_VNMLA_dp, a->vd, a->vn, a->vm, true);
 }
@ -1624,7 +1685,7 @@ static bool trans_VMUL_sp(DisasContext *s, arg_VMUL_sp *a)
    return do_vfp_3op_sp(s, gen_helper_vfp_muls, a->vd, a->vn, a->vm, false);
 }

-static bool trans_VMUL_dp(DisasContext *s, arg_VMUL_sp *a)
+static bool trans_VMUL_dp(DisasContext *s, arg_VMUL_dp *a)
 {
    return do_vfp_3op_dp(s, gen_helper_vfp_muld, a->vd, a->vn, a->vm, false);
 }
@ -1648,7 +1709,7 @@ static void gen_VNMUL_dp(TCGv_i64 vd, TCGv_i64 vn, TCGv_i64 vm, TCGv_ptr fpst)
    gen_helper_vfp_negd(vd, vd);
 }

-static bool trans_VNMUL_dp(DisasContext *s, arg_VNMUL_sp *a)
+static bool trans_VNMUL_dp(DisasContext *s, arg_VNMUL_dp *a)
 {
    return do_vfp_3op_dp(s, gen_VNMUL_dp, a->vd, a->vn, a->vm, false);
 }
@ -1658,7 +1719,7 @@ static bool trans_VADD_sp(DisasContext *s, arg_VADD_sp *a)
    return do_vfp_3op_sp(s, gen_helper_vfp_adds, a->vd, a->vn, a->vm, false);
 }

-static bool trans_VADD_dp(DisasContext *s, arg_VADD_sp *a)
+static bool trans_VADD_dp(DisasContext *s, arg_VADD_dp *a)
 {
    return do_vfp_3op_dp(s, gen_helper_vfp_addd, a->vd, a->vn, a->vm, false);
 }
@ -1668,7 +1729,7 @@ static bool trans_VSUB_sp(DisasContext *s, arg_VSUB_sp *a)
    return do_vfp_3op_sp(s, gen_helper_vfp_subs, a->vd, a->vn, a->vm, false);
 }

-static bool trans_VSUB_dp(DisasContext *s, arg_VSUB_sp *a)
+static bool trans_VSUB_dp(DisasContext *s, arg_VSUB_dp *a)
 {
    return do_vfp_3op_dp(s, gen_helper_vfp_subd, a->vd, a->vn, a->vm, false);
 }
@ -1678,7 +1739,7 @@ static bool trans_VDIV_sp(DisasContext *s, arg_VDIV_sp *a)
    return do_vfp_3op_sp(s, gen_helper_vfp_divs, a->vd, a->vn, a->vm, false);
 }

-static bool trans_VDIV_dp(DisasContext *s, arg_VDIV_sp *a)
+static bool trans_VDIV_dp(DisasContext *s, arg_VDIV_dp *a)
 {
    return do_vfp_3op_dp(s, gen_helper_vfp_divd, a->vd, a->vn, a->vm, false);
 }
@ -1710,6 +1771,10 @@ static bool trans_VFM_sp(DisasContext *s, arg_VFM_sp *a)
        return false;
    }

+    /*
+     * Single-precision op: do not gate it on aa32_fpdp (double support).
+     */
+
    if (!vfp_access_check(s)) {
        return true;
    }
@ -1741,7 +1806,7 @@ static bool trans_VFM_sp(DisasContext *s, arg_VFM_sp *a)
    return true;
 }

-static bool trans_VFM_dp(DisasContext *s, arg_VFM_sp *a)
+static bool trans_VFM_dp(DisasContext *s, arg_VFM_dp *a)
 {
    /*
     * VFNMA : fd = muladd(-fd,  fn, fm)
@ -1809,7 +1874,7 @@ static bool trans_VMOV_imm_sp(DisasContext *s, arg_VMOV_imm_sp *a)
    uint32_t delta_d = 0;
    int veclen = s->vec_len;
    TCGv_i32 fd;
-    uint32_t n, i, vd;
+    uint32_t vd;

    vd = a->vd;

@ -1836,17 +1901,7 @@ static bool trans_VMOV_imm_sp(DisasContext *s, arg_VMOV_imm_sp *a)
        }
    }

-    n = (a->imm4h << 28) & 0x80000000;
-    i = ((a->imm4h << 4) & 0x70) | a->imm4l;
-    if (i & 0x40) {
-        i |= 0x780;
-    } else {
-        i |= 0x800;
-    }
-    n |= i << 19;
-
-    fd = tcg_temp_new_i32();
-    tcg_gen_movi_i32(fd, n);
+    fd = tcg_const_i32(vfp_expand_imm(MO_32, a->imm));

    for (;;) {
        neon_store_reg32(fd, vd);
@ -1869,7 +1924,7 @@ static bool trans_VMOV_imm_dp(DisasContext *s, arg_VMOV_imm_dp *a)
    uint32_t delta_d = 0;
    int veclen = s->vec_len;
    TCGv_i64 fd;
-    uint32_t n, i, vd;
+    uint32_t vd;

    vd = a->vd;

@ -1878,6 +1933,10 @@ static bool trans_VMOV_imm_dp(DisasContext *s, arg_VMOV_imm_dp *a)
        return false;
    }

+    if (!dc_isar_feature(aa32_fpdp, s)) {
+        return false;
+    }
+
    if (!dc_isar_feature(aa32_fpshvec, s) &&
        (veclen != 0 || s->vec_stride != 0)) {
        return false;
@ -1901,17 +1960,7 @@ static bool trans_VMOV_imm_dp(DisasContext *s, arg_VMOV_imm_dp *a)
        }
    }

-    n = (a->imm4h << 28) & 0x80000000;
-    i = ((a->imm4h << 4) & 0x70) | a->imm4l;
-    if (i & 0x40) {
-        i |= 0x3f80;
-    } else {
-        i |= 0x4000;
-    }
-    n |= i << 16;
-
-    fd = tcg_temp_new_i64();
-    tcg_gen_movi_i64(fd, ((uint64_t)n) << 32);
+    fd = tcg_const_i64(vfp_expand_imm(MO_64, a->imm));

    for (;;) {
        neon_store_reg64(fd, vd);
@ -2028,6 +2077,10 @@ static bool trans_VCMP_dp(DisasContext *s, arg_VCMP_dp *a)
        return false;
    }

+    if (!dc_isar_feature(aa32_fpdp, s)) {
+        return false;
+    }
+
    if (!vfp_access_check(s)) {
        return true;
    }
@ -2097,6 +2150,10 @@ static bool trans_VCVT_f64_f16(DisasContext *s, arg_VCVT_f64_f16 *a)
        return false;
    }

+    if (!dc_isar_feature(aa32_fpdp, s)) {
+        return false;
+    }
+
    if (!vfp_access_check(s)) {
        return true;
    }
@ -2159,6 +2216,10 @@ static bool trans_VCVT_f16_f64(DisasContext *s, arg_VCVT_f16_f64 *a)
        return false;
    }

+    if (!dc_isar_feature(aa32_fpdp, s)) {
+        return false;
+    }
+
    if (!vfp_access_check(s)) {
        return true;
    }
@ -2201,7 +2262,7 @@ static bool trans_VRINTR_sp(DisasContext *s, arg_VRINTR_sp *a)
    return true;
 }

-static bool trans_VRINTR_dp(DisasContext *s, arg_VRINTR_sp *a)
+static bool trans_VRINTR_dp(DisasContext *s, arg_VRINTR_dp *a)
 {
    TCGv_ptr fpst;
    TCGv_i64 tmp;
@ -2215,6 +2276,10 @@ static bool trans_VRINTR_dp(DisasContext *s, arg_VRINTR_sp *a)
        return false;
    }

+    if (!dc_isar_feature(aa32_fpdp, s)) {
+        return false;
+    }
+
    if (!vfp_access_check(s)) {
        return true;
    }
@ -2257,7 +2322,7 @@ static bool trans_VRINTZ_sp(DisasContext *s, arg_VRINTZ_sp *a)
    return true;
 }

-static bool trans_VRINTZ_dp(DisasContext *s, arg_VRINTZ_sp *a)
+static bool trans_VRINTZ_dp(DisasContext *s, arg_VRINTZ_dp *a)
 {
    TCGv_ptr fpst;
    TCGv_i64 tmp;
@ -2272,6 +2337,10 @@ static bool trans_VRINTZ_dp(DisasContext *s, arg_VRINTZ_sp *a)
        return false;
    }

+    if (!dc_isar_feature(aa32_fpdp, s)) {
+        return false;
+    }
+
    if (!vfp_access_check(s)) {
        return true;
    }
@ -2327,6 +2396,10 @@ static bool trans_VRINTX_dp(DisasContext *s, arg_VRINTX_dp *a)
        return false;
    }

+    if (!dc_isar_feature(aa32_fpdp, s)) {
+        return false;
+    }
+
    if (!vfp_access_check(s)) {
        return true;
    }
@ -2351,6 +2424,10 @@ static bool trans_VCVT_sp(DisasContext *s, arg_VCVT_sp *a)
        return false;
    }

+    if (!dc_isar_feature(aa32_fpdp, s)) {
+        return false;
+    }
+
    if (!vfp_access_check(s)) {
        return true;
    }
@ -2375,6 +2452,10 @@ static bool trans_VCVT_dp(DisasContext *s, arg_VCVT_dp *a)
        return false;
    }

+    if (!dc_isar_feature(aa32_fpdp, s)) {
+        return false;
+    }
+
    if (!vfp_access_check(s)) {
        return true;
    }
@ -2425,6 +2506,10 @@ static bool trans_VCVT_int_dp(DisasContext *s, arg_VCVT_int_dp *a)
        return false;
    }

+    if (!dc_isar_feature(aa32_fpdp, s)) {
+        return false;
+    }
+
    if (!vfp_access_check(s)) {
        return true;
    }
@ -2461,6 +2546,10 @@ static bool trans_VJCVT(DisasContext *s, arg_VJCVT *a)
        return false;
    }

+    if (!dc_isar_feature(aa32_fpdp, s)) {
+        return false;
+    }
+
    if (!vfp_access_check(s)) {
        return true;
    }
@ -2550,6 +2639,10 @@ static bool trans_VCVT_fix_dp(DisasContext *s, arg_VCVT_fix_dp *a)
        return false;
    }

+    if (!dc_isar_feature(aa32_fpdp, s)) {
+        return false;
+    }
+
    if (!vfp_access_check(s)) {
        return true;
    }
@ -2642,6 +2735,10 @@ static bool trans_VCVT_dp_int(DisasContext *s, arg_VCVT_dp_int *a)
        return false;
    }

+    if (!dc_isar_feature(aa32_fpdp, s)) {
+        return false;
+    }
+
    if (!vfp_access_check(s)) {
        return true;
    }
--- a/target/arm/translate.c
+++ b/target/arm/translate.c
@ -67,10 +67,6 @@ TCGv_i32 cpu_CF, cpu_NF, cpu_VF, cpu_ZF;
 TCGv_i64 cpu_exclusive_addr;
 TCGv_i64 cpu_exclusive_val;

-/* FIXME:  These should be removed.  */
-static TCGv_i32 cpu_F0s, cpu_F1s;
-static TCGv_i64 cpu_F0d, cpu_F1d;
-
 #include "exec/gen-icount.h"

 static const char * const regnames[] =
@ -80,6 +76,8 @@ static const char * const regnames[] =
 /* Function prototypes for gen_ functions calling Neon helpers.  */
 typedef void NeonGenThreeOpEnvFn(TCGv_i32, TCGv_env, TCGv_i32,
                                 TCGv_i32, TCGv_i32);
+/* Function prototypes for gen_ functions for fixed-point conversions */
+typedef void VFPGenFixPointFn(TCGv_i32, TCGv_i32, TCGv_i32, TCGv_ptr);

 /* initialize TCG globals.  */
 void arm_translate_init(void)
@ -1374,75 +1372,6 @@ static TCGv_ptr get_fpstatus_ptr(int neon)
    return statusptr;
 }

-static inline void gen_vfp_abs(int dp)
-{
-    if (dp)
-        gen_helper_vfp_absd(cpu_F0d, cpu_F0d);
-    else
-        gen_helper_vfp_abss(cpu_F0s, cpu_F0s);
-}
-
-static inline void gen_vfp_neg(int dp)
-{
-    if (dp)
-        gen_helper_vfp_negd(cpu_F0d, cpu_F0d);
-    else
-        gen_helper_vfp_negs(cpu_F0s, cpu_F0s);
-}
-
-#define VFP_GEN_ITOF(name) \
-static inline void gen_vfp_##name(int dp, int neon) \
-{ \
-    TCGv_ptr statusptr = get_fpstatus_ptr(neon); \
-    if (dp) { \
-        gen_helper_vfp_##name##d(cpu_F0d, cpu_F0s, statusptr); \
-    } else { \
-        gen_helper_vfp_##name##s(cpu_F0s, cpu_F0s, statusptr); \
-    } \
-    tcg_temp_free_ptr(statusptr); \
-}
-
-VFP_GEN_ITOF(uito)
-VFP_GEN_ITOF(sito)
-#undef VFP_GEN_ITOF
-
-#define VFP_GEN_FTOI(name) \
-static inline void gen_vfp_##name(int dp, int neon) \
-{ \
-    TCGv_ptr statusptr = get_fpstatus_ptr(neon); \
-    if (dp) { \
-        gen_helper_vfp_##name##d(cpu_F0s, cpu_F0d, statusptr); \
-    } else { \
-        gen_helper_vfp_##name##s(cpu_F0s, cpu_F0s, statusptr); \
-    } \
-    tcg_temp_free_ptr(statusptr); \
-}
-
-VFP_GEN_FTOI(touiz)
-VFP_GEN_FTOI(tosiz)
-#undef VFP_GEN_FTOI
-
-#define VFP_GEN_FIX(name, round) \
-static inline void gen_vfp_##name(int dp, int shift, int neon) \
-{ \
-    TCGv_i32 tmp_shift = tcg_const_i32(shift); \
-    TCGv_ptr statusptr = get_fpstatus_ptr(neon); \
-    if (dp) { \
-        gen_helper_vfp_##name##d##round(cpu_F0d, cpu_F0d, tmp_shift, \
-                                        statusptr); \
-    } else { \
-        gen_helper_vfp_##name##s##round(cpu_F0s, cpu_F0s, tmp_shift, \
-                                        statusptr); \
-    } \
-    tcg_temp_free_i32(tmp_shift); \
-    tcg_temp_free_ptr(statusptr); \
-}
-VFP_GEN_FIX(tosl, _round_to_zero)
-VFP_GEN_FIX(toul, _round_to_zero)
-VFP_GEN_FIX(slto, )
-VFP_GEN_FIX(ulto, )
-#undef VFP_GEN_FIX
-
 static inline long vfp_reg_offset(bool dp, unsigned reg)
 {
    if (dp) {
@ -1609,9 +1538,6 @@ static TCGv_ptr vfp_reg_ptr(bool dp, int reg)
    return ret;
 }

-#define tcg_gen_ld_f32 tcg_gen_ld_i32
-#define tcg_gen_st_f32 tcg_gen_st_i32
-
 #define ARM_CP_RW_BIT   (1 << 20)

 /* Include the VFP decoder */
@ -4189,16 +4115,6 @@ static const uint8_t neon_3r_sizes[] = {
 #define NEON_2RM_VCVT_SF 62
 #define NEON_2RM_VCVT_UF 63

-static int neon_2rm_is_float_op(int op)
-{
-    /* Return true if this neon 2reg-misc op is float-to-float */
-    return (op == NEON_2RM_VABS_F || op == NEON_2RM_VNEG_F ||
-            (op >= NEON_2RM_VRINTN && op <= NEON_2RM_VRINTZ) ||
-            op == NEON_2RM_VRINTM ||
-            (op >= NEON_2RM_VRINTP && op <= NEON_2RM_VCVTMS) ||
-            op >= NEON_2RM_VRECPE_F);
-}
-
 static bool neon_2rm_is_v8_op(int op)
 {
    /* Return true if this neon 2reg-misc op is ARMv8 and up */
@ -5779,28 +5695,41 @@ static int disas_neon_data_insn(DisasContext *s, uint32_t insn)
                }
            } else if (op >= 14) {
                /* VCVT fixed-point.  */
+                TCGv_ptr fpst;
+                TCGv_i32 shiftv;
+                VFPGenFixPointFn *fn;
+
                if (!(insn & (1 << 21)) || (q && ((rd | rm) & 1))) {
                    return 1;
                }
+
+                if (!(op & 1)) {
+                    if (u) {
+                        fn = gen_helper_vfp_ultos;
+                    } else {
+                        fn = gen_helper_vfp_sltos;
+                    }
+                } else {
+                    if (u) {
+                        fn = gen_helper_vfp_touls_round_to_zero;
+                    } else {
+                        fn = gen_helper_vfp_tosls_round_to_zero;
+                    }
+                }
+
                /* We have already masked out the must-be-1 top bit of imm6,
                 * hence this 32-shift where the ARM ARM has 64-imm6.
                 */
                shift = 32 - shift;
+                fpst = get_fpstatus_ptr(1);
+                shiftv = tcg_const_i32(shift);
                for (pass = 0; pass < (q ? 4 : 2); pass++) {
-                    tcg_gen_ld_f32(cpu_F0s, cpu_env, neon_reg_offset(rm, pass));
-                    if (!(op & 1)) {
-                        if (u)
-                            gen_vfp_ulto(0, shift, 1);
-                        else
-                            gen_vfp_slto(0, shift, 1);
-                    } else {
-                        if (u)
-                            gen_vfp_toul(0, shift, 1);
-                        else
-                            gen_vfp_tosl(0, shift, 1);
-                    }
-                    tcg_gen_st_f32(cpu_F0s, cpu_env, neon_reg_offset(rd, pass));
+                    TCGv_i32 tmpf = neon_load_reg(rm, pass);
+                    fn(tmpf, tmpf, shiftv, fpst);
+                    neon_store_reg(rd, pass, tmpf);
                }
+                tcg_temp_free_ptr(fpst);
+                tcg_temp_free_i32(shiftv);
            } else {
                return 1;
            }
@ -6489,25 +6418,23 @@ static int disas_neon_data_insn(DisasContext *s, uint32_t insn)
                        q || (rm & 1)) {
                        return 1;
                    }
-                    tmp = tcg_temp_new_i32();
-                    tmp2 = tcg_temp_new_i32();
                    fpst = get_fpstatus_ptr(true);
                    ahp = get_ahp_flag();
-                    tcg_gen_ld_f32(cpu_F0s, cpu_env, neon_reg_offset(rm, 0));
-                    gen_helper_vfp_fcvt_f32_to_f16(tmp, cpu_F0s, fpst, ahp);
-                    tcg_gen_ld_f32(cpu_F0s, cpu_env, neon_reg_offset(rm, 1));
-                    gen_helper_vfp_fcvt_f32_to_f16(tmp2, cpu_F0s, fpst, ahp);
+                    tmp = neon_load_reg(rm, 0);
+                    gen_helper_vfp_fcvt_f32_to_f16(tmp, tmp, fpst, ahp);
+                    tmp2 = neon_load_reg(rm, 1);
+                    gen_helper_vfp_fcvt_f32_to_f16(tmp2, tmp2, fpst, ahp);
                    tcg_gen_shli_i32(tmp2, tmp2, 16);
                    tcg_gen_or_i32(tmp2, tmp2, tmp);
-                    tcg_gen_ld_f32(cpu_F0s, cpu_env, neon_reg_offset(rm, 2));
-                    gen_helper_vfp_fcvt_f32_to_f16(tmp, cpu_F0s, fpst, ahp);
-                    tcg_gen_ld_f32(cpu_F0s, cpu_env, neon_reg_offset(rm, 3));
+                    tcg_temp_free_i32(tmp);
+                    tmp = neon_load_reg(rm, 2);
+                    gen_helper_vfp_fcvt_f32_to_f16(tmp, tmp, fpst, ahp);
+                    tmp3 = neon_load_reg(rm, 3);
                    neon_store_reg(rd, 0, tmp2);
-                    tmp2 = tcg_temp_new_i32();
-                    gen_helper_vfp_fcvt_f32_to_f16(tmp2, cpu_F0s, fpst, ahp);
-                    tcg_gen_shli_i32(tmp2, tmp2, 16);
-                    tcg_gen_or_i32(tmp2, tmp2, tmp);
-                    neon_store_reg(rd, 1, tmp2);
+                    gen_helper_vfp_fcvt_f32_to_f16(tmp3, tmp3, fpst, ahp);
+                    tcg_gen_shli_i32(tmp3, tmp3, 16);
+                    tcg_gen_or_i32(tmp3, tmp3, tmp);
+                    neon_store_reg(rd, 1, tmp3);
                    tcg_temp_free_i32(tmp);
                    tcg_temp_free_i32(ahp);
                    tcg_temp_free_ptr(fpst);
@ -6527,20 +6454,18 @@ static int disas_neon_data_insn(DisasContext *s, uint32_t insn)
                    tmp = neon_load_reg(rm, 0);
                    tmp2 = neon_load_reg(rm, 1);
                    tcg_gen_ext16u_i32(tmp3, tmp);
-                    gen_helper_vfp_fcvt_f16_to_f32(cpu_F0s, tmp3, fpst, ahp);
-                    tcg_gen_st_f32(cpu_F0s, cpu_env, neon_reg_offset(rd, 0));
-                    tcg_gen_shri_i32(tmp3, tmp, 16);
-                    gen_helper_vfp_fcvt_f16_to_f32(cpu_F0s, tmp3, fpst, ahp);
-                    tcg_gen_st_f32(cpu_F0s, cpu_env, neon_reg_offset(rd, 1));
-                    tcg_temp_free_i32(tmp);
+                    gen_helper_vfp_fcvt_f16_to_f32(tmp3, tmp3, fpst, ahp);
+                    neon_store_reg(rd, 0, tmp3);
+                    tcg_gen_shri_i32(tmp, tmp, 16);
+                    gen_helper_vfp_fcvt_f16_to_f32(tmp, tmp, fpst, ahp);
+                    neon_store_reg(rd, 1, tmp);
+                    tmp3 = tcg_temp_new_i32();
                    tcg_gen_ext16u_i32(tmp3, tmp2);
-                    gen_helper_vfp_fcvt_f16_to_f32(cpu_F0s, tmp3, fpst, ahp);
-                    tcg_gen_st_f32(cpu_F0s, cpu_env, neon_reg_offset(rd, 2));
-                    tcg_gen_shri_i32(tmp3, tmp2, 16);
-                    gen_helper_vfp_fcvt_f16_to_f32(cpu_F0s, tmp3, fpst, ahp);
-                    tcg_gen_st_f32(cpu_F0s, cpu_env, neon_reg_offset(rd, 3));
-                    tcg_temp_free_i32(tmp2);
-                    tcg_temp_free_i32(tmp3);
+                    gen_helper_vfp_fcvt_f16_to_f32(tmp3, tmp3, fpst, ahp);
+                    neon_store_reg(rd, 2, tmp3);
+                    tcg_gen_shri_i32(tmp2, tmp2, 16);
+                    gen_helper_vfp_fcvt_f16_to_f32(tmp2, tmp2, fpst, ahp);
+                    neon_store_reg(rd, 3, tmp2);
                    tcg_temp_free_i32(ahp);
                    tcg_temp_free_ptr(fpst);
                    break;
@ -6614,13 +6539,7 @@ static int disas_neon_data_insn(DisasContext *s, uint32_t insn)
                default:
                elementwise:
                    for (pass = 0; pass < (q ? 4 : 2); pass++) {
-                        if (neon_2rm_is_float_op(op)) {
-                            tcg_gen_ld_f32(cpu_F0s, cpu_env,
-                                           neon_reg_offset(rm, pass));
-                            tmp = NULL;
-                        } else {
-                            tmp = neon_load_reg(rm, pass);
-                        }
+                        tmp = neon_load_reg(rm, pass);
                        switch (op) {
                        case NEON_2RM_VREV32:
                            switch (size) {
@ -6761,10 +6680,10 @@ static int disas_neon_data_insn(DisasContext *s, uint32_t insn)
                            break;
                        }
                        case NEON_2RM_VABS_F:
-                            gen_vfp_abs(0);
+                            gen_helper_vfp_abss(tmp, tmp);
                            break;
                        case NEON_2RM_VNEG_F:
-                            gen_vfp_neg(0);
+                            gen_helper_vfp_negs(tmp, tmp);
                            break;
                        case NEON_2RM_VSWP:
                            tmp2 = neon_load_reg(rd, pass);
@ -6798,7 +6717,7 @@ static int disas_neon_data_insn(DisasContext *s, uint32_t insn)
                            tcg_rmode = tcg_const_i32(arm_rmode_to_sf(rmode));
                            gen_helper_set_neon_rmode(tcg_rmode, tcg_rmode,
                                                      cpu_env);
-                            gen_helper_rints(cpu_F0s, cpu_F0s, fpstatus);
+                            gen_helper_rints(tmp, tmp, fpstatus);
                            gen_helper_set_neon_rmode(tcg_rmode, tcg_rmode,
                                                      cpu_env);
                            tcg_temp_free_ptr(fpstatus);
@ -6808,7 +6727,7 @@ static int disas_neon_data_insn(DisasContext *s, uint32_t insn)
                        case NEON_2RM_VRINTX:
                        {
                            TCGv_ptr fpstatus = get_fpstatus_ptr(1);
-                            gen_helper_rints_exact(cpu_F0s, cpu_F0s, fpstatus);
+                            gen_helper_rints_exact(tmp, tmp, fpstatus);
                            tcg_temp_free_ptr(fpstatus);
                            break;
                        }
@ -6832,10 +6751,10 @@ static int disas_neon_data_insn(DisasContext *s, uint32_t insn)
                                                      cpu_env);

                            if (is_signed) {
-                                gen_helper_vfp_tosls(cpu_F0s, cpu_F0s,
+                                gen_helper_vfp_tosls(tmp, tmp,
                                                     tcg_shift, fpst);
                            } else {
-                                gen_helper_vfp_touls(cpu_F0s, cpu_F0s,
+                                gen_helper_vfp_touls(tmp, tmp,
                                                     tcg_shift, fpst);
                            }

@ -6863,41 +6782,52 @@ static int disas_neon_data_insn(DisasContext *s, uint32_t insn)
                        case NEON_2RM_VRECPE_F:
                        {
                            TCGv_ptr fpstatus = get_fpstatus_ptr(1);
-                            gen_helper_recpe_f32(cpu_F0s, cpu_F0s, fpstatus);
+                            gen_helper_recpe_f32(tmp, tmp, fpstatus);
                            tcg_temp_free_ptr(fpstatus);
                            break;
                        }
                        case NEON_2RM_VRSQRTE_F:
                        {
                            TCGv_ptr fpstatus = get_fpstatus_ptr(1);
-                            gen_helper_rsqrte_f32(cpu_F0s, cpu_F0s, fpstatus);
+                            gen_helper_rsqrte_f32(tmp, tmp, fpstatus);
                            tcg_temp_free_ptr(fpstatus);
                            break;
                        }
                        case NEON_2RM_VCVT_FS: /* VCVT.F32.S32 */
-                            gen_vfp_sito(0, 1);
+                        {
+                            TCGv_ptr fpstatus = get_fpstatus_ptr(1);
+                            gen_helper_vfp_sitos(tmp, tmp, fpstatus);
+                            tcg_temp_free_ptr(fpstatus);
                            break;
+                        }
                        case NEON_2RM_VCVT_FU: /* VCVT.F32.U32 */
-                            gen_vfp_uito(0, 1);
+                        {
+                            TCGv_ptr fpstatus = get_fpstatus_ptr(1);
+                            gen_helper_vfp_uitos(tmp, tmp, fpstatus);
+                            tcg_temp_free_ptr(fpstatus);
                            break;
+                        }
                        case NEON_2RM_VCVT_SF: /* VCVT.S32.F32 */
-                            gen_vfp_tosiz(0, 1);
+                        {
+                            TCGv_ptr fpstatus = get_fpstatus_ptr(1);
+                            gen_helper_vfp_tosizs(tmp, tmp, fpstatus);
+                            tcg_temp_free_ptr(fpstatus);
                            break;
+                        }
                        case NEON_2RM_VCVT_UF: /* VCVT.U32.F32 */
-                            gen_vfp_touiz(0, 1);
+                        {
+                            TCGv_ptr fpstatus = get_fpstatus_ptr(1);
+                            gen_helper_vfp_touizs(tmp, tmp, fpstatus);
+                            tcg_temp_free_ptr(fpstatus);
                            break;
+                        }
                        default:
                            /* Reserved op values were caught by the
                             * neon_2rm_sizes[] check earlier.
                             */
                            abort();
                        }
-                        if (neon_2rm_is_float_op(op)) {
-                            tcg_gen_st_f32(cpu_F0s, cpu_env,
-                                           neon_reg_offset(rd, pass));
-                        } else {
-                            neon_store_reg(rd, pass, tmp);
-                        }
+                        neon_store_reg(rd, pass, tmp);
                    }
                    break;
                }
@ -11977,12 +11907,8 @@ static void arm_tr_init_disas_context(DisasContextBase *dcbase, CPUState *cs)
        dc->base.max_insns = MIN(dc->base.max_insns, bound);
    }

-    cpu_F0s = tcg_temp_new_i32();
-    cpu_F1s = tcg_temp_new_i32();
-    cpu_F0d = tcg_temp_new_i64();
-    cpu_F1d = tcg_temp_new_i64();
-    cpu_V0 = cpu_F0d;
-    cpu_V1 = cpu_F1d;
+    cpu_V0 = tcg_temp_new_i64();
+    cpu_V1 = tcg_temp_new_i64();
    /* FIXME: cpu_M0 can probably be the same as cpu_V0.  */
    cpu_M0 = tcg_temp_new_i64();
 }
--- a/target/arm/translate.h
+++ b/target/arm/translate.h
@ -237,6 +237,13 @@ static inline void gen_ss_advance(DisasContext *s)
    }
 }

+/*
+ * Given a VFP floating point constant encoded into an 8 bit immediate in an
+ * instruction, expand it to the actual constant value of the specified
+ * size, as per the VFPExpandImm() pseudocode in the Arm ARM.
+ */
+uint64_t vfp_expand_imm(int size, uint8_t imm8);
+
 /* Vector operations shared between ARM and AArch64.  */
 extern const GVecGen3 mla_op[4];
 extern const GVecGen3 mls_op[4];
--- a/target/arm/vfp.decode
+++ b/target/arm/vfp.decode
@ -44,6 +44,8 @@
 %vmov_idx_b     21:1 5:2
 %vmov_idx_h     21:1 6:1

+%vmov_imm 16:4 0:4
+
 # VMOV scalar to general-purpose register; note that this does
 # include some Neon cases.
 VMOV_to_gp   ---- 1110 u:1 1.        1 .... rt:4 1011 ... 1 0000 \
@ -152,10 +154,10 @@ VFM_sp       ---- 1110 1.10 .... .... 1010 . o2:1 . 0 .... \
 VFM_dp       ---- 1110 1.10 .... .... 1011 . o2:1 . 0 .... \
             vm=%vm_dp vn=%vn_dp vd=%vd_dp o1=2

-VMOV_imm_sp  ---- 1110 1.11 imm4h:4 .... 1010 0000 imm4l:4 \
-             vd=%vd_sp
-VMOV_imm_dp  ---- 1110 1.11 imm4h:4 .... 1011 0000 imm4l:4 \
-             vd=%vd_dp
+VMOV_imm_sp  ---- 1110 1.11 .... .... 1010 0000 .... \
+             vd=%vd_sp imm=%vmov_imm
+VMOV_imm_dp  ---- 1110 1.11 .... .... 1011 0000 .... \
+             vd=%vd_dp imm=%vmov_imm

 VMOV_reg_sp  ---- 1110 1.11 0000 .... 1010 01.0 .... \
             vd=%vd_sp vm=%vm_sp