Merge remote-tracking branch 'remotes/pmaydell/tags/pull-target-arm-20200504' into staging

target-arm queue:
 * Start of conversion of Neon insns to decodetree
 * versal board: support SD and RTC
 * Implement ARMv8.2-TTS2UXN
 * Make VQDMULL undefined when U=1
 * Some minor code cleanups

# gpg: Signature made Mon 04 May 2020 13:32:08 BST
# gpg:                using RSA key E1A5C593CD419DE28E8315CF3C2525ED14360CDE
# gpg:                issuer "peter.maydell@linaro.org"
# gpg: Good signature from "Peter Maydell <peter.maydell@linaro.org>" [ultimate]
# gpg:                 aka "Peter Maydell <pmaydell@gmail.com>" [ultimate]
# gpg:                 aka "Peter Maydell <pmaydell@chiark.greenend.org.uk>" [ultimate]
# Primary key fingerprint: E1A5 C593 CD41 9DE2 8E83  15CF 3C25 25ED 1436 0CDE

* remotes/pmaydell/tags/pull-target-arm-20200504: (39 commits)
  target/arm: Move gen_ function typedefs to translate.h
  target/arm: Convert Neon 3-reg-same VMUL, VMLA, VMLS, VSHL to decodetree
  target/arm: Convert Neon 3-reg-same VQADD/VQSUB to decodetree
  target/arm: Convert Neon 3-reg-same comparisons to decodetree
  target/arm: Convert Neon 3-reg-same VMAX/VMIN to decodetree
  target/arm: Convert Neon 3-reg-same logic ops to decodetree
  target/arm: Convert Neon 3-reg-same VADD/VSUB to decodetree
  target/arm: Convert Neon 'load/store single structure' to decodetree
  target/arm: Convert Neon 'load single structure to all lanes' to decodetree
  target/arm: Convert Neon load/store multiple structures to decodetree
  target/arm: Convert VFM[AS]L (scalar) to decodetree
  target/arm: Convert V[US]DOT (scalar) to decodetree
  target/arm: Convert VCMLA (scalar) to decodetree
  target/arm: Convert VFM[AS]L (vector) to decodetree
  target/arm: Convert V[US]DOT (vector) to decodetree
  target/arm: Convert VCADD (vector) to decodetree
  target/arm: Convert VCMLA (vector) to decodetree
  target/arm: Add stubs for AArch32 Neon decodetree
  target/arm: Don't allow Thumb Neon insns without FEATURE_NEON
  target/arm/translate-vfp.inc.c: Remove duplicate simd_r32 check
  ...

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
commit 9af638cc1f
Author: Peter Maydell <peter.maydell@linaro.org>
Date:   2020-05-04 13:37:17 +01:00
19 changed files with 1302 additions and 864 deletions

---- next changed file ----

@@ -395,7 +395,7 @@ static void mps2tz_common_init(MachineState *machine)
exit(EXIT_FAILURE);
}
- sysbus_init_child_obj(OBJECT(machine), "iotkit", &mms->iotkit,
+ sysbus_init_child_obj(OBJECT(machine), TYPE_IOTKIT, &mms->iotkit,
sizeof(mms->iotkit), mmc->armsse_type);
iotkitdev = DEVICE(&mms->iotkit);
object_property_set_link(OBJECT(&mms->iotkit), OBJECT(system_memory),

---- next changed file ----

@@ -20,6 +20,7 @@
#include "hw/arm/sysbus-fdt.h"
#include "hw/arm/fdt.h"
#include "cpu.h"
#include "hw/qdev-properties.h"
#include "hw/arm/xlnx-versal.h"
#define TYPE_XLNX_VERSAL_VIRT_MACHINE MACHINE_TYPE_NAME("xlnx-versal-virt")
@@ -256,6 +257,53 @@ static void fdt_add_zdma_nodes(VersalVirt *s)
}
}
static void fdt_add_sd_nodes(VersalVirt *s)
{
const char clocknames[] = "clk_xin\0clk_ahb";
const char compat[] = "arasan,sdhci-8.9a";
int i;
for (i = ARRAY_SIZE(s->soc.pmc.iou.sd) - 1; i >= 0; i--) {
uint64_t addr = MM_PMC_SD0 + MM_PMC_SD0_SIZE * i;
char *name = g_strdup_printf("/sdhci@%" PRIx64, addr);
qemu_fdt_add_subnode(s->fdt, name);
qemu_fdt_setprop_cells(s->fdt, name, "clocks",
s->phandle.clk_25Mhz, s->phandle.clk_25Mhz);
qemu_fdt_setprop(s->fdt, name, "clock-names",
clocknames, sizeof(clocknames));
qemu_fdt_setprop_cells(s->fdt, name, "interrupts",
GIC_FDT_IRQ_TYPE_SPI, VERSAL_SD0_IRQ_0 + i * 2,
GIC_FDT_IRQ_FLAGS_LEVEL_HI);
qemu_fdt_setprop_sized_cells(s->fdt, name, "reg",
2, addr, 2, MM_PMC_SD0_SIZE);
qemu_fdt_setprop(s->fdt, name, "compatible", compat, sizeof(compat));
g_free(name);
}
}
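(A note on the "clk_xin\0clk_ahb" literal above, since it is easy to misread: device-tree string-list properties are just back-to-back NUL-terminated strings, so one C array with an embedded '\0' encodes both clock names. A minimal sketch, not part of the patch:

    /* sizeof() includes both NULs: 8 bytes for "clk_xin" + 8 for "clk_ahb". */
    const char clocknames[] = "clk_xin\0clk_ahb";
    QEMU_BUILD_BUG_ON(sizeof(clocknames) != 16);

qemu_fdt_setprop() copies the full sizeof(clocknames) bytes, which is exactly the FDT representation of the two-entry "clock-names" list.)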
static void fdt_add_rtc_node(VersalVirt *s)
{
const char compat[] = "xlnx,zynqmp-rtc";
const char interrupt_names[] = "alarm\0sec";
char *name = g_strdup_printf("/rtc@%x", MM_PMC_RTC);
qemu_fdt_add_subnode(s->fdt, name);
qemu_fdt_setprop_cells(s->fdt, name, "interrupts",
GIC_FDT_IRQ_TYPE_SPI, VERSAL_RTC_ALARM_IRQ,
GIC_FDT_IRQ_FLAGS_LEVEL_HI,
GIC_FDT_IRQ_TYPE_SPI, VERSAL_RTC_SECONDS_IRQ,
GIC_FDT_IRQ_FLAGS_LEVEL_HI);
qemu_fdt_setprop(s->fdt, name, "interrupt-names",
interrupt_names, sizeof(interrupt_names));
qemu_fdt_setprop_sized_cells(s->fdt, name, "reg",
2, MM_PMC_RTC, 2, MM_PMC_RTC_SIZE);
qemu_fdt_setprop(s->fdt, name, "compatible", compat, sizeof(compat));
g_free(name);
}
static void fdt_nop_memory_nodes(void *fdt, Error **errp)
{
Error *err = NULL;
@@ -411,10 +459,23 @@ static void create_virtio_regions(VersalVirt *s)
}
}
static void sd_plugin_card(SDHCIState *sd, DriveInfo *di)
{
BlockBackend *blk = di ? blk_by_legacy_dinfo(di) : NULL;
DeviceState *card;
card = qdev_create(qdev_get_child_bus(DEVICE(sd), "sd-bus"), TYPE_SD_CARD);
object_property_add_child(OBJECT(sd), "card[*]", OBJECT(card),
&error_fatal);
qdev_prop_set_drive(card, "drive", blk, &error_fatal);
object_property_set_bool(OBJECT(card), true, "realized", &error_fatal);
}
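(Usage note: sd_plugin_card() is fed by drive_get_next(IF_SD) in versal_virt_init() below, so cards are taken from "if=sd" drives in loop order. An illustrative command line -- treat the image name as a placeholder:

    qemu-system-aarch64 -M xlnx-versal-virt -drive if=sd,index=0,file=sd0.img,format=raw
)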
static void versal_virt_init(MachineState *machine)
{
VersalVirt *s = XLNX_VERSAL_VIRT_MACHINE(machine);
int psci_conduit = QEMU_PSCI_CONDUIT_DISABLED;
int i;
/*
* If the user provides an Operating System to be loaded, we expect them
@@ -440,7 +501,7 @@ static void versal_virt_init(MachineState *machine)
psci_conduit = QEMU_PSCI_CONDUIT_SMC;
}
- sysbus_init_child_obj(OBJECT(machine), "xlnx-ve", &s->soc,
+ sysbus_init_child_obj(OBJECT(machine), "xlnx-versal", &s->soc,
sizeof(s->soc), TYPE_XLNX_VERSAL);
object_property_set_link(OBJECT(&s->soc), OBJECT(machine->ram),
"ddr", &error_abort);
@@ -455,6 +516,8 @@ static void versal_virt_init(MachineState *machine)
fdt_add_gic_nodes(s);
fdt_add_timer_nodes(s);
fdt_add_zdma_nodes(s);
fdt_add_sd_nodes(s);
fdt_add_rtc_node(s);
fdt_add_cpu_nodes(s, psci_conduit);
fdt_add_clk_node(s, "/clk125", 125000000, s->phandle.clk_125Mhz);
fdt_add_clk_node(s, "/clk25", 25000000, s->phandle.clk_25Mhz);
@@ -464,14 +527,19 @@ static void versal_virt_init(MachineState *machine)
memory_region_add_subregion_overlap(get_system_memory(),
0, &s->soc.fpd.apu.mr, 0);
/* Plugin SD cards. */
for (i = 0; i < ARRAY_SIZE(s->soc.pmc.iou.sd); i++) {
sd_plugin_card(&s->soc.pmc.iou.sd[i], drive_get_next(IF_SD));
}
s->binfo.ram_size = machine->ram_size;
s->binfo.loader_start = 0x0;
s->binfo.get_dtb = versal_virt_get_dtb;
s->binfo.modify_dtb = versal_virt_modify_dtb;
if (machine->kernel_filename) {
- arm_load_kernel(s->soc.fpd.apu.cpu[0], machine, &s->binfo);
+ arm_load_kernel(&s->soc.fpd.apu.cpu[0], machine, &s->binfo);
} else {
- AddressSpace *as = arm_boot_address_space(s->soc.fpd.apu.cpu[0],
+ AddressSpace *as = arm_boot_address_space(&s->soc.fpd.apu.cpu[0],
&s->binfo);
/* Some boot-loaders (e.g u-boot) don't like blobs at address 0 (NULL).
* Offset things by 4K. */

---- next changed file ----

@@ -20,9 +20,7 @@
#include "hw/arm/boot.h"
#include "kvm_arm.h"
#include "hw/misc/unimp.h"
- #include "hw/intc/arm_gicv3_common.h"
#include "hw/arm/xlnx-versal.h"
- #include "hw/char/pl011.h"
#define XLNX_VERSAL_ACPU_TYPE ARM_CPU_TYPE_NAME("cortex-a72")
#define GEM_REVISION 0x40070106
@@ -33,23 +31,15 @@ static void versal_create_apu_cpus(Versal *s)
for (i = 0; i < ARRAY_SIZE(s->fpd.apu.cpu); i++) {
Object *obj;
char *name;
- obj = object_new(XLNX_VERSAL_ACPU_TYPE);
- if (!obj) {
- error_report("Unable to create apu.cpu[%d] of type %s",
- i, XLNX_VERSAL_ACPU_TYPE);
- exit(EXIT_FAILURE);
- }
- name = g_strdup_printf("apu-cpu[%d]", i);
- object_property_add_child(OBJECT(s), name, obj, &error_fatal);
- g_free(name);
+ object_initialize_child(OBJECT(s), "apu-cpu[*]",
+ &s->fpd.apu.cpu[i], sizeof(s->fpd.apu.cpu[i]),
+ XLNX_VERSAL_ACPU_TYPE, &error_abort, NULL);
+ obj = OBJECT(&s->fpd.apu.cpu[i]);
object_property_set_int(obj, s->cfg.psci_conduit,
"psci-conduit", &error_abort);
if (i) {
/* Secondary CPUs start in PSCI powered-down state */
object_property_set_bool(obj, true,
"start-powered-off", &error_abort);
}
@@ -59,7 +49,6 @@ static void versal_create_apu_cpus(Versal *s)
object_property_set_link(obj, OBJECT(&s->fpd.apu.mr), "memory",
&error_abort);
object_property_set_bool(obj, true, "realized", &error_fatal);
- s->fpd.apu.cpu[i] = ARM_CPU(obj);
}
}
@@ -97,7 +86,7 @@ static void versal_create_apu_gic(Versal *s, qemu_irq *pic)
}
for (i = 0; i < nr_apu_cpus; i++) {
- DeviceState *cpudev = DEVICE(s->fpd.apu.cpu[i]);
+ DeviceState *cpudev = DEVICE(&s->fpd.apu.cpu[i]);
int ppibase = XLNX_VERSAL_NR_IRQS + i * GIC_INTERNAL + GIC_NR_SGIS;
qemu_irq maint_irq;
int ti;
@@ -145,16 +134,17 @@ static void versal_create_uarts(Versal *s, qemu_irq *pic)
DeviceState *dev;
MemoryRegion *mr;
- dev = qdev_create(NULL, TYPE_PL011);
- s->lpd.iou.uart[i] = SYS_BUS_DEVICE(dev);
+ sysbus_init_child_obj(OBJECT(s), name,
+ &s->lpd.iou.uart[i], sizeof(s->lpd.iou.uart[i]),
+ TYPE_PL011);
+ dev = DEVICE(&s->lpd.iou.uart[i]);
qdev_prop_set_chr(dev, "chardev", serial_hd(i));
- object_property_add_child(OBJECT(s), name, OBJECT(dev), &error_fatal);
qdev_init_nofail(dev);
- mr = sysbus_mmio_get_region(s->lpd.iou.uart[i], 0);
+ mr = sysbus_mmio_get_region(SYS_BUS_DEVICE(dev), 0);
memory_region_add_subregion(&s->mr_ps, addrs[i], mr);
- sysbus_connect_irq(s->lpd.iou.uart[i], 0, pic[irqs[i]]);
+ sysbus_connect_irq(SYS_BUS_DEVICE(dev), 0, pic[irqs[i]]);
g_free(name);
}
}
@@ -171,25 +161,26 @@ static void versal_create_gems(Versal *s, qemu_irq *pic)
DeviceState *dev;
MemoryRegion *mr;
- dev = qdev_create(NULL, "cadence_gem");
- s->lpd.iou.gem[i] = SYS_BUS_DEVICE(dev);
- object_property_add_child(OBJECT(s), name, OBJECT(dev), &error_fatal);
+ sysbus_init_child_obj(OBJECT(s), name,
+ &s->lpd.iou.gem[i], sizeof(s->lpd.iou.gem[i]),
+ TYPE_CADENCE_GEM);
+ dev = DEVICE(&s->lpd.iou.gem[i]);
if (nd->used) {
qemu_check_nic_model(nd, "cadence_gem");
qdev_set_nic_properties(dev, nd);
}
- object_property_set_int(OBJECT(s->lpd.iou.gem[i]),
+ object_property_set_int(OBJECT(dev),
2, "num-priority-queues",
&error_abort);
- object_property_set_link(OBJECT(s->lpd.iou.gem[i]),
+ object_property_set_link(OBJECT(dev),
OBJECT(&s->mr_ps), "dma",
&error_abort);
qdev_init_nofail(dev);
- mr = sysbus_mmio_get_region(s->lpd.iou.gem[i], 0);
+ mr = sysbus_mmio_get_region(SYS_BUS_DEVICE(dev), 0);
memory_region_add_subregion(&s->mr_ps, addrs[i], mr);
- sysbus_connect_irq(s->lpd.iou.gem[i], 0, pic[irqs[i]]);
+ sysbus_connect_irq(SYS_BUS_DEVICE(dev), 0, pic[irqs[i]]);
g_free(name);
}
}
@@ -203,22 +194,72 @@ static void versal_create_admas(Versal *s, qemu_irq *pic)
DeviceState *dev;
MemoryRegion *mr;
- dev = qdev_create(NULL, "xlnx.zdma");
- s->lpd.iou.adma[i] = SYS_BUS_DEVICE(dev);
- object_property_set_int(OBJECT(s->lpd.iou.adma[i]), 128, "bus-width",
- &error_abort);
- object_property_add_child(OBJECT(s), name, OBJECT(dev), &error_fatal);
+ sysbus_init_child_obj(OBJECT(s), name,
+ &s->lpd.iou.adma[i], sizeof(s->lpd.iou.adma[i]),
+ TYPE_XLNX_ZDMA);
+ dev = DEVICE(&s->lpd.iou.adma[i]);
+ object_property_set_int(OBJECT(dev), 128, "bus-width", &error_abort);
qdev_init_nofail(dev);
- mr = sysbus_mmio_get_region(s->lpd.iou.adma[i], 0);
+ mr = sysbus_mmio_get_region(SYS_BUS_DEVICE(dev), 0);
memory_region_add_subregion(&s->mr_ps,
MM_ADMA_CH0 + i * MM_ADMA_CH0_SIZE, mr);
- sysbus_connect_irq(s->lpd.iou.adma[i], 0, pic[VERSAL_ADMA_IRQ_0 + i]);
+ sysbus_connect_irq(SYS_BUS_DEVICE(dev), 0, pic[VERSAL_ADMA_IRQ_0 + i]);
g_free(name);
}
}
#define SDHCI_CAPABILITIES 0x280737ec6481 /* Same as on ZynqMP. */
static void versal_create_sds(Versal *s, qemu_irq *pic)
{
int i;
for (i = 0; i < ARRAY_SIZE(s->pmc.iou.sd); i++) {
DeviceState *dev;
MemoryRegion *mr;
sysbus_init_child_obj(OBJECT(s), "sd[*]",
&s->pmc.iou.sd[i], sizeof(s->pmc.iou.sd[i]),
TYPE_SYSBUS_SDHCI);
dev = DEVICE(&s->pmc.iou.sd[i]);
object_property_set_uint(OBJECT(dev),
3, "sd-spec-version", &error_fatal);
object_property_set_uint(OBJECT(dev), SDHCI_CAPABILITIES, "capareg",
&error_fatal);
object_property_set_uint(OBJECT(dev), UHS_I, "uhs", &error_fatal);
qdev_init_nofail(dev);
mr = sysbus_mmio_get_region(SYS_BUS_DEVICE(dev), 0);
memory_region_add_subregion(&s->mr_ps,
MM_PMC_SD0 + i * MM_PMC_SD0_SIZE, mr);
sysbus_connect_irq(SYS_BUS_DEVICE(dev), 0,
pic[VERSAL_SD0_IRQ_0 + i * 2]);
}
}
static void versal_create_rtc(Versal *s, qemu_irq *pic)
{
SysBusDevice *sbd;
MemoryRegion *mr;
sysbus_init_child_obj(OBJECT(s), "rtc", &s->pmc.rtc, sizeof(s->pmc.rtc),
TYPE_XLNX_ZYNQMP_RTC);
sbd = SYS_BUS_DEVICE(&s->pmc.rtc);
qdev_init_nofail(DEVICE(sbd));
mr = sysbus_mmio_get_region(sbd, 0);
memory_region_add_subregion(&s->mr_ps, MM_PMC_RTC, mr);
/*
* TODO: Connect the ALARM and SECONDS interrupts once our RTC model
* supports them.
*/
sysbus_connect_irq(sbd, 1, pic[VERSAL_RTC_APB_ERR_IRQ]);
}
/* This takes the board allocated linear DDR memory and creates aliases
* for each split DDR range/aperture on the Versal address map.
*/
@@ -301,6 +342,8 @@ static void versal_realize(DeviceState *dev, Error **errp)
versal_create_uarts(s, pic);
versal_create_gems(s, pic);
versal_create_admas(s, pic);
versal_create_sds(s, pic);
versal_create_rtc(s, pic);
versal_map_ddr(s);
versal_unimp(s);

---- next changed file ----

@@ -14,7 +14,12 @@
#include "hw/sysbus.h"
#include "hw/arm/boot.h"
#include "hw/sd/sdhci.h"
#include "hw/intc/arm_gicv3.h"
#include "hw/char/pl011.h"
#include "hw/dma/xlnx-zdma.h"
#include "hw/net/cadence_gem.h"
#include "hw/rtc/xlnx-zynqmp-rtc.h"
#define TYPE_XLNX_VERSAL "xlnx-versal"
#define XLNX_VERSAL(obj) OBJECT_CHECK(Versal, (obj), TYPE_XLNX_VERSAL)
@@ -23,6 +28,7 @@
#define XLNX_VERSAL_NR_UARTS 2
#define XLNX_VERSAL_NR_GEMS 2
#define XLNX_VERSAL_NR_ADMAS 8
#define XLNX_VERSAL_NR_SDS 2
#define XLNX_VERSAL_NR_IRQS 192
typedef struct Versal {
@@ -33,7 +39,7 @@ typedef struct Versal {
struct {
struct {
MemoryRegion mr;
- ARMCPU *cpu[XLNX_VERSAL_NR_ACPUS];
+ ARMCPU cpu[XLNX_VERSAL_NR_ACPUS];
GICv3State gic;
} apu;
} fpd;
@@ -49,12 +55,21 @@ typedef struct Versal {
MemoryRegion mr_ocm;
struct {
- SysBusDevice *uart[XLNX_VERSAL_NR_UARTS];
- SysBusDevice *gem[XLNX_VERSAL_NR_GEMS];
- SysBusDevice *adma[XLNX_VERSAL_NR_ADMAS];
+ PL011State uart[XLNX_VERSAL_NR_UARTS];
+ CadenceGEMState gem[XLNX_VERSAL_NR_GEMS];
+ XlnxZDMA adma[XLNX_VERSAL_NR_ADMAS];
} iou;
} lpd;
/* The Platform Management Controller subsystem. */
struct {
struct {
SDHCIState sd[XLNX_VERSAL_NR_SDS];
} iou;
XlnxZynqMPRTC rtc;
} pmc;
struct {
MemoryRegion *mr_ddr;
uint32_t psci_conduit;
@@ -77,6 +92,10 @@ typedef struct Versal {
#define VERSAL_GEM1_IRQ_0 58
#define VERSAL_GEM1_WAKE_IRQ_0 59
#define VERSAL_ADMA_IRQ_0 60
#define VERSAL_RTC_APB_ERR_IRQ 121
#define VERSAL_SD0_IRQ_0 126
#define VERSAL_RTC_ALARM_IRQ 142
#define VERSAL_RTC_SECONDS_IRQ 143
/* Architecturally reserved IRQs suitable for virtualization. */
#define VERSAL_RSVD_IRQ_FIRST 111
@@ -126,6 +145,10 @@ typedef struct Versal {
#define MM_FPD_CRF 0xfd1a0000U
#define MM_FPD_CRF_SIZE 0x140000
#define MM_PMC_SD0 0xf1040000U
#define MM_PMC_SD0_SIZE 0x10000
#define MM_PMC_CRP 0xf1260000U
#define MM_PMC_CRP_SIZE 0x10000
#define MM_PMC_RTC 0xf12a0000
#define MM_PMC_RTC_SIZE 0x10000
#endif

---- next changed file ----

@@ -18,6 +18,21 @@ target/arm/decode-sve.inc.c: $(SRC_PATH)/target/arm/sve.decode $(DECODETREE)
$(PYTHON) $(DECODETREE) --decode disas_sve -o $@ $<,\
"GEN", $(TARGET_DIR)$@)
target/arm/decode-neon-shared.inc.c: $(SRC_PATH)/target/arm/neon-shared.decode $(DECODETREE)
$(call quiet-command,\
$(PYTHON) $(DECODETREE) --static-decode disas_neon_shared -o $@ $<,\
"GEN", $(TARGET_DIR)$@)
target/arm/decode-neon-dp.inc.c: $(SRC_PATH)/target/arm/neon-dp.decode $(DECODETREE)
$(call quiet-command,\
$(PYTHON) $(DECODETREE) --static-decode disas_neon_dp -o $@ $<,\
"GEN", $(TARGET_DIR)$@)
target/arm/decode-neon-ls.inc.c: $(SRC_PATH)/target/arm/neon-ls.decode $(DECODETREE)
$(call quiet-command,\
$(PYTHON) $(DECODETREE) --static-decode disas_neon_ls -o $@ $<,\
"GEN", $(TARGET_DIR)$@)
target/arm/decode-vfp.inc.c: $(SRC_PATH)/target/arm/vfp.decode $(DECODETREE)
$(call quiet-command,\
$(PYTHON) $(DECODETREE) --static-decode disas_vfp -o $@ $<,\
@@ -49,6 +64,9 @@ target/arm/decode-t16.inc.c: $(SRC_PATH)/target/arm/t16.decode $(DECODETREE)
"GEN", $(TARGET_DIR)$@)
target/arm/translate-sve.o: target/arm/decode-sve.inc.c
target/arm/translate.o: target/arm/decode-neon-shared.inc.c
target/arm/translate.o: target/arm/decode-neon-dp.inc.c
target/arm/translate.o: target/arm/decode-neon-ls.inc.c
target/arm/translate.o: target/arm/decode-vfp.inc.c
target/arm/translate.o: target/arm/decode-vfp-uncond.inc.c
target/arm/translate.o: target/arm/decode-a32.inc.c
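(For reference, each quiet-command rule above wraps an invocation along these lines -- a sketch, using the scripts/decodetree.py path that the .decode files themselves name:

    python3 scripts/decodetree.py --static-decode disas_neon_dp \
        -o target/arm/decode-neon-dp.inc.c target/arm/neon-dp.decode

--static-decode makes the generated decode function static, which fits the model of #including each generated .inc.c into translate.c rather than compiling it separately.)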

---- next changed file ----

@@ -29,6 +29,6 @@
# define TARGET_PAGE_BITS_MIN 10
#endif
- #define NB_MMU_MODES 12
+ #define NB_MMU_MODES 11
#endif

---- next changed file ----

@@ -2686,6 +2686,7 @@ static void arm_max_initfn(Object *obj)
t = FIELD_DP32(t, ID_MMFR4, HPDS, 1); /* AA32HPD */
t = FIELD_DP32(t, ID_MMFR4, AC2, 1); /* ACTLR2, HACTLR2 */
t = FIELD_DP32(t, ID_MMFR4, CNP, 1); /* TTCNP */
t = FIELD_DP32(t, ID_MMFR4, XNX, 1); /* TTS2UXN */
cpu->isar.id_mmfr4 = t;
}
#endif
@@ -2754,7 +2755,7 @@ static const ARMCPUInfo arm_cpus[] = {
static Property arm_cpu_properties[] = {
DEFINE_PROP_BOOL("start-powered-off", ARMCPU, start_powered_off, false),
DEFINE_PROP_UINT32("psci-conduit", ARMCPU, psci_conduit, 0),
- DEFINE_PROP_UINT32("midr", ARMCPU, midr, 0),
+ DEFINE_PROP_UINT64("midr", ARMCPU, midr, 0),
DEFINE_PROP_UINT64("mp-affinity", ARMCPU,
mp_affinity, ARM64_AFFINITY_INVALID),
DEFINE_PROP_INT32("node-id", ARMCPU, node_id, CPU_UNSET_NUMA_NODE_ID),

---- next changed file ----

@@ -894,7 +894,7 @@ struct ARMCPU {
uint64_t id_aa64dfr0;
uint64_t id_aa64dfr1;
} isar;
- uint32_t midr;
+ uint64_t midr;
uint32_t revidr;
uint32_t reset_fpsid;
uint32_t ctr;
@@ -2801,6 +2801,9 @@ bool write_cpustate_to_list(ARMCPU *cpu, bool kvm_sync);
* handling via the TLB. The only way to do a stage 1 translation without
* the immediate stage 2 translation is via the ATS or AT system insns,
* which can be slow-pathed and always do a page table walk.
* The only use of stage 2 translations is either as part of an s1+2
* lookup or when loading the descriptors during a stage 1 page table walk,
* and in both those cases we don't use the TLB.
* 4. we can also safely fold together the "32 bit EL3" and "64 bit EL3"
* translation regimes, because they map reasonably well to each other
* and they can't both be active at the same time.
@@ -2816,15 +2819,15 @@ bool write_cpustate_to_list(ARMCPU *cpu, bool kvm_sync);
* NS EL1 EL1&0 stage 1+2 (aka NS PL1)
* NS EL1 EL1&0 stage 1+2 +PAN
* NS EL0 EL2&0
* NS EL2 EL2&0
* NS EL2 EL2&0 +PAN
* NS EL2 (aka NS PL2)
* S EL0 EL1&0 (aka S PL0)
* S EL1 EL1&0 (not used if EL3 is 32 bit)
* S EL1 EL1&0 +PAN
* S EL3 (aka S PL1)
- * NS EL1&0 stage 2
*
- * for a total of 12 different mmu_idx.
+ * for a total of 11 different mmu_idx.
*
* R profile CPUs have an MPU, but can use the same set of MMU indexes
* as A profile. They only need to distinguish NS EL0 and NS EL1 (and
@@ -2846,7 +2849,8 @@ bool write_cpustate_to_list(ARMCPU *cpu, bool kvm_sync);
* are not quite the same -- different CPU types (most notably M profile
* vs A/R profile) would like to use MMU indexes with different semantics,
* but since we don't ever need to use all of those in a single CPU we
- * can avoid setting NB_MMU_MODES to more than 8. The lower bits of
+ * can avoid having to set NB_MMU_MODES to "total number of A profile MMU
+ * modes + total number of M profile MMU modes". The lower bits of
* ARMMMUIdx are the core TLB mmu index, and the higher bits are always
* the same for any particular CPU.
* Variables of type ARMMUIdx are always full values, and the core
@@ -2894,8 +2898,6 @@ typedef enum ARMMMUIdx {
ARMMMUIdx_SE10_1_PAN = 9 | ARM_MMU_IDX_A,
ARMMMUIdx_SE3 = 10 | ARM_MMU_IDX_A,
- ARMMMUIdx_Stage2 = 11 | ARM_MMU_IDX_A,
/*
* These are not allocated TLBs and are used only for AT system
* instructions or for the first stage of an S12 page table walk.
@@ -2903,6 +2905,14 @@ typedef enum ARMMMUIdx {
ARMMMUIdx_Stage1_E0 = 0 | ARM_MMU_IDX_NOTLB,
ARMMMUIdx_Stage1_E1 = 1 | ARM_MMU_IDX_NOTLB,
ARMMMUIdx_Stage1_E1_PAN = 2 | ARM_MMU_IDX_NOTLB,
/*
* Not allocated a TLB: used only for second stage of an S12 page
* table walk, or for descriptor loads during first stage of an S1
* page table walk. Note that if we ever want to have a TLB for this
* then various TLB flush insns which currently are no-ops or flush
* only stage 1 MMU indexes will need to change to flush stage 2.
*/
ARMMMUIdx_Stage2 = 3 | ARM_MMU_IDX_NOTLB,
/*
* M-profile.
@@ -2936,7 +2946,6 @@ typedef enum ARMMMUIdxBit {
TO_CORE_BIT(SE10_1),
TO_CORE_BIT(SE10_1_PAN),
TO_CORE_BIT(SE3),
- TO_CORE_BIT(Stage2),
TO_CORE_BIT(MUser),
TO_CORE_BIT(MPriv),
@@ -3601,6 +3610,11 @@ static inline bool isar_feature_aa32_ccidx(const ARMISARegisters *id)
return FIELD_EX32(id->id_mmfr4, ID_MMFR4, CCIDX) != 0;
}
static inline bool isar_feature_aa32_tts2uxn(const ARMISARegisters *id)
{
return FIELD_EX32(id->id_mmfr4, ID_MMFR4, XNX) != 0;
}
/*
* 64-bit feature tests via id registers.
*/
@@ -3813,6 +3827,11 @@ static inline bool isar_feature_aa64_ccidx(const ARMISARegisters *id)
return FIELD_EX64(id->id_aa64mmfr2, ID_AA64MMFR2, CCIDX) != 0;
}
static inline bool isar_feature_aa64_tts2uxn(const ARMISARegisters *id)
{
return FIELD_EX64(id->id_aa64mmfr1, ID_AA64MMFR1, XNX) != 0;
}
/*
* Feature tests for "does this exist in either 32-bit or 64-bit?"
*/
@@ -3841,6 +3860,11 @@ static inline bool isar_feature_any_ccidx(const ARMISARegisters *id)
return isar_feature_aa64_ccidx(id) || isar_feature_aa32_ccidx(id);
}
static inline bool isar_feature_any_tts2uxn(const ARMISARegisters *id)
{
return isar_feature_aa64_tts2uxn(id) || isar_feature_aa32_tts2uxn(id);
}
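(These follow the established three-level pattern in this file: an AArch32 test on ID_MMFR4.XNX, an AArch64 test on ID_AA64MMFR1.XNX, and an "any" wrapper for code shared between the two, used later in this series as cpu_isar_feature(any_tts2uxn, env_archcpu(env)) in get_S2prot().)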
/*
* Forward to the above feature tests given an ARMCPU pointer.
*/

---- next changed file ----

@@ -673,6 +673,7 @@ static void aarch64_max_initfn(Object *obj)
t = FIELD_DP64(t, ID_AA64MMFR1, VH, 1);
t = FIELD_DP64(t, ID_AA64MMFR1, PAN, 2); /* ATS1E1 */
t = FIELD_DP64(t, ID_AA64MMFR1, VMIDBITS, 2); /* VMID16 */
t = FIELD_DP64(t, ID_AA64MMFR1, XNX, 1); /* TTS2UXN */
cpu->isar.id_aa64mmfr1 = t;
t = cpu->isar.id_aa64mmfr2;
@@ -706,11 +707,12 @@ static void aarch64_max_initfn(Object *obj)
u = FIELD_DP32(u, ID_MMFR4, HPDS, 1); /* AA32HPD */
u = FIELD_DP32(u, ID_MMFR4, AC2, 1); /* ACTLR2, HACTLR2 */
u = FIELD_DP32(u, ID_MMFR4, CNP, 1); /* TTCNP */
u = FIELD_DP32(u, ID_MMFR4, XNX, 1); /* TTS2UXN */
cpu->isar.id_mmfr4 = u;
- u = cpu->isar.id_aa64dfr0;
- u = FIELD_DP64(u, ID_AA64DFR0, PMUVER, 5); /* v8.4-PMU */
- cpu->isar.id_aa64dfr0 = u;
+ t = cpu->isar.id_aa64dfr0;
+ t = FIELD_DP64(t, ID_AA64DFR0, PMUVER, 5); /* v8.4-PMU */
+ cpu->isar.id_aa64dfr0 = t;
u = cpu->isar.id_dfr0;
u = FIELD_DP32(u, ID_DFR0, PERFMON, 5); /* v8.4-PMU */

---- next changed file ----

@@ -41,6 +41,7 @@
static bool get_phys_addr_lpae(CPUARMState *env, target_ulong address,
MMUAccessType access_type, ARMMMUIdx mmu_idx,
bool s1_is_el0,
hwaddr *phys_ptr, MemTxAttrs *txattrs, int *prot,
target_ulong *page_size_ptr,
ARMMMUFaultInfo *fi, ARMCacheAttrs *cacheattrs);
@@ -814,8 +815,7 @@ static void tlbiall_nsnh_write(CPUARMState *env, const ARMCPRegInfo *ri,
tlb_flush_by_mmuidx(cs,
ARMMMUIdxBit_E10_1 |
ARMMMUIdxBit_E10_1_PAN |
- ARMMMUIdxBit_E10_0 |
- ARMMMUIdxBit_Stage2);
+ ARMMMUIdxBit_E10_0);
}
static void tlbiall_nsnh_is_write(CPUARMState *env, const ARMCPRegInfo *ri,
@@ -826,46 +826,9 @@ static void tlbiall_nsnh_is_write(CPUARMState *env, const ARMCPRegInfo *ri,
tlb_flush_by_mmuidx_all_cpus_synced(cs,
ARMMMUIdxBit_E10_1 |
ARMMMUIdxBit_E10_1_PAN |
- ARMMMUIdxBit_E10_0 |
- ARMMMUIdxBit_Stage2);
+ ARMMMUIdxBit_E10_0);
}
- static void tlbiipas2_write(CPUARMState *env, const ARMCPRegInfo *ri,
- uint64_t value)
- {
- /* Invalidate by IPA. This has to invalidate any structures that
- * contain only stage 2 translation information, but does not need
- * to apply to structures that contain combined stage 1 and stage 2
- * translation information.
- * This must NOP if EL2 isn't implemented or SCR_EL3.NS is zero.
- */
- CPUState *cs = env_cpu(env);
- uint64_t pageaddr;
- if (!arm_feature(env, ARM_FEATURE_EL2) || !(env->cp15.scr_el3 & SCR_NS)) {
- return;
- }
- pageaddr = sextract64(value << 12, 0, 40);
- tlb_flush_page_by_mmuidx(cs, pageaddr, ARMMMUIdxBit_Stage2);
- }
- static void tlbiipas2_is_write(CPUARMState *env, const ARMCPRegInfo *ri,
- uint64_t value)
- {
- CPUState *cs = env_cpu(env);
- uint64_t pageaddr;
- if (!arm_feature(env, ARM_FEATURE_EL2) || !(env->cp15.scr_el3 & SCR_NS)) {
- return;
- }
- pageaddr = sextract64(value << 12, 0, 40);
- tlb_flush_page_by_mmuidx_all_cpus_synced(cs, pageaddr,
- ARMMMUIdxBit_Stage2);
- }
static void tlbiall_hyp_write(CPUARMState *env, const ARMCPRegInfo *ri,
uint64_t value)
@@ -4055,8 +4018,7 @@ static void vttbr_write(CPUARMState *env, const ARMCPRegInfo *ri,
tlb_flush_by_mmuidx(cs,
ARMMMUIdxBit_E10_1 |
ARMMMUIdxBit_E10_1_PAN |
- ARMMMUIdxBit_E10_0 |
- ARMMMUIdxBit_Stage2);
+ ARMMMUIdxBit_E10_0);
raw_write(env, ri, value);
}
}
@@ -4538,11 +4500,6 @@ static int alle1_tlbmask(CPUARMState *env)
return ARMMMUIdxBit_SE10_1 |
ARMMMUIdxBit_SE10_1_PAN |
ARMMMUIdxBit_SE10_0;
- } else if (arm_feature(env, ARM_FEATURE_EL2)) {
- return ARMMMUIdxBit_E10_1 |
- ARMMMUIdxBit_E10_1_PAN |
- ARMMMUIdxBit_E10_0 |
- ARMMMUIdxBit_Stage2;
} else {
return ARMMMUIdxBit_E10_1 |
ARMMMUIdxBit_E10_1_PAN |
@@ -4689,44 +4646,6 @@ static void tlbi_aa64_vae3is_write(CPUARMState *env, const ARMCPRegInfo *ri,
ARMMMUIdxBit_SE3);
}
- static void tlbi_aa64_ipas2e1_write(CPUARMState *env, const ARMCPRegInfo *ri,
- uint64_t value)
- {
- /* Invalidate by IPA. This has to invalidate any structures that
- * contain only stage 2 translation information, but does not need
- * to apply to structures that contain combined stage 1 and stage 2
- * translation information.
- * This must NOP if EL2 isn't implemented or SCR_EL3.NS is zero.
- */
- ARMCPU *cpu = env_archcpu(env);
- CPUState *cs = CPU(cpu);
- uint64_t pageaddr;
- if (!arm_feature(env, ARM_FEATURE_EL2) || !(env->cp15.scr_el3 & SCR_NS)) {
- return;
- }
- pageaddr = sextract64(value << 12, 0, 48);
- tlb_flush_page_by_mmuidx(cs, pageaddr, ARMMMUIdxBit_Stage2);
- }
- static void tlbi_aa64_ipas2e1is_write(CPUARMState *env, const ARMCPRegInfo *ri,
- uint64_t value)
- {
- CPUState *cs = env_cpu(env);
- uint64_t pageaddr;
- if (!arm_feature(env, ARM_FEATURE_EL2) || !(env->cp15.scr_el3 & SCR_NS)) {
- return;
- }
- pageaddr = sextract64(value << 12, 0, 48);
- tlb_flush_page_by_mmuidx_all_cpus_synced(cs, pageaddr,
- ARMMMUIdxBit_Stage2);
- }
static CPAccessResult aa64_zva_access(CPUARMState *env, const ARMCPRegInfo *ri,
bool isread)
{
@@ -4965,12 +4884,10 @@ static const ARMCPRegInfo v8_cp_reginfo[] = {
.writefn = tlbi_aa64_vae1_write },
{ .name = "TLBI_IPAS2E1IS", .state = ARM_CP_STATE_AA64,
.opc0 = 1, .opc1 = 4, .crn = 8, .crm = 0, .opc2 = 1,
- .access = PL2_W, .type = ARM_CP_NO_RAW,
- .writefn = tlbi_aa64_ipas2e1is_write },
+ .access = PL2_W, .type = ARM_CP_NOP },
{ .name = "TLBI_IPAS2LE1IS", .state = ARM_CP_STATE_AA64,
.opc0 = 1, .opc1 = 4, .crn = 8, .crm = 0, .opc2 = 5,
- .access = PL2_W, .type = ARM_CP_NO_RAW,
- .writefn = tlbi_aa64_ipas2e1is_write },
+ .access = PL2_W, .type = ARM_CP_NOP },
{ .name = "TLBI_ALLE1IS", .state = ARM_CP_STATE_AA64,
.opc0 = 1, .opc1 = 4, .crn = 8, .crm = 3, .opc2 = 4,
.access = PL2_W, .type = ARM_CP_NO_RAW,
@@ -4981,12 +4898,10 @@ static const ARMCPRegInfo v8_cp_reginfo[] = {
.writefn = tlbi_aa64_alle1is_write },
{ .name = "TLBI_IPAS2E1", .state = ARM_CP_STATE_AA64,
.opc0 = 1, .opc1 = 4, .crn = 8, .crm = 4, .opc2 = 1,
- .access = PL2_W, .type = ARM_CP_NO_RAW,
- .writefn = tlbi_aa64_ipas2e1_write },
+ .access = PL2_W, .type = ARM_CP_NOP },
{ .name = "TLBI_IPAS2LE1", .state = ARM_CP_STATE_AA64,
.opc0 = 1, .opc1 = 4, .crn = 8, .crm = 4, .opc2 = 5,
- .access = PL2_W, .type = ARM_CP_NO_RAW,
- .writefn = tlbi_aa64_ipas2e1_write },
+ .access = PL2_W, .type = ARM_CP_NOP },
{ .name = "TLBI_ALLE1", .state = ARM_CP_STATE_AA64,
.opc0 = 1, .opc1 = 4, .crn = 8, .crm = 7, .opc2 = 4,
.access = PL2_W, .type = ARM_CP_NO_RAW,
@@ -5067,20 +4982,16 @@ static const ARMCPRegInfo v8_cp_reginfo[] = {
.writefn = tlbimva_hyp_is_write },
{ .name = "TLBIIPAS2",
.cp = 15, .opc1 = 4, .crn = 8, .crm = 4, .opc2 = 1,
- .type = ARM_CP_NO_RAW, .access = PL2_W,
- .writefn = tlbiipas2_write },
+ .type = ARM_CP_NOP, .access = PL2_W },
{ .name = "TLBIIPAS2IS",
.cp = 15, .opc1 = 4, .crn = 8, .crm = 0, .opc2 = 1,
- .type = ARM_CP_NO_RAW, .access = PL2_W,
- .writefn = tlbiipas2_is_write },
+ .type = ARM_CP_NOP, .access = PL2_W },
{ .name = "TLBIIPAS2L",
.cp = 15, .opc1 = 4, .crn = 8, .crm = 4, .opc2 = 5,
- .type = ARM_CP_NO_RAW, .access = PL2_W,
- .writefn = tlbiipas2_write },
+ .type = ARM_CP_NOP, .access = PL2_W },
{ .name = "TLBIIPAS2LIS",
.cp = 15, .opc1 = 4, .crn = 8, .crm = 0, .opc2 = 5,
- .type = ARM_CP_NO_RAW, .access = PL2_W,
- .writefn = tlbiipas2_is_write },
+ .type = ARM_CP_NOP, .access = PL2_W },
/* 32 bit cache operations */
{ .name = "ICIALLUIS", .cp = 15, .opc1 = 0, .crn = 7, .crm = 1, .opc2 = 0,
.type = ARM_CP_NOP, .access = PL1_W, .accessfn = aa64_cacheop_pou_access },
@@ -9997,9 +9908,10 @@ simple_ap_to_rw_prot(CPUARMState *env, ARMMMUIdx mmu_idx, int ap)
*
* @env: CPUARMState
* @s2ap: The 2-bit stage2 access permissions (S2AP)
- * @xn: XN (execute-never) bit
+ * @xn: XN (execute-never) bits
+ * @s1_is_el0: true if this is S2 of an S1+2 walk for EL0
*/
- static int get_S2prot(CPUARMState *env, int s2ap, int xn)
+ static int get_S2prot(CPUARMState *env, int s2ap, int xn, bool s1_is_el0)
{
int prot = 0;
@@ -10009,9 +9921,32 @@ static int get_S2prot(CPUARMState *env, int s2ap, int xn)
if (s2ap & 2) {
prot |= PAGE_WRITE;
}
- if (!xn) {
- if (arm_el_is_aa64(env, 2) || prot & PAGE_READ) {
if (cpu_isar_feature(any_tts2uxn, env_archcpu(env))) {
switch (xn) {
case 0:
prot |= PAGE_EXEC;
break;
case 1:
if (s1_is_el0) {
prot |= PAGE_EXEC;
}
break;
case 2:
break;
case 3:
if (!s1_is_el0) {
prot |= PAGE_EXEC;
}
break;
default:
g_assert_not_reached();
}
} else {
if (!extract32(xn, 1, 1)) {
if (arm_el_is_aa64(env, 2) || prot & PAGE_READ) {
prot |= PAGE_EXEC;
}
}
}
return prot;
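(For orientation, the switch above is the FEAT_XNX reading of the stage 2 XN[1:0] descriptor bits; summarising what the code does, not quoting the ARM ARM text:

    XN[1:0] = 00 -> executable at both EL1 and EL0
    XN[1:0] = 01 -> executable at EL0 only
    XN[1:0] = 10 -> not executable
    XN[1:0] = 11 -> executable at EL1 only

s1_is_el0 says which stage 1 regime this stage 2 walk serves, which is why it decides cases 1 and 3. Without FEAT_XNX only XN[1] is honoured -- the else branch.)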
@@ -10142,8 +10077,10 @@ static hwaddr S1_ptw_translate(CPUARMState *env, ARMMMUIdx mmu_idx,
pcacheattrs = &cacheattrs;
}
- ret = get_phys_addr_lpae(env, addr, 0, ARMMMUIdx_Stage2, &s2pa,
- &txattrs, &s2prot, &s2size, fi, pcacheattrs);
+ ret = get_phys_addr_lpae(env, addr, MMU_DATA_LOAD, ARMMMUIdx_Stage2,
+ false,
+ &s2pa, &txattrs, &s2prot, &s2size, fi,
+ pcacheattrs);
if (ret) {
assert(fi->type != ARMFault_None);
fi->s2addr = addr;
@@ -10744,8 +10681,32 @@ static ARMVAParameters aa32_va_parameters(CPUARMState *env, uint32_t va,
};
}
/**
* get_phys_addr_lpae: perform one stage of page table walk, LPAE format
*
* Returns false if the translation was successful. Otherwise, phys_ptr, attrs,
* prot and page_size may not be filled in, and the populated fsr value provides
* information on why the translation aborted, in the format of a long-format
* DFSR/IFSR fault register, with the following caveats:
* * the WnR bit is never set (the caller must do this).
*
* @env: CPUARMState
* @address: virtual address to get physical address for
* @access_type: MMU_DATA_LOAD, MMU_DATA_STORE or MMU_INST_FETCH
* @mmu_idx: MMU index indicating required translation regime
* @s1_is_el0: if @mmu_idx is ARMMMUIdx_Stage2 (so this is a stage 2 page table
* walk), must be true if this is stage 2 of a stage 1+2 walk for an
* EL0 access. If @mmu_idx is anything else, @s1_is_el0 is ignored.
* @phys_ptr: set to the physical address corresponding to the virtual address
* @attrs: set to the memory transaction attributes to use
* @prot: set to the permissions for the page containing phys_ptr
* @page_size_ptr: set to the size of the page containing phys_ptr
* @fi: set to fault info if the translation fails
* @cacheattrs: (if non-NULL) set to the cacheability/shareability attributes
*/
static bool get_phys_addr_lpae(CPUARMState *env, target_ulong address,
MMUAccessType access_type, ARMMMUIdx mmu_idx,
bool s1_is_el0,
hwaddr *phys_ptr, MemTxAttrs *txattrs, int *prot,
target_ulong *page_size_ptr,
ARMMMUFaultInfo *fi, ARMCacheAttrs *cacheattrs)
@@ -10964,13 +10925,14 @@ static bool get_phys_addr_lpae(CPUARMState *env, target_ulong address,
}
ap = extract32(attrs, 4, 2);
- xn = extract32(attrs, 12, 1);
if (mmu_idx == ARMMMUIdx_Stage2) {
ns = true;
- *prot = get_S2prot(env, ap, xn);
+ xn = extract32(attrs, 11, 2);
+ *prot = get_S2prot(env, ap, xn, s1_is_el0);
} else {
ns = extract32(attrs, 3, 1);
+ xn = extract32(attrs, 12, 1);
pxn = extract32(attrs, 11, 1);
*prot = get_S1prot(env, mmu_idx, aarch64, ap, ns, xn, pxn);
}
@@ -11837,6 +11799,7 @@ bool get_phys_addr(CPUARMState *env, target_ulong address,
/* S1 is done. Now do S2 translation. */
ret = get_phys_addr_lpae(env, ipa, access_type, ARMMMUIdx_Stage2,
mmu_idx == ARMMMUIdx_E10_0,
phys_ptr, attrs, &s2_prot,
page_size, fi,
cacheattrs != NULL ? &cacheattrs2 : NULL);
@@ -11961,7 +11924,7 @@ bool get_phys_addr(CPUARMState *env, target_ulong address,
}
if (regime_using_lpae_format(env, mmu_idx)) {
- return get_phys_addr_lpae(env, address, access_type, mmu_idx,
+ return get_phys_addr_lpae(env, address, access_type, mmu_idx, false,
phys_ptr, attrs, prot, page_size,
fi, cacheattrs);
} else if (regime_sctlr(env, mmu_idx) & SCTLR_XP) {

---- new file: target/arm/neon-dp.decode (86 lines) ----

@@ -0,0 +1,86 @@
# AArch32 Neon data-processing instruction descriptions
#
# Copyright (c) 2020 Linaro, Ltd
#
# This library is free software; you can redistribute it and/or
# modify it under the terms of the GNU Lesser General Public
# License as published by the Free Software Foundation; either
# version 2 of the License, or (at your option) any later version.
#
# This library is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
# Lesser General Public License for more details.
#
# You should have received a copy of the GNU Lesser General Public
# License along with this library; if not, see <http://www.gnu.org/licenses/>.
#
# This file is processed by scripts/decodetree.py
#
# VFP/Neon register fields; same as vfp.decode
%vm_dp 5:1 0:4
%vn_dp 7:1 16:4
%vd_dp 22:1 12:4
# Encodings for Neon data processing instructions where the T32 encoding
# is a simple transformation of the A32 encoding.
# More specifically, this file covers instructions where the A32 encoding is
# 0b1111_001p_qqqq_qqqq_qqqq_qqqq_qqqq_qqqq
# and the T32 encoding is
# 0b111p_1111_qqqq_qqqq_qqqq_qqqq_qqqq_qqqq
# This file works on the A32 encoding only; calling code for T32 has to
# transform the insn into the A32 version first.
######################################################################
# 3-reg-same grouping:
# 1111 001 U 0 D sz:2 Vn:4 Vd:4 opc:4 N Q M op Vm:4
######################################################################
&3same vm vn vd q size
@3same .... ... . . . size:2 .... .... .... . q:1 . . .... \
&3same vm=%vm_dp vn=%vn_dp vd=%vd_dp
VQADD_S_3s 1111 001 0 0 . .. .... .... 0000 . . . 1 .... @3same
VQADD_U_3s 1111 001 1 0 . .. .... .... 0000 . . . 1 .... @3same
@3same_logic .... ... . . . .. .... .... .... . q:1 .. .... \
&3same vm=%vm_dp vn=%vn_dp vd=%vd_dp size=0
VAND_3s 1111 001 0 0 . 00 .... .... 0001 ... 1 .... @3same_logic
VBIC_3s 1111 001 0 0 . 01 .... .... 0001 ... 1 .... @3same_logic
VORR_3s 1111 001 0 0 . 10 .... .... 0001 ... 1 .... @3same_logic
VORN_3s 1111 001 0 0 . 11 .... .... 0001 ... 1 .... @3same_logic
VEOR_3s 1111 001 1 0 . 00 .... .... 0001 ... 1 .... @3same_logic
VBSL_3s 1111 001 1 0 . 01 .... .... 0001 ... 1 .... @3same_logic
VBIT_3s 1111 001 1 0 . 10 .... .... 0001 ... 1 .... @3same_logic
VBIF_3s 1111 001 1 0 . 11 .... .... 0001 ... 1 .... @3same_logic
VQSUB_S_3s 1111 001 0 0 . .. .... .... 0010 . . . 1 .... @3same
VQSUB_U_3s 1111 001 1 0 . .. .... .... 0010 . . . 1 .... @3same
VCGT_S_3s 1111 001 0 0 . .. .... .... 0011 . . . 0 .... @3same
VCGT_U_3s 1111 001 1 0 . .. .... .... 0011 . . . 0 .... @3same
VCGE_S_3s 1111 001 0 0 . .. .... .... 0011 . . . 1 .... @3same
VCGE_U_3s 1111 001 1 0 . .. .... .... 0011 . . . 1 .... @3same
VSHL_S_3s 1111 001 0 0 . .. .... .... 0100 . . . 0 .... @3same
VSHL_U_3s 1111 001 1 0 . .. .... .... 0100 . . . 0 .... @3same
VMAX_S_3s 1111 001 0 0 . .. .... .... 0110 . . . 0 .... @3same
VMAX_U_3s 1111 001 1 0 . .. .... .... 0110 . . . 0 .... @3same
VMIN_S_3s 1111 001 0 0 . .. .... .... 0110 . . . 1 .... @3same
VMIN_U_3s 1111 001 1 0 . .. .... .... 0110 . . . 1 .... @3same
VADD_3s 1111 001 0 0 . .. .... .... 1000 . . . 0 .... @3same
VSUB_3s 1111 001 1 0 . .. .... .... 1000 . . . 0 .... @3same
VTST_3s 1111 001 0 0 . .. .... .... 1000 . . . 1 .... @3same
VCEQ_3s 1111 001 1 0 . .. .... .... 1000 . . . 1 .... @3same
VMLA_3s 1111 001 0 0 . .. .... .... 1001 . . . 0 .... @3same
VMLS_3s 1111 001 1 0 . .. .... .... 1001 . . . 0 .... @3same
VMUL_3s 1111 001 0 0 . .. .... .... 1001 . . . 1 .... @3same
VMUL_p_3s 1111 001 1 0 . .. .... .... 1001 . . . 1 .... @3same
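(To make the pattern notation concrete: the VADD_3s line fixes bits 31:23, 11:8 and 4 and leaves the named fields free, so the generated decoder in effect does the following -- a hand-written sketch, not the verbatim decodetree output:

    /* VADD_3s: 1111 0010 0... .... .... 1000 ...0 .... */
    if ((insn & 0xff800f10) == 0xf2000800) {
        arg_3same a;
        /* %vd_dp 22:1 12:4 -> D:Vd, leftmost subfield most significant */
        a.vd = (extract32(insn, 22, 1) << 4) | extract32(insn, 12, 4);
        a.vn = (extract32(insn, 7, 1) << 4) | extract32(insn, 16, 4);
        a.vm = (extract32(insn, 5, 1) << 4) | extract32(insn, 0, 4);
        a.size = extract32(insn, 20, 2);
        a.q = extract32(insn, 6, 1);
        if (trans_VADD_3s(s, &a)) {
            return true;
        }
    }
)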

---- new file: target/arm/neon-ls.decode (52 lines) ----

@@ -0,0 +1,52 @@
# AArch32 Neon load/store instruction descriptions
#
# Copyright (c) 2020 Linaro, Ltd
#
# This library is free software; you can redistribute it and/or
# modify it under the terms of the GNU Lesser General Public
# License as published by the Free Software Foundation; either
# version 2 of the License, or (at your option) any later version.
#
# This library is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
# Lesser General Public License for more details.
#
# You should have received a copy of the GNU Lesser General Public
# License along with this library; if not, see <http://www.gnu.org/licenses/>.
#
# This file is processed by scripts/decodetree.py
#
# Encodings for Neon load/store instructions where the T32 encoding
# is a simple transformation of the A32 encoding.
# More specifically, this file covers instructions where the A32 encoding is
# 0b1111_0100_xxx0_xxxx_xxxx_xxxx_xxxx_xxxx
# and the T32 encoding is
# 0b1111_1001_xxx0_xxxx_xxxx_xxxx_xxxx_xxxx
# This file works on the A32 encoding only; calling code for T32 has to
# transform the insn into the A32 version first.
%vd_dp 22:1 12:4
# Neon load/store multiple structures
VLDST_multiple 1111 0100 0 . l:1 0 rn:4 .... itype:4 size:2 align:2 rm:4 \
vd=%vd_dp
# Neon load single element to all lanes
VLD_all_lanes 1111 0100 1 . 1 0 rn:4 .... 11 n:2 size:2 t:1 a:1 rm:4 \
vd=%vd_dp
# Neon load/store single structure to one lane
%imm1_5_p1 5:1 !function=plus1
%imm1_6_p1 6:1 !function=plus1
VLDST_single 1111 0100 1 . l:1 0 rn:4 .... 00 n:2 reg_idx:3 align:1 rm:4 \
vd=%vd_dp size=0 stride=1
VLDST_single 1111 0100 1 . l:1 0 rn:4 .... 01 n:2 reg_idx:2 align:2 rm:4 \
vd=%vd_dp size=1 stride=%imm1_5_p1
VLDST_single 1111 0100 1 . l:1 0 rn:4 .... 10 n:2 reg_idx:1 align:3 rm:4 \
vd=%vd_dp size=2 stride=%imm1_6_p1
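(The !function=plus1 modifier passes the raw field through the plus1() helper defined in translate-neon.inc.c below, so for the 16-bit and 32-bit forms the register stride comes out as bit-5 + 1 or bit-6 + 1, i.e. 1 or 2 D registers. Roughly, the generated extractor does:

    a.stride = plus1(s, extract32(insn, 5, 1));  /* size=1 form: 1 or 2 */
)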

---- next changed file ----

@@ -0,0 +1,66 @@
# AArch32 Neon instruction descriptions
#
# Copyright (c) 2020 Linaro, Ltd
#
# This library is free software; you can redistribute it and/or
# modify it under the terms of the GNU Lesser General Public
# License as published by the Free Software Foundation; either
# version 2 of the License, or (at your option) any later version.
#
# This library is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
# Lesser General Public License for more details.
#
# You should have received a copy of the GNU Lesser General Public
# License along with this library; if not, see <http://www.gnu.org/licenses/>.
#
# This file is processed by scripts/decodetree.py
#
# Encodings for Neon instructions whose encoding is the same for
# both A32 and T32.
# More specifically, this covers:
# 2reg scalar ext: 0b1111_1110_xxxx_xxxx_xxxx_1x0x_xxxx_xxxx
# 3same ext: 0b1111_110x_xxxx_xxxx_xxxx_1x0x_xxxx_xxxx
# VFP/Neon register fields; same as vfp.decode
%vm_dp 5:1 0:4
%vm_sp 0:4 5:1
%vn_dp 7:1 16:4
%vn_sp 16:4 7:1
%vd_dp 22:1 12:4
%vd_sp 12:4 22:1
VCMLA 1111 110 rot:2 . 1 size:1 .... .... 1000 . q:1 . 0 .... \
vm=%vm_dp vn=%vn_dp vd=%vd_dp
VCADD 1111 110 rot:1 1 . 0 size:1 .... .... 1000 . q:1 . 0 .... \
vm=%vm_dp vn=%vn_dp vd=%vd_dp
# VUDOT and VSDOT
VDOT 1111 110 00 . 10 .... .... 1101 . q:1 . u:1 .... \
vm=%vm_dp vn=%vn_dp vd=%vd_dp
# VFM[AS]L
VFML 1111 110 0 s:1 . 10 .... .... 1000 . 0 . 1 .... \
vm=%vm_sp vn=%vn_sp vd=%vd_dp q=0
VFML 1111 110 0 s:1 . 10 .... .... 1000 . 1 . 1 .... \
vm=%vm_dp vn=%vn_dp vd=%vd_dp q=1
VCMLA_scalar 1111 1110 0 . rot:2 .... .... 1000 . q:1 index:1 0 vm:4 \
vn=%vn_dp vd=%vd_dp size=0
VCMLA_scalar 1111 1110 1 . rot:2 .... .... 1000 . q:1 . 0 .... \
vm=%vm_dp vn=%vn_dp vd=%vd_dp size=1 index=0
VDOT_scalar 1111 1110 0 . 10 .... .... 1101 . q:1 index:1 u:1 rm:4 \
vm=%vm_dp vn=%vn_dp vd=%vd_dp
%vfml_scalar_q0_rm 0:3 5:1
%vfml_scalar_q1_index 5:1 3:1
VFML_scalar 1111 1110 0 . 0 s:1 .... .... 1000 . 0 . 1 index:1 ... \
rm=%vfml_scalar_q0_rm vn=%vn_sp vd=%vd_dp q=0
VFML_scalar 1111 1110 0 . 0 s:1 .... .... 1000 . 1 . 1 . rm:3 \
index=%vfml_scalar_q1_index vn=%vn_dp vd=%vd_dp q=1

---- next changed file ----

@@ -70,23 +70,6 @@ typedef struct AArch64DecodeTable {
AArch64DecodeFn *disas_fn;
} AArch64DecodeTable;
- /* Function prototype for gen_ functions for calling Neon helpers */
- typedef void NeonGenOneOpEnvFn(TCGv_i32, TCGv_ptr, TCGv_i32);
- typedef void NeonGenTwoOpFn(TCGv_i32, TCGv_i32, TCGv_i32);
- typedef void NeonGenTwoOpEnvFn(TCGv_i32, TCGv_ptr, TCGv_i32, TCGv_i32);
- typedef void NeonGenTwo64OpFn(TCGv_i64, TCGv_i64, TCGv_i64);
- typedef void NeonGenTwo64OpEnvFn(TCGv_i64, TCGv_ptr, TCGv_i64, TCGv_i64);
- typedef void NeonGenNarrowFn(TCGv_i32, TCGv_i64);
- typedef void NeonGenNarrowEnvFn(TCGv_i32, TCGv_ptr, TCGv_i64);
- typedef void NeonGenWidenFn(TCGv_i64, TCGv_i32);
- typedef void NeonGenTwoSingleOPFn(TCGv_i32, TCGv_i32, TCGv_i32, TCGv_ptr);
- typedef void NeonGenTwoDoubleOPFn(TCGv_i64, TCGv_i64, TCGv_i64, TCGv_ptr);
- typedef void NeonGenOneOpFn(TCGv_i64, TCGv_i64);
- typedef void CryptoTwoOpFn(TCGv_ptr, TCGv_ptr);
- typedef void CryptoThreeOpIntFn(TCGv_ptr, TCGv_ptr, TCGv_i32);
- typedef void CryptoThreeOpFn(TCGv_ptr, TCGv_ptr, TCGv_ptr);
- typedef void AtomicThreeOpFn(TCGv_i64, TCGv_i64, TCGv_i64, TCGArg, MemOp);
/* initialize TCG globals. */
void a64_translate_init(void)
{

---- next changed file ----

@@ -115,13 +115,4 @@ static inline int vec_full_reg_size(DisasContext *s)
bool disas_sve(DisasContext *, uint32_t);
- /* Note that the gvec expanders operate on offsets + sizes. */
- typedef void GVecGen2Fn(unsigned, uint32_t, uint32_t, uint32_t, uint32_t);
- typedef void GVecGen2iFn(unsigned, uint32_t, uint32_t, int64_t,
- uint32_t, uint32_t);
- typedef void GVecGen3Fn(unsigned, uint32_t, uint32_t,
- uint32_t, uint32_t, uint32_t);
- typedef void GVecGen4Fn(unsigned, uint32_t, uint32_t, uint32_t,
- uint32_t, uint32_t, uint32_t);
#endif /* TARGET_ARM_TRANSLATE_A64_H */

---- next changed file ----

@@ -0,0 +1,714 @@
/*
* ARM translation: AArch32 Neon instructions
*
* Copyright (c) 2003 Fabrice Bellard
* Copyright (c) 2005-2007 CodeSourcery
* Copyright (c) 2007 OpenedHand, Ltd.
* Copyright (c) 2020 Linaro, Ltd.
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2 of the License, or (at your option) any later version.
*
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with this library; if not, see <http://www.gnu.org/licenses/>.
*/
/*
* This file is intended to be included from translate.c; it uses
* some macros and definitions provided by that file.
* It might be possible to convert it to a standalone .c file eventually.
*/
static inline int plus1(DisasContext *s, int x)
{
return x + 1;
}
/* Include the generated Neon decoder */
#include "decode-neon-dp.inc.c"
#include "decode-neon-ls.inc.c"
#include "decode-neon-shared.inc.c"
static bool trans_VCMLA(DisasContext *s, arg_VCMLA *a)
{
int opr_sz;
TCGv_ptr fpst;
gen_helper_gvec_3_ptr *fn_gvec_ptr;
if (!dc_isar_feature(aa32_vcma, s)
|| (!a->size && !dc_isar_feature(aa32_fp16_arith, s))) {
return false;
}
/* UNDEF accesses to D16-D31 if they don't exist. */
if (!dc_isar_feature(aa32_simd_r32, s) &&
((a->vd | a->vn | a->vm) & 0x10)) {
return false;
}
if ((a->vn | a->vm | a->vd) & a->q) {
return false;
}
if (!vfp_access_check(s)) {
return true;
}
opr_sz = (1 + a->q) * 8;
fpst = get_fpstatus_ptr(1);
fn_gvec_ptr = a->size ? gen_helper_gvec_fcmlas : gen_helper_gvec_fcmlah;
tcg_gen_gvec_3_ptr(vfp_reg_offset(1, a->vd),
vfp_reg_offset(1, a->vn),
vfp_reg_offset(1, a->vm),
fpst, opr_sz, opr_sz, a->rot,
fn_gvec_ptr);
tcg_temp_free_ptr(fpst);
return true;
}
static bool trans_VCADD(DisasContext *s, arg_VCADD *a)
{
int opr_sz;
TCGv_ptr fpst;
gen_helper_gvec_3_ptr *fn_gvec_ptr;
if (!dc_isar_feature(aa32_vcma, s)
|| (!a->size && !dc_isar_feature(aa32_fp16_arith, s))) {
return false;
}
/* UNDEF accesses to D16-D31 if they don't exist. */
if (!dc_isar_feature(aa32_simd_r32, s) &&
((a->vd | a->vn | a->vm) & 0x10)) {
return false;
}
if ((a->vn | a->vm | a->vd) & a->q) {
return false;
}
if (!vfp_access_check(s)) {
return true;
}
opr_sz = (1 + a->q) * 8;
fpst = get_fpstatus_ptr(1);
fn_gvec_ptr = a->size ? gen_helper_gvec_fcadds : gen_helper_gvec_fcaddh;
tcg_gen_gvec_3_ptr(vfp_reg_offset(1, a->vd),
vfp_reg_offset(1, a->vn),
vfp_reg_offset(1, a->vm),
fpst, opr_sz, opr_sz, a->rot,
fn_gvec_ptr);
tcg_temp_free_ptr(fpst);
return true;
}
static bool trans_VDOT(DisasContext *s, arg_VDOT *a)
{
int opr_sz;
gen_helper_gvec_3 *fn_gvec;
if (!dc_isar_feature(aa32_dp, s)) {
return false;
}
/* UNDEF accesses to D16-D31 if they don't exist. */
if (!dc_isar_feature(aa32_simd_r32, s) &&
((a->vd | a->vn | a->vm) & 0x10)) {
return false;
}
if ((a->vn | a->vm | a->vd) & a->q) {
return false;
}
if (!vfp_access_check(s)) {
return true;
}
opr_sz = (1 + a->q) * 8;
fn_gvec = a->u ? gen_helper_gvec_udot_b : gen_helper_gvec_sdot_b;
tcg_gen_gvec_3_ool(vfp_reg_offset(1, a->vd),
vfp_reg_offset(1, a->vn),
vfp_reg_offset(1, a->vm),
opr_sz, opr_sz, 0, fn_gvec);
return true;
}
static bool trans_VFML(DisasContext *s, arg_VFML *a)
{
int opr_sz;
if (!dc_isar_feature(aa32_fhm, s)) {
return false;
}
/* UNDEF accesses to D16-D31 if they don't exist. */
if (!dc_isar_feature(aa32_simd_r32, s) &&
(a->vd & 0x10)) {
return false;
}
if (a->vd & a->q) {
return false;
}
if (!vfp_access_check(s)) {
return true;
}
opr_sz = (1 + a->q) * 8;
tcg_gen_gvec_3_ptr(vfp_reg_offset(1, a->vd),
vfp_reg_offset(a->q, a->vn),
vfp_reg_offset(a->q, a->vm),
cpu_env, opr_sz, opr_sz, a->s, /* is_2 == 0 */
gen_helper_gvec_fmlal_a32);
return true;
}
static bool trans_VCMLA_scalar(DisasContext *s, arg_VCMLA_scalar *a)
{
gen_helper_gvec_3_ptr *fn_gvec_ptr;
int opr_sz;
TCGv_ptr fpst;
if (!dc_isar_feature(aa32_vcma, s)) {
return false;
}
if (a->size == 0 && !dc_isar_feature(aa32_fp16_arith, s)) {
return false;
}
/* UNDEF accesses to D16-D31 if they don't exist. */
if (!dc_isar_feature(aa32_simd_r32, s) &&
((a->vd | a->vn | a->vm) & 0x10)) {
return false;
}
if ((a->vd | a->vn) & a->q) {
return false;
}
if (!vfp_access_check(s)) {
return true;
}
fn_gvec_ptr = (a->size ? gen_helper_gvec_fcmlas_idx
: gen_helper_gvec_fcmlah_idx);
opr_sz = (1 + a->q) * 8;
fpst = get_fpstatus_ptr(1);
tcg_gen_gvec_3_ptr(vfp_reg_offset(1, a->vd),
vfp_reg_offset(1, a->vn),
vfp_reg_offset(1, a->vm),
fpst, opr_sz, opr_sz,
(a->index << 2) | a->rot, fn_gvec_ptr);
tcg_temp_free_ptr(fpst);
return true;
}
static bool trans_VDOT_scalar(DisasContext *s, arg_VDOT_scalar *a)
{
gen_helper_gvec_3 *fn_gvec;
int opr_sz;
TCGv_ptr fpst;
if (!dc_isar_feature(aa32_dp, s)) {
return false;
}
/* UNDEF accesses to D16-D31 if they don't exist. */
if (!dc_isar_feature(aa32_simd_r32, s) &&
((a->vd | a->vn) & 0x10)) {
return false;
}
if ((a->vd | a->vn) & a->q) {
return false;
}
if (!vfp_access_check(s)) {
return true;
}
fn_gvec = a->u ? gen_helper_gvec_udot_idx_b : gen_helper_gvec_sdot_idx_b;
opr_sz = (1 + a->q) * 8;
fpst = get_fpstatus_ptr(1);
tcg_gen_gvec_3_ool(vfp_reg_offset(1, a->vd),
vfp_reg_offset(1, a->vn),
vfp_reg_offset(1, a->rm),
opr_sz, opr_sz, a->index, fn_gvec);
tcg_temp_free_ptr(fpst);
return true;
}
static bool trans_VFML_scalar(DisasContext *s, arg_VFML_scalar *a)
{
int opr_sz;
if (!dc_isar_feature(aa32_fhm, s)) {
return false;
}
/* UNDEF accesses to D16-D31 if they don't exist. */
if (!dc_isar_feature(aa32_simd_r32, s) &&
((a->vd & 0x10) || (a->q && (a->vn & 0x10)))) {
return false;
}
if (a->vd & a->q) {
return false;
}
if (!vfp_access_check(s)) {
return true;
}
opr_sz = (1 + a->q) * 8;
tcg_gen_gvec_3_ptr(vfp_reg_offset(1, a->vd),
vfp_reg_offset(a->q, a->vn),
vfp_reg_offset(a->q, a->rm),
cpu_env, opr_sz, opr_sz,
(a->index << 2) | a->s, /* is_2 == 0 */
gen_helper_gvec_fmlal_idx_a32);
return true;
}
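/*
 * Per-itype parameters for the "load/store multiple structures" forms:
 * register count, element interleave factor, and D-register spacing,
 * consumed by trans_VLDST_multiple() below. (Descriptive comment added
 * for readability.)
 */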
static struct {
int nregs;
int interleave;
int spacing;
} const neon_ls_element_type[11] = {
{1, 4, 1},
{1, 4, 2},
{4, 1, 1},
{2, 2, 2},
{1, 3, 1},
{1, 3, 2},
{3, 1, 1},
{1, 1, 1},
{1, 2, 1},
{1, 2, 2},
{2, 1, 1}
};
static void gen_neon_ldst_base_update(DisasContext *s, int rm, int rn,
int stride)
{
if (rm != 15) {
TCGv_i32 base;
base = load_reg(s, rn);
if (rm == 13) {
tcg_gen_addi_i32(base, base, stride);
} else {
TCGv_i32 index;
index = load_reg(s, rm);
tcg_gen_add_i32(base, base, index);
tcg_temp_free_i32(index);
}
store_reg(s, rn, base);
}
}
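(The rm handling above encodes the architected writeback rule for these insns: Rm == 15 means no base writeback, Rm == 13 means write the base back advanced by the fixed transfer size passed in as stride, and any other Rm adds that general-purpose register instead. All three trans_ functions below funnel their post-access writeback through this helper.)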
static bool trans_VLDST_multiple(DisasContext *s, arg_VLDST_multiple *a)
{
/* Neon load/store multiple structures */
int nregs, interleave, spacing, reg, n;
MemOp endian = s->be_data;
int mmu_idx = get_mem_index(s);
int size = a->size;
TCGv_i64 tmp64;
TCGv_i32 addr, tmp;
if (!arm_dc_feature(s, ARM_FEATURE_NEON)) {
return false;
}
/* UNDEF accesses to D16-D31 if they don't exist */
if (!dc_isar_feature(aa32_simd_r32, s) && (a->vd & 0x10)) {
return false;
}
if (a->itype > 10) {
return false;
}
/* Catch UNDEF cases for bad values of align field */
switch (a->itype & 0xc) {
case 4:
if (a->align >= 2) {
return false;
}
break;
case 8:
if (a->align == 3) {
return false;
}
break;
default:
break;
}
nregs = neon_ls_element_type[a->itype].nregs;
interleave = neon_ls_element_type[a->itype].interleave;
spacing = neon_ls_element_type[a->itype].spacing;
if (size == 3 && (interleave | spacing) != 1) {
return false;
}
if (!vfp_access_check(s)) {
return true;
}
/* For our purposes, bytes are always little-endian. */
if (size == 0) {
endian = MO_LE;
}
/*
* Consecutive little-endian elements from a single register
* can be promoted to a larger little-endian operation.
*/
if (interleave == 1 && endian == MO_LE) {
size = 3;
}
tmp64 = tcg_temp_new_i64();
addr = tcg_temp_new_i32();
tmp = tcg_const_i32(1 << size);
load_reg_var(s, addr, a->rn);
for (reg = 0; reg < nregs; reg++) {
for (n = 0; n < 8 >> size; n++) {
int xs;
for (xs = 0; xs < interleave; xs++) {
int tt = a->vd + reg + spacing * xs;
if (a->l) {
gen_aa32_ld_i64(s, tmp64, addr, mmu_idx, endian | size);
neon_store_element64(tt, n, size, tmp64);
} else {
neon_load_element64(tmp64, tt, n, size);
gen_aa32_st_i64(s, tmp64, addr, mmu_idx, endian | size);
}
tcg_gen_add_i32(addr, addr, tmp);
}
}
}
tcg_temp_free_i32(addr);
tcg_temp_free_i32(tmp);
tcg_temp_free_i64(tmp64);
gen_neon_ldst_base_update(s, a->rm, a->rn, nregs * interleave * 8);
return true;
}
static bool trans_VLD_all_lanes(DisasContext *s, arg_VLD_all_lanes *a)
{
/* Neon load single structure to all lanes */
int reg, stride, vec_size;
int vd = a->vd;
int size = a->size;
int nregs = a->n + 1;
TCGv_i32 addr, tmp;
if (!arm_dc_feature(s, ARM_FEATURE_NEON)) {
return false;
}
/* UNDEF accesses to D16-D31 if they don't exist */
if (!dc_isar_feature(aa32_simd_r32, s) && (a->vd & 0x10)) {
return false;
}
if (size == 3) {
if (nregs != 4 || a->a == 0) {
return false;
}
/* For VLD4 size == 3 a == 1 means 32 bits at 16 byte alignment */
size = 2;
}
if (nregs == 1 && a->a == 1 && size == 0) {
return false;
}
if (nregs == 3 && a->a == 1) {
return false;
}
if (!vfp_access_check(s)) {
return true;
}
/*
* VLD1 to all lanes: T bit indicates how many Dregs to write.
* VLD2/3/4 to all lanes: T bit indicates register stride.
*/
stride = a->t ? 2 : 1;
vec_size = nregs == 1 ? stride * 8 : 8;
tmp = tcg_temp_new_i32();
addr = tcg_temp_new_i32();
load_reg_var(s, addr, a->rn);
for (reg = 0; reg < nregs; reg++) {
gen_aa32_ld_i32(s, tmp, addr, get_mem_index(s),
s->be_data | size);
if ((vd & 1) && vec_size == 16) {
/*
* We cannot write 16 bytes at once because the
* destination is unaligned.
*/
tcg_gen_gvec_dup_i32(size, neon_reg_offset(vd, 0),
8, 8, tmp);
tcg_gen_gvec_mov(0, neon_reg_offset(vd + 1, 0),
neon_reg_offset(vd, 0), 8, 8);
} else {
tcg_gen_gvec_dup_i32(size, neon_reg_offset(vd, 0),
vec_size, vec_size, tmp);
}
tcg_gen_addi_i32(addr, addr, 1 << size);
vd += stride;
}
tcg_temp_free_i32(tmp);
tcg_temp_free_i32(addr);
gen_neon_ldst_base_update(s, a->rm, a->rn, (1 << size) * nregs);
return true;
}
static bool trans_VLDST_single(DisasContext *s, arg_VLDST_single *a)
{
/* Neon load/store single structure to one lane */
int reg;
int nregs = a->n + 1;
int vd = a->vd;
TCGv_i32 addr, tmp;
if (!arm_dc_feature(s, ARM_FEATURE_NEON)) {
return false;
}
/* UNDEF accesses to D16-D31 if they don't exist */
if (!dc_isar_feature(aa32_simd_r32, s) && (a->vd & 0x10)) {
return false;
}
/* Catch the UNDEF cases. This is unavoidably a bit messy. */
switch (nregs) {
case 1:
if (((a->align & (1 << a->size)) != 0) ||
(a->size == 2 && ((a->align & 3) == 1 || (a->align & 3) == 2))) {
return false;
}
break;
case 3:
if ((a->align & 1) != 0) {
return false;
}
/* fall through */
case 2:
if (a->size == 2 && (a->align & 2) != 0) {
return false;
}
break;
case 4:
if ((a->size == 2) && ((a->align & 3) == 3)) {
return false;
}
break;
default:
abort();
}
if ((vd + a->stride * (nregs - 1)) > 31) {
/*
* Attempts to write off the end of the register file are
* UNPREDICTABLE; we choose to UNDEF because otherwise we would
* access off the end of the array that holds the register data.
*/
return false;
}
if (!vfp_access_check(s)) {
return true;
}
tmp = tcg_temp_new_i32();
addr = tcg_temp_new_i32();
load_reg_var(s, addr, a->rn);
/*
* TODO: if we implemented alignment exceptions, we should check
* addr against the alignment encoded in a->align here.
*/
for (reg = 0; reg < nregs; reg++) {
if (a->l) {
gen_aa32_ld_i32(s, tmp, addr, get_mem_index(s),
s->be_data | a->size);
neon_store_element(vd, a->reg_idx, a->size, tmp);
} else { /* Store */
neon_load_element(tmp, vd, a->reg_idx, a->size);
gen_aa32_st_i32(s, tmp, addr, get_mem_index(s),
s->be_data | a->size);
}
vd += a->stride;
tcg_gen_addi_i32(addr, addr, 1 << a->size);
}
tcg_temp_free_i32(addr);
tcg_temp_free_i32(tmp);
gen_neon_ldst_base_update(s, a->rm, a->rn, (1 << a->size) * nregs);
return true;
}
static bool do_3same(DisasContext *s, arg_3same *a, GVecGen3Fn fn)
{
int vec_size = a->q ? 16 : 8;
int rd_ofs = neon_reg_offset(a->vd, 0);
int rn_ofs = neon_reg_offset(a->vn, 0);
int rm_ofs = neon_reg_offset(a->vm, 0);
if (!arm_dc_feature(s, ARM_FEATURE_NEON)) {
return false;
}
/* UNDEF accesses to D16-D31 if they don't exist. */
if (!dc_isar_feature(aa32_simd_r32, s) &&
((a->vd | a->vn | a->vm) & 0x10)) {
return false;
}
if ((a->vn | a->vm | a->vd) & a->q) {
return false;
}
if (!vfp_access_check(s)) {
return true;
}
fn(a->size, rd_ofs, rn_ofs, rm_ofs, vec_size, vec_size);
return true;
}
#define DO_3SAME(INSN, FUNC) \
static bool trans_##INSN##_3s(DisasContext *s, arg_3same *a) \
{ \
return do_3same(s, a, FUNC); \
}
DO_3SAME(VADD, tcg_gen_gvec_add)
DO_3SAME(VSUB, tcg_gen_gvec_sub)
DO_3SAME(VAND, tcg_gen_gvec_and)
DO_3SAME(VBIC, tcg_gen_gvec_andc)
DO_3SAME(VORR, tcg_gen_gvec_or)
DO_3SAME(VORN, tcg_gen_gvec_orc)
DO_3SAME(VEOR, tcg_gen_gvec_xor)
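(For clarity, DO_3SAME(VADD, tcg_gen_gvec_add) expands to exactly:

    static bool trans_VADD_3s(DisasContext *s, arg_3same *a)
    {
        return do_3same(s, a, tcg_gen_gvec_add);
    }

so every 3-reg-same pattern gets a one-line trans_ function delegating to the shared do_3same() checks plus a gvec expander.)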
/* These insns are all gvec_bitsel but with the inputs in various orders. */
#define DO_3SAME_BITSEL(INSN, O1, O2, O3) \
static void gen_##INSN##_3s(unsigned vece, uint32_t rd_ofs, \
uint32_t rn_ofs, uint32_t rm_ofs, \
uint32_t oprsz, uint32_t maxsz) \
{ \
tcg_gen_gvec_bitsel(vece, rd_ofs, O1, O2, O3, oprsz, maxsz); \
} \
DO_3SAME(INSN, gen_##INSN##_3s)
DO_3SAME_BITSEL(VBSL, rd_ofs, rn_ofs, rm_ofs)
DO_3SAME_BITSEL(VBIT, rm_ofs, rn_ofs, rd_ofs)
DO_3SAME_BITSEL(VBIF, rm_ofs, rd_ofs, rn_ofs)
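/*
 * Sketch of the resulting semantics: tcg_gen_gvec_bitsel(vece, d, sel,
 * a, b, ...) computes d = (a & sel) | (b & ~sel), so the orderings
 * above give:
 *   VBSL: rd = (rn & rd) | (rm & ~rd)
 *   VBIT: rd = (rn & rm) | (rd & ~rm)
 *   VBIF: rd = (rd & rm) | (rn & ~rm)
 */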
#define DO_3SAME_NO_SZ_3(INSN, FUNC) \
static bool trans_##INSN##_3s(DisasContext *s, arg_3same *a) \
{ \
if (a->size == 3) { \
return false; \
} \
return do_3same(s, a, FUNC); \
}
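/*
 * Note (illustrative): size == 3 would select 64-bit elements; the
 * insns below have no 64-bit-element forms in the architecture, so
 * that encoding is rejected as UNDEF here.
 */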
DO_3SAME_NO_SZ_3(VMAX_S, tcg_gen_gvec_smax)
DO_3SAME_NO_SZ_3(VMAX_U, tcg_gen_gvec_umax)
DO_3SAME_NO_SZ_3(VMIN_S, tcg_gen_gvec_smin)
DO_3SAME_NO_SZ_3(VMIN_U, tcg_gen_gvec_umin)
DO_3SAME_NO_SZ_3(VMUL, tcg_gen_gvec_mul)
#define DO_3SAME_CMP(INSN, COND) \
static void gen_##INSN##_3s(unsigned vece, uint32_t rd_ofs, \
uint32_t rn_ofs, uint32_t rm_ofs, \
uint32_t oprsz, uint32_t maxsz) \
{ \
tcg_gen_gvec_cmp(COND, vece, rd_ofs, rn_ofs, rm_ofs, oprsz, maxsz); \
} \
DO_3SAME_NO_SZ_3(INSN, gen_##INSN##_3s)
DO_3SAME_CMP(VCGT_S, TCG_COND_GT)
DO_3SAME_CMP(VCGT_U, TCG_COND_GTU)
DO_3SAME_CMP(VCGE_S, TCG_COND_GE)
DO_3SAME_CMP(VCGE_U, TCG_COND_GEU)
DO_3SAME_CMP(VCEQ, TCG_COND_EQ)
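/*
 * These expand to tcg_gen_gvec_cmp(), which writes an all-ones element
 * when the comparison holds and all-zeroes when it does not, matching
 * the flag-free Neon comparison semantics; VTST below similarly uses
 * the cmtst ops to produce an all-ones/all-zeroes mask.
 */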
static void gen_VTST_3s(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
uint32_t rm_ofs, uint32_t oprsz, uint32_t maxsz)
{
tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs, oprsz, maxsz, &cmtst_op[vece]);
}
DO_3SAME_NO_SZ_3(VTST, gen_VTST_3s)
#define DO_3SAME_GVEC4(INSN, OPARRAY) \
static void gen_##INSN##_3s(unsigned vece, uint32_t rd_ofs, \
uint32_t rn_ofs, uint32_t rm_ofs, \
uint32_t oprsz, uint32_t maxsz) \
{ \
tcg_gen_gvec_4(rd_ofs, offsetof(CPUARMState, vfp.qc), \
rn_ofs, rm_ofs, oprsz, maxsz, &OPARRAY[vece]); \
} \
DO_3SAME(INSN, gen_##INSN##_3s)
DO_3SAME_GVEC4(VQADD_S, sqadd_op)
DO_3SAME_GVEC4(VQADD_U, uqadd_op)
DO_3SAME_GVEC4(VQSUB_S, sqsub_op)
DO_3SAME_GVEC4(VQSUB_U, uqsub_op)
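/*
 * Note (illustrative): the extra offset passed to tcg_gen_gvec_4()
 * here is vfp.qc; the saturating ops use it to accumulate the sticky
 * QC (cumulative saturation) flag that is reported via FPSCR.
 */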
static void gen_VMUL_p_3s(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
uint32_t rm_ofs, uint32_t oprsz, uint32_t maxsz)
{
tcg_gen_gvec_3_ool(rd_ofs, rn_ofs, rm_ofs, oprsz, maxsz,
0, gen_helper_gvec_pmul_b);
}
static bool trans_VMUL_p_3s(DisasContext *s, arg_3same *a)
{
if (a->size != 0) {
return false;
}
return do_3same(s, a, gen_VMUL_p_3s);
}
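/*
 * Worked example (illustrative): polynomial VMUL is a carry-less
 * multiply over GF(2), defined only for byte elements (size == 0),
 * so e.g. 0x03 * 0x03 = 0x05 (0b11 * 0b11 = 0b101; no carries
 * propagate between bit positions).
 */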
#define DO_3SAME_GVEC3_NO_SZ_3(INSN, OPARRAY) \
static void gen_##INSN##_3s(unsigned vece, uint32_t rd_ofs, \
uint32_t rn_ofs, uint32_t rm_ofs, \
uint32_t oprsz, uint32_t maxsz) \
{ \
tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs, \
oprsz, maxsz, &OPARRAY[vece]); \
} \
DO_3SAME_NO_SZ_3(INSN, gen_##INSN##_3s)
DO_3SAME_GVEC3_NO_SZ_3(VMLA, mla_op)
DO_3SAME_GVEC3_NO_SZ_3(VMLS, mls_op)
#define DO_3SAME_GVEC3_SHIFT(INSN, OPARRAY) \
static void gen_##INSN##_3s(unsigned vece, uint32_t rd_ofs, \
uint32_t rn_ofs, uint32_t rm_ofs, \
uint32_t oprsz, uint32_t maxsz) \
{ \
/* Note the operation is vshl vd,vm,vn */ \
tcg_gen_gvec_3(rd_ofs, rm_ofs, rn_ofs, \
oprsz, maxsz, &OPARRAY[vece]); \
} \
DO_3SAME(INSN, gen_##INSN##_3s)
DO_3SAME_GVEC3_SHIFT(VSHL_S, sshl_op)
DO_3SAME_GVEC3_SHIFT(VSHL_U, ushl_op)
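/*
 * Sketch of why the operands are swapped: for VSHL the value to shift
 * is in vm and the per-element shift counts are in vn (negative counts
 * shift right), while the sshl/ushl gvec ops take (value, counts), so
 * rm_ofs is passed as the first source and rn_ofs as the second.
 */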


@@ -1872,12 +1872,6 @@ static bool do_vfm_dp(DisasContext *s, arg_VFMA_dp *a, bool neg_n, bool neg_d)
return false;
}
/* UNDEF accesses to D16-D31 if they don't exist. */
if (!dc_isar_feature(aa32_simd_r32, s) &&
((a->vd | a->vn | a->vm) & 0x10)) {
return false;
}
if (!vfp_access_check(s)) {
return true;
}


@@ -1313,8 +1313,9 @@ static TCGv_ptr vfp_reg_ptr(bool dp, int reg)
#define ARM_CP_RW_BIT (1 << 20)
/* Include the VFP decoder */
/* Include the VFP and Neon decoders */
#include "translate-vfp.inc.c"
#include "translate-neon.inc.c"
static inline void iwmmxt_load_reg(TCGv_i64 var, int reg)
{
@@ -2609,8 +2610,6 @@ static int disas_dsp_insn(DisasContext *s, uint32_t insn)
}
#define VFP_REG_SHR(x, n) (((n) > 0) ? (x) >> (n) : (x) << -(n))
#define VFP_SREG(insn, bigbit, smallbit) \
((VFP_REG_SHR(insn, bigbit - 1) & 0x1e) | (((insn) >> (smallbit)) & 1))
#define VFP_DREG(reg, insn, bigbit, smallbit) do { \
if (dc_isar_feature(aa32_simd_r32, s)) { \
reg = (((insn) >> (bigbit)) & 0x0f) \
@@ -2621,11 +2620,8 @@ static int disas_dsp_insn(DisasContext *s, uint32_t insn)
reg = ((insn) >> (bigbit)) & 0x0f; \
}} while (0)
#define VFP_SREG_D(insn) VFP_SREG(insn, 12, 22)
#define VFP_DREG_D(reg, insn) VFP_DREG(reg, insn, 12, 22)
#define VFP_SREG_N(insn) VFP_SREG(insn, 16, 7)
#define VFP_DREG_N(reg, insn) VFP_DREG(reg, insn, 16, 7)
#define VFP_SREG_M(insn) VFP_SREG(insn, 0, 5)
#define VFP_DREG_M(reg, insn) VFP_DREG(reg, insn, 0, 5)
static void gen_neon_dup_low16(TCGv_i32 var)
@@ -3217,274 +3213,6 @@ static void gen_neon_trn_u16(TCGv_i32 t0, TCGv_i32 t1)
tcg_temp_free_i32(rd);
}
static struct {
int nregs;
int interleave;
int spacing;
} const neon_ls_element_type[11] = {
{1, 4, 1},
{1, 4, 2},
{4, 1, 1},
{2, 2, 2},
{1, 3, 1},
{1, 3, 2},
{3, 1, 1},
{1, 1, 1},
{1, 2, 1},
{1, 2, 2},
{2, 1, 1}
};
/* Translate a NEON load/store element instruction. Return nonzero if the
instruction is invalid. */
static int disas_neon_ls_insn(DisasContext *s, uint32_t insn)
{
int rd, rn, rm;
int op;
int nregs;
int interleave;
int spacing;
int stride;
int size;
int reg;
int load;
int n;
int vec_size;
int mmu_idx;
MemOp endian;
TCGv_i32 addr;
TCGv_i32 tmp;
TCGv_i32 tmp2;
TCGv_i64 tmp64;
/* FIXME: this access check should not take precedence over UNDEF
* for invalid encodings; we will generate incorrect syndrome information
* for attempts to execute invalid vfp/neon encodings with FP disabled.
*/
if (s->fp_excp_el) {
gen_exception_insn(s, s->pc_curr, EXCP_UDEF,
syn_simd_access_trap(1, 0xe, false), s->fp_excp_el);
return 0;
}
if (!s->vfp_enabled)
return 1;
VFP_DREG_D(rd, insn);
rn = (insn >> 16) & 0xf;
rm = insn & 0xf;
load = (insn & (1 << 21)) != 0;
endian = s->be_data;
mmu_idx = get_mem_index(s);
if ((insn & (1 << 23)) == 0) {
/* Load store all elements. */
op = (insn >> 8) & 0xf;
size = (insn >> 6) & 3;
if (op > 10)
return 1;
/* Catch UNDEF cases for bad values of align field */
switch (op & 0xc) {
case 4:
if (((insn >> 5) & 1) == 1) {
return 1;
}
break;
case 8:
if (((insn >> 4) & 3) == 3) {
return 1;
}
break;
default:
break;
}
nregs = neon_ls_element_type[op].nregs;
interleave = neon_ls_element_type[op].interleave;
spacing = neon_ls_element_type[op].spacing;
if (size == 3 && (interleave | spacing) != 1) {
return 1;
}
/* For our purposes, bytes are always little-endian. */
if (size == 0) {
endian = MO_LE;
}
/* Consecutive little-endian elements from a single register
* can be promoted to a larger little-endian operation.
*/
if (interleave == 1 && endian == MO_LE) {
size = 3;
}
tmp64 = tcg_temp_new_i64();
addr = tcg_temp_new_i32();
tmp2 = tcg_const_i32(1 << size);
load_reg_var(s, addr, rn);
for (reg = 0; reg < nregs; reg++) {
for (n = 0; n < 8 >> size; n++) {
int xs;
for (xs = 0; xs < interleave; xs++) {
int tt = rd + reg + spacing * xs;
if (load) {
gen_aa32_ld_i64(s, tmp64, addr, mmu_idx, endian | size);
neon_store_element64(tt, n, size, tmp64);
} else {
neon_load_element64(tmp64, tt, n, size);
gen_aa32_st_i64(s, tmp64, addr, mmu_idx, endian | size);
}
tcg_gen_add_i32(addr, addr, tmp2);
}
}
}
tcg_temp_free_i32(addr);
tcg_temp_free_i32(tmp2);
tcg_temp_free_i64(tmp64);
stride = nregs * interleave * 8;
} else {
size = (insn >> 10) & 3;
if (size == 3) {
/* Load single element to all lanes. */
int a = (insn >> 4) & 1;
if (!load) {
return 1;
}
size = (insn >> 6) & 3;
nregs = ((insn >> 8) & 3) + 1;
if (size == 3) {
if (nregs != 4 || a == 0) {
return 1;
}
/* For VLD4 size==3 a == 1 means 32 bits at 16 byte alignment */
size = 2;
}
if (nregs == 1 && a == 1 && size == 0) {
return 1;
}
if (nregs == 3 && a == 1) {
return 1;
}
addr = tcg_temp_new_i32();
load_reg_var(s, addr, rn);
/* VLD1 to all lanes: bit 5 indicates how many Dregs to write.
* VLD2/3/4 to all lanes: bit 5 indicates register stride.
*/
stride = (insn & (1 << 5)) ? 2 : 1;
vec_size = nregs == 1 ? stride * 8 : 8;
tmp = tcg_temp_new_i32();
for (reg = 0; reg < nregs; reg++) {
gen_aa32_ld_i32(s, tmp, addr, get_mem_index(s),
s->be_data | size);
if ((rd & 1) && vec_size == 16) {
/* We cannot write 16 bytes at once because the
* destination is unaligned.
*/
tcg_gen_gvec_dup_i32(size, neon_reg_offset(rd, 0),
8, 8, tmp);
tcg_gen_gvec_mov(0, neon_reg_offset(rd + 1, 0),
neon_reg_offset(rd, 0), 8, 8);
} else {
tcg_gen_gvec_dup_i32(size, neon_reg_offset(rd, 0),
vec_size, vec_size, tmp);
}
tcg_gen_addi_i32(addr, addr, 1 << size);
rd += stride;
}
tcg_temp_free_i32(tmp);
tcg_temp_free_i32(addr);
stride = (1 << size) * nregs;
} else {
/* Single element. */
int idx = (insn >> 4) & 0xf;
int reg_idx;
switch (size) {
case 0:
reg_idx = (insn >> 5) & 7;
stride = 1;
break;
case 1:
reg_idx = (insn >> 6) & 3;
stride = (insn & (1 << 5)) ? 2 : 1;
break;
case 2:
reg_idx = (insn >> 7) & 1;
stride = (insn & (1 << 6)) ? 2 : 1;
break;
default:
abort();
}
nregs = ((insn >> 8) & 3) + 1;
/* Catch the UNDEF cases. This is unavoidably a bit messy. */
switch (nregs) {
case 1:
if (((idx & (1 << size)) != 0) ||
(size == 2 && ((idx & 3) == 1 || (idx & 3) == 2))) {
return 1;
}
break;
case 3:
if ((idx & 1) != 0) {
return 1;
}
/* fall through */
case 2:
if (size == 2 && (idx & 2) != 0) {
return 1;
}
break;
case 4:
if ((size == 2) && ((idx & 3) == 3)) {
return 1;
}
break;
default:
abort();
}
if ((rd + stride * (nregs - 1)) > 31) {
/* Attempts to write off the end of the register file
* are UNPREDICTABLE; we choose to UNDEF because otherwise
* the neon_load_reg() would write off the end of the array.
*/
return 1;
}
tmp = tcg_temp_new_i32();
addr = tcg_temp_new_i32();
load_reg_var(s, addr, rn);
for (reg = 0; reg < nregs; reg++) {
if (load) {
gen_aa32_ld_i32(s, tmp, addr, get_mem_index(s),
s->be_data | size);
neon_store_element(rd, reg_idx, size, tmp);
} else { /* Store */
neon_load_element(tmp, rd, reg_idx, size);
gen_aa32_st_i32(s, tmp, addr, get_mem_index(s),
s->be_data | size);
}
rd += stride;
tcg_gen_addi_i32(addr, addr, 1 << size);
}
tcg_temp_free_i32(addr);
tcg_temp_free_i32(tmp);
stride = nregs * (1 << size);
}
}
if (rm != 15) {
TCGv_i32 base;
base = load_reg(s, rn);
if (rm == 13) {
tcg_gen_addi_i32(base, base, stride);
} else {
TCGv_i32 index;
index = load_reg(s, rm);
tcg_gen_add_i32(base, base, index);
tcg_temp_free_i32(index);
}
store_reg(s, rn, base);
}
return 0;
}
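/*
 * Note (illustrative) on the writeback block above: rm == 15 encodes
 * "no writeback"; rm == 13 encodes post-increment of the base by the
 * transfer size (the "!" form); any other rm post-increments the base
 * by that index register.
 */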
static inline void gen_neon_narrow(int size, TCGv_i32 dest, TCGv_i64 src)
{
switch (size) {
@@ -5002,6 +4730,10 @@ static int disas_neon_data_insn(DisasContext *s, uint32_t insn)
TCGv_ptr ptr1, ptr2, ptr3;
TCGv_i64 tmp64;
if (!arm_dc_feature(s, ARM_FEATURE_NEON)) {
return 1;
}
/* FIXME: this access check should not take precedence over UNDEF
* for invalid encodings; we will generate incorrect syndrome information
* for attempts to execute invalid vfp/neon encodings with FP disabled.
@@ -5116,128 +4848,20 @@ static int disas_neon_data_insn(DisasContext *s, uint32_t insn)
}
return 1;
case NEON_3R_LOGIC: /* Logic ops. */
switch ((u << 2) | size) {
case 0: /* VAND */
tcg_gen_gvec_and(0, rd_ofs, rn_ofs, rm_ofs,
vec_size, vec_size);
break;
case 1: /* VBIC */
tcg_gen_gvec_andc(0, rd_ofs, rn_ofs, rm_ofs,
vec_size, vec_size);
break;
case 2: /* VORR */
tcg_gen_gvec_or(0, rd_ofs, rn_ofs, rm_ofs,
vec_size, vec_size);
break;
case 3: /* VORN */
tcg_gen_gvec_orc(0, rd_ofs, rn_ofs, rm_ofs,
vec_size, vec_size);
break;
case 4: /* VEOR */
tcg_gen_gvec_xor(0, rd_ofs, rn_ofs, rm_ofs,
vec_size, vec_size);
break;
case 5: /* VBSL */
tcg_gen_gvec_bitsel(MO_8, rd_ofs, rd_ofs, rn_ofs, rm_ofs,
vec_size, vec_size);
break;
case 6: /* VBIT */
tcg_gen_gvec_bitsel(MO_8, rd_ofs, rm_ofs, rn_ofs, rd_ofs,
vec_size, vec_size);
break;
case 7: /* VBIF */
tcg_gen_gvec_bitsel(MO_8, rd_ofs, rm_ofs, rd_ofs, rn_ofs,
vec_size, vec_size);
break;
}
return 0;
case NEON_3R_VADD_VSUB:
if (u) {
tcg_gen_gvec_sub(size, rd_ofs, rn_ofs, rm_ofs,
vec_size, vec_size);
} else {
tcg_gen_gvec_add(size, rd_ofs, rn_ofs, rm_ofs,
vec_size, vec_size);
}
return 0;
case NEON_3R_VQADD:
tcg_gen_gvec_4(rd_ofs, offsetof(CPUARMState, vfp.qc),
rn_ofs, rm_ofs, vec_size, vec_size,
(u ? uqadd_op : sqadd_op) + size);
return 0;
case NEON_3R_VQSUB:
tcg_gen_gvec_4(rd_ofs, offsetof(CPUARMState, vfp.qc),
rn_ofs, rm_ofs, vec_size, vec_size,
(u ? uqsub_op : sqsub_op) + size);
return 0;
case NEON_3R_VMUL: /* VMUL */
if (u) {
/* Polynomial case allows only P8. */
if (size != 0) {
return 1;
}
tcg_gen_gvec_3_ool(rd_ofs, rn_ofs, rm_ofs, vec_size, vec_size,
0, gen_helper_gvec_pmul_b);
} else {
tcg_gen_gvec_mul(size, rd_ofs, rn_ofs, rm_ofs,
vec_size, vec_size);
}
return 0;
case NEON_3R_VML: /* VMLA, VMLS */
tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs, vec_size, vec_size,
u ? &mls_op[size] : &mla_op[size]);
return 0;
case NEON_3R_VTST_VCEQ:
if (u) { /* VCEQ */
tcg_gen_gvec_cmp(TCG_COND_EQ, size, rd_ofs, rn_ofs, rm_ofs,
vec_size, vec_size);
} else { /* VTST */
tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs,
vec_size, vec_size, &cmtst_op[size]);
}
return 0;
case NEON_3R_VCGT:
tcg_gen_gvec_cmp(u ? TCG_COND_GTU : TCG_COND_GT, size,
rd_ofs, rn_ofs, rm_ofs, vec_size, vec_size);
return 0;
case NEON_3R_VCGE:
tcg_gen_gvec_cmp(u ? TCG_COND_GEU : TCG_COND_GE, size,
rd_ofs, rn_ofs, rm_ofs, vec_size, vec_size);
return 0;
case NEON_3R_LOGIC:
case NEON_3R_VMAX:
if (u) {
tcg_gen_gvec_umax(size, rd_ofs, rn_ofs, rm_ofs,
vec_size, vec_size);
} else {
tcg_gen_gvec_smax(size, rd_ofs, rn_ofs, rm_ofs,
vec_size, vec_size);
}
return 0;
case NEON_3R_VMIN:
if (u) {
tcg_gen_gvec_umin(size, rd_ofs, rn_ofs, rm_ofs,
vec_size, vec_size);
} else {
tcg_gen_gvec_smin(size, rd_ofs, rn_ofs, rm_ofs,
vec_size, vec_size);
}
return 0;
case NEON_3R_VTST_VCEQ:
case NEON_3R_VCGT:
case NEON_3R_VCGE:
case NEON_3R_VQADD:
case NEON_3R_VQSUB:
case NEON_3R_VMUL:
case NEON_3R_VML:
case NEON_3R_VSHL:
/* Note the operation is vshl vd,vm,vn */
tcg_gen_gvec_3(rd_ofs, rm_ofs, rn_ofs, vec_size, vec_size,
u ? &ushl_op[size] : &sshl_op[size]);
return 0;
/* Already handled by decodetree */
return 1;
}
if (size == 3) {
@@ -6016,7 +5640,7 @@ static int disas_neon_data_insn(DisasContext *s, uint32_t insn)
{0, 0, 0, 0}, /* VMLSL */
{0, 0, 0, 9}, /* VQDMLSL */
{0, 0, 0, 0}, /* Integer VMULL */
{0, 0, 0, 1}, /* VQDMULL */
{0, 0, 0, 9}, /* VQDMULL */
{0, 0, 0, 0xa}, /* Polynomial VMULL */
{0, 0, 0, 7}, /* Reserved: always UNDEF */
};
@@ -7023,232 +6647,6 @@ static int disas_neon_data_insn(DisasContext *s, uint32_t insn)
return 0;
}
/* Advanced SIMD three registers of the same length extension.
* 31 25 23 22 20 16 12 11 10 9 8 3 0
* +---------------+-----+---+-----+----+----+---+----+---+----+---------+----+
* | 1 1 1 1 1 1 0 | op1 | D | op2 | Vn | Vd | 1 | o3 | 0 | o4 | N Q M U | Vm |
* +---------------+-----+---+-----+----+----+---+----+---+----+---------+----+
*/
static int disas_neon_insn_3same_ext(DisasContext *s, uint32_t insn)
{
gen_helper_gvec_3 *fn_gvec = NULL;
gen_helper_gvec_3_ptr *fn_gvec_ptr = NULL;
int rd, rn, rm, opr_sz;
int data = 0;
int off_rn, off_rm;
bool is_long = false, q = extract32(insn, 6, 1);
bool ptr_is_env = false;
if ((insn & 0xfe200f10) == 0xfc200800) {
/* VCMLA -- 1111 110R R.1S .... .... 1000 ...0 .... */
int size = extract32(insn, 20, 1);
data = extract32(insn, 23, 2); /* rot */
if (!dc_isar_feature(aa32_vcma, s)
|| (!size && !dc_isar_feature(aa32_fp16_arith, s))) {
return 1;
}
fn_gvec_ptr = size ? gen_helper_gvec_fcmlas : gen_helper_gvec_fcmlah;
} else if ((insn & 0xfea00f10) == 0xfc800800) {
/* VCADD -- 1111 110R 1.0S .... .... 1000 ...0 .... */
int size = extract32(insn, 20, 1);
data = extract32(insn, 24, 1); /* rot */
if (!dc_isar_feature(aa32_vcma, s)
|| (!size && !dc_isar_feature(aa32_fp16_arith, s))) {
return 1;
}
fn_gvec_ptr = size ? gen_helper_gvec_fcadds : gen_helper_gvec_fcaddh;
} else if ((insn & 0xfeb00f00) == 0xfc200d00) {
/* V[US]DOT -- 1111 1100 0.10 .... .... 1101 .Q.U .... */
bool u = extract32(insn, 4, 1);
if (!dc_isar_feature(aa32_dp, s)) {
return 1;
}
fn_gvec = u ? gen_helper_gvec_udot_b : gen_helper_gvec_sdot_b;
} else if ((insn & 0xff300f10) == 0xfc200810) {
/* VFM[AS]L -- 1111 1100 S.10 .... .... 1000 .Q.1 .... */
int is_s = extract32(insn, 23, 1);
if (!dc_isar_feature(aa32_fhm, s)) {
return 1;
}
is_long = true;
data = is_s; /* is_2 == 0 */
fn_gvec_ptr = gen_helper_gvec_fmlal_a32;
ptr_is_env = true;
} else {
return 1;
}
VFP_DREG_D(rd, insn);
if (rd & q) {
return 1;
}
if (q || !is_long) {
VFP_DREG_N(rn, insn);
VFP_DREG_M(rm, insn);
if ((rn | rm) & q & !is_long) {
return 1;
}
off_rn = vfp_reg_offset(1, rn);
off_rm = vfp_reg_offset(1, rm);
} else {
rn = VFP_SREG_N(insn);
rm = VFP_SREG_M(insn);
off_rn = vfp_reg_offset(0, rn);
off_rm = vfp_reg_offset(0, rm);
}
if (s->fp_excp_el) {
gen_exception_insn(s, s->pc_curr, EXCP_UDEF,
syn_simd_access_trap(1, 0xe, false), s->fp_excp_el);
return 0;
}
if (!s->vfp_enabled) {
return 1;
}
opr_sz = (1 + q) * 8;
if (fn_gvec_ptr) {
TCGv_ptr ptr;
if (ptr_is_env) {
ptr = cpu_env;
} else {
ptr = get_fpstatus_ptr(1);
}
tcg_gen_gvec_3_ptr(vfp_reg_offset(1, rd), off_rn, off_rm, ptr,
opr_sz, opr_sz, data, fn_gvec_ptr);
if (!ptr_is_env) {
tcg_temp_free_ptr(ptr);
}
} else {
tcg_gen_gvec_3_ool(vfp_reg_offset(1, rd), off_rn, off_rm,
opr_sz, opr_sz, data, fn_gvec);
}
return 0;
}
/* Advanced SIMD two registers and a scalar extension.
* 31 24 23 22 20 16 12 11 10 9 8 3 0
* +-----------------+----+---+----+----+----+---+----+---+----+---------+----+
* | 1 1 1 1 1 1 1 0 | o1 | D | o2 | Vn | Vd | 1 | o3 | 0 | o4 | N Q M U | Vm |
* +-----------------+----+---+----+----+----+---+----+---+----+---------+----+
*
*/
static int disas_neon_insn_2reg_scalar_ext(DisasContext *s, uint32_t insn)
{
gen_helper_gvec_3 *fn_gvec = NULL;
gen_helper_gvec_3_ptr *fn_gvec_ptr = NULL;
int rd, rn, rm, opr_sz, data;
int off_rn, off_rm;
bool is_long = false, q = extract32(insn, 6, 1);
bool ptr_is_env = false;
if ((insn & 0xff000f10) == 0xfe000800) {
/* VCMLA (indexed) -- 1111 1110 S.RR .... .... 1000 ...0 .... */
int rot = extract32(insn, 20, 2);
int size = extract32(insn, 23, 1);
int index;
if (!dc_isar_feature(aa32_vcma, s)) {
return 1;
}
if (size == 0) {
if (!dc_isar_feature(aa32_fp16_arith, s)) {
return 1;
}
/* For fp16, rm is just Vm, and index is M. */
rm = extract32(insn, 0, 4);
index = extract32(insn, 5, 1);
} else {
/* For fp32, rm is the usual M:Vm, and index is 0. */
VFP_DREG_M(rm, insn);
index = 0;
}
data = (index << 2) | rot;
fn_gvec_ptr = (size ? gen_helper_gvec_fcmlas_idx
: gen_helper_gvec_fcmlah_idx);
} else if ((insn & 0xffb00f00) == 0xfe200d00) {
/* V[US]DOT -- 1111 1110 0.10 .... .... 1101 .Q.U .... */
int u = extract32(insn, 4, 1);
if (!dc_isar_feature(aa32_dp, s)) {
return 1;
}
fn_gvec = u ? gen_helper_gvec_udot_idx_b : gen_helper_gvec_sdot_idx_b;
/* rm is just Vm, and index is M. */
data = extract32(insn, 5, 1); /* index */
rm = extract32(insn, 0, 4);
} else if ((insn & 0xffa00f10) == 0xfe000810) {
/* VFM[AS]L -- 1111 1110 0.0S .... .... 1000 .Q.1 .... */
int is_s = extract32(insn, 20, 1);
int vm20 = extract32(insn, 0, 3);
int vm3 = extract32(insn, 3, 1);
int m = extract32(insn, 5, 1);
int index;
if (!dc_isar_feature(aa32_fhm, s)) {
return 1;
}
if (q) {
rm = vm20;
index = m * 2 + vm3;
} else {
rm = vm20 * 2 + m;
index = vm3;
}
is_long = true;
data = (index << 2) | is_s; /* is_2 == 0 */
fn_gvec_ptr = gen_helper_gvec_fmlal_idx_a32;
ptr_is_env = true;
} else {
return 1;
}
VFP_DREG_D(rd, insn);
if (rd & q) {
return 1;
}
if (q || !is_long) {
VFP_DREG_N(rn, insn);
if (rn & q & !is_long) {
return 1;
}
off_rn = vfp_reg_offset(1, rn);
off_rm = vfp_reg_offset(1, rm);
} else {
rn = VFP_SREG_N(insn);
off_rn = vfp_reg_offset(0, rn);
off_rm = vfp_reg_offset(0, rm);
}
if (s->fp_excp_el) {
gen_exception_insn(s, s->pc_curr, EXCP_UDEF,
syn_simd_access_trap(1, 0xe, false), s->fp_excp_el);
return 0;
}
if (!s->vfp_enabled) {
return 1;
}
opr_sz = (1 + q) * 8;
if (fn_gvec_ptr) {
TCGv_ptr ptr;
if (ptr_is_env) {
ptr = cpu_env;
} else {
ptr = get_fpstatus_ptr(1);
}
tcg_gen_gvec_3_ptr(vfp_reg_offset(1, rd), off_rn, off_rm, ptr,
opr_sz, opr_sz, data, fn_gvec_ptr);
if (!ptr_is_env) {
tcg_temp_free_ptr(ptr);
}
} else {
tcg_gen_gvec_3_ool(vfp_reg_offset(1, rd), off_rn, off_rm,
opr_sz, opr_sz, data, fn_gvec);
}
return 0;
}
static int disas_coproc_insn(DisasContext *s, uint32_t insn)
{
int cpnum, is64, crn, crm, opc1, opc2, isread, rt, rt2;
@@ -10941,33 +10339,21 @@ static void disas_arm_insn(DisasContext *s, unsigned int insn)
/* Unconditional instructions. */
/* TODO: Perhaps merge these into one decodetree output file. */
if (disas_a32_uncond(s, insn) ||
disas_vfp_uncond(s, insn)) {
disas_vfp_uncond(s, insn) ||
disas_neon_dp(s, insn) ||
disas_neon_ls(s, insn) ||
disas_neon_shared(s, insn)) {
return;
}
/* fall back to legacy decoder */
if (((insn >> 25) & 7) == 1) {
/* NEON Data processing. */
if (!arm_dc_feature(s, ARM_FEATURE_NEON)) {
goto illegal_op;
}
if (disas_neon_data_insn(s, insn)) {
goto illegal_op;
}
return;
}
if ((insn & 0x0f100000) == 0x04000000) {
/* NEON load/store. */
if (!arm_dc_feature(s, ARM_FEATURE_NEON)) {
goto illegal_op;
}
if (disas_neon_ls_insn(s, insn)) {
goto illegal_op;
}
return;
}
if ((insn & 0x0e000f00) == 0x0c000100) {
if (arm_dc_feature(s, ARM_FEATURE_IWMMXT)) {
/* iWMMXt register transfer. */
@@ -10977,18 +10363,6 @@ static void disas_arm_insn(DisasContext *s, unsigned int insn)
}
}
}
} else if ((insn & 0x0e000a00) == 0x0c000800
&& arm_dc_feature(s, ARM_FEATURE_V8)) {
if (disas_neon_insn_3same_ext(s, insn)) {
goto illegal_op;
}
return;
} else if ((insn & 0x0f000a00) == 0x0e000800
&& arm_dc_feature(s, ARM_FEATURE_V8)) {
if (disas_neon_insn_2reg_scalar_ext(s, insn)) {
goto illegal_op;
}
return;
}
goto illegal_op;
}
@@ -11102,6 +10476,33 @@ static void disas_thumb2_insn(DisasContext *s, uint32_t insn)
ARCH(6T2);
}
if ((insn & 0xef000000) == 0xef000000) {
/*
* T32 encodings 0b111p_1111_qqqq_qqqq_qqqq_qqqq_qqqq_qqqq
* transform into
* A32 encodings 0b1111_001p_qqqq_qqqq_qqqq_qqqq_qqqq_qqqq
*/
uint32_t a32_insn = (insn & 0xe2ffffff) |
((insn & (1 << 28)) >> 4) | (1 << 28);
if (disas_neon_dp(s, a32_insn)) {
return;
}
}
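/*
 * Worked example (illustrative): T32 0xff012345 has p == 1, so
 * (0xff012345 & 0xe2ffffff) | (1 << 24) | (1 << 28) == 0xf3012345,
 * i.e. the A32 pattern 1111_001p in the top bits with p moved down
 * to bit 24.
 */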
if ((insn & 0xff100000) == 0xf9000000) {
/*
* T32 encodings 0b1111_1001_ppp0_qqqq_qqqq_qqqq_qqqq_qqqq
* transform into
* A32 encodings 0b1111_0100_ppp0_qqqq_qqqq_qqqq_qqqq_qqqq
*/
uint32_t a32_insn = (insn & 0x00ffffff) | 0xf4000000;
if (disas_neon_ls(s, a32_insn)) {
return;
}
}
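/*
 * Illustrative example: T32 0xf9201234 becomes
 * (0xf9201234 & 0x00ffffff) | 0xf4000000 == 0xf4201234, the matching
 * A32 Neon load/store encoding.
 */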
/*
* TODO: Perhaps merge these into one decodetree output file.
* Note disas_vfp is written for a32 with cond field in the
@@ -11109,6 +10510,7 @@ static void disas_thumb2_insn(DisasContext *s, uint32_t insn)
*/
if (disas_t32(s, insn) ||
disas_vfp_uncond(s, insn) ||
disas_neon_shared(s, insn) ||
((insn >> 28) == 0xe && disas_vfp(s, insn))) {
return;
}
@@ -11138,19 +10540,7 @@ static void disas_thumb2_insn(DisasContext *s, uint32_t insn)
}
break;
}
if ((insn & 0xfe000a00) == 0xfc000800
&& arm_dc_feature(s, ARM_FEATURE_V8)) {
/* The Thumb2 and ARM encodings are identical. */
if (disas_neon_insn_3same_ext(s, insn)) {
goto illegal_op;
}
} else if ((insn & 0xff000a00) == 0xfe000800
&& arm_dc_feature(s, ARM_FEATURE_V8)) {
/* The Thumb2 and ARM encodings are identical. */
if (disas_neon_insn_2reg_scalar_ext(s, insn)) {
goto illegal_op;
}
} else if (((insn >> 24) & 3) == 3) {
if (((insn >> 24) & 3) == 3) {
/* Translate into the equivalent ARM encoding. */
insn = (insn & 0xe2ffffff) | ((insn & (1 << 28)) >> 4) | (1 << 28);
if (disas_neon_data_insn(s, insn)) {
@@ -11168,12 +10558,6 @@ static void disas_thumb2_insn(DisasContext *s, uint32_t insn)
}
break;
case 12:
if ((insn & 0x01100000) == 0x01000000) {
if (disas_neon_ls_insn(s, insn)) {
goto illegal_op;
}
break;
}
goto illegal_op;
default:
illegal_op:


@@ -305,4 +305,30 @@ void gen_sshl_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b);
#define dc_isar_feature(name, ctx) \
({ DisasContext *ctx_ = (ctx); isar_feature_##name(ctx_->isar); })
/* Note that the gvec expanders operate on offsets + sizes. */
typedef void GVecGen2Fn(unsigned, uint32_t, uint32_t, uint32_t, uint32_t);
typedef void GVecGen2iFn(unsigned, uint32_t, uint32_t, int64_t,
uint32_t, uint32_t);
typedef void GVecGen3Fn(unsigned, uint32_t, uint32_t,
uint32_t, uint32_t, uint32_t);
typedef void GVecGen4Fn(unsigned, uint32_t, uint32_t, uint32_t,
uint32_t, uint32_t, uint32_t);
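/*
 * Note (illustrative): the generic gvec expanders have exactly these
 * shapes, e.g. tcg_gen_gvec_add(unsigned vece, uint32_t dofs,
 * uint32_t aofs, uint32_t bofs, uint32_t oprsz, uint32_t maxsz)
 * matches GVecGen3Fn, which is why it can be passed directly to
 * expanders such as the Neon do_3same().
 */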
/* Function prototype for gen_ functions for calling Neon helpers */
typedef void NeonGenOneOpEnvFn(TCGv_i32, TCGv_ptr, TCGv_i32);
typedef void NeonGenTwoOpFn(TCGv_i32, TCGv_i32, TCGv_i32);
typedef void NeonGenTwoOpEnvFn(TCGv_i32, TCGv_ptr, TCGv_i32, TCGv_i32);
typedef void NeonGenTwo64OpFn(TCGv_i64, TCGv_i64, TCGv_i64);
typedef void NeonGenTwo64OpEnvFn(TCGv_i64, TCGv_ptr, TCGv_i64, TCGv_i64);
typedef void NeonGenNarrowFn(TCGv_i32, TCGv_i64);
typedef void NeonGenNarrowEnvFn(TCGv_i32, TCGv_ptr, TCGv_i64);
typedef void NeonGenWidenFn(TCGv_i64, TCGv_i32);
typedef void NeonGenTwoSingleOPFn(TCGv_i32, TCGv_i32, TCGv_i32, TCGv_ptr);
typedef void NeonGenTwoDoubleOPFn(TCGv_i64, TCGv_i64, TCGv_i64, TCGv_ptr);
typedef void NeonGenOneOpFn(TCGv_i64, TCGv_i64);
typedef void CryptoTwoOpFn(TCGv_ptr, TCGv_ptr);
typedef void CryptoThreeOpIntFn(TCGv_ptr, TCGv_ptr, TCGv_i32);
typedef void CryptoThreeOpFn(TCGv_ptr, TCGv_ptr, TCGv_ptr);
typedef void AtomicThreeOpFn(TCGv_i64, TCGv_i64, TCGv_i64, TCGArg, MemOp);
#endif /* TARGET_ARM_TRANSLATE_H */