target-arm queue:
* more A64 Neon instructions * fixes to reset CBAR values for A9 and A15 boards * fix accesses to PMCR register in -icount mode -----BEGIN PGP SIGNATURE----- Version: GnuPG v1.4.11 (GNU/Linux) iQIcBAABCAAGBQJTJ3GEAAoJEDwlJe0UNgzevGcP/2ftt3PRACZ9BqUh6s1xBW0/ /dqcEIildxZYxmTHDH+g5t2ueho+o+qhpmXf+lHO0C+nl86SRm/DVJj+tmuUoWdf 5BA1eOVjQnvrnmQx72/CS4NI4t0npoYf7Cserkpm9ZOdzweJy68YHZZRVpHLfldS Ba7W749EsGPnd5ZEhnplwGSIjM3ZUfixm3yJSsGnHAf6KEskkVKjUUI2lZWecT81 5f14qN6F7qk7XvH9HGOWZktiKGfaSLVXzZGsmdq6oDVTr+2ZMkoFxn7jMFm4EHtW cTDVcwN9Y6tFM2Pm7PIxzXmP9lTc5L+ghVXn9XhuY9OS7ZFD46r/sh3Lkhypq+WP SfJaPOG5zZuKkmj+hyO+08hjLxR+TJDIKr26tY62yGrteWN+SkzoJuO6Gn17uuC8 UhAqjbLuunhSlJA7oy42i7YcR84LXemMCplbqBY/v7W54ZWrxV+QgNKiLtbsIpWF tGg8R85jkjE7lV7dfaeK7N+vQjGIMwzT+g9sYyS3zsY0ubFnkIMa04Zn4gMsCheU azmyCfQOCmdN71CEEN6rbTWL3AtWw2Oss1RxK1iQu5J8+YgC2TvNsb4hE4K5KctX utvoPoVScBWZvvX2zvMv43+qz74arSTOxuBCMW9Gf0pEQA1cT0GdYzRrb3g+8CCp n3GuAoTMj2d72c2WO36I =YFAg -----END PGP SIGNATURE----- Merge remote-tracking branch 'remotes/pmaydell/tags/pull-target-arm-20140317' into staging target-arm queue: * more A64 Neon instructions * fixes to reset CBAR values for A9 and A15 boards * fix accesses to PMCR register in -icount mode # gpg: Signature made Mon 17 Mar 2014 22:04:52 GMT using RSA key ID 14360CDE # gpg: Good signature from "Peter Maydell <peter.maydell@linaro.org>" * remotes/pmaydell/tags/pull-target-arm-20140317: (30 commits) scripts/qemu-binfmt-conf.sh: Add AArch64 registration target-arm: A64: Add [UF]RSQRTE (reciprocal root estimate) target-arm: A64: Implement FCVTXN target-arm: A64: Implement scalar saturating narrow ops target-arm: A64: Move handle_2misc_narrow function target-arm: A64: Implement AdvSIMD reciprocal estimate insns URECPE, FRECPE softfloat: export squash_input_denormal functions target-arm: A64: Implement FCVTZS, FCVTZU in the shift-imm categories target-arm: A64: Handle saturating left shifts SQSHL, SQSHLU, UQSHL exec-all.h: Increase MAX_OP_PER_INSTR for ARM A64 decoder target-arm: 
A64: Implement FRINT* target-arm: A64: Implement SRI target-arm: A64: Add FRECPX (reciprocal exponent) target-arm: A64: List unsupported shift-imm opcodes target-arm: A64: Implement FCVTL target-arm: A64: Implement FCVTN target-arm: A64: Implement FCVT[NMAPZ][SU] SIMD instructions target-arm: A64: Implement SHLL, SHLL2 target-arm: A64: Implement SADDLP, UADDLP, SADALP, UADALP target-arm: A64: Saturating and narrowing shift ops ... Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
This commit is contained in:
commit
2dda43bacc
@ -288,7 +288,7 @@ INLINE flag extractFloat32Sign( float32 a )
|
||||
| If `a' is denormal and we are in flush-to-zero mode then set the
|
||||
| input-denormal exception and return zero. Otherwise just return the value.
|
||||
*----------------------------------------------------------------------------*/
|
||||
static float32 float32_squash_input_denormal(float32 a STATUS_PARAM)
|
||||
float32 float32_squash_input_denormal(float32 a STATUS_PARAM)
|
||||
{
|
||||
if (STATUS(flush_inputs_to_zero)) {
|
||||
if (extractFloat32Exp(a) == 0 && extractFloat32Frac(a) != 0) {
|
||||
@ -473,7 +473,7 @@ INLINE flag extractFloat64Sign( float64 a )
|
||||
| If `a' is denormal and we are in flush-to-zero mode then set the
|
||||
| input-denormal exception and return zero. Otherwise just return the value.
|
||||
*----------------------------------------------------------------------------*/
|
||||
static float64 float64_squash_input_denormal(float64 a STATUS_PARAM)
|
||||
float64 float64_squash_input_denormal(float64 a STATUS_PARAM)
|
||||
{
|
||||
if (STATUS(flush_inputs_to_zero)) {
|
||||
if (extractFloat64Exp(a) == 0 && extractFloat64Frac(a) != 0) {
|
||||
|
@ -143,11 +143,21 @@ Exynos4210State *exynos4210_init(MemoryRegion *system_mem,
|
||||
unsigned long mem_size;
|
||||
DeviceState *dev;
|
||||
SysBusDevice *busdev;
|
||||
ObjectClass *cpu_oc;
|
||||
|
||||
cpu_oc = cpu_class_by_name(TYPE_ARM_CPU, "cortex-a9");
|
||||
assert(cpu_oc);
|
||||
|
||||
for (n = 0; n < EXYNOS4210_NCPUS; n++) {
|
||||
s->cpu[n] = cpu_arm_init("cortex-a9");
|
||||
if (!s->cpu[n]) {
|
||||
fprintf(stderr, "Unable to find CPU %d definition\n", n);
|
||||
Object *cpuobj = object_new(object_class_get_name(cpu_oc));
|
||||
Error *err = NULL;
|
||||
|
||||
s->cpu[n] = ARM_CPU(cpuobj);
|
||||
object_property_set_int(cpuobj, EXYNOS4210_SMP_PRIVATE_BASE_ADDR,
|
||||
"reset-cbar", &error_abort);
|
||||
object_property_set_bool(cpuobj, true, "realized", &err);
|
||||
if (err) {
|
||||
error_report("%s", error_get_pretty(err));
|
||||
exit(1);
|
||||
}
|
||||
}
|
||||
|
@ -18,6 +18,7 @@
|
||||
#include "hw/i2c/i2c.h"
|
||||
#include "sysemu/blockdev.h"
|
||||
#include "exec/address-spaces.h"
|
||||
#include "qemu/error-report.h"
|
||||
|
||||
#define SMP_BOOT_ADDR 0xe0000000
|
||||
#define SMP_BOOTREG_ADDR 0x10000030
|
||||
@ -49,6 +50,7 @@ static void realview_init(QEMUMachineInitArgs *args,
|
||||
{
|
||||
ARMCPU *cpu = NULL;
|
||||
CPUARMState *env;
|
||||
ObjectClass *cpu_oc;
|
||||
MemoryRegion *sysmem = get_system_memory();
|
||||
MemoryRegion *ram_lo = g_new(MemoryRegion, 1);
|
||||
MemoryRegion *ram_hi = g_new(MemoryRegion, 1);
|
||||
@ -70,12 +72,14 @@ static void realview_init(QEMUMachineInitArgs *args,
|
||||
uint32_t sys_id;
|
||||
ram_addr_t low_ram_size;
|
||||
ram_addr_t ram_size = args->ram_size;
|
||||
hwaddr periphbase = 0;
|
||||
|
||||
switch (board_type) {
|
||||
case BOARD_EB:
|
||||
break;
|
||||
case BOARD_EB_MPCORE:
|
||||
is_mpcore = 1;
|
||||
periphbase = 0x10100000;
|
||||
break;
|
||||
case BOARD_PB_A8:
|
||||
is_pb = 1;
|
||||
@ -83,16 +87,37 @@ static void realview_init(QEMUMachineInitArgs *args,
|
||||
case BOARD_PBX_A9:
|
||||
is_mpcore = 1;
|
||||
is_pb = 1;
|
||||
periphbase = 0x1f000000;
|
||||
break;
|
||||
}
|
||||
|
||||
cpu_oc = cpu_class_by_name(TYPE_ARM_CPU, args->cpu_model);
|
||||
if (!cpu_oc) {
|
||||
fprintf(stderr, "Unable to find CPU definition\n");
|
||||
exit(1);
|
||||
}
|
||||
|
||||
for (n = 0; n < smp_cpus; n++) {
|
||||
cpu = cpu_arm_init(args->cpu_model);
|
||||
if (!cpu) {
|
||||
fprintf(stderr, "Unable to find CPU definition\n");
|
||||
Object *cpuobj = object_new(object_class_get_name(cpu_oc));
|
||||
Error *err = NULL;
|
||||
|
||||
if (is_pb && is_mpcore) {
|
||||
object_property_set_int(cpuobj, periphbase, "reset-cbar", &err);
|
||||
if (err) {
|
||||
error_report("%s", error_get_pretty(err));
|
||||
exit(1);
|
||||
}
|
||||
}
|
||||
|
||||
object_property_set_bool(cpuobj, true, "realized", &err);
|
||||
if (err) {
|
||||
error_report("%s", error_get_pretty(err));
|
||||
exit(1);
|
||||
}
|
||||
cpu_irq[n] = qdev_get_gpio_in(DEVICE(cpu), ARM_CPU_IRQ);
|
||||
|
||||
cpu_irq[n] = qdev_get_gpio_in(DEVICE(cpuobj), ARM_CPU_IRQ);
|
||||
}
|
||||
cpu = ARM_CPU(first_cpu);
|
||||
env = &cpu->env;
|
||||
if (arm_feature(env, ARM_FEATURE_V7)) {
|
||||
if (is_mpcore) {
|
||||
@ -141,16 +166,10 @@ static void realview_init(QEMUMachineInitArgs *args,
|
||||
sysbus_mmio_map(SYS_BUS_DEVICE(sysctl), 0, 0x10000000);
|
||||
|
||||
if (is_mpcore) {
|
||||
hwaddr periphbase;
|
||||
dev = qdev_create(NULL, is_pb ? "a9mpcore_priv": "realview_mpcore");
|
||||
qdev_prop_set_uint32(dev, "num-cpu", smp_cpus);
|
||||
qdev_init_nofail(dev);
|
||||
busdev = SYS_BUS_DEVICE(dev);
|
||||
if (is_pb) {
|
||||
periphbase = 0x1f000000;
|
||||
} else {
|
||||
periphbase = 0x10100000;
|
||||
}
|
||||
sysbus_mmio_map(busdev, 0, periphbase);
|
||||
for (n = 0; n < smp_cpus; n++) {
|
||||
sysbus_connect_irq(busdev, n, cpu_irq[n]);
|
||||
|
@ -32,6 +32,7 @@
|
||||
#include "sysemu/blockdev.h"
|
||||
#include "hw/block/flash.h"
|
||||
#include "sysemu/device_tree.h"
|
||||
#include "qemu/error-report.h"
|
||||
#include <libfdt.h>
|
||||
|
||||
#define VEXPRESS_BOARD_ID 0x8e0
|
||||
@ -173,6 +174,64 @@ struct VEDBoardInfo {
|
||||
DBoardInitFn *init;
|
||||
};
|
||||
|
||||
static void init_cpus(const char *cpu_model, const char *privdev,
|
||||
hwaddr periphbase, qemu_irq *pic)
|
||||
{
|
||||
ObjectClass *cpu_oc = cpu_class_by_name(TYPE_ARM_CPU, cpu_model);
|
||||
DeviceState *dev;
|
||||
SysBusDevice *busdev;
|
||||
int n;
|
||||
|
||||
if (!cpu_oc) {
|
||||
fprintf(stderr, "Unable to find CPU definition\n");
|
||||
exit(1);
|
||||
}
|
||||
|
||||
/* Create the actual CPUs */
|
||||
for (n = 0; n < smp_cpus; n++) {
|
||||
Object *cpuobj = object_new(object_class_get_name(cpu_oc));
|
||||
Error *err = NULL;
|
||||
|
||||
object_property_set_int(cpuobj, periphbase, "reset-cbar", &err);
|
||||
if (err) {
|
||||
error_report("%s", error_get_pretty(err));
|
||||
exit(1);
|
||||
}
|
||||
object_property_set_bool(cpuobj, true, "realized", &err);
|
||||
if (err) {
|
||||
error_report("%s", error_get_pretty(err));
|
||||
exit(1);
|
||||
}
|
||||
}
|
||||
|
||||
/* Create the private peripheral devices (including the GIC);
|
||||
* this must happen after the CPUs are created because a15mpcore_priv
|
||||
* wires itself up to the CPU's generic_timer gpio out lines.
|
||||
*/
|
||||
dev = qdev_create(NULL, privdev);
|
||||
qdev_prop_set_uint32(dev, "num-cpu", smp_cpus);
|
||||
qdev_init_nofail(dev);
|
||||
busdev = SYS_BUS_DEVICE(dev);
|
||||
sysbus_mmio_map(busdev, 0, periphbase);
|
||||
|
||||
/* Interrupts [42:0] are from the motherboard;
|
||||
* [47:43] are reserved; [63:48] are daughterboard
|
||||
* peripherals. Note that some documentation numbers
|
||||
* external interrupts starting from 32 (because there
|
||||
* are internal interrupts 0..31).
|
||||
*/
|
||||
for (n = 0; n < 64; n++) {
|
||||
pic[n] = qdev_get_gpio_in(dev, n);
|
||||
}
|
||||
|
||||
/* Connect the CPUs to the GIC */
|
||||
for (n = 0; n < smp_cpus; n++) {
|
||||
DeviceState *cpudev = DEVICE(qemu_get_cpu(n));
|
||||
|
||||
sysbus_connect_irq(busdev, n, qdev_get_gpio_in(cpudev, ARM_CPU_IRQ));
|
||||
}
|
||||
}
|
||||
|
||||
static void a9_daughterboard_init(const VEDBoardInfo *daughterboard,
|
||||
ram_addr_t ram_size,
|
||||
const char *cpu_model,
|
||||
@ -181,25 +240,12 @@ static void a9_daughterboard_init(const VEDBoardInfo *daughterboard,
|
||||
MemoryRegion *sysmem = get_system_memory();
|
||||
MemoryRegion *ram = g_new(MemoryRegion, 1);
|
||||
MemoryRegion *lowram = g_new(MemoryRegion, 1);
|
||||
DeviceState *dev;
|
||||
SysBusDevice *busdev;
|
||||
int n;
|
||||
qemu_irq cpu_irq[4];
|
||||
ram_addr_t low_ram_size;
|
||||
|
||||
if (!cpu_model) {
|
||||
cpu_model = "cortex-a9";
|
||||
}
|
||||
|
||||
for (n = 0; n < smp_cpus; n++) {
|
||||
ARMCPU *cpu = cpu_arm_init(cpu_model);
|
||||
if (!cpu) {
|
||||
fprintf(stderr, "Unable to find CPU definition\n");
|
||||
exit(1);
|
||||
}
|
||||
cpu_irq[n] = qdev_get_gpio_in(DEVICE(cpu), ARM_CPU_IRQ);
|
||||
}
|
||||
|
||||
if (ram_size > 0x40000000) {
|
||||
/* 1GB is the maximum the address space permits */
|
||||
fprintf(stderr, "vexpress-a9: cannot model more than 1GB RAM\n");
|
||||
@ -221,23 +267,7 @@ static void a9_daughterboard_init(const VEDBoardInfo *daughterboard,
|
||||
memory_region_add_subregion(sysmem, 0x60000000, ram);
|
||||
|
||||
/* 0x1e000000 A9MPCore (SCU) private memory region */
|
||||
dev = qdev_create(NULL, "a9mpcore_priv");
|
||||
qdev_prop_set_uint32(dev, "num-cpu", smp_cpus);
|
||||
qdev_init_nofail(dev);
|
||||
busdev = SYS_BUS_DEVICE(dev);
|
||||
sysbus_mmio_map(busdev, 0, 0x1e000000);
|
||||
for (n = 0; n < smp_cpus; n++) {
|
||||
sysbus_connect_irq(busdev, n, cpu_irq[n]);
|
||||
}
|
||||
/* Interrupts [42:0] are from the motherboard;
|
||||
* [47:43] are reserved; [63:48] are daughterboard
|
||||
* peripherals. Note that some documentation numbers
|
||||
* external interrupts starting from 32 (because the
|
||||
* A9MP has internal interrupts 0..31).
|
||||
*/
|
||||
for (n = 0; n < 64; n++) {
|
||||
pic[n] = qdev_get_gpio_in(dev, n);
|
||||
}
|
||||
init_cpus(cpu_model, "a9mpcore_priv", 0x1e000000, pic);
|
||||
|
||||
/* Daughterboard peripherals : 0x10020000 .. 0x20000000 */
|
||||
|
||||
@ -296,29 +326,14 @@ static void a15_daughterboard_init(const VEDBoardInfo *daughterboard,
|
||||
const char *cpu_model,
|
||||
qemu_irq *pic)
|
||||
{
|
||||
int n;
|
||||
MemoryRegion *sysmem = get_system_memory();
|
||||
MemoryRegion *ram = g_new(MemoryRegion, 1);
|
||||
MemoryRegion *sram = g_new(MemoryRegion, 1);
|
||||
qemu_irq cpu_irq[4];
|
||||
DeviceState *dev;
|
||||
SysBusDevice *busdev;
|
||||
|
||||
if (!cpu_model) {
|
||||
cpu_model = "cortex-a15";
|
||||
}
|
||||
|
||||
for (n = 0; n < smp_cpus; n++) {
|
||||
ARMCPU *cpu;
|
||||
|
||||
cpu = cpu_arm_init(cpu_model);
|
||||
if (!cpu) {
|
||||
fprintf(stderr, "Unable to find CPU definition\n");
|
||||
exit(1);
|
||||
}
|
||||
cpu_irq[n] = qdev_get_gpio_in(DEVICE(cpu), ARM_CPU_IRQ);
|
||||
}
|
||||
|
||||
{
|
||||
/* We have to use a separate 64 bit variable here to avoid the gcc
|
||||
* "comparison is always false due to limited range of data type"
|
||||
@ -337,23 +352,7 @@ static void a15_daughterboard_init(const VEDBoardInfo *daughterboard,
|
||||
memory_region_add_subregion(sysmem, 0x80000000, ram);
|
||||
|
||||
/* 0x2c000000 A15MPCore private memory region (GIC) */
|
||||
dev = qdev_create(NULL, "a15mpcore_priv");
|
||||
qdev_prop_set_uint32(dev, "num-cpu", smp_cpus);
|
||||
qdev_init_nofail(dev);
|
||||
busdev = SYS_BUS_DEVICE(dev);
|
||||
sysbus_mmio_map(busdev, 0, 0x2c000000);
|
||||
for (n = 0; n < smp_cpus; n++) {
|
||||
sysbus_connect_irq(busdev, n, cpu_irq[n]);
|
||||
}
|
||||
/* Interrupts [42:0] are from the motherboard;
|
||||
* [47:43] are reserved; [63:48] are daughterboard
|
||||
* peripherals. Note that some documentation numbers
|
||||
* external interrupts starting from 32 (because there
|
||||
* are internal interrupts 0..31).
|
||||
*/
|
||||
for (n = 0; n < 64; n++) {
|
||||
pic[n] = qdev_get_gpio_in(dev, n);
|
||||
}
|
||||
init_cpus(cpu_model, "a15mpcore_priv", 0x2c000000, pic);
|
||||
|
||||
/* A15 daughterboard peripherals: */
|
||||
|
||||
|
@ -390,6 +390,12 @@ static void machvirt_init(QEMUMachineInitArgs *args)
|
||||
if (n > 0) {
|
||||
object_property_set_bool(cpuobj, true, "start-powered-off", NULL);
|
||||
}
|
||||
|
||||
if (object_property_find(cpuobj, "reset-cbar", NULL)) {
|
||||
object_property_set_int(cpuobj, vbi->memmap[VIRT_CPUPERIPHS].base,
|
||||
"reset-cbar", &error_abort);
|
||||
}
|
||||
|
||||
object_property_set_bool(cpuobj, true, "realized", NULL);
|
||||
}
|
||||
fdt_add_cpu_nodes(vbi);
|
||||
|
@ -44,7 +44,7 @@ struct TranslationBlock;
|
||||
typedef struct TranslationBlock TranslationBlock;
|
||||
|
||||
/* XXX: make safe guess about sizes */
|
||||
#define MAX_OP_PER_INSTR 208
|
||||
#define MAX_OP_PER_INSTR 266
|
||||
|
||||
#if HOST_LONG_BITS == 32
|
||||
#define MAX_OPC_PARAM_PER_ARG 2
|
||||
|
@ -244,6 +244,13 @@ INLINE flag get_default_nan_mode(float_status *status)
|
||||
*----------------------------------------------------------------------------*/
|
||||
void float_raise( int8 flags STATUS_PARAM);
|
||||
|
||||
/*----------------------------------------------------------------------------
|
||||
| If `a' is denormal and we are in flush-to-zero mode then set the
|
||||
| input-denormal exception and return zero. Otherwise just return the value.
|
||||
*----------------------------------------------------------------------------*/
|
||||
float32 float32_squash_input_denormal(float32 a STATUS_PARAM);
|
||||
float64 float64_squash_input_denormal(float64 a STATUS_PARAM);
|
||||
|
||||
/*----------------------------------------------------------------------------
|
||||
| Options to indicate which negations to perform in float*_muladd()
|
||||
| Using these differs from negating an input or output before calling
|
||||
|
@ -41,6 +41,9 @@ if [ $cpu != "arm" ] ; then
|
||||
echo ':arm:M::\x7fELF\x01\x01\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x02\x00\x28\x00:\xff\xff\xff\xff\xff\xff\xff\x00\xff\xff\xff\xff\xff\xff\xff\xff\xfe\xff\xff\xff:/usr/local/bin/qemu-arm:' > /proc/sys/fs/binfmt_misc/register
|
||||
echo ':armeb:M::\x7fELF\x01\x02\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x02\x00\x28:\xff\xff\xff\xff\xff\xff\xff\x00\xff\xff\xff\xff\xff\xff\xff\xff\xff\xfe\xff\xff:/usr/local/bin/qemu-armeb:' > /proc/sys/fs/binfmt_misc/register
|
||||
fi
|
||||
if [ $cpu != "aarch64" ] ; then
|
||||
echo ':aarch64:M::\x7fELF\x02\x01\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x02\x00\xb7\x00:\xff\xff\xff\xff\xff\xff\xff\x00\xff\xff\xff\xff\xff\xff\xff\xff\xfe\xff\xff\xff:/usr/local/bin/qemu-aarch64:' > /proc/sys/fs/binfmt_misc/register
|
||||
fi
|
||||
if [ $cpu != "sparc" ] ; then
|
||||
echo ':sparc:M::\x7fELF\x01\x02\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x02\x00\x02:\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xfe\xff\xff:/usr/local/bin/qemu-sparc:' > /proc/sys/fs/binfmt_misc/register
|
||||
fi
|
||||
|
@ -60,6 +60,11 @@ uint32_t HELPER(cls32)(uint32_t x)
|
||||
return clrsb32(x);
|
||||
}
|
||||
|
||||
uint32_t HELPER(clz32)(uint32_t x)
|
||||
{
|
||||
return clz32(x);
|
||||
}
|
||||
|
||||
uint64_t HELPER(rbit64)(uint64_t x)
|
||||
{
|
||||
/* assign the correct byte position */
|
||||
@ -180,6 +185,36 @@ uint64_t HELPER(simd_tbl)(CPUARMState *env, uint64_t result, uint64_t indices,
|
||||
return result;
|
||||
}
|
||||
|
||||
/* Helper functions for the 64 bit polynomial multiply case:
 * perform PolynomialMult(op1, op2) (a carry-less multiply, i.e. partial
 * products are combined with XOR instead of addition) and return either
 * the top or the bottom half of the 128 bit result.
 *
 * This one returns the bottom 64 bits.
 */
uint64_t HELPER(neon_pmull_64_lo)(uint64_t op1, uint64_t op2)
{
    uint64_t product = 0;
    uint64_t remaining = op1;   /* bits of op1 still to be consumed */
    uint64_t partial = op2;     /* op2 shifted left by the current bit index */

    while (remaining != 0) {
        if (remaining & 1) {
            product ^= partial;
        }
        remaining >>= 1;
        partial <<= 1;
    }
    return product;
}
|
||||
/* Return the top 64 bits of the 128 bit carry-less (polynomial) product
 * of op1 and op2.
 */
uint64_t HELPER(neon_pmull_64_hi)(uint64_t op1, uint64_t op2)
{
    uint64_t product_hi = 0;
    unsigned int shift;

    /* Bit (64 - shift) of op1 contributes op2 >> shift to the high half.
     * Bit 0 of op1 can't influence the high 64 bits at all, so the
     * shift amount only ranges over 63..1.
     */
    for (shift = 63; shift >= 1; shift--) {
        if ((op1 >> (64 - shift)) & 1) {
            product_hi ^= op2 >> shift;
        }
    }
    return product_hi;
}
|
||||
|
||||
/* 64bit/double versions of the neon float compare functions */
|
||||
uint64_t HELPER(neon_ceq_f64)(float64 a, float64 b, void *fpstp)
|
||||
{
|
||||
@ -258,3 +293,146 @@ float64 HELPER(rsqrtsf_f64)(float64 a, float64 b, void *fpstp)
|
||||
}
|
||||
return float64_muladd(a, b, float64_three, float_muladd_halve_result, fpst);
|
||||
}
|
||||
|
||||
/* Pairwise long add: add pairs of adjacent elements into
 * double-width elements in the result (eg _s8 is an 8x8->16 op)
 */
uint64_t HELPER(neon_addlp_s8)(uint64_t a)
{
    uint64_t packed = 0;
    int lane;

    /* Each 16 bit lane of the input holds two signed bytes; the matching
     * 16 bit lane of the result holds their sign-extended sum.
     */
    for (lane = 0; lane < 4; lane++) {
        int shift = lane * 16;
        int low_byte = (int8_t)(a >> shift);
        int high_byte = (int8_t)(a >> (shift + 8));

        /* The sum is in -256..254, so it always fits in 16 bits */
        packed |= (uint64_t)(uint16_t)(low_byte + high_byte) << shift;
    }
    return packed;
}
|
||||
|
||||
/* Unsigned pairwise long add, 8 bit elements: each 16 bit lane of the
 * result is the sum of the two unsigned bytes in the same lane of @a.
 */
uint64_t HELPER(neon_addlp_u8)(uint64_t a)
{
    const uint64_t low_byte_mask = 0x00ff00ff00ff00ffULL;
    uint64_t low_bytes = a & low_byte_mask;
    uint64_t high_bytes = (a >> 8) & low_byte_mask;

    /* Each lane sum is at most 0x1fe, so lanes can't carry into each other */
    return low_bytes + high_bytes;
}
|
||||
|
||||
/* Signed pairwise long add, 16 bit elements: each 32 bit half of the
 * result is the sign-extended sum of the two signed 16 bit elements in
 * the same half of @a.
 */
uint64_t HELPER(neon_addlp_s16)(uint64_t a)
{
    int32_t sum[2];
    int half;

    for (half = 0; half < 2; half++) {
        int16_t low_elem = (int16_t)(a >> (32 * half));
        int16_t high_elem = (int16_t)(a >> (32 * half + 16));

        sum[half] = (int32_t)low_elem + (int32_t)high_elem;
    }

    /* Go via uint32_t so a negative sum doesn't smear into the other half */
    return (uint64_t)(uint32_t)sum[0] | ((uint64_t)(uint32_t)sum[1] << 32);
}
|
||||
|
||||
/* Unsigned pairwise long add, 16 bit elements: each 32 bit half of the
 * result is the sum of the two unsigned 16 bit elements in the same half
 * of @a.
 */
uint64_t HELPER(neon_addlp_u16)(uint64_t a)
{
    const uint64_t low_elem_mask = 0x0000ffff0000ffffULL;
    uint64_t low_elems = a & low_elem_mask;
    uint64_t high_elems = (a >> 16) & low_elem_mask;

    /* Each sum is at most 0x1fffe, so halves can't carry into each other */
    return low_elems + high_elems;
}
|
||||
|
||||
/* Floating-point reciprocal exponent - see FPRecpX in ARM ARM */
|
||||
float32 HELPER(frecpx_f32)(float32 a, void *fpstp)
|
||||
{
|
||||
float_status *fpst = fpstp;
|
||||
uint32_t val32, sbit;
|
||||
int32_t exp;
|
||||
|
||||
if (float32_is_any_nan(a)) {
|
||||
float32 nan = a;
|
||||
if (float32_is_signaling_nan(a)) {
|
||||
float_raise(float_flag_invalid, fpst);
|
||||
nan = float32_maybe_silence_nan(a);
|
||||
}
|
||||
if (fpst->default_nan_mode) {
|
||||
nan = float32_default_nan;
|
||||
}
|
||||
return nan;
|
||||
}
|
||||
|
||||
val32 = float32_val(a);
|
||||
sbit = 0x80000000ULL & val32;
|
||||
exp = extract32(val32, 23, 8);
|
||||
|
||||
if (exp == 0) {
|
||||
return make_float32(sbit | (0xfe << 23));
|
||||
} else {
|
||||
return make_float32(sbit | (~exp & 0xff) << 23);
|
||||
}
|
||||
}
|
||||
|
||||
/* Floating-point reciprocal exponent, double precision - see FPRecpX in
 * ARM ARM
 *
 * @a:     double precision operand
 * @fpstp: pointer to the float_status to use (and to raise flags on)
 *
 * NaNs are propagated (a signaling NaN raises Invalid and is silenced;
 * the default NaN is returned in default-NaN mode). Otherwise the result
 * keeps the operand's sign, has a zero fraction, and its exponent field
 * is the bitwise inverse of the operand's exponent field; an operand
 * with a zero exponent field (zero or denormal) yields exponent 0x7fe.
 */
float64 HELPER(frecpx_f64)(float64 a, void *fpstp)
{
    float_status *fpst = fpstp;
    uint64_t val64, sbit;
    int64_t exp;

    if (float64_is_any_nan(a)) {
        float64 nan = a;
        if (float64_is_signaling_nan(a)) {
            float_raise(float_flag_invalid, fpst);
            nan = float64_maybe_silence_nan(a);
        }
        if (fpst->default_nan_mode) {
            nan = float64_default_nan;
        }
        return nan;
    }

    /* In flush-to-zero mode a denormal operand must raise the
     * input-denormal exception. Squashing doesn't change the result:
     * a denormal and a zero both have a zero exponent field.
     */
    a = float64_squash_input_denormal(a, fpst);

    val64 = float64_val(a);
    sbit = 0x8000000000000000ULL & val64;
    exp = extract64(val64, 52, 11);

    if (exp == 0) {
        return make_float64(sbit | (0x7feULL << 52));
    } else {
        return make_float64(sbit | (~exp & 0x7ffULL) << 52);
    }
}
|
||||
|
||||
/* Convert double to single precision for FCVTXN.
 *
 * Von Neumann rounding is implemented by using round-to-zero
 * and then setting the LSB of the result if Inexact was raised.
 * The conversion runs on a scratch copy of env->vfp.fp_status so the
 * rounding mode change is not visible to the guest; the exception flags
 * it raised are then merged into the real status.
 */
float32 HELPER(fcvtx_f64_to_f32)(float64 a, CPUARMState *env)
{
    float_status *real_status = &env->vfp.fp_status;
    float_status scratch = *real_status;
    float32 narrowed;
    int raised;

    set_float_rounding_mode(float_round_to_zero, &scratch);
    set_float_exception_flags(0, &scratch);

    narrowed = float64_to_float32(a, &scratch);
    narrowed = float32_maybe_silence_nan(narrowed);

    raised = get_float_exception_flags(&scratch);
    if (raised & float_flag_inexact) {
        narrowed = make_float32(float32_val(narrowed) | 1);
    }

    /* Accumulate the newly raised flags on top of the existing ones */
    set_float_exception_flags(raised | get_float_exception_flags(real_status),
                              real_status);
    return narrowed;
}
|
||||
|
@ -21,12 +21,15 @@ DEF_HELPER_FLAGS_2(sdiv64, TCG_CALL_NO_RWG_SE, s64, s64, s64)
|
||||
DEF_HELPER_FLAGS_1(clz64, TCG_CALL_NO_RWG_SE, i64, i64)
|
||||
DEF_HELPER_FLAGS_1(cls64, TCG_CALL_NO_RWG_SE, i64, i64)
|
||||
DEF_HELPER_FLAGS_1(cls32, TCG_CALL_NO_RWG_SE, i32, i32)
|
||||
DEF_HELPER_FLAGS_1(clz32, TCG_CALL_NO_RWG_SE, i32, i32)
|
||||
DEF_HELPER_FLAGS_1(rbit64, TCG_CALL_NO_RWG_SE, i64, i64)
|
||||
DEF_HELPER_3(vfp_cmps_a64, i64, f32, f32, ptr)
|
||||
DEF_HELPER_3(vfp_cmpes_a64, i64, f32, f32, ptr)
|
||||
DEF_HELPER_3(vfp_cmpd_a64, i64, f64, f64, ptr)
|
||||
DEF_HELPER_3(vfp_cmped_a64, i64, f64, f64, ptr)
|
||||
DEF_HELPER_FLAGS_5(simd_tbl, TCG_CALL_NO_RWG_SE, i64, env, i64, i64, i32, i32)
|
||||
DEF_HELPER_FLAGS_2(neon_pmull_64_lo, TCG_CALL_NO_RWG_SE, i64, i64, i64)
|
||||
DEF_HELPER_FLAGS_2(neon_pmull_64_hi, TCG_CALL_NO_RWG_SE, i64, i64, i64)
|
||||
DEF_HELPER_FLAGS_3(vfp_mulxs, TCG_CALL_NO_RWG, f32, f32, f32, ptr)
|
||||
DEF_HELPER_FLAGS_3(vfp_mulxd, TCG_CALL_NO_RWG, f64, f64, f64, ptr)
|
||||
DEF_HELPER_FLAGS_3(neon_ceq_f64, TCG_CALL_NO_RWG, i64, i64, i64, ptr)
|
||||
@ -36,3 +39,10 @@ DEF_HELPER_FLAGS_3(recpsf_f32, TCG_CALL_NO_RWG, f32, f32, f32, ptr)
|
||||
DEF_HELPER_FLAGS_3(recpsf_f64, TCG_CALL_NO_RWG, f64, f64, f64, ptr)
|
||||
DEF_HELPER_FLAGS_3(rsqrtsf_f32, TCG_CALL_NO_RWG, f32, f32, f32, ptr)
|
||||
DEF_HELPER_FLAGS_3(rsqrtsf_f64, TCG_CALL_NO_RWG, f64, f64, f64, ptr)
|
||||
DEF_HELPER_FLAGS_1(neon_addlp_s8, TCG_CALL_NO_RWG_SE, i64, i64)
|
||||
DEF_HELPER_FLAGS_1(neon_addlp_u8, TCG_CALL_NO_RWG_SE, i64, i64)
|
||||
DEF_HELPER_FLAGS_1(neon_addlp_s16, TCG_CALL_NO_RWG_SE, i64, i64)
|
||||
DEF_HELPER_FLAGS_1(neon_addlp_u16, TCG_CALL_NO_RWG_SE, i64, i64)
|
||||
DEF_HELPER_FLAGS_2(frecpx_f64, TCG_CALL_NO_RWG, f64, f64, ptr)
|
||||
DEF_HELPER_FLAGS_2(frecpx_f32, TCG_CALL_NO_RWG, f32, f32, ptr)
|
||||
DEF_HELPER_FLAGS_2(fcvtx_f64_to_f32, TCG_CALL_NO_RWG, f32, f64, env)
|
||||
|
@ -1983,6 +1983,7 @@ void register_cp_regs_for_features(ARMCPU *cpu)
|
||||
ARMCPRegInfo pmcr = {
|
||||
.name = "PMCR", .cp = 15, .crn = 9, .crm = 12, .opc1 = 0, .opc2 = 0,
|
||||
.access = PL0_RW, .resetvalue = cpu->midr & 0xff000000,
|
||||
.type = ARM_CP_IO,
|
||||
.fieldoffset = offsetof(CPUARMState, cp15.c9_pmcr),
|
||||
.accessfn = pmreg_access, .writefn = pmcr_write,
|
||||
.raw_writefn = raw_write,
|
||||
@ -4519,16 +4520,21 @@ float32 HELPER(rsqrts_f32)(float32 a, float32 b, CPUARMState *env)
|
||||
* int->float conversions at run-time. */
|
||||
#define float64_256 make_float64(0x4070000000000000LL)
|
||||
#define float64_512 make_float64(0x4080000000000000LL)
|
||||
#define float32_maxnorm make_float32(0x7f7fffff)
|
||||
#define float64_maxnorm make_float64(0x7fefffffffffffffLL)
|
||||
|
||||
/* The algorithm that must be used to calculate the estimate
|
||||
* is specified by the ARM ARM.
|
||||
/* Reciprocal functions
|
||||
*
|
||||
* The algorithm that must be used to calculate the estimate
|
||||
* is specified by the ARM ARM, see FPRecipEstimate()
|
||||
*/
|
||||
static float64 recip_estimate(float64 a, CPUARMState *env)
|
||||
|
||||
static float64 recip_estimate(float64 a, float_status *real_fp_status)
|
||||
{
|
||||
/* These calculations mustn't set any fp exception flags,
|
||||
* so we use a local copy of the fp_status.
|
||||
*/
|
||||
float_status dummy_status = env->vfp.standard_fp_status;
|
||||
float_status dummy_status = *real_fp_status;
|
||||
float_status *s = &dummy_status;
|
||||
/* q = (int)(a * 512.0) */
|
||||
float64 q = float64_mul(float64_512, a, s);
|
||||
@ -4549,56 +4555,178 @@ static float64 recip_estimate(float64 a, CPUARMState *env)
|
||||
return float64_div(int64_to_float64(q_int, s), float64_256, s);
|
||||
}
|
||||
|
||||
float32 HELPER(recpe_f32)(float32 a, CPUARMState *env)
|
||||
/* Common wrapper to call recip_estimate */
|
||||
static float64 call_recip_estimate(float64 num, int off, float_status *fpst)
|
||||
{
|
||||
float_status *s = &env->vfp.standard_fp_status;
|
||||
float64 f64;
|
||||
uint32_t val32 = float32_val(a);
|
||||
uint64_t val64 = float64_val(num);
|
||||
uint64_t frac = extract64(val64, 0, 52);
|
||||
int64_t exp = extract64(val64, 52, 11);
|
||||
uint64_t sbit;
|
||||
float64 scaled, estimate;
|
||||
|
||||
int result_exp;
|
||||
int a_exp = (val32 & 0x7f800000) >> 23;
|
||||
int sign = val32 & 0x80000000;
|
||||
|
||||
if (float32_is_any_nan(a)) {
|
||||
if (float32_is_signaling_nan(a)) {
|
||||
float_raise(float_flag_invalid, s);
|
||||
/* Generate the scaled number for the estimate function */
|
||||
if (exp == 0) {
|
||||
if (extract64(frac, 51, 1) == 0) {
|
||||
exp = -1;
|
||||
frac = extract64(frac, 0, 50) << 2;
|
||||
} else {
|
||||
frac = extract64(frac, 0, 51) << 1;
|
||||
}
|
||||
return float32_default_nan;
|
||||
} else if (float32_is_infinity(a)) {
|
||||
return float32_set_sign(float32_zero, float32_is_neg(a));
|
||||
} else if (float32_is_zero_or_denormal(a)) {
|
||||
if (!float32_is_zero(a)) {
|
||||
float_raise(float_flag_input_denormal, s);
|
||||
}
|
||||
float_raise(float_flag_divbyzero, s);
|
||||
return float32_set_sign(float32_infinity, float32_is_neg(a));
|
||||
} else if (a_exp >= 253) {
|
||||
float_raise(float_flag_underflow, s);
|
||||
return float32_set_sign(float32_zero, float32_is_neg(a));
|
||||
}
|
||||
|
||||
f64 = make_float64((0x3feULL << 52)
|
||||
| ((int64_t)(val32 & 0x7fffff) << 29));
|
||||
/* scaled = '0' : '01111111110' : fraction<51:44> : Zeros(44); */
|
||||
scaled = make_float64((0x3feULL << 52)
|
||||
| extract64(frac, 44, 8) << 44);
|
||||
|
||||
result_exp = 253 - a_exp;
|
||||
estimate = recip_estimate(scaled, fpst);
|
||||
|
||||
f64 = recip_estimate(f64, env);
|
||||
/* Build new result */
|
||||
val64 = float64_val(estimate);
|
||||
sbit = 0x8000000000000000ULL & val64;
|
||||
exp = off - exp;
|
||||
frac = extract64(val64, 0, 52);
|
||||
|
||||
val32 = sign
|
||||
| ((result_exp & 0xff) << 23)
|
||||
| ((float64_val(f64) >> 29) & 0x7fffff);
|
||||
return make_float32(val32);
|
||||
if (exp == 0) {
|
||||
frac = 1ULL << 51 | extract64(frac, 1, 51);
|
||||
} else if (exp == -1) {
|
||||
frac = 1ULL << 50 | extract64(frac, 2, 50);
|
||||
exp = 0;
|
||||
}
|
||||
|
||||
return make_float64(sbit | (exp << 52) | frac);
|
||||
}
|
||||
|
||||
/* Decide from the current rounding mode whether a result of the given
 * sign is returned as infinity (true) or as the largest normal number
 * (false); used by the reciprocal estimate helpers on overflow.
 */
static bool round_to_inf(float_status *fpst, bool sign_bit)
{
    int mode = fpst->float_rounding_mode;

    if (mode == float_round_nearest_even) {
        /* Round to Nearest */
        return true;
    }
    if (mode == float_round_up) {
        /* Round to +Inf: only positive values reach infinity */
        return !sign_bit;
    }
    if (mode == float_round_down) {
        /* Round to -Inf: only negative values reach infinity */
        return sign_bit;
    }
    if (mode == float_round_to_zero) {
        /* Round to Zero */
        return false;
    }

    g_assert_not_reached();
}
|
||||
|
||||
/* Single precision floating-point reciprocal estimate.
 *
 * @input: operand
 * @fpstp: pointer to the float_status to use (and to raise flags on)
 *
 * Special cases (NaN, infinity, zero, operands so small the estimate
 * overflows, and operands so large the result is flushed to zero) are
 * handled here; everything else is widened to a float64 and passed to
 * call_recip_estimate().
 */
float32 HELPER(recpe_f32)(float32 input, void *fpstp)
{
    float_status *status = fpstp;
    float32 operand = float32_squash_input_denormal(input, status);
    uint32_t bits = float32_val(operand);
    uint32_t sign = 0x80000000ULL & bits;
    int32_t biased_exp = extract32(bits, 23, 8);
    uint32_t mantissa = extract32(bits, 0, 23);
    float64 widened, estimate;
    uint64_t est_bits;
    int64_t est_exp;
    uint64_t est_frac;

    if (float32_is_any_nan(operand)) {
        float32 nan = operand;

        if (float32_is_signaling_nan(operand)) {
            float_raise(float_flag_invalid, status);
            nan = float32_maybe_silence_nan(operand);
        }
        if (status->default_nan_mode) {
            nan = float32_default_nan;
        }
        return nan;
    }

    if (float32_is_infinity(operand)) {
        return float32_set_sign(float32_zero, float32_is_neg(operand));
    }

    if (float32_is_zero(operand)) {
        float_raise(float_flag_divbyzero, status);
        return float32_set_sign(float32_infinity, float32_is_neg(operand));
    }

    if ((bits & ~(1ULL << 31)) < (1ULL << 21)) {
        /* Abs(value) < 2.0^-128 */
        float32 magnitude;

        float_raise(float_flag_overflow | float_flag_inexact, status);
        if (round_to_inf(status, sign)) {
            magnitude = float32_infinity;
        } else {
            magnitude = float32_maxnorm;
        }
        return float32_set_sign(magnitude, float32_is_neg(operand));
    }

    if (biased_exp >= 253 && status->flush_to_zero) {
        float_raise(float_flag_underflow, status);
        return float32_set_sign(float32_zero, float32_is_neg(operand));
    }

    /* Place the single precision exponent and fraction in the float64
     * exponent and fraction fields for the common estimate code.
     */
    widened = make_float64(((int64_t)biased_exp << 52)
                           | ((int64_t)mantissa << 29));
    estimate = call_recip_estimate(widened, 253, status);
    est_bits = float64_val(estimate);
    est_exp = extract64(est_bits, 52, 11);
    est_frac = extract64(est_bits, 0, 52);

    /* result = sign : result_exp<7:0> : fraction<51:29>; */
    return make_float32(sign
                        | (est_exp & 0xff) << 23
                        | extract64(est_frac, 29, 24));
}
|
||||
|
||||
/* Double precision floating-point reciprocal estimate.
 *
 * @input: operand
 * @fpstp: pointer to the float_status to use (and to raise flags on)
 *
 * Special cases (NaN, infinity, zero, operands so small the estimate
 * overflows, and operands so large the result is flushed to zero) are
 * handled here; everything else goes through call_recip_estimate().
 */
float64 HELPER(recpe_f64)(float64 input, void *fpstp)
{
    float_status *fpst = fpstp;
    float64 f64 = float64_squash_input_denormal(input, fpst);
    uint64_t f64_val = float64_val(f64);
    uint64_t f64_sbit = 0x8000000000000000ULL & f64_val;
    int64_t f64_exp = extract64(f64_val, 52, 11);
    float64 r64;
    uint64_t r64_val;
    int64_t r64_exp;
    uint64_t r64_frac;

    /* Deal with any special cases */
    if (float64_is_any_nan(f64)) {
        float64 nan = f64;
        if (float64_is_signaling_nan(f64)) {
            float_raise(float_flag_invalid, fpst);
            nan = float64_maybe_silence_nan(f64);
        }
        if (fpst->default_nan_mode) {
            nan = float64_default_nan;
        }
        return nan;
    } else if (float64_is_infinity(f64)) {
        return float64_set_sign(float64_zero, float64_is_neg(f64));
    } else if (float64_is_zero(f64)) {
        float_raise(float_flag_divbyzero, fpst);
        return float64_set_sign(float64_infinity, float64_is_neg(f64));
    } else if ((f64_val & ~(1ULL << 63)) < (1ULL << 50)) {
        /* Abs(value) < 2.0^-1024 */
        float_raise(float_flag_overflow | float_flag_inexact, fpst);
        if (round_to_inf(fpst, f64_sbit)) {
            return float64_set_sign(float64_infinity, float64_is_neg(f64));
        } else {
            return float64_set_sign(float64_maxnorm, float64_is_neg(f64));
        }
    } else if (f64_exp >= 2045 && fpst->flush_to_zero) {
        /* Abs(value) >= 2.0^1022, i.e. biased exponent >= 1023 + 1022:
         * the reciprocal would be denormal, so in flush-to-zero mode the
         * result is a zero of the correct sign. (Testing against 1023
         * would wrongly flush every operand with Abs(value) >= 1.0.)
         */
        float_raise(float_flag_underflow, fpst);
        return float64_set_sign(float64_zero, float64_is_neg(f64));
    }

    r64 = call_recip_estimate(f64, 2045, fpst);
    r64_val = float64_val(r64);
    r64_exp = extract64(r64_val, 52, 11);
    r64_frac = extract64(r64_val, 0, 52);

    /* result = sign : result_exp<10:0> : fraction<51:0> */
    return make_float64(f64_sbit |
                        ((r64_exp & 0x7ff) << 52) |
                        r64_frac);
}
|
||||
|
||||
/* The algorithm that must be used to calculate the estimate
|
||||
* is specified by the ARM ARM.
|
||||
*/
|
||||
static float64 recip_sqrt_estimate(float64 a, CPUARMState *env)
|
||||
static float64 recip_sqrt_estimate(float64 a, float_status *real_fp_status)
|
||||
{
|
||||
/* These calculations mustn't set any fp exception flags,
|
||||
* so we use a local copy of the fp_status.
|
||||
*/
|
||||
float_status dummy_status = env->vfp.standard_fp_status;
|
||||
float_status dummy_status = *real_fp_status;
|
||||
float_status *s = &dummy_status;
|
||||
float64 q;
|
||||
int64_t q_int;
|
||||
@ -4645,49 +4773,64 @@ static float64 recip_sqrt_estimate(float64 a, CPUARMState *env)
|
||||
return float64_div(int64_to_float64(q_int, s), float64_256, s);
|
||||
}
|
||||
|
||||
float32 HELPER(rsqrte_f32)(float32 a, CPUARMState *env)
|
||||
float32 HELPER(rsqrte_f32)(float32 input, void *fpstp)
|
||||
{
|
||||
float_status *s = &env->vfp.standard_fp_status;
|
||||
float_status *s = fpstp;
|
||||
float32 f32 = float32_squash_input_denormal(input, s);
|
||||
uint32_t val = float32_val(f32);
|
||||
uint32_t f32_sbit = 0x80000000 & val;
|
||||
int32_t f32_exp = extract32(val, 23, 8);
|
||||
uint32_t f32_frac = extract32(val, 0, 23);
|
||||
uint64_t f64_frac;
|
||||
uint64_t val64;
|
||||
int result_exp;
|
||||
float64 f64;
|
||||
uint32_t val;
|
||||
uint64_t val64;
|
||||
|
||||
val = float32_val(a);
|
||||
|
||||
if (float32_is_any_nan(a)) {
|
||||
if (float32_is_signaling_nan(a)) {
|
||||
if (float32_is_any_nan(f32)) {
|
||||
float32 nan = f32;
|
||||
if (float32_is_signaling_nan(f32)) {
|
||||
float_raise(float_flag_invalid, s);
|
||||
nan = float32_maybe_silence_nan(f32);
|
||||
}
|
||||
return float32_default_nan;
|
||||
} else if (float32_is_zero_or_denormal(a)) {
|
||||
if (!float32_is_zero(a)) {
|
||||
float_raise(float_flag_input_denormal, s);
|
||||
if (s->default_nan_mode) {
|
||||
nan = float32_default_nan;
|
||||
}
|
||||
return nan;
|
||||
} else if (float32_is_zero(f32)) {
|
||||
float_raise(float_flag_divbyzero, s);
|
||||
return float32_set_sign(float32_infinity, float32_is_neg(a));
|
||||
} else if (float32_is_neg(a)) {
|
||||
return float32_set_sign(float32_infinity, float32_is_neg(f32));
|
||||
} else if (float32_is_neg(f32)) {
|
||||
float_raise(float_flag_invalid, s);
|
||||
return float32_default_nan;
|
||||
} else if (float32_is_infinity(a)) {
|
||||
} else if (float32_is_infinity(f32)) {
|
||||
return float32_zero;
|
||||
}
|
||||
|
||||
/* Normalize to a double-precision value between 0.25 and 1.0,
|
||||
/* Scale and normalize to a double-precision value between 0.25 and 1.0,
|
||||
* preserving the parity of the exponent. */
|
||||
if ((val & 0x800000) == 0) {
|
||||
f64 = make_float64(((uint64_t)(val & 0x80000000) << 32)
|
||||
| (0x3feULL << 52)
|
||||
| ((uint64_t)(val & 0x7fffff) << 29));
|
||||
} else {
|
||||
f64 = make_float64(((uint64_t)(val & 0x80000000) << 32)
|
||||
| (0x3fdULL << 52)
|
||||
| ((uint64_t)(val & 0x7fffff) << 29));
|
||||
|
||||
f64_frac = ((uint64_t) f32_frac) << 29;
|
||||
if (f32_exp == 0) {
|
||||
while (extract64(f64_frac, 51, 1) == 0) {
|
||||
f64_frac = f64_frac << 1;
|
||||
f32_exp = f32_exp-1;
|
||||
}
|
||||
f64_frac = extract64(f64_frac, 0, 51) << 1;
|
||||
}
|
||||
|
||||
result_exp = (380 - ((val & 0x7f800000) >> 23)) / 2;
|
||||
if (extract64(f32_exp, 0, 1) == 0) {
|
||||
f64 = make_float64(((uint64_t) f32_sbit) << 32
|
||||
| (0x3feULL << 52)
|
||||
| f64_frac);
|
||||
} else {
|
||||
f64 = make_float64(((uint64_t) f32_sbit) << 32
|
||||
| (0x3fdULL << 52)
|
||||
| f64_frac);
|
||||
}
|
||||
|
||||
f64 = recip_sqrt_estimate(f64, env);
|
||||
result_exp = (380 - f32_exp) / 2;
|
||||
|
||||
f64 = recip_sqrt_estimate(f64, s);
|
||||
|
||||
val64 = float64_val(f64);
|
||||
|
||||
@ -4696,8 +4839,72 @@ float32 HELPER(rsqrte_f32)(float32 a, CPUARMState *env)
|
||||
return make_float32(val);
|
||||
}
|
||||
|
||||
uint32_t HELPER(recpe_u32)(uint32_t a, CPUARMState *env)
|
||||
/* Double-precision reciprocal square-root estimate helper.
 *
 * @input: the operand, as a raw float64
 * @fpstp: pointer to the float_status used for input squashing,
 *         exception flags and default-NaN handling
 *
 * Special operands (NaN, zero, negative, infinity) are answered directly;
 * everything else is rescaled and handed to recip_sqrt_estimate(), and the
 * result is reassembled from the computed exponent and estimated fraction.
 */
float64 HELPER(rsqrte_f64)(float64 input, void *fpstp)
{
    float_status *fpst = fpstp;
    float64 operand = float64_squash_input_denormal(input, fpst);
    uint64_t bits = float64_val(operand);
    uint64_t sign_bit = bits & 0x8000000000000000ULL;
    int64_t in_exp = extract64(bits, 52, 11);
    uint64_t frac = extract64(bits, 0, 52);
    uint64_t scaled_exp_field;
    int64_t out_exp;
    uint64_t out_frac;
    float64 estimate;

    /* NaN: raise Invalid for a signaling NaN and silence it, then
     * substitute the default NaN if that mode is selected.
     */
    if (float64_is_any_nan(operand)) {
        float64 nan = operand;

        if (float64_is_signaling_nan(operand)) {
            float_raise(float_flag_invalid, fpst);
            nan = float64_maybe_silence_nan(operand);
        }
        if (fpst->default_nan_mode) {
            nan = float64_default_nan;
        }
        return nan;
    }

    /* Zero: divide-by-zero, result is an infinity with the input's sign */
    if (float64_is_zero(operand)) {
        float_raise(float_flag_divbyzero, fpst);
        return float64_set_sign(float64_infinity, float64_is_neg(operand));
    }

    /* Any other negative operand is an invalid operation */
    if (float64_is_neg(operand)) {
        float_raise(float_flag_invalid, fpst);
        return float64_default_nan;
    }

    if (float64_is_infinity(operand)) {
        return float64_zero;
    }

    /* Scale and normalize to a double-precision value between 0.25 and 1.0,
     * preserving the parity of the exponent.
     */
    if (in_exp == 0) {
        /* Zero exponent field: shift the fraction up until bit 51 is set,
         * lowering the exponent once per shift, then drop that leading bit.
         */
        while ((frac & (1ULL << 51)) == 0) {
            frac <<= 1;
            in_exp -= 1;
        }
        frac = (frac & ((1ULL << 51) - 1)) << 1;
    }

    /* Even exponent -> exponent field 0x3fe, odd exponent -> 0x3fd */
    scaled_exp_field = (in_exp & 1) ? 0x3fdULL : 0x3feULL;

    out_exp = (3068 - in_exp) / 2;

    estimate = recip_sqrt_estimate(make_float64(sign_bit
                                                | (scaled_exp_field << 52)
                                                | frac),
                                   fpst);

    out_frac = extract64(float64_val(estimate), 0, 52);

    /* result = sign : out_exp<10:0> : out_frac<51:0> */
    return make_float64(sign_bit
                        | ((out_exp & 0x7ff) << 52)
                        | out_frac);
}
|
||||
|
||||
uint32_t HELPER(recpe_u32)(uint32_t a, void *fpstp)
|
||||
{
|
||||
float_status *s = fpstp;
|
||||
float64 f64;
|
||||
|
||||
if ((a & 0x80000000) == 0) {
|
||||
@ -4707,13 +4914,14 @@ uint32_t HELPER(recpe_u32)(uint32_t a, CPUARMState *env)
|
||||
f64 = make_float64((0x3feULL << 52)
|
||||
| ((int64_t)(a & 0x7fffffff) << 21));
|
||||
|
||||
f64 = recip_estimate (f64, env);
|
||||
f64 = recip_estimate(f64, s);
|
||||
|
||||
return 0x80000000 | ((float64_val(f64) >> 21) & 0x7fffffff);
|
||||
}
|
||||
|
||||
uint32_t HELPER(rsqrte_u32)(uint32_t a, CPUARMState *env)
|
||||
uint32_t HELPER(rsqrte_u32)(uint32_t a, void *fpstp)
|
||||
{
|
||||
float_status *fpst = fpstp;
|
||||
float64 f64;
|
||||
|
||||
if ((a & 0xc0000000) == 0) {
|
||||
@ -4728,7 +4936,7 @@ uint32_t HELPER(rsqrte_u32)(uint32_t a, CPUARMState *env)
|
||||
| ((uint64_t)(a & 0x3fffffff) << 22));
|
||||
}
|
||||
|
||||
f64 = recip_sqrt_estimate(f64, env);
|
||||
f64 = recip_sqrt_estimate(f64, fpst);
|
||||
|
||||
return 0x80000000 | ((float64_val(f64) >> 21) & 0x7fffffff);
|
||||
}
|
||||
|
@ -167,10 +167,12 @@ DEF_HELPER_4(vfp_muladds, f32, f32, f32, f32, ptr)
|
||||
|
||||
DEF_HELPER_3(recps_f32, f32, f32, f32, env)
|
||||
DEF_HELPER_3(rsqrts_f32, f32, f32, f32, env)
|
||||
DEF_HELPER_2(recpe_f32, f32, f32, env)
|
||||
DEF_HELPER_2(rsqrte_f32, f32, f32, env)
|
||||
DEF_HELPER_2(recpe_u32, i32, i32, env)
|
||||
DEF_HELPER_2(rsqrte_u32, i32, i32, env)
|
||||
DEF_HELPER_FLAGS_2(recpe_f32, TCG_CALL_NO_RWG, f32, f32, ptr)
|
||||
DEF_HELPER_FLAGS_2(recpe_f64, TCG_CALL_NO_RWG, f64, f64, ptr)
|
||||
DEF_HELPER_FLAGS_2(rsqrte_f32, TCG_CALL_NO_RWG, f32, f32, ptr)
|
||||
DEF_HELPER_FLAGS_2(rsqrte_f64, TCG_CALL_NO_RWG, f64, f64, ptr)
|
||||
DEF_HELPER_2(recpe_u32, i32, i32, ptr)
|
||||
DEF_HELPER_FLAGS_2(rsqrte_u32, TCG_CALL_NO_RWG, i32, i32, ptr)
|
||||
DEF_HELPER_5(neon_tbl, i32, env, i32, i32, i32, i32)
|
||||
|
||||
DEF_HELPER_3(shl_cc, i32, env, i32, i32)
|
||||
|
File diff suppressed because it is too large
Load Diff
@ -6682,17 +6682,33 @@ static int disas_neon_data_insn(CPUARMState * env, DisasContext *s, uint32_t ins
|
||||
break;
|
||||
}
|
||||
case NEON_2RM_VRECPE:
|
||||
gen_helper_recpe_u32(tmp, tmp, cpu_env);
|
||||
{
|
||||
TCGv_ptr fpstatus = get_fpstatus_ptr(1);
|
||||
gen_helper_recpe_u32(tmp, tmp, fpstatus);
|
||||
tcg_temp_free_ptr(fpstatus);
|
||||
break;
|
||||
}
|
||||
case NEON_2RM_VRSQRTE:
|
||||
gen_helper_rsqrte_u32(tmp, tmp, cpu_env);
|
||||
{
|
||||
TCGv_ptr fpstatus = get_fpstatus_ptr(1);
|
||||
gen_helper_rsqrte_u32(tmp, tmp, fpstatus);
|
||||
tcg_temp_free_ptr(fpstatus);
|
||||
break;
|
||||
}
|
||||
case NEON_2RM_VRECPE_F:
|
||||
gen_helper_recpe_f32(cpu_F0s, cpu_F0s, cpu_env);
|
||||
{
|
||||
TCGv_ptr fpstatus = get_fpstatus_ptr(1);
|
||||
gen_helper_recpe_f32(cpu_F0s, cpu_F0s, fpstatus);
|
||||
tcg_temp_free_ptr(fpstatus);
|
||||
break;
|
||||
}
|
||||
case NEON_2RM_VRSQRTE_F:
|
||||
gen_helper_rsqrte_f32(cpu_F0s, cpu_F0s, cpu_env);
|
||||
{
|
||||
TCGv_ptr fpstatus = get_fpstatus_ptr(1);
|
||||
gen_helper_rsqrte_f32(cpu_F0s, cpu_F0s, fpstatus);
|
||||
tcg_temp_free_ptr(fpstatus);
|
||||
break;
|
||||
}
|
||||
case NEON_2RM_VCVT_FS: /* VCVT.F32.S32 */
|
||||
gen_vfp_sito(0, 1);
|
||||
break;
|
||||
@ -10654,6 +10670,7 @@ static inline void gen_intermediate_code_internal(ARMCPU *cpu,
|
||||
dc->vec_stride = ARM_TBFLAG_VECSTRIDE(tb->flags);
|
||||
dc->cp_regs = cpu->cp_regs;
|
||||
dc->current_pl = arm_current_pl(env);
|
||||
dc->features = env->features;
|
||||
|
||||
cpu_F0s = tcg_temp_new_i32();
|
||||
cpu_F1s = tcg_temp_new_i32();
|
||||
|
@ -26,6 +26,7 @@ typedef struct DisasContext {
|
||||
int aarch64;
|
||||
int current_pl;
|
||||
GHashTable *cp_regs;
|
||||
uint64_t features; /* CPU features bits */
|
||||
#define TMP_A64_MAX 16
|
||||
int tmp_a64_count;
|
||||
TCGv_i64 tmp_a64[TMP_A64_MAX];
|
||||
@ -33,6 +34,11 @@ typedef struct DisasContext {
|
||||
|
||||
extern TCGv_ptr cpu_env;
|
||||
|
||||
/* Return 1 if bit number @feature is set in @dc->features, else 0. */
static inline int arm_dc_feature(DisasContext *dc, int feature)
{
    uint64_t feature_bit = 1ULL << feature;

    return (dc->features & feature_bit) ? 1 : 0;
}
|
||||
|
||||
/* target-specific extra values for is_jmp */
|
||||
/* These instructions trap after executing, so the A32/T32 decoder must
|
||||
* defer them until after the conditional execution state has been updated.
|
||||
|
Loading…
Reference in New Issue
Block a user