Merge remote-tracking branch 'remotes/pmaydell/tags/pull-target-arm-20201102' into staging

target-arm queue:
 * target/arm: Fix Neon emulation bugs on big-endian hosts
 * target/arm: fix handling of HCR.FB
 * target/arm: fix LORID_EL1 access check
 * disas/capstone: Fix monitor disassembly of >32 bytes
 * hw/arm/smmuv3: Fix potential integer overflow (CID 1432363)
 * hw/arm/boot: fix SVE for EL3 direct kernel boot
 * hw/display/omap_lcdc: Fix potential NULL pointer dereference
 * hw/display/exynos4210_fimd: Fix potential NULL pointer dereference
 * target/arm: Get correct MMU index for other-security-state
 * configure: Test that gio libs from pkg-config work
 * hw/intc/arm_gicv3_cpuif: Make GIC maintenance interrupts work
 * docs: Fix building with Sphinx 3
 * tests/qtest/npcm7xx_rng-test: Disable randomness tests

# gpg: Signature made Mon 02 Nov 2020 17:09:00 GMT
# gpg:                using RSA key E1A5C593CD419DE28E8315CF3C2525ED14360CDE
# gpg:                issuer "peter.maydell@linaro.org"
# gpg: Good signature from "Peter Maydell <peter.maydell@linaro.org>" [ultimate]
# gpg:                 aka "Peter Maydell <pmaydell@gmail.com>" [ultimate]
# gpg:                 aka "Peter Maydell <pmaydell@chiark.greenend.org.uk>" [ultimate]
# Primary key fingerprint: E1A5 C593 CD41 9DE2 8E83  15CF 3C25 25ED 1436 0CDE

* remotes/pmaydell/tags/pull-target-arm-20201102: (26 commits)
  tests/qtest/npcm7xx_rng-test: Disable randomness tests
  qemu-option-trace.rst.inc: Don't use option:: markup
  scripts/kerneldoc: For Sphinx 3 use c:macro for macros with arguments
  hw/intc/arm_gicv3_cpuif: Make GIC maintenance interrupts work
  configure: Test that gio libs from pkg-config work
  target/arm: Get correct MMU index for other-security-state
  hw/display/exynos4210_fimd: Fix potential NULL pointer dereference
  hw/display/omap_lcdc: Fix potential NULL pointer dereference
  hw/arm/boot: fix SVE for EL3 direct kernel boot
  hw/arm/smmuv3: Fix potential integer overflow (CID 1432363)
  disas/capstone: Fix monitor disassembly of >32 bytes
  target/arm: fix LORID_EL1 access check
  target/arm: fix handling of HCR.FB
  target/arm: Fix VUDOT/VSDOT (scalar) on big-endian hosts
  target/arm: Fix float16 pairwise Neon ops on big-endian hosts
  target/arm: Improve do_prewiden_3d
  target/arm: Simplify do_long_3d and do_2scalar_long
  target/arm: Rename neon_load_reg64 to vfp_load_reg64
  target/arm: Add read/write_neon_element64
  target/arm: Rename neon_load_reg32 to vfp_load_reg32
  ...

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>

Peter Maydell 2020-11-03 10:38:05 +00:00
commit c7a7a877b7
17 changed files with 588 additions and 493 deletions

configure

@ -3489,13 +3489,21 @@ if test "$static" = yes && test "$mingw32" = yes; then
fi
if $pkg_config --atleast-version=$glib_req_ver gio-2.0; then
gio=yes
gio_cflags=$($pkg_config --cflags gio-2.0)
gio_libs=$($pkg_config --libs gio-2.0)
gdbus_codegen=$($pkg_config --variable=gdbus_codegen gio-2.0)
if [ ! -x "$gdbus_codegen" ]; then
gdbus_codegen=
fi
# Check that the libraries actually work -- Ubuntu 18.04 ships
# with pkg-config --static --libs data for gio-2.0 that is missing
# -lblkid and will give a link error.
write_c_skeleton
if compile_prog "" "gio_libs" ; then
gio=yes
else
gio=no
fi
else
gio=no
fi
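
The configure hunk above (the "configure: Test that gio libs from pkg-config work" change from the queue) no longer trusts the pkg-config metadata on its own: write_c_skeleton emits a minimal C file and compile_prog keeps gio=yes only if that file actually links against the libraries pkg-config reported. As a rough sketch, the probe amounts to linking something no more complicated than this (the exact skeleton contents are an assumption here, not shown in the patch):

    /* Minimal translation unit; if linking it with the reported gio
     * libraries fails (Ubuntu 18.04's static gio-2.0 data omits -lblkid),
     * configure now falls back to gio=no instead of leaving a build that
     * breaks at link time later. */
    int main(void)
    {
        return 0;
    }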


@ -286,7 +286,7 @@ bool cap_disas_monitor(disassemble_info *info, uint64_t pc, int count)
/* Make certain that we can make progress. */
assert(tsize != 0);
info->read_memory_func(pc, cap_buf + csize, tsize, info);
info->read_memory_func(pc + csize, cap_buf + csize, tsize, info);
csize += tsize;
if (cs_disasm_iter(handle, &cbuf, &csize, &pc, insn)) {
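
That one-line change is the whole of the "disas/capstone: Fix monitor disassembly of >32 bytes" fix: the monitor disassembly reads guest memory in chunks, and every refill after the first must fetch the bytes following the csize bytes already buffered rather than re-reading from the original pc, which made disassembly beyond the first chunk operate on the wrong data. Annotated form of the corrected call, using the same names as the hunk:

    /* each refill continues where the buffered bytes end */
    info->read_memory_func(pc + csize,      /* next unread guest address   */
                           cap_buf + csize, /* append after buffered bytes */
                           tsize, info);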


@ -1,7 +1,7 @@
Specify tracing options.
.. option:: [enable=]PATTERN
``[enable=]PATTERN``
Immediately enable events matching *PATTERN*
(either event name or a globbing pattern). This option is only
@ -11,7 +11,7 @@ Specify tracing options.
Use :option:`-trace help` to print a list of names of trace points.
.. option:: events=FILE
``events=FILE``
Immediately enable events listed in *FILE*.
The file must contain one event name (as listed in the ``trace-events-all``
@ -19,7 +19,7 @@ Specify tracing options.
available if QEMU has been compiled with the ``simple``, ``log`` or
``ftrace`` tracing backend.
.. option:: file=FILE
``file=FILE``
Log output traces to *FILE*.
This option is only available if QEMU has been compiled with


@ -742,6 +742,9 @@ static void do_cpu_reset(void *opaque)
if (cpu_isar_feature(aa64_mte, cpu)) {
env->cp15.scr_el3 |= SCR_ATA;
}
if (cpu_isar_feature(aa64_sve, cpu)) {
env->cp15.cptr_el[3] |= CPTR_EZ;
}
/* AArch64 kernels never boot in secure mode */
assert(!info->secure_boot);
/* This hook is only supported for AArch32 currently:


@ -17,6 +17,7 @@
*/
#include "qemu/osdep.h"
#include "qemu/bitops.h"
#include "hw/irq.h"
#include "hw/sysbus.h"
#include "migration/vmstate.h"
@ -864,7 +865,7 @@ static void smmuv3_s1_range_inval(SMMUState *s, Cmd *cmd)
scale = CMD_SCALE(cmd);
num = CMD_NUM(cmd);
ttl = CMD_TTL(cmd);
num_pages = (num + 1) * (1 << (scale));
num_pages = (num + 1) * BIT_ULL(scale);
}
if (type == SMMU_CMD_TLBI_NH_VA) {
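
The hw/arm/smmuv3 hunk above is the integer overflow flagged by Coverity as CID 1432363: 1 << scale is a 32-bit shift, so for large scale values the shift and the multiplication by num + 1 can wrap before the result ever reaches num_pages, while BIT_ULL(scale) does the shift in 64 bits. A standalone illustration of the difference, with field values chosen only to make the wrap visible and assuming num_pages is a 64-bit quantity, as the fix implies:

    #include <stdint.h>
    #include <stdio.h>

    int main(void)
    {
        unsigned num = 31, scale = 27;
        uint64_t before = (num + 1) * (1 << scale);    /* wraps to 0 in 32 bits */
        uint64_t after  = (num + 1) * (1ULL << scale); /* BIT_ULL(scale) form   */
        printf("%llu vs %llu\n",
               (unsigned long long)before, (unsigned long long)after);
        return 0;
    }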


@ -1275,12 +1275,14 @@ static void exynos4210_fimd_update(void *opaque)
bool blend = false;
uint8_t *host_fb_addr;
bool is_dirty = false;
const int global_width = (s->vidtcon[2] & FIMD_VIDTCON2_SIZE_MASK) + 1;
int global_width;
if (!s || !s->console || !s->enabled ||
surface_bits_per_pixel(qemu_console_surface(s->console)) == 0) {
return;
}
global_width = (s->vidtcon[2] & FIMD_VIDTCON2_SIZE_MASK) + 1;
exynos4210_update_resolution(s);
surface = qemu_console_surface(s->console);


@ -78,14 +78,18 @@ static void omap_lcd_interrupts(struct omap_lcd_panel_s *s)
static void omap_update_display(void *opaque)
{
struct omap_lcd_panel_s *omap_lcd = (struct omap_lcd_panel_s *) opaque;
DisplaySurface *surface = qemu_console_surface(omap_lcd->con);
DisplaySurface *surface;
draw_line_func draw_line;
int size, height, first, last;
int width, linesize, step, bpp, frame_offset;
hwaddr frame_base;
if (!omap_lcd || omap_lcd->plm == 1 || !omap_lcd->enable ||
!surface_bits_per_pixel(surface)) {
if (!omap_lcd || omap_lcd->plm == 1 || !omap_lcd->enable) {
return;
}
surface = qemu_console_surface(omap_lcd->con);
if (!surface_bits_per_pixel(surface)) {
return;
}


@ -399,6 +399,7 @@ static void gicv3_cpuif_virt_update(GICv3CPUState *cs)
int irqlevel = 0;
int fiqlevel = 0;
int maintlevel = 0;
ARMCPU *cpu = ARM_CPU(cs->cpu);
idx = hppvi_index(cs);
trace_gicv3_cpuif_virt_update(gicv3_redist_affid(cs), idx);
@ -424,7 +425,7 @@ static void gicv3_cpuif_virt_update(GICv3CPUState *cs)
qemu_set_irq(cs->parent_vfiq, fiqlevel);
qemu_set_irq(cs->parent_virq, irqlevel);
qemu_set_irq(cs->maintenance_irq, maintlevel);
qemu_set_irq(cpu->gicv3_maintenance_interrupt, maintlevel);
}
static uint64_t icv_ap_read(CPUARMState *env, const ARMCPRegInfo *ri)
@ -2624,8 +2625,6 @@ void gicv3_init_cpuif(GICv3State *s)
&& cpu->gic_num_lrs) {
int j;
cs->maintenance_irq = cpu->gicv3_maintenance_interrupt;
cs->num_list_regs = cpu->gic_num_lrs;
cs->vpribits = cpu->gic_vpribits;
cs->vprebits = cpu->gic_vprebits;


@ -153,7 +153,6 @@ struct GICv3CPUState {
qemu_irq parent_fiq;
qemu_irq parent_virq;
qemu_irq parent_vfiq;
qemu_irq maintenance_irq;
/* Redistributor */
uint32_t level; /* Current IRQ level */


@ -839,7 +839,23 @@ sub output_function_rst(%) {
output_highlight_rst($args{'purpose'});
$start = "\n\n**Syntax**\n\n ``";
} else {
print ".. c:function:: ";
if ((split(/\./, $sphinx_version))[0] >= 3) {
# Sphinx 3 and later distinguish macros and functions and
# complain if you use c:function with something that's not
# syntactically valid as a function declaration.
# We assume that anything with a return type is a function
# and anything without is a macro.
if ($args{'functiontype'} ne "") {
print ".. c:function:: ";
} else {
print ".. c:macro:: ";
}
} else {
# Older Sphinx don't support documenting macros that take
# arguments with c:macro, and don't complain about the use
# of c:function for this.
print ".. c:function:: ";
}
}
if ($args{'functiontype'} ne "") {
$start .= $args{'functiontype'} . " " . $args{'function'} . " (";


@ -731,13 +731,12 @@ static void tlbimvaa_is_write(CPUARMState *env, const ARMCPRegInfo *ri,
/*
* Non-IS variants of TLB operations are upgraded to
* IS versions if we are at NS EL1 and HCR_EL2.FB is set to
* IS versions if we are at EL1 and HCR_EL2.FB is effectively set to
* force broadcast of these operations.
*/
static bool tlb_force_broadcast(CPUARMState *env)
{
return (env->cp15.hcr_el2 & HCR_FB) &&
arm_current_el(env) == 1 && arm_is_secure_below_el3(env);
return arm_current_el(env) == 1 && (arm_hcr_el2_eff(env) & HCR_FB);
}
static void tlbiall_write(CPUARMState *env, const ARMCPRegInfo *ri,
@ -6680,9 +6679,10 @@ static uint64_t id_aa64pfr0_read(CPUARMState *env, const ARMCPRegInfo *ri)
#endif
/* Shared logic between LORID and the rest of the LOR* registers.
* Secure state has already been delt with.
* Secure state exclusion has already been dealt with.
*/
static CPAccessResult access_lor_ns(CPUARMState *env)
static CPAccessResult access_lor_ns(CPUARMState *env,
const ARMCPRegInfo *ri, bool isread)
{
int el = arm_current_el(env);
@ -6695,16 +6695,6 @@ static CPAccessResult access_lor_ns(CPUARMState *env)
return CP_ACCESS_OK;
}
static CPAccessResult access_lorid(CPUARMState *env, const ARMCPRegInfo *ri,
bool isread)
{
if (arm_is_secure_below_el3(env)) {
/* Access ok in secure mode. */
return CP_ACCESS_OK;
}
return access_lor_ns(env);
}
static CPAccessResult access_lor_other(CPUARMState *env,
const ARMCPRegInfo *ri, bool isread)
{
@ -6712,7 +6702,7 @@ static CPAccessResult access_lor_other(CPUARMState *env,
/* Access denied in secure mode. */
return CP_ACCESS_TRAP;
}
return access_lor_ns(env);
return access_lor_ns(env, ri, isread);
}
/*
@ -6739,7 +6729,7 @@ static const ARMCPRegInfo lor_reginfo[] = {
.type = ARM_CP_CONST, .resetvalue = 0 },
{ .name = "LORID_EL1", .state = ARM_CP_STATE_AA64,
.opc0 = 3, .opc1 = 0, .crn = 10, .crm = 4, .opc2 = 7,
.access = PL1_R, .accessfn = access_lorid,
.access = PL1_R, .accessfn = access_lor_ns,
.type = ARM_CP_CONST, .resetvalue = 0 },
REGINFO_SENTINEL
};


@ -2719,7 +2719,8 @@ ARMMMUIdx arm_v7m_mmu_idx_for_secstate_and_priv(CPUARMState *env,
/* Return the MMU index for a v7M CPU in the specified security state */
ARMMMUIdx arm_v7m_mmu_idx_for_secstate(CPUARMState *env, bool secstate)
{
bool priv = arm_current_el(env) != 0;
bool priv = arm_v7m_is_handler_mode(env) ||
!(env->v7m.control[secstate] & 1);
return arm_v7m_mmu_idx_for_secstate_and_priv(env, secstate, priv);
}
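
The target/arm hunk above is the "Get correct MMU index for other-security-state" fix: privilege for the queried security state is now derived from handler mode or that state's own CONTROL.nPRIV bit, instead of from arm_current_el(), which only reflects the state the CPU is currently in. A hedged illustration of the case the old code answered wrongly (the M_REG_NS constant comes from the surrounding QEMU headers, not from this hunk):

    /* Current state: Secure, privileged thread mode.
     * Non-secure CONTROL.nPRIV is set (unprivileged).
     * Query: MMU index for the Non-secure state. */
    bool priv_old = arm_current_el(env) != 0;            /* true: wrong state  */
    bool priv_new = arm_v7m_is_handler_mode(env) ||
                    !(env->v7m.control[M_REG_NS] & 1);   /* false: as intended */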

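Most of the translate-neon changes that follow are the mechanical part of the big-endian Neon fixes: direct neon_load_reg()/neon_store_reg()/neon_load_reg64() calls and neon_reg_offset(reg, 0) uses are replaced by read/write_neon_element32/64() and neon_full_reg_offset(), with callers now allocating and freeing their own TCG temporaries. The element helpers themselves are added elsewhere in this series; presumably they reduce to a load or store at an element offset that applies the same big-endian XOR correction as the neon_element_offset() helper removed in the first hunk below, along the lines of this sketch (assumed shape, not the actual implementation):

    static void read_neon_element32(TCGv_i32 dest, int reg, int ele, MemOp memop)
    {
        long off = neon_element_offset(reg, ele, memop & MO_SIZE);

        switch (memop) {
        case MO_SB:
            tcg_gen_ld8s_i32(dest, cpu_env, off);
            break;
        case MO_UB:
            tcg_gen_ld8u_i32(dest, cpu_env, off);
            break;
        case MO_32:
            tcg_gen_ld_i32(dest, cpu_env, off);
            break;
        /* 16-bit and signed variants analogous */
        default:
            g_assert_not_reached();
        }
    }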

@ -60,25 +60,6 @@ static inline int neon_3same_fp_size(DisasContext *s, int x)
#include "decode-neon-ls.c.inc"
#include "decode-neon-shared.c.inc"
/* Return the offset of a 2**SIZE piece of a NEON register, at index ELE,
* where 0 is the least significant end of the register.
*/
static inline long
neon_element_offset(int reg, int element, MemOp size)
{
int element_size = 1 << size;
int ofs = element * element_size;
#ifdef HOST_WORDS_BIGENDIAN
/* Calculate the offset assuming fully little-endian,
* then XOR to account for the order of the 8-byte units.
*/
if (element_size < 8) {
ofs ^= 8 - element_size;
}
#endif
return neon_reg_offset(reg, 0) + ofs;
}
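/*
 * Worked example of the XOR above (illustration only): on a big-endian host
 * a 16-bit element (element_size = 2) at index 1 starts at little-endian
 * offset 2, and 2 ^ (8 - 2) = 4, so it is accessed at byte 4 of the
 * host-order 8-byte unit; index 0 likewise maps to byte 6.  Skipping this
 * correction is what broke Neon element accesses on big-endian hosts.
 */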
static void neon_load_element(TCGv_i32 var, int reg, int ele, MemOp mop)
{
long offset = neon_element_offset(reg, ele, mop & MO_SIZE);
@ -585,12 +566,12 @@ static bool trans_VLD_all_lanes(DisasContext *s, arg_VLD_all_lanes *a)
* We cannot write 16 bytes at once because the
* destination is unaligned.
*/
tcg_gen_gvec_dup_i32(size, neon_reg_offset(vd, 0),
tcg_gen_gvec_dup_i32(size, neon_full_reg_offset(vd),
8, 8, tmp);
tcg_gen_gvec_mov(0, neon_reg_offset(vd + 1, 0),
neon_reg_offset(vd, 0), 8, 8);
tcg_gen_gvec_mov(0, neon_full_reg_offset(vd + 1),
neon_full_reg_offset(vd), 8, 8);
} else {
tcg_gen_gvec_dup_i32(size, neon_reg_offset(vd, 0),
tcg_gen_gvec_dup_i32(size, neon_full_reg_offset(vd),
vec_size, vec_size, tmp);
}
tcg_gen_addi_i32(addr, addr, 1 << size);
@ -691,9 +672,9 @@ static bool trans_VLDST_single(DisasContext *s, arg_VLDST_single *a)
static bool do_3same(DisasContext *s, arg_3same *a, GVecGen3Fn fn)
{
int vec_size = a->q ? 16 : 8;
int rd_ofs = neon_reg_offset(a->vd, 0);
int rn_ofs = neon_reg_offset(a->vn, 0);
int rm_ofs = neon_reg_offset(a->vm, 0);
int rd_ofs = neon_full_reg_offset(a->vd);
int rn_ofs = neon_full_reg_offset(a->vn);
int rm_ofs = neon_full_reg_offset(a->vm);
if (!arm_dc_feature(s, ARM_FEATURE_NEON)) {
return false;
@ -975,18 +956,24 @@ static bool do_3same_pair(DisasContext *s, arg_3same *a, NeonGenTwoOpFn *fn)
* early. Since Q is 0 there are always just two passes, so instead
* of a complicated loop over each pass we just unroll.
*/
tmp = neon_load_reg(a->vn, 0);
tmp2 = neon_load_reg(a->vn, 1);
tmp = tcg_temp_new_i32();
tmp2 = tcg_temp_new_i32();
tmp3 = tcg_temp_new_i32();
read_neon_element32(tmp, a->vn, 0, MO_32);
read_neon_element32(tmp2, a->vn, 1, MO_32);
fn(tmp, tmp, tmp2);
tcg_temp_free_i32(tmp2);
tmp3 = neon_load_reg(a->vm, 0);
tmp2 = neon_load_reg(a->vm, 1);
read_neon_element32(tmp3, a->vm, 0, MO_32);
read_neon_element32(tmp2, a->vm, 1, MO_32);
fn(tmp3, tmp3, tmp2);
tcg_temp_free_i32(tmp2);
neon_store_reg(a->vd, 0, tmp);
neon_store_reg(a->vd, 1, tmp3);
write_neon_element32(tmp, a->vd, 0, MO_32);
write_neon_element32(tmp3, a->vd, 1, MO_32);
tcg_temp_free_i32(tmp);
tcg_temp_free_i32(tmp2);
tcg_temp_free_i32(tmp3);
return true;
}
@ -1177,8 +1164,8 @@ static bool do_vector_2sh(DisasContext *s, arg_2reg_shift *a, GVecGen2iFn *fn)
{
/* Handle a 2-reg-shift insn which can be vectorized. */
int vec_size = a->q ? 16 : 8;
int rd_ofs = neon_reg_offset(a->vd, 0);
int rm_ofs = neon_reg_offset(a->vm, 0);
int rd_ofs = neon_full_reg_offset(a->vd);
int rm_ofs = neon_full_reg_offset(a->vm);
if (!arm_dc_feature(s, ARM_FEATURE_NEON)) {
return false;
@ -1278,9 +1265,9 @@ static bool do_2shift_env_64(DisasContext *s, arg_2reg_shift *a,
for (pass = 0; pass < a->q + 1; pass++) {
TCGv_i64 tmp = tcg_temp_new_i64();
neon_load_reg64(tmp, a->vm + pass);
read_neon_element64(tmp, a->vm, pass, MO_64);
fn(tmp, cpu_env, tmp, constimm);
neon_store_reg64(tmp, a->vd + pass);
write_neon_element64(tmp, a->vd, pass, MO_64);
tcg_temp_free_i64(tmp);
}
tcg_temp_free_i64(constimm);
@ -1294,7 +1281,7 @@ static bool do_2shift_env_32(DisasContext *s, arg_2reg_shift *a,
* 2-reg-and-shift operations, size < 3 case, where the
* helper needs to be passed cpu_env.
*/
TCGv_i32 constimm;
TCGv_i32 constimm, tmp;
int pass;
if (!arm_dc_feature(s, ARM_FEATURE_NEON)) {
@ -1320,12 +1307,14 @@ static bool do_2shift_env_32(DisasContext *s, arg_2reg_shift *a,
* by immediate using the variable shift operations.
*/
constimm = tcg_const_i32(dup_const(a->size, a->shift));
tmp = tcg_temp_new_i32();
for (pass = 0; pass < (a->q ? 4 : 2); pass++) {
TCGv_i32 tmp = neon_load_reg(a->vm, pass);
read_neon_element32(tmp, a->vm, pass, MO_32);
fn(tmp, cpu_env, tmp, constimm);
neon_store_reg(a->vd, pass, tmp);
write_neon_element32(tmp, a->vd, pass, MO_32);
}
tcg_temp_free_i32(tmp);
tcg_temp_free_i32(constimm);
return true;
}
@ -1383,21 +1372,21 @@ static bool do_2shift_narrow_64(DisasContext *s, arg_2reg_shift *a,
constimm = tcg_const_i64(-a->shift);
rm1 = tcg_temp_new_i64();
rm2 = tcg_temp_new_i64();
rd = tcg_temp_new_i32();
/* Load both inputs first to avoid potential overwrite if rm == rd */
neon_load_reg64(rm1, a->vm);
neon_load_reg64(rm2, a->vm + 1);
read_neon_element64(rm1, a->vm, 0, MO_64);
read_neon_element64(rm2, a->vm, 1, MO_64);
shiftfn(rm1, rm1, constimm);
rd = tcg_temp_new_i32();
narrowfn(rd, cpu_env, rm1);
neon_store_reg(a->vd, 0, rd);
write_neon_element32(rd, a->vd, 0, MO_32);
shiftfn(rm2, rm2, constimm);
rd = tcg_temp_new_i32();
narrowfn(rd, cpu_env, rm2);
neon_store_reg(a->vd, 1, rd);
write_neon_element32(rd, a->vd, 1, MO_32);
tcg_temp_free_i32(rd);
tcg_temp_free_i64(rm1);
tcg_temp_free_i64(rm2);
tcg_temp_free_i64(constimm);
@ -1447,10 +1436,14 @@ static bool do_2shift_narrow_32(DisasContext *s, arg_2reg_shift *a,
constimm = tcg_const_i32(imm);
/* Load all inputs first to avoid potential overwrite */
rm1 = neon_load_reg(a->vm, 0);
rm2 = neon_load_reg(a->vm, 1);
rm3 = neon_load_reg(a->vm + 1, 0);
rm4 = neon_load_reg(a->vm + 1, 1);
rm1 = tcg_temp_new_i32();
rm2 = tcg_temp_new_i32();
rm3 = tcg_temp_new_i32();
rm4 = tcg_temp_new_i32();
read_neon_element32(rm1, a->vm, 0, MO_32);
read_neon_element32(rm2, a->vm, 1, MO_32);
read_neon_element32(rm3, a->vm, 2, MO_32);
read_neon_element32(rm4, a->vm, 3, MO_32);
rtmp = tcg_temp_new_i64();
shiftfn(rm1, rm1, constimm);
@ -1460,7 +1453,8 @@ static bool do_2shift_narrow_32(DisasContext *s, arg_2reg_shift *a,
tcg_temp_free_i32(rm2);
narrowfn(rm1, cpu_env, rtmp);
neon_store_reg(a->vd, 0, rm1);
write_neon_element32(rm1, a->vd, 0, MO_32);
tcg_temp_free_i32(rm1);
shiftfn(rm3, rm3, constimm);
shiftfn(rm4, rm4, constimm);
@ -1471,7 +1465,8 @@ static bool do_2shift_narrow_32(DisasContext *s, arg_2reg_shift *a,
narrowfn(rm3, cpu_env, rtmp);
tcg_temp_free_i64(rtmp);
neon_store_reg(a->vd, 1, rm3);
write_neon_element32(rm3, a->vd, 1, MO_32);
tcg_temp_free_i32(rm3);
return true;
}
@ -1572,8 +1567,10 @@ static bool do_vshll_2sh(DisasContext *s, arg_2reg_shift *a,
widen_mask = dup_const(a->size + 1, widen_mask);
}
rm0 = neon_load_reg(a->vm, 0);
rm1 = neon_load_reg(a->vm, 1);
rm0 = tcg_temp_new_i32();
rm1 = tcg_temp_new_i32();
read_neon_element32(rm0, a->vm, 0, MO_32);
read_neon_element32(rm1, a->vm, 1, MO_32);
tmp = tcg_temp_new_i64();
widenfn(tmp, rm0);
@ -1582,7 +1579,7 @@ static bool do_vshll_2sh(DisasContext *s, arg_2reg_shift *a,
tcg_gen_shli_i64(tmp, tmp, a->shift);
tcg_gen_andi_i64(tmp, tmp, ~widen_mask);
}
neon_store_reg64(tmp, a->vd);
write_neon_element64(tmp, a->vd, 0, MO_64);
widenfn(tmp, rm1);
tcg_temp_free_i32(rm1);
@ -1590,7 +1587,7 @@ static bool do_vshll_2sh(DisasContext *s, arg_2reg_shift *a,
tcg_gen_shli_i64(tmp, tmp, a->shift);
tcg_gen_andi_i64(tmp, tmp, ~widen_mask);
}
neon_store_reg64(tmp, a->vd + 1);
write_neon_element64(tmp, a->vd, 1, MO_64);
tcg_temp_free_i64(tmp);
return true;
}
@ -1620,8 +1617,8 @@ static bool do_fp_2sh(DisasContext *s, arg_2reg_shift *a,
{
/* FP operations in 2-reg-and-shift group */
int vec_size = a->q ? 16 : 8;
int rd_ofs = neon_reg_offset(a->vd, 0);
int rm_ofs = neon_reg_offset(a->vm, 0);
int rd_ofs = neon_full_reg_offset(a->vd);
int rm_ofs = neon_full_reg_offset(a->vm);
TCGv_ptr fpst;
if (!arm_dc_feature(s, ARM_FEATURE_NEON)) {
@ -1756,7 +1753,7 @@ static bool do_1reg_imm(DisasContext *s, arg_1reg_imm *a,
return true;
}
reg_ofs = neon_reg_offset(a->vd, 0);
reg_ofs = neon_full_reg_offset(a->vd);
vec_size = a->q ? 16 : 8;
imm = asimd_imm_const(a->imm, a->cmode, a->op);
@ -1791,11 +1788,10 @@ static bool trans_Vimm_1r(DisasContext *s, arg_1reg_imm *a)
static bool do_prewiden_3d(DisasContext *s, arg_3diff *a,
NeonGenWidenFn *widenfn,
NeonGenTwo64OpFn *opfn,
bool src1_wide)
int src1_mop, int src2_mop)
{
/* 3-regs different lengths, prewidening case (VADDL/VSUBL/VAADW/VSUBW) */
TCGv_i64 rn0_64, rn1_64, rm_64;
TCGv_i32 rm;
if (!arm_dc_feature(s, ARM_FEATURE_NEON)) {
return false;
@ -1807,12 +1803,12 @@ static bool do_prewiden_3d(DisasContext *s, arg_3diff *a,
return false;
}
if (!widenfn || !opfn) {
if (!opfn) {
/* size == 3 case, which is an entirely different insn group */
return false;
}
if ((a->vd & 1) || (src1_wide && (a->vn & 1))) {
if ((a->vd & 1) || (src1_mop == MO_Q && (a->vn & 1))) {
return false;
}
@ -1824,38 +1820,50 @@ static bool do_prewiden_3d(DisasContext *s, arg_3diff *a,
rn1_64 = tcg_temp_new_i64();
rm_64 = tcg_temp_new_i64();
if (src1_wide) {
neon_load_reg64(rn0_64, a->vn);
if (src1_mop >= 0) {
read_neon_element64(rn0_64, a->vn, 0, src1_mop);
} else {
TCGv_i32 tmp = neon_load_reg(a->vn, 0);
TCGv_i32 tmp = tcg_temp_new_i32();
read_neon_element32(tmp, a->vn, 0, MO_32);
widenfn(rn0_64, tmp);
tcg_temp_free_i32(tmp);
}
rm = neon_load_reg(a->vm, 0);
if (src2_mop >= 0) {
read_neon_element64(rm_64, a->vm, 0, src2_mop);
} else {
TCGv_i32 tmp = tcg_temp_new_i32();
read_neon_element32(tmp, a->vm, 0, MO_32);
widenfn(rm_64, tmp);
tcg_temp_free_i32(tmp);
}
widenfn(rm_64, rm);
tcg_temp_free_i32(rm);
opfn(rn0_64, rn0_64, rm_64);
/*
* Load second pass inputs before storing the first pass result, to
* avoid incorrect results if a narrow input overlaps with the result.
*/
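/*
 * Concrete case of that overlap (illustration, not part of the patch):
 * VADDL.S32 q0, d0, d2 writes its first 64-bit result into d0, the low
 * half of q0, while element 1 of d0 is still needed as a second-pass
 * input, so both passes' inputs are read before anything is stored.
 */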
if (src1_wide) {
neon_load_reg64(rn1_64, a->vn + 1);
if (src1_mop >= 0) {
read_neon_element64(rn1_64, a->vn, 1, src1_mop);
} else {
TCGv_i32 tmp = neon_load_reg(a->vn, 1);
TCGv_i32 tmp = tcg_temp_new_i32();
read_neon_element32(tmp, a->vn, 1, MO_32);
widenfn(rn1_64, tmp);
tcg_temp_free_i32(tmp);
}
rm = neon_load_reg(a->vm, 1);
if (src2_mop >= 0) {
read_neon_element64(rm_64, a->vm, 1, src2_mop);
} else {
TCGv_i32 tmp = tcg_temp_new_i32();
read_neon_element32(tmp, a->vm, 1, MO_32);
widenfn(rm_64, tmp);
tcg_temp_free_i32(tmp);
}
neon_store_reg64(rn0_64, a->vd);
write_neon_element64(rn0_64, a->vd, 0, MO_64);
widenfn(rm_64, rm);
tcg_temp_free_i32(rm);
opfn(rn1_64, rn1_64, rm_64);
neon_store_reg64(rn1_64, a->vd + 1);
write_neon_element64(rn1_64, a->vd, 1, MO_64);
tcg_temp_free_i64(rn0_64);
tcg_temp_free_i64(rn1_64);
@ -1864,14 +1872,13 @@ static bool do_prewiden_3d(DisasContext *s, arg_3diff *a,
return true;
}
#define DO_PREWIDEN(INSN, S, EXT, OP, SRC1WIDE) \
#define DO_PREWIDEN(INSN, S, OP, SRC1WIDE, SIGN) \
static bool trans_##INSN##_3d(DisasContext *s, arg_3diff *a) \
{ \
static NeonGenWidenFn * const widenfn[] = { \
gen_helper_neon_widen_##S##8, \
gen_helper_neon_widen_##S##16, \
tcg_gen_##EXT##_i32_i64, \
NULL, \
NULL, NULL, \
}; \
static NeonGenTwo64OpFn * const addfn[] = { \
gen_helper_neon_##OP##l_u16, \
@ -1879,18 +1886,20 @@ static bool do_prewiden_3d(DisasContext *s, arg_3diff *a,
tcg_gen_##OP##_i64, \
NULL, \
}; \
return do_prewiden_3d(s, a, widenfn[a->size], \
addfn[a->size], SRC1WIDE); \
int narrow_mop = a->size == MO_32 ? MO_32 | SIGN : -1; \
return do_prewiden_3d(s, a, widenfn[a->size], addfn[a->size], \
SRC1WIDE ? MO_Q : narrow_mop, \
narrow_mop); \
}
DO_PREWIDEN(VADDL_S, s, ext, add, false)
DO_PREWIDEN(VADDL_U, u, extu, add, false)
DO_PREWIDEN(VSUBL_S, s, ext, sub, false)
DO_PREWIDEN(VSUBL_U, u, extu, sub, false)
DO_PREWIDEN(VADDW_S, s, ext, add, true)
DO_PREWIDEN(VADDW_U, u, extu, add, true)
DO_PREWIDEN(VSUBW_S, s, ext, sub, true)
DO_PREWIDEN(VSUBW_U, u, extu, sub, true)
DO_PREWIDEN(VADDL_S, s, add, false, MO_SIGN)
DO_PREWIDEN(VADDL_U, u, add, false, 0)
DO_PREWIDEN(VSUBL_S, s, sub, false, MO_SIGN)
DO_PREWIDEN(VSUBL_U, u, sub, false, 0)
DO_PREWIDEN(VADDW_S, s, add, true, MO_SIGN)
DO_PREWIDEN(VADDW_U, u, add, true, 0)
DO_PREWIDEN(VSUBW_S, s, sub, true, MO_SIGN)
DO_PREWIDEN(VSUBW_U, u, sub, true, 0)
static bool do_narrow_3d(DisasContext *s, arg_3diff *a,
NeonGenTwo64OpFn *opfn, NeonGenNarrowFn *narrowfn)
@ -1927,23 +1936,25 @@ static bool do_narrow_3d(DisasContext *s, arg_3diff *a,
rd0 = tcg_temp_new_i32();
rd1 = tcg_temp_new_i32();
neon_load_reg64(rn_64, a->vn);
neon_load_reg64(rm_64, a->vm);
read_neon_element64(rn_64, a->vn, 0, MO_64);
read_neon_element64(rm_64, a->vm, 0, MO_64);
opfn(rn_64, rn_64, rm_64);
narrowfn(rd0, rn_64);
neon_load_reg64(rn_64, a->vn + 1);
neon_load_reg64(rm_64, a->vm + 1);
read_neon_element64(rn_64, a->vn, 1, MO_64);
read_neon_element64(rm_64, a->vm, 1, MO_64);
opfn(rn_64, rn_64, rm_64);
narrowfn(rd1, rn_64);
neon_store_reg(a->vd, 0, rd0);
neon_store_reg(a->vd, 1, rd1);
write_neon_element32(rd0, a->vd, 0, MO_32);
write_neon_element32(rd1, a->vd, 1, MO_32);
tcg_temp_free_i32(rd0);
tcg_temp_free_i32(rd1);
tcg_temp_free_i64(rn_64);
tcg_temp_free_i64(rm_64);
@ -2018,14 +2029,14 @@ static bool do_long_3d(DisasContext *s, arg_3diff *a,
rd0 = tcg_temp_new_i64();
rd1 = tcg_temp_new_i64();
rn = neon_load_reg(a->vn, 0);
rm = neon_load_reg(a->vm, 0);
rn = tcg_temp_new_i32();
rm = tcg_temp_new_i32();
read_neon_element32(rn, a->vn, 0, MO_32);
read_neon_element32(rm, a->vm, 0, MO_32);
opfn(rd0, rn, rm);
tcg_temp_free_i32(rn);
tcg_temp_free_i32(rm);
rn = neon_load_reg(a->vn, 1);
rm = neon_load_reg(a->vm, 1);
read_neon_element32(rn, a->vn, 1, MO_32);
read_neon_element32(rm, a->vm, 1, MO_32);
opfn(rd1, rn, rm);
tcg_temp_free_i32(rn);
tcg_temp_free_i32(rm);
@ -2033,18 +2044,15 @@ static bool do_long_3d(DisasContext *s, arg_3diff *a,
/* Don't store results until after all loads: they might overlap */
if (accfn) {
tmp = tcg_temp_new_i64();
neon_load_reg64(tmp, a->vd);
accfn(tmp, tmp, rd0);
neon_store_reg64(tmp, a->vd);
neon_load_reg64(tmp, a->vd + 1);
accfn(tmp, tmp, rd1);
neon_store_reg64(tmp, a->vd + 1);
read_neon_element64(tmp, a->vd, 0, MO_64);
accfn(rd0, tmp, rd0);
read_neon_element64(tmp, a->vd, 1, MO_64);
accfn(rd1, tmp, rd1);
tcg_temp_free_i64(tmp);
} else {
neon_store_reg64(rd0, a->vd);
neon_store_reg64(rd1, a->vd + 1);
}
write_neon_element64(rd0, a->vd, 0, MO_64);
write_neon_element64(rd1, a->vd, 1, MO_64);
tcg_temp_free_i64(rd0);
tcg_temp_free_i64(rd1);
@ -2300,9 +2308,9 @@ static bool trans_VMULL_P_3d(DisasContext *s, arg_3diff *a)
return true;
}
tcg_gen_gvec_3_ool(neon_reg_offset(a->vd, 0),
neon_reg_offset(a->vn, 0),
neon_reg_offset(a->vm, 0),
tcg_gen_gvec_3_ool(neon_full_reg_offset(a->vd),
neon_full_reg_offset(a->vn),
neon_full_reg_offset(a->vm),
16, 16, 0, fn_gvec);
return true;
}
@ -2327,16 +2335,16 @@ static void gen_neon_dup_high16(TCGv_i32 var)
static inline TCGv_i32 neon_get_scalar(int size, int reg)
{
TCGv_i32 tmp;
if (size == 1) {
tmp = neon_load_reg(reg & 7, reg >> 4);
TCGv_i32 tmp = tcg_temp_new_i32();
if (size == MO_16) {
read_neon_element32(tmp, reg & 7, reg >> 4, MO_32);
if (reg & 8) {
gen_neon_dup_high16(tmp);
} else {
gen_neon_dup_low16(tmp);
}
} else {
tmp = neon_load_reg(reg & 15, reg >> 4);
read_neon_element32(tmp, reg & 15, reg >> 4, MO_32);
}
return tmp;
}
@ -2350,7 +2358,7 @@ static bool do_2scalar(DisasContext *s, arg_2scalar *a,
* perform an accumulation operation of that result into the
* destination.
*/
TCGv_i32 scalar;
TCGv_i32 scalar, tmp;
int pass;
if (!arm_dc_feature(s, ARM_FEATURE_NEON)) {
@ -2377,17 +2385,20 @@ static bool do_2scalar(DisasContext *s, arg_2scalar *a,
}
scalar = neon_get_scalar(a->size, a->vm);
tmp = tcg_temp_new_i32();
for (pass = 0; pass < (a->q ? 4 : 2); pass++) {
TCGv_i32 tmp = neon_load_reg(a->vn, pass);
read_neon_element32(tmp, a->vn, pass, MO_32);
opfn(tmp, tmp, scalar);
if (accfn) {
TCGv_i32 rd = neon_load_reg(a->vd, pass);
TCGv_i32 rd = tcg_temp_new_i32();
read_neon_element32(rd, a->vd, pass, MO_32);
accfn(tmp, rd, tmp);
tcg_temp_free_i32(rd);
}
neon_store_reg(a->vd, pass, tmp);
write_neon_element32(tmp, a->vd, pass, MO_32);
}
tcg_temp_free_i32(tmp);
tcg_temp_free_i32(scalar);
return true;
}
@ -2445,8 +2456,8 @@ static bool do_2scalar_fp_vec(DisasContext *s, arg_2scalar *a,
{
/* Two registers and a scalar, using gvec */
int vec_size = a->q ? 16 : 8;
int rd_ofs = neon_reg_offset(a->vd, 0);
int rn_ofs = neon_reg_offset(a->vn, 0);
int rd_ofs = neon_full_reg_offset(a->vd);
int rn_ofs = neon_full_reg_offset(a->vn);
int rm_ofs;
int idx;
TCGv_ptr fpstatus;
@ -2477,7 +2488,7 @@ static bool do_2scalar_fp_vec(DisasContext *s, arg_2scalar *a,
/* a->vm is M:Vm, which encodes both register and index */
idx = extract32(a->vm, a->size + 2, 2);
a->vm = extract32(a->vm, 0, a->size + 2);
rm_ofs = neon_reg_offset(a->vm, 0);
rm_ofs = neon_full_reg_offset(a->vm);
fpstatus = fpstatus_ptr(a->size == 1 ? FPST_STD_F16 : FPST_STD);
tcg_gen_gvec_3_ptr(rd_ofs, rn_ofs, rm_ofs, fpstatus,
@ -2542,7 +2553,7 @@ static bool do_vqrdmlah_2sc(DisasContext *s, arg_2scalar *a,
* performs a kind of fused op-then-accumulate using a helper
* function that takes all of rd, rn and the scalar at once.
*/
TCGv_i32 scalar;
TCGv_i32 scalar, rn, rd;
int pass;
if (!arm_dc_feature(s, ARM_FEATURE_NEON)) {
@ -2573,14 +2584,17 @@ static bool do_vqrdmlah_2sc(DisasContext *s, arg_2scalar *a,
}
scalar = neon_get_scalar(a->size, a->vm);
rn = tcg_temp_new_i32();
rd = tcg_temp_new_i32();
for (pass = 0; pass < (a->q ? 4 : 2); pass++) {
TCGv_i32 rn = neon_load_reg(a->vn, pass);
TCGv_i32 rd = neon_load_reg(a->vd, pass);
read_neon_element32(rn, a->vn, pass, MO_32);
read_neon_element32(rd, a->vd, pass, MO_32);
opfn(rd, cpu_env, rn, scalar, rd);
tcg_temp_free_i32(rn);
neon_store_reg(a->vd, pass, rd);
write_neon_element32(rd, a->vd, pass, MO_32);
}
tcg_temp_free_i32(rn);
tcg_temp_free_i32(rd);
tcg_temp_free_i32(scalar);
return true;
@ -2647,12 +2661,12 @@ static bool do_2scalar_long(DisasContext *s, arg_2scalar *a,
scalar = neon_get_scalar(a->size, a->vm);
/* Load all inputs before writing any outputs, in case of overlap */
rn = neon_load_reg(a->vn, 0);
rn = tcg_temp_new_i32();
read_neon_element32(rn, a->vn, 0, MO_32);
rn0_64 = tcg_temp_new_i64();
opfn(rn0_64, rn, scalar);
tcg_temp_free_i32(rn);
rn = neon_load_reg(a->vn, 1);
read_neon_element32(rn, a->vn, 1, MO_32);
rn1_64 = tcg_temp_new_i64();
opfn(rn1_64, rn, scalar);
tcg_temp_free_i32(rn);
@ -2660,17 +2674,15 @@ static bool do_2scalar_long(DisasContext *s, arg_2scalar *a,
if (accfn) {
TCGv_i64 t64 = tcg_temp_new_i64();
neon_load_reg64(t64, a->vd);
accfn(t64, t64, rn0_64);
neon_store_reg64(t64, a->vd);
neon_load_reg64(t64, a->vd + 1);
accfn(t64, t64, rn1_64);
neon_store_reg64(t64, a->vd + 1);
read_neon_element64(t64, a->vd, 0, MO_64);
accfn(rn0_64, t64, rn0_64);
read_neon_element64(t64, a->vd, 1, MO_64);
accfn(rn1_64, t64, rn1_64);
tcg_temp_free_i64(t64);
} else {
neon_store_reg64(rn0_64, a->vd);
neon_store_reg64(rn1_64, a->vd + 1);
}
write_neon_element64(rn0_64, a->vd, 0, MO_64);
write_neon_element64(rn1_64, a->vd, 1, MO_64);
tcg_temp_free_i64(rn0_64);
tcg_temp_free_i64(rn1_64);
return true;
@ -2803,10 +2815,10 @@ static bool trans_VEXT(DisasContext *s, arg_VEXT *a)
right = tcg_temp_new_i64();
dest = tcg_temp_new_i64();
neon_load_reg64(right, a->vn);
neon_load_reg64(left, a->vm);
read_neon_element64(right, a->vn, 0, MO_64);
read_neon_element64(left, a->vm, 0, MO_64);
tcg_gen_extract2_i64(dest, right, left, a->imm * 8);
neon_store_reg64(dest, a->vd);
write_neon_element64(dest, a->vd, 0, MO_64);
tcg_temp_free_i64(left);
tcg_temp_free_i64(right);
@ -2822,21 +2834,21 @@ static bool trans_VEXT(DisasContext *s, arg_VEXT *a)
destright = tcg_temp_new_i64();
if (a->imm < 8) {
neon_load_reg64(right, a->vn);
neon_load_reg64(middle, a->vn + 1);
read_neon_element64(right, a->vn, 0, MO_64);
read_neon_element64(middle, a->vn, 1, MO_64);
tcg_gen_extract2_i64(destright, right, middle, a->imm * 8);
neon_load_reg64(left, a->vm);
read_neon_element64(left, a->vm, 0, MO_64);
tcg_gen_extract2_i64(destleft, middle, left, a->imm * 8);
} else {
neon_load_reg64(right, a->vn + 1);
neon_load_reg64(middle, a->vm);
read_neon_element64(right, a->vn, 1, MO_64);
read_neon_element64(middle, a->vm, 0, MO_64);
tcg_gen_extract2_i64(destright, right, middle, (a->imm - 8) * 8);
neon_load_reg64(left, a->vm + 1);
read_neon_element64(left, a->vm, 1, MO_64);
tcg_gen_extract2_i64(destleft, middle, left, (a->imm - 8) * 8);
}
neon_store_reg64(destright, a->vd);
neon_store_reg64(destleft, a->vd + 1);
write_neon_element64(destright, a->vd, 0, MO_64);
write_neon_element64(destleft, a->vd, 1, MO_64);
tcg_temp_free_i64(destright);
tcg_temp_free_i64(destleft);
@ -2876,30 +2888,34 @@ static bool trans_VTBL(DisasContext *s, arg_VTBL *a)
return false;
}
n <<= 3;
tmp = tcg_temp_new_i32();
if (a->op) {
tmp = neon_load_reg(a->vd, 0);
read_neon_element32(tmp, a->vd, 0, MO_32);
} else {
tmp = tcg_temp_new_i32();
tcg_gen_movi_i32(tmp, 0);
}
tmp2 = neon_load_reg(a->vm, 0);
tmp2 = tcg_temp_new_i32();
read_neon_element32(tmp2, a->vm, 0, MO_32);
ptr1 = vfp_reg_ptr(true, a->vn);
tmp4 = tcg_const_i32(n);
gen_helper_neon_tbl(tmp2, tmp2, tmp, ptr1, tmp4);
tcg_temp_free_i32(tmp);
if (a->op) {
tmp = neon_load_reg(a->vd, 1);
read_neon_element32(tmp, a->vd, 1, MO_32);
} else {
tmp = tcg_temp_new_i32();
tcg_gen_movi_i32(tmp, 0);
}
tmp3 = neon_load_reg(a->vm, 1);
tmp3 = tcg_temp_new_i32();
read_neon_element32(tmp3, a->vm, 1, MO_32);
gen_helper_neon_tbl(tmp3, tmp3, tmp, ptr1, tmp4);
tcg_temp_free_i32(tmp);
tcg_temp_free_i32(tmp4);
tcg_temp_free_ptr(ptr1);
neon_store_reg(a->vd, 0, tmp2);
neon_store_reg(a->vd, 1, tmp3);
tcg_temp_free_i32(tmp);
write_neon_element32(tmp2, a->vd, 0, MO_32);
write_neon_element32(tmp3, a->vd, 1, MO_32);
tcg_temp_free_i32(tmp2);
tcg_temp_free_i32(tmp3);
return true;
}
@ -2923,7 +2939,7 @@ static bool trans_VDUP_scalar(DisasContext *s, arg_VDUP_scalar *a)
return true;
}
tcg_gen_gvec_dup_mem(a->size, neon_reg_offset(a->vd, 0),
tcg_gen_gvec_dup_mem(a->size, neon_full_reg_offset(a->vd),
neon_element_offset(a->vm, a->index, a->size),
a->q ? 16 : 8, a->q ? 16 : 8);
return true;
@ -2932,6 +2948,7 @@ static bool trans_VDUP_scalar(DisasContext *s, arg_VDUP_scalar *a)
static bool trans_VREV64(DisasContext *s, arg_VREV64 *a)
{
int pass, half;
TCGv_i32 tmp[2];
if (!arm_dc_feature(s, ARM_FEATURE_NEON)) {
return false;
@ -2955,11 +2972,12 @@ static bool trans_VREV64(DisasContext *s, arg_VREV64 *a)
return true;
}
for (pass = 0; pass < (a->q ? 2 : 1); pass++) {
TCGv_i32 tmp[2];
tmp[0] = tcg_temp_new_i32();
tmp[1] = tcg_temp_new_i32();
for (pass = 0; pass < (a->q ? 2 : 1); pass++) {
for (half = 0; half < 2; half++) {
tmp[half] = neon_load_reg(a->vm, pass * 2 + half);
read_neon_element32(tmp[half], a->vm, pass * 2 + half, MO_32);
switch (a->size) {
case 0:
tcg_gen_bswap32_i32(tmp[half], tmp[half]);
@ -2973,9 +2991,12 @@ static bool trans_VREV64(DisasContext *s, arg_VREV64 *a)
g_assert_not_reached();
}
}
neon_store_reg(a->vd, pass * 2, tmp[1]);
neon_store_reg(a->vd, pass * 2 + 1, tmp[0]);
write_neon_element32(tmp[1], a->vd, pass * 2, MO_32);
write_neon_element32(tmp[0], a->vd, pass * 2 + 1, MO_32);
}
tcg_temp_free_i32(tmp[0]);
tcg_temp_free_i32(tmp[1]);
return true;
}
@ -3020,23 +3041,25 @@ static bool do_2misc_pairwise(DisasContext *s, arg_2misc *a,
rm0_64 = tcg_temp_new_i64();
rm1_64 = tcg_temp_new_i64();
rd_64 = tcg_temp_new_i64();
tmp = neon_load_reg(a->vm, pass * 2);
tmp = tcg_temp_new_i32();
read_neon_element32(tmp, a->vm, pass * 2, MO_32);
widenfn(rm0_64, tmp);
tcg_temp_free_i32(tmp);
tmp = neon_load_reg(a->vm, pass * 2 + 1);
read_neon_element32(tmp, a->vm, pass * 2 + 1, MO_32);
widenfn(rm1_64, tmp);
tcg_temp_free_i32(tmp);
opfn(rd_64, rm0_64, rm1_64);
tcg_temp_free_i64(rm0_64);
tcg_temp_free_i64(rm1_64);
if (accfn) {
TCGv_i64 tmp64 = tcg_temp_new_i64();
neon_load_reg64(tmp64, a->vd + pass);
read_neon_element64(tmp64, a->vd, pass, MO_64);
accfn(rd_64, tmp64, rd_64);
tcg_temp_free_i64(tmp64);
}
neon_store_reg64(rd_64, a->vd + pass);
write_neon_element64(rd_64, a->vd, pass, MO_64);
tcg_temp_free_i64(rd_64);
}
return true;
@ -3234,12 +3257,14 @@ static bool do_vmovn(DisasContext *s, arg_2misc *a,
rd0 = tcg_temp_new_i32();
rd1 = tcg_temp_new_i32();
neon_load_reg64(rm, a->vm);
read_neon_element64(rm, a->vm, 0, MO_64);
narrowfn(rd0, cpu_env, rm);
neon_load_reg64(rm, a->vm + 1);
read_neon_element64(rm, a->vm, 1, MO_64);
narrowfn(rd1, cpu_env, rm);
neon_store_reg(a->vd, 0, rd0);
neon_store_reg(a->vd, 1, rd1);
write_neon_element32(rd0, a->vd, 0, MO_32);
write_neon_element32(rd1, a->vd, 1, MO_32);
tcg_temp_free_i32(rd0);
tcg_temp_free_i32(rd1);
tcg_temp_free_i64(rm);
return true;
}
@ -3296,16 +3321,18 @@ static bool trans_VSHLL(DisasContext *s, arg_2misc *a)
}
rd = tcg_temp_new_i64();
rm0 = tcg_temp_new_i32();
rm1 = tcg_temp_new_i32();
rm0 = neon_load_reg(a->vm, 0);
rm1 = neon_load_reg(a->vm, 1);
read_neon_element32(rm0, a->vm, 0, MO_32);
read_neon_element32(rm1, a->vm, 1, MO_32);
widenfn(rd, rm0);
tcg_gen_shli_i64(rd, rd, 8 << a->size);
neon_store_reg64(rd, a->vd);
write_neon_element64(rd, a->vd, 0, MO_64);
widenfn(rd, rm1);
tcg_gen_shli_i64(rd, rd, 8 << a->size);
neon_store_reg64(rd, a->vd + 1);
write_neon_element64(rd, a->vd, 1, MO_64);
tcg_temp_free_i64(rd);
tcg_temp_free_i32(rm0);
@ -3339,21 +3366,25 @@ static bool trans_VCVT_F16_F32(DisasContext *s, arg_2misc *a)
fpst = fpstatus_ptr(FPST_STD);
ahp = get_ahp_flag();
tmp = neon_load_reg(a->vm, 0);
tmp = tcg_temp_new_i32();
read_neon_element32(tmp, a->vm, 0, MO_32);
gen_helper_vfp_fcvt_f32_to_f16(tmp, tmp, fpst, ahp);
tmp2 = neon_load_reg(a->vm, 1);
tmp2 = tcg_temp_new_i32();
read_neon_element32(tmp2, a->vm, 1, MO_32);
gen_helper_vfp_fcvt_f32_to_f16(tmp2, tmp2, fpst, ahp);
tcg_gen_shli_i32(tmp2, tmp2, 16);
tcg_gen_or_i32(tmp2, tmp2, tmp);
tcg_temp_free_i32(tmp);
tmp = neon_load_reg(a->vm, 2);
read_neon_element32(tmp, a->vm, 2, MO_32);
gen_helper_vfp_fcvt_f32_to_f16(tmp, tmp, fpst, ahp);
tmp3 = neon_load_reg(a->vm, 3);
neon_store_reg(a->vd, 0, tmp2);
tmp3 = tcg_temp_new_i32();
read_neon_element32(tmp3, a->vm, 3, MO_32);
write_neon_element32(tmp2, a->vd, 0, MO_32);
tcg_temp_free_i32(tmp2);
gen_helper_vfp_fcvt_f32_to_f16(tmp3, tmp3, fpst, ahp);
tcg_gen_shli_i32(tmp3, tmp3, 16);
tcg_gen_or_i32(tmp3, tmp3, tmp);
neon_store_reg(a->vd, 1, tmp3);
write_neon_element32(tmp3, a->vd, 1, MO_32);
tcg_temp_free_i32(tmp3);
tcg_temp_free_i32(tmp);
tcg_temp_free_i32(ahp);
tcg_temp_free_ptr(fpst);
@ -3388,21 +3419,25 @@ static bool trans_VCVT_F32_F16(DisasContext *s, arg_2misc *a)
fpst = fpstatus_ptr(FPST_STD);
ahp = get_ahp_flag();
tmp3 = tcg_temp_new_i32();
tmp = neon_load_reg(a->vm, 0);
tmp2 = neon_load_reg(a->vm, 1);
tmp2 = tcg_temp_new_i32();
tmp = tcg_temp_new_i32();
read_neon_element32(tmp, a->vm, 0, MO_32);
read_neon_element32(tmp2, a->vm, 1, MO_32);
tcg_gen_ext16u_i32(tmp3, tmp);
gen_helper_vfp_fcvt_f16_to_f32(tmp3, tmp3, fpst, ahp);
neon_store_reg(a->vd, 0, tmp3);
write_neon_element32(tmp3, a->vd, 0, MO_32);
tcg_gen_shri_i32(tmp, tmp, 16);
gen_helper_vfp_fcvt_f16_to_f32(tmp, tmp, fpst, ahp);
neon_store_reg(a->vd, 1, tmp);
tmp3 = tcg_temp_new_i32();
write_neon_element32(tmp, a->vd, 1, MO_32);
tcg_temp_free_i32(tmp);
tcg_gen_ext16u_i32(tmp3, tmp2);
gen_helper_vfp_fcvt_f16_to_f32(tmp3, tmp3, fpst, ahp);
neon_store_reg(a->vd, 2, tmp3);
write_neon_element32(tmp3, a->vd, 2, MO_32);
tcg_temp_free_i32(tmp3);
tcg_gen_shri_i32(tmp2, tmp2, 16);
gen_helper_vfp_fcvt_f16_to_f32(tmp2, tmp2, fpst, ahp);
neon_store_reg(a->vd, 3, tmp2);
write_neon_element32(tmp2, a->vd, 3, MO_32);
tcg_temp_free_i32(tmp2);
tcg_temp_free_i32(ahp);
tcg_temp_free_ptr(fpst);
@ -3412,8 +3447,8 @@ static bool trans_VCVT_F32_F16(DisasContext *s, arg_2misc *a)
static bool do_2misc_vec(DisasContext *s, arg_2misc *a, GVecGen2Fn *fn)
{
int vec_size = a->q ? 16 : 8;
int rd_ofs = neon_reg_offset(a->vd, 0);
int rm_ofs = neon_reg_offset(a->vm, 0);
int rd_ofs = neon_full_reg_offset(a->vd);
int rm_ofs = neon_full_reg_offset(a->vm);
if (!arm_dc_feature(s, ARM_FEATURE_NEON)) {
return false;
@ -3508,6 +3543,7 @@ DO_2M_CRYPTO(SHA256SU0, aa32_sha2, 2)
static bool do_2misc(DisasContext *s, arg_2misc *a, NeonGenOneOpFn *fn)
{
TCGv_i32 tmp;
int pass;
/* Handle a 2-reg-misc operation by iterating 32 bits at a time */
@ -3533,11 +3569,13 @@ static bool do_2misc(DisasContext *s, arg_2misc *a, NeonGenOneOpFn *fn)
return true;
}
tmp = tcg_temp_new_i32();
for (pass = 0; pass < (a->q ? 4 : 2); pass++) {
TCGv_i32 tmp = neon_load_reg(a->vm, pass);
read_neon_element32(tmp, a->vm, pass, MO_32);
fn(tmp, tmp);
neon_store_reg(a->vd, pass, tmp);
write_neon_element32(tmp, a->vd, pass, MO_32);
}
tcg_temp_free_i32(tmp);
return true;
}
@ -3812,10 +3850,10 @@ static bool trans_VSWP(DisasContext *s, arg_2misc *a)
rm = tcg_temp_new_i64();
rd = tcg_temp_new_i64();
for (pass = 0; pass < (a->q ? 2 : 1); pass++) {
neon_load_reg64(rm, a->vm + pass);
neon_load_reg64(rd, a->vd + pass);
neon_store_reg64(rm, a->vd + pass);
neon_store_reg64(rd, a->vm + pass);
read_neon_element64(rm, a->vm, pass, MO_64);
read_neon_element64(rd, a->vd, pass, MO_64);
write_neon_element64(rm, a->vd, pass, MO_64);
write_neon_element64(rd, a->vm, pass, MO_64);
}
tcg_temp_free_i64(rm);
tcg_temp_free_i64(rd);
@ -3890,25 +3928,29 @@ static bool trans_VTRN(DisasContext *s, arg_2misc *a)
return true;
}
if (a->size == 2) {
tmp = tcg_temp_new_i32();
tmp2 = tcg_temp_new_i32();
if (a->size == MO_32) {
for (pass = 0; pass < (a->q ? 4 : 2); pass += 2) {
tmp = neon_load_reg(a->vm, pass);
tmp2 = neon_load_reg(a->vd, pass + 1);
neon_store_reg(a->vm, pass, tmp2);
neon_store_reg(a->vd, pass + 1, tmp);
read_neon_element32(tmp, a->vm, pass, MO_32);
read_neon_element32(tmp2, a->vd, pass + 1, MO_32);
write_neon_element32(tmp2, a->vm, pass, MO_32);
write_neon_element32(tmp, a->vd, pass + 1, MO_32);
}
} else {
for (pass = 0; pass < (a->q ? 4 : 2); pass++) {
tmp = neon_load_reg(a->vm, pass);
tmp2 = neon_load_reg(a->vd, pass);
if (a->size == 0) {
read_neon_element32(tmp, a->vm, pass, MO_32);
read_neon_element32(tmp2, a->vd, pass, MO_32);
if (a->size == MO_8) {
gen_neon_trn_u8(tmp, tmp2);
} else {
gen_neon_trn_u16(tmp, tmp2);
}
neon_store_reg(a->vm, pass, tmp2);
neon_store_reg(a->vd, pass, tmp);
write_neon_element32(tmp2, a->vm, pass, MO_32);
write_neon_element32(tmp, a->vd, pass, MO_32);
}
}
tcg_temp_free_i32(tmp);
tcg_temp_free_i32(tmp2);
return true;
}


@ -236,8 +236,8 @@ static bool trans_VSEL(DisasContext *s, arg_VSEL *a)
tcg_gen_ext_i32_i64(nf, cpu_NF);
tcg_gen_ext_i32_i64(vf, cpu_VF);
neon_load_reg64(frn, rn);
neon_load_reg64(frm, rm);
vfp_load_reg64(frn, rn);
vfp_load_reg64(frm, rm);
switch (a->cc) {
case 0: /* eq: Z */
tcg_gen_movcond_i64(TCG_COND_EQ, dest, zf, zero,
@ -264,7 +264,7 @@ static bool trans_VSEL(DisasContext *s, arg_VSEL *a)
tcg_temp_free_i64(tmp);
break;
}
neon_store_reg64(dest, rd);
vfp_store_reg64(dest, rd);
tcg_temp_free_i64(frn);
tcg_temp_free_i64(frm);
tcg_temp_free_i64(dest);
@ -283,8 +283,8 @@ static bool trans_VSEL(DisasContext *s, arg_VSEL *a)
frn = tcg_temp_new_i32();
frm = tcg_temp_new_i32();
dest = tcg_temp_new_i32();
neon_load_reg32(frn, rn);
neon_load_reg32(frm, rm);
vfp_load_reg32(frn, rn);
vfp_load_reg32(frm, rm);
switch (a->cc) {
case 0: /* eq: Z */
tcg_gen_movcond_i32(TCG_COND_EQ, dest, cpu_ZF, zero,
@ -315,7 +315,7 @@ static bool trans_VSEL(DisasContext *s, arg_VSEL *a)
if (sz == 1) {
tcg_gen_andi_i32(dest, dest, 0xffff);
}
neon_store_reg32(dest, rd);
vfp_store_reg32(dest, rd);
tcg_temp_free_i32(frn);
tcg_temp_free_i32(frm);
tcg_temp_free_i32(dest);
@ -385,9 +385,9 @@ static bool trans_VRINT(DisasContext *s, arg_VRINT *a)
TCGv_i64 tcg_res;
tcg_op = tcg_temp_new_i64();
tcg_res = tcg_temp_new_i64();
neon_load_reg64(tcg_op, rm);
vfp_load_reg64(tcg_op, rm);
gen_helper_rintd(tcg_res, tcg_op, fpst);
neon_store_reg64(tcg_res, rd);
vfp_store_reg64(tcg_res, rd);
tcg_temp_free_i64(tcg_op);
tcg_temp_free_i64(tcg_res);
} else {
@ -395,13 +395,13 @@ static bool trans_VRINT(DisasContext *s, arg_VRINT *a)
TCGv_i32 tcg_res;
tcg_op = tcg_temp_new_i32();
tcg_res = tcg_temp_new_i32();
neon_load_reg32(tcg_op, rm);
vfp_load_reg32(tcg_op, rm);
if (sz == 1) {
gen_helper_rinth(tcg_res, tcg_op, fpst);
} else {
gen_helper_rints(tcg_res, tcg_op, fpst);
}
neon_store_reg32(tcg_res, rd);
vfp_store_reg32(tcg_res, rd);
tcg_temp_free_i32(tcg_op);
tcg_temp_free_i32(tcg_res);
}
@ -463,14 +463,14 @@ static bool trans_VCVT(DisasContext *s, arg_VCVT *a)
tcg_double = tcg_temp_new_i64();
tcg_res = tcg_temp_new_i64();
tcg_tmp = tcg_temp_new_i32();
neon_load_reg64(tcg_double, rm);
vfp_load_reg64(tcg_double, rm);
if (is_signed) {
gen_helper_vfp_tosld(tcg_res, tcg_double, tcg_shift, fpst);
} else {
gen_helper_vfp_tould(tcg_res, tcg_double, tcg_shift, fpst);
}
tcg_gen_extrl_i64_i32(tcg_tmp, tcg_res);
neon_store_reg32(tcg_tmp, rd);
vfp_store_reg32(tcg_tmp, rd);
tcg_temp_free_i32(tcg_tmp);
tcg_temp_free_i64(tcg_res);
tcg_temp_free_i64(tcg_double);
@ -478,7 +478,7 @@ static bool trans_VCVT(DisasContext *s, arg_VCVT *a)
TCGv_i32 tcg_single, tcg_res;
tcg_single = tcg_temp_new_i32();
tcg_res = tcg_temp_new_i32();
neon_load_reg32(tcg_single, rm);
vfp_load_reg32(tcg_single, rm);
if (sz == 1) {
if (is_signed) {
gen_helper_vfp_toslh(tcg_res, tcg_single, tcg_shift, fpst);
@ -492,7 +492,7 @@ static bool trans_VCVT(DisasContext *s, arg_VCVT *a)
gen_helper_vfp_touls(tcg_res, tcg_single, tcg_shift, fpst);
}
}
neon_store_reg32(tcg_res, rd);
vfp_store_reg32(tcg_res, rd);
tcg_temp_free_i32(tcg_res);
tcg_temp_free_i32(tcg_single);
}
@ -511,11 +511,9 @@ static bool trans_VMOV_to_gp(DisasContext *s, arg_VMOV_to_gp *a)
{
/* VMOV scalar to general purpose register */
TCGv_i32 tmp;
int pass;
uint32_t offset;
/* SIZE == 2 is a VFP instruction; otherwise NEON. */
if (a->size == 2
/* SIZE == MO_32 is a VFP instruction; otherwise NEON. */
if (a->size == MO_32
? !dc_isar_feature(aa32_fpsp_v2, s)
: !arm_dc_feature(s, ARM_FEATURE_NEON)) {
return false;
@ -526,44 +524,12 @@ static bool trans_VMOV_to_gp(DisasContext *s, arg_VMOV_to_gp *a)
return false;
}
offset = a->index << a->size;
pass = extract32(offset, 2, 1);
offset = extract32(offset, 0, 2) * 8;
if (!vfp_access_check(s)) {
return true;
}
tmp = neon_load_reg(a->vn, pass);
switch (a->size) {
case 0:
if (offset) {
tcg_gen_shri_i32(tmp, tmp, offset);
}
if (a->u) {
gen_uxtb(tmp);
} else {
gen_sxtb(tmp);
}
break;
case 1:
if (a->u) {
if (offset) {
tcg_gen_shri_i32(tmp, tmp, 16);
} else {
gen_uxth(tmp);
}
} else {
if (offset) {
tcg_gen_sari_i32(tmp, tmp, 16);
} else {
gen_sxth(tmp);
}
}
break;
case 2:
break;
}
tmp = tcg_temp_new_i32();
read_neon_element32(tmp, a->vn, a->index, a->size | (a->u ? 0 : MO_SIGN));
store_reg(s, a->rt, tmp);
return true;
@ -572,12 +538,10 @@ static bool trans_VMOV_to_gp(DisasContext *s, arg_VMOV_to_gp *a)
static bool trans_VMOV_from_gp(DisasContext *s, arg_VMOV_from_gp *a)
{
/* VMOV general purpose register to scalar */
TCGv_i32 tmp, tmp2;
int pass;
uint32_t offset;
TCGv_i32 tmp;
/* SIZE == 2 is a VFP instruction; otherwise NEON. */
if (a->size == 2
/* SIZE == MO_32 is a VFP instruction; otherwise NEON. */
if (a->size == MO_32
? !dc_isar_feature(aa32_fpsp_v2, s)
: !arm_dc_feature(s, ARM_FEATURE_NEON)) {
return false;
@ -588,30 +552,13 @@ static bool trans_VMOV_from_gp(DisasContext *s, arg_VMOV_from_gp *a)
return false;
}
offset = a->index << a->size;
pass = extract32(offset, 2, 1);
offset = extract32(offset, 0, 2) * 8;
if (!vfp_access_check(s)) {
return true;
}
tmp = load_reg(s, a->rt);
switch (a->size) {
case 0:
tmp2 = neon_load_reg(a->vn, pass);
tcg_gen_deposit_i32(tmp, tmp2, tmp, offset, 8);
tcg_temp_free_i32(tmp2);
break;
case 1:
tmp2 = neon_load_reg(a->vn, pass);
tcg_gen_deposit_i32(tmp, tmp2, tmp, offset, 16);
tcg_temp_free_i32(tmp2);
break;
case 2:
break;
}
neon_store_reg(a->vn, pass, tmp);
write_neon_element32(tmp, a->vn, a->index, a->size);
tcg_temp_free_i32(tmp);
return true;
}
@ -653,7 +600,7 @@ static bool trans_VDUP(DisasContext *s, arg_VDUP *a)
}
tmp = load_reg(s, a->rt);
tcg_gen_gvec_dup_i32(size, neon_reg_offset(a->vn, 0),
tcg_gen_gvec_dup_i32(size, neon_full_reg_offset(a->vn),
vec_size, vec_size, tmp);
tcg_temp_free_i32(tmp);
@ -829,14 +776,14 @@ static bool trans_VMOV_half(DisasContext *s, arg_VMOV_single *a)
if (a->l) {
/* VFP to general purpose register */
tmp = tcg_temp_new_i32();
neon_load_reg32(tmp, a->vn);
vfp_load_reg32(tmp, a->vn);
tcg_gen_andi_i32(tmp, tmp, 0xffff);
store_reg(s, a->rt, tmp);
} else {
/* general purpose register to VFP */
tmp = load_reg(s, a->rt);
tcg_gen_andi_i32(tmp, tmp, 0xffff);
neon_store_reg32(tmp, a->vn);
vfp_store_reg32(tmp, a->vn);
tcg_temp_free_i32(tmp);
}
@ -858,7 +805,7 @@ static bool trans_VMOV_single(DisasContext *s, arg_VMOV_single *a)
if (a->l) {
/* VFP to general purpose register */
tmp = tcg_temp_new_i32();
neon_load_reg32(tmp, a->vn);
vfp_load_reg32(tmp, a->vn);
if (a->rt == 15) {
/* Set the 4 flag bits in the CPSR. */
gen_set_nzcv(tmp);
@ -869,7 +816,7 @@ static bool trans_VMOV_single(DisasContext *s, arg_VMOV_single *a)
} else {
/* general purpose register to VFP */
tmp = load_reg(s, a->rt);
neon_store_reg32(tmp, a->vn);
vfp_store_reg32(tmp, a->vn);
tcg_temp_free_i32(tmp);
}
@ -895,18 +842,18 @@ static bool trans_VMOV_64_sp(DisasContext *s, arg_VMOV_64_sp *a)
if (a->op) {
/* fpreg to gpreg */
tmp = tcg_temp_new_i32();
neon_load_reg32(tmp, a->vm);
vfp_load_reg32(tmp, a->vm);
store_reg(s, a->rt, tmp);
tmp = tcg_temp_new_i32();
neon_load_reg32(tmp, a->vm + 1);
vfp_load_reg32(tmp, a->vm + 1);
store_reg(s, a->rt2, tmp);
} else {
/* gpreg to fpreg */
tmp = load_reg(s, a->rt);
neon_store_reg32(tmp, a->vm);
vfp_store_reg32(tmp, a->vm);
tcg_temp_free_i32(tmp);
tmp = load_reg(s, a->rt2);
neon_store_reg32(tmp, a->vm + 1);
vfp_store_reg32(tmp, a->vm + 1);
tcg_temp_free_i32(tmp);
}
@ -938,18 +885,18 @@ static bool trans_VMOV_64_dp(DisasContext *s, arg_VMOV_64_dp *a)
if (a->op) {
/* fpreg to gpreg */
tmp = tcg_temp_new_i32();
neon_load_reg32(tmp, a->vm * 2);
vfp_load_reg32(tmp, a->vm * 2);
store_reg(s, a->rt, tmp);
tmp = tcg_temp_new_i32();
neon_load_reg32(tmp, a->vm * 2 + 1);
vfp_load_reg32(tmp, a->vm * 2 + 1);
store_reg(s, a->rt2, tmp);
} else {
/* gpreg to fpreg */
tmp = load_reg(s, a->rt);
neon_store_reg32(tmp, a->vm * 2);
vfp_store_reg32(tmp, a->vm * 2);
tcg_temp_free_i32(tmp);
tmp = load_reg(s, a->rt2);
neon_store_reg32(tmp, a->vm * 2 + 1);
vfp_store_reg32(tmp, a->vm * 2 + 1);
tcg_temp_free_i32(tmp);
}
@ -980,9 +927,9 @@ static bool trans_VLDR_VSTR_hp(DisasContext *s, arg_VLDR_VSTR_sp *a)
tmp = tcg_temp_new_i32();
if (a->l) {
gen_aa32_ld16u(s, tmp, addr, get_mem_index(s));
neon_store_reg32(tmp, a->vd);
vfp_store_reg32(tmp, a->vd);
} else {
neon_load_reg32(tmp, a->vd);
vfp_load_reg32(tmp, a->vd);
gen_aa32_st16(s, tmp, addr, get_mem_index(s));
}
tcg_temp_free_i32(tmp);
@ -1014,9 +961,9 @@ static bool trans_VLDR_VSTR_sp(DisasContext *s, arg_VLDR_VSTR_sp *a)
tmp = tcg_temp_new_i32();
if (a->l) {
gen_aa32_ld32u(s, tmp, addr, get_mem_index(s));
neon_store_reg32(tmp, a->vd);
vfp_store_reg32(tmp, a->vd);
} else {
neon_load_reg32(tmp, a->vd);
vfp_load_reg32(tmp, a->vd);
gen_aa32_st32(s, tmp, addr, get_mem_index(s));
}
tcg_temp_free_i32(tmp);
@ -1055,9 +1002,9 @@ static bool trans_VLDR_VSTR_dp(DisasContext *s, arg_VLDR_VSTR_dp *a)
tmp = tcg_temp_new_i64();
if (a->l) {
gen_aa32_ld64(s, tmp, addr, get_mem_index(s));
neon_store_reg64(tmp, a->vd);
vfp_store_reg64(tmp, a->vd);
} else {
neon_load_reg64(tmp, a->vd);
vfp_load_reg64(tmp, a->vd);
gen_aa32_st64(s, tmp, addr, get_mem_index(s));
}
tcg_temp_free_i64(tmp);
@ -1119,10 +1066,10 @@ static bool trans_VLDM_VSTM_sp(DisasContext *s, arg_VLDM_VSTM_sp *a)
if (a->l) {
/* load */
gen_aa32_ld32u(s, tmp, addr, get_mem_index(s));
neon_store_reg32(tmp, a->vd + i);
vfp_store_reg32(tmp, a->vd + i);
} else {
/* store */
neon_load_reg32(tmp, a->vd + i);
vfp_load_reg32(tmp, a->vd + i);
gen_aa32_st32(s, tmp, addr, get_mem_index(s));
}
tcg_gen_addi_i32(addr, addr, offset);
@ -1202,10 +1149,10 @@ static bool trans_VLDM_VSTM_dp(DisasContext *s, arg_VLDM_VSTM_dp *a)
if (a->l) {
/* load */
gen_aa32_ld64(s, tmp, addr, get_mem_index(s));
neon_store_reg64(tmp, a->vd + i);
vfp_store_reg64(tmp, a->vd + i);
} else {
/* store */
neon_load_reg64(tmp, a->vd + i);
vfp_load_reg64(tmp, a->vd + i);
gen_aa32_st64(s, tmp, addr, get_mem_index(s));
}
tcg_gen_addi_i32(addr, addr, offset);
@ -1338,15 +1285,15 @@ static bool do_vfp_3op_sp(DisasContext *s, VFPGen3OpSPFn *fn,
fd = tcg_temp_new_i32();
fpst = fpstatus_ptr(FPST_FPCR);
neon_load_reg32(f0, vn);
neon_load_reg32(f1, vm);
vfp_load_reg32(f0, vn);
vfp_load_reg32(f1, vm);
for (;;) {
if (reads_vd) {
neon_load_reg32(fd, vd);
vfp_load_reg32(fd, vd);
}
fn(fd, f0, f1, fpst);
neon_store_reg32(fd, vd);
vfp_store_reg32(fd, vd);
if (veclen == 0) {
break;
@ -1356,10 +1303,10 @@ static bool do_vfp_3op_sp(DisasContext *s, VFPGen3OpSPFn *fn,
veclen--;
vd = vfp_advance_sreg(vd, delta_d);
vn = vfp_advance_sreg(vn, delta_d);
neon_load_reg32(f0, vn);
vfp_load_reg32(f0, vn);
if (delta_m) {
vm = vfp_advance_sreg(vm, delta_m);
neon_load_reg32(f1, vm);
vfp_load_reg32(f1, vm);
}
}
@ -1402,14 +1349,14 @@ static bool do_vfp_3op_hp(DisasContext *s, VFPGen3OpSPFn *fn,
fd = tcg_temp_new_i32();
fpst = fpstatus_ptr(FPST_FPCR_F16);
neon_load_reg32(f0, vn);
neon_load_reg32(f1, vm);
vfp_load_reg32(f0, vn);
vfp_load_reg32(f1, vm);
if (reads_vd) {
neon_load_reg32(fd, vd);
vfp_load_reg32(fd, vd);
}
fn(fd, f0, f1, fpst);
neon_store_reg32(fd, vd);
vfp_store_reg32(fd, vd);
tcg_temp_free_i32(f0);
tcg_temp_free_i32(f1);
@ -1469,15 +1416,15 @@ static bool do_vfp_3op_dp(DisasContext *s, VFPGen3OpDPFn *fn,
fd = tcg_temp_new_i64();
fpst = fpstatus_ptr(FPST_FPCR);
neon_load_reg64(f0, vn);
neon_load_reg64(f1, vm);
vfp_load_reg64(f0, vn);
vfp_load_reg64(f1, vm);
for (;;) {
if (reads_vd) {
neon_load_reg64(fd, vd);
vfp_load_reg64(fd, vd);
}
fn(fd, f0, f1, fpst);
neon_store_reg64(fd, vd);
vfp_store_reg64(fd, vd);
if (veclen == 0) {
break;
@ -1486,10 +1433,10 @@ static bool do_vfp_3op_dp(DisasContext *s, VFPGen3OpDPFn *fn,
veclen--;
vd = vfp_advance_dreg(vd, delta_d);
vn = vfp_advance_dreg(vn, delta_d);
neon_load_reg64(f0, vn);
vfp_load_reg64(f0, vn);
if (delta_m) {
vm = vfp_advance_dreg(vm, delta_m);
neon_load_reg64(f1, vm);
vfp_load_reg64(f1, vm);
}
}
@ -1542,11 +1489,11 @@ static bool do_vfp_2op_sp(DisasContext *s, VFPGen2OpSPFn *fn, int vd, int vm)
f0 = tcg_temp_new_i32();
fd = tcg_temp_new_i32();
neon_load_reg32(f0, vm);
vfp_load_reg32(f0, vm);
for (;;) {
fn(fd, f0);
neon_store_reg32(fd, vd);
vfp_store_reg32(fd, vd);
if (veclen == 0) {
break;
@ -1556,7 +1503,7 @@ static bool do_vfp_2op_sp(DisasContext *s, VFPGen2OpSPFn *fn, int vd, int vm)
/* single source one-many */
while (veclen--) {
vd = vfp_advance_sreg(vd, delta_d);
neon_store_reg32(fd, vd);
vfp_store_reg32(fd, vd);
}
break;
}
@ -1565,7 +1512,7 @@ static bool do_vfp_2op_sp(DisasContext *s, VFPGen2OpSPFn *fn, int vd, int vm)
veclen--;
vd = vfp_advance_sreg(vd, delta_d);
vm = vfp_advance_sreg(vm, delta_m);
neon_load_reg32(f0, vm);
vfp_load_reg32(f0, vm);
}
tcg_temp_free_i32(f0);
@ -1598,9 +1545,9 @@ static bool do_vfp_2op_hp(DisasContext *s, VFPGen2OpSPFn *fn, int vd, int vm)
}
f0 = tcg_temp_new_i32();
neon_load_reg32(f0, vm);
vfp_load_reg32(f0, vm);
fn(f0, f0);
neon_store_reg32(f0, vd);
vfp_store_reg32(f0, vd);
tcg_temp_free_i32(f0);
return true;
@ -1652,11 +1599,11 @@ static bool do_vfp_2op_dp(DisasContext *s, VFPGen2OpDPFn *fn, int vd, int vm)
f0 = tcg_temp_new_i64();
fd = tcg_temp_new_i64();
neon_load_reg64(f0, vm);
vfp_load_reg64(f0, vm);
for (;;) {
fn(fd, f0);
neon_store_reg64(fd, vd);
vfp_store_reg64(fd, vd);
if (veclen == 0) {
break;
@ -1666,7 +1613,7 @@ static bool do_vfp_2op_dp(DisasContext *s, VFPGen2OpDPFn *fn, int vd, int vm)
/* single source one-many */
while (veclen--) {
vd = vfp_advance_dreg(vd, delta_d);
neon_store_reg64(fd, vd);
vfp_store_reg64(fd, vd);
}
break;
}
@ -1675,7 +1622,7 @@ static bool do_vfp_2op_dp(DisasContext *s, VFPGen2OpDPFn *fn, int vd, int vm)
veclen--;
vd = vfp_advance_dreg(vd, delta_d);
vd = vfp_advance_dreg(vm, delta_m);
neon_load_reg64(f0, vm);
vfp_load_reg64(f0, vm);
}
tcg_temp_free_i64(f0);
@ -2090,20 +2037,20 @@ static bool do_vfm_hp(DisasContext *s, arg_VFMA_sp *a, bool neg_n, bool neg_d)
vm = tcg_temp_new_i32();
vd = tcg_temp_new_i32();
neon_load_reg32(vn, a->vn);
neon_load_reg32(vm, a->vm);
vfp_load_reg32(vn, a->vn);
vfp_load_reg32(vm, a->vm);
if (neg_n) {
/* VFNMS, VFMS */
gen_helper_vfp_negh(vn, vn);
}
neon_load_reg32(vd, a->vd);
vfp_load_reg32(vd, a->vd);
if (neg_d) {
/* VFNMA, VFNMS */
gen_helper_vfp_negh(vd, vd);
}
fpst = fpstatus_ptr(FPST_FPCR_F16);
gen_helper_vfp_muladdh(vd, vn, vm, vd, fpst);
neon_store_reg32(vd, a->vd);
vfp_store_reg32(vd, a->vd);
tcg_temp_free_ptr(fpst);
tcg_temp_free_i32(vn);
@ -2155,20 +2102,20 @@ static bool do_vfm_sp(DisasContext *s, arg_VFMA_sp *a, bool neg_n, bool neg_d)
vm = tcg_temp_new_i32();
vd = tcg_temp_new_i32();
neon_load_reg32(vn, a->vn);
neon_load_reg32(vm, a->vm);
vfp_load_reg32(vn, a->vn);
vfp_load_reg32(vm, a->vm);
if (neg_n) {
/* VFNMS, VFMS */
gen_helper_vfp_negs(vn, vn);
}
neon_load_reg32(vd, a->vd);
vfp_load_reg32(vd, a->vd);
if (neg_d) {
/* VFNMA, VFNMS */
gen_helper_vfp_negs(vd, vd);
}
fpst = fpstatus_ptr(FPST_FPCR);
gen_helper_vfp_muladds(vd, vn, vm, vd, fpst);
neon_store_reg32(vd, a->vd);
vfp_store_reg32(vd, a->vd);
tcg_temp_free_ptr(fpst);
tcg_temp_free_i32(vn);
@ -2226,20 +2173,20 @@ static bool do_vfm_dp(DisasContext *s, arg_VFMA_dp *a, bool neg_n, bool neg_d)
vm = tcg_temp_new_i64();
vd = tcg_temp_new_i64();
neon_load_reg64(vn, a->vn);
neon_load_reg64(vm, a->vm);
vfp_load_reg64(vn, a->vn);
vfp_load_reg64(vm, a->vm);
if (neg_n) {
/* VFNMS, VFMS */
gen_helper_vfp_negd(vn, vn);
}
neon_load_reg64(vd, a->vd);
vfp_load_reg64(vd, a->vd);
if (neg_d) {
/* VFNMA, VFNMS */
gen_helper_vfp_negd(vd, vd);
}
fpst = fpstatus_ptr(FPST_FPCR);
gen_helper_vfp_muladdd(vd, vn, vm, vd, fpst);
neon_store_reg64(vd, a->vd);
vfp_store_reg64(vd, a->vd);
tcg_temp_free_ptr(fpst);
tcg_temp_free_i64(vn);
@ -2283,7 +2230,7 @@ static bool trans_VMOV_imm_hp(DisasContext *s, arg_VMOV_imm_sp *a)
}
fd = tcg_const_i32(vfp_expand_imm(MO_16, a->imm));
neon_store_reg32(fd, a->vd);
vfp_store_reg32(fd, a->vd);
tcg_temp_free_i32(fd);
return true;
}
@ -2323,7 +2270,7 @@ static bool trans_VMOV_imm_sp(DisasContext *s, arg_VMOV_imm_sp *a)
fd = tcg_const_i32(vfp_expand_imm(MO_32, a->imm));
for (;;) {
neon_store_reg32(fd, vd);
vfp_store_reg32(fd, vd);
if (veclen == 0) {
break;
@ -2378,7 +2325,7 @@ static bool trans_VMOV_imm_dp(DisasContext *s, arg_VMOV_imm_dp *a)
fd = tcg_const_i64(vfp_expand_imm(MO_64, a->imm));
for (;;) {
neon_store_reg64(fd, vd);
vfp_store_reg64(fd, vd);
if (veclen == 0) {
break;
@ -2450,11 +2397,11 @@ static bool trans_VCMP_hp(DisasContext *s, arg_VCMP_sp *a)
vd = tcg_temp_new_i32();
vm = tcg_temp_new_i32();
neon_load_reg32(vd, a->vd);
vfp_load_reg32(vd, a->vd);
if (a->z) {
tcg_gen_movi_i32(vm, 0);
} else {
neon_load_reg32(vm, a->vm);
vfp_load_reg32(vm, a->vm);
}
if (a->e) {
@ -2489,11 +2436,11 @@ static bool trans_VCMP_sp(DisasContext *s, arg_VCMP_sp *a)
vd = tcg_temp_new_i32();
vm = tcg_temp_new_i32();
neon_load_reg32(vd, a->vd);
vfp_load_reg32(vd, a->vd);
if (a->z) {
tcg_gen_movi_i32(vm, 0);
} else {
neon_load_reg32(vm, a->vm);
vfp_load_reg32(vm, a->vm);
}
if (a->e) {
@ -2533,11 +2480,11 @@ static bool trans_VCMP_dp(DisasContext *s, arg_VCMP_dp *a)
vd = tcg_temp_new_i64();
vm = tcg_temp_new_i64();
neon_load_reg64(vd, a->vd);
vfp_load_reg64(vd, a->vd);
if (a->z) {
tcg_gen_movi_i64(vm, 0);
} else {
neon_load_reg64(vm, a->vm);
vfp_load_reg64(vm, a->vm);
}
if (a->e) {
@ -2572,7 +2519,7 @@ static bool trans_VCVT_f32_f16(DisasContext *s, arg_VCVT_f32_f16 *a)
/* The T bit tells us if we want the low or high 16 bits of Vm */
tcg_gen_ld16u_i32(tmp, cpu_env, vfp_f16_offset(a->vm, a->t));
gen_helper_vfp_fcvt_f16_to_f32(tmp, tmp, fpst, ahp_mode);
neon_store_reg32(tmp, a->vd);
vfp_store_reg32(tmp, a->vd);
tcg_temp_free_i32(ahp_mode);
tcg_temp_free_ptr(fpst);
tcg_temp_free_i32(tmp);
@ -2610,7 +2557,7 @@ static bool trans_VCVT_f64_f16(DisasContext *s, arg_VCVT_f64_f16 *a)
tcg_gen_ld16u_i32(tmp, cpu_env, vfp_f16_offset(a->vm, a->t));
vd = tcg_temp_new_i64();
gen_helper_vfp_fcvt_f16_to_f64(vd, tmp, fpst, ahp_mode);
neon_store_reg64(vd, a->vd);
vfp_store_reg64(vd, a->vd);
tcg_temp_free_i32(ahp_mode);
tcg_temp_free_ptr(fpst);
tcg_temp_free_i32(tmp);
@ -2636,7 +2583,7 @@ static bool trans_VCVT_f16_f32(DisasContext *s, arg_VCVT_f16_f32 *a)
ahp_mode = get_ahp_flag();
tmp = tcg_temp_new_i32();
neon_load_reg32(tmp, a->vm);
vfp_load_reg32(tmp, a->vm);
gen_helper_vfp_fcvt_f32_to_f16(tmp, tmp, fpst, ahp_mode);
tcg_gen_st16_i32(tmp, cpu_env, vfp_f16_offset(a->vd, a->t));
tcg_temp_free_i32(ahp_mode);
@ -2674,7 +2621,7 @@ static bool trans_VCVT_f16_f64(DisasContext *s, arg_VCVT_f16_f64 *a)
tmp = tcg_temp_new_i32();
vm = tcg_temp_new_i64();
neon_load_reg64(vm, a->vm);
vfp_load_reg64(vm, a->vm);
gen_helper_vfp_fcvt_f64_to_f16(tmp, vm, fpst, ahp_mode);
tcg_temp_free_i64(vm);
tcg_gen_st16_i32(tmp, cpu_env, vfp_f16_offset(a->vd, a->t));
@ -2698,10 +2645,10 @@ static bool trans_VRINTR_hp(DisasContext *s, arg_VRINTR_sp *a)
}
tmp = tcg_temp_new_i32();
neon_load_reg32(tmp, a->vm);
vfp_load_reg32(tmp, a->vm);
fpst = fpstatus_ptr(FPST_FPCR_F16);
gen_helper_rinth(tmp, tmp, fpst);
neon_store_reg32(tmp, a->vd);
vfp_store_reg32(tmp, a->vd);
tcg_temp_free_ptr(fpst);
tcg_temp_free_i32(tmp);
return true;
@ -2721,10 +2668,10 @@ static bool trans_VRINTR_sp(DisasContext *s, arg_VRINTR_sp *a)
}
tmp = tcg_temp_new_i32();
neon_load_reg32(tmp, a->vm);
vfp_load_reg32(tmp, a->vm);
fpst = fpstatus_ptr(FPST_FPCR);
gen_helper_rints(tmp, tmp, fpst);
neon_store_reg32(tmp, a->vd);
vfp_store_reg32(tmp, a->vd);
tcg_temp_free_ptr(fpst);
tcg_temp_free_i32(tmp);
return true;
@ -2753,10 +2700,10 @@ static bool trans_VRINTR_dp(DisasContext *s, arg_VRINTR_dp *a)
}
tmp = tcg_temp_new_i64();
neon_load_reg64(tmp, a->vm);
vfp_load_reg64(tmp, a->vm);
fpst = fpstatus_ptr(FPST_FPCR);
gen_helper_rintd(tmp, tmp, fpst);
neon_store_reg64(tmp, a->vd);
vfp_store_reg64(tmp, a->vd);
tcg_temp_free_ptr(fpst);
tcg_temp_free_i64(tmp);
return true;
@ -2777,13 +2724,13 @@ static bool trans_VRINTZ_hp(DisasContext *s, arg_VRINTZ_sp *a)
}
tmp = tcg_temp_new_i32();
neon_load_reg32(tmp, a->vm);
vfp_load_reg32(tmp, a->vm);
fpst = fpstatus_ptr(FPST_FPCR_F16);
tcg_rmode = tcg_const_i32(float_round_to_zero);
gen_helper_set_rmode(tcg_rmode, tcg_rmode, fpst);
gen_helper_rinth(tmp, tmp, fpst);
gen_helper_set_rmode(tcg_rmode, tcg_rmode, fpst);
neon_store_reg32(tmp, a->vd);
vfp_store_reg32(tmp, a->vd);
tcg_temp_free_ptr(fpst);
tcg_temp_free_i32(tcg_rmode);
tcg_temp_free_i32(tmp);
@ -2805,13 +2752,13 @@ static bool trans_VRINTZ_sp(DisasContext *s, arg_VRINTZ_sp *a)
}
tmp = tcg_temp_new_i32();
neon_load_reg32(tmp, a->vm);
vfp_load_reg32(tmp, a->vm);
fpst = fpstatus_ptr(FPST_FPCR);
tcg_rmode = tcg_const_i32(float_round_to_zero);
gen_helper_set_rmode(tcg_rmode, tcg_rmode, fpst);
gen_helper_rints(tmp, tmp, fpst);
gen_helper_set_rmode(tcg_rmode, tcg_rmode, fpst);
neon_store_reg32(tmp, a->vd);
vfp_store_reg32(tmp, a->vd);
tcg_temp_free_ptr(fpst);
tcg_temp_free_i32(tcg_rmode);
tcg_temp_free_i32(tmp);
@ -2842,13 +2789,13 @@ static bool trans_VRINTZ_dp(DisasContext *s, arg_VRINTZ_dp *a)
}
tmp = tcg_temp_new_i64();
neon_load_reg64(tmp, a->vm);
vfp_load_reg64(tmp, a->vm);
fpst = fpstatus_ptr(FPST_FPCR);
tcg_rmode = tcg_const_i32(float_round_to_zero);
gen_helper_set_rmode(tcg_rmode, tcg_rmode, fpst);
gen_helper_rintd(tmp, tmp, fpst);
gen_helper_set_rmode(tcg_rmode, tcg_rmode, fpst);
neon_store_reg64(tmp, a->vd);
vfp_store_reg64(tmp, a->vd);
tcg_temp_free_ptr(fpst);
tcg_temp_free_i64(tmp);
tcg_temp_free_i32(tcg_rmode);
@ -2869,10 +2816,10 @@ static bool trans_VRINTX_hp(DisasContext *s, arg_VRINTX_sp *a)
}
tmp = tcg_temp_new_i32();
neon_load_reg32(tmp, a->vm);
vfp_load_reg32(tmp, a->vm);
fpst = fpstatus_ptr(FPST_FPCR_F16);
gen_helper_rinth_exact(tmp, tmp, fpst);
neon_store_reg32(tmp, a->vd);
vfp_store_reg32(tmp, a->vd);
tcg_temp_free_ptr(fpst);
tcg_temp_free_i32(tmp);
return true;
@ -2892,10 +2839,10 @@ static bool trans_VRINTX_sp(DisasContext *s, arg_VRINTX_sp *a)
}
tmp = tcg_temp_new_i32();
neon_load_reg32(tmp, a->vm);
vfp_load_reg32(tmp, a->vm);
fpst = fpstatus_ptr(FPST_FPCR);
gen_helper_rints_exact(tmp, tmp, fpst);
neon_store_reg32(tmp, a->vd);
vfp_store_reg32(tmp, a->vd);
tcg_temp_free_ptr(fpst);
tcg_temp_free_i32(tmp);
return true;
@ -2924,10 +2871,10 @@ static bool trans_VRINTX_dp(DisasContext *s, arg_VRINTX_dp *a)
}
tmp = tcg_temp_new_i64();
neon_load_reg64(tmp, a->vm);
vfp_load_reg64(tmp, a->vm);
fpst = fpstatus_ptr(FPST_FPCR);
gen_helper_rintd_exact(tmp, tmp, fpst);
neon_store_reg64(tmp, a->vd);
vfp_store_reg64(tmp, a->vd);
tcg_temp_free_ptr(fpst);
tcg_temp_free_i64(tmp);
return true;
@ -2953,9 +2900,9 @@ static bool trans_VCVT_sp(DisasContext *s, arg_VCVT_sp *a)
vm = tcg_temp_new_i32();
vd = tcg_temp_new_i64();
neon_load_reg32(vm, a->vm);
vfp_load_reg32(vm, a->vm);
gen_helper_vfp_fcvtds(vd, vm, cpu_env);
neon_store_reg64(vd, a->vd);
vfp_store_reg64(vd, a->vd);
tcg_temp_free_i32(vm);
tcg_temp_free_i64(vd);
return true;
@ -2981,9 +2928,9 @@ static bool trans_VCVT_dp(DisasContext *s, arg_VCVT_dp *a)
vd = tcg_temp_new_i32();
vm = tcg_temp_new_i64();
neon_load_reg64(vm, a->vm);
vfp_load_reg64(vm, a->vm);
gen_helper_vfp_fcvtsd(vd, vm, cpu_env);
neon_store_reg32(vd, a->vd);
vfp_store_reg32(vd, a->vd);
tcg_temp_free_i32(vd);
tcg_temp_free_i64(vm);
return true;
@ -3003,7 +2950,7 @@ static bool trans_VCVT_int_hp(DisasContext *s, arg_VCVT_int_sp *a)
}
vm = tcg_temp_new_i32();
neon_load_reg32(vm, a->vm);
vfp_load_reg32(vm, a->vm);
fpst = fpstatus_ptr(FPST_FPCR_F16);
if (a->s) {
/* i32 -> f16 */
@ -3012,7 +2959,7 @@ static bool trans_VCVT_int_hp(DisasContext *s, arg_VCVT_int_sp *a)
/* u32 -> f16 */
gen_helper_vfp_uitoh(vm, vm, fpst);
}
neon_store_reg32(vm, a->vd);
vfp_store_reg32(vm, a->vd);
tcg_temp_free_i32(vm);
tcg_temp_free_ptr(fpst);
return true;
@ -3032,7 +2979,7 @@ static bool trans_VCVT_int_sp(DisasContext *s, arg_VCVT_int_sp *a)
}
vm = tcg_temp_new_i32();
neon_load_reg32(vm, a->vm);
vfp_load_reg32(vm, a->vm);
fpst = fpstatus_ptr(FPST_FPCR);
if (a->s) {
/* i32 -> f32 */
@ -3041,7 +2988,7 @@ static bool trans_VCVT_int_sp(DisasContext *s, arg_VCVT_int_sp *a)
/* u32 -> f32 */
gen_helper_vfp_uitos(vm, vm, fpst);
}
neon_store_reg32(vm, a->vd);
vfp_store_reg32(vm, a->vd);
tcg_temp_free_i32(vm);
tcg_temp_free_ptr(fpst);
return true;
@ -3068,7 +3015,7 @@ static bool trans_VCVT_int_dp(DisasContext *s, arg_VCVT_int_dp *a)
vm = tcg_temp_new_i32();
vd = tcg_temp_new_i64();
neon_load_reg32(vm, a->vm);
vfp_load_reg32(vm, a->vm);
fpst = fpstatus_ptr(FPST_FPCR);
if (a->s) {
/* i32 -> f64 */
@ -3077,7 +3024,7 @@ static bool trans_VCVT_int_dp(DisasContext *s, arg_VCVT_int_dp *a)
/* u32 -> f64 */
gen_helper_vfp_uitod(vd, vm, fpst);
}
neon_store_reg64(vd, a->vd);
vfp_store_reg64(vd, a->vd);
tcg_temp_free_i32(vm);
tcg_temp_free_i64(vd);
tcg_temp_free_ptr(fpst);
@ -3108,9 +3055,9 @@ static bool trans_VJCVT(DisasContext *s, arg_VJCVT *a)
vm = tcg_temp_new_i64();
vd = tcg_temp_new_i32();
neon_load_reg64(vm, a->vm);
vfp_load_reg64(vm, a->vm);
gen_helper_vjcvt(vd, vm, cpu_env);
neon_store_reg32(vd, a->vd);
vfp_store_reg32(vd, a->vd);
tcg_temp_free_i64(vm);
tcg_temp_free_i32(vd);
return true;
@ -3133,7 +3080,7 @@ static bool trans_VCVT_fix_hp(DisasContext *s, arg_VCVT_fix_sp *a)
frac_bits = (a->opc & 1) ? (32 - a->imm) : (16 - a->imm);
vd = tcg_temp_new_i32();
neon_load_reg32(vd, a->vd);
vfp_load_reg32(vd, a->vd);
fpst = fpstatus_ptr(FPST_FPCR_F16);
shift = tcg_const_i32(frac_bits);
@ -3168,7 +3115,7 @@ static bool trans_VCVT_fix_hp(DisasContext *s, arg_VCVT_fix_sp *a)
g_assert_not_reached();
}
neon_store_reg32(vd, a->vd);
vfp_store_reg32(vd, a->vd);
tcg_temp_free_i32(vd);
tcg_temp_free_i32(shift);
tcg_temp_free_ptr(fpst);
@ -3192,7 +3139,7 @@ static bool trans_VCVT_fix_sp(DisasContext *s, arg_VCVT_fix_sp *a)
frac_bits = (a->opc & 1) ? (32 - a->imm) : (16 - a->imm);
vd = tcg_temp_new_i32();
neon_load_reg32(vd, a->vd);
vfp_load_reg32(vd, a->vd);
fpst = fpstatus_ptr(FPST_FPCR);
shift = tcg_const_i32(frac_bits);
@ -3227,7 +3174,7 @@ static bool trans_VCVT_fix_sp(DisasContext *s, arg_VCVT_fix_sp *a)
g_assert_not_reached();
}
neon_store_reg32(vd, a->vd);
vfp_store_reg32(vd, a->vd);
tcg_temp_free_i32(vd);
tcg_temp_free_i32(shift);
tcg_temp_free_ptr(fpst);
@ -3257,7 +3204,7 @@ static bool trans_VCVT_fix_dp(DisasContext *s, arg_VCVT_fix_dp *a)
frac_bits = (a->opc & 1) ? (32 - a->imm) : (16 - a->imm);
vd = tcg_temp_new_i64();
neon_load_reg64(vd, a->vd);
vfp_load_reg64(vd, a->vd);
fpst = fpstatus_ptr(FPST_FPCR);
shift = tcg_const_i32(frac_bits);
@ -3292,7 +3239,7 @@ static bool trans_VCVT_fix_dp(DisasContext *s, arg_VCVT_fix_dp *a)
g_assert_not_reached();
}
neon_store_reg64(vd, a->vd);
vfp_store_reg64(vd, a->vd);
tcg_temp_free_i64(vd);
tcg_temp_free_i32(shift);
tcg_temp_free_ptr(fpst);
@ -3314,7 +3261,7 @@ static bool trans_VCVT_hp_int(DisasContext *s, arg_VCVT_sp_int *a)
fpst = fpstatus_ptr(FPST_FPCR_F16);
vm = tcg_temp_new_i32();
neon_load_reg32(vm, a->vm);
vfp_load_reg32(vm, a->vm);
if (a->s) {
if (a->rz) {
@ -3329,7 +3276,7 @@ static bool trans_VCVT_hp_int(DisasContext *s, arg_VCVT_sp_int *a)
gen_helper_vfp_touih(vm, vm, fpst);
}
}
neon_store_reg32(vm, a->vd);
vfp_store_reg32(vm, a->vd);
tcg_temp_free_i32(vm);
tcg_temp_free_ptr(fpst);
return true;
@ -3350,7 +3297,7 @@ static bool trans_VCVT_sp_int(DisasContext *s, arg_VCVT_sp_int *a)
fpst = fpstatus_ptr(FPST_FPCR);
vm = tcg_temp_new_i32();
neon_load_reg32(vm, a->vm);
vfp_load_reg32(vm, a->vm);
if (a->s) {
if (a->rz) {
@ -3365,7 +3312,7 @@ static bool trans_VCVT_sp_int(DisasContext *s, arg_VCVT_sp_int *a)
gen_helper_vfp_touis(vm, vm, fpst);
}
}
neon_store_reg32(vm, a->vd);
vfp_store_reg32(vm, a->vd);
tcg_temp_free_i32(vm);
tcg_temp_free_ptr(fpst);
return true;
@ -3393,7 +3340,7 @@ static bool trans_VCVT_dp_int(DisasContext *s, arg_VCVT_dp_int *a)
fpst = fpstatus_ptr(FPST_FPCR);
vm = tcg_temp_new_i64();
vd = tcg_temp_new_i32();
neon_load_reg64(vm, a->vm);
vfp_load_reg64(vm, a->vm);
if (a->s) {
if (a->rz) {
@ -3408,7 +3355,7 @@ static bool trans_VCVT_dp_int(DisasContext *s, arg_VCVT_dp_int *a)
gen_helper_vfp_touid(vd, vm, fpst);
}
}
neon_store_reg32(vd, a->vd);
vfp_store_reg32(vd, a->vd);
tcg_temp_free_i32(vd);
tcg_temp_free_i64(vm);
tcg_temp_free_ptr(fpst);
@ -3521,10 +3468,10 @@ static bool trans_VINS(DisasContext *s, arg_VINS *a)
/* Insert low half of Vm into high half of Vd */
rm = tcg_temp_new_i32();
rd = tcg_temp_new_i32();
neon_load_reg32(rm, a->vm);
neon_load_reg32(rd, a->vd);
vfp_load_reg32(rm, a->vm);
vfp_load_reg32(rd, a->vd);
tcg_gen_deposit_i32(rd, rd, rm, 16, 16);
neon_store_reg32(rd, a->vd);
vfp_store_reg32(rd, a->vd);
tcg_temp_free_i32(rm);
tcg_temp_free_i32(rd);
return true;
@ -3548,9 +3495,9 @@ static bool trans_VMOVX(DisasContext *s, arg_VINS *a)
/* Set Vd to high half of Vm */
rm = tcg_temp_new_i32();
neon_load_reg32(rm, a->vm);
vfp_load_reg32(rm, a->vm);
tcg_gen_shri_i32(rm, rm, 16);
neon_store_reg32(rm, a->vd);
vfp_store_reg32(rm, a->vd);
tcg_temp_free_i32(rm);
return true;
}


@ -1094,64 +1094,141 @@ static inline void gen_hlt(DisasContext *s, int imm)
unallocated_encoding(s);
}
static inline long vfp_reg_offset(bool dp, unsigned reg)
/*
* Return the offset of a "full" NEON Dreg.
*/
static long neon_full_reg_offset(unsigned reg)
{
return offsetof(CPUARMState, vfp.zregs[reg >> 1].d[reg & 1]);
}
/*
* Return the offset of a 2**SIZE piece of a NEON register, at index ELE,
* where 0 is the least significant end of the register.
*/
static long neon_element_offset(int reg, int element, MemOp memop)
{
int element_size = 1 << (memop & MO_SIZE);
int ofs = element * element_size;
#ifdef HOST_WORDS_BIGENDIAN
/*
* Calculate the offset assuming fully little-endian,
* then XOR to account for the order of the 8-byte units.
*/
if (element_size < 8) {
ofs ^= 8 - element_size;
}
#endif
return neon_full_reg_offset(reg) + ofs;
}
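/*
 * Worked example, for illustration only (not part of the change):
 * assuming HOST_WORDS_BIGENDIAN is defined,
 *
 *     neon_element_offset(reg, 0, MO_16):  ofs = 0, 0 ^ (8 - 2) = 6
 *     neon_element_offset(reg, 1, MO_16):  ofs = 2, 2 ^ (8 - 2) = 4
 *     neon_element_offset(reg, 1, MO_32):  ofs = 4, 4 ^ (8 - 4) = 0
 *
 * i.e. the least significant 16-bit element of the little-endian guest
 * view lands in the last two bytes of the big-endian 8-byte unit, which
 * is where the host keeps the low bits of that unit.  On a
 * little-endian host the #ifdef branch is skipped and ofs is used as-is.
 */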
/* Return the offset of a VFP Dreg (dp = true) or VFP Sreg (dp = false). */
static long vfp_reg_offset(bool dp, unsigned reg)
{
if (dp) {
return offsetof(CPUARMState, vfp.zregs[reg >> 1].d[reg & 1]);
return neon_element_offset(reg, 0, MO_64);
} else {
long ofs = offsetof(CPUARMState, vfp.zregs[reg >> 2].d[(reg >> 1) & 1]);
if (reg & 1) {
ofs += offsetof(CPU_DoubleU, l.upper);
} else {
ofs += offsetof(CPU_DoubleU, l.lower);
}
return ofs;
return neon_element_offset(reg >> 1, reg & 1, MO_32);
}
}
/* Return the offset of a 32-bit piece of a NEON register.
zero is the least significant end of the register. */
static inline long
neon_reg_offset (int reg, int n)
static inline void vfp_load_reg64(TCGv_i64 var, int reg)
{
int sreg;
sreg = reg * 2 + n;
return vfp_reg_offset(0, sreg);
tcg_gen_ld_i64(var, cpu_env, vfp_reg_offset(true, reg));
}
static TCGv_i32 neon_load_reg(int reg, int pass)
static inline void vfp_store_reg64(TCGv_i64 var, int reg)
{
TCGv_i32 tmp = tcg_temp_new_i32();
tcg_gen_ld_i32(tmp, cpu_env, neon_reg_offset(reg, pass));
return tmp;
tcg_gen_st_i64(var, cpu_env, vfp_reg_offset(true, reg));
}
static void neon_store_reg(int reg, int pass, TCGv_i32 var)
{
tcg_gen_st_i32(var, cpu_env, neon_reg_offset(reg, pass));
tcg_temp_free_i32(var);
}
static inline void neon_load_reg64(TCGv_i64 var, int reg)
{
tcg_gen_ld_i64(var, cpu_env, vfp_reg_offset(1, reg));
}
static inline void neon_store_reg64(TCGv_i64 var, int reg)
{
tcg_gen_st_i64(var, cpu_env, vfp_reg_offset(1, reg));
}
static inline void neon_load_reg32(TCGv_i32 var, int reg)
static inline void vfp_load_reg32(TCGv_i32 var, int reg)
{
tcg_gen_ld_i32(var, cpu_env, vfp_reg_offset(false, reg));
}
static inline void neon_store_reg32(TCGv_i32 var, int reg)
static inline void vfp_store_reg32(TCGv_i32 var, int reg)
{
tcg_gen_st_i32(var, cpu_env, vfp_reg_offset(false, reg));
}
static void read_neon_element32(TCGv_i32 dest, int reg, int ele, MemOp memop)
{
long off = neon_element_offset(reg, ele, memop);
switch (memop) {
case MO_SB:
tcg_gen_ld8s_i32(dest, cpu_env, off);
break;
case MO_UB:
tcg_gen_ld8u_i32(dest, cpu_env, off);
break;
case MO_SW:
tcg_gen_ld16s_i32(dest, cpu_env, off);
break;
case MO_UW:
tcg_gen_ld16u_i32(dest, cpu_env, off);
break;
case MO_UL:
case MO_SL:
tcg_gen_ld_i32(dest, cpu_env, off);
break;
default:
g_assert_not_reached();
}
}
static void read_neon_element64(TCGv_i64 dest, int reg, int ele, MemOp memop)
{
long off = neon_element_offset(reg, ele, memop);
switch (memop) {
case MO_SL:
tcg_gen_ld32s_i64(dest, cpu_env, off);
break;
case MO_UL:
tcg_gen_ld32u_i64(dest, cpu_env, off);
break;
case MO_Q:
tcg_gen_ld_i64(dest, cpu_env, off);
break;
default:
g_assert_not_reached();
}
}
static void write_neon_element32(TCGv_i32 src, int reg, int ele, MemOp memop)
{
long off = neon_element_offset(reg, ele, memop);
switch (memop) {
case MO_8:
tcg_gen_st8_i32(src, cpu_env, off);
break;
case MO_16:
tcg_gen_st16_i32(src, cpu_env, off);
break;
case MO_32:
tcg_gen_st_i32(src, cpu_env, off);
break;
default:
g_assert_not_reached();
}
}
static void write_neon_element64(TCGv_i64 src, int reg, int ele, MemOp memop)
{
long off = neon_element_offset(reg, ele, memop);
switch (memop) {
case MO_64:
tcg_gen_st_i64(src, cpu_env, off);
break;
default:
g_assert_not_reached();
}
}
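/*
 * Usage sketch (hypothetical caller, shown only to illustrate the new
 * helpers): where a translator previously pulled in a whole 32-bit pass
 * with neon_load_reg(reg, pass), it can now access a single element of
 * exactly the size and extension it wants, e.g.
 *
 *     TCGv_i32 tmp = tcg_temp_new_i32();
 *     read_neon_element32(tmp, a->vm, elt, MO_SW);   sign-extended 16-bit load
 *     ... operate on tmp ...
 *     write_neon_element32(tmp, a->vd, elt, MO_16);  16-bit store
 *     tcg_temp_free_i32(tmp);
 *
 * The MemOp argument carries both the element size and (for loads) the
 * extension, and unexpected sizes fail hard in g_assert_not_reached().
 */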
static TCGv_ptr vfp_reg_ptr(bool dp, int reg)
{
TCGv_ptr ret = tcg_temp_new_ptr();


@ -293,7 +293,7 @@ void HELPER(gvec_sdot_idx_b)(void *vd, void *vn, void *vm, uint32_t desc)
intptr_t index = simd_data(desc);
uint32_t *d = vd;
int8_t *n = vn;
int8_t *m_indexed = (int8_t *)vm + index * 4;
int8_t *m_indexed = (int8_t *)vm + H4(index) * 4;
/* Notice the special case of opr_sz == 8, from aa64/aa32 advsimd.
* Otherwise opr_sz is a multiple of 16.
@ -324,7 +324,7 @@ void HELPER(gvec_udot_idx_b)(void *vd, void *vn, void *vm, uint32_t desc)
intptr_t index = simd_data(desc);
uint32_t *d = vd;
uint8_t *n = vn;
uint8_t *m_indexed = (uint8_t *)vm + index * 4;
uint8_t *m_indexed = (uint8_t *)vm + H4(index) * 4;
/* Notice the special case of opr_sz == 8, from aa64/aa32 advsimd.
* Otherwise opr_sz is a multiple of 16.
@ -1858,10 +1858,10 @@ DO_ABA(gvec_uaba_d, uint64_t)
r2 = float16_##OP(m[H2(0)], m[H2(1)], fpst); \
r3 = float16_##OP(m[H2(2)], m[H2(3)], fpst); \
\
d[H4(0)] = r0; \
d[H4(1)] = r1; \
d[H4(2)] = r2; \
d[H4(3)] = r3; \
d[H2(0)] = r0; \
d[H2(1)] = r1; \
d[H2(2)] = r2; \
d[H2(3)] = r3; \
}
DO_NEON_PAIRWISE(neon_padd, add)
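/*
 * Background sketch, for illustration (these macros are not touched by
 * the patch): H2() and H4() re-index elements so that vectors stored as
 * host-order 64-bit chunks still read as little-endian.  Roughly:
 *
 *     big-endian host:     H2(x) = (x) ^ 3      H4(x) = (x) ^ 1
 *     little-endian host:  H2(x) = (x)          H4(x) = (x)
 *
 * The dot-product index above selects a 32-bit group, so it must be
 * swizzled with H4(); the pairwise results are 16-bit float16 values,
 * so they must be stored through H2(), not H4().
 */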


@ -265,10 +265,16 @@ int main(int argc, char **argv)
qtest_add_func("npcm7xx_rng/enable_disable", test_enable_disable);
qtest_add_func("npcm7xx_rng/rosel", test_rosel);
qtest_add_func("npcm7xx_rng/continuous/monobit", test_continuous_monobit);
qtest_add_func("npcm7xx_rng/continuous/runs", test_continuous_runs);
qtest_add_func("npcm7xx_rng/first_byte/monobit", test_first_byte_monobit);
qtest_add_func("npcm7xx_rng/first_byte/runs", test_first_byte_runs);
/*
* These tests fail intermittently; only run them on explicit
* request until we figure out why.
*/
if (getenv("QEMU_TEST_FLAKY_RNG_TESTS")) {
qtest_add_func("npcm7xx_rng/continuous/monobit", test_continuous_monobit);
qtest_add_func("npcm7xx_rng/continuous/runs", test_continuous_runs);
qtest_add_func("npcm7xx_rng/first_byte/monobit", test_first_byte_monobit);
qtest_add_func("npcm7xx_rng/first_byte/runs", test_first_byte_runs);
}
qtest_start("-machine npcm750-evb");
ret = g_test_run();