target-arm: A64: Implement SADDLP, UADDLP, SADALP, UADALP
Implement the SADDLP, UADDLP, SADALP and UADALP instructions in the SIMD 2-reg misc category.

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
Reviewed-by: Richard Henderson <rth@twiddle.net>
Message-id: 1394822294-14837-8-git-send-email-peter.maydell@linaro.org
This commit is contained in:
parent
c1b876b2e9
commit
6781fa119f
@ -293,3 +293,64 @@ float64 HELPER(rsqrtsf_f64)(float64 a, float64 b, void *fpstp)
|
||||
}
|
||||
return float64_muladd(a, b, float64_three, float_muladd_halve_result, fpst);
|
||||
}
|
||||
|
||||
/* Pairwise long add: add pairs of adjacent elements into
 * double-width elements in the result (eg _s8 is an 8x8->16 op)
 */

/* Sign extend the 8 bit value held in the low byte of each 16 bit lane
 * of @bytes so that it fills the whole lane. The high byte of every
 * lane must be zero on entry.
 */
static uint64_t neon_addlp_sext8(uint64_t bytes)
{
    const uint64_t nsignmask = 0x0080008000800080ULL;
    const uint64_t wsignmask = 0x8000800080008000ULL;

    /* This is the usual (x ^ s) - s sign extension done on four lanes
     * at once. Bit 15 of each lane is set before the subtraction so
     * that it can never borrow from the neighbouring lane; the final
     * XOR turns whatever is left of that guard bit into the correct
     * sign bit of the widened value.
     */
    bytes ^= nsignmask;
    bytes |= wsignmask;
    return (bytes - nsignmask) ^ wsignmask;
}

/* Signed 8 bit pairwise long add.
 *
 * @a holds eight signed bytes. Each 16 bit lane of the return value is
 * the signed sum of the two bytes that occupy the same 16 bits of @a.
 */
uint64_t HELPER(neon_addlp_s8)(uint64_t a)
{
    const uint64_t wsignmask = 0x8000800080008000ULL;
    const uint64_t elementmask = 0x00ff00ff00ff00ffULL;
    uint64_t tmp1, tmp2;
    uint64_t res, signres;

    /* Low byte of each pair, sign extended to a 16 bit field */
    tmp1 = neon_addlp_sext8(a & elementmask);
    /* Ditto for the high byte of each pair */
    tmp2 = neon_addlp_sext8((a >> 8) & elementmask);

    /* Calculate the result by summing bits 0..14, 16..30, etc,
     * and then adjusting the sign bits 15, 31, etc manually.
     * This ensures the addition can't carry out of one 16 bit
     * field into the next.
     */
    signres = (tmp1 ^ tmp2) & wsignmask;
    res = (tmp1 & ~wsignmask) + (tmp2 & ~wsignmask);
    res ^= signres;

    return res;
}
|
||||
|
||||
/* Unsigned 8 bit pairwise long add.
 *
 * @a holds eight unsigned bytes. Each 16 bit lane of the return value
 * is the sum of the two bytes that occupy the same 16 bits of @a.
 * The largest possible sum is 0xff + 0xff = 0x1fe, so a lane can never
 * carry into its neighbour and a plain 64 bit add is sufficient.
 */
uint64_t HELPER(neon_addlp_u8)(uint64_t a)
{
    uint64_t tmp;

    /* Low byte of each pair, zero extended to 16 bits */
    tmp = a & 0x00ff00ff00ff00ffULL;
    /* Add the high byte of each pair */
    tmp += (a >> 8) & 0x00ff00ff00ff00ffULL;
    return tmp;
}
|
||||
|
||||
/* Signed 16 bit pairwise long add.
 *
 * @a holds four signed 16 bit elements. The low 32 bits of the return
 * value are the signed sum of elements 0 and 1, the high 32 bits the
 * signed sum of elements 2 and 3.
 */
uint64_t HELPER(neon_addlp_s16)(uint64_t a)
{
    int32_t reslo, reshi;

    /* The int16_t casts pick out and sign extend each element; the
     * sums are done in 32 bits so they cannot overflow.
     */
    reslo = (int32_t)(int16_t)a + (int32_t)(int16_t)(a >> 16);
    reshi = (int32_t)(int16_t)(a >> 32) + (int32_t)(int16_t)(a >> 48);

    /* Go via uint32_t for the low half so that a negative reslo does
     * not sign extend into the high half of the result.
     */
    return (uint32_t)reslo | (((uint64_t)reshi) << 32);
}
|
||||
|
||||
/* Unsigned 16 bit pairwise long add.
 *
 * @a holds four unsigned 16 bit elements. Each 32 bit lane of the
 * return value is the sum of the two elements that occupy the same
 * 32 bits of @a. The largest possible sum is 0x1fffe, so a lane can
 * never carry into its neighbour.
 */
uint64_t HELPER(neon_addlp_u16)(uint64_t a)
{
    uint64_t tmp;

    /* Low element of each pair, zero extended to 32 bits */
    tmp = a & 0x0000ffff0000ffffULL;
    /* Add the high element of each pair */
    tmp += (a >> 16) & 0x0000ffff0000ffffULL;
    return tmp;
}
|
||||
|
@ -39,3 +39,7 @@ DEF_HELPER_FLAGS_3(recpsf_f32, TCG_CALL_NO_RWG, f32, f32, f32, ptr)
|
||||
DEF_HELPER_FLAGS_3(recpsf_f64, TCG_CALL_NO_RWG, f64, f64, f64, ptr)
|
||||
DEF_HELPER_FLAGS_3(rsqrtsf_f32, TCG_CALL_NO_RWG, f32, f32, f32, ptr)
|
||||
DEF_HELPER_FLAGS_3(rsqrtsf_f64, TCG_CALL_NO_RWG, f64, f64, f64, ptr)
|
||||
DEF_HELPER_FLAGS_1(neon_addlp_s8, TCG_CALL_NO_RWG_SE, i64, i64)
|
||||
DEF_HELPER_FLAGS_1(neon_addlp_u8, TCG_CALL_NO_RWG_SE, i64, i64)
|
||||
DEF_HELPER_FLAGS_1(neon_addlp_s16, TCG_CALL_NO_RWG_SE, i64, i64)
|
||||
DEF_HELPER_FLAGS_1(neon_addlp_u16, TCG_CALL_NO_RWG_SE, i64, i64)
|
||||
|
@ -81,6 +81,7 @@ typedef void NeonGenNarrowEnvFn(TCGv_i32, TCGv_ptr, TCGv_i64);
|
||||
typedef void NeonGenWidenFn(TCGv_i64, TCGv_i32);
typedef void NeonGenTwoSingleOPFn(TCGv_i32, TCGv_i32, TCGv_i32, TCGv_ptr);
typedef void NeonGenTwoDoubleOPFn(TCGv_i64, TCGv_i64, TCGv_i64, TCGv_ptr);
/* Generator taking a 64 bit destination and a single 64 bit operand */
typedef void NeonGenOneOpFn(TCGv_i64, TCGv_i64);
|
||||
|
||||
/* initialize TCG globals. */
|
||||
void a64_translate_init(void)
|
||||
@ -8456,6 +8457,78 @@ static void handle_rev(DisasContext *s, int opcode, bool u,
|
||||
}
|
||||
}
|
||||
|
||||
/* Emit TCG code for the pairwise long add operations from 2-misc.
 *
 * @s:      translation context
 * @opcode: instruction opcode field; 0x6 selects the accumulating form
 * @u:      true for the unsigned variants, false for the signed ones
 * @is_q:   true to process both 64 bit halves of the source register;
 *          otherwise only the low half is processed and the high half
 *          of the destination is written as zero
 * @size:   source element size (0 = 8 bit, 1 = 16 bit, 2 = 32 bit)
 * @rn:     source vector register number
 * @rd:     destination (and, when accumulating, addend) register number
 *
 * NOTE(review): size == 3 would index past the helper table below; it
 * is assumed the caller rejects it before getting here - confirm.
 */
static void handle_2misc_pairwise(DisasContext *s, int opcode, bool u,
                                  bool is_q, int size, int rn, int rd)
{
    /* Implement the pairwise operations from 2-misc:
     * SADDLP, UADDLP, SADALP, UADALP.
     * These all add pairs of elements in the input to produce a
     * double-width result element in the output (possibly accumulating).
     */
    bool accum = (opcode == 0x6);
    int maxpass = is_q ? 2 : 1;
    int pass;
    TCGv_i64 tcg_res[2];

    if (size == 2) {
        /* 32 + 32 -> 64 op: each pass loads two adjacent 32 bit
         * elements (sign or zero extended to 64 bits according to
         * the memop) and adds them directly.
         */
        TCGMemOp memop = size + (u ? 0 : MO_SIGN);

        for (pass = 0; pass < maxpass; pass++) {
            TCGv_i64 tcg_op1 = tcg_temp_new_i64();
            TCGv_i64 tcg_op2 = tcg_temp_new_i64();

            tcg_res[pass] = tcg_temp_new_i64();

            read_vec_element(s, tcg_op1, rn, pass * 2, memop);
            read_vec_element(s, tcg_op2, rn, pass * 2 + 1, memop);
            tcg_gen_add_i64(tcg_res[pass], tcg_op1, tcg_op2);
            if (accum) {
                /* Add in the existing 64 bit destination element */
                read_vec_element(s, tcg_op1, rd, pass, MO_64);
                tcg_gen_add_i64(tcg_res[pass], tcg_res[pass], tcg_op1);
            }

            tcg_temp_free_i64(tcg_op1);
            tcg_temp_free_i64(tcg_op2);
        }
    } else {
        /* 8 and 16 bit elements: a helper does the pairwise add on a
         * whole 64 bit chunk at a time. The table is indexed by
         * [size][u]; the lookup does not depend on the pass, so it is
         * done once up front.
         */
        static NeonGenOneOpFn * const fns[2][2] = {
            { gen_helper_neon_addlp_s8,  gen_helper_neon_addlp_u8 },
            { gen_helper_neon_addlp_s16, gen_helper_neon_addlp_u16 },
        };
        NeonGenOneOpFn *genfn = fns[size][u];

        for (pass = 0; pass < maxpass; pass++) {
            TCGv_i64 tcg_op = tcg_temp_new_i64();

            tcg_res[pass] = tcg_temp_new_i64();

            read_vec_element(s, tcg_op, rn, pass, MO_64);
            genfn(tcg_res[pass], tcg_op);

            if (accum) {
                /* Accumulate at the width of the result elements:
                 * 16 bit for 8 bit sources, 32 bit for 16 bit sources.
                 */
                read_vec_element(s, tcg_op, rd, pass, MO_64);
                if (size == 0) {
                    gen_helper_neon_addl_u16(tcg_res[pass],
                                             tcg_res[pass], tcg_op);
                } else {
                    gen_helper_neon_addl_u32(tcg_res[pass],
                                             tcg_res[pass], tcg_op);
                }
            }
            tcg_temp_free_i64(tcg_op);
        }
    }
    if (!is_q) {
        /* Only one pass was run: the high half of rd is written as 0 */
        tcg_res[1] = tcg_const_i64(0);
    }
    for (pass = 0; pass < 2; pass++) {
        write_vec_element(s, tcg_res[pass], rd, pass, MO_64);
        tcg_temp_free_i64(tcg_res[pass]);
    }
}
|
||||
|
||||
/* C3.6.17 AdvSIMD two reg misc
|
||||
* 31 30 29 28 24 23 22 21 17 16 12 11 10 9 5 4 0
|
||||
* +---+---+---+-----------+------+-----------+--------+-----+------+------+
|
||||
@ -8510,7 +8583,7 @@ static void disas_simd_two_reg_misc(DisasContext *s, uint32_t insn)
|
||||
unallocated_encoding(s);
|
||||
return;
|
||||
}
|
||||
unsupported_encoding(s, insn);
|
||||
handle_2misc_pairwise(s, opcode, u, is_q, size, rn, rd);
|
||||
return;
|
||||
case 0x13: /* SHLL, SHLL2 */
|
||||
if (u == 0 || size == 3) {
|
||||
|
Loading…
Reference in New Issue
Block a user