target-arm: A64: Implement long vector x indexed insns

Implement the 'long' operations in the vector x indexed
element category.

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
Reviewed-by: Richard Henderson <rth@twiddle.net>
This commit is contained in:
Peter Maydell 2014-02-20 10:35:49 +00:00
parent f5e51e7f10
commit c44ad1fddc
1 changed files with 139 additions and 5 deletions

View File

@ -7909,11 +7909,6 @@ static void disas_simd_indexed_vector(DisasContext *s, uint32_t insn)
}
}
if (is_long) {
unsupported_encoding(s, insn);
return;
}
if (is_fp) {
fpst = get_fpstatus_ptr();
} else {
@ -8055,6 +8050,145 @@ static void disas_simd_indexed_vector(DisasContext *s, uint32_t insn)
}
} else {
/* long ops: 16x16->32 or 32x32->64 */
TCGv_i64 tcg_res[2];
int pass;
bool satop = extract32(opcode, 0, 1);
TCGMemOp memop = MO_32;
if (satop || !u) {
memop |= MO_SIGN;
}
if (size == 2) {
TCGv_i64 tcg_idx = tcg_temp_new_i64();
read_vec_element(s, tcg_idx, rm, index, memop);
for (pass = 0; pass < 2; pass++) {
TCGv_i64 tcg_op = tcg_temp_new_i64();
TCGv_i64 tcg_passres;
read_vec_element(s, tcg_op, rn, pass + (is_q * 2), memop);
tcg_res[pass] = tcg_temp_new_i64();
if (opcode == 0xa || opcode == 0xb) {
/* Non-accumulating ops */
tcg_passres = tcg_res[pass];
} else {
tcg_passres = tcg_temp_new_i64();
}
tcg_gen_mul_i64(tcg_passres, tcg_op, tcg_idx);
tcg_temp_free_i64(tcg_op);
if (satop) {
/* saturating, doubling */
gen_helper_neon_addl_saturate_s64(tcg_passres, cpu_env,
tcg_passres, tcg_passres);
}
if (opcode == 0xa || opcode == 0xb) {
continue;
}
/* Accumulating op: handle accumulate step */
read_vec_element(s, tcg_res[pass], rd, pass, MO_64);
switch (opcode) {
case 0x2: /* SMLAL, SMLAL2, UMLAL, UMLAL2 */
tcg_gen_add_i64(tcg_res[pass], tcg_res[pass], tcg_passres);
break;
case 0x6: /* SMLSL, SMLSL2, UMLSL, UMLSL2 */
tcg_gen_sub_i64(tcg_res[pass], tcg_res[pass], tcg_passres);
break;
case 0x7: /* SQDMLSL, SQDMLSL2 */
tcg_gen_neg_i64(tcg_passres, tcg_passres);
/* fall through */
case 0x3: /* SQDMLAL, SQDMLAL2 */
gen_helper_neon_addl_saturate_s64(tcg_res[pass], cpu_env,
tcg_res[pass],
tcg_passres);
break;
default:
g_assert_not_reached();
}
tcg_temp_free_i64(tcg_passres);
}
tcg_temp_free_i64(tcg_idx);
} else {
TCGv_i32 tcg_idx = tcg_temp_new_i32();
assert(size == 1);
read_vec_element_i32(s, tcg_idx, rm, index, size);
/* The simplest way to handle the 16x16 indexed ops is to duplicate
* the index into both halves of the 32 bit tcg_idx and then use
* the usual Neon helpers.
*/
tcg_gen_deposit_i32(tcg_idx, tcg_idx, tcg_idx, 16, 16);
for (pass = 0; pass < 2; pass++) {
TCGv_i32 tcg_op = tcg_temp_new_i32();
TCGv_i64 tcg_passres;
read_vec_element_i32(s, tcg_op, rn, pass + (is_q * 2), MO_32);
tcg_res[pass] = tcg_temp_new_i64();
if (opcode == 0xa || opcode == 0xb) {
/* Non-accumulating ops */
tcg_passres = tcg_res[pass];
} else {
tcg_passres = tcg_temp_new_i64();
}
if (memop & MO_SIGN) {
gen_helper_neon_mull_s16(tcg_passres, tcg_op, tcg_idx);
} else {
gen_helper_neon_mull_u16(tcg_passres, tcg_op, tcg_idx);
}
if (satop) {
gen_helper_neon_addl_saturate_s32(tcg_passres, cpu_env,
tcg_passres, tcg_passres);
}
tcg_temp_free_i32(tcg_op);
if (opcode == 0xa || opcode == 0xb) {
continue;
}
/* Accumulating op: handle accumulate step */
read_vec_element(s, tcg_res[pass], rd, pass, MO_64);
switch (opcode) {
case 0x2: /* SMLAL, SMLAL2, UMLAL, UMLAL2 */
gen_helper_neon_addl_u32(tcg_res[pass], tcg_res[pass],
tcg_passres);
break;
case 0x6: /* SMLSL, SMLSL2, UMLSL, UMLSL2 */
gen_helper_neon_subl_u32(tcg_res[pass], tcg_res[pass],
tcg_passres);
break;
case 0x7: /* SQDMLSL, SQDMLSL2 */
gen_helper_neon_negl_u32(tcg_passres, tcg_passres);
/* fall through */
case 0x3: /* SQDMLAL, SQDMLAL2 */
gen_helper_neon_addl_saturate_s32(tcg_res[pass], cpu_env,
tcg_res[pass],
tcg_passres);
break;
default:
g_assert_not_reached();
}
tcg_temp_free_i64(tcg_passres);
}
tcg_temp_free_i32(tcg_idx);
}
for (pass = 0; pass < 2; pass++) {
write_vec_element(s, tcg_res[pass], rd, pass, MO_64);
tcg_temp_free_i64(tcg_res[pass]);
}
}
if (!TCGV_IS_UNUSED_PTR(fpst)) {