target/arm: Convert NEON VFMA, VFMS 3-reg-same insns to decodetree

Convert the Neon floating point VFMA and VFMS insns to decodetree.
These are the last insns in the 3-reg-same group so we can
remove all the support/loop code from the old decoder.

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
Message-id: 20200512163904.10918-18-peter.maydell@linaro.org
This commit is contained in:
Peter Maydell 2020-05-12 17:39:04 +01:00
parent d5fdf9e9e1
commit e95485f856
3 changed files with 46 additions and 174 deletions

View File

@ -174,6 +174,9 @@ SHA256H2_3s 1111 001 1 0 . 01 .... .... 1100 . 1 . 0 .... \
SHA256SU1_3s 1111 001 1 0 . 10 .... .... 1100 . 1 . 0 .... \
vm=%vm_dp vn=%vn_dp vd=%vd_dp
VFMA_fp_3s 1111 001 0 0 . 0 . .... .... 1100 ... 1 .... @3same_fp
VFMS_fp_3s 1111 001 0 0 . 1 . .... .... 1100 ... 1 .... @3same_fp
VQRDMLSH_3s 1111 001 1 0 . .. .... .... 1100 ... 1 .... @3same
VADD_fp_3s 1111 001 0 0 . 0 . .... .... 1101 ... 0 .... @3same_fp

View File

@ -1207,6 +1207,47 @@ static bool trans_VRSQRTS_fp_3s(DisasContext *s, arg_3same *a)
return do_3same(s, a, gen_VRSQRTS_fp_3s);
}
/*
 * Emit the TCG op for one VFMA (fused multiply-add) element: the
 * muladd helper is given vn, vm and the current value of vd, and its
 * result is written back to vd.
 */
static void gen_VFMA_fp_3s(TCGv_i32 vd, TCGv_i32 vn, TCGv_i32 vm,
                           TCGv_ptr fpstatus)
{
    /* vd is both the third helper input and the destination. */
    gen_helper_vfp_muladds(vd, vn, vm, vd, fpstatus);
}
/*
 * Decodetree translate function for the Neon floating point VFMA
 * 3-reg-same insn.
 *
 * Returns false if the aa32_simdfmac feature is not present or if
 * a->size is nonzero; otherwise returns whatever do_3same_fp() returns
 * for gen_VFMA_fp_3s.
 */
static bool trans_VFMA_fp_3s(DisasContext *s, arg_3same *a)
{
    /* TODO fp16 support: until then only size == 0 is accepted */
    if (!dc_isar_feature(aa32_simdfmac, s) || a->size != 0) {
        return false;
    }

    /*
     * NOTE(review): the final 'true' presumably tells do_3same_fp() that
     * the generator also reads vd -- confirm against its definition.
     */
    return do_3same_fp(s, a, gen_VFMA_fp_3s, true);
}
/*
 * Emit the TCG ops for one VFMS element: negate vn, then hand the
 * negated vn, vm and the current value of vd to the muladd helper,
 * writing the result back to vd.
 */
static void gen_VFMS_fp_3s(TCGv_i32 vd, TCGv_i32 vn, TCGv_i32 vm,
                           TCGv_ptr fpstatus)
{
    /* The negation is done in place, so vn is overwritten here. */
    gen_helper_vfp_negs(vn, vn);

    /* vd is both the third helper input and the destination. */
    gen_helper_vfp_muladds(vd, vn, vm, vd, fpstatus);
}
/*
 * Decodetree translate function for the Neon floating point VFMS
 * 3-reg-same insn.
 *
 * Returns false if the aa32_simdfmac feature is not present or if
 * a->size is nonzero; otherwise returns whatever do_3same_fp() returns
 * for gen_VFMS_fp_3s.
 */
static bool trans_VFMS_fp_3s(DisasContext *s, arg_3same *a)
{
    /* TODO fp16 support: until then only size == 0 is accepted */
    if (!dc_isar_feature(aa32_simdfmac, s) || a->size != 0) {
        return false;
    }

    /*
     * NOTE(review): the final 'true' presumably tells do_3same_fp() that
     * the generator also reads vd -- confirm against its definition.
     */
    return do_3same_fp(s, a, gen_VFMS_fp_3s, true);
}
static bool do_3same_fp_pair(DisasContext *s, arg_3same *a, VFPGen3OpSPFn *fn)
{
/* FP operations handled pairwise 32 bits at a time */

View File

@ -3391,78 +3391,6 @@ static void gen_neon_narrow_op(int op, int u, int size,
}
}
/*
 * Symbolic constants for op fields for Neon 3-register same-length.
 * The values correspond to bits [11:8,4]; see the ARM ARM DDI0406B
 * table A7-9.
 *
 * The decoder builds the op value as
 *     ((insn >> 7) & 0x1e) | ((insn >> 4) & 1)
 * so insn bits [11:8] land in op bits [4:1] and insn bit 4 lands in
 * op bit 0, giving the 0..31 range enumerated below.
 */
#define NEON_3R_VHADD 0
#define NEON_3R_VQADD 1
#define NEON_3R_VRHADD 2
#define NEON_3R_LOGIC 3 /* VAND,VBIC,VORR,VMOV,VORN,VEOR,VBIF,VBIT,VBSL */
#define NEON_3R_VHSUB 4
#define NEON_3R_VQSUB 5
#define NEON_3R_VCGT 6
#define NEON_3R_VCGE 7
#define NEON_3R_VSHL 8
#define NEON_3R_VQSHL 9
#define NEON_3R_VRSHL 10
#define NEON_3R_VQRSHL 11
#define NEON_3R_VMAX 12
#define NEON_3R_VMIN 13
#define NEON_3R_VABD 14
#define NEON_3R_VABA 15
#define NEON_3R_VADD_VSUB 16
#define NEON_3R_VTST_VCEQ 17
#define NEON_3R_VML 18 /* VMLA, VMLS */
#define NEON_3R_VMUL 19
#define NEON_3R_VPMAX 20
#define NEON_3R_VPMIN 21
#define NEON_3R_VQDMULH_VQRDMULH 22
#define NEON_3R_VPADD_VQRDMLAH 23
#define NEON_3R_SHA 24 /* SHA1C,SHA1P,SHA1M,SHA1SU0,SHA256H{2},SHA256SU1 */
#define NEON_3R_VFM_VQRDMLSH 25 /* VFMA, VFMS, VQRDMLSH */
#define NEON_3R_FLOAT_ARITH 26 /* float VADD, VSUB, VPADD, VABD */
#define NEON_3R_FLOAT_MULTIPLY 27 /* float VMLA, VMLS, VMUL */
#define NEON_3R_FLOAT_CMP 28 /* float VCEQ, VCGE, VCGT */
#define NEON_3R_FLOAT_ACMP 29 /* float VACGE, VACGT, VACLE, VACLT */
#define NEON_3R_FLOAT_MINMAX 30 /* float VMIN, VMAX */
#define NEON_3R_FLOAT_MISC 31 /* float VRECPS, VRSQRTS, VMAXNM/MINNM */
/*
 * Permitted size-field values for each Neon 3-register same-length op,
 * indexed by the NEON_3R_* constant.  Each entry is a bitmask: bit n is
 * set if size value n is accepted for that op.  The decoder tests
 * (neon_3r_sizes[op] & (1 << size)) and treats a clear bit as an
 * invalid op/size combination (UNDEF).  For several ops the size field
 * (or one bit of it) selects the operation rather than an element
 * size; those entries are annotated individually.
 */
static const uint8_t neon_3r_sizes[] = {
[NEON_3R_VHADD] = 0x7,
[NEON_3R_VQADD] = 0xf,
[NEON_3R_VRHADD] = 0x7,
[NEON_3R_LOGIC] = 0xf, /* size field encodes op type */
[NEON_3R_VHSUB] = 0x7,
[NEON_3R_VQSUB] = 0xf,
[NEON_3R_VCGT] = 0x7,
[NEON_3R_VCGE] = 0x7,
[NEON_3R_VSHL] = 0xf,
[NEON_3R_VQSHL] = 0xf,
[NEON_3R_VRSHL] = 0xf,
[NEON_3R_VQRSHL] = 0xf,
[NEON_3R_VMAX] = 0x7,
[NEON_3R_VMIN] = 0x7,
[NEON_3R_VABD] = 0x7,
[NEON_3R_VABA] = 0x7,
[NEON_3R_VADD_VSUB] = 0xf,
[NEON_3R_VTST_VCEQ] = 0x7,
[NEON_3R_VML] = 0x7,
[NEON_3R_VMUL] = 0x7,
[NEON_3R_VPMAX] = 0x7,
[NEON_3R_VPMIN] = 0x7,
[NEON_3R_VQDMULH_VQRDMULH] = 0x6,
[NEON_3R_VPADD_VQRDMLAH] = 0x7,
[NEON_3R_SHA] = 0xf, /* size field encodes op type */
[NEON_3R_VFM_VQRDMLSH] = 0x7, /* For VFM, size bit 1 encodes op */
[NEON_3R_FLOAT_ARITH] = 0x5, /* size bit 1 encodes op */
[NEON_3R_FLOAT_MULTIPLY] = 0x5, /* size bit 1 encodes op */
[NEON_3R_FLOAT_CMP] = 0x5, /* size bit 1 encodes op */
[NEON_3R_FLOAT_ACMP] = 0x5, /* size bit 1 encodes op */
[NEON_3R_FLOAT_MINMAX] = 0x5, /* size bit 1 encodes op */
[NEON_3R_FLOAT_MISC] = 0x5, /* size bit 1 encodes op */
};
/* Symbolic constants for op fields for Neon 2-register miscellaneous.
* The values correspond to bits [17:16,10:7]; see the ARM ARM DDI0406B
* table A7-13.
@ -5383,108 +5311,8 @@ static int disas_neon_data_insn(DisasContext *s, uint32_t insn)
rm_ofs = neon_reg_offset(rm, 0);
if ((insn & (1 << 23)) == 0) {
/* Three register same length. */
op = ((insn >> 7) & 0x1e) | ((insn >> 4) & 1);
/* Catch invalid op and bad size combinations: UNDEF */
if ((neon_3r_sizes[op] & (1 << size)) == 0) {
return 1;
}
/* All insns of this form UNDEF for either this condition or the
* superset of cases "Q==1"; we catch the latter later.
*/
if (q && ((rd | rn | rm) & 1)) {
return 1;
}
switch (op) {
case NEON_3R_VFM_VQRDMLSH:
if (!u) {
/* VFM, VFMS */
if (size == 1) {
return 1;
}
break;
}
/* VQRDMLSH : handled by decodetree */
return 1;
case NEON_3R_VADD_VSUB:
case NEON_3R_LOGIC:
case NEON_3R_VMAX:
case NEON_3R_VMIN:
case NEON_3R_VTST_VCEQ:
case NEON_3R_VCGT:
case NEON_3R_VCGE:
case NEON_3R_VQADD:
case NEON_3R_VQSUB:
case NEON_3R_VMUL:
case NEON_3R_VML:
case NEON_3R_VSHL:
case NEON_3R_SHA:
case NEON_3R_VHADD:
case NEON_3R_VRHADD:
case NEON_3R_VHSUB:
case NEON_3R_VABD:
case NEON_3R_VABA:
case NEON_3R_VQSHL:
case NEON_3R_VRSHL:
case NEON_3R_VQRSHL:
case NEON_3R_VPMAX:
case NEON_3R_VPMIN:
case NEON_3R_VPADD_VQRDMLAH:
case NEON_3R_VQDMULH_VQRDMULH:
case NEON_3R_FLOAT_ARITH:
case NEON_3R_FLOAT_MULTIPLY:
case NEON_3R_FLOAT_CMP:
case NEON_3R_FLOAT_ACMP:
case NEON_3R_FLOAT_MINMAX:
case NEON_3R_FLOAT_MISC:
/* Already handled by decodetree */
return 1;
}
if (size == 3) {
/* 64-bit element instructions: handled by decodetree */
return 1;
}
switch (op) {
case NEON_3R_VFM_VQRDMLSH:
if (!dc_isar_feature(aa32_simdfmac, s)) {
return 1;
}
break;
default:
break;
}
for (pass = 0; pass < (q ? 4 : 2); pass++) {
/* Elementwise. */
tmp = neon_load_reg(rn, pass);
tmp2 = neon_load_reg(rm, pass);
switch (op) {
case NEON_3R_VFM_VQRDMLSH:
{
/* VFMA, VFMS: fused multiply-add */
TCGv_ptr fpstatus = get_fpstatus_ptr(1);
TCGv_i32 tmp3 = neon_load_reg(rd, pass);
if (size) {
/* VFMS */
gen_helper_vfp_negs(tmp, tmp);
}
gen_helper_vfp_muladds(tmp, tmp, tmp2, tmp3, fpstatus);
tcg_temp_free_i32(tmp3);
tcg_temp_free_ptr(fpstatus);
break;
}
default:
abort();
}
tcg_temp_free_i32(tmp2);
neon_store_reg(rd, pass, tmp);
} /* for pass */
/* End of 3 register same size operations. */
/* Three register same length: handled by decodetree */
return 1;
} else if (insn & (1 << 4)) {
if ((insn & 0x00380080) != 0) {
/* Two registers and shift. */