target/arm: Implement SVE Floating Point Multiply Indexed Group

Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
Message-id: 20180627043328.11531-19-richard.henderson@linaro.org
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
Authored by Richard Henderson on 2018-06-29 15:11:08 +01:00; committed by Peter Maydell
parent cc48affe83
commit ca40a6e6e3
4 changed files with 131 additions and 0 deletions

target/arm/helper.h

@@ -620,6 +620,20 @@ DEF_HELPER_FLAGS_5(gvec_ftsmul_s, TCG_CALL_NO_RWG,
DEF_HELPER_FLAGS_5(gvec_ftsmul_d, TCG_CALL_NO_RWG,
                   void, ptr, ptr, ptr, ptr, i32)

DEF_HELPER_FLAGS_5(gvec_fmul_idx_h, TCG_CALL_NO_RWG,
                   void, ptr, ptr, ptr, ptr, i32)
DEF_HELPER_FLAGS_5(gvec_fmul_idx_s, TCG_CALL_NO_RWG,
                   void, ptr, ptr, ptr, ptr, i32)
DEF_HELPER_FLAGS_5(gvec_fmul_idx_d, TCG_CALL_NO_RWG,
                   void, ptr, ptr, ptr, ptr, i32)

DEF_HELPER_FLAGS_6(gvec_fmla_idx_h, TCG_CALL_NO_RWG,
                   void, ptr, ptr, ptr, ptr, ptr, i32)
DEF_HELPER_FLAGS_6(gvec_fmla_idx_s, TCG_CALL_NO_RWG,
                   void, ptr, ptr, ptr, ptr, ptr, i32)
DEF_HELPER_FLAGS_6(gvec_fmla_idx_d, TCG_CALL_NO_RWG,
                   void, ptr, ptr, ptr, ptr, ptr, i32)

#ifdef TARGET_AARCH64
#include "helper-a64.h"
#include "helper-sve.h"

target/arm/sve.decode

@@ -29,6 +29,7 @@
%imm9_16_10     16:s6 10:3
%size_23        23:2
%dtype_23_13    23:2 13:2
%index3_22_19   22:1 19:2

# A combination of tsz:imm3 -- extract esize.
%tszimm_esz     22:2 5:5 !function=tszimm_esz
@@ -716,6 +717,24 @@ UMIN_zzi        00100101 .. 101 011 110 ........ .....         @rdn_i8u

# SVE integer multiply immediate (unpredicated)
MUL_zzi         00100101 .. 110 000 110 ........ .....          @rdn_i8s

### SVE FP Multiply-Add Indexed Group

# SVE floating-point multiply-add (indexed)
FMLA_zzxz       01100100 0.1 .. rm:3 00000 sub:1 rn:5 rd:5 \
                ra=%reg_movprfx index=%index3_22_19 esz=1
FMLA_zzxz       01100100 101 index:2 rm:3 00000 sub:1 rn:5 rd:5 \
                ra=%reg_movprfx esz=2
FMLA_zzxz       01100100 111 index:1 rm:4 00000 sub:1 rn:5 rd:5 \
                ra=%reg_movprfx esz=3

### SVE FP Multiply Indexed Group

# SVE floating-point multiply (indexed)
FMUL_zzx        01100100 0.1 .. rm:3 001000 rn:5 rd:5 \
                index=%index3_22_19 esz=1
FMUL_zzx        01100100 101 index:2 rm:3 001000 rn:5 rd:5      esz=2
FMUL_zzx        01100100 111 index:1 rm:4 001000 rn:5 rd:5      esz=3

### SVE FP Accumulating Reduction Group

# SVE floating-point serial reduction (predicated)
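A pattern worth noticing in the six FMLA_zzxz/FMUL_zzx lines above: as the element size grows, the index field shrinks and gives bits back to the opcode and, for doubles, to Zm, which widens from 3 bits (Z0-Z7) to 4 bits (Z0-Z15). A standalone sketch of that field split (plain C, not decodetree output; extract() is a local stand-in for QEMU's extract32()):

#include <stdint.h>
#include <stdio.h>

static unsigned extract(uint32_t insn, int pos, int len)
{
    return (insn >> pos) & ((1u << len) - 1);
}

static void decode_index_rm(uint32_t insn)
{
    if (extract(insn, 23, 1) == 0) {
        /* Half precision: 3-bit index split as bit 22 : bits [20:19]
         * (the %index3_22_19 field), 3-bit Zm in bits [18:16]. */
        unsigned index = (extract(insn, 22, 1) << 2) | extract(insn, 19, 2);
        printf("esz=1 index=%u rm=%u\n", index, extract(insn, 16, 3));
    } else if (extract(insn, 22, 1) == 0) {
        /* Single precision: 2-bit index in bits [20:19], 3-bit Zm. */
        printf("esz=2 index=%u rm=%u\n",
               extract(insn, 19, 2), extract(insn, 16, 3));
    } else {
        /* Double precision: 1-bit index in bit 20, 4-bit Zm in bits [19:16]. */
        printf("esz=3 index=%u rm=%u\n",
               extract(insn, 20, 1), extract(insn, 16, 4));
    }
}

int main(void)
{
    /* Hand-assembled single-precision FMUL_zzx with index=1, Zm=3,
     * Zn=2, Zd=4, purely to exercise the field extraction above. */
    uint32_t insn = (0x64u << 24) | (5u << 21) | (1u << 19) | (3u << 16)
                  | (8u << 10) | (2u << 5) | 4u;
    decode_index_rm(insn);   /* prints: esz=2 index=1 rm=3 */
    return 0;
}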

target/arm/translate-sve.c

@@ -3400,6 +3400,56 @@ DO_ZZI(UMIN, umin)

#undef DO_ZZI

/*
 *** SVE Floating Point Multiply-Add Indexed Group
 */

static bool trans_FMLA_zzxz(DisasContext *s, arg_FMLA_zzxz *a, uint32_t insn)
{
    static gen_helper_gvec_4_ptr * const fns[3] = {
        gen_helper_gvec_fmla_idx_h,
        gen_helper_gvec_fmla_idx_s,
        gen_helper_gvec_fmla_idx_d,
    };

    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        TCGv_ptr status = get_fpstatus_ptr(a->esz == MO_16);
        tcg_gen_gvec_4_ptr(vec_full_reg_offset(s, a->rd),
                           vec_full_reg_offset(s, a->rn),
                           vec_full_reg_offset(s, a->rm),
                           vec_full_reg_offset(s, a->ra),
                           status, vsz, vsz, (a->index << 1) | a->sub,
                           fns[a->esz - 1]);
        tcg_temp_free_ptr(status);
    }
    return true;
}

/*
 *** SVE Floating Point Multiply Indexed Group
 */

static bool trans_FMUL_zzx(DisasContext *s, arg_FMUL_zzx *a, uint32_t insn)
{
    static gen_helper_gvec_3_ptr * const fns[3] = {
        gen_helper_gvec_fmul_idx_h,
        gen_helper_gvec_fmul_idx_s,
        gen_helper_gvec_fmul_idx_d,
    };

    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        TCGv_ptr status = get_fpstatus_ptr(a->esz == MO_16);
        tcg_gen_gvec_3_ptr(vec_full_reg_offset(s, a->rd),
                           vec_full_reg_offset(s, a->rn),
                           vec_full_reg_offset(s, a->rm),
                           status, vsz, vsz, a->index, fns[a->esz - 1]);
        tcg_temp_free_ptr(status);
    }
    return true;
}

/*
 *** SVE Floating Point Accumulating Reduction Group
 */
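Two small points worth noting in the translator above: the fns[] arrays are indexed with a->esz - 1 because the decode patterns only produce esz values 1 (half) through 3 (double), and trans_FMLA_zzxz folds both the index and the sub (FMLS) flag into the single data slot of the gvec descriptor, which DO_FMLA_IDX in vec_helper.c (next file) unpacks again. A standalone sketch of that round-trip, with DATA_SHIFT as a placeholder for QEMU's SIMD_DATA_SHIFT and the oprsz/maxsz fields of the real descriptor omitted:

#include <assert.h>
#include <stdint.h>

enum { DATA_SHIFT = 22 };   /* placeholder; stands in for SIMD_DATA_SHIFT */

int main(void)
{
    unsigned index = 5, sub = 1;

    /* translate-sve.c side: the data argument of tcg_gen_gvec_4_ptr() */
    uint32_t desc = (uint32_t)((index << 1) | sub) << DATA_SHIFT;

    /* vec_helper.c side, as in DO_FMLA_IDX:
     * extract32(desc, SIMD_DATA_SHIFT, 1) and desc >> (SIMD_DATA_SHIFT + 1) */
    unsigned op1_neg_bit = (desc >> DATA_SHIFT) & 1;
    unsigned idx = desc >> (DATA_SHIFT + 1);

    assert(op1_neg_bit == sub && idx == index);
    return 0;
}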

target/arm/vec_helper.c

@@ -495,3 +495,51 @@ DO_3OP(gvec_rsqrts_d, helper_rsqrtsf_f64, float64)

#endif
#undef DO_3OP

/* For the indexed ops, SVE applies the index per 128-bit vector segment.
 * For AdvSIMD, there is of course only one such vector segment.
 */

#define DO_MUL_IDX(NAME, TYPE, H)                                          \
void HELPER(NAME)(void *vd, void *vn, void *vm, void *stat, uint32_t desc) \
{                                                                          \
    intptr_t i, j, oprsz = simd_oprsz(desc), segment = 16 / sizeof(TYPE);  \
    intptr_t idx = simd_data(desc);                                        \
    TYPE *d = vd, *n = vn, *m = vm;                                        \
    for (i = 0; i < oprsz / sizeof(TYPE); i += segment) {                  \
        TYPE mm = m[H(i + idx)];                                           \
        for (j = 0; j < segment; j++) {                                    \
            d[i + j] = TYPE##_mul(n[i + j], mm, stat);                     \
        }                                                                  \
    }                                                                      \
}

DO_MUL_IDX(gvec_fmul_idx_h, float16, H2)
DO_MUL_IDX(gvec_fmul_idx_s, float32, H4)
DO_MUL_IDX(gvec_fmul_idx_d, float64, )

#undef DO_MUL_IDX
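To make the per-segment indexing above concrete: for every 128-bit segment the helper reads one element of Zm, chosen by the immediate index, and multiplies it into every element of Zn in that same segment. A standalone sketch (plain C, not QEMU code) for 4-byte elements, a 32-byte vector and index 1; the H4() host-endian macro is omitted since it is the identity on little-endian hosts:

#include <stdio.h>

int main(void)
{
    const int oprsz = 32;            /* vector length in bytes */
    const int esize = 4;             /* sizeof(float32) */
    const int segment = 16 / esize;  /* elements per 128-bit segment */
    const int idx = 1;               /* immediate index from the insn */

    for (int i = 0; i < oprsz / esize; i += segment) {
        for (int j = 0; j < segment; j++) {
            /* Mirrors d[i + j] = n[i + j] * m[i + idx] in DO_MUL_IDX:
             * the same m element is reused across its whole segment. */
            printf("d[%d] = n[%d] * m[%d]\n", i + j, i + j, i + idx);
        }
    }
    return 0;
}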
#define DO_FMLA_IDX(NAME, TYPE, H)                                         \
void HELPER(NAME)(void *vd, void *vn, void *vm, void *va,                  \
                  void *stat, uint32_t desc)                               \
{                                                                          \
    intptr_t i, j, oprsz = simd_oprsz(desc), segment = 16 / sizeof(TYPE);  \
    TYPE op1_neg = extract32(desc, SIMD_DATA_SHIFT, 1);                    \
    intptr_t idx = desc >> (SIMD_DATA_SHIFT + 1);                          \
    TYPE *d = vd, *n = vn, *m = vm, *a = va;                               \
    op1_neg <<= (8 * sizeof(TYPE) - 1);                                    \
    for (i = 0; i < oprsz / sizeof(TYPE); i += segment) {                  \
        TYPE mm = m[H(i + idx)];                                           \
        for (j = 0; j < segment; j++) {                                    \
            d[i + j] = TYPE##_muladd(n[i + j] ^ op1_neg,                   \
                                     mm, a[i + j], 0, stat);               \
        }                                                                  \
    }                                                                      \
}

DO_FMLA_IDX(gvec_fmla_idx_h, float16, H2)
DO_FMLA_IDX(gvec_fmla_idx_s, float32, H4)
DO_FMLA_IDX(gvec_fmla_idx_d, float64, )

#undef DO_FMLA_IDX
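The one subtle line in DO_FMLA_IDX is n[i + j] ^ op1_neg: when the sub flag is set (FMLS), the sign bit of the first operand is flipped before the fused multiply-add, so the helper computes a - n * m as a single fused operation without a separate negation step. A standalone sketch with plain C float instead of QEMU's softfloat types:

#include <math.h>
#include <stdint.h>
#include <stdio.h>
#include <string.h>

static float flip_sign(float x)
{
    uint32_t bits;
    memcpy(&bits, &x, sizeof(bits));
    bits ^= UINT32_C(1) << 31;       /* op1_neg for a 32-bit element */
    memcpy(&x, &bits, sizeof(x));
    return x;
}

int main(void)
{
    float n = 1.5f, m = 2.0f, a = 10.0f;
    printf("FMLA: %f\n", fmaf(n, m, a));             /* 13.000000 */
    printf("FMLS: %f\n", fmaf(flip_sign(n), m, a));  /*  7.000000 */
    return 0;
}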