target/arm: Implement SVE Floating Point Multiply Indexed Group
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
Message-id: 20180627043328.11531-19-richard.henderson@linaro.org
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
This commit is contained in:
parent
cc48affe83
commit
ca40a6e6e3
@ -620,6 +620,20 @@ DEF_HELPER_FLAGS_5(gvec_ftsmul_s, TCG_CALL_NO_RWG,
|
||||
DEF_HELPER_FLAGS_5(gvec_ftsmul_d, TCG_CALL_NO_RWG,
|
||||
void, ptr, ptr, ptr, ptr, i32)
|
||||
|
||||
DEF_HELPER_FLAGS_5(gvec_fmul_idx_h, TCG_CALL_NO_RWG,
|
||||
void, ptr, ptr, ptr, ptr, i32)
|
||||
DEF_HELPER_FLAGS_5(gvec_fmul_idx_s, TCG_CALL_NO_RWG,
|
||||
void, ptr, ptr, ptr, ptr, i32)
|
||||
DEF_HELPER_FLAGS_5(gvec_fmul_idx_d, TCG_CALL_NO_RWG,
|
||||
void, ptr, ptr, ptr, ptr, i32)
|
||||
|
||||
DEF_HELPER_FLAGS_6(gvec_fmla_idx_h, TCG_CALL_NO_RWG,
|
||||
void, ptr, ptr, ptr, ptr, ptr, i32)
|
||||
DEF_HELPER_FLAGS_6(gvec_fmla_idx_s, TCG_CALL_NO_RWG,
|
||||
void, ptr, ptr, ptr, ptr, ptr, i32)
|
||||
DEF_HELPER_FLAGS_6(gvec_fmla_idx_d, TCG_CALL_NO_RWG,
|
||||
void, ptr, ptr, ptr, ptr, ptr, i32)
|
||||
|
||||
#ifdef TARGET_AARCH64
|
||||
#include "helper-a64.h"
|
||||
#include "helper-sve.h"
|
||||
|
@ -29,6 +29,7 @@
|
||||
%imm9_16_10     16:s6 10:3
%size_23        23:2
%dtype_23_13    23:2 13:2
# Three-bit index: one bit at position 22 followed by two bits at 19.
%index3_22_19   22:1 19:2

# A combination of tsz:imm3 -- extract esize.
%tszimm_esz     22:2 5:5 !function=tszimm_esz
|
||||
@ -716,6 +717,24 @@ UMIN_zzi 00100101 .. 101 011 110 ........ ..... @rdn_i8u
|
||||
# SVE integer multiply immediate (unpredicated)
MUL_zzi         00100101 .. 110 000 110 ........ .....          @rdn_i8s
|
||||
|
||||
### SVE FP Multiply-Add Indexed Group

# SVE floating-point multiply-add (indexed)
# One pattern per element size.  The index and rm fields trade bits:
#   esz=1: 3-bit index (via %index3_22_19), 3-bit rm
#   esz=2: 2-bit index,                     3-bit rm
#   esz=3: 1-bit index,                     4-bit rm
# ra is taken from %reg_movprfx (defined elsewhere in this file).
FMLA_zzxz       01100100 0.1 .. rm:3 00000 sub:1 rn:5 rd:5 \
                ra=%reg_movprfx index=%index3_22_19 esz=1
FMLA_zzxz       01100100 101 index:2 rm:3 00000 sub:1 rn:5 rd:5 \
                ra=%reg_movprfx esz=2
FMLA_zzxz       01100100 111 index:1 rm:4 00000 sub:1 rn:5 rd:5 \
                ra=%reg_movprfx esz=3
|
||||
|
||||
### SVE FP Multiply Indexed Group

# SVE floating-point multiply (indexed)
# One pattern per element size.  The index and rm fields trade bits:
#   esz=1: 3-bit index (via %index3_22_19), 3-bit rm
#   esz=2: 2-bit index,                     3-bit rm
#   esz=3: 1-bit index,                     4-bit rm
FMUL_zzx        01100100 0.1 .. rm:3 001000 rn:5 rd:5 \
                index=%index3_22_19 esz=1
FMUL_zzx        01100100 101 index:2 rm:3 001000 rn:5 rd:5      esz=2
FMUL_zzx        01100100 111 index:1 rm:4 001000 rn:5 rd:5      esz=3
|
||||
|
||||
### SVE FP Accumulating Reduction Group
|
||||
|
||||
# SVE floating-point serial reduction (predicated)
|
||||
|
@ -3400,6 +3400,56 @@ DO_ZZI(UMIN, umin)
|
||||
|
||||
#undef DO_ZZI
|
||||
|
||||
/*
|
||||
*** SVE Floating Point Multiply-Add Indexed Group
|
||||
*/
|
||||
|
||||
/*
 * Translate the FMLA_zzxz (indexed FP multiply-add) decode pattern.
 *
 * When the SVE access check passes, emit one out-of-line gvec call over
 * the full vector register size, with operands rd, rn, rm and ra and a
 * pointer to the float status.  The descriptor data word carries a->sub
 * in bit 0 and a->index in the bits above it.
 *
 * Always returns true; nothing is emitted here when sve_access_check()
 * returns false.
 */
static bool trans_FMLA_zzxz(DisasContext *s, arg_FMLA_zzxz *a, uint32_t insn)
{
    /* Helpers for esz 1, 2 and 3 respectively; looked up with esz - 1. */
    static gen_helper_gvec_4_ptr * const fns[3] = {
        gen_helper_gvec_fmla_idx_h,
        gen_helper_gvec_fmla_idx_s,
        gen_helper_gvec_fmla_idx_d,
    };
    gen_helper_gvec_4_ptr *fn;
    TCGv_ptr status;
    unsigned vsz;

    if (!sve_access_check(s)) {
        return true;
    }

    vsz = vec_full_reg_size(s);
    /* The status pointer is requested with "true" only for MO_16. */
    status = get_fpstatus_ptr(a->esz == MO_16);
    fn = fns[a->esz - 1];

    tcg_gen_gvec_4_ptr(vec_full_reg_offset(s, a->rd),
                       vec_full_reg_offset(s, a->rn),
                       vec_full_reg_offset(s, a->rm),
                       vec_full_reg_offset(s, a->ra),
                       status, vsz, vsz,
                       (a->index << 1) | a->sub,
                       fn);

    tcg_temp_free_ptr(status);
    return true;
}
|
||||
|
||||
/*
|
||||
*** SVE Floating Point Multiply Indexed Group
|
||||
*/
|
||||
|
||||
/*
 * Translate the FMUL_zzx (indexed FP multiply) decode pattern.
 *
 * When the SVE access check passes, emit one out-of-line gvec call over
 * the full vector register size, with operands rd, rn and rm and a
 * pointer to the float status.  The descriptor data word is a->index.
 *
 * Always returns true; nothing is emitted here when sve_access_check()
 * returns false.
 */
static bool trans_FMUL_zzx(DisasContext *s, arg_FMUL_zzx *a, uint32_t insn)
{
    /* Helpers for esz 1, 2 and 3 respectively; looked up with esz - 1. */
    static gen_helper_gvec_3_ptr * const fns[3] = {
        gen_helper_gvec_fmul_idx_h,
        gen_helper_gvec_fmul_idx_s,
        gen_helper_gvec_fmul_idx_d,
    };
    gen_helper_gvec_3_ptr *fn;
    TCGv_ptr status;
    unsigned vsz;

    if (!sve_access_check(s)) {
        return true;
    }

    vsz = vec_full_reg_size(s);
    /* The status pointer is requested with "true" only for MO_16. */
    status = get_fpstatus_ptr(a->esz == MO_16);
    fn = fns[a->esz - 1];

    tcg_gen_gvec_3_ptr(vec_full_reg_offset(s, a->rd),
                       vec_full_reg_offset(s, a->rn),
                       vec_full_reg_offset(s, a->rm),
                       status, vsz, vsz, a->index, fn);

    tcg_temp_free_ptr(status);
    return true;
}
|
||||
|
||||
/*
|
||||
*** SVE Floating Point Accumulating Reduction Group
|
||||
*/
|
||||
|
@ -495,3 +495,51 @@ DO_3OP(gvec_rsqrts_d, helper_rsqrtsf_f64, float64)
|
||||
|
||||
#endif
|
||||
#undef DO_3OP
|
||||
|
||||
/* For the indexed ops, SVE applies the index per 128-bit vector segment.
|
||||
* For AdvSIMD, there is of course only one such vector segment.
|
||||
*/
|
||||
|
||||
#define DO_MUL_IDX(NAME, TYPE, H) \
|
||||
void HELPER(NAME)(void *vd, void *vn, void *vm, void *stat, uint32_t desc) \
|
||||
{ \
|
||||
intptr_t i, j, oprsz = simd_oprsz(desc), segment = 16 / sizeof(TYPE); \
|
||||
intptr_t idx = simd_data(desc); \
|
||||
TYPE *d = vd, *n = vn, *m = vm; \
|
||||
for (i = 0; i < oprsz / sizeof(TYPE); i += segment) { \
|
||||
TYPE mm = m[H(i + idx)]; \
|
||||
for (j = 0; j < segment; j++) { \
|
||||
d[i + j] = TYPE##_mul(n[i + j], mm, stat); \
|
||||
} \
|
||||
} \
|
||||
}
|
||||
|
||||
DO_MUL_IDX(gvec_fmul_idx_h, float16, H2)
|
||||
DO_MUL_IDX(gvec_fmul_idx_s, float32, H4)
|
||||
DO_MUL_IDX(gvec_fmul_idx_d, float64, )
|
||||
|
||||
#undef DO_MUL_IDX
|
||||
|
||||
#define DO_FMLA_IDX(NAME, TYPE, H) \
|
||||
void HELPER(NAME)(void *vd, void *vn, void *vm, void *va, \
|
||||
void *stat, uint32_t desc) \
|
||||
{ \
|
||||
intptr_t i, j, oprsz = simd_oprsz(desc), segment = 16 / sizeof(TYPE); \
|
||||
TYPE op1_neg = extract32(desc, SIMD_DATA_SHIFT, 1); \
|
||||
intptr_t idx = desc >> (SIMD_DATA_SHIFT + 1); \
|
||||
TYPE *d = vd, *n = vn, *m = vm, *a = va; \
|
||||
op1_neg <<= (8 * sizeof(TYPE) - 1); \
|
||||
for (i = 0; i < oprsz / sizeof(TYPE); i += segment) { \
|
||||
TYPE mm = m[H(i + idx)]; \
|
||||
for (j = 0; j < segment; j++) { \
|
||||
d[i + j] = TYPE##_muladd(n[i + j] ^ op1_neg, \
|
||||
mm, a[i + j], 0, stat); \
|
||||
} \
|
||||
} \
|
||||
}
|
||||
|
||||
DO_FMLA_IDX(gvec_fmla_idx_h, float16, H2)
|
||||
DO_FMLA_IDX(gvec_fmla_idx_s, float32, H4)
|
||||
DO_FMLA_IDX(gvec_fmla_idx_d, float64, )
|
||||
|
||||
#undef DO_FMLA_IDX
|
||||
|
Loading…
Reference in New Issue
Block a user