target/arm: Implement SVE2 saturating extract narrow

Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
Message-id: 20210525010358.152808-25-richard.henderson@linaro.org
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
This commit is contained in:
Richard Henderson 2021-05-24 18:02:50 -07:00 committed by Peter Maydell
parent 289a17976d
commit 5ff2838d3d
4 changed files with 330 additions and 0 deletions

View File

@ -2419,3 +2419,27 @@ DEF_HELPER_FLAGS_5(sve2_uabal_d, TCG_CALL_NO_RWG,
DEF_HELPER_FLAGS_5(sve2_adcl_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32)
DEF_HELPER_FLAGS_5(sve2_adcl_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32)
/*
 * Out-of-line helpers for the SVE2 saturating extract narrow instructions
 * (SQXTN, UQXTN, SQXTUN; "b" and "t" forms).  Every helper returns void and
 * takes two pointer arguments followed by a 32-bit descriptor.  The
 * _h/_s/_d suffix names the size of the wide source element.
 */
DEF_HELPER_FLAGS_3(sve2_sqxtnb_h, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
DEF_HELPER_FLAGS_3(sve2_sqxtnb_s, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
DEF_HELPER_FLAGS_3(sve2_sqxtnb_d, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
DEF_HELPER_FLAGS_3(sve2_uqxtnb_h, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
DEF_HELPER_FLAGS_3(sve2_uqxtnb_s, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
DEF_HELPER_FLAGS_3(sve2_uqxtnb_d, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
DEF_HELPER_FLAGS_3(sve2_sqxtunb_h, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
DEF_HELPER_FLAGS_3(sve2_sqxtunb_s, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
DEF_HELPER_FLAGS_3(sve2_sqxtunb_d, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
DEF_HELPER_FLAGS_3(sve2_sqxtnt_h, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
DEF_HELPER_FLAGS_3(sve2_sqxtnt_s, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
DEF_HELPER_FLAGS_3(sve2_sqxtnt_d, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
DEF_HELPER_FLAGS_3(sve2_uqxtnt_h, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
DEF_HELPER_FLAGS_3(sve2_uqxtnt_s, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
DEF_HELPER_FLAGS_3(sve2_uqxtnt_d, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
DEF_HELPER_FLAGS_3(sve2_sqxtunt_h, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
DEF_HELPER_FLAGS_3(sve2_sqxtunt_s, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
DEF_HELPER_FLAGS_3(sve2_sqxtunt_d, TCG_CALL_NO_RWG, void, ptr, ptr, i32)

View File

@ -1272,3 +1272,15 @@ SLI 01000101 .. 0 ..... 11110 1 ..... ..... @rd_rn_tszimm_shl
# TODO: Use @rda and %reg_movprfx here.
SABA 01000101 .. 0 ..... 11111 0 ..... ..... @rd_rn_rm
UABA 01000101 .. 0 ..... 11111 1 ..... ..... @rd_rn_rm
#### SVE2 Narrowing
## SVE2 saturating extract narrow
# Bits 23, 18-16 are zero, limited in the translator via esz < 3 & imm == 0.
# Within this group, bits [12:11] select the operation (00 = SQXTN,
# 01 = UQXTN, 10 = SQXTUN) and bit 10 selects the B form (0) or T form (1).
SQXTNB 01000101 .. 1 ..... 010 000 ..... ..... @rd_rn_tszimm_shl
SQXTNT 01000101 .. 1 ..... 010 001 ..... ..... @rd_rn_tszimm_shl
UQXTNB 01000101 .. 1 ..... 010 010 ..... ..... @rd_rn_tszimm_shl
UQXTNT 01000101 .. 1 ..... 010 011 ..... ..... @rd_rn_tszimm_shl
SQXTUNB 01000101 .. 1 ..... 010 100 ..... ..... @rd_rn_tszimm_shl
SQXTUNT 01000101 .. 1 ..... 010 101 ..... ..... @rd_rn_tszimm_shl

View File

@ -1269,6 +1269,62 @@ DO_ZZZW_ACC(sve2_uabal_d, uint64_t, uint32_t, , H1_4, DO_ABD)
#undef DO_ZZZW_ACC
/*
 * Define helper NAME for a "B" form narrowing extract.
 *
 * The operand size in bytes comes from simd_oprsz(desc).  Every TYPE-sized
 * element of vn is passed through OP, the result is masked with
 * MAKE_64BIT_MASK(0, sizeof(TYPE) * 4) (half the element width in bits),
 * and the value is stored as a full TYPE element at the same offset in vd.
 */
#define DO_XTNB(NAME, TYPE, OP) \
void HELPER(NAME)(void *vd, void *vn, uint32_t desc)                \
{                                                                   \
    intptr_t total = simd_oprsz(desc);                              \
    intptr_t off;                                                   \
    for (off = 0; off < total; off += sizeof(TYPE)) {               \
        TYPE *src = (TYPE *)(vn + off);                             \
        TYPE *dst = (TYPE *)(vd + off);                             \
        TYPE val = *src;                                            \
        val = OP(val) & MAKE_64BIT_MASK(0, sizeof(TYPE) * 4);       \
        *dst = val;                                                 \
    }                                                               \
}
/*
 * Define helper NAME for a "T" form narrowing extract.
 *
 * Every TYPE-sized element of vn is passed through OP and the result is
 * stored as a TYPEN value at byte offset H(sizeof(TYPEN)) within the
 * corresponding TYPE-sized slot of vd.  No other bytes of vd are written,
 * so the remaining part of each destination slot keeps its old contents.
 */
#define DO_XTNT(NAME, TYPE, TYPEN, H, OP) \
void HELPER(NAME)(void *vd, void *vn, uint32_t desc)                \
{                                                                   \
    intptr_t total = simd_oprsz(desc);                              \
    intptr_t top = H(sizeof(TYPEN));                                \
    intptr_t off;                                                   \
    for (off = 0; off < total; off += sizeof(TYPE)) {               \
        TYPE val = *(TYPE *)(vn + off);                             \
        TYPEN *dst = (TYPEN *)(vd + off + top);                     \
        *dst = OP(val);                                             \
    }                                                               \
}
/*
 * Signed saturation steps: each hands do_sat_bhs the limits of the signed
 * type that is half as wide as the source element.
 * NOTE(review): do_sat_bhs is defined elsewhere; presumably it clamps n to
 * the [min, max] range given -- confirm.
 */
#define DO_SQXTN_H(n) do_sat_bhs(n, INT8_MIN, INT8_MAX)
#define DO_SQXTN_S(n) do_sat_bhs(n, INT16_MIN, INT16_MAX)
#define DO_SQXTN_D(n) do_sat_bhs(n, INT32_MIN, INT32_MAX)
/* SQXTNB/SQXTNT: signed source elements, signed bounds. */
DO_XTNB(sve2_sqxtnb_h, int16_t, DO_SQXTN_H)
DO_XTNB(sve2_sqxtnb_s, int32_t, DO_SQXTN_S)
DO_XTNB(sve2_sqxtnb_d, int64_t, DO_SQXTN_D)
DO_XTNT(sve2_sqxtnt_h, int16_t, int8_t, H1, DO_SQXTN_H)
DO_XTNT(sve2_sqxtnt_s, int32_t, int16_t, H1_2, DO_SQXTN_S)
DO_XTNT(sve2_sqxtnt_d, int64_t, int32_t, H1_4, DO_SQXTN_D)
/* Unsigned saturation steps: bounds are 0 and the narrow unsigned maximum. */
#define DO_UQXTN_H(n) do_sat_bhs(n, 0, UINT8_MAX)
#define DO_UQXTN_S(n) do_sat_bhs(n, 0, UINT16_MAX)
#define DO_UQXTN_D(n) do_sat_bhs(n, 0, UINT32_MAX)
/* UQXTNB/UQXTNT: unsigned source elements, unsigned bounds. */
DO_XTNB(sve2_uqxtnb_h, uint16_t, DO_UQXTN_H)
DO_XTNB(sve2_uqxtnb_s, uint32_t, DO_UQXTN_S)
DO_XTNB(sve2_uqxtnb_d, uint64_t, DO_UQXTN_D)
DO_XTNT(sve2_uqxtnt_h, uint16_t, uint8_t, H1, DO_UQXTN_H)
DO_XTNT(sve2_uqxtnt_s, uint32_t, uint16_t, H1_2, DO_UQXTN_S)
DO_XTNT(sve2_uqxtnt_d, uint64_t, uint32_t, H1_4, DO_UQXTN_D)
/*
 * SQXTUNB/SQXTUNT: the source element is loaded as a signed type but the
 * unsigned bounds above are reused.
 */
DO_XTNB(sve2_sqxtunb_h, int16_t, DO_UQXTN_H)
DO_XTNB(sve2_sqxtunb_s, int32_t, DO_UQXTN_S)
DO_XTNB(sve2_sqxtunb_d, int64_t, DO_UQXTN_D)
DO_XTNT(sve2_sqxtunt_h, int16_t, int8_t, H1, DO_UQXTN_H)
DO_XTNT(sve2_sqxtunt_s, int32_t, int16_t, H1_2, DO_UQXTN_S)
DO_XTNT(sve2_sqxtunt_d, int64_t, int32_t, H1_4, DO_UQXTN_D)
/* The generator macros are not needed past this point. */
#undef DO_XTNB
#undef DO_XTNT
void HELPER(sve2_adcl_s)(void *vd, void *vn, void *vm, void *va, uint32_t desc)
{
intptr_t i, opr_sz = simd_oprsz(desc);

View File

@ -6459,3 +6459,241 @@ static bool trans_UABA(DisasContext *s, arg_rrr_esz *a)
{
return do_sve2_fn_zzz(s, a, gen_gvec_uaba);
}
/*
 * Shared translation body for the SVE2 saturating extract narrow insns.
 *
 * @s:   disassembly context
 * @a:   decoded operands; esz indexes @ops, imm must be zero
 * @ops: three gvec expanders, one per accepted value of a->esz
 *
 * Returns false when the encoding is rejected (esz outside 0..MO_32,
 * non-zero imm, or SVE2 not present), true otherwise.  The instruction is
 * still reported as handled when sve_access_check() fails; in that case no
 * vector operation is emitted.
 */
static bool do_sve2_narrow_extract(DisasContext *s, arg_rri_esz *a,
                                   const GVecGen2 ops[3])
{
    unsigned vsz;

    if (a->esz < 0 || a->esz > MO_32) {
        return false;
    }
    if (a->imm != 0) {
        return false;
    }
    if (!dc_isar_feature(aa64_sve2, s)) {
        return false;
    }
    if (!sve_access_check(s)) {
        return true;
    }

    /* Operate on the whole vector: oprsz and maxsz are both vsz. */
    vsz = vec_full_reg_size(s);
    tcg_gen_gvec_2(vec_full_reg_offset(s, a->rd),
                   vec_full_reg_offset(s, a->rn),
                   vsz, vsz, &ops[a->esz]);
    return true;
}
/*
 * Zero-terminated opcode list used as .opt_opc by the SQXTNB and SQXTNT
 * expanders.  The list is shared: shli is only emitted by gen_sqxtnt_vec.
 */
static const TCGOpcode sqxtn_list[] = {
INDEX_op_shli_vec, INDEX_op_smin_vec, INDEX_op_smax_vec, 0
};
/*
 * Vector expansion of SQXTNB: clamp each signed element of n to the range
 * of a signed value half as wide, then mask the result to the low half of
 * the element and write it to d.
 */
static void gen_sqxtnb_vec(unsigned vece, TCGv_vec d, TCGv_vec n)
{
    /* halfbits is 4 << vece: 8, 16 or 32 for MO_16, MO_32, MO_64. */
    const int halfbits = 4 << vece;
    const int64_t low_mask = (1ull << halfbits) - 1;
    const int64_t sat_min = -1ull << (halfbits - 1);
    const int64_t sat_max = -sat_min - 1;
    TCGv_vec bound = tcg_temp_new_vec_matching(d);

    /* d = min(max(n, sat_min), sat_max), signed. */
    tcg_gen_dupi_vec(vece, bound, sat_min);
    tcg_gen_smax_vec(vece, d, n, bound);
    tcg_gen_dupi_vec(vece, bound, sat_max);
    tcg_gen_smin_vec(vece, d, d, bound);

    /* Drop the sign-extension bits in the upper half of each element. */
    tcg_gen_dupi_vec(vece, bound, low_mask);
    tcg_gen_and_vec(vece, d, d, bound);

    tcg_temp_free_vec(bound);
}
/*
 * Translate SQXTNB.  The table is indexed by a->esz inside
 * do_sve2_narrow_extract(); each entry names the inline vector expansion,
 * the out-of-line helper, and the element size the operation runs at.
 */
static bool trans_SQXTNB(DisasContext *s, arg_rri_esz *a)
{
    static const GVecGen2 expanders[3] = {
        {
            .vece = MO_16,
            .opt_opc = sqxtn_list,
            .fniv = gen_sqxtnb_vec,
            .fno = gen_helper_sve2_sqxtnb_h,
        },
        {
            .vece = MO_32,
            .opt_opc = sqxtn_list,
            .fniv = gen_sqxtnb_vec,
            .fno = gen_helper_sve2_sqxtnb_s,
        },
        {
            .vece = MO_64,
            .opt_opc = sqxtn_list,
            .fniv = gen_sqxtnb_vec,
            .fno = gen_helper_sve2_sqxtnb_d,
        },
    };

    return do_sve2_narrow_extract(s, a, expanders);
}
/*
 * Vector expansion of SQXTNT: clamp each signed element of n to the range
 * of a signed value half as wide, move it into the upper half of the
 * element, and merge it with the existing low half of d.
 *
 * Note that n is used as scratch and is overwritten.
 */
static void gen_sqxtnt_vec(unsigned vece, TCGv_vec d, TCGv_vec n)
{
    /* halfbits is 4 << vece: 8, 16 or 32 for MO_16, MO_32, MO_64. */
    const int halfbits = 4 << vece;
    const int64_t low_mask = (1ull << halfbits) - 1;
    const int64_t sat_min = -1ull << (halfbits - 1);
    const int64_t sat_max = -sat_min - 1;
    TCGv_vec bound = tcg_temp_new_vec_matching(d);

    /* n = min(max(n, sat_min), sat_max), signed. */
    tcg_gen_dupi_vec(vece, bound, sat_min);
    tcg_gen_smax_vec(vece, n, n, bound);
    tcg_gen_dupi_vec(vece, bound, sat_max);
    tcg_gen_smin_vec(vece, n, n, bound);

    /* Position the narrowed value in the upper half of the element. */
    tcg_gen_shli_vec(vece, n, n, halfbits);

    /* Bits set in low_mask come from the old d, the rest from n. */
    tcg_gen_dupi_vec(vece, bound, low_mask);
    tcg_gen_bitsel_vec(vece, d, bound, d, n);

    tcg_temp_free_vec(bound);
}
/*
 * Translate SQXTNT.  The table is indexed by a->esz inside
 * do_sve2_narrow_extract().  load_dest is set on every entry because the
 * vector expansion reads the previous contents of the destination.
 */
static bool trans_SQXTNT(DisasContext *s, arg_rri_esz *a)
{
    static const GVecGen2 expanders[3] = {
        {
            .vece = MO_16,
            .load_dest = true,
            .opt_opc = sqxtn_list,
            .fniv = gen_sqxtnt_vec,
            .fno = gen_helper_sve2_sqxtnt_h,
        },
        {
            .vece = MO_32,
            .load_dest = true,
            .opt_opc = sqxtn_list,
            .fniv = gen_sqxtnt_vec,
            .fno = gen_helper_sve2_sqxtnt_s,
        },
        {
            .vece = MO_64,
            .load_dest = true,
            .opt_opc = sqxtn_list,
            .fniv = gen_sqxtnt_vec,
            .fno = gen_helper_sve2_sqxtnt_d,
        },
    };

    return do_sve2_narrow_extract(s, a, expanders);
}
/*
 * Zero-terminated opcode list used as .opt_opc by the UQXTNB and UQXTNT
 * expanders.  The list is shared: shli is only emitted by gen_uqxtnt_vec.
 */
static const TCGOpcode uqxtn_list[] = {
INDEX_op_shli_vec, INDEX_op_umin_vec, 0
};
/*
 * Vector expansion of UQXTNB: limit each unsigned element of n to the
 * largest value that fits in half the element width and write it to d.
 * No separate masking step is needed since the limit itself has only the
 * low half of its bits set.
 */
static void gen_uqxtnb_vec(unsigned vece, TCGv_vec d, TCGv_vec n)
{
    /* halfbits is 4 << vece: 8, 16 or 32 for MO_16, MO_32, MO_64. */
    const int halfbits = 4 << vece;
    const int64_t sat_max = (1ull << halfbits) - 1;
    TCGv_vec limit = tcg_temp_new_vec_matching(d);

    tcg_gen_dupi_vec(vece, limit, sat_max);
    tcg_gen_umin_vec(vece, d, n, limit);

    tcg_temp_free_vec(limit);
}
/*
 * Translate UQXTNB.  The table is indexed by a->esz inside
 * do_sve2_narrow_extract(); each entry names the inline vector expansion,
 * the out-of-line helper, and the element size the operation runs at.
 */
static bool trans_UQXTNB(DisasContext *s, arg_rri_esz *a)
{
    static const GVecGen2 expanders[3] = {
        {
            .vece = MO_16,
            .opt_opc = uqxtn_list,
            .fniv = gen_uqxtnb_vec,
            .fno = gen_helper_sve2_uqxtnb_h,
        },
        {
            .vece = MO_32,
            .opt_opc = uqxtn_list,
            .fniv = gen_uqxtnb_vec,
            .fno = gen_helper_sve2_uqxtnb_s,
        },
        {
            .vece = MO_64,
            .opt_opc = uqxtn_list,
            .fniv = gen_uqxtnb_vec,
            .fno = gen_helper_sve2_uqxtnb_d,
        },
    };

    return do_sve2_narrow_extract(s, a, expanders);
}
/*
 * Vector expansion of UQXTNT: limit each unsigned element of n to the
 * largest value that fits in half the element width, move it into the
 * upper half of the element, and merge it with the existing low half of d.
 *
 * Note that n is used as scratch and is overwritten.
 */
static void gen_uqxtnt_vec(unsigned vece, TCGv_vec d, TCGv_vec n)
{
    /* halfbits is 4 << vece: 8, 16 or 32 for MO_16, MO_32, MO_64. */
    const int halfbits = 4 << vece;
    const int64_t sat_max = (1ull << halfbits) - 1;
    TCGv_vec limit = tcg_temp_new_vec_matching(d);

    tcg_gen_dupi_vec(vece, limit, sat_max);
    tcg_gen_umin_vec(vece, n, n, limit);
    tcg_gen_shli_vec(vece, n, n, halfbits);

    /* The saturation limit doubles as the low-half select mask. */
    tcg_gen_bitsel_vec(vece, d, limit, d, n);

    tcg_temp_free_vec(limit);
}
/*
 * Translate UQXTNT.  The table is indexed by a->esz inside
 * do_sve2_narrow_extract().  load_dest is set on every entry because the
 * vector expansion reads the previous contents of the destination.
 */
static bool trans_UQXTNT(DisasContext *s, arg_rri_esz *a)
{
    static const GVecGen2 expanders[3] = {
        {
            .vece = MO_16,
            .load_dest = true,
            .opt_opc = uqxtn_list,
            .fniv = gen_uqxtnt_vec,
            .fno = gen_helper_sve2_uqxtnt_h,
        },
        {
            .vece = MO_32,
            .load_dest = true,
            .opt_opc = uqxtn_list,
            .fniv = gen_uqxtnt_vec,
            .fno = gen_helper_sve2_uqxtnt_s,
        },
        {
            .vece = MO_64,
            .load_dest = true,
            .opt_opc = uqxtn_list,
            .fniv = gen_uqxtnt_vec,
            .fno = gen_helper_sve2_uqxtnt_d,
        },
    };

    return do_sve2_narrow_extract(s, a, expanders);
}
/*
 * Zero-terminated opcode list used as .opt_opc by the SQXTUNB and SQXTUNT
 * expanders.  The list is shared: shli is only emitted by gen_sqxtunt_vec.
 */
static const TCGOpcode sqxtun_list[] = {
INDEX_op_shli_vec, INDEX_op_umin_vec, INDEX_op_smax_vec, 0
};
/*
 * Vector expansion of SQXTUNB: treat each element of n as signed, raise
 * negative values to zero, then limit the result to the largest unsigned
 * value that fits in half the element width and write it to d.
 */
static void gen_sqxtunb_vec(unsigned vece, TCGv_vec d, TCGv_vec n)
{
    /* halfbits is 4 << vece: 8, 16 or 32 for MO_16, MO_32, MO_64. */
    const int halfbits = 4 << vece;
    const int64_t sat_max = (1ull << halfbits) - 1;
    TCGv_vec bound = tcg_temp_new_vec_matching(d);

    /* Signed max against zero, then unsigned min against sat_max. */
    tcg_gen_dupi_vec(vece, bound, 0);
    tcg_gen_smax_vec(vece, d, n, bound);
    tcg_gen_dupi_vec(vece, bound, sat_max);
    tcg_gen_umin_vec(vece, d, d, bound);

    tcg_temp_free_vec(bound);
}
/*
 * Translate SQXTUNB.  The table is indexed by a->esz inside
 * do_sve2_narrow_extract(); each entry names the inline vector expansion,
 * the out-of-line helper, and the element size the operation runs at.
 */
static bool trans_SQXTUNB(DisasContext *s, arg_rri_esz *a)
{
    static const GVecGen2 expanders[3] = {
        {
            .vece = MO_16,
            .opt_opc = sqxtun_list,
            .fniv = gen_sqxtunb_vec,
            .fno = gen_helper_sve2_sqxtunb_h,
        },
        {
            .vece = MO_32,
            .opt_opc = sqxtun_list,
            .fniv = gen_sqxtunb_vec,
            .fno = gen_helper_sve2_sqxtunb_s,
        },
        {
            .vece = MO_64,
            .opt_opc = sqxtun_list,
            .fniv = gen_sqxtunb_vec,
            .fno = gen_helper_sve2_sqxtunb_d,
        },
    };

    return do_sve2_narrow_extract(s, a, expanders);
}
/*
 * Vector expansion of SQXTUNT: treat each element of n as signed, raise
 * negative values to zero, limit the result to the largest unsigned value
 * that fits in half the element width, move it into the upper half of the
 * element, and merge it with the existing low half of d.
 *
 * Note that n is used as scratch and is overwritten.
 */
static void gen_sqxtunt_vec(unsigned vece, TCGv_vec d, TCGv_vec n)
{
    /* halfbits is 4 << vece: 8, 16 or 32 for MO_16, MO_32, MO_64. */
    const int halfbits = 4 << vece;
    const int64_t sat_max = (1ull << halfbits) - 1;
    TCGv_vec bound = tcg_temp_new_vec_matching(d);

    /* Signed max against zero, then unsigned min against sat_max. */
    tcg_gen_dupi_vec(vece, bound, 0);
    tcg_gen_smax_vec(vece, n, n, bound);
    tcg_gen_dupi_vec(vece, bound, sat_max);
    tcg_gen_umin_vec(vece, n, n, bound);

    tcg_gen_shli_vec(vece, n, n, halfbits);

    /* bound still holds sat_max, which doubles as the low-half mask. */
    tcg_gen_bitsel_vec(vece, d, bound, d, n);

    tcg_temp_free_vec(bound);
}
/*
 * Translate SQXTUNT.  The table is indexed by a->esz inside
 * do_sve2_narrow_extract().  load_dest is set on every entry because the
 * vector expansion reads the previous contents of the destination.
 */
static bool trans_SQXTUNT(DisasContext *s, arg_rri_esz *a)
{
    static const GVecGen2 expanders[3] = {
        {
            .vece = MO_16,
            .load_dest = true,
            .opt_opc = sqxtun_list,
            .fniv = gen_sqxtunt_vec,
            .fno = gen_helper_sve2_sqxtunt_h,
        },
        {
            .vece = MO_32,
            .load_dest = true,
            .opt_opc = sqxtun_list,
            .fniv = gen_sqxtunt_vec,
            .fno = gen_helper_sve2_sqxtunt_s,
        },
        {
            .vece = MO_64,
            .load_dest = true,
            .opt_opc = sqxtun_list,
            .fniv = gen_sqxtunt_vec,
            .fno = gen_helper_sve2_sqxtunt_d,
        },
    };

    return do_sve2_narrow_extract(s, a, expanders);
}