target/arm: Implement SME ADDHA, ADDVA

Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
Message-id: 20220708151540.18136-24-richard.henderson@linaro.org
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
This commit is contained in:
Richard Henderson 2022-07-08 20:45:18 +05:30 committed by Peter Maydell
parent 4c46a5f12c
commit bc4420d9bd
4 changed files with 137 additions and 0 deletions

View File

@ -115,3 +115,8 @@ DEF_HELPER_FLAGS_5(sme_st1q_be_h_mte, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i
/*
 * Remaining sme_st1q_*_mte helper declarations: each takes
 * (env, ptr, ptr, tl, i32), returns void and is flagged TCG_CALL_NO_WG.
 * NOTE(review): judging by the names these are the 128-bit-element store
 * variants (big/little endian, horizontal/vertical) with MTE checking;
 * confirm against the implementations.
 */
DEF_HELPER_FLAGS_5(sme_st1q_le_h_mte, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32)
DEF_HELPER_FLAGS_5(sme_st1q_be_v_mte, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32)
DEF_HELPER_FLAGS_5(sme_st1q_le_v_mte, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32)
/*
 * SME ADDHA / ADDVA helpers, one per operation and element size
 * (_s = 32-bit, _d = 64-bit elements).  Arguments, in order, are the
 * pointers passed by the translator: ZA tile base, Zn, Pn, Pm, followed
 * by the 32-bit simd descriptor.  None of them takes env.
 */
DEF_HELPER_FLAGS_5(sme_addha_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32)
DEF_HELPER_FLAGS_5(sme_addva_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32)
DEF_HELPER_FLAGS_5(sme_addha_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32)
DEF_HELPER_FLAGS_5(sme_addva_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32)

View File

@ -53,3 +53,14 @@ LDST1 1110000 111 st:1 rm:5 v:1 .. pg:3 rn:5 0 za_imm:4 \
# LDR and STR share one encoding and the @ldstr format (defined outside
# this excerpt); they differ only in the single bit following "100"
# (0 = LDR, 1 = STR).
LDR 1110000 100 0 000000 .. 000 ..... 0 .... @ldstr
STR 1110000 100 1 000000 .. 000 ..... 0 .... @ldstr
### SME Add Vector to Array
# Argument set shared by all four patterns: destination tile number (zad),
# source vector register (zn) and the two governing predicates (pm, pn).
&adda zad zn pm pn
# Field layouts.  The two formats differ only in the width of zad:
# 2 bits for the 32-bit element form, 3 bits for the 64-bit element form.
@adda_32 ........ .. ..... . pm:3 pn:3 zn:5 ... zad:2 &adda
@adda_64 ........ .. ..... . pm:3 pn:3 zn:5 .. zad:3 &adda
# The 2-bit field after the leading byte selects the element size
# (10 = _s, 11 = _d); the bit after "01000" selects ADDHA (0) or ADDVA (1).
ADDHA_s 11000000 10 01000 0 ... ... ..... 000 .. @adda_32
ADDVA_s 11000000 10 01000 1 ... ... ..... 000 .. @adda_32
ADDHA_d 11000000 11 01000 0 ... ... ..... 00 ... @adda_64
ADDVA_d 11000000 11 01000 1 ... ... ..... 00 ... @adda_64

View File

@ -828,3 +828,93 @@ DO_ST(q, _be, MO_128)
DO_ST(q, _le, MO_128)
#undef DO_ST
void HELPER(sme_addha_s)(void *vzda, void *vzn, void *vpn,
void *vpm, uint32_t desc)
{
intptr_t row, col, oprsz = simd_oprsz(desc) / 4;
uint64_t *pn = vpn, *pm = vpm;
uint32_t *zda = vzda, *zn = vzn;
for (row = 0; row < oprsz; ) {
uint64_t pa = pn[row >> 4];
do {
if (pa & 1) {
for (col = 0; col < oprsz; ) {
uint64_t pb = pm[col >> 4];
do {
if (pb & 1) {
zda[tile_vslice_index(row) + H4(col)] += zn[H4(col)];
}
pb >>= 4;
} while (++col & 15);
}
}
pa >>= 4;
} while (++row & 15);
}
}
void HELPER(sme_addha_d)(void *vzda, void *vzn, void *vpn,
void *vpm, uint32_t desc)
{
intptr_t row, col, oprsz = simd_oprsz(desc) / 8;
uint8_t *pn = vpn, *pm = vpm;
uint64_t *zda = vzda, *zn = vzn;
for (row = 0; row < oprsz; ++row) {
if (pn[H1(row)] & 1) {
for (col = 0; col < oprsz; ++col) {
if (pm[H1(col)] & 1) {
zda[tile_vslice_index(row) + col] += zn[col];
}
}
}
}
}
void HELPER(sme_addva_s)(void *vzda, void *vzn, void *vpn,
void *vpm, uint32_t desc)
{
intptr_t row, col, oprsz = simd_oprsz(desc) / 4;
uint64_t *pn = vpn, *pm = vpm;
uint32_t *zda = vzda, *zn = vzn;
for (row = 0; row < oprsz; ) {
uint64_t pa = pn[row >> 4];
do {
if (pa & 1) {
uint32_t zn_row = zn[H4(row)];
for (col = 0; col < oprsz; ) {
uint64_t pb = pm[col >> 4];
do {
if (pb & 1) {
zda[tile_vslice_index(row) + H4(col)] += zn_row;
}
pb >>= 4;
} while (++col & 15);
}
}
pa >>= 4;
} while (++row & 15);
}
}
void HELPER(sme_addva_d)(void *vzda, void *vzn, void *vpn,
void *vpm, uint32_t desc)
{
intptr_t row, col, oprsz = simd_oprsz(desc) / 8;
uint8_t *pn = vpn, *pm = vpm;
uint64_t *zda = vzda, *zn = vzn;
for (row = 0; row < oprsz; ++row) {
if (pn[H1(row)] & 1) {
uint64_t zn_row = zn[row];
for (col = 0; col < oprsz; ++col) {
if (pm[H1(col)] & 1) {
zda[tile_vslice_index(row) + col] += zn_row;
}
}
}
}
}

View File

@ -267,3 +267,34 @@ static bool do_ldst_r(DisasContext *s, arg_ldstr *a, GenLdStR *fn)
/*
 * LDR and STR both go through do_ldst_r, differing only in the generator
 * passed (gen_sve_ldr vs gen_sve_str).  TRANS_FEAT is defined outside
 * this excerpt; presumably it emits the trans_* entry point and gates it
 * on the named feature test (aa64_sme here).
 */
TRANS_FEAT(LDR, aa64_sme, do_ldst_r, a, gen_sve_ldr)
TRANS_FEAT(STR, aa64_sme, do_ldst_r, a, gen_sve_str)
/*
 * Translate SME ADDHA / ADDVA: emit a call to @fn that accumulates
 * vector Zn into tile ZAda under the predicates Pn and Pm.
 *
 * @s:   translation context
 * @a:   decoded operands: zad (tile number), zn, pn, pm
 * @esz: element size of the tile (MO_32 or MO_64); not needed to locate
 *       the tile base, kept for the benefit of the TRANS_FEAT callers
 * @fn:  out-of-line helper, called as fn(za, zn, pn, pm, desc)
 *
 * Always returns true: the instruction is treated as handled even when
 * sme_smza_enabled_check() fails (presumably that check has already
 * arranged for the exception to be raised -- confirm).
 */
static bool do_adda(DisasContext *s, arg_adda *a, MemOp esz,
                    gen_helper_gvec_4 *fn)
{
    int svl = streaming_vec_reg_size(s);
    /* Both oprsz and maxsz are the streaming vector length; no extra data. */
    uint32_t desc = simd_desc(svl, svl, 0);
    TCGv_ptr za, zn, pn, pm;

    if (!sme_smza_enabled_check(s)) {
        return true;
    }

    /*
     * These insns operate on an entire tile, so a->zad is a bare tile
     * number, not a combined tile+slice index.  Passing it through
     * get_tile_rowcol() would split slice bits off the tile number and
     * resolve the wrong base for any tile other than 0.  The helpers
     * address row r at tile_vslice_index(r) elements from the base, so
     * all they need is the first row of the tile, which for tile N is
     * ZA storage row N.
     */
    za = tcg_temp_new_ptr();
    tcg_gen_addi_ptr(za, cpu_env,
                     offsetof(CPUARMState, zarray)
                     + a->zad * sizeof(ARMVectorReg));

    zn = vec_full_reg_ptr(s, a->zn);
    pn = pred_full_reg_ptr(s, a->pn);
    pm = pred_full_reg_ptr(s, a->pm);

    fn(za, zn, pn, pm, tcg_constant_i32(desc));

    tcg_temp_free_ptr(za);
    tcg_temp_free_ptr(zn);
    tcg_temp_free_ptr(pn);
    tcg_temp_free_ptr(pm);
    return true;
}
/*
 * All four ADDHA/ADDVA forms share do_adda and differ only in element
 * size and helper.  The 32-bit forms are tied to aa64_sme, the 64-bit
 * forms to aa64_sme_i16i64.
 */
TRANS_FEAT(ADDHA_s, aa64_sme, do_adda, a, MO_32, gen_helper_sme_addha_s)
TRANS_FEAT(ADDVA_s, aa64_sme, do_adda, a, MO_32, gen_helper_sme_addva_s)
TRANS_FEAT(ADDHA_d, aa64_sme_i16i64, do_adda, a, MO_64, gen_helper_sme_addha_d)
TRANS_FEAT(ADDVA_d, aa64_sme_i16i64, do_adda, a, MO_64, gen_helper_sme_addva_d)