target/arm: Implement SVE2 WHILEGT, WHILEGE, WHILEHI, WHILEHS

Rename the existing sve_while (less-than) helper to sve_whilel
to make room for a new sve_whileg helper for greater-than.

Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
Message-id: 20210525010358.152808-31-richard.henderson@linaro.org
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
This commit is contained in:
Richard Henderson 2021-05-24 18:02:56 -07:00 committed by Peter Maydell
parent 743bb14773
commit 34688dbc1c
4 changed files with 82 additions and 17 deletions

View File

@ -913,7 +913,8 @@ DEF_HELPER_FLAGS_4(sve_brkns, TCG_CALL_NO_RWG, i32, ptr, ptr, ptr, i32)
DEF_HELPER_FLAGS_3(sve_cntp, TCG_CALL_NO_RWG, i64, ptr, ptr, i32)
DEF_HELPER_FLAGS_3(sve_while, TCG_CALL_NO_RWG, i32, ptr, i32, i32)
DEF_HELPER_FLAGS_3(sve_whilel, TCG_CALL_NO_RWG, i32, ptr, i32, i32)
DEF_HELPER_FLAGS_3(sve_whileg, TCG_CALL_NO_RWG, i32, ptr, i32, i32)
DEF_HELPER_FLAGS_4(sve_subri_b, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32)
DEF_HELPER_FLAGS_4(sve_subri_h, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32)

View File

@ -700,7 +700,7 @@ SINCDECP_z 00100101 .. 1010 d:1 u:1 10000 00 .... ..... @incdec2_pred
CTERM 00100101 1 sf:1 1 rm:5 001000 rn:5 ne:1 0000
# SVE integer compare scalar count and limit
WHILE 00100101 esz:2 1 rm:5 000 sf:1 u:1 1 rn:5 eq:1 rd:4
WHILE 00100101 esz:2 1 rm:5 000 sf:1 u:1 lt:1 rn:5 eq:1 rd:4
### SVE Integer Wide Immediate - Unpredicated Group

View File

@ -3750,7 +3750,7 @@ uint64_t HELPER(sve_cntp)(void *vn, void *vg, uint32_t pred_desc)
return sum;
}
uint32_t HELPER(sve_while)(void *vd, uint32_t count, uint32_t pred_desc)
uint32_t HELPER(sve_whilel)(void *vd, uint32_t count, uint32_t pred_desc)
{
intptr_t oprsz = FIELD_EX32(pred_desc, PREDDESC, OPRSZ);
intptr_t esz = FIELD_EX32(pred_desc, PREDDESC, ESZ);
@ -3776,6 +3776,42 @@ uint32_t HELPER(sve_while)(void *vd, uint32_t count, uint32_t pred_desc)
return predtest_ones(d, oprsz, esz_mask);
}
uint32_t HELPER(sve_whileg)(void *vd, uint32_t count, uint32_t pred_desc)
{
intptr_t oprsz = FIELD_EX32(pred_desc, PREDDESC, OPRSZ);
intptr_t esz = FIELD_EX32(pred_desc, PREDDESC, ESZ);
uint64_t esz_mask = pred_esz_masks[esz];
ARMPredicateReg *d = vd;
intptr_t i, invcount, oprbits;
uint64_t bits;
if (count == 0) {
return do_zero(d, oprsz);
}
oprbits = oprsz * 8;
tcg_debug_assert(count <= oprbits);
bits = esz_mask;
if (oprbits & 63) {
bits &= MAKE_64BIT_MASK(0, oprbits & 63);
}
invcount = oprbits - count;
for (i = (oprsz - 1) / 8; i > invcount / 64; --i) {
d->p[i] = bits;
bits = esz_mask;
}
d->p[i] = bits & MAKE_64BIT_MASK(invcount & 63, 64);
while (--i >= 0) {
d->p[i] = 0;
}
return predtest_ones(d, oprsz, esz_mask);
}
/* Recursive reduction on a function;
* C.f. the ARM ARM function ReducePredicated.
*

View File

@ -3112,7 +3112,14 @@ static bool trans_WHILE(DisasContext *s, arg_WHILE *a)
unsigned vsz = vec_full_reg_size(s);
unsigned desc = 0;
TCGCond cond;
uint64_t maxval;
/* Note that GE/HS has a->eq == 0 and GT/HI has a->eq == 1. */
bool eq = a->eq == a->lt;
/* The greater-than conditions are all SVE2. */
if (!a->lt && !dc_isar_feature(aa64_sve2, s)) {
return false;
}
if (!sve_access_check(s)) {
return true;
}
@ -3135,22 +3142,42 @@ static bool trans_WHILE(DisasContext *s, arg_WHILE *a)
*/
t0 = tcg_temp_new_i64();
t1 = tcg_temp_new_i64();
tcg_gen_sub_i64(t0, op1, op0);
if (a->lt) {
tcg_gen_sub_i64(t0, op1, op0);
if (a->u) {
maxval = a->sf ? UINT64_MAX : UINT32_MAX;
cond = eq ? TCG_COND_LEU : TCG_COND_LTU;
} else {
maxval = a->sf ? INT64_MAX : INT32_MAX;
cond = eq ? TCG_COND_LE : TCG_COND_LT;
}
} else {
tcg_gen_sub_i64(t0, op0, op1);
if (a->u) {
maxval = 0;
cond = eq ? TCG_COND_GEU : TCG_COND_GTU;
} else {
maxval = a->sf ? INT64_MIN : INT32_MIN;
cond = eq ? TCG_COND_GE : TCG_COND_GT;
}
}
tmax = tcg_const_i64(vsz >> a->esz);
if (a->eq) {
if (eq) {
/* Equality means one more iteration. */
tcg_gen_addi_i64(t0, t0, 1);
/* If op1 is max (un)signed integer (and the only time the addition
* above could overflow), then we produce an all-true predicate by
* setting the count to the vector length. This is because the
* pseudocode is described as an increment + compare loop, and the
* max integer would always compare true.
/*
* For the less-than while, if op1 is maxval (and the only time
* the addition above could overflow), then we produce an all-true
* predicate by setting the count to the vector length. This is
* because the pseudocode is described as an increment + compare
* loop, and the maximum integer would always compare true.
* Similarly, the greater-than while has the same issue with the
* minimum integer due to the decrement + compare loop.
*/
tcg_gen_movi_i64(t1, (a->sf
? (a->u ? UINT64_MAX : INT64_MAX)
: (a->u ? UINT32_MAX : INT32_MAX)));
tcg_gen_movi_i64(t1, maxval);
tcg_gen_movcond_i64(TCG_COND_EQ, t0, op1, t1, tmax, t0);
}
@ -3159,9 +3186,6 @@ static bool trans_WHILE(DisasContext *s, arg_WHILE *a)
tcg_temp_free_i64(tmax);
/* Set the count to zero if the condition is false. */
cond = (a->u
? (a->eq ? TCG_COND_LEU : TCG_COND_LTU)
: (a->eq ? TCG_COND_LE : TCG_COND_LT));
tcg_gen_movi_i64(t1, 0);
tcg_gen_movcond_i64(cond, t0, op0, op1, t0, t1);
tcg_temp_free_i64(t1);
@ -3181,7 +3205,11 @@ static bool trans_WHILE(DisasContext *s, arg_WHILE *a)
ptr = tcg_temp_new_ptr();
tcg_gen_addi_ptr(ptr, cpu_env, pred_full_reg_offset(s, a->rd));
gen_helper_sve_while(t2, ptr, t2, t3);
if (a->lt) {
gen_helper_sve_whilel(t2, ptr, t2, t3);
} else {
gen_helper_sve_whileg(t2, ptr, t2, t3);
}
do_pred_flags(t2);
tcg_temp_free_ptr(ptr);