target/arm: Implement MVE shift-by-scalar

Implement the MVE instructions which perform shifts by a scalar.
These are VSHL T2, VRSHL T2, VQSHL T1 and VQRSHL T2.  They take the
shift amount in a general purpose register and shift every element in
the vector by that amount.

Mostly we can reuse the helper functions for shift-by-immediate; we
do need two new helpers for VQRSHL.

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
This commit is contained in:
Peter Maydell 2021-08-13 17:11:52 +01:00
parent 6b895bf8fb
commit 1b15a97d4c
4 changed files with 76 additions and 3 deletions

View File

@ -414,6 +414,14 @@ DEF_HELPER_FLAGS_4(mve_vrshli_ub, TCG_CALL_NO_WG, void, env, ptr, ptr, i32)
DEF_HELPER_FLAGS_4(mve_vrshli_uh, TCG_CALL_NO_WG, void, env, ptr, ptr, i32)
DEF_HELPER_FLAGS_4(mve_vrshli_uw, TCG_CALL_NO_WG, void, env, ptr, ptr, i32)
DEF_HELPER_FLAGS_4(mve_vqrshli_sb, TCG_CALL_NO_WG, void, env, ptr, ptr, i32)
DEF_HELPER_FLAGS_4(mve_vqrshli_sh, TCG_CALL_NO_WG, void, env, ptr, ptr, i32)
DEF_HELPER_FLAGS_4(mve_vqrshli_sw, TCG_CALL_NO_WG, void, env, ptr, ptr, i32)
DEF_HELPER_FLAGS_4(mve_vqrshli_ub, TCG_CALL_NO_WG, void, env, ptr, ptr, i32)
DEF_HELPER_FLAGS_4(mve_vqrshli_uh, TCG_CALL_NO_WG, void, env, ptr, ptr, i32)
DEF_HELPER_FLAGS_4(mve_vqrshli_uw, TCG_CALL_NO_WG, void, env, ptr, ptr, i32)
DEF_HELPER_FLAGS_4(mve_vshllbsb, TCG_CALL_NO_WG, void, env, ptr, ptr, i32)
DEF_HELPER_FLAGS_4(mve_vshllbsh, TCG_CALL_NO_WG, void, env, ptr, ptr, i32)
DEF_HELPER_FLAGS_4(mve_vshllbub, TCG_CALL_NO_WG, void, env, ptr, ptr, i32)

View File

@ -39,6 +39,7 @@
&viwdup qd rn rm size imm
&vcmp qm qn size mask
&vcmp_scalar qn rm size mask
&shl_scalar qda rm size
@vldr_vstr ....... . . . . l:1 rn:4 ... ...... imm:7 &vldr_vstr qd=%qd u=0
# Note that both Rn and Qd are 3 bits only (no D bit)
@ -88,6 +89,8 @@
@2_shr_w .... .... .. 1 ..... .... .... .... .... &2shift qd=%qd qm=%qm \
size=2 shift=%rshift_i5
@shl_scalar .... .... .... size:2 .. .... .... .... rm:4 &shl_scalar qda=%qd
# Vector comparison; 4-bit Qm but 3-bit Qn
%mask_22_13 22:1 13:3
@vcmp .... .... .. size:2 qn:3 . .... .... .... .... &vcmp qm=%qm mask=%mask_22_13
@ -320,7 +323,23 @@ VRMLSLDAVH 1111 1110 1 ... ... 0 ... x:1 1110 . 0 a:1 0 ... 1 @vmlaldav_no
VADD_scalar 1110 1110 0 . .. ... 1 ... 0 1111 . 100 .... @2scalar
VSUB_scalar 1110 1110 0 . .. ... 1 ... 1 1111 . 100 .... @2scalar
VMUL_scalar 1110 1110 0 . .. ... 1 ... 1 1110 . 110 .... @2scalar
{
VSHL_S_scalar 1110 1110 0 . 11 .. 01 ... 1 1110 0110 .... @shl_scalar
VRSHL_S_scalar 1110 1110 0 . 11 .. 11 ... 1 1110 0110 .... @shl_scalar
VQSHL_S_scalar 1110 1110 0 . 11 .. 01 ... 1 1110 1110 .... @shl_scalar
VQRSHL_S_scalar 1110 1110 0 . 11 .. 11 ... 1 1110 1110 .... @shl_scalar
VMUL_scalar 1110 1110 0 . .. ... 1 ... 1 1110 . 110 .... @2scalar
}
{
VSHL_U_scalar 1111 1110 0 . 11 .. 01 ... 1 1110 0110 .... @shl_scalar
VRSHL_U_scalar 1111 1110 0 . 11 .. 11 ... 1 1110 0110 .... @shl_scalar
VQSHL_U_scalar 1111 1110 0 . 11 .. 01 ... 1 1110 1110 .... @shl_scalar
VQRSHL_U_scalar 1111 1110 0 . 11 .. 11 ... 1 1110 1110 .... @shl_scalar
VBRSR 1111 1110 0 . .. ... 1 ... 1 1110 . 110 .... @2scalar
}
VHADD_S_scalar 1110 1110 0 . .. ... 0 ... 0 1111 . 100 .... @2scalar
VHADD_U_scalar 1111 1110 0 . .. ... 0 ... 0 1111 . 100 .... @2scalar
VHSUB_S_scalar 1110 1110 0 . .. ... 0 ... 1 1111 . 100 .... @2scalar
@ -340,8 +359,6 @@ VHSUB_U_scalar 1111 1110 0 . .. ... 0 ... 1 1111 . 100 .... @2scalar
size=%size_28
}
VBRSR 1111 1110 0 . .. ... 1 ... 1 1110 . 110 .... @2scalar
VQDMULH_scalar 1110 1110 0 . .. ... 1 ... 0 1110 . 110 .... @2scalar
VQRDMULH_scalar 1111 1110 0 . .. ... 1 ... 0 1110 . 110 .... @2scalar

View File

@ -1334,6 +1334,8 @@ DO_2SHIFT_SAT_S(vqshli_s, DO_SQSHL_OP)
DO_2SHIFT_SAT_S(vqshlui_s, DO_SUQSHL_OP)
DO_2SHIFT_U(vrshli_u, DO_VRSHLU)
DO_2SHIFT_S(vrshli_s, DO_VRSHLS)
DO_2SHIFT_SAT_U(vqrshli_u, DO_UQRSHL_OP)
DO_2SHIFT_SAT_S(vqrshli_s, DO_SQRSHL_OP)
/* Shift-and-insert; we always work with 64 bits at a time */
#define DO_2SHIFT_INSERT(OP, ESIZE, SHIFTFN, MASKFN) \

View File

@ -1003,6 +1003,52 @@ DO_2SHIFT(VRSHRI_U, vrshli_u, true)
DO_2SHIFT(VSRI, vsri, false)
DO_2SHIFT(VSLI, vsli, false)
static bool do_2shift_scalar(DisasContext *s, arg_shl_scalar *a,
MVEGenTwoOpShiftFn *fn)
{
TCGv_ptr qda;
TCGv_i32 rm;
if (!dc_isar_feature(aa32_mve, s) ||
!mve_check_qreg_bank(s, a->qda) ||
a->rm == 13 || a->rm == 15 || !fn) {
/* Rm cases are UNPREDICTABLE */
return false;
}
if (!mve_eci_check(s) || !vfp_access_check(s)) {
return true;
}
qda = mve_qreg_ptr(a->qda);
rm = load_reg(s, a->rm);
fn(cpu_env, qda, qda, rm);
tcg_temp_free_ptr(qda);
tcg_temp_free_i32(rm);
mve_update_eci(s);
return true;
}
#define DO_2SHIFT_SCALAR(INSN, FN) \
static bool trans_##INSN(DisasContext *s, arg_shl_scalar *a) \
{ \
static MVEGenTwoOpShiftFn * const fns[] = { \
gen_helper_mve_##FN##b, \
gen_helper_mve_##FN##h, \
gen_helper_mve_##FN##w, \
NULL, \
}; \
return do_2shift_scalar(s, a, fns[a->size]); \
}
DO_2SHIFT_SCALAR(VSHL_S_scalar, vshli_s)
DO_2SHIFT_SCALAR(VSHL_U_scalar, vshli_u)
DO_2SHIFT_SCALAR(VRSHL_S_scalar, vrshli_s)
DO_2SHIFT_SCALAR(VRSHL_U_scalar, vrshli_u)
DO_2SHIFT_SCALAR(VQSHL_S_scalar, vqshli_s)
DO_2SHIFT_SCALAR(VQSHL_U_scalar, vqshli_u)
DO_2SHIFT_SCALAR(VQRSHL_S_scalar, vqrshli_s)
DO_2SHIFT_SCALAR(VQRSHL_U_scalar, vqrshli_u)
#define DO_VSHLL(INSN, FN) \
static bool trans_##INSN(DisasContext *s, arg_2shift *a) \
{ \