target/arm: Implement MVE VSHLL

Implement the MVE VHLL (vector shift left long) insn.  This has two
encodings: the T1 encoding is the usual shift-by-immediate format,
and the T2 encoding is a special case where the shift count is always
equal to the element size.

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
Message-id: 20210628135835.6690-10-peter.maydell@linaro.org
This commit is contained in:
Peter Maydell 2021-06-28 14:58:26 +01:00
parent 3394116f47
commit c226270703
4 changed files with 105 additions and 4 deletions

View File

@ -387,3 +387,12 @@ DEF_HELPER_FLAGS_4(mve_vrshli_sw, TCG_CALL_NO_WG, void, env, ptr, ptr, i32)
DEF_HELPER_FLAGS_4(mve_vrshli_ub, TCG_CALL_NO_WG, void, env, ptr, ptr, i32)
DEF_HELPER_FLAGS_4(mve_vrshli_uh, TCG_CALL_NO_WG, void, env, ptr, ptr, i32)
DEF_HELPER_FLAGS_4(mve_vrshli_uw, TCG_CALL_NO_WG, void, env, ptr, ptr, i32)
DEF_HELPER_FLAGS_4(mve_vshllbsb, TCG_CALL_NO_WG, void, env, ptr, ptr, i32)
DEF_HELPER_FLAGS_4(mve_vshllbsh, TCG_CALL_NO_WG, void, env, ptr, ptr, i32)
DEF_HELPER_FLAGS_4(mve_vshllbub, TCG_CALL_NO_WG, void, env, ptr, ptr, i32)
DEF_HELPER_FLAGS_4(mve_vshllbuh, TCG_CALL_NO_WG, void, env, ptr, ptr, i32)
DEF_HELPER_FLAGS_4(mve_vshlltsb, TCG_CALL_NO_WG, void, env, ptr, ptr, i32)
DEF_HELPER_FLAGS_4(mve_vshlltsh, TCG_CALL_NO_WG, void, env, ptr, ptr, i32)
DEF_HELPER_FLAGS_4(mve_vshlltub, TCG_CALL_NO_WG, void, env, ptr, ptr, i32)
DEF_HELPER_FLAGS_4(mve_vshlltuh, TCG_CALL_NO_WG, void, env, ptr, ptr, i32)

View File

@ -64,6 +64,14 @@
@2_shl_h .... .... .. 01 shift:4 .... .... .... .... &2shift qd=%qd qm=%qm size=1
@2_shl_w .... .... .. 1 shift:5 .... .... .... .... &2shift qd=%qd qm=%qm size=2
@2_shll_b .... .... ... 01 shift:3 .... .... .... .... &2shift qd=%qd qm=%qm size=0
@2_shll_h .... .... ... 1 shift:4 .... .... .... .... &2shift qd=%qd qm=%qm size=1
# VSHLL encoding T2 where shift == esize
@2_shll_esize_b .... .... .... 00 .. .... .... .... .... &2shift \
qd=%qd qm=%qm size=0 shift=8
@2_shll_esize_h .... .... .... 01 .. .... .... .... .... &2shift \
qd=%qd qm=%qm size=1 shift=16
# Right shifts are encoded as N - shift, where N is the element size in bits.
%rshift_i5 16:5 !function=rsub_32
%rshift_i4 16:4 !function=rsub_16
@ -122,11 +130,35 @@ VADD 1110 1111 0 . .. ... 0 ... 0 1000 . 1 . 0 ... 0 @2op
VSUB 1111 1111 0 . .. ... 0 ... 0 1000 . 1 . 0 ... 0 @2op
VMUL 1110 1111 0 . .. ... 0 ... 0 1001 . 1 . 1 ... 0 @2op
VMULH_S 111 0 1110 0 . .. ...1 ... 0 1110 . 0 . 0 ... 1 @2op
VMULH_U 111 1 1110 0 . .. ...1 ... 0 1110 . 0 . 0 ... 1 @2op
# The VSHLL T2 encoding is not a @2op pattern, but is here because it
# overlaps what would be size=0b11 VMULH/VRMULH
{
VSHLL_BS 111 0 1110 0 . 11 .. 01 ... 0 1110 0 0 . 0 ... 1 @2_shll_esize_b
VSHLL_BS 111 0 1110 0 . 11 .. 01 ... 0 1110 0 0 . 0 ... 1 @2_shll_esize_h
VRMULH_S 111 0 1110 0 . .. ...1 ... 1 1110 . 0 . 0 ... 1 @2op
VRMULH_U 111 1 1110 0 . .. ...1 ... 1 1110 . 0 . 0 ... 1 @2op
VMULH_S 111 0 1110 0 . .. ...1 ... 0 1110 . 0 . 0 ... 1 @2op
}
{
VSHLL_BU 111 1 1110 0 . 11 .. 01 ... 0 1110 0 0 . 0 ... 1 @2_shll_esize_b
VSHLL_BU 111 1 1110 0 . 11 .. 01 ... 0 1110 0 0 . 0 ... 1 @2_shll_esize_h
VMULH_U 111 1 1110 0 . .. ...1 ... 0 1110 . 0 . 0 ... 1 @2op
}
{
VSHLL_TS 111 0 1110 0 . 11 .. 01 ... 1 1110 0 0 . 0 ... 1 @2_shll_esize_b
VSHLL_TS 111 0 1110 0 . 11 .. 01 ... 1 1110 0 0 . 0 ... 1 @2_shll_esize_h
VRMULH_S 111 0 1110 0 . .. ...1 ... 1 1110 . 0 . 0 ... 1 @2op
}
{
VSHLL_TU 111 1 1110 0 . 11 .. 01 ... 1 1110 0 0 . 0 ... 1 @2_shll_esize_b
VSHLL_TU 111 1 1110 0 . 11 .. 01 ... 1 1110 0 0 . 0 ... 1 @2_shll_esize_h
VRMULH_U 111 1 1110 0 . .. ...1 ... 1 1110 . 0 . 0 ... 1 @2op
}
VMAX_S 111 0 1111 0 . .. ... 0 ... 0 0110 . 1 . 0 ... 0 @2op
VMAX_U 111 1 1111 0 . .. ... 0 ... 0 0110 . 1 . 0 ... 0 @2op
@ -326,3 +358,16 @@ VRSHRI_S 111 0 1111 1 . ... ... ... 0 0010 0 1 . 1 ... 0 @2_shr_w
VRSHRI_U 111 1 1111 1 . ... ... ... 0 0010 0 1 . 1 ... 0 @2_shr_b
VRSHRI_U 111 1 1111 1 . ... ... ... 0 0010 0 1 . 1 ... 0 @2_shr_h
VRSHRI_U 111 1 1111 1 . ... ... ... 0 0010 0 1 . 1 ... 0 @2_shr_w
# VSHLL T1 encoding; the T2 VSHLL encoding is elsewhere in this file
VSHLL_BS 111 0 1110 1 . 1 .. ... ... 0 1111 0 1 . 0 ... 0 @2_shll_b
VSHLL_BS 111 0 1110 1 . 1 .. ... ... 0 1111 0 1 . 0 ... 0 @2_shll_h
VSHLL_BU 111 1 1110 1 . 1 .. ... ... 0 1111 0 1 . 0 ... 0 @2_shll_b
VSHLL_BU 111 1 1110 1 . 1 .. ... ... 0 1111 0 1 . 0 ... 0 @2_shll_h
VSHLL_TS 111 0 1110 1 . 1 .. ... ... 1 1111 0 1 . 0 ... 0 @2_shll_b
VSHLL_TS 111 0 1110 1 . 1 .. ... ... 1 1111 0 1 . 0 ... 0 @2_shll_h
VSHLL_TU 111 1 1110 1 . 1 .. ... ... 1 1111 0 1 . 0 ... 0 @2_shll_b
VSHLL_TU 111 1 1110 1 . 1 .. ... ... 1 1111 0 1 . 0 ... 0 @2_shll_h

View File

@ -1250,3 +1250,35 @@ DO_2SHIFT_SAT_S(vqshli_s, DO_SQSHL_OP)
DO_2SHIFT_SAT_S(vqshlui_s, DO_SUQSHL_OP)
DO_2SHIFT_U(vrshli_u, DO_VRSHLU)
DO_2SHIFT_S(vrshli_s, DO_VRSHLS)
/*
* Long shifts taking half-sized inputs from top or bottom of the input
* vector and producing a double-width result. ESIZE, TYPE are for
* the input, and LESIZE, LTYPE for the output.
* Unlike the normal shift helpers, we do not handle negative shift counts,
* because the long shift is strictly left-only.
*/
#define DO_VSHLL(OP, TOP, ESIZE, TYPE, LESIZE, LTYPE) \
void HELPER(glue(mve_, OP))(CPUARMState *env, void *vd, \
void *vm, uint32_t shift) \
{ \
LTYPE *d = vd; \
TYPE *m = vm; \
uint16_t mask = mve_element_mask(env); \
unsigned le; \
assert(shift <= 16); \
for (le = 0; le < 16 / LESIZE; le++, mask >>= LESIZE) { \
LTYPE r = (LTYPE)m[H##ESIZE(le * 2 + TOP)] << shift; \
mergemask(&d[H##LESIZE(le)], r, mask); \
} \
mve_advance_vpt(env); \
}
#define DO_VSHLL_ALL(OP, TOP) \
DO_VSHLL(OP##sb, TOP, 1, int8_t, 2, int16_t) \
DO_VSHLL(OP##ub, TOP, 1, uint8_t, 2, uint16_t) \
DO_VSHLL(OP##sh, TOP, 2, int16_t, 4, int32_t) \
DO_VSHLL(OP##uh, TOP, 2, uint16_t, 4, uint32_t) \
DO_VSHLL_ALL(vshllb, false)
DO_VSHLL_ALL(vshllt, true)

View File

@ -893,3 +893,18 @@ DO_2SHIFT(VSHRI_S, vshli_s, true)
DO_2SHIFT(VSHRI_U, vshli_u, true)
DO_2SHIFT(VRSHRI_S, vrshli_s, true)
DO_2SHIFT(VRSHRI_U, vrshli_u, true)
#define DO_VSHLL(INSN, FN) \
static bool trans_##INSN(DisasContext *s, arg_2shift *a) \
{ \
static MVEGenTwoOpShiftFn * const fns[] = { \
gen_helper_mve_##FN##b, \
gen_helper_mve_##FN##h, \
}; \
return do_2shift(s, a, fns[a->size], false); \
}
DO_VSHLL(VSHLL_BS, vshllbs)
DO_VSHLL(VSHLL_BU, vshllbu)
DO_VSHLL(VSHLL_TS, vshllts)
DO_VSHLL(VSHLL_TU, vshlltu)