target/arm: Implement MVE narrowing moves

Implement the MVE narrowing move insns VMOVN, VQMOVN and VQMOVUN.
These take a double-width input, narrow it (possibly saturating) and
store the result to either the top or bottom half of the output
element.

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
This commit is contained in:
Peter Maydell 2021-08-13 17:11:53 +01:00
parent 7f061c0ab9
commit 54dc78a901
4 changed files with 132 additions and 0 deletions

View File

@ -76,6 +76,26 @@ DEF_HELPER_FLAGS_3(mve_vnegw, TCG_CALL_NO_WG, void, env, ptr, ptr)
DEF_HELPER_FLAGS_3(mve_vfnegh, TCG_CALL_NO_WG, void, env, ptr, ptr) DEF_HELPER_FLAGS_3(mve_vfnegh, TCG_CALL_NO_WG, void, env, ptr, ptr)
DEF_HELPER_FLAGS_3(mve_vfnegs, TCG_CALL_NO_WG, void, env, ptr, ptr) DEF_HELPER_FLAGS_3(mve_vfnegs, TCG_CALL_NO_WG, void, env, ptr, ptr)
/*
 * MVE narrowing moves (VMOVN, VQMOVN, VQMOVUN).  Every helper is declared
 * as returning void and taking (env, ptr, ptr), matching the
 * HELPER(mve_<name>)(CPUARMState *env, void *vd, void *vm) definitions.
 * Name suffixes: the first b/t selects the bottom/top narrow element of
 * each destination pair, an optional s/u (VQMOVN only) selects signed or
 * unsigned saturation, and the final b/h is a byte or halfword result.
 */
DEF_HELPER_FLAGS_3(mve_vmovnbb, TCG_CALL_NO_WG, void, env, ptr, ptr)
DEF_HELPER_FLAGS_3(mve_vmovnbh, TCG_CALL_NO_WG, void, env, ptr, ptr)
DEF_HELPER_FLAGS_3(mve_vmovntb, TCG_CALL_NO_WG, void, env, ptr, ptr)
DEF_HELPER_FLAGS_3(mve_vmovnth, TCG_CALL_NO_WG, void, env, ptr, ptr)
DEF_HELPER_FLAGS_3(mve_vqmovunbb, TCG_CALL_NO_WG, void, env, ptr, ptr)
DEF_HELPER_FLAGS_3(mve_vqmovunbh, TCG_CALL_NO_WG, void, env, ptr, ptr)
DEF_HELPER_FLAGS_3(mve_vqmovuntb, TCG_CALL_NO_WG, void, env, ptr, ptr)
DEF_HELPER_FLAGS_3(mve_vqmovunth, TCG_CALL_NO_WG, void, env, ptr, ptr)
DEF_HELPER_FLAGS_3(mve_vqmovnbsb, TCG_CALL_NO_WG, void, env, ptr, ptr)
DEF_HELPER_FLAGS_3(mve_vqmovnbsh, TCG_CALL_NO_WG, void, env, ptr, ptr)
DEF_HELPER_FLAGS_3(mve_vqmovntsb, TCG_CALL_NO_WG, void, env, ptr, ptr)
DEF_HELPER_FLAGS_3(mve_vqmovntsh, TCG_CALL_NO_WG, void, env, ptr, ptr)
DEF_HELPER_FLAGS_3(mve_vqmovnbub, TCG_CALL_NO_WG, void, env, ptr, ptr)
DEF_HELPER_FLAGS_3(mve_vqmovnbuh, TCG_CALL_NO_WG, void, env, ptr, ptr)
DEF_HELPER_FLAGS_3(mve_vqmovntub, TCG_CALL_NO_WG, void, env, ptr, ptr)
DEF_HELPER_FLAGS_3(mve_vqmovntuh, TCG_CALL_NO_WG, void, env, ptr, ptr)
DEF_HELPER_FLAGS_4(mve_vand, TCG_CALL_NO_WG, void, env, ptr, ptr, ptr) DEF_HELPER_FLAGS_4(mve_vand, TCG_CALL_NO_WG, void, env, ptr, ptr, ptr)
DEF_HELPER_FLAGS_4(mve_vbic, TCG_CALL_NO_WG, void, env, ptr, ptr, ptr) DEF_HELPER_FLAGS_4(mve_vbic, TCG_CALL_NO_WG, void, env, ptr, ptr, ptr)
DEF_HELPER_FLAGS_4(mve_vorr, TCG_CALL_NO_WG, void, env, ptr, ptr, ptr) DEF_HELPER_FLAGS_4(mve_vorr, TCG_CALL_NO_WG, void, env, ptr, ptr, ptr)

View File

@ -153,6 +153,9 @@ VMUL 1110 1111 0 . .. ... 0 ... 0 1001 . 1 . 1 ... 0 @2op
VSHLL_BS 111 0 1110 0 . 11 .. 01 ... 0 1110 0 0 . 0 ... 1 @2_shll_esize_b VSHLL_BS 111 0 1110 0 . 11 .. 01 ... 0 1110 0 0 . 0 ... 1 @2_shll_esize_b
VSHLL_BS 111 0 1110 0 . 11 .. 01 ... 0 1110 0 0 . 0 ... 1 @2_shll_esize_h VSHLL_BS 111 0 1110 0 . 11 .. 01 ... 0 1110 0 0 . 0 ... 1 @2_shll_esize_h
VQMOVUNB 111 0 1110 0 . 11 .. 01 ... 0 1110 1 0 . 0 ... 1 @1op
VQMOVN_BS 111 0 1110 0 . 11 .. 11 ... 0 1110 0 0 . 0 ... 1 @1op
VMULH_S 111 0 1110 0 . .. ...1 ... 0 1110 . 0 . 0 ... 1 @2op VMULH_S 111 0 1110 0 . .. ...1 ... 0 1110 . 0 . 0 ... 1 @2op
} }
@ -160,6 +163,9 @@ VMUL 1110 1111 0 . .. ... 0 ... 0 1001 . 1 . 1 ... 0 @2op
VSHLL_BU 111 1 1110 0 . 11 .. 01 ... 0 1110 0 0 . 0 ... 1 @2_shll_esize_b VSHLL_BU 111 1 1110 0 . 11 .. 01 ... 0 1110 0 0 . 0 ... 1 @2_shll_esize_b
VSHLL_BU 111 1 1110 0 . 11 .. 01 ... 0 1110 0 0 . 0 ... 1 @2_shll_esize_h VSHLL_BU 111 1 1110 0 . 11 .. 01 ... 0 1110 0 0 . 0 ... 1 @2_shll_esize_h
VMOVNB 111 1 1110 0 . 11 .. 01 ... 0 1110 1 0 . 0 ... 1 @1op
VQMOVN_BU 111 1 1110 0 . 11 .. 11 ... 0 1110 0 0 . 0 ... 1 @1op
VMULH_U 111 1 1110 0 . .. ...1 ... 0 1110 . 0 . 0 ... 1 @2op VMULH_U 111 1 1110 0 . .. ...1 ... 0 1110 . 0 . 0 ... 1 @2op
} }
@ -167,6 +173,9 @@ VMUL 1110 1111 0 . .. ... 0 ... 0 1001 . 1 . 1 ... 0 @2op
VSHLL_TS 111 0 1110 0 . 11 .. 01 ... 1 1110 0 0 . 0 ... 1 @2_shll_esize_b VSHLL_TS 111 0 1110 0 . 11 .. 01 ... 1 1110 0 0 . 0 ... 1 @2_shll_esize_b
VSHLL_TS 111 0 1110 0 . 11 .. 01 ... 1 1110 0 0 . 0 ... 1 @2_shll_esize_h VSHLL_TS 111 0 1110 0 . 11 .. 01 ... 1 1110 0 0 . 0 ... 1 @2_shll_esize_h
VQMOVUNT 111 0 1110 0 . 11 .. 01 ... 1 1110 1 0 . 0 ... 1 @1op
VQMOVN_TS 111 0 1110 0 . 11 .. 11 ... 1 1110 0 0 . 0 ... 1 @1op
VRMULH_S 111 0 1110 0 . .. ...1 ... 1 1110 . 0 . 0 ... 1 @2op VRMULH_S 111 0 1110 0 . .. ...1 ... 1 1110 . 0 . 0 ... 1 @2op
} }
@ -174,6 +183,9 @@ VMUL 1110 1111 0 . .. ... 0 ... 0 1001 . 1 . 1 ... 0 @2op
VSHLL_TU 111 1 1110 0 . 11 .. 01 ... 1 1110 0 0 . 0 ... 1 @2_shll_esize_b VSHLL_TU 111 1 1110 0 . 11 .. 01 ... 1 1110 0 0 . 0 ... 1 @2_shll_esize_b
VSHLL_TU 111 1 1110 0 . 11 .. 01 ... 1 1110 0 0 . 0 ... 1 @2_shll_esize_h VSHLL_TU 111 1 1110 0 . 11 .. 01 ... 1 1110 0 0 . 0 ... 1 @2_shll_esize_h
VMOVNT 111 1 1110 0 . 11 .. 01 ... 1 1110 1 0 . 0 ... 1 @1op
VQMOVN_TU 111 1 1110 0 . 11 .. 11 ... 1 1110 0 0 . 0 ... 1 @1op
VRMULH_U 111 1 1110 0 . .. ...1 ... 1 1110 . 0 . 0 ... 1 @2op VRMULH_U 111 1 1110 0 . .. ...1 ... 1 1110 . 0 . 0 ... 1 @2op
} }

View File

@ -1650,6 +1650,84 @@ DO_VSHRN_SAT_UH(vqrshrnb_uh, vqrshrnt_uh, DO_RSHRN_UH)
DO_VSHRN_SAT_SB(vqrshrunbb, vqrshruntb, DO_RSHRUN_B) DO_VSHRN_SAT_SB(vqrshrunbb, vqrshruntb, DO_RSHRUN_B)
DO_VSHRN_SAT_SH(vqrshrunbh, vqrshrunth, DO_RSHRUN_H) DO_VSHRN_SAT_SH(vqrshrunbh, vqrshrunth, DO_RSHRUN_H)
/*
 * Generate HELPER(mve_<OP>): a narrowing move without saturation.
 *
 *   OP      helper name suffix
 *   TOP     false: write the even (bottom) narrow element of each pair;
 *           true:  write the odd (top) one
 *   ESIZE   size in bytes of a narrow (destination) element
 *   TYPE    C type of a narrow element
 *   LESIZE  size in bytes of a wide (source) element
 *   LTYPE   C type of a wide element
 *
 * For every wide element idx of vm, the value is handed to mergemask()
 * targeting narrow element (idx * 2 + TOP) of vd.  The predicate mask is
 * shifted right by ESIZE up front when TOP is set, and by LESIZE after each
 * element, so its low bits always line up with the element being written
 * (presumably one predicate bit per byte -- confirm against
 * mve_element_mask()).  The VPT state is advanced once at the end.
 */
#define DO_VMOVN(OP, TOP, ESIZE, TYPE, LESIZE, LTYPE)                   \
    void HELPER(mve_##OP)(CPUARMState *env, void *vd, void *vm)         \
    {                                                                   \
        TYPE *dst = vd;                                                 \
        LTYPE *src = vm;                                                \
        uint16_t pred = mve_element_mask(env);                          \
        unsigned idx = 0;                                               \
        pred >>= ESIZE * TOP;                                           \
        while (idx < 16 / LESIZE) {                                     \
            mergemask(&dst[H##ESIZE(idx * 2 + TOP)],                    \
                      src[H##LESIZE(idx)], pred);                       \
            idx++;                                                      \
            pred >>= LESIZE;                                            \
        }                                                               \
        mve_advance_vpt(env);                                           \
    }
/*
 * VMOVN helpers: bottom (TOP = false) and top (TOP = true) forms, narrowing
 * 16-bit elements to bytes and 32-bit elements to halfwords.
 */
DO_VMOVN(vmovnbb, false, 1, uint8_t, 2, uint16_t)
DO_VMOVN(vmovnbh, false, 2, uint16_t, 4, uint32_t)
DO_VMOVN(vmovntb, true, 1, uint8_t, 2, uint16_t)
DO_VMOVN(vmovnth, true, 2, uint16_t, 4, uint32_t)
/*
 * Generate HELPER(mve_<OP>): a saturating narrowing move.
 *
 * Parameters are as for the non-saturating narrowing-move generator, plus:
 *   FN      FN(value, bool *satp) yields the narrowed value and reports
 *           through *satp whether saturation occurred
 *
 * Each wide element of vm is narrowed with FN and merged into narrow
 * element (idx * 2 + TOP) of vd under the predicate mask.  Saturation is
 * only recorded for an element whose low predicate bit is set; if any such
 * element saturated, env->vfp.qc[0] is set.  The VPT state is advanced once
 * at the end.
 */
#define DO_VMOVN_SAT(OP, TOP, ESIZE, TYPE, LESIZE, LTYPE, FN)           \
    void HELPER(mve_##OP)(CPUARMState *env, void *vd, void *vm)         \
    {                                                                   \
        TYPE *dst = vd;                                                 \
        LTYPE *src = vm;                                                \
        uint16_t pred = mve_element_mask(env);                          \
        bool any_sat = false;                                           \
        unsigned idx = 0;                                               \
        pred >>= ESIZE * TOP;                                           \
        while (idx < 16 / LESIZE) {                                     \
            bool sat = false;                                           \
            TYPE narrowed = FN(src[H##LESIZE(idx)], &sat);              \
            mergemask(&dst[H##ESIZE(idx * 2 + TOP)], narrowed, pred);   \
            /* a predicated-out element must not contribute to QC */    \
            if (sat && (pred & 1)) {                                    \
                any_sat = true;                                         \
            }                                                           \
            idx++;                                                      \
            pred >>= LESIZE;                                            \
        }                                                               \
        if (any_sat) {                                                  \
            env->vfp.qc[0] = any_sat;                                   \
        }                                                               \
        mve_advance_vpt(env);                                           \
    }
/*
 * Convenience wrappers around DO_VMOVN_SAT: each one emits both the bottom
 * helper (named BOP, generated with TOP = false) and the top helper (named
 * by this wrapper's TOP argument, generated with TOP = true) for one pair
 * of element types.  Note that TOP here is a helper name, not the boolean
 * flag of DO_VMOVN_SAT.
 *
 *   _UB: uint16_t -> uint8_t      _UH: uint32_t -> uint16_t
 *   _SB: int16_t  -> int8_t       _SH: int32_t  -> int16_t
 */
#define DO_VMOVN_SAT_UB(BOP, TOP, FN) \
DO_VMOVN_SAT(BOP, false, 1, uint8_t, 2, uint16_t, FN) \
DO_VMOVN_SAT(TOP, true, 1, uint8_t, 2, uint16_t, FN)
#define DO_VMOVN_SAT_UH(BOP, TOP, FN) \
DO_VMOVN_SAT(BOP, false, 2, uint16_t, 4, uint32_t, FN) \
DO_VMOVN_SAT(TOP, true, 2, uint16_t, 4, uint32_t, FN)
#define DO_VMOVN_SAT_SB(BOP, TOP, FN) \
DO_VMOVN_SAT(BOP, false, 1, int8_t, 2, int16_t, FN) \
DO_VMOVN_SAT(TOP, true, 1, int8_t, 2, int16_t, FN)
#define DO_VMOVN_SAT_SH(BOP, TOP, FN) \
DO_VMOVN_SAT(BOP, false, 2, int16_t, 4, int32_t, FN) \
DO_VMOVN_SAT(TOP, true, 2, int16_t, 4, int32_t, FN)
/*
 * Saturation functions passed as FN to DO_VMOVN_SAT.  Each casts N to a
 * 64-bit type and calls do_sat_bhs() with the bounds of the narrow result
 * type and the saturation-flag pointer SATP:
 *
 *   DO_VQMOVN_S*  signed input,   signed bounds   (INTn_MIN .. INTn_MAX)
 *   DO_VQMOVN_U*  unsigned input, unsigned bounds (0 .. UINTn_MAX)
 *   DO_VQMOVUN_*  signed input,   unsigned bounds (0 .. UINTn_MAX)
 *
 * NOTE(review): do_sat_bhs() is defined elsewhere; presumably it clamps the
 * value to [min, max] and sets *SATP when clamping happens -- confirm.
 */
#define DO_VQMOVN_SB(N, SATP) \
do_sat_bhs((int64_t)(N), INT8_MIN, INT8_MAX, SATP)
#define DO_VQMOVN_UB(N, SATP) \
do_sat_bhs((uint64_t)(N), 0, UINT8_MAX, SATP)
#define DO_VQMOVUN_B(N, SATP) \
do_sat_bhs((int64_t)(N), 0, UINT8_MAX, SATP)
#define DO_VQMOVN_SH(N, SATP) \
do_sat_bhs((int64_t)(N), INT16_MIN, INT16_MAX, SATP)
#define DO_VQMOVN_UH(N, SATP) \
do_sat_bhs((uint64_t)(N), 0, UINT16_MAX, SATP)
#define DO_VQMOVUN_H(N, SATP) \
do_sat_bhs((int64_t)(N), 0, UINT16_MAX, SATP)
/* VQMOVN: signed and unsigned saturating narrowing, bottom and top forms. */
DO_VMOVN_SAT_SB(vqmovnbsb, vqmovntsb, DO_VQMOVN_SB)
DO_VMOVN_SAT_SH(vqmovnbsh, vqmovntsh, DO_VQMOVN_SH)
DO_VMOVN_SAT_UB(vqmovnbub, vqmovntub, DO_VQMOVN_UB)
DO_VMOVN_SAT_UH(vqmovnbuh, vqmovntuh, DO_VQMOVN_UH)
/*
 * VQMOVUN: the source elements are read as signed but saturated to the
 * unsigned narrow range, so the signed-type wrappers are used to get a
 * signed LTYPE.
 * NOTE(review): the narrowed temporary therefore has a signed narrow type
 * while holding values up to UINTn_MAX; this relies on mergemask() storing
 * the raw bit pattern -- confirm.
 */
DO_VMOVN_SAT_SB(vqmovunbb, vqmovuntb, DO_VQMOVUN_B)
DO_VMOVN_SAT_SH(vqmovunbh, vqmovunth, DO_VQMOVUN_H)
uint32_t HELPER(mve_vshlc)(CPUARMState *env, void *vd, uint32_t rdm, uint32_t HELPER(mve_vshlc)(CPUARMState *env, void *vd, uint32_t rdm,
uint32_t shift) uint32_t shift)
{ {

View File

@ -275,6 +275,28 @@ DO_1OP(VCLS, vcls)
DO_1OP(VABS, vabs) DO_1OP(VABS, vabs)
DO_1OP(VNEG, vneg) DO_1OP(VNEG, vneg)
/*
 * Narrowing moves: only size 0 and 1 are valid.
 *
 * Defines trans_<INSN>(), which picks the byte or halfword helper
 * gen_helper_mve_<FN>b / gen_helper_mve_<FN>h from a->size and passes it to
 * do_1op().  Sizes 2 and 3 select a NULL helper (presumably rejected by
 * do_1op() -- confirm).
 */
#define DO_VMOVN(INSN, FN)                                              \
    static bool trans_##INSN(DisasContext *s, arg_1op *a)               \
    {                                                                   \
        static MVEGenOneOpFn * const helper_by_size[] = {               \
            gen_helper_mve_##FN##b,                                     \
            gen_helper_mve_##FN##h,                                     \
            NULL,                                                       \
            NULL,                                                       \
        };                                                              \
        MVEGenOneOpFn *gen = helper_by_size[a->size];                   \
        return do_1op(s, a, gen);                                       \
    }
/*
 * Translator entry points for the narrowing moves.  The second argument is
 * the helper-name stem; DO_VMOVN appends "b" or "h" according to the
 * element size.
 */
DO_VMOVN(VMOVNB, vmovnb)
DO_VMOVN(VMOVNT, vmovnt)
DO_VMOVN(VQMOVUNB, vqmovunb)
DO_VMOVN(VQMOVUNT, vqmovunt)
DO_VMOVN(VQMOVN_BS, vqmovnbs)
DO_VMOVN(VQMOVN_TS, vqmovnts)
DO_VMOVN(VQMOVN_BU, vqmovnbu)
DO_VMOVN(VQMOVN_TU, vqmovntu)
static bool trans_VREV16(DisasContext *s, arg_1op *a) static bool trans_VREV16(DisasContext *s, arg_1op *a)
{ {
static MVEGenOneOpFn * const fns[] = { static MVEGenOneOpFn * const fns[] = {