target/arm: Implement MVE integer vector-vs-scalar comparisons

Implement the MVE integer vector comparison instructions that compare
each element against a scalar from a general purpose register.  These
are "VCMP (vector)" encodings T4, T5 and T6 and "VPT (vector)"
encodings T4, T5 and T6.

We have to move the decodetree pattern for VPST, because it
overlaps with VCMP T4 with size = 0b11.

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
This commit is contained in:
Peter Maydell 2021-08-13 17:11:51 +01:00
parent eff5d9a9bd
commit cce81873bc
4 changed files with 126 additions and 11 deletions

View File

@ -512,3 +512,35 @@ DEF_HELPER_FLAGS_3(mve_vcmpgtw, TCG_CALL_NO_WG, void, env, ptr, ptr)
DEF_HELPER_FLAGS_3(mve_vcmpleb, TCG_CALL_NO_WG, void, env, ptr, ptr)
DEF_HELPER_FLAGS_3(mve_vcmpleh, TCG_CALL_NO_WG, void, env, ptr, ptr)
DEF_HELPER_FLAGS_3(mve_vcmplew, TCG_CALL_NO_WG, void, env, ptr, ptr)
/*
 * MVE integer vector-vs-scalar comparisons.  One helper per condition
 * (eq, ne, cs, hi, ge, lt, gt, le) and element size (b, h, w).  The
 * arguments are env, a pointer to the vector register being compared,
 * and the 32-bit scalar value to compare each element against.
 */
DEF_HELPER_FLAGS_3(mve_vcmpeq_scalarb, TCG_CALL_NO_WG, void, env, ptr, i32)
DEF_HELPER_FLAGS_3(mve_vcmpeq_scalarh, TCG_CALL_NO_WG, void, env, ptr, i32)
DEF_HELPER_FLAGS_3(mve_vcmpeq_scalarw, TCG_CALL_NO_WG, void, env, ptr, i32)
DEF_HELPER_FLAGS_3(mve_vcmpne_scalarb, TCG_CALL_NO_WG, void, env, ptr, i32)
DEF_HELPER_FLAGS_3(mve_vcmpne_scalarh, TCG_CALL_NO_WG, void, env, ptr, i32)
DEF_HELPER_FLAGS_3(mve_vcmpne_scalarw, TCG_CALL_NO_WG, void, env, ptr, i32)
DEF_HELPER_FLAGS_3(mve_vcmpcs_scalarb, TCG_CALL_NO_WG, void, env, ptr, i32)
DEF_HELPER_FLAGS_3(mve_vcmpcs_scalarh, TCG_CALL_NO_WG, void, env, ptr, i32)
DEF_HELPER_FLAGS_3(mve_vcmpcs_scalarw, TCG_CALL_NO_WG, void, env, ptr, i32)
DEF_HELPER_FLAGS_3(mve_vcmphi_scalarb, TCG_CALL_NO_WG, void, env, ptr, i32)
DEF_HELPER_FLAGS_3(mve_vcmphi_scalarh, TCG_CALL_NO_WG, void, env, ptr, i32)
DEF_HELPER_FLAGS_3(mve_vcmphi_scalarw, TCG_CALL_NO_WG, void, env, ptr, i32)
DEF_HELPER_FLAGS_3(mve_vcmpge_scalarb, TCG_CALL_NO_WG, void, env, ptr, i32)
DEF_HELPER_FLAGS_3(mve_vcmpge_scalarh, TCG_CALL_NO_WG, void, env, ptr, i32)
DEF_HELPER_FLAGS_3(mve_vcmpge_scalarw, TCG_CALL_NO_WG, void, env, ptr, i32)
DEF_HELPER_FLAGS_3(mve_vcmplt_scalarb, TCG_CALL_NO_WG, void, env, ptr, i32)
DEF_HELPER_FLAGS_3(mve_vcmplt_scalarh, TCG_CALL_NO_WG, void, env, ptr, i32)
DEF_HELPER_FLAGS_3(mve_vcmplt_scalarw, TCG_CALL_NO_WG, void, env, ptr, i32)
DEF_HELPER_FLAGS_3(mve_vcmpgt_scalarb, TCG_CALL_NO_WG, void, env, ptr, i32)
DEF_HELPER_FLAGS_3(mve_vcmpgt_scalarh, TCG_CALL_NO_WG, void, env, ptr, i32)
DEF_HELPER_FLAGS_3(mve_vcmpgt_scalarw, TCG_CALL_NO_WG, void, env, ptr, i32)
DEF_HELPER_FLAGS_3(mve_vcmple_scalarb, TCG_CALL_NO_WG, void, env, ptr, i32)
DEF_HELPER_FLAGS_3(mve_vcmple_scalarh, TCG_CALL_NO_WG, void, env, ptr, i32)
DEF_HELPER_FLAGS_3(mve_vcmple_scalarw, TCG_CALL_NO_WG, void, env, ptr, i32)

View File

@ -38,6 +38,7 @@
&vidup qd rn size imm
&viwdup qd rn rm size imm
&vcmp qm qn size mask
# Vector-vs-scalar comparison: vector register qn is compared against
# general purpose register rm; a non-zero mask selects the VPT form
&vcmp_scalar qn rm size mask
@vldr_vstr ....... . . . . l:1 rn:4 ... ...... imm:7 &vldr_vstr qd=%qd u=0
# Note that both Rn and Qd are 3 bits only (no D bit)
@ -90,6 +91,8 @@
# Vector comparison; 4-bit Qm but 3-bit Qn
%mask_22_13 22:1 13:3
@vcmp .... .... .. size:2 qn:3 . .... .... .... .... &vcmp qm=%qm mask=%mask_22_13
# Vector-vs-scalar comparison; 3-bit Qn and 4-bit Rm, with the mask
# assembled from bit 22 and bits [15:13] as for @vcmp
@vcmp_scalar .... .... .. size:2 qn:3 . .... .... .... rm:4 &vcmp_scalar \
mask=%mask_22_13
# Vector loads and stores
@ -349,9 +352,6 @@ VQRDMULH_scalar 1111 1110 0 . .. ... 1 ... 0 1110 . 110 .... @2scalar
rdahi=%rdahi rdalo=%rdalo
}
# Predicate operations
VPST 1111 1110 0 . 11 000 1 ... 0 1111 0100 1101 mask=%mask_22_13
# Logical immediate operations (1 reg and modified-immediate)
# The cmode/op bits here decode VORR/VBIC/VMOV/VMVN, but
@ -474,3 +474,15 @@ VCMPGE 1111 1110 0 . .. ... 1 ... 1 1111 0 0 . 0 ... 0 @vcmp
VCMPLT 1111 1110 0 . .. ... 1 ... 1 1111 1 0 . 0 ... 0 @vcmp
VCMPGT 1111 1110 0 . .. ... 1 ... 1 1111 0 0 . 0 ... 1 @vcmp
VCMPLE 1111 1110 0 . .. ... 1 ... 1 1111 1 0 . 0 ... 1 @vcmp
# Vector-vs-scalar comparisons.
# VPST occupies the VCMPEQ_scalar encoding with size = 0b11, Qn = 0b000
# and Rm = 0b1101, so the two must live in an overlap group with the
# more specific VPST pattern listed first.
{
VPST 1111 1110 0 . 11 000 1 ... 0 1111 0100 1101 mask=%mask_22_13
VCMPEQ_scalar 1111 1110 0 . .. ... 1 ... 0 1111 0 1 0 0 .... @vcmp_scalar
}
# EQ/NE/CS/HI have bit 12 clear; GE/LT/GT/LE have bit 12 set
VCMPNE_scalar 1111 1110 0 . .. ... 1 ... 0 1111 1 1 0 0 .... @vcmp_scalar
VCMPCS_scalar 1111 1110 0 . .. ... 1 ... 0 1111 0 1 1 0 .... @vcmp_scalar
VCMPHI_scalar 1111 1110 0 . .. ... 1 ... 0 1111 1 1 1 0 .... @vcmp_scalar
VCMPGE_scalar 1111 1110 0 . .. ... 1 ... 1 1111 0 1 0 0 .... @vcmp_scalar
VCMPLT_scalar 1111 1110 0 . .. ... 1 ... 1 1111 1 1 0 0 .... @vcmp_scalar
VCMPGT_scalar 1111 1110 0 . .. ... 1 ... 1 1111 0 1 1 0 .... @vcmp_scalar
VCMPLE_scalar 1111 1110 0 . .. ... 1 ... 1 1111 1 1 1 0 .... @vcmp_scalar

View File

@ -1787,15 +1787,43 @@ DO_VIWDUP_ALL(vdwdup, do_sub_wrap)
mve_advance_vpt(env); \
}
/*
 * Instantiate DO_VCMP for signed byte, halfword and word elements
 * (helper name suffixes b, h and w).
 */
#define DO_VCMP_S(OP, FN) \
DO_VCMP(OP##b, 1, int8_t, FN) \
DO_VCMP(OP##h, 2, int16_t, FN) \
DO_VCMP(OP##w, 4, int32_t, FN)
/*
 * Define the helper mve_<OP>, which compares every ESIZE-byte element of
 * the 16-byte vector at vn against the scalar rm, both interpreted as
 * TYPE, using the predicate FN(element, scalar).
 *
 * Each element whose comparison is true contributes ESIZE set bits (one
 * per byte of the element) to a 16-bit result.  The result is ANDed with
 * the value returned by mve_element_mask(), and then written into the
 * bits of env->v7m.vpr selected by mve_eci_mask(); all other VPR bits are
 * left unchanged.  Finally mve_advance_vpt() is called.
 */
#define DO_VCMP_SCALAR(OP, ESIZE, TYPE, FN)                             \
    void HELPER(glue(mve_, OP))(CPUARMState *env, void *vn,             \
                                uint32_t rm)                            \
    {                                                                   \
        TYPE *elts = vn;                                                \
        uint16_t pred_mask = mve_element_mask(env);                     \
        uint16_t eci_mask = mve_eci_mask(env);                          \
        /* The scalar is truncated/reinterpreted to the element type */ \
        TYPE scalar = (TYPE)rm;                                         \
        uint16_t result = 0;                                            \
        uint16_t lane_bits = MAKE_64BIT_MASK(0, ESIZE);                 \
        unsigned idx;                                                   \
        for (idx = 0; idx < 16 / ESIZE; idx++) {                        \
            /* A true comparison sets one bit per byte of the element */ \
            if (FN(elts[H##ESIZE(idx)], scalar)) {                      \
                result |= lane_bits;                                    \
            }                                                           \
            lane_bits <<= ESIZE;                                        \
        }                                                               \
        result &= pred_mask;                                            \
        /* Replace only the VPR bits covered by eci_mask */             \
        env->v7m.vpr = (env->v7m.vpr & ~(uint32_t)eci_mask) |           \
            (result & eci_mask);                                        \
        mve_advance_vpt(env);                                           \
    }
/*
 * Instantiate DO_VCMP for unsigned byte, halfword and word elements
 * (helper name suffixes b, h and w).
 */
#define DO_VCMP_U(OP, FN) \
DO_VCMP(OP##b, 1, uint8_t, FN) \
DO_VCMP(OP##h, 2, uint16_t, FN) \
DO_VCMP(OP##w, 4, uint32_t, FN)
/*
 * Instantiate the signed compare helpers for byte, halfword and word
 * elements: the DO_VCMP forms (OP##b/h/w) and the vector-vs-scalar
 * forms (OP##_scalarb/h/w) generated by DO_VCMP_SCALAR.
 */
#define DO_VCMP_S(OP, FN) \
DO_VCMP(OP##b, 1, int8_t, FN) \
DO_VCMP(OP##h, 2, int16_t, FN) \
DO_VCMP(OP##w, 4, int32_t, FN) \
DO_VCMP_SCALAR(OP##_scalarb, 1, int8_t, FN) \
DO_VCMP_SCALAR(OP##_scalarh, 2, int16_t, FN) \
DO_VCMP_SCALAR(OP##_scalarw, 4, int32_t, FN)
/*
 * Instantiate the unsigned compare helpers for byte, halfword and word
 * elements: the DO_VCMP forms (OP##b/h/w) and the vector-vs-scalar
 * forms (OP##_scalarb/h/w) generated by DO_VCMP_SCALAR.
 */
#define DO_VCMP_U(OP, FN) \
DO_VCMP(OP##b, 1, uint8_t, FN) \
DO_VCMP(OP##h, 2, uint16_t, FN) \
DO_VCMP(OP##w, 4, uint32_t, FN) \
DO_VCMP_SCALAR(OP##_scalarb, 1, uint8_t, FN) \
DO_VCMP_SCALAR(OP##_scalarh, 2, uint16_t, FN) \
DO_VCMP_SCALAR(OP##_scalarw, 4, uint32_t, FN)
/*
 * Two-operand comparison predicates.
 * NOTE(review): presumably passed as the FN argument of DO_VCMP_S/DO_VCMP_U;
 * the instantiations are outside this view.
 */
#define DO_EQ(N, M) ((N) == (M))
#define DO_NE(N, M) ((N) != (M))

View File

@ -44,6 +44,7 @@ typedef void MVEGenOneOpImmFn(TCGv_ptr, TCGv_ptr, TCGv_i64);
typedef void MVEGenVIDUPFn(TCGv_i32, TCGv_ptr, TCGv_ptr, TCGv_i32, TCGv_i32);
typedef void MVEGenVIWDUPFn(TCGv_i32, TCGv_ptr, TCGv_ptr, TCGv_i32, TCGv_i32, TCGv_i32);
typedef void MVEGenCmpFn(TCGv_ptr, TCGv_ptr, TCGv_ptr);
/* Vector-vs-scalar compare generator: (cpu_env, pointer to Qn, scalar value) */
typedef void MVEGenScalarCmpFn(TCGv_ptr, TCGv_ptr, TCGv_i32);
/* Return the offset of a Qn register (same semantics as aa32_vfp_qreg()) */
static inline long mve_qreg_offset(unsigned reg)
@ -1209,6 +1210,37 @@ static bool do_vcmp(DisasContext *s, arg_vcmp *a, MVEGenCmpFn *fn)
return true;
}
/*
 * Emit code for an MVE integer vector-vs-scalar comparison.
 *
 * @fn is the helper generator for the selected element size; a NULL @fn
 * is rejected.  When the decoded mask field is non-zero the insn is the
 * VPT form and gen_vpst() is called after the comparison.
 *
 * Returns false when the insn is not accepted here (MVE not present,
 * no helper for this size, or Rm == 13).  Returns true otherwise,
 * including the case where mve_eci_check() or vfp_access_check() fails
 * and no comparison code is emitted.
 */
static bool do_vcmp_scalar(DisasContext *s, arg_vcmp_scalar *a,
                           MVEGenScalarCmpFn *fn)
{
    TCGv_ptr vec;
    TCGv_i32 scalar;

    if (!dc_isar_feature(aa32_mve, s)) {
        return false;
    }
    if (!fn || a->rm == 13) {
        return false;
    }
    if (!mve_eci_check(s)) {
        return true;
    }
    if (!vfp_access_check(s)) {
        return true;
    }

    vec = mve_qreg_ptr(a->qn);
    /* Encoding Rm=0b1111 means "constant zero" */
    scalar = (a->rm == 15) ? tcg_constant_i32(0) : load_reg(s, a->rm);

    fn(cpu_env, vec, scalar);
    tcg_temp_free_ptr(vec);
    tcg_temp_free_i32(scalar);

    if (a->mask) {
        /* VPT */
        gen_vpst(s, a->mask);
    }
    mve_update_eci(s);
    return true;
}
#define DO_VCMP(INSN, FN) \
static bool trans_##INSN(DisasContext *s, arg_vcmp *a) \
{ \
@ -1219,6 +1251,17 @@ static bool do_vcmp(DisasContext *s, arg_vcmp *a, MVEGenCmpFn *fn)
NULL, \
}; \
return do_vcmp(s, a, fns[a->size]); \
} \
static bool trans_##INSN##_scalar(DisasContext *s, \
arg_vcmp_scalar *a) \
{ \
static MVEGenScalarCmpFn * const fns[] = { \
gen_helper_mve_##FN##_scalarb, \
gen_helper_mve_##FN##_scalarh, \
gen_helper_mve_##FN##_scalarw, \
NULL, \
}; \
return do_vcmp_scalar(s, a, fns[a->size]); \
}
DO_VCMP(VCMPEQ, vcmpeq)