target/arm: Implement PSEL

This is an SVE instruction that operates using the SVE vector
length, but it is present only if SME is implemented.

Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
Message-id: 20220708151540.18136-29-richard.henderson@linaro.org
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
This commit is contained in:
Richard Henderson 2022-07-08 20:45:23 +05:30 committed by Peter Maydell
parent 23a5e3859f
commit 598ab0b24c
2 changed files with 77 additions and 0 deletions

View File

@ -1674,3 +1674,23 @@ BFMLALT_zzxw 01100100 11 1 ..... 0100.1 ..... ..... @rrxr_3a esz=2
### SVE2 floating-point bfloat16 dot-product (indexed)
BFDOT_zzxz 01100100 01 1 ..... 010000 ..... ..... @rrxr_2 esz=2
### SVE broadcast predicate element
# Argument set shared by all PSEL patterns: element size (esz), the
# three predicate register fields (pd, pn, pm), the index register
# field (rv) and the immediate index (imm).
&psel esz pd pn pm rv imm
# Index register selector: insn bits [17:16], post-processed by
# plus_12 (defined elsewhere; presumably adds 12 -- confirm).
%psel_rv 16:2 !function=plus_12
# Immediate index, one layout per element size.  Multi-segment fields
# are concatenated with the first segment most significant, so the
# immediate narrows from 4 bits (_b) down to 1 bit (_d) as more of
# bits [20:18] are consumed by the size encoding in the patterns below.
%psel_imm_b 22:2 19:2
%psel_imm_h 22:2 20:1
%psel_imm_s 22:2
%psel_imm_d 23:1
# Common format: pn in bits [13:10], pm in bits [8:5], pd in bits [3:0];
# rv is always taken from %psel_rv.  esz and imm are supplied by each
# pattern.
@psel ........ .. . ... .. .. pn:4 . pm:4 . pd:4 \
&psel rv=%psel_rv
# One pattern per element size; the size is encoded by the position of
# the lowest set bit in bits [22], [20:18]:
#   bit 18 set               -> esz=0, imm from %psel_imm_b
#   bits [19:18] == 10       -> esz=1, imm from %psel_imm_h
#   bits [20:18] == 100      -> esz=2, imm from %psel_imm_s
#   bits [20:18] == 000, bit 22 set -> esz=3, imm from %psel_imm_d
PSEL 00100101 .. 1 ..1 .. 01 .... 0 .... 0 .... \
@psel esz=0 imm=%psel_imm_b
PSEL 00100101 .. 1 .10 .. 01 .... 0 .... 0 .... \
@psel esz=1 imm=%psel_imm_h
PSEL 00100101 .. 1 100 .. 01 .... 0 .... 0 .... \
@psel esz=2 imm=%psel_imm_s
PSEL 00100101 .1 1 000 .. 01 .... 0 .... 0 .... \
@psel esz=3 imm=%psel_imm_d

View File

@ -7419,3 +7419,60 @@ static bool do_BFMLAL_zzxw(DisasContext *s, arg_rrxr_esz *a, bool sel)
/*
 * BFMLALB_zzxw and BFMLALT_zzxw share do_BFMLAL_zzxw and differ only in
 * its final bool argument (sel): false for the B form, true for the T form.
 * NOTE(review): TRANS_FEAT is defined elsewhere; presumably it gates the
 * translator on the aa64_sve_bf16 feature -- confirm.
 */
TRANS_FEAT(BFMLALB_zzxw, aa64_sve_bf16, do_BFMLAL_zzxw, a, false)
TRANS_FEAT(BFMLALT_zzxw, aa64_sve_bf16, do_BFMLAL_zzxw, a, true)
/*
 * Emit TCG code for PSEL.
 *
 * One bit of predicate register pm is selected at run time; predicate
 * register pn is then ANDed with an all-ones or all-zeros mask derived
 * from that bit and the result is stored in pd.  In effect pd receives
 * either a copy of pn or zeros.
 *
 * The selected element number is (register rv + imm) reduced modulo the
 * number of elements, where the element count is the value returned by
 * vec_full_reg_size() shifted right by esz.
 *
 * Returns false if the aa64_sme feature is not present.  Otherwise
 * returns true, including the case where sve_access_check() fails and
 * no further code is emitted here.
 */
static bool trans_PSEL(DisasContext *s, arg_psel *a)
{
    int vec_size = vec_full_reg_size(s);
    int pred_size = pred_gvec_reg_size(s);
    int nelem = vec_size >> a->esz;
    int pd_ofs, pn_ofs;
    TCGv_i64 val, byte_idx, bit_idx;
    TCGv_ptr pred_ptr;

    if (!dc_isar_feature(aa64_sme, s)) {
        return false;
    }
    if (!sve_access_check(s)) {
        return true;
    }

    val = tcg_temp_new_i64();
    bit_idx = tcg_temp_new_i64();
    byte_idx = tcg_temp_new_i64();
    pred_ptr = tcg_temp_new_ptr();

    /*
     * Element number: (reg + imm) mod nelem.  A mask suffices when the
     * element count is a power of two; otherwise a real remainder is
     * required.
     */
    tcg_gen_addi_i64(val, cpu_reg(s, a->rv), a->imm);
    if (!is_power_of_2(nelem)) {
        tcg_gen_remu_i64(val, val, tcg_constant_i64(nelem));
    } else {
        tcg_gen_andi_i64(val, val, nelem - 1);
    }

    /*
     * Scale the element number by the element size to get a bit
     * position within the predicate, then split that position into a
     * byte index and a bit-within-byte index.
     */
    tcg_gen_shli_i64(val, val, a->esz);
    tcg_gen_andi_i64(bit_idx, val, 7);
    tcg_gen_shri_i64(byte_idx, val, 3);
    if (HOST_BIG_ENDIAN) {
        /*
         * Flip the low three bits of the byte index; presumably the
         * predicate is stored as host-endian 64-bit units.
         */
        tcg_gen_xori_i64(byte_idx, byte_idx, 7);
    }

    /* Fetch the single predicate byte that holds the selected bit. */
    tcg_gen_trunc_i64_ptr(pred_ptr, byte_idx);
    tcg_gen_add_ptr(pred_ptr, pred_ptr, cpu_env);
    tcg_gen_ld8u_i64(val, pred_ptr, pred_full_reg_offset(s, a->pm));

    /* Isolate the bit and negate it: 1 -> all ones, 0 -> all zeros. */
    tcg_gen_shr_i64(val, val, bit_idx);
    tcg_gen_andi_i64(val, val, 1);
    tcg_gen_neg_i64(val, val);

    /* pd = pn & mask: either a copy of pn or zeros. */
    pd_ofs = pred_full_reg_offset(s, a->pd);
    pn_ofs = pred_full_reg_offset(s, a->pn);
    tcg_gen_gvec_ands(MO_64, pd_ofs, pn_ofs, val, pred_size, pred_size);

    tcg_temp_free_i64(val);
    tcg_temp_free_i64(bit_idx);
    tcg_temp_free_i64(byte_idx);
    tcg_temp_free_ptr(pred_ptr);
    return true;
}