target/arm: Implement MVE LETP insn

Implement the MVE LETP insn.  This is like the existing LE loop-end
insn, but it must perform an FPU-enabled check, and on loop-exit it
resets LTPSIZE to 4.

To accommodate the requirement to do something on loop-exit, we drop
the use of condlabel and instead manage both the TB exits manually,
in the same way we already do in trans_WLS().

The other MVE-specific change to the LE insn is that we must raise an
INVSTATE UsageFault exception if LTPSIZE is not 4.

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
Message-id: 20210614151007.4545-10-peter.maydell@linaro.org
This commit is contained in:
Peter Maydell 2021-06-14 16:09:19 +01:00
parent 40a36f003c
commit a454ea1e6d
2 changed files with 97 additions and 9 deletions

View File

@ -674,7 +674,7 @@ BL 1111 0. .......... 11.1 ............ @branch24
DLS 1111 0 0000 100 rn:4 1110 0000 0000 0001 size=4 DLS 1111 0 0000 100 rn:4 1110 0000 0000 0001 size=4
WLS 1111 0 0000 100 rn:4 1100 . .......... 1 imm=%lob_imm size=4 WLS 1111 0 0000 100 rn:4 1100 . .......... 1 imm=%lob_imm size=4
{ {
LE 1111 0 0000 0 f:1 0 1111 1100 . .......... 1 imm=%lob_imm LE 1111 0 0000 0 f:1 tp:1 1111 1100 . .......... 1 imm=%lob_imm
# This is WLSTP # This is WLSTP
WLS 1111 0 0000 0 size:2 rn:4 1100 . .......... 1 imm=%lob_imm WLS 1111 0 0000 0 size:2 rn:4 1100 . .......... 1 imm=%lob_imm
} }

View File

@ -8223,25 +8223,113 @@ static bool trans_LE(DisasContext *s, arg_LE *a)
* any faster. * any faster.
*/ */
TCGv_i32 tmp; TCGv_i32 tmp;
TCGLabel *loopend;
bool fpu_active;
if (!dc_isar_feature(aa32_lob, s)) { if (!dc_isar_feature(aa32_lob, s)) {
return false; return false;
} }
if (a->f && a->tp) {
return false;
}
if (s->condexec_mask) {
/*
* LE in an IT block is CONSTRAINED UNPREDICTABLE;
* we choose to UNDEF, because otherwise our use of
* gen_goto_tb(1) would clash with the use of TB exit 1
* in the dc->condjmp condition-failed codepath in
* arm_tr_tb_stop() and we'd get an assertion.
*/
return false;
}
if (a->tp) {
/* LETP */
if (!dc_isar_feature(aa32_mve, s)) {
return false;
}
if (!vfp_access_check(s)) {
s->eci_handled = true;
return true;
}
}
/* LE/LETP is OK with ECI set and leaves it untouched */ /* LE/LETP is OK with ECI set and leaves it untouched */
s->eci_handled = true; s->eci_handled = true;
if (!a->f) { /*
/* Not loop-forever. If LR <= 1 this is the last loop: do nothing. */ * With MVE, LTPSIZE might not be 4, and we must emit an INVSTATE
arm_gen_condlabel(s); * UsageFault exception for the LE insn in that case. Note that we
tcg_gen_brcondi_i32(TCG_COND_LEU, cpu_R[14], 1, s->condlabel); * are not directly checking FPSCR.LTPSIZE but instead check the
/* Decrement LR */ * pseudocode LTPSIZE() function, which returns 4 if the FPU is
tmp = load_reg(s, 14); * not currently active (ie ActiveFPState() returns false). We
tcg_gen_addi_i32(tmp, tmp, -1); * can identify not-active purely from our TB state flags, as the
store_reg(s, 14, tmp); * FPU is active only if:
* the FPU is enabled
* AND lazy state preservation is not active
* AND we do not need a new fp context (this is the ASPEN/FPCA check)
*
* Usually we don't need to care about this distinction between
* LTPSIZE and FPSCR.LTPSIZE, because the code in vfp_access_check()
* will either take an exception or clear the conditions that make
* the FPU not active. But LE is an unusual case of a non-FP insn
* that looks at LTPSIZE.
*/
fpu_active = !s->fp_excp_el && !s->v7m_lspact && !s->v7m_new_fp_ctxt_needed;
if (!a->tp && dc_isar_feature(aa32_mve, s) && fpu_active) {
/* Need to do a runtime check for LTPSIZE != 4 */
TCGLabel *skipexc = gen_new_label();
tmp = load_cpu_field(v7m.ltpsize);
tcg_gen_brcondi_i32(TCG_COND_EQ, tmp, 4, skipexc);
tcg_temp_free_i32(tmp);
gen_exception_insn(s, s->pc_curr, EXCP_INVSTATE, syn_uncategorized(),
default_exception_el(s));
gen_set_label(skipexc);
}
if (a->f) {
/* Loop-forever: just jump back to the loop start */
gen_jmp(s, read_pc(s) - a->imm);
return true;
}
/*
* Not loop-forever. If LR <= loop-decrement-value this is the last loop.
* For LE, we know at this point that LTPSIZE must be 4 and the
* loop decrement value is 1. For LETP we need to calculate the decrement
* value from LTPSIZE.
*/
loopend = gen_new_label();
if (!a->tp) {
tcg_gen_brcondi_i32(TCG_COND_LEU, cpu_R[14], 1, loopend);
tcg_gen_addi_i32(cpu_R[14], cpu_R[14], -1);
} else {
/*
* Decrement by 1 << (4 - LTPSIZE). We need to use a TCG local
* so that decr stays live after the brcondi.
*/
TCGv_i32 decr = tcg_temp_local_new_i32();
TCGv_i32 ltpsize = load_cpu_field(v7m.ltpsize);
tcg_gen_sub_i32(decr, tcg_constant_i32(4), ltpsize);
tcg_gen_shl_i32(decr, tcg_constant_i32(1), decr);
tcg_temp_free_i32(ltpsize);
tcg_gen_brcond_i32(TCG_COND_LEU, cpu_R[14], decr, loopend);
tcg_gen_sub_i32(cpu_R[14], cpu_R[14], decr);
tcg_temp_free_i32(decr);
} }
/* Jump back to the loop start */ /* Jump back to the loop start */
gen_jmp(s, read_pc(s) - a->imm); gen_jmp(s, read_pc(s) - a->imm);
gen_set_label(loopend);
if (a->tp) {
/* Exits from tail-pred loops must reset LTPSIZE to 4 */
tmp = tcg_const_i32(4);
store_cpu_field(tmp, v7m.ltpsize);
}
/* End TB, continuing to following insn */
gen_jmp_tb(s, s->base.pc_next, 1);
return true; return true;
} }