target/arm: Implement MVE LETP insn
Implement the MVE LETP insn. This is like the existing LE loop-end insn, but it must perform an FPU-enabled check, and on loop-exit it resets LTPSIZE to 4. To accommodate the requirement to do something on loop-exit, we drop the use of condlabel and instead manage both the TB exits manually, in the same way we already do in trans_WLS(). The other MVE-specific change to the LE insn is that we must raise an INVSTATE UsageFault insn if LTPSIZE is not 4. Signed-off-by: Peter Maydell <peter.maydell@linaro.org> Reviewed-by: Richard Henderson <richard.henderson@linaro.org> Message-id: 20210614151007.4545-10-peter.maydell@linaro.org
This commit is contained in:
parent
40a36f003c
commit
a454ea1e6d
@ -674,7 +674,7 @@ BL 1111 0. .......... 11.1 ............ @branch24
|
||||
DLS 1111 0 0000 100 rn:4 1110 0000 0000 0001 size=4
|
||||
WLS 1111 0 0000 100 rn:4 1100 . .......... 1 imm=%lob_imm size=4
|
||||
{
|
||||
LE 1111 0 0000 0 f:1 0 1111 1100 . .......... 1 imm=%lob_imm
|
||||
LE 1111 0 0000 0 f:1 tp:1 1111 1100 . .......... 1 imm=%lob_imm
|
||||
# This is WLSTP
|
||||
WLS 1111 0 0000 0 size:2 rn:4 1100 . .......... 1 imm=%lob_imm
|
||||
}
|
||||
|
@ -8223,25 +8223,113 @@ static bool trans_LE(DisasContext *s, arg_LE *a)
|
||||
* any faster.
|
||||
*/
|
||||
TCGv_i32 tmp;
|
||||
TCGLabel *loopend;
|
||||
bool fpu_active;
|
||||
|
||||
if (!dc_isar_feature(aa32_lob, s)) {
|
||||
return false;
|
||||
}
|
||||
if (a->f && a->tp) {
|
||||
return false;
|
||||
}
|
||||
if (s->condexec_mask) {
|
||||
/*
|
||||
* LE in an IT block is CONSTRAINED UNPREDICTABLE;
|
||||
* we choose to UNDEF, because otherwise our use of
|
||||
* gen_goto_tb(1) would clash with the use of TB exit 1
|
||||
* in the dc->condjmp condition-failed codepath in
|
||||
* arm_tr_tb_stop() and we'd get an assertion.
|
||||
*/
|
||||
return false;
|
||||
}
|
||||
if (a->tp) {
|
||||
/* LETP */
|
||||
if (!dc_isar_feature(aa32_mve, s)) {
|
||||
return false;
|
||||
}
|
||||
if (!vfp_access_check(s)) {
|
||||
s->eci_handled = true;
|
||||
return true;
|
||||
}
|
||||
}
|
||||
|
||||
/* LE/LETP is OK with ECI set and leaves it untouched */
|
||||
s->eci_handled = true;
|
||||
|
||||
if (!a->f) {
|
||||
/* Not loop-forever. If LR <= 1 this is the last loop: do nothing. */
|
||||
arm_gen_condlabel(s);
|
||||
tcg_gen_brcondi_i32(TCG_COND_LEU, cpu_R[14], 1, s->condlabel);
|
||||
/* Decrement LR */
|
||||
tmp = load_reg(s, 14);
|
||||
tcg_gen_addi_i32(tmp, tmp, -1);
|
||||
store_reg(s, 14, tmp);
|
||||
/*
|
||||
* With MVE, LTPSIZE might not be 4, and we must emit an INVSTATE
|
||||
* UsageFault exception for the LE insn in that case. Note that we
|
||||
* are not directly checking FPSCR.LTPSIZE but instead check the
|
||||
* pseudocode LTPSIZE() function, which returns 4 if the FPU is
|
||||
* not currently active (ie ActiveFPState() returns false). We
|
||||
* can identify not-active purely from our TB state flags, as the
|
||||
* FPU is active only if:
|
||||
* the FPU is enabled
|
||||
* AND lazy state preservation is not active
|
||||
* AND we do not need a new fp context (this is the ASPEN/FPCA check)
|
||||
*
|
||||
* Usually we don't need to care about this distinction between
|
||||
* LTPSIZE and FPSCR.LTPSIZE, because the code in vfp_access_check()
|
||||
* will either take an exception or clear the conditions that make
|
||||
* the FPU not active. But LE is an unusual case of a non-FP insn
|
||||
* that looks at LTPSIZE.
|
||||
*/
|
||||
fpu_active = !s->fp_excp_el && !s->v7m_lspact && !s->v7m_new_fp_ctxt_needed;
|
||||
|
||||
if (!a->tp && dc_isar_feature(aa32_mve, s) && fpu_active) {
|
||||
/* Need to do a runtime check for LTPSIZE != 4 */
|
||||
TCGLabel *skipexc = gen_new_label();
|
||||
tmp = load_cpu_field(v7m.ltpsize);
|
||||
tcg_gen_brcondi_i32(TCG_COND_EQ, tmp, 4, skipexc);
|
||||
tcg_temp_free_i32(tmp);
|
||||
gen_exception_insn(s, s->pc_curr, EXCP_INVSTATE, syn_uncategorized(),
|
||||
default_exception_el(s));
|
||||
gen_set_label(skipexc);
|
||||
}
|
||||
|
||||
if (a->f) {
|
||||
/* Loop-forever: just jump back to the loop start */
|
||||
gen_jmp(s, read_pc(s) - a->imm);
|
||||
return true;
|
||||
}
|
||||
|
||||
/*
|
||||
* Not loop-forever. If LR <= loop-decrement-value this is the last loop.
|
||||
* For LE, we know at this point that LTPSIZE must be 4 and the
|
||||
* loop decrement value is 1. For LETP we need to calculate the decrement
|
||||
* value from LTPSIZE.
|
||||
*/
|
||||
loopend = gen_new_label();
|
||||
if (!a->tp) {
|
||||
tcg_gen_brcondi_i32(TCG_COND_LEU, cpu_R[14], 1, loopend);
|
||||
tcg_gen_addi_i32(cpu_R[14], cpu_R[14], -1);
|
||||
} else {
|
||||
/*
|
||||
* Decrement by 1 << (4 - LTPSIZE). We need to use a TCG local
|
||||
* so that decr stays live after the brcondi.
|
||||
*/
|
||||
TCGv_i32 decr = tcg_temp_local_new_i32();
|
||||
TCGv_i32 ltpsize = load_cpu_field(v7m.ltpsize);
|
||||
tcg_gen_sub_i32(decr, tcg_constant_i32(4), ltpsize);
|
||||
tcg_gen_shl_i32(decr, tcg_constant_i32(1), decr);
|
||||
tcg_temp_free_i32(ltpsize);
|
||||
|
||||
tcg_gen_brcond_i32(TCG_COND_LEU, cpu_R[14], decr, loopend);
|
||||
|
||||
tcg_gen_sub_i32(cpu_R[14], cpu_R[14], decr);
|
||||
tcg_temp_free_i32(decr);
|
||||
}
|
||||
/* Jump back to the loop start */
|
||||
gen_jmp(s, read_pc(s) - a->imm);
|
||||
|
||||
gen_set_label(loopend);
|
||||
if (a->tp) {
|
||||
/* Exits from tail-pred loops must reset LTPSIZE to 4 */
|
||||
tmp = tcg_const_i32(4);
|
||||
store_cpu_field(tmp, v7m.ltpsize);
|
||||
}
|
||||
/* End TB, continuing to following insn */
|
||||
gen_jmp_tb(s, s->base.pc_next, 1);
|
||||
return true;
|
||||
}
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user