tcg/i386: Clear dest first in tcg_out_setcond if possible

Using XOR first is both smaller and more efficient,
though it cannot be applied if it would clobber an input.

Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
This commit is contained in:
Richard Henderson 2023-08-05 15:51:30 -07:00
parent 6950f68b62
commit 96658acafd

View File

@ -1532,6 +1532,7 @@ static void tcg_out_setcond(TCGContext *s, int rexw, TCGCond cond,
int const_arg2) int const_arg2)
{ {
bool inv = false; bool inv = false;
bool cleared;
switch (cond) { switch (cond) {
case TCG_COND_NE: case TCG_COND_NE:
@ -1581,9 +1582,23 @@ static void tcg_out_setcond(TCGContext *s, int rexw, TCGCond cond,
break; break;
} }
/*
* If dest does not overlap the inputs, clearing it first is preferred.
* The XOR breaks any false dependency for the low-byte write to dest,
* and is also one byte smaller than MOVZBL.
*/
cleared = false;
if (dest != arg1 && (const_arg2 || dest != arg2)) {
tgen_arithr(s, ARITH_XOR, dest, dest);
cleared = true;
}
tcg_out_cmp(s, arg1, arg2, const_arg2, rexw); tcg_out_cmp(s, arg1, arg2, const_arg2, rexw);
tcg_out_modrm(s, OPC_SETCC | tcg_cond_to_jcc[cond], 0, dest); tcg_out_modrm(s, OPC_SETCC | tcg_cond_to_jcc[cond], 0, dest);
if (!cleared) {
tcg_out_ext8u(s, dest, dest); tcg_out_ext8u(s, dest, dest);
}
} }
#if TCG_TARGET_REG_BITS == 32 #if TCG_TARGET_REG_BITS == 32