tcg: Introduce INDEX_op_qemu_st8_i32

Enable this opcode on i386 to restrict the set of input registers
for an 8-bit store, as required by the architecture.  This
removes the last use of scratch registers for user-only mode.

Reviewed-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
This commit is contained in:
Richard Henderson 2020-12-09 13:58:39 -06:00
parent d2ef1b83a7
commit 07ce0b0530
15 changed files with 49 additions and 12 deletions

View File

@ -211,6 +211,11 @@ DEF(qemu_ld_i64, DATA64_ARGS, TLADDR_ARGS, 1,
DEF(qemu_st_i64, 0, TLADDR_ARGS + DATA64_ARGS, 1, DEF(qemu_st_i64, 0, TLADDR_ARGS + DATA64_ARGS, 1,
TCG_OPF_CALL_CLOBBER | TCG_OPF_SIDE_EFFECTS | TCG_OPF_64BIT) TCG_OPF_CALL_CLOBBER | TCG_OPF_SIDE_EFFECTS | TCG_OPF_64BIT)
/* Only used by i386, where 8-bit stores accept a restricted set of data registers. */
DEF(qemu_st8_i32, 0, TLADDR_ARGS + 1, 1,
TCG_OPF_CALL_CLOBBER | TCG_OPF_SIDE_EFFECTS |
IMPL(TCG_TARGET_HAS_qemu_st8_i32))
/* Host vector support. */ /* Host vector support. */
#define IMPLVEC TCG_OPF_VECTOR | IMPL(TCG_TARGET_MAYBE_vec) #define IMPLVEC TCG_OPF_VECTOR | IMPL(TCG_TARGET_MAYBE_vec)

View File

@ -502,6 +502,7 @@ goto_ptr opcode, emitting this op is equivalent to emitting exit_tb(0).
* qemu_ld_i32/i64 t0, t1, flags, memidx * qemu_ld_i32/i64 t0, t1, flags, memidx
* qemu_st_i32/i64 t0, t1, flags, memidx * qemu_st_i32/i64 t0, t1, flags, memidx
* qemu_st8_i32 t0, t1, flags, memidx
Load data at the guest address t1 into t0, or store data in t0 at guest Load data at the guest address t1 into t0, or store data in t0 at guest
address t1. The _i32/_i64 size applies to the size of the input/output address t1. The _i32/_i64 size applies to the size of the input/output
@ -518,6 +519,10 @@ of the memory access.
For a 32-bit host, qemu_ld/st_i64 is guaranteed to only be used with a For a 32-bit host, qemu_ld/st_i64 is guaranteed to only be used with a
64-bit memory access specified in flags. 64-bit memory access specified in flags.
For i386, qemu_st8_i32 is exactly like qemu_st_i32, except the size of
the memory operation is known to be 8-bit. This allows the backend to
provide a different set of register constraints.
********* Host vector operations ********* Host vector operations
All of the vector ops have two parameters, TCGOP_VECL & TCGOP_VECE. All of the vector ops have two parameters, TCGOP_VECL & TCGOP_VECE.

View File

@ -88,6 +88,7 @@ typedef enum {
#define TCG_TARGET_HAS_extrl_i64_i32 0 #define TCG_TARGET_HAS_extrl_i64_i32 0
#define TCG_TARGET_HAS_extrh_i64_i32 0 #define TCG_TARGET_HAS_extrh_i64_i32 0
#define TCG_TARGET_HAS_goto_ptr 1 #define TCG_TARGET_HAS_goto_ptr 1
#define TCG_TARGET_HAS_qemu_st8_i32 0
#define TCG_TARGET_HAS_div_i64 1 #define TCG_TARGET_HAS_div_i64 1
#define TCG_TARGET_HAS_rem_i64 1 #define TCG_TARGET_HAS_rem_i64 1

View File

@ -126,6 +126,7 @@ extern bool use_idiv_instructions;
#define TCG_TARGET_HAS_rem_i32 0 #define TCG_TARGET_HAS_rem_i32 0
#define TCG_TARGET_HAS_goto_ptr 1 #define TCG_TARGET_HAS_goto_ptr 1
#define TCG_TARGET_HAS_direct_jump 0 #define TCG_TARGET_HAS_direct_jump 0
#define TCG_TARGET_HAS_qemu_st8_i32 0
enum { enum {
TCG_AREG0 = TCG_REG_R6, TCG_AREG0 = TCG_REG_R6,

View File

@ -245,11 +245,21 @@ static const char *target_parse_constraint(TCGArgConstraint *ct,
ct->regs |= ALL_VECTOR_REGS; ct->regs |= ALL_VECTOR_REGS;
break; break;
/* qemu_ld/st address constraint */
case 'L': case 'L':
/* qemu_ld/st data+address constraint */
ct->regs = TCG_TARGET_REG_BITS == 64 ? 0xffff : 0xff; ct->regs = TCG_TARGET_REG_BITS == 64 ? 0xffff : 0xff;
#ifdef CONFIG_SOFTMMU
tcg_regset_reset_reg(ct->regs, TCG_REG_L0); tcg_regset_reset_reg(ct->regs, TCG_REG_L0);
tcg_regset_reset_reg(ct->regs, TCG_REG_L1); tcg_regset_reset_reg(ct->regs, TCG_REG_L1);
#endif
break;
case 's':
/* qemu_st8_i32 data constraint */
ct->regs = 0xf;
#ifdef CONFIG_SOFTMMU
tcg_regset_reset_reg(ct->regs, TCG_REG_L0);
tcg_regset_reset_reg(ct->regs, TCG_REG_L1);
#endif
break; break;
case 'e': case 'e':
@ -2120,7 +2130,6 @@ static void tcg_out_qemu_st_direct(TCGContext *s, TCGReg datalo, TCGReg datahi,
TCGReg base, int index, intptr_t ofs, TCGReg base, int index, intptr_t ofs,
int seg, MemOp memop) int seg, MemOp memop)
{ {
const TCGReg scratch = TCG_REG_L0;
bool use_movbe = false; bool use_movbe = false;
int movop = OPC_MOVL_EvGv; int movop = OPC_MOVL_EvGv;
@ -2136,15 +2145,8 @@ static void tcg_out_qemu_st_direct(TCGContext *s, TCGReg datalo, TCGReg datahi,
switch (memop & MO_SIZE) { switch (memop & MO_SIZE) {
case MO_8: case MO_8:
/* /* This is handled with constraints on INDEX_op_qemu_st8_i32. */
* In 32-bit mode, 8-bit stores can only happen from [abcd]x. tcg_debug_assert(TCG_TARGET_REG_BITS == 64 || datalo < 4);
* TODO: Adjust constraints such that this is forced,
* then we won't need a scratch at all for user-only.
*/
if (TCG_TARGET_REG_BITS == 32 && datalo >= 4) {
tcg_out_mov(s, TCG_TYPE_I32, scratch, datalo);
datalo = scratch;
}
tcg_out_modrm_sib_offset(s, OPC_MOVB_EvGv + P_REXB_R + seg, tcg_out_modrm_sib_offset(s, OPC_MOVB_EvGv + P_REXB_R + seg,
datalo, base, index, 0, ofs); datalo, base, index, 0, ofs);
break; break;
@ -2491,6 +2493,7 @@ static inline void tcg_out_op(TCGContext *s, TCGOpcode opc,
tcg_out_qemu_ld(s, args, 1); tcg_out_qemu_ld(s, args, 1);
break; break;
case INDEX_op_qemu_st_i32: case INDEX_op_qemu_st_i32:
case INDEX_op_qemu_st8_i32:
tcg_out_qemu_st(s, args, 0); tcg_out_qemu_st(s, args, 0);
break; break;
case INDEX_op_qemu_st_i64: case INDEX_op_qemu_st_i64:
@ -2949,9 +2952,11 @@ static const TCGTargetOpDef *tcg_target_op_def(TCGOpcode op)
static const TCGTargetOpDef r_0_ci = { .args_ct_str = { "r", "0", "ci" } }; static const TCGTargetOpDef r_0_ci = { .args_ct_str = { "r", "0", "ci" } };
static const TCGTargetOpDef r_L = { .args_ct_str = { "r", "L" } }; static const TCGTargetOpDef r_L = { .args_ct_str = { "r", "L" } };
static const TCGTargetOpDef L_L = { .args_ct_str = { "L", "L" } }; static const TCGTargetOpDef L_L = { .args_ct_str = { "L", "L" } };
static const TCGTargetOpDef s_L = { .args_ct_str = { "s", "L" } };
static const TCGTargetOpDef r_L_L = { .args_ct_str = { "r", "L", "L" } }; static const TCGTargetOpDef r_L_L = { .args_ct_str = { "r", "L", "L" } };
static const TCGTargetOpDef r_r_L = { .args_ct_str = { "r", "r", "L" } }; static const TCGTargetOpDef r_r_L = { .args_ct_str = { "r", "r", "L" } };
static const TCGTargetOpDef L_L_L = { .args_ct_str = { "L", "L", "L" } }; static const TCGTargetOpDef L_L_L = { .args_ct_str = { "L", "L", "L" } };
static const TCGTargetOpDef s_L_L = { .args_ct_str = { "s", "L", "L" } };
static const TCGTargetOpDef r_r_L_L static const TCGTargetOpDef r_r_L_L
= { .args_ct_str = { "r", "r", "L", "L" } }; = { .args_ct_str = { "r", "r", "L", "L" } };
static const TCGTargetOpDef L_L_L_L static const TCGTargetOpDef L_L_L_L
@ -3145,6 +3150,8 @@ static const TCGTargetOpDef *tcg_target_op_def(TCGOpcode op)
return TARGET_LONG_BITS <= TCG_TARGET_REG_BITS ? &r_L : &r_L_L; return TARGET_LONG_BITS <= TCG_TARGET_REG_BITS ? &r_L : &r_L_L;
case INDEX_op_qemu_st_i32: case INDEX_op_qemu_st_i32:
return TARGET_LONG_BITS <= TCG_TARGET_REG_BITS ? &L_L : &L_L_L; return TARGET_LONG_BITS <= TCG_TARGET_REG_BITS ? &L_L : &L_L_L;
case INDEX_op_qemu_st8_i32:
return TARGET_LONG_BITS <= TCG_TARGET_REG_BITS ? &s_L : &s_L_L;
case INDEX_op_qemu_ld_i64: case INDEX_op_qemu_ld_i64:
return (TCG_TARGET_REG_BITS == 64 ? &r_L return (TCG_TARGET_REG_BITS == 64 ? &r_L
: TARGET_LONG_BITS <= TCG_TARGET_REG_BITS ? &r_r_L : TARGET_LONG_BITS <= TCG_TARGET_REG_BITS ? &r_r_L

View File

@ -172,6 +172,9 @@ extern bool have_movbe;
#define TCG_TARGET_HAS_muls2_i64 1 #define TCG_TARGET_HAS_muls2_i64 1
#define TCG_TARGET_HAS_muluh_i64 0 #define TCG_TARGET_HAS_muluh_i64 0
#define TCG_TARGET_HAS_mulsh_i64 0 #define TCG_TARGET_HAS_mulsh_i64 0
#define TCG_TARGET_HAS_qemu_st8_i32 0
#else
#define TCG_TARGET_HAS_qemu_st8_i32 1
#endif #endif
/* We do not support older SSE systems, only beginning with AVX1. */ /* We do not support older SSE systems, only beginning with AVX1. */

View File

@ -169,6 +169,7 @@ extern bool use_mips32r2_instructions;
#define TCG_TARGET_HAS_clz_i32 use_mips32r2_instructions #define TCG_TARGET_HAS_clz_i32 use_mips32r2_instructions
#define TCG_TARGET_HAS_ctz_i32 0 #define TCG_TARGET_HAS_ctz_i32 0
#define TCG_TARGET_HAS_ctpop_i32 0 #define TCG_TARGET_HAS_ctpop_i32 0
#define TCG_TARGET_HAS_qemu_st8_i32 0
#if TCG_TARGET_REG_BITS == 64 #if TCG_TARGET_REG_BITS == 64
#define TCG_TARGET_HAS_movcond_i64 use_movnz_instructions #define TCG_TARGET_HAS_movcond_i64 use_movnz_instructions

View File

@ -1541,6 +1541,7 @@ void tcg_optimize(TCGContext *s)
case INDEX_op_qemu_ld_i32: case INDEX_op_qemu_ld_i32:
case INDEX_op_qemu_ld_i64: case INDEX_op_qemu_ld_i64:
case INDEX_op_qemu_st_i32: case INDEX_op_qemu_st_i32:
case INDEX_op_qemu_st8_i32:
case INDEX_op_qemu_st_i64: case INDEX_op_qemu_st_i64:
case INDEX_op_call: case INDEX_op_call:
/* Opcodes that touch guest memory stop the optimization. */ /* Opcodes that touch guest memory stop the optimization. */

View File

@ -108,6 +108,7 @@ extern bool have_vsx;
#define TCG_TARGET_HAS_mulsh_i32 1 #define TCG_TARGET_HAS_mulsh_i32 1
#define TCG_TARGET_HAS_goto_ptr 1 #define TCG_TARGET_HAS_goto_ptr 1
#define TCG_TARGET_HAS_direct_jump 1 #define TCG_TARGET_HAS_direct_jump 1
#define TCG_TARGET_HAS_qemu_st8_i32 0
#if TCG_TARGET_REG_BITS == 64 #if TCG_TARGET_REG_BITS == 64
#define TCG_TARGET_HAS_add2_i32 0 #define TCG_TARGET_HAS_add2_i32 0

View File

@ -119,6 +119,7 @@ typedef enum {
#define TCG_TARGET_HAS_direct_jump 0 #define TCG_TARGET_HAS_direct_jump 0
#define TCG_TARGET_HAS_brcond2 1 #define TCG_TARGET_HAS_brcond2 1
#define TCG_TARGET_HAS_setcond2 1 #define TCG_TARGET_HAS_setcond2 1
#define TCG_TARGET_HAS_qemu_st8_i32 0
#if TCG_TARGET_REG_BITS == 64 #if TCG_TARGET_REG_BITS == 64
#define TCG_TARGET_HAS_movcond_i64 0 #define TCG_TARGET_HAS_movcond_i64 0

View File

@ -97,6 +97,7 @@ extern uint64_t s390_facilities;
#define TCG_TARGET_HAS_extrh_i64_i32 0 #define TCG_TARGET_HAS_extrh_i64_i32 0
#define TCG_TARGET_HAS_goto_ptr 1 #define TCG_TARGET_HAS_goto_ptr 1
#define TCG_TARGET_HAS_direct_jump (s390_facilities & FACILITY_GEN_INST_EXT) #define TCG_TARGET_HAS_direct_jump (s390_facilities & FACILITY_GEN_INST_EXT)
#define TCG_TARGET_HAS_qemu_st8_i32 0
#define TCG_TARGET_HAS_div2_i64 1 #define TCG_TARGET_HAS_div2_i64 1
#define TCG_TARGET_HAS_rot_i64 1 #define TCG_TARGET_HAS_rot_i64 1

View File

@ -126,6 +126,7 @@ extern bool use_vis3_instructions;
#define TCG_TARGET_HAS_mulsh_i32 0 #define TCG_TARGET_HAS_mulsh_i32 0
#define TCG_TARGET_HAS_goto_ptr 1 #define TCG_TARGET_HAS_goto_ptr 1
#define TCG_TARGET_HAS_direct_jump 1 #define TCG_TARGET_HAS_direct_jump 1
#define TCG_TARGET_HAS_qemu_st8_i32 0
#define TCG_TARGET_HAS_extrl_i64_i32 1 #define TCG_TARGET_HAS_extrl_i64_i32 1
#define TCG_TARGET_HAS_extrh_i64_i32 1 #define TCG_TARGET_HAS_extrh_i64_i32 1

View File

@ -2883,7 +2883,11 @@ void tcg_gen_qemu_st_i32(TCGv_i32 val, TCGv addr, TCGArg idx, MemOp memop)
} }
addr = plugin_prep_mem_callbacks(addr); addr = plugin_prep_mem_callbacks(addr);
gen_ldst_i32(INDEX_op_qemu_st_i32, val, addr, memop, idx); if (TCG_TARGET_HAS_qemu_st8_i32 && (memop & MO_SIZE) == MO_8) {
gen_ldst_i32(INDEX_op_qemu_st8_i32, val, addr, memop, idx);
} else {
gen_ldst_i32(INDEX_op_qemu_st_i32, val, addr, memop, idx);
}
plugin_gen_mem_callbacks(addr, info); plugin_gen_mem_callbacks(addr, info);
if (swap) { if (swap) {

View File

@ -1427,6 +1427,9 @@ bool tcg_op_supported(TCGOpcode op)
case INDEX_op_qemu_st_i64: case INDEX_op_qemu_st_i64:
return true; return true;
case INDEX_op_qemu_st8_i32:
return TCG_TARGET_HAS_qemu_st8_i32;
case INDEX_op_goto_ptr: case INDEX_op_goto_ptr:
return TCG_TARGET_HAS_goto_ptr; return TCG_TARGET_HAS_goto_ptr;
@ -2087,6 +2090,7 @@ static void tcg_dump_ops(TCGContext *s, bool have_prefs)
break; break;
case INDEX_op_qemu_ld_i32: case INDEX_op_qemu_ld_i32:
case INDEX_op_qemu_st_i32: case INDEX_op_qemu_st_i32:
case INDEX_op_qemu_st8_i32:
case INDEX_op_qemu_ld_i64: case INDEX_op_qemu_ld_i64:
case INDEX_op_qemu_st_i64: case INDEX_op_qemu_st_i64:
{ {

View File

@ -88,6 +88,7 @@
#define TCG_TARGET_HAS_mulsh_i32 0 #define TCG_TARGET_HAS_mulsh_i32 0
#define TCG_TARGET_HAS_goto_ptr 0 #define TCG_TARGET_HAS_goto_ptr 0
#define TCG_TARGET_HAS_direct_jump 1 #define TCG_TARGET_HAS_direct_jump 1
#define TCG_TARGET_HAS_qemu_st8_i32 0
#if TCG_TARGET_REG_BITS == 64 #if TCG_TARGET_REG_BITS == 64
#define TCG_TARGET_HAS_extrl_i64_i32 0 #define TCG_TARGET_HAS_extrl_i64_i32 0