tcg/tci: Change encoding to uint32_t units

This removes all of the problems with unaligned accesses
to the bytecode stream.

With an 8-bit opcode at the bottom, we have 24 bits remaining,
which are generally split into 6 4-bit slots.  This fits well
with the maximum length opcodes, e.g. INDEX_op_add2_i32, which
have 6 register operands.

We have, in previous patches, rearranged things such that there
are no operations with a label which have more than one other
operand.  Which leaves us with a 20-bit field in which to encode
a label, giving us a maximum TB size of 512k -- easily large enough.

Change the INDEX_op_tci_movi_{i32,i64} opcodes to tci_mov[il].
The former puts the immediate in the upper 20 bits of the insn,
like we do for the label displacement.  The latter uses a label
to reference an entry in the constant pool.  Thus, in the worst
case we still have a single memory reference for any constant,
but now the constants are out-of-line of the bytecode and can
be shared between different moves saving space.

Change INDEX_op_call to use a label to reference a pair of
pointers in the constant pool.  This removes the only slightly
dodgy link with the layout of struct TCGHelperInfo.

The re-encode cannot be done in pieces.

Tested-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
Reviewed-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
This commit is contained in:
Richard Henderson 2021-02-01 21:27:41 -10:00
parent 7e00a08000
commit 6508988918
5 changed files with 383 additions and 562 deletions

View File

@ -277,8 +277,8 @@ DEF(last_generic, 0, 0, 0, TCG_OPF_NOT_PRESENT)
#ifdef TCG_TARGET_INTERPRETER
/* These opcodes are only for use between the tci generator and interpreter. */
DEF(tci_movi_i32, 1, 0, 1, TCG_OPF_NOT_PRESENT)
DEF(tci_movi_i64, 1, 0, 1, TCG_OPF_64BIT | TCG_OPF_NOT_PRESENT)
DEF(tci_movi, 1, 0, 1, TCG_OPF_NOT_PRESENT)
DEF(tci_movl, 1, 0, 1, TCG_OPF_NOT_PRESENT)
#endif
#undef TLADDR_ARGS

539
tcg/tci.c

File diff suppressed because it is too large Load Diff

View File

@ -23,10 +23,12 @@ This is what TCI (Tiny Code Interpreter) does.
Like each TCG host frontend, TCI implements the code generator in
tcg-target.c.inc, tcg-target.h. Both files are in directory tcg/tci.
The additional file tcg/tci.c adds the interpreter.
The additional file tcg/tci.c adds the interpreter and disassembler.
The bytecode consists of opcodes (same numeric values as those used by
TCG), command length and arguments of variable size and number.
The bytecode consists of opcodes (with only a few exceptions, with
the same numeric values and semantics as used by TCG), and up
to six arguments packed into a 32-bit integer. See comments in tci.c
for details on the encoding.
3) Usage
@ -39,11 +41,6 @@ suggest using this option. Setting it automatically would need
additional code in configure which must be fixed when new native TCG
implementations are added.
System emulation should work on any 32 or 64 bit host.
User mode emulation might work. Maybe a new linker script (*.ld)
is needed. Byte order might be wrong (on big endian hosts)
and need fixes in configure.
For hosts with native TCG, the interpreter TCI can be enabled by
configure --enable-tcg-interpreter
@ -118,13 +115,6 @@ u1 = linux-user-test works
in the interpreter. These opcodes raise a runtime exception, so it is
possible to see where code must be added.
* The pseudo code is not optimized and still ugly. For hosts with special
alignment requirements, it needs some fixes (maybe aligned bytecode
would also improve speed for hosts which support byte alignment).
* A better disassembler for the pseudo code would be nice (a very primitive
disassembler is included in tcg-target.c.inc).
* It might be useful to have a runtime option which selects the native TCG
or TCI, so QEMU would have to include two TCGs. Today, selecting TCI
is a configure option, so you need two compilations of QEMU.

View File

@ -22,20 +22,7 @@
* THE SOFTWARE.
*/
/* TODO list:
* - See TODO comments in code.
*/
/* Marker for missing code. */
#define TODO() \
do { \
fprintf(stderr, "TODO %s:%u: %s()\n", \
__FILE__, __LINE__, __func__); \
tcg_abort(); \
} while (0)
/* Bitfield n...m (in 32 bit value). */
#define BITS(n, m) (((0xffffffffU << (31 - n)) >> (31 - n + m)) << m)
#include "../tcg-pool.c.inc"
static TCGConstraintSetIndex tcg_target_op_def(TCGOpcode op)
{
@ -226,52 +213,16 @@ static const char *const tcg_target_reg_names[TCG_TARGET_NB_REGS] = {
static bool patch_reloc(tcg_insn_unit *code_ptr, int type,
intptr_t value, intptr_t addend)
{
/* tcg_out_reloc always uses the same type, addend. */
tcg_debug_assert(type == sizeof(tcg_target_long));
intptr_t diff = value - (intptr_t)(code_ptr + 1);
tcg_debug_assert(addend == 0);
tcg_debug_assert(value != 0);
if (TCG_TARGET_REG_BITS == 32) {
tcg_patch32(code_ptr, value);
} else {
tcg_patch64(code_ptr, value);
}
return true;
}
/* Write value (native size). */
static void tcg_out_i(TCGContext *s, tcg_target_ulong v)
{
if (TCG_TARGET_REG_BITS == 32) {
tcg_out32(s, v);
} else {
tcg_out64(s, v);
}
}
/* Write opcode. */
static void tcg_out_op_t(TCGContext *s, TCGOpcode op)
{
tcg_out8(s, op);
tcg_out8(s, 0);
}
/* Write register. */
static void tcg_out_r(TCGContext *s, TCGArg t0)
{
tcg_debug_assert(t0 < TCG_TARGET_NB_REGS);
tcg_out8(s, t0);
}
/* Write label. */
static void tci_out_label(TCGContext *s, TCGLabel *label)
{
if (label->has_value) {
tcg_out_i(s, label->u.value);
tcg_debug_assert(label->u.value);
} else {
tcg_out_reloc(s, s->code_ptr, sizeof(tcg_target_ulong), label, 0);
s->code_ptr += sizeof(tcg_target_ulong);
tcg_debug_assert(type == 20);
if (diff == sextract32(diff, 0, type)) {
tcg_patch32(code_ptr, deposit32(*code_ptr, 32 - type, type, diff));
return true;
}
return false;
}
static void stack_bounds_check(TCGReg base, target_long offset)
@ -285,239 +236,236 @@ static void stack_bounds_check(TCGReg base, target_long offset)
static void tcg_out_op_l(TCGContext *s, TCGOpcode op, TCGLabel *l0)
{
uint8_t *old_code_ptr = s->code_ptr;
tcg_insn_unit insn = 0;
tcg_out_op_t(s, op);
tci_out_label(s, l0);
old_code_ptr[1] = s->code_ptr - old_code_ptr;
tcg_out_reloc(s, s->code_ptr, 20, l0, 0);
insn = deposit32(insn, 0, 8, op);
tcg_out32(s, insn);
}
static void tcg_out_op_p(TCGContext *s, TCGOpcode op, void *p0)
{
uint8_t *old_code_ptr = s->code_ptr;
tcg_insn_unit insn = 0;
intptr_t diff;
tcg_out_op_t(s, op);
tcg_out_i(s, (uintptr_t)p0);
old_code_ptr[1] = s->code_ptr - old_code_ptr;
/* Special case for exit_tb: map null -> 0. */
if (p0 == NULL) {
diff = 0;
} else {
diff = p0 - (void *)(s->code_ptr + 1);
tcg_debug_assert(diff != 0);
if (diff != sextract32(diff, 0, 20)) {
tcg_raise_tb_overflow(s);
}
}
insn = deposit32(insn, 0, 8, op);
insn = deposit32(insn, 12, 20, diff);
tcg_out32(s, insn);
}
static void tcg_out_op_v(TCGContext *s, TCGOpcode op)
{
uint8_t *old_code_ptr = s->code_ptr;
tcg_out_op_t(s, op);
old_code_ptr[1] = s->code_ptr - old_code_ptr;
tcg_out32(s, (uint8_t)op);
}
static void tcg_out_op_ri(TCGContext *s, TCGOpcode op, TCGReg r0, int32_t i1)
{
uint8_t *old_code_ptr = s->code_ptr;
tcg_insn_unit insn = 0;
tcg_out_op_t(s, op);
tcg_out_r(s, r0);
tcg_out32(s, i1);
old_code_ptr[1] = s->code_ptr - old_code_ptr;
tcg_debug_assert(i1 == sextract32(i1, 0, 20));
insn = deposit32(insn, 0, 8, op);
insn = deposit32(insn, 8, 4, r0);
insn = deposit32(insn, 12, 20, i1);
tcg_out32(s, insn);
}
#if TCG_TARGET_REG_BITS == 64
static void tcg_out_op_rI(TCGContext *s, TCGOpcode op,
TCGReg r0, uint64_t i1)
{
uint8_t *old_code_ptr = s->code_ptr;
tcg_out_op_t(s, op);
tcg_out_r(s, r0);
tcg_out64(s, i1);
old_code_ptr[1] = s->code_ptr - old_code_ptr;
}
#endif
static void tcg_out_op_rl(TCGContext *s, TCGOpcode op, TCGReg r0, TCGLabel *l1)
{
uint8_t *old_code_ptr = s->code_ptr;
tcg_insn_unit insn = 0;
tcg_out_op_t(s, op);
tcg_out_r(s, r0);
tci_out_label(s, l1);
old_code_ptr[1] = s->code_ptr - old_code_ptr;
tcg_out_reloc(s, s->code_ptr, 20, l1, 0);
insn = deposit32(insn, 0, 8, op);
insn = deposit32(insn, 8, 4, r0);
tcg_out32(s, insn);
}
static void tcg_out_op_rr(TCGContext *s, TCGOpcode op, TCGReg r0, TCGReg r1)
{
uint8_t *old_code_ptr = s->code_ptr;
tcg_insn_unit insn = 0;
tcg_out_op_t(s, op);
tcg_out_r(s, r0);
tcg_out_r(s, r1);
old_code_ptr[1] = s->code_ptr - old_code_ptr;
insn = deposit32(insn, 0, 8, op);
insn = deposit32(insn, 8, 4, r0);
insn = deposit32(insn, 12, 4, r1);
tcg_out32(s, insn);
}
static void tcg_out_op_rrm(TCGContext *s, TCGOpcode op,
TCGReg r0, TCGReg r1, TCGArg m2)
{
uint8_t *old_code_ptr = s->code_ptr;
tcg_insn_unit insn = 0;
tcg_out_op_t(s, op);
tcg_out_r(s, r0);
tcg_out_r(s, r1);
tcg_out32(s, m2);
old_code_ptr[1] = s->code_ptr - old_code_ptr;
tcg_debug_assert(m2 == extract32(m2, 0, 12));
insn = deposit32(insn, 0, 8, op);
insn = deposit32(insn, 8, 4, r0);
insn = deposit32(insn, 12, 4, r1);
insn = deposit32(insn, 20, 12, m2);
tcg_out32(s, insn);
}
static void tcg_out_op_rrr(TCGContext *s, TCGOpcode op,
TCGReg r0, TCGReg r1, TCGReg r2)
{
uint8_t *old_code_ptr = s->code_ptr;
tcg_insn_unit insn = 0;
tcg_out_op_t(s, op);
tcg_out_r(s, r0);
tcg_out_r(s, r1);
tcg_out_r(s, r2);
old_code_ptr[1] = s->code_ptr - old_code_ptr;
insn = deposit32(insn, 0, 8, op);
insn = deposit32(insn, 8, 4, r0);
insn = deposit32(insn, 12, 4, r1);
insn = deposit32(insn, 16, 4, r2);
tcg_out32(s, insn);
}
static void tcg_out_op_rrs(TCGContext *s, TCGOpcode op,
TCGReg r0, TCGReg r1, intptr_t i2)
{
uint8_t *old_code_ptr = s->code_ptr;
tcg_insn_unit insn = 0;
tcg_out_op_t(s, op);
tcg_out_r(s, r0);
tcg_out_r(s, r1);
tcg_debug_assert(i2 == (int32_t)i2);
tcg_out32(s, i2);
old_code_ptr[1] = s->code_ptr - old_code_ptr;
tcg_debug_assert(i2 == sextract32(i2, 0, 16));
insn = deposit32(insn, 0, 8, op);
insn = deposit32(insn, 8, 4, r0);
insn = deposit32(insn, 12, 4, r1);
insn = deposit32(insn, 16, 16, i2);
tcg_out32(s, insn);
}
static void tcg_out_op_rrrc(TCGContext *s, TCGOpcode op,
TCGReg r0, TCGReg r1, TCGReg r2, TCGCond c3)
{
uint8_t *old_code_ptr = s->code_ptr;
tcg_insn_unit insn = 0;
tcg_out_op_t(s, op);
tcg_out_r(s, r0);
tcg_out_r(s, r1);
tcg_out_r(s, r2);
tcg_out8(s, c3);
old_code_ptr[1] = s->code_ptr - old_code_ptr;
insn = deposit32(insn, 0, 8, op);
insn = deposit32(insn, 8, 4, r0);
insn = deposit32(insn, 12, 4, r1);
insn = deposit32(insn, 16, 4, r2);
insn = deposit32(insn, 20, 4, c3);
tcg_out32(s, insn);
}
static void tcg_out_op_rrrm(TCGContext *s, TCGOpcode op,
TCGReg r0, TCGReg r1, TCGReg r2, TCGArg m3)
{
uint8_t *old_code_ptr = s->code_ptr;
tcg_insn_unit insn = 0;
tcg_out_op_t(s, op);
tcg_out_r(s, r0);
tcg_out_r(s, r1);
tcg_out_r(s, r2);
tcg_out32(s, m3);
old_code_ptr[1] = s->code_ptr - old_code_ptr;
tcg_debug_assert(m3 == extract32(m3, 0, 12));
insn = deposit32(insn, 0, 8, op);
insn = deposit32(insn, 8, 4, r0);
insn = deposit32(insn, 12, 4, r1);
insn = deposit32(insn, 16, 4, r2);
insn = deposit32(insn, 20, 12, m3);
tcg_out32(s, insn);
}
static void tcg_out_op_rrrbb(TCGContext *s, TCGOpcode op, TCGReg r0,
TCGReg r1, TCGReg r2, uint8_t b3, uint8_t b4)
{
uint8_t *old_code_ptr = s->code_ptr;
tcg_insn_unit insn = 0;
tcg_out_op_t(s, op);
tcg_out_r(s, r0);
tcg_out_r(s, r1);
tcg_out_r(s, r2);
tcg_out8(s, b3);
tcg_out8(s, b4);
old_code_ptr[1] = s->code_ptr - old_code_ptr;
tcg_debug_assert(b3 == extract32(b3, 0, 6));
tcg_debug_assert(b4 == extract32(b4, 0, 6));
insn = deposit32(insn, 0, 8, op);
insn = deposit32(insn, 8, 4, r0);
insn = deposit32(insn, 12, 4, r1);
insn = deposit32(insn, 16, 4, r2);
insn = deposit32(insn, 20, 6, b3);
insn = deposit32(insn, 26, 6, b4);
tcg_out32(s, insn);
}
static void tcg_out_op_rrrrm(TCGContext *s, TCGOpcode op, TCGReg r0,
TCGReg r1, TCGReg r2, TCGReg r3, TCGArg m4)
static void tcg_out_op_rrrrr(TCGContext *s, TCGOpcode op, TCGReg r0,
TCGReg r1, TCGReg r2, TCGReg r3, TCGReg r4)
{
uint8_t *old_code_ptr = s->code_ptr;
tcg_insn_unit insn = 0;
tcg_out_op_t(s, op);
tcg_out_r(s, r0);
tcg_out_r(s, r1);
tcg_out_r(s, r2);
tcg_out_r(s, r3);
tcg_out32(s, m4);
old_code_ptr[1] = s->code_ptr - old_code_ptr;
insn = deposit32(insn, 0, 8, op);
insn = deposit32(insn, 8, 4, r0);
insn = deposit32(insn, 12, 4, r1);
insn = deposit32(insn, 16, 4, r2);
insn = deposit32(insn, 20, 4, r3);
insn = deposit32(insn, 24, 4, r4);
tcg_out32(s, insn);
}
#if TCG_TARGET_REG_BITS == 32
static void tcg_out_op_rrrr(TCGContext *s, TCGOpcode op,
TCGReg r0, TCGReg r1, TCGReg r2, TCGReg r3)
{
uint8_t *old_code_ptr = s->code_ptr;
tcg_insn_unit insn = 0;
tcg_out_op_t(s, op);
tcg_out_r(s, r0);
tcg_out_r(s, r1);
tcg_out_r(s, r2);
tcg_out_r(s, r3);
old_code_ptr[1] = s->code_ptr - old_code_ptr;
insn = deposit32(insn, 0, 8, op);
insn = deposit32(insn, 8, 4, r0);
insn = deposit32(insn, 12, 4, r1);
insn = deposit32(insn, 16, 4, r2);
insn = deposit32(insn, 20, 4, r3);
tcg_out32(s, insn);
}
static void tcg_out_op_rrrrrc(TCGContext *s, TCGOpcode op,
TCGReg r0, TCGReg r1, TCGReg r2,
TCGReg r3, TCGReg r4, TCGCond c5)
{
uint8_t *old_code_ptr = s->code_ptr;
tcg_insn_unit insn = 0;
tcg_out_op_t(s, op);
tcg_out_r(s, r0);
tcg_out_r(s, r1);
tcg_out_r(s, r2);
tcg_out_r(s, r3);
tcg_out_r(s, r4);
tcg_out8(s, c5);
old_code_ptr[1] = s->code_ptr - old_code_ptr;
insn = deposit32(insn, 0, 8, op);
insn = deposit32(insn, 8, 4, r0);
insn = deposit32(insn, 12, 4, r1);
insn = deposit32(insn, 16, 4, r2);
insn = deposit32(insn, 20, 4, r3);
insn = deposit32(insn, 24, 4, r4);
insn = deposit32(insn, 28, 4, c5);
tcg_out32(s, insn);
}
static void tcg_out_op_rrrrrr(TCGContext *s, TCGOpcode op,
TCGReg r0, TCGReg r1, TCGReg r2,
TCGReg r3, TCGReg r4, TCGReg r5)
{
uint8_t *old_code_ptr = s->code_ptr;
tcg_insn_unit insn = 0;
tcg_out_op_t(s, op);
tcg_out_r(s, r0);
tcg_out_r(s, r1);
tcg_out_r(s, r2);
tcg_out_r(s, r3);
tcg_out_r(s, r4);
tcg_out_r(s, r5);
old_code_ptr[1] = s->code_ptr - old_code_ptr;
insn = deposit32(insn, 0, 8, op);
insn = deposit32(insn, 8, 4, r0);
insn = deposit32(insn, 12, 4, r1);
insn = deposit32(insn, 16, 4, r2);
insn = deposit32(insn, 20, 4, r3);
insn = deposit32(insn, 24, 4, r4);
insn = deposit32(insn, 28, 4, r5);
tcg_out32(s, insn);
}
#endif
/*
 * Emit load/store opcode OP transferring VAL at address BASE + OFFSET.
 *
 * The access is emitted through tcg_out_op_rrs().  When OFFSET is not
 * representable as a signed 16-bit value, it is first loaded into
 * TCG_REG_TMP and BASE is added to it; the access is then emitted
 * relative to TCG_REG_TMP with a zero displacement.
 */
static void tcg_out_ldst(TCGContext *s, TCGOpcode op, TCGReg val,
                         TCGReg base, intptr_t offset)
{
    TCGReg addr = base;
    intptr_t disp = offset;

    stack_bounds_check(base, offset);

    if (sextract32(offset, 0, 16) != offset) {
        TCGOpcode add_op;

        /* Pick the host-word-sized add. */
        if (TCG_TARGET_REG_BITS == 32) {
            add_op = INDEX_op_add_i32;
        } else {
            add_op = INDEX_op_add_i64;
        }

        /* Form the complete address in TCG_REG_TMP. */
        tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_TMP, offset);
        tcg_out_op_rrr(s, add_op, TCG_REG_TMP, TCG_REG_TMP, base);

        addr = TCG_REG_TMP;
        disp = 0;
    }

    tcg_out_op_rrs(s, op, val, addr, disp);
}
static void tcg_out_ld(TCGContext *s, TCGType type, TCGReg val, TCGReg base,
intptr_t offset)
{
stack_bounds_check(base, offset);
switch (type) {
case TCG_TYPE_I32:
tcg_out_op_rrs(s, INDEX_op_ld_i32, val, base, offset);
tcg_out_ldst(s, INDEX_op_ld_i32, val, base, offset);
break;
#if TCG_TARGET_REG_BITS == 64
case TCG_TYPE_I64:
tcg_out_op_rrs(s, INDEX_op_ld_i64, val, base, offset);
tcg_out_ldst(s, INDEX_op_ld_i64, val, base, offset);
break;
#endif
default:
@ -547,22 +495,32 @@ static void tcg_out_movi(TCGContext *s, TCGType type,
{
switch (type) {
case TCG_TYPE_I32:
tcg_out_op_ri(s, INDEX_op_tci_movi_i32, ret, arg);
break;
#if TCG_TARGET_REG_BITS == 64
arg = (int32_t)arg;
/* fall through */
case TCG_TYPE_I64:
tcg_out_op_rI(s, INDEX_op_tci_movi_i64, ret, arg);
break;
#endif
break;
default:
g_assert_not_reached();
}
if (arg == sextract32(arg, 0, 20)) {
tcg_out_op_ri(s, INDEX_op_tci_movi, ret, arg);
} else {
tcg_insn_unit insn = 0;
new_pool_label(s, arg, 20, s->code_ptr, 0);
insn = deposit32(insn, 0, 8, INDEX_op_tci_movl);
insn = deposit32(insn, 8, 4, ret);
tcg_out32(s, insn);
}
}
static void tcg_out_call(TCGContext *s, const tcg_insn_unit *func,
ffi_cif *cif)
{
uint8_t *old_code_ptr = s->code_ptr;
tcg_insn_unit insn = 0;
uint8_t which;
if (cif->rtype == &ffi_type_void) {
@ -573,12 +531,10 @@ static void tcg_out_call(TCGContext *s, const tcg_insn_unit *func,
tcg_debug_assert(cif->rtype->size == 8);
which = 2;
}
tcg_out_op_t(s, INDEX_op_call);
tcg_out8(s, which);
tcg_out_i(s, (uintptr_t)func);
tcg_out_i(s, (uintptr_t)cif);
old_code_ptr[1] = s->code_ptr - old_code_ptr;
new_pool_l2(s, 20, s->code_ptr, 0, (uintptr_t)func, (uintptr_t)cif);
insn = deposit32(insn, 0, 8, INDEX_op_call);
insn = deposit32(insn, 8, 4, which);
tcg_out32(s, insn);
}
#if TCG_TARGET_REG_BITS == 64
@ -637,8 +593,7 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc,
case INDEX_op_st_i32:
CASE_64(st32)
CASE_64(st)
stack_bounds_check(args[1], args[2]);
tcg_out_op_rrs(s, opc, args[0], args[1], args[2]);
tcg_out_ldst(s, opc, args[0], args[1], args[2]);
break;
CASE_32_64(add)
@ -731,8 +686,9 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc,
} else if (TARGET_LONG_BITS <= TCG_TARGET_REG_BITS) {
tcg_out_op_rrrm(s, opc, args[0], args[1], args[2], args[3]);
} else {
tcg_out_op_rrrrm(s, opc, args[0], args[1],
args[2], args[3], args[4]);
tcg_out_movi(s, TCG_TYPE_I32, TCG_REG_TMP, args[4]);
tcg_out_op_rrrrr(s, opc, args[0], args[1],
args[2], args[3], TCG_REG_TMP);
}
break;
@ -778,6 +734,11 @@ static bool tcg_target_const_match(int64_t val, TCGType type, int ct)
return ct & TCG_CT_CONST;
}
/*
 * Overwrite COUNT instruction units starting at P with zero bytes.
 *
 * NOTE(review): presumably an all-zero unit is treated as a no-op by the
 * interpreter -- confirm against tcg/tci.c.
 */
static void tcg_out_nop_fill(tcg_insn_unit *p, int count)
{
    size_t nbytes = sizeof(*p) * count;

    memset(p, 0, nbytes);
}
static void tcg_target_init(TCGContext *s)
{
#if defined(CONFIG_DEBUG_TCG_INTERPRETER)

View File

@ -41,7 +41,7 @@
#define TCG_TARGET_H
#define TCG_TARGET_INTERPRETER 1
#define TCG_TARGET_INSN_UNIT_SIZE 1
#define TCG_TARGET_INSN_UNIT_SIZE 4
#define TCG_TARGET_TLB_DISPLACEMENT_BITS 32
#define MAX_CODE_GEN_BUFFER_SIZE ((size_t)-1)
@ -166,6 +166,7 @@ typedef enum {
#define TCG_TARGET_STACK_ALIGN 8
#define HAVE_TCG_QEMU_TB_EXEC
#define TCG_TARGET_NEED_POOL_LABELS
/* We could notice __i386__ or __s390x__ and reduce the barriers depending
on the host. But if you want performance, you use the normal backend.