diff --git a/tcg/tci/tcg-target.c b/tcg/tci/tcg-target.c index 2d561b32e1..d1241b5692 100644 --- a/tcg/tci/tcg-target.c +++ b/tcg/tci/tcg-target.c @@ -40,14 +40,6 @@ /* Bitfield n...m (in 32 bit value). */ #define BITS(n, m) (((0xffffffffU << (31 - n)) >> (31 - n + m)) << m) -/* Used for function call generation. */ -#define TCG_REG_CALL_STACK TCG_REG_R4 -#define TCG_TARGET_STACK_ALIGN 16 -#define TCG_TARGET_CALL_STACK_OFFSET 0 - -/* TODO: documentation. */ -static uint8_t *tb_ret_addr; - /* Macros used in tcg_target_op_defs. */ #define R "r" #define RI "ri" @@ -513,7 +505,7 @@ static void tcg_out_ld(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg1, tcg_out_op_t(s, INDEX_op_ld_i64); tcg_out_r(s, ret); tcg_out_r(s, arg1); - assert(arg2 == (uint32_t)arg2); + assert(arg2 == (int32_t)arg2); tcg_out32(s, arg2); #else TODO(); @@ -636,7 +628,7 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, const TCGArg *args, case INDEX_op_st_i64: tcg_out_r(s, args[0]); tcg_out_r(s, args[1]); - assert(args[2] == (uint32_t)args[2]); + assert(args[2] == (int32_t)args[2]); tcg_out32(s, args[2]); break; case INDEX_op_add_i32: @@ -904,15 +896,19 @@ static void tcg_target_init(TCGContext *s) /* TODO: Which registers should be set here? */ tcg_regset_set32(tcg_target_call_clobber_regs, 0, BIT(TCG_TARGET_NB_REGS) - 1); + tcg_regset_clear(s->reserved_regs); tcg_regset_set_reg(s->reserved_regs, TCG_REG_CALL_STACK); tcg_add_target_add_op_defs(tcg_target_op_defs); - tcg_set_frame(s, TCG_AREG0, offsetof(CPUArchState, temp_buf), + + /* We use negative offsets from "sp" so that we can distinguish + stores that might pretend to be call arguments. */ + tcg_set_frame(s, TCG_REG_CALL_STACK, + -CPU_TEMP_BUF_NLONGS * sizeof(long), CPU_TEMP_BUF_NLONGS * sizeof(long)); } /* Generate global QEMU prologue and epilogue code. */ -static void tcg_target_qemu_prologue(TCGContext *s) +static inline void tcg_target_qemu_prologue(TCGContext *s) { - tb_ret_addr = s->code_ptr; } diff --git a/tcg/tci/tcg-target.h b/tcg/tci/tcg-target.h index 1f17576f54..0395bbb8e4 100644 --- a/tcg/tci/tcg-target.h +++ b/tcg/tci/tcg-target.h @@ -127,7 +127,6 @@ typedef enum { TCG_REG_R5, TCG_REG_R6, TCG_REG_R7, - TCG_AREG0 = TCG_REG_R7, #if TCG_TARGET_NB_REGS >= 16 TCG_REG_R8, TCG_REG_R9, @@ -160,6 +159,13 @@ typedef enum { TCG_CONST = UINT8_MAX } TCGReg; +#define TCG_AREG0 (TCG_TARGET_NB_REGS - 2) + +/* Used for function call generation. */ +#define TCG_REG_CALL_STACK (TCG_TARGET_NB_REGS - 1) +#define TCG_TARGET_CALL_STACK_OFFSET 0 +#define TCG_TARGET_STACK_ALIGN 16 + void tci_disas(uint8_t opc); tcg_target_ulong tcg_qemu_tb_exec(CPUArchState *env, uint8_t *tb_ptr); diff --git a/tci.c b/tci.c index 2b2c11f259..c742c8df5c 100644 --- a/tci.c +++ b/tci.c @@ -51,11 +51,6 @@ typedef uint64_t (*helper_function)(tcg_target_ulong, tcg_target_ulong, tcg_target_ulong); #endif -/* TCI can optionally use a global register variable for env. */ -#if !defined(AREG0) -CPUArchState *env; -#endif - /* Targets which don't use GETPC also don't need tci_tb_ptr which makes them a little faster. */ #if defined(GETPC) @@ -117,6 +112,7 @@ static void tci_write_reg(TCGReg index, tcg_target_ulong value) { assert(index < ARRAY_SIZE(tci_reg)); assert(index != TCG_AREG0); + assert(index != TCG_REG_CALL_STACK); tci_reg[index] = value; } @@ -182,7 +178,7 @@ static tcg_target_ulong tci_read_i(uint8_t **tb_ptr) return value; } -/* Read constant (32 bit) from bytecode. */ +/* Read unsigned constant (32 bit) from bytecode. */ static uint32_t tci_read_i32(uint8_t **tb_ptr) { uint32_t value = *(uint32_t *)(*tb_ptr); @@ -190,6 +186,14 @@ static uint32_t tci_read_i32(uint8_t **tb_ptr) return value; } +/* Read signed constant (32 bit) from bytecode. */ +static int32_t tci_read_s32(uint8_t **tb_ptr) +{ + int32_t value = *(int32_t *)(*tb_ptr); + *tb_ptr += sizeof(value); + return value; +} + #if TCG_TARGET_REG_BITS == 64 /* Read constant (64 bit) from bytecode. */ static uint64_t tci_read_i64(uint8_t **tb_ptr) @@ -430,18 +434,17 @@ static bool tci_compare64(uint64_t u0, uint64_t u1, TCGCond condition) } /* Interpret pseudo code in tb. */ -tcg_target_ulong tcg_qemu_tb_exec(CPUArchState *cpustate, uint8_t *tb_ptr) +tcg_target_ulong tcg_qemu_tb_exec(CPUArchState *env, uint8_t *tb_ptr) { + long tcg_temps[CPU_TEMP_BUF_NLONGS]; + uintptr_t sp_value = (uintptr_t)(tcg_temps + CPU_TEMP_BUF_NLONGS); tcg_target_ulong next_tb = 0; - env = cpustate; tci_reg[TCG_AREG0] = (tcg_target_ulong)env; + tci_reg[TCG_REG_CALL_STACK] = sp_value; assert(tb_ptr); for (;;) { -#if defined(GETPC) - tci_tb_ptr = (uintptr_t)tb_ptr; -#endif TCGOpcode opc = tb_ptr[0]; #if !defined(NDEBUG) uint8_t op_size = tb_ptr[1]; @@ -464,6 +467,10 @@ tcg_target_ulong tcg_qemu_tb_exec(CPUArchState *cpustate, uint8_t *tb_ptr) uint64_t v64; #endif +#if defined(GETPC) + tci_tb_ptr = (uintptr_t)tb_ptr; +#endif + /* Skip opcode and size entry. */ tb_ptr += 2; @@ -550,7 +557,7 @@ tcg_target_ulong tcg_qemu_tb_exec(CPUArchState *cpustate, uint8_t *tb_ptr) case INDEX_op_ld8u_i32: t0 = *tb_ptr++; t1 = tci_read_r(&tb_ptr); - t2 = tci_read_i32(&tb_ptr); + t2 = tci_read_s32(&tb_ptr); tci_write_reg8(t0, *(uint8_t *)(t1 + t2)); break; case INDEX_op_ld8s_i32: @@ -563,25 +570,26 @@ tcg_target_ulong tcg_qemu_tb_exec(CPUArchState *cpustate, uint8_t *tb_ptr) case INDEX_op_ld_i32: t0 = *tb_ptr++; t1 = tci_read_r(&tb_ptr); - t2 = tci_read_i32(&tb_ptr); + t2 = tci_read_s32(&tb_ptr); tci_write_reg32(t0, *(uint32_t *)(t1 + t2)); break; case INDEX_op_st8_i32: t0 = tci_read_r8(&tb_ptr); t1 = tci_read_r(&tb_ptr); - t2 = tci_read_i32(&tb_ptr); + t2 = tci_read_s32(&tb_ptr); *(uint8_t *)(t1 + t2) = t0; break; case INDEX_op_st16_i32: t0 = tci_read_r16(&tb_ptr); t1 = tci_read_r(&tb_ptr); - t2 = tci_read_i32(&tb_ptr); + t2 = tci_read_s32(&tb_ptr); *(uint16_t *)(t1 + t2) = t0; break; case INDEX_op_st_i32: t0 = tci_read_r32(&tb_ptr); t1 = tci_read_r(&tb_ptr); - t2 = tci_read_i32(&tb_ptr); + t2 = tci_read_s32(&tb_ptr); + assert(t1 != sp_value || (int32_t)t2 < 0); *(uint32_t *)(t1 + t2) = t0; break; @@ -818,7 +826,7 @@ tcg_target_ulong tcg_qemu_tb_exec(CPUArchState *cpustate, uint8_t *tb_ptr) case INDEX_op_ld8u_i64: t0 = *tb_ptr++; t1 = tci_read_r(&tb_ptr); - t2 = tci_read_i32(&tb_ptr); + t2 = tci_read_s32(&tb_ptr); tci_write_reg8(t0, *(uint8_t *)(t1 + t2)); break; case INDEX_op_ld8s_i64: @@ -829,43 +837,44 @@ tcg_target_ulong tcg_qemu_tb_exec(CPUArchState *cpustate, uint8_t *tb_ptr) case INDEX_op_ld32u_i64: t0 = *tb_ptr++; t1 = tci_read_r(&tb_ptr); - t2 = tci_read_i32(&tb_ptr); + t2 = tci_read_s32(&tb_ptr); tci_write_reg32(t0, *(uint32_t *)(t1 + t2)); break; case INDEX_op_ld32s_i64: t0 = *tb_ptr++; t1 = tci_read_r(&tb_ptr); - t2 = tci_read_i32(&tb_ptr); + t2 = tci_read_s32(&tb_ptr); tci_write_reg32s(t0, *(int32_t *)(t1 + t2)); break; case INDEX_op_ld_i64: t0 = *tb_ptr++; t1 = tci_read_r(&tb_ptr); - t2 = tci_read_i32(&tb_ptr); + t2 = tci_read_s32(&tb_ptr); tci_write_reg64(t0, *(uint64_t *)(t1 + t2)); break; case INDEX_op_st8_i64: t0 = tci_read_r8(&tb_ptr); t1 = tci_read_r(&tb_ptr); - t2 = tci_read_i32(&tb_ptr); + t2 = tci_read_s32(&tb_ptr); *(uint8_t *)(t1 + t2) = t0; break; case INDEX_op_st16_i64: t0 = tci_read_r16(&tb_ptr); t1 = tci_read_r(&tb_ptr); - t2 = tci_read_i32(&tb_ptr); + t2 = tci_read_s32(&tb_ptr); *(uint16_t *)(t1 + t2) = t0; break; case INDEX_op_st32_i64: t0 = tci_read_r32(&tb_ptr); t1 = tci_read_r(&tb_ptr); - t2 = tci_read_i32(&tb_ptr); + t2 = tci_read_s32(&tb_ptr); *(uint32_t *)(t1 + t2) = t0; break; case INDEX_op_st_i64: t0 = tci_read_r64(&tb_ptr); t1 = tci_read_r(&tb_ptr); - t2 = tci_read_i32(&tb_ptr); + t2 = tci_read_s32(&tb_ptr); + assert(t1 != sp_value || (int32_t)t2 < 0); *(uint64_t *)(t1 + t2) = t0; break;