indirect register lowering

-----BEGIN PGP SIGNATURE-----
 Version: GnuPG v1
 
 iQEcBAABAgAGBQJXpMAtAAoJEK0ScMxN0Cebh60H/17kh95KYPERqTVI6eu9sMep
 Nn0tKXl1j4jGr5/w+1UIoY3zVCEI+17GeXQY6+XMtAvHrbFsvlbM7QgUST82l3ww
 dm9cMRMYgqAdsuUZHNAHkTxXtwgKgQkw06nJuYLDCpY1Skjw/vNt3pKqy4GDD7OJ
 FTHhq360hvE/mf7aFQV4477Cg8QdzvNTqoJgCC1waDN1N5BBNraq+wIjtyJZ299R
 6jAxjPBeGEIyv4/g4CdxrNPDdsBahnewO4wynQTbH52Whui1sRic2eSNzdKDK0hy
 aDVN2TDG1YnfhKCKAF73Gvpyb2eHcXDSdYQgFaVjaZtJpBXH845CRKHpo2kFrVw=
 =+piJ
 -----END PGP SIGNATURE-----

Merge remote-tracking branch 'remotes/rth/tags/pull-tcg-20160805' into staging

indirect register lowering

# gpg: Signature made Fri 05 Aug 2016 17:34:53 BST
# gpg:                using RSA key 0xAD1270CC4DD0279B
# gpg: Good signature from "Richard Henderson <rth7680@gmail.com>"
# gpg:                 aka "Richard Henderson <rth@redhat.com>"
# gpg:                 aka "Richard Henderson <rth@twiddle.net>"
# Primary key fingerprint: 9CB1 8DDA F8E8 49AD 2AFC  16A4 AD12 70CC 4DD0 279B

* remotes/rth/tags/pull-tcg-20160805:
  tcg: Lower indirect registers in a separate pass
  tcg: Require liveness analysis
  tcg: Include liveness info in the dumps
  tcg: Compress dead_temps and mem_temps into a single array
  tcg: Fold life data into TCGOp
  tcg: Reorg TCGOp chaining
  tcg: Compress liveness data to 16 bits

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
commit cf5198d580
Author: Peter Maydell <peter.maydell@linaro.org>
Date:   2016-08-08 10:39:18 +01:00

7 changed files with 442 additions and 268 deletions
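The series title is terse, so a rough standalone sketch of what "lowering indirect registers" means may help before reading the diffs. The toy IR and every name below (READ_G, bb, t0, $off) are invented for illustration only; the real implementation is liveness_pass_2() in the tcg/tcg.c hunks further down. The idea: a global that is only reachable through a loaded base pointer ("indirect") is rewritten to an ordinary temporary, with an explicit load inserted before its first use and a store back inserted where liveness says the value must be synced.

    /* Toy model, not QEMU code: lower accesses to one indirect global. */
    #include <stdio.h>

    /* Each entry of the toy basic block reads or writes "global 0", which in
     * the indirect case lives at env->off and is only reachable through the
     * env pointer loaded into a register. */
    enum kind { READ_G, WRITE_G, BB_END };

    int main(void)
    {
        enum kind bb[] = { READ_G, WRITE_G, READ_G, BB_END };
        int loaded = 0;   /* t0 currently holds the global's value   */
        int dirty  = 0;   /* t0 was written and must be stored back  */

        for (int i = 0; bb[i] != BB_END; i++) {
            if (!loaded) {
                puts("ld_i32 t0, env, $off     # inserted before first use");
                loaded = 1;
            }
            if (bb[i] == READ_G) {
                puts("some_op ..., t0          # was: ..., global");
            } else {
                puts("some_op t0, ...          # was: global, ...");
                dirty = 1;
            }
        }
        if (dirty) {
            puts("st_i32 t0, env, $off     # inserted at the sync point");
        }
        return 0;
    }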


@ -59,7 +59,7 @@ static void gen_tb_end(TranslationBlock *tb, int num_insns)
}
/* Terminate the linked list. */
tcg_ctx.gen_op_buf[tcg_ctx.gen_last_op_idx].next = -1;
tcg_ctx.gen_op_buf[tcg_ctx.gen_op_buf[0].prev].next = 0;
}
static inline void gen_io_start(void)


@ -42,6 +42,7 @@ static inline bool qemu_log_separate(void)
#define CPU_LOG_TB_NOCHAIN (1 << 13)
#define CPU_LOG_PAGE (1 << 14)
#define LOG_TRACE (1 << 15)
#define CPU_LOG_TB_OP_IND (1 << 16)
/* Returns true if a bit is set in the current loglevel mask
*/
@ -54,7 +55,7 @@ static inline bool qemu_loglevel_mask(int mask)
/* main logging function
*/
void GCC_FMT_ATTR(1, 2) qemu_log(const char *fmt, ...);
int GCC_FMT_ATTR(1, 2) qemu_log(const char *fmt, ...);
/* vfprintf-like logging function
*/


@ -82,37 +82,6 @@ static void init_temp_info(TCGArg temp)
}
}
static TCGOp *insert_op_before(TCGContext *s, TCGOp *old_op,
TCGOpcode opc, int nargs)
{
int oi = s->gen_next_op_idx;
int pi = s->gen_next_parm_idx;
int prev = old_op->prev;
int next = old_op - s->gen_op_buf;
TCGOp *new_op;
tcg_debug_assert(oi < OPC_BUF_SIZE);
tcg_debug_assert(pi + nargs <= OPPARAM_BUF_SIZE);
s->gen_next_op_idx = oi + 1;
s->gen_next_parm_idx = pi + nargs;
new_op = &s->gen_op_buf[oi];
*new_op = (TCGOp){
.opc = opc,
.args = pi,
.prev = prev,
.next = next
};
if (prev >= 0) {
s->gen_op_buf[prev].next = oi;
} else {
s->gen_first_op_idx = oi;
}
old_op->prev = oi;
return new_op;
}
static int op_bits(TCGOpcode op)
{
const TCGOpDef *def = &tcg_op_defs[op];
@ -583,7 +552,7 @@ void tcg_optimize(TCGContext *s)
nb_globals = s->nb_globals;
reset_all_temps(nb_temps);
for (oi = s->gen_first_op_idx; oi >= 0; oi = oi_next) {
for (oi = s->gen_op_buf[0].next; oi != 0; oi = oi_next) {
tcg_target_ulong mask, partmask, affected;
int nb_oargs, nb_iargs, i;
TCGArg tmp;
@ -1120,7 +1089,7 @@ void tcg_optimize(TCGContext *s)
uint64_t a = ((uint64_t)ah << 32) | al;
uint64_t b = ((uint64_t)bh << 32) | bl;
TCGArg rl, rh;
TCGOp *op2 = insert_op_before(s, op, INDEX_op_movi_i32, 2);
TCGOp *op2 = tcg_op_insert_before(s, op, INDEX_op_movi_i32, 2);
TCGArg *args2 = &s->gen_opparam_buf[op2->args];
if (opc == INDEX_op_add2_i32) {
@ -1146,7 +1115,7 @@ void tcg_optimize(TCGContext *s)
uint32_t b = temps[args[3]].val;
uint64_t r = (uint64_t)a * b;
TCGArg rl, rh;
TCGOp *op2 = insert_op_before(s, op, INDEX_op_movi_i32, 2);
TCGOp *op2 = tcg_op_insert_before(s, op, INDEX_op_movi_i32, 2);
TCGArg *args2 = &s->gen_opparam_buf[op2->args];
rl = args[0];


@ -52,7 +52,7 @@ static void tcg_emit_op(TCGContext *ctx, TCGOpcode opc, int args)
int pi = oi - 1;
tcg_debug_assert(oi < OPC_BUF_SIZE);
ctx->gen_last_op_idx = oi;
ctx->gen_op_buf[0].prev = oi;
ctx->gen_next_op_idx = ni;
ctx->gen_op_buf[oi] = (TCGOp){

tcg/tcg.c (540 changed lines)

@ -23,7 +23,6 @@
*/
/* define it to use liveness analysis (better code) */
#define USE_LIVENESS_ANALYSIS
#define USE_TCG_OPTIMIZATIONS
#include "qemu/osdep.h"
@ -438,9 +437,9 @@ void tcg_func_start(TCGContext *s)
s->goto_tb_issue_mask = 0;
#endif
s->gen_first_op_idx = 0;
s->gen_last_op_idx = -1;
s->gen_next_op_idx = 0;
s->gen_op_buf[0].next = 1;
s->gen_op_buf[0].prev = 0;
s->gen_next_op_idx = 1;
s->gen_next_parm_idx = 0;
s->be = tcg_malloc(sizeof(TCGBackendData));
@ -532,8 +531,12 @@ int tcg_global_mem_new_internal(TCGType type, TCGv_ptr base,
#endif
if (!base_ts->fixed_reg) {
indirect_reg = 1;
/* We do not support double-indirect registers. */
tcg_debug_assert(!base_ts->indirect_reg);
base_ts->indirect_base = 1;
s->nb_indirects += (TCG_TARGET_REG_BITS == 32 && type == TCG_TYPE_I64
? 2 : 1);
indirect_reg = 1;
}
if (TCG_TARGET_REG_BITS == 32 && type == TCG_TYPE_I64) {
@ -869,7 +872,7 @@ void tcg_gen_callN(TCGContext *s, void *func, TCGArg ret,
/* Make sure the calli field didn't overflow. */
tcg_debug_assert(s->gen_op_buf[i].calli == real_args);
s->gen_last_op_idx = i;
s->gen_op_buf[0].prev = i;
s->gen_next_op_idx = i + 1;
s->gen_next_parm_idx = pi;
@ -1021,11 +1024,12 @@ void tcg_dump_ops(TCGContext *s)
TCGOp *op;
int oi;
for (oi = s->gen_first_op_idx; oi >= 0; oi = op->next) {
for (oi = s->gen_op_buf[0].next; oi != 0; oi = op->next) {
int i, k, nb_oargs, nb_iargs, nb_cargs;
const TCGOpDef *def;
const TCGArg *args;
TCGOpcode c;
int col = 0;
op = &s->gen_op_buf[oi];
c = op->opc;
@ -1033,7 +1037,7 @@ void tcg_dump_ops(TCGContext *s)
args = &s->gen_opparam_buf[op->args];
if (c == INDEX_op_insn_start) {
qemu_log("%s ----", oi != s->gen_first_op_idx ? "\n" : "");
col += qemu_log("%s ----", oi != s->gen_op_buf[0].next ? "\n" : "");
for (i = 0; i < TARGET_INSN_START_WORDS; ++i) {
target_ulong a;
@ -1042,7 +1046,7 @@ void tcg_dump_ops(TCGContext *s)
#else
a = args[i];
#endif
qemu_log(" " TARGET_FMT_lx, a);
col += qemu_log(" " TARGET_FMT_lx, a);
}
} else if (c == INDEX_op_call) {
/* variable number of arguments */
@ -1051,11 +1055,11 @@ void tcg_dump_ops(TCGContext *s)
nb_cargs = def->nb_cargs;
/* function name, flags, out args */
qemu_log(" %s %s,$0x%" TCG_PRIlx ",$%d", def->name,
col += qemu_log(" %s %s,$0x%" TCG_PRIlx ",$%d", def->name,
tcg_find_helper(s, args[nb_oargs + nb_iargs]),
args[nb_oargs + nb_iargs + 1], nb_oargs);
for (i = 0; i < nb_oargs; i++) {
qemu_log(",%s", tcg_get_arg_str_idx(s, buf, sizeof(buf),
col += qemu_log(",%s", tcg_get_arg_str_idx(s, buf, sizeof(buf),
args[i]));
}
for (i = 0; i < nb_iargs; i++) {
@ -1064,10 +1068,10 @@ void tcg_dump_ops(TCGContext *s)
if (arg != TCG_CALL_DUMMY_ARG) {
t = tcg_get_arg_str_idx(s, buf, sizeof(buf), arg);
}
qemu_log(",%s", t);
col += qemu_log(",%s", t);
}
} else {
qemu_log(" %s ", def->name);
col += qemu_log(" %s ", def->name);
nb_oargs = def->nb_oargs;
nb_iargs = def->nb_iargs;
@ -1076,16 +1080,16 @@ void tcg_dump_ops(TCGContext *s)
k = 0;
for (i = 0; i < nb_oargs; i++) {
if (k != 0) {
qemu_log(",");
col += qemu_log(",");
}
qemu_log("%s", tcg_get_arg_str_idx(s, buf, sizeof(buf),
col += qemu_log("%s", tcg_get_arg_str_idx(s, buf, sizeof(buf),
args[k++]));
}
for (i = 0; i < nb_iargs; i++) {
if (k != 0) {
qemu_log(",");
col += qemu_log(",");
}
qemu_log("%s", tcg_get_arg_str_idx(s, buf, sizeof(buf),
col += qemu_log("%s", tcg_get_arg_str_idx(s, buf, sizeof(buf),
args[k++]));
}
switch (c) {
@ -1098,9 +1102,9 @@ void tcg_dump_ops(TCGContext *s)
case INDEX_op_setcond_i64:
case INDEX_op_movcond_i64:
if (args[k] < ARRAY_SIZE(cond_name) && cond_name[args[k]]) {
qemu_log(",%s", cond_name[args[k++]]);
col += qemu_log(",%s", cond_name[args[k++]]);
} else {
qemu_log(",$0x%" TCG_PRIlx, args[k++]);
col += qemu_log(",$0x%" TCG_PRIlx, args[k++]);
}
i = 1;
break;
@ -1114,12 +1118,12 @@ void tcg_dump_ops(TCGContext *s)
unsigned ix = get_mmuidx(oi);
if (op & ~(MO_AMASK | MO_BSWAP | MO_SSIZE)) {
qemu_log(",$0x%x,%u", op, ix);
col += qemu_log(",$0x%x,%u", op, ix);
} else {
const char *s_al, *s_op;
s_al = alignment_name[(op & MO_AMASK) >> MO_ASHIFT];
s_op = ldst_name[op & (MO_BSWAP | MO_SSIZE)];
qemu_log(",%s%s,%u", s_al, s_op, ix);
col += qemu_log(",%s%s,%u", s_al, s_op, ix);
}
i = 1;
}
@ -1134,14 +1138,39 @@ void tcg_dump_ops(TCGContext *s)
case INDEX_op_brcond_i32:
case INDEX_op_brcond_i64:
case INDEX_op_brcond2_i32:
qemu_log("%s$L%d", k ? "," : "", arg_label(args[k])->id);
col += qemu_log("%s$L%d", k ? "," : "", arg_label(args[k])->id);
i++, k++;
break;
default:
break;
}
for (; i < nb_cargs; i++, k++) {
qemu_log("%s$0x%" TCG_PRIlx, k ? "," : "", args[k]);
col += qemu_log("%s$0x%" TCG_PRIlx, k ? "," : "", args[k]);
}
}
if (op->life) {
unsigned life = op->life;
for (; col < 48; ++col) {
putc(' ', qemu_logfile);
}
if (life & (SYNC_ARG * 3)) {
qemu_log(" sync:");
for (i = 0; i < 2; ++i) {
if (life & (SYNC_ARG << i)) {
qemu_log(" %d", i);
}
}
}
life /= DEAD_ARG;
if (life) {
qemu_log(" dead:");
for (i = 0; life; ++i, life >>= 1) {
if (life & 1) {
qemu_log(" %d", i);
}
}
}
}
qemu_log("\n");
@ -1298,71 +1327,116 @@ void tcg_op_remove(TCGContext *s, TCGOp *op)
int next = op->next;
int prev = op->prev;
if (next >= 0) {
s->gen_op_buf[next].prev = prev;
} else {
s->gen_last_op_idx = prev;
}
if (prev >= 0) {
s->gen_op_buf[prev].next = next;
} else {
s->gen_first_op_idx = next;
}
/* We should never attempt to remove the list terminator. */
tcg_debug_assert(op != &s->gen_op_buf[0]);
memset(op, -1, sizeof(*op));
s->gen_op_buf[next].prev = prev;
s->gen_op_buf[prev].next = next;
memset(op, 0, sizeof(*op));
#ifdef CONFIG_PROFILER
s->del_op_count++;
#endif
}
#ifdef USE_LIVENESS_ANALYSIS
TCGOp *tcg_op_insert_before(TCGContext *s, TCGOp *old_op,
TCGOpcode opc, int nargs)
{
int oi = s->gen_next_op_idx;
int pi = s->gen_next_parm_idx;
int prev = old_op->prev;
int next = old_op - s->gen_op_buf;
TCGOp *new_op;
tcg_debug_assert(oi < OPC_BUF_SIZE);
tcg_debug_assert(pi + nargs <= OPPARAM_BUF_SIZE);
s->gen_next_op_idx = oi + 1;
s->gen_next_parm_idx = pi + nargs;
new_op = &s->gen_op_buf[oi];
*new_op = (TCGOp){
.opc = opc,
.args = pi,
.prev = prev,
.next = next
};
s->gen_op_buf[prev].next = oi;
old_op->prev = oi;
return new_op;
}
TCGOp *tcg_op_insert_after(TCGContext *s, TCGOp *old_op,
TCGOpcode opc, int nargs)
{
int oi = s->gen_next_op_idx;
int pi = s->gen_next_parm_idx;
int prev = old_op - s->gen_op_buf;
int next = old_op->next;
TCGOp *new_op;
tcg_debug_assert(oi < OPC_BUF_SIZE);
tcg_debug_assert(pi + nargs <= OPPARAM_BUF_SIZE);
s->gen_next_op_idx = oi + 1;
s->gen_next_parm_idx = pi + nargs;
new_op = &s->gen_op_buf[oi];
*new_op = (TCGOp){
.opc = opc,
.args = pi,
.prev = prev,
.next = next
};
s->gen_op_buf[next].prev = oi;
old_op->next = oi;
return new_op;
}
#define TS_DEAD 1
#define TS_MEM 2
#define IS_DEAD_ARG(n) (arg_life & (DEAD_ARG << (n)))
#define NEED_SYNC_ARG(n) (arg_life & (SYNC_ARG << (n)))
/* liveness analysis: end of function: all temps are dead, and globals
should be in memory. */
static inline void tcg_la_func_end(TCGContext *s, uint8_t *dead_temps,
uint8_t *mem_temps)
static inline void tcg_la_func_end(TCGContext *s, uint8_t *temp_state)
{
memset(dead_temps, 1, s->nb_temps);
memset(mem_temps, 1, s->nb_globals);
memset(mem_temps + s->nb_globals, 0, s->nb_temps - s->nb_globals);
memset(temp_state, TS_DEAD | TS_MEM, s->nb_globals);
memset(temp_state + s->nb_globals, TS_DEAD, s->nb_temps - s->nb_globals);
}
/* liveness analysis: end of basic block: all temps are dead, globals
and local temps should be in memory. */
static inline void tcg_la_bb_end(TCGContext *s, uint8_t *dead_temps,
uint8_t *mem_temps)
static inline void tcg_la_bb_end(TCGContext *s, uint8_t *temp_state)
{
int i;
int i, n;
memset(dead_temps, 1, s->nb_temps);
memset(mem_temps, 1, s->nb_globals);
for(i = s->nb_globals; i < s->nb_temps; i++) {
mem_temps[i] = s->temps[i].temp_local;
tcg_la_func_end(s, temp_state);
for (i = s->nb_globals, n = s->nb_temps; i < n; i++) {
if (s->temps[i].temp_local) {
temp_state[i] |= TS_MEM;
}
}
}
/* Liveness analysis : update the opc_dead_args array to tell if a
/* Liveness analysis : update the opc_arg_life array to tell if a
given input arguments is dead. Instructions updating dead
temporaries are removed. */
static void tcg_liveness_analysis(TCGContext *s)
static void liveness_pass_1(TCGContext *s, uint8_t *temp_state)
{
uint8_t *dead_temps, *mem_temps;
int oi, oi_prev, nb_ops;
int nb_globals = s->nb_globals;
int oi, oi_prev;
nb_ops = s->gen_next_op_idx;
s->op_dead_args = tcg_malloc(nb_ops * sizeof(uint16_t));
s->op_sync_args = tcg_malloc(nb_ops * sizeof(uint8_t));
tcg_la_func_end(s, temp_state);
dead_temps = tcg_malloc(s->nb_temps);
mem_temps = tcg_malloc(s->nb_temps);
tcg_la_func_end(s, dead_temps, mem_temps);
for (oi = s->gen_last_op_idx; oi >= 0; oi = oi_prev) {
for (oi = s->gen_op_buf[0].prev; oi != 0; oi = oi_prev) {
int i, nb_iargs, nb_oargs;
TCGOpcode opc_new, opc_new2;
bool have_opc_new2;
uint16_t dead_args;
uint8_t sync_args;
TCGLifeData arg_life = 0;
TCGArg arg;
TCGOp * const op = &s->gen_op_buf[oi];
@ -1385,7 +1459,7 @@ static void tcg_liveness_analysis(TCGContext *s)
if (call_flags & TCG_CALL_NO_SIDE_EFFECTS) {
for (i = 0; i < nb_oargs; i++) {
arg = args[i];
if (!dead_temps[arg] || mem_temps[arg]) {
if (temp_state[arg] != TS_DEAD) {
goto do_not_remove_call;
}
}
@ -1394,46 +1468,44 @@ static void tcg_liveness_analysis(TCGContext *s)
do_not_remove_call:
/* output args are dead */
dead_args = 0;
sync_args = 0;
for (i = 0; i < nb_oargs; i++) {
arg = args[i];
if (dead_temps[arg]) {
dead_args |= (1 << i);
if (temp_state[arg] & TS_DEAD) {
arg_life |= DEAD_ARG << i;
}
if (mem_temps[arg]) {
sync_args |= (1 << i);
if (temp_state[arg] & TS_MEM) {
arg_life |= SYNC_ARG << i;
}
dead_temps[arg] = 1;
mem_temps[arg] = 0;
temp_state[arg] = TS_DEAD;
}
if (!(call_flags & TCG_CALL_NO_READ_GLOBALS)) {
/* globals should be synced to memory */
memset(mem_temps, 1, s->nb_globals);
}
if (!(call_flags & (TCG_CALL_NO_WRITE_GLOBALS |
TCG_CALL_NO_READ_GLOBALS))) {
/* globals should go back to memory */
memset(dead_temps, 1, s->nb_globals);
memset(temp_state, TS_DEAD | TS_MEM, nb_globals);
} else if (!(call_flags & TCG_CALL_NO_READ_GLOBALS)) {
/* globals should be synced to memory */
for (i = 0; i < nb_globals; i++) {
temp_state[i] |= TS_MEM;
}
}
/* record arguments that die in this helper */
for (i = nb_oargs; i < nb_iargs + nb_oargs; i++) {
arg = args[i];
if (arg != TCG_CALL_DUMMY_ARG) {
if (dead_temps[arg]) {
dead_args |= (1 << i);
if (temp_state[arg] & TS_DEAD) {
arg_life |= DEAD_ARG << i;
}
}
}
/* input arguments are live for preceding opcodes */
for (i = nb_oargs; i < nb_oargs + nb_iargs; i++) {
for (i = nb_oargs; i < nb_iargs + nb_oargs; i++) {
arg = args[i];
dead_temps[arg] = 0;
if (arg != TCG_CALL_DUMMY_ARG) {
temp_state[arg] &= ~TS_DEAD;
}
}
s->op_dead_args[oi] = dead_args;
s->op_sync_args[oi] = sync_args;
}
}
break;
@ -1441,8 +1513,7 @@ static void tcg_liveness_analysis(TCGContext *s)
break;
case INDEX_op_discard:
/* mark the temporary as dead */
dead_temps[args[0]] = 1;
mem_temps[args[0]] = 0;
temp_state[args[0]] = TS_DEAD;
break;
case INDEX_op_add2_i32:
@ -1463,8 +1534,8 @@ static void tcg_liveness_analysis(TCGContext *s)
the low part. The result can be optimized to a simple
add or sub. This happens often for x86_64 guest when the
cpu mode is set to 32 bit. */
if (dead_temps[args[1]] && !mem_temps[args[1]]) {
if (dead_temps[args[0]] && !mem_temps[args[0]]) {
if (temp_state[args[1]] == TS_DEAD) {
if (temp_state[args[0]] == TS_DEAD) {
goto do_remove;
}
/* Replace the opcode and adjust the args in place,
@ -1501,8 +1572,8 @@ static void tcg_liveness_analysis(TCGContext *s)
do_mul2:
nb_iargs = 2;
nb_oargs = 2;
if (dead_temps[args[1]] && !mem_temps[args[1]]) {
if (dead_temps[args[0]] && !mem_temps[args[0]]) {
if (temp_state[args[1]] == TS_DEAD) {
if (temp_state[args[0]] == TS_DEAD) {
/* Both parts of the operation are dead. */
goto do_remove;
}
@ -1510,8 +1581,7 @@ static void tcg_liveness_analysis(TCGContext *s)
op->opc = opc = opc_new;
args[1] = args[2];
args[2] = args[3];
} else if (have_opc_new2 && dead_temps[args[0]]
&& !mem_temps[args[0]]) {
} else if (temp_state[args[0]] == TS_DEAD && have_opc_new2) {
/* The low part of the operation is dead; generate the high. */
op->opc = opc = opc_new2;
args[0] = args[1];
@ -1534,8 +1604,7 @@ static void tcg_liveness_analysis(TCGContext *s)
implies side effects */
if (!(def->flags & TCG_OPF_SIDE_EFFECTS) && nb_oargs != 0) {
for (i = 0; i < nb_oargs; i++) {
arg = args[i];
if (!dead_temps[arg] || mem_temps[arg]) {
if (temp_state[args[i]] != TS_DEAD) {
goto do_not_remove;
}
}
@ -1544,59 +1613,203 @@ static void tcg_liveness_analysis(TCGContext *s)
} else {
do_not_remove:
/* output args are dead */
dead_args = 0;
sync_args = 0;
for (i = 0; i < nb_oargs; i++) {
arg = args[i];
if (dead_temps[arg]) {
dead_args |= (1 << i);
if (temp_state[arg] & TS_DEAD) {
arg_life |= DEAD_ARG << i;
}
if (mem_temps[arg]) {
sync_args |= (1 << i);
if (temp_state[arg] & TS_MEM) {
arg_life |= SYNC_ARG << i;
}
dead_temps[arg] = 1;
mem_temps[arg] = 0;
temp_state[arg] = TS_DEAD;
}
/* if end of basic block, update */
if (def->flags & TCG_OPF_BB_END) {
tcg_la_bb_end(s, dead_temps, mem_temps);
tcg_la_bb_end(s, temp_state);
} else if (def->flags & TCG_OPF_SIDE_EFFECTS) {
/* globals should be synced to memory */
memset(mem_temps, 1, s->nb_globals);
for (i = 0; i < nb_globals; i++) {
temp_state[i] |= TS_MEM;
}
}
/* record arguments that die in this opcode */
for (i = nb_oargs; i < nb_oargs + nb_iargs; i++) {
arg = args[i];
if (dead_temps[arg]) {
dead_args |= (1 << i);
if (temp_state[arg] & TS_DEAD) {
arg_life |= DEAD_ARG << i;
}
}
/* input arguments are live for preceding opcodes */
for (i = nb_oargs; i < nb_oargs + nb_iargs; i++) {
arg = args[i];
dead_temps[arg] = 0;
temp_state[args[i]] &= ~TS_DEAD;
}
s->op_dead_args[oi] = dead_args;
s->op_sync_args[oi] = sync_args;
}
break;
}
op->life = arg_life;
}
}
#else
/* dummy liveness analysis */
static void tcg_liveness_analysis(TCGContext *s)
{
int nb_ops = s->gen_next_op_idx;
s->op_dead_args = tcg_malloc(nb_ops * sizeof(uint16_t));
memset(s->op_dead_args, 0, nb_ops * sizeof(uint16_t));
s->op_sync_args = tcg_malloc(nb_ops * sizeof(uint8_t));
memset(s->op_sync_args, 0, nb_ops * sizeof(uint8_t));
/* Liveness analysis: Convert indirect regs to direct temporaries. */
static bool liveness_pass_2(TCGContext *s, uint8_t *temp_state)
{
int nb_globals = s->nb_globals;
int16_t *dir_temps;
int i, oi, oi_next;
bool changes = false;
dir_temps = tcg_malloc(nb_globals * sizeof(int16_t));
memset(dir_temps, 0, nb_globals * sizeof(int16_t));
/* Create a temporary for each indirect global. */
for (i = 0; i < nb_globals; ++i) {
TCGTemp *its = &s->temps[i];
if (its->indirect_reg) {
TCGTemp *dts = tcg_temp_alloc(s);
dts->type = its->type;
dts->base_type = its->base_type;
dir_temps[i] = temp_idx(s, dts);
}
}
memset(temp_state, TS_DEAD, nb_globals);
for (oi = s->gen_op_buf[0].next; oi != 0; oi = oi_next) {
TCGOp *op = &s->gen_op_buf[oi];
TCGArg *args = &s->gen_opparam_buf[op->args];
TCGOpcode opc = op->opc;
const TCGOpDef *def = &tcg_op_defs[opc];
TCGLifeData arg_life = op->life;
int nb_iargs, nb_oargs, call_flags;
TCGArg arg, dir;
oi_next = op->next;
if (opc == INDEX_op_call) {
nb_oargs = op->callo;
nb_iargs = op->calli;
call_flags = args[nb_oargs + nb_iargs + 1];
} else {
nb_iargs = def->nb_iargs;
nb_oargs = def->nb_oargs;
/* Set flags similar to how calls require. */
if (def->flags & TCG_OPF_BB_END) {
/* Like writing globals: save_globals */
call_flags = 0;
} else if (def->flags & TCG_OPF_SIDE_EFFECTS) {
/* Like reading globals: sync_globals */
call_flags = TCG_CALL_NO_WRITE_GLOBALS;
} else {
/* No effect on globals. */
call_flags = (TCG_CALL_NO_READ_GLOBALS |
TCG_CALL_NO_WRITE_GLOBALS);
}
}
/* Make sure that input arguments are available. */
for (i = nb_oargs; i < nb_iargs + nb_oargs; i++) {
arg = args[i];
/* Note this unsigned test catches TCG_CALL_ARG_DUMMY too. */
if (arg < nb_globals) {
dir = dir_temps[arg];
if (dir != 0 && temp_state[arg] == TS_DEAD) {
TCGTemp *its = &s->temps[arg];
TCGOpcode lopc = (its->type == TCG_TYPE_I32
? INDEX_op_ld_i32
: INDEX_op_ld_i64);
TCGOp *lop = tcg_op_insert_before(s, op, lopc, 3);
TCGArg *largs = &s->gen_opparam_buf[lop->args];
largs[0] = dir;
largs[1] = temp_idx(s, its->mem_base);
largs[2] = its->mem_offset;
/* Loaded, but synced with memory. */
temp_state[arg] = TS_MEM;
}
}
}
/* Perform input replacement, and mark inputs that became dead.
No action is required except keeping temp_state up to date
so that we reload when needed. */
for (i = nb_oargs; i < nb_iargs + nb_oargs; i++) {
arg = args[i];
if (arg < nb_globals) {
dir = dir_temps[arg];
if (dir != 0) {
args[i] = dir;
changes = true;
if (IS_DEAD_ARG(i)) {
temp_state[arg] = TS_DEAD;
}
}
}
}
/* Liveness analysis should ensure that the following are
all correct, for call sites and basic block end points. */
if (call_flags & TCG_CALL_NO_READ_GLOBALS) {
/* Nothing to do */
} else if (call_flags & TCG_CALL_NO_WRITE_GLOBALS) {
for (i = 0; i < nb_globals; ++i) {
/* Liveness should see that globals are synced back,
that is, either TS_DEAD or TS_MEM. */
tcg_debug_assert(dir_temps[i] == 0
|| temp_state[i] != 0);
}
} else {
for (i = 0; i < nb_globals; ++i) {
/* Liveness should see that globals are saved back,
that is, TS_DEAD, waiting to be reloaded. */
tcg_debug_assert(dir_temps[i] == 0
|| temp_state[i] == TS_DEAD);
}
}
/* Outputs become available. */
for (i = 0; i < nb_oargs; i++) {
arg = args[i];
if (arg >= nb_globals) {
continue;
}
dir = dir_temps[arg];
if (dir == 0) {
continue;
}
args[i] = dir;
changes = true;
/* The output is now live and modified. */
temp_state[arg] = 0;
/* Sync outputs upon their last write. */
if (NEED_SYNC_ARG(i)) {
TCGTemp *its = &s->temps[arg];
TCGOpcode sopc = (its->type == TCG_TYPE_I32
? INDEX_op_st_i32
: INDEX_op_st_i64);
TCGOp *sop = tcg_op_insert_after(s, op, sopc, 3);
TCGArg *sargs = &s->gen_opparam_buf[sop->args];
sargs[0] = dir;
sargs[1] = temp_idx(s, its->mem_base);
sargs[2] = its->mem_offset;
temp_state[arg] = TS_MEM;
}
/* Drop outputs that are dead. */
if (IS_DEAD_ARG(i)) {
temp_state[arg] = TS_DEAD;
}
}
}
return changes;
}
#endif
#ifdef CONFIG_DEBUG_TCG
static void dump_regs(TCGContext *s)
@ -1728,14 +1941,6 @@ static void temp_sync(TCGContext *s, TCGTemp *ts,
if (!ts->mem_allocated) {
temp_allocate_frame(s, temp_idx(s, ts));
}
if (ts->indirect_reg) {
if (ts->val_type == TEMP_VAL_REG) {
tcg_regset_set_reg(allocated_regs, ts->reg);
}
temp_load(s, ts->mem_base,
tcg_target_available_regs[TCG_TYPE_PTR],
allocated_regs);
}
switch (ts->val_type) {
case TEMP_VAL_CONST:
/* If we're going to free the temp immediately, then we won't
@ -1826,12 +2031,6 @@ static void temp_load(TCGContext *s, TCGTemp *ts, TCGRegSet desired_regs,
break;
case TEMP_VAL_MEM:
reg = tcg_reg_alloc(s, desired_regs, allocated_regs, ts->indirect_base);
if (ts->indirect_reg) {
tcg_regset_set_reg(allocated_regs, reg);
temp_load(s, ts->mem_base,
tcg_target_available_regs[TCG_TYPE_PTR],
allocated_regs);
}
tcg_out_ld(s, ts->type, reg, ts->mem_base->reg, ts->mem_offset);
ts->mem_coherent = 1;
break;
@ -1848,16 +2047,9 @@ static void temp_load(TCGContext *s, TCGTemp *ts, TCGRegSet desired_regs,
temporary registers needs to be allocated to store a constant. */
static void temp_save(TCGContext *s, TCGTemp *ts, TCGRegSet allocated_regs)
{
#ifdef USE_LIVENESS_ANALYSIS
/* ??? Liveness does not yet incorporate indirect bases. */
if (!ts->indirect_base) {
/* The liveness analysis already ensures that globals are back
in memory. Keep an tcg_debug_assert for safety. */
tcg_debug_assert(ts->val_type == TEMP_VAL_MEM || ts->fixed_reg);
return;
}
#endif
temp_sync(s, ts, allocated_regs, 1);
}
/* save globals to their canonical location and assume they can be
@ -1881,16 +2073,9 @@ static void sync_globals(TCGContext *s, TCGRegSet allocated_regs)
for (i = 0; i < s->nb_globals; i++) {
TCGTemp *ts = &s->temps[i];
#ifdef USE_LIVENESS_ANALYSIS
/* ??? Liveness does not yet incorporate indirect bases. */
if (!ts->indirect_base) {
tcg_debug_assert(ts->val_type != TEMP_VAL_REG
|| ts->fixed_reg
|| ts->mem_coherent);
continue;
}
#endif
temp_sync(s, ts, allocated_regs, 0);
}
}
@ -1905,27 +2090,17 @@ static void tcg_reg_alloc_bb_end(TCGContext *s, TCGRegSet allocated_regs)
if (ts->temp_local) {
temp_save(s, ts, allocated_regs);
} else {
#ifdef USE_LIVENESS_ANALYSIS
/* ??? Liveness does not yet incorporate indirect bases. */
if (!ts->indirect_base) {
/* The liveness analysis already ensures that temps are dead.
Keep an tcg_debug_assert for safety. */
tcg_debug_assert(ts->val_type == TEMP_VAL_DEAD);
continue;
}
#endif
temp_dead(s, ts);
}
}
save_globals(s, allocated_regs);
}
#define IS_DEAD_ARG(n) ((dead_args >> (n)) & 1)
#define NEED_SYNC_ARG(n) ((sync_args >> (n)) & 1)
static void tcg_reg_alloc_movi(TCGContext *s, const TCGArg *args,
uint16_t dead_args, uint8_t sync_args)
TCGLifeData arg_life)
{
TCGTemp *ots;
tcg_target_ulong val;
@ -1954,8 +2129,7 @@ static void tcg_reg_alloc_movi(TCGContext *s, const TCGArg *args,
}
static void tcg_reg_alloc_mov(TCGContext *s, const TCGOpDef *def,
const TCGArg *args, uint16_t dead_args,
uint8_t sync_args)
const TCGArg *args, TCGLifeData arg_life)
{
TCGRegSet allocated_regs;
TCGTemp *ts, *ots;
@ -1987,12 +2161,6 @@ static void tcg_reg_alloc_mov(TCGContext *s, const TCGOpDef *def,
if (!ots->mem_allocated) {
temp_allocate_frame(s, args[0]);
}
if (ots->indirect_reg) {
tcg_regset_set_reg(allocated_regs, ts->reg);
temp_load(s, ots->mem_base,
tcg_target_available_regs[TCG_TYPE_PTR],
allocated_regs);
}
tcg_out_st(s, otype, ts->reg, ots->mem_base->reg, ots->mem_offset);
if (IS_DEAD_ARG(1)) {
temp_dead(s, ts);
@ -2040,8 +2208,7 @@ static void tcg_reg_alloc_mov(TCGContext *s, const TCGOpDef *def,
static void tcg_reg_alloc_op(TCGContext *s,
const TCGOpDef *def, TCGOpcode opc,
const TCGArg *args, uint16_t dead_args,
uint8_t sync_args)
const TCGArg *args, TCGLifeData arg_life)
{
TCGRegSet allocated_regs;
int i, k, nb_iargs, nb_oargs;
@ -2206,8 +2373,7 @@ static void tcg_reg_alloc_op(TCGContext *s,
#endif
static void tcg_reg_alloc_call(TCGContext *s, int nb_oargs, int nb_iargs,
const TCGArg * const args, uint16_t dead_args,
uint8_t sync_args)
const TCGArg * const args, TCGLifeData arg_life)
{
int flags, nb_regs, i;
TCGReg reg;
@ -2363,7 +2529,7 @@ int tcg_gen_code(TCGContext *s, TranslationBlock *tb)
{
int n;
n = s->gen_last_op_idx + 1;
n = s->gen_op_buf[0].prev + 1;
s->op_count += n;
if (n > s->op_count_max) {
s->op_count_max = n;
@ -2399,7 +2565,27 @@ int tcg_gen_code(TCGContext *s, TranslationBlock *tb)
s->la_time -= profile_getclock();
#endif
tcg_liveness_analysis(s);
{
uint8_t *temp_state = tcg_malloc(s->nb_temps + s->nb_indirects);
liveness_pass_1(s, temp_state);
if (s->nb_indirects > 0) {
#ifdef DEBUG_DISAS
if (unlikely(qemu_loglevel_mask(CPU_LOG_TB_OP_IND)
&& qemu_log_in_addr_range(tb->pc))) {
qemu_log("OP before indirect lowering:\n");
tcg_dump_ops(s);
qemu_log("\n");
}
#endif
/* Replace indirect temps with direct temps. */
if (liveness_pass_2(s, temp_state)) {
/* If changes were made, re-run liveness. */
liveness_pass_1(s, temp_state);
}
}
}
#ifdef CONFIG_PROFILER
s->la_time += profile_getclock();
@ -2422,13 +2608,12 @@ int tcg_gen_code(TCGContext *s, TranslationBlock *tb)
tcg_out_tb_init(s);
num_insns = -1;
for (oi = s->gen_first_op_idx; oi >= 0; oi = oi_next) {
for (oi = s->gen_op_buf[0].next; oi != 0; oi = oi_next) {
TCGOp * const op = &s->gen_op_buf[oi];
TCGArg * const args = &s->gen_opparam_buf[op->args];
TCGOpcode opc = op->opc;
const TCGOpDef *def = &tcg_op_defs[opc];
uint16_t dead_args = s->op_dead_args[oi];
uint8_t sync_args = s->op_sync_args[oi];
TCGLifeData arg_life = op->life;
oi_next = op->next;
#ifdef CONFIG_PROFILER
@ -2438,11 +2623,11 @@ int tcg_gen_code(TCGContext *s, TranslationBlock *tb)
switch (opc) {
case INDEX_op_mov_i32:
case INDEX_op_mov_i64:
tcg_reg_alloc_mov(s, def, args, dead_args, sync_args);
tcg_reg_alloc_mov(s, def, args, arg_life);
break;
case INDEX_op_movi_i32:
case INDEX_op_movi_i64:
tcg_reg_alloc_movi(s, args, dead_args, sync_args);
tcg_reg_alloc_movi(s, args, arg_life);
break;
case INDEX_op_insn_start:
if (num_insns >= 0) {
@ -2467,8 +2652,7 @@ int tcg_gen_code(TCGContext *s, TranslationBlock *tb)
tcg_out_label(s, arg_label(args[0]), s->code_ptr);
break;
case INDEX_op_call:
tcg_reg_alloc_call(s, op->callo, op->calli, args,
dead_args, sync_args);
tcg_reg_alloc_call(s, op->callo, op->calli, args, arg_life);
break;
default:
/* Sanity check that we've not introduced any unhandled opcodes. */
@ -2478,7 +2662,7 @@ int tcg_gen_code(TCGContext *s, TranslationBlock *tb)
/* Note: in order to speed up the code, it would be much
faster to have specialized register allocator functions for
some common argument patterns */
tcg_reg_alloc_op(s, def, opc, args, dead_args, sync_args);
tcg_reg_alloc_op(s, def, opc, args, arg_life);
break;
}
#ifdef CONFIG_DEBUG_TCG
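As a side note on the new op->life word that the dump code above decodes: it packs the old op_dead_args/op_sync_args arrays into 16 bits per op, with SYNC_ARG << i covering the (at most two) outputs and DEAD_ARG << i covering all arguments. A minimal standalone decode that mirrors the tcg_dump_ops logic; this is not QEMU code and the example value is made up.

    #include <stdio.h>

    #define SYNC_ARG 1
    #define DEAD_ARG 4

    int main(void)
    {
        /* Example: output 0 must be synced back, and args 0 and 2 die here. */
        unsigned life = (SYNC_ARG << 0) | (DEAD_ARG << 0) | (DEAD_ARG << 2);

        printf("sync:");
        for (int i = 0; i < 2; i++) {        /* bits 0..1: outputs to sync */
            if (life & (SYNC_ARG << i)) {
                printf(" %d", i);
            }
        }
        printf("  dead:");
        for (unsigned d = life / DEAD_ARG, i = 0; d; d >>= 1, i++) {
            if (d & 1) {                     /* bits 2..: args that die */
                printf(" %u", i);
            }
        }
        putchar('\n');                       /* prints: sync: 0  dead: 0 2 */
        return 0;
    }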


@ -575,24 +575,41 @@ typedef struct TCGTempSet {
unsigned long l[BITS_TO_LONGS(TCG_MAX_TEMPS)];
} TCGTempSet;
/* While we limit helpers to 6 arguments, for 32-bit hosts, with padding,
this imples a max of 6*2 (64-bit in) + 2 (64-bit out) = 14 operands.
There are never more than 2 outputs, which means that we can store all
dead + sync data within 16 bits. */
#define DEAD_ARG 4
#define SYNC_ARG 1
typedef uint16_t TCGLifeData;
/* The layout here is designed to avoid crossing of a 32-bit boundary.
If we do so, gcc adds padding, expanding the size to 12. */
typedef struct TCGOp {
TCGOpcode opc : 8;
TCGOpcode opc : 8; /* 8 */
/* Index of the prev/next op, or 0 for the end of the list. */
unsigned prev : 10; /* 18 */
unsigned next : 10; /* 28 */
/* The number of out and in parameter for a call. */
unsigned callo : 2;
unsigned calli : 6;
unsigned calli : 4; /* 32 */
unsigned callo : 2; /* 34 */
/* Index of the arguments for this op, or -1 for zero-operand ops. */
signed args : 16;
/* Index of the arguments for this op, or 0 for zero-operand ops. */
unsigned args : 14; /* 48 */
/* Index of the prex/next op, or -1 for the end of the list. */
signed prev : 16;
signed next : 16;
/* Lifetime data of the operands. */
unsigned life : 16; /* 64 */
} TCGOp;
QEMU_BUILD_BUG_ON(NB_OPS > 0xff);
QEMU_BUILD_BUG_ON(OPC_BUF_SIZE >= 0x7fff);
QEMU_BUILD_BUG_ON(OPPARAM_BUF_SIZE >= 0x7fff);
/* Make sure operands fit in the bitfields above. */
QEMU_BUILD_BUG_ON(NB_OPS > (1 << 8));
QEMU_BUILD_BUG_ON(OPC_BUF_SIZE > (1 << 10));
QEMU_BUILD_BUG_ON(OPPARAM_BUF_SIZE > (1 << 14));
/* Make sure that we don't overflow 64 bits without noticing. */
QEMU_BUILD_BUG_ON(sizeof(TCGOp) > 8);
struct TCGContext {
uint8_t *pool_cur, *pool_end;
@ -600,6 +617,7 @@ struct TCGContext {
int nb_labels;
int nb_globals;
int nb_temps;
int nb_indirects;
/* goto_tb support */
tcg_insn_unit *code_buf;
@ -607,13 +625,6 @@ struct TCGContext {
uint16_t *tb_jmp_insn_offset; /* tb->jmp_insn_offset if USE_DIRECT_JUMP */
uintptr_t *tb_jmp_target_addr; /* tb->jmp_target_addr if !USE_DIRECT_JUMP */
/* liveness analysis */
uint16_t *op_dead_args; /* for each operation, each bit tells if the
corresponding argument is dead */
uint8_t *op_sync_args; /* for each operation, each bit tells if the
corresponding output argument needs to be
sync to memory. */
TCGRegSet reserved_regs;
intptr_t current_frame_offset;
intptr_t frame_start;
@ -649,8 +660,6 @@ struct TCGContext {
int goto_tb_issue_mask;
#endif
int gen_first_op_idx;
int gen_last_op_idx;
int gen_next_op_idx;
int gen_next_parm_idx;
@ -890,6 +899,9 @@ void tcg_gen_callN(TCGContext *s, void *func,
TCGArg ret, int nargs, TCGArg *args);
void tcg_op_remove(TCGContext *s, TCGOp *op);
TCGOp *tcg_op_insert_before(TCGContext *s, TCGOp *op, TCGOpcode opc, int narg);
TCGOp *tcg_op_insert_after(TCGContext *s, TCGOp *op, TCGOpcode opc, int narg);
void tcg_optimize(TCGContext *s);
/* only used for debugging purposes */
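The reworked TCGOp above stores 10-bit prev/next indices and, per the tcg.c changes, uses gen_op_buf[0] as a permanently present sentinel that serves as both head and tail of the op list. That is what lets tcg_op_remove() and the two insert helpers drop their -1 end-of-list checks. A minimal sketch of the idiom follows; the names and sizes here are invented, it is not QEMU code, and there is no bounds checking.

    #include <stdio.h>

    struct node { int prev, next, payload; };

    static struct node buf[16];
    static int next_free = 1;              /* slot 0 is the sentinel */

    static void list_init(void)
    {
        buf[0].prev = buf[0].next = 0;     /* empty list: sentinel points at itself */
    }

    static int insert_before(int pos, int payload)
    {
        int n = next_free++;
        buf[n].payload = payload;
        buf[n].prev = buf[pos].prev;
        buf[n].next = pos;
        buf[buf[pos].prev].next = n;       /* no "is this the head?" branch */
        buf[pos].prev = n;
        return n;
    }

    static void node_remove(int pos)
    {
        buf[buf[pos].prev].next = buf[pos].next;   /* again, branch-free */
        buf[buf[pos].next].prev = buf[pos].prev;
    }

    int main(void)
    {
        list_init();
        insert_before(0, 1);               /* append == insert before sentinel */
        int b = insert_before(0, 2);
        insert_before(0, 3);
        node_remove(b);
        for (int i = buf[0].next; i != 0; i = buf[i].next) {
            printf("%d ", buf[i].payload); /* prints: 1 3 */
        }
        putchar('\n');
        return 0;
    }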


@ -32,15 +32,22 @@ int qemu_loglevel;
static int log_append = 0;
static GArray *debug_regions;
void qemu_log(const char *fmt, ...)
/* Return the number of characters emitted. */
int qemu_log(const char *fmt, ...)
{
va_list ap;
va_start(ap, fmt);
int ret = 0;
if (qemu_logfile) {
vfprintf(qemu_logfile, fmt, ap);
}
va_list ap;
va_start(ap, fmt);
ret = vfprintf(qemu_logfile, fmt, ap);
va_end(ap);
/* Don't pass back error results. */
if (ret < 0) {
ret = 0;
}
}
return ret;
}
static bool log_uses_own_buffers;
@ -240,8 +247,9 @@ const QEMULogItem qemu_log_items[] = {
{ CPU_LOG_TB_OP, "op",
"show micro ops for each compiled TB" },
{ CPU_LOG_TB_OP_OPT, "op_opt",
"show micro ops (x86 only: before eflags optimization) and\n"
"after liveness analysis" },
"show micro ops after optimization" },
{ CPU_LOG_TB_OP_IND, "op_ind",
"show micro ops before indirect lowering" },
{ CPU_LOG_INT, "int",
"show interrupts/exceptions in short format" },
{ CPU_LOG_EXEC, "exec",
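
For completeness, the reason qemu_log() now returns int is visible in the tcg_dump_ops() hunks above: the return value is accumulated into col so the new liveness annotations can be padded to a fixed column. A small standalone illustration of that pattern; toy_log and everything else here is invented and not QEMU code.

    #include <stdarg.h>
    #include <stdio.h>

    /* printf-like logger that reports how many characters it emitted. */
    static int toy_log(const char *fmt, ...)
    {
        va_list ap;
        int ret;

        va_start(ap, fmt);
        ret = vfprintf(stdout, fmt, ap);
        va_end(ap);
        return ret < 0 ? 0 : ret;          /* don't pass back error results */
    }

    int main(void)
    {
        const char *ops[] = { "mov_i32 tmp0,eax", "add_i32 tmp0,tmp0,$0x1" };

        for (int i = 0; i < 2; i++) {
            int col = toy_log(" %s", ops[i]);
            while (col < 32) {             /* pad to a fixed annotation column */
                putchar(' ');
                col++;
            }
            toy_log(" dead: 0\n");         /* annotation starts aligned */
        }
        return 0;
    }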