From 2b7ec66f025263a5331f37d5ad78a625496fd7bd Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Fri, 29 May 2015 09:16:51 -0700 Subject: [PATCH 1/9] tcg: Mask TCGMemOp appropriately for indexing The addition of MO_AMASK means that places that used inverted masks need to be changed to use positive masks, and places that failed to mask the intended bits need updating. Reviewed-by: Yongbok Kim Tested-by: Yongbok Kim Signed-off-by: Richard Henderson --- tcg/aarch64/tcg-target.c | 4 ++-- tcg/arm/tcg-target.c | 6 +++--- tcg/i386/tcg-target.c | 4 ++-- tcg/mips/tcg-target.c | 4 ++-- tcg/ppc/tcg-target.c | 8 ++++---- tcg/s390/tcg-target.c | 4 ++-- tcg/sparc/tcg-target.c | 30 +++++++++++++++--------------- tci.c | 8 ++++---- 8 files changed, 34 insertions(+), 34 deletions(-) diff --git a/tcg/aarch64/tcg-target.c b/tcg/aarch64/tcg-target.c index b3be6f3177..fe44ad709c 100644 --- a/tcg/aarch64/tcg-target.c +++ b/tcg/aarch64/tcg-target.c @@ -1004,7 +1004,7 @@ static void tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *lb) tcg_out_mov(s, TARGET_LONG_BITS == 64, TCG_REG_X1, lb->addrlo_reg); tcg_out_movi(s, TCG_TYPE_I32, TCG_REG_X2, oi); tcg_out_adr(s, TCG_REG_X3, lb->raddr); - tcg_out_call(s, qemu_ld_helpers[opc & ~MO_SIGN]); + tcg_out_call(s, qemu_ld_helpers[opc & (MO_BSWAP | MO_SIZE)]); if (opc & MO_SIGN) { tcg_out_sxt(s, lb->type, size, lb->datalo_reg, TCG_REG_X0); } else { @@ -1027,7 +1027,7 @@ static void tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *lb) tcg_out_mov(s, size == MO_64, TCG_REG_X2, lb->datalo_reg); tcg_out_movi(s, TCG_TYPE_I32, TCG_REG_X3, oi); tcg_out_adr(s, TCG_REG_X4, lb->raddr); - tcg_out_call(s, qemu_st_helpers[opc]); + tcg_out_call(s, qemu_st_helpers[opc & (MO_BSWAP | MO_SIZE)]); tcg_out_goto(s, lb->raddr); } diff --git a/tcg/arm/tcg-target.c b/tcg/arm/tcg-target.c index 06a8064a9f..ae2ec7a922 100644 --- a/tcg/arm/tcg-target.c +++ b/tcg/arm/tcg-target.c @@ -1260,9 +1260,9 @@ static void tcg_out_qemu_ld_slow_path(TCGContext *s, 
TCGLabelQemuLdst *lb) icache usage. For pre-armv6, use the signed helpers since we do not have a single insn sign-extend. */ if (use_armv6_instructions) { - func = qemu_ld_helpers[opc & ~MO_SIGN]; + func = qemu_ld_helpers[opc & (MO_BSWAP | MO_SIZE)]; } else { - func = qemu_ld_helpers[opc]; + func = qemu_ld_helpers[opc & (MO_BSWAP | MO_SSIZE)]; if (opc & MO_SIGN) { opc = MO_UL; } @@ -1337,7 +1337,7 @@ static void tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *lb) argreg = tcg_out_arg_reg32(s, argreg, TCG_REG_R14); /* Tail-call to the helper, which will return to the fast path. */ - tcg_out_goto(s, COND_AL, qemu_st_helpers[opc]); + tcg_out_goto(s, COND_AL, qemu_st_helpers[opc & (MO_BSWAP | MO_SIZE)]); } #endif /* SOFTMMU */ diff --git a/tcg/i386/tcg-target.c b/tcg/i386/tcg-target.c index 2e4bf52aae..ff4d9cfec7 100644 --- a/tcg/i386/tcg-target.c +++ b/tcg/i386/tcg-target.c @@ -1307,7 +1307,7 @@ static void tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *l) (uintptr_t)l->raddr); } - tcg_out_call(s, qemu_ld_helpers[opc & ~MO_SIGN]); + tcg_out_call(s, qemu_ld_helpers[opc & (MO_BSWAP | MO_SIZE)]); data_reg = l->datalo_reg; switch (opc & MO_SSIZE) { @@ -1413,7 +1413,7 @@ static void tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *l) /* "Tail call" to the helper, with the return address back inline. 
*/ tcg_out_push(s, retaddr); - tcg_out_jmp(s, qemu_st_helpers[opc]); + tcg_out_jmp(s, qemu_st_helpers[opc & (MO_BSWAP | MO_SIZE)]); } #elif defined(__x86_64__) && defined(__linux__) # include diff --git a/tcg/mips/tcg-target.c b/tcg/mips/tcg-target.c index f64c89c3c0..f643eca3df 100644 --- a/tcg/mips/tcg-target.c +++ b/tcg/mips/tcg-target.c @@ -1031,7 +1031,7 @@ static void tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *l) } i = tcg_out_call_iarg_imm(s, i, oi); i = tcg_out_call_iarg_imm(s, i, (intptr_t)l->raddr); - tcg_out_call_int(s, qemu_ld_helpers[opc], false); + tcg_out_call_int(s, qemu_ld_helpers[opc & (MO_BSWAP | MO_SSIZE)], false); /* delay slot */ tcg_out_mov(s, TCG_TYPE_PTR, tcg_target_call_iarg_regs[0], TCG_AREG0); @@ -1094,7 +1094,7 @@ static void tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *l) computation to take place in the return address register. */ tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_RA, (intptr_t)l->raddr); i = tcg_out_call_iarg_reg(s, i, TCG_REG_RA); - tcg_out_call_int(s, qemu_st_helpers[opc], true); + tcg_out_call_int(s, qemu_st_helpers[opc & (MO_BSWAP | MO_SIZE)], true); /* delay slot */ tcg_out_mov(s, TCG_TYPE_PTR, tcg_target_call_iarg_regs[0], TCG_AREG0); } diff --git a/tcg/ppc/tcg-target.c b/tcg/ppc/tcg-target.c index d49c7d925f..2b6eafa03c 100644 --- a/tcg/ppc/tcg-target.c +++ b/tcg/ppc/tcg-target.c @@ -1495,7 +1495,7 @@ static void tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *lb) tcg_out_movi(s, TCG_TYPE_I32, arg++, oi); tcg_out32(s, MFSPR | RT(arg) | LR); - tcg_out_call(s, qemu_ld_helpers[opc & ~MO_SIGN]); + tcg_out_call(s, qemu_ld_helpers[opc & (MO_BSWAP | MO_SIZE)]); lo = lb->datalo_reg; hi = lb->datahi_reg; @@ -1565,7 +1565,7 @@ static void tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *lb) tcg_out_movi(s, TCG_TYPE_I32, arg++, oi); tcg_out32(s, MFSPR | RT(arg) | LR); - tcg_out_call(s, qemu_st_helpers[opc]); + tcg_out_call(s, qemu_st_helpers[opc & (MO_BSWAP | MO_SIZE)]); tcg_out_b(s, 
0, lb->raddr); } @@ -1624,7 +1624,7 @@ static void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args, bool is_64) tcg_out32(s, LWZ | TAI(datalo, addrlo, 4)); } } else { - uint32_t insn = qemu_ldx_opc[opc]; + uint32_t insn = qemu_ldx_opc[opc & (MO_BSWAP | MO_SSIZE)]; if (!HAVE_ISA_2_06 && insn == LDBRX) { tcg_out32(s, ADDI | TAI(TCG_REG_R0, addrlo, 4)); tcg_out32(s, LWBRX | TAB(datalo, rbase, addrlo)); @@ -1696,7 +1696,7 @@ static void tcg_out_qemu_st(TCGContext *s, const TCGArg *args, bool is_64) tcg_out32(s, STW | TAI(datalo, addrlo, 4)); } } else { - uint32_t insn = qemu_stx_opc[opc]; + uint32_t insn = qemu_stx_opc[opc & (MO_BSWAP | MO_SIZE)]; if (!HAVE_ISA_2_06 && insn == STDBRX) { tcg_out32(s, STWBRX | SAB(datalo, rbase, addrlo)); tcg_out32(s, ADDI | TAI(TCG_REG_TMP1, addrlo, 4)); diff --git a/tcg/s390/tcg-target.c b/tcg/s390/tcg-target.c index 46dedc9f82..669fafe24f 100644 --- a/tcg/s390/tcg-target.c +++ b/tcg/s390/tcg-target.c @@ -1573,7 +1573,7 @@ static void tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *lb) } tcg_out_movi(s, TCG_TYPE_I32, TCG_REG_R4, oi); tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_R5, (uintptr_t)lb->raddr); - tcg_out_call(s, qemu_ld_helpers[opc]); + tcg_out_call(s, qemu_ld_helpers[opc & (MO_BSWAP | MO_SSIZE)]); tcg_out_mov(s, TCG_TYPE_I64, data_reg, TCG_REG_R2); tgen_gotoi(s, S390_CC_ALWAYS, lb->raddr); @@ -1610,7 +1610,7 @@ static void tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *lb) } tcg_out_movi(s, TCG_TYPE_I32, TCG_REG_R5, oi); tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_R6, (uintptr_t)lb->raddr); - tcg_out_call(s, qemu_st_helpers[opc]); + tcg_out_call(s, qemu_st_helpers[opc & (MO_BSWAP | MO_SIZE)]); tgen_gotoi(s, S390_CC_ALWAYS, lb->raddr); } diff --git a/tcg/sparc/tcg-target.c b/tcg/sparc/tcg-target.c index c1794a33ed..1a870a81d7 100644 --- a/tcg/sparc/tcg-target.c +++ b/tcg/sparc/tcg-target.c @@ -1075,12 +1075,11 @@ static void tcg_out_qemu_ld(TCGContext *s, TCGReg data, TCGReg addr, TCGMemOp memop = get_memop(oi); 
#ifdef CONFIG_SOFTMMU unsigned memi = get_mmuidx(oi); - TCGMemOp s_bits = memop & MO_SIZE; TCGReg addrz, param; tcg_insn_unit *func; tcg_insn_unit *label_ptr; - addrz = tcg_out_tlb_load(s, addr, memi, s_bits, + addrz = tcg_out_tlb_load(s, addr, memi, memop & MO_SIZE, offsetof(CPUTLBEntry, addr_read)); /* The fast path is exactly one insn. Thus we can perform the @@ -1092,7 +1091,8 @@ static void tcg_out_qemu_ld(TCGContext *s, TCGReg data, TCGReg addr, tcg_out_bpcc0(s, COND_E, BPCC_A | BPCC_PT | (TARGET_LONG_BITS == 64 ? BPCC_XCC : BPCC_ICC), 0); /* delay slot */ - tcg_out_ldst_rr(s, data, addrz, TCG_REG_O1, qemu_ld_opc[memop]); + tcg_out_ldst_rr(s, data, addrz, TCG_REG_O1, + qemu_ld_opc[memop & (MO_BSWAP | MO_SSIZE)]); /* TLB Miss. */ @@ -1105,10 +1105,10 @@ static void tcg_out_qemu_ld(TCGContext *s, TCGReg data, TCGReg addr, /* We use the helpers to extend SB and SW data, leaving the case of SL needing explicit extending below. */ - if ((memop & ~MO_BSWAP) == MO_SL) { - func = qemu_ld_trampoline[memop & ~MO_SIGN]; + if ((memop & MO_SSIZE) == MO_SL) { + func = qemu_ld_trampoline[memop & (MO_BSWAP | MO_SIZE)]; } else { - func = qemu_ld_trampoline[memop]; + func = qemu_ld_trampoline[memop & (MO_BSWAP | MO_SSIZE)]; } assert(func != NULL); tcg_out_call_nodelay(s, func); @@ -1119,13 +1119,13 @@ static void tcg_out_qemu_ld(TCGContext *s, TCGReg data, TCGReg addr, Which complicates things for sparcv8plus. */ if (SPARC64) { /* We let the helper sign-extend SB and SW, but leave SL for here. 
*/ - if (is_64 && (memop & ~MO_BSWAP) == MO_SL) { + if (is_64 && (memop & MO_SSIZE) == MO_SL) { tcg_out_arithi(s, data, TCG_REG_O0, 0, SHIFT_SRA); } else { tcg_out_mov(s, TCG_TYPE_REG, data, TCG_REG_O0); } } else { - if (s_bits == MO_64) { + if ((memop & MO_SIZE) == MO_64) { tcg_out_arithi(s, TCG_REG_O0, TCG_REG_O0, 32, SHIFT_SLLX); tcg_out_arithi(s, TCG_REG_O1, TCG_REG_O1, 0, SHIFT_SRL); tcg_out_arith(s, data, TCG_REG_O0, TCG_REG_O1, ARITH_OR); @@ -1147,7 +1147,7 @@ static void tcg_out_qemu_ld(TCGContext *s, TCGReg data, TCGReg addr, } tcg_out_ldst_rr(s, data, addr, (GUEST_BASE ? TCG_GUEST_BASE_REG : TCG_REG_G0), - qemu_ld_opc[memop]); + qemu_ld_opc[memop & (MO_BSWAP | MO_SSIZE)]); #endif /* CONFIG_SOFTMMU */ } @@ -1157,12 +1157,11 @@ static void tcg_out_qemu_st(TCGContext *s, TCGReg data, TCGReg addr, TCGMemOp memop = get_memop(oi); #ifdef CONFIG_SOFTMMU unsigned memi = get_mmuidx(oi); - TCGMemOp s_bits = memop & MO_SIZE; TCGReg addrz, param; tcg_insn_unit *func; tcg_insn_unit *label_ptr; - addrz = tcg_out_tlb_load(s, addr, memi, s_bits, + addrz = tcg_out_tlb_load(s, addr, memi, memop & MO_SIZE, offsetof(CPUTLBEntry, addr_write)); /* The fast path is exactly one insn. Thus we can perform the entire @@ -1172,7 +1171,8 @@ static void tcg_out_qemu_st(TCGContext *s, TCGReg data, TCGReg addr, tcg_out_bpcc0(s, COND_E, BPCC_A | BPCC_PT | (TARGET_LONG_BITS == 64 ? BPCC_XCC : BPCC_ICC), 0); /* delay slot */ - tcg_out_ldst_rr(s, data, addrz, TCG_REG_O1, qemu_st_opc[memop]); + tcg_out_ldst_rr(s, data, addrz, TCG_REG_O1, + qemu_st_opc[memop & (MO_BSWAP | MO_SIZE)]); /* TLB Miss. */ @@ -1182,13 +1182,13 @@ static void tcg_out_qemu_st(TCGContext *s, TCGReg data, TCGReg addr, param++; } tcg_out_mov(s, TCG_TYPE_REG, param++, addr); - if (!SPARC64 && s_bits == MO_64) { + if (!SPARC64 && (memop & MO_SIZE) == MO_64) { /* Skip the high-part; we'll perform the extract in the trampoline. 
*/ param++; } tcg_out_mov(s, TCG_TYPE_REG, param++, data); - func = qemu_st_trampoline[memop]; + func = qemu_st_trampoline[memop & (MO_BSWAP | MO_SIZE)]; assert(func != NULL); tcg_out_call_nodelay(s, func); /* delay slot */ @@ -1202,7 +1202,7 @@ static void tcg_out_qemu_st(TCGContext *s, TCGReg data, TCGReg addr, } tcg_out_ldst_rr(s, data, addr, (GUEST_BASE ? TCG_GUEST_BASE_REG : TCG_REG_G0), - qemu_st_opc[memop]); + qemu_st_opc[memop & (MO_BSWAP | MO_SIZE)]); #endif /* CONFIG_SOFTMMU */ } diff --git a/tci.c b/tci.c index a14717d0d5..84449489d2 100644 --- a/tci.c +++ b/tci.c @@ -1107,7 +1107,7 @@ uintptr_t tcg_qemu_tb_exec(CPUArchState *env, uint8_t *tb_ptr) t0 = *tb_ptr++; taddr = tci_read_ulong(&tb_ptr); oi = tci_read_i(&tb_ptr); - switch (get_memop(oi)) { + switch (get_memop(oi) & (MO_BSWAP | MO_SSIZE)) { case MO_UB: tmp32 = qemu_ld_ub; break; @@ -1144,7 +1144,7 @@ uintptr_t tcg_qemu_tb_exec(CPUArchState *env, uint8_t *tb_ptr) } taddr = tci_read_ulong(&tb_ptr); oi = tci_read_i(&tb_ptr); - switch (get_memop(oi)) { + switch (get_memop(oi) & (MO_BSWAP | MO_SSIZE)) { case MO_UB: tmp64 = qemu_ld_ub; break; @@ -1193,7 +1193,7 @@ uintptr_t tcg_qemu_tb_exec(CPUArchState *env, uint8_t *tb_ptr) t0 = tci_read_r(&tb_ptr); taddr = tci_read_ulong(&tb_ptr); oi = tci_read_i(&tb_ptr); - switch (get_memop(oi)) { + switch (get_memop(oi) & (MO_BSWAP | MO_SIZE)) { case MO_UB: qemu_st_b(t0); break; @@ -1217,7 +1217,7 @@ uintptr_t tcg_qemu_tb_exec(CPUArchState *env, uint8_t *tb_ptr) tmp64 = tci_read_r64(&tb_ptr); taddr = tci_read_ulong(&tb_ptr); oi = tci_read_i(&tb_ptr); - switch (get_memop(oi)) { + switch (get_memop(oi) & (MO_BSWAP | MO_SIZE)) { case MO_UB: qemu_st_b(tmp64); break; From 59c4b7e8dfab0cdc41434fedbf2686222f541e57 Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Mon, 1 Jun 2015 14:38:56 -0700 Subject: [PATCH 2/9] tcg: Handle MO_AMASK in tcg_dump_ops Reviewed-by: Yongbok Kim Tested-by: Yongbok Kim Signed-off-by: Richard Henderson --- tcg/tcg.c | 15 ++++++++++++--- 1 
file changed, 12 insertions(+), 3 deletions(-) diff --git a/tcg/tcg.c b/tcg/tcg.c index 8b43bbb122..427b66bd6b 100644 --- a/tcg/tcg.c +++ b/tcg/tcg.c @@ -1076,10 +1076,19 @@ void tcg_dump_ops(TCGContext *s) TCGMemOp op = get_memop(oi); unsigned ix = get_mmuidx(oi); - if (op < ARRAY_SIZE(ldst_name) && ldst_name[op]) { - qemu_log(",%s,%u", ldst_name[op], ix); - } else { + if (op & ~(MO_AMASK | MO_BSWAP | MO_SSIZE)) { qemu_log(",$0x%x,%u", op, ix); + } else { + const char *s_al = "", *s_op; + if (op & MO_AMASK) { + if ((op & MO_AMASK) == MO_ALIGN) { + s_al = "al+"; + } else { + s_al = "un+"; + } + } + s_op = ldst_name[op & (MO_BSWAP | MO_SSIZE)]; + qemu_log(",%s%s,%u", s_al, s_op, ix); } i = 1; } From 7e1df267a7e8b39fc0cf1d84d2afc2e88ccbfeac Mon Sep 17 00:00:00 2001 From: Aurelien Jarno Date: Thu, 4 Jun 2015 21:47:07 +0200 Subject: [PATCH 3/9] tcg: fix register allocation with two aliased dead inputs For TCG ops with two outputs registers (add2, sub2, div2, div2u), when the same input temp is used for the two inputs aliased to the two outputs, and when these inputs are both dead, the register allocation code wrongly assigned the same register to the same output. This happens for example with sub2 t1, t2, t3, t3, t4, t5, when t3 is not used anymore after the TCG op. In that case the same register is used for t1, t2 and t3. The fix is to look for already allocated aliased input when allocating a dead aliased input and check that the register is not already used. 
Cc: Richard Henderson Signed-off-by: Aurelien Jarno Message-Id: <1433447228-29425-2-git-send-email-aurelien@aurel32.net> Signed-off-by: Richard Henderson --- tcg/tcg.c | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/tcg/tcg.c b/tcg/tcg.c index 427b66bd6b..4a6eb26d0a 100644 --- a/tcg/tcg.c +++ b/tcg/tcg.c @@ -2007,6 +2007,16 @@ static void tcg_reg_alloc_op(TCGContext *s, if (!IS_DEAD_ARG(i)) { goto allocate_in_reg; } + /* check if the current register has already been allocated + for another input aliased to an output */ + int k2, i2; + for (k2 = 0 ; k2 < k ; k2++) { + i2 = def->sorted_args[nb_oargs + k2]; + if ((def->args_ct[i2].ct & TCG_CT_IALIAS) && + (new_args[i2] == ts->reg)) { + goto allocate_in_reg; + } + } } } reg = ts->reg; From c19f47bf5e8fe3dbd10206a52d0e6e348f803933 Mon Sep 17 00:00:00 2001 From: Aurelien Jarno Date: Thu, 4 Jun 2015 21:47:08 +0200 Subject: [PATCH 4/9] tcg: fix dead computation for repeated input arguments When the same temp is used twice or more as an input argument to a TCG instruction, the dead computation code doesn't recognize the second use as a dead temp. This is because the temp is marked as live in the same loop where dead inputs are checked. The fix is to split the loop into two parts. This avoids emitting a move and using a register for the movcond instruction when used as "move if true" on x86-64. This might bring more improvements on RISC TCG targets which don't have outputs aliased to inputs. 
Reviewed-by: Richard Henderson Signed-off-by: Aurelien Jarno Message-Id: <1433447228-29425-3-git-send-email-aurelien@aurel32.net> Signed-off-by: Richard Henderson --- tcg/tcg.c | 14 +++++++++++--- 1 file changed, 11 insertions(+), 3 deletions(-) diff --git a/tcg/tcg.c b/tcg/tcg.c index 4a6eb26d0a..7e088b1f28 100644 --- a/tcg/tcg.c +++ b/tcg/tcg.c @@ -1387,16 +1387,20 @@ static void tcg_liveness_analysis(TCGContext *s) memset(dead_temps, 1, s->nb_globals); } - /* input args are live */ + /* record arguments that die in this helper */ for (i = nb_oargs; i < nb_iargs + nb_oargs; i++) { arg = args[i]; if (arg != TCG_CALL_DUMMY_ARG) { if (dead_temps[arg]) { dead_args |= (1 << i); } - dead_temps[arg] = 0; } } + /* input arguments are live for preceeding opcodes */ + for (i = nb_oargs; i < nb_oargs + nb_iargs; i++) { + arg = args[i]; + dead_temps[arg] = 0; + } s->op_dead_args[oi] = dead_args; s->op_sync_args[oi] = sync_args; } @@ -1531,12 +1535,16 @@ static void tcg_liveness_analysis(TCGContext *s) memset(mem_temps, 1, s->nb_globals); } - /* input args are live */ + /* record arguments that die in this opcode */ for (i = nb_oargs; i < nb_oargs + nb_iargs; i++) { arg = args[i]; if (dead_temps[arg]) { dead_args |= (1 << i); } + } + /* input arguments are live for preceeding opcodes */ + for (i = nb_oargs; i < nb_oargs + nb_iargs; i++) { + arg = args[i]; dead_temps[arg] = 0; } s->op_dead_args[oi] = dead_args; From ebd27391b00cdafc81e0541a940686137b3b48df Mon Sep 17 00:00:00 2001 From: Aurelien Jarno Date: Thu, 4 Jun 2015 21:53:23 +0200 Subject: [PATCH 5/9] tcg/optimize: remove opc argument from tcg_opt_gen_movi We can get the opcode using the TCGOp pointer. It needs to be dereferenced, but it's anyway done a few lines below to write the new value. 
Cc: Richard Henderson Signed-off-by: Aurelien Jarno Message-Id: <1433447607-31184-2-git-send-email-aurelien@aurel32.net> Signed-off-by: Richard Henderson --- tcg/optimize.c | 40 ++++++++++++++++++++-------------------- 1 file changed, 20 insertions(+), 20 deletions(-) diff --git a/tcg/optimize.c b/tcg/optimize.c index 585f1ed7bb..0bfc9a5a8c 100644 --- a/tcg/optimize.c +++ b/tcg/optimize.c @@ -229,9 +229,9 @@ static void tcg_opt_gen_mov(TCGContext *s, TCGOp *op, TCGArg *args, } static void tcg_opt_gen_movi(TCGContext *s, TCGOp *op, TCGArg *args, - TCGOpcode old_op, TCGArg dst, TCGArg val) + TCGArg dst, TCGArg val) { - TCGOpcode new_op = op_to_movi(old_op); + TCGOpcode new_op = op_to_movi(op->opc); tcg_target_ulong mask; op->opc = new_op; @@ -670,7 +670,7 @@ static void tcg_constant_folding(TCGContext *s) CASE_OP_32_64(rotr): if (temps[args[1]].state == TCG_TEMP_CONST && temps[args[1]].val == 0) { - tcg_opt_gen_movi(s, op, args, opc, args[0], 0); + tcg_opt_gen_movi(s, op, args, args[0], 0); continue; } break; @@ -942,7 +942,7 @@ static void tcg_constant_folding(TCGContext *s) if (partmask == 0) { assert(nb_oargs == 1); - tcg_opt_gen_movi(s, op, args, opc, args[0], 0); + tcg_opt_gen_movi(s, op, args, args[0], 0); continue; } if (affected == 0) { @@ -952,7 +952,7 @@ static void tcg_constant_folding(TCGContext *s) } else if (temps[args[1]].state != TCG_TEMP_CONST) { tcg_opt_gen_mov(s, op, args, opc, args[0], args[1]); } else { - tcg_opt_gen_movi(s, op, args, opc, + tcg_opt_gen_movi(s, op, args, args[0], temps[args[1]].val); } continue; @@ -966,7 +966,7 @@ static void tcg_constant_folding(TCGContext *s) CASE_OP_32_64(mulsh): if ((temps[args[2]].state == TCG_TEMP_CONST && temps[args[2]].val == 0)) { - tcg_opt_gen_movi(s, op, args, opc, args[0], 0); + tcg_opt_gen_movi(s, op, args, args[0], 0); continue; } break; @@ -984,7 +984,7 @@ static void tcg_constant_folding(TCGContext *s) } else if (temps[args[1]].state != TCG_TEMP_CONST) { tcg_opt_gen_mov(s, op, args, opc, args[0], 
args[1]); } else { - tcg_opt_gen_movi(s, op, args, opc, + tcg_opt_gen_movi(s, op, args, args[0], temps[args[1]].val); } continue; @@ -1000,7 +1000,7 @@ static void tcg_constant_folding(TCGContext *s) CASE_OP_32_64(sub): CASE_OP_32_64(xor): if (temps_are_copies(args[1], args[2])) { - tcg_opt_gen_movi(s, op, args, opc, args[0], 0); + tcg_opt_gen_movi(s, op, args, args[0], 0); continue; } break; @@ -1026,7 +1026,7 @@ static void tcg_constant_folding(TCGContext *s) args[1] = temps[args[1]].val; /* fallthrough */ CASE_OP_32_64(movi): - tcg_opt_gen_movi(s, op, args, opc, args[0], args[1]); + tcg_opt_gen_movi(s, op, args, args[0], args[1]); break; CASE_OP_32_64(not): @@ -1039,7 +1039,7 @@ static void tcg_constant_folding(TCGContext *s) case INDEX_op_ext32u_i64: if (temps[args[1]].state == TCG_TEMP_CONST) { tmp = do_constant_folding(opc, temps[args[1]].val, 0); - tcg_opt_gen_movi(s, op, args, opc, args[0], tmp); + tcg_opt_gen_movi(s, op, args, args[0], tmp); break; } goto do_default; @@ -1047,7 +1047,7 @@ static void tcg_constant_folding(TCGContext *s) case INDEX_op_trunc_shr_i32: if (temps[args[1]].state == TCG_TEMP_CONST) { tmp = do_constant_folding(opc, temps[args[1]].val, args[2]); - tcg_opt_gen_movi(s, op, args, opc, args[0], tmp); + tcg_opt_gen_movi(s, op, args, args[0], tmp); break; } goto do_default; @@ -1078,7 +1078,7 @@ static void tcg_constant_folding(TCGContext *s) && temps[args[2]].state == TCG_TEMP_CONST) { tmp = do_constant_folding(opc, temps[args[1]].val, temps[args[2]].val); - tcg_opt_gen_movi(s, op, args, opc, args[0], tmp); + tcg_opt_gen_movi(s, op, args, args[0], tmp); break; } goto do_default; @@ -1088,7 +1088,7 @@ static void tcg_constant_folding(TCGContext *s) && temps[args[2]].state == TCG_TEMP_CONST) { tmp = deposit64(temps[args[1]].val, args[3], args[4], temps[args[2]].val); - tcg_opt_gen_movi(s, op, args, opc, args[0], tmp); + tcg_opt_gen_movi(s, op, args, args[0], tmp); break; } goto do_default; @@ -1096,7 +1096,7 @@ static void 
tcg_constant_folding(TCGContext *s) CASE_OP_32_64(setcond): tmp = do_constant_folding_cond(opc, args[1], args[2], args[3]); if (tmp != 2) { - tcg_opt_gen_movi(s, op, args, opc, args[0], tmp); + tcg_opt_gen_movi(s, op, args, args[0], tmp); break; } goto do_default; @@ -1121,7 +1121,7 @@ static void tcg_constant_folding(TCGContext *s) if (temps_are_copies(args[0], args[4-tmp])) { tcg_op_remove(s, op); } else if (temps[args[4-tmp]].state == TCG_TEMP_CONST) { - tcg_opt_gen_movi(s, op, args, opc, + tcg_opt_gen_movi(s, op, args, args[0], temps[args[4-tmp]].val); } else { tcg_opt_gen_mov(s, op, args, opc, args[0], args[4-tmp]); @@ -1154,8 +1154,8 @@ static void tcg_constant_folding(TCGContext *s) rl = args[0]; rh = args[1]; - tcg_opt_gen_movi(s, op, args, opc, rl, (uint32_t)a); - tcg_opt_gen_movi(s, op2, args2, opc, rh, (uint32_t)(a >> 32)); + tcg_opt_gen_movi(s, op, args, rl, (uint32_t)a); + tcg_opt_gen_movi(s, op2, args2, rh, (uint32_t)(a >> 32)); /* We've done all we need to do with the movi. Skip it. */ oi_next = op2->next; @@ -1175,8 +1175,8 @@ static void tcg_constant_folding(TCGContext *s) rl = args[0]; rh = args[1]; - tcg_opt_gen_movi(s, op, args, opc, rl, (uint32_t)r); - tcg_opt_gen_movi(s, op2, args2, opc, rh, (uint32_t)(r >> 32)); + tcg_opt_gen_movi(s, op, args, rl, (uint32_t)r); + tcg_opt_gen_movi(s, op2, args2, rh, (uint32_t)(r >> 32)); /* We've done all we need to do with the movi. Skip it. 
*/ oi_next = op2->next; @@ -1260,7 +1260,7 @@ static void tcg_constant_folding(TCGContext *s) tmp = do_constant_folding_cond2(&args[1], &args[3], args[5]); if (tmp != 2) { do_setcond_const: - tcg_opt_gen_movi(s, op, args, opc, args[0], tmp); + tcg_opt_gen_movi(s, op, args, args[0], tmp); } else if ((args[5] == TCG_COND_LT || args[5] == TCG_COND_GE) && temps[args[3]].state == TCG_TEMP_CONST && temps[args[4]].state == TCG_TEMP_CONST From 8d6a91602ea824ef4435ea38fd475387eecc098c Mon Sep 17 00:00:00 2001 From: Aurelien Jarno Date: Thu, 4 Jun 2015 21:53:24 +0200 Subject: [PATCH 6/9] tcg/optimize: remove opc argument from tcg_opt_gen_mov We can get the opcode using the TCGOp pointer. It needs to be dereferenced, but it's anyway done a few lines below to write the new value. Cc: Richard Henderson Signed-off-by: Aurelien Jarno Message-Id: <1433447607-31184-3-git-send-email-aurelien@aurel32.net> Signed-off-by: Richard Henderson --- tcg/optimize.c | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/tcg/optimize.c b/tcg/optimize.c index 0bfc9a5a8c..db159769ec 100644 --- a/tcg/optimize.c +++ b/tcg/optimize.c @@ -194,9 +194,9 @@ static bool temps_are_copies(TCGArg arg1, TCGArg arg2) } static void tcg_opt_gen_mov(TCGContext *s, TCGOp *op, TCGArg *args, - TCGOpcode old_op, TCGArg dst, TCGArg src) + TCGArg dst, TCGArg src) { - TCGOpcode new_op = op_to_mov(old_op); + TCGOpcode new_op = op_to_mov(op->opc); tcg_target_ulong mask; op->opc = new_op; @@ -791,7 +791,7 @@ static void tcg_constant_folding(TCGContext *s) if (temps_are_copies(args[0], args[1])) { tcg_op_remove(s, op); } else { - tcg_opt_gen_mov(s, op, args, opc, args[0], args[1]); + tcg_opt_gen_mov(s, op, args, args[0], args[1]); } continue; default: @@ -950,7 +950,7 @@ static void tcg_constant_folding(TCGContext *s) if (temps_are_copies(args[0], args[1])) { tcg_op_remove(s, op); } else if (temps[args[1]].state != TCG_TEMP_CONST) { - tcg_opt_gen_mov(s, op, args, opc, args[0], args[1]); + 
tcg_opt_gen_mov(s, op, args, args[0], args[1]); } else { tcg_opt_gen_movi(s, op, args, args[0], temps[args[1]].val); @@ -982,7 +982,7 @@ static void tcg_constant_folding(TCGContext *s) if (temps_are_copies(args[0], args[1])) { tcg_op_remove(s, op); } else if (temps[args[1]].state != TCG_TEMP_CONST) { - tcg_opt_gen_mov(s, op, args, opc, args[0], args[1]); + tcg_opt_gen_mov(s, op, args, args[0], args[1]); } else { tcg_opt_gen_movi(s, op, args, args[0], temps[args[1]].val); @@ -1018,7 +1018,7 @@ static void tcg_constant_folding(TCGContext *s) break; } if (temps[args[1]].state != TCG_TEMP_CONST) { - tcg_opt_gen_mov(s, op, args, opc, args[0], args[1]); + tcg_opt_gen_mov(s, op, args, args[0], args[1]); break; } /* Source argument is constant. Rewrite the operation and @@ -1124,7 +1124,7 @@ static void tcg_constant_folding(TCGContext *s) tcg_opt_gen_movi(s, op, args, args[0], temps[args[4-tmp]].val); } else { - tcg_opt_gen_mov(s, op, args, opc, args[0], args[4-tmp]); + tcg_opt_gen_mov(s, op, args, args[0], args[4-tmp]); } break; } From 5365718a9afeeabde3784d82a542f8ad909b18cf Mon Sep 17 00:00:00 2001 From: Aurelien Jarno Date: Thu, 4 Jun 2015 21:53:25 +0200 Subject: [PATCH 7/9] tcg/optimize: fold temp copies test in tcg_opt_gen_mov Each call to tcg_opt_gen_mov is preceded by a test to check if the source and destination temps are copies. Fold that into the tcg_opt_gen_mov function. 
Cc: Richard Henderson Signed-off-by: Aurelien Jarno Message-Id: <1433447607-31184-4-git-send-email-aurelien@aurel32.net> Signed-off-by: Richard Henderson --- tcg/optimize.c | 27 +++++++++------------------ 1 file changed, 9 insertions(+), 18 deletions(-) diff --git a/tcg/optimize.c b/tcg/optimize.c index db159769ec..d5c0398f44 100644 --- a/tcg/optimize.c +++ b/tcg/optimize.c @@ -196,6 +196,11 @@ static bool temps_are_copies(TCGArg arg1, TCGArg arg2) static void tcg_opt_gen_mov(TCGContext *s, TCGOp *op, TCGArg *args, TCGArg dst, TCGArg src) { + if (temps_are_copies(dst, src)) { + tcg_op_remove(s, op); + return; + } + TCGOpcode new_op = op_to_mov(op->opc); tcg_target_ulong mask; @@ -788,11 +793,7 @@ static void tcg_constant_folding(TCGContext *s) } break; do_mov3: - if (temps_are_copies(args[0], args[1])) { - tcg_op_remove(s, op); - } else { - tcg_opt_gen_mov(s, op, args, args[0], args[1]); - } + tcg_opt_gen_mov(s, op, args, args[0], args[1]); continue; default: break; @@ -947,9 +948,7 @@ static void tcg_constant_folding(TCGContext *s) } if (affected == 0) { assert(nb_oargs == 1); - if (temps_are_copies(args[0], args[1])) { - tcg_op_remove(s, op); - } else if (temps[args[1]].state != TCG_TEMP_CONST) { + if (temps[args[1]].state != TCG_TEMP_CONST) { tcg_opt_gen_mov(s, op, args, args[0], args[1]); } else { tcg_opt_gen_movi(s, op, args, @@ -979,9 +978,7 @@ static void tcg_constant_folding(TCGContext *s) CASE_OP_32_64(or): CASE_OP_32_64(and): if (temps_are_copies(args[1], args[2])) { - if (temps_are_copies(args[0], args[1])) { - tcg_op_remove(s, op); - } else if (temps[args[1]].state != TCG_TEMP_CONST) { + if (temps[args[1]].state != TCG_TEMP_CONST) { tcg_opt_gen_mov(s, op, args, args[0], args[1]); } else { tcg_opt_gen_movi(s, op, args, @@ -1013,10 +1010,6 @@ static void tcg_constant_folding(TCGContext *s) allocator where needed and possible. Also detect copies. 
switch (opc) { CASE_OP_32_64(mov): - if (temps_are_copies(args[0], args[1])) { - tcg_op_remove(s, op); - break; - } if (temps[args[1]].state != TCG_TEMP_CONST) { tcg_opt_gen_mov(s, op, args, args[0], args[1]); break; @@ -1118,9 +1111,7 @@ static void tcg_constant_folding(TCGContext *s) CASE_OP_32_64(movcond): tmp = do_constant_folding_cond(opc, args[1], args[2], args[5]); if (tmp != 2) { - if (temps_are_copies(args[0], args[4-tmp])) { - tcg_op_remove(s, op); - } else if (temps[args[4-tmp]].state == TCG_TEMP_CONST) { + if (temps[args[4-tmp]].state == TCG_TEMP_CONST) { tcg_opt_gen_movi(s, op, args, args[0], temps[args[4-tmp]].val); } else { From 97a79eb70dd35a24fda87d86196afba5e6f21c5d Mon Sep 17 00:00:00 2001 From: Aurelien Jarno Date: Fri, 5 Jun 2015 11:19:18 +0200 Subject: [PATCH 8/9] tcg/optimize: fold constant test in tcg_opt_gen_mov Most of the calls to tcg_opt_gen_mov are preceded by a test to check if the source temp is a constant. Fold that into the tcg_opt_gen_mov function. Cc: Richard Henderson Signed-off-by: Aurelien Jarno Message-Id: <1433495958-9508-1-git-send-email-aurelien@aurel32.net> Signed-off-by: Richard Henderson --- tcg/optimize.c | 89 ++++++++++++++++++++------------------------------ 1 file changed, 36 insertions(+), 53 deletions(-) diff --git a/tcg/optimize.c b/tcg/optimize.c index d5c0398f44..859a6c15d3 100644 --- a/tcg/optimize.c +++ b/tcg/optimize.c @@ -193,6 +193,28 @@ static bool temps_are_copies(TCGArg arg1, TCGArg arg2) return false; } +static void tcg_opt_gen_movi(TCGContext *s, TCGOp *op, TCGArg *args, + TCGArg dst, TCGArg val) +{ + TCGOpcode new_op = op_to_movi(op->opc); + tcg_target_ulong mask; + + op->opc = new_op; + + reset_temp(dst); + temps[dst].state = TCG_TEMP_CONST; + temps[dst].val = val; + mask = val; + if (TCG_TARGET_REG_BITS > 32 && new_op == INDEX_op_mov_i32) { + /* High bits of the destination are now garbage. 
*/ + mask |= ~0xffffffffull; + } + temps[dst].mask = mask; + + args[0] = dst; + args[1] = val; +} + static void tcg_opt_gen_mov(TCGContext *s, TCGOp *op, TCGArg *args, TCGArg dst, TCGArg src) { @@ -201,6 +223,11 @@ static void tcg_opt_gen_mov(TCGContext *s, TCGOp *op, TCGArg *args, return; } + if (temps[src].state == TCG_TEMP_CONST) { + tcg_opt_gen_movi(s, op, args, dst, temps[src].val); + return; + } + TCGOpcode new_op = op_to_mov(op->opc); tcg_target_ulong mask; @@ -233,28 +260,6 @@ static void tcg_opt_gen_mov(TCGContext *s, TCGOp *op, TCGArg *args, args[1] = src; } -static void tcg_opt_gen_movi(TCGContext *s, TCGOp *op, TCGArg *args, - TCGArg dst, TCGArg val) -{ - TCGOpcode new_op = op_to_movi(op->opc); - tcg_target_ulong mask; - - op->opc = new_op; - - reset_temp(dst); - temps[dst].state = TCG_TEMP_CONST; - temps[dst].val = val; - mask = val; - if (TCG_TARGET_REG_BITS > 32 && new_op == INDEX_op_mov_i32) { - /* High bits of the destination are now garbage. */ - mask |= ~0xffffffffull; - } - temps[dst].mask = mask; - - args[0] = dst; - args[1] = val; -} - static TCGArg do_constant_folding_2(TCGOpcode op, TCGArg x, TCGArg y) { uint64_t l64, h64; @@ -780,7 +785,8 @@ static void tcg_constant_folding(TCGContext *s) if (temps[args[1]].state != TCG_TEMP_CONST && temps[args[2]].state == TCG_TEMP_CONST && temps[args[2]].val == 0) { - goto do_mov3; + tcg_opt_gen_mov(s, op, args, args[0], args[1]); + continue; } break; CASE_OP_32_64(and): @@ -789,12 +795,10 @@ static void tcg_constant_folding(TCGContext *s) if (temps[args[1]].state != TCG_TEMP_CONST && temps[args[2]].state == TCG_TEMP_CONST && temps[args[2]].val == -1) { - goto do_mov3; + tcg_opt_gen_mov(s, op, args, args[0], args[1]); + continue; } break; - do_mov3: - tcg_opt_gen_mov(s, op, args, args[0], args[1]); - continue; default: break; } @@ -948,12 +952,7 @@ static void tcg_constant_folding(TCGContext *s) } if (affected == 0) { assert(nb_oargs == 1); - if (temps[args[1]].state != TCG_TEMP_CONST) { - 
tcg_opt_gen_mov(s, op, args, args[0], args[1]); - } else { - tcg_opt_gen_movi(s, op, args, - args[0], temps[args[1]].val); - } + tcg_opt_gen_mov(s, op, args, args[0], args[1]); continue; } @@ -978,12 +977,7 @@ static void tcg_constant_folding(TCGContext *s) CASE_OP_32_64(or): CASE_OP_32_64(and): if (temps_are_copies(args[1], args[2])) { - if (temps[args[1]].state != TCG_TEMP_CONST) { - tcg_opt_gen_mov(s, op, args, args[0], args[1]); - } else { - tcg_opt_gen_movi(s, op, args, - args[0], temps[args[1]].val); - } + tcg_opt_gen_mov(s, op, args, args[0], args[1]); continue; } break; @@ -1010,14 +1004,8 @@ static void tcg_constant_folding(TCGContext *s) allocator where needed and possible. Also detect copies. */ switch (opc) { CASE_OP_32_64(mov): - if (temps[args[1]].state != TCG_TEMP_CONST) { - tcg_opt_gen_mov(s, op, args, args[0], args[1]); - break; - } - /* Source argument is constant. Rewrite the operation and - let movi case handle it. */ - args[1] = temps[args[1]].val; - /* fallthrough */ + tcg_opt_gen_mov(s, op, args, args[0], args[1]); + break; CASE_OP_32_64(movi): tcg_opt_gen_movi(s, op, args, args[0], args[1]); break; @@ -1111,12 +1099,7 @@ static void tcg_constant_folding(TCGContext *s) CASE_OP_32_64(movcond): tmp = do_constant_folding_cond(opc, args[1], args[2], args[5]); if (tmp != 2) { - if (temps[args[4-tmp]].state == TCG_TEMP_CONST) { - tcg_opt_gen_movi(s, op, args, - args[0], temps[args[4-tmp]].val); - } else { - tcg_opt_gen_mov(s, op, args, args[0], args[4-tmp]); - } + tcg_opt_gen_mov(s, op, args, args[0], args[4-tmp]); break; } goto do_default; From 36e60ef6ac5d8a262d0fbeedfdb2b588514cb1ea Mon Sep 17 00:00:00 2001 From: Aurelien Jarno Date: Thu, 4 Jun 2015 21:53:27 +0200 Subject: [PATCH 9/9] tcg/optimize: rename tcg_constant_folding The tcg_constant_folding function ends up doing all the optimizations (which is a good thing to avoid looping on all ops multiple times), so make it clear and just rename it tcg_optimize. 
Cc: Richard Henderson Signed-off-by: Aurelien Jarno Message-Id: <1433447607-31184-6-git-send-email-aurelien@aurel32.net> Signed-off-by: Richard Henderson --- tcg/optimize.c | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) diff --git a/tcg/optimize.c b/tcg/optimize.c index 859a6c15d3..0f6f7008da 100644 --- a/tcg/optimize.c +++ b/tcg/optimize.c @@ -574,7 +574,7 @@ static bool swap_commutative2(TCGArg *p1, TCGArg *p2) } /* Propagate constants and copies, fold constant expressions. */ -static void tcg_constant_folding(TCGContext *s) +void tcg_optimize(TCGContext *s) { int oi, oi_next, nb_temps, nb_globals; @@ -1328,8 +1328,3 @@ static void tcg_constant_folding(TCGContext *s) } } } - -void tcg_optimize(TCGContext *s) -{ - tcg_constant_folding(s); -}