diff --git a/tcg/arm/tcg-target.inc.c b/tcg/arm/tcg-target.inc.c
index 98a12535a5..dc83f3e5be 100644
--- a/tcg/arm/tcg-target.inc.c
+++ b/tcg/arm/tcg-target.inc.c
@@ -1103,6 +1103,56 @@ static inline void tcg_out_mb(TCGContext *s, TCGArg a0)
     }
 }
 
+static TCGCond tcg_out_cmp2(TCGContext *s, const TCGArg *args,
+                            const int *const_args)
+{
+    TCGReg al = args[0];
+    TCGReg ah = args[1];
+    TCGArg bl = args[2];
+    TCGArg bh = args[3];
+    TCGCond cond = args[4];
+    int const_bl = const_args[2];
+    int const_bh = const_args[3];
+
+    switch (cond) {
+    case TCG_COND_EQ:
+    case TCG_COND_NE:
+    case TCG_COND_LTU:
+    case TCG_COND_LEU:
+    case TCG_COND_GTU:
+    case TCG_COND_GEU:
+        /* We perform a conditional comparison.  If the high half is
+           equal, then overwrite the flags with the comparison of the
+           low half.  The resulting flags cover the whole.  */
+        tcg_out_dat_rI(s, COND_AL, ARITH_CMP, 0, ah, bh, const_bh);
+        tcg_out_dat_rI(s, COND_EQ, ARITH_CMP, 0, al, bl, const_bl);
+        return cond;
+
+    case TCG_COND_LT:
+    case TCG_COND_GE:
+        /* We perform a double-word subtraction and examine the result.
+           We do not actually need the result of the subtract, so the
+           low part "subtract" is a compare.  For the high half we have
+           no choice but to compute into a temporary.  */
+        tcg_out_dat_rI(s, COND_AL, ARITH_CMP, 0, al, bl, const_bl);
+        tcg_out_dat_rI(s, COND_AL, ARITH_SBC | TO_CPSR,
+                       TCG_REG_TMP, ah, bh, const_bh);
+        return cond;
+
+    case TCG_COND_LE:
+    case TCG_COND_GT:
+        /* Similar, but with swapped arguments, via reversed subtract.  */
+        tcg_out_dat_rI(s, COND_AL, ARITH_RSB | TO_CPSR,
+                       TCG_REG_TMP, al, bl, const_bl);
+        tcg_out_dat_rI(s, COND_AL, ARITH_RSC | TO_CPSR,
+                       TCG_REG_TMP, ah, bh, const_bh);
+        return tcg_swap_cond(cond);
+
+    default:
+        g_assert_not_reached();
+    }
+}
+
 #ifdef CONFIG_SOFTMMU
 
 #include "tcg-ldst.inc.c"
 
@@ -1197,12 +1247,6 @@ static TCGReg tcg_out_arg_reg64(TCGContext *s, TCGReg argreg,
 /* We're expecting to use an 8-bit immediate and to mask.  */
 QEMU_BUILD_BUG_ON(CPU_TLB_BITS > 8);
 
-/* We're expecting to use an 8-bit immediate add + 8-bit ldrd offset.
-   Using the offset of the second entry in the last tlb table ensures
-   that we can index all of the elements of the first entry.  */
-QEMU_BUILD_BUG_ON(offsetof(CPUArchState, tlb_table[NB_MMU_MODES - 1][1])
-                  > 0xffff);
-
 /* Load and compare a TLB entry, leaving the flags set.  Returns the register
    containing the addend of the tlb entry.  Clobbers R0, R1, R2, TMP.  */
 
@@ -1215,6 +1259,7 @@ static TCGReg tcg_out_tlb_read(TCGContext *s, TCGReg addrlo, TCGReg addrhi,
                ? offsetof(CPUArchState, tlb_table[mem_index][0].addr_read)
                : offsetof(CPUArchState, tlb_table[mem_index][0].addr_write));
     int add_off = offsetof(CPUArchState, tlb_table[mem_index][0].addend);
+    int mask_off;
     unsigned s_bits = opc & MO_SIZE;
     unsigned a_bits = get_alignment_bits(opc);
 
@@ -1246,16 +1291,25 @@ static TCGReg tcg_out_tlb_read(TCGContext *s, TCGReg addrlo, TCGReg addrhi,
                         0, addrlo, SHIFT_IMM_LSR(TARGET_PAGE_BITS));
     }
-    /* We checked that the offset is contained within 16 bits above.  */
-    if (add_off > 0xfff
-        || (use_armv6_instructions && TARGET_LONG_BITS == 64
-            && cmp_off > 0xff)) {
-        tcg_out_dat_imm(s, COND_AL, ARITH_ADD, TCG_REG_R2, base,
-                        (24 << 7) | (cmp_off >> 8));
-        base = TCG_REG_R2;
-        add_off -= cmp_off & 0xff00;
-        cmp_off &= 0xff;
+    /* Add portions of the offset until the memory access is in range.
+     * If we plan on using ldrd, reduce to an 8-bit offset; otherwise
+     * we can use a 12-bit offset.
+     */
+    if (use_armv6_instructions && TARGET_LONG_BITS == 64) {
+        mask_off = 0xff;
+    } else {
+        mask_off = 0xfff;
     }
+    while (cmp_off > mask_off) {
+        int shift = ctz32(cmp_off & ~mask_off) & ~1;
+        int rot = ((32 - shift) << 7) & 0xf00;
+        int addend = cmp_off & (0xff << shift);
+        tcg_out_dat_imm(s, COND_AL, ARITH_ADD, TCG_REG_R2, base,
+                        rot | ((cmp_off >> shift) & 0xff));
+        base = TCG_REG_R2;
+        add_off -= addend;
+        cmp_off -= addend;
+    }
 
     if (!use_armv7_instructions) {
         tcg_out_dat_imm(s, COND_AL, ARITH_AND,
                         TCG_REG_R0, TCG_REG_TMP, CPU_TLB_SIZE - 1);
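
Illustration (not part of the patch): the new while loop above generalizes the
old one-shot 8-bit adjustment.  It peels the TLB comparator offset into ARM
rotated-immediate chunks (an 8-bit payload rotated right by an even amount),
adding each chunk to the base register until the remainder fits the load's
addressing mode: 8 bits when ldrd will be used, 12 bits otherwise.  That is
also why the 16-bit QEMU_BUILD_BUG_ON ceiling can be dropped.  The standalone
C sketch below shows only that arithmetic; the sample offset, the base_added
accumulator, and the ctz32 wrapper over __builtin_ctz are illustrative
stand-ins, not QEMU code.

    #include <assert.h>
    #include <stdio.h>

    static int ctz32(unsigned v) { return __builtin_ctz(v); }

    int main(void)
    {
        unsigned cmp_off = 0x12f40;   /* made-up oversized TLB offset */
        unsigned mask_off = 0xff;     /* ldrd case: 8-bit offset limit */
        unsigned base_added = 0;      /* models the running ADDs into R2 */

        while (cmp_off > mask_off) {
            /* Lowest set bit outside the reachable range, rounded down to
               an even position: ARM immediates rotate in steps of two. */
            int shift = ctz32(cmp_off & ~mask_off) & ~1;
            /* Instruction encoding: 4-bit rotate field, 8-bit payload. */
            int rot = ((32 - shift) << 7) & 0xf00;
            unsigned imm8 = (cmp_off >> shift) & 0xff;
            unsigned addend = imm8 << shift;

            printf("ADD imm 0x%03x contributes 0x%x\n", rot | imm8, addend);
            base_added += addend;
            cmp_off -= addend;
        }
        assert(base_added + cmp_off == 0x12f40);  /* nothing lost */
        return 0;
    }

For the sample offset 0x12f40 this emits two ADDs (0x2f00 and 0x10000) and
leaves a residual of 0x40, which fits the 8-bit ldrd displacement.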
@@ -1964,22 +2018,6 @@ static inline void tcg_out_op(TCGContext *s, TCGOpcode opc,
         tcg_out_goto_label(s, tcg_cond_to_arm_cond[args[2]],
                            arg_label(args[3]));
         break;
-    case INDEX_op_brcond2_i32:
-        /* The resulting conditions are:
-         * TCG_COND_EQ    --> a0 == a2 && a1 == a3,
-         * TCG_COND_NE    --> (a0 != a2 && a1 == a3) || a1 != a3,
-         * TCG_COND_LT(U) --> (a0 < a2 && a1 == a3) || a1 < a3,
-         * TCG_COND_GE(U) --> (a0 >= a2 && a1 == a3) || (a1 >= a3 && a1 != a3),
-         * TCG_COND_LE(U) --> (a0 <= a2 && a1 == a3) || (a1 <= a3 && a1 != a3),
-         * TCG_COND_GT(U) --> (a0 > a2 && a1 == a3) || a1 > a3,
-         */
-        tcg_out_dat_rIN(s, COND_AL, ARITH_CMP, ARITH_CMN, 0,
-                        args[1], args[3], const_args[3]);
-        tcg_out_dat_rIN(s, COND_EQ, ARITH_CMP, ARITH_CMN, 0,
-                        args[0], args[2], const_args[2]);
-        tcg_out_goto_label(s, tcg_cond_to_arm_cond[args[4]],
-                           arg_label(args[5]));
-        break;
     case INDEX_op_setcond_i32:
         tcg_out_dat_rIN(s, COND_AL, ARITH_CMP, ARITH_CMN, 0,
                         args[1], args[2], const_args[2]);
@@ -1988,15 +2026,15 @@ static inline void tcg_out_op(TCGContext *s, TCGOpcode opc,
         tcg_out_dat_imm(s, tcg_cond_to_arm_cond[tcg_invert_cond(args[3])],
                         ARITH_MOV, args[0], 0, 0);
         break;
+
+    case INDEX_op_brcond2_i32:
+        c = tcg_out_cmp2(s, args, const_args);
+        tcg_out_goto_label(s, tcg_cond_to_arm_cond[c], arg_label(args[5]));
+        break;
     case INDEX_op_setcond2_i32:
-        /* See brcond2_i32 comment */
-        tcg_out_dat_rIN(s, COND_AL, ARITH_CMP, ARITH_CMN, 0,
-                        args[2], args[4], const_args[4]);
-        tcg_out_dat_rIN(s, COND_EQ, ARITH_CMP, ARITH_CMN, 0,
-                        args[1], args[3], const_args[3]);
-        tcg_out_dat_imm(s, tcg_cond_to_arm_cond[args[5]],
-                        ARITH_MOV, args[0], 0, 1);
-        tcg_out_dat_imm(s, tcg_cond_to_arm_cond[tcg_invert_cond(args[5])],
+        c = tcg_out_cmp2(s, args + 1, const_args + 1);
+        tcg_out_dat_imm(s, tcg_cond_to_arm_cond[c], ARITH_MOV, args[0], 0, 1);
+        tcg_out_dat_imm(s, tcg_cond_to_arm_cond[tcg_invert_cond(c)],
                         ARITH_MOV, args[0], 0, 0);
         break;
 
@@ -2093,9 +2131,9 @@ static const TCGTargetOpDef *tcg_target_op_def(TCGOpcode op)
     static const TCGTargetOpDef sub2
         = { .args_ct_str = { "r", "r", "rI", "rI", "rIN", "rIK" } };
     static const TCGTargetOpDef br2
-        = { .args_ct_str = { "r", "r", "rIN", "rIN" } };
+        = { .args_ct_str = { "r", "r", "rI", "rI" } };
     static const TCGTargetOpDef setc2
-        = { .args_ct_str = { "r", "r", "r", "rIN", "rIN" } };
+        = { .args_ct_str = { "r", "r", "r", "rI", "rI" } };
 
     switch (op) {
     case INDEX_op_goto_ptr:
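
Illustration (not part of the patch): tcg_out_cmp2 centralizes the double-word
comparison that brcond2_i32 and setcond2_i32 previously open-coded.  EQ/NE and
the unsigned orderings compare the high halves and conditionally overwrite the
flags with a low-half compare; signed LT/GE emit CMP on the low halves
followed by a flag-setting SBC on the high halves, so N and V describe the
full 64-bit subtraction; LE/GT reverse the subtraction (RSB/RSC) and return
tcg_swap_cond(cond).  The dropped "N" in the br2/setc2 constraint strings
follows from the helper using tcg_out_dat_rI, which has no negated-immediate
form.  The C sketch below models the LT borrow chain with 32-bit halves;
lt_i64_by_halves and the explicit n/v flag computations are illustrative, not
QEMU code.

    #include <assert.h>
    #include <stdint.h>

    static int lt_i64_by_halves(uint32_t al, uint32_t ah,
                                uint32_t bl, uint32_t bh)
    {
        /* CMP al, bl: ARM carry means "no borrow"; borrow iff al < bl. */
        uint32_t borrow = al < bl;
        /* SBC ah, bh (with TO_CPSR): high-half subtract minus the borrow. */
        uint32_t res = ah - bh - borrow;
        int n = res >> 31;                          /* sign flag N */
        int v = ((ah ^ bh) & (ah ^ res)) >> 31;     /* overflow flag V */
        return n ^ v;                               /* TCG_COND_LT: N != V */
    }

    int main(void)
    {
        int64_t samples[] = { 0, 1, -1, INT64_MIN, INT64_MAX, 0x100000000LL };
        for (int i = 0; i < 6; i++) {
            for (int j = 0; j < 6; j++) {
                int64_t a = samples[i], b = samples[j];
                assert(lt_i64_by_halves((uint32_t)a, (uint32_t)(a >> 32),
                                        (uint32_t)b, (uint32_t)(b >> 32))
                       == (a < b));
            }
        }
        return 0;
    }

GE is the same flag pair tested for N == V, which is why those cases can
return cond unchanged while LE/GT must swap both operands and condition.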
diff --git a/tcg/ppc/tcg-target.inc.c b/tcg/ppc/tcg-target.inc.c
index 879885b68b..86f7de5f7e 100644
--- a/tcg/ppc/tcg-target.inc.c
+++ b/tcg/ppc/tcg-target.inc.c
@@ -222,33 +222,6 @@ static inline void tcg_out_bc_noaddr(TCGContext *s, int insn)
     tcg_out32(s, insn | retrans);
 }
 
-static void patch_reloc(tcg_insn_unit *code_ptr, int type,
-                        intptr_t value, intptr_t addend)
-{
-    tcg_insn_unit *target;
-    tcg_insn_unit old;
-
-    value += addend;
-    target = (tcg_insn_unit *)value;
-
-    switch (type) {
-    case R_PPC_REL14:
-        reloc_pc14(code_ptr, target);
-        break;
-    case R_PPC_REL24:
-        reloc_pc24(code_ptr, target);
-        break;
-    case R_PPC_ADDR16:
-        assert(value == (int16_t)value);
-        old = *code_ptr;
-        old = deposit32(old, 0, 16, value);
-        *code_ptr = old;
-        break;
-    default:
-        tcg_abort();
-    }
-}
-
 /* parse target specific constraints */
 static const char *target_parse_constraint(TCGArgConstraint *ct,
                                            const char *ct_str, TCGType type)
@@ -552,6 +525,43 @@ static const uint32_t tcg_to_isel[] = {
     [TCG_COND_GTU] = ISEL | BC_(7, CR_GT),
 };
 
+static void patch_reloc(tcg_insn_unit *code_ptr, int type,
+                        intptr_t value, intptr_t addend)
+{
+    tcg_insn_unit *target;
+    tcg_insn_unit old;
+
+    value += addend;
+    target = (tcg_insn_unit *)value;
+
+    switch (type) {
+    case R_PPC_REL14:
+        reloc_pc14(code_ptr, target);
+        break;
+    case R_PPC_REL24:
+        reloc_pc24(code_ptr, target);
+        break;
+    case R_PPC_ADDR16:
+        /* We are abusing this relocation type.  This points to a pair
+           of insns, addis + load.  If the displacement is small, we
+           can nop out the addis.  */
+        if (value == (int16_t)value) {
+            code_ptr[0] = NOP;
+            old = deposit32(code_ptr[1], 0, 16, value);
+            code_ptr[1] = deposit32(old, 16, 5, TCG_REG_TB);
+        } else {
+            int16_t lo = value;
+            int hi = value - lo;
+            assert(hi + lo == value);
+            code_ptr[0] = deposit32(code_ptr[0], 0, 16, hi >> 16);
+            code_ptr[1] = deposit32(code_ptr[1], 0, 16, lo);
+        }
+        break;
+    default:
+        g_assert_not_reached();
+    }
+}
+
 static void tcg_out_mem_long(TCGContext *s, int opi, int opx, TCGReg rt,
                              TCGReg base, tcg_target_long offset);
 
@@ -690,7 +700,8 @@ static void tcg_out_movi_int(TCGContext *s, TCGType type, TCGReg ret,
     if (!in_prologue && USE_REG_TB) {
         new_pool_label(s, arg, R_PPC_ADDR16, s->code_ptr,
                        -(intptr_t)s->code_gen_ptr);
-        tcg_out32(s, LD | TAI(ret, TCG_REG_TB, 0));
+        tcg_out32(s, ADDIS | TAI(ret, TCG_REG_TB, 0));
+        tcg_out32(s, LD | TAI(ret, ret, 0));
         return;
     }
 
@@ -1524,16 +1535,15 @@ static TCGReg tcg_out_tlb_read(TCGContext *s, TCGMemOp opc,
 
     /* Compensate for very large offsets.  */
     if (add_off >= 0x8000) {
-        /* Most target env are smaller than 32k; none are larger than 64k.
-           Simplify the logic here merely to offset by 0x7ff0, giving us a
-           range just shy of 64k.  Check this assumption.  */
-        QEMU_BUILD_BUG_ON(offsetof(CPUArchState,
-                                   tlb_table[NB_MMU_MODES - 1][1])
-                          > 0x7ff0 + 0x7fff);
-        tcg_out32(s, ADDI | TAI(TCG_REG_TMP1, base, 0x7ff0));
+        int low = (int16_t)cmp_off;
+        int high = cmp_off - low;
+        assert((high & 0xffff) == 0);
+        assert(cmp_off - high == (int16_t)(cmp_off - high));
+        assert(add_off - high == (int16_t)(add_off - high));
+        tcg_out32(s, ADDIS | TAI(TCG_REG_TMP1, base, high >> 16));
         base = TCG_REG_TMP1;
-        cmp_off -= 0x7ff0;
-        add_off -= 0x7ff0;
+        cmp_off -= high;
+        add_off -= high;
    }
 
    /* Extraction and shifting, part 2.  */
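
Illustration (not part of the patch): the new R_PPC_ADDR16 handling and the
tlb_read hunk above use the same decomposition of a 32-bit displacement into a
sign-extended low 16 bits (consumed by the ld/addi displacement field) and a
high part applied by addis.  Because the low part is signed, the high part is
computed as value - (int16_t)value, which rounds it up by 0x10000 whenever the
low half is negative; the assert(hi + lo == value) guard rejects displacements
the pair cannot reach.  The sample values in the C sketch below are made up
for illustration.

    #include <assert.h>
    #include <stdint.h>
    #include <stdio.h>

    int main(void)
    {
        int32_t values[] = { 0x12345678, 0x1234f678, -0x7000 };
        for (int i = 0; i < 3; i++) {
            int32_t value = values[i];
            int16_t lo = value;         /* 16-bit field, sign-extended */
            int32_t hi = value - lo;    /* compensated addis part */

            assert((hi & 0xffff) == 0); /* addis shifts its immediate by 16 */
            assert(hi + lo == value);   /* same guard as in patch_reloc */
            printf("0x%08x -> addis %+d, disp %+d\n",
                   (unsigned)value, hi >> 16, lo);
        }
        return 0;
    }

For 0x1234f678, for example, lo sign-extends to -2440, so hi rounds up to
0x12350000 and the addis/load pair recombines to the original value.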