tcg/ppc: Support 128-bit load/store
Use LQ/STQ with ISA v2.07 when 16-byte atomicity is required. Note that these instructions do not require 16-byte alignment. Reviewed-by: Daniel Henrique Barboza <danielhb413@gmail.com> Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
This commit is contained in:
parent
929124ec0b
commit
526cd4ec01
@ -14,6 +14,7 @@ C_O0_I2(r, r)
|
|||||||
C_O0_I2(r, ri)
|
C_O0_I2(r, ri)
|
||||||
C_O0_I2(v, r)
|
C_O0_I2(v, r)
|
||||||
C_O0_I3(r, r, r)
|
C_O0_I3(r, r, r)
|
||||||
|
C_O0_I3(o, m, r)
|
||||||
C_O0_I4(r, r, ri, ri)
|
C_O0_I4(r, r, ri, ri)
|
||||||
C_O0_I4(r, r, r, r)
|
C_O0_I4(r, r, r, r)
|
||||||
C_O1_I1(r, r)
|
C_O1_I1(r, r)
|
||||||
@ -34,6 +35,7 @@ C_O1_I3(v, v, v, v)
|
|||||||
C_O1_I4(r, r, ri, rZ, rZ)
|
C_O1_I4(r, r, ri, rZ, rZ)
|
||||||
C_O1_I4(r, r, r, ri, ri)
|
C_O1_I4(r, r, r, ri, ri)
|
||||||
C_O2_I1(r, r, r)
|
C_O2_I1(r, r, r)
|
||||||
|
C_O2_I1(o, m, r)
|
||||||
C_O2_I2(r, r, r, r)
|
C_O2_I2(r, r, r, r)
|
||||||
C_O2_I4(r, r, rI, rZM, r, r)
|
C_O2_I4(r, r, rI, rZM, r, r)
|
||||||
C_O2_I4(r, r, r, r, rI, rZM)
|
C_O2_I4(r, r, r, r, rI, rZM)
|
||||||
|
@ -9,6 +9,7 @@
|
|||||||
* REGS(letter, register_mask)
|
* REGS(letter, register_mask)
|
||||||
*/
|
*/
|
||||||
REGS('r', ALL_GENERAL_REGS)
|
REGS('r', ALL_GENERAL_REGS)
|
||||||
|
REGS('o', ALL_GENERAL_REGS & 0xAAAAAAAAu) /* odd registers */
|
||||||
REGS('v', ALL_VECTOR_REGS)
|
REGS('v', ALL_VECTOR_REGS)
|
||||||
|
|
||||||
/*
|
/*
|
||||||
|
@ -295,25 +295,27 @@ static bool tcg_target_const_match(int64_t val, TCGType type, int ct)
|
|||||||
|
|
||||||
#define B OPCD( 18)
|
#define B OPCD( 18)
|
||||||
#define BC OPCD( 16)
|
#define BC OPCD( 16)
|
||||||
|
|
||||||
#define LBZ OPCD( 34)
|
#define LBZ OPCD( 34)
|
||||||
#define LHZ OPCD( 40)
|
#define LHZ OPCD( 40)
|
||||||
#define LHA OPCD( 42)
|
#define LHA OPCD( 42)
|
||||||
#define LWZ OPCD( 32)
|
#define LWZ OPCD( 32)
|
||||||
#define LWZUX XO31( 55)
|
#define LWZUX XO31( 55)
|
||||||
#define STB OPCD( 38)
|
|
||||||
#define STH OPCD( 44)
|
|
||||||
#define STW OPCD( 36)
|
|
||||||
|
|
||||||
#define STD XO62( 0)
|
|
||||||
#define STDU XO62( 1)
|
|
||||||
#define STDX XO31(149)
|
|
||||||
|
|
||||||
#define LD XO58( 0)
|
#define LD XO58( 0)
|
||||||
#define LDX XO31( 21)
|
#define LDX XO31( 21)
|
||||||
#define LDU XO58( 1)
|
#define LDU XO58( 1)
|
||||||
#define LDUX XO31( 53)
|
#define LDUX XO31( 53)
|
||||||
#define LWA XO58( 2)
|
#define LWA XO58( 2)
|
||||||
#define LWAX XO31(341)
|
#define LWAX XO31(341)
|
||||||
|
#define LQ OPCD( 56)
|
||||||
|
|
||||||
|
#define STB OPCD( 38)
|
||||||
|
#define STH OPCD( 44)
|
||||||
|
#define STW OPCD( 36)
|
||||||
|
#define STD XO62( 0)
|
||||||
|
#define STDU XO62( 1)
|
||||||
|
#define STDX XO31(149)
|
||||||
|
#define STQ XO62( 2)
|
||||||
|
|
||||||
#define ADDIC OPCD( 12)
|
#define ADDIC OPCD( 12)
|
||||||
#define ADDI OPCD( 14)
|
#define ADDI OPCD( 14)
|
||||||
@ -2020,7 +2022,18 @@ typedef struct {
|
|||||||
|
|
||||||
bool tcg_target_has_memory_bswap(MemOp memop)
|
bool tcg_target_has_memory_bswap(MemOp memop)
|
||||||
{
|
{
|
||||||
|
TCGAtomAlign aa;
|
||||||
|
|
||||||
|
if ((memop & MO_SIZE) <= MO_64) {
|
||||||
return true;
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Reject 16-byte memop with 16-byte atomicity,
|
||||||
|
* but do allow a pair of 64-bit operations.
|
||||||
|
*/
|
||||||
|
aa = atom_and_align_for_opc(tcg_ctx, memop, MO_ATOM_IFALIGN, true);
|
||||||
|
return aa.atom <= MO_64;
|
||||||
}
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
@ -2035,7 +2048,7 @@ static TCGLabelQemuLdst *prepare_host_addr(TCGContext *s, HostAddress *h,
|
|||||||
{
|
{
|
||||||
TCGLabelQemuLdst *ldst = NULL;
|
TCGLabelQemuLdst *ldst = NULL;
|
||||||
MemOp opc = get_memop(oi);
|
MemOp opc = get_memop(oi);
|
||||||
MemOp a_bits;
|
MemOp a_bits, s_bits;
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Book II, Section 1.4, Single-Copy Atomicity, specifies:
|
* Book II, Section 1.4, Single-Copy Atomicity, specifies:
|
||||||
@ -2047,10 +2060,11 @@ static TCGLabelQemuLdst *prepare_host_addr(TCGContext *s, HostAddress *h,
|
|||||||
* As of 3.0, "the non-atomic access is performed as described in
|
* As of 3.0, "the non-atomic access is performed as described in
|
||||||
* the corresponding list", which matches MO_ATOM_SUBALIGN.
|
* the corresponding list", which matches MO_ATOM_SUBALIGN.
|
||||||
*/
|
*/
|
||||||
|
s_bits = opc & MO_SIZE;
|
||||||
h->aa = atom_and_align_for_opc(s, opc,
|
h->aa = atom_and_align_for_opc(s, opc,
|
||||||
have_isa_3_00 ? MO_ATOM_SUBALIGN
|
have_isa_3_00 ? MO_ATOM_SUBALIGN
|
||||||
: MO_ATOM_IFALIGN,
|
: MO_ATOM_IFALIGN,
|
||||||
false);
|
s_bits == MO_128);
|
||||||
a_bits = h->aa.align;
|
a_bits = h->aa.align;
|
||||||
|
|
||||||
#ifdef CONFIG_SOFTMMU
|
#ifdef CONFIG_SOFTMMU
|
||||||
@ -2060,7 +2074,6 @@ static TCGLabelQemuLdst *prepare_host_addr(TCGContext *s, HostAddress *h,
|
|||||||
int fast_off = TLB_MASK_TABLE_OFS(mem_index);
|
int fast_off = TLB_MASK_TABLE_OFS(mem_index);
|
||||||
int mask_off = fast_off + offsetof(CPUTLBDescFast, mask);
|
int mask_off = fast_off + offsetof(CPUTLBDescFast, mask);
|
||||||
int table_off = fast_off + offsetof(CPUTLBDescFast, table);
|
int table_off = fast_off + offsetof(CPUTLBDescFast, table);
|
||||||
unsigned s_bits = opc & MO_SIZE;
|
|
||||||
|
|
||||||
ldst = new_ldst_label(s);
|
ldst = new_ldst_label(s);
|
||||||
ldst->is_ld = is_ld;
|
ldst->is_ld = is_ld;
|
||||||
@ -2303,6 +2316,60 @@ static void tcg_out_qemu_st(TCGContext *s, TCGReg datalo, TCGReg datahi,
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/*
 * Emit host code for a 128-bit guest memory access.
 *
 * s:        code generation context
 * datalo:   register for the low 64 bits of the value
 * datahi:   register for the high 64 bits of the value
 * addr_reg: register holding the guest address
 * oi:       combined memory operation / index descriptor
 * is_ld:    true to emit a load, false to emit a store
 *
 * A single LQ/STQ is emitted when prepare_host_addr() reports 16-byte
 * atomicity (h.aa.atom == MO_128); otherwise the access is split into
 * two 64-bit accesses at offsets 0 and 8.
 */
static void tcg_out_qemu_ldst_i128(TCGContext *s, TCGReg datalo, TCGReg datahi,
|
||||||
|
TCGReg addr_reg, MemOpIdx oi, bool is_ld)
|
||||||
|
{
|
||||||
|
TCGLabelQemuLdst *ldst;
|
||||||
|
HostAddress h;
|
||||||
|
bool need_bswap;
|
||||||
|
uint32_t insn;
|
||||||
|
TCGReg index;
|
||||||
|
|
||||||
|
/* Fills in h (base, index, atomicity/alignment); may return a label. */
ldst = prepare_host_addr(s, &h, addr_reg, -1, oi, is_ld);
|
||||||
|
|
||||||
|
/* Compose the final address, as LQ/STQ have no indexing. */
|
||||||
|
index = h.index;
|
||||||
|
/* When h.base is nonzero, sum base and index into TCG_REG_TMP1. */
if (h.base != 0) {
|
||||||
|
index = TCG_REG_TMP1;
|
||||||
|
tcg_out32(s, ADD | TAB(index, h.base, h.index));
|
||||||
|
}
|
||||||
|
need_bswap = get_memop(oi) & MO_BSWAP;
|
||||||
|
|
||||||
|
/* 16-byte atomic access: one LQ/STQ, never combined with a byte swap. */
if (h.aa.atom == MO_128) {
|
||||||
|
tcg_debug_assert(!need_bswap);
|
||||||
|
/* datalo must be odd-numbered, with datahi the register just below. */
tcg_debug_assert(datalo & 1);
|
||||||
|
tcg_debug_assert(datahi == datalo - 1);
|
||||||
|
insn = is_ld ? LQ : STQ;
|
||||||
|
/* Only datahi is encoded in the instruction. */
tcg_out32(s, insn | TAI(datahi, index, 0));
|
||||||
|
} else {
|
||||||
|
/* d1 is accessed at offset 0 and d2 at offset 8. */
TCGReg d1, d2;
|
||||||
|
|
||||||
|
/* The high half goes first for big-endian host XOR byte swap. */
if (HOST_BIG_ENDIAN ^ need_bswap) {
|
||||||
|
d1 = datahi, d2 = datalo;
|
||||||
|
} else {
|
||||||
|
d1 = datalo, d2 = datahi;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (need_bswap) {
|
||||||
|
/* Byte-reversed forms are register-indexed: R0 holds the offset 8. */
tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_R0, 8);
|
||||||
|
insn = is_ld ? LDBRX : STDBRX;
|
||||||
|
tcg_out32(s, insn | TAB(d1, 0, index));
|
||||||
|
tcg_out32(s, insn | TAB(d2, index, TCG_REG_R0));
|
||||||
|
} else {
|
||||||
|
insn = is_ld ? LD : STD;
|
||||||
|
tcg_out32(s, insn | TAI(d1, index, 0));
|
||||||
|
tcg_out32(s, insn | TAI(d2, index, 8));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/* If a label was returned, record the data registers and code pointer. */
if (ldst) {
|
||||||
|
ldst->type = TCG_TYPE_I128;
|
||||||
|
ldst->datalo_reg = datalo;
|
||||||
|
ldst->datahi_reg = datahi;
|
||||||
|
ldst->raddr = tcg_splitwx_to_rx(s->code_ptr);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
static void tcg_out_nop_fill(tcg_insn_unit *p, int count)
|
static void tcg_out_nop_fill(tcg_insn_unit *p, int count)
|
||||||
{
|
{
|
||||||
int i;
|
int i;
|
||||||
@ -2860,6 +2927,11 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc,
|
|||||||
args[4], TCG_TYPE_I64);
|
args[4], TCG_TYPE_I64);
|
||||||
}
|
}
|
||||||
break;
|
break;
|
||||||
|
case INDEX_op_qemu_ld_a32_i128:
|
||||||
|
case INDEX_op_qemu_ld_a64_i128:
|
||||||
|
tcg_debug_assert(TCG_TARGET_REG_BITS == 64);
|
||||||
|
tcg_out_qemu_ldst_i128(s, args[0], args[1], args[2], args[3], true);
|
||||||
|
break;
|
||||||
|
|
||||||
case INDEX_op_qemu_st_a64_i32:
|
case INDEX_op_qemu_st_a64_i32:
|
||||||
if (TCG_TARGET_REG_BITS == 32) {
|
if (TCG_TARGET_REG_BITS == 32) {
|
||||||
@ -2889,6 +2961,11 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc,
|
|||||||
args[4], TCG_TYPE_I64);
|
args[4], TCG_TYPE_I64);
|
||||||
}
|
}
|
||||||
break;
|
break;
|
||||||
|
case INDEX_op_qemu_st_a32_i128:
|
||||||
|
case INDEX_op_qemu_st_a64_i128:
|
||||||
|
tcg_debug_assert(TCG_TARGET_REG_BITS == 64);
|
||||||
|
tcg_out_qemu_ldst_i128(s, args[0], args[1], args[2], args[3], false);
|
||||||
|
break;
|
||||||
|
|
||||||
case INDEX_op_setcond_i32:
|
case INDEX_op_setcond_i32:
|
||||||
tcg_out_setcond(s, TCG_TYPE_I32, args[3], args[0], args[1], args[2],
|
tcg_out_setcond(s, TCG_TYPE_I32, args[3], args[0], args[1], args[2],
|
||||||
@ -3722,6 +3799,13 @@ static TCGConstraintSetIndex tcg_target_op_def(TCGOpcode op)
|
|||||||
case INDEX_op_qemu_st_a64_i64:
|
case INDEX_op_qemu_st_a64_i64:
|
||||||
return TCG_TARGET_REG_BITS == 64 ? C_O0_I2(r, r) : C_O0_I4(r, r, r, r);
|
return TCG_TARGET_REG_BITS == 64 ? C_O0_I2(r, r) : C_O0_I4(r, r, r, r);
|
||||||
|
|
||||||
|
case INDEX_op_qemu_ld_a32_i128:
|
||||||
|
case INDEX_op_qemu_ld_a64_i128:
|
||||||
|
return C_O2_I1(o, m, r);
|
||||||
|
case INDEX_op_qemu_st_a32_i128:
|
||||||
|
case INDEX_op_qemu_st_a64_i128:
|
||||||
|
return C_O0_I3(o, m, r);
|
||||||
|
|
||||||
case INDEX_op_add_vec:
|
case INDEX_op_add_vec:
|
||||||
case INDEX_op_sub_vec:
|
case INDEX_op_sub_vec:
|
||||||
case INDEX_op_mul_vec:
|
case INDEX_op_mul_vec:
|
||||||
|
@ -149,7 +149,8 @@ extern bool have_vsx;
|
|||||||
#define TCG_TARGET_HAS_mulsh_i64 1
|
#define TCG_TARGET_HAS_mulsh_i64 1
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#define TCG_TARGET_HAS_qemu_ldst_i128 0
|
#define TCG_TARGET_HAS_qemu_ldst_i128 \
|
||||||
|
(TCG_TARGET_REG_BITS == 64 && have_isa_2_07)
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* While technically Altivec could support V64, it has no 64-bit store
|
* While technically Altivec could support V64, it has no 64-bit store
|
||||||
|
Loading…
x
Reference in New Issue
Block a user