qemu-e2k/tcg/arm/tcg-target.c

1902 lines
62 KiB
C
Raw Normal View History

/*
* Tiny Code Generator for QEMU
*
* Copyright (c) 2008 Andrzej Zaborowski
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
* THE SOFTWARE.
*/
#if defined(__ARM_ARCH_7__) || \
defined(__ARM_ARCH_7A__) || \
defined(__ARM_ARCH_7EM__) || \
defined(__ARM_ARCH_7M__) || \
defined(__ARM_ARCH_7R__)
#define USE_ARMV7_INSTRUCTIONS
#endif
#if defined(USE_ARMV7_INSTRUCTIONS) || \
defined(__ARM_ARCH_6J__) || \
defined(__ARM_ARCH_6K__) || \
defined(__ARM_ARCH_6T2__) || \
defined(__ARM_ARCH_6Z__) || \
defined(__ARM_ARCH_6ZK__)
#define USE_ARMV6_INSTRUCTIONS
#endif
#if defined(USE_ARMV6_INSTRUCTIONS) || \
defined(__ARM_ARCH_5T__) || \
defined(__ARM_ARCH_5TE__) || \
defined(__ARM_ARCH_5TEJ__)
#define USE_ARMV5_INSTRUCTIONS
#endif
#ifdef USE_ARMV5_INSTRUCTIONS
static const int use_armv5_instructions = 1;
#else
static const int use_armv5_instructions = 0;
#endif
#undef USE_ARMV5_INSTRUCTIONS
#ifdef USE_ARMV6_INSTRUCTIONS
static const int use_armv6_instructions = 1;
#else
static const int use_armv6_instructions = 0;
#endif
#undef USE_ARMV6_INSTRUCTIONS
#ifdef USE_ARMV7_INSTRUCTIONS
static const int use_armv7_instructions = 1;
#else
static const int use_armv7_instructions = 0;
#endif
#undef USE_ARMV7_INSTRUCTIONS
#ifndef NDEBUG
static const char * const tcg_target_reg_names[TCG_TARGET_NB_REGS] = {
"%r0",
"%r1",
"%r2",
"%r3",
"%r4",
"%r5",
"%r6",
"%r7",
"%r8",
"%r9",
"%r10",
"%r11",
"%r12",
"%r13",
"%r14",
"%pc",
};
#endif
static const int tcg_target_reg_alloc_order[] = {
TCG_REG_R4,
TCG_REG_R5,
TCG_REG_R6,
TCG_REG_R7,
TCG_REG_R8,
TCG_REG_R9,
TCG_REG_R10,
TCG_REG_R11,
TCG_REG_R13,
TCG_REG_R0,
TCG_REG_R1,
TCG_REG_R2,
TCG_REG_R3,
TCG_REG_R12,
TCG_REG_R14,
};
static const int tcg_target_call_iarg_regs[4] = {
TCG_REG_R0, TCG_REG_R1, TCG_REG_R2, TCG_REG_R3
};
static const int tcg_target_call_oarg_regs[2] = {
TCG_REG_R0, TCG_REG_R1
};
tcg/arm: fix branch target change during code retranslation QEMU uses code retranslation to restore the CPU state when an exception happens. For it to work the retranslation must not modify the generated code. This is what is currently implemented in ARM TCG. However on CPU that don't have icache/dcache/memory synchronised like ARM, this requirement is stronger and code retranslation must not modify the generated code "atomically", as the cache line might be flushed at any moment (interrupt, exception, task switching), even if not triggered by QEMU. The probability for this to happen is very low, and depends on cache size and associativiy, machine load, interrupts, so the symptoms are might happen randomly. This requirement is currently not followed in tcg/arm, for the load/store code, which basically has the following structure: 1) tlb access code is written 2) conditional fast path code is written 3) branch is written with a temporary target 4) slow path code is written 5) branch target is updated The cache lines corresponding to the retranslated code is not flushed after code retranslation as the generated code is supposed to be the same. However if the cache line corresponding to the branch instruction is flushed between step 3 and 5, and is not flushed again before the code is executed again, the branch target is wrong. In the guest, the symptoms are MMU page fault at a random addresses, which leads to kernel page fault or segmentation faults. The patch fixes this issue by avoiding writing the branch target until it is known, that is by writing only the branch instruction first, and later only the offset. This fixes booting linux guests on ARM hosts (tested: arm, i386, mips, mipsel, sh4, sparc). Acked-by: Edgar E. Iglesias <edgar.iglesias@gmail.com> Signed-off-by: Aurelien Jarno <aurelien@aurel32.net>
2011-01-06 22:43:13 +01:00
static inline void reloc_abs32(void *code_ptr, tcg_target_long target)
{
*(uint32_t *) code_ptr = target;
}
static inline void reloc_pc24(void *code_ptr, tcg_target_long target)
{
uint32_t offset = ((target - ((tcg_target_long) code_ptr + 8)) >> 2);
*(uint32_t *) code_ptr = ((*(uint32_t *) code_ptr) & ~0xffffff)
| (offset & 0xffffff);
}
static void patch_reloc(uint8_t *code_ptr, int type,
tcg_target_long value, tcg_target_long addend)
{
switch (type) {
case R_ARM_ABS32:
tcg/arm: fix branch target change during code retranslation QEMU uses code retranslation to restore the CPU state when an exception happens. For it to work the retranslation must not modify the generated code. This is what is currently implemented in ARM TCG. However on CPU that don't have icache/dcache/memory synchronised like ARM, this requirement is stronger and code retranslation must not modify the generated code "atomically", as the cache line might be flushed at any moment (interrupt, exception, task switching), even if not triggered by QEMU. The probability for this to happen is very low, and depends on cache size and associativiy, machine load, interrupts, so the symptoms are might happen randomly. This requirement is currently not followed in tcg/arm, for the load/store code, which basically has the following structure: 1) tlb access code is written 2) conditional fast path code is written 3) branch is written with a temporary target 4) slow path code is written 5) branch target is updated The cache lines corresponding to the retranslated code is not flushed after code retranslation as the generated code is supposed to be the same. However if the cache line corresponding to the branch instruction is flushed between step 3 and 5, and is not flushed again before the code is executed again, the branch target is wrong. In the guest, the symptoms are MMU page fault at a random addresses, which leads to kernel page fault or segmentation faults. The patch fixes this issue by avoiding writing the branch target until it is known, that is by writing only the branch instruction first, and later only the offset. This fixes booting linux guests on ARM hosts (tested: arm, i386, mips, mipsel, sh4, sparc). Acked-by: Edgar E. Iglesias <edgar.iglesias@gmail.com> Signed-off-by: Aurelien Jarno <aurelien@aurel32.net>
2011-01-06 22:43:13 +01:00
reloc_abs32(code_ptr, value);
break;
case R_ARM_CALL:
case R_ARM_JUMP24:
default:
tcg_abort();
case R_ARM_PC24:
tcg/arm: fix branch target change during code retranslation QEMU uses code retranslation to restore the CPU state when an exception happens. For it to work the retranslation must not modify the generated code. This is what is currently implemented in ARM TCG. However on CPU that don't have icache/dcache/memory synchronised like ARM, this requirement is stronger and code retranslation must not modify the generated code "atomically", as the cache line might be flushed at any moment (interrupt, exception, task switching), even if not triggered by QEMU. The probability for this to happen is very low, and depends on cache size and associativiy, machine load, interrupts, so the symptoms are might happen randomly. This requirement is currently not followed in tcg/arm, for the load/store code, which basically has the following structure: 1) tlb access code is written 2) conditional fast path code is written 3) branch is written with a temporary target 4) slow path code is written 5) branch target is updated The cache lines corresponding to the retranslated code is not flushed after code retranslation as the generated code is supposed to be the same. However if the cache line corresponding to the branch instruction is flushed between step 3 and 5, and is not flushed again before the code is executed again, the branch target is wrong. In the guest, the symptoms are MMU page fault at a random addresses, which leads to kernel page fault or segmentation faults. The patch fixes this issue by avoiding writing the branch target until it is known, that is by writing only the branch instruction first, and later only the offset. This fixes booting linux guests on ARM hosts (tested: arm, i386, mips, mipsel, sh4, sparc). Acked-by: Edgar E. Iglesias <edgar.iglesias@gmail.com> Signed-off-by: Aurelien Jarno <aurelien@aurel32.net>
2011-01-06 22:43:13 +01:00
reloc_pc24(code_ptr, value);
break;
}
}
/* parse target specific constraints */
static int target_parse_constraint(TCGArgConstraint *ct, const char **pct_str)
{
const char *ct_str;
ct_str = *pct_str;
switch (ct_str[0]) {
case 'I':
ct->ct |= TCG_CT_CONST_ARM;
break;
case 'r':
ct->ct |= TCG_CT_REG;
tcg_regset_set32(ct->u.regs, 0, (1 << TCG_TARGET_NB_REGS) - 1);
break;
/* qemu_ld address */
case 'l':
ct->ct |= TCG_CT_REG;
tcg_regset_set32(ct->u.regs, 0, (1 << TCG_TARGET_NB_REGS) - 1);
#ifdef CONFIG_SOFTMMU
/* r0 and r1 will be overwritten when reading the tlb entry,
so don't use these. */
tcg_regset_reset_reg(ct->u.regs, TCG_REG_R0);
tcg_regset_reset_reg(ct->u.regs, TCG_REG_R1);
#if TARGET_LONG_BITS == 64
/* If we're passing env to the helper as r0 and need a regpair
* for the address then r2 will be overwritten as we're setting
* up the args to the helper.
*/
tcg_regset_reset_reg(ct->u.regs, TCG_REG_R2);
#endif
#endif
break;
case 'L':
ct->ct |= TCG_CT_REG;
tcg_regset_set32(ct->u.regs, 0, (1 << TCG_TARGET_NB_REGS) - 1);
#ifdef CONFIG_SOFTMMU
/* r1 is still needed to load data_reg or data_reg2,
so don't use it. */
tcg_regset_reset_reg(ct->u.regs, TCG_REG_R1);
#endif
break;
/* qemu_st address & data_reg */
case 's':
ct->ct |= TCG_CT_REG;
tcg_regset_set32(ct->u.regs, 0, (1 << TCG_TARGET_NB_REGS) - 1);
/* r0 and r1 will be overwritten when reading the tlb entry
(softmmu only) and doing the byte swapping, so don't
use these. */
tcg_regset_reset_reg(ct->u.regs, TCG_REG_R0);
tcg_regset_reset_reg(ct->u.regs, TCG_REG_R1);
#if defined(CONFIG_SOFTMMU) && (TARGET_LONG_BITS == 64)
/* Avoid clashes with registers being used for helper args */
tcg_regset_reset_reg(ct->u.regs, TCG_REG_R2);
tcg_regset_reset_reg(ct->u.regs, TCG_REG_R3);
#endif
break;
/* qemu_st64 data_reg2 */
case 'S':
ct->ct |= TCG_CT_REG;
tcg_regset_set32(ct->u.regs, 0, (1 << TCG_TARGET_NB_REGS) - 1);
/* r0 and r1 will be overwritten when reading the tlb entry
(softmmu only) and doing the byte swapping, so don't
use these. */
tcg_regset_reset_reg(ct->u.regs, TCG_REG_R0);
tcg_regset_reset_reg(ct->u.regs, TCG_REG_R1);
#ifdef CONFIG_SOFTMMU
/* r2 is still needed to load data_reg, so don't use it. */
tcg_regset_reset_reg(ct->u.regs, TCG_REG_R2);
#if TARGET_LONG_BITS == 64
/* Avoid clashes with registers being used for helper args */
tcg_regset_reset_reg(ct->u.regs, TCG_REG_R3);
#endif
#endif
break;
default:
return -1;
}
ct_str++;
*pct_str = ct_str;
return 0;
}
static inline uint32_t rotl(uint32_t val, int n)
{
return (val << n) | (val >> (32 - n));
}
/* ARM immediates for ALU instructions are made of an unsigned 8-bit
right-rotated by an even amount between 0 and 30. */
static inline int encode_imm(uint32_t imm)
{
int shift;
/* simple case, only lower bits */
if ((imm & ~0xff) == 0)
return 0;
/* then try a simple even shift */
shift = ctz32(imm) & ~1;
if (((imm >> shift) & ~0xff) == 0)
return 32 - shift;
/* now try harder with rotations */
if ((rotl(imm, 2) & ~0xff) == 0)
return 2;
if ((rotl(imm, 4) & ~0xff) == 0)
return 4;
if ((rotl(imm, 6) & ~0xff) == 0)
return 6;
/* imm can't be encoded */
return -1;
}
static inline int check_fit_imm(uint32_t imm)
{
return encode_imm(imm) >= 0;
}
/* Test if a constant matches the constraint.
* TODO: define constraints for:
*
* ldr/str offset: between -0xfff and 0xfff
* ldrh/strh offset: between -0xff and 0xff
* mov operand2: values represented with x << (2 * y), x < 0x100
* add, sub, eor...: ditto
*/
static inline int tcg_target_const_match(tcg_target_long val,
const TCGArgConstraint *arg_ct)
{
int ct;
ct = arg_ct->ct;
if (ct & TCG_CT_CONST)
return 1;
else if ((ct & TCG_CT_CONST_ARM) && check_fit_imm(val))
return 1;
else
return 0;
}
enum arm_data_opc_e {
ARITH_AND = 0x0,
ARITH_EOR = 0x1,
ARITH_SUB = 0x2,
ARITH_RSB = 0x3,
ARITH_ADD = 0x4,
ARITH_ADC = 0x5,
ARITH_SBC = 0x6,
ARITH_RSC = 0x7,
ARITH_TST = 0x8,
ARITH_CMP = 0xa,
ARITH_CMN = 0xb,
ARITH_ORR = 0xc,
ARITH_MOV = 0xd,
ARITH_BIC = 0xe,
ARITH_MVN = 0xf,
};
#define TO_CPSR(opc) \
((opc == ARITH_CMP || opc == ARITH_CMN || opc == ARITH_TST) << 20)
#define SHIFT_IMM_LSL(im) (((im) << 7) | 0x00)
#define SHIFT_IMM_LSR(im) (((im) << 7) | 0x20)
#define SHIFT_IMM_ASR(im) (((im) << 7) | 0x40)
#define SHIFT_IMM_ROR(im) (((im) << 7) | 0x60)
#define SHIFT_REG_LSL(rs) (((rs) << 8) | 0x10)
#define SHIFT_REG_LSR(rs) (((rs) << 8) | 0x30)
#define SHIFT_REG_ASR(rs) (((rs) << 8) | 0x50)
#define SHIFT_REG_ROR(rs) (((rs) << 8) | 0x70)
enum arm_cond_code_e {
COND_EQ = 0x0,
COND_NE = 0x1,
COND_CS = 0x2, /* Unsigned greater or equal */
COND_CC = 0x3, /* Unsigned less than */
COND_MI = 0x4, /* Negative */
COND_PL = 0x5, /* Zero or greater */
COND_VS = 0x6, /* Overflow */
COND_VC = 0x7, /* No overflow */
COND_HI = 0x8, /* Unsigned greater than */
COND_LS = 0x9, /* Unsigned less or equal */
COND_GE = 0xa,
COND_LT = 0xb,
COND_GT = 0xc,
COND_LE = 0xd,
COND_AL = 0xe,
};
static const uint8_t tcg_cond_to_arm_cond[] = {
[TCG_COND_EQ] = COND_EQ,
[TCG_COND_NE] = COND_NE,
[TCG_COND_LT] = COND_LT,
[TCG_COND_GE] = COND_GE,
[TCG_COND_LE] = COND_LE,
[TCG_COND_GT] = COND_GT,
/* unsigned */
[TCG_COND_LTU] = COND_CC,
[TCG_COND_GEU] = COND_CS,
[TCG_COND_LEU] = COND_LS,
[TCG_COND_GTU] = COND_HI,
};
static inline void tcg_out_bx(TCGContext *s, int cond, int rn)
{
tcg_out32(s, (cond << 28) | 0x012fff10 | rn);
}
static inline void tcg_out_b(TCGContext *s, int cond, int32_t offset)
{
tcg_out32(s, (cond << 28) | 0x0a000000 |
(((offset - 8) >> 2) & 0x00ffffff));
}
static inline void tcg_out_b_noaddr(TCGContext *s, int cond)
{
/* We pay attention here to not modify the branch target by skipping
the corresponding bytes. This ensure that caches and memory are
kept coherent during retranslation. */
#ifdef HOST_WORDS_BIGENDIAN
tcg_out8(s, (cond << 4) | 0x0a);
s->code_ptr += 3;
#else
s->code_ptr += 3;
tcg_out8(s, (cond << 4) | 0x0a);
#endif
}
static inline void tcg_out_bl(TCGContext *s, int cond, int32_t offset)
{
tcg_out32(s, (cond << 28) | 0x0b000000 |
(((offset - 8) >> 2) & 0x00ffffff));
}
static inline void tcg_out_blx(TCGContext *s, int cond, int rn)
{
tcg_out32(s, (cond << 28) | 0x012fff30 | rn);
}
static inline void tcg_out_blx_imm(TCGContext *s, int32_t offset)
{
tcg_out32(s, 0xfa000000 | ((offset & 2) << 23) |
(((offset - 8) >> 2) & 0x00ffffff));
}
static inline void tcg_out_dat_reg(TCGContext *s,
int cond, int opc, int rd, int rn, int rm, int shift)
{
tcg_out32(s, (cond << 28) | (0 << 25) | (opc << 21) | TO_CPSR(opc) |
(rn << 16) | (rd << 12) | shift | rm);
}
static inline void tcg_out_mov_reg(TCGContext *s, int cond, int rd, int rm)
{
/* Simple reg-reg move, optimising out the 'do nothing' case */
if (rd != rm) {
tcg_out_dat_reg(s, cond, ARITH_MOV, rd, 0, rm, SHIFT_IMM_LSL(0));
}
}
static inline void tcg_out_dat_reg2(TCGContext *s,
int cond, int opc0, int opc1, int rd0, int rd1,
int rn0, int rn1, int rm0, int rm1, int shift)
{
if (rd0 == rn1 || rd0 == rm1) {
tcg_out32(s, (cond << 28) | (0 << 25) | (opc0 << 21) | (1 << 20) |
(rn0 << 16) | (8 << 12) | shift | rm0);
tcg_out32(s, (cond << 28) | (0 << 25) | (opc1 << 21) |
(rn1 << 16) | (rd1 << 12) | shift | rm1);
tcg_out_dat_reg(s, cond, ARITH_MOV,
rd0, 0, TCG_REG_R8, SHIFT_IMM_LSL(0));
} else {
tcg_out32(s, (cond << 28) | (0 << 25) | (opc0 << 21) | (1 << 20) |
(rn0 << 16) | (rd0 << 12) | shift | rm0);
tcg_out32(s, (cond << 28) | (0 << 25) | (opc1 << 21) |
(rn1 << 16) | (rd1 << 12) | shift | rm1);
}
}
static inline void tcg_out_dat_imm(TCGContext *s,
int cond, int opc, int rd, int rn, int im)
{
tcg_out32(s, (cond << 28) | (1 << 25) | (opc << 21) | TO_CPSR(opc) |
(rn << 16) | (rd << 12) | im);
}
static inline void tcg_out_movi32(TCGContext *s,
int cond, int rd, uint32_t arg)
{
/* TODO: This is very suboptimal, we can easily have a constant
* pool somewhere after all the instructions. */
if ((int)arg < 0 && (int)arg >= -0x100) {
tcg_out_dat_imm(s, cond, ARITH_MVN, rd, 0, (~arg) & 0xff);
} else if (use_armv7_instructions) {
/* use movw/movt */
/* movw */
tcg_out32(s, (cond << 28) | 0x03000000 | (rd << 12)
| ((arg << 4) & 0x000f0000) | (arg & 0xfff));
if (arg & 0xffff0000) {
/* movt */
tcg_out32(s, (cond << 28) | 0x03400000 | (rd << 12)
| ((arg >> 12) & 0x000f0000) | ((arg >> 16) & 0xfff));
}
} else {
int opc = ARITH_MOV;
int rn = 0;
do {
int i, rot;
i = ctz32(arg) & ~1;
rot = ((32 - i) << 7) & 0xf00;
tcg_out_dat_imm(s, cond, opc, rd, rn, ((arg >> i) & 0xff) | rot);
arg &= ~(0xff << i);
opc = ARITH_ORR;
rn = rd;
} while (arg);
}
}
static inline void tcg_out_mul32(TCGContext *s,
int cond, int rd, int rs, int rm)
{
if (rd != rm)
tcg_out32(s, (cond << 28) | (rd << 16) | (0 << 12) |
(rs << 8) | 0x90 | rm);
else if (rd != rs)
tcg_out32(s, (cond << 28) | (rd << 16) | (0 << 12) |
(rm << 8) | 0x90 | rs);
else {
tcg_out32(s, (cond << 28) | ( 8 << 16) | (0 << 12) |
(rs << 8) | 0x90 | rm);
tcg_out_dat_reg(s, cond, ARITH_MOV,
rd, 0, TCG_REG_R8, SHIFT_IMM_LSL(0));
}
}
static inline void tcg_out_umull32(TCGContext *s,
int cond, int rd0, int rd1, int rs, int rm)
{
if (rd0 != rm && rd1 != rm)
tcg_out32(s, (cond << 28) | 0x800090 |
(rd1 << 16) | (rd0 << 12) | (rs << 8) | rm);
else if (rd0 != rs && rd1 != rs)
tcg_out32(s, (cond << 28) | 0x800090 |
(rd1 << 16) | (rd0 << 12) | (rm << 8) | rs);
else {
tcg_out_dat_reg(s, cond, ARITH_MOV,
TCG_REG_R8, 0, rm, SHIFT_IMM_LSL(0));
tcg_out32(s, (cond << 28) | 0x800098 |
(rd1 << 16) | (rd0 << 12) | (rs << 8));
}
}
static inline void tcg_out_smull32(TCGContext *s,
int cond, int rd0, int rd1, int rs, int rm)
{
if (rd0 != rm && rd1 != rm)
tcg_out32(s, (cond << 28) | 0xc00090 |
(rd1 << 16) | (rd0 << 12) | (rs << 8) | rm);
else if (rd0 != rs && rd1 != rs)
tcg_out32(s, (cond << 28) | 0xc00090 |
(rd1 << 16) | (rd0 << 12) | (rm << 8) | rs);
else {
tcg_out_dat_reg(s, cond, ARITH_MOV,
TCG_REG_R8, 0, rm, SHIFT_IMM_LSL(0));
tcg_out32(s, (cond << 28) | 0xc00098 |
(rd1 << 16) | (rd0 << 12) | (rs << 8));
}
}
static inline void tcg_out_ext8s(TCGContext *s, int cond,
int rd, int rn)
{
if (use_armv6_instructions) {
/* sxtb */
tcg_out32(s, 0x06af0070 | (cond << 28) | (rd << 12) | rn);
} else {
tcg_out_dat_reg(s, cond, ARITH_MOV,
rd, 0, rn, SHIFT_IMM_LSL(24));
tcg_out_dat_reg(s, cond, ARITH_MOV,
rd, 0, rd, SHIFT_IMM_ASR(24));
}
}
static inline void tcg_out_ext8u(TCGContext *s, int cond,
int rd, int rn)
{
tcg_out_dat_imm(s, cond, ARITH_AND, rd, rn, 0xff);
}
static inline void tcg_out_ext16s(TCGContext *s, int cond,
int rd, int rn)
{
if (use_armv6_instructions) {
/* sxth */
tcg_out32(s, 0x06bf0070 | (cond << 28) | (rd << 12) | rn);
} else {
tcg_out_dat_reg(s, cond, ARITH_MOV,
rd, 0, rn, SHIFT_IMM_LSL(16));
tcg_out_dat_reg(s, cond, ARITH_MOV,
rd, 0, rd, SHIFT_IMM_ASR(16));
}
}
static inline void tcg_out_ext16u(TCGContext *s, int cond,
int rd, int rn)
{
if (use_armv6_instructions) {
/* uxth */
tcg_out32(s, 0x06ff0070 | (cond << 28) | (rd << 12) | rn);
} else {
tcg_out_dat_reg(s, cond, ARITH_MOV,
rd, 0, rn, SHIFT_IMM_LSL(16));
tcg_out_dat_reg(s, cond, ARITH_MOV,
rd, 0, rd, SHIFT_IMM_LSR(16));
}
}
static inline void tcg_out_bswap16s(TCGContext *s, int cond, int rd, int rn)
{
if (use_armv6_instructions) {
/* revsh */
tcg_out32(s, 0x06ff0fb0 | (cond << 28) | (rd << 12) | rn);
} else {
tcg_out_dat_reg(s, cond, ARITH_MOV,
TCG_REG_R8, 0, rn, SHIFT_IMM_LSL(24));
tcg_out_dat_reg(s, cond, ARITH_MOV,
TCG_REG_R8, 0, TCG_REG_R8, SHIFT_IMM_ASR(16));
tcg_out_dat_reg(s, cond, ARITH_ORR,
rd, TCG_REG_R8, rn, SHIFT_IMM_LSR(8));
}
}
static inline void tcg_out_bswap16(TCGContext *s, int cond, int rd, int rn)
{
if (use_armv6_instructions) {
/* rev16 */
tcg_out32(s, 0x06bf0fb0 | (cond << 28) | (rd << 12) | rn);
} else {
tcg_out_dat_reg(s, cond, ARITH_MOV,
TCG_REG_R8, 0, rn, SHIFT_IMM_LSL(24));
tcg_out_dat_reg(s, cond, ARITH_MOV,
TCG_REG_R8, 0, TCG_REG_R8, SHIFT_IMM_LSR(16));
tcg_out_dat_reg(s, cond, ARITH_ORR,
rd, TCG_REG_R8, rn, SHIFT_IMM_LSR(8));
}
}
static inline void tcg_out_bswap32(TCGContext *s, int cond, int rd, int rn)
{
if (use_armv6_instructions) {
/* rev */
tcg_out32(s, 0x06bf0f30 | (cond << 28) | (rd << 12) | rn);
} else {
tcg_out_dat_reg(s, cond, ARITH_EOR,
TCG_REG_R8, rn, rn, SHIFT_IMM_ROR(16));
tcg_out_dat_imm(s, cond, ARITH_BIC,
TCG_REG_R8, TCG_REG_R8, 0xff | 0x800);
tcg_out_dat_reg(s, cond, ARITH_MOV,
rd, 0, rn, SHIFT_IMM_ROR(8));
tcg_out_dat_reg(s, cond, ARITH_EOR,
rd, rd, TCG_REG_R8, SHIFT_IMM_LSR(8));
}
}
static inline void tcg_out_ld32_12(TCGContext *s, int cond,
int rd, int rn, tcg_target_long im)
{
if (im >= 0)
tcg_out32(s, (cond << 28) | 0x05900000 |
(rn << 16) | (rd << 12) | (im & 0xfff));
else
tcg_out32(s, (cond << 28) | 0x05100000 |
(rn << 16) | (rd << 12) | ((-im) & 0xfff));
}
static inline void tcg_out_st32_12(TCGContext *s, int cond,
int rd, int rn, tcg_target_long im)
{
if (im >= 0)
tcg_out32(s, (cond << 28) | 0x05800000 |
(rn << 16) | (rd << 12) | (im & 0xfff));
else
tcg_out32(s, (cond << 28) | 0x05000000 |
(rn << 16) | (rd << 12) | ((-im) & 0xfff));
}
static inline void tcg_out_ld32_r(TCGContext *s, int cond,
int rd, int rn, int rm)
{
tcg_out32(s, (cond << 28) | 0x07900000 |
(rn << 16) | (rd << 12) | rm);
}
static inline void tcg_out_st32_r(TCGContext *s, int cond,
int rd, int rn, int rm)
{
tcg_out32(s, (cond << 28) | 0x07800000 |
(rn << 16) | (rd << 12) | rm);
}
/* Register pre-increment with base writeback. */
static inline void tcg_out_ld32_rwb(TCGContext *s, int cond,
int rd, int rn, int rm)
{
tcg_out32(s, (cond << 28) | 0x07b00000 |
(rn << 16) | (rd << 12) | rm);
}
static inline void tcg_out_st32_rwb(TCGContext *s, int cond,
int rd, int rn, int rm)
{
tcg_out32(s, (cond << 28) | 0x07a00000 |
(rn << 16) | (rd << 12) | rm);
}
static inline void tcg_out_ld16u_8(TCGContext *s, int cond,
int rd, int rn, tcg_target_long im)
{
if (im >= 0)
tcg_out32(s, (cond << 28) | 0x01d000b0 |
(rn << 16) | (rd << 12) |
((im & 0xf0) << 4) | (im & 0xf));
else
tcg_out32(s, (cond << 28) | 0x015000b0 |
(rn << 16) | (rd << 12) |
(((-im) & 0xf0) << 4) | ((-im) & 0xf));
}
static inline void tcg_out_st16_8(TCGContext *s, int cond,
int rd, int rn, tcg_target_long im)
{
if (im >= 0)
tcg_out32(s, (cond << 28) | 0x01c000b0 |
(rn << 16) | (rd << 12) |
((im & 0xf0) << 4) | (im & 0xf));
else
tcg_out32(s, (cond << 28) | 0x014000b0 |
(rn << 16) | (rd << 12) |
(((-im) & 0xf0) << 4) | ((-im) & 0xf));
}
static inline void tcg_out_ld16u_r(TCGContext *s, int cond,
int rd, int rn, int rm)
{
tcg_out32(s, (cond << 28) | 0x019000b0 |
(rn << 16) | (rd << 12) | rm);
}
static inline void tcg_out_st16_r(TCGContext *s, int cond,
int rd, int rn, int rm)
{
tcg_out32(s, (cond << 28) | 0x018000b0 |
(rn << 16) | (rd << 12) | rm);
}
static inline void tcg_out_ld16s_8(TCGContext *s, int cond,
int rd, int rn, tcg_target_long im)
{
if (im >= 0)
tcg_out32(s, (cond << 28) | 0x01d000f0 |
(rn << 16) | (rd << 12) |
((im & 0xf0) << 4) | (im & 0xf));
else
tcg_out32(s, (cond << 28) | 0x015000f0 |
(rn << 16) | (rd << 12) |
(((-im) & 0xf0) << 4) | ((-im) & 0xf));
}
static inline void tcg_out_ld16s_r(TCGContext *s, int cond,
int rd, int rn, int rm)
{
tcg_out32(s, (cond << 28) | 0x019000f0 |
(rn << 16) | (rd << 12) | rm);
}
static inline void tcg_out_ld8_12(TCGContext *s, int cond,
int rd, int rn, tcg_target_long im)
{
if (im >= 0)
tcg_out32(s, (cond << 28) | 0x05d00000 |
(rn << 16) | (rd << 12) | (im & 0xfff));
else
tcg_out32(s, (cond << 28) | 0x05500000 |
(rn << 16) | (rd << 12) | ((-im) & 0xfff));
}
static inline void tcg_out_st8_12(TCGContext *s, int cond,
int rd, int rn, tcg_target_long im)
{
if (im >= 0)
tcg_out32(s, (cond << 28) | 0x05c00000 |
(rn << 16) | (rd << 12) | (im & 0xfff));
else
tcg_out32(s, (cond << 28) | 0x05400000 |
(rn << 16) | (rd << 12) | ((-im) & 0xfff));
}
static inline void tcg_out_ld8_r(TCGContext *s, int cond,
int rd, int rn, int rm)
{
tcg_out32(s, (cond << 28) | 0x07d00000 |
(rn << 16) | (rd << 12) | rm);
}
static inline void tcg_out_st8_r(TCGContext *s, int cond,
int rd, int rn, int rm)
{
tcg_out32(s, (cond << 28) | 0x07c00000 |
(rn << 16) | (rd << 12) | rm);
}
static inline void tcg_out_ld8s_8(TCGContext *s, int cond,
int rd, int rn, tcg_target_long im)
{
if (im >= 0)
tcg_out32(s, (cond << 28) | 0x01d000d0 |
(rn << 16) | (rd << 12) |
((im & 0xf0) << 4) | (im & 0xf));
else
tcg_out32(s, (cond << 28) | 0x015000d0 |
(rn << 16) | (rd << 12) |
(((-im) & 0xf0) << 4) | ((-im) & 0xf));
}
static inline void tcg_out_ld8s_r(TCGContext *s, int cond,
int rd, int rn, int rm)
{
tcg_out32(s, (cond << 28) | 0x019000d0 |
(rn << 16) | (rd << 12) | rm);
}
static inline void tcg_out_ld32u(TCGContext *s, int cond,
int rd, int rn, int32_t offset)
{
if (offset > 0xfff || offset < -0xfff) {
tcg_out_movi32(s, cond, TCG_REG_R8, offset);
tcg_out_ld32_r(s, cond, rd, rn, TCG_REG_R8);
} else
tcg_out_ld32_12(s, cond, rd, rn, offset);
}
static inline void tcg_out_st32(TCGContext *s, int cond,
int rd, int rn, int32_t offset)
{
if (offset > 0xfff || offset < -0xfff) {
tcg_out_movi32(s, cond, TCG_REG_R8, offset);
tcg_out_st32_r(s, cond, rd, rn, TCG_REG_R8);
} else
tcg_out_st32_12(s, cond, rd, rn, offset);
}
static inline void tcg_out_ld16u(TCGContext *s, int cond,
int rd, int rn, int32_t offset)
{
if (offset > 0xff || offset < -0xff) {
tcg_out_movi32(s, cond, TCG_REG_R8, offset);
tcg_out_ld16u_r(s, cond, rd, rn, TCG_REG_R8);
} else
tcg_out_ld16u_8(s, cond, rd, rn, offset);
}
static inline void tcg_out_ld16s(TCGContext *s, int cond,
int rd, int rn, int32_t offset)
{
if (offset > 0xff || offset < -0xff) {
tcg_out_movi32(s, cond, TCG_REG_R8, offset);
tcg_out_ld16s_r(s, cond, rd, rn, TCG_REG_R8);
} else
tcg_out_ld16s_8(s, cond, rd, rn, offset);
}
static inline void tcg_out_st16(TCGContext *s, int cond,
int rd, int rn, int32_t offset)
{
if (offset > 0xff || offset < -0xff) {
tcg_out_movi32(s, cond, TCG_REG_R8, offset);
tcg_out_st16_r(s, cond, rd, rn, TCG_REG_R8);
} else
tcg_out_st16_8(s, cond, rd, rn, offset);
}
static inline void tcg_out_ld8u(TCGContext *s, int cond,
int rd, int rn, int32_t offset)
{
if (offset > 0xfff || offset < -0xfff) {
tcg_out_movi32(s, cond, TCG_REG_R8, offset);
tcg_out_ld8_r(s, cond, rd, rn, TCG_REG_R8);
} else
tcg_out_ld8_12(s, cond, rd, rn, offset);
}
static inline void tcg_out_ld8s(TCGContext *s, int cond,
int rd, int rn, int32_t offset)
{
if (offset > 0xff || offset < -0xff) {
tcg_out_movi32(s, cond, TCG_REG_R8, offset);
tcg_out_ld8s_r(s, cond, rd, rn, TCG_REG_R8);
} else
tcg_out_ld8s_8(s, cond, rd, rn, offset);
}
static inline void tcg_out_st8(TCGContext *s, int cond,
int rd, int rn, int32_t offset)
{
if (offset > 0xfff || offset < -0xfff) {
tcg_out_movi32(s, cond, TCG_REG_R8, offset);
tcg_out_st8_r(s, cond, rd, rn, TCG_REG_R8);
} else
tcg_out_st8_12(s, cond, rd, rn, offset);
}
/* The _goto case is normally between TBs within the same code buffer,
* and with the code buffer limited to 16MB we shouldn't need the long
* case.
*
* .... except to the prologue that is in its own buffer.
*/
static inline void tcg_out_goto(TCGContext *s, int cond, uint32_t addr)
{
int32_t val;
if (addr & 1) {
/* goto to a Thumb destination isn't supported */
tcg_abort();
}
val = addr - (tcg_target_long) s->code_ptr;
if (val - 8 < 0x01fffffd && val - 8 > -0x01fffffd)
tcg_out_b(s, cond, val);
else {
if (cond == COND_AL) {
tcg_out_ld32_12(s, COND_AL, TCG_REG_PC, TCG_REG_PC, -4);
tcg_out32(s, addr);
} else {
tcg_out_movi32(s, cond, TCG_REG_R8, val - 8);
tcg_out_dat_reg(s, cond, ARITH_ADD,
TCG_REG_PC, TCG_REG_PC,
TCG_REG_R8, SHIFT_IMM_LSL(0));
}
}
}
/* The call case is mostly used for helpers - so it's not unreasonable
* for them to be beyond branch range */
static inline void tcg_out_call(TCGContext *s, uint32_t addr)
{
int32_t val;
val = addr - (tcg_target_long) s->code_ptr;
if (val - 8 < 0x02000000 && val - 8 >= -0x02000000) {
if (addr & 1) {
/* Use BLX if the target is in Thumb mode */
if (!use_armv5_instructions) {
tcg_abort();
}
tcg_out_blx_imm(s, val);
} else {
tcg_out_bl(s, COND_AL, val);
}
} else {
tcg_out_dat_imm(s, COND_AL, ARITH_ADD, TCG_REG_R14, TCG_REG_PC, 4);
tcg_out_ld32_12(s, COND_AL, TCG_REG_PC, TCG_REG_PC, -4);
tcg_out32(s, addr);
}
}
static inline void tcg_out_callr(TCGContext *s, int cond, int arg)
{
if (use_armv5_instructions) {
tcg_out_blx(s, cond, arg);
} else {
tcg_out_dat_reg(s, cond, ARITH_MOV, TCG_REG_R14, 0,
TCG_REG_PC, SHIFT_IMM_LSL(0));
tcg_out_bx(s, cond, arg);
}
}
static inline void tcg_out_goto_label(TCGContext *s, int cond, int label_index)
{
TCGLabel *l = &s->labels[label_index];
if (l->has_value)
tcg_out_goto(s, cond, l->u.value);
else if (cond == COND_AL) {
tcg_out_ld32_12(s, COND_AL, TCG_REG_PC, TCG_REG_PC, -4);
tcg_out_reloc(s, s->code_ptr, R_ARM_ABS32, label_index, 31337);
s->code_ptr += 4;
} else {
/* Probably this should be preferred even for COND_AL... */
tcg_out_reloc(s, s->code_ptr, R_ARM_PC24, label_index, 31337);
tcg_out_b_noaddr(s, cond);
}
}
#ifdef CONFIG_SOFTMMU
#include "../../softmmu_defs.h"
/* helper signature: helper_ld_mmu(CPUState *env, target_ulong addr,
int mmu_idx) */
static const void * const qemu_ld_helpers[4] = {
helper_ldb_mmu,
helper_ldw_mmu,
helper_ldl_mmu,
helper_ldq_mmu,
};
/* helper signature: helper_st_mmu(CPUState *env, target_ulong addr,
uintxx_t val, int mmu_idx) */
static const void * const qemu_st_helpers[4] = {
helper_stb_mmu,
helper_stw_mmu,
helper_stl_mmu,
helper_stq_mmu,
};
/* Helper routines for marshalling helper function arguments into
* the correct registers and stack.
* argreg is where we want to put this argument, arg is the argument itself.
* Return value is the updated argreg ready for the next call.
* Note that argreg 0..3 is real registers, 4+ on stack.
* When we reach the first stacked argument, we allocate space for it
* and the following stacked arguments using "str r8, [sp, #-0x10]!".
* Following arguments are filled in with "str r8, [sp, #0xNN]".
* For more than 4 stacked arguments we'd need to know how much
* space to allocate when we pushed the first stacked argument.
* We don't need this, so don't implement it (and will assert if you try it.)
*
* We provide routines for arguments which are: immediate, 32 bit
* value in register, 16 and 8 bit values in register (which must be zero
* extended before use) and 64 bit value in a lo:hi register pair.
*/
#define DEFINE_TCG_OUT_ARG(NAME, ARGPARAM) \
static TCGReg NAME(TCGContext *s, TCGReg argreg, ARGPARAM) \
{ \
if (argreg < 4) { \
TCG_OUT_ARG_GET_ARG(argreg); \
} else if (argreg == 4) { \
TCG_OUT_ARG_GET_ARG(TCG_REG_R8); \
tcg_out32(s, (COND_AL << 28) | 0x052d8010); \
} else { \
assert(argreg < 8); \
TCG_OUT_ARG_GET_ARG(TCG_REG_R8); \
tcg_out32(s, (COND_AL << 28) | 0x058d8000 | (argreg - 4) * 4); \
} \
return argreg + 1; \
}
#define TCG_OUT_ARG_GET_ARG(A) tcg_out_dat_imm(s, COND_AL, ARITH_MOV, A, 0, arg)
DEFINE_TCG_OUT_ARG(tcg_out_arg_imm32, uint32_t arg)
#undef TCG_OUT_ARG_GET_ARG
#define TCG_OUT_ARG_GET_ARG(A) tcg_out_ext8u(s, COND_AL, A, arg)
DEFINE_TCG_OUT_ARG(tcg_out_arg_reg8, TCGReg arg)
#undef TCG_OUT_ARG_GET_ARG
#define TCG_OUT_ARG_GET_ARG(A) tcg_out_ext16u(s, COND_AL, A, arg)
DEFINE_TCG_OUT_ARG(tcg_out_arg_reg16, TCGReg arg)
#undef TCG_OUT_ARG_GET_ARG
/* We don't use the macro for this one to avoid an unnecessary reg-reg
* move when storing to the stack.
*/
static TCGReg tcg_out_arg_reg32(TCGContext *s, TCGReg argreg, TCGReg arg)
{
if (argreg < 4) {
tcg_out_mov_reg(s, COND_AL, argreg, arg);
} else if (argreg == 4) {
/* str arg, [sp, #-0x10]! */
tcg_out32(s, (COND_AL << 28) | 0x052d0010 | (arg << 12));
} else {
assert(argreg < 8);
/* str arg, [sp, #0xNN] */
tcg_out32(s, (COND_AL << 28) | 0x058d0000 |
(arg << 12) | (argreg - 4) * 4);
}
return argreg + 1;
}
static inline TCGReg tcg_out_arg_reg64(TCGContext *s, TCGReg argreg,
TCGReg arglo, TCGReg arghi)
{
/* 64 bit arguments must go in even/odd register pairs
* and in 8-aligned stack slots.
*/
if (argreg & 1) {
argreg++;
}
argreg = tcg_out_arg_reg32(s, argreg, arglo);
argreg = tcg_out_arg_reg32(s, argreg, arghi);
return argreg;
}
static inline void tcg_out_arg_stacktidy(TCGContext *s, TCGReg argreg)
{
/* Output any necessary post-call cleanup of the stack */
if (argreg > 4) {
tcg_out_dat_imm(s, COND_AL, ARITH_ADD, TCG_REG_R13, TCG_REG_R13, 0x10);
}
}
#endif
#define TLB_SHIFT (CPU_TLB_ENTRY_BITS + CPU_TLB_BITS)
static inline void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args, int opc)
{
int addr_reg, data_reg, data_reg2, bswap;
#ifdef CONFIG_SOFTMMU
int mem_index, s_bits;
TCGReg argreg;
# if TARGET_LONG_BITS == 64
int addr_reg2;
# endif
uint32_t *label_ptr;
#endif
#ifdef TARGET_WORDS_BIGENDIAN
bswap = 1;
#else
bswap = 0;
#endif
data_reg = *args++;
if (opc == 3)
data_reg2 = *args++;
else
data_reg2 = 0; /* suppress warning */
addr_reg = *args++;
#ifdef CONFIG_SOFTMMU
# if TARGET_LONG_BITS == 64
addr_reg2 = *args++;
# endif
mem_index = *args;
s_bits = opc & 3;
/* Should generate something like the following:
* shr r8, addr_reg, #TARGET_PAGE_BITS
* and r0, r8, #(CPU_TLB_SIZE - 1) @ Assumption: CPU_TLB_BITS <= 8
* add r0, env, r0 lsl #CPU_TLB_ENTRY_BITS
*/
# if CPU_TLB_BITS > 8
# error
# endif
tcg_out_dat_reg(s, COND_AL, ARITH_MOV, TCG_REG_R8,
0, addr_reg, SHIFT_IMM_LSR(TARGET_PAGE_BITS));
tcg_out_dat_imm(s, COND_AL, ARITH_AND,
TCG_REG_R0, TCG_REG_R8, CPU_TLB_SIZE - 1);
tcg_out_dat_reg(s, COND_AL, ARITH_ADD, TCG_REG_R0, TCG_AREG0,
TCG_REG_R0, SHIFT_IMM_LSL(CPU_TLB_ENTRY_BITS));
/* In the
* ldr r1 [r0, #(offsetof(CPUArchState, tlb_table[mem_index][0].addr_read))]
* below, the offset is likely to exceed 12 bits if mem_index != 0 and
* not exceed otherwise, so use an
* add r0, r0, #(mem_index * sizeof *CPUArchState.tlb_table)
* before.
*/
if (mem_index)
tcg_out_dat_imm(s, COND_AL, ARITH_ADD, TCG_REG_R0, TCG_REG_R0,
(mem_index << (TLB_SHIFT & 1)) |
((16 - (TLB_SHIFT >> 1)) << 8));
tcg_out_ld32_12(s, COND_AL, TCG_REG_R1, TCG_REG_R0,
offsetof(CPUArchState, tlb_table[0][0].addr_read));
tcg_out_dat_reg(s, COND_AL, ARITH_CMP, 0, TCG_REG_R1,
TCG_REG_R8, SHIFT_IMM_LSL(TARGET_PAGE_BITS));
/* Check alignment. */
if (s_bits)
tcg_out_dat_imm(s, COND_EQ, ARITH_TST,
0, addr_reg, (1 << s_bits) - 1);
# if TARGET_LONG_BITS == 64
/* XXX: possibly we could use a block data load or writeback in
* the first access. */
tcg_out_ld32_12(s, COND_EQ, TCG_REG_R1, TCG_REG_R0,
offsetof(CPUArchState, tlb_table[0][0].addr_read) + 4);
tcg_out_dat_reg(s, COND_EQ, ARITH_CMP, 0,
TCG_REG_R1, addr_reg2, SHIFT_IMM_LSL(0));
# endif
tcg_out_ld32_12(s, COND_EQ, TCG_REG_R1, TCG_REG_R0,
offsetof(CPUArchState, tlb_table[0][0].addend));
switch (opc) {
case 0:
tcg_out_ld8_r(s, COND_EQ, data_reg, addr_reg, TCG_REG_R1);
break;
case 0 | 4:
tcg_out_ld8s_r(s, COND_EQ, data_reg, addr_reg, TCG_REG_R1);
break;
case 1:
tcg_out_ld16u_r(s, COND_EQ, data_reg, addr_reg, TCG_REG_R1);
if (bswap) {
tcg_out_bswap16(s, COND_EQ, data_reg, data_reg);
}
break;
case 1 | 4:
if (bswap) {
tcg_out_ld16u_r(s, COND_EQ, data_reg, addr_reg, TCG_REG_R1);
tcg_out_bswap16s(s, COND_EQ, data_reg, data_reg);
} else {
tcg_out_ld16s_r(s, COND_EQ, data_reg, addr_reg, TCG_REG_R1);
}
break;
case 2:
default:
tcg_out_ld32_r(s, COND_EQ, data_reg, addr_reg, TCG_REG_R1);
if (bswap) {
tcg_out_bswap32(s, COND_EQ, data_reg, data_reg);
}
break;
case 3:
if (bswap) {
tcg_out_ld32_rwb(s, COND_EQ, data_reg2, TCG_REG_R1, addr_reg);
tcg_out_ld32_12(s, COND_EQ, data_reg, TCG_REG_R1, 4);
tcg_out_bswap32(s, COND_EQ, data_reg2, data_reg2);
tcg_out_bswap32(s, COND_EQ, data_reg, data_reg);
} else {
tcg_out_ld32_rwb(s, COND_EQ, data_reg, TCG_REG_R1, addr_reg);
tcg_out_ld32_12(s, COND_EQ, data_reg2, TCG_REG_R1, 4);
}
break;
}
label_ptr = (void *) s->code_ptr;
tcg/arm: fix branch target change during code retranslation QEMU uses code retranslation to restore the CPU state when an exception happens. For it to work the retranslation must not modify the generated code. This is what is currently implemented in ARM TCG. However on CPU that don't have icache/dcache/memory synchronised like ARM, this requirement is stronger and code retranslation must not modify the generated code "atomically", as the cache line might be flushed at any moment (interrupt, exception, task switching), even if not triggered by QEMU. The probability for this to happen is very low, and depends on cache size and associativiy, machine load, interrupts, so the symptoms are might happen randomly. This requirement is currently not followed in tcg/arm, for the load/store code, which basically has the following structure: 1) tlb access code is written 2) conditional fast path code is written 3) branch is written with a temporary target 4) slow path code is written 5) branch target is updated The cache lines corresponding to the retranslated code is not flushed after code retranslation as the generated code is supposed to be the same. However if the cache line corresponding to the branch instruction is flushed between step 3 and 5, and is not flushed again before the code is executed again, the branch target is wrong. In the guest, the symptoms are MMU page fault at a random addresses, which leads to kernel page fault or segmentation faults. The patch fixes this issue by avoiding writing the branch target until it is known, that is by writing only the branch instruction first, and later only the offset. This fixes booting linux guests on ARM hosts (tested: arm, i386, mips, mipsel, sh4, sparc). Acked-by: Edgar E. Iglesias <edgar.iglesias@gmail.com> Signed-off-by: Aurelien Jarno <aurelien@aurel32.net>
2011-01-06 22:43:13 +01:00
tcg_out_b_noaddr(s, COND_EQ);
/* TODO: move this code to where the constants pool will be */
/* Note that this code relies on the constraints we set in arm_op_defs[]
* to ensure that later arguments are not passed to us in registers we
* trash by moving the earlier arguments into them.
*/
argreg = TCG_REG_R0;
argreg = tcg_out_arg_reg32(s, argreg, TCG_AREG0);
#if TARGET_LONG_BITS == 64
argreg = tcg_out_arg_reg64(s, argreg, addr_reg, addr_reg2);
#else
argreg = tcg_out_arg_reg32(s, argreg, addr_reg);
#endif
argreg = tcg_out_arg_imm32(s, argreg, mem_index);
tcg_out_call(s, (tcg_target_long) qemu_ld_helpers[s_bits]);
tcg_out_arg_stacktidy(s, argreg);
switch (opc) {
case 0 | 4:
tcg_out_ext8s(s, COND_AL, data_reg, TCG_REG_R0);
break;
case 1 | 4:
tcg_out_ext16s(s, COND_AL, data_reg, TCG_REG_R0);
break;
case 0:
case 1:
case 2:
default:
tcg_out_mov_reg(s, COND_AL, data_reg, TCG_REG_R0);
break;
case 3:
tcg_out_mov_reg(s, COND_AL, data_reg, TCG_REG_R0);
tcg_out_mov_reg(s, COND_AL, data_reg2, TCG_REG_R1);
break;
}
tcg/arm: fix branch target change during code retranslation QEMU uses code retranslation to restore the CPU state when an exception happens. For it to work the retranslation must not modify the generated code. This is what is currently implemented in ARM TCG. However on CPU that don't have icache/dcache/memory synchronised like ARM, this requirement is stronger and code retranslation must not modify the generated code "atomically", as the cache line might be flushed at any moment (interrupt, exception, task switching), even if not triggered by QEMU. The probability for this to happen is very low, and depends on cache size and associativiy, machine load, interrupts, so the symptoms are might happen randomly. This requirement is currently not followed in tcg/arm, for the load/store code, which basically has the following structure: 1) tlb access code is written 2) conditional fast path code is written 3) branch is written with a temporary target 4) slow path code is written 5) branch target is updated The cache lines corresponding to the retranslated code is not flushed after code retranslation as the generated code is supposed to be the same. However if the cache line corresponding to the branch instruction is flushed between step 3 and 5, and is not flushed again before the code is executed again, the branch target is wrong. In the guest, the symptoms are MMU page fault at a random addresses, which leads to kernel page fault or segmentation faults. The patch fixes this issue by avoiding writing the branch target until it is known, that is by writing only the branch instruction first, and later only the offset. This fixes booting linux guests on ARM hosts (tested: arm, i386, mips, mipsel, sh4, sparc). Acked-by: Edgar E. Iglesias <edgar.iglesias@gmail.com> Signed-off-by: Aurelien Jarno <aurelien@aurel32.net>
2011-01-06 22:43:13 +01:00
reloc_pc24(label_ptr, (tcg_target_long)s->code_ptr);
#else /* !CONFIG_SOFTMMU */
if (GUEST_BASE) {
uint32_t offset = GUEST_BASE;
int i;
int rot;
while (offset) {
i = ctz32(offset) & ~1;
rot = ((32 - i) << 7) & 0xf00;
tcg_out_dat_imm(s, COND_AL, ARITH_ADD, TCG_REG_R8, addr_reg,
((offset >> i) & 0xff) | rot);
addr_reg = TCG_REG_R8;
offset &= ~(0xff << i);
}
}
switch (opc) {
case 0:
tcg_out_ld8_12(s, COND_AL, data_reg, addr_reg, 0);
break;
case 0 | 4:
tcg_out_ld8s_8(s, COND_AL, data_reg, addr_reg, 0);
break;
case 1:
tcg_out_ld16u_8(s, COND_AL, data_reg, addr_reg, 0);
if (bswap) {
tcg_out_bswap16(s, COND_AL, data_reg, data_reg);
}
break;
case 1 | 4:
if (bswap) {
tcg_out_ld16u_8(s, COND_AL, data_reg, addr_reg, 0);
tcg_out_bswap16s(s, COND_AL, data_reg, data_reg);
} else {
tcg_out_ld16s_8(s, COND_AL, data_reg, addr_reg, 0);
}
break;
case 2:
default:
tcg_out_ld32_12(s, COND_AL, data_reg, addr_reg, 0);
if (bswap) {
tcg_out_bswap32(s, COND_AL, data_reg, data_reg);
}
break;
case 3:
/* TODO: use block load -
* check that data_reg2 > data_reg or the other way */
if (data_reg == addr_reg) {
tcg_out_ld32_12(s, COND_AL, data_reg2, addr_reg, bswap ? 0 : 4);
tcg_out_ld32_12(s, COND_AL, data_reg, addr_reg, bswap ? 4 : 0);
} else {
tcg_out_ld32_12(s, COND_AL, data_reg, addr_reg, bswap ? 4 : 0);
tcg_out_ld32_12(s, COND_AL, data_reg2, addr_reg, bswap ? 0 : 4);
}
if (bswap) {
tcg_out_bswap32(s, COND_AL, data_reg, data_reg);
tcg_out_bswap32(s, COND_AL, data_reg2, data_reg2);
}
break;
}
#endif
}
static inline void tcg_out_qemu_st(TCGContext *s, const TCGArg *args, int opc)
{
int addr_reg, data_reg, data_reg2, bswap;
#ifdef CONFIG_SOFTMMU
int mem_index, s_bits;
TCGReg argreg;
# if TARGET_LONG_BITS == 64
int addr_reg2;
# endif
uint32_t *label_ptr;
#endif
#ifdef TARGET_WORDS_BIGENDIAN
bswap = 1;
#else
bswap = 0;
#endif
data_reg = *args++;
if (opc == 3)
data_reg2 = *args++;
else
data_reg2 = 0; /* suppress warning */
addr_reg = *args++;
#ifdef CONFIG_SOFTMMU
# if TARGET_LONG_BITS == 64
addr_reg2 = *args++;
# endif
mem_index = *args;
s_bits = opc & 3;
/* Should generate something like the following:
* shr r8, addr_reg, #TARGET_PAGE_BITS
* and r0, r8, #(CPU_TLB_SIZE - 1) @ Assumption: CPU_TLB_BITS <= 8
* add r0, env, r0 lsl #CPU_TLB_ENTRY_BITS
*/
tcg_out_dat_reg(s, COND_AL, ARITH_MOV,
TCG_REG_R8, 0, addr_reg, SHIFT_IMM_LSR(TARGET_PAGE_BITS));
tcg_out_dat_imm(s, COND_AL, ARITH_AND,
TCG_REG_R0, TCG_REG_R8, CPU_TLB_SIZE - 1);
tcg_out_dat_reg(s, COND_AL, ARITH_ADD, TCG_REG_R0,
TCG_AREG0, TCG_REG_R0, SHIFT_IMM_LSL(CPU_TLB_ENTRY_BITS));
/* In the
* ldr r1 [r0, #(offsetof(CPUArchState, tlb_table[mem_index][0].addr_write))]
* below, the offset is likely to exceed 12 bits if mem_index != 0 and
* not exceed otherwise, so use an
* add r0, r0, #(mem_index * sizeof *CPUArchState.tlb_table)
* before.
*/
if (mem_index)
tcg_out_dat_imm(s, COND_AL, ARITH_ADD, TCG_REG_R0, TCG_REG_R0,
(mem_index << (TLB_SHIFT & 1)) |
((16 - (TLB_SHIFT >> 1)) << 8));
tcg_out_ld32_12(s, COND_AL, TCG_REG_R1, TCG_REG_R0,
offsetof(CPUArchState, tlb_table[0][0].addr_write));
tcg_out_dat_reg(s, COND_AL, ARITH_CMP, 0, TCG_REG_R1,
TCG_REG_R8, SHIFT_IMM_LSL(TARGET_PAGE_BITS));
/* Check alignment. */
if (s_bits)
tcg_out_dat_imm(s, COND_EQ, ARITH_TST,
0, addr_reg, (1 << s_bits) - 1);
# if TARGET_LONG_BITS == 64
/* XXX: possibly we could use a block data load or writeback in
* the first access. */
tcg_out_ld32_12(s, COND_EQ, TCG_REG_R1, TCG_REG_R0,
offsetof(CPUArchState, tlb_table[0][0].addr_write) + 4);
tcg_out_dat_reg(s, COND_EQ, ARITH_CMP, 0,
TCG_REG_R1, addr_reg2, SHIFT_IMM_LSL(0));
# endif
tcg_out_ld32_12(s, COND_EQ, TCG_REG_R1, TCG_REG_R0,
offsetof(CPUArchState, tlb_table[0][0].addend));
switch (opc) {
case 0:
tcg_out_st8_r(s, COND_EQ, data_reg, addr_reg, TCG_REG_R1);
break;
case 1:
if (bswap) {
tcg_out_bswap16(s, COND_EQ, TCG_REG_R0, data_reg);
tcg_out_st16_r(s, COND_EQ, TCG_REG_R0, addr_reg, TCG_REG_R1);
} else {
tcg_out_st16_r(s, COND_EQ, data_reg, addr_reg, TCG_REG_R1);
}
break;
case 2:
default:
if (bswap) {
tcg_out_bswap32(s, COND_EQ, TCG_REG_R0, data_reg);
tcg_out_st32_r(s, COND_EQ, TCG_REG_R0, addr_reg, TCG_REG_R1);
} else {
tcg_out_st32_r(s, COND_EQ, data_reg, addr_reg, TCG_REG_R1);
}
break;
case 3:
if (bswap) {
tcg_out_bswap32(s, COND_EQ, TCG_REG_R0, data_reg2);
tcg_out_st32_rwb(s, COND_EQ, TCG_REG_R0, TCG_REG_R1, addr_reg);
tcg_out_bswap32(s, COND_EQ, TCG_REG_R0, data_reg);
tcg_out_st32_12(s, COND_EQ, TCG_REG_R0, TCG_REG_R1, 4);
} else {
tcg_out_st32_rwb(s, COND_EQ, data_reg, TCG_REG_R1, addr_reg);
tcg_out_st32_12(s, COND_EQ, data_reg2, TCG_REG_R1, 4);
}
break;
}
label_ptr = (void *) s->code_ptr;
tcg/arm: fix branch target change during code retranslation QEMU uses code retranslation to restore the CPU state when an exception happens. For it to work the retranslation must not modify the generated code. This is what is currently implemented in ARM TCG. However on CPU that don't have icache/dcache/memory synchronised like ARM, this requirement is stronger and code retranslation must not modify the generated code "atomically", as the cache line might be flushed at any moment (interrupt, exception, task switching), even if not triggered by QEMU. The probability for this to happen is very low, and depends on cache size and associativiy, machine load, interrupts, so the symptoms are might happen randomly. This requirement is currently not followed in tcg/arm, for the load/store code, which basically has the following structure: 1) tlb access code is written 2) conditional fast path code is written 3) branch is written with a temporary target 4) slow path code is written 5) branch target is updated The cache lines corresponding to the retranslated code is not flushed after code retranslation as the generated code is supposed to be the same. However if the cache line corresponding to the branch instruction is flushed between step 3 and 5, and is not flushed again before the code is executed again, the branch target is wrong. In the guest, the symptoms are MMU page fault at a random addresses, which leads to kernel page fault or segmentation faults. The patch fixes this issue by avoiding writing the branch target until it is known, that is by writing only the branch instruction first, and later only the offset. This fixes booting linux guests on ARM hosts (tested: arm, i386, mips, mipsel, sh4, sparc). Acked-by: Edgar E. Iglesias <edgar.iglesias@gmail.com> Signed-off-by: Aurelien Jarno <aurelien@aurel32.net>
2011-01-06 22:43:13 +01:00
tcg_out_b_noaddr(s, COND_EQ);
/* TODO: move this code to where the constants pool will be */
/* Note that this code relies on the constraints we set in arm_op_defs[]
* to ensure that later arguments are not passed to us in registers we
* trash by moving the earlier arguments into them.
*/
argreg = TCG_REG_R0;
argreg = tcg_out_arg_reg32(s, argreg, TCG_AREG0);
#if TARGET_LONG_BITS == 64
argreg = tcg_out_arg_reg64(s, argreg, addr_reg, addr_reg2);
#else
argreg = tcg_out_arg_reg32(s, argreg, addr_reg);
#endif
switch (opc) {
case 0:
argreg = tcg_out_arg_reg8(s, argreg, data_reg);
break;
case 1:
argreg = tcg_out_arg_reg16(s, argreg, data_reg);
break;
case 2:
argreg = tcg_out_arg_reg32(s, argreg, data_reg);
break;
case 3:
argreg = tcg_out_arg_reg64(s, argreg, data_reg, data_reg2);
break;
}
argreg = tcg_out_arg_imm32(s, argreg, mem_index);
tcg_out_call(s, (tcg_target_long) qemu_st_helpers[s_bits]);
tcg_out_arg_stacktidy(s, argreg);
tcg/arm: fix branch target change during code retranslation QEMU uses code retranslation to restore the CPU state when an exception happens. For it to work the retranslation must not modify the generated code. This is what is currently implemented in ARM TCG. However on CPU that don't have icache/dcache/memory synchronised like ARM, this requirement is stronger and code retranslation must not modify the generated code "atomically", as the cache line might be flushed at any moment (interrupt, exception, task switching), even if not triggered by QEMU. The probability for this to happen is very low, and depends on cache size and associativiy, machine load, interrupts, so the symptoms are might happen randomly. This requirement is currently not followed in tcg/arm, for the load/store code, which basically has the following structure: 1) tlb access code is written 2) conditional fast path code is written 3) branch is written with a temporary target 4) slow path code is written 5) branch target is updated The cache lines corresponding to the retranslated code is not flushed after code retranslation as the generated code is supposed to be the same. However if the cache line corresponding to the branch instruction is flushed between step 3 and 5, and is not flushed again before the code is executed again, the branch target is wrong. In the guest, the symptoms are MMU page fault at a random addresses, which leads to kernel page fault or segmentation faults. The patch fixes this issue by avoiding writing the branch target until it is known, that is by writing only the branch instruction first, and later only the offset. This fixes booting linux guests on ARM hosts (tested: arm, i386, mips, mipsel, sh4, sparc). Acked-by: Edgar E. Iglesias <edgar.iglesias@gmail.com> Signed-off-by: Aurelien Jarno <aurelien@aurel32.net>
2011-01-06 22:43:13 +01:00
reloc_pc24(label_ptr, (tcg_target_long)s->code_ptr);
#else /* !CONFIG_SOFTMMU */
if (GUEST_BASE) {
uint32_t offset = GUEST_BASE;
int i;
int rot;
while (offset) {
i = ctz32(offset) & ~1;
rot = ((32 - i) << 7) & 0xf00;
tcg_out_dat_imm(s, COND_AL, ARITH_ADD, TCG_REG_R1, addr_reg,
((offset >> i) & 0xff) | rot);
addr_reg = TCG_REG_R1;
offset &= ~(0xff << i);
}
}
switch (opc) {
case 0:
tcg_out_st8_12(s, COND_AL, data_reg, addr_reg, 0);
break;
case 1:
if (bswap) {
tcg_out_bswap16(s, COND_AL, TCG_REG_R0, data_reg);
tcg_out_st16_8(s, COND_AL, TCG_REG_R0, addr_reg, 0);
} else {
tcg_out_st16_8(s, COND_AL, data_reg, addr_reg, 0);
}
break;
case 2:
default:
if (bswap) {
tcg_out_bswap32(s, COND_AL, TCG_REG_R0, data_reg);
tcg_out_st32_12(s, COND_AL, TCG_REG_R0, addr_reg, 0);
} else {
tcg_out_st32_12(s, COND_AL, data_reg, addr_reg, 0);
}
break;
case 3:
/* TODO: use block store -
* check that data_reg2 > data_reg or the other way */
if (bswap) {
tcg_out_bswap32(s, COND_AL, TCG_REG_R0, data_reg2);
tcg_out_st32_12(s, COND_AL, TCG_REG_R0, addr_reg, 0);
tcg_out_bswap32(s, COND_AL, TCG_REG_R0, data_reg);
tcg_out_st32_12(s, COND_AL, TCG_REG_R0, addr_reg, 4);
} else {
tcg_out_st32_12(s, COND_AL, data_reg, addr_reg, 0);
tcg_out_st32_12(s, COND_AL, data_reg2, addr_reg, 4);
}
break;
}
#endif
}
static uint8_t *tb_ret_addr;
static inline void tcg_out_op(TCGContext *s, TCGOpcode opc,
const TCGArg *args, const int *const_args)
{
int c;
switch (opc) {
case INDEX_op_exit_tb:
{
uint8_t *ld_ptr = s->code_ptr;
if (args[0] >> 8)
tcg_out_ld32_12(s, COND_AL, TCG_REG_R0, TCG_REG_PC, 0);
else
tcg_out_dat_imm(s, COND_AL, ARITH_MOV, TCG_REG_R0, 0, args[0]);
tcg_out_goto(s, COND_AL, (tcg_target_ulong) tb_ret_addr);
if (args[0] >> 8) {
*ld_ptr = (uint8_t) (s->code_ptr - ld_ptr) - 8;
tcg_out32(s, args[0]);
}
}
break;
case INDEX_op_goto_tb:
if (s->tb_jmp_offset) {
/* Direct jump method */
#if defined(USE_DIRECT_JUMP)
s->tb_jmp_offset[args[0]] = s->code_ptr - s->code_buf;
tcg/arm: fix branch target change during code retranslation QEMU uses code retranslation to restore the CPU state when an exception happens. For it to work the retranslation must not modify the generated code. This is what is currently implemented in ARM TCG. However on CPU that don't have icache/dcache/memory synchronised like ARM, this requirement is stronger and code retranslation must not modify the generated code "atomically", as the cache line might be flushed at any moment (interrupt, exception, task switching), even if not triggered by QEMU. The probability for this to happen is very low, and depends on cache size and associativiy, machine load, interrupts, so the symptoms are might happen randomly. This requirement is currently not followed in tcg/arm, for the load/store code, which basically has the following structure: 1) tlb access code is written 2) conditional fast path code is written 3) branch is written with a temporary target 4) slow path code is written 5) branch target is updated The cache lines corresponding to the retranslated code is not flushed after code retranslation as the generated code is supposed to be the same. However if the cache line corresponding to the branch instruction is flushed between step 3 and 5, and is not flushed again before the code is executed again, the branch target is wrong. In the guest, the symptoms are MMU page fault at a random addresses, which leads to kernel page fault or segmentation faults. The patch fixes this issue by avoiding writing the branch target until it is known, that is by writing only the branch instruction first, and later only the offset. This fixes booting linux guests on ARM hosts (tested: arm, i386, mips, mipsel, sh4, sparc). Acked-by: Edgar E. Iglesias <edgar.iglesias@gmail.com> Signed-off-by: Aurelien Jarno <aurelien@aurel32.net>
2011-01-06 22:43:13 +01:00
tcg_out_b_noaddr(s, COND_AL);
#else
tcg_out_ld32_12(s, COND_AL, TCG_REG_PC, TCG_REG_PC, -4);
s->tb_jmp_offset[args[0]] = s->code_ptr - s->code_buf;
tcg_out32(s, 0);
#endif
} else {
/* Indirect jump method */
#if 1
c = (int) (s->tb_next + args[0]) - ((int) s->code_ptr + 8);
if (c > 0xfff || c < -0xfff) {
tcg_out_movi32(s, COND_AL, TCG_REG_R0,
(tcg_target_long) (s->tb_next + args[0]));
tcg_out_ld32_12(s, COND_AL, TCG_REG_PC, TCG_REG_R0, 0);
} else
tcg_out_ld32_12(s, COND_AL, TCG_REG_PC, TCG_REG_PC, c);
#else
tcg_out_ld32_12(s, COND_AL, TCG_REG_R0, TCG_REG_PC, 0);
tcg_out_ld32_12(s, COND_AL, TCG_REG_PC, TCG_REG_R0, 0);
tcg_out32(s, (tcg_target_long) (s->tb_next + args[0]));
#endif
}
s->tb_next_offset[args[0]] = s->code_ptr - s->code_buf;
break;
case INDEX_op_call:
if (const_args[0])
tcg_out_call(s, args[0]);
else
tcg_out_callr(s, COND_AL, args[0]);
break;
case INDEX_op_br:
tcg_out_goto_label(s, COND_AL, args[0]);
break;
case INDEX_op_ld8u_i32:
tcg_out_ld8u(s, COND_AL, args[0], args[1], args[2]);
break;
case INDEX_op_ld8s_i32:
tcg_out_ld8s(s, COND_AL, args[0], args[1], args[2]);
break;
case INDEX_op_ld16u_i32:
tcg_out_ld16u(s, COND_AL, args[0], args[1], args[2]);
break;
case INDEX_op_ld16s_i32:
tcg_out_ld16s(s, COND_AL, args[0], args[1], args[2]);
break;
case INDEX_op_ld_i32:
tcg_out_ld32u(s, COND_AL, args[0], args[1], args[2]);
break;
case INDEX_op_st8_i32:
tcg_out_st8(s, COND_AL, args[0], args[1], args[2]);
break;
case INDEX_op_st16_i32:
tcg_out_st16(s, COND_AL, args[0], args[1], args[2]);
break;
case INDEX_op_st_i32:
tcg_out_st32(s, COND_AL, args[0], args[1], args[2]);
break;
case INDEX_op_mov_i32:
tcg_out_dat_reg(s, COND_AL, ARITH_MOV,
args[0], 0, args[1], SHIFT_IMM_LSL(0));
break;
case INDEX_op_movi_i32:
tcg_out_movi32(s, COND_AL, args[0], args[1]);
break;
case INDEX_op_add_i32:
c = ARITH_ADD;
goto gen_arith;
case INDEX_op_sub_i32:
c = ARITH_SUB;
goto gen_arith;
case INDEX_op_and_i32:
c = ARITH_AND;
goto gen_arith;
case INDEX_op_andc_i32:
c = ARITH_BIC;
goto gen_arith;
case INDEX_op_or_i32:
c = ARITH_ORR;
goto gen_arith;
case INDEX_op_xor_i32:
c = ARITH_EOR;
/* Fall through. */
gen_arith:
if (const_args[2]) {
int rot;
rot = encode_imm(args[2]);
tcg_out_dat_imm(s, COND_AL, c,
args[0], args[1], rotl(args[2], rot) | (rot << 7));
} else
tcg_out_dat_reg(s, COND_AL, c,
args[0], args[1], args[2], SHIFT_IMM_LSL(0));
break;
case INDEX_op_add2_i32:
tcg_out_dat_reg2(s, COND_AL, ARITH_ADD, ARITH_ADC,
args[0], args[1], args[2], args[3],
args[4], args[5], SHIFT_IMM_LSL(0));
break;
case INDEX_op_sub2_i32:
tcg_out_dat_reg2(s, COND_AL, ARITH_SUB, ARITH_SBC,
args[0], args[1], args[2], args[3],
args[4], args[5], SHIFT_IMM_LSL(0));
break;
case INDEX_op_neg_i32:
tcg_out_dat_imm(s, COND_AL, ARITH_RSB, args[0], args[1], 0);
break;
case INDEX_op_not_i32:
tcg_out_dat_reg(s, COND_AL,
ARITH_MVN, args[0], 0, args[1], SHIFT_IMM_LSL(0));
break;
case INDEX_op_mul_i32:
tcg_out_mul32(s, COND_AL, args[0], args[1], args[2]);
break;
case INDEX_op_mulu2_i32:
tcg_out_umull32(s, COND_AL, args[0], args[1], args[2], args[3]);
break;
/* XXX: Perhaps args[2] & 0x1f is wrong */
case INDEX_op_shl_i32:
c = const_args[2] ?
SHIFT_IMM_LSL(args[2] & 0x1f) : SHIFT_REG_LSL(args[2]);
goto gen_shift32;
case INDEX_op_shr_i32:
c = const_args[2] ? (args[2] & 0x1f) ? SHIFT_IMM_LSR(args[2] & 0x1f) :
SHIFT_IMM_LSL(0) : SHIFT_REG_LSR(args[2]);
goto gen_shift32;
case INDEX_op_sar_i32:
c = const_args[2] ? (args[2] & 0x1f) ? SHIFT_IMM_ASR(args[2] & 0x1f) :
SHIFT_IMM_LSL(0) : SHIFT_REG_ASR(args[2]);
goto gen_shift32;
case INDEX_op_rotr_i32:
c = const_args[2] ? (args[2] & 0x1f) ? SHIFT_IMM_ROR(args[2] & 0x1f) :
SHIFT_IMM_LSL(0) : SHIFT_REG_ROR(args[2]);
/* Fall through. */
gen_shift32:
tcg_out_dat_reg(s, COND_AL, ARITH_MOV, args[0], 0, args[1], c);
break;
case INDEX_op_rotl_i32:
if (const_args[2]) {
tcg_out_dat_reg(s, COND_AL, ARITH_MOV, args[0], 0, args[1],
((0x20 - args[2]) & 0x1f) ?
SHIFT_IMM_ROR((0x20 - args[2]) & 0x1f) :
SHIFT_IMM_LSL(0));
} else {
tcg_out_dat_imm(s, COND_AL, ARITH_RSB, TCG_REG_R8, args[1], 0x20);
tcg_out_dat_reg(s, COND_AL, ARITH_MOV, args[0], 0, args[1],
SHIFT_REG_ROR(TCG_REG_R8));
}
break;
case INDEX_op_brcond_i32:
if (const_args[1]) {
int rot;
rot = encode_imm(args[1]);
tcg_out_dat_imm(s, COND_AL, ARITH_CMP, 0,
args[0], rotl(args[1], rot) | (rot << 7));
} else {
tcg_out_dat_reg(s, COND_AL, ARITH_CMP, 0,
args[0], args[1], SHIFT_IMM_LSL(0));
}
tcg_out_goto_label(s, tcg_cond_to_arm_cond[args[2]], args[3]);
break;
case INDEX_op_brcond2_i32:
/* The resulting conditions are:
* TCG_COND_EQ --> a0 == a2 && a1 == a3,
* TCG_COND_NE --> (a0 != a2 && a1 == a3) || a1 != a3,
* TCG_COND_LT(U) --> (a0 < a2 && a1 == a3) || a1 < a3,
* TCG_COND_GE(U) --> (a0 >= a2 && a1 == a3) || (a1 >= a3 && a1 != a3),
* TCG_COND_LE(U) --> (a0 <= a2 && a1 == a3) || (a1 <= a3 && a1 != a3),
* TCG_COND_GT(U) --> (a0 > a2 && a1 == a3) || a1 > a3,
*/
tcg_out_dat_reg(s, COND_AL, ARITH_CMP, 0,
args[1], args[3], SHIFT_IMM_LSL(0));
tcg_out_dat_reg(s, COND_EQ, ARITH_CMP, 0,
args[0], args[2], SHIFT_IMM_LSL(0));
tcg_out_goto_label(s, tcg_cond_to_arm_cond[args[4]], args[5]);
break;
case INDEX_op_setcond_i32:
if (const_args[2]) {
int rot;
rot = encode_imm(args[2]);
tcg_out_dat_imm(s, COND_AL, ARITH_CMP, 0,
args[1], rotl(args[2], rot) | (rot << 7));
} else {
tcg_out_dat_reg(s, COND_AL, ARITH_CMP, 0,
args[1], args[2], SHIFT_IMM_LSL(0));
}
tcg_out_dat_imm(s, tcg_cond_to_arm_cond[args[3]],
ARITH_MOV, args[0], 0, 1);
tcg_out_dat_imm(s, tcg_cond_to_arm_cond[tcg_invert_cond(args[3])],
ARITH_MOV, args[0], 0, 0);
break;
case INDEX_op_setcond2_i32:
/* See brcond2_i32 comment */
tcg_out_dat_reg(s, COND_AL, ARITH_CMP, 0,
args[2], args[4], SHIFT_IMM_LSL(0));
tcg_out_dat_reg(s, COND_EQ, ARITH_CMP, 0,
args[1], args[3], SHIFT_IMM_LSL(0));
tcg_out_dat_imm(s, tcg_cond_to_arm_cond[args[5]],
ARITH_MOV, args[0], 0, 1);
tcg_out_dat_imm(s, tcg_cond_to_arm_cond[tcg_invert_cond(args[5])],
ARITH_MOV, args[0], 0, 0);
2010-03-02 22:26:04 +01:00
break;
case INDEX_op_qemu_ld8u:
tcg_out_qemu_ld(s, args, 0);
break;
case INDEX_op_qemu_ld8s:
tcg_out_qemu_ld(s, args, 0 | 4);
break;
case INDEX_op_qemu_ld16u:
tcg_out_qemu_ld(s, args, 1);
break;
case INDEX_op_qemu_ld16s:
tcg_out_qemu_ld(s, args, 1 | 4);
break;
case INDEX_op_qemu_ld32:
tcg_out_qemu_ld(s, args, 2);
break;
case INDEX_op_qemu_ld64:
tcg_out_qemu_ld(s, args, 3);
break;
case INDEX_op_qemu_st8:
tcg_out_qemu_st(s, args, 0);
break;
case INDEX_op_qemu_st16:
tcg_out_qemu_st(s, args, 1);
break;
case INDEX_op_qemu_st32:
tcg_out_qemu_st(s, args, 2);
break;
case INDEX_op_qemu_st64:
tcg_out_qemu_st(s, args, 3);
break;
case INDEX_op_bswap16_i32:
tcg_out_bswap16(s, COND_AL, args[0], args[1]);
break;
case INDEX_op_bswap32_i32:
tcg_out_bswap32(s, COND_AL, args[0], args[1]);
break;
case INDEX_op_ext8s_i32:
tcg_out_ext8s(s, COND_AL, args[0], args[1]);
break;
case INDEX_op_ext16s_i32:
tcg_out_ext16s(s, COND_AL, args[0], args[1]);
break;
case INDEX_op_ext16u_i32:
tcg_out_ext16u(s, COND_AL, args[0], args[1]);
break;
default:
tcg_abort();
}
}
static const TCGTargetOpDef arm_op_defs[] = {
{ INDEX_op_exit_tb, { } },
{ INDEX_op_goto_tb, { } },
{ INDEX_op_call, { "ri" } },
{ INDEX_op_br, { } },
{ INDEX_op_mov_i32, { "r", "r" } },
{ INDEX_op_movi_i32, { "r" } },
{ INDEX_op_ld8u_i32, { "r", "r" } },
{ INDEX_op_ld8s_i32, { "r", "r" } },
{ INDEX_op_ld16u_i32, { "r", "r" } },
{ INDEX_op_ld16s_i32, { "r", "r" } },
{ INDEX_op_ld_i32, { "r", "r" } },
{ INDEX_op_st8_i32, { "r", "r" } },
{ INDEX_op_st16_i32, { "r", "r" } },
{ INDEX_op_st_i32, { "r", "r" } },
/* TODO: "r", "r", "ri" */
{ INDEX_op_add_i32, { "r", "r", "rI" } },
{ INDEX_op_sub_i32, { "r", "r", "rI" } },
{ INDEX_op_mul_i32, { "r", "r", "r" } },
{ INDEX_op_mulu2_i32, { "r", "r", "r", "r" } },
{ INDEX_op_and_i32, { "r", "r", "rI" } },
{ INDEX_op_andc_i32, { "r", "r", "rI" } },
{ INDEX_op_or_i32, { "r", "r", "rI" } },
{ INDEX_op_xor_i32, { "r", "r", "rI" } },
{ INDEX_op_neg_i32, { "r", "r" } },
{ INDEX_op_not_i32, { "r", "r" } },
{ INDEX_op_shl_i32, { "r", "r", "ri" } },
{ INDEX_op_shr_i32, { "r", "r", "ri" } },
{ INDEX_op_sar_i32, { "r", "r", "ri" } },
{ INDEX_op_rotl_i32, { "r", "r", "ri" } },
{ INDEX_op_rotr_i32, { "r", "r", "ri" } },
{ INDEX_op_brcond_i32, { "r", "rI" } },
{ INDEX_op_setcond_i32, { "r", "r", "rI" } },
/* TODO: "r", "r", "r", "r", "ri", "ri" */
{ INDEX_op_add2_i32, { "r", "r", "r", "r", "r", "r" } },
{ INDEX_op_sub2_i32, { "r", "r", "r", "r", "r", "r" } },
{ INDEX_op_brcond2_i32, { "r", "r", "r", "r" } },
{ INDEX_op_setcond2_i32, { "r", "r", "r", "r", "r" } },
#if TARGET_LONG_BITS == 32
{ INDEX_op_qemu_ld8u, { "r", "l" } },
{ INDEX_op_qemu_ld8s, { "r", "l" } },
{ INDEX_op_qemu_ld16u, { "r", "l" } },
{ INDEX_op_qemu_ld16s, { "r", "l" } },
{ INDEX_op_qemu_ld32, { "r", "l" } },
{ INDEX_op_qemu_ld64, { "L", "L", "l" } },
{ INDEX_op_qemu_st8, { "s", "s" } },
{ INDEX_op_qemu_st16, { "s", "s" } },
{ INDEX_op_qemu_st32, { "s", "s" } },
{ INDEX_op_qemu_st64, { "S", "S", "s" } },
#else
{ INDEX_op_qemu_ld8u, { "r", "l", "l" } },
{ INDEX_op_qemu_ld8s, { "r", "l", "l" } },
{ INDEX_op_qemu_ld16u, { "r", "l", "l" } },
{ INDEX_op_qemu_ld16s, { "r", "l", "l" } },
{ INDEX_op_qemu_ld32, { "r", "l", "l" } },
{ INDEX_op_qemu_ld64, { "L", "L", "l", "l" } },
{ INDEX_op_qemu_st8, { "s", "s", "s" } },
{ INDEX_op_qemu_st16, { "s", "s", "s" } },
{ INDEX_op_qemu_st32, { "s", "s", "s" } },
{ INDEX_op_qemu_st64, { "S", "S", "s", "s" } },
#endif
{ INDEX_op_bswap16_i32, { "r", "r" } },
{ INDEX_op_bswap32_i32, { "r", "r" } },
{ INDEX_op_ext8s_i32, { "r", "r" } },
{ INDEX_op_ext16s_i32, { "r", "r" } },
{ INDEX_op_ext16u_i32, { "r", "r" } },
{ -1 },
};
static void tcg_target_init(TCGContext *s)
{
#if !defined(CONFIG_USER_ONLY)
/* fail safe */
if ((1 << CPU_TLB_ENTRY_BITS) != sizeof(CPUTLBEntry))
tcg_abort();
#endif
tcg_regset_set32(tcg_target_available_regs[TCG_TYPE_I32], 0, 0xffff);
tcg_regset_set32(tcg_target_call_clobber_regs, 0,
(1 << TCG_REG_R0) |
(1 << TCG_REG_R1) |
(1 << TCG_REG_R2) |
(1 << TCG_REG_R3) |
(1 << TCG_REG_R12) |
(1 << TCG_REG_R14));
tcg_regset_clear(s->reserved_regs);
tcg_regset_set_reg(s->reserved_regs, TCG_REG_CALL_STACK);
tcg_regset_set_reg(s->reserved_regs, TCG_REG_R8);
tcg_regset_set_reg(s->reserved_regs, TCG_REG_PC);
tcg_add_target_add_op_defs(arm_op_defs);
tcg_set_frame(s, TCG_AREG0, offsetof(CPUArchState, temp_buf),
CPU_TEMP_BUF_NLONGS * sizeof(long));
}
static inline void tcg_out_ld(TCGContext *s, TCGType type, TCGReg arg,
TCGReg arg1, tcg_target_long arg2)
{
tcg_out_ld32u(s, COND_AL, arg, arg1, arg2);
}
static inline void tcg_out_st(TCGContext *s, TCGType type, TCGReg arg,
TCGReg arg1, tcg_target_long arg2)
{
tcg_out_st32(s, COND_AL, arg, arg1, arg2);
}
static inline void tcg_out_mov(TCGContext *s, TCGType type,
TCGReg ret, TCGReg arg)
{
tcg_out_dat_reg(s, COND_AL, ARITH_MOV, ret, 0, arg, SHIFT_IMM_LSL(0));
}
static inline void tcg_out_movi(TCGContext *s, TCGType type,
TCGReg ret, tcg_target_long arg)
{
tcg_out_movi32(s, COND_AL, ret, arg);
}
static void tcg_target_qemu_prologue(TCGContext *s)
{
/* Calling convention requires us to save r4-r11 and lr;
* save also r12 to maintain stack 8-alignment.
*/
/* stmdb sp!, { r4 - r12, lr } */
tcg_out32(s, (COND_AL << 28) | 0x092d5ff0);
tcg_out_mov(s, TCG_TYPE_PTR, TCG_AREG0, tcg_target_call_iarg_regs[0]);
tcg_out_bx(s, COND_AL, tcg_target_call_iarg_regs[1]);
tb_ret_addr = s->code_ptr;
/* ldmia sp!, { r4 - r12, pc } */
tcg_out32(s, (COND_AL << 28) | 0x08bd9ff0);
}