TCI build fix and cleanup

Streamline tb_lookup
 Fixes for tcg/aarch64
 -----BEGIN PGP SIGNATURE-----
 
 iQFRBAABCgA7FiEEekgeeIaLTbaoWgXAZN846K9+IV8FAmBD9XYdHHJpY2hhcmQu
 aGVuZGVyc29uQGxpbmFyby5vcmcACgkQZN846K9+IV9jqwf/cIr0aafPuLyMwAFB
 WCL2+S7oZT8n9dXTwel9MTjnLg4tApvHcxwdNMeCnWIvJ2wtLg4NHsOv+mitWTpD
 D1AMgVRvEaLYFFMa582ewopLee1Yp0vTWYtVwXIRMFW2qJv4b6h/cEJae3DjbFLs
 TlU6XBlPjNLjWECCO+cxVUJRojLpf0WuJrn7REALYdiAAK/+X7g3wwfxc4VP0D6a
 NO54gLH2XSVDosWn0vJu5czDGTA3ZD7mLRuFscyVsK7KKx2xgZq2WzcMATeKo5Qy
 qn/E2bYn2nMEv78ptt3h06sSwGs0W41a68Y7uqWIkdfI1aGeIBzFn2rmXoXaaAfd
 IrJnLQ==
 =f77X
 -----END PGP SIGNATURE-----

Merge remote-tracking branch 'remotes/rth-gitlab/tags/pull-tcg-20210306' into staging

TCI build fix and cleanup
Streamline tb_lookup
Fixes for tcg/aarch64

# gpg: Signature made Sat 06 Mar 2021 21:34:46 GMT
# gpg:                using RSA key 7A481E78868B4DB6A85A05C064DF38E8AF7E215F
# gpg:                issuer "richard.henderson@linaro.org"
# gpg: Good signature from "Richard Henderson <richard.henderson@linaro.org>" [full]
# Primary key fingerprint: 7A48 1E78 868B 4DB6 A85A  05C0 64DF 38E8 AF7E 215F

* remotes/rth-gitlab/tags/pull-tcg-20210306: (27 commits)
  accel/tcg: Precompute curr_cflags into cpu->tcg_cflags
  include/exec: lightly re-arrange TranslationBlock
  accel/tcg: drop the use of CF_HASH_MASK and rename params
  accel/tcg: move CF_CLUSTER calculation to curr_cflags
  accel/tcg: rename tb_lookup__cpu_state and hoist state extraction
  tcg/tci: Merge mov, not and neg operations
  tcg/tci: Merge bswap operations
  tcg/tci: Merge extension operations
  tcg/tci: Merge basic arithmetic operations
  tcg/tci: Reduce use of tci_read_r64
  tcg/tci: Remove tci_read_r32s
  tcg/tci: Remove tci_read_r32
  tcg/tci: Remove tci_read_r16s
  tcg/tci: Remove tci_read_r16
  tcg/tci: Remove tci_read_r8s
  tcg/tci: Remove tci_read_r8
  tcg/tci: Merge identical cases in generation (load/store opcodes)
  tcg/tci: Merge identical cases in generation (conditional opcodes)
  tcg/tci: Merge identical cases in generation (deposit opcode)
  tcg/tci: Merge identical cases in generation (exchange opcodes)
  ...

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
This commit is contained in:
Peter Maydell 2021-03-08 20:07:37 +00:00
commit 74fd46ed44
18 changed files with 529 additions and 610 deletions

View File

@ -245,11 +245,11 @@ static void cpu_exec_exit(CPUState *cpu)
void cpu_exec_step_atomic(CPUState *cpu)
{
CPUArchState *env = (CPUArchState *)cpu->env_ptr;
TranslationBlock *tb;
target_ulong cs_base, pc;
uint32_t flags;
uint32_t cflags = 1;
uint32_t cf_mask = cflags & CF_HASH_MASK;
uint32_t cflags = (curr_cflags(cpu) & ~CF_PARALLEL) | 1;
int tb_exit;
if (sigsetjmp(cpu->jmp_env, 0) == 0) {
@ -258,15 +258,15 @@ void cpu_exec_step_atomic(CPUState *cpu)
g_assert(!cpu->running);
cpu->running = true;
tb = tb_lookup__cpu_state(cpu, &pc, &cs_base, &flags, cf_mask);
cpu_get_tb_cpu_state(env, &pc, &cs_base, &flags);
tb = tb_lookup(cpu, pc, cs_base, flags, cflags);
if (tb == NULL) {
mmap_lock();
tb = tb_gen_code(cpu, pc, cs_base, flags, cflags);
mmap_unlock();
}
/* Since we got here, we know that parallel_cpus must be true. */
parallel_cpus = false;
cpu_exec_enter(cpu);
/* execute the generated code */
trace_exec_tb(tb, pc);
@ -294,7 +294,6 @@ void cpu_exec_step_atomic(CPUState *cpu)
* the execution.
*/
g_assert(cpu_in_exclusive_context(cpu));
parallel_cpus = true;
cpu->running = false;
end_exclusive();
}
@ -305,7 +304,7 @@ struct tb_desc {
CPUArchState *env;
tb_page_addr_t phys_page1;
uint32_t flags;
uint32_t cf_mask;
uint32_t cflags;
uint32_t trace_vcpu_dstate;
};
@ -319,7 +318,7 @@ static bool tb_lookup_cmp(const void *p, const void *d)
tb->cs_base == desc->cs_base &&
tb->flags == desc->flags &&
tb->trace_vcpu_dstate == desc->trace_vcpu_dstate &&
(tb_cflags(tb) & (CF_HASH_MASK | CF_INVALID)) == desc->cf_mask) {
tb_cflags(tb) == desc->cflags) {
/* check next page if needed */
if (tb->page_addr[1] == -1) {
return true;
@ -339,7 +338,7 @@ static bool tb_lookup_cmp(const void *p, const void *d)
TranslationBlock *tb_htable_lookup(CPUState *cpu, target_ulong pc,
target_ulong cs_base, uint32_t flags,
uint32_t cf_mask)
uint32_t cflags)
{
tb_page_addr_t phys_pc;
struct tb_desc desc;
@ -348,7 +347,7 @@ TranslationBlock *tb_htable_lookup(CPUState *cpu, target_ulong pc,
desc.env = (CPUArchState *)cpu->env_ptr;
desc.cs_base = cs_base;
desc.flags = flags;
desc.cf_mask = cf_mask;
desc.cflags = cflags;
desc.trace_vcpu_dstate = *cpu->trace_dstate;
desc.pc = pc;
phys_pc = get_page_addr_code(desc.env, pc);
@ -356,7 +355,7 @@ TranslationBlock *tb_htable_lookup(CPUState *cpu, target_ulong pc,
return NULL;
}
desc.phys_page1 = phys_pc & TARGET_PAGE_MASK;
h = tb_hash_func(phys_pc, pc, flags, cf_mask, *cpu->trace_dstate);
h = tb_hash_func(phys_pc, pc, flags, cflags, *cpu->trace_dstate);
return qht_lookup_custom(&tb_ctx.htable, &desc, h, tb_lookup_cmp);
}
@ -416,16 +415,19 @@ static inline void tb_add_jump(TranslationBlock *tb, int n,
static inline TranslationBlock *tb_find(CPUState *cpu,
TranslationBlock *last_tb,
int tb_exit, uint32_t cf_mask)
int tb_exit, uint32_t cflags)
{
CPUArchState *env = (CPUArchState *)cpu->env_ptr;
TranslationBlock *tb;
target_ulong cs_base, pc;
uint32_t flags;
tb = tb_lookup__cpu_state(cpu, &pc, &cs_base, &flags, cf_mask);
cpu_get_tb_cpu_state(env, &pc, &cs_base, &flags);
tb = tb_lookup(cpu, pc, cs_base, flags, cflags);
if (tb == NULL) {
mmap_lock();
tb = tb_gen_code(cpu, pc, cs_base, flags, cf_mask);
tb = tb_gen_code(cpu, pc, cs_base, flags, cflags);
mmap_unlock();
/* We add the TB in the virtual pc hash table for the fast lookup */
qatomic_set(&cpu->tb_jmp_cache[tb_jmp_cache_hash_func(pc)], tb);
@ -491,7 +493,7 @@ static inline bool cpu_handle_exception(CPUState *cpu, int *ret)
if (replay_has_exception()
&& cpu_neg(cpu)->icount_decr.u16.low + cpu->icount_extra == 0) {
/* Execute just one insn to trigger exception pending in the log */
cpu->cflags_next_tb = (curr_cflags() & ~CF_USE_ICOUNT) | 1;
cpu->cflags_next_tb = (curr_cflags(cpu) & ~CF_USE_ICOUNT) | 1;
}
#endif
return false;
@ -788,7 +790,7 @@ int cpu_exec(CPUState *cpu)
have CF_INVALID set, -1 is a convenient invalid value that
does not require tcg headers for cpu_common_reset. */
if (cflags == -1) {
cflags = curr_cflags();
cflags = curr_cflags(cpu);
} else {
cpu->cflags_next_tb = -1;
}

View File

@ -114,8 +114,7 @@ void mttcg_start_vcpu_thread(CPUState *cpu)
char thread_name[VCPU_THREAD_NAME_SIZE];
g_assert(tcg_enabled());
parallel_cpus = (current_machine->smp.max_cpus > 1);
tcg_cpu_init_cflags(cpu, current_machine->smp.max_cpus > 1);
cpu->thread = g_malloc0(sizeof(QemuThread));
cpu->halt_cond = g_malloc0(sizeof(QemuCond));

View File

@ -269,7 +269,7 @@ void rr_start_vcpu_thread(CPUState *cpu)
static QemuThread *single_tcg_cpu_thread;
g_assert(tcg_enabled());
parallel_cpus = false;
tcg_cpu_init_cflags(cpu, false);
if (!single_tcg_cpu_thread) {
cpu->thread = g_malloc0(sizeof(QemuThread));

View File

@ -41,6 +41,14 @@
/* common functionality among all TCG variants */
void tcg_cpu_init_cflags(CPUState *cpu, bool parallel)
{
uint32_t cflags = cpu->cluster_index << CF_CLUSTER_SHIFT;
cflags |= parallel ? CF_PARALLEL : 0;
cflags |= icount_enabled() ? CF_USE_ICOUNT : 0;
cpu->tcg_cflags = cflags;
}
void tcg_cpus_destroy(CPUState *cpu)
{
cpu_thread_signal_destroyed(cpu);

View File

@ -17,5 +17,6 @@
void tcg_cpus_destroy(CPUState *cpu);
int tcg_cpus_exec(CPUState *cpu);
void tcg_handle_interrupt(CPUState *cpu, int mask);
void tcg_cpu_init_cflags(CPUState *cpu, bool parallel);
#endif /* TCG_CPUS_H */

View File

@ -27,10 +27,10 @@
#include "exec/helper-proto.h"
#include "exec/cpu_ldst.h"
#include "exec/exec-all.h"
#include "exec/tb-lookup.h"
#include "disas/disas.h"
#include "exec/log.h"
#include "tcg/tcg.h"
#include "exec/tb-lookup.h"
/* 32-bit helpers */
@ -152,7 +152,9 @@ const void *HELPER(lookup_tb_ptr)(CPUArchState *env)
target_ulong cs_base, pc;
uint32_t flags;
tb = tb_lookup__cpu_state(cpu, &pc, &cs_base, &flags, curr_cflags());
cpu_get_tb_cpu_state(env, &pc, &cs_base, &flags);
tb = tb_lookup(cpu, pc, cs_base, flags, curr_cflags(cpu));
if (tb == NULL) {
return tcg_code_gen_epilogue;
}

View File

@ -224,7 +224,6 @@ static void *l1_map[V_L1_MAX_SIZE];
TCGContext tcg_init_ctx;
__thread TCGContext *tcg_ctx;
TBContext tb_ctx;
bool parallel_cpus;
static void page_table_config_init(void)
{
@ -1311,7 +1310,7 @@ static bool tb_cmp(const void *ap, const void *bp)
return a->pc == b->pc &&
a->cs_base == b->cs_base &&
a->flags == b->flags &&
(tb_cflags(a) & CF_HASH_MASK) == (tb_cflags(b) & CF_HASH_MASK) &&
(tb_cflags(a) & ~CF_INVALID) == (tb_cflags(b) & ~CF_INVALID) &&
a->trace_vcpu_dstate == b->trace_vcpu_dstate &&
a->page_addr[0] == b->page_addr[0] &&
a->page_addr[1] == b->page_addr[1];
@ -1616,6 +1615,7 @@ static void do_tb_phys_invalidate(TranslationBlock *tb, bool rm_from_page_list)
PageDesc *p;
uint32_t h;
tb_page_addr_t phys_pc;
uint32_t orig_cflags = tb_cflags(tb);
assert_memory_lock();
@ -1626,7 +1626,7 @@ static void do_tb_phys_invalidate(TranslationBlock *tb, bool rm_from_page_list)
/* remove the TB from the hash list */
phys_pc = tb->page_addr[0] + (tb->pc & ~TARGET_PAGE_MASK);
h = tb_hash_func(phys_pc, tb->pc, tb->flags, tb_cflags(tb) & CF_HASH_MASK,
h = tb_hash_func(phys_pc, tb->pc, tb->flags, orig_cflags,
tb->trace_vcpu_dstate);
if (!qht_remove(&tb_ctx.htable, tb, h)) {
return;
@ -1793,6 +1793,7 @@ tb_link_page(TranslationBlock *tb, tb_page_addr_t phys_pc,
uint32_t h;
assert_memory_lock();
tcg_debug_assert(!(tb->cflags & CF_INVALID));
/*
* Add the TB to the page list, acquiring first the pages's locks.
@ -1811,7 +1812,7 @@ tb_link_page(TranslationBlock *tb, tb_page_addr_t phys_pc,
}
/* add in the hash table */
h = tb_hash_func(phys_pc, tb->pc, tb->flags, tb->cflags & CF_HASH_MASK,
h = tb_hash_func(phys_pc, tb->pc, tb->flags, tb->cflags,
tb->trace_vcpu_dstate);
qht_insert(&tb_ctx.htable, tb, h, &existing_tb);
@ -1865,9 +1866,6 @@ TranslationBlock *tb_gen_code(CPUState *cpu,
cflags = (cflags & ~CF_COUNT_MASK) | 1;
}
cflags &= ~CF_CLUSTER_MASK;
cflags |= cpu->cluster_index << CF_CLUSTER_SHIFT;
max_insns = cflags & CF_COUNT_MASK;
if (max_insns == 0) {
max_insns = CF_COUNT_MASK;
@ -2194,7 +2192,7 @@ tb_invalidate_phys_page_range__locked(struct page_collection *pages,
if (current_tb_modified) {
page_collection_unlock(pages);
/* Force execution of one insn next time. */
cpu->cflags_next_tb = 1 | curr_cflags();
cpu->cflags_next_tb = 1 | curr_cflags(cpu);
mmap_unlock();
cpu_loop_exit_noexc(cpu);
}
@ -2362,7 +2360,7 @@ static bool tb_invalidate_phys_page(tb_page_addr_t addr, uintptr_t pc)
#ifdef TARGET_HAS_PRECISE_SMC
if (current_tb_modified) {
/* Force execution of one insn next time. */
cpu->cflags_next_tb = 1 | curr_cflags();
cpu->cflags_next_tb = 1 | curr_cflags(cpu);
return true;
}
#endif
@ -2438,7 +2436,7 @@ void cpu_io_recompile(CPUState *cpu, uintptr_t retaddr)
* operations only (which execute after completion) so we don't
* double instrument the instruction.
*/
cpu->cflags_next_tb = curr_cflags() | CF_MEMI_ONLY | CF_LAST_IO | n;
cpu->cflags_next_tb = curr_cflags(cpu) | CF_MEMI_ONLY | CF_LAST_IO | n;
qemu_log_mask_and_addr(CPU_LOG_EXEC, tb->pc,
"cpu_io_recompile: rewound execution of TB to "

View File

@ -448,9 +448,6 @@ struct TranslationBlock {
target_ulong pc; /* simulated PC corresponding to this block (EIP + CS base) */
target_ulong cs_base; /* CS base for this block */
uint32_t flags; /* flags defining in which context the code was generated */
uint16_t size; /* size of target code for this block (1 <=
size <= TARGET_PAGE_SIZE) */
uint16_t icount;
uint32_t cflags; /* compile flags */
#define CF_COUNT_MASK 0x00007fff
#define CF_LAST_IO 0x00008000 /* Last insn may be an IO access. */
@ -460,12 +457,18 @@ struct TranslationBlock {
#define CF_PARALLEL 0x00080000 /* Generate code for a parallel context */
#define CF_CLUSTER_MASK 0xff000000 /* Top 8 bits are cluster ID */
#define CF_CLUSTER_SHIFT 24
/* cflags' mask for hashing/comparison, basically ignore CF_INVALID */
#define CF_HASH_MASK (~CF_INVALID)
/* Per-vCPU dynamic tracing state used to generate this TB */
uint32_t trace_vcpu_dstate;
/*
* Above fields used for comparing
*/
/* size of target code for this block (1 <= size <= TARGET_PAGE_SIZE) */
uint16_t size;
uint16_t icount;
struct tb_tc tc;
/* first and second physical page containing code. The lower bit
@ -510,8 +513,6 @@ struct TranslationBlock {
uintptr_t jmp_dest[2];
};
extern bool parallel_cpus;
/* Hide the qatomic_read to make code a little easier on the eyes */
static inline uint32_t tb_cflags(const TranslationBlock *tb)
{
@ -519,10 +520,9 @@ static inline uint32_t tb_cflags(const TranslationBlock *tb)
}
/* current cflags for hashing/comparison */
static inline uint32_t curr_cflags(void)
static inline uint32_t curr_cflags(CPUState *cpu)
{
return (parallel_cpus ? CF_PARALLEL : 0)
| (icount_enabled() ? CF_USE_ICOUNT : 0);
return cpu->tcg_cflags;
}
/* TranslationBlock invalidate API */
@ -536,7 +536,7 @@ void tb_flush(CPUState *cpu);
void tb_phys_invalidate(TranslationBlock *tb, tb_page_addr_t page_addr);
TranslationBlock *tb_htable_lookup(CPUState *cpu, target_ulong pc,
target_ulong cs_base, uint32_t flags,
uint32_t cf_mask);
uint32_t cflags);
void tb_set_jmp_target(TranslationBlock *tb, int n, uintptr_t addr);
/* GETPC is the true target of the return instruction that we'll execute. */

View File

@ -17,30 +17,28 @@
#include "exec/tb-hash.h"
/* Might cause an exception, so have a longjmp destination ready */
static inline TranslationBlock *
tb_lookup__cpu_state(CPUState *cpu, target_ulong *pc, target_ulong *cs_base,
uint32_t *flags, uint32_t cf_mask)
static inline TranslationBlock *tb_lookup(CPUState *cpu, target_ulong pc,
target_ulong cs_base,
uint32_t flags, uint32_t cflags)
{
CPUArchState *env = (CPUArchState *)cpu->env_ptr;
TranslationBlock *tb;
uint32_t hash;
cpu_get_tb_cpu_state(env, pc, cs_base, flags);
hash = tb_jmp_cache_hash_func(*pc);
/* we should never be trying to look up an INVALID tb */
tcg_debug_assert(!(cflags & CF_INVALID));
hash = tb_jmp_cache_hash_func(pc);
tb = qatomic_rcu_read(&cpu->tb_jmp_cache[hash]);
cf_mask &= ~CF_CLUSTER_MASK;
cf_mask |= cpu->cluster_index << CF_CLUSTER_SHIFT;
if (likely(tb &&
tb->pc == *pc &&
tb->cs_base == *cs_base &&
tb->flags == *flags &&
tb->pc == pc &&
tb->cs_base == cs_base &&
tb->flags == flags &&
tb->trace_vcpu_dstate == *cpu->trace_dstate &&
(tb_cflags(tb) & (CF_HASH_MASK | CF_INVALID)) == cf_mask)) {
tb_cflags(tb) == cflags)) {
return tb;
}
tb = tb_htable_lookup(cpu, *pc, *cs_base, *flags, cf_mask);
tb = tb_htable_lookup(cpu, pc, cs_base, flags, cflags);
if (tb == NULL) {
return NULL;
}

View File

@ -282,6 +282,7 @@ struct qemu_work_item;
* to a cluster this will be UNASSIGNED_CLUSTER_INDEX; otherwise it will
* be the same as the cluster-id property of the CPU object's TYPE_CPU_CLUSTER
* QOM parent.
* @tcg_cflags: Pre-computed cflags for this cpu.
* @nr_cores: Number of cores within this CPU package.
* @nr_threads: Number of threads within this CPU.
* @running: #true if CPU is currently running (lockless).
@ -412,6 +413,7 @@ struct CPUState {
/* TODO Move common fields from CPUArchState here. */
int cpu_index;
int cluster_index;
uint32_t tcg_cflags;
uint32_t halted;
uint32_t can_do_io;
int32_t exception_index;

View File

@ -205,6 +205,7 @@ CPUArchState *cpu_copy(CPUArchState *env)
/* Reset non arch specific state */
cpu_reset(new_cpu);
new_cpu->tcg_cflags = cpu->tcg_cflags;
memcpy(new_env, env, sizeof(CPUArchState));
/* Clone all break/watchpoints.

View File

@ -82,9 +82,11 @@ static abi_ulong get_sigframe(struct target_sigaction *ka,
return (sp - frame_size) & -8ul;
}
/* Notice when we're in the middle of a gUSA region and reset.
Note that this will only occur for !parallel_cpus, as we will
translate such sequences differently in a parallel context. */
/*
* Notice when we're in the middle of a gUSA region and reset.
* Note that this will only occur when #CF_PARALLEL is unset, as we
* will translate such sequences differently in a parallel context.
*/
static void unwind_gusa(CPUSH4State *regs)
{
/* If the stack pointer is sufficiently negative, and we haven't

View File

@ -6481,6 +6481,16 @@ static int do_fork(CPUArchState *env, unsigned int flags, abi_ulong newsp,
/* Grab a mutex so that thread setup appears atomic. */
pthread_mutex_lock(&clone_lock);
/*
* If this is our first additional thread, we need to ensure we
* generate code for parallel execution and flush old translations.
* Do this now so that the copy gets CF_PARALLEL too.
*/
if (!(cpu->tcg_cflags & CF_PARALLEL)) {
cpu->tcg_cflags |= CF_PARALLEL;
tb_flush(cpu);
}
/* we create a new CPU instance. */
new_env = cpu_copy(env);
/* Init regs that differ from the parent. */
@ -6521,14 +6531,6 @@ static int do_fork(CPUArchState *env, unsigned int flags, abi_ulong newsp,
sigprocmask(SIG_BLOCK, &sigmask, &info.sigmask);
cpu->random_seed = qemu_guest_random_seed_thread_part1();
/* If this is our first additional thread, we need to ensure we
* generate code for parallel execution and flush old translations.
*/
if (!parallel_cpus) {
parallel_cpus = true;
tb_flush(cpu);
}
ret = pthread_create(&info.thread, &attr, clone_func, &info);
/* TODO: Free new CPU state if thread creation failed. */

View File

@ -937,7 +937,7 @@ void cpu_check_watchpoint(CPUState *cpu, vaddr addr, vaddr len,
cpu_loop_exit_restore(cpu, ra);
} else {
/* Force execution of one insn next time. */
cpu->cflags_next_tb = 1 | curr_cflags();
cpu->cflags_next_tb = 1 | curr_cflags(cpu);
mmap_unlock();
if (ra) {
cpu_restore_state(cpu, ra, true);

View File

@ -519,6 +519,39 @@ typedef enum {
I3606_BIC = 0x2f001400,
I3606_ORR = 0x0f001400,
/* AdvSIMD scalar shift by immediate */
I3609_SSHR = 0x5f000400,
I3609_SSRA = 0x5f001400,
I3609_SHL = 0x5f005400,
I3609_USHR = 0x7f000400,
I3609_USRA = 0x7f001400,
I3609_SLI = 0x7f005400,
/* AdvSIMD scalar three same */
I3611_SQADD = 0x5e200c00,
I3611_SQSUB = 0x5e202c00,
I3611_CMGT = 0x5e203400,
I3611_CMGE = 0x5e203c00,
I3611_SSHL = 0x5e204400,
I3611_ADD = 0x5e208400,
I3611_CMTST = 0x5e208c00,
I3611_UQADD = 0x7e200c00,
I3611_UQSUB = 0x7e202c00,
I3611_CMHI = 0x7e203400,
I3611_CMHS = 0x7e203c00,
I3611_USHL = 0x7e204400,
I3611_SUB = 0x7e208400,
I3611_CMEQ = 0x7e208c00,
/* AdvSIMD scalar two-reg misc */
I3612_CMGT0 = 0x5e208800,
I3612_CMEQ0 = 0x5e209800,
I3612_CMLT0 = 0x5e20a800,
I3612_ABS = 0x5e20b800,
I3612_CMGE0 = 0x7e208800,
I3612_CMLE0 = 0x7e209800,
I3612_NEG = 0x7e20b800,
/* AdvSIMD shift by immediate */
I3614_SSHR = 0x0f000400,
I3614_SSRA = 0x0f001400,
@ -561,7 +594,7 @@ typedef enum {
I3617_CMEQ0 = 0x0e209800,
I3617_CMLT0 = 0x0e20a800,
I3617_CMGE0 = 0x2e208800,
I3617_CMLE0 = 0x2e20a800,
I3617_CMLE0 = 0x2e209800,
I3617_NOT = 0x2e205800,
I3617_ABS = 0x0e20b800,
I3617_NEG = 0x2e20b800,
@ -735,6 +768,25 @@ static void tcg_out_insn_3606(TCGContext *s, AArch64Insn insn, bool q,
| (imm8 & 0xe0) << (16 - 5) | (imm8 & 0x1f) << 5);
}
static void tcg_out_insn_3609(TCGContext *s, AArch64Insn insn,
TCGReg rd, TCGReg rn, unsigned immhb)
{
tcg_out32(s, insn | immhb << 16 | (rn & 0x1f) << 5 | (rd & 0x1f));
}
static void tcg_out_insn_3611(TCGContext *s, AArch64Insn insn,
unsigned size, TCGReg rd, TCGReg rn, TCGReg rm)
{
tcg_out32(s, insn | (size << 22) | (rm & 0x1f) << 16
| (rn & 0x1f) << 5 | (rd & 0x1f));
}
static void tcg_out_insn_3612(TCGContext *s, AArch64Insn insn,
unsigned size, TCGReg rd, TCGReg rn)
{
tcg_out32(s, insn | (size << 22) | (rn & 0x1f) << 5 | (rd & 0x1f));
}
static void tcg_out_insn_3614(TCGContext *s, AArch64Insn insn, bool q,
TCGReg rd, TCGReg rn, unsigned immhb)
{
@ -1410,10 +1462,10 @@ static void tcg_out_addsubi(TCGContext *s, int ext, TCGReg rd,
}
}
static inline void tcg_out_addsub2(TCGContext *s, TCGType ext, TCGReg rl,
TCGReg rh, TCGReg al, TCGReg ah,
tcg_target_long bl, tcg_target_long bh,
bool const_bl, bool const_bh, bool sub)
static void tcg_out_addsub2(TCGContext *s, TCGType ext, TCGReg rl,
TCGReg rh, TCGReg al, TCGReg ah,
tcg_target_long bl, tcg_target_long bh,
bool const_bl, bool const_bh, bool sub)
{
TCGReg orig_rl = rl;
AArch64Insn insn;
@ -1423,11 +1475,13 @@ static inline void tcg_out_addsub2(TCGContext *s, TCGType ext, TCGReg rl,
}
if (const_bl) {
insn = I3401_ADDSI;
if ((bl < 0) ^ sub) {
insn = I3401_SUBSI;
if (bl < 0) {
bl = -bl;
insn = sub ? I3401_ADDSI : I3401_SUBSI;
} else {
insn = sub ? I3401_SUBSI : I3401_ADDSI;
}
if (unlikely(al == TCG_REG_XZR)) {
/* ??? We want to allow al to be zero for the benefit of
negation via subtraction. However, that leaves open the
@ -2234,23 +2288,38 @@ static void tcg_out_vec_op(TCGContext *s, TCGOpcode opc,
unsigned vecl, unsigned vece,
const TCGArg *args, const int *const_args)
{
static const AArch64Insn cmp_insn[16] = {
static const AArch64Insn cmp_vec_insn[16] = {
[TCG_COND_EQ] = I3616_CMEQ,
[TCG_COND_GT] = I3616_CMGT,
[TCG_COND_GE] = I3616_CMGE,
[TCG_COND_GTU] = I3616_CMHI,
[TCG_COND_GEU] = I3616_CMHS,
};
static const AArch64Insn cmp0_insn[16] = {
static const AArch64Insn cmp_scalar_insn[16] = {
[TCG_COND_EQ] = I3611_CMEQ,
[TCG_COND_GT] = I3611_CMGT,
[TCG_COND_GE] = I3611_CMGE,
[TCG_COND_GTU] = I3611_CMHI,
[TCG_COND_GEU] = I3611_CMHS,
};
static const AArch64Insn cmp0_vec_insn[16] = {
[TCG_COND_EQ] = I3617_CMEQ0,
[TCG_COND_GT] = I3617_CMGT0,
[TCG_COND_GE] = I3617_CMGE0,
[TCG_COND_LT] = I3617_CMLT0,
[TCG_COND_LE] = I3617_CMLE0,
};
static const AArch64Insn cmp0_scalar_insn[16] = {
[TCG_COND_EQ] = I3612_CMEQ0,
[TCG_COND_GT] = I3612_CMGT0,
[TCG_COND_GE] = I3612_CMGE0,
[TCG_COND_LT] = I3612_CMLT0,
[TCG_COND_LE] = I3612_CMLE0,
};
TCGType type = vecl + TCG_TYPE_V64;
unsigned is_q = vecl;
bool is_scalar = !is_q && vece == MO_64;
TCGArg a0, a1, a2, a3;
int cmode, imm8;
@ -2269,19 +2338,35 @@ static void tcg_out_vec_op(TCGContext *s, TCGOpcode opc,
tcg_out_dupm_vec(s, type, vece, a0, a1, a2);
break;
case INDEX_op_add_vec:
tcg_out_insn(s, 3616, ADD, is_q, vece, a0, a1, a2);
if (is_scalar) {
tcg_out_insn(s, 3611, ADD, vece, a0, a1, a2);
} else {
tcg_out_insn(s, 3616, ADD, is_q, vece, a0, a1, a2);
}
break;
case INDEX_op_sub_vec:
tcg_out_insn(s, 3616, SUB, is_q, vece, a0, a1, a2);
if (is_scalar) {
tcg_out_insn(s, 3611, SUB, vece, a0, a1, a2);
} else {
tcg_out_insn(s, 3616, SUB, is_q, vece, a0, a1, a2);
}
break;
case INDEX_op_mul_vec:
tcg_out_insn(s, 3616, MUL, is_q, vece, a0, a1, a2);
break;
case INDEX_op_neg_vec:
tcg_out_insn(s, 3617, NEG, is_q, vece, a0, a1);
if (is_scalar) {
tcg_out_insn(s, 3612, NEG, vece, a0, a1);
} else {
tcg_out_insn(s, 3617, NEG, is_q, vece, a0, a1);
}
break;
case INDEX_op_abs_vec:
tcg_out_insn(s, 3617, ABS, is_q, vece, a0, a1);
if (is_scalar) {
tcg_out_insn(s, 3612, ABS, vece, a0, a1);
} else {
tcg_out_insn(s, 3617, ABS, is_q, vece, a0, a1);
}
break;
case INDEX_op_and_vec:
if (const_args[2]) {
@ -2335,16 +2420,32 @@ static void tcg_out_vec_op(TCGContext *s, TCGOpcode opc,
tcg_out_insn(s, 3616, EOR, is_q, 0, a0, a1, a2);
break;
case INDEX_op_ssadd_vec:
tcg_out_insn(s, 3616, SQADD, is_q, vece, a0, a1, a2);
if (is_scalar) {
tcg_out_insn(s, 3611, SQADD, vece, a0, a1, a2);
} else {
tcg_out_insn(s, 3616, SQADD, is_q, vece, a0, a1, a2);
}
break;
case INDEX_op_sssub_vec:
tcg_out_insn(s, 3616, SQSUB, is_q, vece, a0, a1, a2);
if (is_scalar) {
tcg_out_insn(s, 3611, SQSUB, vece, a0, a1, a2);
} else {
tcg_out_insn(s, 3616, SQSUB, is_q, vece, a0, a1, a2);
}
break;
case INDEX_op_usadd_vec:
tcg_out_insn(s, 3616, UQADD, is_q, vece, a0, a1, a2);
if (is_scalar) {
tcg_out_insn(s, 3611, UQADD, vece, a0, a1, a2);
} else {
tcg_out_insn(s, 3616, UQADD, is_q, vece, a0, a1, a2);
}
break;
case INDEX_op_ussub_vec:
tcg_out_insn(s, 3616, UQSUB, is_q, vece, a0, a1, a2);
if (is_scalar) {
tcg_out_insn(s, 3611, UQSUB, vece, a0, a1, a2);
} else {
tcg_out_insn(s, 3616, UQSUB, is_q, vece, a0, a1, a2);
}
break;
case INDEX_op_smax_vec:
tcg_out_insn(s, 3616, SMAX, is_q, vece, a0, a1, a2);
@ -2362,22 +2463,46 @@ static void tcg_out_vec_op(TCGContext *s, TCGOpcode opc,
tcg_out_insn(s, 3617, NOT, is_q, 0, a0, a1);
break;
case INDEX_op_shli_vec:
tcg_out_insn(s, 3614, SHL, is_q, a0, a1, a2 + (8 << vece));
if (is_scalar) {
tcg_out_insn(s, 3609, SHL, a0, a1, a2 + (8 << vece));
} else {
tcg_out_insn(s, 3614, SHL, is_q, a0, a1, a2 + (8 << vece));
}
break;
case INDEX_op_shri_vec:
tcg_out_insn(s, 3614, USHR, is_q, a0, a1, (16 << vece) - a2);
if (is_scalar) {
tcg_out_insn(s, 3609, USHR, a0, a1, (16 << vece) - a2);
} else {
tcg_out_insn(s, 3614, USHR, is_q, a0, a1, (16 << vece) - a2);
}
break;
case INDEX_op_sari_vec:
tcg_out_insn(s, 3614, SSHR, is_q, a0, a1, (16 << vece) - a2);
if (is_scalar) {
tcg_out_insn(s, 3609, SSHR, a0, a1, (16 << vece) - a2);
} else {
tcg_out_insn(s, 3614, SSHR, is_q, a0, a1, (16 << vece) - a2);
}
break;
case INDEX_op_aa64_sli_vec:
tcg_out_insn(s, 3614, SLI, is_q, a0, a2, args[3] + (8 << vece));
if (is_scalar) {
tcg_out_insn(s, 3609, SLI, a0, a2, args[3] + (8 << vece));
} else {
tcg_out_insn(s, 3614, SLI, is_q, a0, a2, args[3] + (8 << vece));
}
break;
case INDEX_op_shlv_vec:
tcg_out_insn(s, 3616, USHL, is_q, vece, a0, a1, a2);
if (is_scalar) {
tcg_out_insn(s, 3611, USHL, vece, a0, a1, a2);
} else {
tcg_out_insn(s, 3616, USHL, is_q, vece, a0, a1, a2);
}
break;
case INDEX_op_aa64_sshl_vec:
tcg_out_insn(s, 3616, SSHL, is_q, vece, a0, a1, a2);
if (is_scalar) {
tcg_out_insn(s, 3611, SSHL, vece, a0, a1, a2);
} else {
tcg_out_insn(s, 3616, SSHL, is_q, vece, a0, a1, a2);
}
break;
case INDEX_op_cmp_vec:
{
@ -2386,30 +2511,58 @@ static void tcg_out_vec_op(TCGContext *s, TCGOpcode opc,
if (cond == TCG_COND_NE) {
if (const_args[2]) {
tcg_out_insn(s, 3616, CMTST, is_q, vece, a0, a1, a1);
if (is_scalar) {
tcg_out_insn(s, 3611, CMTST, vece, a0, a1, a1);
} else {
tcg_out_insn(s, 3616, CMTST, is_q, vece, a0, a1, a1);
}
} else {
tcg_out_insn(s, 3616, CMEQ, is_q, vece, a0, a1, a2);
if (is_scalar) {
tcg_out_insn(s, 3611, CMEQ, vece, a0, a1, a2);
} else {
tcg_out_insn(s, 3616, CMEQ, is_q, vece, a0, a1, a2);
}
tcg_out_insn(s, 3617, NOT, is_q, 0, a0, a0);
}
} else {
if (const_args[2]) {
insn = cmp0_insn[cond];
if (insn) {
tcg_out_insn_3617(s, insn, is_q, vece, a0, a1);
break;
if (is_scalar) {
insn = cmp0_scalar_insn[cond];
if (insn) {
tcg_out_insn_3612(s, insn, vece, a0, a1);
break;
}
} else {
insn = cmp0_vec_insn[cond];
if (insn) {
tcg_out_insn_3617(s, insn, is_q, vece, a0, a1);
break;
}
}
tcg_out_dupi_vec(s, type, MO_8, TCG_VEC_TMP, 0);
a2 = TCG_VEC_TMP;
}
insn = cmp_insn[cond];
if (insn == 0) {
TCGArg t;
t = a1, a1 = a2, a2 = t;
cond = tcg_swap_cond(cond);
insn = cmp_insn[cond];
tcg_debug_assert(insn != 0);
if (is_scalar) {
insn = cmp_scalar_insn[cond];
if (insn == 0) {
TCGArg t;
t = a1, a1 = a2, a2 = t;
cond = tcg_swap_cond(cond);
insn = cmp_scalar_insn[cond];
tcg_debug_assert(insn != 0);
}
tcg_out_insn_3611(s, insn, vece, a0, a1, a2);
} else {
insn = cmp_vec_insn[cond];
if (insn == 0) {
TCGArg t;
t = a1, a1 = a2, a2 = t;
cond = tcg_swap_cond(cond);
insn = cmp_vec_insn[cond];
tcg_debug_assert(insn != 0);
}
tcg_out_insn_3616(s, insn, is_q, vece, a0, a1, a2);
}
tcg_out_insn_3616(s, insn, is_q, vece, a0, a1, a2);
}
}
break;

View File

@ -346,6 +346,12 @@ static void set_jmp_reset_offset(TCGContext *s, int which)
s->tb_jmp_reset_offset[which] = tcg_current_code_size(s);
}
/* Signal overflow, starting over with fewer guest insns. */
static void QEMU_NORETURN tcg_raise_tb_overflow(TCGContext *s)
{
siglongjmp(s->jmp_trans, -2);
}
#define C_PFX1(P, A) P##A
#define C_PFX2(P, A, B) P##A##_##B
#define C_PFX3(P, A, B, C) P##A##_##B##_##C
@ -507,11 +513,21 @@ static void tcg_region_trees_init(void)
}
}
static struct tcg_region_tree *tc_ptr_to_region_tree(const void *cp)
static struct tcg_region_tree *tc_ptr_to_region_tree(const void *p)
{
void *p = tcg_splitwx_to_rw(cp);
size_t region_idx;
/*
* Like tcg_splitwx_to_rw, with no assert. The pc may come from
* a signal handler over which the caller has no control.
*/
if (!in_code_gen_buffer(p)) {
p -= tcg_splitwx_diff;
if (!in_code_gen_buffer(p)) {
return NULL;
}
}
if (p < region.start_aligned) {
region_idx = 0;
} else {
@ -530,6 +546,7 @@ void tcg_tb_insert(TranslationBlock *tb)
{
struct tcg_region_tree *rt = tc_ptr_to_region_tree(tb->tc.ptr);
g_assert(rt != NULL);
qemu_mutex_lock(&rt->lock);
g_tree_insert(rt->tree, &tb->tc, tb);
qemu_mutex_unlock(&rt->lock);
@ -539,6 +556,7 @@ void tcg_tb_remove(TranslationBlock *tb)
{
struct tcg_region_tree *rt = tc_ptr_to_region_tree(tb->tc.ptr);
g_assert(rt != NULL);
qemu_mutex_lock(&rt->lock);
g_tree_remove(rt->tree, &tb->tc);
qemu_mutex_unlock(&rt->lock);
@ -555,6 +573,10 @@ TranslationBlock *tcg_tb_lookup(uintptr_t tc_ptr)
TranslationBlock *tb;
struct tb_tc s = { .ptr = (void *)tc_ptr };
if (rt == NULL) {
return NULL;
}
qemu_mutex_lock(&rt->lock);
tb = g_tree_lookup(rt->tree, &s);
qemu_mutex_unlock(&rt->lock);
@ -1310,8 +1332,7 @@ static TCGTemp *tcg_temp_alloc(TCGContext *s)
int n = s->nb_temps++;
if (n >= TCG_MAX_TEMPS) {
/* Signal overflow, starting over with fewer guest insns. */
siglongjmp(s->jmp_trans, -2);
tcg_raise_tb_overflow(s);
}
return memset(&s->temps[n], 0, sizeof(TCGTemp));
}

526
tcg/tci.c
View File

@ -57,49 +57,6 @@ static tcg_target_ulong tci_read_reg(const tcg_target_ulong *regs, TCGReg index)
return regs[index];
}
#if TCG_TARGET_HAS_ext8s_i32 || TCG_TARGET_HAS_ext8s_i64
static int8_t tci_read_reg8s(const tcg_target_ulong *regs, TCGReg index)
{
return (int8_t)tci_read_reg(regs, index);
}
#endif
#if TCG_TARGET_HAS_ext16s_i32 || TCG_TARGET_HAS_ext16s_i64
static int16_t tci_read_reg16s(const tcg_target_ulong *regs, TCGReg index)
{
return (int16_t)tci_read_reg(regs, index);
}
#endif
#if TCG_TARGET_REG_BITS == 64
static int32_t tci_read_reg32s(const tcg_target_ulong *regs, TCGReg index)
{
return (int32_t)tci_read_reg(regs, index);
}
#endif
static uint8_t tci_read_reg8(const tcg_target_ulong *regs, TCGReg index)
{
return (uint8_t)tci_read_reg(regs, index);
}
static uint16_t tci_read_reg16(const tcg_target_ulong *regs, TCGReg index)
{
return (uint16_t)tci_read_reg(regs, index);
}
static uint32_t tci_read_reg32(const tcg_target_ulong *regs, TCGReg index)
{
return (uint32_t)tci_read_reg(regs, index);
}
#if TCG_TARGET_REG_BITS == 64
static uint64_t tci_read_reg64(const tcg_target_ulong *regs, TCGReg index)
{
return tci_read_reg(regs, index);
}
#endif
static void
tci_write_reg(tcg_target_ulong *regs, TCGReg index, tcg_target_ulong value)
{
@ -169,78 +126,20 @@ tci_read_r(const tcg_target_ulong *regs, const uint8_t **tb_ptr)
return value;
}
/* Read indexed register (8 bit) from bytecode. */
static uint8_t tci_read_r8(const tcg_target_ulong *regs, const uint8_t **tb_ptr)
{
uint8_t value = tci_read_reg8(regs, **tb_ptr);
*tb_ptr += 1;
return value;
}
#if TCG_TARGET_HAS_ext8s_i32 || TCG_TARGET_HAS_ext8s_i64
/* Read indexed register (8 bit signed) from bytecode. */
static int8_t tci_read_r8s(const tcg_target_ulong *regs, const uint8_t **tb_ptr)
{
int8_t value = tci_read_reg8s(regs, **tb_ptr);
*tb_ptr += 1;
return value;
}
#endif
/* Read indexed register (16 bit) from bytecode. */
static uint16_t tci_read_r16(const tcg_target_ulong *regs,
const uint8_t **tb_ptr)
{
uint16_t value = tci_read_reg16(regs, **tb_ptr);
*tb_ptr += 1;
return value;
}
#if TCG_TARGET_HAS_ext16s_i32 || TCG_TARGET_HAS_ext16s_i64
/* Read indexed register (16 bit signed) from bytecode. */
static int16_t tci_read_r16s(const tcg_target_ulong *regs,
const uint8_t **tb_ptr)
{
int16_t value = tci_read_reg16s(regs, **tb_ptr);
*tb_ptr += 1;
return value;
}
#endif
/* Read indexed register (32 bit) from bytecode. */
static uint32_t tci_read_r32(const tcg_target_ulong *regs,
const uint8_t **tb_ptr)
{
uint32_t value = tci_read_reg32(regs, **tb_ptr);
*tb_ptr += 1;
return value;
}
#if TCG_TARGET_REG_BITS == 32
/* Read two indexed registers (2 * 32 bit) from bytecode. */
static uint64_t tci_read_r64(const tcg_target_ulong *regs,
const uint8_t **tb_ptr)
{
uint32_t low = tci_read_r32(regs, tb_ptr);
return tci_uint64(tci_read_r32(regs, tb_ptr), low);
uint32_t low = tci_read_r(regs, tb_ptr);
return tci_uint64(tci_read_r(regs, tb_ptr), low);
}
#elif TCG_TARGET_REG_BITS == 64
/* Read indexed register (32 bit signed) from bytecode. */
static int32_t tci_read_r32s(const tcg_target_ulong *regs,
const uint8_t **tb_ptr)
{
int32_t value = tci_read_reg32s(regs, **tb_ptr);
*tb_ptr += 1;
return value;
}
/* Read indexed register (64 bit) from bytecode. */
static uint64_t tci_read_r64(const tcg_target_ulong *regs,
const uint8_t **tb_ptr)
{
uint64_t value = tci_read_reg64(regs, **tb_ptr);
*tb_ptr += 1;
return value;
return tci_read_r(regs, tb_ptr);
}
#endif
@ -346,51 +245,34 @@ static bool tci_compare64(uint64_t u0, uint64_t u1, TCGCond condition)
return result;
}
#ifdef CONFIG_SOFTMMU
# define qemu_ld_ub \
helper_ret_ldub_mmu(env, taddr, oi, (uintptr_t)tb_ptr)
# define qemu_ld_leuw \
helper_le_lduw_mmu(env, taddr, oi, (uintptr_t)tb_ptr)
# define qemu_ld_leul \
helper_le_ldul_mmu(env, taddr, oi, (uintptr_t)tb_ptr)
# define qemu_ld_leq \
helper_le_ldq_mmu(env, taddr, oi, (uintptr_t)tb_ptr)
# define qemu_ld_beuw \
helper_be_lduw_mmu(env, taddr, oi, (uintptr_t)tb_ptr)
# define qemu_ld_beul \
helper_be_ldul_mmu(env, taddr, oi, (uintptr_t)tb_ptr)
# define qemu_ld_beq \
helper_be_ldq_mmu(env, taddr, oi, (uintptr_t)tb_ptr)
# define qemu_st_b(X) \
helper_ret_stb_mmu(env, taddr, X, oi, (uintptr_t)tb_ptr)
# define qemu_st_lew(X) \
helper_le_stw_mmu(env, taddr, X, oi, (uintptr_t)tb_ptr)
# define qemu_st_lel(X) \
helper_le_stl_mmu(env, taddr, X, oi, (uintptr_t)tb_ptr)
# define qemu_st_leq(X) \
helper_le_stq_mmu(env, taddr, X, oi, (uintptr_t)tb_ptr)
# define qemu_st_bew(X) \
helper_be_stw_mmu(env, taddr, X, oi, (uintptr_t)tb_ptr)
# define qemu_st_bel(X) \
helper_be_stl_mmu(env, taddr, X, oi, (uintptr_t)tb_ptr)
# define qemu_st_beq(X) \
helper_be_stq_mmu(env, taddr, X, oi, (uintptr_t)tb_ptr)
#else
# define qemu_ld_ub ldub_p(g2h(taddr))
# define qemu_ld_leuw lduw_le_p(g2h(taddr))
# define qemu_ld_leul (uint32_t)ldl_le_p(g2h(taddr))
# define qemu_ld_leq ldq_le_p(g2h(taddr))
# define qemu_ld_beuw lduw_be_p(g2h(taddr))
# define qemu_ld_beul (uint32_t)ldl_be_p(g2h(taddr))
# define qemu_ld_beq ldq_be_p(g2h(taddr))
# define qemu_st_b(X) stb_p(g2h(taddr), X)
# define qemu_st_lew(X) stw_le_p(g2h(taddr), X)
# define qemu_st_lel(X) stl_le_p(g2h(taddr), X)
# define qemu_st_leq(X) stq_le_p(g2h(taddr), X)
# define qemu_st_bew(X) stw_be_p(g2h(taddr), X)
# define qemu_st_bel(X) stl_be_p(g2h(taddr), X)
# define qemu_st_beq(X) stq_be_p(g2h(taddr), X)
#endif
#define qemu_ld_ub \
cpu_ldub_mmuidx_ra(env, taddr, get_mmuidx(oi), (uintptr_t)tb_ptr)
#define qemu_ld_leuw \
cpu_lduw_le_mmuidx_ra(env, taddr, get_mmuidx(oi), (uintptr_t)tb_ptr)
#define qemu_ld_leul \
cpu_ldl_le_mmuidx_ra(env, taddr, get_mmuidx(oi), (uintptr_t)tb_ptr)
#define qemu_ld_leq \
cpu_ldq_le_mmuidx_ra(env, taddr, get_mmuidx(oi), (uintptr_t)tb_ptr)
#define qemu_ld_beuw \
cpu_lduw_be_mmuidx_ra(env, taddr, get_mmuidx(oi), (uintptr_t)tb_ptr)
#define qemu_ld_beul \
cpu_ldl_be_mmuidx_ra(env, taddr, get_mmuidx(oi), (uintptr_t)tb_ptr)
#define qemu_ld_beq \
cpu_ldq_be_mmuidx_ra(env, taddr, get_mmuidx(oi), (uintptr_t)tb_ptr)
#define qemu_st_b(X) \
cpu_stb_mmuidx_ra(env, taddr, X, get_mmuidx(oi), (uintptr_t)tb_ptr)
#define qemu_st_lew(X) \
cpu_stw_le_mmuidx_ra(env, taddr, X, get_mmuidx(oi), (uintptr_t)tb_ptr)
#define qemu_st_lel(X) \
cpu_stl_le_mmuidx_ra(env, taddr, X, get_mmuidx(oi), (uintptr_t)tb_ptr)
#define qemu_st_leq(X) \
cpu_stq_le_mmuidx_ra(env, taddr, X, get_mmuidx(oi), (uintptr_t)tb_ptr)
#define qemu_st_bew(X) \
cpu_stw_be_mmuidx_ra(env, taddr, X, get_mmuidx(oi), (uintptr_t)tb_ptr)
#define qemu_st_bel(X) \
cpu_stl_be_mmuidx_ra(env, taddr, X, get_mmuidx(oi), (uintptr_t)tb_ptr)
#define qemu_st_beq(X) \
cpu_stq_be_mmuidx_ra(env, taddr, X, get_mmuidx(oi), (uintptr_t)tb_ptr)
#if TCG_TARGET_REG_BITS == 64
# define CASE_32_64(x) \
@ -483,8 +365,8 @@ uintptr_t QEMU_DISABLE_CFI tcg_qemu_tb_exec(CPUArchState *env,
continue;
case INDEX_op_setcond_i32:
t0 = *tb_ptr++;
t1 = tci_read_r32(regs, &tb_ptr);
t2 = tci_read_r32(regs, &tb_ptr);
t1 = tci_read_r(regs, &tb_ptr);
t2 = tci_read_r(regs, &tb_ptr);
condition = *tb_ptr++;
tci_write_reg(regs, t0, tci_compare32(t1, t2, condition));
break;
@ -499,15 +381,15 @@ uintptr_t QEMU_DISABLE_CFI tcg_qemu_tb_exec(CPUArchState *env,
#elif TCG_TARGET_REG_BITS == 64
case INDEX_op_setcond_i64:
t0 = *tb_ptr++;
t1 = tci_read_r64(regs, &tb_ptr);
t2 = tci_read_r64(regs, &tb_ptr);
t1 = tci_read_r(regs, &tb_ptr);
t2 = tci_read_r(regs, &tb_ptr);
condition = *tb_ptr++;
tci_write_reg(regs, t0, tci_compare64(t1, t2, condition));
break;
#endif
case INDEX_op_mov_i32:
CASE_32_64(mov)
t0 = *tb_ptr++;
t1 = tci_read_r32(regs, &tb_ptr);
t1 = tci_read_r(regs, &tb_ptr);
tci_write_reg(regs, t0, t1);
break;
case INDEX_op_tci_movi_i32:
@ -550,127 +432,130 @@ uintptr_t QEMU_DISABLE_CFI tcg_qemu_tb_exec(CPUArchState *env,
tci_write_reg(regs, t0, *(uint32_t *)(t1 + t2));
break;
CASE_32_64(st8)
t0 = tci_read_r8(regs, &tb_ptr);
t0 = tci_read_r(regs, &tb_ptr);
t1 = tci_read_r(regs, &tb_ptr);
t2 = tci_read_s32(&tb_ptr);
*(uint8_t *)(t1 + t2) = t0;
break;
CASE_32_64(st16)
t0 = tci_read_r16(regs, &tb_ptr);
t0 = tci_read_r(regs, &tb_ptr);
t1 = tci_read_r(regs, &tb_ptr);
t2 = tci_read_s32(&tb_ptr);
*(uint16_t *)(t1 + t2) = t0;
break;
case INDEX_op_st_i32:
CASE_64(st32)
t0 = tci_read_r32(regs, &tb_ptr);
t0 = tci_read_r(regs, &tb_ptr);
t1 = tci_read_r(regs, &tb_ptr);
t2 = tci_read_s32(&tb_ptr);
*(uint32_t *)(t1 + t2) = t0;
break;
/* Arithmetic operations (32 bit). */
/* Arithmetic operations (mixed 32/64 bit). */
case INDEX_op_add_i32:
CASE_32_64(add)
t0 = *tb_ptr++;
t1 = tci_read_r32(regs, &tb_ptr);
t2 = tci_read_r32(regs, &tb_ptr);
t1 = tci_read_r(regs, &tb_ptr);
t2 = tci_read_r(regs, &tb_ptr);
tci_write_reg(regs, t0, t1 + t2);
break;
case INDEX_op_sub_i32:
CASE_32_64(sub)
t0 = *tb_ptr++;
t1 = tci_read_r32(regs, &tb_ptr);
t2 = tci_read_r32(regs, &tb_ptr);
t1 = tci_read_r(regs, &tb_ptr);
t2 = tci_read_r(regs, &tb_ptr);
tci_write_reg(regs, t0, t1 - t2);
break;
case INDEX_op_mul_i32:
CASE_32_64(mul)
t0 = *tb_ptr++;
t1 = tci_read_r32(regs, &tb_ptr);
t2 = tci_read_r32(regs, &tb_ptr);
t1 = tci_read_r(regs, &tb_ptr);
t2 = tci_read_r(regs, &tb_ptr);
tci_write_reg(regs, t0, t1 * t2);
break;
CASE_32_64(and)
t0 = *tb_ptr++;
t1 = tci_read_r(regs, &tb_ptr);
t2 = tci_read_r(regs, &tb_ptr);
tci_write_reg(regs, t0, t1 & t2);
break;
CASE_32_64(or)
t0 = *tb_ptr++;
t1 = tci_read_r(regs, &tb_ptr);
t2 = tci_read_r(regs, &tb_ptr);
tci_write_reg(regs, t0, t1 | t2);
break;
CASE_32_64(xor)
t0 = *tb_ptr++;
t1 = tci_read_r(regs, &tb_ptr);
t2 = tci_read_r(regs, &tb_ptr);
tci_write_reg(regs, t0, t1 ^ t2);
break;
/* Arithmetic operations (32 bit). */
case INDEX_op_div_i32:
t0 = *tb_ptr++;
t1 = tci_read_r32(regs, &tb_ptr);
t2 = tci_read_r32(regs, &tb_ptr);
t1 = tci_read_r(regs, &tb_ptr);
t2 = tci_read_r(regs, &tb_ptr);
tci_write_reg(regs, t0, (int32_t)t1 / (int32_t)t2);
break;
case INDEX_op_divu_i32:
t0 = *tb_ptr++;
t1 = tci_read_r32(regs, &tb_ptr);
t2 = tci_read_r32(regs, &tb_ptr);
tci_write_reg(regs, t0, t1 / t2);
t1 = tci_read_r(regs, &tb_ptr);
t2 = tci_read_r(regs, &tb_ptr);
tci_write_reg(regs, t0, (uint32_t)t1 / (uint32_t)t2);
break;
case INDEX_op_rem_i32:
t0 = *tb_ptr++;
t1 = tci_read_r32(regs, &tb_ptr);
t2 = tci_read_r32(regs, &tb_ptr);
t1 = tci_read_r(regs, &tb_ptr);
t2 = tci_read_r(regs, &tb_ptr);
tci_write_reg(regs, t0, (int32_t)t1 % (int32_t)t2);
break;
case INDEX_op_remu_i32:
t0 = *tb_ptr++;
t1 = tci_read_r32(regs, &tb_ptr);
t2 = tci_read_r32(regs, &tb_ptr);
tci_write_reg(regs, t0, t1 % t2);
break;
case INDEX_op_and_i32:
t0 = *tb_ptr++;
t1 = tci_read_r32(regs, &tb_ptr);
t2 = tci_read_r32(regs, &tb_ptr);
tci_write_reg(regs, t0, t1 & t2);
break;
case INDEX_op_or_i32:
t0 = *tb_ptr++;
t1 = tci_read_r32(regs, &tb_ptr);
t2 = tci_read_r32(regs, &tb_ptr);
tci_write_reg(regs, t0, t1 | t2);
break;
case INDEX_op_xor_i32:
t0 = *tb_ptr++;
t1 = tci_read_r32(regs, &tb_ptr);
t2 = tci_read_r32(regs, &tb_ptr);
tci_write_reg(regs, t0, t1 ^ t2);
t1 = tci_read_r(regs, &tb_ptr);
t2 = tci_read_r(regs, &tb_ptr);
tci_write_reg(regs, t0, (uint32_t)t1 % (uint32_t)t2);
break;
/* Shift/rotate operations (32 bit). */
case INDEX_op_shl_i32:
t0 = *tb_ptr++;
t1 = tci_read_r32(regs, &tb_ptr);
t2 = tci_read_r32(regs, &tb_ptr);
tci_write_reg(regs, t0, t1 << (t2 & 31));
t1 = tci_read_r(regs, &tb_ptr);
t2 = tci_read_r(regs, &tb_ptr);
tci_write_reg(regs, t0, (uint32_t)t1 << (t2 & 31));
break;
case INDEX_op_shr_i32:
t0 = *tb_ptr++;
t1 = tci_read_r32(regs, &tb_ptr);
t2 = tci_read_r32(regs, &tb_ptr);
tci_write_reg(regs, t0, t1 >> (t2 & 31));
t1 = tci_read_r(regs, &tb_ptr);
t2 = tci_read_r(regs, &tb_ptr);
tci_write_reg(regs, t0, (uint32_t)t1 >> (t2 & 31));
break;
case INDEX_op_sar_i32:
t0 = *tb_ptr++;
t1 = tci_read_r32(regs, &tb_ptr);
t2 = tci_read_r32(regs, &tb_ptr);
tci_write_reg(regs, t0, ((int32_t)t1 >> (t2 & 31)));
t1 = tci_read_r(regs, &tb_ptr);
t2 = tci_read_r(regs, &tb_ptr);
tci_write_reg(regs, t0, (int32_t)t1 >> (t2 & 31));
break;
#if TCG_TARGET_HAS_rot_i32
case INDEX_op_rotl_i32:
t0 = *tb_ptr++;
t1 = tci_read_r32(regs, &tb_ptr);
t2 = tci_read_r32(regs, &tb_ptr);
t1 = tci_read_r(regs, &tb_ptr);
t2 = tci_read_r(regs, &tb_ptr);
tci_write_reg(regs, t0, rol32(t1, t2 & 31));
break;
case INDEX_op_rotr_i32:
t0 = *tb_ptr++;
t1 = tci_read_r32(regs, &tb_ptr);
t2 = tci_read_r32(regs, &tb_ptr);
t1 = tci_read_r(regs, &tb_ptr);
t2 = tci_read_r(regs, &tb_ptr);
tci_write_reg(regs, t0, ror32(t1, t2 & 31));
break;
#endif
#if TCG_TARGET_HAS_deposit_i32
case INDEX_op_deposit_i32:
t0 = *tb_ptr++;
t1 = tci_read_r32(regs, &tb_ptr);
t2 = tci_read_r32(regs, &tb_ptr);
t1 = tci_read_r(regs, &tb_ptr);
t2 = tci_read_r(regs, &tb_ptr);
tmp16 = *tb_ptr++;
tmp8 = *tb_ptr++;
tmp32 = (((1 << tmp8) - 1) << tmp16);
@ -678,8 +563,8 @@ uintptr_t QEMU_DISABLE_CFI tcg_qemu_tb_exec(CPUArchState *env,
break;
#endif
case INDEX_op_brcond_i32:
t0 = tci_read_r32(regs, &tb_ptr);
t1 = tci_read_r32(regs, &tb_ptr);
t0 = tci_read_r(regs, &tb_ptr);
t1 = tci_read_r(regs, &tb_ptr);
condition = *tb_ptr++;
label = tci_read_label(&tb_ptr);
if (tci_compare32(t0, t1, condition)) {
@ -717,73 +602,68 @@ uintptr_t QEMU_DISABLE_CFI tcg_qemu_tb_exec(CPUArchState *env,
case INDEX_op_mulu2_i32:
t0 = *tb_ptr++;
t1 = *tb_ptr++;
t2 = tci_read_r32(regs, &tb_ptr);
tmp64 = tci_read_r32(regs, &tb_ptr);
tci_write_reg64(regs, t1, t0, t2 * tmp64);
t2 = tci_read_r(regs, &tb_ptr);
tmp64 = (uint32_t)tci_read_r(regs, &tb_ptr);
tci_write_reg64(regs, t1, t0, (uint32_t)t2 * tmp64);
break;
#endif /* TCG_TARGET_REG_BITS == 32 */
#if TCG_TARGET_HAS_ext8s_i32
case INDEX_op_ext8s_i32:
#if TCG_TARGET_HAS_ext8s_i32 || TCG_TARGET_HAS_ext8s_i64
CASE_32_64(ext8s)
t0 = *tb_ptr++;
t1 = tci_read_r8s(regs, &tb_ptr);
tci_write_reg(regs, t0, t1);
t1 = tci_read_r(regs, &tb_ptr);
tci_write_reg(regs, t0, (int8_t)t1);
break;
#endif
#if TCG_TARGET_HAS_ext16s_i32
case INDEX_op_ext16s_i32:
#if TCG_TARGET_HAS_ext16s_i32 || TCG_TARGET_HAS_ext16s_i64
CASE_32_64(ext16s)
t0 = *tb_ptr++;
t1 = tci_read_r16s(regs, &tb_ptr);
tci_write_reg(regs, t0, t1);
t1 = tci_read_r(regs, &tb_ptr);
tci_write_reg(regs, t0, (int16_t)t1);
break;
#endif
#if TCG_TARGET_HAS_ext8u_i32
case INDEX_op_ext8u_i32:
#if TCG_TARGET_HAS_ext8u_i32 || TCG_TARGET_HAS_ext8u_i64
CASE_32_64(ext8u)
t0 = *tb_ptr++;
t1 = tci_read_r8(regs, &tb_ptr);
tci_write_reg(regs, t0, t1);
t1 = tci_read_r(regs, &tb_ptr);
tci_write_reg(regs, t0, (uint8_t)t1);
break;
#endif
#if TCG_TARGET_HAS_ext16u_i32
case INDEX_op_ext16u_i32:
#if TCG_TARGET_HAS_ext16u_i32 || TCG_TARGET_HAS_ext16u_i64
CASE_32_64(ext16u)
t0 = *tb_ptr++;
t1 = tci_read_r16(regs, &tb_ptr);
tci_write_reg(regs, t0, t1);
t1 = tci_read_r(regs, &tb_ptr);
tci_write_reg(regs, t0, (uint16_t)t1);
break;
#endif
#if TCG_TARGET_HAS_bswap16_i32
case INDEX_op_bswap16_i32:
#if TCG_TARGET_HAS_bswap16_i32 || TCG_TARGET_HAS_bswap16_i64
CASE_32_64(bswap16)
t0 = *tb_ptr++;
t1 = tci_read_r16(regs, &tb_ptr);
t1 = tci_read_r(regs, &tb_ptr);
tci_write_reg(regs, t0, bswap16(t1));
break;
#endif
#if TCG_TARGET_HAS_bswap32_i32
case INDEX_op_bswap32_i32:
#if TCG_TARGET_HAS_bswap32_i32 || TCG_TARGET_HAS_bswap32_i64
CASE_32_64(bswap32)
t0 = *tb_ptr++;
t1 = tci_read_r32(regs, &tb_ptr);
t1 = tci_read_r(regs, &tb_ptr);
tci_write_reg(regs, t0, bswap32(t1));
break;
#endif
#if TCG_TARGET_HAS_not_i32
case INDEX_op_not_i32:
#if TCG_TARGET_HAS_not_i32 || TCG_TARGET_HAS_not_i64
CASE_32_64(not)
t0 = *tb_ptr++;
t1 = tci_read_r32(regs, &tb_ptr);
t1 = tci_read_r(regs, &tb_ptr);
tci_write_reg(regs, t0, ~t1);
break;
#endif
#if TCG_TARGET_HAS_neg_i32
case INDEX_op_neg_i32:
#if TCG_TARGET_HAS_neg_i32 || TCG_TARGET_HAS_neg_i64
CASE_32_64(neg)
t0 = *tb_ptr++;
t1 = tci_read_r32(regs, &tb_ptr);
t1 = tci_read_r(regs, &tb_ptr);
tci_write_reg(regs, t0, -t1);
break;
#endif
#if TCG_TARGET_REG_BITS == 64
case INDEX_op_mov_i64:
t0 = *tb_ptr++;
t1 = tci_read_r64(regs, &tb_ptr);
tci_write_reg(regs, t0, t1);
break;
case INDEX_op_tci_movi_i64:
t0 = *tb_ptr++;
t1 = tci_read_i64(&tb_ptr);
@ -805,7 +685,7 @@ uintptr_t QEMU_DISABLE_CFI tcg_qemu_tb_exec(CPUArchState *env,
tci_write_reg(regs, t0, *(uint64_t *)(t1 + t2));
break;
case INDEX_op_st_i64:
t0 = tci_read_r64(regs, &tb_ptr);
t0 = tci_read_r(regs, &tb_ptr);
t1 = tci_read_r(regs, &tb_ptr);
t2 = tci_read_s32(&tb_ptr);
*(uint64_t *)(t1 + t2) = t0;
@ -813,106 +693,70 @@ uintptr_t QEMU_DISABLE_CFI tcg_qemu_tb_exec(CPUArchState *env,
/* Arithmetic operations (64 bit). */
case INDEX_op_add_i64:
t0 = *tb_ptr++;
t1 = tci_read_r64(regs, &tb_ptr);
t2 = tci_read_r64(regs, &tb_ptr);
tci_write_reg(regs, t0, t1 + t2);
break;
case INDEX_op_sub_i64:
t0 = *tb_ptr++;
t1 = tci_read_r64(regs, &tb_ptr);
t2 = tci_read_r64(regs, &tb_ptr);
tci_write_reg(regs, t0, t1 - t2);
break;
case INDEX_op_mul_i64:
t0 = *tb_ptr++;
t1 = tci_read_r64(regs, &tb_ptr);
t2 = tci_read_r64(regs, &tb_ptr);
tci_write_reg(regs, t0, t1 * t2);
break;
case INDEX_op_div_i64:
t0 = *tb_ptr++;
t1 = tci_read_r64(regs, &tb_ptr);
t2 = tci_read_r64(regs, &tb_ptr);
t1 = tci_read_r(regs, &tb_ptr);
t2 = tci_read_r(regs, &tb_ptr);
tci_write_reg(regs, t0, (int64_t)t1 / (int64_t)t2);
break;
case INDEX_op_divu_i64:
t0 = *tb_ptr++;
t1 = tci_read_r64(regs, &tb_ptr);
t2 = tci_read_r64(regs, &tb_ptr);
t1 = tci_read_r(regs, &tb_ptr);
t2 = tci_read_r(regs, &tb_ptr);
tci_write_reg(regs, t0, (uint64_t)t1 / (uint64_t)t2);
break;
case INDEX_op_rem_i64:
t0 = *tb_ptr++;
t1 = tci_read_r64(regs, &tb_ptr);
t2 = tci_read_r64(regs, &tb_ptr);
t1 = tci_read_r(regs, &tb_ptr);
t2 = tci_read_r(regs, &tb_ptr);
tci_write_reg(regs, t0, (int64_t)t1 % (int64_t)t2);
break;
case INDEX_op_remu_i64:
t0 = *tb_ptr++;
t1 = tci_read_r64(regs, &tb_ptr);
t2 = tci_read_r64(regs, &tb_ptr);
t1 = tci_read_r(regs, &tb_ptr);
t2 = tci_read_r(regs, &tb_ptr);
tci_write_reg(regs, t0, (uint64_t)t1 % (uint64_t)t2);
break;
case INDEX_op_and_i64:
t0 = *tb_ptr++;
t1 = tci_read_r64(regs, &tb_ptr);
t2 = tci_read_r64(regs, &tb_ptr);
tci_write_reg(regs, t0, t1 & t2);
break;
case INDEX_op_or_i64:
t0 = *tb_ptr++;
t1 = tci_read_r64(regs, &tb_ptr);
t2 = tci_read_r64(regs, &tb_ptr);
tci_write_reg(regs, t0, t1 | t2);
break;
case INDEX_op_xor_i64:
t0 = *tb_ptr++;
t1 = tci_read_r64(regs, &tb_ptr);
t2 = tci_read_r64(regs, &tb_ptr);
tci_write_reg(regs, t0, t1 ^ t2);
break;
/* Shift/rotate operations (64 bit). */
case INDEX_op_shl_i64:
t0 = *tb_ptr++;
t1 = tci_read_r64(regs, &tb_ptr);
t2 = tci_read_r64(regs, &tb_ptr);
t1 = tci_read_r(regs, &tb_ptr);
t2 = tci_read_r(regs, &tb_ptr);
tci_write_reg(regs, t0, t1 << (t2 & 63));
break;
case INDEX_op_shr_i64:
t0 = *tb_ptr++;
t1 = tci_read_r64(regs, &tb_ptr);
t2 = tci_read_r64(regs, &tb_ptr);
t1 = tci_read_r(regs, &tb_ptr);
t2 = tci_read_r(regs, &tb_ptr);
tci_write_reg(regs, t0, t1 >> (t2 & 63));
break;
case INDEX_op_sar_i64:
t0 = *tb_ptr++;
t1 = tci_read_r64(regs, &tb_ptr);
t2 = tci_read_r64(regs, &tb_ptr);
t1 = tci_read_r(regs, &tb_ptr);
t2 = tci_read_r(regs, &tb_ptr);
tci_write_reg(regs, t0, ((int64_t)t1 >> (t2 & 63)));
break;
#if TCG_TARGET_HAS_rot_i64
case INDEX_op_rotl_i64:
t0 = *tb_ptr++;
t1 = tci_read_r64(regs, &tb_ptr);
t2 = tci_read_r64(regs, &tb_ptr);
t1 = tci_read_r(regs, &tb_ptr);
t2 = tci_read_r(regs, &tb_ptr);
tci_write_reg(regs, t0, rol64(t1, t2 & 63));
break;
case INDEX_op_rotr_i64:
t0 = *tb_ptr++;
t1 = tci_read_r64(regs, &tb_ptr);
t2 = tci_read_r64(regs, &tb_ptr);
t1 = tci_read_r(regs, &tb_ptr);
t2 = tci_read_r(regs, &tb_ptr);
tci_write_reg(regs, t0, ror64(t1, t2 & 63));
break;
#endif
#if TCG_TARGET_HAS_deposit_i64
case INDEX_op_deposit_i64:
t0 = *tb_ptr++;
t1 = tci_read_r64(regs, &tb_ptr);
t2 = tci_read_r64(regs, &tb_ptr);
t1 = tci_read_r(regs, &tb_ptr);
t2 = tci_read_r(regs, &tb_ptr);
tmp16 = *tb_ptr++;
tmp8 = *tb_ptr++;
tmp64 = (((1ULL << tmp8) - 1) << tmp16);
@ -920,8 +764,8 @@ uintptr_t QEMU_DISABLE_CFI tcg_qemu_tb_exec(CPUArchState *env,
break;
#endif
case INDEX_op_brcond_i64:
t0 = tci_read_r64(regs, &tb_ptr);
t1 = tci_read_r64(regs, &tb_ptr);
t0 = tci_read_r(regs, &tb_ptr);
t1 = tci_read_r(regs, &tb_ptr);
condition = *tb_ptr++;
label = tci_read_label(&tb_ptr);
if (tci_compare64(t0, t1, condition)) {
@ -930,85 +774,29 @@ uintptr_t QEMU_DISABLE_CFI tcg_qemu_tb_exec(CPUArchState *env,
continue;
}
break;
#if TCG_TARGET_HAS_ext8u_i64
case INDEX_op_ext8u_i64:
t0 = *tb_ptr++;
t1 = tci_read_r8(regs, &tb_ptr);
tci_write_reg(regs, t0, t1);
break;
#endif
#if TCG_TARGET_HAS_ext8s_i64
case INDEX_op_ext8s_i64:
t0 = *tb_ptr++;
t1 = tci_read_r8s(regs, &tb_ptr);
tci_write_reg(regs, t0, t1);
break;
#endif
#if TCG_TARGET_HAS_ext16s_i64
case INDEX_op_ext16s_i64:
t0 = *tb_ptr++;
t1 = tci_read_r16s(regs, &tb_ptr);
tci_write_reg(regs, t0, t1);
break;
#endif
#if TCG_TARGET_HAS_ext16u_i64
case INDEX_op_ext16u_i64:
t0 = *tb_ptr++;
t1 = tci_read_r16(regs, &tb_ptr);
tci_write_reg(regs, t0, t1);
break;
#endif
#if TCG_TARGET_HAS_ext32s_i64
case INDEX_op_ext32s_i64:
#endif
case INDEX_op_ext_i32_i64:
t0 = *tb_ptr++;
t1 = tci_read_r32s(regs, &tb_ptr);
tci_write_reg(regs, t0, t1);
t1 = tci_read_r(regs, &tb_ptr);
tci_write_reg(regs, t0, (int32_t)t1);
break;
#if TCG_TARGET_HAS_ext32u_i64
case INDEX_op_ext32u_i64:
#endif
case INDEX_op_extu_i32_i64:
t0 = *tb_ptr++;
t1 = tci_read_r32(regs, &tb_ptr);
tci_write_reg(regs, t0, t1);
t1 = tci_read_r(regs, &tb_ptr);
tci_write_reg(regs, t0, (uint32_t)t1);
break;
#if TCG_TARGET_HAS_bswap16_i64
case INDEX_op_bswap16_i64:
t0 = *tb_ptr++;
t1 = tci_read_r16(regs, &tb_ptr);
tci_write_reg(regs, t0, bswap16(t1));
break;
#endif
#if TCG_TARGET_HAS_bswap32_i64
case INDEX_op_bswap32_i64:
t0 = *tb_ptr++;
t1 = tci_read_r32(regs, &tb_ptr);
tci_write_reg(regs, t0, bswap32(t1));
break;
#endif
#if TCG_TARGET_HAS_bswap64_i64
case INDEX_op_bswap64_i64:
t0 = *tb_ptr++;
t1 = tci_read_r64(regs, &tb_ptr);
t1 = tci_read_r(regs, &tb_ptr);
tci_write_reg(regs, t0, bswap64(t1));
break;
#endif
#if TCG_TARGET_HAS_not_i64
case INDEX_op_not_i64:
t0 = *tb_ptr++;
t1 = tci_read_r64(regs, &tb_ptr);
tci_write_reg(regs, t0, ~t1);
break;
#endif
#if TCG_TARGET_HAS_neg_i64
case INDEX_op_neg_i64:
t0 = *tb_ptr++;
t1 = tci_read_r64(regs, &tb_ptr);
tci_write_reg(regs, t0, -t1);
break;
#endif
#endif /* TCG_TARGET_REG_BITS == 64 */
/* QEMU specific operations. */

View File

@ -380,6 +380,18 @@ static inline void tcg_out_call(TCGContext *s, const tcg_insn_unit *arg)
old_code_ptr[1] = s->code_ptr - old_code_ptr;
}
#if TCG_TARGET_REG_BITS == 64
# define CASE_32_64(x) \
case glue(glue(INDEX_op_, x), _i64): \
case glue(glue(INDEX_op_, x), _i32):
# define CASE_64(x) \
case glue(glue(INDEX_op_, x), _i64):
#else
# define CASE_32_64(x) \
case glue(glue(INDEX_op_, x), _i32):
# define CASE_64(x)
#endif
static void tcg_out_op(TCGContext *s, TCGOpcode opc, const TCGArg *args,
const int *const_args)
{
@ -391,6 +403,7 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, const TCGArg *args,
case INDEX_op_exit_tb:
tcg_out64(s, args[0]);
break;
case INDEX_op_goto_tb:
if (s->tb_jmp_insn_offset) {
/* Direct jump method. */
@ -404,15 +417,18 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, const TCGArg *args,
}
set_jmp_reset_offset(s, args[0]);
break;
case INDEX_op_br:
tci_out_label(s, arg_label(args[0]));
break;
case INDEX_op_setcond_i32:
CASE_32_64(setcond)
tcg_out_r(s, args[0]);
tcg_out_r(s, args[1]);
tcg_out_r(s, args[2]);
tcg_out8(s, args[3]); /* condition */
break;
#if TCG_TARGET_REG_BITS == 32
case INDEX_op_setcond2_i32:
/* setcond2_i32 cond, t0, t1_low, t1_high, t2_low, t2_high */
@ -423,60 +439,54 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, const TCGArg *args,
tcg_out_r(s, args[4]);
tcg_out8(s, args[5]); /* condition */
break;
#elif TCG_TARGET_REG_BITS == 64
case INDEX_op_setcond_i64:
tcg_out_r(s, args[0]);
tcg_out_r(s, args[1]);
tcg_out_r(s, args[2]);
tcg_out8(s, args[3]); /* condition */
break;
#endif
case INDEX_op_ld8u_i32:
case INDEX_op_ld8s_i32:
case INDEX_op_ld16u_i32:
case INDEX_op_ld16s_i32:
CASE_32_64(ld8u)
CASE_32_64(ld8s)
CASE_32_64(ld16u)
CASE_32_64(ld16s)
case INDEX_op_ld_i32:
case INDEX_op_st8_i32:
case INDEX_op_st16_i32:
CASE_64(ld32u)
CASE_64(ld32s)
CASE_64(ld)
CASE_32_64(st8)
CASE_32_64(st16)
case INDEX_op_st_i32:
case INDEX_op_ld8u_i64:
case INDEX_op_ld8s_i64:
case INDEX_op_ld16u_i64:
case INDEX_op_ld16s_i64:
case INDEX_op_ld32u_i64:
case INDEX_op_ld32s_i64:
case INDEX_op_ld_i64:
case INDEX_op_st8_i64:
case INDEX_op_st16_i64:
case INDEX_op_st32_i64:
case INDEX_op_st_i64:
CASE_64(st32)
CASE_64(st)
stack_bounds_check(args[1], args[2]);
tcg_out_r(s, args[0]);
tcg_out_r(s, args[1]);
tcg_debug_assert(args[2] == (int32_t)args[2]);
tcg_out32(s, args[2]);
break;
case INDEX_op_add_i32:
case INDEX_op_sub_i32:
case INDEX_op_mul_i32:
case INDEX_op_and_i32:
case INDEX_op_andc_i32: /* Optional (TCG_TARGET_HAS_andc_i32). */
case INDEX_op_eqv_i32: /* Optional (TCG_TARGET_HAS_eqv_i32). */
case INDEX_op_nand_i32: /* Optional (TCG_TARGET_HAS_nand_i32). */
case INDEX_op_nor_i32: /* Optional (TCG_TARGET_HAS_nor_i32). */
case INDEX_op_or_i32:
case INDEX_op_orc_i32: /* Optional (TCG_TARGET_HAS_orc_i32). */
case INDEX_op_xor_i32:
case INDEX_op_shl_i32:
case INDEX_op_shr_i32:
case INDEX_op_sar_i32:
case INDEX_op_rotl_i32: /* Optional (TCG_TARGET_HAS_rot_i32). */
case INDEX_op_rotr_i32: /* Optional (TCG_TARGET_HAS_rot_i32). */
CASE_32_64(add)
CASE_32_64(sub)
CASE_32_64(mul)
CASE_32_64(and)
CASE_32_64(or)
CASE_32_64(xor)
CASE_32_64(andc) /* Optional (TCG_TARGET_HAS_andc_*). */
CASE_32_64(orc) /* Optional (TCG_TARGET_HAS_orc_*). */
CASE_32_64(eqv) /* Optional (TCG_TARGET_HAS_eqv_*). */
CASE_32_64(nand) /* Optional (TCG_TARGET_HAS_nand_*). */
CASE_32_64(nor) /* Optional (TCG_TARGET_HAS_nor_*). */
CASE_32_64(shl)
CASE_32_64(shr)
CASE_32_64(sar)
CASE_32_64(rotl) /* Optional (TCG_TARGET_HAS_rot_*). */
CASE_32_64(rotr) /* Optional (TCG_TARGET_HAS_rot_*). */
CASE_32_64(div) /* Optional (TCG_TARGET_HAS_div_*). */
CASE_32_64(divu) /* Optional (TCG_TARGET_HAS_div_*). */
CASE_32_64(rem) /* Optional (TCG_TARGET_HAS_div_*). */
CASE_32_64(remu) /* Optional (TCG_TARGET_HAS_div_*). */
tcg_out_r(s, args[0]);
tcg_out_r(s, args[1]);
tcg_out_r(s, args[2]);
break;
case INDEX_op_deposit_i32: /* Optional (TCG_TARGET_HAS_deposit_i32). */
CASE_32_64(deposit) /* Optional (TCG_TARGET_HAS_deposit_*). */
tcg_out_r(s, args[0]);
tcg_out_r(s, args[1]);
tcg_out_r(s, args[2]);
@ -486,79 +496,30 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, const TCGArg *args,
tcg_out8(s, args[4]);
break;
#if TCG_TARGET_REG_BITS == 64
case INDEX_op_add_i64:
case INDEX_op_sub_i64:
case INDEX_op_mul_i64:
case INDEX_op_and_i64:
case INDEX_op_andc_i64: /* Optional (TCG_TARGET_HAS_andc_i64). */
case INDEX_op_eqv_i64: /* Optional (TCG_TARGET_HAS_eqv_i64). */
case INDEX_op_nand_i64: /* Optional (TCG_TARGET_HAS_nand_i64). */
case INDEX_op_nor_i64: /* Optional (TCG_TARGET_HAS_nor_i64). */
case INDEX_op_or_i64:
case INDEX_op_orc_i64: /* Optional (TCG_TARGET_HAS_orc_i64). */
case INDEX_op_xor_i64:
case INDEX_op_shl_i64:
case INDEX_op_shr_i64:
case INDEX_op_sar_i64:
case INDEX_op_rotl_i64: /* Optional (TCG_TARGET_HAS_rot_i64). */
case INDEX_op_rotr_i64: /* Optional (TCG_TARGET_HAS_rot_i64). */
case INDEX_op_div_i64: /* Optional (TCG_TARGET_HAS_div_i64). */
case INDEX_op_divu_i64: /* Optional (TCG_TARGET_HAS_div_i64). */
case INDEX_op_rem_i64: /* Optional (TCG_TARGET_HAS_div_i64). */
case INDEX_op_remu_i64: /* Optional (TCG_TARGET_HAS_div_i64). */
tcg_out_r(s, args[0]);
tcg_out_r(s, args[1]);
tcg_out_r(s, args[2]);
break;
case INDEX_op_deposit_i64: /* Optional (TCG_TARGET_HAS_deposit_i64). */
tcg_out_r(s, args[0]);
tcg_out_r(s, args[1]);
tcg_out_r(s, args[2]);
tcg_debug_assert(args[3] <= UINT8_MAX);
tcg_out8(s, args[3]);
tcg_debug_assert(args[4] <= UINT8_MAX);
tcg_out8(s, args[4]);
break;
case INDEX_op_brcond_i64:
CASE_32_64(brcond)
tcg_out_r(s, args[0]);
tcg_out_r(s, args[1]);
tcg_out8(s, args[2]); /* condition */
tci_out_label(s, arg_label(args[3]));
break;
case INDEX_op_bswap16_i64: /* Optional (TCG_TARGET_HAS_bswap16_i64). */
case INDEX_op_bswap32_i64: /* Optional (TCG_TARGET_HAS_bswap32_i64). */
case INDEX_op_bswap64_i64: /* Optional (TCG_TARGET_HAS_bswap64_i64). */
case INDEX_op_not_i64: /* Optional (TCG_TARGET_HAS_not_i64). */
case INDEX_op_neg_i64: /* Optional (TCG_TARGET_HAS_neg_i64). */
case INDEX_op_ext8s_i64: /* Optional (TCG_TARGET_HAS_ext8s_i64). */
case INDEX_op_ext8u_i64: /* Optional (TCG_TARGET_HAS_ext8u_i64). */
case INDEX_op_ext16s_i64: /* Optional (TCG_TARGET_HAS_ext16s_i64). */
case INDEX_op_ext16u_i64: /* Optional (TCG_TARGET_HAS_ext16u_i64). */
case INDEX_op_ext32s_i64: /* Optional (TCG_TARGET_HAS_ext32s_i64). */
case INDEX_op_ext32u_i64: /* Optional (TCG_TARGET_HAS_ext32u_i64). */
case INDEX_op_ext_i32_i64:
case INDEX_op_extu_i32_i64:
#endif /* TCG_TARGET_REG_BITS == 64 */
case INDEX_op_neg_i32: /* Optional (TCG_TARGET_HAS_neg_i32). */
case INDEX_op_not_i32: /* Optional (TCG_TARGET_HAS_not_i32). */
case INDEX_op_ext8s_i32: /* Optional (TCG_TARGET_HAS_ext8s_i32). */
case INDEX_op_ext16s_i32: /* Optional (TCG_TARGET_HAS_ext16s_i32). */
case INDEX_op_ext8u_i32: /* Optional (TCG_TARGET_HAS_ext8u_i32). */
case INDEX_op_ext16u_i32: /* Optional (TCG_TARGET_HAS_ext16u_i32). */
case INDEX_op_bswap16_i32: /* Optional (TCG_TARGET_HAS_bswap16_i32). */
case INDEX_op_bswap32_i32: /* Optional (TCG_TARGET_HAS_bswap32_i32). */
CASE_32_64(neg) /* Optional (TCG_TARGET_HAS_neg_*). */
CASE_32_64(not) /* Optional (TCG_TARGET_HAS_not_*). */
CASE_32_64(ext8s) /* Optional (TCG_TARGET_HAS_ext8s_*). */
CASE_32_64(ext8u) /* Optional (TCG_TARGET_HAS_ext8u_*). */
CASE_32_64(ext16s) /* Optional (TCG_TARGET_HAS_ext16s_*). */
CASE_32_64(ext16u) /* Optional (TCG_TARGET_HAS_ext16u_*). */
CASE_64(ext32s) /* Optional (TCG_TARGET_HAS_ext32s_i64). */
CASE_64(ext32u) /* Optional (TCG_TARGET_HAS_ext32u_i64). */
CASE_64(ext_i32)
CASE_64(extu_i32)
CASE_32_64(bswap16) /* Optional (TCG_TARGET_HAS_bswap16_*). */
CASE_32_64(bswap32) /* Optional (TCG_TARGET_HAS_bswap32_*). */
CASE_64(bswap64) /* Optional (TCG_TARGET_HAS_bswap64_i64). */
tcg_out_r(s, args[0]);
tcg_out_r(s, args[1]);
break;
case INDEX_op_div_i32: /* Optional (TCG_TARGET_HAS_div_i32). */
case INDEX_op_divu_i32: /* Optional (TCG_TARGET_HAS_div_i32). */
case INDEX_op_rem_i32: /* Optional (TCG_TARGET_HAS_div_i32). */
case INDEX_op_remu_i32: /* Optional (TCG_TARGET_HAS_div_i32). */
tcg_out_r(s, args[0]);
tcg_out_r(s, args[1]);
tcg_out_r(s, args[2]);
break;
#if TCG_TARGET_REG_BITS == 32
case INDEX_op_add2_i32:
case INDEX_op_sub2_i32:
@ -584,31 +545,8 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, const TCGArg *args,
tcg_out_r(s, args[3]);
break;
#endif
case INDEX_op_brcond_i32:
tcg_out_r(s, args[0]);
tcg_out_r(s, args[1]);
tcg_out8(s, args[2]); /* condition */
tci_out_label(s, arg_label(args[3]));
break;
case INDEX_op_qemu_ld_i32:
tcg_out_r(s, *args++);
tcg_out_r(s, *args++);
if (TARGET_LONG_BITS > TCG_TARGET_REG_BITS) {
tcg_out_r(s, *args++);
}
tcg_out_i(s, *args++);
break;
case INDEX_op_qemu_ld_i64:
tcg_out_r(s, *args++);
if (TCG_TARGET_REG_BITS == 32) {
tcg_out_r(s, *args++);
}
tcg_out_r(s, *args++);
if (TARGET_LONG_BITS > TCG_TARGET_REG_BITS) {
tcg_out_r(s, *args++);
}
tcg_out_i(s, *args++);
break;
case INDEX_op_qemu_st_i32:
tcg_out_r(s, *args++);
tcg_out_r(s, *args++);
@ -617,6 +555,8 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, const TCGArg *args,
}
tcg_out_i(s, *args++);
break;
case INDEX_op_qemu_ld_i64:
case INDEX_op_qemu_st_i64:
tcg_out_r(s, *args++);
if (TCG_TARGET_REG_BITS == 32) {
@ -628,8 +568,10 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, const TCGArg *args,
}
tcg_out_i(s, *args++);
break;
case INDEX_op_mb:
break;
case INDEX_op_mov_i32: /* Always emitted via tcg_out_mov. */
case INDEX_op_mov_i64:
case INDEX_op_call: /* Always emitted via tcg_out_call. */