Queued TCG patches
-----BEGIN PGP SIGNATURE-----
iQEcBAABAgAGBQJZSBP2AAoJEK0ScMxN0CebnyMH/1ZiDhYiqCD7PYfk4/Y7Db+h
MNKNozrWKyChWQp1RzwWqcBaIzbuMZkDYn8dfS419PNtFRNoYtHjhYvjSTfcrxS0
U8dGOoqQUHCr/jlyIDUE4y5+aFA9R/1Ih5IQv+QCi5QNXcfeST8zcYF+ImuikP6C
7heIc7dE9kXdA8ycWJ39kYErHK9qEJbvDx6dxMPmb4cM36U239Zb9so985TXULlQ
LoHrDpOCBzCbsICBE8iP2RKDvcwENIx21Dwv+9gW/NqR+nRdKcxhTjKEodkS8gl/
UxMxM/TjIPQOLLUhdck5DFgIgBgQWHRqPMJKqt466I0JlXvSpifmWxckWzslXLc=
=R+em
-----END PGP SIGNATURE-----

Merge remote-tracking branch 'remotes/rth/tags/pull-tcg-20170619' into staging

Queued TCG patches

# gpg: Signature made Mon 19 Jun 2017 19:12:06 BST
# gpg:                using RSA key 0xAD1270CC4DD0279B
# gpg: Good signature from "Richard Henderson <rth7680@gmail.com>"
# gpg:                 aka "Richard Henderson <rth@redhat.com>"
# gpg:                 aka "Richard Henderson <rth@twiddle.net>"
# Primary key fingerprint: 9CB1 8DDA F8E8 49AD 2AFC 16A4 AD12 70CC 4DD0 279B

* remotes/rth/tags/pull-tcg-20170619:
  target/arm: Exit after clearing aarch64 interrupt mask
  target/s390x: Exit after changing PSW mask
  target/alpha: Use tcg_gen_lookup_and_goto_ptr
  tcg: Increase hit rate of lookup_tb_ptr
  tcg/arm: Use ldr (literal) for goto_tb
  tcg/arm: Try pc-relative addresses for movi
  tcg/arm: Remove limit on code buffer size
  tcg/arm: Use indirect branch for goto_tb
  tcg/aarch64: Use ADR in tcg_out_movi
  translate-all: consolidate tb init in tb_gen_code
  tcg: allocate TB structs before the corresponding translated code
  util: add cacheinfo

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
commit db7a99cdc1
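The structural change underlying several of these patches is "tcg: allocate TB structs before the corresponding translated code": each TranslationBlock is carved out of the code_gen buffer just ahead of its own code, padded apart by the newly detected instruction-cache line size, and TBContext.tbs becomes an array of pointers into that buffer. A sketch of the resulting layout (padding values illustrative, not exact):

    /*
     * code_gen buffer after this series (sketch):
     *
     *   ...| TB struct |pad to icache line| translated code |pad| TB struct |...
     *
     * The TB metadata (written once) and the code (executed) thus
     * never share a cache line.
     */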
@@ -523,8 +523,6 @@ static inline PageDesc *page_find(tb_page_addr_t index)
 # define MAX_CODE_GEN_BUFFER_SIZE (32u * 1024 * 1024)
 #elif defined(__aarch64__)
 # define MAX_CODE_GEN_BUFFER_SIZE (128ul * 1024 * 1024)
-#elif defined(__arm__)
-# define MAX_CODE_GEN_BUFFER_SIZE (16u * 1024 * 1024)
 #elif defined(__s390x__)
   /* We have a +- 4GB range on the branches; leave some slop. */
 # define MAX_CODE_GEN_BUFFER_SIZE (3ul * 1024 * 1024 * 1024)
@@ -781,12 +779,13 @@ static inline void code_gen_alloc(size_t tb_size)
         exit(1);
     }
 
-    /* Estimate a good size for the number of TBs we can support.  We
-       still haven't deducted the prologue from the buffer size here,
-       but that's minimal and won't affect the estimate much.  */
-    tcg_ctx.code_gen_max_blocks
-        = tcg_ctx.code_gen_buffer_size / CODE_GEN_AVG_BLOCK_SIZE;
-    tcg_ctx.tb_ctx.tbs = g_new(TranslationBlock, tcg_ctx.code_gen_max_blocks);
+    /* size this conservatively -- realloc later if needed */
+    tcg_ctx.tb_ctx.tbs_size =
+        tcg_ctx.code_gen_buffer_size / CODE_GEN_AVG_BLOCK_SIZE / 8;
+    if (unlikely(!tcg_ctx.tb_ctx.tbs_size)) {
+        tcg_ctx.tb_ctx.tbs_size = 64 * 1024;
+    }
+    tcg_ctx.tb_ctx.tbs = g_new(TranslationBlock *, tcg_ctx.tb_ctx.tbs_size);
 
     qemu_mutex_init(&tcg_ctx.tb_ctx.tb_lock);
 }
@@ -828,16 +827,20 @@ bool tcg_enabled(void)
 static TranslationBlock *tb_alloc(target_ulong pc)
 {
     TranslationBlock *tb;
+    TBContext *ctx;
 
     assert_tb_locked();
 
-    if (tcg_ctx.tb_ctx.nb_tbs >= tcg_ctx.code_gen_max_blocks) {
+    tb = tcg_tb_alloc(&tcg_ctx);
+    if (unlikely(tb == NULL)) {
         return NULL;
     }
-    tb = &tcg_ctx.tb_ctx.tbs[tcg_ctx.tb_ctx.nb_tbs++];
-    tb->pc = pc;
-    tb->cflags = 0;
-    tb->invalid = false;
+    ctx = &tcg_ctx.tb_ctx;
+    if (unlikely(ctx->nb_tbs == ctx->tbs_size)) {
+        ctx->tbs_size *= 2;
+        ctx->tbs = g_renew(TranslationBlock *, ctx->tbs, ctx->tbs_size);
+    }
+    ctx->tbs[ctx->nb_tbs++] = tb;
     return tb;
 }
 
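The TB pointer array now starts small and doubles on demand, so the cost of growth is amortized. A self-contained sketch of the same pattern (illustrative names, not the QEMU API):

    #include <glib.h>

    typedef struct TranslationBlock TranslationBlock;

    static TranslationBlock **tbs; /* array of pointers, doubled on demand */
    static size_t tbs_size;        /* current capacity */
    static size_t nb_tbs;          /* slots in use */

    static void tbs_append(TranslationBlock *tb)
    {
        if (nb_tbs == tbs_size) {
            /* Double (or seed) the capacity; g_renew preserves contents. */
            tbs_size = tbs_size ? tbs_size * 2 : 64 * 1024;
            tbs = g_renew(TranslationBlock *, tbs, tbs_size);
        }
        tbs[nb_tbs++] = tb;
    }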
@@ -850,8 +853,10 @@ void tb_free(TranslationBlock *tb)
        Ignore the hard cases and just back up if this TB happens to
        be the last one generated.  */
     if (tcg_ctx.tb_ctx.nb_tbs > 0 &&
-            tb == &tcg_ctx.tb_ctx.tbs[tcg_ctx.tb_ctx.nb_tbs - 1]) {
-        tcg_ctx.code_gen_ptr = tb->tc_ptr;
+            tb == tcg_ctx.tb_ctx.tbs[tcg_ctx.tb_ctx.nb_tbs - 1]) {
+        size_t struct_size = ROUND_UP(sizeof(*tb), qemu_icache_linesize);
+
+        tcg_ctx.code_gen_ptr = tb->tc_ptr - struct_size;
         tcg_ctx.tb_ctx.nb_tbs--;
     }
 }
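Since each TB struct now sits immediately before its code, backing out the most recently generated TB must rewind past the struct as well. Worked numbers (assumed, for illustration only):

    /* Assume sizeof(*tb) == 200 and qemu_icache_linesize == 64.
     *   struct_size = ROUND_UP(200, 64) = 256
     * so code_gen_ptr = tb->tc_ptr - 256 releases the aligned struct
     * along with the translated code that followed it. */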
@@ -1279,9 +1284,11 @@ TranslationBlock *tb_gen_code(CPUState *cpu,
 
     gen_code_buf = tcg_ctx.code_gen_ptr;
     tb->tc_ptr = gen_code_buf;
+    tb->pc = pc;
     tb->cs_base = cs_base;
     tb->flags = flags;
     tb->cflags = cflags;
+    tb->invalid = false;
 
 #ifdef CONFIG_PROFILER
     tcg_ctx.tb_count1++; /* includes aborted translations because of
@@ -1666,7 +1673,7 @@ static TranslationBlock *tb_find_pc(uintptr_t tc_ptr)
     m_max = tcg_ctx.tb_ctx.nb_tbs - 1;
     while (m_min <= m_max) {
         m = (m_min + m_max) >> 1;
-        tb = &tcg_ctx.tb_ctx.tbs[m];
+        tb = tcg_ctx.tb_ctx.tbs[m];
         v = (uintptr_t)tb->tc_ptr;
         if (v == tc_ptr) {
             return tb;
@@ -1676,7 +1683,7 @@ static TranslationBlock *tb_find_pc(uintptr_t tc_ptr)
             m_min = m + 1;
         }
     }
-    return &tcg_ctx.tb_ctx.tbs[m_max];
+    return tcg_ctx.tb_ctx.tbs[m_max];
 }
 
 #if !defined(CONFIG_USER_ONLY)
@@ -1874,7 +1881,7 @@ void dump_exec_info(FILE *f, fprintf_function cpu_fprintf)
     direct_jmp_count = 0;
     direct_jmp2_count = 0;
     for (i = 0; i < tcg_ctx.tb_ctx.nb_tbs; i++) {
-        tb = &tcg_ctx.tb_ctx.tbs[i];
+        tb = tcg_ctx.tb_ctx.tbs[i];
         target_code_size += tb->size;
         if (tb->size > max_target_code_size) {
             max_target_code_size = tb->size;
@@ -1894,8 +1901,7 @@ void dump_exec_info(FILE *f, fprintf_function cpu_fprintf)
     cpu_fprintf(f, "gen code size %td/%zd\n",
                 tcg_ctx.code_gen_ptr - tcg_ctx.code_gen_buffer,
                 tcg_ctx.code_gen_highwater - tcg_ctx.code_gen_buffer);
-    cpu_fprintf(f, "TB count %d/%d\n",
-            tcg_ctx.tb_ctx.nb_tbs, tcg_ctx.code_gen_max_blocks);
+    cpu_fprintf(f, "TB count %d\n", tcg_ctx.tb_ctx.nb_tbs);
     cpu_fprintf(f, "TB avg target size %d max=%d bytes\n",
                 tcg_ctx.tb_ctx.nb_tbs ? target_code_size /
                 tcg_ctx.tb_ctx.nb_tbs : 0,
@@ -301,7 +301,7 @@ static inline void tlb_flush_by_mmuidx_all_cpus_synced(CPUState *cpu,
 #define CODE_GEN_AVG_BLOCK_SIZE 150
 #endif
 
-#if defined(__arm__) || defined(_ARCH_PPC) \
+#if defined(_ARCH_PPC) \
     || defined(__x86_64__) || defined(__i386__) \
     || defined(__sparc__) || defined(__aarch64__) \
     || defined(__s390x__) || defined(__mips__) \
@@ -401,9 +401,6 @@ static inline void tb_set_jmp_target1(uintptr_t jmp_addr, uintptr_t addr)
 #elif defined(__aarch64__)
 void aarch64_tb_set_jmp_target(uintptr_t jmp_addr, uintptr_t addr);
 #define tb_set_jmp_target1 aarch64_tb_set_jmp_target
-#elif defined(__arm__)
-void arm_tb_set_jmp_target(uintptr_t jmp_addr, uintptr_t addr);
-#define tb_set_jmp_target1 arm_tb_set_jmp_target
 #elif defined(__sparc__) || defined(__mips__)
 void tb_set_jmp_target1(uintptr_t jmp_addr, uintptr_t addr);
 #else
@@ -31,8 +31,9 @@ typedef struct TBContext TBContext;
 
 struct TBContext {
 
-    TranslationBlock *tbs;
+    TranslationBlock **tbs;
     struct qht htable;
+    size_t tbs_size;
     int nb_tbs;
     /* any access to the tbs or the page table must use this lock */
     QemuMutex tb_lock;
@@ -483,4 +483,7 @@ char *qemu_get_pid_name(pid_t pid);
  */
 pid_t qemu_fork(Error **errp);
 
+extern int qemu_icache_linesize;
+extern int qemu_dcache_linesize;
+
 #endif
@@ -84,6 +84,7 @@ typedef enum {
        the PC (for whatever reason), so there's no need to do it again on
        exiting the TB. */
     EXIT_PC_UPDATED,
+    EXIT_PC_UPDATED_NOCHAIN,
 
     /* We are exiting the TB, but have neither emitted a goto_tb, nor
        updated the PC for the next instruction to be executed. */
@@ -458,11 +459,17 @@ static bool in_superpage(DisasContext *ctx, int64_t addr)
 #endif
 }
 
+static bool use_exit_tb(DisasContext *ctx)
+{
+    return ((ctx->tb->cflags & CF_LAST_IO)
+            || ctx->singlestep_enabled
+            || singlestep);
+}
+
 static bool use_goto_tb(DisasContext *ctx, uint64_t dest)
 {
     /* Suppress goto_tb in the case of single-steping and IO.  */
-    if ((ctx->tb->cflags & CF_LAST_IO)
-        || ctx->singlestep_enabled || singlestep) {
+    if (unlikely(use_exit_tb(ctx))) {
         return false;
     }
 #ifndef CONFIG_USER_ONLY
@@ -1198,7 +1205,10 @@ static ExitStatus gen_call_pal(DisasContext *ctx, int palcode)
             tcg_gen_andi_i64(tmp, ctx->ir[IR_A0], PS_INT_MASK);
             tcg_gen_st8_i64(tmp, cpu_env, offsetof(CPUAlphaState, ps));
             tcg_temp_free(tmp);
-            break;
+
+            /* Allow interrupts to be recognized right away. */
+            tcg_gen_movi_i64(cpu_pc, ctx->pc);
+            return EXIT_PC_UPDATED_NOCHAIN;
 
         case 0x36:
             /* RDPS */
@@ -1266,7 +1276,7 @@ static ExitStatus gen_call_pal(DisasContext *ctx, int palcode)
            need the page permissions check.  We'll see the existence of
            the page when we create the TB, and we'll flush all TBs if
            we change the PAL base register. */
-        if (!ctx->singlestep_enabled && !(ctx->tb->cflags & CF_LAST_IO)) {
+        if (!use_exit_tb(ctx)) {
             tcg_gen_goto_tb(0);
             tcg_gen_movi_i64(cpu_pc, entry);
             tcg_gen_exit_tb((uintptr_t)ctx->tb);
@@ -2686,7 +2696,8 @@ static ExitStatus translate_one(DisasContext *ctx, uint32_t insn)
         tcg_gen_andi_i64(tmp, vb, 1);
         tcg_gen_st8_i64(tmp, cpu_env, offsetof(CPUAlphaState, pal_mode));
         tcg_gen_andi_i64(cpu_pc, vb, ~3);
-        ret = EXIT_PC_UPDATED;
+        /* Allow interrupts to be recognized right away. */
+        ret = EXIT_PC_UPDATED_NOCHAIN;
         break;
 #else
         goto invalid_opc;
@@ -3010,6 +3021,12 @@ void gen_intermediate_code(CPUAlphaState *env, struct TranslationBlock *tb)
         tcg_gen_movi_i64(cpu_pc, ctx.pc);
         /* FALLTHRU */
     case EXIT_PC_UPDATED:
+        if (!use_exit_tb(&ctx)) {
+            tcg_gen_lookup_and_goto_ptr(cpu_pc);
+            break;
+        }
+        /* FALLTHRU */
+    case EXIT_PC_UPDATED_NOCHAIN:
         if (ctx.singlestep_enabled) {
             gen_excp_1(EXCP_DEBUG, 0);
         } else {
@@ -1422,7 +1422,9 @@ static void handle_msr_i(DisasContext *s, uint32_t insn,
         gen_helper_msr_i_pstate(cpu_env, tcg_op, tcg_imm);
         tcg_temp_free_i32(tcg_imm);
         tcg_temp_free_i32(tcg_op);
-        s->is_jmp = DISAS_UPDATE;
+        /* For DAIFClear, exit the cpu loop to re-evaluate pending IRQs. */
+        gen_a64_set_pc_im(s->pc);
+        s->is_jmp = (op == 0x1f ? DISAS_EXIT : DISAS_JUMP);
         break;
     }
     default:
@@ -11369,6 +11371,9 @@ void gen_intermediate_code_a64(ARMCPU *cpu, TranslationBlock *tb)
         case DISAS_JUMP:
             tcg_gen_lookup_and_goto_ptr(cpu_pc);
             break;
+        case DISAS_EXIT:
+            tcg_gen_exit_tb(0);
+            break;
         case DISAS_TB_JUMP:
         case DISAS_EXC:
         case DISAS_SWI:
@@ -1173,6 +1173,8 @@ typedef enum {
     /* We are exiting the TB, but have neither emitted a goto_tb, nor
        updated the PC for the next instruction to be executed. */
     EXIT_PC_STALE,
+    /* We are exiting the TB to the main loop. */
+    EXIT_PC_STALE_NOCHAIN,
     /* We are ending the TB with a noreturn function call, e.g. longjmp.
        No following code will be executed. */
     EXIT_NORETURN,
@@ -3795,7 +3797,8 @@ static ExitStatus op_ssm(DisasContext *s, DisasOps *o)
 {
     check_privileged(s);
     tcg_gen_deposit_i64(psw_mask, psw_mask, o->in2, 56, 8);
-    return NO_EXIT;
+    /* Exit to main loop to reevaluate s390_cpu_exec_interrupt. */
+    return EXIT_PC_STALE_NOCHAIN;
 }
 
 static ExitStatus op_stap(DisasContext *s, DisasOps *o)
@@ -4038,7 +4041,9 @@ static ExitStatus op_stnosm(DisasContext *s, DisasOps *o)
     } else {
         tcg_gen_ori_i64(psw_mask, psw_mask, i2 << 56);
     }
-    return NO_EXIT;
+
+    /* Exit to main loop to reevaluate s390_cpu_exec_interrupt. */
+    return EXIT_PC_STALE_NOCHAIN;
 }
 
 static ExitStatus op_stura(DisasContext *s, DisasOps *o)
@@ -5788,6 +5793,7 @@ void gen_intermediate_code(CPUS390XState *env, struct TranslationBlock *tb)
     case EXIT_NORETURN:
         break;
     case EXIT_PC_STALE:
+    case EXIT_PC_STALE_NOCHAIN:
         update_psw_addr(&dc);
         /* FALLTHRU */
     case EXIT_PC_UPDATED:
@@ -5799,14 +5805,14 @@ void gen_intermediate_code(CPUS390XState *env, struct TranslationBlock *tb)
         /* Exit the TB, either by raising a debug exception or by return. */
         if (do_debug) {
             gen_exception(EXCP_DEBUG);
-        } else if (use_exit_tb(&dc)) {
+        } else if (use_exit_tb(&dc) || status == EXIT_PC_STALE_NOCHAIN) {
             tcg_gen_exit_tb(0);
         } else {
             tcg_gen_lookup_and_goto_ptr(psw_addr);
         }
         break;
     default:
-        abort();
+        g_assert_not_reached();
     }
 
     gen_tb_end(tb, num_insns);
@@ -616,7 +616,12 @@ static void tcg_out_movi(TCGContext *s, TCGType type, TCGReg rd,
     /* Look for host pointer values within 4G of the PC.  This happens
        often when loading pointers to QEMU's own data structures. */
     if (type == TCG_TYPE_I64) {
-        tcg_target_long disp = (value >> 12) - ((intptr_t)s->code_ptr >> 12);
+        tcg_target_long disp = value - (intptr_t)s->code_ptr;
+        if (disp == sextract64(disp, 0, 21)) {
+            tcg_out_insn(s, 3406, ADR, rd, disp);
+            return;
+        }
+        disp = (value >> 12) - ((intptr_t)s->code_ptr >> 12);
         if (disp == sextract64(disp, 0, 21)) {
             tcg_out_insn(s, 3406, ADRP, rd, disp);
             if (value & 0xfff) {
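ADR materializes pc plus a signed 21-bit byte offset (about +/-1MB), whereas ADRP covers +/-4GB in 4KB pages; the `disp == sextract64(disp, 0, 21)` test simply asks whether the displacement survives sign-truncation to 21 bits. A standalone sketch of that check (helper names are mine, not QEMU's):

    #include <stdbool.h>
    #include <stdint.h>

    /* Sign-extend the low len bits of value, like QEMU's sextract64
       with start == 0. */
    static int64_t sext_low_bits(uint64_t value, unsigned len)
    {
        return (int64_t)(value << (64 - len)) >> (64 - len);
    }

    /* True if target is reachable from pc with one ADR (+/-1MB). */
    static bool adr_reaches(int64_t pc, int64_t target)
    {
        int64_t disp = target - pc;
        return disp == sext_low_bits((uint64_t)disp, 21);
    }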
@@ -418,23 +418,37 @@ static inline void tcg_out_dat_imm(TCGContext *s,
 
 static void tcg_out_movi32(TCGContext *s, int cond, int rd, uint32_t arg)
 {
-    int rot, opc, rn;
+    int rot, opc, rn, diff;
 
-    /* For armv7, make sure not to use movw+movt when mov/mvn would do.
-       Speed things up by only checking when movt would be required.
-       Prior to armv7, have one go at fully rotated immediates before
-       doing the decomposition thing below.  */
-    if (!use_armv7_instructions || (arg & 0xffff0000)) {
-        rot = encode_imm(arg);
+    /* Check a single MOV/MVN before anything else. */
+    rot = encode_imm(arg);
+    if (rot >= 0) {
+        tcg_out_dat_imm(s, cond, ARITH_MOV, rd, 0,
+                        rotl(arg, rot) | (rot << 7));
+        return;
+    }
+    rot = encode_imm(~arg);
+    if (rot >= 0) {
+        tcg_out_dat_imm(s, cond, ARITH_MVN, rd, 0,
+                        rotl(~arg, rot) | (rot << 7));
+        return;
+    }
+
+    /* Check for a pc-relative address.  This will usually be the TB,
+       or within the TB, which is immediately before the code block. */
+    diff = arg - ((intptr_t)s->code_ptr + 8);
+    if (diff >= 0) {
+        rot = encode_imm(diff);
         if (rot >= 0) {
-            tcg_out_dat_imm(s, cond, ARITH_MOV, rd, 0,
-                            rotl(arg, rot) | (rot << 7));
+            tcg_out_dat_imm(s, cond, ARITH_ADD, rd, TCG_REG_PC,
+                            rotl(diff, rot) | (rot << 7));
             return;
         }
-        rot = encode_imm(~arg);
+    } else {
+        rot = encode_imm(-diff);
         if (rot >= 0) {
-            tcg_out_dat_imm(s, cond, ARITH_MVN, rd, 0,
-                            rotl(~arg, rot) | (rot << 7));
+            tcg_out_dat_imm(s, cond, ARITH_SUB, rd, TCG_REG_PC,
+                            rotl(-diff, rot) | (rot << 7));
             return;
         }
     }
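An ARM data-processing immediate is an 8-bit constant rotated right by an even amount, which is what encode_imm searches for (returning a negative value on failure). A minimal standalone version of that test (my helper names, not the QEMU ones):

    #include <stdint.h>

    static uint32_t rotl32(uint32_t v, unsigned r)
    {
        return r ? (v << r) | (v >> (32 - r)) : v;
    }

    /* Find an even left-rotation that squeezes imm into 8 bits, or -1.
       The rotation amount then fills the instruction's rotate field. */
    static int find_rotation(uint32_t imm)
    {
        for (unsigned rot = 0; rot < 32; rot += 2) {
            if ((rotl32(imm, rot) & ~0xffu) == 0) {
                return (int)rot;
            }
        }
        return -1;
    }

With this in hand, the new order of checks in tcg_out_movi32 reads straight down: plain MOV, MVN of the complement, ADD/SUB relative to pc, and only then the multi-instruction fallback.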
@@ -1026,16 +1040,6 @@ static void tcg_out_call(TCGContext *s, tcg_insn_unit *addr)
     }
 }
 
-void arm_tb_set_jmp_target(uintptr_t jmp_addr, uintptr_t addr)
-{
-    tcg_insn_unit *code_ptr = (tcg_insn_unit *)jmp_addr;
-    tcg_insn_unit *target = (tcg_insn_unit *)addr;
-
-    /* we could use a ldr pc, [pc, #-4] kind of branch and avoid the flush */
-    reloc_pc24_atomic(code_ptr, target);
-    flush_icache_range(jmp_addr, jmp_addr + 4);
-}
-
 static inline void tcg_out_goto_label(TCGContext *s, int cond, TCGLabel *l)
 {
     if (l->has_value) {
@@ -1665,17 +1669,27 @@ static inline void tcg_out_op(TCGContext *s, TCGOpcode opc,
         }
         break;
     case INDEX_op_goto_tb:
-        if (s->tb_jmp_insn_offset) {
-            /* Direct jump method */
-            s->tb_jmp_insn_offset[args[0]] = tcg_current_code_size(s);
-            tcg_out_b_noaddr(s, COND_AL);
-        } else {
+        {
             /* Indirect jump method */
-            intptr_t ptr = (intptr_t)(s->tb_jmp_target_addr + args[0]);
-            tcg_out_movi32(s, COND_AL, TCG_REG_R0, ptr & ~0xfff);
-            tcg_out_ld32_12(s, COND_AL, TCG_REG_PC, TCG_REG_R0, ptr & 0xfff);
+            intptr_t ptr, dif, dil;
+            TCGReg base = TCG_REG_PC;
+
+            tcg_debug_assert(s->tb_jmp_insn_offset == 0);
+            ptr = (intptr_t)(s->tb_jmp_target_addr + args[0]);
+            dif = ptr - ((intptr_t)s->code_ptr + 8);
+            dil = sextract32(dif, 0, 12);
+            if (dif != dil) {
+                /* The TB is close, but outside the 12 bits addressable by
+                   the load.  We can extend this to 20 bits with a sub of a
+                   shifted immediate from pc.  In the vastly unlikely event
+                   the code requires more than 1MB, we'll use 2 insns and
+                   be no worse off. */
+                base = TCG_REG_R0;
+                tcg_out_movi32(s, COND_AL, base, ptr - dil);
+            }
+            tcg_out_ld32_12(s, COND_AL, TCG_REG_PC, base, dil);
+            s->tb_jmp_reset_offset[args[0]] = tcg_current_code_size(s);
         }
-        s->tb_jmp_reset_offset[args[0]] = tcg_current_code_size(s);
         break;
     case INDEX_op_goto_ptr:
         tcg_out_bx(s, COND_AL, args[0]);
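The load that implements the indirect goto_tb has only a 12-bit signed offset from its base; when the jump-target slot is further away, one extra mov into TCG_REG_R0 covers the high part. Illustrative arithmetic (addresses assumed, not real):

    /* Say ptr = 0x00102345 and pc + 8 = 0x00100000:
     *   dif = 0x2345
     *   dil = sextract32(0x2345, 0, 12) = 0x345   (bit 11 clear)
     *   dif != dil, so: base = R0 = ptr - dil = 0x00102000
     *   ldr pc, [base, #0x345]   -- loads from exactly ptr
     * When the slot is within +/-2KB of pc + 8, base stays TCG_REG_PC
     * and the single ldr suffices. */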
@@ -2820,14 +2820,11 @@ void tcg_register_jit(void *buf, size_t buf_size)
 }
 #endif /* __ELF__ */
 
-static size_t dcache_bsize = 16;
-static size_t icache_bsize = 16;
-
 void flush_icache_range(uintptr_t start, uintptr_t stop)
 {
     uintptr_t p, start1, stop1;
-    size_t dsize = dcache_bsize;
-    size_t isize = icache_bsize;
+    size_t dsize = qemu_dcache_linesize;
+    size_t isize = qemu_icache_linesize;
 
     start1 = start & ~(dsize - 1);
     stop1 = (stop + dsize - 1) & ~(dsize - 1);
@@ -2844,67 +2841,3 @@ void flush_icache_range(uintptr_t start, uintptr_t stop)
     asm volatile ("sync" : : : "memory");
     asm volatile ("isync" : : : "memory");
 }
-
-#if defined _AIX
-#include <sys/systemcfg.h>
-
-static void __attribute__((constructor)) tcg_cache_init(void)
-{
-    icache_bsize = _system_configuration.icache_line;
-    dcache_bsize = _system_configuration.dcache_line;
-}
-
-#elif defined __linux__
-static void __attribute__((constructor)) tcg_cache_init(void)
-{
-    unsigned long dsize = qemu_getauxval(AT_DCACHEBSIZE);
-    unsigned long isize = qemu_getauxval(AT_ICACHEBSIZE);
-
-    if (dsize == 0 || isize == 0) {
-        if (dsize == 0) {
-            fprintf(stderr, "getauxval AT_DCACHEBSIZE failed\n");
-        }
-        if (isize == 0) {
-            fprintf(stderr, "getauxval AT_ICACHEBSIZE failed\n");
-        }
-        exit(1);
-    }
-    dcache_bsize = dsize;
-    icache_bsize = isize;
-}
-
-#elif defined __APPLE__
-#include <sys/sysctl.h>
-
-static void __attribute__((constructor)) tcg_cache_init(void)
-{
-    size_t len;
-    unsigned cacheline;
-    int name[2] = { CTL_HW, HW_CACHELINE };
-
-    len = sizeof(cacheline);
-    if (sysctl(name, 2, &cacheline, &len, NULL, 0)) {
-        perror("sysctl CTL_HW HW_CACHELINE failed");
-        exit(1);
-    }
-    dcache_bsize = cacheline;
-    icache_bsize = cacheline;
-}
-
-#elif defined(__FreeBSD__) || defined(__FreeBSD_kernel__)
-#include <sys/sysctl.h>
-
-static void __attribute__((constructor)) tcg_cache_init(void)
-{
-    size_t len = 4;
-    unsigned cacheline;
-
-    if (sysctlbyname ("machdep.cacheline_size", &cacheline, &len, NULL, 0)) {
-        fprintf(stderr, "sysctlbyname machdep.cacheline_size failed: %s\n",
-                strerror(errno));
-        exit(1);
-    }
-    dcache_bsize = cacheline;
-    icache_bsize = cacheline;
-}
-#endif
@@ -149,23 +149,23 @@ void *HELPER(lookup_tb_ptr)(CPUArchState *env, target_ulong addr)
     CPUState *cpu = ENV_GET_CPU(env);
     TranslationBlock *tb;
     target_ulong cs_base, pc;
-    uint32_t flags;
+    uint32_t flags, addr_hash;
 
-    tb = atomic_rcu_read(&cpu->tb_jmp_cache[tb_jmp_cache_hash_func(addr)]);
-    if (likely(tb)) {
-        cpu_get_tb_cpu_state(env, &pc, &cs_base, &flags);
-        if (likely(tb->pc == addr && tb->cs_base == cs_base &&
-                   tb->flags == flags)) {
-            goto found;
-        }
+    addr_hash = tb_jmp_cache_hash_func(addr);
+    tb = atomic_rcu_read(&cpu->tb_jmp_cache[addr_hash]);
+    cpu_get_tb_cpu_state(env, &pc, &cs_base, &flags);
+
+    if (unlikely(!(tb
+                   && tb->pc == addr
+                   && tb->cs_base == cs_base
+                   && tb->flags == flags))) {
         tb = tb_htable_lookup(cpu, addr, cs_base, flags);
-        if (likely(tb)) {
-            atomic_set(&cpu->tb_jmp_cache[tb_jmp_cache_hash_func(addr)], tb);
-            goto found;
+        if (!tb) {
+            return tcg_ctx.code_gen_epilogue;
         }
+        atomic_set(&cpu->tb_jmp_cache[addr_hash], tb);
     }
-    return tcg_ctx.code_gen_epilogue;
- found:
+
     qemu_log_mask_and_addr(CPU_LOG_EXEC, addr,
                            "Chain %p [%d: " TARGET_FMT_lx "] %s\n",
                            tb->tc_ptr, cpu->cpu_index, addr,
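The rewritten helper folds the old goto-based control flow into a single unlikely() slow path. Its overall shape, as pseudocode (not the verbatim QEMU source; the function goes on to return tb->tc_ptr):

    /*
     *   hash = tb_jmp_cache_hash_func(addr);
     *   tb   = jmp_cache[hash];                  // fast path: one load
     *   if (!(tb && tb matches pc/cs_base/flags)) {
     *       tb = tb_htable_lookup(...);          // slow path
     *       if (!tb) {
     *           return code_gen_epilogue;        // give up, exit to loop
     *       }
     *       jmp_cache[hash] = tb;                // refill the fast path
     *   }
     *   return tb->tc_ptr;                       // chain directly
     */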
tcg/tcg.c
@@ -383,6 +383,26 @@ void tcg_context_init(TCGContext *s)
     }
 }
 
+/*
+ * Allocate TBs right before their corresponding translated code, making
+ * sure that TBs and code are on different cache lines.
+ */
+TranslationBlock *tcg_tb_alloc(TCGContext *s)
+{
+    uintptr_t align = qemu_icache_linesize;
+    TranslationBlock *tb;
+    void *next;
+
+    tb = (void *)ROUND_UP((uintptr_t)s->code_gen_ptr, align);
+    next = (void *)ROUND_UP((uintptr_t)(tb + 1), align);
+
+    if (unlikely(next > s->code_gen_highwater)) {
+        return NULL;
+    }
+    s->code_gen_ptr = next;
+    return tb;
+}
+
 void tcg_prologue_init(TCGContext *s)
 {
     size_t prologue_size, total_size;
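tcg_tb_alloc rounds both the struct's start and the code's start up to an icache line. A worked example with assumed numbers:

    /* Assume code_gen_ptr == 0x1010, sizeof(TranslationBlock) == 0x150,
     * qemu_icache_linesize == 0x40:
     *   tb   = ROUND_UP(0x1010, 0x40)          = 0x1040
     *   next = ROUND_UP(0x1040 + 0x150, 0x40)  = 0x11c0
     * Code generation resumes at next (0x11c0), one full line past the
     * end of the TB struct. */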
@@ -697,7 +697,6 @@ struct TCGContext {
        here, because there's too much arithmetic throughout that relies
        on addition and subtraction working on bytes.  Rely on the GCC
        extension that allows arithmetic on void*. */
-    int code_gen_max_blocks;
     void *code_gen_prologue;
     void *code_gen_epilogue;
     void *code_gen_buffer;
@@ -756,6 +755,7 @@ static inline bool tcg_op_buf_full(void)
 /* tb_lock must be held for tcg_malloc_internal. */
 void *tcg_malloc_internal(TCGContext *s, int size);
 void tcg_pool_reset(TCGContext *s);
+TranslationBlock *tcg_tb_alloc(TCGContext *s);
 
 void tb_lock(void);
 void tb_unlock(void);
@@ -20,6 +20,7 @@ util-obj-y += host-utils.o
 util-obj-y += bitmap.o bitops.o hbitmap.o
 util-obj-y += fifo8.o
 util-obj-y += acl.o
+util-obj-y += cacheinfo.o
 util-obj-y += error.o qemu-error.o
 util-obj-y += id.o
 util-obj-y += iov.o qemu-config.o qemu-sockets.o uri.o notify.o
util/cacheinfo.c (new file)
@@ -0,0 +1,185 @@
+/*
+ * cacheinfo.c - helpers to query the host about its caches
+ *
+ * Copyright (C) 2017, Emilio G. Cota <cota@braap.org>
+ * License: GNU GPL, version 2 or later.
+ *   See the COPYING file in the top-level directory.
+ */
+
+#include "qemu/osdep.h"
+
+int qemu_icache_linesize = 0;
+int qemu_dcache_linesize = 0;
+
+/*
+ * Operating system specific detection mechanisms.
+ */
+
+#if defined(_AIX)
+# include <sys/systemcfg.h>
+
+static void sys_cache_info(int *isize, int *dsize)
+{
+    *isize = _system_configuration.icache_line;
+    *dsize = _system_configuration.dcache_line;
+}
+
+#elif defined(_WIN32)
+
+static void sys_cache_info(int *isize, int *dsize)
+{
+    SYSTEM_LOGICAL_PROCESSOR_INFORMATION *buf;
+    DWORD size = 0;
+    BOOL success;
+    size_t i, n;
+
+    /* Check for the required buffer size first.  Note that if the zero
+       size we use for the probe results in success, then there is no
+       data available; fail in that case.  */
+    success = GetLogicalProcessorInformation(0, &size);
+    if (success || GetLastError() != ERROR_INSUFFICIENT_BUFFER) {
+        return;
+    }
+
+    n = size / sizeof(SYSTEM_LOGICAL_PROCESSOR_INFORMATION);
+    size = n * sizeof(SYSTEM_LOGICAL_PROCESSOR_INFORMATION);
+    buf = g_new0(SYSTEM_LOGICAL_PROCESSOR_INFORMATION, n);
+    if (!GetLogicalProcessorInformation(buf, &size)) {
+        goto fail;
+    }
+
+    for (i = 0; i < n; i++) {
+        if (buf[i].Relationship == RelationCache
+            && buf[i].Cache.Level == 1) {
+            switch (buf[i].Cache.Type) {
+            case CacheUnified:
+                *isize = *dsize = buf[i].Cache.LineSize;
+                break;
+            case CacheInstruction:
+                *isize = buf[i].Cache.LineSize;
+                break;
+            case CacheData:
+                *dsize = buf[i].Cache.LineSize;
+                break;
+            default:
+                break;
+            }
+        }
+    }
+ fail:
+    g_free(buf);
+}
+
+#elif defined(__APPLE__) \
+      || defined(__FreeBSD__) || defined(__FreeBSD_kernel__)
+# include <sys/sysctl.h>
+# if defined(__APPLE__)
+#  define SYSCTL_CACHELINE_NAME "hw.cachelinesize"
+# else
+#  define SYSCTL_CACHELINE_NAME "machdep.cacheline_size"
+# endif
+
+static void sys_cache_info(int *isize, int *dsize)
+{
+    /* There's only a single sysctl for both I/D cache line sizes.  */
+    long size;
+    size_t len = sizeof(size);
+    if (!sysctlbyname(SYSCTL_CACHELINE_NAME, &size, &len, NULL, 0)) {
+        *isize = *dsize = size;
+    }
+}
+
+#else
+/* POSIX */
+
+static void sys_cache_info(int *isize, int *dsize)
+{
+# ifdef _SC_LEVEL1_ICACHE_LINESIZE
+    *isize = sysconf(_SC_LEVEL1_ICACHE_LINESIZE);
+# endif
+# ifdef _SC_LEVEL1_DCACHE_LINESIZE
+    *dsize = sysconf(_SC_LEVEL1_DCACHE_LINESIZE);
+# endif
+}
+#endif /* sys_cache_info */
+
+/*
+ * Architecture (+ OS) specific detection mechanisms.
+ */
+
+#if defined(__aarch64__)
+
+static void arch_cache_info(int *isize, int *dsize)
+{
+    if (*isize == 0 || *dsize == 0) {
+        unsigned ctr;
+
+        /* The real cache geometry is in CCSIDR_EL1/CLIDR_EL1/CSSELR_EL1,
+           but (at least under Linux) these are marked protected by the
+           kernel.  However, CTR_EL0 contains the minimum linesize in the
+           entire hierarchy, and is used by userspace cache flushing.  */
+        asm volatile("mrs\t%0, ctr_el0" : "=r"(ctr));
+        if (*isize == 0) {
+            *isize = 4 << (ctr & 0xf);
+        }
+        if (*dsize == 0) {
+            *dsize = 4 << ((ctr >> 16) & 0xf);
+        }
+    }
+}
+
+#elif defined(_ARCH_PPC) && defined(__linux__)
+
+static void arch_cache_info(int *isize, int *dsize)
+{
+    if (*isize == 0) {
+        *isize = qemu_getauxval(AT_ICACHEBSIZE);
+    }
+    if (*dsize == 0) {
+        *dsize = qemu_getauxval(AT_DCACHEBSIZE);
+    }
+}
+
+#else
+static void arch_cache_info(int *isize, int *dsize) { }
+#endif /* arch_cache_info */
+
+/*
+ * ... and if all else fails ...
+ */
+
+static void fallback_cache_info(int *isize, int *dsize)
+{
+    /* If we can only find one of the two, assume they're the same.  */
+    if (*isize) {
+        if (*dsize) {
+            /* Success! */
+        } else {
+            *dsize = *isize;
+        }
+    } else if (*dsize) {
+        *isize = *dsize;
+    } else {
+#if defined(_ARCH_PPC)
+        /* For PPC, we're going to use the icache size computed for
+           flush_icache_range.  Which means that we must use the
+           architecture minimum.  */
+        *isize = *dsize = 16;
+#else
+        /* Otherwise, 64 bytes is not uncommon.  */
+        *isize = *dsize = 64;
+#endif
+    }
+}
+
+static void __attribute__((constructor)) init_cache_info(void)
+{
+    int isize = 0, dsize = 0;
+
+    sys_cache_info(&isize, &dsize);
+    arch_cache_info(&isize, &dsize);
+    fallback_cache_info(&isize, &dsize);
+
+    qemu_icache_linesize = isize;
+    qemu_dcache_linesize = dsize;
+}
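Once the constructor has run (before main), both globals are valid everywhere in the binary. A trivial consumer, just to show the intended use (not part of the patch):

    #include <stdio.h>

    extern int qemu_icache_linesize;
    extern int qemu_dcache_linesize;

    int main(void)
    {
        /* init_cache_info() ran as a constructor before main(). */
        printf("icache line: %d bytes\n", qemu_icache_linesize);
        printf("dcache line: %d bytes\n", qemu_dcache_linesize);
        return 0;
    }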