accel/tcg: allow plugin instrumentation to be disabled via cflags

When icount is enabled and we recompile an MMIO access we end up
double counting the instruction execution. To avoid this we introduce
the CF_MEMI_ONLY cflag, which allows only memory instrumentation for
the next TB (which won't yet have been counted). As this is part of
the hashed compile flags, we will only execute the generated TB while
coming out of a cpu_io_recompile.

While we are at it, delete the old TODO. We might as well keep the
translation handy, as it's likely you will repeatedly hit it on each
MMIO access.

Reported-by: Aaron Lindsay <aaron@os.amperecomputing.com>
Signed-off-by: Alex Bennée <alex.bennee@linaro.org>
Tested-by: Aaron Lindsay <aaron@os.amperecomputing.com>
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
Message-Id: <20210213130325.14781-21-alex.bennee@linaro.org>
Author: Alex Bennée <alex.bennee@linaro.org>
Date:   2021-02-13 13:03:22 +00:00
parent c4afb3456c
commit cfd405eae6
7 changed files with 49 additions and 27 deletions
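For context, here is a minimal sketch of the kind of instruction-counting plugin that hits this bug (modelled on QEMU's bundled insn plugin; illustrative only, not part of this commit). With icount enabled, the inline counter below used to be planted both in the original TB and again in the single-insn TB regenerated by cpu_io_recompile, counting the MMIO instruction twice; after this change the registration silently becomes a no-op for the recompiled TB:

#include <qemu-plugin.h>

QEMU_PLUGIN_EXPORT int qemu_plugin_version = QEMU_PLUGIN_API_VERSION;

static uint64_t insn_count;

/* Translation-time hook: plant an inline add on every instruction. */
static void vcpu_tb_trans(qemu_plugin_id_t id, struct qemu_plugin_tb *tb)
{
    size_t n = qemu_plugin_tb_n_insns(tb);
    for (size_t i = 0; i < n; i++) {
        struct qemu_plugin_insn *insn = qemu_plugin_tb_get_insn(tb, i);
        /* Dropped when the TB is CF_MEMI_ONLY (insn->mem_only is set). */
        qemu_plugin_register_vcpu_insn_exec_inline(
            insn, QEMU_PLUGIN_INLINE_ADD_U64, &insn_count, 1);
    }
}

QEMU_PLUGIN_EXPORT int qemu_plugin_install(qemu_plugin_id_t id,
                                           const qemu_info_t *info,
                                           int argc, char **argv)
{
    qemu_plugin_register_vcpu_tb_trans_cb(id, vcpu_tb_trans);
    return 0;
}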

accel/tcg/plugin-gen.c

@@ -842,7 +842,7 @@ static void plugin_gen_inject(const struct qemu_plugin_tb *plugin_tb)
     pr_ops();
 }
 
-bool plugin_gen_tb_start(CPUState *cpu, const TranslationBlock *tb)
+bool plugin_gen_tb_start(CPUState *cpu, const TranslationBlock *tb, bool mem_only)
 {
     struct qemu_plugin_tb *ptb = tcg_ctx->plugin_tb;
     bool ret = false;
@@ -855,6 +855,7 @@ bool plugin_gen_tb_start(CPUState *cpu, const TranslationBlock *tb)
         ptb->vaddr2 = -1;
         get_page_addr_code_hostp(cpu->env_ptr, tb->pc, &ptb->haddr1);
         ptb->haddr2 = NULL;
+        ptb->mem_only = mem_only;
 
         plugin_gen_empty_callback(PLUGIN_GEN_FROM_TB);
     }

accel/tcg/translate-all.c

@@ -2400,7 +2400,8 @@ void tb_check_watchpoint(CPUState *cpu, uintptr_t retaddr)
 }
 
 #ifndef CONFIG_USER_ONLY
-/* in deterministic execution mode, instructions doing device I/Os
+/*
+ * In deterministic execution mode, instructions doing device I/Os
  * must be at the end of the TB.
  *
  * Called by softmmu_template.h, with iothread mutex not held.
@@ -2431,19 +2432,18 @@ void cpu_io_recompile(CPUState *cpu, uintptr_t retaddr)
         n = 2;
     }
 
-    /* Generate a new TB executing the I/O insn.  */
-    cpu->cflags_next_tb = curr_cflags() | CF_LAST_IO | n;
+    /*
+     * Exit the loop and potentially generate a new TB executing the
+     * just the I/O insns. We also limit instrumentation to memory
+     * operations only (which execute after completion) so we don't
+     * double instrument the instruction.
+     */
+    cpu->cflags_next_tb = curr_cflags() | CF_MEMI_ONLY | CF_LAST_IO | n;
 
     qemu_log_mask_and_addr(CPU_LOG_EXEC, tb->pc,
                            "cpu_io_recompile: rewound execution of TB to "
                            TARGET_FMT_lx "\n", tb->pc);
 
-    /* TODO: If env->pc != tb->pc (i.e. the faulting instruction was not
-     * the first in the TB) then we end up generating a whole new TB and
-     * repeating the fault, which is horribly inefficient.
-     * Better would be to execute just this insn uncached, or generate a
-     * second new TB.
-     */
     cpu_loop_exit_noexc(cpu);
 }
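The request set up above is one-shot by construction: the execution loop reads cpu->cflags_next_tb exactly once on the way back in and then resets it, so only the TB entered on the way out of cpu_io_recompile carries CF_MEMI_ONLY. A self-contained toy model of that handshake (illustrative only, not QEMU code; curr_cflags() is stubbed to return a zero baseline):

#include <stdint.h>
#include <stdio.h>

#define CF_LAST_IO   0x00008000u
#define CF_MEMI_ONLY 0x00010000u

static uint32_t cflags_next_tb = UINT32_MAX;    /* -1: no request pending */

static uint32_t curr_cflags(void) { return 0; } /* stubbed baseline flags */

static uint32_t next_tb_cflags(void)
{
    uint32_t cflags = cflags_next_tb;
    if (cflags == UINT32_MAX) {
        cflags = curr_cflags();
    } else {
        cflags_next_tb = UINT32_MAX;  /* consume the request: one TB only */
    }
    return cflags;
}

int main(void)
{
    /* cpu_io_recompile asks for a 1-insn, memory-instrumentation-only TB */
    cflags_next_tb = curr_cflags() | CF_MEMI_ONLY | CF_LAST_IO | 1;
    printf("%#x\n", next_tb_cflags()); /* 0x18001: the special TB */
    printf("%#x\n", next_tb_cflags()); /* 0: back to normal TBs */
    return 0;
}

Because the consumed cflags feed the TB hash lookup (see the CF_HASH_MASK change below), the special TB stays cached under its own key and is reused on every subsequent trip through cpu_io_recompile.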

accel/tcg/translator.c

@@ -58,7 +58,8 @@ void translator_loop(const TranslatorOps *ops, DisasContextBase *db,
     ops->tb_start(db, cpu);
     tcg_debug_assert(db->is_jmp == DISAS_NEXT); /* no early exit */
 
-    plugin_enabled = plugin_gen_tb_start(cpu, tb);
+    plugin_enabled = plugin_gen_tb_start(cpu, tb,
+                                         tb_cflags(db->tb) & CF_MEMI_ONLY);
 
     while (true) {
         db->num_insns++;
@@ -100,6 +101,8 @@ void translator_loop(const TranslatorOps *ops, DisasContextBase *db,
             gen_io_start();
             ops->translate_insn(db, cpu);
         } else {
+            /* we should only see CF_MEMI_ONLY for io_recompile */
+            tcg_debug_assert(!(tb_cflags(db->tb) & CF_MEMI_ONLY));
             ops->translate_insn(db, cpu);
         }

include/exec/exec-all.h

@@ -454,14 +454,14 @@ struct TranslationBlock {
     uint32_t cflags;    /* compile flags */
 #define CF_COUNT_MASK  0x00007fff
 #define CF_LAST_IO     0x00008000 /* Last insn may be an IO access.  */
+#define CF_MEMI_ONLY   0x00010000 /* Only instrument memory ops */
 #define CF_USE_ICOUNT  0x00020000
 #define CF_INVALID     0x00040000 /* TB is stale. Set with @jmp_lock held */
 #define CF_PARALLEL    0x00080000 /* Generate code for a parallel context */
 #define CF_CLUSTER_MASK 0xff000000 /* Top 8 bits are cluster ID */
 #define CF_CLUSTER_SHIFT 24
-/* cflags' mask for hashing/comparison */
-#define CF_HASH_MASK   \
-    (CF_COUNT_MASK | CF_LAST_IO | CF_USE_ICOUNT | CF_PARALLEL | CF_CLUSTER_MASK)
+/* cflags' mask for hashing/comparison, basically ignore CF_INVALID */
+#define CF_HASH_MASK (~CF_INVALID)
 
     /* Per-vCPU dynamic tracing state used to generate this TB */
     uint32_t trace_vcpu_dstate;
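Inverting the definition to ~CF_INVALID means every present and future cflag bit except CF_INVALID takes part in TB hashing and comparison, so the memory-only TB from cpu_io_recompile can never be found (or cached) in place of the normally instrumented TB at the same pc. A toy check of the mask arithmetic (illustrative, not QEMU code):

#include <assert.h>
#include <stdint.h>

#define CF_MEMI_ONLY 0x00010000u
#define CF_INVALID   0x00040000u
#define CF_HASH_MASK (~CF_INVALID)

int main(void)
{
    uint32_t normal = 0;                    /* cflags of the regular TB */
    uint32_t memi = normal | CF_MEMI_ONLY;  /* cflags from cpu_io_recompile */

    /* CF_MEMI_ONLY survives the mask, so the two TBs hash differently */
    assert((normal & CF_HASH_MASK) != (memi & CF_HASH_MASK));

    /* CF_INVALID is the only bit ignored for lookup */
    assert(((memi | CF_INVALID) & CF_HASH_MASK) == (memi & CF_HASH_MASK));
    return 0;
}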

include/exec/plugin-gen.h

@@ -19,7 +19,7 @@ struct DisasContextBase;
 
 #ifdef CONFIG_PLUGIN
 
-bool plugin_gen_tb_start(CPUState *cpu, const TranslationBlock *tb);
+bool plugin_gen_tb_start(CPUState *cpu, const TranslationBlock *tb, bool supress);
 void plugin_gen_tb_end(CPUState *cpu);
 void plugin_gen_insn_start(CPUState *cpu, const struct DisasContextBase *db);
 void plugin_gen_insn_end(void);
@@ -41,7 +41,7 @@ static inline void plugin_insn_append(const void *from, size_t size)
 #else /* !CONFIG_PLUGIN */
 
 static inline
-bool plugin_gen_tb_start(CPUState *cpu, const TranslationBlock *tb)
+bool plugin_gen_tb_start(CPUState *cpu, const TranslationBlock *tb, bool supress)
 {
     return false;
 }

include/qemu/plugin.h

@@ -92,6 +92,7 @@ struct qemu_plugin_dyn_cb {
     };
 };
 
+/* Internal context for instrumenting an instruction */
 struct qemu_plugin_insn {
     GByteArray *data;
     uint64_t vaddr;
@@ -99,6 +100,7 @@ struct qemu_plugin_insn {
     GArray *cbs[PLUGIN_N_CB_TYPES][PLUGIN_N_CB_SUBTYPES];
     bool calls_helpers;
     bool mem_helper;
+    bool mem_only;
 };
 
 /*
@@ -128,6 +130,7 @@ static inline struct qemu_plugin_insn *qemu_plugin_insn_alloc(void)
     return insn;
 }
 
+/* Internal context for this TranslationBlock */
 struct qemu_plugin_tb {
     GPtrArray *insns;
     size_t n;
@@ -135,6 +138,7 @@ struct qemu_plugin_tb {
     uint64_t vaddr2;
     void *haddr1;
     void *haddr2;
+    bool mem_only;
 
     GArray *cbs[PLUGIN_N_CB_SUBTYPES];
 };

plugins/api.c

@@ -84,15 +84,19 @@ void qemu_plugin_register_vcpu_tb_exec_cb(struct qemu_plugin_tb *tb,
                                           enum qemu_plugin_cb_flags flags,
                                           void *udata)
 {
-    plugin_register_dyn_cb__udata(&tb->cbs[PLUGIN_CB_REGULAR],
-                                  cb, flags, udata);
+    if (!tb->mem_only) {
+        plugin_register_dyn_cb__udata(&tb->cbs[PLUGIN_CB_REGULAR],
+                                      cb, flags, udata);
+    }
 }
 
 void qemu_plugin_register_vcpu_tb_exec_inline(struct qemu_plugin_tb *tb,
                                               enum qemu_plugin_op op,
                                               void *ptr, uint64_t imm)
 {
-    plugin_register_inline_op(&tb->cbs[PLUGIN_CB_INLINE], 0, op, ptr, imm);
+    if (!tb->mem_only) {
+        plugin_register_inline_op(&tb->cbs[PLUGIN_CB_INLINE], 0, op, ptr, imm);
+    }
 }
 
 void qemu_plugin_register_vcpu_insn_exec_cb(struct qemu_plugin_insn *insn,
@@ -100,20 +104,27 @@ void qemu_plugin_register_vcpu_insn_exec_cb(struct qemu_plugin_insn *insn,
                                             enum qemu_plugin_cb_flags flags,
                                             void *udata)
 {
-    plugin_register_dyn_cb__udata(&insn->cbs[PLUGIN_CB_INSN][PLUGIN_CB_REGULAR],
-                                  cb, flags, udata);
+    if (!insn->mem_only) {
+        plugin_register_dyn_cb__udata(&insn->cbs[PLUGIN_CB_INSN][PLUGIN_CB_REGULAR],
+                                      cb, flags, udata);
+    }
 }
 
 void qemu_plugin_register_vcpu_insn_exec_inline(struct qemu_plugin_insn *insn,
                                                 enum qemu_plugin_op op,
                                                 void *ptr, uint64_t imm)
 {
-    plugin_register_inline_op(&insn->cbs[PLUGIN_CB_INSN][PLUGIN_CB_INLINE],
-                              0, op, ptr, imm);
+    if (!insn->mem_only) {
+        plugin_register_inline_op(&insn->cbs[PLUGIN_CB_INSN][PLUGIN_CB_INLINE],
+                                  0, op, ptr, imm);
+    }
 }
 
+/*
+ * We always plant memory instrumentation because they don't finalise until
+ * after the operation has complete.
+ */
 void qemu_plugin_register_vcpu_mem_cb(struct qemu_plugin_insn *insn,
                                       qemu_plugin_vcpu_mem_cb_t cb,
                                       enum qemu_plugin_cb_flags flags,
@@ -121,7 +132,7 @@ void qemu_plugin_register_vcpu_mem_cb(struct qemu_plugin_insn *insn,
                                       void *udata)
 {
     plugin_register_vcpu_mem_cb(&insn->cbs[PLUGIN_CB_MEM][PLUGIN_CB_REGULAR],
-                                cb, flags, rw, udata);
+                                    cb, flags, rw, udata);
 }
 
 void qemu_plugin_register_vcpu_mem_inline(struct qemu_plugin_insn *insn,
@@ -130,7 +141,7 @@ void qemu_plugin_register_vcpu_mem_inline(struct qemu_plugin_insn *insn,
                                           uint64_t imm)
 {
     plugin_register_inline_op(&insn->cbs[PLUGIN_CB_MEM][PLUGIN_CB_INLINE],
-                              rw, op, ptr, imm);
+                                  rw, op, ptr, imm);
 }
 
 void qemu_plugin_register_vcpu_tb_trans_cb(qemu_plugin_id_t id,
@@ -181,10 +192,13 @@ uint64_t qemu_plugin_tb_vaddr(const struct qemu_plugin_tb *tb)
 struct qemu_plugin_insn *
 qemu_plugin_tb_get_insn(const struct qemu_plugin_tb *tb, size_t idx)
 {
+    struct qemu_plugin_insn *insn;
     if (unlikely(idx >= tb->n)) {
         return NULL;
     }
-    return g_ptr_array_index(tb->insns, idx);
+    insn = g_ptr_array_index(tb->insns, idx);
+    insn->mem_only = tb->mem_only;
+    return insn;
 }
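From a plugin's point of view the memory path is unchanged: as the comment above notes, memory callbacks only run once the access has completed, so the aborted first execution of the MMIO insn never fired them and they can safely be planted again in the CF_MEMI_ONLY TB. A minimal sketch against the public API (illustrative, not part of this commit; the counter is non-atomic for brevity):

#include <qemu-plugin.h>

QEMU_PLUGIN_EXPORT int qemu_plugin_version = QEMU_PLUGIN_API_VERSION;

static uint64_t mem_count;

/* Fires after the access completes: exactly once per access, even when
 * the insn is re-executed on the way out of cpu_io_recompile. */
static void vcpu_mem(unsigned int cpu_index, qemu_plugin_meminfo_t info,
                     uint64_t vaddr, void *udata)
{
    mem_count++;
}

static void vcpu_tb_trans(qemu_plugin_id_t id, struct qemu_plugin_tb *tb)
{
    for (size_t i = 0; i < qemu_plugin_tb_n_insns(tb); i++) {
        struct qemu_plugin_insn *insn = qemu_plugin_tb_get_insn(tb, i);
        /* Always honoured, even on a CF_MEMI_ONLY TB: */
        qemu_plugin_register_vcpu_mem_cb(insn, vcpu_mem,
                                         QEMU_PLUGIN_CB_NO_REGS,
                                         QEMU_PLUGIN_MEM_RW, NULL);
    }
}

QEMU_PLUGIN_EXPORT int qemu_plugin_install(qemu_plugin_id_t id,
                                           const qemu_info_t *info,
                                           int argc, char **argv)
{
    qemu_plugin_register_vcpu_tb_trans_cb(id, vcpu_tb_trans);
    return 0;
}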