Plugin updates:

- expose vdev name in PCI memory registration
   - new hwprofile plugin
   - bunch of style cleanups to contrib/plugins
   - fix call signature of inline instrumentation
   - re-factor the io_recompile code to push specialisation into hooks
   - add some acceptance tests for the plugins
   - clean-up and remove CF_NOCACHE handling from TCG
   - fix instrumentation of cpu_io_recompile sections
   - expand tests to check inline and cb count the same
 -----BEGIN PGP SIGNATURE-----
 
 iQEzBAABCgAdFiEEZoWumedRZ7yvyN81+9DbCVqeKkQFAmAuJFkACgkQ+9DbCVqe
 KkTIGQf/e5FhBWxTIO3Y+N9CppnZM2gmo1U4kEOuCoJxa079/Jy9j7l7ZeWicmTR
 hhatxPKcKzodHGCSTJ9m+g9T8ye/hBgDgsuM1MZMQNtdZ8enEhU+z9V6MVmbRf55
 s31HUksf7kiY+ahBVCjEE41f/JGLNiOBltmNQtfDap7LvcEXXLp+CLvYfiMmtPW3
 0aZfKFAbfaEj2x4WSv7tjFsnzFUTFYkwsgVmcuDbI6uD5sX864W0XPdEjTrB1pPO
 r3qJ26yZeDrJEyMykQodWbKK12NIuLAtOCezc05UwpXmSKOueBdyEehNtraIQT1k
 TnMW2Vs10jDWN0RdEY2II7KZbcwTDQ==
 =/I6l
 -----END PGP SIGNATURE-----

Merge remote-tracking branch 'remotes/stsquad/tags/pull-plugin-updates-180221-1' into staging

Plugin updates:

  - expose vdev name in PCI memory registration
  - new hwprofile plugin
  - bunch of style cleanups to contrib/plugins
  - fix call signature of inline instrumentation
  - re-factor the io_recompile code to push specialisation into hooks
  - add some acceptance tests for the plugins
  - clean-up and remove CF_NOCACHE handling from TCG
  - fix instrumentation of cpu_io_recompile sections
  - expand tests to check inline and cb count the same

# gpg: Signature made Thu 18 Feb 2021 08:24:57 GMT
# gpg:                using RSA key 6685AE99E75167BCAFC8DF35FBD0DB095A9E2A44
# gpg: Good signature from "Alex Bennée (Master Work Key) <alex.bennee@linaro.org>" [full]
# Primary key fingerprint: 6685 AE99 E751 67BC AFC8  DF35 FBD0 DB09 5A9E 2A44

* remotes/stsquad/tags/pull-plugin-updates-180221-1: (23 commits)
  tests/acceptance: add a memory callback check
  tests/plugin: allow memory plugin to do both inline and callbacks
  tests/acceptance: add a new tests to detect counting errors
  accel/tcg: allow plugin instrumentation to be disable via cflags
  accel/tcg: remove CF_NOCACHE and special cases
  accel/tcg: re-factor non-RAM execution code
  accel/tcg: cache single instruction TB on pending replay exception
  accel/tcg: actually cache our partial icount TB
  tests/acceptance: add a new set of tests to exercise plugins
  tests/plugin: expand insn test to detect duplicate instructions
  target/sh4: Create superh_io_recompile_replay_branch
  target/mips: Create mips_io_recompile_replay_branch
  accel/tcg: Create io_recompile_replay_branch hook
  exec: Move TranslationBlock typedef to qemu/typedefs.h
  accel/tcg/plugin-gen: fix the call signature for inline callbacks
  contrib: Open brace '{' following struct go on the same line
  contrib: space required after that ','
  contrib: Add spaces around operator
  contrib: Fix some code style problems, ERROR: "foo * bar" should be "foo *bar"
  contrib: Don't use '#' flag of printf format
  ...

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
This commit is contained in:
Peter Maydell 2021-02-18 13:27:03 +00:00
commit 91416a4254
36 changed files with 769 additions and 215 deletions

View File

@ -2903,6 +2903,7 @@ S: Maintained
F: docs/devel/tcg-plugins.rst
F: plugins/
F: tests/plugin/
F: tests/acceptance/tcg_plugins.py
F: contrib/plugins/
AArch64 TCG target

View File

@ -224,40 +224,6 @@ cpu_tb_exec(CPUState *cpu, TranslationBlock *itb, int *tb_exit)
return last_tb;
}
#ifndef CONFIG_USER_ONLY
/* Execute the code without caching the generated code. An interpreter
could be used if available. */
static void cpu_exec_nocache(CPUState *cpu, int max_cycles,
TranslationBlock *orig_tb, bool ignore_icount)
{
TranslationBlock *tb;
uint32_t cflags = curr_cflags() | CF_NOCACHE;
int tb_exit;
if (ignore_icount) {
cflags &= ~CF_USE_ICOUNT;
}
/* Should never happen.
We only end up here when an existing TB is too long. */
cflags |= MIN(max_cycles, CF_COUNT_MASK);
mmap_lock();
tb = tb_gen_code(cpu, orig_tb->pc, orig_tb->cs_base,
orig_tb->flags, cflags);
tb->orig_tb = orig_tb;
mmap_unlock();
/* execute the generated code */
trace_exec_tb_nocache(tb, tb->pc);
cpu_tb_exec(cpu, tb, &tb_exit);
mmap_lock();
tb_phys_invalidate(tb, -1);
mmap_unlock();
tcg_tb_remove(tb);
}
#endif
static void cpu_exec_enter(CPUState *cpu)
{
@ -524,15 +490,12 @@ static inline bool cpu_handle_exception(CPUState *cpu, int *ret)
#ifndef CONFIG_USER_ONLY
if (replay_has_exception()
&& cpu_neg(cpu)->icount_decr.u16.low + cpu->icount_extra == 0) {
/* try to cause an exception pending in the log */
cpu_exec_nocache(cpu, 1, tb_find(cpu, NULL, 0, curr_cflags()), true);
/* Execute just one insn to trigger exception pending in the log */
cpu->cflags_next_tb = (curr_cflags() & ~CF_USE_ICOUNT) | 1;
}
#endif
if (cpu->exception_index < 0) {
return false;
}
return false;
}
if (cpu->exception_index >= EXCP_INTERRUPT) {
/* exit request from the cpu execution loop */
*ret = cpu->exception_index;
@ -688,6 +651,7 @@ static inline bool cpu_handle_interrupt(CPUState *cpu,
/* Finally, check if we need to exit to the main loop. */
if (unlikely(qatomic_read(&cpu->exit_request))
|| (icount_enabled()
&& (cpu->cflags_next_tb == -1 || cpu->cflags_next_tb & CF_USE_ICOUNT)
&& cpu_neg(cpu)->icount_decr.u16.low + cpu->icount_extra == 0)) {
qatomic_set(&cpu->exit_request, 0);
if (cpu->exception_index == -1) {
@ -730,16 +694,17 @@ static inline void cpu_loop_exec_tb(CPUState *cpu, TranslationBlock *tb,
/* Ensure global icount has gone forward */
icount_update(cpu);
/* Refill decrementer and continue execution. */
insns_left = MIN(0xffff, cpu->icount_budget);
insns_left = MIN(CF_COUNT_MASK, cpu->icount_budget);
cpu_neg(cpu)->icount_decr.u16.low = insns_left;
cpu->icount_extra = cpu->icount_budget - insns_left;
if (!cpu->icount_extra && insns_left < tb->icount) {
/* Execute any remaining instructions, then let the main loop
* handle the next event.
*/
if (insns_left > 0) {
cpu_exec_nocache(cpu, insns_left, tb, false);
}
/*
* If the next tb has more instructions than we have left to
* execute we need to ensure we find/generate a TB with exactly
* insns_left instructions in it.
*/
if (!cpu->icount_extra && insns_left > 0 && insns_left < tb->icount) {
cpu->cflags_next_tb = (tb->cflags & ~CF_COUNT_MASK) | insns_left;
}
#endif
}

View File

@ -320,22 +320,6 @@ static TCGOp *copy_const_ptr(TCGOp **begin_op, TCGOp *op, void *ptr)
return op;
}
static TCGOp *copy_const_i64(TCGOp **begin_op, TCGOp *op, uint64_t v)
{
if (TCG_TARGET_REG_BITS == 32) {
/* 2x mov_i32 */
op = copy_op(begin_op, op, INDEX_op_mov_i32);
op->args[1] = tcgv_i32_arg(tcg_constant_i32(v));
op = copy_op(begin_op, op, INDEX_op_mov_i32);
op->args[1] = tcgv_i32_arg(tcg_constant_i32(v >> 32));
} else {
/* mov_i64 */
op = copy_op(begin_op, op, INDEX_op_mov_i64);
op->args[1] = tcgv_i64_arg(tcg_constant_i64(v));
}
return op;
}
static TCGOp *copy_extu_tl_i64(TCGOp **begin_op, TCGOp *op)
{
if (TARGET_LONG_BITS == 32) {
@ -374,14 +358,17 @@ static TCGOp *copy_st_i64(TCGOp **begin_op, TCGOp *op)
return op;
}
static TCGOp *copy_add_i64(TCGOp **begin_op, TCGOp *op)
static TCGOp *copy_add_i64(TCGOp **begin_op, TCGOp *op, uint64_t v)
{
if (TCG_TARGET_REG_BITS == 32) {
/* all 32-bit backends must implement add2_i32 */
g_assert(TCG_TARGET_HAS_add2_i32);
op = copy_op(begin_op, op, INDEX_op_add2_i32);
op->args[4] = tcgv_i32_arg(tcg_constant_i32(v));
op->args[5] = tcgv_i32_arg(tcg_constant_i32(v >> 32));
} else {
op = copy_op(begin_op, op, INDEX_op_add_i64);
op->args[2] = tcgv_i64_arg(tcg_constant_i64(v));
}
return op;
}
@ -431,6 +418,12 @@ static TCGOp *copy_call(TCGOp **begin_op, TCGOp *op, void *empty_func,
return op;
}
/*
* When we append/replace ops here we are sensitive to changing patterns of
* TCGOps generated by the tcg_gen_FOO calls when we generated the
* empty callbacks. This will assert very quickly in a debug build as
* we assert the ops we are replacing are the correct ones.
*/
static TCGOp *append_udata_cb(const struct qemu_plugin_dyn_cb *cb,
TCGOp *begin_op, TCGOp *op, int *cb_idx)
{
@ -462,11 +455,8 @@ static TCGOp *append_inline_cb(const struct qemu_plugin_dyn_cb *cb,
/* ld_i64 */
op = copy_ld_i64(&begin_op, op);
/* const_i64 */
op = copy_const_i64(&begin_op, op, cb->inline_insn.imm);
/* add_i64 */
op = copy_add_i64(&begin_op, op);
op = copy_add_i64(&begin_op, op, cb->inline_insn.imm);
/* st_i64 */
op = copy_st_i64(&begin_op, op);
@ -852,7 +842,7 @@ static void plugin_gen_inject(const struct qemu_plugin_tb *plugin_tb)
pr_ops();
}
bool plugin_gen_tb_start(CPUState *cpu, const TranslationBlock *tb)
bool plugin_gen_tb_start(CPUState *cpu, const TranslationBlock *tb, bool mem_only)
{
struct qemu_plugin_tb *ptb = tcg_ctx->plugin_tb;
bool ret = false;
@ -865,6 +855,7 @@ bool plugin_gen_tb_start(CPUState *cpu, const TranslationBlock *tb)
ptb->vaddr2 = -1;
get_page_addr_code_hostp(cpu->env_ptr, tb->pc, &ptb->haddr1);
ptb->haddr2 = NULL;
ptb->mem_only = mem_only;
plugin_gen_empty_callback(PLUGIN_GEN_FROM_TB);
}

View File

@ -60,6 +60,7 @@
#include "sysemu/cpu-timers.h"
#include "sysemu/tcg.h"
#include "qapi/error.h"
#include "hw/core/tcg-cpu-ops.h"
#include "internal.h"
/* #define DEBUG_TB_INVALIDATE */
@ -409,12 +410,6 @@ bool cpu_restore_state(CPUState *cpu, uintptr_t host_pc, bool will_exit)
TranslationBlock *tb = tcg_tb_lookup(host_pc);
if (tb) {
cpu_restore_state_from_tb(cpu, tb, host_pc, will_exit);
if (tb_cflags(tb) & CF_NOCACHE) {
/* one-shot translation, invalidate it immediately */
tb_phys_invalidate(tb, -1);
tcg_tb_remove(tb);
tb_destroy(tb);
}
return true;
}
}
@ -1633,8 +1628,7 @@ static void do_tb_phys_invalidate(TranslationBlock *tb, bool rm_from_page_list)
phys_pc = tb->page_addr[0] + (tb->pc & ~TARGET_PAGE_MASK);
h = tb_hash_func(phys_pc, tb->pc, tb->flags, tb_cflags(tb) & CF_HASH_MASK,
tb->trace_vcpu_dstate);
if (!(tb->cflags & CF_NOCACHE) &&
!qht_remove(&tb_ctx.htable, tb, h)) {
if (!qht_remove(&tb_ctx.htable, tb, h)) {
return;
}
@ -1778,7 +1772,8 @@ static inline void tb_page_add(PageDesc *p, TranslationBlock *tb,
#endif
}
/* add a new TB and link it to the physical page tables. phys_page2 is
/*
* Add a new TB and link it to the physical page tables. phys_page2 is
* (-1) to indicate that only one page contains the TB.
*
* Called with mmap_lock held for user-mode emulation.
@ -1794,20 +1789,11 @@ tb_link_page(TranslationBlock *tb, tb_page_addr_t phys_pc,
{
PageDesc *p;
PageDesc *p2 = NULL;
void *existing_tb = NULL;
uint32_t h;
assert_memory_lock();
if (phys_pc == -1) {
/*
* If the TB is not associated with a physical RAM page then
* it must be a temporary one-insn TB, and we have nothing to do
* except fill in the page_addr[] fields.
*/
assert(tb->cflags & CF_NOCACHE);
tb->page_addr[0] = tb->page_addr[1] = -1;
return tb;
}
/*
* Add the TB to the page list, acquiring first the pages's locks.
* We keep the locks held until after inserting the TB in the hash table,
@ -1824,25 +1810,20 @@ tb_link_page(TranslationBlock *tb, tb_page_addr_t phys_pc,
tb->page_addr[1] = -1;
}
if (!(tb->cflags & CF_NOCACHE)) {
void *existing_tb = NULL;
uint32_t h;
/* add in the hash table */
h = tb_hash_func(phys_pc, tb->pc, tb->flags, tb->cflags & CF_HASH_MASK,
tb->trace_vcpu_dstate);
qht_insert(&tb_ctx.htable, tb, h, &existing_tb);
/* add in the hash table */
h = tb_hash_func(phys_pc, tb->pc, tb->flags, tb->cflags & CF_HASH_MASK,
tb->trace_vcpu_dstate);
qht_insert(&tb_ctx.htable, tb, h, &existing_tb);
/* remove TB from the page(s) if we couldn't insert it */
if (unlikely(existing_tb)) {
tb_page_remove(p, tb);
invalidate_page_bitmap(p);
if (p2) {
tb_page_remove(p2, tb);
invalidate_page_bitmap(p2);
}
tb = existing_tb;
/* remove TB from the page(s) if we couldn't insert it */
if (unlikely(existing_tb)) {
tb_page_remove(p, tb);
invalidate_page_bitmap(p);
if (p2) {
tb_page_remove(p2, tb);
invalidate_page_bitmap(p2);
}
tb = existing_tb;
}
if (p2 && p2 != p) {
@ -1880,9 +1861,8 @@ TranslationBlock *tb_gen_code(CPUState *cpu,
phys_pc = get_page_addr_code(env, pc);
if (phys_pc == -1) {
/* Generate a temporary TB with 1 insn in it */
cflags &= ~CF_COUNT_MASK;
cflags |= CF_NOCACHE | 1;
/* Generate a one-shot TB with 1 insn in it */
cflags = (cflags & ~CF_COUNT_MASK) | 1;
}
cflags &= ~CF_CLUSTER_MASK;
@ -1916,7 +1896,6 @@ TranslationBlock *tb_gen_code(CPUState *cpu,
tb->cs_base = cs_base;
tb->flags = flags;
tb->cflags = cflags;
tb->orig_tb = NULL;
tb->trace_vcpu_dstate = *cpu->trace_dstate;
tcg_ctx->tb_cflags = cflags;
tb_overflow:
@ -2096,6 +2075,17 @@ TranslationBlock *tb_gen_code(CPUState *cpu,
tb_reset_jump(tb, 1);
}
/*
* If the TB is not associated with a physical RAM page then
* it must be a temporary one-insn TB, and we have nothing to do
* except fill in the page_addr[] fields. Return early before
* attempting to link to other TBs or add to the lookup table.
*/
if (phys_pc == -1) {
tb->page_addr[0] = tb->page_addr[1] = -1;
return tb;
}
/* check next page if needed */
virt_page2 = (pc + tb->size - 1) & TARGET_PAGE_MASK;
phys_page2 = -1;
@ -2410,17 +2400,16 @@ void tb_check_watchpoint(CPUState *cpu, uintptr_t retaddr)
}
#ifndef CONFIG_USER_ONLY
/* in deterministic execution mode, instructions doing device I/Os
/*
* In deterministic execution mode, instructions doing device I/Os
* must be at the end of the TB.
*
* Called by softmmu_template.h, with iothread mutex not held.
*/
void cpu_io_recompile(CPUState *cpu, uintptr_t retaddr)
{
#if defined(TARGET_MIPS) || defined(TARGET_SH4)
CPUArchState *env = cpu->env_ptr;
#endif
TranslationBlock *tb;
CPUClass *cc;
uint32_t n;
tb = tcg_tb_lookup(retaddr);
@ -2430,52 +2419,31 @@ void cpu_io_recompile(CPUState *cpu, uintptr_t retaddr)
}
cpu_restore_state_from_tb(cpu, tb, retaddr, true);
/* On MIPS and SH, delay slot instructions can only be restarted if
they were already the first instruction in the TB. If this is not
the first instruction in a TB then re-execute the preceding
branch. */
/*
* Some guests must re-execute the branch when re-executing a delay
* slot instruction. When this is the case, adjust icount and N
* to account for the re-execution of the branch.
*/
n = 1;
#if defined(TARGET_MIPS)
if ((env->hflags & MIPS_HFLAG_BMASK) != 0
&& env->active_tc.PC != tb->pc) {
env->active_tc.PC -= (env->hflags & MIPS_HFLAG_B16 ? 2 : 4);
cc = CPU_GET_CLASS(cpu);
if (cc->tcg_ops->io_recompile_replay_branch &&
cc->tcg_ops->io_recompile_replay_branch(cpu, tb)) {
cpu_neg(cpu)->icount_decr.u16.low++;
env->hflags &= ~MIPS_HFLAG_BMASK;
n = 2;
}
#elif defined(TARGET_SH4)
if ((env->flags & ((DELAY_SLOT | DELAY_SLOT_CONDITIONAL))) != 0
&& env->pc != tb->pc) {
env->pc -= 2;
cpu_neg(cpu)->icount_decr.u16.low++;
env->flags &= ~(DELAY_SLOT | DELAY_SLOT_CONDITIONAL);
n = 2;
}
#endif
/* Generate a new TB executing the I/O insn. */
cpu->cflags_next_tb = curr_cflags() | CF_LAST_IO | n;
if (tb_cflags(tb) & CF_NOCACHE) {
if (tb->orig_tb) {
/* Invalidate original TB if this TB was generated in
* cpu_exec_nocache() */
tb_phys_invalidate(tb->orig_tb, -1);
}
tcg_tb_remove(tb);
tb_destroy(tb);
}
/*
* Exit the loop and potentially generate a new TB executing the
* just the I/O insns. We also limit instrumentation to memory
* operations only (which execute after completion) so we don't
* double instrument the instruction.
*/
cpu->cflags_next_tb = curr_cflags() | CF_MEMI_ONLY | CF_LAST_IO | n;
qemu_log_mask_and_addr(CPU_LOG_EXEC, tb->pc,
"cpu_io_recompile: rewound execution of TB to "
TARGET_FMT_lx "\n", tb->pc);
/* TODO: If env->pc != tb->pc (i.e. the faulting instruction was not
* the first in the TB) then we end up generating a whole new TB and
* repeating the fault, which is horribly inefficient.
* Better would be to execute just this insn uncached, or generate a
* second new TB.
*/
cpu_loop_exit_noexc(cpu);
}

View File

@ -58,7 +58,8 @@ void translator_loop(const TranslatorOps *ops, DisasContextBase *db,
ops->tb_start(db, cpu);
tcg_debug_assert(db->is_jmp == DISAS_NEXT); /* no early exit */
plugin_enabled = plugin_gen_tb_start(cpu, tb);
plugin_enabled = plugin_gen_tb_start(cpu, tb,
tb_cflags(db->tb) & CF_MEMI_ONLY);
while (true) {
db->num_insns++;
@ -100,6 +101,8 @@ void translator_loop(const TranslatorOps *ops, DisasContextBase *db,
gen_io_start();
ops->translate_insn(db, cpu);
} else {
/* we should only see CF_MEMI_ONLY for io_recompile */
tcg_debug_assert(!(tb_cflags(db->tb) & CF_MEMI_ONLY));
ops->translate_insn(db, cpu);
}

View File

@ -17,7 +17,7 @@
#define IVSHMEM_SERVER_DEFAULT_PID_FILE "/var/run/ivshmem-server.pid"
#define IVSHMEM_SERVER_DEFAULT_UNIX_SOCK_PATH "/tmp/ivshmem_socket"
#define IVSHMEM_SERVER_DEFAULT_SHM_PATH "ivshmem"
#define IVSHMEM_SERVER_DEFAULT_SHM_SIZE (4*1024*1024)
#define IVSHMEM_SERVER_DEFAULT_SHM_SIZE (4 * 1024 * 1024)
#define IVSHMEM_SERVER_DEFAULT_N_VECTORS 1
/* used to quit on signal SIGTERM */

View File

@ -17,6 +17,7 @@ NAMES += hotblocks
NAMES += hotpages
NAMES += howvec
NAMES += lockstep
NAMES += hwprofile
SONAMES := $(addsuffix .so,$(addprefix lib,$(NAMES)))

View File

@ -63,7 +63,7 @@ static void plugin_exit(qemu_plugin_id_t id, void *p)
for (i = 0; i < limit && it->next; i++, it = it->next) {
ExecCount *rec = (ExecCount *) it->data;
g_string_append_printf(report, "%#016"PRIx64", %d, %ld, %"PRId64"\n",
g_string_append_printf(report, "0x%016"PRIx64", %d, %ld, %"PRId64"\n",
rec->start_addr, rec->trans_count,
rec->insns, rec->exec_count);
}

View File

@ -88,7 +88,7 @@ static void plugin_exit(qemu_plugin_id_t id, void *p)
for (i = 0; i < limit && it->next; i++, it = it->next) {
PageCounters *rec = (PageCounters *) it->data;
g_string_append_printf(report,
"%#016"PRIx64", 0x%04x, %"PRId64
"0x%016"PRIx64", 0x%04x, %"PRId64
", 0x%04x, %"PRId64"\n",
rec->page_address,
rec->cpu_read, rec->reads,

View File

@ -68,7 +68,7 @@ static InsnClassExecCount aarch64_insn_classes[] = {
{ "Reserved", "res", 0x1e000000, 0x00000000, COUNT_CLASS},
/* Data Processing Immediate */
{ " PCrel addr", "pcrel", 0x1f000000, 0x10000000, COUNT_CLASS},
{ " Add/Sub (imm,tags)","asit", 0x1f800000, 0x11800000, COUNT_CLASS},
{ " Add/Sub (imm,tags)", "asit", 0x1f800000, 0x11800000, COUNT_CLASS},
{ " Add/Sub (imm)", "asi", 0x1f000000, 0x11000000, COUNT_CLASS},
{ " Logical (imm)", "logi", 0x1f800000, 0x12000000, COUNT_CLASS},
{ " Move Wide (imm)", "movwi", 0x1f800000, 0x12800000, COUNT_CLASS},
@ -91,17 +91,17 @@ static InsnClassExecCount aarch64_insn_classes[] = {
{ "Branches", "branch", 0x1c000000, 0x14000000, COUNT_CLASS},
/* Loads and Stores */
{ " AdvSimd ldstmult", "advlsm", 0xbfbf0000, 0x0c000000, COUNT_CLASS},
{ " AdvSimd ldstmult++","advlsmp",0xbfb00000, 0x0c800000, COUNT_CLASS},
{ " AdvSimd ldstmult++", "advlsmp", 0xbfb00000, 0x0c800000, COUNT_CLASS},
{ " AdvSimd ldst", "advlss", 0xbf9f0000, 0x0d000000, COUNT_CLASS},
{ " AdvSimd ldst++", "advlssp",0xbf800000, 0x0d800000, COUNT_CLASS},
{ " AdvSimd ldst++", "advlssp", 0xbf800000, 0x0d800000, COUNT_CLASS},
{ " ldst excl", "ldstx", 0x3f000000, 0x08000000, COUNT_CLASS},
{ " Prefetch", "prfm", 0xff000000, 0xd8000000, COUNT_CLASS},
{ " Load Reg (lit)", "ldlit", 0x1b000000, 0x18000000, COUNT_CLASS},
{ " ldst noalloc pair", "ldstnap",0x3b800000, 0x28000000, COUNT_CLASS},
{ " ldst noalloc pair", "ldstnap", 0x3b800000, 0x28000000, COUNT_CLASS},
{ " ldst pair", "ldstp", 0x38000000, 0x28000000, COUNT_CLASS},
{ " ldst reg", "ldstr", 0x3b200000, 0x38000000, COUNT_CLASS},
{ " Atomic ldst", "atomic", 0x3b200c00, 0x38200000, COUNT_CLASS},
{ " ldst reg (reg off)","ldstro", 0x3b200b00, 0x38200800, COUNT_CLASS},
{ " ldst reg (reg off)", "ldstro", 0x3b200b00, 0x38200800, COUNT_CLASS},
{ " ldst reg (pac)", "ldstpa", 0x3b200200, 0x38200800, COUNT_CLASS},
{ " ldst reg (imm)", "ldsti", 0x3b000000, 0x39000000, COUNT_CLASS},
{ "Loads & Stores", "ldst", 0x0a000000, 0x08000000, COUNT_CLASS},
@ -145,8 +145,7 @@ typedef struct {
int table_sz;
} ClassSelector;
static ClassSelector class_tables[] =
{
static ClassSelector class_tables[] = {
{ "aarch64", aarch64_insn_classes, ARRAY_SIZE(aarch64_insn_classes) },
{ "sparc", sparc32_insn_classes, ARRAY_SIZE(sparc32_insn_classes) },
{ "sparc64", sparc64_insn_classes, ARRAY_SIZE(sparc64_insn_classes) },
@ -202,14 +201,14 @@ static void plugin_exit(qemu_plugin_id_t id, void *p)
counts = g_hash_table_get_values(insns);
if (counts && g_list_next(counts)) {
g_string_append_printf(report,"Individual Instructions:\n");
g_string_append_printf(report, "Individual Instructions:\n");
counts = g_list_sort(counts, cmp_exec_count);
for (i = 0; i < limit && g_list_next(counts);
i++, counts = g_list_next(counts)) {
InsnExecCount *rec = (InsnExecCount *) counts->data;
g_string_append_printf(report,
"Instr: %-24s\t(%ld hits)\t(op=%#08x/%s)\n",
"Instr: %-24s\t(%ld hits)\t(op=0x%08x/%s)\n",
rec->insn,
rec->count,
rec->opcode,
@ -235,7 +234,7 @@ static void vcpu_insn_exec_before(unsigned int cpu_index, void *udata)
(*count)++;
}
static uint64_t * find_counter(struct qemu_plugin_insn *insn)
static uint64_t *find_counter(struct qemu_plugin_insn *insn)
{
int i;
uint64_t *cnt = NULL;

305
contrib/plugins/hwprofile.c Normal file
View File

@ -0,0 +1,305 @@
/*
* Copyright (C) 2020, Alex Bennée <alex.bennee@linaro.org>
*
* HW Profile - breakdown access patterns for IO to devices
*
* License: GNU GPL, version 2 or later.
* See the COPYING file in the top-level directory.
*/
#include <inttypes.h>
#include <assert.h>
#include <stdlib.h>
#include <inttypes.h>
#include <string.h>
#include <unistd.h>
#include <stdio.h>
#include <glib.h>
#include <qemu-plugin.h>
QEMU_PLUGIN_EXPORT int qemu_plugin_version = QEMU_PLUGIN_VERSION;
#define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0]))
typedef struct {
uint64_t cpu_read;
uint64_t cpu_write;
uint64_t reads;
uint64_t writes;
} IOCounts;
typedef struct {
uint64_t off_or_pc;
IOCounts counts;
} IOLocationCounts;
typedef struct {
const char *name;
uint64_t base;
IOCounts totals;
GHashTable *detail;
} DeviceCounts;
static GMutex lock;
static GHashTable *devices;
/* track the access pattern to a piece of HW */
static bool pattern;
/* track the source address of access to HW */
static bool source;
/* track only matched regions of HW */
static bool check_match;
static gchar **matches;
static enum qemu_plugin_mem_rw rw = QEMU_PLUGIN_MEM_RW;
static inline bool track_reads(void)
{
return rw == QEMU_PLUGIN_MEM_RW || rw == QEMU_PLUGIN_MEM_R;
}
static inline bool track_writes(void)
{
return rw == QEMU_PLUGIN_MEM_RW || rw == QEMU_PLUGIN_MEM_W;
}
static void plugin_init(void)
{
devices = g_hash_table_new(NULL, NULL);
}
static gint sort_cmp(gconstpointer a, gconstpointer b)
{
DeviceCounts *ea = (DeviceCounts *) a;
DeviceCounts *eb = (DeviceCounts *) b;
return ea->totals.reads + ea->totals.writes >
eb->totals.reads + eb->totals.writes ? -1 : 1;
}
static gint sort_loc(gconstpointer a, gconstpointer b)
{
IOLocationCounts *ea = (IOLocationCounts *) a;
IOLocationCounts *eb = (IOLocationCounts *) b;
return ea->off_or_pc > eb->off_or_pc;
}
static void fmt_iocount_record(GString *s, IOCounts *rec)
{
if (track_reads()) {
g_string_append_printf(s, ", %"PRIx64", %"PRId64,
rec->cpu_read, rec->reads);
}
if (track_writes()) {
g_string_append_printf(s, ", %"PRIx64", %"PRId64,
rec->cpu_write, rec->writes);
}
}
static void fmt_dev_record(GString *s, DeviceCounts *rec)
{
g_string_append_printf(s, "%s, 0x%"PRIx64,
rec->name, rec->base);
fmt_iocount_record(s, &rec->totals);
g_string_append_c(s, '\n');
}
static void plugin_exit(qemu_plugin_id_t id, void *p)
{
g_autoptr(GString) report = g_string_new("");
GList *counts;
if (!(pattern || source)) {
g_string_printf(report, "Device, Address");
if (track_reads()) {
g_string_append_printf(report, ", RCPUs, Reads");
}
if (track_writes()) {
g_string_append_printf(report, ", WCPUs, Writes");
}
g_string_append_c(report, '\n');
}
counts = g_hash_table_get_values(devices);
if (counts && g_list_next(counts)) {
GList *it;
it = g_list_sort(counts, sort_cmp);
while (it) {
DeviceCounts *rec = (DeviceCounts *) it->data;
if (rec->detail) {
GList *accesses = g_hash_table_get_values(rec->detail);
GList *io_it = g_list_sort(accesses, sort_loc);
const char *prefix = pattern ? "off" : "pc";
g_string_append_printf(report, "%s @ 0x%"PRIx64"\n",
rec->name, rec->base);
while (io_it) {
IOLocationCounts *loc = (IOLocationCounts *) io_it->data;
g_string_append_printf(report, " %s:%08"PRIx64,
prefix, loc->off_or_pc);
fmt_iocount_record(report, &loc->counts);
g_string_append_c(report, '\n');
io_it = io_it->next;
}
} else {
fmt_dev_record(report, rec);
}
it = it->next;
};
g_list_free(it);
}
qemu_plugin_outs(report->str);
}
static DeviceCounts *new_count(const char *name, uint64_t base)
{
DeviceCounts *count = g_new0(DeviceCounts, 1);
count->name = name;
count->base = base;
if (pattern || source) {
count->detail = g_hash_table_new(NULL, NULL);
}
g_hash_table_insert(devices, (gpointer) name, count);
return count;
}
static IOLocationCounts *new_location(GHashTable *table, uint64_t off_or_pc)
{
IOLocationCounts *loc = g_new0(IOLocationCounts, 1);
loc->off_or_pc = off_or_pc;
g_hash_table_insert(table, (gpointer) off_or_pc, loc);
return loc;
}
static void hwprofile_match_hit(DeviceCounts *rec, uint64_t off)
{
g_autoptr(GString) report = g_string_new("hwprofile: match @ offset");
g_string_append_printf(report, "%"PRIx64", previous hits\n", off);
fmt_dev_record(report, rec);
qemu_plugin_outs(report->str);
}
static void inc_count(IOCounts *count, bool is_write, unsigned int cpu_index)
{
if (is_write) {
count->writes++;
count->cpu_write |= (1 << cpu_index);
} else {
count->reads++;
count->cpu_read |= (1 << cpu_index);
}
}
static void vcpu_haddr(unsigned int cpu_index, qemu_plugin_meminfo_t meminfo,
uint64_t vaddr, void *udata)
{
struct qemu_plugin_hwaddr *hwaddr = qemu_plugin_get_hwaddr(meminfo, vaddr);
if (!hwaddr || !qemu_plugin_hwaddr_is_io(hwaddr)) {
return;
} else {
const char *name = qemu_plugin_hwaddr_device_name(hwaddr);
uint64_t off = qemu_plugin_hwaddr_device_offset(hwaddr);
bool is_write = qemu_plugin_mem_is_store(meminfo);
DeviceCounts *counts;
g_mutex_lock(&lock);
counts = (DeviceCounts *) g_hash_table_lookup(devices, name);
if (!counts) {
uint64_t base = vaddr - off;
counts = new_count(name, base);
}
if (check_match) {
if (g_strv_contains((const char * const *)matches, counts->name)) {
hwprofile_match_hit(counts, off);
inc_count(&counts->totals, is_write, cpu_index);
}
} else {
inc_count(&counts->totals, is_write, cpu_index);
}
/* either track offsets or source of access */
if (source) {
off = (uint64_t) udata;
}
if (pattern || source) {
IOLocationCounts *io_count = g_hash_table_lookup(counts->detail,
(gpointer) off);
if (!io_count) {
io_count = new_location(counts->detail, off);
}
inc_count(&io_count->counts, is_write, cpu_index);
}
g_mutex_unlock(&lock);
}
}
static void vcpu_tb_trans(qemu_plugin_id_t id, struct qemu_plugin_tb *tb)
{
size_t n = qemu_plugin_tb_n_insns(tb);
size_t i;
for (i = 0; i < n; i++) {
struct qemu_plugin_insn *insn = qemu_plugin_tb_get_insn(tb, i);
gpointer udata = (gpointer) (source ? qemu_plugin_insn_vaddr(insn) : 0);
qemu_plugin_register_vcpu_mem_cb(insn, vcpu_haddr,
QEMU_PLUGIN_CB_NO_REGS,
rw, udata);
}
}
QEMU_PLUGIN_EXPORT
int qemu_plugin_install(qemu_plugin_id_t id, const qemu_info_t *info,
int argc, char **argv)
{
int i;
for (i = 0; i < argc; i++) {
char *opt = argv[i];
if (g_strcmp0(opt, "read") == 0) {
rw = QEMU_PLUGIN_MEM_R;
} else if (g_strcmp0(opt, "write") == 0) {
rw = QEMU_PLUGIN_MEM_W;
} else if (g_strcmp0(opt, "pattern") == 0) {
pattern = true;
} else if (g_strcmp0(opt, "source") == 0) {
source = true;
} else if (g_str_has_prefix(opt, "match")) {
gchar **parts = g_strsplit(opt, "=", 2);
check_match = true;
matches = g_strsplit(parts[1], ",", -1);
g_strfreev(parts);
} else {
fprintf(stderr, "option parsing failed: %s\n", opt);
return -1;
}
}
if (source && pattern) {
fprintf(stderr, "can only currently track either source or pattern.\n");
return -1;
}
if (!info->system_emulation) {
fprintf(stderr, "hwprofile: plugin only useful for system emulation\n");
return -1;
}
/* Just warn about overflow */
if (info->system.smp_vcpus > 64 ||
info->system.max_vcpus > 64) {
fprintf(stderr, "hwprofile: can only track up to 64 CPUs\n");
}
plugin_init();
qemu_plugin_register_vcpu_tb_trans_cb(id, vcpu_tb_trans);
qemu_plugin_register_atexit_cb(id, plugin_exit, NULL);
return 0;
}

View File

@ -134,7 +134,7 @@ static void report_divergance(ExecState *us, ExecState *them)
/* Output short log entry of going out of sync... */
if (verbose || divrec.distance == 1 || diverged) {
g_string_printf(out, "@ %#016lx vs %#016lx (%d/%d since last)\n",
g_string_printf(out, "@ 0x%016lx vs 0x%016lx (%d/%d since last)\n",
us->pc, them->pc, g_slist_length(divergence_log),
divrec.distance);
qemu_plugin_outs(out->str);
@ -144,7 +144,7 @@ static void report_divergance(ExecState *us, ExecState *them)
int i;
GSList *entry;
g_string_printf(out, "Δ insn_count @ %#016lx (%ld) vs %#016lx (%ld)\n",
g_string_printf(out, "Δ insn_count @ 0x%016lx (%ld) vs 0x%016lx (%ld)\n",
us->pc, us->insn_count, them->pc, them->insn_count);
for (entry = log, i = 0;
@ -152,7 +152,7 @@ static void report_divergance(ExecState *us, ExecState *them)
entry = g_slist_next(entry), i++) {
ExecInfo *prev = (ExecInfo *) entry->data;
g_string_append_printf(out,
" previously @ %#016lx/%ld (%ld insns)\n",
" previously @ 0x%016lx/%ld (%ld insns)\n",
prev->block->pc, prev->block->insns,
prev->insn_count);
}

View File

@ -280,3 +280,37 @@ which will eventually report::
previously @ 0x000000ffd08098/5 (809900593 insns)
previously @ 0x000000ffd080c0/1 (809900588 insns)
- contrib/plugins/hwprofile
The hwprofile tool can only be used with system emulation and allows
the user to see what hardware is accessed how often. It has a number of options:
* arg=read or arg=write
By default the plugin tracks both reads and writes. You can use one
of these options to limit the tracking to just one class of accesses.
* arg=source
Will include a detailed break down of what the guest PC that made the
access was. Not compatible with arg=pattern. Example output::
cirrus-low-memory @ 0xfffffd00000a0000
pc:fffffc0000005cdc, 1, 256
pc:fffffc0000005ce8, 1, 256
pc:fffffc0000005cec, 1, 256
* arg=pattern
Instead break down the accesses based on the offset into the HW
region. This can be useful for seeing the most used registers of a
device. Example output::
pci0-conf @ 0xfffffd01fe000000
off:00000004, 1, 1
off:00000010, 1, 3
off:00000014, 1, 3
off:00000018, 1, 2
off:0000001c, 1, 2
off:00000020, 1, 2
...

View File

@ -1423,7 +1423,8 @@ static void virtio_pci_device_write(void *opaque, hwaddr addr,
}
}
static void virtio_pci_modern_regions_init(VirtIOPCIProxy *proxy)
static void virtio_pci_modern_regions_init(VirtIOPCIProxy *proxy,
const char *vdev_name)
{
static const MemoryRegionOps common_ops = {
.read = virtio_pci_common_read,
@ -1470,36 +1471,41 @@ static void virtio_pci_modern_regions_init(VirtIOPCIProxy *proxy)
},
.endianness = DEVICE_LITTLE_ENDIAN,
};
g_autoptr(GString) name = g_string_new(NULL);
g_string_printf(name, "virtio-pci-common-%s", vdev_name);
memory_region_init_io(&proxy->common.mr, OBJECT(proxy),
&common_ops,
proxy,
"virtio-pci-common",
name->str,
proxy->common.size);
g_string_printf(name, "virtio-pci-isr-%s", vdev_name);
memory_region_init_io(&proxy->isr.mr, OBJECT(proxy),
&isr_ops,
proxy,
"virtio-pci-isr",
name->str,
proxy->isr.size);
g_string_printf(name, "virtio-pci-device-%s", vdev_name);
memory_region_init_io(&proxy->device.mr, OBJECT(proxy),
&device_ops,
proxy,
"virtio-pci-device",
name->str,
proxy->device.size);
g_string_printf(name, "virtio-pci-notify-%s", vdev_name);
memory_region_init_io(&proxy->notify.mr, OBJECT(proxy),
&notify_ops,
proxy,
"virtio-pci-notify",
name->str,
proxy->notify.size);
g_string_printf(name, "virtio-pci-notify-pio-%s", vdev_name);
memory_region_init_io(&proxy->notify_pio.mr, OBJECT(proxy),
&notify_pio_ops,
proxy,
"virtio-pci-notify-pio",
name->str,
proxy->notify_pio.size);
}
@ -1654,7 +1660,7 @@ static void virtio_pci_device_plugged(DeviceState *d, Error **errp)
struct virtio_pci_cfg_cap *cfg_mask;
virtio_pci_modern_regions_init(proxy);
virtio_pci_modern_regions_init(proxy, vdev->name);
virtio_pci_modern_mem_region_map(proxy, &proxy->common, &cap);
virtio_pci_modern_mem_region_map(proxy, &proxy->isr, &cap);

View File

@ -454,23 +454,20 @@ struct TranslationBlock {
uint32_t cflags; /* compile flags */
#define CF_COUNT_MASK 0x00007fff
#define CF_LAST_IO 0x00008000 /* Last insn may be an IO access. */
#define CF_NOCACHE 0x00010000 /* To be freed after execution */
#define CF_MEMI_ONLY 0x00010000 /* Only instrument memory ops */
#define CF_USE_ICOUNT 0x00020000
#define CF_INVALID 0x00040000 /* TB is stale. Set with @jmp_lock held */
#define CF_PARALLEL 0x00080000 /* Generate code for a parallel context */
#define CF_CLUSTER_MASK 0xff000000 /* Top 8 bits are cluster ID */
#define CF_CLUSTER_SHIFT 24
/* cflags' mask for hashing/comparison */
#define CF_HASH_MASK \
(CF_COUNT_MASK | CF_LAST_IO | CF_USE_ICOUNT | CF_PARALLEL | CF_CLUSTER_MASK)
/* cflags' mask for hashing/comparison, basically ignore CF_INVALID */
#define CF_HASH_MASK (~CF_INVALID)
/* Per-vCPU dynamic tracing state used to generate this TB */
uint32_t trace_vcpu_dstate;
struct tb_tc tc;
/* original tb when cflags has CF_NOCACHE */
struct TranslationBlock *orig_tb;
/* first and second physical page containing code. The lower bit
of the pointer tells the index in page_next[].
The list is protected by the TB's page('s) lock(s) */

View File

@ -19,7 +19,7 @@ struct DisasContextBase;
#ifdef CONFIG_PLUGIN
bool plugin_gen_tb_start(CPUState *cpu, const TranslationBlock *tb);
bool plugin_gen_tb_start(CPUState *cpu, const TranslationBlock *tb, bool supress);
void plugin_gen_tb_end(CPUState *cpu);
void plugin_gen_insn_start(CPUState *cpu, const struct DisasContextBase *db);
void plugin_gen_insn_end(void);
@ -41,7 +41,7 @@ static inline void plugin_insn_append(const void *from, size_t size)
#else /* !CONFIG_PLUGIN */
static inline
bool plugin_gen_tb_start(CPUState *cpu, const TranslationBlock *tb)
bool plugin_gen_tb_start(CPUState *cpu, const TranslationBlock *tb, bool supress)
{
return false;
}

View File

@ -26,7 +26,6 @@
#define CODE_GEN_HTABLE_BITS 15
#define CODE_GEN_HTABLE_SIZE (1 << CODE_GEN_HTABLE_BITS)
typedef struct TranslationBlock TranslationBlock;
typedef struct TBContext TBContext;
struct TBContext {

View File

@ -74,8 +74,6 @@ typedef enum MMUAccessType {
typedef struct CPUWatchpoint CPUWatchpoint;
struct TranslationBlock;
/* see tcg-cpu-ops.h */
struct TCGCPUOps;
@ -375,7 +373,7 @@ struct CPUState {
IcountDecr *icount_decr_ptr;
/* Accessed in parallel; all accesses must be atomic */
struct TranslationBlock *tb_jmp_cache[TB_JMP_CACHE_SIZE];
TranslationBlock *tb_jmp_cache[TB_JMP_CACHE_SIZE];
struct GDBRegisterState *gdb_regs;
int gdb_num_regs;

View File

@ -30,8 +30,7 @@ struct TCGCPUOps {
* If more state needs to be restored, the target must implement a
* function to restore all the state, and register it here.
*/
void (*synchronize_from_tb)(CPUState *cpu,
const struct TranslationBlock *tb);
void (*synchronize_from_tb)(CPUState *cpu, const TranslationBlock *tb);
/** @cpu_exec_enter: Callback for cpu_exec preparation */
void (*cpu_exec_enter)(CPUState *cpu);
/** @cpu_exec_exit: Callback for cpu_exec cleanup */
@ -89,6 +88,16 @@ struct TCGCPUOps {
*/
bool (*debug_check_watchpoint)(CPUState *cpu, CPUWatchpoint *wp);
/**
* @io_recompile_replay_branch: Callback for cpu_io_recompile.
*
* The cpu has been stopped, and cpu_restore_state_from_tb has been
* called. If the faulting instruction is in a delay slot, and the
* target architecture requires re-execution of the branch, then
* adjust the cpu state as required and return true.
*/
bool (*io_recompile_replay_branch)(CPUState *cpu,
const TranslationBlock *tb);
#endif /* CONFIG_SOFTMMU */
#endif /* NEED_CPU_H */

View File

@ -92,6 +92,7 @@ struct qemu_plugin_dyn_cb {
};
};
/* Internal context for instrumenting an instruction */
struct qemu_plugin_insn {
GByteArray *data;
uint64_t vaddr;
@ -99,6 +100,7 @@ struct qemu_plugin_insn {
GArray *cbs[PLUGIN_N_CB_TYPES][PLUGIN_N_CB_SUBTYPES];
bool calls_helpers;
bool mem_helper;
bool mem_only;
};
/*
@ -128,6 +130,7 @@ static inline struct qemu_plugin_insn *qemu_plugin_insn_alloc(void)
return insn;
}
/* Internal context for this TranslationBlock */
struct qemu_plugin_tb {
GPtrArray *insns;
size_t n;
@ -135,6 +138,7 @@ struct qemu_plugin_tb {
uint64_t vaddr2;
void *haddr1;
void *haddr2;
bool mem_only;
GArray *cbs[PLUGIN_N_CB_SUBTYPES];
};

View File

@ -330,6 +330,12 @@ struct qemu_plugin_hwaddr *qemu_plugin_get_hwaddr(qemu_plugin_meminfo_t info,
bool qemu_plugin_hwaddr_is_io(const struct qemu_plugin_hwaddr *haddr);
uint64_t qemu_plugin_hwaddr_device_offset(const struct qemu_plugin_hwaddr *haddr);
/*
* Returns a string representing the device. The string is valid for
* the lifetime of the plugin.
*/
const char *qemu_plugin_hwaddr_device_name(const struct qemu_plugin_hwaddr *h);
typedef void
(*qemu_plugin_vcpu_mem_cb_t)(unsigned int vcpu_index,
qemu_plugin_meminfo_t info, uint64_t vaddr,

View File

@ -120,6 +120,7 @@ typedef struct ReservedRegion ReservedRegion;
typedef struct SavedIOTLB SavedIOTLB;
typedef struct SHPCDevice SHPCDevice;
typedef struct SSIBus SSIBus;
typedef struct TranslationBlock TranslationBlock;
typedef struct VirtIODevice VirtIODevice;
typedef struct Visitor Visitor;
typedef struct VMChangeStateEntry VMChangeStateEntry;

View File

@ -84,15 +84,19 @@ void qemu_plugin_register_vcpu_tb_exec_cb(struct qemu_plugin_tb *tb,
enum qemu_plugin_cb_flags flags,
void *udata)
{
plugin_register_dyn_cb__udata(&tb->cbs[PLUGIN_CB_REGULAR],
cb, flags, udata);
if (!tb->mem_only) {
plugin_register_dyn_cb__udata(&tb->cbs[PLUGIN_CB_REGULAR],
cb, flags, udata);
}
}
void qemu_plugin_register_vcpu_tb_exec_inline(struct qemu_plugin_tb *tb,
enum qemu_plugin_op op,
void *ptr, uint64_t imm)
{
plugin_register_inline_op(&tb->cbs[PLUGIN_CB_INLINE], 0, op, ptr, imm);
if (!tb->mem_only) {
plugin_register_inline_op(&tb->cbs[PLUGIN_CB_INLINE], 0, op, ptr, imm);
}
}
void qemu_plugin_register_vcpu_insn_exec_cb(struct qemu_plugin_insn *insn,
@ -100,20 +104,27 @@ void qemu_plugin_register_vcpu_insn_exec_cb(struct qemu_plugin_insn *insn,
enum qemu_plugin_cb_flags flags,
void *udata)
{
plugin_register_dyn_cb__udata(&insn->cbs[PLUGIN_CB_INSN][PLUGIN_CB_REGULAR],
cb, flags, udata);
if (!insn->mem_only) {
plugin_register_dyn_cb__udata(&insn->cbs[PLUGIN_CB_INSN][PLUGIN_CB_REGULAR],
cb, flags, udata);
}
}
void qemu_plugin_register_vcpu_insn_exec_inline(struct qemu_plugin_insn *insn,
enum qemu_plugin_op op,
void *ptr, uint64_t imm)
{
plugin_register_inline_op(&insn->cbs[PLUGIN_CB_INSN][PLUGIN_CB_INLINE],
0, op, ptr, imm);
if (!insn->mem_only) {
plugin_register_inline_op(&insn->cbs[PLUGIN_CB_INSN][PLUGIN_CB_INLINE],
0, op, ptr, imm);
}
}
/*
* We always plant memory instrumentation because they don't finalise until
* after the operation has complete.
*/
void qemu_plugin_register_vcpu_mem_cb(struct qemu_plugin_insn *insn,
qemu_plugin_vcpu_mem_cb_t cb,
enum qemu_plugin_cb_flags flags,
@ -121,7 +132,7 @@ void qemu_plugin_register_vcpu_mem_cb(struct qemu_plugin_insn *insn,
void *udata)
{
plugin_register_vcpu_mem_cb(&insn->cbs[PLUGIN_CB_MEM][PLUGIN_CB_REGULAR],
cb, flags, rw, udata);
cb, flags, rw, udata);
}
void qemu_plugin_register_vcpu_mem_inline(struct qemu_plugin_insn *insn,
@ -130,7 +141,7 @@ void qemu_plugin_register_vcpu_mem_inline(struct qemu_plugin_insn *insn,
uint64_t imm)
{
plugin_register_inline_op(&insn->cbs[PLUGIN_CB_MEM][PLUGIN_CB_INLINE],
rw, op, ptr, imm);
rw, op, ptr, imm);
}
void qemu_plugin_register_vcpu_tb_trans_cb(qemu_plugin_id_t id,
@ -181,10 +192,13 @@ uint64_t qemu_plugin_tb_vaddr(const struct qemu_plugin_tb *tb)
struct qemu_plugin_insn *
qemu_plugin_tb_get_insn(const struct qemu_plugin_tb *tb, size_t idx)
{
struct qemu_plugin_insn *insn;
if (unlikely(idx >= tb->n)) {
return NULL;
}
return g_ptr_array_index(tb->insns, idx);
insn = g_ptr_array_index(tb->insns, idx);
insn->mem_only = tb->mem_only;
return insn;
}
/*
@ -303,6 +317,26 @@ uint64_t qemu_plugin_hwaddr_device_offset(const struct qemu_plugin_hwaddr *haddr
return 0;
}
const char *qemu_plugin_hwaddr_device_name(const struct qemu_plugin_hwaddr *h)
{
#ifdef CONFIG_SOFTMMU
if (h && h->is_io) {
MemoryRegionSection *mrs = h->v.io.section;
if (!mrs->mr->name) {
unsigned long maddr = 0xffffffff & (uintptr_t) mrs->mr;
g_autofree char *temp = g_strdup_printf("anon%08lx", maddr);
return g_intern_string(temp);
} else {
return g_intern_string(mrs->mr->name);
}
} else {
return g_intern_static_string("RAM");
}
#else
return g_intern_static_string("Invalid");
#endif
}
/*
* Queries to the number and potential maximum number of vCPUs there
* will be. This helps the plugin dimension per-vcpu arrays.

View File

@ -173,8 +173,7 @@ void arm_cpu_register_gdb_regs_for_features(ARMCPU *cpu);
void arm_translate_init(void);
#ifdef CONFIG_TCG
void arm_cpu_synchronize_from_tb(CPUState *cs,
const struct TranslationBlock *tb);
void arm_cpu_synchronize_from_tb(CPUState *cs, const TranslationBlock *tb);
#endif /* CONFIG_TCG */

View File

@ -132,7 +132,7 @@ typedef struct DisasContext {
int delayed_branch;
struct TranslationBlock *tb;
TranslationBlock *tb;
int singlestep_enabled;
} DisasContext;

View File

@ -93,7 +93,7 @@ typedef struct DisasContext {
unsigned int tb_flags, synced_flags; /* tb dependent flags. */
int is_jmp;
struct TranslationBlock *tb;
TranslationBlock *tb;
int singlestep_enabled;
uint32_t features;

View File

@ -268,6 +268,23 @@ static void mips_cpu_synchronize_from_tb(CPUState *cs,
env->hflags &= ~MIPS_HFLAG_BMASK;
env->hflags |= tb->flags & MIPS_HFLAG_BMASK;
}
# ifndef CONFIG_USER_ONLY
static bool mips_io_recompile_replay_branch(CPUState *cs,
const TranslationBlock *tb)
{
MIPSCPU *cpu = MIPS_CPU(cs);
CPUMIPSState *env = &cpu->env;
if ((env->hflags & MIPS_HFLAG_BMASK) != 0
&& env->active_tc.PC != tb->pc) {
env->active_tc.PC -= (env->hflags & MIPS_HFLAG_B16 ? 2 : 4);
env->hflags &= ~MIPS_HFLAG_BMASK;
return true;
}
return false;
}
# endif /* !CONFIG_USER_ONLY */
#endif /* CONFIG_TCG */
static bool mips_cpu_has_work(CPUState *cs)
@ -679,6 +696,7 @@ static struct TCGCPUOps mips_tcg_ops = {
.do_interrupt = mips_cpu_do_interrupt,
.do_transaction_failed = mips_cpu_do_transaction_failed,
.do_unaligned_access = mips_cpu_do_unaligned_access,
.io_recompile_replay_branch = mips_io_recompile_replay_branch,
#endif /* !CONFIG_USER_ONLY */
};
#endif /* CONFIG_TCG */

View File

@ -36,7 +36,7 @@
/* This is the state at translation time. */
typedef struct DisasContext {
struct TranslationBlock *tb;
TranslationBlock *tb;
target_ulong pc, saved_pc;
uint32_t opcode;
uint32_t fp_status;

View File

@ -43,6 +43,23 @@ static void superh_cpu_synchronize_from_tb(CPUState *cs,
cpu->env.flags = tb->flags & TB_FLAG_ENVFLAGS_MASK;
}
#ifndef CONFIG_USER_ONLY
static bool superh_io_recompile_replay_branch(CPUState *cs,
const TranslationBlock *tb)
{
SuperHCPU *cpu = SUPERH_CPU(cs);
CPUSH4State *env = &cpu->env;
if ((env->flags & ((DELAY_SLOT | DELAY_SLOT_CONDITIONAL))) != 0
&& env->pc != tb->pc) {
env->pc -= 2;
env->flags &= ~(DELAY_SLOT | DELAY_SLOT_CONDITIONAL);
return true;
}
return false;
}
#endif
static bool superh_cpu_has_work(CPUState *cs)
{
return cs->interrupt_request & CPU_INTERRUPT_HARD;
@ -217,6 +234,7 @@ static struct TCGCPUOps superh_tcg_ops = {
#ifndef CONFIG_USER_ONLY
.do_interrupt = superh_cpu_do_interrupt,
.do_unaligned_access = superh_cpu_do_unaligned_access,
.io_recompile_replay_branch = superh_io_recompile_replay_branch,
#endif /* !CONFIG_USER_ONLY */
};

View File

@ -34,7 +34,7 @@ typedef struct DisasContext {
int condjmp;
/* The label that will be jumped to when the instruction is skipped. */
TCGLabel *condlabel;
struct TranslationBlock *tb;
TranslationBlock *tb;
int singlestep_enabled;
#ifndef CONFIG_USER_ONLY
int user;

View File

@ -0,0 +1,148 @@
# TCG Plugins tests
#
# These are a little more involved than the basic tests run by check-tcg.
#
# Copyright (c) 2021 Linaro
#
# Author:
# Alex Bennée <alex.bennee@linaro.org>
#
# SPDX-License-Identifier: GPL-2.0-or-later
import tempfile
import mmap
import re
from boot_linux_console import LinuxKernelTest
class PluginKernelBase(LinuxKernelTest):
"""
Boots a Linux kernel with a TCG plugin enabled.
"""
timeout = 120
KERNEL_COMMON_COMMAND_LINE = 'printk.time=1 panic=-1 '
def run_vm(self, kernel_path, kernel_command_line,
plugin, plugin_log, console_pattern, args):
vm = self.get_vm()
vm.set_console()
vm.add_args('-kernel', kernel_path,
'-append', kernel_command_line,
'-plugin', plugin,
'-d', 'plugin',
'-D', plugin_log,
'-net', 'none',
'-no-reboot')
if args:
vm.add_args(*args)
try:
vm.launch()
except:
# TODO: probably fails because plugins not enabled but we
# can't currently probe for the feature.
self.cancel("TCG Plugins not enabled?")
self.wait_for_console_pattern(console_pattern, vm)
# ensure logs are flushed
vm.shutdown()
class PluginKernelNormal(PluginKernelBase):
def _grab_aarch64_kernel(self):
kernel_url = ('http://security.debian.org/'
'debian-security/pool/updates/main/l/linux-signed-arm64/'
'linux-image-4.19.0-12-arm64_4.19.152-1_arm64.deb')
kernel_sha1 = '2036c2792f80ac9c4ccaae742b2e0a28385b6010'
kernel_deb = self.fetch_asset(kernel_url, asset_hash=kernel_sha1)
kernel_path = self.extract_from_deb(kernel_deb,
"/boot/vmlinuz-4.19.0-12-arm64")
return kernel_path
def test_aarch64_virt_insn(self):
"""
:avocado: tags=accel:tcg
:avocado: tags=arch:aarch64
:avocado: tags=machine:virt
:avocado: tags=cpu:cortex-a57
"""
kernel_path = self._grab_aarch64_kernel()
kernel_command_line = (self.KERNEL_COMMON_COMMAND_LINE +
'console=ttyAMA0')
console_pattern = 'Kernel panic - not syncing: VFS:'
plugin_log = tempfile.NamedTemporaryFile(mode="r+t", prefix="plugin",
suffix=".log")
self.run_vm(kernel_path, kernel_command_line,
"tests/plugin/libinsn.so", plugin_log.name,
console_pattern,
args=('-cpu', 'cortex-a53'))
with plugin_log as lf, \
mmap.mmap(lf.fileno(), 0, access=mmap.ACCESS_READ) as s:
m = re.search(br"insns: (?P<count>\d+)", s)
if "count" not in m.groupdict():
self.fail("Failed to find instruction count")
def test_aarch64_virt_insn_icount(self):
"""
:avocado: tags=accel:tcg
:avocado: tags=arch:aarch64
:avocado: tags=machine:virt
:avocado: tags=cpu:cortex-a57
"""
kernel_path = self._grab_aarch64_kernel()
kernel_command_line = (self.KERNEL_COMMON_COMMAND_LINE +
'console=ttyAMA0')
console_pattern = 'Kernel panic - not syncing: VFS:'
plugin_log = tempfile.NamedTemporaryFile(mode="r+t", prefix="plugin",
suffix=".log")
self.run_vm(kernel_path, kernel_command_line,
"tests/plugin/libinsn.so", plugin_log.name,
console_pattern,
args=('-cpu', 'cortex-a53', '-icount', 'shift=1'))
with plugin_log as lf, \
mmap.mmap(lf.fileno(), 0, access=mmap.ACCESS_READ) as s:
m = re.search(br"detected repeat execution @ (?P<addr>0x[0-9A-Fa-f]+)", s)
if m is not None and "addr" in m.groupdict():
self.fail("detected repeated instructions")
def test_aarch64_virt_mem_icount(self):
"""
:avocado: tags=accel:tcg
:avocado: tags=arch:aarch64
:avocado: tags=machine:virt
:avocado: tags=cpu:cortex-a57
"""
kernel_path = self._grab_aarch64_kernel()
kernel_command_line = (self.KERNEL_COMMON_COMMAND_LINE +
'console=ttyAMA0')
console_pattern = 'Kernel panic - not syncing: VFS:'
plugin_log = tempfile.NamedTemporaryFile(mode="r+t", prefix="plugin",
suffix=".log")
self.run_vm(kernel_path, kernel_command_line,
"tests/plugin/libmem.so,arg=both", plugin_log.name,
console_pattern,
args=('-cpu', 'cortex-a53', '-icount', 'shift=1'))
with plugin_log as lf, \
mmap.mmap(lf.fileno(), 0, access=mmap.ACCESS_READ) as s:
m = re.findall(br"mem accesses: (?P<count>\d+)", s)
if m is None or len(m) != 2:
self.fail("no memory access counts found")
else:
inline = int(m[0])
callback = int(m[1])
if inline != callback:
self.fail("mismatched access counts")

View File

@ -21,6 +21,14 @@ static bool do_inline;
static void vcpu_insn_exec_before(unsigned int cpu_index, void *udata)
{
static uint64_t last_pc;
uint64_t this_pc = GPOINTER_TO_UINT(udata);
if (this_pc == last_pc) {
g_autofree gchar *out = g_strdup_printf("detected repeat execution @ 0x%"
PRIx64 "\n", this_pc);
qemu_plugin_outs(out);
}
last_pc = this_pc;
insn_count++;
}
@ -36,8 +44,10 @@ static void vcpu_tb_trans(qemu_plugin_id_t id, struct qemu_plugin_tb *tb)
qemu_plugin_register_vcpu_insn_exec_inline(
insn, QEMU_PLUGIN_INLINE_ADD_U64, &insn_count, 1);
} else {
uint64_t vaddr = qemu_plugin_insn_vaddr(insn);
qemu_plugin_register_vcpu_insn_exec_cb(
insn, vcpu_insn_exec_before, QEMU_PLUGIN_CB_NO_REGS, NULL);
insn, vcpu_insn_exec_before, QEMU_PLUGIN_CB_NO_REGS,
GUINT_TO_POINTER(vaddr));
}
}
}

View File

@ -16,9 +16,10 @@
QEMU_PLUGIN_EXPORT int qemu_plugin_version = QEMU_PLUGIN_VERSION;
static uint64_t mem_count;
static uint64_t inline_mem_count;
static uint64_t cb_mem_count;
static uint64_t io_count;
static bool do_inline;
static bool do_inline, do_callback;
static bool do_haddr;
static enum qemu_plugin_mem_rw rw = QEMU_PLUGIN_MEM_RW;
@ -26,7 +27,12 @@ static void plugin_exit(qemu_plugin_id_t id, void *p)
{
g_autoptr(GString) out = g_string_new("");
g_string_printf(out, "mem accesses: %" PRIu64 "\n", mem_count);
if (do_inline) {
g_string_printf(out, "inline mem accesses: %" PRIu64 "\n", inline_mem_count);
}
if (do_callback) {
g_string_append_printf(out, "callback mem accesses: %" PRIu64 "\n", cb_mem_count);
}
if (do_haddr) {
g_string_append_printf(out, "io accesses: %" PRIu64 "\n", io_count);
}
@ -42,10 +48,10 @@ static void vcpu_mem(unsigned int cpu_index, qemu_plugin_meminfo_t meminfo,
if (qemu_plugin_hwaddr_is_io(hwaddr)) {
io_count++;
} else {
mem_count++;
cb_mem_count++;
}
} else {
mem_count++;
cb_mem_count++;
}
}
@ -60,8 +66,9 @@ static void vcpu_tb_trans(qemu_plugin_id_t id, struct qemu_plugin_tb *tb)
if (do_inline) {
qemu_plugin_register_vcpu_mem_inline(insn, rw,
QEMU_PLUGIN_INLINE_ADD_U64,
&mem_count, 1);
} else {
&inline_mem_count, 1);
}
if (do_callback) {
qemu_plugin_register_vcpu_mem_cb(insn, vcpu_mem,
QEMU_PLUGIN_CB_NO_REGS,
rw, NULL);
@ -90,6 +97,12 @@ QEMU_PLUGIN_EXPORT int qemu_plugin_install(qemu_plugin_id_t id,
}
if (!strcmp(argv[0], "inline")) {
do_inline = true;
do_callback = false;
} else if (!strcmp(argv[0], "both")) {
do_inline = true;
do_callback = true;
} else {
do_callback = true;
}
}

View File

@ -33,5 +33,15 @@ EXTRA_RUNS+=$(MULTIARCH_RUNS)
memory: CFLAGS+=-DCHECK_UNALIGNED=1
# non-inline runs will trigger the duplicate instruction heuristics in libinsn.so
run-plugin-%-with-libinsn.so:
$(call run-test, $@, \
$(QEMU) -monitor none -display none \
-chardev file$(COMMA)path=$@.out$(COMMA)id=output \
-plugin ../../plugin/libinsn.so$(COMMA)arg=inline \
-d plugin -D $*-with-libinsn.so.pout \
$(QEMU_OPTS) $*, \
"$* on $(TARGET_NAME)")
# Running
QEMU_OPTS+=-device isa-debugcon,chardev=output -device isa-debug-exit,iobase=0xf4,iosize=0x4 -kernel

View File

@ -48,6 +48,13 @@ else
SKIP_I386_TESTS+=test-i386-fprem
endif
# non-inline runs will trigger the duplicate instruction heuristics in libinsn.so
run-plugin-%-with-libinsn.so:
$(call run-test, $@, $(QEMU) $(QEMU_OPTS) \
-plugin ../../plugin/libinsn.so$(COMMA)arg=inline \
-d plugin -D $*-with-libinsn.so.pout $*, \
"$* (inline) on $(TARGET_NAME)")
# Update TESTS
I386_TESTS:=$(filter-out $(SKIP_I386_TESTS), $(ALL_X86_TESTS))
TESTS=$(MULTIARCH_TESTS) $(I386_TESTS)

View File

@ -33,5 +33,15 @@ EXTRA_RUNS+=$(MULTIARCH_RUNS)
memory: CFLAGS+=-DCHECK_UNALIGNED=1
# non-inline runs will trigger the duplicate instruction heuristics in libinsn.so
run-plugin-%-with-libinsn.so:
$(call run-test, $@, \
$(QEMU) -monitor none -display none \
-chardev file$(COMMA)path=$@.out$(COMMA)id=output \
-plugin ../../plugin/libinsn.so$(COMMA)arg=inline \
-d plugin -D $*-with-libinsn.so.pout \
$(QEMU_OPTS) $*, \
"$* on $(TARGET_NAME)")
# Running
QEMU_OPTS+=-device isa-debugcon,chardev=output -device isa-debug-exit,iobase=0xf4,iosize=0x4 -kernel