diff --git a/MAINTAINERS b/MAINTAINERS index 68ee271792..1d711e0cb8 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -2903,6 +2903,7 @@ S: Maintained F: docs/devel/tcg-plugins.rst F: plugins/ F: tests/plugin/ +F: tests/acceptance/tcg_plugins.py F: contrib/plugins/ AArch64 TCG target diff --git a/accel/tcg/cpu-exec.c b/accel/tcg/cpu-exec.c index f2819eec7d..16e4fe3ccd 100644 --- a/accel/tcg/cpu-exec.c +++ b/accel/tcg/cpu-exec.c @@ -224,40 +224,6 @@ cpu_tb_exec(CPUState *cpu, TranslationBlock *itb, int *tb_exit) return last_tb; } -#ifndef CONFIG_USER_ONLY -/* Execute the code without caching the generated code. An interpreter - could be used if available. */ -static void cpu_exec_nocache(CPUState *cpu, int max_cycles, - TranslationBlock *orig_tb, bool ignore_icount) -{ - TranslationBlock *tb; - uint32_t cflags = curr_cflags() | CF_NOCACHE; - int tb_exit; - - if (ignore_icount) { - cflags &= ~CF_USE_ICOUNT; - } - - /* Should never happen. - We only end up here when an existing TB is too long. */ - cflags |= MIN(max_cycles, CF_COUNT_MASK); - - mmap_lock(); - tb = tb_gen_code(cpu, orig_tb->pc, orig_tb->cs_base, - orig_tb->flags, cflags); - tb->orig_tb = orig_tb; - mmap_unlock(); - - /* execute the generated code */ - trace_exec_tb_nocache(tb, tb->pc); - cpu_tb_exec(cpu, tb, &tb_exit); - - mmap_lock(); - tb_phys_invalidate(tb, -1); - mmap_unlock(); - tcg_tb_remove(tb); -} -#endif static void cpu_exec_enter(CPUState *cpu) { @@ -524,15 +490,12 @@ static inline bool cpu_handle_exception(CPUState *cpu, int *ret) #ifndef CONFIG_USER_ONLY if (replay_has_exception() && cpu_neg(cpu)->icount_decr.u16.low + cpu->icount_extra == 0) { - /* try to cause an exception pending in the log */ - cpu_exec_nocache(cpu, 1, tb_find(cpu, NULL, 0, curr_cflags()), true); + /* Execute just one insn to trigger exception pending in the log */ + cpu->cflags_next_tb = (curr_cflags() & ~CF_USE_ICOUNT) | 1; } #endif - if (cpu->exception_index < 0) { - return false; - } + return false; } - if (cpu->exception_index >= EXCP_INTERRUPT) { /* exit request from the cpu execution loop */ *ret = cpu->exception_index; @@ -688,6 +651,7 @@ static inline bool cpu_handle_interrupt(CPUState *cpu, /* Finally, check if we need to exit to the main loop. */ if (unlikely(qatomic_read(&cpu->exit_request)) || (icount_enabled() + && (cpu->cflags_next_tb == -1 || cpu->cflags_next_tb & CF_USE_ICOUNT) && cpu_neg(cpu)->icount_decr.u16.low + cpu->icount_extra == 0)) { qatomic_set(&cpu->exit_request, 0); if (cpu->exception_index == -1) { @@ -730,16 +694,17 @@ static inline void cpu_loop_exec_tb(CPUState *cpu, TranslationBlock *tb, /* Ensure global icount has gone forward */ icount_update(cpu); /* Refill decrementer and continue execution. */ - insns_left = MIN(0xffff, cpu->icount_budget); + insns_left = MIN(CF_COUNT_MASK, cpu->icount_budget); cpu_neg(cpu)->icount_decr.u16.low = insns_left; cpu->icount_extra = cpu->icount_budget - insns_left; - if (!cpu->icount_extra && insns_left < tb->icount) { - /* Execute any remaining instructions, then let the main loop - * handle the next event. - */ - if (insns_left > 0) { - cpu_exec_nocache(cpu, insns_left, tb, false); - } + + /* + * If the next tb has more instructions than we have left to + * execute we need to ensure we find/generate a TB with exactly + * insns_left instructions in it. 
+ */ + if (!cpu->icount_extra && insns_left > 0 && insns_left < tb->icount) { + cpu->cflags_next_tb = (tb->cflags & ~CF_COUNT_MASK) | insns_left; } #endif } diff --git a/accel/tcg/plugin-gen.c b/accel/tcg/plugin-gen.c index e5dc9d0ca9..c3dc3effe7 100644 --- a/accel/tcg/plugin-gen.c +++ b/accel/tcg/plugin-gen.c @@ -320,22 +320,6 @@ static TCGOp *copy_const_ptr(TCGOp **begin_op, TCGOp *op, void *ptr) return op; } -static TCGOp *copy_const_i64(TCGOp **begin_op, TCGOp *op, uint64_t v) -{ - if (TCG_TARGET_REG_BITS == 32) { - /* 2x mov_i32 */ - op = copy_op(begin_op, op, INDEX_op_mov_i32); - op->args[1] = tcgv_i32_arg(tcg_constant_i32(v)); - op = copy_op(begin_op, op, INDEX_op_mov_i32); - op->args[1] = tcgv_i32_arg(tcg_constant_i32(v >> 32)); - } else { - /* mov_i64 */ - op = copy_op(begin_op, op, INDEX_op_mov_i64); - op->args[1] = tcgv_i64_arg(tcg_constant_i64(v)); - } - return op; -} - static TCGOp *copy_extu_tl_i64(TCGOp **begin_op, TCGOp *op) { if (TARGET_LONG_BITS == 32) { @@ -374,14 +358,17 @@ static TCGOp *copy_st_i64(TCGOp **begin_op, TCGOp *op) return op; } -static TCGOp *copy_add_i64(TCGOp **begin_op, TCGOp *op) +static TCGOp *copy_add_i64(TCGOp **begin_op, TCGOp *op, uint64_t v) { if (TCG_TARGET_REG_BITS == 32) { /* all 32-bit backends must implement add2_i32 */ g_assert(TCG_TARGET_HAS_add2_i32); op = copy_op(begin_op, op, INDEX_op_add2_i32); + op->args[4] = tcgv_i32_arg(tcg_constant_i32(v)); + op->args[5] = tcgv_i32_arg(tcg_constant_i32(v >> 32)); } else { op = copy_op(begin_op, op, INDEX_op_add_i64); + op->args[2] = tcgv_i64_arg(tcg_constant_i64(v)); } return op; } @@ -431,6 +418,12 @@ static TCGOp *copy_call(TCGOp **begin_op, TCGOp *op, void *empty_func, return op; } +/* + * When we append/replace ops here we are sensitive to changing patterns of + * TCGOps generated by the tcg_gen_FOO calls when we generated the + * empty callbacks. This will assert very quickly in a debug build as + * we assert the ops we are replacing are the correct ones. 
+ */ static TCGOp *append_udata_cb(const struct qemu_plugin_dyn_cb *cb, TCGOp *begin_op, TCGOp *op, int *cb_idx) { @@ -462,11 +455,8 @@ static TCGOp *append_inline_cb(const struct qemu_plugin_dyn_cb *cb, /* ld_i64 */ op = copy_ld_i64(&begin_op, op); - /* const_i64 */ - op = copy_const_i64(&begin_op, op, cb->inline_insn.imm); - /* add_i64 */ - op = copy_add_i64(&begin_op, op); + op = copy_add_i64(&begin_op, op, cb->inline_insn.imm); /* st_i64 */ op = copy_st_i64(&begin_op, op); @@ -852,7 +842,7 @@ static void plugin_gen_inject(const struct qemu_plugin_tb *plugin_tb) pr_ops(); } -bool plugin_gen_tb_start(CPUState *cpu, const TranslationBlock *tb) +bool plugin_gen_tb_start(CPUState *cpu, const TranslationBlock *tb, bool mem_only) { struct qemu_plugin_tb *ptb = tcg_ctx->plugin_tb; bool ret = false; @@ -865,6 +855,7 @@ bool plugin_gen_tb_start(CPUState *cpu, const TranslationBlock *tb) ptb->vaddr2 = -1; get_page_addr_code_hostp(cpu->env_ptr, tb->pc, &ptb->haddr1); ptb->haddr2 = NULL; + ptb->mem_only = mem_only; plugin_gen_empty_callback(PLUGIN_GEN_FROM_TB); } diff --git a/accel/tcg/translate-all.c b/accel/tcg/translate-all.c index 2c34adccce..bbd919a393 100644 --- a/accel/tcg/translate-all.c +++ b/accel/tcg/translate-all.c @@ -60,6 +60,7 @@ #include "sysemu/cpu-timers.h" #include "sysemu/tcg.h" #include "qapi/error.h" +#include "hw/core/tcg-cpu-ops.h" #include "internal.h" /* #define DEBUG_TB_INVALIDATE */ @@ -409,12 +410,6 @@ bool cpu_restore_state(CPUState *cpu, uintptr_t host_pc, bool will_exit) TranslationBlock *tb = tcg_tb_lookup(host_pc); if (tb) { cpu_restore_state_from_tb(cpu, tb, host_pc, will_exit); - if (tb_cflags(tb) & CF_NOCACHE) { - /* one-shot translation, invalidate it immediately */ - tb_phys_invalidate(tb, -1); - tcg_tb_remove(tb); - tb_destroy(tb); - } return true; } } @@ -1633,8 +1628,7 @@ static void do_tb_phys_invalidate(TranslationBlock *tb, bool rm_from_page_list) phys_pc = tb->page_addr[0] + (tb->pc & ~TARGET_PAGE_MASK); h = tb_hash_func(phys_pc, tb->pc, tb->flags, tb_cflags(tb) & CF_HASH_MASK, tb->trace_vcpu_dstate); - if (!(tb->cflags & CF_NOCACHE) && - !qht_remove(&tb_ctx.htable, tb, h)) { + if (!qht_remove(&tb_ctx.htable, tb, h)) { return; } @@ -1778,7 +1772,8 @@ static inline void tb_page_add(PageDesc *p, TranslationBlock *tb, #endif } -/* add a new TB and link it to the physical page tables. phys_page2 is +/* + * Add a new TB and link it to the physical page tables. phys_page2 is * (-1) to indicate that only one page contains the TB. * * Called with mmap_lock held for user-mode emulation. @@ -1794,20 +1789,11 @@ tb_link_page(TranslationBlock *tb, tb_page_addr_t phys_pc, { PageDesc *p; PageDesc *p2 = NULL; + void *existing_tb = NULL; + uint32_t h; assert_memory_lock(); - if (phys_pc == -1) { - /* - * If the TB is not associated with a physical RAM page then - * it must be a temporary one-insn TB, and we have nothing to do - * except fill in the page_addr[] fields. - */ - assert(tb->cflags & CF_NOCACHE); - tb->page_addr[0] = tb->page_addr[1] = -1; - return tb; - } - /* * Add the TB to the page list, acquiring first the pages's locks. 
* We keep the locks held until after inserting the TB in the hash table, @@ -1824,25 +1810,20 @@ tb_link_page(TranslationBlock *tb, tb_page_addr_t phys_pc, tb->page_addr[1] = -1; } - if (!(tb->cflags & CF_NOCACHE)) { - void *existing_tb = NULL; - uint32_t h; + /* add in the hash table */ + h = tb_hash_func(phys_pc, tb->pc, tb->flags, tb->cflags & CF_HASH_MASK, + tb->trace_vcpu_dstate); + qht_insert(&tb_ctx.htable, tb, h, &existing_tb); - /* add in the hash table */ - h = tb_hash_func(phys_pc, tb->pc, tb->flags, tb->cflags & CF_HASH_MASK, - tb->trace_vcpu_dstate); - qht_insert(&tb_ctx.htable, tb, h, &existing_tb); - - /* remove TB from the page(s) if we couldn't insert it */ - if (unlikely(existing_tb)) { - tb_page_remove(p, tb); - invalidate_page_bitmap(p); - if (p2) { - tb_page_remove(p2, tb); - invalidate_page_bitmap(p2); - } - tb = existing_tb; + /* remove TB from the page(s) if we couldn't insert it */ + if (unlikely(existing_tb)) { + tb_page_remove(p, tb); + invalidate_page_bitmap(p); + if (p2) { + tb_page_remove(p2, tb); + invalidate_page_bitmap(p2); } + tb = existing_tb; } if (p2 && p2 != p) { @@ -1880,9 +1861,8 @@ TranslationBlock *tb_gen_code(CPUState *cpu, phys_pc = get_page_addr_code(env, pc); if (phys_pc == -1) { - /* Generate a temporary TB with 1 insn in it */ - cflags &= ~CF_COUNT_MASK; - cflags |= CF_NOCACHE | 1; + /* Generate a one-shot TB with 1 insn in it */ + cflags = (cflags & ~CF_COUNT_MASK) | 1; } cflags &= ~CF_CLUSTER_MASK; @@ -1916,7 +1896,6 @@ TranslationBlock *tb_gen_code(CPUState *cpu, tb->cs_base = cs_base; tb->flags = flags; tb->cflags = cflags; - tb->orig_tb = NULL; tb->trace_vcpu_dstate = *cpu->trace_dstate; tcg_ctx->tb_cflags = cflags; tb_overflow: @@ -2096,6 +2075,17 @@ TranslationBlock *tb_gen_code(CPUState *cpu, tb_reset_jump(tb, 1); } + /* + * If the TB is not associated with a physical RAM page then + * it must be a temporary one-insn TB, and we have nothing to do + * except fill in the page_addr[] fields. Return early before + * attempting to link to other TBs or add to the lookup table. + */ + if (phys_pc == -1) { + tb->page_addr[0] = tb->page_addr[1] = -1; + return tb; + } + /* check next page if needed */ virt_page2 = (pc + tb->size - 1) & TARGET_PAGE_MASK; phys_page2 = -1; @@ -2410,17 +2400,16 @@ void tb_check_watchpoint(CPUState *cpu, uintptr_t retaddr) } #ifndef CONFIG_USER_ONLY -/* in deterministic execution mode, instructions doing device I/Os +/* + * In deterministic execution mode, instructions doing device I/Os * must be at the end of the TB. * * Called by softmmu_template.h, with iothread mutex not held. */ void cpu_io_recompile(CPUState *cpu, uintptr_t retaddr) { -#if defined(TARGET_MIPS) || defined(TARGET_SH4) - CPUArchState *env = cpu->env_ptr; -#endif TranslationBlock *tb; + CPUClass *cc; uint32_t n; tb = tcg_tb_lookup(retaddr); @@ -2430,52 +2419,31 @@ void cpu_io_recompile(CPUState *cpu, uintptr_t retaddr) } cpu_restore_state_from_tb(cpu, tb, retaddr, true); - /* On MIPS and SH, delay slot instructions can only be restarted if - they were already the first instruction in the TB. If this is not - the first instruction in a TB then re-execute the preceding - branch. */ + /* + * Some guests must re-execute the branch when re-executing a delay + * slot instruction. When this is the case, adjust icount and N + * to account for the re-execution of the branch. + */ n = 1; -#if defined(TARGET_MIPS) - if ((env->hflags & MIPS_HFLAG_BMASK) != 0 - && env->active_tc.PC != tb->pc) { - env->active_tc.PC -= (env->hflags & MIPS_HFLAG_B16 ? 
2 : 4); + cc = CPU_GET_CLASS(cpu); + if (cc->tcg_ops->io_recompile_replay_branch && + cc->tcg_ops->io_recompile_replay_branch(cpu, tb)) { cpu_neg(cpu)->icount_decr.u16.low++; - env->hflags &= ~MIPS_HFLAG_BMASK; n = 2; } -#elif defined(TARGET_SH4) - if ((env->flags & ((DELAY_SLOT | DELAY_SLOT_CONDITIONAL))) != 0 - && env->pc != tb->pc) { - env->pc -= 2; - cpu_neg(cpu)->icount_decr.u16.low++; - env->flags &= ~(DELAY_SLOT | DELAY_SLOT_CONDITIONAL); - n = 2; - } -#endif - /* Generate a new TB executing the I/O insn. */ - cpu->cflags_next_tb = curr_cflags() | CF_LAST_IO | n; - - if (tb_cflags(tb) & CF_NOCACHE) { - if (tb->orig_tb) { - /* Invalidate original TB if this TB was generated in - * cpu_exec_nocache() */ - tb_phys_invalidate(tb->orig_tb, -1); - } - tcg_tb_remove(tb); - tb_destroy(tb); - } + /* + * Exit the loop and potentially generate a new TB executing the + * just the I/O insns. We also limit instrumentation to memory + * operations only (which execute after completion) so we don't + * double instrument the instruction. + */ + cpu->cflags_next_tb = curr_cflags() | CF_MEMI_ONLY | CF_LAST_IO | n; qemu_log_mask_and_addr(CPU_LOG_EXEC, tb->pc, "cpu_io_recompile: rewound execution of TB to " TARGET_FMT_lx "\n", tb->pc); - /* TODO: If env->pc != tb->pc (i.e. the faulting instruction was not - * the first in the TB) then we end up generating a whole new TB and - * repeating the fault, which is horribly inefficient. - * Better would be to execute just this insn uncached, or generate a - * second new TB. - */ cpu_loop_exit_noexc(cpu); } diff --git a/accel/tcg/translator.c b/accel/tcg/translator.c index a49a794065..2dfc27102f 100644 --- a/accel/tcg/translator.c +++ b/accel/tcg/translator.c @@ -58,7 +58,8 @@ void translator_loop(const TranslatorOps *ops, DisasContextBase *db, ops->tb_start(db, cpu); tcg_debug_assert(db->is_jmp == DISAS_NEXT); /* no early exit */ - plugin_enabled = plugin_gen_tb_start(cpu, tb); + plugin_enabled = plugin_gen_tb_start(cpu, tb, + tb_cflags(db->tb) & CF_MEMI_ONLY); while (true) { db->num_insns++; @@ -100,6 +101,8 @@ void translator_loop(const TranslatorOps *ops, DisasContextBase *db, gen_io_start(); ops->translate_insn(db, cpu); } else { + /* we should only see CF_MEMI_ONLY for io_recompile */ + tcg_debug_assert(!(tb_cflags(db->tb) & CF_MEMI_ONLY)); ops->translate_insn(db, cpu); } diff --git a/contrib/ivshmem-server/main.c b/contrib/ivshmem-server/main.c index ee08c4ced0..224dbeb547 100644 --- a/contrib/ivshmem-server/main.c +++ b/contrib/ivshmem-server/main.c @@ -17,7 +17,7 @@ #define IVSHMEM_SERVER_DEFAULT_PID_FILE "/var/run/ivshmem-server.pid" #define IVSHMEM_SERVER_DEFAULT_UNIX_SOCK_PATH "/tmp/ivshmem_socket" #define IVSHMEM_SERVER_DEFAULT_SHM_PATH "ivshmem" -#define IVSHMEM_SERVER_DEFAULT_SHM_SIZE (4*1024*1024) +#define IVSHMEM_SERVER_DEFAULT_SHM_SIZE (4 * 1024 * 1024) #define IVSHMEM_SERVER_DEFAULT_N_VECTORS 1 /* used to quit on signal SIGTERM */ diff --git a/contrib/plugins/Makefile b/contrib/plugins/Makefile index 7801b08b0d..b9d7935e5e 100644 --- a/contrib/plugins/Makefile +++ b/contrib/plugins/Makefile @@ -17,6 +17,7 @@ NAMES += hotblocks NAMES += hotpages NAMES += howvec NAMES += lockstep +NAMES += hwprofile SONAMES := $(addsuffix .so,$(addprefix lib,$(NAMES))) diff --git a/contrib/plugins/hotblocks.c b/contrib/plugins/hotblocks.c index 37435a3fc7..4b08340143 100644 --- a/contrib/plugins/hotblocks.c +++ b/contrib/plugins/hotblocks.c @@ -63,7 +63,7 @@ static void plugin_exit(qemu_plugin_id_t id, void *p) for (i = 0; i < limit && it->next; i++, it = 
it->next) { ExecCount *rec = (ExecCount *) it->data; - g_string_append_printf(report, "%#016"PRIx64", %d, %ld, %"PRId64"\n", + g_string_append_printf(report, "0x%016"PRIx64", %d, %ld, %"PRId64"\n", rec->start_addr, rec->trans_count, rec->insns, rec->exec_count); } diff --git a/contrib/plugins/hotpages.c b/contrib/plugins/hotpages.c index ecd6c18732..eacc678eac 100644 --- a/contrib/plugins/hotpages.c +++ b/contrib/plugins/hotpages.c @@ -88,7 +88,7 @@ static void plugin_exit(qemu_plugin_id_t id, void *p) for (i = 0; i < limit && it->next; i++, it = it->next) { PageCounters *rec = (PageCounters *) it->data; g_string_append_printf(report, - "%#016"PRIx64", 0x%04x, %"PRId64 + "0x%016"PRIx64", 0x%04x, %"PRId64 ", 0x%04x, %"PRId64"\n", rec->page_address, rec->cpu_read, rec->reads, diff --git a/contrib/plugins/howvec.c b/contrib/plugins/howvec.c index 3b9a6939f2..600f7facc1 100644 --- a/contrib/plugins/howvec.c +++ b/contrib/plugins/howvec.c @@ -68,7 +68,7 @@ static InsnClassExecCount aarch64_insn_classes[] = { { "Reserved", "res", 0x1e000000, 0x00000000, COUNT_CLASS}, /* Data Processing Immediate */ { " PCrel addr", "pcrel", 0x1f000000, 0x10000000, COUNT_CLASS}, - { " Add/Sub (imm,tags)","asit", 0x1f800000, 0x11800000, COUNT_CLASS}, + { " Add/Sub (imm,tags)", "asit", 0x1f800000, 0x11800000, COUNT_CLASS}, { " Add/Sub (imm)", "asi", 0x1f000000, 0x11000000, COUNT_CLASS}, { " Logical (imm)", "logi", 0x1f800000, 0x12000000, COUNT_CLASS}, { " Move Wide (imm)", "movwi", 0x1f800000, 0x12800000, COUNT_CLASS}, @@ -91,17 +91,17 @@ static InsnClassExecCount aarch64_insn_classes[] = { { "Branches", "branch", 0x1c000000, 0x14000000, COUNT_CLASS}, /* Loads and Stores */ { " AdvSimd ldstmult", "advlsm", 0xbfbf0000, 0x0c000000, COUNT_CLASS}, - { " AdvSimd ldstmult++","advlsmp",0xbfb00000, 0x0c800000, COUNT_CLASS}, + { " AdvSimd ldstmult++", "advlsmp", 0xbfb00000, 0x0c800000, COUNT_CLASS}, { " AdvSimd ldst", "advlss", 0xbf9f0000, 0x0d000000, COUNT_CLASS}, - { " AdvSimd ldst++", "advlssp",0xbf800000, 0x0d800000, COUNT_CLASS}, + { " AdvSimd ldst++", "advlssp", 0xbf800000, 0x0d800000, COUNT_CLASS}, { " ldst excl", "ldstx", 0x3f000000, 0x08000000, COUNT_CLASS}, { " Prefetch", "prfm", 0xff000000, 0xd8000000, COUNT_CLASS}, { " Load Reg (lit)", "ldlit", 0x1b000000, 0x18000000, COUNT_CLASS}, - { " ldst noalloc pair", "ldstnap",0x3b800000, 0x28000000, COUNT_CLASS}, + { " ldst noalloc pair", "ldstnap", 0x3b800000, 0x28000000, COUNT_CLASS}, { " ldst pair", "ldstp", 0x38000000, 0x28000000, COUNT_CLASS}, { " ldst reg", "ldstr", 0x3b200000, 0x38000000, COUNT_CLASS}, { " Atomic ldst", "atomic", 0x3b200c00, 0x38200000, COUNT_CLASS}, - { " ldst reg (reg off)","ldstro", 0x3b200b00, 0x38200800, COUNT_CLASS}, + { " ldst reg (reg off)", "ldstro", 0x3b200b00, 0x38200800, COUNT_CLASS}, { " ldst reg (pac)", "ldstpa", 0x3b200200, 0x38200800, COUNT_CLASS}, { " ldst reg (imm)", "ldsti", 0x3b000000, 0x39000000, COUNT_CLASS}, { "Loads & Stores", "ldst", 0x0a000000, 0x08000000, COUNT_CLASS}, @@ -145,8 +145,7 @@ typedef struct { int table_sz; } ClassSelector; -static ClassSelector class_tables[] = -{ +static ClassSelector class_tables[] = { { "aarch64", aarch64_insn_classes, ARRAY_SIZE(aarch64_insn_classes) }, { "sparc", sparc32_insn_classes, ARRAY_SIZE(sparc32_insn_classes) }, { "sparc64", sparc64_insn_classes, ARRAY_SIZE(sparc64_insn_classes) }, @@ -202,14 +201,14 @@ static void plugin_exit(qemu_plugin_id_t id, void *p) counts = g_hash_table_get_values(insns); if (counts && g_list_next(counts)) { - g_string_append_printf(report,"Individual 
Instructions:\n"); + g_string_append_printf(report, "Individual Instructions:\n"); counts = g_list_sort(counts, cmp_exec_count); for (i = 0; i < limit && g_list_next(counts); i++, counts = g_list_next(counts)) { InsnExecCount *rec = (InsnExecCount *) counts->data; g_string_append_printf(report, - "Instr: %-24s\t(%ld hits)\t(op=%#08x/%s)\n", + "Instr: %-24s\t(%ld hits)\t(op=0x%08x/%s)\n", rec->insn, rec->count, rec->opcode, @@ -235,7 +234,7 @@ static void vcpu_insn_exec_before(unsigned int cpu_index, void *udata) (*count)++; } -static uint64_t * find_counter(struct qemu_plugin_insn *insn) +static uint64_t *find_counter(struct qemu_plugin_insn *insn) { int i; uint64_t *cnt = NULL; diff --git a/contrib/plugins/hwprofile.c b/contrib/plugins/hwprofile.c new file mode 100644 index 0000000000..6dac1d5f85 --- /dev/null +++ b/contrib/plugins/hwprofile.c @@ -0,0 +1,305 @@ +/* + * Copyright (C) 2020, Alex Bennée + * + * HW Profile - breakdown access patterns for IO to devices + * + * License: GNU GPL, version 2 or later. + * See the COPYING file in the top-level directory. + */ + +#include +#include +#include +#include +#include +#include +#include +#include + +#include + +QEMU_PLUGIN_EXPORT int qemu_plugin_version = QEMU_PLUGIN_VERSION; + +#define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0])) + +typedef struct { + uint64_t cpu_read; + uint64_t cpu_write; + uint64_t reads; + uint64_t writes; +} IOCounts; + +typedef struct { + uint64_t off_or_pc; + IOCounts counts; +} IOLocationCounts; + +typedef struct { + const char *name; + uint64_t base; + IOCounts totals; + GHashTable *detail; +} DeviceCounts; + +static GMutex lock; +static GHashTable *devices; + +/* track the access pattern to a piece of HW */ +static bool pattern; +/* track the source address of access to HW */ +static bool source; +/* track only matched regions of HW */ +static bool check_match; +static gchar **matches; + +static enum qemu_plugin_mem_rw rw = QEMU_PLUGIN_MEM_RW; + +static inline bool track_reads(void) +{ + return rw == QEMU_PLUGIN_MEM_RW || rw == QEMU_PLUGIN_MEM_R; +} + +static inline bool track_writes(void) +{ + return rw == QEMU_PLUGIN_MEM_RW || rw == QEMU_PLUGIN_MEM_W; +} + +static void plugin_init(void) +{ + devices = g_hash_table_new(NULL, NULL); +} + +static gint sort_cmp(gconstpointer a, gconstpointer b) +{ + DeviceCounts *ea = (DeviceCounts *) a; + DeviceCounts *eb = (DeviceCounts *) b; + return ea->totals.reads + ea->totals.writes > + eb->totals.reads + eb->totals.writes ? 
-1 : 1; +} + +static gint sort_loc(gconstpointer a, gconstpointer b) +{ + IOLocationCounts *ea = (IOLocationCounts *) a; + IOLocationCounts *eb = (IOLocationCounts *) b; + return ea->off_or_pc > eb->off_or_pc; +} + +static void fmt_iocount_record(GString *s, IOCounts *rec) +{ + if (track_reads()) { + g_string_append_printf(s, ", %"PRIx64", %"PRId64, + rec->cpu_read, rec->reads); + } + if (track_writes()) { + g_string_append_printf(s, ", %"PRIx64", %"PRId64, + rec->cpu_write, rec->writes); + } +} + +static void fmt_dev_record(GString *s, DeviceCounts *rec) +{ + g_string_append_printf(s, "%s, 0x%"PRIx64, + rec->name, rec->base); + fmt_iocount_record(s, &rec->totals); + g_string_append_c(s, '\n'); +} + +static void plugin_exit(qemu_plugin_id_t id, void *p) +{ + g_autoptr(GString) report = g_string_new(""); + GList *counts; + + if (!(pattern || source)) { + g_string_printf(report, "Device, Address"); + if (track_reads()) { + g_string_append_printf(report, ", RCPUs, Reads"); + } + if (track_writes()) { + g_string_append_printf(report, ", WCPUs, Writes"); + } + g_string_append_c(report, '\n'); + } + + counts = g_hash_table_get_values(devices); + if (counts && g_list_next(counts)) { + GList *it; + + it = g_list_sort(counts, sort_cmp); + + while (it) { + DeviceCounts *rec = (DeviceCounts *) it->data; + if (rec->detail) { + GList *accesses = g_hash_table_get_values(rec->detail); + GList *io_it = g_list_sort(accesses, sort_loc); + const char *prefix = pattern ? "off" : "pc"; + g_string_append_printf(report, "%s @ 0x%"PRIx64"\n", + rec->name, rec->base); + while (io_it) { + IOLocationCounts *loc = (IOLocationCounts *) io_it->data; + g_string_append_printf(report, " %s:%08"PRIx64, + prefix, loc->off_or_pc); + fmt_iocount_record(report, &loc->counts); + g_string_append_c(report, '\n'); + io_it = io_it->next; + } + } else { + fmt_dev_record(report, rec); + } + it = it->next; + }; + g_list_free(it); + } + + qemu_plugin_outs(report->str); +} + +static DeviceCounts *new_count(const char *name, uint64_t base) +{ + DeviceCounts *count = g_new0(DeviceCounts, 1); + count->name = name; + count->base = base; + if (pattern || source) { + count->detail = g_hash_table_new(NULL, NULL); + } + g_hash_table_insert(devices, (gpointer) name, count); + return count; +} + +static IOLocationCounts *new_location(GHashTable *table, uint64_t off_or_pc) +{ + IOLocationCounts *loc = g_new0(IOLocationCounts, 1); + loc->off_or_pc = off_or_pc; + g_hash_table_insert(table, (gpointer) off_or_pc, loc); + return loc; +} + +static void hwprofile_match_hit(DeviceCounts *rec, uint64_t off) +{ + g_autoptr(GString) report = g_string_new("hwprofile: match @ offset"); + g_string_append_printf(report, "%"PRIx64", previous hits\n", off); + fmt_dev_record(report, rec); + qemu_plugin_outs(report->str); +} + +static void inc_count(IOCounts *count, bool is_write, unsigned int cpu_index) +{ + if (is_write) { + count->writes++; + count->cpu_write |= (1 << cpu_index); + } else { + count->reads++; + count->cpu_read |= (1 << cpu_index); + } +} + +static void vcpu_haddr(unsigned int cpu_index, qemu_plugin_meminfo_t meminfo, + uint64_t vaddr, void *udata) +{ + struct qemu_plugin_hwaddr *hwaddr = qemu_plugin_get_hwaddr(meminfo, vaddr); + + if (!hwaddr || !qemu_plugin_hwaddr_is_io(hwaddr)) { + return; + } else { + const char *name = qemu_plugin_hwaddr_device_name(hwaddr); + uint64_t off = qemu_plugin_hwaddr_device_offset(hwaddr); + bool is_write = qemu_plugin_mem_is_store(meminfo); + DeviceCounts *counts; + + g_mutex_lock(&lock); + counts = (DeviceCounts *) 
g_hash_table_lookup(devices, name); + + if (!counts) { + uint64_t base = vaddr - off; + counts = new_count(name, base); + } + + if (check_match) { + if (g_strv_contains((const char * const *)matches, counts->name)) { + hwprofile_match_hit(counts, off); + inc_count(&counts->totals, is_write, cpu_index); + } + } else { + inc_count(&counts->totals, is_write, cpu_index); + } + + /* either track offsets or source of access */ + if (source) { + off = (uint64_t) udata; + } + + if (pattern || source) { + IOLocationCounts *io_count = g_hash_table_lookup(counts->detail, + (gpointer) off); + if (!io_count) { + io_count = new_location(counts->detail, off); + } + inc_count(&io_count->counts, is_write, cpu_index); + } + + g_mutex_unlock(&lock); + } +} + +static void vcpu_tb_trans(qemu_plugin_id_t id, struct qemu_plugin_tb *tb) +{ + size_t n = qemu_plugin_tb_n_insns(tb); + size_t i; + + for (i = 0; i < n; i++) { + struct qemu_plugin_insn *insn = qemu_plugin_tb_get_insn(tb, i); + gpointer udata = (gpointer) (source ? qemu_plugin_insn_vaddr(insn) : 0); + qemu_plugin_register_vcpu_mem_cb(insn, vcpu_haddr, + QEMU_PLUGIN_CB_NO_REGS, + rw, udata); + } +} + +QEMU_PLUGIN_EXPORT +int qemu_plugin_install(qemu_plugin_id_t id, const qemu_info_t *info, + int argc, char **argv) +{ + int i; + + for (i = 0; i < argc; i++) { + char *opt = argv[i]; + if (g_strcmp0(opt, "read") == 0) { + rw = QEMU_PLUGIN_MEM_R; + } else if (g_strcmp0(opt, "write") == 0) { + rw = QEMU_PLUGIN_MEM_W; + } else if (g_strcmp0(opt, "pattern") == 0) { + pattern = true; + } else if (g_strcmp0(opt, "source") == 0) { + source = true; + } else if (g_str_has_prefix(opt, "match")) { + gchar **parts = g_strsplit(opt, "=", 2); + check_match = true; + matches = g_strsplit(parts[1], ",", -1); + g_strfreev(parts); + } else { + fprintf(stderr, "option parsing failed: %s\n", opt); + return -1; + } + } + + if (source && pattern) { + fprintf(stderr, "can only currently track either source or pattern.\n"); + return -1; + } + + if (!info->system_emulation) { + fprintf(stderr, "hwprofile: plugin only useful for system emulation\n"); + return -1; + } + + /* Just warn about overflow */ + if (info->system.smp_vcpus > 64 || + info->system.max_vcpus > 64) { + fprintf(stderr, "hwprofile: can only track up to 64 CPUs\n"); + } + + plugin_init(); + + qemu_plugin_register_vcpu_tb_trans_cb(id, vcpu_tb_trans); + qemu_plugin_register_atexit_cb(id, plugin_exit, NULL); + return 0; +} diff --git a/contrib/plugins/lockstep.c b/contrib/plugins/lockstep.c index 5aad50869d..7fd35eb669 100644 --- a/contrib/plugins/lockstep.c +++ b/contrib/plugins/lockstep.c @@ -134,7 +134,7 @@ static void report_divergance(ExecState *us, ExecState *them) /* Output short log entry of going out of sync... 
*/ if (verbose || divrec.distance == 1 || diverged) { - g_string_printf(out, "@ %#016lx vs %#016lx (%d/%d since last)\n", + g_string_printf(out, "@ 0x%016lx vs 0x%016lx (%d/%d since last)\n", us->pc, them->pc, g_slist_length(divergence_log), divrec.distance); qemu_plugin_outs(out->str); @@ -144,7 +144,7 @@ static void report_divergance(ExecState *us, ExecState *them) int i; GSList *entry; - g_string_printf(out, "Δ insn_count @ %#016lx (%ld) vs %#016lx (%ld)\n", + g_string_printf(out, "Δ insn_count @ 0x%016lx (%ld) vs 0x%016lx (%ld)\n", us->pc, us->insn_count, them->pc, them->insn_count); for (entry = log, i = 0; @@ -152,7 +152,7 @@ static void report_divergance(ExecState *us, ExecState *them) entry = g_slist_next(entry), i++) { ExecInfo *prev = (ExecInfo *) entry->data; g_string_append_printf(out, - " previously @ %#016lx/%ld (%ld insns)\n", + " previously @ 0x%016lx/%ld (%ld insns)\n", prev->block->pc, prev->block->insns, prev->insn_count); } diff --git a/docs/devel/tcg-plugins.rst b/docs/devel/tcg-plugins.rst index 0568dfa6a4..39ce86ed96 100644 --- a/docs/devel/tcg-plugins.rst +++ b/docs/devel/tcg-plugins.rst @@ -280,3 +280,37 @@ which will eventually report:: previously @ 0x000000ffd08098/5 (809900593 insns) previously @ 0x000000ffd080c0/1 (809900588 insns) +- contrib/plugins/hwprofile + +The hwprofile tool can only be used with system emulation and allows +the user to see what hardware is accessed how often. It has a number of options: + + * arg=read or arg=write + + By default the plugin tracks both reads and writes. You can use one + of these options to limit the tracking to just one class of accesses. + + * arg=source + + Will include a detailed break down of what the guest PC that made the + access was. Not compatible with arg=pattern. Example output:: + + cirrus-low-memory @ 0xfffffd00000a0000 + pc:fffffc0000005cdc, 1, 256 + pc:fffffc0000005ce8, 1, 256 + pc:fffffc0000005cec, 1, 256 + + * arg=pattern + + Instead break down the accesses based on the offset into the HW + region. This can be useful for seeing the most used registers of a + device. Example output:: + + pci0-conf @ 0xfffffd01fe000000 + off:00000004, 1, 1 + off:00000010, 1, 3 + off:00000014, 1, 3 + off:00000018, 1, 2 + off:0000001c, 1, 2 + off:00000020, 1, 2 + ... 
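For reference, the access-counting pattern the hwprofile documentation above describes boils down to a handful of plugin API calls, including the qemu_plugin_hwaddr_device_name() helper introduced by this series. The following is a minimal sketch only, not part of the patch: it is distilled from contrib/plugins/hwprofile.c, the callback names and the logging format are illustrative, and it simply prints each I/O access rather than accumulating counts::

    #include <stdio.h>
    #include <inttypes.h>
    #include <glib.h>
    #include <qemu-plugin.h>

    QEMU_PLUGIN_EXPORT int qemu_plugin_version = QEMU_PLUGIN_VERSION;

    /* Log every access that resolves to an I/O region: device name + offset */
    static void vcpu_haddr(unsigned int cpu_index, qemu_plugin_meminfo_t meminfo,
                           uint64_t vaddr, void *udata)
    {
        struct qemu_plugin_hwaddr *hwaddr = qemu_plugin_get_hwaddr(meminfo, vaddr);

        if (hwaddr && qemu_plugin_hwaddr_is_io(hwaddr)) {
            const char *name = qemu_plugin_hwaddr_device_name(hwaddr);
            uint64_t off = qemu_plugin_hwaddr_device_offset(hwaddr);
            g_autofree gchar *out =
                g_strdup_printf("%s %s off:0x%08" PRIx64 "\n",
                                qemu_plugin_mem_is_store(meminfo) ? "W" : "R",
                                name, off);
            qemu_plugin_outs(out);
        }
    }

    /* Instrument every memory operation in each translated block */
    static void vcpu_tb_trans(qemu_plugin_id_t id, struct qemu_plugin_tb *tb)
    {
        size_t n = qemu_plugin_tb_n_insns(tb);
        size_t i;

        for (i = 0; i < n; i++) {
            struct qemu_plugin_insn *insn = qemu_plugin_tb_get_insn(tb, i);
            qemu_plugin_register_vcpu_mem_cb(insn, vcpu_haddr,
                                             QEMU_PLUGIN_CB_NO_REGS,
                                             QEMU_PLUGIN_MEM_RW, NULL);
        }
    }

    QEMU_PLUGIN_EXPORT
    int qemu_plugin_install(qemu_plugin_id_t id, const qemu_info_t *info,
                            int argc, char **argv)
    {
        qemu_plugin_register_vcpu_tb_trans_cb(id, vcpu_tb_trans);
        return 0;
    }

Because memory instrumentation only finalises after the access has completed, callbacks registered this way keep firing even for the CF_MEMI_ONLY one-shot blocks generated by cpu_io_recompile(), which is exactly the behaviour hwprofile relies on.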
diff --git a/hw/virtio/virtio-pci.c b/hw/virtio/virtio-pci.c index 094c36aa3e..883045a223 100644 --- a/hw/virtio/virtio-pci.c +++ b/hw/virtio/virtio-pci.c @@ -1423,7 +1423,8 @@ static void virtio_pci_device_write(void *opaque, hwaddr addr, } } -static void virtio_pci_modern_regions_init(VirtIOPCIProxy *proxy) +static void virtio_pci_modern_regions_init(VirtIOPCIProxy *proxy, + const char *vdev_name) { static const MemoryRegionOps common_ops = { .read = virtio_pci_common_read, @@ -1470,36 +1471,41 @@ static void virtio_pci_modern_regions_init(VirtIOPCIProxy *proxy) }, .endianness = DEVICE_LITTLE_ENDIAN, }; + g_autoptr(GString) name = g_string_new(NULL); - + g_string_printf(name, "virtio-pci-common-%s", vdev_name); memory_region_init_io(&proxy->common.mr, OBJECT(proxy), &common_ops, proxy, - "virtio-pci-common", + name->str, proxy->common.size); + g_string_printf(name, "virtio-pci-isr-%s", vdev_name); memory_region_init_io(&proxy->isr.mr, OBJECT(proxy), &isr_ops, proxy, - "virtio-pci-isr", + name->str, proxy->isr.size); + g_string_printf(name, "virtio-pci-device-%s", vdev_name); memory_region_init_io(&proxy->device.mr, OBJECT(proxy), &device_ops, proxy, - "virtio-pci-device", + name->str, proxy->device.size); + g_string_printf(name, "virtio-pci-notify-%s", vdev_name); memory_region_init_io(&proxy->notify.mr, OBJECT(proxy), ¬ify_ops, proxy, - "virtio-pci-notify", + name->str, proxy->notify.size); + g_string_printf(name, "virtio-pci-notify-pio-%s", vdev_name); memory_region_init_io(&proxy->notify_pio.mr, OBJECT(proxy), ¬ify_pio_ops, proxy, - "virtio-pci-notify-pio", + name->str, proxy->notify_pio.size); } @@ -1654,7 +1660,7 @@ static void virtio_pci_device_plugged(DeviceState *d, Error **errp) struct virtio_pci_cfg_cap *cfg_mask; - virtio_pci_modern_regions_init(proxy); + virtio_pci_modern_regions_init(proxy, vdev->name); virtio_pci_modern_mem_region_map(proxy, &proxy->common, &cap); virtio_pci_modern_mem_region_map(proxy, &proxy->isr, &cap); diff --git a/include/exec/exec-all.h b/include/exec/exec-all.h index d30c7a84f6..b7b3c0ef12 100644 --- a/include/exec/exec-all.h +++ b/include/exec/exec-all.h @@ -454,23 +454,20 @@ struct TranslationBlock { uint32_t cflags; /* compile flags */ #define CF_COUNT_MASK 0x00007fff #define CF_LAST_IO 0x00008000 /* Last insn may be an IO access. */ -#define CF_NOCACHE 0x00010000 /* To be freed after execution */ +#define CF_MEMI_ONLY 0x00010000 /* Only instrument memory ops */ #define CF_USE_ICOUNT 0x00020000 #define CF_INVALID 0x00040000 /* TB is stale. Set with @jmp_lock held */ #define CF_PARALLEL 0x00080000 /* Generate code for a parallel context */ #define CF_CLUSTER_MASK 0xff000000 /* Top 8 bits are cluster ID */ #define CF_CLUSTER_SHIFT 24 -/* cflags' mask for hashing/comparison */ -#define CF_HASH_MASK \ - (CF_COUNT_MASK | CF_LAST_IO | CF_USE_ICOUNT | CF_PARALLEL | CF_CLUSTER_MASK) +/* cflags' mask for hashing/comparison, basically ignore CF_INVALID */ +#define CF_HASH_MASK (~CF_INVALID) /* Per-vCPU dynamic tracing state used to generate this TB */ uint32_t trace_vcpu_dstate; struct tb_tc tc; - /* original tb when cflags has CF_NOCACHE */ - struct TranslationBlock *orig_tb; /* first and second physical page containing code. The lower bit of the pointer tells the index in page_next[]. 
The list is protected by the TB's page('s) lock(s) */ diff --git a/include/exec/plugin-gen.h b/include/exec/plugin-gen.h index 4834a9e2f4..b1b72b5d90 100644 --- a/include/exec/plugin-gen.h +++ b/include/exec/plugin-gen.h @@ -19,7 +19,7 @@ struct DisasContextBase; #ifdef CONFIG_PLUGIN -bool plugin_gen_tb_start(CPUState *cpu, const TranslationBlock *tb); +bool plugin_gen_tb_start(CPUState *cpu, const TranslationBlock *tb, bool supress); void plugin_gen_tb_end(CPUState *cpu); void plugin_gen_insn_start(CPUState *cpu, const struct DisasContextBase *db); void plugin_gen_insn_end(void); @@ -41,7 +41,7 @@ static inline void plugin_insn_append(const void *from, size_t size) #else /* !CONFIG_PLUGIN */ static inline -bool plugin_gen_tb_start(CPUState *cpu, const TranslationBlock *tb) +bool plugin_gen_tb_start(CPUState *cpu, const TranslationBlock *tb, bool supress) { return false; } diff --git a/include/exec/tb-context.h b/include/exec/tb-context.h index ec4c13b455..cc33979113 100644 --- a/include/exec/tb-context.h +++ b/include/exec/tb-context.h @@ -26,7 +26,6 @@ #define CODE_GEN_HTABLE_BITS 15 #define CODE_GEN_HTABLE_SIZE (1 << CODE_GEN_HTABLE_BITS) -typedef struct TranslationBlock TranslationBlock; typedef struct TBContext TBContext; struct TBContext { diff --git a/include/hw/core/cpu.h b/include/hw/core/cpu.h index 38d813c389..c005d3dc2d 100644 --- a/include/hw/core/cpu.h +++ b/include/hw/core/cpu.h @@ -74,8 +74,6 @@ typedef enum MMUAccessType { typedef struct CPUWatchpoint CPUWatchpoint; -struct TranslationBlock; - /* see tcg-cpu-ops.h */ struct TCGCPUOps; @@ -375,7 +373,7 @@ struct CPUState { IcountDecr *icount_decr_ptr; /* Accessed in parallel; all accesses must be atomic */ - struct TranslationBlock *tb_jmp_cache[TB_JMP_CACHE_SIZE]; + TranslationBlock *tb_jmp_cache[TB_JMP_CACHE_SIZE]; struct GDBRegisterState *gdb_regs; int gdb_num_regs; diff --git a/include/hw/core/tcg-cpu-ops.h b/include/hw/core/tcg-cpu-ops.h index ccc97d1894..72d791438c 100644 --- a/include/hw/core/tcg-cpu-ops.h +++ b/include/hw/core/tcg-cpu-ops.h @@ -30,8 +30,7 @@ struct TCGCPUOps { * If more state needs to be restored, the target must implement a * function to restore all the state, and register it here. */ - void (*synchronize_from_tb)(CPUState *cpu, - const struct TranslationBlock *tb); + void (*synchronize_from_tb)(CPUState *cpu, const TranslationBlock *tb); /** @cpu_exec_enter: Callback for cpu_exec preparation */ void (*cpu_exec_enter)(CPUState *cpu); /** @cpu_exec_exit: Callback for cpu_exec cleanup */ @@ -89,6 +88,16 @@ struct TCGCPUOps { */ bool (*debug_check_watchpoint)(CPUState *cpu, CPUWatchpoint *wp); + /** + * @io_recompile_replay_branch: Callback for cpu_io_recompile. + * + * The cpu has been stopped, and cpu_restore_state_from_tb has been + * called. If the faulting instruction is in a delay slot, and the + * target architecture requires re-execution of the branch, then + * adjust the cpu state as required and return true. 
+ */ + bool (*io_recompile_replay_branch)(CPUState *cpu, + const TranslationBlock *tb); #endif /* CONFIG_SOFTMMU */ #endif /* NEED_CPU_H */ diff --git a/include/qemu/plugin.h b/include/qemu/plugin.h index 841deed79c..c5a79a89f0 100644 --- a/include/qemu/plugin.h +++ b/include/qemu/plugin.h @@ -92,6 +92,7 @@ struct qemu_plugin_dyn_cb { }; }; +/* Internal context for instrumenting an instruction */ struct qemu_plugin_insn { GByteArray *data; uint64_t vaddr; @@ -99,6 +100,7 @@ struct qemu_plugin_insn { GArray *cbs[PLUGIN_N_CB_TYPES][PLUGIN_N_CB_SUBTYPES]; bool calls_helpers; bool mem_helper; + bool mem_only; }; /* @@ -128,6 +130,7 @@ static inline struct qemu_plugin_insn *qemu_plugin_insn_alloc(void) return insn; } +/* Internal context for this TranslationBlock */ struct qemu_plugin_tb { GPtrArray *insns; size_t n; @@ -135,6 +138,7 @@ struct qemu_plugin_tb { uint64_t vaddr2; void *haddr1; void *haddr2; + bool mem_only; GArray *cbs[PLUGIN_N_CB_SUBTYPES]; }; diff --git a/include/qemu/qemu-plugin.h b/include/qemu/qemu-plugin.h index 5775e82c4e..c66507fe8f 100644 --- a/include/qemu/qemu-plugin.h +++ b/include/qemu/qemu-plugin.h @@ -330,6 +330,12 @@ struct qemu_plugin_hwaddr *qemu_plugin_get_hwaddr(qemu_plugin_meminfo_t info, bool qemu_plugin_hwaddr_is_io(const struct qemu_plugin_hwaddr *haddr); uint64_t qemu_plugin_hwaddr_device_offset(const struct qemu_plugin_hwaddr *haddr); +/* + * Returns a string representing the device. The string is valid for + * the lifetime of the plugin. + */ +const char *qemu_plugin_hwaddr_device_name(const struct qemu_plugin_hwaddr *h); + typedef void (*qemu_plugin_vcpu_mem_cb_t)(unsigned int vcpu_index, qemu_plugin_meminfo_t info, uint64_t vaddr, diff --git a/include/qemu/typedefs.h b/include/qemu/typedefs.h index dc39b05c30..ee60eb3de4 100644 --- a/include/qemu/typedefs.h +++ b/include/qemu/typedefs.h @@ -120,6 +120,7 @@ typedef struct ReservedRegion ReservedRegion; typedef struct SavedIOTLB SavedIOTLB; typedef struct SHPCDevice SHPCDevice; typedef struct SSIBus SSIBus; +typedef struct TranslationBlock TranslationBlock; typedef struct VirtIODevice VirtIODevice; typedef struct Visitor Visitor; typedef struct VMChangeStateEntry VMChangeStateEntry; diff --git a/plugins/api.c b/plugins/api.c index bbdc5a4eb4..0b04380d57 100644 --- a/plugins/api.c +++ b/plugins/api.c @@ -84,15 +84,19 @@ void qemu_plugin_register_vcpu_tb_exec_cb(struct qemu_plugin_tb *tb, enum qemu_plugin_cb_flags flags, void *udata) { - plugin_register_dyn_cb__udata(&tb->cbs[PLUGIN_CB_REGULAR], - cb, flags, udata); + if (!tb->mem_only) { + plugin_register_dyn_cb__udata(&tb->cbs[PLUGIN_CB_REGULAR], + cb, flags, udata); + } } void qemu_plugin_register_vcpu_tb_exec_inline(struct qemu_plugin_tb *tb, enum qemu_plugin_op op, void *ptr, uint64_t imm) { - plugin_register_inline_op(&tb->cbs[PLUGIN_CB_INLINE], 0, op, ptr, imm); + if (!tb->mem_only) { + plugin_register_inline_op(&tb->cbs[PLUGIN_CB_INLINE], 0, op, ptr, imm); + } } void qemu_plugin_register_vcpu_insn_exec_cb(struct qemu_plugin_insn *insn, @@ -100,20 +104,27 @@ void qemu_plugin_register_vcpu_insn_exec_cb(struct qemu_plugin_insn *insn, enum qemu_plugin_cb_flags flags, void *udata) { - plugin_register_dyn_cb__udata(&insn->cbs[PLUGIN_CB_INSN][PLUGIN_CB_REGULAR], - cb, flags, udata); + if (!insn->mem_only) { + plugin_register_dyn_cb__udata(&insn->cbs[PLUGIN_CB_INSN][PLUGIN_CB_REGULAR], + cb, flags, udata); + } } void qemu_plugin_register_vcpu_insn_exec_inline(struct qemu_plugin_insn *insn, enum qemu_plugin_op op, void *ptr, uint64_t imm) { - 
plugin_register_inline_op(&insn->cbs[PLUGIN_CB_INSN][PLUGIN_CB_INLINE], - 0, op, ptr, imm); + if (!insn->mem_only) { + plugin_register_inline_op(&insn->cbs[PLUGIN_CB_INSN][PLUGIN_CB_INLINE], + 0, op, ptr, imm); + } } - +/* + * We always plant memory instrumentation because they don't finalise until + * after the operation has complete. + */ void qemu_plugin_register_vcpu_mem_cb(struct qemu_plugin_insn *insn, qemu_plugin_vcpu_mem_cb_t cb, enum qemu_plugin_cb_flags flags, @@ -121,7 +132,7 @@ void qemu_plugin_register_vcpu_mem_cb(struct qemu_plugin_insn *insn, void *udata) { plugin_register_vcpu_mem_cb(&insn->cbs[PLUGIN_CB_MEM][PLUGIN_CB_REGULAR], - cb, flags, rw, udata); + cb, flags, rw, udata); } void qemu_plugin_register_vcpu_mem_inline(struct qemu_plugin_insn *insn, @@ -130,7 +141,7 @@ void qemu_plugin_register_vcpu_mem_inline(struct qemu_plugin_insn *insn, uint64_t imm) { plugin_register_inline_op(&insn->cbs[PLUGIN_CB_MEM][PLUGIN_CB_INLINE], - rw, op, ptr, imm); + rw, op, ptr, imm); } void qemu_plugin_register_vcpu_tb_trans_cb(qemu_plugin_id_t id, @@ -181,10 +192,13 @@ uint64_t qemu_plugin_tb_vaddr(const struct qemu_plugin_tb *tb) struct qemu_plugin_insn * qemu_plugin_tb_get_insn(const struct qemu_plugin_tb *tb, size_t idx) { + struct qemu_plugin_insn *insn; if (unlikely(idx >= tb->n)) { return NULL; } - return g_ptr_array_index(tb->insns, idx); + insn = g_ptr_array_index(tb->insns, idx); + insn->mem_only = tb->mem_only; + return insn; } /* @@ -303,6 +317,26 @@ uint64_t qemu_plugin_hwaddr_device_offset(const struct qemu_plugin_hwaddr *haddr return 0; } +const char *qemu_plugin_hwaddr_device_name(const struct qemu_plugin_hwaddr *h) +{ +#ifdef CONFIG_SOFTMMU + if (h && h->is_io) { + MemoryRegionSection *mrs = h->v.io.section; + if (!mrs->mr->name) { + unsigned long maddr = 0xffffffff & (uintptr_t) mrs->mr; + g_autofree char *temp = g_strdup_printf("anon%08lx", maddr); + return g_intern_string(temp); + } else { + return g_intern_string(mrs->mr->name); + } + } else { + return g_intern_static_string("RAM"); + } +#else + return g_intern_static_string("Invalid"); +#endif +} + /* * Queries to the number and potential maximum number of vCPUs there * will be. This helps the plugin dimension per-vcpu arrays. diff --git a/target/arm/internals.h b/target/arm/internals.h index c38d541017..05cebc8597 100644 --- a/target/arm/internals.h +++ b/target/arm/internals.h @@ -173,8 +173,7 @@ void arm_cpu_register_gdb_regs_for_features(ARMCPU *cpu); void arm_translate_init(void); #ifdef CONFIG_TCG -void arm_cpu_synchronize_from_tb(CPUState *cs, - const struct TranslationBlock *tb); +void arm_cpu_synchronize_from_tb(CPUState *cs, const TranslationBlock *tb); #endif /* CONFIG_TCG */ diff --git a/target/cris/translate.c b/target/cris/translate.c index c893f877ab..65c168c0c7 100644 --- a/target/cris/translate.c +++ b/target/cris/translate.c @@ -132,7 +132,7 @@ typedef struct DisasContext { int delayed_branch; - struct TranslationBlock *tb; + TranslationBlock *tb; int singlestep_enabled; } DisasContext; diff --git a/target/lm32/translate.c b/target/lm32/translate.c index 030b232d66..20c70d03f1 100644 --- a/target/lm32/translate.c +++ b/target/lm32/translate.c @@ -93,7 +93,7 @@ typedef struct DisasContext { unsigned int tb_flags, synced_flags; /* tb dependent flags. 
*/ int is_jmp; - struct TranslationBlock *tb; + TranslationBlock *tb; int singlestep_enabled; uint32_t features; diff --git a/target/mips/cpu.c b/target/mips/cpu.c index ad163ead62..bf70c77295 100644 --- a/target/mips/cpu.c +++ b/target/mips/cpu.c @@ -268,6 +268,23 @@ static void mips_cpu_synchronize_from_tb(CPUState *cs, env->hflags &= ~MIPS_HFLAG_BMASK; env->hflags |= tb->flags & MIPS_HFLAG_BMASK; } + +# ifndef CONFIG_USER_ONLY +static bool mips_io_recompile_replay_branch(CPUState *cs, + const TranslationBlock *tb) +{ + MIPSCPU *cpu = MIPS_CPU(cs); + CPUMIPSState *env = &cpu->env; + + if ((env->hflags & MIPS_HFLAG_BMASK) != 0 + && env->active_tc.PC != tb->pc) { + env->active_tc.PC -= (env->hflags & MIPS_HFLAG_B16 ? 2 : 4); + env->hflags &= ~MIPS_HFLAG_BMASK; + return true; + } + return false; +} +# endif /* !CONFIG_USER_ONLY */ #endif /* CONFIG_TCG */ static bool mips_cpu_has_work(CPUState *cs) @@ -679,6 +696,7 @@ static struct TCGCPUOps mips_tcg_ops = { .do_interrupt = mips_cpu_do_interrupt, .do_transaction_failed = mips_cpu_do_transaction_failed, .do_unaligned_access = mips_cpu_do_unaligned_access, + .io_recompile_replay_branch = mips_io_recompile_replay_branch, #endif /* !CONFIG_USER_ONLY */ }; #endif /* CONFIG_TCG */ diff --git a/target/moxie/translate.c b/target/moxie/translate.c index d5fb27dfb8..24a742b25e 100644 --- a/target/moxie/translate.c +++ b/target/moxie/translate.c @@ -36,7 +36,7 @@ /* This is the state at translation time. */ typedef struct DisasContext { - struct TranslationBlock *tb; + TranslationBlock *tb; target_ulong pc, saved_pc; uint32_t opcode; uint32_t fp_status; diff --git a/target/sh4/cpu.c b/target/sh4/cpu.c index a78d283bc8..ac65c88f1f 100644 --- a/target/sh4/cpu.c +++ b/target/sh4/cpu.c @@ -43,6 +43,23 @@ static void superh_cpu_synchronize_from_tb(CPUState *cs, cpu->env.flags = tb->flags & TB_FLAG_ENVFLAGS_MASK; } +#ifndef CONFIG_USER_ONLY +static bool superh_io_recompile_replay_branch(CPUState *cs, + const TranslationBlock *tb) +{ + SuperHCPU *cpu = SUPERH_CPU(cs); + CPUSH4State *env = &cpu->env; + + if ((env->flags & ((DELAY_SLOT | DELAY_SLOT_CONDITIONAL))) != 0 + && env->pc != tb->pc) { + env->pc -= 2; + env->flags &= ~(DELAY_SLOT | DELAY_SLOT_CONDITIONAL); + return true; + } + return false; +} +#endif + static bool superh_cpu_has_work(CPUState *cs) { return cs->interrupt_request & CPU_INTERRUPT_HARD; @@ -217,6 +234,7 @@ static struct TCGCPUOps superh_tcg_ops = { #ifndef CONFIG_USER_ONLY .do_interrupt = superh_cpu_do_interrupt, .do_unaligned_access = superh_cpu_do_unaligned_access, + .io_recompile_replay_branch = superh_io_recompile_replay_branch, #endif /* !CONFIG_USER_ONLY */ }; diff --git a/target/unicore32/translate.c b/target/unicore32/translate.c index 962f9877a0..370709c9ea 100644 --- a/target/unicore32/translate.c +++ b/target/unicore32/translate.c @@ -34,7 +34,7 @@ typedef struct DisasContext { int condjmp; /* The label that will be jumped to when the instruction is skipped. */ TCGLabel *condlabel; - struct TranslationBlock *tb; + TranslationBlock *tb; int singlestep_enabled; #ifndef CONFIG_USER_ONLY int user; diff --git a/tests/acceptance/tcg_plugins.py b/tests/acceptance/tcg_plugins.py new file mode 100644 index 0000000000..c21bf9e52a --- /dev/null +++ b/tests/acceptance/tcg_plugins.py @@ -0,0 +1,148 @@ +# TCG Plugins tests +# +# These are a little more involved than the basic tests run by check-tcg. 
+# +# Copyright (c) 2021 Linaro +# +# Author: +# Alex Bennée +# +# SPDX-License-Identifier: GPL-2.0-or-later + +import tempfile +import mmap +import re + +from boot_linux_console import LinuxKernelTest + + +class PluginKernelBase(LinuxKernelTest): + """ + Boots a Linux kernel with a TCG plugin enabled. + """ + + timeout = 120 + KERNEL_COMMON_COMMAND_LINE = 'printk.time=1 panic=-1 ' + + def run_vm(self, kernel_path, kernel_command_line, + plugin, plugin_log, console_pattern, args): + + vm = self.get_vm() + vm.set_console() + vm.add_args('-kernel', kernel_path, + '-append', kernel_command_line, + '-plugin', plugin, + '-d', 'plugin', + '-D', plugin_log, + '-net', 'none', + '-no-reboot') + if args: + vm.add_args(*args) + + try: + vm.launch() + except: + # TODO: probably fails because plugins not enabled but we + # can't currently probe for the feature. + self.cancel("TCG Plugins not enabled?") + + self.wait_for_console_pattern(console_pattern, vm) + # ensure logs are flushed + vm.shutdown() + + +class PluginKernelNormal(PluginKernelBase): + + def _grab_aarch64_kernel(self): + kernel_url = ('http://security.debian.org/' + 'debian-security/pool/updates/main/l/linux-signed-arm64/' + 'linux-image-4.19.0-12-arm64_4.19.152-1_arm64.deb') + kernel_sha1 = '2036c2792f80ac9c4ccaae742b2e0a28385b6010' + kernel_deb = self.fetch_asset(kernel_url, asset_hash=kernel_sha1) + kernel_path = self.extract_from_deb(kernel_deb, + "/boot/vmlinuz-4.19.0-12-arm64") + return kernel_path + + def test_aarch64_virt_insn(self): + """ + :avocado: tags=accel:tcg + :avocado: tags=arch:aarch64 + :avocado: tags=machine:virt + :avocado: tags=cpu:cortex-a57 + """ + kernel_path = self._grab_aarch64_kernel() + kernel_command_line = (self.KERNEL_COMMON_COMMAND_LINE + + 'console=ttyAMA0') + console_pattern = 'Kernel panic - not syncing: VFS:' + + plugin_log = tempfile.NamedTemporaryFile(mode="r+t", prefix="plugin", + suffix=".log") + + self.run_vm(kernel_path, kernel_command_line, + "tests/plugin/libinsn.so", plugin_log.name, + console_pattern, + args=('-cpu', 'cortex-a53')) + + with plugin_log as lf, \ + mmap.mmap(lf.fileno(), 0, access=mmap.ACCESS_READ) as s: + + m = re.search(br"insns: (?P\d+)", s) + if "count" not in m.groupdict(): + self.fail("Failed to find instruction count") + + def test_aarch64_virt_insn_icount(self): + """ + :avocado: tags=accel:tcg + :avocado: tags=arch:aarch64 + :avocado: tags=machine:virt + :avocado: tags=cpu:cortex-a57 + """ + kernel_path = self._grab_aarch64_kernel() + kernel_command_line = (self.KERNEL_COMMON_COMMAND_LINE + + 'console=ttyAMA0') + console_pattern = 'Kernel panic - not syncing: VFS:' + + plugin_log = tempfile.NamedTemporaryFile(mode="r+t", prefix="plugin", + suffix=".log") + + self.run_vm(kernel_path, kernel_command_line, + "tests/plugin/libinsn.so", plugin_log.name, + console_pattern, + args=('-cpu', 'cortex-a53', '-icount', 'shift=1')) + + with plugin_log as lf, \ + mmap.mmap(lf.fileno(), 0, access=mmap.ACCESS_READ) as s: + m = re.search(br"detected repeat execution @ (?P0x[0-9A-Fa-f]+)", s) + if m is not None and "addr" in m.groupdict(): + self.fail("detected repeated instructions") + + def test_aarch64_virt_mem_icount(self): + """ + :avocado: tags=accel:tcg + :avocado: tags=arch:aarch64 + :avocado: tags=machine:virt + :avocado: tags=cpu:cortex-a57 + """ + kernel_path = self._grab_aarch64_kernel() + kernel_command_line = (self.KERNEL_COMMON_COMMAND_LINE + + 'console=ttyAMA0') + console_pattern = 'Kernel panic - not syncing: VFS:' + + plugin_log = tempfile.NamedTemporaryFile(mode="r+t", 
prefix="plugin", + suffix=".log") + + self.run_vm(kernel_path, kernel_command_line, + "tests/plugin/libmem.so,arg=both", plugin_log.name, + console_pattern, + args=('-cpu', 'cortex-a53', '-icount', 'shift=1')) + + with plugin_log as lf, \ + mmap.mmap(lf.fileno(), 0, access=mmap.ACCESS_READ) as s: + m = re.findall(br"mem accesses: (?P\d+)", s) + if m is None or len(m) != 2: + self.fail("no memory access counts found") + else: + inline = int(m[0]) + callback = int(m[1]) + if inline != callback: + self.fail("mismatched access counts") diff --git a/tests/plugin/insn.c b/tests/plugin/insn.c index a9a6e41237..c253980ec8 100644 --- a/tests/plugin/insn.c +++ b/tests/plugin/insn.c @@ -21,6 +21,14 @@ static bool do_inline; static void vcpu_insn_exec_before(unsigned int cpu_index, void *udata) { + static uint64_t last_pc; + uint64_t this_pc = GPOINTER_TO_UINT(udata); + if (this_pc == last_pc) { + g_autofree gchar *out = g_strdup_printf("detected repeat execution @ 0x%" + PRIx64 "\n", this_pc); + qemu_plugin_outs(out); + } + last_pc = this_pc; insn_count++; } @@ -36,8 +44,10 @@ static void vcpu_tb_trans(qemu_plugin_id_t id, struct qemu_plugin_tb *tb) qemu_plugin_register_vcpu_insn_exec_inline( insn, QEMU_PLUGIN_INLINE_ADD_U64, &insn_count, 1); } else { + uint64_t vaddr = qemu_plugin_insn_vaddr(insn); qemu_plugin_register_vcpu_insn_exec_cb( - insn, vcpu_insn_exec_before, QEMU_PLUGIN_CB_NO_REGS, NULL); + insn, vcpu_insn_exec_before, QEMU_PLUGIN_CB_NO_REGS, + GUINT_TO_POINTER(vaddr)); } } } diff --git a/tests/plugin/mem.c b/tests/plugin/mem.c index 4725bd851d..afd1d27e5c 100644 --- a/tests/plugin/mem.c +++ b/tests/plugin/mem.c @@ -16,9 +16,10 @@ QEMU_PLUGIN_EXPORT int qemu_plugin_version = QEMU_PLUGIN_VERSION; -static uint64_t mem_count; +static uint64_t inline_mem_count; +static uint64_t cb_mem_count; static uint64_t io_count; -static bool do_inline; +static bool do_inline, do_callback; static bool do_haddr; static enum qemu_plugin_mem_rw rw = QEMU_PLUGIN_MEM_RW; @@ -26,7 +27,12 @@ static void plugin_exit(qemu_plugin_id_t id, void *p) { g_autoptr(GString) out = g_string_new(""); - g_string_printf(out, "mem accesses: %" PRIu64 "\n", mem_count); + if (do_inline) { + g_string_printf(out, "inline mem accesses: %" PRIu64 "\n", inline_mem_count); + } + if (do_callback) { + g_string_append_printf(out, "callback mem accesses: %" PRIu64 "\n", cb_mem_count); + } if (do_haddr) { g_string_append_printf(out, "io accesses: %" PRIu64 "\n", io_count); } @@ -42,10 +48,10 @@ static void vcpu_mem(unsigned int cpu_index, qemu_plugin_meminfo_t meminfo, if (qemu_plugin_hwaddr_is_io(hwaddr)) { io_count++; } else { - mem_count++; + cb_mem_count++; } } else { - mem_count++; + cb_mem_count++; } } @@ -60,8 +66,9 @@ static void vcpu_tb_trans(qemu_plugin_id_t id, struct qemu_plugin_tb *tb) if (do_inline) { qemu_plugin_register_vcpu_mem_inline(insn, rw, QEMU_PLUGIN_INLINE_ADD_U64, - &mem_count, 1); - } else { + &inline_mem_count, 1); + } + if (do_callback) { qemu_plugin_register_vcpu_mem_cb(insn, vcpu_mem, QEMU_PLUGIN_CB_NO_REGS, rw, NULL); @@ -90,6 +97,12 @@ QEMU_PLUGIN_EXPORT int qemu_plugin_install(qemu_plugin_id_t id, } if (!strcmp(argv[0], "inline")) { do_inline = true; + do_callback = false; + } else if (!strcmp(argv[0], "both")) { + do_inline = true; + do_callback = true; + } else { + do_callback = true; } } diff --git a/tests/tcg/i386/Makefile.softmmu-target b/tests/tcg/i386/Makefile.softmmu-target index 5266f2335a..fa9b1b9f90 100644 --- a/tests/tcg/i386/Makefile.softmmu-target +++ b/tests/tcg/i386/Makefile.softmmu-target @@ 
-33,5 +33,15 @@ EXTRA_RUNS+=$(MULTIARCH_RUNS) memory: CFLAGS+=-DCHECK_UNALIGNED=1 +# non-inline runs will trigger the duplicate instruction heuristics in libinsn.so +run-plugin-%-with-libinsn.so: + $(call run-test, $@, \ + $(QEMU) -monitor none -display none \ + -chardev file$(COMMA)path=$@.out$(COMMA)id=output \ + -plugin ../../plugin/libinsn.so$(COMMA)arg=inline \ + -d plugin -D $*-with-libinsn.so.pout \ + $(QEMU_OPTS) $*, \ + "$* on $(TARGET_NAME)") + # Running QEMU_OPTS+=-device isa-debugcon,chardev=output -device isa-debug-exit,iobase=0xf4,iosize=0x4 -kernel diff --git a/tests/tcg/i386/Makefile.target b/tests/tcg/i386/Makefile.target index ad187cb2c9..c4a6f91966 100644 --- a/tests/tcg/i386/Makefile.target +++ b/tests/tcg/i386/Makefile.target @@ -48,6 +48,13 @@ else SKIP_I386_TESTS+=test-i386-fprem endif +# non-inline runs will trigger the duplicate instruction heuristics in libinsn.so +run-plugin-%-with-libinsn.so: + $(call run-test, $@, $(QEMU) $(QEMU_OPTS) \ + -plugin ../../plugin/libinsn.so$(COMMA)arg=inline \ + -d plugin -D $*-with-libinsn.so.pout $*, \ + "$* (inline) on $(TARGET_NAME)") + # Update TESTS I386_TESTS:=$(filter-out $(SKIP_I386_TESTS), $(ALL_X86_TESTS)) TESTS=$(MULTIARCH_TESTS) $(I386_TESTS) diff --git a/tests/tcg/x86_64/Makefile.softmmu-target b/tests/tcg/x86_64/Makefile.softmmu-target index 1bd763f2e6..9896319f0e 100644 --- a/tests/tcg/x86_64/Makefile.softmmu-target +++ b/tests/tcg/x86_64/Makefile.softmmu-target @@ -33,5 +33,15 @@ EXTRA_RUNS+=$(MULTIARCH_RUNS) memory: CFLAGS+=-DCHECK_UNALIGNED=1 +# non-inline runs will trigger the duplicate instruction heuristics in libinsn.so +run-plugin-%-with-libinsn.so: + $(call run-test, $@, \ + $(QEMU) -monitor none -display none \ + -chardev file$(COMMA)path=$@.out$(COMMA)id=output \ + -plugin ../../plugin/libinsn.so$(COMMA)arg=inline \ + -d plugin -D $*-with-libinsn.so.pout \ + $(QEMU_OPTS) $*, \ + "$* on $(TARGET_NAME)") + # Running QEMU_OPTS+=-device isa-debugcon,chardev=output -device isa-debug-exit,iobase=0xf4,iosize=0x4 -kernel