From cf9b5790db77e5efdae589acff8a98165d2543e0 Mon Sep 17 00:00:00 2001
From: Richard Henderson
Date: Fri, 10 Nov 2023 08:21:23 -0800
Subject: [PATCH 1/3] accel/tcg: Remove CF_LAST_IO
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

In cpu_exec_step_atomic, we did not set CF_LAST_IO, which led to a loop
with cpu_io_recompile. But since 18a536f1f8 ("Always require
can_do_io") we no longer need a flag to indicate when the last insn
should have can_do_io set, so remove the flag entirely.

Reported-by: Clément Chigot
Tested-by: Clément Chigot
Reviewed-by: Claudio Fontana
Resolves: https://gitlab.com/qemu-project/qemu/-/issues/1961
Signed-off-by: Richard Henderson
---
 accel/tcg/cpu-exec.c             |  2 +-
 accel/tcg/tb-maint.c             |  6 ++----
 accel/tcg/translate-all.c        |  4 ++--
 accel/tcg/translator.c           | 22 +++++++++-------------
 docs/devel/tcg-icount.rst        |  6 ------
 include/exec/translation-block.h | 13 ++++++-------
 system/watchpoint.c              |  6 ++----
 7 files changed, 22 insertions(+), 37 deletions(-)

diff --git a/accel/tcg/cpu-exec.c b/accel/tcg/cpu-exec.c
index 1a5bc90220..c938eb96f8 100644
--- a/accel/tcg/cpu-exec.c
+++ b/accel/tcg/cpu-exec.c
@@ -721,7 +721,7 @@ static inline bool cpu_handle_exception(CPUState *cpu, int *ret)
         && cpu->neg.icount_decr.u16.low + cpu->icount_extra == 0) {
         /* Execute just one insn to trigger exception pending in the log */
         cpu->cflags_next_tb = (curr_cflags(cpu) & ~CF_USE_ICOUNT)
-                              | CF_LAST_IO | CF_NOIRQ | 1;
+                              | CF_NOIRQ | 1;
     }
 #endif
     return false;
diff --git a/accel/tcg/tb-maint.c b/accel/tcg/tb-maint.c
index e678d20dc2..3d2a896220 100644
--- a/accel/tcg/tb-maint.c
+++ b/accel/tcg/tb-maint.c
@@ -1083,8 +1083,7 @@ bool tb_invalidate_phys_page_unwind(tb_page_addr_t addr, uintptr_t pc)
     if (current_tb_modified) {
         /* Force execution of one insn next time. */
         CPUState *cpu = current_cpu;
-        cpu->cflags_next_tb =
-            1 | CF_LAST_IO | CF_NOIRQ | curr_cflags(current_cpu);
+        cpu->cflags_next_tb = 1 | CF_NOIRQ | curr_cflags(current_cpu);
         return true;
     }
     return false;
@@ -1154,8 +1153,7 @@ tb_invalidate_phys_page_range__locked(struct page_collection *pages,
     if (current_tb_modified) {
         page_collection_unlock(pages);
         /* Force execution of one insn next time. */
-        current_cpu->cflags_next_tb =
-            1 | CF_LAST_IO | CF_NOIRQ | curr_cflags(current_cpu);
+        current_cpu->cflags_next_tb = 1 | CF_NOIRQ | curr_cflags(current_cpu);
         mmap_unlock();
         cpu_loop_exit_noexc(current_cpu);
     }
diff --git a/accel/tcg/translate-all.c b/accel/tcg/translate-all.c
index b263857ecc..79a88f5fb7 100644
--- a/accel/tcg/translate-all.c
+++ b/accel/tcg/translate-all.c
@@ -304,7 +304,7 @@ TranslationBlock *tb_gen_code(CPUState *cpu,
 
     if (phys_pc == -1) {
         /* Generate a one-shot TB with 1 insn in it */
-        cflags = (cflags & ~CF_COUNT_MASK) | CF_LAST_IO | 1;
+        cflags = (cflags & ~CF_COUNT_MASK) | 1;
     }
 
     max_insns = cflags & CF_COUNT_MASK;
@@ -632,7 +632,7 @@ void cpu_io_recompile(CPUState *cpu, uintptr_t retaddr)
      * operations only (which execute after completion) so we don't
      * double instrument the instruction.
      */
-    cpu->cflags_next_tb = curr_cflags(cpu) | CF_MEMI_ONLY | CF_LAST_IO | n;
+    cpu->cflags_next_tb = curr_cflags(cpu) | CF_MEMI_ONLY | n;
 
     if (qemu_loglevel_mask(CPU_LOG_EXEC)) {
         vaddr pc = log_pc(cpu, tb);
diff --git a/accel/tcg/translator.c b/accel/tcg/translator.c
index 575b9812ad..38c34009a5 100644
--- a/accel/tcg/translator.c
+++ b/accel/tcg/translator.c
@@ -89,7 +89,7 @@ static TCGOp *gen_tb_start(DisasContextBase *db, uint32_t cflags)
      * each translation block. The cost is minimal, plus it would be
      * very easy to forget doing it in the translator.
      */
-    set_can_do_io(db, db->max_insns == 1 && (cflags & CF_LAST_IO));
+    set_can_do_io(db, db->max_insns == 1);
 
     return icount_start_insn;
 }
@@ -151,13 +151,7 @@ void translator_loop(CPUState *cpu, TranslationBlock *tb, int *max_insns,
     ops->tb_start(db, cpu);
     tcg_debug_assert(db->is_jmp == DISAS_NEXT); /* no early exit */
 
-    if (cflags & CF_MEMI_ONLY) {
-        /* We should only see CF_MEMI_ONLY for io_recompile. */
-        assert(cflags & CF_LAST_IO);
-        plugin_enabled = plugin_gen_tb_start(cpu, db, true);
-    } else {
-        plugin_enabled = plugin_gen_tb_start(cpu, db, false);
-    }
+    plugin_enabled = plugin_gen_tb_start(cpu, db, cflags & CF_MEMI_ONLY);
     db->plugin_enabled = plugin_enabled;
 
     while (true) {
@@ -169,11 +163,13 @@ void translator_loop(CPUState *cpu, TranslationBlock *tb, int *max_insns,
             plugin_gen_insn_start(cpu, db);
         }
 
-        /* Disassemble one instruction. The translate_insn hook should
-           update db->pc_next and db->is_jmp to indicate what should be
-           done next -- either exiting this loop or locate the start of
-           the next instruction. */
-        if (db->num_insns == db->max_insns && (cflags & CF_LAST_IO)) {
+        /*
+         * Disassemble one instruction. The translate_insn hook should
+         * update db->pc_next and db->is_jmp to indicate what should be
+         * done next -- either exiting this loop or locate the start of
+         * the next instruction.
+         */
+        if (db->num_insns == db->max_insns) {
             /* Accept I/O on the last instruction. */
             set_can_do_io(db, true);
         }
diff --git a/docs/devel/tcg-icount.rst b/docs/devel/tcg-icount.rst
index 50c8e8dabc..7df883446a 100644
--- a/docs/devel/tcg-icount.rst
+++ b/docs/devel/tcg-icount.rst
@@ -62,12 +62,6 @@ To deal with this case, when an I/O access is made we:
 - re-compile a single [1]_ instruction block for the current PC
 - exit the cpu loop and execute the re-compiled block
 
-The new block is created with the CF_LAST_IO compile flag which
-ensures the final instruction translation starts with a call to
-gen_io_start() so we don't enter a perpetual loop constantly
-recompiling a single instruction block. For translators using the
-common translator_loop this is done automatically.
-
 .. [1] sometimes two instructions if dealing with delay slots
 
 Other I/O operations
diff --git a/include/exec/translation-block.h b/include/exec/translation-block.h
index b785751774..e2b26e16da 100644
--- a/include/exec/translation-block.h
+++ b/include/exec/translation-block.h
@@ -71,13 +71,12 @@ struct TranslationBlock {
 #define CF_NO_GOTO_TB    0x00000200 /* Do not chain with goto_tb */
 #define CF_NO_GOTO_PTR   0x00000400 /* Do not chain with goto_ptr */
 #define CF_SINGLE_STEP   0x00000800 /* gdbstub single-step in effect */
-#define CF_LAST_IO       0x00008000 /* Last insn may be an IO access. */
-#define CF_MEMI_ONLY     0x00010000 /* Only instrument memory ops */
-#define CF_USE_ICOUNT    0x00020000
-#define CF_INVALID       0x00040000 /* TB is stale. Set with @jmp_lock held */
-#define CF_PARALLEL      0x00080000 /* Generate code for a parallel context */
-#define CF_NOIRQ         0x00100000 /* Generate an uninterruptible TB */
-#define CF_PCREL         0x00200000 /* Opcodes in TB are PC-relative */
+#define CF_MEMI_ONLY     0x00001000 /* Only instrument memory ops */
+#define CF_USE_ICOUNT    0x00002000
+#define CF_INVALID       0x00004000 /* TB is stale. Set with @jmp_lock held */
+#define CF_PARALLEL      0x00008000 /* Generate code for a parallel context */
+#define CF_NOIRQ         0x00010000 /* Generate an uninterruptible TB */
+#define CF_PCREL         0x00020000 /* Opcodes in TB are PC-relative */
 #define CF_CLUSTER_MASK  0xff000000 /* Top 8 bits are cluster ID */
 #define CF_CLUSTER_SHIFT 24
 
diff --git a/system/watchpoint.c b/system/watchpoint.c
index 45d1f12faf..ba5ad13352 100644
--- a/system/watchpoint.c
+++ b/system/watchpoint.c
@@ -179,8 +179,7 @@ void cpu_check_watchpoint(CPUState *cpu, vaddr addr, vaddr len,
          */
         if (!cpu->neg.can_do_io) {
             /* Force execution of one insn next time. */
-            cpu->cflags_next_tb = 1 | CF_LAST_IO | CF_NOIRQ
-                                | curr_cflags(cpu);
+            cpu->cflags_next_tb = 1 | CF_NOIRQ | curr_cflags(cpu);
             cpu_loop_exit_restore(cpu, ra);
         }
         /*
@@ -212,8 +211,7 @@ void cpu_check_watchpoint(CPUState *cpu, vaddr addr, vaddr len,
                 cpu_loop_exit(cpu);
             } else {
                 /* Force execution of one insn next time. */
-                cpu->cflags_next_tb = 1 | CF_LAST_IO | CF_NOIRQ
-                                    | curr_cflags(cpu);
+                cpu->cflags_next_tb = 1 | CF_NOIRQ | curr_cflags(cpu);
                 mmap_unlock();
                 cpu_loop_exit_noexc(cpu);
             }

From e2faabee78ff127848f59892747d4c07c56de033 Mon Sep 17 00:00:00 2001
From: Jessica Clarke
Date: Fri, 10 Nov 2023 21:43:03 -0800
Subject: [PATCH 2/3] accel/tcg: Forward probe size on to notdirty_write

Without this, we just dirty a single byte, and so if the caller writes
more than one byte to the host memory then we won't have invalidated
any translation blocks that start after the first byte and overlap
those writes. In particular, AArch64's DC ZVA implementation uses
probe_access (via probe_write), and so we don't invalidate the entire
block, only the TB overlapping the first byte (and, in the unusual case
an unaligned VA is given to the instruction, we also probe that
specific address in order to get the right VA reported on an exception,
so will invalidate a TB overlapping that address too).

Since our IC IVAU implementation is a no-op for system emulation that
relies on the softmmu already having detected self-modifying code via
this mechanism, this means we have observably wrong behaviour when
jumping to code that has been DC ZVA'ed. In practice this is an
unusual thing for software to do, as in reality the OS will DC ZVA the
page and the application will go and write actual instructions to it
that aren't UDF #0, but you can write a test that clearly shows the
faulty behaviour.

For functions other than probe_access it's not clear what size to use
when 0 is passed in. Arguably a size of 0 shouldn't dirty at all, since
if you want to actually write then you should pass in a real size, but
I have conservatively kept the implementation as dirtying the first
byte in that case so as to avoid breaking any assumptions about that
behaviour.

Signed-off-by: Jessica Clarke
Message-Id: <20231104031232.3246614-1-jrtc27@jrtc27.com>
[rth: Move the dirtysize computation next to notdirty_write.]
Reviewed-by: Richard Henderson
Signed-off-by: Richard Henderson
---
 accel/tcg/cputlb.c | 11 +++++++----
 1 file changed, 7 insertions(+), 4 deletions(-)

diff --git a/accel/tcg/cputlb.c b/accel/tcg/cputlb.c
index 765805e70b..db3f93fda9 100644
--- a/accel/tcg/cputlb.c
+++ b/accel/tcg/cputlb.c
@@ -1479,7 +1479,8 @@ int probe_access_full(CPUArchState *env, vaddr addr, int size,
 
     /* Handle clean RAM pages. */
     if (unlikely(flags & TLB_NOTDIRTY)) {
-        notdirty_write(env_cpu(env), addr, 1, *pfull, retaddr);
+        int dirtysize = size == 0 ? 1 : size;
+        notdirty_write(env_cpu(env), addr, dirtysize, *pfull, retaddr);
         flags &= ~TLB_NOTDIRTY;
     }
 
@@ -1502,7 +1503,8 @@ int probe_access_full_mmu(CPUArchState *env, vaddr addr, int size,
 
     /* Handle clean RAM pages. */
     if (unlikely(flags & TLB_NOTDIRTY)) {
-        notdirty_write(env_cpu(env), addr, 1, *pfull, 0);
+        int dirtysize = size == 0 ? 1 : size;
+        notdirty_write(env_cpu(env), addr, dirtysize, *pfull, 0);
         flags &= ~TLB_NOTDIRTY;
     }
 
@@ -1524,7 +1526,8 @@ int probe_access_flags(CPUArchState *env, vaddr addr, int size,
 
     /* Handle clean RAM pages. */
     if (unlikely(flags & TLB_NOTDIRTY)) {
-        notdirty_write(env_cpu(env), addr, 1, full, retaddr);
+        int dirtysize = size == 0 ? 1 : size;
+        notdirty_write(env_cpu(env), addr, dirtysize, full, retaddr);
         flags &= ~TLB_NOTDIRTY;
     }
 
@@ -1560,7 +1563,7 @@ void *probe_access(CPUArchState *env, vaddr addr, int size,
 
         /* Handle clean RAM pages. */
         if (flags & TLB_NOTDIRTY) {
-            notdirty_write(env_cpu(env), addr, 1, full, retaddr);
+            notdirty_write(env_cpu(env), addr, size, full, retaddr);
         }
     }
 

From 0dfae4f94813995c365bfabac22cba1cea8758bd Mon Sep 17 00:00:00 2001
From: Richard Henderson
Date: Wed, 8 Nov 2023 12:47:00 -0800
Subject: [PATCH 3/3] target/sparc: Fix RETURN
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Perform window restore before pc update. Required in order to
recognize any window underflow trap with the current pc.

Fixes: 86b82fe021f4 ("target/sparc: Move JMPL, RETT, RETURN to decodetree")
Reported-by: Mark Cave-Ayland
Reviewed-by: Philippe Mathieu-Daudé
Tested-by: Mark Cave-Ayland
Acked-by: Mark Cave-Ayland
Signed-off-by: Richard Henderson
---
 target/sparc/translate.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/target/sparc/translate.c b/target/sparc/translate.c
index 6fc333a6b8..9387299559 100644
--- a/target/sparc/translate.c
+++ b/target/sparc/translate.c
@@ -4096,12 +4096,12 @@ TRANS(RETT, 32, do_add_special, a, do_rett)
 static bool do_return(DisasContext *dc, int rd, TCGv src)
 {
     gen_check_align(dc, src, 3);
+    gen_helper_restore(tcg_env);
 
     gen_mov_pc_npc(dc);
     tcg_gen_mov_tl(cpu_npc, src);
     gen_address_mask(dc, cpu_npc);
 
-    gen_helper_restore(tcg_env);
     dc->npc = DYNAMIC_PC_LOOKUP;
     return true;
 }
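
As a standalone illustration of the overlap problem fixed by PATCH 2/3, here
is a minimal sketch, not part of the series above. The MockTB type and the
ranges_overlap() helper are invented for this example only; QEMU's real path
runs through notdirty_write() and the TB invalidation machinery. A 64-byte
DC ZVA block size is assumed, which is typical but implementation-defined.

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

/* Stand-in for a translation block covering guest bytes [start, start + len). */
typedef struct {
    uint64_t start;
    uint64_t len;
} MockTB;

/* True if the dirtied range [addr, addr + size) touches any byte of the TB. */
static bool ranges_overlap(const MockTB *tb, uint64_t addr, uint64_t size)
{
    return addr < tb->start + tb->len && tb->start < addr + size;
}

int main(void)
{
    /* A TB whose code starts 0x20 bytes into the block being zeroed. */
    MockTB tb = { .start = 0x1020, .len = 0x20 };

    /* DC ZVA clears one whole block; 64 bytes is a typical block size. */
    uint64_t zva_addr = 0x1000, zva_size = 0x40;

    /* Pre-patch behaviour: only the first byte was dirtied, so the TB is missed. */
    printf("dirty 1 byte  : invalidate TB? %s\n",
           ranges_overlap(&tb, zva_addr, 1) ? "yes" : "no");

    /* Post-patch behaviour: the full probe size is forwarded, so the TB is caught. */
    printf("dirty %u bytes: invalidate TB? %s\n",
           (unsigned)zva_size,
           ranges_overlap(&tb, zva_addr, zva_size) ? "yes" : "no");
    return 0;
}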