diff --git a/include/qemu/cacheflush.h b/include/qemu/cacheflush.h
index 58ae488491..ae20bcda73 100644
--- a/include/qemu/cacheflush.h
+++ b/include/qemu/cacheflush.h
@@ -8,16 +8,27 @@
 #ifndef QEMU_CACHEFLUSH_H
 #define QEMU_CACHEFLUSH_H
 
+/**
+ * flush_idcache_range:
+ * @rx: instruction address
+ * @rw: data address
+ * @len: length to flush
+ *
+ * Flush @len bytes of the data cache at @rw and the icache at @rx
+ * to bring them in sync.  The two addresses may be different virtual
+ * mappings of the same physical page(s).
+ */
+
 #if defined(__i386__) || defined(__x86_64__) || defined(__s390__)
 
-static inline void flush_icache_range(uintptr_t start, uintptr_t stop)
+static inline void flush_idcache_range(uintptr_t rx, uintptr_t rw, size_t len)
 {
     /* icache is coherent and does not require flushing. */
 }
 
 #else
 
-void flush_icache_range(uintptr_t start, uintptr_t stop);
+void flush_idcache_range(uintptr_t rx, uintptr_t rw, size_t len);
 
 #endif
diff --git a/softmmu/physmem.c b/softmmu/physmem.c
index 8b9ffc41c2..6301f4f0a5 100644
--- a/softmmu/physmem.c
+++ b/softmmu/physmem.c
@@ -2946,7 +2946,7 @@ static inline MemTxResult address_space_write_rom_internal(AddressSpace *as,
             invalidate_and_set_dirty(mr, addr1, l);
             break;
         case FLUSH_CACHE:
-            flush_icache_range((uintptr_t)ram_ptr, (uintptr_t)ram_ptr + l);
+            flush_idcache_range((uintptr_t)ram_ptr, (uintptr_t)ram_ptr, l);
             break;
         }
     }
diff --git a/tcg/aarch64/tcg-target.c.inc b/tcg/aarch64/tcg-target.c.inc
index 26f71cb599..83af3108a4 100644
--- a/tcg/aarch64/tcg-target.c.inc
+++ b/tcg/aarch64/tcg-target.c.inc
@@ -1363,7 +1363,7 @@ void tb_target_set_jmp_target(uintptr_t tc_ptr, uintptr_t jmp_addr,
     }
     pair = (uint64_t)i2 << 32 | i1;
     qatomic_set((uint64_t *)jmp_addr, pair);
-    flush_icache_range(jmp_addr, jmp_addr + 8);
+    flush_idcache_range(jmp_addr, jmp_addr, 8);
 }
 
 static inline void tcg_out_goto_label(TCGContext *s, TCGLabel *l)
diff --git a/tcg/mips/tcg-target.c.inc b/tcg/mips/tcg-target.c.inc
index 41be574e89..c255ecb444 100644
--- a/tcg/mips/tcg-target.c.inc
+++ b/tcg/mips/tcg-target.c.inc
@@ -2660,7 +2660,7 @@ void tb_target_set_jmp_target(uintptr_t tc_ptr, uintptr_t jmp_addr,
                               uintptr_t addr)
 {
     qatomic_set((uint32_t *)jmp_addr, deposit32(OPC_J, 0, 26, addr >> 2));
-    flush_icache_range(jmp_addr, jmp_addr + 4);
+    flush_idcache_range(jmp_addr, jmp_addr, 4);
 }
 
 typedef struct {
diff --git a/tcg/ppc/tcg-target.c.inc b/tcg/ppc/tcg-target.c.inc
index 0d068ec8ab..b756281042 100644
--- a/tcg/ppc/tcg-target.c.inc
+++ b/tcg/ppc/tcg-target.c.inc
@@ -1753,12 +1753,12 @@ void tb_target_set_jmp_target(uintptr_t tc_ptr, uintptr_t jmp_addr,
         /* As per the enclosing if, this is ppc64.  Avoid the _Static_assert
            within qatomic_set that would fail to build a ppc32 host.  */
         qatomic_set__nocheck((uint64_t *)jmp_addr, pair);
-        flush_icache_range(jmp_addr, jmp_addr + 8);
+        flush_idcache_range(jmp_addr, jmp_addr, 8);
     } else {
         intptr_t diff = addr - jmp_addr;
         tcg_debug_assert(in_range_b(diff));
         qatomic_set((uint32_t *)jmp_addr, B | (diff & 0x3fffffc));
-        flush_icache_range(jmp_addr, jmp_addr + 4);
+        flush_idcache_range(jmp_addr, jmp_addr, 4);
     }
 }
diff --git a/tcg/sparc/tcg-target.c.inc b/tcg/sparc/tcg-target.c.inc
index 6775bd30fc..6e2d755f6a 100644
--- a/tcg/sparc/tcg-target.c.inc
+++ b/tcg/sparc/tcg-target.c.inc
@@ -1836,7 +1836,7 @@ void tb_target_set_jmp_target(uintptr_t tc_ptr, uintptr_t jmp_addr,
     if (!USE_REG_TB) {
         qatomic_set((uint32_t *)jmp_addr,
                     deposit32(CALL, 0, 30, br_disp >> 2));
-        flush_icache_range(jmp_addr, jmp_addr + 4);
+        flush_idcache_range(jmp_addr, jmp_addr, 4);
         return;
     }
@@ -1860,5 +1860,5 @@ void tb_target_set_jmp_target(uintptr_t tc_ptr, uintptr_t jmp_addr,
     }
     qatomic_set((uint64_t *)jmp_addr, deposit64(i2, 32, 32, i1));
-    flush_icache_range(jmp_addr, jmp_addr + 8);
+    flush_idcache_range(jmp_addr, jmp_addr, 8);
 }
diff --git a/tcg/tcg.c b/tcg/tcg.c
index 9bdc450196..759a41d848 100644
--- a/tcg/tcg.c
+++ b/tcg/tcg.c
@@ -1079,7 +1079,8 @@ void tcg_prologue_init(TCGContext *s)
     buf1 = s->code_ptr;
 
 #ifndef CONFIG_TCG_INTERPRETER
-    flush_icache_range((uintptr_t)buf0, (uintptr_t)buf1);
+    flush_idcache_range((uintptr_t)buf0, (uintptr_t)buf0,
+                        tcg_ptr_byte_diff(buf1, buf0));
 #endif
 
     /* Deduct the prologue from the buffer.  */
@@ -4328,7 +4329,8 @@ int tcg_gen_code(TCGContext *s, TranslationBlock *tb)
 
 #ifndef CONFIG_TCG_INTERPRETER
     /* flush instruction cache */
-    flush_icache_range((uintptr_t)s->code_buf, (uintptr_t)s->code_ptr);
+    flush_idcache_range((uintptr_t)s->code_buf, (uintptr_t)s->code_buf,
+                        tcg_ptr_byte_diff(s->code_ptr, s->code_buf));
 #endif
 
     return tcg_current_code_size(s);
diff --git a/util/cacheflush.c b/util/cacheflush.c
index 2881832a38..92805efe49 100644
--- a/util/cacheflush.c
+++ b/util/cacheflush.c
@@ -21,29 +21,32 @@
 #include <sys/cachectl.h>
 #endif
 
-void flush_icache_range(uintptr_t start, uintptr_t stop)
+void flush_idcache_range(uintptr_t rx, uintptr_t rw, size_t len)
 {
-    cacheflush((void *)start, stop - start, ICACHE);
+    if (rx != rw) {
+        cacheflush((void *)rw, len, DCACHE);
+    }
+    cacheflush((void *)rx, len, ICACHE);
 }
 
 #elif defined(__powerpc__)
 
-void flush_icache_range(uintptr_t start, uintptr_t stop)
+void flush_idcache_range(uintptr_t rx, uintptr_t rw, size_t len)
 {
-    uintptr_t p, start1, stop1;
+    uintptr_t p, b, e;
     size_t dsize = qemu_dcache_linesize;
     size_t isize = qemu_icache_linesize;
 
-    start1 = start & ~(dsize - 1);
-    stop1 = (stop + dsize - 1) & ~(dsize - 1);
-    for (p = start1; p < stop1; p += dsize) {
+    b = rw & ~(dsize - 1);
+    e = (rw + len + dsize - 1) & ~(dsize - 1);
+    for (p = b; p < e; p += dsize) {
         asm volatile ("dcbst 0,%0" : : "r"(p) : "memory");
     }
     asm volatile ("sync" : : : "memory");
 
-    start &= start & ~(isize - 1);
-    stop1 = (stop + isize - 1) & ~(isize - 1);
-    for (p = start1; p < stop1; p += isize) {
+    b = rx & ~(isize - 1);
+    e = (rx + len + isize - 1) & ~(isize - 1);
+    for (p = b; p < e; p += isize) {
         asm volatile ("icbi 0,%0" : : "r"(p) : "memory");
     }
     asm volatile ("sync" : : : "memory");
@@ -52,20 +55,23 @@ void flush_icache_range(uintptr_t start, uintptr_t stop)
 
 #elif defined(__sparc__)
 
-void flush_icache_range(uintptr_t start, uintptr_t stop)
+void flush_idcache_range(uintptr_t rx, uintptr_t rw, size_t len)
 {
-    uintptr_t p;
-
-    for (p = start & -8; p < ((stop + 7) & -8); p += 8) {
+    /* No additional data flush to the RW virtual address required. */
+    uintptr_t p, end = (rx + len + 7) & -8;
+    for (p = rx & -8; p < end; p += 8) {
         __asm__ __volatile__("flush\t%0" : : "r" (p));
     }
 }
 
 #else
 
-void flush_icache_range(uintptr_t start, uintptr_t stop)
+void flush_idcache_range(uintptr_t rx, uintptr_t rw, size_t len)
 {
-    __builtin___clear_cache((char *)start, (char *)stop);
+    if (rw != rx) {
+        __builtin___clear_cache((char *)rw, (char *)rw + len);
+    }
+    __builtin___clear_cache((char *)rx, (char *)rx + len);
 }
 
 #endif
diff --git a/util/cacheinfo.c b/util/cacheinfo.c
index 7804c186b6..b182f0b693 100644
--- a/util/cacheinfo.c
+++ b/util/cacheinfo.c
@@ -166,9 +166,11 @@ static void fallback_cache_info(int *isize, int *dsize)
         *isize = *dsize;
     } else {
 #if defined(_ARCH_PPC)
-        /* For PPC, we're going to use the icache size computed for
-           flush_icache_range.  Which means that we must use the
-           architecture minimum.  */
+        /*
+         * For PPC, we're going to use the cache sizes computed for
+         * flush_idcache_range.  Which means that we must use the
+         * architecture minimum.
+         */
         *isize = *dsize = 16;
 #else
         /* Otherwise, 64 bytes is not uncommon.  */
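
---
Usage note, not part of the patch: the new contract separates the execute
(rx) and write (rw) addresses so that a caller which maps the same code
buffer twice, once writable and once executable, can flush both views
correctly. A minimal sketch of such a caller follows; patch_insn, code_rx
and code_rw are hypothetical names, assuming a split mapping already exists:

/* Illustrative only: code_rx and code_rw are assumed to be two
 * virtual mappings of the same physical code buffer. */
#include <stdint.h>
#include <string.h>
#include "qemu/cacheflush.h"

static void patch_insn(void *code_rx, void *code_rw,
                       const void *insn, size_t len)
{
    /* Write the new instruction bytes through the data (RW) mapping. */
    memcpy(code_rw, insn, len);
    /* Flush the dcache at the RW address and the icache at the RX
     * address, per the flush_idcache_range() documentation above. */
    flush_idcache_range((uintptr_t)code_rx, (uintptr_t)code_rw, len);
}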
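A worked example of the line rounding in the new powerpc implementation,
with invented values: for dsize = 128, rw = 0x1005 and len = 8, b rounds
down to 0x1000 and e rounds up to 0x1080, so the dcbst loop touches exactly
one cache line; the same arithmetic applied to rx with the icache line size
bounds the icbi loop. As a standalone check:

#include <assert.h>
#include <stdint.h>

int main(void)
{
    /* Invented example values, not taken from the patch. */
    uintptr_t rw = 0x1005, len = 8, dsize = 128;
    uintptr_t b = rw & ~(dsize - 1);                     /* 0x1000 */
    uintptr_t e = (rw + len + dsize - 1) & ~(dsize - 1); /* 0x1080 */
    assert(b == 0x1000 && e == 0x1080);  /* exactly one dcbst line */
    return 0;
}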