tcg: Add tlb_fast_offset to TCGContext

Disconnect the layout of ArchCPU from TCG compilation.
Pass the relative offset of 'env' and 'neg.tlb.f' as a parameter.

Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
Richard Henderson 2023-03-27 16:07:15 -07:00
parent 238f43809a
commit d0a9bb5ecb
14 changed files with 110 additions and 66 deletions

accel/tcg/translate-all.c

@@ -355,6 +355,8 @@ TranslationBlock *tb_gen_code(CPUState *cpu,
tcg_ctx->page_bits = TARGET_PAGE_BITS;
tcg_ctx->page_mask = TARGET_PAGE_MASK;
tcg_ctx->tlb_dyn_max_bits = CPU_TLB_DYN_MAX_BITS;
tcg_ctx->tlb_fast_offset =
(int)offsetof(ArchCPU, neg.tlb.f) - (int)offsetof(ArchCPU, env);
#endif
tb_overflow:
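
Only the difference between the two offsetof() values is used, so code generation no longer depends on sizeof(ArchCPU) or on where the TLB sits inside it. A minimal standalone sketch of the idea, using a toy struct rather than QEMU's real ArchCPU layout:

#include <stddef.h>
#include <stdio.h>

typedef struct ToyTLBFast { unsigned long mask; void *table; } ToyTLBFast;

typedef struct ToyCPU {
    ToyTLBFast tlb_f;   /* placed before env, so the offset below is negative */
    int env;            /* stand-in for the architectural state */
} ToyCPU;

int main(void)
{
    /* Mirrors tcg_ctx->tlb_fast_offset above: a signed displacement
       that the backend adds to the env pointer (AREG0) at run time. */
    int tlb_fast_offset =
        (int)offsetof(ToyCPU, tlb_f) - (int)offsetof(ToyCPU, env);

    printf("tlb_fast_offset = %d\n", tlb_fast_offset);
    return 0;
}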

include/exec/cpu-defs.h

@@ -61,12 +61,11 @@
#define NB_MMU_MODES 16
#if !defined(CONFIG_USER_ONLY) && defined(CONFIG_TCG)
#include "exec/tlb-common.h"
/* use a fully associative victim tlb of 8 entries */
#define CPU_VTLB_SIZE 8
#define CPU_TLB_ENTRY_BITS 5
#define CPU_TLB_DYN_MIN_BITS 6
#define CPU_TLB_DYN_DEFAULT_BITS 8
@@ -90,27 +89,6 @@
# endif
# endif
/* Minimalized TLB entry for use by TCG fast path. */
typedef union CPUTLBEntry {
struct {
uint64_t addr_read;
uint64_t addr_write;
uint64_t addr_code;
/*
* Addend to virtual address to get host address. IO accesses
* use the corresponding iotlb value.
*/
uintptr_t addend;
};
/*
* Padding to get a power of two size, as well as index
* access to addr_{read,write,code}.
*/
uint64_t addr_idx[(1 << CPU_TLB_ENTRY_BITS) / sizeof(uint64_t)];
} CPUTLBEntry;
QEMU_BUILD_BUG_ON(sizeof(CPUTLBEntry) != (1 << CPU_TLB_ENTRY_BITS));
#endif /* !CONFIG_USER_ONLY && CONFIG_TCG */
#if !defined(CONFIG_USER_ONLY)
@@ -184,17 +162,6 @@ typedef struct CPUTLBDesc {
CPUTLBEntryFull *fulltlb;
} CPUTLBDesc;
/*
* Data elements that are per MMU mode, accessed by the fast path.
* The structure is aligned to aid loading the pair with one insn.
*/
typedef struct CPUTLBDescFast {
/* Contains (n_entries - 1) << CPU_TLB_ENTRY_BITS */
uintptr_t mask;
/* The array of tlb entries itself. */
CPUTLBEntry *table;
} CPUTLBDescFast QEMU_ALIGNED(2 * sizeof(void *));
/*
* Data elements that are shared between all MMU modes.
*/
@@ -230,10 +197,6 @@ typedef struct CPUTLB {
CPUTLBDescFast f[NB_MMU_MODES];
} CPUTLB;
/* This will be used by TCG backends to compute offsets. */
#define TLB_MASK_TABLE_OFS(IDX) \
((int)offsetof(ArchCPU, neg.tlb.f[IDX]) - (int)offsetof(ArchCPU, env))
#else
typedef struct CPUTLB { } CPUTLB;

include/exec/tlb-common.h (new file, 56 lines)

@@ -0,0 +1,56 @@
/*
* Common definitions for the softmmu tlb
*
* Copyright (c) 2003 Fabrice Bellard
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with this library; if not, see <http://www.gnu.org/licenses/>.
*/
#ifndef EXEC_TLB_COMMON_H
#define EXEC_TLB_COMMON_H 1
#define CPU_TLB_ENTRY_BITS 5
/* Minimalized TLB entry for use by TCG fast path. */
typedef union CPUTLBEntry {
struct {
uint64_t addr_read;
uint64_t addr_write;
uint64_t addr_code;
/*
* Addend to virtual address to get host address. IO accesses
* use the corresponding iotlb value.
*/
uintptr_t addend;
};
/*
* Padding to get a power of two size, as well as index
* access to addr_{read,write,code}.
*/
uint64_t addr_idx[(1 << CPU_TLB_ENTRY_BITS) / sizeof(uint64_t)];
} CPUTLBEntry;
QEMU_BUILD_BUG_ON(sizeof(CPUTLBEntry) != (1 << CPU_TLB_ENTRY_BITS));
/*
* Data elements that are per MMU mode, accessed by the fast path.
* The structure is aligned to aid loading the pair with one insn.
*/
typedef struct CPUTLBDescFast {
/* Contains (n_entries - 1) << CPU_TLB_ENTRY_BITS */
uintptr_t mask;
/* The array of tlb entries itself. */
CPUTLBEntry *table;
} CPUTLBDescFast QEMU_ALIGNED(2 * sizeof(void *));
#endif /* EXEC_TLB_COMMON_H */
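
The addr_idx member is what allows the fast path to select among the three comparators by index instead of by field name. A sketch of such an accessor (hypothetical helper, not part of this commit), assuming the CPUTLBEntry definition above:

static inline uint64_t tlb_entry_cmp(const CPUTLBEntry *entry, int access_idx)
{
    /*
     * access_idx 0, 1, 2 selects addr_read, addr_write, addr_code in
     * declaration order; the union with addr_idx[] guarantees the aliasing.
     */
    return entry->addr_idx[access_idx];
}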

include/tcg/tcg.h

@@ -547,6 +547,7 @@ struct TCGContext {
TCGType addr_type; /* TCG_TYPE_I32 or TCG_TYPE_I64 */
#ifdef CONFIG_SOFTMMU
int tlb_fast_offset;
int page_mask;
uint8_t page_bits;
uint8_t tlb_dyn_max_bits;

tcg/aarch64/tcg-target.c.inc

@@ -1636,6 +1636,9 @@ static bool tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *lb)
return true;
}
/* We expect to use a 7-bit scaled negative offset from ENV. */
#define MIN_TLB_MASK_TABLE_OFS -512
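/*
 * The bound follows from the A64 LDP encoding used below (assumption:
 * signed 7-bit immediate scaled by the access size): for 64-bit
 * registers the reachable displacements are -64 * 8 = -512 through
 * +63 * 8 = +504.
 */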
/*
* For softmmu, perform the TLB load and compare.
* For useronly, perform any required alignment tests.
@@ -1674,12 +1677,10 @@ static TCGLabelQemuLdst *prepare_host_addr(TCGContext *s, HostAddress *h,
? TCG_TYPE_I64 : TCG_TYPE_I32);
/* Load env_tlb(env)->f[mmu_idx].{mask,table} into {tmp0,tmp1}. */
QEMU_BUILD_BUG_ON(TLB_MASK_TABLE_OFS(0) > 0);
QEMU_BUILD_BUG_ON(TLB_MASK_TABLE_OFS(0) < -512);
QEMU_BUILD_BUG_ON(offsetof(CPUTLBDescFast, mask) != 0);
QEMU_BUILD_BUG_ON(offsetof(CPUTLBDescFast, table) != 8);
tcg_out_insn(s, 3314, LDP, TCG_REG_TMP0, TCG_REG_TMP1, TCG_AREG0,
TLB_MASK_TABLE_OFS(mem_index), 1, 0);
tlb_mask_table_ofs(s, mem_index), 1, 0);
/* Extract the TLB index from the address into X0. */
tcg_out_insn(s, 3502S, AND_LSR, mask_type == TCG_TYPE_I64,

tcg/arm/tcg-target.c.inc

@@ -1374,6 +1374,9 @@ static bool tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *lb)
return true;
}
/* We expect to use a 9-bit sign-magnitude negative offset from ENV. */
#define MIN_TLB_MASK_TABLE_OFS -256
static TCGLabelQemuLdst *prepare_host_addr(TCGContext *s, HostAddress *h,
TCGReg addrlo, TCGReg addrhi,
MemOpIdx oi, bool is_ld)
@@ -1405,7 +1408,7 @@ static TCGLabelQemuLdst *prepare_host_addr(TCGContext *s, HostAddress *h,
int mem_index = get_mmuidx(oi);
int cmp_off = is_ld ? offsetof(CPUTLBEntry, addr_read)
: offsetof(CPUTLBEntry, addr_write);
int fast_off = TLB_MASK_TABLE_OFS(mem_index);
int fast_off = tlb_mask_table_ofs(s, mem_index);
unsigned s_mask = (1 << (opc & MO_SIZE)) - 1;
TCGReg t_addr;
@@ -1416,8 +1419,6 @@ static TCGLabelQemuLdst *prepare_host_addr(TCGContext *s, HostAddress *h,
ldst->addrhi_reg = addrhi;
/* Load env_tlb(env)->f[mmu_idx].{mask,table} into {r0,r1}. */
QEMU_BUILD_BUG_ON(TLB_MASK_TABLE_OFS(0) > 0);
QEMU_BUILD_BUG_ON(TLB_MASK_TABLE_OFS(0) < -256);
QEMU_BUILD_BUG_ON(offsetof(CPUTLBDescFast, mask) != 0);
QEMU_BUILD_BUG_ON(offsetof(CPUTLBDescFast, table) != 4);
tcg_out_ldrd_8(s, COND_AL, TCG_REG_R0, TCG_AREG0, fast_off);

tcg/i386/tcg-target.c.inc

@@ -1900,6 +1900,8 @@ static inline int setup_guest_base_seg(void)
#endif /* setup_guest_base_seg */
#endif /* !SOFTMMU */
#define MIN_TLB_MASK_TABLE_OFS INT_MIN
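/*
 * x86 mod r/m addressing takes a full signed 32-bit displacement
 * (assumption: standard encoding), so every int-valued offset from ENV
 * is representable and no tighter bound is required.
 */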
/*
* For softmmu, perform the TLB load and compare.
* For useronly, perform any required alignment tests.
@@ -1934,6 +1936,7 @@ static TCGLabelQemuLdst *prepare_host_addr(TCGContext *s, HostAddress *h,
int trexw = 0, hrexw = 0, tlbrexw = 0;
unsigned mem_index = get_mmuidx(oi);
unsigned s_mask = (1 << s_bits) - 1;
int fast_ofs = tlb_mask_table_ofs(s, mem_index);
int tlb_mask;
ldst = new_ldst_label(s);
@@ -1959,12 +1962,10 @@
s->page_bits - CPU_TLB_ENTRY_BITS);
tcg_out_modrm_offset(s, OPC_AND_GvEv + trexw, TCG_REG_L0, TCG_AREG0,
TLB_MASK_TABLE_OFS(mem_index) +
offsetof(CPUTLBDescFast, mask));
fast_ofs + offsetof(CPUTLBDescFast, mask));
tcg_out_modrm_offset(s, OPC_ADD_GvEv + hrexw, TCG_REG_L0, TCG_AREG0,
TLB_MASK_TABLE_OFS(mem_index) +
offsetof(CPUTLBDescFast, table));
fast_ofs + offsetof(CPUTLBDescFast, table));
/*
* If the required alignment is at least as large as the access, simply

tcg/loongarch64/tcg-target.c.inc

@@ -834,6 +834,9 @@ bool tcg_target_has_memory_bswap(MemOp memop)
return false;
}
/* We expect to use a 12-bit negative offset from ENV. */
#define MIN_TLB_MASK_TABLE_OFS -(1 << 11)
/*
* For softmmu, perform the TLB load and compare.
* For useronly, perform any required alignment tests.
@@ -855,7 +858,7 @@ static TCGLabelQemuLdst *prepare_host_addr(TCGContext *s, HostAddress *h,
#ifdef CONFIG_SOFTMMU
unsigned s_bits = opc & MO_SIZE;
int mem_index = get_mmuidx(oi);
int fast_ofs = TLB_MASK_TABLE_OFS(mem_index);
int fast_ofs = tlb_mask_table_ofs(s, mem_index);
int mask_ofs = fast_ofs + offsetof(CPUTLBDescFast, mask);
int table_ofs = fast_ofs + offsetof(CPUTLBDescFast, table);
@@ -864,8 +867,6 @@ static TCGLabelQemuLdst *prepare_host_addr(TCGContext *s, HostAddress *h,
ldst->oi = oi;
ldst->addrlo_reg = addr_reg;
QEMU_BUILD_BUG_ON(TLB_MASK_TABLE_OFS(0) > 0);
QEMU_BUILD_BUG_ON(TLB_MASK_TABLE_OFS(0) < -(1 << 11));
tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_TMP0, TCG_AREG0, mask_ofs);
tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_TMP1, TCG_AREG0, table_ofs);

tcg/mips/tcg-target.c.inc

@@ -1254,6 +1254,9 @@ bool tcg_target_has_memory_bswap(MemOp memop)
return false;
}
/* We expect to use a 16-bit negative offset from ENV. */
#define MIN_TLB_MASK_TABLE_OFS -32768
/*
* For softmmu, perform the TLB load and compare.
* For useronly, perform any required alignment tests.
@@ -1279,7 +1282,7 @@ static TCGLabelQemuLdst *prepare_host_addr(TCGContext *s, HostAddress *h,
#ifdef CONFIG_SOFTMMU
unsigned s_mask = (1 << s_bits) - 1;
int mem_index = get_mmuidx(oi);
int fast_off = TLB_MASK_TABLE_OFS(mem_index);
int fast_off = tlb_mask_table_ofs(s, mem_index);
int mask_off = fast_off + offsetof(CPUTLBDescFast, mask);
int table_off = fast_off + offsetof(CPUTLBDescFast, table);
int add_off = offsetof(CPUTLBEntry, addend);
@@ -1293,8 +1296,6 @@ static TCGLabelQemuLdst *prepare_host_addr(TCGContext *s, HostAddress *h,
ldst->addrhi_reg = addrhi;
/* Load tlb_mask[mmu_idx] and tlb_table[mmu_idx]. */
QEMU_BUILD_BUG_ON(TLB_MASK_TABLE_OFS(0) > 0);
QEMU_BUILD_BUG_ON(TLB_MASK_TABLE_OFS(0) < -32768);
tcg_out_ld(s, TCG_TYPE_PTR, TCG_TMP0, TCG_AREG0, mask_off);
tcg_out_ld(s, TCG_TYPE_PTR, TCG_TMP1, TCG_AREG0, table_off);

tcg/ppc/tcg-target.c.inc

@@ -2036,6 +2036,9 @@ bool tcg_target_has_memory_bswap(MemOp memop)
return aa.atom <= MO_64;
}
/* We expect to use a 16-bit negative offset from ENV. */
#define MIN_TLB_MASK_TABLE_OFS -32768
/*
* For softmmu, perform the TLB load and compare.
* For useronly, perform any required alignment tests.
@@ -2072,7 +2075,7 @@ static TCGLabelQemuLdst *prepare_host_addr(TCGContext *s, HostAddress *h,
int mem_index = get_mmuidx(oi);
int cmp_off = is_ld ? offsetof(CPUTLBEntry, addr_read)
: offsetof(CPUTLBEntry, addr_write);
int fast_off = TLB_MASK_TABLE_OFS(mem_index);
int fast_off = tlb_mask_table_ofs(s, mem_index);
int mask_off = fast_off + offsetof(CPUTLBDescFast, mask);
int table_off = fast_off + offsetof(CPUTLBDescFast, table);
@@ -2083,8 +2086,6 @@ static TCGLabelQemuLdst *prepare_host_addr(TCGContext *s, HostAddress *h,
ldst->addrhi_reg = addrhi;
/* Load tlb_mask[mmu_idx] and tlb_table[mmu_idx]. */
QEMU_BUILD_BUG_ON(TLB_MASK_TABLE_OFS(0) > 0);
QEMU_BUILD_BUG_ON(TLB_MASK_TABLE_OFS(0) < -32768);
tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_TMP1, TCG_AREG0, mask_off);
tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_TMP2, TCG_AREG0, table_off);

tcg/riscv/tcg-target.c.inc

@@ -1185,6 +1185,9 @@ static bool tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *l)
return true;
}
/* We expect to use a 12-bit negative offset from ENV. */
#define MIN_TLB_MASK_TABLE_OFS -(1 << 11)
/*
* For softmmu, perform the TLB load and compare.
* For useronly, perform any required alignment tests.
@@ -1208,7 +1211,7 @@ static TCGLabelQemuLdst *prepare_host_addr(TCGContext *s, TCGReg *pbase,
unsigned s_bits = opc & MO_SIZE;
unsigned s_mask = (1u << s_bits) - 1;
int mem_index = get_mmuidx(oi);
int fast_ofs = TLB_MASK_TABLE_OFS(mem_index);
int fast_ofs = tlb_mask_table_ofs(s, mem_index);
int mask_ofs = fast_ofs + offsetof(CPUTLBDescFast, mask);
int table_ofs = fast_ofs + offsetof(CPUTLBDescFast, table);
int compare_mask;
@@ -1219,8 +1222,6 @@ static TCGLabelQemuLdst *prepare_host_addr(TCGContext *s, TCGReg *pbase,
ldst->oi = oi;
ldst->addrlo_reg = addr_reg;
QEMU_BUILD_BUG_ON(TLB_MASK_TABLE_OFS(0) > 0);
QEMU_BUILD_BUG_ON(TLB_MASK_TABLE_OFS(0) < -(1 << 11));
tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_TMP0, TCG_AREG0, mask_ofs);
tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_TMP1, TCG_AREG0, table_ofs);

tcg/s390x/tcg-target.c.inc

@@ -1735,6 +1735,9 @@ static bool tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *lb)
return true;
}
/* We're expecting to use a 20-bit negative offset on the tlb memory ops. */
#define MIN_TLB_MASK_TABLE_OFS -(1 << 19)
/*
* For softmmu, perform the TLB load and compare.
* For useronly, perform any required alignment tests.
@@ -1757,7 +1760,7 @@ static TCGLabelQemuLdst *prepare_host_addr(TCGContext *s, HostAddress *h,
#ifdef CONFIG_SOFTMMU
unsigned s_mask = (1 << s_bits) - 1;
int mem_index = get_mmuidx(oi);
int fast_off = TLB_MASK_TABLE_OFS(mem_index);
int fast_off = tlb_mask_table_ofs(s, mem_index);
int mask_off = fast_off + offsetof(CPUTLBDescFast, mask);
int table_off = fast_off + offsetof(CPUTLBDescFast, table);
int ofs, a_off;
@@ -1771,8 +1774,6 @@ static TCGLabelQemuLdst *prepare_host_addr(TCGContext *s, HostAddress *h,
tcg_out_sh64(s, RSY_SRLG, TCG_TMP0, addr_reg, TCG_REG_NONE,
s->page_bits - CPU_TLB_ENTRY_BITS);
QEMU_BUILD_BUG_ON(TLB_MASK_TABLE_OFS(0) > 0);
QEMU_BUILD_BUG_ON(TLB_MASK_TABLE_OFS(0) < -(1 << 19));
tcg_out_insn(s, RXY, NG, TCG_TMP0, TCG_AREG0, TCG_REG_NONE, mask_off);
tcg_out_insn(s, RXY, AG, TCG_TMP0, TCG_AREG0, TCG_REG_NONE, table_off);

tcg/sparc64/tcg-target.c.inc

@@ -1017,6 +1017,9 @@ bool tcg_target_has_memory_bswap(MemOp memop)
return true;
}
/* We expect to use a 13-bit negative offset from ENV. */
#define MIN_TLB_MASK_TABLE_OFS -(1 << 12)
/*
* For softmmu, perform the TLB load and compare.
* For useronly, perform any required alignment tests.
@@ -1040,7 +1043,7 @@ static TCGLabelQemuLdst *prepare_host_addr(TCGContext *s, HostAddress *h,
#ifdef CONFIG_SOFTMMU
int mem_index = get_mmuidx(oi);
int fast_off = TLB_MASK_TABLE_OFS(mem_index);
int fast_off = tlb_mask_table_ofs(s, mem_index);
int mask_off = fast_off + offsetof(CPUTLBDescFast, mask);
int table_off = fast_off + offsetof(CPUTLBDescFast, table);
int cmp_off = is_ld ? offsetof(CPUTLBEntry, addr_read)
@@ -1050,8 +1053,6 @@ static TCGLabelQemuLdst *prepare_host_addr(TCGContext *s, HostAddress *h,
int cc;
/* Load tlb_mask[mmu_idx] and tlb_table[mmu_idx]. */
QEMU_BUILD_BUG_ON(TLB_MASK_TABLE_OFS(0) > 0);
QEMU_BUILD_BUG_ON(TLB_MASK_TABLE_OFS(0) < -(1 << 12));
tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_T2, TCG_AREG0, mask_off);
tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_T3, TCG_AREG0, table_off);

tcg/tcg.c

@@ -41,6 +41,7 @@
#define NO_CPU_IO_DEFS
#include "exec/exec-all.h"
#include "exec/tlb-common.h"
#include "tcg/tcg-op.h"
#if UINTPTR_MAX == UINT32_MAX
@@ -407,6 +408,13 @@ static uintptr_t G_GNUC_UNUSED get_jmp_target_addr(TCGContext *s, int which)
return (uintptr_t)tcg_splitwx_to_rx(&s->gen_tb->jmp_target_addr[which]);
}
#if defined(CONFIG_SOFTMMU) && !defined(CONFIG_TCG_INTERPRETER)
static int tlb_mask_table_ofs(TCGContext *s, int which)
{
return s->tlb_fast_offset + which * sizeof(CPUTLBDescFast);
}
#endif
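/*
 * Worked example with hypothetical values, assuming a 64-bit host where
 * sizeof(CPUTLBDescFast) == 2 * sizeof(void *) == 16: if
 * s->tlb_fast_offset == -0x4000, then mmu_idx 3 resolves to
 * -0x4000 + 3 * 16 == -0x3fd0, the displacement from the env pointer of
 * that mode's {mask,table} pair.
 */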
/* Signal overflow, starting over with fewer guest insns. */
static G_NORETURN
void tcg_raise_tb_overflow(TCGContext *s)
@@ -1521,6 +1529,11 @@ void tcg_func_start(TCGContext *s)
tcg_debug_assert(s->addr_type == TCG_TYPE_I32 ||
s->addr_type == TCG_TYPE_I64);
#if defined(CONFIG_SOFTMMU) && !defined(CONFIG_TCG_INTERPRETER)
tcg_debug_assert(s->tlb_fast_offset < 0);
tcg_debug_assert(s->tlb_fast_offset >= MIN_TLB_MASK_TABLE_OFS);
#endif
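/*
 * These runtime asserts replace the per-backend compile-time
 * QEMU_BUILD_BUG_ON(TLB_MASK_TABLE_OFS(0) < ...) checks removed above:
 * the offset is now a per-target runtime value, so it can only be
 * validated against MIN_TLB_MASK_TABLE_OFS when compilation starts.
 */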
}
static TCGTemp *tcg_temp_alloc(TCGContext *s)