From d7553f3591bbf495b4c35355179c793d03e106dc Mon Sep 17 00:00:00 2001
From: Richard Henderson
Date: Fri, 27 Oct 2023 00:24:30 -0700
Subject: [PATCH] target/hppa: Populate an interval tree with valid tlb entries

Complete the data structure conversion started earlier. This reduces
the perf overhead of hppa_get_physical_address from ~5% to ~0.25%.

Signed-off-by: Richard Henderson
---
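Notes:

For reference, the lookup that hppa_find_tlb() switches to below is a
stabbing query on an interval tree keyed by [itree.start, itree.last].
The following is only an illustrative sketch of that pattern, assuming
the IntervalTreeRoot/IntervalTreeNode API from include/qemu/interval-tree.h
as used in the hunks below; DemoRange, demo_lookup and demo are made-up
names and are not part of this patch:

#include "qemu/osdep.h"
#include "qemu/interval-tree.h"

typedef struct DemoRange {
    IntervalTreeNode itree;     /* keyed on [itree.start, itree.last] */
    int payload;
} DemoRange;

/* Return the entry whose range covers addr, or NULL if none does. */
static DemoRange *demo_lookup(IntervalTreeRoot *root, uint64_t addr)
{
    IntervalTreeNode *n = interval_tree_iter_first(root, addr, addr);

    return n ? container_of(n, DemoRange, itree) : NULL;
}

void demo(void)
{
    IntervalTreeRoot root;
    DemoRange page = { .payload = 1 };

    /* A zeroed root is an empty tree, as with env->tlb_root. */
    memset(&root, 0, sizeof(root));

    page.itree.start = 0x1000;
    page.itree.last  = 0x1fff;          /* bounds are inclusive */
    interval_tree_insert(&page.itree, &root);

    assert(demo_lookup(&root, 0x1234) == &page);    /* hit */
    assert(demo_lookup(&root, 0x2000) == NULL);     /* miss */

    interval_tree_remove(&page.itree, &root);
}

Entries that are not in the tree sit on the tlb_unused free list instead,
chained through the union'd unused_next pointer, which is why
hppa_flush_tlb_ent() below both removes the node from the tree and pushes
the entry onto that list.
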
 target/hppa/cpu.c        |   2 +
 target/hppa/cpu.h        |  24 +++++-
 target/hppa/machine.c    |  51 ++++++++++++-
 target/hppa/mem_helper.c | 161 +++++++++++++++++++++++----------------
 4 files changed, 167 insertions(+), 71 deletions(-)

diff --git a/target/hppa/cpu.c b/target/hppa/cpu.c
index 1644297bf8..5e1240c631 100644
--- a/target/hppa/cpu.c
+++ b/target/hppa/cpu.c
@@ -137,8 +137,10 @@ static void hppa_cpu_realizefn(DeviceState *dev, Error **errp)
 #ifndef CONFIG_USER_ONLY
     {
         HPPACPU *cpu = HPPA_CPU(cs);
+
         cpu->alarm_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL,
                                         hppa_cpu_alarm_timer, cpu);
+        hppa_ptlbe(&cpu->env);
     }
 #endif
 }
diff --git a/target/hppa/cpu.h b/target/hppa/cpu.h
index 1480d0237a..08de894393 100644
--- a/target/hppa/cpu.h
+++ b/target/hppa/cpu.h
@@ -176,7 +176,10 @@ typedef int64_t target_sreg;
 #endif
 
 typedef struct HPPATLBEntry {
-    IntervalTreeNode itree;
+    union {
+        IntervalTreeNode itree;
+        struct HPPATLBEntry *unused_next;
+    };
 
     target_ureg pa;
 
@@ -234,10 +237,22 @@ typedef struct CPUArchState {
 #define HPPA_TLB_ENTRIES 256
 #define HPPA_BTLB_ENTRIES (HPPA_BTLB_FIXED + HPPA_BTLB_VARIABLE)
 
-    /* ??? Implement a unified itlb/dtlb for the moment. */
-    /* ??? We should use a more intelligent data structure. */
-    HPPATLBEntry tlb[HPPA_TLB_ENTRIES];
+    /* Index for round-robin tlb eviction. */
     uint32_t tlb_last;
+
+    /*
+     * For pa1.x, the partially initialized, still invalid tlb entry
+     * which has had ITLBA performed, but not yet ITLBP.
+     */
+    HPPATLBEntry *tlb_partial;
+
+    /* Linked list of all invalid (unused) tlb entries. */
+    HPPATLBEntry *tlb_unused;
+
+    /* Root of the search tree for all valid tlb entries. */
+    IntervalTreeRoot tlb_root;
+
+    HPPATLBEntry tlb[HPPA_TLB_ENTRIES];
 } CPUHPPAState;
 
 /**
@@ -356,6 +371,7 @@ int hppa_cpu_gdb_read_register(CPUState *cpu, GByteArray *buf, int reg);
 int hppa_cpu_gdb_write_register(CPUState *cpu, uint8_t *buf, int reg);
 void hppa_cpu_dump_state(CPUState *cs, FILE *f, int);
 #ifndef CONFIG_USER_ONLY
+void hppa_ptlbe(CPUHPPAState *env);
 hwaddr hppa_cpu_get_phys_page_debug(CPUState *cs, vaddr addr);
 bool hppa_cpu_tlb_fill(CPUState *cs, vaddr address, int size,
                        MMUAccessType access_type, int mmu_idx,
diff --git a/target/hppa/machine.c b/target/hppa/machine.c
index 4535195ca2..ab3e8c81fa 100644
--- a/target/hppa/machine.c
+++ b/target/hppa/machine.c
@@ -72,8 +72,6 @@ static int get_tlb(QEMUFile *f, void *opaque, size_t size,
     HPPATLBEntry *ent = opaque;
     uint32_t val;
 
-    memset(ent, 0, sizeof(*ent));
-
     ent->itree.start = qemu_get_be64(f);
     ent->pa = qemu_get_betr(f);
     val = qemu_get_be32(f);
@@ -122,6 +120,53 @@ static const VMStateInfo vmstate_tlb = {
     .get = get_tlb,
     .put = put_tlb,
 };
 
+static int tlb_pre_load(void *opaque)
+{
+    CPUHPPAState *env = opaque;
+
+    /*
+     * Zap the entire tlb, on-the-side data structures and all.
+     * Each tlb entry will have data re-filled by get_tlb.
+     */
+    memset(env->tlb, 0, sizeof(env->tlb));
+    memset(&env->tlb_root, 0, sizeof(env->tlb_root));
+    env->tlb_unused = NULL;
+    env->tlb_partial = NULL;
+
+    return 0;
+}
+
+static int tlb_post_load(void *opaque, int version_id)
+{
+    CPUHPPAState *env = opaque;
+    HPPATLBEntry **unused = &env->tlb_unused;
+    HPPATLBEntry *partial = NULL;
+
+    /*
+     * Re-create the interval tree from the valid entries.
+     * Truly invalid entries should have start == last == 0.
+     * Otherwise it should be the in-flight tlb_partial entry.
+     */
+    for (uint32_t i = 0; i < ARRAY_SIZE(env->tlb); ++i) {
+        HPPATLBEntry *e = &env->tlb[i];
+
+        if (e->entry_valid) {
+            interval_tree_insert(&e->itree, &env->tlb_root);
+        } else if (i < HPPA_BTLB_ENTRIES) {
+            /* btlb not in unused list */
+        } else if (partial == NULL && e->itree.start < e->itree.last) {
+            partial = e;
+        } else {
+            *unused = e;
+            unused = &e->unused_next;
+        }
+    }
+    env->tlb_partial = partial;
+    *unused = NULL;
+
+    return 0;
+}
+
 static VMStateField vmstate_env_fields[] = {
     VMSTATE_UINTTR_ARRAY(gr, CPUHPPAState, 32),
     VMSTATE_UINT64_ARRAY(fr, CPUHPPAState, 32),
@@ -164,6 +209,8 @@ static const VMStateDescription vmstate_env = {
     .version_id = 1,
     .minimum_version_id = 1,
     .fields = vmstate_env_fields,
+    .pre_load = tlb_pre_load,
+    .post_load = tlb_post_load,
 };
 
 static VMStateField vmstate_cpu_fields[] = {
diff --git a/target/hppa/mem_helper.c b/target/hppa/mem_helper.c
index 828cceb29c..b1773ece61 100644
--- a/target/hppa/mem_helper.c
+++ b/target/hppa/mem_helper.c
@@ -27,16 +27,13 @@
 
 static HPPATLBEntry *hppa_find_tlb(CPUHPPAState *env, vaddr addr)
 {
-    int i;
+    IntervalTreeNode *i = interval_tree_iter_first(&env->tlb_root, addr, addr);
 
-    for (i = 0; i < ARRAY_SIZE(env->tlb); ++i) {
-        HPPATLBEntry *ent = &env->tlb[i];
-        if (ent->itree.start <= addr && addr <= ent->itree.last) {
-            trace_hppa_tlb_find_entry(env, ent + i, ent->entry_valid,
-                                      ent->itree.start, ent->itree.last,
-                                      ent->pa);
-            return ent;
-        }
+    if (i) {
+        HPPATLBEntry *ent = container_of(i, HPPATLBEntry, itree);
+        trace_hppa_tlb_find_entry(env, ent, ent->entry_valid,
+                                  ent->itree.start, ent->itree.last, ent->pa);
+        return ent;
     }
     trace_hppa_tlb_find_entry_not_found(env, addr);
     return NULL;
 }
@@ -46,6 +43,7 @@ static void hppa_flush_tlb_ent(CPUHPPAState *env, HPPATLBEntry *ent,
                                bool force_flush_btlb)
 {
     CPUState *cs = env_cpu(env);
+    bool is_btlb;
 
     if (!ent->entry_valid) {
         return;
@@ -58,50 +56,55 @@ static void hppa_flush_tlb_ent(CPUHPPAState *env, HPPATLBEntry *ent,
                               ent->itree.last - ent->itree.start + 1,
                               HPPA_MMU_FLUSH_MASK, TARGET_LONG_BITS);
 
-    /* never clear BTLBs, unless forced to do so. */
-    if (ent < &env->tlb[HPPA_BTLB_ENTRIES] && !force_flush_btlb) {
+    /* Never clear BTLBs, unless forced to do so. */
+    is_btlb = ent < &env->tlb[HPPA_BTLB_ENTRIES];
+    if (is_btlb && !force_flush_btlb) {
         return;
     }
 
+    interval_tree_remove(&ent->itree, &env->tlb_root);
     memset(ent, 0, sizeof(*ent));
-    ent->itree.start = -1;
+
+    if (!is_btlb) {
+        ent->unused_next = env->tlb_unused;
+        env->tlb_unused = ent;
+    }
 }
 
-static HPPATLBEntry *hppa_flush_tlb_range(CPUHPPAState *env,
-                                          vaddr va_b, vaddr va_e)
+static void hppa_flush_tlb_range(CPUHPPAState *env, vaddr va_b, vaddr va_e)
 {
-    HPPATLBEntry *empty = NULL;
+    IntervalTreeNode *i, *n;
 
-    /* Zap any old entries covering ADDR; notice empty entries on the way. */
-    for (int i = HPPA_BTLB_ENTRIES; i < ARRAY_SIZE(env->tlb); ++i) {
-        HPPATLBEntry *ent = &env->tlb[i];
+    i = interval_tree_iter_first(&env->tlb_root, va_b, va_e);
+    for (; i ; i = n) {
+        HPPATLBEntry *ent = container_of(i, HPPATLBEntry, itree);
 
-        if (!ent->entry_valid) {
-            empty = ent;
-        } else if (va_e >= ent->itree.start && va_b <= ent->itree.last) {
-            hppa_flush_tlb_ent(env, ent, false);
-            empty = ent;
-        }
+        /*
+         * Find the next entry now: In the normal case the current entry
+         * will be removed, but in the BTLB case it will remain.
+         */
+        n = interval_tree_iter_next(i, va_b, va_e);
+        hppa_flush_tlb_ent(env, ent, false);
     }
-    return empty;
 }
 
 static HPPATLBEntry *hppa_alloc_tlb_ent(CPUHPPAState *env)
 {
-    HPPATLBEntry *ent;
-    uint32_t i;
+    HPPATLBEntry *ent = env->tlb_unused;
 
-    if (env->tlb_last < HPPA_BTLB_ENTRIES || env->tlb_last >= ARRAY_SIZE(env->tlb)) {
-        i = HPPA_BTLB_ENTRIES;
-        env->tlb_last = HPPA_BTLB_ENTRIES + 1;
-    } else {
-        i = env->tlb_last;
-        env->tlb_last++;
+    if (ent == NULL) {
+        uint32_t i = env->tlb_last;
+
+        if (i < HPPA_BTLB_ENTRIES || i >= ARRAY_SIZE(env->tlb)) {
+            i = HPPA_BTLB_ENTRIES;
+        }
+        env->tlb_last = i + 1;
+
+        ent = &env->tlb[i];
+        hppa_flush_tlb_ent(env, ent, false);
     }
 
-    ent = &env->tlb[i];
-
-    hppa_flush_tlb_ent(env, ent, false);
+    env->tlb_unused = ent->unused_next;
     return ent;
 }
@@ -127,7 +130,7 @@ int hppa_get_physical_address(CPUHPPAState *env, vaddr addr, int mmu_idx,
 
     /* Find a valid tlb entry that matches the virtual address. */
     ent = hppa_find_tlb(env, addr);
-    if (ent == NULL || !ent->entry_valid) {
+    if (ent == NULL) {
         phys = 0;
         prot = 0;
         ret = (type == PAGE_EXEC) ? EXCP_ITLB_MISS : EXCP_DTLB_MISS;
@@ -303,23 +306,23 @@ bool hppa_cpu_tlb_fill(CPUState *cs, vaddr addr, int size,
 /* Insert (Insn/Data) TLB Address. Note this is PA 1.1 only. */
 void HELPER(itlba)(CPUHPPAState *env, target_ulong addr, target_ureg reg)
 {
-    HPPATLBEntry *empty;
+    HPPATLBEntry *ent;
 
-    /* Zap any old entries covering ADDR; notice empty entries on the way. */
+    /* Zap any old entries covering ADDR. */
     addr &= TARGET_PAGE_MASK;
-    empty = hppa_flush_tlb_range(env, addr, addr + TARGET_PAGE_SIZE - 1);
+    hppa_flush_tlb_range(env, addr, addr + TARGET_PAGE_SIZE - 1);
 
-    /* If we didn't see an empty entry, evict one. */
-    if (empty == NULL) {
-        empty = hppa_alloc_tlb_ent(env);
+    ent = env->tlb_partial;
+    if (ent == NULL) {
+        ent = hppa_alloc_tlb_ent(env);
+        env->tlb_partial = ent;
     }
 
-    /* Note that empty->entry_valid == 0 already. */
-    empty->itree.start = addr;
-    empty->itree.last = addr + TARGET_PAGE_SIZE - 1;
-    empty->pa = extract32(reg, 5, 20) << TARGET_PAGE_BITS;
-    trace_hppa_tlb_itlba(env, empty, empty->itree.start,
-                         empty->itree.last, empty->pa);
+    /* Note that ent->entry_valid == 0 already. */
+    ent->itree.start = addr;
+    ent->itree.last = addr + TARGET_PAGE_SIZE - 1;
+    ent->pa = extract32(reg, 5, 20) << TARGET_PAGE_BITS;
+    trace_hppa_tlb_itlba(env, ent, ent->itree.start, ent->itree.last, ent->pa);
 }
 
 static void set_access_bits(CPUHPPAState *env, HPPATLBEntry *ent, target_ureg reg)
@@ -333,6 +336,8 @@ static void set_access_bits(CPUHPPAState *env, HPPATLBEntry *ent, target_ureg re
     ent->d = extract32(reg, 28, 1);
     ent->t = extract32(reg, 29, 1);
     ent->entry_valid = 1;
+
+    interval_tree_insert(&ent->itree, &env->tlb_root);
     trace_hppa_tlb_itlbp(env, ent, ent->access_id, ent->u, ent->ar_pl2,
                          ent->ar_pl1, ent->ar_type, ent->b, ent->d, ent->t);
 }
@@ -340,14 +345,16 @@ static void set_access_bits(CPUHPPAState *env, HPPATLBEntry *ent, target_ureg re
 /* Insert (Insn/Data) TLB Protection. Note this is PA 1.1 only. */
 void HELPER(itlbp)(CPUHPPAState *env, target_ulong addr, target_ureg reg)
 {
-    HPPATLBEntry *ent = hppa_find_tlb(env, addr);
+    HPPATLBEntry *ent = env->tlb_partial;
 
-    if (unlikely(ent == NULL)) {
-        qemu_log_mask(LOG_GUEST_ERROR, "ITLBP not following ITLBA\n");
-        return;
+    if (ent) {
+        env->tlb_partial = NULL;
+        if (ent->itree.start <= addr && addr <= ent->itree.last) {
+            set_access_bits(env, ent, reg);
+            return;
+        }
     }
-
-    set_access_bits(env, ent, reg);
+    qemu_log_mask(LOG_GUEST_ERROR, "ITLBP not following ITLBA\n");
 }
 
 /* Purge (Insn/Data) TLB. This is explicitly page-based, and is
@@ -356,17 +363,15 @@ static void ptlb_work(CPUState *cpu, run_on_cpu_data data)
 {
     CPUHPPAState *env = cpu_env(cpu);
     target_ulong addr = (target_ulong) data.target_ptr;
-    HPPATLBEntry *ent = hppa_find_tlb(env, addr);
 
-    if (ent && ent->entry_valid) {
-        hppa_flush_tlb_ent(env, ent, false);
-    }
+    hppa_flush_tlb_range(env, addr, addr);
 }
 
 void HELPER(ptlb)(CPUHPPAState *env, target_ulong addr)
 {
     CPUState *src = env_cpu(env);
     CPUState *cpu;
+
     trace_hppa_tlb_ptlb(env);
     run_on_cpu_data data = RUN_ON_CPU_TARGET_PTR(addr);
@@ -378,16 +383,40 @@ void HELPER(ptlb)(CPUHPPAState *env, target_ulong addr)
     async_safe_run_on_cpu(src, ptlb_work, data);
 }
 
+void hppa_ptlbe(CPUHPPAState *env)
+{
+    uint32_t i;
+
+    /* Zap the (non-btlb) tlb entries themselves. */
+    memset(&env->tlb[HPPA_BTLB_ENTRIES], 0,
+           sizeof(env->tlb) - HPPA_BTLB_ENTRIES * sizeof(env->tlb[0]));
+    env->tlb_last = HPPA_BTLB_ENTRIES;
+    env->tlb_partial = NULL;
+
+    /* Put them all onto the unused list. */
+    env->tlb_unused = &env->tlb[HPPA_BTLB_ENTRIES];
+    for (i = HPPA_BTLB_ENTRIES; i < ARRAY_SIZE(env->tlb) - 1; ++i) {
+        env->tlb[i].unused_next = &env->tlb[i + 1];
+    }
+
+    /* Re-initialize the interval tree with only the btlb entries. */
+    memset(&env->tlb_root, 0, sizeof(env->tlb_root));
+    for (i = 0; i < HPPA_BTLB_ENTRIES; ++i) {
+        if (env->tlb[i].entry_valid) {
+            interval_tree_insert(&env->tlb[i].itree, &env->tlb_root);
+        }
+    }
+
+    tlb_flush_by_mmuidx(env_cpu(env), HPPA_MMU_FLUSH_MASK);
+}
+
 /* Purge (Insn/Data) TLB entry. This affects an implementation-defined
    number of pages/entries (we choose all), and is local to the cpu. */
 void HELPER(ptlbe)(CPUHPPAState *env)
 {
     trace_hppa_tlb_ptlbe(env);
     qemu_log_mask(CPU_LOG_MMU, "FLUSH ALL TLB ENTRIES\n");
-    memset(&env->tlb[HPPA_BTLB_ENTRIES], 0,
-           sizeof(env->tlb) - HPPA_BTLB_ENTRIES * sizeof(env->tlb[0]));
-    env->tlb_last = HPPA_BTLB_ENTRIES;
-    tlb_flush_by_mmuidx(env_cpu(env), HPPA_MMU_FLUSH_MASK);
+    hppa_ptlbe(env);
 }
 
 void cpu_hppa_change_prot_id(CPUHPPAState *env)
@@ -483,9 +512,11 @@ void HELPER(diag_btlb)(CPUHPPAState *env)
                        (long long) virt_page, phys_page, len, slot);
         if (slot < HPPA_BTLB_ENTRIES) {
             btlb = &env->tlb[slot];
-            /* force flush of possibly existing BTLB entry */
+
+            /* Force flush of possibly existing BTLB entry. */
             hppa_flush_tlb_ent(env, btlb, true);
-            /* create new BTLB entry */
+
+            /* Create new BTLB entry */
             btlb->itree.start = virt_page << TARGET_PAGE_BITS;
             btlb->itree.last = btlb->itree.start + len * TARGET_PAGE_SIZE - 1;
             btlb->pa = phys_page << TARGET_PAGE_BITS;