linux/arch/powerpc/include/asm/tlbflush.h
Aneesh Kumar K.V aefa5688c0 powerpc/mm: don't do tlbie for updatepp request with NO HPTE fault
upatepp can get called for a nohpte fault when we find from the linux
page table that the translation was hashed before. In that case
we are sure that there is no existing translation, hence we could
avoid doing tlbie.

We could possibly race with a parallel fault filling the TLB. But
that should be ok because updatepp is only ever relaxing permissions.
We also look at linux pte permission bits when filling hash pte
permission bits. We also hold the linux pte busy bits while
inserting/updating a hashpte entry, hence a paralle update of
linux pte is not possible. On the other hand mprotect involves
ptep_modify_prot_start which cause a hpte invalidate and not updatepp.

Performance number:
We use randbox_access_bench written by Anton.

Kernel with THP disabled and smaller hash page table size.

    86.60%  random_access_b  [kernel.kallsyms]                [k] .native_hpte_updatepp
     2.10%  random_access_b  random_access_bench              [.] doit
     1.99%  random_access_b  [kernel.kallsyms]                [k] .do_raw_spin_lock
     1.85%  random_access_b  [kernel.kallsyms]                [k] .native_hpte_insert
     1.26%  random_access_b  [kernel.kallsyms]                [k] .native_flush_hash_range
     1.18%  random_access_b  [kernel.kallsyms]                [k] .__delay
     0.69%  random_access_b  [kernel.kallsyms]                [k] .native_hpte_remove
     0.37%  random_access_b  [kernel.kallsyms]                [k] .clear_user_page
     0.34%  random_access_b  [kernel.kallsyms]                [k] .__hash_page_64K
     0.32%  random_access_b  [kernel.kallsyms]                [k] fast_exception_return
     0.30%  random_access_b  [kernel.kallsyms]                [k] .hash_page_mm

With Fix:

    27.54%  random_access_b  random_access_bench              [.] doit
    22.90%  random_access_b  [kernel.kallsyms]                [k] .native_hpte_insert
     5.76%  random_access_b  [kernel.kallsyms]                [k] .native_hpte_remove
     5.20%  random_access_b  [kernel.kallsyms]                [k] fast_exception_return
     5.12%  random_access_b  [kernel.kallsyms]                [k] .__hash_page_64K
     4.80%  random_access_b  [kernel.kallsyms]                [k] .hash_page_mm
     3.31%  random_access_b  [kernel.kallsyms]                [k] data_access_common
     1.84%  random_access_b  [kernel.kallsyms]                [k] .trace_hardirqs_on_caller

Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
2014-12-05 16:26:15 +11:00

178 lines
5.0 KiB
C

#ifndef _ASM_POWERPC_TLBFLUSH_H
#define _ASM_POWERPC_TLBFLUSH_H
/*
* TLB flushing:
*
* - flush_tlb_mm(mm) flushes the specified mm context TLB's
* - flush_tlb_page(vma, vmaddr) flushes one page
* - local_flush_tlb_mm(mm, full) flushes the specified mm context on
* the local processor
* - local_flush_tlb_page(vma, vmaddr) flushes one page on the local processor
* - flush_tlb_page_nohash(vma, vmaddr) flushes one page if SW loaded TLB
* - flush_tlb_range(vma, start, end) flushes a range of pages
* - flush_tlb_kernel_range(start, end) flushes a range of kernel pages
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License
* as published by the Free Software Foundation; either version
* 2 of the License, or (at your option) any later version.
*/
#ifdef __KERNEL__
#ifdef CONFIG_PPC_MMU_NOHASH
/*
* TLB flushing for software loaded TLB chips
*
* TODO: (CONFIG_FSL_BOOKE) determine if flush_tlb_range &
* flush_tlb_kernel_range are best implemented as tlbia vs
* specific tlbie's
*/
struct vm_area_struct;
struct mm_struct;
#define MMU_NO_CONTEXT ((unsigned int)-1)
extern void flush_tlb_range(struct vm_area_struct *vma, unsigned long start,
unsigned long end);
extern void flush_tlb_kernel_range(unsigned long start, unsigned long end);
extern void local_flush_tlb_mm(struct mm_struct *mm);
extern void local_flush_tlb_page(struct vm_area_struct *vma, unsigned long vmaddr);
extern void __local_flush_tlb_page(struct mm_struct *mm, unsigned long vmaddr,
int tsize, int ind);
#ifdef CONFIG_SMP
extern void flush_tlb_mm(struct mm_struct *mm);
extern void flush_tlb_page(struct vm_area_struct *vma, unsigned long vmaddr);
extern void __flush_tlb_page(struct mm_struct *mm, unsigned long vmaddr,
int tsize, int ind);
#else
#define flush_tlb_mm(mm) local_flush_tlb_mm(mm)
#define flush_tlb_page(vma,addr) local_flush_tlb_page(vma,addr)
#define __flush_tlb_page(mm,addr,p,i) __local_flush_tlb_page(mm,addr,p,i)
#endif
#define flush_tlb_page_nohash(vma,addr) flush_tlb_page(vma,addr)
#elif defined(CONFIG_PPC_STD_MMU_32)
/*
* TLB flushing for "classic" hash-MMU 32-bit CPUs, 6xx, 7xx, 7xxx
*/
extern void flush_tlb_mm(struct mm_struct *mm);
extern void flush_tlb_page(struct vm_area_struct *vma, unsigned long vmaddr);
extern void flush_tlb_page_nohash(struct vm_area_struct *vma, unsigned long addr);
extern void flush_tlb_range(struct vm_area_struct *vma, unsigned long start,
unsigned long end);
extern void flush_tlb_kernel_range(unsigned long start, unsigned long end);
static inline void local_flush_tlb_page(struct vm_area_struct *vma,
unsigned long vmaddr)
{
flush_tlb_page(vma, vmaddr);
}
static inline void local_flush_tlb_mm(struct mm_struct *mm)
{
flush_tlb_mm(mm);
}
#elif defined(CONFIG_PPC_STD_MMU_64)
#define MMU_NO_CONTEXT 0
/*
* TLB flushing for 64-bit hash-MMU CPUs
*/
#include <linux/percpu.h>
#include <asm/page.h>
#define PPC64_TLB_BATCH_NR 192
struct ppc64_tlb_batch {
int active;
unsigned long index;
struct mm_struct *mm;
real_pte_t pte[PPC64_TLB_BATCH_NR];
unsigned long vpn[PPC64_TLB_BATCH_NR];
unsigned int psize;
int ssize;
};
DECLARE_PER_CPU(struct ppc64_tlb_batch, ppc64_tlb_batch);
extern void __flush_tlb_pending(struct ppc64_tlb_batch *batch);
#define __HAVE_ARCH_ENTER_LAZY_MMU_MODE
static inline void arch_enter_lazy_mmu_mode(void)
{
struct ppc64_tlb_batch *batch = this_cpu_ptr(&ppc64_tlb_batch);
batch->active = 1;
}
static inline void arch_leave_lazy_mmu_mode(void)
{
struct ppc64_tlb_batch *batch = this_cpu_ptr(&ppc64_tlb_batch);
if (batch->index)
__flush_tlb_pending(batch);
batch->active = 0;
}
#define arch_flush_lazy_mmu_mode() do {} while (0)
extern void flush_hash_page(unsigned long vpn, real_pte_t pte, int psize,
int ssize, unsigned long flags);
extern void flush_hash_range(unsigned long number, int local);
extern void flush_hash_hugepage(unsigned long vsid, unsigned long addr,
pmd_t *pmdp, unsigned int psize, int ssize,
unsigned long flags);
static inline void local_flush_tlb_mm(struct mm_struct *mm)
{
}
static inline void flush_tlb_mm(struct mm_struct *mm)
{
}
static inline void local_flush_tlb_page(struct vm_area_struct *vma,
unsigned long vmaddr)
{
}
static inline void flush_tlb_page(struct vm_area_struct *vma,
unsigned long vmaddr)
{
}
static inline void flush_tlb_page_nohash(struct vm_area_struct *vma,
unsigned long vmaddr)
{
}
static inline void flush_tlb_range(struct vm_area_struct *vma,
unsigned long start, unsigned long end)
{
}
static inline void flush_tlb_kernel_range(unsigned long start,
unsigned long end)
{
}
/* Private function for use by PCI IO mapping code */
extern void __flush_hash_table_range(struct mm_struct *mm, unsigned long start,
unsigned long end);
extern void flush_tlb_pmd_range(struct mm_struct *mm, pmd_t *pmd,
unsigned long addr);
#else
#error Unsupported MMU type
#endif
#endif /*__KERNEL__ */
#endif /* _ASM_POWERPC_TLBFLUSH_H */