powerpc/64s/radix: Enable HAVE_ARCH_HUGE_VMAP

This sets the HAVE_ARCH_HUGE_VMAP option, and defines the required
page table functions.

This enables huge (2MB and 1GB) ioremap mappings. I don't have a
benchmark for this change itself, but huge vmap will be used by a later
core kernel change to enable huge vmalloc memory mappings. That later
change improves cached `git diff` performance by about 5% on a 2-node
POWER9 with a 32MB dentry cache hash.

  Profiling git diff dTLB misses with a vanilla kernel:

  81.75%  git      [kernel.vmlinux]    [k] __d_lookup_rcu
   7.21%  git      [kernel.vmlinux]    [k] strncpy_from_user
   1.77%  git      [kernel.vmlinux]    [k] find_get_entry
   1.59%  git      [kernel.vmlinux]    [k] kmem_cache_free

            40,168      dTLB-miss
       0.100342754 seconds time elapsed

  With powerpc huge vmalloc:

             2,987      dTLB-miss
       0.095933138 seconds time elapsed

Signed-off-by: Nicholas Piggin <npiggin@gmail.com>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
This commit is contained in:
Nicholas Piggin 2019-06-10 13:08:18 +10:00 committed by Michael Ellerman
parent d38153f9cc
commit d909f9109c
4 changed files with 110 additions and 1 deletions

View File

@ -2927,7 +2927,7 @@
register save and restore. The kernel will only save
legacy floating-point registers on task switch.
nohugeiomap [KNL,x86] Disable kernel huge I/O mappings.
nohugeiomap [KNL,x86,PPC] Disable kernel huge I/O mappings.
nosmt [KNL,S390] Disable symmetric multithreading (SMT).
Equivalent to smt=1.

View File

@ -167,6 +167,7 @@ config PPC
select GENERIC_STRNLEN_USER
select GENERIC_TIME_VSYSCALL
select HAVE_ARCH_AUDITSYSCALL
select HAVE_ARCH_HUGE_VMAP if PPC_BOOK3S_64 && PPC_RADIX_MMU
select HAVE_ARCH_JUMP_LABEL
select HAVE_ARCH_KASAN if PPC32
select HAVE_ARCH_KGDB

View File

@ -274,6 +274,14 @@ extern unsigned long __vmalloc_end;
#define VMALLOC_START __vmalloc_start
#define VMALLOC_END __vmalloc_end
/*
 * Value behind IOREMAP_MAX_ORDER: PUD_SHIFT when the radix MMU is enabled,
 * otherwise 7 + PAGE_SHIFT.
 */
static inline unsigned int ioremap_max_order(void)
{
	unsigned int order = 7 + PAGE_SHIFT; /* default from linux/vmalloc.h */

	if (radix_enabled())
		order = PUD_SHIFT;

	return order;
}
#define IOREMAP_MAX_ORDER ioremap_max_order()
extern unsigned long __kernel_virt_start;
extern unsigned long __kernel_virt_size;
extern unsigned long __kernel_io_start;

View File

@ -1124,6 +1124,106 @@ void radix__ptep_modify_prot_commit(struct vm_area_struct *vma,
set_pte_at(mm, addr, ptep, pte);
}
/*
 * Report whether huge ioremap mappings at the PUD level are supported.
 * Returns the result of radix_enabled().
 */
int __init arch_ioremap_pud_supported(void)
{
/* HPT does not cope with large pages in the vmalloc area */
return radix_enabled();
}
/*
 * Report whether huge ioremap mappings at the PMD level are supported.
 * Returns the result of radix_enabled().
 */
int __init arch_ioremap_pmd_supported(void)
{
/* Only the radix MMU is reported as supporting huge PMD mappings */
return radix_enabled();
}
/*
 * Stub: performs no work on @p4d or @addr and always returns 0.
 *
 * NOTE(review): presumably 0 tells the caller that the PUD page was not
 * freed, so no huge mapping gets installed at the P4D level - confirm
 * against the generic ioremap code.
 */
int p4d_free_pud_page(p4d_t *p4d, unsigned long addr)
{
return 0;
}
/*
 * Write a mapping of physical address @addr with protection @prot directly
 * into the PUD entry @pud.
 *
 * Returns 1 when the entry was written, 0 when radix is not enabled (in
 * which case @pud is left untouched).
 */
int pud_set_huge(pud_t *pud, phys_addr_t addr, pgprot_t prot)
{
	/* Built unconditionally, before the MMU mode is checked. */
	pte_t entry = pfn_pte(__phys_to_pfn(addr), prot);

	if (radix_enabled()) {
		/* The PUD slot is written through the PTE setter. */
		set_pte_at(&init_mm, 0 /* radix unused */, (pte_t *)pud, entry);
		return 1;
	}

	return 0;
}
/*
 * Clear @pud if pud_huge() reports that it holds a huge mapping.
 *
 * Returns 1 when the entry was cleared, 0 when it was left alone.
 *
 * NOTE(review): pud_huge() may depend on hugetlb configuration - confirm it
 * still recognises a huge vmap entry when hugetlb support is compiled out.
 */
int pud_clear_huge(pud_t *pud)
{
	if (!pud_huge(*pud))
		return 0;

	pud_clear(pud);
	return 1;
}
/*
 * Detach and free the PMD table referenced by @pud, together with every
 * PTE table referenced from a non-empty entry of that PMD table.
 *
 * @addr is the start of the PUD_SIZE virtual range covered by @pud; that
 * range is flushed from the kernel TLB after the entry is cleared and
 * before anything is freed. Always returns 1.
 */
int pud_free_pmd_page(pud_t *pud, unsigned long addr)
{
	/* Capture the table address before the entry is wiped. */
	pmd_t *pmds = (pmd_t *)pud_page_vaddr(*pud);
	int idx;

	pud_clear(pud);
	flush_tlb_kernel_range(addr, addr + PUD_SIZE);

	for (idx = 0; idx < PTRS_PER_PMD; idx++) {
		pte_t *ptes;

		if (pmd_none(pmds[idx]))
			continue;

		ptes = (pte_t *)pmd_page_vaddr(pmds[idx]);
		pte_free_kernel(&init_mm, ptes);
	}

	pmd_free(&init_mm, pmds);

	return 1;
}
/*
 * Write a mapping of physical address @addr with protection @prot directly
 * into the PMD entry @pmd.
 *
 * Returns 1 when the entry was written, 0 when radix is not enabled (in
 * which case @pmd is left untouched).
 */
int pmd_set_huge(pmd_t *pmd, phys_addr_t addr, pgprot_t prot)
{
	/* Built unconditionally, before the MMU mode is checked. */
	pte_t entry = pfn_pte(__phys_to_pfn(addr), prot);

	if (radix_enabled()) {
		/* The PMD slot is written through the PTE setter. */
		set_pte_at(&init_mm, 0 /* radix unused */, (pte_t *)pmd, entry);
		return 1;
	}

	return 0;
}
/*
 * Clear @pmd if pmd_huge() reports that it holds a huge mapping.
 *
 * Returns 1 when the entry was cleared, 0 when it was left alone.
 *
 * NOTE(review): pmd_huge() may depend on hugetlb configuration - confirm it
 * still recognises a huge vmap entry when hugetlb support is compiled out.
 */
int pmd_clear_huge(pmd_t *pmd)
{
	if (!pmd_huge(*pmd))
		return 0;

	pmd_clear(pmd);
	return 1;
}
/*
 * Detach and free the PTE table referenced by @pmd.
 *
 * @addr is the start of the PMD_SIZE virtual range covered by @pmd; that
 * range is flushed from the kernel TLB after the entry is cleared and
 * before the table is freed. Always returns 1.
 */
int pmd_free_pte_page(pmd_t *pmd, unsigned long addr)
{
	/* Capture the table address before the entry is wiped. */
	pte_t *ptes = (pte_t *)pmd_page_vaddr(*pmd);

	pmd_clear(pmd);
	flush_tlb_kernel_range(addr, addr + PMD_SIZE);
	pte_free_kernel(&init_mm, ptes);

	return 1;
}
int radix__ioremap_range(unsigned long ea, phys_addr_t pa, unsigned long size,
pgprot_t prot, int nid)
{