From f967db0b9ed44ec3057a28f3b28efc51df51b835 Mon Sep 17 00:00:00 2001 From: Toshi Kani Date: Wed, 27 Jun 2018 08:13:46 -0600 Subject: [PATCH 1/3] x86/mm: Disable ioremap free page handling on x86-PAE ioremap() supports pmd mappings on x86-PAE. However, kernel's pmd tables are not shared among processes on x86-PAE. Therefore, any update to sync'd pmd entries need re-syncing. Freeing a pte page also leads to a vmalloc fault and hits the BUG_ON in vmalloc_sync_one(). Disable free page handling on x86-PAE. pud_free_pmd_page() and pmd_free_pte_page() simply return 0 if a given pud/pmd entry is present. This assures that ioremap() does not update sync'd pmd entries at the cost of falling back to pte mappings. Fixes: 28ee90fe6048 ("x86/mm: implement free pmd/pte page interfaces") Reported-by: Joerg Roedel Signed-off-by: Toshi Kani Signed-off-by: Thomas Gleixner Cc: mhocko@suse.com Cc: akpm@linux-foundation.org Cc: hpa@zytor.com Cc: cpandya@codeaurora.org Cc: linux-mm@kvack.org Cc: linux-arm-kernel@lists.infradead.org Cc: stable@vger.kernel.org Cc: Andrew Morton Cc: Michal Hocko Cc: "H. Peter Anvin" Cc: Link: https://lkml.kernel.org/r/20180627141348.21777-2-toshi.kani@hpe.com --- arch/x86/mm/pgtable.c | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) diff --git a/arch/x86/mm/pgtable.c b/arch/x86/mm/pgtable.c index 47b5951e592b..1aeb7a5dbce5 100644 --- a/arch/x86/mm/pgtable.c +++ b/arch/x86/mm/pgtable.c @@ -719,6 +719,7 @@ int pmd_clear_huge(pmd_t *pmd) return 0; } +#ifdef CONFIG_X86_64 /** * pud_free_pmd_page - Clear pud entry and free pmd page. * @pud: Pointer to a PUD. @@ -766,4 +767,22 @@ int pmd_free_pte_page(pmd_t *pmd) return 1; } + +#else /* !CONFIG_X86_64 */ + +int pud_free_pmd_page(pud_t *pud) +{ + return pud_none(*pud); +} + +/* + * Disable free page handling on x86-PAE. This assures that ioremap() + * does not update sync'd pmd entries. See vmalloc_sync_one(). + */ +int pmd_free_pte_page(pmd_t *pmd) +{ + return pmd_none(*pmd); +} + +#endif /* CONFIG_X86_64 */ #endif /* CONFIG_HAVE_ARCH_HUGE_VMAP */ From 785a19f9d1dd8a4ab2d0633be4656653bd3de1fc Mon Sep 17 00:00:00 2001 From: Chintan Pandya Date: Wed, 27 Jun 2018 08:13:47 -0600 Subject: [PATCH 2/3] ioremap: Update pgtable free interfaces with addr The following kernel panic was observed on ARM64 platform due to a stale TLB entry. 1. ioremap with 4K size, a valid pte page table is set. 2. iounmap it, its pte entry is set to 0. 3. ioremap the same address with 2M size, update its pmd entry with a new value. 4. CPU may hit an exception because the old pmd entry is still in TLB, which leads to a kernel panic. Commit b6bdb7517c3d ("mm/vmalloc: add interfaces to free unmapped page table") has addressed this panic by falling to pte mappings in the above case on ARM64. To support pmd mappings in all cases, TLB purge needs to be performed in this case on ARM64. Add a new arg, 'addr', to pud_free_pmd_page() and pmd_free_pte_page() so that TLB purge can be added later in seprate patches. [toshi.kani@hpe.com: merge changes, rewrite patch description] Fixes: 28ee90fe6048 ("x86/mm: implement free pmd/pte page interfaces") Signed-off-by: Chintan Pandya Signed-off-by: Toshi Kani Signed-off-by: Thomas Gleixner Cc: mhocko@suse.com Cc: akpm@linux-foundation.org Cc: hpa@zytor.com Cc: linux-mm@kvack.org Cc: linux-arm-kernel@lists.infradead.org Cc: Will Deacon Cc: Joerg Roedel Cc: stable@vger.kernel.org Cc: Andrew Morton Cc: Michal Hocko Cc: "H. Peter Anvin" Cc: Link: https://lkml.kernel.org/r/20180627141348.21777-3-toshi.kani@hpe.com --- arch/arm64/mm/mmu.c | 4 ++-- arch/x86/mm/pgtable.c | 12 +++++++----- include/asm-generic/pgtable.h | 8 ++++---- lib/ioremap.c | 4 ++-- 4 files changed, 15 insertions(+), 13 deletions(-) diff --git a/arch/arm64/mm/mmu.c b/arch/arm64/mm/mmu.c index 493ff75670ff..8ae5d7ae4af3 100644 --- a/arch/arm64/mm/mmu.c +++ b/arch/arm64/mm/mmu.c @@ -977,12 +977,12 @@ int pmd_clear_huge(pmd_t *pmdp) return 1; } -int pud_free_pmd_page(pud_t *pud) +int pud_free_pmd_page(pud_t *pud, unsigned long addr) { return pud_none(*pud); } -int pmd_free_pte_page(pmd_t *pmd) +int pmd_free_pte_page(pmd_t *pmd, unsigned long addr) { return pmd_none(*pmd); } diff --git a/arch/x86/mm/pgtable.c b/arch/x86/mm/pgtable.c index 1aeb7a5dbce5..fbd14e506758 100644 --- a/arch/x86/mm/pgtable.c +++ b/arch/x86/mm/pgtable.c @@ -723,11 +723,12 @@ int pmd_clear_huge(pmd_t *pmd) /** * pud_free_pmd_page - Clear pud entry and free pmd page. * @pud: Pointer to a PUD. + * @addr: Virtual address associated with pud. * * Context: The pud range has been unmaped and TLB purged. * Return: 1 if clearing the entry succeeded. 0 otherwise. */ -int pud_free_pmd_page(pud_t *pud) +int pud_free_pmd_page(pud_t *pud, unsigned long addr) { pmd_t *pmd; int i; @@ -738,7 +739,7 @@ int pud_free_pmd_page(pud_t *pud) pmd = (pmd_t *)pud_page_vaddr(*pud); for (i = 0; i < PTRS_PER_PMD; i++) - if (!pmd_free_pte_page(&pmd[i])) + if (!pmd_free_pte_page(&pmd[i], addr + (i * PMD_SIZE))) return 0; pud_clear(pud); @@ -750,11 +751,12 @@ int pud_free_pmd_page(pud_t *pud) /** * pmd_free_pte_page - Clear pmd entry and free pte page. * @pmd: Pointer to a PMD. + * @addr: Virtual address associated with pmd. * * Context: The pmd range has been unmaped and TLB purged. * Return: 1 if clearing the entry succeeded. 0 otherwise. */ -int pmd_free_pte_page(pmd_t *pmd) +int pmd_free_pte_page(pmd_t *pmd, unsigned long addr) { pte_t *pte; @@ -770,7 +772,7 @@ int pmd_free_pte_page(pmd_t *pmd) #else /* !CONFIG_X86_64 */ -int pud_free_pmd_page(pud_t *pud) +int pud_free_pmd_page(pud_t *pud, unsigned long addr) { return pud_none(*pud); } @@ -779,7 +781,7 @@ int pud_free_pmd_page(pud_t *pud) * Disable free page handling on x86-PAE. This assures that ioremap() * does not update sync'd pmd entries. See vmalloc_sync_one(). */ -int pmd_free_pte_page(pmd_t *pmd) +int pmd_free_pte_page(pmd_t *pmd, unsigned long addr) { return pmd_none(*pmd); } diff --git a/include/asm-generic/pgtable.h b/include/asm-generic/pgtable.h index f59639afaa39..b081794ba135 100644 --- a/include/asm-generic/pgtable.h +++ b/include/asm-generic/pgtable.h @@ -1019,8 +1019,8 @@ int pud_set_huge(pud_t *pud, phys_addr_t addr, pgprot_t prot); int pmd_set_huge(pmd_t *pmd, phys_addr_t addr, pgprot_t prot); int pud_clear_huge(pud_t *pud); int pmd_clear_huge(pmd_t *pmd); -int pud_free_pmd_page(pud_t *pud); -int pmd_free_pte_page(pmd_t *pmd); +int pud_free_pmd_page(pud_t *pud, unsigned long addr); +int pmd_free_pte_page(pmd_t *pmd, unsigned long addr); #else /* !CONFIG_HAVE_ARCH_HUGE_VMAP */ static inline int p4d_set_huge(p4d_t *p4d, phys_addr_t addr, pgprot_t prot) { @@ -1046,11 +1046,11 @@ static inline int pmd_clear_huge(pmd_t *pmd) { return 0; } -static inline int pud_free_pmd_page(pud_t *pud) +static inline int pud_free_pmd_page(pud_t *pud, unsigned long addr) { return 0; } -static inline int pmd_free_pte_page(pmd_t *pmd) +static inline int pmd_free_pte_page(pmd_t *pmd, unsigned long addr) { return 0; } diff --git a/lib/ioremap.c b/lib/ioremap.c index 54e5bbaa3200..517f5853ffed 100644 --- a/lib/ioremap.c +++ b/lib/ioremap.c @@ -92,7 +92,7 @@ static inline int ioremap_pmd_range(pud_t *pud, unsigned long addr, if (ioremap_pmd_enabled() && ((next - addr) == PMD_SIZE) && IS_ALIGNED(phys_addr + addr, PMD_SIZE) && - pmd_free_pte_page(pmd)) { + pmd_free_pte_page(pmd, addr)) { if (pmd_set_huge(pmd, phys_addr + addr, prot)) continue; } @@ -119,7 +119,7 @@ static inline int ioremap_pud_range(p4d_t *p4d, unsigned long addr, if (ioremap_pud_enabled() && ((next - addr) == PUD_SIZE) && IS_ALIGNED(phys_addr + addr, PUD_SIZE) && - pud_free_pmd_page(pud)) { + pud_free_pmd_page(pud, addr)) { if (pud_set_huge(pud, phys_addr + addr, prot)) continue; } From 5e0fb5df2ee871b841f96f9cb6a7f2784e96aa4e Mon Sep 17 00:00:00 2001 From: Toshi Kani Date: Wed, 27 Jun 2018 08:13:48 -0600 Subject: [PATCH 3/3] x86/mm: Add TLB purge to free pmd/pte page interfaces ioremap() calls pud_free_pmd_page() / pmd_free_pte_page() when it creates a pud / pmd map. The following preconditions are met at their entry. - All pte entries for a target pud/pmd address range have been cleared. - System-wide TLB purges have been peformed for a target pud/pmd address range. The preconditions assure that there is no stale TLB entry for the range. Speculation may not cache TLB entries since it requires all levels of page entries, including ptes, to have P & A-bits set for an associated address. However, speculation may cache pud/pmd entries (paging-structure caches) when they have P-bit set. Add a system-wide TLB purge (INVLPG) to a single page after clearing pud/pmd entry's P-bit. SDM 4.10.4.1, Operation that Invalidate TLBs and Paging-Structure Caches, states that: INVLPG invalidates all paging-structure caches associated with the current PCID regardless of the liner addresses to which they correspond. Fixes: 28ee90fe6048 ("x86/mm: implement free pmd/pte page interfaces") Signed-off-by: Toshi Kani Signed-off-by: Thomas Gleixner Cc: mhocko@suse.com Cc: akpm@linux-foundation.org Cc: hpa@zytor.com Cc: cpandya@codeaurora.org Cc: linux-mm@kvack.org Cc: linux-arm-kernel@lists.infradead.org Cc: Joerg Roedel Cc: stable@vger.kernel.org Cc: Andrew Morton Cc: Michal Hocko Cc: "H. Peter Anvin" Cc: Link: https://lkml.kernel.org/r/20180627141348.21777-4-toshi.kani@hpe.com --- arch/x86/mm/pgtable.c | 36 ++++++++++++++++++++++++++++++------ 1 file changed, 30 insertions(+), 6 deletions(-) diff --git a/arch/x86/mm/pgtable.c b/arch/x86/mm/pgtable.c index fbd14e506758..e3deefb891da 100644 --- a/arch/x86/mm/pgtable.c +++ b/arch/x86/mm/pgtable.c @@ -725,24 +725,44 @@ int pmd_clear_huge(pmd_t *pmd) * @pud: Pointer to a PUD. * @addr: Virtual address associated with pud. * - * Context: The pud range has been unmaped and TLB purged. + * Context: The pud range has been unmapped and TLB purged. * Return: 1 if clearing the entry succeeded. 0 otherwise. + * + * NOTE: Callers must allow a single page allocation. */ int pud_free_pmd_page(pud_t *pud, unsigned long addr) { - pmd_t *pmd; + pmd_t *pmd, *pmd_sv; + pte_t *pte; int i; if (pud_none(*pud)) return 1; pmd = (pmd_t *)pud_page_vaddr(*pud); + pmd_sv = (pmd_t *)__get_free_page(GFP_KERNEL); + if (!pmd_sv) + return 0; - for (i = 0; i < PTRS_PER_PMD; i++) - if (!pmd_free_pte_page(&pmd[i], addr + (i * PMD_SIZE))) - return 0; + for (i = 0; i < PTRS_PER_PMD; i++) { + pmd_sv[i] = pmd[i]; + if (!pmd_none(pmd[i])) + pmd_clear(&pmd[i]); + } pud_clear(pud); + + /* INVLPG to clear all paging-structure caches */ + flush_tlb_kernel_range(addr, addr + PAGE_SIZE-1); + + for (i = 0; i < PTRS_PER_PMD; i++) { + if (!pmd_none(pmd_sv[i])) { + pte = (pte_t *)pmd_page_vaddr(pmd_sv[i]); + free_page((unsigned long)pte); + } + } + + free_page((unsigned long)pmd_sv); free_page((unsigned long)pmd); return 1; @@ -753,7 +773,7 @@ int pud_free_pmd_page(pud_t *pud, unsigned long addr) * @pmd: Pointer to a PMD. * @addr: Virtual address associated with pmd. * - * Context: The pmd range has been unmaped and TLB purged. + * Context: The pmd range has been unmapped and TLB purged. * Return: 1 if clearing the entry succeeded. 0 otherwise. */ int pmd_free_pte_page(pmd_t *pmd, unsigned long addr) @@ -765,6 +785,10 @@ int pmd_free_pte_page(pmd_t *pmd, unsigned long addr) pte = (pte_t *)pmd_page_vaddr(*pmd); pmd_clear(pmd); + + /* INVLPG to clear all paging-structure caches */ + flush_tlb_kernel_range(addr, addr + PAGE_SIZE-1); + free_page((unsigned long)pte); return 1;