x86/mm: Do not use set_{pud, pmd}_safe() when splitting a large page
The commit 0a9fe8ca84 ("x86/mm: Validate kernel_physical_mapping_init() PTE population") triggers this warning in SEV guests:

  WARNING: CPU: 0 PID: 0 at arch/x86/include/asm/pgalloc.h:87 phys_pmd_init+0x30d/0x386
  Call Trace:
   kernel_physical_mapping_init+0xce/0x259
   early_set_memory_enc_dec+0x10f/0x160
   kvm_smp_prepare_boot_cpu+0x71/0x9d
   start_kernel+0x1c9/0x50b
   secondary_startup_64+0xa4/0xb0

A SEV guest calls kernel_physical_mapping_init() to clear the encryption mask from an existing mapping. While doing so, it also splits large pages into smaller ones. To split a page, kernel_physical_mapping_init() allocates a new page and updates the existing entry. The set_{pud,pmd}_safe() helpers trigger a warning when updating an entry with a page in the present state.

Add a new kernel_physical_mapping_change() helper which uses the non-safe variants of set_{pmd,pud,p4d}() and {pmd,pud,p4d}_populate() routines when updating the entry. Since kernel_physical_mapping_change() may replace an existing entry with a new entry, the caller is responsible to flush the TLB at the end.

Change early_set_memory_enc_dec() to use kernel_physical_mapping_change() when it wants to clear the memory encryption mask from the page table entry.

[ bp:
  - massage commit message.
  - flesh out comment according to dhansen's request.
  - align function arguments at opening brace. ]

Fixes: 0a9fe8ca84 ("x86/mm: Validate kernel_physical_mapping_init() PTE population")
Signed-off-by: Brijesh Singh <brijesh.singh@amd.com>
Signed-off-by: Borislav Petkov <bp@suse.de>
Reviewed-by: Dave Hansen <dave.hansen@intel.com>
Acked-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Cc: Andy Lutomirski <luto@kernel.org>
Cc: Dan Williams <dan.j.williams@intel.com>
Cc: "H. Peter Anvin" <hpa@zytor.com>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: "Kirill A . Shutemov" <kirill.shutemov@linux.intel.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Thomas Lendacky <Thomas.Lendacky@amd.com>
Cc: x86-ml <x86@kernel.org>
Link: https://lkml.kernel.org/r/20190417154102.22613-1-brijesh.singh@amd.com
This commit is contained in:
parent
0e72499c3c
commit
eccd906484
@ -58,6 +58,37 @@
|
||||
|
||||
#include "ident_map.c"
|
||||
|
||||
/*
 * DEFINE_POPULATE(populate, parent, child, safe) emits a static inline
 * helper named <populate>_init().  The helper takes the mm, a pointer to
 * the <parent>_t entry, a pointer to the <child>_t table and a bool named
 * by the fourth macro argument.  When that bool is true the call is routed
 * to <populate>_safe(); otherwise it goes to the plain <populate>().
 */
#define DEFINE_POPULATE(populate, parent, child, safe)			\
static inline void populate##_init(struct mm_struct *mm,		\
				   parent##_t *entry,			\
				   child##_t *table, bool safe)		\
{									\
	if (!safe) {							\
		populate(mm, entry, table);				\
		return;							\
	}								\
	populate##_safe(mm, entry, table);				\
}
|
||||
|
||||
/*
 * Generate p4d_populate_init(), pgd_populate_init(), pud_populate_init()
 * and pmd_populate_kernel_init() from the DEFINE_POPULATE() template.
 */
DEFINE_POPULATE(p4d_populate, p4d, pud, init)
|
||||
DEFINE_POPULATE(pgd_populate, pgd, p4d, init)
|
||||
DEFINE_POPULATE(pud_populate, pud, pmd, init)
|
||||
DEFINE_POPULATE(pmd_populate_kernel, pmd, pte, init)
|
||||
|
||||
/*
 * DEFINE_ENTRY(level, valtype, safe) emits a static inline helper named
 * set_<level>_init().  The helper takes a pointer to a <level>_t slot, a
 * <valtype>_t value and a bool named by the third macro argument.  When
 * that bool is true the store goes through set_<level>_safe(); otherwise
 * it goes through the plain set_<level>().
 */
#define DEFINE_ENTRY(level, valtype, safe)				\
static inline void set_##level##_init(level##_t *slot,			\
				      valtype##_t val, bool safe)	\
{									\
	if (!safe) {							\
		set_##level(slot, val);					\
		return;							\
	}								\
	set_##level##_safe(slot, val);					\
}
|
||||
|
||||
/*
 * Generate set_p4d_init(), set_pud_init(), set_pmd_init() and
 * set_pte_init() from the DEFINE_ENTRY() template.
 */
DEFINE_ENTRY(p4d, p4d, init)
|
||||
DEFINE_ENTRY(pud, pud, init)
|
||||
DEFINE_ENTRY(pmd, pmd, init)
|
||||
DEFINE_ENTRY(pte, pte, init)
|
||||
|
||||
|
||||
/*
|
||||
* NOTE: pagetable_init alloc all the fixmap pagetables contiguous on the
|
||||
* physical space so we can cache the place of the first one and move
|
||||
@ -414,7 +445,7 @@ void __init cleanup_highmap(void)
|
||||
*/
|
||||
static unsigned long __meminit
|
||||
phys_pte_init(pte_t *pte_page, unsigned long paddr, unsigned long paddr_end,
|
||||
pgprot_t prot)
|
||||
pgprot_t prot, bool init)
|
||||
{
|
||||
unsigned long pages = 0, paddr_next;
|
||||
unsigned long paddr_last = paddr_end;
|
||||
@ -432,7 +463,7 @@ phys_pte_init(pte_t *pte_page, unsigned long paddr, unsigned long paddr_end,
|
||||
E820_TYPE_RAM) &&
|
||||
!e820__mapped_any(paddr & PAGE_MASK, paddr_next,
|
||||
E820_TYPE_RESERVED_KERN))
|
||||
set_pte_safe(pte, __pte(0));
|
||||
set_pte_init(pte, __pte(0), init);
|
||||
continue;
|
||||
}
|
||||
|
||||
@ -452,7 +483,7 @@ phys_pte_init(pte_t *pte_page, unsigned long paddr, unsigned long paddr_end,
|
||||
pr_info(" pte=%p addr=%lx pte=%016lx\n", pte, paddr,
|
||||
pfn_pte(paddr >> PAGE_SHIFT, PAGE_KERNEL).pte);
|
||||
pages++;
|
||||
set_pte_safe(pte, pfn_pte(paddr >> PAGE_SHIFT, prot));
|
||||
set_pte_init(pte, pfn_pte(paddr >> PAGE_SHIFT, prot), init);
|
||||
paddr_last = (paddr & PAGE_MASK) + PAGE_SIZE;
|
||||
}
|
||||
|
||||
@ -468,7 +499,7 @@ phys_pte_init(pte_t *pte_page, unsigned long paddr, unsigned long paddr_end,
|
||||
*/
|
||||
static unsigned long __meminit
|
||||
phys_pmd_init(pmd_t *pmd_page, unsigned long paddr, unsigned long paddr_end,
|
||||
unsigned long page_size_mask, pgprot_t prot)
|
||||
unsigned long page_size_mask, pgprot_t prot, bool init)
|
||||
{
|
||||
unsigned long pages = 0, paddr_next;
|
||||
unsigned long paddr_last = paddr_end;
|
||||
@ -487,7 +518,7 @@ phys_pmd_init(pmd_t *pmd_page, unsigned long paddr, unsigned long paddr_end,
|
||||
E820_TYPE_RAM) &&
|
||||
!e820__mapped_any(paddr & PMD_MASK, paddr_next,
|
||||
E820_TYPE_RESERVED_KERN))
|
||||
set_pmd_safe(pmd, __pmd(0));
|
||||
set_pmd_init(pmd, __pmd(0), init);
|
||||
continue;
|
||||
}
|
||||
|
||||
@ -496,7 +527,8 @@ phys_pmd_init(pmd_t *pmd_page, unsigned long paddr, unsigned long paddr_end,
|
||||
spin_lock(&init_mm.page_table_lock);
|
||||
pte = (pte_t *)pmd_page_vaddr(*pmd);
|
||||
paddr_last = phys_pte_init(pte, paddr,
|
||||
paddr_end, prot);
|
||||
paddr_end, prot,
|
||||
init);
|
||||
spin_unlock(&init_mm.page_table_lock);
|
||||
continue;
|
||||
}
|
||||
@ -524,19 +556,20 @@ phys_pmd_init(pmd_t *pmd_page, unsigned long paddr, unsigned long paddr_end,
|
||||
if (page_size_mask & (1<<PG_LEVEL_2M)) {
|
||||
pages++;
|
||||
spin_lock(&init_mm.page_table_lock);
|
||||
set_pte_safe((pte_t *)pmd,
|
||||
pfn_pte((paddr & PMD_MASK) >> PAGE_SHIFT,
|
||||
__pgprot(pgprot_val(prot) | _PAGE_PSE)));
|
||||
set_pte_init((pte_t *)pmd,
|
||||
pfn_pte((paddr & PMD_MASK) >> PAGE_SHIFT,
|
||||
__pgprot(pgprot_val(prot) | _PAGE_PSE)),
|
||||
init);
|
||||
spin_unlock(&init_mm.page_table_lock);
|
||||
paddr_last = paddr_next;
|
||||
continue;
|
||||
}
|
||||
|
||||
pte = alloc_low_page();
|
||||
paddr_last = phys_pte_init(pte, paddr, paddr_end, new_prot);
|
||||
paddr_last = phys_pte_init(pte, paddr, paddr_end, new_prot, init);
|
||||
|
||||
spin_lock(&init_mm.page_table_lock);
|
||||
pmd_populate_kernel_safe(&init_mm, pmd, pte);
|
||||
pmd_populate_kernel_init(&init_mm, pmd, pte, init);
|
||||
spin_unlock(&init_mm.page_table_lock);
|
||||
}
|
||||
update_page_count(PG_LEVEL_2M, pages);
|
||||
@ -551,7 +584,7 @@ phys_pmd_init(pmd_t *pmd_page, unsigned long paddr, unsigned long paddr_end,
|
||||
*/
|
||||
static unsigned long __meminit
|
||||
phys_pud_init(pud_t *pud_page, unsigned long paddr, unsigned long paddr_end,
|
||||
unsigned long page_size_mask)
|
||||
unsigned long page_size_mask, bool init)
|
||||
{
|
||||
unsigned long pages = 0, paddr_next;
|
||||
unsigned long paddr_last = paddr_end;
|
||||
@ -573,7 +606,7 @@ phys_pud_init(pud_t *pud_page, unsigned long paddr, unsigned long paddr_end,
|
||||
E820_TYPE_RAM) &&
|
||||
!e820__mapped_any(paddr & PUD_MASK, paddr_next,
|
||||
E820_TYPE_RESERVED_KERN))
|
||||
set_pud_safe(pud, __pud(0));
|
||||
set_pud_init(pud, __pud(0), init);
|
||||
continue;
|
||||
}
|
||||
|
||||
@ -583,7 +616,7 @@ phys_pud_init(pud_t *pud_page, unsigned long paddr, unsigned long paddr_end,
|
||||
paddr_last = phys_pmd_init(pmd, paddr,
|
||||
paddr_end,
|
||||
page_size_mask,
|
||||
prot);
|
||||
prot, init);
|
||||
continue;
|
||||
}
|
||||
/*
|
||||
@ -610,9 +643,10 @@ phys_pud_init(pud_t *pud_page, unsigned long paddr, unsigned long paddr_end,
|
||||
if (page_size_mask & (1<<PG_LEVEL_1G)) {
|
||||
pages++;
|
||||
spin_lock(&init_mm.page_table_lock);
|
||||
set_pte_safe((pte_t *)pud,
|
||||
pfn_pte((paddr & PUD_MASK) >> PAGE_SHIFT,
|
||||
PAGE_KERNEL_LARGE));
|
||||
set_pte_init((pte_t *)pud,
|
||||
pfn_pte((paddr & PUD_MASK) >> PAGE_SHIFT,
|
||||
PAGE_KERNEL_LARGE),
|
||||
init);
|
||||
spin_unlock(&init_mm.page_table_lock);
|
||||
paddr_last = paddr_next;
|
||||
continue;
|
||||
@ -620,10 +654,10 @@ phys_pud_init(pud_t *pud_page, unsigned long paddr, unsigned long paddr_end,
|
||||
|
||||
pmd = alloc_low_page();
|
||||
paddr_last = phys_pmd_init(pmd, paddr, paddr_end,
|
||||
page_size_mask, prot);
|
||||
page_size_mask, prot, init);
|
||||
|
||||
spin_lock(&init_mm.page_table_lock);
|
||||
pud_populate_safe(&init_mm, pud, pmd);
|
||||
pud_populate_init(&init_mm, pud, pmd, init);
|
||||
spin_unlock(&init_mm.page_table_lock);
|
||||
}
|
||||
|
||||
@ -634,14 +668,15 @@ phys_pud_init(pud_t *pud_page, unsigned long paddr, unsigned long paddr_end,
|
||||
|
||||
static unsigned long __meminit
|
||||
phys_p4d_init(p4d_t *p4d_page, unsigned long paddr, unsigned long paddr_end,
|
||||
unsigned long page_size_mask)
|
||||
unsigned long page_size_mask, bool init)
|
||||
{
|
||||
unsigned long paddr_next, paddr_last = paddr_end;
|
||||
unsigned long vaddr = (unsigned long)__va(paddr);
|
||||
int i = p4d_index(vaddr);
|
||||
|
||||
if (!pgtable_l5_enabled())
|
||||
return phys_pud_init((pud_t *) p4d_page, paddr, paddr_end, page_size_mask);
|
||||
return phys_pud_init((pud_t *) p4d_page, paddr, paddr_end,
|
||||
page_size_mask, init);
|
||||
|
||||
for (; i < PTRS_PER_P4D; i++, paddr = paddr_next) {
|
||||
p4d_t *p4d;
|
||||
@ -657,39 +692,34 @@ phys_p4d_init(p4d_t *p4d_page, unsigned long paddr, unsigned long paddr_end,
|
||||
E820_TYPE_RAM) &&
|
||||
!e820__mapped_any(paddr & P4D_MASK, paddr_next,
|
||||
E820_TYPE_RESERVED_KERN))
|
||||
set_p4d_safe(p4d, __p4d(0));
|
||||
set_p4d_init(p4d, __p4d(0), init);
|
||||
continue;
|
||||
}
|
||||
|
||||
if (!p4d_none(*p4d)) {
|
||||
pud = pud_offset(p4d, 0);
|
||||
paddr_last = phys_pud_init(pud, paddr,
|
||||
paddr_end,
|
||||
page_size_mask);
|
||||
paddr_last = phys_pud_init(pud, paddr, paddr_end,
|
||||
page_size_mask, init);
|
||||
continue;
|
||||
}
|
||||
|
||||
pud = alloc_low_page();
|
||||
paddr_last = phys_pud_init(pud, paddr, paddr_end,
|
||||
page_size_mask);
|
||||
page_size_mask, init);
|
||||
|
||||
spin_lock(&init_mm.page_table_lock);
|
||||
p4d_populate_safe(&init_mm, p4d, pud);
|
||||
p4d_populate_init(&init_mm, p4d, pud, init);
|
||||
spin_unlock(&init_mm.page_table_lock);
|
||||
}
|
||||
|
||||
return paddr_last;
|
||||
}
|
||||
|
||||
/*
|
||||
* Create page table mapping for the physical memory for specific physical
|
||||
* addresses. The virtual and physical addresses have to be aligned on PMD level
|
||||
* down. It returns the last physical address mapped.
|
||||
*/
|
||||
unsigned long __meminit
|
||||
kernel_physical_mapping_init(unsigned long paddr_start,
|
||||
unsigned long paddr_end,
|
||||
unsigned long page_size_mask)
|
||||
static unsigned long __meminit
|
||||
__kernel_physical_mapping_init(unsigned long paddr_start,
|
||||
unsigned long paddr_end,
|
||||
unsigned long page_size_mask,
|
||||
bool init)
|
||||
{
|
||||
bool pgd_changed = false;
|
||||
unsigned long vaddr, vaddr_start, vaddr_end, vaddr_next, paddr_last;
|
||||
@ -709,19 +739,22 @@ kernel_physical_mapping_init(unsigned long paddr_start,
|
||||
p4d = (p4d_t *)pgd_page_vaddr(*pgd);
|
||||
paddr_last = phys_p4d_init(p4d, __pa(vaddr),
|
||||
__pa(vaddr_end),
|
||||
page_size_mask);
|
||||
page_size_mask,
|
||||
init);
|
||||
continue;
|
||||
}
|
||||
|
||||
p4d = alloc_low_page();
|
||||
paddr_last = phys_p4d_init(p4d, __pa(vaddr), __pa(vaddr_end),
|
||||
page_size_mask);
|
||||
page_size_mask, init);
|
||||
|
||||
spin_lock(&init_mm.page_table_lock);
|
||||
if (pgtable_l5_enabled())
|
||||
pgd_populate_safe(&init_mm, pgd, p4d);
|
||||
pgd_populate_init(&init_mm, pgd, p4d, init);
|
||||
else
|
||||
p4d_populate_safe(&init_mm, p4d_offset(pgd, vaddr), (pud_t *) p4d);
|
||||
p4d_populate_init(&init_mm, p4d_offset(pgd, vaddr),
|
||||
(pud_t *) p4d, init);
|
||||
|
||||
spin_unlock(&init_mm.page_table_lock);
|
||||
pgd_changed = true;
|
||||
}
|
||||
@ -732,6 +765,37 @@ kernel_physical_mapping_init(unsigned long paddr_start,
|
||||
return paddr_last;
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
* Create page table mapping for the physical memory for specific physical
|
||||
* addresses. Note that it can only be used to populate non-present entries.
|
||||
* The virtual and physical addresses have to be aligned on PMD level
|
||||
* down. It returns the last physical address mapped.
|
||||
*/
|
||||
unsigned long __meminit
|
||||
kernel_physical_mapping_init(unsigned long paddr_start,
|
||||
unsigned long paddr_end,
|
||||
unsigned long page_size_mask)
|
||||
{
|
||||
return __kernel_physical_mapping_init(paddr_start, paddr_end,
|
||||
page_size_mask, true);
|
||||
}
|
||||
|
||||
/*
|
||||
* This function is similar to kernel_physical_mapping_init() above with the
|
||||
* exception that it uses set_{pud,pmd}() instead of the set_{pud,pte}_safe()
|
||||
* when updating the mapping. The caller is responsible to flush the TLBs after
|
||||
* the function returns.
|
||||
*/
|
||||
unsigned long __meminit
|
||||
kernel_physical_mapping_change(unsigned long paddr_start,
|
||||
unsigned long paddr_end,
|
||||
unsigned long page_size_mask)
|
||||
{
|
||||
return __kernel_physical_mapping_init(paddr_start, paddr_end,
|
||||
page_size_mask, false);
|
||||
}
|
||||
|
||||
#ifndef CONFIG_NUMA
|
||||
void __init initmem_init(void)
|
||||
{
|
||||
|
@ -301,9 +301,13 @@ static int __init early_set_memory_enc_dec(unsigned long vaddr,
|
||||
else
|
||||
split_page_size_mask = 1 << PG_LEVEL_2M;
|
||||
|
||||
kernel_physical_mapping_init(__pa(vaddr & pmask),
|
||||
__pa((vaddr_end & pmask) + psize),
|
||||
split_page_size_mask);
|
||||
/*
|
||||
* kernel_physical_mapping_change() does not flush the TLBs, so
|
||||
* a TLB flush is required after we exit from the for loop.
|
||||
*/
|
||||
kernel_physical_mapping_change(__pa(vaddr & pmask),
|
||||
__pa((vaddr_end & pmask) + psize),
|
||||
split_page_size_mask);
|
||||
}
|
||||
|
||||
ret = 0;
|
||||
|
@ -13,6 +13,9 @@ void early_ioremap_page_table_range_init(void);
|
||||
unsigned long kernel_physical_mapping_init(unsigned long start,
|
||||
unsigned long end,
|
||||
unsigned long page_size_mask);
|
||||
unsigned long kernel_physical_mapping_change(unsigned long start,
|
||||
unsigned long end,
|
||||
unsigned long page_size_mask);
|
||||
void zone_sizes_init(void);
|
||||
|
||||
extern int after_bootmem;
|
||||
|
Loading…
Reference in New Issue
Block a user