Merge branch 'x86/mm' into efi/core, to pick up dependencies

Signed-off-by: Ingo Molnar <mingo@kernel.org>
Ingo Molnar 2020-01-10 18:53:14 +01:00
commit 57ad87ddce
67 changed files with 795 additions and 679 deletions

View File

@ -0,0 +1,4 @@
#ifndef _ASM_ALPHA_VMALLOC_H
#define _ASM_ALPHA_VMALLOC_H
#endif /* _ASM_ALPHA_VMALLOC_H */

View File

@ -0,0 +1,4 @@
#ifndef _ASM_ARC_VMALLOC_H
#define _ASM_ARC_VMALLOC_H
#endif /* _ASM_ARC_VMALLOC_H */

View File

@ -0,0 +1,4 @@
#ifndef _ASM_ARM_VMALLOC_H
#define _ASM_ARM_VMALLOC_H
#endif /* _ASM_ARM_VMALLOC_H */

View File

@ -0,0 +1,4 @@
#ifndef _ASM_ARM64_VMALLOC_H
#define _ASM_ARM64_VMALLOC_H
#endif /* _ASM_ARM64_VMALLOC_H */

View File

@ -0,0 +1,4 @@
#ifndef _ASM_C6X_VMALLOC_H
#define _ASM_C6X_VMALLOC_H
#endif /* _ASM_C6X_VMALLOC_H */

View File

@ -0,0 +1,4 @@
#ifndef _ASM_CSKY_VMALLOC_H
#define _ASM_CSKY_VMALLOC_H
#endif /* _ASM_CSKY_VMALLOC_H */

View File

@ -0,0 +1,4 @@
#ifndef _ASM_H8300_VMALLOC_H
#define _ASM_H8300_VMALLOC_H
#endif /* _ASM_H8300_VMALLOC_H */

View File

@ -0,0 +1,4 @@
#ifndef _ASM_HEXAGON_VMALLOC_H
#define _ASM_HEXAGON_VMALLOC_H
#endif /* _ASM_HEXAGON_VMALLOC_H */

View File

@ -0,0 +1,4 @@
#ifndef _ASM_IA64_VMALLOC_H
#define _ASM_IA64_VMALLOC_H
#endif /* _ASM_IA64_VMALLOC_H */

View File

@ -0,0 +1,4 @@
#ifndef _ASM_M68K_VMALLOC_H
#define _ASM_M68K_VMALLOC_H
#endif /* _ASM_M68K_VMALLOC_H */

View File

@ -0,0 +1,4 @@
#ifndef _ASM_MICROBLAZE_VMALLOC_H
#define _ASM_MICROBLAZE_VMALLOC_H
#endif /* _ASM_MICROBLAZE_VMALLOC_H */

View File

@ -0,0 +1,4 @@
#ifndef _ASM_MIPS_VMALLOC_H
#define _ASM_MIPS_VMALLOC_H
#endif /* _ASM_MIPS_VMALLOC_H */

View File

@ -0,0 +1,4 @@
#ifndef _ASM_NDS32_VMALLOC_H
#define _ASM_NDS32_VMALLOC_H
#endif /* _ASM_NDS32_VMALLOC_H */

View File

@ -0,0 +1,4 @@
#ifndef _ASM_NIOS2_VMALLOC_H
#define _ASM_NIOS2_VMALLOC_H
#endif /* _ASM_NIOS2_VMALLOC_H */

View File

@ -0,0 +1,4 @@
#ifndef _ASM_OPENRISC_VMALLOC_H
#define _ASM_OPENRISC_VMALLOC_H
#endif /* _ASM_OPENRISC_VMALLOC_H */

View File

@ -0,0 +1,4 @@
#ifndef _ASM_PARISC_VMALLOC_H
#define _ASM_PARISC_VMALLOC_H
#endif /* _ASM_PARISC_VMALLOC_H */

View File

@ -0,0 +1,4 @@
#ifndef _ASM_POWERPC_VMALLOC_H
#define _ASM_POWERPC_VMALLOC_H
#endif /* _ASM_POWERPC_VMALLOC_H */

View File

@ -0,0 +1,4 @@
#ifndef _ASM_RISCV_VMALLOC_H
#define _ASM_RISCV_VMALLOC_H
#endif /* _ASM_RISCV_VMALLOC_H */

View File

@ -0,0 +1,4 @@
#ifndef _ASM_S390_VMALLOC_H
#define _ASM_S390_VMALLOC_H
#endif /* _ASM_S390_VMALLOC_H */

View File

@ -0,0 +1,4 @@
#ifndef _ASM_SH_VMALLOC_H
#define _ASM_SH_VMALLOC_H
#endif /* _ASM_SH_VMALLOC_H */

View File

@ -0,0 +1,4 @@
#ifndef _ASM_SPARC_VMALLOC_H
#define _ASM_SPARC_VMALLOC_H
#endif /* _ASM_SPARC_VMALLOC_H */

View File

@ -0,0 +1,4 @@
#ifndef _ASM_UM_VMALLOC_H
#define _ASM_UM_VMALLOC_H
#endif /* _ASM_UM_VMALLOC_H */

View File

@ -0,0 +1,4 @@
#ifndef _ASM_UNICORE32_VMALLOC_H
#define _ASM_UNICORE32_VMALLOC_H
#endif /* _ASM_UNICORE32_VMALLOC_H */

View File

@ -1512,7 +1512,7 @@ config X86_CPA_STATISTICS
bool "Enable statistic for Change Page Attribute"
depends on DEBUG_FS
---help---
Expose statistics about the Change Page Attribute mechanims, which
Expose statistics about the Change Page Attribute mechanism, which
helps to determine the effectiveness of preserving large and huge
page mappings when mapping protections are changed.

View File

@ -6,6 +6,7 @@
#include <linux/percpu-defs.h>
#include <asm/processor.h>
#include <asm/intel_ds.h>
#include <asm/pgtable_areas.h>
#ifdef CONFIG_X86_64
@ -134,15 +135,6 @@ DECLARE_PER_CPU(struct cea_exception_stacks *, cea_exception_stacks);
extern void setup_cpu_entry_areas(void);
extern void cea_set_pte(void *cea_vaddr, phys_addr_t pa, pgprot_t flags);
/* Single page reserved for the readonly IDT mapping: */
#define CPU_ENTRY_AREA_RO_IDT CPU_ENTRY_AREA_BASE
#define CPU_ENTRY_AREA_PER_CPU (CPU_ENTRY_AREA_RO_IDT + PAGE_SIZE)
#define CPU_ENTRY_AREA_RO_IDT_VADDR ((void *)CPU_ENTRY_AREA_RO_IDT)
#define CPU_ENTRY_AREA_MAP_SIZE \
(CPU_ENTRY_AREA_PER_CPU + CPU_ENTRY_AREA_ARRAY_SIZE - CPU_ENTRY_AREA_BASE)
extern struct cpu_entry_area *get_cpu_entry_area(int cpu);
static inline struct entry_stack *cpu_entry_stack(int cpu)

View File

@ -0,0 +1,27 @@
/* SPDX-License-Identifier: GPL-2.0 */
#ifndef _ASM_X86_MEMTYPE_H
#define _ASM_X86_MEMTYPE_H
#include <linux/types.h>
#include <asm/pgtable_types.h>
extern bool pat_enabled(void);
extern void pat_disable(const char *reason);
extern void pat_init(void);
extern void init_cache_modes(void);
extern int memtype_reserve(u64 start, u64 end,
enum page_cache_mode req_pcm, enum page_cache_mode *ret_pcm);
extern int memtype_free(u64 start, u64 end);
extern int memtype_kernel_map_sync(u64 base, unsigned long size,
enum page_cache_mode pcm);
extern int memtype_reserve_io(resource_size_t start, resource_size_t end,
enum page_cache_mode *pcm);
extern void memtype_free_io(resource_size_t start, resource_size_t end);
extern bool pat_pfn_immune_to_uc_mtrr(unsigned long pfn);
#endif /* _ASM_X86_MEMTYPE_H */
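The declarations above are the renamed memtype API (the old reserve_memtype()/free_memtype() names from <asm/pat.h>, which is removed later in this diff). A minimal, hypothetical usage sketch, loosely modeled on what __ioremap_caller() does further down; the helper name, the UC- request and the bare ioremap() fallback are illustrative assumptions, not part of this commit:

/* Hypothetical example; assumes <asm/memtype.h> and <asm/io.h>. */
static void __iomem *map_device_regs(u64 phys, unsigned long size)
{
	enum page_cache_mode pcm;
	void __iomem *regs;

	/* Track the range in the memtype tree, requesting UC-: */
	if (memtype_reserve(phys, phys + size, _PAGE_CACHE_MODE_UC_MINUS, &pcm))
		return NULL;

	/* Keep any kernel identity mapping of the range in sync: */
	if (memtype_kernel_map_sync(phys, size, pcm) < 0)
		goto out_free;

	regs = ioremap(phys, size);
	if (regs)
		return regs;

out_free:
	memtype_free(phys, phys + size);
	return NULL;
}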

View File

@ -69,14 +69,6 @@ struct ldt_struct {
int slot;
};
/* This is a multiple of PAGE_SIZE. */
#define LDT_SLOT_STRIDE (LDT_ENTRIES * LDT_ENTRY_SIZE)
static inline void *ldt_slot_va(int slot)
{
return (void *)(LDT_BASE_ADDR + LDT_SLOT_STRIDE * slot);
}
/*
* Used for LDT copy/destruction.
*/
@ -99,87 +91,21 @@ static inline void destroy_context_ldt(struct mm_struct *mm) { }
static inline void ldt_arch_exit_mmap(struct mm_struct *mm) { }
#endif
#ifdef CONFIG_MODIFY_LDT_SYSCALL
extern void load_mm_ldt(struct mm_struct *mm);
extern void switch_ldt(struct mm_struct *prev, struct mm_struct *next);
#else
static inline void load_mm_ldt(struct mm_struct *mm)
{
#ifdef CONFIG_MODIFY_LDT_SYSCALL
struct ldt_struct *ldt;
/* READ_ONCE synchronizes with smp_store_release */
ldt = READ_ONCE(mm->context.ldt);
/*
* Any change to mm->context.ldt is followed by an IPI to all
* CPUs with the mm active. The LDT will not be freed until
* after the IPI is handled by all such CPUs. This means that,
* if the ldt_struct changes before we return, the values we see
* will be safe, and the new values will be loaded before we run
* any user code.
*
* NB: don't try to convert this to use RCU without extreme care.
* We would still need IRQs off, because we don't want to change
* the local LDT after an IPI loaded a newer value than the one
* that we can see.
*/
if (unlikely(ldt)) {
if (static_cpu_has(X86_FEATURE_PTI)) {
if (WARN_ON_ONCE((unsigned long)ldt->slot > 1)) {
/*
* Whoops -- either the new LDT isn't mapped
* (if slot == -1) or is mapped into a bogus
* slot (if slot > 1).
*/
clear_LDT();
return;
}
/*
* If page table isolation is enabled, ldt->entries
* will not be mapped in the userspace pagetables.
* Tell the CPU to access the LDT through the alias
* at ldt_slot_va(ldt->slot).
*/
set_ldt(ldt_slot_va(ldt->slot), ldt->nr_entries);
} else {
set_ldt(ldt->entries, ldt->nr_entries);
}
} else {
clear_LDT();
}
#else
clear_LDT();
#endif
}
static inline void switch_ldt(struct mm_struct *prev, struct mm_struct *next)
{
#ifdef CONFIG_MODIFY_LDT_SYSCALL
/*
* Load the LDT if either the old or new mm had an LDT.
*
* An mm will never go from having an LDT to not having an LDT. Two
* mms never share an LDT, so we don't gain anything by checking to
* see whether the LDT changed. There's also no guarantee that
* prev->context.ldt actually matches LDTR, but, if LDTR is non-NULL,
* then prev->context.ldt will also be non-NULL.
*
* If we really cared, we could optimize the case where prev == next
* and we're exiting lazy mode. Most of the time, if this happens,
* we don't actually need to reload LDTR, but modify_ldt() is mostly
* used by legacy code and emulators where we don't need this level of
* performance.
*
* This uses | instead of || because it generates better code.
*/
if (unlikely((unsigned long)prev->context.ldt |
(unsigned long)next->context.ldt))
load_mm_ldt(next);
#endif
DEBUG_LOCKS_WARN_ON(preemptible());
}
#endif
void enter_lazy_tlb(struct mm_struct *mm, struct task_struct *tsk);
extern void enter_lazy_tlb(struct mm_struct *mm, struct task_struct *tsk);
/*
* Init a new mm. Used on mm copies, like at fork()

View File

@ -24,7 +24,7 @@
#define _ASM_X86_MTRR_H
#include <uapi/asm/mtrr.h>
#include <asm/pat.h>
#include <asm/memtype.h>
/*
@ -86,7 +86,7 @@ static inline void mtrr_centaur_report_mcr(int mcr, u32 lo, u32 hi)
}
static inline void mtrr_bp_init(void)
{
pat_disable("MTRRs disabled, skipping PAT initialization too.");
pat_disable("PAT support disabled because CONFIG_MTRR is disabled in the kernel.");
}
#define mtrr_ap_init() do {} while (0)

View File

@ -1,27 +0,0 @@
/* SPDX-License-Identifier: GPL-2.0 */
#ifndef _ASM_X86_PAT_H
#define _ASM_X86_PAT_H
#include <linux/types.h>
#include <asm/pgtable_types.h>
bool pat_enabled(void);
void pat_disable(const char *reason);
extern void pat_init(void);
extern void init_cache_modes(void);
extern int reserve_memtype(u64 start, u64 end,
enum page_cache_mode req_pcm, enum page_cache_mode *ret_pcm);
extern int free_memtype(u64 start, u64 end);
extern int kernel_map_sync_memtype(u64 base, unsigned long size,
enum page_cache_mode pcm);
int io_reserve_memtype(resource_size_t start, resource_size_t end,
enum page_cache_mode *pcm);
void io_free_memtype(resource_size_t start, resource_size_t end);
bool pat_pfn_immune_to_uc_mtrr(unsigned long pfn);
#endif /* _ASM_X86_PAT_H */

View File

@ -9,7 +9,7 @@
#include <linux/scatterlist.h>
#include <linux/numa.h>
#include <asm/io.h>
#include <asm/pat.h>
#include <asm/memtype.h>
#include <asm/x86_init.h>
struct pci_sysdata {

View File

@ -0,0 +1,53 @@
#ifndef _ASM_X86_PGTABLE_32_AREAS_H
#define _ASM_X86_PGTABLE_32_AREAS_H
#include <asm/cpu_entry_area.h>
/*
* Just any arbitrary offset to the start of the vmalloc VM area: the
* current 8MB value just means that there will be an 8MB "hole" after the
* physical memory until the kernel virtual memory starts. That means that
* any out-of-bounds memory accesses will hopefully be caught.
* The vmalloc() routines leave a hole of 4kB between each vmalloced
* area for the same reason. ;)
*/
#define VMALLOC_OFFSET (8 * 1024 * 1024)
#ifndef __ASSEMBLY__
extern bool __vmalloc_start_set; /* set once high_memory is set */
#endif
#define VMALLOC_START ((unsigned long)high_memory + VMALLOC_OFFSET)
#ifdef CONFIG_X86_PAE
#define LAST_PKMAP 512
#else
#define LAST_PKMAP 1024
#endif
#define CPU_ENTRY_AREA_PAGES (NR_CPUS * DIV_ROUND_UP(sizeof(struct cpu_entry_area), PAGE_SIZE))
/* The +1 is for the readonly IDT page: */
#define CPU_ENTRY_AREA_BASE \
((FIXADDR_TOT_START - PAGE_SIZE*(CPU_ENTRY_AREA_PAGES+1)) & PMD_MASK)
#define LDT_BASE_ADDR \
((CPU_ENTRY_AREA_BASE - PAGE_SIZE) & PMD_MASK)
#define LDT_END_ADDR (LDT_BASE_ADDR + PMD_SIZE)
#define PKMAP_BASE \
((LDT_BASE_ADDR - PAGE_SIZE) & PMD_MASK)
#ifdef CONFIG_HIGHMEM
# define VMALLOC_END (PKMAP_BASE - 2 * PAGE_SIZE)
#else
# define VMALLOC_END (LDT_BASE_ADDR - 2 * PAGE_SIZE)
#endif
#define MODULES_VADDR VMALLOC_START
#define MODULES_END VMALLOC_END
#define MODULES_LEN (MODULES_VADDR - MODULES_END)
#define MAXMEM (VMALLOC_END - PAGE_OFFSET - __VMALLOC_RESERVE)
#endif /* _ASM_X86_PGTABLE_32_AREAS_H */
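Taken together, the definitions in this new header carve up the top of the 32-bit kernel address space, from the fixmap downwards. A rough worked example (illustrative values only: PAGE_SIZE = 4 KB, non-PAE so PMD_SIZE = 4 MB, NR_CPUS = 8, and struct cpu_entry_area taking the 43-page upper bound quoted in the old pgtable_32_types.h below):

	CPU_ENTRY_AREA_PAGES = 8 * 43 = 344 pages (~1.3 MB)
	CPU_ENTRY_AREA_BASE  = (FIXADDR_TOT_START - 345 * 4 KB) rounded down to a 4 MB boundary
	LDT_BASE_ADDR        = the 4 MB slot just below CPU_ENTRY_AREA_BASE
	LDT_END_ADDR         = LDT_BASE_ADDR + 4 MB
	PKMAP_BASE           = the 4 MB slot just below LDT_BASE_ADDR
	VMALLOC_END          = PKMAP_BASE - 8 KB   (with CONFIG_HIGHMEM)
	VMALLOC_START        = high_memory + 8 MB

so the vmalloc area sits between the end of lowmem (plus the 8 MB guard hole) and the kmap, LDT and cpu-entry areas stacked beneath the fixmap.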

View File

@ -1,6 +1,6 @@
/* SPDX-License-Identifier: GPL-2.0 */
#ifndef _ASM_X86_PGTABLE_32_DEFS_H
#define _ASM_X86_PGTABLE_32_DEFS_H
#ifndef _ASM_X86_PGTABLE_32_TYPES_H
#define _ASM_X86_PGTABLE_32_TYPES_H
/*
* The Linux x86 paging architecture is 'compile-time dual-mode', it
@ -20,55 +20,4 @@
#define PGDIR_SIZE (1UL << PGDIR_SHIFT)
#define PGDIR_MASK (~(PGDIR_SIZE - 1))
/* Just any arbitrary offset to the start of the vmalloc VM area: the
* current 8MB value just means that there will be a 8MB "hole" after the
* physical memory until the kernel virtual memory starts. That means that
* any out-of-bounds memory accesses will hopefully be caught.
* The vmalloc() routines leaves a hole of 4kB between each vmalloced
* area for the same reason. ;)
*/
#define VMALLOC_OFFSET (8 * 1024 * 1024)
#ifndef __ASSEMBLY__
extern bool __vmalloc_start_set; /* set once high_memory is set */
#endif
#define VMALLOC_START ((unsigned long)high_memory + VMALLOC_OFFSET)
#ifdef CONFIG_X86_PAE
#define LAST_PKMAP 512
#else
#define LAST_PKMAP 1024
#endif
/*
* This is an upper bound on sizeof(struct cpu_entry_area) / PAGE_SIZE.
* Define this here and validate with BUILD_BUG_ON() in cpu_entry_area.c
* to avoid include recursion hell.
*/
#define CPU_ENTRY_AREA_PAGES (NR_CPUS * 43)
/* The +1 is for the readonly IDT page: */
#define CPU_ENTRY_AREA_BASE \
((FIXADDR_TOT_START - PAGE_SIZE*(CPU_ENTRY_AREA_PAGES+1)) & PMD_MASK)
#define LDT_BASE_ADDR \
((CPU_ENTRY_AREA_BASE - PAGE_SIZE) & PMD_MASK)
#define LDT_END_ADDR (LDT_BASE_ADDR + PMD_SIZE)
#define PKMAP_BASE \
((LDT_BASE_ADDR - PAGE_SIZE) & PMD_MASK)
#ifdef CONFIG_HIGHMEM
# define VMALLOC_END (PKMAP_BASE - 2 * PAGE_SIZE)
#else
# define VMALLOC_END (LDT_BASE_ADDR - 2 * PAGE_SIZE)
#endif
#define MODULES_VADDR VMALLOC_START
#define MODULES_END VMALLOC_END
#define MODULES_LEN (MODULES_VADDR - MODULES_END)
#define MAXMEM (VMALLOC_END - PAGE_OFFSET - __VMALLOC_RESERVE)
#endif /* _ASM_X86_PGTABLE_32_DEFS_H */
#endif /* _ASM_X86_PGTABLE_32_TYPES_H */

View File

@ -0,0 +1,16 @@
#ifndef _ASM_X86_PGTABLE_AREAS_H
#define _ASM_X86_PGTABLE_AREAS_H
#ifdef CONFIG_X86_32
# include <asm/pgtable_32_areas.h>
#endif
/* Single page reserved for the readonly IDT mapping: */
#define CPU_ENTRY_AREA_RO_IDT CPU_ENTRY_AREA_BASE
#define CPU_ENTRY_AREA_PER_CPU (CPU_ENTRY_AREA_RO_IDT + PAGE_SIZE)
#define CPU_ENTRY_AREA_RO_IDT_VADDR ((void *)CPU_ENTRY_AREA_RO_IDT)
#define CPU_ENTRY_AREA_MAP_SIZE (CPU_ENTRY_AREA_PER_CPU + CPU_ENTRY_AREA_ARRAY_SIZE - CPU_ENTRY_AREA_BASE)
#endif /* _ASM_X86_PGTABLE_AREAS_H */
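From the three definitions above the layout follows directly: the read-only IDT alias occupies the single page at CPU_ENTRY_AREA_BASE, the per-CPU array starts one page above it, and the map size reduces to

	CPU_ENTRY_AREA_MAP_SIZE = (CPU_ENTRY_AREA_BASE + PAGE_SIZE + CPU_ENTRY_AREA_ARRAY_SIZE) - CPU_ENTRY_AREA_BASE
	                        = PAGE_SIZE + CPU_ENTRY_AREA_ARRAY_SIZE

i.e. one IDT page plus the per-CPU cpu_entry_area array (CPU_ENTRY_AREA_ARRAY_SIZE is defined in <asm/cpu_entry_area.h>).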

View File

@ -110,11 +110,6 @@
#define _PAGE_PROTNONE (_AT(pteval_t, 1) << _PAGE_BIT_PROTNONE)
#define _PAGE_TABLE_NOENC (_PAGE_PRESENT | _PAGE_RW | _PAGE_USER |\
_PAGE_ACCESSED | _PAGE_DIRTY)
#define _KERNPG_TABLE_NOENC (_PAGE_PRESENT | _PAGE_RW | \
_PAGE_ACCESSED | _PAGE_DIRTY)
/*
* Set of bits not changed in pte_modify. The pte's
* protection key is treated like _PAGE_RW, for
@ -136,80 +131,93 @@
*/
#ifndef __ASSEMBLY__
enum page_cache_mode {
_PAGE_CACHE_MODE_WB = 0,
_PAGE_CACHE_MODE_WC = 1,
_PAGE_CACHE_MODE_WB = 0,
_PAGE_CACHE_MODE_WC = 1,
_PAGE_CACHE_MODE_UC_MINUS = 2,
_PAGE_CACHE_MODE_UC = 3,
_PAGE_CACHE_MODE_WT = 4,
_PAGE_CACHE_MODE_WP = 5,
_PAGE_CACHE_MODE_NUM = 8
_PAGE_CACHE_MODE_UC = 3,
_PAGE_CACHE_MODE_WT = 4,
_PAGE_CACHE_MODE_WP = 5,
_PAGE_CACHE_MODE_NUM = 8
};
#endif
#define _PAGE_CACHE_MASK (_PAGE_PAT | _PAGE_PCD | _PAGE_PWT)
#define _PAGE_ENC (_AT(pteval_t, sme_me_mask))
#define _PAGE_CACHE_MASK (_PAGE_PWT | _PAGE_PCD | _PAGE_PAT)
#define _PAGE_NOCACHE (cachemode2protval(_PAGE_CACHE_MODE_UC))
#define _PAGE_CACHE_WP (cachemode2protval(_PAGE_CACHE_MODE_WP))
#define PAGE_NONE __pgprot(_PAGE_PROTNONE | _PAGE_ACCESSED)
#define PAGE_SHARED __pgprot(_PAGE_PRESENT | _PAGE_RW | _PAGE_USER | \
_PAGE_ACCESSED | _PAGE_NX)
#define __PP _PAGE_PRESENT
#define __RW _PAGE_RW
#define _USR _PAGE_USER
#define ___A _PAGE_ACCESSED
#define ___D _PAGE_DIRTY
#define ___G _PAGE_GLOBAL
#define __NX _PAGE_NX
#define PAGE_SHARED_EXEC __pgprot(_PAGE_PRESENT | _PAGE_RW | \
_PAGE_USER | _PAGE_ACCESSED)
#define PAGE_COPY_NOEXEC __pgprot(_PAGE_PRESENT | _PAGE_USER | \
_PAGE_ACCESSED | _PAGE_NX)
#define PAGE_COPY_EXEC __pgprot(_PAGE_PRESENT | _PAGE_USER | \
_PAGE_ACCESSED)
#define PAGE_COPY PAGE_COPY_NOEXEC
#define PAGE_READONLY __pgprot(_PAGE_PRESENT | _PAGE_USER | \
_PAGE_ACCESSED | _PAGE_NX)
#define PAGE_READONLY_EXEC __pgprot(_PAGE_PRESENT | _PAGE_USER | \
_PAGE_ACCESSED)
#define _ENC _PAGE_ENC
#define __WP _PAGE_CACHE_WP
#define __NC _PAGE_NOCACHE
#define _PSE _PAGE_PSE
#define __PAGE_KERNEL_EXEC \
(_PAGE_PRESENT | _PAGE_RW | _PAGE_DIRTY | _PAGE_ACCESSED | _PAGE_GLOBAL)
#define __PAGE_KERNEL (__PAGE_KERNEL_EXEC | _PAGE_NX)
#define pgprot_val(x) ((x).pgprot)
#define __pgprot(x) ((pgprot_t) { (x) } )
#define __pg(x) __pgprot(x)
#define __PAGE_KERNEL_RO (__PAGE_KERNEL & ~_PAGE_RW)
#define __PAGE_KERNEL_RX (__PAGE_KERNEL_EXEC & ~_PAGE_RW)
#define __PAGE_KERNEL_NOCACHE (__PAGE_KERNEL | _PAGE_NOCACHE)
#define __PAGE_KERNEL_VVAR (__PAGE_KERNEL_RO | _PAGE_USER)
#define __PAGE_KERNEL_LARGE (__PAGE_KERNEL | _PAGE_PSE)
#define __PAGE_KERNEL_LARGE_EXEC (__PAGE_KERNEL_EXEC | _PAGE_PSE)
#define __PAGE_KERNEL_WP (__PAGE_KERNEL | _PAGE_CACHE_WP)
#define _PAGE_PAT_LARGE (_AT(pteval_t, 1) << _PAGE_BIT_PAT_LARGE)
#define PAGE_NONE __pg( 0| 0| 0|___A| 0| 0| 0|___G)
#define PAGE_SHARED __pg(__PP|__RW|_USR|___A|__NX| 0| 0| 0)
#define PAGE_SHARED_EXEC __pg(__PP|__RW|_USR|___A| 0| 0| 0| 0)
#define PAGE_COPY_NOEXEC __pg(__PP| 0|_USR|___A|__NX| 0| 0| 0)
#define PAGE_COPY_EXEC __pg(__PP| 0|_USR|___A| 0| 0| 0| 0)
#define PAGE_COPY __pg(__PP| 0|_USR|___A|__NX| 0| 0| 0)
#define PAGE_READONLY __pg(__PP| 0|_USR|___A|__NX| 0| 0| 0)
#define PAGE_READONLY_EXEC __pg(__PP| 0|_USR|___A| 0| 0| 0| 0)
#define __PAGE_KERNEL (__PP|__RW| 0|___A|__NX|___D| 0|___G)
#define __PAGE_KERNEL_EXEC (__PP|__RW| 0|___A| 0|___D| 0|___G)
#define _KERNPG_TABLE_NOENC (__PP|__RW| 0|___A| 0|___D| 0| 0)
#define _KERNPG_TABLE (__PP|__RW| 0|___A| 0|___D| 0| 0| _ENC)
#define _PAGE_TABLE_NOENC (__PP|__RW|_USR|___A| 0|___D| 0| 0)
#define _PAGE_TABLE (__PP|__RW|_USR|___A| 0|___D| 0| 0| _ENC)
#define __PAGE_KERNEL_RO (__PP| 0| 0|___A|__NX|___D| 0|___G)
#define __PAGE_KERNEL_RX (__PP| 0| 0|___A| 0|___D| 0|___G)
#define __PAGE_KERNEL_NOCACHE (__PP|__RW| 0|___A|__NX|___D| 0|___G| __NC)
#define __PAGE_KERNEL_VVAR (__PP| 0|_USR|___A|__NX|___D| 0|___G)
#define __PAGE_KERNEL_LARGE (__PP|__RW| 0|___A|__NX|___D|_PSE|___G)
#define __PAGE_KERNEL_LARGE_EXEC (__PP|__RW| 0|___A| 0|___D|_PSE|___G)
#define __PAGE_KERNEL_WP (__PP|__RW| 0|___A|__NX|___D| 0|___G| __WP)
#define __PAGE_KERNEL_IO __PAGE_KERNEL
#define __PAGE_KERNEL_IO_NOCACHE __PAGE_KERNEL_NOCACHE
#define __PAGE_KERNEL_IO (__PAGE_KERNEL)
#define __PAGE_KERNEL_IO_NOCACHE (__PAGE_KERNEL_NOCACHE)
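To read the column notation above: each column is one protection bit, abbreviated at the top of the block (__PP, __RW, _USR, ___A, __NX, ___D, _PSE, ___G, plus __NC, __WP and _ENC as trailing extras), and a bare 0 leaves that column empty so the rows line up. Expanding one row by hand, as an example:

	__PAGE_KERNEL = (__PP|__RW| 0|___A|__NX|___D| 0|___G)
	              =  _PAGE_PRESENT | _PAGE_RW | _PAGE_ACCESSED |
	                 _PAGE_NX | _PAGE_DIRTY | _PAGE_GLOBAL

which is the same bit set as the old open-coded (__PAGE_KERNEL_EXEC | _PAGE_NX) definition removed above.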
#ifndef __ASSEMBLY__
#define _PAGE_ENC (_AT(pteval_t, sme_me_mask))
#define __PAGE_KERNEL_ENC (__PAGE_KERNEL | _ENC)
#define __PAGE_KERNEL_ENC_WP (__PAGE_KERNEL_WP | _ENC)
#define __PAGE_KERNEL_NOENC (__PAGE_KERNEL | 0)
#define __PAGE_KERNEL_NOENC_WP (__PAGE_KERNEL_WP | 0)
#define _KERNPG_TABLE (_PAGE_PRESENT | _PAGE_RW | _PAGE_ACCESSED | \
_PAGE_DIRTY | _PAGE_ENC)
#define _PAGE_TABLE (_KERNPG_TABLE | _PAGE_USER)
#define __pgprot_mask(x) __pgprot((x) & __default_kernel_pte_mask)
#define __PAGE_KERNEL_ENC (__PAGE_KERNEL | _PAGE_ENC)
#define __PAGE_KERNEL_ENC_WP (__PAGE_KERNEL_WP | _PAGE_ENC)
#define PAGE_KERNEL __pgprot_mask(__PAGE_KERNEL | _ENC)
#define PAGE_KERNEL_NOENC __pgprot_mask(__PAGE_KERNEL | 0)
#define PAGE_KERNEL_RO __pgprot_mask(__PAGE_KERNEL_RO | _ENC)
#define PAGE_KERNEL_EXEC __pgprot_mask(__PAGE_KERNEL_EXEC | _ENC)
#define PAGE_KERNEL_EXEC_NOENC __pgprot_mask(__PAGE_KERNEL_EXEC | 0)
#define PAGE_KERNEL_RX __pgprot_mask(__PAGE_KERNEL_RX | _ENC)
#define PAGE_KERNEL_NOCACHE __pgprot_mask(__PAGE_KERNEL_NOCACHE | _ENC)
#define PAGE_KERNEL_LARGE __pgprot_mask(__PAGE_KERNEL_LARGE | _ENC)
#define PAGE_KERNEL_LARGE_EXEC __pgprot_mask(__PAGE_KERNEL_LARGE_EXEC | _ENC)
#define PAGE_KERNEL_VVAR __pgprot_mask(__PAGE_KERNEL_VVAR | _ENC)
#define __PAGE_KERNEL_NOENC (__PAGE_KERNEL)
#define __PAGE_KERNEL_NOENC_WP (__PAGE_KERNEL_WP)
#define default_pgprot(x) __pgprot((x) & __default_kernel_pte_mask)
#define PAGE_KERNEL default_pgprot(__PAGE_KERNEL | _PAGE_ENC)
#define PAGE_KERNEL_NOENC default_pgprot(__PAGE_KERNEL)
#define PAGE_KERNEL_RO default_pgprot(__PAGE_KERNEL_RO | _PAGE_ENC)
#define PAGE_KERNEL_EXEC default_pgprot(__PAGE_KERNEL_EXEC | _PAGE_ENC)
#define PAGE_KERNEL_EXEC_NOENC default_pgprot(__PAGE_KERNEL_EXEC)
#define PAGE_KERNEL_RX default_pgprot(__PAGE_KERNEL_RX | _PAGE_ENC)
#define PAGE_KERNEL_NOCACHE default_pgprot(__PAGE_KERNEL_NOCACHE | _PAGE_ENC)
#define PAGE_KERNEL_LARGE default_pgprot(__PAGE_KERNEL_LARGE | _PAGE_ENC)
#define PAGE_KERNEL_LARGE_EXEC default_pgprot(__PAGE_KERNEL_LARGE_EXEC | _PAGE_ENC)
#define PAGE_KERNEL_VVAR default_pgprot(__PAGE_KERNEL_VVAR | _PAGE_ENC)
#define PAGE_KERNEL_IO default_pgprot(__PAGE_KERNEL_IO)
#define PAGE_KERNEL_IO_NOCACHE default_pgprot(__PAGE_KERNEL_IO_NOCACHE)
#define PAGE_KERNEL_IO __pgprot_mask(__PAGE_KERNEL_IO)
#define PAGE_KERNEL_IO_NOCACHE __pgprot_mask(__PAGE_KERNEL_IO_NOCACHE)
#endif /* __ASSEMBLY__ */
@ -449,9 +457,6 @@ static inline pteval_t pte_flags(pte_t pte)
return native_pte_val(pte) & PTE_FLAGS_MASK;
}
#define pgprot_val(x) ((x).pgprot)
#define __pgprot(x) ((pgprot_t) { (x) } )
extern uint16_t __cachemode2pte_tbl[_PAGE_CACHE_MODE_NUM];
extern uint8_t __pte2cachemode_tbl[8];

View File

@ -0,0 +1,6 @@
#ifndef _ASM_X86_VMALLOC_H
#define _ASM_X86_VMALLOC_H
#include <asm/pgtable_areas.h>
#endif /* _ASM_X86_VMALLOC_H */

View File

@ -49,7 +49,7 @@
#include <asm/cpu.h>
#include <asm/mce.h>
#include <asm/msr.h>
#include <asm/pat.h>
#include <asm/memtype.h>
#include <asm/microcode.h>
#include <asm/microcode_intel.h>
#include <asm/intel-family.h>

View File

@ -15,7 +15,7 @@
#include <asm/tlbflush.h>
#include <asm/mtrr.h>
#include <asm/msr.h>
#include <asm/pat.h>
#include <asm/memtype.h>
#include "mtrr.h"

View File

@ -52,7 +52,7 @@
#include <asm/e820/api.h>
#include <asm/mtrr.h>
#include <asm/msr.h>
#include <asm/pat.h>
#include <asm/memtype.h>
#include "mtrr.h"

View File

@ -4,7 +4,7 @@
*/
#include <linux/cpu.h>
#include <asm/pat.h>
#include <asm/memtype.h>
#include <asm/apic.h>
#include <asm/processor.h>

View File

@ -7,7 +7,7 @@
#include <linux/cpu.h>
#include <asm/apic.h>
#include <asm/pat.h>
#include <asm/memtype.h>
#include <asm/processor.h>
#include "cpu.h"

View File

@ -28,6 +28,89 @@
#include <asm/desc.h>
#include <asm/mmu_context.h>
#include <asm/syscalls.h>
#include <asm/pgtable_areas.h>
/* This is a multiple of PAGE_SIZE. */
#define LDT_SLOT_STRIDE (LDT_ENTRIES * LDT_ENTRY_SIZE)
static inline void *ldt_slot_va(int slot)
{
return (void *)(LDT_BASE_ADDR + LDT_SLOT_STRIDE * slot);
}
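As a quick sanity check of the "multiple of PAGE_SIZE" comment above (using the usual x86 values LDT_ENTRIES = 8192 and LDT_ENTRY_SIZE = 8, which are defined elsewhere and assumed here): LDT_SLOT_STRIDE = 8192 * 8 = 64 KB = 16 pages, so ldt_slot_va(0) is LDT_BASE_ADDR itself and ldt_slot_va(1) is 64 KB above it.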
void load_mm_ldt(struct mm_struct *mm)
{
struct ldt_struct *ldt;
/* READ_ONCE synchronizes with smp_store_release */
ldt = READ_ONCE(mm->context.ldt);
/*
* Any change to mm->context.ldt is followed by an IPI to all
* CPUs with the mm active. The LDT will not be freed until
* after the IPI is handled by all such CPUs. This means that,
* if the ldt_struct changes before we return, the values we see
* will be safe, and the new values will be loaded before we run
* any user code.
*
* NB: don't try to convert this to use RCU without extreme care.
* We would still need IRQs off, because we don't want to change
* the local LDT after an IPI loaded a newer value than the one
* that we can see.
*/
if (unlikely(ldt)) {
if (static_cpu_has(X86_FEATURE_PTI)) {
if (WARN_ON_ONCE((unsigned long)ldt->slot > 1)) {
/*
* Whoops -- either the new LDT isn't mapped
* (if slot == -1) or is mapped into a bogus
* slot (if slot > 1).
*/
clear_LDT();
return;
}
/*
* If page table isolation is enabled, ldt->entries
* will not be mapped in the userspace pagetables.
* Tell the CPU to access the LDT through the alias
* at ldt_slot_va(ldt->slot).
*/
set_ldt(ldt_slot_va(ldt->slot), ldt->nr_entries);
} else {
set_ldt(ldt->entries, ldt->nr_entries);
}
} else {
clear_LDT();
}
}
void switch_ldt(struct mm_struct *prev, struct mm_struct *next)
{
/*
* Load the LDT if either the old or new mm had an LDT.
*
* An mm will never go from having an LDT to not having an LDT. Two
* mms never share an LDT, so we don't gain anything by checking to
* see whether the LDT changed. There's also no guarantee that
* prev->context.ldt actually matches LDTR, but, if LDTR is non-NULL,
* then prev->context.ldt will also be non-NULL.
*
* If we really cared, we could optimize the case where prev == next
* and we're exiting lazy mode. Most of the time, if this happens,
* we don't actually need to reload LDTR, but modify_ldt() is mostly
* used by legacy code and emulators where we don't need this level of
* performance.
*
* This uses | instead of || because it generates better code.
*/
if (unlikely((unsigned long)prev->context.ldt |
(unsigned long)next->context.ldt))
load_mm_ldt(next);
DEBUG_LOCKS_WARN_ON(preemptible());
}
static void refresh_ldt_segments(void)
{

View File

@ -2,130 +2,53 @@
/*
* Copyright (C) 1995 Linus Torvalds
*
* Support of BIGMEM added by Gerhard Wichert, Siemens AG, July 1999
*
* Memory region support
* David Parsons <orc@pell.chi.il.us>, July-August 1999
*
* Added E820 sanitization routine (removes overlapping memory regions);
* Brian Moyle <bmoyle@mvista.com>, February 2001
*
* Moved CPU detection code to cpu/${cpu}.c
* Patrick Mochel <mochel@osdl.org>, March 2002
*
* Provisions for empty E820 memory regions (reported by certain BIOSes).
* Alex Achenbach <xela@slit.de>, December 2002.
*
* This file contains the setup_arch() code, which handles the architecture-dependent
* parts of early kernel initialization.
*/
/*
* This file handles the architecture-dependent parts of initialization
*/
#include <linux/sched.h>
#include <linux/mm.h>
#include <linux/mmzone.h>
#include <linux/screen_info.h>
#include <linux/ioport.h>
#include <linux/acpi.h>
#include <linux/sfi.h>
#include <linux/apm_bios.h>
#include <linux/initrd.h>
#include <linux/memblock.h>
#include <linux/seq_file.h>
#include <linux/console.h>
#include <linux/root_dev.h>
#include <linux/highmem.h>
#include <linux/export.h>
#include <linux/efi.h>
#include <linux/init.h>
#include <linux/edd.h>
#include <linux/iscsi_ibft.h>
#include <linux/nodemask.h>
#include <linux/kexec.h>
#include <linux/crash_dump.h>
#include <linux/dmi.h>
#include <linux/pfn.h>
#include <linux/pci.h>
#include <asm/pci-direct.h>
#include <linux/efi.h>
#include <linux/init_ohci1394_dma.h>
#include <linux/kvm_para.h>
#include <linux/dma-contiguous.h>
#include <xen/xen.h>
#include <linux/initrd.h>
#include <linux/iscsi_ibft.h>
#include <linux/memblock.h>
#include <linux/pci.h>
#include <linux/root_dev.h>
#include <linux/sfi.h>
#include <linux/tboot.h>
#include <linux/usb/xhci-dbgp.h>
#include <uapi/linux/mount.h>
#include <linux/errno.h>
#include <linux/kernel.h>
#include <linux/stddef.h>
#include <linux/unistd.h>
#include <linux/ptrace.h>
#include <linux/user.h>
#include <linux/delay.h>
#include <xen/xen.h>
#include <linux/kallsyms.h>
#include <linux/cpufreq.h>
#include <linux/dma-mapping.h>
#include <linux/ctype.h>
#include <linux/uaccess.h>
#include <linux/percpu.h>
#include <linux/crash_dump.h>
#include <linux/tboot.h>
#include <linux/jiffies.h>
#include <linux/mem_encrypt.h>
#include <linux/sizes.h>
#include <linux/usb/xhci-dbgp.h>
#include <video/edid.h>
#include <asm/mtrr.h>
#include <asm/apic.h>
#include <asm/realmode.h>
#include <asm/e820/api.h>
#include <asm/mpspec.h>
#include <asm/setup.h>
#include <asm/efi.h>
#include <asm/timer.h>
#include <asm/i8259.h>
#include <asm/sections.h>
#include <asm/io_apic.h>
#include <asm/ist.h>
#include <asm/setup_arch.h>
#include <asm/bios_ebda.h>
#include <asm/cacheflush.h>
#include <asm/processor.h>
#include <asm/bugs.h>
#include <asm/kasan.h>
#include <asm/vsyscall.h>
#include <asm/cpu.h>
#include <asm/desc.h>
#include <asm/dma.h>
#include <asm/iommu.h>
#include <asm/efi.h>
#include <asm/gart.h>
#include <asm/mmu_context.h>
#include <asm/proto.h>
#include <asm/paravirt.h>
#include <asm/hypervisor.h>
#include <asm/olpc_ofw.h>
#include <asm/percpu.h>
#include <asm/topology.h>
#include <asm/apicdef.h>
#include <asm/amd_nb.h>
#include <asm/mce.h>
#include <asm/alternative.h>
#include <asm/prom.h>
#include <asm/microcode.h>
#include <asm/io_apic.h>
#include <asm/kasan.h>
#include <asm/kaslr.h>
#include <asm/mce.h>
#include <asm/mtrr.h>
#include <asm/olpc_ofw.h>
#include <asm/pci-direct.h>
#include <asm/prom.h>
#include <asm/proto.h>
#include <asm/unwind.h>
#include <asm/vsyscall.h>
#include <linux/vmalloc.h>
/*
* max_low_pfn_mapped: highest direct mapped pfn under 4GB
* max_pfn_mapped: highest direct mapped pfn over 4GB
* max_low_pfn_mapped: highest directly mapped pfn < 4 GB
* max_pfn_mapped: highest directly mapped pfn > 4 GB
*
* The direct mapping only covers E820_TYPE_RAM regions, so the ranges and gaps are
* represented by pfn_mapped
* represented by pfn_mapped[].
*/
unsigned long max_low_pfn_mapped;
unsigned long max_pfn_mapped;
@ -135,14 +58,23 @@ RESERVE_BRK(dmi_alloc, 65536);
#endif
static __initdata unsigned long _brk_start = (unsigned long)__brk_base;
unsigned long _brk_end = (unsigned long)__brk_base;
/*
* Range of the BSS area. The size of the BSS area is determined
* at link time, with RESERVE_BRK*() facility reserving additional
* chunks.
*/
static __initdata
unsigned long _brk_start = (unsigned long)__brk_base;
unsigned long _brk_end = (unsigned long)__brk_base;
struct boot_params boot_params;
/*
* Machine setup..
* These are the four main kernel memory regions; we put them into
* the resource tree so that kdump tools and other debugging tools
* can recover them:
*/
static struct resource rodata_resource = {
.name = "Kernel rodata",
.start = 0,
@ -173,16 +105,16 @@ static struct resource bss_resource = {
#ifdef CONFIG_X86_32
/* cpu data as detected by the assembly code in head_32.S */
/* CPU data as detected by the assembly code in head_32.S */
struct cpuinfo_x86 new_cpu_data;
/* common cpu data for all cpus */
/* Common CPU data for all CPUs */
struct cpuinfo_x86 boot_cpu_data __read_mostly;
EXPORT_SYMBOL(boot_cpu_data);
unsigned int def_to_bigsmp;
/* for MCA, but anyone else can use it if they want */
/* For MCA, but anyone else can use it if they want */
unsigned int machine_id;
unsigned int machine_submodel_id;
unsigned int BIOS_revision;
@ -468,15 +400,15 @@ static void __init memblock_x86_reserve_range_setup_data(void)
/*
* Keep the crash kernel below this limit.
*
* On 32 bits earlier kernels would limit the kernel to the low 512 MiB
* Earlier 32-bit kernels would limit the kernel to the low 512 MB range
* due to mapping restrictions.
*
* On 64bit, kdump kernel need be restricted to be under 64TB, which is
* 64-bit kdump kernels need to be restricted to be under 64 TB, which is
* the upper limit of system RAM in 4-level paging mode. Since the kdump
* jumping could be from 5-level to 4-level, the jumping will fail if
* kernel is put above 64TB, and there's no way to detect the paging mode
* of the kernel which will be loaded for dumping during the 1st kernel
* bootup.
* jump could be from 5-level paging to 4-level paging, the jump will fail if
* the kernel is put above 64 TB, and during the 1st kernel bootup there's
* no good way to detect the paging mode of the target kernel which will be
* loaded for dumping.
*/
#ifdef CONFIG_X86_32
# define CRASH_ADDR_LOW_MAX SZ_512M
@ -887,7 +819,7 @@ void __init setup_arch(char **cmdline_p)
/*
* Note: Quark X1000 CPUs advertise PGE incorrectly and require
* a cr3 based tlb flush, so the following __flush_tlb_all()
* will not flush anything because the cpu quirk which clears
* will not flush anything because the CPU quirk which clears
* X86_FEATURE_PGE has not been invoked yet. Though due to the
* load_cr3() above the TLB has been flushed already. The
* quirk is invoked before subsequent calls to __flush_tlb_all()

View File

@ -20,7 +20,7 @@
#include <asm/irq.h>
#include <asm/io_apic.h>
#include <asm/hpet.h>
#include <asm/pat.h>
#include <asm/memtype.h>
#include <asm/tsc.h>
#include <asm/iommu.h>
#include <asm/mach_traps.h>

View File

@ -40,7 +40,7 @@
#include <linux/kthread.h>
#include <asm/page.h>
#include <asm/pat.h>
#include <asm/memtype.h>
#include <asm/cmpxchg.h>
#include <asm/e820/api.h>
#include <asm/io.h>

View File

@ -12,8 +12,10 @@ CFLAGS_REMOVE_mem_encrypt.o = -pg
CFLAGS_REMOVE_mem_encrypt_identity.o = -pg
endif
obj-y := init.o init_$(BITS).o fault.o ioremap.o extable.o pageattr.o mmap.o \
pat.o pgtable.o physaddr.o setup_nx.o tlb.o cpu_entry_area.o maccess.o
obj-y := init.o init_$(BITS).o fault.o ioremap.o extable.o mmap.o \
pgtable.o physaddr.o setup_nx.o tlb.o cpu_entry_area.o maccess.o
obj-y += pat/
# Make sure __phys_addr has no stackprotector
nostackp := $(call cc-option, -fno-stack-protector)
@ -23,8 +25,6 @@ CFLAGS_mem_encrypt_identity.o := $(nostackp)
CFLAGS_fault.o := -I $(srctree)/$(src)/../include/asm/trace
obj-$(CONFIG_X86_PAT) += pat_interval.o
obj-$(CONFIG_X86_32) += pgtable_32.o iomap_32.o
obj-$(CONFIG_HUGETLB_PAGE) += hugetlbpage.o

View File

@ -29,6 +29,7 @@
#include <asm/efi.h> /* efi_recover_from_page_fault()*/
#include <asm/desc.h> /* store_idt(), ... */
#include <asm/cpu_entry_area.h> /* exception stack */
#include <asm/pgtable_areas.h> /* VMALLOC_START, ... */
#define CREATE_TRACE_POINTS
#include <asm/trace/exceptions.h>

View File

@ -52,6 +52,7 @@
#include <asm/page_types.h>
#include <asm/cpu_entry_area.h>
#include <asm/init.h>
#include <asm/pgtable_areas.h>
#include "mm_internal.h"

View File

@ -4,7 +4,7 @@
*/
#include <asm/iomap.h>
#include <asm/pat.h>
#include <asm/memtype.h>
#include <linux/export.h>
#include <linux/highmem.h>
@ -26,7 +26,7 @@ int iomap_create_wc(resource_size_t base, unsigned long size, pgprot_t *prot)
if (!is_io_mapping_possible(base, size))
return -EINVAL;
ret = io_reserve_memtype(base, base + size, &pcm);
ret = memtype_reserve_io(base, base + size, &pcm);
if (ret)
return ret;
@ -40,7 +40,7 @@ EXPORT_SYMBOL_GPL(iomap_create_wc);
void iomap_free(resource_size_t base, unsigned long size)
{
io_free_memtype(base, base + size);
memtype_free_io(base, base + size);
}
EXPORT_SYMBOL_GPL(iomap_free);

View File

@ -24,7 +24,7 @@
#include <asm/pgtable.h>
#include <asm/tlbflush.h>
#include <asm/pgalloc.h>
#include <asm/pat.h>
#include <asm/memtype.h>
#include <asm/setup.h>
#include "physaddr.h"
@ -196,10 +196,10 @@ __ioremap_caller(resource_size_t phys_addr, unsigned long size,
phys_addr &= PHYSICAL_PAGE_MASK;
size = PAGE_ALIGN(last_addr+1) - phys_addr;
retval = reserve_memtype(phys_addr, (u64)phys_addr + size,
retval = memtype_reserve(phys_addr, (u64)phys_addr + size,
pcm, &new_pcm);
if (retval) {
printk(KERN_ERR "ioremap reserve_memtype failed %d\n", retval);
printk(KERN_ERR "ioremap memtype_reserve failed %d\n", retval);
return NULL;
}
@ -255,7 +255,7 @@ __ioremap_caller(resource_size_t phys_addr, unsigned long size,
area->phys_addr = phys_addr;
vaddr = (unsigned long) area->addr;
if (kernel_map_sync_memtype(phys_addr, size, pcm))
if (memtype_kernel_map_sync(phys_addr, size, pcm))
goto err_free_area;
if (ioremap_page_range(vaddr, vaddr + size, phys_addr, prot))
@ -275,7 +275,7 @@ __ioremap_caller(resource_size_t phys_addr, unsigned long size,
err_free_area:
free_vm_area(area);
err_free_memtype:
free_memtype(phys_addr, phys_addr + size);
memtype_free(phys_addr, phys_addr + size);
return NULL;
}
@ -451,7 +451,7 @@ void iounmap(volatile void __iomem *addr)
return;
}
free_memtype(p->phys_addr, p->phys_addr + get_vm_area_size(p));
memtype_free(p->phys_addr, p->phys_addr + get_vm_area_size(p));
/* Finally remove it */
o = remove_vm_area((void __force *)addr);

arch/x86/mm/pat/Makefile (new file, 5 lines)
View File

@ -0,0 +1,5 @@
# SPDX-License-Identifier: GPL-2.0
obj-y := set_memory.o memtype.o
obj-$(CONFIG_X86_PAT) += memtype_interval.o

View File

@ -1,11 +1,34 @@
// SPDX-License-Identifier: GPL-2.0-only
/*
* Handle caching attributes in page tables (PAT)
* Page Attribute Table (PAT) support: handle memory caching attributes in page tables.
*
* Authors: Venkatesh Pallipadi <venkatesh.pallipadi@intel.com>
* Suresh B Siddha <suresh.b.siddha@intel.com>
*
* Loosely based on earlier PAT patchset from Eric Biederman and Andi Kleen.
*
* Basic principles:
*
* PAT is a CPU feature supported by all modern x86 CPUs, to allow the firmware and
* the kernel to set one of a handful of 'caching type' attributes for physical
* memory ranges: uncached, write-combining, write-through, write-protected,
* and the most commonly used and default attribute: write-back caching.
*
* PAT support supersedes and augments MTRR support in a compatible fashion: MTRR is
* a hardware interface to enumerate a limited number of physical memory ranges
* and set their caching attributes explicitly, programmed into the CPU via MSRs.
* Even modern CPUs have MTRRs enabled - but these are typically not touched
* by the kernel or by user-space (such as the X server); we rely on PAT for any
* additional cache attribute logic.
*
* PAT doesn't work via explicit memory ranges, but uses page table entries to add
* cache attribute information to the mapped memory range: three bits are used
* (_PAGE_PWT, _PAGE_PCD, _PAGE_PAT), with the 8 possible values mapped by the
* CPU to actual cache attributes via an MSR loaded into the CPU (MSR_IA32_CR_PAT).
*
* ( There's a metric ton of finer details, such as compatibility with CPU quirks
* that only support 4 types of PAT entries, and interaction with MTRRs, see
* below for details. )
*/
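A minimal sketch of how those three PTE bits select one of the eight PAT entries (a hypothetical helper for illustration only; the kernel itself goes through the cachemode2protval()/pgprot2cachemode() translation tables rather than computing an index like this):

/* Illustrative only; not part of this commit. */
static unsigned int pte_flags_to_pat_index(pteval_t flags)
{
	unsigned int idx = 0;

	if (flags & _PAGE_PWT)
		idx |= 1;	/* bit 0 of the PAT index */
	if (flags & _PAGE_PCD)
		idx |= 2;	/* bit 1 of the PAT index */
	if (flags & _PAGE_PAT)
		idx |= 4;	/* bit 2 of the PAT index */

	/* Entry 'idx' of MSR_IA32_CR_PAT (bits idx*8 .. idx*8+7) applies. */
	return idx;
}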
#include <linux/seq_file.h>
@ -29,44 +52,48 @@
#include <asm/mtrr.h>
#include <asm/page.h>
#include <asm/msr.h>
#include <asm/pat.h>
#include <asm/memtype.h>
#include <asm/io.h>
#include "pat_internal.h"
#include "mm_internal.h"
#include "memtype.h"
#include "../mm_internal.h"
#undef pr_fmt
#define pr_fmt(fmt) "" fmt
static bool __read_mostly boot_cpu_done;
static bool __read_mostly pat_bp_initialized;
static bool __read_mostly pat_disabled = !IS_ENABLED(CONFIG_X86_PAT);
static bool __read_mostly pat_initialized;
static bool __read_mostly init_cm_done;
static bool __read_mostly pat_bp_enabled;
static bool __read_mostly pat_cm_initialized;
void pat_disable(const char *reason)
/*
* PAT support is enabled by default, but can be disabled for
* various user-requested or hardware-forced reasons:
*/
void pat_disable(const char *msg_reason)
{
if (pat_disabled)
return;
if (boot_cpu_done) {
if (pat_bp_initialized) {
WARN_ONCE(1, "x86/PAT: PAT cannot be disabled after initialization\n");
return;
}
pat_disabled = true;
pr_info("x86/PAT: %s\n", reason);
pr_info("x86/PAT: %s\n", msg_reason);
}
static int __init nopat(char *str)
{
pat_disable("PAT support disabled.");
pat_disable("PAT support disabled via boot option.");
return 0;
}
early_param("nopat", nopat);
bool pat_enabled(void)
{
return pat_initialized;
return pat_bp_enabled;
}
EXPORT_SYMBOL_GPL(pat_enabled);
@ -197,6 +224,8 @@ static void __init_cache_modes(u64 pat)
char pat_msg[33];
int i;
WARN_ON_ONCE(pat_cm_initialized);
pat_msg[32] = 0;
for (i = 7; i >= 0; i--) {
cache = pat_get_cache_mode((pat >> (i * 8)) & 7,
@ -205,28 +234,28 @@ static void __init_cache_modes(u64 pat)
}
pr_info("x86/PAT: Configuration [0-7]: %s\n", pat_msg);
init_cm_done = true;
pat_cm_initialized = true;
}
#define PAT(x, y) ((u64)PAT_ ## y << ((x)*8))
static void pat_bsp_init(u64 pat)
static void pat_bp_init(u64 pat)
{
u64 tmp_pat;
if (!boot_cpu_has(X86_FEATURE_PAT)) {
pat_disable("PAT not supported by CPU.");
pat_disable("PAT not supported by the CPU.");
return;
}
rdmsrl(MSR_IA32_CR_PAT, tmp_pat);
if (!tmp_pat) {
pat_disable("PAT MSR is 0, disabled.");
pat_disable("PAT support disabled by the firmware.");
return;
}
wrmsrl(MSR_IA32_CR_PAT, pat);
pat_initialized = true;
pat_bp_enabled = true;
__init_cache_modes(pat);
}
@ -248,7 +277,7 @@ void init_cache_modes(void)
{
u64 pat = 0;
if (init_cm_done)
if (pat_cm_initialized)
return;
if (boot_cpu_has(X86_FEATURE_PAT)) {
@ -291,7 +320,7 @@ void init_cache_modes(void)
}
/**
* pat_init - Initialize PAT MSR and PAT table
* pat_init - Initialize the PAT MSR and PAT table on the current CPU
*
* This function initializes PAT MSR and PAT table with an OS-defined value
* to enable additional cache attributes, WC, WT and WP.
@ -305,6 +334,10 @@ void pat_init(void)
u64 pat;
struct cpuinfo_x86 *c = &boot_cpu_data;
#ifndef CONFIG_X86_PAT
pr_info_once("x86/PAT: PAT support disabled because CONFIG_X86_PAT is disabled in the kernel.\n");
#endif
if (pat_disabled)
return;
@ -364,9 +397,9 @@ void pat_init(void)
PAT(4, WB) | PAT(5, WP) | PAT(6, UC_MINUS) | PAT(7, WT);
}
if (!boot_cpu_done) {
pat_bsp_init(pat);
boot_cpu_done = true;
if (!pat_bp_initialized) {
pat_bp_init(pat);
pat_bp_initialized = true;
} else {
pat_ap_init(pat);
}
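For reference, the PAT() helper defined earlier in this file packs one memory type per byte of the 64-bit MSR_IA32_CR_PAT value. Expanding the second half of the table built just above, a worked example assuming the usual architectural encodings (PAT_WT = 4, PAT_WP = 5, PAT_WB = 6, PAT_UC_MINUS = 7):

	PAT(4, WB)       = (u64)6 << 32
	PAT(5, WP)       = (u64)5 << 40
	PAT(6, UC_MINUS) = (u64)7 << 48
	PAT(7, WT)       = (u64)4 << 56

so PAT entries 4..7 are programmed to WB, WP, UC- and WT respectively.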
@ -542,10 +575,10 @@ static u64 sanitize_phys(u64 address)
* available type in new_type in case of no error. In case of any error
* it will return a negative return value.
*/
int reserve_memtype(u64 start, u64 end, enum page_cache_mode req_type,
int memtype_reserve(u64 start, u64 end, enum page_cache_mode req_type,
enum page_cache_mode *new_type)
{
struct memtype *new;
struct memtype *entry_new;
enum page_cache_mode actual_type;
int is_range_ram;
int err = 0;
@ -593,22 +626,22 @@ int reserve_memtype(u64 start, u64 end, enum page_cache_mode req_type,
return -EINVAL;
}
new = kzalloc(sizeof(struct memtype), GFP_KERNEL);
if (!new)
entry_new = kzalloc(sizeof(struct memtype), GFP_KERNEL);
if (!entry_new)
return -ENOMEM;
new->start = start;
new->end = end;
new->type = actual_type;
entry_new->start = start;
entry_new->end = end;
entry_new->type = actual_type;
spin_lock(&memtype_lock);
err = memtype_check_insert(new, new_type);
err = memtype_check_insert(entry_new, new_type);
if (err) {
pr_info("x86/PAT: reserve_memtype failed [mem %#010Lx-%#010Lx], track %s, req %s\n",
pr_info("x86/PAT: memtype_reserve failed [mem %#010Lx-%#010Lx], track %s, req %s\n",
start, end - 1,
cattr_name(new->type), cattr_name(req_type));
kfree(new);
cattr_name(entry_new->type), cattr_name(req_type));
kfree(entry_new);
spin_unlock(&memtype_lock);
return err;
@ -616,18 +649,17 @@ int reserve_memtype(u64 start, u64 end, enum page_cache_mode req_type,
spin_unlock(&memtype_lock);
dprintk("reserve_memtype added [mem %#010Lx-%#010Lx], track %s, req %s, ret %s\n",
start, end - 1, cattr_name(new->type), cattr_name(req_type),
dprintk("memtype_reserve added [mem %#010Lx-%#010Lx], track %s, req %s, ret %s\n",
start, end - 1, cattr_name(entry_new->type), cattr_name(req_type),
new_type ? cattr_name(*new_type) : "-");
return err;
}
int free_memtype(u64 start, u64 end)
int memtype_free(u64 start, u64 end)
{
int err = -EINVAL;
int is_range_ram;
struct memtype *entry;
struct memtype *entry_old;
if (!pat_enabled())
return 0;
@ -640,28 +672,24 @@ int free_memtype(u64 start, u64 end)
return 0;
is_range_ram = pat_pagerange_is_ram(start, end);
if (is_range_ram == 1) {
err = free_ram_pages_type(start, end);
return err;
} else if (is_range_ram < 0) {
if (is_range_ram == 1)
return free_ram_pages_type(start, end);
if (is_range_ram < 0)
return -EINVAL;
}
spin_lock(&memtype_lock);
entry = memtype_erase(start, end);
entry_old = memtype_erase(start, end);
spin_unlock(&memtype_lock);
if (IS_ERR(entry)) {
if (IS_ERR(entry_old)) {
pr_info("x86/PAT: %s:%d freeing invalid memtype [mem %#010Lx-%#010Lx]\n",
current->comm, current->pid, start, end - 1);
return -EINVAL;
}
kfree(entry);
kfree(entry_old);
dprintk("free_memtype request [mem %#010Lx-%#010Lx]\n", start, end - 1);
dprintk("memtype_free request [mem %#010Lx-%#010Lx]\n", start, end - 1);
return 0;
}
@ -700,6 +728,7 @@ static enum page_cache_mode lookup_memtype(u64 paddr)
rettype = _PAGE_CACHE_MODE_UC_MINUS;
spin_unlock(&memtype_lock);
return rettype;
}
@ -723,7 +752,7 @@ bool pat_pfn_immune_to_uc_mtrr(unsigned long pfn)
EXPORT_SYMBOL_GPL(pat_pfn_immune_to_uc_mtrr);
/**
* io_reserve_memtype - Request a memory type mapping for a region of memory
* memtype_reserve_io - Request a memory type mapping for a region of memory
* @start: start (physical address) of the region
* @end: end (physical address) of the region
* @type: A pointer to memtype, with requested type. On success, requested
@ -732,7 +761,7 @@ EXPORT_SYMBOL_GPL(pat_pfn_immune_to_uc_mtrr);
* On success, returns 0
* On failure, returns non-zero
*/
int io_reserve_memtype(resource_size_t start, resource_size_t end,
int memtype_reserve_io(resource_size_t start, resource_size_t end,
enum page_cache_mode *type)
{
resource_size_t size = end - start;
@ -742,47 +771,47 @@ int io_reserve_memtype(resource_size_t start, resource_size_t end,
WARN_ON_ONCE(iomem_map_sanity_check(start, size));
ret = reserve_memtype(start, end, req_type, &new_type);
ret = memtype_reserve(start, end, req_type, &new_type);
if (ret)
goto out_err;
if (!is_new_memtype_allowed(start, size, req_type, new_type))
goto out_free;
if (kernel_map_sync_memtype(start, size, new_type) < 0)
if (memtype_kernel_map_sync(start, size, new_type) < 0)
goto out_free;
*type = new_type;
return 0;
out_free:
free_memtype(start, end);
memtype_free(start, end);
ret = -EBUSY;
out_err:
return ret;
}
/**
* io_free_memtype - Release a memory type mapping for a region of memory
* memtype_free_io - Release a memory type mapping for a region of memory
* @start: start (physical address) of the region
* @end: end (physical address) of the region
*/
void io_free_memtype(resource_size_t start, resource_size_t end)
void memtype_free_io(resource_size_t start, resource_size_t end)
{
free_memtype(start, end);
memtype_free(start, end);
}
int arch_io_reserve_memtype_wc(resource_size_t start, resource_size_t size)
{
enum page_cache_mode type = _PAGE_CACHE_MODE_WC;
return io_reserve_memtype(start, start + size, &type);
return memtype_reserve_io(start, start + size, &type);
}
EXPORT_SYMBOL(arch_io_reserve_memtype_wc);
void arch_io_free_memtype_wc(resource_size_t start, resource_size_t size)
{
io_free_memtype(start, start + size);
memtype_free_io(start, start + size);
}
EXPORT_SYMBOL(arch_io_free_memtype_wc);
@ -839,10 +868,10 @@ int phys_mem_access_prot_allowed(struct file *file, unsigned long pfn,
}
/*
* Change the memory type for the physial address range in kernel identity
* Change the memory type for the physical address range in kernel identity
* mapping space if that range is a part of identity map.
*/
int kernel_map_sync_memtype(u64 base, unsigned long size,
int memtype_kernel_map_sync(u64 base, unsigned long size,
enum page_cache_mode pcm)
{
unsigned long id_sz;
@ -851,15 +880,14 @@ int kernel_map_sync_memtype(u64 base, unsigned long size,
return 0;
/*
* some areas in the middle of the kernel identity range
* are not mapped, like the PCI space.
* Some areas in the middle of the kernel identity range
* are not mapped, for example the PCI space.
*/
if (!page_is_ram(base >> PAGE_SHIFT))
return 0;
id_sz = (__pa(high_memory-1) <= base + size) ?
__pa(high_memory) - base :
size;
__pa(high_memory) - base : size;
if (ioremap_change_attr((unsigned long)__va(base), id_sz, pcm) < 0) {
pr_info("x86/PAT: %s:%d ioremap_change_attr failed %s for [mem %#010Lx-%#010Lx]\n",
@ -873,7 +901,7 @@ int kernel_map_sync_memtype(u64 base, unsigned long size,
/*
* Internal interface to reserve a range of physical memory with prot.
* Reserved non RAM regions only and after successful reserve_memtype,
* Reserved non RAM regions only and after successful memtype_reserve,
* this func also keeps identity mapping (if any) in sync with this new prot.
*/
static int reserve_pfn_range(u64 paddr, unsigned long size, pgprot_t *vma_prot,
@ -910,14 +938,14 @@ static int reserve_pfn_range(u64 paddr, unsigned long size, pgprot_t *vma_prot,
return 0;
}
ret = reserve_memtype(paddr, paddr + size, want_pcm, &pcm);
ret = memtype_reserve(paddr, paddr + size, want_pcm, &pcm);
if (ret)
return ret;
if (pcm != want_pcm) {
if (strict_prot ||
!is_new_memtype_allowed(paddr, size, want_pcm, pcm)) {
free_memtype(paddr, paddr + size);
memtype_free(paddr, paddr + size);
pr_err("x86/PAT: %s:%d map pfn expected mapping type %s for [mem %#010Lx-%#010Lx], got %s\n",
current->comm, current->pid,
cattr_name(want_pcm),
@ -935,8 +963,8 @@ static int reserve_pfn_range(u64 paddr, unsigned long size, pgprot_t *vma_prot,
cachemode2protval(pcm));
}
if (kernel_map_sync_memtype(paddr, size, pcm) < 0) {
free_memtype(paddr, paddr + size);
if (memtype_kernel_map_sync(paddr, size, pcm) < 0) {
memtype_free(paddr, paddr + size);
return -EINVAL;
}
return 0;
@ -952,7 +980,7 @@ static void free_pfn_range(u64 paddr, unsigned long size)
is_ram = pat_pagerange_is_ram(paddr, paddr + size);
if (is_ram == 0)
free_memtype(paddr, paddr + size);
memtype_free(paddr, paddr + size);
}
/*
@ -1099,25 +1127,30 @@ EXPORT_SYMBOL_GPL(pgprot_writethrough);
#if defined(CONFIG_DEBUG_FS) && defined(CONFIG_X86_PAT)
/*
* We are allocating a temporary printout-entry to be passed
* between seq_start()/next() and seq_show():
*/
static struct memtype *memtype_get_idx(loff_t pos)
{
struct memtype *print_entry;
struct memtype *entry_print;
int ret;
print_entry = kzalloc(sizeof(struct memtype), GFP_KERNEL);
if (!print_entry)
entry_print = kzalloc(sizeof(struct memtype), GFP_KERNEL);
if (!entry_print)
return NULL;
spin_lock(&memtype_lock);
ret = memtype_copy_nth_element(print_entry, pos);
ret = memtype_copy_nth_element(entry_print, pos);
spin_unlock(&memtype_lock);
if (!ret) {
return print_entry;
} else {
kfree(print_entry);
/* Free it on error: */
if (ret) {
kfree(entry_print);
return NULL;
}
return entry_print;
}
static void *memtype_seq_start(struct seq_file *seq, loff_t *pos)
@ -1142,11 +1175,14 @@ static void memtype_seq_stop(struct seq_file *seq, void *v)
static int memtype_seq_show(struct seq_file *seq, void *v)
{
struct memtype *print_entry = (struct memtype *)v;
struct memtype *entry_print = (struct memtype *)v;
seq_printf(seq, "%s @ 0x%Lx-0x%Lx\n", cattr_name(print_entry->type),
print_entry->start, print_entry->end);
kfree(print_entry);
seq_printf(seq, "PAT: [mem 0x%016Lx-0x%016Lx] %s\n",
entry_print->start,
entry_print->end,
cattr_name(entry_print->type));
kfree(entry_print);
return 0;
}
@ -1178,7 +1214,6 @@ static int __init pat_memtype_list_init(void)
}
return 0;
}
late_initcall(pat_memtype_list_init);
#endif /* CONFIG_DEBUG_FS && CONFIG_X86_PAT */

View File

@ -1,6 +1,6 @@
/* SPDX-License-Identifier: GPL-2.0 */
#ifndef __PAT_INTERNAL_H_
#define __PAT_INTERNAL_H_
#ifndef __MEMTYPE_H_
#define __MEMTYPE_H_
extern int pat_debug_enable;
@ -29,13 +29,13 @@ static inline char *cattr_name(enum page_cache_mode pcm)
}
#ifdef CONFIG_X86_PAT
extern int memtype_check_insert(struct memtype *new,
extern int memtype_check_insert(struct memtype *entry_new,
enum page_cache_mode *new_type);
extern struct memtype *memtype_erase(u64 start, u64 end);
extern struct memtype *memtype_lookup(u64 addr);
extern int memtype_copy_nth_element(struct memtype *out, loff_t pos);
extern int memtype_copy_nth_element(struct memtype *entry_out, loff_t pos);
#else
static inline int memtype_check_insert(struct memtype *new,
static inline int memtype_check_insert(struct memtype *entry_new,
enum page_cache_mode *new_type)
{ return 0; }
static inline struct memtype *memtype_erase(u64 start, u64 end)
@ -46,4 +46,4 @@ static inline int memtype_copy_nth_element(struct memtype *out, loff_t pos)
{ return 0; }
#endif
#endif /* __PAT_INTERNAL_H_ */
#endif /* __MEMTYPE_H_ */

View File

@ -0,0 +1,194 @@
// SPDX-License-Identifier: GPL-2.0
/*
* Handle caching attributes in page tables (PAT)
*
* Authors: Venkatesh Pallipadi <venkatesh.pallipadi@intel.com>
* Suresh B Siddha <suresh.b.siddha@intel.com>
*
* Interval tree used to store the PAT memory type reservations.
*/
#include <linux/seq_file.h>
#include <linux/debugfs.h>
#include <linux/kernel.h>
#include <linux/interval_tree_generic.h>
#include <linux/sched.h>
#include <linux/gfp.h>
#include <asm/pgtable.h>
#include <asm/memtype.h>
#include "memtype.h"
/*
* The memtype tree keeps track of memory type for specific
* physical memory areas. Without proper tracking, conflicting memory
* types in different mappings can cause CPU cache corruption.
*
* The tree is an interval tree (augmented rbtree) which is ordered
* by the starting address. The tree can contain multiple entries for
* different regions which overlap. All the aliases have the same
* cache attributes of course, as enforced by the PAT logic.
*
* memtype_lock protects the rbtree.
*/
static inline u64 interval_start(struct memtype *entry)
{
return entry->start;
}
static inline u64 interval_end(struct memtype *entry)
{
return entry->end - 1;
}
INTERVAL_TREE_DEFINE(struct memtype, rb, u64, subtree_max_end,
interval_start, interval_end,
static, interval)
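The INTERVAL_TREE_DEFINE() invocation above generates the static helpers used in the rest of this file; their shape is roughly the following (paraphrased from include/linux/interval_tree_generic.h, not verbatim):

	static void interval_insert(struct memtype *node, struct rb_root_cached *root);
	static void interval_remove(struct memtype *node, struct rb_root_cached *root);
	static struct memtype *interval_iter_first(struct rb_root_cached *root, u64 start, u64 last);
	static struct memtype *interval_iter_next(struct memtype *node, u64 start, u64 last);

Note that 'last' is inclusive, which is why interval_end() returns entry->end - 1 and every caller below passes end - 1.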
static struct rb_root_cached memtype_rbroot = RB_ROOT_CACHED;
enum {
MEMTYPE_EXACT_MATCH = 0,
MEMTYPE_END_MATCH = 1
};
static struct memtype *memtype_match(u64 start, u64 end, int match_type)
{
struct memtype *entry_match;
entry_match = interval_iter_first(&memtype_rbroot, start, end-1);
while (entry_match != NULL && entry_match->start < end) {
if ((match_type == MEMTYPE_EXACT_MATCH) &&
(entry_match->start == start) && (entry_match->end == end))
return entry_match;
if ((match_type == MEMTYPE_END_MATCH) &&
(entry_match->start < start) && (entry_match->end == end))
return entry_match;
entry_match = interval_iter_next(entry_match, start, end-1);
}
return NULL; /* Returns NULL if there is no match */
}
static int memtype_check_conflict(u64 start, u64 end,
enum page_cache_mode reqtype,
enum page_cache_mode *newtype)
{
struct memtype *entry_match;
enum page_cache_mode found_type = reqtype;
entry_match = interval_iter_first(&memtype_rbroot, start, end-1);
if (entry_match == NULL)
goto success;
if (entry_match->type != found_type && newtype == NULL)
goto failure;
dprintk("Overlap at 0x%Lx-0x%Lx\n", entry_match->start, entry_match->end);
found_type = entry_match->type;
entry_match = interval_iter_next(entry_match, start, end-1);
while (entry_match) {
if (entry_match->type != found_type)
goto failure;
entry_match = interval_iter_next(entry_match, start, end-1);
}
success:
if (newtype)
*newtype = found_type;
return 0;
failure:
pr_info("x86/PAT: %s:%d conflicting memory types %Lx-%Lx %s<->%s\n",
current->comm, current->pid, start, end,
cattr_name(found_type), cattr_name(entry_match->type));
return -EBUSY;
}
int memtype_check_insert(struct memtype *entry_new, enum page_cache_mode *ret_type)
{
int err = 0;
err = memtype_check_conflict(entry_new->start, entry_new->end, entry_new->type, ret_type);
if (err)
return err;
if (ret_type)
entry_new->type = *ret_type;
interval_insert(entry_new, &memtype_rbroot);
return 0;
}
struct memtype *memtype_erase(u64 start, u64 end)
{
struct memtype *entry_old;
/*
* Since the memtype_rbroot tree allows overlapping ranges,
* memtype_erase() checks with EXACT_MATCH first, i.e. free
* a whole node for the munmap case. If no such entry is found,
* it then checks with END_MATCH, i.e. shrink the size of a node
* from the end for the mremap case.
*/
entry_old = memtype_match(start, end, MEMTYPE_EXACT_MATCH);
if (!entry_old) {
entry_old = memtype_match(start, end, MEMTYPE_END_MATCH);
if (!entry_old)
return ERR_PTR(-EINVAL);
}
if (entry_old->start == start) {
/* munmap: erase this node */
interval_remove(entry_old, &memtype_rbroot);
} else {
/* mremap: update the end value of this node */
interval_remove(entry_old, &memtype_rbroot);
entry_old->end = start;
interval_insert(entry_old, &memtype_rbroot);
return NULL;
}
return entry_old;
}
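A worked example of the two cases, for a hypothetical reservation [0x1000, 0x5000): munmap of the whole range calls memtype_erase(0x1000, 0x5000), the EXACT_MATCH lookup finds the node, and it is removed and returned to the caller for freeing. mremap shrinking the mapping to [0x1000, 0x3000) eventually frees the tail via memtype_erase(0x3000, 0x5000): EXACT_MATCH fails, END_MATCH finds the node (start 0x1000 < 0x3000, end == 0x5000), its end is trimmed to 0x3000 and it is re-inserted, and NULL is returned since no node needs freeing.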
struct memtype *memtype_lookup(u64 addr)
{
return interval_iter_first(&memtype_rbroot, addr, addr + PAGE_SIZE-1);
}
/*
* Debugging helper, copy the Nth entry of the tree into a
* copy for printout. This allows us to print out the tree
* via debugfs, without holding the memtype_lock too long:
*/
#ifdef CONFIG_DEBUG_FS
int memtype_copy_nth_element(struct memtype *entry_out, loff_t pos)
{
struct memtype *entry_match;
int i = 1;
entry_match = interval_iter_first(&memtype_rbroot, 0, ULONG_MAX);
while (entry_match && pos != i) {
entry_match = interval_iter_next(entry_match, 0, ULONG_MAX);
i++;
}
if (entry_match) { /* pos == i */
*entry_out = *entry_match;
return 0;
} else {
return 1;
}
}
#endif

View File

@ -24,10 +24,10 @@
#include <linux/uaccess.h>
#include <asm/pgalloc.h>
#include <asm/proto.h>
#include <asm/pat.h>
#include <asm/memtype.h>
#include <asm/set_memory.h>
#include "mm_internal.h"
#include "../mm_internal.h"
/*
* The current flushing context - we pass it instead of 5 arguments:
@ -331,7 +331,7 @@ static void cpa_flush_all(unsigned long cache)
on_each_cpu(__cpa_flush_all, (void *) cache, 1);
}
void __cpa_flush_tlb(void *data)
static void __cpa_flush_tlb(void *data)
{
struct cpa_data *cpa = data;
unsigned int i;
@ -1801,7 +1801,7 @@ int set_memory_uc(unsigned long addr, int numpages)
/*
* for now UC MINUS. see comments in ioremap()
*/
ret = reserve_memtype(__pa(addr), __pa(addr) + numpages * PAGE_SIZE,
ret = memtype_reserve(__pa(addr), __pa(addr) + numpages * PAGE_SIZE,
_PAGE_CACHE_MODE_UC_MINUS, NULL);
if (ret)
goto out_err;
@ -1813,7 +1813,7 @@ int set_memory_uc(unsigned long addr, int numpages)
return 0;
out_free:
free_memtype(__pa(addr), __pa(addr) + numpages * PAGE_SIZE);
memtype_free(__pa(addr), __pa(addr) + numpages * PAGE_SIZE);
out_err:
return ret;
}
@ -1839,14 +1839,14 @@ int set_memory_wc(unsigned long addr, int numpages)
{
int ret;
ret = reserve_memtype(__pa(addr), __pa(addr) + numpages * PAGE_SIZE,
ret = memtype_reserve(__pa(addr), __pa(addr) + numpages * PAGE_SIZE,
_PAGE_CACHE_MODE_WC, NULL);
if (ret)
return ret;
ret = _set_memory_wc(addr, numpages);
if (ret)
free_memtype(__pa(addr), __pa(addr) + numpages * PAGE_SIZE);
memtype_free(__pa(addr), __pa(addr) + numpages * PAGE_SIZE);
return ret;
}
@ -1873,7 +1873,7 @@ int set_memory_wb(unsigned long addr, int numpages)
if (ret)
return ret;
free_memtype(__pa(addr), __pa(addr) + numpages * PAGE_SIZE);
memtype_free(__pa(addr), __pa(addr) + numpages * PAGE_SIZE);
return 0;
}
EXPORT_SYMBOL(set_memory_wb);
@ -2014,7 +2014,7 @@ static int _set_pages_array(struct page **pages, int numpages,
continue;
start = page_to_pfn(pages[i]) << PAGE_SHIFT;
end = start + PAGE_SIZE;
if (reserve_memtype(start, end, new_type, NULL))
if (memtype_reserve(start, end, new_type, NULL))
goto err_out;
}
@ -2040,7 +2040,7 @@ err_out:
continue;
start = page_to_pfn(pages[i]) << PAGE_SHIFT;
end = start + PAGE_SIZE;
free_memtype(start, end);
memtype_free(start, end);
}
return -EINVAL;
}
@ -2089,7 +2089,7 @@ int set_pages_array_wb(struct page **pages, int numpages)
continue;
start = page_to_pfn(pages[i]) << PAGE_SHIFT;
end = start + PAGE_SIZE;
free_memtype(start, end);
memtype_free(start, end);
}
return 0;
@ -2281,5 +2281,5 @@ int __init kernel_unmap_pages_in_pgd(pgd_t *pgd, unsigned long address,
* be exposed to the rest of the kernel. Include these directly here.
*/
#ifdef CONFIG_CPA_DEBUG
#include "pageattr-test.c"
#include "cpa-test.c"
#endif
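
The hunks above are a pure rename (reserve_memtype/free_memtype become memtype_reserve/memtype_free), but they keep the same pairing discipline: a successful reservation is rolled back with a free of the identical physical range whenever the attribute change itself fails. A tiny userspace model of that reserve, apply, free-on-failure flow; every name and the injected failure are invented for the illustration:

#include <stdio.h>

/* Stand-ins for memtype_reserve()/memtype_free(); invented for the example. */
static int fake_reserve(unsigned long start, unsigned long end)
{
	printf("reserve [%#lx, %#lx)\n", start, end);
	return 0;
}

static void fake_free(unsigned long start, unsigned long end)
{
	printf("free    [%#lx, %#lx)\n", start, end);
}

static int fake_apply(void)
{
	return -1;	/* simulate the attribute change failing */
}

int main(void)
{
	unsigned long start = 0x100000, end = 0x104000;
	int ret;

	ret = fake_reserve(start, end);
	if (ret)
		return 1;

	ret = fake_apply();
	if (ret)
		fake_free(start, end);	/* roll back, as set_memory_wc() does on error */

	return ret ? 1 : 0;
}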

View File

@ -1,185 +0,0 @@
// SPDX-License-Identifier: GPL-2.0
/*
* Handle caching attributes in page tables (PAT)
*
* Authors: Venkatesh Pallipadi <venkatesh.pallipadi@intel.com>
* Suresh B Siddha <suresh.b.siddha@intel.com>
*
* Interval tree used to store the PAT memory type reservations.
*/
#include <linux/seq_file.h>
#include <linux/debugfs.h>
#include <linux/kernel.h>
#include <linux/interval_tree_generic.h>
#include <linux/sched.h>
#include <linux/gfp.h>
#include <asm/pgtable.h>
#include <asm/pat.h>
#include "pat_internal.h"
/*
* The memtype tree keeps track of memory type for specific
* physical memory areas. Without proper tracking, conflicting memory
* types in different mappings can cause CPU cache corruption.
*
* The tree is an interval tree (augmented rbtree) with tree ordered
* on starting address. Tree can contain multiple entries for
* different regions which overlap. All the aliases have the same
* cache attributes of course.
*
* memtype_lock protects the rbtree.
*/
static inline u64 memtype_interval_start(struct memtype *memtype)
{
return memtype->start;
}
static inline u64 memtype_interval_end(struct memtype *memtype)
{
return memtype->end - 1;
}
INTERVAL_TREE_DEFINE(struct memtype, rb, u64, subtree_max_end,
memtype_interval_start, memtype_interval_end,
static, memtype_interval)
static struct rb_root_cached memtype_rbroot = RB_ROOT_CACHED;
enum {
MEMTYPE_EXACT_MATCH = 0,
MEMTYPE_END_MATCH = 1
};
static struct memtype *memtype_match(u64 start, u64 end, int match_type)
{
struct memtype *match;
match = memtype_interval_iter_first(&memtype_rbroot, start, end-1);
while (match != NULL && match->start < end) {
if ((match_type == MEMTYPE_EXACT_MATCH) &&
(match->start == start) && (match->end == end))
return match;
if ((match_type == MEMTYPE_END_MATCH) &&
(match->start < start) && (match->end == end))
return match;
match = memtype_interval_iter_next(match, start, end-1);
}
return NULL; /* Returns NULL if there is no match */
}
static int memtype_check_conflict(u64 start, u64 end,
enum page_cache_mode reqtype,
enum page_cache_mode *newtype)
{
struct memtype *match;
enum page_cache_mode found_type = reqtype;
match = memtype_interval_iter_first(&memtype_rbroot, start, end-1);
if (match == NULL)
goto success;
if (match->type != found_type && newtype == NULL)
goto failure;
dprintk("Overlap at 0x%Lx-0x%Lx\n", match->start, match->end);
found_type = match->type;
match = memtype_interval_iter_next(match, start, end-1);
while (match) {
if (match->type != found_type)
goto failure;
match = memtype_interval_iter_next(match, start, end-1);
}
success:
if (newtype)
*newtype = found_type;
return 0;
failure:
pr_info("x86/PAT: %s:%d conflicting memory types %Lx-%Lx %s<->%s\n",
current->comm, current->pid, start, end,
cattr_name(found_type), cattr_name(match->type));
return -EBUSY;
}
int memtype_check_insert(struct memtype *new,
enum page_cache_mode *ret_type)
{
int err = 0;
err = memtype_check_conflict(new->start, new->end, new->type, ret_type);
if (err)
return err;
if (ret_type)
new->type = *ret_type;
memtype_interval_insert(new, &memtype_rbroot);
return 0;
}
struct memtype *memtype_erase(u64 start, u64 end)
{
struct memtype *data;
/*
* Since the memtype_rbroot tree allows overlapping ranges,
* memtype_erase() checks with EXACT_MATCH first, i.e. free
* a whole node for the munmap case. If no such entry is found,
* it then checks with END_MATCH, i.e. shrink the size of a node
* from the end for the mremap case.
*/
data = memtype_match(start, end, MEMTYPE_EXACT_MATCH);
if (!data) {
data = memtype_match(start, end, MEMTYPE_END_MATCH);
if (!data)
return ERR_PTR(-EINVAL);
}
if (data->start == start) {
/* munmap: erase this node */
memtype_interval_remove(data, &memtype_rbroot);
} else {
/* mremap: update the end value of this node */
memtype_interval_remove(data, &memtype_rbroot);
data->end = start;
memtype_interval_insert(data, &memtype_rbroot);
return NULL;
}
return data;
}
struct memtype *memtype_lookup(u64 addr)
{
return memtype_interval_iter_first(&memtype_rbroot, addr,
addr + PAGE_SIZE-1);
}
#if defined(CONFIG_DEBUG_FS)
int memtype_copy_nth_element(struct memtype *out, loff_t pos)
{
struct memtype *match;
int i = 1;
match = memtype_interval_iter_first(&memtype_rbroot, 0, ULONG_MAX);
while (match && pos != i) {
match = memtype_interval_iter_next(match, 0, ULONG_MAX);
i++;
}
if (match) { /* pos == i */
*out = *match;
return 0;
} else {
return 1;
}
}
#endif

View File

@ -18,6 +18,7 @@
#include <asm/tlb.h>
#include <asm/tlbflush.h>
#include <asm/io.h>
#include <linux/vmalloc.h>
unsigned int __VMALLOC_RESERVE = 128 << 20;

View File

@ -5,6 +5,7 @@
#include <linux/mm.h>
#include <asm/page.h>
#include <linux/vmalloc.h>
#include "physaddr.h"

View File

@ -34,7 +34,7 @@
#include <linux/errno.h>
#include <linux/memblock.h>
#include <asm/pat.h>
#include <asm/memtype.h>
#include <asm/e820/api.h>
#include <asm/pci_x86.h>
#include <asm/io_apic.h>

View File

@ -67,7 +67,7 @@
#include <asm/linkage.h>
#include <asm/page.h>
#include <asm/init.h>
#include <asm/pat.h>
#include <asm/memtype.h>
#include <asm/smp.h>
#include <asm/tlb.h>

View File

@ -0,0 +1,4 @@
#ifndef _ASM_XTENSA_VMALLOC_H
#define _ASM_XTENSA_VMALLOC_H
#endif /* _ASM_XTENSA_VMALLOC_H */

View File

@ -40,7 +40,7 @@
#include <linux/slab.h>
#include <linux/bitmap.h>
#if defined(CONFIG_X86)
#include <asm/pat.h>
#include <asm/memtype.h>
#endif
#include <linux/sched.h>
#include <linux/sched/mm.h>

View File

@ -37,7 +37,7 @@
#include <linux/ivtvfb.h>
#ifdef CONFIG_X86_64
#include <asm/pat.h>
#include <asm/memtype.h>
#endif
/* card parameters */

View File

@ -625,24 +625,19 @@ unsigned long vmalloc_to_pfn(const void *addr);
* On nommu, vmalloc/vfree wrap through kmalloc/kfree directly, so there
* is no special casing required.
*/
static inline bool is_vmalloc_addr(const void *x)
{
#ifdef CONFIG_MMU
unsigned long addr = (unsigned long)x;
return addr >= VMALLOC_START && addr < VMALLOC_END;
#else
return false;
#endif
}
#ifndef is_ioremap_addr
#define is_ioremap_addr(x) is_vmalloc_addr(x)
#endif
#ifdef CONFIG_MMU
extern bool is_vmalloc_addr(const void *x);
extern int is_vmalloc_or_module_addr(const void *x);
#else
static inline bool is_vmalloc_addr(const void *x)
{
return false;
}
static inline int is_vmalloc_or_module_addr(const void *x)
{
return 0;

View File

@ -10,6 +10,8 @@
#include <linux/rbtree.h>
#include <linux/overflow.h>
#include <asm/vmalloc.h>
struct vm_area_struct; /* vma defining user mapping in mm_types.h */
struct notifier_block; /* in notifier.h */

View File

@ -29,7 +29,7 @@
#include <linux/highmem.h>
#include <linux/kgdb.h>
#include <asm/tlbflush.h>
#include <linux/vmalloc.h>
#if defined(CONFIG_HIGHMEM) || defined(CONFIG_X86_32)
DEFINE_PER_CPU(int, __kmap_atomic_idx);

View File

@ -41,6 +41,14 @@
#include "internal.h"
bool is_vmalloc_addr(const void *x)
{
unsigned long addr = (unsigned long)x;
return addr >= VMALLOC_START && addr < VMALLOC_END;
}
EXPORT_SYMBOL(is_vmalloc_addr);
struct vfree_deferred {
struct llist_head list;
struct work_struct wq;