From faaadaf315b48d40b39bf4f0011fa740f40fbe9e Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Fri, 24 Aug 2018 13:28:28 +0100 Subject: [PATCH 01/78] asm-generic/tlb: Guard with #ifdef CONFIG_MMU The inner workings of the mmu_gather-based TLB invalidation mechanism are not relevant to nommu configurations, so guard them with an #ifdef. This allows us to implement future functions using static inlines without breaking the build. Acked-by: Nicholas Piggin Acked-by: Peter Zijlstra (Intel) Signed-off-by: Will Deacon --- include/asm-generic/tlb.h | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/include/asm-generic/tlb.h b/include/asm-generic/tlb.h index b3353e21f3b3..a25e236f7a7f 100644 --- a/include/asm-generic/tlb.h +++ b/include/asm-generic/tlb.h @@ -20,6 +20,8 @@ #include #include +#ifdef CONFIG_MMU + #ifdef CONFIG_HAVE_RCU_TABLE_FREE /* * Semi RCU freeing of the page directories. @@ -310,6 +312,8 @@ static inline void tlb_remove_check_page_size_change(struct mmu_gather *tlb, #endif #endif +#endif /* CONFIG_MMU */ + #define tlb_migrate_finish(mm) do {} while (0) #endif /* _ASM_GENERIC__TLB_H */ From 22a61c3c4f1379ef8b0ce0d5cb78baf3178950e2 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Thu, 23 Aug 2018 20:27:25 +0100 Subject: [PATCH 02/78] asm-generic/tlb: Track freeing of page-table directories in struct mmu_gather Some architectures require different TLB invalidation instructions depending on whether it is only the last-level of page table being changed, or whether there are also changes to the intermediate (directory) entries higher up the tree. Add a new bit to the flags bitfield in struct mmu_gather so that the architecture code can operate accordingly if it's the intermediate levels being invalidated. Acked-by: Nicholas Piggin Signed-off-by: Peter Zijlstra Signed-off-by: Will Deacon --- include/asm-generic/tlb.h | 31 +++++++++++++++++++++++-------- 1 file changed, 23 insertions(+), 8 deletions(-) diff --git a/include/asm-generic/tlb.h b/include/asm-generic/tlb.h index a25e236f7a7f..2b444ad94566 100644 --- a/include/asm-generic/tlb.h +++ b/include/asm-generic/tlb.h @@ -99,12 +99,22 @@ struct mmu_gather { #endif unsigned long start; unsigned long end; - /* we are in the middle of an operation to clear - * a full mm and can make some optimizations */ - unsigned int fullmm : 1, - /* we have performed an operation which - * requires a complete flush of the tlb */ - need_flush_all : 1; + /* + * we are in the middle of an operation to clear + * a full mm and can make some optimizations + */ + unsigned int fullmm : 1; + + /* + * we have performed an operation which + * requires a complete flush of the tlb + */ + unsigned int need_flush_all : 1; + + /* + * we have removed page directories + */ + unsigned int freed_tables : 1; struct mmu_gather_batch *active; struct mmu_gather_batch local; @@ -139,6 +149,7 @@ static inline void __tlb_reset_range(struct mmu_gather *tlb) tlb->start = TASK_SIZE; tlb->end = 0; } + tlb->freed_tables = 0; } static inline void tlb_flush_mmu_tlbonly(struct mmu_gather *tlb) @@ -280,6 +291,7 @@ static inline void tlb_remove_check_page_size_change(struct mmu_gather *tlb, #define pte_free_tlb(tlb, ptep, address) \ do { \ __tlb_adjust_range(tlb, address, PAGE_SIZE); \ + tlb->freed_tables = 1; \ __pte_free_tlb(tlb, ptep, address); \ } while (0) #endif @@ -287,7 +299,8 @@ static inline void tlb_remove_check_page_size_change(struct mmu_gather *tlb, #ifndef pmd_free_tlb #define pmd_free_tlb(tlb, pmdp, address) \ do { \ - __tlb_adjust_range(tlb, address, PAGE_SIZE); \ + __tlb_adjust_range(tlb, address, PAGE_SIZE); \ + tlb->freed_tables = 1; \ __pmd_free_tlb(tlb, pmdp, address); \ } while (0) #endif @@ -297,6 +310,7 @@ static inline void tlb_remove_check_page_size_change(struct mmu_gather *tlb, #define pud_free_tlb(tlb, pudp, address) \ do { \ __tlb_adjust_range(tlb, address, PAGE_SIZE); \ + tlb->freed_tables = 1; \ __pud_free_tlb(tlb, pudp, address); \ } while (0) #endif @@ -306,7 +320,8 @@ static inline void tlb_remove_check_page_size_change(struct mmu_gather *tlb, #ifndef p4d_free_tlb #define p4d_free_tlb(tlb, pudp, address) \ do { \ - __tlb_adjust_range(tlb, address, PAGE_SIZE); \ + __tlb_adjust_range(tlb, address, PAGE_SIZE); \ + tlb->freed_tables = 1; \ __p4d_free_tlb(tlb, pudp, address); \ } while (0) #endif From a6d60245d6d9b1caf66b0d94419988c4836980af Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Thu, 23 Aug 2018 21:01:46 +0100 Subject: [PATCH 03/78] asm-generic/tlb: Track which levels of the page tables have been cleared It is common for architectures with hugepage support to require only a single TLB invalidation operation per hugepage during unmap(), rather than iterating through the mapping at a PAGE_SIZE increment. Currently, however, the level in the page table where the unmap() operation occurs is not stored in the mmu_gather structure, therefore forcing architectures to issue additional TLB invalidation operations or to give up and over-invalidate by e.g. invalidating the entire TLB. Ideally, we could add an interval rbtree to the mmu_gather structure, which would allow us to associate the correct mapping granule with the various sub-mappings within the range being invalidated. However, this is costly in terms of book-keeping and memory management, so instead we approximate by keeping track of the page table levels that are cleared and provide a means to query the smallest granule required for invalidation. Acked-by: Peter Zijlstra (Intel) Acked-by: Nicholas Piggin Signed-off-by: Will Deacon --- include/asm-generic/tlb.h | 58 +++++++++++++++++++++++++++++++++------ mm/memory.c | 4 ++- 2 files changed, 53 insertions(+), 9 deletions(-) diff --git a/include/asm-generic/tlb.h b/include/asm-generic/tlb.h index 2b444ad94566..9791e98122a0 100644 --- a/include/asm-generic/tlb.h +++ b/include/asm-generic/tlb.h @@ -116,6 +116,14 @@ struct mmu_gather { */ unsigned int freed_tables : 1; + /* + * at which levels have we cleared entries? + */ + unsigned int cleared_ptes : 1; + unsigned int cleared_pmds : 1; + unsigned int cleared_puds : 1; + unsigned int cleared_p4ds : 1; + struct mmu_gather_batch *active; struct mmu_gather_batch local; struct page *__pages[MMU_GATHER_BUNDLE]; @@ -150,6 +158,10 @@ static inline void __tlb_reset_range(struct mmu_gather *tlb) tlb->end = 0; } tlb->freed_tables = 0; + tlb->cleared_ptes = 0; + tlb->cleared_pmds = 0; + tlb->cleared_puds = 0; + tlb->cleared_p4ds = 0; } static inline void tlb_flush_mmu_tlbonly(struct mmu_gather *tlb) @@ -199,6 +211,25 @@ static inline void tlb_remove_check_page_size_change(struct mmu_gather *tlb, } #endif +static inline unsigned long tlb_get_unmap_shift(struct mmu_gather *tlb) +{ + if (tlb->cleared_ptes) + return PAGE_SHIFT; + if (tlb->cleared_pmds) + return PMD_SHIFT; + if (tlb->cleared_puds) + return PUD_SHIFT; + if (tlb->cleared_p4ds) + return P4D_SHIFT; + + return PAGE_SHIFT; +} + +static inline unsigned long tlb_get_unmap_size(struct mmu_gather *tlb) +{ + return 1UL << tlb_get_unmap_shift(tlb); +} + /* * In the case of tlb vma handling, we can optimise these away in the * case where we're doing a full MM flush. When we're doing a munmap, @@ -232,13 +263,19 @@ static inline void tlb_remove_check_page_size_change(struct mmu_gather *tlb, #define tlb_remove_tlb_entry(tlb, ptep, address) \ do { \ __tlb_adjust_range(tlb, address, PAGE_SIZE); \ + tlb->cleared_ptes = 1; \ __tlb_remove_tlb_entry(tlb, ptep, address); \ } while (0) -#define tlb_remove_huge_tlb_entry(h, tlb, ptep, address) \ - do { \ - __tlb_adjust_range(tlb, address, huge_page_size(h)); \ - __tlb_remove_tlb_entry(tlb, ptep, address); \ +#define tlb_remove_huge_tlb_entry(h, tlb, ptep, address) \ + do { \ + unsigned long _sz = huge_page_size(h); \ + __tlb_adjust_range(tlb, address, _sz); \ + if (_sz == PMD_SIZE) \ + tlb->cleared_pmds = 1; \ + else if (_sz == PUD_SIZE) \ + tlb->cleared_puds = 1; \ + __tlb_remove_tlb_entry(tlb, ptep, address); \ } while (0) /** @@ -252,6 +289,7 @@ static inline void tlb_remove_check_page_size_change(struct mmu_gather *tlb, #define tlb_remove_pmd_tlb_entry(tlb, pmdp, address) \ do { \ __tlb_adjust_range(tlb, address, HPAGE_PMD_SIZE); \ + tlb->cleared_pmds = 1; \ __tlb_remove_pmd_tlb_entry(tlb, pmdp, address); \ } while (0) @@ -266,6 +304,7 @@ static inline void tlb_remove_check_page_size_change(struct mmu_gather *tlb, #define tlb_remove_pud_tlb_entry(tlb, pudp, address) \ do { \ __tlb_adjust_range(tlb, address, HPAGE_PUD_SIZE); \ + tlb->cleared_puds = 1; \ __tlb_remove_pud_tlb_entry(tlb, pudp, address); \ } while (0) @@ -291,7 +330,8 @@ static inline void tlb_remove_check_page_size_change(struct mmu_gather *tlb, #define pte_free_tlb(tlb, ptep, address) \ do { \ __tlb_adjust_range(tlb, address, PAGE_SIZE); \ - tlb->freed_tables = 1; \ + tlb->freed_tables = 1; \ + tlb->cleared_pmds = 1; \ __pte_free_tlb(tlb, ptep, address); \ } while (0) #endif @@ -300,7 +340,8 @@ static inline void tlb_remove_check_page_size_change(struct mmu_gather *tlb, #define pmd_free_tlb(tlb, pmdp, address) \ do { \ __tlb_adjust_range(tlb, address, PAGE_SIZE); \ - tlb->freed_tables = 1; \ + tlb->freed_tables = 1; \ + tlb->cleared_puds = 1; \ __pmd_free_tlb(tlb, pmdp, address); \ } while (0) #endif @@ -310,7 +351,8 @@ static inline void tlb_remove_check_page_size_change(struct mmu_gather *tlb, #define pud_free_tlb(tlb, pudp, address) \ do { \ __tlb_adjust_range(tlb, address, PAGE_SIZE); \ - tlb->freed_tables = 1; \ + tlb->freed_tables = 1; \ + tlb->cleared_p4ds = 1; \ __pud_free_tlb(tlb, pudp, address); \ } while (0) #endif @@ -321,7 +363,7 @@ static inline void tlb_remove_check_page_size_change(struct mmu_gather *tlb, #define p4d_free_tlb(tlb, pudp, address) \ do { \ __tlb_adjust_range(tlb, address, PAGE_SIZE); \ - tlb->freed_tables = 1; \ + tlb->freed_tables = 1; \ __p4d_free_tlb(tlb, pudp, address); \ } while (0) #endif diff --git a/mm/memory.c b/mm/memory.c index c467102a5cbc..9135f48e8d84 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -267,8 +267,10 @@ void arch_tlb_finish_mmu(struct mmu_gather *tlb, { struct mmu_gather_batch *batch, *next; - if (force) + if (force) { + __tlb_reset_range(tlb); __tlb_adjust_range(tlb, start, end - start); + } tlb_flush_mmu(tlb); From 196d9d8bb71deaa2d1c7170c88a2f1a318363047 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Mon, 3 Sep 2018 15:07:36 +0100 Subject: [PATCH 04/78] mm/memory: Move mmu_gather and TLB invalidation code into its own file In preparation for maintaining the mmu_gather code as its own entity, move the implementation out of memory.c and into its own file. Cc: "Kirill A. Shutemov" Cc: Andrew Morton Cc: Michal Hocko Signed-off-by: Peter Zijlstra Signed-off-by: Will Deacon --- include/asm-generic/tlb.h | 1 + mm/Makefile | 6 +- mm/memory.c | 249 ------------------------------------ mm/mmu_gather.c | 261 ++++++++++++++++++++++++++++++++++++++ 4 files changed, 265 insertions(+), 252 deletions(-) create mode 100644 mm/mmu_gather.c diff --git a/include/asm-generic/tlb.h b/include/asm-generic/tlb.h index 9791e98122a0..6be86c1c5c58 100644 --- a/include/asm-generic/tlb.h +++ b/include/asm-generic/tlb.h @@ -138,6 +138,7 @@ void arch_tlb_gather_mmu(struct mmu_gather *tlb, void tlb_flush_mmu(struct mmu_gather *tlb); void arch_tlb_finish_mmu(struct mmu_gather *tlb, unsigned long start, unsigned long end, bool force); +void tlb_flush_mmu_free(struct mmu_gather *tlb); extern bool __tlb_remove_page_size(struct mmu_gather *tlb, struct page *page, int page_size); diff --git a/mm/Makefile b/mm/Makefile index 8716bdabe1e6..7c48e0d3d8ab 100644 --- a/mm/Makefile +++ b/mm/Makefile @@ -23,9 +23,9 @@ KCOV_INSTRUMENT_vmstat.o := n mmu-y := nommu.o mmu-$(CONFIG_MMU) := gup.o highmem.o memory.o mincore.o \ - mlock.o mmap.o mprotect.o mremap.o msync.o \ - page_vma_mapped.o pagewalk.o pgtable-generic.o \ - rmap.o vmalloc.o + mlock.o mmap.o mmu_gather.o mprotect.o mremap.o \ + msync.o page_vma_mapped.o pagewalk.o \ + pgtable-generic.o rmap.o vmalloc.o ifdef CONFIG_CROSS_MEMORY_ATTACH diff --git a/mm/memory.c b/mm/memory.c index 9135f48e8d84..21a5e6e4758b 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -186,255 +186,6 @@ static void check_sync_rss_stat(struct task_struct *task) #endif /* SPLIT_RSS_COUNTING */ -#ifdef HAVE_GENERIC_MMU_GATHER - -static bool tlb_next_batch(struct mmu_gather *tlb) -{ - struct mmu_gather_batch *batch; - - batch = tlb->active; - if (batch->next) { - tlb->active = batch->next; - return true; - } - - if (tlb->batch_count == MAX_GATHER_BATCH_COUNT) - return false; - - batch = (void *)__get_free_pages(GFP_NOWAIT | __GFP_NOWARN, 0); - if (!batch) - return false; - - tlb->batch_count++; - batch->next = NULL; - batch->nr = 0; - batch->max = MAX_GATHER_BATCH; - - tlb->active->next = batch; - tlb->active = batch; - - return true; -} - -void arch_tlb_gather_mmu(struct mmu_gather *tlb, struct mm_struct *mm, - unsigned long start, unsigned long end) -{ - tlb->mm = mm; - - /* Is it from 0 to ~0? */ - tlb->fullmm = !(start | (end+1)); - tlb->need_flush_all = 0; - tlb->local.next = NULL; - tlb->local.nr = 0; - tlb->local.max = ARRAY_SIZE(tlb->__pages); - tlb->active = &tlb->local; - tlb->batch_count = 0; - -#ifdef CONFIG_HAVE_RCU_TABLE_FREE - tlb->batch = NULL; -#endif - tlb->page_size = 0; - - __tlb_reset_range(tlb); -} - -static void tlb_flush_mmu_free(struct mmu_gather *tlb) -{ - struct mmu_gather_batch *batch; - -#ifdef CONFIG_HAVE_RCU_TABLE_FREE - tlb_table_flush(tlb); -#endif - for (batch = &tlb->local; batch && batch->nr; batch = batch->next) { - free_pages_and_swap_cache(batch->pages, batch->nr); - batch->nr = 0; - } - tlb->active = &tlb->local; -} - -void tlb_flush_mmu(struct mmu_gather *tlb) -{ - tlb_flush_mmu_tlbonly(tlb); - tlb_flush_mmu_free(tlb); -} - -/* tlb_finish_mmu - * Called at the end of the shootdown operation to free up any resources - * that were required. - */ -void arch_tlb_finish_mmu(struct mmu_gather *tlb, - unsigned long start, unsigned long end, bool force) -{ - struct mmu_gather_batch *batch, *next; - - if (force) { - __tlb_reset_range(tlb); - __tlb_adjust_range(tlb, start, end - start); - } - - tlb_flush_mmu(tlb); - - /* keep the page table cache within bounds */ - check_pgt_cache(); - - for (batch = tlb->local.next; batch; batch = next) { - next = batch->next; - free_pages((unsigned long)batch, 0); - } - tlb->local.next = NULL; -} - -/* __tlb_remove_page - * Must perform the equivalent to __free_pte(pte_get_and_clear(ptep)), while - * handling the additional races in SMP caused by other CPUs caching valid - * mappings in their TLBs. Returns the number of free page slots left. - * When out of page slots we must call tlb_flush_mmu(). - *returns true if the caller should flush. - */ -bool __tlb_remove_page_size(struct mmu_gather *tlb, struct page *page, int page_size) -{ - struct mmu_gather_batch *batch; - - VM_BUG_ON(!tlb->end); - VM_WARN_ON(tlb->page_size != page_size); - - batch = tlb->active; - /* - * Add the page and check if we are full. If so - * force a flush. - */ - batch->pages[batch->nr++] = page; - if (batch->nr == batch->max) { - if (!tlb_next_batch(tlb)) - return true; - batch = tlb->active; - } - VM_BUG_ON_PAGE(batch->nr > batch->max, page); - - return false; -} - -#endif /* HAVE_GENERIC_MMU_GATHER */ - -#ifdef CONFIG_HAVE_RCU_TABLE_FREE - -/* - * See the comment near struct mmu_table_batch. - */ - -/* - * If we want tlb_remove_table() to imply TLB invalidates. - */ -static inline void tlb_table_invalidate(struct mmu_gather *tlb) -{ -#ifdef CONFIG_HAVE_RCU_TABLE_INVALIDATE - /* - * Invalidate page-table caches used by hardware walkers. Then we still - * need to RCU-sched wait while freeing the pages because software - * walkers can still be in-flight. - */ - tlb_flush_mmu_tlbonly(tlb); -#endif -} - -static void tlb_remove_table_smp_sync(void *arg) -{ - /* Simply deliver the interrupt */ -} - -static void tlb_remove_table_one(void *table) -{ - /* - * This isn't an RCU grace period and hence the page-tables cannot be - * assumed to be actually RCU-freed. - * - * It is however sufficient for software page-table walkers that rely on - * IRQ disabling. See the comment near struct mmu_table_batch. - */ - smp_call_function(tlb_remove_table_smp_sync, NULL, 1); - __tlb_remove_table(table); -} - -static void tlb_remove_table_rcu(struct rcu_head *head) -{ - struct mmu_table_batch *batch; - int i; - - batch = container_of(head, struct mmu_table_batch, rcu); - - for (i = 0; i < batch->nr; i++) - __tlb_remove_table(batch->tables[i]); - - free_page((unsigned long)batch); -} - -void tlb_table_flush(struct mmu_gather *tlb) -{ - struct mmu_table_batch **batch = &tlb->batch; - - if (*batch) { - tlb_table_invalidate(tlb); - call_rcu_sched(&(*batch)->rcu, tlb_remove_table_rcu); - *batch = NULL; - } -} - -void tlb_remove_table(struct mmu_gather *tlb, void *table) -{ - struct mmu_table_batch **batch = &tlb->batch; - - if (*batch == NULL) { - *batch = (struct mmu_table_batch *)__get_free_page(GFP_NOWAIT | __GFP_NOWARN); - if (*batch == NULL) { - tlb_table_invalidate(tlb); - tlb_remove_table_one(table); - return; - } - (*batch)->nr = 0; - } - - (*batch)->tables[(*batch)->nr++] = table; - if ((*batch)->nr == MAX_TABLE_BATCH) - tlb_table_flush(tlb); -} - -#endif /* CONFIG_HAVE_RCU_TABLE_FREE */ - -/** - * tlb_gather_mmu - initialize an mmu_gather structure for page-table tear-down - * @tlb: the mmu_gather structure to initialize - * @mm: the mm_struct of the target address space - * @start: start of the region that will be removed from the page-table - * @end: end of the region that will be removed from the page-table - * - * Called to initialize an (on-stack) mmu_gather structure for page-table - * tear-down from @mm. The @start and @end are set to 0 and -1 - * respectively when @mm is without users and we're going to destroy - * the full address space (exit/execve). - */ -void tlb_gather_mmu(struct mmu_gather *tlb, struct mm_struct *mm, - unsigned long start, unsigned long end) -{ - arch_tlb_gather_mmu(tlb, mm, start, end); - inc_tlb_flush_pending(tlb->mm); -} - -void tlb_finish_mmu(struct mmu_gather *tlb, - unsigned long start, unsigned long end) -{ - /* - * If there are parallel threads are doing PTE changes on same range - * under non-exclusive lock(e.g., mmap_sem read-side) but defer TLB - * flush by batching, a thread has stable TLB entry can fail to flush - * the TLB by observing pte_none|!pte_dirty, for example so flush TLB - * forcefully if we detect parallel PTE batching threads. - */ - bool force = mm_tlb_flush_nested(tlb->mm); - - arch_tlb_finish_mmu(tlb, start, end, force); - dec_tlb_flush_pending(tlb->mm); -} - /* * Note: this doesn't free the actual pages themselves. That * has been handled earlier when unmapping all the memory regions. diff --git a/mm/mmu_gather.c b/mm/mmu_gather.c new file mode 100644 index 000000000000..2a9fbc4a37d5 --- /dev/null +++ b/mm/mmu_gather.c @@ -0,0 +1,261 @@ +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include + +#ifdef HAVE_GENERIC_MMU_GATHER + +static bool tlb_next_batch(struct mmu_gather *tlb) +{ + struct mmu_gather_batch *batch; + + batch = tlb->active; + if (batch->next) { + tlb->active = batch->next; + return true; + } + + if (tlb->batch_count == MAX_GATHER_BATCH_COUNT) + return false; + + batch = (void *)__get_free_pages(GFP_NOWAIT | __GFP_NOWARN, 0); + if (!batch) + return false; + + tlb->batch_count++; + batch->next = NULL; + batch->nr = 0; + batch->max = MAX_GATHER_BATCH; + + tlb->active->next = batch; + tlb->active = batch; + + return true; +} + +void arch_tlb_gather_mmu(struct mmu_gather *tlb, struct mm_struct *mm, + unsigned long start, unsigned long end) +{ + tlb->mm = mm; + + /* Is it from 0 to ~0? */ + tlb->fullmm = !(start | (end+1)); + tlb->need_flush_all = 0; + tlb->local.next = NULL; + tlb->local.nr = 0; + tlb->local.max = ARRAY_SIZE(tlb->__pages); + tlb->active = &tlb->local; + tlb->batch_count = 0; + +#ifdef CONFIG_HAVE_RCU_TABLE_FREE + tlb->batch = NULL; +#endif + tlb->page_size = 0; + + __tlb_reset_range(tlb); +} + +void tlb_flush_mmu_free(struct mmu_gather *tlb) +{ + struct mmu_gather_batch *batch; + +#ifdef CONFIG_HAVE_RCU_TABLE_FREE + tlb_table_flush(tlb); +#endif + for (batch = &tlb->local; batch && batch->nr; batch = batch->next) { + free_pages_and_swap_cache(batch->pages, batch->nr); + batch->nr = 0; + } + tlb->active = &tlb->local; +} + +void tlb_flush_mmu(struct mmu_gather *tlb) +{ + tlb_flush_mmu_tlbonly(tlb); + tlb_flush_mmu_free(tlb); +} + +/* tlb_finish_mmu + * Called at the end of the shootdown operation to free up any resources + * that were required. + */ +void arch_tlb_finish_mmu(struct mmu_gather *tlb, + unsigned long start, unsigned long end, bool force) +{ + struct mmu_gather_batch *batch, *next; + + if (force) { + __tlb_reset_range(tlb); + __tlb_adjust_range(tlb, start, end - start); + } + + tlb_flush_mmu(tlb); + + /* keep the page table cache within bounds */ + check_pgt_cache(); + + for (batch = tlb->local.next; batch; batch = next) { + next = batch->next; + free_pages((unsigned long)batch, 0); + } + tlb->local.next = NULL; +} + +/* __tlb_remove_page + * Must perform the equivalent to __free_pte(pte_get_and_clear(ptep)), while + * handling the additional races in SMP caused by other CPUs caching valid + * mappings in their TLBs. Returns the number of free page slots left. + * When out of page slots we must call tlb_flush_mmu(). + *returns true if the caller should flush. + */ +bool __tlb_remove_page_size(struct mmu_gather *tlb, struct page *page, int page_size) +{ + struct mmu_gather_batch *batch; + + VM_BUG_ON(!tlb->end); + VM_WARN_ON(tlb->page_size != page_size); + + batch = tlb->active; + /* + * Add the page and check if we are full. If so + * force a flush. + */ + batch->pages[batch->nr++] = page; + if (batch->nr == batch->max) { + if (!tlb_next_batch(tlb)) + return true; + batch = tlb->active; + } + VM_BUG_ON_PAGE(batch->nr > batch->max, page); + + return false; +} + +#endif /* HAVE_GENERIC_MMU_GATHER */ + +#ifdef CONFIG_HAVE_RCU_TABLE_FREE + +/* + * See the comment near struct mmu_table_batch. + */ + +/* + * If we want tlb_remove_table() to imply TLB invalidates. + */ +static inline void tlb_table_invalidate(struct mmu_gather *tlb) +{ +#ifdef CONFIG_HAVE_RCU_TABLE_INVALIDATE + /* + * Invalidate page-table caches used by hardware walkers. Then we still + * need to RCU-sched wait while freeing the pages because software + * walkers can still be in-flight. + */ + tlb_flush_mmu_tlbonly(tlb); +#endif +} + +static void tlb_remove_table_smp_sync(void *arg) +{ + /* Simply deliver the interrupt */ +} + +static void tlb_remove_table_one(void *table) +{ + /* + * This isn't an RCU grace period and hence the page-tables cannot be + * assumed to be actually RCU-freed. + * + * It is however sufficient for software page-table walkers that rely on + * IRQ disabling. See the comment near struct mmu_table_batch. + */ + smp_call_function(tlb_remove_table_smp_sync, NULL, 1); + __tlb_remove_table(table); +} + +static void tlb_remove_table_rcu(struct rcu_head *head) +{ + struct mmu_table_batch *batch; + int i; + + batch = container_of(head, struct mmu_table_batch, rcu); + + for (i = 0; i < batch->nr; i++) + __tlb_remove_table(batch->tables[i]); + + free_page((unsigned long)batch); +} + +void tlb_table_flush(struct mmu_gather *tlb) +{ + struct mmu_table_batch **batch = &tlb->batch; + + if (*batch) { + tlb_table_invalidate(tlb); + call_rcu_sched(&(*batch)->rcu, tlb_remove_table_rcu); + *batch = NULL; + } +} + +void tlb_remove_table(struct mmu_gather *tlb, void *table) +{ + struct mmu_table_batch **batch = &tlb->batch; + + if (*batch == NULL) { + *batch = (struct mmu_table_batch *)__get_free_page(GFP_NOWAIT | __GFP_NOWARN); + if (*batch == NULL) { + tlb_table_invalidate(tlb); + tlb_remove_table_one(table); + return; + } + (*batch)->nr = 0; + } + + (*batch)->tables[(*batch)->nr++] = table; + if ((*batch)->nr == MAX_TABLE_BATCH) + tlb_table_flush(tlb); +} + +#endif /* CONFIG_HAVE_RCU_TABLE_FREE */ + +/** + * tlb_gather_mmu - initialize an mmu_gather structure for page-table tear-down + * @tlb: the mmu_gather structure to initialize + * @mm: the mm_struct of the target address space + * @start: start of the region that will be removed from the page-table + * @end: end of the region that will be removed from the page-table + * + * Called to initialize an (on-stack) mmu_gather structure for page-table + * tear-down from @mm. The @start and @end are set to 0 and -1 + * respectively when @mm is without users and we're going to destroy + * the full address space (exit/execve). + */ +void tlb_gather_mmu(struct mmu_gather *tlb, struct mm_struct *mm, + unsigned long start, unsigned long end) +{ + arch_tlb_gather_mmu(tlb, mm, start, end); + inc_tlb_flush_pending(tlb->mm); +} + +void tlb_finish_mmu(struct mmu_gather *tlb, + unsigned long start, unsigned long end) +{ + /* + * If there are parallel threads are doing PTE changes on same range + * under non-exclusive lock(e.g., mmap_sem read-side) but defer TLB + * flush by batching, a thread has stable TLB entry can fail to flush + * the TLB by observing pte_none|!pte_dirty, for example so flush TLB + * forcefully if we detect parallel PTE batching threads. + */ + bool force = mm_tlb_flush_nested(tlb->mm); + + arch_tlb_finish_mmu(tlb, start, end, force); + dec_tlb_flush_pending(tlb->mm); +} From 7526aa54b261087014bdea7e361f900d4741c35e Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Mon, 3 Sep 2018 15:19:37 +0100 Subject: [PATCH 05/78] MAINTAINERS: Add entry for MMU GATHER AND TLB INVALIDATION We recently had to debug a TLB invalidation problem on the munmap() path, which was made more difficult than necessary because: (a) The MMU gather code had changed without people realising (b) Many people subtly misunderstood the operation of the MMU gather code and its interactions with RCU and arch-specific TLB invalidation (c) Untangling the intended behaviour involved educated guesswork and plenty of discussion Hopefully, we can avoid getting into this mess again by designating a cross-arch group of people to look after this code. It is not intended that they will have a separate tree, but they at least provide a point of contact for anybody working in this area and can co-ordinate any proposed future changes to the internal API. Cc: Peter Zijlstra Cc: Nicholas Piggin Cc: Linus Torvalds Cc: "Aneesh Kumar K.V" Cc: "Kirill A. Shutemov" Cc: Andrew Morton Cc: Michal Hocko Signed-off-by: Will Deacon --- MAINTAINERS | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/MAINTAINERS b/MAINTAINERS index 9ad052aeac39..225c3af1cccd 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -9681,6 +9681,19 @@ S: Maintained F: arch/arm/boot/dts/mmp* F: arch/arm/mach-mmp/ +MMU GATHER AND TLB INVALIDATION +M: Will Deacon +M: "Aneesh Kumar K.V" +M: Andrew Morton +M: Nick Piggin +M: Peter Zijlstra +L: linux-arch@vger.kernel.org +L: linux-mm@kvack.org +S: Maintained +F: arch/*/include/asm/tlb.h +F: include/asm-generic/tlb.h +F: mm/mmu_gather.c + MN88472 MEDIA DRIVER M: Antti Palosaari L: linux-media@vger.kernel.org From 9784d82db3eb3de7851e5a3f4a2481607de2452c Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Mon, 27 Aug 2018 13:02:42 +0200 Subject: [PATCH 06/78] lib/crc32: make core crc32() routines weak so they can be overridden Allow architectures to drop in accelerated CRC32 routines by making the crc32_le/__crc32c_le entry points weak, and exposing non-weak aliases for them that may be used by the accelerated versions as fallbacks in case the instructions they rely upon are not available. Acked-by: Herbert Xu Signed-off-by: Ard Biesheuvel Signed-off-by: Catalin Marinas --- lib/crc32.c | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/lib/crc32.c b/lib/crc32.c index a6c9afafc8c8..45b1d67a1767 100644 --- a/lib/crc32.c +++ b/lib/crc32.c @@ -183,21 +183,21 @@ static inline u32 __pure crc32_le_generic(u32 crc, unsigned char const *p, } #if CRC_LE_BITS == 1 -u32 __pure crc32_le(u32 crc, unsigned char const *p, size_t len) +u32 __pure __weak crc32_le(u32 crc, unsigned char const *p, size_t len) { return crc32_le_generic(crc, p, len, NULL, CRC32_POLY_LE); } -u32 __pure __crc32c_le(u32 crc, unsigned char const *p, size_t len) +u32 __pure __weak __crc32c_le(u32 crc, unsigned char const *p, size_t len) { return crc32_le_generic(crc, p, len, NULL, CRC32C_POLY_LE); } #else -u32 __pure crc32_le(u32 crc, unsigned char const *p, size_t len) +u32 __pure __weak crc32_le(u32 crc, unsigned char const *p, size_t len) { return crc32_le_generic(crc, p, len, (const u32 (*)[256])crc32table_le, CRC32_POLY_LE); } -u32 __pure __crc32c_le(u32 crc, unsigned char const *p, size_t len) +u32 __pure __weak __crc32c_le(u32 crc, unsigned char const *p, size_t len) { return crc32_le_generic(crc, p, len, (const u32 (*)[256])crc32ctable_le, CRC32C_POLY_LE); @@ -206,6 +206,9 @@ u32 __pure __crc32c_le(u32 crc, unsigned char const *p, size_t len) EXPORT_SYMBOL(crc32_le); EXPORT_SYMBOL(__crc32c_le); +u32 crc32_le_base(u32, unsigned char const *, size_t) __alias(crc32_le); +u32 __crc32c_le_base(u32, unsigned char const *, size_t) __alias(__crc32c_le); + /* * This multiplies the polynomials x and y modulo the given modulus. * This follows the "little-endian" CRC convention that the lsbit From 86d0dd34eafffbc76a81aba6ae2d71927d3835a8 Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Mon, 27 Aug 2018 13:02:43 +0200 Subject: [PATCH 07/78] arm64: cpufeature: add feature for CRC32 instructions Add a CRC32 feature bit and wire it up to the CPU id register so we will be able to use alternatives patching for CRC32 operations. Acked-by: Will Deacon Signed-off-by: Ard Biesheuvel Signed-off-by: Catalin Marinas --- arch/arm64/include/asm/cpucaps.h | 3 ++- arch/arm64/kernel/cpufeature.c | 9 +++++++++ 2 files changed, 11 insertions(+), 1 deletion(-) diff --git a/arch/arm64/include/asm/cpucaps.h b/arch/arm64/include/asm/cpucaps.h index ae1f70450fb2..9932aca9704b 100644 --- a/arch/arm64/include/asm/cpucaps.h +++ b/arch/arm64/include/asm/cpucaps.h @@ -51,7 +51,8 @@ #define ARM64_SSBD 30 #define ARM64_MISMATCHED_CACHE_TYPE 31 #define ARM64_HAS_STAGE2_FWB 32 +#define ARM64_HAS_CRC32 33 -#define ARM64_NCAPS 33 +#define ARM64_NCAPS 34 #endif /* __ASM_CPUCAPS_H */ diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c index e238b7932096..7626b80128f5 100644 --- a/arch/arm64/kernel/cpufeature.c +++ b/arch/arm64/kernel/cpufeature.c @@ -1222,6 +1222,15 @@ static const struct arm64_cpu_capabilities arm64_features[] = { .cpu_enable = cpu_enable_hw_dbm, }, #endif + { + .desc = "CRC32 instructions", + .capability = ARM64_HAS_CRC32, + .type = ARM64_CPUCAP_SYSTEM_FEATURE, + .matches = has_cpuid_feature, + .sys_reg = SYS_ID_AA64ISAR0_EL1, + .field_pos = ID_AA64ISAR0_CRC32_SHIFT, + .min_field_value = 1, + }, {}, }; From 7481cddf29ede204b475facc40e6f65459939881 Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Mon, 27 Aug 2018 13:02:44 +0200 Subject: [PATCH 08/78] arm64/lib: add accelerated crc32 routines Unlike crc32c(), which is wired up to the crypto API internally so the optimal driver is selected based on the platform's capabilities, crc32_le() is implemented as a library function using a slice-by-8 table based C implementation. Even though few of the call sites may be bottlenecks, calling a time variant implementation with a non-negligible D-cache footprint is a bit of a waste, given that ARMv8.1 and up mandates support for the CRC32 instructions that were optional in ARMv8.0, but are already widely available, even on the Cortex-A53 based Raspberry Pi. So implement routines that use these instructions if available, and fall back to the existing generic routines otherwise. The selection is based on alternatives patching. Note that this unconditionally selects CONFIG_CRC32 as a builtin. Since CRC32 is relied upon by core functionality such as CONFIG_OF_FLATTREE, this just codifies the status quo. Signed-off-by: Ard Biesheuvel Signed-off-by: Catalin Marinas --- arch/arm64/Kconfig | 1 + arch/arm64/lib/Makefile | 2 ++ arch/arm64/lib/crc32.S | 60 +++++++++++++++++++++++++++++++++++++++++ 3 files changed, 63 insertions(+) create mode 100644 arch/arm64/lib/crc32.S diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig index 1b1a0e95c751..b4c1f1f55aec 100644 --- a/arch/arm64/Kconfig +++ b/arch/arm64/Kconfig @@ -75,6 +75,7 @@ config ARM64 select CLONE_BACKWARDS select COMMON_CLK select CPU_PM if (SUSPEND || CPU_IDLE) + select CRC32 select DCACHE_WORD_ACCESS select DMA_DIRECT_OPS select EDAC_SUPPORT diff --git a/arch/arm64/lib/Makefile b/arch/arm64/lib/Makefile index 68755fd70dcf..f28f91fd96a2 100644 --- a/arch/arm64/lib/Makefile +++ b/arch/arm64/lib/Makefile @@ -25,3 +25,5 @@ KCOV_INSTRUMENT_atomic_ll_sc.o := n UBSAN_SANITIZE_atomic_ll_sc.o := n lib-$(CONFIG_ARCH_HAS_UACCESS_FLUSHCACHE) += uaccess_flushcache.o + +obj-$(CONFIG_CRC32) += crc32.o diff --git a/arch/arm64/lib/crc32.S b/arch/arm64/lib/crc32.S new file mode 100644 index 000000000000..5bc1e85b4e1c --- /dev/null +++ b/arch/arm64/lib/crc32.S @@ -0,0 +1,60 @@ +/* + * Accelerated CRC32(C) using AArch64 CRC instructions + * + * Copyright (C) 2016 - 2018 Linaro Ltd + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#include +#include +#include + + .cpu generic+crc + + .macro __crc32, c +0: subs x2, x2, #16 + b.mi 8f + ldp x3, x4, [x1], #16 +CPU_BE( rev x3, x3 ) +CPU_BE( rev x4, x4 ) + crc32\c\()x w0, w0, x3 + crc32\c\()x w0, w0, x4 + b.ne 0b + ret + +8: tbz x2, #3, 4f + ldr x3, [x1], #8 +CPU_BE( rev x3, x3 ) + crc32\c\()x w0, w0, x3 +4: tbz x2, #2, 2f + ldr w3, [x1], #4 +CPU_BE( rev w3, w3 ) + crc32\c\()w w0, w0, w3 +2: tbz x2, #1, 1f + ldrh w3, [x1], #2 +CPU_BE( rev16 w3, w3 ) + crc32\c\()h w0, w0, w3 +1: tbz x2, #0, 0f + ldrb w3, [x1] + crc32\c\()b w0, w0, w3 +0: ret + .endm + + .align 5 +ENTRY(crc32_le) +alternative_if_not ARM64_HAS_CRC32 + b crc32_le_base +alternative_else_nop_endif + __crc32 +ENDPROC(crc32_le) + + .align 5 +ENTRY(__crc32c_le) +alternative_if_not ARM64_HAS_CRC32 + b __crc32c_le_base +alternative_else_nop_endif + __crc32 c +ENDPROC(__crc32c_le) From 4733c7c79e8c466fedb4fd4eb29bfc5b1bdb336f Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Wed, 5 Sep 2018 15:12:27 +0100 Subject: [PATCH 09/78] arm64: dump: Use consistent capitalisation for page-table dumps Being consistent in our capitalisation for page-table dumps helps when grepping for things like "end". Signed-off-by: Will Deacon Signed-off-by: Catalin Marinas --- arch/arm64/mm/dump.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/arch/arm64/mm/dump.c b/arch/arm64/mm/dump.c index 65dfc8571bf8..fcb1f2a6d7c6 100644 --- a/arch/arm64/mm/dump.c +++ b/arch/arm64/mm/dump.c @@ -36,8 +36,8 @@ static const struct addr_marker address_markers[] = { #endif { MODULES_VADDR, "Modules start" }, { MODULES_END, "Modules end" }, - { VMALLOC_START, "vmalloc() Area" }, - { VMALLOC_END, "vmalloc() End" }, + { VMALLOC_START, "vmalloc() area" }, + { VMALLOC_END, "vmalloc() end" }, { FIXADDR_START, "Fixmap start" }, { FIXADDR_TOP, "Fixmap end" }, { PCI_IO_START, "PCI I/O start" }, @@ -46,7 +46,7 @@ static const struct addr_marker address_markers[] = { { VMEMMAP_START, "vmemmap start" }, { VMEMMAP_START + VMEMMAP_SIZE, "vmemmap end" }, #endif - { PAGE_OFFSET, "Linear Mapping" }, + { PAGE_OFFSET, "Linear mapping" }, { -1, NULL }, }; From a1f33941f7e103bcf471eaf8461b212223c642d6 Mon Sep 17 00:00:00 2001 From: Julien Thierry Date: Thu, 6 Sep 2018 12:09:56 +0100 Subject: [PATCH 10/78] arm64: uaccess: implement unsafe accessors Current implementation of get/put_user_unsafe default to get/put_user which toggle PAN before each access, despite having been told by the caller that multiple accesses to user memory were about to happen. Provide implementations for user_access_begin/end to turn PAN off/on and implement unsafe accessors that assume PAN was already turned off. Tested-by: Will Deacon Signed-off-by: Julien Thierry Signed-off-by: Catalin Marinas --- arch/arm64/include/asm/uaccess.h | 61 ++++++++++++++++++++++++-------- 1 file changed, 46 insertions(+), 15 deletions(-) diff --git a/arch/arm64/include/asm/uaccess.h b/arch/arm64/include/asm/uaccess.h index e66b0fca99c2..c4528dd90875 100644 --- a/arch/arm64/include/asm/uaccess.h +++ b/arch/arm64/include/asm/uaccess.h @@ -277,11 +277,9 @@ static inline void __user *__uaccess_mask_ptr(const void __user *ptr) : "+r" (err), "=&r" (x) \ : "r" (addr), "i" (-EFAULT)) -#define __get_user_err(x, ptr, err) \ +#define __get_user_err_unsafe(x, ptr, err) \ do { \ unsigned long __gu_val; \ - __chk_user_ptr(ptr); \ - uaccess_enable_not_uao(); \ switch (sizeof(*(ptr))) { \ case 1: \ __get_user_asm("ldrb", "ldtrb", "%w", __gu_val, (ptr), \ @@ -302,17 +300,24 @@ do { \ default: \ BUILD_BUG(); \ } \ - uaccess_disable_not_uao(); \ (x) = (__force __typeof__(*(ptr)))__gu_val; \ } while (0) -#define __get_user_check(x, ptr, err) \ +#define __get_user_err_check(x, ptr, err) \ +do { \ + __chk_user_ptr(ptr); \ + uaccess_enable_not_uao(); \ + __get_user_err_unsafe((x), (ptr), (err)); \ + uaccess_disable_not_uao(); \ +} while (0) + +#define __get_user_err(x, ptr, err, accessor) \ ({ \ __typeof__(*(ptr)) __user *__p = (ptr); \ might_fault(); \ if (access_ok(VERIFY_READ, __p, sizeof(*__p))) { \ __p = uaccess_mask_ptr(__p); \ - __get_user_err((x), __p, (err)); \ + accessor((x), __p, (err)); \ } else { \ (x) = 0; (err) = -EFAULT; \ } \ @@ -320,14 +325,14 @@ do { \ #define __get_user_error(x, ptr, err) \ ({ \ - __get_user_check((x), (ptr), (err)); \ + __get_user_err((x), (ptr), (err), __get_user_err_check); \ (void)0; \ }) #define __get_user(x, ptr) \ ({ \ int __gu_err = 0; \ - __get_user_check((x), (ptr), __gu_err); \ + __get_user_err((x), (ptr), __gu_err, __get_user_err_check); \ __gu_err; \ }) @@ -347,11 +352,9 @@ do { \ : "+r" (err) \ : "r" (x), "r" (addr), "i" (-EFAULT)) -#define __put_user_err(x, ptr, err) \ +#define __put_user_err_unsafe(x, ptr, err) \ do { \ __typeof__(*(ptr)) __pu_val = (x); \ - __chk_user_ptr(ptr); \ - uaccess_enable_not_uao(); \ switch (sizeof(*(ptr))) { \ case 1: \ __put_user_asm("strb", "sttrb", "%w", __pu_val, (ptr), \ @@ -372,16 +375,24 @@ do { \ default: \ BUILD_BUG(); \ } \ +} while (0) + + +#define __put_user_err_check(x, ptr, err) \ +do { \ + __chk_user_ptr(ptr); \ + uaccess_enable_not_uao(); \ + __put_user_err_unsafe((x), (ptr), (err)); \ uaccess_disable_not_uao(); \ } while (0) -#define __put_user_check(x, ptr, err) \ +#define __put_user_err(x, ptr, err, accessor) \ ({ \ __typeof__(*(ptr)) __user *__p = (ptr); \ might_fault(); \ if (access_ok(VERIFY_WRITE, __p, sizeof(*__p))) { \ __p = uaccess_mask_ptr(__p); \ - __put_user_err((x), __p, (err)); \ + accessor((x), __p, (err)); \ } else { \ (err) = -EFAULT; \ } \ @@ -389,19 +400,39 @@ do { \ #define __put_user_error(x, ptr, err) \ ({ \ - __put_user_check((x), (ptr), (err)); \ + __put_user_err((x), (ptr), (err), __put_user_err_check); \ (void)0; \ }) #define __put_user(x, ptr) \ ({ \ int __pu_err = 0; \ - __put_user_check((x), (ptr), __pu_err); \ + __put_user_err((x), (ptr), __pu_err, __put_user_err_check); \ __pu_err; \ }) #define put_user __put_user + +#define user_access_begin() uaccess_enable_not_uao() +#define user_access_end() uaccess_disable_not_uao() + +#define unsafe_get_user(x, ptr, err) \ +do { \ + int __gu_err = 0; \ + __get_user_err((x), (ptr), __gu_err, __get_user_err_unsafe); \ + if (__gu_err != 0) \ + goto err; \ +} while (0) + +#define unsafe_put_user(x, ptr, err) \ +do { \ + int __pu_err = 0; \ + __put_user_err((x), (ptr), __pu_err, __put_user_err_unsafe); \ + if (__pu_err != 0) \ + goto err; \ +} while (0) + extern unsigned long __must_check __arch_copy_from_user(void *to, const void __user *from, unsigned long n); #define raw_copy_from_user(to, from, n) \ ({ \ From 6899a4c82faf9b41bbddf330651a4d1155f8b64e Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Wed, 22 Aug 2018 21:23:05 +0100 Subject: [PATCH 11/78] arm64: tlb: Use last-level invalidation in flush_tlb_kernel_range() flush_tlb_kernel_range() is only ever used to invalidate last-level entries, so we can restrict the scope of the TLB invalidation instruction. Acked-by: Peter Zijlstra (Intel) Signed-off-by: Will Deacon Signed-off-by: Catalin Marinas --- arch/arm64/include/asm/tlbflush.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm64/include/asm/tlbflush.h b/arch/arm64/include/asm/tlbflush.h index a4a1901140ee..7e2a35424ca4 100644 --- a/arch/arm64/include/asm/tlbflush.h +++ b/arch/arm64/include/asm/tlbflush.h @@ -199,7 +199,7 @@ static inline void flush_tlb_kernel_range(unsigned long start, unsigned long end dsb(ishst); for (addr = start; addr < end; addr += 1 << (PAGE_SHIFT - 12)) - __tlbi(vaae1is, addr); + __tlbi(vaale1is, addr); dsb(ish); isb(); } From 45a284bc5ee3d629b6da1498c2273cb22361416e Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Wed, 22 Aug 2018 21:40:30 +0100 Subject: [PATCH 12/78] arm64: tlb: Add DSB ISHST prior to TLBI in __flush_tlb_[kernel_]pgtable() __flush_tlb_[kernel_]pgtable() rely on set_pXd() having a DSB after writing the new table entry and therefore avoid the barrier prior to the TLBI instruction. In preparation for delaying our walk-cache invalidation on the unmap() path, move the DSB into the TLB invalidation routines. Acked-by: Peter Zijlstra (Intel) Signed-off-by: Will Deacon Signed-off-by: Catalin Marinas --- arch/arm64/include/asm/tlbflush.h | 2 ++ 1 file changed, 2 insertions(+) diff --git a/arch/arm64/include/asm/tlbflush.h b/arch/arm64/include/asm/tlbflush.h index 7e2a35424ca4..e257f8655b84 100644 --- a/arch/arm64/include/asm/tlbflush.h +++ b/arch/arm64/include/asm/tlbflush.h @@ -213,6 +213,7 @@ static inline void __flush_tlb_pgtable(struct mm_struct *mm, { unsigned long addr = __TLBI_VADDR(uaddr, ASID(mm)); + dsb(ishst); __tlbi(vae1is, addr); __tlbi_user(vae1is, addr); dsb(ish); @@ -222,6 +223,7 @@ static inline void __flush_tlb_kernel_pgtable(unsigned long kaddr) { unsigned long addr = __TLBI_VADDR(kaddr, 0); + dsb(ishst); __tlbi(vaae1is, addr); dsb(ish); } From 0795edaf3f1ff1ea58048515211280db004bbd68 Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Wed, 22 Aug 2018 21:36:31 +0100 Subject: [PATCH 13/78] arm64: pgtable: Implement p[mu]d_valid() and check in set_p[mu]d() Now that our walk-cache invalidation routines imply a DSB before the invalidation, we no longer need one when we are clearing an entry during unmap. Acked-by: Peter Zijlstra (Intel) Signed-off-by: Will Deacon Signed-off-by: Catalin Marinas --- arch/arm64/include/asm/pgtable.h | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/arch/arm64/include/asm/pgtable.h b/arch/arm64/include/asm/pgtable.h index 1bdeca8918a6..2ab2031b778c 100644 --- a/arch/arm64/include/asm/pgtable.h +++ b/arch/arm64/include/asm/pgtable.h @@ -360,6 +360,7 @@ static inline int pmd_protnone(pmd_t pmd) #define pmd_present(pmd) pte_present(pmd_pte(pmd)) #define pmd_dirty(pmd) pte_dirty(pmd_pte(pmd)) #define pmd_young(pmd) pte_young(pmd_pte(pmd)) +#define pmd_valid(pmd) pte_valid(pmd_pte(pmd)) #define pmd_wrprotect(pmd) pte_pmd(pte_wrprotect(pmd_pte(pmd))) #define pmd_mkold(pmd) pte_pmd(pte_mkold(pmd_pte(pmd))) #define pmd_mkwrite(pmd) pte_pmd(pte_mkwrite(pmd_pte(pmd))) @@ -431,7 +432,9 @@ extern pgprot_t phys_mem_access_prot(struct file *file, unsigned long pfn, static inline void set_pmd(pmd_t *pmdp, pmd_t pmd) { WRITE_ONCE(*pmdp, pmd); - dsb(ishst); + + if (pmd_valid(pmd)) + dsb(ishst); } static inline void pmd_clear(pmd_t *pmdp) @@ -477,11 +480,14 @@ static inline phys_addr_t pmd_page_paddr(pmd_t pmd) #define pud_none(pud) (!pud_val(pud)) #define pud_bad(pud) (!(pud_val(pud) & PUD_TABLE_BIT)) #define pud_present(pud) pte_present(pud_pte(pud)) +#define pud_valid(pud) pte_valid(pud_pte(pud)) static inline void set_pud(pud_t *pudp, pud_t pud) { WRITE_ONCE(*pudp, pud); - dsb(ishst); + + if (pud_valid(pud)) + dsb(ishst); } static inline void pud_clear(pud_t *pudp) From d8289d3a5854a2a0ae144bff106a78738fe63050 Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Thu, 23 Aug 2018 19:08:15 +0100 Subject: [PATCH 14/78] arm64: tlb: Justify non-leaf invalidation in flush_tlb_range() Add a comment to explain why we can't get away with last-level invalidation in flush_tlb_range() Acked-by: Peter Zijlstra (Intel) Signed-off-by: Will Deacon Signed-off-by: Catalin Marinas --- arch/arm64/include/asm/tlbflush.h | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/arch/arm64/include/asm/tlbflush.h b/arch/arm64/include/asm/tlbflush.h index e257f8655b84..ddbf1718669d 100644 --- a/arch/arm64/include/asm/tlbflush.h +++ b/arch/arm64/include/asm/tlbflush.h @@ -182,6 +182,10 @@ static inline void __flush_tlb_range(struct vm_area_struct *vma, static inline void flush_tlb_range(struct vm_area_struct *vma, unsigned long start, unsigned long end) { + /* + * We cannot use leaf-only invalidation here, since we may be invalidating + * table entries as part of collapsing hugepages or moving page tables. + */ __flush_tlb_range(vma, start, end, false); } From 67a902ac598dca056366a7342f401aa6f605072f Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Thu, 23 Aug 2018 19:26:21 +0100 Subject: [PATCH 15/78] arm64: tlbflush: Allow stride to be specified for __flush_tlb_range() When we are unmapping intermediate page-table entries or huge pages, we don't need to issue a TLBI instruction for every PAGE_SIZE chunk in the VA range being unmapped. Allow the invalidation stride to be passed to __flush_tlb_range(), and adjust our "just nuke the ASID" heuristic to take this into account. Acked-by: Peter Zijlstra (Intel) Signed-off-by: Will Deacon Signed-off-by: Catalin Marinas --- arch/arm64/include/asm/tlb.h | 2 +- arch/arm64/include/asm/tlbflush.h | 15 +++++++++------ 2 files changed, 10 insertions(+), 7 deletions(-) diff --git a/arch/arm64/include/asm/tlb.h b/arch/arm64/include/asm/tlb.h index a3233167be60..1e1f68ce28f4 100644 --- a/arch/arm64/include/asm/tlb.h +++ b/arch/arm64/include/asm/tlb.h @@ -53,7 +53,7 @@ static inline void tlb_flush(struct mmu_gather *tlb) * the __(pte|pmd|pud)_free_tlb() functions, so last level * TLBI is sufficient here. */ - __flush_tlb_range(&vma, tlb->start, tlb->end, true); + __flush_tlb_range(&vma, tlb->start, tlb->end, PAGE_SIZE, true); } static inline void __pte_free_tlb(struct mmu_gather *tlb, pgtable_t pte, diff --git a/arch/arm64/include/asm/tlbflush.h b/arch/arm64/include/asm/tlbflush.h index ddbf1718669d..37ccdb246b20 100644 --- a/arch/arm64/include/asm/tlbflush.h +++ b/arch/arm64/include/asm/tlbflush.h @@ -149,25 +149,28 @@ static inline void flush_tlb_page(struct vm_area_struct *vma, * This is meant to avoid soft lock-ups on large TLB flushing ranges and not * necessarily a performance improvement. */ -#define MAX_TLB_RANGE (1024UL << PAGE_SHIFT) +#define MAX_TLBI_OPS 1024UL static inline void __flush_tlb_range(struct vm_area_struct *vma, unsigned long start, unsigned long end, - bool last_level) + unsigned long stride, bool last_level) { unsigned long asid = ASID(vma->vm_mm); unsigned long addr; - if ((end - start) > MAX_TLB_RANGE) { + if ((end - start) > (MAX_TLBI_OPS * stride)) { flush_tlb_mm(vma->vm_mm); return; } + /* Convert the stride into units of 4k */ + stride >>= 12; + start = __TLBI_VADDR(start, asid); end = __TLBI_VADDR(end, asid); dsb(ishst); - for (addr = start; addr < end; addr += 1 << (PAGE_SHIFT - 12)) { + for (addr = start; addr < end; addr += stride) { if (last_level) { __tlbi(vale1is, addr); __tlbi_user(vale1is, addr); @@ -186,14 +189,14 @@ static inline void flush_tlb_range(struct vm_area_struct *vma, * We cannot use leaf-only invalidation here, since we may be invalidating * table entries as part of collapsing hugepages or moving page tables. */ - __flush_tlb_range(vma, start, end, false); + __flush_tlb_range(vma, start, end, PAGE_SIZE, false); } static inline void flush_tlb_kernel_range(unsigned long start, unsigned long end) { unsigned long addr; - if ((end - start) > MAX_TLB_RANGE) { + if ((end - start) > (MAX_TLBI_OPS * PAGE_SIZE)) { flush_tlb_all(); return; } From 07212cd47efef1df971e2289a89c087a5f6f6a2b Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Thu, 23 Aug 2018 19:48:44 +0100 Subject: [PATCH 16/78] arm64: tlb: Remove redundant !CONFIG_HAVE_RCU_TABLE_FREE code If there's one thing the RCU-based table freeing doesn't need, it's more ifdeffery. Remove the redundant !CONFIG_HAVE_RCU_TABLE_FREE code, since this option is unconditionally selected in our Kconfig. Acked-by: Peter Zijlstra (Intel) Signed-off-by: Will Deacon Signed-off-by: Catalin Marinas --- arch/arm64/include/asm/tlb.h | 12 +++--------- 1 file changed, 3 insertions(+), 9 deletions(-) diff --git a/arch/arm64/include/asm/tlb.h b/arch/arm64/include/asm/tlb.h index 1e1f68ce28f4..bd00017d529a 100644 --- a/arch/arm64/include/asm/tlb.h +++ b/arch/arm64/include/asm/tlb.h @@ -22,16 +22,10 @@ #include #include -#ifdef CONFIG_HAVE_RCU_TABLE_FREE - -#define tlb_remove_entry(tlb, entry) tlb_remove_table(tlb, entry) static inline void __tlb_remove_table(void *_table) { free_page_and_swap_cache((struct page *)_table); } -#else -#define tlb_remove_entry(tlb, entry) tlb_remove_page(tlb, entry) -#endif /* CONFIG_HAVE_RCU_TABLE_FREE */ static void tlb_flush(struct mmu_gather *tlb); @@ -61,7 +55,7 @@ static inline void __pte_free_tlb(struct mmu_gather *tlb, pgtable_t pte, { __flush_tlb_pgtable(tlb->mm, addr); pgtable_page_dtor(pte); - tlb_remove_entry(tlb, pte); + tlb_remove_table(tlb, pte); } #if CONFIG_PGTABLE_LEVELS > 2 @@ -69,7 +63,7 @@ static inline void __pmd_free_tlb(struct mmu_gather *tlb, pmd_t *pmdp, unsigned long addr) { __flush_tlb_pgtable(tlb->mm, addr); - tlb_remove_entry(tlb, virt_to_page(pmdp)); + tlb_remove_table(tlb, virt_to_page(pmdp)); } #endif @@ -78,7 +72,7 @@ static inline void __pud_free_tlb(struct mmu_gather *tlb, pud_t *pudp, unsigned long addr) { __flush_tlb_pgtable(tlb->mm, addr); - tlb_remove_entry(tlb, virt_to_page(pudp)); + tlb_remove_table(tlb, virt_to_page(pudp)); } #endif From f270ab88fdf205be1a7a46ccb61f4a343be543a2 Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Thu, 23 Aug 2018 21:08:31 +0100 Subject: [PATCH 17/78] arm64: tlb: Adjust stride and type of TLBI according to mmu_gather Now that the core mmu_gather code keeps track of both the levels of page table cleared and also whether or not these entries correspond to intermediate entries, we can use this in our tlb_flush() callback to reduce the number of invalidations we issue as well as their scope. Acked-by: Peter Zijlstra (Intel) Signed-off-by: Will Deacon Signed-off-by: Catalin Marinas --- arch/arm64/include/asm/tlb.h | 19 ++++++++++--------- 1 file changed, 10 insertions(+), 9 deletions(-) diff --git a/arch/arm64/include/asm/tlb.h b/arch/arm64/include/asm/tlb.h index bd00017d529a..b078fdec10d5 100644 --- a/arch/arm64/include/asm/tlb.h +++ b/arch/arm64/include/asm/tlb.h @@ -34,20 +34,21 @@ static void tlb_flush(struct mmu_gather *tlb); static inline void tlb_flush(struct mmu_gather *tlb) { struct vm_area_struct vma = TLB_FLUSH_VMA(tlb->mm, 0); + bool last_level = !tlb->freed_tables; + unsigned long stride = tlb_get_unmap_size(tlb); /* - * The ASID allocator will either invalidate the ASID or mark - * it as used. + * If we're tearing down the address space then we only care about + * invalidating the walk-cache, since the ASID allocator won't + * reallocate our ASID without invalidating the entire TLB. */ - if (tlb->fullmm) + if (tlb->fullmm) { + if (!last_level) + flush_tlb_mm(tlb->mm); return; + } - /* - * The intermediate page table levels are already handled by - * the __(pte|pmd|pud)_free_tlb() functions, so last level - * TLBI is sufficient here. - */ - __flush_tlb_range(&vma, tlb->start, tlb->end, PAGE_SIZE, true); + __flush_tlb_range(&vma, tlb->start, tlb->end, stride, last_level); } static inline void __pte_free_tlb(struct mmu_gather *tlb, pgtable_t pte, From ace8cb754539077ed75f3f15b77b2b51b5b7a431 Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Thu, 23 Aug 2018 21:16:50 +0100 Subject: [PATCH 18/78] arm64: tlb: Avoid synchronous TLBIs when freeing page tables By selecting HAVE_RCU_TABLE_INVALIDATE, we can rely on tlb_flush() being called if we fail to batch table pages for freeing. This in turn allows us to postpone walk-cache invalidation until tlb_finish_mmu(), which avoids lots of unnecessary DSBs and means we can shoot down the ASID if the range is large enough. Acked-by: Peter Zijlstra (Intel) Signed-off-by: Will Deacon Signed-off-by: Catalin Marinas --- arch/arm64/Kconfig | 1 + arch/arm64/include/asm/tlb.h | 3 --- arch/arm64/include/asm/tlbflush.h | 11 ----------- 3 files changed, 1 insertion(+), 14 deletions(-) diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig index b4c1f1f55aec..58eb02796b16 100644 --- a/arch/arm64/Kconfig +++ b/arch/arm64/Kconfig @@ -143,6 +143,7 @@ config ARM64 select HAVE_PERF_USER_STACK_DUMP select HAVE_REGS_AND_STACK_ACCESS_API select HAVE_RCU_TABLE_FREE + select HAVE_RCU_TABLE_INVALIDATE select HAVE_RSEQ select HAVE_STACKPROTECTOR select HAVE_SYSCALL_TRACEPOINTS diff --git a/arch/arm64/include/asm/tlb.h b/arch/arm64/include/asm/tlb.h index b078fdec10d5..106fdc951b6e 100644 --- a/arch/arm64/include/asm/tlb.h +++ b/arch/arm64/include/asm/tlb.h @@ -54,7 +54,6 @@ static inline void tlb_flush(struct mmu_gather *tlb) static inline void __pte_free_tlb(struct mmu_gather *tlb, pgtable_t pte, unsigned long addr) { - __flush_tlb_pgtable(tlb->mm, addr); pgtable_page_dtor(pte); tlb_remove_table(tlb, pte); } @@ -63,7 +62,6 @@ static inline void __pte_free_tlb(struct mmu_gather *tlb, pgtable_t pte, static inline void __pmd_free_tlb(struct mmu_gather *tlb, pmd_t *pmdp, unsigned long addr) { - __flush_tlb_pgtable(tlb->mm, addr); tlb_remove_table(tlb, virt_to_page(pmdp)); } #endif @@ -72,7 +70,6 @@ static inline void __pmd_free_tlb(struct mmu_gather *tlb, pmd_t *pmdp, static inline void __pud_free_tlb(struct mmu_gather *tlb, pud_t *pudp, unsigned long addr) { - __flush_tlb_pgtable(tlb->mm, addr); tlb_remove_table(tlb, virt_to_page(pudp)); } #endif diff --git a/arch/arm64/include/asm/tlbflush.h b/arch/arm64/include/asm/tlbflush.h index 37ccdb246b20..c98ed8871030 100644 --- a/arch/arm64/include/asm/tlbflush.h +++ b/arch/arm64/include/asm/tlbflush.h @@ -215,17 +215,6 @@ static inline void flush_tlb_kernel_range(unsigned long start, unsigned long end * Used to invalidate the TLB (walk caches) corresponding to intermediate page * table levels (pgd/pud/pmd). */ -static inline void __flush_tlb_pgtable(struct mm_struct *mm, - unsigned long uaddr) -{ - unsigned long addr = __TLBI_VADDR(uaddr, ASID(mm)); - - dsb(ishst); - __tlbi(vae1is, addr); - __tlbi_user(vae1is, addr); - dsb(ish); -} - static inline void __flush_tlb_kernel_pgtable(unsigned long kaddr) { unsigned long addr = __TLBI_VADDR(kaddr, 0); From 7f08872774eb971693ba79eeb2d4db364c9f5bfb Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Tue, 28 Aug 2018 14:52:17 +0100 Subject: [PATCH 19/78] arm64: tlb: Rewrite stale comment in asm/tlbflush.h Peter Z asked me to justify the barrier usage in asm/tlbflush.h, but actually that whole block comment needs to be rewritten. Reported-by: Peter Zijlstra Acked-by: Peter Zijlstra (Intel) Signed-off-by: Will Deacon Signed-off-by: Catalin Marinas --- arch/arm64/include/asm/tlbflush.h | 80 +++++++++++++++++++++---------- 1 file changed, 55 insertions(+), 25 deletions(-) diff --git a/arch/arm64/include/asm/tlbflush.h b/arch/arm64/include/asm/tlbflush.h index c98ed8871030..c3c0387aee18 100644 --- a/arch/arm64/include/asm/tlbflush.h +++ b/arch/arm64/include/asm/tlbflush.h @@ -70,43 +70,73 @@ }) /* - * TLB Management - * ============== + * TLB Invalidation + * ================ * - * The TLB specific code is expected to perform whatever tests it needs - * to determine if it should invalidate the TLB for each call. Start - * addresses are inclusive and end addresses are exclusive; it is safe to - * round these addresses down. + * This header file implements the low-level TLB invalidation routines + * (sometimes referred to as "flushing" in the kernel) for arm64. + * + * Every invalidation operation uses the following template: + * + * DSB ISHST // Ensure prior page-table updates have completed + * TLBI ... // Invalidate the TLB + * DSB ISH // Ensure the TLB invalidation has completed + * if (invalidated kernel mappings) + * ISB // Discard any instructions fetched from the old mapping + * + * + * The following functions form part of the "core" TLB invalidation API, + * as documented in Documentation/core-api/cachetlb.rst: * * flush_tlb_all() - * - * Invalidate the entire TLB. + * Invalidate the entire TLB (kernel + user) on all CPUs * * flush_tlb_mm(mm) + * Invalidate an entire user address space on all CPUs. + * The 'mm' argument identifies the ASID to invalidate. * - * Invalidate all TLB entries in a particular address space. - * - mm - mm_struct describing address space + * flush_tlb_range(vma, start, end) + * Invalidate the virtual-address range '[start, end)' on all + * CPUs for the user address space corresponding to 'vma->mm'. + * Note that this operation also invalidates any walk-cache + * entries associated with translations for the specified address + * range. * - * flush_tlb_range(mm,start,end) + * flush_tlb_kernel_range(start, end) + * Same as flush_tlb_range(..., start, end), but applies to + * kernel mappings rather than a particular user address space. + * Whilst not explicitly documented, this function is used when + * unmapping pages from vmalloc/io space. * - * Invalidate a range of TLB entries in the specified address - * space. - * - mm - mm_struct describing address space - * - start - start address (may not be aligned) - * - end - end address (exclusive, may not be aligned) + * flush_tlb_page(vma, addr) + * Invalidate a single user mapping for address 'addr' in the + * address space corresponding to 'vma->mm'. Note that this + * operation only invalidates a single, last-level page-table + * entry and therefore does not affect any walk-caches. * - * flush_tlb_page(vaddr,vma) * - * Invalidate the specified page in the specified address range. - * - vaddr - virtual address (may not be aligned) - * - vma - vma_struct describing address range + * Next, we have some undocumented invalidation routines that you probably + * don't want to call unless you know what you're doing: * - * flush_kern_tlb_page(kaddr) + * local_flush_tlb_all() + * Same as flush_tlb_all(), but only applies to the calling CPU. * - * Invalidate the TLB entry for the specified page. The address - * will be in the kernels virtual memory space. Current uses - * only require the D-TLB to be invalidated. - * - kaddr - Kernel virtual memory address + * __flush_tlb_kernel_pgtable(addr) + * Invalidate a single kernel mapping for address 'addr' on all + * CPUs, ensuring that any walk-cache entries associated with the + * translation are also invalidated. + * + * __flush_tlb_range(vma, start, end, stride, last_level) + * Invalidate the virtual-address range '[start, end)' on all + * CPUs for the user address space corresponding to 'vma->mm'. + * The invalidation operations are issued at a granularity + * determined by 'stride' and only affect any walk-cache entries + * if 'last_level' is equal to false. + * + * + * Finally, take a look at asm/tlb.h to see how tlb_flush() is implemented + * on top of these routines, since that is our interface to the mmu_gather + * API as used by munmap() and friends. */ static inline void local_flush_tlb_all(void) { From ca7f686ac9fe87a9175696a8744e095ab9749c49 Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Fri, 15 Jun 2018 11:36:43 +0100 Subject: [PATCH 20/78] arm64: Fix silly typo in comment I was passing through and figuered I'd fix this up: featuer -> feature Signed-off-by: Will Deacon Signed-off-by: Catalin Marinas --- arch/arm64/include/asm/cpufeature.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm64/include/asm/cpufeature.h b/arch/arm64/include/asm/cpufeature.h index 1717ba1db35d..9079715794af 100644 --- a/arch/arm64/include/asm/cpufeature.h +++ b/arch/arm64/include/asm/cpufeature.h @@ -262,7 +262,7 @@ extern struct arm64_ftr_reg arm64_ftr_reg_ctrel0; /* * CPU feature detected at boot time based on system-wide value of a * feature. It is safe for a late CPU to have this feature even though - * the system hasn't enabled it, although the featuer will not be used + * the system hasn't enabled it, although the feature will not be used * by Linux in this case. If the system has enabled this feature already, * then every late CPU must have it. */ From d71be2b6c0e19180b5f80a6d42039cc074a693a2 Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Fri, 15 Jun 2018 11:37:34 +0100 Subject: [PATCH 21/78] arm64: cpufeature: Detect SSBS and advertise to userspace Armv8.5 introduces a new PSTATE bit known as Speculative Store Bypass Safe (SSBS) which can be used as a mitigation against Spectre variant 4. Additionally, a CPU may provide instructions to manipulate PSTATE.SSBS directly, so that userspace can toggle the SSBS control without trapping to the kernel. This patch probes for the existence of SSBS and advertise the new instructions to userspace if they exist. Reviewed-by: Suzuki K Poulose Signed-off-by: Will Deacon Signed-off-by: Catalin Marinas --- arch/arm64/include/asm/cpucaps.h | 3 ++- arch/arm64/include/asm/sysreg.h | 16 ++++++++++++---- arch/arm64/include/uapi/asm/hwcap.h | 1 + arch/arm64/kernel/cpufeature.c | 19 +++++++++++++++++-- arch/arm64/kernel/cpuinfo.c | 1 + 5 files changed, 33 insertions(+), 7 deletions(-) diff --git a/arch/arm64/include/asm/cpucaps.h b/arch/arm64/include/asm/cpucaps.h index 9932aca9704b..38eec9cf30f2 100644 --- a/arch/arm64/include/asm/cpucaps.h +++ b/arch/arm64/include/asm/cpucaps.h @@ -52,7 +52,8 @@ #define ARM64_MISMATCHED_CACHE_TYPE 31 #define ARM64_HAS_STAGE2_FWB 32 #define ARM64_HAS_CRC32 33 +#define ARM64_SSBS 34 -#define ARM64_NCAPS 34 +#define ARM64_NCAPS 35 #endif /* __ASM_CPUCAPS_H */ diff --git a/arch/arm64/include/asm/sysreg.h b/arch/arm64/include/asm/sysreg.h index c1470931b897..2fc6242baf11 100644 --- a/arch/arm64/include/asm/sysreg.h +++ b/arch/arm64/include/asm/sysreg.h @@ -419,6 +419,7 @@ #define SYS_ICH_LR15_EL2 __SYS__LR8_EL2(7) /* Common SCTLR_ELx flags. */ +#define SCTLR_ELx_DSSBS (1UL << 44) #define SCTLR_ELx_EE (1 << 25) #define SCTLR_ELx_IESB (1 << 21) #define SCTLR_ELx_WXN (1 << 19) @@ -439,7 +440,7 @@ (1 << 10) | (1 << 13) | (1 << 14) | (1 << 15) | \ (1 << 17) | (1 << 20) | (1 << 24) | (1 << 26) | \ (1 << 27) | (1 << 30) | (1 << 31) | \ - (0xffffffffUL << 32)) + (0xffffefffUL << 32)) #ifdef CONFIG_CPU_BIG_ENDIAN #define ENDIAN_SET_EL2 SCTLR_ELx_EE @@ -453,7 +454,7 @@ #define SCTLR_EL2_SET (SCTLR_ELx_IESB | ENDIAN_SET_EL2 | SCTLR_EL2_RES1) #define SCTLR_EL2_CLEAR (SCTLR_ELx_M | SCTLR_ELx_A | SCTLR_ELx_C | \ SCTLR_ELx_SA | SCTLR_ELx_I | SCTLR_ELx_WXN | \ - ENDIAN_CLEAR_EL2 | SCTLR_EL2_RES0) + SCTLR_ELx_DSSBS | ENDIAN_CLEAR_EL2 | SCTLR_EL2_RES0) #if (SCTLR_EL2_SET ^ SCTLR_EL2_CLEAR) != 0xffffffffffffffff #error "Inconsistent SCTLR_EL2 set/clear bits" @@ -477,7 +478,7 @@ (1 << 29)) #define SCTLR_EL1_RES0 ((1 << 6) | (1 << 10) | (1 << 13) | (1 << 17) | \ (1 << 27) | (1 << 30) | (1 << 31) | \ - (0xffffffffUL << 32)) + (0xffffefffUL << 32)) #ifdef CONFIG_CPU_BIG_ENDIAN #define ENDIAN_SET_EL1 (SCTLR_EL1_E0E | SCTLR_ELx_EE) @@ -494,7 +495,7 @@ ENDIAN_SET_EL1 | SCTLR_EL1_UCI | SCTLR_EL1_RES1) #define SCTLR_EL1_CLEAR (SCTLR_ELx_A | SCTLR_EL1_CP15BEN | SCTLR_EL1_ITD |\ SCTLR_EL1_UMA | SCTLR_ELx_WXN | ENDIAN_CLEAR_EL1 |\ - SCTLR_EL1_RES0) + SCTLR_ELx_DSSBS | SCTLR_EL1_RES0) #if (SCTLR_EL1_SET ^ SCTLR_EL1_CLEAR) != 0xffffffffffffffff #error "Inconsistent SCTLR_EL1 set/clear bits" @@ -544,6 +545,13 @@ #define ID_AA64PFR0_EL0_64BIT_ONLY 0x1 #define ID_AA64PFR0_EL0_32BIT_64BIT 0x2 +/* id_aa64pfr1 */ +#define ID_AA64PFR1_SSBS_SHIFT 4 + +#define ID_AA64PFR1_SSBS_PSTATE_NI 0 +#define ID_AA64PFR1_SSBS_PSTATE_ONLY 1 +#define ID_AA64PFR1_SSBS_PSTATE_INSNS 2 + /* id_aa64mmfr0 */ #define ID_AA64MMFR0_TGRAN4_SHIFT 28 #define ID_AA64MMFR0_TGRAN64_SHIFT 24 diff --git a/arch/arm64/include/uapi/asm/hwcap.h b/arch/arm64/include/uapi/asm/hwcap.h index 17c65c8f33cb..2bcd6e4f3474 100644 --- a/arch/arm64/include/uapi/asm/hwcap.h +++ b/arch/arm64/include/uapi/asm/hwcap.h @@ -48,5 +48,6 @@ #define HWCAP_USCAT (1 << 25) #define HWCAP_ILRCPC (1 << 26) #define HWCAP_FLAGM (1 << 27) +#define HWCAP_SSBS (1 << 28) #endif /* _UAPI__ASM_HWCAP_H */ diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c index 7626b80128f5..5794959d8beb 100644 --- a/arch/arm64/kernel/cpufeature.c +++ b/arch/arm64/kernel/cpufeature.c @@ -164,6 +164,11 @@ static const struct arm64_ftr_bits ftr_id_aa64pfr0[] = { ARM64_FTR_END, }; +static const struct arm64_ftr_bits ftr_id_aa64pfr1[] = { + ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64PFR1_SSBS_SHIFT, 4, ID_AA64PFR1_SSBS_PSTATE_NI), + ARM64_FTR_END, +}; + static const struct arm64_ftr_bits ftr_id_aa64mmfr0[] = { S_ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64MMFR0_TGRAN4_SHIFT, 4, ID_AA64MMFR0_TGRAN4_NI), S_ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64MMFR0_TGRAN64_SHIFT, 4, ID_AA64MMFR0_TGRAN64_NI), @@ -371,7 +376,7 @@ static const struct __ftr_reg_entry { /* Op1 = 0, CRn = 0, CRm = 4 */ ARM64_FTR_REG(SYS_ID_AA64PFR0_EL1, ftr_id_aa64pfr0), - ARM64_FTR_REG(SYS_ID_AA64PFR1_EL1, ftr_raz), + ARM64_FTR_REG(SYS_ID_AA64PFR1_EL1, ftr_id_aa64pfr1), ARM64_FTR_REG(SYS_ID_AA64ZFR0_EL1, ftr_raz), /* Op1 = 0, CRn = 0, CRm = 5 */ @@ -657,7 +662,6 @@ void update_cpu_features(int cpu, /* * EL3 is not our concern. - * ID_AA64PFR1 is currently RES0. */ taint |= check_update_ftr_reg(SYS_ID_AA64PFR0_EL1, cpu, info->reg_id_aa64pfr0, boot->reg_id_aa64pfr0); @@ -1231,6 +1235,16 @@ static const struct arm64_cpu_capabilities arm64_features[] = { .field_pos = ID_AA64ISAR0_CRC32_SHIFT, .min_field_value = 1, }, + { + .desc = "Speculative Store Bypassing Safe (SSBS)", + .capability = ARM64_SSBS, + .type = ARM64_CPUCAP_WEAK_LOCAL_CPU_FEATURE, + .matches = has_cpuid_feature, + .sys_reg = SYS_ID_AA64PFR1_EL1, + .field_pos = ID_AA64PFR1_SSBS_SHIFT, + .sign = FTR_UNSIGNED, + .min_field_value = ID_AA64PFR1_SSBS_PSTATE_ONLY, + }, {}, }; @@ -1276,6 +1290,7 @@ static const struct arm64_cpu_capabilities arm64_elf_hwcaps[] = { #ifdef CONFIG_ARM64_SVE HWCAP_CAP(SYS_ID_AA64PFR0_EL1, ID_AA64PFR0_SVE_SHIFT, FTR_UNSIGNED, ID_AA64PFR0_SVE, CAP_HWCAP, HWCAP_SVE), #endif + HWCAP_CAP(SYS_ID_AA64PFR1_EL1, ID_AA64PFR1_SSBS_SHIFT, FTR_UNSIGNED, ID_AA64PFR1_SSBS_PSTATE_INSNS, CAP_HWCAP, HWCAP_SSBS), {}, }; diff --git a/arch/arm64/kernel/cpuinfo.c b/arch/arm64/kernel/cpuinfo.c index e9ab7b3ed317..dce971f2c167 100644 --- a/arch/arm64/kernel/cpuinfo.c +++ b/arch/arm64/kernel/cpuinfo.c @@ -81,6 +81,7 @@ static const char *const hwcap_str[] = { "uscat", "ilrcpc", "flagm", + "ssbs", NULL }; From 2d1b2a91d56b19636b740ea70c8399d1df249f20 Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Fri, 15 Jun 2018 11:50:42 +0100 Subject: [PATCH 22/78] arm64: ssbd: Drop #ifdefs for PR_SPEC_STORE_BYPASS Now that we're all merged nicely into mainline, there's no need to check to see if PR_SPEC_STORE_BYPASS is defined. Signed-off-by: Will Deacon Signed-off-by: Catalin Marinas --- arch/arm64/kernel/ssbd.c | 3 --- 1 file changed, 3 deletions(-) diff --git a/arch/arm64/kernel/ssbd.c b/arch/arm64/kernel/ssbd.c index 3432e5ef9f41..07b12c034ec2 100644 --- a/arch/arm64/kernel/ssbd.c +++ b/arch/arm64/kernel/ssbd.c @@ -11,9 +11,7 @@ /* * prctl interface for SSBD - * FIXME: Drop the below ifdefery once merged in 4.18. */ -#ifdef PR_SPEC_STORE_BYPASS static int ssbd_prctl_set(struct task_struct *task, unsigned long ctrl) { int state = arm64_get_ssbd_state(); @@ -107,4 +105,3 @@ int arch_prctl_spec_ctrl_get(struct task_struct *task, unsigned long which) return -ENODEV; } } -#endif /* PR_SPEC_STORE_BYPASS */ From 0bf0f444b2c49241b2b39aa3cf210d7c95ef6c34 Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Tue, 7 Aug 2018 13:43:06 +0100 Subject: [PATCH 23/78] arm64: entry: Allow handling of undefined instructions from EL1 Rather than panic() when taking an undefined instruction exception from EL1, allow a hook to be registered in case we want to emulate the instruction, like we will for the SSBS PSTATE manipulation instructions. Signed-off-by: Will Deacon Signed-off-by: Catalin Marinas --- arch/arm64/kernel/entry.S | 2 +- arch/arm64/kernel/traps.c | 11 +++++++---- 2 files changed, 8 insertions(+), 5 deletions(-) diff --git a/arch/arm64/kernel/entry.S b/arch/arm64/kernel/entry.S index 09dbea221a27..8556876c9109 100644 --- a/arch/arm64/kernel/entry.S +++ b/arch/arm64/kernel/entry.S @@ -589,7 +589,7 @@ el1_undef: inherit_daif pstate=x23, tmp=x2 mov x0, sp bl do_undefinstr - ASM_BUG() + kernel_exit 1 el1_dbg: /* * Debug exception handling diff --git a/arch/arm64/kernel/traps.c b/arch/arm64/kernel/traps.c index 039e9ff379cc..b9da093e0341 100644 --- a/arch/arm64/kernel/traps.c +++ b/arch/arm64/kernel/traps.c @@ -310,10 +310,12 @@ static int call_undef_hook(struct pt_regs *regs) int (*fn)(struct pt_regs *regs, u32 instr) = NULL; void __user *pc = (void __user *)instruction_pointer(regs); - if (!user_mode(regs)) - return 1; - - if (compat_thumb_mode(regs)) { + if (!user_mode(regs)) { + __le32 instr_le; + if (probe_kernel_address((__force __le32 *)pc, instr_le)) + goto exit; + instr = le32_to_cpu(instr_le); + } else if (compat_thumb_mode(regs)) { /* 16-bit Thumb instruction */ __le16 instr_le; if (get_user(instr_le, (__le16 __user *)pc)) @@ -407,6 +409,7 @@ asmlinkage void __exception do_undefinstr(struct pt_regs *regs) return; force_signal_inject(SIGILL, ILL_ILLOPC, regs->pc); + BUG_ON(!user_mode(regs)); } void cpu_enable_cache_maint_trap(const struct arm64_cpu_capabilities *__unused) From 8f04e8e6e29c93421a95b61cad62e3918425eac7 Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Tue, 7 Aug 2018 13:47:06 +0100 Subject: [PATCH 24/78] arm64: ssbd: Add support for PSTATE.SSBS rather than trapping to EL3 On CPUs with support for PSTATE.SSBS, the kernel can toggle the SSBD state without needing to call into firmware. This patch hooks into the existing SSBD infrastructure so that SSBS is used on CPUs that support it, but it's all made horribly complicated by the very real possibility of big/little systems that don't uniformly provide the new capability. Signed-off-by: Will Deacon Signed-off-by: Catalin Marinas --- arch/arm64/include/asm/processor.h | 7 +++++ arch/arm64/include/asm/ptrace.h | 1 + arch/arm64/include/asm/sysreg.h | 3 ++ arch/arm64/include/uapi/asm/ptrace.h | 1 + arch/arm64/kernel/cpu_errata.c | 26 ++++++++++++++-- arch/arm64/kernel/cpufeature.c | 45 ++++++++++++++++++++++++++++ arch/arm64/kernel/process.c | 4 +++ arch/arm64/kernel/ssbd.c | 21 +++++++++++++ 8 files changed, 106 insertions(+), 2 deletions(-) diff --git a/arch/arm64/include/asm/processor.h b/arch/arm64/include/asm/processor.h index 79657ad91397..f6835374ed9f 100644 --- a/arch/arm64/include/asm/processor.h +++ b/arch/arm64/include/asm/processor.h @@ -174,6 +174,10 @@ static inline void start_thread(struct pt_regs *regs, unsigned long pc, { start_thread_common(regs, pc); regs->pstate = PSR_MODE_EL0t; + + if (arm64_get_ssbd_state() != ARM64_SSBD_FORCE_ENABLE) + regs->pstate |= PSR_SSBS_BIT; + regs->sp = sp; } @@ -190,6 +194,9 @@ static inline void compat_start_thread(struct pt_regs *regs, unsigned long pc, regs->pstate |= PSR_AA32_E_BIT; #endif + if (arm64_get_ssbd_state() != ARM64_SSBD_FORCE_ENABLE) + regs->pstate |= PSR_AA32_SSBS_BIT; + regs->compat_sp = sp; } #endif diff --git a/arch/arm64/include/asm/ptrace.h b/arch/arm64/include/asm/ptrace.h index 177b851ca6d9..6bc43889d11e 100644 --- a/arch/arm64/include/asm/ptrace.h +++ b/arch/arm64/include/asm/ptrace.h @@ -50,6 +50,7 @@ #define PSR_AA32_I_BIT 0x00000080 #define PSR_AA32_A_BIT 0x00000100 #define PSR_AA32_E_BIT 0x00000200 +#define PSR_AA32_SSBS_BIT 0x00800000 #define PSR_AA32_DIT_BIT 0x01000000 #define PSR_AA32_Q_BIT 0x08000000 #define PSR_AA32_V_BIT 0x10000000 diff --git a/arch/arm64/include/asm/sysreg.h b/arch/arm64/include/asm/sysreg.h index 2fc6242baf11..3091ae5975a3 100644 --- a/arch/arm64/include/asm/sysreg.h +++ b/arch/arm64/include/asm/sysreg.h @@ -86,11 +86,14 @@ #define REG_PSTATE_PAN_IMM sys_reg(0, 0, 4, 0, 4) #define REG_PSTATE_UAO_IMM sys_reg(0, 0, 4, 0, 3) +#define REG_PSTATE_SSBS_IMM sys_reg(0, 3, 4, 0, 1) #define SET_PSTATE_PAN(x) __emit_inst(0xd5000000 | REG_PSTATE_PAN_IMM | \ (!!x)<<8 | 0x1f) #define SET_PSTATE_UAO(x) __emit_inst(0xd5000000 | REG_PSTATE_UAO_IMM | \ (!!x)<<8 | 0x1f) +#define SET_PSTATE_SSBS(x) __emit_inst(0xd5000000 | REG_PSTATE_SSBS_IMM | \ + (!!x)<<8 | 0x1f) #define SYS_DC_ISW sys_insn(1, 0, 7, 6, 2) #define SYS_DC_CSW sys_insn(1, 0, 7, 10, 2) diff --git a/arch/arm64/include/uapi/asm/ptrace.h b/arch/arm64/include/uapi/asm/ptrace.h index 98c4ce55d9c3..a36227fdb084 100644 --- a/arch/arm64/include/uapi/asm/ptrace.h +++ b/arch/arm64/include/uapi/asm/ptrace.h @@ -46,6 +46,7 @@ #define PSR_I_BIT 0x00000080 #define PSR_A_BIT 0x00000100 #define PSR_D_BIT 0x00000200 +#define PSR_SSBS_BIT 0x00001000 #define PSR_PAN_BIT 0x00400000 #define PSR_UAO_BIT 0x00800000 #define PSR_V_BIT 0x10000000 diff --git a/arch/arm64/kernel/cpu_errata.c b/arch/arm64/kernel/cpu_errata.c index dec10898d688..c063490d7b51 100644 --- a/arch/arm64/kernel/cpu_errata.c +++ b/arch/arm64/kernel/cpu_errata.c @@ -312,6 +312,14 @@ void __init arm64_enable_wa2_handling(struct alt_instr *alt, void arm64_set_ssbd_mitigation(bool state) { + if (this_cpu_has_cap(ARM64_SSBS)) { + if (state) + asm volatile(SET_PSTATE_SSBS(0)); + else + asm volatile(SET_PSTATE_SSBS(1)); + return; + } + switch (psci_ops.conduit) { case PSCI_CONDUIT_HVC: arm_smccc_1_1_hvc(ARM_SMCCC_ARCH_WORKAROUND_2, state, NULL); @@ -336,6 +344,11 @@ static bool has_ssbd_mitigation(const struct arm64_cpu_capabilities *entry, WARN_ON(scope != SCOPE_LOCAL_CPU || preemptible()); + if (this_cpu_has_cap(ARM64_SSBS)) { + required = false; + goto out_printmsg; + } + if (psci_ops.smccc_version == SMCCC_VERSION_1_0) { ssbd_state = ARM64_SSBD_UNKNOWN; return false; @@ -384,7 +397,6 @@ static bool has_ssbd_mitigation(const struct arm64_cpu_capabilities *entry, switch (ssbd_state) { case ARM64_SSBD_FORCE_DISABLE: - pr_info_once("%s disabled from command-line\n", entry->desc); arm64_set_ssbd_mitigation(false); required = false; break; @@ -397,7 +409,6 @@ static bool has_ssbd_mitigation(const struct arm64_cpu_capabilities *entry, break; case ARM64_SSBD_FORCE_ENABLE: - pr_info_once("%s forced from command-line\n", entry->desc); arm64_set_ssbd_mitigation(true); required = true; break; @@ -407,6 +418,17 @@ static bool has_ssbd_mitigation(const struct arm64_cpu_capabilities *entry, break; } +out_printmsg: + switch (ssbd_state) { + case ARM64_SSBD_FORCE_DISABLE: + pr_info_once("%s disabled from command-line\n", entry->desc); + break; + + case ARM64_SSBD_FORCE_ENABLE: + pr_info_once("%s forced from command-line\n", entry->desc); + break; + } + return required; } #endif /* CONFIG_ARM64_SSBD */ diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c index 5794959d8beb..9aa18a0df0d7 100644 --- a/arch/arm64/kernel/cpufeature.c +++ b/arch/arm64/kernel/cpufeature.c @@ -1039,6 +1039,48 @@ static void cpu_has_fwb(const struct arm64_cpu_capabilities *__unused) WARN_ON(val & (7 << 27 | 7 << 21)); } +#ifdef CONFIG_ARM64_SSBD +static int ssbs_emulation_handler(struct pt_regs *regs, u32 instr) +{ + if (user_mode(regs)) + return 1; + + if (instr & BIT(CRm_shift)) + regs->pstate |= PSR_SSBS_BIT; + else + regs->pstate &= ~PSR_SSBS_BIT; + + arm64_skip_faulting_instruction(regs, 4); + return 0; +} + +static struct undef_hook ssbs_emulation_hook = { + .instr_mask = ~(1U << CRm_shift), + .instr_val = 0xd500001f | REG_PSTATE_SSBS_IMM, + .fn = ssbs_emulation_handler, +}; + +static void cpu_enable_ssbs(const struct arm64_cpu_capabilities *__unused) +{ + static bool undef_hook_registered = false; + static DEFINE_SPINLOCK(hook_lock); + + spin_lock(&hook_lock); + if (!undef_hook_registered) { + register_undef_hook(&ssbs_emulation_hook); + undef_hook_registered = true; + } + spin_unlock(&hook_lock); + + if (arm64_get_ssbd_state() == ARM64_SSBD_FORCE_DISABLE) { + sysreg_clear_set(sctlr_el1, 0, SCTLR_ELx_DSSBS); + arm64_set_ssbd_mitigation(false); + } else { + arm64_set_ssbd_mitigation(true); + } +} +#endif /* CONFIG_ARM64_SSBD */ + static const struct arm64_cpu_capabilities arm64_features[] = { { .desc = "GIC system register CPU interface", @@ -1226,6 +1268,7 @@ static const struct arm64_cpu_capabilities arm64_features[] = { .cpu_enable = cpu_enable_hw_dbm, }, #endif +#ifdef CONFIG_ARM64_SSBD { .desc = "CRC32 instructions", .capability = ARM64_HAS_CRC32, @@ -1244,7 +1287,9 @@ static const struct arm64_cpu_capabilities arm64_features[] = { .field_pos = ID_AA64PFR1_SSBS_SHIFT, .sign = FTR_UNSIGNED, .min_field_value = ID_AA64PFR1_SSBS_PSTATE_ONLY, + .cpu_enable = cpu_enable_ssbs, }, +#endif {}, }; diff --git a/arch/arm64/kernel/process.c b/arch/arm64/kernel/process.c index 7f1628effe6d..ce99c58cd1f1 100644 --- a/arch/arm64/kernel/process.c +++ b/arch/arm64/kernel/process.c @@ -358,6 +358,10 @@ int copy_thread(unsigned long clone_flags, unsigned long stack_start, if (IS_ENABLED(CONFIG_ARM64_UAO) && cpus_have_const_cap(ARM64_HAS_UAO)) childregs->pstate |= PSR_UAO_BIT; + + if (arm64_get_ssbd_state() == ARM64_SSBD_FORCE_DISABLE) + childregs->pstate |= PSR_SSBS_BIT; + p->thread.cpu_context.x19 = stack_start; p->thread.cpu_context.x20 = stk_sz; } diff --git a/arch/arm64/kernel/ssbd.c b/arch/arm64/kernel/ssbd.c index 07b12c034ec2..885f13e58708 100644 --- a/arch/arm64/kernel/ssbd.c +++ b/arch/arm64/kernel/ssbd.c @@ -3,12 +3,30 @@ * Copyright (C) 2018 ARM Ltd, All Rights Reserved. */ +#include #include #include +#include #include #include +static void ssbd_ssbs_enable(struct task_struct *task) +{ + u64 val = is_compat_thread(task_thread_info(task)) ? + PSR_AA32_SSBS_BIT : PSR_SSBS_BIT; + + task_pt_regs(task)->pstate |= val; +} + +static void ssbd_ssbs_disable(struct task_struct *task) +{ + u64 val = is_compat_thread(task_thread_info(task)) ? + PSR_AA32_SSBS_BIT : PSR_SSBS_BIT; + + task_pt_regs(task)->pstate &= ~val; +} + /* * prctl interface for SSBD */ @@ -44,12 +62,14 @@ static int ssbd_prctl_set(struct task_struct *task, unsigned long ctrl) return -EPERM; task_clear_spec_ssb_disable(task); clear_tsk_thread_flag(task, TIF_SSBD); + ssbd_ssbs_enable(task); break; case PR_SPEC_DISABLE: if (state == ARM64_SSBD_FORCE_DISABLE) return -EPERM; task_set_spec_ssb_disable(task); set_tsk_thread_flag(task, TIF_SSBD); + ssbd_ssbs_disable(task); break; case PR_SPEC_FORCE_DISABLE: if (state == ARM64_SSBD_FORCE_DISABLE) @@ -57,6 +77,7 @@ static int ssbd_prctl_set(struct task_struct *task, unsigned long ctrl) task_set_spec_ssb_disable(task); task_set_spec_ssb_force_disable(task); set_tsk_thread_flag(task, TIF_SSBD); + ssbd_ssbs_disable(task); break; default: return -ERANGE; From 7c36447ae5a090729e7b129f24705bb231a07e0b Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Wed, 8 Aug 2018 16:10:54 +0100 Subject: [PATCH 25/78] KVM: arm64: Set SCTLR_EL2.DSSBS if SSBD is forcefully disabled and !vhe When running without VHE, it is necessary to set SCTLR_EL2.DSSBS if SSBD has been forcefully disabled on the kernel command-line. Acked-by: Christoffer Dall Signed-off-by: Will Deacon Signed-off-by: Catalin Marinas --- arch/arm64/include/asm/kvm_host.h | 11 +++++++++++ arch/arm64/kvm/hyp/sysreg-sr.c | 11 +++++++++++ 2 files changed, 22 insertions(+) diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h index f26055f2306e..15501921fc75 100644 --- a/arch/arm64/include/asm/kvm_host.h +++ b/arch/arm64/include/asm/kvm_host.h @@ -389,6 +389,8 @@ struct kvm_vcpu *kvm_mpidr_to_vcpu(struct kvm *kvm, unsigned long mpidr); DECLARE_PER_CPU(kvm_cpu_context_t, kvm_host_cpu_state); +void __kvm_enable_ssbs(void); + static inline void __cpu_init_hyp_mode(phys_addr_t pgd_ptr, unsigned long hyp_stack_ptr, unsigned long vector_ptr) @@ -409,6 +411,15 @@ static inline void __cpu_init_hyp_mode(phys_addr_t pgd_ptr, */ BUG_ON(!static_branch_likely(&arm64_const_caps_ready)); __kvm_call_hyp((void *)pgd_ptr, hyp_stack_ptr, vector_ptr, tpidr_el2); + + /* + * Disabling SSBD on a non-VHE system requires us to enable SSBS + * at EL2. + */ + if (!has_vhe() && this_cpu_has_cap(ARM64_SSBS) && + arm64_get_ssbd_state() == ARM64_SSBD_FORCE_DISABLE) { + kvm_call_hyp(__kvm_enable_ssbs); + } } static inline bool kvm_arch_check_sve_has_vhe(void) diff --git a/arch/arm64/kvm/hyp/sysreg-sr.c b/arch/arm64/kvm/hyp/sysreg-sr.c index 9ce223944983..76d016b446b2 100644 --- a/arch/arm64/kvm/hyp/sysreg-sr.c +++ b/arch/arm64/kvm/hyp/sysreg-sr.c @@ -288,3 +288,14 @@ void kvm_vcpu_put_sysregs(struct kvm_vcpu *vcpu) vcpu->arch.sysregs_loaded_on_cpu = false; } + +void __hyp_text __kvm_enable_ssbs(void) +{ + u64 tmp; + + asm volatile( + "mrs %0, sctlr_el2\n" + "orr %0, %0, %1\n" + "msr sctlr_el2, %0" + : "=&r" (tmp) : "L" (SCTLR_ELx_DSSBS)); +} From b8925ee2e12d1cb9a11d6f28b5814f2bfa59dce1 Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Tue, 7 Aug 2018 13:53:41 +0100 Subject: [PATCH 26/78] arm64: cpu: Move errata and feature enable callbacks closer to callers The cpu errata and feature enable callbacks are only called via their respective arm64_cpu_capabilities structure and therefore shouldn't exist in the global namespace. Move the PAN, RAS and cache maintenance emulation enable callbacks into the same files as their corresponding arm64_cpu_capabilities structures, making them static in the process. Signed-off-by: Will Deacon Signed-off-by: Catalin Marinas --- arch/arm64/include/asm/processor.h | 4 ---- arch/arm64/kernel/cpu_errata.c | 6 ++++++ arch/arm64/kernel/cpufeature.c | 28 ++++++++++++++++++++++------ arch/arm64/kernel/traps.c | 5 ----- arch/arm64/mm/fault.c | 14 -------------- 5 files changed, 28 insertions(+), 29 deletions(-) diff --git a/arch/arm64/include/asm/processor.h b/arch/arm64/include/asm/processor.h index f6835374ed9f..2bf6691371c2 100644 --- a/arch/arm64/include/asm/processor.h +++ b/arch/arm64/include/asm/processor.h @@ -251,10 +251,6 @@ static inline void spin_lock_prefetch(const void *ptr) #endif -void cpu_enable_pan(const struct arm64_cpu_capabilities *__unused); -void cpu_enable_cache_maint_trap(const struct arm64_cpu_capabilities *__unused); -void cpu_clear_disr(const struct arm64_cpu_capabilities *__unused); - extern unsigned long __ro_after_init signal_minsigstksz; /* sigframe size */ extern void __init minsigstksz_setup(void); diff --git a/arch/arm64/kernel/cpu_errata.c b/arch/arm64/kernel/cpu_errata.c index c063490d7b51..8900cb0615f8 100644 --- a/arch/arm64/kernel/cpu_errata.c +++ b/arch/arm64/kernel/cpu_errata.c @@ -433,6 +433,12 @@ out_printmsg: } #endif /* CONFIG_ARM64_SSBD */ +static void __maybe_unused +cpu_enable_cache_maint_trap(const struct arm64_cpu_capabilities *__unused) +{ + sysreg_clear_set(sctlr_el1, SCTLR_EL1_UCI, 0); +} + #define CAP_MIDR_RANGE(model, v_min, r_min, v_max, r_max) \ .matches = is_affected_midr_range, \ .midr_range = MIDR_RANGE(model, v_min, r_min, v_max, r_max) diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c index 9aa18a0df0d7..35796ca1db50 100644 --- a/arch/arm64/kernel/cpufeature.c +++ b/arch/arm64/kernel/cpufeature.c @@ -1081,6 +1081,28 @@ static void cpu_enable_ssbs(const struct arm64_cpu_capabilities *__unused) } #endif /* CONFIG_ARM64_SSBD */ +#ifdef CONFIG_ARM64_PAN +static void cpu_enable_pan(const struct arm64_cpu_capabilities *__unused) +{ + /* + * We modify PSTATE. This won't work from irq context as the PSTATE + * is discarded once we return from the exception. + */ + WARN_ON_ONCE(in_interrupt()); + + sysreg_clear_set(sctlr_el1, SCTLR_EL1_SPAN, 0); + asm(SET_PSTATE_PAN(1)); +} +#endif /* CONFIG_ARM64_PAN */ + +#ifdef CONFIG_ARM64_RAS_EXTN +static void cpu_clear_disr(const struct arm64_cpu_capabilities *__unused) +{ + /* Firmware may have left a deferred SError in this register. */ + write_sysreg_s(0, SYS_DISR_EL1); +} +#endif /* CONFIG_ARM64_RAS_EXTN */ + static const struct arm64_cpu_capabilities arm64_features[] = { { .desc = "GIC system register CPU interface", @@ -1824,9 +1846,3 @@ static int __init enable_mrs_emulation(void) } core_initcall(enable_mrs_emulation); - -void cpu_clear_disr(const struct arm64_cpu_capabilities *__unused) -{ - /* Firmware may have left a deferred SError in this register. */ - write_sysreg_s(0, SYS_DISR_EL1); -} diff --git a/arch/arm64/kernel/traps.c b/arch/arm64/kernel/traps.c index b9da093e0341..148de417ed3e 100644 --- a/arch/arm64/kernel/traps.c +++ b/arch/arm64/kernel/traps.c @@ -412,11 +412,6 @@ asmlinkage void __exception do_undefinstr(struct pt_regs *regs) BUG_ON(!user_mode(regs)); } -void cpu_enable_cache_maint_trap(const struct arm64_cpu_capabilities *__unused) -{ - sysreg_clear_set(sctlr_el1, SCTLR_EL1_UCI, 0); -} - #define __user_cache_maint(insn, address, res) \ if (address >= user_addr_max()) { \ res = -EFAULT; \ diff --git a/arch/arm64/mm/fault.c b/arch/arm64/mm/fault.c index 50b30ff30de4..6342f1793c70 100644 --- a/arch/arm64/mm/fault.c +++ b/arch/arm64/mm/fault.c @@ -864,17 +864,3 @@ asmlinkage int __exception do_debug_exception(unsigned long addr, return rv; } NOKPROBE_SYMBOL(do_debug_exception); - -#ifdef CONFIG_ARM64_PAN -void cpu_enable_pan(const struct arm64_cpu_capabilities *__unused) -{ - /* - * We modify PSTATE. This won't work from irq context as the PSTATE - * is discarded once we return from the exception. - */ - WARN_ON_ONCE(in_interrupt()); - - sysreg_clear_set(sctlr_el1, SCTLR_EL1_SPAN, 0); - asm(SET_PSTATE_PAN(1)); -} -#endif /* CONFIG_ARM64_PAN */ From 8a60419d36762a1131c2b29f7bd14371db4df1b5 Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Tue, 14 Aug 2018 16:24:54 +0100 Subject: [PATCH 27/78] arm64: force_signal_inject: WARN if called from kernel context force_signal_inject() is designed to send a fatal signal to userspace, so WARN if the current pt_regs indicates a kernel context. This can currently happen for the undefined instruction trap, so patch that up so we always BUG() if we didn't have a handler. Signed-off-by: Will Deacon Signed-off-by: Catalin Marinas --- arch/arm64/kernel/traps.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/arch/arm64/kernel/traps.c b/arch/arm64/kernel/traps.c index 148de417ed3e..539b470f9526 100644 --- a/arch/arm64/kernel/traps.c +++ b/arch/arm64/kernel/traps.c @@ -354,6 +354,9 @@ void force_signal_inject(int signal, int code, unsigned long address) const char *desc; struct pt_regs *regs = current_pt_regs(); + if (WARN_ON(!user_mode(regs))) + return; + clear_siginfo(&info); switch (signal) { @@ -408,8 +411,8 @@ asmlinkage void __exception do_undefinstr(struct pt_regs *regs) if (call_undef_hook(regs) == 0) return; - force_signal_inject(SIGILL, ILL_ILLOPC, regs->pc); BUG_ON(!user_mode(regs)); + force_signal_inject(SIGILL, ILL_ILLOPC, regs->pc); } #define __user_cache_maint(insn, address, res) \ From e4ba15debcfd27f60d43da940a58108783bff2a6 Mon Sep 17 00:00:00 2001 From: Hari Vyas Date: Tue, 7 Aug 2018 16:33:48 +0530 Subject: [PATCH 28/78] arm64: fix for bad_mode() handler to always result in panic The bad_mode() handler is called if we encounter an uunknown exception, with the expectation that the subsequent call to panic() will halt the system. Unfortunately, if the exception calling bad_mode() is taken from EL0, then the call to die() can end up killing the current user task and calling schedule() instead of falling through to panic(). Remove the die() call altogether, since we really want to bring down the machine in this "impossible" case. Signed-off-by: Hari Vyas Signed-off-by: Will Deacon Signed-off-by: Catalin Marinas --- arch/arm64/kernel/traps.c | 1 - 1 file changed, 1 deletion(-) diff --git a/arch/arm64/kernel/traps.c b/arch/arm64/kernel/traps.c index 539b470f9526..ce29973b9bfe 100644 --- a/arch/arm64/kernel/traps.c +++ b/arch/arm64/kernel/traps.c @@ -606,7 +606,6 @@ asmlinkage void bad_mode(struct pt_regs *regs, int reason, unsigned int esr) handler[reason], smp_processor_id(), esr, esr_get_class_string(esr)); - die("Oops - bad mode", regs, 0); local_daif_mask(); panic("bad mode"); } From 74e248286e1d04b0d9bfdd002450ef0211f6f29f Mon Sep 17 00:00:00 2001 From: Suzuki K Poulose Date: Sun, 16 Sep 2018 23:17:23 +0100 Subject: [PATCH 29/78] arm64: sysreg: Clean up instructions for modifying PSTATE fields Instructions for modifying the PSTATE fields which were not supported in the older toolchains (e.g, PAN, UAO) are generated using macros. We have so far used the normal sys_reg() helper for defining the PSTATE fields. While this works fine, it is really difficult to correlate the code with the Arm ARM definition. As per Arm ARM, the PSTATE fields are defined only using Op1, Op2 fields, with fixed values for Op0, CRn. Also the CRm field has been reserved for the Immediate value for the instruction. So using the sys_reg() looks quite confusing. This patch cleans up the instruction helpers by bringing them in line with the Arm ARM definitions to make it easier to correlate code with the document. No functional changes. Cc: Will Deacon Cc: Mark Rutland Signed-off-by: Suzuki K Poulose Signed-off-by: Catalin Marinas --- arch/arm64/include/asm/sysreg.h | 28 +++++++++++++++++++--------- arch/arm64/kernel/cpufeature.c | 6 +++--- 2 files changed, 22 insertions(+), 12 deletions(-) diff --git a/arch/arm64/include/asm/sysreg.h b/arch/arm64/include/asm/sysreg.h index 3091ae5975a3..7e9ab1fa090c 100644 --- a/arch/arm64/include/asm/sysreg.h +++ b/arch/arm64/include/asm/sysreg.h @@ -84,16 +84,26 @@ #endif /* CONFIG_BROKEN_GAS_INST */ -#define REG_PSTATE_PAN_IMM sys_reg(0, 0, 4, 0, 4) -#define REG_PSTATE_UAO_IMM sys_reg(0, 0, 4, 0, 3) -#define REG_PSTATE_SSBS_IMM sys_reg(0, 3, 4, 0, 1) +/* + * Instructions for modifying PSTATE fields. + * As per Arm ARM for v8-A, Section "C.5.1.3 op0 == 0b00, architectural hints, + * barriers and CLREX, and PSTATE access", ARM DDI 0487 C.a, system instructions + * for accessing PSTATE fields have the following encoding: + * Op0 = 0, CRn = 4 + * Op1, Op2 encodes the PSTATE field modified and defines the constraints. + * CRm = Imm4 for the instruction. + * Rt = 0x1f + */ +#define pstate_field(op1, op2) ((op1) << Op1_shift | (op2) << Op2_shift) +#define PSTATE_Imm_shift CRm_shift -#define SET_PSTATE_PAN(x) __emit_inst(0xd5000000 | REG_PSTATE_PAN_IMM | \ - (!!x)<<8 | 0x1f) -#define SET_PSTATE_UAO(x) __emit_inst(0xd5000000 | REG_PSTATE_UAO_IMM | \ - (!!x)<<8 | 0x1f) -#define SET_PSTATE_SSBS(x) __emit_inst(0xd5000000 | REG_PSTATE_SSBS_IMM | \ - (!!x)<<8 | 0x1f) +#define PSTATE_PAN pstate_field(0, 4) +#define PSTATE_UAO pstate_field(0, 3) +#define PSTATE_SSBS pstate_field(3, 1) + +#define SET_PSTATE_PAN(x) __emit_inst(0xd500401f | PSTATE_PAN | ((!!x) << PSTATE_Imm_shift)) +#define SET_PSTATE_UAO(x) __emit_inst(0xd500401f | PSTATE_UAO | ((!!x) << PSTATE_Imm_shift)) +#define SET_PSTATE_SSBS(x) __emit_inst(0xd500401f | PSTATE_SSBS | ((!!x) << PSTATE_Imm_shift)) #define SYS_DC_ISW sys_insn(1, 0, 7, 6, 2) #define SYS_DC_CSW sys_insn(1, 0, 7, 10, 2) diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c index 35796ca1db50..f15e2fb97011 100644 --- a/arch/arm64/kernel/cpufeature.c +++ b/arch/arm64/kernel/cpufeature.c @@ -1045,7 +1045,7 @@ static int ssbs_emulation_handler(struct pt_regs *regs, u32 instr) if (user_mode(regs)) return 1; - if (instr & BIT(CRm_shift)) + if (instr & BIT(PSTATE_Imm_shift)) regs->pstate |= PSR_SSBS_BIT; else regs->pstate &= ~PSR_SSBS_BIT; @@ -1055,8 +1055,8 @@ static int ssbs_emulation_handler(struct pt_regs *regs, u32 instr) } static struct undef_hook ssbs_emulation_hook = { - .instr_mask = ~(1U << CRm_shift), - .instr_val = 0xd500001f | REG_PSTATE_SSBS_IMM, + .instr_mask = ~(1U << PSTATE_Imm_shift), + .instr_val = 0xd500401f | PSTATE_SSBS, .fn = ssbs_emulation_handler, }; From 5ffdfaedfa0aba3f5db0fbb8ed4f3192be2b39b8 Mon Sep 17 00:00:00 2001 From: Vladimir Murzin Date: Tue, 31 Jul 2018 14:08:56 +0100 Subject: [PATCH 30/78] arm64: mm: Support Common Not Private translations Common Not Private (CNP) is a feature of ARMv8.2 extension which allows translation table entries to be shared between different PEs in the same inner shareable domain, so the hardware can use this fact to optimise the caching of such entries in the TLB. CNP occupies one bit in TTBRx_ELy and VTTBR_EL2, which advertises to the hardware that the translation table entries pointed to by this TTBR are the same as every PE in the same inner shareable domain for which the equivalent TTBR also has CNP bit set. In case CNP bit is set but TTBR does not point at the same translation table entries for a given ASID and VMID, then the system is mis-configured, so the results of translations are UNPREDICTABLE. For kernel we postpone setting CNP till all cpus are up and rely on cpufeature framework to 1) patch the code which is sensitive to CNP and 2) update TTBR1_EL1 with CNP bit set. TTBR1_EL1 can be reprogrammed as result of hibernation or cpuidle (via __enable_mmu). For these two cases we restore CnP bit via __cpu_suspend_exit(). There are a few cases we need to care of changes in TTBR0_EL1: - a switch to idmap - software emulated PAN we rule out latter via Kconfig options and for the former we make sure that CNP is set for non-zero ASIDs only. Reviewed-by: James Morse Reviewed-by: Suzuki K Poulose Reviewed-by: Catalin Marinas Signed-off-by: Vladimir Murzin [catalin.marinas@arm.com: default y for CONFIG_ARM64_CNP] Signed-off-by: Catalin Marinas --- arch/arm64/Kconfig | 14 +++++++++++ arch/arm64/include/asm/cpucaps.h | 3 ++- arch/arm64/include/asm/cpufeature.h | 6 +++++ arch/arm64/include/asm/mmu_context.h | 17 +++++++++++-- arch/arm64/include/asm/pgtable-hwdef.h | 2 ++ arch/arm64/kernel/cpufeature.c | 34 ++++++++++++++++++++++++++ arch/arm64/kernel/suspend.c | 4 +++ arch/arm64/mm/context.c | 3 +++ arch/arm64/mm/proc.S | 11 ++++++--- 9 files changed, 88 insertions(+), 6 deletions(-) diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig index 58eb02796b16..fabac617d605 100644 --- a/arch/arm64/Kconfig +++ b/arch/arm64/Kconfig @@ -1134,6 +1134,20 @@ config ARM64_RAS_EXTN and access the new registers if the system supports the extension. Platform RAS features may additionally depend on firmware support. +config ARM64_CNP + bool "Enable support for Common Not Private (CNP) translations" + default y + depends on ARM64_PAN || !ARM64_SW_TTBR0_PAN + help + Common Not Private (CNP) allows translation table entries to + be shared between different PEs in the same inner shareable + domain, so the hardware can use this fact to optimise the + caching of such entries in the TLB. + + Selecting this option allows the CNP feature to be detected + at runtime, and does not affect PEs that do not implement + this feature. + endmenu config ARM64_SVE diff --git a/arch/arm64/include/asm/cpucaps.h b/arch/arm64/include/asm/cpucaps.h index 38eec9cf30f2..c51d7e868f3f 100644 --- a/arch/arm64/include/asm/cpucaps.h +++ b/arch/arm64/include/asm/cpucaps.h @@ -53,7 +53,8 @@ #define ARM64_HAS_STAGE2_FWB 32 #define ARM64_HAS_CRC32 33 #define ARM64_SSBS 34 +#define ARM64_HAS_CNP 35 -#define ARM64_NCAPS 35 +#define ARM64_NCAPS 36 #endif /* __ASM_CPUCAPS_H */ diff --git a/arch/arm64/include/asm/cpufeature.h b/arch/arm64/include/asm/cpufeature.h index 9079715794af..bb9fbf6f910a 100644 --- a/arch/arm64/include/asm/cpufeature.h +++ b/arch/arm64/include/asm/cpufeature.h @@ -508,6 +508,12 @@ static inline bool system_supports_sve(void) cpus_have_const_cap(ARM64_SVE); } +static inline bool system_supports_cnp(void) +{ + return IS_ENABLED(CONFIG_ARM64_CNP) && + cpus_have_const_cap(ARM64_HAS_CNP); +} + #define ARM64_SSBD_UNKNOWN -1 #define ARM64_SSBD_FORCE_DISABLE 0 #define ARM64_SSBD_KERNEL 1 diff --git a/arch/arm64/include/asm/mmu_context.h b/arch/arm64/include/asm/mmu_context.h index 39ec0b8a689e..1e58bf58c22b 100644 --- a/arch/arm64/include/asm/mmu_context.h +++ b/arch/arm64/include/asm/mmu_context.h @@ -147,12 +147,25 @@ static inline void cpu_replace_ttbr1(pgd_t *pgdp) extern ttbr_replace_func idmap_cpu_replace_ttbr1; ttbr_replace_func *replace_phys; - phys_addr_t pgd_phys = virt_to_phys(pgdp); + /* phys_to_ttbr() zeros lower 2 bits of ttbr with 52-bit PA */ + phys_addr_t ttbr1 = phys_to_ttbr(virt_to_phys(pgdp)); + + if (system_supports_cnp() && !WARN_ON(pgdp != lm_alias(swapper_pg_dir))) { + /* + * cpu_replace_ttbr1() is used when there's a boot CPU + * up (i.e. cpufeature framework is not up yet) and + * latter only when we enable CNP via cpufeature's + * enable() callback. + * Also we rely on the cpu_hwcap bit being set before + * calling the enable() function. + */ + ttbr1 |= TTBR_CNP_BIT; + } replace_phys = (void *)__pa_symbol(idmap_cpu_replace_ttbr1); cpu_install_idmap(); - replace_phys(pgd_phys); + replace_phys(ttbr1); cpu_uninstall_idmap(); } diff --git a/arch/arm64/include/asm/pgtable-hwdef.h b/arch/arm64/include/asm/pgtable-hwdef.h index fd208eac9f2a..1d7d8da2ef9b 100644 --- a/arch/arm64/include/asm/pgtable-hwdef.h +++ b/arch/arm64/include/asm/pgtable-hwdef.h @@ -211,6 +211,8 @@ #define PHYS_MASK_SHIFT (CONFIG_ARM64_PA_BITS) #define PHYS_MASK ((UL(1) << PHYS_MASK_SHIFT) - 1) +#define TTBR_CNP_BIT (UL(1) << 0) + /* * TCR flags. */ diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c index f15e2fb97011..237f8822a391 100644 --- a/arch/arm64/kernel/cpufeature.c +++ b/arch/arm64/kernel/cpufeature.c @@ -20,6 +20,7 @@ #include #include +#include #include #include #include @@ -117,6 +118,7 @@ EXPORT_SYMBOL(cpu_hwcap_keys); static bool __maybe_unused cpufeature_pan_not_uao(const struct arm64_cpu_capabilities *entry, int __unused); +static void cpu_enable_cnp(struct arm64_cpu_capabilities const *cap); /* * NOTE: Any changes to the visibility of features should be kept in @@ -863,6 +865,20 @@ static bool has_cache_dic(const struct arm64_cpu_capabilities *entry, return read_sanitised_ftr_reg(SYS_CTR_EL0) & BIT(CTR_DIC_SHIFT); } +static bool __maybe_unused +has_useable_cnp(const struct arm64_cpu_capabilities *entry, int scope) +{ + /* + * Kdump isn't guaranteed to power-off all secondary CPUs, CNP + * may share TLB entries with a CPU stuck in the crashed + * kernel. + */ + if (is_kdump_kernel()) + return false; + + return has_cpuid_feature(entry, scope); +} + #ifdef CONFIG_UNMAP_KERNEL_AT_EL0 static int __kpti_forced; /* 0: not forced, >0: forced on, <0: forced off */ @@ -1311,6 +1327,19 @@ static const struct arm64_cpu_capabilities arm64_features[] = { .min_field_value = ID_AA64PFR1_SSBS_PSTATE_ONLY, .cpu_enable = cpu_enable_ssbs, }, +#endif +#ifdef CONFIG_ARM64_CNP + { + .desc = "Common not Private translations", + .capability = ARM64_HAS_CNP, + .type = ARM64_CPUCAP_SYSTEM_FEATURE, + .matches = has_useable_cnp, + .sys_reg = SYS_ID_AA64MMFR2_EL1, + .sign = FTR_UNSIGNED, + .field_pos = ID_AA64MMFR2_CNP_SHIFT, + .min_field_value = 1, + .cpu_enable = cpu_enable_cnp, + }, #endif {}, }; @@ -1749,6 +1778,11 @@ cpufeature_pan_not_uao(const struct arm64_cpu_capabilities *entry, int __unused) return (cpus_have_const_cap(ARM64_HAS_PAN) && !cpus_have_const_cap(ARM64_HAS_UAO)); } +static void __maybe_unused cpu_enable_cnp(struct arm64_cpu_capabilities const *cap) +{ + cpu_replace_ttbr1(lm_alias(swapper_pg_dir)); +} + /* * We emulate only the following system register space. * Op0 = 0x3, CRn = 0x0, Op1 = 0x0, CRm = [0, 4 - 7] diff --git a/arch/arm64/kernel/suspend.c b/arch/arm64/kernel/suspend.c index 70c283368b64..9405d1b7f4b0 100644 --- a/arch/arm64/kernel/suspend.c +++ b/arch/arm64/kernel/suspend.c @@ -48,6 +48,10 @@ void notrace __cpu_suspend_exit(void) */ cpu_uninstall_idmap(); + /* Restore CnP bit in TTBR1_EL1 */ + if (system_supports_cnp()) + cpu_replace_ttbr1(lm_alias(swapper_pg_dir)); + /* * PSTATE was not saved over suspend/resume, re-enable any detected * features that might not have been set correctly. diff --git a/arch/arm64/mm/context.c b/arch/arm64/mm/context.c index c127f94da8e2..a65af49e12e7 100644 --- a/arch/arm64/mm/context.c +++ b/arch/arm64/mm/context.c @@ -196,6 +196,9 @@ void check_and_switch_context(struct mm_struct *mm, unsigned int cpu) unsigned long flags; u64 asid, old_active_asid; + if (system_supports_cnp()) + cpu_set_reserved_ttbr0(); + asid = atomic64_read(&mm->context.id); /* diff --git a/arch/arm64/mm/proc.S b/arch/arm64/mm/proc.S index 03646e6a2ef4..2c75b0b903ae 100644 --- a/arch/arm64/mm/proc.S +++ b/arch/arm64/mm/proc.S @@ -160,6 +160,12 @@ ENTRY(cpu_do_switch_mm) mrs x2, ttbr1_el1 mmid x1, x1 // get mm->context.id phys_to_ttbr x3, x0 + +alternative_if ARM64_HAS_CNP + cbz x1, 1f // skip CNP for reserved ASID + orr x3, x3, #TTBR_CNP_BIT +1: +alternative_else_nop_endif #ifdef CONFIG_ARM64_SW_TTBR0_PAN bfi x3, x1, #48, #16 // set the ASID field in TTBR0 #endif @@ -184,7 +190,7 @@ ENDPROC(cpu_do_switch_mm) .endm /* - * void idmap_cpu_replace_ttbr1(phys_addr_t new_pgd) + * void idmap_cpu_replace_ttbr1(phys_addr_t ttbr1) * * This is the low-level counterpart to cpu_replace_ttbr1, and should not be * called by anything else. It can only be executed from a TTBR0 mapping. @@ -194,8 +200,7 @@ ENTRY(idmap_cpu_replace_ttbr1) __idmap_cpu_set_reserved_ttbr1 x1, x3 - phys_to_ttbr x3, x0 - msr ttbr1_el1, x3 + msr ttbr1_el1, x0 isb restore_daif x2 From ab510027dc4dbd1eeb611a34b0cda8b21fcde492 Mon Sep 17 00:00:00 2001 From: Vladimir Murzin Date: Tue, 31 Jul 2018 14:08:57 +0100 Subject: [PATCH 31/78] arm64: KVM: Enable Common Not Private translations We rely on cpufeature framework to detect and enable CNP so for KVM we need to patch hyp to set CNP bit just before TTBR0_EL2 gets written. For the guest we encode CNP bit while building vttbr, so we don't need to bother with that in a world switch. Reviewed-by: James Morse Acked-by: Catalin Marinas Acked-by: Marc Zyngier Signed-off-by: Vladimir Murzin Signed-off-by: Catalin Marinas --- arch/arm/include/asm/kvm_arm.h | 1 + arch/arm/include/asm/kvm_mmu.h | 5 +++++ arch/arm64/include/asm/kvm_arm.h | 1 + arch/arm64/include/asm/kvm_mmu.h | 5 +++++ arch/arm64/kvm/hyp-init.S | 3 +++ virt/kvm/arm/arm.c | 4 ++-- 6 files changed, 17 insertions(+), 2 deletions(-) diff --git a/arch/arm/include/asm/kvm_arm.h b/arch/arm/include/asm/kvm_arm.h index 3ab8b3781bfe..2d43dca29c72 100644 --- a/arch/arm/include/asm/kvm_arm.h +++ b/arch/arm/include/asm/kvm_arm.h @@ -161,6 +161,7 @@ #else #define VTTBR_X (5 - KVM_T0SZ) #endif +#define VTTBR_CNP_BIT _AC(1, UL) #define VTTBR_BADDR_MASK (((_AC(1, ULL) << (40 - VTTBR_X)) - 1) << VTTBR_X) #define VTTBR_VMID_SHIFT _AC(48, ULL) #define VTTBR_VMID_MASK(size) (_AT(u64, (1 << size) - 1) << VTTBR_VMID_SHIFT) diff --git a/arch/arm/include/asm/kvm_mmu.h b/arch/arm/include/asm/kvm_mmu.h index 265ea9cf7df7..847f01fa429d 100644 --- a/arch/arm/include/asm/kvm_mmu.h +++ b/arch/arm/include/asm/kvm_mmu.h @@ -355,6 +355,11 @@ static inline int hyp_map_aux_data(void) #define kvm_phys_to_vttbr(addr) (addr) +static inline bool kvm_cpu_has_cnp(void) +{ + return false; +} + #endif /* !__ASSEMBLY__ */ #endif /* __ARM_KVM_MMU_H__ */ diff --git a/arch/arm64/include/asm/kvm_arm.h b/arch/arm64/include/asm/kvm_arm.h index aa45df752a16..b476bc46f0ab 100644 --- a/arch/arm64/include/asm/kvm_arm.h +++ b/arch/arm64/include/asm/kvm_arm.h @@ -175,6 +175,7 @@ #define VTCR_EL2_FLAGS (VTCR_EL2_COMMON_BITS | VTCR_EL2_TGRAN_FLAGS) #define VTTBR_X (VTTBR_X_TGRAN_MAGIC - VTCR_EL2_T0SZ_IPA) +#define VTTBR_CNP_BIT (UL(1)) #define VTTBR_BADDR_MASK (((UL(1) << (PHYS_MASK_SHIFT - VTTBR_X)) - 1) << VTTBR_X) #define VTTBR_VMID_SHIFT (UL(48)) #define VTTBR_VMID_MASK(size) (_AT(u64, (1 << size) - 1) << VTTBR_VMID_SHIFT) diff --git a/arch/arm64/include/asm/kvm_mmu.h b/arch/arm64/include/asm/kvm_mmu.h index d6fff7de5539..64337afbf124 100644 --- a/arch/arm64/include/asm/kvm_mmu.h +++ b/arch/arm64/include/asm/kvm_mmu.h @@ -517,5 +517,10 @@ static inline int hyp_map_aux_data(void) #define kvm_phys_to_vttbr(addr) phys_to_ttbr(addr) +static inline bool kvm_cpu_has_cnp(void) +{ + return system_supports_cnp(); +} + #endif /* __ASSEMBLY__ */ #endif /* __ARM64_KVM_MMU_H__ */ diff --git a/arch/arm64/kvm/hyp-init.S b/arch/arm64/kvm/hyp-init.S index ea9225160786..4576b86a5579 100644 --- a/arch/arm64/kvm/hyp-init.S +++ b/arch/arm64/kvm/hyp-init.S @@ -65,6 +65,9 @@ __do_hyp_init: b.lo __kvm_handle_stub_hvc phys_to_ttbr x4, x0 +alternative_if ARM64_HAS_CNP + orr x4, x4, #TTBR_CNP_BIT +alternative_else_nop_endif msr ttbr0_el2, x4 mrs x4, tcr_el1 diff --git a/virt/kvm/arm/arm.c b/virt/kvm/arm/arm.c index c92053bc3f96..150c8a69cdaf 100644 --- a/virt/kvm/arm/arm.c +++ b/virt/kvm/arm/arm.c @@ -496,7 +496,7 @@ static bool need_new_vmid_gen(struct kvm *kvm) static void update_vttbr(struct kvm *kvm) { phys_addr_t pgd_phys; - u64 vmid; + u64 vmid, cnp = kvm_cpu_has_cnp() ? VTTBR_CNP_BIT : 0; bool new_gen; read_lock(&kvm_vmid_lock); @@ -546,7 +546,7 @@ static void update_vttbr(struct kvm *kvm) pgd_phys = virt_to_phys(kvm->arch.pgd); BUG_ON(pgd_phys & ~VTTBR_BADDR_MASK); vmid = ((u64)(kvm->arch.vmid) << VTTBR_VMID_SHIFT) & VTTBR_VMID_MASK(kvm_vmid_bits); - kvm->arch.vttbr = kvm_phys_to_vttbr(pgd_phys) | vmid; + kvm->arch.vttbr = kvm_phys_to_vttbr(pgd_phys) | vmid | cnp; write_unlock(&kvm_vmid_lock); } From 880f7cc47265e7b195781dfa9a0cd62ef78304e3 Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Wed, 19 Sep 2018 11:41:21 +0100 Subject: [PATCH 32/78] arm64: cpu_errata: Remove ARM64_MISMATCHED_CACHE_LINE_SIZE There's no need to treat mismatched cache-line sizes reported by CTR_EL0 differently to any other mismatched fields that we treat as "STRICT" in the cpufeature code. In both cases we need to trap and emulate EL0 accesses to the register, so drop ARM64_MISMATCHED_CACHE_LINE_SIZE and rely on ARM64_MISMATCHED_CACHE_TYPE instead. Reviewed-by: Suzuki K Poulose Signed-off-by: Will Deacon [catalin.marinas@arm.com: move ARM64_HAS_CNP in the empty cpucaps.h slot] Signed-off-by: Catalin Marinas --- arch/arm64/include/asm/assembler.h | 7 +++---- arch/arm64/include/asm/cpucaps.h | 5 ++--- arch/arm64/kernel/cpu_errata.c | 15 ++------------- 3 files changed, 7 insertions(+), 20 deletions(-) diff --git a/arch/arm64/include/asm/assembler.h b/arch/arm64/include/asm/assembler.h index 0bcc98dbba56..6142402c2eb4 100644 --- a/arch/arm64/include/asm/assembler.h +++ b/arch/arm64/include/asm/assembler.h @@ -286,12 +286,11 @@ alternative_endif ldr \rd, [\rn, #MM_CONTEXT_ID] .endm /* - * read_ctr - read CTR_EL0. If the system has mismatched - * cache line sizes, provide the system wide safe value - * from arm64_ftr_reg_ctrel0.sys_val + * read_ctr - read CTR_EL0. If the system has mismatched register fields, + * provide the system wide safe value from arm64_ftr_reg_ctrel0.sys_val */ .macro read_ctr, reg -alternative_if_not ARM64_MISMATCHED_CACHE_LINE_SIZE +alternative_if_not ARM64_MISMATCHED_CACHE_TYPE mrs \reg, ctr_el0 // read CTR nop alternative_else diff --git a/arch/arm64/include/asm/cpucaps.h b/arch/arm64/include/asm/cpucaps.h index c51d7e868f3f..6eb1b3fd0493 100644 --- a/arch/arm64/include/asm/cpucaps.h +++ b/arch/arm64/include/asm/cpucaps.h @@ -33,7 +33,7 @@ #define ARM64_WORKAROUND_CAVIUM_27456 12 #define ARM64_HAS_32BIT_EL0 13 #define ARM64_HARDEN_EL2_VECTORS 14 -#define ARM64_MISMATCHED_CACHE_LINE_SIZE 15 +#define ARM64_HAS_CNP 15 #define ARM64_HAS_NO_FPSIMD 16 #define ARM64_WORKAROUND_REPEAT_TLBI 17 #define ARM64_WORKAROUND_QCOM_FALKOR_E1003 18 @@ -53,8 +53,7 @@ #define ARM64_HAS_STAGE2_FWB 32 #define ARM64_HAS_CRC32 33 #define ARM64_SSBS 34 -#define ARM64_HAS_CNP 35 -#define ARM64_NCAPS 36 +#define ARM64_NCAPS 35 #endif /* __ASM_CPUCAPS_H */ diff --git a/arch/arm64/kernel/cpu_errata.c b/arch/arm64/kernel/cpu_errata.c index 8900cb0615f8..20be4c578e0a 100644 --- a/arch/arm64/kernel/cpu_errata.c +++ b/arch/arm64/kernel/cpu_errata.c @@ -68,11 +68,7 @@ static bool has_mismatched_cache_type(const struct arm64_cpu_capabilities *entry, int scope) { - u64 mask = CTR_CACHE_MINLINE_MASK; - - /* Skip matching the min line sizes for cache type check */ - if (entry->capability == ARM64_MISMATCHED_CACHE_TYPE) - mask ^= arm64_ftr_reg_ctrel0.strict_mask; + u64 mask = arm64_ftr_reg_ctrel0.strict_mask;; WARN_ON(scope != SCOPE_LOCAL_CPU || preemptible()); return (read_cpuid_cachetype() & mask) != @@ -644,14 +640,7 @@ const struct arm64_cpu_capabilities arm64_errata[] = { }, #endif { - .desc = "Mismatched cache line size", - .capability = ARM64_MISMATCHED_CACHE_LINE_SIZE, - .matches = has_mismatched_cache_type, - .type = ARM64_CPUCAP_LOCAL_CPU_ERRATUM, - .cpu_enable = cpu_enable_trap_ctr_access, - }, - { - .desc = "Mismatched cache type", + .desc = "Mismatched cache type (CTR_EL0)", .capability = ARM64_MISMATCHED_CACHE_TYPE, .matches = has_mismatched_cache_type, .type = ARM64_CPUCAP_LOCAL_CPU_ERRATUM, From 1c8391412d7794e0b38393ed98fef9a974401f05 Mon Sep 17 00:00:00 2001 From: Anshuman Khandual Date: Thu, 20 Sep 2018 09:36:19 +0530 Subject: [PATCH 33/78] arm64/cpufeatures: Introduce ESR_ELx_SYS64_ISS_RT() Extracting target register from ESR.ISS encoding has already been required at multiple instances. Just make it a macro definition and replace all the existing use cases. Reviewed-by: Suzuki K Poulose Acked-by: Mark Rutland Signed-off-by: Anshuman Khandual Signed-off-by: Catalin Marinas --- arch/arm64/include/asm/esr.h | 2 ++ arch/arm64/include/asm/kvm_emulate.h | 2 +- arch/arm64/kernel/traps.c | 8 ++++---- 3 files changed, 7 insertions(+), 5 deletions(-) diff --git a/arch/arm64/include/asm/esr.h b/arch/arm64/include/asm/esr.h index ce70c3ffb993..cc2d9e7bafe6 100644 --- a/arch/arm64/include/asm/esr.h +++ b/arch/arm64/include/asm/esr.h @@ -187,6 +187,8 @@ #define ESR_ELx_SYS64_ISS_SYS_OP_MASK (ESR_ELx_SYS64_ISS_SYS_MASK | \ ESR_ELx_SYS64_ISS_DIR_MASK) +#define ESR_ELx_SYS64_ISS_RT(esr) \ + (((esr) & ESR_ELx_SYS64_ISS_RT_MASK) >> ESR_ELx_SYS64_ISS_RT_SHIFT) /* * User space cache operations have the following sysreg encoding * in System instructions. diff --git a/arch/arm64/include/asm/kvm_emulate.h b/arch/arm64/include/asm/kvm_emulate.h index 6106a85ae0be..21247870def7 100644 --- a/arch/arm64/include/asm/kvm_emulate.h +++ b/arch/arm64/include/asm/kvm_emulate.h @@ -335,7 +335,7 @@ static inline bool kvm_vcpu_dabt_isextabt(const struct kvm_vcpu *vcpu) static inline int kvm_vcpu_sys_get_rt(struct kvm_vcpu *vcpu) { u32 esr = kvm_vcpu_get_hsr(vcpu); - return (esr & ESR_ELx_SYS64_ISS_RT_MASK) >> ESR_ELx_SYS64_ISS_RT_SHIFT; + return ESR_ELx_SYS64_ISS_RT(esr); } static inline unsigned long kvm_vcpu_get_mpidr_aff(struct kvm_vcpu *vcpu) diff --git a/arch/arm64/kernel/traps.c b/arch/arm64/kernel/traps.c index ce29973b9bfe..abfb304e1025 100644 --- a/arch/arm64/kernel/traps.c +++ b/arch/arm64/kernel/traps.c @@ -438,7 +438,7 @@ asmlinkage void __exception do_undefinstr(struct pt_regs *regs) static void user_cache_maint_handler(unsigned int esr, struct pt_regs *regs) { unsigned long address; - int rt = (esr & ESR_ELx_SYS64_ISS_RT_MASK) >> ESR_ELx_SYS64_ISS_RT_SHIFT; + int rt = ESR_ELx_SYS64_ISS_RT(esr); int crm = (esr & ESR_ELx_SYS64_ISS_CRM_MASK) >> ESR_ELx_SYS64_ISS_CRM_SHIFT; int ret = 0; @@ -473,7 +473,7 @@ static void user_cache_maint_handler(unsigned int esr, struct pt_regs *regs) static void ctr_read_handler(unsigned int esr, struct pt_regs *regs) { - int rt = (esr & ESR_ELx_SYS64_ISS_RT_MASK) >> ESR_ELx_SYS64_ISS_RT_SHIFT; + int rt = ESR_ELx_SYS64_ISS_RT(esr); unsigned long val = arm64_ftr_reg_user_value(&arm64_ftr_reg_ctrel0); pt_regs_write_reg(regs, rt, val); @@ -483,7 +483,7 @@ static void ctr_read_handler(unsigned int esr, struct pt_regs *regs) static void cntvct_read_handler(unsigned int esr, struct pt_regs *regs) { - int rt = (esr & ESR_ELx_SYS64_ISS_RT_MASK) >> ESR_ELx_SYS64_ISS_RT_SHIFT; + int rt = ESR_ELx_SYS64_ISS_RT(esr); pt_regs_write_reg(regs, rt, arch_counter_get_cntvct()); arm64_skip_faulting_instruction(regs, AARCH64_INSN_SIZE); @@ -491,7 +491,7 @@ static void cntvct_read_handler(unsigned int esr, struct pt_regs *regs) static void cntfrq_read_handler(unsigned int esr, struct pt_regs *regs) { - int rt = (esr & ESR_ELx_SYS64_ISS_RT_MASK) >> ESR_ELx_SYS64_ISS_RT_SHIFT; + int rt = ESR_ELx_SYS64_ISS_RT(esr); pt_regs_write_reg(regs, rt, arch_timer_get_rate()); arm64_skip_faulting_instruction(regs, AARCH64_INSN_SIZE); From 520ad98871a072471d2583a9386b9d7243fa584d Mon Sep 17 00:00:00 2001 From: Anshuman Khandual Date: Thu, 20 Sep 2018 09:36:20 +0530 Subject: [PATCH 34/78] arm64/cpufeatures: Factorize emulate_mrs() MRS emulation gets triggered with exception class (0x00 or 0x18) eventually calling the function emulate_mrs() which fetches the user space instruction and analyses it's encodings (OP0, OP1, OP2, CRN, CRM, RT). The kernel tries to emulate the given instruction looking into the encoding details. Going forward these encodings can also be parsed from ESR_ELx.ISS fields without requiring to fetch/decode faulting userspace instruction which can improve performance. This factorizes emulate_mrs() function in a way that it can be called directly with MRS encodings (OP0, OP1, OP2, CRN, CRM) for any given target register which can then be used directly from 0x18 exception class. Reviewed-by: Suzuki K Poulose Acked-by: Mark Rutland Signed-off-by: Anshuman Khandual Signed-off-by: Catalin Marinas --- arch/arm64/include/asm/cpufeature.h | 1 + arch/arm64/kernel/cpufeature.c | 25 +++++++++++++++---------- 2 files changed, 16 insertions(+), 10 deletions(-) diff --git a/arch/arm64/include/asm/cpufeature.h b/arch/arm64/include/asm/cpufeature.h index bb9fbf6f910a..6db48d90ad63 100644 --- a/arch/arm64/include/asm/cpufeature.h +++ b/arch/arm64/include/asm/cpufeature.h @@ -536,6 +536,7 @@ void arm64_set_ssbd_mitigation(bool state); static inline void arm64_set_ssbd_mitigation(bool state) {} #endif +extern int do_emulate_mrs(struct pt_regs *regs, u32 sys_reg, u32 rt); #endif /* __ASSEMBLY__ */ #endif diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c index 237f8822a391..00e7c313f088 100644 --- a/arch/arm64/kernel/cpufeature.c +++ b/arch/arm64/kernel/cpufeature.c @@ -1844,25 +1844,30 @@ static int emulate_sys_reg(u32 id, u64 *valp) return 0; } -static int emulate_mrs(struct pt_regs *regs, u32 insn) +int do_emulate_mrs(struct pt_regs *regs, u32 sys_reg, u32 rt) { int rc; - u32 sys_reg, dst; u64 val; + rc = emulate_sys_reg(sys_reg, &val); + if (!rc) { + pt_regs_write_reg(regs, rt, val); + arm64_skip_faulting_instruction(regs, AARCH64_INSN_SIZE); + } + return rc; +} + +static int emulate_mrs(struct pt_regs *regs, u32 insn) +{ + u32 sys_reg, rt; + /* * sys_reg values are defined as used in mrs/msr instruction. * shift the imm value to get the encoding. */ sys_reg = (u32)aarch64_insn_decode_immediate(AARCH64_INSN_IMM_16, insn) << 5; - rc = emulate_sys_reg(sys_reg, &val); - if (!rc) { - dst = aarch64_insn_decode_register(AARCH64_INSN_REGTYPE_RT, insn); - pt_regs_write_reg(regs, dst, val); - arm64_skip_faulting_instruction(regs, AARCH64_INSN_SIZE); - } - - return rc; + rt = aarch64_insn_decode_register(AARCH64_INSN_REGTYPE_RT, insn); + return do_emulate_mrs(regs, sys_reg, rt); } static struct undef_hook mrs_hook = { From 21f84796177443695680180a8493a9e20d254d5e Mon Sep 17 00:00:00 2001 From: Anshuman Khandual Date: Thu, 20 Sep 2018 09:36:21 +0530 Subject: [PATCH 35/78] arm64/cpufeatures: Emulate MRS instructions by parsing ESR_ELx.ISS Armv8.4-A extension enables MRS instruction encodings inside ESR_ELx.ISS during exception class ESR_ELx_EC_SYS64 (0x18). This encoding can be used to emulate MRS instructions which can avoid fetch/decode from user space thus improving performance. This adds a new sys64_hook structure element with applicable ESR mask/value pair for MRS instructions on various system registers but constrained by sysreg encodings which is currently allowed to be emulated. Reviewed-by: Suzuki K Poulose Acked-by: Mark Rutland Signed-off-by: Anshuman Khandual Signed-off-by: Catalin Marinas --- arch/arm64/include/asm/esr.h | 12 ++++++++++++ arch/arm64/kernel/traps.c | 17 +++++++++++++++++ 2 files changed, 29 insertions(+) diff --git a/arch/arm64/include/asm/esr.h b/arch/arm64/include/asm/esr.h index cc2d9e7bafe6..37e84f277ee2 100644 --- a/arch/arm64/include/asm/esr.h +++ b/arch/arm64/include/asm/esr.h @@ -208,6 +208,18 @@ #define ESR_ELx_SYS64_ISS_EL0_CACHE_OP_VAL \ (ESR_ELx_SYS64_ISS_SYS_VAL(1, 3, 1, 7, 0) | \ ESR_ELx_SYS64_ISS_DIR_WRITE) +/* + * User space MRS operations which are supported for emulation + * have the following sysreg encoding in System instructions. + * op0 = 3, op1= 0, crn = 0, {crm = 0, 4-7}, READ (L = 1) + */ +#define ESR_ELx_SYS64_ISS_SYS_MRS_OP_MASK (ESR_ELx_SYS64_ISS_OP0_MASK | \ + ESR_ELx_SYS64_ISS_OP1_MASK | \ + ESR_ELx_SYS64_ISS_CRN_MASK | \ + ESR_ELx_SYS64_ISS_DIR_MASK) +#define ESR_ELx_SYS64_ISS_SYS_MRS_OP_VAL \ + (ESR_ELx_SYS64_ISS_SYS_VAL(3, 0, 0, 0, 0) | \ + ESR_ELx_SYS64_ISS_DIR_READ) #define ESR_ELx_SYS64_ISS_SYS_CTR ESR_ELx_SYS64_ISS_SYS_VAL(3, 3, 1, 0, 0) #define ESR_ELx_SYS64_ISS_SYS_CTR_READ (ESR_ELx_SYS64_ISS_SYS_CTR | \ diff --git a/arch/arm64/kernel/traps.c b/arch/arm64/kernel/traps.c index abfb304e1025..21689c6a985f 100644 --- a/arch/arm64/kernel/traps.c +++ b/arch/arm64/kernel/traps.c @@ -497,6 +497,17 @@ static void cntfrq_read_handler(unsigned int esr, struct pt_regs *regs) arm64_skip_faulting_instruction(regs, AARCH64_INSN_SIZE); } +static void mrs_handler(unsigned int esr, struct pt_regs *regs) +{ + u32 sysreg, rt; + + rt = ESR_ELx_SYS64_ISS_RT(esr); + sysreg = esr_sys64_to_sysreg(esr); + + if (do_emulate_mrs(regs, sysreg, rt) != 0) + force_signal_inject(SIGILL, ILL_ILLOPC, regs->pc); +} + struct sys64_hook { unsigned int esr_mask; unsigned int esr_val; @@ -527,6 +538,12 @@ static struct sys64_hook sys64_hooks[] = { .esr_val = ESR_ELx_SYS64_ISS_SYS_CNTFRQ, .handler = cntfrq_read_handler, }, + { + /* Trap read access to CPUID registers */ + .esr_mask = ESR_ELx_SYS64_ISS_SYS_MRS_OP_MASK, + .esr_val = ESR_ELx_SYS64_ISS_SYS_MRS_OP_VAL, + .handler = mrs_handler, + }, {}, }; From 8a695a5873339c2e7c746ee51e3774fedd07d0a9 Mon Sep 17 00:00:00 2001 From: James Morse Date: Fri, 31 Aug 2018 16:19:43 +0100 Subject: [PATCH 36/78] arm64: Kconfig: Remove ARCH_HAS_HOLES_MEMORYMODEL include/linux/mmzone.h describes ARCH_HAS_HOLES_MEMORYMODEL as relevant when parts the memmap have been free()d. This would happen on systems where memory is smaller than a sparsemem-section, and the extra struct pages are expensive. pfn_valid() on these systems returns true for the whole sparsemem-section, so an extra memmap_valid_within() check is needed. On arm64 we have nomap memory, so always provide pfn_valid() to test for nomap pages. This means ARCH_HAS_HOLES_MEMORYMODEL's extra checks are already rolled up into pfn_valid(). Remove it. Acked-by: Will Deacon Signed-off-by: James Morse Signed-off-by: Catalin Marinas --- arch/arm64/Kconfig | 5 +---- arch/arm64/include/asm/page.h | 2 -- arch/arm64/mm/init.c | 2 -- 3 files changed, 1 insertion(+), 8 deletions(-) diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig index fabac617d605..da5e6f085561 100644 --- a/arch/arm64/Kconfig +++ b/arch/arm64/Kconfig @@ -771,9 +771,6 @@ source kernel/Kconfig.hz config ARCH_SUPPORTS_DEBUG_PAGEALLOC def_bool y -config ARCH_HAS_HOLES_MEMORYMODEL - def_bool y if SPARSEMEM - config ARCH_SPARSEMEM_ENABLE def_bool y select SPARSEMEM_VMEMMAP_ENABLE @@ -788,7 +785,7 @@ config ARCH_FLATMEM_ENABLE def_bool !NUMA config HAVE_ARCH_PFN_VALID - def_bool ARCH_HAS_HOLES_MEMORYMODEL || !SPARSEMEM + def_bool y config HW_PERF_EVENTS def_bool y diff --git a/arch/arm64/include/asm/page.h b/arch/arm64/include/asm/page.h index 60d02c81a3a2..c88a3cb117a1 100644 --- a/arch/arm64/include/asm/page.h +++ b/arch/arm64/include/asm/page.h @@ -37,9 +37,7 @@ extern void clear_page(void *to); typedef struct page *pgtable_t; -#ifdef CONFIG_HAVE_ARCH_PFN_VALID extern int pfn_valid(unsigned long); -#endif #include diff --git a/arch/arm64/mm/init.c b/arch/arm64/mm/init.c index 787e27964ab9..3cf87341859f 100644 --- a/arch/arm64/mm/init.c +++ b/arch/arm64/mm/init.c @@ -284,7 +284,6 @@ static void __init zone_sizes_init(unsigned long min, unsigned long max) #endif /* CONFIG_NUMA */ -#ifdef CONFIG_HAVE_ARCH_PFN_VALID int pfn_valid(unsigned long pfn) { phys_addr_t addr = pfn << PAGE_SHIFT; @@ -294,7 +293,6 @@ int pfn_valid(unsigned long pfn) return memblock_is_map_memory(addr); } EXPORT_SYMBOL(pfn_valid); -#endif #ifndef CONFIG_SPARSEMEM static void __init arm64_memory_present(void) From 0b8af74549c2f985bfc3d2a983961b2a66882890 Mon Sep 17 00:00:00 2001 From: Andrew Murray Date: Thu, 13 Sep 2018 12:56:40 +0100 Subject: [PATCH 37/78] arm64: Remove unused VGA console support Support for VGA_CONSOLE is not allowable due to commit ee23794b8668 ("video: vgacon: Don't build on arm64"), thus remove the associated unused code. Whilst PCI on arm64 would support VGA a valid screen_info structure is missing. Acked-by: Will Deacon Signed-off-by: Andrew Murray Signed-off-by: Catalin Marinas --- arch/arm64/kernel/setup.c | 4 ---- 1 file changed, 4 deletions(-) diff --git a/arch/arm64/kernel/setup.c b/arch/arm64/kernel/setup.c index 5b4fac434c84..95670a1eafb0 100644 --- a/arch/arm64/kernel/setup.c +++ b/arch/arm64/kernel/setup.c @@ -351,11 +351,7 @@ void __init setup_arch(char **cmdline_p) #endif #ifdef CONFIG_VT -#if defined(CONFIG_VGA_CONSOLE) - conswitchp = &vga_con; -#elif defined(CONFIG_DUMMY_CONSOLE) conswitchp = &dummy_con; -#endif #endif if (boot_args[1] || boot_args[2] || boot_args[3]) { pr_err("WARNING: x1-x3 nonzero in violation of boot protocol:\n" From 2a6c7c367de82951c98a290a21156770f6f82c84 Mon Sep 17 00:00:00 2001 From: Tri Vo Date: Wed, 19 Sep 2018 12:27:50 -0700 Subject: [PATCH 38/78] arm64: lse: remove -fcall-used-x0 flag x0 is not callee-saved in the PCS. So there is no need to specify -fcall-used-x0. Clang doesn't currently support -fcall-used flags. This patch will help building the kernel with clang. Tested-by: Nick Desaulniers Acked-by: Will Deacon Signed-off-by: Tri Vo Signed-off-by: Catalin Marinas --- arch/arm64/lib/Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm64/lib/Makefile b/arch/arm64/lib/Makefile index f28f91fd96a2..69ff9887f724 100644 --- a/arch/arm64/lib/Makefile +++ b/arch/arm64/lib/Makefile @@ -12,7 +12,7 @@ lib-y := clear_user.o delay.o copy_from_user.o \ # when supported by the CPU. Result and argument registers are handled # correctly, based on the function prototype. lib-$(CONFIG_ARM64_LSE_ATOMICS) += atomic_ll_sc.o -CFLAGS_atomic_ll_sc.o := -fcall-used-x0 -ffixed-x1 -ffixed-x2 \ +CFLAGS_atomic_ll_sc.o := -ffixed-x1 -ffixed-x2 \ -ffixed-x3 -ffixed-x4 -ffixed-x5 -ffixed-x6 \ -ffixed-x7 -fcall-saved-x8 -fcall-saved-x9 \ -fcall-saved-x10 -fcall-saved-x11 -fcall-saved-x12 \ From 693d5639b44a8f3787444902d3600edc7e0105a2 Mon Sep 17 00:00:00 2001 From: Jun Yao Date: Mon, 24 Sep 2018 14:51:13 +0100 Subject: [PATCH 39/78] arm64/mm: Pass ttbr1 as a parameter to __enable_mmu() In subsequent patches we'll use a transient pgd during the primary cpu's boot process. To make this work while allowing secondary cpus to use the swapper_pg_dir, we need to pass the relevant TTBR1 pgd as a parameter to __enable_mmu(). This patch updates __enable__mmu() to take this as a parameter, updating callsites to pass swapper_pg_dir for now. There should be no functional change as a result of this patch. Signed-off-by: Jun Yao Reviewed-by: James Morse [Mark: simplify assembly, clarify commit message] Signed-off-by: Mark Rutland Signed-off-by: Catalin Marinas --- arch/arm64/kernel/head.S | 20 +++++++++++--------- arch/arm64/kernel/sleep.S | 1 + 2 files changed, 12 insertions(+), 9 deletions(-) diff --git a/arch/arm64/kernel/head.S b/arch/arm64/kernel/head.S index b0853069702f..7983ddf0c0e9 100644 --- a/arch/arm64/kernel/head.S +++ b/arch/arm64/kernel/head.S @@ -706,6 +706,7 @@ secondary_startup: * Common entry point for secondary CPUs. */ bl __cpu_setup // initialise processor + adrp x1, swapper_pg_dir bl __enable_mmu ldr x8, =__secondary_switched br x8 @@ -748,6 +749,7 @@ ENDPROC(__secondary_switched) * Enable the MMU. * * x0 = SCTLR_EL1 value for turning on the MMU. + * x1 = TTBR1_EL1 value * * Returns to the caller via x30/lr. This requires the caller to be covered * by the .idmap.text section. @@ -756,17 +758,16 @@ ENDPROC(__secondary_switched) * If it isn't, park the CPU */ ENTRY(__enable_mmu) - mrs x1, ID_AA64MMFR0_EL1 - ubfx x2, x1, #ID_AA64MMFR0_TGRAN_SHIFT, 4 + mrs x2, ID_AA64MMFR0_EL1 + ubfx x2, x2, #ID_AA64MMFR0_TGRAN_SHIFT, 4 cmp x2, #ID_AA64MMFR0_TGRAN_SUPPORTED b.ne __no_granule_support - update_early_cpu_boot_status 0, x1, x2 - adrp x1, idmap_pg_dir - adrp x2, swapper_pg_dir - phys_to_ttbr x3, x1 - phys_to_ttbr x4, x2 - msr ttbr0_el1, x3 // load TTBR0 - msr ttbr1_el1, x4 // load TTBR1 + update_early_cpu_boot_status 0, x2, x3 + adrp x2, idmap_pg_dir + phys_to_ttbr x1, x1 + phys_to_ttbr x2, x2 + msr ttbr0_el1, x2 // load TTBR0 + msr ttbr1_el1, x1 // load TTBR1 isb msr sctlr_el1, x0 isb @@ -823,6 +824,7 @@ __primary_switch: mrs x20, sctlr_el1 // preserve old SCTLR_EL1 value #endif + adrp x1, swapper_pg_dir bl __enable_mmu #ifdef CONFIG_RELOCATABLE bl __relocate_kernel diff --git a/arch/arm64/kernel/sleep.S b/arch/arm64/kernel/sleep.S index bebec8ef9372..3e53ffa07994 100644 --- a/arch/arm64/kernel/sleep.S +++ b/arch/arm64/kernel/sleep.S @@ -101,6 +101,7 @@ ENTRY(cpu_resume) bl el2_setup // if in EL2 drop to EL1 cleanly bl __cpu_setup /* enable the MMU early - so we can access sleep_save_stash by va */ + adrp x1, swapper_pg_dir bl __enable_mmu ldr x8, =_cpu_resume br x8 From 2b5548b68199c17c1466d5798cf2c9cd806bdaa9 Mon Sep 17 00:00:00 2001 From: Jun Yao Date: Mon, 24 Sep 2018 15:47:49 +0100 Subject: [PATCH 40/78] arm64/mm: Separate boot-time page tables from swapper_pg_dir Since the address of swapper_pg_dir is fixed for a given kernel image, it is an attractive target for manipulation via an arbitrary write. To mitigate this we'd like to make it read-only by moving it into the rodata section. We require that swapper_pg_dir is at a fixed offset from tramp_pg_dir and reserved_ttbr0, so these will also need to move into rodata. However, swapper_pg_dir is allocated along with some transient page tables used for boot which we do not want to move into rodata. As a step towards this, this patch separates the boot-time page tables into a new init_pg_dir, and reduces swapper_pg_dir to the single page it needs to be. This allows us to retain the relationship between swapper_pg_dir, tramp_pg_dir, and swapper_pg_dir, while cleanly separating these from the boot-time page tables. The init_pg_dir holds all of the pgd/pud/pmd/pte levels needed during boot, and all of these levels will be freed when we switch to the swapper_pg_dir, which is initialized by the existing code in paging_init(). Since we start off on the init_pg_dir, we no longer need to allocate a transient page table in paging_init() in order to ensure that swapper_pg_dir isn't live while we initialize it. There should be no functional change as a result of this patch. Signed-off-by: Jun Yao Reviewed-by: James Morse [Mark: place init_pg_dir after BSS, fold mm changes, commit message] Signed-off-by: Mark Rutland Signed-off-by: Catalin Marinas --- arch/arm64/include/asm/kernel-pgtable.h | 2 +- arch/arm64/include/asm/mmu.h | 3 +++ arch/arm64/include/asm/pgtable.h | 3 ++- arch/arm64/kernel/head.S | 10 ++++----- arch/arm64/kernel/vmlinux.lds.S | 6 ++++- arch/arm64/mm/mmu.c | 30 +++++-------------------- 6 files changed, 21 insertions(+), 33 deletions(-) diff --git a/arch/arm64/include/asm/kernel-pgtable.h b/arch/arm64/include/asm/kernel-pgtable.h index a780f6714b44..850e2122d53f 100644 --- a/arch/arm64/include/asm/kernel-pgtable.h +++ b/arch/arm64/include/asm/kernel-pgtable.h @@ -97,7 +97,7 @@ + EARLY_PGDS((vstart), (vend)) /* each PGDIR needs a next level page table */ \ + EARLY_PUDS((vstart), (vend)) /* each PUD needs a next level page table */ \ + EARLY_PMDS((vstart), (vend))) /* each PMD needs a next level page table */ -#define SWAPPER_DIR_SIZE (PAGE_SIZE * EARLY_PAGES(KIMAGE_VADDR + TEXT_OFFSET, _end)) +#define INIT_DIR_SIZE (PAGE_SIZE * EARLY_PAGES(KIMAGE_VADDR + TEXT_OFFSET, _end)) #define IDMAP_DIR_SIZE (IDMAP_PGTABLE_LEVELS * PAGE_SIZE) #ifdef CONFIG_ARM64_SW_TTBR0_PAN diff --git a/arch/arm64/include/asm/mmu.h b/arch/arm64/include/asm/mmu.h index dd320df0d026..7689c7aa1d77 100644 --- a/arch/arm64/include/asm/mmu.h +++ b/arch/arm64/include/asm/mmu.h @@ -95,5 +95,8 @@ extern void create_pgd_mapping(struct mm_struct *mm, phys_addr_t phys, extern void *fixmap_remap_fdt(phys_addr_t dt_phys); extern void mark_linear_text_alias_ro(void); +#define INIT_MM_CONTEXT(name) \ + .pgd = init_pg_dir, + #endif /* !__ASSEMBLY__ */ #endif diff --git a/arch/arm64/include/asm/pgtable.h b/arch/arm64/include/asm/pgtable.h index 2ab2031b778c..92bcc50b0cc2 100644 --- a/arch/arm64/include/asm/pgtable.h +++ b/arch/arm64/include/asm/pgtable.h @@ -718,8 +718,9 @@ static inline pmd_t pmdp_establish(struct vm_area_struct *vma, } #endif +extern pgd_t init_pg_dir[PTRS_PER_PGD]; +extern pgd_t init_pg_end[]; extern pgd_t swapper_pg_dir[PTRS_PER_PGD]; -extern pgd_t swapper_pg_end[]; extern pgd_t idmap_pg_dir[PTRS_PER_PGD]; extern pgd_t tramp_pg_dir[PTRS_PER_PGD]; diff --git a/arch/arm64/kernel/head.S b/arch/arm64/kernel/head.S index 7983ddf0c0e9..7bf52ee0a805 100644 --- a/arch/arm64/kernel/head.S +++ b/arch/arm64/kernel/head.S @@ -291,7 +291,7 @@ __create_page_tables: * dirty cache lines being evicted. */ adrp x0, idmap_pg_dir - adrp x1, swapper_pg_end + adrp x1, init_pg_end sub x1, x1, x0 bl __inval_dcache_area @@ -299,7 +299,7 @@ __create_page_tables: * Clear the idmap and swapper page tables. */ adrp x0, idmap_pg_dir - adrp x1, swapper_pg_end + adrp x1, init_pg_end sub x1, x1, x0 1: stp xzr, xzr, [x0], #16 stp xzr, xzr, [x0], #16 @@ -373,7 +373,7 @@ __create_page_tables: /* * Map the kernel image (starting with PHYS_OFFSET). */ - adrp x0, swapper_pg_dir + adrp x0, init_pg_dir mov_q x5, KIMAGE_VADDR + TEXT_OFFSET // compile time __va(_text) add x5, x5, x23 // add KASLR displacement mov x4, PTRS_PER_PGD @@ -390,7 +390,7 @@ __create_page_tables: * tables again to remove any speculatively loaded cache lines. */ adrp x0, idmap_pg_dir - adrp x1, swapper_pg_end + adrp x1, init_pg_end sub x1, x1, x0 dmb sy bl __inval_dcache_area @@ -824,7 +824,7 @@ __primary_switch: mrs x20, sctlr_el1 // preserve old SCTLR_EL1 value #endif - adrp x1, swapper_pg_dir + adrp x1, init_pg_dir bl __enable_mmu #ifdef CONFIG_RELOCATABLE bl __relocate_kernel diff --git a/arch/arm64/kernel/vmlinux.lds.S b/arch/arm64/kernel/vmlinux.lds.S index 605d1b60469c..ac4c10351b17 100644 --- a/arch/arm64/kernel/vmlinux.lds.S +++ b/arch/arm64/kernel/vmlinux.lds.S @@ -229,9 +229,13 @@ SECTIONS . += RESERVED_TTBR0_SIZE; #endif swapper_pg_dir = .; - . += SWAPPER_DIR_SIZE; + . += PAGE_SIZE; swapper_pg_end = .; + init_pg_dir = .; + . += INIT_DIR_SIZE; + init_pg_end = .; + __pecoff_data_size = ABSOLUTE(. - __initdata_begin); _end = .; diff --git a/arch/arm64/mm/mmu.c b/arch/arm64/mm/mmu.c index 65f86271f02b..8a5491053717 100644 --- a/arch/arm64/mm/mmu.c +++ b/arch/arm64/mm/mmu.c @@ -629,34 +629,14 @@ static void __init map_kernel(pgd_t *pgdp) */ void __init paging_init(void) { - phys_addr_t pgd_phys = early_pgtable_alloc(); - pgd_t *pgdp = pgd_set_fixmap(pgd_phys); + map_kernel(swapper_pg_dir); + map_mem(swapper_pg_dir); - map_kernel(pgdp); - map_mem(pgdp); - - /* - * We want to reuse the original swapper_pg_dir so we don't have to - * communicate the new address to non-coherent secondaries in - * secondary_entry, and so cpu_switch_mm can generate the address with - * adrp+add rather than a load from some global variable. - * - * To do this we need to go via a temporary pgd. - */ - cpu_replace_ttbr1(__va(pgd_phys)); - memcpy(swapper_pg_dir, pgdp, PGD_SIZE); cpu_replace_ttbr1(lm_alias(swapper_pg_dir)); + init_mm.pgd = swapper_pg_dir; - pgd_clear_fixmap(); - memblock_free(pgd_phys, PAGE_SIZE); - - /* - * We only reuse the PGD from the swapper_pg_dir, not the pud + pmd - * allocated with it. - */ - memblock_free(__pa_symbol(swapper_pg_dir) + PAGE_SIZE, - __pa_symbol(swapper_pg_end) - __pa_symbol(swapper_pg_dir) - - PAGE_SIZE); + memblock_free(__pa_symbol(init_pg_dir), + __pa_symbol(init_pg_end) - __pa_symbol(init_pg_dir)); } /* From 2330b7ca78350efcb1a3b919ea4b3e0e4c57d99f Mon Sep 17 00:00:00 2001 From: Jun Yao Date: Mon, 24 Sep 2018 17:15:02 +0100 Subject: [PATCH 41/78] arm64/mm: use fixmap to modify swapper_pg_dir Once swapper_pg_dir is in the rodata section, it will not be possible to modify it directly, but we will need to modify it in some cases. To enable this, we can use the fixmap when deliberately modifying swapper_pg_dir. As the pgd is only transiently mapped, this provides some resilience against illicit modification of the pgd, e.g. for Kernel Space Mirror Attack (KSMA). Signed-off-by: Jun Yao Reviewed-by: James Morse [Mark: simplify ifdeffery, commit message] Signed-off-by: Mark Rutland Signed-off-by: Catalin Marinas --- arch/arm64/include/asm/pgtable.h | 35 ++++++++++++++++++++++++++------ arch/arm64/mm/mmu.c | 26 ++++++++++++++++++++++-- 2 files changed, 53 insertions(+), 8 deletions(-) diff --git a/arch/arm64/include/asm/pgtable.h b/arch/arm64/include/asm/pgtable.h index 92bcc50b0cc2..b58f764babf8 100644 --- a/arch/arm64/include/asm/pgtable.h +++ b/arch/arm64/include/asm/pgtable.h @@ -429,8 +429,27 @@ extern pgprot_t phys_mem_access_prot(struct file *file, unsigned long pfn, PUD_TYPE_TABLE) #endif +extern pgd_t init_pg_dir[PTRS_PER_PGD]; +extern pgd_t init_pg_end[]; +extern pgd_t swapper_pg_dir[PTRS_PER_PGD]; +extern pgd_t idmap_pg_dir[PTRS_PER_PGD]; +extern pgd_t tramp_pg_dir[PTRS_PER_PGD]; + +extern void set_swapper_pgd(pgd_t *pgdp, pgd_t pgd); + +static inline bool in_swapper_pgdir(void *addr) +{ + return ((unsigned long)addr & PAGE_MASK) == + ((unsigned long)swapper_pg_dir & PAGE_MASK); +} + static inline void set_pmd(pmd_t *pmdp, pmd_t pmd) { + if (__is_defined(__PAGETABLE_PMD_FOLDED) && in_swapper_pgdir(pmdp)) { + set_swapper_pgd((pgd_t *)pmdp, __pgd(pmd_val(pmd))); + return; + } + WRITE_ONCE(*pmdp, pmd); if (pmd_valid(pmd)) @@ -484,6 +503,11 @@ static inline phys_addr_t pmd_page_paddr(pmd_t pmd) static inline void set_pud(pud_t *pudp, pud_t pud) { + if (__is_defined(__PAGETABLE_PUD_FOLDED) && in_swapper_pgdir(pudp)) { + set_swapper_pgd((pgd_t *)pudp, __pgd(pud_val(pud))); + return; + } + WRITE_ONCE(*pudp, pud); if (pud_valid(pud)) @@ -538,6 +562,11 @@ static inline phys_addr_t pud_page_paddr(pud_t pud) static inline void set_pgd(pgd_t *pgdp, pgd_t pgd) { + if (in_swapper_pgdir(pgdp)) { + set_swapper_pgd(pgdp, pgd); + return; + } + WRITE_ONCE(*pgdp, pgd); dsb(ishst); } @@ -718,12 +747,6 @@ static inline pmd_t pmdp_establish(struct vm_area_struct *vma, } #endif -extern pgd_t init_pg_dir[PTRS_PER_PGD]; -extern pgd_t init_pg_end[]; -extern pgd_t swapper_pg_dir[PTRS_PER_PGD]; -extern pgd_t idmap_pg_dir[PTRS_PER_PGD]; -extern pgd_t tramp_pg_dir[PTRS_PER_PGD]; - /* * Encode and decode a swap entry: * bits 0-1: present (must be zero) diff --git a/arch/arm64/mm/mmu.c b/arch/arm64/mm/mmu.c index 8a5491053717..6f0e2edcc114 100644 --- a/arch/arm64/mm/mmu.c +++ b/arch/arm64/mm/mmu.c @@ -67,6 +67,24 @@ static pte_t bm_pte[PTRS_PER_PTE] __page_aligned_bss; static pmd_t bm_pmd[PTRS_PER_PMD] __page_aligned_bss __maybe_unused; static pud_t bm_pud[PTRS_PER_PUD] __page_aligned_bss __maybe_unused; +static DEFINE_SPINLOCK(swapper_pgdir_lock); + +void set_swapper_pgd(pgd_t *pgdp, pgd_t pgd) +{ + pgd_t *fixmap_pgdp; + + spin_lock(&swapper_pgdir_lock); + fixmap_pgdp = pgd_set_fixmap(__pa(pgdp)); + WRITE_ONCE(*fixmap_pgdp, pgd); + /* + * We need dsb(ishst) here to ensure the page-table-walker sees + * our new entry before set_p?d() returns. The fixmap's + * flush_tlb_kernel_range() via clear_fixmap() does this for us. + */ + pgd_clear_fixmap(); + spin_unlock(&swapper_pgdir_lock); +} + pgprot_t phys_mem_access_prot(struct file *file, unsigned long pfn, unsigned long size, pgprot_t vma_prot) { @@ -629,8 +647,12 @@ static void __init map_kernel(pgd_t *pgdp) */ void __init paging_init(void) { - map_kernel(swapper_pg_dir); - map_mem(swapper_pg_dir); + pgd_t *pgdp = pgd_set_fixmap(__pa_symbol(swapper_pg_dir)); + + map_kernel(pgdp); + map_mem(pgdp); + + pgd_clear_fixmap(); cpu_replace_ttbr1(lm_alias(swapper_pg_dir)); init_mm.pgd = swapper_pg_dir; From 8eb7e28d4c642c310f25c18f80a44dd4b01c694e Mon Sep 17 00:00:00 2001 From: Jun Yao Date: Mon, 24 Sep 2018 17:56:18 +0100 Subject: [PATCH 42/78] arm64/mm: move runtime pgds to rodata Now that deliberate writes to swapper_pg_dir are made via the fixmap, we can defend against errant writes by moving it into the rodata section. Since tramp_pg_dir and reserved_ttbr0 must be at a fixed offset from swapper_pg_dir, and are not modified at runtime, these are also moved into the rodata section. Likewise, idmap_pg_dir is not modified at runtime, and is moved into rodata. Signed-off-by: Jun Yao Reviewed-by: James Morse [Mark: simplify linker script, commit message] Signed-off-by: Mark Rutland Signed-off-by: Catalin Marinas --- arch/arm64/kernel/head.S | 12 +++++++----- arch/arm64/kernel/vmlinux.lds.S | 33 +++++++++++++++++---------------- 2 files changed, 24 insertions(+), 21 deletions(-) diff --git a/arch/arm64/kernel/head.S b/arch/arm64/kernel/head.S index 7bf52ee0a805..4471f570a295 100644 --- a/arch/arm64/kernel/head.S +++ b/arch/arm64/kernel/head.S @@ -287,18 +287,20 @@ __create_page_tables: mov x28, lr /* - * Invalidate the idmap and swapper page tables to avoid potential - * dirty cache lines being evicted. + * Invalidate the init page tables to avoid potential dirty cache lines + * being evicted. Other page tables are allocated in rodata as part of + * the kernel image, and thus are clean to the PoC per the boot + * protocol. */ - adrp x0, idmap_pg_dir + adrp x0, init_pg_dir adrp x1, init_pg_end sub x1, x1, x0 bl __inval_dcache_area /* - * Clear the idmap and swapper page tables. + * Clear the init page tables. */ - adrp x0, idmap_pg_dir + adrp x0, init_pg_dir adrp x1, init_pg_end sub x1, x1, x0 1: stp xzr, xzr, [x0], #16 diff --git a/arch/arm64/kernel/vmlinux.lds.S b/arch/arm64/kernel/vmlinux.lds.S index ac4c10351b17..ab29c06a7d4b 100644 --- a/arch/arm64/kernel/vmlinux.lds.S +++ b/arch/arm64/kernel/vmlinux.lds.S @@ -138,6 +138,23 @@ SECTIONS EXCEPTION_TABLE(8) /* __init_begin will be marked RO NX */ NOTES + . = ALIGN(PAGE_SIZE); + idmap_pg_dir = .; + . += IDMAP_DIR_SIZE; + +#ifdef CONFIG_UNMAP_KERNEL_AT_EL0 + tramp_pg_dir = .; + . += PAGE_SIZE; +#endif + +#ifdef CONFIG_ARM64_SW_TTBR0_PAN + reserved_ttbr0 = .; + . += RESERVED_TTBR0_SIZE; +#endif + swapper_pg_dir = .; + . += PAGE_SIZE; + swapper_pg_end = .; + . = ALIGN(SEGMENT_ALIGN); __init_begin = .; __inittext_begin = .; @@ -216,22 +233,6 @@ SECTIONS BSS_SECTION(0, 0, 0) . = ALIGN(PAGE_SIZE); - idmap_pg_dir = .; - . += IDMAP_DIR_SIZE; - -#ifdef CONFIG_UNMAP_KERNEL_AT_EL0 - tramp_pg_dir = .; - . += PAGE_SIZE; -#endif - -#ifdef CONFIG_ARM64_SW_TTBR0_PAN - reserved_ttbr0 = .; - . += RESERVED_TTBR0_SIZE; -#endif - swapper_pg_dir = .; - . += PAGE_SIZE; - swapper_pg_end = .; - init_pg_dir = .; . += INIT_DIR_SIZE; init_pg_end = .; From 03630b3b76ccc2999f77e11c4ba60b0a549d023a Mon Sep 17 00:00:00 2001 From: Rob Herring Date: Mon, 27 Aug 2018 20:52:39 -0500 Subject: [PATCH 43/78] perf: Convert to using %pOFn instead of device_node.name In preparation to remove the node name pointer from struct device_node, convert printf users to use the %pOFn format specifier. Cc: Mark Rutland Cc: linux-arm-kernel@lists.infradead.org Acked-by: Will Deacon Signed-off-by: Rob Herring Signed-off-by: Catalin Marinas --- drivers/perf/arm_pmu_platform.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/perf/arm_pmu_platform.c b/drivers/perf/arm_pmu_platform.c index 96075cecb0ae..933bd8410fc2 100644 --- a/drivers/perf/arm_pmu_platform.c +++ b/drivers/perf/arm_pmu_platform.c @@ -77,14 +77,14 @@ static int pmu_parse_irq_affinity(struct device_node *node, int i) dn = of_parse_phandle(node, "interrupt-affinity", i); if (!dn) { - pr_warn("failed to parse interrupt-affinity[%d] for %s\n", - i, node->name); + pr_warn("failed to parse interrupt-affinity[%d] for %pOFn\n", + i, node); return -EINVAL; } cpu = of_cpu_node_to_id(dn); if (cpu < 0) { - pr_warn("failed to find logical CPU for %s\n", dn->name); + pr_warn("failed to find logical CPU for %pOFn\n", dn); cpu = nr_cpu_ids; } From 22839869f21ab3850fbbac9b425ccc4c0023926f Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Wed, 5 Sep 2018 15:34:42 +0100 Subject: [PATCH 44/78] signal: Introduce COMPAT_SIGMINSTKSZ for use in compat_sys_sigaltstack The sigaltstack(2) system call fails with -ENOMEM if the new alternative signal stack is found to be smaller than SIGMINSTKSZ. On architectures such as arm64, where the native value for SIGMINSTKSZ is larger than the compat value, this can result in an unexpected error being reported to a compat task. See, for example: https://bugs.debian.org/cgi-bin/bugreport.cgi?bug=904385 This patch fixes the problem by extending do_sigaltstack to take the minimum signal stack size as an additional parameter, allowing the native and compat system call entry code to pass in their respective values. COMPAT_SIGMINSTKSZ is just defined as SIGMINSTKSZ if it has not been defined by the architecture. Cc: Arnd Bergmann Cc: Dominik Brodowski Cc: "Eric W. Biederman" Cc: Andrew Morton Cc: Al Viro Cc: Oleg Nesterov Reported-by: Steve McIntyre Tested-by: Steve McIntyre <93sam@debian.org> Signed-off-by: Will Deacon Signed-off-by: Catalin Marinas --- include/linux/compat.h | 3 +++ kernel/signal.c | 14 +++++++++----- 2 files changed, 12 insertions(+), 5 deletions(-) diff --git a/include/linux/compat.h b/include/linux/compat.h index 1a3c4f37e908..de0c13bdcd2c 100644 --- a/include/linux/compat.h +++ b/include/linux/compat.h @@ -103,6 +103,9 @@ typedef struct compat_sigaltstack { compat_size_t ss_size; } compat_stack_t; #endif +#ifndef COMPAT_MINSIGSTKSZ +#define COMPAT_MINSIGSTKSZ MINSIGSTKSZ +#endif #define compat_jiffies_to_clock_t(x) \ (((unsigned long)(x) * COMPAT_USER_HZ) / HZ) diff --git a/kernel/signal.c b/kernel/signal.c index 5843c541fda9..e4aad0e90882 100644 --- a/kernel/signal.c +++ b/kernel/signal.c @@ -3460,7 +3460,8 @@ int do_sigaction(int sig, struct k_sigaction *act, struct k_sigaction *oact) } static int -do_sigaltstack (const stack_t *ss, stack_t *oss, unsigned long sp) +do_sigaltstack (const stack_t *ss, stack_t *oss, unsigned long sp, + size_t min_ss_size) { struct task_struct *t = current; @@ -3490,7 +3491,7 @@ do_sigaltstack (const stack_t *ss, stack_t *oss, unsigned long sp) ss_size = 0; ss_sp = NULL; } else { - if (unlikely(ss_size < MINSIGSTKSZ)) + if (unlikely(ss_size < min_ss_size)) return -ENOMEM; } @@ -3508,7 +3509,8 @@ SYSCALL_DEFINE2(sigaltstack,const stack_t __user *,uss, stack_t __user *,uoss) if (uss && copy_from_user(&new, uss, sizeof(stack_t))) return -EFAULT; err = do_sigaltstack(uss ? &new : NULL, uoss ? &old : NULL, - current_user_stack_pointer()); + current_user_stack_pointer(), + MINSIGSTKSZ); if (!err && uoss && copy_to_user(uoss, &old, sizeof(stack_t))) err = -EFAULT; return err; @@ -3519,7 +3521,8 @@ int restore_altstack(const stack_t __user *uss) stack_t new; if (copy_from_user(&new, uss, sizeof(stack_t))) return -EFAULT; - (void)do_sigaltstack(&new, NULL, current_user_stack_pointer()); + (void)do_sigaltstack(&new, NULL, current_user_stack_pointer(), + MINSIGSTKSZ); /* squash all but EFAULT for now */ return 0; } @@ -3553,7 +3556,8 @@ static int do_compat_sigaltstack(const compat_stack_t __user *uss_ptr, uss.ss_size = uss32.ss_size; } ret = do_sigaltstack(uss_ptr ? &uss : NULL, &uoss, - compat_user_stack_pointer()); + compat_user_stack_pointer(), + COMPAT_MINSIGSTKSZ); if (ret >= 0 && uoss_ptr) { compat_stack_t old; memset(&old, 0, sizeof(old)); From 24951465cbd279f60b1fdc2421b3694405bcff42 Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Wed, 5 Sep 2018 15:34:43 +0100 Subject: [PATCH 45/78] arm64: compat: Provide definition for COMPAT_SIGMINSTKSZ arch/arm/ defines a SIGMINSTKSZ of 2k, so we should use the same value for compat tasks. Cc: Arnd Bergmann Cc: Dominik Brodowski Cc: "Eric W. Biederman" Cc: Andrew Morton Cc: Al Viro Cc: Oleg Nesterov Reviewed-by: Dave Martin Reported-by: Steve McIntyre Tested-by: Steve McIntyre <93sam@debian.org> Signed-off-by: Will Deacon Signed-off-by: Catalin Marinas --- arch/arm64/include/asm/compat.h | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/arm64/include/asm/compat.h b/arch/arm64/include/asm/compat.h index 1a037b94eba1..cee28a05ee98 100644 --- a/arch/arm64/include/asm/compat.h +++ b/arch/arm64/include/asm/compat.h @@ -159,6 +159,7 @@ static inline compat_uptr_t ptr_to_compat(void __user *uptr) } #define compat_user_stack_pointer() (user_stack_pointer(task_pt_regs(current))) +#define COMPAT_MINSIGSTKSZ 2048 static inline void __user *arch_compat_alloc_user_space(long len) { From 9376b1e7b62523c84fde34908af01a85b8d0468a Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Thu, 27 Sep 2018 15:07:37 +0200 Subject: [PATCH 46/78] arm64: remove unused asm/compiler.h header file arm64 does not define CONFIG_HAVE_ARCH_COMPILER_H, nor does it keep anything useful in its copy of asm/compiler.h, so let's remove it before anybody starts using it. Acked-by: Will Deacon Signed-off-by: Ard Biesheuvel Signed-off-by: Catalin Marinas --- arch/arm64/include/asm/compiler.h | 30 ------------------------------ arch/arm64/include/asm/sysreg.h | 1 - arch/arm64/include/asm/uaccess.h | 1 - arch/arm64/kernel/psci.c | 1 - 4 files changed, 33 deletions(-) delete mode 100644 arch/arm64/include/asm/compiler.h diff --git a/arch/arm64/include/asm/compiler.h b/arch/arm64/include/asm/compiler.h deleted file mode 100644 index ee35fd0f2236..000000000000 --- a/arch/arm64/include/asm/compiler.h +++ /dev/null @@ -1,30 +0,0 @@ -/* - * Based on arch/arm/include/asm/compiler.h - * - * Copyright (C) 2012 ARM Ltd. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program. If not, see . - */ -#ifndef __ASM_COMPILER_H -#define __ASM_COMPILER_H - -/* - * This is used to ensure the compiler did actually allocate the register we - * asked it for some inline assembly sequences. Apparently we can't trust the - * compiler from one version to another so a bit of paranoia won't hurt. This - * string is meant to be concatenated with the inline asm string and will - * cause compilation to stop on mismatch. (for details, see gcc PR 15089) - */ -#define __asmeq(x, y) ".ifnc " x "," y " ; .err ; .endif\n\t" - -#endif /* __ASM_COMPILER_H */ diff --git a/arch/arm64/include/asm/sysreg.h b/arch/arm64/include/asm/sysreg.h index 7e9ab1fa090c..98e6f44cb021 100644 --- a/arch/arm64/include/asm/sysreg.h +++ b/arch/arm64/include/asm/sysreg.h @@ -20,7 +20,6 @@ #ifndef __ASM_SYSREG_H #define __ASM_SYSREG_H -#include #include /* diff --git a/arch/arm64/include/asm/uaccess.h b/arch/arm64/include/asm/uaccess.h index c4528dd90875..8ac6e34922e7 100644 --- a/arch/arm64/include/asm/uaccess.h +++ b/arch/arm64/include/asm/uaccess.h @@ -32,7 +32,6 @@ #include #include #include -#include #include #define get_ds() (KERNEL_DS) diff --git a/arch/arm64/kernel/psci.c b/arch/arm64/kernel/psci.c index e8edbf13302a..8cdaf25e99cd 100644 --- a/arch/arm64/kernel/psci.c +++ b/arch/arm64/kernel/psci.c @@ -24,7 +24,6 @@ #include -#include #include #include #include From bd7ac140b82f2bedfc792d8ccf9b2a108c1324f3 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Thu, 27 Sep 2018 17:15:28 +0100 Subject: [PATCH 47/78] arm64: Add decoding macros for CP15_32 and CP15_64 traps So far, we don't have anything to help decoding ESR_ELx when dealing with ESR_ELx_EC_CP15_{32,64}. As we're about to handle some of those, let's add some useful macros. Signed-off-by: Marc Zyngier Signed-off-by: Catalin Marinas --- arch/arm64/include/asm/esr.h | 52 ++++++++++++++++++++++++++++++++++++ 1 file changed, 52 insertions(+) diff --git a/arch/arm64/include/asm/esr.h b/arch/arm64/include/asm/esr.h index 37e84f277ee2..56d32e5557a5 100644 --- a/arch/arm64/include/asm/esr.h +++ b/arch/arm64/include/asm/esr.h @@ -263,6 +263,58 @@ #define ESR_ELx_FP_EXC_TFV (UL(1) << 23) +/* + * ISS field definitions for CP15 accesses + */ +#define ESR_ELx_CP15_32_ISS_DIR_MASK 0x1 +#define ESR_ELx_CP15_32_ISS_DIR_READ 0x1 +#define ESR_ELx_CP15_32_ISS_DIR_WRITE 0x0 + +#define ESR_ELx_CP15_32_ISS_RT_SHIFT 5 +#define ESR_ELx_CP15_32_ISS_RT_MASK (UL(0x1f) << ESR_ELx_CP15_32_ISS_RT_SHIFT) +#define ESR_ELx_CP15_32_ISS_CRM_SHIFT 1 +#define ESR_ELx_CP15_32_ISS_CRM_MASK (UL(0xf) << ESR_ELx_CP15_32_ISS_CRM_SHIFT) +#define ESR_ELx_CP15_32_ISS_CRN_SHIFT 10 +#define ESR_ELx_CP15_32_ISS_CRN_MASK (UL(0xf) << ESR_ELx_CP15_32_ISS_CRN_SHIFT) +#define ESR_ELx_CP15_32_ISS_OP1_SHIFT 14 +#define ESR_ELx_CP15_32_ISS_OP1_MASK (UL(0x7) << ESR_ELx_CP15_32_ISS_OP1_SHIFT) +#define ESR_ELx_CP15_32_ISS_OP2_SHIFT 17 +#define ESR_ELx_CP15_32_ISS_OP2_MASK (UL(0x7) << ESR_ELx_CP15_32_ISS_OP2_SHIFT) + +#define ESR_ELx_CP15_32_ISS_SYS_MASK (ESR_ELx_CP15_32_ISS_OP1_MASK | \ + ESR_ELx_CP15_32_ISS_OP2_MASK | \ + ESR_ELx_CP15_32_ISS_CRN_MASK | \ + ESR_ELx_CP15_32_ISS_CRM_MASK | \ + ESR_ELx_CP15_32_ISS_DIR_MASK) +#define ESR_ELx_CP15_32_ISS_SYS_VAL(op1, op2, crn, crm) \ + (((op1) << ESR_ELx_CP15_32_ISS_OP1_SHIFT) | \ + ((op2) << ESR_ELx_CP15_32_ISS_OP2_SHIFT) | \ + ((crn) << ESR_ELx_CP15_32_ISS_CRN_SHIFT) | \ + ((crm) << ESR_ELx_CP15_32_ISS_CRM_SHIFT)) + +#define ESR_ELx_CP15_64_ISS_DIR_MASK 0x1 +#define ESR_ELx_CP15_64_ISS_DIR_READ 0x1 +#define ESR_ELx_CP15_64_ISS_DIR_WRITE 0x0 + +#define ESR_ELx_CP15_64_ISS_RT_SHIFT 5 +#define ESR_ELx_CP15_64_ISS_RT_MASK (UL(0x1f) << ESR_ELx_CP15_64_ISS_RT_SHIFT) + +#define ESR_ELx_CP15_64_ISS_RT2_SHIFT 10 +#define ESR_ELx_CP15_64_ISS_RT2_MASK (UL(0x1f) << ESR_ELx_CP15_64_ISS_RT2_SHIFT) + +#define ESR_ELx_CP15_64_ISS_OP1_SHIFT 16 +#define ESR_ELx_CP15_64_ISS_OP1_MASK (UL(0xf) << ESR_ELx_CP15_64_ISS_OP1_SHIFT) +#define ESR_ELx_CP15_64_ISS_CRM_SHIFT 1 +#define ESR_ELx_CP15_64_ISS_CRM_MASK (UL(0xf) << ESR_ELx_CP15_64_ISS_CRM_SHIFT) + +#define ESR_ELx_CP15_64_ISS_SYS_VAL(op1, crm) \ + (((op1) << ESR_ELx_CP15_64_ISS_OP1_SHIFT) | \ + ((crm) << ESR_ELx_CP15_64_ISS_CRM_SHIFT)) + +#define ESR_ELx_CP15_64_ISS_SYS_MASK (ESR_ELx_CP15_64_ISS_OP1_MASK | \ + ESR_ELx_CP15_64_ISS_CRM_MASK | \ + ESR_ELx_CP15_64_ISS_DIR_MASK) + #ifndef __ASSEMBLY__ #include From 70c63cdfd6ee615714c5453cff370032587723c2 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Thu, 27 Sep 2018 17:15:29 +0100 Subject: [PATCH 48/78] arm64: compat: Add separate CP15 trapping hook Instead of directly generating an UNDEF when trapping a CP15 access, let's add a new entry point to that effect (which only generates an UNDEF for now). Acked-by: Mark Rutland Signed-off-by: Marc Zyngier Signed-off-by: Catalin Marinas --- arch/arm64/kernel/entry.S | 15 +++++++++++++-- arch/arm64/kernel/traps.c | 13 +++++++++++++ 2 files changed, 26 insertions(+), 2 deletions(-) diff --git a/arch/arm64/kernel/entry.S b/arch/arm64/kernel/entry.S index 8556876c9109..f0a0464d4809 100644 --- a/arch/arm64/kernel/entry.S +++ b/arch/arm64/kernel/entry.S @@ -697,9 +697,9 @@ el0_sync_compat: cmp x24, #ESR_ELx_EC_UNKNOWN // unknown exception in EL0 b.eq el0_undef cmp x24, #ESR_ELx_EC_CP15_32 // CP15 MRC/MCR trap - b.eq el0_undef + b.eq el0_cp15 cmp x24, #ESR_ELx_EC_CP15_64 // CP15 MRRC/MCRR trap - b.eq el0_undef + b.eq el0_cp15 cmp x24, #ESR_ELx_EC_CP14_MR // CP14 MRC/MCR trap b.eq el0_undef cmp x24, #ESR_ELx_EC_CP14_LS // CP14 LDC/STC trap @@ -722,6 +722,17 @@ el0_irq_compat: el0_error_compat: kernel_entry 0, 32 b el0_error_naked + +el0_cp15: + /* + * Trapped CP15 (MRC, MCR, MRRC, MCRR) instructions + */ + enable_daif + ct_user_exit + mov x0, x25 + mov x1, sp + bl do_cp15instr + b ret_to_user #endif el0_da: diff --git a/arch/arm64/kernel/traps.c b/arch/arm64/kernel/traps.c index 21689c6a985f..0e2665936493 100644 --- a/arch/arm64/kernel/traps.c +++ b/arch/arm64/kernel/traps.c @@ -547,6 +547,19 @@ static struct sys64_hook sys64_hooks[] = { {}, }; + +#ifdef CONFIG_COMPAT +asmlinkage void __exception do_cp15instr(unsigned int esr, struct pt_regs *regs) +{ + /* + * New cp15 instructions may previously have been undefined at + * EL0. Fall back to our usual undefined instruction handler + * so that we handle these consistently. + */ + do_undefinstr(regs); +} +#endif + asmlinkage void __exception do_sysinstr(unsigned int esr, struct pt_regs *regs) { struct sys64_hook *hook; From 1f1c014035a8084a768e7e902c6f5857995b1220 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Thu, 27 Sep 2018 17:15:30 +0100 Subject: [PATCH 49/78] arm64: compat: Add condition code checks and IT advance Here's a /really nice/ part of the architecture: a CP15 access is allowed to trap even if it fails its condition check, and SW must handle it. This includes decoding the IT state if this happens in am IT block. As a consequence, SW must also deal with advancing the IT state machine. Reviewed-by: Mark Rutland Signed-off-by: Marc Zyngier Signed-off-by: Catalin Marinas --- arch/arm64/kernel/traps.c | 85 +++++++++++++++++++++++++++++++++++++++ 1 file changed, 85 insertions(+) diff --git a/arch/arm64/kernel/traps.c b/arch/arm64/kernel/traps.c index 0e2665936493..95a646c154fe 100644 --- a/arch/arm64/kernel/traps.c +++ b/arch/arm64/kernel/traps.c @@ -549,8 +549,93 @@ static struct sys64_hook sys64_hooks[] = { #ifdef CONFIG_COMPAT +#define PSTATE_IT_1_0_SHIFT 25 +#define PSTATE_IT_1_0_MASK (0x3 << PSTATE_IT_1_0_SHIFT) +#define PSTATE_IT_7_2_SHIFT 10 +#define PSTATE_IT_7_2_MASK (0x3f << PSTATE_IT_7_2_SHIFT) + +static u32 compat_get_it_state(struct pt_regs *regs) +{ + u32 it, pstate = regs->pstate; + + it = (pstate & PSTATE_IT_1_0_MASK) >> PSTATE_IT_1_0_SHIFT; + it |= ((pstate & PSTATE_IT_7_2_MASK) >> PSTATE_IT_7_2_SHIFT) << 2; + + return it; +} + +static void compat_set_it_state(struct pt_regs *regs, u32 it) +{ + u32 pstate_it; + + pstate_it = (it << PSTATE_IT_1_0_SHIFT) & PSTATE_IT_1_0_MASK; + pstate_it |= ((it >> 2) << PSTATE_IT_7_2_SHIFT) & PSTATE_IT_7_2_MASK; + + regs->pstate &= ~PSR_AA32_IT_MASK; + regs->pstate |= pstate_it; +} + +static bool cp15_cond_valid(unsigned int esr, struct pt_regs *regs) +{ + int cond; + + /* Only a T32 instruction can trap without CV being set */ + if (!(esr & ESR_ELx_CV)) { + u32 it; + + it = compat_get_it_state(regs); + if (!it) + return true; + + cond = it >> 4; + } else { + cond = (esr & ESR_ELx_COND_MASK) >> ESR_ELx_COND_SHIFT; + } + + return aarch32_opcode_cond_checks[cond](regs->pstate); +} + +static void advance_itstate(struct pt_regs *regs) +{ + u32 it; + + /* ARM mode */ + if (!(regs->pstate & PSR_AA32_T_BIT) || + !(regs->pstate & PSR_AA32_IT_MASK)) + return; + + it = compat_get_it_state(regs); + + /* + * If this is the last instruction of the block, wipe the IT + * state. Otherwise advance it. + */ + if (!(it & 7)) + it = 0; + else + it = (it & 0xe0) | ((it << 1) & 0x1f); + + compat_set_it_state(regs, it); +} + +static void arm64_compat_skip_faulting_instruction(struct pt_regs *regs, + unsigned int sz) +{ + advance_itstate(regs); + arm64_skip_faulting_instruction(regs, sz); +} + asmlinkage void __exception do_cp15instr(unsigned int esr, struct pt_regs *regs) { + if (!cp15_cond_valid(esr, regs)) { + /* + * There is no T16 variant of a CP access, so we + * always advance PC by 4 bytes. + */ + arm64_compat_skip_faulting_instruction(regs, 4); + return; + } + /* * New cp15 instructions may previously have been undefined at * EL0. Fall back to our usual undefined instruction handler From 2a8905e18c55d5576d7a53da495b4de0cfcbc459 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Thu, 27 Sep 2018 17:15:31 +0100 Subject: [PATCH 50/78] arm64: compat: Add cp15_32 and cp15_64 handler arrays We're now ready to start handling CP15 access. Let's add (empty) arrays for both 32 and 64bit accessors, and the code that deals with them. Reviewed-by: Mark Rutland Signed-off-by: Marc Zyngier Signed-off-by: Catalin Marinas --- arch/arm64/kernel/traps.c | 28 ++++++++++++++++++++++++++++ 1 file changed, 28 insertions(+) diff --git a/arch/arm64/kernel/traps.c b/arch/arm64/kernel/traps.c index 95a646c154fe..76ffb9f42aa4 100644 --- a/arch/arm64/kernel/traps.c +++ b/arch/arm64/kernel/traps.c @@ -625,8 +625,18 @@ static void arm64_compat_skip_faulting_instruction(struct pt_regs *regs, arm64_skip_faulting_instruction(regs, sz); } +static struct sys64_hook cp15_32_hooks[] = { + {}, +}; + +static struct sys64_hook cp15_64_hooks[] = { + {}, +}; + asmlinkage void __exception do_cp15instr(unsigned int esr, struct pt_regs *regs) { + struct sys64_hook *hook, *hook_base; + if (!cp15_cond_valid(esr, regs)) { /* * There is no T16 variant of a CP access, so we @@ -636,6 +646,24 @@ asmlinkage void __exception do_cp15instr(unsigned int esr, struct pt_regs *regs) return; } + switch (ESR_ELx_EC(esr)) { + case ESR_ELx_EC_CP15_32: + hook_base = cp15_32_hooks; + break; + case ESR_ELx_EC_CP15_64: + hook_base = cp15_64_hooks; + break; + default: + do_undefinstr(regs); + return; + } + + for (hook = hook_base; hook->handler; hook++) + if ((hook->esr_mask & esr) == hook->esr_val) { + hook->handler(esr, regs); + return; + } + /* * New cp15 instructions may previously have been undefined at * EL0. Fall back to our usual undefined instruction handler From 50de013d22e4e112d7b0778a0e7d032f16c46778 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Thu, 27 Sep 2018 17:15:32 +0100 Subject: [PATCH 51/78] arm64: compat: Add CNTVCT trap handler Since people seem to make a point in breaking the userspace visible counter, we have no choice but to trap the access. We already do this for 64bit userspace, but this is lacking for compat. Let's provide the required handler. Reviewed-by: Mark Rutland Signed-off-by: Marc Zyngier Signed-off-by: Catalin Marinas --- arch/arm64/include/asm/esr.h | 3 +++ arch/arm64/kernel/traps.c | 16 ++++++++++++++++ 2 files changed, 19 insertions(+) diff --git a/arch/arm64/include/asm/esr.h b/arch/arm64/include/asm/esr.h index 56d32e5557a5..5548712ce6e5 100644 --- a/arch/arm64/include/asm/esr.h +++ b/arch/arm64/include/asm/esr.h @@ -315,6 +315,9 @@ ESR_ELx_CP15_64_ISS_CRM_MASK | \ ESR_ELx_CP15_64_ISS_DIR_MASK) +#define ESR_ELx_CP15_64_ISS_SYS_CNTVCT (ESR_ELx_CP15_64_ISS_SYS_VAL(1, 14) | \ + ESR_ELx_CP15_64_ISS_DIR_READ) + #ifndef __ASSEMBLY__ #include diff --git a/arch/arm64/kernel/traps.c b/arch/arm64/kernel/traps.c index 76ffb9f42aa4..3602b900ff1c 100644 --- a/arch/arm64/kernel/traps.c +++ b/arch/arm64/kernel/traps.c @@ -629,7 +629,23 @@ static struct sys64_hook cp15_32_hooks[] = { {}, }; +static void compat_cntvct_read_handler(unsigned int esr, struct pt_regs *regs) +{ + int rt = (esr & ESR_ELx_CP15_64_ISS_RT_MASK) >> ESR_ELx_CP15_64_ISS_RT_SHIFT; + int rt2 = (esr & ESR_ELx_CP15_64_ISS_RT2_MASK) >> ESR_ELx_CP15_64_ISS_RT2_SHIFT; + u64 val = arch_counter_get_cntvct(); + + pt_regs_write_reg(regs, rt, lower_32_bits(val)); + pt_regs_write_reg(regs, rt2, upper_32_bits(val)); + arm64_compat_skip_faulting_instruction(regs, 4); +} + static struct sys64_hook cp15_64_hooks[] = { + { + .esr_mask = ESR_ELx_CP15_64_ISS_SYS_MASK, + .esr_val = ESR_ELx_CP15_64_ISS_SYS_CNTVCT, + .handler = compat_cntvct_read_handler, + }, {}, }; From 32a3e635fb0ecc1b197d54f710e76c6481cf19f0 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Thu, 27 Sep 2018 17:15:33 +0100 Subject: [PATCH 52/78] arm64: compat: Add CNTFRQ trap handler Just like CNTVCT, we need to handle userspace trapping into the kernel if we're decided that the timer wasn't fit for purpose... 64bit userspace is already dealt with, but we're missing the equivalent compat handling. Reviewed-by: Mark Rutland Signed-off-by: Marc Zyngier Signed-off-by: Catalin Marinas --- arch/arm64/include/asm/esr.h | 3 +++ arch/arm64/kernel/traps.c | 13 +++++++++++++ 2 files changed, 16 insertions(+) diff --git a/arch/arm64/include/asm/esr.h b/arch/arm64/include/asm/esr.h index 5548712ce6e5..fb7dfe1b51bb 100644 --- a/arch/arm64/include/asm/esr.h +++ b/arch/arm64/include/asm/esr.h @@ -318,6 +318,9 @@ #define ESR_ELx_CP15_64_ISS_SYS_CNTVCT (ESR_ELx_CP15_64_ISS_SYS_VAL(1, 14) | \ ESR_ELx_CP15_64_ISS_DIR_READ) +#define ESR_ELx_CP15_32_ISS_SYS_CNTFRQ (ESR_ELx_CP15_32_ISS_SYS_VAL(0, 0, 14, 0) |\ + ESR_ELx_CP15_32_ISS_DIR_READ) + #ifndef __ASSEMBLY__ #include diff --git a/arch/arm64/kernel/traps.c b/arch/arm64/kernel/traps.c index 3602b900ff1c..58134a97928f 100644 --- a/arch/arm64/kernel/traps.c +++ b/arch/arm64/kernel/traps.c @@ -625,7 +625,20 @@ static void arm64_compat_skip_faulting_instruction(struct pt_regs *regs, arm64_skip_faulting_instruction(regs, sz); } +static void compat_cntfrq_read_handler(unsigned int esr, struct pt_regs *regs) +{ + int reg = (esr & ESR_ELx_CP15_32_ISS_RT_MASK) >> ESR_ELx_CP15_32_ISS_RT_SHIFT; + + pt_regs_write_reg(regs, reg, arch_timer_get_rate()); + arm64_compat_skip_faulting_instruction(regs, 4); +} + static struct sys64_hook cp15_32_hooks[] = { + { + .esr_mask = ESR_ELx_CP15_32_ISS_SYS_MASK, + .esr_val = ESR_ELx_CP15_32_ISS_SYS_CNTFRQ, + .handler = compat_cntfrq_read_handler, + }, {}, }; From 95b861a4a6d94f64d5242605569218160ebacdbe Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Thu, 27 Sep 2018 17:15:34 +0100 Subject: [PATCH 53/78] arm64: arch_timer: Add workaround for ARM erratum 1188873 When running on Cortex-A76, a timer access from an AArch32 EL0 task may end up with a corrupted value or register. The workaround for this is to trap these accesses at EL1/EL2 and execute them there. This only affects versions r0p0, r1p0 and r2p0 of the CPU. Acked-by: Mark Rutland Signed-off-by: Marc Zyngier Signed-off-by: Catalin Marinas --- arch/arm64/Kconfig | 12 ++++++++++++ arch/arm64/include/asm/cpucaps.h | 3 ++- arch/arm64/include/asm/cputype.h | 2 ++ arch/arm64/kernel/cpu_errata.c | 8 ++++++++ drivers/clocksource/arm_arch_timer.c | 15 +++++++++++++++ 5 files changed, 39 insertions(+), 1 deletion(-) diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig index da5e6f085561..52985d175e5a 100644 --- a/arch/arm64/Kconfig +++ b/arch/arm64/Kconfig @@ -481,6 +481,18 @@ config ARM64_ERRATUM_1024718 If unsure, say Y. +config ARM64_ERRATUM_1188873 + bool "Cortex-A76: MRC read following MRRC read of specific Generic Timer in AArch32 might give incorrect result" + default y + help + This option adds work arounds for ARM Cortex-A76 erratum 1188873 + + Affected Cortex-A76 cores (r0p0, r1p0, r2p0) could cause + register corruption when accessing the timer registers from + AArch32 userspace. + + If unsure, say Y. + config CAVIUM_ERRATUM_22375 bool "Cavium erratum 22375, 24313" default y diff --git a/arch/arm64/include/asm/cpucaps.h b/arch/arm64/include/asm/cpucaps.h index 6eb1b3fd0493..6e2d254c09eb 100644 --- a/arch/arm64/include/asm/cpucaps.h +++ b/arch/arm64/include/asm/cpucaps.h @@ -53,7 +53,8 @@ #define ARM64_HAS_STAGE2_FWB 32 #define ARM64_HAS_CRC32 33 #define ARM64_SSBS 34 +#define ARM64_WORKAROUND_1188873 35 -#define ARM64_NCAPS 35 +#define ARM64_NCAPS 36 #endif /* __ASM_CPUCAPS_H */ diff --git a/arch/arm64/include/asm/cputype.h b/arch/arm64/include/asm/cputype.h index ea690b3562af..12f93e4d2452 100644 --- a/arch/arm64/include/asm/cputype.h +++ b/arch/arm64/include/asm/cputype.h @@ -86,6 +86,7 @@ #define ARM_CPU_PART_CORTEX_A75 0xD0A #define ARM_CPU_PART_CORTEX_A35 0xD04 #define ARM_CPU_PART_CORTEX_A55 0xD05 +#define ARM_CPU_PART_CORTEX_A76 0xD0B #define APM_CPU_PART_POTENZA 0x000 @@ -110,6 +111,7 @@ #define MIDR_CORTEX_A75 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A75) #define MIDR_CORTEX_A35 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A35) #define MIDR_CORTEX_A55 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A55) +#define MIDR_CORTEX_A76 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A76) #define MIDR_THUNDERX MIDR_CPU_MODEL(ARM_CPU_IMP_CAVIUM, CAVIUM_CPU_PART_THUNDERX) #define MIDR_THUNDERX_81XX MIDR_CPU_MODEL(ARM_CPU_IMP_CAVIUM, CAVIUM_CPU_PART_THUNDERX_81XX) #define MIDR_THUNDERX_83XX MIDR_CPU_MODEL(ARM_CPU_IMP_CAVIUM, CAVIUM_CPU_PART_THUNDERX_83XX) diff --git a/arch/arm64/kernel/cpu_errata.c b/arch/arm64/kernel/cpu_errata.c index 20be4c578e0a..cde948991d68 100644 --- a/arch/arm64/kernel/cpu_errata.c +++ b/arch/arm64/kernel/cpu_errata.c @@ -696,6 +696,14 @@ const struct arm64_cpu_capabilities arm64_errata[] = { .type = ARM64_CPUCAP_LOCAL_CPU_ERRATUM, .matches = has_ssbd_mitigation, }, +#endif +#ifdef CONFIG_ARM64_ERRATUM_1188873 + { + /* Cortex-A76 r0p0 to r2p0 */ + .desc = "ARM erratum 1188873", + .capability = ARM64_WORKAROUND_1188873, + ERRATA_MIDR_RANGE(MIDR_CORTEX_A76, 0, 0, 2, 0), + }, #endif { } diff --git a/drivers/clocksource/arm_arch_timer.c b/drivers/clocksource/arm_arch_timer.c index d8c7f5750cdb..9a7d4dc00b6e 100644 --- a/drivers/clocksource/arm_arch_timer.c +++ b/drivers/clocksource/arm_arch_timer.c @@ -319,6 +319,13 @@ static u64 notrace arm64_858921_read_cntvct_el0(void) } #endif +#ifdef CONFIG_ARM64_ERRATUM_1188873 +static u64 notrace arm64_1188873_read_cntvct_el0(void) +{ + return read_sysreg(cntvct_el0); +} +#endif + #ifdef CONFIG_ARM_ARCH_TIMER_OOL_WORKAROUND DEFINE_PER_CPU(const struct arch_timer_erratum_workaround *, timer_unstable_counter_workaround); EXPORT_SYMBOL_GPL(timer_unstable_counter_workaround); @@ -408,6 +415,14 @@ static const struct arch_timer_erratum_workaround ool_workarounds[] = { .read_cntvct_el0 = arm64_858921_read_cntvct_el0, }, #endif +#ifdef CONFIG_ARM64_ERRATUM_1188873 + { + .match_type = ate_match_local_cap_id, + .id = (void *)ARM64_WORKAROUND_1188873, + .desc = "ARM erratum 1188873", + .read_cntvct_el0 = arm64_1188873_read_cntvct_el0, + }, +#endif }; typedef bool (*ate_match_fn_t)(const struct arch_timer_erratum_workaround *, From 00bbd5d9016d49fa8dddef020a06b94fedca9148 Mon Sep 17 00:00:00 2001 From: Anshuman Khandual Date: Sat, 22 Sep 2018 21:09:52 +0530 Subject: [PATCH 54/78] arm64/mm: Use ESR_ELx_FSC macro while decoding fault exception Just replace hard code value of 63 (0x111111) with an existing macro ESR_ELx_FSC when parsing for the status code during fault exception. Acked-by: Mark Rutland Signed-off-by: Anshuman Khandual Signed-off-by: Catalin Marinas --- arch/arm64/mm/fault.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm64/mm/fault.c b/arch/arm64/mm/fault.c index 6342f1793c70..6d9acd910104 100644 --- a/arch/arm64/mm/fault.c +++ b/arch/arm64/mm/fault.c @@ -59,7 +59,7 @@ static const struct fault_info fault_info[]; static inline const struct fault_info *esr_to_fault_info(unsigned int esr) { - return fault_info + (esr & 63); + return fault_info + (esr & ESR_ELx_FSC); } #ifdef CONFIG_KPROBES From dbfe3828a6f36be904427ed7f196744cb02d3072 Mon Sep 17 00:00:00 2001 From: Anshuman Khandual Date: Sat, 22 Sep 2018 21:09:53 +0530 Subject: [PATCH 55/78] arm64/mm: Reorganize arguments for is_el1_permission_fault() Most memory abort exception handling related functions have the arguments in the order (addr, esr, regs) except is_el1_permission_fault(). This changes the argument order in this function as (addr, esr, regs) like others. Acked-by: Mark Rutland Signed-off-by: Anshuman Khandual Signed-off-by: Catalin Marinas --- arch/arm64/mm/fault.c | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/arch/arm64/mm/fault.c b/arch/arm64/mm/fault.c index 6d9acd910104..0e0ea5fe6ab3 100644 --- a/arch/arm64/mm/fault.c +++ b/arch/arm64/mm/fault.c @@ -235,9 +235,8 @@ static bool is_el1_instruction_abort(unsigned int esr) return ESR_ELx_EC(esr) == ESR_ELx_EC_IABT_CUR; } -static inline bool is_el1_permission_fault(unsigned int esr, - struct pt_regs *regs, - unsigned long addr) +static inline bool is_el1_permission_fault(unsigned long addr, unsigned int esr, + struct pt_regs *regs) { unsigned int ec = ESR_ELx_EC(esr); unsigned int fsc_type = esr & ESR_ELx_FSC_TYPE; @@ -283,7 +282,7 @@ static void __do_kernel_fault(unsigned long addr, unsigned int esr, if (!is_el1_instruction_abort(esr) && fixup_exception(regs)) return; - if (is_el1_permission_fault(esr, regs, addr)) { + if (is_el1_permission_fault(addr, esr, regs)) { if (esr & ESR_ELx_WNR) msg = "write to read-only memory"; else @@ -454,7 +453,7 @@ static int __kprobes do_page_fault(unsigned long addr, unsigned int esr, mm_flags |= FAULT_FLAG_WRITE; } - if (addr < TASK_SIZE && is_el1_permission_fault(esr, regs, addr)) { + if (addr < TASK_SIZE && is_el1_permission_fault(addr, esr, regs)) { /* regs->orig_addr_limit may be 0 if we entered from EL0 */ if (regs->orig_addr_limit == KERNEL_DS) die_kernel_fault("access to user memory with fs=KERNEL_DS", From 359048f91db4be5e8be9c2b95c788c79a6756797 Mon Sep 17 00:00:00 2001 From: Anshuman Khandual Date: Sat, 22 Sep 2018 21:09:54 +0530 Subject: [PATCH 56/78] arm64/mm: Define esr_to_debug_fault_info() fault_info[] and debug_fault_info[] are static arrays defining memory abort exception handling functions looking into ESR fault status code encodings. As esr_to_fault_info() is already available providing fault_info[] array lookup, it really makes sense to have a corresponding debug_fault_info[] array lookup function as well. This just adds an equivalent helper function esr_to_debug_fault_info(). Acked-by: Mark Rutland Signed-off-by: Anshuman Khandual Signed-off-by: Catalin Marinas --- arch/arm64/mm/fault.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/arch/arm64/mm/fault.c b/arch/arm64/mm/fault.c index 0e0ea5fe6ab3..1aa487a37a0a 100644 --- a/arch/arm64/mm/fault.c +++ b/arch/arm64/mm/fault.c @@ -56,12 +56,18 @@ struct fault_info { }; static const struct fault_info fault_info[]; +static struct fault_info debug_fault_info[]; static inline const struct fault_info *esr_to_fault_info(unsigned int esr) { return fault_info + (esr & ESR_ELx_FSC); } +static inline const struct fault_info *esr_to_debug_fault_info(unsigned int esr) +{ + return debug_fault_info + DBG_ESR_EVT(esr); +} + #ifdef CONFIG_KPROBES static inline int notify_page_fault(struct pt_regs *regs, unsigned int esr) { @@ -830,7 +836,7 @@ asmlinkage int __exception do_debug_exception(unsigned long addr, unsigned int esr, struct pt_regs *regs) { - const struct fault_info *inf = debug_fault_info + DBG_ESR_EVT(esr); + const struct fault_info *inf = esr_to_debug_fault_info(esr); int rv; /* From 77cfe950901e5c13aca2df6437a05f39dd9a929b Mon Sep 17 00:00:00 2001 From: Anshuman Khandual Date: Sat, 22 Sep 2018 21:09:55 +0530 Subject: [PATCH 57/78] arm64/numa: Report correct memblock range for the dummy node The dummy node ID is marked into all memory ranges on the system. So the dummy node really extends the entire memblock.memory. Hence report correct extent information for the dummy node using memblock range helper functions instead of the range [0LLU, PFN_PHYS(max_pfn) - 1)]. Fixes: 1a2db30034 ("arm64, numa: Add NUMA support for arm64 platforms") Acked-by: Punit Agrawal Signed-off-by: Anshuman Khandual Signed-off-by: Catalin Marinas --- arch/arm64/mm/numa.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm64/mm/numa.c b/arch/arm64/mm/numa.c index 146c04ceaa51..54529b4ed513 100644 --- a/arch/arm64/mm/numa.c +++ b/arch/arm64/mm/numa.c @@ -432,7 +432,7 @@ static int __init dummy_numa_init(void) if (numa_off) pr_info("NUMA disabled\n"); /* Forced off on command line. */ pr_info("Faking a node at [mem %#018Lx-%#018Lx]\n", - 0LLU, PFN_PHYS(max_pfn) - 1); + memblock_start_of_DRAM(), memblock_end_of_DRAM() - 1); for_each_memblock(memory, mblk) { ret = numa_add_memblk(0, mblk->base, mblk->base + mblk->size); From 52338088ef0569290b7eae0759c58a3de494e6c0 Mon Sep 17 00:00:00 2001 From: Anshuman Khandual Date: Sat, 22 Sep 2018 21:09:56 +0530 Subject: [PATCH 58/78] arm64/numa: Unify common error path in numa_init() At present numa_free_distance() is being called before numa_distance is even initialized with numa_alloc_distance() which is really pointless. Instead lets call numa_free_distance() on the common error path inside numa_init() after numa_alloc_distance() has been successful. Fixes: 1a2db30034 ("arm64, numa: Add NUMA support for arm64 platforms") Acked-by: Punit Agrawal Signed-off-by: Anshuman Khandual Signed-off-by: Catalin Marinas --- arch/arm64/mm/numa.c | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/arch/arm64/mm/numa.c b/arch/arm64/mm/numa.c index 54529b4ed513..d7b66fc5e1c5 100644 --- a/arch/arm64/mm/numa.c +++ b/arch/arm64/mm/numa.c @@ -391,7 +391,6 @@ static int __init numa_init(int (*init_func)(void)) nodes_clear(numa_nodes_parsed); nodes_clear(node_possible_map); nodes_clear(node_online_map); - numa_free_distance(); ret = numa_alloc_distance(); if (ret < 0) @@ -399,20 +398,24 @@ static int __init numa_init(int (*init_func)(void)) ret = init_func(); if (ret < 0) - return ret; + goto out_free_distance; if (nodes_empty(numa_nodes_parsed)) { pr_info("No NUMA configuration found\n"); - return -EINVAL; + ret = -EINVAL; + goto out_free_distance; } ret = numa_register_nodes(); if (ret < 0) - return ret; + goto out_free_distance; setup_node_to_cpumask_map(); return 0; +out_free_distance: + numa_free_distance(); + return ret; } /** From 2ba0dacbae94482e80b00dbd368f0a4aeaabbdec Mon Sep 17 00:00:00 2001 From: zhong jiang Date: Thu, 9 Aug 2018 22:20:40 +0800 Subject: [PATCH 59/78] arm64/kprobes: remove an extra semicolon in arch_prepare_kprobe There is an extra semicolon in arch_prepare_kprobe, remove it. Signed-off-by: zhong jiang Signed-off-by: Catalin Marinas --- arch/arm64/kernel/probes/kprobes.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm64/kernel/probes/kprobes.c b/arch/arm64/kernel/probes/kprobes.c index e78c3ef04d95..9b65132e789a 100644 --- a/arch/arm64/kernel/probes/kprobes.c +++ b/arch/arm64/kernel/probes/kprobes.c @@ -107,7 +107,7 @@ int __kprobes arch_prepare_kprobe(struct kprobe *p) if (!p->ainsn.api.insn) return -ENOMEM; break; - }; + } /* prepare the instruction */ if (p->ainsn.api.insn) From 4bfbe5eee309e8c86e1b1155e82136c8fbffd155 Mon Sep 17 00:00:00 2001 From: Giacomo Travaglini Date: Mon, 1 Oct 2018 15:24:47 +0100 Subject: [PATCH 60/78] arm64: docs: Fix typos in ELF hwcaps Fix some typos in our hwcap documentation, where we refer to the wrong ID register for some of the capabilities. Acked-by: Mark Rutland Signed-off-by: Giacomo Travaglini [will: fix amusing binary constants] Signed-off-by: Will Deacon Signed-off-by: Catalin Marinas --- Documentation/arm64/elf_hwcaps.txt | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/Documentation/arm64/elf_hwcaps.txt b/Documentation/arm64/elf_hwcaps.txt index d6aff2c5e9e2..51ccfab408cc 100644 --- a/Documentation/arm64/elf_hwcaps.txt +++ b/Documentation/arm64/elf_hwcaps.txt @@ -78,11 +78,11 @@ HWCAP_EVTSTRM HWCAP_AES - Functionality implied by ID_AA64ISAR1_EL1.AES == 0b0001. + Functionality implied by ID_AA64ISAR0_EL1.AES == 0b0001. HWCAP_PMULL - Functionality implied by ID_AA64ISAR1_EL1.AES == 0b0010. + Functionality implied by ID_AA64ISAR0_EL1.AES == 0b0010. HWCAP_SHA1 @@ -153,7 +153,7 @@ HWCAP_ASIMDDP HWCAP_SHA512 - Functionality implied by ID_AA64ISAR0_EL1.SHA2 == 0b0002. + Functionality implied by ID_AA64ISAR0_EL1.SHA2 == 0b0010. HWCAP_SVE @@ -173,7 +173,7 @@ HWCAP_USCAT HWCAP_ILRCPC - Functionality implied by ID_AA64ISR1_EL1.LRCPC == 0b0002. + Functionality implied by ID_AA64ISAR1_EL1.LRCPC == 0b0010. HWCAP_FLAGM From ee91176120bd584aa10c564e7e9fdcaf397190a1 Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Mon, 1 Oct 2018 15:24:48 +0100 Subject: [PATCH 61/78] arm64: docs: Document SSBS HWCAP We advertise the MRS/MSR instructions for toggling SSBS at EL0 using an HWCAP, so document it along with the others. Signed-off-by: Will Deacon Signed-off-by: Catalin Marinas --- Documentation/arm64/elf_hwcaps.txt | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/Documentation/arm64/elf_hwcaps.txt b/Documentation/arm64/elf_hwcaps.txt index 51ccfab408cc..ea819ae024dd 100644 --- a/Documentation/arm64/elf_hwcaps.txt +++ b/Documentation/arm64/elf_hwcaps.txt @@ -178,3 +178,7 @@ HWCAP_ILRCPC HWCAP_FLAGM Functionality implied by ID_AA64ISAR0_EL1.TS == 0b0001. + +HWCAP_SSBS + + Functionality implied by ID_AA64PFR1_EL1.SSBS == 0b0010. From c219bc4e920518feb025749bdf9623aa57e94b64 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Mon, 1 Oct 2018 12:19:43 +0100 Subject: [PATCH 62/78] arm64: Trap WFI executed in userspace It recently came to light that userspace can execute WFI, and that the arm64 kernel doesn't trap this event. This sounds rather benign, but the kernel should decide when it wants to wait for an interrupt, and not userspace. Let's trap WFI and immediately return after having skipped the instruction. This effectively makes WFI a rather expensive NOP. Signed-off-by: Marc Zyngier Signed-off-by: Catalin Marinas --- arch/arm64/include/asm/esr.h | 5 +++++ arch/arm64/include/asm/sysreg.h | 4 ++-- arch/arm64/kernel/entry.S | 1 + arch/arm64/kernel/traps.c | 11 +++++++++++ 4 files changed, 19 insertions(+), 2 deletions(-) diff --git a/arch/arm64/include/asm/esr.h b/arch/arm64/include/asm/esr.h index fb7dfe1b51bb..676de2ec1762 100644 --- a/arch/arm64/include/asm/esr.h +++ b/arch/arm64/include/asm/esr.h @@ -137,6 +137,8 @@ #define ESR_ELx_CV (UL(1) << 24) #define ESR_ELx_COND_SHIFT (20) #define ESR_ELx_COND_MASK (UL(0xF) << ESR_ELx_COND_SHIFT) +#define ESR_ELx_WFx_ISS_TI (UL(1) << 0) +#define ESR_ELx_WFx_ISS_WFI (UL(0) << 0) #define ESR_ELx_WFx_ISS_WFE (UL(1) << 0) #define ESR_ELx_xVC_IMM_MASK ((1UL << 16) - 1) @@ -148,6 +150,9 @@ #define DISR_EL1_ESR_MASK (ESR_ELx_AET | ESR_ELx_EA | ESR_ELx_FSC) /* ESR value templates for specific events */ +#define ESR_ELx_WFx_MASK (ESR_ELx_EC_MASK | ESR_ELx_WFx_ISS_TI) +#define ESR_ELx_WFx_WFI_VAL ((ESR_ELx_EC_WFx << ESR_ELx_EC_SHIFT) | \ + ESR_ELx_WFx_ISS_WFI) /* BRK instruction trap from AArch64 state */ #define ESR_ELx_VAL_BRK64(imm) \ diff --git a/arch/arm64/include/asm/sysreg.h b/arch/arm64/include/asm/sysreg.h index 98e6f44cb021..0c909c4a932f 100644 --- a/arch/arm64/include/asm/sysreg.h +++ b/arch/arm64/include/asm/sysreg.h @@ -502,12 +502,12 @@ #define SCTLR_EL1_SET (SCTLR_ELx_M | SCTLR_ELx_C | SCTLR_ELx_SA |\ SCTLR_EL1_SA0 | SCTLR_EL1_SED | SCTLR_ELx_I |\ - SCTLR_EL1_DZE | SCTLR_EL1_UCT | SCTLR_EL1_NTWI |\ + SCTLR_EL1_DZE | SCTLR_EL1_UCT |\ SCTLR_EL1_NTWE | SCTLR_ELx_IESB | SCTLR_EL1_SPAN |\ ENDIAN_SET_EL1 | SCTLR_EL1_UCI | SCTLR_EL1_RES1) #define SCTLR_EL1_CLEAR (SCTLR_ELx_A | SCTLR_EL1_CP15BEN | SCTLR_EL1_ITD |\ SCTLR_EL1_UMA | SCTLR_ELx_WXN | ENDIAN_CLEAR_EL1 |\ - SCTLR_ELx_DSSBS | SCTLR_EL1_RES0) + SCTLR_ELx_DSSBS | SCTLR_EL1_NTWI | SCTLR_EL1_RES0) #if (SCTLR_EL1_SET ^ SCTLR_EL1_CLEAR) != 0xffffffffffffffff #error "Inconsistent SCTLR_EL1 set/clear bits" diff --git a/arch/arm64/kernel/entry.S b/arch/arm64/kernel/entry.S index f0a0464d4809..039144ecbcb2 100644 --- a/arch/arm64/kernel/entry.S +++ b/arch/arm64/kernel/entry.S @@ -665,6 +665,7 @@ el0_sync: cmp x24, #ESR_ELx_EC_FP_EXC64 // FP/ASIMD exception b.eq el0_fpsimd_exc cmp x24, #ESR_ELx_EC_SYS64 // configurable trap + ccmp x24, #ESR_ELx_EC_WFx, #4, ne b.eq el0_sys cmp x24, #ESR_ELx_EC_SP_ALIGN // stack alignment exception b.eq el0_sp_pc diff --git a/arch/arm64/kernel/traps.c b/arch/arm64/kernel/traps.c index 58134a97928f..4066da7f1e5e 100644 --- a/arch/arm64/kernel/traps.c +++ b/arch/arm64/kernel/traps.c @@ -508,6 +508,11 @@ static void mrs_handler(unsigned int esr, struct pt_regs *regs) force_signal_inject(SIGILL, ILL_ILLOPC, regs->pc); } +static void wfi_handler(unsigned int esr, struct pt_regs *regs) +{ + arm64_skip_faulting_instruction(regs, AARCH64_INSN_SIZE); +} + struct sys64_hook { unsigned int esr_mask; unsigned int esr_val; @@ -544,6 +549,12 @@ static struct sys64_hook sys64_hooks[] = { .esr_val = ESR_ELx_SYS64_ISS_SYS_MRS_OP_VAL, .handler = mrs_handler, }, + { + /* Trap WFI instructions executed in userspace */ + .esr_mask = ESR_ELx_WFx_MASK, + .esr_val = ESR_ELx_WFx_WFI_VAL, + .handler = wfi_handler, + }, {}, }; From 040f340134751d73bd03ee92fabb992946c55b3d Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Tue, 2 Oct 2018 23:11:44 +0200 Subject: [PATCH 63/78] arm64: arch_timer: avoid unused function warning arm64_1188873_read_cntvct_el0() is protected by the correct CONFIG_ARM64_ERRATUM_1188873 #ifdef, but the only reference to it is also inside of an CONFIG_ARM_ARCH_TIMER_OOL_WORKAROUND section, and causes a warning if that is disabled: drivers/clocksource/arm_arch_timer.c:323:20: error: 'arm64_1188873_read_cntvct_el0' defined but not used [-Werror=unused-function] Since the erratum requires that we always apply the workaround in the timer driver, select that symbol as we do for SoC specific errata. Fixes: 95b861a4a6d9 ("arm64: arch_timer: Add workaround for ARM erratum 1188873") Acked-by: Marc Zyngier Signed-off-by: Arnd Bergmann Signed-off-by: Catalin Marinas --- arch/arm64/Kconfig | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig index 52985d175e5a..a8ae30fab508 100644 --- a/arch/arm64/Kconfig +++ b/arch/arm64/Kconfig @@ -484,6 +484,7 @@ config ARM64_ERRATUM_1024718 config ARM64_ERRATUM_1188873 bool "Cortex-A76: MRC read following MRRC read of specific Generic Timer in AArch32 might give incorrect result" default y + select ARM_ARCH_TIMER_OOL_WORKAROUND help This option adds work arounds for ARM Cortex-A76 erratum 1188873 From f05692919bdd44ad2a1a1fe85222dec16fec7e80 Mon Sep 17 00:00:00 2001 From: Julien Thierry Date: Tue, 28 Aug 2018 16:51:14 +0100 Subject: [PATCH 64/78] arm64: daifflags: Use irqflags functions for daifflags Some of the work done in daifflags save/restore is already provided by irqflags functions. Daifflags should always be a superset of irqflags (it handles irq status + status of other flags). Modifying behaviour of irqflags should alter the behaviour of daifflags. Use irqflags_save/restore functions for the corresponding daifflags operation. Reviewed-by: James Morse Signed-off-by: Julien Thierry Cc: Will Deacon Signed-off-by: Catalin Marinas --- arch/arm64/include/asm/daifflags.h | 15 +++++---------- 1 file changed, 5 insertions(+), 10 deletions(-) diff --git a/arch/arm64/include/asm/daifflags.h b/arch/arm64/include/asm/daifflags.h index 22e4c83de5a5..8d91f2233135 100644 --- a/arch/arm64/include/asm/daifflags.h +++ b/arch/arm64/include/asm/daifflags.h @@ -36,11 +36,8 @@ static inline unsigned long local_daif_save(void) { unsigned long flags; - asm volatile( - "mrs %0, daif // local_daif_save\n" - : "=r" (flags) - : - : "memory"); + flags = arch_local_save_flags(); + local_daif_mask(); return flags; @@ -60,11 +57,9 @@ static inline void local_daif_restore(unsigned long flags) { if (!arch_irqs_disabled_flags(flags)) trace_hardirqs_on(); - asm volatile( - "msr daif, %0 // local_daif_restore" - : - : "r" (flags) - : "memory"); + + arch_local_irq_restore(flags); + if (arch_irqs_disabled_flags(flags)) trace_hardirqs_off(); } From 9a0c032825e073e002ee80cf2577431d7296a7f7 Mon Sep 17 00:00:00 2001 From: Julien Thierry Date: Tue, 28 Aug 2018 16:51:15 +0100 Subject: [PATCH 65/78] arm64: Use daifflag_restore after bp_hardening For EL0 entries requiring bp_hardening, daif status is kept at DAIF_PROCCTX_NOIRQ until after hardening has been done. Then interrupts are enabled through local_irq_enable(). Before using local_irq_* functions, daifflags should be properly restored to a state where IRQs are enabled. Enable IRQs by restoring DAIF_PROCCTX state after bp hardening. Acked-by: James Morse Signed-off-by: Julien Thierry Cc: Will Deacon Cc: Mark Rutland Signed-off-by: Catalin Marinas --- arch/arm64/mm/fault.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/arch/arm64/mm/fault.c b/arch/arm64/mm/fault.c index 1aa487a37a0a..d0e638ef3af6 100644 --- a/arch/arm64/mm/fault.c +++ b/arch/arm64/mm/fault.c @@ -37,6 +37,7 @@ #include #include #include +#include #include #include #include @@ -776,7 +777,7 @@ asmlinkage void __exception do_el0_ia_bp_hardening(unsigned long addr, if (addr > TASK_SIZE) arm64_apply_bp_hardening(); - local_irq_enable(); + local_daif_restore(DAIF_PROCCTX); do_mem_abort(addr, esr, regs); } @@ -790,7 +791,7 @@ asmlinkage void __exception do_sp_pc_abort(unsigned long addr, if (user_mode(regs)) { if (instruction_pointer(regs) > TASK_SIZE) arm64_apply_bp_hardening(); - local_irq_enable(); + local_daif_restore(DAIF_PROCCTX); } clear_siginfo(&info); From b0506a8bbb42a859f6d25b3ecc4b6da93bae8d5a Mon Sep 17 00:00:00 2001 From: Julien Thierry Date: Tue, 28 Aug 2018 16:51:17 +0100 Subject: [PATCH 66/78] arm64: xen: Use existing helper to check interrupt status The status of interrupts might depend on more than just pstate. Use interrupts_disabled() instead of raw_irqs_disabled_flags() to take the full context into account. Acked-by: Stefano Stabellini Signed-off-by: Julien Thierry Cc: Will Deacon Signed-off-by: Catalin Marinas --- arch/arm64/include/asm/xen/events.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm64/include/asm/xen/events.h b/arch/arm64/include/asm/xen/events.h index 4e22b7a8c038..2788e95d0ff0 100644 --- a/arch/arm64/include/asm/xen/events.h +++ b/arch/arm64/include/asm/xen/events.h @@ -14,7 +14,7 @@ enum ipi_vector { static inline int xen_irqs_disabled(struct pt_regs *regs) { - return raw_irqs_disabled_flags((unsigned long) regs->pstate); + return !interrupts_enabled(regs); } #define xchg_xen_ulong(ptr, val) xchg((ptr), (val)) From 0293c8ba807c8611ea5503f9511029dd9082025a Mon Sep 17 00:00:00 2001 From: Kyrylo Tkachov Date: Thu, 4 Oct 2018 17:06:46 +0100 Subject: [PATCH 67/78] arm64: Fix typo in a comment in arch/arm64/mm/kasan_init.c "bellow" -> "below" The recommendation from kegel.com/kerspell is to only fix the howlers. "Bellow" is a synonym of "howl" so this should be appropriate. Signed-off-by: Kyrylo Tkachov Signed-off-by: Catalin Marinas --- arch/arm64/mm/kasan_init.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm64/mm/kasan_init.c b/arch/arm64/mm/kasan_init.c index 12145874c02b..fccb1a6f8c6f 100644 --- a/arch/arm64/mm/kasan_init.c +++ b/arch/arm64/mm/kasan_init.c @@ -192,7 +192,7 @@ void __init kasan_init(void) /* * We are going to perform proper setup of shadow memory. - * At first we should unmap early shadow (clear_pgds() call bellow). + * At first we should unmap early shadow (clear_pgds() call below). * However, instrumented code couldn't execute without shadow memory. * tmp_pg_dir used to keep early shadow mapped until full shadow * setup will be finished. From e9ed821be48600ea3ec7f7b76e478c769729f83c Mon Sep 17 00:00:00 2001 From: James Morse Date: Fri, 5 Oct 2018 14:49:16 +0100 Subject: [PATCH 68/78] arm64: mm: Use #ifdef for the __PAGETABLE_P?D_FOLDED defines __is_defined(__PAGETABLE_P?D_FOLDED) doesn't quite work as intended as these symbols are internal to asm-generic and aren't defined in the way kconfig expects. This makes them always evaluate to false. Switch to #ifdef. Acked-by: Mark Rutland Signed-off-by: James Morse Signed-off-by: Catalin Marinas --- arch/arm64/include/asm/pgtable.h | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/arch/arm64/include/asm/pgtable.h b/arch/arm64/include/asm/pgtable.h index b58f764babf8..50b1ef8584c0 100644 --- a/arch/arm64/include/asm/pgtable.h +++ b/arch/arm64/include/asm/pgtable.h @@ -445,10 +445,12 @@ static inline bool in_swapper_pgdir(void *addr) static inline void set_pmd(pmd_t *pmdp, pmd_t pmd) { - if (__is_defined(__PAGETABLE_PMD_FOLDED) && in_swapper_pgdir(pmdp)) { +#ifdef __PAGETABLE_PMD_FOLDED + if (in_swapper_pgdir(pmdp)) { set_swapper_pgd((pgd_t *)pmdp, __pgd(pmd_val(pmd))); return; } +#endif /* __PAGETABLE_PMD_FOLDED */ WRITE_ONCE(*pmdp, pmd); @@ -503,10 +505,12 @@ static inline phys_addr_t pmd_page_paddr(pmd_t pmd) static inline void set_pud(pud_t *pudp, pud_t pud) { - if (__is_defined(__PAGETABLE_PUD_FOLDED) && in_swapper_pgdir(pudp)) { +#ifdef __PAGETABLE_PUD_FOLDED + if (in_swapper_pgdir(pudp)) { set_swapper_pgd((pgd_t *)pudp, __pgd(pud_val(pud))); return; } +#endif /* __PAGETABLE_PUD_FOLDED */ WRITE_ONCE(*pudp, pud); From c20c650d9116a1e7be1b1d38810cbb2168a2ca98 Mon Sep 17 00:00:00 2001 From: James Morse Date: Fri, 5 Oct 2018 15:21:20 +0100 Subject: [PATCH 69/78] MAINTAINERS: fix bad sdei paths The SDEI header files had an 'arm_' namespace added, but the patterns in the MAINTAINERS files were missed. Oops. Reported-by: Joe Perches Signed-off-by: James Morse Signed-off-by: Catalin Marinas --- MAINTAINERS | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/MAINTAINERS b/MAINTAINERS index 225c3af1cccd..ba34d052604a 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -13480,8 +13480,8 @@ L: linux-arm-kernel@lists.infradead.org S: Maintained F: Documentation/devicetree/bindings/arm/firmware/sdei.txt F: drivers/firmware/arm_sdei.c -F: include/linux/sdei.h -F: include/uapi/linux/sdei.h +F: include/linux/arm_sdei.h +F: include/uapi/linux/arm_sdei.h SOFTWARE RAID (Multiple Disks) SUPPORT M: Shaohua Li From 742fafa50b62cb9f379ba14a13443f52afdc0c5d Mon Sep 17 00:00:00 2001 From: Shaokun Zhang Date: Sat, 6 Oct 2018 16:49:04 +0800 Subject: [PATCH 70/78] arm64: mm: Drop the unused cpu parameter Cpu parameter is never used in flush_context, remove it. Acked-by: Will Deacon Signed-off-by: Shaokun Zhang Signed-off-by: Catalin Marinas --- arch/arm64/mm/context.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/arch/arm64/mm/context.c b/arch/arm64/mm/context.c index a65af49e12e7..1f0ea2facf24 100644 --- a/arch/arm64/mm/context.c +++ b/arch/arm64/mm/context.c @@ -88,7 +88,7 @@ void verify_cpu_asid_bits(void) } } -static void flush_context(unsigned int cpu) +static void flush_context(void) { int i; u64 asid; @@ -142,7 +142,7 @@ static bool check_update_reserved_asid(u64 asid, u64 newasid) return hit; } -static u64 new_context(struct mm_struct *mm, unsigned int cpu) +static u64 new_context(struct mm_struct *mm) { static u32 cur_idx = 1; u64 asid = atomic64_read(&mm->context.id); @@ -180,7 +180,7 @@ static u64 new_context(struct mm_struct *mm, unsigned int cpu) /* We're out of ASIDs, so increment the global generation count */ generation = atomic64_add_return_relaxed(ASID_FIRST_VERSION, &asid_generation); - flush_context(cpu); + flush_context(); /* We have more ASIDs than CPUs, so this will always succeed */ asid = find_next_zero_bit(asid_map, NUM_USER_ASIDS, 1); @@ -226,7 +226,7 @@ void check_and_switch_context(struct mm_struct *mm, unsigned int cpu) /* Check that our ASID belongs to the current generation. */ asid = atomic64_read(&mm->context.id); if ((asid ^ atomic64_read(&asid_generation)) >> asid_bits) { - asid = new_context(mm, cpu); + asid = new_context(mm); atomic64_set(&mm->context.id, asid); } From 3b82a6ea23277f56c4f005872bb73c8ce29779d7 Mon Sep 17 00:00:00 2001 From: James Morse Date: Wed, 10 Oct 2018 16:55:44 +0100 Subject: [PATCH 71/78] Revert "arm64: uaccess: implement unsafe accessors" This reverts commit a1f33941f7e103bcf471eaf8461b212223c642d6. The unsafe accessors allow the PAN enable/disable calls to be made once for a group of accesses. Adding these means we can now have sequences that look like this: | user_access_begin(); | unsafe_put_user(static-value, x, err); | unsafe_put_user(helper-that-sleeps(), x, err); | user_access_end(); Calling schedule() without taking an exception doesn't switch the PSTATE or TTBRs. We can switch out of a uaccess-enabled region, and run other code with uaccess enabled for a different thread. We can also switch from uaccess-disabled code back into this region, meaning the unsafe_put_user()s will fault. For software-PAN, threads that do this will get stuck as handle_mm_fault() will determine the page has already been mapped in, but we fault again as the page tables aren't loaded. To solve this we need code in __switch_to() that save/restores the PAN state. Acked-by: Julien Thierry Acked-by: Mark Rutland Signed-off-by: James Morse Signed-off-by: Catalin Marinas --- arch/arm64/include/asm/uaccess.h | 61 ++++++++------------------------ 1 file changed, 15 insertions(+), 46 deletions(-) diff --git a/arch/arm64/include/asm/uaccess.h b/arch/arm64/include/asm/uaccess.h index 8ac6e34922e7..07c34087bd5e 100644 --- a/arch/arm64/include/asm/uaccess.h +++ b/arch/arm64/include/asm/uaccess.h @@ -276,9 +276,11 @@ static inline void __user *__uaccess_mask_ptr(const void __user *ptr) : "+r" (err), "=&r" (x) \ : "r" (addr), "i" (-EFAULT)) -#define __get_user_err_unsafe(x, ptr, err) \ +#define __get_user_err(x, ptr, err) \ do { \ unsigned long __gu_val; \ + __chk_user_ptr(ptr); \ + uaccess_enable_not_uao(); \ switch (sizeof(*(ptr))) { \ case 1: \ __get_user_asm("ldrb", "ldtrb", "%w", __gu_val, (ptr), \ @@ -299,24 +301,17 @@ do { \ default: \ BUILD_BUG(); \ } \ + uaccess_disable_not_uao(); \ (x) = (__force __typeof__(*(ptr)))__gu_val; \ } while (0) -#define __get_user_err_check(x, ptr, err) \ -do { \ - __chk_user_ptr(ptr); \ - uaccess_enable_not_uao(); \ - __get_user_err_unsafe((x), (ptr), (err)); \ - uaccess_disable_not_uao(); \ -} while (0) - -#define __get_user_err(x, ptr, err, accessor) \ +#define __get_user_check(x, ptr, err) \ ({ \ __typeof__(*(ptr)) __user *__p = (ptr); \ might_fault(); \ if (access_ok(VERIFY_READ, __p, sizeof(*__p))) { \ __p = uaccess_mask_ptr(__p); \ - accessor((x), __p, (err)); \ + __get_user_err((x), __p, (err)); \ } else { \ (x) = 0; (err) = -EFAULT; \ } \ @@ -324,14 +319,14 @@ do { \ #define __get_user_error(x, ptr, err) \ ({ \ - __get_user_err((x), (ptr), (err), __get_user_err_check); \ + __get_user_check((x), (ptr), (err)); \ (void)0; \ }) #define __get_user(x, ptr) \ ({ \ int __gu_err = 0; \ - __get_user_err((x), (ptr), __gu_err, __get_user_err_check); \ + __get_user_check((x), (ptr), __gu_err); \ __gu_err; \ }) @@ -351,9 +346,11 @@ do { \ : "+r" (err) \ : "r" (x), "r" (addr), "i" (-EFAULT)) -#define __put_user_err_unsafe(x, ptr, err) \ +#define __put_user_err(x, ptr, err) \ do { \ __typeof__(*(ptr)) __pu_val = (x); \ + __chk_user_ptr(ptr); \ + uaccess_enable_not_uao(); \ switch (sizeof(*(ptr))) { \ case 1: \ __put_user_asm("strb", "sttrb", "%w", __pu_val, (ptr), \ @@ -374,24 +371,16 @@ do { \ default: \ BUILD_BUG(); \ } \ -} while (0) - - -#define __put_user_err_check(x, ptr, err) \ -do { \ - __chk_user_ptr(ptr); \ - uaccess_enable_not_uao(); \ - __put_user_err_unsafe((x), (ptr), (err)); \ uaccess_disable_not_uao(); \ } while (0) -#define __put_user_err(x, ptr, err, accessor) \ +#define __put_user_check(x, ptr, err) \ ({ \ __typeof__(*(ptr)) __user *__p = (ptr); \ might_fault(); \ if (access_ok(VERIFY_WRITE, __p, sizeof(*__p))) { \ __p = uaccess_mask_ptr(__p); \ - accessor((x), __p, (err)); \ + __put_user_err((x), __p, (err)); \ } else { \ (err) = -EFAULT; \ } \ @@ -399,39 +388,19 @@ do { \ #define __put_user_error(x, ptr, err) \ ({ \ - __put_user_err((x), (ptr), (err), __put_user_err_check); \ + __put_user_check((x), (ptr), (err)); \ (void)0; \ }) #define __put_user(x, ptr) \ ({ \ int __pu_err = 0; \ - __put_user_err((x), (ptr), __pu_err, __put_user_err_check); \ + __put_user_check((x), (ptr), __pu_err); \ __pu_err; \ }) #define put_user __put_user - -#define user_access_begin() uaccess_enable_not_uao() -#define user_access_end() uaccess_disable_not_uao() - -#define unsafe_get_user(x, ptr, err) \ -do { \ - int __gu_err = 0; \ - __get_user_err((x), (ptr), __gu_err, __get_user_err_unsafe); \ - if (__gu_err != 0) \ - goto err; \ -} while (0) - -#define unsafe_put_user(x, ptr, err) \ -do { \ - int __pu_err = 0; \ - __put_user_err((x), (ptr), __pu_err, __put_user_err_unsafe); \ - if (__pu_err != 0) \ - goto err; \ -} while (0) - extern unsigned long __must_check __arch_copy_from_user(void *to, const void __user *from, unsigned long n); #define raw_copy_from_user(to, from, n) \ ({ \ From e03a4e5bb7430f9294c12f02c69eb045d010e942 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Wed, 10 Oct 2018 17:25:51 +0100 Subject: [PATCH 72/78] arm64: Add silicon-errata.txt entry for ARM erratum 1188873 Document that we actually work around ARM erratum 1188873 Fixes: 95b861a4a6d9 ("arm64: arch_timer: Add workaround for ARM erratum 1188873") Signed-off-by: Marc Zyngier Signed-off-by: Catalin Marinas --- Documentation/arm64/silicon-errata.txt | 1 + 1 file changed, 1 insertion(+) diff --git a/Documentation/arm64/silicon-errata.txt b/Documentation/arm64/silicon-errata.txt index 3b2f2dd82225..76ccded8b74c 100644 --- a/Documentation/arm64/silicon-errata.txt +++ b/Documentation/arm64/silicon-errata.txt @@ -56,6 +56,7 @@ stable kernels. | ARM | Cortex-A72 | #853709 | N/A | | ARM | Cortex-A73 | #858921 | ARM64_ERRATUM_858921 | | ARM | Cortex-A55 | #1024718 | ARM64_ERRATUM_1024718 | +| ARM | Cortex-A76 | #1188873 | ARM64_ERRATUM_1188873 | | ARM | MMU-500 | #841119,#826419 | N/A | | | | | | | Cavium | ThunderX ITS | #22375, #24313 | CAVIUM_ERRATUM_22375 | From 26a6f87ef596e612ab79e456155e195f2fa9b891 Mon Sep 17 00:00:00 2001 From: James Morse Date: Wed, 10 Oct 2018 15:43:22 +0100 Subject: [PATCH 73/78] arm64: mm: Use __pa_symbol() for set_swapper_pgd() commit 2330b7ca78350efcb ("arm64/mm: use fixmap to modify swapper_pg_dir") modifies the swapper_pg_dir via the fixmap as the kernel page tables have been moved to a read-only part of the kernel mapping. Using __pa() to setup the fixmap causes CONFIG_DEBUG_VIRTUAL to fire, as this function is used on the kernel-image swapper address. The in_swapper_pgdir() test before each call of this function means set_swapper_pgd() will only ever be called when pgdp points somewhere in the kernel-image mapping of swapper_pd_dir. Use __pa_symbol(). Reported-by: Geert Uytterhoeven Acked-by: Will Deacon Cc: Jun Yao Tested-by: Geert Uytterhoeven Signed-off-by: James Morse Signed-off-by: Catalin Marinas --- arch/arm64/mm/mmu.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm64/mm/mmu.c b/arch/arm64/mm/mmu.c index 6f0e2edcc114..6deb836a102a 100644 --- a/arch/arm64/mm/mmu.c +++ b/arch/arm64/mm/mmu.c @@ -74,7 +74,7 @@ void set_swapper_pgd(pgd_t *pgdp, pgd_t pgd) pgd_t *fixmap_pgdp; spin_lock(&swapper_pgdir_lock); - fixmap_pgdp = pgd_set_fixmap(__pa(pgdp)); + fixmap_pgdp = pgd_set_fixmap(__pa_symbol(pgdp)); WRITE_ONCE(*fixmap_pgdp, pgd); /* * We need dsb(ishst) here to ensure the page-table-walker sees From 0c09d4856462bf71e37ccd26d0deb53aad6cee6a Mon Sep 17 00:00:00 2001 From: Punit Agrawal Date: Mon, 8 Oct 2018 11:03:55 +0100 Subject: [PATCH 74/78] Documentation/arm64: HugeTLB page implementation Arm v8 architecture supports multiple page sizes - 4k, 16k and 64k. Based on the active page size, the Linux port supports corresponding hugepage sizes at PMD and PUD(4k only) levels. In addition, the architecture also supports caching larger sized ranges (composed of multiple entries) at the PTE and PMD level in the TLBs using the contiguous bit. The Linux port makes use of this architectural support to enable additional hugepage sizes. Describe the two different types of hugepages supported by the arm64 kernel and the hugepage sizes enabled by each. Acked-by: Randy Dunlap Acked-by: Will Deacon Cc: Jonathan Corbet Signed-off-by: Punit Agrawal Signed-off-by: Catalin Marinas --- Documentation/arm64/hugetlbpage.txt | 38 +++++++++++++++++++++++++++++ 1 file changed, 38 insertions(+) create mode 100644 Documentation/arm64/hugetlbpage.txt diff --git a/Documentation/arm64/hugetlbpage.txt b/Documentation/arm64/hugetlbpage.txt new file mode 100644 index 000000000000..cfae87dc653b --- /dev/null +++ b/Documentation/arm64/hugetlbpage.txt @@ -0,0 +1,38 @@ +HugeTLBpage on ARM64 +==================== + +Hugepage relies on making efficient use of TLBs to improve performance of +address translations. The benefit depends on both - + + - the size of hugepages + - size of entries supported by the TLBs + +The ARM64 port supports two flavours of hugepages. + +1) Block mappings at the pud/pmd level +-------------------------------------- + +These are regular hugepages where a pmd or a pud page table entry points to a +block of memory. Regardless of the supported size of entries in TLB, block +mappings reduce the depth of page table walk needed to translate hugepage +addresses. + +2) Using the Contiguous bit +--------------------------- + +The architecture provides a contiguous bit in the translation table entries +(D4.5.3, ARM DDI 0487C.a) that hints to the MMU to indicate that it is one of a +contiguous set of entries that can be cached in a single TLB entry. + +The contiguous bit is used in Linux to increase the mapping size at the pmd and +pte (last) level. The number of supported contiguous entries varies by page size +and level of the page table. + + +The following hugepage sizes are supported - + + CONT PTE PMD CONT PMD PUD + -------- --- -------- --- + 4K: 64K 2M 32M 1G + 16K: 2M 32M 1G + 64K: 2M 512M 16G From 8ab66cbe63aeaf9e5970fb4aaef1c660fca59321 Mon Sep 17 00:00:00 2001 From: Suzuki K Poulose Date: Tue, 9 Oct 2018 14:47:05 +0100 Subject: [PATCH 75/78] arm64: cpufeature: ctr: Fix cpu capability check for late CPUs The matches() routine for a capability must honor the "scope" passed to it and return the proper results. i.e, when passed with SCOPE_LOCAL_CPU, it should check the status of the capability on the current CPU. This is used by verify_local_cpu_capabilities() on a late secondary CPU to make sure that it's compliant with the established system features. However, ARM64_HAS_CACHE_{IDC/DIC} always checks the system wide registers and this could mean that a late secondary CPU could return "true" (since the CPU hasn't updated the system wide registers yet) and thus lead the system in an inconsistent state, where the system assumes it has IDC/DIC feature, while the new CPU doesn't. Fixes: commit 6ae4b6e0578886eb36 ("arm64: Add support for new control bits CTR_EL0.DIC and CTR_EL0.IDC") Cc: Philip Elcan Cc: Shanker Donthineni Cc: Mark Rutland Cc: Will Deacon Signed-off-by: Suzuki K Poulose Signed-off-by: Catalin Marinas --- arch/arm64/kernel/cpufeature.c | 22 ++++++++++++++++++---- 1 file changed, 18 insertions(+), 4 deletions(-) diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c index 00e7c313f088..ba16bb7762ca 100644 --- a/arch/arm64/kernel/cpufeature.c +++ b/arch/arm64/kernel/cpufeature.c @@ -854,15 +854,29 @@ static bool has_no_fpsimd(const struct arm64_cpu_capabilities *entry, int __unus } static bool has_cache_idc(const struct arm64_cpu_capabilities *entry, - int __unused) + int scope) { - return read_sanitised_ftr_reg(SYS_CTR_EL0) & BIT(CTR_IDC_SHIFT); + u64 ctr; + + if (scope == SCOPE_SYSTEM) + ctr = arm64_ftr_reg_ctrel0.sys_val; + else + ctr = read_cpuid_cachetype(); + + return ctr & BIT(CTR_IDC_SHIFT); } static bool has_cache_dic(const struct arm64_cpu_capabilities *entry, - int __unused) + int scope) { - return read_sanitised_ftr_reg(SYS_CTR_EL0) & BIT(CTR_DIC_SHIFT); + u64 ctr; + + if (scope == SCOPE_SYSTEM) + ctr = arm64_ftr_reg_ctrel0.sys_val; + else + ctr = read_cpuid_cachetype(); + + return ctr & BIT(CTR_DIC_SHIFT); } static bool __maybe_unused From 1602df02f33f61fe0de1bbfeba0d1c97c14bff19 Mon Sep 17 00:00:00 2001 From: Suzuki K Poulose Date: Tue, 9 Oct 2018 14:47:06 +0100 Subject: [PATCH 76/78] arm64: cpufeature: Fix handling of CTR_EL0.IDC field CTR_EL0.IDC reports the data cache clean requirements for instruction to data coherence. However, if the field is 0, we need to check the CLIDR_EL1 fields to detect the status of the feature. Currently we don't do this and generate a warning with tainting the kernel, when there is a mismatch in the field among the CPUs. Also the userspace doesn't have a reliable way to check the CLIDR_EL1 register to check the status. This patch fixes the problem by checking the CLIDR_EL1 fields, when (CTR_EL0.IDC == 0) and updates the kernel's copy of the CTR_EL0 for the CPU with the actual status of the feature. This would allow the sanity check infrastructure to do the proper checking of the fields and also allow the CTR_EL0 emulation code to supply the real status of the feature. Now, if a CPU has raw CTR_EL0.IDC == 0 and effective IDC == 1 (with overall system wide IDC == 1), we need to expose the real value to the user. So, we trap CTR_EL0 access on the CPU which reports incorrect CTR_EL0.IDC. Fixes: commit 6ae4b6e057888 ("arm64: Add support for new control bits CTR_EL0.DIC and CTR_EL0.IDC") Cc: Shanker Donthineni Cc: Philip Elcan Cc: Will Deacon Cc: Mark Rutland Signed-off-by: Suzuki K Poulose Signed-off-by: Catalin Marinas --- arch/arm64/include/asm/cache.h | 40 ++++++++++++++++++++++++++++++++++ arch/arm64/kernel/cpu_errata.c | 27 ++++++++++++++++++++--- arch/arm64/kernel/cpufeature.c | 15 ++++++++++++- arch/arm64/kernel/cpuinfo.c | 10 ++++++++- 4 files changed, 87 insertions(+), 5 deletions(-) diff --git a/arch/arm64/include/asm/cache.h b/arch/arm64/include/asm/cache.h index 5ee5bca8c24b..13dd42c3ad4e 100644 --- a/arch/arm64/include/asm/cache.h +++ b/arch/arm64/include/asm/cache.h @@ -40,6 +40,15 @@ #define L1_CACHE_SHIFT (6) #define L1_CACHE_BYTES (1 << L1_CACHE_SHIFT) + +#define CLIDR_LOUU_SHIFT 27 +#define CLIDR_LOC_SHIFT 24 +#define CLIDR_LOUIS_SHIFT 21 + +#define CLIDR_LOUU(clidr) (((clidr) >> CLIDR_LOUU_SHIFT) & 0x7) +#define CLIDR_LOC(clidr) (((clidr) >> CLIDR_LOC_SHIFT) & 0x7) +#define CLIDR_LOUIS(clidr) (((clidr) >> CLIDR_LOUIS_SHIFT) & 0x7) + /* * Memory returned by kmalloc() may be used for DMA, so we must make * sure that all such allocations are cache aligned. Otherwise, @@ -84,6 +93,37 @@ static inline int cache_line_size(void) return cwg ? 4 << cwg : ARCH_DMA_MINALIGN; } +/* + * Read the effective value of CTR_EL0. + * + * According to ARM ARM for ARMv8-A (ARM DDI 0487C.a), + * section D10.2.33 "CTR_EL0, Cache Type Register" : + * + * CTR_EL0.IDC reports the data cache clean requirements for + * instruction to data coherence. + * + * 0 - dcache clean to PoU is required unless : + * (CLIDR_EL1.LoC == 0) || (CLIDR_EL1.LoUIS == 0 && CLIDR_EL1.LoUU == 0) + * 1 - dcache clean to PoU is not required for i-to-d coherence. + * + * This routine provides the CTR_EL0 with the IDC field updated to the + * effective state. + */ +static inline u32 __attribute_const__ read_cpuid_effective_cachetype(void) +{ + u32 ctr = read_cpuid_cachetype(); + + if (!(ctr & BIT(CTR_IDC_SHIFT))) { + u64 clidr = read_sysreg(clidr_el1); + + if (CLIDR_LOC(clidr) == 0 || + (CLIDR_LOUIS(clidr) == 0 && CLIDR_LOUU(clidr) == 0)) + ctr |= BIT(CTR_IDC_SHIFT); + } + + return ctr; +} + #endif /* __ASSEMBLY__ */ #endif diff --git a/arch/arm64/kernel/cpu_errata.c b/arch/arm64/kernel/cpu_errata.c index cde948991d68..3deb01c6ed49 100644 --- a/arch/arm64/kernel/cpu_errata.c +++ b/arch/arm64/kernel/cpu_errata.c @@ -68,11 +68,32 @@ static bool has_mismatched_cache_type(const struct arm64_cpu_capabilities *entry, int scope) { - u64 mask = arm64_ftr_reg_ctrel0.strict_mask;; + u64 mask = arm64_ftr_reg_ctrel0.strict_mask; + u64 sys = arm64_ftr_reg_ctrel0.sys_val & mask; + u64 ctr_raw, ctr_real; WARN_ON(scope != SCOPE_LOCAL_CPU || preemptible()); - return (read_cpuid_cachetype() & mask) != - (arm64_ftr_reg_ctrel0.sys_val & mask); + + /* + * We want to make sure that all the CPUs in the system expose + * a consistent CTR_EL0 to make sure that applications behaves + * correctly with migration. + * + * If a CPU has CTR_EL0.IDC but does not advertise it via CTR_EL0 : + * + * 1) It is safe if the system doesn't support IDC, as CPU anyway + * reports IDC = 0, consistent with the rest. + * + * 2) If the system has IDC, it is still safe as we trap CTR_EL0 + * access on this CPU via the ARM64_HAS_CACHE_IDC capability. + * + * So, we need to make sure either the raw CTR_EL0 or the effective + * CTR_EL0 matches the system's copy to allow a secondary CPU to boot. + */ + ctr_raw = read_cpuid_cachetype() & mask; + ctr_real = read_cpuid_effective_cachetype() & mask; + + return (ctr_real != sys) && (ctr_raw != sys); } static void diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c index ba16bb7762ca..af50064dea51 100644 --- a/arch/arm64/kernel/cpufeature.c +++ b/arch/arm64/kernel/cpufeature.c @@ -861,11 +861,23 @@ static bool has_cache_idc(const struct arm64_cpu_capabilities *entry, if (scope == SCOPE_SYSTEM) ctr = arm64_ftr_reg_ctrel0.sys_val; else - ctr = read_cpuid_cachetype(); + ctr = read_cpuid_effective_cachetype(); return ctr & BIT(CTR_IDC_SHIFT); } +static void cpu_emulate_effective_ctr(const struct arm64_cpu_capabilities *__unused) +{ + /* + * If the CPU exposes raw CTR_EL0.IDC = 0, while effectively + * CTR_EL0.IDC = 1 (from CLIDR values), we need to trap accesses + * to the CTR_EL0 on this CPU and emulate it with the real/safe + * value. + */ + if (!(read_cpuid_cachetype() & BIT(CTR_IDC_SHIFT))) + sysreg_clear_set(sctlr_el1, SCTLR_EL1_UCT, 0); +} + static bool has_cache_dic(const struct arm64_cpu_capabilities *entry, int scope) { @@ -1282,6 +1294,7 @@ static const struct arm64_cpu_capabilities arm64_features[] = { .capability = ARM64_HAS_CACHE_IDC, .type = ARM64_CPUCAP_SYSTEM_FEATURE, .matches = has_cache_idc, + .cpu_enable = cpu_emulate_effective_ctr, }, { .desc = "Instruction cache invalidation not required for I/D coherence", diff --git a/arch/arm64/kernel/cpuinfo.c b/arch/arm64/kernel/cpuinfo.c index dce971f2c167..bcc2831399cb 100644 --- a/arch/arm64/kernel/cpuinfo.c +++ b/arch/arm64/kernel/cpuinfo.c @@ -325,7 +325,15 @@ static void cpuinfo_detect_icache_policy(struct cpuinfo_arm64 *info) static void __cpuinfo_store_cpu(struct cpuinfo_arm64 *info) { info->reg_cntfrq = arch_timer_get_cntfrq(); - info->reg_ctr = read_cpuid_cachetype(); + /* + * Use the effective value of the CTR_EL0 than the raw value + * exposed by the CPU. CTR_E0.IDC field value must be interpreted + * with the CLIDR_EL1 fields to avoid triggering false warnings + * when there is a mismatch across the CPUs. Keep track of the + * effective value of the CTR_EL0 in our internal records for + * acurate sanity check and feature enablement. + */ + info->reg_ctr = read_cpuid_effective_cachetype(); info->reg_dczid = read_cpuid(DCZID_EL0); info->reg_midr = read_cpuid_id(); info->reg_revidr = read_cpuid(REVIDR_EL1); From 4afe8e79da920bdf6698b01bc668fffc6758f37b Mon Sep 17 00:00:00 2001 From: Suzuki K Poulose Date: Tue, 9 Oct 2018 14:47:07 +0100 Subject: [PATCH 77/78] arm64: cpufeature: Trap CTR_EL0 access only where it is necessary When there is a mismatch in the CTR_EL0 field, we trap access to CTR from EL0 on all CPUs to expose the safe value. However, we could skip trapping on a CPU which matches the safe value. Cc: Mark Rutland Cc: Will Deacon Signed-off-by: Suzuki K Poulose Signed-off-by: Catalin Marinas --- arch/arm64/kernel/cpu_errata.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/arch/arm64/kernel/cpu_errata.c b/arch/arm64/kernel/cpu_errata.c index 3deb01c6ed49..9af8df96ec49 100644 --- a/arch/arm64/kernel/cpu_errata.c +++ b/arch/arm64/kernel/cpu_errata.c @@ -99,7 +99,12 @@ has_mismatched_cache_type(const struct arm64_cpu_capabilities *entry, static void cpu_enable_trap_ctr_access(const struct arm64_cpu_capabilities *__unused) { - sysreg_clear_set(sctlr_el1, SCTLR_EL1_UCT, 0); + u64 mask = arm64_ftr_reg_ctrel0.strict_mask; + + /* Trap CTR_EL0 access on this CPU, only if it has a mismatch */ + if ((read_cpuid_cachetype() & mask) != + (arm64_ftr_reg_ctrel0.sys_val & mask)) + sysreg_clear_set(sctlr_el1, SCTLR_EL1_UCT, 0); } atomic_t arm64_el2_vector_last_slot = ATOMIC_INIT(-1); From 4debef5510071032c6d5dace31ca1cc42a120073 Mon Sep 17 00:00:00 2001 From: James Morse Date: Fri, 21 Sep 2018 21:49:19 +0100 Subject: [PATCH 78/78] arm64: KVM: Guests can skip __install_bp_hardening_cb()s HYP work enable_smccc_arch_workaround_1() passes NULL as the hyp_vecs start and end if the HVC conduit is in use, and ARM_SMCCC_ARCH_WORKAROUND_1 is detected. If the guest kernel happened to be built with KVM_INDIRECT_VECTORS, we go on to allocate a slot, memcpy() the empty workaround in and do the appropriate cache maintenance. This works as we always tell memcpy() the range is 0, so it never accesses the NULL src pointer, but we still do the cache maintenance. If hyp_vecs_start is NULL we know we're a guest, just update the fn like the !KVM_INDIRECT_VECTORS version. Reviewed-by: Julien Thierry Acked-by: Marc Zyngier Signed-off-by: James Morse Signed-off-by: Catalin Marinas --- arch/arm64/kernel/cpu_errata.c | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/arch/arm64/kernel/cpu_errata.c b/arch/arm64/kernel/cpu_errata.c index 9af8df96ec49..a509e35132d2 100644 --- a/arch/arm64/kernel/cpu_errata.c +++ b/arch/arm64/kernel/cpu_errata.c @@ -138,6 +138,15 @@ static void __install_bp_hardening_cb(bp_hardening_cb_t fn, static DEFINE_SPINLOCK(bp_lock); int cpu, slot = -1; + /* + * enable_smccc_arch_workaround_1() passes NULL for the hyp_vecs + * start/end if we're a guest. Skip the hyp-vectors work. + */ + if (!hyp_vecs_start) { + __this_cpu_write(bp_hardening_data.fn, fn); + return; + } + spin_lock(&bp_lock); for_each_possible_cpu(cpu) { if (per_cpu(bp_hardening_data.fn, cpu) == fn) {