diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h
index 7f384bb62d8e..36ea3cf7d85e 100644
--- a/include/linux/mm_types.h
+++ b/include/linux/mm_types.h
@@ -531,23 +531,44 @@ static inline cpumask_t *mm_cpumask(struct mm_struct *mm)
  */
 static inline bool mm_tlb_flush_pending(struct mm_struct *mm)
 {
-	barrier();
+	/*
+	 * Must be called with PTL held; such that our PTL acquire will have
+	 * observed the store from set_tlb_flush_pending().
+	 */
 	return mm->tlb_flush_pending;
 }
 static inline void set_tlb_flush_pending(struct mm_struct *mm)
 {
 	mm->tlb_flush_pending = true;
-
 	/*
-	 * Guarantee that the tlb_flush_pending store does not leak into the
-	 * critical section updating the page tables
+	 * The only time this value is relevant is when there are indeed pages
+	 * to flush. And we'll only flush pages after changing them, which
+	 * requires the PTL.
+	 *
+	 * So the ordering here is:
+	 *
+	 *	mm->tlb_flush_pending = true;
+	 *	spin_lock(&ptl);
+	 *	...
+	 *	set_pte_at();
+	 *	spin_unlock(&ptl);
+	 *
+	 *				spin_lock(&ptl)
+	 *				mm_tlb_flush_pending();
+	 *				....
+	 *				spin_unlock(&ptl);
+	 *
+	 *	flush_tlb_range();
+	 *	mm->tlb_flush_pending = false;
+	 *
+	 * So the =true store is constrained by the PTL unlock, and the =false
+	 * store is constrained by the TLB invalidate.
 	 */
-	smp_mb__before_spinlock();
 }
 /* Clearing is done after a TLB flush, which also provides a barrier. */
 static inline void clear_tlb_flush_pending(struct mm_struct *mm)
 {
-	barrier();
+	/* see set_tlb_flush_pending */
 	mm->tlb_flush_pending = false;
 }
 #else
diff --git a/mm/huge_memory.c b/mm/huge_memory.c
index 86975dec0ba1..c76a720b936b 100644
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -1410,6 +1410,7 @@ int do_huge_pmd_numa_page(struct vm_fault *vmf, pmd_t pmd)
 	unsigned long haddr = vmf->address & HPAGE_PMD_MASK;
 	int page_nid = -1, this_nid = numa_node_id();
 	int target_nid, last_cpupid = -1;
+	bool need_flush = false;
 	bool page_locked;
 	bool migrated = false;
 	bool was_writable;
@@ -1495,11 +1496,30 @@ int do_huge_pmd_numa_page(struct vm_fault *vmf, pmd_t pmd)
 		goto clear_pmdnuma;
 	}
 
+	/*
+	 * Since we took the NUMA fault, we must have observed the !accessible
+	 * bit. Make sure all other CPUs agree with that, to avoid them
+	 * modifying the page we're about to migrate.
+	 *
+	 * Must be done under PTL such that we'll observe the relevant
+	 * set_tlb_flush_pending().
+	 */
+	if (mm_tlb_flush_pending(vma->vm_mm))
+		need_flush = true;
+
 	/*
 	 * Migrate the THP to the requested node, returns with page unlocked
 	 * and access rights restored.
 	 */
 	spin_unlock(vmf->ptl);
+
+	/*
+	 * We are not sure a pending tlb flush here is for a huge page
+	 * mapping or not. Hence use the tlb range variant
+	 */
+	if (need_flush)
+		flush_tlb_range(vma, haddr, haddr + HPAGE_PMD_SIZE);
+
 	migrated = migrate_misplaced_transhuge_page(vma->vm_mm, vma,
 				vmf->pmd, pmd, vmf->address, page, target_nid);
 	if (migrated) {
diff --git a/mm/migrate.c b/mm/migrate.c
index 627671551873..d68a41da6abb 100644
--- a/mm/migrate.c
+++ b/mm/migrate.c
@@ -1937,12 +1937,6 @@ int migrate_misplaced_transhuge_page(struct mm_struct *mm,
 		put_page(new_page);
 		goto out_fail;
 	}
-	/*
-	 * We are not sure a pending tlb flush here is for a huge page
-	 * mapping or not. Hence use the tlb range variant
-	 */
-	if (mm_tlb_flush_pending(mm))
-		flush_tlb_range(vma, mmun_start, mmun_end);
 
 	/* Prepare a page as a migration target */
 	__SetPageLocked(new_page);
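
As a reading aid for the ordering argument in the set_tlb_flush_pending() comment above, here is a minimal userspace sketch of the same pattern, assuming only C11 atomics and POSIX threads. The mutex stands in for the PTL, the atomic flag for mm->tlb_flush_pending, and the thread names (changer, fault_handler) are invented for illustration; this is not kernel code and not part of the patch.

/*
 * Illustrative userspace analog only -- not kernel code.  A pthread mutex
 * stands in for the PTL, an atomic flag for mm->tlb_flush_pending, and a
 * plain int (only touched under the lock) for the page-table entry.
 */
#include <pthread.h>
#include <stdatomic.h>
#include <stdbool.h>
#include <stdio.h>

static pthread_mutex_t ptl = PTHREAD_MUTEX_INITIALIZER;	/* "PTL" */
static atomic_bool flush_pending;			/* "mm->tlb_flush_pending" */
static int pte;						/* "page table entry" */

static void *changer(void *arg)
{
	(void)arg;
	atomic_store(&flush_pending, true);	/* set_tlb_flush_pending() */

	pthread_mutex_lock(&ptl);
	pte = 1;				/* set_pte_at() */
	pthread_mutex_unlock(&ptl);

	/* flush_tlb_range() would go here */
	atomic_store(&flush_pending, false);	/* clear_tlb_flush_pending() */
	return NULL;
}

static void *fault_handler(void *arg)
{
	(void)arg;
	pthread_mutex_lock(&ptl);
	/*
	 * If the updated pte is visible here, our lock acquire also ordered
	 * the earlier flush_pending store before this read: checking the
	 * flag is only meaningful while holding the lock, which is what the
	 * new comment on mm_tlb_flush_pending() requires.
	 */
	if (pte == 1 && atomic_load(&flush_pending))
		printf("pending flush observed under the lock\n");
	pthread_mutex_unlock(&ptl);
	return NULL;
}

int main(void)
{
	pthread_t a, b;

	pthread_create(&a, NULL, changer, NULL);
	pthread_create(&b, NULL, fault_handler, NULL);
	pthread_join(a, NULL);
	pthread_join(b, NULL);
	return 0;
}

The point mirrors the patch: the flag is set before the locked page-table update and cleared only after the flush, so any thread that takes the same lock and observes the update is also guaranteed to observe the flag.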