khugepaged: collapse_pte_mapped_thp() protect the pmd lock
commit 119a5fc161 upstream.

When retract_page_tables() removes a page table to make way for a huge
pmd, it holds huge page lock, i_mmap_lock_write, mmap_write_trylock and
pmd lock; but when collapse_pte_mapped_thp() does the same (to handle
the case when the original mmap_write_trylock had failed), only
mmap_write_trylock and pmd lock are held.

That's not enough.  One machine has twice crashed under load, with
"BUG: spinlock bad magic" and GPF on 6b6b6b6b6b6b6b6b.  Examining the
second crash, page_vma_mapped_walk_done()'s spin_unlock of pvmw->ptl
(serving page_referenced() on a file THP, that had found a page table
at *pmd) discovers that the page table page and its lock have already
been freed by the time it comes to unlock.

Follow the example of retract_page_tables(), but we only need one of
huge page lock or i_mmap_lock_write to secure against this: because
it's the narrower lock, and because it simplifies
collapse_pte_mapped_thp() to know the hpage earlier, choose to rely on
huge page lock here.

Fixes: 27e1f82731 ("khugepaged: enable collapse pmd for pte-mapped THP")
Signed-off-by: Hugh Dickins <hughd@google.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Acked-by: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
Cc: Andrea Arcangeli <aarcange@redhat.com>
Cc: Mike Kravetz <mike.kravetz@oracle.com>
Cc: Song Liu <songliubraving@fb.com>
Cc: <stable@vger.kernel.org> [5.4+]
Link: http://lkml.kernel.org/r/alpine.LSU.2.11.2008021213070.27773@eggly.anvils
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
parent 687d366d0d
commit a6b238cac6
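The essence of the fix, condensed into a sketch below (not the verbatim
patched function: the pte validation and pmd collapse in the middle are
elided, and the sketch omits the vma sanity checks the real function
performs before this point): take a reference on the huge page and lock
it before the page table is examined or freed, and drop both only
afterwards.  A concurrent rmap walker such as page_referenced() ->
page_vma_mapped_walk() takes the same page lock while it dereferences
the page table, so it can no longer be left trying to spin_unlock() a
ptl whose page table page has already been freed.

#include <linux/mm.h>
#include <linux/pagemap.h>	/* find_lock_page(), linear_page_index() */
#include <linux/huge_mm.h>	/* HPAGE_PMD_MASK */

/* Condensed sketch of the patched flow, not the verbatim kernel code. */
static void collapse_pte_mapped_thp_sketch(struct mm_struct *mm,
					   unsigned long addr)
{
	unsigned long haddr = addr & HPAGE_PMD_MASK;
	struct vm_area_struct *vma = find_vma(mm, haddr);
	struct page *hpage;

	if (!vma || !vma->vm_file)
		return;

	/* The fix: pin and lock the hpage before touching the page table. */
	hpage = find_lock_page(vma->vm_file->f_mapping,
			       linear_page_index(vma, haddr));
	if (!hpage)
		return;
	if (!PageHead(hpage))		/* not (or no longer) a THP head */
		goto drop_hpage;

	/*
	 * ... validate the ptes and collapse the pmd as the real function
	 * does; the page lock excludes page_vma_mapped_walk() on hpage
	 * until the unlock below, so no walker can be caught holding the
	 * ptl of a freed page table.
	 */

drop_hpage:
	unlock_page(hpage);
	put_page(hpage);
}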
--- a/mm/khugepaged.c
+++ b/mm/khugepaged.c
@@ -1294,7 +1294,7 @@ void collapse_pte_mapped_thp(struct mm_struct *mm, unsigned long addr)
 {
 	unsigned long haddr = addr & HPAGE_PMD_MASK;
 	struct vm_area_struct *vma = find_vma(mm, haddr);
-	struct page *hpage = NULL;
+	struct page *hpage;
 	pte_t *start_pte, *pte;
 	pmd_t *pmd, _pmd;
 	spinlock_t *ptl;
@@ -1314,9 +1314,17 @@ void collapse_pte_mapped_thp(struct mm_struct *mm, unsigned long addr)
 	if (!hugepage_vma_check(vma, vma->vm_flags | VM_HUGEPAGE))
 		return;
 
+	hpage = find_lock_page(vma->vm_file->f_mapping,
+			       linear_page_index(vma, haddr));
+	if (!hpage)
+		return;
+
+	if (!PageHead(hpage))
+		goto drop_hpage;
+
 	pmd = mm_find_pmd(mm, haddr);
 	if (!pmd)
-		return;
+		goto drop_hpage;
 
 	start_pte = pte_offset_map_lock(mm, pmd, haddr, &ptl);
 
@@ -1335,30 +1343,11 @@ void collapse_pte_mapped_thp(struct mm_struct *mm, unsigned long addr)
 
 		page = vm_normal_page(vma, addr, *pte);
 
-		if (!page || !PageCompound(page))
-			goto abort;
-
-		if (!hpage) {
-			hpage = compound_head(page);
-			/*
-			 * The mapping of the THP should not change.
-			 *
-			 * Note that uprobe, debugger, or MAP_PRIVATE may
-			 * change the page table, but the new page will
-			 * not pass PageCompound() check.
-			 */
-			if (WARN_ON(hpage->mapping != vma->vm_file->f_mapping))
-				goto abort;
-		}
-
 		/*
-		 * Confirm the page maps to the correct subpage.
-		 *
-		 * Note that uprobe, debugger, or MAP_PRIVATE may change
-		 * the page table, but the new page will not pass
-		 * PageCompound() check.
+		 * Note that uprobe, debugger, or MAP_PRIVATE may change the
+		 * page table, but the new page will not be a subpage of hpage.
 		 */
-		if (WARN_ON(hpage + i != page))
+		if (hpage + i != page)
 			goto abort;
 		count++;
 	}
@@ -1377,7 +1366,7 @@ void collapse_pte_mapped_thp(struct mm_struct *mm, unsigned long addr)
 	pte_unmap_unlock(start_pte, ptl);
 
 	/* step 3: set proper refcount and mm_counters. */
-	if (hpage) {
+	if (count) {
 		page_ref_sub(hpage, count);
 		add_mm_counter(vma->vm_mm, mm_counter_file(hpage), -count);
 	}
@@ -1388,10 +1377,15 @@ void collapse_pte_mapped_thp(struct mm_struct *mm, unsigned long addr)
 	spin_unlock(ptl);
 	mm_dec_nr_ptes(mm);
 	pte_free(mm, pmd_pgtable(_pmd));
+
+drop_hpage:
+	unlock_page(hpage);
+	put_page(hpage);
 	return;
 
 abort:
 	pte_unmap_unlock(start_pte, ptl);
+	goto drop_hpage;
 }
 
 static int khugepaged_collapse_pte_mapped_thps(struct mm_slot *mm_slot)