diff --git a/mm/mlock.c b/mm/mlock.c index 8746fe3f9730..c83896a72504 100644 --- a/mm/mlock.c +++ b/mm/mlock.c @@ -318,6 +318,7 @@ static void __munlock_vma_pages_range(struct vm_area_struct *vma, int mlock_vma_pages_range(struct vm_area_struct *vma, unsigned long start, unsigned long end) { + struct mm_struct *mm = vma->vm_mm; int nr_pages = (end - start) / PAGE_SIZE; BUG_ON(!(vma->vm_flags & VM_LOCKED)); @@ -329,8 +330,19 @@ int mlock_vma_pages_range(struct vm_area_struct *vma, if (!((vma->vm_flags & (VM_DONTEXPAND | VM_RESERVED)) || is_vm_hugetlb_page(vma) || - vma == get_gate_vma(current))) - return __mlock_vma_pages_range(vma, start, end); + vma == get_gate_vma(current))) { + downgrade_write(&mm->mmap_sem); + nr_pages = __mlock_vma_pages_range(vma, start, end); + + up_read(&mm->mmap_sem); + /* vma can change or disappear */ + down_write(&mm->mmap_sem); + vma = find_vma(mm, start); + /* non-NULL vma must contain @start, but need to check @end */ + if (!vma || end > vma->vm_end) + return -EAGAIN; + return nr_pages; + } /* * User mapped kernel pages or huge pages: @@ -424,13 +436,41 @@ success: vma->vm_flags = newflags; if (lock) { + /* + * mmap_sem is currently held for write. Downgrade the write + * lock to a read lock so that other faults, mmap scans, ... + * while we fault in all pages. + */ + downgrade_write(&mm->mmap_sem); + ret = __mlock_vma_pages_range(vma, start, end); if (ret > 0) { mm->locked_vm -= ret; ret = 0; } - } else + /* + * Need to reacquire mmap sem in write mode, as our callers + * expect this. We have no support for atomically upgrading + * a sem to write, so we need to check for ranges while sem + * is unlocked. + */ + up_read(&mm->mmap_sem); + /* vma can change or disappear */ + down_write(&mm->mmap_sem); + *prev = find_vma(mm, start); + /* non-NULL *prev must contain @start, but need to check @end */ + if (!(*prev) || end > (*prev)->vm_end) + ret = -EAGAIN; + } else { + /* + * TODO: for unlocking, pages will already be resident, so + * we don't need to wait for allocations/reclaim/pagein, ... + * However, unlocking a very large region can still take a + * while. Should we downgrade the semaphore for both lock + * AND unlock ? + */ __munlock_vma_pages_range(vma, start, end); + } out: *prev = vma;