mm/hmm: make full use of walk_page_range()
hmm_range_fault() calls find_vma() and walk_page_range() in a loop. This is
unnecessary duplication since walk_page_range() already calls find_vma() in a
loop. Simplify hmm_range_fault() by defining a test_walk() callback function
to filter unhandled vmas.

This also fixes a bug where hmm_range_fault() was not checking
start >= vma->vm_start before checking vma->vm_flags, so hmm_range_fault()
could return an error based on the wrong vma for the requested range.

It also fixes a bug where, if the vma has no read access and the caller did
not request a fault, an error was returned even though there should not be
one.

Link: https://lore.kernel.org/r/20191104222141.5173-2-rcampbell@nvidia.com
Signed-off-by: Ralph Campbell <rcampbell@nvidia.com>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Jason Gunthorpe <jgg@mellanox.com>
Signed-off-by: Jason Gunthorpe <jgg@mellanox.com>
commit d28c2c9a48
parent d3eeb1d77c

mm/hmm.c: 120 lines changed
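For context, the new hmm_vma_walk_test() added below relies on the pagewalk
core's test_walk contract: a negative return aborts walk_page_range() with
that error, a positive return skips the current vma and continues with the
next one, and 0 lets the pmd/pte/hugetlb callbacks run on the vma. A minimal,
hypothetical sketch of that contract (the demo_* names are illustrative, not
part of this patch):

#include <linux/mm.h>
#include <linux/pagewalk.h>

/* Illustrative only: mirrors the shape of hmm_vma_walk_test() below. */
static int demo_test_walk(unsigned long start, unsigned long end,
                          struct mm_walk *walk)
{
        struct vm_area_struct *vma = walk->vma;

        /* No struct page backing (or direct I/O mapping): abort the walk. */
        if (vma->vm_flags & (VM_IO | VM_PFNMAP | VM_MIXEDMAP))
                return -EFAULT;

        /* Unreadable vma: skip it and let walk_page_range() move on. */
        if (!(vma->vm_flags & VM_READ))
                return 1;

        /* Walk this vma's page tables with the other callbacks. */
        return 0;
}

static const struct mm_walk_ops demo_walk_ops = {
        .test_walk = demo_test_walk,
};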
--- a/mm/hmm.c
+++ b/mm/hmm.c
@@ -65,18 +65,15 @@ err:
 	return -EFAULT;
 }
 
-static int hmm_pfns_bad(unsigned long addr,
-			unsigned long end,
-			struct mm_walk *walk)
+static int hmm_pfns_fill(unsigned long addr, unsigned long end,
+		struct hmm_range *range, enum hmm_pfn_value_e value)
 {
-	struct hmm_vma_walk *hmm_vma_walk = walk->private;
-	struct hmm_range *range = hmm_vma_walk->range;
 	uint64_t *pfns = range->pfns;
 	unsigned long i;
 
 	i = (addr - range->start) >> PAGE_SHIFT;
 	for (; addr < end; addr += PAGE_SIZE, i++)
-		pfns[i] = range->values[HMM_PFN_ERROR];
+		pfns[i] = range->values[value];
 
 	return 0;
 }
@@ -403,7 +400,7 @@ again:
 		}
 		return 0;
 	} else if (!pmd_present(pmd))
-		return hmm_pfns_bad(start, end, walk);
+		return hmm_pfns_fill(start, end, range, HMM_PFN_ERROR);
 
 	if (pmd_devmap(pmd) || pmd_trans_huge(pmd)) {
 		/*
@@ -431,7 +428,7 @@ again:
 	 * recover.
 	 */
 	if (pmd_bad(pmd))
-		return hmm_pfns_bad(start, end, walk);
+		return hmm_pfns_fill(start, end, range, HMM_PFN_ERROR);
 
 	ptep = pte_offset_map(pmdp, addr);
 	i = (addr - range->start) >> PAGE_SHIFT;
@@ -589,13 +586,47 @@ unlock:
 #define hmm_vma_walk_hugetlb_entry NULL
 #endif /* CONFIG_HUGETLB_PAGE */
 
-static void hmm_pfns_clear(struct hmm_range *range,
-			   uint64_t *pfns,
-			   unsigned long addr,
-			   unsigned long end)
+static int hmm_vma_walk_test(unsigned long start, unsigned long end,
+			     struct mm_walk *walk)
 {
-	for (; addr < end; addr += PAGE_SIZE, pfns++)
-		*pfns = range->values[HMM_PFN_NONE];
+	struct hmm_vma_walk *hmm_vma_walk = walk->private;
+	struct hmm_range *range = hmm_vma_walk->range;
+	struct vm_area_struct *vma = walk->vma;
+
+	/*
+	 * Skip vma ranges that don't have struct page backing them or
+	 * map I/O devices directly.
+	 */
+	if (vma->vm_flags & (VM_IO | VM_PFNMAP | VM_MIXEDMAP))
+		return -EFAULT;
+
+	/*
+	 * If the vma does not allow read access, then assume that it does not
+	 * allow write access either. HMM does not support architectures
+	 * that allow write without read.
+	 */
+	if (!(vma->vm_flags & VM_READ)) {
+		bool fault, write_fault;
+
+		/*
+		 * Check to see if a fault is requested for any page in the
+		 * range.
+		 */
+		hmm_range_need_fault(hmm_vma_walk, range->pfns +
+					((start - range->start) >> PAGE_SHIFT),
+					(end - start) >> PAGE_SHIFT,
+					0, &fault, &write_fault);
+		if (fault || write_fault)
+			return -EFAULT;
+
+		hmm_pfns_fill(start, end, range, HMM_PFN_NONE);
+		hmm_vma_walk->last = end;
+
+		/* Skip this vma and continue processing the next vma. */
+		return 1;
+	}
+
+	return 0;
 }
 
 static const struct mm_walk_ops hmm_walk_ops = {
@@ -603,6 +634,7 @@ static const struct mm_walk_ops hmm_walk_ops = {
 	.pmd_entry	= hmm_vma_walk_pmd,
 	.pte_hole	= hmm_vma_walk_hole,
 	.hugetlb_entry	= hmm_vma_walk_hugetlb_entry,
+	.test_walk	= hmm_vma_walk_test,
 };
 
 /**
@@ -635,11 +667,12 @@ static const struct mm_walk_ops hmm_walk_ops = {
  */
 long hmm_range_fault(struct hmm_range *range, unsigned int flags)
 {
-	const unsigned long device_vma = VM_IO | VM_PFNMAP | VM_MIXEDMAP;
-	unsigned long start = range->start, end;
-	struct hmm_vma_walk hmm_vma_walk;
+	struct hmm_vma_walk hmm_vma_walk = {
+		.range = range,
+		.last = range->start,
+		.flags = flags,
+	};
 	struct mm_struct *mm = range->notifier->mm;
-	struct vm_area_struct *vma;
 	int ret;
 
 	lockdep_assert_held(&mm->mmap_sem);
@@ -649,53 +682,12 @@ long hmm_range_fault(struct hmm_range *range, unsigned int flags)
 		if (mmu_interval_check_retry(range->notifier,
 					     range->notifier_seq))
 			return -EBUSY;
+		ret = walk_page_range(mm, hmm_vma_walk.last, range->end,
+				      &hmm_walk_ops, &hmm_vma_walk);
+	} while (ret == -EBUSY);
 
-		vma = find_vma(mm, start);
-		if (vma == NULL || (vma->vm_flags & device_vma))
-			return -EFAULT;
-
-		if (!(vma->vm_flags & VM_READ)) {
-			/*
-			 * If vma do not allow read access, then assume that it
-			 * does not allow write access, either. HMM does not
-			 * support architecture that allow write without read.
-			 */
-			hmm_pfns_clear(range, range->pfns,
-				range->start, range->end);
-			return -EPERM;
-		}
-
-		hmm_vma_walk.pgmap = NULL;
-		hmm_vma_walk.last = start;
-		hmm_vma_walk.flags = flags;
-		hmm_vma_walk.range = range;
-		end = min(range->end, vma->vm_end);
-
-		walk_page_range(vma->vm_mm, start, end, &hmm_walk_ops,
-				&hmm_vma_walk);
-
-		do {
-			ret = walk_page_range(vma->vm_mm, start, end,
-					      &hmm_walk_ops, &hmm_vma_walk);
-			start = hmm_vma_walk.last;
-
-			/* Keep trying while the range is valid. */
-		} while (ret == -EBUSY &&
-			 !mmu_interval_check_retry(range->notifier,
-						   range->notifier_seq));
-
-		if (ret) {
-			unsigned long i;
-
-			i = (hmm_vma_walk.last - range->start) >> PAGE_SHIFT;
-			hmm_pfns_clear(range, &range->pfns[i],
-				       hmm_vma_walk.last, range->end);
-			return ret;
-		}
-		start = end;
-
-	} while (start < range->end);
-
+	if (ret)
+		return ret;
 	return (hmm_vma_walk.last - range->start) >> PAGE_SHIFT;
 }
 EXPORT_SYMBOL(hmm_range_fault);
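With test_walk in place, hmm_range_fault() itself reduces to the single retry
loop in the last hunk. On the caller side, the usual pattern pairs it with the
interval notifier sequence count; the sketch below is hypothetical
(demo_fault_and_check() and its locking shortcuts are illustrative, not from
this patch) and assumes the mmu_interval_read_begin()/mmu_interval_read_retry()
API introduced by the parent series:

#include <linux/hmm.h>
#include <linux/mm.h>
#include <linux/mmu_notifier.h>

/* Illustrative only: a driver-side retry loop around hmm_range_fault(). */
static long demo_fault_and_check(struct mmu_interval_notifier *notifier,
                                 struct hmm_range *range)
{
        struct mm_struct *mm = notifier->mm;
        long ret;

again:
        range->notifier = notifier;
        range->notifier_seq = mmu_interval_read_begin(notifier);

        down_read(&mm->mmap_sem);
        ret = hmm_range_fault(range, 0);
        up_read(&mm->mmap_sem);
        if (ret < 0) {
                /* -EBUSY means the range was invalidated mid-walk: retry. */
                if (ret == -EBUSY)
                        goto again;
                return ret;
        }

        /*
         * Before consuming range->pfns, the driver would take its own lock
         * and recheck the sequence; a racing invalidation means retry.
         */
        if (mmu_interval_read_retry(notifier, range->notifier_seq))
                goto again;

        return ret;     /* number of pages processed into range->pfns */
}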