author     Stephen Rothwell <sfr@canb.auug.org.au>  2019-08-07 16:03:55 +1000
committer  Stephen Rothwell <sfr@canb.auug.org.au>  2019-08-07 16:03:55 +1000
commit     22426e9febca626336de02577692c2471fa79502 (patch)
tree       65b15f68327c0255bf3ee8414f57f29edfbe6098
parent     c2642150b8e6b27f084c412b75152137cca2111f (diff)
parent     cc374377a19d2a49d693997b62dc3a6f5fac6d61 (diff)
download   linux-next-22426e9febca626336de02577692c2471fa79502.tar.gz
Merge remote-tracking branch 'hmm/hmm'
-rw-r--r--  Documentation/vm/hmm.rst                  17
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c    14
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c    2
-rw-r--r--  drivers/gpu/drm/nouveau/nouveau_dmem.c     2
-rw-r--r--  drivers/gpu/drm/nouveau/nouveau_svm.c     15
-rw-r--r--  include/linux/hmm.h                       84
-rw-r--r--  mm/hmm.c                                 217
7 files changed, 105 insertions, 246 deletions
diff --git a/Documentation/vm/hmm.rst b/Documentation/vm/hmm.rst
index 710ce1c701bf..ddcb5ca8b296 100644
--- a/Documentation/vm/hmm.rst
+++ b/Documentation/vm/hmm.rst
@@ -192,15 +192,14 @@ read only, or fully unmap, etc.). The device must complete the update before
the driver callback returns.
When the device driver wants to populate a range of virtual addresses, it can
-use either::
+use::
- long hmm_range_snapshot(struct hmm_range *range);
- long hmm_range_fault(struct hmm_range *range, bool block);
+ long hmm_range_fault(struct hmm_range *range, unsigned int flags);
-The first one (hmm_range_snapshot()) will only fetch present CPU page table
+With the HMM_RANGE_SNAPSHOT flag, it will only fetch present CPU page table
entries and will not trigger a page fault on missing or non-present entries.
-The second one does trigger a page fault on missing or read-only entries if
-write access is requested (see below). Page faults use the generic mm page
+Without that flag, it does trigger a page fault on missing or read-only entries
+if write access is requested (see below). Page faults use the generic mm page
fault code path just like a CPU page fault.
Both functions copy CPU page table entries into their pfns array argument. Each
@@ -227,20 +226,20 @@ The usage pattern is::
/*
* Just wait for range to be valid, safe to ignore return value as we
- * will use the return value of hmm_range_snapshot() below under the
+ * will use the return value of hmm_range_fault() below under the
* mmap_sem to ascertain the validity of the range.
*/
hmm_range_wait_until_valid(&range, TIMEOUT_IN_MSEC);
again:
down_read(&mm->mmap_sem);
- ret = hmm_range_snapshot(&range);
+ ret = hmm_range_fault(&range, HMM_RANGE_SNAPSHOT);
if (ret) {
up_read(&mm->mmap_sem);
if (ret == -EBUSY) {
/*
* No need to check hmm_range_wait_until_valid() return value
- * on retry we will get proper error with hmm_range_snapshot()
+ * on retry we will get proper error with hmm_range_fault()
*/
hmm_range_wait_until_valid(&range, TIMEOUT_IN_MSEC);
goto again;
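
As a hedged illustration of the collapsed API in the hunk above, the sketch below snapshots an already registered range without faulting anything in. It uses the HMM_FAULT_SNAPSHOT flag and HMM_RANGE_DEFAULT_TIMEOUT from the include/linux/hmm.h changes further down; the function name is illustrative only:

    #include <linux/hmm.h>
    #include <linux/mm.h>

    /* Sketch only: snapshot CPU page table entries for a registered range. */
    static long my_snapshot_range(struct mm_struct *mm, struct hmm_range *range)
    {
            long ret;

            do {
                    /* Wait out any in-flight invalidation before walking. */
                    hmm_range_wait_until_valid(range, HMM_RANGE_DEFAULT_TIMEOUT);

                    down_read(&mm->mmap_sem);
                    /* One entry point now; the flag suppresses page faults. */
                    ret = hmm_range_fault(range, HMM_FAULT_SNAPSHOT);
                    up_read(&mm->mmap_sem);
            } while (ret == -EBUSY);

            return ret;     /* >= 0: number of valid pages from range->start */
    }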
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c
index 50022acc8a81..ec4f36970d22 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c
@@ -195,13 +195,14 @@ static void amdgpu_mn_invalidate_node(struct amdgpu_mn_node *node,
* Block for operations on BOs to finish and mark pages as accessed and
* potentially dirty.
*/
-static int amdgpu_mn_sync_pagetables_gfx(struct hmm_mirror *mirror,
- const struct hmm_update *update)
+static int
+amdgpu_mn_sync_pagetables_gfx(struct hmm_mirror *mirror,
+ const struct mmu_notifier_range *update)
{
struct amdgpu_mn *amn = container_of(mirror, struct amdgpu_mn, mirror);
unsigned long start = update->start;
unsigned long end = update->end;
- bool blockable = update->blockable;
+ bool blockable = mmu_notifier_range_blockable(update);
struct interval_tree_node *it;
/* notification is exclusive, but interval is inclusive */
@@ -243,13 +244,14 @@ static int amdgpu_mn_sync_pagetables_gfx(struct hmm_mirror *mirror,
* necessitates evicting all user-mode queues of the process. The BOs
* are restorted in amdgpu_mn_invalidate_range_end_hsa.
*/
-static int amdgpu_mn_sync_pagetables_hsa(struct hmm_mirror *mirror,
- const struct hmm_update *update)
+static int
+amdgpu_mn_sync_pagetables_hsa(struct hmm_mirror *mirror,
+ const struct mmu_notifier_range *update)
{
struct amdgpu_mn *amn = container_of(mirror, struct amdgpu_mn, mirror);
unsigned long start = update->start;
unsigned long end = update->end;
- bool blockable = update->blockable;
+ bool blockable = mmu_notifier_range_blockable(update);
struct interval_tree_node *it;
/* notification is exclusive, but interval is inclusive */
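
For the callback conversion above, here is a hedged, driver-neutral sketch of a sync_cpu_device_pagetables() implementation against the new prototype. struct my_mirror, its lock and my_invalidate_device_range() are hypothetical stand-ins for driver state, not code from this series:

    #include <linux/hmm.h>
    #include <linux/mmu_notifier.h>
    #include <linux/mutex.h>

    struct my_mirror {
            struct hmm_mirror mirror;
            struct mutex lock;
    };

    /* Hypothetical helper that drops device mappings for a VA span. */
    static void my_invalidate_device_range(struct my_mirror *mine,
                                           unsigned long start, unsigned long end);

    static int my_sync_cpu_device_pagetables(struct hmm_mirror *mirror,
                                             const struct mmu_notifier_range *update)
    {
            struct my_mirror *mine = container_of(mirror, struct my_mirror, mirror);

            /* The blockable flag now comes from the mmu_notifier_range helper. */
            if (!mmu_notifier_range_blockable(update))
                    return -EAGAIN;

            mutex_lock(&mine->lock);
            my_invalidate_device_range(mine, update->start, update->end);
            mutex_unlock(&mine->lock);
            return 0;
    }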
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
index 8f8b7a350b8b..bb41a2ee9cb3 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
@@ -848,7 +848,7 @@ retry:
down_read(&mm->mmap_sem);
- r = hmm_range_fault(range, true);
+ r = hmm_range_fault(range, 0);
if (unlikely(r < 0)) {
if (likely(r == -EAGAIN)) {
/*
diff --git a/drivers/gpu/drm/nouveau/nouveau_dmem.c b/drivers/gpu/drm/nouveau/nouveau_dmem.c
index 1333220787a1..345c63cb752a 100644
--- a/drivers/gpu/drm/nouveau/nouveau_dmem.c
+++ b/drivers/gpu/drm/nouveau/nouveau_dmem.c
@@ -845,7 +845,7 @@ nouveau_dmem_convert_pfn(struct nouveau_drm *drm,
struct page *page;
uint64_t addr;
- page = hmm_pfn_to_page(range, range->pfns[i]);
+ page = hmm_device_entry_to_page(range, range->pfns[i]);
if (page == NULL)
continue;
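
The rename above is mechanical; as a hedged example, a driver walking the filled pfns array would now resolve entries like this (assuming PAGE_SHIFT granularity, with an illustrative helper name):

    /* Sketch: turn snapshot/fault results into struct page pointers. */
    static void my_collect_pages(struct hmm_range *range, struct page **pages)
    {
            unsigned long i, npages = (range->end - range->start) >> PAGE_SHIFT;

            for (i = 0; i < npages; i++)
                    /* NULL for holes and special entries, as with hmm_pfn_to_page() */
                    pages[i] = hmm_device_entry_to_page(range, range->pfns[i]);
    }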
diff --git a/drivers/gpu/drm/nouveau/nouveau_svm.c b/drivers/gpu/drm/nouveau/nouveau_svm.c
index a835cebb6d90..a74530b5a523 100644
--- a/drivers/gpu/drm/nouveau/nouveau_svm.c
+++ b/drivers/gpu/drm/nouveau/nouveau_svm.c
@@ -252,13 +252,13 @@ nouveau_svmm_invalidate(struct nouveau_svmm *svmm, u64 start, u64 limit)
static int
nouveau_svmm_sync_cpu_device_pagetables(struct hmm_mirror *mirror,
- const struct hmm_update *update)
+ const struct mmu_notifier_range *update)
{
struct nouveau_svmm *svmm = container_of(mirror, typeof(*svmm), mirror);
unsigned long start = update->start;
unsigned long limit = update->end;
- if (!update->blockable)
+ if (!mmu_notifier_range_blockable(update))
return -EAGAIN;
SVMM_DBG(svmm, "invalidate %016lx-%016lx", start, limit);
@@ -496,20 +496,20 @@ nouveau_range_fault(struct hmm_mirror *mirror, struct hmm_range *range)
range->start, range->end,
PAGE_SHIFT);
if (ret) {
- up_read(&range->vma->vm_mm->mmap_sem);
+ up_read(&range->hmm->mm->mmap_sem);
return (int)ret;
}
if (!hmm_range_wait_until_valid(range, HMM_RANGE_DEFAULT_TIMEOUT)) {
- up_read(&range->vma->vm_mm->mmap_sem);
- return -EAGAIN;
+ up_read(&range->hmm->mm->mmap_sem);
+ return -EBUSY;
}
- ret = hmm_range_fault(range, true);
+ ret = hmm_range_fault(range, 0);
if (ret <= 0) {
if (ret == 0)
ret = -EBUSY;
- up_read(&range->vma->vm_mm->mmap_sem);
+ up_read(&range->hmm->mm->mmap_sem);
hmm_range_unregister(range);
return ret;
}
@@ -682,7 +682,6 @@ nouveau_svm_fault(struct nvif_notify *notify)
args.i.p.addr + args.i.p.size, fn - fi);
/* Have HMM fault pages within the fault window to the GPU. */
- range.vma = vma;
range.start = args.i.p.addr;
range.end = args.i.p.addr + args.i.p.size;
range.pfns = args.phys;
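
The nouveau changes above drop range->vma and switch the retry signal to -EBUSY. The sketch below loosely follows that flow for a generic driver, with the mirror and mm supplied by the caller and error handling trimmed; the names are illustrative, not nouveau's:

    static long my_range_fault(struct hmm_mirror *mirror, struct mm_struct *mm,
                               struct hmm_range *range)
    {
            long ret;

            ret = hmm_range_register(range, mirror, range->start, range->end,
                                     PAGE_SHIFT);
            if (ret)
                    return ret;

            /* -EBUSY (not -EAGAIN any more) asks the caller to retry. */
            if (!hmm_range_wait_until_valid(range, HMM_RANGE_DEFAULT_TIMEOUT)) {
                    hmm_range_unregister(range);
                    return -EBUSY;
            }

            down_read(&mm->mmap_sem);               /* the range no longer caches a vma */
            ret = hmm_range_fault(range, 0);        /* flags == 0: blocking fault */
            if (ret <= 0) {
                    up_read(&mm->mmap_sem);
                    hmm_range_unregister(range);
                    return ret ? ret : -EBUSY;
            }
            return ret;     /* mmap_sem held; caller consumes pfns, then unlocks */
    }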
diff --git a/include/linux/hmm.h b/include/linux/hmm.h
index 7ef56dc18050..82265118d94a 100644
--- a/include/linux/hmm.h
+++ b/include/linux/hmm.h
@@ -164,7 +164,6 @@ enum hmm_pfn_value_e {
*/
struct hmm_range {
struct hmm *hmm;
- struct vm_area_struct *vma;
struct list_head list;
unsigned long start;
unsigned long end;
@@ -291,40 +290,6 @@ static inline uint64_t hmm_device_entry_from_pfn(const struct hmm_range *range,
}
/*
- * Old API:
- * hmm_pfn_to_page()
- * hmm_pfn_to_pfn()
- * hmm_pfn_from_page()
- * hmm_pfn_from_pfn()
- *
- * This are the OLD API please use new API, it is here to avoid cross-tree
- * merge painfullness ie we convert things to new API in stages.
- */
-static inline struct page *hmm_pfn_to_page(const struct hmm_range *range,
- uint64_t pfn)
-{
- return hmm_device_entry_to_page(range, pfn);
-}
-
-static inline unsigned long hmm_pfn_to_pfn(const struct hmm_range *range,
- uint64_t pfn)
-{
- return hmm_device_entry_to_pfn(range, pfn);
-}
-
-static inline uint64_t hmm_pfn_from_page(const struct hmm_range *range,
- struct page *page)
-{
- return hmm_device_entry_from_page(range, page);
-}
-
-static inline uint64_t hmm_pfn_from_pfn(const struct hmm_range *range,
- unsigned long pfn)
-{
- return hmm_device_entry_from_pfn(range, pfn);
-}
-
-/*
* Mirroring: how to synchronize device page table with CPU page table.
*
* A device driver that is participating in HMM mirroring must always
@@ -375,29 +340,6 @@ static inline uint64_t hmm_pfn_from_pfn(const struct hmm_range *range,
struct hmm_mirror;
/*
- * enum hmm_update_event - type of update
- * @HMM_UPDATE_INVALIDATE: invalidate range (no indication as to why)
- */
-enum hmm_update_event {
- HMM_UPDATE_INVALIDATE,
-};
-
-/*
- * struct hmm_update - HMM update information for callback
- *
- * @start: virtual start address of the range to update
- * @end: virtual end address of the range to update
- * @event: event triggering the update (what is happening)
- * @blockable: can the callback block/sleep ?
- */
-struct hmm_update {
- unsigned long start;
- unsigned long end;
- enum hmm_update_event event;
- bool blockable;
-};
-
-/*
* struct hmm_mirror_ops - HMM mirror device operations callback
*
* @update: callback to update range on a device
@@ -417,9 +359,9 @@ struct hmm_mirror_ops {
/* sync_cpu_device_pagetables() - synchronize page tables
*
* @mirror: pointer to struct hmm_mirror
- * @update: update information (see struct hmm_update)
- * Return: -EAGAIN if update.blockable false and callback need to
- * block, 0 otherwise.
+ * @update: update information (see struct mmu_notifier_range)
+ * Return: -EAGAIN if mmu_notifier_range_blockable(update) is false
+ * and callback needs to block, 0 otherwise.
*
* This callback ultimately originates from mmu_notifiers when the CPU
* page table is updated. The device driver must update its page table
@@ -430,8 +372,9 @@ struct hmm_mirror_ops {
* page tables are completely updated (TLBs flushed, etc); this is a
* synchronous call.
*/
- int (*sync_cpu_device_pagetables)(struct hmm_mirror *mirror,
- const struct hmm_update *update);
+ int (*sync_cpu_device_pagetables)(
+ struct hmm_mirror *mirror,
+ const struct mmu_notifier_range *update);
};
/*
@@ -463,12 +406,21 @@ int hmm_range_register(struct hmm_range *range,
unsigned long end,
unsigned page_shift);
void hmm_range_unregister(struct hmm_range *range);
-long hmm_range_snapshot(struct hmm_range *range);
-long hmm_range_fault(struct hmm_range *range, bool block);
+
+/*
+ * Retry fault if non-blocking, drop mmap_sem and return -EAGAIN in that case.
+ */
+#define HMM_FAULT_ALLOW_RETRY (1 << 0)
+
+/* Don't fault in missing PTEs, just snapshot the current state. */
+#define HMM_FAULT_SNAPSHOT (1 << 1)
+
+long hmm_range_fault(struct hmm_range *range, unsigned int flags);
+
long hmm_range_dma_map(struct hmm_range *range,
struct device *device,
dma_addr_t *daddrs,
- bool block);
+ unsigned int flags);
long hmm_range_dma_unmap(struct hmm_range *range,
struct vm_area_struct *vma,
struct device *device,
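
Since the header now expresses the non-blocking mode as HMM_FAULT_ALLOW_RETRY, here is a hedged sketch of a caller using it. On -EAGAIN the fault path has already dropped mmap_sem (see the mm/hmm.c comment below), so only the other exits unlock here; the function name is illustrative:

    static long my_try_fault(struct mm_struct *mm, struct hmm_range *range)
    {
            long ret;

            down_read(&mm->mmap_sem);
            ret = hmm_range_fault(range, HMM_FAULT_ALLOW_RETRY);
            if (ret == -EAGAIN)
                    return ret;             /* mmap_sem was already released */
            up_read(&mm->mmap_sem);
            return ret;
    }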
diff --git a/mm/hmm.c b/mm/hmm.c
index 16b6731a34db..9a908902e4cc 100644
--- a/mm/hmm.c
+++ b/mm/hmm.c
@@ -32,7 +32,7 @@ static const struct mmu_notifier_ops hmm_mmu_notifier_ops;
* hmm_get_or_create - register HMM against an mm (HMM internal)
*
* @mm: mm struct to attach to
- * Returns: returns an HMM object, either by referencing the existing
+ * Return: an HMM object, either by referencing the existing
* (per-process) object, or by creating a new one.
*
* This is not intended to be used directly by device drivers. If mm already
@@ -165,7 +165,6 @@ static int hmm_invalidate_range_start(struct mmu_notifier *mn,
{
struct hmm *hmm = container_of(mn, struct hmm, mmu_notifier);
struct hmm_mirror *mirror;
- struct hmm_update update;
struct hmm_range *range;
unsigned long flags;
int ret = 0;
@@ -173,15 +172,10 @@ static int hmm_invalidate_range_start(struct mmu_notifier *mn,
if (!kref_get_unless_zero(&hmm->kref))
return 0;
- update.start = nrange->start;
- update.end = nrange->end;
- update.event = HMM_UPDATE_INVALIDATE;
- update.blockable = mmu_notifier_range_blockable(nrange);
-
spin_lock_irqsave(&hmm->ranges_lock, flags);
hmm->notifiers++;
list_for_each_entry(range, &hmm->ranges, list) {
- if (update.end < range->start || update.start >= range->end)
+ if (nrange->end < range->start || nrange->start >= range->end)
continue;
range->valid = false;
@@ -198,9 +192,10 @@ static int hmm_invalidate_range_start(struct mmu_notifier *mn,
list_for_each_entry(mirror, &hmm->mirrors, list) {
int rc;
- rc = mirror->ops->sync_cpu_device_pagetables(mirror, &update);
+ rc = mirror->ops->sync_cpu_device_pagetables(mirror, nrange);
if (rc) {
- if (WARN_ON(update.blockable || rc != -EAGAIN))
+ if (WARN_ON(mmu_notifier_range_blockable(nrange) ||
+ rc != -EAGAIN))
continue;
ret = -EAGAIN;
break;
@@ -285,8 +280,7 @@ struct hmm_vma_walk {
struct hmm_range *range;
struct dev_pagemap *pgmap;
unsigned long last;
- bool fault;
- bool block;
+ unsigned int flags;
};
static int hmm_vma_do_fault(struct mm_walk *walk, unsigned long addr,
@@ -298,11 +292,16 @@ static int hmm_vma_do_fault(struct mm_walk *walk, unsigned long addr,
struct vm_area_struct *vma = walk->vma;
vm_fault_t ret;
- flags |= hmm_vma_walk->block ? 0 : FAULT_FLAG_ALLOW_RETRY;
- flags |= write_fault ? FAULT_FLAG_WRITE : 0;
+ if (hmm_vma_walk->flags & HMM_FAULT_ALLOW_RETRY)
+ flags |= FAULT_FLAG_ALLOW_RETRY;
+ if (write_fault)
+ flags |= FAULT_FLAG_WRITE;
+
ret = handle_mm_fault(vma, addr, flags);
- if (ret & VM_FAULT_RETRY)
+ if (ret & VM_FAULT_RETRY) {
+ /* Note, handle_mm_fault did up_read(&mm->mmap_sem)) */
return -EAGAIN;
+ }
if (ret & VM_FAULT_ERROR) {
*pfn = range->values[HMM_PFN_ERROR];
return -EFAULT;
@@ -328,8 +327,8 @@ static int hmm_pfns_bad(unsigned long addr,
}
/*
- * hmm_vma_walk_hole() - handle a range lacking valid pmd or pte(s)
- * @start: range virtual start address (inclusive)
+ * hmm_vma_walk_hole_() - handle a range lacking valid pmd or pte(s)
+ * @addr: range virtual start address (inclusive)
* @end: range virtual end address (exclusive)
* @fault: should we fault or not ?
* @write_fault: write fault ?
@@ -373,15 +372,15 @@ static inline void hmm_pte_need_fault(const struct hmm_vma_walk *hmm_vma_walk,
{
struct hmm_range *range = hmm_vma_walk->range;
- if (!hmm_vma_walk->fault)
+ if (hmm_vma_walk->flags & HMM_FAULT_SNAPSHOT)
return;
/*
* So we not only consider the individual per page request we also
* consider the default flags requested for the range. The API can
- * be use in 2 fashions. The first one where the HMM user coalesce
- * multiple page fault into one request and set flags per pfns for
- * of those faults. The second one where the HMM user want to pre-
+ * be used 2 ways. The first one where the HMM user coalesces
+ * multiple page faults into one request and sets flags per pfn for
+ * those faults. The second one where the HMM user wants to pre-
* fault a range with specific flags. For the latter one it is a
* waste to have the user pre-fill the pfn arrays with a default
* flags value.
@@ -391,7 +390,7 @@ static inline void hmm_pte_need_fault(const struct hmm_vma_walk *hmm_vma_walk,
/* We aren't ask to do anything ... */
if (!(pfns & range->flags[HMM_PFN_VALID]))
return;
- /* If this is device memory than only fault if explicitly requested */
+ /* If this is device memory then only fault if explicitly requested */
if ((cpu_flags & range->flags[HMM_PFN_DEVICE_PRIVATE])) {
/* Do we fault on device memory ? */
if (pfns & range->flags[HMM_PFN_DEVICE_PRIVATE]) {
@@ -418,7 +417,7 @@ static void hmm_range_need_fault(const struct hmm_vma_walk *hmm_vma_walk,
{
unsigned long i;
- if (!hmm_vma_walk->fault) {
+ if (hmm_vma_walk->flags & HMM_FAULT_SNAPSHOT) {
*fault = *write_fault = false;
return;
}
@@ -505,7 +504,7 @@ static int hmm_vma_handle_pmd(struct mm_walk *walk,
hmm_vma_walk->last = end;
return 0;
#else
- /* If THP is not enabled then we should never reach that code ! */
+ /* If THP is not enabled then we should never reach this code ! */
return -EINVAL;
#endif
}
@@ -525,7 +524,6 @@ static int hmm_vma_handle_pte(struct mm_walk *walk, unsigned long addr,
{
struct hmm_vma_walk *hmm_vma_walk = walk->private;
struct hmm_range *range = hmm_vma_walk->range;
- struct vm_area_struct *vma = walk->vma;
bool fault, write_fault;
uint64_t cpu_flags;
pte_t pte = *ptep;
@@ -574,8 +572,7 @@ static int hmm_vma_handle_pte(struct mm_walk *walk, unsigned long addr,
if (fault || write_fault) {
pte_unmap(ptep);
hmm_vma_walk->last = addr;
- migration_entry_wait(vma->vm_mm,
- pmdp, addr);
+ migration_entry_wait(walk->mm, pmdp, addr);
return -EBUSY;
}
return 0;
@@ -623,21 +620,16 @@ static int hmm_vma_walk_pmd(pmd_t *pmdp,
{
struct hmm_vma_walk *hmm_vma_walk = walk->private;
struct hmm_range *range = hmm_vma_walk->range;
- struct vm_area_struct *vma = walk->vma;
uint64_t *pfns = range->pfns;
unsigned long addr = start, i;
pte_t *ptep;
pmd_t pmd;
-
again:
pmd = READ_ONCE(*pmdp);
if (pmd_none(pmd))
return hmm_vma_walk_hole(start, end, walk);
- if (pmd_huge(pmd) && (range->vma->vm_flags & VM_HUGETLB))
- return hmm_pfns_bad(start, end, walk);
-
if (thp_migration_supported() && is_pmd_migration_entry(pmd)) {
bool fault, write_fault;
unsigned long npages;
@@ -651,7 +643,7 @@ again:
0, &fault, &write_fault);
if (fault || write_fault) {
hmm_vma_walk->last = addr;
- pmd_migration_entry_wait(vma->vm_mm, pmdp);
+ pmd_migration_entry_wait(walk->mm, pmdp);
return -EBUSY;
}
return 0;
@@ -660,11 +652,11 @@ again:
if (pmd_devmap(pmd) || pmd_trans_huge(pmd)) {
/*
- * No need to take pmd_lock here, even if some other threads
+ * No need to take pmd_lock here, even if some other thread
* is splitting the huge pmd we will get that event through
* mmu_notifier callback.
*
- * So just read pmd value and check again its a transparent
+ * So just read pmd value and check again it's a transparent
* huge or device mapping one and compute corresponding pfn
* values.
*/
@@ -678,7 +670,7 @@ again:
}
/*
- * We have handled all the valid case above ie either none, migration,
+ * We have handled all the valid cases above ie either none, migration,
* huge or transparent huge. At this point either it is a valid pmd
* entry pointing to pte directory or it is a bad pmd that will not
* recover.
@@ -798,10 +790,10 @@ static int hmm_vma_walk_hugetlb_entry(pte_t *pte, unsigned long hmask,
pte_t entry;
int ret = 0;
- size = 1UL << huge_page_shift(h);
+ size = huge_page_size(h);
mask = size - 1;
if (range->page_shift != PAGE_SHIFT) {
- /* Make sure we are looking at full page. */
+ /* Make sure we are looking at a full page. */
if (start & mask)
return -EINVAL;
if (end < (start + size))
@@ -812,8 +804,7 @@ static int hmm_vma_walk_hugetlb_entry(pte_t *pte, unsigned long hmask,
size = PAGE_SIZE;
}
-
- ptl = huge_pte_lock(hstate_vma(walk->vma), walk->mm, pte);
+ ptl = huge_pte_lock(hstate_vma(vma), walk->mm, pte);
entry = huge_ptep_get(pte);
i = (start - range->start) >> range->page_shift;
@@ -862,7 +853,7 @@ static void hmm_pfns_clear(struct hmm_range *range,
* @start: start virtual address (inclusive)
* @end: end virtual address (exclusive)
* @page_shift: expect page shift for the range
- * Returns 0 on success, -EFAULT if the address space is no longer valid
+ * Return: 0 on success, -EFAULT if the address space is no longer valid
*
* Track updates to the CPU page table see include/linux/hmm.h
*/
@@ -941,105 +932,26 @@ void hmm_range_unregister(struct hmm_range *range)
}
EXPORT_SYMBOL(hmm_range_unregister);
-/*
- * hmm_range_snapshot() - snapshot CPU page table for a range
- * @range: range
- * Return: -EINVAL if invalid argument, -ENOMEM out of memory, -EPERM invalid
- * permission (for instance asking for write and range is read only),
- * -EBUSY if you need to retry, -EFAULT invalid (ie either no valid
- * vma or it is illegal to access that range), number of valid pages
- * in range->pfns[] (from range start address).
+/**
+ * hmm_range_fault - try to fault some address in a virtual address range
+ * @range: range being faulted
+ * @flags: HMM_FAULT_* flags
*
- * This snapshots the CPU page table for a range of virtual addresses. Snapshot
- * validity is tracked by range struct. See in include/linux/hmm.h for example
- * on how to use.
- */
-long hmm_range_snapshot(struct hmm_range *range)
-{
- const unsigned long device_vma = VM_IO | VM_PFNMAP | VM_MIXEDMAP;
- unsigned long start = range->start, end;
- struct hmm_vma_walk hmm_vma_walk;
- struct hmm *hmm = range->hmm;
- struct vm_area_struct *vma;
- struct mm_walk mm_walk;
-
- lockdep_assert_held(&hmm->mm->mmap_sem);
- do {
- /* If range is no longer valid force retry. */
- if (!range->valid)
- return -EBUSY;
-
- vma = find_vma(hmm->mm, start);
- if (vma == NULL || (vma->vm_flags & device_vma))
- return -EFAULT;
-
- if (is_vm_hugetlb_page(vma)) {
- if (huge_page_shift(hstate_vma(vma)) !=
- range->page_shift &&
- range->page_shift != PAGE_SHIFT)
- return -EINVAL;
- } else {
- if (range->page_shift != PAGE_SHIFT)
- return -EINVAL;
- }
-
- if (!(vma->vm_flags & VM_READ)) {
- /*
- * If vma do not allow read access, then assume that it
- * does not allow write access, either. HMM does not
- * support architecture that allow write without read.
- */
- hmm_pfns_clear(range, range->pfns,
- range->start, range->end);
- return -EPERM;
- }
-
- range->vma = vma;
- hmm_vma_walk.pgmap = NULL;
- hmm_vma_walk.last = start;
- hmm_vma_walk.fault = false;
- hmm_vma_walk.range = range;
- mm_walk.private = &hmm_vma_walk;
- end = min(range->end, vma->vm_end);
-
- mm_walk.vma = vma;
- mm_walk.mm = vma->vm_mm;
- mm_walk.pte_entry = NULL;
- mm_walk.test_walk = NULL;
- mm_walk.hugetlb_entry = NULL;
- mm_walk.pud_entry = hmm_vma_walk_pud;
- mm_walk.pmd_entry = hmm_vma_walk_pmd;
- mm_walk.pte_hole = hmm_vma_walk_hole;
- mm_walk.hugetlb_entry = hmm_vma_walk_hugetlb_entry;
-
- walk_page_range(start, end, &mm_walk);
- start = end;
- } while (start < range->end);
-
- return (hmm_vma_walk.last - range->start) >> PAGE_SHIFT;
-}
-EXPORT_SYMBOL(hmm_range_snapshot);
-
-/*
- * hmm_range_fault() - try to fault some address in a virtual address range
- * @range: range being faulted
- * @block: allow blocking on fault (if true it sleeps and do not drop mmap_sem)
- * Return: number of valid pages in range->pfns[] (from range start
- * address). This may be zero. If the return value is negative,
- * then one of the following values may be returned:
+ * Return: the number of valid pages in range->pfns[] (from range start
+ * address), which may be zero. On error one of the following status codes
+ * can be returned:
*
- * -EINVAL invalid arguments or mm or virtual address are in an
- * invalid vma (for instance device file vma).
- * -ENOMEM: Out of memory.
- * -EPERM: Invalid permission (for instance asking for write and
- * range is read only).
- * -EAGAIN: If you need to retry and mmap_sem was drop. This can only
- * happens if block argument is false.
- * -EBUSY: If the the range is being invalidated and you should wait
- * for invalidation to finish.
- * -EFAULT: Invalid (ie either no valid vma or it is illegal to access
- * that range), number of valid pages in range->pfns[] (from
- * range start address).
+ * -EINVAL: Invalid arguments or mm or virtual address is in an invalid vma
+ * (e.g., device file vma).
+ * -ENOMEM: Out of memory.
+ * -EPERM: Invalid permission (e.g., asking for write and range is read
+ * only).
+ * -EAGAIN: A page fault needs to be retried and mmap_sem was dropped.
+ * -EBUSY: The range has been invalidated and the caller needs to wait for
+ * the invalidation to finish.
+ * -EFAULT: Invalid (i.e., either no valid vma or it is illegal to access
+ * that range) number of valid pages in range->pfns[] (from
+ * range start address).
*
* This is similar to a regular CPU page fault except that it will not trigger
* any memory migration if the memory being faulted is not accessible by CPUs
@@ -1048,7 +960,7 @@ EXPORT_SYMBOL(hmm_range_snapshot);
* On error, for one virtual address in the range, the function will mark the
* corresponding HMM pfn entry with an error flag.
*/
-long hmm_range_fault(struct hmm_range *range, bool block)
+long hmm_range_fault(struct hmm_range *range, unsigned int flags)
{
const unsigned long device_vma = VM_IO | VM_PFNMAP | VM_MIXEDMAP;
unsigned long start = range->start, end;
@@ -1090,11 +1002,9 @@ long hmm_range_fault(struct hmm_range *range, bool block)
return -EPERM;
}
- range->vma = vma;
hmm_vma_walk.pgmap = NULL;
hmm_vma_walk.last = start;
- hmm_vma_walk.fault = true;
- hmm_vma_walk.block = block;
+ hmm_vma_walk.flags = flags;
hmm_vma_walk.range = range;
mm_walk.private = &hmm_vma_walk;
end = min(range->end, vma->vm_end);
@@ -1133,25 +1043,22 @@ long hmm_range_fault(struct hmm_range *range, bool block)
EXPORT_SYMBOL(hmm_range_fault);
/**
- * hmm_range_dma_map() - hmm_range_fault() and dma map page all in one.
- * @range: range being faulted
- * @device: device against to dma map page to
- * @daddrs: dma address of mapped pages
- * @block: allow blocking on fault (if true it sleeps and do not drop mmap_sem)
- * Return: number of pages mapped on success, -EAGAIN if mmap_sem have been
- * drop and you need to try again, some other error value otherwise
+ * hmm_range_dma_map - hmm_range_fault() and dma map page all in one.
+ * @range: range being faulted
+ * @device: device to map page to
+ * @daddrs: array of dma addresses for the mapped pages
+ * @flags: HMM_FAULT_*
*
- * Note same usage pattern as hmm_range_fault().
+ * Return: the number of pages mapped on success (including zero), or any
+ * status return from hmm_range_fault() otherwise.
*/
-long hmm_range_dma_map(struct hmm_range *range,
- struct device *device,
- dma_addr_t *daddrs,
- bool block)
+long hmm_range_dma_map(struct hmm_range *range, struct device *device,
+ dma_addr_t *daddrs, unsigned int flags)
{
unsigned long i, npages, mapped;
long ret;
- ret = hmm_range_fault(range, block);
+ ret = hmm_range_fault(range, flags);
if (ret <= 0)
return ret ? ret : -EBUSY;
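
The tail of the series reworks hmm_range_dma_map() to take the same flag set. A minimal hedged sketch of a blocking caller follows; the device, daddrs array and any unmap/cleanup are left to the surrounding driver, and the helper name is illustrative:

    static long my_fault_and_map(struct mm_struct *mm, struct hmm_range *range,
                                 struct device *dev, dma_addr_t *daddrs)
    {
            long ret;

            down_read(&mm->mmap_sem);
            /* Same HMM_FAULT_* flags as hmm_range_fault(); 0 means block. */
            ret = hmm_range_dma_map(range, dev, daddrs, 0);
            up_read(&mm->mmap_sem);

            return ret;     /* pages mapped, or an hmm_range_fault() error code */
    }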