summary | refs | log | tree | commit | diff
path: root/mm/mprotect.c
diff options
context:
space:
mode:
Diffstat (limited to 'mm/mprotect.c')
-rw-r--r-- mm/mprotect.c 86
1 files changed, 67 insertions, 19 deletions
diff --git a/mm/mprotect.c b/mm/mprotect.c
index ba5592655ee3..041beeb5fad6 100644
--- a/mm/mprotect.c
+++ b/mm/mprotect.c
@@ -38,6 +38,39 @@
#include "internal.h"
+/*
+ * can_change_pte_writable - may a write-protected PTE be mapped writable now?
+ * @vma:  mapping the PTE belongs to; VM_WRITE must be set in vma->vm_flags.
+ * @addr: virtual address mapped by @pte, used to look up the backing page.
+ * @pte:  PTE value to inspect; it must not already be writable.
+ *
+ * Returns true when none of the conditions checked below requires a write
+ * fault to be taken first, so the caller can set the write bit directly.
+ * Returns false whenever the write-fault handler still has to run.
+ */
+static inline bool can_change_pte_writable(struct vm_area_struct *vma,
+					   unsigned long addr, pte_t pte)
+{
+	struct page *page;
+
+	VM_BUG_ON(!(vma->vm_flags & VM_WRITE) || pte_write(pte));
+
+	/* PROT_NONE and clean PTEs always go through the fault handler. */
+	if (pte_protnone(pte) || !pte_dirty(pte))
+		return false;
+
+	/*
+	 * Do we need write faults for softdirty tracking?
+	 *
+	 * Tracking is active while VM_SOFTDIRTY is *clear* on the VMA; a VMA
+	 * with the flag set is already considered soft-dirty as a whole and
+	 * needs no per-PTE faults. Without CONFIG_MEM_SOFT_DIRTY there is
+	 * nothing to track at all.
+	 */
+	if (IS_ENABLED(CONFIG_MEM_SOFT_DIRTY) &&
+	    !(vma->vm_flags & VM_SOFTDIRTY) && !pte_soft_dirty(pte))
+		return false;
+
+	/* Do we need write faults for uffd-wp tracking? */
+	if (userfaultfd_pte_wp(vma, pte))
+		return false;
+
+	if (!(vma->vm_flags & VM_SHARED)) {
+		/*
+		 * We can only special-case on exclusive anonymous pages,
+		 * because we know that our write-fault handler similarly would
+		 * map them writable without any additional checks while holding
+		 * the PT lock.
+		 */
+		page = vm_normal_page(vma, addr, pte);
+		if (!page || !PageAnon(page) || !PageAnonExclusive(page))
+			return false;
+	}
+
+	return true;
+}
+
static unsigned long change_pte_range(struct mmu_gather *tlb,
struct vm_area_struct *vma, pmd_t *pmd, unsigned long addr,
unsigned long end, pgprot_t newprot, unsigned long cp_flags)
@@ -46,7 +79,6 @@ static unsigned long change_pte_range(struct mmu_gather *tlb,
spinlock_t *ptl;
unsigned long pages = 0;
int target_node = NUMA_NO_NODE;
- bool dirty_accountable = cp_flags & MM_CP_DIRTY_ACCT;
bool prot_numa = cp_flags & MM_CP_PROT_NUMA;
bool uffd_wp = cp_flags & MM_CP_UFFD_WP;
bool uffd_wp_resolve = cp_flags & MM_CP_UFFD_WP_RESOLVE;
@@ -95,7 +127,7 @@ static unsigned long change_pte_range(struct mmu_gather *tlb,
continue;
page = vm_normal_page(vma, addr, oldpte);
- if (!page || PageKsm(page))
+ if (!page || is_zone_device_page(page) || PageKsm(page))
continue;
/* Also skip shared copy-on-write pages */
@@ -137,21 +169,27 @@ static unsigned long change_pte_range(struct mmu_gather *tlb,
ptent = pte_wrprotect(ptent);
ptent = pte_mkuffd_wp(ptent);
} else if (uffd_wp_resolve) {
- /*
- * Leave the write bit to be handled
- * by PF interrupt handler, then
- * things like COW could be properly
- * handled.
- */
ptent = pte_clear_uffd_wp(ptent);
}
- /* Avoid taking write faults for known dirty pages */
- if (dirty_accountable && pte_dirty(ptent) &&
- (pte_soft_dirty(ptent) ||
- !(vma->vm_flags & VM_SOFTDIRTY))) {
+ /*
+ * In some writable, shared mappings, we might want
+ * to catch actual write access -- see
+ * vma_wants_writenotify().
+ *
+ * In all writable, private mappings, we have to
+ * properly handle COW.
+ *
+ * In both cases, we can sometimes still change PTEs
+ * writable and avoid the write-fault handler, for
+ * example, if a PTE is already dirty and no other
+ * COW or special handling is required.
+ */
+ if ((cp_flags & MM_CP_TRY_CHANGE_WRITABLE) &&
+ !pte_write(ptent) &&
+ can_change_pte_writable(vma, addr, ptent))
ptent = pte_mkwrite(ptent);
- }
+
ptep_modify_prot_commit(vma, addr, pte, oldpte, ptent);
if (pte_needs_flush(oldpte, ptent))
tlb_flush_pte_range(tlb, addr, PAGE_SIZE);
@@ -505,9 +543,9 @@ mprotect_fixup(struct mmu_gather *tlb, struct vm_area_struct *vma,
unsigned long oldflags = vma->vm_flags;
long nrpages = (end - start) >> PAGE_SHIFT;
unsigned long charged = 0;
+ bool try_change_writable;
pgoff_t pgoff;
int error;
- int dirty_accountable = 0;
if (newflags == oldflags) {
*pprev = vma;
@@ -583,11 +621,20 @@ success:
* held in write mode.
*/
vma->vm_flags = newflags;
- dirty_accountable = vma_wants_writenotify(vma, vma->vm_page_prot);
+ /*
+ * We want to check manually if we can change individual PTEs writable
+ * if we can't do that automatically for all PTEs in a mapping. For
+ * private mappings, that's always the case when we have write
+ * permissions as we properly have to handle COW.
+ */
+ if (vma->vm_flags & VM_SHARED)
+ try_change_writable = vma_wants_writenotify(vma, vma->vm_page_prot);
+ else
+ try_change_writable = !!(vma->vm_flags & VM_WRITE);
vma_set_page_prot(vma);
change_protection(tlb, vma, start, end, vma->vm_page_prot,
- dirty_accountable ? MM_CP_DIRTY_ACCT : 0);
+ try_change_writable ? MM_CP_TRY_CHANGE_WRITABLE : 0);
/*
* Private VM_LOCKED VMA becoming writable: trigger COW to avoid major
@@ -621,6 +668,7 @@ static int do_mprotect_pkey(unsigned long start, size_t len,
const bool rier = (current->personality & READ_IMPLIES_EXEC) &&
(prot & PROT_READ);
struct mmu_gather tlb;
+ MA_STATE(mas, &current->mm->mm_mt, start, start);
start = untagged_addr(start);
@@ -652,7 +700,7 @@ static int do_mprotect_pkey(unsigned long start, size_t len,
if ((pkey != -1) && !mm_pkey_is_allocated(current->mm, pkey))
goto out;
- vma = find_vma(current->mm, start);
+ vma = mas_find(&mas, ULONG_MAX);
error = -ENOMEM;
if (!vma)
goto out;
@@ -678,7 +726,7 @@ static int do_mprotect_pkey(unsigned long start, size_t len,
if (start > vma->vm_start)
prev = vma;
else
- prev = vma->vm_prev;
+ prev = mas_prev(&mas, 0);
tlb_gather_mmu(&tlb, current->mm);
for (nstart = start ; ; ) {
@@ -741,7 +789,7 @@ static int do_mprotect_pkey(unsigned long start, size_t len,
if (nstart >= end)
break;
- vma = prev->vm_next;
+ vma = find_vma(current->mm, prev->vm_end);
if (!vma || vma->vm_start != nstart) {
error = -ENOMEM;
break;