summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorStephen Rothwell <sfr@canb.auug.org.au>2020-07-20 18:29:44 +1000
committerStephen Rothwell <sfr@canb.auug.org.au>2020-07-20 18:29:44 +1000
commit61c2dbfd4f3fb10e574f8779763900afb6d60ac6 (patch)
treeba3c722c12bb3726f6fc9587c4dea8c55642633c
parent9dfa5c1dcba0b1e2f06cbc3061d3e1ae5feaea15 (diff)
parentd3a0b365954e5e4728b6a92b171e4659199c6db5 (diff)
downloadlinux-next-61c2dbfd4f3fb10e574f8779763900afb6d60ac6.tar.gz
Merge branch 'akpm/master'
-rw-r--r--arch/alpha/kernel/syscalls/syscall.tbl3
-rw-r--r--arch/alpha/mm/fault.c8
-rw-r--r--arch/arc/kernel/process.c2
-rw-r--r--arch/arc/mm/fault.c18
-rw-r--r--arch/arm/configs/am200epdkit_defconfig1
-rw-r--r--arch/arm/mm/fault.c25
-rw-r--r--arch/arm/tools/syscall.tbl3
-rw-r--r--arch/arm64/include/asm/unistd.h2
-rw-r--r--arch/arm64/include/asm/unistd32.h6
-rw-r--r--arch/arm64/kernel/mte.c2
-rw-r--r--arch/arm64/mm/fault.c29
-rw-r--r--arch/csky/mm/fault.c13
-rw-r--r--arch/hexagon/mm/vm_fault.c9
-rw-r--r--arch/ia64/kernel/syscalls/syscall.tbl3
-rw-r--r--arch/ia64/mm/fault.c9
-rw-r--r--arch/m68k/kernel/syscalls/syscall.tbl3
-rw-r--r--arch/m68k/mm/fault.c14
-rw-r--r--arch/microblaze/kernel/syscalls/syscall.tbl3
-rw-r--r--arch/microblaze/mm/fault.c9
-rw-r--r--arch/mips/configs/cu1000-neo_defconfig1
-rw-r--r--arch/mips/kernel/syscalls/syscall_n32.tbl3
-rw-r--r--arch/mips/kernel/syscalls/syscall_n64.tbl3
-rw-r--r--arch/mips/kernel/syscalls/syscall_o32.tbl3
-rw-r--r--arch/mips/mm/fault.c14
-rw-r--r--arch/nds32/mm/fault.c19
-rw-r--r--arch/nios2/mm/fault.c14
-rw-r--r--arch/openrisc/mm/fault.c9
-rw-r--r--arch/parisc/kernel/syscalls/syscall.tbl3
-rw-r--r--arch/parisc/mm/fault.c8
-rw-r--r--arch/powerpc/kernel/syscalls/syscall.tbl3
-rw-r--r--arch/powerpc/mm/copro_fault.c7
-rw-r--r--arch/powerpc/mm/fault.c11
-rw-r--r--arch/riscv/mm/fault.c16
-rw-r--r--arch/s390/kernel/syscalls/syscall.tbl3
-rw-r--r--arch/s390/kvm/interrupt.c2
-rw-r--r--arch/s390/kvm/kvm-s390.c2
-rw-r--r--arch/s390/kvm/priv.c8
-rw-r--r--arch/s390/mm/fault.c16
-rw-r--r--arch/s390/mm/gmap.c4
-rw-r--r--arch/sh/configs/dreamcast_defconfig1
-rw-r--r--arch/sh/configs/espt_defconfig1
-rw-r--r--arch/sh/configs/hp6xx_defconfig1
-rw-r--r--arch/sh/configs/landisk_defconfig1
-rw-r--r--arch/sh/configs/lboxre2_defconfig1
-rw-r--r--arch/sh/configs/microdev_defconfig1
-rw-r--r--arch/sh/configs/migor_defconfig1
-rw-r--r--arch/sh/configs/r7780mp_defconfig1
-rw-r--r--arch/sh/configs/r7785rp_defconfig1
-rw-r--r--arch/sh/configs/rts7751r2d1_defconfig1
-rw-r--r--arch/sh/configs/rts7751r2dplus_defconfig1
-rw-r--r--arch/sh/configs/se7206_defconfig1
-rw-r--r--arch/sh/configs/se7343_defconfig1
-rw-r--r--arch/sh/configs/se7619_defconfig1
-rw-r--r--arch/sh/configs/se7705_defconfig1
-rw-r--r--arch/sh/configs/se7750_defconfig1
-rw-r--r--arch/sh/configs/se7751_defconfig1
-rw-r--r--arch/sh/configs/secureedge5410_defconfig1
-rw-r--r--arch/sh/configs/sh03_defconfig1
-rw-r--r--arch/sh/configs/sh7710voipgw_defconfig1
-rw-r--r--arch/sh/configs/sh7757lcr_defconfig1
-rw-r--r--arch/sh/configs/sh7763rdp_defconfig1
-rw-r--r--arch/sh/configs/shmin_defconfig1
-rw-r--r--arch/sh/configs/titan_defconfig1
-rw-r--r--arch/sh/include/asm/string_32.h26
-rw-r--r--arch/sh/kernel/syscalls/syscall.tbl3
-rw-r--r--arch/sh/lib/delay.c1
-rw-r--r--arch/sh/mm/fault.c11
-rw-r--r--arch/sparc/kernel/syscalls/syscall.tbl3
-rw-r--r--arch/sparc/mm/fault_32.c13
-rw-r--r--arch/sparc/mm/fault_64.c11
-rw-r--r--arch/um/kernel/trap.c6
-rw-r--r--arch/x86/entry/syscalls/syscall_32.tbl3
-rw-r--r--arch/x86/entry/syscalls/syscall_64.tbl4
-rw-r--r--arch/x86/mm/fault.c17
-rw-r--r--arch/xtensa/kernel/syscalls/syscall.tbl3
-rw-r--r--arch/xtensa/mm/fault.c15
-rw-r--r--drivers/gpu/drm/i915/gem/i915_gem_userptr.c2
-rw-r--r--drivers/infiniband/core/umem_odp.c2
-rw-r--r--drivers/iommu/amd/iommu_v2.c2
-rw-r--r--drivers/iommu/intel/svm.c3
-rw-r--r--drivers/sh/clk/cpg.c23
-rw-r--r--drivers/vfio/vfio_iommu_type1.c4
-rw-r--r--fs/exec.c2
-rw-r--r--fs/io_uring.c2
-rw-r--r--include/asm-generic/vmlinux.lds.h1
-rw-r--r--include/linux/compat.h5
-rw-r--r--include/linux/hugetlb.h41
-rw-r--r--include/linux/memcontrol.h2
-rw-r--r--include/linux/migrate.h34
-rw-r--r--include/linux/mm.h20
-rw-r--r--include/linux/pid.h1
-rw-r--r--include/linux/syscalls.h4
-rw-r--r--include/linux/sysctl.h6
-rw-r--r--include/uapi/asm-generic/unistd.h4
-rw-r--r--kernel/Makefile2
-rw-r--r--kernel/events/uprobes.c6
-rw-r--r--kernel/exit.c17
-rw-r--r--kernel/futex.c2
-rw-r--r--kernel/pid.c17
-rw-r--r--kernel/sys_ni.c3
-rw-r--r--kernel/sysctl_binary.c171
-rw-r--r--mm/filemap.c20
-rw-r--r--mm/frontswap.c8
-rw-r--r--mm/gup.c118
-rw-r--r--mm/hmm.c3
-rw-r--r--mm/hugetlb.c35
-rw-r--r--mm/internal.h8
-rw-r--r--mm/kmemleak.c2
-rw-r--r--mm/ksm.c3
-rw-r--r--mm/list_lru.c2
-rw-r--r--mm/madvise.c190
-rw-r--r--mm/memory-failure.c15
-rw-r--r--mm/memory.c73
-rw-r--r--mm/memory_hotplug.c42
-rw-r--r--mm/mempolicy.c29
-rw-r--r--mm/mempool.c2
-rw-r--r--mm/migrate.c51
-rw-r--r--mm/page_alloc.c8
-rw-r--r--mm/page_counter.c13
-rw-r--r--mm/page_io.c8
-rw-r--r--mm/page_isolation.c5
-rw-r--r--mm/process_vm_access.c2
-rw-r--r--mm/rmap.c2
-rw-r--r--mm/swap.c5
-rw-r--r--mm/swap_state.c4
-rw-r--r--mm/swapfile.c31
-rw-r--r--scripts/deprecated_terms.txt7
-rw-r--r--security/tomoyo/domain.c2
-rw-r--r--tools/perf/arch/powerpc/entry/syscalls/syscall.tbl2
-rw-r--r--tools/perf/arch/s390/entry/syscalls/syscall.tbl2
-rw-r--r--tools/perf/arch/x86/entry/syscalls/syscall_64.tbl2
-rw-r--r--virt/kvm/async_pf.c2
-rw-r--r--virt/kvm/kvm_main.c2
133 files changed, 669 insertions, 856 deletions
diff --git a/arch/alpha/kernel/syscalls/syscall.tbl b/arch/alpha/kernel/syscalls/syscall.tbl
index 86ee19c1dc15..2e156975f573 100644
--- a/arch/alpha/kernel/syscalls/syscall.tbl
+++ b/arch/alpha/kernel/syscalls/syscall.tbl
@@ -249,7 +249,7 @@
316 common mlockall sys_mlockall
317 common munlockall sys_munlockall
318 common sysinfo sys_sysinfo
-319 common _sysctl sys_sysctl
+319 common _sysctl sys_ni_syscall
# 320 was sys_idle
321 common oldumount sys_oldumount
322 common swapon sys_swapon
@@ -482,3 +482,4 @@
550 common watch_mount sys_watch_mount
551 common watch_sb sys_watch_sb
552 common fsinfo sys_fsinfo
+553 common process_madvise sys_process_madvise
diff --git a/arch/alpha/mm/fault.c b/arch/alpha/mm/fault.c
index c2303a8c2b9f..09172f017efc 100644
--- a/arch/alpha/mm/fault.c
+++ b/arch/alpha/mm/fault.c
@@ -25,6 +25,7 @@
#include <linux/interrupt.h>
#include <linux/extable.h>
#include <linux/uaccess.h>
+#include <linux/perf_event.h>
extern void die_if_kernel(char *,struct pt_regs *,long, unsigned long *);
@@ -116,6 +117,7 @@ do_page_fault(unsigned long address, unsigned long mmcsr,
#endif
if (user_mode(regs))
flags |= FAULT_FLAG_USER;
+ perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS, 1, regs, address);
retry:
mmap_read_lock(mm);
vma = find_vma(mm, address);
@@ -148,7 +150,7 @@ retry:
/* If for any reason at all we couldn't handle the fault,
make sure we exit gracefully rather than endlessly redo
the fault. */
- fault = handle_mm_fault(vma, address, flags);
+ fault = handle_mm_fault(vma, address, flags, regs);
if (fault_signal_pending(fault, regs))
return;
@@ -164,10 +166,6 @@ retry:
}
if (flags & FAULT_FLAG_ALLOW_RETRY) {
- if (fault & VM_FAULT_MAJOR)
- current->maj_flt++;
- else
- current->min_flt++;
if (fault & VM_FAULT_RETRY) {
flags |= FAULT_FLAG_TRIED;
diff --git a/arch/arc/kernel/process.c b/arch/arc/kernel/process.c
index 105420c23c8b..a1d2eea66bba 100644
--- a/arch/arc/kernel/process.c
+++ b/arch/arc/kernel/process.c
@@ -91,7 +91,7 @@ fault:
goto fail;
mmap_read_lock(current->mm);
- ret = fixup_user_fault(current, current->mm, (unsigned long) uaddr,
+ ret = fixup_user_fault(current->mm, (unsigned long) uaddr,
FAULT_FLAG_WRITE, NULL);
mmap_read_unlock(current->mm);
diff --git a/arch/arc/mm/fault.c b/arch/arc/mm/fault.c
index 7287c793d1c9..f5657cb68e4f 100644
--- a/arch/arc/mm/fault.c
+++ b/arch/arc/mm/fault.c
@@ -105,6 +105,7 @@ void do_page_fault(unsigned long address, struct pt_regs *regs)
if (write)
flags |= FAULT_FLAG_WRITE;
+ perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS, 1, regs, address);
retry:
mmap_read_lock(mm);
@@ -130,7 +131,7 @@ retry:
goto bad_area;
}
- fault = handle_mm_fault(vma, address, flags);
+ fault = handle_mm_fault(vma, address, flags, regs);
/* Quick path to respond to signals */
if (fault_signal_pending(fault, regs)) {
@@ -155,22 +156,9 @@ bad_area:
* Major/minor page fault accounting
* (in case of retry we only land here once)
*/
- perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS, 1, regs, address);
-
- if (likely(!(fault & VM_FAULT_ERROR))) {
- if (fault & VM_FAULT_MAJOR) {
- tsk->maj_flt++;
- perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MAJ, 1,
- regs, address);
- } else {
- tsk->min_flt++;
- perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MIN, 1,
- regs, address);
- }
-
+ if (likely(!(fault & VM_FAULT_ERROR)))
/* Normal return path: fault Handled Gracefully */
return;
- }
if (!user_mode(regs))
goto no_context;
diff --git a/arch/arm/configs/am200epdkit_defconfig b/arch/arm/configs/am200epdkit_defconfig
index f56ac394caf1..4e49d6cb2f62 100644
--- a/arch/arm/configs/am200epdkit_defconfig
+++ b/arch/arm/configs/am200epdkit_defconfig
@@ -3,7 +3,6 @@ CONFIG_LOCALVERSION="gum"
CONFIG_SYSVIPC=y
CONFIG_SYSFS_DEPRECATED_V2=y
CONFIG_EXPERT=y
-# CONFIG_SYSCTL_SYSCALL is not set
# CONFIG_EPOLL is not set
# CONFIG_SHMEM is not set
# CONFIG_VM_EVENT_COUNTERS is not set
diff --git a/arch/arm/mm/fault.c b/arch/arm/mm/fault.c
index c6550eddfce1..efa402025031 100644
--- a/arch/arm/mm/fault.c
+++ b/arch/arm/mm/fault.c
@@ -202,7 +202,8 @@ static inline bool access_error(unsigned int fsr, struct vm_area_struct *vma)
static vm_fault_t __kprobes
__do_page_fault(struct mm_struct *mm, unsigned long addr, unsigned int fsr,
- unsigned int flags, struct task_struct *tsk)
+ unsigned int flags, struct task_struct *tsk,
+ struct pt_regs *regs)
{
struct vm_area_struct *vma;
vm_fault_t fault;
@@ -224,7 +225,7 @@ good_area:
goto out;
}
- return handle_mm_fault(vma, addr & PAGE_MASK, flags);
+ return handle_mm_fault(vma, addr & PAGE_MASK, flags, regs);
check_stack:
/* Don't allow expansion below FIRST_USER_ADDRESS */
@@ -266,6 +267,8 @@ do_page_fault(unsigned long addr, unsigned int fsr, struct pt_regs *regs)
if ((fsr & FSR_WRITE) && !(fsr & FSR_CM))
flags |= FAULT_FLAG_WRITE;
+ perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS, 1, regs, addr);
+
/*
* As per x86, we may deadlock here. However, since the kernel only
* validly references user space from well defined areas of the code,
@@ -290,7 +293,7 @@ retry:
#endif
}
- fault = __do_page_fault(mm, addr, fsr, flags, tsk);
+ fault = __do_page_fault(mm, addr, fsr, flags, tsk, regs);
/* If we need to retry but a fatal signal is pending, handle the
* signal first. We do not need to release the mmap_lock because
@@ -302,23 +305,7 @@ retry:
return 0;
}
- /*
- * Major/minor page fault accounting is only done on the
- * initial attempt. If we go through a retry, it is extremely
- * likely that the page will be found in page cache at that point.
- */
-
- perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS, 1, regs, addr);
if (!(fault & VM_FAULT_ERROR) && flags & FAULT_FLAG_ALLOW_RETRY) {
- if (fault & VM_FAULT_MAJOR) {
- tsk->maj_flt++;
- perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MAJ, 1,
- regs, addr);
- } else {
- tsk->min_flt++;
- perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MIN, 1,
- regs, addr);
- }
if (fault & VM_FAULT_RETRY) {
flags |= FAULT_FLAG_TRIED;
goto retry;
diff --git a/arch/arm/tools/syscall.tbl b/arch/arm/tools/syscall.tbl
index 91b97d51acef..b166a5383a60 100644
--- a/arch/arm/tools/syscall.tbl
+++ b/arch/arm/tools/syscall.tbl
@@ -162,7 +162,7 @@
146 common writev sys_writev
147 common getsid sys_getsid
148 common fdatasync sys_fdatasync
-149 common _sysctl sys_sysctl
+149 common _sysctl sys_ni_syscall
150 common mlock sys_mlock
151 common munlock sys_munlock
152 common mlockall sys_mlockall
@@ -456,3 +456,4 @@
440 common watch_mount sys_watch_mount
441 common watch_sb sys_watch_sb
442 common fsinfo sys_fsinfo
+443 common process_madvise sys_process_madvise
diff --git a/arch/arm64/include/asm/unistd.h b/arch/arm64/include/asm/unistd.h
index 949788f5ba40..d1f7d35f986e 100644
--- a/arch/arm64/include/asm/unistd.h
+++ b/arch/arm64/include/asm/unistd.h
@@ -38,7 +38,7 @@
#define __ARM_NR_compat_set_tls (__ARM_NR_COMPAT_BASE + 5)
#define __ARM_NR_COMPAT_END (__ARM_NR_COMPAT_BASE + 0x800)
-#define __NR_compat_syscalls 443
+#define __NR_compat_syscalls 444
#endif
#define __ARCH_WANT_SYS_CLONE
diff --git a/arch/arm64/include/asm/unistd32.h b/arch/arm64/include/asm/unistd32.h
index 9822cc41915f..7d2ba5bd7e04 100644
--- a/arch/arm64/include/asm/unistd32.h
+++ b/arch/arm64/include/asm/unistd32.h
@@ -308,8 +308,8 @@ __SYSCALL(__NR_writev, compat_sys_writev)
__SYSCALL(__NR_getsid, sys_getsid)
#define __NR_fdatasync 148
__SYSCALL(__NR_fdatasync, sys_fdatasync)
-#define __NR__sysctl 149
-__SYSCALL(__NR__sysctl, compat_sys_sysctl)
+ /* 149 was sys_sysctl */
+__SYSCALL(149, sys_ni_syscall)
#define __NR_mlock 150
__SYSCALL(__NR_mlock, sys_mlock)
#define __NR_munlock 151
@@ -893,6 +893,8 @@ __SYSCALL(__NR_watch_mount, sys_watch_mount)
__SYSCALL(__NR_watch_sb, sys_watch_sb)
#define __NR_fsinfo 442
__SYSCALL(__NR_fsinfo, sys_fsinfo)
+#define __NR_process_madvise 443
+__SYSCALL(__NR_process_madvise, compat_sys_process_madvise)
/*
* Please add new compat syscalls above this comment and update
diff --git a/arch/arm64/kernel/mte.c b/arch/arm64/kernel/mte.c
index eb39504e390a..bd2aad4616ba 100644
--- a/arch/arm64/kernel/mte.c
+++ b/arch/arm64/kernel/mte.c
@@ -227,7 +227,7 @@ static int __access_remote_tags(struct task_struct *tsk, struct mm_struct *mm,
void *maddr;
struct page *page = NULL;
- ret = get_user_pages_remote(tsk, mm, addr, 1, gup_flags,
+ ret = get_user_pages_remote(mm, addr, 1, gup_flags,
&page, &vma, NULL);
if (ret <= 0)
break;
diff --git a/arch/arm64/mm/fault.c b/arch/arm64/mm/fault.c
index 5e832b3387f1..a3bd189602df 100644
--- a/arch/arm64/mm/fault.c
+++ b/arch/arm64/mm/fault.c
@@ -404,7 +404,8 @@ static void do_bad_area(unsigned long addr, unsigned int esr, struct pt_regs *re
#define VM_FAULT_BADACCESS 0x020000
static vm_fault_t __do_page_fault(struct mm_struct *mm, unsigned long addr,
- unsigned int mm_flags, unsigned long vm_flags)
+ unsigned int mm_flags, unsigned long vm_flags,
+ struct pt_regs *regs)
{
struct vm_area_struct *vma = find_vma(mm, addr);
@@ -428,7 +429,7 @@ static vm_fault_t __do_page_fault(struct mm_struct *mm, unsigned long addr,
*/
if (!(vma->vm_flags & vm_flags))
return VM_FAULT_BADACCESS;
- return handle_mm_fault(vma, addr & PAGE_MASK, mm_flags);
+ return handle_mm_fault(vma, addr & PAGE_MASK, mm_flags, regs);
}
static bool is_el0_instruction_abort(unsigned int esr)
@@ -450,7 +451,7 @@ static int __kprobes do_page_fault(unsigned long addr, unsigned int esr,
{
const struct fault_info *inf;
struct mm_struct *mm = current->mm;
- vm_fault_t fault, major = 0;
+ vm_fault_t fault;
unsigned long vm_flags = VM_ACCESS_FLAGS;
unsigned int mm_flags = FAULT_FLAG_DEFAULT;
@@ -516,8 +517,7 @@ retry:
#endif
}
- fault = __do_page_fault(mm, addr, mm_flags, vm_flags);
- major |= fault & VM_FAULT_MAJOR;
+ fault = __do_page_fault(mm, addr, mm_flags, vm_flags, regs);
/* Quick path to respond to signals */
if (fault_signal_pending(fault, regs)) {
@@ -538,25 +538,8 @@ retry:
* Handle the "normal" (no error) case first.
*/
if (likely(!(fault & (VM_FAULT_ERROR | VM_FAULT_BADMAP |
- VM_FAULT_BADACCESS)))) {
- /*
- * Major/minor page fault accounting is only done
- * once. If we go through a retry, it is extremely
- * likely that the page will be found in page cache at
- * that point.
- */
- if (major) {
- current->maj_flt++;
- perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MAJ, 1, regs,
- addr);
- } else {
- current->min_flt++;
- perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MIN, 1, regs,
- addr);
- }
-
+ VM_FAULT_BADACCESS))))
return 0;
- }
/*
* If we are in kernel mode at this point, we have no context to
diff --git a/arch/csky/mm/fault.c b/arch/csky/mm/fault.c
index 0b9cbf2cf6a9..c3f580714ee4 100644
--- a/arch/csky/mm/fault.c
+++ b/arch/csky/mm/fault.c
@@ -150,7 +150,8 @@ good_area:
* make sure we exit gracefully rather than endlessly redo
* the fault.
*/
- fault = handle_mm_fault(vma, address, write ? FAULT_FLAG_WRITE : 0);
+ fault = handle_mm_fault(vma, address, write ? FAULT_FLAG_WRITE : 0,
+ regs);
if (unlikely(fault & VM_FAULT_ERROR)) {
if (fault & VM_FAULT_OOM)
goto out_of_memory;
@@ -160,16 +161,6 @@ good_area:
goto bad_area;
BUG();
}
- if (fault & VM_FAULT_MAJOR) {
- tsk->maj_flt++;
- perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MAJ, 1, regs,
- address);
- } else {
- tsk->min_flt++;
- perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MIN, 1, regs,
- address);
- }
-
mmap_read_unlock(mm);
return;
diff --git a/arch/hexagon/mm/vm_fault.c b/arch/hexagon/mm/vm_fault.c
index cd3808f96b93..ef32c5a84ff3 100644
--- a/arch/hexagon/mm/vm_fault.c
+++ b/arch/hexagon/mm/vm_fault.c
@@ -18,6 +18,7 @@
#include <linux/signal.h>
#include <linux/extable.h>
#include <linux/hardirq.h>
+#include <linux/perf_event.h>
/*
* Decode of hardware exception sends us to one of several
@@ -53,6 +54,8 @@ void do_page_fault(unsigned long address, long cause, struct pt_regs *regs)
if (user_mode(regs))
flags |= FAULT_FLAG_USER;
+
+ perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS, 1, regs, address);
retry:
mmap_read_lock(mm);
vma = find_vma(mm, address);
@@ -88,7 +91,7 @@ good_area:
break;
}
- fault = handle_mm_fault(vma, address, flags);
+ fault = handle_mm_fault(vma, address, flags, regs);
if (fault_signal_pending(fault, regs))
return;
@@ -96,10 +99,6 @@ good_area:
/* The most common case -- we are done. */
if (likely(!(fault & VM_FAULT_ERROR))) {
if (flags & FAULT_FLAG_ALLOW_RETRY) {
- if (fault & VM_FAULT_MAJOR)
- current->maj_flt++;
- else
- current->min_flt++;
if (fault & VM_FAULT_RETRY) {
flags |= FAULT_FLAG_TRIED;
goto retry;
diff --git a/arch/ia64/kernel/syscalls/syscall.tbl b/arch/ia64/kernel/syscalls/syscall.tbl
index e6c9396811bb..71337e11f01c 100644
--- a/arch/ia64/kernel/syscalls/syscall.tbl
+++ b/arch/ia64/kernel/syscalls/syscall.tbl
@@ -135,7 +135,7 @@
123 common writev sys_writev
124 common pread64 sys_pread64
125 common pwrite64 sys_pwrite64
-126 common _sysctl sys_sysctl
+126 common _sysctl sys_ni_syscall
127 common mmap sys_mmap
128 common munmap sys_munmap
129 common mlock sys_mlock
@@ -363,3 +363,4 @@
440 common watch_mount sys_watch_mount
441 common watch_sb sys_watch_sb
442 common fsinfo sys_fsinfo
+443 common process_madvise sys_process_madvise
diff --git a/arch/ia64/mm/fault.c b/arch/ia64/mm/fault.c
index 3a4dec334cc5..cd9766d2b6e0 100644
--- a/arch/ia64/mm/fault.c
+++ b/arch/ia64/mm/fault.c
@@ -14,6 +14,7 @@
#include <linux/kdebug.h>
#include <linux/prefetch.h>
#include <linux/uaccess.h>
+#include <linux/perf_event.h>
#include <asm/processor.h>
#include <asm/exception.h>
@@ -105,6 +106,8 @@ ia64_do_page_fault (unsigned long address, unsigned long isr, struct pt_regs *re
flags |= FAULT_FLAG_USER;
if (mask & VM_WRITE)
flags |= FAULT_FLAG_WRITE;
+
+ perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS, 1, regs, address);
retry:
mmap_read_lock(mm);
@@ -143,7 +146,7 @@ retry:
* sure we exit gracefully rather than endlessly redo the
* fault.
*/
- fault = handle_mm_fault(vma, address, flags);
+ fault = handle_mm_fault(vma, address, flags, regs);
if (fault_signal_pending(fault, regs))
return;
@@ -166,10 +169,6 @@ retry:
}
if (flags & FAULT_FLAG_ALLOW_RETRY) {
- if (fault & VM_FAULT_MAJOR)
- current->maj_flt++;
- else
- current->min_flt++;
if (fault & VM_FAULT_RETRY) {
flags |= FAULT_FLAG_TRIED;
diff --git a/arch/m68k/kernel/syscalls/syscall.tbl b/arch/m68k/kernel/syscalls/syscall.tbl
index cc042ace0494..460fb0f9bb4b 100644
--- a/arch/m68k/kernel/syscalls/syscall.tbl
+++ b/arch/m68k/kernel/syscalls/syscall.tbl
@@ -156,7 +156,7 @@
146 common writev sys_writev
147 common getsid sys_getsid
148 common fdatasync sys_fdatasync
-149 common _sysctl sys_sysctl
+149 common _sysctl sys_ni_syscall
150 common mlock sys_mlock
151 common munlock sys_munlock
152 common mlockall sys_mlockall
@@ -442,3 +442,4 @@
440 common watch_mount sys_watch_mount
441 common watch_sb sys_watch_sb
442 common fsinfo sys_fsinfo
+443 common process_madvise sys_process_madvise
diff --git a/arch/m68k/mm/fault.c b/arch/m68k/mm/fault.c
index 508abb63da67..795f483b1050 100644
--- a/arch/m68k/mm/fault.c
+++ b/arch/m68k/mm/fault.c
@@ -12,6 +12,7 @@
#include <linux/interrupt.h>
#include <linux/module.h>
#include <linux/uaccess.h>
+#include <linux/perf_event.h>
#include <asm/setup.h>
#include <asm/traps.h>
@@ -84,6 +85,8 @@ int do_page_fault(struct pt_regs *regs, unsigned long address,
if (user_mode(regs))
flags |= FAULT_FLAG_USER;
+
+ perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS, 1, regs, address);
retry:
mmap_read_lock(mm);
@@ -134,7 +137,7 @@ good_area:
* the fault.
*/
- fault = handle_mm_fault(vma, address, flags);
+ fault = handle_mm_fault(vma, address, flags, regs);
pr_debug("handle_mm_fault returns %x\n", fault);
if (fault_signal_pending(fault, regs))
@@ -150,16 +153,7 @@ good_area:
BUG();
}
- /*
- * Major/minor page fault accounting is only done on the
- * initial attempt. If we go through a retry, it is extremely
- * likely that the page will be found in page cache at that point.
- */
if (flags & FAULT_FLAG_ALLOW_RETRY) {
- if (fault & VM_FAULT_MAJOR)
- current->maj_flt++;
- else
- current->min_flt++;
if (fault & VM_FAULT_RETRY) {
flags |= FAULT_FLAG_TRIED;
diff --git a/arch/microblaze/kernel/syscalls/syscall.tbl b/arch/microblaze/kernel/syscalls/syscall.tbl
index a9f647b045f7..a95897a4ea76 100644
--- a/arch/microblaze/kernel/syscalls/syscall.tbl
+++ b/arch/microblaze/kernel/syscalls/syscall.tbl
@@ -156,7 +156,7 @@
146 common writev sys_writev
147 common getsid sys_getsid
148 common fdatasync sys_fdatasync
-149 common _sysctl sys_sysctl
+149 common _sysctl sys_ni_syscall
150 common mlock sys_mlock
151 common munlock sys_munlock
152 common mlockall sys_mlockall
@@ -448,3 +448,4 @@
440 common watch_mount sys_watch_mount
441 common watch_sb sys_watch_sb
442 common fsinfo sys_fsinfo
+443 common process_madvise sys_process_madvise
diff --git a/arch/microblaze/mm/fault.c b/arch/microblaze/mm/fault.c
index a2bfe587b491..b3fed2cecf84 100644
--- a/arch/microblaze/mm/fault.c
+++ b/arch/microblaze/mm/fault.c
@@ -28,6 +28,7 @@
#include <linux/mman.h>
#include <linux/mm.h>
#include <linux/interrupt.h>
+#include <linux/perf_event.h>
#include <asm/page.h>
#include <asm/mmu.h>
@@ -121,6 +122,8 @@ void do_page_fault(struct pt_regs *regs, unsigned long address,
if (user_mode(regs))
flags |= FAULT_FLAG_USER;
+ perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS, 1, regs, address);
+
/* When running in the kernel we expect faults to occur only to
* addresses in user space. All other faults represent errors in the
* kernel and should generate an OOPS. Unfortunately, in the case of an
@@ -214,7 +217,7 @@ good_area:
* make sure we exit gracefully rather than endlessly redo
* the fault.
*/
- fault = handle_mm_fault(vma, address, flags);
+ fault = handle_mm_fault(vma, address, flags, regs);
if (fault_signal_pending(fault, regs))
return;
@@ -230,10 +233,6 @@ good_area:
}
if (flags & FAULT_FLAG_ALLOW_RETRY) {
- if (unlikely(fault & VM_FAULT_MAJOR))
- current->maj_flt++;
- else
- current->min_flt++;
if (fault & VM_FAULT_RETRY) {
flags |= FAULT_FLAG_TRIED;
diff --git a/arch/mips/configs/cu1000-neo_defconfig b/arch/mips/configs/cu1000-neo_defconfig
index 6b471cdb16cf..e924c817f73d 100644
--- a/arch/mips/configs/cu1000-neo_defconfig
+++ b/arch/mips/configs/cu1000-neo_defconfig
@@ -17,7 +17,6 @@ CONFIG_CGROUP_CPUACCT=y
CONFIG_NAMESPACES=y
CONFIG_USER_NS=y
CONFIG_CC_OPTIMIZE_FOR_SIZE=y
-CONFIG_SYSCTL_SYSCALL=y
CONFIG_KALLSYMS_ALL=y
CONFIG_EMBEDDED=y
# CONFIG_VM_EVENT_COUNTERS is not set
diff --git a/arch/mips/kernel/syscalls/syscall_n32.tbl b/arch/mips/kernel/syscalls/syscall_n32.tbl
index bd15c759ad97..5ede2681f4e1 100644
--- a/arch/mips/kernel/syscalls/syscall_n32.tbl
+++ b/arch/mips/kernel/syscalls/syscall_n32.tbl
@@ -159,7 +159,7 @@
149 n32 munlockall sys_munlockall
150 n32 vhangup sys_vhangup
151 n32 pivot_root sys_pivot_root
-152 n32 _sysctl compat_sys_sysctl
+152 n32 _sysctl sys_ni_syscall
153 n32 prctl sys_prctl
154 n32 adjtimex sys_adjtimex_time32
155 n32 setrlimit compat_sys_setrlimit
@@ -381,3 +381,4 @@
440 n32 watch_mount sys_watch_mount
441 n32 watch_sb sys_watch_sb
442 n32 fsinfo sys_fsinfo
+443 n32 process_madvise compat_sys_process_madvise
diff --git a/arch/mips/kernel/syscalls/syscall_n64.tbl b/arch/mips/kernel/syscalls/syscall_n64.tbl
index b2b9538535e6..daa607c4afe6 100644
--- a/arch/mips/kernel/syscalls/syscall_n64.tbl
+++ b/arch/mips/kernel/syscalls/syscall_n64.tbl
@@ -159,7 +159,7 @@
149 n64 munlockall sys_munlockall
150 n64 vhangup sys_vhangup
151 n64 pivot_root sys_pivot_root
-152 n64 _sysctl sys_sysctl
+152 n64 _sysctl sys_ni_syscall
153 n64 prctl sys_prctl
154 n64 adjtimex sys_adjtimex
155 n64 setrlimit sys_setrlimit
@@ -357,3 +357,4 @@
440 n64 watch_mount sys_watch_mount
441 n64 watch_sb sys_watch_sb
442 n64 fsinfo sys_fsinfo
+443 n64 process_madvise sys_process_madvise
diff --git a/arch/mips/kernel/syscalls/syscall_o32.tbl b/arch/mips/kernel/syscalls/syscall_o32.tbl
index bea3d6e34711..0dffd81fb345 100644
--- a/arch/mips/kernel/syscalls/syscall_o32.tbl
+++ b/arch/mips/kernel/syscalls/syscall_o32.tbl
@@ -164,7 +164,7 @@
150 o32 unused150 sys_ni_syscall
151 o32 getsid sys_getsid
152 o32 fdatasync sys_fdatasync
-153 o32 _sysctl sys_sysctl compat_sys_sysctl
+153 o32 _sysctl sys_ni_syscall
154 o32 mlock sys_mlock
155 o32 munlock sys_munlock
156 o32 mlockall sys_mlockall
@@ -430,3 +430,4 @@
440 o32 watch_mount sys_watch_mount
441 o32 watch_sb sys_watch_sb
442 o32 fsinfo sys_fsinfo
+443 o32 process_madvise sys_process_madvise compat_sys_process_madvise
diff --git a/arch/mips/mm/fault.c b/arch/mips/mm/fault.c
index 01b168a90434..7c871b14e74a 100644
--- a/arch/mips/mm/fault.c
+++ b/arch/mips/mm/fault.c
@@ -96,6 +96,8 @@ static void __kprobes __do_page_fault(struct pt_regs *regs, unsigned long write,
if (user_mode(regs))
flags |= FAULT_FLAG_USER;
+
+ perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS, 1, regs, address);
retry:
mmap_read_lock(mm);
vma = find_vma(mm, address);
@@ -152,12 +154,11 @@ good_area:
* make sure we exit gracefully rather than endlessly redo
* the fault.
*/
- fault = handle_mm_fault(vma, address, flags);
+ fault = handle_mm_fault(vma, address, flags, regs);
if (fault_signal_pending(fault, regs))
return;
- perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS, 1, regs, address);
if (unlikely(fault & VM_FAULT_ERROR)) {
if (fault & VM_FAULT_OOM)
goto out_of_memory;
@@ -168,15 +169,6 @@ good_area:
BUG();
}
if (flags & FAULT_FLAG_ALLOW_RETRY) {
- if (fault & VM_FAULT_MAJOR) {
- perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MAJ, 1,
- regs, address);
- tsk->maj_flt++;
- } else {
- perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MIN, 1,
- regs, address);
- tsk->min_flt++;
- }
if (fault & VM_FAULT_RETRY) {
flags |= FAULT_FLAG_TRIED;
diff --git a/arch/nds32/mm/fault.c b/arch/nds32/mm/fault.c
index 8fb73f6401a0..f02524eb6d56 100644
--- a/arch/nds32/mm/fault.c
+++ b/arch/nds32/mm/fault.c
@@ -121,6 +121,8 @@ void do_page_fault(unsigned long entry, unsigned long addr,
if (unlikely(faulthandler_disabled() || !mm))
goto no_context;
+ perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS, 1, regs, addr);
+
/*
* As per x86, we may deadlock here. However, since the kernel only
* validly references user space from well defined areas of the code,
@@ -206,7 +208,7 @@ good_area:
* the fault.
*/
- fault = handle_mm_fault(vma, addr, flags);
+ fault = handle_mm_fault(vma, addr, flags, regs);
/*
* If we need to retry but a fatal signal is pending, handle the
@@ -228,22 +230,7 @@ good_area:
goto bad_area;
}
- /*
- * Major/minor page fault accounting is only done on the initial
- * attempt. If we go through a retry, it is extremely likely that the
- * page will be found in page cache at that point.
- */
- perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS, 1, regs, addr);
if (flags & FAULT_FLAG_ALLOW_RETRY) {
- if (fault & VM_FAULT_MAJOR) {
- tsk->maj_flt++;
- perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MAJ,
- 1, regs, addr);
- } else {
- tsk->min_flt++;
- perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MIN,
- 1, regs, addr);
- }
if (fault & VM_FAULT_RETRY) {
flags |= FAULT_FLAG_TRIED;
diff --git a/arch/nios2/mm/fault.c b/arch/nios2/mm/fault.c
index 4112ef0e247e..9476feecf512 100644
--- a/arch/nios2/mm/fault.c
+++ b/arch/nios2/mm/fault.c
@@ -24,6 +24,7 @@
#include <linux/mm.h>
#include <linux/extable.h>
#include <linux/uaccess.h>
+#include <linux/perf_event.h>
#include <asm/mmu_context.h>
#include <asm/traps.h>
@@ -83,6 +84,8 @@ asmlinkage void do_page_fault(struct pt_regs *regs, unsigned long cause,
if (user_mode(regs))
flags |= FAULT_FLAG_USER;
+ perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS, 1, regs, address);
+
if (!mmap_read_trylock(mm)) {
if (!user_mode(regs) && !search_exception_tables(regs->ea))
goto bad_area_nosemaphore;
@@ -131,7 +134,7 @@ good_area:
* make sure we exit gracefully rather than endlessly redo
* the fault.
*/
- fault = handle_mm_fault(vma, address, flags);
+ fault = handle_mm_fault(vma, address, flags, regs);
if (fault_signal_pending(fault, regs))
return;
@@ -146,16 +149,7 @@ good_area:
BUG();
}
- /*
- * Major/minor page fault accounting is only done on the
- * initial attempt. If we go through a retry, it is extremely
- * likely that the page will be found in page cache at that point.
- */
if (flags & FAULT_FLAG_ALLOW_RETRY) {
- if (fault & VM_FAULT_MAJOR)
- current->maj_flt++;
- else
- current->min_flt++;
if (fault & VM_FAULT_RETRY) {
flags |= FAULT_FLAG_TRIED;
diff --git a/arch/openrisc/mm/fault.c b/arch/openrisc/mm/fault.c
index d2224ccca294..ca97d9baab51 100644
--- a/arch/openrisc/mm/fault.c
+++ b/arch/openrisc/mm/fault.c
@@ -15,6 +15,7 @@
#include <linux/interrupt.h>
#include <linux/extable.h>
#include <linux/sched/signal.h>
+#include <linux/perf_event.h>
#include <linux/uaccess.h>
#include <asm/siginfo.h>
@@ -103,6 +104,8 @@ asmlinkage void do_page_fault(struct pt_regs *regs, unsigned long address,
if (in_interrupt() || !mm)
goto no_context;
+ perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS, 1, regs, address);
+
retry:
mmap_read_lock(mm);
vma = find_vma(mm, address);
@@ -159,7 +162,7 @@ good_area:
* the fault.
*/
- fault = handle_mm_fault(vma, address, flags);
+ fault = handle_mm_fault(vma, address, flags, regs);
if (fault_signal_pending(fault, regs))
return;
@@ -176,10 +179,6 @@ good_area:
if (flags & FAULT_FLAG_ALLOW_RETRY) {
/*RGD modeled on Cris */
- if (fault & VM_FAULT_MAJOR)
- tsk->maj_flt++;
- else
- tsk->min_flt++;
if (fault & VM_FAULT_RETRY) {
flags |= FAULT_FLAG_TRIED;
diff --git a/arch/parisc/kernel/syscalls/syscall.tbl b/arch/parisc/kernel/syscalls/syscall.tbl
index 43bbdce3237f..09ac0b4aac30 100644
--- a/arch/parisc/kernel/syscalls/syscall.tbl
+++ b/arch/parisc/kernel/syscalls/syscall.tbl
@@ -163,7 +163,7 @@
146 common writev sys_writev compat_sys_writev
147 common getsid sys_getsid
148 common fdatasync sys_fdatasync
-149 common _sysctl sys_sysctl compat_sys_sysctl
+149 common _sysctl sys_ni_syscall
150 common mlock sys_mlock
151 common munlock sys_munlock
152 common mlockall sys_mlockall
@@ -440,3 +440,4 @@
440 common watch_mount sys_watch_mount
441 common watch_sb sys_watch_sb
442 common fsinfo sys_fsinfo
+443 common process_madvise sys_process_madvise compat_sys_process_madvise
diff --git a/arch/parisc/mm/fault.c b/arch/parisc/mm/fault.c
index 66ac0719bd49..4bfe2da9fbe3 100644
--- a/arch/parisc/mm/fault.c
+++ b/arch/parisc/mm/fault.c
@@ -18,6 +18,7 @@
#include <linux/extable.h>
#include <linux/uaccess.h>
#include <linux/hugetlb.h>
+#include <linux/perf_event.h>
#include <asm/traps.h>
@@ -281,6 +282,7 @@ void do_page_fault(struct pt_regs *regs, unsigned long code,
acc_type = parisc_acctyp(code, regs->iir);
if (acc_type & VM_WRITE)
flags |= FAULT_FLAG_WRITE;
+ perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS, 1, regs, address);
retry:
mmap_read_lock(mm);
vma = find_vma_prev(mm, address, &prev_vma);
@@ -302,7 +304,7 @@ good_area:
* fault.
*/
- fault = handle_mm_fault(vma, address, flags);
+ fault = handle_mm_fault(vma, address, flags, regs);
if (fault_signal_pending(fault, regs))
return;
@@ -323,10 +325,6 @@ good_area:
BUG();
}
if (flags & FAULT_FLAG_ALLOW_RETRY) {
- if (fault & VM_FAULT_MAJOR)
- current->maj_flt++;
- else
- current->min_flt++;
if (fault & VM_FAULT_RETRY) {
/*
* No need to mmap_read_unlock(mm) as we would
diff --git a/arch/powerpc/kernel/syscalls/syscall.tbl b/arch/powerpc/kernel/syscalls/syscall.tbl
index 93355266885f..df783ec47ca4 100644
--- a/arch/powerpc/kernel/syscalls/syscall.tbl
+++ b/arch/powerpc/kernel/syscalls/syscall.tbl
@@ -197,7 +197,7 @@
146 common writev sys_writev compat_sys_writev
147 common getsid sys_getsid
148 common fdatasync sys_fdatasync
-149 nospu _sysctl sys_sysctl compat_sys_sysctl
+149 nospu _sysctl sys_ni_syscall
150 common mlock sys_mlock
151 common munlock sys_munlock
152 common mlockall sys_mlockall
@@ -532,3 +532,4 @@
440 common watch_mount sys_watch_mount
441 common watch_sb sys_watch_sb
442 common fsinfo sys_fsinfo
+443 common process_madvise sys_process_madvise compat_sys_process_madvise
diff --git a/arch/powerpc/mm/copro_fault.c b/arch/powerpc/mm/copro_fault.c
index b83abbead4a2..8acd00178956 100644
--- a/arch/powerpc/mm/copro_fault.c
+++ b/arch/powerpc/mm/copro_fault.c
@@ -64,7 +64,7 @@ int copro_handle_mm_fault(struct mm_struct *mm, unsigned long ea,
}
ret = 0;
- *flt = handle_mm_fault(vma, ea, is_write ? FAULT_FLAG_WRITE : 0);
+ *flt = handle_mm_fault(vma, ea, is_write ? FAULT_FLAG_WRITE : 0, NULL);
if (unlikely(*flt & VM_FAULT_ERROR)) {
if (*flt & VM_FAULT_OOM) {
ret = -ENOMEM;
@@ -76,11 +76,6 @@ int copro_handle_mm_fault(struct mm_struct *mm, unsigned long ea,
BUG();
}
- if (*flt & VM_FAULT_MAJOR)
- current->maj_flt++;
- else
- current->min_flt++;
-
out_unlock:
mmap_read_unlock(mm);
return ret;
diff --git a/arch/powerpc/mm/fault.c b/arch/powerpc/mm/fault.c
index 641fc5f3d7dd..00259e9b452d 100644
--- a/arch/powerpc/mm/fault.c
+++ b/arch/powerpc/mm/fault.c
@@ -607,7 +607,7 @@ good_area:
* make sure we exit gracefully rather than endlessly redo
* the fault.
*/
- fault = handle_mm_fault(vma, address, flags);
+ fault = handle_mm_fault(vma, address, flags, regs);
major |= fault & VM_FAULT_MAJOR;
@@ -633,14 +633,9 @@ good_area:
/*
* Major/minor page fault accounting.
*/
- if (major) {
- current->maj_flt++;
- perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MAJ, 1, regs, address);
+ if (major)
cmo_account_page_fault();
- } else {
- current->min_flt++;
- perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MIN, 1, regs, address);
- }
+
return 0;
}
NOKPROBE_SYMBOL(__do_page_fault);
diff --git a/arch/riscv/mm/fault.c b/arch/riscv/mm/fault.c
index 5873835a3e6b..716d64e36f83 100644
--- a/arch/riscv/mm/fault.c
+++ b/arch/riscv/mm/fault.c
@@ -109,7 +109,7 @@ good_area:
* make sure we exit gracefully rather than endlessly redo
* the fault.
*/
- fault = handle_mm_fault(vma, addr, flags);
+ fault = handle_mm_fault(vma, addr, flags, regs);
/*
* If we need to retry but a fatal signal is pending, handle the
@@ -127,21 +127,7 @@ good_area:
BUG();
}
- /*
- * Major/minor page fault accounting is only done on the
- * initial attempt. If we go through a retry, it is extremely
- * likely that the page will be found in page cache at that point.
- */
if (flags & FAULT_FLAG_ALLOW_RETRY) {
- if (fault & VM_FAULT_MAJOR) {
- tsk->maj_flt++;
- perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MAJ,
- 1, regs, addr);
- } else {
- tsk->min_flt++;
- perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MIN,
- 1, regs, addr);
- }
if (fault & VM_FAULT_RETRY) {
flags |= FAULT_FLAG_TRIED;
diff --git a/arch/s390/kernel/syscalls/syscall.tbl b/arch/s390/kernel/syscalls/syscall.tbl
index 621085153509..51c8a704a119 100644
--- a/arch/s390/kernel/syscalls/syscall.tbl
+++ b/arch/s390/kernel/syscalls/syscall.tbl
@@ -138,7 +138,7 @@
146 common writev sys_writev compat_sys_writev
147 common getsid sys_getsid sys_getsid
148 common fdatasync sys_fdatasync sys_fdatasync
-149 common _sysctl sys_sysctl compat_sys_sysctl
+149 common _sysctl - -
150 common mlock sys_mlock sys_mlock
151 common munlock sys_munlock sys_munlock
152 common mlockall sys_mlockall sys_mlockall
@@ -445,3 +445,4 @@
440 common watch_mount sys_watch_mount sys_watch_mount
441 common watch_sb sys_watch_sb sys_watch_sb
442 common fsinfo sys_fsinfo sys_fsinfo
+443 common process_madvise sys_process_madvise compat_sys_process_madvise
diff --git a/arch/s390/kvm/interrupt.c b/arch/s390/kvm/interrupt.c
index 1608fd99bbee..2f177298c663 100644
--- a/arch/s390/kvm/interrupt.c
+++ b/arch/s390/kvm/interrupt.c
@@ -2768,7 +2768,7 @@ static struct page *get_map_page(struct kvm *kvm, u64 uaddr)
struct page *page = NULL;
mmap_read_lock(kvm->mm);
- get_user_pages_remote(NULL, kvm->mm, uaddr, 1, FOLL_WRITE,
+ get_user_pages_remote(kvm->mm, uaddr, 1, FOLL_WRITE,
&page, NULL, NULL);
mmap_read_unlock(kvm->mm);
return page;
diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c
index 66da278a67fb..6b74b92c1a58 100644
--- a/arch/s390/kvm/kvm-s390.c
+++ b/arch/s390/kvm/kvm-s390.c
@@ -1892,7 +1892,7 @@ static long kvm_s390_set_skeys(struct kvm *kvm, struct kvm_s390_skeys *args)
r = set_guest_storage_key(current->mm, hva, keys[i], 0);
if (r) {
- r = fixup_user_fault(current, current->mm, hva,
+ r = fixup_user_fault(current->mm, hva,
FAULT_FLAG_WRITE, &unlocked);
if (r)
break;
diff --git a/arch/s390/kvm/priv.c b/arch/s390/kvm/priv.c
index 2f721a923b54..cd74989ce0b0 100644
--- a/arch/s390/kvm/priv.c
+++ b/arch/s390/kvm/priv.c
@@ -273,7 +273,7 @@ retry:
rc = get_guest_storage_key(current->mm, vmaddr, &key);
if (rc) {
- rc = fixup_user_fault(current, current->mm, vmaddr,
+ rc = fixup_user_fault(current->mm, vmaddr,
FAULT_FLAG_WRITE, &unlocked);
if (!rc) {
mmap_read_unlock(current->mm);
@@ -319,7 +319,7 @@ retry:
mmap_read_lock(current->mm);
rc = reset_guest_reference_bit(current->mm, vmaddr);
if (rc < 0) {
- rc = fixup_user_fault(current, current->mm, vmaddr,
+ rc = fixup_user_fault(current->mm, vmaddr,
FAULT_FLAG_WRITE, &unlocked);
if (!rc) {
mmap_read_unlock(current->mm);
@@ -390,7 +390,7 @@ static int handle_sske(struct kvm_vcpu *vcpu)
m3 & SSKE_MC);
if (rc < 0) {
- rc = fixup_user_fault(current, current->mm, vmaddr,
+ rc = fixup_user_fault(current->mm, vmaddr,
FAULT_FLAG_WRITE, &unlocked);
rc = !rc ? -EAGAIN : rc;
}
@@ -1094,7 +1094,7 @@ static int handle_pfmf(struct kvm_vcpu *vcpu)
rc = cond_set_guest_storage_key(current->mm, vmaddr,
key, NULL, nq, mr, mc);
if (rc < 0) {
- rc = fixup_user_fault(current, current->mm, vmaddr,
+ rc = fixup_user_fault(current->mm, vmaddr,
FAULT_FLAG_WRITE, &unlocked);
rc = !rc ? -EAGAIN : rc;
}
diff --git a/arch/s390/mm/fault.c b/arch/s390/mm/fault.c
index d53c2e2ea1fd..9aa201df2e94 100644
--- a/arch/s390/mm/fault.c
+++ b/arch/s390/mm/fault.c
@@ -478,7 +478,7 @@ retry:
* make sure we exit gracefully rather than endlessly redo
* the fault.
*/
- fault = handle_mm_fault(vma, address, flags);
+ fault = handle_mm_fault(vma, address, flags, regs);
if (fault_signal_pending(fault, regs)) {
fault = VM_FAULT_SIGNAL;
if (flags & FAULT_FLAG_RETRY_NOWAIT)
@@ -488,21 +488,7 @@ retry:
if (unlikely(fault & VM_FAULT_ERROR))
goto out_up;
- /*
- * Major/minor page fault accounting is only done on the
- * initial attempt. If we go through a retry, it is extremely
- * likely that the page will be found in page cache at that point.
- */
if (flags & FAULT_FLAG_ALLOW_RETRY) {
- if (fault & VM_FAULT_MAJOR) {
- tsk->maj_flt++;
- perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MAJ, 1,
- regs, address);
- } else {
- tsk->min_flt++;
- perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MIN, 1,
- regs, address);
- }
if (fault & VM_FAULT_RETRY) {
if (IS_ENABLED(CONFIG_PGSTE) && gmap &&
(flags & FAULT_FLAG_RETRY_NOWAIT)) {
diff --git a/arch/s390/mm/gmap.c b/arch/s390/mm/gmap.c
index 190357ff86b3..8747487c50a8 100644
--- a/arch/s390/mm/gmap.c
+++ b/arch/s390/mm/gmap.c
@@ -649,7 +649,7 @@ retry:
rc = vmaddr;
goto out_up;
}
- if (fixup_user_fault(current, gmap->mm, vmaddr, fault_flags,
+ if (fixup_user_fault(gmap->mm, vmaddr, fault_flags,
&unlocked)) {
rc = -EFAULT;
goto out_up;
@@ -879,7 +879,7 @@ static int gmap_pte_op_fixup(struct gmap *gmap, unsigned long gaddr,
BUG_ON(gmap_is_shadow(gmap));
fault_flags = (prot == PROT_WRITE) ? FAULT_FLAG_WRITE : 0;
- if (fixup_user_fault(current, mm, vmaddr, fault_flags, &unlocked))
+ if (fixup_user_fault(mm, vmaddr, fault_flags, &unlocked))
return -EFAULT;
if (unlocked)
/* lost mmap_lock, caller has to retry __gmap_translate */
diff --git a/arch/sh/configs/dreamcast_defconfig b/arch/sh/configs/dreamcast_defconfig
index ae067e0b15e3..6a82c7b8ff32 100644
--- a/arch/sh/configs/dreamcast_defconfig
+++ b/arch/sh/configs/dreamcast_defconfig
@@ -1,7 +1,6 @@
CONFIG_SYSVIPC=y
CONFIG_BSD_PROCESS_ACCT=y
CONFIG_LOG_BUF_SHIFT=14
-# CONFIG_SYSCTL_SYSCALL is not set
CONFIG_SLAB=y
CONFIG_PROFILING=y
CONFIG_MODULES=y
diff --git a/arch/sh/configs/espt_defconfig b/arch/sh/configs/espt_defconfig
index a5b865a75d22..9a988c347e9d 100644
--- a/arch/sh/configs/espt_defconfig
+++ b/arch/sh/configs/espt_defconfig
@@ -5,7 +5,6 @@ CONFIG_LOG_BUF_SHIFT=14
CONFIG_NAMESPACES=y
CONFIG_UTS_NS=y
CONFIG_IPC_NS=y
-# CONFIG_SYSCTL_SYSCALL is not set
CONFIG_SLAB=y
CONFIG_PROFILING=y
CONFIG_OPROFILE=y
diff --git a/arch/sh/configs/hp6xx_defconfig b/arch/sh/configs/hp6xx_defconfig
index a92db6694ce2..70e6605d7f7e 100644
--- a/arch/sh/configs/hp6xx_defconfig
+++ b/arch/sh/configs/hp6xx_defconfig
@@ -3,7 +3,6 @@ CONFIG_IKCONFIG=y
CONFIG_IKCONFIG_PROC=y
CONFIG_LOG_BUF_SHIFT=14
# CONFIG_CC_OPTIMIZE_FOR_SIZE is not set
-# CONFIG_SYSCTL_SYSCALL is not set
CONFIG_SLAB=y
# CONFIG_BLK_DEV_BSG is not set
CONFIG_CPU_SUBTYPE_SH7709=y
diff --git a/arch/sh/configs/landisk_defconfig b/arch/sh/configs/landisk_defconfig
index 567af752b1bb..ba6ec042606f 100644
--- a/arch/sh/configs/landisk_defconfig
+++ b/arch/sh/configs/landisk_defconfig
@@ -1,6 +1,5 @@
CONFIG_SYSVIPC=y
CONFIG_LOG_BUF_SHIFT=14
-# CONFIG_SYSCTL_SYSCALL is not set
CONFIG_KALLSYMS_EXTRA_PASS=y
CONFIG_SLAB=y
CONFIG_MODULES=y
diff --git a/arch/sh/configs/lboxre2_defconfig b/arch/sh/configs/lboxre2_defconfig
index 10f6d371ce2c..05e4ac6fed5f 100644
--- a/arch/sh/configs/lboxre2_defconfig
+++ b/arch/sh/configs/lboxre2_defconfig
@@ -1,6 +1,5 @@
CONFIG_SYSVIPC=y
CONFIG_LOG_BUF_SHIFT=14
-# CONFIG_SYSCTL_SYSCALL is not set
CONFIG_KALLSYMS_EXTRA_PASS=y
CONFIG_SLAB=y
CONFIG_MODULES=y
diff --git a/arch/sh/configs/microdev_defconfig b/arch/sh/configs/microdev_defconfig
index ed84d1303acf..c65667d00313 100644
--- a/arch/sh/configs/microdev_defconfig
+++ b/arch/sh/configs/microdev_defconfig
@@ -2,7 +2,6 @@ CONFIG_BSD_PROCESS_ACCT=y
CONFIG_LOG_BUF_SHIFT=14
CONFIG_BLK_DEV_INITRD=y
# CONFIG_CC_OPTIMIZE_FOR_SIZE is not set
-# CONFIG_SYSCTL_SYSCALL is not set
CONFIG_SLAB=y
# CONFIG_BLK_DEV_BSG is not set
CONFIG_CPU_SUBTYPE_SH4_202=y
diff --git a/arch/sh/configs/migor_defconfig b/arch/sh/configs/migor_defconfig
index 37e9521a99e5..a24cf8cd2cea 100644
--- a/arch/sh/configs/migor_defconfig
+++ b/arch/sh/configs/migor_defconfig
@@ -4,7 +4,6 @@ CONFIG_IKCONFIG_PROC=y
CONFIG_LOG_BUF_SHIFT=14
CONFIG_BLK_DEV_INITRD=y
# CONFIG_CC_OPTIMIZE_FOR_SIZE is not set
-# CONFIG_SYSCTL_SYSCALL is not set
CONFIG_SLAB=y
CONFIG_PROFILING=y
CONFIG_OPROFILE=y
diff --git a/arch/sh/configs/r7780mp_defconfig b/arch/sh/configs/r7780mp_defconfig
index c97ec60cff27..e922659fdadb 100644
--- a/arch/sh/configs/r7780mp_defconfig
+++ b/arch/sh/configs/r7780mp_defconfig
@@ -3,7 +3,6 @@ CONFIG_BSD_PROCESS_ACCT=y
CONFIG_IKCONFIG=y
CONFIG_IKCONFIG_PROC=y
CONFIG_LOG_BUF_SHIFT=14
-# CONFIG_SYSCTL_SYSCALL is not set
# CONFIG_FUTEX is not set
# CONFIG_EPOLL is not set
CONFIG_SLAB=y
diff --git a/arch/sh/configs/r7785rp_defconfig b/arch/sh/configs/r7785rp_defconfig
index 55fce65eb454..5978866358ec 100644
--- a/arch/sh/configs/r7785rp_defconfig
+++ b/arch/sh/configs/r7785rp_defconfig
@@ -7,7 +7,6 @@ CONFIG_RCU_TRACE=y
CONFIG_IKCONFIG=y
CONFIG_IKCONFIG_PROC=y
CONFIG_LOG_BUF_SHIFT=14
-# CONFIG_SYSCTL_SYSCALL is not set
CONFIG_SLAB=y
CONFIG_PROFILING=y
CONFIG_OPROFILE=y
diff --git a/arch/sh/configs/rts7751r2d1_defconfig b/arch/sh/configs/rts7751r2d1_defconfig
index 6a3cfe08295f..fc9c22152b08 100644
--- a/arch/sh/configs/rts7751r2d1_defconfig
+++ b/arch/sh/configs/rts7751r2d1_defconfig
@@ -1,7 +1,6 @@
CONFIG_SYSVIPC=y
CONFIG_LOG_BUF_SHIFT=14
# CONFIG_CC_OPTIMIZE_FOR_SIZE is not set
-# CONFIG_SYSCTL_SYSCALL is not set
CONFIG_SLAB=y
CONFIG_PROFILING=y
CONFIG_OPROFILE=y
diff --git a/arch/sh/configs/rts7751r2dplus_defconfig b/arch/sh/configs/rts7751r2dplus_defconfig
index 2b3d7d280672..ff3fd6787fd6 100644
--- a/arch/sh/configs/rts7751r2dplus_defconfig
+++ b/arch/sh/configs/rts7751r2dplus_defconfig
@@ -1,7 +1,6 @@
CONFIG_SYSVIPC=y
CONFIG_LOG_BUF_SHIFT=14
# CONFIG_CC_OPTIMIZE_FOR_SIZE is not set
-# CONFIG_SYSCTL_SYSCALL is not set
CONFIG_SLAB=y
CONFIG_PROFILING=y
CONFIG_OPROFILE=y
diff --git a/arch/sh/configs/se7206_defconfig b/arch/sh/configs/se7206_defconfig
index 21a43f14ffac..ff5bb4489922 100644
--- a/arch/sh/configs/se7206_defconfig
+++ b/arch/sh/configs/se7206_defconfig
@@ -18,7 +18,6 @@ CONFIG_USER_NS=y
CONFIG_PID_NS=y
CONFIG_BLK_DEV_INITRD=y
# CONFIG_UID16 is not set
-# CONFIG_SYSCTL_SYSCALL is not set
CONFIG_KALLSYMS_ALL=y
# CONFIG_ELF_CORE is not set
# CONFIG_COMPAT_BRK is not set
diff --git a/arch/sh/configs/se7343_defconfig b/arch/sh/configs/se7343_defconfig
index 4e794e719a28..5d6c19338ebf 100644
--- a/arch/sh/configs/se7343_defconfig
+++ b/arch/sh/configs/se7343_defconfig
@@ -2,7 +2,6 @@
CONFIG_SYSVIPC=y
CONFIG_POSIX_MQUEUE=y
CONFIG_LOG_BUF_SHIFT=14
-# CONFIG_SYSCTL_SYSCALL is not set
# CONFIG_FUTEX is not set
# CONFIG_EPOLL is not set
# CONFIG_SHMEM is not set
diff --git a/arch/sh/configs/se7619_defconfig b/arch/sh/configs/se7619_defconfig
index 3264415a5931..71a672c30716 100644
--- a/arch/sh/configs/se7619_defconfig
+++ b/arch/sh/configs/se7619_defconfig
@@ -1,7 +1,6 @@
# CONFIG_LOCALVERSION_AUTO is not set
CONFIG_LOG_BUF_SHIFT=14
# CONFIG_UID16 is not set
-# CONFIG_SYSCTL_SYSCALL is not set
# CONFIG_KALLSYMS is not set
# CONFIG_HOTPLUG is not set
# CONFIG_ELF_CORE is not set
diff --git a/arch/sh/configs/se7705_defconfig b/arch/sh/configs/se7705_defconfig
index 4496b94b7d88..ed00a6eeadf5 100644
--- a/arch/sh/configs/se7705_defconfig
+++ b/arch/sh/configs/se7705_defconfig
@@ -2,7 +2,6 @@
CONFIG_LOG_BUF_SHIFT=14
CONFIG_BLK_DEV_INITRD=y
# CONFIG_CC_OPTIMIZE_FOR_SIZE is not set
-# CONFIG_SYSCTL_SYSCALL is not set
# CONFIG_KALLSYMS is not set
# CONFIG_HOTPLUG is not set
CONFIG_SLAB=y
diff --git a/arch/sh/configs/se7750_defconfig b/arch/sh/configs/se7750_defconfig
index b23f67542728..3f1c13799d79 100644
--- a/arch/sh/configs/se7750_defconfig
+++ b/arch/sh/configs/se7750_defconfig
@@ -5,7 +5,6 @@ CONFIG_IKCONFIG=y
CONFIG_IKCONFIG_PROC=y
CONFIG_LOG_BUF_SHIFT=14
# CONFIG_CC_OPTIMIZE_FOR_SIZE is not set
-# CONFIG_SYSCTL_SYSCALL is not set
# CONFIG_HOTPLUG is not set
CONFIG_SLAB=y
CONFIG_MODULES=y
diff --git a/arch/sh/configs/se7751_defconfig b/arch/sh/configs/se7751_defconfig
index 162343683937..4a024065bb75 100644
--- a/arch/sh/configs/se7751_defconfig
+++ b/arch/sh/configs/se7751_defconfig
@@ -3,7 +3,6 @@ CONFIG_BSD_PROCESS_ACCT=y
CONFIG_LOG_BUF_SHIFT=14
CONFIG_BLK_DEV_INITRD=y
# CONFIG_CC_OPTIMIZE_FOR_SIZE is not set
-# CONFIG_SYSCTL_SYSCALL is not set
# CONFIG_HOTPLUG is not set
CONFIG_SLAB=y
CONFIG_MODULES=y
diff --git a/arch/sh/configs/secureedge5410_defconfig b/arch/sh/configs/secureedge5410_defconfig
index 360592d63a2f..8422599cfb04 100644
--- a/arch/sh/configs/secureedge5410_defconfig
+++ b/arch/sh/configs/secureedge5410_defconfig
@@ -1,7 +1,6 @@
# CONFIG_SWAP is not set
CONFIG_LOG_BUF_SHIFT=14
CONFIG_BLK_DEV_INITRD=y
-# CONFIG_SYSCTL_SYSCALL is not set
# CONFIG_HOTPLUG is not set
CONFIG_SLAB=y
# CONFIG_BLK_DEV_BSG is not set
diff --git a/arch/sh/configs/sh03_defconfig b/arch/sh/configs/sh03_defconfig
index 87db9a84b5ec..f0073ed39947 100644
--- a/arch/sh/configs/sh03_defconfig
+++ b/arch/sh/configs/sh03_defconfig
@@ -3,7 +3,6 @@ CONFIG_POSIX_MQUEUE=y
CONFIG_BSD_PROCESS_ACCT=y
CONFIG_LOG_BUF_SHIFT=14
CONFIG_BLK_DEV_INITRD=y
-# CONFIG_SYSCTL_SYSCALL is not set
CONFIG_SLAB=y
CONFIG_PROFILING=y
CONFIG_OPROFILE=m
diff --git a/arch/sh/configs/sh7710voipgw_defconfig b/arch/sh/configs/sh7710voipgw_defconfig
index 08426913c0e3..0d814770b07f 100644
--- a/arch/sh/configs/sh7710voipgw_defconfig
+++ b/arch/sh/configs/sh7710voipgw_defconfig
@@ -2,7 +2,6 @@
CONFIG_SYSVIPC=y
CONFIG_POSIX_MQUEUE=y
CONFIG_LOG_BUF_SHIFT=14
-# CONFIG_SYSCTL_SYSCALL is not set
# CONFIG_FUTEX is not set
# CONFIG_EPOLL is not set
# CONFIG_SHMEM is not set
diff --git a/arch/sh/configs/sh7757lcr_defconfig b/arch/sh/configs/sh7757lcr_defconfig
index d0933a9b9799..a2700ab165af 100644
--- a/arch/sh/configs/sh7757lcr_defconfig
+++ b/arch/sh/configs/sh7757lcr_defconfig
@@ -8,7 +8,6 @@ CONFIG_TASK_XACCT=y
CONFIG_TASK_IO_ACCOUNTING=y
CONFIG_LOG_BUF_SHIFT=14
CONFIG_BLK_DEV_INITRD=y
-# CONFIG_SYSCTL_SYSCALL is not set
CONFIG_KALLSYMS_ALL=y
CONFIG_SLAB=y
CONFIG_MODULES=y
diff --git a/arch/sh/configs/sh7763rdp_defconfig b/arch/sh/configs/sh7763rdp_defconfig
index d0a0aa74cecf..26c5fd02c87a 100644
--- a/arch/sh/configs/sh7763rdp_defconfig
+++ b/arch/sh/configs/sh7763rdp_defconfig
@@ -5,7 +5,6 @@ CONFIG_LOG_BUF_SHIFT=14
CONFIG_NAMESPACES=y
CONFIG_UTS_NS=y
CONFIG_IPC_NS=y
-# CONFIG_SYSCTL_SYSCALL is not set
CONFIG_SLAB=y
CONFIG_PROFILING=y
CONFIG_OPROFILE=y
diff --git a/arch/sh/configs/shmin_defconfig b/arch/sh/configs/shmin_defconfig
index a27b129b93c5..c0b6f40d01cc 100644
--- a/arch/sh/configs/shmin_defconfig
+++ b/arch/sh/configs/shmin_defconfig
@@ -1,7 +1,6 @@
# CONFIG_SWAP is not set
CONFIG_LOG_BUF_SHIFT=14
# CONFIG_UID16 is not set
-# CONFIG_SYSCTL_SYSCALL is not set
# CONFIG_KALLSYMS is not set
# CONFIG_HOTPLUG is not set
# CONFIG_BUG is not set
diff --git a/arch/sh/configs/titan_defconfig b/arch/sh/configs/titan_defconfig
index 4ec961ace688..ba887f1351be 100644
--- a/arch/sh/configs/titan_defconfig
+++ b/arch/sh/configs/titan_defconfig
@@ -6,7 +6,6 @@ CONFIG_IKCONFIG_PROC=y
CONFIG_LOG_BUF_SHIFT=16
CONFIG_BLK_DEV_INITRD=y
# CONFIG_CC_OPTIMIZE_FOR_SIZE is not set
-# CONFIG_SYSCTL_SYSCALL is not set
CONFIG_SLAB=y
CONFIG_MODULES=y
CONFIG_MODULE_UNLOAD=y
diff --git a/arch/sh/include/asm/string_32.h b/arch/sh/include/asm/string_32.h
index 3558b1d7123e..be3f9a08cbc9 100644
--- a/arch/sh/include/asm/string_32.h
+++ b/arch/sh/include/asm/string_32.h
@@ -28,32 +28,6 @@ static inline char *strcpy(char *__dest, const char *__src)
return __xdest;
}
-#define __HAVE_ARCH_STRNCPY
-static inline char *strncpy(char *__dest, const char *__src, size_t __n)
-{
- register char *__xdest = __dest;
- unsigned long __dummy;
-
- if (__n == 0)
- return __xdest;
-
- __asm__ __volatile__(
- "1:\n"
- "mov.b @%1+, %2\n\t"
- "mov.b %2, @%0\n\t"
- "cmp/eq #0, %2\n\t"
- "bt/s 2f\n\t"
- " cmp/eq %5,%1\n\t"
- "bf/s 1b\n\t"
- " add #1, %0\n"
- "2:"
- : "=r" (__dest), "=r" (__src), "=&z" (__dummy)
- : "0" (__dest), "1" (__src), "r" (__src+__n)
- : "memory", "t");
-
- return __xdest;
-}
-
#define __HAVE_ARCH_STRCMP
static inline int strcmp(const char *__cs, const char *__ct)
{
diff --git a/arch/sh/kernel/syscalls/syscall.tbl b/arch/sh/kernel/syscalls/syscall.tbl
index 3a1230ce4878..792539111ed8 100644
--- a/arch/sh/kernel/syscalls/syscall.tbl
+++ b/arch/sh/kernel/syscalls/syscall.tbl
@@ -156,7 +156,7 @@
146 common writev sys_writev
147 common getsid sys_getsid
148 common fdatasync sys_fdatasync
-149 common _sysctl sys_sysctl
+149 common _sysctl sys_ni_syscall
150 common mlock sys_mlock
151 common munlock sys_munlock
152 common mlockall sys_mlockall
@@ -445,3 +445,4 @@
440 common watch_mount sys_watch_mount
441 common watch_sb sys_watch_sb
442 common fsinfo sys_fsinfo
+443 common process_madvise sys_process_madvise
diff --git a/arch/sh/lib/delay.c b/arch/sh/lib/delay.c
index 540e670dbafc..cccab21f3e56 100644
--- a/arch/sh/lib/delay.c
+++ b/arch/sh/lib/delay.c
@@ -42,6 +42,7 @@ inline void __const_udelay(unsigned long xloops)
: "macl", "mach");
__delay(++xloops);
}
+EXPORT_SYMBOL(__delay);
void __udelay(unsigned long usecs)
{
diff --git a/arch/sh/mm/fault.c b/arch/sh/mm/fault.c
index fbe1f2fe9a8c..482668a2f6d3 100644
--- a/arch/sh/mm/fault.c
+++ b/arch/sh/mm/fault.c
@@ -482,22 +482,13 @@ good_area:
* make sure we exit gracefully rather than endlessly redo
* the fault.
*/
- fault = handle_mm_fault(vma, address, flags);
+ fault = handle_mm_fault(vma, address, flags, regs);
if (unlikely(fault & (VM_FAULT_RETRY | VM_FAULT_ERROR)))
if (mm_fault_error(regs, error_code, address, fault))
return;
if (flags & FAULT_FLAG_ALLOW_RETRY) {
- if (fault & VM_FAULT_MAJOR) {
- tsk->maj_flt++;
- perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MAJ, 1,
- regs, address);
- } else {
- tsk->min_flt++;
- perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MIN, 1,
- regs, address);
- }
if (fault & VM_FAULT_RETRY) {
flags |= FAULT_FLAG_TRIED;
diff --git a/arch/sparc/kernel/syscalls/syscall.tbl b/arch/sparc/kernel/syscalls/syscall.tbl
index b98abce16ec4..4f8eebfcd07e 100644
--- a/arch/sparc/kernel/syscalls/syscall.tbl
+++ b/arch/sparc/kernel/syscalls/syscall.tbl
@@ -300,7 +300,7 @@
249 64 nanosleep sys_nanosleep
250 32 mremap sys_mremap
250 64 mremap sys_64_mremap
-251 common _sysctl sys_sysctl compat_sys_sysctl
+251 common _sysctl sys_ni_syscall
252 common getsid sys_getsid
253 common fdatasync sys_fdatasync
254 32 nfsservctl sys_ni_syscall sys_nis_syscall
@@ -488,3 +488,4 @@
440 common watch_mount sys_watch_mount
441 common watch_sb sys_watch_sb
442 common fsinfo sys_fsinfo
+443 common process_madvise sys_process_madvise compat_sys_process_madvise
diff --git a/arch/sparc/mm/fault_32.c b/arch/sparc/mm/fault_32.c
index cfef656eda0f..8071bfd72349 100644
--- a/arch/sparc/mm/fault_32.c
+++ b/arch/sparc/mm/fault_32.c
@@ -234,7 +234,7 @@ good_area:
* make sure we exit gracefully rather than endlessly redo
* the fault.
*/
- fault = handle_mm_fault(vma, address, flags);
+ fault = handle_mm_fault(vma, address, flags, regs);
if (fault_signal_pending(fault, regs))
return;
@@ -250,15 +250,6 @@ good_area:
}
if (flags & FAULT_FLAG_ALLOW_RETRY) {
- if (fault & VM_FAULT_MAJOR) {
- current->maj_flt++;
- perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MAJ,
- 1, regs, address);
- } else {
- current->min_flt++;
- perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MIN,
- 1, regs, address);
- }
if (fault & VM_FAULT_RETRY) {
flags |= FAULT_FLAG_TRIED;
@@ -410,7 +401,7 @@ good_area:
if (!(vma->vm_flags & (VM_READ | VM_EXEC)))
goto bad_area;
}
- switch (handle_mm_fault(vma, address, flags)) {
+ switch (handle_mm_fault(vma, address, flags, NULL)) {
case VM_FAULT_SIGBUS:
case VM_FAULT_OOM:
goto do_sigbus;
diff --git a/arch/sparc/mm/fault_64.c b/arch/sparc/mm/fault_64.c
index a3806614e4dc..0a6bcc85fba7 100644
--- a/arch/sparc/mm/fault_64.c
+++ b/arch/sparc/mm/fault_64.c
@@ -422,7 +422,7 @@ good_area:
goto bad_area;
}
- fault = handle_mm_fault(vma, address, flags);
+ fault = handle_mm_fault(vma, address, flags, regs);
if (fault_signal_pending(fault, regs))
goto exit_exception;
@@ -438,15 +438,6 @@ good_area:
}
if (flags & FAULT_FLAG_ALLOW_RETRY) {
- if (fault & VM_FAULT_MAJOR) {
- current->maj_flt++;
- perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MAJ,
- 1, regs, address);
- } else {
- current->min_flt++;
- perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MIN,
- 1, regs, address);
- }
if (fault & VM_FAULT_RETRY) {
flags |= FAULT_FLAG_TRIED;
diff --git a/arch/um/kernel/trap.c b/arch/um/kernel/trap.c
index 2b3afa354a90..ad12f78bda7e 100644
--- a/arch/um/kernel/trap.c
+++ b/arch/um/kernel/trap.c
@@ -71,7 +71,7 @@ good_area:
do {
vm_fault_t fault;
- fault = handle_mm_fault(vma, address, flags);
+ fault = handle_mm_fault(vma, address, flags, NULL);
if ((fault & VM_FAULT_RETRY) && fatal_signal_pending(current))
goto out_nosemaphore;
@@ -88,10 +88,6 @@ good_area:
BUG();
}
if (flags & FAULT_FLAG_ALLOW_RETRY) {
- if (fault & VM_FAULT_MAJOR)
- current->maj_flt++;
- else
- current->min_flt++;
if (fault & VM_FAULT_RETRY) {
flags |= FAULT_FLAG_TRIED;
diff --git a/arch/x86/entry/syscalls/syscall_32.tbl b/arch/x86/entry/syscalls/syscall_32.tbl
index ea7c4aaa7043..29e49a70c99e 100644
--- a/arch/x86/entry/syscalls/syscall_32.tbl
+++ b/arch/x86/entry/syscalls/syscall_32.tbl
@@ -160,7 +160,7 @@
146 i386 writev sys_writev compat_sys_writev
147 i386 getsid sys_getsid
148 i386 fdatasync sys_fdatasync
-149 i386 _sysctl sys_sysctl compat_sys_sysctl
+149 i386 _sysctl sys_ni_syscall
150 i386 mlock sys_mlock
151 i386 munlock sys_munlock
152 i386 mlockall sys_mlockall
@@ -447,3 +447,4 @@
440 i386 watch_mount sys_watch_mount
441 i386 watch_sb sys_watch_sb
442 i386 fsinfo sys_fsinfo
+443 i386 process_madvise sys_process_madvise compat_sys_process_madvise
diff --git a/arch/x86/entry/syscalls/syscall_64.tbl b/arch/x86/entry/syscalls/syscall_64.tbl
index 8756b396e3be..8f959d90338a 100644
--- a/arch/x86/entry/syscalls/syscall_64.tbl
+++ b/arch/x86/entry/syscalls/syscall_64.tbl
@@ -164,7 +164,7 @@
153 common vhangup sys_vhangup
154 common modify_ldt sys_modify_ldt
155 common pivot_root sys_pivot_root
-156 64 _sysctl sys_sysctl
+156 64 _sysctl sys_ni_syscall
157 common prctl sys_prctl
158 common arch_prctl sys_arch_prctl
159 common adjtimex sys_adjtimex
@@ -364,6 +364,7 @@
440 common watch_mount sys_watch_mount
441 common watch_sb sys_watch_sb
442 common fsinfo sys_fsinfo
+443 64 process_madvise sys_process_madvise
#
# x32-specific system call numbers start at 512 to avoid cache impact
@@ -407,3 +408,4 @@
545 x32 execveat compat_sys_execveat
546 x32 preadv2 compat_sys_preadv64v2
547 x32 pwritev2 compat_sys_pwritev64v2
+548 x32 process_madvise compat_sys_process_madvise
diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c
index 5709248f66af..877874a26597 100644
--- a/arch/x86/mm/fault.c
+++ b/arch/x86/mm/fault.c
@@ -1139,7 +1139,7 @@ void do_user_addr_fault(struct pt_regs *regs,
struct vm_area_struct *vma;
struct task_struct *tsk;
struct mm_struct *mm;
- vm_fault_t fault, major = 0;
+ vm_fault_t fault;
unsigned int flags = FAULT_FLAG_DEFAULT;
tsk = current;
@@ -1291,8 +1291,7 @@ good_area:
* userland). The return to userland is identified whenever
* FAULT_FLAG_USER|FAULT_FLAG_KILLABLE are both set in flags.
*/
- fault = handle_mm_fault(vma, address, flags);
- major |= fault & VM_FAULT_MAJOR;
+ fault = handle_mm_fault(vma, address, flags, regs);
/* Quick path to respond to signals */
if (fault_signal_pending(fault, regs)) {
@@ -1319,18 +1318,6 @@ good_area:
return;
}
- /*
- * Major/minor page fault accounting. If any of the events
- * returned VM_FAULT_MAJOR, we account it as a major fault.
- */
- if (major) {
- tsk->maj_flt++;
- perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MAJ, 1, regs, address);
- } else {
- tsk->min_flt++;
- perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MIN, 1, regs, address);
- }
-
check_v8086_mode(regs, address, tsk);
}
NOKPROBE_SYMBOL(do_user_addr_fault);
diff --git a/arch/xtensa/kernel/syscalls/syscall.tbl b/arch/xtensa/kernel/syscalls/syscall.tbl
index 216bf20f9fa4..173bd27f61dd 100644
--- a/arch/xtensa/kernel/syscalls/syscall.tbl
+++ b/arch/xtensa/kernel/syscalls/syscall.tbl
@@ -222,7 +222,7 @@
204 common quotactl sys_quotactl
# 205 was old nfsservctl
205 common nfsservctl sys_ni_syscall
-206 common _sysctl sys_sysctl
+206 common _sysctl sys_ni_syscall
207 common bdflush sys_bdflush
208 common uname sys_newuname
209 common sysinfo sys_sysinfo
@@ -413,3 +413,4 @@
440 common watch_mount sys_watch_mount
441 common watch_sb sys_watch_sb
442 common fsinfo sys_fsinfo
+443 common process_madvise sys_process_madvise
diff --git a/arch/xtensa/mm/fault.c b/arch/xtensa/mm/fault.c
index c128dcc7c85b..7666408ce12a 100644
--- a/arch/xtensa/mm/fault.c
+++ b/arch/xtensa/mm/fault.c
@@ -72,6 +72,9 @@ void do_page_fault(struct pt_regs *regs)
if (user_mode(regs))
flags |= FAULT_FLAG_USER;
+
+ perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS, 1, regs, address);
+
retry:
mmap_read_lock(mm);
vma = find_vma(mm, address);
@@ -107,7 +110,7 @@ good_area:
* make sure we exit gracefully rather than endlessly redo
* the fault.
*/
- fault = handle_mm_fault(vma, address, flags);
+ fault = handle_mm_fault(vma, address, flags, regs);
if (fault_signal_pending(fault, regs))
return;
@@ -122,10 +125,6 @@ good_area:
BUG();
}
if (flags & FAULT_FLAG_ALLOW_RETRY) {
- if (fault & VM_FAULT_MAJOR)
- current->maj_flt++;
- else
- current->min_flt++;
if (fault & VM_FAULT_RETRY) {
flags |= FAULT_FLAG_TRIED;
@@ -139,12 +138,6 @@ good_area:
}
mmap_read_unlock(mm);
- perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS, 1, regs, address);
- if (flags & VM_FAULT_MAJOR)
- perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MAJ, 1, regs, address);
- else
- perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MIN, 1, regs, address);
-
return;
/* Something tried to access memory that isn't in our memory map..
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_userptr.c b/drivers/gpu/drm/i915/gem/i915_gem_userptr.c
index e946032b13e4..2c2bf24140c9 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_userptr.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_userptr.c
@@ -469,7 +469,7 @@ __i915_gem_userptr_get_pages_worker(struct work_struct *_work)
locked = 1;
}
ret = pin_user_pages_remote
- (work->task, mm,
+ (mm,
obj->userptr.ptr + pinned * PAGE_SIZE,
npages - pinned,
flags,
diff --git a/drivers/infiniband/core/umem_odp.c b/drivers/infiniband/core/umem_odp.c
index 5e32f61a2fe4..cc6b4befde7c 100644
--- a/drivers/infiniband/core/umem_odp.c
+++ b/drivers/infiniband/core/umem_odp.c
@@ -439,7 +439,7 @@ int ib_umem_odp_map_dma_pages(struct ib_umem_odp *umem_odp, u64 user_virt,
* complex (and doesn't gain us much performance in most use
* cases).
*/
- npages = get_user_pages_remote(owning_process, owning_mm,
+ npages = get_user_pages_remote(owning_mm,
user_virt, gup_num_pages,
flags, local_page_list, NULL, NULL);
mmap_read_unlock(owning_mm);
diff --git a/drivers/iommu/amd/iommu_v2.c b/drivers/iommu/amd/iommu_v2.c
index e4b025c5637c..c259108ab6dd 100644
--- a/drivers/iommu/amd/iommu_v2.c
+++ b/drivers/iommu/amd/iommu_v2.c
@@ -495,7 +495,7 @@ static void do_fault(struct work_struct *work)
if (access_error(vma, fault))
goto out;
- ret = handle_mm_fault(vma, address, flags);
+ ret = handle_mm_fault(vma, address, flags, NULL);
out:
mmap_read_unlock(mm);
diff --git a/drivers/iommu/intel/svm.c b/drivers/iommu/intel/svm.c
index 6c87c807a0ab..5ae59a6ad681 100644
--- a/drivers/iommu/intel/svm.c
+++ b/drivers/iommu/intel/svm.c
@@ -872,7 +872,8 @@ static irqreturn_t prq_event_thread(int irq, void *d)
goto invalid;
ret = handle_mm_fault(vma, address,
- req->wr_req ? FAULT_FLAG_WRITE : 0);
+ req->wr_req ? FAULT_FLAG_WRITE : 0,
+ NULL);
if (ret & VM_FAULT_ERROR)
goto invalid;
diff --git a/drivers/sh/clk/cpg.c b/drivers/sh/clk/cpg.c
index eeb028b9cdb3..a5cacfe24a42 100644
--- a/drivers/sh/clk/cpg.c
+++ b/drivers/sh/clk/cpg.c
@@ -36,36 +36,21 @@ static void sh_clk_write(int value, struct clk *clk)
iowrite32(value, clk->mapped_reg);
}
-static unsigned int r8(const void __iomem *addr)
-{
- return ioread8(addr);
-}
-
-static unsigned int r16(const void __iomem *addr)
-{
- return ioread16(addr);
-}
-
-static unsigned int r32(const void __iomem *addr)
-{
- return ioread32(addr);
-}
-
static int sh_clk_mstp_enable(struct clk *clk)
{
sh_clk_write(sh_clk_read(clk) & ~(1 << clk->enable_bit), clk);
if (clk->status_reg) {
- unsigned int (*read)(const void __iomem *addr);
+ unsigned int (*read)(void __iomem *addr);
int i;
void __iomem *mapped_status = (phys_addr_t)clk->status_reg -
(phys_addr_t)clk->enable_reg + clk->mapped_reg;
if (clk->flags & CLK_ENABLE_REG_8BIT)
- read = r8;
+ read = ioread8;
else if (clk->flags & CLK_ENABLE_REG_16BIT)
- read = r16;
+ read = ioread16;
else
- read = r32;
+ read = ioread32;
for (i = 1000;
(read(mapped_status) & (1 << clk->enable_bit)) && i;
diff --git a/drivers/vfio/vfio_iommu_type1.c b/drivers/vfio/vfio_iommu_type1.c
index 5e556ac9102a..9d41105bfd01 100644
--- a/drivers/vfio/vfio_iommu_type1.c
+++ b/drivers/vfio/vfio_iommu_type1.c
@@ -425,7 +425,7 @@ static int follow_fault_pfn(struct vm_area_struct *vma, struct mm_struct *mm,
if (ret) {
bool unlocked = false;
- ret = fixup_user_fault(NULL, mm, vaddr,
+ ret = fixup_user_fault(mm, vaddr,
FAULT_FLAG_REMOTE |
(write_fault ? FAULT_FLAG_WRITE : 0),
&unlocked);
@@ -453,7 +453,7 @@ static int vaddr_get_pfn(struct mm_struct *mm, unsigned long vaddr,
flags |= FOLL_WRITE;
mmap_read_lock(mm);
- ret = pin_user_pages_remote(NULL, mm, vaddr, 1, flags | FOLL_LONGTERM,
+ ret = pin_user_pages_remote(mm, vaddr, 1, flags | FOLL_LONGTERM,
page, NULL, NULL);
if (ret == 1) {
*pfn = page_to_pfn(page[0]);
diff --git a/fs/exec.c b/fs/exec.c
index e93baf5da1d2..dc35fe2f6b09 100644
--- a/fs/exec.c
+++ b/fs/exec.c
@@ -217,7 +217,7 @@ static struct page *get_arg_page(struct linux_binprm *bprm, unsigned long pos,
* We are doing an exec(). 'current' is the process
* doing the exec and bprm->mm is the new process's mm.
*/
- ret = get_user_pages_remote(current, bprm->mm, pos, 1, gup_flags,
+ ret = get_user_pages_remote(bprm->mm, pos, 1, gup_flags,
&page, NULL, NULL);
if (ret <= 0)
return NULL;
diff --git a/fs/io_uring.c b/fs/io_uring.c
index ae83c8dcd5d5..f81d36d78594 100644
--- a/fs/io_uring.c
+++ b/fs/io_uring.c
@@ -3711,7 +3711,7 @@ static int io_madvise(struct io_kiocb *req, bool force_nonblock)
if (force_nonblock)
return -EAGAIN;
- ret = do_madvise(ma->addr, ma->len, ma->advice);
+ ret = do_madvise(NULL, current->mm, ma->addr, ma->len, ma->advice);
if (ret < 0)
req_set_fail_links(req);
io_req_complete(req, ret);
diff --git a/include/asm-generic/vmlinux.lds.h b/include/asm-generic/vmlinux.lds.h
index 62febf06a1b2..5f96b76e5784 100644
--- a/include/asm-generic/vmlinux.lds.h
+++ b/include/asm-generic/vmlinux.lds.h
@@ -393,6 +393,7 @@
*/
#ifndef RO_AFTER_INIT_DATA
#define RO_AFTER_INIT_DATA \
+ . = ALIGN(8); \
__start_ro_after_init = .; \
*(.data..ro_after_init) \
JUMP_TABLE_DATA \
diff --git a/include/linux/compat.h b/include/linux/compat.h
index e90100c0de72..4b48b6c49637 100644
--- a/include/linux/compat.h
+++ b/include/linux/compat.h
@@ -827,6 +827,10 @@ asmlinkage long compat_sys_pwritev64v2(unsigned long fd,
unsigned long vlen, loff_t pos, rwf_t flags);
#endif
+asmlinkage ssize_t compat_sys_process_madvise(compat_int_t pidfd,
+ const struct compat_iovec __user *vec,
+ compat_ulong_t vlen, compat_int_t behavior,
+ compat_uint_t flags);
/*
* Deprecated system calls which are still defined in
@@ -855,7 +859,6 @@ asmlinkage long compat_sys_select(int n, compat_ulong_t __user *inp,
asmlinkage long compat_sys_ustat(unsigned dev, struct compat_ustat __user *u32);
asmlinkage long compat_sys_recv(int fd, void __user *buf, compat_size_t len,
unsigned flags);
-asmlinkage long compat_sys_sysctl(struct compat_sysctl_args __user *args);
/* obsolete: fs/readdir.c */
asmlinkage long compat_sys_old_readdir(unsigned int fd,
diff --git a/include/linux/hugetlb.h b/include/linux/hugetlb.h
index 50650d0d01b9..6b9508dc31b4 100644
--- a/include/linux/hugetlb.h
+++ b/include/linux/hugetlb.h
@@ -10,6 +10,7 @@
#include <linux/list.h>
#include <linux/kref.h>
#include <linux/pgtable.h>
+#include <linux/gfp.h>
struct ctl_table;
struct user_struct;
@@ -504,9 +505,8 @@ struct huge_bootmem_page {
struct page *alloc_huge_page(struct vm_area_struct *vma,
unsigned long addr, int avoid_reserve);
-struct page *alloc_huge_page_node(struct hstate *h, int nid);
struct page *alloc_huge_page_nodemask(struct hstate *h, int preferred_nid,
- nodemask_t *nmask);
+ nodemask_t *nmask, gfp_t gfp_mask);
struct page *alloc_huge_page_vma(struct hstate *h, struct vm_area_struct *vma,
unsigned long address);
struct page *alloc_migrate_huge_page(struct hstate *h, gfp_t gfp_mask,
@@ -692,6 +692,25 @@ static inline bool hugepage_movable_supported(struct hstate *h)
return true;
}
+/* Movability of hugepages depends on migration support. */
+static inline gfp_t htlb_alloc_mask(struct hstate *h)
+{
+ if (hugepage_movable_supported(h))
+ return GFP_HIGHUSER_MOVABLE;
+ else
+ return GFP_HIGHUSER;
+}
+
+static inline gfp_t htlb_modify_alloc_mask(struct hstate *h, gfp_t gfp_mask)
+{
+ gfp_t modified_mask = htlb_alloc_mask(h);
+
+ /* Some callers might want to enfoce node */
+ modified_mask |= (gfp_mask & __GFP_THISNODE);
+
+ return modified_mask;
+}
+
static inline spinlock_t *huge_pte_lockptr(struct hstate *h,
struct mm_struct *mm, pte_t *pte)
{
@@ -759,13 +778,9 @@ static inline struct page *alloc_huge_page(struct vm_area_struct *vma,
return NULL;
}
-static inline struct page *alloc_huge_page_node(struct hstate *h, int nid)
-{
- return NULL;
-}
-
static inline struct page *
-alloc_huge_page_nodemask(struct hstate *h, int preferred_nid, nodemask_t *nmask)
+alloc_huge_page_nodemask(struct hstate *h, int preferred_nid,
+ nodemask_t *nmask, gfp_t gfp_mask)
{
return NULL;
}
@@ -878,6 +893,16 @@ static inline bool hugepage_movable_supported(struct hstate *h)
return false;
}
+static inline gfp_t htlb_alloc_mask(struct hstate *h)
+{
+ return 0;
+}
+
+static inline gfp_t htlb_modify_alloc_mask(struct hstate *h, gfp_t gfp_mask)
+{
+ return 0;
+}
+
static inline spinlock_t *huge_pte_lockptr(struct hstate *h,
struct mm_struct *mm, pte_t *pte)
{
diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h
index 16b59f70a457..7f64e4c3ce42 100644
--- a/include/linux/memcontrol.h
+++ b/include/linux/memcontrol.h
@@ -630,7 +630,7 @@ unsigned long mem_cgroup_get_zone_lru_size(struct lruvec *lruvec,
struct mem_cgroup_per_node *mz;
mz = container_of(lruvec, struct mem_cgroup_per_node, lruvec);
- return mz->lru_zone_size[zone_idx][lru];
+ return READ_ONCE(mz->lru_zone_size[zone_idx][lru]);
}
void mem_cgroup_handle_over_high(void);
diff --git a/include/linux/migrate.h b/include/linux/migrate.h
index 3e546cbf03dd..cc56f0d6c95b 100644
--- a/include/linux/migrate.h
+++ b/include/linux/migrate.h
@@ -10,6 +10,8 @@
typedef struct page *new_page_t(struct page *page, unsigned long private);
typedef void free_page_t(struct page *page, unsigned long private);
+struct migration_target_control;
+
/*
* Return values from addresss_space_operations.migratepage():
* - negative errno on page migration failure;
@@ -31,34 +33,6 @@ enum migrate_reason {
/* In mm/debug.c; also keep sync with include/trace/events/migrate.h */
extern const char *migrate_reason_names[MR_TYPES];
-static inline struct page *new_page_nodemask(struct page *page,
- int preferred_nid, nodemask_t *nodemask)
-{
- gfp_t gfp_mask = GFP_USER | __GFP_MOVABLE | __GFP_RETRY_MAYFAIL;
- unsigned int order = 0;
- struct page *new_page = NULL;
-
- if (PageHuge(page))
- return alloc_huge_page_nodemask(page_hstate(compound_head(page)),
- preferred_nid, nodemask);
-
- if (PageTransHuge(page)) {
- gfp_mask |= GFP_TRANSHUGE;
- order = HPAGE_PMD_ORDER;
- }
-
- if (PageHighMem(page) || (zone_idx(page_zone(page)) == ZONE_MOVABLE))
- gfp_mask |= __GFP_HIGHMEM;
-
- new_page = __alloc_pages_nodemask(gfp_mask, order,
- preferred_nid, nodemask);
-
- if (new_page && PageTransHuge(new_page))
- prep_transhuge_page(new_page);
-
- return new_page;
-}
-
#ifdef CONFIG_MIGRATION
extern void putback_movable_pages(struct list_head *l);
@@ -67,6 +41,7 @@ extern int migrate_page(struct address_space *mapping,
enum migrate_mode mode);
extern int migrate_pages(struct list_head *l, new_page_t new, free_page_t free,
unsigned long private, enum migrate_mode mode, int reason);
+extern struct page *alloc_migration_target(struct page *page, unsigned long private);
extern int isolate_movable_page(struct page *page, isolate_mode_t mode);
extern void putback_movable_page(struct page *page);
@@ -85,6 +60,9 @@ static inline int migrate_pages(struct list_head *l, new_page_t new,
free_page_t free, unsigned long private, enum migrate_mode mode,
int reason)
{ return -ENOSYS; }
+static inline struct page *alloc_migration_target(struct page *page,
+ unsigned long private)
+ { return NULL; }
static inline int isolate_movable_page(struct page *page, isolate_mode_t mode)
{ return -EBUSY; }
diff --git a/include/linux/mm.h b/include/linux/mm.h
index 8098637889ea..9e3217cf6b3a 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -39,6 +39,7 @@ struct file_ra_state;
struct user_struct;
struct writeback_control;
struct bdi_writeback;
+struct pt_regs;
void init_mm_internals(void);
@@ -1066,6 +1067,7 @@ vm_fault_t finish_mkwrite_fault(struct vm_fault *vmf);
static inline enum zone_type page_zonenum(const struct page *page)
{
+ ASSERT_EXCLUSIVE_BITS(page->flags, ZONES_MASK << ZONES_PGSHIFT);
return (page->flags >> ZONES_PGSHIFT) & ZONES_MASK;
}
@@ -1658,8 +1660,9 @@ int invalidate_inode_page(struct page *page);
#ifdef CONFIG_MMU
extern vm_fault_t handle_mm_fault(struct vm_area_struct *vma,
- unsigned long address, unsigned int flags);
-extern int fixup_user_fault(struct task_struct *tsk, struct mm_struct *mm,
+ unsigned long address, unsigned int flags,
+ struct pt_regs *regs);
+extern int fixup_user_fault(struct mm_struct *mm,
unsigned long address, unsigned int fault_flags,
bool *unlocked);
void unmap_mapping_pages(struct address_space *mapping,
@@ -1668,14 +1671,14 @@ void unmap_mapping_range(struct address_space *mapping,
loff_t const holebegin, loff_t const holelen, int even_cows);
#else
static inline vm_fault_t handle_mm_fault(struct vm_area_struct *vma,
- unsigned long address, unsigned int flags)
+ unsigned long address, unsigned int flags,
+ struct pt_regs *regs)
{
/* should never happen if there's no MMU */
BUG();
return VM_FAULT_SIGBUS;
}
-static inline int fixup_user_fault(struct task_struct *tsk,
- struct mm_struct *mm, unsigned long address,
+static inline int fixup_user_fault(struct mm_struct *mm, unsigned long address,
unsigned int fault_flags, bool *unlocked)
{
/* should never happen if there's no MMU */
@@ -1701,11 +1704,11 @@ extern int access_remote_vm(struct mm_struct *mm, unsigned long addr,
extern int __access_remote_vm(struct task_struct *tsk, struct mm_struct *mm,
unsigned long addr, void *buf, int len, unsigned int gup_flags);
-long get_user_pages_remote(struct task_struct *tsk, struct mm_struct *mm,
+long get_user_pages_remote(struct mm_struct *mm,
unsigned long start, unsigned long nr_pages,
unsigned int gup_flags, struct page **pages,
struct vm_area_struct **vmas, int *locked);
-long pin_user_pages_remote(struct task_struct *tsk, struct mm_struct *mm,
+long pin_user_pages_remote(struct mm_struct *mm,
unsigned long start, unsigned long nr_pages,
unsigned int gup_flags, struct page **pages,
struct vm_area_struct **vmas, int *locked);
@@ -2546,7 +2549,8 @@ extern int __do_munmap(struct mm_struct *, unsigned long, size_t,
struct list_head *uf, bool downgrade);
extern int do_munmap(struct mm_struct *, unsigned long, size_t,
struct list_head *uf);
-extern int do_madvise(unsigned long start, size_t len_in, int behavior);
+extern int do_madvise(struct task_struct *target_task, struct mm_struct *mm,
+ unsigned long start, size_t len_in, int behavior);
static inline unsigned long
do_mmap_pgoff(struct file *file, unsigned long addr,
diff --git a/include/linux/pid.h b/include/linux/pid.h
index 176d6cf80e7c..86e0e7cb7872 100644
--- a/include/linux/pid.h
+++ b/include/linux/pid.h
@@ -77,6 +77,7 @@ extern const struct file_operations pidfd_fops;
struct file;
extern struct pid *pidfd_pid(const struct file *file);
+struct pid *pidfd_get_pid(unsigned int fd);
static inline struct pid *get_pid(struct pid *pid)
{
diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h
index 2ef60a4f05e0..efa61eb116b4 100644
--- a/include/linux/syscalls.h
+++ b/include/linux/syscalls.h
@@ -48,7 +48,6 @@ struct statfs;
struct statfs64;
struct statx;
struct fsinfo_params;
-struct __sysctl_args;
struct sysinfo;
struct timespec;
struct __kernel_old_timeval;
@@ -881,6 +880,8 @@ asmlinkage long sys_munlockall(void);
asmlinkage long sys_mincore(unsigned long start, size_t len,
unsigned char __user * vec);
asmlinkage long sys_madvise(unsigned long start, size_t len, int behavior);
+asmlinkage long sys_process_madvise(int pidfd, const struct iovec __user *vec,
+ unsigned long vlen, int behavior, unsigned int flags);
asmlinkage long sys_remap_file_pages(unsigned long start, unsigned long size,
unsigned long prot, unsigned long pgoff,
unsigned long flags);
@@ -1125,7 +1126,6 @@ asmlinkage long sys_send(int, void __user *, size_t, unsigned);
asmlinkage long sys_bdflush(int func, long data);
asmlinkage long sys_oldumount(char __user *name);
asmlinkage long sys_uselib(const char __user *library);
-asmlinkage long sys_sysctl(struct __sysctl_args __user *args);
asmlinkage long sys_sysfs(int option,
unsigned long arg1, unsigned long arg2);
asmlinkage long sys_fork(void);
diff --git a/include/linux/sysctl.h b/include/linux/sysctl.h
index 50bb7f383a1b..51298a4f4623 100644
--- a/include/linux/sysctl.h
+++ b/include/linux/sysctl.h
@@ -74,15 +74,13 @@ int proc_do_static_key(struct ctl_table *table, int write, void *buffer,
* sysctl names can be mirrored automatically under /proc/sys. The
* procname supplied controls /proc naming.
*
- * The table's mode will be honoured both for sys_sysctl(2) and
- * proc-fs access.
+ * The table's mode will be honoured for proc-fs access.
*
* Leaf nodes in the sysctl tree will be represented by a single file
* under /proc; non-leaf nodes will be represented by directories. A
* null procname disables /proc mirroring at this node.
*
- * sysctl(2) can automatically manage read and write requests through
- * the sysctl table. The data and maxlen fields of the ctl_table
+ * The data and maxlen fields of the ctl_table
* struct enable minimal validation of the values being written to be
* performed, and the mode field allows minimal authentication.
*
diff --git a/include/uapi/asm-generic/unistd.h b/include/uapi/asm-generic/unistd.h
index 25b1bdfb3e97..ed180cbf7118 100644
--- a/include/uapi/asm-generic/unistd.h
+++ b/include/uapi/asm-generic/unistd.h
@@ -865,9 +865,11 @@ __SYSCALL(__NR_watch_mount, sys_watch_mount)
__SYSCALL(__NR_watch_sb, sys_watch_sb)
#define __NR_fsinfo 442
__SYSCALL(__NR_fsinfo, sys_fsinfo)
+#define __NR_process_madvise 443
+__SC_COMP(__NR_process_madvise, sys_process_madvise, compat_sys_process_madvise)
#undef __NR_syscalls
-#define __NR_syscalls 443
+#define __NR_syscalls 444
/*
* 32 bit systems traditionally used different
diff --git a/kernel/Makefile b/kernel/Makefile
index 71b6bbccdea6..9abd0529d97c 100644
--- a/kernel/Makefile
+++ b/kernel/Makefile
@@ -5,7 +5,7 @@
obj-y = fork.o exec_domain.o panic.o \
cpu.o exit.o softirq.o resource.o \
- sysctl.o sysctl_binary.o capability.o ptrace.o user.o \
+ sysctl.o capability.o ptrace.o user.o \
signal.o sys.o umh.o workqueue.o pid.o task_work.o \
extable.o params.o \
kthread.o sys_ni.o nsproxy.o \
diff --git a/kernel/events/uprobes.c b/kernel/events/uprobes.c
index e84eb52b646b..f500204eb70d 100644
--- a/kernel/events/uprobes.c
+++ b/kernel/events/uprobes.c
@@ -376,7 +376,7 @@ __update_ref_ctr(struct mm_struct *mm, unsigned long vaddr, short d)
if (!vaddr || !d)
return -EINVAL;
- ret = get_user_pages_remote(NULL, mm, vaddr, 1,
+ ret = get_user_pages_remote(mm, vaddr, 1,
FOLL_WRITE, &page, &vma, NULL);
if (unlikely(ret <= 0)) {
/*
@@ -477,7 +477,7 @@ retry:
if (is_register)
gup_flags |= FOLL_SPLIT_PMD;
/* Read the page with vaddr into memory */
- ret = get_user_pages_remote(NULL, mm, vaddr, 1, gup_flags,
+ ret = get_user_pages_remote(mm, vaddr, 1, gup_flags,
&old_page, &vma, NULL);
if (ret <= 0)
return ret;
@@ -2029,7 +2029,7 @@ static int is_trap_at_addr(struct mm_struct *mm, unsigned long vaddr)
* but we treat this as a 'remote' access since it is
* essentially a kernel access to the memory.
*/
- result = get_user_pages_remote(NULL, mm, vaddr, 1, FOLL_FORCE, &page,
+ result = get_user_pages_remote(mm, vaddr, 1, FOLL_FORCE, &page,
NULL, NULL);
if (result < 0)
return result;
diff --git a/kernel/exit.c b/kernel/exit.c
index c2d2961576f2..e45091eae224 100644
--- a/kernel/exit.c
+++ b/kernel/exit.c
@@ -1474,23 +1474,6 @@ end:
return retval;
}
-static struct pid *pidfd_get_pid(unsigned int fd)
-{
- struct fd f;
- struct pid *pid;
-
- f = fdget(fd);
- if (!f.file)
- return ERR_PTR(-EBADF);
-
- pid = pidfd_pid(f.file);
- if (!IS_ERR(pid))
- get_pid(pid);
-
- fdput(f);
- return pid;
-}
-
static long kernel_waitid(int which, pid_t upid, struct waitid_info *infop,
int options, struct rusage *ru)
{
diff --git a/kernel/futex.c b/kernel/futex.c
index 05e88562de68..d024fcef62e8 100644
--- a/kernel/futex.c
+++ b/kernel/futex.c
@@ -699,7 +699,7 @@ static int fault_in_user_writeable(u32 __user *uaddr)
int ret;
mmap_read_lock(mm);
- ret = fixup_user_fault(current, mm, (unsigned long)uaddr,
+ ret = fixup_user_fault(mm, (unsigned long)uaddr,
FAULT_FLAG_WRITE, NULL);
mmap_read_unlock(mm);
diff --git a/kernel/pid.c b/kernel/pid.c
index da5aea5f04fa..de9d29c41d77 100644
--- a/kernel/pid.c
+++ b/kernel/pid.c
@@ -519,6 +519,23 @@ struct pid *find_ge_pid(int nr, struct pid_namespace *ns)
return idr_get_next(&ns->idr, &nr);
}
+struct pid *pidfd_get_pid(unsigned int fd)
+{
+ struct fd f;
+ struct pid *pid;
+
+ f = fdget(fd);
+ if (!f.file)
+ return ERR_PTR(-EBADF);
+
+ pid = pidfd_pid(f.file);
+ if (!IS_ERR(pid))
+ get_pid(pid);
+
+ fdput(f);
+ return pid;
+}
+
/**
* pidfd_create() - Create a new pid file descriptor.
*
diff --git a/kernel/sys_ni.c b/kernel/sys_ni.c
index 519317f3904c..c935c1819ba3 100644
--- a/kernel/sys_ni.c
+++ b/kernel/sys_ni.c
@@ -287,6 +287,8 @@ COND_SYSCALL(mlockall);
COND_SYSCALL(munlockall);
COND_SYSCALL(mincore);
COND_SYSCALL(madvise);
+COND_SYSCALL(process_madvise);
+COND_SYSCALL_COMPAT(process_madvise);
COND_SYSCALL(remap_file_pages);
COND_SYSCALL(mbind);
COND_SYSCALL_COMPAT(mbind);
@@ -371,7 +373,6 @@ COND_SYSCALL(socketcall);
COND_SYSCALL_COMPAT(socketcall);
/* compat syscalls for arm64, x86, ... */
-COND_SYSCALL_COMPAT(sysctl);
COND_SYSCALL_COMPAT(fanotify_mark);
/* x86 */
diff --git a/kernel/sysctl_binary.c b/kernel/sysctl_binary.c
deleted file mode 100644
index 7d550cc76a3b..000000000000
--- a/kernel/sysctl_binary.c
+++ /dev/null
@@ -1,171 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-#include <linux/stat.h>
-#include <linux/sysctl.h>
-#include "../fs/xfs/xfs_sysctl.h"
-#include <linux/sunrpc/debug.h>
-#include <linux/string.h>
-#include <linux/syscalls.h>
-#include <linux/namei.h>
-#include <linux/mount.h>
-#include <linux/fs.h>
-#include <linux/nsproxy.h>
-#include <linux/pid_namespace.h>
-#include <linux/file.h>
-#include <linux/ctype.h>
-#include <linux/netdevice.h>
-#include <linux/kernel.h>
-#include <linux/uuid.h>
-#include <linux/slab.h>
-#include <linux/compat.h>
-
-static ssize_t binary_sysctl(const int *name, int nlen,
- void __user *oldval, size_t oldlen, void __user *newval, size_t newlen)
-{
- return -ENOSYS;
-}
-
-static void deprecated_sysctl_warning(const int *name, int nlen)
-{
- int i;
-
- /*
- * CTL_KERN/KERN_VERSION is used by older glibc and cannot
- * ever go away.
- */
- if (nlen >= 2 && name[0] == CTL_KERN && name[1] == KERN_VERSION)
- return;
-
- if (printk_ratelimit()) {
- printk(KERN_INFO
- "warning: process `%s' used the deprecated sysctl "
- "system call with ", current->comm);
- for (i = 0; i < nlen; i++)
- printk(KERN_CONT "%d.", name[i]);
- printk(KERN_CONT "\n");
- }
- return;
-}
-
-#define WARN_ONCE_HASH_BITS 8
-#define WARN_ONCE_HASH_SIZE (1<<WARN_ONCE_HASH_BITS)
-
-static DECLARE_BITMAP(warn_once_bitmap, WARN_ONCE_HASH_SIZE);
-
-#define FNV32_OFFSET 2166136261U
-#define FNV32_PRIME 0x01000193
-
-/*
- * Print each legacy sysctl (approximately) only once.
- * To avoid making the tables non-const use a external
- * hash-table instead.
- * Worst case hash collision: 6, but very rarely.
- * NOTE! We don't use the SMP-safe bit tests. We simply
- * don't care enough.
- */
-static void warn_on_bintable(const int *name, int nlen)
-{
- int i;
- u32 hash = FNV32_OFFSET;
-
- for (i = 0; i < nlen; i++)
- hash = (hash ^ name[i]) * FNV32_PRIME;
- hash %= WARN_ONCE_HASH_SIZE;
- if (__test_and_set_bit(hash, warn_once_bitmap))
- return;
- deprecated_sysctl_warning(name, nlen);
-}
-
-static ssize_t do_sysctl(int __user *args_name, int nlen,
- void __user *oldval, size_t oldlen, void __user *newval, size_t newlen)
-{
- int name[CTL_MAXNAME];
- int i;
-
- /* Check args->nlen. */
- if (nlen < 0 || nlen > CTL_MAXNAME)
- return -ENOTDIR;
- /* Read in the sysctl name for simplicity */
- for (i = 0; i < nlen; i++)
- if (get_user(name[i], args_name + i))
- return -EFAULT;
-
- warn_on_bintable(name, nlen);
-
- return binary_sysctl(name, nlen, oldval, oldlen, newval, newlen);
-}
-
-SYSCALL_DEFINE1(sysctl, struct __sysctl_args __user *, args)
-{
- struct __sysctl_args tmp;
- size_t oldlen = 0;
- ssize_t result;
-
- if (copy_from_user(&tmp, args, sizeof(tmp)))
- return -EFAULT;
-
- if (tmp.oldval && !tmp.oldlenp)
- return -EFAULT;
-
- if (tmp.oldlenp && get_user(oldlen, tmp.oldlenp))
- return -EFAULT;
-
- result = do_sysctl(tmp.name, tmp.nlen, tmp.oldval, oldlen,
- tmp.newval, tmp.newlen);
-
- if (result >= 0) {
- oldlen = result;
- result = 0;
- }
-
- if (tmp.oldlenp && put_user(oldlen, tmp.oldlenp))
- return -EFAULT;
-
- return result;
-}
-
-
-#ifdef CONFIG_COMPAT
-
-struct compat_sysctl_args {
- compat_uptr_t name;
- int nlen;
- compat_uptr_t oldval;
- compat_uptr_t oldlenp;
- compat_uptr_t newval;
- compat_size_t newlen;
- compat_ulong_t __unused[4];
-};
-
-COMPAT_SYSCALL_DEFINE1(sysctl, struct compat_sysctl_args __user *, args)
-{
- struct compat_sysctl_args tmp;
- compat_size_t __user *compat_oldlenp;
- size_t oldlen = 0;
- ssize_t result;
-
- if (copy_from_user(&tmp, args, sizeof(tmp)))
- return -EFAULT;
-
- if (tmp.oldval && !tmp.oldlenp)
- return -EFAULT;
-
- compat_oldlenp = compat_ptr(tmp.oldlenp);
- if (compat_oldlenp && get_user(oldlen, compat_oldlenp))
- return -EFAULT;
-
- result = do_sysctl(compat_ptr(tmp.name), tmp.nlen,
- compat_ptr(tmp.oldval), oldlen,
- compat_ptr(tmp.newval), tmp.newlen);
-
- if (result >= 0) {
- oldlen = result;
- result = 0;
- }
-
- if (compat_oldlenp && put_user(oldlen, compat_oldlenp))
- return -EFAULT;
-
- return result;
-}
-
-#endif /* CONFIG_COMPAT */
diff --git a/mm/filemap.c b/mm/filemap.c
index 9d43acdba8aa..cf4bdf7803b6 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -2425,6 +2425,7 @@ static struct file *do_sync_mmap_readahead(struct vm_fault *vmf)
struct address_space *mapping = file->f_mapping;
struct file *fpin = NULL;
pgoff_t offset = vmf->pgoff;
+ unsigned int mmap_miss;
/* If we don't want any read-ahead, don't bother */
if (vmf->vma->vm_flags & VM_RAND_READ)
@@ -2440,14 +2441,15 @@ static struct file *do_sync_mmap_readahead(struct vm_fault *vmf)
}
/* Avoid banging the cache line if not needed */
- if (ra->mmap_miss < MMAP_LOTSAMISS * 10)
- ra->mmap_miss++;
+ mmap_miss = READ_ONCE(ra->mmap_miss);
+ if (mmap_miss < MMAP_LOTSAMISS * 10)
+ WRITE_ONCE(ra->mmap_miss, ++mmap_miss);
/*
* Do we miss much more than hit in this file? If so,
* stop bothering with read-ahead. It will only hurt.
*/
- if (ra->mmap_miss > MMAP_LOTSAMISS)
+ if (mmap_miss > MMAP_LOTSAMISS)
return fpin;
/*
@@ -2473,13 +2475,15 @@ static struct file *do_async_mmap_readahead(struct vm_fault *vmf,
struct file_ra_state *ra = &file->f_ra;
struct address_space *mapping = file->f_mapping;
struct file *fpin = NULL;
+ unsigned int mmap_miss;
pgoff_t offset = vmf->pgoff;
/* If we don't want any read-ahead, don't bother */
if (vmf->vma->vm_flags & VM_RAND_READ || !ra->ra_pages)
return fpin;
- if (ra->mmap_miss > 0)
- ra->mmap_miss--;
+ mmap_miss = READ_ONCE(ra->mmap_miss);
+ if (mmap_miss)
+ WRITE_ONCE(ra->mmap_miss, --mmap_miss);
if (PageReadahead(page)) {
fpin = maybe_unlock_mmap_for_io(vmf, fpin);
page_cache_async_readahead(mapping, ra, file,
@@ -2645,6 +2649,7 @@ void filemap_map_pages(struct vm_fault *vmf,
unsigned long max_idx;
XA_STATE(xas, &mapping->i_pages, start_pgoff);
struct page *page;
+ unsigned int mmap_miss = READ_ONCE(file->f_ra.mmap_miss);
rcu_read_lock();
xas_for_each(&xas, page, end_pgoff) {
@@ -2681,8 +2686,8 @@ void filemap_map_pages(struct vm_fault *vmf,
if (page->index >= max_idx)
goto unlock;
- if (file->f_ra.mmap_miss > 0)
- file->f_ra.mmap_miss--;
+ if (mmap_miss > 0)
+ mmap_miss--;
vmf->address += (xas.xa_index - last_pgoff) << PAGE_SHIFT;
if (vmf->pte)
@@ -2702,6 +2707,7 @@ next:
break;
}
rcu_read_unlock();
+ WRITE_ONCE(file->f_ra.mmap_miss, mmap_miss);
}
EXPORT_SYMBOL(filemap_map_pages);
diff --git a/mm/frontswap.c b/mm/frontswap.c
index 9d977b1fc016..2183a56c7874 100644
--- a/mm/frontswap.c
+++ b/mm/frontswap.c
@@ -61,16 +61,16 @@ static u64 frontswap_failed_stores;
static u64 frontswap_invalidates;
static inline void inc_frontswap_loads(void) {
- frontswap_loads++;
+ data_race(frontswap_loads++);
}
static inline void inc_frontswap_succ_stores(void) {
- frontswap_succ_stores++;
+ data_race(frontswap_succ_stores++);
}
static inline void inc_frontswap_failed_stores(void) {
- frontswap_failed_stores++;
+ data_race(frontswap_failed_stores++);
}
static inline void inc_frontswap_invalidates(void) {
- frontswap_invalidates++;
+ data_race(frontswap_invalidates++);
}
#else
static inline void inc_frontswap_loads(void) { }
diff --git a/mm/gup.c b/mm/gup.c
index 5daadae475ea..2cc5eba44362 100644
--- a/mm/gup.c
+++ b/mm/gup.c
@@ -859,7 +859,7 @@ unmap:
* does not include FOLL_NOWAIT, the mmap_lock may be released. If it
* is, *@locked will be set to 0 and -EBUSY returned.
*/
-static int faultin_page(struct task_struct *tsk, struct vm_area_struct *vma,
+static int faultin_page(struct vm_area_struct *vma,
unsigned long address, unsigned int *flags, int *locked)
{
unsigned int fault_flags = 0;
@@ -884,7 +884,7 @@ static int faultin_page(struct task_struct *tsk, struct vm_area_struct *vma,
fault_flags |= FAULT_FLAG_TRIED;
}
- ret = handle_mm_fault(vma, address, fault_flags);
+ ret = handle_mm_fault(vma, address, fault_flags, NULL);
if (ret & VM_FAULT_ERROR) {
int err = vm_fault_to_errno(ret, *flags);
@@ -893,13 +893,6 @@ static int faultin_page(struct task_struct *tsk, struct vm_area_struct *vma,
BUG();
}
- if (tsk) {
- if (ret & VM_FAULT_MAJOR)
- tsk->maj_flt++;
- else
- tsk->min_flt++;
- }
-
if (ret & VM_FAULT_RETRY) {
if (locked && !(fault_flags & FAULT_FLAG_RETRY_NOWAIT))
*locked = 0;
@@ -969,7 +962,6 @@ static int check_vma_flags(struct vm_area_struct *vma, unsigned long gup_flags)
/**
* __get_user_pages() - pin user pages in memory
- * @tsk: task_struct of target task
* @mm: mm_struct of target mm
* @start: starting user address
* @nr_pages: number of pages from start to pin
@@ -1028,7 +1020,7 @@ static int check_vma_flags(struct vm_area_struct *vma, unsigned long gup_flags)
* instead of __get_user_pages. __get_user_pages should be used only if
* you need some special @gup_flags.
*/
-static long __get_user_pages(struct task_struct *tsk, struct mm_struct *mm,
+static long __get_user_pages(struct mm_struct *mm,
unsigned long start, unsigned long nr_pages,
unsigned int gup_flags, struct page **pages,
struct vm_area_struct **vmas, int *locked)
@@ -1110,8 +1102,7 @@ retry:
page = follow_page_mask(vma, start, foll_flags, &ctx);
if (!page) {
- ret = faultin_page(tsk, vma, start, &foll_flags,
- locked);
+ ret = faultin_page(vma, start, &foll_flags, locked);
switch (ret) {
case 0:
goto retry;
@@ -1185,8 +1176,6 @@ static bool vma_permits_fault(struct vm_area_struct *vma,
/**
* fixup_user_fault() - manually resolve a user page fault
- * @tsk: the task_struct to use for page fault accounting, or
- * NULL if faults are not to be recorded.
* @mm: mm_struct of target mm
* @address: user address
* @fault_flags:flags to pass down to handle_mm_fault()
@@ -1214,7 +1203,7 @@ static bool vma_permits_fault(struct vm_area_struct *vma,
* This function will not return with an unlocked mmap_lock. So it has not the
* same semantics wrt the @mm->mmap_lock as does filemap_fault().
*/
-int fixup_user_fault(struct task_struct *tsk, struct mm_struct *mm,
+int fixup_user_fault(struct mm_struct *mm,
unsigned long address, unsigned int fault_flags,
bool *unlocked)
{
@@ -1238,7 +1227,7 @@ retry:
fatal_signal_pending(current))
return -EINTR;
- ret = handle_mm_fault(vma, address, fault_flags);
+ ret = handle_mm_fault(vma, address, fault_flags, NULL);
major |= ret & VM_FAULT_MAJOR;
if (ret & VM_FAULT_ERROR) {
int err = vm_fault_to_errno(ret, 0);
@@ -1255,12 +1244,6 @@ retry:
goto retry;
}
- if (tsk) {
- if (major)
- tsk->maj_flt++;
- else
- tsk->min_flt++;
- }
return 0;
}
EXPORT_SYMBOL_GPL(fixup_user_fault);
@@ -1269,8 +1252,7 @@ EXPORT_SYMBOL_GPL(fixup_user_fault);
* Please note that this function, unlike __get_user_pages will not
* return 0 for nr_pages > 0 without FOLL_NOWAIT
*/
-static __always_inline long __get_user_pages_locked(struct task_struct *tsk,
- struct mm_struct *mm,
+static __always_inline long __get_user_pages_locked(struct mm_struct *mm,
unsigned long start,
unsigned long nr_pages,
struct page **pages,
@@ -1303,7 +1285,7 @@ static __always_inline long __get_user_pages_locked(struct task_struct *tsk,
pages_done = 0;
lock_dropped = false;
for (;;) {
- ret = __get_user_pages(tsk, mm, start, nr_pages, flags, pages,
+ ret = __get_user_pages(mm, start, nr_pages, flags, pages,
vmas, locked);
if (!locked)
/* VM_FAULT_RETRY couldn't trigger, bypass */
@@ -1363,7 +1345,7 @@ retry:
}
*locked = 1;
- ret = __get_user_pages(tsk, mm, start, 1, flags | FOLL_TRIED,
+ ret = __get_user_pages(mm, start, 1, flags | FOLL_TRIED,
pages, NULL, locked);
if (!*locked) {
/* Continue to retry until we succeeded */
@@ -1449,7 +1431,7 @@ long populate_vma_page_range(struct vm_area_struct *vma,
* We made sure addr is within a VMA, so the following will
* not result in a stack expansion that recurses back here.
*/
- return __get_user_pages(current, mm, start, nr_pages, gup_flags,
+ return __get_user_pages(mm, start, nr_pages, gup_flags,
NULL, NULL, locked);
}
@@ -1533,7 +1515,7 @@ struct page *get_dump_page(unsigned long addr)
struct vm_area_struct *vma;
struct page *page;
- if (__get_user_pages(current, current->mm, addr, 1,
+ if (__get_user_pages(current->mm, addr, 1,
FOLL_FORCE | FOLL_DUMP | FOLL_GET, &page, &vma,
NULL) < 1)
return NULL;
@@ -1542,8 +1524,7 @@ struct page *get_dump_page(unsigned long addr)
}
#endif /* CONFIG_ELF_CORE */
#else /* CONFIG_MMU */
-static long __get_user_pages_locked(struct task_struct *tsk,
- struct mm_struct *mm, unsigned long start,
+static long __get_user_pages_locked(struct mm_struct *mm, unsigned long start,
unsigned long nr_pages, struct page **pages,
struct vm_area_struct **vmas, int *locked,
unsigned int foll_flags)
@@ -1659,8 +1640,7 @@ static struct page *new_non_cma_page(struct page *page, unsigned long private)
return __alloc_pages_node(nid, gfp_mask, 0);
}
-static long check_and_migrate_cma_pages(struct task_struct *tsk,
- struct mm_struct *mm,
+static long check_and_migrate_cma_pages(struct mm_struct *mm,
unsigned long start,
unsigned long nr_pages,
struct page **pages,
@@ -1734,7 +1714,7 @@ check_again:
* again migrating any new CMA pages which we failed to isolate
* earlier.
*/
- ret = __get_user_pages_locked(tsk, mm, start, nr_pages,
+ ret = __get_user_pages_locked(mm, start, nr_pages,
pages, vmas, NULL,
gup_flags);
@@ -1748,8 +1728,7 @@ check_again:
return ret;
}
#else
-static long check_and_migrate_cma_pages(struct task_struct *tsk,
- struct mm_struct *mm,
+static long check_and_migrate_cma_pages(struct mm_struct *mm,
unsigned long start,
unsigned long nr_pages,
struct page **pages,
@@ -1764,8 +1743,7 @@ static long check_and_migrate_cma_pages(struct task_struct *tsk,
* __gup_longterm_locked() is a wrapper for __get_user_pages_locked which
* allows us to process the FOLL_LONGTERM flag.
*/
-static long __gup_longterm_locked(struct task_struct *tsk,
- struct mm_struct *mm,
+static long __gup_longterm_locked(struct mm_struct *mm,
unsigned long start,
unsigned long nr_pages,
struct page **pages,
@@ -1790,7 +1768,7 @@ static long __gup_longterm_locked(struct task_struct *tsk,
flags = memalloc_nocma_save();
}
- rc = __get_user_pages_locked(tsk, mm, start, nr_pages, pages,
+ rc = __get_user_pages_locked(mm, start, nr_pages, pages,
vmas_tmp, NULL, gup_flags);
if (gup_flags & FOLL_LONGTERM) {
@@ -1805,7 +1783,7 @@ static long __gup_longterm_locked(struct task_struct *tsk,
goto out;
}
- rc = check_and_migrate_cma_pages(tsk, mm, start, rc, pages,
+ rc = check_and_migrate_cma_pages(mm, start, rc, pages,
vmas_tmp, gup_flags);
}
@@ -1815,22 +1793,20 @@ out:
return rc;
}
#else /* !CONFIG_FS_DAX && !CONFIG_CMA */
-static __always_inline long __gup_longterm_locked(struct task_struct *tsk,
- struct mm_struct *mm,
+static __always_inline long __gup_longterm_locked(struct mm_struct *mm,
unsigned long start,
unsigned long nr_pages,
struct page **pages,
struct vm_area_struct **vmas,
unsigned int flags)
{
- return __get_user_pages_locked(tsk, mm, start, nr_pages, pages, vmas,
+ return __get_user_pages_locked(mm, start, nr_pages, pages, vmas,
NULL, flags);
}
#endif /* CONFIG_FS_DAX || CONFIG_CMA */
#ifdef CONFIG_MMU
-static long __get_user_pages_remote(struct task_struct *tsk,
- struct mm_struct *mm,
+static long __get_user_pages_remote(struct mm_struct *mm,
unsigned long start, unsigned long nr_pages,
unsigned int gup_flags, struct page **pages,
struct vm_area_struct **vmas, int *locked)
@@ -1849,20 +1825,18 @@ static long __get_user_pages_remote(struct task_struct *tsk,
* This will check the vmas (even if our vmas arg is NULL)
* and return -ENOTSUPP if DAX isn't allowed in this case:
*/
- return __gup_longterm_locked(tsk, mm, start, nr_pages, pages,
+ return __gup_longterm_locked(mm, start, nr_pages, pages,
vmas, gup_flags | FOLL_TOUCH |
FOLL_REMOTE);
}
- return __get_user_pages_locked(tsk, mm, start, nr_pages, pages, vmas,
+ return __get_user_pages_locked(mm, start, nr_pages, pages, vmas,
locked,
gup_flags | FOLL_TOUCH | FOLL_REMOTE);
}
/**
* get_user_pages_remote() - pin user pages in memory
- * @tsk: the task_struct to use for page fault accounting, or
- * NULL if faults are not to be recorded.
* @mm: mm_struct of target mm
* @start: starting user address
* @nr_pages: number of pages from start to pin
@@ -1921,7 +1895,7 @@ static long __get_user_pages_remote(struct task_struct *tsk,
* should use get_user_pages_remote because it cannot pass
* FAULT_FLAG_ALLOW_RETRY to handle_mm_fault.
*/
-long get_user_pages_remote(struct task_struct *tsk, struct mm_struct *mm,
+long get_user_pages_remote(struct mm_struct *mm,
unsigned long start, unsigned long nr_pages,
unsigned int gup_flags, struct page **pages,
struct vm_area_struct **vmas, int *locked)
@@ -1933,13 +1907,13 @@ long get_user_pages_remote(struct task_struct *tsk, struct mm_struct *mm,
if (WARN_ON_ONCE(gup_flags & FOLL_PIN))
return -EINVAL;
- return __get_user_pages_remote(tsk, mm, start, nr_pages, gup_flags,
+ return __get_user_pages_remote(mm, start, nr_pages, gup_flags,
pages, vmas, locked);
}
EXPORT_SYMBOL(get_user_pages_remote);
#else /* CONFIG_MMU */
-long get_user_pages_remote(struct task_struct *tsk, struct mm_struct *mm,
+long get_user_pages_remote(struct mm_struct *mm,
unsigned long start, unsigned long nr_pages,
unsigned int gup_flags, struct page **pages,
struct vm_area_struct **vmas, int *locked)
@@ -1947,8 +1921,7 @@ long get_user_pages_remote(struct task_struct *tsk, struct mm_struct *mm,
return 0;
}
-static long __get_user_pages_remote(struct task_struct *tsk,
- struct mm_struct *mm,
+static long __get_user_pages_remote(struct mm_struct *mm,
unsigned long start, unsigned long nr_pages,
unsigned int gup_flags, struct page **pages,
struct vm_area_struct **vmas, int *locked)
@@ -1968,11 +1941,10 @@ static long __get_user_pages_remote(struct task_struct *tsk,
* @vmas: array of pointers to vmas corresponding to each page.
* Or NULL if the caller does not require them.
*
- * This is the same as get_user_pages_remote(), just with a
- * less-flexible calling convention where we assume that the task
- * and mm being operated on are the current task's and don't allow
- * passing of a locked parameter. We also obviously don't pass
- * FOLL_REMOTE in here.
+ * This is the same as get_user_pages_remote(), just with a less-flexible
+ * calling convention where we assume that the mm being operated on belongs to
+ * the current task, and doesn't allow passing of a locked parameter. We also
+ * obviously don't pass FOLL_REMOTE in here.
*/
long get_user_pages(unsigned long start, unsigned long nr_pages,
unsigned int gup_flags, struct page **pages,
@@ -1985,7 +1957,7 @@ long get_user_pages(unsigned long start, unsigned long nr_pages,
if (WARN_ON_ONCE(gup_flags & FOLL_PIN))
return -EINVAL;
- return __gup_longterm_locked(current, current->mm, start, nr_pages,
+ return __gup_longterm_locked(current->mm, start, nr_pages,
pages, vmas, gup_flags | FOLL_TOUCH);
}
EXPORT_SYMBOL(get_user_pages);
@@ -1995,7 +1967,7 @@ EXPORT_SYMBOL(get_user_pages);
*
* mmap_read_lock(mm);
* do_something()
- * get_user_pages(tsk, mm, ..., pages, NULL);
+ * get_user_pages(mm, ..., pages, NULL);
* mmap_read_unlock(mm);
*
* to:
@@ -2003,7 +1975,7 @@ EXPORT_SYMBOL(get_user_pages);
* int locked = 1;
* mmap_read_lock(mm);
* do_something()
- * get_user_pages_locked(tsk, mm, ..., pages, &locked);
+ * get_user_pages_locked(mm, ..., pages, &locked);
* if (locked)
* mmap_read_unlock(mm);
*
@@ -2041,7 +2013,7 @@ long get_user_pages_locked(unsigned long start, unsigned long nr_pages,
if (WARN_ON_ONCE(gup_flags & FOLL_PIN))
return -EINVAL;
- return __get_user_pages_locked(current, current->mm, start, nr_pages,
+ return __get_user_pages_locked(current->mm, start, nr_pages,
pages, NULL, locked,
gup_flags | FOLL_TOUCH);
}
@@ -2051,12 +2023,12 @@ EXPORT_SYMBOL(get_user_pages_locked);
* get_user_pages_unlocked() is suitable to replace the form:
*
* mmap_read_lock(mm);
- * get_user_pages(tsk, mm, ..., pages, NULL);
+ * get_user_pages(mm, ..., pages, NULL);
* mmap_read_unlock(mm);
*
* with:
*
- * get_user_pages_unlocked(tsk, mm, ..., pages);
+ * get_user_pages_unlocked(mm, ..., pages);
*
* It is functionally equivalent to get_user_pages_fast so
* get_user_pages_fast should be used instead if specific gup_flags
@@ -2079,7 +2051,7 @@ long get_user_pages_unlocked(unsigned long start, unsigned long nr_pages,
return -EINVAL;
mmap_read_lock(mm);
- ret = __get_user_pages_locked(current, mm, start, nr_pages, pages, NULL,
+ ret = __get_user_pages_locked(mm, start, nr_pages, pages, NULL,
&locked, gup_flags | FOLL_TOUCH);
if (locked)
mmap_read_unlock(mm);
@@ -2724,7 +2696,7 @@ static int __gup_longterm_unlocked(unsigned long start, int nr_pages,
*/
if (gup_flags & FOLL_LONGTERM) {
mmap_read_lock(current->mm);
- ret = __gup_longterm_locked(current, current->mm,
+ ret = __gup_longterm_locked(current->mm,
start, nr_pages,
pages, NULL, gup_flags);
mmap_read_unlock(current->mm);
@@ -2967,10 +2939,8 @@ int pin_user_pages_fast_only(unsigned long start, int nr_pages,
EXPORT_SYMBOL_GPL(pin_user_pages_fast_only);
/**
- * pin_user_pages_remote() - pin pages of a remote process (task != current)
+ * pin_user_pages_remote() - pin pages of a remote process
*
- * @tsk: the task_struct to use for page fault accounting, or
- * NULL if faults are not to be recorded.
* @mm: mm_struct of target mm
* @start: starting user address
* @nr_pages: number of pages from start to pin
@@ -2991,7 +2961,7 @@ EXPORT_SYMBOL_GPL(pin_user_pages_fast_only);
* FOLL_PIN means that the pages must be released via unpin_user_page(). Please
* see Documentation/core-api/pin_user_pages.rst for details.
*/
-long pin_user_pages_remote(struct task_struct *tsk, struct mm_struct *mm,
+long pin_user_pages_remote(struct mm_struct *mm,
unsigned long start, unsigned long nr_pages,
unsigned int gup_flags, struct page **pages,
struct vm_area_struct **vmas, int *locked)
@@ -3001,7 +2971,7 @@ long pin_user_pages_remote(struct task_struct *tsk, struct mm_struct *mm,
return -EINVAL;
gup_flags |= FOLL_PIN;
- return __get_user_pages_remote(tsk, mm, start, nr_pages, gup_flags,
+ return __get_user_pages_remote(mm, start, nr_pages, gup_flags,
pages, vmas, locked);
}
EXPORT_SYMBOL(pin_user_pages_remote);
@@ -3033,7 +3003,7 @@ long pin_user_pages(unsigned long start, unsigned long nr_pages,
return -EINVAL;
gup_flags |= FOLL_PIN;
- return __gup_longterm_locked(current, current->mm, start, nr_pages,
+ return __gup_longterm_locked(current->mm, start, nr_pages,
pages, vmas, gup_flags);
}
EXPORT_SYMBOL(pin_user_pages);
@@ -3078,7 +3048,7 @@ long pin_user_pages_locked(unsigned long start, unsigned long nr_pages,
return -EINVAL;
gup_flags |= FOLL_PIN;
- return __get_user_pages_locked(current, current->mm, start, nr_pages,
+ return __get_user_pages_locked(current->mm, start, nr_pages,
pages, NULL, locked,
gup_flags | FOLL_TOUCH);
}
diff --git a/mm/hmm.c b/mm/hmm.c
index 0809baee49d0..2f9c90a6b3b3 100644
--- a/mm/hmm.c
+++ b/mm/hmm.c
@@ -75,7 +75,8 @@ static int hmm_vma_fault(unsigned long addr, unsigned long end,
}
for (; addr < end; addr += PAGE_SIZE)
- if (handle_mm_fault(vma, addr, fault_flags) & VM_FAULT_ERROR)
+ if (handle_mm_fault(vma, addr, fault_flags, NULL) &
+ VM_FAULT_ERROR)
return -EFAULT;
return -EBUSY;
}
diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index 0da89305a555..589c330df4db 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -1092,15 +1092,6 @@ retry_cpuset:
return NULL;
}
-/* Movability of hugepages depends on migration support. */
-static inline gfp_t htlb_alloc_mask(struct hstate *h)
-{
- if (hugepage_movable_supported(h))
- return GFP_HIGHUSER_MOVABLE;
- else
- return GFP_HIGHUSER;
-}
-
static struct page *dequeue_huge_page_vma(struct hstate *h,
struct vm_area_struct *vma,
unsigned long address, int avoid_reserve,
@@ -2022,31 +2013,9 @@ struct page *alloc_buddy_huge_page_with_mpol(struct hstate *h,
}
/* page migration callback function */
-struct page *alloc_huge_page_node(struct hstate *h, int nid)
-{
- gfp_t gfp_mask = htlb_alloc_mask(h);
- struct page *page = NULL;
-
- if (nid != NUMA_NO_NODE)
- gfp_mask |= __GFP_THISNODE;
-
- spin_lock(&hugetlb_lock);
- if (h->free_huge_pages - h->resv_huge_pages > 0)
- page = dequeue_huge_page_nodemask(h, gfp_mask, nid, NULL);
- spin_unlock(&hugetlb_lock);
-
- if (!page)
- page = alloc_migrate_huge_page(h, gfp_mask, nid, NULL);
-
- return page;
-}
-
-/* page migration callback function */
struct page *alloc_huge_page_nodemask(struct hstate *h, int preferred_nid,
- nodemask_t *nmask)
+ nodemask_t *nmask, gfp_t gfp_mask)
{
- gfp_t gfp_mask = htlb_alloc_mask(h);
-
spin_lock(&hugetlb_lock);
if (h->free_huge_pages - h->resv_huge_pages > 0) {
struct page *page;
@@ -2074,7 +2043,7 @@ struct page *alloc_huge_page_vma(struct hstate *h, struct vm_area_struct *vma,
gfp_mask = htlb_alloc_mask(h);
node = huge_node(vma, address, gfp_mask, &mpol, &nodemask);
- page = alloc_huge_page_nodemask(h, node, nodemask);
+ page = alloc_huge_page_nodemask(h, node, nodemask, gfp_mask);
mpol_cond_put(mpol);
return page;
diff --git a/mm/internal.h b/mm/internal.h
index dd14c5311d7f..10c677655912 100644
--- a/mm/internal.h
+++ b/mm/internal.h
@@ -613,5 +613,11 @@ static inline bool is_migrate_highatomic_page(struct page *page)
}
void setup_zone_pageset(struct zone *zone);
-extern struct page *alloc_new_node_page(struct page *page, unsigned long node);
+
+struct migration_target_control {
+ int nid; /* preferred node id */
+ nodemask_t *nmask;
+ gfp_t gfp_mask;
+};
+
#endif /* __MM_INTERNAL_H */
diff --git a/mm/kmemleak.c b/mm/kmemleak.c
index 29a684a4656a..4e644dc074b1 100644
--- a/mm/kmemleak.c
+++ b/mm/kmemleak.c
@@ -1169,8 +1169,10 @@ static bool update_checksum(struct kmemleak_object *object)
u32 old_csum = object->checksum;
kasan_disable_current();
+ kcsan_disable_current();
object->checksum = crc32(0, (void *)object->pointer, object->size);
kasan_enable_current();
+ kcsan_enable_current();
return object->checksum != old_csum;
}
diff --git a/mm/ksm.c b/mm/ksm.c
index 5fb176d497ea..90a625b02a1d 100644
--- a/mm/ksm.c
+++ b/mm/ksm.c
@@ -480,7 +480,8 @@ static int break_ksm(struct vm_area_struct *vma, unsigned long addr)
break;
if (PageKsm(page))
ret = handle_mm_fault(vma, addr,
- FAULT_FLAG_WRITE | FAULT_FLAG_REMOTE);
+ FAULT_FLAG_WRITE | FAULT_FLAG_REMOTE,
+ NULL);
else
ret = VM_FAULT_WRITE;
put_page(page);
diff --git a/mm/list_lru.c b/mm/list_lru.c
index e825804b3928..5aa6e44bc2ae 100644
--- a/mm/list_lru.c
+++ b/mm/list_lru.c
@@ -180,7 +180,7 @@ unsigned long list_lru_count_one(struct list_lru *lru,
rcu_read_lock();
l = list_lru_from_memcg_idx(nlru, memcg_cache_id(memcg));
- count = l->nr_items;
+ count = READ_ONCE(l->nr_items);
rcu_read_unlock();
return count;
diff --git a/mm/madvise.c b/mm/madvise.c
index 1fe2e55c0a9b..7b5ca96108cd 100644
--- a/mm/madvise.c
+++ b/mm/madvise.c
@@ -17,17 +17,20 @@
#include <linux/falloc.h>
#include <linux/fadvise.h>
#include <linux/sched.h>
+#include <linux/sched/mm.h>
#include <linux/ksm.h>
#include <linux/fs.h>
#include <linux/file.h>
#include <linux/blkdev.h>
#include <linux/backing-dev.h>
+#include <linux/compat.h>
#include <linux/pagewalk.h>
#include <linux/swap.h>
#include <linux/swapops.h>
#include <linux/shmem_fs.h>
#include <linux/mmu_notifier.h>
#include <linux/sched/mm.h>
+#include <linux/uio.h>
#include <asm/tlb.h>
@@ -36,6 +39,7 @@
struct madvise_walk_private {
struct mmu_gather *tlb;
bool pageout;
+ struct task_struct *target_task;
};
/*
@@ -255,6 +259,7 @@ static long madvise_willneed(struct vm_area_struct *vma,
struct vm_area_struct **prev,
unsigned long start, unsigned long end)
{
+ struct mm_struct *mm = vma->vm_mm;
struct file *file = vma->vm_file;
loff_t offset;
@@ -289,12 +294,12 @@ static long madvise_willneed(struct vm_area_struct *vma,
*/
*prev = NULL; /* tell sys_madvise we drop mmap_lock */
get_file(file);
- mmap_read_unlock(current->mm);
+ mmap_read_unlock(mm);
offset = (loff_t)(start - vma->vm_start)
+ ((loff_t)vma->vm_pgoff << PAGE_SHIFT);
vfs_fadvise(file, offset, end - start, POSIX_FADV_WILLNEED);
fput(file);
- mmap_read_lock(current->mm);
+ mmap_read_lock(mm);
return 0;
}
@@ -315,6 +320,10 @@ static int madvise_cold_or_pageout_pte_range(pmd_t *pmd,
if (fatal_signal_pending(current))
return -EINTR;
+ if (private->target_task &&
+ fatal_signal_pending(private->target_task))
+ return -EINTR;
+
#ifdef CONFIG_TRANSPARENT_HUGEPAGE
if (pmd_trans_huge(*pmd)) {
pmd_t orig_pmd;
@@ -476,12 +485,14 @@ static const struct mm_walk_ops cold_walk_ops = {
};
static void madvise_cold_page_range(struct mmu_gather *tlb,
+ struct task_struct *task,
struct vm_area_struct *vma,
unsigned long addr, unsigned long end)
{
struct madvise_walk_private walk_private = {
.pageout = false,
.tlb = tlb,
+ .target_task = task,
};
tlb_start_vma(tlb, vma);
@@ -489,7 +500,8 @@ static void madvise_cold_page_range(struct mmu_gather *tlb,
tlb_end_vma(tlb, vma);
}
-static long madvise_cold(struct vm_area_struct *vma,
+static long madvise_cold(struct task_struct *task,
+ struct vm_area_struct *vma,
struct vm_area_struct **prev,
unsigned long start_addr, unsigned long end_addr)
{
@@ -502,19 +514,21 @@ static long madvise_cold(struct vm_area_struct *vma,
lru_add_drain();
tlb_gather_mmu(&tlb, mm, start_addr, end_addr);
- madvise_cold_page_range(&tlb, vma, start_addr, end_addr);
+ madvise_cold_page_range(&tlb, task, vma, start_addr, end_addr);
tlb_finish_mmu(&tlb, start_addr, end_addr);
return 0;
}
static void madvise_pageout_page_range(struct mmu_gather *tlb,
+ struct task_struct *task,
struct vm_area_struct *vma,
unsigned long addr, unsigned long end)
{
struct madvise_walk_private walk_private = {
.pageout = true,
.tlb = tlb,
+ .target_task = task,
};
tlb_start_vma(tlb, vma);
@@ -538,7 +552,8 @@ static inline bool can_do_pageout(struct vm_area_struct *vma)
inode_permission(file_inode(vma->vm_file), MAY_WRITE) == 0;
}
-static long madvise_pageout(struct vm_area_struct *vma,
+static long madvise_pageout(struct task_struct *task,
+ struct vm_area_struct *vma,
struct vm_area_struct **prev,
unsigned long start_addr, unsigned long end_addr)
{
@@ -554,7 +569,7 @@ static long madvise_pageout(struct vm_area_struct *vma,
lru_add_drain();
tlb_gather_mmu(&tlb, mm, start_addr, end_addr);
- madvise_pageout_page_range(&tlb, vma, start_addr, end_addr);
+ madvise_pageout_page_range(&tlb, task, vma, start_addr, end_addr);
tlb_finish_mmu(&tlb, start_addr, end_addr);
return 0;
@@ -683,7 +698,6 @@ out:
if (nr_swap) {
if (current->mm == mm)
sync_mm_rss(mm);
-
add_mm_counter(mm, MM_SWAPENTS, nr_swap);
}
arch_leave_lazy_mmu_mode();
@@ -763,6 +777,8 @@ static long madvise_dontneed_free(struct vm_area_struct *vma,
unsigned long start, unsigned long end,
int behavior)
{
+ struct mm_struct *mm = vma->vm_mm;
+
*prev = vma;
if (!can_madv_lru_vma(vma))
return -EINVAL;
@@ -770,8 +786,8 @@ static long madvise_dontneed_free(struct vm_area_struct *vma,
if (!userfaultfd_remove(vma, start, end)) {
*prev = NULL; /* mmap_lock has been dropped, prev is stale */
- mmap_read_lock(current->mm);
- vma = find_vma(current->mm, start);
+ mmap_read_lock(mm);
+ vma = find_vma(mm, start);
if (!vma)
return -ENOMEM;
if (start < vma->vm_start) {
@@ -825,6 +841,7 @@ static long madvise_remove(struct vm_area_struct *vma,
loff_t offset;
int error;
struct file *f;
+ struct mm_struct *mm = vma->vm_mm;
*prev = NULL; /* tell sys_madvise we drop mmap_lock */
@@ -852,13 +869,13 @@ static long madvise_remove(struct vm_area_struct *vma,
get_file(f);
if (userfaultfd_remove(vma, start, end)) {
/* mmap_lock was not released by userfaultfd_remove() */
- mmap_read_unlock(current->mm);
+ mmap_read_unlock(mm);
}
error = vfs_fallocate(f,
FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE,
offset, end - start);
fput(f);
- mmap_read_lock(current->mm);
+ mmap_read_lock(mm);
return error;
}
@@ -927,7 +944,8 @@ static int madvise_inject_error(int behavior,
#endif
static long
-madvise_vma(struct vm_area_struct *vma, struct vm_area_struct **prev,
+madvise_vma(struct task_struct *task, struct vm_area_struct *vma,
+ struct vm_area_struct **prev,
unsigned long start, unsigned long end, int behavior)
{
switch (behavior) {
@@ -936,9 +954,9 @@ madvise_vma(struct vm_area_struct *vma, struct vm_area_struct **prev,
case MADV_WILLNEED:
return madvise_willneed(vma, prev, start, end);
case MADV_COLD:
- return madvise_cold(vma, prev, start, end);
+ return madvise_cold(task, vma, prev, start, end);
case MADV_PAGEOUT:
- return madvise_pageout(vma, prev, start, end);
+ return madvise_pageout(task, vma, prev, start, end);
case MADV_FREE:
case MADV_DONTNEED:
return madvise_dontneed_free(vma, prev, start, end, behavior);
@@ -985,6 +1003,18 @@ madvise_behavior_valid(int behavior)
}
}
+static bool
+process_madvise_behavior_valid(int behavior)
+{
+ switch (behavior) {
+ case MADV_COLD:
+ case MADV_PAGEOUT:
+ return true;
+ default:
+ return false;
+ }
+}
+
/*
* The madvise(2) system call.
*
@@ -1032,6 +1062,11 @@ madvise_behavior_valid(int behavior)
* MADV_DONTDUMP - the application wants to prevent pages in the given range
* from being included in its core dump.
* MADV_DODUMP - cancel MADV_DONTDUMP: no longer exclude from core dump.
+ * MADV_COLD - the application is not expected to use this memory soon,
+ * deactivate pages in this range so that they can be reclaimed
+ * easily if memory pressure hanppens.
+ * MADV_PAGEOUT - the application is not expected to use this memory soon,
+ * page out the pages in this range immediately.
*
* return values:
* zero - success
@@ -1046,7 +1081,8 @@ madvise_behavior_valid(int behavior)
* -EBADF - map exists, but area maps something that isn't a file.
* -EAGAIN - a kernel resource was temporarily unavailable.
*/
-int do_madvise(unsigned long start, size_t len_in, int behavior)
+int do_madvise(struct task_struct *target_task, struct mm_struct *mm,
+ unsigned long start, size_t len_in, int behavior)
{
unsigned long end, tmp;
struct vm_area_struct *vma, *prev;
@@ -1084,7 +1120,7 @@ int do_madvise(unsigned long start, size_t len_in, int behavior)
write = madvise_need_mmap_write(behavior);
if (write) {
- if (mmap_write_lock_killable(current->mm))
+ if (mmap_write_lock_killable(mm))
return -EINTR;
/*
@@ -1099,12 +1135,12 @@ int do_madvise(unsigned long start, size_t len_in, int behavior)
* but for now we have the mmget_still_valid()
* model.
*/
- if (!mmget_still_valid(current->mm)) {
- mmap_write_unlock(current->mm);
+ if (!mmget_still_valid(mm)) {
+ mmap_write_unlock(mm);
return -EINTR;
}
} else {
- mmap_read_lock(current->mm);
+ mmap_read_lock(mm);
}
/*
@@ -1112,7 +1148,7 @@ int do_madvise(unsigned long start, size_t len_in, int behavior)
* ranges, just ignore them, but return -ENOMEM at the end.
* - different from the way of handling in mlock etc.
*/
- vma = find_vma_prev(current->mm, start, &prev);
+ vma = find_vma_prev(mm, start, &prev);
if (vma && start > vma->vm_start)
prev = vma;
@@ -1137,7 +1173,8 @@ int do_madvise(unsigned long start, size_t len_in, int behavior)
tmp = end;
/* Here vma->vm_start <= start < tmp <= (end|vma->vm_end). */
- error = madvise_vma(vma, &prev, start, tmp, behavior);
+ error = madvise_vma(target_task, vma, &prev,
+ start, tmp, behavior);
if (error)
goto out;
start = tmp;
@@ -1149,19 +1186,122 @@ int do_madvise(unsigned long start, size_t len_in, int behavior)
if (prev)
vma = prev->vm_next;
else /* madvise_remove dropped mmap_lock */
- vma = find_vma(current->mm, start);
+ vma = find_vma(mm, start);
}
out:
blk_finish_plug(&plug);
if (write)
- mmap_write_unlock(current->mm);
+ mmap_write_unlock(mm);
else
- mmap_read_unlock(current->mm);
+ mmap_read_unlock(mm);
return error;
}
SYSCALL_DEFINE3(madvise, unsigned long, start, size_t, len_in, int, behavior)
{
- return do_madvise(start, len_in, behavior);
+ return do_madvise(current, current->mm, start, len_in, behavior);
+}
+
+static int process_madvise_vec(struct task_struct *target_task,
+ struct mm_struct *mm, struct iov_iter *iter, int behavior)
+{
+ struct iovec iovec;
+ int ret = 0;
+
+ while (iov_iter_count(iter)) {
+ iovec = iov_iter_iovec(iter);
+ ret = do_madvise(target_task, mm, (unsigned long)iovec.iov_base,
+ iovec.iov_len, behavior);
+ if (ret < 0)
+ break;
+ iov_iter_advance(iter, iovec.iov_len);
+ }
+
+ return ret;
+}
+
+static ssize_t do_process_madvise(int pidfd, struct iov_iter *iter,
+ int behavior, unsigned int flags)
+{
+ ssize_t ret;
+ struct pid *pid;
+ struct task_struct *task;
+ struct mm_struct *mm;
+ size_t total_len = iov_iter_count(iter);
+
+ if (flags != 0)
+ return -EINVAL;
+
+ pid = pidfd_get_pid(pidfd);
+ if (IS_ERR(pid))
+ return PTR_ERR(pid);
+
+ task = get_pid_task(pid, PIDTYPE_PID);
+ if (!task) {
+ ret = -ESRCH;
+ goto put_pid;
+ }
+
+ if (task->mm != current->mm &&
+ !process_madvise_behavior_valid(behavior)) {
+ ret = -EINVAL;
+ goto release_task;
+ }
+
+ mm = mm_access(task, PTRACE_MODE_ATTACH_FSCREDS);
+ if (IS_ERR_OR_NULL(mm)) {
+ ret = IS_ERR(mm) ? PTR_ERR(mm) : -ESRCH;
+ goto release_task;
+ }
+
+ ret = process_madvise_vec(task, mm, iter, behavior);
+ if (ret >= 0)
+ ret = total_len - iov_iter_count(iter);
+
+ mmput(mm);
+release_task:
+ put_task_struct(task);
+put_pid:
+ put_pid(pid);
+ return ret;
+}
+
+SYSCALL_DEFINE5(process_madvise, int, pidfd, const struct iovec __user *, vec,
+ unsigned long, vlen, int, behavior, unsigned int, flags)
+{
+ ssize_t ret;
+ struct iovec iovstack[UIO_FASTIOV];
+ struct iovec *iov = iovstack;
+ struct iov_iter iter;
+
+ ret = import_iovec(READ, vec, vlen, ARRAY_SIZE(iovstack), &iov, &iter);
+ if (ret >= 0) {
+ ret = do_process_madvise(pidfd, &iter, behavior, flags);
+ kfree(iov);
+ }
+ return ret;
+}
+
+#ifdef CONFIG_COMPAT
+COMPAT_SYSCALL_DEFINE5(process_madvise, compat_int_t, pidfd,
+ const struct compat_iovec __user *, vec,
+ compat_ulong_t, vlen,
+ compat_int_t, behavior,
+ compat_uint_t, flags)
+
+{
+ ssize_t ret;
+ struct iovec iovstack[UIO_FASTIOV];
+ struct iovec *iov = iovstack;
+ struct iov_iter iter;
+
+ ret = compat_import_iovec(READ, vec, vlen, ARRAY_SIZE(iovstack),
+ &iov, &iter);
+ if (ret >= 0) {
+ ret = do_process_madvise(pidfd, &iter, behavior, flags);
+ kfree(iov);
+ }
+ return ret;
}
+#endif
diff --git a/mm/memory-failure.c b/mm/memory-failure.c
index 7359164c3fe9..9523515be997 100644
--- a/mm/memory-failure.c
+++ b/mm/memory-failure.c
@@ -1659,13 +1659,6 @@ int unpoison_memory(unsigned long pfn)
}
EXPORT_SYMBOL(unpoison_memory);
-static struct page *new_page(struct page *p, unsigned long private)
-{
- int nid = page_to_nid(p);
-
- return new_page_nodemask(p, nid, &node_states[N_MEMORY]);
-}
-
/*
* Safely get reference count of an arbitrary page.
* Returns 0 for a free page, -EIO for a zero refcount page
@@ -1770,6 +1763,10 @@ static int __soft_offline_page(struct page *page)
const char *msg_page[] = {"page", "hugepage"};
bool huge = PageHuge(page);
LIST_HEAD(pagelist);
+ struct migration_target_control mtc = {
+ .nid = NUMA_NO_NODE,
+ .gfp_mask = GFP_USER | __GFP_MOVABLE | __GFP_RETRY_MAYFAIL,
+ };
/*
* Check PageHWPoison again inside page lock because PageHWPoison
@@ -1806,8 +1803,8 @@ static int __soft_offline_page(struct page *page)
}
if (isolate_page(hpage, &pagelist)) {
- ret = migrate_pages(&pagelist, new_page, NULL, MPOL_MF_MOVE_ALL,
- MIGRATE_SYNC, MR_MEMORY_FAILURE);
+ ret = migrate_pages(&pagelist, alloc_migration_target, NULL,
+ (unsigned long)&mtc, MIGRATE_SYNC, MR_MEMORY_FAILURE);
if (!ret) {
page_handle_poison(page, true, true, huge);
} else {
diff --git a/mm/memory.c b/mm/memory.c
index 9ffbe8fa08e6..45e1dc0547ba 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -71,6 +71,8 @@
#include <linux/dax.h>
#include <linux/oom.h>
#include <linux/numa.h>
+#include <linux/perf_event.h>
+#include <linux/ptrace.h>
#include <trace/events/kmem.h>
@@ -3127,8 +3129,8 @@ vm_fault_t do_swap_page(struct vm_fault *vmf)
if (!page) {
struct swap_info_struct *si = swp_swap_info(entry);
- if (si->flags & SWP_SYNCHRONOUS_IO &&
- __swap_count(entry) == 1) {
+ if (data_race(si->flags & SWP_SYNCHRONOUS_IO) &&
+ __swap_count(entry) == 1) {
/* skip swapcache */
page = alloc_page_vma(GFP_HIGHUSER_MOVABLE, vma,
vmf->address);
@@ -4366,6 +4368,67 @@ retry_pud:
return handle_pte_fault(&vmf);
}
+/**
+ * mm_account_fault - Do page fault accountings
+ *
+ * @regs: the pt_regs struct pointer. When set to NULL, will skip accounting
+ * of perf event counters, but we'll still do the per-task accounting to
+ * the task who triggered this page fault.
+ * @address: the faulted address.
+ * @flags: the fault flags.
+ * @ret: the fault retcode.
+ *
+ * This will take care of most of the page fault accountings. Meanwhile, it
+ * will also include the PERF_COUNT_SW_PAGE_FAULTS_[MAJ|MIN] perf counter
+ * updates. However note that the handling of PERF_COUNT_SW_PAGE_FAULTS should
+ * still be in per-arch page fault handlers at the entry of page fault.
+ */
+static inline void mm_account_fault(struct pt_regs *regs,
+ unsigned long address, unsigned int flags,
+ vm_fault_t ret)
+{
+ bool major;
+
+ /*
+ * We don't do accounting for some specific faults:
+ *
+ * - Unsuccessful faults (e.g. when the address wasn't valid). That
+ * includes arch_vma_access_permitted() failing before reaching here.
+ * So this is not a "this many hardware page faults" counter. We
+ * should use the hw profiling for that.
+ *
+ * - Incomplete faults (VM_FAULT_RETRY). They will only be counted
+ * once they're completed.
+ */
+ if (ret & (VM_FAULT_ERROR | VM_FAULT_RETRY))
+ return;
+
+ /*
+ * We define the fault as a major fault when the final successful fault
+ * is VM_FAULT_MAJOR, or if it retried (which implies that we couldn't
+ * handle it immediately previously).
+ */
+ major = (ret & VM_FAULT_MAJOR) || (flags & FAULT_FLAG_TRIED);
+
+ if (major)
+ current->maj_flt++;
+ else
+ current->min_flt++;
+
+ /*
+ * If the fault is done for GUP, regs will be NULL. We only do the
+ * accounting for the per thread fault counters who triggered the
+ * fault, and we skip the perf event updates.
+ */
+ if (!regs)
+ return;
+
+ if (major)
+ perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MAJ, 1, regs, address);
+ else
+ perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MIN, 1, regs, address);
+}
+
/*
* By the time we get here, we already hold the mm semaphore
*
@@ -4373,7 +4436,7 @@ retry_pud:
* return value. See filemap_fault() and __lock_page_or_retry().
*/
vm_fault_t handle_mm_fault(struct vm_area_struct *vma, unsigned long address,
- unsigned int flags)
+ unsigned int flags, struct pt_regs *regs)
{
vm_fault_t ret;
@@ -4414,6 +4477,8 @@ vm_fault_t handle_mm_fault(struct vm_area_struct *vma, unsigned long address,
mem_cgroup_oom_synchronize(false);
}
+ mm_account_fault(regs, address, flags, ret);
+
return ret;
}
EXPORT_SYMBOL_GPL(handle_mm_fault);
@@ -4687,7 +4752,7 @@ int __access_remote_vm(struct task_struct *tsk, struct mm_struct *mm,
void *maddr;
struct page *page = NULL;
- ret = get_user_pages_remote(tsk, mm, addr, 1,
+ ret = get_user_pages_remote(mm, addr, 1,
gup_flags, &page, &vma, NULL);
if (ret <= 0) {
#ifndef CONFIG_HAVE_IOREMAP_PROT
diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c
index 0204de3f9601..dcdf3271f87e 100644
--- a/mm/memory_hotplug.c
+++ b/mm/memory_hotplug.c
@@ -1275,23 +1275,6 @@ found:
return 0;
}
-static struct page *new_node_page(struct page *page, unsigned long private)
-{
- int nid = page_to_nid(page);
- nodemask_t nmask = node_states[N_MEMORY];
-
- /*
- * try to allocate from a different node but reuse this node if there
- * are no other online nodes to be used (e.g. we are offlining a part
- * of the only existing node)
- */
- node_clear(nid, nmask);
- if (nodes_empty(nmask))
- node_set(nid, nmask);
-
- return new_page_nodemask(page, nid, &nmask);
-}
-
static int
do_migrate_range(unsigned long start_pfn, unsigned long end_pfn)
{
@@ -1351,9 +1334,28 @@ do_migrate_range(unsigned long start_pfn, unsigned long end_pfn)
put_page(page);
}
if (!list_empty(&source)) {
- /* Allocate a new page from the nearest neighbor node */
- ret = migrate_pages(&source, new_node_page, NULL, 0,
- MIGRATE_SYNC, MR_MEMORY_HOTPLUG);
+ nodemask_t nmask = node_states[N_MEMORY];
+ struct migration_target_control mtc = {
+ .nmask = &nmask,
+ .gfp_mask = GFP_USER | __GFP_MOVABLE | __GFP_RETRY_MAYFAIL,
+ };
+
+ /*
+ * We have checked that migration range is on a single zone so
+ * we can use the nid of the first page to all the others.
+ */
+ mtc.nid = page_to_nid(list_first_entry(&source, struct page, lru));
+
+ /*
+ * try to allocate from a different node but reuse this node
+ * if there are no other online nodes to be used (e.g. we are
+ * offlining a part of the only existing node)
+ */
+ node_clear(mtc.nid, nmask);
+ if (nodes_empty(nmask))
+ node_set(mtc.nid, nmask);
+ ret = migrate_pages(&source, alloc_migration_target, NULL,
+ (unsigned long)&mtc, MIGRATE_SYNC, MR_MEMORY_HOTPLUG);
if (ret) {
list_for_each_entry(page, &source, lru) {
pr_warn("migrating pfn %lx failed ret:%d ",
diff --git a/mm/mempolicy.c b/mm/mempolicy.c
index dabcee8630f2..93fcfc1f2fa2 100644
--- a/mm/mempolicy.c
+++ b/mm/mempolicy.c
@@ -1065,27 +1065,6 @@ static int migrate_page_add(struct page *page, struct list_head *pagelist,
return 0;
}
-/* page allocation callback for NUMA node migration */
-struct page *alloc_new_node_page(struct page *page, unsigned long node)
-{
- if (PageHuge(page))
- return alloc_huge_page_node(page_hstate(compound_head(page)),
- node);
- else if (PageTransHuge(page)) {
- struct page *thp;
-
- thp = alloc_pages_node(node,
- (GFP_TRANSHUGE | __GFP_THISNODE),
- HPAGE_PMD_ORDER);
- if (!thp)
- return NULL;
- prep_transhuge_page(thp);
- return thp;
- } else
- return __alloc_pages_node(node, GFP_HIGHUSER_MOVABLE |
- __GFP_THISNODE, 0);
-}
-
/*
* Migrate pages from one node to a target node.
* Returns error or the number of pages not migrated.
@@ -1096,6 +1075,10 @@ static int migrate_to_node(struct mm_struct *mm, int source, int dest,
nodemask_t nmask;
LIST_HEAD(pagelist);
int err = 0;
+ struct migration_target_control mtc = {
+ .nid = dest,
+ .gfp_mask = GFP_HIGHUSER_MOVABLE | __GFP_THISNODE,
+ };
nodes_clear(nmask);
node_set(source, nmask);
@@ -1110,8 +1093,8 @@ static int migrate_to_node(struct mm_struct *mm, int source, int dest,
flags | MPOL_MF_DISCONTIG_OK, &pagelist);
if (!list_empty(&pagelist)) {
- err = migrate_pages(&pagelist, alloc_new_node_page, NULL, dest,
- MIGRATE_SYNC, MR_SYSCALL);
+ err = migrate_pages(&pagelist, alloc_migration_target, NULL,
+ (unsigned long)&mtc, MIGRATE_SYNC, MR_SYSCALL);
if (err)
putback_movable_pages(&pagelist);
}
diff --git a/mm/mempool.c b/mm/mempool.c
index 85efab3da720..79bff63ecf27 100644
--- a/mm/mempool.c
+++ b/mm/mempool.c
@@ -489,7 +489,7 @@ void mempool_free(void *element, mempool_t *pool)
* ensures that there will be frees which return elements to the
* pool waking up the waiters.
*/
- if (unlikely(pool->curr_nr < pool->min_nr)) {
+ if (unlikely(READ_ONCE(pool->curr_nr) < pool->min_nr)) {
spin_lock_irqsave(&pool->lock, flags);
if (likely(pool->curr_nr < pool->min_nr)) {
add_element(pool, element);
diff --git a/mm/migrate.c b/mm/migrate.c
index de0f3662c605..c23378169762 100644
--- a/mm/migrate.c
+++ b/mm/migrate.c
@@ -1533,6 +1533,49 @@ out:
return rc;
}
+struct page *alloc_migration_target(struct page *page, unsigned long private)
+{
+ struct migration_target_control *mtc;
+ gfp_t gfp_mask;
+ unsigned int order = 0;
+ struct page *new_page = NULL;
+ int nid;
+ int zidx;
+
+ mtc = (struct migration_target_control *)private;
+ gfp_mask = mtc->gfp_mask;
+ nid = mtc->nid;
+ if (nid == NUMA_NO_NODE)
+ nid = page_to_nid(page);
+
+ if (PageHuge(page)) {
+ struct hstate *h = page_hstate(compound_head(page));
+
+ gfp_mask = htlb_modify_alloc_mask(h, gfp_mask);
+ return alloc_huge_page_nodemask(h, nid, mtc->nmask, gfp_mask);
+ }
+
+ if (PageTransHuge(page)) {
+ /*
+ * clear __GFP_RECLAIM to make the migration callback
+ * consistent with regular THP allocations.
+ */
+ gfp_mask &= ~__GFP_RECLAIM;
+ gfp_mask |= GFP_TRANSHUGE;
+ order = HPAGE_PMD_ORDER;
+ }
+ zidx = zone_idx(page_zone(page));
+ if (is_highmem_idx(zidx) || zidx == ZONE_MOVABLE)
+ gfp_mask |= __GFP_HIGHMEM;
+
+ new_page = __alloc_pages_nodemask(gfp_mask, order, nid, mtc->nmask);
+
+ if (new_page && PageTransHuge(new_page))
+ prep_transhuge_page(new_page);
+
+ return new_page;
+}
+
#ifdef CONFIG_NUMA
static int store_status(int __user *status, int start, int value, int nr)
@@ -1550,9 +1593,13 @@ static int do_move_pages_to_node(struct mm_struct *mm,
struct list_head *pagelist, int node)
{
int err;
+ struct migration_target_control mtc = {
+ .nid = node,
+ .gfp_mask = GFP_HIGHUSER_MOVABLE | __GFP_THISNODE,
+ };
- err = migrate_pages(pagelist, alloc_new_node_page, NULL, node,
- MIGRATE_SYNC, MR_SYSCALL);
+ err = migrate_pages(pagelist, alloc_migration_target, NULL,
+ (unsigned long)&mtc, MIGRATE_SYNC, MR_SYSCALL);
if (err)
putback_movable_pages(pagelist);
return err;
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 6f4056cf7384..b31f310c2545 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -8349,6 +8349,10 @@ static int __alloc_contig_migrate_range(struct compact_control *cc,
unsigned long pfn = start;
unsigned int tries = 0;
int ret = 0;
+ struct migration_target_control mtc = {
+ .nid = zone_to_nid(cc->zone),
+ .gfp_mask = GFP_USER | __GFP_MOVABLE | __GFP_RETRY_MAYFAIL,
+ };
migrate_prep();
@@ -8375,8 +8379,8 @@ static int __alloc_contig_migrate_range(struct compact_control *cc,
&cc->migratepages);
cc->nr_migratepages -= nr_reclaimed;
- ret = migrate_pages(&cc->migratepages, alloc_migrate_target,
- NULL, 0, cc->mode, MR_CONTIG_RANGE);
+ ret = migrate_pages(&cc->migratepages, alloc_migration_target,
+ NULL, (unsigned long)&mtc, cc->mode, MR_CONTIG_RANGE);
}
if (ret < 0) {
putback_movable_pages(&cc->migratepages);
diff --git a/mm/page_counter.c b/mm/page_counter.c
index c56db2d5e159..e3a205275a0b 100644
--- a/mm/page_counter.c
+++ b/mm/page_counter.c
@@ -77,8 +77,8 @@ void page_counter_charge(struct page_counter *counter, unsigned long nr_pages)
* This is indeed racy, but we can live with some
* inaccuracy in the watermark.
*/
- if (new > c->watermark)
- c->watermark = new;
+ if (new > READ_ONCE(c->watermark))
+ WRITE_ONCE(c->watermark, new);
}
}
@@ -119,9 +119,10 @@ bool page_counter_try_charge(struct page_counter *counter,
propagate_protected_usage(counter, new);
/*
* This is racy, but we can live with some
- * inaccuracy in the failcnt.
+ * inaccuracy in the failcnt which is only used
+ * to report stats.
*/
- c->failcnt++;
+ data_race(c->failcnt++);
*fail = c;
goto failed;
}
@@ -130,8 +131,8 @@ bool page_counter_try_charge(struct page_counter *counter,
* Just like with failcnt, we can live with some
* inaccuracy in the watermark.
*/
- if (new > c->watermark)
- c->watermark = new;
+ if (new > READ_ONCE(c->watermark))
+ WRITE_ONCE(c->watermark, new);
}
return true;
diff --git a/mm/page_io.c b/mm/page_io.c
index eaaecb75a8c5..739cb5b458dc 100644
--- a/mm/page_io.c
+++ b/mm/page_io.c
@@ -85,7 +85,7 @@ static void swap_slot_free_notify(struct page *page)
return;
sis = page_swap_info(page);
- if (!(sis->flags & SWP_BLKDEV))
+ if (data_race(!(sis->flags & SWP_BLKDEV)))
return;
/*
@@ -312,7 +312,7 @@ int __swap_writepage(struct page *page, struct writeback_control *wbc,
struct swap_info_struct *sis = page_swap_info(page);
VM_BUG_ON_PAGE(!PageSwapCache(page), page);
- if (sis->flags & SWP_FS) {
+ if (data_race(sis->flags & SWP_FS)) {
struct kiocb kiocb;
struct file *swap_file = sis->swap_file;
struct address_space *mapping = swap_file->f_mapping;
@@ -403,7 +403,7 @@ int swap_readpage(struct page *page, bool synchronous)
goto out;
}
- if (sis->flags & SWP_FS) {
+ if (data_race(sis->flags & SWP_FS)) {
struct file *swap_file = sis->swap_file;
struct address_space *mapping = swap_file->f_mapping;
@@ -465,7 +465,7 @@ int swap_set_page_dirty(struct page *page)
{
struct swap_info_struct *sis = page_swap_info(page);
- if (sis->flags & SWP_FS) {
+ if (data_race(sis->flags & SWP_FS)) {
struct address_space *mapping = sis->swap_file->f_mapping;
VM_BUG_ON_PAGE(!PageSwapCache(page), page);
diff --git a/mm/page_isolation.c b/mm/page_isolation.c
index f6d07c5f0d34..242c03121d73 100644
--- a/mm/page_isolation.c
+++ b/mm/page_isolation.c
@@ -306,8 +306,3 @@ int test_pages_isolated(unsigned long start_pfn, unsigned long end_pfn,
return pfn < end_pfn ? -EBUSY : 0;
}
-
-struct page *alloc_migrate_target(struct page *page, unsigned long private)
-{
- return new_page_nodemask(page, numa_node_id(), &node_states[N_MEMORY]);
-}
diff --git a/mm/process_vm_access.c b/mm/process_vm_access.c
index cc85ce81914a..29c052099aff 100644
--- a/mm/process_vm_access.c
+++ b/mm/process_vm_access.c
@@ -105,7 +105,7 @@ static int process_vm_rw_single_vec(unsigned long addr,
* current/current->mm
*/
mmap_read_lock(mm);
- pinned_pages = pin_user_pages_remote(task, mm, pa, pinned_pages,
+ pinned_pages = pin_user_pages_remote(mm, pa, pinned_pages,
flags, process_pages,
NULL, &locked);
if (locked)
diff --git a/mm/rmap.c b/mm/rmap.c
index c56fab5826c1..b970353c06ed 100644
--- a/mm/rmap.c
+++ b/mm/rmap.c
@@ -672,7 +672,7 @@ static bool should_defer_flush(struct mm_struct *mm, enum ttu_flags flags)
*/
void flush_tlb_batched_pending(struct mm_struct *mm)
{
- if (mm->tlb_flush_batched) {
+ if (data_race(mm->tlb_flush_batched)) {
flush_tlb_mm(mm);
/*
diff --git a/mm/swap.c b/mm/swap.c
index 28ab40041b47..587be74a68de 100644
--- a/mm/swap.c
+++ b/mm/swap.c
@@ -631,7 +631,8 @@ void lru_add_drain_cpu(int cpu)
__pagevec_lru_add(pvec);
pvec = &per_cpu(lru_rotate.pvec, cpu);
- if (pagevec_count(pvec)) {
+ /* Disabling interrupts below acts as a compiler barrier. */
+ if (data_race(pagevec_count(pvec))) {
unsigned long flags;
/* No harm done if a racing interrupt already did this */
@@ -792,7 +793,7 @@ void lru_add_drain_all(void)
struct work_struct *work = &per_cpu(lru_add_drain_work, cpu);
if (pagevec_count(&per_cpu(lru_pvecs.lru_add, cpu)) ||
- pagevec_count(&per_cpu(lru_rotate.pvec, cpu)) ||
+ data_race(pagevec_count(&per_cpu(lru_rotate.pvec, cpu))) ||
pagevec_count(&per_cpu(lru_pvecs.lru_deactivate_file, cpu)) ||
pagevec_count(&per_cpu(lru_pvecs.lru_deactivate, cpu)) ||
pagevec_count(&per_cpu(lru_pvecs.lru_lazyfree, cpu)) ||
diff --git a/mm/swap_state.c b/mm/swap_state.c
index 1983be226b1c..66e750f361ed 100644
--- a/mm/swap_state.c
+++ b/mm/swap_state.c
@@ -57,8 +57,8 @@ static bool enable_vma_readahead __read_mostly = true;
#define GET_SWAP_RA_VAL(vma) \
(atomic_long_read(&(vma)->swap_readahead_info) ? : 4)
-#define INC_CACHE_INFO(x) do { swap_cache_info.x++; } while (0)
-#define ADD_CACHE_INFO(x, nr) do { swap_cache_info.x += (nr); } while (0)
+#define INC_CACHE_INFO(x) data_race(swap_cache_info.x++)
+#define ADD_CACHE_INFO(x, nr) data_race(swap_cache_info.x += (nr))
static struct {
unsigned long add_total;
diff --git a/mm/swapfile.c b/mm/swapfile.c
index 7d22347264e6..7b0974f0fcc4 100644
--- a/mm/swapfile.c
+++ b/mm/swapfile.c
@@ -672,7 +672,7 @@ static void swap_range_alloc(struct swap_info_struct *si, unsigned long offset,
if (offset == si->lowest_bit)
si->lowest_bit += nr_entries;
if (end == si->highest_bit)
- si->highest_bit -= nr_entries;
+ WRITE_ONCE(si->highest_bit, si->highest_bit - nr_entries);
si->inuse_pages += nr_entries;
if (si->inuse_pages == si->pages) {
si->lowest_bit = si->max;
@@ -704,7 +704,7 @@ static void swap_range_free(struct swap_info_struct *si, unsigned long offset,
if (end > si->highest_bit) {
bool was_full = !si->highest_bit;
- si->highest_bit = end;
+ WRITE_ONCE(si->highest_bit, end);
if (was_full && (si->flags & SWP_WRITEOK))
add_to_avail_list(si);
}
@@ -869,7 +869,7 @@ checks:
else
goto done;
}
- si->swap_map[offset] = usage;
+ WRITE_ONCE(si->swap_map[offset], usage);
inc_cluster_info_page(si, si->cluster_info, offset);
unlock_cluster(ci);
@@ -928,12 +928,13 @@ done:
scan:
spin_unlock(&si->lock);
- while (++offset <= si->highest_bit) {
- if (!si->swap_map[offset]) {
+ while (++offset <= READ_ONCE(si->highest_bit)) {
+ if (data_race(!si->swap_map[offset])) {
spin_lock(&si->lock);
goto checks;
}
- if (vm_swap_full() && si->swap_map[offset] == SWAP_HAS_CACHE) {
+ if (vm_swap_full() &&
+ READ_ONCE(si->swap_map[offset]) == SWAP_HAS_CACHE) {
spin_lock(&si->lock);
goto checks;
}
@@ -945,11 +946,12 @@ scan:
}
offset = si->lowest_bit;
while (offset < scan_base) {
- if (!si->swap_map[offset]) {
+ if (data_race(!si->swap_map[offset])) {
spin_lock(&si->lock);
goto checks;
}
- if (vm_swap_full() && si->swap_map[offset] == SWAP_HAS_CACHE) {
+ if (vm_swap_full() &&
+ READ_ONCE(si->swap_map[offset]) == SWAP_HAS_CACHE) {
spin_lock(&si->lock);
goto checks;
}
@@ -1148,7 +1150,7 @@ static struct swap_info_struct *__swap_info_get(swp_entry_t entry)
p = swp_swap_info(entry);
if (!p)
goto bad_nofile;
- if (!(p->flags & SWP_USED))
+ if (data_race(!(p->flags & SWP_USED)))
goto bad_device;
offset = swp_offset(entry);
if (offset >= p->max)
@@ -1174,7 +1176,7 @@ static struct swap_info_struct *_swap_info_get(swp_entry_t entry)
p = __swap_info_get(entry);
if (!p)
goto out;
- if (!p->swap_map[swp_offset(entry)])
+ if (data_race(!p->swap_map[swp_offset(entry)]))
goto bad_free;
return p;
@@ -1243,7 +1245,10 @@ static unsigned char __swap_entry_free_locked(struct swap_info_struct *p,
}
usage = count | has_cache;
- p->swap_map[offset] = usage ? : SWAP_HAS_CACHE;
+ if (usage)
+ WRITE_ONCE(p->swap_map[offset], usage);
+ else
+ WRITE_ONCE(p->swap_map[offset], SWAP_HAS_CACHE);
return usage;
}
@@ -1295,7 +1300,7 @@ struct swap_info_struct *get_swap_device(swp_entry_t entry)
goto bad_nofile;
rcu_read_lock();
- if (!(si->flags & SWP_VALID))
+ if (data_race(!(si->flags & SWP_VALID)))
goto unlock_out;
offset = swp_offset(entry);
if (offset >= si->max)
@@ -3484,7 +3489,7 @@ static int __swap_duplicate(swp_entry_t entry, unsigned char usage)
} else
err = -ENOENT; /* unused swap entry */
- p->swap_map[offset] = count | has_cache;
+ WRITE_ONCE(p->swap_map[offset], count | has_cache);
unlock_out:
unlock_cluster_or_swap_info(p, ci);
diff --git a/scripts/deprecated_terms.txt b/scripts/deprecated_terms.txt
index 4512ef5d5ffa..1be27a24187b 100644
--- a/scripts/deprecated_terms.txt
+++ b/scripts/deprecated_terms.txt
@@ -3,5 +3,8 @@
# The format of each line is:
# deprecated||suggested
#
-blacklist||denylist
-whitelist||allowlist
+blacklist||(denylist|blocklist)
+# For other alternatives of 'slave', Please refer to
+# Documentation/process/coding-style.rst
+slave||(secondary|target|...)
+whitelist||(allowlist|passlist)
diff --git a/security/tomoyo/domain.c b/security/tomoyo/domain.c
index 7869d6a9980b..afe5e68ede77 100644
--- a/security/tomoyo/domain.c
+++ b/security/tomoyo/domain.c
@@ -914,7 +914,7 @@ bool tomoyo_dump_page(struct linux_binprm *bprm, unsigned long pos,
* (represented by bprm). 'current' is the process doing
* the execve().
*/
- if (get_user_pages_remote(current, bprm->mm, pos, 1,
+ if (get_user_pages_remote(bprm->mm, pos, 1,
FOLL_FORCE, &page, NULL, NULL) <= 0)
return false;
#else
diff --git a/tools/perf/arch/powerpc/entry/syscalls/syscall.tbl b/tools/perf/arch/powerpc/entry/syscalls/syscall.tbl
index 35b61bfc1b1a..6d29d9a6bc60 100644
--- a/tools/perf/arch/powerpc/entry/syscalls/syscall.tbl
+++ b/tools/perf/arch/powerpc/entry/syscalls/syscall.tbl
@@ -193,7 +193,7 @@
146 common writev sys_writev compat_sys_writev
147 common getsid sys_getsid
148 common fdatasync sys_fdatasync
-149 nospu _sysctl sys_sysctl compat_sys_sysctl
+149 nospu _sysctl sys_ni_syscall
150 common mlock sys_mlock
151 common munlock sys_munlock
152 common mlockall sys_mlockall
diff --git a/tools/perf/arch/s390/entry/syscalls/syscall.tbl b/tools/perf/arch/s390/entry/syscalls/syscall.tbl
index b38d48464368..29144b79a49d 100644
--- a/tools/perf/arch/s390/entry/syscalls/syscall.tbl
+++ b/tools/perf/arch/s390/entry/syscalls/syscall.tbl
@@ -138,7 +138,7 @@
146 common writev sys_writev compat_sys_writev
147 common getsid sys_getsid sys_getsid
148 common fdatasync sys_fdatasync sys_fdatasync
-149 common _sysctl sys_sysctl compat_sys_sysctl
+149 common _sysctl - -
150 common mlock sys_mlock compat_sys_mlock
151 common munlock sys_munlock compat_sys_munlock
152 common mlockall sys_mlockall sys_mlockall
diff --git a/tools/perf/arch/x86/entry/syscalls/syscall_64.tbl b/tools/perf/arch/x86/entry/syscalls/syscall_64.tbl
index 78847b32e137..d1c42227b857 100644
--- a/tools/perf/arch/x86/entry/syscalls/syscall_64.tbl
+++ b/tools/perf/arch/x86/entry/syscalls/syscall_64.tbl
@@ -164,7 +164,7 @@
153 common vhangup sys_vhangup
154 common modify_ldt sys_modify_ldt
155 common pivot_root sys_pivot_root
-156 64 _sysctl sys_sysctl
+156 64 _sysctl sys_ni_syscall
157 common prctl sys_prctl
158 common arch_prctl sys_arch_prctl
159 common adjtimex sys_adjtimex
diff --git a/virt/kvm/async_pf.c b/virt/kvm/async_pf.c
index 390f758d5a27..dd777688d14a 100644
--- a/virt/kvm/async_pf.c
+++ b/virt/kvm/async_pf.c
@@ -61,7 +61,7 @@ static void async_pf_execute(struct work_struct *work)
* access remotely.
*/
mmap_read_lock(mm);
- get_user_pages_remote(NULL, mm, addr, 1, FOLL_WRITE, NULL, NULL,
+ get_user_pages_remote(mm, addr, 1, FOLL_WRITE, NULL, NULL,
&locked);
if (locked)
mmap_read_unlock(mm);
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index 2c2c0254c2d8..737666db02de 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -1893,7 +1893,7 @@ static int hva_to_pfn_remapped(struct vm_area_struct *vma,
* not call the fault handler, so do it here.
*/
bool unlocked = false;
- r = fixup_user_fault(current, current->mm, addr,
+ r = fixup_user_fault(current->mm, addr,
(write_fault ? FAULT_FLAG_WRITE : 0),
&unlocked);
if (unlocked)