diff options
Diffstat (limited to 'mm/madvise.c')
-rw-r--r-- | mm/madvise.c | 93 |
1 files changed, 92 insertions, 1 deletions
diff --git a/mm/madvise.c b/mm/madvise.c index d550ef045288..416a56b8e757 100644 --- a/mm/madvise.c +++ b/mm/madvise.c @@ -17,6 +17,8 @@ #include <linux/falloc.h> #include <linux/fadvise.h> #include <linux/sched.h> +#include <linux/sched/mm.h> +#include <linux/uio.h> #include <linux/ksm.h> #include <linux/fs.h> #include <linux/file.h> @@ -27,7 +29,6 @@ #include <linux/swapops.h> #include <linux/shmem_fs.h> #include <linux/mmu_notifier.h> -#include <linux/sched/mm.h> #include <asm/tlb.h> @@ -988,6 +989,18 @@ madvise_behavior_valid(int behavior) } } +static bool +process_madvise_behavior_valid(int behavior) +{ + switch (behavior) { + case MADV_COLD: + case MADV_PAGEOUT: + return true; + default: + return false; + } +} + /* * The madvise(2) system call. * @@ -1035,6 +1048,11 @@ madvise_behavior_valid(int behavior) * MADV_DONTDUMP - the application wants to prevent pages in the given range * from being included in its core dump. * MADV_DODUMP - cancel MADV_DONTDUMP: no longer exclude from core dump. + * MADV_COLD - the application is not expected to use this memory soon, + * deactivate pages in this range so that they can be reclaimed + * easily if memory pressure happens. + * MADV_PAGEOUT - the application is not expected to use this memory soon, + * page out the pages in this range immediately. 
* * return values: * zero - success @@ -1151,3 +1169,76 @@ SYSCALL_DEFINE3(madvise, unsigned long, start, size_t, len_in, int, behavior) { return do_madvise(current->mm, start, len_in, behavior); } + +SYSCALL_DEFINE5(process_madvise, int, pidfd, const struct iovec __user *, vec, + size_t, vlen, int, behavior, unsigned int, flags) +{ + ssize_t ret; + struct iovec iovstack[UIO_FASTIOV], iovec; + struct iovec *iov = iovstack; + struct iov_iter iter; + struct pid *pid; + struct task_struct *task; + struct mm_struct *mm; + size_t total_len; + unsigned int f_flags; + + if (flags != 0) { + ret = -EINVAL; + goto out; + } + + ret = import_iovec(READ, vec, vlen, ARRAY_SIZE(iovstack), &iov, &iter); + if (ret < 0) + goto out; + + pid = pidfd_get_pid(pidfd, &f_flags); + if (IS_ERR(pid)) { + ret = PTR_ERR(pid); + goto free_iov; + } + + task = get_pid_task(pid, PIDTYPE_PID); + if (!task) { + ret = -ESRCH; + goto put_pid; + } + + if (task->mm != current->mm && + !process_madvise_behavior_valid(behavior)) { + ret = -EINVAL; + goto release_task; + } + + mm = mm_access(task, PTRACE_MODE_ATTACH_FSCREDS); + if (IS_ERR_OR_NULL(mm)) { + ret = IS_ERR(mm) ? PTR_ERR(mm) : -ESRCH; + goto release_task; + } + + total_len = iov_iter_count(&iter); + + while (iov_iter_count(&iter)) { + iovec = iov_iter_iovec(&iter); + ret = do_madvise(mm, (unsigned long)iovec.iov_base, + iovec.iov_len, behavior); + if (ret < 0) + break; + iov_iter_advance(&iter, iovec.iov_len); + } + + if (ret == 0) + ret = total_len - iov_iter_count(&iter); + + mmput(mm); + return ret; + +release_task: + put_task_struct(task); +put_pid: + put_pid(pid); +free_iov: + kfree(iov); +out: + return ret; +} |