summaryrefslogtreecommitdiff
path: root/fs/proc
diff options
context:
space:
mode:
Diffstat (limited to 'fs/proc')
-rw-r--r--fs/proc/Kconfig15
-rw-r--r--fs/proc/array.c83
-rw-r--r--fs/proc/base.c165
-rw-r--r--fs/proc/cmdline.c14
-rw-r--r--fs/proc/consoles.c14
-rw-r--r--fs/proc/devices.c14
-rw-r--r--fs/proc/fd.c2
-rw-r--r--fs/proc/generic.c148
-rw-r--r--fs/proc/internal.h17
-rw-r--r--fs/proc/interrupts.c14
-rw-r--r--fs/proc/kcore.c23
-rw-r--r--fs/proc/loadavg.c16
-rw-r--r--fs/proc/meminfo.c14
-rw-r--r--fs/proc/nommu.c14
-rw-r--r--fs/proc/page.c2
-rw-r--r--fs/proc/proc_net.c104
-rw-r--r--fs/proc/proc_tty.c22
-rw-r--r--fs/proc/self.c4
-rw-r--r--fs/proc/softirqs.c14
-rw-r--r--fs/proc/task_mmu.c39
-rw-r--r--fs/proc/thread_self.c4
-rw-r--r--fs/proc/uptime.c14
-rw-r--r--fs/proc/version.c14
-rw-r--r--fs/proc/vmcore.c386
24 files changed, 757 insertions, 399 deletions
diff --git a/fs/proc/Kconfig b/fs/proc/Kconfig
index 1ade1206bb89..0eaeb41453f5 100644
--- a/fs/proc/Kconfig
+++ b/fs/proc/Kconfig
@@ -43,6 +43,21 @@ config PROC_VMCORE
help
Exports the dump image of crashed kernel in ELF format.
+config PROC_VMCORE_DEVICE_DUMP
+ bool "Device Hardware/Firmware Log Collection"
+ depends on PROC_VMCORE
+ default n
+ help
+ After kernel panic, device drivers can collect the device
+ specific snapshot of their hardware or firmware before the
+ underlying devices are initialized in crash recovery kernel.
+ Note that the device driver must be present in the crash
+ recovery kernel's initramfs to collect its underlying device
+ snapshot.
+
+ If you say Y here, the collected device dumps will be added
+ as ELF notes to /proc/vmcore.
+
config PROC_SYSCTL
bool "Sysctl support (/proc/sys)" if EXPERT
depends on PROC_FS
diff --git a/fs/proc/array.c b/fs/proc/array.c
index ae2c807fd719..0ceb3b6b37e7 100644
--- a/fs/proc/array.c
+++ b/fs/proc/array.c
@@ -85,6 +85,7 @@
#include <linux/delayacct.h>
#include <linux/seq_file.h>
#include <linux/pid_namespace.h>
+#include <linux/prctl.h>
#include <linux/ptrace.h>
#include <linux/tracehook.h>
#include <linux/string_helpers.h>
@@ -95,22 +96,29 @@
#include <asm/processor.h>
#include "internal.h"
-static inline void task_name(struct seq_file *m, struct task_struct *p)
+void proc_task_name(struct seq_file *m, struct task_struct *p, bool escape)
{
char *buf;
size_t size;
- char tcomm[sizeof(p->comm)];
+ char tcomm[64];
int ret;
- get_task_comm(tcomm, p);
-
- seq_puts(m, "Name:\t");
+ if (p->flags & PF_WQ_WORKER)
+ wq_worker_comm(tcomm, sizeof(tcomm), p);
+ else
+ __get_task_comm(tcomm, sizeof(tcomm), p);
size = seq_get_buf(m, &buf);
- ret = string_escape_str(tcomm, buf, size, ESCAPE_SPACE | ESCAPE_SPECIAL, "\n\\");
- seq_commit(m, ret < size ? ret : -1);
+ if (escape) {
+ ret = string_escape_str(tcomm, buf, size,
+ ESCAPE_SPACE | ESCAPE_SPECIAL, "\n\\");
+ if (ret >= size)
+ ret = -1;
+ } else {
+ ret = strscpy(buf, tcomm, size);
+ }
- seq_putc(m, '\n');
+ seq_commit(m, ret);
}
/*
@@ -260,7 +268,7 @@ static inline void task_sig(struct seq_file *m, struct task_struct *p)
unsigned long flags;
sigset_t pending, shpending, blocked, ignored, caught;
int num_threads = 0;
- unsigned long qsize = 0;
+ unsigned int qsize = 0;
unsigned long qlim = 0;
sigemptyset(&pending);
@@ -335,6 +343,30 @@ static inline void task_seccomp(struct seq_file *m, struct task_struct *p)
#ifdef CONFIG_SECCOMP
seq_put_decimal_ull(m, "\nSeccomp:\t", p->seccomp.mode);
#endif
+ seq_printf(m, "\nSpeculation_Store_Bypass:\t");
+ switch (arch_prctl_spec_ctrl_get(p, PR_SPEC_STORE_BYPASS)) {
+ case -EINVAL:
+ seq_printf(m, "unknown");
+ break;
+ case PR_SPEC_NOT_AFFECTED:
+ seq_printf(m, "not vulnerable");
+ break;
+ case PR_SPEC_PRCTL | PR_SPEC_FORCE_DISABLE:
+ seq_printf(m, "thread force mitigated");
+ break;
+ case PR_SPEC_PRCTL | PR_SPEC_DISABLE:
+ seq_printf(m, "thread mitigated");
+ break;
+ case PR_SPEC_PRCTL | PR_SPEC_ENABLE:
+ seq_printf(m, "thread vulnerable");
+ break;
+ case PR_SPEC_DISABLE:
+ seq_printf(m, "globally mitigated");
+ break;
+ default:
+ seq_printf(m, "vulnerable");
+ break;
+ }
seq_putc(m, '\n');
}
@@ -365,7 +397,10 @@ int proc_pid_status(struct seq_file *m, struct pid_namespace *ns,
{
struct mm_struct *mm = get_task_mm(task);
- task_name(m, task);
+ seq_puts(m, "Name:\t");
+ proc_task_name(m, task, true);
+ seq_putc(m, '\n');
+
task_state(m, ns, pid, task);
if (mm) {
@@ -400,7 +435,6 @@ static int do_task_stat(struct seq_file *m, struct pid_namespace *ns,
u64 cutime, cstime, utime, stime;
u64 cgtime, gtime;
unsigned long rsslim = 0;
- char tcomm[sizeof(task->comm)];
unsigned long flags;
state = *get_task_state(task);
@@ -427,8 +461,6 @@ static int do_task_stat(struct seq_file *m, struct pid_namespace *ns,
}
}
- get_task_comm(tcomm, task);
-
sigemptyset(&sigign);
sigemptyset(&sigcatch);
cutime = cstime = utime = stime = 0;
@@ -495,7 +527,7 @@ static int do_task_stat(struct seq_file *m, struct pid_namespace *ns,
seq_put_decimal_ull(m, "", pid_nr_ns(pid, ns));
seq_puts(m, " (");
- seq_puts(m, tcomm);
+ proc_task_name(m, task, false);
seq_puts(m, ") ");
seq_putc(m, state);
seq_put_decimal_ll(m, " ", ppid);
@@ -677,25 +709,22 @@ out:
static int children_seq_show(struct seq_file *seq, void *v)
{
- struct inode *inode = seq->private;
- pid_t pid;
-
- pid = pid_nr_ns(v, inode->i_sb->s_fs_info);
- seq_printf(seq, "%d ", pid);
+ struct inode *inode = file_inode(seq->file);
+ seq_printf(seq, "%d ", pid_nr_ns(v, proc_pid_ns(inode)));
return 0;
}
static void *children_seq_start(struct seq_file *seq, loff_t *pos)
{
- return get_children_pid(seq->private, NULL, *pos);
+ return get_children_pid(file_inode(seq->file), NULL, *pos);
}
static void *children_seq_next(struct seq_file *seq, void *v, loff_t *pos)
{
struct pid *pid;
- pid = get_children_pid(seq->private, v, *pos + 1);
+ pid = get_children_pid(file_inode(seq->file), v, *pos + 1);
put_pid(v);
++*pos;
@@ -716,17 +745,7 @@ static const struct seq_operations children_seq_ops = {
static int children_seq_open(struct inode *inode, struct file *file)
{
- struct seq_file *m;
- int ret;
-
- ret = seq_open(file, &children_seq_ops);
- if (ret)
- return ret;
-
- m = file->private_data;
- m->private = inode;
-
- return ret;
+ return seq_open(file, &children_seq_ops);
}
const struct file_operations proc_tid_children_operations = {
diff --git a/fs/proc/base.c b/fs/proc/base.c
index 6f927340db0a..8358c5330c0b 100644
--- a/fs/proc/base.c
+++ b/fs/proc/base.c
@@ -213,10 +213,14 @@ static ssize_t proc_pid_cmdline_read(struct file *file, char __user *buf,
char *page;
unsigned long count = _count;
unsigned long arg_start, arg_end, env_start, env_end;
- unsigned long len1, len2, len;
- unsigned long p;
+ unsigned long len1, len2;
+ char __user *buf0 = buf;
+ struct {
+ unsigned long p;
+ unsigned long len;
+ } cmdline[2];
char c;
- ssize_t rv;
+ int rv;
BUG_ON(*pos < 0);
@@ -239,12 +243,12 @@ static ssize_t proc_pid_cmdline_read(struct file *file, char __user *buf,
goto out_mmput;
}
- down_read(&mm->mmap_sem);
+ spin_lock(&mm->arg_lock);
arg_start = mm->arg_start;
arg_end = mm->arg_end;
env_start = mm->env_start;
env_end = mm->env_end;
- up_read(&mm->mmap_sem);
+ spin_unlock(&mm->arg_lock);
BUG_ON(arg_start > arg_end);
BUG_ON(env_start > env_end);
@@ -253,61 +257,31 @@ static ssize_t proc_pid_cmdline_read(struct file *file, char __user *buf,
len2 = env_end - env_start;
/* Empty ARGV. */
- if (len1 == 0) {
- rv = 0;
- goto out_free_page;
- }
+ if (len1 == 0)
+ goto end;
+
/*
* Inherently racy -- command line shares address space
* with code and data.
*/
- rv = access_remote_vm(mm, arg_end - 1, &c, 1, 0);
- if (rv <= 0)
- goto out_free_page;
-
- rv = 0;
+ if (access_remote_vm(mm, arg_end - 1, &c, 1, FOLL_ANON) != 1)
+ goto end;
+ cmdline[0].p = arg_start;
+ cmdline[0].len = len1;
if (c == '\0') {
/* Command line (set of strings) occupies whole ARGV. */
- if (len1 <= *pos)
- goto out_free_page;
-
- p = arg_start + *pos;
- len = len1 - *pos;
- while (count > 0 && len > 0) {
- unsigned int _count;
- int nr_read;
-
- _count = min3(count, len, PAGE_SIZE);
- nr_read = access_remote_vm(mm, p, page, _count, 0);
- if (nr_read < 0)
- rv = nr_read;
- if (nr_read <= 0)
- goto out_free_page;
-
- if (copy_to_user(buf, page, nr_read)) {
- rv = -EFAULT;
- goto out_free_page;
- }
-
- p += nr_read;
- len -= nr_read;
- buf += nr_read;
- count -= nr_read;
- rv += nr_read;
- }
+ cmdline[1].len = 0;
} else {
/*
* Command line (1 string) occupies ARGV and
* extends into ENVP.
*/
- struct {
- unsigned long p;
- unsigned long len;
- } cmdline[2] = {
- { .p = arg_start, .len = len1 },
- { .p = env_start, .len = len2 },
- };
+ cmdline[1].p = env_start;
+ cmdline[1].len = len2;
+ }
+
+ {
loff_t pos1 = *pos;
unsigned int i;
@@ -317,44 +291,40 @@ static ssize_t proc_pid_cmdline_read(struct file *file, char __user *buf,
i++;
}
while (i < 2) {
+ unsigned long p;
+ unsigned long len;
+
p = cmdline[i].p + pos1;
len = cmdline[i].len - pos1;
while (count > 0 && len > 0) {
- unsigned int _count, l;
- int nr_read;
- bool final;
-
- _count = min3(count, len, PAGE_SIZE);
- nr_read = access_remote_vm(mm, p, page, _count, 0);
- if (nr_read < 0)
- rv = nr_read;
- if (nr_read <= 0)
- goto out_free_page;
+ unsigned int nr_read, nr_write;
+
+ nr_read = min3(count, len, PAGE_SIZE);
+ nr_read = access_remote_vm(mm, p, page, nr_read, FOLL_ANON);
+ if (nr_read == 0)
+ goto end;
/*
* Command line can be shorter than whole ARGV
* even if last "marker" byte says it is not.
*/
- final = false;
- l = strnlen(page, nr_read);
- if (l < nr_read) {
- nr_read = l;
- final = true;
- }
+ if (c == '\0')
+ nr_write = nr_read;
+ else
+ nr_write = strnlen(page, nr_read);
- if (copy_to_user(buf, page, nr_read)) {
+ if (copy_to_user(buf, page, nr_write)) {
rv = -EFAULT;
goto out_free_page;
}
- p += nr_read;
- len -= nr_read;
- buf += nr_read;
- count -= nr_read;
- rv += nr_read;
+ p += nr_write;
+ len -= nr_write;
+ buf += nr_write;
+ count -= nr_write;
- if (final)
- goto out_free_page;
+ if (nr_write < nr_read)
+ goto end;
}
/* Only first chunk can be read partially. */
@@ -363,12 +333,13 @@ static ssize_t proc_pid_cmdline_read(struct file *file, char __user *buf,
}
}
+end:
+ *pos += buf - buf0;
+ rv = buf - buf0;
out_free_page:
free_page((unsigned long)page);
out_mmput:
mmput(mm);
- if (rv > 0)
- *pos += rv;
return rv;
}
@@ -430,7 +401,6 @@ static int proc_pid_stack(struct seq_file *m, struct pid_namespace *ns,
struct stack_trace trace;
unsigned long *entries;
int err;
- int i;
entries = kmalloc(MAX_STACK_TRACE_DEPTH * sizeof(*entries), GFP_KERNEL);
if (!entries)
@@ -443,6 +413,8 @@ static int proc_pid_stack(struct seq_file *m, struct pid_namespace *ns,
err = lock_trace(task);
if (!err) {
+ unsigned int i;
+
save_stack_trace_tsk(task, &trace);
for (i = 0; i < trace.nr_entries; i++) {
@@ -698,7 +670,7 @@ static bool has_pid_permissions(struct pid_namespace *pid,
static int proc_pid_permission(struct inode *inode, int mask)
{
- struct pid_namespace *pid = inode->i_sb->s_fs_info;
+ struct pid_namespace *pid = proc_pid_ns(inode);
struct task_struct *task;
bool has_perms;
@@ -733,13 +705,11 @@ static const struct inode_operations proc_def_inode_operations = {
static int proc_single_show(struct seq_file *m, void *v)
{
struct inode *inode = m->private;
- struct pid_namespace *ns;
- struct pid *pid;
+ struct pid_namespace *ns = proc_pid_ns(inode);
+ struct pid *pid = proc_pid(inode);
struct task_struct *task;
int ret;
- ns = inode->i_sb->s_fs_info;
- pid = proc_pid(inode);
task = get_pid_task(pid, PIDTYPE_PID);
if (!task)
return -ESRCH;
@@ -929,10 +899,10 @@ static ssize_t environ_read(struct file *file, char __user *buf,
if (!mmget_not_zero(mm))
goto free;
- down_read(&mm->mmap_sem);
+ spin_lock(&mm->arg_lock);
env_start = mm->env_start;
env_end = mm->env_end;
- up_read(&mm->mmap_sem);
+ spin_unlock(&mm->arg_lock);
while (count > 0) {
size_t this_len, max_len;
@@ -946,7 +916,7 @@ static ssize_t environ_read(struct file *file, char __user *buf,
max_len = min_t(size_t, PAGE_SIZE, count);
this_len = min(max_len, this_len);
- retval = access_remote_vm(mm, (env_start + src), page, this_len, 0);
+ retval = access_remote_vm(mm, (env_start + src), page, this_len, FOLL_ANON);
if (retval <= 0) {
ret = retval;
@@ -1410,7 +1380,7 @@ static const struct file_operations proc_fail_nth_operations = {
static int sched_show(struct seq_file *m, void *v)
{
struct inode *inode = m->private;
- struct pid_namespace *ns = inode->i_sb->s_fs_info;
+ struct pid_namespace *ns = proc_pid_ns(inode);
struct task_struct *p;
p = get_proc_task(inode);
@@ -1565,9 +1535,8 @@ static int comm_show(struct seq_file *m, void *v)
if (!p)
return -ESRCH;
- task_lock(p);
- seq_printf(m, "%s\n", p->comm);
- task_unlock(p);
+ proc_task_name(m, p, false);
+ seq_putc(m, '\n');
put_task_struct(p);
@@ -1693,6 +1662,12 @@ void task_dump_owner(struct task_struct *task, umode_t mode,
kuid_t uid;
kgid_t gid;
+ if (unlikely(task->flags & PF_KTHREAD)) {
+ *ruid = GLOBAL_ROOT_UID;
+ *rgid = GLOBAL_ROOT_GID;
+ return;
+ }
+
/* Default to the tasks effective ownership */
rcu_read_lock();
cred = __task_cred(task);
@@ -1776,14 +1751,14 @@ int pid_getattr(const struct path *path, struct kstat *stat,
u32 request_mask, unsigned int query_flags)
{
struct inode *inode = d_inode(path->dentry);
+ struct pid_namespace *pid = proc_pid_ns(inode);
struct task_struct *task;
- struct pid_namespace *pid = path->dentry->d_sb->s_fs_info;
generic_fillattr(inode, stat);
- rcu_read_lock();
stat->uid = GLOBAL_ROOT_UID;
stat->gid = GLOBAL_ROOT_GID;
+ rcu_read_lock();
task = pid_task(proc_pid(inode), PIDTYPE_PID);
if (task) {
if (!has_pid_permissions(pid, task, HIDEPID_INVISIBLE)) {
@@ -1872,7 +1847,7 @@ const struct dentry_operations pid_dentry_operations =
* by stat.
*/
bool proc_fill_cache(struct file *file, struct dir_context *ctx,
- const char *name, int len,
+ const char *name, unsigned int len,
instantiate_t instantiate, struct task_struct *task, const void *ptr)
{
struct dentry *child, *dir = file->f_path.dentry;
@@ -2334,7 +2309,7 @@ static int proc_timers_open(struct inode *inode, struct file *file)
return -ENOMEM;
tp->pid = proc_pid(inode);
- tp->ns = inode->i_sb->s_fs_info;
+ tp->ns = proc_pid_ns(inode);
return 0;
}
@@ -3224,7 +3199,7 @@ retry:
int proc_pid_readdir(struct file *file, struct dir_context *ctx)
{
struct tgid_iter iter;
- struct pid_namespace *ns = file_inode(file)->i_sb->s_fs_info;
+ struct pid_namespace *ns = proc_pid_ns(file_inode(file));
loff_t pos = ctx->pos;
if (pos >= PID_MAX_LIMIT + TGID_OFFSET)
@@ -3248,7 +3223,7 @@ int proc_pid_readdir(struct file *file, struct dir_context *ctx)
iter.task;
iter.tgid += 1, iter = next_tgid(ns, iter)) {
char name[10 + 1];
- int len;
+ unsigned int len;
cond_resched();
if (!has_pid_permissions(ns, iter.task, HIDEPID_INVISIBLE))
@@ -3568,14 +3543,14 @@ static int proc_task_readdir(struct file *file, struct dir_context *ctx)
/* f_version caches the tgid value that the last readdir call couldn't
* return. lseek aka telldir automagically resets f_version to 0.
*/
- ns = inode->i_sb->s_fs_info;
+ ns = proc_pid_ns(inode);
tid = (int)file->f_version;
file->f_version = 0;
for (task = first_tid(proc_pid(inode), tid, ctx->pos - 2, ns);
task;
task = next_tid(task), ctx->pos++) {
char name[10 + 1];
- int len;
+ unsigned int len;
tid = task_pid_nr_ns(task, ns);
len = snprintf(name, sizeof(name), "%u", tid);
if (!proc_fill_cache(file, ctx, name, len,
diff --git a/fs/proc/cmdline.c b/fs/proc/cmdline.c
index 8233e7af9389..fa762c5fbcb2 100644
--- a/fs/proc/cmdline.c
+++ b/fs/proc/cmdline.c
@@ -11,21 +11,9 @@ static int cmdline_proc_show(struct seq_file *m, void *v)
return 0;
}
-static int cmdline_proc_open(struct inode *inode, struct file *file)
-{
- return single_open(file, cmdline_proc_show, NULL);
-}
-
-static const struct file_operations cmdline_proc_fops = {
- .open = cmdline_proc_open,
- .read = seq_read,
- .llseek = seq_lseek,
- .release = single_release,
-};
-
static int __init proc_cmdline_init(void)
{
- proc_create("cmdline", 0, NULL, &cmdline_proc_fops);
+ proc_create_single("cmdline", 0, NULL, cmdline_proc_show);
return 0;
}
fs_initcall(proc_cmdline_init);
diff --git a/fs/proc/consoles.c b/fs/proc/consoles.c
index a8ac48aebd59..954caf0b7fee 100644
--- a/fs/proc/consoles.c
+++ b/fs/proc/consoles.c
@@ -91,21 +91,9 @@ static const struct seq_operations consoles_op = {
.show = show_console_dev
};
-static int consoles_open(struct inode *inode, struct file *file)
-{
- return seq_open(file, &consoles_op);
-}
-
-static const struct file_operations proc_consoles_operations = {
- .open = consoles_open,
- .read = seq_read,
- .llseek = seq_lseek,
- .release = seq_release,
-};
-
static int __init proc_consoles_init(void)
{
- proc_create("consoles", 0, NULL, &proc_consoles_operations);
+ proc_create_seq("consoles", 0, NULL, &consoles_op);
return 0;
}
fs_initcall(proc_consoles_init);
diff --git a/fs/proc/devices.c b/fs/proc/devices.c
index 2c7f22b14489..37d38697eaf8 100644
--- a/fs/proc/devices.c
+++ b/fs/proc/devices.c
@@ -51,21 +51,9 @@ static const struct seq_operations devinfo_ops = {
.show = devinfo_show
};
-static int devinfo_open(struct inode *inode, struct file *filp)
-{
- return seq_open(filp, &devinfo_ops);
-}
-
-static const struct file_operations proc_devinfo_operations = {
- .open = devinfo_open,
- .read = seq_read,
- .llseek = seq_lseek,
- .release = seq_release,
-};
-
static int __init proc_devices_init(void)
{
- proc_create("devices", 0, NULL, &proc_devinfo_operations);
+ proc_create_seq("devices", 0, NULL, &devinfo_ops);
return 0;
}
fs_initcall(proc_devices_init);
diff --git a/fs/proc/fd.c b/fs/proc/fd.c
index 05b9893e9a22..81882a13212d 100644
--- a/fs/proc/fd.c
+++ b/fs/proc/fd.c
@@ -248,7 +248,7 @@ static int proc_readfd_common(struct file *file, struct dir_context *ctx,
struct file *f;
struct fd_data data;
char name[10 + 1];
- int len;
+ unsigned int len;
f = fcheck_files(files, fd);
if (!f)
diff --git a/fs/proc/generic.c b/fs/proc/generic.c
index b77034a694ef..7b4d9714f248 100644
--- a/fs/proc/generic.c
+++ b/fs/proc/generic.c
@@ -25,6 +25,7 @@
#include <linux/spinlock.h>
#include <linux/completion.h>
#include <linux/uaccess.h>
+#include <linux/seq_file.h>
#include "internal.h"
@@ -345,13 +346,12 @@ static const struct inode_operations proc_dir_inode_operations = {
.setattr = proc_notify_change,
};
-static int proc_register(struct proc_dir_entry * dir, struct proc_dir_entry * dp)
+/* returns the registered entry, or frees dp and returns NULL on failure */
+struct proc_dir_entry *proc_register(struct proc_dir_entry *dir,
+ struct proc_dir_entry *dp)
{
- int ret;
-
- ret = proc_alloc_inum(&dp->low_ino);
- if (ret)
- return ret;
+ if (proc_alloc_inum(&dp->low_ino))
+ goto out_free_entry;
write_lock(&proc_subdir_lock);
dp->parent = dir;
@@ -359,12 +359,16 @@ static int proc_register(struct proc_dir_entry * dir, struct proc_dir_entry * dp
WARN(1, "proc_dir_entry '%s/%s' already registered\n",
dir->name, dp->name);
write_unlock(&proc_subdir_lock);
- proc_free_inum(dp->low_ino);
- return -EEXIST;
+ goto out_free_inum;
}
write_unlock(&proc_subdir_lock);
- return 0;
+ return dp;
+out_free_inum:
+ proc_free_inum(dp->low_ino);
+out_free_entry:
+ pde_free(dp);
+ return NULL;
}
static struct proc_dir_entry *__proc_create(struct proc_dir_entry **parent,
@@ -442,10 +446,7 @@ struct proc_dir_entry *proc_symlink(const char *name,
if (ent->data) {
strcpy((char*)ent->data,dest);
ent->proc_iops = &proc_link_inode_operations;
- if (proc_register(parent, ent) < 0) {
- pde_free(ent);
- ent = NULL;
- }
+ ent = proc_register(parent, ent);
} else {
pde_free(ent);
ent = NULL;
@@ -469,11 +470,9 @@ struct proc_dir_entry *proc_mkdir_data(const char *name, umode_t mode,
ent->proc_fops = &proc_dir_operations;
ent->proc_iops = &proc_dir_inode_operations;
parent->nlink++;
- if (proc_register(parent, ent) < 0) {
- pde_free(ent);
+ ent = proc_register(parent, ent);
+ if (!ent)
parent->nlink--;
- ent = NULL;
- }
}
return ent;
}
@@ -504,47 +503,47 @@ struct proc_dir_entry *proc_create_mount_point(const char *name)
ent->proc_fops = NULL;
ent->proc_iops = NULL;
parent->nlink++;
- if (proc_register(parent, ent) < 0) {
- pde_free(ent);
+ ent = proc_register(parent, ent);
+ if (!ent)
parent->nlink--;
- ent = NULL;
- }
}
return ent;
}
EXPORT_SYMBOL(proc_create_mount_point);
-struct proc_dir_entry *proc_create_data(const char *name, umode_t mode,
- struct proc_dir_entry *parent,
- const struct file_operations *proc_fops,
- void *data)
+struct proc_dir_entry *proc_create_reg(const char *name, umode_t mode,
+ struct proc_dir_entry **parent, void *data)
{
- struct proc_dir_entry *pde;
+ struct proc_dir_entry *p;
+
if ((mode & S_IFMT) == 0)
mode |= S_IFREG;
-
- if (!S_ISREG(mode)) {
- WARN_ON(1); /* use proc_mkdir() */
+ if ((mode & S_IALLUGO) == 0)
+ mode |= S_IRUGO;
+ if (WARN_ON_ONCE(!S_ISREG(mode)))
return NULL;
+
+ p = __proc_create(parent, name, mode, 1);
+ if (p) {
+ p->proc_iops = &proc_file_inode_operations;
+ p->data = data;
}
+ return p;
+}
+
+struct proc_dir_entry *proc_create_data(const char *name, umode_t mode,
+ struct proc_dir_entry *parent,
+ const struct file_operations *proc_fops, void *data)
+{
+ struct proc_dir_entry *p;
BUG_ON(proc_fops == NULL);
- if ((mode & S_IALLUGO) == 0)
- mode |= S_IRUGO;
- pde = __proc_create(&parent, name, mode, 1);
- if (!pde)
- goto out;
- pde->proc_fops = proc_fops;
- pde->data = data;
- pde->proc_iops = &proc_file_inode_operations;
- if (proc_register(parent, pde) < 0)
- goto out_free;
- return pde;
-out_free:
- pde_free(pde);
-out:
- return NULL;
+ p = proc_create_reg(name, mode, &parent, data);
+ if (!p)
+ return NULL;
+ p->proc_fops = proc_fops;
+ return proc_register(parent, p);
}
EXPORT_SYMBOL(proc_create_data);
@@ -556,6 +555,67 @@ struct proc_dir_entry *proc_create(const char *name, umode_t mode,
}
EXPORT_SYMBOL(proc_create);
+static int proc_seq_open(struct inode *inode, struct file *file)
+{
+ struct proc_dir_entry *de = PDE(inode);
+
+ if (de->state_size)
+ return seq_open_private(file, de->seq_ops, de->state_size);
+ return seq_open(file, de->seq_ops);
+}
+
+static const struct file_operations proc_seq_fops = {
+ .open = proc_seq_open,
+ .read = seq_read,
+ .llseek = seq_lseek,
+ .release = seq_release,
+};
+
+struct proc_dir_entry *proc_create_seq_private(const char *name, umode_t mode,
+ struct proc_dir_entry *parent, const struct seq_operations *ops,
+ unsigned int state_size, void *data)
+{
+ struct proc_dir_entry *p;
+
+ p = proc_create_reg(name, mode, &parent, data);
+ if (!p)
+ return NULL;
+ p->proc_fops = &proc_seq_fops;
+ p->seq_ops = ops;
+ p->state_size = state_size;
+ return proc_register(parent, p);
+}
+EXPORT_SYMBOL(proc_create_seq_private);
+
+static int proc_single_open(struct inode *inode, struct file *file)
+{
+ struct proc_dir_entry *de = PDE(inode);
+
+ return single_open(file, de->single_show, de->data);
+}
+
+static const struct file_operations proc_single_fops = {
+ .open = proc_single_open,
+ .read = seq_read,
+ .llseek = seq_lseek,
+ .release = single_release,
+};
+
+struct proc_dir_entry *proc_create_single_data(const char *name, umode_t mode,
+ struct proc_dir_entry *parent,
+ int (*show)(struct seq_file *, void *), void *data)
+{
+ struct proc_dir_entry *p;
+
+ p = proc_create_reg(name, mode, &parent, data);
+ if (!p)
+ return NULL;
+ p->proc_fops = &proc_single_fops;
+ p->single_show = show;
+ return proc_register(parent, p);
+}
+EXPORT_SYMBOL(proc_create_single_data);
+
void proc_set_size(struct proc_dir_entry *de, loff_t size)
{
de->size = size;
diff --git a/fs/proc/internal.h b/fs/proc/internal.h
index 275b062e58af..50cb22a08c2f 100644
--- a/fs/proc/internal.h
+++ b/fs/proc/internal.h
@@ -44,7 +44,12 @@ struct proc_dir_entry {
struct completion *pde_unload_completion;
const struct inode_operations *proc_iops;
const struct file_operations *proc_fops;
+ union {
+ const struct seq_operations *seq_ops;
+ int (*single_show)(struct seq_file *, void *);
+ };
void *data;
+ unsigned int state_size;
unsigned int low_ino;
nlink_t nlink;
kuid_t uid;
@@ -57,9 +62,9 @@ struct proc_dir_entry {
umode_t mode;
u8 namelen;
#ifdef CONFIG_64BIT
-#define SIZEOF_PDE_INLINE_NAME (192-139)
+#define SIZEOF_PDE_INLINE_NAME (192-155)
#else
-#define SIZEOF_PDE_INLINE_NAME (128-87)
+#define SIZEOF_PDE_INLINE_NAME (128-95)
#endif
char inline_name[SIZEOF_PDE_INLINE_NAME];
} __randomize_layout;
@@ -131,6 +136,8 @@ unsigned name_to_int(const struct qstr *qstr);
*/
extern const struct file_operations proc_tid_children_operations;
+extern void proc_task_name(struct seq_file *m, struct task_struct *p,
+ bool escape);
extern int proc_tid_stat(struct seq_file *, struct pid_namespace *,
struct pid *, struct task_struct *);
extern int proc_tgid_stat(struct seq_file *, struct pid_namespace *,
@@ -156,12 +163,16 @@ extern loff_t mem_lseek(struct file *, loff_t, int);
/* Lookups */
typedef struct dentry *instantiate_t(struct dentry *,
struct task_struct *, const void *);
-extern bool proc_fill_cache(struct file *, struct dir_context *, const char *, int,
+bool proc_fill_cache(struct file *, struct dir_context *, const char *, unsigned int,
instantiate_t, struct task_struct *, const void *);
/*
* generic.c
*/
+struct proc_dir_entry *proc_create_reg(const char *name, umode_t mode,
+ struct proc_dir_entry **parent, void *data);
+struct proc_dir_entry *proc_register(struct proc_dir_entry *dir,
+ struct proc_dir_entry *dp);
extern struct dentry *proc_lookup(struct inode *, struct dentry *, unsigned int);
struct dentry *proc_lookup_de(struct inode *, struct dentry *, struct proc_dir_entry *);
extern int proc_readdir(struct file *, struct dir_context *);
diff --git a/fs/proc/interrupts.c b/fs/proc/interrupts.c
index 6a6bee9c603c..cb0edc7cbf09 100644
--- a/fs/proc/interrupts.c
+++ b/fs/proc/interrupts.c
@@ -34,21 +34,9 @@ static const struct seq_operations int_seq_ops = {
.show = show_interrupts
};
-static int interrupts_open(struct inode *inode, struct file *filp)
-{
- return seq_open(filp, &int_seq_ops);
-}
-
-static const struct file_operations proc_interrupts_operations = {
- .open = interrupts_open,
- .read = seq_read,
- .llseek = seq_lseek,
- .release = seq_release,
-};
-
static int __init proc_interrupts_init(void)
{
- proc_create("interrupts", 0, NULL, &proc_interrupts_operations);
+ proc_create_seq("interrupts", 0, NULL, &int_seq_ops);
return 0;
}
fs_initcall(proc_interrupts_init);
diff --git a/fs/proc/kcore.c b/fs/proc/kcore.c
index d1e82761de81..e64ecb9f2720 100644
--- a/fs/proc/kcore.c
+++ b/fs/proc/kcore.c
@@ -209,25 +209,34 @@ kclist_add_private(unsigned long pfn, unsigned long nr_pages, void *arg)
{
struct list_head *head = (struct list_head *)arg;
struct kcore_list *ent;
+ struct page *p;
+
+ if (!pfn_valid(pfn))
+ return 1;
+
+ p = pfn_to_page(pfn);
+ if (!memmap_valid_within(pfn, p, page_zone(p)))
+ return 1;
ent = kmalloc(sizeof(*ent), GFP_KERNEL);
if (!ent)
return -ENOMEM;
- ent->addr = (unsigned long)__va((pfn << PAGE_SHIFT));
+ ent->addr = (unsigned long)page_to_virt(p);
ent->size = nr_pages << PAGE_SHIFT;
- /* Sanity check: Can happen in 32bit arch...maybe */
- if (ent->addr < (unsigned long) __va(0))
+ if (!virt_addr_valid(ent->addr))
goto free_out;
/* cut not-mapped area. ....from ppc-32 code. */
if (ULONG_MAX - ent->addr < ent->size)
ent->size = ULONG_MAX - ent->addr;
- /* cut when vmalloc() area is higher than direct-map area */
- if (VMALLOC_START > (unsigned long)__va(0)) {
- if (ent->addr > VMALLOC_START)
- goto free_out;
+ /*
+ * We've already checked virt_addr_valid so we know this address
+ * is a valid pointer, therefore we can check against it to determine
+ * if we need to trim
+ */
+ if (VMALLOC_START > ent->addr) {
if (VMALLOC_START - ent->addr < ent->size)
ent->size = VMALLOC_START - ent->addr;
}
diff --git a/fs/proc/loadavg.c b/fs/proc/loadavg.c
index a000d7547479..d06694757201 100644
--- a/fs/proc/loadavg.c
+++ b/fs/proc/loadavg.c
@@ -24,25 +24,13 @@ static int loadavg_proc_show(struct seq_file *m, void *v)
LOAD_INT(avnrun[1]), LOAD_FRAC(avnrun[1]),
LOAD_INT(avnrun[2]), LOAD_FRAC(avnrun[2]),
nr_running(), nr_threads,
- idr_get_cursor(&task_active_pid_ns(current)->idr));
+ idr_get_cursor(&task_active_pid_ns(current)->idr) - 1);
return 0;
}
-static int loadavg_proc_open(struct inode *inode, struct file *file)
-{
- return single_open(file, loadavg_proc_show, NULL);
-}
-
-static const struct file_operations loadavg_proc_fops = {
- .open = loadavg_proc_open,
- .read = seq_read,
- .llseek = seq_lseek,
- .release = single_release,
-};
-
static int __init proc_loadavg_init(void)
{
- proc_create("loadavg", 0, NULL, &loadavg_proc_fops);
+ proc_create_single("loadavg", 0, NULL, loadavg_proc_show);
return 0;
}
fs_initcall(proc_loadavg_init);
diff --git a/fs/proc/meminfo.c b/fs/proc/meminfo.c
index 65a72ab57471..2fb04846ed11 100644
--- a/fs/proc/meminfo.c
+++ b/fs/proc/meminfo.c
@@ -149,21 +149,9 @@ static int meminfo_proc_show(struct seq_file *m, void *v)
return 0;
}
-static int meminfo_proc_open(struct inode *inode, struct file *file)
-{
- return single_open(file, meminfo_proc_show, NULL);
-}
-
-static const struct file_operations meminfo_proc_fops = {
- .open = meminfo_proc_open,
- .read = seq_read,
- .llseek = seq_lseek,
- .release = single_release,
-};
-
static int __init proc_meminfo_init(void)
{
- proc_create("meminfo", 0, NULL, &meminfo_proc_fops);
+ proc_create_single("meminfo", 0, NULL, meminfo_proc_show);
return 0;
}
fs_initcall(proc_meminfo_init);
diff --git a/fs/proc/nommu.c b/fs/proc/nommu.c
index 75634379f82e..3b63be64e436 100644
--- a/fs/proc/nommu.c
+++ b/fs/proc/nommu.c
@@ -113,21 +113,9 @@ static const struct seq_operations proc_nommu_region_list_seqop = {
.show = nommu_region_list_show
};
-static int proc_nommu_region_list_open(struct inode *inode, struct file *file)
-{
- return seq_open(file, &proc_nommu_region_list_seqop);
-}
-
-static const struct file_operations proc_nommu_region_list_operations = {
- .open = proc_nommu_region_list_open,
- .read = seq_read,
- .llseek = seq_lseek,
- .release = seq_release,
-};
-
static int __init proc_nommu_init(void)
{
- proc_create("maps", S_IRUGO, NULL, &proc_nommu_region_list_operations);
+ proc_create_seq("maps", S_IRUGO, NULL, &proc_nommu_region_list_seqop);
return 0;
}
diff --git a/fs/proc/page.c b/fs/proc/page.c
index 1491918a33c3..792c78a49174 100644
--- a/fs/proc/page.c
+++ b/fs/proc/page.c
@@ -154,6 +154,8 @@ u64 stable_page_flags(struct page *page)
if (PageBalloon(page))
u |= 1 << KPF_BALLOON;
+ if (PageTable(page))
+ u |= 1 << KPF_PGTABLE;
if (page_is_idle(page))
u |= 1 << KPF_IDLE;
diff --git a/fs/proc/proc_net.c b/fs/proc/proc_net.c
index 1763f370489d..7d94fa005b0d 100644
--- a/fs/proc/proc_net.c
+++ b/fs/proc/proc_net.c
@@ -38,20 +38,20 @@ static struct net *get_proc_net(const struct inode *inode)
return maybe_get_net(PDE_NET(PDE(inode)));
}
-int seq_open_net(struct inode *ino, struct file *f,
- const struct seq_operations *ops, int size)
+static int seq_open_net(struct inode *inode, struct file *file)
{
- struct net *net;
+ unsigned int state_size = PDE(inode)->state_size;
struct seq_net_private *p;
+ struct net *net;
- BUG_ON(size < sizeof(*p));
+ WARN_ON_ONCE(state_size < sizeof(*p));
- net = get_proc_net(ino);
- if (net == NULL)
+ net = get_proc_net(inode);
+ if (!net)
return -ENXIO;
- p = __seq_open_private(f, ops, size);
- if (p == NULL) {
+ p = __seq_open_private(file, PDE(inode)->seq_ops, state_size);
+ if (!p) {
put_net(net);
return -ENOMEM;
}
@@ -60,51 +60,83 @@ int seq_open_net(struct inode *ino, struct file *f,
#endif
return 0;
}
-EXPORT_SYMBOL_GPL(seq_open_net);
-int single_open_net(struct inode *inode, struct file *file,
- int (*show)(struct seq_file *, void *))
+static int seq_release_net(struct inode *ino, struct file *f)
{
- int err;
- struct net *net;
-
- err = -ENXIO;
- net = get_proc_net(inode);
- if (net == NULL)
- goto err_net;
-
- err = single_open(file, show, net);
- if (err < 0)
- goto err_open;
+ struct seq_file *seq = f->private_data;
+ put_net(seq_file_net(seq));
+ seq_release_private(ino, f);
return 0;
+}
-err_open:
- put_net(net);
-err_net:
- return err;
+static const struct file_operations proc_net_seq_fops = {
+ .open = seq_open_net,
+ .read = seq_read,
+ .llseek = seq_lseek,
+ .release = seq_release_net,
+};
+
+struct proc_dir_entry *proc_create_net_data(const char *name, umode_t mode,
+ struct proc_dir_entry *parent, const struct seq_operations *ops,
+ unsigned int state_size, void *data)
+{
+ struct proc_dir_entry *p;
+
+ p = proc_create_reg(name, mode, &parent, data);
+ if (!p)
+ return NULL;
+ p->proc_fops = &proc_net_seq_fops;
+ p->seq_ops = ops;
+ p->state_size = state_size;
+ return proc_register(parent, p);
}
-EXPORT_SYMBOL_GPL(single_open_net);
+EXPORT_SYMBOL_GPL(proc_create_net_data);
-int seq_release_net(struct inode *ino, struct file *f)
+static int single_open_net(struct inode *inode, struct file *file)
{
- struct seq_file *seq;
+ struct proc_dir_entry *de = PDE(inode);
+ struct net *net;
+ int err;
- seq = f->private_data;
+ net = get_proc_net(inode);
+ if (!net)
+ return -ENXIO;
- put_net(seq_file_net(seq));
- seq_release_private(ino, f);
- return 0;
+ err = single_open(file, de->single_show, net);
+ if (err)
+ put_net(net);
+ return err;
}
-EXPORT_SYMBOL_GPL(seq_release_net);
-int single_release_net(struct inode *ino, struct file *f)
+static int single_release_net(struct inode *ino, struct file *f)
{
struct seq_file *seq = f->private_data;
put_net(seq->private);
return single_release(ino, f);
}
-EXPORT_SYMBOL_GPL(single_release_net);
+
+static const struct file_operations proc_net_single_fops = {
+ .open = single_open_net,
+ .read = seq_read,
+ .llseek = seq_lseek,
+ .release = single_release_net,
+};
+
+struct proc_dir_entry *proc_create_net_single(const char *name, umode_t mode,
+ struct proc_dir_entry *parent,
+ int (*show)(struct seq_file *, void *), void *data)
+{
+ struct proc_dir_entry *p;
+
+ p = proc_create_reg(name, mode, &parent, data);
+ if (!p)
+ return NULL;
+ p->proc_fops = &proc_net_single_fops;
+ p->single_show = show;
+ return proc_register(parent, p);
+}
+EXPORT_SYMBOL_GPL(proc_create_net_single);
static struct net *get_proc_task_net(struct inode *dir)
{
diff --git a/fs/proc/proc_tty.c b/fs/proc/proc_tty.c
index d0cf1c50bb6c..c69ff191e5d8 100644
--- a/fs/proc/proc_tty.c
+++ b/fs/proc/proc_tty.c
@@ -126,18 +126,6 @@ static const struct seq_operations tty_drivers_op = {
.show = show_tty_driver
};
-static int tty_drivers_open(struct inode *inode, struct file *file)
-{
- return seq_open(file, &tty_drivers_op);
-}
-
-static const struct file_operations proc_tty_drivers_operations = {
- .open = tty_drivers_open,
- .read = seq_read,
- .llseek = seq_lseek,
- .release = seq_release,
-};
-
/*
* This function is called by tty_register_driver() to handle
* registering the driver's /proc handler into /proc/tty/driver/<foo>
@@ -147,11 +135,11 @@ void proc_tty_register_driver(struct tty_driver *driver)
struct proc_dir_entry *ent;
if (!driver->driver_name || driver->proc_entry ||
- !driver->ops->proc_fops)
+ !driver->ops->proc_show)
return;
- ent = proc_create_data(driver->driver_name, 0, proc_tty_driver,
- driver->ops->proc_fops, driver);
+ ent = proc_create_single_data(driver->driver_name, 0, proc_tty_driver,
+ driver->ops->proc_show, driver);
driver->proc_entry = ent;
}
@@ -186,6 +174,6 @@ void __init proc_tty_init(void)
* entry.
*/
proc_tty_driver = proc_mkdir_mode("tty/driver", S_IRUSR|S_IXUSR, NULL);
- proc_create("tty/ldiscs", 0, NULL, &tty_ldiscs_proc_fops);
- proc_create("tty/drivers", 0, NULL, &proc_tty_drivers_operations);
+ proc_create_seq("tty/ldiscs", 0, NULL, &tty_ldiscs_seq_ops);
+ proc_create_seq("tty/drivers", 0, NULL, &tty_drivers_op);
}
diff --git a/fs/proc/self.c b/fs/proc/self.c
index 4d7d061696b3..127265e5c55f 100644
--- a/fs/proc/self.c
+++ b/fs/proc/self.c
@@ -12,7 +12,7 @@ static const char *proc_self_get_link(struct dentry *dentry,
struct inode *inode,
struct delayed_call *done)
{
- struct pid_namespace *ns = inode->i_sb->s_fs_info;
+ struct pid_namespace *ns = proc_pid_ns(inode);
pid_t tgid = task_tgid_nr_ns(current, ns);
char *name;
@@ -36,7 +36,7 @@ static unsigned self_inum __ro_after_init;
int proc_setup_self(struct super_block *s)
{
struct inode *root_inode = d_inode(s->s_root);
- struct pid_namespace *ns = s->s_fs_info;
+ struct pid_namespace *ns = proc_pid_ns(root_inode);
struct dentry *self;
inode_lock(root_inode);
diff --git a/fs/proc/softirqs.c b/fs/proc/softirqs.c
index 24072cc06e65..12901dcf57e2 100644
--- a/fs/proc/softirqs.c
+++ b/fs/proc/softirqs.c
@@ -25,21 +25,9 @@ static int show_softirqs(struct seq_file *p, void *v)
return 0;
}
-static int softirqs_open(struct inode *inode, struct file *file)
-{
- return single_open(file, show_softirqs, NULL);
-}
-
-static const struct file_operations proc_softirqs_operations = {
- .open = softirqs_open,
- .read = seq_read,
- .llseek = seq_lseek,
- .release = single_release,
-};
-
static int __init proc_softirqs_init(void)
{
- proc_create("softirqs", 0, NULL, &proc_softirqs_operations);
+ proc_create_single("softirqs", 0, NULL, show_softirqs);
return 0;
}
fs_initcall(proc_softirqs_init);
diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c
index 65ae54659833..597969db9e90 100644
--- a/fs/proc/task_mmu.c
+++ b/fs/proc/task_mmu.c
@@ -18,6 +18,7 @@
#include <linux/page_idle.h>
#include <linux/shmem_fs.h>
#include <linux/uaccess.h>
+#include <linux/pkeys.h>
#include <asm/elf.h>
#include <asm/tlb.h>
@@ -673,13 +674,16 @@ static void show_smap_vma_flags(struct seq_file *m, struct vm_area_struct *vma)
[ilog2(VM_MERGEABLE)] = "mg",
[ilog2(VM_UFFD_MISSING)]= "um",
[ilog2(VM_UFFD_WP)] = "uw",
-#ifdef CONFIG_X86_INTEL_MEMORY_PROTECTION_KEYS
+#ifdef CONFIG_ARCH_HAS_PKEYS
/* These come out via ProtectionKey: */
[ilog2(VM_PKEY_BIT0)] = "",
[ilog2(VM_PKEY_BIT1)] = "",
[ilog2(VM_PKEY_BIT2)] = "",
[ilog2(VM_PKEY_BIT3)] = "",
+#if VM_PKEY_BIT4
+ [ilog2(VM_PKEY_BIT4)] = "",
#endif
+#endif /* CONFIG_ARCH_HAS_PKEYS */
};
size_t i;
@@ -727,10 +731,6 @@ static int smaps_hugetlb_range(pte_t *pte, unsigned long hmask,
}
#endif /* HUGETLB_PAGE */
-void __weak arch_show_smap(struct seq_file *m, struct vm_area_struct *vma)
-{
-}
-
#define SEQ_PUT_DEC(str, val) \
seq_put_decimal_ull_width(m, str, (val) >> 10, 8)
static int show_smap(struct seq_file *m, void *v, int is_pid)
@@ -835,7 +835,8 @@ static int show_smap(struct seq_file *m, void *v, int is_pid)
seq_puts(m, " kB\n");
}
if (!rollup_mode) {
- arch_show_smap(m, vma);
+ if (arch_pkeys_enabled())
+ seq_printf(m, "ProtectionKey: %8u\n", vma_pkey(vma));
show_smap_vma_flags(m, vma);
}
m_cache_vma(m, vma);
@@ -937,7 +938,7 @@ static inline void clear_soft_dirty(struct vm_area_struct *vma,
/*
* The soft-dirty tracker uses #PF-s to catch writes
* to pages, so write-protect the pte as well. See the
- * Documentation/vm/soft-dirty.txt for full description
+ * Documentation/admin-guide/mm/soft-dirty.rst for full description
* of how soft-dirty works.
*/
pte_t ptent = *pte;
@@ -1258,8 +1259,9 @@ static pagemap_entry_t pte_to_pagemap_entry(struct pagemapread *pm,
if (pte_swp_soft_dirty(pte))
flags |= PM_SOFT_DIRTY;
entry = pte_to_swp_entry(pte);
- frame = swp_type(entry) |
- (swp_offset(entry) << MAX_SWAPFILES_SHIFT);
+ if (pm->show_pfn)
+ frame = swp_type(entry) |
+ (swp_offset(entry) << MAX_SWAPFILES_SHIFT);
flags |= PM_SWAP;
if (is_migration_entry(entry))
page = migration_entry_to_page(entry);
@@ -1310,9 +1312,14 @@ static int pagemap_pmd_range(pmd_t *pmdp, unsigned long addr, unsigned long end,
#ifdef CONFIG_ARCH_ENABLE_THP_MIGRATION
else if (is_swap_pmd(pmd)) {
swp_entry_t entry = pmd_to_swp_entry(pmd);
+ unsigned long offset;
- frame = swp_type(entry) |
- (swp_offset(entry) << MAX_SWAPFILES_SHIFT);
+ if (pm->show_pfn) {
+ offset = swp_offset(entry) +
+ ((addr & ~PMD_MASK) >> PAGE_SHIFT);
+ frame = swp_type(entry) |
+ (offset << MAX_SWAPFILES_SHIFT);
+ }
flags |= PM_SWAP;
if (pmd_swp_soft_dirty(pmd))
flags |= PM_SOFT_DIRTY;
@@ -1330,8 +1337,12 @@ static int pagemap_pmd_range(pmd_t *pmdp, unsigned long addr, unsigned long end,
err = add_to_pagemap(addr, &pme, pm);
if (err)
break;
- if (pm->show_pfn && (flags & PM_PRESENT))
- frame++;
+ if (pm->show_pfn) {
+ if (flags & PM_PRESENT)
+ frame++;
+ else if (flags & PM_SWAP)
+ frame += (1 << MAX_SWAPFILES_SHIFT);
+ }
}
spin_unlock(ptl);
return err;
@@ -1417,7 +1428,7 @@ static int pagemap_hugetlb_range(pte_t *ptep, unsigned long hmask,
* Bits 0-54 page frame number (PFN) if present
* Bits 0-4 swap type if swapped
* Bits 5-54 swap offset if swapped
- * Bit 55 pte is soft-dirty (see Documentation/vm/soft-dirty.txt)
+ * Bit 55 pte is soft-dirty (see Documentation/admin-guide/mm/soft-dirty.rst)
* Bit 56 page exclusively mapped
* Bits 57-60 zero
* Bit 61 page is file-page or shared-anon
diff --git a/fs/proc/thread_self.c b/fs/proc/thread_self.c
index 9d2efaca499f..b905010ca9eb 100644
--- a/fs/proc/thread_self.c
+++ b/fs/proc/thread_self.c
@@ -12,7 +12,7 @@ static const char *proc_thread_self_get_link(struct dentry *dentry,
struct inode *inode,
struct delayed_call *done)
{
- struct pid_namespace *ns = inode->i_sb->s_fs_info;
+ struct pid_namespace *ns = proc_pid_ns(inode);
pid_t tgid = task_tgid_nr_ns(current, ns);
pid_t pid = task_pid_nr_ns(current, ns);
char *name;
@@ -36,7 +36,7 @@ static unsigned thread_self_inum __ro_after_init;
int proc_setup_thread_self(struct super_block *s)
{
struct inode *root_inode = d_inode(s->s_root);
- struct pid_namespace *ns = s->s_fs_info;
+ struct pid_namespace *ns = proc_pid_ns(root_inode);
struct dentry *thread_self;
inode_lock(root_inode);
diff --git a/fs/proc/uptime.c b/fs/proc/uptime.c
index 95a708d83721..3bd12f955867 100644
--- a/fs/proc/uptime.c
+++ b/fs/proc/uptime.c
@@ -30,21 +30,9 @@ static int uptime_proc_show(struct seq_file *m, void *v)
return 0;
}
-static int uptime_proc_open(struct inode *inode, struct file *file)
-{
- return single_open(file, uptime_proc_show, NULL);
-}
-
-static const struct file_operations uptime_proc_fops = {
- .open = uptime_proc_open,
- .read = seq_read,
- .llseek = seq_lseek,
- .release = single_release,
-};
-
static int __init proc_uptime_init(void)
{
- proc_create("uptime", 0, NULL, &uptime_proc_fops);
+ proc_create_single("uptime", 0, NULL, uptime_proc_show);
return 0;
}
fs_initcall(proc_uptime_init);
diff --git a/fs/proc/version.c b/fs/proc/version.c
index 94901e8e700d..b449f186577f 100644
--- a/fs/proc/version.c
+++ b/fs/proc/version.c
@@ -15,21 +15,9 @@ static int version_proc_show(struct seq_file *m, void *v)
return 0;
}
-static int version_proc_open(struct inode *inode, struct file *file)
-{
- return single_open(file, version_proc_show, NULL);
-}
-
-static const struct file_operations version_proc_fops = {
- .open = version_proc_open,
- .read = seq_read,
- .llseek = seq_lseek,
- .release = single_release,
-};
-
static int __init proc_version_init(void)
{
- proc_create("version", 0, NULL, &version_proc_fops);
+ proc_create_single("version", 0, NULL, version_proc_show);
return 0;
}
fs_initcall(proc_version_init);
diff --git a/fs/proc/vmcore.c b/fs/proc/vmcore.c
index a45f0af22a60..cfb6674331fd 100644
--- a/fs/proc/vmcore.c
+++ b/fs/proc/vmcore.c
@@ -20,6 +20,7 @@
#include <linux/init.h>
#include <linux/crash_dump.h>
#include <linux/list.h>
+#include <linux/mutex.h>
#include <linux/vmalloc.h>
#include <linux/pagemap.h>
#include <linux/uaccess.h>
@@ -38,12 +39,23 @@ static size_t elfcorebuf_sz_orig;
static char *elfnotes_buf;
static size_t elfnotes_sz;
+/* Size of all notes minus the device dump notes */
+static size_t elfnotes_orig_sz;
/* Total size of vmcore file. */
static u64 vmcore_size;
static struct proc_dir_entry *proc_vmcore;
+#ifdef CONFIG_PROC_VMCORE_DEVICE_DUMP
+/* Device Dump list and mutex to synchronize access to list */
+static LIST_HEAD(vmcoredd_list);
+static DEFINE_MUTEX(vmcoredd_mutex);
+#endif /* CONFIG_PROC_VMCORE_DEVICE_DUMP */
+
+/* Device Dump Size */
+static size_t vmcoredd_orig_sz;
+
/*
* Returns > 0 for RAM pages, 0 for non-RAM pages, < 0 on error
* The called function has to take care of module refcounting.
@@ -178,6 +190,77 @@ static int copy_to(void *target, void *src, size_t size, int userbuf)
return 0;
}
+#ifdef CONFIG_PROC_VMCORE_DEVICE_DUMP
+static int vmcoredd_copy_dumps(void *dst, u64 start, size_t size, int userbuf)
+{
+ struct vmcoredd_node *dump;
+ u64 offset = 0;
+ int ret = 0;
+ size_t tsz;
+ char *buf;
+
+ mutex_lock(&vmcoredd_mutex);
+ list_for_each_entry(dump, &vmcoredd_list, list) {
+ if (start < offset + dump->size) {
+ tsz = min(offset + (u64)dump->size - start, (u64)size);
+ buf = dump->buf + start - offset;
+ if (copy_to(dst, buf, tsz, userbuf)) {
+ ret = -EFAULT;
+ goto out_unlock;
+ }
+
+ size -= tsz;
+ start += tsz;
+ dst += tsz;
+
+ /* Leave now if buffer filled already */
+ if (!size)
+ goto out_unlock;
+ }
+ offset += dump->size;
+ }
+
+out_unlock:
+ mutex_unlock(&vmcoredd_mutex);
+ return ret;
+}
+
+static int vmcoredd_mmap_dumps(struct vm_area_struct *vma, unsigned long dst,
+ u64 start, size_t size)
+{
+ struct vmcoredd_node *dump;
+ u64 offset = 0;
+ int ret = 0;
+ size_t tsz;
+ char *buf;
+
+ mutex_lock(&vmcoredd_mutex);
+ list_for_each_entry(dump, &vmcoredd_list, list) {
+ if (start < offset + dump->size) {
+ tsz = min(offset + (u64)dump->size - start, (u64)size);
+ buf = dump->buf + start - offset;
+ if (remap_vmalloc_range_partial(vma, dst, buf, tsz)) {
+ ret = -EFAULT;
+ goto out_unlock;
+ }
+
+ size -= tsz;
+ start += tsz;
+ dst += tsz;
+
+ /* Leave now if buffer filled already */
+ if (!size)
+ goto out_unlock;
+ }
+ offset += dump->size;
+ }
+
+out_unlock:
+ mutex_unlock(&vmcoredd_mutex);
+ return ret;
+}
+#endif /* CONFIG_PROC_VMCORE_DEVICE_DUMP */
+
/* Read from the ELF header and then the crash dump. On error, negative value is
* returned otherwise number of bytes read are returned.
*/
@@ -215,10 +298,41 @@ static ssize_t __read_vmcore(char *buffer, size_t buflen, loff_t *fpos,
if (*fpos < elfcorebuf_sz + elfnotes_sz) {
void *kaddr;
+ /* We add device dumps before other elf notes because the
+ * other elf notes may not fill the elf notes buffer
+ * completely and we will end up with zero-filled data
+ * between the elf notes and the device dumps. Tools will
+ * then try to decode this zero-filled data as valid notes
+ * and we don't want that. Hence, adding device dumps before
+ * the other elf notes ensure that zero-filled data can be
+ * avoided.
+ */
+#ifdef CONFIG_PROC_VMCORE_DEVICE_DUMP
+ /* Read device dumps */
+ if (*fpos < elfcorebuf_sz + vmcoredd_orig_sz) {
+ tsz = min(elfcorebuf_sz + vmcoredd_orig_sz -
+ (size_t)*fpos, buflen);
+ start = *fpos - elfcorebuf_sz;
+ if (vmcoredd_copy_dumps(buffer, start, tsz, userbuf))
+ return -EFAULT;
+
+ buflen -= tsz;
+ *fpos += tsz;
+ buffer += tsz;
+ acc += tsz;
+
+ /* leave now if filled buffer already */
+ if (!buflen)
+ return acc;
+ }
+#endif /* CONFIG_PROC_VMCORE_DEVICE_DUMP */
+
+ /* Read remaining elf notes */
tsz = min(elfcorebuf_sz + elfnotes_sz - (size_t)*fpos, buflen);
- kaddr = elfnotes_buf + *fpos - elfcorebuf_sz;
+ kaddr = elfnotes_buf + *fpos - elfcorebuf_sz - vmcoredd_orig_sz;
if (copy_to(buffer, kaddr, tsz, userbuf))
return -EFAULT;
+
buflen -= tsz;
*fpos += tsz;
buffer += tsz;
@@ -302,10 +416,8 @@ static const struct vm_operations_struct vmcore_mmap_ops = {
};
/**
- * alloc_elfnotes_buf - allocate buffer for ELF note segment in
- * vmalloc memory
- *
- * @notes_sz: size of buffer
+ * vmcore_alloc_buf - allocate buffer in vmalloc memory
+ * @sizez: size of buffer
*
* If CONFIG_MMU is defined, use vmalloc_user() to allow users to mmap
* the buffer to user-space by means of remap_vmalloc_range().
@@ -313,12 +425,12 @@ static const struct vm_operations_struct vmcore_mmap_ops = {
* If CONFIG_MMU is not defined, use vzalloc() since mmap_vmcore() is
* disabled and there's no need to allow users to mmap the buffer.
*/
-static inline char *alloc_elfnotes_buf(size_t notes_sz)
+static inline char *vmcore_alloc_buf(size_t size)
{
#ifdef CONFIG_MMU
- return vmalloc_user(notes_sz);
+ return vmalloc_user(size);
#else
- return vzalloc(notes_sz);
+ return vzalloc(size);
#endif
}
@@ -446,11 +558,46 @@ static int mmap_vmcore(struct file *file, struct vm_area_struct *vma)
if (start < elfcorebuf_sz + elfnotes_sz) {
void *kaddr;
+ /* We add device dumps before other elf notes because the
+ * other elf notes may not fill the elf notes buffer
+ * completely and we will end up with zero-filled data
+ * between the elf notes and the device dumps. Tools will
+ * then try to decode this zero-filled data as valid notes
+ * and we don't want that. Hence, adding device dumps before
+ * the other elf notes ensure that zero-filled data can be
+ * avoided. This also ensures that the device dumps and
+ * other elf notes can be properly mmaped at page aligned
+ * address.
+ */
+#ifdef CONFIG_PROC_VMCORE_DEVICE_DUMP
+ /* Read device dumps */
+ if (start < elfcorebuf_sz + vmcoredd_orig_sz) {
+ u64 start_off;
+
+ tsz = min(elfcorebuf_sz + vmcoredd_orig_sz -
+ (size_t)start, size);
+ start_off = start - elfcorebuf_sz;
+ if (vmcoredd_mmap_dumps(vma, vma->vm_start + len,
+ start_off, tsz))
+ goto fail;
+
+ size -= tsz;
+ start += tsz;
+ len += tsz;
+
+ /* leave now if filled buffer already */
+ if (!size)
+ return 0;
+ }
+#endif /* CONFIG_PROC_VMCORE_DEVICE_DUMP */
+
+ /* Read remaining elf notes */
tsz = min(elfcorebuf_sz + elfnotes_sz - (size_t)start, size);
- kaddr = elfnotes_buf + start - elfcorebuf_sz;
+ kaddr = elfnotes_buf + start - elfcorebuf_sz - vmcoredd_orig_sz;
if (remap_vmalloc_range_partial(vma, vma->vm_start + len,
kaddr, tsz))
goto fail;
+
size -= tsz;
start += tsz;
len += tsz;
@@ -502,8 +649,8 @@ static struct vmcore* __init get_new_element(void)
return kzalloc(sizeof(struct vmcore), GFP_KERNEL);
}
-static u64 __init get_vmcore_size(size_t elfsz, size_t elfnotesegsz,
- struct list_head *vc_list)
+static u64 get_vmcore_size(size_t elfsz, size_t elfnotesegsz,
+ struct list_head *vc_list)
{
u64 size;
struct vmcore *m;
@@ -665,7 +812,7 @@ static int __init merge_note_headers_elf64(char *elfptr, size_t *elfsz,
return rc;
*notes_sz = roundup(phdr_sz, PAGE_SIZE);
- *notes_buf = alloc_elfnotes_buf(*notes_sz);
+ *notes_buf = vmcore_alloc_buf(*notes_sz);
if (!*notes_buf)
return -ENOMEM;
@@ -698,6 +845,11 @@ static int __init merge_note_headers_elf64(char *elfptr, size_t *elfsz,
/* Modify e_phnum to reflect merged headers. */
ehdr_ptr->e_phnum = ehdr_ptr->e_phnum - nr_ptnote + 1;
+ /* Store the size of all notes. We need this to update the note
+ * header when the device dumps will be added.
+ */
+ elfnotes_orig_sz = phdr.p_memsz;
+
return 0;
}
@@ -851,7 +1003,7 @@ static int __init merge_note_headers_elf32(char *elfptr, size_t *elfsz,
return rc;
*notes_sz = roundup(phdr_sz, PAGE_SIZE);
- *notes_buf = alloc_elfnotes_buf(*notes_sz);
+ *notes_buf = vmcore_alloc_buf(*notes_sz);
if (!*notes_buf)
return -ENOMEM;
@@ -884,6 +1036,11 @@ static int __init merge_note_headers_elf32(char *elfptr, size_t *elfsz,
/* Modify e_phnum to reflect merged headers. */
ehdr_ptr->e_phnum = ehdr_ptr->e_phnum - nr_ptnote + 1;
+ /* Store the size of all notes. We need this to update the note
+ * header when the device dumps will be added.
+ */
+ elfnotes_orig_sz = phdr.p_memsz;
+
return 0;
}
@@ -976,8 +1133,8 @@ static int __init process_ptload_program_headers_elf32(char *elfptr,
}
/* Sets offset fields of vmcore elements. */
-static void __init set_vmcore_list_offsets(size_t elfsz, size_t elfnotes_sz,
- struct list_head *vc_list)
+static void set_vmcore_list_offsets(size_t elfsz, size_t elfnotes_sz,
+ struct list_head *vc_list)
{
loff_t vmcore_off;
struct vmcore *m;
@@ -1145,6 +1302,202 @@ static int __init parse_crash_elf_headers(void)
return 0;
}
+#ifdef CONFIG_PROC_VMCORE_DEVICE_DUMP
+/**
+ * vmcoredd_write_header - Write vmcore device dump header at the
+ * beginning of the dump's buffer.
+ * @buf: Output buffer where the note is written
+ * @data: Dump info
+ * @size: Size of the dump
+ *
+ * Fills beginning of the dump's buffer with vmcore device dump header.
+ */
+static void vmcoredd_write_header(void *buf, struct vmcoredd_data *data,
+ u32 size)
+{
+ struct vmcoredd_header *vdd_hdr = (struct vmcoredd_header *)buf;
+
+ vdd_hdr->n_namesz = sizeof(vdd_hdr->name);
+ vdd_hdr->n_descsz = size + sizeof(vdd_hdr->dump_name);
+ vdd_hdr->n_type = NT_VMCOREDD;
+
+ strncpy((char *)vdd_hdr->name, VMCOREDD_NOTE_NAME,
+ sizeof(vdd_hdr->name));
+ memcpy(vdd_hdr->dump_name, data->dump_name, sizeof(vdd_hdr->dump_name));
+}
+
+/**
+ * vmcoredd_update_program_headers - Update all Elf program headers
+ * @elfptr: Pointer to elf header
+ * @elfnotesz: Size of elf notes aligned to page size
+ * @vmcoreddsz: Size of device dumps to be added to elf note header
+ *
+ * Determine type of Elf header (Elf64 or Elf32) and update the elf note size.
+ * Also update the offsets of all the program headers after the elf note header.
+ */
+static void vmcoredd_update_program_headers(char *elfptr, size_t elfnotesz,
+ size_t vmcoreddsz)
+{
+ unsigned char *e_ident = (unsigned char *)elfptr;
+ u64 start, end, size;
+ loff_t vmcore_off;
+ u32 i;
+
+ vmcore_off = elfcorebuf_sz + elfnotesz;
+
+ if (e_ident[EI_CLASS] == ELFCLASS64) {
+ Elf64_Ehdr *ehdr = (Elf64_Ehdr *)elfptr;
+ Elf64_Phdr *phdr = (Elf64_Phdr *)(elfptr + sizeof(Elf64_Ehdr));
+
+ /* Update all program headers */
+ for (i = 0; i < ehdr->e_phnum; i++, phdr++) {
+ if (phdr->p_type == PT_NOTE) {
+ /* Update note size */
+ phdr->p_memsz = elfnotes_orig_sz + vmcoreddsz;
+ phdr->p_filesz = phdr->p_memsz;
+ continue;
+ }
+
+ start = rounddown(phdr->p_offset, PAGE_SIZE);
+ end = roundup(phdr->p_offset + phdr->p_memsz,
+ PAGE_SIZE);
+ size = end - start;
+ phdr->p_offset = vmcore_off + (phdr->p_offset - start);
+ vmcore_off += size;
+ }
+ } else {
+ Elf32_Ehdr *ehdr = (Elf32_Ehdr *)elfptr;
+ Elf32_Phdr *phdr = (Elf32_Phdr *)(elfptr + sizeof(Elf32_Ehdr));
+
+ /* Update all program headers */
+ for (i = 0; i < ehdr->e_phnum; i++, phdr++) {
+ if (phdr->p_type == PT_NOTE) {
+ /* Update note size */
+ phdr->p_memsz = elfnotes_orig_sz + vmcoreddsz;
+ phdr->p_filesz = phdr->p_memsz;
+ continue;
+ }
+
+ start = rounddown(phdr->p_offset, PAGE_SIZE);
+ end = roundup(phdr->p_offset + phdr->p_memsz,
+ PAGE_SIZE);
+ size = end - start;
+ phdr->p_offset = vmcore_off + (phdr->p_offset - start);
+ vmcore_off += size;
+ }
+ }
+}
+
+/**
+ * vmcoredd_update_size - Update the total size of the device dumps and update
+ * Elf header
+ * @dump_size: Size of the current device dump to be added to total size
+ *
+ * Update the total size of all the device dumps and update the Elf program
+ * headers. Calculate the new offsets for the vmcore list and update the
+ * total vmcore size.
+ */
+static void vmcoredd_update_size(size_t dump_size)
+{
+ vmcoredd_orig_sz += dump_size;
+ elfnotes_sz = roundup(elfnotes_orig_sz, PAGE_SIZE) + vmcoredd_orig_sz;
+ vmcoredd_update_program_headers(elfcorebuf, elfnotes_sz,
+ vmcoredd_orig_sz);
+
+ /* Update vmcore list offsets */
+ set_vmcore_list_offsets(elfcorebuf_sz, elfnotes_sz, &vmcore_list);
+
+ vmcore_size = get_vmcore_size(elfcorebuf_sz, elfnotes_sz,
+ &vmcore_list);
+ proc_vmcore->size = vmcore_size;
+}
+
+/**
+ * vmcore_add_device_dump - Add a buffer containing device dump to vmcore
+ * @data: dump info.
+ *
+ * Allocate a buffer and invoke the calling driver's dump collect routine.
+ * Write Elf note at the beginning of the buffer to indicate vmcore device
+ * dump and add the dump to global list.
+ */
+int vmcore_add_device_dump(struct vmcoredd_data *data)
+{
+ struct vmcoredd_node *dump;
+ void *buf = NULL;
+ size_t data_size;
+ int ret;
+
+ if (!data || !strlen(data->dump_name) ||
+ !data->vmcoredd_callback || !data->size)
+ return -EINVAL;
+
+ dump = vzalloc(sizeof(*dump));
+ if (!dump) {
+ ret = -ENOMEM;
+ goto out_err;
+ }
+
+ /* Keep size of the buffer page aligned so that it can be mmaped */
+ data_size = roundup(sizeof(struct vmcoredd_header) + data->size,
+ PAGE_SIZE);
+
+ /* Allocate buffer for driver's to write their dumps */
+ buf = vmcore_alloc_buf(data_size);
+ if (!buf) {
+ ret = -ENOMEM;
+ goto out_err;
+ }
+
+ vmcoredd_write_header(buf, data, data_size -
+ sizeof(struct vmcoredd_header));
+
+ /* Invoke the driver's dump collection routing */
+ ret = data->vmcoredd_callback(data, buf +
+ sizeof(struct vmcoredd_header));
+ if (ret)
+ goto out_err;
+
+ dump->buf = buf;
+ dump->size = data_size;
+
+ /* Add the dump to driver sysfs list */
+ mutex_lock(&vmcoredd_mutex);
+ list_add_tail(&dump->list, &vmcoredd_list);
+ mutex_unlock(&vmcoredd_mutex);
+
+ vmcoredd_update_size(data_size);
+ return 0;
+
+out_err:
+ if (buf)
+ vfree(buf);
+
+ if (dump)
+ vfree(dump);
+
+ return ret;
+}
+EXPORT_SYMBOL(vmcore_add_device_dump);
+#endif /* CONFIG_PROC_VMCORE_DEVICE_DUMP */
+
+/* Free all dumps in vmcore device dump list */
+static void vmcore_free_device_dumps(void)
+{
+#ifdef CONFIG_PROC_VMCORE_DEVICE_DUMP
+ mutex_lock(&vmcoredd_mutex);
+ while (!list_empty(&vmcoredd_list)) {
+ struct vmcoredd_node *dump;
+
+ dump = list_first_entry(&vmcoredd_list, struct vmcoredd_node,
+ list);
+ list_del(&dump->list);
+ vfree(dump->buf);
+ vfree(dump);
+ }
+ mutex_unlock(&vmcoredd_mutex);
+#endif /* CONFIG_PROC_VMCORE_DEVICE_DUMP */
+}
+
/* Init function for vmcore module. */
static int __init vmcore_init(void)
{
@@ -1192,4 +1545,7 @@ void vmcore_cleanup(void)
kfree(m);
}
free_elfcorebuf();
+
+ /* clear vmcore device dump list */
+ vmcore_free_device_dumps();
}