diff options
Diffstat (limited to 'fs/proc')
-rw-r--r-- | fs/proc/array.c | 162 | ||||
-rw-r--r-- | fs/proc/base.c | 245 | ||||
-rw-r--r-- | fs/proc/inode.c | 6 | ||||
-rw-r--r-- | fs/proc/internal.h | 3 | ||||
-rw-r--r-- | fs/proc/proc_sysctl.c | 4 | ||||
-rw-r--r-- | fs/proc/root.c | 2 | ||||
-rw-r--r-- | fs/proc/task_mmu.c | 92 | ||||
-rw-r--r-- | fs/proc/task_nommu.c | 2 |
8 files changed, 378 insertions, 138 deletions
diff --git a/fs/proc/array.c b/fs/proc/array.c index f9bd395b3473..c1c207c36cae 100644 --- a/fs/proc/array.c +++ b/fs/proc/array.c @@ -81,6 +81,7 @@ #include <linux/pid_namespace.h> #include <linux/ptrace.h> #include <linux/tracehook.h> +#include <linux/user_namespace.h> #include <asm/pgtable.h> #include <asm/processor.h> @@ -161,6 +162,7 @@ static inline const char *get_task_state(struct task_struct *tsk) static inline void task_state(struct seq_file *m, struct pid_namespace *ns, struct pid *pid, struct task_struct *p) { + struct user_namespace *user_ns = current_user_ns(); struct group_info *group_info; int g; struct fdtable *fdt = NULL; @@ -189,8 +191,14 @@ static inline void task_state(struct seq_file *m, struct pid_namespace *ns, task_tgid_nr_ns(p, ns), pid_nr_ns(pid, ns), ppid, tpid, - cred->uid, cred->euid, cred->suid, cred->fsuid, - cred->gid, cred->egid, cred->sgid, cred->fsgid); + from_kuid_munged(user_ns, cred->uid), + from_kuid_munged(user_ns, cred->euid), + from_kuid_munged(user_ns, cred->suid), + from_kuid_munged(user_ns, cred->fsuid), + from_kgid_munged(user_ns, cred->gid), + from_kgid_munged(user_ns, cred->egid), + from_kgid_munged(user_ns, cred->sgid), + from_kgid_munged(user_ns, cred->fsgid)); task_lock(p); if (p->files) @@ -205,7 +213,8 @@ static inline void task_state(struct seq_file *m, struct pid_namespace *ns, task_unlock(p); for (g = 0; g < min(group_info->ngroups, NGROUPS_SMALL); g++) - seq_printf(m, "%d ", GROUP_AT(group_info, g)); + seq_printf(m, "%d ", + from_kgid_munged(user_ns, GROUP_AT(group_info, g))); put_cred(cred); seq_putc(m, '\n'); @@ -361,7 +370,7 @@ static int do_task_stat(struct seq_file *m, struct pid_namespace *ns, struct pid *pid, struct task_struct *task, int whole) { unsigned long vsize, eip, esp, wchan = ~0UL; - long priority, nice; + int priority, nice; int tty_pgrp = -1, tty_nr = 0; sigset_t sigign, sigcatch; char state; @@ -483,7 +492,7 @@ static int do_task_stat(struct seq_file *m, struct pid_namespace *ns, seq_put_decimal_ull(m, ' ', 0); seq_put_decimal_ull(m, ' ', start_time); seq_put_decimal_ull(m, ' ', vsize); - seq_put_decimal_ll(m, ' ', mm ? get_mm_rss(mm) : 0); + seq_put_decimal_ull(m, ' ', mm ? get_mm_rss(mm) : 0); seq_put_decimal_ull(m, ' ', rsslim); seq_put_decimal_ull(m, ' ', mm ? (permitted ? mm->start_code : 1) : 0); seq_put_decimal_ull(m, ' ', mm ? (permitted ? mm->end_code : 1) : 0); @@ -508,9 +517,23 @@ static int do_task_stat(struct seq_file *m, struct pid_namespace *ns, seq_put_decimal_ull(m, ' ', delayacct_blkio_ticks(task)); seq_put_decimal_ull(m, ' ', cputime_to_clock_t(gtime)); seq_put_decimal_ll(m, ' ', cputime_to_clock_t(cgtime)); - seq_put_decimal_ull(m, ' ', (mm && permitted) ? mm->start_data : 0); - seq_put_decimal_ull(m, ' ', (mm && permitted) ? mm->end_data : 0); - seq_put_decimal_ull(m, ' ', (mm && permitted) ? mm->start_brk : 0); + + if (mm && permitted) { + seq_put_decimal_ull(m, ' ', mm->start_data); + seq_put_decimal_ull(m, ' ', mm->end_data); + seq_put_decimal_ull(m, ' ', mm->start_brk); + seq_put_decimal_ull(m, ' ', mm->arg_start); + seq_put_decimal_ull(m, ' ', mm->arg_end); + seq_put_decimal_ull(m, ' ', mm->env_start); + seq_put_decimal_ull(m, ' ', mm->env_end); + } else + seq_printf(m, " 0 0 0 0 0 0 0"); + + if (permitted) + seq_put_decimal_ll(m, ' ', task->exit_code); + else + seq_put_decimal_ll(m, ' ', 0); + seq_putc(m, '\n'); if (mm) mmput(mm); @@ -556,3 +579,126 @@ int proc_pid_statm(struct seq_file *m, struct pid_namespace *ns, return 0; } + +#ifdef CONFIG_CHECKPOINT_RESTORE +static struct pid * +get_children_pid(struct inode *inode, struct pid *pid_prev, loff_t pos) +{ + struct task_struct *start, *task; + struct pid *pid = NULL; + + read_lock(&tasklist_lock); + + start = pid_task(proc_pid(inode), PIDTYPE_PID); + if (!start) + goto out; + + /* + * Lets try to continue searching first, this gives + * us significant speedup on children-rich processes. + */ + if (pid_prev) { + task = pid_task(pid_prev, PIDTYPE_PID); + if (task && task->real_parent == start && + !(list_empty(&task->sibling))) { + if (list_is_last(&task->sibling, &start->children)) + goto out; + task = list_first_entry(&task->sibling, + struct task_struct, sibling); + pid = get_pid(task_pid(task)); + goto out; + } + } + + /* + * Slow search case. + * + * We might miss some children here if children + * are exited while we were not holding the lock, + * but it was never promised to be accurate that + * much. + * + * "Just suppose that the parent sleeps, but N children + * exit after we printed their tids. Now the slow paths + * skips N extra children, we miss N tasks." (c) + * + * So one need to stop or freeze the leader and all + * its children to get a precise result. + */ + list_for_each_entry(task, &start->children, sibling) { + if (pos-- == 0) { + pid = get_pid(task_pid(task)); + break; + } + } + +out: + read_unlock(&tasklist_lock); + return pid; +} + +static int children_seq_show(struct seq_file *seq, void *v) +{ + struct inode *inode = seq->private; + pid_t pid; + + pid = pid_nr_ns(v, inode->i_sb->s_fs_info); + return seq_printf(seq, "%d ", pid); +} + +static void *children_seq_start(struct seq_file *seq, loff_t *pos) +{ + return get_children_pid(seq->private, NULL, *pos); +} + +static void *children_seq_next(struct seq_file *seq, void *v, loff_t *pos) +{ + struct pid *pid; + + pid = get_children_pid(seq->private, v, *pos + 1); + put_pid(v); + + ++*pos; + return pid; +} + +static void children_seq_stop(struct seq_file *seq, void *v) +{ + put_pid(v); +} + +static const struct seq_operations children_seq_ops = { + .start = children_seq_start, + .next = children_seq_next, + .stop = children_seq_stop, + .show = children_seq_show, +}; + +static int children_seq_open(struct inode *inode, struct file *file) +{ + struct seq_file *m; + int ret; + + ret = seq_open(file, &children_seq_ops); + if (ret) + return ret; + + m = file->private_data; + m->private = inode; + + return ret; +} + +int children_seq_release(struct inode *inode, struct file *file) +{ + seq_release(inode, file); + return 0; +} + +const struct file_operations proc_tid_children_operations = { + .open = children_seq_open, + .read = seq_read, + .llseek = seq_lseek, + .release = children_seq_release, +}; +#endif /* CONFIG_CHECKPOINT_RESTORE */ diff --git a/fs/proc/base.c b/fs/proc/base.c index 1c8b280146d7..437195f204e1 100644 --- a/fs/proc/base.c +++ b/fs/proc/base.c @@ -81,6 +81,7 @@ #include <linux/oom.h> #include <linux/elf.h> #include <linux/pid_namespace.h> +#include <linux/user_namespace.h> #include <linux/fs_struct.h> #include <linux/slab.h> #include <linux/flex_array.h> @@ -198,11 +199,6 @@ static int proc_root_link(struct dentry *dentry, struct path *path) return result; } -struct mm_struct *mm_for_maps(struct task_struct *task) -{ - return mm_access(task, PTRACE_MODE_READ); -} - static int proc_pid_cmdline(struct task_struct *task, char * buffer) { int res = 0; @@ -242,7 +238,7 @@ out: static int proc_pid_auxv(struct task_struct *task, char *buffer) { - struct mm_struct *mm = mm_for_maps(task); + struct mm_struct *mm = mm_access(task, PTRACE_MODE_READ); int res = PTR_ERR(mm); if (mm && !IS_ERR(mm)) { unsigned int nwords = 0; @@ -410,12 +406,13 @@ static const struct file_operations proc_lstats_operations = { static int proc_oom_score(struct task_struct *task, char *buffer) { + unsigned long totalpages = totalram_pages + total_swap_pages; unsigned long points = 0; read_lock(&tasklist_lock); if (pid_alive(task)) - points = oom_badness(task, NULL, NULL, - totalram_pages + total_swap_pages); + points = oom_badness(task, NULL, NULL, totalpages) * + 1000 / totalpages; read_unlock(&tasklist_lock); return sprintf(buffer, "%lu\n", points); } @@ -677,7 +674,7 @@ static const struct file_operations proc_single_file_operations = { .release = single_release, }; -static int mem_open(struct inode* inode, struct file* file) +static int __mem_open(struct inode *inode, struct file *file, unsigned int mode) { struct task_struct *task = get_proc_task(file->f_path.dentry->d_inode); struct mm_struct *mm; @@ -685,7 +682,7 @@ static int mem_open(struct inode* inode, struct file* file) if (!task) return -ESRCH; - mm = mm_access(task, PTRACE_MODE_ATTACH); + mm = mm_access(task, mode); put_task_struct(task); if (IS_ERR(mm)) @@ -705,6 +702,11 @@ static int mem_open(struct inode* inode, struct file* file) return 0; } +static int mem_open(struct inode *inode, struct file *file) +{ + return __mem_open(inode, file, PTRACE_MODE_ATTACH); +} + static ssize_t mem_rw(struct file *file, char __user *buf, size_t count, loff_t *ppos, int write) { @@ -801,30 +803,29 @@ static const struct file_operations proc_mem_operations = { .release = mem_release, }; +static int environ_open(struct inode *inode, struct file *file) +{ + return __mem_open(inode, file, PTRACE_MODE_READ); +} + static ssize_t environ_read(struct file *file, char __user *buf, size_t count, loff_t *ppos) { - struct task_struct *task = get_proc_task(file->f_dentry->d_inode); char *page; unsigned long src = *ppos; - int ret = -ESRCH; - struct mm_struct *mm; + int ret = 0; + struct mm_struct *mm = file->private_data; - if (!task) - goto out_no_task; + if (!mm) + return 0; - ret = -ENOMEM; page = (char *)__get_free_page(GFP_TEMPORARY); if (!page) - goto out; - - - mm = mm_for_maps(task); - ret = PTR_ERR(mm); - if (!mm || IS_ERR(mm)) - goto out_free; + return -ENOMEM; ret = 0; + if (!atomic_inc_not_zero(&mm->mm_users)) + goto free; while (count > 0) { int this_len, retval, max_len; @@ -836,7 +837,7 @@ static ssize_t environ_read(struct file *file, char __user *buf, max_len = (count > PAGE_SIZE) ? PAGE_SIZE : count; this_len = (this_len > max_len) ? max_len : this_len; - retval = access_process_vm(task, (mm->env_start + src), + retval = access_remote_vm(mm, (mm->env_start + src), page, this_len, 0); if (retval <= 0) { @@ -855,19 +856,18 @@ static ssize_t environ_read(struct file *file, char __user *buf, count -= retval; } *ppos = src; - mmput(mm); -out_free: + +free: free_page((unsigned long) page); -out: - put_task_struct(task); -out_no_task: return ret; } static const struct file_operations proc_environ_operations = { + .open = environ_open, .read = environ_read, .llseek = generic_file_llseek, + .release = mem_release, }; static ssize_t oom_adjust_read(struct file *file, char __user *buf, @@ -1561,8 +1561,8 @@ int pid_getattr(struct vfsmount *mnt, struct dentry *dentry, struct kstat *stat) generic_fillattr(inode, stat); rcu_read_lock(); - stat->uid = 0; - stat->gid = 0; + stat->uid = GLOBAL_ROOT_UID; + stat->gid = GLOBAL_ROOT_GID; task = pid_task(proc_pid(inode), PIDTYPE_PID); if (task) { if (!has_pid_permissions(pid, task, 2)) { @@ -1622,8 +1622,8 @@ int pid_revalidate(struct dentry *dentry, struct nameidata *nd) inode->i_gid = cred->egid; rcu_read_unlock(); } else { - inode->i_uid = 0; - inode->i_gid = 0; + inode->i_uid = GLOBAL_ROOT_UID; + inode->i_gid = GLOBAL_ROOT_GID; } inode->i_mode &= ~(S_ISUID | S_ISGID); security_task_to_inode(task, inode); @@ -1799,10 +1799,15 @@ static int tid_fd_revalidate(struct dentry *dentry, struct nameidata *nd) if (task) { files = get_files_struct(task); if (files) { + struct file *file; rcu_read_lock(); - if (fcheck_files(files, fd)) { + file = fcheck_files(files, fd); + if (file) { + unsigned f_mode = file->f_mode; + rcu_read_unlock(); put_files_struct(files); + if (task_dumpable(task)) { rcu_read_lock(); cred = __task_cred(task); @@ -1810,10 +1815,19 @@ static int tid_fd_revalidate(struct dentry *dentry, struct nameidata *nd) inode->i_gid = cred->egid; rcu_read_unlock(); } else { - inode->i_uid = 0; - inode->i_gid = 0; + inode->i_uid = GLOBAL_ROOT_UID; + inode->i_gid = GLOBAL_ROOT_GID; + } + + if (S_ISLNK(inode->i_mode)) { + unsigned i_mode = S_IFLNK; + if (f_mode & FMODE_READ) + i_mode |= S_IRUSR | S_IXUSR; + if (f_mode & FMODE_WRITE) + i_mode |= S_IWUSR | S_IXUSR; + inode->i_mode = i_mode; } - inode->i_mode &= ~(S_ISUID | S_ISGID); + security_task_to_inode(task, inode); put_task_struct(task); return 1; @@ -1836,9 +1850,7 @@ static const struct dentry_operations tid_fd_dentry_operations = static struct dentry *proc_fd_instantiate(struct inode *dir, struct dentry *dentry, struct task_struct *task, const void *ptr) { - unsigned fd = *(const unsigned *)ptr; - struct file *file; - struct files_struct *files; + unsigned fd = (unsigned long)ptr; struct inode *inode; struct proc_inode *ei; struct dentry *error = ERR_PTR(-ENOENT); @@ -1848,26 +1860,8 @@ static struct dentry *proc_fd_instantiate(struct inode *dir, goto out; ei = PROC_I(inode); ei->fd = fd; - files = get_files_struct(task); - if (!files) - goto out_iput; - inode->i_mode = S_IFLNK; - - /* - * We are not taking a ref to the file structure, so we must - * hold ->file_lock. - */ - spin_lock(&files->file_lock); - file = fcheck_files(files, fd); - if (!file) - goto out_unlock; - if (file->f_mode & FMODE_READ) - inode->i_mode |= S_IRUSR | S_IXUSR; - if (file->f_mode & FMODE_WRITE) - inode->i_mode |= S_IWUSR | S_IXUSR; - spin_unlock(&files->file_lock); - put_files_struct(files); + inode->i_mode = S_IFLNK; inode->i_op = &proc_pid_link_inode_operations; inode->i_size = 64; ei->op.proc_get_link = proc_fd_link; @@ -1879,12 +1873,6 @@ static struct dentry *proc_fd_instantiate(struct inode *dir, out: return error; -out_unlock: - spin_unlock(&files->file_lock); - put_files_struct(files); -out_iput: - iput(inode); - goto out; } static struct dentry *proc_lookupfd_common(struct inode *dir, @@ -1900,7 +1888,7 @@ static struct dentry *proc_lookupfd_common(struct inode *dir, if (fd == ~0U) goto out; - result = instantiate(dir, dentry, task, &fd); + result = instantiate(dir, dentry, task, (void *)(unsigned long)fd); out: put_task_struct(task); out_no_task: @@ -1943,21 +1931,22 @@ static int proc_readfd_common(struct file * filp, void * dirent, fd++, filp->f_pos++) { char name[PROC_NUMBUF]; int len; + int rv; if (!fcheck_files(files, fd)) continue; rcu_read_unlock(); len = snprintf(name, sizeof(name), "%d", fd); - if (proc_fill_cache(filp, dirent, filldir, - name, len, instantiate, - p, &fd) < 0) { - rcu_read_lock(); - break; - } + rv = proc_fill_cache(filp, dirent, filldir, + name, len, instantiate, p, + (void *)(unsigned long)fd); + if (rv < 0) + goto out_fd_loop; rcu_read_lock(); } rcu_read_unlock(); +out_fd_loop: put_files_struct(files); } out: @@ -2037,11 +2026,8 @@ static int map_files_d_revalidate(struct dentry *dentry, struct nameidata *nd) if (!task) goto out_notask; - if (!ptrace_may_access(task, PTRACE_MODE_READ)) - goto out; - - mm = get_task_mm(task); - if (!mm) + mm = mm_access(task, PTRACE_MODE_READ); + if (IS_ERR_OR_NULL(mm)) goto out; if (!dname_to_vma_addr(dentry, &vm_start, &vm_end)) { @@ -2060,8 +2046,8 @@ static int map_files_d_revalidate(struct dentry *dentry, struct nameidata *nd) inode->i_gid = cred->egid; rcu_read_unlock(); } else { - inode->i_uid = 0; - inode->i_gid = 0; + inode->i_uid = GLOBAL_ROOT_UID; + inode->i_gid = GLOBAL_ROOT_GID; } security_task_to_inode(task, inode); status = 1; @@ -2177,16 +2163,16 @@ static struct dentry *proc_map_files_lookup(struct inode *dir, goto out; result = ERR_PTR(-EACCES); - if (lock_trace(task)) + if (!ptrace_may_access(task, PTRACE_MODE_READ)) goto out_put_task; result = ERR_PTR(-ENOENT); if (dname_to_vma_addr(dentry, &vm_start, &vm_end)) - goto out_unlock; + goto out_put_task; mm = get_task_mm(task); if (!mm) - goto out_unlock; + goto out_put_task; down_read(&mm->mmap_sem); vma = find_exact_vma(mm, vm_start, vm_end); @@ -2198,8 +2184,6 @@ static struct dentry *proc_map_files_lookup(struct inode *dir, out_no_vma: up_read(&mm->mmap_sem); mmput(mm); -out_unlock: - unlock_trace(task); out_put_task: put_task_struct(task); out: @@ -2233,7 +2217,7 @@ proc_map_files_readdir(struct file *filp, void *dirent, filldir_t filldir) goto out; ret = -EACCES; - if (lock_trace(task)) + if (!ptrace_may_access(task, PTRACE_MODE_READ)) goto out_put_task; ret = 0; @@ -2241,12 +2225,12 @@ proc_map_files_readdir(struct file *filp, void *dirent, filldir_t filldir) case 0: ino = inode->i_ino; if (filldir(dirent, ".", 1, 0, ino, DT_DIR) < 0) - goto out_unlock; + goto out_put_task; filp->f_pos++; case 1: ino = parent_ino(dentry); if (filldir(dirent, "..", 2, 1, ino, DT_DIR) < 0) - goto out_unlock; + goto out_put_task; filp->f_pos++; default: { @@ -2257,7 +2241,7 @@ proc_map_files_readdir(struct file *filp, void *dirent, filldir_t filldir) mm = get_task_mm(task); if (!mm) - goto out_unlock; + goto out_put_task; down_read(&mm->mmap_sem); nr_files = 0; @@ -2287,7 +2271,7 @@ proc_map_files_readdir(struct file *filp, void *dirent, filldir_t filldir) flex_array_free(fa); up_read(&mm->mmap_sem); mmput(mm); - goto out_unlock; + goto out_put_task; } for (i = 0, vma = mm->mmap, pos = 2; vma; vma = vma->vm_next) { @@ -2332,8 +2316,6 @@ proc_map_files_readdir(struct file *filp, void *dirent, filldir_t filldir) } } -out_unlock: - unlock_trace(task); out_put_task: put_task_struct(task); out: @@ -2374,7 +2356,7 @@ static const struct inode_operations proc_fd_inode_operations = { static struct dentry *proc_fdinfo_instantiate(struct inode *dir, struct dentry *dentry, struct task_struct *task, const void *ptr) { - unsigned fd = *(unsigned *)ptr; + unsigned fd = (unsigned long)ptr; struct inode *inode; struct proc_inode *ei; struct dentry *error = ERR_PTR(-ENOENT); @@ -2943,6 +2925,74 @@ static int proc_tgid_io_accounting(struct task_struct *task, char *buffer) } #endif /* CONFIG_TASK_IO_ACCOUNTING */ +#ifdef CONFIG_USER_NS +static int proc_id_map_open(struct inode *inode, struct file *file, + struct seq_operations *seq_ops) +{ + struct user_namespace *ns = NULL; + struct task_struct *task; + struct seq_file *seq; + int ret = -EINVAL; + + task = get_proc_task(inode); + if (task) { + rcu_read_lock(); + ns = get_user_ns(task_cred_xxx(task, user_ns)); + rcu_read_unlock(); + put_task_struct(task); + } + if (!ns) + goto err; + + ret = seq_open(file, seq_ops); + if (ret) + goto err_put_ns; + + seq = file->private_data; + seq->private = ns; + + return 0; +err_put_ns: + put_user_ns(ns); +err: + return ret; +} + +static int proc_id_map_release(struct inode *inode, struct file *file) +{ + struct seq_file *seq = file->private_data; + struct user_namespace *ns = seq->private; + put_user_ns(ns); + return seq_release(inode, file); +} + +static int proc_uid_map_open(struct inode *inode, struct file *file) +{ + return proc_id_map_open(inode, file, &proc_uid_seq_operations); +} + +static int proc_gid_map_open(struct inode *inode, struct file *file) +{ + return proc_id_map_open(inode, file, &proc_gid_seq_operations); +} + +static const struct file_operations proc_uid_map_operations = { + .open = proc_uid_map_open, + .write = proc_uid_map_write, + .read = seq_read, + .llseek = seq_lseek, + .release = proc_id_map_release, +}; + +static const struct file_operations proc_gid_map_operations = { + .open = proc_gid_map_open, + .write = proc_gid_map_write, + .read = seq_read, + .llseek = seq_lseek, + .release = proc_id_map_release, +}; +#endif /* CONFIG_USER_NS */ + static int proc_pid_personality(struct seq_file *m, struct pid_namespace *ns, struct pid *pid, struct task_struct *task) { @@ -3045,6 +3095,10 @@ static const struct pid_entry tgid_base_stuff[] = { #ifdef CONFIG_HARDWALL INF("hardwall", S_IRUGO, proc_pid_hardwall), #endif +#ifdef CONFIG_USER_NS + REG("uid_map", S_IRUGO|S_IWUSR, proc_uid_map_operations), + REG("gid_map", S_IRUGO|S_IWUSR, proc_gid_map_operations), +#endif }; static int proc_tgid_base_readdir(struct file * filp, @@ -3349,6 +3403,9 @@ static const struct pid_entry tid_base_stuff[] = { ONE("stat", S_IRUGO, proc_tid_stat), ONE("statm", S_IRUGO, proc_pid_statm), REG("maps", S_IRUGO, proc_tid_maps_operations), +#ifdef CONFIG_CHECKPOINT_RESTORE + REG("children", S_IRUGO, proc_tid_children_operations), +#endif #ifdef CONFIG_NUMA REG("numa_maps", S_IRUGO, proc_tid_numa_maps_operations), #endif @@ -3400,6 +3457,10 @@ static const struct pid_entry tid_base_stuff[] = { #ifdef CONFIG_HARDWALL INF("hardwall", S_IRUGO, proc_pid_hardwall), #endif +#ifdef CONFIG_USER_NS + REG("uid_map", S_IRUGO|S_IWUSR, proc_uid_map_operations), + REG("gid_map", S_IRUGO|S_IWUSR, proc_gid_map_operations), +#endif }; static int proc_tid_base_readdir(struct file * filp, diff --git a/fs/proc/inode.c b/fs/proc/inode.c index 205c92280838..7ac817b64a71 100644 --- a/fs/proc/inode.c +++ b/fs/proc/inode.c @@ -33,7 +33,7 @@ static void proc_evict_inode(struct inode *inode) const struct proc_ns_operations *ns_ops; truncate_inode_pages(&inode->i_data, 0); - end_writeback(inode); + clear_inode(inode); /* Stop tracking associated processes */ put_pid(PROC_I(inode)->pid); @@ -108,8 +108,8 @@ static int proc_show_options(struct seq_file *seq, struct dentry *root) struct super_block *sb = root->d_sb; struct pid_namespace *pid = sb->s_fs_info; - if (pid->pid_gid) - seq_printf(seq, ",gid=%lu", (unsigned long)pid->pid_gid); + if (!gid_eq(pid->pid_gid, GLOBAL_ROOT_GID)) + seq_printf(seq, ",gid=%u", from_kgid_munged(&init_user_ns, pid->pid_gid)); if (pid->hide_pid != 0) seq_printf(seq, ",hidepid=%u", pid->hide_pid); diff --git a/fs/proc/internal.h b/fs/proc/internal.h index 5f79bb8b4c60..eca4aca5b6e2 100644 --- a/fs/proc/internal.h +++ b/fs/proc/internal.h @@ -31,8 +31,6 @@ struct vmalloc_info { unsigned long largest_chunk; }; -extern struct mm_struct *mm_for_maps(struct task_struct *); - #ifdef CONFIG_MMU #define VMALLOC_TOTAL (VMALLOC_END - VMALLOC_START) extern void get_vmalloc_info(struct vmalloc_info *vmi); @@ -56,6 +54,7 @@ extern int proc_pid_statm(struct seq_file *m, struct pid_namespace *ns, struct pid *pid, struct task_struct *task); extern loff_t mem_lseek(struct file *file, loff_t offset, int orig); +extern const struct file_operations proc_tid_children_operations; extern const struct file_operations proc_pid_maps_operations; extern const struct file_operations proc_tid_maps_operations; extern const struct file_operations proc_pid_numa_maps_operations; diff --git a/fs/proc/proc_sysctl.c b/fs/proc/proc_sysctl.c index 21d836f40292..3476bca8f7af 100644 --- a/fs/proc/proc_sysctl.c +++ b/fs/proc/proc_sysctl.c @@ -371,9 +371,9 @@ void register_sysctl_root(struct ctl_table_root *root) static int test_perm(int mode, int op) { - if (!current_euid()) + if (uid_eq(current_euid(), GLOBAL_ROOT_UID)) mode >>= 6; - else if (in_egroup_p(0)) + else if (in_egroup_p(GLOBAL_ROOT_GID)) mode >>= 3; if ((op & ~mode & (MAY_READ|MAY_WRITE|MAY_EXEC)) == 0) return 0; diff --git a/fs/proc/root.c b/fs/proc/root.c index eed44bfc85db..7c30fce037c0 100644 --- a/fs/proc/root.c +++ b/fs/proc/root.c @@ -67,7 +67,7 @@ static int proc_parse_options(char *options, struct pid_namespace *pid) case Opt_gid: if (match_int(&args[0], &option)) return 0; - pid->pid_gid = option; + pid->pid_gid = make_kgid(current_user_ns(), option); break; case Opt_hidepid: if (match_int(&args[0], &option)) diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c index 2d60492d6df8..4540b8f76f16 100644 --- a/fs/proc/task_mmu.c +++ b/fs/proc/task_mmu.c @@ -125,7 +125,7 @@ static void *m_start(struct seq_file *m, loff_t *pos) if (!priv->task) return ERR_PTR(-ESRCH); - mm = mm_for_maps(priv->task); + mm = mm_access(priv->task, PTRACE_MODE_READ); if (!mm || IS_ERR(mm)) return mm; down_read(&mm->mmap_sem); @@ -393,6 +393,7 @@ struct mem_size_stats { unsigned long anonymous; unsigned long anonymous_thp; unsigned long swap; + unsigned long nonlinear; u64 pss; }; @@ -402,24 +403,33 @@ static void smaps_pte_entry(pte_t ptent, unsigned long addr, { struct mem_size_stats *mss = walk->private; struct vm_area_struct *vma = mss->vma; - struct page *page; + pgoff_t pgoff = linear_page_index(vma, addr); + struct page *page = NULL; int mapcount; - if (is_swap_pte(ptent)) { - mss->swap += ptent_size; - return; + if (pte_present(ptent)) { + page = vm_normal_page(vma, addr, ptent); + } else if (is_swap_pte(ptent)) { + swp_entry_t swpent = pte_to_swp_entry(ptent); + + if (!non_swap_entry(swpent)) + mss->swap += ptent_size; + else if (is_migration_entry(swpent)) + page = migration_entry_to_page(swpent); + } else if (pte_file(ptent)) { + if (pte_to_pgoff(ptent) != pgoff) + mss->nonlinear += ptent_size; } - if (!pte_present(ptent)) - return; - - page = vm_normal_page(vma, addr, ptent); if (!page) return; if (PageAnon(page)) mss->anonymous += ptent_size; + if (page->index != pgoff) + mss->nonlinear += ptent_size; + mss->resident += ptent_size; /* Accumulate the size in pages that have been accessed. */ if (pte_young(ptent) || PageReferenced(page)) @@ -521,6 +531,10 @@ static int show_smap(struct seq_file *m, void *v, int is_pid) (vma->vm_flags & VM_LOCKED) ? (unsigned long)(mss.pss >> (10 + PSS_SHIFT)) : 0); + if (vma->vm_flags & VM_NONLINEAR) + seq_printf(m, "Nonlinear: %8lu kB\n", + mss.nonlinear >> 10); + if (m->count < m->size) /* vma is copied successfully */ m->version = (vma != get_gate_vma(task->mm)) ? vma->vm_start : 0; @@ -700,6 +714,7 @@ struct pagemapread { #define PM_PRESENT PM_STATUS(4LL) #define PM_SWAP PM_STATUS(2LL) +#define PM_FILE PM_STATUS(1LL) #define PM_NOT_PRESENT PM_PSHIFT(PAGE_SHIFT) #define PM_END_OF_BUFFER 1 @@ -733,20 +748,33 @@ static int pagemap_pte_hole(unsigned long start, unsigned long end, return err; } -static u64 swap_pte_to_pagemap_entry(pte_t pte) +static void pte_to_pagemap_entry(pagemap_entry_t *pme, + struct vm_area_struct *vma, unsigned long addr, pte_t pte) { - swp_entry_t e = pte_to_swp_entry(pte); - return swp_type(e) | (swp_offset(e) << MAX_SWAPFILES_SHIFT); -} + u64 frame, flags; + struct page *page = NULL; + + if (pte_present(pte)) { + frame = pte_pfn(pte); + flags = PM_PRESENT; + page = vm_normal_page(vma, addr, pte); + } else if (is_swap_pte(pte)) { + swp_entry_t entry = pte_to_swp_entry(pte); + + frame = swp_type(entry) | + (swp_offset(entry) << MAX_SWAPFILES_SHIFT); + flags = PM_SWAP; + if (is_migration_entry(entry)) + page = migration_entry_to_page(entry); + } else { + *pme = make_pme(PM_NOT_PRESENT); + return; + } -static void pte_to_pagemap_entry(pagemap_entry_t *pme, pte_t pte) -{ - if (is_swap_pte(pte)) - *pme = make_pme(PM_PFRAME(swap_pte_to_pagemap_entry(pte)) - | PM_PSHIFT(PAGE_SHIFT) | PM_SWAP); - else if (pte_present(pte)) - *pme = make_pme(PM_PFRAME(pte_pfn(pte)) - | PM_PSHIFT(PAGE_SHIFT) | PM_PRESENT); + if (page && !PageAnon(page)) + flags |= PM_FILE; + + *pme = make_pme(PM_PFRAME(frame) | PM_PSHIFT(PAGE_SHIFT) | flags); } #ifdef CONFIG_TRANSPARENT_HUGEPAGE @@ -761,6 +789,8 @@ static void thp_pmd_to_pagemap_entry(pagemap_entry_t *pme, if (pmd_present(pmd)) *pme = make_pme(PM_PFRAME(pmd_pfn(pmd) + offset) | PM_PSHIFT(PAGE_SHIFT) | PM_PRESENT); + else + *pme = make_pme(PM_NOT_PRESENT); } #else static inline void thp_pmd_to_pagemap_entry(pagemap_entry_t *pme, @@ -780,7 +810,7 @@ static int pagemap_pte_range(pmd_t *pmd, unsigned long addr, unsigned long end, /* find the first VMA at or above 'addr' */ vma = find_vma(walk->mm, addr); - if (pmd_trans_huge_lock(pmd, vma) == 1) { + if (vma && pmd_trans_huge_lock(pmd, vma) == 1) { for (; addr != end; addr += PAGE_SIZE) { unsigned long offset; @@ -801,15 +831,17 @@ static int pagemap_pte_range(pmd_t *pmd, unsigned long addr, unsigned long end, /* check to see if we've left 'vma' behind * and need a new, higher one */ - if (vma && (addr >= vma->vm_end)) + if (vma && (addr >= vma->vm_end)) { vma = find_vma(walk->mm, addr); + pme = make_pme(PM_NOT_PRESENT); + } /* check that 'vma' actually covers this address, * and that it isn't a huge page vma */ if (vma && (vma->vm_start <= addr) && !is_vm_hugetlb_page(vma)) { pte = pte_offset_map(pmd, addr); - pte_to_pagemap_entry(&pme, *pte); + pte_to_pagemap_entry(&pme, vma, addr, *pte); /* unmap before userspace copy */ pte_unmap(pte); } @@ -830,6 +862,8 @@ static void huge_pte_to_pagemap_entry(pagemap_entry_t *pme, if (pte_present(pte)) *pme = make_pme(PM_PFRAME(pte_pfn(pte) + offset) | PM_PSHIFT(PAGE_SHIFT) | PM_PRESENT); + else + *pme = make_pme(PM_NOT_PRESENT); } /* This function walks within one hugetlb entry in the single call */ @@ -839,7 +873,7 @@ static int pagemap_hugetlb_range(pte_t *pte, unsigned long hmask, { struct pagemapread *pm = walk->private; int err = 0; - pagemap_entry_t pme = make_pme(PM_NOT_PRESENT); + pagemap_entry_t pme; for (; addr != end; addr += PAGE_SIZE) { int offset = (addr & ~hmask) >> PAGE_SHIFT; @@ -861,11 +895,11 @@ static int pagemap_hugetlb_range(pte_t *pte, unsigned long hmask, * For each page in the address space, this file contains one 64-bit entry * consisting of the following: * - * Bits 0-55 page frame number (PFN) if present + * Bits 0-54 page frame number (PFN) if present * Bits 0-4 swap type if swapped - * Bits 5-55 swap offset if swapped + * Bits 5-54 swap offset if swapped * Bits 55-60 page shift (page size = 1<<page shift) - * Bit 61 reserved for future use + * Bit 61 page is file-page or shared-anon * Bit 62 page swapped * Bit 63 page present * @@ -911,7 +945,7 @@ static ssize_t pagemap_read(struct file *file, char __user *buf, if (!pm.buffer) goto out_task; - mm = mm_for_maps(task); + mm = mm_access(task, PTRACE_MODE_READ); ret = PTR_ERR(mm); if (!mm || IS_ERR(mm)) goto out_free; diff --git a/fs/proc/task_nommu.c b/fs/proc/task_nommu.c index 74fe164d1b23..1ccfa537f5f5 100644 --- a/fs/proc/task_nommu.c +++ b/fs/proc/task_nommu.c @@ -223,7 +223,7 @@ static void *m_start(struct seq_file *m, loff_t *pos) if (!priv->task) return ERR_PTR(-ESRCH); - mm = mm_for_maps(priv->task); + mm = mm_access(priv->task, PTRACE_MODE_READ); if (!mm || IS_ERR(mm)) { put_task_struct(priv->task); priv->task = NULL; |