summaryrefslogtreecommitdiff
path: root/kernel
diff options
context:
space:
mode:
Diffstat (limited to 'kernel')
-rw-r--r--kernel/audit.c67
-rw-r--r--kernel/audit.h5
-rw-r--r--kernel/audit_tree.c9
-rw-r--r--kernel/audit_watch.c4
-rw-r--r--kernel/auditfilter.c12
-rw-r--r--kernel/auditsc.c16
-rw-r--r--kernel/cgroup.c11
-rw-r--r--kernel/cpuset.c13
-rw-r--r--kernel/debug/debug_core.c16
-rw-r--r--kernel/debug/kdb/kdb_main.c69
-rw-r--r--kernel/exit.c17
-rw-r--r--kernel/futex.c3
-rw-r--r--kernel/futex_compat.c3
-rw-r--r--kernel/hw_breakpoint.c3
-rw-r--r--kernel/irq/manage.c4
-rw-r--r--kernel/irq_work.c4
-rw-r--r--kernel/jump_label.c77
-rw-r--r--kernel/kprobes.c26
-rw-r--r--kernel/latencytop.c17
-rw-r--r--kernel/module.c12
-rw-r--r--kernel/perf_event.c135
-rw-r--r--kernel/pm_qos_params.c4
-rw-r--r--kernel/posix-cpu-timers.c12
-rw-r--r--kernel/power/Kconfig4
-rw-r--r--kernel/printk.c21
-rw-r--r--kernel/range.c2
-rw-r--r--kernel/relay.c15
-rw-r--r--kernel/sched.c47
-rw-r--r--kernel/sched_fair.c73
-rw-r--r--kernel/sched_stats.h20
-rw-r--r--kernel/sched_stoptask.c4
-rw-r--r--kernel/sysctl.c9
-rw-r--r--kernel/trace/Kconfig2
-rw-r--r--kernel/trace/blktrace.c4
-rw-r--r--kernel/trace/trace.c20
-rw-r--r--kernel/watchdog.c2
36 files changed, 502 insertions, 260 deletions
diff --git a/kernel/audit.c b/kernel/audit.c
index d96045789b54..77770a034d59 100644
--- a/kernel/audit.c
+++ b/kernel/audit.c
@@ -467,23 +467,16 @@ static int audit_prepare_user_tty(pid_t pid, uid_t loginuid, u32 sessionid)
struct task_struct *tsk;
int err;
- read_lock(&tasklist_lock);
+ rcu_read_lock();
tsk = find_task_by_vpid(pid);
- err = -ESRCH;
- if (!tsk)
- goto out;
- err = 0;
-
- spin_lock_irq(&tsk->sighand->siglock);
- if (!tsk->signal->audit_tty)
- err = -EPERM;
- spin_unlock_irq(&tsk->sighand->siglock);
- if (err)
- goto out;
-
- tty_audit_push_task(tsk, loginuid, sessionid);
-out:
- read_unlock(&tasklist_lock);
+ if (!tsk) {
+ rcu_read_unlock();
+ return -ESRCH;
+ }
+ get_task_struct(tsk);
+ rcu_read_unlock();
+ err = tty_audit_push_task(tsk, loginuid, sessionid);
+ put_task_struct(tsk);
return err;
}
@@ -506,7 +499,7 @@ int audit_send_list(void *_dest)
}
struct sk_buff *audit_make_reply(int pid, int seq, int type, int done,
- int multi, void *payload, int size)
+ int multi, const void *payload, int size)
{
struct sk_buff *skb;
struct nlmsghdr *nlh;
@@ -555,8 +548,8 @@ static int audit_send_reply_thread(void *arg)
* Allocates an skb, builds the netlink message, and sends it to the pid.
* No failure notifications.
*/
-void audit_send_reply(int pid, int seq, int type, int done, int multi,
- void *payload, int size)
+static void audit_send_reply(int pid, int seq, int type, int done, int multi,
+ const void *payload, int size)
{
struct sk_buff *skb;
struct task_struct *tsk;
@@ -880,40 +873,40 @@ static int audit_receive_msg(struct sk_buff *skb, struct nlmsghdr *nlh)
case AUDIT_TTY_GET: {
struct audit_tty_status s;
struct task_struct *tsk;
+ unsigned long flags;
- read_lock(&tasklist_lock);
+ rcu_read_lock();
tsk = find_task_by_vpid(pid);
- if (!tsk)
- err = -ESRCH;
- else {
- spin_lock_irq(&tsk->sighand->siglock);
+ if (tsk && lock_task_sighand(tsk, &flags)) {
s.enabled = tsk->signal->audit_tty != 0;
- spin_unlock_irq(&tsk->sighand->siglock);
- }
- read_unlock(&tasklist_lock);
- audit_send_reply(NETLINK_CB(skb).pid, seq, AUDIT_TTY_GET, 0, 0,
- &s, sizeof(s));
+ unlock_task_sighand(tsk, &flags);
+ } else
+ err = -ESRCH;
+ rcu_read_unlock();
+
+ if (!err)
+ audit_send_reply(NETLINK_CB(skb).pid, seq,
+ AUDIT_TTY_GET, 0, 0, &s, sizeof(s));
break;
}
case AUDIT_TTY_SET: {
struct audit_tty_status *s;
struct task_struct *tsk;
+ unsigned long flags;
if (nlh->nlmsg_len < sizeof(struct audit_tty_status))
return -EINVAL;
s = data;
if (s->enabled != 0 && s->enabled != 1)
return -EINVAL;
- read_lock(&tasklist_lock);
+ rcu_read_lock();
tsk = find_task_by_vpid(pid);
- if (!tsk)
- err = -ESRCH;
- else {
- spin_lock_irq(&tsk->sighand->siglock);
+ if (tsk && lock_task_sighand(tsk, &flags)) {
tsk->signal->audit_tty = s->enabled != 0;
- spin_unlock_irq(&tsk->sighand->siglock);
- }
- read_unlock(&tasklist_lock);
+ unlock_task_sighand(tsk, &flags);
+ } else
+ err = -ESRCH;
+ rcu_read_unlock();
break;
}
default:
diff --git a/kernel/audit.h b/kernel/audit.h
index f7206db4e13d..91e7071c4d2c 100644
--- a/kernel/audit.h
+++ b/kernel/audit.h
@@ -84,10 +84,7 @@ extern int audit_compare_dname_path(const char *dname, const char *path,
int *dirlen);
extern struct sk_buff * audit_make_reply(int pid, int seq, int type,
int done, int multi,
- void *payload, int size);
-extern void audit_send_reply(int pid, int seq, int type,
- int done, int multi,
- void *payload, int size);
+ const void *payload, int size);
extern void audit_panic(const char *message);
struct audit_netlink_list {
diff --git a/kernel/audit_tree.c b/kernel/audit_tree.c
index 7f18d3a4527e..37b2bea170c8 100644
--- a/kernel/audit_tree.c
+++ b/kernel/audit_tree.c
@@ -223,7 +223,7 @@ static void untag_chunk(struct node *p)
{
struct audit_chunk *chunk = find_chunk(p);
struct fsnotify_mark *entry = &chunk->mark;
- struct audit_chunk *new;
+ struct audit_chunk *new = NULL;
struct audit_tree *owner;
int size = chunk->count - 1;
int i, j;
@@ -232,9 +232,14 @@ static void untag_chunk(struct node *p)
spin_unlock(&hash_lock);
+ if (size)
+ new = alloc_chunk(size);
+
spin_lock(&entry->lock);
if (chunk->dead || !entry->i.inode) {
spin_unlock(&entry->lock);
+ if (new)
+ free_chunk(new);
goto out;
}
@@ -255,9 +260,9 @@ static void untag_chunk(struct node *p)
goto out;
}
- new = alloc_chunk(size);
if (!new)
goto Fallback;
+
fsnotify_duplicate_mark(&new->mark, entry);
if (fsnotify_add_mark(&new->mark, new->mark.group, new->mark.i.inode, NULL, 1)) {
free_chunk(new);
diff --git a/kernel/audit_watch.c b/kernel/audit_watch.c
index f0c9b2e7542d..d2e3c7866460 100644
--- a/kernel/audit_watch.c
+++ b/kernel/audit_watch.c
@@ -60,7 +60,7 @@ struct audit_parent {
};
/* fsnotify handle. */
-struct fsnotify_group *audit_watch_group;
+static struct fsnotify_group *audit_watch_group;
/* fsnotify events we care about. */
#define AUDIT_FS_WATCH (FS_MOVE | FS_CREATE | FS_DELETE | FS_DELETE_SELF |\
@@ -123,7 +123,7 @@ void audit_put_watch(struct audit_watch *watch)
}
}
-void audit_remove_watch(struct audit_watch *watch)
+static void audit_remove_watch(struct audit_watch *watch)
{
list_del(&watch->wlist);
audit_put_parent(watch->parent);
diff --git a/kernel/auditfilter.c b/kernel/auditfilter.c
index eb7675499fb5..add2819af71b 100644
--- a/kernel/auditfilter.c
+++ b/kernel/auditfilter.c
@@ -1252,6 +1252,18 @@ static int audit_filter_user_rules(struct netlink_skb_parms *cb,
case AUDIT_LOGINUID:
result = audit_comparator(cb->loginuid, f->op, f->val);
break;
+ case AUDIT_SUBJ_USER:
+ case AUDIT_SUBJ_ROLE:
+ case AUDIT_SUBJ_TYPE:
+ case AUDIT_SUBJ_SEN:
+ case AUDIT_SUBJ_CLR:
+ if (f->lsm_rule)
+ result = security_audit_rule_match(cb->sid,
+ f->type,
+ f->op,
+ f->lsm_rule,
+ NULL);
+ break;
}
if (!result)
diff --git a/kernel/auditsc.c b/kernel/auditsc.c
index 1b31c130d034..f49a0318c2ed 100644
--- a/kernel/auditsc.c
+++ b/kernel/auditsc.c
@@ -241,6 +241,10 @@ struct audit_context {
pid_t pid;
struct audit_cap_data cap;
} capset;
+ struct {
+ int fd;
+ int flags;
+ } mmap;
};
int fds[2];
@@ -1305,6 +1309,10 @@ static void show_special(struct audit_context *context, int *call_panic)
audit_log_cap(ab, "cap_pp", &context->capset.cap.permitted);
audit_log_cap(ab, "cap_pe", &context->capset.cap.effective);
break; }
+ case AUDIT_MMAP: {
+ audit_log_format(ab, "fd=%d flags=0x%x", context->mmap.fd,
+ context->mmap.flags);
+ break; }
}
audit_log_end(ab);
}
@@ -2476,6 +2484,14 @@ void __audit_log_capset(pid_t pid,
context->type = AUDIT_CAPSET;
}
+void __audit_mmap_fd(int fd, int flags)
+{
+ struct audit_context *context = current->audit_context;
+ context->mmap.fd = fd;
+ context->mmap.flags = flags;
+ context->type = AUDIT_MMAP;
+}
+
/**
* audit_core_dumps - record information about processes that end abnormally
* @signr: signal value
diff --git a/kernel/cgroup.c b/kernel/cgroup.c
index 5cf366965d0c..66a416b42c18 100644
--- a/kernel/cgroup.c
+++ b/kernel/cgroup.c
@@ -1460,9 +1460,9 @@ static int cgroup_get_rootdir(struct super_block *sb)
return 0;
}
-static int cgroup_get_sb(struct file_system_type *fs_type,
+static struct dentry *cgroup_mount(struct file_system_type *fs_type,
int flags, const char *unused_dev_name,
- void *data, struct vfsmount *mnt)
+ void *data)
{
struct cgroup_sb_opts opts;
struct cgroupfs_root *root;
@@ -1596,10 +1596,9 @@ static int cgroup_get_sb(struct file_system_type *fs_type,
drop_parsed_module_refcounts(opts.subsys_bits);
}
- simple_set_mnt(mnt, sb);
kfree(opts.release_agent);
kfree(opts.name);
- return 0;
+ return dget(sb->s_root);
drop_new_super:
deactivate_locked_super(sb);
@@ -1608,7 +1607,7 @@ static int cgroup_get_sb(struct file_system_type *fs_type,
out_err:
kfree(opts.release_agent);
kfree(opts.name);
- return ret;
+ return ERR_PTR(ret);
}
static void cgroup_kill_sb(struct super_block *sb) {
@@ -1658,7 +1657,7 @@ static void cgroup_kill_sb(struct super_block *sb) {
static struct file_system_type cgroup_fs_type = {
.name = "cgroup",
- .get_sb = cgroup_get_sb,
+ .mount = cgroup_mount,
.kill_sb = cgroup_kill_sb,
};
diff --git a/kernel/cpuset.c b/kernel/cpuset.c
index 51b143e2a07a..4349935c2ad8 100644
--- a/kernel/cpuset.c
+++ b/kernel/cpuset.c
@@ -231,18 +231,17 @@ static DEFINE_SPINLOCK(cpuset_buffer_lock);
* users. If someone tries to mount the "cpuset" filesystem, we
* silently switch it to mount "cgroup" instead
*/
-static int cpuset_get_sb(struct file_system_type *fs_type,
- int flags, const char *unused_dev_name,
- void *data, struct vfsmount *mnt)
+static struct dentry *cpuset_mount(struct file_system_type *fs_type,
+ int flags, const char *unused_dev_name, void *data)
{
struct file_system_type *cgroup_fs = get_fs_type("cgroup");
- int ret = -ENODEV;
+ struct dentry *ret = ERR_PTR(-ENODEV);
if (cgroup_fs) {
char mountopts[] =
"cpuset,noprefix,"
"release_agent=/sbin/cpuset_release_agent";
- ret = cgroup_fs->get_sb(cgroup_fs, flags,
- unused_dev_name, mountopts, mnt);
+ ret = cgroup_fs->mount(cgroup_fs, flags,
+ unused_dev_name, mountopts);
put_filesystem(cgroup_fs);
}
return ret;
@@ -250,7 +249,7 @@ static int cpuset_get_sb(struct file_system_type *fs_type,
static struct file_system_type cpuset_fs_type = {
.name = "cpuset",
- .get_sb = cpuset_get_sb,
+ .mount = cpuset_mount,
};
/*
diff --git a/kernel/debug/debug_core.c b/kernel/debug/debug_core.c
index fec596da9bd0..cefd4a11f6d9 100644
--- a/kernel/debug/debug_core.c
+++ b/kernel/debug/debug_core.c
@@ -209,18 +209,6 @@ int __weak kgdb_skipexception(int exception, struct pt_regs *regs)
return 0;
}
-/**
- * kgdb_disable_hw_debug - Disable hardware debugging while we in kgdb.
- * @regs: Current &struct pt_regs.
- *
- * This function will be called if the particular architecture must
- * disable hardware debugging while it is processing gdb packets or
- * handling exception.
- */
-void __weak kgdb_disable_hw_debug(struct pt_regs *regs)
-{
-}
-
/*
* Some architectures need cache flushes when we set/clear a
* breakpoint:
@@ -484,7 +472,9 @@ static int kgdb_cpu_enter(struct kgdb_state *ks, struct pt_regs *regs,
atomic_inc(&masters_in_kgdb);
else
atomic_inc(&slaves_in_kgdb);
- kgdb_disable_hw_debug(ks->linux_regs);
+
+ if (arch_kgdb_ops.disable_hw_break)
+ arch_kgdb_ops.disable_hw_break(regs);
acquirelock:
/*
diff --git a/kernel/debug/kdb/kdb_main.c b/kernel/debug/kdb/kdb_main.c
index d7bda21a106b..a6e729766821 100644
--- a/kernel/debug/kdb/kdb_main.c
+++ b/kernel/debug/kdb/kdb_main.c
@@ -82,7 +82,7 @@ static kdbtab_t kdb_base_commands[50];
#define for_each_kdbcmd(cmd, num) \
for ((cmd) = kdb_base_commands, (num) = 0; \
num < kdb_max_commands; \
- num == KDB_BASE_CMD_MAX ? cmd = kdb_commands : cmd++, num++)
+ num++, num == KDB_BASE_CMD_MAX ? cmd = kdb_commands : cmd++)
typedef struct _kdbmsg {
int km_diag; /* kdb diagnostic */
@@ -646,7 +646,7 @@ static int kdb_defcmd2(const char *cmdstr, const char *argv0)
}
if (!s->usable)
return KDB_NOTIMP;
- s->command = kmalloc((s->count + 1) * sizeof(*(s->command)), GFP_KDB);
+ s->command = kzalloc((s->count + 1) * sizeof(*(s->command)), GFP_KDB);
if (!s->command) {
kdb_printf("Could not allocate new kdb_defcmd table for %s\n",
cmdstr);
@@ -1127,7 +1127,7 @@ static int kdb_local(kdb_reason_t reason, int error, struct pt_regs *regs,
/* special case below */
} else {
kdb_printf("\nEntering kdb (current=0x%p, pid %d) ",
- kdb_current, kdb_current->pid);
+ kdb_current, kdb_current ? kdb_current->pid : 0);
#if defined(CONFIG_SMP)
kdb_printf("on processor %d ", raw_smp_processor_id());
#endif
@@ -2361,7 +2361,7 @@ static int kdb_pid(int argc, const char **argv)
*/
static int kdb_ll(int argc, const char **argv)
{
- int diag;
+ int diag = 0;
unsigned long addr;
long offset = 0;
unsigned long va;
@@ -2400,20 +2400,21 @@ static int kdb_ll(int argc, const char **argv)
char buf[80];
if (KDB_FLAG(CMD_INTERRUPT))
- return 0;
+ goto out;
sprintf(buf, "%s " kdb_machreg_fmt "\n", command, va);
diag = kdb_parse(buf);
if (diag)
- return diag;
+ goto out;
addr = va + linkoffset;
if (kdb_getword(&va, addr, sizeof(va)))
- return 0;
+ goto out;
}
- kfree(command);
- return 0;
+out:
+ kfree(command);
+ return diag;
}
static int kdb_kgdb(int argc, const char **argv)
@@ -2603,20 +2604,17 @@ static int kdb_summary(int argc, const char **argv)
*/
static int kdb_per_cpu(int argc, const char **argv)
{
- char buf[256], fmtstr[64];
- kdb_symtab_t symtab;
- cpumask_t suppress = CPU_MASK_NONE;
- int cpu, diag;
- unsigned long addr, val, bytesperword = 0, whichcpu = ~0UL;
+ char fmtstr[64];
+ int cpu, diag, nextarg = 1;
+ unsigned long addr, symaddr, val, bytesperword = 0, whichcpu = ~0UL;
if (argc < 1 || argc > 3)
return KDB_ARGCOUNT;
- snprintf(buf, sizeof(buf), "per_cpu__%s", argv[1]);
- if (!kdbgetsymval(buf, &symtab)) {
- kdb_printf("%s is not a per_cpu variable\n", argv[1]);
- return KDB_BADADDR;
- }
+ diag = kdbgetaddrarg(argc, argv, &nextarg, &symaddr, NULL, NULL);
+ if (diag)
+ return diag;
+
if (argc >= 2) {
diag = kdbgetularg(argv[2], &bytesperword);
if (diag)
@@ -2649,46 +2647,25 @@ static int kdb_per_cpu(int argc, const char **argv)
#define KDB_PCU(cpu) 0
#endif
#endif
-
for_each_online_cpu(cpu) {
+ if (KDB_FLAG(CMD_INTERRUPT))
+ return 0;
+
if (whichcpu != ~0UL && whichcpu != cpu)
continue;
- addr = symtab.sym_start + KDB_PCU(cpu);
+ addr = symaddr + KDB_PCU(cpu);
diag = kdb_getword(&val, addr, bytesperword);
if (diag) {
kdb_printf("%5d " kdb_bfd_vma_fmt0 " - unable to "
"read, diag=%d\n", cpu, addr, diag);
continue;
}
-#ifdef CONFIG_SMP
- if (!val) {
- cpu_set(cpu, suppress);
- continue;
- }
-#endif /* CONFIG_SMP */
kdb_printf("%5d ", cpu);
kdb_md_line(fmtstr, addr,
bytesperword == KDB_WORD_SIZE,
1, bytesperword, 1, 1, 0);
}
- if (cpus_weight(suppress) == 0)
- return 0;
- kdb_printf("Zero suppressed cpu(s):");
- for (cpu = first_cpu(suppress); cpu < num_possible_cpus();
- cpu = next_cpu(cpu, suppress)) {
- kdb_printf(" %d", cpu);
- if (cpu == num_possible_cpus() - 1 ||
- next_cpu(cpu, suppress) != cpu + 1)
- continue;
- while (cpu < num_possible_cpus() &&
- next_cpu(cpu, suppress) == cpu + 1)
- ++cpu;
- kdb_printf("-%d", cpu);
- }
- kdb_printf("\n");
-
#undef KDB_PCU
-
return 0;
}
@@ -2763,13 +2740,13 @@ int kdb_register_repeat(char *cmd,
}
if (kdb_commands) {
memcpy(new, kdb_commands,
- kdb_max_commands * sizeof(*new));
+ (kdb_max_commands - KDB_BASE_CMD_MAX) * sizeof(*new));
kfree(kdb_commands);
}
memset(new + kdb_max_commands, 0,
kdb_command_extend * sizeof(*new));
kdb_commands = new;
- kp = kdb_commands + kdb_max_commands;
+ kp = kdb_commands + kdb_max_commands - KDB_BASE_CMD_MAX;
kdb_max_commands += kdb_command_extend;
}
diff --git a/kernel/exit.c b/kernel/exit.c
index b194febf5799..676149a4ac5f 100644
--- a/kernel/exit.c
+++ b/kernel/exit.c
@@ -96,6 +96,14 @@ static void __exit_signal(struct task_struct *tsk)
sig->tty = NULL;
} else {
/*
+ * This can only happen if the caller is de_thread().
+ * FIXME: this is the temporary hack, we should teach
+ * posix-cpu-timers to handle this case correctly.
+ */
+ if (unlikely(has_group_leader_pid(tsk)))
+ posix_cpu_timers_exit_group(tsk);
+
+ /*
* If there is any task waiting for the group exit
* then notify it:
*/
@@ -906,6 +914,15 @@ NORET_TYPE void do_exit(long code)
if (unlikely(!tsk->pid))
panic("Attempted to kill the idle task!");
+ /*
+ * If do_exit is called because this processes oopsed, it's possible
+ * that get_fs() was left as KERNEL_DS, so reset it to USER_DS before
+ * continuing. Amongst other possible reasons, this is to prevent
+ * mm_release()->clear_child_tid() from writing to a user-controlled
+ * kernel address.
+ */
+ set_fs(USER_DS);
+
tracehook_report_exit(&code);
validate_creds_for_do_exit(tsk);
diff --git a/kernel/futex.c b/kernel/futex.c
index 6c683b37f2ce..40a8777a27d0 100644
--- a/kernel/futex.c
+++ b/kernel/futex.c
@@ -2489,7 +2489,8 @@ void exit_robust_list(struct task_struct *curr)
{
struct robust_list_head __user *head = curr->robust_list;
struct robust_list __user *entry, *next_entry, *pending;
- unsigned int limit = ROBUST_LIST_LIMIT, pi, next_pi, pip;
+ unsigned int limit = ROBUST_LIST_LIMIT, pi, pip;
+ unsigned int uninitialized_var(next_pi);
unsigned long futex_offset;
int rc;
diff --git a/kernel/futex_compat.c b/kernel/futex_compat.c
index 06da4dfc339b..a7934ac75e5b 100644
--- a/kernel/futex_compat.c
+++ b/kernel/futex_compat.c
@@ -49,7 +49,8 @@ void compat_exit_robust_list(struct task_struct *curr)
{
struct compat_robust_list_head __user *head = curr->compat_robust_list;
struct robust_list __user *entry, *next_entry, *pending;
- unsigned int limit = ROBUST_LIST_LIMIT, pi, next_pi, pip;
+ unsigned int limit = ROBUST_LIST_LIMIT, pi, pip;
+ unsigned int uninitialized_var(next_pi);
compat_uptr_t uentry, next_uentry, upending;
compat_long_t futex_offset;
int rc;
diff --git a/kernel/hw_breakpoint.c b/kernel/hw_breakpoint.c
index 2c9120f0afca..e5325825aeb6 100644
--- a/kernel/hw_breakpoint.c
+++ b/kernel/hw_breakpoint.c
@@ -620,7 +620,7 @@ static struct pmu perf_breakpoint = {
.read = hw_breakpoint_pmu_read,
};
-static int __init init_hw_breakpoint(void)
+int __init init_hw_breakpoint(void)
{
unsigned int **task_bp_pinned;
int cpu, err_cpu;
@@ -655,6 +655,5 @@ static int __init init_hw_breakpoint(void)
return -ENOMEM;
}
-core_initcall(init_hw_breakpoint);
diff --git a/kernel/irq/manage.c b/kernel/irq/manage.c
index 644e8d5fa367..5f92acc5f952 100644
--- a/kernel/irq/manage.c
+++ b/kernel/irq/manage.c
@@ -324,6 +324,10 @@ void enable_irq(unsigned int irq)
if (!desc)
return;
+ if (WARN(!desc->irq_data.chip || !desc->irq_data.chip->irq_enable,
+ KERN_ERR "enable_irq before setup/request_irq: irq %u\n", irq))
+ return;
+
chip_bus_lock(desc);
raw_spin_lock_irqsave(&desc->lock, flags);
__enable_irq(desc, irq, false);
diff --git a/kernel/irq_work.c b/kernel/irq_work.c
index f16763ff8481..90f881904bb1 100644
--- a/kernel/irq_work.c
+++ b/kernel/irq_work.c
@@ -145,7 +145,9 @@ void irq_work_run(void)
* Clear the BUSY bit and return to the free state if
* no-one else claimed it meanwhile.
*/
- cmpxchg(&entry->next, next_flags(NULL, IRQ_WORK_BUSY), NULL);
+ (void)cmpxchg(&entry->next,
+ next_flags(NULL, IRQ_WORK_BUSY),
+ NULL);
}
}
EXPORT_SYMBOL_GPL(irq_work_run);
diff --git a/kernel/jump_label.c b/kernel/jump_label.c
index 7be868bf25c6..3b79bd938330 100644
--- a/kernel/jump_label.c
+++ b/kernel/jump_label.c
@@ -39,6 +39,16 @@ struct jump_label_module_entry {
struct module *mod;
};
+void jump_label_lock(void)
+{
+ mutex_lock(&jump_label_mutex);
+}
+
+void jump_label_unlock(void)
+{
+ mutex_unlock(&jump_label_mutex);
+}
+
static int jump_label_cmp(const void *a, const void *b)
{
const struct jump_entry *jea = a;
@@ -152,7 +162,7 @@ void jump_label_update(unsigned long key, enum jump_label_type type)
struct jump_label_module_entry *e_module;
int count;
- mutex_lock(&jump_label_mutex);
+ jump_label_lock();
entry = get_jump_label_entry((jump_label_t)key);
if (entry) {
count = entry->nr_entries;
@@ -168,13 +178,14 @@ void jump_label_update(unsigned long key, enum jump_label_type type)
count = e_module->nr_entries;
iter = e_module->table;
while (count--) {
- if (kernel_text_address(iter->code))
+ if (iter->key &&
+ kernel_text_address(iter->code))
arch_jump_label_transform(iter, type);
iter++;
}
}
}
- mutex_unlock(&jump_label_mutex);
+ jump_label_unlock();
}
static int addr_conflict(struct jump_entry *entry, void *start, void *end)
@@ -231,6 +242,7 @@ out:
* overlaps with any of the jump label patch addresses. Code
* that wants to modify kernel text should first verify that
* it does not overlap with any of the jump label addresses.
+ * Caller must hold jump_label_mutex.
*
* returns 1 if there is an overlap, 0 otherwise
*/
@@ -241,7 +253,6 @@ int jump_label_text_reserved(void *start, void *end)
struct jump_entry *iter_stop = __start___jump_table;
int conflict = 0;
- mutex_lock(&jump_label_mutex);
iter = iter_start;
while (iter < iter_stop) {
if (addr_conflict(iter, start, end)) {
@@ -256,10 +267,16 @@ int jump_label_text_reserved(void *start, void *end)
conflict = module_conflict(start, end);
#endif
out:
- mutex_unlock(&jump_label_mutex);
return conflict;
}
+/*
+ * Not all archs need this.
+ */
+void __weak arch_jump_label_text_poke_early(jump_label_t addr)
+{
+}
+
static __init int init_jump_label(void)
{
int ret;
@@ -267,7 +284,7 @@ static __init int init_jump_label(void)
struct jump_entry *iter_stop = __stop___jump_table;
struct jump_entry *iter;
- mutex_lock(&jump_label_mutex);
+ jump_label_lock();
ret = build_jump_label_hashtable(__start___jump_table,
__stop___jump_table);
iter = iter_start;
@@ -275,7 +292,7 @@ static __init int init_jump_label(void)
arch_jump_label_text_poke_early(iter->code);
iter++;
}
- mutex_unlock(&jump_label_mutex);
+ jump_label_unlock();
return ret;
}
early_initcall(init_jump_label);
@@ -366,6 +383,39 @@ static void remove_jump_label_module(struct module *mod)
}
}
+static void remove_jump_label_module_init(struct module *mod)
+{
+ struct hlist_head *head;
+ struct hlist_node *node, *node_next, *module_node, *module_node_next;
+ struct jump_label_entry *e;
+ struct jump_label_module_entry *e_module;
+ struct jump_entry *iter;
+ int i, count;
+
+ /* if the module doesn't have jump label entries, just return */
+ if (!mod->num_jump_entries)
+ return;
+
+ for (i = 0; i < JUMP_LABEL_TABLE_SIZE; i++) {
+ head = &jump_label_table[i];
+ hlist_for_each_entry_safe(e, node, node_next, head, hlist) {
+ hlist_for_each_entry_safe(e_module, module_node,
+ module_node_next,
+ &(e->modules), hlist) {
+ if (e_module->mod != mod)
+ continue;
+ count = e_module->nr_entries;
+ iter = e_module->table;
+ while (count--) {
+ if (within_module_init(iter->code, mod))
+ iter->key = 0;
+ iter++;
+ }
+ }
+ }
+ }
+}
+
static int
jump_label_module_notify(struct notifier_block *self, unsigned long val,
void *data)
@@ -375,16 +425,21 @@ jump_label_module_notify(struct notifier_block *self, unsigned long val,
switch (val) {
case MODULE_STATE_COMING:
- mutex_lock(&jump_label_mutex);
+ jump_label_lock();
ret = add_jump_label_module(mod);
if (ret)
remove_jump_label_module(mod);
- mutex_unlock(&jump_label_mutex);
+ jump_label_unlock();
break;
case MODULE_STATE_GOING:
- mutex_lock(&jump_label_mutex);
+ jump_label_lock();
remove_jump_label_module(mod);
- mutex_unlock(&jump_label_mutex);
+ jump_label_unlock();
+ break;
+ case MODULE_STATE_LIVE:
+ jump_label_lock();
+ remove_jump_label_module_init(mod);
+ jump_label_unlock();
break;
}
return ret;
diff --git a/kernel/kprobes.c b/kernel/kprobes.c
index 99865c33a60d..9737a76e106f 100644
--- a/kernel/kprobes.c
+++ b/kernel/kprobes.c
@@ -1145,14 +1145,13 @@ int __kprobes register_kprobe(struct kprobe *p)
if (ret)
return ret;
+ jump_label_lock();
preempt_disable();
if (!kernel_text_address((unsigned long) p->addr) ||
in_kprobes_functions((unsigned long) p->addr) ||
ftrace_text_reserved(p->addr, p->addr) ||
- jump_label_text_reserved(p->addr, p->addr)) {
- preempt_enable();
- return -EINVAL;
- }
+ jump_label_text_reserved(p->addr, p->addr))
+ goto fail_with_jump_label;
/* User can pass only KPROBE_FLAG_DISABLED to register_kprobe */
p->flags &= KPROBE_FLAG_DISABLED;
@@ -1166,10 +1165,9 @@ int __kprobes register_kprobe(struct kprobe *p)
* We must hold a refcount of the probed module while updating
* its code to prohibit unexpected unloading.
*/
- if (unlikely(!try_module_get(probed_mod))) {
- preempt_enable();
- return -EINVAL;
- }
+ if (unlikely(!try_module_get(probed_mod)))
+ goto fail_with_jump_label;
+
/*
* If the module freed .init.text, we couldn't insert
* kprobes in there.
@@ -1177,16 +1175,18 @@ int __kprobes register_kprobe(struct kprobe *p)
if (within_module_init((unsigned long)p->addr, probed_mod) &&
probed_mod->state != MODULE_STATE_COMING) {
module_put(probed_mod);
- preempt_enable();
- return -EINVAL;
+ goto fail_with_jump_label;
}
}
preempt_enable();
+ jump_label_unlock();
p->nmissed = 0;
INIT_LIST_HEAD(&p->list);
mutex_lock(&kprobe_mutex);
+ jump_label_lock(); /* needed to call jump_label_text_reserved() */
+
get_online_cpus(); /* For avoiding text_mutex deadlock. */
mutex_lock(&text_mutex);
@@ -1214,12 +1214,18 @@ int __kprobes register_kprobe(struct kprobe *p)
out:
mutex_unlock(&text_mutex);
put_online_cpus();
+ jump_label_unlock();
mutex_unlock(&kprobe_mutex);
if (probed_mod)
module_put(probed_mod);
return ret;
+
+fail_with_jump_label:
+ preempt_enable();
+ jump_label_unlock();
+ return -EINVAL;
}
EXPORT_SYMBOL_GPL(register_kprobe);
diff --git a/kernel/latencytop.c b/kernel/latencytop.c
index 877fb306d415..17110a4a4fc2 100644
--- a/kernel/latencytop.c
+++ b/kernel/latencytop.c
@@ -194,14 +194,7 @@ __account_scheduler_latency(struct task_struct *tsk, int usecs, int inter)
account_global_scheduler_latency(tsk, &lat);
- /*
- * short term hack; if we're > 32 we stop; future we recycle:
- */
- tsk->latency_record_count++;
- if (tsk->latency_record_count >= LT_SAVECOUNT)
- goto out_unlock;
-
- for (i = 0; i < LT_SAVECOUNT; i++) {
+ for (i = 0; i < tsk->latency_record_count; i++) {
struct latency_record *mylat;
int same = 1;
@@ -227,8 +220,14 @@ __account_scheduler_latency(struct task_struct *tsk, int usecs, int inter)
}
}
+ /*
+ * short term hack; if we're > 32 we stop; future we recycle:
+ */
+ if (tsk->latency_record_count >= LT_SAVECOUNT)
+ goto out_unlock;
+
/* Allocated a new one: */
- i = tsk->latency_record_count;
+ i = tsk->latency_record_count++;
memcpy(&tsk->latency_record[i], &lat, sizeof(struct latency_record));
out_unlock:
diff --git a/kernel/module.c b/kernel/module.c
index 437a74a7524a..d190664f25ff 100644
--- a/kernel/module.c
+++ b/kernel/module.c
@@ -2326,6 +2326,18 @@ static void find_module_sections(struct module *mod, struct load_info *info)
kmemleak_scan_area(mod->trace_events, sizeof(*mod->trace_events) *
mod->num_trace_events, GFP_KERNEL);
#endif
+#ifdef CONFIG_TRACING
+ mod->trace_bprintk_fmt_start = section_objs(info, "__trace_printk_fmt",
+ sizeof(*mod->trace_bprintk_fmt_start),
+ &mod->num_trace_bprintk_fmt);
+ /*
+ * This section contains pointers to allocated objects in the trace
+ * code and not scanning it leads to false positives.
+ */
+ kmemleak_scan_area(mod->trace_bprintk_fmt_start,
+ sizeof(*mod->trace_bprintk_fmt_start) *
+ mod->num_trace_bprintk_fmt, GFP_KERNEL);
+#endif
#ifdef CONFIG_FTRACE_MCOUNT_RECORD
/* sechdrs[0].sh_size is always zero */
mod->ftrace_callsites = section_objs(info, "__mcount_loc",
diff --git a/kernel/perf_event.c b/kernel/perf_event.c
index 517d827f4982..eac7e3364335 100644
--- a/kernel/perf_event.c
+++ b/kernel/perf_event.c
@@ -31,6 +31,7 @@
#include <linux/kernel_stat.h>
#include <linux/perf_event.h>
#include <linux/ftrace_event.h>
+#include <linux/hw_breakpoint.h>
#include <asm/irq_regs.h>
@@ -674,6 +675,8 @@ event_sched_in(struct perf_event *event,
event->tstamp_running += ctx->time - event->tstamp_stopped;
+ event->shadow_ctx_time = ctx->time - ctx->timestamp;
+
if (!is_software_event(event))
cpuctx->active_oncpu++;
ctx->nr_active++;
@@ -1284,8 +1287,6 @@ void __perf_event_task_sched_out(struct task_struct *task,
{
int ctxn;
- perf_sw_event(PERF_COUNT_SW_CONTEXT_SWITCHES, 1, 1, NULL, 0);
-
for_each_task_context_nr(ctxn)
perf_event_context_sched_out(task, ctxn, next);
}
@@ -1619,8 +1620,12 @@ static void rotate_ctx(struct perf_event_context *ctx)
{
raw_spin_lock(&ctx->lock);
- /* Rotate the first entry last of non-pinned groups */
- list_rotate_left(&ctx->flexible_groups);
+ /*
+ * Rotate the first entry last of non-pinned groups. Rotation might be
+ * disabled by the inheritance code.
+ */
+ if (!ctx->rotate_disable)
+ list_rotate_left(&ctx->flexible_groups);
raw_spin_unlock(&ctx->lock);
}
@@ -2232,11 +2237,6 @@ int perf_event_release_kernel(struct perf_event *event)
raw_spin_unlock_irq(&ctx->lock);
mutex_unlock(&ctx->mutex);
- mutex_lock(&event->owner->perf_event_mutex);
- list_del_init(&event->owner_entry);
- mutex_unlock(&event->owner->perf_event_mutex);
- put_task_struct(event->owner);
-
free_event(event);
return 0;
@@ -2249,9 +2249,43 @@ EXPORT_SYMBOL_GPL(perf_event_release_kernel);
static int perf_release(struct inode *inode, struct file *file)
{
struct perf_event *event = file->private_data;
+ struct task_struct *owner;
file->private_data = NULL;
+ rcu_read_lock();
+ owner = ACCESS_ONCE(event->owner);
+ /*
+ * Matches the smp_wmb() in perf_event_exit_task(). If we observe
+ * !owner it means the list deletion is complete and we can indeed
+ * free this event, otherwise we need to serialize on
+ * owner->perf_event_mutex.
+ */
+ smp_read_barrier_depends();
+ if (owner) {
+ /*
+ * Since delayed_put_task_struct() also drops the last
+ * task reference we can safely take a new reference
+ * while holding the rcu_read_lock().
+ */
+ get_task_struct(owner);
+ }
+ rcu_read_unlock();
+
+ if (owner) {
+ mutex_lock(&owner->perf_event_mutex);
+ /*
+ * We have to re-check the event->owner field, if it is cleared
+ * we raced with perf_event_exit_task(), acquiring the mutex
+ * ensured they're done, and we can proceed with freeing the
+ * event.
+ */
+ if (event->owner)
+ list_del_init(&event->owner_entry);
+ mutex_unlock(&owner->perf_event_mutex);
+ put_task_struct(owner);
+ }
+
return perf_event_release_kernel(event);
}
@@ -3396,7 +3430,8 @@ static u32 perf_event_tid(struct perf_event *event, struct task_struct *p)
}
static void perf_output_read_one(struct perf_output_handle *handle,
- struct perf_event *event)
+ struct perf_event *event,
+ u64 enabled, u64 running)
{
u64 read_format = event->attr.read_format;
u64 values[4];
@@ -3404,11 +3439,11 @@ static void perf_output_read_one(struct perf_output_handle *handle,
values[n++] = perf_event_count(event);
if (read_format & PERF_FORMAT_TOTAL_TIME_ENABLED) {
- values[n++] = event->total_time_enabled +
+ values[n++] = enabled +
atomic64_read(&event->child_total_time_enabled);
}
if (read_format & PERF_FORMAT_TOTAL_TIME_RUNNING) {
- values[n++] = event->total_time_running +
+ values[n++] = running +
atomic64_read(&event->child_total_time_running);
}
if (read_format & PERF_FORMAT_ID)
@@ -3421,7 +3456,8 @@ static void perf_output_read_one(struct perf_output_handle *handle,
* XXX PERF_FORMAT_GROUP vs inherited events seems difficult.
*/
static void perf_output_read_group(struct perf_output_handle *handle,
- struct perf_event *event)
+ struct perf_event *event,
+ u64 enabled, u64 running)
{
struct perf_event *leader = event->group_leader, *sub;
u64 read_format = event->attr.read_format;
@@ -3431,10 +3467,10 @@ static void perf_output_read_group(struct perf_output_handle *handle,
values[n++] = 1 + leader->nr_siblings;
if (read_format & PERF_FORMAT_TOTAL_TIME_ENABLED)
- values[n++] = leader->total_time_enabled;
+ values[n++] = enabled;
if (read_format & PERF_FORMAT_TOTAL_TIME_RUNNING)
- values[n++] = leader->total_time_running;
+ values[n++] = running;
if (leader != event)
leader->pmu->read(leader);
@@ -3459,13 +3495,35 @@ static void perf_output_read_group(struct perf_output_handle *handle,
}
}
+#define PERF_FORMAT_TOTAL_TIMES (PERF_FORMAT_TOTAL_TIME_ENABLED|\
+ PERF_FORMAT_TOTAL_TIME_RUNNING)
+
static void perf_output_read(struct perf_output_handle *handle,
struct perf_event *event)
{
+ u64 enabled = 0, running = 0, now, ctx_time;
+ u64 read_format = event->attr.read_format;
+
+ /*
+ * compute total_time_enabled, total_time_running
+ * based on snapshot values taken when the event
+ * was last scheduled in.
+ *
+ * we cannot simply called update_context_time()
+ * because of locking issue as we are called in
+ * NMI context
+ */
+ if (read_format & PERF_FORMAT_TOTAL_TIMES) {
+ now = perf_clock();
+ ctx_time = event->shadow_ctx_time + now;
+ enabled = ctx_time - event->tstamp_enabled;
+ running = ctx_time - event->tstamp_running;
+ }
+
if (event->attr.read_format & PERF_FORMAT_GROUP)
- perf_output_read_group(handle, event);
+ perf_output_read_group(handle, event, enabled, running);
else
- perf_output_read_one(handle, event);
+ perf_output_read_one(handle, event, enabled, running);
}
void perf_output_sample(struct perf_output_handle *handle,
@@ -5651,7 +5709,7 @@ SYSCALL_DEFINE5(perf_event_open,
mutex_unlock(&ctx->mutex);
event->owner = current;
- get_task_struct(current);
+
mutex_lock(&current->perf_event_mutex);
list_add_tail(&event->owner_entry, &current->perf_event_list);
mutex_unlock(&current->perf_event_mutex);
@@ -5719,12 +5777,6 @@ perf_event_create_kernel_counter(struct perf_event_attr *attr, int cpu,
++ctx->generation;
mutex_unlock(&ctx->mutex);
- event->owner = current;
- get_task_struct(current);
- mutex_lock(&current->perf_event_mutex);
- list_add_tail(&event->owner_entry, &current->perf_event_list);
- mutex_unlock(&current->perf_event_mutex);
-
return event;
err_free:
@@ -5875,8 +5927,24 @@ again:
*/
void perf_event_exit_task(struct task_struct *child)
{
+ struct perf_event *event, *tmp;
int ctxn;
+ mutex_lock(&child->perf_event_mutex);
+ list_for_each_entry_safe(event, tmp, &child->perf_event_list,
+ owner_entry) {
+ list_del_init(&event->owner_entry);
+
+ /*
+ * Ensure the list deletion is visible before we clear
+ * the owner, closes a race against perf_release() where
+ * we need to serialize on the owner->perf_event_mutex.
+ */
+ smp_wmb();
+ event->owner = NULL;
+ }
+ mutex_unlock(&child->perf_event_mutex);
+
for_each_task_context_nr(ctxn)
perf_event_exit_task_context(child, ctxn);
}
@@ -6096,6 +6164,7 @@ int perf_event_init_context(struct task_struct *child, int ctxn)
struct perf_event *event;
struct task_struct *parent = current;
int inherited_all = 1;
+ unsigned long flags;
int ret = 0;
child->perf_event_ctxp[ctxn] = NULL;
@@ -6136,6 +6205,15 @@ int perf_event_init_context(struct task_struct *child, int ctxn)
break;
}
+ /*
+ * We can't hold ctx->lock when iterating the ->flexible_group list due
+ * to allocations, but we need to prevent rotation because
+ * rotate_ctx() will change the list from interrupt context.
+ */
+ raw_spin_lock_irqsave(&parent_ctx->lock, flags);
+ parent_ctx->rotate_disable = 1;
+ raw_spin_unlock_irqrestore(&parent_ctx->lock, flags);
+
list_for_each_entry(event, &parent_ctx->flexible_groups, group_entry) {
ret = inherit_task_group(event, parent, parent_ctx,
child, ctxn, &inherited_all);
@@ -6143,6 +6221,10 @@ int perf_event_init_context(struct task_struct *child, int ctxn)
break;
}
+ raw_spin_lock_irqsave(&parent_ctx->lock, flags);
+ parent_ctx->rotate_disable = 0;
+ raw_spin_unlock_irqrestore(&parent_ctx->lock, flags);
+
child_ctx = child->perf_event_ctxp[ctxn];
if (child_ctx && inherited_all) {
@@ -6295,6 +6377,8 @@ perf_cpu_notify(struct notifier_block *self, unsigned long action, void *hcpu)
void __init perf_event_init(void)
{
+ int ret;
+
perf_event_init_all_cpus();
init_srcu_struct(&pmus_srcu);
perf_pmu_register(&perf_swevent);
@@ -6302,4 +6386,7 @@ void __init perf_event_init(void)
perf_pmu_register(&perf_task_clock);
perf_tp_register();
perf_cpu_notifier(perf_cpu_notify);
+
+ ret = init_hw_breakpoint();
+ WARN(ret, "hw_breakpoint initialization failed with: %d", ret);
}
diff --git a/kernel/pm_qos_params.c b/kernel/pm_qos_params.c
index c7a8f453919e..aeaa7f846821 100644
--- a/kernel/pm_qos_params.c
+++ b/kernel/pm_qos_params.c
@@ -121,10 +121,10 @@ static inline int pm_qos_get_value(struct pm_qos_object *o)
switch (o->type) {
case PM_QOS_MIN:
- return plist_last(&o->requests)->prio;
+ return plist_first(&o->requests)->prio;
case PM_QOS_MAX:
- return plist_first(&o->requests)->prio;
+ return plist_last(&o->requests)->prio;
default:
/* runtime check for not using enum */
diff --git a/kernel/posix-cpu-timers.c b/kernel/posix-cpu-timers.c
index 6842eeba5879..05bb7173850e 100644
--- a/kernel/posix-cpu-timers.c
+++ b/kernel/posix-cpu-timers.c
@@ -37,13 +37,13 @@ static int check_clock(const clockid_t which_clock)
if (pid == 0)
return 0;
- read_lock(&tasklist_lock);
+ rcu_read_lock();
p = find_task_by_vpid(pid);
if (!p || !(CPUCLOCK_PERTHREAD(which_clock) ?
- same_thread_group(p, current) : thread_group_leader(p))) {
+ same_thread_group(p, current) : has_group_leader_pid(p))) {
error = -EINVAL;
}
- read_unlock(&tasklist_lock);
+ rcu_read_unlock();
return error;
}
@@ -390,7 +390,7 @@ int posix_cpu_timer_create(struct k_itimer *new_timer)
INIT_LIST_HEAD(&new_timer->it.cpu.entry);
- read_lock(&tasklist_lock);
+ rcu_read_lock();
if (CPUCLOCK_PERTHREAD(new_timer->it_clock)) {
if (pid == 0) {
p = current;
@@ -404,7 +404,7 @@ int posix_cpu_timer_create(struct k_itimer *new_timer)
p = current->group_leader;
} else {
p = find_task_by_vpid(pid);
- if (p && !thread_group_leader(p))
+ if (p && !has_group_leader_pid(p))
p = NULL;
}
}
@@ -414,7 +414,7 @@ int posix_cpu_timer_create(struct k_itimer *new_timer)
} else {
ret = -EINVAL;
}
- read_unlock(&tasklist_lock);
+ rcu_read_unlock();
return ret;
}
diff --git a/kernel/power/Kconfig b/kernel/power/Kconfig
index 29bff6117abc..a5aff3ebad38 100644
--- a/kernel/power/Kconfig
+++ b/kernel/power/Kconfig
@@ -246,9 +246,13 @@ config PM_OPS
depends on PM_SLEEP || PM_RUNTIME
default y
+config ARCH_HAS_OPP
+ bool
+
config PM_OPP
bool "Operating Performance Point (OPP) Layer library"
depends on PM
+ depends on ARCH_HAS_OPP
---help---
SOCs have a standard set of tuples consisting of frequency and
voltage pairs that the device will support per voltage domain. This
diff --git a/kernel/printk.c b/kernel/printk.c
index b2ebaee8c377..9a2264fc42ca 100644
--- a/kernel/printk.c
+++ b/kernel/printk.c
@@ -261,6 +261,12 @@ static inline void boot_delay_msec(void)
}
#endif
+#ifdef CONFIG_SECURITY_DMESG_RESTRICT
+int dmesg_restrict = 1;
+#else
+int dmesg_restrict;
+#endif
+
int do_syslog(int type, char __user *buf, int len, bool from_file)
{
unsigned i, j, limit, count;
@@ -268,7 +274,20 @@ int do_syslog(int type, char __user *buf, int len, bool from_file)
char c;
int error = 0;
- error = security_syslog(type, from_file);
+ /*
+ * If this is from /proc/kmsg we only do the capabilities checks
+ * at open time.
+ */
+ if (type == SYSLOG_ACTION_OPEN || !from_file) {
+ if (dmesg_restrict && !capable(CAP_SYS_ADMIN))
+ return -EPERM;
+ if ((type != SYSLOG_ACTION_READ_ALL &&
+ type != SYSLOG_ACTION_SIZE_BUFFER) &&
+ !capable(CAP_SYS_ADMIN))
+ return -EPERM;
+ }
+
+ error = security_syslog(type);
if (error)
return error;
diff --git a/kernel/range.c b/kernel/range.c
index 471b66acabb5..37fa9b99ad58 100644
--- a/kernel/range.c
+++ b/kernel/range.c
@@ -119,7 +119,7 @@ static int cmp_range(const void *x1, const void *x2)
int clean_sort_range(struct range *range, int az)
{
- int i, j, k = az - 1, nr_range = 0;
+ int i, j, k = az - 1, nr_range = az;
for (i = 0; i < k; i++) {
if (range[i].end)
diff --git a/kernel/relay.c b/kernel/relay.c
index c7cf397fb929..859ea5a9605f 100644
--- a/kernel/relay.c
+++ b/kernel/relay.c
@@ -70,17 +70,10 @@ static const struct vm_operations_struct relay_file_mmap_ops = {
*/
static struct page **relay_alloc_page_array(unsigned int n_pages)
{
- struct page **array;
- size_t pa_size = n_pages * sizeof(struct page *);
-
- if (pa_size > PAGE_SIZE) {
- array = vmalloc(pa_size);
- if (array)
- memset(array, 0, pa_size);
- } else {
- array = kzalloc(pa_size, GFP_KERNEL);
- }
- return array;
+ const size_t pa_size = n_pages * sizeof(struct page *);
+ if (pa_size > PAGE_SIZE)
+ return vzalloc(pa_size);
+ return kzalloc(pa_size, GFP_KERNEL);
}
/*
diff --git a/kernel/sched.c b/kernel/sched.c
index d42992bccdfa..dc91a4d09ac3 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -560,18 +560,8 @@ struct rq {
static DEFINE_PER_CPU_SHARED_ALIGNED(struct rq, runqueues);
-static inline
-void check_preempt_curr(struct rq *rq, struct task_struct *p, int flags)
-{
- rq->curr->sched_class->check_preempt_curr(rq, p, flags);
- /*
- * A queue event has occurred, and we're going to schedule. In
- * this case, we can save a useless back to back clock update.
- */
- if (test_tsk_need_resched(p))
- rq->skip_clock_update = 1;
-}
+static void check_preempt_curr(struct rq *rq, struct task_struct *p, int flags);
static inline int cpu_of(struct rq *rq)
{
@@ -2118,6 +2108,31 @@ static inline void check_class_changed(struct rq *rq, struct task_struct *p,
p->sched_class->prio_changed(rq, p, oldprio, running);
}
+static void check_preempt_curr(struct rq *rq, struct task_struct *p, int flags)
+{
+ const struct sched_class *class;
+
+ if (p->sched_class == rq->curr->sched_class) {
+ rq->curr->sched_class->check_preempt_curr(rq, p, flags);
+ } else {
+ for_each_class(class) {
+ if (class == rq->curr->sched_class)
+ break;
+ if (class == p->sched_class) {
+ resched_task(rq->curr);
+ break;
+ }
+ }
+ }
+
+ /*
+ * A queue event has occurred, and we're going to schedule. In
+ * this case, we can save a useless back to back clock update.
+ */
+ if (test_tsk_need_resched(rq->curr))
+ rq->skip_clock_update = 1;
+}
+
#ifdef CONFIG_SMP
/*
* Is this task likely cache-hot:
@@ -6960,6 +6975,8 @@ static void init_sched_groups_power(int cpu, struct sched_domain *sd)
if (cpu != group_first_cpu(sd->groups))
return;
+ sd->groups->group_weight = cpumask_weight(sched_group_cpus(sd->groups));
+
child = sd->child;
sd->groups->cpu_power = 0;
@@ -8510,12 +8527,12 @@ void sched_move_task(struct task_struct *tsk)
if (unlikely(running))
tsk->sched_class->put_prev_task(rq, tsk);
- set_task_rq(tsk, task_cpu(tsk));
-
#ifdef CONFIG_FAIR_GROUP_SCHED
- if (tsk->sched_class->moved_group)
- tsk->sched_class->moved_group(tsk, on_rq);
+ if (tsk->sched_class->task_move_group)
+ tsk->sched_class->task_move_group(tsk, on_rq);
+ else
#endif
+ set_task_rq(tsk, task_cpu(tsk));
if (unlikely(running))
tsk->sched_class->set_curr_task(rq);
diff --git a/kernel/sched_fair.c b/kernel/sched_fair.c
index 933f3d1b62ea..00ebd7686676 100644
--- a/kernel/sched_fair.c
+++ b/kernel/sched_fair.c
@@ -1654,12 +1654,6 @@ static void check_preempt_wakeup(struct rq *rq, struct task_struct *p, int wake_
struct cfs_rq *cfs_rq = task_cfs_rq(curr);
int scale = cfs_rq->nr_running >= sched_nr_latency;
- if (unlikely(rt_prio(p->prio)))
- goto preempt;
-
- if (unlikely(p->sched_class != &fair_sched_class))
- return;
-
if (unlikely(se == pse))
return;
@@ -1764,10 +1758,6 @@ static void pull_task(struct rq *src_rq, struct task_struct *p,
set_task_cpu(p, this_cpu);
activate_task(this_rq, p, 0);
check_preempt_curr(this_rq, p, 0);
-
- /* re-arm NEWIDLE balancing when moving tasks */
- src_rq->avg_idle = this_rq->avg_idle = 2*sysctl_sched_migration_cost;
- this_rq->idle_stamp = 0;
}
/*
@@ -2035,13 +2025,16 @@ struct sd_lb_stats {
unsigned long this_load_per_task;
unsigned long this_nr_running;
unsigned long this_has_capacity;
+ unsigned int this_idle_cpus;
/* Statistics of the busiest group */
+ unsigned int busiest_idle_cpus;
unsigned long max_load;
unsigned long busiest_load_per_task;
unsigned long busiest_nr_running;
unsigned long busiest_group_capacity;
unsigned long busiest_has_capacity;
+ unsigned int busiest_group_weight;
int group_imb; /* Is there imbalance in this sd */
#if defined(CONFIG_SCHED_MC) || defined(CONFIG_SCHED_SMT)
@@ -2063,6 +2056,8 @@ struct sg_lb_stats {
unsigned long sum_nr_running; /* Nr tasks running in the group */
unsigned long sum_weighted_load; /* Weighted load of group's tasks */
unsigned long group_capacity;
+ unsigned long idle_cpus;
+ unsigned long group_weight;
int group_imb; /* Is there an imbalance in the group ? */
int group_has_capacity; /* Is there extra capacity in the group? */
};
@@ -2431,7 +2426,8 @@ static inline void update_sg_lb_stats(struct sched_domain *sd,
sgs->group_load += load;
sgs->sum_nr_running += rq->nr_running;
sgs->sum_weighted_load += weighted_cpuload(i);
-
+ if (idle_cpu(i))
+ sgs->idle_cpus++;
}
/*
@@ -2469,6 +2465,7 @@ static inline void update_sg_lb_stats(struct sched_domain *sd,
sgs->group_capacity = DIV_ROUND_CLOSEST(group->cpu_power, SCHED_LOAD_SCALE);
if (!sgs->group_capacity)
sgs->group_capacity = fix_small_capacity(sd, group);
+ sgs->group_weight = group->group_weight;
if (sgs->group_capacity > sgs->sum_nr_running)
sgs->group_has_capacity = 1;
@@ -2576,13 +2573,16 @@ static inline void update_sd_lb_stats(struct sched_domain *sd, int this_cpu,
sds->this_nr_running = sgs.sum_nr_running;
sds->this_load_per_task = sgs.sum_weighted_load;
sds->this_has_capacity = sgs.group_has_capacity;
+ sds->this_idle_cpus = sgs.idle_cpus;
} else if (update_sd_pick_busiest(sd, sds, sg, &sgs, this_cpu)) {
sds->max_load = sgs.avg_load;
sds->busiest = sg;
sds->busiest_nr_running = sgs.sum_nr_running;
+ sds->busiest_idle_cpus = sgs.idle_cpus;
sds->busiest_group_capacity = sgs.group_capacity;
sds->busiest_load_per_task = sgs.sum_weighted_load;
sds->busiest_has_capacity = sgs.group_has_capacity;
+ sds->busiest_group_weight = sgs.group_weight;
sds->group_imb = sgs.group_imb;
}
@@ -2860,8 +2860,26 @@ find_busiest_group(struct sched_domain *sd, int this_cpu,
if (sds.this_load >= sds.avg_load)
goto out_balanced;
- if (100 * sds.max_load <= sd->imbalance_pct * sds.this_load)
- goto out_balanced;
+ /*
+ * In the CPU_NEWLY_IDLE, use imbalance_pct to be conservative.
+ * And to check for busy balance use !idle_cpu instead of
+ * CPU_NOT_IDLE. This is because HT siblings will use CPU_NOT_IDLE
+ * even when they are idle.
+ */
+ if (idle == CPU_NEWLY_IDLE || !idle_cpu(this_cpu)) {
+ if (100 * sds.max_load <= sd->imbalance_pct * sds.this_load)
+ goto out_balanced;
+ } else {
+ /*
+ * This cpu is idle. If the busiest group load doesn't
+ * have more tasks than the number of available cpu's and
+ * there is no imbalance between this and busiest group
+ * wrt to idle cpu's, it is balanced.
+ */
+ if ((sds.this_idle_cpus <= sds.busiest_idle_cpus + 1) &&
+ sds.busiest_nr_running <= sds.busiest_group_weight)
+ goto out_balanced;
+ }
force_balance:
/* Looks like there is an imbalance. Compute it */
@@ -3197,8 +3215,10 @@ static void idle_balance(int this_cpu, struct rq *this_rq)
interval = msecs_to_jiffies(sd->balance_interval);
if (time_after(next_balance, sd->last_balance + interval))
next_balance = sd->last_balance + interval;
- if (pulled_task)
+ if (pulled_task) {
+ this_rq->idle_stamp = 0;
break;
+ }
}
raw_spin_lock(&this_rq->lock);
@@ -3869,13 +3889,26 @@ static void set_curr_task_fair(struct rq *rq)
}
#ifdef CONFIG_FAIR_GROUP_SCHED
-static void moved_group_fair(struct task_struct *p, int on_rq)
+static void task_move_group_fair(struct task_struct *p, int on_rq)
{
- struct cfs_rq *cfs_rq = task_cfs_rq(p);
-
- update_curr(cfs_rq);
+ /*
+ * If the task was not on the rq at the time of this cgroup movement
+ * it must have been asleep, sleeping tasks keep their ->vruntime
+ * absolute on their old rq until wakeup (needed for the fair sleeper
+ * bonus in place_entity()).
+ *
+ * If it was on the rq, we've just 'preempted' it, which does convert
+ * ->vruntime to a relative base.
+ *
+ * Make sure both cases convert their relative position when migrating
+ * to another cgroup's rq. This does somewhat interfere with the
+ * fair sleeper stuff for the first placement, but who cares.
+ */
+ if (!on_rq)
+ p->se.vruntime -= cfs_rq_of(&p->se)->min_vruntime;
+ set_task_rq(p, task_cpu(p));
if (!on_rq)
- place_entity(cfs_rq, &p->se, 1);
+ p->se.vruntime += cfs_rq_of(&p->se)->min_vruntime;
}
#endif
@@ -3927,7 +3960,7 @@ static const struct sched_class fair_sched_class = {
.get_rr_interval = get_rr_interval_fair,
#ifdef CONFIG_FAIR_GROUP_SCHED
- .moved_group = moved_group_fair,
+ .task_move_group = task_move_group_fair,
#endif
};
diff --git a/kernel/sched_stats.h b/kernel/sched_stats.h
index 25c2f962f6fc..48ddf431db0e 100644
--- a/kernel/sched_stats.h
+++ b/kernel/sched_stats.h
@@ -157,15 +157,7 @@ static inline void sched_info_reset_dequeued(struct task_struct *t)
}
/*
- * Called when a process is dequeued from the active array and given
- * the cpu. We should note that with the exception of interactive
- * tasks, the expired queue will become the active queue after the active
- * queue is empty, without explicitly dequeuing and requeuing tasks in the
- * expired queue. (Interactive tasks may be requeued directly to the
- * active queue, thus delaying tasks in the expired queue from running;
- * see scheduler_tick()).
- *
- * Though we are interested in knowing how long it was from the *first* time a
+ * We are interested in knowing how long it was from the *first* time a
* task was queued to the time that it finally hit a cpu, we call this routine
* from dequeue_task() to account for possible rq->clock skew across cpus. The
* delta taken on each cpu would annul the skew.
@@ -203,16 +195,6 @@ static void sched_info_arrive(struct task_struct *t)
}
/*
- * Called when a process is queued into either the active or expired
- * array. The time is noted and later used to determine how long we
- * had to wait for us to reach the cpu. Since the expired queue will
- * become the active queue after active queue is empty, without dequeuing
- * and requeuing any tasks, we are interested in queuing to either. It
- * is unusual but not impossible for tasks to be dequeued and immediately
- * requeued in the same or another array: this can happen in sched_yield(),
- * set_user_nice(), and even load_balance() as it moves tasks from runqueue
- * to runqueue.
- *
* This function is only called from enqueue_task(), but also only updates
* the timestamp if it is already not set. It's assumed that
* sched_info_dequeued() will clear that stamp when appropriate.
diff --git a/kernel/sched_stoptask.c b/kernel/sched_stoptask.c
index 45bddc0c1048..2bf6b47058c1 100644
--- a/kernel/sched_stoptask.c
+++ b/kernel/sched_stoptask.c
@@ -19,14 +19,14 @@ select_task_rq_stop(struct rq *rq, struct task_struct *p,
static void
check_preempt_curr_stop(struct rq *rq, struct task_struct *p, int flags)
{
- resched_task(rq->curr); /* we preempt everything */
+ /* we're never preempted */
}
static struct task_struct *pick_next_task_stop(struct rq *rq)
{
struct task_struct *stop = rq->stop;
- if (stop && stop->state == TASK_RUNNING)
+ if (stop && stop->se.on_rq)
return stop;
return NULL;
diff --git a/kernel/sysctl.c b/kernel/sysctl.c
index c33a1edb799f..5abfa1518554 100644
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -702,6 +702,15 @@ static struct ctl_table kern_table[] = {
.extra1 = &zero,
.extra2 = &ten_thousand,
},
+ {
+ .procname = "dmesg_restrict",
+ .data = &dmesg_restrict,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec_minmax,
+ .extra1 = &zero,
+ .extra2 = &one,
+ },
#endif
{
.procname = "ngroups_max",
diff --git a/kernel/trace/Kconfig b/kernel/trace/Kconfig
index e04b8bcdef88..ea37e2ff4164 100644
--- a/kernel/trace/Kconfig
+++ b/kernel/trace/Kconfig
@@ -126,7 +126,7 @@ if FTRACE
config FUNCTION_TRACER
bool "Kernel Function Tracer"
depends on HAVE_FUNCTION_TRACER
- select FRAME_POINTER if (!ARM_UNWIND)
+ select FRAME_POINTER if !ARM_UNWIND && !S390
select KALLSYMS
select GENERIC_TRACER
select CONTEXT_SWITCH_TRACER
diff --git a/kernel/trace/blktrace.c b/kernel/trace/blktrace.c
index bc251ed66724..7b8ec0281548 100644
--- a/kernel/trace/blktrace.c
+++ b/kernel/trace/blktrace.c
@@ -168,7 +168,6 @@ static int act_log_check(struct blk_trace *bt, u32 what, sector_t sector,
static const u32 ddir_act[2] = { BLK_TC_ACT(BLK_TC_READ),
BLK_TC_ACT(BLK_TC_WRITE) };
-#define BLK_TC_HARDBARRIER BLK_TC_BARRIER
#define BLK_TC_RAHEAD BLK_TC_AHEAD
/* The ilog2() calls fall out because they're constant */
@@ -196,7 +195,6 @@ static void __blk_add_trace(struct blk_trace *bt, sector_t sector, int bytes,
return;
what |= ddir_act[rw & WRITE];
- what |= MASK_TC_BIT(rw, HARDBARRIER);
what |= MASK_TC_BIT(rw, SYNC);
what |= MASK_TC_BIT(rw, RAHEAD);
what |= MASK_TC_BIT(rw, META);
@@ -1807,8 +1805,6 @@ void blk_fill_rwbs(char *rwbs, u32 rw, int bytes)
if (rw & REQ_RAHEAD)
rwbs[i++] = 'A';
- if (rw & REQ_HARDBARRIER)
- rwbs[i++] = 'B';
if (rw & REQ_SYNC)
rwbs[i++] = 'S';
if (rw & REQ_META)
diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index 82d9b8106cd0..c380612273bf 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -17,7 +17,6 @@
#include <linux/writeback.h>
#include <linux/kallsyms.h>
#include <linux/seq_file.h>
-#include <linux/smp_lock.h>
#include <linux/notifier.h>
#include <linux/irqflags.h>
#include <linux/debugfs.h>
@@ -1284,6 +1283,8 @@ void trace_dump_stack(void)
__ftrace_trace_stack(global_trace.buffer, flags, 3, preempt_count());
}
+static DEFINE_PER_CPU(int, user_stack_count);
+
void
ftrace_trace_userstack(struct ring_buffer *buffer, unsigned long flags, int pc)
{
@@ -1302,6 +1303,18 @@ ftrace_trace_userstack(struct ring_buffer *buffer, unsigned long flags, int pc)
if (unlikely(in_nmi()))
return;
+ /*
+ * prevent recursion, since the user stack tracing may
+ * trigger other kernel events.
+ */
+ preempt_disable();
+ if (__this_cpu_read(user_stack_count))
+ goto out;
+
+ __this_cpu_inc(user_stack_count);
+
+
+
event = trace_buffer_lock_reserve(buffer, TRACE_USER_STACK,
sizeof(*entry), flags, pc);
if (!event)
@@ -1319,6 +1332,11 @@ ftrace_trace_userstack(struct ring_buffer *buffer, unsigned long flags, int pc)
save_stack_trace_user(&trace);
if (!filter_check_discard(call, entry, buffer, event))
ring_buffer_unlock_commit(buffer, event);
+
+ __this_cpu_dec(user_stack_count);
+
+ out:
+ preempt_enable();
}
#ifdef UNUSED
diff --git a/kernel/watchdog.c b/kernel/watchdog.c
index bafba687a6d8..6e3c41a4024c 100644
--- a/kernel/watchdog.c
+++ b/kernel/watchdog.c
@@ -43,7 +43,7 @@ static DEFINE_PER_CPU(unsigned long, hrtimer_interrupts_saved);
static DEFINE_PER_CPU(struct perf_event *, watchdog_ev);
#endif
-static int __initdata no_watchdog;
+static int no_watchdog;
/* boot commands */