Diffstat (limited to 'kernel')
-rw-r--r-- kernel/Kconfig.freezer | 1
-rw-r--r-- kernel/Kconfig.hz | 1
-rw-r--r-- kernel/Kconfig.locks | 1
-rw-r--r-- kernel/Kconfig.preempt | 1
-rw-r--r-- kernel/Makefile | 4
-rw-r--r-- kernel/audit.c | 42
-rw-r--r-- kernel/audit.h | 23
-rw-r--r-- kernel/audit_fsnotify.c | 11
-rw-r--r-- kernel/audit_watch.c | 15
-rw-r--r-- kernel/auditfilter.c | 77
-rw-r--r-- kernel/auditsc.c | 42
-rw-r--r-- kernel/bpf/arraymap.c | 18
-rw-r--r-- kernel/bpf/bpf_lru_list.c | 5
-rw-r--r-- kernel/bpf/bpf_lru_list.h | 5
-rw-r--r-- kernel/bpf/cgroup.c | 94
-rw-r--r-- kernel/bpf/core.c | 52
-rw-r--r-- kernel/bpf/cpumap.c | 9
-rw-r--r-- kernel/bpf/devmap.c | 19
-rw-r--r-- kernel/bpf/hashtab.c | 37
-rw-r--r-- kernel/bpf/inode.c | 2
-rw-r--r-- kernel/bpf/local_storage.c | 13
-rw-r--r-- kernel/bpf/lpm_trie.c | 8
-rw-r--r-- kernel/bpf/map_in_map.c | 5
-rw-r--r-- kernel/bpf/map_in_map.h | 5
-rw-r--r-- kernel/bpf/percpu_freelist.c | 5
-rw-r--r-- kernel/bpf/percpu_freelist.h | 5
-rw-r--r-- kernel/bpf/queue_stack_maps.c | 13
-rw-r--r-- kernel/bpf/reuseport_array.c | 17
-rw-r--r-- kernel/bpf/stackmap.c | 33
-rw-r--r-- kernel/bpf/syscall.c | 108
-rw-r--r-- kernel/bpf/tnum.c | 1
-rw-r--r-- kernel/bpf/verifier.c | 397
-rw-r--r-- kernel/bpf/xskmap.c | 12
-rw-r--r-- kernel/cgroup/cgroup-v1.c | 1
-rw-r--r-- kernel/cgroup/cgroup.c | 172
-rw-r--r-- kernel/cgroup/rstat.c | 1
-rw-r--r-- kernel/context_tracking.c | 1
-rw-r--r-- kernel/cpu.c | 4
-rw-r--r-- kernel/crash_dump.c | 1
-rw-r--r-- kernel/cred.c | 6
-rw-r--r-- kernel/debug/Makefile | 1
-rw-r--r-- kernel/delayacct.c | 11
-rw-r--r-- kernel/dma/Kconfig | 1
-rw-r--r-- kernel/dma/contiguous.c | 56
-rw-r--r-- kernel/dma/direct.c | 41
-rw-r--r-- kernel/dma/swiotlb.c | 1
-rw-r--r-- kernel/events/internal.h | 4
-rw-r--r-- kernel/events/ring_buffer.c | 64
-rw-r--r-- kernel/events/uprobes.c | 4
-rw-r--r-- kernel/exit.c | 3
-rw-r--r-- kernel/extable.c | 14
-rw-r--r-- kernel/fail_function.c | 23
-rw-r--r-- kernel/fork.c | 3
-rw-r--r-- kernel/freezer.c | 1
-rw-r--r-- kernel/futex.c | 15
-rw-r--r-- kernel/gcov/Kconfig | 1
-rw-r--r-- kernel/gcov/fs.c | 24
-rwxr-xr-x kernel/gen_kheaders.sh (renamed from kernel/gen_ikh_data.sh) | 17
-rw-r--r-- kernel/hung_task.c | 1
-rw-r--r-- kernel/irq/Kconfig | 1
-rw-r--r-- kernel/irq_work.c | 1
-rw-r--r-- kernel/jump_label.c | 1
-rw-r--r-- kernel/kallsyms.c | 1
-rw-r--r-- kernel/kheaders.c | 40
-rw-r--r-- kernel/kprobes.c | 43
-rw-r--r-- kernel/ksysfs.c | 4
-rw-r--r-- kernel/kthread.c | 1
-rw-r--r-- kernel/livepatch/Kconfig | 1
-rw-r--r-- kernel/livepatch/Makefile | 1
-rw-r--r-- kernel/livepatch/core.c | 14
-rw-r--r-- kernel/livepatch/patch.c | 14
-rw-r--r-- kernel/livepatch/shadow.c | 14
-rw-r--r-- kernel/livepatch/transition.c | 14
-rw-r--r-- kernel/locking/lock_events.h | 42
-rw-r--r-- kernel/locking/lockdep.c | 8
-rw-r--r-- kernel/locking/mutex.c | 1
-rw-r--r-- kernel/locking/percpu-rwsem.c | 1
-rw-r--r-- kernel/locking/qrwlock.c | 11
-rw-r--r-- kernel/locking/qspinlock.c | 11
-rw-r--r-- kernel/locking/qspinlock_stat.h | 10
-rw-r--r-- kernel/locking/rtmutex.c | 1
-rw-r--r-- kernel/locking/test-ww_mutex.c | 15
-rw-r--r-- kernel/module-internal.h | 6
-rw-r--r-- kernel/module.c | 14
-rw-r--r-- kernel/module_signing.c | 6
-rw-r--r-- kernel/notifier.c | 1
-rw-r--r-- kernel/panic.c | 1
-rw-r--r-- kernel/params.c | 14
-rw-r--r-- kernel/pid.c | 1
-rw-r--r-- kernel/pid_namespace.c | 3
-rw-r--r-- kernel/power/Kconfig | 1
-rw-r--r-- kernel/power/hibernate.c | 13
-rw-r--r-- kernel/power/qos.c | 1
-rw-r--r-- kernel/power/suspend.c | 6
-rw-r--r-- kernel/printk/Makefile | 1
-rw-r--r-- kernel/printk/internal.h | 14
-rw-r--r-- kernel/printk/printk.c | 1
-rw-r--r-- kernel/printk/printk_safe.c | 14
-rw-r--r-- kernel/profile.c | 1
-rw-r--r-- kernel/ptrace.c | 1
-rw-r--r-- kernel/rcu/Kconfig | 1
-rw-r--r-- kernel/rcu/Kconfig.debug | 1
-rw-r--r-- kernel/reboot.c | 1
-rw-r--r-- kernel/resource.c | 1
-rw-r--r-- kernel/rseq.c | 4
-rw-r--r-- kernel/sched/clock.c | 1
-rw-r--r-- kernel/sched/core.c | 1
-rw-r--r-- kernel/sched/cputime.c | 1
-rw-r--r-- kernel/sched/idle.c | 1
-rw-r--r-- kernel/sched/isolation.c | 1
-rw-r--r-- kernel/sched/membarrier.c | 11
-rw-r--r-- kernel/sched/wait.c | 1
-rw-r--r-- kernel/sched/wait_bit.c | 1
-rw-r--r-- kernel/seccomp.c | 2
-rw-r--r-- kernel/signal.c | 254
-rw-r--r-- kernel/smp.c | 1
-rw-r--r-- kernel/smpboot.c | 1
-rw-r--r-- kernel/stacktrace.c | 3
-rw-r--r-- kernel/stop_machine.c | 3
-rw-r--r-- kernel/sys.c | 62
-rw-r--r-- kernel/sysctl.c | 1
-rw-r--r-- kernel/taskstats.c | 12
-rw-r--r-- kernel/test_kprobes.c | 11
-rw-r--r-- kernel/time/Kconfig | 1
-rw-r--r-- kernel/trace/Kconfig | 13
-rw-r--r-- kernel/trace/blktrace.c | 6
-rw-r--r-- kernel/trace/bpf_trace.c | 101
-rw-r--r-- kernel/trace/ftrace.c | 48
-rw-r--r-- kernel/trace/trace.c | 19
-rw-r--r-- kernel/trace/trace.h | 19
-rw-r--r-- kernel/trace/trace_events.c | 2
-rw-r--r-- kernel/trace/trace_events_filter.c | 8
-rw-r--r-- kernel/trace/trace_events_hist.c | 13
-rw-r--r-- kernel/trace/trace_kdb.c | 6
-rw-r--r-- kernel/trace/trace_kprobe.c | 112
-rw-r--r-- kernel/trace/trace_probe.c | 37
-rw-r--r-- kernel/trace/trace_probe.h | 3
-rw-r--r-- kernel/trace/trace_probe_tmpl.h | 36
-rw-r--r-- kernel/trace/trace_uprobe.c | 19
-rw-r--r-- kernel/tracepoint.c | 15
-rw-r--r-- kernel/tsacct.c | 13
-rw-r--r-- kernel/umh.c | 1
-rw-r--r-- kernel/up.c | 1
-rw-r--r-- kernel/user-return-notifier.c | 1
-rw-r--r-- kernel/user.c | 1
-rw-r--r-- kernel/workqueue.c | 1
146 files changed, 1735 insertions(+), 1018 deletions(-)
diff --git a/kernel/Kconfig.freezer b/kernel/Kconfig.freezer
index a3bb4cb52539..68646feefb3d 100644
--- a/kernel/Kconfig.freezer
+++ b/kernel/Kconfig.freezer
@@ -1,2 +1,3 @@
+# SPDX-License-Identifier: GPL-2.0-only
config FREEZER
def_bool PM_SLEEP || CGROUP_FREEZER
diff --git a/kernel/Kconfig.hz b/kernel/Kconfig.hz
index 2a202a846757..38ef6d06888e 100644
--- a/kernel/Kconfig.hz
+++ b/kernel/Kconfig.hz
@@ -1,3 +1,4 @@
+# SPDX-License-Identifier: GPL-2.0-only
#
# Timer Interrupt Frequency Configuration
#
diff --git a/kernel/Kconfig.locks b/kernel/Kconfig.locks
index bf770d7556f7..e0852dc333ac 100644
--- a/kernel/Kconfig.locks
+++ b/kernel/Kconfig.locks
@@ -1,3 +1,4 @@
+# SPDX-License-Identifier: GPL-2.0-only
#
# The ARCH_INLINE foo is necessary because select ignores "depends on"
#
diff --git a/kernel/Kconfig.preempt b/kernel/Kconfig.preempt
index 0fee5fe6c899..dc0b682ec2d9 100644
--- a/kernel/Kconfig.preempt
+++ b/kernel/Kconfig.preempt
@@ -1,3 +1,4 @@
+# SPDX-License-Identifier: GPL-2.0-only
choice
prompt "Preemption Model"
diff --git a/kernel/Makefile b/kernel/Makefile
index 33824f0385b3..a8d923b5481b 100644
--- a/kernel/Makefile
+++ b/kernel/Makefile
@@ -71,7 +71,7 @@ obj-$(CONFIG_UTS_NS) += utsname.o
obj-$(CONFIG_USER_NS) += user_namespace.o
obj-$(CONFIG_PID_NS) += pid_namespace.o
obj-$(CONFIG_IKCONFIG) += configs.o
-obj-$(CONFIG_IKHEADERS_PROC) += kheaders.o
+obj-$(CONFIG_IKHEADERS) += kheaders.o
obj-$(CONFIG_SMP) += stop_machine.o
obj-$(CONFIG_KPROBES_SANITY_TEST) += test_kprobes.o
obj-$(CONFIG_AUDIT) += audit.o auditfilter.o
@@ -127,7 +127,7 @@ $(obj)/config_data.gz: $(KCONFIG_CONFIG) FORCE
$(obj)/kheaders.o: $(obj)/kheaders_data.tar.xz
quiet_cmd_genikh = CHK $(obj)/kheaders_data.tar.xz
-cmd_genikh = $(CONFIG_SHELL) $(srctree)/kernel/gen_ikh_data.sh $@
+cmd_genikh = $(CONFIG_SHELL) $(srctree)/kernel/gen_kheaders.sh $@
$(obj)/kheaders_data.tar.xz: FORCE
$(call cmd,genikh)
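
The two Makefile hunks above track the rename of the in-kernel headers feature: the config symbol drops its _PROC suffix and the generator script becomes gen_kheaders.sh, reflecting that the generated archive is exposed through sysfs rather than procfs. A minimal userspace sketch of consuming it; the /sys/kernel/kheaders.tar.xz path is an assumption based on the IKHEADERS naming (compare the kernel/kheaders.c entry in the diffstat):

#include <stdio.h>

int main(void)
{
	/* With CONFIG_IKHEADERS enabled (built-in, or the module loaded),
	 * the compressed header archive appears here instead of /proc: */
	FILE *f = fopen("/sys/kernel/kheaders.tar.xz", "rb");

	if (!f) {
		perror("kheaders");	/* not enabled, or module not loaded */
		return 1;
	}
	/* Stream the xz'd tarball, e.g. feed it to "tar -xJf -" in practice. */
	fclose(f);
	return 0;
}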
diff --git a/kernel/audit.c b/kernel/audit.c
index b96bf69183f4..da8dc0db5bd3 100644
--- a/kernel/audit.c
+++ b/kernel/audit.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
/* audit.c -- Auditing support
* Gateway between the kernel (e.g., selinux) and the user-space audit daemon.
* System-call specific features have moved to auditsc.c
@@ -5,20 +6,6 @@
* Copyright 2003-2007 Red Hat Inc., Durham, North Carolina.
* All Rights Reserved.
*
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
- *
* Written by Rickard E. (Rik) Faith <faith@redhat.com>
*
* Goals: 1) Integrate fully with Security Modules.
@@ -2274,6 +2261,33 @@ out:
}
/**
+ * audit_signal_info - record signal info for shutting down audit subsystem
+ * @sig: signal value
+ * @t: task being signaled
+ *
+ * If the audit subsystem is being terminated, record the task (pid)
+ * and uid that is doing that.
+ */
+int audit_signal_info(int sig, struct task_struct *t)
+{
+ kuid_t uid = current_uid(), auid;
+
+ if (auditd_test_task(t) &&
+ (sig == SIGTERM || sig == SIGHUP ||
+ sig == SIGUSR1 || sig == SIGUSR2)) {
+ audit_sig_pid = task_tgid_nr(current);
+ auid = audit_get_loginuid(current);
+ if (uid_valid(auid))
+ audit_sig_uid = auid;
+ else
+ audit_sig_uid = uid;
+ security_task_getsecid(current, &audit_sig_sid);
+ }
+
+ return audit_signal_info_syscall(t);
+}
+
+/**
* audit_log_end - end one audit record
* @ab: the audit_buffer
*
diff --git a/kernel/audit.h b/kernel/audit.h
index 2071725a999f..6fb7160412d4 100644
--- a/kernel/audit.h
+++ b/kernel/audit.h
@@ -1,22 +1,9 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
/* audit -- definition of audit_context structure and supporting types
*
* Copyright 2003-2004 Red Hat, Inc.
* Copyright 2005 Hewlett-Packard Development Company, L.P.
* Copyright 2005 IBM Corporation
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
*/
#include <linux/fs.h>
@@ -299,7 +286,7 @@ extern const char *audit_tree_path(struct audit_tree *tree);
extern void audit_put_tree(struct audit_tree *tree);
extern void audit_kill_trees(struct audit_context *context);
-extern int audit_signal_info(int sig, struct task_struct *t);
+extern int audit_signal_info_syscall(struct task_struct *t);
extern void audit_filter_inodes(struct task_struct *tsk,
struct audit_context *ctx);
extern struct list_head *audit_killed_trees(void);
@@ -330,7 +317,11 @@ extern struct list_head *audit_killed_trees(void);
#define audit_tree_path(rule) "" /* never called */
#define audit_kill_trees(context) BUG()
-#define audit_signal_info(s, t) AUDIT_DISABLED
+static inline int audit_signal_info_syscall(struct task_struct *t)
+{
+ return 0;
+}
+
#define audit_filter_inodes(t, c) AUDIT_DISABLED
#endif /* CONFIG_AUDITSYSCALL */
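
The audit.c and audit.h hunks above, together with the auditsc.c hunk later in this diff, split the old audit_signal_info() in two: the generic half that records who is signaling the audit daemon stays in audit.c, while the syscall-auditing half moves behind audit_signal_info_syscall(). A condensed sketch of how the pieces fit, with bodies elided (the stub is the one added to audit.h above):

/* audit.c -- always built with CONFIG_AUDIT: */
int audit_signal_info(int sig, struct task_struct *t)
{
	/* ... record audit_sig_pid/uid/sid if auditd is being torn down ... */
	return audit_signal_info_syscall(t);
}

/* auditsc.c -- CONFIG_AUDITSYSCALL=y: records the target's pid/uid in the
 * current syscall context (see the auditsc.c hunk below). */
int audit_signal_info_syscall(struct task_struct *t);

/* audit.h -- CONFIG_AUDITSYSCALL=n: inline stub returning 0, so the
 * audit.c caller compiles unchanged. */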
diff --git a/kernel/audit_fsnotify.c b/kernel/audit_fsnotify.c
index b5737b826951..f0d243318452 100644
--- a/kernel/audit_fsnotify.c
+++ b/kernel/audit_fsnotify.c
@@ -1,18 +1,9 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
/* audit_fsnotify.c -- tracking inodes
*
* Copyright 2003-2009,2014-2015 Red Hat, Inc.
* Copyright 2005 Hewlett-Packard Development Company, L.P.
* Copyright 2005 IBM Corporation
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
*/
#include <linux/kernel.h>
diff --git a/kernel/audit_watch.c b/kernel/audit_watch.c
index b50c574223fa..1f31c2f1e6fc 100644
--- a/kernel/audit_watch.c
+++ b/kernel/audit_watch.c
@@ -1,22 +1,9 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
/* audit_watch.c -- watching inodes
*
* Copyright 2003-2009 Red Hat, Inc.
* Copyright 2005 Hewlett-Packard Development Company, L.P.
* Copyright 2005 IBM Corporation
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
*/
#include <linux/file.h>
diff --git a/kernel/auditfilter.c b/kernel/auditfilter.c
index 303fb04770ce..b0126e9c0743 100644
--- a/kernel/auditfilter.c
+++ b/kernel/auditfilter.c
@@ -1,22 +1,9 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
/* auditfilter.c -- filtering of audit events
*
* Copyright 2003-2004 Red Hat, Inc.
* Copyright 2005 Hewlett-Packard Development Company, L.P.
* Copyright 2005 IBM Corporation
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
*/
#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
@@ -335,7 +322,7 @@ static u32 audit_to_op(u32 op)
/* check if an audit field is valid */
static int audit_field_valid(struct audit_entry *entry, struct audit_field *f)
{
- switch(f->type) {
+ switch (f->type) {
case AUDIT_MSGTYPE:
if (entry->rule.listnr != AUDIT_FILTER_EXCLUDE &&
entry->rule.listnr != AUDIT_FILTER_USER)
@@ -347,7 +334,7 @@ static int audit_field_valid(struct audit_entry *entry, struct audit_field *f)
break;
}
- switch(entry->rule.listnr) {
+ switch (entry->rule.listnr) {
case AUDIT_FILTER_FS:
switch(f->type) {
case AUDIT_FSTYPE:
@@ -358,9 +345,16 @@ static int audit_field_valid(struct audit_entry *entry, struct audit_field *f)
}
}
- switch(f->type) {
- default:
- return -EINVAL;
+ /* Check for valid field type and op */
+ switch (f->type) {
+ case AUDIT_ARG0:
+ case AUDIT_ARG1:
+ case AUDIT_ARG2:
+ case AUDIT_ARG3:
+ case AUDIT_PERS: /* <uapi/linux/personality.h> */
+ case AUDIT_DEVMINOR:
+ /* all ops are valid */
+ break;
case AUDIT_UID:
case AUDIT_EUID:
case AUDIT_SUID:
@@ -373,46 +367,53 @@ static int audit_field_valid(struct audit_entry *entry, struct audit_field *f)
case AUDIT_FSGID:
case AUDIT_OBJ_GID:
case AUDIT_PID:
- case AUDIT_PERS:
case AUDIT_MSGTYPE:
case AUDIT_PPID:
case AUDIT_DEVMAJOR:
- case AUDIT_DEVMINOR:
case AUDIT_EXIT:
case AUDIT_SUCCESS:
case AUDIT_INODE:
case AUDIT_SESSIONID:
+ case AUDIT_SUBJ_SEN:
+ case AUDIT_SUBJ_CLR:
+ case AUDIT_OBJ_LEV_LOW:
+ case AUDIT_OBJ_LEV_HIGH:
+ case AUDIT_SADDR_FAM:
/* bit ops are only useful on syscall args */
if (f->op == Audit_bitmask || f->op == Audit_bittest)
return -EINVAL;
break;
- case AUDIT_ARG0:
- case AUDIT_ARG1:
- case AUDIT_ARG2:
- case AUDIT_ARG3:
case AUDIT_SUBJ_USER:
case AUDIT_SUBJ_ROLE:
case AUDIT_SUBJ_TYPE:
- case AUDIT_SUBJ_SEN:
- case AUDIT_SUBJ_CLR:
case AUDIT_OBJ_USER:
case AUDIT_OBJ_ROLE:
case AUDIT_OBJ_TYPE:
- case AUDIT_OBJ_LEV_LOW:
- case AUDIT_OBJ_LEV_HIGH:
case AUDIT_WATCH:
case AUDIT_DIR:
case AUDIT_FILTERKEY:
- break;
case AUDIT_LOGINUID_SET:
- if ((f->val != 0) && (f->val != 1))
- return -EINVAL;
- /* FALL THROUGH */
case AUDIT_ARCH:
case AUDIT_FSTYPE:
+ case AUDIT_PERM:
+ case AUDIT_FILETYPE:
+ case AUDIT_FIELD_COMPARE:
+ case AUDIT_EXE:
+ /* only equal and not equal valid ops */
if (f->op != Audit_not_equal && f->op != Audit_equal)
return -EINVAL;
break;
+ default:
+ /* field not recognized */
+ return -EINVAL;
+ }
+
+ /* Check for select valid field values */
+ switch (f->type) {
+ case AUDIT_LOGINUID_SET:
+ if ((f->val != 0) && (f->val != 1))
+ return -EINVAL;
+ break;
case AUDIT_PERM:
if (f->val & ~15)
return -EINVAL;
@@ -425,11 +426,14 @@ static int audit_field_valid(struct audit_entry *entry, struct audit_field *f)
if (f->val > AUDIT_MAX_FIELD_COMPARE)
return -EINVAL;
break;
- case AUDIT_EXE:
- if (f->op != Audit_not_equal && f->op != Audit_equal)
+ case AUDIT_SADDR_FAM:
+ if (f->val >= AF_MAX)
return -EINVAL;
break;
+ default:
+ break;
}
+
return 0;
}
@@ -1203,7 +1207,6 @@ int audit_comparator(u32 left, u32 op, u32 right)
case Audit_bittest:
return ((left & right) == right);
default:
- BUG();
return 0;
}
}
@@ -1226,7 +1229,6 @@ int audit_uid_comparator(kuid_t left, u32 op, kuid_t right)
case Audit_bitmask:
case Audit_bittest:
default:
- BUG();
return 0;
}
}
@@ -1249,7 +1251,6 @@ int audit_gid_comparator(kgid_t left, u32 op, kgid_t right)
case Audit_bitmask:
case Audit_bittest:
default:
- BUG();
return 0;
}
}
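
Two things changed in auditfilter.c above: audit_field_valid() is reorganized into one switch pairing each known field with its legal operators, followed by a second switch for field-specific value limits (unknown fields are rejected by the first switch's default), and the comparators now return 0 for an unrecognized operator instead of calling BUG(), since invalid operators are already rejected at rule-load time. A self-contained model of the comparator semantics (userspace illustration, not kernel code):

#include <stdio.h>

enum { Audit_equal, Audit_not_equal, Audit_bitmask, Audit_bittest };

static int audit_comparator(unsigned int left, unsigned int op,
			    unsigned int right)
{
	switch (op) {
	case Audit_equal:	return left == right;
	case Audit_not_equal:	return left != right;
	case Audit_bitmask:	return (left & right) != 0;
	case Audit_bittest:	return (left & right) == right;
	default:		return 0;	/* defensive: no more BUG() */
	}
}

int main(void)
{
	/* bit ops are only accepted on syscall args by the validator above: */
	printf("arg & 0x4:  %d\n", audit_comparator(0x6, Audit_bitmask, 0x4));
	printf("arg &= 0x4: %d\n", audit_comparator(0x6, Audit_bittest, 0x4));
	return 0;
}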
diff --git a/kernel/auditsc.c b/kernel/auditsc.c
index 95ae27edd417..4effe01ebbe2 100644
--- a/kernel/auditsc.c
+++ b/kernel/auditsc.c
@@ -601,12 +601,20 @@ static int audit_filter_rules(struct task_struct *tsk,
}
break;
case AUDIT_WATCH:
- if (name)
- result = audit_watch_compare(rule->watch, name->ino, name->dev);
+ if (name) {
+ result = audit_watch_compare(rule->watch,
+ name->ino,
+ name->dev);
+ if (f->op == Audit_not_equal)
+ result = !result;
+ }
break;
case AUDIT_DIR:
- if (ctx)
+ if (ctx) {
result = match_tree_refs(ctx, rule->tree);
+ if (f->op == Audit_not_equal)
+ result = !result;
+ }
break;
case AUDIT_LOGINUID:
result = audit_uid_comparator(audit_get_loginuid(tsk),
@@ -615,6 +623,11 @@ static int audit_filter_rules(struct task_struct *tsk,
case AUDIT_LOGINUID_SET:
result = audit_comparator(audit_loginuid_set(tsk), f->op, f->val);
break;
+ case AUDIT_SADDR_FAM:
+ if (ctx->sockaddr)
+ result = audit_comparator(ctx->sockaddr->ss_family,
+ f->op, f->val);
+ break;
case AUDIT_SUBJ_USER:
case AUDIT_SUBJ_ROLE:
case AUDIT_SUBJ_TYPE:
@@ -684,9 +697,13 @@ static int audit_filter_rules(struct task_struct *tsk,
break;
case AUDIT_PERM:
result = audit_match_perm(ctx, f->val);
+ if (f->op == Audit_not_equal)
+ result = !result;
break;
case AUDIT_FILETYPE:
result = audit_match_filetype(ctx, f->val);
+ if (f->op == Audit_not_equal)
+ result = !result;
break;
case AUDIT_FIELD_COMPARE:
result = audit_field_compare(tsk, cred, f, ctx, name);
@@ -2360,30 +2377,17 @@ void __audit_ptrace(struct task_struct *t)
}
/**
- * audit_signal_info - record signal info for shutting down audit subsystem
- * @sig: signal value
+ * audit_signal_info_syscall - record signal info for syscalls
* @t: task being signaled
*
* If the audit subsystem is being terminated, record the task (pid)
* and uid that is doing that.
*/
-int audit_signal_info(int sig, struct task_struct *t)
+int audit_signal_info_syscall(struct task_struct *t)
{
struct audit_aux_data_pids *axp;
struct audit_context *ctx = audit_context();
- kuid_t uid = current_uid(), auid, t_uid = task_uid(t);
-
- if (auditd_test_task(t) &&
- (sig == SIGTERM || sig == SIGHUP ||
- sig == SIGUSR1 || sig == SIGUSR2)) {
- audit_sig_pid = task_tgid_nr(current);
- auid = audit_get_loginuid(current);
- if (uid_valid(auid))
- audit_sig_uid = auid;
- else
- audit_sig_uid = uid;
- security_task_getsecid(current, &audit_sig_sid);
- }
+ kuid_t t_uid = task_uid(t);
if (!audit_signals || audit_dummy_context())
return 0;
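
The auditsc.c filter changes above have two user-visible effects: boolean-style fields (watch, dir, perm, filetype) now honor the Audit_not_equal operator by negating the helper's matched result instead of silently behaving like equality, and the new AUDIT_SADDR_FAM field compares the address family of the syscall's recorded sockaddr. A toy illustration of the negation (values hypothetical):

#include <stdio.h>
#include <sys/socket.h>		/* AF_INET */

enum { Audit_equal, Audit_not_equal };

int main(void)
{
	/* audit_watch_compare() and friends return a bare matched flag and
	 * know nothing about the operator, so "!=" is applied afterwards: */
	int matched = 1;		/* helper: path matched */
	int op = Audit_not_equal;	/* rule used "!=" */

	if (op == Audit_not_equal)
		matched = !matched;
	printf("dir != <path> matches: %d\n", matched);	/* now 0, not 1 */

	/* AUDIT_SADDR_FAM is a plain numeric comparison on ss_family: */
	unsigned int ss_family = AF_INET;
	printf("saddr_fam matches AF_INET: %d\n", ss_family == AF_INET);
	return 0;
}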
diff --git a/kernel/bpf/arraymap.c b/kernel/bpf/arraymap.c
index 584636c9e2eb..0349cbf23cdb 100644
--- a/kernel/bpf/arraymap.c
+++ b/kernel/bpf/arraymap.c
@@ -83,6 +83,7 @@ static struct bpf_map *array_map_alloc(union bpf_attr *attr)
u32 elem_size, index_mask, max_entries;
bool unpriv = !capable(CAP_SYS_ADMIN);
u64 cost, array_size, mask64;
+ struct bpf_map_memory mem;
struct bpf_array *array;
elem_size = round_up(attr->value_size, 8);
@@ -116,32 +117,29 @@ static struct bpf_map *array_map_alloc(union bpf_attr *attr)
/* make sure there is no u32 overflow later in round_up() */
cost = array_size;
- if (cost >= U32_MAX - PAGE_SIZE)
- return ERR_PTR(-ENOMEM);
- if (percpu) {
+ if (percpu)
cost += (u64)attr->max_entries * elem_size * num_possible_cpus();
- if (cost >= U32_MAX - PAGE_SIZE)
- return ERR_PTR(-ENOMEM);
- }
- cost = round_up(cost, PAGE_SIZE) >> PAGE_SHIFT;
- ret = bpf_map_precharge_memlock(cost);
+ ret = bpf_map_charge_init(&mem, cost);
if (ret < 0)
return ERR_PTR(ret);
/* allocate all map elements and zero-initialize them */
array = bpf_map_area_alloc(array_size, numa_node);
- if (!array)
+ if (!array) {
+ bpf_map_charge_finish(&mem);
return ERR_PTR(-ENOMEM);
+ }
array->index_mask = index_mask;
array->map.unpriv_array = unpriv;
/* copy mandatory map attributes */
bpf_map_init_from_attr(&array->map, attr);
- array->map.pages = cost;
+ bpf_map_charge_move(&array->map.memory, &mem);
array->elem_size = elem_size;
if (percpu && bpf_array_alloc_percpu(array)) {
+ bpf_map_charge_finish(&array->map.memory);
bpf_map_area_free(array);
return ERR_PTR(-ENOMEM);
}
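
arraymap.c is the first of many map types in this diff (cpumap, devmap, hashtab, local_storage, lpm_trie, queue_stack_maps, reuseport_array, stackmap, xskmap) converted to the same pattern: charge the memlock budget up front with bpf_map_charge_init(), undo it with bpf_map_charge_finish() on any later failure, and hand the charge to the map with bpf_map_charge_move() on success. A condensed sketch of the shape every converted allocator now follows (example_map is a placeholder type; the bpf_map_charge_* helpers are defined in the kernel/bpf/syscall.c hunks below):

static struct bpf_map *example_map_alloc(union bpf_attr *attr)
{
	struct bpf_map_memory mem;
	struct example_map *m;
	u64 cost = 0;	/* total bytes the map will allocate */
	int err;

	/* Overflow and RLIMIT_MEMLOCK checks now live in one place: */
	err = bpf_map_charge_init(&mem, cost);
	if (err)
		return ERR_PTR(err);

	m = bpf_map_area_alloc(cost, bpf_map_attr_numa_node(attr));
	if (!m) {
		bpf_map_charge_finish(&mem);	/* roll back the charge */
		return ERR_PTR(-ENOMEM);
	}

	bpf_map_init_from_attr(&m->map, attr);
	bpf_map_charge_move(&m->map.memory, &mem);	/* map owns it now */
	return &m->map;
}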
diff --git a/kernel/bpf/bpf_lru_list.c b/kernel/bpf/bpf_lru_list.c
index e6ef4401a138..1b6b9349cb85 100644
--- a/kernel/bpf/bpf_lru_list.c
+++ b/kernel/bpf/bpf_lru_list.c
@@ -1,8 +1,5 @@
+// SPDX-License-Identifier: GPL-2.0-only
/* Copyright (c) 2016 Facebook
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of version 2 of the GNU General Public
- * License as published by the Free Software Foundation.
*/
#include <linux/cpumask.h>
#include <linux/spinlock.h>
diff --git a/kernel/bpf/bpf_lru_list.h b/kernel/bpf/bpf_lru_list.h
index 7d4f89b7cb84..f02504640e18 100644
--- a/kernel/bpf/bpf_lru_list.h
+++ b/kernel/bpf/bpf_lru_list.h
@@ -1,8 +1,5 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
/* Copyright (c) 2016 Facebook
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of version 2 of the GNU General Public
- * License as published by the Free Software Foundation.
*/
#ifndef __BPF_LRU_LIST_H_
#define __BPF_LRU_LIST_H_
diff --git a/kernel/bpf/cgroup.c b/kernel/bpf/cgroup.c
index fcde0f7b2585..1b65ab0df457 100644
--- a/kernel/bpf/cgroup.c
+++ b/kernel/bpf/cgroup.c
@@ -22,13 +22,23 @@
DEFINE_STATIC_KEY_FALSE(cgroup_bpf_enabled_key);
EXPORT_SYMBOL(cgroup_bpf_enabled_key);
+void cgroup_bpf_offline(struct cgroup *cgrp)
+{
+ cgroup_get(cgrp);
+ percpu_ref_kill(&cgrp->bpf.refcnt);
+}
+
/**
- * cgroup_bpf_put() - put references of all bpf programs
- * @cgrp: the cgroup to modify
+ * cgroup_bpf_release() - put references of all bpf programs and
+ * release all cgroup bpf data
+ * @work: work structure embedded into the cgroup to modify
*/
-void cgroup_bpf_put(struct cgroup *cgrp)
+static void cgroup_bpf_release(struct work_struct *work)
{
+ struct cgroup *cgrp = container_of(work, struct cgroup,
+ bpf.release_work);
enum bpf_cgroup_storage_type stype;
+ struct bpf_prog_array *old_array;
unsigned int type;
for (type = 0; type < ARRAY_SIZE(cgrp->bpf.progs); type++) {
@@ -45,8 +55,27 @@ void cgroup_bpf_put(struct cgroup *cgrp)
kfree(pl);
static_branch_dec(&cgroup_bpf_enabled_key);
}
- bpf_prog_array_free(cgrp->bpf.effective[type]);
+ old_array = rcu_dereference_protected(
+ cgrp->bpf.effective[type],
+ percpu_ref_is_dying(&cgrp->bpf.refcnt));
+ bpf_prog_array_free(old_array);
}
+
+ percpu_ref_exit(&cgrp->bpf.refcnt);
+ cgroup_put(cgrp);
+}
+
+/**
+ * cgroup_bpf_release_fn() - callback used to schedule releasing
+ * of bpf cgroup data
+ * @ref: percpu ref counter structure
+ */
+static void cgroup_bpf_release_fn(struct percpu_ref *ref)
+{
+ struct cgroup *cgrp = container_of(ref, struct cgroup, bpf.refcnt);
+
+ INIT_WORK(&cgrp->bpf.release_work, cgroup_bpf_release);
+ queue_work(system_wq, &cgrp->bpf.release_work);
}
/* count number of elements in the list.
@@ -101,7 +130,7 @@ static bool hierarchy_allows_attach(struct cgroup *cgrp,
*/
static int compute_effective_progs(struct cgroup *cgrp,
enum bpf_attach_type type,
- struct bpf_prog_array __rcu **array)
+ struct bpf_prog_array **array)
{
enum bpf_cgroup_storage_type stype;
struct bpf_prog_array *progs;
@@ -139,17 +168,16 @@ static int compute_effective_progs(struct cgroup *cgrp,
}
} while ((p = cgroup_parent(p)));
- rcu_assign_pointer(*array, progs);
+ *array = progs;
return 0;
}
static void activate_effective_progs(struct cgroup *cgrp,
enum bpf_attach_type type,
- struct bpf_prog_array __rcu *array)
+ struct bpf_prog_array *old_array)
{
- struct bpf_prog_array __rcu *old_array;
-
- old_array = xchg(&cgrp->bpf.effective[type], array);
+ rcu_swap_protected(cgrp->bpf.effective[type], old_array,
+ lockdep_is_held(&cgroup_mutex));
/* free prog array after grace period, since __cgroup_bpf_run_*()
* might be still walking the array
*/
@@ -166,8 +194,13 @@ int cgroup_bpf_inherit(struct cgroup *cgrp)
* that array below is variable length
*/
#define NR ARRAY_SIZE(cgrp->bpf.effective)
- struct bpf_prog_array __rcu *arrays[NR] = {};
- int i;
+ struct bpf_prog_array *arrays[NR] = {};
+ int ret, i;
+
+ ret = percpu_ref_init(&cgrp->bpf.refcnt, cgroup_bpf_release_fn, 0,
+ GFP_KERNEL);
+ if (ret)
+ return ret;
for (i = 0; i < NR; i++)
INIT_LIST_HEAD(&cgrp->bpf.progs[i]);
@@ -183,6 +216,9 @@ int cgroup_bpf_inherit(struct cgroup *cgrp)
cleanup:
for (i = 0; i < NR; i++)
bpf_prog_array_free(arrays[i]);
+
+ percpu_ref_exit(&cgrp->bpf.refcnt);
+
return -ENOMEM;
}
@@ -444,10 +480,14 @@ int __cgroup_bpf_query(struct cgroup *cgrp, const union bpf_attr *attr,
enum bpf_attach_type type = attr->query.attach_type;
struct list_head *progs = &cgrp->bpf.progs[type];
u32 flags = cgrp->bpf.flags[type];
+ struct bpf_prog_array *effective;
int cnt, ret = 0, i;
+ effective = rcu_dereference_protected(cgrp->bpf.effective[type],
+ lockdep_is_held(&cgroup_mutex));
+
if (attr->query.query_flags & BPF_F_QUERY_EFFECTIVE)
- cnt = bpf_prog_array_length(cgrp->bpf.effective[type]);
+ cnt = bpf_prog_array_length(effective);
else
cnt = prog_list_length(progs);
@@ -464,8 +504,7 @@ int __cgroup_bpf_query(struct cgroup *cgrp, const union bpf_attr *attr,
}
if (attr->query.query_flags & BPF_F_QUERY_EFFECTIVE) {
- return bpf_prog_array_copy_to_user(cgrp->bpf.effective[type],
- prog_ids, cnt);
+ return bpf_prog_array_copy_to_user(effective, prog_ids, cnt);
} else {
struct bpf_prog_list *pl;
u32 id;
@@ -548,8 +587,16 @@ int cgroup_bpf_prog_query(const union bpf_attr *attr,
* The program type passed in via @type must be suitable for network
* filtering. No further check is performed to assert that.
*
- * This function will return %-EPERM if any if an attached program was found
- * and if it returned != 1 during execution. In all other cases, 0 is returned.
+ * For egress packets, this function can return:
+ * NET_XMIT_SUCCESS (0) - continue with packet output
+ * NET_XMIT_DROP (1) - drop packet and notify TCP to call cwr
+ * NET_XMIT_CN (2) - continue with packet output and notify TCP
+ * to call cwr
+ * -EPERM - drop packet
+ *
+ * For ingress packets, this function will return -EPERM if any
+ * attached program was found and if it returned != 1 during execution.
+ * Otherwise 0 is returned.
*/
int __cgroup_bpf_run_filter_skb(struct sock *sk,
struct sk_buff *skb,
@@ -575,12 +622,19 @@ int __cgroup_bpf_run_filter_skb(struct sock *sk,
/* compute pointers for the bpf prog */
bpf_compute_and_save_data_end(skb, &saved_data_end);
- ret = BPF_PROG_RUN_ARRAY(cgrp->bpf.effective[type], skb,
- __bpf_prog_run_save_cb);
+ if (type == BPF_CGROUP_INET_EGRESS) {
+ ret = BPF_PROG_CGROUP_INET_EGRESS_RUN_ARRAY(
+ cgrp->bpf.effective[type], skb, __bpf_prog_run_save_cb);
+ } else {
+ ret = BPF_PROG_RUN_ARRAY(cgrp->bpf.effective[type], skb,
+ __bpf_prog_run_save_cb);
+ ret = (ret == 1 ? 0 : -EPERM);
+ }
bpf_restore_data_end(skb, saved_data_end);
__skb_pull(skb, offset);
skb->sk = save_sk;
- return ret == 1 ? 0 : -EPERM;
+
+ return ret;
}
EXPORT_SYMBOL(__cgroup_bpf_run_filter_skb);
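
The cgroup.c changes replace the old direct cgroup_bpf_put() teardown with a percpu_ref lifecycle, so bpf data can be released when the cgroup is offlined rather than only at final destruction. The effective arrays also lose their __rcu annotation here: under cgroup_mutex (or a dying ref) they are read with rcu_dereference_protected() and published with rcu_swap_protected(). The ref lifecycle in call order, condensed from the hunks above:

/* 1. cgroup creation (cgroup_bpf_inherit): */
percpu_ref_init(&cgrp->bpf.refcnt, cgroup_bpf_release_fn, 0, GFP_KERNEL);

/* 2. cgroup goes offline, possibly with programs still attached: */
cgroup_get(cgrp);			/* pin cgrp until release runs */
percpu_ref_kill(&cgrp->bpf.refcnt);

/* 3. last reference dropped -> callback runs in a context where we
 *    cannot sleep, so the real work is punted to a workqueue: */
INIT_WORK(&cgrp->bpf.release_work, cgroup_bpf_release);
queue_work(system_wq, &cgrp->bpf.release_work);

/* 4. cgroup_bpf_release(): free prog lists and effective arrays, then
 *    percpu_ref_exit() and cgroup_put(cgrp). */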
diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c
index 242a643af82f..6e880c082904 100644
--- a/kernel/bpf/core.c
+++ b/kernel/bpf/core.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
/*
* Linux Socket Filter - Kernel level socket filtering
*
@@ -12,11 +13,6 @@
* Alexei Starovoitov <ast@plumgrid.com>
* Daniel Borkmann <dborkman@redhat.com>
*
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version
- * 2 of the License, or (at your option) any later version.
- *
* Andi Kleen - Fix a few bad bugs and races.
* Kris Katterjohn - Added many additional checks in bpf_check_classic()
*/
@@ -1795,38 +1791,33 @@ struct bpf_prog_array *bpf_prog_array_alloc(u32 prog_cnt, gfp_t flags)
return &empty_prog_array.hdr;
}
-void bpf_prog_array_free(struct bpf_prog_array __rcu *progs)
+void bpf_prog_array_free(struct bpf_prog_array *progs)
{
- if (!progs ||
- progs == (struct bpf_prog_array __rcu *)&empty_prog_array.hdr)
+ if (!progs || progs == &empty_prog_array.hdr)
return;
kfree_rcu(progs, rcu);
}
-int bpf_prog_array_length(struct bpf_prog_array __rcu *array)
+int bpf_prog_array_length(struct bpf_prog_array *array)
{
struct bpf_prog_array_item *item;
u32 cnt = 0;
- rcu_read_lock();
- item = rcu_dereference(array)->items;
- for (; item->prog; item++)
+ for (item = array->items; item->prog; item++)
if (item->prog != &dummy_bpf_prog.prog)
cnt++;
- rcu_read_unlock();
return cnt;
}
-static bool bpf_prog_array_copy_core(struct bpf_prog_array __rcu *array,
+static bool bpf_prog_array_copy_core(struct bpf_prog_array *array,
u32 *prog_ids,
u32 request_cnt)
{
struct bpf_prog_array_item *item;
int i = 0;
- item = rcu_dereference_check(array, 1)->items;
- for (; item->prog; item++) {
+ for (item = array->items; item->prog; item++) {
if (item->prog == &dummy_bpf_prog.prog)
continue;
prog_ids[i] = item->prog->aux->id;
@@ -1839,7 +1830,7 @@ static bool bpf_prog_array_copy_core(struct bpf_prog_array __rcu *array,
return !!(item->prog);
}
-int bpf_prog_array_copy_to_user(struct bpf_prog_array __rcu *array,
+int bpf_prog_array_copy_to_user(struct bpf_prog_array *array,
__u32 __user *prog_ids, u32 cnt)
{
unsigned long err = 0;
@@ -1850,18 +1841,12 @@ int bpf_prog_array_copy_to_user(struct bpf_prog_array __rcu *array,
* cnt = bpf_prog_array_length();
* if (cnt > 0)
* bpf_prog_array_copy_to_user(..., cnt);
- * so below kcalloc doesn't need extra cnt > 0 check, but
- * bpf_prog_array_length() releases rcu lock and
- * prog array could have been swapped with empty or larger array,
- * so always copy 'cnt' prog_ids to the user.
- * In a rare race the user will see zero prog_ids
+ * so below kcalloc doesn't need extra cnt > 0 check.
*/
ids = kcalloc(cnt, sizeof(u32), GFP_USER | __GFP_NOWARN);
if (!ids)
return -ENOMEM;
- rcu_read_lock();
nospc = bpf_prog_array_copy_core(array, ids, cnt);
- rcu_read_unlock();
err = copy_to_user(prog_ids, ids, cnt * sizeof(u32));
kfree(ids);
if (err)
@@ -1871,19 +1856,19 @@ int bpf_prog_array_copy_to_user(struct bpf_prog_array __rcu *array,
return 0;
}
-void bpf_prog_array_delete_safe(struct bpf_prog_array __rcu *array,
+void bpf_prog_array_delete_safe(struct bpf_prog_array *array,
struct bpf_prog *old_prog)
{
- struct bpf_prog_array_item *item = array->items;
+ struct bpf_prog_array_item *item;
- for (; item->prog; item++)
+ for (item = array->items; item->prog; item++)
if (item->prog == old_prog) {
WRITE_ONCE(item->prog, &dummy_bpf_prog.prog);
break;
}
}
-int bpf_prog_array_copy(struct bpf_prog_array __rcu *old_array,
+int bpf_prog_array_copy(struct bpf_prog_array *old_array,
struct bpf_prog *exclude_prog,
struct bpf_prog *include_prog,
struct bpf_prog_array **new_array)
@@ -1947,7 +1932,7 @@ int bpf_prog_array_copy(struct bpf_prog_array __rcu *old_array,
return 0;
}
-int bpf_prog_array_copy_info(struct bpf_prog_array __rcu *array,
+int bpf_prog_array_copy_info(struct bpf_prog_array *array,
u32 *prog_ids, u32 request_cnt,
u32 *prog_cnt)
{
@@ -2090,6 +2075,15 @@ bool __weak bpf_helper_changes_pkt_data(void *func)
return false;
}
+/* Return TRUE if the JIT backend wants verifier to enable sub-register usage
+ * analysis code and wants explicit zero extension inserted by verifier.
+ * Otherwise, return FALSE.
+ */
+bool __weak bpf_jit_needs_zext(void)
+{
+ return false;
+}
+
/* To execute LD_ABS/LD_IND instructions __bpf_prog_run() may call
* skb_copy_bits(), so provide a weak definition of it for NET-less config.
*/
diff --git a/kernel/bpf/cpumap.c b/kernel/bpf/cpumap.c
index cf727d77c6c6..b31a71909307 100644
--- a/kernel/bpf/cpumap.c
+++ b/kernel/bpf/cpumap.c
@@ -106,12 +106,9 @@ static struct bpf_map *cpu_map_alloc(union bpf_attr *attr)
/* make sure page count doesn't overflow */
cost = (u64) cmap->map.max_entries * sizeof(struct bpf_cpu_map_entry *);
cost += cpu_map_bitmap_size(attr) * num_possible_cpus();
- if (cost >= U32_MAX - PAGE_SIZE)
- goto free_cmap;
- cmap->map.pages = round_up(cost, PAGE_SIZE) >> PAGE_SHIFT;
/* Notice returns -EPERM on if map size is larger than memlock limit */
- ret = bpf_map_precharge_memlock(cmap->map.pages);
+ ret = bpf_map_charge_init(&cmap->map.memory, cost);
if (ret) {
err = ret;
goto free_cmap;
@@ -121,7 +118,7 @@ static struct bpf_map *cpu_map_alloc(union bpf_attr *attr)
cmap->flush_needed = __alloc_percpu(cpu_map_bitmap_size(attr),
__alignof__(unsigned long));
if (!cmap->flush_needed)
- goto free_cmap;
+ goto free_charge;
/* Alloc array for possible remote "destination" CPUs */
cmap->cpu_map = bpf_map_area_alloc(cmap->map.max_entries *
@@ -133,6 +130,8 @@ static struct bpf_map *cpu_map_alloc(union bpf_attr *attr)
return &cmap->map;
free_percpu:
free_percpu(cmap->flush_needed);
+free_charge:
+ bpf_map_charge_finish(&cmap->map.memory);
free_cmap:
kfree(cmap);
return ERR_PTR(err);
diff --git a/kernel/bpf/devmap.c b/kernel/bpf/devmap.c
index 191b79948424..b58a33ca8a27 100644
--- a/kernel/bpf/devmap.c
+++ b/kernel/bpf/devmap.c
@@ -88,8 +88,8 @@ static u64 dev_map_bitmap_size(const union bpf_attr *attr)
static struct bpf_map *dev_map_alloc(union bpf_attr *attr)
{
struct bpf_dtab *dtab;
- int err = -EINVAL;
u64 cost;
+ int err;
if (!capable(CAP_NET_ADMIN))
return ERR_PTR(-EPERM);
@@ -108,13 +108,9 @@ static struct bpf_map *dev_map_alloc(union bpf_attr *attr)
/* make sure page count doesn't overflow */
cost = (u64) dtab->map.max_entries * sizeof(struct bpf_dtab_netdev *);
cost += dev_map_bitmap_size(attr) * num_possible_cpus();
- if (cost >= U32_MAX - PAGE_SIZE)
- goto free_dtab;
-
- dtab->map.pages = round_up(cost, PAGE_SIZE) >> PAGE_SHIFT;
- /* if map size is larger than memlock limit, reject it early */
- err = bpf_map_precharge_memlock(dtab->map.pages);
+ /* if map size is larger than memlock limit, reject it */
+ err = bpf_map_charge_init(&dtab->map.memory, cost);
if (err)
goto free_dtab;
@@ -125,19 +121,21 @@ static struct bpf_map *dev_map_alloc(union bpf_attr *attr)
__alignof__(unsigned long),
GFP_KERNEL | __GFP_NOWARN);
if (!dtab->flush_needed)
- goto free_dtab;
+ goto free_charge;
dtab->netdev_map = bpf_map_area_alloc(dtab->map.max_entries *
sizeof(struct bpf_dtab_netdev *),
dtab->map.numa_node);
if (!dtab->netdev_map)
- goto free_dtab;
+ goto free_charge;
spin_lock(&dev_map_lock);
list_add_tail_rcu(&dtab->list, &dev_map_list);
spin_unlock(&dev_map_lock);
return &dtab->map;
+free_charge:
+ bpf_map_charge_finish(&dtab->map.memory);
free_dtab:
free_percpu(dtab->flush_needed);
kfree(dtab);
@@ -164,6 +162,9 @@ static void dev_map_free(struct bpf_map *map)
bpf_clear_redirect_map(map);
synchronize_rcu();
+ /* Make sure prior __dev_map_entry_free() have completed. */
+ rcu_barrier();
+
/* To ensure all pending flush operations have completed wait for flush
* bitmap to indicate all flush_needed bits to be zero on _all_ cpus.
* Because the above synchronize_rcu() ensures the map is disconnected
diff --git a/kernel/bpf/hashtab.c b/kernel/bpf/hashtab.c
index 192d32e77db3..d92e05d9979b 100644
--- a/kernel/bpf/hashtab.c
+++ b/kernel/bpf/hashtab.c
@@ -360,14 +360,8 @@ static struct bpf_map *htab_map_alloc(union bpf_attr *attr)
else
cost += (u64) htab->elem_size * num_possible_cpus();
- if (cost >= U32_MAX - PAGE_SIZE)
- /* make sure page count doesn't overflow */
- goto free_htab;
-
- htab->map.pages = round_up(cost, PAGE_SIZE) >> PAGE_SHIFT;
-
- /* if map size is larger than memlock limit, reject it early */
- err = bpf_map_precharge_memlock(htab->map.pages);
+ /* if map size is larger than memlock limit, reject it */
+ err = bpf_map_charge_init(&htab->map.memory, cost);
if (err)
goto free_htab;
@@ -376,7 +370,7 @@ static struct bpf_map *htab_map_alloc(union bpf_attr *attr)
sizeof(struct bucket),
htab->map.numa_node);
if (!htab->buckets)
- goto free_htab;
+ goto free_charge;
if (htab->map.map_flags & BPF_F_ZERO_SEED)
htab->hashrnd = 0;
@@ -409,6 +403,8 @@ free_prealloc:
prealloc_destroy(htab);
free_buckets:
bpf_map_area_free(htab->buckets);
+free_charge:
+ bpf_map_charge_finish(&htab->map.memory);
free_htab:
kfree(htab);
return ERR_PTR(err);
@@ -527,18 +523,30 @@ static u32 htab_map_gen_lookup(struct bpf_map *map, struct bpf_insn *insn_buf)
return insn - insn_buf;
}
-static void *htab_lru_map_lookup_elem(struct bpf_map *map, void *key)
+static __always_inline void *__htab_lru_map_lookup_elem(struct bpf_map *map,
+ void *key, const bool mark)
{
struct htab_elem *l = __htab_map_lookup_elem(map, key);
if (l) {
- bpf_lru_node_set_ref(&l->lru_node);
+ if (mark)
+ bpf_lru_node_set_ref(&l->lru_node);
return l->key + round_up(map->key_size, 8);
}
return NULL;
}
+static void *htab_lru_map_lookup_elem(struct bpf_map *map, void *key)
+{
+ return __htab_lru_map_lookup_elem(map, key, true);
+}
+
+static void *htab_lru_map_lookup_elem_sys(struct bpf_map *map, void *key)
+{
+ return __htab_lru_map_lookup_elem(map, key, false);
+}
+
static u32 htab_lru_map_gen_lookup(struct bpf_map *map,
struct bpf_insn *insn_buf)
{
@@ -1250,6 +1258,7 @@ const struct bpf_map_ops htab_lru_map_ops = {
.map_free = htab_map_free,
.map_get_next_key = htab_map_get_next_key,
.map_lookup_elem = htab_lru_map_lookup_elem,
+ .map_lookup_elem_sys_only = htab_lru_map_lookup_elem_sys,
.map_update_elem = htab_lru_map_update_elem,
.map_delete_elem = htab_lru_map_delete_elem,
.map_gen_lookup = htab_lru_map_gen_lookup,
@@ -1281,7 +1290,6 @@ static void *htab_lru_percpu_map_lookup_elem(struct bpf_map *map, void *key)
int bpf_percpu_hash_copy(struct bpf_map *map, void *key, void *value)
{
- struct bpf_htab *htab = container_of(map, struct bpf_htab, map);
struct htab_elem *l;
void __percpu *pptr;
int ret = -ENOENT;
@@ -1297,8 +1305,9 @@ int bpf_percpu_hash_copy(struct bpf_map *map, void *key, void *value)
l = __htab_map_lookup_elem(map, key);
if (!l)
goto out;
- if (htab_is_lru(htab))
- bpf_lru_node_set_ref(&l->lru_node);
+ /* We do not mark LRU map element here in order to not mess up
+ * eviction heuristics when user space does a map walk.
+ */
pptr = htab_elem_get_ptr(l, map->key_size);
for_each_possible_cpu(cpu) {
bpf_long_memcpy(value + off,
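
The LRU hashtab gains a second lookup path because marking an element referenced is an eviction hint, not part of lookup semantics: when user space walks the whole map through the bpf(2) syscall, setting the ref bit on every element would make everything look recently used and degrade eviction to roughly random. The syscall path therefore prefers the new non-marking map_lookup_elem_sys_only (wired up in the kernel/bpf/syscall.c hunk below), while program-side lookups keep marking. A toy model of the effect:

#include <stdio.h>

struct elem { int key; int ref; };

/* mark mirrors bpf_lru_node_set_ref(): program lookups pass 1, the new
 * sys-only lookup passes 0. */
static void lookup(struct elem *e, int mark)
{
	if (mark)
		e->ref = 1;
}

int main(void)
{
	struct elem tab[4] = { {0, 0}, {1, 1}, {2, 0}, {3, 0} };
	int i;

	/* full map walk from user space, non-marking: */
	for (i = 0; i < 4; i++)
		lookup(&tab[i], 0);

	/* key 1 is still the only element that looks recently used: */
	for (i = 0; i < 4; i++)
		printf("key=%d ref=%d\n", tab[i].key, tab[i].ref);
	return 0;
}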
diff --git a/kernel/bpf/inode.c b/kernel/bpf/inode.c
index bc53e5b20ddc..84a80b02db99 100644
--- a/kernel/bpf/inode.c
+++ b/kernel/bpf/inode.c
@@ -518,7 +518,7 @@ out:
static struct bpf_prog *__get_prog_inode(struct inode *inode, enum bpf_prog_type type)
{
struct bpf_prog *prog;
- int ret = inode_permission(inode, MAY_READ | MAY_WRITE);
+ int ret = inode_permission(inode, MAY_READ);
if (ret)
return ERR_PTR(ret);
diff --git a/kernel/bpf/local_storage.c b/kernel/bpf/local_storage.c
index 980e8f1f6cb5..addd6fdceec8 100644
--- a/kernel/bpf/local_storage.c
+++ b/kernel/bpf/local_storage.c
@@ -272,6 +272,8 @@ static struct bpf_map *cgroup_storage_map_alloc(union bpf_attr *attr)
{
int numa_node = bpf_map_attr_numa_node(attr);
struct bpf_cgroup_storage_map *map;
+ struct bpf_map_memory mem;
+ int ret;
if (attr->key_size != sizeof(struct bpf_cgroup_storage_key))
return ERR_PTR(-EINVAL);
@@ -290,13 +292,18 @@ static struct bpf_map *cgroup_storage_map_alloc(union bpf_attr *attr)
/* max_entries is not used and enforced to be 0 */
return ERR_PTR(-EINVAL);
+ ret = bpf_map_charge_init(&mem, sizeof(struct bpf_cgroup_storage_map));
+ if (ret < 0)
+ return ERR_PTR(ret);
+
map = kmalloc_node(sizeof(struct bpf_cgroup_storage_map),
__GFP_ZERO | GFP_USER, numa_node);
- if (!map)
+ if (!map) {
+ bpf_map_charge_finish(&mem);
return ERR_PTR(-ENOMEM);
+ }
- map->map.pages = round_up(sizeof(struct bpf_cgroup_storage_map),
- PAGE_SIZE) >> PAGE_SHIFT;
+ bpf_map_charge_move(&map->map.memory, &mem);
/* copy mandatory map attributes */
bpf_map_init_from_attr(&map->map, attr);
diff --git a/kernel/bpf/lpm_trie.c b/kernel/bpf/lpm_trie.c
index e61630c2e50b..09334f13a8a0 100644
--- a/kernel/bpf/lpm_trie.c
+++ b/kernel/bpf/lpm_trie.c
@@ -573,14 +573,8 @@ static struct bpf_map *trie_alloc(union bpf_attr *attr)
cost_per_node = sizeof(struct lpm_trie_node) +
attr->value_size + trie->data_size;
cost += (u64) attr->max_entries * cost_per_node;
- if (cost >= U32_MAX - PAGE_SIZE) {
- ret = -E2BIG;
- goto out_err;
- }
-
- trie->map.pages = round_up(cost, PAGE_SIZE) >> PAGE_SHIFT;
- ret = bpf_map_precharge_memlock(trie->map.pages);
+ ret = bpf_map_charge_init(&trie->map.memory, cost);
if (ret)
goto out_err;
diff --git a/kernel/bpf/map_in_map.c b/kernel/bpf/map_in_map.c
index 3dff41403583..fab4fb134547 100644
--- a/kernel/bpf/map_in_map.c
+++ b/kernel/bpf/map_in_map.c
@@ -1,8 +1,5 @@
+// SPDX-License-Identifier: GPL-2.0-only
/* Copyright (c) 2017 Facebook
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of version 2 of the GNU General Public
- * License as published by the Free Software Foundation.
*/
#include <linux/slab.h>
#include <linux/bpf.h>
diff --git a/kernel/bpf/map_in_map.h b/kernel/bpf/map_in_map.h
index 6183db9ec08c..a507bf6ef8b9 100644
--- a/kernel/bpf/map_in_map.h
+++ b/kernel/bpf/map_in_map.h
@@ -1,8 +1,5 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
/* Copyright (c) 2017 Facebook
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of version 2 of the GNU General Public
- * License as published by the Free Software Foundation.
*/
#ifndef __MAP_IN_MAP_H__
#define __MAP_IN_MAP_H__
diff --git a/kernel/bpf/percpu_freelist.c b/kernel/bpf/percpu_freelist.c
index 0c1b4ba9e90e..6e090140b924 100644
--- a/kernel/bpf/percpu_freelist.c
+++ b/kernel/bpf/percpu_freelist.c
@@ -1,8 +1,5 @@
+// SPDX-License-Identifier: GPL-2.0-only
/* Copyright (c) 2016 Facebook
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of version 2 of the GNU General Public
- * License as published by the Free Software Foundation.
*/
#include "percpu_freelist.h"
diff --git a/kernel/bpf/percpu_freelist.h b/kernel/bpf/percpu_freelist.h
index c3960118e617..fbf8a8a28979 100644
--- a/kernel/bpf/percpu_freelist.h
+++ b/kernel/bpf/percpu_freelist.h
@@ -1,8 +1,5 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
/* Copyright (c) 2016 Facebook
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of version 2 of the GNU General Public
- * License as published by the Free Software Foundation.
*/
#ifndef __PERCPU_FREELIST_H__
#define __PERCPU_FREELIST_H__
diff --git a/kernel/bpf/queue_stack_maps.c b/kernel/bpf/queue_stack_maps.c
index 0b140d236889..f697647ceb54 100644
--- a/kernel/bpf/queue_stack_maps.c
+++ b/kernel/bpf/queue_stack_maps.c
@@ -67,29 +67,28 @@ static int queue_stack_map_alloc_check(union bpf_attr *attr)
static struct bpf_map *queue_stack_map_alloc(union bpf_attr *attr)
{
int ret, numa_node = bpf_map_attr_numa_node(attr);
+ struct bpf_map_memory mem = {0};
struct bpf_queue_stack *qs;
u64 size, queue_size, cost;
size = (u64) attr->max_entries + 1;
cost = queue_size = sizeof(*qs) + size * attr->value_size;
- if (cost >= U32_MAX - PAGE_SIZE)
- return ERR_PTR(-E2BIG);
- cost = round_up(cost, PAGE_SIZE) >> PAGE_SHIFT;
-
- ret = bpf_map_precharge_memlock(cost);
+ ret = bpf_map_charge_init(&mem, cost);
if (ret < 0)
return ERR_PTR(ret);
qs = bpf_map_area_alloc(queue_size, numa_node);
- if (!qs)
+ if (!qs) {
+ bpf_map_charge_finish(&mem);
return ERR_PTR(-ENOMEM);
+ }
memset(qs, 0, sizeof(*qs));
bpf_map_init_from_attr(&qs->map, attr);
- qs->map.pages = cost;
+ bpf_map_charge_move(&qs->map.memory, &mem);
qs->size = size;
raw_spin_lock_init(&qs->lock);
diff --git a/kernel/bpf/reuseport_array.c b/kernel/bpf/reuseport_array.c
index 18e225de80ff..50c083ba978c 100644
--- a/kernel/bpf/reuseport_array.c
+++ b/kernel/bpf/reuseport_array.c
@@ -151,7 +151,8 @@ static struct bpf_map *reuseport_array_alloc(union bpf_attr *attr)
{
int err, numa_node = bpf_map_attr_numa_node(attr);
struct reuseport_array *array;
- u64 cost, array_size;
+ struct bpf_map_memory mem;
+ u64 array_size;
if (!capable(CAP_SYS_ADMIN))
return ERR_PTR(-EPERM);
@@ -159,24 +160,20 @@ static struct bpf_map *reuseport_array_alloc(union bpf_attr *attr)
array_size = sizeof(*array);
array_size += (u64)attr->max_entries * sizeof(struct sock *);
- /* make sure there is no u32 overflow later in round_up() */
- cost = array_size;
- if (cost >= U32_MAX - PAGE_SIZE)
- return ERR_PTR(-ENOMEM);
- cost = round_up(cost, PAGE_SIZE) >> PAGE_SHIFT;
-
- err = bpf_map_precharge_memlock(cost);
+ err = bpf_map_charge_init(&mem, array_size);
if (err)
return ERR_PTR(err);
/* allocate all map elements and zero-initialize them */
array = bpf_map_area_alloc(array_size, numa_node);
- if (!array)
+ if (!array) {
+ bpf_map_charge_finish(&mem);
return ERR_PTR(-ENOMEM);
+ }
/* copy mandatory map attributes */
bpf_map_init_from_attr(&array->map, attr);
- array->map.pages = cost;
+ bpf_map_charge_move(&array->map.memory, &mem);
return &array->map;
}
diff --git a/kernel/bpf/stackmap.c b/kernel/bpf/stackmap.c
index 950ab2f28922..052580c33d26 100644
--- a/kernel/bpf/stackmap.c
+++ b/kernel/bpf/stackmap.c
@@ -1,8 +1,5 @@
+// SPDX-License-Identifier: GPL-2.0-only
/* Copyright (c) 2016 Facebook
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of version 2 of the GNU General Public
- * License as published by the Free Software Foundation.
*/
#include <linux/bpf.h>
#include <linux/jhash.h>
@@ -89,6 +86,7 @@ static struct bpf_map *stack_map_alloc(union bpf_attr *attr)
{
u32 value_size = attr->value_size;
struct bpf_stack_map *smap;
+ struct bpf_map_memory mem;
u64 cost, n_buckets;
int err;
@@ -116,40 +114,37 @@ static struct bpf_map *stack_map_alloc(union bpf_attr *attr)
n_buckets = roundup_pow_of_two(attr->max_entries);
cost = n_buckets * sizeof(struct stack_map_bucket *) + sizeof(*smap);
- if (cost >= U32_MAX - PAGE_SIZE)
- return ERR_PTR(-E2BIG);
+ cost += n_buckets * (value_size + sizeof(struct stack_map_bucket));
+ err = bpf_map_charge_init(&mem, cost);
+ if (err)
+ return ERR_PTR(err);
smap = bpf_map_area_alloc(cost, bpf_map_attr_numa_node(attr));
- if (!smap)
+ if (!smap) {
+ bpf_map_charge_finish(&mem);
return ERR_PTR(-ENOMEM);
-
- err = -E2BIG;
- cost += n_buckets * (value_size + sizeof(struct stack_map_bucket));
- if (cost >= U32_MAX - PAGE_SIZE)
- goto free_smap;
+ }
bpf_map_init_from_attr(&smap->map, attr);
smap->map.value_size = value_size;
smap->n_buckets = n_buckets;
- smap->map.pages = round_up(cost, PAGE_SIZE) >> PAGE_SHIFT;
-
- err = bpf_map_precharge_memlock(smap->map.pages);
- if (err)
- goto free_smap;
err = get_callchain_buffers(sysctl_perf_event_max_stack);
if (err)
- goto free_smap;
+ goto free_charge;
err = prealloc_elems_and_freelist(smap);
if (err)
goto put_buffers;
+ bpf_map_charge_move(&smap->map.memory, &mem);
+
return &smap->map;
put_buffers:
put_callchain_buffers();
-free_smap:
+free_charge:
+ bpf_map_charge_finish(&mem);
bpf_map_area_free(smap);
return ERR_PTR(err);
}
diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c
index ad3ccf82f31d..4c53cbd3329d 100644
--- a/kernel/bpf/syscall.c
+++ b/kernel/bpf/syscall.c
@@ -188,19 +188,6 @@ void bpf_map_init_from_attr(struct bpf_map *map, union bpf_attr *attr)
map->numa_node = bpf_map_attr_numa_node(attr);
}
-int bpf_map_precharge_memlock(u32 pages)
-{
- struct user_struct *user = get_current_user();
- unsigned long memlock_limit, cur;
-
- memlock_limit = rlimit(RLIMIT_MEMLOCK) >> PAGE_SHIFT;
- cur = atomic_long_read(&user->locked_vm);
- free_uid(user);
- if (cur + pages > memlock_limit)
- return -EPERM;
- return 0;
-}
-
static int bpf_charge_memlock(struct user_struct *user, u32 pages)
{
unsigned long memlock_limit = rlimit(RLIMIT_MEMLOCK) >> PAGE_SHIFT;
@@ -214,45 +201,62 @@ static int bpf_charge_memlock(struct user_struct *user, u32 pages)
static void bpf_uncharge_memlock(struct user_struct *user, u32 pages)
{
- atomic_long_sub(pages, &user->locked_vm);
+ if (user)
+ atomic_long_sub(pages, &user->locked_vm);
}
-static int bpf_map_init_memlock(struct bpf_map *map)
+int bpf_map_charge_init(struct bpf_map_memory *mem, size_t size)
{
- struct user_struct *user = get_current_user();
+ u32 pages = round_up(size, PAGE_SIZE) >> PAGE_SHIFT;
+ struct user_struct *user;
int ret;
- ret = bpf_charge_memlock(user, map->pages);
+ if (size >= U32_MAX - PAGE_SIZE)
+ return -E2BIG;
+
+ user = get_current_user();
+ ret = bpf_charge_memlock(user, pages);
if (ret) {
free_uid(user);
return ret;
}
- map->user = user;
- return ret;
+
+ mem->pages = pages;
+ mem->user = user;
+
+ return 0;
}
-static void bpf_map_release_memlock(struct bpf_map *map)
+void bpf_map_charge_finish(struct bpf_map_memory *mem)
{
- struct user_struct *user = map->user;
- bpf_uncharge_memlock(user, map->pages);
- free_uid(user);
+ bpf_uncharge_memlock(mem->user, mem->pages);
+ free_uid(mem->user);
+}
+
+void bpf_map_charge_move(struct bpf_map_memory *dst,
+ struct bpf_map_memory *src)
+{
+ *dst = *src;
+
+ /* Make sure src will not be used for the redundant uncharging. */
+ memset(src, 0, sizeof(struct bpf_map_memory));
}
int bpf_map_charge_memlock(struct bpf_map *map, u32 pages)
{
int ret;
- ret = bpf_charge_memlock(map->user, pages);
+ ret = bpf_charge_memlock(map->memory.user, pages);
if (ret)
return ret;
- map->pages += pages;
+ map->memory.pages += pages;
return ret;
}
void bpf_map_uncharge_memlock(struct bpf_map *map, u32 pages)
{
- bpf_uncharge_memlock(map->user, pages);
- map->pages -= pages;
+ bpf_uncharge_memlock(map->memory.user, pages);
+ map->memory.pages -= pages;
}
static int bpf_map_alloc_id(struct bpf_map *map)
@@ -303,11 +307,13 @@ void bpf_map_free_id(struct bpf_map *map, bool do_idr_lock)
static void bpf_map_free_deferred(struct work_struct *work)
{
struct bpf_map *map = container_of(work, struct bpf_map, work);
+ struct bpf_map_memory mem;
- bpf_map_release_memlock(map);
+ bpf_map_charge_move(&mem, &map->memory);
security_bpf_map_free(map);
/* implementation dependent freeing */
map->ops->map_free(map);
+ bpf_map_charge_finish(&mem);
}
static void bpf_map_put_uref(struct bpf_map *map)
@@ -395,7 +401,7 @@ static void bpf_map_show_fdinfo(struct seq_file *m, struct file *filp)
map->value_size,
map->max_entries,
map->map_flags,
- map->pages * 1ULL << PAGE_SHIFT,
+ map->memory.pages * 1ULL << PAGE_SHIFT,
map->id,
READ_ONCE(map->frozen));
@@ -549,6 +555,7 @@ static int map_check_btf(struct bpf_map *map, const struct btf *btf,
static int map_create(union bpf_attr *attr)
{
int numa_node = bpf_map_attr_numa_node(attr);
+ struct bpf_map_memory mem;
struct bpf_map *map;
int f_flags;
int err;
@@ -573,7 +580,7 @@ static int map_create(union bpf_attr *attr)
err = bpf_obj_name_cpy(map->name, attr->map_name);
if (err)
- goto free_map_nouncharge;
+ goto free_map;
atomic_set(&map->refcnt, 1);
atomic_set(&map->usercnt, 1);
@@ -583,20 +590,20 @@ static int map_create(union bpf_attr *attr)
if (!attr->btf_value_type_id) {
err = -EINVAL;
- goto free_map_nouncharge;
+ goto free_map;
}
btf = btf_get_by_fd(attr->btf_fd);
if (IS_ERR(btf)) {
err = PTR_ERR(btf);
- goto free_map_nouncharge;
+ goto free_map;
}
err = map_check_btf(map, btf, attr->btf_key_type_id,
attr->btf_value_type_id);
if (err) {
btf_put(btf);
- goto free_map_nouncharge;
+ goto free_map;
}
map->btf = btf;
@@ -608,15 +615,11 @@ static int map_create(union bpf_attr *attr)
err = security_bpf_map_alloc(map);
if (err)
- goto free_map_nouncharge;
-
- err = bpf_map_init_memlock(map);
- if (err)
- goto free_map_sec;
+ goto free_map;
err = bpf_map_alloc_id(map);
if (err)
- goto free_map;
+ goto free_map_sec;
err = bpf_map_new_fd(map, f_flags);
if (err < 0) {
@@ -632,13 +635,13 @@ static int map_create(union bpf_attr *attr)
return err;
-free_map:
- bpf_map_release_memlock(map);
free_map_sec:
security_bpf_map_free(map);
-free_map_nouncharge:
+free_map:
btf_put(map->btf);
+ bpf_map_charge_move(&mem, &map->memory);
map->ops->map_free(map);
+ bpf_map_charge_finish(&mem);
return err;
}
@@ -808,7 +811,10 @@ static int map_lookup_elem(union bpf_attr *attr)
err = map->ops->map_peek_elem(map, value);
} else {
rcu_read_lock();
- ptr = map->ops->map_lookup_elem(map, key);
+ if (map->ops->map_lookup_elem_sys_only)
+ ptr = map->ops->map_lookup_elem_sys_only(map, key);
+ else
+ ptr = map->ops->map_lookup_elem(map, key);
if (IS_ERR(ptr)) {
err = PTR_ERR(ptr);
} else if (!ptr) {
@@ -1582,6 +1588,14 @@ bpf_prog_load_check_attach_type(enum bpf_prog_type prog_type,
default:
return -EINVAL;
}
+ case BPF_PROG_TYPE_CGROUP_SKB:
+ switch (expected_attach_type) {
+ case BPF_CGROUP_INET_INGRESS:
+ case BPF_CGROUP_INET_EGRESS:
+ return 0;
+ default:
+ return -EINVAL;
+ }
default:
return 0;
}
@@ -1601,7 +1615,9 @@ static int bpf_prog_load(union bpf_attr *attr, union bpf_attr __user *uattr)
if (CHECK_ATTR(BPF_PROG_LOAD))
return -EINVAL;
- if (attr->prog_flags & ~(BPF_F_STRICT_ALIGNMENT | BPF_F_ANY_ALIGNMENT))
+ if (attr->prog_flags & ~(BPF_F_STRICT_ALIGNMENT |
+ BPF_F_ANY_ALIGNMENT |
+ BPF_F_TEST_RND_HI32))
return -EINVAL;
if (!IS_ENABLED(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS) &&
@@ -1831,6 +1847,10 @@ static int bpf_prog_attach_check_attach_type(const struct bpf_prog *prog,
case BPF_PROG_TYPE_CGROUP_SOCK:
case BPF_PROG_TYPE_CGROUP_SOCK_ADDR:
return attach_type == prog->expected_attach_type ? 0 : -EINVAL;
+ case BPF_PROG_TYPE_CGROUP_SKB:
+ return prog->enforce_expected_attach_type &&
+ prog->expected_attach_type != attach_type ?
+ -EINVAL : 0;
default:
return 0;
}
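
The new accounting keeps the page arithmetic and the RLIMIT_MEMLOCK charge together in bpf_map_charge_init(), and makes ownership of a charge movable so error paths cannot double-uncharge. A self-contained model of the arithmetic and the move semantics (userspace illustration; the real code additionally charges user->locked_vm):

#include <stdint.h>
#include <stdio.h>
#include <string.h>

#define PAGE_SHIFT	12
#define PAGE_SIZE	(1UL << PAGE_SHIFT)

struct bpf_map_memory { uint32_t pages; /* plus struct user_struct *user */ };

static int bpf_map_charge_init(struct bpf_map_memory *mem, size_t size)
{
	if (size >= UINT32_MAX - PAGE_SIZE)
		return -7;			/* -E2BIG in the kernel */
	/* round_up(size, PAGE_SIZE) >> PAGE_SHIFT: */
	mem->pages = (size + PAGE_SIZE - 1) >> PAGE_SHIFT;
	return 0;	/* kernel: charge pages against RLIMIT_MEMLOCK here */
}

static void bpf_map_charge_move(struct bpf_map_memory *dst,
				struct bpf_map_memory *src)
{
	*dst = *src;
	/* zero src so a later uncharge of it is a no-op: */
	memset(src, 0, sizeof(*src));
}

int main(void)
{
	struct bpf_map_memory mem, owner;

	if (bpf_map_charge_init(&mem, 10000) == 0) {	/* 10000 B -> 3 pages */
		bpf_map_charge_move(&owner, &mem);
		printf("owner=%u pages, src=%u pages\n",
		       owner.pages, mem.pages);		/* 3, 0 */
	}
	return 0;
}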
diff --git a/kernel/bpf/tnum.c b/kernel/bpf/tnum.c
index 938d41211be7..ca52b9642943 100644
--- a/kernel/bpf/tnum.c
+++ b/kernel/bpf/tnum.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0-only
/* tnum: tracked (or tristate) numbers
*
* A tnum tracks knowledge about the bits of a value. Each bit can be either
diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index 95f9354495ad..5c2cb5bd84ce 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -176,7 +176,7 @@ struct bpf_verifier_stack_elem {
struct bpf_verifier_stack_elem *next;
};
-#define BPF_COMPLEXITY_LIMIT_STACK 1024
+#define BPF_COMPLEXITY_LIMIT_JMP_SEQ 8192
#define BPF_COMPLEXITY_LIMIT_STATES 64
#define BPF_MAP_PTR_UNPRIV 1UL
@@ -782,8 +782,9 @@ static struct bpf_verifier_state *push_stack(struct bpf_verifier_env *env,
if (err)
goto err;
elem->st.speculative |= speculative;
- if (env->stack_size > BPF_COMPLEXITY_LIMIT_STACK) {
- verbose(env, "BPF program is too complex\n");
+ if (env->stack_size > BPF_COMPLEXITY_LIMIT_JMP_SEQ) {
+ verbose(env, "The sequence of %d jumps is too complex.\n",
+ env->stack_size);
goto err;
}
return &elem->st;
@@ -981,6 +982,7 @@ static void mark_reg_not_init(struct bpf_verifier_env *env,
__mark_reg_not_init(regs + regno);
}
+#define DEF_NOT_SUBREG (0)
static void init_reg_state(struct bpf_verifier_env *env,
struct bpf_func_state *state)
{
@@ -991,6 +993,7 @@ static void init_reg_state(struct bpf_verifier_env *env,
mark_reg_not_init(env, regs, i);
regs[i].live = REG_LIVE_NONE;
regs[i].parent = NULL;
+ regs[i].subreg_def = DEF_NOT_SUBREG;
}
/* frame pointer */
@@ -1136,7 +1139,7 @@ next:
*/
static int mark_reg_read(struct bpf_verifier_env *env,
const struct bpf_reg_state *state,
- struct bpf_reg_state *parent)
+ struct bpf_reg_state *parent, u8 flag)
{
bool writes = parent == state->parent; /* Observe write marks */
int cnt = 0;
@@ -1151,17 +1154,26 @@ static int mark_reg_read(struct bpf_verifier_env *env,
parent->var_off.value, parent->off);
return -EFAULT;
}
- if (parent->live & REG_LIVE_READ)
+ /* The first condition is more likely to be true than the
+ * second, so check it first.
+ */
+ if ((parent->live & REG_LIVE_READ) == flag ||
+ parent->live & REG_LIVE_READ64)
/* The parentage chain never changes and
* this parent was already marked as LIVE_READ.
* There is no need to keep walking the chain again and
* keep re-marking all parents as LIVE_READ.
* This case happens when the same register is read
* multiple times without writes into it in-between.
+ * Also, if parent has the stronger REG_LIVE_READ64 set,
+ * then there is no need to set the weaker REG_LIVE_READ32.
*/
break;
/* ... then we depend on parent's value */
- parent->live |= REG_LIVE_READ;
+ parent->live |= flag;
+ /* REG_LIVE_READ64 overrides REG_LIVE_READ32. */
+ if (flag == REG_LIVE_READ64)
+ parent->live &= ~REG_LIVE_READ32;
state = parent;
parent = state->parent;
writes = true;
@@ -1173,12 +1185,129 @@ static int mark_reg_read(struct bpf_verifier_env *env,
return 0;
}
+/* This function is supposed to be used by the following 32-bit optimization
+ * code only. It returns TRUE if the source or destination register operates
+ * on 64 bits, otherwise FALSE.
+ */
+static bool is_reg64(struct bpf_verifier_env *env, struct bpf_insn *insn,
+ u32 regno, struct bpf_reg_state *reg, enum reg_arg_type t)
+{
+ u8 code, class, op;
+
+ code = insn->code;
+ class = BPF_CLASS(code);
+ op = BPF_OP(code);
+ if (class == BPF_JMP) {
+ /* BPF_EXIT for "main" will reach here. Return TRUE
+ * conservatively.
+ */
+ if (op == BPF_EXIT)
+ return true;
+ if (op == BPF_CALL) {
+ /* A BPF-to-BPF call will reach here because caller-saved
+ * clobbers are marked with DST_OP_NO_MARK; we don't care
+ * about the register def there since those registers are
+ * already marked as NOT_INIT.
+ */
+ if (insn->src_reg == BPF_PSEUDO_CALL)
+ return false;
+ /* A helper call will reach here because of the arg type
+ * check; conservatively return TRUE.
+ */
+ if (t == SRC_OP)
+ return true;
+
+ return false;
+ }
+ }
+
+ if (class == BPF_ALU64 || class == BPF_JMP ||
+ /* BPF_END always uses the BPF_ALU class. */
+ (class == BPF_ALU && op == BPF_END && insn->imm == 64))
+ return true;
+
+ if (class == BPF_ALU || class == BPF_JMP32)
+ return false;
+
+ if (class == BPF_LDX) {
+ if (t != SRC_OP)
+ return BPF_SIZE(code) == BPF_DW;
+ /* LDX source must be ptr. */
+ return true;
+ }
+
+ if (class == BPF_STX) {
+ if (reg->type != SCALAR_VALUE)
+ return true;
+ return BPF_SIZE(code) == BPF_DW;
+ }
+
+ if (class == BPF_LD) {
+ u8 mode = BPF_MODE(code);
+
+ /* LD_IMM64 */
+ if (mode == BPF_IMM)
+ return true;
+
+ /* Both LD_IND and LD_ABS return 32-bit data. */
+ if (t != SRC_OP)
+ return false;
+
+ /* Implicit ctx ptr. */
+ if (regno == BPF_REG_6)
+ return true;
+
+ /* Explicit source could be any width. */
+ return true;
+ }
+
+ if (class == BPF_ST)
+ /* The only source register for BPF_ST is a ptr. */
+ return true;
+
+ /* Conservatively return true by default. */
+ return true;
+}
+
+/* Return TRUE if INSN doesn't explicitly define a value. */
+static bool insn_no_def(struct bpf_insn *insn)
+{
+ u8 class = BPF_CLASS(insn->code);
+
+ return (class == BPF_JMP || class == BPF_JMP32 ||
+ class == BPF_STX || class == BPF_ST);
+}
+
+/* Return TRUE if INSN has defined any 32-bit value explicitly. */
+static bool insn_has_def32(struct bpf_verifier_env *env, struct bpf_insn *insn)
+{
+ if (insn_no_def(insn))
+ return false;
+
+ return !is_reg64(env, insn, insn->dst_reg, NULL, DST_OP);
+}
+
+static void mark_insn_zext(struct bpf_verifier_env *env,
+ struct bpf_reg_state *reg)
+{
+ s32 def_idx = reg->subreg_def;
+
+ if (def_idx == DEF_NOT_SUBREG)
+ return;
+
+ env->insn_aux_data[def_idx - 1].zext_dst = true;
+ /* The dst will be zero extended, so it won't be a sub-register anymore. */
+ reg->subreg_def = DEF_NOT_SUBREG;
+}
+
static int check_reg_arg(struct bpf_verifier_env *env, u32 regno,
enum reg_arg_type t)
{
struct bpf_verifier_state *vstate = env->cur_state;
struct bpf_func_state *state = vstate->frame[vstate->curframe];
+ struct bpf_insn *insn = env->prog->insnsi + env->insn_idx;
struct bpf_reg_state *reg, *regs = state->regs;
+ bool rw64;
if (regno >= MAX_BPF_REG) {
verbose(env, "R%d is invalid\n", regno);
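
To make the new bookkeeping concrete: a 32-bit write records its defining instruction in subreg_def (as insn index + 1), and a later 64-bit read of the same register routes through mark_insn_zext() to flag that definition for zero extension. A hedged sketch of such a sequence, built with the insn macros from include/linux/filter.h:

	/* insn 0: w1 = w2   -- 32-bit mov, defines a sub-register, so
	 *                      r1's subreg_def records this definition.
	 * insn 1: r0 = r1   -- 64-bit read of r1; mark_insn_zext() sets
	 *                      zext_dst on insn 0, and JITs that request
	 *                      it get an explicit zero extension patched
	 *                      in after insn 0.
	 * insn 2: exit
	 */
	struct bpf_insn seq[] = {
		BPF_MOV32_REG(BPF_REG_1, BPF_REG_2),
		BPF_MOV64_REG(BPF_REG_0, BPF_REG_1),
		BPF_EXIT_INSN(),
	};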
@@ -1186,6 +1315,7 @@ static int check_reg_arg(struct bpf_verifier_env *env, u32 regno,
}
reg = &regs[regno];
+ rw64 = is_reg64(env, insn, regno, reg, t);
if (t == SRC_OP) {
/* check whether register used as source operand can be read */
if (reg->type == NOT_INIT) {
@@ -1196,7 +1326,11 @@ static int check_reg_arg(struct bpf_verifier_env *env, u32 regno,
if (regno == BPF_REG_FP)
return 0;
- return mark_reg_read(env, reg, reg->parent);
+ if (rw64)
+ mark_insn_zext(env, reg);
+
+ return mark_reg_read(env, reg, reg->parent,
+ rw64 ? REG_LIVE_READ64 : REG_LIVE_READ32);
} else {
/* check whether register used as dest operand can be written to */
if (regno == BPF_REG_FP) {
@@ -1204,6 +1338,7 @@ static int check_reg_arg(struct bpf_verifier_env *env, u32 regno,
return -EACCES;
}
reg->live |= REG_LIVE_WRITTEN;
+ reg->subreg_def = rw64 ? DEF_NOT_SUBREG : env->insn_idx + 1;
if (t == DST_OP)
mark_reg_unknown(env, regs, regno);
}
@@ -1383,7 +1518,8 @@ static int check_stack_read(struct bpf_verifier_env *env,
state->regs[value_regno].live |= REG_LIVE_WRITTEN;
}
mark_reg_read(env, &reg_state->stack[spi].spilled_ptr,
- reg_state->stack[spi].spilled_ptr.parent);
+ reg_state->stack[spi].spilled_ptr.parent,
+ REG_LIVE_READ64);
return 0;
} else {
int zeros = 0;
@@ -1400,7 +1536,8 @@ static int check_stack_read(struct bpf_verifier_env *env,
return -EACCES;
}
mark_reg_read(env, &reg_state->stack[spi].spilled_ptr,
- reg_state->stack[spi].spilled_ptr.parent);
+ reg_state->stack[spi].spilled_ptr.parent,
+ REG_LIVE_READ64);
if (value_regno >= 0) {
if (zeros == size) {
/* any size read into register is zero extended,
@@ -2109,6 +2246,12 @@ static int check_mem_access(struct bpf_verifier_env *env, int insn_idx, u32 regn
value_regno);
if (reg_type_may_be_null(reg_type))
regs[value_regno].id = ++env->id_gen;
+ /* A load of a ctx field could have an actual load size
+ * different from the one encoded in the insn. When the
+ * dst is a PTR, it is definitely not a sub-register.
+ */
+ regs[value_regno].subreg_def = DEF_NOT_SUBREG;
}
regs[value_regno].type = reg_type;
}
@@ -2368,7 +2511,8 @@ mark:
* the whole slot to be marked as 'read'
*/
mark_reg_read(env, &state->stack[spi].spilled_ptr,
- state->stack[spi].spilled_ptr.parent);
+ state->stack[spi].spilled_ptr.parent,
+ REG_LIVE_READ64);
}
return update_stack_depth(env, state, min_off);
}
@@ -3332,6 +3476,9 @@ static int check_helper_call(struct bpf_verifier_env *env, int func_id, int insn
check_reg_arg(env, caller_saved[i], DST_OP_NO_MARK);
}
+ /* helper call returns 64-bit value. */
+ regs[BPF_REG_0].subreg_def = DEF_NOT_SUBREG;
+
/* update return register (already marked as written above) */
if (fn->ret_type == RET_INTEGER) {
/* sets type to SCALAR_VALUE */
@@ -4263,6 +4410,7 @@ static int check_alu_op(struct bpf_verifier_env *env, struct bpf_insn *insn)
*/
*dst_reg = *src_reg;
dst_reg->live |= REG_LIVE_WRITTEN;
+ dst_reg->subreg_def = DEF_NOT_SUBREG;
} else {
/* R1 = (u32) R2 */
if (is_pointer_value(env, insn->src_reg)) {
@@ -4273,6 +4421,7 @@ static int check_alu_op(struct bpf_verifier_env *env, struct bpf_insn *insn)
} else if (src_reg->type == SCALAR_VALUE) {
*dst_reg = *src_reg;
dst_reg->live |= REG_LIVE_WRITTEN;
+ dst_reg->subreg_def = env->insn_idx + 1;
} else {
mark_reg_unknown(env, regs,
insn->dst_reg);
@@ -5352,16 +5501,23 @@ static int check_ld_abs(struct bpf_verifier_env *env, struct bpf_insn *insn)
* Already marked as written above.
*/
mark_reg_unknown(env, regs, BPF_REG_0);
+ /* ld_abs loads up to 32 bits of skb data. */
+ regs[BPF_REG_0].subreg_def = env->insn_idx + 1;
return 0;
}
static int check_return_code(struct bpf_verifier_env *env)
{
+ struct tnum enforce_attach_type_range = tnum_unknown;
struct bpf_reg_state *reg;
struct tnum range = tnum_range(0, 1);
switch (env->prog->type) {
case BPF_PROG_TYPE_CGROUP_SKB:
+ if (env->prog->expected_attach_type == BPF_CGROUP_INET_EGRESS) {
+ range = tnum_range(0, 3);
+ enforce_attach_type_range = tnum_range(2, 3);
+ }
case BPF_PROG_TYPE_CGROUP_SOCK:
case BPF_PROG_TYPE_CGROUP_SOCK_ADDR:
case BPF_PROG_TYPE_SOCK_OPS:
@@ -5380,18 +5536,23 @@ static int check_return_code(struct bpf_verifier_env *env)
}
if (!tnum_in(range, reg->var_off)) {
+ char tn_buf[48];
+
verbose(env, "At program exit the register R0 ");
if (!tnum_is_unknown(reg->var_off)) {
- char tn_buf[48];
-
tnum_strn(tn_buf, sizeof(tn_buf), reg->var_off);
verbose(env, "has value %s", tn_buf);
} else {
verbose(env, "has unknown scalar value");
}
- verbose(env, " should have been 0 or 1\n");
+ tnum_strn(tn_buf, sizeof(tn_buf), range);
+ verbose(env, " should have been %s\n", tn_buf);
return -EINVAL;
}
+
+ if (!tnum_is_unknown(enforce_attach_type_range) &&
+ tnum_in(enforce_attach_type_range, reg->var_off))
+ env->prog->enforce_expected_attach_type = 1;
return 0;
}
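
In effect, a cgroup skb program loaded with expected_attach_type BPF_CGROUP_INET_EGRESS may now return 0..3, and a return value provably within [2, 3] flips enforce_expected_attach_type, so the program can only ever be attached at egress. A hedged sketch of such a program; SEC() naming follows libbpf convention, and the loader is assumed (not shown) to set expected_attach_type to BPF_CGROUP_INET_EGRESS:

	#include <linux/bpf.h>
	#include <bpf/bpf_helpers.h>

	SEC("cgroup_skb/egress")
	int egress_only(struct __sk_buff *skb)
	{
		/* 0 and 1 remain valid for any attach type; 2 and 3 are in
		 * range only for BPF_CGROUP_INET_EGRESS, and returning them
		 * marks the program egress-only at load time.
		 */
		return 2;
	}

	char _license[] SEC("license") = "GPL";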
@@ -5435,7 +5596,25 @@ enum {
BRANCH = 2,
};
-#define STATE_LIST_MARK ((struct bpf_verifier_state_list *) -1L)
+static u32 state_htab_size(struct bpf_verifier_env *env)
+{
+ return env->prog->len;
+}
+
+static struct bpf_verifier_state_list **explored_state(
+ struct bpf_verifier_env *env,
+ int idx)
+{
+ struct bpf_verifier_state *cur = env->cur_state;
+ struct bpf_func_state *state = cur->frame[cur->curframe];
+
+ return &env->explored_states[(idx ^ state->callsite) % state_htab_size(env)];
+}
+
+static void init_explored_state(struct bpf_verifier_env *env, int idx)
+{
+ env->insn_aux_data[idx].prune_point = true;
+}
/* t, w, e - match pseudo-code above:
* t - index of current instruction
@@ -5461,7 +5640,7 @@ static int push_insn(int t, int w, int e, struct bpf_verifier_env *env)
if (e == BRANCH)
/* mark branch target for state pruning */
- env->explored_states[w] = STATE_LIST_MARK;
+ init_explored_state(env, w);
if (insn_state[w] == 0) {
/* tree-edge */
@@ -5529,9 +5708,9 @@ peek_stack:
else if (ret < 0)
goto err_free;
if (t + 1 < insn_cnt)
- env->explored_states[t + 1] = STATE_LIST_MARK;
+ init_explored_state(env, t + 1);
if (insns[t].src_reg == BPF_PSEUDO_CALL) {
- env->explored_states[t] = STATE_LIST_MARK;
+ init_explored_state(env, t);
ret = push_insn(t, t + insns[t].imm + 1, BRANCH, env);
if (ret == 1)
goto peek_stack;
@@ -5554,10 +5733,10 @@ peek_stack:
* after every call and jump
*/
if (t + 1 < insn_cnt)
- env->explored_states[t + 1] = STATE_LIST_MARK;
+ init_explored_state(env, t + 1);
} else {
/* conditional jump with two edges */
- env->explored_states[t] = STATE_LIST_MARK;
+ init_explored_state(env, t);
ret = push_insn(t, t + 1, FALLTHROUGH, env);
if (ret == 1)
goto peek_stack;
@@ -6005,12 +6184,10 @@ static void clean_live_states(struct bpf_verifier_env *env, int insn,
struct bpf_verifier_state_list *sl;
int i;
- sl = env->explored_states[insn];
- if (!sl)
- return;
-
- while (sl != STATE_LIST_MARK) {
- if (sl->state.curframe != cur->curframe)
+ sl = *explored_state(env, insn);
+ while (sl) {
+ if (sl->state.insn_idx != insn ||
+ sl->state.curframe != cur->curframe)
goto next;
for (i = 0; i <= cur->curframe; i++)
if (sl->state.frame[i]->callsite != cur->frame[i]->callsite)
@@ -6292,20 +6469,33 @@ static bool states_equal(struct bpf_verifier_env *env,
return true;
}
+/* Return 0 if no propagation happened, a negative error code if an error
+ * happened, or the propagated read flag otherwise.
+ */
static int propagate_liveness_reg(struct bpf_verifier_env *env,
struct bpf_reg_state *reg,
struct bpf_reg_state *parent_reg)
{
+ u8 parent_flag = parent_reg->live & REG_LIVE_READ;
+ u8 flag = reg->live & REG_LIVE_READ;
int err;
- if (parent_reg->live & REG_LIVE_READ || !(reg->live & REG_LIVE_READ))
+ /* When we reach here, the read flags of PARENT_REG and REG could be
+ * any of REG_LIVE_READ64, REG_LIVE_READ32 or REG_LIVE_NONE. There is
+ * no need for propagation if PARENT_REG already has the strongest
+ * flag, REG_LIVE_READ64.
+ */
+ if (parent_flag == REG_LIVE_READ64 ||
+ /* Or if there is no read flag from REG. */
+ !flag ||
+ /* Or if the read flag from REG is the same as PARENT_REG. */
+ parent_flag == flag)
return 0;
- err = mark_reg_read(env, reg, parent_reg);
+ err = mark_reg_read(env, reg, parent_reg, flag);
if (err)
return err;
- return 0;
+ return flag;
}
/* A write screens off any subsequent reads; but write marks come from the
@@ -6339,8 +6529,10 @@ static int propagate_liveness(struct bpf_verifier_env *env,
for (i = frame < vstate->curframe ? BPF_REG_6 : 0; i < BPF_REG_FP; i++) {
err = propagate_liveness_reg(env, &state_reg[i],
&parent_reg[i]);
- if (err)
+ if (err < 0)
return err;
+ if (err == REG_LIVE_READ64)
+ mark_insn_zext(env, &parent_reg[i]);
}
/* Propagate stack slots. */
@@ -6350,11 +6542,11 @@ static int propagate_liveness(struct bpf_verifier_env *env,
state_reg = &state->stack[i].spilled_ptr;
err = propagate_liveness_reg(env, state_reg,
parent_reg);
- if (err)
+ if (err < 0)
return err;
}
}
- return err;
+ return 0;
}
static int is_state_visited(struct bpf_verifier_env *env, int insn_idx)
@@ -6364,18 +6556,21 @@ static int is_state_visited(struct bpf_verifier_env *env, int insn_idx)
struct bpf_verifier_state *cur = env->cur_state, *new;
int i, j, err, states_cnt = 0;
- pprev = &env->explored_states[insn_idx];
- sl = *pprev;
-
- if (!sl)
+ if (!env->insn_aux_data[insn_idx].prune_point)
/* this 'insn_idx' instruction wasn't marked, so we will not
* be doing state search here
*/
return 0;
+ pprev = explored_state(env, insn_idx);
+ sl = *pprev;
+
clean_live_states(env, insn_idx, cur);
- while (sl != STATE_LIST_MARK) {
+ while (sl) {
+ states_cnt++;
+ if (sl->state.insn_idx != insn_idx)
+ goto next;
if (states_equal(env, &sl->state, cur)) {
sl->hit_cnt++;
/* reached equivalent register/stack state,
@@ -6393,7 +6588,6 @@ static int is_state_visited(struct bpf_verifier_env *env, int insn_idx)
return err;
return 1;
}
- states_cnt++;
sl->miss_cnt++;
/* heuristic to determine whether this state is beneficial
* to keep checking from state equivalence point of view.
@@ -6420,6 +6614,7 @@ static int is_state_visited(struct bpf_verifier_env *env, int insn_idx)
sl = *pprev;
continue;
}
+next:
pprev = &sl->next;
sl = *pprev;
}
@@ -6451,8 +6646,9 @@ static int is_state_visited(struct bpf_verifier_env *env, int insn_idx)
kfree(new_sl);
return err;
}
- new_sl->next = env->explored_states[insn_idx];
- env->explored_states[insn_idx] = new_sl;
+ new->insn_idx = insn_idx;
+ new_sl->next = *explored_state(env, insn_idx);
+ *explored_state(env, insn_idx) = new_sl;
/* connect new state to parentage chain. Current frame needs all
* registers connected. Only r6 - r9 of the callers are alive (pushed
* to the stack implicitly by JITs) so in callers' frames connect just
@@ -7130,14 +7326,23 @@ static void convert_pseudo_ld_imm64(struct bpf_verifier_env *env)
* insni[off, off + cnt). Adjust corresponding insn_aux_data by copying
* [0, off) and [off, end) to new locations, so the patched range stays zero
*/
-static int adjust_insn_aux_data(struct bpf_verifier_env *env, u32 prog_len,
- u32 off, u32 cnt)
+static int adjust_insn_aux_data(struct bpf_verifier_env *env,
+ struct bpf_prog *new_prog, u32 off, u32 cnt)
{
struct bpf_insn_aux_data *new_data, *old_data = env->insn_aux_data;
+ struct bpf_insn *insn = new_prog->insnsi;
+ u32 prog_len;
int i;
+ /* The aux info at OFF always needs adjustment, no matter whether the
+ * fast path (cnt == 1) is taken or not: there is no guarantee that the
+ * insn at OFF is still the original insn of the old prog.
+ */
+ old_data[off].zext_dst = insn_has_def32(env, insn + off + cnt - 1);
+
if (cnt == 1)
return 0;
+ prog_len = new_prog->len;
new_data = vzalloc(array_size(prog_len,
sizeof(struct bpf_insn_aux_data)));
if (!new_data)
@@ -7145,8 +7350,10 @@ static int adjust_insn_aux_data(struct bpf_verifier_env *env, u32 prog_len,
memcpy(new_data, old_data, sizeof(struct bpf_insn_aux_data) * off);
memcpy(new_data + off + cnt - 1, old_data + off,
sizeof(struct bpf_insn_aux_data) * (prog_len - off - cnt + 1));
- for (i = off; i < off + cnt - 1; i++)
+ for (i = off; i < off + cnt - 1; i++) {
new_data[i].seen = true;
+ new_data[i].zext_dst = insn_has_def32(env, insn + i);
+ }
env->insn_aux_data = new_data;
vfree(old_data);
return 0;
@@ -7179,7 +7386,7 @@ static struct bpf_prog *bpf_patch_insn_data(struct bpf_verifier_env *env, u32 of
env->insn_aux_data[off].orig_idx);
return NULL;
}
- if (adjust_insn_aux_data(env, new_prog->len, off, len))
+ if (adjust_insn_aux_data(env, new_prog, off, len))
return NULL;
adjust_subprog_starts(env, off, len);
return new_prog;
@@ -7443,6 +7650,84 @@ static int opt_remove_nops(struct bpf_verifier_env *env)
return 0;
}
+static int opt_subreg_zext_lo32_rnd_hi32(struct bpf_verifier_env *env,
+ const union bpf_attr *attr)
+{
+ struct bpf_insn *patch, zext_patch[2], rnd_hi32_patch[4];
+ struct bpf_insn_aux_data *aux = env->insn_aux_data;
+ int i, patch_len, delta = 0, len = env->prog->len;
+ struct bpf_insn *insns = env->prog->insnsi;
+ struct bpf_prog *new_prog;
+ bool rnd_hi32;
+
+ rnd_hi32 = attr->prog_flags & BPF_F_TEST_RND_HI32;
+ zext_patch[1] = BPF_ZEXT_REG(0);
+ rnd_hi32_patch[1] = BPF_ALU64_IMM(BPF_MOV, BPF_REG_AX, 0);
+ rnd_hi32_patch[2] = BPF_ALU64_IMM(BPF_LSH, BPF_REG_AX, 32);
+ rnd_hi32_patch[3] = BPF_ALU64_REG(BPF_OR, 0, BPF_REG_AX);
+ for (i = 0; i < len; i++) {
+ int adj_idx = i + delta;
+ struct bpf_insn insn;
+
+ insn = insns[adj_idx];
+ if (!aux[adj_idx].zext_dst) {
+ u8 code, class;
+ u32 imm_rnd;
+
+ if (!rnd_hi32)
+ continue;
+
+ code = insn.code;
+ class = BPF_CLASS(code);
+ if (insn_no_def(&insn))
+ continue;
+
+ /* NOTE: arg "reg" (the fourth one) is only used for
+ * BPF_STX which has been ruled out in above
+ * check, it is safe to pass NULL here.
+ */
+ if (is_reg64(env, &insn, insn.dst_reg, NULL, DST_OP)) {
+ if (class == BPF_LD &&
+ BPF_MODE(code) == BPF_IMM)
+ i++;
+ continue;
+ }
+
+ /* A ctx load could be transformed into a wider load. */
+ if (class == BPF_LDX &&
+ aux[adj_idx].ptr_type == PTR_TO_CTX)
+ continue;
+
+ imm_rnd = get_random_int();
+ rnd_hi32_patch[0] = insn;
+ rnd_hi32_patch[1].imm = imm_rnd;
+ rnd_hi32_patch[3].dst_reg = insn.dst_reg;
+ patch = rnd_hi32_patch;
+ patch_len = 4;
+ goto apply_patch_buffer;
+ }
+
+ if (!bpf_jit_needs_zext())
+ continue;
+
+ zext_patch[0] = insn;
+ zext_patch[1].dst_reg = insn.dst_reg;
+ zext_patch[1].src_reg = insn.dst_reg;
+ patch = zext_patch;
+ patch_len = 2;
+apply_patch_buffer:
+ new_prog = bpf_patch_insn_data(env, adj_idx, patch, patch_len);
+ if (!new_prog)
+ return -ENOMEM;
+ env->prog = new_prog;
+ insns = new_prog->insnsi;
+ aux = env->insn_aux_data;
+ delta += patch_len - 1;
+ }
+
+ return 0;
+}
+
/* convert load instructions that access fields of a context type into a
* sequence of instructions that access fields of the underlying structure:
* struct __sk_buff -> struct sk_buff
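
Spelled out, the two patch buffers above rewrite a 32-bit def such as w1 += w2 into one of the following sequences (a sketch; imm_rnd stands for the get_random_int() value and BPF_REG_AX is the verifier's private scratch register):

	/* zext_patch -- when the JIT reports bpf_jit_needs_zext():
	 *	w1 += w2	(original insn)
	 *	r1 = (u32)r1	(BPF_ZEXT_REG(1): explicit zero extension)
	 *
	 * rnd_hi32_patch -- when loaded with BPF_F_TEST_RND_HI32:
	 *	w1 += w2	(original insn)
	 *	AX = imm_rnd	(BPF_ALU64_IMM(BPF_MOV, BPF_REG_AX, imm_rnd))
	 *	AX <<= 32	(BPF_ALU64_IMM(BPF_LSH, BPF_REG_AX, 32))
	 *	r1 |= AX	(BPF_ALU64_REG(BPF_OR, 1, BPF_REG_AX))
	 *
	 * Any code that silently relied on the high 32 bits being zero
	 * after a 32-bit def now sees random garbage there and fails
	 * visibly instead of by accident.
	 */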
@@ -8130,16 +8415,15 @@ static void free_states(struct bpf_verifier_env *env)
if (!env->explored_states)
return;
- for (i = 0; i < env->prog->len; i++) {
+ for (i = 0; i < state_htab_size(env); i++) {
sl = env->explored_states[i];
- if (sl)
- while (sl != STATE_LIST_MARK) {
- sln = sl->next;
- free_verifier_state(&sl->state, false);
- kfree(sl);
- sl = sln;
- }
+ while (sl) {
+ sln = sl->next;
+ free_verifier_state(&sl->state, false);
+ kfree(sl);
+ sl = sln;
+ }
}
kvfree(env->explored_states);
@@ -8239,7 +8523,7 @@ int bpf_check(struct bpf_prog **prog, union bpf_attr *attr,
goto skip_full_check;
}
- env->explored_states = kvcalloc(env->prog->len,
+ env->explored_states = kvcalloc(state_htab_size(env),
sizeof(struct bpf_verifier_state_list *),
GFP_USER);
ret = -ENOMEM;
@@ -8294,6 +8578,15 @@ skip_full_check:
if (ret == 0)
ret = fixup_bpf_calls(env);
+ /* do the 32-bit optimization after insn patching is done, so that the
+ * patched insns can be handled correctly.
+ */
+ if (ret == 0 && !bpf_prog_is_dev_bound(env->prog->aux)) {
+ ret = opt_subreg_zext_lo32_rnd_hi32(env, attr);
+ env->prog->aux->verifier_zext = bpf_jit_needs_zext() ? !ret
+ : false;
+ }
+
if (ret == 0)
ret = fixup_call_args(env);
diff --git a/kernel/bpf/xskmap.c b/kernel/bpf/xskmap.c
index 686d244e798d..413d75f4fc72 100644
--- a/kernel/bpf/xskmap.c
+++ b/kernel/bpf/xskmap.c
@@ -17,8 +17,8 @@ struct xsk_map {
static struct bpf_map *xsk_map_alloc(union bpf_attr *attr)
{
- int cpu, err = -EINVAL;
struct xsk_map *m;
+ int cpu, err;
u64 cost;
if (!capable(CAP_NET_ADMIN))
@@ -37,13 +37,9 @@ static struct bpf_map *xsk_map_alloc(union bpf_attr *attr)
cost = (u64)m->map.max_entries * sizeof(struct xdp_sock *);
cost += sizeof(struct list_head) * num_possible_cpus();
- if (cost >= U32_MAX - PAGE_SIZE)
- goto free_m;
-
- m->map.pages = round_up(cost, PAGE_SIZE) >> PAGE_SHIFT;
/* Notice that this returns -EPERM if the map size is larger than the memlock limit */
- err = bpf_map_precharge_memlock(m->map.pages);
+ err = bpf_map_charge_init(&m->map.memory, cost);
if (err)
goto free_m;
@@ -51,7 +47,7 @@ static struct bpf_map *xsk_map_alloc(union bpf_attr *attr)
m->flush_list = alloc_percpu(struct list_head);
if (!m->flush_list)
- goto free_m;
+ goto free_charge;
for_each_possible_cpu(cpu)
INIT_LIST_HEAD(per_cpu_ptr(m->flush_list, cpu));
@@ -65,6 +61,8 @@ static struct bpf_map *xsk_map_alloc(union bpf_attr *attr)
free_percpu:
free_percpu(m->flush_list);
+free_charge:
+ bpf_map_charge_finish(&m->map.memory);
free_m:
kfree(m);
return ERR_PTR(err);
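
The conversion follows the same protocol in every map type this series touches: charge the memlock cost before allocating, and pair each later error path with an uncharge. A hedged comment sketch of the shape:

	/* err = bpf_map_charge_init(&m->map.memory, cost);
	 * if (err)
	 *	goto free_m;		// over the limit: nothing charged
	 *
	 * m->flush_list = alloc_percpu(struct list_head);
	 * if (!m->flush_list)
	 *	goto free_charge;	// alloc failed after a charge ...
	 *
	 * free_charge:
	 *	bpf_map_charge_finish(&m->map.memory);	// ... so drop it
	 * free_m:
	 *	kfree(m);
	 */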
diff --git a/kernel/cgroup/cgroup-v1.c b/kernel/cgroup/cgroup-v1.c
index 68ca5de7ec27..88006be40ea3 100644
--- a/kernel/cgroup/cgroup-v1.c
+++ b/kernel/cgroup/cgroup-v1.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0-only
#include "cgroup-internal.h"
#include <linux/ctype.h>
diff --git a/kernel/cgroup/cgroup.c b/kernel/cgroup/cgroup.c
index 217cec4e22c6..557a5ea5098c 100644
--- a/kernel/cgroup/cgroup.c
+++ b/kernel/cgroup/cgroup.c
@@ -215,7 +215,8 @@ static struct cftype cgroup_base_files[];
static int cgroup_apply_control(struct cgroup *cgrp);
static void cgroup_finalize_control(struct cgroup *cgrp, int ret);
-static void css_task_iter_advance(struct css_task_iter *it);
+static void css_task_iter_skip(struct css_task_iter *it,
+ struct task_struct *task);
static int cgroup_destroy_locked(struct cgroup *cgrp);
static struct cgroup_subsys_state *css_create(struct cgroup *cgrp,
struct cgroup_subsys *ss);
@@ -738,6 +739,7 @@ struct css_set init_css_set = {
.dom_cset = &init_css_set,
.tasks = LIST_HEAD_INIT(init_css_set.tasks),
.mg_tasks = LIST_HEAD_INIT(init_css_set.mg_tasks),
+ .dying_tasks = LIST_HEAD_INIT(init_css_set.dying_tasks),
.task_iters = LIST_HEAD_INIT(init_css_set.task_iters),
.threaded_csets = LIST_HEAD_INIT(init_css_set.threaded_csets),
.cgrp_links = LIST_HEAD_INIT(init_css_set.cgrp_links),
@@ -843,6 +845,21 @@ static void css_set_update_populated(struct css_set *cset, bool populated)
cgroup_update_populated(link->cgrp, populated);
}
+/*
+ * @task is leaving, advance task iterators which are pointing to it so
+ * that they can resume at the next position. Advancing an iterator might
+ * remove it from the list, so use a safe walk. See css_task_iter_skip() for
+ * details.
+ */
+static void css_set_skip_task_iters(struct css_set *cset,
+ struct task_struct *task)
+{
+ struct css_task_iter *it, *pos;
+
+ list_for_each_entry_safe(it, pos, &cset->task_iters, iters_node)
+ css_task_iter_skip(it, task);
+}
+
/**
* css_set_move_task - move a task from one css_set to another
* @task: task being moved
@@ -868,22 +885,9 @@ static void css_set_move_task(struct task_struct *task,
css_set_update_populated(to_cset, true);
if (from_cset) {
- struct css_task_iter *it, *pos;
-
WARN_ON_ONCE(list_empty(&task->cg_list));
- /*
- * @task is leaving, advance task iterators which are
- * pointing to it so that they can resume at the next
- * position. Advancing an iterator might remove it from
- * the list, use safe walk. See css_task_iter_advance*()
- * for details.
- */
- list_for_each_entry_safe(it, pos, &from_cset->task_iters,
- iters_node)
- if (it->task_pos == &task->cg_list)
- css_task_iter_advance(it);
-
+ css_set_skip_task_iters(from_cset, task);
list_del_init(&task->cg_list);
if (!css_set_populated(from_cset))
css_set_update_populated(from_cset, false);
@@ -1210,6 +1214,7 @@ static struct css_set *find_css_set(struct css_set *old_cset,
cset->dom_cset = cset;
INIT_LIST_HEAD(&cset->tasks);
INIT_LIST_HEAD(&cset->mg_tasks);
+ INIT_LIST_HEAD(&cset->dying_tasks);
INIT_LIST_HEAD(&cset->task_iters);
INIT_LIST_HEAD(&cset->threaded_csets);
INIT_HLIST_NODE(&cset->hlist);
@@ -1810,11 +1815,13 @@ int cgroup_show_path(struct seq_file *sf, struct kernfs_node *kf_node,
enum cgroup2_param {
Opt_nsdelegate,
+ Opt_memory_localevents,
nr__cgroup2_params
};
static const struct fs_parameter_spec cgroup2_param_specs[] = {
- fsparam_flag ("nsdelegate", Opt_nsdelegate),
+ fsparam_flag("nsdelegate", Opt_nsdelegate),
+ fsparam_flag("memory_localevents", Opt_memory_localevents),
{}
};
@@ -1837,6 +1844,9 @@ static int cgroup2_parse_param(struct fs_context *fc, struct fs_parameter *param
case Opt_nsdelegate:
ctx->flags |= CGRP_ROOT_NS_DELEGATE;
return 0;
+ case Opt_memory_localevents:
+ ctx->flags |= CGRP_ROOT_MEMORY_LOCAL_EVENTS;
+ return 0;
}
return -EINVAL;
}
@@ -1848,6 +1858,11 @@ static void apply_cgroup_root_flags(unsigned int root_flags)
cgrp_dfl_root.flags |= CGRP_ROOT_NS_DELEGATE;
else
cgrp_dfl_root.flags &= ~CGRP_ROOT_NS_DELEGATE;
+
+ if (root_flags & CGRP_ROOT_MEMORY_LOCAL_EVENTS)
+ cgrp_dfl_root.flags |= CGRP_ROOT_MEMORY_LOCAL_EVENTS;
+ else
+ cgrp_dfl_root.flags &= ~CGRP_ROOT_MEMORY_LOCAL_EVENTS;
}
}
@@ -1855,6 +1870,8 @@ static int cgroup_show_options(struct seq_file *seq, struct kernfs_root *kf_root
{
if (cgrp_dfl_root.flags & CGRP_ROOT_NS_DELEGATE)
seq_puts(seq, ",nsdelegate");
+ if (cgrp_dfl_root.flags & CGRP_ROOT_MEMORY_LOCAL_EVENTS)
+ seq_puts(seq, ",memory_localevents");
return 0;
}
@@ -4396,15 +4413,18 @@ static void css_task_iter_advance_css_set(struct css_task_iter *it)
it->task_pos = NULL;
return;
}
- } while (!css_set_populated(cset));
+ } while (!css_set_populated(cset) && list_empty(&cset->dying_tasks));
if (!list_empty(&cset->tasks))
it->task_pos = cset->tasks.next;
- else
+ else if (!list_empty(&cset->mg_tasks))
it->task_pos = cset->mg_tasks.next;
+ else
+ it->task_pos = cset->dying_tasks.next;
it->tasks_head = &cset->tasks;
it->mg_tasks_head = &cset->mg_tasks;
+ it->dying_tasks_head = &cset->dying_tasks;
/*
* We don't keep css_sets locked across iteration steps and thus
@@ -4430,9 +4450,20 @@ static void css_task_iter_advance_css_set(struct css_task_iter *it)
list_add(&it->iters_node, &cset->task_iters);
}
+static void css_task_iter_skip(struct css_task_iter *it,
+ struct task_struct *task)
+{
+ lockdep_assert_held(&css_set_lock);
+
+ if (it->task_pos == &task->cg_list) {
+ it->task_pos = it->task_pos->next;
+ it->flags |= CSS_TASK_ITER_SKIPPED;
+ }
+}
+
static void css_task_iter_advance(struct css_task_iter *it)
{
- struct list_head *next;
+ struct task_struct *task;
lockdep_assert_held(&css_set_lock);
repeat:
@@ -4442,25 +4473,40 @@ repeat:
* consumed first and then ->mg_tasks. After ->mg_tasks,
* we move onto the next cset.
*/
- next = it->task_pos->next;
-
- if (next == it->tasks_head)
- next = it->mg_tasks_head->next;
+ if (it->flags & CSS_TASK_ITER_SKIPPED)
+ it->flags &= ~CSS_TASK_ITER_SKIPPED;
+ else
+ it->task_pos = it->task_pos->next;
- if (next == it->mg_tasks_head)
+ if (it->task_pos == it->tasks_head)
+ it->task_pos = it->mg_tasks_head->next;
+ if (it->task_pos == it->mg_tasks_head)
+ it->task_pos = it->dying_tasks_head->next;
+ if (it->task_pos == it->dying_tasks_head)
css_task_iter_advance_css_set(it);
- else
- it->task_pos = next;
} else {
/* called from start, proceed to the first cset */
css_task_iter_advance_css_set(it);
}
- /* if PROCS, skip over tasks which aren't group leaders */
- if ((it->flags & CSS_TASK_ITER_PROCS) && it->task_pos &&
- !thread_group_leader(list_entry(it->task_pos, struct task_struct,
- cg_list)))
- goto repeat;
+ if (!it->task_pos)
+ return;
+
+ task = list_entry(it->task_pos, struct task_struct, cg_list);
+
+ if (it->flags & CSS_TASK_ITER_PROCS) {
+ /* if PROCS, skip over tasks which aren't group leaders */
+ if (!thread_group_leader(task))
+ goto repeat;
+
+ /* and dying leaders w/o live member threads */
+ if (!atomic_read(&task->signal->live))
+ goto repeat;
+ } else {
+ /* skip all dying ones */
+ if (task->flags & PF_EXITING)
+ goto repeat;
+ }
}
/**
@@ -4955,8 +5001,6 @@ static void css_release_work_fn(struct work_struct *work)
if (cgrp->kn)
RCU_INIT_POINTER(*(void __rcu __force **)&cgrp->kn->priv,
NULL);
-
- cgroup_bpf_put(cgrp);
}
mutex_unlock(&cgroup_mutex);
@@ -5482,6 +5526,8 @@ static int cgroup_destroy_locked(struct cgroup *cgrp)
cgroup1_check_for_release(parent);
+ cgroup_bpf_offline(cgrp);
+
/* put the base reference */
percpu_ref_kill(&cgrp->self.refcnt);
@@ -5997,6 +6043,7 @@ void cgroup_exit(struct task_struct *tsk)
if (!list_empty(&tsk->cg_list)) {
spin_lock_irq(&css_set_lock);
css_set_move_task(tsk, cset, NULL, false);
+ list_add_tail(&tsk->cg_list, &cset->dying_tasks);
cset->nr_tasks--;
WARN_ON_ONCE(cgroup_task_frozen(tsk));
@@ -6022,6 +6069,13 @@ void cgroup_release(struct task_struct *task)
do_each_subsys_mask(ss, ssid, have_release_callback) {
ss->release(task);
} while_each_subsys_mask();
+
+ if (use_task_css_set_links) {
+ spin_lock_irq(&css_set_lock);
+ css_set_skip_task_iters(task_css_set(task), task);
+ list_del_init(&task->cg_list);
+ spin_unlock_irq(&css_set_lock);
+ }
}
void cgroup_free(struct task_struct *task)
@@ -6221,6 +6275,7 @@ void cgroup_sk_alloc(struct sock_cgroup_data *skcd)
* Don't use cgroup_get_live().
*/
cgroup_get(sock_cgroup_ptr(skcd));
+ cgroup_bpf_get(sock_cgroup_ptr(skcd));
return;
}
@@ -6232,6 +6287,7 @@ void cgroup_sk_alloc(struct sock_cgroup_data *skcd)
cset = task_css_set(current);
if (likely(cgroup_tryget(cset->dfl_cgrp))) {
skcd->val = (unsigned long)cset->dfl_cgrp;
+ cgroup_bpf_get(cset->dfl_cgrp);
break;
}
cpu_relax();
@@ -6242,7 +6298,10 @@ void cgroup_sk_alloc(struct sock_cgroup_data *skcd)
void cgroup_sk_free(struct sock_cgroup_data *skcd)
{
- cgroup_put(sock_cgroup_ptr(skcd));
+ struct cgroup *cgrp = sock_cgroup_ptr(skcd);
+
+ cgroup_bpf_put(cgrp);
+ cgroup_put(cgrp);
}
#endif /* CONFIG_SOCK_CGROUP_DATA */
@@ -6325,7 +6384,7 @@ static struct kobj_attribute cgroup_delegate_attr = __ATTR_RO(delegate);
static ssize_t features_show(struct kobject *kobj, struct kobj_attribute *attr,
char *buf)
{
- return snprintf(buf, PAGE_SIZE, "nsdelegate\n");
+ return snprintf(buf, PAGE_SIZE, "nsdelegate\nmemory_localevents\n");
}
static struct kobj_attribute cgroup_features_attr = __ATTR_RO(features);
@@ -6345,4 +6404,47 @@ static int __init cgroup_sysfs_init(void)
return sysfs_create_group(kernel_kobj, &cgroup_sysfs_attr_group);
}
subsys_initcall(cgroup_sysfs_init);
+
+static u64 power_of_ten(int power)
+{
+ u64 v = 1;
+ while (power--)
+ v *= 10;
+ return v;
+}
+
+/**
+ * cgroup_parse_float - parse a floating number
+ * @input: input string
+ * @dec_shift: number of decimal digits to shift
+ * @v: output
+ *
+ * Parse a decimal floating point number in @input and store the result in
+ * @v with decimal point right shifted @dec_shift times. For example, if
+ * @input is "12.3456" and @dec_shift is 3, *@v will be set to 12345.
+ * Returns 0 on success, -errno otherwise.
+ *
+ * There's nothing cgroup specific about this function except that it's
+ * currently the only user.
+ */
+int cgroup_parse_float(const char *input, unsigned dec_shift, s64 *v)
+{
+ s64 whole, frac = 0;
+ int fstart = 0, fend = 0, flen;
+
+ if (!sscanf(input, "%lld.%n%lld%n", &whole, &fstart, &frac, &fend))
+ return -EINVAL;
+ if (frac < 0)
+ return -EINVAL;
+
+ flen = fend > fstart ? fend - fstart : 0;
+ if (flen < dec_shift)
+ frac *= power_of_ten(dec_shift - flen);
+ else
+ frac = DIV_ROUND_CLOSEST_ULL(frac, power_of_ten(flen - dec_shift));
+
+ *v = whole * power_of_ten(dec_shift) + frac;
+ return 0;
+}
+
#endif /* CONFIG_SYSFS */
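
A few worked calls against the parser above, as a hedged sketch:

	s64 v;

	cgroup_parse_float("12.3456", 3, &v);	/* 4 > 3 digits: 3456 rounds to 346, v == 12346 */
	cgroup_parse_float("0.5", 3, &v);	/* 1 < 3 digits: 5 scaled by 100, v == 500 */
	cgroup_parse_float("7", 2, &v);		/* no fraction at all: v == 700 */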
diff --git a/kernel/cgroup/rstat.c b/kernel/cgroup/rstat.c
index bb95a35e8c2d..ca19b4c8acf5 100644
--- a/kernel/cgroup/rstat.c
+++ b/kernel/cgroup/rstat.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0-only
#include "cgroup-internal.h"
#include <linux/sched/cputime.h>
diff --git a/kernel/context_tracking.c b/kernel/context_tracking.c
index 9ad37b9e44a7..be01a4d627c9 100644
--- a/kernel/context_tracking.c
+++ b/kernel/context_tracking.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0-only
/*
* Context tracking: Probe on high level context boundaries such as kernel
* and userspace. This includes syscalls and exceptions entry/exit.
diff --git a/kernel/cpu.c b/kernel/cpu.c
index f2ef10460698..077fde6fb953 100644
--- a/kernel/cpu.c
+++ b/kernel/cpu.c
@@ -2061,7 +2061,7 @@ static void cpuhp_online_cpu_device(unsigned int cpu)
kobject_uevent(&dev->kobj, KOBJ_ONLINE);
}
-static int cpuhp_smt_disable(enum cpuhp_smt_control ctrlval)
+int cpuhp_smt_disable(enum cpuhp_smt_control ctrlval)
{
int cpu, ret = 0;
@@ -2093,7 +2093,7 @@ static int cpuhp_smt_disable(enum cpuhp_smt_control ctrlval)
return ret;
}
-static int cpuhp_smt_enable(void)
+int cpuhp_smt_enable(void)
{
int cpu, ret = 0;
diff --git a/kernel/crash_dump.c b/kernel/crash_dump.c
index b64e238b553b..9c23ae074b40 100644
--- a/kernel/crash_dump.c
+++ b/kernel/crash_dump.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0-only
#include <linux/kernel.h>
#include <linux/crash_dump.h>
#include <linux/init.h>
diff --git a/kernel/cred.c b/kernel/cred.c
index 45d77284aed0..e74ffdc98a92 100644
--- a/kernel/cred.c
+++ b/kernel/cred.c
@@ -1,12 +1,8 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
/* Task credentials management - see Documentation/security/credentials.rst
*
* Copyright (C) 2008 Red Hat, Inc. All Rights Reserved.
* Written by David Howells (dhowells@redhat.com)
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public Licence
- * as published by the Free Software Foundation; either version
- * 2 of the Licence, or (at your option) any later version.
*/
#include <linux/export.h>
#include <linux/cred.h>
diff --git a/kernel/debug/Makefile b/kernel/debug/Makefile
index a85edc339985..332ee6c6ec2c 100644
--- a/kernel/debug/Makefile
+++ b/kernel/debug/Makefile
@@ -1,3 +1,4 @@
+# SPDX-License-Identifier: GPL-2.0-only
#
# Makefile for the linux kernel debugger
#
diff --git a/kernel/delayacct.c b/kernel/delayacct.c
index 2a12b988c717..27725754ac99 100644
--- a/kernel/delayacct.c
+++ b/kernel/delayacct.c
@@ -1,16 +1,7 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
/* delayacct.c - per-task delay accounting
*
* Copyright (C) Shailabh Nagar, IBM Corp. 2006
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it would be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See
- * the GNU General Public License for more details.
*/
#include <linux/sched.h>
diff --git a/kernel/dma/Kconfig b/kernel/dma/Kconfig
index 83d711f8d665..70f8f8d9200e 100644
--- a/kernel/dma/Kconfig
+++ b/kernel/dma/Kconfig
@@ -1,3 +1,4 @@
+# SPDX-License-Identifier: GPL-2.0-only
config HAS_DMA
bool
diff --git a/kernel/dma/contiguous.c b/kernel/dma/contiguous.c
index b2a87905846d..bfc0c17f2a3d 100644
--- a/kernel/dma/contiguous.c
+++ b/kernel/dma/contiguous.c
@@ -214,6 +214,62 @@ bool dma_release_from_contiguous(struct device *dev, struct page *pages,
return cma_release(dev_get_cma_area(dev), pages, count);
}
+/**
+ * dma_alloc_contiguous() - allocate contiguous pages
+ * @dev: Pointer to device for which the allocation is performed.
+ * @size: Requested allocation size.
+ * @gfp: Allocation flags.
+ *
+ * This function allocates a contiguous memory buffer for the specified
+ * device. It first tries to use the device-specific contiguous memory area
+ * if available, or the default global one, and then falls back to an
+ * allocation of normal pages.
+ *
+ * Note that it bypasses one-page sized allocations from the global area, as
+ * the addresses within one page are always contiguous, so there is no need
+ * to waste CMA pages on them; this also helps reduce fragmentation.
+ */
+struct page *dma_alloc_contiguous(struct device *dev, size_t size, gfp_t gfp)
+{
+ int node = dev ? dev_to_node(dev) : NUMA_NO_NODE;
+ size_t count = PAGE_ALIGN(size) >> PAGE_SHIFT;
+ size_t align = get_order(PAGE_ALIGN(size));
+ struct page *page = NULL;
+ struct cma *cma = NULL;
+
+ if (dev && dev->cma_area)
+ cma = dev->cma_area;
+ else if (count > 1)
+ cma = dma_contiguous_default_area;
+
+ /* CMA can be used only in the context which permits sleeping */
+ if (cma && gfpflags_allow_blocking(gfp)) {
+ align = min_t(size_t, align, CONFIG_CMA_ALIGNMENT);
+ page = cma_alloc(cma, count, align, gfp & __GFP_NOWARN);
+ }
+
+ /* Fallback allocation of normal pages */
+ if (!page)
+ page = alloc_pages_node(node, gfp, align);
+ return page;
+}
+
+/**
+ * dma_free_contiguous() - release allocated pages
+ * @dev: Pointer to device for which the pages were allocated.
+ * @page: Pointer to the allocated pages.
+ * @size: Size of allocated pages.
+ *
+ * This function releases memory allocated by dma_alloc_contiguous(). As
+ * cma_release() returns false when the provided pages do not belong to a
+ * contiguous area (and true otherwise), this function falls back to
+ * __free_pages() on a false return.
+ */
+void dma_free_contiguous(struct device *dev, struct page *page, size_t size)
+{
+ if (!cma_release(dev_get_cma_area(dev), page, size >> PAGE_SHIFT))
+ __free_pages(page, get_order(size));
+}
+
/*
* Support for reserved memory regions defined in device tree
*/
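
The direct-mapping code below becomes the first caller of the new helpers; the intended pairing is simply (hedged sketch):

	struct page *page;

	page = dma_alloc_contiguous(dev, size, GFP_KERNEL);	/* CMA first, then normal pages */
	if (!page)
		return -ENOMEM;
	/* use the buffer, then: */
	dma_free_contiguous(dev, page, size);	/* cma_release() or __free_pages() */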
diff --git a/kernel/dma/direct.c b/kernel/dma/direct.c
index 2c2772e9702a..b67f0aa08aa3 100644
--- a/kernel/dma/direct.c
+++ b/kernel/dma/direct.c
@@ -96,8 +96,6 @@ static bool dma_coherent_ok(struct device *dev, phys_addr_t phys, size_t size)
struct page *__dma_direct_alloc_pages(struct device *dev, size_t size,
dma_addr_t *dma_handle, gfp_t gfp, unsigned long attrs)
{
- unsigned int count = PAGE_ALIGN(size) >> PAGE_SHIFT;
- int page_order = get_order(size);
struct page *page = NULL;
u64 phys_mask;
@@ -109,20 +107,9 @@ struct page *__dma_direct_alloc_pages(struct device *dev, size_t size,
gfp |= __dma_direct_optimal_gfp_mask(dev, dev->coherent_dma_mask,
&phys_mask);
again:
- /* CMA can be used only in the context which permits sleeping */
- if (gfpflags_allow_blocking(gfp)) {
- page = dma_alloc_from_contiguous(dev, count, page_order,
- gfp & __GFP_NOWARN);
- if (page && !dma_coherent_ok(dev, page_to_phys(page), size)) {
- dma_release_from_contiguous(dev, page, count);
- page = NULL;
- }
- }
- if (!page)
- page = alloc_pages_node(dev_to_node(dev), gfp, page_order);
-
+ page = dma_alloc_contiguous(dev, size, gfp);
if (page && !dma_coherent_ok(dev, page_to_phys(page), size)) {
- __free_pages(page, page_order);
+ dma_free_contiguous(dev, page, size);
page = NULL;
if (IS_ENABLED(CONFIG_ZONE_DMA32) &&
@@ -154,7 +141,7 @@ void *dma_direct_alloc_pages(struct device *dev, size_t size,
if (PageHighMem(page)) {
/*
* Depending on the cma= arguments and per-arch setup
- * dma_alloc_from_contiguous could return highmem pages.
+ * dma_alloc_contiguous could return highmem pages.
* Without remapping there is no way to return them here,
* so log an error and fail.
*/
@@ -171,15 +158,19 @@ void *dma_direct_alloc_pages(struct device *dev, size_t size,
*dma_handle = phys_to_dma(dev, page_to_phys(page));
}
memset(ret, 0, size);
+
+ if (IS_ENABLED(CONFIG_ARCH_HAS_UNCACHED_SEGMENT) &&
+ !dev_is_dma_coherent(dev) && !(attrs & DMA_ATTR_NON_CONSISTENT)) {
+ arch_dma_prep_coherent(page, size);
+ ret = uncached_kernel_address(ret);
+ }
+
return ret;
}
void __dma_direct_free_pages(struct device *dev, size_t size, struct page *page)
{
- unsigned int count = PAGE_ALIGN(size) >> PAGE_SHIFT;
-
- if (!dma_release_from_contiguous(dev, page, count))
- __free_pages(page, get_order(size));
+ dma_free_contiguous(dev, page, size);
}
void dma_direct_free_pages(struct device *dev, size_t size, void *cpu_addr,
@@ -189,13 +180,18 @@ void dma_direct_free_pages(struct device *dev, size_t size, void *cpu_addr,
if (force_dma_unencrypted())
set_memory_encrypted((unsigned long)cpu_addr, 1 << page_order);
+
+ if (IS_ENABLED(CONFIG_ARCH_HAS_UNCACHED_SEGMENT) &&
+ !dev_is_dma_coherent(dev) && !(attrs & DMA_ATTR_NON_CONSISTENT))
+ cpu_addr = cached_kernel_address(cpu_addr);
__dma_direct_free_pages(dev, size, virt_to_page(cpu_addr));
}
void *dma_direct_alloc(struct device *dev, size_t size,
dma_addr_t *dma_handle, gfp_t gfp, unsigned long attrs)
{
- if (!dev_is_dma_coherent(dev))
+ if (!IS_ENABLED(CONFIG_ARCH_HAS_UNCACHED_SEGMENT) &&
+ !dev_is_dma_coherent(dev))
return arch_dma_alloc(dev, size, dma_handle, gfp, attrs);
return dma_direct_alloc_pages(dev, size, dma_handle, gfp, attrs);
}
@@ -203,7 +199,8 @@ void *dma_direct_alloc(struct device *dev, size_t size,
void dma_direct_free(struct device *dev, size_t size,
void *cpu_addr, dma_addr_t dma_addr, unsigned long attrs)
{
- if (!dev_is_dma_coherent(dev))
+ if (!IS_ENABLED(CONFIG_ARCH_HAS_UNCACHED_SEGMENT) &&
+ !dev_is_dma_coherent(dev))
arch_dma_free(dev, size, cpu_addr, dma_addr, attrs);
else
dma_direct_free_pages(dev, size, cpu_addr, dma_addr, attrs);
diff --git a/kernel/dma/swiotlb.c b/kernel/dma/swiotlb.c
index 6f7619c1f877..13f0cb080a4d 100644
--- a/kernel/dma/swiotlb.c
+++ b/kernel/dma/swiotlb.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0-only
/*
* Dynamic DMA mapping support.
*
diff --git a/kernel/events/internal.h b/kernel/events/internal.h
index 79c47076700a..3aef4191798c 100644
--- a/kernel/events/internal.h
+++ b/kernel/events/internal.h
@@ -24,7 +24,7 @@ struct ring_buffer {
atomic_t poll; /* POLL_ for wakeups */
local_t head; /* write position */
- local_t nest; /* nested writers */
+ unsigned int nest; /* nested writers */
local_t events; /* event limit */
local_t wakeup; /* wakeup stamp */
local_t lost; /* nr records lost */
@@ -41,7 +41,7 @@ struct ring_buffer {
/* AUX area */
long aux_head;
- local_t aux_nest;
+ unsigned int aux_nest;
long aux_wakeup; /* last aux_watermark boundary crossed by aux_head */
unsigned long aux_pgoff;
int aux_nr_pages;
diff --git a/kernel/events/ring_buffer.c b/kernel/events/ring_buffer.c
index 674b35383491..ffb59a4ef4ff 100644
--- a/kernel/events/ring_buffer.c
+++ b/kernel/events/ring_buffer.c
@@ -38,7 +38,12 @@ static void perf_output_get_handle(struct perf_output_handle *handle)
struct ring_buffer *rb = handle->rb;
preempt_disable();
- local_inc(&rb->nest);
+
+ /*
+ * Avoid an explicit LOAD/STORE such that architectures with memops
+ * can use them.
+ */
+ (*(volatile unsigned int *)&rb->nest)++;
handle->wakeup = local_read(&rb->wakeup);
}
@@ -46,17 +51,35 @@ static void perf_output_put_handle(struct perf_output_handle *handle)
{
struct ring_buffer *rb = handle->rb;
unsigned long head;
+ unsigned int nest;
+
+ /*
+ * If this isn't the outermost nesting, we don't have to update
+ * @rb->user_page->data_head.
+ */
+ nest = READ_ONCE(rb->nest);
+ if (nest > 1) {
+ WRITE_ONCE(rb->nest, nest - 1);
+ goto out;
+ }
again:
+ /*
+ * In order to avoid publishing a head value that goes backwards,
+ * we must ensure the load of @rb->head happens after we've
+ * incremented @rb->nest.
+ *
+ * Otherwise we can observe a @rb->head value before one published
+ * by an IRQ/NMI happening between the load and the increment.
+ */
+ barrier();
head = local_read(&rb->head);
/*
- * IRQ/NMI can happen here, which means we can miss a head update.
+ * IRQ/NMI can happen here and advance @rb->head, causing our
+ * load above to be stale.
*/
- if (!local_dec_and_test(&rb->nest))
- goto out;
-
/*
* Since the mmap() consumer (userspace) can run on a different CPU:
*
@@ -84,14 +107,23 @@ again:
* See perf_output_begin().
*/
smp_wmb(); /* B, matches C */
- rb->user_page->data_head = head;
+ WRITE_ONCE(rb->user_page->data_head, head);
/*
- * Now check if we missed an update -- rely on previous implied
- * compiler barriers to force a re-read.
+ * We must publish the head before decrementing the nest count,
+ * otherwise an IRQ/NMI can publish a more recent head value and our
+ * write will (temporarily) publish a stale value.
*/
+ barrier();
+ WRITE_ONCE(rb->nest, 0);
+
+ /*
+ * Ensure we decrement @rb->nest before we validate the @rb->head.
+ * Otherwise we cannot be sure we caught the 'last' nested update.
+ */
+ barrier();
if (unlikely(head != local_read(&rb->head))) {
- local_inc(&rb->nest);
+ WRITE_ONCE(rb->nest, 1);
goto again;
}
@@ -330,6 +362,7 @@ void *perf_aux_output_begin(struct perf_output_handle *handle,
struct perf_event *output_event = event;
unsigned long aux_head, aux_tail;
struct ring_buffer *rb;
+ unsigned int nest;
if (output_event->parent)
output_event = output_event->parent;
@@ -360,13 +393,16 @@ void *perf_aux_output_begin(struct perf_output_handle *handle,
if (!refcount_inc_not_zero(&rb->aux_refcount))
goto err;
+ nest = READ_ONCE(rb->aux_nest);
/*
* Nesting is not supported for AUX area, make sure nested
* writers are caught early
*/
- if (WARN_ON_ONCE(local_xchg(&rb->aux_nest, 1)))
+ if (WARN_ON_ONCE(nest))
goto err_put;
+ WRITE_ONCE(rb->aux_nest, nest + 1);
+
aux_head = rb->aux_head;
handle->rb = rb;
@@ -394,7 +430,7 @@ void *perf_aux_output_begin(struct perf_output_handle *handle,
if (!handle->size) { /* A, matches D */
event->pending_disable = smp_processor_id();
perf_output_wakeup(handle);
- local_set(&rb->aux_nest, 0);
+ WRITE_ONCE(rb->aux_nest, 0);
goto err_put;
}
}
@@ -471,7 +507,7 @@ void perf_aux_output_end(struct perf_output_handle *handle, unsigned long size)
perf_event_aux_event(handle->event, aux_head, size,
handle->aux_flags);
- rb->user_page->aux_head = rb->aux_head;
+ WRITE_ONCE(rb->user_page->aux_head, rb->aux_head);
if (rb_need_aux_wakeup(rb))
wakeup = true;
@@ -483,7 +519,7 @@ void perf_aux_output_end(struct perf_output_handle *handle, unsigned long size)
handle->event = NULL;
- local_set(&rb->aux_nest, 0);
+ WRITE_ONCE(rb->aux_nest, 0);
/* can't be last */
rb_free_aux(rb);
ring_buffer_put(rb);
@@ -503,7 +539,7 @@ int perf_aux_output_skip(struct perf_output_handle *handle, unsigned long size)
rb->aux_head += size;
- rb->user_page->aux_head = rb->aux_head;
+ WRITE_ONCE(rb->user_page->aux_head, rb->aux_head);
if (rb_need_aux_wakeup(rb)) {
perf_output_wakeup(handle);
handle->wakeup = rb->aux_wakeup + rb->aux_watermark;
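
Stripped of the perf specifics, the ordering perf_output_put_handle() now implements is the following publish-then-recheck loop (hedged sketch; the compiler barriers carry the correctness argument spelled out in the comments above):

	static void publish_data_head(struct ring_buffer *rb)
	{
		unsigned int nest = READ_ONCE(rb->nest);
		unsigned long head;

		if (nest > 1) {				/* not outermost: the outer */
			WRITE_ONCE(rb->nest, nest - 1);	/* writer will publish */
			return;
		}
	again:
		barrier();		/* load head only after nest is raised */
		head = local_read(&rb->head);
		smp_wmb();		/* order data writes before head; B, matches C */
		WRITE_ONCE(rb->user_page->data_head, head);
		barrier();		/* publish head before dropping nest */
		WRITE_ONCE(rb->nest, 0);
		barrier();		/* drop nest before re-checking head */
		if (head != local_read(&rb->head)) {
			WRITE_ONCE(rb->nest, 1);	/* raced with IRQ/NMI */
			goto again;
		}
	}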
diff --git a/kernel/events/uprobes.c b/kernel/events/uprobes.c
index 78f61bfc6b79..359122185cfb 100644
--- a/kernel/events/uprobes.c
+++ b/kernel/events/uprobes.c
@@ -2112,7 +2112,7 @@ static void handle_trampoline(struct pt_regs *regs)
sigill:
uprobe_warn(current, "handle uretprobe, sending SIGILL.");
- force_sig(SIGILL, current);
+ force_sig(SIGILL);
}
@@ -2228,7 +2228,7 @@ static void handle_singlestep(struct uprobe_task *utask, struct pt_regs *regs)
if (unlikely(err)) {
uprobe_warn(current, "execute the probed insn, sending SIGILL.");
- force_sig(SIGILL, current);
+ force_sig(SIGILL);
}
}
diff --git a/kernel/exit.c b/kernel/exit.c
index 8361a560cd1d..a75b6a7f458a 100644
--- a/kernel/exit.c
+++ b/kernel/exit.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0-only
/*
* linux/kernel/exit.c
*
@@ -194,6 +195,7 @@ repeat:
rcu_read_unlock();
proc_flush_task(p);
+ cgroup_release(p);
write_lock_irq(&tasklist_lock);
ptrace_release_task(p);
@@ -219,7 +221,6 @@ repeat:
}
write_unlock_irq(&tasklist_lock);
- cgroup_release(p);
release_thread(p);
call_rcu(&p->rcu, delayed_put_task_struct);
diff --git a/kernel/extable.c b/kernel/extable.c
index 6a5b61ebc66c..e23cce6e6092 100644
--- a/kernel/extable.c
+++ b/kernel/extable.c
@@ -1,19 +1,7 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
/* Rewritten by Rusty Russell, on the backs of many others...
Copyright (C) 2001 Rusty Russell, 2002 Rusty Russell IBM.
- This program is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published by
- the Free Software Foundation; either version 2 of the License, or
- (at your option) any later version.
-
- This program is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- GNU General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program; if not, write to the Free Software
- Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
*/
#include <linux/ftrace.h>
#include <linux/memory.h>
diff --git a/kernel/fail_function.c b/kernel/fail_function.c
index feb80712b913..63b349168da7 100644
--- a/kernel/fail_function.c
+++ b/kernel/fail_function.c
@@ -152,20 +152,13 @@ static int fei_retval_get(void *data, u64 *val)
DEFINE_DEBUGFS_ATTRIBUTE(fei_retval_ops, fei_retval_get, fei_retval_set,
"%llx\n");
-static int fei_debugfs_add_attr(struct fei_attr *attr)
+static void fei_debugfs_add_attr(struct fei_attr *attr)
{
struct dentry *dir;
dir = debugfs_create_dir(attr->kp.symbol_name, fei_debugfs_dir);
- if (!dir)
- return -ENOMEM;
-
- if (!debugfs_create_file("retval", 0600, dir, attr, &fei_retval_ops)) {
- debugfs_remove_recursive(dir);
- return -ENOMEM;
- }
- return 0;
+ debugfs_create_file("retval", 0600, dir, attr, &fei_retval_ops);
}
static void fei_debugfs_remove_attr(struct fei_attr *attr)
@@ -306,7 +299,7 @@ static ssize_t fei_write(struct file *file, const char __user *buffer,
ret = register_kprobe(&attr->kp);
if (!ret)
- ret = fei_debugfs_add_attr(attr);
+ fei_debugfs_add_attr(attr);
if (ret < 0)
fei_attr_remove(attr);
else {
@@ -337,19 +330,13 @@ static int __init fei_debugfs_init(void)
return PTR_ERR(dir);
/* injectable attribute is just a symlink of error_inject/list */
- if (!debugfs_create_symlink("injectable", dir,
- "../error_injection/list"))
- goto error;
+ debugfs_create_symlink("injectable", dir, "../error_injection/list");
- if (!debugfs_create_file("inject", 0600, dir, NULL, &fei_ops))
- goto error;
+ debugfs_create_file("inject", 0600, dir, NULL, &fei_ops);
fei_debugfs_dir = dir;
return 0;
-error:
- debugfs_remove_recursive(dir);
- return -ENOMEM;
}
late_initcall(fei_debugfs_init);
diff --git a/kernel/fork.c b/kernel/fork.c
index c20f7096b6b7..9f238cdd886e 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0-only
/*
* linux/kernel/fork.c
*
@@ -122,7 +123,7 @@
unsigned long total_forks; /* Handle normal Linux uptimes. */
int nr_threads; /* The idle threads do not count.. */
-int max_threads; /* tunable limit on nr_threads */
+static int max_threads; /* tunable limit on nr_threads */
DEFINE_PER_CPU(unsigned long, process_counts) = 0;
diff --git a/kernel/freezer.c b/kernel/freezer.c
index b162b74611e4..c0738424bb43 100644
--- a/kernel/freezer.c
+++ b/kernel/freezer.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0-only
/*
* kernel/freezer.c - Function to freeze a process
*
diff --git a/kernel/futex.c b/kernel/futex.c
index 2268b97d5439..4b5b468c58b6 100644
--- a/kernel/futex.c
+++ b/kernel/futex.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
/*
* Fast Userspace Mutexes (which I call "Futexes!").
* (C) Rusty Russell, IBM 2002
@@ -29,20 +30,6 @@
*
* "The futexes are also cursed."
* "But they come in a choice of three flavours!"
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
*/
#include <linux/compat.h>
#include <linux/slab.h>
diff --git a/kernel/gcov/Kconfig b/kernel/gcov/Kconfig
index f71c1adcff31..3941a9c48f83 100644
--- a/kernel/gcov/Kconfig
+++ b/kernel/gcov/Kconfig
@@ -1,3 +1,4 @@
+# SPDX-License-Identifier: GPL-2.0-only
menu "GCOV-based kernel profiling"
config GCOV_KERNEL
diff --git a/kernel/gcov/fs.c b/kernel/gcov/fs.c
index 6e40ff6be083..e5eb5ea7ea59 100644
--- a/kernel/gcov/fs.c
+++ b/kernel/gcov/fs.c
@@ -64,7 +64,6 @@ struct gcov_node {
static const char objtree[] = OBJTREE;
static const char srctree[] = SRCTREE;
static struct gcov_node root_node;
-static struct dentry *reset_dentry;
static LIST_HEAD(all_head);
static DEFINE_MUTEX(node_lock);
@@ -387,8 +386,6 @@ static void add_links(struct gcov_node *node, struct dentry *parent)
goto out_err;
node->links[i] = debugfs_create_symlink(deskew(basename),
parent, target);
- if (!node->links[i])
- goto out_err;
kfree(target);
}
@@ -450,11 +447,6 @@ static struct gcov_node *new_node(struct gcov_node *parent,
parent->dentry, node, &gcov_data_fops);
} else
node->dentry = debugfs_create_dir(node->name, parent->dentry);
- if (!node->dentry) {
- pr_warn("could not create file\n");
- kfree(node);
- return NULL;
- }
if (info)
add_links(node, parent->dentry);
list_add(&node->list, &parent->children);
@@ -761,32 +753,20 @@ void gcov_event(enum gcov_action action, struct gcov_info *info)
/* Create debugfs entries. */
static __init int gcov_fs_init(void)
{
- int rc = -EIO;
-
init_node(&root_node, NULL, NULL, NULL);
/*
* /sys/kernel/debug/gcov will be parent for the reset control file
* and all profiling files.
*/
root_node.dentry = debugfs_create_dir("gcov", NULL);
- if (!root_node.dentry)
- goto err_remove;
/*
* Create reset file which resets all profiling counts when written
* to.
*/
- reset_dentry = debugfs_create_file("reset", 0600, root_node.dentry,
- NULL, &gcov_reset_fops);
- if (!reset_dentry)
- goto err_remove;
+ debugfs_create_file("reset", 0600, root_node.dentry, NULL,
+ &gcov_reset_fops);
/* Replay previous events to get our fs hierarchy up-to-date. */
gcov_enable_events();
return 0;
-
-err_remove:
- pr_err("init failed\n");
- debugfs_remove(root_node.dentry);
-
- return rc;
}
device_initcall(gcov_fs_init);
diff --git a/kernel/gen_ikh_data.sh b/kernel/gen_kheaders.sh
index 591a94f7b387..9a34e1d9bd7f 100755
--- a/kernel/gen_ikh_data.sh
+++ b/kernel/gen_kheaders.sh
@@ -2,7 +2,7 @@
# SPDX-License-Identifier: GPL-2.0
# This script generates an archive consisting of kernel headers
-# for CONFIG_IKHEADERS_PROC.
+# for CONFIG_IKHEADERS.
set -e
spath="$(dirname "$(readlink -f "$0")")"
kroot="$spath/.."
@@ -31,9 +31,8 @@ arch/$SRCARCH/include/
# This block is useful for debugging the incremental builds.
# Uncomment it for debugging.
-# iter=1
-# if [ ! -f /tmp/iter ]; then echo 1 > /tmp/iter;
-# else; iter=$(($(cat /tmp/iter) + 1)); fi
+# if [ ! -f /tmp/iter ]; then iter=1; echo 1 > /tmp/iter;
+# else iter=$(($(cat /tmp/iter) + 1)); echo $iter > /tmp/iter; fi
# find $src_file_list -type f | xargs ls -lR > /tmp/src-ls-$iter
# find $obj_file_list -type f | xargs ls -lR > /tmp/obj-ls-$iter
@@ -43,10 +42,18 @@ arch/$SRCARCH/include/
pushd $kroot > /dev/null
src_files_md5="$(find $src_file_list -type f |
grep -v "include/generated/compile.h" |
+ grep -v "include/generated/autoconf.h" |
+ grep -v "include/config/auto.conf" |
+ grep -v "include/config/auto.conf.cmd" |
+ grep -v "include/config/tristate.conf" |
xargs ls -lR | md5sum | cut -d ' ' -f1)"
popd > /dev/null
obj_files_md5="$(find $obj_file_list -type f |
grep -v "include/generated/compile.h" |
+ grep -v "include/generated/autoconf.h" |
+ grep -v "include/config/auto.conf" |
+ grep -v "include/config/auto.conf.cmd" |
+ grep -v "include/config/tristate.conf" |
xargs ls -lR | md5sum | cut -d ' ' -f1)"
if [ -f $tarfile ]; then tarfile_md5="$(md5sum $tarfile | cut -d ' ' -f1)"; fi
@@ -82,7 +89,7 @@ find $cpio_dir -type f -print0 |
tar -Jcf $tarfile -C $cpio_dir/ . > /dev/null
-echo "$src_files_md5" > kernel/kheaders.md5
+echo "$src_files_md5" > kernel/kheaders.md5
echo "$obj_files_md5" >> kernel/kheaders.md5
echo "$(md5sum $tarfile | cut -d ' ' -f1)" >> kernel/kheaders.md5
diff --git a/kernel/hung_task.c b/kernel/hung_task.c
index f108a95882c6..14a625c16cb3 100644
--- a/kernel/hung_task.c
+++ b/kernel/hung_task.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0-only
/*
* Detect Hung Task
*
diff --git a/kernel/irq/Kconfig b/kernel/irq/Kconfig
index 8fee06625c37..f92d9a687372 100644
--- a/kernel/irq/Kconfig
+++ b/kernel/irq/Kconfig
@@ -1,3 +1,4 @@
+# SPDX-License-Identifier: GPL-2.0-only
menu "IRQ subsystem"
# Options selectable by the architecture code
diff --git a/kernel/irq_work.c b/kernel/irq_work.c
index 73288914ed5e..d42acaf81886 100644
--- a/kernel/irq_work.c
+++ b/kernel/irq_work.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0-only
/*
* Copyright (C) 2010 Red Hat, Inc., Peter Zijlstra
*
diff --git a/kernel/jump_label.c b/kernel/jump_label.c
index de6efdecc70d..0bfa10f4410c 100644
--- a/kernel/jump_label.c
+++ b/kernel/jump_label.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0-only
/*
* jump label support
*
diff --git a/kernel/kallsyms.c b/kernel/kallsyms.c
index 14934afa9e68..95a260f9214b 100644
--- a/kernel/kallsyms.c
+++ b/kernel/kallsyms.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0-only
/*
* kallsyms.c: in-kernel printing of symbolic oopses and stack traces.
*
diff --git a/kernel/kheaders.c b/kernel/kheaders.c
index 70ae6052920d..8f69772af77b 100644
--- a/kernel/kheaders.c
+++ b/kernel/kheaders.c
@@ -8,9 +8,8 @@
#include <linux/kernel.h>
#include <linux/module.h>
-#include <linux/proc_fs.h>
+#include <linux/kobject.h>
#include <linux/init.h>
-#include <linux/uaccess.h>
/*
* Define kernel_headers_data and kernel_headers_data_end, within which the
@@ -31,39 +30,32 @@ extern char kernel_headers_data;
extern char kernel_headers_data_end;
static ssize_t
-ikheaders_read_current(struct file *file, char __user *buf,
- size_t len, loff_t *offset)
+ikheaders_read(struct file *file, struct kobject *kobj,
+ struct bin_attribute *bin_attr,
+ char *buf, loff_t off, size_t len)
{
- return simple_read_from_buffer(buf, len, offset,
- &kernel_headers_data,
- &kernel_headers_data_end -
- &kernel_headers_data);
+ memcpy(buf, &kernel_headers_data + off, len);
+ return len;
}
-static const struct file_operations ikheaders_file_ops = {
- .read = ikheaders_read_current,
- .llseek = default_llseek,
+static struct bin_attribute kheaders_attr __ro_after_init = {
+ .attr = {
+ .name = "kheaders.tar.xz",
+ .mode = 0444,
+ },
+ .read = &ikheaders_read,
};
static int __init ikheaders_init(void)
{
- struct proc_dir_entry *entry;
-
- /* create the current headers file */
- entry = proc_create("kheaders.tar.xz", S_IRUGO, NULL,
- &ikheaders_file_ops);
- if (!entry)
- return -ENOMEM;
-
- proc_set_size(entry,
- &kernel_headers_data_end -
- &kernel_headers_data);
- return 0;
+ kheaders_attr.size = (&kernel_headers_data_end -
+ &kernel_headers_data);
+ return sysfs_create_bin_file(kernel_kobj, &kheaders_attr);
}
static void __exit ikheaders_cleanup(void)
{
- remove_proc_entry("kheaders.tar.xz", NULL);
+ sysfs_remove_bin_file(kernel_kobj, &kheaders_attr);
}
module_init(ikheaders_init);
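
With this hunk the archive moves from /proc/kheaders.tar.xz to a sysfs binary attribute at /sys/kernel/kheaders.tar.xz, served in chunks through ikheaders_read(). A minimal userspace consumer sketch (illustrative only; a plain cp, or tar -xf on the sysfs path, works just as well):

#include <stdio.h>
#include <stdlib.h>

int main(void)
{
	FILE *in = fopen("/sys/kernel/kheaders.tar.xz", "rb");
	FILE *out = fopen("kheaders.tar.xz", "wb");
	char buf[4096];
	size_t n;

	if (!in || !out) {
		perror("fopen");
		return EXIT_FAILURE;
	}
	/* Stream the archive; sysfs hands it out chunk by chunk. */
	while ((n = fread(buf, 1, sizeof(buf), in)) > 0)
		fwrite(buf, 1, n, out);
	fclose(in);
	fclose(out);
	return 0;
}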
diff --git a/kernel/kprobes.c b/kernel/kprobes.c
index b1ea30a5540e..e4f14792ee86 100644
--- a/kernel/kprobes.c
+++ b/kernel/kprobes.c
@@ -1,21 +1,8 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
/*
* Kernel Probes (KProbes)
* kernel/kprobes.c
*
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
- *
* Copyright (C) IBM Corporation, 2002, 2004
*
* 2002-Oct Created by Vamsi Krishna S <vamsi_krishna@in.ibm.com> Kernel
@@ -2289,6 +2276,7 @@ static int __init init_kprobes(void)
init_test_probes();
return err;
}
+postcore_initcall(init_kprobes);
#ifdef CONFIG_DEBUG_FS
static void report_probe(struct seq_file *pi, struct kprobe *p,
@@ -2583,36 +2571,21 @@ static const struct file_operations fops_kp = {
static int __init debugfs_kprobe_init(void)
{
- struct dentry *dir, *file;
+ struct dentry *dir;
unsigned int value = 1;
dir = debugfs_create_dir("kprobes", NULL);
- if (!dir)
- return -ENOMEM;
- file = debugfs_create_file("list", 0400, dir, NULL,
- &debugfs_kprobes_operations);
- if (!file)
- goto error;
+ debugfs_create_file("list", 0400, dir, NULL,
+ &debugfs_kprobes_operations);
- file = debugfs_create_file("enabled", 0600, dir,
- &value, &fops_kp);
- if (!file)
- goto error;
+ debugfs_create_file("enabled", 0600, dir, &value, &fops_kp);
- file = debugfs_create_file("blacklist", 0400, dir, NULL,
- &debugfs_kprobe_blacklist_ops);
- if (!file)
- goto error;
+ debugfs_create_file("blacklist", 0400, dir, NULL,
+ &debugfs_kprobe_blacklist_ops);
return 0;
-
-error:
- debugfs_remove(dir);
- return -ENOMEM;
}
late_initcall(debugfs_kprobe_init);
#endif /* CONFIG_DEBUG_FS */
-
-module_init(init_kprobes);
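
Besides the debugfs simplification, this hunk moves kprobes initialisation from module_init() to postcore_initcall(), so the subsystem is ready well before later initcall levels run. A hypothetical built-in consumer (probe target and handler are illustrative, not from the patch):

#include <linux/kprobes.h>

static int example_pre(struct kprobe *p, struct pt_regs *regs)
{
	pr_info("hit %s\n", p->symbol_name);
	return 0;
}

static struct kprobe example_kp = {
	.symbol_name = "do_sys_open",
	.pre_handler = example_pre,
};

/* late_initcall runs long after postcore_initcall, so registration
 * cannot race with kprobes bring-up.
 */
static int __init example_kprobe_init(void)
{
	return register_kprobe(&example_kp);
}
late_initcall(example_kprobe_init);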
diff --git a/kernel/ksysfs.c b/kernel/ksysfs.c
index 46ba853656f6..35859da8bd4f 100644
--- a/kernel/ksysfs.c
+++ b/kernel/ksysfs.c
@@ -1,11 +1,9 @@
+// SPDX-License-Identifier: GPL-2.0-only
/*
* kernel/ksysfs.c - sysfs attributes in /sys/kernel, which
* are not related to any other subsystem
*
* Copyright (C) 2004 Kay Sievers <kay.sievers@vrfy.org>
- *
- * This file is release under the GPLv2
- *
*/
#include <linux/kobject.h>
diff --git a/kernel/kthread.c b/kernel/kthread.c
index be4e8795561a..621467c33fef 100644
--- a/kernel/kthread.c
+++ b/kernel/kthread.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0-only
/* Kernel thread helper functions.
* Copyright (C) 2004 IBM Corporation, Rusty Russell.
*
diff --git a/kernel/livepatch/Kconfig b/kernel/livepatch/Kconfig
index ec4565122e65..54102deb50ba 100644
--- a/kernel/livepatch/Kconfig
+++ b/kernel/livepatch/Kconfig
@@ -1,3 +1,4 @@
+# SPDX-License-Identifier: GPL-2.0-only
config HAVE_LIVEPATCH
bool
help
diff --git a/kernel/livepatch/Makefile b/kernel/livepatch/Makefile
index b36ceda6488e..cf9b5bcdb952 100644
--- a/kernel/livepatch/Makefile
+++ b/kernel/livepatch/Makefile
@@ -1,3 +1,4 @@
+# SPDX-License-Identifier: GPL-2.0-only
obj-$(CONFIG_LIVEPATCH) += livepatch.o
livepatch-objs := core.o patch.o shadow.o transition.o
diff --git a/kernel/livepatch/core.c b/kernel/livepatch/core.c
index 91cd519756d3..2398832947c6 100644
--- a/kernel/livepatch/core.c
+++ b/kernel/livepatch/core.c
@@ -1,21 +1,9 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
/*
* core.c - Kernel Live Patching Core
*
* Copyright (C) 2014 Seth Jennings <sjenning@redhat.com>
* Copyright (C) 2014 SUSE
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version 2
- * of the License, or (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, see <http://www.gnu.org/licenses/>.
*/
#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
diff --git a/kernel/livepatch/patch.c b/kernel/livepatch/patch.c
index 99cb3ad05eb4..bd43537702bd 100644
--- a/kernel/livepatch/patch.c
+++ b/kernel/livepatch/patch.c
@@ -1,22 +1,10 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
/*
* patch.c - livepatch patching functions
*
* Copyright (C) 2014 Seth Jennings <sjenning@redhat.com>
* Copyright (C) 2014 SUSE
* Copyright (C) 2015 Josh Poimboeuf <jpoimboe@redhat.com>
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version 2
- * of the License, or (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, see <http://www.gnu.org/licenses/>.
*/
#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
diff --git a/kernel/livepatch/shadow.c b/kernel/livepatch/shadow.c
index 83958c814439..e5c9fb295ba9 100644
--- a/kernel/livepatch/shadow.c
+++ b/kernel/livepatch/shadow.c
@@ -1,22 +1,10 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
/*
* shadow.c - Shadow Variables
*
* Copyright (C) 2014 Josh Poimboeuf <jpoimboe@redhat.com>
* Copyright (C) 2014 Seth Jennings <sjenning@redhat.com>
* Copyright (C) 2017 Joe Lawrence <joe.lawrence@redhat.com>
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version 2
- * of the License, or (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, see <http://www.gnu.org/licenses/>.
*/
/**
diff --git a/kernel/livepatch/transition.c b/kernel/livepatch/transition.c
index c53370d596be..abb2a4a2cbb2 100644
--- a/kernel/livepatch/transition.c
+++ b/kernel/livepatch/transition.c
@@ -1,20 +1,8 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
/*
* transition.c - Kernel Live Patching transition functions
*
* Copyright (C) 2015-2016 Josh Poimboeuf <jpoimboe@redhat.com>
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version 2
- * of the License, or (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, see <http://www.gnu.org/licenses/>.
*/
#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
diff --git a/kernel/locking/lock_events.h b/kernel/locking/lock_events.h
index feb1acc54611..46b71af8eef2 100644
--- a/kernel/locking/lock_events.h
+++ b/kernel/locking/lock_events.h
@@ -31,12 +31,50 @@ enum lock_events {
DECLARE_PER_CPU(unsigned long, lockevents[lockevent_num]);
/*
+ * The purpose of the lock event counting subsystem is to provide a low
+ * overhead way to record the number of specific locking events by using
+ * percpu counters. It is the percpu sum that matters, not specifically
+ * how many of them happen in each cpu.
+ *
+ * It is possible that the same percpu counter may be modified in both
+ * the process and interrupt contexts. For architectures that perform
+ * percpu operations with multiple instructions, it is possible to lose
+ * a count if a process context percpu update is interrupted in the middle
+ * and the same counter is updated in the interrupt context. Therefore,
+ * the generated percpu sum may not be precise. The error, if any, should
+ * be small and insignificant.
+ *
+ * For those architectures that do multi-instruction percpu operations,
+ * preemption in the middle and moving the task to another cpu may cause
+ * a larger error in the count. Again, such cases should be few and far
+ * between. Given the imprecise nature of the count and the possibility
+ * of resetting the count and repeating the measurement, this is not
+ * really a big problem.
+ *
+ * To get a better picture of what is happening under the hood, it is
+ * suggested that a few measurements be taken with the counts reset in
+ * between, to stamp out outliers caused by these possible error
+ * conditions.
+ *
+ * To minimize overhead, we use __this_cpu_*() in all cases except when
+ * CONFIG_DEBUG_PREEMPT is defined. In this particular case, this_cpu_*()
+ * will be used to avoid the appearance of unwanted BUG messages.
+ */
+#ifdef CONFIG_DEBUG_PREEMPT
+#define lockevent_percpu_inc(x) this_cpu_inc(x)
+#define lockevent_percpu_add(x, v) this_cpu_add(x, v)
+#else
+#define lockevent_percpu_inc(x) __this_cpu_inc(x)
+#define lockevent_percpu_add(x, v) __this_cpu_add(x, v)
+#endif
+
+/*
* Increment the PV qspinlock statistical counters
*/
static inline void __lockevent_inc(enum lock_events event, bool cond)
{
if (cond)
- __this_cpu_inc(lockevents[event]);
+ lockevent_percpu_inc(lockevents[event]);
}
#define lockevent_inc(ev) __lockevent_inc(LOCKEVENT_ ##ev, true)
@@ -44,7 +82,7 @@ static inline void __lockevent_inc(enum lock_events event, bool cond)
static inline void __lockevent_add(enum lock_events event, int inc)
{
- __this_cpu_add(lockevents[event], inc);
+ lockevent_percpu_add(lockevents[event], inc);
}
#define lockevent_add(ev, c) __lockevent_add(LOCKEVENT_ ##ev, c)
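
For illustration (not from the patch), the lost-update window the comment describes, on an architecture whose percpu increment compiles to separate load/add/store instructions:

/*
 *   process context                 interrupt handler
 *   ---------------                 -----------------
 *   load  counter -> r0   (5)
 *                                   load counter (5), add 1,
 *                                   store        (counter = 6)
 *   add   1 -> r0         (6)
 *   store r0 -> counter   (6)   <-- one increment lost
 *
 * The subsystem tolerates this because only the approximate percpu
 * sum is reported. A hypothetical call site is simply:
 */
static inline void example_count_event(void)
{
	lockevent_inc(lock_pending);	/* expands to lockevent_percpu_inc() */
}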
diff --git a/kernel/locking/lockdep.c b/kernel/locking/lockdep.c
index d06190fa5082..8d32ae7768a7 100644
--- a/kernel/locking/lockdep.c
+++ b/kernel/locking/lockdep.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0-only
/*
* kernel/lockdep.c
*
@@ -731,7 +732,8 @@ look_up_lock_class(const struct lockdep_map *lock, unsigned int subclass)
* Huh! same key, different name? Did someone trample
* on some memory? We're most confused.
*/
- WARN_ON_ONCE(class->name != lock->name);
+ WARN_ON_ONCE(class->name != lock->name &&
+ lock->key != &__lockdep_no_validate__);
return class;
}
}
@@ -2816,10 +2818,6 @@ static inline int validate_chain(struct task_struct *curr,
{
return 1;
}
-
-static void print_lock_trace(struct lock_trace *trace, unsigned int spaces)
-{
-}
#endif
/*
diff --git a/kernel/locking/mutex.c b/kernel/locking/mutex.c
index db578783dd36..0c601ae072b3 100644
--- a/kernel/locking/mutex.c
+++ b/kernel/locking/mutex.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0-only
/*
* kernel/locking/mutex.c
*
diff --git a/kernel/locking/percpu-rwsem.c b/kernel/locking/percpu-rwsem.c
index f17dad99eec8..b6a9cc62099a 100644
--- a/kernel/locking/percpu-rwsem.c
+++ b/kernel/locking/percpu-rwsem.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0-only
#include <linux/atomic.h>
#include <linux/rwsem.h>
#include <linux/percpu.h>
diff --git a/kernel/locking/qrwlock.c b/kernel/locking/qrwlock.c
index c7471c3fb798..fe9ca92faa2a 100644
--- a/kernel/locking/qrwlock.c
+++ b/kernel/locking/qrwlock.c
@@ -1,16 +1,7 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
/*
* Queued read/write locks
*
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
* (C) Copyright 2013-2014 Hewlett-Packard Development Company, L.P.
*
* Authors: Waiman Long <waiman.long@hp.com>
diff --git a/kernel/locking/qspinlock.c b/kernel/locking/qspinlock.c
index e14b32c69639..2473f10c6956 100644
--- a/kernel/locking/qspinlock.c
+++ b/kernel/locking/qspinlock.c
@@ -1,16 +1,7 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
/*
* Queued spinlock
*
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
* (C) Copyright 2013-2015 Hewlett-Packard Development Company, L.P.
* (C) Copyright 2013-2014,2018 Red Hat, Inc.
* (C) Copyright 2015 Intel Corp.
diff --git a/kernel/locking/qspinlock_stat.h b/kernel/locking/qspinlock_stat.h
index 54152670ff24..e625bb410aa2 100644
--- a/kernel/locking/qspinlock_stat.h
+++ b/kernel/locking/qspinlock_stat.h
@@ -1,13 +1,5 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
/*
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
*
* Authors: Waiman Long <longman@redhat.com>
*/
diff --git a/kernel/locking/rtmutex.c b/kernel/locking/rtmutex.c
index 978d63a8261c..38fbf9fa7f1b 100644
--- a/kernel/locking/rtmutex.c
+++ b/kernel/locking/rtmutex.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0-only
/*
* RT-Mutexes: simple blocking mutual exclusion locks with PI support
*
diff --git a/kernel/locking/test-ww_mutex.c b/kernel/locking/test-ww_mutex.c
index 65a3b7e55b9f..3e82f449b4ff 100644
--- a/kernel/locking/test-ww_mutex.c
+++ b/kernel/locking/test-ww_mutex.c
@@ -1,19 +1,6 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
/*
* Module-based API test facility for ww_mutexes
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, you can access it online at
- * http://www.gnu.org/licenses/gpl-2.0.html.
*/
#include <linux/kernel.h>
diff --git a/kernel/module-internal.h b/kernel/module-internal.h
index d354341f8cc0..33783abc377b 100644
--- a/kernel/module-internal.h
+++ b/kernel/module-internal.h
@@ -1,12 +1,8 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
/* Module internals
*
* Copyright (C) 2012 Red Hat, Inc. All Rights Reserved.
* Written by David Howells (dhowells@redhat.com)
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public Licence
- * as published by the Free Software Foundation; either version
- * 2 of the Licence, or (at your option) any later version.
*/
#include <linux/elf.h>
diff --git a/kernel/module.c b/kernel/module.c
index 6e6712b3aaf5..80c7c09584cf 100644
--- a/kernel/module.c
+++ b/kernel/module.c
@@ -1,20 +1,8 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
/*
Copyright (C) 2002 Richard Henderson
Copyright (C) 2001 Rusty Russell, 2002, 2010 Rusty Russell IBM.
- This program is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published by
- the Free Software Foundation; either version 2 of the License, or
- (at your option) any later version.
-
- This program is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- GNU General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program; if not, write to the Free Software
- Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
*/
#include <linux/export.h>
#include <linux/extable.h>
diff --git a/kernel/module_signing.c b/kernel/module_signing.c
index 6b9a926fd86b..b10fb1986ca9 100644
--- a/kernel/module_signing.c
+++ b/kernel/module_signing.c
@@ -1,12 +1,8 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
/* Module signature checker
*
* Copyright (C) 2012 Red Hat, Inc. All Rights Reserved.
* Written by David Howells (dhowells@redhat.com)
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public Licence
- * as published by the Free Software Foundation; either version
- * 2 of the Licence, or (at your option) any later version.
*/
#include <linux/kernel.h>
diff --git a/kernel/notifier.c b/kernel/notifier.c
index bfc95b3e4235..d9f5081d578d 100644
--- a/kernel/notifier.c
+++ b/kernel/notifier.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0-only
#include <linux/kdebug.h>
#include <linux/kprobes.h>
#include <linux/export.h>
diff --git a/kernel/panic.c b/kernel/panic.c
index b4543a31a495..4d9f55bf7d38 100644
--- a/kernel/panic.c
+++ b/kernel/panic.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0-only
/*
* linux/kernel/panic.c
*
diff --git a/kernel/params.c b/kernel/params.c
index ce89f757e6da..cf448785d058 100644
--- a/kernel/params.c
+++ b/kernel/params.c
@@ -1,19 +1,7 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
/* Helpers for initial module or kernel cmdline parsing
Copyright (C) 2001 Rusty Russell.
- This program is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published by
- the Free Software Foundation; either version 2 of the License, or
- (at your option) any later version.
-
- This program is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- GNU General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program; if not, write to the Free Software
- Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
*/
#include <linux/kernel.h>
#include <linux/string.h>
diff --git a/kernel/pid.c b/kernel/pid.c
index c3ac441e8543..39181ccca846 100644
--- a/kernel/pid.c
+++ b/kernel/pid.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0-only
/*
* Generic pidhash and scalable, time-bounded PID allocator
*
diff --git a/kernel/pid_namespace.c b/kernel/pid_namespace.c
index aa6e72fb7c08..6d726cef241c 100644
--- a/kernel/pid_namespace.c
+++ b/kernel/pid_namespace.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0-only
/*
* Pid namespaces
*
@@ -325,7 +326,7 @@ int reboot_pid_ns(struct pid_namespace *pid_ns, int cmd)
}
read_lock(&tasklist_lock);
- force_sig(SIGKILL, pid_ns->child_reaper);
+ send_sig(SIGKILL, pid_ns->child_reaper, 1);
read_unlock(&tasklist_lock);
do_exit(0);
diff --git a/kernel/power/Kconfig b/kernel/power/Kconfig
index 9bbaaab14b36..ff8592ddedee 100644
--- a/kernel/power/Kconfig
+++ b/kernel/power/Kconfig
@@ -1,3 +1,4 @@
+# SPDX-License-Identifier: GPL-2.0-only
config SUSPEND
bool "Suspend to RAM and standby"
depends on ARCH_SUSPEND_POSSIBLE
diff --git a/kernel/power/hibernate.c b/kernel/power/hibernate.c
index c8c272df7154..356bcc925833 100644
--- a/kernel/power/hibernate.c
+++ b/kernel/power/hibernate.c
@@ -129,7 +129,7 @@ static int hibernation_test(int level) { return 0; }
static int platform_begin(int platform_mode)
{
return (platform_mode && hibernation_ops) ?
- hibernation_ops->begin() : 0;
+ hibernation_ops->begin(PMSG_FREEZE) : 0;
}
/**
@@ -257,6 +257,11 @@ void swsusp_show_speed(ktime_t start, ktime_t stop,
(kps % 1000) / 10);
}
+__weak int arch_resume_nosmt(void)
+{
+ return 0;
+}
+
/**
* create_image - Create a hibernation image.
* @platform_mode: Whether or not to use the platform driver.
@@ -324,6 +329,10 @@ static int create_image(int platform_mode)
Enable_cpus:
suspend_enable_secondary_cpus();
+ /* Allow architectures to do nosmt-specific post-resume dances */
+ if (!in_suspend)
+ error = arch_resume_nosmt();
+
Platform_finish:
platform_finish(platform_mode);
@@ -542,7 +551,7 @@ int hibernation_platform_enter(void)
* hibernation_ops->finish() before saving the image, so we should let
* the firmware know that we're going to enter the sleep state after all
*/
- error = hibernation_ops->begin();
+ error = hibernation_ops->begin(PMSG_HIBERNATE);
if (error)
goto Close;
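
arch_resume_nosmt() is added above as a __weak stub so an architecture can hook post-resume SMT cleanup. A sketch of an override (the real x86 version lives outside this diff; the body below is illustrative, not a verbatim copy):

int arch_resume_nosmt(void)
{
	int ret = 0;

	if (cpu_smt_control == CPU_SMT_DISABLED ||
	    cpu_smt_control == CPU_SMT_FORCE_DISABLED) {
		enum cpuhp_smt_control old = cpu_smt_control;

		/* Wake SMT siblings out of hlt, then park them properly. */
		ret = cpuhp_smt_enable();
		if (!ret)
			ret = cpuhp_smt_disable(old);
	}
	return ret;
}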
diff --git a/kernel/power/qos.c b/kernel/power/qos.c
index 9d22131afc1e..33e3febaba53 100644
--- a/kernel/power/qos.c
+++ b/kernel/power/qos.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0-only
/*
* This module exposes the interface to kernel space for specifying
* QoS dependencies. It provides infrastructure for registration of:
diff --git a/kernel/power/suspend.c b/kernel/power/suspend.c
index ef908c134b34..43d869db6c07 100644
--- a/kernel/power/suspend.c
+++ b/kernel/power/suspend.c
@@ -62,6 +62,12 @@ static DECLARE_SWAIT_QUEUE_HEAD(s2idle_wait_head);
enum s2idle_states __read_mostly s2idle_state;
static DEFINE_RAW_SPINLOCK(s2idle_lock);
+/**
+ * pm_suspend_via_s2idle - Check if suspend-to-idle is the default suspend.
+ *
+ * Return 'true' if suspend-to-idle has been selected as the default system
+ * suspend method.
+ */
bool pm_suspend_via_s2idle(void)
{
return mem_sleep_current == PM_SUSPEND_TO_IDLE;
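
A hypothetical driver-side use of the helper documented above: skip work that only matters for platform-assisted sleep, since suspend-to-idle never enters platform firmware. example_notify_firmware() is a made-up name for illustration:

static int example_driver_suspend(struct device *dev)
{
	if (pm_suspend_via_s2idle())
		return 0;

	return example_notify_firmware(dev);
}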
diff --git a/kernel/printk/Makefile b/kernel/printk/Makefile
index 4a2ffc39eb95..4d052fc6bcde 100644
--- a/kernel/printk/Makefile
+++ b/kernel/printk/Makefile
@@ -1,3 +1,4 @@
+# SPDX-License-Identifier: GPL-2.0-only
obj-y = printk.o
obj-$(CONFIG_PRINTK) += printk_safe.o
obj-$(CONFIG_A11Y_BRAILLE_CONSOLE) += braille.o
diff --git a/kernel/printk/internal.h b/kernel/printk/internal.h
index 0f1898820cba..c8e6ab689d42 100644
--- a/kernel/printk/internal.h
+++ b/kernel/printk/internal.h
@@ -1,18 +1,6 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
/*
* internal.h - printk internal definitions
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version 2
- * of the License, or (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, see <http://www.gnu.org/licenses/>.
*/
#include <linux/percpu.h>
diff --git a/kernel/printk/printk.c b/kernel/printk/printk.c
index a6e06fe38e41..1888f6a3b694 100644
--- a/kernel/printk/printk.c
+++ b/kernel/printk/printk.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0-only
/*
* linux/kernel/printk.c
*
diff --git a/kernel/printk/printk_safe.c b/kernel/printk/printk_safe.c
index 0913b4d385de..b4045e782743 100644
--- a/kernel/printk/printk_safe.c
+++ b/kernel/printk/printk_safe.c
@@ -1,18 +1,6 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
/*
* printk_safe.c - Safe printk for printk-deadlock-prone contexts
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version 2
- * of the License, or (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, see <http://www.gnu.org/licenses/>.
*/
#include <linux/preempt.h>
diff --git a/kernel/profile.c b/kernel/profile.c
index 9c08a2c7cb1d..af7c94bf5fa1 100644
--- a/kernel/profile.c
+++ b/kernel/profile.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0-only
/*
* linux/kernel/profile.c
* Simple profiling. Manages a direct-mapped profile hit count buffer,
diff --git a/kernel/ptrace.c b/kernel/ptrace.c
index 6f357f4fc859..5710d07e67cf 100644
--- a/kernel/ptrace.c
+++ b/kernel/ptrace.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0-only
/*
* linux/kernel/ptrace.c
*
diff --git a/kernel/rcu/Kconfig b/kernel/rcu/Kconfig
index 37301430970e..480edf328b51 100644
--- a/kernel/rcu/Kconfig
+++ b/kernel/rcu/Kconfig
@@ -1,3 +1,4 @@
+# SPDX-License-Identifier: GPL-2.0-only
#
# RCU-related configuration options
#
diff --git a/kernel/rcu/Kconfig.debug b/kernel/rcu/Kconfig.debug
index 0ec7d1d33a14..5ec3ea4028e2 100644
--- a/kernel/rcu/Kconfig.debug
+++ b/kernel/rcu/Kconfig.debug
@@ -1,3 +1,4 @@
+# SPDX-License-Identifier: GPL-2.0-only
#
# RCU-related debugging configuration options
#
diff --git a/kernel/reboot.c b/kernel/reboot.c
index b9e79e8c7226..c4d472b7f1b4 100644
--- a/kernel/reboot.c
+++ b/kernel/reboot.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0-only
/*
* linux/kernel/reboot.c
*
diff --git a/kernel/resource.c b/kernel/resource.c
index 8c15f846e8ef..158f04ec1d4f 100644
--- a/kernel/resource.c
+++ b/kernel/resource.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0-only
/*
* linux/kernel/resource.c
*
diff --git a/kernel/rseq.c b/kernel/rseq.c
index 9424ee90589e..27c48eb7de40 100644
--- a/kernel/rseq.c
+++ b/kernel/rseq.c
@@ -277,7 +277,7 @@ void __rseq_handle_notify_resume(struct ksignal *ksig, struct pt_regs *regs)
error:
sig = ksig ? ksig->sig : 0;
- force_sigsegv(sig, t);
+ force_sigsegv(sig);
}
#ifdef CONFIG_DEBUG_RSEQ
@@ -296,7 +296,7 @@ void rseq_syscall(struct pt_regs *regs)
return;
if (!access_ok(t->rseq, sizeof(*t->rseq)) ||
rseq_get_rseq_cs(t, &rseq_cs) || in_rseq_cs(ip, &rseq_cs))
- force_sig(SIGSEGV, t);
+ force_sig(SIGSEGV);
}
#endif
diff --git a/kernel/sched/clock.c b/kernel/sched/clock.c
index e3e3b979f9bd..1152259a4ca0 100644
--- a/kernel/sched/clock.c
+++ b/kernel/sched/clock.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0-only
/*
* sched_clock() for unstable CPU clocks
*
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 102dfcf0a29a..874c427742a9 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0-only
/*
* kernel/sched/core.c
*
diff --git a/kernel/sched/cputime.c b/kernel/sched/cputime.c
index ba4a143bdcf3..2305ce89a26c 100644
--- a/kernel/sched/cputime.c
+++ b/kernel/sched/cputime.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0-only
/*
* Simple CPU accounting cgroup controller
*/
diff --git a/kernel/sched/idle.c b/kernel/sched/idle.c
index f5516bae0c1b..80940939b733 100644
--- a/kernel/sched/idle.c
+++ b/kernel/sched/idle.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0-only
/*
* Generic entry points for the idle threads and
* implementation of the idle task scheduling class.
diff --git a/kernel/sched/isolation.c b/kernel/sched/isolation.c
index 687302051a27..123ea07a3f3b 100644
--- a/kernel/sched/isolation.c
+++ b/kernel/sched/isolation.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0-only
/*
* Housekeeping management. Manage the targets for routine code that can run on
* any CPU: unbound workqueues, timers, kthreads and any offloadable work.
diff --git a/kernel/sched/membarrier.c b/kernel/sched/membarrier.c
index 3cd8a3a795d2..aa8d75804108 100644
--- a/kernel/sched/membarrier.c
+++ b/kernel/sched/membarrier.c
@@ -1,17 +1,8 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
/*
* Copyright (C) 2010-2017 Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
*
* membarrier system call
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
*/
#include "sched.h"
diff --git a/kernel/sched/wait.c b/kernel/sched/wait.c
index 6eb1f8efd221..fa0f9adfb752 100644
--- a/kernel/sched/wait.c
+++ b/kernel/sched/wait.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0-only
/*
* Generic waiting primitives.
*
diff --git a/kernel/sched/wait_bit.c b/kernel/sched/wait_bit.c
index c67c6d24adc2..45eba18a2898 100644
--- a/kernel/sched/wait_bit.c
+++ b/kernel/sched/wait_bit.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0-only
/*
* The implementation of the wait_bit*() and related waiting APIs:
*/
diff --git a/kernel/seccomp.c b/kernel/seccomp.c
index 811b4a86cdf6..dba52a7db5e8 100644
--- a/kernel/seccomp.c
+++ b/kernel/seccomp.c
@@ -609,7 +609,7 @@ static void seccomp_send_sigsys(int syscall, int reason)
{
struct kernel_siginfo info;
seccomp_init_siginfo(&info, syscall, reason);
- force_sig_info(SIGSYS, &info, current);
+ force_sig_info(&info);
}
#endif /* CONFIG_SECCOMP_FILTER */
diff --git a/kernel/signal.c b/kernel/signal.c
index 1c86b78a7597..b477e21ecafc 100644
--- a/kernel/signal.c
+++ b/kernel/signal.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0-only
/*
* linux/kernel/signal.c
*
@@ -44,6 +45,7 @@
#include <linux/posix-timers.h>
#include <linux/livepatch.h>
#include <linux/cgroup.h>
+#include <linux/audit.h>
#define CREATE_TRACE_POINTS
#include <trace/events/signal.h>
@@ -53,7 +55,6 @@
#include <asm/unistd.h>
#include <asm/siginfo.h>
#include <asm/cacheflush.h>
-#include "audit.h" /* audit_signal_info() */
/*
* SLAB caches for signal bits.
@@ -1056,29 +1057,8 @@ static inline bool legacy_queue(struct sigpending *signals, int sig)
return (sig < SIGRTMIN) && sigismember(&signals->signal, sig);
}
-#ifdef CONFIG_USER_NS
-static inline void userns_fixup_signal_uid(struct kernel_siginfo *info, struct task_struct *t)
-{
- if (current_user_ns() == task_cred_xxx(t, user_ns))
- return;
-
- if (SI_FROMKERNEL(info))
- return;
-
- rcu_read_lock();
- info->si_uid = from_kuid_munged(task_cred_xxx(t, user_ns),
- make_kuid(current_user_ns(), info->si_uid));
- rcu_read_unlock();
-}
-#else
-static inline void userns_fixup_signal_uid(struct kernel_siginfo *info, struct task_struct *t)
-{
- return;
-}
-#endif
-
static int __send_signal(int sig, struct kernel_siginfo *info, struct task_struct *t,
- enum pid_type type, int from_ancestor_ns)
+ enum pid_type type, bool force)
{
struct sigpending *pending;
struct sigqueue *q;
@@ -1088,8 +1068,7 @@ static int __send_signal(int sig, struct kernel_siginfo *info, struct task_struc
assert_spin_locked(&t->sighand->siglock);
result = TRACE_SIGNAL_IGNORED;
- if (!prepare_signal(sig, t,
- from_ancestor_ns || (info == SEND_SIG_PRIV)))
+ if (!prepare_signal(sig, t, force))
goto ret;
pending = (type != PIDTYPE_PID) ? &t->signal->shared_pending : &t->pending;
@@ -1134,7 +1113,11 @@ static int __send_signal(int sig, struct kernel_siginfo *info, struct task_struc
q->info.si_code = SI_USER;
q->info.si_pid = task_tgid_nr_ns(current,
task_active_pid_ns(t));
- q->info.si_uid = from_kuid_munged(current_user_ns(), current_uid());
+ rcu_read_lock();
+ q->info.si_uid =
+ from_kuid_munged(task_cred_xxx(t, user_ns),
+ current_uid());
+ rcu_read_unlock();
break;
case (unsigned long) SEND_SIG_PRIV:
clear_siginfo(&q->info);
@@ -1146,30 +1129,24 @@ static int __send_signal(int sig, struct kernel_siginfo *info, struct task_struc
break;
default:
copy_siginfo(&q->info, info);
- if (from_ancestor_ns)
- q->info.si_pid = 0;
break;
}
-
- userns_fixup_signal_uid(&q->info, t);
-
- } else if (!is_si_special(info)) {
- if (sig >= SIGRTMIN && info->si_code != SI_USER) {
- /*
- * Queue overflow, abort. We may abort if the
- * signal was rt and sent by user using something
- * other than kill().
- */
- result = TRACE_SIGNAL_OVERFLOW_FAIL;
- ret = -EAGAIN;
- goto ret;
- } else {
- /*
- * This is a silent loss of information. We still
- * send the signal, but the *info bits are lost.
- */
- result = TRACE_SIGNAL_LOSE_INFO;
- }
+ } else if (!is_si_special(info) &&
+ sig >= SIGRTMIN && info->si_code != SI_USER) {
+ /*
+ * Queue overflow, abort. We may abort if the
+ * signal was rt and sent by user using something
+ * other than kill().
+ */
+ result = TRACE_SIGNAL_OVERFLOW_FAIL;
+ ret = -EAGAIN;
+ goto ret;
+ } else {
+ /*
+ * This is a silent loss of information. We still
+ * send the signal, but the *info bits are lost.
+ */
+ result = TRACE_SIGNAL_LOSE_INFO;
}
out_set:
@@ -1196,17 +1173,62 @@ ret:
return ret;
}
+static inline bool has_si_pid_and_uid(struct kernel_siginfo *info)
+{
+ bool ret = false;
+ switch (siginfo_layout(info->si_signo, info->si_code)) {
+ case SIL_KILL:
+ case SIL_CHLD:
+ case SIL_RT:
+ ret = true;
+ break;
+ case SIL_TIMER:
+ case SIL_POLL:
+ case SIL_FAULT:
+ case SIL_FAULT_MCEERR:
+ case SIL_FAULT_BNDERR:
+ case SIL_FAULT_PKUERR:
+ case SIL_SYS:
+ ret = false;
+ break;
+ }
+ return ret;
+}
+
static int send_signal(int sig, struct kernel_siginfo *info, struct task_struct *t,
enum pid_type type)
{
- int from_ancestor_ns = 0;
+ /* Should SIGKILL or SIGSTOP be received by a pid namespace init? */
+ bool force = false;
-#ifdef CONFIG_PID_NS
- from_ancestor_ns = si_fromuser(info) &&
- !task_pid_nr_ns(current, task_active_pid_ns(t));
-#endif
+ if (info == SEND_SIG_NOINFO) {
+ /* Force if sent from an ancestor pid namespace */
+ force = !task_pid_nr_ns(current, task_active_pid_ns(t));
+ } else if (info == SEND_SIG_PRIV) {
+ /* Don't ignore kernel generated signals */
+ force = true;
+ } else if (has_si_pid_and_uid(info)) {
+ /* SIGKILL and SIGSTOP are special or have ids */
+ struct user_namespace *t_user_ns;
+
+ rcu_read_lock();
+ t_user_ns = task_cred_xxx(t, user_ns);
+ if (current_user_ns() != t_user_ns) {
+ kuid_t uid = make_kuid(current_user_ns(), info->si_uid);
+ info->si_uid = from_kuid_munged(t_user_ns, uid);
+ }
+ rcu_read_unlock();
- return __send_signal(sig, info, t, type, from_ancestor_ns);
+ /* A kernel generated signal? */
+ force = (info->si_code == SI_KERNEL);
+
+ /* From an ancestor pid namespace? */
+ if (!task_pid_nr_ns(current, task_active_pid_ns(t))) {
+ info->si_pid = 0;
+ force = true;
+ }
+ }
+ return __send_signal(sig, info, t, type, force);
}
static void print_fatal_signal(int signr)
@@ -1273,12 +1295,13 @@ int do_send_sig_info(int sig, struct kernel_siginfo *info, struct task_struct *p
* We don't want to have recursive SIGSEGV's etc, for example,
* that is why we also clear SIGNAL_UNKILLABLE.
*/
-int
-force_sig_info(int sig, struct kernel_siginfo *info, struct task_struct *t)
+static int
+force_sig_info_to_task(struct kernel_siginfo *info, struct task_struct *t)
{
unsigned long int flags;
int ret, blocked, ignored;
struct k_sigaction *action;
+ int sig = info->si_signo;
spin_lock_irqsave(&t->sighand->siglock, flags);
action = &t->sighand->action[sig-1];
@@ -1303,6 +1326,11 @@ force_sig_info(int sig, struct kernel_siginfo *info, struct task_struct *t)
return ret;
}
+int force_sig_info(struct kernel_siginfo *info)
+{
+ return force_sig_info_to_task(info, current);
+}
+
/*
* Nuke all other threads in the group.
*/
@@ -1439,13 +1467,44 @@ static inline bool kill_as_cred_perm(const struct cred *cred,
uid_eq(cred->uid, pcred->uid);
}
-/* like kill_pid_info(), but doesn't use uid/euid of "current" */
-int kill_pid_info_as_cred(int sig, struct kernel_siginfo *info, struct pid *pid,
- const struct cred *cred)
+/*
+ * The usb asyncio usage of siginfo is wrong. The glibc support
+ * for asyncio which uses SI_ASYNCIO assumes the layout is SIL_RT.
+ * AKA after the generic fields:
+ * kernel_pid_t si_pid;
+ * kernel_uid32_t si_uid;
+ * sigval_t si_value;
+ *
+ * Unfortunately when usb generates SI_ASYNCIO it assumes the layout
+ * after the generic fields is:
+ * void __user *si_addr;
+ *
+ * This is a practical problem when there is a 64bit big endian kernel
+ * and a 32bit userspace, as the 32bit address will be encoded in the
+ * low 32bits of the pointer. Those low 32bits will be stored at a
+ * higher address than would appear in a 32bit pointer, so userspace
+ * will not see the address it was expecting for its completions.
+ *
+ * There is nothing in the encoding that can allow
+ * copy_siginfo_to_user32 to detect this confusion of formats, so
+ * handle this by requiring the caller of kill_pid_usb_asyncio to
+ * notice when this situation takes place and to store the 32bit
+ * pointer in sival_int, instead of sival_addr of the sigval_t addr
+ * parameter.
+ */
+int kill_pid_usb_asyncio(int sig, int errno, sigval_t addr,
+ struct pid *pid, const struct cred *cred)
{
- int ret = -EINVAL;
+ struct kernel_siginfo info;
struct task_struct *p;
unsigned long flags;
+ int ret = -EINVAL;
+
+ clear_siginfo(&info);
+ info.si_signo = sig;
+ info.si_errno = errno;
+ info.si_code = SI_ASYNCIO;
+ *((sigval_t *)&info.si_pid) = addr;
if (!valid_signal(sig))
return ret;
@@ -1456,17 +1515,17 @@ int kill_pid_info_as_cred(int sig, struct kernel_siginfo *info, struct pid *pid,
ret = -ESRCH;
goto out_unlock;
}
- if (si_fromuser(info) && !kill_as_cred_perm(cred, p)) {
+ if (!kill_as_cred_perm(cred, p)) {
ret = -EPERM;
goto out_unlock;
}
- ret = security_task_kill(p, info, sig, cred);
+ ret = security_task_kill(p, &info, sig, cred);
if (ret)
goto out_unlock;
if (sig) {
if (lock_task_sighand(p, &flags)) {
- ret = __send_signal(sig, info, p, PIDTYPE_TGID, 0);
+ ret = __send_signal(sig, &info, p, PIDTYPE_TGID, false);
unlock_task_sighand(p, &flags);
} else
ret = -ESRCH;
@@ -1475,7 +1534,7 @@ out_unlock:
rcu_read_unlock();
return ret;
}
-EXPORT_SYMBOL_GPL(kill_pid_info_as_cred);
+EXPORT_SYMBOL_GPL(kill_pid_usb_asyncio);
/*
* kill_something_info() interprets pid in interesting ways just like kill(2).
@@ -1551,9 +1610,17 @@ send_sig(int sig, struct task_struct *p, int priv)
}
EXPORT_SYMBOL(send_sig);
-void force_sig(int sig, struct task_struct *p)
+void force_sig(int sig)
{
- force_sig_info(sig, SEND_SIG_PRIV, p);
+ struct kernel_siginfo info;
+
+ clear_siginfo(&info);
+ info.si_signo = sig;
+ info.si_errno = 0;
+ info.si_code = SI_KERNEL;
+ info.si_pid = 0;
+ info.si_uid = 0;
+ force_sig_info(&info);
}
EXPORT_SYMBOL(force_sig);
@@ -1563,18 +1630,20 @@ EXPORT_SYMBOL(force_sig);
* the problem was already a SIGSEGV, we'll want to
* make sure we don't even try to deliver the signal..
*/
-void force_sigsegv(int sig, struct task_struct *p)
+void force_sigsegv(int sig)
{
+ struct task_struct *p = current;
+
if (sig == SIGSEGV) {
unsigned long flags;
spin_lock_irqsave(&p->sighand->siglock, flags);
p->sighand->action[sig - 1].sa.sa_handler = SIG_DFL;
spin_unlock_irqrestore(&p->sighand->siglock, flags);
}
- force_sig(SIGSEGV, p);
+ force_sig(SIGSEGV);
}
-int force_sig_fault(int sig, int code, void __user *addr
+int force_sig_fault_to_task(int sig, int code, void __user *addr
___ARCH_SI_TRAPNO(int trapno)
___ARCH_SI_IA64(int imm, unsigned int flags, unsigned long isr)
, struct task_struct *t)
@@ -1594,7 +1663,16 @@ int force_sig_fault(int sig, int code, void __user *addr
info.si_flags = flags;
info.si_isr = isr;
#endif
- return force_sig_info(info.si_signo, &info, t);
+ return force_sig_info_to_task(&info, t);
+}
+
+int force_sig_fault(int sig, int code, void __user *addr
+ ___ARCH_SI_TRAPNO(int trapno)
+ ___ARCH_SI_IA64(int imm, unsigned int flags, unsigned long isr))
+{
+ return force_sig_fault_to_task(sig, code, addr
+ ___ARCH_SI_TRAPNO(trapno)
+ ___ARCH_SI_IA64(imm, flags, isr), current);
}
int send_sig_fault(int sig, int code, void __user *addr
@@ -1620,7 +1698,7 @@ int send_sig_fault(int sig, int code, void __user *addr
return send_sig_info(info.si_signo, &info, t);
}
-int force_sig_mceerr(int code, void __user *addr, short lsb, struct task_struct *t)
+int force_sig_mceerr(int code, void __user *addr, short lsb)
{
struct kernel_siginfo info;
@@ -1631,7 +1709,7 @@ int force_sig_mceerr(int code, void __user *addr, short lsb, struct task_struct
info.si_code = code;
info.si_addr = addr;
info.si_addr_lsb = lsb;
- return force_sig_info(info.si_signo, &info, t);
+ return force_sig_info(&info);
}
int send_sig_mceerr(int code, void __user *addr, short lsb, struct task_struct *t)
@@ -1660,7 +1738,7 @@ int force_sig_bnderr(void __user *addr, void __user *lower, void __user *upper)
info.si_addr = addr;
info.si_lower = lower;
info.si_upper = upper;
- return force_sig_info(info.si_signo, &info, current);
+ return force_sig_info(&info);
}
#ifdef SEGV_PKUERR
@@ -1674,7 +1752,7 @@ int force_sig_pkuerr(void __user *addr, u32 pkey)
info.si_code = SEGV_PKUERR;
info.si_addr = addr;
info.si_pkey = pkey;
- return force_sig_info(info.si_signo, &info, current);
+ return force_sig_info(&info);
}
#endif
@@ -1690,7 +1768,7 @@ int force_sig_ptrace_errno_trap(int errno, void __user *addr)
info.si_errno = errno;
info.si_code = TRAP_HWBKPT;
info.si_addr = addr;
- return force_sig_info(info.si_signo, &info, current);
+ return force_sig_info(&info);
}
int kill_pgrp(struct pid *pid, int sig, int priv)
@@ -2495,6 +2573,8 @@ relock:
if (signal_group_exit(signal)) {
ksig->info.si_signo = signr = SIGKILL;
sigdelset(&current->pending.signal, SIGKILL);
+ trace_signal_deliver(SIGKILL, SEND_SIG_NOINFO,
+ &sighand->action[SIGKILL - 1]);
recalc_sigpending();
goto fatal;
}
@@ -2684,7 +2764,7 @@ static void signal_delivered(struct ksignal *ksig, int stepping)
void signal_setup_done(int failed, struct ksignal *ksig, int stepping)
{
if (failed)
- force_sigsegv(ksig->sig, current);
+ force_sigsegv(ksig->sig);
else
signal_delivered(ksig, stepping);
}
@@ -4485,6 +4565,28 @@ static inline void siginfo_buildtime_checks(void)
CHECK_OFFSET(si_syscall);
CHECK_OFFSET(si_arch);
#undef CHECK_OFFSET
+
+ /* usb asyncio */
+ BUILD_BUG_ON(offsetof(struct siginfo, si_pid) !=
+ offsetof(struct siginfo, si_addr));
+ if (sizeof(int) == sizeof(void __user *)) {
+ BUILD_BUG_ON(sizeof_field(struct siginfo, si_pid) !=
+ sizeof(void __user *));
+ } else {
+ BUILD_BUG_ON((sizeof_field(struct siginfo, si_pid) +
+ sizeof_field(struct siginfo, si_uid)) !=
+ sizeof(void __user *));
+ BUILD_BUG_ON(offsetofend(struct siginfo, si_pid) !=
+ offsetof(struct siginfo, si_uid));
+ }
+#ifdef CONFIG_COMPAT
+ BUILD_BUG_ON(offsetof(struct compat_siginfo, si_pid) !=
+ offsetof(struct compat_siginfo, si_addr));
+ BUILD_BUG_ON(sizeof_field(struct compat_siginfo, si_pid) !=
+ sizeof(compat_uptr_t));
+ BUILD_BUG_ON(sizeof_field(struct compat_siginfo, si_pid) !=
+ sizeof_field(struct siginfo, si_pid));
+#endif
}
void __init signals_init(void)
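
Caller-side sketch of the signal API change in this file: force_sig(), force_sigsegv(), force_sig_info() and the non-_to_task force_sig_fault() now implicitly act on current, which is how nearly every caller already used them. A hypothetical arch fault-handler fragment (not from the patch):

static void example_do_user_fault(void __user *addr)
{
	/* before: force_sig_fault(SIGSEGV, SEGV_MAPERR, addr, current); */
	force_sig_fault(SIGSEGV, SEGV_MAPERR, addr);
}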
diff --git a/kernel/smp.c b/kernel/smp.c
index f4cf1b0bb3b8..d155374632eb 100644
--- a/kernel/smp.c
+++ b/kernel/smp.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0-only
/*
* Generic helpers for smp ipi calls
*
diff --git a/kernel/smpboot.c b/kernel/smpboot.c
index c230c2dd48e1..2efe1e206167 100644
--- a/kernel/smpboot.c
+++ b/kernel/smpboot.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0-only
/*
* Common SMP CPU bringup/teardown functions
*/
diff --git a/kernel/stacktrace.c b/kernel/stacktrace.c
index 27bafc1e271e..36139de0a3c4 100644
--- a/kernel/stacktrace.c
+++ b/kernel/stacktrace.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0-only
/*
* kernel/stacktrace.c
*
@@ -206,7 +207,7 @@ int stack_trace_save_tsk_reliable(struct task_struct *tsk, unsigned long *store,
ret = arch_stack_walk_reliable(consume_entry, &c, tsk);
put_task_stack(tsk);
- return ret;
+ return ret ? ret : c.len;
}
#endif
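
With the "return ret ? ret : c.len" fix above, stack_trace_save_tsk_reliable() reports the number of entries actually stored instead of 0 on success. A hypothetical caller (illustrative only):

static void example_dump_reliable(struct task_struct *tsk)
{
	unsigned long entries[64];
	int nr, i;

	nr = stack_trace_save_tsk_reliable(tsk, entries, ARRAY_SIZE(entries));
	if (nr < 0)
		return;	/* unreliable stack or racing task exit */
	for (i = 0; i < nr; i++)
		pr_info("%pS\n", (void *)entries[i]);
}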
diff --git a/kernel/stop_machine.c b/kernel/stop_machine.c
index 7231fb5953fc..2b5a6754646f 100644
--- a/kernel/stop_machine.c
+++ b/kernel/stop_machine.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
/*
* kernel/stop_machine.c
*
@@ -5,8 +6,6 @@
* Copyright (C) 2008, 2005 Rusty Russell rusty@rustcorp.com.au
* Copyright (C) 2010 SUSE Linux Products GmbH
* Copyright (C) 2010 Tejun Heo <tj@kernel.org>
- *
- * This file is released under the GPLv2 and any later version.
*/
#include <linux/completion.h>
#include <linux/cpu.h>
diff --git a/kernel/sys.c b/kernel/sys.c
index bdbfe8d37418..2969304c29fe 100644
--- a/kernel/sys.c
+++ b/kernel/sys.c
@@ -1882,13 +1882,14 @@ exit_err:
}
/*
+ * Check arithmetic relations of passed addresses.
+ *
* WARNING: we don't require any capability here so be very careful
* in what is allowed for modification from userspace.
*/
-static int validate_prctl_map(struct prctl_mm_map *prctl_map)
+static int validate_prctl_map_addr(struct prctl_mm_map *prctl_map)
{
unsigned long mmap_max_addr = TASK_SIZE;
- struct mm_struct *mm = current->mm;
int error = -EINVAL, i;
static const unsigned char offsets[] = {
@@ -1949,24 +1950,6 @@ static int validate_prctl_map(struct prctl_mm_map *prctl_map)
prctl_map->start_data))
goto out;
- /*
- * Someone is trying to cheat the auxv vector.
- */
- if (prctl_map->auxv_size) {
- if (!prctl_map->auxv || prctl_map->auxv_size > sizeof(mm->saved_auxv))
- goto out;
- }
-
- /*
- * Finally, make sure the caller has the rights to
- * change /proc/pid/exe link: only local sys admin should
- * be allowed to.
- */
- if (prctl_map->exe_fd != (u32)-1) {
- if (!ns_capable(current_user_ns(), CAP_SYS_ADMIN))
- goto out;
- }
-
error = 0;
out:
return error;
@@ -1993,11 +1976,18 @@ static int prctl_set_mm_map(int opt, const void __user *addr, unsigned long data
if (copy_from_user(&prctl_map, addr, sizeof(prctl_map)))
return -EFAULT;
- error = validate_prctl_map(&prctl_map);
+ error = validate_prctl_map_addr(&prctl_map);
if (error)
return error;
if (prctl_map.auxv_size) {
+ /*
+ * Someone is trying to cheat the auxv vector.
+ */
+ if (!prctl_map.auxv ||
+ prctl_map.auxv_size > sizeof(mm->saved_auxv))
+ return -EINVAL;
+
memset(user_auxv, 0, sizeof(user_auxv));
if (copy_from_user(user_auxv,
(const void __user *)prctl_map.auxv,
@@ -2010,6 +2000,14 @@ static int prctl_set_mm_map(int opt, const void __user *addr, unsigned long data
}
if (prctl_map.exe_fd != (u32)-1) {
+ /*
+ * Make sure the caller has the rights to
+ * change /proc/pid/exe link: only local sys admin should
+ * be allowed to.
+ */
+ if (!ns_capable(current_user_ns(), CAP_SYS_ADMIN))
+ return -EINVAL;
+
error = prctl_set_mm_exe_file(mm, prctl_map.exe_fd);
if (error)
return error;
@@ -2097,7 +2095,11 @@ static int prctl_set_mm(int opt, unsigned long addr,
unsigned long arg4, unsigned long arg5)
{
struct mm_struct *mm = current->mm;
- struct prctl_mm_map prctl_map;
+ struct prctl_mm_map prctl_map = {
+ .auxv = NULL,
+ .auxv_size = 0,
+ .exe_fd = -1,
+ };
struct vm_area_struct *vma;
int error;
@@ -2125,9 +2127,15 @@ static int prctl_set_mm(int opt, unsigned long addr,
error = -EINVAL;
- down_write(&mm->mmap_sem);
+ /*
+ * arg_lock protects concurrent updates of arg boundaries; we need
+ * mmap_sem for a) concurrent sys_brk and b) finding the VMA for addr
+ * validation.
+ */
+ down_read(&mm->mmap_sem);
vma = find_vma(mm, addr);
+ spin_lock(&mm->arg_lock);
prctl_map.start_code = mm->start_code;
prctl_map.end_code = mm->end_code;
prctl_map.start_data = mm->start_data;
@@ -2139,9 +2147,6 @@ static int prctl_set_mm(int opt, unsigned long addr,
prctl_map.arg_end = mm->arg_end;
prctl_map.env_start = mm->env_start;
prctl_map.env_end = mm->env_end;
- prctl_map.auxv = NULL;
- prctl_map.auxv_size = 0;
- prctl_map.exe_fd = -1;
switch (opt) {
case PR_SET_MM_START_CODE:
@@ -2181,7 +2186,7 @@ static int prctl_set_mm(int opt, unsigned long addr,
goto out;
}
- error = validate_prctl_map(&prctl_map);
+ error = validate_prctl_map_addr(&prctl_map);
if (error)
goto out;
@@ -2218,7 +2223,8 @@ static int prctl_set_mm(int opt, unsigned long addr,
error = 0;
out:
- up_write(&mm->mmap_sem);
+ spin_unlock(&mm->arg_lock);
+ up_read(&mm->mmap_sem);
return error;
}
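
The prctl_set_mm() hunks establish the locking pattern spelled out in the new comment: mmap_sem held for read (VMA lookup, serialising against sys_brk) plus mm->arg_lock for the arg/env boundary fields themselves. A condensed sketch of that pairing (illustrative, not from the patch):

static void example_update_arg_bounds(struct mm_struct *mm,
				      unsigned long start, unsigned long end)
{
	down_read(&mm->mmap_sem);
	spin_lock(&mm->arg_lock);
	mm->arg_start = start;
	mm->arg_end = end;
	spin_unlock(&mm->arg_lock);
	up_read(&mm->mmap_sem);
}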
diff --git a/kernel/sysctl.c b/kernel/sysctl.c
index 943c89178e3d..7d1008be6173 100644
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0-only
/*
* sysctl.c: General linux system control interface
*
diff --git a/kernel/taskstats.c b/kernel/taskstats.c
index 5f852b8f59f7..13a0f2e6ebc2 100644
--- a/kernel/taskstats.c
+++ b/kernel/taskstats.c
@@ -1,19 +1,9 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
/*
* taskstats.c - Export per-task statistics to userland
*
* Copyright (C) Shailabh Nagar, IBM Corp. 2006
* (C) Balbir Singh, IBM Corp. 2006
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
*/
#include <linux/kernel.h>
diff --git a/kernel/test_kprobes.c b/kernel/test_kprobes.c
index 7bca480151b0..76c997fdbc9d 100644
--- a/kernel/test_kprobes.c
+++ b/kernel/test_kprobes.c
@@ -1,17 +1,8 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
/*
* test_kprobes.c - simple sanity test for *probes
*
* Copyright IBM Corp. 2008
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it would be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See
- * the GNU General Public License for more details.
*/
#define pr_fmt(fmt) "Kprobe smoke test: " fmt
diff --git a/kernel/time/Kconfig b/kernel/time/Kconfig
index e2c038d6c13c..fcc42353f125 100644
--- a/kernel/time/Kconfig
+++ b/kernel/time/Kconfig
@@ -1,3 +1,4 @@
+# SPDX-License-Identifier: GPL-2.0-only
#
# Timer subsystem related configuration options
#
diff --git a/kernel/trace/Kconfig b/kernel/trace/Kconfig
index 5d965cef6c77..98da8998c25c 100644
--- a/kernel/trace/Kconfig
+++ b/kernel/trace/Kconfig
@@ -1,3 +1,4 @@
+# SPDX-License-Identifier: GPL-2.0-only
#
# Architectures that offer a FUNCTION_TRACER implementation should
# select HAVE_FUNCTION_TRACER:
@@ -596,9 +597,19 @@ config FTRACE_STARTUP_TEST
functioning properly. It will do tests on all the configured
tracers of ftrace.
+config EVENT_TRACE_STARTUP_TEST
+ bool "Run selftest on trace events"
+ depends on FTRACE_STARTUP_TEST
+ default y
+ help
+ This option performs a test on all trace events in the system.
+ It basically just enables each event and runs some code that
+ will trigger events (not necessarily the event it enables).
+ This may take some time to run as there are a lot of events.
+
config EVENT_TRACE_TEST_SYSCALLS
bool "Run selftest on syscall events"
- depends on FTRACE_STARTUP_TEST
+ depends on EVENT_TRACE_STARTUP_TEST
help
This option will also enable testing every syscall event.
It only enables the event and disables it and runs various loads
diff --git a/kernel/trace/blktrace.c b/kernel/trace/blktrace.c
index e1c6d79fb4cc..2d6e93ab0478 100644
--- a/kernel/trace/blktrace.c
+++ b/kernel/trace/blktrace.c
@@ -512,8 +512,6 @@ static int do_blk_trace_setup(struct request_queue *q, char *name, dev_t dev,
dir = debugfs_lookup(buts->name, blk_debugfs_root);
if (!dir)
bt->dir = dir = debugfs_create_dir(buts->name, blk_debugfs_root);
- if (!dir)
- goto err;
bt->dev = dev;
atomic_set(&bt->dropped, 0);
@@ -522,12 +520,8 @@ static int do_blk_trace_setup(struct request_queue *q, char *name, dev_t dev,
ret = -EIO;
bt->dropped_file = debugfs_create_file("dropped", 0444, dir, bt,
&blk_dropped_fops);
- if (!bt->dropped_file)
- goto err;
bt->msg_file = debugfs_create_file("msg", 0222, dir, bt, &blk_msg_fops);
- if (!bt->msg_file)
- goto err;
bt->rchan = relay_open("trace", dir, buts->buf_size,
buts->buf_nr, &blk_relay_callbacks, bt);
diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c
index b496ffdf5f36..3994a231eb92 100644
--- a/kernel/trace/bpf_trace.c
+++ b/kernel/trace/bpf_trace.c
@@ -19,6 +19,9 @@
#include "trace_probe.h"
#include "trace.h"
+#define bpf_event_rcu_dereference(p) \
+ rcu_dereference_protected(p, lockdep_is_held(&bpf_event_mutex))
+
#ifdef CONFIG_MODULES
struct bpf_trace_module {
struct module *module;
@@ -567,6 +570,69 @@ static const struct bpf_func_proto bpf_probe_read_str_proto = {
.arg3_type = ARG_ANYTHING,
};
+struct send_signal_irq_work {
+ struct irq_work irq_work;
+ struct task_struct *task;
+ u32 sig;
+};
+
+static DEFINE_PER_CPU(struct send_signal_irq_work, send_signal_work);
+
+static void do_bpf_send_signal(struct irq_work *entry)
+{
+ struct send_signal_irq_work *work;
+
+ work = container_of(entry, struct send_signal_irq_work, irq_work);
+ group_send_sig_info(work->sig, SEND_SIG_PRIV, work->task, PIDTYPE_TGID);
+}
+
+BPF_CALL_1(bpf_send_signal, u32, sig)
+{
+ struct send_signal_irq_work *work = NULL;
+
+ /* Similar to bpf_probe_write_user, the task needs to be
+ * in a sound condition and kernel memory access must be
+ * permitted in order to send a signal to the current
+ * task.
+ */
+ if (unlikely(current->flags & (PF_KTHREAD | PF_EXITING)))
+ return -EPERM;
+ if (unlikely(uaccess_kernel()))
+ return -EPERM;
+ if (unlikely(!nmi_uaccess_okay()))
+ return -EPERM;
+
+ if (in_nmi()) {
+ /* Do an early check on signal validity. Otherwise,
+ * the error would be lost in the deferred irq_work.
+ */
+ if (unlikely(!valid_signal(sig)))
+ return -EINVAL;
+
+ work = this_cpu_ptr(&send_signal_work);
+ if (work->irq_work.flags & IRQ_WORK_BUSY)
+ return -EBUSY;
+
+ /* Record the current task, which is the target of the signal,
+ * in the irq_work, since "current" may have changed by the
+ * time the queued irq_work is executed.
+ */
+ work->task = current;
+ work->sig = sig;
+ irq_work_queue(&work->irq_work);
+ return 0;
+ }
+
+ return group_send_sig_info(sig, SEND_SIG_PRIV, current, PIDTYPE_TGID);
+}
+
+static const struct bpf_func_proto bpf_send_signal_proto = {
+ .func = bpf_send_signal,
+ .gpl_only = false,
+ .ret_type = RET_INTEGER,
+ .arg1_type = ARG_ANYTHING,
+};
+
static const struct bpf_func_proto *
tracing_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
{
@@ -617,6 +683,8 @@ tracing_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
case BPF_FUNC_get_current_cgroup_id:
return &bpf_get_current_cgroup_id_proto;
#endif
+ case BPF_FUNC_send_signal:
+ return &bpf_send_signal_proto;
default:
return NULL;
}
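For context, a minimal sketch of how a tracing program might use the new helper from the BPF side. Everything here is illustrative: the probed symbol, program name, and the header declaring bpf_send_signal() depend on the toolchain (at the time of this patch the helper declaration lived in the selftests' bpf_helpers.h).

    #include <linux/bpf.h>
    #include <bpf/bpf_helpers.h>   /* assumed to declare bpf_send_signal() */

    /* Deliver SIGUSR1 (10 on x86-64) to the current task whenever it
     * enters the probed function. If the program fires in NMI context,
     * the kernel side above defers delivery through irq_work. */
    SEC("kprobe/do_nanosleep")
    int kick_sleeper(void *ctx)
    {
            return bpf_send_signal(10);
    }

    char LICENSE[] SEC("license") = "GPL";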
@@ -1034,7 +1102,7 @@ static DEFINE_MUTEX(bpf_event_mutex);
int perf_event_attach_bpf_prog(struct perf_event *event,
struct bpf_prog *prog)
{
- struct bpf_prog_array __rcu *old_array;
+ struct bpf_prog_array *old_array;
struct bpf_prog_array *new_array;
int ret = -EEXIST;
@@ -1052,7 +1120,7 @@ int perf_event_attach_bpf_prog(struct perf_event *event,
if (event->prog)
goto unlock;
- old_array = event->tp_event->prog_array;
+ old_array = bpf_event_rcu_dereference(event->tp_event->prog_array);
if (old_array &&
bpf_prog_array_length(old_array) >= BPF_TRACE_MAX_PROGS) {
ret = -E2BIG;
@@ -1075,7 +1143,7 @@ unlock:
void perf_event_detach_bpf_prog(struct perf_event *event)
{
- struct bpf_prog_array __rcu *old_array;
+ struct bpf_prog_array *old_array;
struct bpf_prog_array *new_array;
int ret;
@@ -1084,7 +1152,7 @@ void perf_event_detach_bpf_prog(struct perf_event *event)
if (!event->prog)
goto unlock;
- old_array = event->tp_event->prog_array;
+ old_array = bpf_event_rcu_dereference(event->tp_event->prog_array);
ret = bpf_prog_array_copy(old_array, event->prog, NULL, &new_array);
if (ret == -ENOENT)
goto unlock;
@@ -1106,6 +1174,7 @@ int perf_event_query_prog_array(struct perf_event *event, void __user *info)
{
struct perf_event_query_bpf __user *uquery = info;
struct perf_event_query_bpf query = {};
+ struct bpf_prog_array *progs;
u32 *ids, prog_cnt, ids_len;
int ret;
@@ -1130,10 +1199,8 @@ int perf_event_query_prog_array(struct perf_event *event, void __user *info)
*/
mutex_lock(&bpf_event_mutex);
- ret = bpf_prog_array_copy_info(event->tp_event->prog_array,
- ids,
- ids_len,
- &prog_cnt);
+ progs = bpf_event_rcu_dereference(event->tp_event->prog_array);
+ ret = bpf_prog_array_copy_info(progs, ids, ids_len, &prog_cnt);
mutex_unlock(&bpf_event_mutex);
if (copy_to_user(&uquery->prog_cnt, &prog_cnt, sizeof(prog_cnt)) ||
@@ -1297,7 +1364,8 @@ int bpf_get_perf_event_info(const struct perf_event *event, u32 *prog_id,
}
#ifdef CONFIG_MODULES
-int bpf_event_notify(struct notifier_block *nb, unsigned long op, void *module)
+static int bpf_event_notify(struct notifier_block *nb, unsigned long op,
+ void *module)
{
struct bpf_trace_module *btm, *tmp;
struct module *mod = module;
@@ -1336,11 +1404,24 @@ static struct notifier_block bpf_module_nb = {
.notifier_call = bpf_event_notify,
};
-int __init bpf_event_init(void)
+static int __init bpf_event_init(void)
{
register_module_notifier(&bpf_module_nb);
return 0;
}
+static int __init send_signal_irq_work_init(void)
+{
+ int cpu;
+ struct send_signal_irq_work *work;
+
+ for_each_possible_cpu(cpu) {
+ work = per_cpu_ptr(&send_signal_work, cpu);
+ init_irq_work(&work->irq_work, do_bpf_send_signal);
+ }
+ return 0;
+}
+
fs_initcall(bpf_event_init);
+subsys_initcall(send_signal_irq_work_init);
#endif /* CONFIG_MODULES */
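The bpf_event_rcu_dereference() changes drop the __rcu annotation from the local variables by dereferencing once, under bpf_event_mutex, into a plain pointer that the bpf_prog_array helpers can take directly; this also silences sparse, which otherwise flags plain loads of __rcu pointers. A standalone sketch of the idiom (names and types hypothetical):

    #include <linux/mutex.h>
    #include <linux/rcupdate.h>

    struct foo {
            int val;
    };

    static struct foo __rcu *global_foo;
    static DEFINE_MUTEX(foo_mutex);

    /* Caller must hold foo_mutex, mirroring bpf_event_mutex above; the
     * lockdep_is_held() condition documents and verifies that rule. */
    static struct foo *get_foo_locked(void)
    {
            return rcu_dereference_protected(global_foo,
                                             lockdep_is_held(&foo_mutex));
    }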
diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c
index a12aff849c04..5c3eadb143ed 100644
--- a/kernel/trace/ftrace.c
+++ b/kernel/trace/ftrace.c
@@ -1622,6 +1622,11 @@ static bool test_rec_ops_needs_regs(struct dyn_ftrace *rec)
return keep_regs;
}
+static struct ftrace_ops *
+ftrace_find_tramp_ops_any(struct dyn_ftrace *rec);
+static struct ftrace_ops *
+ftrace_find_tramp_ops_next(struct dyn_ftrace *rec, struct ftrace_ops *ops);
+
static bool __ftrace_hash_rec_update(struct ftrace_ops *ops,
int filter_hash,
bool inc)
@@ -1750,15 +1755,17 @@ static bool __ftrace_hash_rec_update(struct ftrace_ops *ops,
}
/*
- * If the rec had TRAMP enabled, then it needs to
- * be cleared. As TRAMP can only be enabled iff
- * there is only a single ops attached to it.
- * In otherwords, always disable it on decrementing.
- * In the future, we may set it if rec count is
- * decremented to one, and the ops that is left
- * has a trampoline.
+ * TRAMP needs to be set only if the rec count is
+ * decremented to one and the ops that is left has
+ * a trampoline, since TRAMP can only be enabled
+ * when there is a single ops attached to the record.
*/
- rec->flags &= ~FTRACE_FL_TRAMP;
+ if (ftrace_rec_count(rec) == 1 &&
+ ftrace_find_tramp_ops_any(rec))
+ rec->flags |= FTRACE_FL_TRAMP;
+ else
+ rec->flags &= ~FTRACE_FL_TRAMP;
/*
* flags will be cleared in ftrace_check_record()
@@ -1768,7 +1775,7 @@ static bool __ftrace_hash_rec_update(struct ftrace_ops *ops,
count++;
/* Must match FTRACE_UPDATE_CALLS in ftrace_modify_all_code() */
- update |= ftrace_test_record(rec, 1) != FTRACE_UPDATE_IGNORE;
+ update |= ftrace_test_record(rec, true) != FTRACE_UPDATE_IGNORE;
/* Shortcut, if we handled all records, we are done. */
if (!all && count == hash->count)
@@ -1951,11 +1958,6 @@ static void print_ip_ins(const char *fmt, const unsigned char *p)
printk(KERN_CONT "%s%02x", i ? ":" : "", p[i]);
}
-static struct ftrace_ops *
-ftrace_find_tramp_ops_any(struct dyn_ftrace *rec);
-static struct ftrace_ops *
-ftrace_find_tramp_ops_next(struct dyn_ftrace *rec, struct ftrace_ops *ops);
-
enum ftrace_bug_type ftrace_bug_type;
const void *ftrace_expected;
@@ -2047,7 +2049,7 @@ void ftrace_bug(int failed, struct dyn_ftrace *rec)
}
}
-static int ftrace_check_record(struct dyn_ftrace *rec, int enable, int update)
+static int ftrace_check_record(struct dyn_ftrace *rec, bool enable, bool update)
{
unsigned long flag = 0UL;
@@ -2146,28 +2148,28 @@ static int ftrace_check_record(struct dyn_ftrace *rec, int enable, int update)
/**
* ftrace_update_record, set a record that now is tracing or not
* @rec: the record to update
- * @enable: set to 1 if the record is tracing, zero to force disable
+ * @enable: set to true if the record is tracing, false to force disable
*
* The records that represent all functions that can be traced need
* to be updated when tracing has been enabled.
*/
-int ftrace_update_record(struct dyn_ftrace *rec, int enable)
+int ftrace_update_record(struct dyn_ftrace *rec, bool enable)
{
- return ftrace_check_record(rec, enable, 1);
+ return ftrace_check_record(rec, enable, true);
}
/**
* ftrace_test_record, check if the record has been enabled or not
* @rec: the record to test
- * @enable: set to 1 to check if enabled, 0 if it is disabled
+ * @enable: set to true to check if enabled, false if it is disabled
*
* The arch code may need to test if a record is already set to
* tracing to determine how to modify the function code that it
* represents.
*/
-int ftrace_test_record(struct dyn_ftrace *rec, int enable)
+int ftrace_test_record(struct dyn_ftrace *rec, bool enable)
{
- return ftrace_check_record(rec, enable, 0);
+ return ftrace_check_record(rec, enable, false);
}
static struct ftrace_ops *
@@ -2356,7 +2358,7 @@ unsigned long ftrace_get_addr_curr(struct dyn_ftrace *rec)
}
static int
-__ftrace_replace_code(struct dyn_ftrace *rec, int enable)
+__ftrace_replace_code(struct dyn_ftrace *rec, bool enable)
{
unsigned long ftrace_old_addr;
unsigned long ftrace_addr;
@@ -2395,7 +2397,7 @@ void __weak ftrace_replace_code(int mod_flags)
{
struct dyn_ftrace *rec;
struct ftrace_page *pg;
- int enable = mod_flags & FTRACE_MODIFY_ENABLE_FL;
+ bool enable = mod_flags & FTRACE_MODIFY_ENABLE_FL;
int schedulable = mod_flags & FTRACE_MODIFY_MAY_SLEEP_FL;
int failed;
diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index 2c92b3d9ea30..a092e40225a7 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -366,7 +366,7 @@ trace_ignore_this_task(struct trace_pid_list *filtered_pids, struct task_struct
}
/**
- * trace_pid_filter_add_remove_task - Add or remove a task from a pid_list
+ * trace_filter_add_remove_task - Add or remove a task from a pid_list
* @pid_list: The list to modify
* @self: The current task for fork or NULL for exit
* @task: The task to add or remove
@@ -4842,12 +4842,13 @@ static const char readme_msg[] =
"\t args: <name>=fetcharg[:type]\n"
"\t fetcharg: %<register>, @<address>, @<symbol>[+|-<offset>],\n"
#ifdef CONFIG_HAVE_FUNCTION_ARG_ACCESS_API
- "\t $stack<index>, $stack, $retval, $comm, $arg<N>\n"
+ "\t $stack<index>, $stack, $retval, $comm, $arg<N>,\n"
#else
- "\t $stack<index>, $stack, $retval, $comm\n"
+ "\t $stack<index>, $stack, $retval, $comm,\n"
#endif
+ "\t +|-[u]<offset>(<fetcharg>)\n"
"\t type: s8/16/32/64, u8/16/32/64, x8/16/32/64, string, symbol,\n"
- "\t b<bit-width>@<bit-offset>/<container-size>,\n"
+ "\t b<bit-width>@<bit-offset>/<container-size>, ustring,\n"
"\t <type>\\[<array-size>\\]\n"
#ifdef CONFIG_HIST_TRIGGERS
"\t field: <stype> <name>;\n"
@@ -8604,10 +8605,6 @@ struct dentry *tracing_init_dentry(void)
*/
tr->dir = debugfs_create_automount("tracing", NULL,
trace_automount, NULL);
- if (!tr->dir) {
- pr_warn_once("Could not create debugfs directory 'tracing'\n");
- return ERR_PTR(-ENOMEM);
- }
return NULL;
}
@@ -8910,12 +8907,8 @@ void ftrace_dump(enum ftrace_dump_mode oops_dump_mode)
cnt++;
- /* reset all but tr, trace, and overruns */
- memset(&iter.seq, 0,
- sizeof(struct trace_iterator) -
- offsetof(struct trace_iterator, seq));
+ trace_iterator_reset(&iter);
iter.iter_flags |= TRACE_FILE_LAT_FMT;
- iter.pos = -1;
if (trace_find_next_entry_inc(&iter) != NULL) {
int ret;
diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h
index 1974ce818ddb..005f08629b8b 100644
--- a/kernel/trace/trace.h
+++ b/kernel/trace/trace.h
@@ -15,7 +15,6 @@
#include <linux/trace_seq.h>
#include <linux/trace_events.h>
#include <linux/compiler.h>
-#include <linux/trace_seq.h>
#include <linux/glob.h>
#ifdef CONFIG_FTRACE_SYSCALLS
@@ -1967,4 +1966,22 @@ static inline void tracer_hardirqs_off(unsigned long a0, unsigned long a1) { }
extern struct trace_iterator *tracepoint_print_iter;
+/*
+ * Reset the state of the trace_iterator so that it can read consumed data.
+ * Normally, the trace_iterator is used for reading the data when it is not
+ * consumed, and must retain state.
+ */
+static __always_inline void trace_iterator_reset(struct trace_iterator *iter)
+{
+ const size_t offset = offsetof(struct trace_iterator, seq);
+
+ /*
+ * Keep gcc from complaining about overwriting more than just one
+ * member in the structure.
+ */
+ memset((char *)iter + offset, 0, sizeof(struct trace_iterator) - offset);
+
+ iter->pos = -1;
+}
+
#endif /* _LINUX_KERNEL_TRACE_H */
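The char * cast plus offsetof() arithmetic is the portable way to clear the tail of a structure while preserving its head, without the compiler warning about a memset that writes "through" one member into its neighbors. A standalone sketch with a hypothetical struct:

    #include <stddef.h>
    #include <string.h>

    struct cursor {
            void *owner;      /* preserved across resets */
            long overruns;    /* preserved across resets */
            char scratch[64]; /* zeroed from here onward */
            int pos;
    };

    static void cursor_reset(struct cursor *c)
    {
            const size_t off = offsetof(struct cursor, scratch);

            memset((char *)c + off, 0, sizeof(*c) - off);
            c->pos = -1;
    }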
diff --git a/kernel/trace/trace_events.c b/kernel/trace/trace_events.c
index 0ce3db67f556..edc72f3b080c 100644
--- a/kernel/trace/trace_events.c
+++ b/kernel/trace/trace_events.c
@@ -3190,7 +3190,7 @@ void __init trace_event_init(void)
event_trace_enable();
}
-#ifdef CONFIG_FTRACE_STARTUP_TEST
+#ifdef CONFIG_EVENT_TRACE_STARTUP_TEST
static DEFINE_SPINLOCK(test_spinlock);
static DEFINE_SPINLOCK(test_spinlock_irq);
diff --git a/kernel/trace/trace_events_filter.c b/kernel/trace/trace_events_filter.c
index d3e59312ef40..5079d1db3754 100644
--- a/kernel/trace/trace_events_filter.c
+++ b/kernel/trace/trace_events_filter.c
@@ -428,7 +428,7 @@ predicate_parse(const char *str, int nr_parens, int nr_preds,
op_stack = kmalloc_array(nr_parens, sizeof(*op_stack), GFP_KERNEL);
if (!op_stack)
return ERR_PTR(-ENOMEM);
- prog_stack = kmalloc_array(nr_preds, sizeof(*prog_stack), GFP_KERNEL);
+ prog_stack = kcalloc(nr_preds, sizeof(*prog_stack), GFP_KERNEL);
if (!prog_stack) {
parse_error(pe, -ENOMEM, 0);
goto out_free;
@@ -579,7 +579,11 @@ predicate_parse(const char *str, int nr_parens, int nr_preds,
out_free:
kfree(op_stack);
kfree(inverts);
- kfree(prog_stack);
+ if (prog_stack) {
+ for (i = 0; prog_stack[i].pred; i++)
+ kfree(prog_stack[i].pred);
+ kfree(prog_stack);
+ }
return ERR_PTR(ret);
}
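The switch from kmalloc_array() to kcalloc() is what makes the new error path safe: the array starts zero-filled, so the cleanup loop can treat the first NULL ->pred as the end of the populated region. A generic sketch of the pattern (struct and names hypothetical):

    #include <linux/slab.h>

    struct slot {
            void *pred;
    };

    static int build_and_fail(int n)
    {
            struct slot *stack;
            int i;

            stack = kcalloc(n, sizeof(*stack), GFP_KERNEL); /* zero-filled */
            if (!stack)
                    return -ENOMEM;

            /* ... populate some stack[i].pred, then hit a parse error ... */

            /* Zero-fill guarantees the walk stops at the first unused slot. */
            for (i = 0; i < n && stack[i].pred; i++)
                    kfree(stack[i].pred);
            kfree(stack);
            return -EINVAL;
    }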
diff --git a/kernel/trace/trace_events_hist.c b/kernel/trace/trace_events_hist.c
index 7fca3457c705..ca6b0dff60c5 100644
--- a/kernel/trace/trace_events_hist.c
+++ b/kernel/trace/trace_events_hist.c
@@ -59,7 +59,7 @@
C(NO_CLOSING_PAREN, "No closing paren found"), \
C(SUBSYS_NOT_FOUND, "Missing subsystem"), \
C(INVALID_SUBSYS_EVENT, "Invalid subsystem or event name"), \
- C(INVALID_REF_KEY, "Using variable references as keys not supported"), \
+ C(INVALID_REF_KEY, "Using variable references in keys not supported"), \
C(VAR_NOT_FOUND, "Couldn't find variable"), \
C(FIELD_NOT_FOUND, "Couldn't find field"),
@@ -1854,6 +1854,9 @@ static u64 hist_field_var_ref(struct hist_field *hist_field,
struct hist_elt_data *elt_data;
u64 var_val = 0;
+ if (WARN_ON_ONCE(!elt))
+ return var_val;
+
elt_data = elt->private_data;
var_val = elt_data->var_ref_vals[hist_field->var_ref_idx];
@@ -3582,14 +3585,20 @@ static bool cond_snapshot_update(struct trace_array *tr, void *cond_data)
struct track_data *track_data = tr->cond_snapshot->cond_data;
struct hist_elt_data *elt_data, *track_elt_data;
struct snapshot_context *context = cond_data;
+ struct action_data *action;
u64 track_val;
if (!track_data)
return false;
+ action = track_data->action_data;
+
track_val = get_track_val(track_data->hist_data, context->elt,
track_data->action_data);
+ if (!action->track_data.check_val(track_data->track_val, track_val))
+ return false;
+
track_data->track_val = track_val;
memcpy(track_data->key, context->key, track_data->key_len);
@@ -4503,7 +4512,7 @@ static int create_key_field(struct hist_trigger_data *hist_data,
goto out;
}
- if (hist_field->flags & HIST_FIELD_FL_VAR_REF) {
+ if (field_has_hist_vars(hist_field, 0)) {
hist_err(tr, HIST_ERR_INVALID_REF_KEY, errpos(field_str));
destroy_hist_field(hist_field, 0);
ret = -EINVAL;
diff --git a/kernel/trace/trace_kdb.c b/kernel/trace/trace_kdb.c
index 6c1ae6b752d1..cca65044c14c 100644
--- a/kernel/trace/trace_kdb.c
+++ b/kernel/trace/trace_kdb.c
@@ -37,12 +37,8 @@ static void ftrace_dump_buf(int skip_entries, long cpu_file)
if (skip_entries)
kdb_printf("(skipping %d entries)\n", skip_entries);
- /* reset all but tr, trace, and overruns */
- memset(&iter.seq, 0,
- sizeof(struct trace_iterator) -
- offsetof(struct trace_iterator, seq));
+ trace_iterator_reset(&iter);
iter.iter_flags |= TRACE_FILE_LAT_FMT;
- iter.pos = -1;
if (cpu_file == RING_BUFFER_ALL_CPUS) {
for_each_tracing_cpu(cpu) {
diff --git a/kernel/trace/trace_kprobe.c b/kernel/trace/trace_kprobe.c
index 7d736248a070..7958da2fd922 100644
--- a/kernel/trace/trace_kprobe.c
+++ b/kernel/trace/trace_kprobe.c
@@ -12,6 +12,8 @@
#include <linux/rculist.h>
#include <linux/error-injection.h>
+#include <asm/setup.h> /* for COMMAND_LINE_SIZE */
+
#include "trace_dynevent.h"
#include "trace_kprobe_selftest.h"
#include "trace_probe.h"
@@ -19,6 +21,18 @@
#define KPROBE_EVENT_SYSTEM "kprobes"
#define KRETPROBE_MAXACTIVE_MAX 4096
+#define MAX_KPROBE_CMDLINE_SIZE 1024
+
+/* Kprobe early definition from command line */
+static char kprobe_boot_events_buf[COMMAND_LINE_SIZE] __initdata;
+static bool kprobe_boot_events_enabled __initdata;
+
+static int __init set_kprobe_boot_events(char *str)
+{
+ strlcpy(kprobe_boot_events_buf, str, COMMAND_LINE_SIZE);
+ return 0;
+}
+__setup("kprobe_event=", set_kprobe_boot_events);
static int trace_kprobe_create(int argc, const char **argv);
static int trace_kprobe_show(struct seq_file *m, struct dyn_event *ev);
@@ -886,6 +900,15 @@ fetch_store_strlen(unsigned long addr)
return (ret < 0) ? ret : len;
}
+/* Return the length of the string, including the terminating NUL byte */
+static nokprobe_inline int
+fetch_store_strlen_user(unsigned long addr)
+{
+ const void __user *uaddr = (__force const void __user *)addr;
+
+ return strnlen_unsafe_user(uaddr, MAX_STRING_SIZE);
+}
+
/*
* Fetch a null-terminated string. Caller MUST set *(u32 *)buf with max
* length and relative data location.
@@ -894,19 +917,46 @@ static nokprobe_inline int
fetch_store_string(unsigned long addr, void *dest, void *base)
{
int maxlen = get_loc_len(*(u32 *)dest);
- u8 *dst = get_loc_data(dest, base);
+ void *__dest;
long ret;
if (unlikely(!maxlen))
return -ENOMEM;
+
+ __dest = get_loc_data(dest, base);
+
/*
* Try to get string again, since the string can be changed while
* probing.
*/
- ret = strncpy_from_unsafe(dst, (void *)addr, maxlen);
+ ret = strncpy_from_unsafe(__dest, (void *)addr, maxlen);
+ if (ret >= 0)
+ *(u32 *)dest = make_data_loc(ret, __dest - base);
+
+ return ret;
+}
+
+/*
+ * Fetch a null-terminated string from user space. Caller MUST set *(u32 *)buf
+ * with max length and relative data location.
+ */
+static nokprobe_inline int
+fetch_store_string_user(unsigned long addr, void *dest, void *base)
+{
+ const void __user *uaddr = (__force const void __user *)addr;
+ int maxlen = get_loc_len(*(u32 *)dest);
+ void *__dest;
+ long ret;
+
+ if (unlikely(!maxlen))
+ return -ENOMEM;
+ __dest = get_loc_data(dest, base);
+
+ ret = strncpy_from_unsafe_user(__dest, uaddr, maxlen);
if (ret >= 0)
- *(u32 *)dest = make_data_loc(ret, (void *)dst - base);
+ *(u32 *)dest = make_data_loc(ret, __dest - base);
+
return ret;
}
@@ -916,6 +966,14 @@ probe_mem_read(void *dest, void *src, size_t size)
return probe_kernel_read(dest, src, size);
}
+static nokprobe_inline int
+probe_mem_read_user(void *dest, void *src, size_t size)
+{
+ const void __user *uaddr = (__force const void __user *)src;
+
+ return probe_user_read(dest, uaddr, size);
+}
+
/* Note that we don't verify it, since the code does not come from user space */
static int
process_fetch_insn(struct fetch_insn *code, struct pt_regs *regs, void *dest,
@@ -1450,6 +1508,46 @@ void destroy_local_trace_kprobe(struct trace_event_call *event_call)
}
#endif /* CONFIG_PERF_EVENTS */
+static __init void enable_boot_kprobe_events(void)
+{
+ struct trace_array *tr = top_trace_array();
+ struct trace_event_file *file;
+ struct trace_kprobe *tk;
+ struct dyn_event *pos;
+
+ mutex_lock(&event_mutex);
+ for_each_trace_kprobe(tk, pos) {
+ list_for_each_entry(file, &tr->events, list)
+ if (file->event_call == &tk->tp.call)
+ trace_event_enable_disable(file, 1, 0);
+ }
+ mutex_unlock(&event_mutex);
+}
+
+static __init void setup_boot_kprobe_events(void)
+{
+ char *p, *cmd = kprobe_boot_events_buf;
+ int ret;
+
+ strreplace(kprobe_boot_events_buf, ',', ' ');
+
+ while (cmd && *cmd != '\0') {
+ p = strchr(cmd, ';');
+ if (p)
+ *p++ = '\0';
+
+ ret = trace_run_command(cmd, create_or_delete_trace_kprobe);
+ if (ret)
+ pr_warn("Failed to add event(%d): %s\n", ret, cmd);
+ else
+ kprobe_boot_events_enabled = true;
+
+ cmd = p;
+ }
+
+ enable_boot_kprobe_events();
+}
+
/* Make a tracefs interface for controlling probe points */
static __init int init_kprobe_trace(void)
{
@@ -1481,6 +1579,9 @@ static __init int init_kprobe_trace(void)
if (!entry)
pr_warn("Could not create tracefs 'kprobe_profile' entry\n");
+
+ setup_boot_kprobe_events();
+
return 0;
}
fs_initcall(init_kprobe_trace);
@@ -1513,6 +1614,11 @@ static __init int kprobe_trace_self_tests_init(void)
if (tracing_is_disabled())
return -ENODEV;
+ if (kprobe_boot_events_enabled) {
+ pr_info("Skipping kprobe tests due to kprobe_event on cmdline\n");
+ return 0;
+ }
+
target = kprobe_trace_selftest_target;
pr_info("Testing kprobe tracing: ");
diff --git a/kernel/trace/trace_probe.c b/kernel/trace/trace_probe.c
index a347faced959..b6b0593844cd 100644
--- a/kernel/trace/trace_probe.c
+++ b/kernel/trace/trace_probe.c
@@ -78,6 +78,8 @@ static const struct fetch_type probe_fetch_types[] = {
/* Special types */
__ASSIGN_FETCH_TYPE("string", string, string, sizeof(u32), 1,
"__data_loc char[]"),
+ __ASSIGN_FETCH_TYPE("ustring", string, string, sizeof(u32), 1,
+ "__data_loc char[]"),
/* Basic types */
ASSIGN_FETCH_TYPE(u8, u8, 0),
ASSIGN_FETCH_TYPE(u16, u16, 0),
@@ -322,6 +324,7 @@ parse_probe_arg(char *arg, const struct fetch_type *type,
{
struct fetch_insn *code = *pcode;
unsigned long param;
+ int deref = FETCH_OP_DEREF;
long offset = 0;
char *tmp;
int ret = 0;
@@ -394,9 +397,14 @@ parse_probe_arg(char *arg, const struct fetch_type *type,
break;
case '+': /* deref memory */
- arg++; /* Skip '+', because kstrtol() rejects it. */
- /* fall through */
case '-':
+ if (arg[1] == 'u') {
+ deref = FETCH_OP_UDEREF;
+ arg[1] = arg[0];
+ arg++;
+ }
+ if (arg[0] == '+')
+ arg++; /* Skip '+', because kstrtol() rejects it. */
tmp = strchr(arg, '(');
if (!tmp) {
trace_probe_log_err(offs, DEREF_NEED_BRACE);
@@ -432,7 +440,7 @@ parse_probe_arg(char *arg, const struct fetch_type *type,
}
*pcode = code;
- code->op = FETCH_OP_DEREF;
+ code->op = deref;
code->offset = offset;
}
break;
@@ -569,15 +577,17 @@ static int traceprobe_parse_probe_arg_body(char *arg, ssize_t *size,
goto fail;
/* Store operation */
- if (!strcmp(parg->type->name, "string")) {
- if (code->op != FETCH_OP_DEREF && code->op != FETCH_OP_IMM &&
- code->op != FETCH_OP_COMM) {
+ if (!strcmp(parg->type->name, "string") ||
+ !strcmp(parg->type->name, "ustring")) {
+ if (code->op != FETCH_OP_DEREF && code->op != FETCH_OP_UDEREF &&
+ code->op != FETCH_OP_IMM && code->op != FETCH_OP_COMM) {
trace_probe_log_err(offset + (t ? (t - arg) : 0),
BAD_STRING);
ret = -EINVAL;
goto fail;
}
- if (code->op != FETCH_OP_DEREF || parg->count) {
+ if ((code->op == FETCH_OP_IMM || code->op == FETCH_OP_COMM) ||
+ parg->count) {
/*
* IMM and COMM point to the actual address; those must
* be kept, and if parg->count != 0, this is an array
@@ -590,12 +600,20 @@ static int traceprobe_parse_probe_arg_body(char *arg, ssize_t *size,
goto fail;
}
}
- code->op = FETCH_OP_ST_STRING; /* In DEREF case, replace it */
+ /* If op == DEREF, replace it with STRING */
+ if (!strcmp(parg->type->name, "ustring") ||
+ code->op == FETCH_OP_UDEREF)
+ code->op = FETCH_OP_ST_USTRING;
+ else
+ code->op = FETCH_OP_ST_STRING;
code->size = parg->type->size;
parg->dynamic = true;
} else if (code->op == FETCH_OP_DEREF) {
code->op = FETCH_OP_ST_MEM;
code->size = parg->type->size;
+ } else if (code->op == FETCH_OP_UDEREF) {
+ code->op = FETCH_OP_ST_UMEM;
+ code->size = parg->type->size;
} else {
code++;
if (code->op != FETCH_OP_NOP) {
@@ -618,7 +636,8 @@ static int traceprobe_parse_probe_arg_body(char *arg, ssize_t *size,
/* Loop(Array) operation */
if (parg->count) {
if (scode->op != FETCH_OP_ST_MEM &&
- scode->op != FETCH_OP_ST_STRING) {
+ scode->op != FETCH_OP_ST_STRING &&
+ scode->op != FETCH_OP_ST_USTRING) {
trace_probe_log_err(offset + (t ? (t - arg) : 0),
BAD_STRING);
ret = -EINVAL;
diff --git a/kernel/trace/trace_probe.h b/kernel/trace/trace_probe.h
index f9a8c632188b..42816358dd48 100644
--- a/kernel/trace/trace_probe.h
+++ b/kernel/trace/trace_probe.h
@@ -92,10 +92,13 @@ enum fetch_op {
FETCH_OP_FOFFS, /* File offset: .immediate */
// Stage 2 (dereference) op
FETCH_OP_DEREF, /* Dereference: .offset */
+ FETCH_OP_UDEREF, /* User-space Dereference: .offset */
// Stage 3 (store) ops
FETCH_OP_ST_RAW, /* Raw: .size */
FETCH_OP_ST_MEM, /* Mem: .offset, .size */
+ FETCH_OP_ST_UMEM, /* User Mem: .offset, .size */
FETCH_OP_ST_STRING, /* String: .offset, .size */
+ FETCH_OP_ST_USTRING, /* User String: .offset, .size */
// Stage 4 (modify) op
FETCH_OP_MOD_BF, /* Bitfield: .basesize, .lshift, .rshift */
// Stage 5 (loop) op
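Putting the new ops together, user-space strings can now be fetched explicitly through the probe interfaces. A sketch against the kprobe_events file (probe names and the target symbol are illustrative, and $arg2 assumes an architecture with HAVE_FUNCTION_ARG_ACCESS_API; do_sys_open() takes the filename as its second, user-space argument):

    # Fetch the user-space filename string via the new ustring type:
    echo 'p:myopen do_sys_open path=+0($arg2):ustring' >> /sys/kernel/debug/tracing/kprobe_events

    # Equivalent form: the new +u offset modifier with the plain string type:
    echo 'p:myopen2 do_sys_open path=+u0($arg2):string' >> /sys/kernel/debug/tracing/kprobe_events

Either spelling ends up as FETCH_OP_ST_USTRING in the store stage, per the traceprobe_parse_probe_arg_body() changes above.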
diff --git a/kernel/trace/trace_probe_tmpl.h b/kernel/trace/trace_probe_tmpl.h
index c30c61f12ddd..e5282828f4a6 100644
--- a/kernel/trace/trace_probe_tmpl.h
+++ b/kernel/trace/trace_probe_tmpl.h
@@ -59,8 +59,13 @@ process_fetch_insn(struct fetch_insn *code, struct pt_regs *regs,
static nokprobe_inline int fetch_store_strlen(unsigned long addr);
static nokprobe_inline int
fetch_store_string(unsigned long addr, void *dest, void *base);
+static nokprobe_inline int fetch_store_strlen_user(unsigned long addr);
+static nokprobe_inline int
+fetch_store_string_user(unsigned long addr, void *dest, void *base);
static nokprobe_inline int
probe_mem_read(void *dest, void *src, size_t size);
+static nokprobe_inline int
+probe_mem_read_user(void *dest, void *src, size_t size);
/* From the 2nd stage, routine is same */
static nokprobe_inline int
@@ -74,14 +79,21 @@ process_fetch_insn_bottom(struct fetch_insn *code, unsigned long val,
stage2:
/* 2nd stage: dereference memory if needed */
- while (code->op == FETCH_OP_DEREF) {
- lval = val;
- ret = probe_mem_read(&val, (void *)val + code->offset,
- sizeof(val));
+ do {
+ if (code->op == FETCH_OP_DEREF) {
+ lval = val;
+ ret = probe_mem_read(&val, (void *)val + code->offset,
+ sizeof(val));
+ } else if (code->op == FETCH_OP_UDEREF) {
+ lval = val;
+ ret = probe_mem_read_user(&val,
+ (void *)val + code->offset, sizeof(val));
+ } else
+ break;
if (ret)
return ret;
code++;
- }
+ } while (1);
s3 = code;
stage3:
@@ -91,6 +103,10 @@ stage3:
ret = fetch_store_strlen(val + code->offset);
code++;
goto array;
+ } else if (code->op == FETCH_OP_ST_USTRING) {
+ ret += fetch_store_strlen_user(val + code->offset);
+ code++;
+ goto array;
} else
return -EILSEQ;
}
@@ -102,10 +118,17 @@ stage3:
case FETCH_OP_ST_MEM:
probe_mem_read(dest, (void *)val + code->offset, code->size);
break;
+ case FETCH_OP_ST_UMEM:
+ probe_mem_read_user(dest, (void *)val + code->offset, code->size);
+ break;
case FETCH_OP_ST_STRING:
loc = *(u32 *)dest;
ret = fetch_store_string(val + code->offset, dest, base);
break;
+ case FETCH_OP_ST_USTRING:
+ loc = *(u32 *)dest;
+ ret = fetch_store_string_user(val + code->offset, dest, base);
+ break;
default:
return -EILSEQ;
}
@@ -123,7 +146,8 @@ array:
total += ret;
if (++i < code->param) {
code = s3;
- if (s3->op != FETCH_OP_ST_STRING) {
+ if (s3->op != FETCH_OP_ST_STRING &&
+ s3->op != FETCH_OP_ST_USTRING) {
dest += s3->size;
val += s3->size;
goto stage3;
diff --git a/kernel/trace/trace_uprobe.c b/kernel/trace/trace_uprobe.c
index eb7e06b54741..3d6b868830f3 100644
--- a/kernel/trace/trace_uprobe.c
+++ b/kernel/trace/trace_uprobe.c
@@ -140,6 +140,13 @@ probe_mem_read(void *dest, void *src, size_t size)
return copy_from_user(dest, vaddr, size) ? -EFAULT : 0;
}
+
+static nokprobe_inline int
+probe_mem_read_user(void *dest, void *src, size_t size)
+{
+ return probe_mem_read(dest, src, size);
+}
+
/*
* Fetch a null-terminated string. Caller MUST set *(u32 *)dest with max
* length and relative data location.
@@ -176,6 +183,12 @@ fetch_store_string(unsigned long addr, void *dest, void *base)
return ret;
}
+static nokprobe_inline int
+fetch_store_string_user(unsigned long addr, void *dest, void *base)
+{
+ return fetch_store_string(addr, dest, base);
+}
+
/* Return the length of the string, including the terminating NUL byte */
static nokprobe_inline int
fetch_store_strlen(unsigned long addr)
@@ -191,6 +204,12 @@ fetch_store_strlen(unsigned long addr)
return (len > MAX_STRING_SIZE) ? 0 : len;
}
+static nokprobe_inline int
+fetch_store_strlen_user(unsigned long addr)
+{
+ return fetch_store_strlen(addr);
+}
+
static unsigned long translate_user_vaddr(unsigned long file_offset)
{
unsigned long base_addr;
diff --git a/kernel/tracepoint.c b/kernel/tracepoint.c
index 46f2ab1e08a9..df3ade14ccbd 100644
--- a/kernel/tracepoint.c
+++ b/kernel/tracepoint.c
@@ -1,19 +1,6 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
/*
* Copyright (C) 2008-2014 Mathieu Desnoyers
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
*/
#include <linux/module.h>
#include <linux/mutex.h>
diff --git a/kernel/tsacct.c b/kernel/tsacct.c
index 370724b45391..7be3e7530841 100644
--- a/kernel/tsacct.c
+++ b/kernel/tsacct.c
@@ -1,19 +1,8 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
/*
* tsacct.c - System accounting over taskstats interface
*
* Copyright (C) Jay Lan, <jlan@sgi.com>
- *
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
*/
#include <linux/kernel.h>
diff --git a/kernel/umh.c b/kernel/umh.c
index d937cbad903a..7f255b5a8845 100644
--- a/kernel/umh.c
+++ b/kernel/umh.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0-only
/*
* umh - the kernel usermode helper
*/
diff --git a/kernel/up.c b/kernel/up.c
index ff536f9cc8a2..483c9962c999 100644
--- a/kernel/up.c
+++ b/kernel/up.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0-only
/*
* Uniprocessor-only support functions. The counterpart to kernel/smp.c
*/
diff --git a/kernel/user-return-notifier.c b/kernel/user-return-notifier.c
index 9586b670a5b2..870ecd7c63ed 100644
--- a/kernel/user-return-notifier.c
+++ b/kernel/user-return-notifier.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0-only
#include <linux/user-return-notifier.h>
#include <linux/percpu.h>
diff --git a/kernel/user.c b/kernel/user.c
index 88b834f0eebc..78b17e36e705 100644
--- a/kernel/user.c
+++ b/kernel/user.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0-only
/*
* The "user cache".
*
diff --git a/kernel/workqueue.c b/kernel/workqueue.c
index 9657315405de..95aea04ff722 100644
--- a/kernel/workqueue.c
+++ b/kernel/workqueue.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0-only
/*
* kernel/workqueue.c - generic async execution with shared worker pool
*