From 39f19ebbf57b403695f7b5f9cf322fe1ddb5d7fb Mon Sep 17 00:00:00 2001
From: Alexei Starovoitov
Date: Mon, 9 Jan 2017 10:19:50 -0800
Subject: bpf: rename ARG_PTR_TO_STACK

since ARG_PTR_TO_STACK is no longer just pointer to stack
rename it to ARG_PTR_TO_MEM and adjust comment.

Signed-off-by: Alexei Starovoitov
Acked-by: Daniel Borkmann
Signed-off-by: David S. Miller
---
 kernel/trace/bpf_trace.c | 20 ++++++++++----------
 1 file changed, 10 insertions(+), 10 deletions(-)

(limited to 'kernel/trace')

diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c
index fa77311dadb2..f883c43c96f3 100644
--- a/kernel/trace/bpf_trace.c
+++ b/kernel/trace/bpf_trace.c
@@ -76,8 +76,8 @@ static const struct bpf_func_proto bpf_probe_read_proto = {
         .func = bpf_probe_read,
         .gpl_only = true,
         .ret_type = RET_INTEGER,
-        .arg1_type = ARG_PTR_TO_RAW_STACK,
-        .arg2_type = ARG_CONST_STACK_SIZE,
+        .arg1_type = ARG_PTR_TO_UNINIT_MEM,
+        .arg2_type = ARG_CONST_SIZE,
         .arg3_type = ARG_ANYTHING,
 };

@@ -109,8 +109,8 @@ static const struct bpf_func_proto bpf_probe_write_user_proto = {
         .gpl_only = true,
         .ret_type = RET_INTEGER,
         .arg1_type = ARG_ANYTHING,
-        .arg2_type = ARG_PTR_TO_STACK,
-        .arg3_type = ARG_CONST_STACK_SIZE,
+        .arg2_type = ARG_PTR_TO_MEM,
+        .arg3_type = ARG_CONST_SIZE,
 };

 static const struct bpf_func_proto *bpf_get_probe_write_proto(void)
@@ -213,8 +213,8 @@ static const struct bpf_func_proto bpf_trace_printk_proto = {
         .func = bpf_trace_printk,
         .gpl_only = true,
         .ret_type = RET_INTEGER,
-        .arg1_type = ARG_PTR_TO_STACK,
-        .arg2_type = ARG_CONST_STACK_SIZE,
+        .arg1_type = ARG_PTR_TO_MEM,
+        .arg2_type = ARG_CONST_SIZE,
 };

 const struct bpf_func_proto *bpf_get_trace_printk_proto(void)
@@ -329,8 +329,8 @@ static const struct bpf_func_proto bpf_perf_event_output_proto = {
         .arg1_type = ARG_PTR_TO_CTX,
         .arg2_type = ARG_CONST_MAP_PTR,
         .arg3_type = ARG_ANYTHING,
-        .arg4_type = ARG_PTR_TO_STACK,
-        .arg5_type = ARG_CONST_STACK_SIZE,
+        .arg4_type = ARG_PTR_TO_MEM,
+        .arg5_type = ARG_CONST_SIZE,
 };

 static DEFINE_PER_CPU(struct pt_regs, bpf_pt_regs);
@@ -492,8 +492,8 @@ static const struct bpf_func_proto bpf_perf_event_output_proto_tp = {
         .arg1_type = ARG_PTR_TO_CTX,
         .arg2_type = ARG_CONST_MAP_PTR,
         .arg3_type = ARG_ANYTHING,
-        .arg4_type = ARG_PTR_TO_STACK,
-        .arg5_type = ARG_CONST_STACK_SIZE,
+        .arg4_type = ARG_PTR_TO_MEM,
+        .arg5_type = ARG_CONST_SIZE,
 };

 BPF_CALL_3(bpf_get_stackid_tp, void *, tp_buff, struct bpf_map *, map,
--
cgit v1.2.1
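
The rename matters because helpers declared with these argument types accept any
verifier-tracked memory region, not only the program stack. As a minimal sketch of
what that enables, written in the samples/bpf style of the era (the map, the struct
layout and the attach point below are invented for illustration and are not part of
the patch), a program can now probe kernel memory straight into a map value instead
of bouncing through a stack buffer:

#include <uapi/linux/bpf.h>
#include <linux/ptrace.h>
#include "bpf_helpers.h"

struct event {
        char comm[16];
};

/* Hypothetical single-slot array map used as the destination buffer. */
struct bpf_map_def SEC("maps") scratch_map = {
        .type = BPF_MAP_TYPE_ARRAY,
        .key_size = sizeof(__u32),
        .value_size = sizeof(struct event),
        .max_entries = 1,
};

SEC("kprobe/sys_open")
int bpf_prog(struct pt_regs *ctx)
{
        __u32 key = 0;
        struct event *e = bpf_map_lookup_elem(&scratch_map, &key);

        if (!e)
                return 0;
        /* dst is a map value rather than a stack slot: accepted because arg1
         * of bpf_probe_read() is now ARG_PTR_TO_UNINIT_MEM instead of a
         * stack-only pointer type.
         */
        bpf_probe_read(e->comm, sizeof(e->comm), (void *)PT_REGS_PARM1(ctx));
        return 0;
}

char _license[] SEC("license") = "GPL";
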
From 6b8cc1d11ef75c5b9c530b3d0d148f3c2dd25f93 Mon Sep 17 00:00:00 2001
From: Daniel Borkmann
Date: Thu, 12 Jan 2017 11:51:32 +0100
Subject: bpf: pass original insn directly to convert_ctx_access

Currently, when calling convert_ctx_access() callback for the various
program types, we pass in insn->dst_reg, insn->src_reg, insn->off from
the original instruction. This information is needed to rewrite the
instruction that is based on the user ctx structure into a kernel
representation for the ctx. As we'd like to allow access size beyond
just BPF_W, we'd need also insn->code for that in order to decode the
original access size. Given that, lets just pass insn directly to the
convert_ctx_access() callback and work on that to not clutter the
callback with even more arguments we need to pass when everything is
already contained in insn. So lets go through that once, no functional
change.

Signed-off-by: Daniel Borkmann
Acked-by: Alexei Starovoitov
Signed-off-by: David S. Miller
---
 kernel/trace/bpf_trace.c | 15 ++++++++-------
 1 file changed, 8 insertions(+), 7 deletions(-)

(limited to 'kernel/trace')

diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c
index f883c43c96f3..1860e7f1e5a8 100644
--- a/kernel/trace/bpf_trace.c
+++ b/kernel/trace/bpf_trace.c
@@ -572,28 +572,29 @@ static bool pe_prog_is_valid_access(int off, int size, enum bpf_access_type type
         return true;
 }

-static u32 pe_prog_convert_ctx_access(enum bpf_access_type type, int dst_reg,
-                                      int src_reg, int ctx_off,
+static u32 pe_prog_convert_ctx_access(enum bpf_access_type type,
+                                      const struct bpf_insn *si,
                                       struct bpf_insn *insn_buf,
                                       struct bpf_prog *prog)
 {
         struct bpf_insn *insn = insn_buf;

-        switch (ctx_off) {
+        switch (si->off) {
         case offsetof(struct bpf_perf_event_data, sample_period):
                 BUILD_BUG_ON(FIELD_SIZEOF(struct perf_sample_data, period) != sizeof(u64));

                 *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct bpf_perf_event_data_kern,
-                                                       data), dst_reg, src_reg,
+                                                       data), si->dst_reg, si->src_reg,
                                       offsetof(struct bpf_perf_event_data_kern, data));
-                *insn++ = BPF_LDX_MEM(BPF_DW, dst_reg, dst_reg,
+                *insn++ = BPF_LDX_MEM(BPF_DW, si->dst_reg, si->dst_reg,
                                       offsetof(struct perf_sample_data, period));
                 break;
         default:
                 *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct bpf_perf_event_data_kern,
-                                                       regs), dst_reg, src_reg,
+                                                       regs), si->dst_reg, si->src_reg,
                                       offsetof(struct bpf_perf_event_data_kern, regs));
-                *insn++ = BPF_LDX_MEM(BPF_SIZEOF(long), dst_reg, dst_reg, ctx_off);
+                *insn++ = BPF_LDX_MEM(BPF_SIZEOF(long), si->dst_reg, si->dst_reg,
+                                      si->off);
                 break;
         }

--
cgit v1.2.1
From 2d071c643f1cd15a24172de4b5b7ae2adb93abbb Mon Sep 17 00:00:00 2001
From: Daniel Borkmann
Date: Sun, 15 Jan 2017 01:34:25 +0100
Subject: bpf, trace: make ctx access checks more robust

Make sure that ctx cannot potentially be accessed oob by asserting
explicitly that ctx access size into pt_regs for BPF_PROG_TYPE_KPROBE
programs must be within limits. In case some 32bit archs have pt_regs
not being a multiple of 8, then BPF_DW access could cause such access.

BPF_PROG_TYPE_KPROBE progs don't have a ctx conversion function since
there's no extra mapping needed. kprobe_prog_is_valid_access() didn't
enforce sizeof(long) as the only allowed access size, since LLVM can
generate non BPF_W/BPF_DW access to regs from time to time.

For BPF_PROG_TYPE_TRACEPOINT we don't have a ctx conversion either, so
add a BUILD_BUG_ON() check to make sure that BPF_DW access will not be
a similar issue in future (ctx works on event buffer as opposed to
pt_regs there).

Fixes: 2541517c32be ("tracing, perf: Implement BPF programs attached to kprobes")
Signed-off-by: Daniel Borkmann
Acked-by: Alexei Starovoitov
Signed-off-by: David S. Miller
---
 kernel/trace/bpf_trace.c | 9 +++++++++
 1 file changed, 9 insertions(+)

(limited to 'kernel/trace')

diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c
index 1860e7f1e5a8..c22a961d1a42 100644
--- a/kernel/trace/bpf_trace.c
+++ b/kernel/trace/bpf_trace.c
@@ -459,6 +459,13 @@ static bool kprobe_prog_is_valid_access(int off, int size, enum bpf_access_type
                 return false;
         if (off % size != 0)
                 return false;
+        /*
+         * Assertion for 32 bit to make sure last 8 byte access
+         * (BPF_DW) to the last 4 byte member is disallowed.
+         */
+        if (off + size > sizeof(struct pt_regs))
+                return false;
+
         return true;
 }

@@ -540,6 +547,8 @@ static bool tp_prog_is_valid_access(int off, int size, enum bpf_access_type type
                 return false;
         if (off % size != 0)
                 return false;
+
+        BUILD_BUG_ON(PERF_MAX_TRACE_SIZE % sizeof(__u64));
         return true;
 }

--
cgit v1.2.1

From a5e8c07059d0f0b31737408711d44794928ac218 Mon Sep 17 00:00:00 2001
From: Gianluca Borello
Date: Wed, 18 Jan 2017 17:55:49 +0000
Subject: bpf: add bpf_probe_read_str helper

Provide a simple helper with the same semantics of strncpy_from_unsafe():

int bpf_probe_read_str(void *dst, int size, const void *unsafe_addr)

This gives more flexibility to a bpf program. A typical use case is
intercepting a file name during sys_open(). The current approach is:

SEC("kprobe/sys_open")
void bpf_sys_open(struct pt_regs *ctx)
{
        char buf[PATHLEN]; // PATHLEN is defined to 256
        bpf_probe_read(buf, sizeof(buf), ctx->di);

        /* consume buf */
}

This is suboptimal because the size of the string needs to be estimated
at compile time, causing more memory to be copied than often necessary,
and can become more problematic if further processing on buf is done,
for example by pushing it to userspace via bpf_perf_event_output(),
since the real length of the string is unknown and the entire buffer
must be copied (and defining an unrolled strnlen() inside the bpf
program is a very inefficient and unfeasible approach).

With the new helper, the code can easily operate on the actual string
length rather than the buffer size:

SEC("kprobe/sys_open")
void bpf_sys_open(struct pt_regs *ctx)
{
        char buf[PATHLEN]; // PATHLEN is defined to 256
        int res = bpf_probe_read_str(buf, sizeof(buf), ctx->di);

        /* consume buf, for example push it to userspace via
         * bpf_perf_event_output(), but this time we can use
         * res (the string length) as event size, after checking
         * its boundaries.
         */
}

Another useful use case is when parsing individual process arguments or
individual environment variables navigating current->mm->arg_start and
current->mm->env_start: using this helper and the return value, one can
quickly iterate at the right offset of the memory area.

The code changes simply leverage the already existent
strncpy_from_unsafe() kernel function, which is safe to be called from
a bpf program as it is used in bpf_trace_printk().

Signed-off-by: Gianluca Borello
Acked-by: Alexei Starovoitov
Acked-by: Daniel Borkmann
Signed-off-by: David S. Miller
---
 kernel/trace/bpf_trace.c | 32 ++++++++++++++++++++++++++++++++
 1 file changed, 32 insertions(+)

(limited to 'kernel/trace')

diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c
index c22a961d1a42..424daa4586d1 100644
--- a/kernel/trace/bpf_trace.c
+++ b/kernel/trace/bpf_trace.c
@@ -395,6 +395,36 @@ static const struct bpf_func_proto bpf_current_task_under_cgroup_proto = {
         .arg2_type = ARG_ANYTHING,
 };

+BPF_CALL_3(bpf_probe_read_str, void *, dst, u32, size,
+           const void *, unsafe_ptr)
+{
+        int ret;
+
+        /*
+         * The strncpy_from_unsafe() call will likely not fill the entire
+         * buffer, but that's okay in this circumstance as we're probing
+         * arbitrary memory anyway similar to bpf_probe_read() and might
+         * as well probe the stack. Thus, memory is explicitly cleared
+         * only in error case, so that improper users ignoring return
+         * code altogether don't copy garbage; otherwise length of string
+         * is returned that can be used for bpf_perf_event_output() et al.
+         */
+        ret = strncpy_from_unsafe(dst, unsafe_ptr, size);
+        if (unlikely(ret < 0))
+                memset(dst, 0, size);
+
+        return ret;
+}
+
+static const struct bpf_func_proto bpf_probe_read_str_proto = {
+        .func = bpf_probe_read_str,
+        .gpl_only = true,
+        .ret_type = RET_INTEGER,
+        .arg1_type = ARG_PTR_TO_UNINIT_MEM,
+        .arg2_type = ARG_CONST_SIZE,
+        .arg3_type = ARG_ANYTHING,
+};
+
 static const struct bpf_func_proto *tracing_func_proto(enum bpf_func_id func_id)
 {
         switch (func_id) {
@@ -432,6 +462,8 @@ static const struct bpf_func_proto *tracing_func_proto(enum bpf_func_id func_id)
                 return &bpf_current_task_under_cgroup_proto;
         case BPF_FUNC_get_prandom_u32:
                 return &bpf_get_prandom_u32_proto;
+        case BPF_FUNC_probe_read_str:
+                return &bpf_probe_read_str_proto;
         default:
                 return NULL;
         }
--
cgit v1.2.1
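
The commit message describes the follow-on step ("use res as event size") only in
prose; a hedged sketch of that pattern follows. The perf event map, its user-space
reader and the PATHLEN value are assumptions made for the example, not taken from
the patch; only bpf_probe_read_str() itself and the need to bound res before
passing it as a size are.

#include <uapi/linux/bpf.h>
#include <linux/ptrace.h>
#include "bpf_helpers.h"

#define PATHLEN 256

/* Hypothetical perf event array; the loader is assumed to size and wire it up. */
struct bpf_map_def SEC("maps") events = {
        .type = BPF_MAP_TYPE_PERF_EVENT_ARRAY,
        .key_size = sizeof(int),
        .value_size = sizeof(__u32),
        .max_entries = 64,
};

SEC("kprobe/sys_open")
int bpf_sys_open(struct pt_regs *ctx)
{
        char buf[PATHLEN];
        int res = bpf_probe_read_str(buf, sizeof(buf),
                                     (void *)PT_REGS_PARM1(ctx));

        /* Push only the bytes actually copied; the verifier must be able to
         * prove the size argument lies within the bounds of buf, hence the
         * explicit check before the call.
         */
        if (res > 0 && res <= sizeof(buf))
                bpf_perf_event_output(ctx, &events, BPF_F_CURRENT_CPU,
                                      buf, res);
        return 0;
}

char _license[] SEC("license") = "GPL";
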
From 2acae0d5b0f73a8fb4b180bd13491feb96e55fc6 Mon Sep 17 00:00:00 2001
From: Daniel Borkmann
Date: Wed, 25 Jan 2017 02:28:16 +0100
Subject: trace: add variant without spacing in trace_print_hex_seq

For upcoming tracepoint support for BPF, we want to dump the program's
tag. Format should be similar to __print_hex(), but without spacing.
Add a __print_hex_str() variant for exactly that purpose that reuses
trace_print_hex_seq().

Signed-off-by: Daniel Borkmann
Cc: Steven Rostedt
Cc: Arnaldo Carvalho de Melo
Signed-off-by: David S. Miller
---
 kernel/trace/trace_output.c | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

(limited to 'kernel/trace')

diff --git a/kernel/trace/trace_output.c b/kernel/trace/trace_output.c
index 5d33a7352919..30a144b1b9ee 100644
--- a/kernel/trace/trace_output.c
+++ b/kernel/trace/trace_output.c
@@ -163,14 +163,15 @@ trace_print_bitmask_seq(struct trace_seq *p, void *bitmask_ptr,
 EXPORT_SYMBOL_GPL(trace_print_bitmask_seq);

 const char *
-trace_print_hex_seq(struct trace_seq *p, const unsigned char *buf, int buf_len)
+trace_print_hex_seq(struct trace_seq *p, const unsigned char *buf, int buf_len,
+                    bool spacing)
 {
         int i;
         const char *ret = trace_seq_buffer_ptr(p);

         for (i = 0; i < buf_len; i++)
-                trace_seq_printf(p, "%s%2.2x", i == 0 ? "" : " ", buf[i]);
-
+                trace_seq_printf(p, "%s%2.2x", !spacing || i == 0 ? "" : " ",
+                                 buf[i]);
         trace_seq_putc(p, 0);

         return ret;
--
cgit v1.2.1

From 3898fac1f488c76e0eef5b5267b4ba8112a82ac4 Mon Sep 17 00:00:00 2001
From: Daniel Borkmann
Date: Thu, 2 Feb 2017 17:09:54 +0100
Subject: trace: rename trace_print_hex_seq arg and add kdoc

Steven suggested to improve trace_print_hex_seq() a bit after commit
2acae0d5b0f7 ("trace: add variant without spacing in trace_print_hex_seq")
in two ways: i) by adding a kdoc comment for the helper function itself
and ii) by renaming 'spacing' argument into 'concatenate' to better
denote that we don't add spaces between each hex bytes.

Suggested-by: Steven Rostedt
Signed-off-by: Daniel Borkmann
Signed-off-by: David S. Miller
---
 kernel/trace/trace_output.c | 15 +++++++++++++--
 1 file changed, 13 insertions(+), 2 deletions(-)

(limited to 'kernel/trace')

diff --git a/kernel/trace/trace_output.c b/kernel/trace/trace_output.c
index 30a144b1b9ee..aea6a1218c7d 100644
--- a/kernel/trace/trace_output.c
+++ b/kernel/trace/trace_output.c
@@ -162,15 +162,26 @@ trace_print_bitmask_seq(struct trace_seq *p, void *bitmask_ptr,
 }
 EXPORT_SYMBOL_GPL(trace_print_bitmask_seq);

+/**
+ * trace_print_hex_seq - print buffer as hex sequence
+ * @p: trace seq struct to write to
+ * @buf: The buffer to print
+ * @buf_len: Length of @buf in bytes
+ * @concatenate: Print @buf as single hex string or with spacing
+ *
+ * Prints the passed buffer as a hex sequence either as a whole,
+ * single hex string if @concatenate is true or with spacing after
+ * each byte in case @concatenate is false.
+ */
 const char *
 trace_print_hex_seq(struct trace_seq *p, const unsigned char *buf, int buf_len,
-                    bool spacing)
+                    bool concatenate)
 {
         int i;
         const char *ret = trace_seq_buffer_ptr(p);

         for (i = 0; i < buf_len; i++)
-                trace_seq_printf(p, "%s%2.2x", !spacing || i == 0 ? "" : " ",
+                trace_seq_printf(p, "%s%2.2x", concatenate || i == 0 ? "" : " ",
                                  buf[i]);
         trace_seq_putc(p, 0);

--
cgit v1.2.1
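
The consumer that this pair of changes targets lives outside kernel/trace and is
not part of the diffs above. A hedged sketch of how a trace event might use the
no-spacing variant is shown below; the event and its fields are invented for
illustration, and it assumes the companion patch defining __print_hex_str() in the
trace event macros is applied.

/* In a trace event header, e.g. under include/trace/events/ (hypothetical). */
TRACE_EVENT(example_prog_tag,

        TP_PROTO(const u8 *tag, u32 len),

        TP_ARGS(tag, len),

        TP_STRUCT__entry(
                __dynamic_array(u8, tag, len)
        ),

        TP_fast_assign(
                memcpy(__get_dynamic_array(tag), tag, len);
        ),

        /* Prints "8d5e2a7b..." rather than the spaced "8d 5e 2a 7b ..." that
         * __print_hex() would produce.
         */
        TP_printk("prog tag %s",
                  __print_hex_str(__get_dynamic_array(tag),
                                  __get_dynamic_array_len(tag)))
);
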
From c78f8bdfa11fcceb9723c61212e4bd8f76c87f9e Mon Sep 17 00:00:00 2001
From: Daniel Borkmann
Date: Thu, 16 Feb 2017 22:24:48 +0100
Subject: bpf: mark all registered map/prog types as __ro_after_init

All map types and prog types are registered to the BPF core through
bpf_register_map_type() and bpf_register_prog_type() during init and
remain unchanged thereafter. As by design we don't (and never will)
have any pluggable code that can register to that at any later point
in time, lets mark all the existing bpf_{map,prog}_type_list objects
in the tree as __ro_after_init, so they can be moved to read-only
section from then onwards.

Signed-off-by: Daniel Borkmann
Acked-by: Alexei Starovoitov
Signed-off-by: David S. Miller
---
 kernel/trace/bpf_trace.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

(limited to 'kernel/trace')

diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c
index 424daa4586d1..cee9802cf3e0 100644
--- a/kernel/trace/bpf_trace.c
+++ b/kernel/trace/bpf_trace.c
@@ -506,7 +506,7 @@ static const struct bpf_verifier_ops kprobe_prog_ops = {
         .is_valid_access = kprobe_prog_is_valid_access,
 };

-static struct bpf_prog_type_list kprobe_tl = {
+static struct bpf_prog_type_list kprobe_tl __ro_after_init = {
         .ops = &kprobe_prog_ops,
         .type = BPF_PROG_TYPE_KPROBE,
 };
@@ -589,7 +589,7 @@ static const struct bpf_verifier_ops tracepoint_prog_ops = {
         .is_valid_access = tp_prog_is_valid_access,
 };

-static struct bpf_prog_type_list tracepoint_tl = {
+static struct bpf_prog_type_list tracepoint_tl __ro_after_init = {
         .ops = &tracepoint_prog_ops,
         .type = BPF_PROG_TYPE_TRACEPOINT,
 };
@@ -648,7 +648,7 @@ static const struct bpf_verifier_ops perf_event_prog_ops = {
         .convert_ctx_access = pe_prog_convert_ctx_access,
 };

-static struct bpf_prog_type_list perf_event_tl = {
+static struct bpf_prog_type_list perf_event_tl __ro_after_init = {
         .ops = &perf_event_prog_ops,
         .type = BPF_PROG_TYPE_PERF_EVENT,
 };
--
cgit v1.2.1
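
The registration the commit message refers to happens once at boot, which is what
makes the annotation safe: the list linkage embedded in these objects is written
from an initcall, before the __ro_after_init section is sealed read-only. A rough
sketch of that pattern as it sits near the hunks above (reconstructed from memory
of bpf_trace.c, so treat the exact function name as an assumption):

/* Runs during late init; __ro_after_init memory only becomes read-only once
 * init completes, so linking the type lists into the BPF core here is fine.
 */
static int __init register_kprobe_prog_ops(void)
{
        bpf_register_prog_type(&kprobe_tl);
        bpf_register_prog_type(&tracepoint_tl);
        bpf_register_prog_type(&perf_event_tl);
        return 0;
}
late_initcall(register_kprobe_prog_ops);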