author    Stephen Rothwell <sfr@canb.auug.org.au>  2018-03-19 11:33:27 +1100
committer Stephen Rothwell <sfr@canb.auug.org.au>  2018-03-19 11:33:27 +1100
commit    03c0cedab0a46722239a691961d846b5c1cfcde3 (patch)
tree      e63e64ece75dc3b1a217f7832e022cd2288c595e
parent    497d1b7c2c1ee88b17ac045a0b37144aa14eef52 (diff)
parent    318df9f01cffabf120b36daa96dfca273e46cbbf (diff)
download  linux-next-03c0cedab0a46722239a691961d846b5c1cfcde3.tar.gz
Merge remote-tracking branch 'bpf-next/master'
-rw-r--r--  include/uapi/linux/bpf.h                                |  22
-rw-r--r--  include/uapi/linux/bpf_perf_event.h                     |   1
-rw-r--r--  kernel/bpf/inode.c                                      |   3
-rw-r--r--  kernel/bpf/stackmap.c                                   | 257
-rw-r--r--  kernel/trace/bpf_trace.c                                |  20
-rw-r--r--  samples/bpf/trace_event_kern.c                          |   4
-rw-r--r--  samples/bpf/trace_event_user.c                          |  15
-rw-r--r--  tools/bpf/Makefile                                      |  78
-rw-r--r--  tools/bpf/bpftool/Makefile                              |   6
-rw-r--r--  tools/bpf/bpftool/xlated_dumper.h                       |   2
-rw-r--r--  tools/include/uapi/linux/bpf.h                          |  22
-rw-r--r--  tools/testing/selftests/bpf/Makefile                    |  12
-rw-r--r--  tools/testing/selftests/bpf/test_progs.c                | 164
-rw-r--r--  tools/testing/selftests/bpf/test_stacktrace_build_id.c  |  60
-rw-r--r--  tools/testing/selftests/bpf/urandom_read.c              |  22
15 files changed, 632 insertions(+), 56 deletions(-)
diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index 2a66769e5875..1e15d1724d89 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -231,6 +231,28 @@ enum bpf_attach_type {
#define BPF_F_RDONLY (1U << 3)
#define BPF_F_WRONLY (1U << 4)
+/* Flag for stack_map, store build_id+offset instead of pointer */
+#define BPF_F_STACK_BUILD_ID (1U << 5)
+
+enum bpf_stack_build_id_status {
+ /* user space needs an empty entry to identify the end of a trace */
+ BPF_STACK_BUILD_ID_EMPTY = 0,
+ /* with valid build_id and offset */
+ BPF_STACK_BUILD_ID_VALID = 1,
+ /* couldn't get build_id, fall back to ip */
+ BPF_STACK_BUILD_ID_IP = 2,
+};
+
+#define BPF_BUILD_ID_SIZE 20
+struct bpf_stack_build_id {
+ __s32 status;
+ unsigned char build_id[BPF_BUILD_ID_SIZE];
+ union {
+ __u64 offset;
+ __u64 ip;
+ };
+};
+
union bpf_attr {
struct { /* anonymous struct used by BPF_MAP_CREATE command */
__u32 map_type; /* one of enum bpf_map_type */
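With BPF_F_STACK_BUILD_ID set, each stack map value becomes an array of struct bpf_stack_build_id rather than of u64 ips, terminated by a BPF_STACK_BUILD_ID_EMPTY entry. A minimal user-space sketch of walking one such value follows; the depth constant is an assumption that must match the map's value_size, and the bpf_map_lookup_elem() call that fills the array is omitted:

#include <stdio.h>
#include <linux/bpf.h>

#define MAX_DEPTH 127	/* assumption: value_size / sizeof(struct bpf_stack_build_id) */

static void print_trace(const struct bpf_stack_build_id *ent)
{
	int i, j;

	for (i = 0; i < MAX_DEPTH; i++) {
		if (ent[i].status == BPF_STACK_BUILD_ID_EMPTY)
			break;	/* empty entry marks the end of the trace */
		if (ent[i].status == BPF_STACK_BUILD_ID_VALID) {
			for (j = 0; j < BPF_BUILD_ID_SIZE; j++)
				printf("%02x", ent[i].build_id[j]);
			printf("+0x%llx\n", ent[i].offset);
		} else {	/* BPF_STACK_BUILD_ID_IP: raw ip, no build_id */
			printf("ip 0x%llx\n", ent[i].ip);
		}
	}
}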
diff --git a/include/uapi/linux/bpf_perf_event.h b/include/uapi/linux/bpf_perf_event.h
index 8f95303f9d80..eb1b9d21250c 100644
--- a/include/uapi/linux/bpf_perf_event.h
+++ b/include/uapi/linux/bpf_perf_event.h
@@ -13,6 +13,7 @@
struct bpf_perf_event_data {
bpf_user_pt_regs_t regs;
__u64 sample_period;
+ __u64 addr;
};
#endif /* _UAPI__LINUX_BPF_PERF_EVENT_H__ */
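The new addr field exposes perf_sample_data->addr to BPF programs attached to perf events; it reads as zero unless the event was opened with PERF_SAMPLE_ADDR (see the PEBS sample in trace_event_user.c below). A minimal sketch of a program consuming it, assuming the kernel samples' bpf_helpers.h environment:

#include <linux/bpf.h>
#include <linux/bpf_perf_event.h>
#include "bpf_helpers.h"

SEC("perf_event")
int on_sample(struct bpf_perf_event_data *ctx)
{
	char fmt[] = "sampled address: %llx\n";

	if (ctx->addr)	/* zero unless PERF_SAMPLE_ADDR was requested */
		bpf_trace_printk(fmt, sizeof(fmt), ctx->addr);
	return 0;
}

char _license[] SEC("license") = "GPL";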
diff --git a/kernel/bpf/inode.c b/kernel/bpf/inode.c
index 81e2f6995adb..bf6da59ae0d0 100644
--- a/kernel/bpf/inode.c
+++ b/kernel/bpf/inode.c
@@ -178,6 +178,9 @@ static int bpf_mkmap(struct dentry *dentry, umode_t mode, void *arg)
static struct dentry *
bpf_lookup(struct inode *dir, struct dentry *dentry, unsigned flags)
{
+ /* Dots in names (e.g. "/sys/fs/bpf/foo.bar") are reserved for future
+ * extensions.
+ */
if (strchr(dentry->d_name.name, '.'))
return ERR_PTR(-EPERM);
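From user space, the rejected lookup surfaces as EPERM when pinning to a dotted name. A hypothetical sketch using libbpf's bpf_obj_pin(); map_fd is assumed to be a valid BPF object fd:

#include <errno.h>
#include <stdio.h>
#include <bpf/bpf.h>

static void try_dotted_pin(int map_fd)
{
	/* '.' in a bpffs name is reserved, so this pin is expected to fail */
	if (bpf_obj_pin(map_fd, "/sys/fs/bpf/foo.bar") < 0)
		fprintf(stderr, "pin rejected, errno %d (EPERM == %d)\n",
			errno, EPERM);
}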
diff --git a/kernel/bpf/stackmap.c b/kernel/bpf/stackmap.c
index b0ecf43f5894..57eeb1234b67 100644
--- a/kernel/bpf/stackmap.c
+++ b/kernel/bpf/stackmap.c
@@ -9,16 +9,19 @@
#include <linux/filter.h>
#include <linux/stacktrace.h>
#include <linux/perf_event.h>
+#include <linux/elf.h>
+#include <linux/pagemap.h>
#include "percpu_freelist.h"
-#define STACK_CREATE_FLAG_MASK \
- (BPF_F_NUMA_NODE | BPF_F_RDONLY | BPF_F_WRONLY)
+#define STACK_CREATE_FLAG_MASK \
+ (BPF_F_NUMA_NODE | BPF_F_RDONLY | BPF_F_WRONLY | \
+ BPF_F_STACK_BUILD_ID)
struct stack_map_bucket {
struct pcpu_freelist_node fnode;
u32 hash;
u32 nr;
- u64 ip[];
+ u64 data[];
};
struct bpf_stack_map {
@@ -29,6 +32,17 @@ struct bpf_stack_map {
struct stack_map_bucket *buckets[];
};
+static inline bool stack_map_use_build_id(struct bpf_map *map)
+{
+ return (map->map_flags & BPF_F_STACK_BUILD_ID);
+}
+
+static inline int stack_map_data_size(struct bpf_map *map)
+{
+ return stack_map_use_build_id(map) ?
+ sizeof(struct bpf_stack_build_id) : sizeof(u64);
+}
+
static int prealloc_elems_and_freelist(struct bpf_stack_map *smap)
{
u32 elem_size = sizeof(struct stack_map_bucket) + smap->map.value_size;
@@ -68,8 +82,16 @@ static struct bpf_map *stack_map_alloc(union bpf_attr *attr)
/* check sanity of attributes */
if (attr->max_entries == 0 || attr->key_size != 4 ||
- value_size < 8 || value_size % 8 ||
- value_size / 8 > sysctl_perf_event_max_stack)
+ value_size < 8 || value_size % 8)
+ return ERR_PTR(-EINVAL);
+
+ BUILD_BUG_ON(sizeof(struct bpf_stack_build_id) % sizeof(u64));
+ if (attr->map_flags & BPF_F_STACK_BUILD_ID) {
+ if (value_size % sizeof(struct bpf_stack_build_id) ||
+ value_size / sizeof(struct bpf_stack_build_id)
+ > sysctl_perf_event_max_stack)
+ return ERR_PTR(-EINVAL);
+ } else if (value_size / 8 > sysctl_perf_event_max_stack)
return ERR_PTR(-EINVAL);
/* hash table size must be power of 2 */
@@ -114,13 +136,184 @@ free_smap:
return ERR_PTR(err);
}
+#define BPF_BUILD_ID 3
+/*
+ * Parse build id from the note segment. This logic can be shared between
+ * 32-bit and 64-bit systems, because Elf32_Nhdr and Elf64_Nhdr are
+ * identical.
+ */
+static inline int stack_map_parse_build_id(void *page_addr,
+ unsigned char *build_id,
+ void *note_start,
+ Elf32_Word note_size)
+{
+ Elf32_Word note_offs = 0, new_offs;
+
+ /* check for overflow */
+ if (note_start < page_addr || note_start + note_size < note_start)
+ return -EINVAL;
+
+ /* only supports a note that fits in the first page */
+ if (note_start + note_size > page_addr + PAGE_SIZE)
+ return -EINVAL;
+
+ while (note_offs + sizeof(Elf32_Nhdr) < note_size) {
+ Elf32_Nhdr *nhdr = (Elf32_Nhdr *)(note_start + note_offs);
+
+ if (nhdr->n_type == BPF_BUILD_ID &&
+ nhdr->n_namesz == sizeof("GNU") &&
+ nhdr->n_descsz == BPF_BUILD_ID_SIZE) {
+ memcpy(build_id,
+ note_start + note_offs +
+ ALIGN(sizeof("GNU"), 4) + sizeof(Elf32_Nhdr),
+ BPF_BUILD_ID_SIZE);
+ return 0;
+ }
+ new_offs = note_offs + sizeof(Elf32_Nhdr) +
+ ALIGN(nhdr->n_namesz, 4) + ALIGN(nhdr->n_descsz, 4);
+ if (new_offs <= note_offs) /* overflow */
+ break;
+ note_offs = new_offs;
+ }
+ return -EINVAL;
+}
+
+/* Parse build ID from 32-bit ELF */
+static int stack_map_get_build_id_32(void *page_addr,
+ unsigned char *build_id)
+{
+ Elf32_Ehdr *ehdr = (Elf32_Ehdr *)page_addr;
+ Elf32_Phdr *phdr;
+ int i;
+
+ /* only supports a phdr table that fits in the first page */
+ if (ehdr->e_phnum >
+ (PAGE_SIZE - sizeof(Elf32_Ehdr)) / sizeof(Elf32_Phdr))
+ return -EINVAL;
+
+ phdr = (Elf32_Phdr *)(page_addr + sizeof(Elf32_Ehdr));
+
+ for (i = 0; i < ehdr->e_phnum; ++i)
+ if (phdr[i].p_type == PT_NOTE)
+ return stack_map_parse_build_id(page_addr, build_id,
+ page_addr + phdr[i].p_offset,
+ phdr[i].p_filesz);
+ return -EINVAL;
+}
+
+/* Parse build ID from 64-bit ELF */
+static int stack_map_get_build_id_64(void *page_addr,
+ unsigned char *build_id)
+{
+ Elf64_Ehdr *ehdr = (Elf64_Ehdr *)page_addr;
+ Elf64_Phdr *phdr;
+ int i;
+
+ /* only supports a phdr table that fits in the first page */
+ if (ehdr->e_phnum >
+ (PAGE_SIZE - sizeof(Elf64_Ehdr)) / sizeof(Elf64_Phdr))
+ return -EINVAL;
+
+ phdr = (Elf64_Phdr *)(page_addr + sizeof(Elf64_Ehdr));
+
+ for (i = 0; i < ehdr->e_phnum; ++i)
+ if (phdr[i].p_type == PT_NOTE)
+ return stack_map_parse_build_id(page_addr, build_id,
+ page_addr + phdr[i].p_offset,
+ phdr[i].p_filesz);
+ return -EINVAL;
+}
+
+/* Parse build ID of ELF file mapped to vma */
+static int stack_map_get_build_id(struct vm_area_struct *vma,
+ unsigned char *build_id)
+{
+ Elf32_Ehdr *ehdr;
+ struct page *page;
+ void *page_addr;
+ int ret;
+
+ /* only works for page backed storage */
+ if (!vma->vm_file)
+ return -EINVAL;
+
+ page = find_get_page(vma->vm_file->f_mapping, 0);
+ if (!page)
+ return -EFAULT; /* page not mapped */
+
+ ret = -EINVAL;
+ page_addr = page_address(page);
+ ehdr = (Elf32_Ehdr *)page_addr;
+
+ /* compare magic: \x7f "ELF" */
+ if (memcmp(ehdr->e_ident, ELFMAG, SELFMAG) != 0)
+ goto out;
+
+ /* only support executable files and shared objects */
+ if (ehdr->e_type != ET_EXEC && ehdr->e_type != ET_DYN)
+ goto out;
+
+ if (ehdr->e_ident[EI_CLASS] == ELFCLASS32)
+ ret = stack_map_get_build_id_32(page_addr, build_id);
+ else if (ehdr->e_ident[EI_CLASS] == ELFCLASS64)
+ ret = stack_map_get_build_id_64(page_addr, build_id);
+out:
+ put_page(page);
+ return ret;
+}
+
+static void stack_map_get_build_id_offset(struct bpf_map *map,
+ struct stack_map_bucket *bucket,
+ u64 *ips, u32 trace_nr, bool user)
+{
+ int i;
+ struct vm_area_struct *vma;
+ struct bpf_stack_build_id *id_offs;
+
+ bucket->nr = trace_nr;
+ id_offs = (struct bpf_stack_build_id *)bucket->data;
+
+ /*
+ * We cannot do up_read() in nmi context, so build_id lookup is
+ * only supported for non-nmi events. If at some point, it is
+ * possible to run find_vma() without taking the semaphore, we
+ * would like to allow build_id lookup in nmi context.
+ *
+ * Same fallback is used for kernel stack (!user) on a stackmap
+ * with build_id.
+ */
+ if (!user || !current || !current->mm || in_nmi() ||
+ down_read_trylock(&current->mm->mmap_sem) == 0) {
+ /* cannot access current->mm, fall back to ips */
+ for (i = 0; i < trace_nr; i++) {
+ id_offs[i].status = BPF_STACK_BUILD_ID_IP;
+ id_offs[i].ip = ips[i];
+ }
+ return;
+ }
+
+ for (i = 0; i < trace_nr; i++) {
+ vma = find_vma(current->mm, ips[i]);
+ if (!vma || stack_map_get_build_id(vma, id_offs[i].build_id)) {
+ /* per entry fall back to ips */
+ id_offs[i].status = BPF_STACK_BUILD_ID_IP;
+ id_offs[i].ip = ips[i];
+ continue;
+ }
+ id_offs[i].offset = (vma->vm_pgoff << PAGE_SHIFT) + ips[i]
+ - vma->vm_start;
+ id_offs[i].status = BPF_STACK_BUILD_ID_VALID;
+ }
+ up_read(&current->mm->mmap_sem);
+}
+
BPF_CALL_3(bpf_get_stackid, struct pt_regs *, regs, struct bpf_map *, map,
u64, flags)
{
struct bpf_stack_map *smap = container_of(map, struct bpf_stack_map, map);
struct perf_callchain_entry *trace;
struct stack_map_bucket *bucket, *new_bucket, *old_bucket;
- u32 max_depth = map->value_size / 8;
+ u32 max_depth = map->value_size / stack_map_data_size(map);
/* stack_map_alloc() checks that max_depth <= sysctl_perf_event_max_stack */
u32 init_nr = sysctl_perf_event_max_stack - max_depth;
u32 skip = flags & BPF_F_SKIP_FIELD_MASK;
@@ -128,6 +321,7 @@ BPF_CALL_3(bpf_get_stackid, struct pt_regs *, regs, struct bpf_map *, map,
bool user = flags & BPF_F_USER_STACK;
bool kernel = !user;
u64 *ips;
+ bool hash_matches;
if (unlikely(flags & ~(BPF_F_SKIP_FIELD_MASK | BPF_F_USER_STACK |
BPF_F_FAST_STACK_CMP | BPF_F_REUSE_STACKID)))
@@ -156,24 +350,43 @@ BPF_CALL_3(bpf_get_stackid, struct pt_regs *, regs, struct bpf_map *, map,
id = hash & (smap->n_buckets - 1);
bucket = READ_ONCE(smap->buckets[id]);
- if (bucket && bucket->hash == hash) {
- if (flags & BPF_F_FAST_STACK_CMP)
+ hash_matches = bucket && bucket->hash == hash;
+ /* fast cmp */
+ if (hash_matches && flags & BPF_F_FAST_STACK_CMP)
+ return id;
+
+ if (stack_map_use_build_id(map)) {
+ /* for build_id+offset, pop a bucket before slow cmp */
+ new_bucket = (struct stack_map_bucket *)
+ pcpu_freelist_pop(&smap->freelist);
+ if (unlikely(!new_bucket))
+ return -ENOMEM;
+ stack_map_get_build_id_offset(map, new_bucket, ips,
+ trace_nr, user);
+ trace_len = trace_nr * sizeof(struct bpf_stack_build_id);
+ if (hash_matches && bucket->nr == trace_nr &&
+ memcmp(bucket->data, new_bucket->data, trace_len) == 0) {
+ pcpu_freelist_push(&smap->freelist, &new_bucket->fnode);
return id;
- if (bucket->nr == trace_nr &&
- memcmp(bucket->ip, ips, trace_len) == 0)
+ }
+ if (bucket && !(flags & BPF_F_REUSE_STACKID)) {
+ pcpu_freelist_push(&smap->freelist, &new_bucket->fnode);
+ return -EEXIST;
+ }
+ } else {
+ if (hash_matches && bucket->nr == trace_nr &&
+ memcmp(bucket->data, ips, trace_len) == 0)
return id;
+ if (bucket && !(flags & BPF_F_REUSE_STACKID))
+ return -EEXIST;
+
+ new_bucket = (struct stack_map_bucket *)
+ pcpu_freelist_pop(&smap->freelist);
+ if (unlikely(!new_bucket))
+ return -ENOMEM;
+ memcpy(new_bucket->data, ips, trace_len);
}
- /* this call stack is not in the map, try to add it */
- if (bucket && !(flags & BPF_F_REUSE_STACKID))
- return -EEXIST;
-
- new_bucket = (struct stack_map_bucket *)
- pcpu_freelist_pop(&smap->freelist);
- if (unlikely(!new_bucket))
- return -ENOMEM;
-
- memcpy(new_bucket->ip, ips, trace_len);
new_bucket->hash = hash;
new_bucket->nr = trace_nr;
@@ -212,8 +425,8 @@ int bpf_stackmap_copy(struct bpf_map *map, void *key, void *value)
if (!bucket)
return -ENOENT;
- trace_len = bucket->nr * sizeof(u64);
- memcpy(value, bucket->ip, trace_len);
+ trace_len = bucket->nr * stack_map_data_size(map);
+ memcpy(value, bucket->data, trace_len);
memset(value + trace_len, 0, map->value_size - trace_len);
old_bucket = xchg(&smap->buckets[id], bucket);
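For completeness, a sketch of creating a build_id stack map from user space with libbpf's bpf_create_map(); the depth and entry count here are arbitrary choices, and the value size must satisfy the stack_map_alloc() checks above:

#include <linux/bpf.h>
#include <bpf/bpf.h>

#define MAX_DEPTH 127	/* must not exceed sysctl_perf_event_max_stack */

static int create_build_id_stackmap(void)
{
	/* value_size must be a whole multiple of sizeof(struct bpf_stack_build_id) */
	return bpf_create_map(BPF_MAP_TYPE_STACK_TRACE,
			      sizeof(__u32),	/* key: stack id */
			      sizeof(struct bpf_stack_build_id) * MAX_DEPTH,
			      128,		/* max_entries */
			      BPF_F_STACK_BUILD_ID);
}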
diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c
index c0a9e310d715..c634e093951f 100644
--- a/kernel/trace/bpf_trace.c
+++ b/kernel/trace/bpf_trace.c
@@ -726,8 +726,7 @@ const struct bpf_prog_ops tracepoint_prog_ops = {
static bool pe_prog_is_valid_access(int off, int size, enum bpf_access_type type,
struct bpf_insn_access_aux *info)
{
- const int size_sp = FIELD_SIZEOF(struct bpf_perf_event_data,
- sample_period);
+ const int size_u64 = sizeof(u64);
if (off < 0 || off >= sizeof(struct bpf_perf_event_data))
return false;
@@ -738,8 +737,13 @@ static bool pe_prog_is_valid_access(int off, int size, enum bpf_access_type type
switch (off) {
case bpf_ctx_range(struct bpf_perf_event_data, sample_period):
- bpf_ctx_record_field_size(info, size_sp);
- if (!bpf_ctx_narrow_access_ok(off, size, size_sp))
+ bpf_ctx_record_field_size(info, size_u64);
+ if (!bpf_ctx_narrow_access_ok(off, size, size_u64))
+ return false;
+ break;
+ case bpf_ctx_range(struct bpf_perf_event_data, addr):
+ bpf_ctx_record_field_size(info, size_u64);
+ if (!bpf_ctx_narrow_access_ok(off, size, size_u64))
return false;
break;
default:
@@ -766,6 +770,14 @@ static u32 pe_prog_convert_ctx_access(enum bpf_access_type type,
bpf_target_off(struct perf_sample_data, period, 8,
target_size));
break;
+ case offsetof(struct bpf_perf_event_data, addr):
+ *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct bpf_perf_event_data_kern,
+ data), si->dst_reg, si->src_reg,
+ offsetof(struct bpf_perf_event_data_kern, data));
+ *insn++ = BPF_LDX_MEM(BPF_DW, si->dst_reg, si->dst_reg,
+ bpf_target_off(struct perf_sample_data, addr, 8,
+ target_size));
+ break;
default:
*insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct bpf_perf_event_data_kern,
regs), si->dst_reg, si->src_reg,
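The two emitted loads amount to a pointer chase from the kernel-side context to perf_sample_data. Restated as plain C with simplified stand-in structs (illustration only, not the kernel definitions):

#include <stdio.h>

typedef unsigned long long u64;

struct sample_data { u64 addr; };			/* stand-in for perf_sample_data */
struct event_data_kern { struct sample_data *data; };	/* stand-in for bpf_perf_event_data_kern */

static u64 read_ctx_addr(struct event_data_kern *ctx)
{
	struct sample_data *data = ctx->data;	/* 1st BPF_LDX_MEM: fetch data pointer */
	return data->addr;			/* 2nd BPF_LDX_MEM (BPF_DW): fetch addr */
}

int main(void)
{
	struct sample_data d = { .addr = 0x1234 };
	struct event_data_kern k = { .data = &d };

	printf("addr = 0x%llx\n", read_ctx_addr(&k));
	return 0;
}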
diff --git a/samples/bpf/trace_event_kern.c b/samples/bpf/trace_event_kern.c
index a77a583d94d4..7068fbdde951 100644
--- a/samples/bpf/trace_event_kern.c
+++ b/samples/bpf/trace_event_kern.c
@@ -39,6 +39,7 @@ int bpf_prog1(struct bpf_perf_event_data *ctx)
{
char time_fmt1[] = "Time Enabled: %llu, Time Running: %llu";
char time_fmt2[] = "Get Time Failed, ErrCode: %d";
+ char addr_fmt[] = "Address recorded on event: %llx";
char fmt[] = "CPU-%d period %lld ip %llx";
u32 cpu = bpf_get_smp_processor_id();
struct bpf_perf_event_value value_buf;
@@ -64,6 +65,9 @@ int bpf_prog1(struct bpf_perf_event_data *ctx)
else
bpf_trace_printk(time_fmt2, sizeof(time_fmt2), ret);
+ if (ctx->addr != 0)
+ bpf_trace_printk(addr_fmt, sizeof(addr_fmt), ctx->addr);
+
val = bpf_map_lookup_elem(&counts, &key);
if (val)
(*val)++;
diff --git a/samples/bpf/trace_event_user.c b/samples/bpf/trace_event_user.c
index bf4f1b6d9a52..56f7a259a7c9 100644
--- a/samples/bpf/trace_event_user.c
+++ b/samples/bpf/trace_event_user.c
@@ -215,6 +215,17 @@ static void test_bpf_perf_event(void)
/* Intel Instruction Retired */
.config = 0xc0,
};
+ struct perf_event_attr attr_type_raw_lock_load = {
+ .sample_freq = SAMPLE_FREQ,
+ .freq = 1,
+ .type = PERF_TYPE_RAW,
+ /* Intel MEM_UOPS_RETIRED.LOCK_LOADS */
+ .config = 0x21d0,
+ /* Request to record lock address from PEBS */
+ .sample_type = PERF_SAMPLE_ADDR,
+ /* Record address value requires precise event */
+ .precise_ip = 2,
+ };
printf("Test HW_CPU_CYCLES\n");
test_perf_event_all_cpu(&attr_type_hw);
@@ -236,6 +247,10 @@ static void test_bpf_perf_event(void)
test_perf_event_all_cpu(&attr_type_raw);
test_perf_event_task(&attr_type_raw);
+ printf("Test Lock Load\n");
+ test_perf_event_all_cpu(&attr_type_raw_lock_load);
+ test_perf_event_task(&attr_type_raw_lock_load);
+
printf("*** PASS ***\n");
}
diff --git a/tools/bpf/Makefile b/tools/bpf/Makefile
index c8ec0ae16bf0..1ea545965ee3 100644
--- a/tools/bpf/Makefile
+++ b/tools/bpf/Makefile
@@ -1,19 +1,28 @@
# SPDX-License-Identifier: GPL-2.0
-prefix = /usr
+include ../scripts/Makefile.include
+
+prefix ?= /usr/local
CC = gcc
LEX = flex
YACC = bison
MAKE = make
+INSTALL ?= install
CFLAGS += -Wall -O2
-CFLAGS += -D__EXPORTED_HEADERS__ -I../../include/uapi -I../../include
+CFLAGS += -D__EXPORTED_HEADERS__ -I$(srctree)/include/uapi -I$(srctree)/include
ifeq ($(srctree),)
srctree := $(patsubst %/,%,$(dir $(CURDIR)))
srctree := $(patsubst %/,%,$(dir $(srctree)))
endif
+ifeq ($(V),1)
+ Q =
+else
+ Q = @
+endif
+
FEATURE_USER = .bpf
FEATURE_TESTS = libbfd disassembler-four-args
FEATURE_DISPLAY = libbfd disassembler-four-args
@@ -38,40 +47,59 @@ ifeq ($(feature-disassembler-four-args), 1)
CFLAGS += -DDISASM_FOUR_ARGS_SIGNATURE
endif
-%.yacc.c: %.y
- $(YACC) -o $@ -d $<
+$(OUTPUT)%.yacc.c: $(srctree)/tools/bpf/%.y
+ $(QUIET_BISON)$(YACC) -o $@ -d $<
-%.lex.c: %.l
- $(LEX) -o $@ $<
+$(OUTPUT)%.lex.c: $(srctree)/tools/bpf/%.l
+ $(QUIET_FLEX)$(LEX) -o $@ $<
-all: bpf_jit_disasm bpf_dbg bpf_asm bpftool
+$(OUTPUT)%.o: $(srctree)/tools/bpf/%.c
+ $(QUIET_CC)$(COMPILE.c) -o $@ $<
-bpf_jit_disasm : CFLAGS += -DPACKAGE='bpf_jit_disasm'
-bpf_jit_disasm : LDLIBS = -lopcodes -lbfd -ldl
-bpf_jit_disasm : bpf_jit_disasm.o
+$(OUTPUT)%.yacc.o: $(OUTPUT)%.yacc.c
+ $(QUIET_CC)$(COMPILE.c) -o $@ $<
+$(OUTPUT)%.lex.o: $(OUTPUT)%.lex.c
+ $(QUIET_CC)$(COMPILE.c) -o $@ $<
-bpf_dbg : LDLIBS = -lreadline
-bpf_dbg : bpf_dbg.o
+PROGS = $(OUTPUT)bpf_jit_disasm $(OUTPUT)bpf_dbg $(OUTPUT)bpf_asm
-bpf_asm : LDLIBS =
-bpf_asm : bpf_asm.o bpf_exp.yacc.o bpf_exp.lex.o
-bpf_exp.lex.o : bpf_exp.yacc.c
+all: $(PROGS) bpftool
-clean: bpftool_clean
- rm -rf *.o bpf_jit_disasm bpf_dbg bpf_asm bpf_exp.yacc.* bpf_exp.lex.*
+$(OUTPUT)bpf_jit_disasm: CFLAGS += -DPACKAGE='bpf_jit_disasm'
+$(OUTPUT)bpf_jit_disasm: $(OUTPUT)bpf_jit_disasm.o
+ $(QUIET_LINK)$(CC) $(CFLAGS) -o $@ $^ -lopcodes -lbfd -ldl
-install: bpftool_install
- install bpf_jit_disasm $(prefix)/bin/bpf_jit_disasm
- install bpf_dbg $(prefix)/bin/bpf_dbg
- install bpf_asm $(prefix)/bin/bpf_asm
+$(OUTPUT)bpf_dbg: $(OUTPUT)bpf_dbg.o
+ $(QUIET_LINK)$(CC) $(CFLAGS) -o $@ $^ -lreadline
+
+$(OUTPUT)bpf_asm: $(OUTPUT)bpf_asm.o $(OUTPUT)bpf_exp.yacc.o $(OUTPUT)bpf_exp.lex.o
+ $(QUIET_LINK)$(CC) $(CFLAGS) -o $@ $^
+
+$(OUTPUT)bpf_exp.lex.c: $(OUTPUT)bpf_exp.yacc.c
+
+clean: bpftool_clean
+ $(call QUIET_CLEAN, bpf-progs)
+ $(Q)rm -rf $(OUTPUT)*.o $(OUTPUT)bpf_jit_disasm $(OUTPUT)bpf_dbg \
+ $(OUTPUT)bpf_asm $(OUTPUT)bpf_exp.yacc.* $(OUTPUT)bpf_exp.lex.*
+ $(call QUIET_CLEAN, core-gen)
+ $(Q)rm -f $(OUTPUT)FEATURE-DUMP.bpf
+
+install: $(PROGS) bpftool_install
+ $(call QUIET_INSTALL, bpf_jit_disasm)
+ $(Q)$(INSTALL) -m 0755 -d $(DESTDIR)$(prefix)/bin
+ $(Q)$(INSTALL) $(OUTPUT)bpf_jit_disasm $(DESTDIR)$(prefix)/bin/bpf_jit_disasm
+ $(call QUIET_INSTALL, bpf_dbg)
+ $(Q)$(INSTALL) $(OUTPUT)bpf_dbg $(DESTDIR)$(prefix)/bin/bpf_dbg
+ $(call QUIET_INSTALL, bpf_asm)
+ $(Q)$(INSTALL) $(OUTPUT)bpf_asm $(DESTDIR)$(prefix)/bin/bpf_asm
bpftool:
- $(MAKE) -C bpftool
+ $(call descend,bpftool)
bpftool_install:
- $(MAKE) -C bpftool install
+ $(call descend,bpftool,install)
bpftool_clean:
- $(MAKE) -C bpftool clean
+ $(call descend,bpftool,clean)
-.PHONY: bpftool FORCE
+.PHONY: all install clean bpftool bpftool_install bpftool_clean
diff --git a/tools/bpf/bpftool/Makefile b/tools/bpf/bpftool/Makefile
index 26901ec87361..4e69782c4a79 100644
--- a/tools/bpf/bpftool/Makefile
+++ b/tools/bpf/bpftool/Makefile
@@ -38,7 +38,7 @@ bash_compdir ?= /usr/share/bash-completion/completions
CC = gcc
CFLAGS += -O2
-CFLAGS += -W -Wall -Wextra -Wno-unused-parameter -Wshadow
+CFLAGS += -W -Wall -Wextra -Wno-unused-parameter -Wshadow -Wno-missing-field-initializers
CFLAGS += -DPACKAGE='"bpftool"' -D__EXPORTED_HEADERS__ -I$(srctree)/tools/include/uapi -I$(srctree)/tools/include -I$(srctree)/tools/lib/bpf -I$(srctree)/kernel/bpf/
CFLAGS += -DBPFTOOL_VERSION='"$(BPFTOOL_VERSION)"'
LIBS = -lelf -lbfd -lopcodes $(LIBBPF)
@@ -70,7 +70,7 @@ ifeq ($(feature-disassembler-four-args), 1)
CFLAGS += -DDISASM_FOUR_ARGS_SIGNATURE
endif
-include $(wildcard *.d)
+include $(wildcard $(OUTPUT)*.d)
all: $(OUTPUT)bpftool
@@ -89,6 +89,8 @@ $(OUTPUT)%.o: %.c
clean: $(LIBBPF)-clean
$(call QUIET_CLEAN, bpftool)
$(Q)$(RM) $(OUTPUT)bpftool $(OUTPUT)*.o $(OUTPUT)*.d
+ $(call QUIET_CLEAN, core-gen)
+ $(Q)$(RM) $(OUTPUT)FEATURE-DUMP.bpftool
install: $(OUTPUT)bpftool
$(call QUIET_INSTALL, bpftool)
diff --git a/tools/bpf/bpftool/xlated_dumper.h b/tools/bpf/bpftool/xlated_dumper.h
index 51c935d38ae2..b34affa7ef2d 100644
--- a/tools/bpf/bpftool/xlated_dumper.h
+++ b/tools/bpf/bpftool/xlated_dumper.h
@@ -49,7 +49,7 @@ struct dump_data {
unsigned long address_call_base;
struct kernel_sym *sym_mapping;
__u32 sym_count;
- char scratch_buff[SYM_MAX_NAME];
+ char scratch_buff[SYM_MAX_NAME + 8];
};
void kernel_syms_load(struct dump_data *dd);
diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h
index db6bdc375126..1944d0aee7e8 100644
--- a/tools/include/uapi/linux/bpf.h
+++ b/tools/include/uapi/linux/bpf.h
@@ -231,6 +231,28 @@ enum bpf_attach_type {
#define BPF_F_RDONLY (1U << 3)
#define BPF_F_WRONLY (1U << 4)
+/* Flag for stack_map, store build_id+offset instead of pointer */
+#define BPF_F_STACK_BUILD_ID (1U << 5)
+
+enum bpf_stack_build_id_status {
+ /* user space needs an empty entry to identify the end of a trace */
+ BPF_STACK_BUILD_ID_EMPTY = 0,
+ /* with valid build_id and offset */
+ BPF_STACK_BUILD_ID_VALID = 1,
+ /* couldn't get build_id, fall back to ip */
+ BPF_STACK_BUILD_ID_IP = 2,
+};
+
+#define BPF_BUILD_ID_SIZE 20
+struct bpf_stack_build_id {
+ __s32 status;
+ unsigned char build_id[BPF_BUILD_ID_SIZE];
+ union {
+ __u64 offset;
+ __u64 ip;
+ };
+};
+
union bpf_attr {
struct { /* anonymous struct used by BPF_MAP_CREATE command */
__u32 map_type; /* one of enum bpf_map_type */
diff --git a/tools/testing/selftests/bpf/Makefile b/tools/testing/selftests/bpf/Makefile
index 8567a858b789..b0d29fd5e51c 100644
--- a/tools/testing/selftests/bpf/Makefile
+++ b/tools/testing/selftests/bpf/Makefile
@@ -13,6 +13,14 @@ endif
CFLAGS += -Wall -O2 -I$(APIDIR) -I$(LIBDIR) -I$(GENDIR) $(GENFLAGS) -I../../../include
LDLIBS += -lcap -lelf -lrt -lpthread
+TEST_CUSTOM_PROGS = $(OUTPUT)/urandom_read
+all: $(TEST_CUSTOM_PROGS)
+
+$(TEST_CUSTOM_PROGS): urandom_read
+
+urandom_read: urandom_read.c
+ $(CC) -o $(TEST_CUSTOM_PROGS) -static $<
+
# Order corresponds to 'make run_tests' order
TEST_GEN_PROGS = test_verifier test_tag test_maps test_lru_map test_lpm_map test_progs \
test_align test_verifier_log test_dev_cgroup test_tcpbpf_user
@@ -21,7 +29,7 @@ TEST_GEN_FILES = test_pkt_access.o test_xdp.o test_l4lb.o test_tcp_estats.o test
test_pkt_md_access.o test_xdp_redirect.o test_xdp_meta.o sockmap_parse_prog.o \
sockmap_verdict_prog.o dev_cgroup.o sample_ret0.o test_tracepoint.o \
test_l4lb_noinline.o test_xdp_noinline.o test_stacktrace_map.o \
- sample_map_ret0.o test_tcpbpf_kern.o
+ sample_map_ret0.o test_tcpbpf_kern.o test_stacktrace_build_id.o
# Order corresponds to 'make run_tests' order
TEST_PROGS := test_kmod.sh \
@@ -74,3 +82,5 @@ $(OUTPUT)/%.o: %.c
$(CLANG) $(CLANG_FLAGS) \
-O2 -target bpf -emit-llvm -c $< -o - | \
$(LLC) -march=bpf -mcpu=$(CPU) -filetype=obj -o $@
+
+EXTRA_CLEAN := $(TEST_CUSTOM_PROGS)
diff --git a/tools/testing/selftests/bpf/test_progs.c b/tools/testing/selftests/bpf/test_progs.c
index 27ad5404389e..e9df48b306df 100644
--- a/tools/testing/selftests/bpf/test_progs.c
+++ b/tools/testing/selftests/bpf/test_progs.c
@@ -841,7 +841,8 @@ static void test_tp_attach_query(void)
static int compare_map_keys(int map1_fd, int map2_fd)
{
__u32 key, next_key;
- char val_buf[PERF_MAX_STACK_DEPTH * sizeof(__u64)];
+ char val_buf[PERF_MAX_STACK_DEPTH *
+ sizeof(struct bpf_stack_build_id)];
int err;
err = bpf_map_get_next_key(map1_fd, NULL, &key);
@@ -964,6 +965,166 @@ out:
return;
}
+static int extract_build_id(char *build_id, size_t size)
+{
+ FILE *fp;
+ char *line = NULL;
+ size_t len = 0;
+ ssize_t ret;
+
+ fp = popen("readelf -n ./urandom_read | grep 'Build ID'", "r");
+ if (fp == NULL)
+ return -1;
+
+ ret = getline(&line, &len, fp);
+ pclose(fp); /* streams from popen() are closed with pclose() */
+ if (ret < 0) {
+ free(line);
+ return -1;
+ }
+
+ /* getline() sets len to the buffer size; use the returned length,
+ * leaving room for the terminating NUL
+ */
+ if ((size_t)ret >= size)
+ ret = size - 1;
+ memcpy(build_id, line, ret);
+ build_id[ret] = '\0';
+ free(line);
+ return 0;
+}
+
+static void test_stacktrace_build_id(void)
+{
+ int control_map_fd, stackid_hmap_fd, stackmap_fd;
+ const char *file = "./test_stacktrace_build_id.o";
+ int bytes, efd, err, pmu_fd, prog_fd;
+ struct perf_event_attr attr = {};
+ __u32 key, previous_key, val, duration = 0;
+ struct bpf_object *obj;
+ char buf[256];
+ int i, j;
+ struct bpf_stack_build_id id_offs[PERF_MAX_STACK_DEPTH];
+ int build_id_matches = 0;
+
+ err = bpf_prog_load(file, BPF_PROG_TYPE_TRACEPOINT, &obj, &prog_fd);
+ if (CHECK(err, "prog_load", "err %d errno %d\n", err, errno))
+ goto out;
+
+ /* Get the ID for the random/urandom_read tracepoint */
+ snprintf(buf, sizeof(buf),
+ "/sys/kernel/debug/tracing/events/random/urandom_read/id");
+ efd = open(buf, O_RDONLY, 0);
+ if (CHECK(efd < 0, "open", "err %d errno %d\n", efd, errno))
+ goto close_prog;
+
+ bytes = read(efd, buf, sizeof(buf));
+ close(efd);
+ if (CHECK(bytes <= 0 || bytes >= sizeof(buf),
+ "read", "bytes %d errno %d\n", bytes, errno))
+ goto close_prog;
+
+ /* Open the perf event and attach the bpf program */
+ attr.config = strtol(buf, NULL, 0);
+ attr.type = PERF_TYPE_TRACEPOINT;
+ attr.sample_type = PERF_SAMPLE_RAW | PERF_SAMPLE_CALLCHAIN;
+ attr.sample_period = 1;
+ attr.wakeup_events = 1;
+ pmu_fd = syscall(__NR_perf_event_open, &attr, -1 /* pid */,
+ 0 /* cpu 0 */, -1 /* group id */,
+ 0 /* flags */);
+ if (CHECK(pmu_fd < 0, "perf_event_open", "err %d errno %d\n",
+ pmu_fd, errno))
+ goto close_prog;
+
+ err = ioctl(pmu_fd, PERF_EVENT_IOC_ENABLE, 0);
+ if (CHECK(err, "perf_event_ioc_enable", "err %d errno %d\n",
+ err, errno))
+ goto close_pmu;
+
+ err = ioctl(pmu_fd, PERF_EVENT_IOC_SET_BPF, prog_fd);
+ if (CHECK(err, "perf_event_ioc_set_bpf", "err %d errno %d\n",
+ err, errno))
+ goto disable_pmu;
+
+ /* find map fds */
+ control_map_fd = bpf_find_map(__func__, obj, "control_map");
+ if (CHECK(control_map_fd < 0, "bpf_find_map control_map",
+ "err %d errno %d\n", err, errno))
+ goto disable_pmu;
+
+ stackid_hmap_fd = bpf_find_map(__func__, obj, "stackid_hmap");
+ if (CHECK(stackid_hmap_fd < 0, "bpf_find_map stackid_hmap",
+ "err %d errno %d\n", err, errno))
+ goto disable_pmu;
+
+ stackmap_fd = bpf_find_map(__func__, obj, "stackmap");
+ if (CHECK(stackmap_fd < 0, "bpf_find_map stackmap", "err %d errno %d\n",
+ err, errno))
+ goto disable_pmu;
+
+ assert(system("dd if=/dev/urandom of=/dev/zero count=4 2> /dev/null")
+ == 0);
+ assert(system("./urandom_read if=/dev/urandom of=/dev/zero count=4 2> /dev/null") == 0);
+ /* disable stack trace collection */
+ key = 0;
+ val = 1;
+ bpf_map_update_elem(control_map_fd, &key, &val, 0);
+
+ /* for every element in stackid_hmap, we can find a corresponding one
+ * in stackmap, and vice versa.
+ */
+ err = compare_map_keys(stackid_hmap_fd, stackmap_fd);
+ if (CHECK(err, "compare_map_keys stackid_hmap vs. stackmap",
+ "err %d errno %d\n", err, errno))
+ goto disable_pmu;
+
+ err = compare_map_keys(stackmap_fd, stackid_hmap_fd);
+ if (CHECK(err, "compare_map_keys stackmap vs. stackid_hmap",
+ "err %d errno %d\n", err, errno))
+ goto disable_pmu;
+
+ err = extract_build_id(buf, 256);
+
+ if (CHECK(err, "get build_id with readelf",
+ "err %d errno %d\n", err, errno))
+ goto disable_pmu;
+
+ err = bpf_map_get_next_key(stackmap_fd, NULL, &key);
+ if (CHECK(err, "get_next_key from stackmap",
+ "err %d, errno %d\n", err, errno))
+ goto disable_pmu;
+
+ do {
+ char build_id[64];
+
+ err = bpf_map_lookup_elem(stackmap_fd, &key, id_offs);
+ if (CHECK(err, "lookup_elem from stackmap",
+ "err %d, errno %d\n", err, errno))
+ goto disable_pmu;
+ for (i = 0; i < PERF_MAX_STACK_DEPTH; ++i)
+ if (id_offs[i].status == BPF_STACK_BUILD_ID_VALID &&
+ id_offs[i].offset != 0) {
+ for (j = 0; j < BPF_BUILD_ID_SIZE; ++j)
+ sprintf(build_id + 2 * j, "%02x",
+ id_offs[i].build_id[j] & 0xff);
+ if (strstr(buf, build_id) != NULL)
+ build_id_matches = 1;
+ }
+ previous_key = key;
+ } while (bpf_map_get_next_key(stackmap_fd, &previous_key, &key) == 0);
+
+ CHECK(build_id_matches < 1, "build id match",
+ "Didn't find expected build ID from the map");
+
+disable_pmu:
+ ioctl(pmu_fd, PERF_EVENT_IOC_DISABLE);
+
+close_pmu:
+ close(pmu_fd);
+
+close_prog:
+ bpf_object__close(obj);
+
+out:
+ return;
+}
+
int main(void)
{
test_pkt_access();
@@ -976,6 +1137,7 @@ int main(void)
test_obj_name();
test_tp_attach_query();
test_stacktrace_map();
+ test_stacktrace_build_id();
printf("Summary: %d PASSED, %d FAILED\n", pass_cnt, error_cnt);
return error_cnt ? EXIT_FAILURE : EXIT_SUCCESS;
diff --git a/tools/testing/selftests/bpf/test_stacktrace_build_id.c b/tools/testing/selftests/bpf/test_stacktrace_build_id.c
new file mode 100644
index 000000000000..b755bd783ce5
--- /dev/null
+++ b/tools/testing/selftests/bpf/test_stacktrace_build_id.c
@@ -0,0 +1,60 @@
+// SPDX-License-Identifier: GPL-2.0
+// Copyright (c) 2018 Facebook
+
+#include <linux/bpf.h>
+#include "bpf_helpers.h"
+
+#ifndef PERF_MAX_STACK_DEPTH
+#define PERF_MAX_STACK_DEPTH 127
+#endif
+
+struct bpf_map_def SEC("maps") control_map = {
+ .type = BPF_MAP_TYPE_ARRAY,
+ .key_size = sizeof(__u32),
+ .value_size = sizeof(__u32),
+ .max_entries = 1,
+};
+
+struct bpf_map_def SEC("maps") stackid_hmap = {
+ .type = BPF_MAP_TYPE_HASH,
+ .key_size = sizeof(__u32),
+ .value_size = sizeof(__u32),
+ .max_entries = 10000,
+};
+
+struct bpf_map_def SEC("maps") stackmap = {
+ .type = BPF_MAP_TYPE_STACK_TRACE,
+ .key_size = sizeof(__u32),
+ .value_size = sizeof(struct bpf_stack_build_id)
+ * PERF_MAX_STACK_DEPTH,
+ .max_entries = 128,
+ .map_flags = BPF_F_STACK_BUILD_ID,
+};
+
+/* taken from /sys/kernel/debug/tracing/events/random/urandom_read/format */
+struct random_urandom_args {
+ unsigned long long pad;
+ int got_bits;
+ int pool_left;
+ int input_left;
+};
+
+SEC("tracepoint/random/urandom_read")
+int oncpu(struct random_urandom_args *args)
+{
+ __u32 key = 0, val = 0, *value_p;
+
+ value_p = bpf_map_lookup_elem(&control_map, &key);
+ if (value_p && *value_p)
+ return 0; /* skip if non-zero *value_p */
+
+ /* The size of stackmap and stackid_hmap should be the same */
+ key = bpf_get_stackid(args, &stackmap, BPF_F_USER_STACK);
+ if ((int)key >= 0)
+ bpf_map_update_elem(&stackid_hmap, &key, &val, 0);
+
+ return 0;
+}
+
+char _license[] SEC("license") = "GPL";
+__u32 _version SEC("version") = 1; /* ignored by tracepoints, required by libbpf.a */
diff --git a/tools/testing/selftests/bpf/urandom_read.c b/tools/testing/selftests/bpf/urandom_read.c
new file mode 100644
index 000000000000..4acfdebf36fa
--- /dev/null
+++ b/tools/testing/selftests/bpf/urandom_read.c
@@ -0,0 +1,22 @@
+#include <stdio.h>
+#include <unistd.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+#include <stdlib.h>
+
+#define BUF_SIZE 256
+int main(void)
+{
+ int fd = open("/dev/urandom", O_RDONLY);
+ int i;
+ char buf[BUF_SIZE];
+
+ if (fd < 0)
+ return 1;
+ for (i = 0; i < 4; ++i)
+ read(fd, buf, BUF_SIZE);
+
+ close(fd);
+ return 0;
+}