diff options
Diffstat (limited to 'samples/bpf')
-rw-r--r-- | samples/bpf/Makefile | 21 | ||||
-rw-r--r-- | samples/bpf/cpustat_kern.c | 36 | ||||
-rw-r--r-- | samples/bpf/cpustat_user.c | 47 | ||||
-rw-r--r-- | samples/bpf/lathist_kern.c | 24 | ||||
-rw-r--r-- | samples/bpf/lathist_user.c | 42 | ||||
-rw-r--r-- | samples/bpf/offwaketime_kern.c | 52 | ||||
-rw-r--r-- | samples/bpf/offwaketime_user.c | 66 | ||||
-rw-r--r-- | samples/bpf/spintest_kern.c | 36 | ||||
-rw-r--r-- | samples/bpf/spintest_user.c | 68 | ||||
-rw-r--r-- | samples/bpf/syscall_tp_kern.c | 24 | ||||
-rw-r--r-- | samples/bpf/syscall_tp_user.c | 54 | ||||
-rw-r--r-- | samples/bpf/task_fd_query_kern.c | 2 | ||||
-rw-r--r-- | samples/bpf/task_fd_query_user.c | 2 | ||||
-rw-r--r-- | samples/bpf/test_current_task_under_cgroup_kern.c | 27 | ||||
-rw-r--r-- | samples/bpf/test_current_task_under_cgroup_user.c | 52 | ||||
-rw-r--r-- | samples/bpf/test_probe_write_user_kern.c | 12 | ||||
-rw-r--r-- | samples/bpf/test_probe_write_user_user.c | 49 | ||||
-rw-r--r-- | samples/bpf/trace_output_kern.c | 15 | ||||
-rw-r--r-- | samples/bpf/trace_output_user.c | 55 | ||||
-rw-r--r-- | samples/bpf/tracex3_kern.c | 2 | ||||
-rw-r--r-- | samples/bpf/xdpsock_user.c | 32 | ||||
-rw-r--r-- | samples/bpf/xsk_fwd.c | 1085 |
22 files changed, 1575 insertions, 228 deletions
diff --git a/samples/bpf/Makefile b/samples/bpf/Makefile index f87ee02073ba..4f1ed0e3cf9f 100644 --- a/samples/bpf/Makefile +++ b/samples/bpf/Makefile @@ -48,6 +48,7 @@ tprogs-y += syscall_tp tprogs-y += cpustat tprogs-y += xdp_adjust_tail tprogs-y += xdpsock +tprogs-y += xsk_fwd tprogs-y += xdp_fwd tprogs-y += task_fd_query tprogs-y += xdp_sample_pkts @@ -71,12 +72,12 @@ tracex4-objs := tracex4_user.o tracex5-objs := tracex5_user.o $(TRACE_HELPERS) tracex6-objs := tracex6_user.o tracex7-objs := tracex7_user.o -test_probe_write_user-objs := bpf_load.o test_probe_write_user_user.o -trace_output-objs := bpf_load.o trace_output_user.o $(TRACE_HELPERS) -lathist-objs := bpf_load.o lathist_user.o -offwaketime-objs := bpf_load.o offwaketime_user.o $(TRACE_HELPERS) -spintest-objs := bpf_load.o spintest_user.o $(TRACE_HELPERS) -map_perf_test-objs := bpf_load.o map_perf_test_user.o +test_probe_write_user-objs := test_probe_write_user_user.o +trace_output-objs := trace_output_user.o $(TRACE_HELPERS) +lathist-objs := lathist_user.o +offwaketime-objs := offwaketime_user.o $(TRACE_HELPERS) +spintest-objs := spintest_user.o $(TRACE_HELPERS) +map_perf_test-objs := map_perf_test_user.o test_overhead-objs := bpf_load.o test_overhead_user.o test_cgrp2_array_pin-objs := test_cgrp2_array_pin.o test_cgrp2_attach-objs := test_cgrp2_attach.o @@ -86,7 +87,7 @@ xdp1-objs := xdp1_user.o # reuse xdp1 source intentionally xdp2-objs := xdp1_user.o xdp_router_ipv4-objs := xdp_router_ipv4_user.o -test_current_task_under_cgroup-objs := bpf_load.o $(CGROUP_HELPERS) \ +test_current_task_under_cgroup-objs := $(CGROUP_HELPERS) \ test_current_task_under_cgroup_user.o trace_event-objs := trace_event_user.o $(TRACE_HELPERS) sampleip-objs := sampleip_user.o $(TRACE_HELPERS) @@ -100,10 +101,11 @@ xdp_redirect_map-objs := xdp_redirect_map_user.o xdp_redirect_cpu-objs := bpf_load.o xdp_redirect_cpu_user.o xdp_monitor-objs := bpf_load.o xdp_monitor_user.o xdp_rxq_info-objs := xdp_rxq_info_user.o -syscall_tp-objs := bpf_load.o syscall_tp_user.o -cpustat-objs := bpf_load.o cpustat_user.o +syscall_tp-objs := syscall_tp_user.o +cpustat-objs := cpustat_user.o xdp_adjust_tail-objs := xdp_adjust_tail_user.o xdpsock-objs := xdpsock_user.o +xsk_fwd-objs := xsk_fwd.o xdp_fwd-objs := xdp_fwd_user.o task_fd_query-objs := bpf_load.o task_fd_query_user.o $(TRACE_HELPERS) xdp_sample_pkts-objs := xdp_sample_pkts_user.o $(TRACE_HELPERS) @@ -203,6 +205,7 @@ TPROGLDLIBS_trace_output += -lrt TPROGLDLIBS_map_perf_test += -lrt TPROGLDLIBS_test_overhead += -lrt TPROGLDLIBS_xdpsock += -pthread +TPROGLDLIBS_xsk_fwd += -pthread # Allows pointing LLC/CLANG to a LLVM backend with bpf support, redefine on cmdline: # make M=samples/bpf/ LLC=~/git/llvm/build/bin/llc CLANG=~/git/llvm/build/bin/clang diff --git a/samples/bpf/cpustat_kern.c b/samples/bpf/cpustat_kern.c index a86a19d5f033..5aefd19cdfa1 100644 --- a/samples/bpf/cpustat_kern.c +++ b/samples/bpf/cpustat_kern.c @@ -51,28 +51,28 @@ static int cpu_opps[] = { 208000, 432000, 729000, 960000, 1200000 }; #define MAP_OFF_PSTATE_IDX 3 #define MAP_OFF_NUM 4 -struct bpf_map_def SEC("maps") my_map = { - .type = BPF_MAP_TYPE_ARRAY, - .key_size = sizeof(u32), - .value_size = sizeof(u64), - .max_entries = MAX_CPU * MAP_OFF_NUM, -}; +struct { + __uint(type, BPF_MAP_TYPE_ARRAY); + __type(key, u32); + __type(value, u64); + __uint(max_entries, MAX_CPU * MAP_OFF_NUM); +} my_map SEC(".maps"); /* cstate_duration records duration time for every idle state per CPU */ -struct bpf_map_def SEC("maps") cstate_duration = { - .type = BPF_MAP_TYPE_ARRAY, - .key_size = sizeof(u32), - .value_size = sizeof(u64), - .max_entries = MAX_CPU * MAX_CSTATE_ENTRIES, -}; +struct { + __uint(type, BPF_MAP_TYPE_ARRAY); + __type(key, u32); + __type(value, u64); + __uint(max_entries, MAX_CPU * MAX_CSTATE_ENTRIES); +} cstate_duration SEC(".maps"); /* pstate_duration records duration time for every operating point per CPU */ -struct bpf_map_def SEC("maps") pstate_duration = { - .type = BPF_MAP_TYPE_ARRAY, - .key_size = sizeof(u32), - .value_size = sizeof(u64), - .max_entries = MAX_CPU * MAX_PSTATE_ENTRIES, -}; +struct { + __uint(type, BPF_MAP_TYPE_ARRAY); + __type(key, u32); + __type(value, u64); + __uint(max_entries, MAX_CPU * MAX_PSTATE_ENTRIES); +} pstate_duration SEC(".maps"); /* * The trace events for cpu_idle and cpu_frequency are taken from: diff --git a/samples/bpf/cpustat_user.c b/samples/bpf/cpustat_user.c index 869a99406dbf..96675985e9e0 100644 --- a/samples/bpf/cpustat_user.c +++ b/samples/bpf/cpustat_user.c @@ -9,7 +9,6 @@ #include <string.h> #include <unistd.h> #include <fcntl.h> -#include <linux/bpf.h> #include <locale.h> #include <sys/types.h> #include <sys/stat.h> @@ -18,7 +17,9 @@ #include <sys/wait.h> #include <bpf/bpf.h> -#include "bpf_load.h" +#include <bpf/libbpf.h> + +static int cstate_map_fd, pstate_map_fd; #define MAX_CPU 8 #define MAX_PSTATE_ENTRIES 5 @@ -181,21 +182,50 @@ static void int_exit(int sig) { cpu_stat_inject_cpu_idle_event(); cpu_stat_inject_cpu_frequency_event(); - cpu_stat_update(map_fd[1], map_fd[2]); + cpu_stat_update(cstate_map_fd, pstate_map_fd); cpu_stat_print(); exit(0); } int main(int argc, char **argv) { + struct bpf_link *link = NULL; + struct bpf_program *prog; + struct bpf_object *obj; char filename[256]; int ret; snprintf(filename, sizeof(filename), "%s_kern.o", argv[0]); + obj = bpf_object__open_file(filename, NULL); + if (libbpf_get_error(obj)) { + fprintf(stderr, "ERROR: opening BPF object file failed\n"); + return 0; + } - if (load_bpf_file(filename)) { - printf("%s", bpf_log_buf); - return 1; + prog = bpf_object__find_program_by_name(obj, "bpf_prog1"); + if (!prog) { + printf("finding a prog in obj file failed\n"); + goto cleanup; + } + + /* load BPF program */ + if (bpf_object__load(obj)) { + fprintf(stderr, "ERROR: loading BPF object file failed\n"); + goto cleanup; + } + + cstate_map_fd = bpf_object__find_map_fd_by_name(obj, "cstate_duration"); + pstate_map_fd = bpf_object__find_map_fd_by_name(obj, "pstate_duration"); + if (cstate_map_fd < 0 || pstate_map_fd < 0) { + fprintf(stderr, "ERROR: finding a map in obj file failed\n"); + goto cleanup; + } + + link = bpf_program__attach(prog); + if (libbpf_get_error(link)) { + fprintf(stderr, "ERROR: bpf_program__attach failed\n"); + link = NULL; + goto cleanup; } ret = cpu_stat_inject_cpu_idle_event(); @@ -210,10 +240,13 @@ int main(int argc, char **argv) signal(SIGTERM, int_exit); while (1) { - cpu_stat_update(map_fd[1], map_fd[2]); + cpu_stat_update(cstate_map_fd, pstate_map_fd); cpu_stat_print(); sleep(5); } +cleanup: + bpf_link__destroy(link); + bpf_object__close(obj); return 0; } diff --git a/samples/bpf/lathist_kern.c b/samples/bpf/lathist_kern.c index ca9c2e4e69aa..4adfcbbe6ef4 100644 --- a/samples/bpf/lathist_kern.c +++ b/samples/bpf/lathist_kern.c @@ -18,12 +18,12 @@ * trace_preempt_[on|off] tracepoints hooks is not supported. */ -struct bpf_map_def SEC("maps") my_map = { - .type = BPF_MAP_TYPE_ARRAY, - .key_size = sizeof(int), - .value_size = sizeof(u64), - .max_entries = MAX_CPU, -}; +struct { + __uint(type, BPF_MAP_TYPE_ARRAY); + __type(key, int); + __type(value, u64); + __uint(max_entries, MAX_CPU); +} my_map SEC(".maps"); SEC("kprobe/trace_preempt_off") int bpf_prog1(struct pt_regs *ctx) @@ -61,12 +61,12 @@ static unsigned int log2l(unsigned long v) return log2(v); } -struct bpf_map_def SEC("maps") my_lat = { - .type = BPF_MAP_TYPE_ARRAY, - .key_size = sizeof(int), - .value_size = sizeof(long), - .max_entries = MAX_CPU * MAX_ENTRIES, -}; +struct { + __uint(type, BPF_MAP_TYPE_ARRAY); + __type(key, int); + __type(value, long); + __uint(max_entries, MAX_CPU * MAX_ENTRIES); +} my_lat SEC(".maps"); SEC("kprobe/trace_preempt_on") int bpf_prog2(struct pt_regs *ctx) diff --git a/samples/bpf/lathist_user.c b/samples/bpf/lathist_user.c index 2ff2839a52d5..7d8ff2418303 100644 --- a/samples/bpf/lathist_user.c +++ b/samples/bpf/lathist_user.c @@ -6,9 +6,8 @@ #include <unistd.h> #include <stdlib.h> #include <signal.h> -#include <linux/bpf.h> +#include <bpf/libbpf.h> #include <bpf/bpf.h> -#include "bpf_load.h" #define MAX_ENTRIES 20 #define MAX_CPU 4 @@ -81,20 +80,51 @@ static void get_data(int fd) int main(int argc, char **argv) { + struct bpf_link *links[2]; + struct bpf_program *prog; + struct bpf_object *obj; char filename[256]; + int map_fd, i = 0; snprintf(filename, sizeof(filename), "%s_kern.o", argv[0]); + obj = bpf_object__open_file(filename, NULL); + if (libbpf_get_error(obj)) { + fprintf(stderr, "ERROR: opening BPF object file failed\n"); + return 0; + } + + /* load BPF program */ + if (bpf_object__load(obj)) { + fprintf(stderr, "ERROR: loading BPF object file failed\n"); + goto cleanup; + } - if (load_bpf_file(filename)) { - printf("%s", bpf_log_buf); - return 1; + map_fd = bpf_object__find_map_fd_by_name(obj, "my_lat"); + if (map_fd < 0) { + fprintf(stderr, "ERROR: finding a map in obj file failed\n"); + goto cleanup; + } + + bpf_object__for_each_program(prog, obj) { + links[i] = bpf_program__attach(prog); + if (libbpf_get_error(links[i])) { + fprintf(stderr, "ERROR: bpf_program__attach failed\n"); + links[i] = NULL; + goto cleanup; + } + i++; } while (1) { - get_data(map_fd[1]); + get_data(map_fd); print_hist(); sleep(5); } +cleanup: + for (i--; i >= 0; i--) + bpf_link__destroy(links[i]); + + bpf_object__close(obj); return 0; } diff --git a/samples/bpf/offwaketime_kern.c b/samples/bpf/offwaketime_kern.c index e74ee1cd4b9c..14b792915a9c 100644 --- a/samples/bpf/offwaketime_kern.c +++ b/samples/bpf/offwaketime_kern.c @@ -28,38 +28,38 @@ struct key_t { u32 tret; }; -struct bpf_map_def SEC("maps") counts = { - .type = BPF_MAP_TYPE_HASH, - .key_size = sizeof(struct key_t), - .value_size = sizeof(u64), - .max_entries = 10000, -}; - -struct bpf_map_def SEC("maps") start = { - .type = BPF_MAP_TYPE_HASH, - .key_size = sizeof(u32), - .value_size = sizeof(u64), - .max_entries = 10000, -}; +struct { + __uint(type, BPF_MAP_TYPE_HASH); + __type(key, struct key_t); + __type(value, u64); + __uint(max_entries, 10000); +} counts SEC(".maps"); + +struct { + __uint(type, BPF_MAP_TYPE_HASH); + __type(key, u32); + __type(value, u64); + __uint(max_entries, 10000); +} start SEC(".maps"); struct wokeby_t { char name[TASK_COMM_LEN]; u32 ret; }; -struct bpf_map_def SEC("maps") wokeby = { - .type = BPF_MAP_TYPE_HASH, - .key_size = sizeof(u32), - .value_size = sizeof(struct wokeby_t), - .max_entries = 10000, -}; - -struct bpf_map_def SEC("maps") stackmap = { - .type = BPF_MAP_TYPE_STACK_TRACE, - .key_size = sizeof(u32), - .value_size = PERF_MAX_STACK_DEPTH * sizeof(u64), - .max_entries = 10000, -}; +struct { + __uint(type, BPF_MAP_TYPE_HASH); + __type(key, u32); + __type(value, struct wokeby_t); + __uint(max_entries, 10000); +} wokeby SEC(".maps"); + +struct { + __uint(type, BPF_MAP_TYPE_STACK_TRACE); + __uint(key_size, sizeof(u32)); + __uint(value_size, PERF_MAX_STACK_DEPTH * sizeof(u64)); + __uint(max_entries, 10000); +} stackmap SEC(".maps"); #define STACKID_FLAGS (0 | BPF_F_FAST_STACK_CMP) diff --git a/samples/bpf/offwaketime_user.c b/samples/bpf/offwaketime_user.c index 51c7da5341cc..5734cfdaaacb 100644 --- a/samples/bpf/offwaketime_user.c +++ b/samples/bpf/offwaketime_user.c @@ -5,19 +5,19 @@ #include <unistd.h> #include <stdlib.h> #include <signal.h> -#include <linux/bpf.h> -#include <string.h> #include <linux/perf_event.h> #include <errno.h> -#include <assert.h> #include <stdbool.h> #include <sys/resource.h> #include <bpf/libbpf.h> -#include "bpf_load.h" +#include <bpf/bpf.h> #include "trace_helpers.h" #define PRINT_RAW_ADDR 0 +/* counts, stackmap */ +static int map_fd[2]; + static void print_ksym(__u64 addr) { struct ksym *sym; @@ -52,14 +52,14 @@ static void print_stack(struct key_t *key, __u64 count) int i; printf("%s;", key->target); - if (bpf_map_lookup_elem(map_fd[3], &key->tret, ip) != 0) { + if (bpf_map_lookup_elem(map_fd[1], &key->tret, ip) != 0) { printf("---;"); } else { for (i = PERF_MAX_STACK_DEPTH - 1; i >= 0; i--) print_ksym(ip[i]); } printf("-;"); - if (bpf_map_lookup_elem(map_fd[3], &key->wret, ip) != 0) { + if (bpf_map_lookup_elem(map_fd[1], &key->wret, ip) != 0) { printf("---;"); } else { for (i = 0; i < PERF_MAX_STACK_DEPTH; i++) @@ -96,23 +96,54 @@ static void int_exit(int sig) int main(int argc, char **argv) { struct rlimit r = {RLIM_INFINITY, RLIM_INFINITY}; + struct bpf_object *obj = NULL; + struct bpf_link *links[2]; + struct bpf_program *prog; + int delay = 1, i = 0; char filename[256]; - int delay = 1; - - snprintf(filename, sizeof(filename), "%s_kern.o", argv[0]); - setrlimit(RLIMIT_MEMLOCK, &r); - signal(SIGINT, int_exit); - signal(SIGTERM, int_exit); + if (setrlimit(RLIMIT_MEMLOCK, &r)) { + perror("setrlimit(RLIMIT_MEMLOCK)"); + return 1; + } if (load_kallsyms()) { printf("failed to process /proc/kallsyms\n"); return 2; } - if (load_bpf_file(filename)) { - printf("%s", bpf_log_buf); - return 1; + snprintf(filename, sizeof(filename), "%s_kern.o", argv[0]); + obj = bpf_object__open_file(filename, NULL); + if (libbpf_get_error(obj)) { + fprintf(stderr, "ERROR: opening BPF object file failed\n"); + obj = NULL; + goto cleanup; + } + + /* load BPF program */ + if (bpf_object__load(obj)) { + fprintf(stderr, "ERROR: loading BPF object file failed\n"); + goto cleanup; + } + + map_fd[0] = bpf_object__find_map_fd_by_name(obj, "counts"); + map_fd[1] = bpf_object__find_map_fd_by_name(obj, "stackmap"); + if (map_fd[0] < 0 || map_fd[1] < 0) { + fprintf(stderr, "ERROR: finding a map in obj file failed\n"); + goto cleanup; + } + + signal(SIGINT, int_exit); + signal(SIGTERM, int_exit); + + bpf_object__for_each_program(prog, obj) { + links[i] = bpf_program__attach(prog); + if (libbpf_get_error(links[i])) { + fprintf(stderr, "ERROR: bpf_program__attach failed\n"); + links[i] = NULL; + goto cleanup; + } + i++; } if (argc > 1) @@ -120,5 +151,10 @@ int main(int argc, char **argv) sleep(delay); print_stacks(map_fd[0]); +cleanup: + for (i--; i >= 0; i--) + bpf_link__destroy(links[i]); + + bpf_object__close(obj); return 0; } diff --git a/samples/bpf/spintest_kern.c b/samples/bpf/spintest_kern.c index f508af357251..455da77319d9 100644 --- a/samples/bpf/spintest_kern.c +++ b/samples/bpf/spintest_kern.c @@ -12,25 +12,25 @@ #include <bpf/bpf_helpers.h> #include <bpf/bpf_tracing.h> -struct bpf_map_def SEC("maps") my_map = { - .type = BPF_MAP_TYPE_HASH, - .key_size = sizeof(long), - .value_size = sizeof(long), - .max_entries = 1024, -}; -struct bpf_map_def SEC("maps") my_map2 = { - .type = BPF_MAP_TYPE_PERCPU_HASH, - .key_size = sizeof(long), - .value_size = sizeof(long), - .max_entries = 1024, -}; +struct { + __uint(type, BPF_MAP_TYPE_HASH); + __type(key, long); + __type(value, long); + __uint(max_entries, 1024); +} my_map SEC(".maps"); +struct { + __uint(type, BPF_MAP_TYPE_PERCPU_HASH); + __uint(key_size, sizeof(long)); + __uint(value_size, sizeof(long)); + __uint(max_entries, 1024); +} my_map2 SEC(".maps"); -struct bpf_map_def SEC("maps") stackmap = { - .type = BPF_MAP_TYPE_STACK_TRACE, - .key_size = sizeof(u32), - .value_size = PERF_MAX_STACK_DEPTH * sizeof(u64), - .max_entries = 10000, -}; +struct { + __uint(type, BPF_MAP_TYPE_STACK_TRACE); + __uint(key_size, sizeof(u32)); + __uint(value_size, PERF_MAX_STACK_DEPTH * sizeof(u64)); + __uint(max_entries, 10000); +} stackmap SEC(".maps"); #define PROG(foo) \ int foo(struct pt_regs *ctx) \ diff --git a/samples/bpf/spintest_user.c b/samples/bpf/spintest_user.c index fb430ea2ef51..847da9284fa8 100644 --- a/samples/bpf/spintest_user.c +++ b/samples/bpf/spintest_user.c @@ -1,40 +1,77 @@ // SPDX-License-Identifier: GPL-2.0 #include <stdio.h> #include <unistd.h> -#include <linux/bpf.h> #include <string.h> #include <assert.h> #include <sys/resource.h> #include <bpf/libbpf.h> -#include "bpf_load.h" +#include <bpf/bpf.h> #include "trace_helpers.h" int main(int ac, char **argv) { struct rlimit r = {RLIM_INFINITY, RLIM_INFINITY}; + char filename[256], symbol[256]; + struct bpf_object *obj = NULL; + struct bpf_link *links[20]; long key, next_key, value; - char filename[256]; + struct bpf_program *prog; + int map_fd, i, j = 0; + const char *title; struct ksym *sym; - int i; - snprintf(filename, sizeof(filename), "%s_kern.o", argv[0]); - setrlimit(RLIMIT_MEMLOCK, &r); + if (setrlimit(RLIMIT_MEMLOCK, &r)) { + perror("setrlimit(RLIMIT_MEMLOCK)"); + return 1; + } if (load_kallsyms()) { printf("failed to process /proc/kallsyms\n"); return 2; } - if (load_bpf_file(filename)) { - printf("%s", bpf_log_buf); - return 1; + snprintf(filename, sizeof(filename), "%s_kern.o", argv[0]); + obj = bpf_object__open_file(filename, NULL); + if (libbpf_get_error(obj)) { + fprintf(stderr, "ERROR: opening BPF object file failed\n"); + obj = NULL; + goto cleanup; + } + + /* load BPF program */ + if (bpf_object__load(obj)) { + fprintf(stderr, "ERROR: loading BPF object file failed\n"); + goto cleanup; + } + + map_fd = bpf_object__find_map_fd_by_name(obj, "my_map"); + if (map_fd < 0) { + fprintf(stderr, "ERROR: finding a map in obj file failed\n"); + goto cleanup; + } + + bpf_object__for_each_program(prog, obj) { + title = bpf_program__title(prog, false); + if (sscanf(title, "kprobe/%s", symbol) != 1) + continue; + + /* Attach prog only when symbol exists */ + if (ksym_get_addr(symbol)) { + links[j] = bpf_program__attach(prog); + if (libbpf_get_error(links[j])) { + fprintf(stderr, "bpf_program__attach failed\n"); + links[j] = NULL; + goto cleanup; + } + j++; + } } for (i = 0; i < 5; i++) { key = 0; printf("kprobing funcs:"); - while (bpf_map_get_next_key(map_fd[0], &key, &next_key) == 0) { - bpf_map_lookup_elem(map_fd[0], &next_key, &value); + while (bpf_map_get_next_key(map_fd, &key, &next_key) == 0) { + bpf_map_lookup_elem(map_fd, &next_key, &value); assert(next_key == value); sym = ksym_search(value); key = next_key; @@ -48,10 +85,15 @@ int main(int ac, char **argv) if (key) printf("\n"); key = 0; - while (bpf_map_get_next_key(map_fd[0], &key, &next_key) == 0) - bpf_map_delete_elem(map_fd[0], &next_key); + while (bpf_map_get_next_key(map_fd, &key, &next_key) == 0) + bpf_map_delete_elem(map_fd, &next_key); sleep(1); } +cleanup: + for (j--; j >= 0; j--) + bpf_link__destroy(links[j]); + + bpf_object__close(obj); return 0; } diff --git a/samples/bpf/syscall_tp_kern.c b/samples/bpf/syscall_tp_kern.c index 5a62b03b1f88..50231c2eff9c 100644 --- a/samples/bpf/syscall_tp_kern.c +++ b/samples/bpf/syscall_tp_kern.c @@ -18,19 +18,19 @@ struct syscalls_exit_open_args { long ret; }; -struct bpf_map_def SEC("maps") enter_open_map = { - .type = BPF_MAP_TYPE_ARRAY, - .key_size = sizeof(u32), - .value_size = sizeof(u32), - .max_entries = 1, -}; +struct { + __uint(type, BPF_MAP_TYPE_ARRAY); + __type(key, u32); + __type(value, u32); + __uint(max_entries, 1); +} enter_open_map SEC(".maps"); -struct bpf_map_def SEC("maps") exit_open_map = { - .type = BPF_MAP_TYPE_ARRAY, - .key_size = sizeof(u32), - .value_size = sizeof(u32), - .max_entries = 1, -}; +struct { + __uint(type, BPF_MAP_TYPE_ARRAY); + __type(key, u32); + __type(value, u32); + __uint(max_entries, 1); +} exit_open_map SEC(".maps"); static __always_inline void count(void *map) { diff --git a/samples/bpf/syscall_tp_user.c b/samples/bpf/syscall_tp_user.c index 57014bab7cbe..76a1d00128fb 100644 --- a/samples/bpf/syscall_tp_user.c +++ b/samples/bpf/syscall_tp_user.c @@ -5,16 +5,12 @@ #include <unistd.h> #include <fcntl.h> #include <stdlib.h> -#include <signal.h> -#include <linux/bpf.h> #include <string.h> #include <linux/perf_event.h> #include <errno.h> -#include <assert.h> -#include <stdbool.h> #include <sys/resource.h> +#include <bpf/libbpf.h> #include <bpf/bpf.h> -#include "bpf_load.h" /* This program verifies bpf attachment to tracepoint sys_enter_* and sys_exit_*. * This requires kernel CONFIG_FTRACE_SYSCALLS to be set. @@ -49,16 +45,44 @@ static void verify_map(int map_id) static int test(char *filename, int num_progs) { - int i, fd, map0_fds[num_progs], map1_fds[num_progs]; + int map0_fds[num_progs], map1_fds[num_progs], fd, i, j = 0; + struct bpf_link *links[num_progs * 4]; + struct bpf_object *objs[num_progs]; + struct bpf_program *prog; for (i = 0; i < num_progs; i++) { - if (load_bpf_file(filename)) { - fprintf(stderr, "%s", bpf_log_buf); - return 1; + objs[i] = bpf_object__open_file(filename, NULL); + if (libbpf_get_error(objs[i])) { + fprintf(stderr, "opening BPF object file failed\n"); + objs[i] = NULL; + goto cleanup; } - printf("prog #%d: map ids %d %d\n", i, map_fd[0], map_fd[1]); - map0_fds[i] = map_fd[0]; - map1_fds[i] = map_fd[1]; + + /* load BPF program */ + if (bpf_object__load(objs[i])) { + fprintf(stderr, "loading BPF object file failed\n"); + goto cleanup; + } + + map0_fds[i] = bpf_object__find_map_fd_by_name(objs[i], + "enter_open_map"); + map1_fds[i] = bpf_object__find_map_fd_by_name(objs[i], + "exit_open_map"); + if (map0_fds[i] < 0 || map1_fds[i] < 0) { + fprintf(stderr, "finding a map in obj file failed\n"); + goto cleanup; + } + + bpf_object__for_each_program(prog, objs[i]) { + links[j] = bpf_program__attach(prog); + if (libbpf_get_error(links[j])) { + fprintf(stderr, "bpf_program__attach failed\n"); + links[j] = NULL; + goto cleanup; + } + j++; + } + printf("prog #%d: map ids %d %d\n", i, map0_fds[i], map1_fds[i]); } /* current load_bpf_file has perf_event_open default pid = -1 @@ -80,6 +104,12 @@ static int test(char *filename, int num_progs) verify_map(map1_fds[i]); } +cleanup: + for (j--; j >= 0; j--) + bpf_link__destroy(links[j]); + + for (i--; i >= 0; i--) + bpf_object__close(objs[i]); return 0; } diff --git a/samples/bpf/task_fd_query_kern.c b/samples/bpf/task_fd_query_kern.c index 278ade5427c8..c821294e1774 100644 --- a/samples/bpf/task_fd_query_kern.c +++ b/samples/bpf/task_fd_query_kern.c @@ -10,7 +10,7 @@ int bpf_prog1(struct pt_regs *ctx) return 0; } -SEC("kretprobe/blk_account_io_completion") +SEC("kretprobe/blk_account_io_done") int bpf_prog2(struct pt_regs *ctx) { return 0; diff --git a/samples/bpf/task_fd_query_user.c b/samples/bpf/task_fd_query_user.c index ff2e9c1c7266..4a74531dc403 100644 --- a/samples/bpf/task_fd_query_user.c +++ b/samples/bpf/task_fd_query_user.c @@ -314,7 +314,7 @@ int main(int argc, char **argv) /* test two functions in the corresponding *_kern.c file */ CHECK_AND_RET(test_debug_fs_kprobe(0, "blk_mq_start_request", BPF_FD_TYPE_KPROBE)); - CHECK_AND_RET(test_debug_fs_kprobe(1, "blk_account_io_completion", + CHECK_AND_RET(test_debug_fs_kprobe(1, "blk_account_io_done", BPF_FD_TYPE_KRETPROBE)); /* test nondebug fs kprobe */ diff --git a/samples/bpf/test_current_task_under_cgroup_kern.c b/samples/bpf/test_current_task_under_cgroup_kern.c index 6dc4f41bb6cb..fbd43e2bb4d3 100644 --- a/samples/bpf/test_current_task_under_cgroup_kern.c +++ b/samples/bpf/test_current_task_under_cgroup_kern.c @@ -10,23 +10,24 @@ #include <linux/version.h> #include <bpf/bpf_helpers.h> #include <uapi/linux/utsname.h> +#include "trace_common.h" -struct bpf_map_def SEC("maps") cgroup_map = { - .type = BPF_MAP_TYPE_CGROUP_ARRAY, - .key_size = sizeof(u32), - .value_size = sizeof(u32), - .max_entries = 1, -}; +struct { + __uint(type, BPF_MAP_TYPE_CGROUP_ARRAY); + __uint(key_size, sizeof(u32)); + __uint(value_size, sizeof(u32)); + __uint(max_entries, 1); +} cgroup_map SEC(".maps"); -struct bpf_map_def SEC("maps") perf_map = { - .type = BPF_MAP_TYPE_ARRAY, - .key_size = sizeof(u32), - .value_size = sizeof(u64), - .max_entries = 1, -}; +struct { + __uint(type, BPF_MAP_TYPE_ARRAY); + __type(key, u32); + __type(value, u64); + __uint(max_entries, 1); +} perf_map SEC(".maps"); /* Writes the last PID that called sync to a map at index 0 */ -SEC("kprobe/sys_sync") +SEC("kprobe/" SYSCALL(sys_sync)) int bpf_prog1(struct pt_regs *ctx) { u64 pid = bpf_get_current_pid_tgid(); diff --git a/samples/bpf/test_current_task_under_cgroup_user.c b/samples/bpf/test_current_task_under_cgroup_user.c index 06e9f8ce42e2..ac251a417f45 100644 --- a/samples/bpf/test_current_task_under_cgroup_user.c +++ b/samples/bpf/test_current_task_under_cgroup_user.c @@ -4,10 +4,9 @@ #define _GNU_SOURCE #include <stdio.h> -#include <linux/bpf.h> #include <unistd.h> #include <bpf/bpf.h> -#include "bpf_load.h" +#include <bpf/libbpf.h> #include "cgroup_helpers.h" #define CGROUP_PATH "/my-cgroup" @@ -15,13 +14,44 @@ int main(int argc, char **argv) { pid_t remote_pid, local_pid = getpid(); - int cg2, idx = 0, rc = 0; + struct bpf_link *link = NULL; + struct bpf_program *prog; + int cg2, idx = 0, rc = 1; + struct bpf_object *obj; char filename[256]; + int map_fd[2]; snprintf(filename, sizeof(filename), "%s_kern.o", argv[0]); - if (load_bpf_file(filename)) { - printf("%s", bpf_log_buf); - return 1; + obj = bpf_object__open_file(filename, NULL); + if (libbpf_get_error(obj)) { + fprintf(stderr, "ERROR: opening BPF object file failed\n"); + return 0; + } + + prog = bpf_object__find_program_by_name(obj, "bpf_prog1"); + if (!prog) { + printf("finding a prog in obj file failed\n"); + goto cleanup; + } + + /* load BPF program */ + if (bpf_object__load(obj)) { + fprintf(stderr, "ERROR: loading BPF object file failed\n"); + goto cleanup; + } + + map_fd[0] = bpf_object__find_map_fd_by_name(obj, "cgroup_map"); + map_fd[1] = bpf_object__find_map_fd_by_name(obj, "perf_map"); + if (map_fd[0] < 0 || map_fd[1] < 0) { + fprintf(stderr, "ERROR: finding a map in obj file failed\n"); + goto cleanup; + } + + link = bpf_program__attach(prog); + if (libbpf_get_error(link)) { + fprintf(stderr, "ERROR: bpf_program__attach failed\n"); + link = NULL; + goto cleanup; } if (setup_cgroup_environment()) @@ -70,12 +100,14 @@ int main(int argc, char **argv) goto err; } - goto out; -err: - rc = 1; + rc = 0; -out: +err: close(cg2); cleanup_cgroup_environment(); + +cleanup: + bpf_link__destroy(link); + bpf_object__close(obj); return rc; } diff --git a/samples/bpf/test_probe_write_user_kern.c b/samples/bpf/test_probe_write_user_kern.c index fd651a65281e..220a96438d75 100644 --- a/samples/bpf/test_probe_write_user_kern.c +++ b/samples/bpf/test_probe_write_user_kern.c @@ -13,12 +13,12 @@ #include <bpf/bpf_core_read.h> #include "trace_common.h" -struct bpf_map_def SEC("maps") dnat_map = { - .type = BPF_MAP_TYPE_HASH, - .key_size = sizeof(struct sockaddr_in), - .value_size = sizeof(struct sockaddr_in), - .max_entries = 256, -}; +struct { + __uint(type, BPF_MAP_TYPE_HASH); + __type(key, struct sockaddr_in); + __type(value, struct sockaddr_in); + __uint(max_entries, 256); +} dnat_map SEC(".maps"); /* kprobe is NOT a stable ABI * kernel functions can be removed, renamed or completely change semantics. diff --git a/samples/bpf/test_probe_write_user_user.c b/samples/bpf/test_probe_write_user_user.c index 045eb5e30f54..00ccfb834e45 100644 --- a/samples/bpf/test_probe_write_user_user.c +++ b/samples/bpf/test_probe_write_user_user.c @@ -1,21 +1,22 @@ // SPDX-License-Identifier: GPL-2.0 #include <stdio.h> #include <assert.h> -#include <linux/bpf.h> #include <unistd.h> #include <bpf/bpf.h> -#include "bpf_load.h" +#include <bpf/libbpf.h> #include <sys/socket.h> -#include <string.h> #include <netinet/in.h> #include <arpa/inet.h> int main(int ac, char **argv) { - int serverfd, serverconnfd, clientfd; - socklen_t sockaddr_len; - struct sockaddr serv_addr, mapped_addr, tmp_addr; struct sockaddr_in *serv_addr_in, *mapped_addr_in, *tmp_addr_in; + struct sockaddr serv_addr, mapped_addr, tmp_addr; + int serverfd, serverconnfd, clientfd, map_fd; + struct bpf_link *link = NULL; + struct bpf_program *prog; + struct bpf_object *obj; + socklen_t sockaddr_len; char filename[256]; char *ip; @@ -24,10 +25,35 @@ int main(int ac, char **argv) tmp_addr_in = (struct sockaddr_in *)&tmp_addr; snprintf(filename, sizeof(filename), "%s_kern.o", argv[0]); + obj = bpf_object__open_file(filename, NULL); + if (libbpf_get_error(obj)) { + fprintf(stderr, "ERROR: opening BPF object file failed\n"); + return 0; + } + + prog = bpf_object__find_program_by_name(obj, "bpf_prog1"); + if (libbpf_get_error(prog)) { + fprintf(stderr, "ERROR: finding a prog in obj file failed\n"); + goto cleanup; + } + + /* load BPF program */ + if (bpf_object__load(obj)) { + fprintf(stderr, "ERROR: loading BPF object file failed\n"); + goto cleanup; + } + + map_fd = bpf_object__find_map_fd_by_name(obj, "dnat_map"); + if (map_fd < 0) { + fprintf(stderr, "ERROR: finding a map in obj file failed\n"); + goto cleanup; + } - if (load_bpf_file(filename)) { - printf("%s", bpf_log_buf); - return 1; + link = bpf_program__attach(prog); + if (libbpf_get_error(link)) { + fprintf(stderr, "ERROR: bpf_program__attach failed\n"); + link = NULL; + goto cleanup; } assert((serverfd = socket(AF_INET, SOCK_STREAM, 0)) > 0); @@ -51,7 +77,7 @@ int main(int ac, char **argv) mapped_addr_in->sin_port = htons(5555); mapped_addr_in->sin_addr.s_addr = inet_addr("255.255.255.255"); - assert(!bpf_map_update_elem(map_fd[0], &mapped_addr, &serv_addr, BPF_ANY)); + assert(!bpf_map_update_elem(map_fd, &mapped_addr, &serv_addr, BPF_ANY)); assert(listen(serverfd, 5) == 0); @@ -75,5 +101,8 @@ int main(int ac, char **argv) /* Is the server's getsockname = the socket getpeername */ assert(memcmp(&serv_addr, &tmp_addr, sizeof(struct sockaddr_in)) == 0); +cleanup: + bpf_link__destroy(link); + bpf_object__close(obj); return 0; } diff --git a/samples/bpf/trace_output_kern.c b/samples/bpf/trace_output_kern.c index 1d7d422cae6f..b64815af0943 100644 --- a/samples/bpf/trace_output_kern.c +++ b/samples/bpf/trace_output_kern.c @@ -2,15 +2,16 @@ #include <linux/version.h> #include <uapi/linux/bpf.h> #include <bpf/bpf_helpers.h> +#include "trace_common.h" -struct bpf_map_def SEC("maps") my_map = { - .type = BPF_MAP_TYPE_PERF_EVENT_ARRAY, - .key_size = sizeof(int), - .value_size = sizeof(u32), - .max_entries = 2, -}; +struct { + __uint(type, BPF_MAP_TYPE_PERF_EVENT_ARRAY); + __uint(key_size, sizeof(int)); + __uint(value_size, sizeof(u32)); + __uint(max_entries, 2); +} my_map SEC(".maps"); -SEC("kprobe/sys_write") +SEC("kprobe/" SYSCALL(sys_write)) int bpf_prog1(struct pt_regs *ctx) { struct S { diff --git a/samples/bpf/trace_output_user.c b/samples/bpf/trace_output_user.c index 60a17dd05345..364b98764d54 100644 --- a/samples/bpf/trace_output_user.c +++ b/samples/bpf/trace_output_user.c @@ -1,23 +1,10 @@ // SPDX-License-Identifier: GPL-2.0-only #include <stdio.h> -#include <unistd.h> -#include <stdlib.h> -#include <stdbool.h> -#include <string.h> #include <fcntl.h> #include <poll.h> -#include <linux/perf_event.h> -#include <linux/bpf.h> -#include <errno.h> -#include <assert.h> -#include <sys/syscall.h> -#include <sys/ioctl.h> -#include <sys/mman.h> #include <time.h> #include <signal.h> #include <bpf/libbpf.h> -#include "bpf_load.h" -#include "perf-sys.h" static __u64 time_get_ns(void) { @@ -57,20 +44,48 @@ static void print_bpf_output(void *ctx, int cpu, void *data, __u32 size) int main(int argc, char **argv) { struct perf_buffer_opts pb_opts = {}; + struct bpf_link *link = NULL; + struct bpf_program *prog; struct perf_buffer *pb; + struct bpf_object *obj; + int map_fd, ret = 0; char filename[256]; FILE *f; - int ret; snprintf(filename, sizeof(filename), "%s_kern.o", argv[0]); + obj = bpf_object__open_file(filename, NULL); + if (libbpf_get_error(obj)) { + fprintf(stderr, "ERROR: opening BPF object file failed\n"); + return 0; + } - if (load_bpf_file(filename)) { - printf("%s", bpf_log_buf); - return 1; + /* load BPF program */ + if (bpf_object__load(obj)) { + fprintf(stderr, "ERROR: loading BPF object file failed\n"); + goto cleanup; + } + + map_fd = bpf_object__find_map_fd_by_name(obj, "my_map"); + if (map_fd < 0) { + fprintf(stderr, "ERROR: finding a map in obj file failed\n"); + goto cleanup; + } + + prog = bpf_object__find_program_by_name(obj, "bpf_prog1"); + if (libbpf_get_error(prog)) { + fprintf(stderr, "ERROR: finding a prog in obj file failed\n"); + goto cleanup; + } + + link = bpf_program__attach(prog); + if (libbpf_get_error(link)) { + fprintf(stderr, "ERROR: bpf_program__attach failed\n"); + link = NULL; + goto cleanup; } pb_opts.sample_cb = print_bpf_output; - pb = perf_buffer__new(map_fd[0], 8, &pb_opts); + pb = perf_buffer__new(map_fd, 8, &pb_opts); ret = libbpf_get_error(pb); if (ret) { printf("failed to setup perf_buffer: %d\n", ret); @@ -84,5 +99,9 @@ int main(int argc, char **argv) while ((ret = perf_buffer__poll(pb, 1000)) >= 0 && cnt < MAX_CNT) { } kill(0, SIGINT); + +cleanup: + bpf_link__destroy(link); + bpf_object__close(obj); return ret; } diff --git a/samples/bpf/tracex3_kern.c b/samples/bpf/tracex3_kern.c index 659613c19a82..710a4410b2fb 100644 --- a/samples/bpf/tracex3_kern.c +++ b/samples/bpf/tracex3_kern.c @@ -49,7 +49,7 @@ struct { __uint(max_entries, SLOTS); } lat_map SEC(".maps"); -SEC("kprobe/blk_account_io_completion") +SEC("kprobe/blk_account_io_done") int bpf_prog2(struct pt_regs *ctx) { long rq = PT_REGS_PARM1(ctx); diff --git a/samples/bpf/xdpsock_user.c b/samples/bpf/xdpsock_user.c index 19c679456a0e..4cead341ae57 100644 --- a/samples/bpf/xdpsock_user.c +++ b/samples/bpf/xdpsock_user.c @@ -613,7 +613,16 @@ static struct xsk_umem_info *xsk_configure_umem(void *buffer, u64 size) { struct xsk_umem_info *umem; struct xsk_umem_config cfg = { - .fill_size = XSK_RING_PROD__DEFAULT_NUM_DESCS, + /* We recommend that you set the fill ring size >= HW RX ring size + + * AF_XDP RX ring size. Make sure you fill up the fill ring + * with buffers at regular intervals, and you will with this setting + * avoid allocation failures in the driver. These are usually quite + * expensive since drivers have not been written to assume that + * allocation failures are common. For regular sockets, kernel + * allocated memory is used that only runs out in OOM situations + * that should be rare. + */ + .fill_size = XSK_RING_PROD__DEFAULT_NUM_DESCS * 2, .comp_size = XSK_RING_CONS__DEFAULT_NUM_DESCS, .frame_size = opt_xsk_frame_size, .frame_headroom = XSK_UMEM__DEFAULT_FRAME_HEADROOM, @@ -640,13 +649,13 @@ static void xsk_populate_fill_ring(struct xsk_umem_info *umem) u32 idx; ret = xsk_ring_prod__reserve(&umem->fq, - XSK_RING_PROD__DEFAULT_NUM_DESCS, &idx); - if (ret != XSK_RING_PROD__DEFAULT_NUM_DESCS) + XSK_RING_PROD__DEFAULT_NUM_DESCS * 2, &idx); + if (ret != XSK_RING_PROD__DEFAULT_NUM_DESCS * 2) exit_with_error(-ret); - for (i = 0; i < XSK_RING_PROD__DEFAULT_NUM_DESCS; i++) + for (i = 0; i < XSK_RING_PROD__DEFAULT_NUM_DESCS * 2; i++) *xsk_ring_prod__fill_addr(&umem->fq, idx++) = i * opt_xsk_frame_size; - xsk_ring_prod__submit(&umem->fq, XSK_RING_PROD__DEFAULT_NUM_DESCS); + xsk_ring_prod__submit(&umem->fq, XSK_RING_PROD__DEFAULT_NUM_DESCS * 2); } static struct xsk_socket_info *xsk_configure_socket(struct xsk_umem_info *umem, @@ -888,9 +897,6 @@ static inline void complete_tx_l2fwd(struct xsk_socket_info *xsk, if (!xsk->outstanding_tx) return; - if (!opt_need_wakeup || xsk_ring_prod__needs_wakeup(&xsk->tx)) - kick_tx(xsk); - ndescs = (xsk->outstanding_tx > opt_batch_size) ? opt_batch_size : xsk->outstanding_tx; @@ -1004,7 +1010,7 @@ static void rx_drop_all(void) } } -static void tx_only(struct xsk_socket_info *xsk, u32 frame_nb, int batch_size) +static void tx_only(struct xsk_socket_info *xsk, u32 *frame_nb, int batch_size) { u32 idx; unsigned int i; @@ -1017,14 +1023,14 @@ static void tx_only(struct xsk_socket_info *xsk, u32 frame_nb, int batch_size) for (i = 0; i < batch_size; i++) { struct xdp_desc *tx_desc = xsk_ring_prod__tx_desc(&xsk->tx, idx + i); - tx_desc->addr = (frame_nb + i) << XSK_UMEM__DEFAULT_FRAME_SHIFT; + tx_desc->addr = (*frame_nb + i) << XSK_UMEM__DEFAULT_FRAME_SHIFT; tx_desc->len = PKT_SIZE; } xsk_ring_prod__submit(&xsk->tx, batch_size); xsk->outstanding_tx += batch_size; - frame_nb += batch_size; - frame_nb %= NUM_FRAMES; + *frame_nb += batch_size; + *frame_nb %= NUM_FRAMES; complete_tx_only(xsk, batch_size); } @@ -1080,7 +1086,7 @@ static void tx_only_all(void) } for (i = 0; i < num_socks; i++) - tx_only(xsks[i], frame_nb[i], batch_size); + tx_only(xsks[i], &frame_nb[i], batch_size); pkt_cnt += batch_size; diff --git a/samples/bpf/xsk_fwd.c b/samples/bpf/xsk_fwd.c new file mode 100644 index 000000000000..1cd97c84c337 --- /dev/null +++ b/samples/bpf/xsk_fwd.c @@ -0,0 +1,1085 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright(c) 2020 Intel Corporation. */ + +#define _GNU_SOURCE +#include <poll.h> +#include <pthread.h> +#include <signal.h> +#include <sched.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <sys/mman.h> +#include <sys/resource.h> +#include <sys/socket.h> +#include <sys/types.h> +#include <time.h> +#include <unistd.h> +#include <getopt.h> +#include <netinet/ether.h> +#include <net/if.h> + +#include <linux/bpf.h> +#include <linux/if_link.h> +#include <linux/if_xdp.h> + +#include <bpf/libbpf.h> +#include <bpf/xsk.h> +#include <bpf/bpf.h> + +#define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0])) + +typedef __u64 u64; +typedef __u32 u32; +typedef __u16 u16; +typedef __u8 u8; + +/* This program illustrates the packet forwarding between multiple AF_XDP + * sockets in multi-threaded environment. All threads are sharing a common + * buffer pool, with each socket having its own private buffer cache. + * + * Example 1: Single thread handling two sockets. The packets received by socket + * A (interface IFA, queue QA) are forwarded to socket B (interface IFB, queue + * QB), while the packets received by socket B are forwarded to socket A. The + * thread is running on CPU core X: + * + * ./xsk_fwd -i IFA -q QA -i IFB -q QB -c X + * + * Example 2: Two threads, each handling two sockets. The thread running on CPU + * core X forwards all the packets received by socket A to socket B, and all the + * packets received by socket B to socket A. The thread running on CPU core Y is + * performing the same packet forwarding between sockets C and D: + * + * ./xsk_fwd -i IFA -q QA -i IFB -q QB -i IFC -q QC -i IFD -q QD + * -c CX -c CY + */ + +/* + * Buffer pool and buffer cache + * + * For packet forwarding, the packet buffers are typically allocated from the + * pool for packet reception and freed back to the pool for further reuse once + * the packet transmission is completed. + * + * The buffer pool is shared between multiple threads. In order to minimize the + * access latency to the shared buffer pool, each thread creates one (or + * several) buffer caches, which, unlike the buffer pool, are private to the + * thread that creates them and therefore cannot be shared with other threads. + * The access to the shared pool is only needed either (A) when the cache gets + * empty due to repeated buffer allocations and it needs to be replenished from + * the pool, or (B) when the cache gets full due to repeated buffer free and it + * needs to be flushed back to the pull. + * + * In a packet forwarding system, a packet received on any input port can + * potentially be transmitted on any output port, depending on the forwarding + * configuration. For AF_XDP sockets, for this to work with zero-copy of the + * packet buffers when, it is required that the buffer pool memory fits into the + * UMEM area shared by all the sockets. + */ + +struct bpool_params { + u32 n_buffers; + u32 buffer_size; + int mmap_flags; + + u32 n_users_max; + u32 n_buffers_per_slab; +}; + +/* This buffer pool implementation organizes the buffers into equally sized + * slabs of *n_buffers_per_slab*. Initially, there are *n_slabs* slabs in the + * pool that are completely filled with buffer pointers (full slabs). + * + * Each buffer cache has a slab for buffer allocation and a slab for buffer + * free, with both of these slabs initially empty. When the cache's allocation + * slab goes empty, it is swapped with one of the available full slabs from the + * pool, if any is available. When the cache's free slab goes full, it is + * swapped for one of the empty slabs from the pool, which is guaranteed to + * succeed. + * + * Partially filled slabs never get traded between the cache and the pool + * (except when the cache itself is destroyed), which enables fast operation + * through pointer swapping. + */ +struct bpool { + struct bpool_params params; + pthread_mutex_t lock; + void *addr; + + u64 **slabs; + u64 **slabs_reserved; + u64 *buffers; + u64 *buffers_reserved; + + u64 n_slabs; + u64 n_slabs_reserved; + u64 n_buffers; + + u64 n_slabs_available; + u64 n_slabs_reserved_available; + + struct xsk_umem_config umem_cfg; + struct xsk_ring_prod umem_fq; + struct xsk_ring_cons umem_cq; + struct xsk_umem *umem; +}; + +static struct bpool * +bpool_init(struct bpool_params *params, + struct xsk_umem_config *umem_cfg) +{ + struct rlimit r = {RLIM_INFINITY, RLIM_INFINITY}; + u64 n_slabs, n_slabs_reserved, n_buffers, n_buffers_reserved; + u64 slabs_size, slabs_reserved_size; + u64 buffers_size, buffers_reserved_size; + u64 total_size, i; + struct bpool *bp; + u8 *p; + int status; + + /* mmap prep. */ + if (setrlimit(RLIMIT_MEMLOCK, &r)) + return NULL; + + /* bpool internals dimensioning. */ + n_slabs = (params->n_buffers + params->n_buffers_per_slab - 1) / + params->n_buffers_per_slab; + n_slabs_reserved = params->n_users_max * 2; + n_buffers = n_slabs * params->n_buffers_per_slab; + n_buffers_reserved = n_slabs_reserved * params->n_buffers_per_slab; + + slabs_size = n_slabs * sizeof(u64 *); + slabs_reserved_size = n_slabs_reserved * sizeof(u64 *); + buffers_size = n_buffers * sizeof(u64); + buffers_reserved_size = n_buffers_reserved * sizeof(u64); + + total_size = sizeof(struct bpool) + + slabs_size + slabs_reserved_size + + buffers_size + buffers_reserved_size; + + /* bpool memory allocation. */ + p = calloc(total_size, sizeof(u8)); + if (!p) + return NULL; + + /* bpool memory initialization. */ + bp = (struct bpool *)p; + memcpy(&bp->params, params, sizeof(*params)); + bp->params.n_buffers = n_buffers; + + bp->slabs = (u64 **)&p[sizeof(struct bpool)]; + bp->slabs_reserved = (u64 **)&p[sizeof(struct bpool) + + slabs_size]; + bp->buffers = (u64 *)&p[sizeof(struct bpool) + + slabs_size + slabs_reserved_size]; + bp->buffers_reserved = (u64 *)&p[sizeof(struct bpool) + + slabs_size + slabs_reserved_size + buffers_size]; + + bp->n_slabs = n_slabs; + bp->n_slabs_reserved = n_slabs_reserved; + bp->n_buffers = n_buffers; + + for (i = 0; i < n_slabs; i++) + bp->slabs[i] = &bp->buffers[i * params->n_buffers_per_slab]; + bp->n_slabs_available = n_slabs; + + for (i = 0; i < n_slabs_reserved; i++) + bp->slabs_reserved[i] = &bp->buffers_reserved[i * + params->n_buffers_per_slab]; + bp->n_slabs_reserved_available = n_slabs_reserved; + + for (i = 0; i < n_buffers; i++) + bp->buffers[i] = i * params->buffer_size; + + /* lock. */ + status = pthread_mutex_init(&bp->lock, NULL); + if (status) { + free(p); + return NULL; + } + + /* mmap. */ + bp->addr = mmap(NULL, + n_buffers * params->buffer_size, + PROT_READ | PROT_WRITE, + MAP_PRIVATE | MAP_ANONYMOUS | params->mmap_flags, + -1, + 0); + if (bp->addr == MAP_FAILED) { + pthread_mutex_destroy(&bp->lock); + free(p); + return NULL; + } + + /* umem. */ + status = xsk_umem__create(&bp->umem, + bp->addr, + bp->params.n_buffers * bp->params.buffer_size, + &bp->umem_fq, + &bp->umem_cq, + umem_cfg); + if (status) { + munmap(bp->addr, bp->params.n_buffers * bp->params.buffer_size); + pthread_mutex_destroy(&bp->lock); + free(p); + return NULL; + } + memcpy(&bp->umem_cfg, umem_cfg, sizeof(*umem_cfg)); + + return bp; +} + +static void +bpool_free(struct bpool *bp) +{ + if (!bp) + return; + + xsk_umem__delete(bp->umem); + munmap(bp->addr, bp->params.n_buffers * bp->params.buffer_size); + pthread_mutex_destroy(&bp->lock); + free(bp); +} + +struct bcache { + struct bpool *bp; + + u64 *slab_cons; + u64 *slab_prod; + + u64 n_buffers_cons; + u64 n_buffers_prod; +}; + +static u32 +bcache_slab_size(struct bcache *bc) +{ + struct bpool *bp = bc->bp; + + return bp->params.n_buffers_per_slab; +} + +static struct bcache * +bcache_init(struct bpool *bp) +{ + struct bcache *bc; + + bc = calloc(1, sizeof(struct bcache)); + if (!bc) + return NULL; + + bc->bp = bp; + bc->n_buffers_cons = 0; + bc->n_buffers_prod = 0; + + pthread_mutex_lock(&bp->lock); + if (bp->n_slabs_reserved_available == 0) { + pthread_mutex_unlock(&bp->lock); + free(bc); + return NULL; + } + + bc->slab_cons = bp->slabs_reserved[bp->n_slabs_reserved_available - 1]; + bc->slab_prod = bp->slabs_reserved[bp->n_slabs_reserved_available - 2]; + bp->n_slabs_reserved_available -= 2; + pthread_mutex_unlock(&bp->lock); + + return bc; +} + +static void +bcache_free(struct bcache *bc) +{ + struct bpool *bp; + + if (!bc) + return; + + /* In order to keep this example simple, the case of freeing any + * existing buffers from the cache back to the pool is ignored. + */ + + bp = bc->bp; + pthread_mutex_lock(&bp->lock); + bp->slabs_reserved[bp->n_slabs_reserved_available] = bc->slab_prod; + bp->slabs_reserved[bp->n_slabs_reserved_available + 1] = bc->slab_cons; + bp->n_slabs_reserved_available += 2; + pthread_mutex_unlock(&bp->lock); + + free(bc); +} + +/* To work correctly, the implementation requires that the *n_buffers* input + * argument is never greater than the buffer pool's *n_buffers_per_slab*. This + * is typically the case, with one exception taking place when large number of + * buffers are allocated at init time (e.g. for the UMEM fill queue setup). + */ +static inline u32 +bcache_cons_check(struct bcache *bc, u32 n_buffers) +{ + struct bpool *bp = bc->bp; + u64 n_buffers_per_slab = bp->params.n_buffers_per_slab; + u64 n_buffers_cons = bc->n_buffers_cons; + u64 n_slabs_available; + u64 *slab_full; + + /* + * Consumer slab is not empty: Use what's available locally. Do not + * look for more buffers from the pool when the ask can only be + * partially satisfied. + */ + if (n_buffers_cons) + return (n_buffers_cons < n_buffers) ? + n_buffers_cons : + n_buffers; + + /* + * Consumer slab is empty: look to trade the current consumer slab + * (full) for a full slab from the pool, if any is available. + */ + pthread_mutex_lock(&bp->lock); + n_slabs_available = bp->n_slabs_available; + if (!n_slabs_available) { + pthread_mutex_unlock(&bp->lock); + return 0; + } + + n_slabs_available--; + slab_full = bp->slabs[n_slabs_available]; + bp->slabs[n_slabs_available] = bc->slab_cons; + bp->n_slabs_available = n_slabs_available; + pthread_mutex_unlock(&bp->lock); + + bc->slab_cons = slab_full; + bc->n_buffers_cons = n_buffers_per_slab; + return n_buffers; +} + +static inline u64 +bcache_cons(struct bcache *bc) +{ + u64 n_buffers_cons = bc->n_buffers_cons - 1; + u64 buffer; + + buffer = bc->slab_cons[n_buffers_cons]; + bc->n_buffers_cons = n_buffers_cons; + return buffer; +} + +static inline void +bcache_prod(struct bcache *bc, u64 buffer) +{ + struct bpool *bp = bc->bp; + u64 n_buffers_per_slab = bp->params.n_buffers_per_slab; + u64 n_buffers_prod = bc->n_buffers_prod; + u64 n_slabs_available; + u64 *slab_empty; + + /* + * Producer slab is not yet full: store the current buffer to it. + */ + if (n_buffers_prod < n_buffers_per_slab) { + bc->slab_prod[n_buffers_prod] = buffer; + bc->n_buffers_prod = n_buffers_prod + 1; + return; + } + + /* + * Producer slab is full: trade the cache's current producer slab + * (full) for an empty slab from the pool, then store the current + * buffer to the new producer slab. As one full slab exists in the + * cache, it is guaranteed that there is at least one empty slab + * available in the pool. + */ + pthread_mutex_lock(&bp->lock); + n_slabs_available = bp->n_slabs_available; + slab_empty = bp->slabs[n_slabs_available]; + bp->slabs[n_slabs_available] = bc->slab_prod; + bp->n_slabs_available = n_slabs_available + 1; + pthread_mutex_unlock(&bp->lock); + + slab_empty[0] = buffer; + bc->slab_prod = slab_empty; + bc->n_buffers_prod = 1; +} + +/* + * Port + * + * Each of the forwarding ports sits on top of an AF_XDP socket. In order for + * packet forwarding to happen with no packet buffer copy, all the sockets need + * to share the same UMEM area, which is used as the buffer pool memory. + */ +#ifndef MAX_BURST_RX +#define MAX_BURST_RX 64 +#endif + +#ifndef MAX_BURST_TX +#define MAX_BURST_TX 64 +#endif + +struct burst_rx { + u64 addr[MAX_BURST_RX]; + u32 len[MAX_BURST_RX]; +}; + +struct burst_tx { + u64 addr[MAX_BURST_TX]; + u32 len[MAX_BURST_TX]; + u32 n_pkts; +}; + +struct port_params { + struct xsk_socket_config xsk_cfg; + struct bpool *bp; + const char *iface; + u32 iface_queue; +}; + +struct port { + struct port_params params; + + struct bcache *bc; + + struct xsk_ring_cons rxq; + struct xsk_ring_prod txq; + struct xsk_ring_prod umem_fq; + struct xsk_ring_cons umem_cq; + struct xsk_socket *xsk; + int umem_fq_initialized; + + u64 n_pkts_rx; + u64 n_pkts_tx; +}; + +static void +port_free(struct port *p) +{ + if (!p) + return; + + /* To keep this example simple, the code to free the buffers from the + * socket's receive and transmit queues, as well as from the UMEM fill + * and completion queues, is not included. + */ + + if (p->xsk) + xsk_socket__delete(p->xsk); + + bcache_free(p->bc); + + free(p); +} + +static struct port * +port_init(struct port_params *params) +{ + struct port *p; + u32 umem_fq_size, pos = 0; + int status, i; + + /* Memory allocation and initialization. */ + p = calloc(sizeof(struct port), 1); + if (!p) + return NULL; + + memcpy(&p->params, params, sizeof(p->params)); + umem_fq_size = params->bp->umem_cfg.fill_size; + + /* bcache. */ + p->bc = bcache_init(params->bp); + if (!p->bc || + (bcache_slab_size(p->bc) < umem_fq_size) || + (bcache_cons_check(p->bc, umem_fq_size) < umem_fq_size)) { + port_free(p); + return NULL; + } + + /* xsk socket. */ + status = xsk_socket__create_shared(&p->xsk, + params->iface, + params->iface_queue, + params->bp->umem, + &p->rxq, + &p->txq, + &p->umem_fq, + &p->umem_cq, + ¶ms->xsk_cfg); + if (status) { + port_free(p); + return NULL; + } + + /* umem fq. */ + xsk_ring_prod__reserve(&p->umem_fq, umem_fq_size, &pos); + + for (i = 0; i < umem_fq_size; i++) + *xsk_ring_prod__fill_addr(&p->umem_fq, pos + i) = + bcache_cons(p->bc); + + xsk_ring_prod__submit(&p->umem_fq, umem_fq_size); + p->umem_fq_initialized = 1; + + return p; +} + +static inline u32 +port_rx_burst(struct port *p, struct burst_rx *b) +{ + u32 n_pkts, pos, i; + + /* Free buffers for FQ replenish. */ + n_pkts = ARRAY_SIZE(b->addr); + + n_pkts = bcache_cons_check(p->bc, n_pkts); + if (!n_pkts) + return 0; + + /* RXQ. */ + n_pkts = xsk_ring_cons__peek(&p->rxq, n_pkts, &pos); + if (!n_pkts) { + if (xsk_ring_prod__needs_wakeup(&p->umem_fq)) { + struct pollfd pollfd = { + .fd = xsk_socket__fd(p->xsk), + .events = POLLIN, + }; + + poll(&pollfd, 1, 0); + } + return 0; + } + + for (i = 0; i < n_pkts; i++) { + b->addr[i] = xsk_ring_cons__rx_desc(&p->rxq, pos + i)->addr; + b->len[i] = xsk_ring_cons__rx_desc(&p->rxq, pos + i)->len; + } + + xsk_ring_cons__release(&p->rxq, n_pkts); + p->n_pkts_rx += n_pkts; + + /* UMEM FQ. */ + for ( ; ; ) { + int status; + + status = xsk_ring_prod__reserve(&p->umem_fq, n_pkts, &pos); + if (status == n_pkts) + break; + + if (xsk_ring_prod__needs_wakeup(&p->umem_fq)) { + struct pollfd pollfd = { + .fd = xsk_socket__fd(p->xsk), + .events = POLLIN, + }; + + poll(&pollfd, 1, 0); + } + } + + for (i = 0; i < n_pkts; i++) + *xsk_ring_prod__fill_addr(&p->umem_fq, pos + i) = + bcache_cons(p->bc); + + xsk_ring_prod__submit(&p->umem_fq, n_pkts); + + return n_pkts; +} + +static inline void +port_tx_burst(struct port *p, struct burst_tx *b) +{ + u32 n_pkts, pos, i; + int status; + + /* UMEM CQ. */ + n_pkts = p->params.bp->umem_cfg.comp_size; + + n_pkts = xsk_ring_cons__peek(&p->umem_cq, n_pkts, &pos); + + for (i = 0; i < n_pkts; i++) { + u64 addr = *xsk_ring_cons__comp_addr(&p->umem_cq, pos + i); + + bcache_prod(p->bc, addr); + } + + xsk_ring_cons__release(&p->umem_cq, n_pkts); + + /* TXQ. */ + n_pkts = b->n_pkts; + + for ( ; ; ) { + status = xsk_ring_prod__reserve(&p->txq, n_pkts, &pos); + if (status == n_pkts) + break; + + if (xsk_ring_prod__needs_wakeup(&p->txq)) + sendto(xsk_socket__fd(p->xsk), NULL, 0, MSG_DONTWAIT, + NULL, 0); + } + + for (i = 0; i < n_pkts; i++) { + xsk_ring_prod__tx_desc(&p->txq, pos + i)->addr = b->addr[i]; + xsk_ring_prod__tx_desc(&p->txq, pos + i)->len = b->len[i]; + } + + xsk_ring_prod__submit(&p->txq, n_pkts); + if (xsk_ring_prod__needs_wakeup(&p->txq)) + sendto(xsk_socket__fd(p->xsk), NULL, 0, MSG_DONTWAIT, NULL, 0); + p->n_pkts_tx += n_pkts; +} + +/* + * Thread + * + * Packet forwarding threads. + */ +#ifndef MAX_PORTS_PER_THREAD +#define MAX_PORTS_PER_THREAD 16 +#endif + +struct thread_data { + struct port *ports_rx[MAX_PORTS_PER_THREAD]; + struct port *ports_tx[MAX_PORTS_PER_THREAD]; + u32 n_ports_rx; + struct burst_rx burst_rx; + struct burst_tx burst_tx[MAX_PORTS_PER_THREAD]; + u32 cpu_core_id; + int quit; +}; + +static void swap_mac_addresses(void *data) +{ + struct ether_header *eth = (struct ether_header *)data; + struct ether_addr *src_addr = (struct ether_addr *)ð->ether_shost; + struct ether_addr *dst_addr = (struct ether_addr *)ð->ether_dhost; + struct ether_addr tmp; + + tmp = *src_addr; + *src_addr = *dst_addr; + *dst_addr = tmp; +} + +static void * +thread_func(void *arg) +{ + struct thread_data *t = arg; + cpu_set_t cpu_cores; + u32 i; + + CPU_ZERO(&cpu_cores); + CPU_SET(t->cpu_core_id, &cpu_cores); + pthread_setaffinity_np(pthread_self(), sizeof(cpu_set_t), &cpu_cores); + + for (i = 0; !t->quit; i = (i + 1) & (t->n_ports_rx - 1)) { + struct port *port_rx = t->ports_rx[i]; + struct port *port_tx = t->ports_tx[i]; + struct burst_rx *brx = &t->burst_rx; + struct burst_tx *btx = &t->burst_tx[i]; + u32 n_pkts, j; + + /* RX. */ + n_pkts = port_rx_burst(port_rx, brx); + if (!n_pkts) + continue; + + /* Process & TX. */ + for (j = 0; j < n_pkts; j++) { + u64 addr = xsk_umem__add_offset_to_addr(brx->addr[j]); + u8 *pkt = xsk_umem__get_data(port_rx->params.bp->addr, + addr); + + swap_mac_addresses(pkt); + + btx->addr[btx->n_pkts] = brx->addr[j]; + btx->len[btx->n_pkts] = brx->len[j]; + btx->n_pkts++; + + if (btx->n_pkts == MAX_BURST_TX) { + port_tx_burst(port_tx, btx); + btx->n_pkts = 0; + } + } + } + + return NULL; +} + +/* + * Process + */ +static const struct bpool_params bpool_params_default = { + .n_buffers = 64 * 1024, + .buffer_size = XSK_UMEM__DEFAULT_FRAME_SIZE, + .mmap_flags = 0, + + .n_users_max = 16, + .n_buffers_per_slab = XSK_RING_PROD__DEFAULT_NUM_DESCS * 2, +}; + +static const struct xsk_umem_config umem_cfg_default = { + .fill_size = XSK_RING_PROD__DEFAULT_NUM_DESCS * 2, + .comp_size = XSK_RING_CONS__DEFAULT_NUM_DESCS, + .frame_size = XSK_UMEM__DEFAULT_FRAME_SIZE, + .frame_headroom = XSK_UMEM__DEFAULT_FRAME_HEADROOM, + .flags = 0, +}; + +static const struct port_params port_params_default = { + .xsk_cfg = { + .rx_size = XSK_RING_CONS__DEFAULT_NUM_DESCS, + .tx_size = XSK_RING_PROD__DEFAULT_NUM_DESCS, + .libbpf_flags = 0, + .xdp_flags = XDP_FLAGS_DRV_MODE, + .bind_flags = XDP_USE_NEED_WAKEUP | XDP_ZEROCOPY, + }, + + .bp = NULL, + .iface = NULL, + .iface_queue = 0, +}; + +#ifndef MAX_PORTS +#define MAX_PORTS 64 +#endif + +#ifndef MAX_THREADS +#define MAX_THREADS 64 +#endif + +static struct bpool_params bpool_params; +static struct xsk_umem_config umem_cfg; +static struct bpool *bp; + +static struct port_params port_params[MAX_PORTS]; +static struct port *ports[MAX_PORTS]; +static u64 n_pkts_rx[MAX_PORTS]; +static u64 n_pkts_tx[MAX_PORTS]; +static int n_ports; + +static pthread_t threads[MAX_THREADS]; +static struct thread_data thread_data[MAX_THREADS]; +static int n_threads; + +static void +print_usage(char *prog_name) +{ + const char *usage = + "Usage:\n" + "\t%s [ -b SIZE ] -c CORE -i INTERFACE [ -q QUEUE ]\n" + "\n" + "-c CORE CPU core to run a packet forwarding thread\n" + " on. May be invoked multiple times.\n" + "\n" + "-b SIZE Number of buffers in the buffer pool shared\n" + " by all the forwarding threads. Default: %u.\n" + "\n" + "-i INTERFACE Network interface. Each (INTERFACE, QUEUE)\n" + " pair specifies one forwarding port. May be\n" + " invoked multiple times.\n" + "\n" + "-q QUEUE Network interface queue for RX and TX. Each\n" + " (INTERFACE, QUEUE) pair specified one\n" + " forwarding port. Default: %u. May be invoked\n" + " multiple times.\n" + "\n"; + printf(usage, + prog_name, + bpool_params_default.n_buffers, + port_params_default.iface_queue); +} + +static int +parse_args(int argc, char **argv) +{ + struct option lgopts[] = { + { NULL, 0, 0, 0 } + }; + int opt, option_index; + + /* Parse the input arguments. */ + for ( ; ;) { + opt = getopt_long(argc, argv, "c:i:q:", lgopts, &option_index); + if (opt == EOF) + break; + + switch (opt) { + case 'b': + bpool_params.n_buffers = atoi(optarg); + break; + + case 'c': + if (n_threads == MAX_THREADS) { + printf("Max number of threads (%d) reached.\n", + MAX_THREADS); + return -1; + } + + thread_data[n_threads].cpu_core_id = atoi(optarg); + n_threads++; + break; + + case 'i': + if (n_ports == MAX_PORTS) { + printf("Max number of ports (%d) reached.\n", + MAX_PORTS); + return -1; + } + + port_params[n_ports].iface = optarg; + port_params[n_ports].iface_queue = 0; + n_ports++; + break; + + case 'q': + if (n_ports == 0) { + printf("No port specified for queue.\n"); + return -1; + } + port_params[n_ports - 1].iface_queue = atoi(optarg); + break; + + default: + printf("Illegal argument.\n"); + return -1; + } + } + + optind = 1; /* reset getopt lib */ + + /* Check the input arguments. */ + if (!n_ports) { + printf("No ports specified.\n"); + return -1; + } + + if (!n_threads) { + printf("No threads specified.\n"); + return -1; + } + + if (n_ports % n_threads) { + printf("Ports cannot be evenly distributed to threads.\n"); + return -1; + } + + return 0; +} + +static void +print_port(u32 port_id) +{ + struct port *port = ports[port_id]; + + printf("Port %u: interface = %s, queue = %u\n", + port_id, port->params.iface, port->params.iface_queue); +} + +static void +print_thread(u32 thread_id) +{ + struct thread_data *t = &thread_data[thread_id]; + u32 i; + + printf("Thread %u (CPU core %u): ", + thread_id, t->cpu_core_id); + + for (i = 0; i < t->n_ports_rx; i++) { + struct port *port_rx = t->ports_rx[i]; + struct port *port_tx = t->ports_tx[i]; + + printf("(%s, %u) -> (%s, %u), ", + port_rx->params.iface, + port_rx->params.iface_queue, + port_tx->params.iface, + port_tx->params.iface_queue); + } + + printf("\n"); +} + +static void +print_port_stats_separator(void) +{ + printf("+-%4s-+-%12s-+-%13s-+-%12s-+-%13s-+\n", + "----", + "------------", + "-------------", + "------------", + "-------------"); +} + +static void +print_port_stats_header(void) +{ + print_port_stats_separator(); + printf("| %4s | %12s | %13s | %12s | %13s |\n", + "Port", + "RX packets", + "RX rate (pps)", + "TX packets", + "TX_rate (pps)"); + print_port_stats_separator(); +} + +static void +print_port_stats_trailer(void) +{ + print_port_stats_separator(); + printf("\n"); +} + +static void +print_port_stats(int port_id, u64 ns_diff) +{ + struct port *p = ports[port_id]; + double rx_pps, tx_pps; + + rx_pps = (p->n_pkts_rx - n_pkts_rx[port_id]) * 1000000000. / ns_diff; + tx_pps = (p->n_pkts_tx - n_pkts_tx[port_id]) * 1000000000. / ns_diff; + + printf("| %4d | %12llu | %13.0f | %12llu | %13.0f |\n", + port_id, + p->n_pkts_rx, + rx_pps, + p->n_pkts_tx, + tx_pps); + + n_pkts_rx[port_id] = p->n_pkts_rx; + n_pkts_tx[port_id] = p->n_pkts_tx; +} + +static void +print_port_stats_all(u64 ns_diff) +{ + int i; + + print_port_stats_header(); + for (i = 0; i < n_ports; i++) + print_port_stats(i, ns_diff); + print_port_stats_trailer(); +} + +static int quit; + +static void +signal_handler(int sig) +{ + quit = 1; +} + +static void remove_xdp_program(void) +{ + int i; + + for (i = 0 ; i < n_ports; i++) + bpf_set_link_xdp_fd(if_nametoindex(port_params[i].iface), -1, + port_params[i].xsk_cfg.xdp_flags); +} + +int main(int argc, char **argv) +{ + struct timespec time; + u64 ns0; + int i; + + /* Parse args. */ + memcpy(&bpool_params, &bpool_params_default, + sizeof(struct bpool_params)); + memcpy(&umem_cfg, &umem_cfg_default, + sizeof(struct xsk_umem_config)); + for (i = 0; i < MAX_PORTS; i++) + memcpy(&port_params[i], &port_params_default, + sizeof(struct port_params)); + + if (parse_args(argc, argv)) { + print_usage(argv[0]); + return -1; + } + + /* Buffer pool initialization. */ + bp = bpool_init(&bpool_params, &umem_cfg); + if (!bp) { + printf("Buffer pool initialization failed.\n"); + return -1; + } + printf("Buffer pool created successfully.\n"); + + /* Ports initialization. */ + for (i = 0; i < MAX_PORTS; i++) + port_params[i].bp = bp; + + for (i = 0; i < n_ports; i++) { + ports[i] = port_init(&port_params[i]); + if (!ports[i]) { + printf("Port %d initialization failed.\n", i); + return -1; + } + print_port(i); + } + printf("All ports created successfully.\n"); + + /* Threads. */ + for (i = 0; i < n_threads; i++) { + struct thread_data *t = &thread_data[i]; + u32 n_ports_per_thread = n_ports / n_threads, j; + + for (j = 0; j < n_ports_per_thread; j++) { + t->ports_rx[j] = ports[i * n_ports_per_thread + j]; + t->ports_tx[j] = ports[i * n_ports_per_thread + + (j + 1) % n_ports_per_thread]; + } + + t->n_ports_rx = n_ports_per_thread; + + print_thread(i); + } + + for (i = 0; i < n_threads; i++) { + int status; + + status = pthread_create(&threads[i], + NULL, + thread_func, + &thread_data[i]); + if (status) { + printf("Thread %d creation failed.\n", i); + return -1; + } + } + printf("All threads created successfully.\n"); + + /* Print statistics. */ + signal(SIGINT, signal_handler); + signal(SIGTERM, signal_handler); + signal(SIGABRT, signal_handler); + + clock_gettime(CLOCK_MONOTONIC, &time); + ns0 = time.tv_sec * 1000000000UL + time.tv_nsec; + for ( ; !quit; ) { + u64 ns1, ns_diff; + + sleep(1); + clock_gettime(CLOCK_MONOTONIC, &time); + ns1 = time.tv_sec * 1000000000UL + time.tv_nsec; + ns_diff = ns1 - ns0; + ns0 = ns1; + + print_port_stats_all(ns_diff); + } + + /* Threads completion. */ + printf("Quit.\n"); + for (i = 0; i < n_threads; i++) + thread_data[i].quit = 1; + + for (i = 0; i < n_threads; i++) + pthread_join(threads[i], NULL); + + for (i = 0; i < n_ports; i++) + port_free(ports[i]); + + bpool_free(bp); + + remove_xdp_program(); + + return 0; +} |