summaryrefslogtreecommitdiff
path: root/samples
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2018-04-03 14:04:18 -0700
committerLinus Torvalds <torvalds@linux-foundation.org>2018-04-03 14:04:18 -0700
commit5bb053bef82523a8fd78d650bca81c9f114fa276 (patch)
tree58c2fe47f60bb69230bb05d57a6c9e3f47f7b1fe /samples
parentbb2407a7219760926760f0448fddf00d625e5aec (diff)
parent159f02977b2feb18a4bece5e586c838a6d26d44b (diff)
downloadlinux-next-5bb053bef82523a8fd78d650bca81c9f114fa276.tar.gz
Merge git://git.kernel.org/pub/scm/linux/kernel/git/davem/net-next
Pull networking updates from David Miller: 1) Support offloading wireless authentication to userspace via NL80211_CMD_EXTERNAL_AUTH, from Srinivas Dasari. 2) A lot of work on network namespace setup/teardown from Kirill Tkhai. Setup and cleanup of namespaces now all run asynchronously and thus performance is significantly increased. 3) Add rx/tx timestamping support to mv88e6xxx driver, from Brandon Streiff. 4) Support zerocopy on RDS sockets, from Sowmini Varadhan. 5) Use denser instruction encoding in x86 eBPF JIT, from Daniel Borkmann. 6) Support hw offload of vlan filtering in mvpp2 dreiver, from Maxime Chevallier. 7) Support grafting of child qdiscs in mlxsw driver, from Nogah Frankel. 8) Add packet forwarding tests to selftests, from Ido Schimmel. 9) Deal with sub-optimal GSO packets better in BBR congestion control, from Eric Dumazet. 10) Support 5-tuple hashing in ipv6 multipath routing, from David Ahern. 11) Add path MTU tests to selftests, from Stefano Brivio. 12) Various bits of IPSEC offloading support for mlx5, from Aviad Yehezkel, Yossi Kuperman, and Saeed Mahameed. 13) Support RSS spreading on ntuple filters in SFC driver, from Edward Cree. 14) Lots of sockmap work from John Fastabend. Applications can use eBPF to filter sendmsg and sendpage operations. 15) In-kernel receive TLS support, from Dave Watson. 16) Add XDP support to ixgbevf, this is significant because it should allow optimized XDP usage in various cloud environments. From Tony Nguyen. 17) Add new Intel E800 series "ice" ethernet driver, from Anirudh Venkataramanan et al. 18) IP fragmentation match offload support in nfp driver, from Pieter Jansen van Vuuren. 19) Support XDP redirect in i40e driver, from Björn Töpel. 20) Add BPF_RAW_TRACEPOINT program type for accessing the arguments of tracepoints in their raw form, from Alexei Starovoitov. 21) Lots of striding RQ improvements to mlx5 driver with many performance improvements, from Tariq Toukan. 22) Use rhashtable for inet frag reassembly, from Eric Dumazet. * git://git.kernel.org/pub/scm/linux/kernel/git/davem/net-next: (1678 commits) net: mvneta: improve suspend/resume net: mvneta: split rxq/txq init and txq deinit into SW and HW parts ipv6: frags: fix /proc/sys/net/ipv6/ip6frag_low_thresh net: bgmac: Fix endian access in bgmac_dma_tx_ring_free() net: bgmac: Correctly annotate register space route: check sysctl_fib_multipath_use_neigh earlier than hash fix typo in command value in drivers/net/phy/mdio-bitbang. sky2: Increase D3 delay to sky2 stops working after suspend net/mlx5e: Set EQE based as default TX interrupt moderation mode ibmvnic: Disable irqs before exiting reset from closed state net: sched: do not emit messages while holding spinlock vlan: also check phy_driver ts_info for vlan's real device Bluetooth: Mark expected switch fall-throughs Bluetooth: Set HCI_QUIRK_SIMULTANEOUS_DISCOVERY for BTUSB_QCA_ROME Bluetooth: btrsi: remove unused including <linux/version.h> Bluetooth: hci_bcm: Remove DMI quirk for the MINIX Z83-4 sh_eth: kill useless check in __sh_eth_get_regs() sh_eth: add sh_eth_cpu_data::no_xdfar flag ipv6: factorize sk_wmem_alloc updates done by __ip6_append_data() ipv4: factorize sk_wmem_alloc updates done by __ip_append_data() ...
Diffstat (limited to 'samples')
-rw-r--r--samples/bpf/Makefile5
-rw-r--r--samples/bpf/bpf_load.c22
-rw-r--r--samples/bpf/cookie_uid_helper_example.c2
-rw-r--r--samples/bpf/cpustat_kern.c281
-rw-r--r--samples/bpf/cpustat_user.c219
-rw-r--r--samples/bpf/tcbpf2_kern.c6
-rwxr-xr-xsamples/bpf/test_cgrp2_sock.sh1
-rwxr-xr-xsamples/bpf/test_cgrp2_sock2.sh3
-rw-r--r--samples/bpf/test_overhead_raw_tp_kern.c17
-rw-r--r--samples/bpf/test_overhead_user.c12
-rwxr-xr-xsamples/bpf/test_tunnel_bpf.sh5
-rw-r--r--samples/bpf/trace_event_kern.c4
-rw-r--r--samples/bpf/trace_event_user.c15
-rw-r--r--samples/bpf/xdp_redirect_user.c7
-rw-r--r--samples/sockmap/Makefile2
-rw-r--r--samples/sockmap/sockmap_kern.c239
-rwxr-xr-xsamples/sockmap/sockmap_test.sh488
-rw-r--r--samples/sockmap/sockmap_user.c360
18 files changed, 1653 insertions, 35 deletions
diff --git a/samples/bpf/Makefile b/samples/bpf/Makefile
index ec3fc8d88e87..4d6a6edd4bf6 100644
--- a/samples/bpf/Makefile
+++ b/samples/bpf/Makefile
@@ -43,6 +43,7 @@ hostprogs-y += xdp_redirect_cpu
hostprogs-y += xdp_monitor
hostprogs-y += xdp_rxq_info
hostprogs-y += syscall_tp
+hostprogs-y += cpustat
# Libbpf dependencies
LIBBPF := ../../tools/lib/bpf/bpf.o ../../tools/lib/bpf/nlattr.o
@@ -93,6 +94,7 @@ xdp_redirect_cpu-objs := bpf_load.o $(LIBBPF) xdp_redirect_cpu_user.o
xdp_monitor-objs := bpf_load.o $(LIBBPF) xdp_monitor_user.o
xdp_rxq_info-objs := bpf_load.o $(LIBBPF) xdp_rxq_info_user.o
syscall_tp-objs := bpf_load.o $(LIBBPF) syscall_tp_user.o
+cpustat-objs := bpf_load.o $(LIBBPF) cpustat_user.o
# Tell kbuild to always build the programs
always := $(hostprogs-y)
@@ -117,6 +119,7 @@ always += offwaketime_kern.o
always += spintest_kern.o
always += map_perf_test_kern.o
always += test_overhead_tp_kern.o
+always += test_overhead_raw_tp_kern.o
always += test_overhead_kprobe_kern.o
always += parse_varlen.o parse_simple.o parse_ldabs.o
always += test_cgrp2_tc_kern.o
@@ -144,6 +147,7 @@ always += xdp_monitor_kern.o
always += xdp_rxq_info_kern.o
always += xdp2skb_meta_kern.o
always += syscall_tp_kern.o
+always += cpustat_kern.o
HOSTCFLAGS += -I$(objtree)/usr/include
HOSTCFLAGS += -I$(srctree)/tools/lib/
@@ -188,6 +192,7 @@ HOSTLOADLIBES_xdp_redirect_cpu += -lelf
HOSTLOADLIBES_xdp_monitor += -lelf
HOSTLOADLIBES_xdp_rxq_info += -lelf
HOSTLOADLIBES_syscall_tp += -lelf
+HOSTLOADLIBES_cpustat += -lelf
# Allows pointing LLC/CLANG to a LLVM backend with bpf support, redefine on cmdline:
# make samples/bpf/ LLC=~/git/llvm/build/bin/llc CLANG=~/git/llvm/build/bin/clang
diff --git a/samples/bpf/bpf_load.c b/samples/bpf/bpf_load.c
index 69806d74fa53..bebe4188b4b3 100644
--- a/samples/bpf/bpf_load.c
+++ b/samples/bpf/bpf_load.c
@@ -61,12 +61,14 @@ static int load_and_attach(const char *event, struct bpf_insn *prog, int size)
bool is_kprobe = strncmp(event, "kprobe/", 7) == 0;
bool is_kretprobe = strncmp(event, "kretprobe/", 10) == 0;
bool is_tracepoint = strncmp(event, "tracepoint/", 11) == 0;
+ bool is_raw_tracepoint = strncmp(event, "raw_tracepoint/", 15) == 0;
bool is_xdp = strncmp(event, "xdp", 3) == 0;
bool is_perf_event = strncmp(event, "perf_event", 10) == 0;
bool is_cgroup_skb = strncmp(event, "cgroup/skb", 10) == 0;
bool is_cgroup_sk = strncmp(event, "cgroup/sock", 11) == 0;
bool is_sockops = strncmp(event, "sockops", 7) == 0;
bool is_sk_skb = strncmp(event, "sk_skb", 6) == 0;
+ bool is_sk_msg = strncmp(event, "sk_msg", 6) == 0;
size_t insns_cnt = size / sizeof(struct bpf_insn);
enum bpf_prog_type prog_type;
char buf[256];
@@ -84,6 +86,8 @@ static int load_and_attach(const char *event, struct bpf_insn *prog, int size)
prog_type = BPF_PROG_TYPE_KPROBE;
} else if (is_tracepoint) {
prog_type = BPF_PROG_TYPE_TRACEPOINT;
+ } else if (is_raw_tracepoint) {
+ prog_type = BPF_PROG_TYPE_RAW_TRACEPOINT;
} else if (is_xdp) {
prog_type = BPF_PROG_TYPE_XDP;
} else if (is_perf_event) {
@@ -96,6 +100,8 @@ static int load_and_attach(const char *event, struct bpf_insn *prog, int size)
prog_type = BPF_PROG_TYPE_SOCK_OPS;
} else if (is_sk_skb) {
prog_type = BPF_PROG_TYPE_SK_SKB;
+ } else if (is_sk_msg) {
+ prog_type = BPF_PROG_TYPE_SK_MSG;
} else {
printf("Unknown event '%s'\n", event);
return -1;
@@ -113,7 +119,7 @@ static int load_and_attach(const char *event, struct bpf_insn *prog, int size)
if (is_xdp || is_perf_event || is_cgroup_skb || is_cgroup_sk)
return 0;
- if (is_socket || is_sockops || is_sk_skb) {
+ if (is_socket || is_sockops || is_sk_skb || is_sk_msg) {
if (is_socket)
event += 6;
else
@@ -128,6 +134,16 @@ static int load_and_attach(const char *event, struct bpf_insn *prog, int size)
return populate_prog_array(event, fd);
}
+ if (is_raw_tracepoint) {
+ efd = bpf_raw_tracepoint_open(event + 15, fd);
+ if (efd < 0) {
+ printf("tracepoint %s %s\n", event + 15, strerror(errno));
+ return -1;
+ }
+ event_fd[prog_cnt - 1] = efd;
+ return 0;
+ }
+
if (is_kprobe || is_kretprobe) {
if (is_kprobe)
event += 7;
@@ -584,12 +600,14 @@ static int do_load_bpf_file(const char *path, fixup_map_cb fixup_map)
if (memcmp(shname, "kprobe/", 7) == 0 ||
memcmp(shname, "kretprobe/", 10) == 0 ||
memcmp(shname, "tracepoint/", 11) == 0 ||
+ memcmp(shname, "raw_tracepoint/", 15) == 0 ||
memcmp(shname, "xdp", 3) == 0 ||
memcmp(shname, "perf_event", 10) == 0 ||
memcmp(shname, "socket", 6) == 0 ||
memcmp(shname, "cgroup/", 7) == 0 ||
memcmp(shname, "sockops", 7) == 0 ||
- memcmp(shname, "sk_skb", 6) == 0) {
+ memcmp(shname, "sk_skb", 6) == 0 ||
+ memcmp(shname, "sk_msg", 6) == 0) {
ret = load_and_attach(shname, data->d_buf,
data->d_size);
if (ret != 0)
diff --git a/samples/bpf/cookie_uid_helper_example.c b/samples/bpf/cookie_uid_helper_example.c
index 9d751e209f31..8eca27e595ae 100644
--- a/samples/bpf/cookie_uid_helper_example.c
+++ b/samples/bpf/cookie_uid_helper_example.c
@@ -246,7 +246,7 @@ static void udp_client(void)
recv_len = recvfrom(s_rcv, &buf, sizeof(buf), 0,
(struct sockaddr *)&si_me, &slen);
if (recv_len < 0)
- error(1, errno, "revieve\n");
+ error(1, errno, "receive\n");
res = memcmp(&(si_other.sin_addr), &(si_me.sin_addr),
sizeof(si_me.sin_addr));
if (res != 0)
diff --git a/samples/bpf/cpustat_kern.c b/samples/bpf/cpustat_kern.c
new file mode 100644
index 000000000000..68c84da065b1
--- /dev/null
+++ b/samples/bpf/cpustat_kern.c
@@ -0,0 +1,281 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include <linux/version.h>
+#include <linux/ptrace.h>
+#include <uapi/linux/bpf.h>
+#include "bpf_helpers.h"
+
+/*
+ * The CPU number, cstate number and pstate number are based
+ * on 96boards Hikey with octa CA53 CPUs.
+ *
+ * Every CPU have three idle states for cstate:
+ * WFI, CPU_OFF, CLUSTER_OFF
+ *
+ * Every CPU have 5 operating points:
+ * 208MHz, 432MHz, 729MHz, 960MHz, 1200MHz
+ *
+ * This code is based on these assumption and other platforms
+ * need to adjust these definitions.
+ */
+#define MAX_CPU 8
+#define MAX_PSTATE_ENTRIES 5
+#define MAX_CSTATE_ENTRIES 3
+
+static int cpu_opps[] = { 208000, 432000, 729000, 960000, 1200000 };
+
+/*
+ * my_map structure is used to record cstate and pstate index and
+ * timestamp (Idx, Ts), when new event incoming we need to update
+ * combination for new state index and timestamp (Idx`, Ts`).
+ *
+ * Based on (Idx, Ts) and (Idx`, Ts`) we can calculate the time
+ * interval for the previous state: Duration(Idx) = Ts` - Ts.
+ *
+ * Every CPU has one below array for recording state index and
+ * timestamp, and record for cstate and pstate saperately:
+ *
+ * +--------------------------+
+ * | cstate timestamp |
+ * +--------------------------+
+ * | cstate index |
+ * +--------------------------+
+ * | pstate timestamp |
+ * +--------------------------+
+ * | pstate index |
+ * +--------------------------+
+ */
+#define MAP_OFF_CSTATE_TIME 0
+#define MAP_OFF_CSTATE_IDX 1
+#define MAP_OFF_PSTATE_TIME 2
+#define MAP_OFF_PSTATE_IDX 3
+#define MAP_OFF_NUM 4
+
+struct bpf_map_def SEC("maps") my_map = {
+ .type = BPF_MAP_TYPE_ARRAY,
+ .key_size = sizeof(u32),
+ .value_size = sizeof(u64),
+ .max_entries = MAX_CPU * MAP_OFF_NUM,
+};
+
+/* cstate_duration records duration time for every idle state per CPU */
+struct bpf_map_def SEC("maps") cstate_duration = {
+ .type = BPF_MAP_TYPE_ARRAY,
+ .key_size = sizeof(u32),
+ .value_size = sizeof(u64),
+ .max_entries = MAX_CPU * MAX_CSTATE_ENTRIES,
+};
+
+/* pstate_duration records duration time for every operating point per CPU */
+struct bpf_map_def SEC("maps") pstate_duration = {
+ .type = BPF_MAP_TYPE_ARRAY,
+ .key_size = sizeof(u32),
+ .value_size = sizeof(u64),
+ .max_entries = MAX_CPU * MAX_PSTATE_ENTRIES,
+};
+
+/*
+ * The trace events for cpu_idle and cpu_frequency are taken from:
+ * /sys/kernel/debug/tracing/events/power/cpu_idle/format
+ * /sys/kernel/debug/tracing/events/power/cpu_frequency/format
+ *
+ * These two events have same format, so define one common structure.
+ */
+struct cpu_args {
+ u64 pad;
+ u32 state;
+ u32 cpu_id;
+};
+
+/* calculate pstate index, returns MAX_PSTATE_ENTRIES for failure */
+static u32 find_cpu_pstate_idx(u32 frequency)
+{
+ u32 i;
+
+ for (i = 0; i < sizeof(cpu_opps) / sizeof(u32); i++) {
+ if (frequency == cpu_opps[i])
+ return i;
+ }
+
+ return i;
+}
+
+SEC("tracepoint/power/cpu_idle")
+int bpf_prog1(struct cpu_args *ctx)
+{
+ u64 *cts, *pts, *cstate, *pstate, prev_state, cur_ts, delta;
+ u32 key, cpu, pstate_idx;
+ u64 *val;
+
+ if (ctx->cpu_id > MAX_CPU)
+ return 0;
+
+ cpu = ctx->cpu_id;
+
+ key = cpu * MAP_OFF_NUM + MAP_OFF_CSTATE_TIME;
+ cts = bpf_map_lookup_elem(&my_map, &key);
+ if (!cts)
+ return 0;
+
+ key = cpu * MAP_OFF_NUM + MAP_OFF_CSTATE_IDX;
+ cstate = bpf_map_lookup_elem(&my_map, &key);
+ if (!cstate)
+ return 0;
+
+ key = cpu * MAP_OFF_NUM + MAP_OFF_PSTATE_TIME;
+ pts = bpf_map_lookup_elem(&my_map, &key);
+ if (!pts)
+ return 0;
+
+ key = cpu * MAP_OFF_NUM + MAP_OFF_PSTATE_IDX;
+ pstate = bpf_map_lookup_elem(&my_map, &key);
+ if (!pstate)
+ return 0;
+
+ prev_state = *cstate;
+ *cstate = ctx->state;
+
+ if (!*cts) {
+ *cts = bpf_ktime_get_ns();
+ return 0;
+ }
+
+ cur_ts = bpf_ktime_get_ns();
+ delta = cur_ts - *cts;
+ *cts = cur_ts;
+
+ /*
+ * When state doesn't equal to (u32)-1, the cpu will enter
+ * one idle state; for this case we need to record interval
+ * for the pstate.
+ *
+ * OPP2
+ * +---------------------+
+ * OPP1 | |
+ * ---------+ |
+ * | Idle state
+ * +---------------
+ *
+ * |<- pstate duration ->|
+ * ^ ^
+ * pts cur_ts
+ */
+ if (ctx->state != (u32)-1) {
+
+ /* record pstate after have first cpu_frequency event */
+ if (!*pts)
+ return 0;
+
+ delta = cur_ts - *pts;
+
+ pstate_idx = find_cpu_pstate_idx(*pstate);
+ if (pstate_idx >= MAX_PSTATE_ENTRIES)
+ return 0;
+
+ key = cpu * MAX_PSTATE_ENTRIES + pstate_idx;
+ val = bpf_map_lookup_elem(&pstate_duration, &key);
+ if (val)
+ __sync_fetch_and_add((long *)val, delta);
+
+ /*
+ * When state equal to (u32)-1, the cpu just exits from one
+ * specific idle state; for this case we need to record
+ * interval for the pstate.
+ *
+ * OPP2
+ * -----------+
+ * | OPP1
+ * | +-----------
+ * | Idle state |
+ * +---------------------+
+ *
+ * |<- cstate duration ->|
+ * ^ ^
+ * cts cur_ts
+ */
+ } else {
+
+ key = cpu * MAX_CSTATE_ENTRIES + prev_state;
+ val = bpf_map_lookup_elem(&cstate_duration, &key);
+ if (val)
+ __sync_fetch_and_add((long *)val, delta);
+ }
+
+ /* Update timestamp for pstate as new start time */
+ if (*pts)
+ *pts = cur_ts;
+
+ return 0;
+}
+
+SEC("tracepoint/power/cpu_frequency")
+int bpf_prog2(struct cpu_args *ctx)
+{
+ u64 *pts, *cstate, *pstate, prev_state, cur_ts, delta;
+ u32 key, cpu, pstate_idx;
+ u64 *val;
+
+ cpu = ctx->cpu_id;
+
+ key = cpu * MAP_OFF_NUM + MAP_OFF_PSTATE_TIME;
+ pts = bpf_map_lookup_elem(&my_map, &key);
+ if (!pts)
+ return 0;
+
+ key = cpu * MAP_OFF_NUM + MAP_OFF_PSTATE_IDX;
+ pstate = bpf_map_lookup_elem(&my_map, &key);
+ if (!pstate)
+ return 0;
+
+ key = cpu * MAP_OFF_NUM + MAP_OFF_CSTATE_IDX;
+ cstate = bpf_map_lookup_elem(&my_map, &key);
+ if (!cstate)
+ return 0;
+
+ prev_state = *pstate;
+ *pstate = ctx->state;
+
+ if (!*pts) {
+ *pts = bpf_ktime_get_ns();
+ return 0;
+ }
+
+ cur_ts = bpf_ktime_get_ns();
+ delta = cur_ts - *pts;
+ *pts = cur_ts;
+
+ /* When CPU is in idle, bail out to skip pstate statistics */
+ if (*cstate != (u32)(-1))
+ return 0;
+
+ /*
+ * The cpu changes to another different OPP (in below diagram
+ * change frequency from OPP3 to OPP1), need recording interval
+ * for previous frequency OPP3 and update timestamp as start
+ * time for new frequency OPP1.
+ *
+ * OPP3
+ * +---------------------+
+ * OPP2 | |
+ * ---------+ |
+ * | OPP1
+ * +---------------
+ *
+ * |<- pstate duration ->|
+ * ^ ^
+ * pts cur_ts
+ */
+ pstate_idx = find_cpu_pstate_idx(*pstate);
+ if (pstate_idx >= MAX_PSTATE_ENTRIES)
+ return 0;
+
+ key = cpu * MAX_PSTATE_ENTRIES + pstate_idx;
+ val = bpf_map_lookup_elem(&pstate_duration, &key);
+ if (val)
+ __sync_fetch_and_add((long *)val, delta);
+
+ return 0;
+}
+
+char _license[] SEC("license") = "GPL";
+u32 _version SEC("version") = LINUX_VERSION_CODE;
diff --git a/samples/bpf/cpustat_user.c b/samples/bpf/cpustat_user.c
new file mode 100644
index 000000000000..2b4cd1ae57c5
--- /dev/null
+++ b/samples/bpf/cpustat_user.c
@@ -0,0 +1,219 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#define _GNU_SOURCE
+#include <errno.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <signal.h>
+#include <sched.h>
+#include <string.h>
+#include <unistd.h>
+#include <fcntl.h>
+#include <linux/bpf.h>
+#include <locale.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <sys/time.h>
+#include <sys/resource.h>
+#include <sys/wait.h>
+
+#include "libbpf.h"
+#include "bpf_load.h"
+
+#define MAX_CPU 8
+#define MAX_PSTATE_ENTRIES 5
+#define MAX_CSTATE_ENTRIES 3
+#define MAX_STARS 40
+
+#define CPUFREQ_MAX_SYSFS_PATH "/sys/devices/system/cpu/cpu0/cpufreq/scaling_max_freq"
+#define CPUFREQ_LOWEST_FREQ "208000"
+#define CPUFREQ_HIGHEST_FREQ "12000000"
+
+struct cpu_stat_data {
+ unsigned long cstate[MAX_CSTATE_ENTRIES];
+ unsigned long pstate[MAX_PSTATE_ENTRIES];
+};
+
+static struct cpu_stat_data stat_data[MAX_CPU];
+
+static void cpu_stat_print(void)
+{
+ int i, j;
+ char state_str[sizeof("cstate-9")];
+ struct cpu_stat_data *data;
+
+ /* Clear screen */
+ printf("\033[2J");
+
+ /* Header */
+ printf("\nCPU states statistics:\n");
+ printf("%-10s ", "state(ms)");
+
+ for (i = 0; i < MAX_CSTATE_ENTRIES; i++) {
+ sprintf(state_str, "cstate-%d", i);
+ printf("%-11s ", state_str);
+ }
+
+ for (i = 0; i < MAX_PSTATE_ENTRIES; i++) {
+ sprintf(state_str, "pstate-%d", i);
+ printf("%-11s ", state_str);
+ }
+
+ printf("\n");
+
+ for (j = 0; j < MAX_CPU; j++) {
+ data = &stat_data[j];
+
+ printf("CPU-%-6d ", j);
+ for (i = 0; i < MAX_CSTATE_ENTRIES; i++)
+ printf("%-11ld ", data->cstate[i] / 1000000);
+
+ for (i = 0; i < MAX_PSTATE_ENTRIES; i++)
+ printf("%-11ld ", data->pstate[i] / 1000000);
+
+ printf("\n");
+ }
+}
+
+static void cpu_stat_update(int cstate_fd, int pstate_fd)
+{
+ unsigned long key, value;
+ int c, i;
+
+ for (c = 0; c < MAX_CPU; c++) {
+ for (i = 0; i < MAX_CSTATE_ENTRIES; i++) {
+ key = c * MAX_CSTATE_ENTRIES + i;
+ bpf_map_lookup_elem(cstate_fd, &key, &value);
+ stat_data[c].cstate[i] = value;
+ }
+
+ for (i = 0; i < MAX_PSTATE_ENTRIES; i++) {
+ key = c * MAX_PSTATE_ENTRIES + i;
+ bpf_map_lookup_elem(pstate_fd, &key, &value);
+ stat_data[c].pstate[i] = value;
+ }
+ }
+}
+
+/*
+ * This function is copied from 'idlestat' tool function
+ * idlestat_wake_all() in idlestate.c.
+ *
+ * It sets the self running task affinity to cpus one by one so can wake up
+ * the specific CPU to handle scheduling; this results in all cpus can be
+ * waken up once and produce ftrace event 'trace_cpu_idle'.
+ */
+static int cpu_stat_inject_cpu_idle_event(void)
+{
+ int rcpu, i, ret;
+ cpu_set_t cpumask;
+ cpu_set_t original_cpumask;
+
+ ret = sysconf(_SC_NPROCESSORS_CONF);
+ if (ret < 0)
+ return -1;
+
+ rcpu = sched_getcpu();
+ if (rcpu < 0)
+ return -1;
+
+ /* Keep track of the CPUs we will run on */
+ sched_getaffinity(0, sizeof(original_cpumask), &original_cpumask);
+
+ for (i = 0; i < ret; i++) {
+
+ /* Pointless to wake up ourself */
+ if (i == rcpu)
+ continue;
+
+ /* Pointless to wake CPUs we will not run on */
+ if (!CPU_ISSET(i, &original_cpumask))
+ continue;
+
+ CPU_ZERO(&cpumask);
+ CPU_SET(i, &cpumask);
+
+ sched_setaffinity(0, sizeof(cpumask), &cpumask);
+ }
+
+ /* Enable all the CPUs of the original mask */
+ sched_setaffinity(0, sizeof(original_cpumask), &original_cpumask);
+ return 0;
+}
+
+/*
+ * It's possible to have no any frequency change for long time and cannot
+ * get ftrace event 'trace_cpu_frequency' for long period, this introduces
+ * big deviation for pstate statistics.
+ *
+ * To solve this issue, below code forces to set 'scaling_max_freq' to 208MHz
+ * for triggering ftrace event 'trace_cpu_frequency' and then recovery back to
+ * the maximum frequency value 1.2GHz.
+ */
+static int cpu_stat_inject_cpu_frequency_event(void)
+{
+ int len, fd;
+
+ fd = open(CPUFREQ_MAX_SYSFS_PATH, O_WRONLY);
+ if (fd < 0) {
+ printf("failed to open scaling_max_freq, errno=%d\n", errno);
+ return fd;
+ }
+
+ len = write(fd, CPUFREQ_LOWEST_FREQ, strlen(CPUFREQ_LOWEST_FREQ));
+ if (len < 0) {
+ printf("failed to open scaling_max_freq, errno=%d\n", errno);
+ goto err;
+ }
+
+ len = write(fd, CPUFREQ_HIGHEST_FREQ, strlen(CPUFREQ_HIGHEST_FREQ));
+ if (len < 0) {
+ printf("failed to open scaling_max_freq, errno=%d\n", errno);
+ goto err;
+ }
+
+err:
+ close(fd);
+ return len;
+}
+
+static void int_exit(int sig)
+{
+ cpu_stat_inject_cpu_idle_event();
+ cpu_stat_inject_cpu_frequency_event();
+ cpu_stat_update(map_fd[1], map_fd[2]);
+ cpu_stat_print();
+ exit(0);
+}
+
+int main(int argc, char **argv)
+{
+ char filename[256];
+ int ret;
+
+ snprintf(filename, sizeof(filename), "%s_kern.o", argv[0]);
+
+ if (load_bpf_file(filename)) {
+ printf("%s", bpf_log_buf);
+ return 1;
+ }
+
+ ret = cpu_stat_inject_cpu_idle_event();
+ if (ret < 0)
+ return 1;
+
+ ret = cpu_stat_inject_cpu_frequency_event();
+ if (ret < 0)
+ return 1;
+
+ signal(SIGINT, int_exit);
+ signal(SIGTERM, int_exit);
+
+ while (1) {
+ cpu_stat_update(map_fd[1], map_fd[2]);
+ cpu_stat_print();
+ sleep(5);
+ }
+
+ return 0;
+}
diff --git a/samples/bpf/tcbpf2_kern.c b/samples/bpf/tcbpf2_kern.c
index efdc16d195ff..9a8db7bd6db4 100644
--- a/samples/bpf/tcbpf2_kern.c
+++ b/samples/bpf/tcbpf2_kern.c
@@ -52,7 +52,8 @@ int _gre_set_tunnel(struct __sk_buff *skb)
key.tunnel_tos = 0;
key.tunnel_ttl = 64;
- ret = bpf_skb_set_tunnel_key(skb, &key, sizeof(key), BPF_F_ZERO_CSUM_TX);
+ ret = bpf_skb_set_tunnel_key(skb, &key, sizeof(key),
+ BPF_F_ZERO_CSUM_TX | BPF_F_SEQ_NUMBER);
if (ret < 0) {
ERROR(ret);
return TC_ACT_SHOT;
@@ -92,7 +93,8 @@ int _ip6gretap_set_tunnel(struct __sk_buff *skb)
key.tunnel_label = 0xabcde;
ret = bpf_skb_set_tunnel_key(skb, &key, sizeof(key),
- BPF_F_TUNINFO_IPV6 | BPF_F_ZERO_CSUM_TX);
+ BPF_F_TUNINFO_IPV6 | BPF_F_ZERO_CSUM_TX |
+ BPF_F_SEQ_NUMBER);
if (ret < 0) {
ERROR(ret);
return TC_ACT_SHOT;
diff --git a/samples/bpf/test_cgrp2_sock.sh b/samples/bpf/test_cgrp2_sock.sh
index 8ee0371a100a..9f6174236856 100755
--- a/samples/bpf/test_cgrp2_sock.sh
+++ b/samples/bpf/test_cgrp2_sock.sh
@@ -61,6 +61,7 @@ cleanup_and_exit()
[ -n "$msg" ] && echo "ERROR: $msg"
+ test_cgrp2_sock -d ${CGRP_MNT}/sockopts
ip li del cgrp2_sock
umount ${CGRP_MNT}
diff --git a/samples/bpf/test_cgrp2_sock2.sh b/samples/bpf/test_cgrp2_sock2.sh
index fc4e64d00cb3..0f396a86e0cb 100755
--- a/samples/bpf/test_cgrp2_sock2.sh
+++ b/samples/bpf/test_cgrp2_sock2.sh
@@ -28,6 +28,9 @@ function attach_bpf {
}
function cleanup {
+ if [ -d /tmp/cgroupv2/foo ]; then
+ test_cgrp2_sock -d /tmp/cgroupv2/foo
+ fi
ip link del veth0b
ip netns delete at_ns0
umount /tmp/cgroupv2
diff --git a/samples/bpf/test_overhead_raw_tp_kern.c b/samples/bpf/test_overhead_raw_tp_kern.c
new file mode 100644
index 000000000000..d2af8bc1c805
--- /dev/null
+++ b/samples/bpf/test_overhead_raw_tp_kern.c
@@ -0,0 +1,17 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2018 Facebook */
+#include <uapi/linux/bpf.h>
+#include "bpf_helpers.h"
+
+SEC("raw_tracepoint/task_rename")
+int prog(struct bpf_raw_tracepoint_args *ctx)
+{
+ return 0;
+}
+
+SEC("raw_tracepoint/urandom_read")
+int prog2(struct bpf_raw_tracepoint_args *ctx)
+{
+ return 0;
+}
+char _license[] SEC("license") = "GPL";
diff --git a/samples/bpf/test_overhead_user.c b/samples/bpf/test_overhead_user.c
index d291167fd3c7..e1d35e07a10e 100644
--- a/samples/bpf/test_overhead_user.c
+++ b/samples/bpf/test_overhead_user.c
@@ -158,5 +158,17 @@ int main(int argc, char **argv)
unload_progs();
}
+ if (test_flags & 0xC0) {
+ snprintf(filename, sizeof(filename),
+ "%s_raw_tp_kern.o", argv[0]);
+ if (load_bpf_file(filename)) {
+ printf("%s", bpf_log_buf);
+ return 1;
+ }
+ printf("w/RAW_TRACEPOINT\n");
+ run_perf_test(num_cpu, test_flags >> 6);
+ unload_progs();
+ }
+
return 0;
}
diff --git a/samples/bpf/test_tunnel_bpf.sh b/samples/bpf/test_tunnel_bpf.sh
index 43ce049996ee..c265863ccdf9 100755
--- a/samples/bpf/test_tunnel_bpf.sh
+++ b/samples/bpf/test_tunnel_bpf.sh
@@ -23,7 +23,8 @@ function config_device {
function add_gre_tunnel {
# in namespace
ip netns exec at_ns0 \
- ip link add dev $DEV_NS type $TYPE key 2 local 172.16.1.100 remote 172.16.1.200
+ ip link add dev $DEV_NS type $TYPE seq key 2 \
+ local 172.16.1.100 remote 172.16.1.200
ip netns exec at_ns0 ip link set dev $DEV_NS up
ip netns exec at_ns0 ip addr add dev $DEV_NS 10.1.1.100/24
@@ -43,7 +44,7 @@ function add_ip6gretap_tunnel {
# in namespace
ip netns exec at_ns0 \
- ip link add dev $DEV_NS type $TYPE flowlabel 0xbcdef key 2 \
+ ip link add dev $DEV_NS type $TYPE seq flowlabel 0xbcdef key 2 \
local ::11 remote ::22
ip netns exec at_ns0 ip addr add dev $DEV_NS 10.1.1.100/24
diff --git a/samples/bpf/trace_event_kern.c b/samples/bpf/trace_event_kern.c
index a77a583d94d4..7068fbdde951 100644
--- a/samples/bpf/trace_event_kern.c
+++ b/samples/bpf/trace_event_kern.c
@@ -39,6 +39,7 @@ int bpf_prog1(struct bpf_perf_event_data *ctx)
{
char time_fmt1[] = "Time Enabled: %llu, Time Running: %llu";
char time_fmt2[] = "Get Time Failed, ErrCode: %d";
+ char addr_fmt[] = "Address recorded on event: %llx";
char fmt[] = "CPU-%d period %lld ip %llx";
u32 cpu = bpf_get_smp_processor_id();
struct bpf_perf_event_value value_buf;
@@ -64,6 +65,9 @@ int bpf_prog1(struct bpf_perf_event_data *ctx)
else
bpf_trace_printk(time_fmt2, sizeof(time_fmt2), ret);
+ if (ctx->addr != 0)
+ bpf_trace_printk(addr_fmt, sizeof(addr_fmt), ctx->addr);
+
val = bpf_map_lookup_elem(&counts, &key);
if (val)
(*val)++;
diff --git a/samples/bpf/trace_event_user.c b/samples/bpf/trace_event_user.c
index bf4f1b6d9a52..56f7a259a7c9 100644
--- a/samples/bpf/trace_event_user.c
+++ b/samples/bpf/trace_event_user.c
@@ -215,6 +215,17 @@ static void test_bpf_perf_event(void)
/* Intel Instruction Retired */
.config = 0xc0,
};
+ struct perf_event_attr attr_type_raw_lock_load = {
+ .sample_freq = SAMPLE_FREQ,
+ .freq = 1,
+ .type = PERF_TYPE_RAW,
+ /* Intel MEM_UOPS_RETIRED.LOCK_LOADS */
+ .config = 0x21d0,
+ /* Request to record lock address from PEBS */
+ .sample_type = PERF_SAMPLE_ADDR,
+ /* Record address value requires precise event */
+ .precise_ip = 2,
+ };
printf("Test HW_CPU_CYCLES\n");
test_perf_event_all_cpu(&attr_type_hw);
@@ -236,6 +247,10 @@ static void test_bpf_perf_event(void)
test_perf_event_all_cpu(&attr_type_raw);
test_perf_event_task(&attr_type_raw);
+ printf("Test Lock Load\n");
+ test_perf_event_all_cpu(&attr_type_raw_lock_load);
+ test_perf_event_task(&attr_type_raw_lock_load);
+
printf("*** PASS ***\n");
}
diff --git a/samples/bpf/xdp_redirect_user.c b/samples/bpf/xdp_redirect_user.c
index d54e91eb6cbf..b701b5c21342 100644
--- a/samples/bpf/xdp_redirect_user.c
+++ b/samples/bpf/xdp_redirect_user.c
@@ -20,6 +20,7 @@
#include <string.h>
#include <unistd.h>
#include <libgen.h>
+#include <sys/resource.h>
#include "bpf_load.h"
#include "bpf_util.h"
@@ -75,6 +76,7 @@ static void usage(const char *prog)
int main(int argc, char **argv)
{
+ struct rlimit r = {RLIM_INFINITY, RLIM_INFINITY};
const char *optstr = "SN";
char filename[256];
int ret, opt, key = 0;
@@ -98,6 +100,11 @@ int main(int argc, char **argv)
return 1;
}
+ if (setrlimit(RLIMIT_MEMLOCK, &r)) {
+ perror("setrlimit(RLIMIT_MEMLOCK)");
+ return 1;
+ }
+
ifindex_in = strtoul(argv[optind], NULL, 0);
ifindex_out = strtoul(argv[optind + 1], NULL, 0);
printf("input: %d output: %d\n", ifindex_in, ifindex_out);
diff --git a/samples/sockmap/Makefile b/samples/sockmap/Makefile
index 73f1da4d116c..9bf2881bd11b 100644
--- a/samples/sockmap/Makefile
+++ b/samples/sockmap/Makefile
@@ -2,7 +2,7 @@
hostprogs-y := sockmap
# Libbpf dependencies
-LIBBPF := ../../tools/lib/bpf/bpf.o
+LIBBPF := ../../tools/lib/bpf/bpf.o ../../tools/lib/bpf/nlattr.o
HOSTCFLAGS += -I$(objtree)/usr/include
HOSTCFLAGS += -I$(srctree)/tools/lib/
diff --git a/samples/sockmap/sockmap_kern.c b/samples/sockmap/sockmap_kern.c
index 52b0053274f4..9ff8bc5dc206 100644
--- a/samples/sockmap/sockmap_kern.c
+++ b/samples/sockmap/sockmap_kern.c
@@ -43,6 +43,55 @@ struct bpf_map_def SEC("maps") sock_map = {
.max_entries = 20,
};
+struct bpf_map_def SEC("maps") sock_map_txmsg = {
+ .type = BPF_MAP_TYPE_SOCKMAP,
+ .key_size = sizeof(int),
+ .value_size = sizeof(int),
+ .max_entries = 20,
+};
+
+struct bpf_map_def SEC("maps") sock_map_redir = {
+ .type = BPF_MAP_TYPE_SOCKMAP,
+ .key_size = sizeof(int),
+ .value_size = sizeof(int),
+ .max_entries = 20,
+};
+
+struct bpf_map_def SEC("maps") sock_apply_bytes = {
+ .type = BPF_MAP_TYPE_ARRAY,
+ .key_size = sizeof(int),
+ .value_size = sizeof(int),
+ .max_entries = 1
+};
+
+struct bpf_map_def SEC("maps") sock_cork_bytes = {
+ .type = BPF_MAP_TYPE_ARRAY,
+ .key_size = sizeof(int),
+ .value_size = sizeof(int),
+ .max_entries = 1
+};
+
+struct bpf_map_def SEC("maps") sock_pull_bytes = {
+ .type = BPF_MAP_TYPE_ARRAY,
+ .key_size = sizeof(int),
+ .value_size = sizeof(int),
+ .max_entries = 2
+};
+
+struct bpf_map_def SEC("maps") sock_redir_flags = {
+ .type = BPF_MAP_TYPE_ARRAY,
+ .key_size = sizeof(int),
+ .value_size = sizeof(int),
+ .max_entries = 1
+};
+
+struct bpf_map_def SEC("maps") sock_skb_opts = {
+ .type = BPF_MAP_TYPE_ARRAY,
+ .key_size = sizeof(int),
+ .value_size = sizeof(int),
+ .max_entries = 1
+};
+
SEC("sk_skb1")
int bpf_prog1(struct __sk_buff *skb)
{
@@ -54,15 +103,24 @@ int bpf_prog2(struct __sk_buff *skb)
{
__u32 lport = skb->local_port;
__u32 rport = skb->remote_port;
- int ret = 0;
+ int len, *f, ret, zero = 0;
+ __u64 flags = 0;
if (lport == 10000)
ret = 10;
else
ret = 1;
- bpf_printk("sockmap: %d -> %d @ %d\n", lport, bpf_ntohl(rport), ret);
- return bpf_sk_redirect_map(skb, &sock_map, ret, 0);
+ len = (__u32)skb->data_end - (__u32)skb->data;
+ f = bpf_map_lookup_elem(&sock_skb_opts, &zero);
+ if (f && *f) {
+ ret = 3;
+ flags = *f;
+ }
+
+ bpf_printk("sk_skb2: redirect(%iB) flags=%i\n",
+ len, flags);
+ return bpf_sk_redirect_map(skb, &sock_map, ret, flags);
}
SEC("sockops")
@@ -105,4 +163,179 @@ int bpf_sockmap(struct bpf_sock_ops *skops)
return 0;
}
+
+SEC("sk_msg1")
+int bpf_prog4(struct sk_msg_md *msg)
+{
+ int *bytes, zero = 0, one = 1;
+ int *start, *end;
+
+ bytes = bpf_map_lookup_elem(&sock_apply_bytes, &zero);
+ if (bytes)
+ bpf_msg_apply_bytes(msg, *bytes);
+ bytes = bpf_map_lookup_elem(&sock_cork_bytes, &zero);
+ if (bytes)
+ bpf_msg_cork_bytes(msg, *bytes);
+ start = bpf_map_lookup_elem(&sock_pull_bytes, &zero);
+ end = bpf_map_lookup_elem(&sock_pull_bytes, &one);
+ if (start && end)
+ bpf_msg_pull_data(msg, *start, *end, 0);
+ return SK_PASS;
+}
+
+SEC("sk_msg2")
+int bpf_prog5(struct sk_msg_md *msg)
+{
+ int err1 = -1, err2 = -1, zero = 0, one = 1;
+ int *bytes, *start, *end, len1, len2;
+
+ bytes = bpf_map_lookup_elem(&sock_apply_bytes, &zero);
+ if (bytes)
+ err1 = bpf_msg_apply_bytes(msg, *bytes);
+ bytes = bpf_map_lookup_elem(&sock_cork_bytes, &zero);
+ if (bytes)
+ err2 = bpf_msg_cork_bytes(msg, *bytes);
+ len1 = (__u64)msg->data_end - (__u64)msg->data;
+ start = bpf_map_lookup_elem(&sock_pull_bytes, &zero);
+ end = bpf_map_lookup_elem(&sock_pull_bytes, &one);
+ if (start && end) {
+ int err;
+
+ bpf_printk("sk_msg2: pull(%i:%i)\n",
+ start ? *start : 0, end ? *end : 0);
+ err = bpf_msg_pull_data(msg, *start, *end, 0);
+ if (err)
+ bpf_printk("sk_msg2: pull_data err %i\n",
+ err);
+ len2 = (__u64)msg->data_end - (__u64)msg->data;
+ bpf_printk("sk_msg2: length update %i->%i\n",
+ len1, len2);
+ }
+ bpf_printk("sk_msg2: data length %i err1 %i err2 %i\n",
+ len1, err1, err2);
+ return SK_PASS;
+}
+
+SEC("sk_msg3")
+int bpf_prog6(struct sk_msg_md *msg)
+{
+ int *bytes, zero = 0, one = 1, key = 0;
+ int *start, *end, *f;
+ __u64 flags = 0;
+
+ bytes = bpf_map_lookup_elem(&sock_apply_bytes, &zero);
+ if (bytes)
+ bpf_msg_apply_bytes(msg, *bytes);
+ bytes = bpf_map_lookup_elem(&sock_cork_bytes, &zero);
+ if (bytes)
+ bpf_msg_cork_bytes(msg, *bytes);
+ start = bpf_map_lookup_elem(&sock_pull_bytes, &zero);
+ end = bpf_map_lookup_elem(&sock_pull_bytes, &one);
+ if (start && end)
+ bpf_msg_pull_data(msg, *start, *end, 0);
+ f = bpf_map_lookup_elem(&sock_redir_flags, &zero);
+ if (f && *f) {
+ key = 2;
+ flags = *f;
+ }
+ return bpf_msg_redirect_map(msg, &sock_map_redir, key, flags);
+}
+
+SEC("sk_msg4")
+int bpf_prog7(struct sk_msg_md *msg)
+{
+ int err1 = 0, err2 = 0, zero = 0, one = 1, key = 0;
+ int *f, *bytes, *start, *end, len1, len2;
+ __u64 flags = 0;
+
+ int err;
+ bytes = bpf_map_lookup_elem(&sock_apply_bytes, &zero);
+ if (bytes)
+ err1 = bpf_msg_apply_bytes(msg, *bytes);
+ bytes = bpf_map_lookup_elem(&sock_cork_bytes, &zero);
+ if (bytes)
+ err2 = bpf_msg_cork_bytes(msg, *bytes);
+ len1 = (__u64)msg->data_end - (__u64)msg->data;
+ start = bpf_map_lookup_elem(&sock_pull_bytes, &zero);
+ end = bpf_map_lookup_elem(&sock_pull_bytes, &one);
+ if (start && end) {
+
+ bpf_printk("sk_msg2: pull(%i:%i)\n",
+ start ? *start : 0, end ? *end : 0);
+ err = bpf_msg_pull_data(msg, *start, *end, 0);
+ if (err)
+ bpf_printk("sk_msg2: pull_data err %i\n",
+ err);
+ len2 = (__u64)msg->data_end - (__u64)msg->data;
+ bpf_printk("sk_msg2: length update %i->%i\n",
+ len1, len2);
+ }
+ f = bpf_map_lookup_elem(&sock_redir_flags, &zero);
+ if (f && *f) {
+ key = 2;
+ flags = *f;
+ }
+ bpf_printk("sk_msg3: redirect(%iB) flags=%i err=%i\n",
+ len1, flags, err1 ? err1 : err2);
+ err = bpf_msg_redirect_map(msg, &sock_map_redir, key, flags);
+ bpf_printk("sk_msg3: err %i\n", err);
+ return err;
+}
+
+SEC("sk_msg5")
+int bpf_prog8(struct sk_msg_md *msg)
+{
+ void *data_end = (void *)(long) msg->data_end;
+ void *data = (void *)(long) msg->data;
+ int ret = 0, *bytes, zero = 0;
+
+ bytes = bpf_map_lookup_elem(&sock_apply_bytes, &zero);
+ if (bytes) {
+ ret = bpf_msg_apply_bytes(msg, *bytes);
+ if (ret)
+ return SK_DROP;
+ } else {
+ return SK_DROP;
+ }
+ return SK_PASS;
+}
+SEC("sk_msg6")
+int bpf_prog9(struct sk_msg_md *msg)
+{
+ void *data_end = (void *)(long) msg->data_end;
+ void *data = (void *)(long) msg->data;
+ int ret = 0, *bytes, zero = 0;
+
+ bytes = bpf_map_lookup_elem(&sock_cork_bytes, &zero);
+ if (bytes) {
+ if (((__u64)data_end - (__u64)data) >= *bytes)
+ return SK_PASS;
+ ret = bpf_msg_cork_bytes(msg, *bytes);
+ if (ret)
+ return SK_DROP;
+ }
+ return SK_PASS;
+}
+
+SEC("sk_msg7")
+int bpf_prog10(struct sk_msg_md *msg)
+{
+ int *bytes, zero = 0, one = 1;
+ int *start, *end;
+
+ bytes = bpf_map_lookup_elem(&sock_apply_bytes, &zero);
+ if (bytes)
+ bpf_msg_apply_bytes(msg, *bytes);
+ bytes = bpf_map_lookup_elem(&sock_cork_bytes, &zero);
+ if (bytes)
+ bpf_msg_cork_bytes(msg, *bytes);
+ start = bpf_map_lookup_elem(&sock_pull_bytes, &zero);
+ end = bpf_map_lookup_elem(&sock_pull_bytes, &one);
+ if (start && end)
+ bpf_msg_pull_data(msg, *start, *end, 0);
+
+ return SK_DROP;
+}
+
+
char _license[] SEC("license") = "GPL";
diff --git a/samples/sockmap/sockmap_test.sh b/samples/sockmap/sockmap_test.sh
new file mode 100755
index 000000000000..ace75f070eb8
--- /dev/null
+++ b/samples/sockmap/sockmap_test.sh
@@ -0,0 +1,488 @@
+#Test a bunch of positive cases to verify basic functionality
+for prog in "--txmsg_redir --txmsg_skb" "--txmsg_redir --txmsg_ingress" "--txmsg" "--txmsg_redir" "--txmsg_redir --txmsg_ingress" "--txmsg_drop"; do
+for t in "sendmsg" "sendpage"; do
+for r in 1 10 100; do
+ for i in 1 10 100; do
+ for l in 1 10 100; do
+ TEST="./sockmap --cgroup /mnt/cgroup2/ -t $t -r $r -i $i -l $l $prog"
+ echo $TEST
+ $TEST
+ sleep 2
+ done
+ done
+done
+done
+done
+
+#Test max iov
+t="sendmsg"
+r=1
+i=1024
+l=1
+prog="--txmsg"
+
+TEST="./sockmap --cgroup /mnt/cgroup2/ -t $t -r $r -i $i -l $l $prog"
+echo $TEST
+$TEST
+sleep 2
+prog="--txmsg_redir"
+TEST="./sockmap --cgroup /mnt/cgroup2/ -t $t -r $r -i $i -l $l $prog"
+echo $TEST
+$TEST
+
+# Test max iov with 1k send
+
+t="sendmsg"
+r=1
+i=1024
+l=1024
+prog="--txmsg"
+
+TEST="./sockmap --cgroup /mnt/cgroup2/ -t $t -r $r -i $i -l $l $prog"
+echo $TEST
+$TEST
+sleep 2
+prog="--txmsg_redir"
+TEST="./sockmap --cgroup /mnt/cgroup2/ -t $t -r $r -i $i -l $l $prog"
+echo $TEST
+$TEST
+sleep 2
+
+# Test apply with 1B
+r=1
+i=1024
+l=1024
+prog="--txmsg_apply 1"
+
+for t in "sendmsg" "sendpage"; do
+ TEST="./sockmap --cgroup /mnt/cgroup2/ -t $t -r $r -i $i -l $l $prog"
+ echo $TEST
+ $TEST
+ sleep 2
+done
+
+# Test apply with larger value than send
+r=1
+i=8
+l=1024
+prog="--txmsg_apply 2048"
+
+for t in "sendmsg" "sendpage"; do
+ TEST="./sockmap --cgroup /mnt/cgroup2/ -t $t -r $r -i $i -l $l $prog"
+ echo $TEST
+ $TEST
+ sleep 2
+done
+
+# Test apply with apply that never reaches limit
+r=1024
+i=1
+l=1
+prog="--txmsg_apply 2048"
+
+for t in "sendmsg" "sendpage"; do
+ TEST="./sockmap --cgroup /mnt/cgroup2/ -t $t -r $r -i $i -l $l $prog"
+ echo $TEST
+ $TEST
+ sleep 2
+done
+
+# Test apply and redirect with 1B
+r=1
+i=1024
+l=1024
+prog="--txmsg_redir --txmsg_apply 1"
+
+for t in "sendmsg" "sendpage"; do
+ TEST="./sockmap --cgroup /mnt/cgroup2/ -t $t -r $r -i $i -l $l $prog"
+ echo $TEST
+ $TEST
+ sleep 2
+done
+
+prog="--txmsg_redir --txmsg_apply 1 --txmsg_ingress"
+
+for t in "sendmsg" "sendpage"; do
+ TEST="./sockmap --cgroup /mnt/cgroup2/ -t $t -r $r -i $i -l $l $prog"
+ echo $TEST
+ $TEST
+ sleep 2
+done
+
+prog="--txmsg_redir --txmsg_apply 1 --txmsg_skb"
+
+for t in "sendmsg" "sendpage"; do
+ TEST="./sockmap --cgroup /mnt/cgroup2/ -t $t -r $r -i $i -l $l $prog"
+ echo $TEST
+ $TEST
+ sleep 2
+done
+
+
+# Test apply and redirect with larger value than send
+r=1
+i=8
+l=1024
+prog="--txmsg_redir --txmsg_apply 2048"
+
+for t in "sendmsg" "sendpage"; do
+ TEST="./sockmap --cgroup /mnt/cgroup2/ -t $t -r $r -i $i -l $l $prog"
+ echo $TEST
+ $TEST
+ sleep 2
+done
+
+prog="--txmsg_redir --txmsg_apply 2048 --txmsg_ingress"
+
+for t in "sendmsg" "sendpage"; do
+ TEST="./sockmap --cgroup /mnt/cgroup2/ -t $t -r $r -i $i -l $l $prog"
+ echo $TEST
+ $TEST
+ sleep 2
+done
+
+prog="--txmsg_redir --txmsg_apply 2048 --txmsg_skb"
+
+for t in "sendmsg" "sendpage"; do
+ TEST="./sockmap --cgroup /mnt/cgroup2/ -t $t -r $r -i $i -l $l $prog"
+ echo $TEST
+ $TEST
+ sleep 2
+done
+
+
+# Test apply and redirect with apply that never reaches limit
+r=1024
+i=1
+l=1
+prog="--txmsg_apply 2048"
+
+for t in "sendmsg" "sendpage"; do
+ TEST="./sockmap --cgroup /mnt/cgroup2/ -t $t -r $r -i $i -l $l $prog"
+ echo $TEST
+ $TEST
+ sleep 2
+done
+
+# Test cork with 1B not really useful but test it anyways
+r=1
+i=1024
+l=1024
+prog="--txmsg_cork 1"
+
+for t in "sendpage" "sendmsg"; do
+ TEST="./sockmap --cgroup /mnt/cgroup2/ -t $t -r $r -i $i -l $l $prog"
+ echo $TEST
+ $TEST
+ sleep 2
+done
+
+# Test cork with a more reasonable 100B
+r=1
+i=1000
+l=1000
+prog="--txmsg_cork 100"
+
+for t in "sendpage" "sendmsg"; do
+ TEST="./sockmap --cgroup /mnt/cgroup2/ -t $t -r $r -i $i -l $l $prog"
+ echo $TEST
+ $TEST
+ sleep 2
+done
+
+# Test cork with larger value than send
+r=1
+i=8
+l=1024
+prog="--txmsg_cork 2048"
+
+for t in "sendpage" "sendmsg"; do
+ TEST="./sockmap --cgroup /mnt/cgroup2/ -t $t -r $r -i $i -l $l $prog"
+ echo $TEST
+ $TEST
+ sleep 2
+done
+
+# Test cork with cork that never reaches limit
+r=1024
+i=1
+l=1
+prog="--txmsg_cork 2048"
+
+for t in "sendpage" "sendmsg"; do
+ TEST="./sockmap --cgroup /mnt/cgroup2/ -t $t -r $r -i $i -l $l $prog"
+ echo $TEST
+ $TEST
+ sleep 2
+done
+
+r=1
+i=1024
+l=1024
+prog="--txmsg_redir --txmsg_cork 1"
+
+for t in "sendpage" "sendmsg"; do
+ TEST="./sockmap --cgroup /mnt/cgroup2/ -t $t -r $r -i $i -l $l $prog"
+ echo $TEST
+ $TEST
+ sleep 2
+done
+
+# Test cork with a more reasonable 100B
+r=1
+i=1000
+l=1000
+prog="--txmsg_redir --txmsg_cork 100"
+
+for t in "sendpage" "sendmsg"; do
+ TEST="./sockmap --cgroup /mnt/cgroup2/ -t $t -r $r -i $i -l $l $prog"
+ echo $TEST
+ $TEST
+ sleep 2
+done
+
+# Test cork with larger value than send
+r=1
+i=8
+l=1024
+prog="--txmsg_redir --txmsg_cork 2048"
+
+for t in "sendpage" "sendmsg"; do
+ TEST="./sockmap --cgroup /mnt/cgroup2/ -t $t -r $r -i $i -l $l $prog"
+ echo $TEST
+ $TEST
+ sleep 2
+done
+
+# Test cork with cork that never reaches limit
+r=1024
+i=1
+l=1
+prog="--txmsg_cork 2048"
+
+for t in "sendpage" "sendmsg"; do
+ TEST="./sockmap --cgroup /mnt/cgroup2/ -t $t -r $r -i $i -l $l $prog"
+ echo $TEST
+ $TEST
+ sleep 2
+done
+
+
+# mix and match cork and apply not really useful but valid programs
+
+# Test apply < cork
+r=100
+i=1
+l=5
+prog="--txmsg_apply 10 --txmsg_cork 100"
+for t in "sendpage" "sendmsg"; do
+ TEST="./sockmap --cgroup /mnt/cgroup2/ -t $t -r $r -i $i -l $l $prog"
+ echo $TEST
+ $TEST
+ sleep 2
+done
+
+# Try again with larger sizes so we hit overflow case
+r=100
+i=1000
+l=2048
+prog="--txmsg_apply 4096 --txmsg_cork 8096"
+for t in "sendpage" "sendmsg"; do
+ TEST="./sockmap --cgroup /mnt/cgroup2/ -t $t -r $r -i $i -l $l $prog"
+ echo $TEST
+ $TEST
+ sleep 2
+done
+
+# Test apply > cork
+r=100
+i=1
+l=5
+prog="--txmsg_apply 100 --txmsg_cork 10"
+for t in "sendpage" "sendmsg"; do
+ TEST="./sockmap --cgroup /mnt/cgroup2/ -t $t -r $r -i $i -l $l $prog"
+ echo $TEST
+ $TEST
+ sleep 2
+done
+
+# Again with larger sizes so we hit overflow cases
+r=100
+i=1000
+l=2048
+prog="--txmsg_apply 8096 --txmsg_cork 4096"
+for t in "sendpage" "sendmsg"; do
+ TEST="./sockmap --cgroup /mnt/cgroup2/ -t $t -r $r -i $i -l $l $prog"
+ echo $TEST
+ $TEST
+ sleep 2
+done
+
+
+# Test apply = cork
+r=100
+i=1
+l=5
+prog="--txmsg_apply 10 --txmsg_cork 10"
+for t in "sendpage" "sendmsg"; do
+ TEST="./sockmap --cgroup /mnt/cgroup2/ -t $t -r $r -i $i -l $l $prog"
+ echo $TEST
+ $TEST
+ sleep 2
+done
+
+r=100
+i=1000
+l=2048
+prog="--txmsg_apply 4096 --txmsg_cork 4096"
+for t in "sendpage" "sendmsg"; do
+ TEST="./sockmap --cgroup /mnt/cgroup2/ -t $t -r $r -i $i -l $l $prog"
+ echo $TEST
+ $TEST
+ sleep 2
+done
+
+# Test apply < cork
+r=100
+i=1
+l=5
+prog="--txmsg_redir --txmsg_apply 10 --txmsg_cork 100"
+for t in "sendpage" "sendmsg"; do
+ TEST="./sockmap --cgroup /mnt/cgroup2/ -t $t -r $r -i $i -l $l $prog"
+ echo $TEST
+ $TEST
+ sleep 2
+done
+
+# Try again with larger sizes so we hit overflow case
+r=100
+i=1000
+l=2048
+prog="--txmsg_redir --txmsg_apply 4096 --txmsg_cork 8096"
+for t in "sendpage" "sendmsg"; do
+ TEST="./sockmap --cgroup /mnt/cgroup2/ -t $t -r $r -i $i -l $l $prog"
+ echo $TEST
+ $TEST
+ sleep 2
+done
+
+# Test apply > cork
+r=100
+i=1
+l=5
+prog="--txmsg_redir --txmsg_apply 100 --txmsg_cork 10"
+for t in "sendpage" "sendmsg"; do
+ TEST="./sockmap --cgroup /mnt/cgroup2/ -t $t -r $r -i $i -l $l $prog"
+ echo $TEST
+ $TEST
+ sleep 2
+done
+
+# Again with larger sizes so we hit overflow cases
+r=100
+i=1000
+l=2048
+prog="--txmsg_redir --txmsg_apply 8096 --txmsg_cork 4096"
+for t in "sendpage" "sendmsg"; do
+ TEST="./sockmap --cgroup /mnt/cgroup2/ -t $t -r $r -i $i -l $l $prog"
+ echo $TEST
+ $TEST
+ sleep 2
+done
+
+
+# Test apply = cork
+r=100
+i=1
+l=5
+prog="--txmsg_redir --txmsg_apply 10 --txmsg_cork 10"
+for t in "sendpage" "sendmsg"; do
+ TEST="./sockmap --cgroup /mnt/cgroup2/ -t $t -r $r -i $i -l $l $prog"
+ echo $TEST
+ $TEST
+ sleep 2
+done
+
+r=100
+i=1000
+l=2048
+prog="--txmsg_redir --txmsg_apply 4096 --txmsg_cork 4096"
+for t in "sendpage" "sendmsg"; do
+ TEST="./sockmap --cgroup /mnt/cgroup2/ -t $t -r $r -i $i -l $l $prog"
+ echo $TEST
+ $TEST
+ sleep 2
+done
+
+# Tests for bpf_msg_pull_data()
+for i in `seq 99 100 1600`; do
+ TEST="./sockmap --cgroup /mnt/cgroup2/ -t sendpage -r 16 -i 1 -l 100 \
+ --txmsg --txmsg_start 0 --txmsg_end $i --txmsg_cork 1600"
+ echo $TEST
+ $TEST
+ sleep 2
+done
+
+for i in `seq 199 100 1600`; do
+ TEST="./sockmap --cgroup /mnt/cgroup2/ -t sendpage -r 16 -i 1 -l 100 \
+ --txmsg --txmsg_start 100 --txmsg_end $i --txmsg_cork 1600"
+ echo $TEST
+ $TEST
+ sleep 2
+done
+
+TEST="./sockmap --cgroup /mnt/cgroup2/ -t sendpage -r 16 -i 1 -l 100 \
+ --txmsg --txmsg_start 1500 --txmsg_end 1600 --txmsg_cork 1600"
+echo $TEST
+$TEST
+sleep 2
+
+TEST="./sockmap --cgroup /mnt/cgroup2/ -t sendpage -r 16 -i 1 -l 100 \
+ --txmsg --txmsg_start 1111 --txmsg_end 1112 --txmsg_cork 1600"
+echo $TEST
+$TEST
+sleep 2
+
+TEST="./sockmap --cgroup /mnt/cgroup2/ -t sendpage -r 16 -i 1 -l 100 \
+ --txmsg --txmsg_start 1111 --txmsg_end 0 --txmsg_cork 1600"
+echo $TEST
+$TEST
+sleep 2
+
+TEST="./sockmap --cgroup /mnt/cgroup2/ -t sendpage -r 16 -i 1 -l 100 \
+ --txmsg --txmsg_start 0 --txmsg_end 1601 --txmsg_cork 1600"
+echo $TEST
+$TEST
+sleep 2
+
+TEST="./sockmap --cgroup /mnt/cgroup2/ -t sendpage -r 16 -i 1 -l 100 \
+ --txmsg --txmsg_start 0 --txmsg_end 1601 --txmsg_cork 1602"
+echo $TEST
+$TEST
+sleep 2
+
+# Run through gamut again with start and end
+for prog in "--txmsg" "--txmsg_redir" "--txmsg_drop"; do
+for t in "sendmsg" "sendpage"; do
+for r in 1 10 100; do
+ for i in 1 10 100; do
+ for l in 1 10 100; do
+ TEST="./sockmap --cgroup /mnt/cgroup2/ -t $t -r $r -i $i -l $l $prog --txmsg_start 1 --txmsg_end 2"
+ echo $TEST
+ $TEST
+ sleep 2
+ done
+ done
+done
+done
+done
+
+# Some specific tests to cover specific code paths
+./sockmap --cgroup /mnt/cgroup2/ -t sendpage \
+ -r 5 -i 1 -l 1 --txmsg_redir --txmsg_cork 5 --txmsg_apply 3
+./sockmap --cgroup /mnt/cgroup2/ -t sendmsg \
+ -r 5 -i 1 -l 1 --txmsg_redir --txmsg_cork 5 --txmsg_apply 3
+./sockmap --cgroup /mnt/cgroup2/ -t sendpage \
+ -r 5 -i 1 -l 1 --txmsg_redir --txmsg_cork 5 --txmsg_apply 5
+./sockmap --cgroup /mnt/cgroup2/ -t sendmsg \
+ -r 5 -i 1 -l 1 --txmsg_redir --txmsg_cork 5 --txmsg_apply 5
diff --git a/samples/sockmap/sockmap_user.c b/samples/sockmap/sockmap_user.c
index 7c25c0c112bc..6f2334912283 100644
--- a/samples/sockmap/sockmap_user.c
+++ b/samples/sockmap/sockmap_user.c
@@ -29,6 +29,7 @@
#include <sys/time.h>
#include <sys/resource.h>
#include <sys/types.h>
+#include <sys/sendfile.h>
#include <linux/netlink.h>
#include <linux/socket.h>
@@ -54,6 +55,18 @@ void running_handler(int a);
/* global sockets */
int s1, s2, c1, c2, p1, p2;
+int txmsg_pass;
+int txmsg_noisy;
+int txmsg_redir;
+int txmsg_redir_noisy;
+int txmsg_drop;
+int txmsg_apply;
+int txmsg_cork;
+int txmsg_start;
+int txmsg_end;
+int txmsg_ingress;
+int txmsg_skb;
+
static const struct option long_options[] = {
{"help", no_argument, NULL, 'h' },
{"cgroup", required_argument, NULL, 'c' },
@@ -62,6 +75,18 @@ static const struct option long_options[] = {
{"iov_count", required_argument, NULL, 'i' },
{"length", required_argument, NULL, 'l' },
{"test", required_argument, NULL, 't' },
+ {"data_test", no_argument, NULL, 'd' },
+ {"txmsg", no_argument, &txmsg_pass, 1 },
+ {"txmsg_noisy", no_argument, &txmsg_noisy, 1 },
+ {"txmsg_redir", no_argument, &txmsg_redir, 1 },
+ {"txmsg_redir_noisy", no_argument, &txmsg_redir_noisy, 1},
+ {"txmsg_drop", no_argument, &txmsg_drop, 1 },
+ {"txmsg_apply", required_argument, NULL, 'a'},
+ {"txmsg_cork", required_argument, NULL, 'k'},
+ {"txmsg_start", required_argument, NULL, 's'},
+ {"txmsg_end", required_argument, NULL, 'e'},
+ {"txmsg_ingress", no_argument, &txmsg_ingress, 1 },
+ {"txmsg_skb", no_argument, &txmsg_skb, 1 },
{0, 0, NULL, 0 }
};
@@ -195,19 +220,71 @@ struct msg_stats {
struct timespec end;
};
+struct sockmap_options {
+ int verbose;
+ bool base;
+ bool sendpage;
+ bool data_test;
+ bool drop_expected;
+};
+
+static int msg_loop_sendpage(int fd, int iov_length, int cnt,
+ struct msg_stats *s,
+ struct sockmap_options *opt)
+{
+ bool drop = opt->drop_expected;
+ unsigned char k = 0;
+ FILE *file;
+ int i, fp;
+
+ file = fopen(".sendpage_tst.tmp", "w+");
+ for (i = 0; i < iov_length * cnt; i++, k++)
+ fwrite(&k, sizeof(char), 1, file);
+ fflush(file);
+ fseek(file, 0, SEEK_SET);
+ fclose(file);
+
+ fp = open(".sendpage_tst.tmp", O_RDONLY);
+ clock_gettime(CLOCK_MONOTONIC, &s->start);
+ for (i = 0; i < cnt; i++) {
+ int sent = sendfile(fd, fp, NULL, iov_length);
+
+ if (!drop && sent < 0) {
+ perror("send loop error:");
+ close(fp);
+ return sent;
+ } else if (drop && sent >= 0) {
+ printf("sendpage loop error expected: %i\n", sent);
+ close(fp);
+ return -EIO;
+ }
+
+ if (sent > 0)
+ s->bytes_sent += sent;
+ }
+ clock_gettime(CLOCK_MONOTONIC, &s->end);
+ close(fp);
+ return 0;
+}
+
static int msg_loop(int fd, int iov_count, int iov_length, int cnt,
- struct msg_stats *s, bool tx)
+ struct msg_stats *s, bool tx,
+ struct sockmap_options *opt)
{
struct msghdr msg = {0};
int err, i, flags = MSG_NOSIGNAL;
struct iovec *iov;
+ unsigned char k;
+ bool data_test = opt->data_test;
+ bool drop = opt->drop_expected;
iov = calloc(iov_count, sizeof(struct iovec));
if (!iov)
return errno;
+ k = 0;
for (i = 0; i < iov_count; i++) {
- char *d = calloc(iov_length, sizeof(char));
+ unsigned char *d = calloc(iov_length, sizeof(char));
if (!d) {
fprintf(stderr, "iov_count %i/%i OOM\n", i, iov_count);
@@ -215,21 +292,34 @@ static int msg_loop(int fd, int iov_count, int iov_length, int cnt,
}
iov[i].iov_base = d;
iov[i].iov_len = iov_length;
+
+ if (data_test && tx) {
+ int j;
+
+ for (j = 0; j < iov_length; j++)
+ d[j] = k++;
+ }
}
msg.msg_iov = iov;
msg.msg_iovlen = iov_count;
+ k = 0;
if (tx) {
clock_gettime(CLOCK_MONOTONIC, &s->start);
for (i = 0; i < cnt; i++) {
int sent = sendmsg(fd, &msg, flags);
- if (sent < 0) {
+ if (!drop && sent < 0) {
perror("send loop error:");
goto out_errno;
+ } else if (drop && sent >= 0) {
+ printf("send loop error expected: %i\n", sent);
+ errno = -EIO;
+ goto out_errno;
}
- s->bytes_sent += sent;
+ if (sent > 0)
+ s->bytes_sent += sent;
}
clock_gettime(CLOCK_MONOTONIC, &s->end);
} else {
@@ -272,6 +362,26 @@ static int msg_loop(int fd, int iov_count, int iov_length, int cnt,
}
s->bytes_recvd += recv;
+
+ if (data_test) {
+ int j;
+
+ for (i = 0; i < msg.msg_iovlen; i++) {
+ unsigned char *d = iov[i].iov_base;
+
+ for (j = 0;
+ j < iov[i].iov_len && recv; j++) {
+ if (d[j] != k++) {
+ errno = -EIO;
+ fprintf(stderr,
+ "detected data corruption @iov[%i]:%i %02x != %02x, %02x ?= %02x\n",
+ i, j, d[j], k - 1, d[j+1], k + 1);
+ goto out_errno;
+ }
+ recv--;
+ }
+ }
+ }
}
clock_gettime(CLOCK_MONOTONIC, &s->end);
}
@@ -300,7 +410,7 @@ static inline float recvdBps(struct msg_stats s)
}
static int sendmsg_test(int iov_count, int iov_buf, int cnt,
- int verbose, bool base)
+ struct sockmap_options *opt)
{
float sent_Bps = 0, recvd_Bps = 0;
int rx_fd, txpid, rxpid, err = 0;
@@ -309,14 +419,20 @@ static int sendmsg_test(int iov_count, int iov_buf, int cnt,
errno = 0;
- if (base)
+ if (opt->base)
rx_fd = p1;
else
rx_fd = p2;
rxpid = fork();
if (rxpid == 0) {
- err = msg_loop(rx_fd, iov_count, iov_buf, cnt, &s, false);
+ if (opt->drop_expected)
+ exit(1);
+
+ if (opt->sendpage)
+ iov_count = 1;
+ err = msg_loop(rx_fd, iov_count, iov_buf,
+ cnt, &s, false, opt);
if (err)
fprintf(stderr,
"msg_loop_rx: iov_count %i iov_buf %i cnt %i err %i\n",
@@ -339,7 +455,12 @@ static int sendmsg_test(int iov_count, int iov_buf, int cnt,
txpid = fork();
if (txpid == 0) {
- err = msg_loop(c1, iov_count, iov_buf, cnt, &s, true);
+ if (opt->sendpage)
+ err = msg_loop_sendpage(c1, iov_buf, cnt, &s, opt);
+ else
+ err = msg_loop(c1, iov_count, iov_buf,
+ cnt, &s, true, opt);
+
if (err)
fprintf(stderr,
"msg_loop_tx: iov_count %i iov_buf %i cnt %i err %i\n",
@@ -364,7 +485,7 @@ static int sendmsg_test(int iov_count, int iov_buf, int cnt,
return err;
}
-static int forever_ping_pong(int rate, int verbose)
+static int forever_ping_pong(int rate, struct sockmap_options *opt)
{
struct timeval timeout;
char buf[1024] = {0};
@@ -429,7 +550,7 @@ static int forever_ping_pong(int rate, int verbose)
if (rate)
sleep(rate);
- if (verbose) {
+ if (opt->verbose) {
printf(".");
fflush(stdout);
@@ -443,20 +564,34 @@ enum {
PING_PONG,
SENDMSG,
BASE,
+ BASE_SENDPAGE,
+ SENDPAGE,
};
int main(int argc, char **argv)
{
- int iov_count = 1, length = 1024, rate = 1, verbose = 0;
+ int iov_count = 1, length = 1024, rate = 1, tx_prog_fd;
struct rlimit r = {10 * 1024 * 1024, RLIM_INFINITY};
int opt, longindex, err, cg_fd = 0;
+ struct sockmap_options options = {0};
int test = PING_PONG;
char filename[256];
- while ((opt = getopt_long(argc, argv, "hvc:r:i:l:t:",
+ while ((opt = getopt_long(argc, argv, ":dhvc:r:i:l:t:",
long_options, &longindex)) != -1) {
switch (opt) {
- /* Cgroup configuration */
+ case 's':
+ txmsg_start = atoi(optarg);
+ break;
+ case 'e':
+ txmsg_end = atoi(optarg);
+ break;
+ case 'a':
+ txmsg_apply = atoi(optarg);
+ break;
+ case 'k':
+ txmsg_cork = atoi(optarg);
+ break;
case 'c':
cg_fd = open(optarg, O_DIRECTORY, O_RDONLY);
if (cg_fd < 0) {
@@ -470,7 +605,7 @@ int main(int argc, char **argv)
rate = atoi(optarg);
break;
case 'v':
- verbose = 1;
+ options.verbose = 1;
break;
case 'i':
iov_count = atoi(optarg);
@@ -478,6 +613,9 @@ int main(int argc, char **argv)
case 'l':
length = atoi(optarg);
break;
+ case 'd':
+ options.data_test = true;
+ break;
case 't':
if (strcmp(optarg, "ping") == 0) {
test = PING_PONG;
@@ -485,11 +623,17 @@ int main(int argc, char **argv)
test = SENDMSG;
} else if (strcmp(optarg, "base") == 0) {
test = BASE;
+ } else if (strcmp(optarg, "base_sendpage") == 0) {
+ test = BASE_SENDPAGE;
+ } else if (strcmp(optarg, "sendpage") == 0) {
+ test = SENDPAGE;
} else {
usage(argv);
return -1;
}
break;
+ case 0:
+ break;
case 'h':
default:
usage(argv);
@@ -515,16 +659,16 @@ int main(int argc, char **argv)
/* catch SIGINT */
signal(SIGINT, running_handler);
- /* If base test skip BPF setup */
- if (test == BASE)
- goto run;
-
if (load_bpf_file(filename)) {
fprintf(stderr, "load_bpf_file: (%s) %s\n",
filename, strerror(errno));
return 1;
}
+ /* If base test skip BPF setup */
+ if (test == BASE || test == BASE_SENDPAGE)
+ goto run;
+
/* Attach programs to sockmap */
err = bpf_prog_attach(prog_fd[0], map_fd[0],
BPF_SK_SKB_STREAM_PARSER, 0);
@@ -557,15 +701,183 @@ run:
goto out;
}
- if (test == PING_PONG)
- err = forever_ping_pong(rate, verbose);
- else if (test == SENDMSG)
- err = sendmsg_test(iov_count, length, rate, verbose, false);
- else if (test == BASE)
- err = sendmsg_test(iov_count, length, rate, verbose, true);
+ /* Attach txmsg program to sockmap */
+ if (txmsg_pass)
+ tx_prog_fd = prog_fd[3];
+ else if (txmsg_noisy)
+ tx_prog_fd = prog_fd[4];
+ else if (txmsg_redir)
+ tx_prog_fd = prog_fd[5];
+ else if (txmsg_redir_noisy)
+ tx_prog_fd = prog_fd[6];
+ else if (txmsg_drop)
+ tx_prog_fd = prog_fd[9];
+ /* apply and cork must be last */
+ else if (txmsg_apply)
+ tx_prog_fd = prog_fd[7];
+ else if (txmsg_cork)
+ tx_prog_fd = prog_fd[8];
else
+ tx_prog_fd = 0;
+
+ if (tx_prog_fd) {
+ int redir_fd, i = 0;
+
+ err = bpf_prog_attach(tx_prog_fd,
+ map_fd[1], BPF_SK_MSG_VERDICT, 0);
+ if (err) {
+ fprintf(stderr,
+ "ERROR: bpf_prog_attach (txmsg): %d (%s)\n",
+ err, strerror(errno));
+ return err;
+ }
+
+ err = bpf_map_update_elem(map_fd[1], &i, &c1, BPF_ANY);
+ if (err) {
+ fprintf(stderr,
+ "ERROR: bpf_map_update_elem (txmsg): %d (%s\n",
+ err, strerror(errno));
+ return err;
+ }
+
+ if (txmsg_redir || txmsg_redir_noisy)
+ redir_fd = c2;
+ else
+ redir_fd = c1;
+
+ err = bpf_map_update_elem(map_fd[2], &i, &redir_fd, BPF_ANY);
+ if (err) {
+ fprintf(stderr,
+ "ERROR: bpf_map_update_elem (txmsg): %d (%s\n",
+ err, strerror(errno));
+ return err;
+ }
+
+ if (txmsg_apply) {
+ err = bpf_map_update_elem(map_fd[3],
+ &i, &txmsg_apply, BPF_ANY);
+ if (err) {
+ fprintf(stderr,
+ "ERROR: bpf_map_update_elem (apply_bytes): %d (%s\n",
+ err, strerror(errno));
+ return err;
+ }
+ }
+
+ if (txmsg_cork) {
+ err = bpf_map_update_elem(map_fd[4],
+ &i, &txmsg_cork, BPF_ANY);
+ if (err) {
+ fprintf(stderr,
+ "ERROR: bpf_map_update_elem (cork_bytes): %d (%s\n",
+ err, strerror(errno));
+ return err;
+ }
+ }
+
+ if (txmsg_start) {
+ err = bpf_map_update_elem(map_fd[5],
+ &i, &txmsg_start, BPF_ANY);
+ if (err) {
+ fprintf(stderr,
+ "ERROR: bpf_map_update_elem (txmsg_start): %d (%s)\n",
+ err, strerror(errno));
+ return err;
+ }
+ }
+
+ if (txmsg_end) {
+ i = 1;
+ err = bpf_map_update_elem(map_fd[5],
+ &i, &txmsg_end, BPF_ANY);
+ if (err) {
+ fprintf(stderr,
+ "ERROR: bpf_map_update_elem (txmsg_end): %d (%s)\n",
+ err, strerror(errno));
+ return err;
+ }
+ }
+
+ if (txmsg_ingress) {
+ int in = BPF_F_INGRESS;
+
+ i = 0;
+ err = bpf_map_update_elem(map_fd[6], &i, &in, BPF_ANY);
+ if (err) {
+ fprintf(stderr,
+ "ERROR: bpf_map_update_elem (txmsg_ingress): %d (%s)\n",
+ err, strerror(errno));
+ }
+ i = 1;
+ err = bpf_map_update_elem(map_fd[1], &i, &p1, BPF_ANY);
+ if (err) {
+ fprintf(stderr,
+ "ERROR: bpf_map_update_elem (p1 txmsg): %d (%s)\n",
+ err, strerror(errno));
+ }
+ err = bpf_map_update_elem(map_fd[2], &i, &p1, BPF_ANY);
+ if (err) {
+ fprintf(stderr,
+ "ERROR: bpf_map_update_elem (p1 redir): %d (%s)\n",
+ err, strerror(errno));
+ }
+
+ i = 2;
+ err = bpf_map_update_elem(map_fd[2], &i, &p2, BPF_ANY);
+ if (err) {
+ fprintf(stderr,
+ "ERROR: bpf_map_update_elem (p2 txmsg): %d (%s)\n",
+ err, strerror(errno));
+ }
+ }
+
+ if (txmsg_skb) {
+ int skb_fd = (test == SENDMSG || test == SENDPAGE) ? p2 : p1;
+ int ingress = BPF_F_INGRESS;
+
+ i = 0;
+ err = bpf_map_update_elem(map_fd[7], &i, &ingress, BPF_ANY);
+ if (err) {
+ fprintf(stderr,
+ "ERROR: bpf_map_update_elem (txmsg_ingress): %d (%s)\n",
+ err, strerror(errno));
+ }
+
+ i = 3;
+ err = bpf_map_update_elem(map_fd[0], &i, &skb_fd, BPF_ANY);
+ if (err) {
+ fprintf(stderr,
+ "ERROR: bpf_map_update_elem (c1 sockmap): %d (%s)\n",
+ err, strerror(errno));
+ }
+ }
+ }
+
+ if (txmsg_drop)
+ options.drop_expected = true;
+
+ if (test == PING_PONG)
+ err = forever_ping_pong(rate, &options);
+ else if (test == SENDMSG) {
+ options.base = false;
+ options.sendpage = false;
+ err = sendmsg_test(iov_count, length, rate, &options);
+ } else if (test == SENDPAGE) {
+ options.base = false;
+ options.sendpage = true;
+ err = sendmsg_test(iov_count, length, rate, &options);
+ } else if (test == BASE) {
+ options.base = true;
+ options.sendpage = false;
+ err = sendmsg_test(iov_count, length, rate, &options);
+ } else if (test == BASE_SENDPAGE) {
+ options.base = true;
+ options.sendpage = true;
+ err = sendmsg_test(iov_count, length, rate, &options);
+ } else
fprintf(stderr, "unknown test\n");
out:
+ bpf_prog_detach2(prog_fd[2], cg_fd, BPF_CGROUP_SOCK_OPS);
close(s1);
close(s2);
close(p1);