summaryrefslogtreecommitdiff
path: root/src/core/bpf
diff options
context:
space:
mode:
authorJulia Kartseva <hex@fb.com>2020-11-13 17:40:17 -0800
committerJulia Kartseva <hex@fb.com>2021-04-26 16:07:41 -0700
commit58a33faf8081e3b6126b49817a0b5cee54e8770b (patch)
treef0c9d6e8e5977c161b98c8bf6a1ca8f0d020b32f /src/core/bpf
parentea1036e253ed3712f43ddd7a22e7957cd2c48ee2 (diff)
downloadsystemd-58a33faf8081e3b6126b49817a0b5cee54e8770b.tar.gz
bpf: add socket-bind BPF program code sources
Introduce socket-bind, a BPF program compiled from BPF source code written in restricted C. It addresses feature request [0]. The goal is to allow systemd services to bind(2) only to a predefined set of ports. This prevents assigning a socket address with a disallowed port to a socket and creating servers listening on that port. This complements the firewalling feature present in systemd: whereas cgroup/{egress|ingress} hooks act on packets, they do not protect against an untrusted service or payload hijacking an important port. While ports in the 0-1023 range are restricted to root only, the 1024-65535 range is not protected by any means. Performance is another aspect of the socket_bind feature, since the per-packet cost can be eliminated for some port-based filtering policies. The feature is implemented with cgroup/bind{4|6} hooks [1]. In contrast to the present systemd approach of using raw BPF instructions, this program is compiled from sources. A stretch goal is to make the BPF ecosystem in systemd friendlier for developers and to clear the path for more BPF programs. [0] https://github.com/systemd/systemd/pull/13496#issuecomment-570573085 [1] https://www.spinics.net/lists/netdev/msg489054.html
Diffstat (limited to 'src/core/bpf')
-rw-r--r--src/core/bpf/socket_bind/socket-bind-api.bpf.h47
-rw-r--r--src/core/bpf/socket_bind/socket-bind.bpf.c103
2 files changed, 150 insertions, 0 deletions
diff --git a/src/core/bpf/socket_bind/socket-bind-api.bpf.h b/src/core/bpf/socket_bind/socket-bind-api.bpf.h
new file mode 100644
index 0000000000..2f08408747
--- /dev/null
+++ b/src/core/bpf/socket_bind/socket-bind-api.bpf.h
@@ -0,0 +1,47 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+/* The SPDX header above is actually correct in claiming this was
+ * LGPL-2.1-or-later, because it is. Since the kernel doesn't consider that
+ * compatible with GPL we will claim this to be GPL however, which should be
+ * fine given that LGPL-2.1-or-later downgrades to GPL if needed.
+ */
+
+#include <linux/types.h>
+
+/*
+ * Bind rule is matched with socket fields accessible to cgroup/bind{4,6} hook
+ * through bpf_sock_addr struct.
+ * address_family is expected to be one of AF_UNSPEC, AF_INET or AF_INET6.
+ * Matching by family is bypassed for rules with AF_UNSPEC set, which makes the
+ * rest of a rule applicable for both IPv4 and IPv6 addresses.
+ * If matching by family is either successful or bypassed, a rule and a socket
+ * are matched by ports.
+ * nr_ports and port_min fields specify a set of ports to match a user port
+ * with.
+ * If nr_ports is 0, matching by port is bypassed, making that rule applicable
+ * for all possible ports, i.e. the [1, 65535] range. Thus a rule with
+ * address_family and nr_ports equal to AF_UNSPEC and 0 correspondingly forms
+ * 'allow any' or 'deny any' cases.
+ * For positive nr_ports, a user port in the half-open range
+ * [port_min, port_min + nr_ports) is considered to be a match. nr_ports
+ * equal to 1 forms a rule for a single port.
+ * Ports are in host order.
+ *
+ * Examples:
+ * AF_UNSPEC, 1, 7777: match IPv4 and IPv6 addresses with 7777 user port;
+ *
+ * AF_INET, 1023, 1: match IPv4 addresses with user port in [1, 1023]
+ * range inclusively;
+ *
+ * AF_INET6, 0, 0: match IPv6 addresses;
+ *
+ * AF_UNSPEC, 0, 0: match IPv4 and IPv6 addresses.
+ */
+
+struct socket_bind_rule { /* one bind rule: an address family plus a port range */
+ __u32 address_family; /* AF_UNSPEC, AF_INET or AF_INET6; AF_UNSPEC matches both families */
+ __u16 nr_ports; /* number of ports in the range; 0 matches any port */
+ __u16 port_min; /* first port of the range, in host byte order */
+};
+
+#define SOCKET_BIND_MAX_RULES 128 /* upper bound on the rules scanned per map by the loop in socket-bind.bpf.c */
diff --git a/src/core/bpf/socket_bind/socket-bind.bpf.c b/src/core/bpf/socket_bind/socket-bind.bpf.c
new file mode 100644
index 0000000000..474808d824
--- /dev/null
+++ b/src/core/bpf/socket_bind/socket-bind.bpf.c
@@ -0,0 +1,103 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+/* The SPDX header above is actually correct in claiming this was
+ * LGPL-2.1-or-later, because it is. Since the kernel doesn't consider that
+ * compatible with GPL we will claim this to be GPL however, which should be
+ * fine given that LGPL-2.1-or-later downgrades to GPL if needed.
+ */
+
+#include "socket-bind-api.bpf.h"
+/* <linux/types.h> must precede <bpf/bpf_helpers.h> because
+ * <bpf/bpf_helpers.h>, by design, does not include the type header itself.
+ */
+#include <linux/types.h>
+#include <bpf/bpf_endian.h>
+#include <bpf/bpf_helpers.h>
+#include <linux/bpf.h>
+#include <netinet/in.h>
+#include <stdbool.h>
+
+/*
+ * max_entries is set from user space with bpf_map__resize helper.
+ */
+struct socket_bind_map_t { /* array map of bind rules, looked up by rule index */
+ __uint(type, BPF_MAP_TYPE_ARRAY);
+ __type(key, __u32); /* rule index */
+ __type(value, struct socket_bind_rule); /* rule layout declared in socket-bind-api.bpf.h */
+};
+
+enum socket_bind_action { /* verdicts returned by the bind hook programs in this file */
+ SOCKET_BIND_DENY = 0, /* the bind request is refused */
+ SOCKET_BIND_ALLOW = 1, /* the bind request may proceed */
+};
+
+struct socket_bind_map_t sd_bind_allow SEC(".maps"); /* consulted first: a matching rule allows the bind */
+struct socket_bind_map_t sd_bind_deny SEC(".maps"); /* consulted only when no allow rule matched: a matching rule denies the bind */
+
+static __always_inline bool match_af( /* does rule r apply to this address family? */
+ __u8 address_family, const struct socket_bind_rule *r) { /* NOTE(review): __u8 here vs __u32 in the rule - confirm callers never pass a wider family value */
+ return r->address_family == AF_UNSPEC || address_family == r->address_family; /* AF_UNSPEC rules match every family */
+}
+
+static __always_inline bool match_user_port( /* does rule r's port range contain port (host byte order)? */
+ __u16 port, const struct socket_bind_rule *r) {
+ return r->nr_ports == 0 || /* nr_ports == 0: the rule matches any port */
+ (port >= r->port_min && port < r->port_min + (__u32) r->nr_ports); /* half-open range [port_min, port_min + nr_ports); the sum is formed in 32 bits so it cannot wrap at 16 bits */
+}
+
+static __always_inline bool match( /* a rule matches only when both the family check and the port check pass */
+ __u8 address_family,
+ __u16 port,
+ const struct socket_bind_rule *r) {
+ return match_af(address_family, r) && match_user_port(port, r);
+}
+
+static __always_inline bool match_rules( /* true if any rule in the given map matches the address being bound */
+ struct bpf_sock_addr *ctx,
+ struct socket_bind_map_t *rules) {
+ volatile __u32 user_port = ctx->user_port; /* NOTE(review): volatile looks like a workaround for how the ctx field load is compiled/verified - confirm before removing */
+ __u16 port = (__u16)bpf_ntohs(user_port); /* convert to host order, since rules keep ports in host order */
+
+ for (__u32 i = 0; i < SOCKET_BIND_MAX_RULES; ++i) { /* compile-time constant bound on the number of lookups */
+ const __u32 key = i;
+ const struct socket_bind_rule *rule = bpf_map_lookup_elem(rules, &key);
+
+ /* Lookup returns NULL if iterator is advanced past the last
+ * element put in the map. */
+ if (!rule)
+ break;
+
+ if (match(ctx->user_family, port, rule))
+ return true; /* first matching rule wins */
+ }
+
+ return false; /* no rule in this map matched */
+}
+
+static __always_inline int bind_socket(struct bpf_sock_addr *ctx) { /* verdict for one bind request: allow rules, then deny rules, otherwise allow */
+ if (match_rules(ctx, &sd_bind_allow))
+ return SOCKET_BIND_ALLOW; /* an allow match takes precedence over any deny rule */
+
+ if (match_rules(ctx, &sd_bind_deny))
+ return SOCKET_BIND_DENY;
+
+ return SOCKET_BIND_ALLOW; /* nothing matched: the default is to allow */
+}
+
+SEC("cgroup/bind4") /* program for the cgroup/bind4 hook */
+int sd_bind4(struct bpf_sock_addr *ctx) {
+ if (ctx->user_family != AF_INET || ctx->family != AF_INET)
+ return SOCKET_BIND_ALLOW; /* either family field is not AF_INET: rules are not applied and the bind is allowed */
+
+ return bind_socket(ctx);
+}
+
+SEC("cgroup/bind6") /* program for the cgroup/bind6 hook */
+int sd_bind6(struct bpf_sock_addr *ctx) {
+ if (ctx->user_family != AF_INET6 || ctx->family != AF_INET6)
+ return SOCKET_BIND_ALLOW; /* either family field is not AF_INET6: rules are not applied and the bind is allowed */
+
+ return bind_socket(ctx);
+}
+
+char _license[] SEC("license") = "GPL"; /* declared as GPL for the kernel; see the licensing note at the top of this file */