summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorKai Lüke <kailueke@riseup.net>2019-04-23 12:14:20 +0200
committerLennart Poettering <lennart@poettering.net>2019-06-25 09:56:16 +0200
commitfab347489fcfafbc8367c86afc637ce1b81ae59e (patch)
tree25eb895a90940163ff7e6f0e3d8c0054433ae6d1
parent2d901d33a90ef9d3fe01ac66c4894c9e6bf48ce0 (diff)
downloadsystemd-fab347489fcfafbc8367c86afc637ce1b81ae59e.tar.gz
bpf-firewall: custom BPF programs through IP(Ingress|Egress)FilterPath=
Takes a single /sys/fs/bpf/pinned_prog string as argument, but may be specified multiple times. An empty assignment resets all previous filters. Closes https://github.com/systemd/systemd/issues/10227
-rw-r--r--man/systemd.resource-control.xml33
-rw-r--r--src/analyze/analyze-security.c37
-rw-r--r--src/core/bpf-firewall.c106
-rw-r--r--src/core/bpf-firewall.h1
-rw-r--r--src/core/cgroup.c21
-rw-r--r--src/core/cgroup.h3
-rw-r--r--src/core/dbus-cgroup.c76
-rw-r--r--src/core/load-fragment-gperf.gperf.m42
-rw-r--r--src/core/load-fragment.c61
-rw-r--r--src/core/load-fragment.h1
-rw-r--r--src/core/unit.c12
-rw-r--r--src/core/unit.h5
-rw-r--r--src/shared/bpf-program.c19
-rw-r--r--src/shared/bpf-program.h1
-rw-r--r--src/shared/bus-unit-util.c12
-rw-r--r--src/test/test-bpf.c53
16 files changed, 436 insertions, 7 deletions
diff --git a/man/systemd.resource-control.xml b/man/systemd.resource-control.xml
index 95209a8a6a..e7b5dfbce6 100644
--- a/man/systemd.resource-control.xml
+++ b/man/systemd.resource-control.xml
@@ -619,6 +619,39 @@
</varlistentry>
<varlistentry>
+ <term><varname>IPIngressFilterPath=<replaceable>BPF_FS_PROGRAM_PATH</replaceable></varname></term>
+ <term><varname>IPEgressFilterPath=<replaceable>BPF_FS_PROGRAM_PATH</replaceable></varname></term>
+
+ <listitem>
+ <para>Add custom network traffic filters implemented as BPF programs, applying to all IP packets
+ sent and received over <constant>AF_INET</constant> and <constant>AF_INET6</constant> sockets.
+ Takes an absolute path to a pinned BPF program in the BPF virtual filesystem (<filename>/sys/fs/bpf/</filename>).
+ </para>
+
+ <para>The filters configured with this option are applied to all sockets created by processes
+ of this unit (or in the case of socket units, associated with it). The filters are loaded in addition
+ to the filters of any parent slice units this unit might be a member of, as well as any
+ <varname>IPAddressAllow=</varname> and <varname>IPAddressDeny=</varname> filters in any of these units.
+ By default there are no filters specified.</para>
+
+ <para>If these settings are used multiple times in the same unit, all the specified programs are attached. If an
+ empty string is assigned to these settings, the program list is reset and all previously specified programs are ignored.</para>
+
+ <para>Note that for socket-activated services, the IP filter programs configured on the socket unit apply to
+ all sockets associated with it directly, but not to any sockets created by the ultimately activated services
+ for it. Conversely, the IP filter programs configured for the service are not applied to any sockets passed into
+ the service via socket activation. Thus, it is usually a good idea to replicate the IP filter programs on both
+ the socket and the service unit; however, it often makes sense to maintain one configuration more open and the other
+ one more restricted, depending on the use case.</para>
+
+ <para>Note that these settings might not be supported on some systems (for example if eBPF control group
+ support is not enabled in the underlying kernel or container manager). These settings will fail the service in
+ that case. If compatibility with such systems is desired it is hence recommended to attach your filter manually
+ (requires <varname>Delegate=</varname><constant>yes</constant>) instead of using this setting.</para>
+ </listitem>
+ </varlistentry>
+
+ <varlistentry>
<term><varname>DeviceAllow=</varname></term>
<listitem>
diff --git a/src/analyze/analyze-security.c b/src/analyze/analyze-security.c
index 0962950dd0..3cf6515f5f 100644
--- a/src/analyze/analyze-security.c
+++ b/src/analyze/analyze-security.c
@@ -45,6 +45,9 @@ struct security_info {
bool ip_address_allow_localhost;
bool ip_address_allow_other;
+ bool ip_filters_custom_ingress;
+ bool ip_filters_custom_egress;
+
char *keyring_mode;
bool lock_personality;
bool memory_deny_write_execute;
@@ -590,7 +593,10 @@ static int assess_ip_address_allow(
assert(ret_badness);
assert(ret_description);
- if (!info->ip_address_deny_all) {
+ if (info->ip_filters_custom_ingress || info->ip_filters_custom_egress) {
+ d = strdup("Service defines custom ingress/egress IP filters with BPF programs");
+ b = 0;
+ } else if (!info->ip_address_deny_all) {
d = strdup("Service does not define an IP address whitelist");
b = 10;
} else if (info->ip_address_allow_other) {
@@ -1824,6 +1830,33 @@ static int property_read_ip_address_allow(
return sd_bus_message_exit_container(m);
}
+/* Bus property parser for the "IPIngressFilterPath" and "IPEgressFilterPath" properties: reads the string
+ * array carried by the message and records in the security_info passed as 'userdata' whether any custom
+ * BPF filter path is configured for the direction named by 'member'.
+ *
+ * Returns 0 on success, or the negative error returned by sd_bus_message_read_strv(). */
+static int property_read_ip_filters(
+                sd_bus *bus,
+                const char *member,
+                sd_bus_message *m,
+                sd_bus_error *error,
+                void *userdata) {
+
+        struct security_info *info = userdata;
+        _cleanup_(strv_freep) char **l = NULL;
+        int r;
+
+        assert(bus);
+        assert(member);
+        assert(m);
+
+        r = sd_bus_message_read_strv(m, &l);
+        if (r < 0)
+                return r;
+
+        /* Each direction has its own flag; a non-empty path list means custom filters are in use. */
+        if (streq(member, "IPIngressFilterPath"))
+                info->ip_filters_custom_ingress = !strv_isempty(l);
+        else if (streq(member, "IPEgressFilterPath"))
+                info->ip_filters_custom_egress = !strv_isempty(l);
+
+        return 0;
+}
+
static int property_read_device_allow(
sd_bus *bus,
const char *member,
@@ -1873,6 +1906,8 @@ static int acquire_security_info(sd_bus *bus, const char *name, struct security_
{ "FragmentPath", "s", NULL, offsetof(struct security_info, fragment_path) },
{ "IPAddressAllow", "a(iayu)", property_read_ip_address_allow, 0 },
{ "IPAddressDeny", "a(iayu)", property_read_ip_address_allow, 0 },
+ { "IPIngressFilterPath", "as", property_read_ip_filters, 0 },
+ { "IPEgressFilterPath", "as", property_read_ip_filters, 0 },
{ "Id", "s", NULL, offsetof(struct security_info, id) },
{ "KeyringMode", "s", NULL, offsetof(struct security_info, keyring_mode) },
{ "LoadState", "s", NULL, offsetof(struct security_info, load_state) },
diff --git a/src/core/bpf-firewall.c b/src/core/bpf-firewall.c
index 8163db276b..7a8b848fb3 100644
--- a/src/core/bpf-firewall.c
+++ b/src/core/bpf-firewall.c
@@ -587,6 +587,95 @@ int bpf_firewall_compile(Unit *u) {
return 0;
}
+/* Hash operations for the Sets of BPFProgram objects handled below, with bpf_program_unref registered as
+ * the destructor (presumably invoked for entries dropped by set_clear()/set_free() — confirm in hashmap.h). */
+DEFINE_PRIVATE_HASH_OPS_WITH_VALUE_DESTRUCTOR(filter_prog_hash_ops, void, trivial_hash_func, trivial_compare_func, BPFProgram, bpf_program_unref);
+
+/* Opens every pinned BPF program listed in 'filter_paths' and stores the resulting BPFProgram objects in
+ * '*set', after clearing whatever the set contained before. The set is allocated on demand, so '*set' may
+ * be NULL on entry. This helper is used for both the ingress and the egress path lists.
+ *
+ * Returns 0 on success, or a negative error (already logged for unit 'u') on failure. */
+static int load_bpf_progs_from_fs_to_set(Unit *u, char **filter_paths, Set **set) {
+        char **bpf_fs_path;
+
+        assert(u);
+        assert(set);
+
+        set_clear(*set);
+
+        STRV_FOREACH(bpf_fs_path, filter_paths) {
+                /* Release via unref rather than free(): free() alone would not close the kernel fd that
+                 * bpf_program_load_from_bpf_fs() stores in the program, leaking it on later error paths. */
+                _cleanup_(bpf_program_unrefp) BPFProgram *prog = NULL;
+                int r;
+
+                r = bpf_program_new(BPF_PROG_TYPE_CGROUP_SKB, &prog);
+                if (r < 0)
+                        return log_unit_error_errno(u, r, "Can't allocate CGROUP SKB BPF program: %m");
+
+                r = bpf_program_load_from_bpf_fs(prog, *bpf_fs_path);
+                if (r < 0)
+                        return log_unit_error_errno(u, r, "Loading of custom BPF program %s failed: %m", *bpf_fs_path);
+
+                r = set_ensure_allocated(set, &filter_prog_hash_ops);
+                if (r < 0)
+                        return log_unit_error_errno(u, r, "Can't allocate BPF program set: %m");
+
+                r = set_put(*set, prog);
+                if (r < 0)
+                        return log_unit_error_errno(u, r, "Can't add program to BPF program set: %m");
+
+                /* The set holds our reference now, so disarm the cleanup handler. */
+                TAKE_PTR(prog);
+        }
+
+        return 0;
+}
+
+/* Loads the pinned BPF programs named in the unit's ip_filters_ingress/ip_filters_egress lists into
+ * u->ip_bpf_custom_ingress and u->ip_bpf_custom_egress.
+ *
+ * Returns 0 when the unit has no cgroup context, when neither list is set, or when loading succeeded.
+ * Returns a negative error if the BPF firewall support check fails, if BPF_F_ALLOW_MULTI is not
+ * available, or if loading any program fails. */
+int bpf_firewall_load_custom(Unit *u) {
+        CGroupContext *cc;
+        int r;
+
+        assert(u);
+
+        cc = unit_get_cgroup_context(u);
+        if (!cc || (!cc->ip_filters_ingress && !cc->ip_filters_egress))
+                return 0; /* no custom filters configured, nothing to load */
+
+        r = bpf_firewall_supported();
+        if (r < 0)
+                return r;
+        if (r != BPF_FIREWALL_SUPPORTED_WITH_MULTI)
+                return log_unit_debug_errno(u, SYNTHETIC_ERRNO(EOPNOTSUPP), "BPF_F_ALLOW_MULTI not supported on this manager, cannot attach custom BPF programs.");
+
+        /* Ingress first, then egress; stop at the first failure. */
+        r = load_bpf_progs_from_fs_to_set(u, cc->ip_filters_ingress, &u->ip_bpf_custom_ingress);
+        if (r < 0)
+                return r;
+
+        r = load_bpf_progs_from_fs_to_set(u, cc->ip_filters_egress, &u->ip_bpf_custom_egress);
+        if (r < 0)
+                return r;
+
+        return 0;
+}
+
+/* Attaches every program in '*set' to the cgroup at 'path' at the attach point 'attach_type', always
+ * passing BPF_F_ALLOW_MULTI. '*set_installed' is cleared first and then filled with the programs that
+ * were attached, each with an additional reference taken.
+ *
+ * Returns 0 on success, or a negative error (already logged for unit 'u') on the first failure. */
+static int attach_custom_bpf_progs(Unit *u, const char *path, int attach_type, Set **set, Set **set_installed) {
+        const char *direction;
+        BPFProgram *prog;
+        Iterator i;
+        int r;
+
+        assert(u);
+
+        /* This helper serves both directions, so name the one actually being attached in error messages. */
+        direction = attach_type == BPF_CGROUP_INET_EGRESS ? "egress" : "ingress";
+
+        set_clear(*set_installed);
+
+        SET_FOREACH(prog, *set, i) {
+                r = bpf_program_cgroup_attach(prog, attach_type, path, BPF_F_ALLOW_MULTI);
+                if (r < 0)
+                        return log_unit_error_errno(u, r, "Attaching custom %s BPF program to cgroup %s failed: %m", direction, path);
+
+                /* Remember that these BPF programs are installed now. */
+                r = set_ensure_allocated(set_installed, &filter_prog_hash_ops);
+                if (r < 0)
+                        return log_unit_error_errno(u, r, "Can't allocate BPF program set: %m");
+
+                r = set_put(*set_installed, prog);
+                if (r < 0)
+                        return log_unit_error_errno(u, r, "Can't add program to BPF program set: %m");
+
+                /* The installed set keeps its own reference to the program. */
+                bpf_program_ref(prog);
+        }
+
+        return 0;
+}
+
int bpf_firewall_install(Unit *u) {
_cleanup_free_ char *path = NULL;
CGroupContext *cc;
@@ -614,6 +703,9 @@ int bpf_firewall_install(Unit *u) {
log_unit_debug(u, "BPF_F_ALLOW_MULTI is not supported on this manager, not doing BPF firewall on slice units.");
return -EOPNOTSUPP;
}
+ if (supported != BPF_FIREWALL_SUPPORTED_WITH_MULTI &&
+ (!set_isempty(u->ip_bpf_custom_ingress) || !set_isempty(u->ip_bpf_custom_egress)))
+ return log_unit_debug_errno(u, SYNTHETIC_ERRNO(EOPNOTSUPP), "BPF_F_ALLOW_MULTI not supported on this manager, cannot attach custom BPF programs.");
r = cg_get_path(SYSTEMD_CGROUP_CONTROLLER, u->cgroup_path, NULL, &path);
if (r < 0)
@@ -628,7 +720,8 @@ int bpf_firewall_install(Unit *u) {
u->ip_bpf_ingress_installed = bpf_program_unref(u->ip_bpf_ingress_installed);
if (u->ip_bpf_egress) {
- r = bpf_program_cgroup_attach(u->ip_bpf_egress, BPF_CGROUP_INET_EGRESS, path, flags);
+ r = bpf_program_cgroup_attach(u->ip_bpf_egress, BPF_CGROUP_INET_EGRESS, path,
+ flags | (set_isempty(u->ip_bpf_custom_egress) ? 0 : BPF_F_ALLOW_MULTI));
if (r < 0)
return log_unit_error_errno(u, r, "Attaching egress BPF program to cgroup %s failed: %m", path);
@@ -637,13 +730,22 @@ int bpf_firewall_install(Unit *u) {
}
if (u->ip_bpf_ingress) {
- r = bpf_program_cgroup_attach(u->ip_bpf_ingress, BPF_CGROUP_INET_INGRESS, path, flags);
+ r = bpf_program_cgroup_attach(u->ip_bpf_ingress, BPF_CGROUP_INET_INGRESS, path,
+ flags | (set_isempty(u->ip_bpf_custom_ingress) ? 0 : BPF_F_ALLOW_MULTI));
if (r < 0)
return log_unit_error_errno(u, r, "Attaching ingress BPF program to cgroup %s failed: %m", path);
u->ip_bpf_ingress_installed = bpf_program_ref(u->ip_bpf_ingress);
}
+ r = attach_custom_bpf_progs(u, path, BPF_CGROUP_INET_EGRESS, &u->ip_bpf_custom_egress, &u->ip_bpf_custom_egress_installed);
+ if (r < 0)
+ return r;
+
+ r = attach_custom_bpf_progs(u, path, BPF_CGROUP_INET_INGRESS, &u->ip_bpf_custom_ingress, &u->ip_bpf_custom_ingress_installed);
+ if (r < 0)
+ return r;
+
return 0;
}
diff --git a/src/core/bpf-firewall.h b/src/core/bpf-firewall.h
index 10cafcc02e..f1460d982d 100644
--- a/src/core/bpf-firewall.h
+++ b/src/core/bpf-firewall.h
@@ -15,6 +15,7 @@ int bpf_firewall_supported(void);
int bpf_firewall_compile(Unit *u);
int bpf_firewall_install(Unit *u);
+int bpf_firewall_load_custom(Unit *u);
int bpf_firewall_read_accounting(int map_fd, uint64_t *ret_bytes, uint64_t *ret_packets);
int bpf_firewall_reset_accounting(int map_fd);
diff --git a/src/core/cgroup.c b/src/core/cgroup.c
index 1ed5723892..0428f62481 100644
--- a/src/core/cgroup.c
+++ b/src/core/cgroup.c
@@ -199,6 +199,9 @@ void cgroup_context_done(CGroupContext *c) {
c->ip_address_allow = ip_address_access_free_all(c->ip_address_allow);
c->ip_address_deny = ip_address_access_free_all(c->ip_address_deny);
+
+ c->ip_filters_ingress = strv_free(c->ip_filters_ingress);
+ c->ip_filters_egress = strv_free(c->ip_filters_egress);
}
void cgroup_context_dump(CGroupContext *c, FILE* f, const char *prefix) {
@@ -210,6 +213,7 @@ void cgroup_context_dump(CGroupContext *c, FILE* f, const char *prefix) {
CGroupBlockIODeviceWeight *w;
CGroupDeviceAllow *a;
IPAddressAccessItem *iaai;
+ char **path;
char u[FORMAT_TIMESPAN_MAX];
char v[FORMAT_TIMESPAN_MAX];
@@ -360,6 +364,12 @@ void cgroup_context_dump(CGroupContext *c, FILE* f, const char *prefix) {
(void) in_addr_to_string(iaai->family, &iaai->address, &k);
fprintf(f, "%sIPAddressDeny=%s/%u\n", prefix, strnull(k), iaai->prefixlen);
}
+
+ STRV_FOREACH(path, c->ip_filters_ingress)
+ fprintf(f, "%sIPIngressFilterPath=%s\n", prefix, *path);
+
+ STRV_FOREACH(path, c->ip_filters_egress)
+ fprintf(f, "%sIPEgressFilterPath=%s\n", prefix, *path);
}
int cgroup_add_device_allow(CGroupContext *c, const char *dev, const char *mode) {
@@ -945,6 +955,7 @@ static void cgroup_apply_firewall(Unit *u) {
if (bpf_firewall_compile(u) < 0)
return;
+ (void) bpf_firewall_load_custom(u);
(void) bpf_firewall_install(u);
}
@@ -1353,7 +1364,9 @@ static bool unit_get_needs_bpf_firewall(Unit *u) {
if (c->ip_accounting ||
c->ip_address_allow ||
- c->ip_address_deny)
+ c->ip_address_deny ||
+ c->ip_filters_ingress ||
+ c->ip_filters_egress)
return true;
/* If any parent slice has an IP access list defined, it applies too */
@@ -1919,6 +1932,12 @@ int unit_attach_pids_to_cgroup(Unit *u, Set *pids, const char *suffix_path) {
if (set_isempty(pids))
return 0;
+ /* Load any custom firewall BPF programs here once to test whether they exist and are actually loadable.
+ * Fail here early, since later errors in the call chain from unit_realize_cgroup to cgroup_context_apply are ignored. */
+ r = bpf_firewall_load_custom(u);
+ if (r < 0)
+ return r;
+
r = unit_realize_cgroup(u);
if (r < 0)
return r;
diff --git a/src/core/cgroup.h b/src/core/cgroup.h
index fe347ea114..d1537c503e 100644
--- a/src/core/cgroup.h
+++ b/src/core/cgroup.h
@@ -114,6 +114,9 @@ struct CGroupContext {
LIST_HEAD(IPAddressAccessItem, ip_address_allow);
LIST_HEAD(IPAddressAccessItem, ip_address_deny);
+ char **ip_filters_ingress;
+ char **ip_filters_egress;
+
/* For legacy hierarchies */
uint64_t cpu_shares;
uint64_t startup_cpu_shares;
diff --git a/src/core/dbus-cgroup.c b/src/core/dbus-cgroup.c
index 9f4fd06dc4..f70e6c87ee 100644
--- a/src/core/dbus-cgroup.c
+++ b/src/core/dbus-cgroup.c
@@ -362,6 +362,8 @@ const sd_bus_vtable bus_cgroup_vtable[] = {
SD_BUS_PROPERTY("IPAccounting", "b", bus_property_get_bool, offsetof(CGroupContext, ip_accounting), 0),
SD_BUS_PROPERTY("IPAddressAllow", "a(iayu)", property_get_ip_address_access, offsetof(CGroupContext, ip_address_allow), 0),
SD_BUS_PROPERTY("IPAddressDeny", "a(iayu)", property_get_ip_address_access, offsetof(CGroupContext, ip_address_deny), 0),
+ SD_BUS_PROPERTY("IPIngressFilterPath", "as", NULL, offsetof(CGroupContext, ip_filters_ingress), 0),
+ SD_BUS_PROPERTY("IPEgressFilterPath", "as", NULL, offsetof(CGroupContext, ip_filters_egress), 0),
SD_BUS_PROPERTY("DisableControllers", "as", property_get_cgroup_mask, offsetof(CGroupContext, disable_controllers), 0),
SD_BUS_VTABLE_END
};
@@ -462,6 +464,80 @@ static int bus_cgroup_set_transient_property(
}
return 1;
+ } else if (STR_IN_SET(name, "IPIngressFilterPath", "IPEgressFilterPath")) {
+ char ***filters;
+ size_t n = 0;
+
+ filters = streq(name, "IPIngressFilterPath") ? &c->ip_filters_ingress : &c->ip_filters_egress;
+ r = sd_bus_message_enter_container(message, 'a', "s");
+ if (r < 0)
+ return r;
+
+ for (;;) {
+ const char *path;
+
+ r = sd_bus_message_read(message, "s", &path);
+ if (r < 0)
+ return r;
+ if (r == 0)
+ break;
+
+ if (!path_is_normalized(path) || !path_is_absolute(path))
+ return sd_bus_error_setf(error, SD_BUS_ERROR_INVALID_ARGS, "%s= expects a normalized absolute path.", name);
+
+ if (!UNIT_WRITE_FLAGS_NOOP(flags) && !strv_contains(*filters, path)) {
+ r = strv_extend(filters, path);
+ if (r < 0)
+ return log_oom();
+ }
+ n++;
+ }
+ r = sd_bus_message_exit_container(message);
+ if (r < 0)
+ return r;
+
+ if (!UNIT_WRITE_FLAGS_NOOP(flags)) {
+ _cleanup_free_ char *buf = NULL;
+ _cleanup_fclose_ FILE *f = NULL;
+ char **entry;
+ size_t size = 0;
+
+ if (n == 0)
+ *filters = strv_free(*filters);
+
+ unit_invalidate_cgroup_bpf(u);
+ f = open_memstream_unlocked(&buf, &size);
+ if (!f)
+ return -ENOMEM;
+
+ fputs(name, f);
+ fputs("=\n", f);
+
+ STRV_FOREACH(entry, *filters)
+ fprintf(f, "%s=%s\n", name, *entry);
+
+ r = fflush_and_check(f);
+ if (r < 0)
+ return r;
+
+ unit_write_setting(u, flags, name, buf);
+
+ if (*filters) {
+ r = bpf_firewall_supported();
+ if (r < 0)
+ return r;
+ if (r != BPF_FIREWALL_SUPPORTED_WITH_MULTI) {
+ static bool warned = false;
+
+ log_full(warned ? LOG_DEBUG : LOG_WARNING,
+ "Transient unit %s configures an IP firewall with BPF, but the local system does not support BPF/cgroup firewalling with mulitiple filters.\n"
+ "Starting this unit will fail! (This warning is only shown for the first started transient unit using IP firewalling.)", u->id);
+ warned = true;
+ }
+ }
+ }
+
+ return 1;
}
return 0;
diff --git a/src/core/load-fragment-gperf.gperf.m4 b/src/core/load-fragment-gperf.gperf.m4
index 5e6fb64093..f7906b374a 100644
--- a/src/core/load-fragment-gperf.gperf.m4
+++ b/src/core/load-fragment-gperf.gperf.m4
@@ -205,6 +205,8 @@ $1.DisableControllers, config_parse_disable_controllers, 0,
$1.IPAccounting, config_parse_bool, 0, offsetof($1, cgroup_context.ip_accounting)
$1.IPAddressAllow, config_parse_ip_address_access, 0, offsetof($1, cgroup_context.ip_address_allow)
$1.IPAddressDeny, config_parse_ip_address_access, 0, offsetof($1, cgroup_context.ip_address_deny)
+$1.IPIngressFilterPath, config_parse_ip_filter_bpf_progs, 0, offsetof($1, cgroup_context.ip_filters_ingress)
+$1.IPEgressFilterPath, config_parse_ip_filter_bpf_progs, 0, offsetof($1, cgroup_context.ip_filters_egress)
$1.NetClass, config_parse_warn_compat, DISABLED_LEGACY, 0'
)m4_dnl
Unit.Description, config_parse_unit_string_printf, 0, offsetof(Unit, description)
diff --git a/src/core/load-fragment.c b/src/core/load-fragment.c
index 274d9d2fef..ba41f8ee1a 100644
--- a/src/core/load-fragment.c
+++ b/src/core/load-fragment.c
@@ -18,6 +18,7 @@
#include "af-list.h"
#include "alloc-util.h"
#include "all-units.h"
+#include "bpf-firewall.h"
#include "bus-error.h"
#include "bus-internal.h"
#include "bus-util.h"
@@ -4456,6 +4457,66 @@ int config_parse_disable_controllers(
return 0;
}
+/* Config parser for IPIngressFilterPath= and IPEgressFilterPath=. 'data' points to the string list to
+ * update and 'userdata' is the unit being loaded.
+ *
+ * An empty assignment resets the list. Otherwise unit specifiers in the value are resolved, the result
+ * is checked with PATH_CHECK_ABSOLUTE, and the path is appended unless it is already listed. Values
+ * that fail to resolve or validate are ignored and 0 is returned.
+ *
+ * After a path was added, a message is logged if the system lacks BPF firewalling with multiple
+ * filters: at LOG_WARNING the first time, at LOG_DEBUG afterwards.
+ *
+ * Returns 0 on success or for ignored values, the log_oom() result if extending the list fails, or the
+ * negative error from bpf_firewall_supported(). */
+int config_parse_ip_filter_bpf_progs(
+                const char *unit,
+                const char *filename,
+                unsigned line,
+                const char *section,
+                unsigned section_line,
+                const char *lvalue,
+                int ltype,
+                const char *rvalue,
+                void *data,
+                void *userdata) {
+
+        static bool warned = false;
+        _cleanup_free_ char *expanded = NULL;
+        char ***list = data;
+        Unit *u = userdata;
+        int r;
+
+        assert(filename);
+        assert(lvalue);
+        assert(rvalue);
+        assert(list);
+
+        /* Empty assignment: forget everything configured so far. */
+        if (isempty(rvalue)) {
+                *list = strv_free(*list);
+                return 0;
+        }
+
+        r = unit_full_printf(u, rvalue, &expanded);
+        if (r < 0) {
+                log_syntax(unit, LOG_ERR, filename, line, r, "Failed to resolve unit specifiers in '%s', ignoring: %m", rvalue);
+                return 0;
+        }
+
+        if (path_simplify_and_warn(expanded, PATH_CHECK_ABSOLUTE, unit, filename, line, lvalue) < 0)
+                return 0;
+
+        /* Listing the same program twice is a no-op. */
+        if (strv_contains(*list, expanded))
+                return 0;
+
+        if (strv_extend(list, expanded) < 0)
+                return log_oom();
+
+        r = bpf_firewall_supported();
+        if (r < 0)
+                return r;
+        if (r == BPF_FIREWALL_SUPPORTED_WITH_MULTI)
+                return 0;
+
+        log_full(warned ? LOG_DEBUG : LOG_WARNING,
+                 "File %s:%u configures an IP firewall with BPF programs (%s=%s), but the local system does not support BPF/cgroup based firewalling with multiple filters.\n"
+                 "Starting this unit will fail! (This warning is only shown for the first loaded unit using IP firewalling.)", filename, line, lvalue, rvalue);
+        warned = true;
+
+        return 0;
+}
+
#define FOLLOW_MAX 8
static int open_follow(char **filename, FILE **_f, Set *names, char **_final) {
diff --git a/src/core/load-fragment.h b/src/core/load-fragment.h
index ddcc8d216d..8d5f7010cd 100644
--- a/src/core/load-fragment.h
+++ b/src/core/load-fragment.h
@@ -110,6 +110,7 @@ CONFIG_PARSER_PROTOTYPE(config_parse_disable_controllers);
CONFIG_PARSER_PROTOTYPE(config_parse_oom_policy);
CONFIG_PARSER_PROTOTYPE(config_parse_numa_policy);
CONFIG_PARSER_PROTOTYPE(config_parse_numa_mask);
+CONFIG_PARSER_PROTOTYPE(config_parse_ip_filter_bpf_progs);
/* gperf prototypes */
const struct ConfigPerfItem* load_fragment_gperf_lookup(const char *key, GPERF_LEN_TYPE length);
diff --git a/src/core/unit.c b/src/core/unit.c
index 4d777b447d..463db73ff1 100644
--- a/src/core/unit.c
+++ b/src/core/unit.c
@@ -12,6 +12,7 @@
#include "all-units.h"
#include "alloc-util.h"
+#include "bpf-firewall.h"
#include "bus-common-errors.h"
#include "bus-util.h"
#include "cgroup-util.h"
@@ -682,6 +683,11 @@ void unit_free(Unit *u) {
bpf_program_unref(u->ip_bpf_egress);
bpf_program_unref(u->ip_bpf_egress_installed);
+ set_free(u->ip_bpf_custom_ingress);
+ set_free(u->ip_bpf_custom_egress);
+ set_free(u->ip_bpf_custom_ingress_installed);
+ set_free(u->ip_bpf_custom_egress_installed);
+
bpf_program_unref(u->bpf_device_control_installed);
condition_free_list(u->conditions);
@@ -5500,6 +5506,12 @@ int unit_prepare_exec(Unit *u) {
assert(u);
+ /* Load any custom firewall BPF programs here once to test whether they exist and are actually loadable.
+ * Fail here early, since later errors in the call chain from unit_realize_cgroup to cgroup_context_apply are ignored. */
+ r = bpf_firewall_load_custom(u);
+ if (r < 0)
+ return r;
+
/* Prepares everything so that we can fork of a process for this unit */
(void) unit_realize_cgroup(u);
diff --git a/src/core/unit.h b/src/core/unit.h
index 007c4aea9e..ef495f836b 100644
--- a/src/core/unit.h
+++ b/src/core/unit.h
@@ -10,6 +10,7 @@
#include "emergency-action.h"
#include "install.h"
#include "list.h"
+#include "set.h"
#include "unit-name.h"
#include "cgroup.h"
@@ -281,6 +282,10 @@ typedef struct Unit {
BPFProgram *ip_bpf_ingress, *ip_bpf_ingress_installed;
BPFProgram *ip_bpf_egress, *ip_bpf_egress_installed;
+ Set *ip_bpf_custom_ingress;
+ Set *ip_bpf_custom_ingress_installed;
+ Set *ip_bpf_custom_egress;
+ Set *ip_bpf_custom_egress_installed;
uint64_t ip_accounting_extra[_CGROUP_IP_ACCOUNTING_METRIC_MAX];
diff --git a/src/shared/bpf-program.c b/src/shared/bpf-program.c
index 40bc9645be..93f8db3f34 100644
--- a/src/shared/bpf-program.c
+++ b/src/shared/bpf-program.c
@@ -94,6 +94,25 @@ int bpf_program_load_kernel(BPFProgram *p, char *log_buf, size_t log_size) {
return 0;
}
+/* Issues a BPF_OBJ_GET request for 'path' and stores the result in p->kernel_fd.
+ *
+ * Returns -EBUSY if 'p' already holds a valid kernel fd, -errno if the bpf() call fails, and 0 on
+ * success. */
+int bpf_program_load_from_bpf_fs(BPFProgram *p, const char *path) {
+        union bpf_attr attr;
+        int fd;
+
+        assert(p);
+
+        /* Don't overwrite an assembled or loaded program. */
+        if (p->kernel_fd >= 0)
+                return -EBUSY;
+
+        attr = (union bpf_attr) {
+                .pathname = PTR_TO_UINT64(path),
+        };
+
+        fd = bpf(BPF_OBJ_GET, &attr, sizeof(attr));
+        p->kernel_fd = fd;
+
+        return fd < 0 ? -errno : 0;
+}
+
int bpf_program_cgroup_attach(BPFProgram *p, int type, const char *path, uint32_t flags) {
_cleanup_free_ char *copy = NULL;
_cleanup_close_ int fd = -1;
diff --git a/src/shared/bpf-program.h b/src/shared/bpf-program.h
index c21eb2f72a..a21589eb1f 100644
--- a/src/shared/bpf-program.h
+++ b/src/shared/bpf-program.h
@@ -31,6 +31,7 @@ BPFProgram *bpf_program_ref(BPFProgram *p);
int bpf_program_add_instructions(BPFProgram *p, const struct bpf_insn *insn, size_t count);
int bpf_program_load_kernel(BPFProgram *p, char *log_buf, size_t log_size);
+int bpf_program_load_from_bpf_fs(BPFProgram *p, const char *path);
int bpf_program_cgroup_attach(BPFProgram *p, int type, const char *path, uint32_t flags);
int bpf_program_cgroup_detach(BPFProgram *p);
diff --git a/src/shared/bus-unit-util.c b/src/shared/bus-unit-util.c
index bb30e8f151..2ea25d830a 100644
--- a/src/shared/bus-unit-util.c
+++ b/src/shared/bus-unit-util.c
@@ -758,6 +758,18 @@ static int bus_append_cgroup_property(sd_bus_message *m, const char *field, cons
return 1;
}
+ if (STR_IN_SET(field, "IPIngressFilterPath", "IPEgressFilterPath")) {
+ if (isempty(eq))
+ r = sd_bus_message_append(m, "(sv)", field, "as", 0);
+ else
+ r = sd_bus_message_append(m, "(sv)", field, "as", 1, eq);
+
+ if (r < 0)
+ return bus_log_create_error(r);
+
+ return 1;
+ }
+
return 0;
}
diff --git a/src/test/test-bpf.c b/src/test/test-bpf.c
index 90ab15c549..6a75221542 100644
--- a/src/test/test-bpf.c
+++ b/src/test/test-bpf.c
@@ -9,6 +9,7 @@
#include "bpf-program.h"
#include "load-fragment.h"
#include "manager.h"
+#include "missing.h"
#include "rm-rf.h"
#include "service.h"
#include "test-helper.h"
@@ -42,7 +43,7 @@ static bool can_memlock(void) {
int main(int argc, char *argv[]) {
struct bpf_insn exit_insn[] = {
- BPF_MOV64_IMM(BPF_REG_0, 1),
+ BPF_MOV64_IMM(BPF_REG_0, 0), /* drop */
BPF_EXIT_INSN()
};
@@ -54,6 +55,9 @@ int main(int argc, char *argv[]) {
char log_buf[65535];
struct rlimit rl;
int r;
+ union bpf_attr attr;
+ bool test_custom_filter = false;
+ const char *test_prog = "/sys/fs/bpf/test-dropper";
test_setup_logging(LOG_DEBUG);
@@ -88,14 +92,31 @@ int main(int argc, char *argv[]) {
return log_tests_skipped("BPF firewalling not supported");
assert_se(r > 0);
- if (r == BPF_FIREWALL_SUPPORTED_WITH_MULTI)
+ if (r == BPF_FIREWALL_SUPPORTED_WITH_MULTI) {
log_notice("BPF firewalling with BPF_F_ALLOW_MULTI supported. Yay!");
- else
+ test_custom_filter = true;
+ } else
log_notice("BPF firewalling (though without BPF_F_ALLOW_MULTI) supported. Good.");
r = bpf_program_load_kernel(p, log_buf, ELEMENTSOF(log_buf));
assert(r >= 0);
+ if (test_custom_filter) {
+ attr = (union bpf_attr) {
+ .pathname = PTR_TO_UINT64(test_prog),
+ .bpf_fd = p->kernel_fd,
+ .file_flags = 0,
+ };
+
+ (void) unlink(test_prog);
+
+ r = bpf(BPF_OBJ_PIN, &attr, sizeof(attr));
+ if (r < 0) {
+ log_warning_errno(errno, "BPF object pinning failed, will not run custom filter test: %m");
+ test_custom_filter = false;
+ }
+ }
+
p = bpf_program_unref(p);
/* The simple tests succeeded. Now let's try full unit-based use-case. */
@@ -175,5 +196,31 @@ int main(int argc, char *argv[]) {
assert_se(SERVICE(u)->exec_command[SERVICE_EXEC_START]->command_next->exec_status.code != CLD_EXITED ||
SERVICE(u)->exec_command[SERVICE_EXEC_START]->command_next->exec_status.status != EXIT_SUCCESS);
+ if (test_custom_filter) {
+ assert_se(u = unit_new(m, sizeof(Service)));
+ assert_se(unit_add_name(u, "custom-filter.service") == 0);
+ assert_se(cc = unit_get_cgroup_context(u));
+ u->perpetual = true;
+
+ cc->ip_accounting = true;
+
+ assert_se(config_parse_ip_filter_bpf_progs(u->id, "filename", 1, "Service", 1, "IPIngressFilterPath", 0, test_prog, &cc->ip_filters_ingress, u) == 0);
+ assert_se(config_parse_exec(u->id, "filename", 1, "Service", 1, "ExecStart", SERVICE_EXEC_START, "-/bin/ping -c 1 127.0.0.1 -W 5", SERVICE(u)->exec_command, u) == 0);
+
+ SERVICE(u)->type = SERVICE_ONESHOT;
+ u->load_state = UNIT_LOADED;
+
+ assert_se(unit_start(u) >= 0);
+
+ while (!IN_SET(SERVICE(u)->state, SERVICE_DEAD, SERVICE_FAILED))
+ assert_se(sd_event_run(m->event, UINT64_MAX) >= 0);
+
+ assert_se(SERVICE(u)->exec_command[SERVICE_EXEC_START]->exec_status.code != CLD_EXITED ||
+ SERVICE(u)->exec_command[SERVICE_EXEC_START]->exec_status.status != EXIT_SUCCESS);
+
+ (void) unlink(test_prog);
+ assert_se(SERVICE(u)->state == SERVICE_DEAD);
+ }
+
return 0;
}