summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r-- NEWS 4
-rw-r--r-- lib/daemon-unix.c 33
-rw-r--r-- lib/daemon-windows.c 6
-rw-r--r-- lib/daemon.c 2
-rw-r--r-- lib/daemon.h 4
-rw-r--r-- ovsdb/ovsdb-client.c 6
-rw-r--r-- ovsdb/ovsdb-server.c 4
-rw-r--r-- tests/test-netflow.c 2
-rw-r--r-- tests/test-sflow.c 2
-rw-r--r-- tests/test-unixctl.c 2
-rw-r--r-- utilities/ovs-ofctl.c 4
-rw-r--r-- utilities/ovs-testcontroller.c 4
-rw-r--r-- vswitchd/ovs-vswitchd.8.in 9
-rw-r--r-- vswitchd/ovs-vswitchd.c 11
14 files changed, 67 insertions, 26 deletions
diff --git a/NEWS b/NEWS
index 72b9024e6..8771ee618 100644
--- a/NEWS
+++ b/NEWS
@@ -17,6 +17,10 @@ Post-v3.1.0
in order to create OVSDB sockets with access mode of 0770.
- QoS:
* Added new configuration option 'jitter' for a linux-netem QoS type.
+ - DPDK:
+ * ovs-vswitchd will keep the CAP_SYS_RAWIO capability when started
+ with the --hw-rawio-access command line option. This allows the
+ process extra privileges when mapping physical interconnect memory.
v3.1.0 - 16 Feb 2023
diff --git a/lib/daemon-unix.c b/lib/daemon-unix.c
index 1a7ba427d..4fdc6e3c4 100644
--- a/lib/daemon-unix.c
+++ b/lib/daemon-unix.c
@@ -88,7 +88,8 @@ static bool switch_user = false;
static uid_t uid;
static gid_t gid;
static char *user = NULL;
-static void daemon_become_new_user__(bool access_datapath);
+static void daemon_become_new_user__(bool access_datapath,
+ bool access_hardware_ports);
static void check_already_running(void);
static int lock_pidfile(FILE *, int command);
@@ -443,13 +444,13 @@ monitor_daemon(pid_t daemon_pid)
* daemonize_complete()) or that it failed to start up (by exiting with a
* nonzero exit code). */
void
-daemonize_start(bool access_datapath)
+daemonize_start(bool access_datapath, bool access_hardware_ports)
{
assert_single_threaded();
daemonize_fd = -1;
if (switch_user) {
- daemon_become_new_user__(access_datapath);
+ daemon_become_new_user__(access_datapath, access_hardware_ports);
switch_user = false;
}
@@ -807,7 +808,8 @@ daemon_become_new_user_unix(void)
/* Linux specific implementation of daemon_become_new_user()
* using libcap-ng. */
static void
-daemon_become_new_user_linux(bool access_datapath OVS_UNUSED)
+daemon_become_new_user_linux(bool access_datapath OVS_UNUSED,
+ bool access_hardware_ports OVS_UNUSED)
{
#if defined __linux__ && HAVE_LIBCAPNG
int ret;
@@ -827,6 +829,20 @@ daemon_become_new_user_linux(bool access_datapath OVS_UNUSED)
ret = capng_update(CAPNG_ADD, cap_sets, CAP_NET_ADMIN)
|| capng_update(CAPNG_ADD, cap_sets, CAP_NET_RAW)
|| capng_update(CAPNG_ADD, cap_sets, CAP_NET_BROADCAST);
+#ifdef DPDK_NETDEV
+ if (access_hardware_ports && !ret) {
+ ret = capng_update(CAPNG_ADD, cap_sets, CAP_SYS_RAWIO);
+ if (!ret) {
+ VLOG_INFO("The Linux capability CAP_SYS_RAWIO "
+ "is enabled.");
+ }
+ }
+#else
+ if (access_hardware_ports) {
+ VLOG_WARN("No driver requires Linux capability "
+ "CAP_SYS_RAWIO, disabling it.");
+ }
+#endif
}
} else {
ret = -1;
@@ -854,7 +870,7 @@ daemon_become_new_user_linux(bool access_datapath OVS_UNUSED)
}
static void
-daemon_become_new_user__(bool access_datapath)
+daemon_become_new_user__(bool access_datapath, bool access_hardware_ports)
{
/* If vlog file has been created, change its owner to the non-root user
* as specified by the --user option. */
@@ -862,7 +878,8 @@ daemon_become_new_user__(bool access_datapath)
if (LINUX) {
if (LIBCAPNG) {
- daemon_become_new_user_linux(access_datapath);
+ daemon_become_new_user_linux(access_datapath,
+ access_hardware_ports);
} else {
VLOG_FATAL("%s: fail to downgrade user using libcap-ng. "
"(libcap-ng is not configured at compile time), "
@@ -877,11 +894,11 @@ daemon_become_new_user__(bool access_datapath)
* However, in case the user switch needs to be done
* before daemonize_start(), the following API can be used. */
void
-daemon_become_new_user(bool access_datapath)
+daemon_become_new_user(bool access_datapath, bool access_hardware_ports)
{
assert_single_threaded();
if (switch_user) {
- daemon_become_new_user__(access_datapath);
+ daemon_become_new_user__(access_datapath, access_hardware_ports);
/* daemonize_start() should not switch user again. */
switch_user = false;
}
diff --git a/lib/daemon-windows.c b/lib/daemon-windows.c
index 7e5f264f5..4e6bbe0f0 100644
--- a/lib/daemon-windows.c
+++ b/lib/daemon-windows.c
@@ -498,7 +498,8 @@ make_pidfile(void)
}
void
-daemonize_start(bool access_datapath OVS_UNUSED)
+daemonize_start(bool access_datapath OVS_UNUSED,
+ bool access_hardware_ports OVS_UNUSED)
{
if (pidfile) {
make_pidfile();
@@ -526,7 +527,8 @@ daemonize_complete(void)
}
void
-daemon_become_new_user(bool access_datapath OVS_UNUSED)
+daemon_become_new_user(bool access_datapath OVS_UNUSED,
+ bool access_hardware_ports OVS_UNUSED)
{
}
diff --git a/lib/daemon.c b/lib/daemon.c
index 3249c5ab4..1e1c019eb 100644
--- a/lib/daemon.c
+++ b/lib/daemon.c
@@ -48,7 +48,7 @@ get_detach(void)
void
daemonize(void)
{
- daemonize_start(false);
+ daemonize_start(false, false);
daemonize_complete();
}
diff --git a/lib/daemon.h b/lib/daemon.h
index 094157496..42372d146 100644
--- a/lib/daemon.h
+++ b/lib/daemon.h
@@ -167,10 +167,10 @@ void set_detach(void);
bool get_detach(void);
void daemon_save_fd(int fd);
void daemonize(void);
-void daemonize_start(bool access_datapath);
+void daemonize_start(bool access_datapath, bool access_hardware_ports);
void daemonize_complete(void);
void daemon_set_new_user(const char * user_spec);
-void daemon_become_new_user(bool access_datapath);
+void daemon_become_new_user(bool access_datapath, bool access_hardware_ports);
void daemon_usage(void);
void daemon_disable_self_confinement(void);
bool daemon_should_self_confine(void);
diff --git a/ovsdb/ovsdb-client.c b/ovsdb/ovsdb-client.c
index f1b8d6491..bae2c5f04 100644
--- a/ovsdb/ovsdb-client.c
+++ b/ovsdb/ovsdb-client.c
@@ -250,7 +250,7 @@ main(int argc, char *argv[])
parse_options(argc, argv);
fatal_ignore_sigpipe();
- daemon_become_new_user(false);
+ daemon_become_new_user(false, false);
if (optind >= argc) {
ovs_fatal(0, "missing command name; use --help for help");
}
@@ -1392,7 +1392,7 @@ do_monitor__(struct jsonrpc *rpc, const char *database,
daemon_save_fd(STDOUT_FILENO);
daemon_save_fd(STDERR_FILENO);
- daemonize_start(false);
+ daemonize_start(false, false);
if (get_detach()) {
int error;
@@ -2276,7 +2276,7 @@ do_lock(struct jsonrpc *rpc, const char *method, const char *lock)
getting a reply of the previous
request. */
daemon_save_fd(STDOUT_FILENO);
- daemonize_start(false);
+ daemonize_start(false, false);
lock_req_init(&lock_req, method, lock);
if (get_detach()) {
diff --git a/ovsdb/ovsdb-server.c b/ovsdb/ovsdb-server.c
index 33ca4910d..4fea2dbda 100644
--- a/ovsdb/ovsdb-server.c
+++ b/ovsdb/ovsdb-server.c
@@ -341,7 +341,7 @@ main(int argc, char *argv[])
&run_command, &sync_from, &sync_exclude, &active);
is_backup = sync_from && !active;
- daemon_become_new_user(false);
+ daemon_become_new_user(false, false);
/* Create and initialize 'config_tmpfile' as a temporary file to hold
* ovsdb-server's most basic configuration, and then save our initial
@@ -359,7 +359,7 @@ main(int argc, char *argv[])
save_config__(config_tmpfile, &remotes, &db_filenames, sync_from,
sync_exclude, is_backup);
- daemonize_start(false);
+ daemonize_start(false, false);
/* Load the saved config. */
load_config(config_tmpfile, &remotes, &db_filenames, &sync_from,
diff --git a/tests/test-netflow.c b/tests/test-netflow.c
index d2322d450..7f89cfcae 100644
--- a/tests/test-netflow.c
+++ b/tests/test-netflow.c
@@ -195,7 +195,7 @@ test_netflow_main(int argc, char *argv[])
}
daemon_save_fd(STDOUT_FILENO);
- daemonize_start(false);
+ daemonize_start(false, false);
error = unixctl_server_create(NULL, &server);
if (error) {
diff --git a/tests/test-sflow.c b/tests/test-sflow.c
index 460d4d6c5..3c617bdd1 100644
--- a/tests/test-sflow.c
+++ b/tests/test-sflow.c
@@ -709,7 +709,7 @@ test_sflow_main(int argc, char *argv[])
}
daemon_save_fd(STDOUT_FILENO);
- daemonize_start(false);
+ daemonize_start(false, false);
error = unixctl_server_create(NULL, &server);
if (error) {
diff --git a/tests/test-unixctl.c b/tests/test-unixctl.c
index 3eadf54cd..9e8982789 100644
--- a/tests/test-unixctl.c
+++ b/tests/test-unixctl.c
@@ -83,7 +83,7 @@ test_unixctl_main(int argc, char *argv[])
fatal_ignore_sigpipe();
parse_options(&argc, &argv, &unixctl_path);
- daemonize_start(false);
+ daemonize_start(false, false);
int retval = unixctl_server_create(unixctl_path, &unixctl);
if (retval) {
exit(EXIT_FAILURE);
diff --git a/utilities/ovs-ofctl.c b/utilities/ovs-ofctl.c
index 3ce4e82ec..24d0941cf 100644
--- a/utilities/ovs-ofctl.c
+++ b/utilities/ovs-ofctl.c
@@ -173,7 +173,7 @@ main(int argc, char *argv[])
ctx.argc = argc - optind;
ctx.argv = argv + optind;
- daemon_become_new_user(false);
+ daemon_become_new_user(false, false);
if (read_only) {
ovs_cmdl_run_command_read_only(&ctx, get_all_commands());
} else {
@@ -2127,7 +2127,7 @@ monitor_vconn(struct vconn *vconn, bool reply_to_echo_requests,
int error;
daemon_save_fd(STDERR_FILENO);
- daemonize_start(false);
+ daemonize_start(false, false);
error = unixctl_server_create(unixctl_path, &server);
if (error) {
ovs_fatal(error, "failed to create unixctl server");
diff --git a/utilities/ovs-testcontroller.c b/utilities/ovs-testcontroller.c
index b489ff5fc..9f2fbfdf5 100644
--- a/utilities/ovs-testcontroller.c
+++ b/utilities/ovs-testcontroller.c
@@ -109,7 +109,7 @@ main(int argc, char *argv[])
parse_options(argc, argv);
fatal_ignore_sigpipe();
- daemon_become_new_user(false);
+ daemon_become_new_user(false, false);
if (argc - optind < 1) {
ovs_fatal(0, "at least one vconn argument required; "
@@ -148,7 +148,7 @@ main(int argc, char *argv[])
ovs_fatal(0, "no active or passive switch connections");
}
- daemonize_start(false);
+ daemonize_start(false, false);
retval = unixctl_server_create(unixctl_path, &unixctl);
if (retval) {
diff --git a/vswitchd/ovs-vswitchd.8.in b/vswitchd/ovs-vswitchd.8.in
index 9569265fc..10c6e077b 100644
--- a/vswitchd/ovs-vswitchd.8.in
+++ b/vswitchd/ovs-vswitchd.8.in
@@ -81,6 +81,15 @@ unavailable or unsuccessful.
.SS "DPDK Options"
For details on initializing \fBovs\-vswitchd\fR to use DPDK ports,
refer to the documentation or \fBovs\-vswitchd.conf.db\fR(5).
+.SS "DPDK HW Access Options"
+.IP "\fB\-\-hw\-rawio\-access\fR"
+Tells \fBovs\-vswitchd\fR to retain the \fBCAP_SYS_RAWIO\fR capability,
+to allow userspace drivers access to raw hardware memory. This will
+also allow the \fBovs\-vswitchd\fR daemon to call \fBiopl()\fR and
+\fBioperm()\fR functions as well as access memory devices to set port
+access. This is a \fBvery\fR powerful capability, so generally only
+enable as needed for specific hardware (for example mlx5 with full
+hardware offload via rte_flow).
.SS "Daemon Options"
.ds DD \
\fBovs\-vswitchd\fR detaches only after it has connected to the \
diff --git a/vswitchd/ovs-vswitchd.c b/vswitchd/ovs-vswitchd.c
index 407bfc60e..a244d2f70 100644
--- a/vswitchd/ovs-vswitchd.c
+++ b/vswitchd/ovs-vswitchd.c
@@ -60,6 +60,9 @@ VLOG_DEFINE_THIS_MODULE(vswitchd);
* the kernel from paging any of its memory to disk. */
static bool want_mlockall;
+/* --hw-rawio-access: If set, retains CAP_SYS_RAWIO privileges. */
+static bool hw_rawio_access;
+
static unixctl_cb_func ovs_vswitchd_exit;
static char *parse_options(int argc, char *argv[], char **unixctl_path);
@@ -89,7 +92,7 @@ main(int argc, char *argv[])
remote = parse_options(argc, argv, &unixctl_path);
fatal_ignore_sigpipe();
- daemonize_start(true);
+ daemonize_start(true, hw_rawio_access);
if (want_mlockall) {
#ifdef HAVE_MLOCKALL
@@ -169,6 +172,7 @@ parse_options(int argc, char *argv[], char **unixctl_pathp)
OPT_DPDK,
SSL_OPTION_ENUMS,
OPT_DUMMY_NUMA,
+ OPT_HW_RAWIO_ACCESS,
};
static const struct option long_options[] = {
{"help", no_argument, NULL, 'h'},
@@ -185,6 +189,7 @@ parse_options(int argc, char *argv[], char **unixctl_pathp)
{"disable-system-route", no_argument, NULL, OPT_DISABLE_SYSTEM_ROUTE},
{"dpdk", optional_argument, NULL, OPT_DPDK},
{"dummy-numa", required_argument, NULL, OPT_DUMMY_NUMA},
+ {"hw-rawio-access", no_argument, NULL, OPT_HW_RAWIO_ACCESS},
{NULL, 0, NULL, 0},
};
char *short_options = ovs_cmdl_long_options_to_short_options(long_options);
@@ -249,6 +254,10 @@ parse_options(int argc, char *argv[], char **unixctl_pathp)
ovs_numa_set_dummy(optarg);
break;
+ case OPT_HW_RAWIO_ACCESS:
+ hw_rawio_access = true;
+ break;
+
default:
abort();
}