summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--.github/workflows/mkosi.yml2
-rw-r--r--hwdb.d/60-sensor.hwdb9
-rw-r--r--hwdb.d/70-mouse.hwdb4
-rw-r--r--man/common-variables.xml8
-rw-r--r--man/iocost.conf.xml76
-rw-r--r--man/kernel-command-line.xml1
-rw-r--r--man/rules/meson.build1
-rw-r--r--man/systemd-poweroff.service.xml57
-rw-r--r--man/systemd.xml14
-rw-r--r--mkosi.conf.d/10-systemd.conf1
-rw-r--r--mkosi.conf.d/20-debian.conf1
-rwxr-xr-xmkosi.postinst5
-rw-r--r--rules.d/90-iocost.rules20
-rw-r--r--rules.d/meson.build1
-rw-r--r--src/basic/chase.c114
-rw-r--r--src/basic/chase.h2
-rw-r--r--src/basic/fd-util.c22
-rw-r--r--src/basic/list.h9
-rw-r--r--src/basic/log.c28
-rw-r--r--src/basic/mountpoint-util.c6
-rw-r--r--src/basic/os-util.c4
-rw-r--r--src/basic/path-util.c28
-rw-r--r--src/basic/path-util.h1
-rw-r--r--src/basic/ratelimit.c20
-rw-r--r--src/basic/stat-util.c4
-rw-r--r--src/basic/string-util.c12
-rw-r--r--src/basic/string-util.h5
-rw-r--r--src/basic/strv.h12
-rw-r--r--src/core/device.c21
-rw-r--r--src/core/kmod-setup.c58
-rw-r--r--src/core/main.c14
-rw-r--r--src/core/manager.c1
-rw-r--r--src/fstab-generator/fstab-generator.c2
-rw-r--r--src/gpt-auto-generator/gpt-auto-generator.c9
-rw-r--r--src/libsystemd-network/sd-dhcp-server.c38
-rw-r--r--src/shared/dissect-image.c61
-rw-r--r--src/shared/dissect-image.h1
-rw-r--r--src/shared/find-esp.c4
-rw-r--r--src/test/test-chase.c74
-rw-r--r--src/test/test-path-util.c39
-rw-r--r--src/udev/iocost/iocost.c334
-rw-r--r--src/udev/iocost/iocost.conf17
-rw-r--r--src/udev/meson.build3
-rwxr-xr-xtest/test-bootctl-json.sh9
-rwxr-xr-xtest/units/testsuite-54.sh1
45 files changed, 925 insertions, 228 deletions
diff --git a/.github/workflows/mkosi.yml b/.github/workflows/mkosi.yml
index 4e77af75d7..d974152be1 100644
--- a/.github/workflows/mkosi.yml
+++ b/.github/workflows/mkosi.yml
@@ -73,7 +73,7 @@ jobs:
steps:
- uses: actions/checkout@ac593985615ec2ede58e132d2e21d2b1cbd6127c
- - uses: systemd/mkosi@268a9374d5a93a4021db6521a39db2c2ed9fd755
+ - uses: systemd/mkosi@ebc8b63f3be58243c7ff4273517fa4d2858c7e30
- name: Configure
run: |
diff --git a/hwdb.d/60-sensor.hwdb b/hwdb.d/60-sensor.hwdb
index 038e9dc013..ebf8c718a7 100644
--- a/hwdb.d/60-sensor.hwdb
+++ b/hwdb.d/60-sensor.hwdb
@@ -193,6 +193,15 @@ sensor:modalias:acpi:BMI0160*:dmi:*:svnAYANEO:pn*NEXT*:*
ACCEL_MOUNT_MATRIX=1, 0, 0; 0, -1, 0; 0, 0, 1
#########################################
+# BMAX
+#########################################
+
+# BMAX Y13
+sensor:modalias:acpi:KIOX010A:*:dmi:*:svnAMI:*:skuH2M6:*
+ ACCEL_MOUNT_MATRIX=-1, 0, 0; 0, -1, 0; 0, 0, -1
+ ACCEL_LOCATION=display
+
+#########################################
# Chuwi
#########################################
diff --git a/hwdb.d/70-mouse.hwdb b/hwdb.d/70-mouse.hwdb
index e0a94541e4..159bc78621 100644
--- a/hwdb.d/70-mouse.hwdb
+++ b/hwdb.d/70-mouse.hwdb
@@ -400,7 +400,9 @@ mouse:usb:v046dpc08b:name:Logitech G502 HERO Gaming Mouse:*
# Logitech G502 X (Wired)
mouse:usb:v046dpc098:name:Logitech, Inc. G502 X LIGHTSPEED:*
# Logitech G502 X (Wireless)
-mouse:usb:v046dpc547:name:Logitech USB Receiver:*
+# The USB receiver is also used by other mice. See #27118.
+# If you want to enable the entry, please copy the lines below to your custom hwdb file.
+#mouse:usb:v046dpc547:name:Logitech USB Receiver:*
MOUSE_DPI=1200@1000 *2400@1000 3200@1000 6400@1000
# Logitech G700 Laser Mouse (Wired)
diff --git a/man/common-variables.xml b/man/common-variables.xml
index 0e220b3f9e..81425e57e2 100644
--- a/man/common-variables.xml
+++ b/man/common-variables.xml
@@ -81,6 +81,14 @@
</listitem>
</varlistentry>
+ <varlistentry id='log-ratelimit-kmsg'>
+ <term><varname>$SYSTEMD_LOG_RATELIMIT_KMSG</varname></term>
+
+ <listitem><para id='log-ratelimit-kmsg-body'> Whether to ratelimit kmsg or not. Takes a boolean.
+ Defaults to <literal>true</literal>. If disabled, systemd will not ratelimit messages written to kmsg.
+ </para></listitem>
+ </varlistentry>
+
<varlistentry id='pager'>
<term><varname>$SYSTEMD_PAGER</varname></term>
diff --git a/man/iocost.conf.xml b/man/iocost.conf.xml
new file mode 100644
index 0000000000..be74244267
--- /dev/null
+++ b/man/iocost.conf.xml
@@ -0,0 +1,76 @@
+<?xml version='1.0'?>
+<!DOCTYPE refentry PUBLIC "-//OASIS//DTD DocBook XML V4.5//EN"
+ "http://www.oasis-open.org/docbook/xml/4.2/docbookx.dtd">
+<!-- SPDX-License-Identifier: LGPL-2.1-or-later -->
+
+<refentry id="iocost.conf" xmlns:xi="http://www.w3.org/2001/XInclude">
+ <refentryinfo>
+ <title>iocost.conf</title>
+ <productname>systemd</productname>
+ </refentryinfo>
+
+ <refmeta>
+ <refentrytitle>iocost.conf</refentrytitle>
+ <manvolnum>5</manvolnum>
+ </refmeta>
+
+ <refnamediv>
+ <refname>iocost.conf</refname>
+ <refpurpose>Configuration files for the iocost solution manager</refpurpose>
+ </refnamediv>
+
+ <refsynopsisdiv>
+ <para>
+ <filename>/etc/systemd/iocost.conf</filename>
+ <filename>/etc/systemd/iocost.conf.d/*.conf</filename>
+ </para>
+ </refsynopsisdiv>
+
+ <refsect1>
+ <title>Description</title>
+
+ <para>This file configures the behavior of <literal>iocost</literal>, a tool mostly used by
+ <citerefentry><refentrytitle>systemd-udevd</refentrytitle><manvolnum>8</manvolnum></citerefentry> rules
+ to automatically apply I/O cost solutions to <filename>/sys/fs/cgroup/io.cost.*</filename>.</para>
+
+ <para>The qos and model values are calculated based on benchmarks collected on the
+ <ulink url="https://github.com/iocost-benchmark/iocost-benchmarks">iocost-benchmark</ulink>
+ project and turned into a set of solutions that go from most to least isolated.
+ Isolation allows the system to remain responsive in the face of high I/O load.
+ Which solutions are available for a device can be queried from the udev metadata attached to it. By
+ default the naive solution is used, which provides the most bandwidth.</para>
+ </refsect1>
+
+ <xi:include href="standard-conf.xml" xpointer="main-conf" />
+
+ <refsect1>
+ <title>Options</title>
+
+ <para>All options are configured in the [IOCost] section:</para>
+
+ <variablelist class='config-directives'>
+
+ <varlistentry>
+ <term><varname>TargetSolution=</varname></term>
+
+ <listitem><para>Chooses which I/O cost solution (identified by name) should be used
+ for the devices in this system. The known solutions can be queried from the udev metadata
+ attached to the devices. If a device does not have the specified solution, the first one
+ listed in <varname>IOCOST_SOLUTIONS</varname> is used instead.</para>
+
+ <para>E.g. <literal>TargetSolution=isolated-bandwidth</literal>.</para></listitem>
+ </varlistentry>
+ </variablelist>
+ </refsect1>
+
+ <refsect1>
+ <title>See Also</title>
+ <para>
+ <citerefentry><refentrytitle>udevadm</refentrytitle><manvolnum>8</manvolnum></citerefentry>,
+ <ulink url="https://github.com/iocost-benchmark/iocost-benchmarks">The iocost-benchmarks github project</ulink>,
+ <ulink url="https://github.com/facebookexperimental/resctl-demo/tree/main/resctl-bench/doc">The resctl-bench
+ documentation details how the values are obtained</ulink>
+ </para>
+ </refsect1>
+
+</refentry>
diff --git a/man/kernel-command-line.xml b/man/kernel-command-line.xml
index 27ef72da36..09f8ace4de 100644
--- a/man/kernel-command-line.xml
+++ b/man/kernel-command-line.xml
@@ -66,6 +66,7 @@
<term><varname>systemd.log_level=</varname></term>
<term><varname>systemd.log_location=</varname></term>
<term><varname>systemd.log_color</varname></term>
+ <term><varname>systemd.log_ratelimit_kmsg</varname></term>
<term><varname>systemd.default_standard_output=</varname></term>
<term><varname>systemd.default_standard_error=</varname></term>
<term><varname>systemd.setenv=</varname></term>
diff --git a/man/rules/meson.build b/man/rules/meson.build
index cdf98eaaf0..ca3b471281 100644
--- a/man/rules/meson.build
+++ b/man/rules/meson.build
@@ -24,6 +24,7 @@ manpages = [
['hostnamectl', '1', [], 'ENABLE_HOSTNAMED'],
['hwdb', '7', [], 'ENABLE_HWDB'],
['integritytab', '5', [], 'HAVE_LIBCRYPTSETUP'],
+ ['iocost.conf', '5', [], ''],
['journal-remote.conf', '5', ['journal-remote.conf.d'], 'HAVE_MICROHTTPD'],
['journal-upload.conf', '5', ['journal-upload.conf.d'], 'HAVE_MICROHTTPD'],
['journalctl', '1', [], ''],
diff --git a/man/systemd-poweroff.service.xml b/man/systemd-poweroff.service.xml
index 9adfcc5af0..98c20471da 100644
--- a/man/systemd-poweroff.service.xml
+++ b/man/systemd-poweroff.service.xml
@@ -36,41 +36,34 @@
<refsect1>
<title>Description</title>
- <para><filename>systemd-poweroff.service</filename> is a system
- service that is pulled in by <filename>poweroff.target</filename> and
- is responsible for the actual system power-off operation. Similarly,
- <filename>systemd-halt.service</filename> is pulled in by
- <filename>halt.target</filename>,
- <filename>systemd-reboot.service</filename> by
- <filename>reboot.target</filename> and
- <filename>systemd-kexec.service</filename> by
- <filename>kexec.target</filename> to execute the respective
- actions.</para>
+ <para><filename>systemd-poweroff.service</filename> is a system service that is pulled in by
+ <filename>poweroff.target</filename> and is responsible for the actual system power-off
+ operation. Similarly, <filename>systemd-halt.service</filename> is pulled in by
+ <filename>halt.target</filename>, <filename>systemd-reboot.service</filename> by
+ <filename>reboot.target</filename> and <filename>systemd-kexec.service</filename> by
+ <filename>kexec.target</filename> to execute the respective actions.</para>
- <para>When these services are run, they ensure that PID 1 is
- replaced by the
- <filename>/usr/lib/systemd/systemd-shutdown</filename> tool which
- is then responsible for the actual shutdown. Before shutting down,
- this binary will try to unmount all remaining file systems,
- disable all remaining swap devices, detach all remaining storage
- devices and kill all remaining processes.</para>
+ <para>When these services are run, they ensure that PID 1 is replaced by the
+ <filename>/usr/lib/systemd/systemd-shutdown</filename> tool which is then responsible for the actual
+ shutdown. Before shutting down, this binary will try to unmount all remaining file systems (or at least
+ remount them read-only), disable all remaining swap devices, detach all remaining storage devices and
+ kill all remaining processes.</para>
- <para>It is necessary to have this code in a separate binary
- because otherwise rebooting after an upgrade might be broken — the
- running PID 1 could still depend on libraries which are not
- available any more, thus keeping the file system busy, which then
- cannot be re-mounted read-only.</para>
+ <para>It is necessary to have this code in a separate binary because otherwise rebooting after an upgrade
+ might be broken — the running PID 1 could still depend on libraries which are not available any more,
+ thus keeping the file system busy, which then cannot be re-mounted read-only.</para>
- <para>Immediately before executing the actual system
- power-off/halt/reboot/kexec <filename>systemd-shutdown</filename>
- will run all executables in
- <filename>/usr/lib/systemd/system-shutdown/</filename> and pass
- one arguments to them: either <literal>poweroff</literal>,
- <literal>halt</literal>, <literal>reboot</literal>, or
- <literal>kexec</literal>, depending on the chosen action. All
- executables in this directory are executed in parallel, and
- execution of the action is not continued before all executables
- finished.</para>
+ <para>Shortly before executing the actual system power-off/halt/reboot/kexec
+ <filename>systemd-shutdown</filename> will run all executables in
+ <filename>/usr/lib/systemd/system-shutdown/</filename> and pass one argument to them: either
+ <literal>poweroff</literal>, <literal>halt</literal>, <literal>reboot</literal>, or
+ <literal>kexec</literal>, depending on the chosen action. All executables in this directory are executed
+ in parallel, and execution of the action is not continued before all executables finished. Note that
+ these executables are run <emphasis>after</emphasis> all services have been shut down, and after most
+ mounts have been detached (the root file system as well as <filename>/run/</filename> and various API
+ file systems are still around though). This means any programs dropped into this directory must be
+ prepared to run in such a limited execution environment and not rely on external services or hierarchies
+ such as <filename>/var/</filename> to be around (or writable).</para>
<para>Note that <filename>systemd-poweroff.service</filename> (and the related units) should never be
executed directly. Instead, trigger system shutdown with a command such as <literal>systemctl
diff --git a/man/systemd.xml b/man/systemd.xml
index 1a68301d50..ca9e4e9988 100644
--- a/man/systemd.xml
+++ b/man/systemd.xml
@@ -681,6 +681,11 @@
</varlistentry>
<varlistentry>
+ <term><varname>$SYSTEMD_LOG_RATELIMIT_KMSG</varname></term>
+ <listitem><xi:include href="common-variables.xml" xpointer="log-ratelimit-kmsg" /></listitem>
+ </varlistentry>
+
+ <varlistentry>
<term><varname>$XDG_CONFIG_HOME</varname></term>
<term><varname>$XDG_CONFIG_DIRS</varname></term>
<term><varname>$XDG_DATA_HOME</varname></term>
@@ -865,13 +870,16 @@
<term><varname>systemd.log_target=</varname></term>
<term><varname>systemd.log_time</varname></term>
<term><varname>systemd.log_tid</varname></term>
+ <term><varname>systemd.log_ratelimit_kmsg</varname></term>
<listitem><para>Controls log output, with the same effect as the
<varname>$SYSTEMD_LOG_COLOR</varname>, <varname>$SYSTEMD_LOG_LEVEL</varname>,
<varname>$SYSTEMD_LOG_LOCATION</varname>, <varname>$SYSTEMD_LOG_TARGET</varname>,
- <varname>$SYSTEMD_LOG_TIME</varname>, and <varname>$SYSTEMD_LOG_TID</varname> environment variables
- described above. <varname>systemd.log_color</varname>, <varname>systemd.log_location</varname>,
- <varname>systemd.log_time</varname>, and <varname>systemd.log_tid=</varname> can be specified without
+ <varname>$SYSTEMD_LOG_TIME</varname>, <varname>$SYSTEMD_LOG_TID</varname> and
+ <varname>$SYSTEMD_LOG_RATELIMIT_KMSG</varname> environment variables described above.
+ <varname>systemd.log_color</varname>, <varname>systemd.log_location</varname>,
+ <varname>systemd.log_time</varname>, <varname>systemd.log_tid</varname> and
+ <varname>systemd.log_ratelimit_kmsg</varname> can be specified without
an argument, with the same effect as a positive boolean.</para></listitem>
</varlistentry>
diff --git a/mkosi.conf.d/10-systemd.conf b/mkosi.conf.d/10-systemd.conf
index b7175fb705..2b02eba0d6 100644
--- a/mkosi.conf.d/10-systemd.conf
+++ b/mkosi.conf.d/10-systemd.conf
@@ -71,6 +71,7 @@ QemuMem=2G
ExtraSearchPaths=build/
KernelCommandLineExtra=systemd.crash_shell
systemd.log_level=debug
+ systemd.log_ratelimit_kmsg=0
systemd.journald.forward_to_console
systemd.journald.max_level_console=warning
systemd.mask=auditd
diff --git a/mkosi.conf.d/20-debian.conf b/mkosi.conf.d/20-debian.conf
index 61a86310e4..011f6e9296 100644
--- a/mkosi.conf.d/20-debian.conf
+++ b/mkosi.conf.d/20-debian.conf
@@ -8,6 +8,7 @@ Release=testing
[Content]
Packages=
+ ?priority(required)
btrfs-progs
cryptsetup-bin
dbus-broker
diff --git a/mkosi.postinst b/mkosi.postinst
index 5aad60f97d..24b4666ad7 100755
--- a/mkosi.postinst
+++ b/mkosi.postinst
@@ -62,3 +62,8 @@ if [ -n "$IMAGE_VERSION" ] ; then
-e "\$aIMAGE_VERSION=$IMAGE_VERSION" \
/usr/lib/os-release
fi
+
+# Install a default ignore preset on Debian until it is shipped by Debian itself.
+if grep -q -e "ID=debian" -e "ID_LIKE=debian" /etc/os-release; then
+ echo "ignore *" >/usr/lib/systemd/system-preset/99-ignore.preset
+fi
diff --git a/rules.d/90-iocost.rules b/rules.d/90-iocost.rules
new file mode 100644
index 0000000000..50f778a0ae
--- /dev/null
+++ b/rules.d/90-iocost.rules
@@ -0,0 +1,20 @@
+# SPDX-License-Identifier: LGPL-2.1-or-later
+#
+# This file is part of systemd.
+#
+# systemd is free software; you can redistribute it and/or modify it
+# under the terms of the GNU Lesser General Public License as published by
+# the Free Software Foundation; either version 2.1 of the License, or
+# (at your option) any later version.
+
+SUBSYSTEM!="block", GOTO="iocost_end"
+
+ENV{DEVTYPE}=="partition", GOTO="iocost_end"
+
+ACTION=="remove", GOTO="iocost_end"
+
+ENV{ID_MODEL}!="", IMPORT{builtin}="hwdb 'block::name:$env{ID_MODEL}:fwrev:$env{ID_REVISION}:'"
+
+ENV{IOCOST_SOLUTIONS}!="", RUN+="iocost apply $env{DEVNAME}"
+
+LABEL="iocost_end"
diff --git a/rules.d/meson.build b/rules.d/meson.build
index 09edd58da2..7280f5b995 100644
--- a/rules.d/meson.build
+++ b/rules.d/meson.build
@@ -28,6 +28,7 @@ rules = [
'78-sound-card.rules',
'80-net-setup-link.rules',
'81-net-dhcp.rules',
+ '90-iocost.rules',
)],
[files('80-drivers.rules'),
diff --git a/src/basic/chase.c b/src/basic/chase.c
index eb4bda07a6..373252b645 100644
--- a/src/basic/chase.c
+++ b/src/basic/chase.c
@@ -111,12 +111,26 @@ int chaseat(int dir_fd, const char *path, ChaseFlags flags, char **ret_path, int
* given directory file descriptor, even if it is absolute. If the given directory file descriptor is
* AT_FDCWD and "path" is absolute, it is interpreted relative to the root directory of the host.
*
- * If "dir_fd" is a valid directory fd, "path" is an absolute path and "ret_path" is not NULL, this
- * functions returns a relative path in "ret_path" because openat() like functions generally ignore
- * the directory fd if they are provided with an absolute path. On the other hand, if "dir_fd" is
- * AT_FDCWD and "path" is an absolute path, we return an absolute path in "ret_path" because
- * otherwise, if the caller passes the returned relative path to another openat() like function, it
- * would be resolved relative to the current working directory instead of to "/".
+ * When "dir_fd" points to a non-root directory and CHASE_AT_RESOLVE_IN_ROOT is set, this function
+ * always returns a relative path in "ret_path", even if "path" is an absolute path, because openat()
+ * like functions generally ignore the directory fd if they are provided with an absolute path. When
+         * CHASE_AT_RESOLVE_IN_ROOT is not set, then this returns a path relative to the specified file
+         * descriptor if all resolved symlinks are relative, otherwise an absolute path is returned. When
+ * "dir_fd" is AT_FDCWD and "path" is an absolute path, we return an absolute path in "ret_path"
+ * because otherwise, if the caller passes the returned relative path to another openat() like
+ * function, it would be resolved relative to the current working directory instead of to "/".
+ *
+ * Summary about the result path:
+ * - "dir_fd" points to the root directory
+ * → result will be absolute
+ * - "dir_fd" points to a non-root directory, and CHASE_AT_RESOLVE_IN_ROOT is set
+ * → relative
+ * - "dir_fd" points to a non-root directory, and CHASE_AT_RESOLVE_IN_ROOT is not set
+ * → relative when all resolved symlinks are relative, otherwise absolute
+ * - "dir_fd" is AT_FDCWD, and "path" is absolute
+ * → absolute
+ * - "dir_fd" is AT_FDCWD, and "path" is relative
+ * → relative when all resolved symlinks are relative, otherwise absolute
*
* Algorithmically this operates on two path buffers: "done" are the components of the path we
* already processed and resolved symlinks, "." and ".." of. "todo" are the components of the path we
@@ -190,8 +204,9 @@ int chaseat(int dir_fd, const char *path, ChaseFlags flags, char **ret_path, int
return -ENOMEM;
/* If we receive an absolute path together with AT_FDCWD, we need to return an absolute path, because
- * a relative path would be interpreted relative to the current working directory. */
- bool need_absolute = dir_fd == AT_FDCWD && path_is_absolute(path);
+ * a relative path would be interpreted relative to the current working directory. Also, let's make
+ * the result absolute when the file descriptor of the root directory is specified. */
+ bool need_absolute = (dir_fd == AT_FDCWD && path_is_absolute(path)) || dir_fd_is_root(dir_fd) > 0;
if (need_absolute) {
done = strdup("/");
if (!done)
@@ -373,6 +388,11 @@ int chaseat(int dir_fd, const char *path, ChaseFlags flags, char **ret_path, int
unsafe_transition(&st_child, &st))
return log_unsafe_transition(child, fd, path, flags);
+ /* When CHASE_AT_RESOLVE_IN_ROOT is not set, now the chased path may be
+ * outside of the specified dir_fd. Let's make the result absolute. */
+ if (!FLAGS_SET(flags, CHASE_AT_RESOLVE_IN_ROOT))
+ need_absolute = true;
+
r = free_and_strdup(&done, need_absolute ? "/" : NULL);
if (r < 0)
return r;
@@ -474,8 +494,8 @@ chased_one:
return 0;
}
-int chase(const char *path, const char *original_root, ChaseFlags flags, char **ret_path, int *ret_fd) {
- _cleanup_free_ char *root = NULL, *absolute = NULL, *p = NULL;
+int chase(const char *path, const char *root, ChaseFlags flags, char **ret_path, int *ret_fd) {
+ _cleanup_free_ char *root_abs = NULL, *absolute = NULL, *p = NULL;
_cleanup_close_ int fd = -EBADF, pfd = -EBADF;
int r;
@@ -484,18 +504,17 @@ int chase(const char *path, const char *original_root, ChaseFlags flags, char **
if (isempty(path))
return -EINVAL;
- /* A root directory of "/" or "" is identical to none */
- if (empty_or_root(original_root))
- original_root = NULL;
-
- if (original_root) {
- r = path_make_absolute_cwd(original_root, &root);
+        /* An empty root directory or "/" is treated as "/". */
+ if (empty_or_root(root))
+ root = "/";
+ else {
+ r = path_make_absolute_cwd(root, &root_abs);
if (r < 0)
return r;
/* Simplify the root directory, so that it has no duplicate slashes and nothing at the
* end. While we won't resolve the root path we still simplify it. */
- path_simplify(root);
+ root = path_simplify(root_abs);
assert(path_is_absolute(root));
assert(!empty_or_root(root));
@@ -515,14 +534,14 @@ int chase(const char *path, const char *original_root, ChaseFlags flags, char **
return r;
}
- path = path_startswith(absolute, empty_to_root(root));
+ path = path_startswith(absolute, root);
if (!path)
return log_full_errno(FLAGS_SET(flags, CHASE_WARN) ? LOG_WARNING : LOG_DEBUG,
SYNTHETIC_ERRNO(ECHRNG),
"Specified path '%s' is outside of specified root directory '%s', refusing to resolve.",
- absolute, empty_to_root(root));
+ absolute, root);
- fd = open(empty_to_root(root), O_CLOEXEC|O_DIRECTORY|O_PATH);
+ fd = open(root, O_CLOEXEC|O_DIRECTORY|O_PATH);
if (fd < 0)
return -errno;
@@ -532,19 +551,27 @@ int chase(const char *path, const char *original_root, ChaseFlags flags, char **
if (ret_path) {
if (!FLAGS_SET(flags, CHASE_EXTRACT_FILENAME)) {
- _cleanup_free_ char *q = NULL;
- q = path_join(empty_to_root(root), p);
- if (!q)
- return -ENOMEM;
+ /* When "root" points to the root directory, the result of chaseat() is always
+ * absolute, hence it is not necessary to prefix with the root. When "root" points to
+ * a non-root directory, the result path is always normalized and relative, hence
+                         * we can simply call path_join(), and it is not necessary to call path_simplify().
+ * Note that the result of chaseat() may start with "." (more specifically, it may be
+ * "." or "./"), and we need to drop "." in that case. */
+
+ if (empty_or_root(root))
+ assert(path_is_absolute(p));
+ else {
+ char *q;
- path_simplify(q);
+ assert(!path_is_absolute(p));
- if (FLAGS_SET(flags, CHASE_TRAIL_SLASH) && ENDSWITH_SET(path, "/", "/."))
- if (!strextend(&q, "/"))
+ q = path_join(root, p + (*p == '.'));
+ if (!q)
return -ENOMEM;
- free_and_replace(p, q);
+ free_and_replace(p, q);
+ }
}
*ret_path = TAKE_PTR(p);
@@ -556,6 +583,37 @@ int chase(const char *path, const char *original_root, ChaseFlags flags, char **
return r;
}
+int chaseat_prefix_root(const char *path, const char *root, char **ret) {
+ char *q;
+ int r;
+
+ assert(path);
+ assert(ret);
+
+ /* This is mostly for prefixing the result of chaseat(). */
+
+ if (!path_is_absolute(path)) {
+ _cleanup_free_ char *root_abs = NULL;
+
+ /* If the dir_fd points to the root directory, chaseat() always returns an absolute path. */
+ assert(!empty_or_root(root));
+
+ r = path_make_absolute_cwd(root, &root_abs);
+ if (r < 0)
+ return r;
+
+ root = path_simplify(root_abs);
+
+ q = path_join(root, path + (path[0] == '.' && IN_SET(path[1], '/', '\0')));
+ } else
+ q = strdup(path);
+ if (!q)
+ return -ENOMEM;
+
+ *ret = q;
+ return 0;
+}
+
int chase_and_open(const char *path, const char *root, ChaseFlags chase_flags, int open_flags, char **ret_path) {
_cleanup_close_ int path_fd = -EBADF;
_cleanup_free_ char *p = NULL, *fname = NULL;
diff --git a/src/basic/chase.h b/src/basic/chase.h
index 40121f7d70..f37e836822 100644
--- a/src/basic/chase.h
+++ b/src/basic/chase.h
@@ -42,6 +42,8 @@ bool unsafe_transition(const struct stat *a, const struct stat *b);
int chase(const char *path_with_prefix, const char *root, ChaseFlags chase_flags, char **ret_path, int *ret_fd);
+int chaseat_prefix_root(const char *path, const char *root, char **ret);
+
int chase_and_open(const char *path, const char *root, ChaseFlags chase_flags, int open_flags, char **ret_path);
int chase_and_opendir(const char *path, const char *root, ChaseFlags chase_flags, char **ret_path, DIR **ret_dir);
int chase_and_stat(const char *path, const char *root, ChaseFlags chase_flags, char **ret_path, struct stat *ret_stat);
diff --git a/src/basic/fd-util.c b/src/basic/fd-util.c
index 7125e28e1b..974a7aac65 100644
--- a/src/basic/fd-util.c
+++ b/src/basic/fd-util.c
@@ -911,10 +911,23 @@ int dir_fd_is_root(int dir_fd) {
if (!statx_inode_same(&st.sx, &pst.sx))
return false;
+ /* Even if the parent directory has the same inode, the fd may not point to the root directory "/",
+ * and we also need to check that the mount ids are the same. Otherwise, a construct like the
+ * following could be used to trick us:
+ *
+ * $ mkdir /tmp/x /tmp/x/y
+ * $ mount --bind /tmp/x /tmp/x/y
+ *
+ * Note, statx() does not provide the mount ID and path_get_mnt_id_at() does not work when an old
+ * kernel is used without /proc mounted. In that case, let's assume that we do not have such spurious
+ * mount points in an early boot stage, and silently skip the following check. */
+
if (!FLAGS_SET(st.nsx.stx_mask, STATX_MNT_ID)) {
int mntid;
r = path_get_mnt_id_at(dir_fd, "", &mntid);
+ if (r == -ENOSYS)
+ return true; /* skip the mount ID check */
if (r < 0)
return r;
assert(mntid >= 0);
@@ -927,6 +940,8 @@ int dir_fd_is_root(int dir_fd) {
int mntid;
r = path_get_mnt_id_at(dir_fd, "..", &mntid);
+ if (r == -ENOSYS)
+ return true; /* skip the mount ID check */
if (r < 0)
return r;
assert(mntid >= 0);
@@ -935,13 +950,6 @@ int dir_fd_is_root(int dir_fd) {
pst.nsx.stx_mask |= STATX_MNT_ID;
}
- /* Even if the parent directory has the same inode, the fd may not point to the root directory "/",
- * and we also need to check that the mount ids are the same. Otherwise, a construct like the
- * following could be used to trick us:
- *
- * $ mkdir /tmp/x /tmp/x/y
- * $ mount --bind /tmp/x /tmp/x/y
- */
return statx_mount_same(&st.nsx, &pst.nsx);
}
diff --git a/src/basic/list.h b/src/basic/list.h
index ffc8bd8304..e4e5dff3ea 100644
--- a/src/basic/list.h
+++ b/src/basic/list.h
@@ -46,7 +46,7 @@
/* Remove an item from the list */
#define LIST_REMOVE(name,head,item) \
- ({ \
+ ({ \
typeof(*(head)) **_head = &(head), *_item = (item); \
assert(_item); \
if (_item->name##_next) \
@@ -127,8 +127,11 @@
_b; \
})
-#define LIST_JUST_US(name,item) \
- (!(item)->name##_prev && !(item)->name##_next)
+#define LIST_JUST_US(name, item) \
+ ({ \
+ typeof(*(item)) *_item = (item); \
+ !(_item)->name##_prev && !(_item)->name##_next; \
+ })
/* The type of the iterator 'i' is automatically determined by the type of 'head', and declared in the
* loop. Hence, do not declare the same variable in the outer scope. Sometimes, we set 'head' through
diff --git a/src/basic/log.c b/src/basic/log.c
index 75f59c4343..4cd2d5a4ab 100644
--- a/src/basic/log.c
+++ b/src/basic/log.c
@@ -50,6 +50,7 @@ static void *log_syntax_callback_userdata = NULL;
static LogTarget log_target = LOG_TARGET_CONSOLE;
static int log_max_level = LOG_INFO;
static int log_facility = LOG_DAEMON;
+static bool ratelimit_kmsg = true;
static int console_fd = STDERR_FILENO;
static int syslog_fd = -EBADF;
@@ -552,8 +553,12 @@ static int write_to_kmsg(
if (kmsg_fd < 0)
return 0;
- if (!ratelimit_below(&ratelimit))
- return 0;
+ if (ratelimit_kmsg && !ratelimit_below(&ratelimit)) {
+ if (ratelimit_num_dropped(&ratelimit) > 1)
+ return 0;
+
+ buffer = "Too many messages being logged to kmsg, ignoring";
+ }
xsprintf(header_priority, "<%i>", level);
xsprintf(header_pid, "["PID_FMT"]: ", getpid_cached());
@@ -1178,6 +1183,17 @@ int log_set_max_level_from_string(const char *e) {
return 0;
}
+static int log_set_ratelimit_kmsg_from_string(const char *e) {
+ int r;
+
+ r = parse_boolean(e);
+ if (r < 0)
+ return r;
+
+ ratelimit_kmsg = r;
+ return 0;
+}
+
static int parse_proc_cmdline_item(const char *key, const char *value, void *data) {
/*
@@ -1228,6 +1244,10 @@ static int parse_proc_cmdline_item(const char *key, const char *value, void *dat
if (log_show_time_from_string(value ?: "1") < 0)
log_warning("Failed to parse log time setting '%s'. Ignoring.", value);
+ } else if (proc_cmdline_key_streq(key, "systemd.log_ratelimit_kmsg")) {
+
+ if (log_set_ratelimit_kmsg_from_string(value ?: "1") < 0)
+ log_warning("Failed to parse log ratelimit kmsg boolean '%s'. Ignoring.", value);
}
return 0;
@@ -1268,6 +1288,10 @@ void log_parse_environment_variables(void) {
e = getenv("SYSTEMD_LOG_TID");
if (e && log_show_tid_from_string(e) < 0)
log_warning("Failed to parse log tid '%s'. Ignoring.", e);
+
+ e = getenv("SYSTEMD_LOG_RATELIMIT_KMSG");
+ if (e && log_set_ratelimit_kmsg_from_string(e) < 0)
+ log_warning("Failed to parse log ratelimit kmsg boolean '%s'. Ignoring.", e);
}
void log_parse_environment(void) {
diff --git a/src/basic/mountpoint-util.c b/src/basic/mountpoint-util.c
index 698ea65d48..7237930a76 100644
--- a/src/basic/mountpoint-util.c
+++ b/src/basic/mountpoint-util.c
@@ -123,7 +123,7 @@ static int fd_fdinfo_mnt_id(int fd, const char *filename, int flags, int *ret_mn
r = read_full_virtual_file(path, &fdinfo, NULL);
if (r == -ENOENT) /* The fdinfo directory is a relatively new addition */
- return -EOPNOTSUPP;
+ return proc_mounted() > 0 ? -EOPNOTSUPP : -ENOSYS;
if (r < 0)
return r;
@@ -280,7 +280,7 @@ int fd_is_mount_point(int fd, const char *filename, int flags) {
fallback_fdinfo:
r = fd_fdinfo_mnt_id(fd, filename, flags, &mount_id);
- if (IN_SET(r, -EOPNOTSUPP, -EACCES, -EPERM))
+ if (IN_SET(r, -EOPNOTSUPP, -EACCES, -EPERM, -ENOSYS))
goto fallback_fstat;
if (r < 0)
return r;
@@ -549,6 +549,8 @@ int dev_is_devtmpfs(void) {
return r;
r = fopen_unlocked("/proc/self/mountinfo", "re", &proc_self_mountinfo);
+ if (r == -ENOENT)
+ return proc_mounted() > 0 ? -ENOENT : -ENOSYS;
if (r < 0)
return r;
diff --git a/src/basic/os-util.c b/src/basic/os-util.c
index dd8faf2376..5d06e20871 100644
--- a/src/basic/os-util.c
+++ b/src/basic/os-util.c
@@ -155,7 +155,7 @@ int open_os_release(const char *root, char **ret_path, int *ret_fd) {
return r;
if (ret_path) {
- r = path_prefix_root_cwd(p, root, ret_path);
+ r = chaseat_prefix_root(p, root, ret_path);
if (r < 0)
return r;
}
@@ -292,7 +292,7 @@ int open_extension_release(
return r;
if (ret_path) {
- r = path_prefix_root_cwd(p, root, ret_path);
+ r = chaseat_prefix_root(p, root, ret_path);
if (r < 0)
return r;
}
diff --git a/src/basic/path-util.c b/src/basic/path-util.c
index fa2e26789f..0b0f0da760 100644
--- a/src/basic/path-util.c
+++ b/src/basic/path-util.c
@@ -100,34 +100,6 @@ int path_make_absolute_cwd(const char *p, char **ret) {
return 0;
}
-int path_prefix_root_cwd(const char *p, const char *root, char **ret) {
- _cleanup_free_ char *root_abs = NULL;
- char *c;
- int r;
-
- assert(p);
- assert(ret);
-
- /* Unlike path_make_absolute(), this always prefixes root path if specified.
- * The root path is always simplified, but the provided path will not.
- * This is useful for prefixing the result of chaseat(). */
-
- if (empty_or_root(root))
- return path_make_absolute_cwd(p, ret);
-
- r = path_make_absolute_cwd(root, &root_abs);
- if (r < 0)
- return r;
-
- path_simplify(root_abs);
- c = path_join(root_abs, p);
- if (!c)
- return -ENOMEM;
-
- *ret = c;
- return 0;
-}
-
int path_make_relative(const char *from, const char *to, char **ret) {
_cleanup_free_ char *result = NULL;
unsigned n_parents;
diff --git a/src/basic/path-util.h b/src/basic/path-util.h
index a0af9de674..7843599816 100644
--- a/src/basic/path-util.h
+++ b/src/basic/path-util.h
@@ -60,7 +60,6 @@ int path_split_and_make_absolute(const char *p, char ***ret);
char* path_make_absolute(const char *p, const char *prefix);
int safe_getcwd(char **ret);
int path_make_absolute_cwd(const char *p, char **ret);
-int path_prefix_root_cwd(const char *p, const char *root, char **ret);
int path_make_relative(const char *from, const char *to, char **ret);
int path_make_relative_parent(const char *from_child, const char *to, char **ret);
char *path_startswith_full(const char *path, const char *prefix, bool accept_dot_dot) _pure_;
diff --git a/src/basic/ratelimit.c b/src/basic/ratelimit.c
index a0260bfe1c..5675ec2f46 100644
--- a/src/basic/ratelimit.c
+++ b/src/basic/ratelimit.c
@@ -10,7 +10,6 @@
bool ratelimit_below(RateLimit *r) {
usec_t ts;
- bool good = false;
assert(r);
@@ -21,22 +20,25 @@ bool ratelimit_below(RateLimit *r) {
if (r->begin <= 0 ||
usec_sub_unsigned(ts, r->begin) > r->interval) {
- r->begin = ts;
+ r->begin = ts; /* Start a new time window */
+ r->num = 1; /* Reset counter */
+ return true;
+ }
- /* Reset counter */
- r->num = 0;
- good = true;
- } else if (r->num < r->burst)
- good = true;
+ if (_unlikely_(r->num == UINT_MAX))
+ return false;
r->num++;
- return good;
+ return r->num <= r->burst;
}
unsigned ratelimit_num_dropped(RateLimit *r) {
assert(r);
- return r->num > r->burst ? r->num - r->burst : 0;
+ if (r->num == UINT_MAX) /* overflow, return as special case */
+ return UINT_MAX;
+
+ return LESS_BY(r->num, r->burst);
}
usec_t ratelimit_end(const RateLimit *rl) {
diff --git a/src/basic/stat-util.c b/src/basic/stat-util.c
index e97770707f..335daca234 100644
--- a/src/basic/stat-util.c
+++ b/src/basic/stat-util.c
@@ -190,10 +190,10 @@ int files_same(const char *filea, const char *fileb, int flags) {
assert(fileb);
if (fstatat(AT_FDCWD, filea, &a, flags) < 0)
- return -errno;
+ return log_debug_errno(errno, "Cannot stat %s: %m", filea);
if (fstatat(AT_FDCWD, fileb, &b, flags) < 0)
- return -errno;
+ return log_debug_errno(errno, "Cannot stat %s: %m", fileb);
return stat_inode_same(&a, &b);
}
diff --git a/src/basic/string-util.c b/src/basic/string-util.c
index cc2f8ecdab..c74ee67dfe 100644
--- a/src/basic/string-util.c
+++ b/src/basic/string-util.c
@@ -1283,3 +1283,15 @@ char *find_line_startswith(const char *haystack, const char *needle) {
return p + strlen(needle);
}
+
+char *startswith_strv(const char *string, char **strv) {
+        /* Returns what startswith() yields for the first entry of 'strv' that matches, NULL if none does. */
+        STRV_FOREACH(prefix, strv) {
+                char *rest = startswith(string, *prefix);
+
+                if (rest)
+                        return rest;
+        }
+
+        return NULL;
+}
diff --git a/src/basic/string-util.h b/src/basic/string-util.h
index 75483924af..4430910e22 100644
--- a/src/basic/string-util.h
+++ b/src/basic/string-util.h
@@ -267,3 +267,8 @@ char *strdupspn(const char *a, const char *accept);
char *strdupcspn(const char *a, const char *reject);
char *find_line_startswith(const char *haystack, const char *needle);
+
+char *startswith_strv(const char *string, char **strv);
+
+#define STARTSWITH_SET(p, ...) \
+ startswith_strv(p, STRV_MAKE(__VA_ARGS__))
diff --git a/src/basic/strv.h b/src/basic/strv.h
index b4d3f121f9..544d46a3f8 100644
--- a/src/basic/strv.h
+++ b/src/basic/strv.h
@@ -200,18 +200,6 @@ static inline void strv_print(char * const *l) {
_x && strv_contains_case(STRV_MAKE(__VA_ARGS__), _x); \
})
-#define STARTSWITH_SET(p, ...) \
- ({ \
- const char *_p = (p); \
- char *_found = NULL; \
- STRV_FOREACH(_i, STRV_MAKE(__VA_ARGS__)) { \
- _found = startswith(_p, *_i); \
- if (_found) \
- break; \
- } \
- _found; \
- })
-
#define ENDSWITH_SET(p, ...) \
({ \
const char *_p = (p); \
diff --git a/src/core/device.c b/src/core/device.c
index 4f6ecf4d7f..1449867e35 100644
--- a/src/core/device.c
+++ b/src/core/device.c
@@ -55,24 +55,31 @@ static int device_by_path(Manager *m, const char *path, Unit **ret) {
static void device_unset_sysfs(Device *d) {
Hashmap *devices;
- Device *first;
assert(d);
if (!d->sysfs)
return;
- /* Remove this unit from the chain of devices which share the
- * same sysfs path. */
+ /* Remove this unit from the chain of devices which share the same sysfs path. */
+
devices = UNIT(d)->manager->devices_by_sysfs;
- first = hashmap_get(devices, d->sysfs);
- LIST_REMOVE(same_sysfs, first, d);
- if (first)
- hashmap_remove_and_replace(devices, d->sysfs, first->sysfs, first);
+ if (d->same_sysfs_prev)
+ /* If this is not the first unit, then simply remove this unit. */
+ d->same_sysfs_prev->same_sysfs_next = d->same_sysfs_next;
+ else if (d->same_sysfs_next)
+ /* If this is the first unit, replace with the next unit. */
+ assert_se(hashmap_replace(devices, d->same_sysfs_next->sysfs, d->same_sysfs_next) >= 0);
else
+ /* Otherwise, remove the entry. */
hashmap_remove(devices, d->sysfs);
+ if (d->same_sysfs_next)
+ d->same_sysfs_next->same_sysfs_prev = d->same_sysfs_prev;
+
+ d->same_sysfs_prev = d->same_sysfs_next = NULL;
+
d->sysfs = mfree(d->sysfs);
}
diff --git a/src/core/kmod-setup.c b/src/core/kmod-setup.c
index e843743777..c09e17f568 100644
--- a/src/core/kmod-setup.c
+++ b/src/core/kmod-setup.c
@@ -11,6 +11,7 @@
#include "macro.h"
#include "recurse-dir.h"
#include "string-util.h"
+#include "strv.h"
#include "virt.h"
#if HAVE_KMOD
@@ -30,7 +31,7 @@ static void systemd_kmod_log(
REENABLE_WARNING;
}
-static int has_virtio_rng_recurse_dir_cb(
+static int match_modalias_recurse_dir_cb(
RecurseDirEvent event,
const char *path,
int dir_fd,
@@ -40,6 +41,7 @@ static int has_virtio_rng_recurse_dir_cb(
void *userdata) {
_cleanup_free_ char *alias = NULL;
+ char **modaliases = ASSERT_PTR(userdata);
int r;
if (event != RECURSE_DIR_ENTRY)
@@ -57,10 +59,7 @@ static int has_virtio_rng_recurse_dir_cb(
return RECURSE_DIR_LEAVE_DIRECTORY;
}
- if (startswith(alias, "pci:v00001AF4d00001005"))
- return 1;
-
- if (startswith(alias, "pci:v00001AF4d00001044"))
+ if (startswith_strv(alias, modaliases))
return 1;
return RECURSE_DIR_LEAVE_DIRECTORY;
@@ -69,20 +68,45 @@ static int has_virtio_rng_recurse_dir_cb(
static bool has_virtio_rng(void) {
int r;
+ /* Directory traversal might be slow, hence let's do a cheap check first if it's even worth it */
+ if (detect_vm() == VIRTUALIZATION_NONE)
+ return false;
+
r = recurse_dir_at(
AT_FDCWD,
"/sys/devices/pci0000:00",
/* statx_mask= */ 0,
/* n_depth_max= */ 2,
RECURSE_DIR_ENSURE_TYPE,
- has_virtio_rng_recurse_dir_cb,
- NULL);
+ match_modalias_recurse_dir_cb,
+ STRV_MAKE("pci:v00001AF4d00001005", "pci:v00001AF4d00001044"));
if (r < 0)
log_debug_errno(r, "Failed to determine whether host has virtio-rng device, ignoring: %m");
return r > 0;
}
+static bool has_virtio_console(void) {
+ int r;
+
+ /* Directory traversal might be slow, hence let's do a cheap check first if it's even worth it */
+ if (detect_vm() == VIRTUALIZATION_NONE)
+ return false;
+
+ r = recurse_dir_at(
+ AT_FDCWD,
+ "/sys/devices/pci0000:00",
+ /* statx_mask= */ 0,
+ /* n_depth_max= */ 3,
+ RECURSE_DIR_ENSURE_TYPE,
+ match_modalias_recurse_dir_cb,
+ STRV_MAKE("virtio:d00000003v", "virtio:d0000000Bv"));
+ if (r < 0)
+ log_debug_errno(r, "Failed to determine whether host has virtio-console device, ignoring: %m");
+
+ return r > 0;
+}
+
static bool in_qemu(void) {
return IN_SET(detect_vm(), VIRTUALIZATION_KVM, VIRTUALIZATION_QEMU);
}
@@ -100,31 +124,35 @@ int kmod_setup(void) {
} kmod_table[] = {
/* This one we need to load explicitly, since auto-loading on use doesn't work
* before udev created the ghost device nodes, and we need it earlier than that. */
- { "autofs4", "/sys/class/misc/autofs", true, false, NULL },
+ { "autofs4", "/sys/class/misc/autofs", true, false, NULL },
/* This one we need to load explicitly, since auto-loading of IPv6 is not done when
* we try to configure ::1 on the loopback device. */
- { "ipv6", "/sys/module/ipv6", false, true, NULL },
+ { "ipv6", "/sys/module/ipv6", false, true, NULL },
/* This should never be a module */
- { "unix", "/proc/net/unix", true, true, NULL },
+ { "unix", "/proc/net/unix", true, true, NULL },
#if HAVE_LIBIPTC
/* netfilter is needed by networkd, nspawn among others, and cannot be autoloaded */
- { "ip_tables", "/proc/net/ip_tables_names", false, false, NULL },
+ { "ip_tables", "/proc/net/ip_tables_names", false, false, NULL },
#endif
/* virtio_rng would be loaded by udev later, but real entropy might be needed very early */
- { "virtio_rng", NULL, false, false, has_virtio_rng },
+ { "virtio_rng", NULL, false, false, has_virtio_rng },
+
+ /* we want early logging to hvc consoles if possible, and make sure systemd-getty-generator
+ * can rely on all consoles being probed already. */
+ { "virtio_console", NULL, false, false, has_virtio_console },
/* qemu_fw_cfg would be loaded by udev later, but we want to import credentials from it super early */
- { "qemu_fw_cfg", "/sys/firmware/qemu_fw_cfg", false, false, in_qemu },
+ { "qemu_fw_cfg", "/sys/firmware/qemu_fw_cfg", false, false, in_qemu },
/* dmi-sysfs is needed to import credentials from it super early */
- { "dmi-sysfs", "/sys/firmware/dmi/entries", false, false, NULL },
+ { "dmi-sysfs", "/sys/firmware/dmi/entries", false, false, NULL },
#if HAVE_TPM2
/* Make sure the tpm subsystem is available which ConditionSecurity=tpm2 depends on. */
- { "tpm", "/sys/class/tpmrm", false, false, efi_has_tpm2 },
+ { "tpm", "/sys/class/tpmrm", false, false, efi_has_tpm2 },
#endif
};
_cleanup_(kmod_unrefp) struct kmod_ctx *ctx = NULL;
diff --git a/src/core/main.c b/src/core/main.c
index b627916a25..932ea64e45 100644
--- a/src/core/main.c
+++ b/src/core/main.c
@@ -2798,12 +2798,18 @@ int main(int argc, char *argv[]) {
error_message = "Failed to mount early API filesystems";
goto finish;
}
+ }
+
+ /* We might have just mounted /proc, so let's try to parse the kernel
+ * command line log arguments immediately. */
+ log_parse_environment();
- /* Let's open the log backend a second time, in case the first time didn't
- * work. Quite possibly we have mounted /dev just now, so /dev/kmsg became
- * available, and it previously wasn't. */
- log_open();
+ /* Let's open the log backend a second time, in case the first time didn't
+ * work. Quite possibly we have mounted /dev just now, so /dev/kmsg became
+ * available, and it previously wasn't. */
+ log_open();
+ if (!skip_setup) {
disable_printk_ratelimit();
r = initialize_security(
diff --git a/src/core/manager.c b/src/core/manager.c
index 805ee0a38c..94416665ec 100644
--- a/src/core/manager.c
+++ b/src/core/manager.c
@@ -1600,6 +1600,7 @@ static void manager_clear_jobs_and_units(Manager *m) {
assert(!m->stop_when_unneeded_queue);
assert(!m->start_when_upheld_queue);
assert(!m->stop_when_bound_queue);
+ assert(!m->release_resources_queue);
assert(hashmap_isempty(m->jobs));
assert(hashmap_isempty(m->units));
diff --git a/src/fstab-generator/fstab-generator.c b/src/fstab-generator/fstab-generator.c
index cc2c5512dd..dae290c889 100644
--- a/src/fstab-generator/fstab-generator.c
+++ b/src/fstab-generator/fstab-generator.c
@@ -1080,7 +1080,7 @@ static int add_sysroot_usr_mount_or_fallback(void) {
/* OK, so we didn't write anything out for /sysusr/usr/ nor /sysroot/usr/. In this case, let's make
* sure that initrd-usr-fs.target is at least ordered after sysroot.mount so that services that order
- * themselves get the guarantee that /usr/ is definitely mounted somewhere. */
+ * themselves after it get the guarantee that /usr/ is definitely mounted somewhere. */
return generator_add_symlink(
arg_dest,
diff --git a/src/gpt-auto-generator/gpt-auto-generator.c b/src/gpt-auto-generator/gpt-auto-generator.c
index 005df04328..030ada5d6e 100644
--- a/src/gpt-auto-generator/gpt-auto-generator.c
+++ b/src/gpt-auto-generator/gpt-auto-generator.c
@@ -761,12 +761,11 @@ static int enumerate_partitions(dev_t devnum) {
* on. And thus we also don't set DISSECT_IMAGE_PIN_PARTITION_DEVICES here, because
* we don't actually mount anything immediately. */
&m);
- if (r == -ENOPKG) {
- log_debug_errno(r, "No suitable partition table found on block device %s, ignoring.", devname);
- return 0;
+ if (r < 0) {
+ bool ok = r == -ENOPKG;
+ dissect_log_error(ok ? LOG_DEBUG : LOG_ERR, r, devname, NULL);
+ return ok ? 0 : r;
}
- if (r < 0)
- return log_error_errno(r, "Failed to dissect partition table of block device %s: %m", devname);
if (m->partitions[PARTITION_SWAP].found) {
k = add_partition_swap(m->partitions + PARTITION_SWAP);
diff --git a/src/libsystemd-network/sd-dhcp-server.c b/src/libsystemd-network/sd-dhcp-server.c
index 05c0cddfd0..55290ee0f1 100644
--- a/src/libsystemd-network/sd-dhcp-server.c
+++ b/src/libsystemd-network/sd-dhcp-server.c
@@ -1061,6 +1061,40 @@ static bool address_available(sd_dhcp_server *server, be32_t address) {
return true;
}
+static int server_get_static_lease(sd_dhcp_server *server, const DHCPRequest *req, DHCPLease **ret) {
+ DHCPLease *static_lease;
+ _cleanup_free_ uint8_t *data = NULL;
+
+ assert(server);
+ assert(req);
+ assert(ret);
+
+ static_lease = hashmap_get(server->static_leases_by_client_id, &req->client_id);
+ if (static_lease) {
+ *ret = static_lease;
+ return 0;
+ }
+
+ /* when no lease is found based on the client id fall back to chaddr */
+ data = new(uint8_t, req->message->hlen + 1);
+ if (!data)
+ return -ENOMEM;
+
+ /* set client id type to 1: Ethernet Link-Layer (RFC 2132) */
+ data[0] = 0x01;
+ memcpy(data + 1, req->message->chaddr, req->message->hlen);
+
+ static_lease = hashmap_get(server->static_leases_by_client_id,
+ &(DHCPClientId) {
+ .length = req->message->hlen + 1,
+ .data = data,
+ });
+
+ *ret = static_lease;
+
+ return 0;
+}
+
#define HASH_KEY SD_ID128_MAKE(0d,1d,fe,bd,f1,24,bd,b3,47,f1,dd,6e,73,21,93,30)
int dhcp_server_handle_message(sd_dhcp_server *server, DHCPMessage *message, size_t length) {
@@ -1092,7 +1126,9 @@ int dhcp_server_handle_message(sd_dhcp_server *server, DHCPMessage *message, siz
return r;
existing_lease = hashmap_get(server->bound_leases_by_client_id, &req->client_id);
- static_lease = hashmap_get(server->static_leases_by_client_id, &req->client_id);
+ r = server_get_static_lease(server, req, &static_lease);
+ if (r < 0)
+ return r;
switch (type) {
diff --git a/src/shared/dissect-image.c b/src/shared/dissect-image.c
index 6bf6636ba3..b84ef46442 100644
--- a/src/shared/dissect-image.c
+++ b/src/shared/dissect-image.c
@@ -252,6 +252,21 @@ int probe_filesystem_full(
if (!b)
return -ENOMEM;
+ /* The Linux kernel maintains separate block device caches for main ("whole") and partition block
+ * devices, which means making a change to one might not be reflected immediately when reading via
+ * the other. That's massively confusing when mixing accesses to such devices. Let's address this in
+ * a limited way: when probing a file system that is not at the beginning of the block device we
+ * apparently probe a partition via the main block device, and in that case let's first flush the
+ * main block device cache, so that we get the data that the per-partition block device last
+ * sync'ed on.
+ *
+ * This only works under the assumption that any tools that write to the partition block devices
+ * issue a syncfs()/fsync() on the device after making changes. Typically file system formatting
+ * tools that write a superblock onto a partition block device do that, however. */
+ if (offset != 0)
+ if (ioctl(fd, BLKFLSBUF, 0) < 0)
+ log_debug_errno(errno, "Failed to flush block device cache, ignoring: %m");
+
errno = 0;
r = blkid_probe_set_device(
b,
@@ -1575,7 +1590,8 @@ int dissect_image_file(
#endif
}
-static int dissect_log_error(int r, const char *name, const VeritySettings *verity) {
+int dissect_log_error(int log_level, int r, const char *name, const VeritySettings *verity) {
+ assert(log_level >= 0 && log_level <= LOG_DEBUG);
assert(name);
switch (r) {
@@ -1584,43 +1600,43 @@ static int dissect_log_error(int r, const char *name, const VeritySettings *veri
return r;
case -EOPNOTSUPP:
- return log_error_errno(r, "Dissecting images is not supported, compiled without blkid support.");
+ return log_full_errno(log_level, r, "Dissecting images is not supported, compiled without blkid support.");
case -ENOPKG:
- return log_error_errno(r, "%s: Couldn't identify a suitable partition table or file system.", name);
+ return log_full_errno(log_level, r, "%s: Couldn't identify a suitable partition table or file system.", name);
case -ENOMEDIUM:
- return log_error_errno(r, "%s: The image does not pass os-release/extension-release validation.", name);
+ return log_full_errno(log_level, r, "%s: The image does not pass os-release/extension-release validation.", name);
case -EADDRNOTAVAIL:
- return log_error_errno(r, "%s: No root partition for specified root hash found.", name);
+ return log_full_errno(log_level, r, "%s: No root partition for specified root hash found.", name);
case -ENOTUNIQ:
- return log_error_errno(r, "%s: Multiple suitable root partitions found in image.", name);
+ return log_full_errno(log_level, r, "%s: Multiple suitable root partitions found in image.", name);
case -ENXIO:
- return log_error_errno(r, "%s: No suitable root partition found in image.", name);
+ return log_full_errno(log_level, r, "%s: No suitable root partition found in image.", name);
case -EPROTONOSUPPORT:
- return log_error_errno(r, "Device '%s' is a loopback block device with partition scanning turned off, please turn it on.", name);
+ return log_full_errno(log_level, r, "Device '%s' is a loopback block device with partition scanning turned off, please turn it on.", name);
case -ENOTBLK:
- return log_error_errno(r, "%s: Image is not a block device.", name);
+ return log_full_errno(log_level, r, "%s: Image is not a block device.", name);
case -EBADR:
- return log_error_errno(r,
- "Combining partitioned images (such as '%s') with external Verity data (such as '%s') not supported. "
- "(Consider setting $SYSTEMD_DISSECT_VERITY_SIDECAR=0 to disable automatic discovery of external Verity data.)",
- name, strna(verity ? verity->data_path : NULL));
+ return log_full_errno(log_level, r,
+ "Combining partitioned images (such as '%s') with external Verity data (such as '%s') not supported. "
+ "(Consider setting $SYSTEMD_DISSECT_VERITY_SIDECAR=0 to disable automatic discovery of external Verity data.)",
+ name, strna(verity ? verity->data_path : NULL));
case -ERFKILL:
- return log_error_errno(r, "%s: image does not match image policy.", name);
+ return log_full_errno(log_level, r, "%s: image does not match image policy.", name);
case -ENOMSG:
- return log_error_errno(r, "%s: no suitable partitions found.", name);
+ return log_full_errno(log_level, r, "%s: no suitable partitions found.", name);
default:
- return log_error_errno(r, "Failed to dissect image '%s': %m", name);
+ return log_full_errno(log_level, r, "%s: cannot dissect image: %m", name);
}
}
@@ -1633,6 +1649,7 @@ int dissect_image_file_and_warn(
DissectedImage **ret) {
return dissect_log_error(
+ LOG_ERR,
dissect_image_file(path, verity, mount_options, image_policy, flags, ret),
path,
verity);
@@ -1890,11 +1907,6 @@ static int mount_partition(
if (!fstype)
return -EAFNOSUPPORT;
- r = dissect_fstype_ok(fstype);
- if (r < 0)
- return r;
- if (!r)
- return -EIDRM; /* Recognizable error */
/* We are looking at an encrypted partition? This either means stacked encryption, or the caller
* didn't call dissected_image_decrypt() beforehand. Let's return a recognizable error for this
@@ -1902,6 +1914,12 @@ static int mount_partition(
if (streq(fstype, "crypto_LUKS"))
return -EUNATCH;
+ r = dissect_fstype_ok(fstype);
+ if (r < 0)
+ return r;
+ if (!r)
+ return -EIDRM; /* Recognizable error */
+
rw = m->rw && !(flags & DISSECT_IMAGE_MOUNT_READ_ONLY);
discard = ((flags & DISSECT_IMAGE_DISCARD) ||
@@ -3560,6 +3578,7 @@ int dissect_loop_device_and_warn(
assert(loop);
return dissect_log_error(
+ LOG_ERR,
dissect_loop_device(loop, verity, mount_options, image_policy, flags, ret),
loop->backing_file ?: loop->node,
verity);
diff --git a/src/shared/dissect-image.h b/src/shared/dissect-image.h
index a55ad63d2d..184e6151ed 100644
--- a/src/shared/dissect-image.h
+++ b/src/shared/dissect-image.h
@@ -147,6 +147,7 @@ static inline int probe_filesystem(const char *path, char **ret_fstype) {
return probe_filesystem_full(-1, path, 0, UINT64_MAX, ret_fstype);
}
+int dissect_log_error(int log_level, int r, const char *name, const VeritySettings *verity);
int dissect_image_file(const char *path, const VeritySettings *verity, const MountOptions *mount_options, const ImagePolicy *image_policy, DissectImageFlags flags, DissectedImage **ret);
int dissect_image_file_and_warn(const char *path, const VeritySettings *verity, const MountOptions *mount_options, const ImagePolicy *image_policy, DissectImageFlags flags, DissectedImage **ret);
int dissect_loop_device(LoopDevice *loop, const VeritySettings *verity, const MountOptions *mount_options, const ImagePolicy *image_policy, DissectImageFlags flags, DissectedImage **ret);
diff --git a/src/shared/find-esp.c b/src/shared/find-esp.c
index 6a0002a2bd..c4cf508517 100644
--- a/src/shared/find-esp.c
+++ b/src/shared/find-esp.c
@@ -540,7 +540,7 @@ int find_esp_and_warn(
return r;
if (ret_path) {
- r = path_prefix_root_cwd(p, root, ret_path);
+ r = chaseat_prefix_root(p, root, ret_path);
if (r < 0)
return r;
}
@@ -859,7 +859,7 @@ int find_xbootldr_and_warn(
return r;
if (ret_path) {
- r = path_prefix_root_cwd(p, root, ret_path);
+ r = chaseat_prefix_root(p, root, ret_path);
if (r < 0)
return r;
}
diff --git a/src/test/test-chase.c b/src/test/test-chase.c
index 1e98f5c6ed..558f4109e3 100644
--- a/src/test/test-chase.c
+++ b/src/test/test-chase.c
@@ -442,6 +442,43 @@ TEST(chaseat) {
assert_se(streq(result, "/usr"));
result = mfree(result);
+ /* If the file descriptor points to the root directory, the result will be absolute. */
+
+ fd = open("/", O_CLOEXEC | O_DIRECTORY | O_PATH);
+ assert_se(fd >= 0);
+
+ assert_se(chaseat(fd, p, 0, &result, NULL) >= 0);
+ assert_se(streq(result, "/usr"));
+ result = mfree(result);
+
+ assert_se(chaseat(fd, p, CHASE_AT_RESOLVE_IN_ROOT, &result, NULL) >= 0);
+ assert_se(streq(result, "/usr"));
+ result = mfree(result);
+
+ fd = safe_close(fd);
+
+ /* If the file descriptor does not point to the root directory, the result will be relative
+ * unless the result is outside of the specified file descriptor. */
+
+ assert_se(chaseat(tfd, "abc", 0, &result, NULL) >= 0);
+ assert_se(streq(result, "/usr"));
+ result = mfree(result);
+
+ assert_se(chaseat(tfd, "/abc", 0, &result, NULL) >= 0);
+ assert_se(streq(result, "/usr"));
+ result = mfree(result);
+
+ assert_se(chaseat(tfd, "abc", CHASE_AT_RESOLVE_IN_ROOT, NULL, NULL) == -ENOENT);
+ assert_se(chaseat(tfd, "/abc", CHASE_AT_RESOLVE_IN_ROOT, NULL, NULL) == -ENOENT);
+
+ assert_se(chaseat(tfd, "abc", CHASE_AT_RESOLVE_IN_ROOT | CHASE_NONEXISTENT, &result, NULL) >= 0);
+ assert_se(streq(result, "usr"));
+ result = mfree(result);
+
+ assert_se(chaseat(tfd, "/abc", CHASE_AT_RESOLVE_IN_ROOT | CHASE_NONEXISTENT, &result, NULL) >= 0);
+ assert_se(streq(result, "usr"));
+ result = mfree(result);
+
/* Test that absolute path or not are the same when resolving relative to a directory file
* descriptor and that we always get a relative path back. */
@@ -611,4 +648,41 @@ static int intro(void) {
return EXIT_SUCCESS;
}
+TEST(chaseat_prefix_root) {
+ _cleanup_free_ char *cwd = NULL, *ret = NULL, *expected = NULL;
+
+ assert_se(safe_getcwd(&cwd) >= 0);
+
+ assert_se(chaseat_prefix_root("/hoge", NULL, &ret) >= 0);
+ assert_se(streq(ret, "/hoge"));
+
+ ret = mfree(ret);
+
+ assert_se(chaseat_prefix_root("/hoge", "a/b/c", &ret) >= 0);
+ assert_se(streq(ret, "/hoge"));
+
+ ret = mfree(ret);
+
+ assert_se(chaseat_prefix_root("hoge", "/a/b//./c///", &ret) >= 0);
+ assert_se(streq(ret, "/a/b/c/hoge"));
+
+ ret = mfree(ret);
+
+ assert_se(chaseat_prefix_root("hoge", "a/b//./c///", &ret) >= 0);
+ assert_se(expected = path_join(cwd, "a/b/c/hoge"));
+ assert_se(streq(ret, expected));
+
+ ret = mfree(ret);
+ expected = mfree(expected);
+
+ assert_se(chaseat_prefix_root("./hoge/aaa/../././b", "/a/b//./c///", &ret) >= 0);
+ assert_se(streq(ret, "/a/b/c/hoge/aaa/../././b"));
+
+ ret = mfree(ret);
+
+ assert_se(chaseat_prefix_root("./hoge/aaa/../././b", "a/b//./c///", &ret) >= 0);
+ assert_se(expected = path_join(cwd, "a/b/c/hoge/aaa/../././b"));
+ assert_se(streq(ret, expected));
+}
+
DEFINE_TEST_MAIN_WITH_INTRO(LOG_INFO, intro);
diff --git a/src/test/test-path-util.c b/src/test/test-path-util.c
index 22e8f3481a..e40ffea4d5 100644
--- a/src/test/test-path-util.c
+++ b/src/test/test-path-util.c
@@ -494,45 +494,6 @@ TEST(fsck_exists) {
assert_se(fsck_exists_for_fstype("/../bin/") == 0);
}
-TEST(path_prefix_root_cwd) {
- _cleanup_free_ char *cwd = NULL, *ret = NULL, *expected = NULL;
-
- assert_se(safe_getcwd(&cwd) >= 0);
-
- assert_se(path_prefix_root_cwd("hoge", NULL, &ret) >= 0);
- assert_se(expected = path_join(cwd, "hoge"));
- assert_se(streq(ret, expected));
-
- ret = mfree(ret);
- expected = mfree(expected);
-
- assert_se(path_prefix_root_cwd("/hoge", NULL, &ret) >= 0);
- assert_se(streq(ret, "/hoge"));
-
- ret = mfree(ret);
-
- assert_se(path_prefix_root_cwd("hoge", "/a/b//./c///", &ret) >= 0);
- assert_se(streq(ret, "/a/b/c/hoge"));
-
- ret = mfree(ret);
-
- assert_se(path_prefix_root_cwd("hoge", "a/b//./c///", &ret) >= 0);
- assert_se(expected = path_join(cwd, "a/b/c/hoge"));
- assert_se(streq(ret, expected));
-
- ret = mfree(ret);
- expected = mfree(expected);
-
- assert_se(path_prefix_root_cwd("/../hoge/aaa/../././b", "/a/b//./c///", &ret) >= 0);
- assert_se(streq(ret, "/a/b/c/../hoge/aaa/../././b"));
-
- ret = mfree(ret);
-
- assert_se(path_prefix_root_cwd("/../hoge/aaa/../././b", "a/b//./c///", &ret) >= 0);
- assert_se(expected = path_join(cwd, "a/b/c/../hoge/aaa/../././b"));
- assert_se(streq(ret, expected));
-}
-
static void test_path_make_relative_one(const char *from, const char *to, const char *expected) {
_cleanup_free_ char *z = NULL;
int r;
diff --git a/src/udev/iocost/iocost.c b/src/udev/iocost/iocost.c
new file mode 100644
index 0000000000..54b50b4a8d
--- /dev/null
+++ b/src/udev/iocost/iocost.c
@@ -0,0 +1,334 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include <errno.h>
+#include <getopt.h>
+#include <stdio.h>
+#include <unistd.h>
+
+#include "sd-device.h"
+
+#include "alloc-util.h"
+#include "build.h"
+#include "cgroup-util.h"
+#include "conf-parser.h"
+#include "devnum-util.h"
+#include "device-util.h"
+#include "main-func.h"
+#include "path-util.h"
+#include "pretty-print.h"
+#include "verbs.h"
+
+static char *arg_target_solution = NULL;
+STATIC_DESTRUCTOR_REGISTER(arg_target_solution, freep);
+
+static int parse_config(void) {
+ static const ConfigTableItem items[] = {
+ { "IOCost", "TargetSolution", config_parse_string, 0, &arg_target_solution },
+ };
+ return config_parse(
+ NULL,
+ "/etc/udev/iocost.conf",
+ NULL,
+ "IOCost\0",
+ config_item_table_lookup,
+ items,
+ CONFIG_PARSE_WARN,
+ NULL,
+ NULL);
+}
+
+static int help(void) {
+ printf("%s [OPTIONS...]\n\n"
+ "Set up iocost model and qos solutions for block devices\n"
+ "\nCommands:\n"
+ " apply <path> [SOLUTION] Apply solution for the device if\n"
+ " found, do nothing otherwise\n"
+ " query <path> Query the known solution for\n"
+ " the device\n"
+ "\nOptions:\n"
+ " -h --help Show this help\n"
+ " --version Show package version\n",
+ program_invocation_short_name);
+
+ return 0;
+}
+
+static int parse_argv(int argc, char *argv[]) {
+ enum {
+ ARG_VERSION = 0x100,
+ };
+
+ static const struct option options[] = {
+ { "help", no_argument, NULL, 'h' },
+ { "version", no_argument, NULL, ARG_VERSION },
+ {}
+ };
+
+ int c;
+
+ assert(argc >= 1);
+ assert(argv);
+
+ while ((c = getopt_long(argc, argv, "h", options, NULL)) >= 0)
+ switch (c) {
+
+ case 'h':
+ return help();
+
+ case ARG_VERSION:
+ return version();
+
+ case '?':
+ return -EINVAL;
+
+ default:
+ assert_not_reached();
+ }
+
+ return 1;
+}
+
+static int get_known_solutions(sd_device *device, char ***ret_solutions) {
+        _cleanup_strv_free_ char **s = NULL; /* strv cleanup: releases the strings, not only the array */
+        const char *value;
+        int r;
+
+        assert(ret_solutions);
+
+        /* Split the device's IOCOST_SOLUTIONS property on whitespace and hand the list to the caller. */
+        r = sd_device_get_property_value(device, "IOCOST_SOLUTIONS", &value);
+        if (r < 0)
+                return r;
+        s = strv_split(value, WHITESPACE);
+        if (!s)
+                return -ENOMEM;
+
+        *ret_solutions = TAKE_PTR(s);
+
+        return 0;
+}
+
+static int choose_solution(char **solutions, const char **ret_name) {
+ assert(ret_name);
+
+ if (strv_isempty(solutions))
+ return log_error_errno(
+ SYNTHETIC_ERRNO(EINVAL), "IOCOST_SOLUTIONS exists in hwdb but is empty.");
+
+ if (arg_target_solution && strv_find(solutions, arg_target_solution)) {
+ *ret_name = arg_target_solution;
+ log_debug("Selected solution based on target solution: %s", *ret_name);
+ } else {
+ *ret_name = solutions[0];
+ log_debug("Selected first available solution: %s", *ret_name);
+ }
+
+ return 0;
+}
+
+static int query_named_solution(
+                sd_device *device,
+                const char *name,
+                const char **ret_model,
+                const char **ret_qos) {
+
+        _cleanup_strv_free_ char **solutions = NULL;
+        _cleanup_free_ char *upper_name = NULL, *qos_key = NULL, *model_key = NULL;
+        const char *qos = NULL, *model = NULL;
+        int r;
+
+        assert(ret_qos);
+        assert(ret_model);
+
+        /* Reads the IOCOST_QOS_<NAME> and IOCOST_MODEL_<NAME> properties of the device for the solution
+         * 'name'. If NULL is passed we query the default solution, which is the first one listed in the
+         * IOCOST_SOLUTIONS key or the one specified by the TargetSolution setting. */
+        if (!name) {
+                r = get_known_solutions(device, &solutions);
+                if (r == -ENOENT)
+                        return log_device_debug_errno(device, r, "No entry found for device, skipping iocost logic.");
+                if (r < 0)
+                        return log_device_error_errno(device, r, "Failed to query solutions from device: %m");
+
+                r = choose_solution(solutions, &name);
+                if (r < 0)
+                        return r;
+        }
+
+        upper_name = strdup(name); /* private copy: 'name' is const and is modified below */
+        if (!upper_name)
+                return log_oom();
+
+        ascii_strupper(upper_name);
+        string_replace_char(upper_name, '-', '_'); /* property keys use '_' where solution names use '-' */
+
+        qos_key = strjoin("IOCOST_QOS_", upper_name);
+        if (!qos_key)
+                return log_oom();
+
+        model_key = strjoin("IOCOST_MODEL_", upper_name);
+        if (!model_key)
+                return log_oom();
+
+        r = sd_device_get_property_value(device, qos_key, &qos);
+        if (r == -ENOENT)
+                return log_device_debug_errno(device, r, "No value found for key %s, skipping iocost logic.", qos_key);
+        if (r < 0)
+                return log_device_error_errno(device, r, "Failed to obtain QoS for iocost solution from device: %m");
+
+        r = sd_device_get_property_value(device, model_key, &model);
+        if (r == -ENOENT)
+                return log_device_debug_errno(device, r, "No value found for key %s, skipping iocost logic.", model_key);
+        if (r < 0)
+                return log_device_error_errno(device, r, "Failed to obtain model for iocost solution from device: %m");
+
+        *ret_qos = qos;
+        *ret_model = model;
+
+        return 0;
+}
+
+static int apply_solution_for_path(const char *path, const char *name) {
+ _cleanup_(sd_device_unrefp) sd_device *device = NULL;
+ _cleanup_free_ char *qos = NULL, *model = NULL;
+ const char *qos_params = NULL, *model_params = NULL;
+ dev_t devnum;
+ int r;
+
+ r = sd_device_new_from_path(&device, path);
+ if (r < 0)
+ return log_error_errno(r, "Error looking up device: %m");
+
+ r = query_named_solution(device, name, &model_params, &qos_params);
+ if (r == -ENOENT)
+ return 0;
+ if (r < 0)
+ return r;
+
+ r = sd_device_get_devnum(device, &devnum);
+ if (r < 0)
+ return log_device_error_errno(device, r, "Error getting devnum: %m");
+
+ if (asprintf(&qos, DEVNUM_FORMAT_STR " enable=1 ctrl=user %s", DEVNUM_FORMAT_VAL(devnum), qos_params) < 0)
+ return log_oom();
+
+ if (asprintf(&model, DEVNUM_FORMAT_STR " model=linear ctrl=user %s", DEVNUM_FORMAT_VAL(devnum), model_params) < 0)
+ return log_oom();
+
+ log_debug("Applying iocost parameters to %s using solution '%s'\n"
+ "\tio.cost.qos: %s\n"
+ "\tio.cost.model: %s\n", path, name ?: "default", qos, model);
+
+ r = cg_set_attribute("io", NULL, "io.cost.qos", qos);
+ if (r < 0) {
+ log_device_full_errno(device, r == -ENOENT ? LOG_DEBUG : LOG_ERR, r, "Failed to set io.cost.qos: %m");
+ return r == -ENOENT ? 0 : r;
+ }
+
+ r = cg_set_attribute("io", NULL, "io.cost.model", model);
+ if (r < 0) {
+ log_device_full_errno(device, r == -ENOENT ? LOG_DEBUG : LOG_ERR, r, "Failed to set io.cost.model: %m");
+ return r == -ENOENT ? 0 : r;
+ }
+
+ return 0;
+}
+
+static int query_solutions_for_path(const char *path) {
+        _cleanup_(sd_device_unrefp) sd_device *device = NULL;
+        _cleanup_strv_free_ char **solutions = NULL;
+        const char *default_solution = NULL, *model_name = NULL;
+        int r;
+
+        /* Logs the solutions listed in the device's IOCOST_SOLUTIONS property and the one that would be picked. */
+        r = sd_device_new_from_path(&device, path);
+        if (r < 0)
+                return log_error_errno(r, "Error looking up device: %m");
+
+        r = sd_device_get_property_value(device, "ID_MODEL_FROM_DATABASE", &model_name);
+        if (r == -ENOENT) {
+                log_device_debug(device, "Missing ID_MODEL_FROM_DATABASE property, trying ID_MODEL");
+                r = sd_device_get_property_value(device, "ID_MODEL", &model_name);
+                if (r == -ENOENT) {
+                        log_device_info(device, "Device model not found");
+                        return 0;
+                }
+        }
+        if (r < 0)
+                return log_device_error_errno(device, r, "Model name for device %s is unknown: %m", path);
+
+        r = get_known_solutions(device, &solutions);
+        if (r == -ENOENT) {
+                log_device_info(device, "Attribute IOCOST_SOLUTIONS missing, model not found in hwdb.");
+                return 0;
+        }
+        if (r < 0)
+                return log_device_error_errno(device, r, "Couldn't access IOCOST_SOLUTIONS for device %s, model name %s on hwdb: %m", path, model_name);
+
+        r = choose_solution(solutions, &default_solution);
+        if (r < 0)
+                return r;
+
+        log_info("Known solutions for %s model name: \"%s\"\n"
+                 "Preferred solution: %s\n"
+                 "Solution that would be applied: %s",
+                 path, model_name,
+                 arg_target_solution, default_solution);
+
+        STRV_FOREACH(s, solutions) {
+                const char *model = NULL, *qos = NULL;
+
+                r = query_named_solution(device, *s, &model, &qos);
+                if (r < 0 || !model || !qos) /* skip solutions lacking a qos or model entry */
+                        continue;
+
+                log_info("%s: io.cost.qos: %s\n"
+                         "%s: io.cost.model: %s", *s, qos, *s, model);
+        }
+
+        return 0;
+}
+
+static int verb_query(int argc, char *argv[], void *userdata) {
+ return query_solutions_for_path(ASSERT_PTR(argv[1]));
+}
+
+static int verb_apply(int argc, char *argv[], void *userdata) {
+ return apply_solution_for_path(
+ ASSERT_PTR(argv[1]),
+ argc > 2 ? ASSERT_PTR(argv[2]) : NULL);
+}
+
+static int iocost_main(int argc, char *argv[]) {
+ static const Verb verbs[] = {
+ { "query", 2, 2, 0, verb_query },
+ { "apply", 2, 3, 0, verb_apply },
+ {},
+ };
+
+ return dispatch_verb(argc, argv, verbs, NULL);
+}
+
+static int run(int argc, char *argv[]) {
+ int r;
+
+ log_setup();
+
+ r = parse_argv(argc, argv);
+ if (r <= 0)
+ return r;
+
+ (void) parse_config();
+
+ if (!arg_target_solution) {
+ arg_target_solution = strdup("naive");
+ if (!arg_target_solution)
+ return log_oom();
+ }
+
+ log_debug("Target solution: %s.", arg_target_solution);
+
+ return iocost_main(argc, argv);
+}
+
+DEFINE_MAIN_FUNCTION(run);
diff --git a/src/udev/iocost/iocost.conf b/src/udev/iocost/iocost.conf
new file mode 100644
index 0000000000..394ea349ee
--- /dev/null
+++ b/src/udev/iocost/iocost.conf
@@ -0,0 +1,17 @@
+# This file is part of systemd.
+#
+# systemd is free software; you can redistribute it and/or modify it under the
+# terms of the GNU Lesser General Public License as published by the Free
+# Software Foundation; either version 2.1 of the License, or (at your option)
+# any later version.
+#
+# Entries in this file show the compile time defaults. Local configuration
+# should be created by modifying this file. Defaults can be restored by
+# simply deleting this file.
+#
+# Use 'systemd-analyze cat-config udev/iocost.conf' to display the full config.
+#
+# See iocost.conf(5) for details.
+
+[IOCost]
+#TargetSolution=naive
diff --git a/src/udev/meson.build b/src/udev/meson.build
index 5b44dd8d7d..081948d223 100644
--- a/src/udev/meson.build
+++ b/src/udev/meson.build
@@ -116,6 +116,7 @@ udev_progs = [['ata_id/ata_id.c'],
['cdrom_id/cdrom_id.c'],
['fido_id/fido_id.c',
'fido_id/fido_id_desc.c'],
+ ['iocost/iocost.c'],
['scsi_id/scsi_id.c',
'scsi_id/scsi_serial.c'],
['v4l_id/v4l_id.c'],
@@ -149,6 +150,8 @@ endforeach
if install_sysconfdir_samples
install_data('udev.conf',
install_dir : sysconfdir / 'udev')
+ install_data('iocost/iocost.conf',
+ install_dir : sysconfdir / 'udev')
endif
custom_target(
diff --git a/test/test-bootctl-json.sh b/test/test-bootctl-json.sh
index fde5fbd1de..4d7c468241 100755
--- a/test/test-bootctl-json.sh
+++ b/test/test-bootctl-json.sh
@@ -28,6 +28,15 @@ command -v jq >/dev/null || {
"$bootctl" -R || test "$?" -eq 80
"$bootctl" -RR || test "$?" -eq 80
+# regression tests for --print-esp-path/--print-boot-path, see
+# https://github.com/systemd/systemd/pull/27199#issuecomment-1511387731
+if ret=$("$bootctl" --print-esp-path); then
+ test "$ret" = "/efi" -o "$ret" = "/boot" -o "$ret" = "/boot/efi"
+fi
+if ret=$("$bootctl" --print-boot-path); then
+ test "$ret" = "/efi" -o "$ret" = "/boot" -o "$ret" = "/boot/efi"
+fi
+
if "$bootctl" -R > /dev/null ; then
P=$("$bootctl" -R)
PP=$("$bootctl" -RR)
diff --git a/test/units/testsuite-54.sh b/test/units/testsuite-54.sh
index ab896a5759..4f9a0c2877 100755
--- a/test/units/testsuite-54.sh
+++ b/test/units/testsuite-54.sh
@@ -135,6 +135,7 @@ fi
systemd-run -p DynamicUser=yes -p 'LoadCredential=os:/etc/os-release' \
-p 'ExecStartPre=true' \
-p 'ExecStartPre=systemd-creds cat os' \
+ --unit=test-54-exec-start.service \
--wait \
--pipe \
true | cmp /etc/os-release