diff options
Diffstat (limited to 'src')
143 files changed, 3013 insertions, 1715 deletions
diff --git a/src/basic/chase-symlinks.c b/src/basic/chase-symlinks.c index 0bb07000ba..385d0aed69 100644 --- a/src/basic/chase-symlinks.c +++ b/src/basic/chase-symlinks.c @@ -466,8 +466,10 @@ int chase_symlinks( return -errno; flags |= CHASE_AT_RESOLVE_IN_ROOT; - } else + } else { + path = absolute; fd = AT_FDCWD; + } r = chase_symlinks_at(fd, path, flags & ~CHASE_PREFIX_ROOT, ret_path ? &p : NULL, ret_fd ? &pfd : NULL); if (r < 0) @@ -557,7 +559,7 @@ int chase_symlinks_and_opendir( return r; assert(path_fd >= 0); - d = opendir(FORMAT_PROC_FD_PATH(path_fd)); + d = xopendirat(path_fd, ".", O_NOFOLLOW); if (!d) return -errno; diff --git a/src/basic/constants.h b/src/basic/constants.h index 54021911ab..4a24ba9c8c 100644 --- a/src/basic/constants.h +++ b/src/basic/constants.h @@ -43,7 +43,7 @@ #define DEFAULT_EXIT_USEC (30*USEC_PER_SEC) /* The default value for the net.unix.max_dgram_qlen sysctl */ -#define DEFAULT_UNIX_MAX_DGRAM_QLEN 512UL +#define DEFAULT_UNIX_MAX_DGRAM_QLEN 512 #define SIGNALS_CRASH_HANDLER SIGSEGV,SIGILL,SIGFPE,SIGBUS,SIGQUIT,SIGABRT #define SIGNALS_IGNORE SIGPIPE diff --git a/src/basic/escape.c b/src/basic/escape.c index 1cb7ced545..e04b435d5b 100644 --- a/src/basic/escape.c +++ b/src/basic/escape.c @@ -445,31 +445,30 @@ char* escape_non_printable_full(const char *str, size_t console_width, XEscapeFl } char* octescape(const char *s, size_t len) { - char *r, *t; - const char *f; + char *buf, *t; - /* Escapes all chars in bad, in addition to \ and " chars, - * in \nnn style escaping. */ + /* Escapes all chars in bad, in addition to \ and " chars, in \nnn style escaping. 
*/ - r = new(char, len * 4 + 1); - if (!r) + assert(s || len == 0); + + t = buf = new(char, len * 4 + 1); + if (!buf) return NULL; - for (f = s, t = r; f < s + len; f++) { + for (size_t i = 0; i < len; i++) { + uint8_t u = (uint8_t) s[i]; - if (*f < ' ' || *f >= 127 || IN_SET(*f, '\\', '"')) { + if (u < ' ' || u >= 127 || IN_SET(u, '\\', '"')) { *(t++) = '\\'; - *(t++) = '0' + (*f >> 6); - *(t++) = '0' + ((*f >> 3) & 8); - *(t++) = '0' + (*f & 8); + *(t++) = '0' + (u >> 6); + *(t++) = '0' + ((u >> 3) & 7); + *(t++) = '0' + (u & 7); } else - *(t++) = *f; + *(t++) = u; } *t = 0; - - return r; - + return buf; } static char* strcpy_backslash_escaped(char *t, const char *s, const char *bad) { diff --git a/src/basic/fd-util.c b/src/basic/fd-util.c index ec33a61588..01a45e4384 100644 --- a/src/basic/fd-util.c +++ b/src/basic/fd-util.c @@ -780,6 +780,37 @@ int fd_reopen(int fd, int flags) { return new_fd; } +int fd_reopen_condition( + int fd, + int flags, + int mask, + int *ret_new_fd) { + + int r, new_fd; + + assert(fd >= 0); + + /* Invokes fd_reopen(fd, flags), but only if the existing F_GETFL flags don't match the specified + * flags (masked by the specified mask). This is useful for converting O_PATH fds into real fds if + * needed, but only then. 
*/ + + r = fcntl(fd, F_GETFL); + if (r < 0) + return -errno; + + if ((r & mask) == (flags & mask)) { + *ret_new_fd = -1; + return fd; + } + + new_fd = fd_reopen(fd, flags); + if (new_fd < 0) + return new_fd; + + *ret_new_fd = new_fd; + return new_fd; +} + int read_nr_open(void) { _cleanup_free_ char *nr_open = NULL; int r; diff --git a/src/basic/fd-util.h b/src/basic/fd-util.h index 29c7d86f27..fbaa458613 100644 --- a/src/basic/fd-util.h +++ b/src/basic/fd-util.h @@ -108,6 +108,7 @@ static inline int make_null_stdio(void) { }) int fd_reopen(int fd, int flags); +int fd_reopen_condition(int fd, int flags, int mask, int *ret_new_fd); int read_nr_open(void); int fd_get_diskseq(int fd, uint64_t *ret); diff --git a/src/basic/glyph-util.c b/src/basic/glyph-util.c index 67f2270daf..de1224f04f 100644 --- a/src/basic/glyph-util.c +++ b/src/basic/glyph-util.c @@ -71,6 +71,7 @@ const char *special_glyph(SpecialGlyph code) { [SPECIAL_GLYPH_RECYCLING] = "~", [SPECIAL_GLYPH_DOWNLOAD] = "\\", [SPECIAL_GLYPH_SPARKLES] = "*", + [SPECIAL_GLYPH_WARNING_SIGN] = "!", }, /* UTF-8 */ @@ -124,10 +125,11 @@ const char *special_glyph(SpecialGlyph code) { /* This emoji is a single character cell glyph in Unicode, and two in ASCII */ [SPECIAL_GLYPH_TOUCH] = u8"👆", /* actually called: BACKHAND INDEX POINTING UP */ - /* These three emojis are single character cell glyphs in Unicode and also in ASCII. */ + /* These four emojis are single character cell glyphs in Unicode and also in ASCII. 
*/ [SPECIAL_GLYPH_RECYCLING] = u8"♻️", /* actually called: UNIVERSAL RECYCLNG SYMBOL */ [SPECIAL_GLYPH_DOWNLOAD] = u8"⤵️", /* actually called: RIGHT ARROW CURVING DOWN */ [SPECIAL_GLYPH_SPARKLES] = u8"✨", + [SPECIAL_GLYPH_WARNING_SIGN] = u8"⚠️", }, }; diff --git a/src/basic/glyph-util.h b/src/basic/glyph-util.h index 621d7a85b7..b64639622e 100644 --- a/src/basic/glyph-util.h +++ b/src/basic/glyph-util.h @@ -44,6 +44,7 @@ typedef enum SpecialGlyph { SPECIAL_GLYPH_RECYCLING, SPECIAL_GLYPH_DOWNLOAD, SPECIAL_GLYPH_SPARKLES, + SPECIAL_GLYPH_WARNING_SIGN, _SPECIAL_GLYPH_MAX, _SPECIAL_GLYPH_INVALID = -EINVAL, } SpecialGlyph; diff --git a/src/basic/hostname-util.h b/src/basic/hostname-util.h index a00b852395..bcac3d9fb0 100644 --- a/src/basic/hostname-util.h +++ b/src/basic/hostname-util.h @@ -60,4 +60,12 @@ static inline bool is_outbound_hostname(const char *hostname) { return STRCASE_IN_SET(hostname, "_outbound", "_outbound."); } +static inline bool is_dns_stub_hostname(const char *hostname) { + return STRCASE_IN_SET(hostname, "_localdnsstub", "_localdnsstub."); +} + +static inline bool is_dns_proxy_stub_hostname(const char *hostname) { + return STRCASE_IN_SET(hostname, "_localdnsproxy", "_localdnsproxy."); +} + int get_pretty_hostname(char **ret); diff --git a/src/basic/terminal-util.c b/src/basic/terminal-util.c index 8fa9986a76..7bc2f71bcf 100644 --- a/src/basic/terminal-util.c +++ b/src/basic/terminal-util.c @@ -268,7 +268,7 @@ int reset_terminal_fd(int fd, bool switch_to_text) { termios.c_iflag &= ~(IGNBRK | BRKINT | ISTRIP | INLCR | IGNCR | IUCLC); termios.c_iflag |= ICRNL | IMAXBEL | IUTF8; - termios.c_oflag |= ONLCR; + termios.c_oflag |= ONLCR | OPOST; termios.c_cflag |= CREAD; termios.c_lflag = ISIG | ICANON | IEXTEN | ECHO | ECHOE | ECHOK | ECHOCTL | ECHOPRT | ECHOKE; diff --git a/src/basic/virt.c b/src/basic/virt.c index 9a0b5a28d1..7c238613e6 100644 --- a/src/basic/virt.c +++ b/src/basic/virt.c @@ -50,6 +50,8 @@ static Virtualization detect_vm_cpuid(void) { { 
"QNXQVMBSQG", VIRTUALIZATION_QNX }, /* https://projectacrn.org */ { "ACRNACRNACRN", VIRTUALIZATION_ACRN }, + /* https://www.lockheedmartin.com/en-us/products/Hardened-Security-for-Intel-Processors.html */ + { "SRESRESRESRE", VIRTUALIZATION_SRE }, }; uint32_t eax, ebx, ecx, edx; @@ -1036,6 +1038,7 @@ static const char *const virtualization_table[_VIRTUALIZATION_MAX] = { [VIRTUALIZATION_ACRN] = "acrn", [VIRTUALIZATION_POWERVM] = "powervm", [VIRTUALIZATION_APPLE] = "apple", + [VIRTUALIZATION_SRE] = "sre", [VIRTUALIZATION_VM_OTHER] = "vm-other", [VIRTUALIZATION_SYSTEMD_NSPAWN] = "systemd-nspawn", diff --git a/src/basic/virt.h b/src/basic/virt.h index e19a238939..d49f3237e8 100644 --- a/src/basic/virt.h +++ b/src/basic/virt.h @@ -26,6 +26,7 @@ typedef enum Virtualization { VIRTUALIZATION_ACRN, VIRTUALIZATION_POWERVM, VIRTUALIZATION_APPLE, + VIRTUALIZATION_SRE, VIRTUALIZATION_VM_OTHER, VIRTUALIZATION_VM_LAST = VIRTUALIZATION_VM_OTHER, diff --git a/src/boot/efi/boot.c b/src/boot/efi/boot.c index 85e936b866..2e657a8bf9 100644 --- a/src/boot/efi/boot.c +++ b/src/boot/efi/boot.c @@ -1204,7 +1204,7 @@ static void config_defaults_load_from_file(Config *config, char *content) { continue; } free(config->entry_default_config); - config->entry_default_config = xstra_to_str(value); + config->entry_default_config = xstr8_to_16(value); continue; } @@ -1418,32 +1418,32 @@ static void config_entry_add_type1( while ((line = line_get_key_value(content, " \t", &pos, &key, &value))) { if (streq8(key, "title")) { free(entry->title); - entry->title = xstra_to_str(value); + entry->title = xstr8_to_16(value); continue; } if (streq8(key, "sort-key")) { free(entry->sort_key); - entry->sort_key = xstra_to_str(value); + entry->sort_key = xstr8_to_16(value); continue; } if (streq8(key, "version")) { free(entry->version); - entry->version = xstra_to_str(value); + entry->version = xstr8_to_16(value); continue; } if (streq8(key, "machine-id")) { free(entry->machine_id); - entry->machine_id = 
xstra_to_str(value); + entry->machine_id = xstr8_to_16(value); continue; } if (streq8(key, "linux")) { free(entry->loader); entry->type = LOADER_LINUX; - entry->loader = xstra_to_path(value); + entry->loader = xstr8_to_path(value); entry->key = 'l'; continue; } @@ -1451,7 +1451,7 @@ static void config_entry_add_type1( if (streq8(key, "efi")) { entry->type = LOADER_EFI; free(entry->loader); - entry->loader = xstra_to_path(value); + entry->loader = xstr8_to_path(value); /* do not add an entry for ourselves */ if (strcaseeq16(entry->loader, loaded_image_path)) { @@ -1472,7 +1472,7 @@ static void config_entry_add_type1( if (streq8(key, "devicetree")) { free(entry->devicetree); - entry->devicetree = xstra_to_path(value); + entry->devicetree = xstr8_to_path(value); continue; } @@ -1481,7 +1481,7 @@ static void config_entry_add_type1( entry->initrd, n_initrd == 0 ? 0 : (n_initrd + 1) * sizeof(uint16_t *), (n_initrd + 2) * sizeof(uint16_t *)); - entry->initrd[n_initrd++] = xstra_to_path(value); + entry->initrd[n_initrd++] = xstr8_to_path(value); entry->initrd[n_initrd] = NULL; continue; } @@ -1489,7 +1489,7 @@ static void config_entry_add_type1( if (streq8(key, "options")) { _cleanup_free_ char16_t *new = NULL; - new = xstra_to_str(value); + new = xstr8_to_16(value); if (entry->options) { char16_t *s = xpool_print(L"%s %s", entry->options, new); free(entry->options); @@ -1550,7 +1550,7 @@ static EFI_STATUS efivar_get_timeout(const char16_t *var, uint32_t *ret_value) { static void config_load_defaults(Config *config, EFI_FILE *root_dir) { _cleanup_free_ char *content = NULL; - UINTN value; + UINTN value = 0; /* avoid false maybe-uninitialized warning */ EFI_STATUS err; assert(root_dir); @@ -2134,49 +2134,49 @@ static void config_entry_add_unified( while ((line = line_get_key_value(content, "=", &pos, &key, &value))) { if (streq8(key, "PRETTY_NAME")) { free(os_pretty_name); - os_pretty_name = xstra_to_str(value); + os_pretty_name = xstr8_to_16(value); continue; } if 
(streq8(key, "IMAGE_ID")) { free(os_image_id); - os_image_id = xstra_to_str(value); + os_image_id = xstr8_to_16(value); continue; } if (streq8(key, "NAME")) { free(os_name); - os_name = xstra_to_str(value); + os_name = xstr8_to_16(value); continue; } if (streq8(key, "ID")) { free(os_id); - os_id = xstra_to_str(value); + os_id = xstr8_to_16(value); continue; } if (streq8(key, "IMAGE_VERSION")) { free(os_image_version); - os_image_version = xstra_to_str(value); + os_image_version = xstr8_to_16(value); continue; } if (streq8(key, "VERSION")) { free(os_version); - os_version = xstra_to_str(value); + os_version = xstr8_to_16(value); continue; } if (streq8(key, "VERSION_ID")) { free(os_version_id); - os_version_id = xstra_to_str(value); + os_version_id = xstr8_to_16(value); continue; } if (streq8(key, "BUILD_ID")) { free(os_build_id); - os_build_id = xstra_to_str(value); + os_build_id = xstr8_to_16(value); continue; } } @@ -2219,13 +2219,11 @@ static void config_entry_add_unified( content = mfree(content); /* read the embedded cmdline file */ - err = file_read(linux_dir, f->FileName, offs[SECTION_CMDLINE], szs[SECTION_CMDLINE], &content, NULL); + size_t cmdline_len; + err = file_read(linux_dir, f->FileName, offs[SECTION_CMDLINE], szs[SECTION_CMDLINE], &content, &cmdline_len); if (err == EFI_SUCCESS) { - /* chomp the newline */ - if (content[szs[SECTION_CMDLINE] - 1] == '\n') - content[szs[SECTION_CMDLINE] - 1] = '\0'; - - entry->options = xstra_to_str(content); + entry->options = xstrn8_to_16(content, cmdline_len); + mangle_stub_cmdline(entry->options); } } } @@ -2235,7 +2233,7 @@ static void config_load_xbootldr( EFI_HANDLE *device) { _cleanup_(file_closep) EFI_FILE *root_dir = NULL; - EFI_HANDLE new_device; + EFI_HANDLE new_device = NULL; /* avoid false maybe-uninitialized warning */ EFI_STATUS err; assert(config); @@ -2640,12 +2638,6 @@ EFI_STATUS efi_main(EFI_HANDLE image, EFI_SYSTEM_TABLE *sys_table) { /* Uncomment the next line if you need to wait for debugger. 
*/ // debug_break(); - /* The firmware may skip initializing some devices for the sake of a faster boot. This is especially - * true for fastboot enabled firmwares. But this means that things we use like input devices or the - * xbootldr partition may not be available yet. Reconnect all drivers should hopefully make the - * firmware initialize everything we need. */ - (void) reconnect_all_drivers(); - err = BS->OpenProtocol(image, &LoadedImageProtocol, (void **)&loaded_image, diff --git a/src/boot/efi/console.c b/src/boot/efi/console.c index 14c0008afb..cd980fd535 100644 --- a/src/boot/efi/console.c +++ b/src/boot/efi/console.c @@ -12,6 +12,20 @@ #define VERTICAL_MAX_OK 1080 #define VIEWPORT_RATIO 10 +static EFI_STATUS console_connect(void) { + EFI_BOOT_MANAGER_POLICY_PROTOCOL *boot_policy; + EFI_STATUS err; + + /* This should make console devices appear/fully initialize on fastboot firmware. */ + + err = BS->LocateProtocol( + &(EFI_GUID) EFI_BOOT_MANAGER_POLICY_PROTOCOL_GUID, NULL, (void **) &boot_policy); + if (err != EFI_SUCCESS) + return err; + + return boot_policy->ConnectDeviceClass(boot_policy, &(EFI_GUID) EFI_BOOT_MANAGER_POLICY_CONSOLE_GUID); +} + static inline void event_closep(EFI_EVENT *event) { if (!*event) return; @@ -47,6 +61,8 @@ EFI_STATUS console_key_read(uint64_t *key, uint64_t timeout_usec) { assert(key); if (!checked) { + console_connect(); + /* Get the *first* TextInputEx device.*/ err = BS->LocateProtocol(&SimpleTextInputExProtocol, NULL, (void **) &extraInEx); if (err != EFI_SUCCESS || BS->CheckEvent(extraInEx->WaitForKeyEx) == EFI_INVALID_PARAMETER) diff --git a/src/boot/efi/cpio.c b/src/boot/efi/cpio.c index 648f9f000f..79b5d4327b 100644 --- a/src/boot/efi/cpio.c +++ b/src/boot/efi/cpio.c @@ -359,24 +359,7 @@ static char16_t *get_dropin_dir(const EFI_DEVICE_PATH *file_path) { if (device_path_to_str(file_path, &file_path_str) != EFI_SUCCESS) return NULL; - for (char16_t *i = file_path_str, *fixed = i;; i++) { - if (*i == '\0') { - *fixed = 
'\0'; - break; - } - - /* Fix device path node separator. */ - if (*i == '/') - *i = '\\'; - - /* Double '\' is not allowed in EFI file paths. */ - if (fixed != file_path_str && fixed[-1] == '\\' && *i == '\\') - continue; - - *fixed = *i; - fixed++; - } - + convert_efi_path(file_path_str); return xpool_print(u"%s.extra.d", file_path_str); } @@ -485,7 +468,7 @@ EFI_STATUS pack_cpio( for (UINTN i = 0; i < n_items; i++) { _cleanup_free_ char *content = NULL; - UINTN contentsize; + UINTN contentsize = 0; /* avoid false maybe-uninitialized warning */ err = file_read(extra_dir, items[i], 0, 0, &content, &contentsize); if (err != EFI_SUCCESS) { diff --git a/src/boot/efi/efi-string.c b/src/boot/efi/efi-string.c index b877c6f224..2ba15673c9 100644 --- a/src/boot/efi/efi-string.c +++ b/src/boot/efi/efi-string.c @@ -9,7 +9,8 @@ # include "util.h" #else # include <stdlib.h> -# include "macro.h" +# include "alloc-util.h" +# define xnew(t, n) ASSERT_SE_PTR(new(t, n)) # define xmalloc(n) ASSERT_SE_PTR(malloc(n)) #endif @@ -138,6 +139,81 @@ DEFINE_STRCHR(char16_t, strchr16); DEFINE_STRNDUP(char, xstrndup8, strnlen8); DEFINE_STRNDUP(char16_t, xstrndup16, strnlen16); +static unsigned utf8_to_unichar(const char *utf8, size_t n, char32_t *c) { + char32_t unichar; + unsigned len; + + assert(utf8); + assert(c); + + if (!(utf8[0] & 0x80)) { + *c = utf8[0]; + return 1; + } else if ((utf8[0] & 0xe0) == 0xc0) { + len = 2; + unichar = utf8[0] & 0x1f; + } else if ((utf8[0] & 0xf0) == 0xe0) { + len = 3; + unichar = utf8[0] & 0x0f; + } else if ((utf8[0] & 0xf8) == 0xf0) { + len = 4; + unichar = utf8[0] & 0x07; + } else if ((utf8[0] & 0xfc) == 0xf8) { + len = 5; + unichar = utf8[0] & 0x03; + } else if ((utf8[0] & 0xfe) == 0xfc) { + len = 6; + unichar = utf8[0] & 0x01; + } else { + *c = UINT32_MAX; + return 1; + } + + if (len > n) { + *c = UINT32_MAX; + return len; + } + + for (unsigned i = 1; i < len; i++) { + if ((utf8[i] & 0xc0) != 0x80) { + *c = UINT32_MAX; + return len; + } + unichar <<= 6; 
+ unichar |= utf8[i] & 0x3f; + } + + *c = unichar; + return len; +} + +/* Convert UTF-8 to UCS-2, skipping any invalid or short byte sequences. */ +char16_t *xstrn8_to_16(const char *str8, size_t n) { + if (!str8 || n == 0) + return NULL; + + size_t i = 0; + char16_t *str16 = xnew(char16_t, n + 1); + + while (n > 0 && *str8 != '\0') { + char32_t unichar; + + size_t utf8len = utf8_to_unichar(str8, n, &unichar); + str8 += utf8len; + n = LESS_BY(n, utf8len); + + switch (unichar) { + case 0 ... 0xd7ffU: + case 0xe000U ... 0xffffU: + str16[i++] = unichar; + break; + } + } + + str16[i] = '\0'; + return str16; +} + static bool efi_fnmatch_prefix(const char16_t *p, const char16_t *h, const char16_t **ret_p, const char16_t **ret_h) { assert(p); assert(h); diff --git a/src/boot/efi/efi-string.h b/src/boot/efi/efi-string.h index d4d76a7c18..e12add0b19 100644 --- a/src/boot/efi/efi-string.h +++ b/src/boot/efi/efi-string.h @@ -99,6 +99,11 @@ static inline char16_t *xstrdup16(const char16_t *s) { return xstrndup16(s, SIZE_MAX); } +char16_t *xstrn8_to_16(const char *str8, size_t n); +static inline char16_t *xstr8_to_16(const char *str8) { + return xstrn8_to_16(str8, strlen8(str8)); +} + bool efi_fnmatch(const char16_t *pattern, const char16_t *haystack); bool parse_number8(const char *s, uint64_t *ret_u, const char **ret_tail); diff --git a/src/boot/efi/linux.c b/src/boot/efi/linux.c index dd7eb48c8c..48801f9dd8 100644 --- a/src/boot/efi/linux.c +++ b/src/boot/efi/linux.c @@ -93,15 +93,16 @@ static EFI_STATUS load_image(EFI_HANDLE parent, const void *source, size_t len, EFI_STATUS linux_exec( EFI_HANDLE parent, - const char *cmdline, UINTN cmdline_len, - const void *linux_buffer, UINTN linux_length, - const void *initrd_buffer, UINTN initrd_length) { + const char16_t *cmdline, + const void *linux_buffer, + size_t linux_length, + const void *initrd_buffer, + size_t initrd_length) { uint32_t compat_address; EFI_STATUS err; assert(parent); - assert(cmdline || cmdline_len == 0); 
assert(linux_buffer && linux_length > 0); assert(initrd_buffer || initrd_length == 0); @@ -113,7 +114,6 @@ EFI_STATUS linux_exec( return linux_exec_efi_handover( parent, cmdline, - cmdline_len, linux_buffer, linux_length, initrd_buffer, @@ -133,7 +133,7 @@ EFI_STATUS linux_exec( return log_error_status_stall(err, u"Error getting kernel loaded image protocol: %r", err); if (cmdline) { - loaded_image->LoadOptions = xstra_to_str(cmdline); + loaded_image->LoadOptions = (void *) cmdline; loaded_image->LoadOptionsSize = strsize16(loaded_image->LoadOptions); } diff --git a/src/boot/efi/linux.h b/src/boot/efi/linux.h index 19e5f5c4a8..f0a6a37ed1 100644 --- a/src/boot/efi/linux.h +++ b/src/boot/efi/linux.h @@ -2,14 +2,19 @@ #pragma once #include <efi.h> +#include <uchar.h> EFI_STATUS linux_exec( EFI_HANDLE parent, - const char *cmdline, UINTN cmdline_len, - const void *linux_buffer, UINTN linux_length, - const void *initrd_buffer, UINTN initrd_length); + const char16_t *cmdline, + const void *linux_buffer, + size_t linux_length, + const void *initrd_buffer, + size_t initrd_length); EFI_STATUS linux_exec_efi_handover( EFI_HANDLE parent, - const char *cmdline, UINTN cmdline_len, - const void *linux_buffer, UINTN linux_length, - const void *initrd_buffer, UINTN initrd_length); + const char16_t *cmdline, + const void *linux_buffer, + size_t linux_length, + const void *initrd_buffer, + size_t initrd_length); diff --git a/src/boot/efi/linux_x86.c b/src/boot/efi/linux_x86.c index 64336ce348..6a5e431107 100644 --- a/src/boot/efi/linux_x86.c +++ b/src/boot/efi/linux_x86.c @@ -126,12 +126,13 @@ static void linux_efi_handover(EFI_HANDLE parent, uintptr_t kernel, BootParams * EFI_STATUS linux_exec_efi_handover( EFI_HANDLE parent, - const char *cmdline, UINTN cmdline_len, - const void *linux_buffer, UINTN linux_length, - const void *initrd_buffer, UINTN initrd_length) { + const char16_t *cmdline, + const void *linux_buffer, + size_t linux_length, + const void *initrd_buffer, + size_t 
initrd_length) { assert(parent); - assert(cmdline || cmdline_len == 0); assert(linux_buffer); assert(initrd_buffer || initrd_length == 0); @@ -185,14 +186,20 @@ EFI_STATUS linux_exec_efi_handover( _cleanup_pages_ Pages cmdline_pages = {}; if (cmdline) { + size_t len = MIN(strlen16(cmdline), image_params->hdr.cmdline_size); + cmdline_pages = xmalloc_pages( can_4g ? AllocateAnyPages : AllocateMaxAddress, EfiLoaderData, - EFI_SIZE_TO_PAGES(cmdline_len + 1), + EFI_SIZE_TO_PAGES(len + 1), CMDLINE_PTR_MAX); - memcpy(PHYSICAL_ADDRESS_TO_POINTER(cmdline_pages.addr), cmdline, cmdline_len); - ((char *) PHYSICAL_ADDRESS_TO_POINTER(cmdline_pages.addr))[cmdline_len] = 0; + /* Convert cmdline to ASCII. */ + char *cmdline8 = PHYSICAL_ADDRESS_TO_POINTER(cmdline_pages.addr); + for (size_t i = 0; i < len; i++) + cmdline8[i] = cmdline[i] <= 0x7E ? cmdline[i] : ' '; + cmdline8[len] = '\0'; + boot_params->hdr.cmd_line_ptr = (uint32_t) cmdline_pages.addr; boot_params->ext_cmd_line_ptr = cmdline_pages.addr >> 32; assert(can_4g || cmdline_pages.addr <= CMDLINE_PTR_MAX); diff --git a/src/boot/efi/measure.c b/src/boot/efi/measure.c index 9a16920787..6da07d917e 100644 --- a/src/boot/efi/measure.c +++ b/src/boot/efi/measure.c @@ -187,7 +187,7 @@ EFI_STATUS tpm_log_event_ascii(uint32_t pcrindex, EFI_PHYSICAL_ADDRESS buffer, U _cleanup_free_ char16_t *c = NULL; if (description) - c = xstra_to_str(description); + c = xstr8_to_16(description); return tpm_log_event(pcrindex, buffer, buffer_size, c, ret_measured); } diff --git a/src/boot/efi/missing_efi.h b/src/boot/efi/missing_efi.h index f9169248ec..b446e0399f 100644 --- a/src/boot/efi/missing_efi.h +++ b/src/boot/efi/missing_efi.h @@ -385,3 +385,35 @@ typedef struct _EFI_CONSOLE_CONTROL_PROTOCOL { { 0xd719b2cb, 0x3d3a, 0x4596, {0xa3, 0xbc, 0xda, 0xd0, 0xe, 0x67, 0x65, 0x6f }} #endif + +#ifndef EFI_SHELL_PARAMETERS_PROTOCOL_GUID +# define EFI_SHELL_PARAMETERS_PROTOCOL_GUID \ + { 0x752f3136, 0x4e16, 0x4fdc, { 0xa2, 0x2a, 0xe5, 0xf4, 0x68, 0x12, 
0xf4, 0xca } } + +typedef struct { + CHAR16 **Argv; + UINTN Argc; + void *StdIn; + void *StdOut; + void *StdErr; +} EFI_SHELL_PARAMETERS_PROTOCOL; +#endif + +#ifndef EFI_BOOT_MANAGER_POLICY_PROTOCOL_GUID +#define EFI_BOOT_MANAGER_POLICY_PROTOCOL_GUID \ + { 0xFEDF8E0C, 0xE147, 0x11E3, { 0x99, 0x03, 0xB8, 0xE8, 0x56, 0x2C, 0xBA, 0xFA } } +#define EFI_BOOT_MANAGER_POLICY_CONSOLE_GUID \ + { 0xCAB0E94C, 0xE15F, 0x11E3, { 0x91, 0x8D, 0xB8, 0xE8, 0x56, 0x2C, 0xBA, 0xFA } } + +typedef struct EFI_BOOT_MANAGER_POLICY_PROTOCOL EFI_BOOT_MANAGER_POLICY_PROTOCOL; +struct EFI_BOOT_MANAGER_POLICY_PROTOCOL { + UINT64 Revision; + EFI_STATUS (EFIAPI *ConnectDevicePath)( + EFI_BOOT_MANAGER_POLICY_PROTOCOL *This, + EFI_DEVICE_PATH *DevicePath, + BOOLEAN Recursive); + EFI_STATUS (EFIAPI *ConnectDeviceClass)( + EFI_BOOT_MANAGER_POLICY_PROTOCOL *This, + EFI_GUID *Class); +}; +#endif diff --git a/src/boot/efi/part-discovery.c b/src/boot/efi/part-discovery.c index de6d6112a1..14479c06ea 100644 --- a/src/boot/efi/part-discovery.c +++ b/src/boot/efi/part-discovery.c @@ -202,6 +202,10 @@ static EFI_STATUS find_device(const EFI_GUID *type, EFI_HANDLE *device, EFI_DEVI if (err != EFI_SUCCESS) return err; + /* The drivers for other partitions on this drive may not be initialized on fastboot firmware, so we + * have to ask the firmware to do just that. 
*/ + (void) BS->ConnectController(disk_handle, NULL, NULL, true); + err = BS->HandleProtocol(disk_handle, &BlockIoProtocol, (void **)&block_io); if (err != EFI_SUCCESS) return err; diff --git a/src/boot/efi/secure-boot.c b/src/boot/efi/secure-boot.c index 65457bf423..6212868134 100644 --- a/src/boot/efi/secure-boot.c +++ b/src/boot/efi/secure-boot.c @@ -6,7 +6,7 @@ #include "util.h" bool secure_boot_enabled(void) { - bool secure; + bool secure = false; /* avoid false maybe-uninitialized warning */ EFI_STATUS err; err = efivar_get_boolean_u8(EFI_GLOBAL_GUID, L"SecureBoot", &secure); diff --git a/src/boot/efi/splash.c b/src/boot/efi/splash.c index 5bc1084e62..25df97eb21 100644 --- a/src/boot/efi/splash.c +++ b/src/boot/efi/splash.c @@ -39,16 +39,11 @@ struct bmp_map { static EFI_STATUS bmp_parse_header( const uint8_t *bmp, - UINTN size, + size_t size, struct bmp_dib **ret_dib, struct bmp_map **ret_map, const uint8_t **pixmap) { - struct bmp_file *file; - struct bmp_dib *dib; - struct bmp_map *map; - UINTN row_size; - assert(bmp); assert(ret_dib); assert(ret_map); @@ -58,7 +53,7 @@ static EFI_STATUS bmp_parse_header( return EFI_INVALID_PARAMETER; /* check file header */ - file = (struct bmp_file *)bmp; + struct bmp_file *file = (struct bmp_file *) bmp; if (file->signature[0] != 'B' || file->signature[1] != 'M') return EFI_INVALID_PARAMETER; if (file->size != size) @@ -67,7 +62,7 @@ static EFI_STATUS bmp_parse_header( return EFI_INVALID_PARAMETER; /* check device-independent bitmap */ - dib = (struct bmp_dib *)(bmp + sizeof(struct bmp_file)); + struct bmp_dib *dib = (struct bmp_dib *) (bmp + sizeof(struct bmp_file)); if (dib->size < sizeof(struct bmp_dib)) return EFI_UNSUPPORTED; @@ -92,38 +87,26 @@ static EFI_STATUS bmp_parse_header( return EFI_UNSUPPORTED; } - row_size = ((UINTN) dib->depth * dib->x + 31) / 32 * 4; + size_t row_size = ((size_t) dib->depth * dib->x + 31) / 32 * 4; if (file->size - file->offset < dib->y * row_size) return EFI_INVALID_PARAMETER; if 
(row_size * dib->y > 64 * 1024 * 1024) return EFI_INVALID_PARAMETER; /* check color table */ - map = (struct bmp_map *)(bmp + sizeof(struct bmp_file) + dib->size); + struct bmp_map *map = (struct bmp_map *) (bmp + sizeof(struct bmp_file) + dib->size); if (file->offset < sizeof(struct bmp_file) + dib->size) return EFI_INVALID_PARAMETER; if (file->offset > sizeof(struct bmp_file) + dib->size) { - uint32_t map_count; - UINTN map_size; + uint32_t map_count = 0; if (dib->colors_used) map_count = dib->colors_used; - else { - switch (dib->depth) { - case 1: - case 4: - case 8: - map_count = 1 << dib->depth; - break; + else if (IN_SET(dib->depth, 1, 4, 8)) + map_count = 1 << dib->depth; - default: - map_count = 0; - break; - } - } - - map_size = file->offset - (sizeof(struct bmp_file) + dib->size); + size_t map_size = file->offset - (sizeof(struct bmp_file) + dib->size); if (map_size != sizeof(struct bmp_map) * map_count) return EFI_INVALID_PARAMETER; } @@ -135,28 +118,51 @@ static EFI_STATUS bmp_parse_header( return EFI_SUCCESS; } -static void pixel_blend(uint32_t *dst, const uint32_t source) { - uint32_t alpha, src, src_rb, src_g, dst_rb, dst_g, rb, g; - - assert(dst); - - alpha = (source & 0xff); - - /* convert src from RGBA to XRGB */ - src = source >> 8; +enum Channels { R, G, B, A, _CHANNELS_MAX }; +static void read_channel_maks( + const struct bmp_dib *dib, + uint32_t channel_mask[static _CHANNELS_MAX], + uint8_t channel_shift[static _CHANNELS_MAX], + uint8_t channel_scale[static _CHANNELS_MAX]) { - /* decompose into RB and G components */ - src_rb = (src & 0xff00ff); - src_g = (src & 0x00ff00); - - dst_rb = (*dst & 0xff00ff); - dst_g = (*dst & 0x00ff00); - - /* blend */ - rb = ((((src_rb - dst_rb) * alpha + 0x800080) >> 8) + dst_rb) & 0xff00ff; - g = ((((src_g - dst_g) * alpha + 0x008000) >> 8) + dst_g) & 0x00ff00; + assert(dib); - *dst = (rb | g); + if (IN_SET(dib->depth, 16, 32) && dib->size >= sizeof(*dib) + 3 * sizeof(uint32_t)) { + uint32_t *mask = (uint32_t 
*) ((uint8_t *) dib + sizeof(*dib)); + channel_mask[R] = mask[R]; + channel_mask[G] = mask[G]; + channel_mask[B] = mask[B]; + channel_shift[R] = __builtin_ctz(mask[R]); + channel_shift[G] = __builtin_ctz(mask[G]); + channel_shift[B] = __builtin_ctz(mask[B]); + channel_scale[R] = 0xff / ((1 << __builtin_popcount(mask[R])) - 1); + channel_scale[G] = 0xff / ((1 << __builtin_popcount(mask[G])) - 1); + channel_scale[B] = 0xff / ((1 << __builtin_popcount(mask[B])) - 1); + + if (dib->size >= sizeof(*dib) + 4 * sizeof(uint32_t) && mask[A] != 0) { + channel_mask[A] = mask[A]; + channel_shift[A] = __builtin_ctz(mask[A]); + channel_scale[A] = 0xff / ((1 << __builtin_popcount(mask[A])) - 1); + } else { + channel_mask[A] = 0; + channel_shift[A] = 0; + channel_scale[A] = 0; + } + } else { + bool bpp16 = dib->depth == 16; + channel_mask[R] = bpp16 ? 0x7C00 : 0xFF0000; + channel_mask[G] = bpp16 ? 0x03E0 : 0x00FF00; + channel_mask[B] = bpp16 ? 0x001F : 0x0000FF; + channel_mask[A] = bpp16 ? 0x0000 : 0x000000; + channel_shift[R] = bpp16 ? 0xA : 0x10; + channel_shift[G] = bpp16 ? 0x5 : 0x08; + channel_shift[B] = bpp16 ? 0x0 : 0x00; + channel_shift[A] = bpp16 ? 0x0 : 0x00; + channel_scale[R] = bpp16 ? 0x08 : 0x1; + channel_scale[G] = bpp16 ? 0x08 : 0x1; + channel_scale[B] = bpp16 ? 0x08 : 0x1; + channel_scale[A] = bpp16 ? 
0x00 : 0x0; + } } static EFI_STATUS bmp_to_blt( @@ -172,17 +178,19 @@ static EFI_STATUS bmp_to_blt( assert(map); assert(pixmap); + uint32_t channel_mask[_CHANNELS_MAX]; + uint8_t channel_shift[_CHANNELS_MAX], channel_scale[_CHANNELS_MAX]; + read_channel_maks(dib, channel_mask, channel_shift, channel_scale); + /* transform and copy pixels */ in = pixmap; - for (UINTN y = 0; y < dib->y; y++) { - EFI_GRAPHICS_OUTPUT_BLT_PIXEL *out; - UINTN row_size; + for (uint32_t y = 0; y < dib->y; y++) { + EFI_GRAPHICS_OUTPUT_BLT_PIXEL *out = &buf[(dib->y - y - 1) * dib->x]; - out = &buf[(dib->y - y - 1) * dib->x]; - for (UINTN x = 0; x < dib->x; x++, in++, out++) { + for (uint32_t x = 0; x < dib->x; x++, in++, out++) { switch (dib->depth) { case 1: { - for (UINTN i = 0; i < 8 && x < dib->x; i++) { + for (unsigned i = 0; i < 8 && x < dib->x; i++) { out->Red = map[((*in) >> (7 - i)) & 1].red; out->Green = map[((*in) >> (7 - i)) & 1].green; out->Blue = map[((*in) >> (7 - i)) & 1].blue; @@ -195,9 +203,7 @@ static EFI_STATUS bmp_to_blt( } case 4: { - UINTN i; - - i = (*in) >> 4; + unsigned i = (*in) >> 4; out->Red = map[i].red; out->Green = map[i].green; out->Blue = map[i].blue; @@ -218,16 +224,6 @@ static EFI_STATUS bmp_to_blt( out->Blue = map[*in].blue; break; - case 16: { - uint16_t i = *(uint16_t *) in; - - out->Red = (i & 0x7c00) >> 7; - out->Green = (i & 0x3e0) >> 2; - out->Blue = (i & 0x1f) << 3; - in += 1; - break; - } - case 24: out->Red = in[2]; out->Green = in[1]; @@ -235,34 +231,42 @@ static EFI_STATUS bmp_to_blt( in += 2; break; + case 16: case 32: { - uint32_t i = *(uint32_t *) in; + uint32_t i = dib->depth == 16 ? 
*(uint16_t *) in : *(uint32_t *) in; + + uint8_t r = ((i & channel_mask[R]) >> channel_shift[R]) * channel_scale[R], + g = ((i & channel_mask[G]) >> channel_shift[G]) * channel_scale[G], + b = ((i & channel_mask[B]) >> channel_shift[B]) * channel_scale[B], + a = 0xFFu; + if (channel_mask[A] != 0) + a = ((i & channel_mask[A]) >> channel_shift[A]) * channel_scale[A]; - pixel_blend((uint32_t *)out, i); + out->Red = (out->Red * (0xFFu - a) + r * a) >> 8; + out->Green = (out->Green * (0xFFu - a) + g * a) >> 8; + out->Blue = (out->Blue * (0xFFu - a) + b * a) >> 8; - in += 3; + in += dib->depth == 16 ? 1 : 3; break; } } } /* add row padding; new lines always start at 32 bit boundary */ - row_size = in - pixmap; + size_t row_size = in - pixmap; in += ((row_size + 3) & ~3) - row_size; } return EFI_SUCCESS; } -EFI_STATUS graphics_splash(const uint8_t *content, UINTN len) { +EFI_STATUS graphics_splash(const uint8_t *content, size_t len) { EFI_GRAPHICS_OUTPUT_BLT_PIXEL background = {}; EFI_GRAPHICS_OUTPUT_PROTOCOL *GraphicsOutput = NULL; struct bmp_dib *dib; struct bmp_map *map; const uint8_t *pixmap; - _cleanup_free_ void *blt = NULL; - UINTN x_pos = 0; - UINTN y_pos = 0; + size_t x_pos = 0, y_pos = 0; EFI_STATUS err; if (len == 0) @@ -297,9 +301,9 @@ EFI_STATUS graphics_splash(const uint8_t *content, UINTN len) { if (err != EFI_SUCCESS) return err; - /* EFI buffer */ - blt = xnew(EFI_GRAPHICS_OUTPUT_BLT_PIXEL, dib->x * dib->y); - + /* Read in current screen content to perform proper alpha blending. 
*/ + _cleanup_free_ EFI_GRAPHICS_OUTPUT_BLT_PIXEL *blt = xnew( + EFI_GRAPHICS_OUTPUT_BLT_PIXEL, dib->x * dib->y); err = GraphicsOutput->Blt( GraphicsOutput, blt, EfiBltVideoToBltBuffer, x_pos, y_pos, 0, 0, diff --git a/src/boot/efi/stub.c b/src/boot/efi/stub.c index 6ece3cf733..023f8ae255 100644 --- a/src/boot/efi/stub.c +++ b/src/boot/efi/stub.c @@ -133,16 +133,62 @@ static void export_variables(EFI_LOADED_IMAGE_PROTOCOL *loaded_image) { (void) efivar_set_uint64_le(LOADER_GUID, L"StubFeatures", stub_features, 0); } +static bool use_load_options( + EFI_HANDLE stub_image, + EFI_LOADED_IMAGE_PROTOCOL *loaded_image, + bool have_cmdline, + char16_t **ret) { + + assert(stub_image); + assert(loaded_image); + assert(ret); + + /* We only allow custom command lines if we aren't in secure boot or if no cmdline was baked into + * the stub image. */ + if (secure_boot_enabled() && have_cmdline) + return false; + + /* We also do a superficial check whether first character of passed command line + * is printable character (for compat with some Dell systems which fill in garbage?). */ + if (loaded_image->LoadOptionsSize < sizeof(char16_t) || ((char16_t *) loaded_image->LoadOptions)[0] <= 0x1F) + return false; + + /* The UEFI shell registers EFI_SHELL_PARAMETERS_PROTOCOL onto images it runs. This lets us know that + * LoadOptions starts with the stub binary path which we want to strip off. */ + EFI_SHELL_PARAMETERS_PROTOCOL *shell; + if (BS->HandleProtocol(stub_image, &(EFI_GUID) EFI_SHELL_PARAMETERS_PROTOCOL_GUID, (void **) &shell) + != EFI_SUCCESS) { + /* Not running from EFI shell, use entire LoadOptions. Note that LoadOptions is a void*, so + * it could be anything! */ + *ret = xstrndup16(loaded_image->LoadOptions, loaded_image->LoadOptionsSize / sizeof(char16_t)); + mangle_stub_cmdline(*ret); + return true; + } + + if (shell->Argc < 2) + /* No arguments were provided? Then we fall back to built-in cmdline. 
*/ + return false; + + /* Assemble the command line ourselves without our stub path. */ + *ret = xstrdup16(shell->Argv[1]); + for (size_t i = 2; i < shell->Argc; i++) { + _cleanup_free_ char16_t *old = *ret; + *ret = xpool_print(u"%s %s", old, shell->Argv[i]); + } + + mangle_stub_cmdline(*ret); + return true; +} + EFI_STATUS efi_main(EFI_HANDLE image, EFI_SYSTEM_TABLE *sys_table) { _cleanup_free_ void *credential_initrd = NULL, *global_credential_initrd = NULL, *sysext_initrd = NULL, *pcrsig_initrd = NULL, *pcrpkey_initrd = NULL; - UINTN credential_initrd_size = 0, global_credential_initrd_size = 0, sysext_initrd_size = 0, pcrsig_initrd_size = 0, pcrpkey_initrd_size = 0; - UINTN cmdline_len = 0, linux_size, initrd_size, dt_size; + size_t credential_initrd_size = 0, global_credential_initrd_size = 0, sysext_initrd_size = 0, pcrsig_initrd_size = 0, pcrpkey_initrd_size = 0; + size_t linux_size, initrd_size, dt_size; EFI_PHYSICAL_ADDRESS linux_base, initrd_base, dt_base; _cleanup_(devicetree_cleanup) struct devicetree_state dt_state = {}; EFI_LOADED_IMAGE_PROTOCOL *loaded_image; - UINTN addrs[_UNIFIED_SECTION_MAX] = {}, szs[_UNIFIED_SECTION_MAX] = {}; - char *cmdline = NULL; - _cleanup_free_ char *cmdline_owned = NULL; + size_t addrs[_UNIFIED_SECTION_MAX] = {}, szs[_UNIFIED_SECTION_MAX] = {}; + _cleanup_free_ char16_t *cmdline = NULL; int sections_measured = -1, parameters_measured = -1; bool sysext_measured = false, m; uint64_t loader_features = 0; @@ -221,32 +267,19 @@ EFI_STATUS efi_main(EFI_HANDLE image, EFI_SYSTEM_TABLE *sys_table) { /* Show splash screen as early as possible */ graphics_splash((const uint8_t*) loaded_image->ImageBase + addrs[UNIFIED_SECTION_SPLASH], szs[UNIFIED_SECTION_SPLASH]); - if (szs[UNIFIED_SECTION_CMDLINE] > 0) { - cmdline = (char *) loaded_image->ImageBase + addrs[UNIFIED_SECTION_CMDLINE]; - cmdline_len = szs[UNIFIED_SECTION_CMDLINE]; - } - - /* if we are not in secure boot mode, or none was provided, accept a custom command line and 
replace - * the built-in one. We also do a superficial check whether first character of passed command line - * is printable character (for compat with some Dell systems which fill in garbage?). */ - if ((!secure_boot_enabled() || cmdline_len == 0) && - loaded_image->LoadOptionsSize > 0 && - ((char16_t *) loaded_image->LoadOptions)[0] > 0x1F) { - cmdline_len = (loaded_image->LoadOptionsSize / sizeof(char16_t)) * sizeof(char); - cmdline = cmdline_owned = xnew(char, cmdline_len); - - for (UINTN i = 0; i < cmdline_len; i++) { - char16_t c = ((char16_t *) loaded_image->LoadOptions)[i]; - cmdline[i] = c > 0x1F && c < 0x7F ? c : ' '; /* convert non-printable and non_ASCII characters to spaces. */ - } - + if (use_load_options(image, loaded_image, szs[UNIFIED_SECTION_CMDLINE] > 0, &cmdline)) { /* Let's measure the passed kernel command line into the TPM. Note that this possibly * duplicates what we already did in the boot menu, if that was already used. However, since * we want the boot menu to support an EFI binary, and want to this stub to be usable from * any boot menu, let's measure things anyway. 
*/ m = false; - (void) tpm_log_load_options(loaded_image->LoadOptions, &m); + (void) tpm_log_load_options(cmdline, &m); parameters_measured = m; + } else if (szs[UNIFIED_SECTION_CMDLINE] > 0) { + cmdline = xstrn8_to_16( + (char *) loaded_image->ImageBase + addrs[UNIFIED_SECTION_CMDLINE], + szs[UNIFIED_SECTION_CMDLINE]); + mangle_stub_cmdline(cmdline); } export_variables(loaded_image); @@ -387,7 +420,7 @@ EFI_STATUS efi_main(EFI_HANDLE image, EFI_SYSTEM_TABLE *sys_table) { log_error_stall(L"Error loading embedded devicetree: %r", err); } - err = linux_exec(image, cmdline, cmdline_len, + err = linux_exec(image, cmdline, PHYSICAL_ADDRESS_TO_POINTER(linux_base), linux_size, PHYSICAL_ADDRESS_TO_POINTER(initrd_base), initrd_size); graphics_mode(false); diff --git a/src/boot/efi/test-efi-string.c b/src/boot/efi/test-efi-string.c index 2b2359fe5c..7b43e1d629 100644 --- a/src/boot/efi/test-efi-string.c +++ b/src/boot/efi/test-efi-string.c @@ -324,6 +324,33 @@ TEST(xstrdup16) { free(s); } +TEST(xstrn8_to_16) { + char16_t *s = NULL; + + assert_se(xstrn8_to_16(NULL, 1) == NULL); + assert_se(xstrn8_to_16("a", 0) == NULL); + + assert_se(s = xstrn8_to_16("", 1)); + assert_se(streq16(s, u"")); + free(s); + + assert_se(s = xstrn8_to_16("1", 1)); + assert_se(streq16(s, u"1")); + free(s); + + assert_se(s = xstr8_to_16("abcxyzABCXYZ09 .,-_#*!\"§$%&/()=?`~")); + assert_se(streq16(s, u"abcxyzABCXYZ09 .,-_#*!\"§$%&/()=?`~")); + free(s); + + assert_se(s = xstr8_to_16("ÿⱿ𝇉 😺")); + assert_se(streq16(s, u"ÿⱿ ")); + free(s); + + assert_se(s = xstrn8_to_16("¶¶", 3)); + assert_se(streq16(s, u"¶")); + free(s); +} + #define TEST_FNMATCH_ONE(pattern, haystack, expect) \ ({ \ assert_se(fnmatch(pattern, haystack, 0) == (expect ? 
0 : FNM_NOMATCH)); \ diff --git a/src/boot/efi/util.c b/src/boot/efi/util.c index 57436dbf0c..1f07fbc38c 100644 --- a/src/boot/efi/util.c +++ b/src/boot/efi/util.c @@ -91,7 +91,7 @@ EFI_STATUS efivar_set_uint64_le(const EFI_GUID *vendor, const char16_t *name, ui return efivar_set_raw(vendor, name, buf, sizeof(buf), flags); } -EFI_STATUS efivar_get(const EFI_GUID *vendor, const char16_t *name, char16_t **value) { +EFI_STATUS efivar_get(const EFI_GUID *vendor, const char16_t *name, char16_t **ret) { _cleanup_free_ char16_t *buf = NULL; EFI_STATUS err; char16_t *val; @@ -108,12 +108,12 @@ EFI_STATUS efivar_get(const EFI_GUID *vendor, const char16_t *name, char16_t **v if ((size % sizeof(char16_t)) != 0) return EFI_INVALID_PARAMETER; - if (!value) + if (!ret) return EFI_SUCCESS; /* Return buffer directly if it happens to be NUL terminated already */ if (size >= sizeof(char16_t) && buf[size / sizeof(char16_t) - 1] == 0) { - *value = TAKE_PTR(buf); + *ret = TAKE_PTR(buf); return EFI_SUCCESS; } @@ -123,18 +123,17 @@ EFI_STATUS efivar_get(const EFI_GUID *vendor, const char16_t *name, char16_t **v memcpy(val, buf, size); val[size / sizeof(char16_t) - 1] = 0; /* NUL terminate */ - *value = val; + *ret = val; return EFI_SUCCESS; } -EFI_STATUS efivar_get_uint_string(const EFI_GUID *vendor, const char16_t *name, UINTN *i) { +EFI_STATUS efivar_get_uint_string(const EFI_GUID *vendor, const char16_t *name, UINTN *ret) { _cleanup_free_ char16_t *val = NULL; EFI_STATUS err; uint64_t u; assert(vendor); assert(name); - assert(i); err = efivar_get(vendor, name, &val); if (err != EFI_SUCCESS) @@ -143,7 +142,8 @@ EFI_STATUS efivar_get_uint_string(const EFI_GUID *vendor, const char16_t *name, if (!parse_number16(val, &u, NULL) || u > UINTN_MAX) return EFI_INVALID_PARAMETER; - *i = u; + if (ret) + *ret = u; return EFI_SUCCESS; } @@ -156,15 +156,17 @@ EFI_STATUS efivar_get_uint32_le(const EFI_GUID *vendor, const char16_t *name, ui assert(name); err = efivar_get_raw(vendor, name, &buf, 
&size); - if (err == EFI_SUCCESS && ret) { - if (size != sizeof(uint32_t)) - return EFI_BUFFER_TOO_SMALL; + if (err != EFI_SUCCESS) + return err; + + if (size != sizeof(uint32_t)) + return EFI_BUFFER_TOO_SMALL; + if (ret) *ret = (uint32_t) buf[0] << 0U | (uint32_t) buf[1] << 8U | (uint32_t) buf[2] << 16U | (uint32_t) buf[3] << 24U; - } - return err; + return EFI_SUCCESS; } EFI_STATUS efivar_get_uint64_le(const EFI_GUID *vendor, const char16_t *name, uint64_t *ret) { @@ -176,19 +178,21 @@ EFI_STATUS efivar_get_uint64_le(const EFI_GUID *vendor, const char16_t *name, ui assert(name); err = efivar_get_raw(vendor, name, &buf, &size); - if (err == EFI_SUCCESS && ret) { - if (size != sizeof(uint64_t)) - return EFI_BUFFER_TOO_SMALL; + if (err != EFI_SUCCESS) + return err; + + if (size != sizeof(uint64_t)) + return EFI_BUFFER_TOO_SMALL; + if (ret) *ret = (uint64_t) buf[0] << 0U | (uint64_t) buf[1] << 8U | (uint64_t) buf[2] << 16U | (uint64_t) buf[3] << 24U | (uint64_t) buf[4] << 32U | (uint64_t) buf[5] << 40U | (uint64_t) buf[6] << 48U | (uint64_t) buf[7] << 56U; - } - return err; + return EFI_SUCCESS; } -EFI_STATUS efivar_get_raw(const EFI_GUID *vendor, const char16_t *name, char **buffer, UINTN *size) { +EFI_STATUS efivar_get_raw(const EFI_GUID *vendor, const char16_t *name, char **ret, UINTN *ret_size) { _cleanup_free_ char *buf = NULL; UINTN l; EFI_STATUS err; @@ -200,16 +204,15 @@ EFI_STATUS efivar_get_raw(const EFI_GUID *vendor, const char16_t *name, char **b buf = xmalloc(l); err = RT->GetVariable((char16_t *) name, (EFI_GUID *) vendor, NULL, &l, buf); - if (err == EFI_SUCCESS) { - - if (buffer) - *buffer = TAKE_PTR(buf); + if (err != EFI_SUCCESS) + return err; - if (size) - *size = l; - } + if (ret) + *ret = TAKE_PTR(buf); + if (ret_size) + *ret_size = l; - return err; + return EFI_SUCCESS; } EFI_STATUS efivar_get_boolean_u8(const EFI_GUID *vendor, const char16_t *name, bool *ret) { @@ -219,13 +222,15 @@ EFI_STATUS efivar_get_boolean_u8(const EFI_GUID *vendor, const 
char16_t *name, b assert(vendor); assert(name); - assert(ret); err = efivar_get_raw(vendor, name, &b, &size); - if (err == EFI_SUCCESS) + if (err != EFI_SUCCESS) + return err; + + if (ret) *ret = *b > 0; - return err; + return EFI_SUCCESS; } void efivar_set_time_usec(const EFI_GUID *vendor, const char16_t *name, uint64_t usec) { @@ -244,127 +249,36 @@ void efivar_set_time_usec(const EFI_GUID *vendor, const char16_t *name, uint64_t efivar_set(vendor, name, str, 0); } -static int utf8_to_16(const char *stra, char16_t *c) { - char16_t unichar; - UINTN len; - - assert(stra); - assert(c); - - if (!(stra[0] & 0x80)) - len = 1; - else if ((stra[0] & 0xe0) == 0xc0) - len = 2; - else if ((stra[0] & 0xf0) == 0xe0) - len = 3; - else if ((stra[0] & 0xf8) == 0xf0) - len = 4; - else if ((stra[0] & 0xfc) == 0xf8) - len = 5; - else if ((stra[0] & 0xfe) == 0xfc) - len = 6; - else - return -1; - - switch (len) { - case 1: - unichar = stra[0]; - break; - case 2: - unichar = stra[0] & 0x1f; - break; - case 3: - unichar = stra[0] & 0x0f; - break; - case 4: - unichar = stra[0] & 0x07; - break; - case 5: - unichar = stra[0] & 0x03; - break; - case 6: - unichar = stra[0] & 0x01; - break; - } - - for (UINTN i = 1; i < len; i++) { - if ((stra[i] & 0xc0) != 0x80) - return -1; - unichar <<= 6; - unichar |= stra[i] & 0x3f; - } - - *c = unichar; - return len; -} - -char16_t *xstra_to_str(const char *stra) { - UINTN strlen; - UINTN len; - UINTN i; - char16_t *str; - - assert(stra); +void convert_efi_path(char16_t *path) { + assert(path); - len = strlen8(stra); - str = xnew(char16_t, len + 1); + for (size_t i = 0, fixed = 0;; i++) { + /* Fix device path node separator. */ + path[fixed] = (path[i] == '/') ? '\\' : path[i]; - strlen = 0; - i = 0; - while (i < len) { - int utf8len; - - utf8len = utf8_to_16(stra + i, str + strlen); - if (utf8len <= 0) { - /* invalid utf8 sequence, skip the garbage */ - i++; + /* Double '\' is not allowed in EFI file paths. 
*/ + if (fixed > 0 && path[fixed - 1] == '\\' && path[fixed] == '\\') continue; - } - strlen++; - i += utf8len; + if (path[i] == '\0') + break; + + fixed++; } - str[strlen] = '\0'; - return str; } -char16_t *xstra_to_path(const char *stra) { - char16_t *str; - UINTN strlen; - UINTN len; - UINTN i; - - assert(stra); - - len = strlen8(stra); - str = xnew(char16_t, len + 2); - - str[0] = '\\'; - strlen = 1; - i = 0; - while (i < len) { - int utf8len; - - utf8len = utf8_to_16(stra + i, str + strlen); - if (utf8len <= 0) { - /* invalid utf8 sequence, skip the garbage */ - i++; - continue; - } - - if (str[strlen] == '/') - str[strlen] = '\\'; - if (str[strlen] == '\\' && str[strlen-1] == '\\') { - /* skip double slashes */ - i += utf8len; - continue; - } +char16_t *xstr8_to_path(const char *str8) { + assert(str8); + char16_t *path = xstr8_to_16(str8); + convert_efi_path(path); + return path; +} - strlen++; - i += utf8len; - } - str[strlen] = '\0'; - return str; +void mangle_stub_cmdline(char16_t *cmdline) { + for (; *cmdline != '\0'; cmdline++) + /* Convert ASCII control characters to spaces. 
*/ + if (*cmdline <= 0x1F) + *cmdline = ' '; } EFI_STATUS file_read(EFI_FILE *dir, const char16_t *name, UINTN off, UINTN size, char **ret, UINTN *ret_size) { diff --git a/src/boot/efi/util.h b/src/boot/efi/util.h index 4c5b6cab13..fb525ba636 100644 --- a/src/boot/efi/util.h +++ b/src/boot/efi/util.h @@ -130,15 +130,16 @@ EFI_STATUS efivar_set_uint32_le(const EFI_GUID *vendor, const char16_t *NAME, ui EFI_STATUS efivar_set_uint64_le(const EFI_GUID *vendor, const char16_t *name, uint64_t value, uint32_t flags); void efivar_set_time_usec(const EFI_GUID *vendor, const char16_t *name, uint64_t usec); -EFI_STATUS efivar_get(const EFI_GUID *vendor, const char16_t *name, char16_t **value); -EFI_STATUS efivar_get_raw(const EFI_GUID *vendor, const char16_t *name, char **buffer, UINTN *size); -EFI_STATUS efivar_get_uint_string(const EFI_GUID *vendor, const char16_t *name, UINTN *i); +EFI_STATUS efivar_get(const EFI_GUID *vendor, const char16_t *name, char16_t **ret); +EFI_STATUS efivar_get_raw(const EFI_GUID *vendor, const char16_t *name, char **ret, UINTN *ret_size); +EFI_STATUS efivar_get_uint_string(const EFI_GUID *vendor, const char16_t *name, UINTN *ret); EFI_STATUS efivar_get_uint32_le(const EFI_GUID *vendor, const char16_t *name, uint32_t *ret); EFI_STATUS efivar_get_uint64_le(const EFI_GUID *vendor, const char16_t *name, uint64_t *ret); EFI_STATUS efivar_get_boolean_u8(const EFI_GUID *vendor, const char16_t *name, bool *ret); -char16_t *xstra_to_path(const char *stra); -char16_t *xstra_to_str(const char *stra); +void convert_efi_path(char16_t *path); +char16_t *xstr8_to_path(const char *stra); +void mangle_stub_cmdline(char16_t *cmdline); EFI_STATUS file_read(EFI_FILE *dir, const char16_t *name, UINTN off, UINTN size, char **content, UINTN *content_size); diff --git a/src/boot/efi/vmm.c b/src/boot/efi/vmm.c index b1bfd778fc..2260b217b7 100644 --- a/src/boot/efi/vmm.c +++ b/src/boot/efi/vmm.c @@ -83,6 +83,10 @@ EFI_STATUS vmm_open(EFI_HANDLE *ret_vmm_dev, EFI_FILE 
**ret_vmm_dir) { assert(ret_vmm_dev); assert(ret_vmm_dir); + /* Make sure all file systems have been initialized. Only do this in VMs as this is slow + * on some real firmwares. */ + (void) reconnect_all_drivers(); + /* find all file system handles */ err = BS->LocateHandleBuffer(ByProtocol, &FileSystemProtocol, NULL, &n_handles, &handles); if (err != EFI_SUCCESS) diff --git a/src/cgtop/cgtop.c b/src/cgtop/cgtop.c index cf51024dcb..5d82d656bc 100644 --- a/src/cgtop/cgtop.c +++ b/src/cgtop/cgtop.c @@ -56,6 +56,13 @@ typedef struct Group { uint64_t io_input_bps, io_output_bps; } Group; +/* Counted objects, enum order matters */ +typedef enum PidsCount { + COUNT_USERSPACE_PROCESSES, /* least */ + COUNT_ALL_PROCESSES, + COUNT_PIDS, /* most, requires pids controller */ +} PidsCount; + static unsigned arg_depth = 3; static unsigned arg_iterations = UINT_MAX; static bool arg_batch = false; @@ -66,11 +73,7 @@ static char* arg_root = NULL; static bool arg_recursive = true; static bool arg_recursive_unset = false; -static enum { - COUNT_PIDS, - COUNT_USERSPACE_PROCESSES, - COUNT_ALL_PROCESSES, -} arg_count = COUNT_PIDS; +static PidsCount arg_count = COUNT_PIDS; static enum { ORDER_PATH, @@ -916,6 +919,7 @@ static int run(int argc, char *argv[]) { usec_t last_refresh = 0; bool quit = false, immediate_refresh = false; _cleanup_free_ char *root = NULL; + PidsCount possible_count; CGroupMask mask; int r; @@ -929,7 +933,9 @@ static int run(int argc, char *argv[]) { if (r < 0) return log_error_errno(r, "Failed to determine supported controllers: %m"); - arg_count = (mask & CGROUP_MASK_PIDS) ? COUNT_PIDS : COUNT_USERSPACE_PROCESSES; + /* honor user selection unless pids controller is unavailable */ + possible_count = (mask & CGROUP_MASK_PIDS) ? 
COUNT_PIDS : COUNT_ALL_PROCESSES; + arg_count = MIN(possible_count, arg_count); if (arg_recursive_unset && arg_count == COUNT_PIDS) return log_error_errno(SYNTHETIC_ERRNO(EINVAL), diff --git a/src/core/dbus-manager.c b/src/core/dbus-manager.c index ab2617153a..8d7b1f60da 100644 --- a/src/core/dbus-manager.c +++ b/src/core/dbus-manager.c @@ -2425,6 +2425,7 @@ static int method_disable_unit_files_generic( sd_bus_message *message, Manager *m, int (*call)(LookupScope scope, UnitFileFlags flags, const char *root_dir, char *files[], InstallChange **changes, size_t *n_changes), + bool carries_install_info, sd_bus_error *error) { _cleanup_strv_free_ char **l = NULL; @@ -2440,7 +2441,8 @@ static int method_disable_unit_files_generic( if (r < 0) return r; - if (sd_bus_message_is_method_call(message, NULL, "DisableUnitFilesWithFlags")) { + if (sd_bus_message_is_method_call(message, NULL, "DisableUnitFilesWithFlags") || + sd_bus_message_is_method_call(message, NULL, "DisableUnitFilesWithFlagsAndInstallInfo")) { uint64_t raw_flags; r = sd_bus_message_read(message, "t", &raw_flags); @@ -2469,19 +2471,23 @@ static int method_disable_unit_files_generic( if (r < 0) return install_error(error, r, changes, n_changes); - return reply_install_changes_and_free(m, message, -1, changes, n_changes, error); + return reply_install_changes_and_free(m, message, carries_install_info ? 
r : -1, changes, n_changes, error); } static int method_disable_unit_files_with_flags(sd_bus_message *message, void *userdata, sd_bus_error *error) { - return method_disable_unit_files_generic(message, userdata, unit_file_disable, error); + return method_disable_unit_files_generic(message, userdata, unit_file_disable, /* carries_install_info = */ false, error); +} + +static int method_disable_unit_files_with_flags_and_install_info(sd_bus_message *message, void *userdata, sd_bus_error *error) { + return method_disable_unit_files_generic(message, userdata, unit_file_disable, /* carries_install_info = */ true, error); } static int method_disable_unit_files(sd_bus_message *message, void *userdata, sd_bus_error *error) { - return method_disable_unit_files_generic(message, userdata, unit_file_disable, error); + return method_disable_unit_files_generic(message, userdata, unit_file_disable, /* carries_install_info = */ false, error); } static int method_unmask_unit_files(sd_bus_message *message, void *userdata, sd_bus_error *error) { - return method_disable_unit_files_generic(message, userdata, unit_file_unmask, error); + return method_disable_unit_files_generic(message, userdata, unit_file_unmask, /* carries_install_info = */ false, error); } static int method_revert_unit_files(sd_bus_message *message, void *userdata, sd_bus_error *error) { @@ -2623,11 +2629,10 @@ static int method_add_dependency_unit_files(sd_bus_message *message, void *userd static int method_get_unit_file_links(sd_bus_message *message, void *userdata, sd_bus_error *error) { _cleanup_(sd_bus_message_unrefp) sd_bus_message *reply = NULL; + Manager *m = ASSERT_PTR(userdata); InstallChange *changes = NULL; size_t n_changes = 0, i; - UnitFileFlags flags; const char *name; - char **p; int runtime, r; r = sd_bus_message_read(message, "sb", &name, &runtime); @@ -2642,11 +2647,9 @@ static int method_get_unit_file_links(sd_bus_message *message, void *userdata, s if (r < 0) return r; - p = STRV_MAKE(name); - 
flags = UNIT_FILE_DRY_RUN | - (runtime ? UNIT_FILE_RUNTIME : 0); - - r = unit_file_disable(LOOKUP_SCOPE_SYSTEM, flags, NULL, p, &changes, &n_changes); + r = unit_file_disable(m->unit_file_scope, + UNIT_FILE_DRY_RUN | (runtime ? UNIT_FILE_RUNTIME : 0), + NULL, STRV_MAKE(name), &changes, &n_changes); if (r < 0) { log_error_errno(r, "Failed to get file links for %s: %m", name); goto finish; @@ -3194,6 +3197,11 @@ const sd_bus_vtable bus_manager_vtable[] = { SD_BUS_RESULT("a(sss)", changes), method_disable_unit_files_with_flags, SD_BUS_VTABLE_UNPRIVILEGED), + SD_BUS_METHOD_WITH_ARGS("DisableUnitFilesWithFlagsAndInstallInfo", + SD_BUS_ARGS("as", files, "t", flags), + SD_BUS_RESULT("b", carries_install_info, "a(sss)", changes), + method_disable_unit_files_with_flags_and_install_info, + SD_BUS_VTABLE_UNPRIVILEGED), SD_BUS_METHOD_WITH_ARGS("ReenableUnitFiles", SD_BUS_ARGS("as", files, "b", runtime, "b", force), SD_BUS_RESULT("b", carries_install_info, "a(sss)", changes), diff --git a/src/core/dbus-scope.c b/src/core/dbus-scope.c index 7d2ceb0765..7b07bb8bb9 100644 --- a/src/core/dbus-scope.c +++ b/src/core/dbus-scope.c @@ -5,6 +5,7 @@ #include "bus-get-properties.h" #include "dbus-cgroup.h" #include "dbus-kill.h" +#include "dbus-manager.h" #include "dbus-scope.h" #include "dbus-unit.h" #include "dbus-util.h" @@ -39,6 +40,7 @@ int bus_scope_method_abandon(sd_bus_message *message, void *userdata, sd_bus_err } static BUS_DEFINE_PROPERTY_GET_ENUM(property_get_result, scope_result, ScopeResult); +static BUS_DEFINE_SET_TRANSIENT_PARSE(oom_policy, OOMPolicy, oom_policy_from_string); const sd_bus_vtable bus_scope_vtable[] = { SD_BUS_VTABLE_START(0), @@ -47,6 +49,7 @@ const sd_bus_vtable bus_scope_vtable[] = { SD_BUS_PROPERTY("Result", "s", property_get_result, offsetof(Scope, result), SD_BUS_VTABLE_PROPERTY_EMITS_CHANGE), SD_BUS_PROPERTY("RuntimeMaxUSec", "t", bus_property_get_usec, offsetof(Scope, runtime_max_usec), SD_BUS_VTABLE_PROPERTY_CONST), 
SD_BUS_PROPERTY("RuntimeRandomizedExtraUSec", "t", bus_property_get_usec, offsetof(Scope, runtime_rand_extra_usec), SD_BUS_VTABLE_PROPERTY_CONST), + SD_BUS_PROPERTY("OOMPolicy", "s", bus_property_get_oom_policy, offsetof(Scope, oom_policy), SD_BUS_VTABLE_PROPERTY_CONST), SD_BUS_SIGNAL("RequestStop", NULL, 0), SD_BUS_METHOD("Abandon", NULL, NULL, bus_scope_method_abandon, SD_BUS_VTABLE_UNPRIVILEGED), SD_BUS_VTABLE_END @@ -77,6 +80,9 @@ static int bus_scope_set_transient_property( if (streq(name, "RuntimeRandomizedExtraUSec")) return bus_set_transient_usec(u, name, &s->runtime_rand_extra_usec, message, flags, error); + if (streq(name, "OOMPolicy")) + return bus_set_transient_oom_policy(u, name, &s->oom_policy, message, flags, error); + if (streq(name, "PIDs")) { _cleanup_(sd_bus_creds_unrefp) sd_bus_creds *creds = NULL; unsigned n = 0; diff --git a/src/core/device.c b/src/core/device.c index 224fc90835..6e07f2745b 100644 --- a/src/core/device.c +++ b/src/core/device.c @@ -135,6 +135,7 @@ static void device_done(Unit *u) { assert(d); device_unset_sysfs(d); + d->deserialized_sysfs = mfree(d->deserialized_sysfs); d->wants_property = strv_free(d->wants_property); d->path = mfree(d->path); } @@ -267,7 +268,7 @@ static int device_coldplug(Unit *u) { * 1. MANAGER_IS_RUNNING() == false * 2. enumerate devices: manager_enumerate() -> device_enumerate() * Device.enumerated_found is set. - * 3. deserialize devices: manager_deserialize() -> device_deserialize() + * 3. deserialize devices: manager_deserialize() -> device_deserialize_item() * Device.deserialize_state and Device.deserialized_found are set. * 4. coldplug devices: manager_coldplug() -> device_coldplug() * deserialized properties are copied to the main properties. @@ -282,23 +283,41 @@ static int device_coldplug(Unit *u) { * * - On switch-root, the udev database may be cleared, except for devices with sticky bit, i.e. * OPTIONS="db_persist". Hence, almost no devices are enumerated in the step 2. 
However, in - * general, we have several serialized devices. So, DEVICE_FOUND_UDEV bit in the deserialized_found - * must be ignored, as udev rules in initrd and the main system are often different. If the - * deserialized state is DEVICE_PLUGGED, we need to downgrade it to DEVICE_TENTATIVE. Unlike the - * other starting mode, MANAGER_IS_SWITCHING_ROOT() is true when device_coldplug() and - * device_catchup() are called. Hence, let's conditionalize the operations by using the - * flag. After switch-root, systemd-udevd will (re-)process all devices, and the Device.found and - * Device.state will be adjusted. + * general, we have several serialized devices. So, DEVICE_FOUND_UDEV bit in the + * Device.deserialized_found must be ignored, as udev rules in initrd and the main system are often + * different. If the deserialized state is DEVICE_PLUGGED, we need to downgrade it to + * DEVICE_TENTATIVE. Unlike the other starting mode, MANAGER_IS_SWITCHING_ROOT() is true when + * device_coldplug() and device_catchup() are called. Hence, let's conditionalize the operations by + * using the flag. After switch-root, systemd-udevd will (re-)process all devices, and the + * Device.found and Device.state will be adjusted. * - * - On reload or reexecute, we can trust enumerated_found, deserialized_found, and deserialized_state. - * Of course, deserialized parameters may be outdated, but the unit state can be adjusted later by - * device_catchup() or uevents. */ + * - On reload or reexecute, we can trust Device.enumerated_found, Device.deserialized_found, and + * Device.deserialized_state. Of course, deserialized parameters may be outdated, but the unit + * state can be adjusted later by device_catchup() or uevents. */ if (MANAGER_IS_SWITCHING_ROOT(m) && !FLAGS_SET(d->enumerated_found, DEVICE_FOUND_UDEV)) { - found &= ~DEVICE_FOUND_UDEV; /* ignore DEVICE_FOUND_UDEV bit */ + + /* The device has not been enumerated. On switching-root, such situation is natural. 
See the + * above comment. To prevent problematic state transition active → dead → active, let's + * drop the DEVICE_FOUND_UDEV flag and downgrade state to DEVICE_TENTATIVE(activating). See + * issue #12953 and #23208. */ + found &= ~DEVICE_FOUND_UDEV; if (state == DEVICE_PLUGGED) - state = DEVICE_TENTATIVE; /* downgrade state */ + state = DEVICE_TENTATIVE; + + /* Also check the validity of the device syspath. Without this check, if the device was + * removed while switching root, it would never go to inactive state, as both Device.found + * and Device.enumerated_found do not have the DEVICE_FOUND_UDEV flag, so device_catchup() in + * device_update_found_one() does nothing in most cases. See issue #25106. Note that the + * syspath field is only serialized when systemd is sufficiently new and the device has been + * already processed by udevd. */ + if (d->deserialized_sysfs) { + _cleanup_(sd_device_unrefp) sd_device *dev = NULL; + + if (sd_device_new_from_syspath(&dev, d->deserialized_sysfs) < 0) + state = DEVICE_DEAD; + } } if (d->found == found && d->state == state) @@ -387,6 +406,9 @@ static int device_serialize(Unit *u, FILE *f, FDSet *fds) { assert(f); assert(fds); + if (d->sysfs) + (void) serialize_item(f, "sysfs", d->sysfs); + if (d->path) (void) serialize_item(f, "path", d->path); @@ -408,7 +430,14 @@ static int device_deserialize_item(Unit *u, const char *key, const char *value, assert(value); assert(fds); - if (streq(key, "path")) { + if (streq(key, "sysfs")) { + if (!d->deserialized_sysfs) { + d->deserialized_sysfs = strdup(value); + if (!d->deserialized_sysfs) + log_oom_debug(); + } + + } else if (streq(key, "path")) { if (!d->path) { d->path = strdup(value); if (!d->path) diff --git a/src/core/device.h b/src/core/device.h index 7584bc70c4..9dd6fb57c2 100644 --- a/src/core/device.h +++ b/src/core/device.h @@ -20,7 +20,7 @@ typedef enum DeviceFound { struct Device { Unit meta; - char *sysfs; + char *sysfs, *deserialized_sysfs; char *path; /* syspath, 
device node, alias, or devlink */ /* In order to be able to distinguish dependencies on different device nodes we might end up creating multiple diff --git a/src/core/main.c b/src/core/main.c index 119c518664..9c1de3624c 100644 --- a/src/core/main.c +++ b/src/core/main.c @@ -1184,9 +1184,10 @@ static void bump_file_max_and_nr_open(void) { #if BUMP_PROC_SYS_FS_FILE_MAX /* The maximum the kernel allows for this since 5.2 is LONG_MAX, use that. (Previously things were * different, but the operation would fail silently.) */ - r = sysctl_writef("fs/file-max", "%li\n", LONG_MAX); + r = sysctl_write("fs/file-max", LONG_MAX_STR); if (r < 0) - log_full_errno(IN_SET(r, -EROFS, -EPERM, -EACCES) ? LOG_DEBUG : LOG_WARNING, r, "Failed to bump fs.file-max, ignoring: %m"); + log_full_errno(IN_SET(r, -EROFS, -EPERM, -EACCES) ? LOG_DEBUG : LOG_WARNING, + r, "Failed to bump fs.file-max, ignoring: %m"); #endif #if BUMP_PROC_SYS_FS_NR_OPEN @@ -1218,7 +1219,7 @@ static void bump_file_max_and_nr_open(void) { break; } - r = sysctl_writef("fs/nr_open", "%i\n", v); + r = sysctl_writef("fs/nr_open", "%i", v); if (r == -EINVAL) { log_debug("Couldn't write fs.nr_open as %i, halving it.", v); v /= 2; @@ -1404,8 +1405,7 @@ static int bump_unix_max_dgram_qlen(void) { if (v >= DEFAULT_UNIX_MAX_DGRAM_QLEN) return 0; - r = write_string_filef("/proc/sys/net/unix/max_dgram_qlen", WRITE_STRING_FILE_DISABLE_BUFFER, - "%lu", DEFAULT_UNIX_MAX_DGRAM_QLEN); + r = sysctl_write("net/unix/max_dgram_qlen", STRINGIFY(DEFAULT_UNIX_MAX_DGRAM_QLEN)); if (r < 0) return log_full_errno(IN_SET(r, -EROFS, -EPERM, -EACCES) ? 
LOG_DEBUG : LOG_WARNING, r, "Failed to bump AF_UNIX datagram queue length, ignoring: %m"); diff --git a/src/core/mount.c b/src/core/mount.c index be46e56689..f16e5c487b 100644 --- a/src/core/mount.c +++ b/src/core/mount.c @@ -1909,6 +1909,7 @@ static void mount_enumerate(Manager *m) { mnt_init_debug(0); if (!m->mount_monitor) { + unsigned mount_rate_limit_burst = 5; int fd; m->mount_monitor = mnt_new_monitor(); @@ -1948,7 +1949,15 @@ static void mount_enumerate(Manager *m) { goto fail; } - r = sd_event_source_set_ratelimit(m->mount_event_source, 1 * USEC_PER_SEC, 5); + /* Let users override the default (5 in 1s), as it stalls the boot sequence on busy systems. */ + const char *e = secure_getenv("SYSTEMD_DEFAULT_MOUNT_RATE_LIMIT_BURST"); + if (e) { + r = safe_atou(e, &mount_rate_limit_burst); + if (r < 0) + log_debug("Invalid value in $SYSTEMD_DEFAULT_MOUNT_RATE_LIMIT_BURST, ignoring: %s", e); + } + + r = sd_event_source_set_ratelimit(m->mount_event_source, 1 * USEC_PER_SEC, mount_rate_limit_burst); if (r < 0) { log_error_errno(r, "Failed to enable rate limit for mount events: %m"); goto fail; diff --git a/src/core/namespace.c b/src/core/namespace.c index c0d0cc9715..4920716f34 100644 --- a/src/core/namespace.c +++ b/src/core/namespace.c @@ -2051,7 +2051,9 @@ int setup_namespace( DISSECT_IMAGE_RELAX_VAR_CHECK | DISSECT_IMAGE_FSCK | DISSECT_IMAGE_USR_NO_ROOT | - DISSECT_IMAGE_GROWFS; + DISSECT_IMAGE_GROWFS | + DISSECT_IMAGE_ADD_PARTITION_DEVICES | + DISSECT_IMAGE_PIN_PARTITION_DEVICES; size_t n_mounts; int r; @@ -2486,7 +2488,7 @@ int setup_namespace( goto finish; /* MS_MOVE does not work on MS_SHARED so the remount MS_SHARED will be done later */ - r = mount_pivot_root(root); + r = mount_switch_root(root, MOUNT_ATTR_PROPAGATION_INHERIT); if (r == -EINVAL && root_directory) { /* If we are using root_directory and we don't have privileges (ie: user manager in a user * namespace) and the root_directory is already a mount point in the parent namespace, @@ -2496,7 
+2498,7 @@ int setup_namespace( r = mount_nofollow_verbose(LOG_DEBUG, root, root, NULL, MS_BIND|MS_REC, NULL); if (r < 0) goto finish; - r = mount_pivot_root(root); + r = mount_switch_root(root, MOUNT_ATTR_PROPAGATION_INHERIT); } if (r < 0) { log_debug_errno(r, "Failed to mount root with MS_MOVE: %m"); diff --git a/src/core/scope.c b/src/core/scope.c index 54a6cc63e4..e2fc4cc995 100644 --- a/src/core/scope.c +++ b/src/core/scope.c @@ -43,6 +43,7 @@ static void scope_init(Unit *u) { s->timeout_stop_usec = u->manager->default_timeout_stop_usec; u->ignore_on_isolate = true; s->user = s->group = NULL; + s->oom_policy = _OOM_POLICY_INVALID; } static void scope_done(Unit *u) { @@ -194,6 +195,11 @@ static int scope_add_extras(Scope *s) { if (r < 0) return r; + if (s->oom_policy < 0) + s->oom_policy = s->cgroup_context.delegate ? OOM_CONTINUE : UNIT(s)->manager->default_oom_policy; + + s->cgroup_context.memory_oom_group = s->oom_policy == OOM_KILL; + return scope_add_default_dependencies(s); } @@ -286,11 +292,13 @@ static void scope_dump(Unit *u, FILE *f, const char *prefix) { "%sScope State: %s\n" "%sResult: %s\n" "%sRuntimeMaxSec: %s\n" - "%sRuntimeRandomizedExtraSec: %s\n", + "%sRuntimeRandomizedExtraSec: %s\n" + "%sOOMPolicy: %s\n", prefix, scope_state_to_string(s->state), prefix, scope_result_to_string(s->result), prefix, FORMAT_TIMESPAN(s->runtime_max_usec, USEC_PER_SEC), - prefix, FORMAT_TIMESPAN(s->runtime_rand_extra_usec, USEC_PER_SEC)); + prefix, FORMAT_TIMESPAN(s->runtime_rand_extra_usec, USEC_PER_SEC), + prefix, oom_policy_to_string(s->oom_policy)); cgroup_context_dump(UNIT(s), f, prefix); kill_context_dump(&s->kill_context, f, prefix); @@ -635,11 +643,16 @@ static void scope_notify_cgroup_oom_event(Unit *u, bool managed_oom) { else log_unit_debug(u, "Process of control group was killed by the OOM killer."); - /* This will probably need to be modified when scope units get an oom-policy */ + if (s->oom_policy == OOM_CONTINUE) + return; + switch (s->state) { case 
SCOPE_START_CHOWN: case SCOPE_RUNNING: + scope_enter_signal(s, SCOPE_STOP_SIGTERM, SCOPE_FAILURE_OOM_KILL); + break; + case SCOPE_STOP_SIGTERM: scope_enter_signal(s, SCOPE_STOP_SIGKILL, SCOPE_FAILURE_OOM_KILL); break; diff --git a/src/core/scope.h b/src/core/scope.h index 6a228f1177..c9574a32c2 100644 --- a/src/core/scope.h +++ b/src/core/scope.h @@ -38,6 +38,8 @@ struct Scope { char *user; char *group; + + OOMPolicy oom_policy; }; extern const UnitVTable scope_vtable; diff --git a/src/core/selinux-access.c b/src/core/selinux-access.c index e05ebdc631..62181a6309 100644 --- a/src/core/selinux-access.c +++ b/src/core/selinux-access.c @@ -229,9 +229,7 @@ int mac_selinux_access_check_internal( } else { /* If no unit context is known, use our own */ if (getcon_raw(&fcon) < 0) { - r = -errno; - - log_warning_errno(r, "SELinux getcon_raw() failed%s (perm=%s): %m", + log_warning_errno(errno, "SELinux getcon_raw() failed%s (perm=%s): %m", enforce ? "" : ", ignoring", permission); if (!enforce) @@ -239,6 +237,12 @@ int mac_selinux_access_check_internal( return sd_bus_error_setf(error, SD_BUS_ERROR_ACCESS_DENIED, "Failed to get current context: %m"); } + if (!fcon) { + if (!enforce) + return 0; + + return sd_bus_error_setf(error, SD_BUS_ERROR_ACCESS_DENIED, "We appear not to have any SELinux context: %m"); + } acon = fcon; tclass = "system"; diff --git a/src/core/selinux-setup.c b/src/core/selinux-setup.c index 153322442c..17fa2c0142 100644 --- a/src/core/selinux-setup.c +++ b/src/core/selinux-setup.c @@ -30,30 +30,28 @@ int mac_selinux_setup(bool *loaded_policy) { usec_t before_load, after_load; char *con; int r; - bool initialized = false; + bool initialized; assert(loaded_policy); /* Turn off all of SELinux' own logging, we want to do that */ - selinux_set_callback(SELINUX_CB_LOG, (union selinux_callback) { .func_log = null_log }); + selinux_set_callback(SELINUX_CB_LOG, (const union selinux_callback) { .func_log = null_log }); - /* Don't load policy in the initrd if we 
don't appear to have - * it. For the real root, we check below if we've already - * loaded policy, and return gracefully. - */ + /* Don't load policy in the initrd if we don't appear to have it. For the real root, we check below + * if we've already loaded policy, and return gracefully. */ if (in_initrd() && access(selinux_path(), F_OK) < 0) return 0; /* Already initialized by somebody else? */ r = getcon_raw(&con); - /* getcon_raw can return 0, and still give us a NULL pointer if - * /proc/self/attr/current is empty. SELinux guarantees this won't - * happen, but that file isn't specific to SELinux, and may be provided - * by some other arbitrary LSM with different semantics. */ + /* getcon_raw can return 0, and still give us a NULL pointer if /proc/self/attr/current is + * empty. SELinux guarantees this won't happen, but that file isn't specific to SELinux, and may be + * provided by some other arbitrary LSM with different semantics. */ if (r == 0 && con) { initialized = !streq(con, "kernel"); freecon(con); - } + } else + initialized = false; /* Make sure we have no fds open while loading the policy and * transitioning */ diff --git a/src/core/service.c b/src/core/service.c index 2c734eb096..bb190b1e8a 100644 --- a/src/core/service.c +++ b/src/core/service.c @@ -2421,7 +2421,7 @@ static void service_enter_reload(Service *s) { r = service_spawn(s, s->control_command, s->timeout_start_usec, - EXEC_APPLY_SANDBOXING|EXEC_APPLY_CHROOT|EXEC_IS_CONTROL|EXEC_CONTROL_CGROUP|EXEC_WRITE_CREDENTIALS, + EXEC_APPLY_SANDBOXING|EXEC_APPLY_CHROOT|EXEC_IS_CONTROL|EXEC_CONTROL_CGROUP, &s->control_pid); if (r < 0) goto fail; diff --git a/src/dissect/dissect.c b/src/dissect/dissect.c index 4ff86ba1de..e7ea582300 100644 --- a/src/dissect/dissect.c +++ b/src/dissect/dissect.c @@ -17,6 +17,7 @@ #include "copy.h" #include "device-util.h" #include "devnum-util.h" +#include "discover-image.h" #include "dissect-image.h" #include "env-util.h" #include "escape.h" @@ -56,6 +57,7 @@ static enum 
{ ACTION_WITH, ACTION_COPY_FROM, ACTION_COPY_TO, + ACTION_DISCOVER, } arg_action = ACTION_DISSECT; static const char *arg_image = NULL; static const char *arg_path = NULL; @@ -67,7 +69,9 @@ static DissectImageFlags arg_flags = DISSECT_IMAGE_RELAX_VAR_CHECK | DISSECT_IMAGE_FSCK | DISSECT_IMAGE_USR_NO_ROOT | - DISSECT_IMAGE_GROWFS; + DISSECT_IMAGE_GROWFS | + DISSECT_IMAGE_PIN_PARTITION_DEVICES | + DISSECT_IMAGE_ADD_PARTITION_DEVICES; static VeritySettings arg_verity_settings = VERITY_SETTINGS_DEFAULT; static JsonFormatFlags arg_json_format_flags = JSON_FORMAT_OFF; static PagerFlags arg_pager_flags = 0; @@ -126,6 +130,7 @@ static int help(void) { " --with Mount, run command, unmount\n" " -x --copy-from Copy files from image to host\n" " -a --copy-to Copy files from host to image\n" + " --discover Discover DDIs in well known directories\n" "\nSee the %2$s for details.\n", program_invocation_short_name, link, @@ -197,6 +202,7 @@ static int parse_argv(int argc, char *argv[]) { ARG_RMDIR, ARG_JSON, ARG_MTREE, + ARG_DISCOVER, }; static const struct option options[] = { @@ -221,6 +227,7 @@ static int parse_argv(int argc, char *argv[]) { { "copy-from", no_argument, NULL, 'x' }, { "copy-to", no_argument, NULL, 'a' }, { "json", required_argument, NULL, ARG_JSON }, + { "discover", no_argument, NULL, ARG_DISCOVER }, {} }; @@ -398,6 +405,10 @@ static int parse_argv(int argc, char *argv[]) { break; + case ARG_DISCOVER: + arg_action = ACTION_DISCOVER; + break; + case '?': return -EINVAL; @@ -489,6 +500,13 @@ static int parse_argv(int argc, char *argv[]) { break; + case ACTION_DISCOVER: + if (optind != argc) + return log_error_errno(SYNTHETIC_ERRNO(EINVAL), + "Expected no argument."); + + break; + default: assert_not_reached(); } @@ -1174,9 +1192,9 @@ static int action_list_or_mtree_or_copy(DissectedImage *m, LoopDevice *d) { static int action_umount(const char *path) { _cleanup_close_ int fd = -1; - _cleanup_free_ char *canonical = NULL, *devname = NULL; + _cleanup_free_ char 
*canonical = NULL; _cleanup_(loop_device_unrefp) LoopDevice *d = NULL; - dev_t devno; + _cleanup_(sd_device_unrefp) sd_device *dev = NULL; int r; fd = chase_symlinks_and_open(path, NULL, 0, O_DIRECTORY, &canonical); @@ -1191,18 +1209,26 @@ static int action_umount(const char *path) { if (r < 0) return log_error_errno(r, "Failed to determine whether '%s' is a mount point: %m", canonical); - r = fd_get_whole_disk(fd, /*backing=*/ true, &devno); - if (r < 0) - return log_error_errno(r, "Failed to find backing block device for '%s': %m", canonical); + r = block_device_new_from_fd(fd, BLOCK_DEVICE_LOOKUP_WHOLE_DISK | BLOCK_DEVICE_LOOKUP_BACKING, &dev); + if (r < 0) { + _cleanup_close_ int usr_fd = -1; + + /* The command `systemd-dissect --mount` expects that the image at least has the root or /usr + * partition. If it does not have the root partition, then we mount the /usr partition on a + * tmpfs. Hence, let's try to find the backing block device through the /usr partition. */ + + usr_fd = openat(fd, "usr", O_CLOEXEC | O_DIRECTORY | O_NOFOLLOW); + if (usr_fd < 0) + return log_error_errno(errno, "Failed to open '%s/usr': %m", canonical); - r = devname_from_devnum(S_IFBLK, devno, &devname); + r = block_device_new_from_fd(usr_fd, BLOCK_DEVICE_LOOKUP_WHOLE_DISK | BLOCK_DEVICE_LOOKUP_BACKING, &dev); + } if (r < 0) - return log_error_errno(r, "Failed to get devname of block device " DEVNUM_FORMAT_STR ": %m", - DEVNUM_FORMAT_VAL(devno)); + return log_error_errno(r, "Failed to find backing block device for '%s': %m", canonical); - r = loop_device_open_from_path(devname, 0, LOCK_EX, &d); + r = loop_device_open(dev, 0, LOCK_EX, &d); if (r < 0) - return log_error_errno(r, "Failed to open loop device '%s': %m", devname); + return log_device_error_errno(dev, r, "Failed to open loopback block device: %m"); /* We've locked the loop device, now we're ready to unmount. To allow the unmount to succeed, we have * to close the O_PATH fd we opened earlier. 
*/ @@ -1315,13 +1341,60 @@ static int action_with(DissectedImage *m, LoopDevice *d) { return rcode; } +static int action_discover(void) { + _cleanup_(hashmap_freep) Hashmap *images = NULL; + _cleanup_(table_unrefp) Table *t = NULL; + Image *img; + int r; + + images = hashmap_new(&image_hash_ops); + if (!images) + return log_oom(); + + for (ImageClass cl = 0; cl < _IMAGE_CLASS_MAX; cl++) { + r = image_discover(cl, NULL, images); + if (r < 0) + return log_error_errno(r, "Failed to discover images: %m"); + } + + if ((arg_json_format_flags & JSON_FORMAT_OFF) && hashmap_isempty(images)) { + log_info("No images found."); + return 0; + } + + t = table_new("name", "type", "class", "ro", "path", "time", "usage"); + if (!t) + return log_oom(); + + HASHMAP_FOREACH(img, images) { + + if (!IN_SET(img->type, IMAGE_RAW, IMAGE_BLOCK)) + continue; + + r = table_add_many( + t, + TABLE_STRING, img->name, + TABLE_STRING, image_type_to_string(img->type), + TABLE_STRING, image_class_to_string(img->class), + TABLE_BOOLEAN, img->read_only, + TABLE_PATH, img->path, + TABLE_TIMESTAMP, img->mtime != 0 ? 
img->mtime : img->crtime, + TABLE_SIZE, img->usage); + if (r < 0) + return table_log_add_error(r); + } + + (void) table_set_sort(t, (size_t) 0); + + return table_print_with_pager(t, arg_json_format_flags, arg_pager_flags, arg_legend); +} + static int run(int argc, char *argv[]) { _cleanup_(dissected_image_unrefp) DissectedImage *m = NULL; _cleanup_(loop_device_unrefp) LoopDevice *d = NULL; int r; - log_parse_environment(); - log_open(); + log_setup(); r = parse_argv(argc, argv); if (r <= 0) @@ -1329,6 +1402,8 @@ static int run(int argc, char *argv[]) { if (arg_action == ACTION_UMOUNT) return action_umount(arg_path); + if (arg_action == ACTION_DISCOVER) + return action_discover(); r = verity_settings_load( &arg_verity_settings, diff --git a/src/gpt-auto-generator/gpt-auto-generator.c b/src/gpt-auto-generator/gpt-auto-generator.c index f939b2e8c2..f2f6cc1a53 100644 --- a/src/gpt-auto-generator/gpt-auto-generator.c +++ b/src/gpt-auto-generator/gpt-auto-generator.c @@ -358,23 +358,19 @@ static int add_automount( _cleanup_free_ char *unit = NULL, *p = NULL; _cleanup_fclose_ FILE *f = NULL; - const char *opt = "noauto"; int r; assert(id); assert(where); assert(description); - if (options) - opt = strjoina(options, ",", opt); - r = add_mount(id, what, where, fstype, rw, growfs, - opt, + options, description, NULL); if (r < 0) @@ -665,6 +661,11 @@ static int enumerate_partitions(dev_t devnum) { NULL, NULL, DISSECT_IMAGE_GPT_ONLY| DISSECT_IMAGE_USR_NO_ROOT, + /* NB! Unlike most other places where we dissect block devices we do not use + * DISSECT_IMAGE_ADD_PARTITION_DEVICES here: we want that the kernel finds the + * devices, and udev probes them before we mount them via .mount units much later + * on. And thus we also don't set DISSECT_IMAGE_PIN_PARTITION_DEVICES here, because + * we don't actually mount anything immediately. 
*/ &m); if (r == -ENOPKG) { log_debug_errno(r, "No suitable partition table found, ignoring."); diff --git a/src/home/homectl.c b/src/home/homectl.c index 3e846e370a..a6d25c84fc 100644 --- a/src/home/homectl.c +++ b/src/home/homectl.c @@ -2392,6 +2392,7 @@ static int parse_argv(int argc, char *argv[]) { ARG_IO_WEIGHT, ARG_LUKS_PBKDF_TYPE, ARG_LUKS_PBKDF_HASH_ALGORITHM, + ARG_LUKS_PBKDF_FORCE_ITERATIONS, ARG_LUKS_PBKDF_TIME_COST, ARG_LUKS_PBKDF_MEMORY_COST, ARG_LUKS_PBKDF_PARALLEL_THREADS, @@ -2473,6 +2474,7 @@ static int parse_argv(int argc, char *argv[]) { { "luks-volume-key-size", required_argument, NULL, ARG_LUKS_VOLUME_KEY_SIZE }, { "luks-pbkdf-type", required_argument, NULL, ARG_LUKS_PBKDF_TYPE }, { "luks-pbkdf-hash-algorithm", required_argument, NULL, ARG_LUKS_PBKDF_HASH_ALGORITHM }, + { "luks-pbkdf-force-iterations", required_argument, NULL, ARG_LUKS_PBKDF_FORCE_ITERATIONS }, { "luks-pbkdf-time-cost", required_argument, NULL, ARG_LUKS_PBKDF_TIME_COST }, { "luks-pbkdf-memory-cost", required_argument, NULL, ARG_LUKS_PBKDF_MEMORY_COST }, { "luks-pbkdf-parallel-threads", required_argument, NULL, ARG_LUKS_PBKDF_PARALLEL_THREADS }, @@ -3093,10 +3095,12 @@ static int parse_argv(int argc, char *argv[]) { break; case ARG_LUKS_VOLUME_KEY_SIZE: + case ARG_LUKS_PBKDF_FORCE_ITERATIONS: case ARG_LUKS_PBKDF_PARALLEL_THREADS: case ARG_RATE_LIMIT_BURST: { const char *field = c == ARG_LUKS_VOLUME_KEY_SIZE ? "luksVolumeKeySize" : + c == ARG_LUKS_PBKDF_FORCE_ITERATIONS ? "luksPbkdfForceIterations" : c == ARG_LUKS_PBKDF_PARALLEL_THREADS ? "luksPbkdfParallelThreads" : c == ARG_RATE_LIMIT_BURST ? 
"rateLimitBurst" : NULL; unsigned n; diff --git a/src/home/homed-manager.h b/src/home/homed-manager.h index 18e7542e13..20bbb4cfeb 100644 --- a/src/home/homed-manager.h +++ b/src/home/homed-manager.h @@ -59,7 +59,7 @@ struct Manager { char *userdb_service; EVP_PKEY *private_key; /* actually a pair of private and public key */ - Hashmap *public_keys; /* key name [char*] → publick key [EVP_PKEY*] */ + Hashmap *public_keys; /* key name [char*] → public key [EVP_PKEY*] */ RebalanceState rebalance_state; usec_t rebalance_interval_usec; diff --git a/src/home/homework-luks.c b/src/home/homework-luks.c index 5e1d5bbd65..4d04359740 100644 --- a/src/home/homework-luks.c +++ b/src/home/homework-luks.c @@ -141,17 +141,19 @@ static int probe_file_system_by_fd( errno = 0; r = blkid_probe_set_device(b, fd, 0, 0); if (r != 0) - return errno > 0 ? -errno : -ENOMEM; + return errno_or_else(ENOMEM); (void) blkid_probe_enable_superblocks(b, 1); (void) blkid_probe_set_superblocks_flags(b, BLKID_SUBLKS_TYPE|BLKID_SUBLKS_UUID); errno = 0; r = blkid_do_safeprobe(b); - if (IN_SET(r, -2, 1)) /* nothing found or ambiguous result */ + if (r == _BLKID_SAFEPROBE_ERROR) + return errno_or_else(EIO); + if (IN_SET(r, _BLKID_SAFEPROBE_AMBIGUOUS, _BLKID_SAFEPROBE_NOT_FOUND)) return -ENOPKG; - if (r != 0) - return errno > 0 ? -errno : -EIO; + + assert(r == _BLKID_SAFEPROBE_FOUND); (void) blkid_probe_lookup_value(b, "TYPE", &fstype, NULL); if (!fstype) @@ -656,7 +658,7 @@ static int luks_validate( errno = 0; r = blkid_probe_set_device(b, fd, 0, 0); if (r != 0) - return errno > 0 ? 
-errno : -ENOMEM; + return errno_or_else(ENOMEM); (void) blkid_probe_enable_superblocks(b, 1); (void) blkid_probe_set_superblocks_flags(b, BLKID_SUBLKS_TYPE); @@ -665,10 +667,12 @@ static int luks_validate( errno = 0; r = blkid_do_safeprobe(b); - if (IN_SET(r, -2, 1)) /* nothing found or ambiguous result */ + if (r == _BLKID_SAFEPROBE_ERROR) + return errno_or_else(EIO); + if (IN_SET(r, _BLKID_SAFEPROBE_AMBIGUOUS, _BLKID_SAFEPROBE_NOT_FOUND)) return -ENOPKG; - if (r != 0) - return errno > 0 ? -errno : -EIO; + + assert(r == _BLKID_SAFEPROBE_FOUND); (void) blkid_probe_lookup_value(b, "TYPE", &fstype, NULL); if (streq_ptr(fstype, "crypto_LUKS")) { @@ -687,22 +691,21 @@ static int luks_validate( errno = 0; pl = blkid_probe_get_partitions(b); if (!pl) - return errno > 0 ? -errno : -ENOMEM; + return errno_or_else(ENOMEM); errno = 0; n = blkid_partlist_numof_partitions(pl); if (n < 0) - return errno > 0 ? -errno : -EIO; + return errno_or_else(EIO); for (int i = 0; i < n; i++) { - blkid_partition pp; sd_id128_t id = SD_ID128_NULL; - const char *sid; + blkid_partition pp; errno = 0; pp = blkid_partlist_get_partition(pl, i); if (!pp) - return errno > 0 ? 
-errno : -EIO; + return errno_or_else(EIO); if (sd_id128_string_equal(blkid_partition_get_type_string(pp), SD_GPT_USER_HOME) <= 0) continue; @@ -710,15 +713,12 @@ static int luks_validate( if (!streq_ptr(blkid_partition_get_name(pp), label)) continue; - sid = blkid_partition_get_uuid(pp); - if (sid) { - r = sd_id128_from_string(sid, &id); - if (r < 0) - log_debug_errno(r, "Couldn't parse partition UUID %s, weird: %m", sid); - if (!sd_id128_is_null(partition_uuid) && !sd_id128_equal(id, partition_uuid)) - continue; - } + r = blkid_partition_get_uuid_id128(pp, &id); + if (r < 0) + log_debug_errno(r, "Failed to read partition UUID, ignoring: %m"); + else if (!sd_id128_is_null(partition_uuid) && !sd_id128_equal(id, partition_uuid)) + continue; if (found) return -ENOPKG; @@ -1670,12 +1670,16 @@ static struct crypt_pbkdf_type* build_good_pbkdf(struct crypt_pbkdf_type *buffer assert(buffer); assert(hr); + bool benchmark = user_record_luks_pbkdf_force_iterations(hr) == UINT64_MAX; + *buffer = (struct crypt_pbkdf_type) { .hash = user_record_luks_pbkdf_hash_algorithm(hr), .type = user_record_luks_pbkdf_type(hr), - .time_ms = user_record_luks_pbkdf_time_cost_usec(hr) / USEC_PER_MSEC, + .time_ms = benchmark ? user_record_luks_pbkdf_time_cost_usec(hr) / USEC_PER_MSEC : 0, + .iterations = benchmark ? 0 : user_record_luks_pbkdf_force_iterations(hr), .max_memory_kb = user_record_luks_pbkdf_memory_cost(hr) / 1024, .parallel_threads = user_record_luks_pbkdf_parallel_threads(hr), + .flags = benchmark ? 
0 : CRYPT_PBKDF_NO_BENCHMARK, }; return buffer; @@ -1837,7 +1841,7 @@ static int make_partition_table( _cleanup_(fdisk_unref_partitionp) struct fdisk_partition *p = NULL, *q = NULL; _cleanup_(fdisk_unref_parttypep) struct fdisk_parttype *t = NULL; _cleanup_(fdisk_unref_contextp) struct fdisk_context *c = NULL; - _cleanup_free_ char *path = NULL, *disk_uuid_as_string = NULL; + _cleanup_free_ char *disk_uuid_as_string = NULL; uint64_t offset, size, first_lba, start, last_lba, end; sd_id128_t disk_uuid; int r; @@ -1855,14 +1859,7 @@ static int make_partition_table( if (r < 0) return log_error_errno(r, "Failed to initialize partition type: %m"); - c = fdisk_new_context(); - if (!c) - return log_oom(); - - if (asprintf(&path, "/proc/self/fd/%i", fd) < 0) - return log_oom(); - - r = fdisk_assign_device(c, path, 0); + r = fdisk_new_context_fd(fd, /* read_only= */ false, &c); if (r < 0) return log_error_errno(r, "Failed to open device: %m"); @@ -2645,7 +2642,7 @@ static int prepare_resize_partition( _cleanup_(fdisk_unref_contextp) struct fdisk_context *c = NULL; _cleanup_(fdisk_unref_tablep) struct fdisk_table *t = NULL; - _cleanup_free_ char *path = NULL, *disk_uuid_as_string = NULL; + _cleanup_free_ char *disk_uuid_as_string = NULL; struct fdisk_partition *found = NULL; sd_id128_t disk_uuid; size_t n_partitions; @@ -2668,14 +2665,7 @@ static int prepare_resize_partition( return 0; } - c = fdisk_new_context(); - if (!c) - return log_oom(); - - if (asprintf(&path, "/proc/self/fd/%i", fd) < 0) - return log_oom(); - - r = fdisk_assign_device(c, path, 0); + r = fdisk_new_context_fd(fd, /* read_only= */ false, &c); if (r < 0) return log_error_errno(r, "Failed to open device: %m"); @@ -2759,7 +2749,6 @@ static int apply_resize_partition( _cleanup_(fdisk_unref_contextp) struct fdisk_context *c = NULL; _cleanup_free_ void *two_zero_lbas = NULL; - _cleanup_free_ char *path = NULL; ssize_t n; int r; @@ -2791,14 +2780,7 @@ static int apply_resize_partition( if (n != 1024) return 
log_error_errno(SYNTHETIC_ERRNO(EIO), "Short write while wiping partition table."); - c = fdisk_new_context(); - if (!c) - return log_oom(); - - if (asprintf(&path, "/proc/self/fd/%i", fd) < 0) - return log_oom(); - - r = fdisk_assign_device(c, path, 0); + r = fdisk_new_context_fd(fd, /* read_only= */ false, &c); if (r < 0) return log_error_errno(r, "Failed to open device: %m"); diff --git a/src/hostname/hostnamed.c b/src/hostname/hostnamed.c index f3d3131828..ea19dfbb04 100644 --- a/src/hostname/hostnamed.c +++ b/src/hostname/hostnamed.c @@ -246,6 +246,14 @@ static int get_firmware_version(char **ret) { return get_hardware_firmware_data("bios_version", ret); } +static int get_firmware_vendor(char **ret) { + return get_hardware_firmware_data("bios_vendor", ret); +} + +static int get_firmware_date(char **ret) { + return get_hardware_firmware_data("bios_date", ret); +} + static const char* valid_chassis(const char *chassis) { assert(chassis); @@ -628,6 +636,37 @@ static int property_get_firmware_version( return sd_bus_message_append(reply, "s", firmware_version); } +static int property_get_firmware_vendor( + sd_bus *bus, + const char *path, + const char *interface, + const char *property, + sd_bus_message *reply, + void *userdata, + sd_bus_error *error) { + + _cleanup_free_ char *firmware_vendor = NULL; + + (void) get_firmware_vendor(&firmware_vendor); + + return sd_bus_message_append(reply, "s", firmware_vendor); +} + +static int property_get_firmware_date( + sd_bus *bus, + const char *path, + const char *interface, + const char *property, + sd_bus_message *reply, + void *userdata, + sd_bus_error *error) { + + _cleanup_free_ char *firmware_date = NULL; + + (void) get_firmware_date(&firmware_date); + + return sd_bus_message_append(reply, "s", firmware_date); +} static int property_get_hostname( sd_bus *bus, const char *path, @@ -1149,7 +1188,8 @@ static int method_get_hardware_serial(sd_bus_message *m, void *userdata, sd_bus_ static int method_describe(sd_bus_message 
*m, void *userdata, sd_bus_error *error) { _cleanup_free_ char *hn = NULL, *dhn = NULL, *in = NULL, *text = NULL, - *chassis = NULL, *vendor = NULL, *model = NULL, *serial = NULL, *firmware_version = NULL; + *chassis = NULL, *vendor = NULL, *model = NULL, *serial = NULL, *firmware_version = NULL, + *firmware_vendor = NULL, *firmware_date = NULL; _cleanup_(sd_bus_message_unrefp) sd_bus_message *reply = NULL; _cleanup_(json_variant_unrefp) JsonVariant *v = NULL; sd_id128_t product_uuid = SD_ID128_NULL; @@ -1213,6 +1253,8 @@ static int method_describe(sd_bus_message *m, void *userdata, sd_bus_error *erro (void) get_hardware_serial(&serial); } (void) get_firmware_version(&firmware_version); + (void) get_firmware_vendor(&firmware_vendor); + (void) get_firmware_date(&firmware_date); r = json_build(&v, JSON_BUILD_OBJECT( JSON_BUILD_PAIR("Hostname", JSON_BUILD_STRING(hn)), @@ -1234,6 +1276,8 @@ static int method_describe(sd_bus_message *m, void *userdata, sd_bus_error *erro JSON_BUILD_PAIR("HardwareModel", JSON_BUILD_STRING(model ?: c->data[PROP_HARDWARE_MODEL])), JSON_BUILD_PAIR("HardwareSerial", JSON_BUILD_STRING(serial)), JSON_BUILD_PAIR("FirmwareVersion", JSON_BUILD_STRING(firmware_version)), + JSON_BUILD_PAIR("FirmwareVendor", JSON_BUILD_STRING(firmware_vendor)), + JSON_BUILD_PAIR("FirmwareDate", JSON_BUILD_STRING(firmware_date)), JSON_BUILD_PAIR_CONDITION(!sd_id128_is_null(product_uuid), "ProductUUID", JSON_BUILD_ID128(product_uuid)), JSON_BUILD_PAIR_CONDITION(sd_id128_is_null(product_uuid), "ProductUUID", JSON_BUILD_NULL))); @@ -1275,6 +1319,8 @@ static const sd_bus_vtable hostname_vtable[] = { SD_BUS_PROPERTY("HardwareVendor", "s", property_get_hardware_vendor, 0, SD_BUS_VTABLE_PROPERTY_CONST), SD_BUS_PROPERTY("HardwareModel", "s", property_get_hardware_model, 0, SD_BUS_VTABLE_PROPERTY_CONST), SD_BUS_PROPERTY("FirmwareVersion", "s", property_get_firmware_version, 0, SD_BUS_VTABLE_PROPERTY_CONST), + SD_BUS_PROPERTY("FirmwareVendor", "s", 
property_get_firmware_vendor, 0, SD_BUS_VTABLE_PROPERTY_CONST), + SD_BUS_PROPERTY("FirmwareDate", "s", property_get_firmware_date, 0, SD_BUS_VTABLE_PROPERTY_CONST), SD_BUS_METHOD_WITH_ARGS("SetHostname", SD_BUS_ARGS("s", hostname, "b", interactive), diff --git a/src/import/importd.c b/src/import/importd.c index 9a3ea131c1..4f6be5f20c 100644 --- a/src/import/importd.c +++ b/src/import/importd.c @@ -94,6 +94,9 @@ struct Manager { int notify_fd; sd_event_source *notify_event_source; + + bool use_btrfs_subvol; + bool use_btrfs_quota; }; #define TRANSFERS_MAX 64 @@ -628,10 +631,15 @@ static int manager_new(Manager **ret) { assert(ret); - m = new0(Manager, 1); + m = new(Manager, 1); if (!m) return -ENOMEM; + *m = (Manager) { + .use_btrfs_subvol = true, + .use_btrfs_quota = true, + }; + r = sd_event_default(&m->event); if (r < 0) return r; @@ -719,7 +727,7 @@ static int method_import_tar_or_raw(sd_bus_message *msg, void *userdata, sd_bus_ return sd_bus_error_setf(error, SD_BUS_ERROR_INVALID_ARGS, "Local name %s is invalid", local); - r = setup_machine_directory(error); + r = setup_machine_directory(error, m->use_btrfs_subvol, m->use_btrfs_quota); if (r < 0) return r; @@ -788,7 +796,7 @@ static int method_import_fs(sd_bus_message *msg, void *userdata, sd_bus_error *e return sd_bus_error_setf(error, SD_BUS_ERROR_INVALID_ARGS, "Local name %s is invalid", local); - r = setup_machine_directory(error); + r = setup_machine_directory(error, m->use_btrfs_subvol, m->use_btrfs_quota); if (r < 0) return r; @@ -939,7 +947,7 @@ static int method_pull_tar_or_raw(sd_bus_message *msg, void *userdata, sd_bus_er return sd_bus_error_setf(error, SD_BUS_ERROR_INVALID_ARGS, "Unknown verification mode %s", verify); - r = setup_machine_directory(error); + r = setup_machine_directory(error, m->use_btrfs_subvol, m->use_btrfs_quota); if (r < 0) return r; @@ -1351,6 +1359,28 @@ static int manager_run(Manager *m) { m); } +static void manager_parse_env(Manager *m) { + int r; + + assert(m); + + /* Same 
as src/import/{import,pull}.c: + * Let's make these relatively low-level settings also controllable via env vars. User can then set + * them for systemd-importd.service if they like to tweak behaviour */ + + r = getenv_bool("SYSTEMD_IMPORT_BTRFS_SUBVOL"); + if (r >= 0) + m->use_btrfs_subvol = r; + else if (r != -ENXIO) + log_warning_errno(r, "Failed to parse $SYSTEMD_IMPORT_BTRFS_SUBVOL: %m"); + + r = getenv_bool("SYSTEMD_IMPORT_BTRFS_QUOTA"); + if (r >= 0) + m->use_btrfs_quota = r; + else if (r != -ENXIO) + log_warning_errno(r, "Failed to parse $SYSTEMD_IMPORT_BTRFS_QUOTA: %m"); +} + static int run(int argc, char *argv[]) { _cleanup_(manager_unrefp) Manager *m = NULL; int r; @@ -1373,6 +1403,8 @@ static int run(int argc, char *argv[]) { if (r < 0) return log_error_errno(r, "Failed to allocate manager object: %m"); + manager_parse_env(m); + r = manager_add_bus_objects(m); if (r < 0) return r; diff --git a/src/journal/journald-audit.c b/src/journal/journald-audit.c index d301d28966..a2ebf97c9c 100644 --- a/src/journal/journald-audit.c +++ b/src/journal/journald-audit.c @@ -8,6 +8,7 @@ #include "fd-util.h" #include "hexdecoct.h" #include "io-util.h" +#include "journal-internal.h" #include "journald-audit.h" #include "missing_audit.h" #include "string-util.h" @@ -441,7 +442,7 @@ void server_process_audit_message( } if (!NLMSG_OK(nl, buffer_size)) { - log_ratelimit_error(JOURNALD_LOG_RATELIMIT, "Audit netlink message truncated."); + log_ratelimit_error(JOURNAL_LOG_RATELIMIT, "Audit netlink message truncated."); return; } diff --git a/src/journal/journald-context.c b/src/journal/journald-context.c index 6d58422ddd..222855ae60 100644 --- a/src/journal/journald-context.c +++ b/src/journal/journald-context.c @@ -12,6 +12,7 @@ #include "fileio.h" #include "fs-util.h" #include "io-util.h" +#include "journal-internal.h" #include "journal-util.h" #include "journald-context.h" #include "parse-util.h" @@ -258,7 +259,7 @@ static int client_context_read_label( /* If we got no 
SELinux label passed in, let's try to acquire one */ - if (getpidcon(c->pid, &con) >= 0) { + if (getpidcon(c->pid, &con) >= 0 && con) { free_and_replace(c->label, con); c->label_size = strlen(c->label); } @@ -771,7 +772,7 @@ void client_context_acquire_default(Server *s) { r = client_context_acquire(s, ucred.pid, &ucred, NULL, 0, NULL, &s->my_context); if (r < 0) - log_ratelimit_warning_errno(r, JOURNALD_LOG_RATELIMIT, + log_ratelimit_warning_errno(r, JOURNAL_LOG_RATELIMIT, "Failed to acquire our own context, ignoring: %m"); } @@ -781,7 +782,7 @@ void client_context_acquire_default(Server *s) { r = client_context_acquire(s, 1, NULL, NULL, 0, NULL, &s->pid1_context); if (r < 0) - log_ratelimit_warning_errno(r, JOURNALD_LOG_RATELIMIT, + log_ratelimit_warning_errno(r, JOURNAL_LOG_RATELIMIT, "Failed to acquire PID1's context, ignoring: %m"); } diff --git a/src/journal/journald-kmsg.c b/src/journal/journald-kmsg.c index 6c1e7892d1..99eace0848 100644 --- a/src/journal/journald-kmsg.c +++ b/src/journal/journald-kmsg.c @@ -16,6 +16,7 @@ #include "format-util.h" #include "fs-util.h" #include "io-util.h" +#include "journal-internal.h" #include "journald-kmsg.h" #include "journald-server.h" #include "journald-syslog.h" @@ -320,7 +321,7 @@ static int server_read_dev_kmsg(Server *s) { if (l < 0) { /* Old kernels who don't allow reading from /dev/kmsg * return EINVAL when we try. So handle this cleanly, - * but don' try to ever read from it again. */ + * but don't try to ever read from it again. 
*/ if (errno == EINVAL) { s->dev_kmsg_event_source = sd_event_source_unref(s->dev_kmsg_event_source); return 0; @@ -329,7 +330,7 @@ static int server_read_dev_kmsg(Server *s) { if (ERRNO_IS_TRANSIENT(errno) || errno == EPIPE) return 0; - return log_ratelimit_error_errno(errno, JOURNALD_LOG_RATELIMIT, "Failed to read from /dev/kmsg: %m"); + return log_ratelimit_error_errno(errno, JOURNAL_LOG_RATELIMIT, "Failed to read from /dev/kmsg: %m"); } dev_kmsg_record(s, buffer, l); @@ -368,7 +369,7 @@ static int dispatch_dev_kmsg(sd_event_source *es, int fd, uint32_t revents, void assert(fd == s->dev_kmsg_fd); if (revents & EPOLLERR) - log_ratelimit_warning(JOURNALD_LOG_RATELIMIT, + log_ratelimit_warning(JOURNAL_LOG_RATELIMIT, "/dev/kmsg buffer overrun, some messages lost."); if (!(revents & EPOLLIN)) diff --git a/src/journal/journald-native.c b/src/journal/journald-native.c index 21e20db2d4..847f69c1ff 100644 --- a/src/journal/journald-native.c +++ b/src/journal/journald-native.c @@ -11,6 +11,7 @@ #include "fs-util.h" #include "io-util.h" #include "journal-importer.h" +#include "journal-internal.h" #include "journal-util.h" #include "journald-console.h" #include "journald-kmsg.h" @@ -309,7 +310,7 @@ void server_process_native_message( if (ucred && pid_is_valid(ucred->pid)) { r = client_context_get(s, ucred->pid, ucred, label, label_len, NULL, &context); if (r < 0) - log_ratelimit_warning_errno(r, JOURNALD_LOG_RATELIMIT, + log_ratelimit_warning_errno(r, JOURNAL_LOG_RATELIMIT, "Failed to retrieve credentials for PID " PID_FMT ", ignoring: %m", ucred->pid); } @@ -350,33 +351,33 @@ void server_process_native_file( r = fd_get_path(fd, &k); if (r < 0) { - log_ratelimit_error_errno(r, JOURNALD_LOG_RATELIMIT, + log_ratelimit_error_errno(r, JOURNAL_LOG_RATELIMIT, "readlink(/proc/self/fd/%i) failed: %m", fd); return; } e = PATH_STARTSWITH_SET(k, "/dev/shm/", "/tmp/", "/var/tmp/"); if (!e) { - log_ratelimit_error(JOURNALD_LOG_RATELIMIT, + log_ratelimit_error(JOURNAL_LOG_RATELIMIT, 
"Received file outside of allowed directories. Refusing."); return; } if (!filename_is_valid(e)) { - log_ratelimit_error(JOURNALD_LOG_RATELIMIT, + log_ratelimit_error(JOURNAL_LOG_RATELIMIT, "Received file in subdirectory of allowed directories. Refusing."); return; } } if (fstat(fd, &st) < 0) { - log_ratelimit_error_errno(errno, JOURNALD_LOG_RATELIMIT, + log_ratelimit_error_errno(errno, JOURNAL_LOG_RATELIMIT, "Failed to stat passed file, ignoring: %m"); return; } if (!S_ISREG(st.st_mode)) { - log_ratelimit_error(JOURNALD_LOG_RATELIMIT, + log_ratelimit_error(JOURNAL_LOG_RATELIMIT, "File passed is not regular. Ignoring."); return; } @@ -387,7 +388,7 @@ void server_process_native_file( /* When !sealed, set a lower memory limit. We have to read the file, * effectively doubling memory use. */ if (st.st_size > ENTRY_SIZE_MAX / (sealed ? 1 : 2)) { - log_ratelimit_error(JOURNALD_LOG_RATELIMIT, + log_ratelimit_error(JOURNAL_LOG_RATELIMIT, "File passed too large (%"PRIu64" bytes). Ignoring.", (uint64_t) st.st_size); return; @@ -402,7 +403,7 @@ void server_process_native_file( ps = PAGE_ALIGN(st.st_size); p = mmap(NULL, ps, PROT_READ, MAP_PRIVATE, fd, 0); if (p == MAP_FAILED) { - log_ratelimit_error_errno(errno, JOURNALD_LOG_RATELIMIT, + log_ratelimit_error_errno(errno, JOURNAL_LOG_RATELIMIT, "Failed to map memfd, ignoring: %m"); return; } @@ -415,7 +416,7 @@ void server_process_native_file( ssize_t n; if (fstatvfs(fd, &vfs) < 0) { - log_ratelimit_error_errno(errno, JOURNALD_LOG_RATELIMIT, + log_ratelimit_error_errno(errno, JOURNAL_LOG_RATELIMIT, "Failed to stat file system of passed file, not processing it: %m"); return; } @@ -426,7 +427,7 @@ void server_process_native_file( * https://github.com/systemd/systemd/issues/1822 */ if (vfs.f_flag & ST_MANDLOCK) { - log_ratelimit_error(JOURNALD_LOG_RATELIMIT, + log_ratelimit_error(JOURNAL_LOG_RATELIMIT, "Received file descriptor from file system with mandatory locking enabled, not processing it."); return; } @@ -440,7 +441,7 @@ 
void server_process_native_file( * and so is SMB. */ r = fd_nonblock(fd, true); if (r < 0) { - log_ratelimit_error_errno(r, JOURNALD_LOG_RATELIMIT, + log_ratelimit_error_errno(r, JOURNAL_LOG_RATELIMIT, "Failed to make fd non-blocking, not processing it: %m"); return; } @@ -457,7 +458,7 @@ void server_process_native_file( n = pread(fd, p, st.st_size, 0); if (n < 0) - log_ratelimit_error_errno(errno, JOURNALD_LOG_RATELIMIT, + log_ratelimit_error_errno(errno, JOURNAL_LOG_RATELIMIT, "Failed to read file, ignoring: %m"); else if (n > 0) server_process_native_message(s, p, n, ucred, tv, label, label_len); diff --git a/src/journal/journald-server.c b/src/journal/journald-server.c index cb94a037d5..b268db0220 100644 --- a/src/journal/journald-server.c +++ b/src/journal/journald-server.c @@ -102,10 +102,10 @@ static int determine_path_usage( d = opendir(path); if (!d) return log_ratelimit_full_errno(errno == ENOENT ? LOG_DEBUG : LOG_ERR, - errno, JOURNALD_LOG_RATELIMIT, "Failed to open %s: %m", path); + errno, JOURNAL_LOG_RATELIMIT, "Failed to open %s: %m", path); if (fstatvfs(dirfd(d), &ss) < 0) - return log_ratelimit_error_errno(errno, JOURNALD_LOG_RATELIMIT, + return log_ratelimit_error_errno(errno, JOURNAL_LOG_RATELIMIT, "Failed to fstatvfs(%s): %m", path); *ret_free = ss.f_bsize * ss.f_bavail; @@ -256,7 +256,7 @@ static void server_add_acls(ManagedJournalFile *f, uid_t uid) { r = fd_add_uid_acl_permission(f->file->fd, uid, ACL_READ); if (r < 0) - log_ratelimit_warning_errno(r, JOURNALD_LOG_RATELIMIT, + log_ratelimit_warning_errno(r, JOURNAL_LOG_RATELIMIT, "Failed to set ACL on %s, ignoring: %m", f->file->path); #endif } @@ -357,7 +357,7 @@ static int system_journal_open(Server *s, bool flush_requested, bool relinquish_ patch_min_use(&s->system_storage); } else { if (!IN_SET(r, -ENOENT, -EROFS)) - log_ratelimit_warning_errno(r, JOURNALD_LOG_RATELIMIT, + log_ratelimit_warning_errno(r, JOURNAL_LOG_RATELIMIT, "Failed to open system journal: %m"); r = 0; @@ -387,7 +387,7 @@ 
static int system_journal_open(Server *s, bool flush_requested, bool relinquish_ r = open_journal(s, false, fn, O_RDWR, false, &s->runtime_storage.metrics, &s->runtime_journal); if (r < 0) { if (r != -ENOENT) - log_ratelimit_warning_errno(r, JOURNALD_LOG_RATELIMIT, + log_ratelimit_warning_errno(r, JOURNAL_LOG_RATELIMIT, "Failed to open runtime journal: %m"); r = 0; @@ -402,7 +402,7 @@ static int system_journal_open(Server *s, bool flush_requested, bool relinquish_ r = open_journal(s, true, fn, O_RDWR|O_CREAT, false, &s->runtime_storage.metrics, &s->runtime_journal); if (r < 0) - return log_ratelimit_warning_errno(r, JOURNALD_LOG_RATELIMIT, + return log_ratelimit_warning_errno(r, JOURNAL_LOG_RATELIMIT, "Failed to open runtime journal: %m"); } @@ -500,10 +500,10 @@ static int do_rotate( r = managed_journal_file_rotate(f, s->mmap, file_flags, s->compress.threshold_bytes, s->deferred_closes); if (r < 0) { if (*f) - return log_ratelimit_error_errno(r, JOURNALD_LOG_RATELIMIT, + return log_ratelimit_error_errno(r, JOURNAL_LOG_RATELIMIT, "Failed to rotate %s: %m", (*f)->file->path); else - return log_ratelimit_error_errno(r, JOURNALD_LOG_RATELIMIT, + return log_ratelimit_error_errno(r, JOURNAL_LOG_RATELIMIT, "Failed to create new %s journal: %m", name); } @@ -554,7 +554,7 @@ static int vacuum_offline_user_journals(Server *s) { if (errno == ENOENT) return 0; - return log_ratelimit_error_errno(errno, JOURNALD_LOG_RATELIMIT, + return log_ratelimit_error_errno(errno, JOURNAL_LOG_RATELIMIT, "Failed to open %s: %m", s->system_storage.path); } @@ -570,7 +570,7 @@ static int vacuum_offline_user_journals(Server *s) { de = readdir_no_dot(d); if (!de) { if (errno != 0) - log_ratelimit_warning_errno(errno, JOURNALD_LOG_RATELIMIT, + log_ratelimit_warning_errno(errno, JOURNAL_LOG_RATELIMIT, "Failed to enumerate %s, ignoring: %m", s->system_storage.path); @@ -605,7 +605,7 @@ static int vacuum_offline_user_journals(Server *s) { fd = openat(dirfd(d), de->d_name, 
O_RDWR|O_CLOEXEC|O_NOCTTY|O_NOFOLLOW|O_NONBLOCK); if (fd < 0) { log_ratelimit_full_errno(IN_SET(errno, ELOOP, ENOENT) ? LOG_DEBUG : LOG_WARNING, - errno, JOURNALD_LOG_RATELIMIT, + errno, JOURNAL_LOG_RATELIMIT, "Failed to open journal file '%s' for rotation: %m", full); continue; } @@ -628,13 +628,13 @@ static int vacuum_offline_user_journals(Server *s) { NULL, &f); if (r < 0) { - log_ratelimit_warning_errno(r, JOURNALD_LOG_RATELIMIT, + log_ratelimit_warning_errno(r, JOURNAL_LOG_RATELIMIT, "Failed to read journal file %s for rotation, trying to move it out of the way: %m", full); r = journal_file_dispose(dirfd(d), de->d_name); if (r < 0) - log_ratelimit_warning_errno(r, JOURNALD_LOG_RATELIMIT, + log_ratelimit_warning_errno(r, JOURNAL_LOG_RATELIMIT, "Failed to move %s out of the way, ignoring: %m", full); else @@ -692,21 +692,21 @@ void server_sync(Server *s) { if (s->system_journal) { r = managed_journal_file_set_offline(s->system_journal, false); if (r < 0) - log_ratelimit_warning_errno(r, JOURNALD_LOG_RATELIMIT, + log_ratelimit_warning_errno(r, JOURNAL_LOG_RATELIMIT, "Failed to sync system journal, ignoring: %m"); } ORDERED_HASHMAP_FOREACH(f, s->user_journals) { r = managed_journal_file_set_offline(f, false); if (r < 0) - log_ratelimit_warning_errno(r, JOURNALD_LOG_RATELIMIT, + log_ratelimit_warning_errno(r, JOURNAL_LOG_RATELIMIT, "Failed to sync user journal, ignoring: %m"); } if (s->sync_event_source) { r = sd_event_source_set_enabled(s->sync_event_source, SD_EVENT_OFF); if (r < 0) - log_ratelimit_error_errno(r, JOURNALD_LOG_RATELIMIT, + log_ratelimit_error_errno(r, JOURNAL_LOG_RATELIMIT, "Failed to disable sync timer source: %m"); } @@ -729,7 +729,7 @@ static void do_vacuum(Server *s, JournalStorage *storage, bool verbose) { storage->metrics.n_max_files, s->max_retention_usec, &s->oldest_file_usec, verbose); if (r < 0 && r != -ENOENT) - log_ratelimit_warning_errno(r, JOURNALD_LOG_RATELIMIT, + log_ratelimit_warning_errno(r, JOURNAL_LOG_RATELIMIT, "Failed to 
vacuum %s, ignoring: %m", storage->path); cache_space_invalidate(&storage->space); @@ -801,38 +801,46 @@ static bool shall_try_append_again(JournalFile *f, int r) { log_debug("%s: Allocation limit reached, rotating.", f->path); return true; + case -EROFS: /* Read-only file system */ + /* When appending an entry fails if shall_try_append_again returns true, the journal is + * rotated. If the FS is read-only, rotation will fail and s->system_journal will be set to + * NULL. After that, when find_journal will try to open the journal since s->system_journal + * will be NULL, it will open the runtime journal. */ + log_ratelimit_warning(JOURNAL_LOG_RATELIMIT, "%s: Read-only file system, rotating.", f->path); + return true; + case -EIO: /* I/O error of some kind (mmap) */ - log_ratelimit_warning(JOURNALD_LOG_RATELIMIT, "%s: IO error, rotating.", f->path); + log_ratelimit_warning(JOURNAL_LOG_RATELIMIT, "%s: IO error, rotating.", f->path); return true; case -EHOSTDOWN: /* Other machine */ - log_ratelimit_info(JOURNALD_LOG_RATELIMIT, "%s: Journal file from other machine, rotating.", f->path); + log_ratelimit_info(JOURNAL_LOG_RATELIMIT, "%s: Journal file from other machine, rotating.", f->path); return true; case -EBUSY: /* Unclean shutdown */ - log_ratelimit_info(JOURNALD_LOG_RATELIMIT, "%s: Unclean shutdown, rotating.", f->path); + log_ratelimit_info(JOURNAL_LOG_RATELIMIT, "%s: Unclean shutdown, rotating.", f->path); return true; case -EPROTONOSUPPORT: /* Unsupported feature */ - log_ratelimit_info(JOURNALD_LOG_RATELIMIT, "%s: Unsupported feature, rotating.", f->path); + log_ratelimit_info(JOURNAL_LOG_RATELIMIT, "%s: Unsupported feature, rotating.", f->path); return true; case -EBADMSG: /* Corrupted */ case -ENODATA: /* Truncated */ case -ESHUTDOWN: /* Already archived */ - log_ratelimit_warning(JOURNALD_LOG_RATELIMIT, "%s: Journal file corrupted, rotating.", f->path); + log_ratelimit_warning(JOURNAL_LOG_RATELIMIT, "%s: Journal file corrupted, rotating.", f->path); return 
true; case -EIDRM: /* Journal file has been deleted */ - log_ratelimit_warning(JOURNALD_LOG_RATELIMIT, "%s: Journal file has been deleted, rotating.", f->path); + log_ratelimit_warning(JOURNAL_LOG_RATELIMIT, "%s: Journal file has been deleted, rotating.", f->path); return true; case -ETXTBSY: /* Journal file is from the future */ - log_ratelimit_warning(JOURNALD_LOG_RATELIMIT, "%s: Journal file is from the future, rotating.", f->path); + log_ratelimit_warning(JOURNAL_LOG_RATELIMIT, "%s: Journal file is from the future, rotating.", f->path); return true; case -EAFNOSUPPORT: - log_ratelimit_warning(JOURNALD_LOG_RATELIMIT, + log_ratelimit_warning(JOURNAL_LOG_RATELIMIT, "%s: underlying file system does not support memory mapping or another required file system feature.", f->path); return false; @@ -864,7 +872,7 @@ static void write_to_journal(Server *s, uid_t uid, struct iovec *iovec, size_t n * to ensure that the entries in the journal files are strictly ordered by time, in order to ensure * bisection works correctly. 
*/ - log_ratelimit_info(JOURNALD_LOG_RATELIMIT, "Time jumped backwards, rotating."); + log_ratelimit_info(JOURNAL_LOG_RATELIMIT, "Time jumped backwards, rotating."); rotate = true; } else { @@ -873,7 +881,7 @@ static void write_to_journal(Server *s, uid_t uid, struct iovec *iovec, size_t n return; if (journal_file_rotate_suggested(f->file, s->max_file_usec, LOG_INFO)) { - log_ratelimit_info(JOURNALD_LOG_RATELIMIT, + log_ratelimit_info(JOURNAL_LOG_RATELIMIT, "%s: Journal header limits reached or header out-of-date, rotating.", f->file->path); rotate = true; @@ -1212,7 +1220,7 @@ int server_flush_to_var(Server *s, bool require_flag_file) { r = sd_journal_open(&j, SD_JOURNAL_RUNTIME_ONLY); if (r < 0) - return log_ratelimit_error_errno(r, JOURNALD_LOG_RATELIMIT, + return log_ratelimit_error_errno(r, JOURNAL_LOG_RATELIMIT, "Failed to read runtime journal: %m"); sd_journal_set_data_threshold(j, 0); @@ -1228,7 +1236,7 @@ int server_flush_to_var(Server *s, bool require_flag_file) { r = journal_file_move_to_object(f, OBJECT_ENTRY, f->current_offset, &o); if (r < 0) { - log_ratelimit_error_errno(r, JOURNALD_LOG_RATELIMIT, "Can't read entry: %m"); + log_ratelimit_error_errno(r, JOURNAL_LOG_RATELIMIT, "Can't read entry: %m"); goto finish; } @@ -1237,17 +1245,17 @@ int server_flush_to_var(Server *s, bool require_flag_file) { continue; if (!shall_try_append_again(s->system_journal->file, r)) { - log_ratelimit_error_errno(r, JOURNALD_LOG_RATELIMIT, "Can't write entry: %m"); + log_ratelimit_error_errno(r, JOURNAL_LOG_RATELIMIT, "Can't write entry: %m"); goto finish; } - log_ratelimit_info(JOURNALD_LOG_RATELIMIT, "Rotating system journal."); + log_ratelimit_info(JOURNAL_LOG_RATELIMIT, "Rotating system journal."); server_rotate(s); server_vacuum(s, false); if (!s->system_journal) { - log_ratelimit_notice(JOURNALD_LOG_RATELIMIT, + log_ratelimit_notice(JOURNAL_LOG_RATELIMIT, "Didn't flush runtime journal since rotation of system journal wasn't successful."); r = -EIO; goto finish; @@ 
-1256,7 +1264,7 @@ int server_flush_to_var(Server *s, bool require_flag_file) { log_debug("Retrying write."); r = journal_file_copy_entry(f, s->system_journal->file, o, f->current_offset); if (r < 0) { - log_ratelimit_error_errno(r, JOURNALD_LOG_RATELIMIT, "Can't write entry: %m"); + log_ratelimit_error_errno(r, JOURNAL_LOG_RATELIMIT, "Can't write entry: %m"); goto finish; } } @@ -1284,7 +1292,7 @@ finish: fn = strjoina(s->runtime_directory, "/flushed"); k = touch(fn); if (k < 0) - log_ratelimit_warning_errno(k, JOURNALD_LOG_RATELIMIT, + log_ratelimit_warning_errno(k, JOURNAL_LOG_RATELIMIT, "Failed to touch %s, ignoring: %m", fn); server_refresh_idle_timer(s); @@ -1314,7 +1322,7 @@ static int server_relinquish_var(Server *s) { fn = strjoina(s->runtime_directory, "/flushed"); if (unlink(fn) < 0 && errno != ENOENT) - log_ratelimit_warning_errno(errno, JOURNALD_LOG_RATELIMIT, + log_ratelimit_warning_errno(errno, JOURNAL_LOG_RATELIMIT, "Failed to unlink %s, ignoring: %m", fn); server_refresh_idle_timer(s); @@ -1387,11 +1395,11 @@ int server_process_datagram( if (ERRNO_IS_TRANSIENT(n)) return 0; if (n == -EXFULL) { - log_ratelimit_warning(JOURNALD_LOG_RATELIMIT, + log_ratelimit_warning(JOURNAL_LOG_RATELIMIT, "Got message with truncated control data (too many fds sent?), ignoring."); return 0; } - return log_ratelimit_error_errno(n, JOURNALD_LOG_RATELIMIT, "recvmsg() failed: %m"); + return log_ratelimit_error_errno(n, JOURNAL_LOG_RATELIMIT, "recvmsg() failed: %m"); } CMSG_FOREACH(cmsg, &msghdr) @@ -1424,7 +1432,7 @@ int server_process_datagram( if (n > 0 && n_fds == 0) server_process_syslog_message(s, s->buffer, n, ucred, tv, label, label_len); else if (n_fds > 0) - log_ratelimit_warning(JOURNALD_LOG_RATELIMIT, + log_ratelimit_warning(JOURNAL_LOG_RATELIMIT, "Got file descriptors via syslog socket. 
Ignoring."); } else if (fd == s->native_fd) { @@ -1433,7 +1441,7 @@ int server_process_datagram( else if (n == 0 && n_fds == 1) server_process_native_file(s, fds[0], ucred, tv, label, label_len); else if (n_fds > 0) - log_ratelimit_warning(JOURNALD_LOG_RATELIMIT, + log_ratelimit_warning(JOURNAL_LOG_RATELIMIT, "Got too many file descriptors via native socket. Ignoring."); } else { @@ -1442,7 +1450,7 @@ int server_process_datagram( if (n > 0 && n_fds == 0) server_process_audit_message(s, s->buffer, n, ucred, &sa, msghdr.msg_namelen); else if (n_fds > 0) - log_ratelimit_warning(JOURNALD_LOG_RATELIMIT, + log_ratelimit_warning(JOURNAL_LOG_RATELIMIT, "Got file descriptors via audit socket. Ignoring."); } @@ -1496,7 +1504,7 @@ static void server_full_rotate(Server *s) { fn = strjoina(s->runtime_directory, "/rotated"); r = write_timestamp_file_atomic(fn, now(CLOCK_MONOTONIC)); if (r < 0) - log_ratelimit_warning_errno(r, JOURNALD_LOG_RATELIMIT, + log_ratelimit_warning_errno(r, JOURNAL_LOG_RATELIMIT, "Failed to write %s, ignoring: %m", fn); } @@ -1600,7 +1608,7 @@ static void server_full_sync(Server *s) { fn = strjoina(s->runtime_directory, "/synced"); r = write_timestamp_file_atomic(fn, now(CLOCK_MONOTONIC)); if (r < 0) - log_ratelimit_warning_errno(r, JOURNALD_LOG_RATELIMIT, + log_ratelimit_warning_errno(r, JOURNAL_LOG_RATELIMIT, "Failed to write %s, ignoring: %m", fn); return; diff --git a/src/journal/journald-server.h b/src/journal/journald-server.h index fb512dcfeb..ee8f374190 100644 --- a/src/journal/journald-server.h +++ b/src/journal/journald-server.h @@ -20,8 +20,6 @@ typedef struct Server Server; #include "time-util.h" #include "varlink.h" -#define JOURNALD_LOG_RATELIMIT ((const RateLimit) { .interval = 60 * USEC_PER_SEC, .burst = 3 }) - typedef enum Storage { STORAGE_AUTO, STORAGE_VOLATILE, diff --git a/src/journal/journald-stream.c b/src/journal/journald-stream.c index abfd046837..49f28972ea 100644 --- a/src/journal/journald-stream.c +++ 
b/src/journal/journald-stream.c @@ -19,6 +19,7 @@ #include "fileio.h" #include "fs-util.h" #include "io-util.h" +#include "journal-internal.h" #include "journald-console.h" #include "journald-context.h" #include "journald-kmsg.h" @@ -160,7 +161,7 @@ static int stdout_stream_save(StdoutStream *s) { r = fstat(s->fd, &st); if (r < 0) - return log_ratelimit_warning_errno(errno, JOURNALD_LOG_RATELIMIT, + return log_ratelimit_warning_errno(errno, JOURNAL_LOG_RATELIMIT, "Failed to stat connected stream: %m"); /* We use device and inode numbers as identifier for the stream */ @@ -232,7 +233,7 @@ static int stdout_stream_save(StdoutStream *s) { if (s->server->notify_event_source) { r = sd_event_source_set_enabled(s->server->notify_event_source, SD_EVENT_ON); if (r < 0) - log_ratelimit_warning_errno(r, JOURNALD_LOG_RATELIMIT, "Failed to enable notify event source: %m"); + log_ratelimit_warning_errno(r, JOURNAL_LOG_RATELIMIT, "Failed to enable notify event source: %m"); } } @@ -240,7 +241,7 @@ static int stdout_stream_save(StdoutStream *s) { fail: (void) unlink(s->state_file); - return log_ratelimit_error_errno(r, JOURNALD_LOG_RATELIMIT, + return log_ratelimit_error_errno(r, JOURNAL_LOG_RATELIMIT, "Failed to save stream data %s: %m", s->state_file); } @@ -268,7 +269,7 @@ static int stdout_stream_log( else if (pid_is_valid(s->ucred.pid)) { r = client_context_acquire(s->server, s->ucred.pid, &s->ucred, s->label, strlen_ptr(s->label), s->unit_id, &s->context); if (r < 0) - log_ratelimit_warning_errno(r, JOURNALD_LOG_RATELIMIT, + log_ratelimit_warning_errno(r, JOURNAL_LOG_RATELIMIT, "Failed to acquire client context, ignoring: %m"); } @@ -366,7 +367,7 @@ static int stdout_stream_line(StdoutStream *s, char *p, LineBreak line_break) { /* line breaks by NUL, line max length or EOF are not permissible during the negotiation part of the protocol */ if (line_break != LINE_BREAK_NEWLINE && s->state != STDOUT_STREAM_RUNNING) - return log_ratelimit_warning_errno(SYNTHETIC_ERRNO(EINVAL), 
JOURNALD_LOG_RATELIMIT, + return log_ratelimit_warning_errno(SYNTHETIC_ERRNO(EINVAL), JOURNAL_LOG_RATELIMIT, "Control protocol line not properly terminated."); switch (s->state) { @@ -398,7 +399,7 @@ static int stdout_stream_line(StdoutStream *s, char *p, LineBreak line_break) { priority = syslog_parse_priority_and_facility(p); if (priority < 0) - return log_ratelimit_warning_errno(priority, JOURNALD_LOG_RATELIMIT, + return log_ratelimit_warning_errno(priority, JOURNAL_LOG_RATELIMIT, "Failed to parse log priority line: %m"); s->priority = priority; @@ -409,7 +410,7 @@ static int stdout_stream_line(StdoutStream *s, char *p, LineBreak line_break) { case STDOUT_STREAM_LEVEL_PREFIX: r = parse_boolean(p); if (r < 0) - return log_ratelimit_warning_errno(r, JOURNALD_LOG_RATELIMIT, + return log_ratelimit_warning_errno(r, JOURNAL_LOG_RATELIMIT, "Failed to parse level prefix line: %m"); s->level_prefix = r; @@ -419,7 +420,7 @@ static int stdout_stream_line(StdoutStream *s, char *p, LineBreak line_break) { case STDOUT_STREAM_FORWARD_TO_SYSLOG: r = parse_boolean(p); if (r < 0) - return log_ratelimit_warning_errno(r, JOURNALD_LOG_RATELIMIT, + return log_ratelimit_warning_errno(r, JOURNAL_LOG_RATELIMIT, "Failed to parse forward to syslog line: %m"); s->forward_to_syslog = r; @@ -429,7 +430,7 @@ static int stdout_stream_line(StdoutStream *s, char *p, LineBreak line_break) { case STDOUT_STREAM_FORWARD_TO_KMSG: r = parse_boolean(p); if (r < 0) - return log_ratelimit_warning_errno(r, JOURNALD_LOG_RATELIMIT, + return log_ratelimit_warning_errno(r, JOURNAL_LOG_RATELIMIT, "Failed to parse copy to kmsg line: %m"); s->forward_to_kmsg = r; @@ -439,7 +440,7 @@ static int stdout_stream_line(StdoutStream *s, char *p, LineBreak line_break) { case STDOUT_STREAM_FORWARD_TO_CONSOLE: r = parse_boolean(p); if (r < 0) - return log_ratelimit_warning_errno(r, JOURNALD_LOG_RATELIMIT, + return log_ratelimit_warning_errno(r, JOURNAL_LOG_RATELIMIT, "Failed to parse copy to console line."); 
s->forward_to_console = r; @@ -597,7 +598,7 @@ static int stdout_stream_process(sd_event_source *es, int fd, uint32_t revents, if (ERRNO_IS_TRANSIENT(errno)) return 0; - log_ratelimit_warning_errno(errno, JOURNALD_LOG_RATELIMIT, "Failed to read from stream: %m"); + log_ratelimit_warning_errno(errno, JOURNAL_LOG_RATELIMIT, "Failed to read from stream: %m"); goto terminate; } cmsg_close_all(&msghdr); @@ -656,7 +657,7 @@ int stdout_stream_install(Server *s, int fd, StdoutStream **ret) { r = sd_id128_randomize(&id); if (r < 0) - return log_ratelimit_error_errno(r, JOURNALD_LOG_RATELIMIT, "Failed to generate stream ID: %m"); + return log_ratelimit_error_errno(r, JOURNAL_LOG_RATELIMIT, "Failed to generate stream ID: %m"); stream = new(StdoutStream, 1); if (!stream) @@ -672,7 +673,7 @@ int stdout_stream_install(Server *s, int fd, StdoutStream **ret) { r = getpeercred(fd, &stream->ucred); if (r < 0) - return log_ratelimit_error_errno(r, JOURNALD_LOG_RATELIMIT, "Failed to determine peer credentials: %m"); + return log_ratelimit_error_errno(r, JOURNAL_LOG_RATELIMIT, "Failed to determine peer credentials: %m"); r = setsockopt_int(fd, SOL_SOCKET, SO_PASSCRED, true); if (r < 0) @@ -681,18 +682,18 @@ int stdout_stream_install(Server *s, int fd, StdoutStream **ret) { if (mac_selinux_use()) { r = getpeersec(fd, &stream->label); if (r < 0 && r != -EOPNOTSUPP) - (void) log_ratelimit_warning_errno(r, JOURNALD_LOG_RATELIMIT, "Failed to determine peer security context: %m"); + (void) log_ratelimit_warning_errno(r, JOURNAL_LOG_RATELIMIT, "Failed to determine peer security context: %m"); } (void) shutdown(fd, SHUT_WR); r = sd_event_add_io(s->event, &stream->event_source, fd, EPOLLIN, stdout_stream_process, stream); if (r < 0) - return log_ratelimit_error_errno(r, JOURNALD_LOG_RATELIMIT, "Failed to add stream to event loop: %m"); + return log_ratelimit_error_errno(r, JOURNAL_LOG_RATELIMIT, "Failed to add stream to event loop: %m"); r = sd_event_source_set_priority(stream->event_source, 
SD_EVENT_PRIORITY_NORMAL+5); if (r < 0) - return log_ratelimit_error_errno(r, JOURNALD_LOG_RATELIMIT, "Failed to adjust stdout event source priority: %m"); + return log_ratelimit_error_errno(r, JOURNAL_LOG_RATELIMIT, "Failed to adjust stdout event source priority: %m"); stream->fd = fd; @@ -724,7 +725,7 @@ static int stdout_stream_new(sd_event_source *es, int listen_fd, uint32_t revent if (ERRNO_IS_ACCEPT_AGAIN(errno)) return 0; - return log_ratelimit_error_errno(errno, JOURNALD_LOG_RATELIMIT, "Failed to accept stdout connection: %m"); + return log_ratelimit_error_errno(errno, JOURNAL_LOG_RATELIMIT, "Failed to accept stdout connection: %m"); } if (s->n_stdout_streams >= STDOUT_STREAMS_MAX) { diff --git a/src/journal/journald-syslog.c b/src/journal/journald-syslog.c index 6394adfdfd..d8708b0775 100644 --- a/src/journal/journald-syslog.c +++ b/src/journal/journald-syslog.c @@ -10,6 +10,7 @@ #include "fd-util.h" #include "format-util.h" #include "io-util.h" +#include "journal-internal.h" #include "journald-console.h" #include "journald-kmsg.h" #include "journald-server.h" @@ -334,7 +335,7 @@ void server_process_syslog_message( if (ucred && pid_is_valid(ucred->pid)) { r = client_context_get(s, ucred->pid, ucred, label, label_len, NULL, &context); if (r < 0) - log_ratelimit_warning_errno(r, JOURNALD_LOG_RATELIMIT, + log_ratelimit_warning_errno(r, JOURNAL_LOG_RATELIMIT, "Failed to retrieve credentials for PID " PID_FMT ", ignoring: %m", ucred->pid); } diff --git a/src/kernel-install/90-uki-copy.install b/src/kernel-install/90-uki-copy.install new file mode 100755 index 0000000000..d6e3deb723 --- /dev/null +++ b/src/kernel-install/90-uki-copy.install @@ -0,0 +1,97 @@ +#!/bin/sh +# -*- mode: shell-script; indent-tabs-mode: nil; sh-basic-offset: 4; -*- +# ex: ts=8 sw=4 sts=4 et filetype=sh +# SPDX-License-Identifier: LGPL-2.1-or-later +# +# This file is part of systemd. 
+# +# systemd is free software; you can redistribute it and/or modify it +# under the terms of the GNU Lesser General Public License as published by +# the Free Software Foundation; either version 2.1 of the License, or +# (at your option) any later version. +# +# systemd is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +# General Public License for more details. +# +# You should have received a copy of the GNU Lesser General Public License +# along with systemd; If not, see <https://www.gnu.org/licenses/>. + +set -e + +COMMAND="${1:?}" +KERNEL_VERSION="${2:?}" +# shellcheck disable=SC2034 +ENTRY_DIR_ABS="$3" +KERNEL_IMAGE="$4" + +[ "$KERNEL_INSTALL_LAYOUT" = "uki" ] || exit 0 + +ENTRY_TOKEN="$KERNEL_INSTALL_ENTRY_TOKEN" +BOOT_ROOT="$KERNEL_INSTALL_BOOT_ROOT" + +UKI_DIR="$BOOT_ROOT/EFI/Linux" + +case "$COMMAND" in + remove) + [ "$KERNEL_INSTALL_VERBOSE" -gt 0 ] && \ + echo "Removing $UKI_DIR/$ENTRY_TOKEN-$KERNEL_VERSION*.efi" + exec rm -f \ + "$UKI_DIR/$ENTRY_TOKEN-$KERNEL_VERSION.efi" \ + "$UKI_DIR/$ENTRY_TOKEN-$KERNEL_VERSION+"*".efi" + ;; + add) + ;; + *) + exit 0 + ;; +esac + +if ! [ -d "$UKI_DIR" ]; then + echo "Error: entry directory '$UKI_DIR' does not exist" >&2 + exit 1 +fi + +TRIES_FILE="${KERNEL_INSTALL_CONF_ROOT:-/etc/kernel}/tries" + +if [ -f "$TRIES_FILE" ]; then + read -r TRIES <"$TRIES_FILE" + if ! echo "$TRIES" | grep -q '^[0-9][0-9]*$'; then + echo "$TRIES_FILE does not contain an integer." 
>&2 + exit 1 + fi + UKI_FILE="$UKI_DIR/$ENTRY_TOKEN-$KERNEL_VERSION+$TRIES.efi" +else + UKI_FILE="$UKI_DIR/$ENTRY_TOKEN-$KERNEL_VERSION.efi" +fi + +# If there is a UKI named uki.efi on the staging area use that, if not use what +# was passed in as $KERNEL_IMAGE but insist it has a .efi extension +if [ -f "$KERNEL_INSTALL_STAGING_AREA/uki.efi" ]; then + [ "$KERNEL_INSTALL_VERBOSE" -gt 0 ] && echo "Installing $KERNEL_INSTALL_STAGING_AREA/uki.efi" + install -m 0644 "$KERNEL_INSTALL_STAGING_AREA/uki.efi" "$UKI_FILE" || { + echo "Error: could not copy '$KERNEL_INSTALL_STAGING_AREA/uki.efi' to '$UKI_FILE'." >&2 + exit 1 + } +elif [ -n "$KERNEL_IMAGE" ]; then + [ -f "$KERNEL_IMAGE" ] || { + echo "Error: UKI '$KERNEL_IMAGE' not a file." >&2 + exit 1 + } + [ "$KERNEL_IMAGE" != "${KERNEL_IMAGE%*.efi}.efi" ] && { + echo "Error: $KERNEL_IMAGE is missing .efi suffix." >&2 + exit 1 + } + [ "$KERNEL_INSTALL_VERBOSE" -gt 0 ] && echo "Installing $KERNEL_IMAGE" + install -m 0644 "$KERNEL_IMAGE" "$UKI_FILE" || { + echo "Error: could not copy '$KERNEL_IMAGE' to '$UKI_FILE'." >&2 + exit 1 + } +else + [ "$KERNEL_INSTALL_VERBOSE" -gt 0 ] && echo "No UKI available. Nothing to do." 
+ exit 0 +fi +chown root:root "$UKI_FILE" || : + +exit 0 diff --git a/src/kernel-install/meson.build b/src/kernel-install/meson.build index e5cfdb824c..b0b6c27ede 100644 --- a/src/kernel-install/meson.build +++ b/src/kernel-install/meson.build @@ -12,6 +12,8 @@ loaderentry_install = custom_target( install_mode : 'rwxr-xr-x', install_dir : kernelinstalldir) +uki_copy_install = files('90-uki-copy.install') + if want_kernel_install install_data('50-depmod.install', install_mode : 'rwxr-xr-x', diff --git a/src/libsystemd/sd-event/sd-event.c b/src/libsystemd/sd-event/sd-event.c index fa98c8946e..299a6a2c8c 100644 --- a/src/libsystemd/sd-event/sd-event.c +++ b/src/libsystemd/sd-event/sd-event.c @@ -3177,7 +3177,7 @@ static int event_arm_timer( assert_se(d->fd >= 0); if (t == 0) { - /* We don' want to disarm here, just mean some time looooong ago. */ + /* We don't want to disarm here, just mean some time looooong ago. */ its.it_value.tv_sec = 0; its.it_value.tv_nsec = 1; } else diff --git a/src/libsystemd/sd-journal/journal-file.c b/src/libsystemd/sd-journal/journal-file.c index 9084da41e3..507958dabd 100644 --- a/src/libsystemd/sd-journal/journal-file.c +++ b/src/libsystemd/sd-journal/journal-file.c @@ -23,6 +23,7 @@ #include "journal-authenticate.h" #include "journal-def.h" #include "journal-file.h" +#include "journal-internal.h" #include "lookup3.h" #include "memory-util.h" #include "path-util.h" @@ -3582,22 +3583,24 @@ static int journal_file_warn_btrfs(JournalFile *f) { r = fd_is_fs_type(f->fd, BTRFS_SUPER_MAGIC); if (r < 0) - return log_warning_errno(r, "Failed to determine if journal is on btrfs: %m"); + return log_ratelimit_warning_errno(r, JOURNAL_LOG_RATELIMIT, "Failed to determine if journal is on btrfs: %m"); if (!r) return 0; r = read_attr_fd(f->fd, &attrs); if (r < 0) - return log_warning_errno(r, "Failed to read file attributes: %m"); + return log_ratelimit_warning_errno(r, JOURNAL_LOG_RATELIMIT, "Failed to read file attributes: %m"); if (attrs & 
FS_NOCOW_FL) { log_debug("Detected btrfs file system with copy-on-write disabled, all is good."); return 0; } - log_notice("Creating journal file %s on a btrfs file system, and copy-on-write is enabled. " - "This is likely to slow down journal access substantially, please consider turning " - "off the copy-on-write file attribute on the journal directory, using chattr +C.", f->path); + log_ratelimit_notice(JOURNAL_LOG_RATELIMIT, + "Creating journal file %s on a btrfs file system, and copy-on-write is enabled. " + "This is likely to slow down journal access substantially, please consider turning " + "off the copy-on-write file attribute on the journal directory, using chattr +C.", + f->path); return 1; } @@ -4161,10 +4164,6 @@ int journal_file_get_cutoff_monotonic_usec(JournalFile *f, sd_id128_t boot_id, u return 1; } -/* Ideally this would be a function parameter but initializers for static fields have to be compile - * time constants so we hardcode the interval instead. */ -#define LOG_RATELIMIT ((const RateLimit) { .interval = 60 * USEC_PER_SEC, .burst = 3 }) - bool journal_file_rotate_suggested(JournalFile *f, usec_t max_file_usec, int log_level) { assert(f); assert(f->header); @@ -4172,7 +4171,8 @@ bool journal_file_rotate_suggested(JournalFile *f, usec_t max_file_usec, int log /* If we gained new header fields we gained new features, * hence suggest a rotation */ if (le64toh(f->header->header_size) < sizeof(Header)) { - log_full(log_level, "%s uses an outdated header, suggesting rotation.", f->path); + log_ratelimit_full(log_level, JOURNAL_LOG_RATELIMIT, + "%s uses an outdated header, suggesting rotation.", f->path); return true; } @@ -4183,7 +4183,7 @@ bool journal_file_rotate_suggested(JournalFile *f, usec_t max_file_usec, int log if (JOURNAL_HEADER_CONTAINS(f->header, n_data)) if (le64toh(f->header->n_data) * 4ULL > (le64toh(f->header->data_hash_table_size) / sizeof(HashItem)) * 3ULL) { log_ratelimit_full( - log_level, LOG_RATELIMIT, + log_level, 
JOURNAL_LOG_RATELIMIT, "Data hash table of %s has a fill level at %.1f (%"PRIu64" of %"PRIu64" items, %llu file size, %"PRIu64" bytes per hash table item), suggesting rotation.", f->path, 100.0 * (double) le64toh(f->header->n_data) / ((double) (le64toh(f->header->data_hash_table_size) / sizeof(HashItem))), @@ -4197,7 +4197,7 @@ bool journal_file_rotate_suggested(JournalFile *f, usec_t max_file_usec, int log if (JOURNAL_HEADER_CONTAINS(f->header, n_fields)) if (le64toh(f->header->n_fields) * 4ULL > (le64toh(f->header->field_hash_table_size) / sizeof(HashItem)) * 3ULL) { log_ratelimit_full( - log_level, LOG_RATELIMIT, + log_level, JOURNAL_LOG_RATELIMIT, "Field hash table of %s has a fill level at %.1f (%"PRIu64" of %"PRIu64" items), suggesting rotation.", f->path, 100.0 * (double) le64toh(f->header->n_fields) / ((double) (le64toh(f->header->field_hash_table_size) / sizeof(HashItem))), @@ -4211,7 +4211,7 @@ bool journal_file_rotate_suggested(JournalFile *f, usec_t max_file_usec, int log if (JOURNAL_HEADER_CONTAINS(f->header, data_hash_chain_depth) && le64toh(f->header->data_hash_chain_depth) > HASH_CHAIN_DEPTH_MAX) { log_ratelimit_full( - log_level, LOG_RATELIMIT, + log_level, JOURNAL_LOG_RATELIMIT, "Data hash table of %s has deepest hash chain of length %" PRIu64 ", suggesting rotation.", f->path, le64toh(f->header->data_hash_chain_depth)); return true; @@ -4220,7 +4220,7 @@ bool journal_file_rotate_suggested(JournalFile *f, usec_t max_file_usec, int log if (JOURNAL_HEADER_CONTAINS(f->header, field_hash_chain_depth) && le64toh(f->header->field_hash_chain_depth) > HASH_CHAIN_DEPTH_MAX) { log_ratelimit_full( - log_level, LOG_RATELIMIT, + log_level, JOURNAL_LOG_RATELIMIT, "Field hash table of %s has deepest hash chain of length at %" PRIu64 ", suggesting rotation.", f->path, le64toh(f->header->field_hash_chain_depth)); return true; @@ -4232,7 +4232,7 @@ bool journal_file_rotate_suggested(JournalFile *f, usec_t max_file_usec, int log le64toh(f->header->n_data) > 0 && 
le64toh(f->header->n_fields) == 0) { log_ratelimit_full( - log_level, LOG_RATELIMIT, + log_level, JOURNAL_LOG_RATELIMIT, "Data objects of %s are not indexed by field objects, suggesting rotation.", f->path); return true; @@ -4246,7 +4246,7 @@ bool journal_file_rotate_suggested(JournalFile *f, usec_t max_file_usec, int log if (h > 0 && t > h + max_file_usec) { log_ratelimit_full( - log_level, LOG_RATELIMIT, + log_level, JOURNAL_LOG_RATELIMIT, "Oldest entry in %s is older than the configured file retention duration (%s), suggesting rotation.", f->path, FORMAT_TIMESPAN(max_file_usec, USEC_PER_SEC)); return true; diff --git a/src/libsystemd/sd-journal/journal-internal.h b/src/libsystemd/sd-journal/journal-internal.h index 7fc6896522..ed052d1b89 100644 --- a/src/libsystemd/sd-journal/journal-internal.h +++ b/src/libsystemd/sd-journal/journal-internal.h @@ -14,6 +14,8 @@ #include "list.h" #include "set.h" +#define JOURNAL_LOG_RATELIMIT ((const RateLimit) { .interval = 60 * USEC_PER_SEC, .burst = 3 }) + typedef struct Match Match; typedef struct Location Location; typedef struct Directory Directory; diff --git a/src/libsystemd/sd-journal/journal-vacuum.c b/src/libsystemd/sd-journal/journal-vacuum.c index eac3500202..7b5e0fa65f 100644 --- a/src/libsystemd/sd-journal/journal-vacuum.c +++ b/src/libsystemd/sd-journal/journal-vacuum.c @@ -13,6 +13,7 @@ #include "fs-util.h" #include "journal-def.h" #include "journal-file.h" +#include "journal-internal.h" #include "journal-vacuum.h" #include "sort-util.h" #include "string-util.h" @@ -251,7 +252,9 @@ int journal_directory_vacuum( freed += size; } else if (r != -ENOENT) - log_warning_errno(r, "Failed to delete empty archived journal %s/%s: %m", directory, p); + log_ratelimit_warning_errno(r, JOURNAL_LOG_RATELIMIT, + "Failed to delete empty archived journal %s/%s: %m", + directory, p); continue; } @@ -299,7 +302,9 @@ int journal_directory_vacuum( sum = 0; } else if (r != -ENOENT) - log_warning_errno(r, "Failed to delete archived 
journal %s/%s: %m", directory, list[i].filename); + log_ratelimit_warning_errno(r, JOURNAL_LOG_RATELIMIT, + "Failed to delete archived journal %s/%s: %m", + directory, list[i].filename); } if (oldest_usec && i < n_list && (*oldest_usec == 0 || list[i].realtime < *oldest_usec)) diff --git a/src/libsystemd/sd-netlink/netlink-internal.h b/src/libsystemd/sd-netlink/netlink-internal.h index 514f22511c..bca13bce57 100644 --- a/src/libsystemd/sd-netlink/netlink-internal.h +++ b/src/libsystemd/sd-netlink/netlink-internal.h @@ -7,6 +7,7 @@ #include "list.h" #include "netlink-types.h" +#include "ordered-set.h" #include "prioq.h" #include "time-util.h" @@ -72,11 +73,9 @@ struct sd_netlink { Hashmap *broadcast_group_refs; bool broadcast_group_dont_leave:1; /* until we can rely on 4.2 */ - sd_netlink_message **rqueue; - unsigned rqueue_size; - - sd_netlink_message **rqueue_partial; - unsigned rqueue_partial_size; + OrderedSet *rqueue; + Hashmap *rqueue_by_serial; + Hashmap *rqueue_partial_by_serial; struct nlmsghdr *rbuffer; @@ -148,8 +147,6 @@ void message_seal(sd_netlink_message *m); int netlink_open_family(sd_netlink **ret, int family); bool netlink_pid_changed(sd_netlink *nl); -int netlink_rqueue_make_room(sd_netlink *nl); -int netlink_rqueue_partial_make_room(sd_netlink *nl); int socket_bind(sd_netlink *nl); int socket_broadcast_group_ref(sd_netlink *nl, unsigned group); diff --git a/src/libsystemd/sd-netlink/netlink-socket.c b/src/libsystemd/sd-netlink/netlink-socket.c index 1da459c014..96162963a7 100644 --- a/src/libsystemd/sd-netlink/netlink-socket.c +++ b/src/libsystemd/sd-netlink/netlink-socket.c @@ -180,11 +180,12 @@ int socket_write_message(sd_netlink *nl, sd_netlink_message *m) { return k; } -static int socket_recv_message(int fd, struct iovec *iov, uint32_t *ret_mcast_group, bool peek) { +static int socket_recv_message(int fd, void *buf, size_t buf_size, uint32_t *ret_mcast_group, bool peek) { + struct iovec iov = IOVEC_MAKE(buf, buf_size); union sockaddr_union 
sender; CMSG_BUFFER_TYPE(CMSG_SPACE(sizeof(struct nl_pktinfo))) control; struct msghdr msg = { - .msg_iov = iov, + .msg_iov = &iov, .msg_iovlen = 1, .msg_name = &sender, .msg_namelen = sizeof(sender), @@ -194,14 +195,17 @@ static int socket_recv_message(int fd, struct iovec *iov, uint32_t *ret_mcast_gr ssize_t n; assert(fd >= 0); - assert(iov); + assert(peek || (buf && buf_size > 0)); n = recvmsg_safe(fd, &msg, MSG_TRUNC | (peek ? MSG_PEEK : 0)); if (n < 0) { if (n == -ENOBUFS) return log_debug_errno(n, "sd-netlink: kernel receive buffer overrun"); - if (ERRNO_IS_TRANSIENT(n)) + if (ERRNO_IS_TRANSIENT(n)) { + if (ret_mcast_group) + *ret_mcast_group = 0; return 0; + } return (int) n; } @@ -216,9 +220,14 @@ static int socket_recv_message(int fd, struct iovec *iov, uint32_t *ret_mcast_gr return (int) n; } + if (ret_mcast_group) + *ret_mcast_group = 0; return 0; } + if (!peek && (size_t) n > buf_size) /* message did not fit in read buffer */ + return -EIO; + if (ret_mcast_group) { struct nl_pktinfo *pi; @@ -232,151 +241,221 @@ static int socket_recv_message(int fd, struct iovec *iov, uint32_t *ret_mcast_gr return (int) n; } -/* On success, the number of bytes received is returned and *ret points to the received message - * which has a valid header and the correct size. - * If nothing useful was received 0 is returned. - * On failure, a negative error code is returned. 
- */ -int socket_read_message(sd_netlink *nl) { - _cleanup_(sd_netlink_message_unrefp) sd_netlink_message *first = NULL; - bool multi_part = false, done = false; - size_t len, allocated; - struct iovec iov = {}; - uint32_t group = 0; - unsigned i = 0; +DEFINE_PRIVATE_HASH_OPS_WITH_VALUE_DESTRUCTOR( + netlink_message_hash_ops, + void, trivial_hash_func, trivial_compare_func, + sd_netlink_message, sd_netlink_message_unref); + +static int netlink_queue_received_message(sd_netlink *nl, sd_netlink_message *m) { + uint32_t serial; int r; assert(nl); - assert(nl->rbuffer); + assert(m); - /* read nothing, just get the pending message size */ - r = socket_recv_message(nl->fd, &iov, NULL, true); - if (r <= 0) + if (ordered_set_size(nl->rqueue) >= NETLINK_RQUEUE_MAX) + return log_debug_errno(SYNTHETIC_ERRNO(ENOBUFS), + "sd-netlink: exhausted the read queue size (%d)", NETLINK_RQUEUE_MAX); + + r = ordered_set_ensure_put(&nl->rqueue, &netlink_message_hash_ops, m); + if (r < 0) return r; - else - len = (size_t) r; - /* make room for the pending message */ - if (!greedy_realloc((void**) &nl->rbuffer, len, sizeof(uint8_t))) - return -ENOMEM; + sd_netlink_message_ref(m); - allocated = MALLOC_SIZEOF_SAFE(nl->rbuffer); - iov = IOVEC_MAKE(nl->rbuffer, allocated); + if (sd_netlink_message_is_broadcast(m)) + return 0; - /* read the pending message */ - r = socket_recv_message(nl->fd, &iov, &group, false); - if (r <= 0) - return r; - else - len = (size_t) r; + serial = message_get_serial(m); + if (serial == 0) + return 0; - if (len > allocated) - /* message did not fit in read buffer */ - return -EIO; + if (sd_netlink_message_get_errno(m) < 0) { + _cleanup_(sd_netlink_message_unrefp) sd_netlink_message *old = NULL; - if (NLMSG_OK(nl->rbuffer, len) && nl->rbuffer->nlmsg_flags & NLM_F_MULTI) { - multi_part = true; + old = hashmap_remove(nl->rqueue_by_serial, UINT32_TO_PTR(serial)); + if (old) + log_debug("sd-netlink: received error message with serial %"PRIu32", but another message with " 
+ "the same serial is already stored in the read queue, replacing.", serial); + } - for (i = 0; i < nl->rqueue_partial_size; i++) - if (message_get_serial(nl->rqueue_partial[i]) == - nl->rbuffer->nlmsg_seq) { - first = nl->rqueue_partial[i]; - break; - } + r = hashmap_ensure_put(&nl->rqueue_by_serial, &netlink_message_hash_ops, UINT32_TO_PTR(serial), m); + if (r == -EEXIST) { + if (!sd_netlink_message_is_error(m)) + log_debug("sd-netlink: received message with serial %"PRIu32", but another message with " + "the same serial is already stored in the read queue, ignoring.", serial); + return 0; + } + if (r < 0) { + sd_netlink_message_unref(ordered_set_remove(nl->rqueue, m)); + return r; } - for (struct nlmsghdr *new_msg = nl->rbuffer; NLMSG_OK(new_msg, len) && !done; new_msg = NLMSG_NEXT(new_msg, len)) { - _cleanup_(sd_netlink_message_unrefp) sd_netlink_message *m = NULL; - size_t size; + sd_netlink_message_ref(m); + return 0; +} - if (group == 0 && new_msg->nlmsg_pid != nl->sockaddr.nl.nl_pid) - /* not broadcast and not for us */ - continue; +static int netlink_queue_partially_received_message(sd_netlink *nl, sd_netlink_message *m) { + uint32_t serial; + int r; - if (new_msg->nlmsg_type == NLMSG_NOOP) - /* silently drop noop messages */ - continue; + assert(nl); + assert(m); + assert(m->hdr->nlmsg_flags & NLM_F_MULTI); - if (new_msg->nlmsg_type == NLMSG_DONE) { - /* finished reading multi-part message */ - done = true; + if (hashmap_size(nl->rqueue_partial_by_serial) >= NETLINK_RQUEUE_MAX) + return log_debug_errno(SYNTHETIC_ERRNO(ENOBUFS), + "sd-netlink: exhausted the partial read queue size (%d)", NETLINK_RQUEUE_MAX); - /* if first is not defined, put NLMSG_DONE into the receive queue. 
*/ - if (first) - continue; - } + serial = message_get_serial(m); + r = hashmap_ensure_put(&nl->rqueue_partial_by_serial, &netlink_message_hash_ops, UINT32_TO_PTR(serial), m); + if (r < 0) + return r; - /* check that we support this message type */ - r = netlink_get_policy_set_and_header_size(nl, new_msg->nlmsg_type, NULL, &size); - if (r < 0) { - if (r == -EOPNOTSUPP) - log_debug("sd-netlink: ignored message with unknown type: %i", - new_msg->nlmsg_type); + sd_netlink_message_ref(m); + return 0; +} - continue; - } +static int parse_message_one(sd_netlink *nl, uint32_t group, const struct nlmsghdr *hdr, sd_netlink_message **ret) { + _cleanup_(sd_netlink_message_unrefp) sd_netlink_message *m = NULL; + size_t size; + int r; - /* check that the size matches the message type */ - if (new_msg->nlmsg_len < NLMSG_LENGTH(size)) { - log_debug("sd-netlink: message is shorter than expected, dropping"); - continue; - } + assert(nl); + assert(hdr); + assert(ret); + + /* not broadcast and not for us */ + if (group == 0 && hdr->nlmsg_pid != nl->sockaddr.nl.nl_pid) + goto finalize; + + /* silently drop noop messages */ + if (hdr->nlmsg_type == NLMSG_NOOP) + goto finalize; + + /* check that we support this message type */ + r = netlink_get_policy_set_and_header_size(nl, hdr->nlmsg_type, NULL, &size); + if (r == -EOPNOTSUPP) { + log_debug("sd-netlink: ignored message with unknown type: %i", hdr->nlmsg_type); + goto finalize; + } + if (r < 0) + return r; - r = message_new_empty(nl, &m); - if (r < 0) - return r; + /* check that the size matches the message type */ + if (hdr->nlmsg_len < NLMSG_LENGTH(size)) { + log_debug("sd-netlink: message is shorter than expected, dropping."); + goto finalize; + } - m->multicast_group = group; - m->hdr = memdup(new_msg, new_msg->nlmsg_len); - if (!m->hdr) - return -ENOMEM; + r = message_new_empty(nl, &m); + if (r < 0) + return r; - /* seal and parse the top-level message */ - r = sd_netlink_message_rewind(m, nl); - if (r < 0) - return r; + 
m->multicast_group = group; + m->hdr = memdup(hdr, hdr->nlmsg_len); + if (!m->hdr) + return -ENOMEM; - /* push the message onto the multi-part message stack */ - if (first) - m->next = first; - first = TAKE_PTR(m); - } + /* seal and parse the top-level message */ + r = sd_netlink_message_rewind(m, nl); + if (r < 0) + return r; - if (len > 0) - log_debug("sd-netlink: discarding %zu bytes of incoming message", len); + *ret = TAKE_PTR(m); + return 1; - if (!first) +finalize: + *ret = NULL; + return 0; +} + +/* On success, the number of bytes received is returned and *ret points to the received message + * which has a valid header and the correct size. + * If nothing useful was received 0 is returned. + * On failure, a negative error code is returned. + */ +int socket_read_message(sd_netlink *nl) { + bool done = false; + uint32_t group; + size_t len; + int r; + + assert(nl); + + /* read nothing, just get the pending message size */ + r = socket_recv_message(nl->fd, NULL, 0, NULL, true); + if (r <= 0) + return r; + len = (size_t) r; + + /* make room for the pending message */ + if (!greedy_realloc((void**) &nl->rbuffer, len, sizeof(uint8_t))) + return -ENOMEM; + + /* read the pending message */ + r = socket_recv_message(nl->fd, nl->rbuffer, MALLOC_SIZEOF_SAFE(nl->rbuffer), &group, false); + if (r <= 0) + return r; + len = (size_t) r; + + if (!NLMSG_OK(nl->rbuffer, len)) { + log_debug("sd-netlink: received invalid message, discarding %zu bytes of incoming message", len); return 0; + } + + for (struct nlmsghdr *hdr = nl->rbuffer; NLMSG_OK(hdr, len); hdr = NLMSG_NEXT(hdr, len)) { + _cleanup_(sd_netlink_message_unrefp) sd_netlink_message *m = NULL; - if (!multi_part || done) { - /* we got a complete message, push it on the read queue */ - r = netlink_rqueue_make_room(nl); + r = parse_message_one(nl, group, hdr, &m); if (r < 0) return r; + if (r == 0) + continue; - nl->rqueue[nl->rqueue_size++] = TAKE_PTR(first); + if (hdr->nlmsg_flags & NLM_F_MULTI) { + if (hdr->nlmsg_type 
== NLMSG_DONE) { + _cleanup_(sd_netlink_message_unrefp) sd_netlink_message *existing = NULL; - if (multi_part && (i < nl->rqueue_partial_size)) { - /* remove the message form the partial read queue */ - memmove(nl->rqueue_partial + i, nl->rqueue_partial + i + 1, - sizeof(sd_netlink_message*) * (nl->rqueue_partial_size - i - 1)); - nl->rqueue_partial_size--; - } + /* finished reading multi-part message */ + existing = hashmap_remove(nl->rqueue_partial_by_serial, UINT32_TO_PTR(hdr->nlmsg_seq)); + + /* if we receive only NLMSG_DONE, put it into the receive queue. */ + r = netlink_queue_received_message(nl, existing ?: m); + if (r < 0) + return r; + + done = true; + } else { + sd_netlink_message *existing; + + existing = hashmap_get(nl->rqueue_partial_by_serial, UINT32_TO_PTR(hdr->nlmsg_seq)); + if (existing) { + /* This is the continuation of the previously read messages. + * Let's append this message at the end. */ + while (existing->next) + existing = existing->next; + existing->next = TAKE_PTR(m); + } else { + /* This is the first message. Put it into the queue for partially + * received messages. 
*/ + r = netlink_queue_partially_received_message(nl, m); + if (r < 0) + return r; + } + } - return 1; - } else { - /* we only got a partial multi-part message, push it on the - partial read queue */ - if (i < nl->rqueue_partial_size) - nl->rqueue_partial[i] = TAKE_PTR(first); - else { - r = netlink_rqueue_partial_make_room(nl); + } else { + r = netlink_queue_received_message(nl, m); if (r < 0) return r; - nl->rqueue_partial[nl->rqueue_partial_size++] = TAKE_PTR(first); + done = true; } - - return 0; } + + if (len > 0) + log_debug("sd-netlink: discarding trailing %zu bytes of incoming message", len); + + return done; } diff --git a/src/libsystemd/sd-netlink/netlink-util.c b/src/libsystemd/sd-netlink/netlink-util.c index 12cdc99ff2..c6091542d2 100644 --- a/src/libsystemd/sd-netlink/netlink-util.c +++ b/src/libsystemd/sd-netlink/netlink-util.c @@ -673,6 +673,15 @@ int netlink_open_family(sd_netlink **ret, int family) { return 0; } +static bool serial_used(sd_netlink *nl, uint32_t serial) { + assert(nl); + + return + hashmap_contains(nl->reply_callbacks, UINT32_TO_PTR(serial)) || + hashmap_contains(nl->rqueue_by_serial, UINT32_TO_PTR(serial)) || + hashmap_contains(nl->rqueue_partial_by_serial, UINT32_TO_PTR(serial)); +} + void netlink_seal_message(sd_netlink *nl, sd_netlink_message *m) { uint32_t picked; @@ -689,7 +698,7 @@ void netlink_seal_message(sd_netlink *nl, sd_netlink_message *m) { such messages */ nl->serial = nl->serial == UINT32_MAX ? 
1 : nl->serial + 1; - } while (hashmap_contains(nl->reply_callbacks, UINT32_TO_PTR(picked))); + } while (serial_used(nl, picked)); m->hdr->nlmsg_seq = picked; message_seal(m); diff --git a/src/libsystemd/sd-netlink/sd-netlink.c b/src/libsystemd/sd-netlink/sd-netlink.c index b99abae640..fe888926a1 100644 --- a/src/libsystemd/sd-netlink/sd-netlink.c +++ b/src/libsystemd/sd-netlink/sd-netlink.c @@ -61,10 +61,6 @@ static int netlink_new(sd_netlink **ret) { .serial = (uint32_t) (now(CLOCK_MONOTONIC) % UINT32_MAX) + 1, }; - /* We guarantee that the read buffer has at least space for a message header */ - if (!greedy_realloc((void**) &nl->rbuffer, sizeof(struct nlmsghdr), sizeof(uint8_t))) - return -ENOMEM; - *ret = TAKE_PTR(nl); return 0; } @@ -120,18 +116,12 @@ int sd_netlink_increase_rxbuf(sd_netlink *nl, size_t size) { static sd_netlink *netlink_free(sd_netlink *nl) { sd_netlink_slot *s; - unsigned i; assert(nl); - for (i = 0; i < nl->rqueue_size; i++) - sd_netlink_message_unref(nl->rqueue[i]); - free(nl->rqueue); - - for (i = 0; i < nl->rqueue_partial_size; i++) - sd_netlink_message_unref(nl->rqueue_partial[i]); - free(nl->rqueue_partial); - + ordered_set_free(nl->rqueue); + hashmap_free(nl->rqueue_by_serial); + hashmap_free(nl->rqueue_partial_by_serial); free(nl->rbuffer); while ((s = nl->slots)) { @@ -179,57 +169,28 @@ int sd_netlink_send( return 1; } -int netlink_rqueue_make_room(sd_netlink *nl) { - assert(nl); - - if (nl->rqueue_size >= NETLINK_RQUEUE_MAX) - return log_debug_errno(SYNTHETIC_ERRNO(ENOBUFS), - "sd-netlink: exhausted the read queue size (%d)", - NETLINK_RQUEUE_MAX); - - if (!GREEDY_REALLOC(nl->rqueue, nl->rqueue_size + 1)) - return -ENOMEM; - - return 0; -} - -int netlink_rqueue_partial_make_room(sd_netlink *nl) { - assert(nl); - - if (nl->rqueue_partial_size >= NETLINK_RQUEUE_MAX) - return log_debug_errno(SYNTHETIC_ERRNO(ENOBUFS), - "sd-netlink: exhausted the partial read queue size (%d)", - NETLINK_RQUEUE_MAX); - - if 
(!GREEDY_REALLOC(nl->rqueue_partial, nl->rqueue_partial_size + 1)) - return -ENOMEM; - - return 0; -} - -static int dispatch_rqueue(sd_netlink *nl, sd_netlink_message **message) { +static int dispatch_rqueue(sd_netlink *nl, sd_netlink_message **ret) { + sd_netlink_message *m; int r; assert(nl); - assert(message); + assert(ret); - if (nl->rqueue_size <= 0) { + if (ordered_set_size(nl->rqueue) <= 0) { /* Try to read a new message */ r = socket_read_message(nl); - if (r == -ENOBUFS) { /* FIXME: ignore buffer overruns for now */ + if (r == -ENOBUFS) /* FIXME: ignore buffer overruns for now */ log_debug_errno(r, "sd-netlink: Got ENOBUFS from netlink socket, ignoring."); - return 1; - } - if (r <= 0) + else if (r < 0) return r; } /* Dispatch a queued message */ - *message = nl->rqueue[0]; - nl->rqueue_size--; - memmove(nl->rqueue, nl->rqueue + 1, sizeof(sd_netlink_message*) * nl->rqueue_size); - - return 1; + m = ordered_set_steal_first(nl->rqueue); + if (m) + sd_netlink_message_unref(hashmap_remove_value(nl->rqueue_by_serial, UINT32_TO_PTR(message_get_serial(m)), m)); + *ret = m; + return !!m; } static int process_timeout(sd_netlink *nl) { @@ -469,7 +430,7 @@ int sd_netlink_wait(sd_netlink *nl, uint64_t timeout_usec) { assert_return(nl, -EINVAL); assert_return(!netlink_pid_changed(nl), -ECHILD); - if (nl->rqueue_size > 0) + if (ordered_set_size(nl->rqueue) > 0) return 0; r = netlink_poll(nl, false, timeout_usec); @@ -570,39 +531,32 @@ int sd_netlink_read( timeout = calc_elapse(usec); for (;;) { + _cleanup_(sd_netlink_message_unrefp) sd_netlink_message *m = NULL; usec_t left; - for (unsigned i = 0; i < nl->rqueue_size; i++) { - _cleanup_(sd_netlink_message_unrefp) sd_netlink_message *incoming = NULL; - uint32_t received_serial; + m = hashmap_remove(nl->rqueue_by_serial, UINT32_TO_PTR(serial)); + if (m) { uint16_t type; - received_serial = message_get_serial(nl->rqueue[i]); - if (received_serial != serial) - continue; - - incoming = nl->rqueue[i]; - /* found a match, 
remove from rqueue and return it */ - memmove(nl->rqueue + i, nl->rqueue + i + 1, - sizeof(sd_netlink_message*) * (nl->rqueue_size - i - 1)); - nl->rqueue_size--; + sd_netlink_message_unref(ordered_set_remove(nl->rqueue, m)); - r = sd_netlink_message_get_errno(incoming); + r = sd_netlink_message_get_errno(m); if (r < 0) return r; - r = sd_netlink_message_get_type(incoming, &type); + r = sd_netlink_message_get_type(m, &type); if (r < 0) return r; if (type == NLMSG_DONE) { - *ret = NULL; + if (ret) + *ret = NULL; return 0; } if (ret) - *ret = TAKE_PTR(incoming); + *ret = TAKE_PTR(m); return 1; } @@ -656,7 +610,7 @@ int sd_netlink_get_events(sd_netlink *nl) { assert_return(nl, -EINVAL); assert_return(!netlink_pid_changed(nl), -ECHILD); - return nl->rqueue_size == 0 ? POLLIN : 0; + return ordered_set_size(nl->rqueue) == 0 ? POLLIN : 0; } int sd_netlink_get_timeout(sd_netlink *nl, uint64_t *timeout_usec) { @@ -666,7 +620,7 @@ int sd_netlink_get_timeout(sd_netlink *nl, uint64_t *timeout_usec) { assert_return(timeout_usec, -EINVAL); assert_return(!netlink_pid_changed(nl), -ECHILD); - if (nl->rqueue_size > 0) { + if (ordered_set_size(nl->rqueue) > 0) { *timeout_usec = 0; return 1; } @@ -678,7 +632,6 @@ int sd_netlink_get_timeout(sd_netlink *nl, uint64_t *timeout_usec) { } *timeout_usec = c->timeout; - return 1; } diff --git a/src/login/logind-dbus.c b/src/login/logind-dbus.c index 86a5decf3f..2ab26b9c6d 100644 --- a/src/login/logind-dbus.c +++ b/src/login/logind-dbus.c @@ -3970,6 +3970,12 @@ int manager_start_scope( if (r < 0) return r; + /* For login session scopes, if a process is OOM killed by the kernel, *don't* terminate the rest of + the scope */ + r = sd_bus_message_append(m, "(sv)", "OOMPolicy", "s", "continue"); + if (r < 0) + return r; + /* disable TasksMax= for the session scope, rely on the slice setting for it */ r = sd_bus_message_append(m, "(sv)", "TasksMax", "t", UINT64_MAX); if (r < 0) diff --git a/src/login/logind.c b/src/login/logind.c index 
cc153fd6bf..a564f94bfe 100644 --- a/src/login/logind.c +++ b/src/login/logind.c @@ -18,6 +18,7 @@ #include "daemon-util.h" #include "device-util.h" #include "dirent-util.h" +#include "escape.h" #include "fd-util.h" #include "format-util.h" #include "fs-util.h" @@ -299,11 +300,16 @@ static int manager_enumerate_linger_users(Manager *m) { FOREACH_DIRENT(de, d, return -errno) { int k; + _cleanup_free_ char *n = NULL; if (!dirent_is_file(de)) continue; - - k = manager_add_user_by_name(m, de->d_name, NULL); + k = cunescape(de->d_name, 0, &n); + if (k < 0) { + r = log_warning_errno(k, "Failed to unescape username '%s', ignoring: %m", de->d_name); + continue; + } + k = manager_add_user_by_name(m, n, NULL); if (k < 0) r = log_warning_errno(k, "Couldn't add lingering user %s, ignoring: %m", de->d_name); } diff --git a/src/login/systemd-user.in b/src/login/systemd-user.in index 39bcbd71fe..06f7e36458 100644 --- a/src/login/systemd-user.in +++ b/src/login/systemd-user.in @@ -4,18 +4,19 @@ # Used by systemd --user instances. 
{% if ENABLE_HOMED %} --account sufficient pam_systemd_home.so +-account sufficient pam_systemd_home.so {% endif %} -account sufficient pam_unix.so no_pass_expiry -account required pam_permit.so +account sufficient pam_unix.so no_pass_expiry +account required pam_permit.so {% if HAVE_SELINUX %} -session required pam_selinux.so close -session required pam_selinux.so nottys open +session required pam_selinux.so close +session required pam_selinux.so nottys open {% endif %} -session required pam_loginuid.so -session optional pam_keyinit.so force revoke +session required pam_loginuid.so +session optional pam_keyinit.so force revoke +session required pam_namespace.so {% if ENABLE_HOMED %} --session optional pam_systemd_home.so +-session optional pam_systemd_home.so {% endif %} -session optional pam_systemd.so +session optional pam_systemd.so diff --git a/src/machine/machined-dbus.c b/src/machine/machined-dbus.c index e2d0784857..56dd22d757 100644 --- a/src/machine/machined-dbus.c +++ b/src/machine/machined-dbus.c @@ -863,7 +863,7 @@ static int method_set_pool_limit(sd_bus_message *message, void *userdata, sd_bus return 1; /* Will call us back */ /* Set up the machine directory if necessary */ - r = setup_machine_directory(error); + r = setup_machine_directory(error, /* use_btrfs_subvol= */ true, /* use_btrfs_quota= */ true); if (r < 0) return r; diff --git a/src/network/networkd-address.c b/src/network/networkd-address.c index 107f19fd15..5b3b7d128a 100644 --- a/src/network/networkd-address.c +++ b/src/network/networkd-address.c @@ -309,6 +309,14 @@ DEFINE_PRIVATE_HASH_OPS( address_kernel_hash_func, address_kernel_compare_func); +DEFINE_PRIVATE_HASH_OPS_WITH_KEY_DESTRUCTOR( + address_kernel_hash_ops_free, + Address, + address_kernel_hash_func, + address_kernel_compare_func, + address_free); + +/* The functions below are mainly used by managing Request. 
*/ static void address_hash_func(const Address *a, struct siphash *state) { assert(a); @@ -367,12 +375,37 @@ int address_compare_func(const Address *a1, const Address *a2) { return 0; } -DEFINE_PRIVATE_HASH_OPS_WITH_KEY_DESTRUCTOR( - address_hash_ops_free, - Address, - address_hash_func, - address_compare_func, - address_free); +int address_equal(const Address *a1, const Address *a2) { + if (a1 == a2) + return true; + + if (!a1 || !a2) + return false; + + return address_compare_func(a1, a2) == 0; +} + +static int address_equalify(Address *address, const Address *src) { + int r; + + assert(address); + assert(src); + + if (address_kernel_compare_func(address, src) != 0) + return -EINVAL; + + if (address->family == AF_INET) { + address->broadcast = src->broadcast; + r = free_and_strdup(&address->label, src->label); + if (r < 0) + return r; + } else { + address->prefixlen = src->prefixlen; + address->in_addr_peer = src->in_addr_peer; + } + + return 0; +} int address_dup(const Address *src, Address **ret) { _cleanup_(address_freep) Address *dest = NULL; @@ -451,7 +484,7 @@ static int address_add(Link *link, Address *address) { assert(link); assert(address); - r = set_ensure_put(&link->addresses, &address_hash_ops_free, address); + r = set_ensure_put(&link->addresses, &address_kernel_hash_ops_free, address); if (r < 0) return r; if (r == 0) @@ -462,14 +495,9 @@ static int address_add(Link *link, Address *address) { } static int address_update(Address *address) { - Link *link; + Link *link = ASSERT_PTR(ASSERT_PTR(address)->link); int r; - assert(address); - assert(address->link); - - link = address->link; - if (address_is_ready(address) && address->family == AF_INET6 && in6_addr_is_link_local(&address->in_addr.in6) && @@ -485,7 +513,7 @@ static int address_update(Address *address) { if (IN_SET(link->state, LINK_STATE_FAILED, LINK_STATE_LINGER)) return 0; - r = address_set_masquerade(address, true); + r = address_set_masquerade(address, /* add = */ true); if (r < 0) return 
log_link_warning_errno(link, r, "Could not enable IP masquerading: %m"); @@ -497,23 +525,16 @@ static int address_update(Address *address) { return r; } - link_update_operstate(link, true); + link_update_operstate(link, /* also_update_master = */ true); link_check_ready(link); return 0; } static int address_drop(Address *address) { - Link *link; - bool ready; + Link *link = ASSERT_PTR(ASSERT_PTR(address)->link); int r; - assert(address); - assert(address->link); - - ready = address_is_ready(address); - link = address->link; - - r = address_set_masquerade(address, false); + r = address_set_masquerade(address, /* add = */ false); if (r < 0) log_link_warning_errno(link, r, "Failed to disable IP masquerading, ignoring: %m"); @@ -522,11 +543,8 @@ static int address_drop(Address *address) { if (address->state == 0) address_free(address); - link_update_operstate(link, true); - - if (link && !ready) - link_check_ready(link); - + link_update_operstate(link, /* also_update_master = */ true); + link_check_ready(link); return 0; } @@ -557,10 +575,10 @@ int link_get_address(Link *link, int family, const union in_addr_union *address, * and does not have peer address. When the prefixlen is zero, then an Address object with an * arbitrary prefixlen will be returned. */ - if (prefixlen != 0) { + if (family == AF_INET6 || prefixlen != 0) { _cleanup_(address_freep) Address *tmp = NULL; - /* If prefixlen is set, then we can use address_get(). */ + /* In this case, we can use address_get(). */ r = address_new(&tmp); if (r < 0) @@ -569,20 +587,24 @@ int link_get_address(Link *link, int family, const union in_addr_union *address, tmp->family = family; tmp->in_addr = *address; tmp->prefixlen = prefixlen; - address_set_broadcast(tmp, link); - if (address_get(link, tmp, &a) >= 0) { - if (ret) - *ret = a; + r = address_get(link, tmp, &a); + if (r < 0) + return r; - return 0; + if (family == AF_INET6) { + /* IPv6 addresses are managed without peer address and prefix length. 
Hence, we need + * to check them explicitly. */ + if (in_addr_is_set(family, &a->in_addr_peer)) + return -ENOENT; + if (prefixlen != 0 && a->prefixlen != prefixlen) + return -ENOENT; } - if (family == AF_INET6) - return -ENOENT; + if (ret) + *ret = a; - /* IPv4 addresses may have label and/or non-default broadcast address. - * Hence, we need to always fallback below. */ + return 0; } SET_FOREACH(a, link->addresses) { @@ -940,7 +962,11 @@ int link_drop_foreign_addresses(Link *link) { ORDERED_HASHMAP_FOREACH(address, link->network->addresses_by_section) { Address *existing; - if (address_get(link, address, &existing) >= 0) + /* On update, the kernel ignores the address label and broadcast address. Hence we need to + * distinguish addresses with different labels or broadcast addresses. Thus, we need to check + * the existing address with address_equal(). Otherwise, the label or broadcast address + * change will not be applied when we reconfigure the interface. */ + if (address_get(link, address, &existing) >= 0 && address_equal(address, existing)) address_unmark(existing); } @@ -1214,6 +1240,9 @@ int link_request_address( existing = TAKE_PTR(tmp); } else { + r = address_equalify(existing, address); + if (r < 0) + return r; existing->source = address->source; existing->provider = address->provider; existing->duplicate_address_detection = address->duplicate_address_detection; @@ -1483,6 +1512,12 @@ int manager_rtnl_process_address(sd_netlink *rtnl, sd_netlink_message *message, case RTM_NEWADDR: if (address) { /* update flags and etc. 
*/ + r = address_equalify(address, tmp); + if (r < 0) { + log_link_warning_errno(link, r, "Failed to update properties of address %s, ignoring: %m", + IN_ADDR_PREFIX_TO_STRING(address->family, &address->in_addr, address->prefixlen)); + return 0; + } address->flags = tmp->flags; address->scope = tmp->scope; address_set_lifetime(m, address, &cinfo); @@ -2100,9 +2135,8 @@ int network_drop_invalid_addresses(Network *network) { address_free(dup); } - /* Use address_kernel_hash_ops here. The function address_kernel_compare_func() matches - * how kernel compares addresses, and is more lenient than address_compare_func(). - * Hence, the logic of dedup here is stricter than when address_hash_ops is used. */ + /* Use address_kernel_hash_ops, instead of address_kernel_hash_ops_free. Otherwise, the + * Address objects will be freed. */ r = set_ensure_put(&addresses, &address_kernel_hash_ops, address); if (r < 0) return log_oom(); diff --git a/src/network/networkd-address.h b/src/network/networkd-address.h index 7a1e44632d..89b9621791 100644 --- a/src/network/networkd-address.h +++ b/src/network/networkd-address.h @@ -118,6 +118,7 @@ int manager_rtnl_process_address(sd_netlink *nl, sd_netlink_message *message, Ma int network_drop_invalid_addresses(Network *network); int address_compare_func(const Address *a1, const Address *a2); +int address_equal(const Address *a1, const Address *a2); DEFINE_NETWORK_CONFIG_STATE_FUNCTIONS(Address, address); diff --git a/src/network/networkd-dhcp6.c b/src/network/networkd-dhcp6.c index d6ec233351..c44c37f3aa 100644 --- a/src/network/networkd-dhcp6.c +++ b/src/network/networkd-dhcp6.c @@ -163,8 +163,7 @@ static int verify_dhcp6_address(Link *link, const Address *address) { const char *pretty = IN6_ADDR_TO_STRING(&address->in_addr.in6); - if (address_get(link, address, &existing) < 0 && - link_get_address(link, AF_INET6, &address->in_addr, 0, &existing) < 0) { + if (address_get(link, address, &existing) < 0) { /* New address. 
*/ log_level = LOG_INFO; goto simple_log; diff --git a/src/network/networkd-ndisc.c b/src/network/networkd-ndisc.c index ce7dff222b..c7ed5fcfe1 100644 --- a/src/network/networkd-ndisc.c +++ b/src/network/networkd-ndisc.c @@ -168,6 +168,7 @@ static void ndisc_set_route_priority(Link *link, Route *route) { static int ndisc_request_route(Route *in, Link *link, sd_ndisc_router *rt) { _cleanup_(route_freep) Route *route = in; struct in6_addr router; + bool is_new; int r; assert(route); @@ -186,11 +187,16 @@ static int ndisc_request_route(Route *in, Link *link, sd_ndisc_router *rt) { if (!route->protocol_set) route->protocol = RTPROT_RA; - if (route_get(NULL, link, route, NULL) < 0) + is_new = route_get(NULL, link, route, NULL) < 0; + + r = link_request_route(link, TAKE_PTR(route), true, &link->ndisc_messages, + ndisc_route_handler, NULL); + if (r < 0) + return r; + if (r > 0 && is_new) link->ndisc_configured = false; - return link_request_route(link, TAKE_PTR(route), true, &link->ndisc_messages, - ndisc_route_handler, NULL); + return 0; } static int ndisc_address_handler(sd_netlink *rtnl, sd_netlink_message *m, Request *req, Link *link, Address *address) { @@ -212,6 +218,7 @@ static int ndisc_address_handler(sd_netlink *rtnl, sd_netlink_message *m, Reques static int ndisc_request_address(Address *in, Link *link, sd_ndisc_router *rt) { _cleanup_(address_freep) Address *address = in; struct in6_addr router; + bool is_new; int r; assert(address); @@ -229,11 +236,16 @@ static int ndisc_request_address(Address *in, Link *link, sd_ndisc_router *rt) { if (r < 0) return r; - if (address_get(link, address, NULL) < 0) - link->ndisc_configured = false; + is_new = address_get(link, address, NULL) < 0; - return link_request_address(link, TAKE_PTR(address), true, &link->ndisc_messages, + r = link_request_address(link, TAKE_PTR(address), true, &link->ndisc_messages, ndisc_address_handler, NULL); + if (r < 0) + return r; + if (r > 0 && is_new) + link->ndisc_configured = false; + + 
return 0; } static int ndisc_router_process_default(Link *link, sd_ndisc_router *rt) { @@ -442,7 +454,6 @@ static int ndisc_router_process_onlink_prefix(Link *link, sd_ndisc_router *rt) { return log_oom(); route->family = AF_INET6; - route->flags = RTM_F_PREFIX; route->dst.in6 = prefix; route->dst_prefixlen = prefixlen; route->lifetime_usec = sec_to_usec(lifetime_sec, timestamp_usec); diff --git a/src/network/networkd-setlink.c b/src/network/networkd-setlink.c index b6aaa1e9db..541c4b8a72 100644 --- a/src/network/networkd-setlink.c +++ b/src/network/networkd-setlink.c @@ -502,6 +502,14 @@ static int link_is_ready_to_set_link(Link *link, Request *req) { r = link_down_now(link); if (r < 0) return r; + + /* If the kind of the link is "bond", we need to + * set the slave link down as well. */ + if (streq_ptr(link->kind, "bond")) { + r = link_down_slave_links(link); + if (r < 0) + return r; + } } break; @@ -1226,6 +1234,21 @@ int link_down_now(Link *link) { return 0; } +int link_down_slave_links(Link *link) { + Link *slave; + int r; + + assert(link); + + SET_FOREACH(slave, link->slaves) { + r = link_down_now(slave); + if (r < 0) + return r; + } + + return 0; +} + static int link_remove_handler(sd_netlink *rtnl, sd_netlink_message *m, Link *link) { int r; diff --git a/src/network/networkd-setlink.h b/src/network/networkd-setlink.h index 7e5ba32ef1..841e5eeb9c 100644 --- a/src/network/networkd-setlink.h +++ b/src/network/networkd-setlink.h @@ -25,4 +25,5 @@ int link_request_to_activate(Link *link); int link_request_to_bring_up_or_down(Link *link, bool up); int link_down_now(Link *link); +int link_down_slave_links(Link *link); int link_remove(Link *link); diff --git a/src/network/networkd-util.h b/src/network/networkd-util.h index e8c390196e..f75fb1f868 100644 --- a/src/network/networkd-util.h +++ b/src/network/networkd-util.h @@ -91,12 +91,14 @@ int network_config_state_to_string_alloc(NetworkConfigState s, char **ret); 0); \ } \ static inline bool name##_is_requesting(type 
*t) { \ + assert(t); \ return FLAGS_SET(t->state, NETWORK_CONFIG_STATE_REQUESTING); \ } \ static inline void name##_enter_configuring(type *t) { \ name##_update_state(t, \ NETWORK_CONFIG_STATE_REQUESTING | \ - NETWORK_CONFIG_STATE_CONFIGURING, \ + NETWORK_CONFIG_STATE_CONFIGURING | \ + NETWORK_CONFIG_STATE_REMOVING, \ NETWORK_CONFIG_STATE_CONFIGURING); \ } \ static inline void name##_enter_configured(type *t) { \ diff --git a/src/network/test-network.c b/src/network/test-network.c index 0145c8b7c7..250ab9eff4 100644 --- a/src/network/test-network.c +++ b/src/network/test-network.c @@ -174,16 +174,6 @@ static int test_load_config(Manager *manager) { return 0; } -static bool address_equal(const Address *a1, const Address *a2) { - if (a1 == a2) - return true; - - if (!a1 || !a2) - return false; - - return address_compare_func(a1, a2) == 0; -} - static void test_address_equality(void) { _cleanup_(address_freep) Address *a1 = NULL, *a2 = NULL; diff --git a/src/nspawn/nspawn-mount.c b/src/nspawn/nspawn-mount.c index a54f1464ba..0e8aaa1e3c 100644 --- a/src/nspawn/nspawn-mount.c +++ b/src/nspawn/nspawn-mount.c @@ -13,6 +13,7 @@ #include "mkdir-label.h" #include "mount-util.h" #include "mountpoint-util.h" +#include "namespace-util.h" #include "nspawn-mount.h" #include "parse-util.h" #include "path-util.h" @@ -510,6 +511,9 @@ int mount_sysfs(const char *dest, MountSettingsMask mount_settings) { MS_BIND|MS_NOSUID|MS_NOEXEC|MS_NODEV|MS_REMOUNT|extra_flags, NULL); } +#define PROC_DEFAULT_MOUNT_FLAGS (MS_NOSUID|MS_NOEXEC|MS_NODEV) +#define SYS_DEFAULT_MOUNT_FLAGS (MS_RDONLY|MS_NOSUID|MS_NOEXEC|MS_NODEV) + int mount_all(const char *dest, MountSettingsMask mount_settings, uid_t uid_shift, @@ -538,7 +542,7 @@ int mount_all(const char *dest, static const MountPoint mount_table[] = { /* First we list inner child mounts (i.e. 
mounts applied *after* entering user namespacing) */ - { "proc", "/proc", "proc", NULL, MS_NOSUID|MS_NOEXEC|MS_NODEV, + { "proc", "/proc", "proc", NULL, PROC_DEFAULT_MOUNT_FLAGS, MOUNT_FATAL|MOUNT_IN_USERNS|MOUNT_MKDIR|MOUNT_FOLLOW_SYMLINKS }, /* we follow symlinks here since not following them requires /proc/ already being mounted, which we don't have here. */ { "/proc/sys", "/proc/sys", NULL, NULL, MS_BIND, @@ -576,7 +580,7 @@ int mount_all(const char *dest, MOUNT_FATAL|MOUNT_APPLY_TMPFS_TMP|MOUNT_MKDIR }, { "tmpfs", "/sys", "tmpfs", "mode=555" TMPFS_LIMITS_SYS, MS_NOSUID|MS_NOEXEC|MS_NODEV, MOUNT_FATAL|MOUNT_APPLY_APIVFS_NETNS|MOUNT_MKDIR }, - { "sysfs", "/sys", "sysfs", NULL, MS_RDONLY|MS_NOSUID|MS_NOEXEC|MS_NODEV, + { "sysfs", "/sys", "sysfs", NULL, SYS_DEFAULT_MOUNT_FLAGS, MOUNT_FATAL|MOUNT_APPLY_APIVFS_RO|MOUNT_MKDIR }, /* skipped if above was mounted */ { "sysfs", "/sys", "sysfs", NULL, MS_NOSUID|MS_NOEXEC|MS_NODEV, MOUNT_FATAL|MOUNT_MKDIR }, /* skipped if above was mounted */ @@ -1336,3 +1340,60 @@ done: return r; } + +#define NSPAWN_PRIVATE_FULLY_VISIBLE_PROCFS "/run/host/proc" +#define NSPAWN_PRIVATE_FULLY_VISIBLE_SYSFS "/run/host/sys" + +int pin_fully_visible_fs(void) { + int r; + + (void) mkdir_p(NSPAWN_PRIVATE_FULLY_VISIBLE_PROCFS, 0755); + (void) mkdir_p(NSPAWN_PRIVATE_FULLY_VISIBLE_SYSFS, 0755); + + r = mount_follow_verbose(LOG_ERR, "proc", NSPAWN_PRIVATE_FULLY_VISIBLE_PROCFS, "proc", PROC_DEFAULT_MOUNT_FLAGS, NULL); + if (r < 0) + return r; + + r = mount_follow_verbose(LOG_ERR, "sysfs", NSPAWN_PRIVATE_FULLY_VISIBLE_SYSFS, "sysfs", SYS_DEFAULT_MOUNT_FLAGS, NULL); + if (r < 0) + return r; + + return 0; +} + +static int do_wipe_fully_visible_fs(void) { + if (umount2(NSPAWN_PRIVATE_FULLY_VISIBLE_PROCFS, MNT_DETACH) < 0) + return log_error_errno(errno, "Failed to unmount temporary proc: %m"); + + if (rmdir(NSPAWN_PRIVATE_FULLY_VISIBLE_PROCFS) < 0) + return log_error_errno(errno, "Failed to remove temporary proc mountpoint: %m"); + + if 
(umount2(NSPAWN_PRIVATE_FULLY_VISIBLE_SYSFS, MNT_DETACH) < 0) + return log_error_errno(errno, "Failed to unmount temporary sys: %m"); + + if (rmdir(NSPAWN_PRIVATE_FULLY_VISIBLE_SYSFS) < 0) + return log_error_errno(errno, "Failed to remove temporary sys mountpoint: %m"); + + return 0; +} + +int wipe_fully_visible_fs(int mntns_fd) { + _cleanup_close_ int orig_mntns_fd = -EBADF; + int r, rr; + + r = namespace_open(0, NULL, &orig_mntns_fd, NULL, NULL, NULL); + if (r < 0) + return log_error_errno(r, "Failed to pin originating mount namespace: %m"); + + r = namespace_enter(-EBADF, mntns_fd, -EBADF, -EBADF, -EBADF); + if (r < 0) + return log_error_errno(r, "Failed to enter mount namespace: %m"); + + rr = do_wipe_fully_visible_fs(); + + r = namespace_enter(-EBADF, orig_mntns_fd, -EBADF, -EBADF, -EBADF); + if (r < 0) + return log_error_errno(r, "Failed to enter original mount namespace: %m"); + + return rr; +} diff --git a/src/nspawn/nspawn-mount.h b/src/nspawn/nspawn-mount.h index 6bedbf9b3f..bf5e47dce4 100644 --- a/src/nspawn/nspawn-mount.h +++ b/src/nspawn/nspawn-mount.h @@ -67,3 +67,5 @@ int pivot_root_parse(char **pivot_root_new, char **pivot_root_old, const char *s int setup_pivot_root(const char *directory, const char *pivot_root_new, const char *pivot_root_old); int tmpfs_patch_options(const char *options,uid_t uid_shift, const char *selinux_apifs_context, char **ret); +int pin_fully_visible_fs(void); +int wipe_fully_visible_fs(int mntns_fd); diff --git a/src/nspawn/nspawn.c b/src/nspawn/nspawn.c index d7b636209e..1282c8b98b 100644 --- a/src/nspawn/nspawn.c +++ b/src/nspawn/nspawn.c @@ -114,6 +114,7 @@ /* The notify socket inside the container it can use to talk to nspawn using the sd_notify(3) protocol */ #define NSPAWN_NOTIFY_SOCKET_PATH "/run/host/notify" +#define NSPAWN_MOUNT_TUNNEL "/run/host/incoming" #define EXIT_FORCE_RESTART 133 @@ -1249,7 +1250,7 @@ static int parse_argv(int argc, char *argv[]) { arg_uid_range = UINT32_C(0x10000); } else if (streq(optarg, 
"identity")) { - /* identitiy: User namespaces on, UID range is map the 0…0xFFFF range to + /* identity: User namespaces on, UID range is map the 0…0xFFFF range to * itself, i.e. we don't actually map anything, but do take benefit of * isolation of capability sets. */ arg_userns_mode = USER_NAMESPACE_FIXED; @@ -2776,7 +2777,7 @@ static int reset_audit_loginuid(void) { return 0; } -static int setup_propagate(const char *root) { +static int mount_tunnel_dig(const char *root) { const char *p, *q; int r; @@ -2789,11 +2790,11 @@ static int setup_propagate(const char *root) { if (r < 0) return log_error_errno(r, "Failed to create /run/host: %m"); - r = userns_mkdir(root, "/run/host/incoming", 0600, 0, 0); + r = userns_mkdir(root, NSPAWN_MOUNT_TUNNEL, 0600, 0, 0); if (r < 0) - return log_error_errno(r, "Failed to create /run/host/incoming: %m"); + return log_error_errno(r, "Failed to create "NSPAWN_MOUNT_TUNNEL": %m"); - q = prefix_roota(root, "/run/host/incoming"); + q = prefix_roota(root, NSPAWN_MOUNT_TUNNEL); r = mount_nofollow_verbose(LOG_ERR, p, q, NULL, MS_BIND, NULL); if (r < 0) return r; @@ -2802,8 +2803,17 @@ static int setup_propagate(const char *root) { if (r < 0) return r; - /* machined will MS_MOVE into that directory, and that's only supported for non-shared mounts. 
*/ - return mount_nofollow_verbose(LOG_ERR, NULL, q, NULL, MS_SLAVE, NULL); + return 0; +} + +static int mount_tunnel_open(void) { + int r; + + r = mount_follow_verbose(LOG_ERR, NULL, NSPAWN_MOUNT_TUNNEL, NULL, MS_SLAVE, NULL); + if (r < 0) + return r; + + return 0; } static int setup_machine_id(const char *directory) { @@ -3632,7 +3642,7 @@ static int outer_child( _cleanup_(bind_user_context_freep) BindUserContext *bind_user_context = NULL; _cleanup_strv_free_ char **os_release_pairs = NULL; - _cleanup_close_ int fd = -1; + _cleanup_close_ int fd = -1, mntns_fd = -EBADF; bool idmap = false; const char *p; pid_t pid; @@ -3697,6 +3707,15 @@ static int outer_child( return r; if (arg_userns_mode != USER_NAMESPACE_NO) { + r = namespace_open(0, NULL, &mntns_fd, NULL, NULL, NULL); + if (r < 0) + return log_error_errno(r, "Failed to pin outer mount namespace: %m"); + + l = send_one_fd(notify_socket, mntns_fd, 0); + if (l < 0) + return log_error_errno(l, "Failed to send outer mount namespace fd: %m"); + mntns_fd = safe_close(mntns_fd); + /* Let the parent know which UID shift we read from the image */ l = send(uid_shift_socket, &arg_uid_shift, sizeof(arg_uid_shift), MSG_NOSIGNAL); if (l < 0) @@ -3727,7 +3746,7 @@ static int outer_child( * place, so that we can make changes to its mount structure (for example, to implement * --volatile=) without this interfering with our ability to access files such as * /etc/localtime to copy into the container. Note that we use a fixed place for this - * (instead of a temporary directory, since we are living in our own mount namspace here + * (instead of a temporary directory, since we are living in our own mount namespace here * already, and thus don't need to be afraid of colliding with anyone else's mounts). 
*/ (void) mkdir_p("/run/systemd/nspawn-root", 0755); @@ -3858,19 +3877,6 @@ static int outer_child( unified_cgroup_hierarchy_socket = safe_close(unified_cgroup_hierarchy_socket); } - /* Mark everything as shared so our mounts get propagated down. This is required to make new bind - * mounts available in systemd services inside the container that create a new mount namespace. See - * https://github.com/systemd/systemd/issues/3860 Further submounts (such as /dev) done after this - * will inherit the shared propagation mode. - * - * IMPORTANT: Do not overmount the root directory anymore from now on to enable moving the root - * directory mount to root later on. - * https://github.com/systemd/systemd/issues/3847#issuecomment-562735251 - */ - r = mount_nofollow_verbose(LOG_ERR, NULL, directory, NULL, MS_SHARED|MS_REC, NULL); - if (r < 0) - return r; - r = recursive_chown(directory, arg_uid_shift, arg_uid_range); if (r < 0) return r; @@ -3910,7 +3916,7 @@ static int outer_child( if (r < 0) return r; - r = setup_propagate(directory); + r = mount_tunnel_dig(directory); if (r < 0) return r; @@ -3974,10 +3980,40 @@ static int outer_child( return r; } - r = mount_move_root(directory); + /* Mark everything as shared so our mounts get propagated down. This is required to make new bind + * mounts available in systemd services inside the container that create a new mount namespace. See + * https://github.com/systemd/systemd/issues/3860 Further submounts (such as /dev) done after this + * will inherit the shared propagation mode. + * + * IMPORTANT: Do not overmount the root directory anymore from now on to enable moving the root + * directory mount to root later on. + * https://github.com/systemd/systemd/issues/3847#issuecomment-562735251 + */ + r = mount_switch_root(directory, MOUNT_ATTR_PROPAGATION_SHARED); if (r < 0) return log_error_errno(r, "Failed to move root directory: %m"); + /* We finished setting up the rootfs which is a shared mount. 
The mount tunnel needs to be a + * dependent mount otherwise we can't MS_MOVE mounts that were propagated from the host into + * the container. */ + r = mount_tunnel_open(); + if (r < 0) + return r; + + if (arg_userns_mode != USER_NAMESPACE_NO) { + /* In order to mount procfs and sysfs in an unprivileged container the kernel + * requires that a fully visible instance is already present in the target mount + * namespace. Mount one here so the inner child can mount its own instances. Later + * we umount the temporary instances created here before we actually exec the + * payload. Since the rootfs is shared the umount will propagate into the container. + * Note, the inner child wouldn't be able to unmount the instances on its own since + * it doesn't own the originating mount namespace. IOW, the outer child needs to do + * this. */ + r = pin_fully_visible_fs(); + if (r < 0) + return r; + } + fd = setup_notify_child(); if (fd < 0) return fd; @@ -4735,12 +4771,12 @@ static int run_container( rtnl_socket_pair[2] = { -1, -1 }, pid_socket_pair[2] = { -1, -1 }, uuid_socket_pair[2] = { -1, -1 }, - notify_socket_pair[2] = { -1, -1 }, + fd_socket_pair[2] = { -EBADF, -EBADF }, uid_shift_socket_pair[2] = { -1, -1 }, master_pty_socket_pair[2] = { -1, -1 }, unified_cgroup_hierarchy_socket_pair[2] = { -1, -1}; - _cleanup_close_ int notify_socket = -1; + _cleanup_close_ int notify_socket = -1, mntns_fd = -EBADF; _cleanup_(barrier_destroy) Barrier barrier = BARRIER_NULL; _cleanup_(sd_event_source_unrefp) sd_event_source *notify_event_source = NULL; _cleanup_(sd_event_unrefp) sd_event *event = NULL; @@ -4787,7 +4823,7 @@ static int run_container( if (socketpair(AF_UNIX, SOCK_SEQPACKET|SOCK_CLOEXEC, 0, uuid_socket_pair) < 0) return log_error_errno(errno, "Failed to create id socket pair: %m"); - if (socketpair(AF_UNIX, SOCK_SEQPACKET|SOCK_CLOEXEC, 0, notify_socket_pair) < 0) + if (socketpair(AF_UNIX, SOCK_SEQPACKET|SOCK_CLOEXEC, 0, fd_socket_pair) < 0) return log_error_errno(errno, 
"Failed to create notify socket pair: %m"); if (socketpair(AF_UNIX, SOCK_SEQPACKET|SOCK_CLOEXEC, 0, master_pty_socket_pair) < 0) @@ -4840,7 +4876,7 @@ static int run_container( rtnl_socket_pair[0] = safe_close(rtnl_socket_pair[0]); pid_socket_pair[0] = safe_close(pid_socket_pair[0]); uuid_socket_pair[0] = safe_close(uuid_socket_pair[0]); - notify_socket_pair[0] = safe_close(notify_socket_pair[0]); + fd_socket_pair[0] = safe_close(fd_socket_pair[0]); master_pty_socket_pair[0] = safe_close(master_pty_socket_pair[0]); uid_shift_socket_pair[0] = safe_close(uid_shift_socket_pair[0]); unified_cgroup_hierarchy_socket_pair[0] = safe_close(unified_cgroup_hierarchy_socket_pair[0]); @@ -4854,7 +4890,7 @@ static int run_container( secondary, pid_socket_pair[1], uuid_socket_pair[1], - notify_socket_pair[1], + fd_socket_pair[1], kmsg_socket_pair[1], rtnl_socket_pair[1], uid_shift_socket_pair[1], @@ -4876,12 +4912,16 @@ static int run_container( rtnl_socket_pair[1] = safe_close(rtnl_socket_pair[1]); pid_socket_pair[1] = safe_close(pid_socket_pair[1]); uuid_socket_pair[1] = safe_close(uuid_socket_pair[1]); - notify_socket_pair[1] = safe_close(notify_socket_pair[1]); + fd_socket_pair[1] = safe_close(fd_socket_pair[1]); master_pty_socket_pair[1] = safe_close(master_pty_socket_pair[1]); uid_shift_socket_pair[1] = safe_close(uid_shift_socket_pair[1]); unified_cgroup_hierarchy_socket_pair[1] = safe_close(unified_cgroup_hierarchy_socket_pair[1]); if (arg_userns_mode != USER_NAMESPACE_NO) { + mntns_fd = receive_one_fd(fd_socket_pair[0], 0); + if (mntns_fd < 0) + return log_error_errno(mntns_fd, "Failed to receive mount namespace fd from outer child: %m"); + /* The child just let us know the UID shift it might have read from the image. 
*/ l = recv(uid_shift_socket_pair[0], &arg_uid_shift, sizeof arg_uid_shift, 0); if (l < 0) @@ -4958,7 +4998,7 @@ static int run_container( return log_error_errno(SYNTHETIC_ERRNO(EIO), "Short read while reading container machined ID."); /* We also retrieve the socket used for notifications generated by outer child */ - notify_socket = receive_one_fd(notify_socket_pair[0], 0); + notify_socket = receive_one_fd(fd_socket_pair[0], 0); if (notify_socket < 0) return log_error_errno(notify_socket, "Failed to receive notification socket from the outer child: %m"); @@ -5143,6 +5183,13 @@ static int run_container( if (r < 0) return r; + if (arg_userns_mode != USER_NAMESPACE_NO) { + r = wipe_fully_visible_fs(mntns_fd); + if (r < 0) + return r; + mntns_fd = safe_close(mntns_fd); + } + /* Let the child know that we are ready and wait that the child is completely ready now. */ if (!barrier_place_and_sync(&barrier)) /* #5 */ return log_error_errno(SYNTHETIC_ERRNO(ESRCH), "Child died too early."); @@ -5656,7 +5703,9 @@ static int run(int argc, char *argv[]) { DISSECT_IMAGE_GENERIC_ROOT | DISSECT_IMAGE_REQUIRE_ROOT | DISSECT_IMAGE_RELAX_VAR_CHECK | - DISSECT_IMAGE_USR_NO_ROOT; + DISSECT_IMAGE_USR_NO_ROOT | + DISSECT_IMAGE_ADD_PARTITION_DEVICES | + DISSECT_IMAGE_PIN_PARTITION_DEVICES; assert(arg_image); assert(!arg_template); @@ -5798,7 +5847,7 @@ static int run(int argc, char *argv[]) { arg_quiet = true; if (!arg_quiet) - log_info("Spawning container %s on %s.\nPress ^] three times within 1s to kill container.", + log_info("Spawning container %s on %s.\nPress Ctrl-] three times within 1s to kill container.", arg_machine, arg_image ?: arg_directory); assert_se(sigprocmask_many(SIG_BLOCK, NULL, SIGCHLD, SIGWINCH, SIGTERM, SIGINT, -1) >= 0); diff --git a/src/nss-myhostname/nss-myhostname.c b/src/nss-myhostname/nss-myhostname.c index 120e76be45..3af1d2f0c1 100644 --- a/src/nss-myhostname/nss-myhostname.c +++ b/src/nss-myhostname/nss-myhostname.c @@ -12,6 +12,7 @@ #include 
"local-addresses.h" #include "macro.h" #include "nss-util.h" +#include "resolve-util.h" #include "signal-util.h" #include "socket-util.h" #include "string-util.h" @@ -21,7 +22,7 @@ * IPv6 we use ::1 which unfortunately will not translate back to the * hostname but instead something like "localhost" or so. */ -#define LOCALADDRESS_IPV4 (htobe32(0x7F000002)) +#define LOCALADDRESS_IPV4 (htobe32(INADDR_LOCALADDRESS)) #define LOCALADDRESS_IPV6 &in6addr_loopback NSS_GETHOSTBYNAME_PROTOTYPES(myhostname); diff --git a/src/oom/oomd-util.c b/src/oom/oomd-util.c index 70a1dc941e..f97b771c50 100644 --- a/src/oom/oomd-util.c +++ b/src/oom/oomd-util.c @@ -145,7 +145,7 @@ bool oomd_swap_free_below(const OomdSystemContext *ctx, int threshold_permyriad) } int oomd_fetch_cgroup_oom_preference(OomdCGroupContext *ctx, const char *prefix) { - uid_t uid, prefix_uid; + uid_t uid; int r; assert(ctx); @@ -160,28 +160,34 @@ int oomd_fetch_cgroup_oom_preference(OomdCGroupContext *ctx, const char *prefix) if (r < 0) return log_debug_errno(r, "Failed to get owner/group from %s: %m", ctx->path); - r = cg_get_owner(SYSTEMD_CGROUP_CONTROLLER, prefix, &prefix_uid); - if (r < 0) - return log_debug_errno(r, "Failed to get owner/group from %s: %m", ctx->path); + if (uid != 0) { + uid_t prefix_uid; - if (uid == prefix_uid || uid == 0) { - /* Ignore most errors when reading the xattr since it is usually unset and cgroup xattrs are only used - * as an optional feature of systemd-oomd (and the system might not even support them). */ - r = cg_get_xattr_bool(SYSTEMD_CGROUP_CONTROLLER, ctx->path, "user.oomd_avoid"); - if (r == -ENOMEM) - return log_oom_debug(); - if (r < 0 && !ERRNO_IS_XATTR_ABSENT(r)) - log_debug_errno(r, "Failed to get xattr user.oomd_avoid, ignoring: %m"); - ctx->preference = r > 0 ? 
MANAGED_OOM_PREFERENCE_AVOID : ctx->preference; + r = cg_get_owner(SYSTEMD_CGROUP_CONTROLLER, prefix, &prefix_uid); + if (r < 0) + return log_debug_errno(r, "Failed to get owner/group from %s: %m", prefix); - r = cg_get_xattr_bool(SYSTEMD_CGROUP_CONTROLLER, ctx->path, "user.oomd_omit"); - if (r == -ENOMEM) - return log_oom_debug(); - if (r < 0 && !ERRNO_IS_XATTR_ABSENT(r)) - log_debug_errno(r, "Failed to get xattr user.oomd_omit, ignoring: %m"); - ctx->preference = r > 0 ? MANAGED_OOM_PREFERENCE_OMIT : ctx->preference; - } else - ctx->preference = MANAGED_OOM_PREFERENCE_NONE; + if (uid != prefix_uid) { + ctx->preference = MANAGED_OOM_PREFERENCE_NONE; + return 0; + } + } + + /* Ignore most errors when reading the xattr since it is usually unset and cgroup xattrs are only used + * as an optional feature of systemd-oomd (and the system might not even support them). */ + r = cg_get_xattr_bool(SYSTEMD_CGROUP_CONTROLLER, ctx->path, "user.oomd_avoid"); + if (r == -ENOMEM) + return log_oom_debug(); + if (r < 0 && !ERRNO_IS_XATTR_ABSENT(r)) + log_debug_errno(r, "Failed to get xattr user.oomd_avoid, ignoring: %m"); + ctx->preference = r > 0 ? MANAGED_OOM_PREFERENCE_AVOID : ctx->preference; + + r = cg_get_xattr_bool(SYSTEMD_CGROUP_CONTROLLER, ctx->path, "user.oomd_omit"); + if (r == -ENOMEM) + return log_oom_debug(); + if (r < 0 && !ERRNO_IS_XATTR_ABSENT(r)) + log_debug_errno(r, "Failed to get xattr user.oomd_omit, ignoring: %m"); + ctx->preference = r > 0 ? 
MANAGED_OOM_PREFERENCE_OMIT : ctx->preference; return 0; } @@ -236,7 +242,7 @@ int oomd_cgroup_kill(const char *path, bool recurse, bool dry_run) { if (r < 0) return r; - log_debug("oomd dry-run: Would have tried to kill %s with recurse=%s", cg_path, true_false(recurse)); + log_info("oomd dry-run: Would have tried to kill %s with recurse=%s", cg_path, true_false(recurse)); return 0; } diff --git a/src/oom/oomd-util.h b/src/oom/oomd-util.h index 7fd9e92109..a758d5589b 100644 --- a/src/oom/oomd-util.h +++ b/src/oom/oomd-util.h @@ -109,9 +109,10 @@ static inline int compare_swap_usage(OomdCGroupContext * const *c1, OomdCGroupCo * Returns the number of sorted items; negative on error. */ int oomd_sort_cgroup_contexts(Hashmap *h, oomd_compare_t compare_func, const char *prefix, OomdCGroupContext ***ret); -/* If the cgroups represented by `ctx` and `prefix` are owned by the same user, - * then set `ctx->preference` using the `user.oomd_avoid` and `user.oomd_omit` - * xattrs. Otherwise, set `ctx->preference` to MANAGED_OOM_PREFERENCE_NONE. +/* If the cgroup is owned by root, or the cgroups represented by `ctx` and + * `prefix` are owned by the same user, then set `ctx->preference` using the + * `user.oomd_avoid` and `user.oomd_omit` xattrs. Otherwise, set + * `ctx->preference` to MANAGED_OOM_PREFERENCE_NONE. * * If `prefix` is NULL or the empty string, it is treated as root. If `prefix` * does not specify an ancestor cgroup of `ctx`, -EINVAL is returned. 
Returns diff --git a/src/partition/repart.c b/src/partition/repart.c index 7e56cc5155..a627d84305 100644 --- a/src/partition/repart.c +++ b/src/partition/repart.c @@ -158,9 +158,7 @@ STATIC_DESTRUCTOR_REGISTER(arg_tpm2_device, freep); STATIC_DESTRUCTOR_REGISTER(arg_tpm2_public_key, freep); STATIC_DESTRUCTOR_REGISTER(arg_filter_partitions, freep); -typedef struct Partition Partition; typedef struct FreeArea FreeArea; -typedef struct Context Context; typedef enum EncryptMode { ENCRYPT_OFF, @@ -180,7 +178,7 @@ typedef enum VerityMode { _VERITY_MODE_INVALID = -EINVAL, } VerityMode; -struct Partition { +typedef struct Partition { char *definition_path; char **drop_in_files; @@ -211,6 +209,7 @@ struct Partition { FreeArea *allocated_to_area; char *copy_blocks_path; + bool copy_blocks_path_is_our_file; bool copy_blocks_auto; const char *copy_blocks_root; int copy_blocks_fd; @@ -233,12 +232,12 @@ struct Partition { size_t roothash_size; char *split_name_format; - char *split_name_resolved; + char *split_path; - Partition *siblings[_VERITY_MODE_MAX]; + struct Partition *siblings[_VERITY_MODE_MAX]; - LIST_FIELDS(Partition, partitions); -}; + LIST_FIELDS(struct Partition, partitions); +} Partition; #define PARTITION_IS_FOREIGN(p) (!(p)->definition_path) #define PARTITION_EXISTS(p) (!!(p)->current_partition) @@ -249,7 +248,7 @@ struct FreeArea { uint64_t allocated; }; -struct Context { +typedef struct Context { LIST_HEAD(Partition, partitions); size_t n_partitions; @@ -263,7 +262,13 @@ struct Context { uint64_t grain_size; sd_id128_t seed; -}; + + char *node; + bool node_is_our_file; + int backing_fd; + + bool from_scratch; +} Context; static const char *encrypt_mode_table[_ENCRYPT_MODE_MAX] = { [ENCRYPT_OFF] = "off", @@ -340,7 +345,10 @@ static Partition* partition_free(Partition *p) { if (p->new_partition) fdisk_unref_partition(p->new_partition); - free(p->copy_blocks_path); + if (p->copy_blocks_path_is_our_file) + unlink_and_free(p->copy_blocks_path); + else + 
free(p->copy_blocks_path); safe_close(p->copy_blocks_fd); free(p->format); @@ -351,7 +359,7 @@ static Partition* partition_free(Partition *p) { free(p->roothash); free(p->split_name_format); - free(p->split_name_resolved); + unlink_and_free(p->split_path); return mfree(p); } @@ -464,6 +472,12 @@ static Context *context_free(Context *context) { if (context->fdisk_context) fdisk_unref_context(context->fdisk_context); + safe_close(context->backing_fd); + if (context->node_is_our_file) + unlink_and_free(context->node); + else + free(context->node); + return mfree(context); } @@ -1920,11 +1934,7 @@ static int derive_uuid(sd_id128_t base, const char *token, sd_id128_t *ret) { return 0; } -static int context_load_partition_table( - Context *context, - const char *node, - int *backing_fd) { - +static int context_load_partition_table(Context *context) { _cleanup_(fdisk_unref_contextp) struct fdisk_context *c = NULL; _cleanup_(fdisk_unref_tablep) struct fdisk_table *t = NULL; uint64_t left_boundary = UINT64_MAX, first_lba, last_lba, nsectors; @@ -1937,36 +1947,35 @@ static int context_load_partition_table( int r; assert(context); - assert(node); - assert(backing_fd); assert(!context->fdisk_context); assert(!context->free_areas); assert(context->start == UINT64_MAX); assert(context->end == UINT64_MAX); assert(context->total == UINT64_MAX); - c = fdisk_new_context(); - if (!c) - return log_oom(); - /* libfdisk doesn't have an API to operate on arbitrary fds, hence reopen the fd going via the * /proc/self/fd/ magic path if we have an existing fd. Open the original file otherwise. 
*/ - if (*backing_fd < 0) - r = fdisk_assign_device(c, node, arg_dry_run); - else - r = fdisk_assign_device(c, FORMAT_PROC_FD_PATH(*backing_fd), arg_dry_run); + if (context->backing_fd < 0) { + c = fdisk_new_context(); + if (!c) + return log_oom(); + + r = fdisk_assign_device(c, context->node, arg_dry_run); + } else + r = fdisk_new_context_fd(context->backing_fd, arg_dry_run, &c); + if (r == -EINVAL && arg_size_auto) { struct stat st; /* libfdisk returns EINVAL if opening a file of size zero. Let's check for that, and accept * it if automatic sizing is requested. */ - if (*backing_fd < 0) - r = stat(node, &st); + if (context->backing_fd < 0) + r = stat(context->node, &st); else - r = fstat(*backing_fd, &st); + r = fstat(context->backing_fd, &st); if (r < 0) - return log_error_errno(errno, "Failed to stat block device '%s': %m", node); + return log_error_errno(errno, "Failed to stat block device '%s': %m", context->node); if (S_ISREG(st.st_mode) && st.st_size == 0) { /* User the fallback values if we have no better idea */ @@ -1978,16 +1987,16 @@ static int context_load_partition_table( r = -EINVAL; } if (r < 0) - return log_error_errno(r, "Failed to open device '%s': %m", node); + return log_error_errno(r, "Failed to open device '%s': %m", context->node); - if (*backing_fd < 0) { + if (context->backing_fd < 0) { /* If we have no fd referencing the device yet, make a copy of the fd now, so that we have one */ - *backing_fd = fd_reopen(fdisk_get_devfd(c), O_RDONLY|O_CLOEXEC); - if (*backing_fd < 0) - return log_error_errno(*backing_fd, "Failed to duplicate fdisk fd: %m"); + context->backing_fd = fd_reopen(fdisk_get_devfd(c), O_RDONLY|O_CLOEXEC); + if (context->backing_fd < 0) + return log_error_errno(context->backing_fd, "Failed to duplicate fdisk fd: %m"); /* Tell udev not to interfere while we are processing the device */ - if (flock(*backing_fd, arg_dry_run ? LOCK_SH : LOCK_EX) < 0) + if (flock(context->backing_fd, arg_dry_run ? 
LOCK_SH : LOCK_EX) < 0) return log_error_errno(errno, "Failed to lock block device: %m"); } @@ -2013,7 +2022,7 @@ static int context_load_partition_table( case EMPTY_REFUSE: /* Refuse empty disks, insist on an existing GPT partition table */ if (!fdisk_is_labeltype(c, FDISK_DISKLABEL_GPT)) - return log_notice_errno(SYNTHETIC_ERRNO(EHWPOISON), "Disk %s has no GPT disk label, not repartitioning.", node); + return log_notice_errno(SYNTHETIC_ERRNO(EHWPOISON), "Disk %s has no GPT disk label, not repartitioning.", context->node); break; @@ -2021,9 +2030,9 @@ static int context_load_partition_table( /* Require an empty disk, refuse any existing partition table */ r = fdisk_has_label(c); if (r < 0) - return log_error_errno(r, "Failed to determine whether disk %s has a disk label: %m", node); + return log_error_errno(r, "Failed to determine whether disk %s has a disk label: %m", context->node); if (r > 0) - return log_notice_errno(SYNTHETIC_ERRNO(EHWPOISON), "Disk %s already has a disk label, refusing.", node); + return log_notice_errno(SYNTHETIC_ERRNO(EHWPOISON), "Disk %s already has a disk label, refusing.", context->node); from_scratch = true; break; @@ -2032,10 +2041,10 @@ static int context_load_partition_table( /* Allow both an empty disk and an existing partition table, but only GPT */ r = fdisk_has_label(c); if (r < 0) - return log_error_errno(r, "Failed to determine whether disk %s has a disk label: %m", node); + return log_error_errno(r, "Failed to determine whether disk %s has a disk label: %m", context->node); if (r > 0) { if (!fdisk_is_labeltype(c, FDISK_DISKLABEL_GPT)) - return log_notice_errno(SYNTHETIC_ERRNO(EHWPOISON), "Disk %s has non-GPT disk label, not repartitioning.", node); + return log_notice_errno(SYNTHETIC_ERRNO(EHWPOISON), "Disk %s has non-GPT disk label, not repartitioning.", context->node); } else from_scratch = true; @@ -2091,8 +2100,7 @@ static int context_load_partition_table( _cleanup_free_ char *label_copy = NULL; Partition *last = NULL; 
struct fdisk_partition *p; - struct fdisk_parttype *pt; - const char *pts, *ids, *label; + const char *label; uint64_t sz, start; bool found = false; sd_id128_t ptid, id; @@ -2110,25 +2118,13 @@ static int context_load_partition_table( fdisk_partition_has_partno(p) <= 0) return log_error_errno(SYNTHETIC_ERRNO(EINVAL), "Found a partition without a position, size or number."); - pt = fdisk_partition_get_type(p); - if (!pt) - return log_error_errno(SYNTHETIC_ERRNO(EIO), "Failed to acquire type of partition: %m"); - - pts = fdisk_parttype_get_string(pt); - if (!pts) - return log_error_errno(SYNTHETIC_ERRNO(EIO), "Failed to acquire type of partition as string: %m"); - - r = sd_id128_from_string(pts, &ptid); + r = fdisk_partition_get_type_as_id128(p, &ptid); if (r < 0) - return log_error_errno(r, "Failed to parse partition type UUID %s: %m", pts); - - ids = fdisk_partition_get_uuid(p); - if (!ids) - return log_error_errno(SYNTHETIC_ERRNO(EINVAL), "Found a partition without a UUID."); + return log_error_errno(r, "Failed to query partition type UUID: %m"); - r = sd_id128_from_string(ids, &id); + r = fdisk_partition_get_uuid_as_id128(p, &id); if (r < 0) - return log_error_errno(r, "Failed to parse partition UUID %s: %m", ids); + return log_error_errno(r, "Failed to query partition UUID: %m"); label = fdisk_partition_get_name(p); if (!isempty(label)) { @@ -2354,30 +2350,47 @@ static const char *partition_label(const Partition *p) { return gpt_partition_type_uuid_to_string(p->type.uuid); } -static int context_dump_partitions(Context *context, const char *node) { +static int context_dump_partitions(Context *context) { _cleanup_(table_unrefp) Table *t = NULL; uint64_t sum_padding = 0, sum_size = 0; int r; - const size_t roothash_col = 13, dropin_files_col = 14; - bool has_roothash = false, has_dropin_files = false; + const size_t roothash_col = 13, dropin_files_col = 14, split_path_col = 15; + bool has_roothash = false, has_dropin_files = false, has_split_path = false; if 
((arg_json_format_flags & JSON_FORMAT_OFF) && context->n_partitions == 0) { log_info("Empty partition table."); return 0; } - t = table_new("type", "label", "uuid", "file", "node", "offset", "old size", "raw size", "size", "old padding", "raw padding", "padding", "activity", "roothash", "drop-in files"); + t = table_new("type", + "label", + "uuid", + "file", + "node", + "offset", + "old size", + "raw size", + "size", + "old padding", + "raw padding", + "padding", + "activity", + "roothash", + "drop-in files", + "split path"); if (!t) return log_oom(); if (!DEBUG_LOGGING) { if (arg_json_format_flags & JSON_FORMAT_OFF) (void) table_set_display(t, (size_t) 0, (size_t) 1, (size_t) 2, (size_t) 3, (size_t) 4, - (size_t) 8, (size_t) 11, roothash_col, dropin_files_col); + (size_t) 8, (size_t) 11, roothash_col, dropin_files_col, + split_path_col); else (void) table_set_display(t, (size_t) 0, (size_t) 1, (size_t) 2, (size_t) 3, (size_t) 4, (size_t) 5, (size_t) 6, (size_t) 7, (size_t) 9, (size_t) 10, - (size_t) 12, roothash_col, dropin_files_col); + (size_t) 12, roothash_col, dropin_files_col, + split_path_col); } (void) table_set_align_percent(t, table_get_cell(t, 0, 5), 100); @@ -2402,7 +2415,7 @@ static int context_dump_partitions(Context *context, const char *node) { activity = "resize"; label = partition_label(p); - partname = p->partno != UINT64_MAX ? fdisk_partname(node, p->partno+1) : NULL; + partname = p->partno != UINT64_MAX ? fdisk_partname(context->node, p->partno+1) : NULL; r = format_size_change(p->current_size, p->new_size, &size_change); if (r < 0) @@ -2439,12 +2452,14 @@ static int context_dump_partitions(Context *context, const char *node) { TABLE_STRING, padding_change, TABLE_SET_COLOR, !p->partitions_next && sum_padding > 0 ? 
ansi_underline() : NULL, TABLE_STRING, activity ?: "unchanged", TABLE_STRING, rh, - TABLE_STRV, p->drop_in_files); + TABLE_STRV, p->drop_in_files, + TABLE_STRING, empty_to_null(p->split_path) ?: "-"); if (r < 0) return table_log_add_error(r); has_roothash = has_roothash || !isempty(rh); has_dropin_files = has_dropin_files || !strv_isempty(p->drop_in_files); + has_split_path = has_split_path || !isempty(p->split_path); } if ((arg_json_format_flags & JSON_FORMAT_OFF) && (sum_padding > 0 || sum_size > 0)) { @@ -2469,6 +2484,7 @@ static int context_dump_partitions(Context *context, const char *node) { TABLE_STRING, b, TABLE_EMPTY, TABLE_EMPTY, + TABLE_EMPTY, TABLE_EMPTY); if (r < 0) return table_log_add_error(r); @@ -2486,6 +2502,12 @@ static int context_dump_partitions(Context *context, const char *node) { return log_error_errno(r, "Failed to set columns to display: %m"); } + if (!has_split_path) { + r = table_hide_column_from_display(t, split_path_col); + if (r < 0) + return log_error_errno(r, "Failed to set columns to display: %m"); + } + return table_print_with_pager(t, arg_json_format_flags, arg_pager_flags, arg_legend); } @@ -2571,7 +2593,7 @@ done: return 0; } -static int context_dump_partition_bar(Context *context, const char *node) { +static int context_dump_partition_bar(Context *context) { _cleanup_free_ Partition **bar = NULL; _cleanup_free_ size_t *start_array = NULL; Partition *last = NULL; @@ -2647,7 +2669,7 @@ static int context_dump_partition_bar(Context *context, const char *node) { } else if (i == context->n_partitions - j) { _cleanup_free_ char *hint = NULL; - (void) partition_hint(p, node, &hint); + (void) partition_hint(p, context->node, &hint); if (streq_ptr(line[start_array[j-1]], special_glyph(SPECIAL_GLYPH_TREE_VERTICAL))) d = strjoin(special_glyph(SPECIAL_GLYPH_TREE_BRANCH), " ", strna(hint)); @@ -2692,11 +2714,10 @@ static bool context_has_roothash(Context *context) { return false; } -static int context_dump(Context *context, const char 
*node, bool late) { +static int context_dump(Context *context, bool late) { int r; assert(context); - assert(node); if (arg_pretty == 0 && FLAGS_SET(arg_json_format_flags, JSON_FORMAT_OFF)) return 0; @@ -2711,7 +2732,7 @@ static int context_dump(Context *context, const char *node, bool late) { if (late && FLAGS_SET(arg_json_format_flags, JSON_FORMAT_OFF) && !context_has_roothash(context)) return 0; - r = context_dump_partitions(context, node); + r = context_dump_partitions(context); if (r < 0) return r; @@ -2720,7 +2741,7 @@ static int context_dump(Context *context, const char *node, bool late) { if (FLAGS_SET(arg_json_format_flags, JSON_FORMAT_OFF) && !late) { putc('\n', stdout); - r = context_dump_partition_bar(context, node); + r = context_dump_partition_bar(context); if (r < 0) return r; @@ -2963,7 +2984,7 @@ static int context_discard_gap_after(Context *context, Partition *p) { return 0; } -static int context_wipe_and_discard(Context *context, bool from_scratch) { +static int context_wipe_and_discard(Context *context) { int r; assert(context); @@ -2984,7 +3005,7 @@ static int context_wipe_and_discard(Context *context, bool from_scratch) { if (r < 0) return r; - if (!from_scratch) { + if (!context->from_scratch) { r = context_discard_partition(context, p); if (r < 0) return r; @@ -2995,7 +3016,7 @@ static int context_wipe_and_discard(Context *context, bool from_scratch) { } } - if (!from_scratch) { + if (!context->from_scratch) { r = context_discard_gap_after(context, NULL); if (r < 0) return r; @@ -3036,6 +3057,36 @@ static PartitionTarget *partition_target_free(PartitionTarget *t) { DEFINE_TRIVIAL_CLEANUP_FUNC(PartitionTarget*, partition_target_free); +static int prepare_temporary_file(PartitionTarget *t, uint64_t size) { + _cleanup_(unlink_and_freep) char *temp = NULL; + _cleanup_close_ int fd = -1; + const char *vt; + int r; + + assert(t); + + r = var_tmp_dir(&vt); + if (r < 0) + return log_error_errno(r, "Could not determine temporary directory: %m"); + + 
temp = path_join(vt, "repart-XXXXXX"); + if (!temp) + return log_oom(); + + fd = mkostemp_safe(temp); + if (fd < 0) + return log_error_errno(fd, "Failed to create temporary file: %m"); + + if (ftruncate(fd, size) < 0) + return log_error_errno(errno, "Failed to truncate temporary file to %s: %m", + FORMAT_BYTES(size)); + + t->fd = TAKE_FD(fd); + t->path = TAKE_PTR(temp); + + return 0; +} + static int partition_target_prepare( Context *context, Partition *p, @@ -3044,9 +3095,8 @@ static int partition_target_prepare( PartitionTarget **ret) { _cleanup_(partition_target_freep) PartitionTarget *t = NULL; - struct stat st; - int whole_fd; - int r; + _cleanup_(loop_device_unrefp) LoopDevice *d = NULL; + int whole_fd, r; assert(context); assert(p); @@ -3054,16 +3104,6 @@ static int partition_target_prepare( assert_se((whole_fd = fdisk_get_devfd(context->fdisk_context)) >= 0); - if (fstat(whole_fd, &st) < 0) - return -errno; - - /* If we're operating on a block device, we definitely need privileges to access block devices so we - * can just use loop devices as our target. Otherwise, we're operating on a regular file, in that - * case, let's write to regular files and copy those into the final image so we can run without root - * privileges. On filesystems with reflinking support, we can take advantage of this and just reflink - * the result into the image. - */ - t = new(PartitionTarget, 1); if (!t) return log_oom(); @@ -3072,46 +3112,37 @@ static int partition_target_prepare( .whole_fd = -1, }; - if (S_ISBLK(st.st_mode) || (p->format && !mkfs_supports_root_option(p->format))) { - _cleanup_(loop_device_unrefp) LoopDevice *d = NULL; + if (!need_path) { + if (lseek(whole_fd, p->offset, SEEK_SET) == (off_t) -1) + return log_error_errno(errno, "Failed to seek to partition offset: %m"); - /* Loopback block devices are not only useful to turn regular files into block devices, but - * also to cut out sections of block devices into new block devices. 
*/ + t->whole_fd = whole_fd; + *ret = TAKE_PTR(t); + return 0; + } - r = loop_device_make(whole_fd, O_RDWR, p->offset, size, 0, 0, LOCK_EX, &d); - if (r < 0) - return log_error_errno(r, "Failed to make loopback device of future partition %" PRIu64 ": %m", p->partno); + /* Loopback block devices are not only useful to turn regular files into block devices, but + * also to cut out sections of block devices into new block devices. */ + r = loop_device_make(whole_fd, O_RDWR, p->offset, size, 0, 0, LOCK_EX, &d); + if (r < 0 && r != -ENOENT && !ERRNO_IS_PRIVILEGE(r)) + return log_error_errno(r, "Failed to make loopback device of future partition %" PRIu64 ": %m", p->partno); + if (r >= 0) { t->loop = TAKE_PTR(d); - } else if (need_path) { - _cleanup_(unlink_and_freep) char *temp = NULL; - _cleanup_close_ int fd = -1; - const char *vt; - - r = var_tmp_dir(&vt); - if (r < 0) - return log_error_errno(r, "Could not determine temporary directory: %m"); - - temp = path_join(vt, "repart-XXXXXX"); - if (!temp) - return log_oom(); + *ret = TAKE_PTR(t); + return 0; + } - fd = mkostemp_safe(temp); - if (fd < 0) - return log_error_errno(fd, "Failed to create temporary file: %m"); + /* If we can't allocate a loop device, let's write to a regular file that we copy into the final + * image so we can run in containers and without needing root privileges. On filesystems with + * reflinking support, we can take advantage of this and just reflink the result into the image. 
+ */ - if (ftruncate(fd, size) < 0) - return log_error_errno(errno, "Failed to truncate temporary file to %s: %m", - FORMAT_BYTES(size)); + log_debug_errno(r, "No access to loop devices, falling back to a regular file"); - t->fd = TAKE_FD(fd); - t->path = TAKE_PTR(temp); - } else { - if (lseek(whole_fd, p->offset, SEEK_SET) == (off_t) -1) - return log_error_errno(errno, "Failed to seek to partition offset: %m"); - - t->whole_fd = whole_fd; - } + r = prepare_temporary_file(t, size); + if (r < 0) + return r; *ret = TAKE_PTR(t); @@ -3677,7 +3708,12 @@ static int context_copy_blocks(Context *context) { return 0; } -static int do_copy_files(Partition *p, const char *root, const Set *denylist) { +static int do_copy_files( + Partition *p, + const char *root, + uid_t override_uid, + gid_t override_gid, + const Set *denylist) { int r; @@ -3720,18 +3756,22 @@ static int do_copy_files(Partition *p, const char *root, const Set *denylist) { if (pfd < 0) return log_error_errno(pfd, "Failed to open parent directory of target: %m"); + /* Make sure everything is owned by the user running repart so that + * make_filesystem() can map the user running repart to "root" in a user + * namespace to have the files owned by root in the final image. 
*/ + r = copy_tree_at( sfd, ".", pfd, fn, - getuid(), getgid(), - COPY_REFLINK|COPY_HOLES|COPY_MERGE|COPY_REPLACE|COPY_SIGINT|COPY_HARDLINKS|COPY_ALL_XATTRS, + override_uid, override_gid, + COPY_REFLINK|COPY_HOLES|COPY_MERGE|COPY_REPLACE|COPY_SIGINT|COPY_HARDLINKS|COPY_ALL_XATTRS|COPY_GRACEFUL_WARN, denylist); } else r = copy_tree_at( sfd, ".", tfd, ".", - getuid(), getgid(), - COPY_REFLINK|COPY_HOLES|COPY_MERGE|COPY_REPLACE|COPY_SIGINT|COPY_HARDLINKS|COPY_ALL_XATTRS, + override_uid, override_gid, + COPY_REFLINK|COPY_HOLES|COPY_MERGE|COPY_REPLACE|COPY_SIGINT|COPY_HARDLINKS|COPY_ALL_XATTRS|COPY_GRACEFUL_WARN, denylist); if (r < 0) return log_error_errno(r, "Failed to copy '%s%s' to '%s%s': %m", @@ -3768,6 +3808,9 @@ static int do_copy_files(Partition *p, const char *root, const Set *denylist) { if (r < 0) return log_error_errno(r, "Failed to copy '%s' to '%s%s': %m", *source, strempty(arg_root), *target); + if (fchown(tfd, override_uid, override_gid) < 0) + return log_error_errno(r, "Failed to change ownership of %s", *target); + (void) copy_xattr(sfd, tfd, COPY_ALL_XATTRS); (void) copy_access(sfd, tfd); (void) copy_times(sfd, tfd, 0); @@ -3777,7 +3820,7 @@ static int do_copy_files(Partition *p, const char *root, const Set *denylist) { return 0; } -static int do_make_directories(Partition *p, const char *root) { +static int do_make_directories(Partition *p, uid_t override_uid, gid_t override_gid, const char *root) { int r; assert(p); @@ -3785,7 +3828,7 @@ static int do_make_directories(Partition *p, const char *root) { STRV_FOREACH(d, p->make_directories) { - r = mkdir_p_root(root, *d, getuid(), getgid(), 0755); + r = mkdir_p_root(root, *d, override_uid, override_gid, 0755); if (r < 0) return log_error_errno(r, "Failed to create directory '%s' in file system: %m", *d); } @@ -3793,6 +3836,11 @@ static int do_make_directories(Partition *p, const char *root) { return 0; } +static bool partition_needs_populate(Partition *p) { + assert(p); + return 
!strv_isempty(p->copy_files) || !strv_isempty(p->make_directories); +} + static int partition_populate_directory(Partition *p, const Set *denylist, char **ret) { _cleanup_(rm_rf_physical_and_freep) char *root = NULL; _cleanup_close_ int rfd = -1; @@ -3800,11 +3848,6 @@ static int partition_populate_directory(Partition *p, const Set *denylist, char assert(ret); - if (strv_isempty(p->copy_files) && strv_isempty(p->make_directories)) { - *ret = NULL; - return 0; - } - rfd = mkdtemp_open("/var/tmp/repart-XXXXXX", 0, &root); if (rfd < 0) return log_error_errno(rfd, "Failed to create temporary directory: %m"); @@ -3812,15 +3855,11 @@ static int partition_populate_directory(Partition *p, const Set *denylist, char if (fchmod(rfd, 0755) < 0) return log_error_errno(errno, "Failed to change mode of temporary directory: %m"); - /* Make sure everything is owned by the user running repart so that make_filesystem() can map the - * user running repart to "root" in a user namespace to have the files owned by root in the final - * image. 
*/ - - r = do_copy_files(p, root, denylist); + r = do_copy_files(p, root, getuid(), getgid(), denylist); if (r < 0) return r; - r = do_make_directories(p, root); + r = do_make_directories(p, getuid(), getgid(), root); if (r < 0) return r; @@ -3829,27 +3868,11 @@ static int partition_populate_directory(Partition *p, const Set *denylist, char } static int partition_populate_filesystem(Partition *p, const char *node, const Set *denylist) { - _cleanup_(loop_device_unrefp) LoopDevice *d = NULL; - struct stat st; int r; assert(p); assert(node); - if (strv_isempty(p->copy_files) && strv_isempty(p->make_directories)) - return 0; - - if (stat(node, &st) < 0) - return log_error_errno(errno, "Failed to stat %s: %m", node); - - if (!S_ISBLK(st.st_mode)) { - r = loop_device_make_by_path(node, O_RDWR, 0, LOCK_EX, &d); - if (r < 0) - return log_error_errno(r, "Failed to make loopback device of %s: %m", node); - - node = d->node; - } - log_info("Populating %s filesystem with files.", p->format); /* We copy in a child process, since we have to mount the fs for that, and we don't want that fs to @@ -3872,10 +3895,10 @@ static int partition_populate_filesystem(Partition *p, const char *node, const S if (mount_nofollow_verbose(LOG_ERR, node, fs, p->format, MS_NOATIME|MS_NODEV|MS_NOEXEC|MS_NOSUID, NULL) < 0) _exit(EXIT_FAILURE); - if (do_copy_files(p, fs, denylist) < 0) + if (do_copy_files(p, fs, 0, 0, denylist) < 0) _exit(EXIT_FAILURE); - if (do_make_directories(p, fs) < 0) + if (do_make_directories(p, 0, 0, fs) < 0) _exit(EXIT_FAILURE); r = syncfs_path(AT_FDCWD, fs); @@ -3978,11 +4001,16 @@ static int context_mkfs(Context *context) { log_info("Formatting future partition %" PRIu64 ".", p->partno); - /* We prefer (or are required in the case of read-only filesystems) to populate filesystems - * directly via the corresponding mkfs binary if it supports a --rootdir (or equivalent) - * option. To do that, we need to setup the final directory tree beforehand. 
*/ + /* If we're not writing to a loop device or if we're populating a read-only filesystem, we + * have to populate using the filesystem's mkfs's --root (or equivalent) option. To do that, + * we need to set up the final directory tree beforehand. */ + + if (partition_needs_populate(p) && (!t->loop || fstype_is_ro(p->format))) { + if (!mkfs_supports_root_option(p->format)) + return log_error_errno(SYNTHETIC_ERRNO(ENODEV), + "Loop device access is required to populate %s filesystems.", + p->format); - if (mkfs_supports_root_option(p->format)) { r = partition_populate_directory(p, denylist, &root); if (r < 0) return r; @@ -3995,9 +4023,11 @@ static int context_mkfs(Context *context) { log_info("Successfully formatted future partition %" PRIu64 ".", p->partno); - /* Now, we can populate all the other filesystems that we couldn't populate earlier. */ - if (!mkfs_supports_root_option(p->format)) { - r = partition_populate_filesystem(p, partition_target_path(t), denylist); + /* If we're writing to a loop device, we can now mount the empty filesystem and populate it. 
*/ + if (partition_needs_populate(p) && !root) { + assert(t->loop); + + r = partition_populate_filesystem(p, t->loop->node, denylist); if (r < 0) return r; } @@ -4459,7 +4489,7 @@ static int context_mangle_partitions(Context *context) { return 0; } -static int split_name_printf(Partition *p) { +static int split_name_printf(Partition *p, char **ret) { assert(p); const Specifier table[] = { @@ -4472,87 +4502,104 @@ static int split_name_printf(Partition *p) { {} }; - return specifier_printf(p->split_name_format, NAME_MAX, table, arg_root, p, &p->split_name_resolved); + return specifier_printf(p->split_name_format, NAME_MAX, table, arg_root, p, ret); +} + +static int split_node(const char *node, char **ret_base, char **ret_ext) { + _cleanup_free_ char *base = NULL, *ext = NULL; + char *e; + int r; + + assert(node); + assert(ret_base); + assert(ret_ext); + + r = path_extract_filename(node, &base); + if (r == O_DIRECTORY || r == -EADDRNOTAVAIL) + return log_error_errno(r, "Device node %s cannot be a directory", node); + if (r < 0) + return log_error_errno(r, "Failed to extract filename from %s: %m", node); + + e = endswith(base, ".raw"); + if (e) { + ext = strdup(e); + if (!ext) + return log_oom(); + + *e = 0; + } + + *ret_base = TAKE_PTR(base); + *ret_ext = TAKE_PTR(ext); + + return 0; } static int split_name_resolve(Context *context) { + _cleanup_free_ char *parent = NULL, *base = NULL, *ext = NULL; int r; + assert(context); + + r = path_extract_directory(context->node, &parent); + if (r < 0 && r != -EDESTADDRREQ) + return log_error_errno(r, "Failed to extract directory from %s: %m", context->node); + + r = split_node(context->node, &base, &ext); + if (r < 0) + return r; + LIST_FOREACH(partitions, p, context->partitions) { + _cleanup_free_ char *resolved = NULL; + if (p->dropped) continue; if (!p->split_name_format) continue; - r = split_name_printf(p); + r = split_name_printf(p, &resolved); if (r < 0) return log_error_errno(r, "Failed to resolve specifiers in %s: 
%m", p->split_name_format); + + if (parent) + p->split_path = strjoin(parent, "/", base, ".", resolved, ext); + else + p->split_path = strjoin(base, ".", resolved, ext); + if (!p->split_path) + return log_oom(); } LIST_FOREACH(partitions, p, context->partitions) { - if (!p->split_name_resolved) + if (!p->split_path) continue; LIST_FOREACH(partitions, q, context->partitions) { if (p == q) continue; - if (!q->split_name_resolved) + if (!q->split_path) continue; - if (!streq(p->split_name_resolved, q->split_name_resolved)) + if (!streq(p->split_path, q->split_path)) continue; return log_error_errno(SYNTHETIC_ERRNO(ENOTUNIQ), "%s and %s have the same resolved split name \"%s\", refusing", - p->definition_path, q->definition_path, p->split_name_resolved); + p->definition_path, q->definition_path, p->split_path); } } return 0; } -static int split_node(const char *node, char **ret_base, char **ret_ext) { - _cleanup_free_ char *base = NULL, *ext = NULL; - char *e; - int r; - - assert(node); - assert(ret_base); - assert(ret_ext); - - r = path_extract_filename(node, &base); - if (r == O_DIRECTORY || r == -EADDRNOTAVAIL) - return log_error_errno(r, "Device node %s cannot be a directory", arg_node); - if (r < 0) - return log_error_errno(r, "Failed to extract filename from %s: %m", arg_node); - - e = endswith(base, ".raw"); - if (e) { - ext = strdup(e); - if (!ext) - return log_oom(); - - *e = 0; - } - - *ret_base = TAKE_PTR(base); - *ret_ext = TAKE_PTR(ext); - - return 0; -} - static int context_split(Context *context) { - _cleanup_free_ char *base = NULL, *ext = NULL; - _cleanup_close_ int dir_fd = -1; int fd = -1, r; if (!arg_split) return 0; assert(context); - assert(arg_node); /* We can't do resolution earlier because the partition UUIDs for verity partitions are only filled * in after they've been generated. 
*/ @@ -4561,36 +4608,21 @@ static int context_split(Context *context) { if (r < 0) return r; - r = split_node(arg_node, &base, &ext); - if (r < 0) - return r; - - dir_fd = r = open_parent(arg_node, O_PATH|O_CLOEXEC, 0); - if (r == -EDESTADDRREQ) - dir_fd = AT_FDCWD; - else if (r < 0) - return log_error_errno(r, "Failed to open parent directory of %s: %m", arg_node); - LIST_FOREACH(partitions, p, context->partitions) { - _cleanup_free_ char *fname = NULL; _cleanup_close_ int fdt = -1; if (p->dropped) continue; - if (!p->split_name_resolved) + if (!p->split_path) continue; if (partition_skip(p)) continue; - fname = strjoin(base, ".", p->split_name_resolved, ext); - if (!fname) - return log_oom(); - - fdt = openat(dir_fd, fname, O_WRONLY|O_NOCTTY|O_CLOEXEC|O_NOFOLLOW|O_CREAT|O_EXCL, 0666); + fdt = open(p->split_path, O_WRONLY|O_NOCTTY|O_CLOEXEC|O_NOFOLLOW|O_CREAT|O_EXCL, 0666); if (fdt < 0) - return log_error_errno(errno, "Failed to open %s: %m", fname); + return log_error_errno(fdt, "Failed to open split partition file %s: %m", p->split_path); if (fd < 0) assert_se((fd = fdisk_get_devfd(context->fdisk_context)) >= 0); @@ -4600,23 +4632,19 @@ static int context_split(Context *context) { r = copy_bytes(fd, fdt, p->new_size, COPY_REFLINK|COPY_HOLES); if (r < 0) - return log_error_errno(r, "Failed to copy to split partition %s: %m", fname); + return log_error_errno(r, "Failed to copy to split partition %s: %m", p->split_path); } return 0; } -static int context_write_partition_table( - Context *context, - const char *node, - bool from_scratch) { - +static int context_write_partition_table(Context *context) { _cleanup_(fdisk_unref_tablep) struct fdisk_table *original_table = NULL; int capable, r; assert(context); - if (!from_scratch && !context_changed(context)) { + if (!context->from_scratch && !context_changed(context)) { log_info("No changes."); return 0; } @@ -4628,7 +4656,7 @@ static int context_write_partition_table( log_info("Applying changes."); - if (from_scratch) 
{ + if (context->from_scratch) { r = context_wipe_range(context, 0, context->total); if (r < 0) return r; @@ -4652,7 +4680,7 @@ static int context_write_partition_table( /* Wipe fs signatures and discard sectors where the new partitions are going to be placed and in the * gaps between partitions, just to be sure. */ - r = context_wipe_and_discard(context, from_scratch); + r = context_wipe_and_discard(context); if (r < 0) return r; @@ -4682,7 +4710,7 @@ static int context_write_partition_table( else if (capable > 0) { log_info("Telling kernel to reread partition table."); - if (from_scratch) + if (context->from_scratch) r = fdisk_reread_partition_table(context->fdisk_context); else r = fdisk_reread_changes(context->fdisk_context, original_table); @@ -4730,7 +4758,7 @@ static int context_read_seed(Context *context, const char *root) { return 0; } -static int context_factory_reset(Context *context, bool from_scratch) { +static int context_factory_reset(Context *context) { size_t n = 0; int r; @@ -4739,7 +4767,7 @@ static int context_factory_reset(Context *context, bool from_scratch) { if (arg_factory_reset <= 0) return 0; - if (from_scratch) /* Nothing to reset if we start from scratch */ + if (context->from_scratch) /* Nothing to reset if we start from scratch */ return 0; if (arg_dry_run) { @@ -4843,12 +4871,14 @@ static int resolve_copy_blocks_auto_candidate( errno = 0; r = blkid_do_safeprobe(b); - if (IN_SET(r, -2, 1)) { /* nothing found or ambiguous result */ + if (r == _BLKID_SAFEPROBE_ERROR) + return log_error_errno(errno_or_else(EIO), "Unable to probe for partition table of '%s': %m", p); + if (IN_SET(r, _BLKID_SAFEPROBE_AMBIGUOUS, _BLKID_SAFEPROBE_NOT_FOUND)) { log_debug("Didn't find partition table on block device '%s'.", p); return false; } - if (r != 0) - return log_error_errno(errno_or_else(EIO), "Unable to probe for partition table of '%s': %m", p); + + assert(r == _BLKID_SAFEPROBE_FOUND); (void) blkid_probe_lookup_value(b, "PTTYPE", &pttype, NULL); if 
(!streq_ptr(pttype, "gpt")) { @@ -4860,7 +4890,6 @@ static int resolve_copy_blocks_auto_candidate( pl = blkid_probe_get_partitions(b); if (!pl) return log_error_errno(errno_or_else(EIO), "Unable read partition table of '%s': %m", p); - errno = 0; pp = blkid_partlist_devno_to_partition(pl, partition_devno); if (!pp) { @@ -4889,21 +4918,18 @@ static int resolve_copy_blocks_auto_candidate( return false; } - t = blkid_partition_get_uuid(pp); - if (isempty(t)) { - log_debug("Partition %u:%u has no UUID.", - major(partition_devno), minor(partition_devno)); + r = blkid_partition_get_uuid_id128(pp, &u); + if (r == -ENXIO) { + log_debug_errno(r, "Partition " DEVNUM_FORMAT_STR " has no UUID.", DEVNUM_FORMAT_VAL(partition_devno)); return false; } - - r = sd_id128_from_string(t, &u); if (r < 0) { - log_debug_errno(r, "Failed to parse partition UUID \"%s\": %m", t); + log_debug_errno(r, "Failed to read partition UUID of " DEVNUM_FORMAT_STR ": %m", DEVNUM_FORMAT_VAL(partition_devno)); return false; } - log_debug("Automatically found partition %u:%u of right type " SD_ID128_FORMAT_STR ".", - major(partition_devno), minor(partition_devno), + log_debug("Automatically found partition " DEVNUM_FORMAT_STR " of right type " SD_ID128_FORMAT_STR ".", + DEVNUM_FORMAT_VAL(partition_devno), SD_ID128_FORMAT_VAL(pt_parsed)); if (ret_uuid) @@ -5242,6 +5268,7 @@ static int context_minimize(Context *context) { LIST_FOREACH(partitions, p, context->partitions) { _cleanup_(rm_rf_physical_and_freep) char *root = NULL; _cleanup_(unlink_and_freep) char *temp = NULL; + _cleanup_(loop_device_unrefp) LoopDevice *d = NULL; _cleanup_close_ int fd = -1; sd_id128_t fs_uuid; uint64_t fsz; @@ -5258,6 +5285,9 @@ static int context_minimize(Context *context) { if (!p->minimize) continue; + if (!partition_needs_populate(p)) + continue; + assert(!p->copy_blocks_path); r = tempfn_random_child(vt, "repart", &temp); @@ -5272,11 +5302,15 @@ static int context_minimize(Context *context) { return log_error_errno(errno, 
"Failed to open temporary file %s: %m", temp); /* This may seem huge but it will be created sparse so it doesn't take up any space - * on disk until written to. */ + * on disk until written to. */ if (ftruncate(fd, 1024ULL * 1024ULL * 1024ULL * 1024ULL) < 0) return log_error_errno(errno, "Failed to truncate temporary file to %s: %m", FORMAT_BYTES(1024ULL * 1024ULL * 1024ULL * 1024ULL)); + r = loop_device_make(fd, O_RDWR, 0, UINT64_MAX, 0, 0, LOCK_EX, &d); + if (r < 0 && r != -ENOENT && !ERRNO_IS_PRIVILEGE(r)) + return log_error_errno(r, "Failed to make loopback device of %s: %m", temp); + /* We're going to populate this filesystem twice so use a random UUID the first time * to avoid UUID conflicts. */ r = sd_id128_randomize(&fs_uuid); @@ -5284,13 +5318,18 @@ static int context_minimize(Context *context) { return r; } - if (mkfs_supports_root_option(p->format)) { + if (!d || fstype_is_ro(p->format)) { + if (!mkfs_supports_root_option(p->format)) + return log_error_errno(SYNTHETIC_ERRNO(ENODEV), + "Loop device access is required to populate %s filesystems", + p->format); + r = partition_populate_directory(p, denylist, &root); if (r < 0) return r; } - r = make_filesystem(temp, p->format, strempty(p->new_label), root, fs_uuid, arg_discard); + r = make_filesystem(d ? d->node : temp, p->format, strempty(p->new_label), root, fs_uuid, arg_discard); if (r < 0) return r; @@ -5298,11 +5337,14 @@ static int context_minimize(Context *context) { * loopback file for us. 
*/ if (fstype_is_ro(p->format)) { p->copy_blocks_path = TAKE_PTR(temp); + p->copy_blocks_path_is_our_file = true; continue; } - if (!mkfs_supports_root_option(p->format)) { - r = partition_populate_filesystem(p, temp, denylist); + if (!root) { + assert(d); + + r = partition_populate_filesystem(p, d->node, denylist); if (r < 0) return r; } @@ -5327,6 +5369,8 @@ static int context_minimize(Context *context) { if (minimal_size_by_fs_name(p->format) != UINT64_MAX) fsz = MAX(minimal_size_by_fs_name(p->format), fsz); + d = loop_device_unref(d); + /* Erase the previous filesystem first. */ if (ftruncate(fd, 0)) return log_error_errno(errno, "Failed to erase temporary file: %m"); @@ -5334,17 +5378,24 @@ static int context_minimize(Context *context) { if (ftruncate(fd, fsz)) return log_error_errno(errno, "Failed to truncate temporary file to %s: %m", FORMAT_BYTES(fsz)); - r = make_filesystem(temp, p->format, strempty(p->new_label), root, p->fs_uuid, arg_discard); + r = loop_device_make(fd, O_RDWR, 0, UINT64_MAX, 0, 0, LOCK_EX, &d); + if (r < 0 && r != -ENOENT && !ERRNO_IS_PRIVILEGE(r)) + return log_error_errno(r, "Failed to make loopback device of %s: %m", temp); + + r = make_filesystem(d ? 
d->node : temp, p->format, strempty(p->new_label), root, p->fs_uuid, arg_discard); if (r < 0) return r; - if (!mkfs_supports_root_option(p->format)) { - r = partition_populate_filesystem(p, temp, denylist); + if (!root) { + assert(d); + + r = partition_populate_filesystem(p, d->node, denylist); if (r < 0) return r; } p->copy_blocks_path = TAKE_PTR(temp); + p->copy_blocks_path_is_our_file = true; } return 0; @@ -5984,12 +6035,11 @@ static int acquire_root_devno( return 0; } -static int find_root(char **ret, int *ret_fd) { +static int find_root(Context *context) { _cleanup_free_ char *device = NULL; int r; - assert(ret); - assert(ret_fd); + assert(context); if (arg_node) { if (arg_empty == EMPTY_CREATE) { @@ -6004,14 +6054,15 @@ static int find_root(char **ret, int *ret_fd) { if (fd < 0) return log_error_errno(errno, "Failed to create '%s': %m", arg_node); - *ret = TAKE_PTR(s); - *ret_fd = TAKE_FD(fd); + context->node = TAKE_PTR(s); + context->node_is_our_file = true; + context->backing_fd = TAKE_FD(fd); return 0; } /* Note that we don't specify a root argument here: if the user explicitly configured a node * we'll take it relative to the host, not the image */ - r = acquire_root_devno(arg_node, NULL, O_RDONLY|O_CLOEXEC, ret, ret_fd); + r = acquire_root_devno(arg_node, NULL, O_RDONLY|O_CLOEXEC, &context->node, &context->backing_fd); if (r == -EUCLEAN) return btrfs_log_dev_root(LOG_ERR, r, arg_node); if (r < 0) @@ -6033,7 +6084,8 @@ static int find_root(char **ret, int *ret_fd) { FOREACH_STRING(p, "/", "/usr") { - r = acquire_root_devno(p, arg_root, O_RDONLY|O_DIRECTORY|O_CLOEXEC, ret, ret_fd); + r = acquire_root_devno(p, arg_root, O_RDONLY|O_DIRECTORY|O_CLOEXEC, &context->node, + &context->backing_fd); if (r < 0) { if (r == -EUCLEAN) return btrfs_log_dev_root(LOG_ERR, r, p); @@ -6045,7 +6097,7 @@ static int find_root(char **ret, int *ret_fd) { } else if (r < 0) return log_error_errno(r, "Failed to read symlink /run/systemd/volatile-root: %m"); else { - r = 
acquire_root_devno(device, NULL, O_RDONLY|O_CLOEXEC, ret, ret_fd); + r = acquire_root_devno(device, NULL, O_RDONLY|O_CLOEXEC, &context->node, &context->backing_fd); if (r == -EUCLEAN) return btrfs_log_dev_root(LOG_ERR, r, device); if (r < 0) @@ -6065,11 +6117,7 @@ static int resize_pt(int fd) { * possession of the enlarged backing file. For this it suffices to open the device with libfdisk and * immediately write it again, with no changes. */ - c = fdisk_new_context(); - if (!c) - return log_oom(); - - r = fdisk_assign_device(c, FORMAT_PROC_FD_PATH(fd), 0); + r = fdisk_new_context_fd(fd, /* read_only= */ false, &c); if (r < 0) return log_error_errno(r, "Failed to open device '%s': %m", FORMAT_PROC_FD_PATH(fd)); @@ -6255,9 +6303,7 @@ static int run(int argc, char *argv[]) { _cleanup_(loop_device_unrefp) LoopDevice *loop_device = NULL; _cleanup_(umount_and_rmdir_and_freep) char *mounted_dir = NULL; _cleanup_(context_freep) Context* context = NULL; - _cleanup_free_ char *node = NULL; - _cleanup_close_ int backing_fd = -1; - bool from_scratch, node_is_our_loop = false; + bool node_is_our_loop = false; int r; log_show_color(true); @@ -6323,32 +6369,27 @@ static int run(int argc, char *argv[]) { if (r < 0) return r; - if (context->n_partitions <= 0 && arg_empty == EMPTY_REFUSE) { - log_info("Didn't find any partition definition files, nothing to do."); - return 0; - } - - r = find_root(&node, &backing_fd); + r = find_root(context); if (r < 0) return r; if (arg_size != UINT64_MAX) { r = resize_backing_fd( - node, - &backing_fd, + context->node, + &context->backing_fd, node_is_our_loop ? arg_image : NULL, node_is_our_loop ? loop_device : NULL); if (r < 0) return r; } - r = context_load_partition_table(context, node, &backing_fd); + r = context_load_partition_table(context); if (r == -EHWPOISON) return 77; /* Special return value which means "Not GPT, so not doing anything". This isn't * really an error when called at boot. 
*/ if (r < 0) return r; - from_scratch = r > 0; /* Starting from scratch */ + context->from_scratch = r > 0; /* Starting from scratch */ if (arg_can_factory_reset) { r = context_can_factory_reset(context); @@ -6360,7 +6401,7 @@ static int run(int argc, char *argv[]) { return 0; } - r = context_factory_reset(context, from_scratch); + r = context_factory_reset(context); if (r < 0) return r; if (r > 0) { @@ -6371,16 +6412,11 @@ static int run(int argc, char *argv[]) { /* Reload the reduced partition table */ context_unload_partition_table(context); - r = context_load_partition_table(context, node, &backing_fd); + r = context_load_partition_table(context); if (r < 0) return r; } -#if 0 - (void) context_dump_partitions(context, node); - putchar('\n'); -#endif - r = context_read_seed(context, arg_root); if (r < 0) return r; @@ -6413,14 +6449,14 @@ static int run(int argc, char *argv[]) { assert(arg_size != UINT64_MAX); r = resize_backing_fd( - node, - &backing_fd, + context->node, + &context->backing_fd, node_is_our_loop ? arg_image : NULL, node_is_our_loop ? 
loop_device : NULL); if (r < 0) return r; - r = context_load_partition_table(context, node, &backing_fd); + r = context_load_partition_table(context); if (r < 0) return r; } @@ -6449,9 +6485,9 @@ static int run(int argc, char *argv[]) { /* Now calculate where each new partition gets placed */ context_place_partitions(context); - (void) context_dump(context, node, /*late=*/ false); + (void) context_dump(context, /*late=*/ false); - r = context_write_partition_table(context, node, from_scratch); + r = context_write_partition_table(context); if (r < 0) return r; @@ -6459,7 +6495,12 @@ static int run(int argc, char *argv[]) { if (r < 0) return r; - (void) context_dump(context, node, /*late=*/ true); + (void) context_dump(context, /*late=*/ true); + + context->node = mfree(context->node); + + LIST_FOREACH(partitions, p, context->partitions) + p->split_path = mfree(p->split_path); return 0; } diff --git a/src/portable/portable.c b/src/portable/portable.c index 76af743771..0909e14aab 100644 --- a/src/portable/portable.c +++ b/src/portable/portable.c @@ -375,7 +375,9 @@ static int portable_extract_by_path( DISSECT_IMAGE_REQUIRE_ROOT | DISSECT_IMAGE_DISCARD_ON_LOOP | DISSECT_IMAGE_RELAX_VAR_CHECK | - DISSECT_IMAGE_USR_NO_ROOT, + DISSECT_IMAGE_USR_NO_ROOT | + DISSECT_IMAGE_ADD_PARTITION_DEVICES | + DISSECT_IMAGE_PIN_PARTITION_DEVICES, &m); if (r == -ENOPKG) sd_bus_error_setf(error, SD_BUS_ERROR_INVALID_ARGS, "Couldn't identify a suitable partition table or file system in '%s'.", path); diff --git a/src/pstore/pstore.c b/src/pstore/pstore.c index f8e6582d5b..6c3d577507 100644 --- a/src/pstore/pstore.c +++ b/src/pstore/pstore.c @@ -114,7 +114,7 @@ static int compare_pstore_entries(const PStoreEntry *a, const PStoreEntry *b) { return strcmp(a->dirent.d_name, b->dirent.d_name); } -static int move_file(PStoreEntry *pe, const char *subdir) { +static int move_file(PStoreEntry *pe, const char *subdir1, const char *subdir2) { _cleanup_free_ char *ifd_path = NULL, *ofd_path = NULL; 
_cleanup_free_ void *field = NULL; const char *suffix, *message; @@ -128,7 +128,7 @@ static int move_file(PStoreEntry *pe, const char *subdir) { if (!ifd_path) return log_oom(); - ofd_path = path_join(arg_archivedir, subdir, pe->dirent.d_name); + ofd_path = path_join(arg_archivedir, subdir1, subdir2, pe->dirent.d_name); if (!ofd_path) return log_oom(); @@ -171,153 +171,115 @@ static int move_file(PStoreEntry *pe, const char *subdir) { return 0; } -static int write_dmesg(const char *dmesg, size_t size, const char *id) { - _cleanup_(unlink_and_freep) char *tmp_path = NULL; +static int append_dmesg(PStoreEntry *pe, const char *subdir1, const char *subdir2) { + /* Append dmesg chunk to end, create if needed */ _cleanup_free_ char *ofd_path = NULL; _cleanup_close_ int ofd = -1; ssize_t wr; - int r; - if (size == 0) - return 0; + assert(pe); - assert(dmesg); + if (pe->content_size == 0) + return 0; - ofd_path = path_join(arg_archivedir, id, "dmesg.txt"); + ofd_path = path_join(arg_archivedir, subdir1, subdir2, "dmesg.txt"); if (!ofd_path) return log_oom(); - ofd = open_tmpfile_linkable(ofd_path, O_CLOEXEC|O_CREAT|O_TRUNC|O_WRONLY, &tmp_path); + ofd = open(ofd_path, O_CREAT|O_NOFOLLOW|O_NOCTTY|O_CLOEXEC|O_APPEND|O_WRONLY, 0640); if (ofd < 0) - return log_error_errno(ofd, "Failed to open temporary file %s: %m", ofd_path); - wr = write(ofd, dmesg, size); + return log_error_errno(ofd, "Failed to open file %s: %m", ofd_path); + wr = write(ofd, pe->content, pe->content_size); if (wr < 0) return log_error_errno(errno, "Failed to store dmesg to %s: %m", ofd_path); - if (wr != (ssize_t)size) - return log_error_errno(SYNTHETIC_ERRNO(EIO), "Failed to store dmesg to %s. 
%zu bytes are lost.", ofd_path, size - wr); - r = link_tmpfile(ofd, tmp_path, ofd_path); - if (r < 0) - return log_error_errno(r, "Failed to write temporary file %s: %m", ofd_path); - tmp_path = mfree(tmp_path); + if ((size_t)wr != pe->content_size) + return log_error_errno(SYNTHETIC_ERRNO(EIO), "Failed to store dmesg to %s. %zu bytes are lost.", ofd_path, pe->content_size - wr); return 0; } -static void process_dmesg_files(PStoreList *list) { +static int process_dmesg_files(PStoreList *list) { /* Move files, reconstruct dmesg.txt */ - _cleanup_free_ char *dmesg = NULL, *dmesg_id = NULL; - size_t dmesg_size = 0; - bool dmesg_bad = false; - PStoreEntry *pe; + _cleanup_free_ char *erst_subdir = NULL; + uint64_t last_record_id = 0; + + /* When dmesg is written into pstore, it is done so in small chunks, whatever the exchange buffer + * size is with the underlying pstore backend (ie. EFI may be ~2KiB), which means an example + * pstore with approximately 64KB of storage may have up to roughly 32 dmesg files, some likely + * related. + * + * Here we look at the dmesg filename and try to discern if files are part of a related group, + * meaning the same original dmesg. + * + * The dmesg- filename contains the backend-type and the Common Platform Error Record, CPER, + * record id, a 64-bit number. 
+ * + * Files are processed in reverse lexigraphical order so as to properly reconstruct original dmesg.*/ - /* Handle each dmesg file: files processed in reverse - * order so as to properly reconstruct original dmesg */ for (size_t n = list->n_entries; n > 0; n--) { - bool move_file_and_continue = false; - _cleanup_free_ char *pe_id = NULL; + PStoreEntry *pe; char *p; - size_t plen; pe = &list->entries[n-1]; if (pe->handled) continue; - if (!startswith(pe->dirent.d_name, "dmesg-")) - continue; - if (endswith(pe->dirent.d_name, ".enc.z")) /* indicates a problem */ - move_file_and_continue = true; - p = strrchr(pe->dirent.d_name, '-'); - if (!p) - move_file_and_continue = true; - - if (move_file_and_continue) { - /* A dmesg file on which we do NO additional processing */ - (void) move_file(pe, NULL); - continue; - } - - /* See if this file is one of a related group of files - * in order to reconstruct dmesg */ - - /* When dmesg is written into pstore, it is done so in - * small chunks, whatever the exchange buffer size is - * with the underlying pstore backend (ie. EFI may be - * ~2KiB), which means an example pstore with approximately - * 64KB of storage may have up to roughly 32 dmesg files - * that could be related, depending upon the size of the - * original dmesg. - * - * Here we look at the dmesg filename and try to discern - * if files are part of a related group, meaning the same - * original dmesg. - * - * The two known pstore backends are EFI and ERST. These - * backends store data in the Common Platform Error - * Record, CPER, format. The dmesg- filename contains the - * CPER record id, a 64bit number (in decimal notation). - * In Linux, the record id is encoded with two digits for - * the dmesg part (chunk) number and 3 digits for the - * count number. So allowing an additional digit to - * compensate for advancing time, this code ignores the - * last six digits of the filename in determining the - * record id. 
- * - * For the EFI backend, the record id encodes an id in the - * upper 32 bits, and a timestamp in the lower 32-bits. - * So ignoring the least significant 6 digits has proven - * to generally identify related dmesg entries. */ -#define PSTORE_FILENAME_IGNORE 6 - - /* determine common portion of record id */ - ++p; /* move beyond dmesg- */ - plen = strlen(p); - if (plen > PSTORE_FILENAME_IGNORE) { - pe_id = memdup_suffix0(p, plen - PSTORE_FILENAME_IGNORE); - if (!pe_id) { - log_oom(); - return; - } - } else - pe_id = mfree(pe_id); - - /* Now move file from pstore to archive storage */ - move_file(pe, pe_id); - - if (dmesg_bad) continue; - - /* If the current record id is NOT the same as the - * previous record id, then start a new dmesg.txt file */ - if (!streq_ptr(pe_id, dmesg_id)) { - /* Encountered a new dmesg group, close out old one, open new one */ - (void) write_dmesg(dmesg, dmesg_size, dmesg_id); - dmesg_size = 0; - - /* now point dmesg_id to storage of pe_id */ - free_and_replace(dmesg_id, pe_id); - } - - /* Reconstruction of dmesg is done as a useful courtesy: do not fail, but don't write garbled - * output either. */ - size_t needed = strlen(pe->dirent.d_name) + strlen(":\n") + pe->content_size + 1; - if (!GREEDY_REALLOC(dmesg, dmesg_size + needed)) { - log_oom(); - dmesg_bad = true; + if (!startswith(pe->dirent.d_name, "dmesg-")) continue; - } - - dmesg_size += sprintf(dmesg + dmesg_size, "%s:\n", pe->dirent.d_name); - if (pe->content) { - memcpy(dmesg + dmesg_size, pe->content, pe->content_size); - dmesg_size += pe->content_size; - } - pe_id = mfree(pe_id); + if ((p = startswith(pe->dirent.d_name, "dmesg-efi-"))) { + /* For the EFI backend, the 3 least significant digits of record id encodes a + * "count" number, the next 2 least significant digits for the dmesg part + * (chunk) number, and the remaining digits as the timestamp. See + * linux/drivers/firmware/efi/efi-pstore.c in efi_pstore_write(). 
*/ + _cleanup_free_ char *subdir1 = NULL, *subdir2 = NULL; + size_t plen = strlen(p); + + if (plen < 6) + continue; + + /* Extract base record id */ + subdir1 = strndup(p, plen - 5); + if (!subdir1) + return log_oom(); + /* Extract "count" field */ + subdir2 = strndup(p + plen - 3, 3); + if (!subdir2) + return log_oom(); + + /* Now move file from pstore to archive storage */ + (void) move_file(pe, subdir1, subdir2); + + /* Append to the dmesg */ + (void) append_dmesg(pe, subdir1, subdir2); + } else if ((p = startswith(pe->dirent.d_name, "dmesg-erst-"))) { + /* For the ERST backend, the record is a monotonically increasing number, seeded as + * a timestamp. See linux/drivers/acpi/apei/erst.c in erst_writer(). */ + uint64_t record_id; + + if (safe_atou64(p, &record_id) < 0) + continue; + if (last_record_id - 1 != record_id) + /* A discontinuity in the number has been detected, this current record id + * will become the directory name for all pieces of the dmesg in this + * series. */ + if (free_and_strdup(&erst_subdir, p) < 0) + return log_oom(); + + /* Now move file from pstore to archive storage */ + (void) move_file(pe, erst_subdir, NULL); + + /* Append to the dmesg */ + (void) append_dmesg(pe, erst_subdir, NULL); + + /* Update, but keep erst_subdir for next file */ + last_record_id = record_id; + } else + log_debug("Unknown backend, ignoring \"%s\".", pe->dirent.d_name); } - - if (!dmesg_bad) - (void) write_dmesg(dmesg, dmesg_size, dmesg_id); + return 0; } static int list_files(PStoreList *list, const char *sourcepath) { @@ -393,11 +355,11 @@ static int run(int argc, char *argv[]) { typesafe_qsort(list.entries, list.n_entries, compare_pstore_entries); /* Process known file types */ - process_dmesg_files(&list); + (void) process_dmesg_files(&list); /* Move left over files out of pstore */ for (size_t n = 0; n < list.n_entries; n++) - move_file(&list.entries[n], NULL); + (void) move_file(&list.entries[n], NULL, NULL); return 0; } diff --git 
a/src/resolve/fuzz-resource-record.c b/src/resolve/fuzz-resource-record.c new file mode 100644 index 0000000000..15c465933d --- /dev/null +++ b/src/resolve/fuzz-resource-record.c @@ -0,0 +1,35 @@ +/* SPDX-License-Identifier: LGPL-2.1-or-later */ + +#include "fd-util.h" +#include "fuzz.h" +#include "memory-util.h" +#include "resolved-dns-packet.h" + +int LLVMFuzzerTestOneInput(const uint8_t *data, size_t size) { + _cleanup_free_ char *out = NULL; /* out should be freed after f */ + size_t out_size; + _cleanup_fclose_ FILE *f = NULL; + _cleanup_(dns_resource_record_unrefp) DnsResourceRecord *rr = NULL, *copy = NULL; + _cleanup_(json_variant_unrefp) JsonVariant *v = NULL; + + if (outside_size_range(size, 0, DNS_PACKET_SIZE_MAX)) + return 0; + + if (dns_resource_record_new_from_raw(&rr, data, size) < 0) + return 0; + + assert_se(copy = dns_resource_record_copy(rr)); + assert_se(dns_resource_record_equal(copy, rr) > 0); + + assert_se(f = open_memstream_unlocked(&out, &out_size)); + (void) fprintf(f, "%s", strna(dns_resource_record_to_string(rr))); + + if (dns_resource_record_to_json(rr, &v) < 0) + return 0; + + (void) json_variant_dump(v, JSON_FORMAT_PRETTY|JSON_FORMAT_COLOR|JSON_FORMAT_SOURCE, f, NULL); + (void) dns_resource_record_to_wire_format(rr, false); + (void) dns_resource_record_to_wire_format(rr, true); + + return 0; +} diff --git a/src/resolve/meson.build b/src/resolve/meson.build index e11aefce7a..cd02c88039 100644 --- a/src/resolve/meson.build +++ b/src/resolve/meson.build @@ -237,6 +237,11 @@ fuzzers += [ libshared], [lib_openssl_or_gcrypt, libm]], + [files('fuzz-resource-record.c'), + [libsystemd_resolve_core, + libshared], + [lib_openssl_or_gcrypt, + libm]], ] systemd_resolved_sources += files('resolved.c') diff --git a/src/resolve/resolvectl.c b/src/resolve/resolvectl.c index ff645fc0d7..5889bd772f 100644 --- a/src/resolve/resolvectl.c +++ b/src/resolve/resolvectl.c @@ -480,7 +480,11 @@ static bool single_label_nonsynthetic(const char *name) { if 
(!dns_name_is_single_label(name)) return false; - if (is_localhost(name) || is_gateway_hostname(name)) + if (is_localhost(name) || + is_gateway_hostname(name) || + is_outbound_hostname(name) || + is_dns_stub_hostname(name) || + is_dns_proxy_stub_hostname(name)) return false; r = resolve_system_hostname(NULL, &first_label); diff --git a/src/resolve/resolved-dns-rr.c b/src/resolve/resolved-dns-rr.c index d47cdbbd8e..f4fa219ab7 100644 --- a/src/resolve/resolved-dns-rr.c +++ b/src/resolve/resolved-dns-rr.c @@ -1137,12 +1137,15 @@ const char *dns_resource_record_to_string(DnsResourceRecord *rr) { break; default: - t = hexmem(rr->generic.data, rr->generic.data_size); - if (!t) - return NULL; - /* Format as documented in RFC 3597, Section 5 */ - r = asprintf(&s, "%s \\# %zu %s", k, rr->generic.data_size, t); + if (rr->generic.data_size == 0) + r = asprintf(&s, "%s \\# 0", k); + else { + t = hexmem(rr->generic.data, rr->generic.data_size); + if (!t) + return NULL; + r = asprintf(&s, "%s \\# %zu %s", k, rr->generic.data_size, t); + } if (r < 0) return NULL; break; diff --git a/src/resolve/resolved-dns-scope.c b/src/resolve/resolved-dns-scope.c index b586d2c56f..635763954b 100644 --- a/src/resolve/resolved-dns-scope.c +++ b/src/resolve/resolved-dns-scope.c @@ -635,8 +635,11 @@ DnsScopeMatch dns_scope_good_domain( if (dns_name_dont_resolve(domain)) return DNS_SCOPE_NO; - /* Never go to network for the _gateway or _outbound domain — they're something special, synthesized locally. */ - if (is_gateway_hostname(domain) || is_outbound_hostname(domain)) + /* Never go to network for the _gateway, _outbound, _localdnsstub, _localdnsproxy domain — they're something special, synthesized locally. 
*/ + if (is_gateway_hostname(domain) || + is_outbound_hostname(domain) || + is_dns_stub_hostname(domain) || + is_dns_proxy_stub_hostname(domain)) return DNS_SCOPE_NO; switch (s->protocol) { @@ -687,7 +690,7 @@ DnsScopeMatch dns_scope_good_domain( } /* If there's a true search domain defined for this scope, and the query is single-label, - * then let's resolve things here, prefereably. Note that LLMNR considers itself + * then let's resolve things here, preferably. Note that LLMNR considers itself * authoritative for single-label names too, at the same preference, see below. */ if (has_search_domains && dns_name_is_single_label(domain)) return DNS_SCOPE_YES_BASE + 1; @@ -764,8 +767,6 @@ DnsScopeMatch dns_scope_good_domain( return DNS_SCOPE_MAYBE; if ((dns_name_is_single_label(domain) && /* only resolve single label names via LLMNR */ - !is_gateway_hostname(domain) && /* don't resolve "_gateway" with LLMNR, let local synthesizing logic handle that */ - !is_outbound_hostname(domain) && /* similar for "_outbound" */ dns_name_equal(domain, "local") == 0 && /* don't resolve "local" with LLMNR, it's the top-level domain of mDNS after all, see above */ manager_is_own_hostname(s->manager, domain) <= 0)) /* never resolve the local hostname via LLMNR */ return DNS_SCOPE_YES_BASE + 1; /* Return +1, as we consider ourselves authoritative @@ -1116,7 +1117,7 @@ DnsTransaction *dns_scope_find_transaction( !(t->query_flags & SD_RESOLVED_NO_CACHE)) continue; - /* If we are asked to clamp ttls an the existing transaction doesn't do it, we can't + /* If we are asked to clamp ttls and the existing transaction doesn't do it, we can't * reuse */ if ((query_flags & SD_RESOLVED_CLAMP_TTL) && !(t->query_flags & SD_RESOLVED_CLAMP_TTL)) diff --git a/src/resolve/resolved-dns-synthesize.c b/src/resolve/resolved-dns-synthesize.c index b3442ad906..51e06bb91e 100644 --- a/src/resolve/resolved-dns-synthesize.c +++ b/src/resolve/resolved-dns-synthesize.c @@ -7,20 +7,6 @@ #include "missing_network.h" 
#include "resolved-dns-synthesize.h" -int dns_synthesize_ifindex(int ifindex) { - - /* When the caller asked for resolving on a specific - * interface, we synthesize the answer for that - * interface. However, if nothing specific was claimed and we - * only return localhost RRs, we synthesize the answer for - * localhost. */ - - if (ifindex > 0) - return ifindex; - - return LOOPBACK_IFINDEX; -} - int dns_synthesize_family(uint64_t flags) { /* Picks an address family depending on set flags. This is @@ -57,7 +43,7 @@ DnsProtocol dns_synthesize_protocol(uint64_t flags) { return DNS_PROTOCOL_DNS; } -static int synthesize_localhost_rr(Manager *m, const DnsResourceKey *key, int ifindex, DnsAnswer **answer) { +static int synthesize_localhost_rr(Manager *m, const DnsResourceKey *key, DnsAnswer **answer) { int r; assert(m); @@ -77,7 +63,7 @@ static int synthesize_localhost_rr(Manager *m, const DnsResourceKey *key, int if rr->a.in_addr.s_addr = htobe32(INADDR_LOOPBACK); - r = dns_answer_add(*answer, rr, dns_synthesize_ifindex(ifindex), DNS_ANSWER_AUTHENTICATED, NULL); + r = dns_answer_add(*answer, rr, LOOPBACK_IFINDEX, DNS_ANSWER_AUTHENTICATED, NULL); if (r < 0) return r; } @@ -91,7 +77,7 @@ static int synthesize_localhost_rr(Manager *m, const DnsResourceKey *key, int if rr->aaaa.in6_addr = in6addr_loopback; - r = dns_answer_add(*answer, rr, dns_synthesize_ifindex(ifindex), DNS_ANSWER_AUTHENTICATED, NULL); + r = dns_answer_add(*answer, rr, LOOPBACK_IFINDEX, DNS_ANSWER_AUTHENTICATED, NULL); if (r < 0) return r; } @@ -113,7 +99,7 @@ static int answer_add_ptr(DnsAnswer **answer, const char *from, const char *to, return dns_answer_add(*answer, rr, ifindex, flags, NULL); } -static int synthesize_localhost_ptr(Manager *m, const DnsResourceKey *key, int ifindex, DnsAnswer **answer) { +static int synthesize_localhost_ptr(Manager *m, const DnsResourceKey *key, DnsAnswer **answer) { int r; assert(m); @@ -125,7 +111,7 @@ static int synthesize_localhost_ptr(Manager *m, const 
DnsResourceKey *key, int i if (r < 0) return r; - r = answer_add_ptr(answer, dns_resource_key_name(key), "localhost", dns_synthesize_ifindex(ifindex), DNS_ANSWER_AUTHENTICATED); + r = answer_add_ptr(answer, dns_resource_key_name(key), "localhost", LOOPBACK_IFINDEX, DNS_ANSWER_AUTHENTICATED); if (r < 0) return r; } @@ -225,20 +211,19 @@ static int synthesize_system_hostname_rr(Manager *m, const DnsResourceKey *key, if (n == 0) { struct local_address buffer[2]; - /* If we have no local addresses then use ::1 - * and 127.0.0.2 as local ones. */ + /* If we have no local addresses then use ::1 and 127.0.0.2 as local ones. */ if (IN_SET(af, AF_INET, AF_UNSPEC)) buffer[n++] = (struct local_address) { .family = AF_INET, - .ifindex = dns_synthesize_ifindex(ifindex), - .address.in.s_addr = htobe32(0x7F000002), + .ifindex = LOOPBACK_IFINDEX, + .address.in.s_addr = htobe32(INADDR_LOCALADDRESS), }; if (IN_SET(af, AF_INET6, AF_UNSPEC) && socket_ipv6_is_enabled()) buffer[n++] = (struct local_address) { .family = AF_INET6, - .ifindex = dns_synthesize_ifindex(ifindex), + .ifindex = LOOPBACK_IFINDEX, .address.in6 = in6addr_loopback, }; @@ -260,7 +245,7 @@ static int synthesize_system_hostname_ptr(Manager *m, int af, const union in_add assert(address); assert(answer); - if (af == AF_INET && address->in.s_addr == htobe32(0x7F000002)) { + if (af == AF_INET && address->in.s_addr == htobe32(INADDR_LOCALADDRESS)) { /* Always map the IPv4 address 127.0.0.2 to the local hostname, in addition to "localhost": */ @@ -268,19 +253,19 @@ static int synthesize_system_hostname_ptr(Manager *m, int af, const union in_add if (r < 0) return r; - r = answer_add_ptr(answer, "2.0.0.127.in-addr.arpa", m->full_hostname, dns_synthesize_ifindex(ifindex), DNS_ANSWER_AUTHENTICATED); + r = answer_add_ptr(answer, "2.0.0.127.in-addr.arpa", m->full_hostname, LOOPBACK_IFINDEX, DNS_ANSWER_AUTHENTICATED); if (r < 0) return r; - r = answer_add_ptr(answer, "2.0.0.127.in-addr.arpa", m->llmnr_hostname, 
dns_synthesize_ifindex(ifindex), DNS_ANSWER_AUTHENTICATED); + r = answer_add_ptr(answer, "2.0.0.127.in-addr.arpa", m->llmnr_hostname, LOOPBACK_IFINDEX, DNS_ANSWER_AUTHENTICATED); if (r < 0) return r; - r = answer_add_ptr(answer, "2.0.0.127.in-addr.arpa", m->mdns_hostname, dns_synthesize_ifindex(ifindex), DNS_ANSWER_AUTHENTICATED); + r = answer_add_ptr(answer, "2.0.0.127.in-addr.arpa", m->mdns_hostname, LOOPBACK_IFINDEX, DNS_ANSWER_AUTHENTICATED); if (r < 0) return r; - r = answer_add_ptr(answer, "2.0.0.127.in-addr.arpa", "localhost", dns_synthesize_ifindex(ifindex), DNS_ANSWER_AUTHENTICATED); + r = answer_add_ptr(answer, "2.0.0.127.in-addr.arpa", "localhost", LOOPBACK_IFINDEX, DNS_ANSWER_AUTHENTICATED); if (r < 0) return r; @@ -356,7 +341,90 @@ static int synthesize_gateway_rr( return 1; /* > 0 means: we have some gateway */ } -static int synthesize_gateway_ptr(Manager *m, int af, const union in_addr_union *address, int ifindex, DnsAnswer **answer) { +static int synthesize_dns_stub_rr( + Manager *m, + const DnsResourceKey *key, + in_addr_t addr, + DnsAnswer **answer) { + + _cleanup_(dns_resource_record_unrefp) DnsResourceRecord *rr = NULL; + int r; + + assert(m); + assert(key); + assert(answer); + + if (!IN_SET(key->type, DNS_TYPE_A, DNS_TYPE_ANY)) + return 1; /* we still consider ourselves the owner of this name */ + + r = dns_answer_reserve(answer, 1); + if (r < 0) + return r; + + rr = dns_resource_record_new_full(DNS_CLASS_IN, DNS_TYPE_A, dns_resource_key_name(key)); + if (!rr) + return -ENOMEM; + + rr->a.in_addr.s_addr = htobe32(addr); + + r = dns_answer_add(*answer, rr, LOOPBACK_IFINDEX, DNS_ANSWER_AUTHENTICATED, NULL); + if (r < 0) + return r; + + return 1; +} + +static int synthesize_dns_stub_ptr( + Manager *m, + int af, + const union in_addr_union *address, + DnsAnswer **answer) { + + int r; + + assert(m); + assert(address); + assert(answer); + + if (af != AF_INET) + return 0; + + if (address->in.s_addr == htobe32(INADDR_DNS_STUB)) { + + r = 
dns_answer_reserve(answer, 1); + if (r < 0) + return r; + + r = answer_add_ptr(answer, "53.0.0.127.in-addr.arpa", "_localdnsstub", LOOPBACK_IFINDEX, DNS_ANSWER_AUTHENTICATED); + if (r < 0) + return r; + + return 1; + } + + if (address->in.s_addr == htobe32(INADDR_DNS_PROXY_STUB)) { + + r = dns_answer_reserve(answer, 1); + if (r < 0) + return r; + + r = answer_add_ptr(answer, "54.0.0.127.in-addr.arpa", "_localdnsproxy", LOOPBACK_IFINDEX, DNS_ANSWER_AUTHENTICATED); + if (r < 0) + return r; + + return 1; + } + + return 0; +} + +static int synthesize_gateway_ptr( + Manager *m, + int af, + const union in_addr_union *address, + int ifindex, + DnsAnswer **answer) { + _cleanup_free_ struct local_address *addresses = NULL; int n; @@ -405,7 +473,7 @@ int dns_synthesize_answer( } else if (is_localhost(name)) { - r = synthesize_localhost_rr(m, key, ifindex, &answer); + r = synthesize_localhost_rr(m, key, &answer); if (r < 0) return log_error_errno(r, "Failed to synthesize localhost RRs: %m"); @@ -437,15 +505,30 @@ int dns_synthesize_answer( continue; } - } else if ((dns_name_endswith(name, "127.in-addr.arpa") > 0 && dns_name_equal(name, "2.0.0.127.in-addr.arpa") == 0) || + } else if (is_dns_stub_hostname(name)) { + + r = synthesize_dns_stub_rr(m, key, INADDR_DNS_STUB, &answer); + if (r < 0) + return log_error_errno(r, "Failed to synthesize local DNS stub RRs: %m"); + + } else if (is_dns_proxy_stub_hostname(name)) { + + r = synthesize_dns_stub_rr(m, key, INADDR_DNS_PROXY_STUB, &answer); + if (r < 0) + return log_error_errno(r, "Failed to synthesize local DNS stub RRs: %m"); + + } else if ((dns_name_endswith(name, "127.in-addr.arpa") > 0 && + dns_name_equal(name, "2.0.0.127.in-addr.arpa") == 0 && + dns_name_equal(name, "53.0.0.127.in-addr.arpa") == 0 && + dns_name_equal(name, "54.0.0.127.in-addr.arpa") == 0) || dns_name_equal(name, "1.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.ip6.arpa") > 0) { - r = synthesize_localhost_ptr(m, key, ifindex, &answer); + r = 
synthesize_localhost_ptr(m, key, &answer); if (r < 0) return log_error_errno(r, "Failed to synthesize localhost PTR RRs: %m"); } else if (dns_name_address(name, &af, &address) > 0) { - int v, w; + int v, w, u; if (getenv_bool("SYSTEMD_RESOLVED_SYNTHESIZE_HOSTNAME") == 0) continue; @@ -458,7 +541,11 @@ int dns_synthesize_answer( if (w < 0) return log_error_errno(w, "Failed to synthesize gateway hostname PTR RR: %m"); - if (v == 0 && w == 0) /* This IP address is neither a local one nor a gateway */ + u = synthesize_dns_stub_ptr(m, af, &address, &answer); + if (u < 0) + return log_error_errno(u, "Failed to synthesize local stub hostname PTR PR: %m"); + + if (v == 0 && w == 0 && u == 0) /* This IP address is neither a local one, nor a gateway, nor a stub address */ continue; /* Note that we never synthesize reverse PTR for _outbound, since those are local diff --git a/src/resolve/resolved-dns-synthesize.h b/src/resolve/resolved-dns-synthesize.h index fb624589d7..bf271e862d 100644 --- a/src/resolve/resolved-dns-synthesize.h +++ b/src/resolve/resolved-dns-synthesize.h @@ -5,7 +5,6 @@ #include "resolved-dns-question.h" #include "resolved-manager.h" -int dns_synthesize_ifindex(int ifindex); int dns_synthesize_family(uint64_t flags); DnsProtocol dns_synthesize_protocol(uint64_t flags); diff --git a/src/rpm/systemd-update-helper.in b/src/rpm/systemd-update-helper.in index ab8cdc0ff9..b35d952fab 100755 --- a/src/rpm/systemd-update-helper.in +++ b/src/rpm/systemd-update-helper.in @@ -19,21 +19,21 @@ case "$command" in remove-system-units) if [ -d /run/systemd/system ]; then - systemctl --no-reload disable --now "$@" + systemctl --no-reload disable --now --no-warn "$@" else - systemctl --no-reload disable "$@" + systemctl --no-reload disable --no-warn "$@" fi ;; remove-user-units) - systemctl --global disable "$@" + systemctl --global disable --no-warn "$@" [ -d /run/systemd/system ] || exit 0 users=$(systemctl list-units 'user@*' --legend=no | sed -n -r 
's/.*user@([0-9]+).service.*/\1/p') for user in $users; do SYSTEMD_BUS_TIMEOUT={{UPDATE_HELPER_USER_TIMEOUT}} \ - systemctl --user -M "$user@" disable --now "$@" & + systemctl --user -M "$user@" disable --now --no-warn "$@" & done wait ;; diff --git a/src/shared/acl-util.c b/src/shared/acl-util.c index c6c673984d..b734ee1e0c 100644 --- a/src/shared/acl-util.c +++ b/src/shared/acl-util.c @@ -90,6 +90,7 @@ int add_base_acls_if_needed(acl_t *acl_p, const char *path) { _cleanup_(acl_freep) acl_t basic = NULL; assert(acl_p); + assert(path); for (r = acl_get_entry(*acl_p, ACL_FIRST_ENTRY, &i); r > 0; @@ -208,12 +209,16 @@ int acl_search_groups(const char *path, char ***ret_groups) { return ret; } -int parse_acl(const char *text, acl_t *acl_access, acl_t *acl_default, bool want_mask) { +int parse_acl(const char *text, acl_t *ret_acl_access, acl_t *ret_acl_default, bool want_mask) { _cleanup_free_ char **a = NULL, **d = NULL; /* strings are not freed */ _cleanup_strv_free_ char **split = NULL; int r = -EINVAL; _cleanup_(acl_freep) acl_t a_acl = NULL, d_acl = NULL; + assert(text); + assert(ret_acl_access); + assert(ret_acl_default); + split = strv_split(text, ","); if (!split) return -ENOMEM; @@ -266,8 +271,8 @@ int parse_acl(const char *text, acl_t *acl_access, acl_t *acl_default, bool want } } - *acl_access = TAKE_PTR(a_acl); - *acl_default = TAKE_PTR(d_acl); + *ret_acl_access = TAKE_PTR(a_acl); + *ret_acl_default = TAKE_PTR(d_acl); return 0; } @@ -322,7 +327,7 @@ static int acl_entry_equal(acl_entry_t a, acl_entry_t b) { } } -static int find_acl_entry(acl_t acl, acl_entry_t entry, acl_entry_t *out) { +static int find_acl_entry(acl_t acl, acl_entry_t entry, acl_entry_t *ret) { acl_entry_t i; int r; @@ -334,36 +339,40 @@ static int find_acl_entry(acl_t acl, acl_entry_t entry, acl_entry_t *out) { if (r < 0) return r; if (r > 0) { - *out = i; - return 1; + if (ret) + *ret = i; + return 0; } } if (r < 0) return -errno; - return 0; + + return -ENOENT; } -int 
acls_for_file(const char *path, acl_type_t type, acl_t new, acl_t *acl) { - _cleanup_(acl_freep) acl_t old; +int acls_for_file(const char *path, acl_type_t type, acl_t acl, acl_t *ret) { + _cleanup_(acl_freep) acl_t applied = NULL; acl_entry_t i; int r; - old = acl_get_file(path, type); - if (!old) + assert(path); + + applied = acl_get_file(path, type); + if (!applied) return -errno; - for (r = acl_get_entry(new, ACL_FIRST_ENTRY, &i); + for (r = acl_get_entry(acl, ACL_FIRST_ENTRY, &i); r > 0; - r = acl_get_entry(new, ACL_NEXT_ENTRY, &i)) { + r = acl_get_entry(acl, ACL_NEXT_ENTRY, &i)) { acl_entry_t j; - r = find_acl_entry(old, i, &j); - if (r < 0) - return r; - if (r == 0) - if (acl_create_entry(&old, &j) < 0) + r = find_acl_entry(applied, i, &j); + if (r == -ENOENT) { + if (acl_create_entry(&applied, &j) < 0) return -errno; + } else if (r < 0) + return r; if (acl_copy_entry(j, i) < 0) return -errno; @@ -371,7 +380,8 @@ int acls_for_file(const char *path, acl_type_t type, acl_t new, acl_t *acl) { if (r < 0) return -errno; - *acl = TAKE_PTR(old); + if (ret) + *ret = TAKE_PTR(applied); return 0; } diff --git a/src/shared/acl-util.h b/src/shared/acl-util.h index 03595c6651..d3a341fbe6 100644 --- a/src/shared/acl-util.h +++ b/src/shared/acl-util.h @@ -15,8 +15,8 @@ int acl_find_uid(acl_t acl, uid_t uid, acl_entry_t *entry); int calc_acl_mask_if_needed(acl_t *acl_p); int add_base_acls_if_needed(acl_t *acl_p, const char *path); int acl_search_groups(const char* path, char ***ret_groups); -int parse_acl(const char *text, acl_t *acl_access, acl_t *acl_default, bool want_mask); -int acls_for_file(const char *path, acl_type_t type, acl_t new, acl_t *acl); +int parse_acl(const char *text, acl_t *ret_acl_access, acl_t *ret_acl_default, bool want_mask); +int acls_for_file(const char *path, acl_type_t type, acl_t new, acl_t *ret); int fd_add_uid_acl_permission(int fd, uid_t uid, unsigned mask); /* acl_free takes multiple argument types. 
diff --git a/src/shared/blkid-util.h b/src/shared/blkid-util.h index aa444990fd..abc4b6166d 100644 --- a/src/shared/blkid-util.h +++ b/src/shared/blkid-util.h @@ -4,7 +4,44 @@ #if HAVE_BLKID # include <blkid.h> +# include "sd-id128.h" + # include "macro.h" +# include "string-util.h" DEFINE_TRIVIAL_CLEANUP_FUNC_FULL(blkid_probe, blkid_free_probe, NULL); + +static inline int blkid_partition_get_uuid_id128(blkid_partition p, sd_id128_t *ret) { + const char *s; + + assert(p); + + s = blkid_partition_get_uuid(p); + if (isempty(s)) + return -ENXIO; + + return sd_id128_from_string(s, ret); +} + +static inline int blkid_partition_get_type_id128(blkid_partition p, sd_id128_t *ret) { + const char *s; + + assert(p); + + s = blkid_partition_get_type_string(p); + if (isempty(s)) + return -ENXIO; + + return sd_id128_from_string(s, ret); +} + +/* Define symbolic names for blkid_do_safeprobe() return values, since blkid only uses literal numbers. We + * prefix these symbolic definitions with underscores, to not invade libblkid's namespace needlessly. */ +enum { + _BLKID_SAFEPROBE_FOUND = 0, + _BLKID_SAFEPROBE_NOT_FOUND = 1, + _BLKID_SAFEPROBE_AMBIGUOUS = -2, + _BLKID_SAFEPROBE_ERROR = -1, +}; + #endif diff --git a/src/shared/blockdev-util.c b/src/shared/blockdev-util.c index 72fad160ed..eb9f54306f 100644 --- a/src/shared/blockdev-util.c +++ b/src/shared/blockdev-util.c @@ -43,23 +43,7 @@ static int fd_get_devnum(int fd, BlockDeviceLookupFlag flags, dev_t *ret) { /* If major(st.st_dev) is zero, this might mean we are backed by btrfs, which needs special * handing, to get the backing device node. */ - r = fcntl(fd, F_GETFL); - if (r < 0) - return -errno; - - if (FLAGS_SET(r, O_PATH)) { - _cleanup_close_ int regfd = -1; - - /* The fstat() above we can execute on an O_PATH fd. But the btrfs ioctl we cannot. - * Hence acquire a "real" fd first, without the O_PATH flag. 
*/ - - regfd = fd_reopen(fd, O_RDONLY|O_CLOEXEC|O_NONBLOCK|O_NOCTTY); - if (regfd < 0) - return regfd; - - r = btrfs_get_block_device_fd(regfd, &devnum); - } else - r = btrfs_get_block_device_fd(fd, &devnum); + r = btrfs_get_block_device_fd(fd, &devnum); if (r == -ENOTTY) /* not btrfs */ return -ENOTBLK; if (r < 0) @@ -288,21 +272,7 @@ int get_block_device_fd(int fd, dev_t *ret) { return 1; } - r = fcntl(fd, F_GETFL); - if (r < 0) - return -errno; - if (FLAGS_SET(r, O_PATH) && (S_ISREG(st.st_mode) || S_ISDIR(st.st_mode))) { - _cleanup_close_ int real_fd = -1; - - /* The fstat() above we can execute on an O_PATH fd. But the btrfs ioctl we cannot. Hence - * acquire a "real" fd first, without the O_PATH flag. */ - - real_fd = fd_reopen(fd, O_RDONLY|O_CLOEXEC); - if (real_fd < 0) - return real_fd; - r = btrfs_get_block_device_fd(real_fd, ret); - } else - r = btrfs_get_block_device_fd(fd, ret); + r = btrfs_get_block_device_fd(fd, ret); if (r > 0) return 1; if (r != -ENOTTY) /* not btrfs */ diff --git a/src/shared/bootspec.c b/src/shared/bootspec.c index 8ae891e8a0..4cced23adc 100644 --- a/src/shared/bootspec.c +++ b/src/shared/bootspec.c @@ -980,6 +980,8 @@ static int boot_config_find(const BootConfig *config, const char *id) { if (id[0] == '@') { if (!strcaseeq(id, "@saved")) return -1; + if (!config->entry_selected) + return -1; id = config->entry_selected; } @@ -1268,7 +1270,7 @@ static void boot_entry_file_list( int status = chase_symlinks_and_access(p, root, CHASE_PREFIX_ROOT|CHASE_PROHIBIT_SYMLINKS, F_OK, NULL, NULL); /* Note that this shows two '/' between the root and the file. This is intentional to highlight (in - * the abscence of color support) to the user that the boot loader is only interested in the second + * the absence of color support) to the user that the boot loader is only interested in the second * part of the file. */ printf("%13s%s %s%s/%s", strempty(field), field ? 
":" : " ", ansi_grey(), root, ansi_normal()); diff --git a/src/shared/btrfs-util.c b/src/shared/btrfs-util.c index 4574a7899e..ba02de17f8 100644 --- a/src/shared/btrfs-util.c +++ b/src/shared/btrfs-util.c @@ -107,19 +107,11 @@ int btrfs_subvol_make_fd(int fd, const char *subvolume) { if (r < 0) return r; - r = fcntl(fd, F_GETFL); - if (r < 0) - return -errno; - if (FLAGS_SET(r, O_PATH)) { - /* An O_PATH fd was specified, let's convert here to a proper one, as btrfs ioctl's can't deal with - * O_PATH. */ - - real_fd = fd_reopen(fd, O_RDONLY|O_CLOEXEC|O_DIRECTORY); - if (real_fd < 0) - return real_fd; - - fd = real_fd; - } + /* If an O_PATH fd was specified, let's convert here to a proper one, as btrfs ioctl's can't deal + * with O_PATH. */ + fd = fd_reopen_condition(fd, O_RDONLY|O_CLOEXEC|O_DIRECTORY, O_PATH|O_DIRECTORY, &real_fd); + if (fd < 0) + return fd; strncpy(args.name, subvolume, sizeof(args.name)-1); @@ -255,12 +247,17 @@ int btrfs_clone_range(int infd, uint64_t in_offset, int outfd, uint64_t out_offs int btrfs_get_block_device_fd(int fd, dev_t *dev) { struct btrfs_ioctl_fs_info_args fsi = {}; + _cleanup_close_ int regfd = -1; uint64_t id; int r; assert(fd >= 0); assert(dev); + fd = fd_reopen_condition(fd, O_RDONLY|O_CLOEXEC|O_NONBLOCK|O_NOCTTY, O_PATH, &regfd); + if (fd < 0) + return fd; + r = fd_is_fs_type(fd, BTRFS_SUPER_MAGIC); if (r < 0) return r; @@ -1768,6 +1765,7 @@ int btrfs_qgroup_find_parents(int fd, uint64_t qgroupid, uint64_t **ret) { int btrfs_subvol_auto_qgroup_fd(int fd, uint64_t subvol_id, bool insert_intermediary_qgroup) { _cleanup_free_ uint64_t *qgroups = NULL; + _cleanup_close_ int real_fd = -1; uint64_t parent_subvol; bool changed = false; int n = 0, r; @@ -1811,6 +1809,11 @@ int btrfs_subvol_auto_qgroup_fd(int fd, uint64_t subvol_id, bool insert_intermed * qgroup that then includes all its own child subvolumes. 
*/ + /* Turn this into a proper fd, if it is currently O_PATH */ + fd = fd_reopen_condition(fd, O_RDONLY|O_CLOEXEC, O_PATH, &real_fd); + if (fd < 0) + return fd; + if (subvol_id == 0) { r = btrfs_is_subvol_fd(fd); if (r < 0) diff --git a/src/shared/bus-unit-util.c b/src/shared/bus-unit-util.c index 784ae7794d..6b6383b60b 100644 --- a/src/shared/bus-unit-util.c +++ b/src/shared/bus-unit-util.c @@ -2143,6 +2143,9 @@ static int bus_append_scope_property(sd_bus_message *m, const char *field, const if (STR_IN_SET(field, "User", "Group")) return bus_append_string(m, field, eq); + if (streq(field, "OOMPolicy")) + return bus_append_string(m, field, eq); + return 0; } diff --git a/src/shared/copy.c b/src/shared/copy.c index 2b492c38a5..e6265e2c96 100644 --- a/src/shared/copy.c +++ b/src/shared/copy.c @@ -486,11 +486,17 @@ static int fd_copy_symlink( if (r < 0) return r; } - r = symlinkat(target, dt, to); + r = RET_NERRNO(symlinkat(target, dt, to)); if (copy_flags & COPY_MAC_CREATE) mac_selinux_create_file_clear(); - if (r < 0) - return -errno; + if (r < 0) { + if (FLAGS_SET(copy_flags, COPY_GRACEFUL_WARN) && (ERRNO_IS_PRIVILEGE(r) || ERRNO_IS_NOT_SUPPORTED(r))) { + log_notice_errno(r, "Failed to copy symlink '%s', ignoring: %m", from); + return 0; + } + + return r; + } if (fchownat(dt, to, uid_is_valid(override_uid) ? override_uid : st->st_uid, @@ -798,11 +804,17 @@ static int fd_copy_fifo( if (r < 0) return r; } - r = mkfifoat(dt, to, st->st_mode & 07777); + r = RET_NERRNO(mkfifoat(dt, to, st->st_mode & 07777)); if (copy_flags & COPY_MAC_CREATE) mac_selinux_create_file_clear(); - if (r < 0) - return -errno; + if (r < 0) { + if (FLAGS_SET(copy_flags, COPY_GRACEFUL_WARN) && (ERRNO_IS_PRIVILEGE(r) || ERRNO_IS_NOT_SUPPORTED(r))) { + log_notice_errno(r, "Failed to copy fifo '%s', ignoring: %m", from); + return 0; + } + + return r; + } if (fchownat(dt, to, uid_is_valid(override_uid) ? 
override_uid : st->st_uid, @@ -846,11 +858,17 @@ static int fd_copy_node( if (r < 0) return r; } - r = mknodat(dt, to, st->st_mode, st->st_rdev); + r = RET_NERRNO(mknodat(dt, to, st->st_mode, st->st_rdev)); if (copy_flags & COPY_MAC_CREATE) mac_selinux_create_file_clear(); - if (r < 0) - return -errno; + if (r < 0) { + if (FLAGS_SET(copy_flags, COPY_GRACEFUL_WARN) && (ERRNO_IS_PRIVILEGE(r) || ERRNO_IS_NOT_SUPPORTED(r))) { + log_notice_errno(r, "Failed to copy node '%s', ignoring: %m", from); + return 0; + } + + return r; + } if (fchownat(dt, to, uid_is_valid(override_uid) ? override_uid : st->st_uid, diff --git a/src/shared/copy.h b/src/shared/copy.h index d19361c9a2..1eb6d1ce05 100644 --- a/src/shared/copy.h +++ b/src/shared/copy.h @@ -12,21 +12,22 @@ #include "set.h" typedef enum CopyFlags { - COPY_REFLINK = 1 << 0, /* Try to reflink */ - COPY_MERGE = 1 << 1, /* Merge existing trees with our new one to copy */ - COPY_REPLACE = 1 << 2, /* Replace an existing file if there's one */ - COPY_SAME_MOUNT = 1 << 3, /* Don't descend recursively into other file systems, across mount point boundaries */ - COPY_MERGE_EMPTY = 1 << 4, /* Merge an existing, empty directory with our new tree to copy */ - COPY_CRTIME = 1 << 5, /* Generate a user.crtime_usec xattr off the source crtime if there is one, on copying */ - COPY_SIGINT = 1 << 6, /* Check for SIGINT regularly and return EINTR if seen (caller needs to block SIGINT) */ - COPY_SIGTERM = 1 << 7, /* ditto, but for SIGTERM */ - COPY_MAC_CREATE = 1 << 8, /* Create files with the correct MAC label (currently SELinux only) */ - COPY_HARDLINKS = 1 << 9, /* Try to reproduce hard links */ - COPY_FSYNC = 1 << 10, /* fsync() after we are done */ - COPY_FSYNC_FULL = 1 << 11, /* fsync_full() after we are done */ - COPY_SYNCFS = 1 << 12, /* syncfs() the *top-level* dir after we are done */ - COPY_ALL_XATTRS = 1 << 13, /* Preserve all xattrs when copying, not just those in the user namespace */ - COPY_HOLES = 1 << 14, /* Copy holes */ + 
COPY_REFLINK = 1 << 0, /* Try to reflink */ + COPY_MERGE = 1 << 1, /* Merge existing trees with our new one to copy */ + COPY_REPLACE = 1 << 2, /* Replace an existing file if there's one */ + COPY_SAME_MOUNT = 1 << 3, /* Don't descend recursively into other file systems, across mount point boundaries */ + COPY_MERGE_EMPTY = 1 << 4, /* Merge an existing, empty directory with our new tree to copy */ + COPY_CRTIME = 1 << 5, /* Generate a user.crtime_usec xattr off the source crtime if there is one, on copying */ + COPY_SIGINT = 1 << 6, /* Check for SIGINT regularly and return EINTR if seen (caller needs to block SIGINT) */ + COPY_SIGTERM = 1 << 7, /* ditto, but for SIGTERM */ + COPY_MAC_CREATE = 1 << 8, /* Create files with the correct MAC label (currently SELinux only) */ + COPY_HARDLINKS = 1 << 9, /* Try to reproduce hard links */ + COPY_FSYNC = 1 << 10, /* fsync() after we are done */ + COPY_FSYNC_FULL = 1 << 11, /* fsync_full() after we are done */ + COPY_SYNCFS = 1 << 12, /* syncfs() the *top-level* dir after we are done */ + COPY_ALL_XATTRS = 1 << 13, /* Preserve all xattrs when copying, not just those in the user namespace */ + COPY_HOLES = 1 << 14, /* Copy holes */ + COPY_GRACEFUL_WARN = 1 << 15, /* Skip copying file types that aren't supported by the target filesystem */ } CopyFlags; typedef int (*copy_progress_bytes_t)(uint64_t n_bytes, void *userdata); diff --git a/src/shared/cryptsetup-util.c b/src/shared/cryptsetup-util.c index f697429852..c6614d3579 100644 --- a/src/shared/cryptsetup-util.c +++ b/src/shared/cryptsetup-util.c @@ -208,7 +208,7 @@ int dlopen_cryptsetup(void) { /* libcryptsetup added crypt_reencrypt() in 2.2.0, and marked it obsolete in 2.4.0, replacing it with * crypt_reencrypt_run(), which takes one extra argument but is otherwise identical. The old call is * still available though, and given we want to support 2.2.0 for a while longer, we'll stick to the - * old symbol. 
Howerver, the old symbols now has a GCC deprecation decorator, hence let's turn off + * old symbol. However, the old symbols now has a GCC deprecation decorator, hence let's turn off * warnings about this for now. */ DISABLE_WARNING_DEPRECATED_DECLARATIONS; diff --git a/src/shared/discover-image.c b/src/shared/discover-image.c index 073e5e8433..8bc165ef4c 100644 --- a/src/shared/discover-image.c +++ b/src/shared/discover-image.c @@ -123,6 +123,7 @@ static char *image_roothash_path(Image *image) { static int image_new( ImageType t, + ImageClass c, const char *pretty, const char *path, const char *filename, @@ -146,6 +147,7 @@ static int image_new( *i = (Image) { .n_ref = 1, .type = t, + .class = c, .read_only = read_only, .crtime = crtime, .mtime = mtime, @@ -203,6 +205,7 @@ static int extract_pretty(const char *path, const char *suffix, char **ret) { } static int image_make( + ImageClass c, const char *pretty, int dfd, const char *path, @@ -278,6 +281,7 @@ static int image_make( return r; r = image_new(IMAGE_SUBVOLUME, + c, pretty, path, filename, @@ -314,6 +318,7 @@ static int image_make( /* It's just a normal directory. 
*/ r = image_new(IMAGE_DIRECTORY, + c, pretty, path, filename, @@ -345,6 +350,7 @@ static int image_make( } r = image_new(IMAGE_RAW, + c, pretty, path, filename, @@ -405,6 +411,7 @@ static int image_make( } r = image_new(IMAGE_BLOCK, + c, pretty, path, filename, @@ -475,13 +482,13 @@ int image_find(ImageClass class, if (!S_ISREG(st.st_mode)) continue; - r = image_make(name, dirfd(d), resolved, raw, &st, ret); + r = image_make(class, name, dirfd(d), resolved, raw, &st, ret); } else { if (!S_ISDIR(st.st_mode) && !S_ISBLK(st.st_mode)) continue; - r = image_make(name, dirfd(d), resolved, name, &st, ret); + r = image_make(class, name, dirfd(d), resolved, name, &st, ret); } if (IN_SET(r, -ENOENT, -EMEDIUMTYPE)) continue; @@ -495,7 +502,7 @@ int image_find(ImageClass class, } if (class == IMAGE_MACHINE && streq(name, ".host")) { - r = image_make(".host", AT_FDCWD, NULL, empty_to_root(root), NULL, ret); + r = image_make(class, ".host", AT_FDCWD, NULL, empty_to_root(root), NULL, ret); if (r < 0) return r; @@ -515,9 +522,9 @@ int image_from_path(const char *path, Image **ret) { * overridden by another, different image earlier in the search path */ if (path_equal(path, "/")) - return image_make(".host", AT_FDCWD, NULL, "/", NULL, ret); + return image_make(IMAGE_MACHINE, ".host", AT_FDCWD, NULL, "/", NULL, ret); - return image_make(NULL, AT_FDCWD, NULL, path, NULL, ret); + return image_make(_IMAGE_CLASS_INVALID, NULL, AT_FDCWD, NULL, path, NULL, ret); } int image_find_harder(ImageClass class, const char *name_or_path, const char *root, Image **ret) { @@ -591,7 +598,7 @@ int image_discover( if (hashmap_contains(h, pretty)) continue; - r = image_make(pretty, dirfd(d), resolved, de->d_name, &st, &image); + r = image_make(class, pretty, dirfd(d), resolved, de->d_name, &st, &image); if (IN_SET(r, -ENOENT, -EMEDIUMTYPE)) continue; if (r < 0) @@ -610,7 +617,7 @@ int image_discover( if (class == IMAGE_MACHINE && !hashmap_contains(h, ".host")) { _cleanup_(image_unrefp) Image *image = 
NULL; - r = image_make(".host", AT_FDCWD, NULL, empty_to_root("/"), NULL, &image); + r = image_make(IMAGE_MACHINE, ".host", AT_FDCWD, NULL, empty_to_root("/"), NULL, &image); if (r < 0) return r; @@ -1201,7 +1208,9 @@ int image_read_metadata(Image *i) { DISSECT_IMAGE_REQUIRE_ROOT | DISSECT_IMAGE_RELAX_VAR_CHECK | DISSECT_IMAGE_READ_ONLY | - DISSECT_IMAGE_USR_NO_ROOT, + DISSECT_IMAGE_USR_NO_ROOT | + DISSECT_IMAGE_ADD_PARTITION_DEVICES | + DISSECT_IMAGE_PIN_PARTITION_DEVICES, &m); if (r < 0) return r; @@ -1301,3 +1310,11 @@ static const char* const image_type_table[_IMAGE_TYPE_MAX] = { }; DEFINE_STRING_TABLE_LOOKUP(image_type, ImageType); + +static const char* const image_class_table[_IMAGE_CLASS_MAX] = { + [IMAGE_MACHINE] = "machine", + [IMAGE_PORTABLE] = "portable", + [IMAGE_EXTENSION] = "extension", +}; + +DEFINE_STRING_TABLE_LOOKUP(image_class, ImageClass); diff --git a/src/shared/discover-image.h b/src/shared/discover-image.h index 3726e98d30..a8874228dc 100644 --- a/src/shared/discover-image.h +++ b/src/shared/discover-image.h @@ -34,6 +34,7 @@ typedef struct Image { unsigned n_ref; ImageType type; + ImageClass class; char *name; char *path; bool read_only; @@ -76,6 +77,9 @@ int image_read_only(Image *i, bool b); const char* image_type_to_string(ImageType t) _const_; ImageType image_type_from_string(const char *s) _pure_; +const char* image_class_to_string(ImageClass cl) _const_; +ImageClass image_class_from_string(const char *s) _pure_; + int image_path_lock(const char *path, int operation, LockFile *global, LockFile *local); int image_name_lock(const char *name, int operation, LockFile *ret); diff --git a/src/shared/dissect-image.c b/src/shared/dissect-image.c index 53682b2542..b3d35e9fbf 100644 --- a/src/shared/dissect-image.c +++ b/src/shared/dissect-image.c @@ -74,9 +74,15 @@ /* how many times to wait for the device nodes to appear */ #define N_DEVICE_NODE_LIST_ATTEMPTS 10 -int probe_filesystem_full(int fd, const char *path, char **ret_fstype) { +int 
probe_filesystem_full( + int fd, + const char *path, + uint64_t offset, + uint64_t size, + char **ret_fstype) { + /* Try to find device content type and return it in *ret_fstype. If nothing is found, - * 0/NULL will be returned. -EUCLEAN will be returned for ambiguous results, and an + * 0/NULL will be returned. -EUCLEAN will be returned for ambiguous results, and a * different error otherwise. */ #if HAVE_BLKID @@ -105,12 +111,19 @@ int probe_filesystem_full(int fd, const char *path, char **ret_fstype) { path = path_by_fd; } + if (size == 0) /* empty size? nothing found! */ + goto not_found; + b = blkid_new_probe(); if (!b) return -ENOMEM; errno = 0; - r = blkid_probe_set_device(b, fd, 0, 0); + r = blkid_probe_set_device( + b, + fd, + offset, + size == UINT64_MAX ? 0 : size); /* when blkid sees size=0 it understands "everything". We prefer using UINT64_MAX for that */ if (r != 0) return errno_or_else(ENOMEM); @@ -119,14 +132,16 @@ int probe_filesystem_full(int fd, const char *path, char **ret_fstype) { errno = 0; r = blkid_do_safeprobe(b); - if (r == 1) + if (r == _BLKID_SAFEPROBE_NOT_FOUND) goto not_found; - if (r == -2) + if (r == _BLKID_SAFEPROBE_AMBIGUOUS) return log_debug_errno(SYNTHETIC_ERRNO(EUCLEAN), "Results ambiguous for partition %s", path); - if (r != 0) + if (r == _BLKID_SAFEPROBE_ERROR) return log_debug_errno(errno_or_else(EIO), "Failed to probe partition %s: %m", path); + assert(r == _BLKID_SAFEPROBE_FOUND); + (void) blkid_probe_lookup_value(b, "TYPE", &fstype, NULL); if (fstype) { @@ -152,7 +167,7 @@ not_found: } #if HAVE_BLKID -static int dissected_image_probe_filesystem(DissectedImage *m) { +static int dissected_image_probe_filesystems(DissectedImage *m, int fd) { int r; assert(m); @@ -165,9 +180,14 @@ static int dissected_image_probe_filesystem(DissectedImage *m) { if (!p->found) continue; - if (!p->fstype && p->mount_node_fd >= 0 && !p->decrypted_node) { - r = probe_filesystem_full(p->mount_node_fd, p->node, &p->fstype); - if (r < 0 && r != 
-EUCLEAN) + if (!p->fstype) { + /* If we have an fd referring to the partition block device, use that. Otherwise go + * via the whole block device or backing regular file, and read via offset. */ + if (p->mount_node_fd >= 0) + r = probe_filesystem_full(p->mount_node_fd, p->node, 0, UINT64_MAX, &p->fstype); + else + r = probe_filesystem_full(fd, p->node, p->offset, p->size, &p->fstype); + if (r < 0) return r; } @@ -439,10 +459,12 @@ static int dissect_image( errno = 0; r = blkid_do_safeprobe(b); - if (IN_SET(r, -2, 1)) - return log_debug_errno(SYNTHETIC_ERRNO(ENOPKG), "Failed to identify any partition table."); - if (r != 0) + if (r == _BLKID_SAFEPROBE_ERROR) return errno_or_else(EIO); + if (IN_SET(r, _BLKID_SAFEPROBE_AMBIGUOUS, _BLKID_SAFEPROBE_NOT_FOUND)) + return log_debug_errno(SYNTHETIC_ERRNO(ENOPKG), "Failed to identify any partition table."); + + assert(r == _BLKID_SAFEPROBE_FOUND); if ((!(flags & DISSECT_IMAGE_GPT_ONLY) && (flags & DISSECT_IMAGE_GENERIC_ROOT)) || @@ -458,7 +480,7 @@ static int dissect_image( _cleanup_close_ int mount_node_fd = -1; sd_id128_t uuid = SD_ID128_NULL; - if (FLAGS_SET(flags, DISSECT_IMAGE_OPEN_PARTITION_DEVICES)) { + if (FLAGS_SET(flags, DISSECT_IMAGE_PIN_PARTITION_DEVICES)) { mount_node_fd = open_partition(devname, /* is_partition = */ false, m->loop); if (mount_node_fd < 0) return mount_node_fd; @@ -491,13 +513,10 @@ static int dissect_image( m->encrypted = streq_ptr(fstype, "crypto_LUKS"); m->has_verity = verity && verity->data_path; - m->verity_ready = m->has_verity && - verity->root_hash && - (verity->designator < 0 || verity->designator == PARTITION_ROOT); + m->verity_ready = verity_settings_data_covers(verity, PARTITION_ROOT); m->has_verity_sig = false; /* signature not embedded, must be specified */ - m->verity_sig_ready = m->verity_ready && - verity->root_hash_sig; + m->verity_sig_ready = m->verity_ready && verity->root_hash_sig; m->image_uuid = uuid; @@ -539,7 +558,7 @@ static int dissect_image( if (verity && 
verity->data_path) return -EBADR; - if (FLAGS_SET(flags, DISSECT_IMAGE_MANAGE_PARTITION_DEVICES)) { + if (FLAGS_SET(flags, DISSECT_IMAGE_ADD_PARTITION_DEVICES)) { /* Safety check: refuse block devices that carry a partition table but for which the kernel doesn't * do partition scanning. */ r = blockdev_partscan_enabled(fd); @@ -615,7 +634,7 @@ static int dissect_image( * Kernel returns EBUSY if there's already a partition by that number or an overlapping * partition already existent. */ - if (FLAGS_SET(flags, DISSECT_IMAGE_MANAGE_PARTITION_DEVICES)) { + if (FLAGS_SET(flags, DISSECT_IMAGE_ADD_PARTITION_DEVICES)) { r = block_device_add_partition(fd, node, nr, (uint64_t) start * 512, (uint64_t) size * 512); if (r < 0) { if (r != -EBUSY) @@ -627,39 +646,32 @@ static int dissect_image( } if (is_gpt) { - const char *stype, *sid, *fstype = NULL, *label; + const char *fstype = NULL, *label; sd_id128_t type_id, id; GptPartitionType type; bool rw = true, growfs = false; - sid = blkid_partition_get_uuid(pp); - if (!sid) - continue; - if (sd_id128_from_string(sid, &id) < 0) + r = blkid_partition_get_uuid_id128(pp, &id); + if (r < 0) { + log_debug_errno(r, "Failed to read partition UUID, ignoring: %m"); continue; + } - stype = blkid_partition_get_type_string(pp); - if (!stype) - continue; - if (sd_id128_from_string(stype, &type_id) < 0) + r = blkid_partition_get_type_id128(pp, &type_id); + if (r < 0) { + log_debug_errno(r, "Failed to read partition type UUID, ignoring: %m"); continue; + } type = gpt_partition_type_from_uuid(type_id); label = blkid_partition_get_name(pp); /* libblkid returns NULL here if empty */ - if (type.designator == PARTITION_HOME) { - - check_partition_flags(node, pflags, - SD_GPT_FLAG_NO_AUTO | SD_GPT_FLAG_READ_ONLY | SD_GPT_FLAG_GROWFS); - - if (pflags & SD_GPT_FLAG_NO_AUTO) - continue; - - rw = !(pflags & SD_GPT_FLAG_READ_ONLY); - growfs = FLAGS_SET(pflags, SD_GPT_FLAG_GROWFS); - - } else if (type.designator == PARTITION_SRV) { + if 
(IN_SET(type.designator, + PARTITION_HOME, + PARTITION_SRV, + PARTITION_XBOOTLDR, + PARTITION_TMP)) { check_partition_flags(node, pflags, SD_GPT_FLAG_NO_AUTO | SD_GPT_FLAG_READ_ONLY | SD_GPT_FLAG_GROWFS); @@ -682,17 +694,6 @@ static int dissect_image( fstype = "vfat"; - } else if (type.designator == PARTITION_XBOOTLDR) { - - check_partition_flags(node, pflags, - SD_GPT_FLAG_NO_AUTO | SD_GPT_FLAG_READ_ONLY | SD_GPT_FLAG_GROWFS); - - if (pflags & SD_GPT_FLAG_NO_AUTO) - continue; - - rw = !(pflags & SD_GPT_FLAG_READ_ONLY); - growfs = FLAGS_SET(pflags, SD_GPT_FLAG_GROWFS); - } else if (type.designator == PARTITION_ROOT) { check_partition_flags(node, pflags, @@ -811,6 +812,8 @@ static int dissect_image( if (pflags & SD_GPT_FLAG_NO_AUTO) continue; + fstype = "swap"; + /* We don't have a designator for SD_GPT_LINUX_GENERIC so check the UUID instead. */ } else if (sd_id128_equal(type.uuid, SD_GPT_LINUX_GENERIC)) { @@ -832,17 +835,6 @@ static int dissect_image( return -ENOMEM; } - } else if (type.designator == PARTITION_TMP) { - - check_partition_flags(node, pflags, - SD_GPT_FLAG_NO_AUTO | SD_GPT_FLAG_READ_ONLY | SD_GPT_FLAG_GROWFS); - - if (pflags & SD_GPT_FLAG_NO_AUTO) - continue; - - rw = !(pflags & SD_GPT_FLAG_READ_ONLY); - growfs = FLAGS_SET(pflags, SD_GPT_FLAG_GROWFS); - } else if (type.designator == PARTITION_VAR) { check_partition_flags(node, pflags, @@ -867,7 +859,9 @@ static int dissect_image( return r; if (!sd_id128_equal(var_uuid, id)) { - log_debug("Found a /var/ partition, but its UUID didn't match our expectations, ignoring."); + log_debug("Found a /var/ partition, but its UUID didn't match our expectations " + "(found: " SD_ID128_UUID_FORMAT_STR ", expected: " SD_ID128_UUID_FORMAT_STR "), ignoring.", + SD_ID128_FORMAT_VAL(id), SD_ID128_FORMAT_VAL(var_uuid)); continue; } } @@ -897,7 +891,8 @@ static int dissect_image( dissected_partition_done(m->partitions + type.designator); } - if (FLAGS_SET(flags, DISSECT_IMAGE_OPEN_PARTITION_DEVICES)) { + if 
(FLAGS_SET(flags, DISSECT_IMAGE_PIN_PARTITION_DEVICES) && + type.designator != PARTITION_SWAP) { mount_node_fd = open_partition(node, /* is_partition = */ true, m->loop); if (mount_node_fd < 0) return mount_node_fd; @@ -936,6 +931,7 @@ static int dissect_image( .mount_node_fd = TAKE_FD(mount_node_fd), .offset = (uint64_t) start * 512, .size = (uint64_t) size * 512, + .gpt_flags = pflags, }; } @@ -965,21 +961,19 @@ static int dissect_image( _cleanup_close_ int mount_node_fd = -1; _cleanup_free_ char *o = NULL; sd_id128_t id = SD_ID128_NULL; - const char *sid, *options = NULL; + const char *options = NULL; /* First one wins */ if (m->partitions[PARTITION_XBOOTLDR].found) continue; - if (FLAGS_SET(flags, DISSECT_IMAGE_OPEN_PARTITION_DEVICES)) { + if (FLAGS_SET(flags, DISSECT_IMAGE_PIN_PARTITION_DEVICES)) { mount_node_fd = open_partition(node, /* is_partition = */ true, m->loop); if (mount_node_fd < 0) return mount_node_fd; } - sid = blkid_partition_get_uuid(pp); - if (sid) - (void) sd_id128_from_string(sid, &id); + (void) blkid_partition_get_uuid_id128(pp, &id); options = mount_options_from_designator(mount_options, PARTITION_XBOOTLDR); if (options) { @@ -1053,7 +1047,7 @@ static int dissect_image( _cleanup_free_ char *o = NULL; const char *options; - if (FLAGS_SET(flags, DISSECT_IMAGE_OPEN_PARTITION_DEVICES)) { + if (FLAGS_SET(flags, DISSECT_IMAGE_PIN_PARTITION_DEVICES)) { mount_node_fd = open_partition(generic_node, /* is_partition = */ true, m->loop); if (mount_node_fd < 0) return mount_node_fd; @@ -1141,6 +1135,10 @@ static int dissect_image( } } + r = dissected_image_probe_filesystems(m, fd); + if (r < 0) + return r; + return 0; } #endif @@ -1158,7 +1156,6 @@ int dissect_image_file( int r; assert(path); - assert((flags & DISSECT_IMAGE_BLOCK_DEVICE) == 0); assert(ret); fd = open(path, O_RDONLY|O_CLOEXEC|O_NONBLOCK|O_NOCTTY); @@ -2265,7 +2262,7 @@ int dissected_image_decrypt( } if (!p->decrypted_fstype && p->mount_node_fd >= 0 && p->decrypted_node) { - r = 
probe_filesystem_full(p->mount_node_fd, p->decrypted_node, 0, UINT64_MAX, &p->decrypted_fstype); if (r < 0 && r != -EUCLEAN) return r; } @@ -2973,11 +2970,7 @@ int dissect_loop_device( m->loop = loop_device_ref(loop); - r = dissect_image(m, loop->fd, loop->node, verity, mount_options, flags | DISSECT_IMAGE_BLOCK_DEVICE); - if (r < 0) - return r; - - r = dissected_image_probe_filesystem(m); + r = dissect_image(m, loop->fd, loop->node, verity, mount_options, flags); if (r < 0) return r; @@ -3136,6 +3129,10 @@ int mount_image_privately_interactively( assert(ret_directory); assert(ret_loop_device); + /* We intend to mount this right away, hence add the partitions if needed and pin them. */ + flags |= DISSECT_IMAGE_ADD_PARTITION_DEVICES | + DISSECT_IMAGE_PIN_PARTITION_DEVICES; + r = verity_settings_load(&verity, image, NULL, NULL); if (r < 0) return log_error_errno(r, "Failed to load root hash data: %m"); @@ -3230,7 +3227,9 @@ int verity_dissect_and_mount( return log_debug_errno(r, "Failed to load root hash: %m"); dissect_image_flags = (verity.data_path ? DISSECT_IMAGE_NO_PARTITION_TABLE : 0) | - (relax_extension_release_check ? DISSECT_IMAGE_RELAX_SYSEXT_CHECK : 0); + (relax_extension_release_check ? DISSECT_IMAGE_RELAX_SYSEXT_CHECK : 0) | + DISSECT_IMAGE_ADD_PARTITION_DEVICES | + DISSECT_IMAGE_PIN_PARTITION_DEVICES; /* Note that we don't use loop_device_make here, as the FD is most likely O_PATH which would not be * accepted by LOOP_CONFIGURE, so just let loop_device_make_by_path reopen it as a regular FD. 
*/ diff --git a/src/shared/dissect-image.h b/src/shared/dissect-image.h index 1a398010b5..059b9aecbb 100644 --- a/src/shared/dissect-image.h +++ b/src/shared/dissect-image.h @@ -33,6 +33,7 @@ struct DissectedPartition { int mount_node_fd; uint64_t size; uint64_t offset; + uint64_t gpt_flags; }; #define DISSECTED_PARTITION_NULL \ @@ -74,10 +75,8 @@ typedef enum DissectImageFlags { DISSECT_IMAGE_MOUNT_READ_ONLY, DISSECT_IMAGE_GROWFS = 1 << 18, /* Grow file systems in partitions marked for that to the size of the partitions after mount */ DISSECT_IMAGE_MOUNT_IDMAPPED = 1 << 19, /* Mount mounts with kernel 5.12-style userns ID mapping, if file system type doesn't support uid=/gid= */ - DISSECT_IMAGE_MANAGE_PARTITION_DEVICES = 1 << 20, /* Manage partition devices, e.g. probe each partition in more detail */ - DISSECT_IMAGE_OPEN_PARTITION_DEVICES = 1 << 21, /* Open dissected partitions and decrypted partitions */ - DISSECT_IMAGE_BLOCK_DEVICE = DISSECT_IMAGE_MANAGE_PARTITION_DEVICES | - DISSECT_IMAGE_OPEN_PARTITION_DEVICES, + DISSECT_IMAGE_ADD_PARTITION_DEVICES = 1 << 20, /* Create partition devices via BLKPG_ADD_PARTITION */ + DISSECT_IMAGE_PIN_PARTITION_DEVICES = 1 << 21, /* Open dissected partitions and decrypted partitions and pin them by fd */ DISSECT_IMAGE_RELAX_SYSEXT_CHECK = 1 << 22, /* Don't insist that the extension-release file name matches the image name */ } DissectImageFlags; @@ -135,9 +134,9 @@ MountOptions* mount_options_free_all(MountOptions *options); DEFINE_TRIVIAL_CLEANUP_FUNC(MountOptions*, mount_options_free_all); const char* mount_options_from_designator(const MountOptions *options, PartitionDesignator designator); -int probe_filesystem_full(int fd, const char *path, char **ret_fstype); +int probe_filesystem_full(int fd, const char *path, uint64_t offset, uint64_t size, char **ret_fstype); static inline int probe_filesystem(const char *path, char **ret_fstype) { - return probe_filesystem_full(-1, path, ret_fstype); + return probe_filesystem_full(-1, 
path, 0, UINT64_MAX, ret_fstype); } int dissect_image_file( const char *path, @@ -167,6 +166,14 @@ int dissected_image_relinquish(DissectedImage *m); int verity_settings_load(VeritySettings *verity, const char *image, const char *root_hash_path, const char *root_hash_sig_path); void verity_settings_done(VeritySettings *verity); +static inline bool verity_settings_data_covers(const VeritySettings *verity, PartitionDesignator d) { + /* Returns true if the verity settings contain sufficient information to cover the specified partition */ + return verity && + ((d >= 0 && verity->designator == d) || (d == PARTITION_ROOT && verity->designator < 0)) && + verity->root_hash && + verity->data_path; +} + int dissected_image_load_verity_sig_partition(DissectedImage *m, int fd, VeritySettings *verity); bool dissected_image_verity_candidate(const DissectedImage *image, PartitionDesignator d); diff --git a/src/shared/fdisk-util.c b/src/shared/fdisk-util.c new file mode 100644 index 0000000000..eeed1840aa --- /dev/null +++ b/src/shared/fdisk-util.c @@ -0,0 +1,60 @@ +/* SPDX-License-Identifier: LGPL-2.1-or-later */ + +#include "fd-util.h" +#include "fdisk-util.h" + +#if HAVE_LIBFDISK + +int fdisk_new_context_fd(int fd, bool read_only, struct fdisk_context **ret) { + _cleanup_(fdisk_unref_contextp) struct fdisk_context *c = NULL; + int r; + + assert(ret); + + if (fd < 0) + return -EBADF; + + c = fdisk_new_context(); + if (!c) + return -ENOMEM; + + r = fdisk_assign_device(c, FORMAT_PROC_FD_PATH(fd), read_only); + if (r < 0) + return r; + + *ret = TAKE_PTR(c); + return 0; +} + +int fdisk_partition_get_uuid_as_id128(struct fdisk_partition *p, sd_id128_t *ret) { + const char *ids; + + assert(p); + assert(ret); + + ids = fdisk_partition_get_uuid(p); + if (!ids) + return -ENXIO; + + return sd_id128_from_string(ids, ret); +} + +int fdisk_partition_get_type_as_id128(struct fdisk_partition *p, sd_id128_t *ret) { + struct fdisk_parttype *pt; + const char *pts; + + assert(p); + assert(ret); + 
+ pt = fdisk_partition_get_type(p); + if (!pt) + return -ENXIO; + + pts = fdisk_parttype_get_string(pt); + if (!pts) + return -ENXIO; + + return sd_id128_from_string(pts, ret); +} + +#endif diff --git a/src/shared/fdisk-util.h b/src/shared/fdisk-util.h index 64c0c2f324..7f34a042ec 100644 --- a/src/shared/fdisk-util.h +++ b/src/shared/fdisk-util.h @@ -5,6 +5,8 @@ #include <libfdisk.h> +#include "sd-id128.h" + #include "macro.h" DEFINE_TRIVIAL_CLEANUP_FUNC_FULL(struct fdisk_context*, fdisk_unref_context, NULL); @@ -12,4 +14,9 @@ DEFINE_TRIVIAL_CLEANUP_FUNC_FULL(struct fdisk_partition*, fdisk_unref_partition, DEFINE_TRIVIAL_CLEANUP_FUNC_FULL(struct fdisk_parttype*, fdisk_unref_parttype, NULL); DEFINE_TRIVIAL_CLEANUP_FUNC_FULL(struct fdisk_table*, fdisk_unref_table, NULL); +int fdisk_new_context_fd(int fd, bool read_only, struct fdisk_context **ret); + +int fdisk_partition_get_uuid_as_id128(struct fdisk_partition *p, sd_id128_t *ret); +int fdisk_partition_get_type_as_id128(struct fdisk_partition *p, sd_id128_t *ret); + #endif diff --git a/src/shared/find-esp.c b/src/shared/find-esp.c index fa234c8b5f..e019b81620 100644 --- a/src/shared/find-esp.c +++ b/src/shared/find-esp.c @@ -564,19 +564,21 @@ static int verify_xbootldr_blkid( errno = 0; b = blkid_new_probe_from_filename(node); if (!b) - return log_error_errno(errno ?: SYNTHETIC_ERRNO(ENOMEM), "%s: Failed to create blkid probe: %m", node); + return log_error_errno(errno_or_else(ENOMEM), "%s: Failed to create blkid probe: %m", node); blkid_probe_enable_partitions(b, 1); blkid_probe_set_partitions_flags(b, BLKID_PARTS_ENTRY_DETAILS); errno = 0; r = blkid_do_safeprobe(b); - if (r == -2) + if (r == _BLKID_SAFEPROBE_AMBIGUOUS) return log_error_errno(SYNTHETIC_ERRNO(ENODEV), "%s: File system is ambiguous.", node); - else if (r == 1) + if (r == _BLKID_SAFEPROBE_NOT_FOUND) return log_error_errno(SYNTHETIC_ERRNO(ENODEV), "%s: File system does not contain a label.", node); - else if (r != 0) - return log_error_errno(errno ?: 
SYNTHETIC_ERRNO(EIO), "%s: Failed to probe file system: %m", node); + if (r == _BLKID_SAFEPROBE_ERROR) + return log_error_errno(errno_or_else(EIO), "%s: Failed to probe file system: %m", node); + + assert(r == _BLKID_SAFEPROBE_FOUND); r = blkid_probe_lookup_value(b, "PART_ENTRY_SCHEME", &type, NULL); if (r != 0) @@ -588,7 +590,7 @@ static int verify_xbootldr_blkid( errno = 0; r = blkid_probe_lookup_value(b, "PART_ENTRY_TYPE", &v, NULL); if (r != 0) - return log_error_errno(errno ?: SYNTHETIC_ERRNO(EIO), "%s: Failed to probe PART_ENTRY_TYPE: %m", node); + return log_error_errno(errno_or_else(EIO), "%s: Failed to probe PART_ENTRY_TYPE: %m", node); if (sd_id128_string_equal(v, SD_GPT_XBOOTLDR) <= 0) return log_full_errno(searching ? LOG_DEBUG : LOG_ERR, searching ? SYNTHETIC_ERRNO(EADDRNOTAVAIL) : SYNTHETIC_ERRNO(ENODEV), @@ -597,7 +599,7 @@ static int verify_xbootldr_blkid( errno = 0; r = blkid_probe_lookup_value(b, "PART_ENTRY_UUID", &v, NULL); if (r != 0) - return log_error_errno(errno ?: SYNTHETIC_ERRNO(EIO), "%s: Failed to probe PART_ENTRY_UUID: %m", node); + return log_error_errno(errno_or_else(EIO), "%s: Failed to probe PART_ENTRY_UUID: %m", node); r = sd_id128_from_string(v, &uuid); if (r < 0) return log_error_errno(r, "%s: Partition has invalid UUID PART_ENTRY_TYPE=%s: %m", node, v); @@ -607,7 +609,7 @@ static int verify_xbootldr_blkid( errno = 0; r = blkid_probe_lookup_value(b, "PART_ENTRY_TYPE", &v, NULL); if (r != 0) - return log_error_errno(errno ?: SYNTHETIC_ERRNO(EIO), "%s: Failed to probe PART_ENTRY_TYPE: %m", node); + return log_error_errno(errno_or_else(EIO), "%s: Failed to probe PART_ENTRY_TYPE: %m", node); if (!streq(v, "0xea")) return log_full_errno(searching ? LOG_DEBUG : LOG_ERR, searching ? 
SYNTHETIC_ERRNO(EADDRNOTAVAIL) : SYNTHETIC_ERRNO(ENODEV), diff --git a/src/shared/gpt.c b/src/shared/gpt.c index 99795530bd..3e1a385124 100644 --- a/src/shared/gpt.c +++ b/src/shared/gpt.c @@ -58,6 +58,33 @@ PartitionDesignator partition_verity_sig_of(PartitionDesignator p) { } } +PartitionDesignator partition_verity_to_data(PartitionDesignator d) { + switch (d) { + + case PARTITION_ROOT_VERITY: + return PARTITION_ROOT; + + case PARTITION_USR_VERITY: + return PARTITION_USR; + + default: + return _PARTITION_DESIGNATOR_INVALID; + } +} + +PartitionDesignator partition_verity_sig_to_data(PartitionDesignator d) { + switch (d) { + + case PARTITION_ROOT_VERITY_SIG: + return PARTITION_ROOT; + + case PARTITION_USR_VERITY_SIG: + return PARTITION_USR; + + default: + return _PARTITION_DESIGNATOR_INVALID; + } +} static const char *const partition_designator_table[_PARTITION_DESIGNATOR_MAX] = { [PARTITION_ROOT] = "root", diff --git a/src/shared/gpt.h b/src/shared/gpt.h index 03af12c9e3..bebfbc6116 100644 --- a/src/shared/gpt.h +++ b/src/shared/gpt.h @@ -32,6 +32,8 @@ bool partition_designator_is_versioned(PartitionDesignator d); PartitionDesignator partition_verity_of(PartitionDesignator p); PartitionDesignator partition_verity_sig_of(PartitionDesignator p); +PartitionDesignator partition_verity_to_data(PartitionDesignator d); +PartitionDesignator partition_verity_sig_to_data(PartitionDesignator d); const char* partition_designator_to_string(PartitionDesignator d) _const_; PartitionDesignator partition_designator_from_string(const char *name) _pure_; diff --git a/src/shared/install.c b/src/shared/install.c index 51aa60bb52..c38ba1bd7a 100644 --- a/src/shared/install.c +++ b/src/shared/install.c @@ -2790,25 +2790,38 @@ static int do_unit_file_disable( _cleanup_(install_context_done) InstallContext ctx = { .scope = scope }; _cleanup_set_free_free_ Set *remove_symlinks_to = NULL; + InstallInfo *info; + bool has_install_info = false; int r; STRV_FOREACH(name, names) { if 
(!unit_name_is_valid(*name, UNIT_NAME_ANY)) return install_changes_add(changes, n_changes, -EUCLEAN, *name, NULL); - r = install_info_add(&ctx, *name, NULL, lp->root_dir, /* auxiliary= */ false, NULL); + r = install_info_add(&ctx, *name, NULL, lp->root_dir, /* auxiliary= */ false, &info); + if (r >= 0) + r = install_info_traverse(&ctx, lp, info, SEARCH_LOAD|SEARCH_FOLLOW_CONFIG_SYMLINKS, NULL); + if (r < 0) - return r; + return install_changes_add(changes, n_changes, r, *name, NULL); + + /* If we enable multiple units, some with install info and others without, + * the "empty [Install] section" warning is not shown. Let's make the behavior + * of disable align with that. */ + has_install_info = has_install_info || install_info_has_rules(info) || install_info_has_also(info); } r = install_context_mark_for_removal(&ctx, lp, &remove_symlinks_to, config_path, changes, n_changes); + if (r >= 0) + r = remove_marked_symlinks(remove_symlinks_to, config_path, lp, flags & UNIT_FILE_DRY_RUN, changes, n_changes); + if (r < 0) return r; - return remove_marked_symlinks(remove_symlinks_to, config_path, lp, flags & UNIT_FILE_DRY_RUN, changes, n_changes); + /* The warning is shown only if it's a no-op */ + return install_changes_have_modification(*changes, *n_changes) || has_install_info; } - int unit_file_disable( LookupScope scope, UnitFileFlags flags, diff --git a/src/shared/machine-pool.c b/src/shared/machine-pool.c index 1f0b0b4730..fb0b2f5adc 100644 --- a/src/shared/machine-pool.c +++ b/src/shared/machine-pool.c @@ -22,7 +22,7 @@ static int check_btrfs(void) { return F_TYPE_EQUAL(sfs.f_type, BTRFS_SUPER_MAGIC); } -int setup_machine_directory(sd_bus_error *error) { +int setup_machine_directory(sd_bus_error *error, bool use_btrfs_subvol, bool use_btrfs_quota) { int r; r = check_btrfs(); @@ -31,8 +31,14 @@ int setup_machine_directory(sd_bus_error *error) { if (r == 0) return 0; + if (!use_btrfs_subvol) + return 0; + (void) btrfs_subvol_make_label("/var/lib/machines"); + if 
(!use_btrfs_quota) + return 0; + r = btrfs_quota_enable("/var/lib/machines", true); if (r < 0) log_warning_errno(r, "Failed to enable quota for /var/lib/machines, ignoring: %m"); @@ -41,5 +47,5 @@ int setup_machine_directory(sd_bus_error *error) { if (r < 0) log_warning_errno(r, "Failed to set up default quota hierarchy for /var/lib/machines, ignoring: %m"); - return 1; + return 0; } diff --git a/src/shared/machine-pool.h b/src/shared/machine-pool.h index 3f528ab060..c57e47878f 100644 --- a/src/shared/machine-pool.h +++ b/src/shared/machine-pool.h @@ -5,4 +5,4 @@ #include "sd-bus.h" -int setup_machine_directory(sd_bus_error *error); +int setup_machine_directory(sd_bus_error *error, bool use_btrfs_subvol, bool use_btrfs_quota); diff --git a/src/shared/meson.build b/src/shared/meson.build index 5f66b865de..3be7ba17bf 100644 --- a/src/shared/meson.build +++ b/src/shared/meson.build @@ -125,7 +125,6 @@ shared_sources = files( 'exit-status.h', 'extension-release.c', 'extension-release.h', - 'fdisk-util.h', 'fdset.c', 'fdset.h', 'fileio-label.c', @@ -493,3 +492,18 @@ libshared = shared_library( dependencies : libshared_deps, install : true, install_dir : rootpkglibdir) + +shared_fdisk_sources = files( + 'fdisk-util.h', + 'fdisk-util.c', +) + +if get_option('fdisk') != 'false' + libshared_fdisk = static_library( + 'shared-fdisk', + shared_fdisk_sources, + include_directories : includes, + dependencies : [libfdisk], + c_args : ['-fvisibility=default'], + build_by_default : false) +endif diff --git a/src/shared/mkfs-util.c b/src/shared/mkfs-util.c index 3edeaa5285..68e8f12eab 100644 --- a/src/shared/mkfs-util.c +++ b/src/shared/mkfs-util.c @@ -164,7 +164,9 @@ static int do_mcopy(const char *node, const char *root) { return log_error_errno(r, "Failed to read '%s' contents: %m", root); for (size_t i = 0; i < de->n_entries; i++) { - char *p = path_join(root, de->entries[i]->d_name); + _cleanup_free_ char *p = NULL; + + p = path_join(root, de->entries[i]->d_name); if (!p) 
return log_oom(); diff --git a/src/shared/mount-util.c b/src/shared/mount-util.c index 681d698800..adb6b6dd27 100644 --- a/src/shared/mount-util.c +++ b/src/shared/mount-util.c @@ -36,6 +36,7 @@ #include "set.h" #include "stat-util.h" #include "stdio-util.h" +#include "string-table.h" #include "string-util.h" #include "strv.h" #include "tmpfile-util.h" @@ -475,47 +476,41 @@ int bind_remount_one_with_mountinfo( return 0; } -int mount_move_root(const char *path) { - assert(path); +static const char *const mount_attr_propagation_type_table[_MOUNT_ATTR_PROPAGATION_TYPE_MAX] = { + [MOUNT_ATTR_PROPAGATION_INHERIT] = "inherited", + [MOUNT_ATTR_PROPAGATION_PRIVATE] = "private", + [MOUNT_ATTR_PROPAGATION_DEPENDENT] = "dependent", + [MOUNT_ATTR_PROPAGATION_SHARED] = "shared", +}; - if (chdir(path) < 0) - return -errno; +DEFINE_STRING_TABLE_LOOKUP(mount_attr_propagation_type, MountAttrPropagationType); - if (mount(path, "/", NULL, MS_MOVE, NULL) < 0) - return -errno; - - if (chroot(".") < 0) - return -errno; - - return RET_NERRNO(chdir("/")); +unsigned int mount_attr_propagation_type_to_flag(MountAttrPropagationType t) { + switch (t) { + case MOUNT_ATTR_PROPAGATION_INHERIT: + return 0; + case MOUNT_ATTR_PROPAGATION_PRIVATE: + return MS_PRIVATE; + case MOUNT_ATTR_PROPAGATION_DEPENDENT: + return MS_SLAVE; + case MOUNT_ATTR_PROPAGATION_SHARED: + return MS_SHARED; + default: + assert_not_reached(); + } } -int mount_pivot_root(const char *path) { - _cleanup_close_ int fd_oldroot = -EBADF, fd_newroot = -EBADF; - - assert(path); - - /* pivot_root() isn't currently supported in the initramfs. 
*/ - if (in_initrd()) - return mount_move_root(path); +static inline int mount_switch_root_pivot(const char *path, int fd_newroot) { + _cleanup_close_ int fd_oldroot = -EBADF; fd_oldroot = open("/", O_PATH|O_DIRECTORY|O_CLOEXEC|O_NOFOLLOW); if (fd_oldroot < 0) return log_debug_errno(errno, "Failed to open old rootfs"); - fd_newroot = open(path, O_PATH|O_DIRECTORY|O_CLOEXEC|O_NOFOLLOW); - if (fd_newroot < 0) - return log_debug_errno(errno, "Failed to open new rootfs '%s': %m", path); - - /* Change into the new rootfs. */ - if (fchdir(fd_newroot) < 0) - return log_debug_errno(errno, "Failed to change into new rootfs '%s': %m", path); - /* Let the kernel tuck the new root under the old one. */ if (pivot_root(".", ".") < 0) return log_debug_errno(errno, "Failed to pivot root to new rootfs '%s': %m", path); - /* At this point the new root is tucked under the old root. If we want * to unmount it we cannot be fchdir()ed into it. So escape back to the * old root. */ @@ -535,6 +530,52 @@ int mount_pivot_root(const char *path) { return 0; } +static inline int mount_switch_root_move(const char *path) { + if (mount(path, "/", NULL, MS_MOVE, NULL) < 0) + return log_debug_errno(errno, "Failed to move new rootfs '%s': %m", path); + + if (chroot(".") < 0) + return log_debug_errno(errno, "Failed to chroot to new rootfs '%s': %m", path); + + if (chdir("/")) + return log_debug_errno(errno, "Failed to chdir to new rootfs '%s': %m", path); + + return 0; +} + +int mount_switch_root(const char *path, MountAttrPropagationType type) { + int r; + _cleanup_close_ int fd_newroot = -EBADF; + unsigned int flags; + + assert(path); + + fd_newroot = open(path, O_PATH|O_DIRECTORY|O_CLOEXEC|O_NOFOLLOW); + if (fd_newroot < 0) + return log_debug_errno(errno, "Failed to open new rootfs '%s': %m", path); + + /* Change into the new rootfs. 
*/ + if (fchdir(fd_newroot) < 0) + return log_debug_errno(errno, "Failed to change into new rootfs '%s': %m", path); + + r = mount_switch_root_pivot(path, fd_newroot); + if (r < 0) { + /* Failed to pivot_root() fallback to MS_MOVE. For example, this may happen if the + * rootfs is an initramfs in which case pivot_root() isn't supported. */ + log_debug_errno(r, "Failed to pivot into new rootfs '%s': %m", path); + r = mount_switch_root_move(path); + } + if (r < 0) + return log_debug_errno(r, "Failed to switch to new rootfs '%s': %m", path); + + /* Finally, let's establish the requested propagation type. */ + flags = mount_attr_propagation_type_to_flag(type); + if ((flags != 0) && mount(NULL, ".", NULL, flags|MS_REC, 0) < 0) + return log_debug_errno(errno, "Failed to turn new rootfs '%s' into %s mount: %m", + mount_attr_propagation_type_to_string(type), path); + + return 0; +} int repeat_unmount(const char *path, int flags) { bool done = false; diff --git a/src/shared/mount-util.h b/src/shared/mount-util.h index 29b9ed02f7..56b1c3669c 100644 --- a/src/shared/mount-util.h +++ b/src/shared/mount-util.h @@ -11,6 +11,20 @@ #include "errno-util.h" #include "macro.h" +typedef enum MountAttrPropagationType { + MOUNT_ATTR_PROPAGATION_INHERIT, /* no special MS_* propagation flags */ + MOUNT_ATTR_PROPAGATION_PRIVATE, /* MS_PRIVATE */ + MOUNT_ATTR_PROPAGATION_DEPENDENT, /* MS_SLAVE */ + MOUNT_ATTR_PROPAGATION_SHARED, /* MS_SHARE */ + + _MOUNT_ATTR_PROPAGATION_TYPE_MAX, + _MOUNT_ATTR_PROPAGATION_TYPE_INVALID = -EINVAL, +} MountAttrPropagationType; + +const char* mount_attr_propagation_type_to_string(MountAttrPropagationType t) _const_; +MountAttrPropagationType mount_attr_propagation_type_from_string(const char *s) _pure_; +unsigned int mount_attr_propagation_type_to_flag(MountAttrPropagationType t); + /* The limit used for /dev itself. 4MB should be enough since device nodes and symlinks don't * consume any space and udev isn't supposed to create regular file either. 
There's no limit on the * max number of inodes since such limit is hard to guess especially on large storage array @@ -54,8 +68,7 @@ static inline int bind_remount_recursive(const char *prefix, unsigned long new_f int bind_remount_one_with_mountinfo(const char *path, unsigned long new_flags, unsigned long flags_mask, FILE *proc_self_mountinfo); -int mount_move_root(const char *path); -int mount_pivot_root(const char *path); +int mount_switch_root(const char *path, MountAttrPropagationType type); DEFINE_TRIVIAL_CLEANUP_FUNC_FULL(FILE*, endmntent, NULL); #define _cleanup_endmntent_ _cleanup_(endmntentp) diff --git a/src/shared/resolve-util.h b/src/shared/resolve-util.h index e58173d864..7c9008c705 100644 --- a/src/shared/resolve-util.h +++ b/src/shared/resolve-util.h @@ -11,6 +11,9 @@ /* 127.0.0.54 in native endian (The IP address we listen on we only implement "proxy" mode) */ #define INADDR_DNS_PROXY_STUB ((in_addr_t) 0x7f000036U) +/* 127.0.0.2 is an address we always map to the local hostname. 
This is different from 127.0.0.1 which maps to "localhost" */ +#define INADDR_LOCALADDRESS ((in_addr_t) 0x7f000002U) + typedef enum DnsCacheMode DnsCacheMode; enum DnsCacheMode { diff --git a/src/shared/selinux-util.c b/src/shared/selinux-util.c index f7d8353b49..e240cdc2c3 100644 --- a/src/shared/selinux-util.c +++ b/src/shared/selinux-util.c @@ -277,7 +277,7 @@ static int selinux_fix_fd( return 0; /* If the old label is identical to the new one, suppress any kind of error */ - if (getfilecon_raw(FORMAT_PROC_FD_PATH(fd), &oldcon) >= 0 && streq(fcon, oldcon)) + if (getfilecon_raw(FORMAT_PROC_FD_PATH(fd), &oldcon) >= 0 && streq_ptr(fcon, oldcon)) return 0; return log_enforcing_errno(r, "Unable to fix SELinux security context of %s: %m", label_path); @@ -381,9 +381,13 @@ int mac_selinux_get_create_label_from_exe(const char *exe, char **label) { if (getcon_raw(&mycon) < 0) return -errno; + if (!mycon) + return -EOPNOTSUPP; if (getfilecon_raw(exe, &fcon) < 0) return -errno; + if (!fcon) + return -EOPNOTSUPP; sclass = string_to_security_class("process"); if (sclass == 0) @@ -395,14 +399,21 @@ int mac_selinux_get_create_label_from_exe(const char *exe, char **label) { #endif } -int mac_selinux_get_our_label(char **label) { -#if HAVE_SELINUX - assert(label); +int mac_selinux_get_our_label(char **ret) { + assert(ret); +#if HAVE_SELINUX if (!mac_selinux_use()) return -EOPNOTSUPP; - return RET_NERRNO(getcon_raw(label)); + _cleanup_freecon_ char *con = NULL; + if (getcon_raw(&con) < 0) + return -errno; + if (!con) + return -EOPNOTSUPP; + + *ret = TAKE_PTR(con); + return 0; #else return -EOPNOTSUPP; #endif @@ -424,13 +435,20 @@ int mac_selinux_get_child_mls_label(int socket_fd, const char *exe, const char * if (getcon_raw(&mycon) < 0) return -errno; + if (!mycon) + return -EOPNOTSUPP; if (getpeercon_raw(socket_fd, &peercon) < 0) return -errno; + if (!peercon) + return -EOPNOTSUPP; - if (!exec_label) /* If there is no context set for next exec let's use context of target 
executable */ + if (!exec_label) { /* If there is no context set for next exec let's use context of target executable */ if (getfilecon_raw(exe, &fcon) < 0) return -errno; + if (!fcon) + return -EOPNOTSUPP; + } bcon = context_new(mycon); if (!bcon) diff --git a/src/shared/tpm2-util.c b/src/shared/tpm2-util.c index 327caa439f..ba8dfb041d 100644 --- a/src/shared/tpm2-util.c +++ b/src/shared/tpm2-util.c @@ -1565,6 +1565,8 @@ finish: return r; } +#define RETRY_UNSEAL_MAX 30u + int tpm2_unseal(const char *device, uint32_t hash_pcr_mask, uint16_t pcr_bank, @@ -1676,44 +1678,53 @@ int tpm2_unseal(const char *device, if (r < 0) goto finish; - r = tpm2_make_policy_session( - c.esys_context, - primary, - hmac_session, - TPM2_SE_POLICY, - hash_pcr_mask, - pcr_bank, - pubkey, pubkey_size, - pubkey_pcr_mask, - signature, - !!pin, - &session, - &policy_digest, - /* ret_pcr_bank= */ NULL); - if (r < 0) - goto finish; + for (unsigned i = RETRY_UNSEAL_MAX;; i--) { + r = tpm2_make_policy_session( + c.esys_context, + primary, + hmac_session, + TPM2_SE_POLICY, + hash_pcr_mask, + pcr_bank, + pubkey, pubkey_size, + pubkey_pcr_mask, + signature, + !!pin, + &session, + &policy_digest, + /* ret_pcr_bank= */ NULL); + if (r < 0) + goto finish; - /* If we know the policy hash to expect, and it doesn't match, we can shortcut things here, and not - * wait until the TPM2 tells us to go away. */ - if (known_policy_hash_size > 0 && - memcmp_nn(policy_digest->buffer, policy_digest->size, known_policy_hash, known_policy_hash_size) != 0) - return log_error_errno(SYNTHETIC_ERRNO(EPERM), - "Current policy digest does not match stored policy digest, cancelling " - "TPM2 authentication attempt."); + /* If we know the policy hash to expect, and it doesn't match, we can shortcut things here, and not + * wait until the TPM2 tells us to go away. 
*/ + if (known_policy_hash_size > 0 && + memcmp_nn(policy_digest->buffer, policy_digest->size, known_policy_hash, known_policy_hash_size) != 0) + return log_error_errno(SYNTHETIC_ERRNO(EPERM), + "Current policy digest does not match stored policy digest, cancelling " + "TPM2 authentication attempt."); - log_debug("Unsealing HMAC key."); + log_debug("Unsealing HMAC key."); - rc = sym_Esys_Unseal( - c.esys_context, - hmac_key, - session, - hmac_session, /* use HMAC session to enable parameter encryption */ - ESYS_TR_NONE, - &unsealed); - if (rc != TSS2_RC_SUCCESS) { - r = log_error_errno(SYNTHETIC_ERRNO(ENOTRECOVERABLE), - "Failed to unseal HMAC key in TPM: %s", sym_Tss2_RC_Decode(rc)); - goto finish; + rc = sym_Esys_Unseal( + c.esys_context, + hmac_key, + session, + hmac_session, /* use HMAC session to enable parameter encryption */ + ESYS_TR_NONE, + &unsealed); + if (rc == TPM2_RC_PCR_CHANGED && i > 0) { + log_debug("A PCR value changed during the TPM2 policy session, restarting HMAC key unsealing (%u tries left).", i); + session = tpm2_flush_context_verbose(c.esys_context, session); + continue; + } + if (rc != TSS2_RC_SUCCESS) { + r = log_error_errno(SYNTHETIC_ERRNO(ENOTRECOVERABLE), + "Failed to unseal HMAC key in TPM: %s", sym_Tss2_RC_Decode(rc)); + goto finish; + } + + break; } secret = memdup(unsealed->buffer, unsealed->size); diff --git a/src/shared/user-record-show.c b/src/shared/user-record-show.c index e89c0de120..da181c6078 100644 --- a/src/shared/user-record-show.c +++ b/src/shared/user-record-show.c @@ -314,6 +314,8 @@ void user_record_show(UserRecord *hr, bool show_full_group_info) { printf(" PBKDF Type: %s\n", hr->luks_pbkdf_type); if (hr->luks_pbkdf_hash_algorithm) printf(" PBKDF Hash: %s\n", hr->luks_pbkdf_hash_algorithm); + if (hr->luks_pbkdf_force_iterations != UINT64_MAX) + printf(" PBKDF Iters: %" PRIu64 "\n", hr->luks_pbkdf_force_iterations); if (hr->luks_pbkdf_time_cost_usec != UINT64_MAX) printf(" PBKDF Time: %s\n", 
FORMAT_TIMESPAN(hr->luks_pbkdf_time_cost_usec, 0)); if (hr->luks_pbkdf_memory_cost != UINT64_MAX) diff --git a/src/shared/user-record.c b/src/shared/user-record.c index 84cbdb1d30..06bc699572 100644 --- a/src/shared/user-record.c +++ b/src/shared/user-record.c @@ -55,6 +55,7 @@ UserRecord* user_record_new(void) { .luks_discard = -1, .luks_offline_discard = -1, .luks_volume_key_size = UINT64_MAX, + .luks_pbkdf_force_iterations = UINT64_MAX, .luks_pbkdf_time_cost_usec = UINT64_MAX, .luks_pbkdf_memory_cost = UINT64_MAX, .luks_pbkdf_parallel_threads = UINT64_MAX, @@ -1213,6 +1214,7 @@ static int dispatch_per_machine(const char *name, JsonVariant *variant, JsonDisp { "luksVolumeKeySize", JSON_VARIANT_UNSIGNED, json_dispatch_uint64, offsetof(UserRecord, luks_volume_key_size), 0 }, { "luksPbkdfHashAlgorithm", JSON_VARIANT_STRING, json_dispatch_string, offsetof(UserRecord, luks_pbkdf_hash_algorithm), JSON_SAFE }, { "luksPbkdfType", JSON_VARIANT_STRING, json_dispatch_string, offsetof(UserRecord, luks_pbkdf_type), JSON_SAFE }, + { "luksPbkdfForceIterations", JSON_VARIANT_UNSIGNED, json_dispatch_uint64, offsetof(UserRecord, luks_pbkdf_force_iterations), 0 }, { "luksPbkdfTimeCostUSec", JSON_VARIANT_UNSIGNED, json_dispatch_uint64, offsetof(UserRecord, luks_pbkdf_time_cost_usec), 0 }, { "luksPbkdfMemoryCost", JSON_VARIANT_UNSIGNED, json_dispatch_uint64, offsetof(UserRecord, luks_pbkdf_memory_cost), 0 }, { "luksPbkdfParallelThreads", JSON_VARIANT_UNSIGNED, json_dispatch_uint64, offsetof(UserRecord, luks_pbkdf_parallel_threads), 0 }, @@ -1566,6 +1568,7 @@ int user_record_load(UserRecord *h, JsonVariant *v, UserRecordLoadFlags load_fla { "luksVolumeKeySize", JSON_VARIANT_UNSIGNED, json_dispatch_uint64, offsetof(UserRecord, luks_volume_key_size), 0 }, { "luksPbkdfHashAlgorithm", JSON_VARIANT_STRING, json_dispatch_string, offsetof(UserRecord, luks_pbkdf_hash_algorithm), JSON_SAFE }, { "luksPbkdfType", JSON_VARIANT_STRING, json_dispatch_string, offsetof(UserRecord, luks_pbkdf_type), 
JSON_SAFE }, + { "luksPbkdfForceIterations", JSON_VARIANT_UNSIGNED, json_dispatch_uint64, offsetof(UserRecord, luks_pbkdf_force_iterations), 0 }, { "luksPbkdfTimeCostUSec", JSON_VARIANT_UNSIGNED, json_dispatch_uint64, offsetof(UserRecord, luks_pbkdf_time_cost_usec), 0 }, { "luksPbkdfMemoryCost", JSON_VARIANT_UNSIGNED, json_dispatch_uint64, offsetof(UserRecord, luks_pbkdf_memory_cost), 0 }, { "luksPbkdfParallelThreads", JSON_VARIANT_UNSIGNED, json_dispatch_uint64, offsetof(UserRecord, luks_pbkdf_parallel_threads), 0 }, @@ -1842,6 +1845,17 @@ const char* user_record_luks_pbkdf_type(UserRecord *h) { return h->luks_pbkdf_type ?: "argon2id"; } +uint64_t user_record_luks_pbkdf_force_iterations(UserRecord *h) { + assert(h); + + /* propagate default "benchmark" mode as itself */ + if (h->luks_pbkdf_force_iterations == UINT64_MAX) + return UINT64_MAX; + + /* clamp everything else to actually accepted number of iterations of libcryptsetup */ + return CLAMP(h->luks_pbkdf_force_iterations, 1U, UINT32_MAX); +} + uint64_t user_record_luks_pbkdf_time_cost_usec(UserRecord *h) { assert(h); diff --git a/src/shared/user-record.h b/src/shared/user-record.h index 47f4035d45..73fb86cc94 100644 --- a/src/shared/user-record.h +++ b/src/shared/user-record.h @@ -309,6 +309,7 @@ typedef struct UserRecord { uint64_t luks_volume_key_size; char *luks_pbkdf_hash_algorithm; char *luks_pbkdf_type; + uint64_t luks_pbkdf_force_iterations; uint64_t luks_pbkdf_time_cost_usec; uint64_t luks_pbkdf_memory_cost; uint64_t luks_pbkdf_parallel_threads; @@ -394,6 +395,7 @@ const char *user_record_luks_cipher(UserRecord *h); const char *user_record_luks_cipher_mode(UserRecord *h); uint64_t user_record_luks_volume_key_size(UserRecord *h); const char* user_record_luks_pbkdf_type(UserRecord *h); +uint64_t user_record_luks_pbkdf_force_iterations(UserRecord *h); usec_t user_record_luks_pbkdf_time_cost_usec(UserRecord *h); uint64_t user_record_luks_pbkdf_memory_cost(UserRecord *h); uint64_t 
user_record_luks_pbkdf_parallel_threads(UserRecord *h); diff --git a/src/sysext/sysext.c b/src/sysext/sysext.c index 9a7ef8eb29..b9147fbd78 100644 --- a/src/sysext/sysext.c +++ b/src/sysext/sysext.c @@ -522,7 +522,9 @@ static int merge_subprocess(Hashmap *images, const char *workspace) { DISSECT_IMAGE_GENERIC_ROOT | DISSECT_IMAGE_REQUIRE_ROOT | DISSECT_IMAGE_MOUNT_ROOT_ONLY | - DISSECT_IMAGE_USR_NO_ROOT; + DISSECT_IMAGE_USR_NO_ROOT | + DISSECT_IMAGE_ADD_PARTITION_DEVICES | + DISSECT_IMAGE_PIN_PARTITION_DEVICES; r = verity_settings_load(&verity_settings, img->path, NULL, NULL); if (r < 0) diff --git a/src/systemctl/fuzz-systemctl-parse-argv.c b/src/systemctl/fuzz-systemctl-parse-argv.c index 92f6ecaa8d..606f602c3a 100644 --- a/src/systemctl/fuzz-systemctl-parse-argv.c +++ b/src/systemctl/fuzz-systemctl-parse-argv.c @@ -19,6 +19,9 @@ int LLVMFuzzerTestOneInput(const uint8_t *data, size_t size) { _cleanup_close_ int orig_stdout_fd = -1; int r; + if (size > 16*1024) + return 0; /* See the comment below about the limit for strv_length(). */ + /* We don't want to fill the logs with messages about parse errors. 
* Disable most logging if not running standalone */ if (!getenv("SYSTEMD_LOG_LEVEL")) diff --git a/src/systemctl/systemctl-enable.c b/src/systemctl/systemctl-enable.c index 5be4c0c725..86d9f602fa 100644 --- a/src/systemctl/systemctl-enable.c +++ b/src/systemctl/systemctl-enable.c @@ -67,7 +67,7 @@ int verb_enable(int argc, char *argv[], void *userdata) { InstallChange *changes = NULL; size_t n_changes = 0; int carries_install_info = -1; - bool ignore_carries_install_info = arg_quiet; + bool ignore_carries_install_info = arg_quiet || arg_no_warn; int r; if (!argv[1]) @@ -109,9 +109,10 @@ int verb_enable(int argc, char *argv[], void *userdata) { if (streq(verb, "enable")) { r = unit_file_enable(arg_scope, flags, arg_root, names, &changes, &n_changes); carries_install_info = r; - } else if (streq(verb, "disable")) + } else if (streq(verb, "disable")) { r = unit_file_disable(arg_scope, flags, arg_root, names, &changes, &n_changes); - else if (streq(verb, "reenable")) { + carries_install_info = r; + } else if (streq(verb, "reenable")) { r = unit_file_reenable(arg_scope, flags, arg_root, names, &changes, &n_changes); carries_install_info = r; } else if (streq(verb, "link")) @@ -165,7 +166,8 @@ int verb_enable(int argc, char *argv[], void *userdata) { method = "EnableUnitFiles"; expect_carries_install_info = true; } else if (streq(verb, "disable")) { - method = "DisableUnitFiles"; + method = "DisableUnitFilesWithFlagsAndInstallInfo"; + expect_carries_install_info = true; send_force = false; } else if (streq(verb, "reenable")) { method = "ReenableUnitFiles"; @@ -208,7 +210,10 @@ int verb_enable(int argc, char *argv[], void *userdata) { } if (send_runtime) { - r = sd_bus_message_append(m, "b", arg_runtime); + if (streq(method, "DisableUnitFilesWithFlagsAndInstallInfo")) + r = sd_bus_message_append(m, "t", arg_runtime ? 
UNIT_FILE_RUNTIME : 0); + else + r = sd_bus_message_append(m, "b", arg_runtime); if (r < 0) return bus_log_create_error(r); } @@ -245,7 +250,7 @@ int verb_enable(int argc, char *argv[], void *userdata) { if (carries_install_info == 0 && !ignore_carries_install_info) log_notice("The unit files have no installation config (WantedBy=, RequiredBy=, Also=,\n" "Alias= settings in the [Install] section, and DefaultInstance= for template\n" - "units). This means they are not meant to be enabled using systemctl.\n" + "units). This means they are not meant to be enabled or disabled using systemctl.\n" " \n" /* trick: the space is needed so that the line does not get stripped from output */ "Possible reasons for having this kind of units are:\n" "%1$s A unit may be statically enabled by being symlinked from another unit's\n" diff --git a/src/systemctl/systemctl-start-special.c b/src/systemctl/systemctl-start-special.c index 9363764cd7..edc907c832 100644 --- a/src/systemctl/systemctl-start-special.c +++ b/src/systemctl/systemctl-start-special.c @@ -153,19 +153,8 @@ int verb_start_special(int argc, char *argv[], void *userdata) { return r; if (a == ACTION_REBOOT) { - const char *arg = NULL; - - if (argc > 1) { - if (arg_reboot_argument) - return log_error_errno(SYNTHETIC_ERRNO(EINVAL), "Both --reboot-argument= and positional argument passed to reboot command, refusing."); - - log_notice("Positional argument to reboot command is deprecated, please use --reboot-argument= instead. 
Accepting anyway."); - arg = argv[1]; - } else - arg = arg_reboot_argument; - - if (arg) { - r = update_reboot_parameter_and_warn(arg, false); + if (arg_reboot_argument) { + r = update_reboot_parameter_and_warn(arg_reboot_argument, false); if (r < 0) return r; } diff --git a/src/systemctl/systemctl.c b/src/systemctl/systemctl.c index 3f28bcc3dc..d13c7867e2 100644 --- a/src/systemctl/systemctl.c +++ b/src/systemctl/systemctl.c @@ -22,6 +22,7 @@ #include "rlimit-util.h" #include "sigbus.h" #include "signal-util.h" +#include "stat-util.h" #include "string-table.h" #include "systemctl-add-dependency.h" #include "systemctl-cancel-job.h" @@ -84,6 +85,7 @@ bool arg_show_types = false; int arg_check_inhibitors = -1; bool arg_dry_run = false; bool arg_quiet = false; +bool arg_no_warn = false; bool arg_full = false; bool arg_recursive = false; bool arg_with_dependencies = false; @@ -277,6 +279,8 @@ static int systemctl_help(void) { " kexec, suspend, hibernate, suspend-then-hibernate,\n" " hybrid-sleep, default, rescue, emergency, and exit.\n" " -q --quiet Suppress output\n" + " --no-warn Don't generate warning when trying to enable/disable\n" + " units without install information\n" " --wait For (re)start, wait until service stopped again\n" " For is-system-running, wait until startup is completed\n" " --no-block Do not wait until operation finished\n" @@ -433,6 +437,7 @@ static int systemctl_parse_argv(int argc, char *argv[]) { ARG_READ_ONLY, ARG_MKDIR, ARG_MARKED, + ARG_NO_WARN, }; static const struct option options[] = { @@ -465,6 +470,7 @@ static int systemctl_parse_argv(int argc, char *argv[]) { { "no-wall", no_argument, NULL, ARG_NO_WALL }, { "dry-run", no_argument, NULL, ARG_DRY_RUN }, { "quiet", no_argument, NULL, 'q' }, + { "no-warn", no_argument, NULL, ARG_NO_WARN }, { "root", required_argument, NULL, ARG_ROOT }, { "image", required_argument, NULL, ARG_IMAGE }, { "force", no_argument, NULL, 'f' }, @@ -926,6 +932,10 @@ static int systemctl_parse_argv(int argc, char 
*argv[]) { arg_marked = true; break; + case ARG_NO_WARN: + arg_no_warn = true; + break; + case '.': /* Output an error mimicking getopt, and print a hint afterwards */ log_error("%s: invalid option -- '.'", program_invocation_name); @@ -1087,7 +1097,7 @@ static int systemctl_main(int argc, char *argv[]) { { "import-environment", VERB_ANY, VERB_ANY, VERB_ONLINE_ONLY, verb_import_environment }, { "halt", VERB_ANY, 1, VERB_ONLINE_ONLY, verb_start_system_special }, { "poweroff", VERB_ANY, 1, VERB_ONLINE_ONLY, verb_start_system_special }, - { "reboot", VERB_ANY, 2, VERB_ONLINE_ONLY, verb_start_system_special }, + { "reboot", VERB_ANY, 1, VERB_ONLINE_ONLY, verb_start_system_special }, { "kexec", VERB_ANY, 1, VERB_ONLINE_ONLY, verb_start_system_special }, { "suspend", VERB_ANY, 1, VERB_ONLINE_ONLY, verb_start_system_special }, { "hibernate", VERB_ANY, 1, VERB_ONLINE_ONLY, verb_start_system_special }, @@ -1148,6 +1158,13 @@ static int run(int argc, char *argv[]) { if (r <= 0) goto finish; + if (proc_mounted() == 0) + log_warning("%s%s/proc/ is not mounted. This is not a supported mode of operation. Please fix\n" + "your invocation environment to mount /proc/ and /sys/ properly. Proceeding anyway.\n" + "Your mileage may vary.", + emoji_enabled() ? special_glyph(SPECIAL_GLYPH_WARNING_SIGN) : "", + emoji_enabled() ? 
" " : ""); + if (arg_action != ACTION_SYSTEMCTL && running_in_chroot() > 0) { if (!arg_quiet) log_info("Running in chroot, ignoring request."); diff --git a/src/systemctl/systemctl.h b/src/systemctl/systemctl.h index 2454c4c714..1a7a6e28d3 100644 --- a/src/systemctl/systemctl.h +++ b/src/systemctl/systemctl.h @@ -65,6 +65,7 @@ extern bool arg_show_types; extern int arg_check_inhibitors; extern bool arg_dry_run; extern bool arg_quiet; +extern bool arg_no_warn; extern bool arg_full; extern bool arg_recursive; extern bool arg_with_dependencies; diff --git a/src/sysupdate/sysupdate-partition.c b/src/sysupdate/sysupdate-partition.c index 33d0e584ba..bd0486d99e 100644 --- a/src/sysupdate/sysupdate-partition.c +++ b/src/sysupdate/sysupdate-partition.c @@ -106,9 +106,8 @@ int read_partition_info( PartitionInfo *ret) { _cleanup_free_ char *label_copy = NULL, *device = NULL; - const char *pts, *ids, *label; + const char *label; struct fdisk_partition *p; - struct fdisk_parttype *pt; uint64_t start, size, flags; sd_id128_t ptid, id; GptPartitionType type; @@ -147,25 +146,13 @@ int read_partition_info( if (!label) return log_error_errno(SYNTHETIC_ERRNO(EINVAL), "Found a partition without a label."); - pt = fdisk_partition_get_type(p); - if (!pt) - return log_error_errno(SYNTHETIC_ERRNO(EIO), "Failed to acquire type of partition: %m"); - - pts = fdisk_parttype_get_string(pt); - if (!pts) - return log_error_errno(SYNTHETIC_ERRNO(EIO), "Failed to acquire type of partition as string: %m"); - - r = sd_id128_from_string(pts, &ptid); + r = fdisk_partition_get_type_as_id128(p, &ptid); if (r < 0) - return log_error_errno(r, "Failed to parse partition type UUID %s: %m", pts); - - ids = fdisk_partition_get_uuid(p); - if (!ids) - return log_error_errno(SYNTHETIC_ERRNO(EINVAL), "Found a partition without a UUID."); + return log_error_errno(r, "Failed to read partition type UUID: %m"); - r = sd_id128_from_string(ids, &id); + r = fdisk_partition_get_uuid_as_id128(p, &id); if (r < 0) - return 
log_error_errno(r, "Failed to parse partition UUID %s: %m", ids); + return log_error_errno(r, "Failed to read partition UUID: %m"); r = fdisk_partition_get_attrs_as_uint64(p, &flags); if (r < 0) diff --git a/src/test/test-escape.c b/src/test/test-escape.c index da19272624..de82020d5f 100644 --- a/src/test/test-escape.c +++ b/src/test/test-escape.c @@ -219,4 +219,20 @@ TEST(quote_command_line) { "true \"\\$dollar\""); } +static void test_octescape_one(const char *s, const char *expected) { + _cleanup_free_ char *ret; + + assert_se(ret = octescape(s, strlen_ptr(s))); + log_debug("octescape(\"%s\") → \"%s\" (expected: \"%s\")", strnull(s), ret, expected); + assert_se(streq(ret, expected)); +} + +TEST(octescape) { + test_octescape_one(NULL, ""); + test_octescape_one("", ""); + test_octescape_one("foo", "foo"); + test_octescape_one("\"\\\"", "\\042\\134\\042"); + test_octescape_one("\123\213\222", "\123\\213\\222"); +} + DEFINE_TEST_MAIN(LOG_DEBUG); diff --git a/src/test/test-fd-util.c b/src/test/test-fd-util.c index 2b85ceab82..df6ca13785 100644 --- a/src/test/test-fd-util.c +++ b/src/test/test-fd-util.c @@ -483,6 +483,53 @@ TEST(fd_reopen) { fd1 = -1; } +TEST(fd_reopen_condition) { + _cleanup_close_ int fd1 = -1, fd3 = -1; + int fd2, fl; + + /* Open without O_PATH */ + fd1 = open("/usr/", O_RDONLY|O_DIRECTORY|O_CLOEXEC); + assert_se(fd1 >= 0); + + fl = fcntl(fd1, F_GETFL); + assert_se(FLAGS_SET(fl, O_DIRECTORY)); + assert_se(!FLAGS_SET(fl, O_PATH)); + + fd2 = fd_reopen_condition(fd1, O_DIRECTORY, O_DIRECTORY|O_PATH, &fd3); + assert_se(fd2 == fd1); + assert_se(fd3 < 0); + + /* Switch on O_PATH */ + fd2 = fd_reopen_condition(fd1, O_DIRECTORY|O_PATH, O_DIRECTORY|O_PATH, &fd3); + assert_se(fd2 != fd1); + assert_se(fd3 == fd2); + + fl = fcntl(fd2, F_GETFL); + assert_se(FLAGS_SET(fl, O_DIRECTORY)); + assert_se(FLAGS_SET(fl, O_PATH)); + + close_and_replace(fd1, fd3); + + fd2 = fd_reopen_condition(fd1, O_DIRECTORY|O_PATH, O_DIRECTORY|O_PATH, &fd3); + assert_se(fd2 == fd1); + 
assert_se(fd3 < 0); + + /* Switch off O_PATH again */ + fd2 = fd_reopen_condition(fd1, O_DIRECTORY, O_DIRECTORY|O_PATH, &fd3); + assert_se(fd2 != fd1); + assert_se(fd3 == fd2); + + fl = fcntl(fd2, F_GETFL); + assert_se(FLAGS_SET(fl, O_DIRECTORY)); + assert_se(!FLAGS_SET(fl, O_PATH)); + + close_and_replace(fd1, fd3); + + fd2 = fd_reopen_condition(fd1, O_DIRECTORY, O_DIRECTORY|O_PATH, &fd3); + assert_se(fd2 == fd1); + assert_se(fd3 < 0); +} + TEST(take_fd) { _cleanup_close_ int fd1 = -1, fd2 = -1; int array[2] = { -1, -1 }, i = 0; diff --git a/src/test/test-fs-util.c b/src/test/test-fs-util.c index 4bf0a5daf8..8c4b632ed0 100644 --- a/src/test/test-fs-util.c +++ b/src/test/test-fs-util.c @@ -27,7 +27,7 @@ static const char *arg_test_dir = NULL; TEST(chase_symlinks) { - _cleanup_free_ char *result = NULL; + _cleanup_free_ char *result = NULL, *pwd = NULL; _cleanup_close_ int pfd = -1; char *temp; const char *top, *p, *pslash, *q, *qslash; @@ -245,6 +245,30 @@ TEST(chase_symlinks) { assert_se(path_equal(result, p)); result = mfree(result); + /* Relative paths */ + + assert_se(safe_getcwd(&pwd) >= 0); + + assert_se(chdir(temp) >= 0); + + p = "this/is/a/relative/path"; + r = chase_symlinks(p, NULL, CHASE_NONEXISTENT, &result, NULL); + assert_se(r == 0); + + p = strjoina(temp, "/", p); + assert_se(path_equal(result, p)); + result = mfree(result); + + p = "this/is/a/relative/path"; + r = chase_symlinks(p, temp, CHASE_NONEXISTENT, &result, NULL); + assert_se(r == 0); + + p = strjoina(temp, "/", p); + assert_se(path_equal(result, p)); + result = mfree(result); + + assert_se(chdir(pwd) >= 0); + /* Path which doesn't exist, but contains weird stuff */ p = strjoina(temp, "/idontexist/.."); diff --git a/src/test/test-gpt.c b/src/test/test-gpt.c index 377b79f155..b8a54055c2 100644 --- a/src/test/test-gpt.c +++ b/src/test/test-gpt.c @@ -46,4 +46,22 @@ TEST(gpt_types_against_architectures) { } } +TEST(verity_mappings) { + for (PartitionDesignator p = 0; p < 
_PARTITION_DESIGNATOR_MAX; p++) { + PartitionDesignator q; + + q = partition_verity_of(p); + assert_se(q < 0 || partition_verity_to_data(q) == p); + + q = partition_verity_sig_of(p); + assert_se(q < 0 || partition_verity_sig_to_data(q) == p); + + q = partition_verity_to_data(p); + assert_se(q < 0 || partition_verity_of(q) == p); + + q = partition_verity_sig_to_data(p); + assert_se(q < 0 || partition_verity_sig_of(q) == p); + } +} + DEFINE_TEST_MAIN(LOG_INFO); diff --git a/src/test/test-locale-util.c b/src/test/test-locale-util.c index aa501b650a..dd96af6894 100644 --- a/src/test/test-locale-util.c +++ b/src/test/test-locale-util.c @@ -82,7 +82,7 @@ TEST(keymaps) { #define dump_glyph(x) log_info(STRINGIFY(x) ": %s", special_glyph(x)) TEST(dump_special_glyphs) { - assert_cc(SPECIAL_GLYPH_SPARKLES + 1 == _SPECIAL_GLYPH_MAX); + assert_cc(SPECIAL_GLYPH_WARNING_SIGN + 1 == _SPECIAL_GLYPH_MAX); log_info("is_locale_utf8: %s", yes_no(is_locale_utf8())); @@ -119,6 +119,7 @@ TEST(dump_special_glyphs) { dump_glyph(SPECIAL_GLYPH_RECYCLING); dump_glyph(SPECIAL_GLYPH_DOWNLOAD); dump_glyph(SPECIAL_GLYPH_SPARKLES); + dump_glyph(SPECIAL_GLYPH_WARNING_SIGN); } DEFINE_TEST_MAIN(LOG_INFO); diff --git a/src/test/test-loop-block.c b/src/test/test-loop-block.c index e2b97dd56f..b06ab0d172 100644 --- a/src/test/test-loop-block.c +++ b/src/test/test-loop-block.c @@ -4,6 +4,8 @@ #include <linux/loop.h> #include <pthread.h> #include <sys/file.h> +#include <sys/ioctl.h> +#include <sys/mount.h> #include "alloc-util.h" #include "capability-util.h" @@ -44,6 +46,15 @@ static void verify_dissected_image(DissectedImage *dissected) { assert_se(dissected->partitions[PARTITION_HOME].node); } +static void verify_dissected_image_harder(DissectedImage *dissected) { + verify_dissected_image(dissected); + + assert_se(streq(dissected->partitions[PARTITION_ESP].fstype, "vfat")); + assert_se(streq(dissected->partitions[PARTITION_XBOOTLDR].fstype, "vfat")); + 
assert_se(streq(dissected->partitions[PARTITION_ROOT].fstype, "ext4")); + assert_se(streq(dissected->partitions[PARTITION_HOME].fstype, "ext4")); +} + static void* thread_func(void *ptr) { int fd = PTR_TO_FD(ptr); int r; @@ -71,7 +82,7 @@ static void* thread_func(void *ptr) { log_notice("Acquired loop device %s, will mount on %s", loop->node, mounted); - r = dissect_loop_device(loop, NULL, NULL, DISSECT_IMAGE_READ_ONLY, &dissected); + r = dissect_loop_device(loop, NULL, NULL, DISSECT_IMAGE_READ_ONLY|DISSECT_IMAGE_ADD_PARTITION_DEVICES|DISSECT_IMAGE_PIN_PARTITION_DEVICES, &dissected); if (r < 0) log_error_errno(r, "Failed dissect loopback device %s: %m", loop->node); assert_se(r >= 0); @@ -220,7 +231,7 @@ static int run(int argc, char *argv[]) { assert_se(loop_device_make(fd, O_RDWR, 0, UINT64_MAX, 0, LO_FLAGS_PARTSCAN, LOCK_EX, &loop) >= 0); #if HAVE_BLKID - assert_se(dissect_loop_device(loop, NULL, NULL, 0, &dissected) >= 0); + assert_se(dissect_loop_device(loop, NULL, NULL, DISSECT_IMAGE_ADD_PARTITION_DEVICES|DISSECT_IMAGE_PIN_PARTITION_DEVICES, &dissected) >= 0); verify_dissected_image(dissected); FOREACH_STRING(fs, "vfat", "ext4") { @@ -246,8 +257,23 @@ static int run(int argc, char *argv[]) { assert_se(make_filesystem(dissected->partitions[PARTITION_HOME].node, "ext4", "home", NULL, id, true) >= 0); dissected = dissected_image_unref(dissected); + + /* We created the file systems now via the per-partition block devices. But the dissection code might + * probe them via the whole block device. These block devices have separate buffer caches though, + * hence what was written via the partition device might not appear on the whole block device + * yet. Let's hence explicitly flush the whole block device, so that the read-back definitely + * works. */ + assert_se(ioctl(loop->fd, BLKFLSBUF, 0) >= 0); + + /* Try to read once, without pinning or adding partitions, i.e. by only accessing the whole block + * device. 
*/ assert_se(dissect_loop_device(loop, NULL, NULL, 0, &dissected) >= 0); - verify_dissected_image(dissected); + verify_dissected_image_harder(dissected); + dissected = dissected_image_unref(dissected); + + /* Now go via the loopback device after all, but this time add/pin, because now we want to mount it. */ + assert_se(dissect_loop_device(loop, NULL, NULL, DISSECT_IMAGE_ADD_PARTITION_DEVICES|DISSECT_IMAGE_PIN_PARTITION_DEVICES, &dissected) >= 0); + verify_dissected_image_harder(dissected); assert_se(mkdtemp_malloc(NULL, &mounted) >= 0); diff --git a/src/tmpfiles/tmpfiles.c b/src/tmpfiles/tmpfiles.c index bf5192c56f..f156d90073 100644 --- a/src/tmpfiles/tmpfiles.c +++ b/src/tmpfiles/tmpfiles.c @@ -1979,7 +1979,7 @@ static int create_fifo(Item *i) { creation = r >= 0 ? CREATION_NORMAL : CREATION_EXISTING; - /* Open the inode via O_PATH, regardless if we managed to create it or not. Maybe it is is already the FIFO we want */ + /* Open the inode via O_PATH, regardless if we managed to create it or not. 
Maybe it is already the FIFO we want */ fd = openat(pfd, bn, O_NOFOLLOW|O_CLOEXEC|O_PATH); if (fd < 0) { if (r < 0) diff --git a/src/udev/udev-builtin-blkid.c b/src/udev/udev-builtin-blkid.c index 9f5646ffdd..9b5dfbe33b 100644 --- a/src/udev/udev-builtin-blkid.c +++ b/src/udev/udev-builtin-blkid.c @@ -119,8 +119,9 @@ static int find_gpt_root(sd_device *dev, blkid_probe pr, bool test) { #if defined(SD_GPT_ROOT_NATIVE) && ENABLE_EFI - _cleanup_free_ char *root_id = NULL, *root_label = NULL; + _cleanup_free_ char *root_label = NULL; bool found_esp_or_xbootldr = false; + sd_id128_t root_id = SD_ID128_NULL; int r; assert(pr); @@ -137,34 +138,32 @@ static int find_gpt_root(sd_device *dev, blkid_probe pr, bool test) { int nvals = blkid_partlist_numof_partitions(pl); for (int i = 0; i < nvals; i++) { blkid_partition pp; - const char *stype, *sid, *label; - sd_id128_t type; + const char *label; + sd_id128_t type, id; pp = blkid_partlist_get_partition(pl, i); if (!pp) continue; - sid = blkid_partition_get_uuid(pp); - if (!sid) + r = blkid_partition_get_uuid_id128(pp, &id); + if (r < 0) { + log_debug_errno(r, "Failed to get partition UUID, ignoring: %m"); continue; + } - label = blkid_partition_get_name(pp); /* returns NULL if empty */ - - stype = blkid_partition_get_type_string(pp); - if (!stype) + r = blkid_partition_get_type_id128(pp, &type); + if (r < 0) { + log_debug_errno(r, "Failed to get partition type UUID, ignoring: %m"); continue; + } - if (sd_id128_from_string(stype, &type) < 0) - continue; + label = blkid_partition_get_name(pp); /* returns NULL if empty */ if (sd_id128_in_set(type, SD_GPT_ESP, SD_GPT_XBOOTLDR)) { - sd_id128_t id, esp_or_xbootldr; + sd_id128_t esp_or_xbootldr; /* We found an ESP or XBOOTLDR, let's see if it matches the ESP/XBOOTLDR we booted from. 
*/ - if (sd_id128_from_string(sid, &id) < 0) - continue; - r = efi_loader_get_device_part_uuid(&esp_or_xbootldr); if (r < 0) return r; @@ -182,10 +181,8 @@ static int find_gpt_root(sd_device *dev, blkid_probe pr, bool test) { /* We found a suitable root partition, let's remember the first one, or the one with * the newest version, as determined by comparing the partition labels. */ - if (!root_id || strverscmp_improved(label, root_label) > 0) { - r = free_and_strdup(&root_id, sid); - if (r < 0) - return r; + if (sd_id128_is_null(root_id) || strverscmp_improved(label, root_label) > 0) { + root_id = id; r = free_and_strdup(&root_label, label); if (r < 0) @@ -196,8 +193,8 @@ static int find_gpt_root(sd_device *dev, blkid_probe pr, bool test) { /* We found the ESP/XBOOTLDR on this disk, and also found a root partition, nice! Let's export its * UUID */ - if (found_esp_or_xbootldr && root_id) - udev_builtin_add_property(dev, test, "ID_PART_GPT_AUTO_ROOT_UUID", root_id); + if (found_esp_or_xbootldr && !sd_id128_is_null(root_id)) + udev_builtin_add_property(dev, test, "ID_PART_GPT_AUTO_ROOT_UUID", SD_ID128_TO_UUID_STRING(root_id)); #endif return 0; |