From 0d02d0bc51f2fd431f81daa39a970d5dea279f29 Mon Sep 17 00:00:00 2001 From: Philip Withnall Date: Wed, 22 Apr 2026 17:19:21 +0100 Subject: [PATCH 001/242] sysupdate: Prevent a possible invalid partial+pending state on an instance MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit If the resource code is recursing, it’s possible for one iteration to set a partial flag, and then a recursive iteration to set a pending flag (or vice-versa). It doesn’t make sense to have both set at the same time for a specific instance, so make sure to clear the other flag when setting one of them. Add some assertions to make this invariant clearer and easier to debug if it fails. Signed-off-by: Philip Withnall --- src/sysupdate/sysupdate-resource.c | 8 ++++++++ src/sysupdate/sysupdate.c | 5 ++++- 2 files changed, 12 insertions(+), 1 deletion(-) diff --git a/src/sysupdate/sysupdate-resource.c b/src/sysupdate/sysupdate-resource.c index acd9604f2945d..bd1560371d42e 100644 --- a/src/sysupdate/sysupdate-resource.c +++ b/src/sysupdate/sysupdate-resource.c @@ -139,9 +139,11 @@ static int resource_load_from_directory_recursive( if ((stripped = startswith(de->d_name, ".sysupdate.partial."))) { de_d_name_stripped = stripped; is_partial = true; + is_pending = false; } else if ((stripped = startswith(de->d_name, ".sysupdate.pending."))) { de_d_name_stripped = stripped; is_pending = true; + is_partial = false; } else de_d_name_stripped = de->d_name; @@ -192,6 +194,9 @@ static int resource_load_from_directory_recursive( if (instance->metadata.mode == MODE_INVALID) instance->metadata.mode = st.st_mode & 0775; /* mask out world-writability and suid and stuff, for safety */ + /* Can’t be both partial and pending. */ + assert(!(is_partial && is_pending)); + instance->is_partial = is_partial; instance->is_pending = is_pending; } @@ -313,6 +318,9 @@ static int resource_load_from_blockdev(Resource *rr) { if (instance->metadata.read_only < 0) instance->metadata.read_only = instance->partition_info.read_only; + /* Can’t be both partial and pending. */ + assert(!(is_partial && is_pending)); + instance->is_partial = is_partial; instance->is_pending = is_pending; } diff --git a/src/sysupdate/sysupdate.c b/src/sysupdate/sysupdate.c index 76b6507f9a438..2dd1bfdaac38e 100644 --- a/src/sysupdate/sysupdate.c +++ b/src/sysupdate/sysupdate.c @@ -410,7 +410,10 @@ static int context_discover_update_sets_by_flag(Context *c, UpdateSetFlags flags extra_flags |= UPDATE_PROTECTED; /* Partial or pending updates by definition are not incomplete, they’re - * partial/pending instead */ + * partial/pending instead. While an individual Instance cannot be both partial and + * pending, an UpdateSet as a whole can contain both partial and pending instances. */ + assert(!match || !(match->is_partial && match->is_pending)); + if (match && match->is_partial) extra_flags = (extra_flags | UPDATE_PARTIAL) & ~UPDATE_INCOMPLETE; From b2d19b4651fb87b8f6dc8a427a96d7a9d3a16961 Mon Sep 17 00:00:00 2001 From: Philip Withnall Date: Wed, 22 Apr 2026 17:23:49 +0100 Subject: [PATCH 002/242] sysupdate: Allow partial+pending flags in a few more places for UpdateSets While a resource Instance can either be partial or pending, but not both; an UpdateSet (which potentially comprises several Instances) can be both partial *and* pending if it contains Instances in both those states. Amend a few bits of internal code to allow that in situations which were previously overlooked. Signed-off-by: Philip Withnall --- src/sysupdate/sysupdate-update-set-flags.c | 5 +++++ src/sysupdate/sysupdate.c | 2 +- 2 files changed, 6 insertions(+), 1 deletion(-) diff --git a/src/sysupdate/sysupdate-update-set-flags.c b/src/sysupdate/sysupdate-update-set-flags.c index 36801938f65f1..7b684576b0614 100644 --- a/src/sysupdate/sysupdate-update-set-flags.c +++ b/src/sysupdate/sysupdate-update-set-flags.c @@ -70,6 +70,11 @@ const char* update_set_flags_to_string(UpdateSetFlags flags) { case UPDATE_INSTALLED|UPDATE_PARTIAL|UPDATE_NEWEST|UPDATE_PROTECTED: case UPDATE_INSTALLED|UPDATE_AVAILABLE|UPDATE_PARTIAL|UPDATE_NEWEST: case UPDATE_INSTALLED|UPDATE_AVAILABLE|UPDATE_PARTIAL|UPDATE_NEWEST|UPDATE_PROTECTED: + /* can also contain pending instances: */ + case UPDATE_INSTALLED|UPDATE_PARTIAL|UPDATE_PENDING|UPDATE_NEWEST: + case UPDATE_INSTALLED|UPDATE_PARTIAL|UPDATE_PENDING|UPDATE_NEWEST|UPDATE_PROTECTED: + case UPDATE_INSTALLED|UPDATE_AVAILABLE|UPDATE_PARTIAL|UPDATE_PENDING|UPDATE_NEWEST: + case UPDATE_INSTALLED|UPDATE_AVAILABLE|UPDATE_PARTIAL|UPDATE_PENDING|UPDATE_NEWEST|UPDATE_PROTECTED: return "current+partial"; case UPDATE_AVAILABLE|UPDATE_NEWEST: diff --git a/src/sysupdate/sysupdate.c b/src/sysupdate/sysupdate.c index 2dd1bfdaac38e..89efe36c857f0 100644 --- a/src/sysupdate/sysupdate.c +++ b/src/sysupdate/sysupdate.c @@ -669,7 +669,7 @@ static int context_show_version(Context *c, const char *version) { Instance *i = *inst; if (!i) { - assert(FLAGS_SET(us->flags, UPDATE_INCOMPLETE)); + assert(us->flags & (UPDATE_INCOMPLETE|UPDATE_PARTIAL|UPDATE_PENDING)); continue; } From 2babac90137ea4b6a70958133c783131d2619051 Mon Sep 17 00:00:00 2001 From: Philip Withnall Date: Wed, 22 Apr 2026 17:25:44 +0100 Subject: [PATCH 003/242] sysupdate: Allow a partial version to be the candidate MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Previously we allowed a pending version to be the candidate — but if there are no better choices, then we might as well allow a partial version to be candidate as well. The alternative is having no update candidate when a new version is partially installed (i.e. downloaded but not moved into place). This would mean that an update which is interrupted then needs to be re-run with an explicit version number to progress, rather than being able to be re-run without a version number (as it was in the first place). Signed-off-by: Philip Withnall Helps: https://github.com/systemd/systemd/issues/41502 --- src/sysupdate/sysupdate.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/src/sysupdate/sysupdate.c b/src/sysupdate/sysupdate.c index 89efe36c857f0..cba7960f0be67 100644 --- a/src/sysupdate/sysupdate.c +++ b/src/sysupdate/sysupdate.c @@ -505,8 +505,9 @@ static int context_discover_update_sets_by_flag(Context *c, UpdateSetFlags flags c->candidate && strverscmp_improved(c->newest_installed->version, c->candidate->version) >= 0) c->candidate = NULL; - /* Newest installed is still pending and no candidate is set? Then it becomes the candidate. */ - if (c->newest_installed && FLAGS_SET(c->newest_installed->flags, UPDATE_PENDING) && + /* Newest installed is still pending or partial and no candidate is set? Then it becomes the candidate. */ + if (c->newest_installed && + (c->newest_installed->flags & (UPDATE_PENDING|UPDATE_PARTIAL)) && !c->candidate) c->candidate = c->newest_installed; From e1146fe710d0d1bbe0fdca974f66edd7f6c573cc Mon Sep 17 00:00:00 2001 From: Philip Withnall Date: Wed, 22 Apr 2026 17:28:31 +0100 Subject: [PATCH 004/242] sysupdate: Allow a partial version to be vacuumed MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Previously we prevented partial and pending versions from being vacuumed. But until we support resuming downloads, there’s nothing else which can be done with a partial version except to vacuum it and try again. Accordingly, allow partial versions (but not pending versions) to be vacuumed. This behaviour can be changed again once resuming downloads is supported — at that point I expect we’ll want to try resuming the partial download rather than throwing it all away and trying again. Signed-off-by: Philip Withnall Helps: https://github.com/systemd/systemd/issues/41502 --- src/sysupdate/sysupdate-transfer.c | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/src/sysupdate/sysupdate-transfer.c b/src/sysupdate/sysupdate-transfer.c index 3ed7a2ae3a488..8db1c81962f70 100644 --- a/src/sysupdate/sysupdate-transfer.c +++ b/src/sysupdate/sysupdate-transfer.c @@ -816,10 +816,13 @@ int transfer_vacuum( continue; } - /* If this is listed among the protected versions, then let's not remove it */ - if (strv_contains(t->protected_versions, instance->metadata.version) || - (extra_protected_version && streq(extra_protected_version, instance->metadata.version))) { - log_debug("Version '%s' is pending/partial but protected, not removing.", instance->metadata.version); + /* If this is pending and listed among the protected versions, then let's not remove it. + * In future, we will also want to keep partial protected versions, but that’s only useful + * once we support resuming downloads. */ + if (instance->is_pending && + (strv_contains(t->protected_versions, instance->metadata.version) || + (extra_protected_version && streq(extra_protected_version, instance->metadata.version)))) { + log_debug("Version '%s' is pending but protected, not removing.", instance->metadata.version); i++; continue; } From 66b950cd3f47f49087ddd4a2f4812e82b209f2b7 Mon Sep 17 00:00:00 2001 From: Philip Withnall Date: Wed, 22 Apr 2026 17:31:27 +0100 Subject: [PATCH 005/242] updatectl: Show a helpful error if an update is partially downloaded MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit If an update is partially downloaded and the user tries to update again, `updatectl` can’t currently do anything (it doesn’t yet support resuming downloads). At the moment, though, it’ll return success as if the system was up to date, even though it isn’t up to date. Instead, print a more helpful error message telling the user to try vacuuming the partial version and trying again. I decided not to make it automatically vacuum the partial version, as that seems like a way to get into a nasty retry loop if, for example, the checksum provided by the server doesn’t match that of the downloaded file (which is one way to trigger this code path). Add an integration test which simulates this failure by corrupting the `SHA256SUMS` file, trying to download an update, and then working through the recovery steps. Signed-off-by: Philip Withnall Fixes: https://github.com/systemd/systemd/issues/41502 --- src/sysupdate/sysupdate.c | 4 +-- src/sysupdate/updatectl.c | 4 +++ test/units/TEST-72-SYSUPDATE.sh | 50 +++++++++++++++++++++++++++++++-- 3 files changed, 52 insertions(+), 6 deletions(-) diff --git a/src/sysupdate/sysupdate.c b/src/sysupdate/sysupdate.c index cba7960f0be67..4d083db220b1e 100644 --- a/src/sysupdate/sysupdate.c +++ b/src/sysupdate/sysupdate.c @@ -1029,9 +1029,7 @@ static int context_acquire( if (FLAGS_SET(us->flags, UPDATE_INCOMPLETE)) log_info("Selected update '%s' is already installed, but incomplete. Repairing.", us->version); else if (FLAGS_SET(us->flags, UPDATE_PARTIAL)) { - log_info("Selected update '%s' is already acquired and partially installed. Vacuum it to try installing again.", us->version); - - return 0; + return log_error_errno(SYNTHETIC_ERRNO(EUCLEAN), "Selected update '%s' is already acquired and partially installed. Vacuum it to try installing again.", us->version); } else if (FLAGS_SET(us->flags, UPDATE_PENDING)) { log_info("Selected update '%s' is already acquired and pending installation.", us->version); diff --git a/src/sysupdate/updatectl.c b/src/sysupdate/updatectl.c index c6e5c33fdfe48..16e9d21ae91b4 100644 --- a/src/sysupdate/updatectl.c +++ b/src/sysupdate/updatectl.c @@ -867,6 +867,10 @@ static int update_render_progress(sd_event_source *source, void *userdata) { clear_progress_bar_unbuffered(target); fprintf(stderr, "%s: %s Already up-to-date\n", target, GREEN_CHECK_MARK()); n--; /* Don't consider this target in the total */ + } else if (progress == -EUCLEAN) { + clear_progress_bar_unbuffered(target); + fprintf(stderr, "%s: %s Update is already acquired and partially installed. Vacuum it to try installing again.\n", target, RED_CROSS_MARK()); + total += 100; } else if (progress < 0) { clear_progress_bar_unbuffered(target); fprintf(stderr, "%s: %s %s\n", target, RED_CROSS_MARK(), STRERROR(progress)); diff --git a/test/units/TEST-72-SYSUPDATE.sh b/test/units/TEST-72-SYSUPDATE.sh index 27268c250b5e6..6709cd543f926 100755 --- a/test/units/TEST-72-SYSUPDATE.sh +++ b/test/units/TEST-72-SYSUPDATE.sh @@ -66,6 +66,7 @@ update_checksums_with_best_before() { new_version() { local sector_size="${1:?}" local version="${2:?}" + local corrupt="${3:-}" # Create a pair of random partition payloads, and compress one. # To make not the initial bytes of part1-xxx.raw accidentally match one of the compression header, @@ -90,11 +91,26 @@ new_version() { echo $RANDOM >"$WORKDIR/source/dir-$version/bar.txt" tar --numeric-owner -C "$WORKDIR/source/dir-$version/" -czf "$WORKDIR/source/dir-$version.tar.gz" . - update_checksums + if [[ "$corrupt" == "corrupt-checksum" ]]; then + # As requested, add a deliberately corrupt checksum for this file. This + # will get overwritten next time update_checksums() is called, but the + # integration test will probably have moved on to other things by then. + { + echo "abad1deaabad1deaabad1deaabad1deaabad1deaabad1deaabad1deaabad1dea part1-$version.raw" + echo "abad1deaabad1deaabad1deaabad1deaabad1deaabad1deaabad1deaabad1dea part2-$version.raw" + echo "abad1deaabad1deaabad1deaabad1deaabad1deaabad1deaabad1deaabad1dea part2-$version.raw.gz" + echo "abad1deaabad1deaabad1deaabad1deaabad1deaabad1deaabad1deaabad1dea uki-$version.efi" + echo "abad1deaabad1deaabad1deaabad1deaabad1deaabad1deaabad1deaabad1dea uki-extra-$version.efi" + echo "abad1deaabad1deaabad1deaabad1deaabad1deaabad1deaabad1deaabad1dea dir-$version.tar.gz" + } >> "$WORKDIR/source/SHA256SUMS" + else + update_checksums + fi } update_now() { local update_type="${1:?}" + local checks="${2:-}" # Update to newest version. First there should be an update ready, then we # do the update, and then there should not be any ready anymore @@ -105,7 +121,10 @@ update_now() { # modes. Some updates in the test suite need to be monolithic (e.g. when # repairing an installation), so that can be overridden via the local. - "$SYSUPDATE" --verify=no check-new + if [[ "$checks" != "no-checks" ]]; then + "$SYSUPDATE" --verify=no check-new + fi + if [[ "$update_type" == "monolithic" ]]; then "$SYSUPDATE" --verify=no update elif [[ "$update_type" == "split-offline" ]]; then @@ -125,7 +144,10 @@ update_now() { else exit 1 fi - (! "$SYSUPDATE" --verify=no check-new) + + if [[ "$checks" != "no-checks" ]]; then + (! "$SYSUPDATE" --verify=no check-new) + fi } verify_version() { @@ -462,6 +484,28 @@ EOF verify_version_current "$blockdev" "$sector_size" v8 1 verify_version "$blockdev" "$sector_size" v7 2 + # Create a 9th version but corrupt the checksum in SHA256SUMS so pulling it + # fails when verifying the checksum, in order to create a current+partial + # state. Try to update again and verify that this results in an error. + # Vacuum the partial version, regenerate it on the server, try updating + # again and it should succeed. + new_version "$sector_size" v9 "corrupt-checksum" + (! update_now "$update_type") + "$SYSUPDATE" --offline list v9 | grep "partial" >/dev/null + verify_version_current "$blockdev" "$sector_size" v8 1 + # don’t verify the other part of the block device as it’s in an indeterminate state + (! update_now "$update_type" "no-checks") |& tee "$WORKDIR"/update_now-9 + cat "$WORKDIR"/update_now-9 + grep "is already acquired and partially installed. Vacuum it to try installing again." "$WORKDIR"/update_now-9 + "$SYSUPDATE" --offline vacuum |& grep "Removing old partial" >/dev/null + verify_version_current "$blockdev" "$sector_size" v8 1 + # don’t verify the other part of the block device as it’s in an indeterminate state + "$SYSUPDATE" --verify=no list v9 | grep "candidate" >/dev/null + new_version "$sector_size" v9 + update_now "$update_type" + verify_version "$blockdev" "$sector_size" v8 1 + verify_version_current "$blockdev" "$sector_size" v9 2 + # Cleanup [[ -b "$blockdev" ]] && losetup --detach "$blockdev" rm "$BACKING_FILE" From 2d892207a11153750ed090548a29aacb4d38afd2 Mon Sep 17 00:00:00 2001 From: Daan De Meyer Date: Mon, 27 Apr 2026 11:01:40 +0200 Subject: [PATCH 006/242] fs-util: Some followups for xopenat_full() --- src/basic/fs-util.c | 27 +++++++++++++-------------- 1 file changed, 13 insertions(+), 14 deletions(-) diff --git a/src/basic/fs-util.c b/src/basic/fs-util.c index 3960938309fcd..84b76072d7c63 100644 --- a/src/basic/fs-util.c +++ b/src/basic/fs-util.c @@ -1136,33 +1136,34 @@ static int openat_with_automount(int dir_fd, const char *path, int open_flags, m * does not do that, so we use open_tree() without OPEN_TREE_CLONE which is equivalent to open() with * O_PATH except that it does trigger automounts. Some sandboxes reject open_tree() with EPERM or * ENOSYS, in which case we fall back to plain openat(): autofs wouldn't work inside a restricted - * mount namespace anyway. */ + * mount namespace anyway. open_tree() only ever returns O_PATH fds, so this helper is for O_PATH + * acquisition only. */ static bool can_open_tree = true; - int r; assert(dir_fd >= 0 || dir_fd == AT_FDCWD); assert(path); + assert(FLAGS_SET(open_flags, O_PATH)); if (can_open_tree) { - r = RET_NERRNO(open_tree(dir_fd, path, - OPEN_TREE_CLOEXEC | - (FLAGS_SET(open_flags, O_NOFOLLOW) ? AT_SYMLINK_NOFOLLOW : 0))); - if (r >= 0) { + int fd = RET_NERRNO(open_tree(dir_fd, path, + OPEN_TREE_CLOEXEC | + (FLAGS_SET(open_flags, O_NOFOLLOW) ? AT_SYMLINK_NOFOLLOW : 0))); + if (fd >= 0) { /* open_tree() doesn't honor O_DIRECTORY, so enforce it ourselves to match * the openat() fallback's behavior. */ if (FLAGS_SET(open_flags, O_DIRECTORY)) { - int q = fd_verify_directory(r); + int q = fd_verify_directory(fd); if (q < 0) { - safe_close(r); + safe_close(fd); return q; } } - return r; + return fd; } - if (r != -EPERM && !ERRNO_IS_NEG_NOT_SUPPORTED(r)) - return r; + if (fd != -EPERM && !ERRNO_IS_NEG_NOT_SUPPORTED(fd)) + return fd; can_open_tree = false; } @@ -1178,9 +1179,7 @@ int xopenat_full(int dir_fd, const char *path, int open_flags, XOpenFlags xopen_ assert(dir_fd >= 0 || IN_SET(dir_fd, AT_FDCWD, XAT_FDROOT)); /* An inode can only be one of a directory, a regular file or a socket at the same time. */ - assert(!(FLAGS_SET(open_flags, O_DIRECTORY) && FLAGS_SET(xopen_flags, XO_REGULAR))); - assert(!(FLAGS_SET(xopen_flags, XO_REGULAR) && FLAGS_SET(xopen_flags, XO_SOCKET))); - assert(!(FLAGS_SET(open_flags, O_DIRECTORY) && FLAGS_SET(xopen_flags, XO_SOCKET))); + assert(FLAGS_SET(open_flags, O_DIRECTORY) + FLAGS_SET(xopen_flags, XO_REGULAR) + FLAGS_SET(xopen_flags, XO_SOCKET) <= 1); /* Sockets cannot be open()ed, only pinned via O_PATH. */ assert(!FLAGS_SET(xopen_flags, XO_SOCKET) || FLAGS_SET(open_flags, O_PATH)); /* XO_TRIGGER_AUTOMOUNT requires O_PATH and does not support creating inodes. XO_SUBVOLUME From 81d23b58e8d4fe0560d70ae4c6009e8f9f25eca2 Mon Sep 17 00:00:00 2001 From: Christian Goeschel Ndjomouo Date: Sat, 4 Apr 2026 00:16:41 -0400 Subject: [PATCH 007/242] shared/pager: add support for more(1) pager in secure mode The more(1) pager (part of util-linux) now supports secure mode, which can be enabled with the PAGERSECURE environment variable. Adding support for more(1) in secure mode serves as an alternative for systems that do not have less installed or for users who prefer it. Commit d2fce960f9cac740 introduced secure mode in more(1) and the new feature is available in version 2.42 of util-linux. Signed-off-by: Christian Goeschel Ndjomouo --- src/shared/pager.c | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/src/shared/pager.c b/src/shared/pager.c index 3c89aacab48d0..61718aeaa6807 100644 --- a/src/shared/pager.c +++ b/src/shared/pager.c @@ -206,6 +206,13 @@ void pager_open(PagerFlags flags) { _exit(EXIT_FAILURE); } + /* Some pager implementations support the PAGERSECURE environment variable, e.g. more(1) */ + r = set_unset_env("PAGERSECURE", use_secure_mode ? "1" : NULL, true); + if (r < 0) { + log_error_errno(r, "Failed to adjust environment variable PAGERSECURE: %m"); + _exit(EXIT_FAILURE); + } + if (trust_pager && pager_args) { /* The pager config might be set globally, and we cannot * know if the user adjusted it to be appropriate for the * secure mode. Thus, start the pager specified through @@ -228,8 +235,8 @@ void pager_open(PagerFlags flags) { static const char* pagers[] = { "pager", "less", "more", "(built-in)" }; for (unsigned i = 0; i < ELEMENTSOF(pagers); i++) { - /* Only less (and our trivial fallback) implement secure mode right now. */ - if (use_secure_mode && !STR_IN_SET(pagers[i], "less", "(built-in)")) + /* Only less, more (and our trivial fallback) implement secure mode right now. */ + if (use_secure_mode && !STR_IN_SET(pagers[i], "less", "more", "(built-in)")) continue; r = loop_write(exe_name_pipe[1], pagers[i], strlen(pagers[i]) + 1); From 84e53b8cfcdd0d3fe5810279bdf261e6cd5910f9 Mon Sep 17 00:00:00 2001 From: Christian Goeschel Ndjomouo Date: Sat, 4 Apr 2026 00:29:02 -0400 Subject: [PATCH 008/242] man: document secure mode support for the more(1) pager Signed-off-by: Christian Goeschel Ndjomouo --- man/common-variables.xml | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/man/common-variables.xml b/man/common-variables.xml index 0f807b2cde1d2..74463c23d6083 100644 --- a/man/common-variables.xml +++ b/man/common-variables.xml @@ -190,11 +190,14 @@ allowing untrusted users to execute commands with elevated privileges. This option takes a boolean argument. When set to true, the "secure mode" of the pager is - enabled. In "secure mode", will be set when invoking the pager, which - instructs the pager to disable commands that open or create new files or start new subprocesses. + enabled. In "secure mode", and will be set + when invoking the pager, which instructs the pager to disable commands that open or create new files or + start new subprocesses. Currently only less1 is known - to understand this variable and implement "secure mode". + project='man-pages'>less1 and + more1 are known + to understand these variables, respectively, and implement "secure mode". When set to false, no limitation is placed on the pager. Setting SYSTEMD_PAGERSECURE=0 or not removing it from the inherited environment may allow From df87d750380425384464a991594706137a528a75 Mon Sep 17 00:00:00 2001 From: Christian Goeschel Ndjomouo Date: Mon, 20 Apr 2026 23:41:12 -0400 Subject: [PATCH 009/242] doc: bump util-linux baseline to v2.42 This baseline bump is mainly to support the secure mode feature in more(1) that has been made available since util-linux v2.42. Signed-off-by: Christian Goeschel Ndjomouo --- README | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/README b/README index ddcb863f4ada2..51a78bd2a1d00 100644 --- a/README +++ b/README @@ -264,9 +264,9 @@ REQUIREMENTS: During runtime, you need the following additional dependencies: - util-linux >= v2.27.1 required (including but not limited to: mount, - umount, swapon, swapoff, sulogin, - agetty, fsck) + util-linux >= v2.42 required (including but not limited to: mount, + umount, swapon, swapoff, sulogin, + agetty, fsck, more) dbus >= 1.4.0 (strictly speaking optional, but recommended) NOTE: If using dbus < 1.9.18, you should override the default policy directory (--with-dbuspolicydir=/etc/dbus-1/system.d). From 01bf2b9f523d3bcddf74cac6af4ac5c36ce75550 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Zbigniew=20J=C4=99drzejewski-Szmek?= Date: Wed, 29 Apr 2026 10:54:43 +0200 Subject: [PATCH 010/242] sd-varlink: drop pointless bitfield As is often the case, in this case because of alignment, we are actually not saving any space. With the bitfield we are using one bit of the 8 bytes allocated, and without the bitfield we are using 8 bits of that. But we're paying a price in generated code, at every access site to the field: $ diff <(objdump -S build/libsystemd.so.old) <(objdump -S build/libsystemd.so.new) ... v->protocol_upgrade = false; - fa2d2: 48 8b 45 a8 mov -0x58(%rbp),%rax - fa2d6: 0f b6 90 90 01 00 00 movzbl 0x190(%rax),%edx - fa2dd: 83 e2 fe and $0xfffffffe,%edx - fa2e0: 88 90 90 01 00 00 mov %dl,0x190(%rax) + fa2a9: 48 8b 45 a8 mov -0x58(%rbp),%rax + fa2ad: c6 80 90 01 00 00 00 movb $0x0,0x190(%rax) --- src/libsystemd/sd-varlink/varlink-internal.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/libsystemd/sd-varlink/varlink-internal.h b/src/libsystemd/sd-varlink/varlink-internal.h index 8087c2c432464..ff359852f488f 100644 --- a/src/libsystemd/sd-varlink/varlink-internal.h +++ b/src/libsystemd/sd-varlink/varlink-internal.h @@ -107,7 +107,7 @@ typedef struct sd_varlink { * ensure the caller's contract is honored. The transport-layer "stop reading at the * next message boundary" behavior is governed independently by the JsonStream's * bounded_reads flag. */ - bool protocol_upgrade:1; + bool protocol_upgrade; void *userdata; From 242fca7516eeedb3157be47be83f608d56592a46 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Zbigniew=20J=C4=99drzejewski-Szmek?= Date: Wed, 29 Apr 2026 11:00:00 +0200 Subject: [PATCH 011/242] sd-varlink: reduce size of varlink structs struct sd_varlink: - /* size: 448, cachelines: 7, members: 21 */ + /* size: 432, cachelines: 7, members: 21 */ struct sd_varlink_server: - /* size: 160, cachelines: 3, members: 21 */ + /* size: 152, cachelines: 3, members: 21 */ --- src/libsystemd/sd-varlink/varlink-internal.h | 28 +++++++++----------- 1 file changed, 13 insertions(+), 15 deletions(-) diff --git a/src/libsystemd/sd-varlink/varlink-internal.h b/src/libsystemd/sd-varlink/varlink-internal.h index ff359852f488f..32d6d5983a75f 100644 --- a/src/libsystemd/sd-varlink/varlink-internal.h +++ b/src/libsystemd/sd-varlink/varlink-internal.h @@ -74,9 +74,8 @@ typedef enum VarlinkState { typedef struct sd_varlink { unsigned n_ref; - sd_varlink_server *server; - VarlinkState state; + sd_varlink_server *server; /* Transport layer: input/output buffers, fd passing, output queue, read/write/parse * step functions, sd-event integration (input/output/time event sources, idle @@ -87,6 +86,13 @@ typedef struct sd_varlink { unsigned n_pending; + /* Per-call protocol-upgrade marker: set when the *current* method call carries the + * SD_VARLINK_METHOD_UPGRADE flag. Validated by sd_varlink_reply_and_upgrade() to + * ensure the caller's contract is honored. The transport-layer "stop reading at the + * next message boundary" behavior is governed independently by the JsonStream's + * bounded_reads flag. */ + bool protocol_upgrade; + sd_varlink_reply_t reply_callback; sd_json_variant *current; @@ -102,13 +108,6 @@ typedef struct sd_varlink { size_t n_previous_fds; char *sentinel; - /* Per-call protocol-upgrade marker: set when the *current* method call carries the - * SD_VARLINK_METHOD_UPGRADE flag. Validated by sd_varlink_reply_and_upgrade() to - * ensure the caller's contract is honored. The transport-layer "stop reading at the - * next message boundary" behavior is governed independently by the JsonStream's - * bounded_reads flag. */ - bool protocol_upgrade; - void *userdata; sd_event_source *quit_event_source; @@ -145,8 +144,12 @@ typedef struct sd_varlink_server { sd_event *event; int64_t event_priority; - unsigned n_connections; Hashmap *by_uid; /* UID_TO_PTR(uid) → UINT_TO_PTR(n_connections) */ + unsigned n_connections; + unsigned connections_max; + unsigned connections_per_uid_max; + + bool exit_on_idle; void *userdata; @@ -155,11 +158,6 @@ typedef struct sd_varlink_server { char *product; char *version; char *url; - - unsigned connections_max; - unsigned connections_per_uid_max; - - bool exit_on_idle; } sd_varlink_server; #define varlink_log_errno(v, error, fmt, ...) \ From d9e0883316d119fb484c677f27ba24f31baded72 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Zbigniew=20J=C4=99drzejewski-Szmek?= Date: Wed, 29 Apr 2026 11:20:06 +0200 Subject: [PATCH 012/242] sd-json: stop printing debug messages about extension fields The intent was good, but we now print two or three of those messages for each report metrics received on the wire. If the json object is extensible, then it's all good and we don't need to inundate the user with this trivial information. (And the message also sounds like something is wrong or unexpected, when it totally isn't.) ... (string):1:73: Unrecognized object field 'object', assuming extension. (string):1:89: Unrecognized object field 'value', assuming extension. json-stream: Received message: {"parameters":{"name":"io.systemd.Network.CarrierState","object":"virbr0","value":"degraded-carrier"},"continues":true} (string):1:66: Unrecognized object field 'object', assuming extension. (string):1:83: Unrecognized object field 'value', assuming extension. json-stream: Received message: {"parameters":{"name":"io.systemd.Network.CarrierState","object":"lo","value":"carrier"},"continues":true} (string):1:66: Unrecognized object field 'object', assuming extension. (string):1:79: Unrecognized object field 'value', assuming extension. json-stream: Received message: {"parameters":{"name":"io.systemd.Network.CarrierState","object":"wlp0s20f3","value":"carrier"},"continues":true} (string):1:66: Unrecognized object field 'object', assuming extension. (string):1:86: Unrecognized object field 'value', assuming extension. ... --- src/libsystemd/sd-json/sd-json.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/src/libsystemd/sd-json/sd-json.c b/src/libsystemd/sd-json/sd-json.c index 4c541275c42c5..fbc2e55d23f22 100644 --- a/src/libsystemd/sd-json/sd-json.c +++ b/src/libsystemd/sd-json/sd-json.c @@ -5294,10 +5294,8 @@ _public_ int sd_json_dispatch_full( done++; } else { - if (flags & SD_JSON_ALLOW_EXTENSIONS) { - json_log(value, flags|SD_JSON_DEBUG, 0, "Unrecognized object field '%s', assuming extension.", sd_json_variant_string(key)); + if (flags & SD_JSON_ALLOW_EXTENSIONS) continue; - } json_log(value, flags, 0, "Unexpected object field '%s'.", sd_json_variant_string(key)); if (flags & SD_JSON_PERMISSIVE) From 5948ff5fe20395859cab609e0d3648fc697385e0 Mon Sep 17 00:00:00 2001 From: Daan De Meyer Date: Tue, 28 Apr 2026 19:47:14 +0200 Subject: [PATCH 013/242] libsystemd: Clean up meson.build Merge the two blocks adding tests, since there seems to be no obvious reason to have two separate blocks, as they both contain tests from the same libraries. --- src/libsystemd/meson.build | 61 +++++++++++-------- .../sd-hwdb}/test-sd-hwdb.c | 0 .../sd-id128}/test-id128.c | 0 src/{test => libsystemd/sd-json}/test-json.c | 0 .../sd-path}/test-sd-path.c | 0 .../sd-varlink}/test-varlink-idl.c | 0 .../sd-varlink}/test-varlink.c | 0 src/{test => shared}/test-varlink-idl-util.h | 0 src/test/meson.build | 15 ----- 9 files changed, 34 insertions(+), 42 deletions(-) rename src/{test => libsystemd/sd-hwdb}/test-sd-hwdb.c (100%) rename src/{test => libsystemd/sd-id128}/test-id128.c (100%) rename src/{test => libsystemd/sd-json}/test-json.c (100%) rename src/{test => libsystemd/sd-path}/test-sd-path.c (100%) rename src/{test => libsystemd/sd-varlink}/test-varlink-idl.c (100%) rename src/{test => libsystemd/sd-varlink}/test-varlink.c (100%) rename src/{test => shared}/test-varlink-idl-util.h (100%) diff --git a/src/libsystemd/meson.build b/src/libsystemd/meson.build index 08d8d7c5c39e7..2fab54719474c 100644 --- a/src/libsystemd/meson.build +++ b/src/libsystemd/meson.build @@ -174,29 +174,6 @@ libsystemd_pc = custom_target( ############################################################ -simple_tests += files( - 'sd-journal/test-audit-type.c', - 'sd-journal/test-catalog.c', - 'sd-journal/test-journal-file.c', - 'sd-journal/test-journal-init.c', - 'sd-journal/test-journal-match.c', - 'sd-journal/test-journal-send.c', - 'sd-journal/test-mmap-cache.c', -) - -libsystemd_tests += [ - { - 'sources' : files('sd-journal/test-journal-enum.c'), - 'timeout' : 360, - }, - { - 'sources' : files('sd-event/test-event.c'), - 'timeout' : 120, - } -] - -############################################################ - simple_tests += files( 'sd-bus/test-bus-creds.c', 'sd-bus/test-bus-introspect.c', @@ -204,20 +181,26 @@ simple_tests += files( 'sd-bus/test-bus-vtable.c', 'sd-device/test-device-util.c', 'sd-device/test-sd-device-monitor.c', + 'sd-hwdb/test-sd-hwdb.c', + 'sd-id128/test-id128.c', + 'sd-journal/test-audit-type.c', + 'sd-journal/test-catalog.c', + 'sd-journal/test-journal-file.c', 'sd-journal/test-journal-flush.c', + 'sd-journal/test-journal-init.c', 'sd-journal/test-journal-interleaving.c', + 'sd-journal/test-journal-match.c', + 'sd-journal/test-journal-send.c', 'sd-journal/test-journal-stream.c', 'sd-journal/test-journal.c', + 'sd-journal/test-mmap-cache.c', 'sd-login/test-login.c', 'sd-login/test-sd-login.c', 'sd-netlink/test-netlink.c', + 'sd-path/test-sd-path.c', ) libsystemd_tests += [ - { - 'sources' : files('sd-device/test-sd-device.c'), - 'dependencies' : [ threads, libmount_cflags ], - }, { 'sources' : files('sd-bus/test-bus-address.c'), 'dependencies' : threads @@ -275,6 +258,18 @@ libsystemd_tests += [ 'dependencies' : threads, 'timeout' : 120, }, + { + 'sources' : files('sd-device/test-sd-device.c'), + 'dependencies' : [threads, libmount_cflags], + }, + { + 'sources' : files('sd-event/test-event.c'), + 'timeout' : 120, + }, + { + 'sources' : files('sd-journal/test-journal-enum.c'), + 'timeout' : 360, + }, { 'sources' : files('sd-journal/test-journal-append.c'), 'type' : 'manual', @@ -287,11 +282,23 @@ libsystemd_tests += [ 'sources' : files('sd-journal/test-journal-verify.c'), 'timeout' : 90, }, + { + 'sources' : files('sd-json/test-json.c'), + 'dependencies' : libm, + }, { 'sources' : files('sd-resolve/test-resolve.c'), 'dependencies' : threads, 'timeout' : 120, }, + { + 'sources' : files('sd-varlink/test-varlink.c'), + 'dependencies' : threads, + }, + { + 'sources' : files('sd-varlink/test-varlink-idl.c'), + 'dependencies' : threads, + }, ] if cxx_cmd != '' diff --git a/src/test/test-sd-hwdb.c b/src/libsystemd/sd-hwdb/test-sd-hwdb.c similarity index 100% rename from src/test/test-sd-hwdb.c rename to src/libsystemd/sd-hwdb/test-sd-hwdb.c diff --git a/src/test/test-id128.c b/src/libsystemd/sd-id128/test-id128.c similarity index 100% rename from src/test/test-id128.c rename to src/libsystemd/sd-id128/test-id128.c diff --git a/src/test/test-json.c b/src/libsystemd/sd-json/test-json.c similarity index 100% rename from src/test/test-json.c rename to src/libsystemd/sd-json/test-json.c diff --git a/src/test/test-sd-path.c b/src/libsystemd/sd-path/test-sd-path.c similarity index 100% rename from src/test/test-sd-path.c rename to src/libsystemd/sd-path/test-sd-path.c diff --git a/src/test/test-varlink-idl.c b/src/libsystemd/sd-varlink/test-varlink-idl.c similarity index 100% rename from src/test/test-varlink-idl.c rename to src/libsystemd/sd-varlink/test-varlink-idl.c diff --git a/src/test/test-varlink.c b/src/libsystemd/sd-varlink/test-varlink.c similarity index 100% rename from src/test/test-varlink.c rename to src/libsystemd/sd-varlink/test-varlink.c diff --git a/src/test/test-varlink-idl-util.h b/src/shared/test-varlink-idl-util.h similarity index 100% rename from src/test/test-varlink-idl-util.h rename to src/shared/test-varlink-idl-util.h diff --git a/src/test/meson.build b/src/test/meson.build index 6f9a24eb04483..09c367d3074f3 100644 --- a/src/test/meson.build +++ b/src/test/meson.build @@ -120,7 +120,6 @@ simple_tests += files( 'test-hmac.c', 'test-hostname-setup.c', 'test-hostname-util.c', - 'test-id128.c', 'test-image-filter.c', 'test-image-policy.c', 'test-import-util.c', @@ -180,8 +179,6 @@ simple_tests += files( 'test-replace-var.c', 'test-rlimit-util.c', 'test-rm-rf.c', - 'test-sd-hwdb.c', - 'test-sd-path.c', 'test-secure-bits.c', 'test-serialize.c', 'test-set.c', @@ -346,10 +343,6 @@ executables += [ 'sources' : files('test-ipcrm.c'), 'type' : 'unsafe', }, - test_template + { - 'sources' : files('test-json.c'), - 'dependencies' : libm, - }, test_template + { 'sources' : files('test-kexec.c'), 'link_with' : [libshared], @@ -496,14 +489,6 @@ executables += [ 'sources' : files('test-utmp.c'), 'conditions' : ['ENABLE_UTMP'], }, - test_template + { - 'sources' : files('test-varlink.c'), - 'dependencies' : threads, - }, - test_template + { - 'sources' : files('test-varlink-idl.c'), - 'dependencies' : threads, - }, core_test_template + { 'sources' : files('test-varlink-idl-unit.c'), }, From 087fa20166f0becc7b41056d48e1efb76e4c20f3 Mon Sep 17 00:00:00 2001 From: Lennart Poettering Date: Thu, 23 Apr 2026 08:59:20 +0200 Subject: [PATCH 014/242] shared: add io.systemd.StorageProvider Varlink interface Generic Varlink API for services that hand out file descriptors to storage volumes. Three methods: Acquire() returns an fd for a named volume (optionally creating it from a template), ListVolumes() enumerates available volumes, ListTemplates() enumerates supported creation templates. Volume types follow kernel inode-type naming: blk (block device), reg (regular file), dir (directory). Intent is that multiple providers can sit behind AF_UNIX sockets in a well-known directory and be consumed uniformly by nspawn, vmspawn, the service manager (BindVolume=) and similar tools. --- src/libsystemd/sd-varlink/test-varlink-idl.c | 2 + src/shared/meson.build | 1 + .../varlink-io.systemd.StorageProvider.c | 119 ++++++++++++++++++ .../varlink-io.systemd.StorageProvider.h | 6 + 4 files changed, 128 insertions(+) create mode 100644 src/shared/varlink-io.systemd.StorageProvider.c create mode 100644 src/shared/varlink-io.systemd.StorageProvider.h diff --git a/src/libsystemd/sd-varlink/test-varlink-idl.c b/src/libsystemd/sd-varlink/test-varlink-idl.c index a645d4d9d360c..a5190897023fd 100644 --- a/src/libsystemd/sd-varlink/test-varlink-idl.c +++ b/src/libsystemd/sd-varlink/test-varlink-idl.c @@ -44,6 +44,7 @@ #include "varlink-io.systemd.Resolve.h" #include "varlink-io.systemd.Resolve.Hook.h" #include "varlink-io.systemd.Resolve.Monitor.h" +#include "varlink-io.systemd.StorageProvider.h" #include "varlink-io.systemd.Udev.h" #include "varlink-io.systemd.Unit.h" #include "varlink-io.systemd.UserDatabase.h" @@ -212,6 +213,7 @@ TEST(parse_format) { &vl_interface_io_systemd_Resolve, &vl_interface_io_systemd_Resolve_Hook, &vl_interface_io_systemd_Resolve_Monitor, + &vl_interface_io_systemd_StorageProvider, &vl_interface_io_systemd_Udev, &vl_interface_io_systemd_Unit, &vl_interface_io_systemd_UserDatabase, diff --git a/src/shared/meson.build b/src/shared/meson.build index c28fe040b6b2b..cd34b02f8506d 100644 --- a/src/shared/meson.build +++ b/src/shared/meson.build @@ -245,6 +245,7 @@ shared_sources = files( 'varlink-io.systemd.Resolve.Hook.c', 'varlink-io.systemd.Resolve.Monitor.c', 'varlink-io.systemd.Shutdown.c', + 'varlink-io.systemd.StorageProvider.c', 'varlink-io.systemd.Udev.c', 'varlink-io.systemd.Unit.c', 'varlink-io.systemd.UserDatabase.c', diff --git a/src/shared/varlink-io.systemd.StorageProvider.c b/src/shared/varlink-io.systemd.StorageProvider.c new file mode 100644 index 0000000000000..cd2a4f3fda0bc --- /dev/null +++ b/src/shared/varlink-io.systemd.StorageProvider.c @@ -0,0 +1,119 @@ +/* SPDX-License-Identifier: LGPL-2.1-or-later */ + +#include "bus-polkit.h" +#include "varlink-io.systemd.StorageProvider.h" + +static SD_VARLINK_DEFINE_ENUM_TYPE( + VolumeType, + SD_VARLINK_FIELD_COMMENT("Block device storage volumes, block-addressable"), + SD_VARLINK_DEFINE_ENUM_VALUE(blk), + SD_VARLINK_FIELD_COMMENT("Regular file storage volumes, byte-addressable"), + SD_VARLINK_DEFINE_ENUM_VALUE(reg), + SD_VARLINK_FIELD_COMMENT("POSIX file system storage volumes, path/offset-addressable"), + SD_VARLINK_DEFINE_ENUM_VALUE(dir)); + +static SD_VARLINK_DEFINE_ENUM_TYPE( + CreateMode, + SD_VARLINK_FIELD_COMMENT("Open if exists already, create if missing"), + SD_VARLINK_DEFINE_ENUM_VALUE(any), + SD_VARLINK_FIELD_COMMENT("Create if missing, fail if exists already"), + SD_VARLINK_DEFINE_ENUM_VALUE(new), + SD_VARLINK_FIELD_COMMENT("Open if exists already, fail if missing"), + SD_VARLINK_DEFINE_ENUM_VALUE(open)); + +static SD_VARLINK_DEFINE_METHOD( + Acquire, + SD_VARLINK_FIELD_COMMENT("The name of the storage volume to acquire"), + SD_VARLINK_DEFINE_INPUT(name, SD_VARLINK_STRING, 0), + SD_VARLINK_FIELD_COMMENT("Determines whether to open or create a storage volume"), + SD_VARLINK_DEFINE_INPUT_BY_TYPE(createMode, CreateMode, SD_VARLINK_NULLABLE), + SD_VARLINK_FIELD_COMMENT("The template to use when creating a new storage volume"), + SD_VARLINK_DEFINE_INPUT(template, SD_VARLINK_STRING, SD_VARLINK_NULLABLE), + SD_VARLINK_FIELD_COMMENT("Controls read/write access to the storage volume. If false and the storage volume cannot be opened in writable mode the call will fail. If null, storage volume will be acquired in writable mode if possible, read-only otherwise. If true, storage volume will be opened in read-only mode (and fail if that's not possible)."), + SD_VARLINK_DEFINE_INPUT(readOnly, SD_VARLINK_BOOL, SD_VARLINK_NULLABLE), + SD_VARLINK_FIELD_COMMENT("Dictates what kind of storage volume to request. Some storage volumes can be acquired either as regular file or as block device. In all other cases if this value doesn't match the volume type, the request will fail."), + SD_VARLINK_DEFINE_INPUT_BY_TYPE(requestAs, VolumeType, SD_VARLINK_NULLABLE), + SD_VARLINK_FIELD_COMMENT("The size of the storage volume, if one is created. Has no effect if no storage volume is created."), + SD_VARLINK_DEFINE_INPUT(createSizeBytes, SD_VARLINK_INT, SD_VARLINK_NULLABLE), + VARLINK_DEFINE_POLKIT_INPUT, + SD_VARLINK_FIELD_COMMENT("Returns an index into the array of file descriptors associated with this reply. This may be used to get the file descriptor of the volume. The file descriptor must be properly opened, i.e. not an O_PATH file descriptor."), + SD_VARLINK_DEFINE_OUTPUT(fileDescriptorIndex, SD_VARLINK_INT, 0), + SD_VARLINK_FIELD_COMMENT("The storage volume type, i.e. ultimately the inode type of the returned file descriptor"), + SD_VARLINK_DEFINE_OUTPUT_BY_TYPE(type, VolumeType, 0), + SD_VARLINK_FIELD_COMMENT("Whether storage volume has been opened in read-only mode"), + SD_VARLINK_DEFINE_OUTPUT(readOnly, SD_VARLINK_BOOL, 0), + SD_VARLINK_FIELD_COMMENT("Base UID for the returned file descriptor (if directory). If not specified shall default to 0."), + SD_VARLINK_DEFINE_OUTPUT(baseUID, SD_VARLINK_INT, SD_VARLINK_NULLABLE), + SD_VARLINK_FIELD_COMMENT("Base GID for the returned file descriptor (if directory). If not specified shall default to 0."), + SD_VARLINK_DEFINE_OUTPUT(baseGID, SD_VARLINK_INT, SD_VARLINK_NULLABLE)); + +static SD_VARLINK_DEFINE_METHOD_FULL( + ListVolumes, + SD_VARLINK_REQUIRES_MORE, + SD_VARLINK_FIELD_COMMENT("Specifies a shell glob to filter enumeration by"), + SD_VARLINK_DEFINE_INPUT(matchName, SD_VARLINK_STRING, SD_VARLINK_NULLABLE), + SD_VARLINK_FIELD_COMMENT("The storage volume's primary name"), + SD_VARLINK_DEFINE_OUTPUT(name, SD_VARLINK_STRING, 0), + SD_VARLINK_FIELD_COMMENT("Additional names"), + SD_VARLINK_DEFINE_OUTPUT(aliases, SD_VARLINK_STRING, SD_VARLINK_NULLABLE|SD_VARLINK_ARRAY), + SD_VARLINK_FIELD_COMMENT("The type of the storage volume"), + SD_VARLINK_DEFINE_OUTPUT_BY_TYPE(type, VolumeType, 0), + SD_VARLINK_FIELD_COMMENT("Whether the storage volume is read-only."), + SD_VARLINK_DEFINE_OUTPUT(readOnly, SD_VARLINK_BOOL, SD_VARLINK_NULLABLE), + SD_VARLINK_FIELD_COMMENT("Size in bytes, if known"), + SD_VARLINK_DEFINE_OUTPUT(sizeBytes, SD_VARLINK_INT, SD_VARLINK_NULLABLE), + SD_VARLINK_FIELD_COMMENT("Used bytes, if known"), + SD_VARLINK_DEFINE_OUTPUT(usedBytes, SD_VARLINK_INT, SD_VARLINK_NULLABLE)); + +static SD_VARLINK_DEFINE_METHOD_FULL( + ListTemplates, + SD_VARLINK_REQUIRES_MORE, + SD_VARLINK_FIELD_COMMENT("Specifies a shell glob to filter enumeration by"), + SD_VARLINK_DEFINE_INPUT(matchName, SD_VARLINK_STRING, SD_VARLINK_NULLABLE), + SD_VARLINK_FIELD_COMMENT("The template's name"), + SD_VARLINK_DEFINE_OUTPUT(name, SD_VARLINK_STRING, 0), + SD_VARLINK_FIELD_COMMENT("The type of the storage volumes defined by this template"), + SD_VARLINK_DEFINE_OUTPUT_BY_TYPE(type, VolumeType, 0)); + +static SD_VARLINK_DEFINE_ERROR(NoSuchVolume); +static SD_VARLINK_DEFINE_ERROR(VolumeExists); +static SD_VARLINK_DEFINE_ERROR(NoSuchTemplate); +static SD_VARLINK_DEFINE_ERROR(TypeNotSupported); +static SD_VARLINK_DEFINE_ERROR(WrongType); +static SD_VARLINK_DEFINE_ERROR(CreateNotSupported); +static SD_VARLINK_DEFINE_ERROR(CreateSizeRequired); +static SD_VARLINK_DEFINE_ERROR(ReadOnlyVolume); +static SD_VARLINK_DEFINE_ERROR(BadTemplate); + +SD_VARLINK_DEFINE_INTERFACE( + io_systemd_StorageProvider, + "io.systemd.StorageProvider", + SD_VARLINK_INTERFACE_COMMENT("Storage Provider API, a generic interface for acquiring access to storage volumes"), + SD_VARLINK_SYMBOL_COMMENT("Encodes three classes of storage volumes. This follows the kernel's nomenclature for inode types, i.e. reg, dir, blk."), + &vl_type_VolumeType, + SD_VARLINK_SYMBOL_COMMENT("Determines whether to open existing or create a new storage volume."), + &vl_type_CreateMode, + SD_VARLINK_SYMBOL_COMMENT("Acquires a file descriptor for a storage volume."), + &vl_method_Acquire, + SD_VARLINK_SYMBOL_COMMENT("Lists available storage volumes."), + &vl_method_ListVolumes, + SD_VARLINK_SYMBOL_COMMENT("Lists available templates."), + &vl_method_ListTemplates, + SD_VARLINK_SYMBOL_COMMENT("No storage volume under the specified name exists."), + &vl_error_NoSuchVolume, + SD_VARLINK_SYMBOL_COMMENT("A storage volume under the specified name already exists."), + &vl_error_VolumeExists, + SD_VARLINK_SYMBOL_COMMENT("No template under the specified name exists."), + &vl_error_NoSuchTemplate, + SD_VARLINK_SYMBOL_COMMENT("The specified volume type is not supported by this backend or system."), + &vl_error_TypeNotSupported, + SD_VARLINK_SYMBOL_COMMENT("The volume's type does not match the requested volume type."), + &vl_error_WrongType, + SD_VARLINK_SYMBOL_COMMENT("This backend does not support storage volume creation of the requested type."), + &vl_error_CreateNotSupported, + SD_VARLINK_SYMBOL_COMMENT("This backend or selected volume type requires a storage volume size to be specified if the storage volume does not exist yet and needs to be created."), + &vl_error_CreateSizeRequired, + SD_VARLINK_SYMBOL_COMMENT("A storage volume was to be acquired in writable mode, but only read-only access is permitted."), + &vl_error_ReadOnlyVolume, + SD_VARLINK_SYMBOL_COMMENT("Template not suitable for this storage volume type."), + &vl_error_BadTemplate); diff --git a/src/shared/varlink-io.systemd.StorageProvider.h b/src/shared/varlink-io.systemd.StorageProvider.h new file mode 100644 index 0000000000000..707d05644f2cf --- /dev/null +++ b/src/shared/varlink-io.systemd.StorageProvider.h @@ -0,0 +1,6 @@ +/* SPDX-License-Identifier: LGPL-2.1-or-later */ +#pragma once + +#include "sd-varlink-idl.h" + +extern const sd_varlink_interface vl_interface_io_systemd_StorageProvider; From 4fd0df2a4b0aeb7aa317666f7e25626b0129c87a Mon Sep 17 00:00:00 2001 From: Lennart Poettering Date: Thu, 23 Apr 2026 09:00:06 +0200 Subject: [PATCH 015/242] storage: add systemd-storage-block@.service provider MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit First implementation of io.systemd.StorageProvider, exposing all block devices known to udev (disks, partitions, dm nodes, …) as volumes of type "blk". Names are picked from stable /dev/mapper and /dev/disk/by-* symlinks; content-derived identifiers (by-uuid, by-label, …) are intentionally avoided for security. Volume creation is not supported by this backend. Socket-activated via /run/systemd/io.systemd.StorageProvider/block. Also adds shared storage-util.[ch] (VolumeType / CreateMode helpers) that subsequent providers reuse. --- man/rules/meson.build | 4 + man/systemd-storage-block@.service.xml | 97 ++++++ meson.build | 1 + src/storage/io.systemd.storage.policy | 30 ++ src/storage/meson.build | 11 + src/storage/storage-block.c | 439 ++++++++++++++++++++++++ src/storage/storage-util.c | 23 ++ src/storage/storage-util.h | 43 +++ units/meson.build | 7 + units/systemd-storage-block.socket | 24 ++ units/systemd-storage-block@.service.in | 18 + 11 files changed, 697 insertions(+) create mode 100644 man/systemd-storage-block@.service.xml create mode 100644 src/storage/io.systemd.storage.policy create mode 100644 src/storage/meson.build create mode 100644 src/storage/storage-block.c create mode 100644 src/storage/storage-util.c create mode 100644 src/storage/storage-util.h create mode 100644 units/systemd-storage-block.socket create mode 100644 units/systemd-storage-block@.service.in diff --git a/man/rules/meson.build b/man/rules/meson.build index 4aae561512991..439c33d5abdc1 100644 --- a/man/rules/meson.build +++ b/man/rules/meson.build @@ -1186,6 +1186,10 @@ manpages = [ ['systemd-ssh-issue', '1', [], ''], ['systemd-ssh-proxy', '1', [], ''], ['systemd-stdio-bridge', '1', [], ''], + ['systemd-storage-block@.service', + '8', + ['systemd-storage-block', 'systemd-storage-block.socket'], + ''], ['systemd-storagetm.service', '8', ['systemd-storagetm'], 'ENABLE_STORAGETM'], ['systemd-stub', '7', diff --git a/man/systemd-storage-block@.service.xml b/man/systemd-storage-block@.service.xml new file mode 100644 index 0000000000000..ee6022af053bb --- /dev/null +++ b/man/systemd-storage-block@.service.xml @@ -0,0 +1,97 @@ + + + + + + + + systemd-storage-block@.service + systemd + + + + systemd-storage-block@.service + 8 + + + + systemd-storage-block@.service + systemd-storage-block.socket + systemd-storage-block + Storage provider exposing local block devices as storage volumes + + + + systemd-storage-block@.service + systemd-storage-block.socket + + + + Description + + systemd-storage-block@.service is a system service that implements the + io.systemd.StorageProvider Varlink + interface, exposing the system's block devices (such as disks, partitions, and device-mapper + nodes) as storage volumes that may be acquired by other programs as file descriptors. + + The service is socket-activated via systemd-storage-block.socket, which + listens on the AF_UNIX socket /run/systemd/io.systemd.StorageProvider/block. The + socket directory /run/systemd/io.systemd.StorageProvider/ is the well-known location + where storage providers register, see + storagectl1 for an + enumeration tool. + + See also + systemd-storage-fs@.service8 + for a complementary implementation that exposes regular files and directories from a backing file + system. + + + + Volumes + + The volumes exposed via the provider are identified by an absolute path (which must begin with + /dev/), i.e. as a kernel block device node such as /dev/sda or + /dev/disk/by-id/…. Volume names that are not normalized or that do not begin with + /dev/ are not accepted. + + + + Options + + The following options are understood: + + + + + + + + + Files + + + + /run/systemd/io.systemd.StorageProvider/block + + AF_UNIX socket the service listens on. This is the canonical location + for the block storage provider, and is enumerated by + storagectl providers. + + + + + + + + See Also + + systemd1 + storagectl1 + systemd-storage-fs@.service8 + + + + diff --git a/meson.build b/meson.build index 4f1a791bc7651..325b954a78b24 100644 --- a/meson.build +++ b/meson.build @@ -2139,6 +2139,7 @@ subdir('src/socket-activate') subdir('src/socket-proxy') subdir('src/ssh-generator') subdir('src/stdio-bridge') +subdir('src/storage') subdir('src/storagetm') subdir('src/sulogin-shell') subdir('src/sysctl') diff --git a/src/storage/io.systemd.storage.policy b/src/storage/io.systemd.storage.policy new file mode 100644 index 0000000000000..06af278a5a428 --- /dev/null +++ b/src/storage/io.systemd.storage.policy @@ -0,0 +1,30 @@ + + + + + + + + The systemd Project + https://systemd.io + + + Allow access to block storage volumes + Authentication is required for an application to gain access to block storage volume '$(name)'. + + auth_admin + auth_admin + auth_admin_keep + + + diff --git a/src/storage/meson.build b/src/storage/meson.build new file mode 100644 index 0000000000000..714e50ad9a1ca --- /dev/null +++ b/src/storage/meson.build @@ -0,0 +1,11 @@ +# SPDX-License-Identifier: LGPL-2.1-or-later + +executables += [ + libexec_template + { + 'name' : 'systemd-storage-block', + 'sources' : files('storage-block.c', 'storage-util.c'), + }, +] + +install_data('io.systemd.storage.policy', + install_dir : polkitpolicydir) diff --git a/src/storage/storage-block.c b/src/storage/storage-block.c new file mode 100644 index 0000000000000..4c21795c360ad --- /dev/null +++ b/src/storage/storage-block.c @@ -0,0 +1,439 @@ +/* SPDX-License-Identifier: LGPL-2.1-or-later */ + +#include + +#include "sd-device.h" +#include "sd-json.h" +#include "sd-varlink.h" + +#include "blockdev-list.h" +#include "build.h" +#include "bus-polkit.h" +#include "device-private.h" +#include "device-util.h" +#include "errno-util.h" +#include "fd-util.h" +#include "format-table.h" +#include "hashmap.h" +#include "help-util.h" +#include "json-util.h" +#include "log.h" +#include "main-func.h" +#include "options.h" +#include "path-util.h" +#include "storage-util.h" +#include "strv.h" +#include "varlink-io.systemd.StorageProvider.h" +#include "varlink-util.h" + +static int block_device_pick_name( + const BlockDevice *d, + const char **ret_name, + char ***ret_aliases) { + + int r; + + assert(d); + assert(d->node); + assert(ret_name); + assert(ret_aliases); + + static const char *const prefixes[] = { + /* The list of preferred prefixes, in order of preference. Note: for security reasons we only + * use identifiers that do not depend on the *contents* of the device, i.e. we restrict + * ourselves to IDs whose fields are either chosen by whoever created the kernel device or are + * hardware properties, but not names generated from superblock metainformation or similar. */ + "/dev/mapper", + "/dev/disk/by-loop-ref", + "/dev/disk/by-id", + "/dev/disk/by-path", + }; + + const char* found[ELEMENTSOF(prefixes)] = {}; + _cleanup_strv_free_ char **aliases = NULL; + size_t best = SIZE_MAX; + STRV_FOREACH(sl, d->symlinks) { + bool matched = false; + for (size_t i = 0; i < ELEMENTSOF(prefixes); i++) { + if (!path_startswith(*sl, prefixes[i])) + continue; + + if (found[i]) { + /* Two symlinks with the same prefix? Then keep the lower one. */ + if (path_compare(*sl, found[i]) > 0) + continue; + + r = strv_extend(&aliases, found[i]); + if (r < 0) + return r; + } + + found[i] = *sl; + if (i < best) + best = i; + matched = true; + } + + if (!matched) { + r = strv_extend(&aliases, *sl); + if (r < 0) + return r; + } + } + + if (best == SIZE_MAX) /* No preferred prefix found, use the kernel device name */ + *ret_name = d->node; + else { + /* We found a preferred prefix, add the kernel device name to the aliases then. */ + r = strv_extend(&aliases, d->node); + if (r < 0) + return r; + + /* If there are any less preferred prefixes also add them to the aliases array */ + for (size_t i = best + 1; i < ELEMENTSOF(prefixes); i++) { + if (!found[i]) + continue; + + r = strv_extend(&aliases, found[i]); + if (r < 0) + return r; + } + + *ret_name = found[best]; + } + + strv_sort(aliases); + *ret_aliases = TAKE_PTR(aliases); + + return 0; +} + +static bool block_device_match(const BlockDevice *d, const char *match) { + assert(d); + assert(d->node); + + if (!match) + return true; + + if (fnmatch(match, d->node, FNM_NOESCAPE) == 0) + return true; + + STRV_FOREACH(sl, d->symlinks) + if (fnmatch(match, *sl, FNM_NOESCAPE) == 0) + return true; + + return false; +} + +static int vl_method_list_volumes( + sd_varlink *link, + sd_json_variant *parameters, + sd_varlink_method_flags_t flags, + void *userdata) { + + int r; + + assert(link); + assert(FLAGS_SET(flags, SD_VARLINK_METHOD_MORE)); + + struct { + const char *match_name; + } p = {}; + + static const sd_json_dispatch_field dispatch_table[] = { + { "matchName", SD_JSON_VARIANT_STRING, sd_json_dispatch_const_string, voffsetof(p, match_name), 0 }, + {} + }; + + r = sd_varlink_dispatch(link, parameters, dispatch_table, &p); + if (r != 0) + return r; + + BlockDevice *l = NULL; + size_t n = 0; + CLEANUP_ARRAY(l, n, block_device_array_free); + + r = blockdev_list( + BLOCKDEV_LIST_SHOW_SYMLINKS| + BLOCKDEV_LIST_IGNORE_ROOT| + BLOCKDEV_LIST_IGNORE_EMPTY| + BLOCKDEV_LIST_METADATA, + &l, + &n); + if (r < 0) + return r; + + r = sd_varlink_set_sentinel(link, "io.systemd.StorageProvider.NoSuchVolume"); + if (r < 0) + return r; + + FOREACH_ARRAY(d, l, n) { + const char *name = NULL; + _cleanup_strv_free_ char **aliases = NULL; + + if (!block_device_match(d, p.match_name)) + continue; + + r = block_device_pick_name(d, &name, &aliases); + if (r < 0) + return r; + + r = sd_varlink_replybo( + link, + SD_JSON_BUILD_PAIR_STRING("name", name), + JSON_BUILD_PAIR_STRV_NON_EMPTY("aliases", aliases), + SD_JSON_BUILD_PAIR_STRING("type", "blk"), + SD_JSON_BUILD_PAIR_CONDITION(d->read_only >= 0, "readOnly", SD_JSON_BUILD_BOOLEAN(d->read_only)), + JSON_BUILD_PAIR_UNSIGNED_NOT_EQUAL("sizeBytes", d->size, UINT64_MAX)); + if (r < 0) + return r; + } + + return 0; +} + +static int vl_method_list_templates( + sd_varlink *link, + sd_json_variant *parameters, + sd_varlink_method_flags_t flags, + void *userdata) { + + /* This storage provider does not support templates */ + assert(link); + assert(FLAGS_SET(flags, SD_VARLINK_METHOD_MORE)); + + return sd_varlink_error(link, "io.systemd.StorageProvider.NoSuchTemplate", NULL); +} + +static int device_open_disk_auto_rw(sd_device *d, int *read_only) { + assert(d); + assert(read_only); + + int fd = sd_device_open(d, *read_only > 0 ? O_RDONLY : O_RDWR); + if (fd < 0) { + if (!ERRNO_IS_NEG_FS_WRITE_REFUSED(fd) || *read_only >= 0) + return log_device_debug_errno(d, fd, "Failed to open device in %s mode: %m", *read_only > 0 ? "read-only" : "read-write"); + + /* Try again in read-only mode */ + fd = sd_device_open(d, O_RDONLY); + if (fd < 0) + return log_device_debug_errno(d, fd, "Failed to open device in read-only mode, too: %m"); + + *read_only = true; + } else + *read_only = *read_only > 0; + + return fd; +} + +static int vl_method_acquire( + sd_varlink *link, + sd_json_variant *parameters, + sd_varlink_method_flags_t flags, + void *userdata) { + + Hashmap **polkit_registry = ASSERT_PTR(userdata); + int r; + + assert(link); + + struct { + const char *name; + CreateMode create_mode; + const char *template; + int read_only; + VolumeType request_as; + uint64_t create_size; + } p = { + .create_mode = CREATE_ANY, + .read_only = -1, + .request_as = _VOLUME_TYPE_INVALID, + .create_size = UINT64_MAX, /* never actually used here, just validated; we don't allow creation of block devices here */ + }; + + static const sd_json_dispatch_field dispatch_table[] = { + { "name", SD_JSON_VARIANT_STRING, sd_json_dispatch_const_string, voffsetof(p, name), SD_JSON_MANDATORY }, + { "createMode", SD_JSON_VARIANT_STRING, json_dispatch_create_mode, voffsetof(p, create_mode), 0 }, + { "template", SD_JSON_VARIANT_STRING, sd_json_dispatch_const_string, voffsetof(p, template), 0 }, + { "readOnly", SD_JSON_VARIANT_BOOLEAN, sd_json_dispatch_tristate, voffsetof(p, read_only), 0 }, + { "requestAs", SD_JSON_VARIANT_STRING, json_dispatch_volume_type, voffsetof(p, request_as), 0 }, + { "createSizeBytes", _SD_JSON_VARIANT_TYPE_INVALID, sd_json_dispatch_uint64, voffsetof(p, create_size), 0 }, + VARLINK_DISPATCH_POLKIT_FIELD, + {} + }; + + r = sd_varlink_dispatch(link, parameters, dispatch_table, &p); + if (r != 0) + return r; + + if (!storage_volume_name_is_valid(p.name)) + return sd_varlink_error_invalid_parameter_name(link, "name"); + if (!path_startswith(p.name, "/dev") || !path_is_normalized(p.name)) + return sd_varlink_error(link, "io.systemd.StorageProvider.NoSuchVolume", NULL); + + if (!IN_SET(p.create_mode, CREATE_ANY, CREATE_OPEN)) + return sd_varlink_error(link, "io.systemd.StorageProvider.CreateNotSupported", NULL); + + /* off_t is signed, hence refuse overly long requests */ + if (p.create_size != UINT64_MAX && p.create_size > INT64_MAX) + return sd_varlink_error_invalid_parameter_name(link, "createSizeBytes"); + + if (!isempty(p.template)) { + if (!storage_template_name_is_valid(p.template)) + return sd_varlink_error_invalid_parameter_name(link, "template"); + + return sd_varlink_error(link, "io.systemd.StorageProvider.NoSuchTemplate", NULL); + } + + if (p.request_as >= 0 && p.request_as != VOLUME_BLK) + return sd_varlink_error(link, "io.systemd.StorageProvider.TypeNotSupported", NULL); + + const char *details[] = { + "name", p.name, + NULL + }; + + r = varlink_verify_polkit_async( + link, + /* bus= */ NULL, + "io.systemd.storage.block.acquire", + details, + polkit_registry); + if (r <= 0) + return r; + + _cleanup_(sd_device_unrefp) sd_device *d = NULL; + r = sd_device_new_from_devname(&d, p.name); + if (ERRNO_IS_NEG_DEVICE_ABSENT(r)) + return sd_varlink_error(link, "io.systemd.StorageProvider.NoSuchVolume", NULL); + if (r < 0) + return r; + + if (!device_in_subsystem(d, "block")) + return sd_varlink_error(link, "io.systemd.StorageProvider.NoSuchVolume", NULL); + + /* The error returns are sometimes a bit inconclusive (i.e. read-only media might appear as + * inaccessible due to a permission issue), hence let's do an explicit check first, to give good + * answers */ + if (p.read_only <= 0) { + r = device_get_sysattr_bool(d, "ro"); + if (r < 0) + log_device_debug_errno(d, r, "Failed to acquire read-only flag of device '%s', ignoring: %m", p.name); + else if (r > 0) { + if (p.read_only == 0) + return sd_varlink_error(link, "io.systemd.StorageProvider.ReadOnlyVolume", NULL); + + p.read_only = true; + } + } + + _cleanup_close_ int fd = device_open_disk_auto_rw(d, &p.read_only); + if (ERRNO_IS_NEG_FS_WRITE_REFUSED(fd)) + return sd_varlink_error(link, "io.systemd.StorageProvider.ReadOnlyVolume", NULL); + if (fd < 0) + return fd; + + assert(p.read_only >= 0); /* flag is now definitely initialized to either true or false, not negative anymore */ + + int idx = sd_varlink_push_fd(link, fd); + if (idx < 0) + return idx; + + TAKE_FD(fd); + + return sd_varlink_replybo( + link, + SD_JSON_BUILD_PAIR_INTEGER("fileDescriptorIndex", idx), + SD_JSON_BUILD_PAIR_STRING("type", "blk"), + SD_JSON_BUILD_PAIR_BOOLEAN("readOnly", p.read_only)); +} + +static int vl_server(void) { + int r; + + _cleanup_(hashmap_freep) Hashmap *polkit_registry = NULL; + _cleanup_(sd_varlink_server_unrefp) sd_varlink_server *varlink_server = NULL; + r = varlink_server_new( + &varlink_server, + SD_VARLINK_SERVER_HANDLE_SIGINT| + SD_VARLINK_SERVER_HANDLE_SIGTERM| + SD_VARLINK_SERVER_ALLOW_FD_PASSING_OUTPUT| + SD_VARLINK_SERVER_INHERIT_USERDATA, + &polkit_registry); + if (r < 0) + return log_error_errno(r, "Failed to allocate Varlink server: %m"); + + r = sd_varlink_server_add_interface(varlink_server, &vl_interface_io_systemd_StorageProvider); + if (r < 0) + return log_error_errno(r, "Failed to add Varlink interface: %m"); + + r = sd_varlink_server_bind_method_many( + varlink_server, + "io.systemd.StorageProvider.Acquire", vl_method_acquire, + "io.systemd.StorageProvider.ListVolumes", vl_method_list_volumes, + "io.systemd.StorageProvider.ListTemplates", vl_method_list_templates); + if (r < 0) + return log_error_errno(r, "Failed to bind Varlink methods: %m"); + + r = sd_varlink_server_loop_auto(varlink_server); + if (r < 0) + return log_error_errno(r, "Failed to run Varlink event loop: %m"); + + return 0; +} + +static int help(void) { + int r; + + help_cmdline("[OPTIONS...]"); + help_abstract("Simple block device backed storage provider"); + + _cleanup_(table_unrefp) Table *options = NULL; + r = option_parser_get_help_table(&options); + if (r < 0) + return r; + + help_section("Options:"); + + r = table_print_or_warn(options); + if (r < 0) + return r; + + help_man_page_reference("systemd-storage-block", "8"); + return 0; +} + +static int parse_argv(int argc, char *argv[]) { + assert(argc >= 0); + assert(argv); + + OptionParser opts = { argc, argv }; + FOREACH_OPTION(c, &opts, /* on_error= */ return c) + switch (c) { + + OPTION_COMMON_HELP: + return help(); + + OPTION_COMMON_VERSION: + return version(); + } + + if (option_parser_get_n_args(&opts) > 0) + return log_error_errno(SYNTHETIC_ERRNO(EINVAL), "This program takes no arguments."); + + return 1; +} + +static int run(int argc, char* argv[]) { + int r; + + log_setup(); + + r = parse_argv(argc, argv); + if (r <= 0) + return r; + + return vl_server(); +} + +DEFINE_MAIN_FUNCTION(run); diff --git a/src/storage/storage-util.c b/src/storage/storage-util.c new file mode 100644 index 0000000000000..793946c03a63e --- /dev/null +++ b/src/storage/storage-util.c @@ -0,0 +1,23 @@ +/* SPDX-License-Identifier: LGPL-2.1-or-later */ + +#include "json-util.h" +#include "string-table.h" +#include "storage-util.h" + +static const char *volume_type_table[_VOLUME_TYPE_MAX] = { + [VOLUME_BLK] = "blk", + [VOLUME_REG] = "reg", + [VOLUME_DIR] = "dir", +}; + +static const char *create_mode_table[_CREATE_MODE_MAX] = { + [CREATE_ANY] = "any", + [CREATE_NEW] = "new", + [CREATE_OPEN] = "open", +}; + +DEFINE_STRING_TABLE_LOOKUP(volume_type, VolumeType); +DEFINE_STRING_TABLE_LOOKUP(create_mode, CreateMode); + +JSON_DISPATCH_ENUM_DEFINE(json_dispatch_volume_type, VolumeType, volume_type_from_string); +JSON_DISPATCH_ENUM_DEFINE(json_dispatch_create_mode, CreateMode, create_mode_from_string); diff --git a/src/storage/storage-util.h b/src/storage/storage-util.h new file mode 100644 index 0000000000000..f7a62aeec0835 --- /dev/null +++ b/src/storage/storage-util.h @@ -0,0 +1,43 @@ +/* SPDX-License-Identifier: LGPL-2.1-or-later */ +#pragma once + +#include "sd-json.h" + +#include "string-table-fundamental.h" +#include "string-util.h" + +/* This closely follows the kernel's inode type naming, i.e. is supposed to be a subset of what + * inode_type_from_string() parses. */ +typedef enum VolumeType { + VOLUME_BLK, + VOLUME_REG, + VOLUME_DIR, + _VOLUME_TYPE_MAX, + _VOLUME_TYPE_INVALID = -EINVAL, +} VolumeType; + +typedef enum CreateMode { + CREATE_ANY, + CREATE_NEW, + CREATE_OPEN, + _CREATE_MODE_MAX, + _CREATE_MODE_INVALID = -EINVAL, +} CreateMode; + +DECLARE_STRING_TABLE_LOOKUP(volume_type, VolumeType); +DECLARE_STRING_TABLE_LOOKUP(create_mode, CreateMode); + +int json_dispatch_volume_type(const char *name, sd_json_variant *variant, sd_json_dispatch_flags_t flags, void *userdata); +int json_dispatch_create_mode(const char *name, sd_json_variant *variant, sd_json_dispatch_flags_t flags, void *userdata); + +static inline bool storage_volume_name_is_valid(const char *n) { + return string_is_safe(n, /* flags= */ 0); +} + +static inline bool storage_template_name_is_valid(const char *n) { + return string_is_safe(n, /* flags= */ 0); +} + +static inline bool storage_provider_name_is_valid(const char *n) { + return string_is_safe(n, STRING_FILENAME); +} diff --git a/units/meson.build b/units/meson.build index 622e1e69cf7c2..3cac3c876ae1c 100644 --- a/units/meson.build +++ b/units/meson.build @@ -804,6 +804,13 @@ units = [ 'conditions' : ['ENABLE_SYSUSERS'], 'symlinks' : ['sysinit.target.wants/'], }, + { + 'file' : 'systemd-storage-block.socket', + 'symlinks' : ['sockets.target.wants/'] + }, + { + 'file' : 'systemd-storage-block@.service.in', + }, { 'file' : 'systemd-storagetm.service.in', 'conditions' : ['ENABLE_STORAGETM'], diff --git a/units/systemd-storage-block.socket b/units/systemd-storage-block.socket new file mode 100644 index 0000000000000..1d18b481a375a --- /dev/null +++ b/units/systemd-storage-block.socket @@ -0,0 +1,24 @@ +# SPDX-License-Identifier: LGPL-2.1-or-later +# +# This file is part of systemd. +# +# systemd is free software; you can redistribute it and/or modify it +# under the terms of the GNU Lesser General Public License as published by +# the Free Software Foundation; either version 2.1 of the License, or +# (at your option) any later version. + +[Unit] +Description=Simple Block Device Backed Storage Provider +Documentation=man:systemd-storage-block@..service(8) +DefaultDependencies=no +Before=sockets.target + +[Socket] +ListenStream=/run/systemd/io.systemd.StorageProvider/block +FileDescriptorName=varlink +SocketMode=0666 +Accept=yes +MaxConnectionsPerSource=16 + +[Install] +WantedBy=sockets.target diff --git a/units/systemd-storage-block@.service.in b/units/systemd-storage-block@.service.in new file mode 100644 index 0000000000000..801551e2ff802 --- /dev/null +++ b/units/systemd-storage-block@.service.in @@ -0,0 +1,18 @@ +# SPDX-License-Identifier: LGPL-2.1-or-later +# +# This file is part of systemd. +# +# systemd is free software; you can redistribute it and/or modify it +# under the terms of the GNU Lesser General Public License as published by +# the Free Software Foundation; either version 2.1 of the License, or +# (at your option) any later version. + +[Unit] +Description=Simple Block Device Backed Storage Provider +Documentation=man:systemd-storage-block@.service(8) +DefaultDependencies=no +Conflicts=shutdown.target initrd-switch-root.target +Before=shutdown.target initrd-switch-root.target + +[Service] +ExecStart=-{{LIBEXECDIR}}/systemd-storage-block From 9825195c255e7818627a3d52c2544c9e635e14fd Mon Sep 17 00:00:00 2001 From: Lennart Poettering Date: Thu, 23 Apr 2026 09:00:46 +0200 Subject: [PATCH 016/242] storage: add systemd-storage-fs@.service provider Second StorageProvider implementation, exposing regular files and directories from a backing filesystem. In system mode the backing directory is /var/lib/storage/, in user mode $XDG_STATE_HOME/storage/; entries with a .volume suffix are exposed, with the inode type determining whether the volume is reported as reg, dir or (via symlinked/bind-mounted device node) blk. Unlike the block provider, this one supports creating volumes on-demand from a small set of built-in templates: sparse-file, allocated-file, directory and subvolume. --- man/rules/meson.build | 4 + man/systemd-storage-fs@.service.xml | 199 ++++++ src/storage/io.systemd.storage.policy | 10 + src/storage/meson.build | 8 +- src/storage/storage-fs.c | 807 ++++++++++++++++++++++ units/meson.build | 7 + units/systemd-storage-fs.socket | 25 + units/systemd-storage-fs@.service.in | 19 + units/user/meson.build | 7 + units/user/systemd-storage-fs.socket | 23 + units/user/systemd-storage-fs@.service.in | 15 + 11 files changed, 1123 insertions(+), 1 deletion(-) create mode 100644 man/systemd-storage-fs@.service.xml create mode 100644 src/storage/storage-fs.c create mode 100644 units/systemd-storage-fs.socket create mode 100644 units/systemd-storage-fs@.service.in create mode 100644 units/user/systemd-storage-fs.socket create mode 100644 units/user/systemd-storage-fs@.service.in diff --git a/man/rules/meson.build b/man/rules/meson.build index 439c33d5abdc1..7f4fa07f7ba77 100644 --- a/man/rules/meson.build +++ b/man/rules/meson.build @@ -1190,6 +1190,10 @@ manpages = [ '8', ['systemd-storage-block', 'systemd-storage-block.socket'], ''], + ['systemd-storage-fs@.service', + '8', + ['systemd-storage-fs', 'systemd-storage-fs.socket'], + ''], ['systemd-storagetm.service', '8', ['systemd-storagetm'], 'ENABLE_STORAGETM'], ['systemd-stub', '7', diff --git a/man/systemd-storage-fs@.service.xml b/man/systemd-storage-fs@.service.xml new file mode 100644 index 0000000000000..4fe0734398c98 --- /dev/null +++ b/man/systemd-storage-fs@.service.xml @@ -0,0 +1,199 @@ + + + + + + + + systemd-storage-fs@.service + systemd + + + + systemd-storage-fs@.service + 8 + + + + systemd-storage-fs@.service + systemd-storage-fs.socket + systemd-storage-fs + Storage provider exposing regular files and directories as storage volumes + + + + systemd-storage-fs@.service + systemd-storage-fs.socket + + + + Description + + systemd-storage-fs@.service is a service that implements the + io.systemd.StorageProvider Varlink + interface, exposing regular files and directories in /var/lib/storage/*.volume (if + used in system mode) or $XDG_STATE_HOME/storage (when used in user mode) as storage + volumes. Acquired volumes are returned to the caller as file descriptors. Unlike + systemd-storage-block@.service8, + this implementation also supports creating new volumes on demand from a small set of built-in + templates. + + The service is socket-activated via systemd-storage-fs.socket. In system mode + it listens on the AF_UNIX socket /run/systemd/io.systemd.StorageProvider/fs, in user + mode on $XDG_RUNTIME_DIR/systemd/io.systemd.StorageProvider/fs. See + storagectl1 for an + enumeration tool. + + See also + systemd-storage-block@.service8 + for a complementary implementation that exposes local block devices as storage volumes. + + + + Volumes + + Volumes are stored below the storage directory: + + + /var/lib/storage/ when run in system mode. + + $XDG_STATE_HOME/storage/ (typically + ~/.local/state/storage/) when run in user mode. + + + Each volume on disk is stored as a directory entry with a .volume suffix in + the storage directory. Entries which are regular files are exposed as volumes of type + reg; entries which are directories are exposed as volumes of type + dir. Moreover, block device nodes may be symlinked (or bind mounted) into the + directory, which are then exposed as volumes of type blk. + + For directory volumes, the root of the file system passed to clients is placed in a subdirectory + root/ of the NAME.volume directory. The former (and all inodes + below it) must be owned by the foreign UID range, the latter by the host's root. + + When acquiring a volume, symlinks are followed. + + An administrator is permitted to freely manipulate the volume hierarchy directly as long as the + rules described above are followed. In particular, it's permitted to copy, mount or symlink arbitrary + external resources (regardless if directory, regular file or block) into the volume directory, so that + they are exposed as additional volumes. + + + + Templates + + The provider supports creating new volumes automatically when they are acquired. The caller may + select a template that determines configuration details of the volume to create. The + following built-in templates are available: + + + + sparse-file + + Creates a volume backed by a sparsely populated regular file. This is the default + template when creating a regular file volume. (Volume type is reg.) + + + + + + allocated-file + + Creates a volume backed by a fully allocated regular file. (Volume type is + reg.) + + + + + + directory + + Creates a volume backed by a regular directory. (Volume type is + dir.) + + + + + + subvolume + + Creates a btrfs subvolume as backing inode (falling back to a regular directory if + the storage directory is not on btrfs). This is the default template when creating a directory + volume. (Volume type is dir.) + + + + + + + + Options + + The following command-line options are understood: + + + + + + Operate in system mode. Volumes are stored below + /var/lib/storage/. This is the default when invoked from + systemd-storage-fs@.service in the system manager. + + + + + + + + Operate in user mode. Volumes are stored below + $XDG_STATE_HOME/storage/. This is the default when invoked from + systemd-storage-fs@.service in the user manager. + + + + + + + + + + + Files + + + + /var/lib/storage/ + $XDG_STATE_HOME/storage/ + + The storage directory used to back the system mode and user mode service + instances respectively. Each volume is stored as an entry with a + .volume suffix below this directory. + + + + + + /run/systemd/io.systemd.StorageProvider/fs + $XDG_RUNTIME_DIR/systemd/io.systemd.StorageProvider/fs + + AF_UNIX sockets the service listens on, in system and user mode + respectively. These are the canonical locations for the fs storage + provider, and are enumerated by storagectl providers. + + + + + + + + See Also + + systemd1 + storagectl1 + systemd-storage-block@.service8 + + + + diff --git a/src/storage/io.systemd.storage.policy b/src/storage/io.systemd.storage.policy index 06af278a5a428..7b25553501520 100644 --- a/src/storage/io.systemd.storage.policy +++ b/src/storage/io.systemd.storage.policy @@ -27,4 +27,14 @@ auth_admin_keep + + + Allow access to file system storage volumes + Authentication is required for an application to gain access to file system storage volume '$(name)'. + + auth_admin + auth_admin + auth_admin_keep + + diff --git a/src/storage/meson.build b/src/storage/meson.build index 714e50ad9a1ca..05c5e24ece4ac 100644 --- a/src/storage/meson.build +++ b/src/storage/meson.build @@ -3,7 +3,13 @@ executables += [ libexec_template + { 'name' : 'systemd-storage-block', - 'sources' : files('storage-block.c', 'storage-util.c'), + 'sources' : files('storage-block.c'), + 'extract' : files('storage-util.c') + }, + libexec_template + { + 'name' : 'systemd-storage-fs', + 'sources' : files('storage-fs.c'), + 'objects' : ['systemd-storage-block'], }, ] diff --git a/src/storage/storage-fs.c b/src/storage/storage-fs.c new file mode 100644 index 0000000000000..47ec9829c494d --- /dev/null +++ b/src/storage/storage-fs.c @@ -0,0 +1,807 @@ +/* SPDX-License-Identifier: LGPL-2.1-or-later */ + +#include +#include +#include +#include +#include + +#include "sd-device.h" +#include "sd-json.h" + +#include "alloc-util.h" +#include "build.h" +#include "bus-polkit.h" +#include "chase.h" +#include "chattr-util.h" +#include "device-private.h" +#include "device-util.h" +#include "errno-util.h" +#include "fd-util.h" +#include "format-table.h" +#include "fs-util.h" +#include "hashmap.h" +#include "help-util.h" +#include "log.h" +#include "main-func.h" +#include "mount-util.h" +#include "options.h" +#include "path-lookup.h" +#include "path-util.h" +#include "recurse-dir.h" +#include "runtime-scope.h" +#include "stat-util.h" +#include "storage-util.h" +#include "string-table.h" +#include "tmpfile-util.h" +#include "uid-classification.h" +#include "varlink-io.systemd.StorageProvider.h" +#include "varlink-util.h" + +static RuntimeScope arg_runtime_scope = RUNTIME_SCOPE_SYSTEM; + +/* For now we maintain a simple, compiled-in list of templates. One of those days we might want to move these + * into configurable drop-in files on disk. */ +typedef enum Template { + TEMPLATE_SPARSE_FILE, + TEMPLATE_ALLOCATED_FILE, + TEMPLATE_DIRECTORY, + TEMPLATE_SUBVOLUME, + _TEMPLATE_MAX, + _TEMPLATE_INVALID = -EINVAL, +} Template; + +static const char *template_table[_TEMPLATE_MAX] = { + [TEMPLATE_SPARSE_FILE] = "sparse-file", + [TEMPLATE_ALLOCATED_FILE] = "allocated-file", + [TEMPLATE_DIRECTORY] = "directory", + [TEMPLATE_SUBVOLUME] = "subvolume", +}; + +DEFINE_PRIVATE_STRING_TABLE_LOOKUP(template, Template); + +static VolumeType volume_type_from_template(Template t) { + switch (t) { + + case TEMPLATE_SPARSE_FILE: + case TEMPLATE_ALLOCATED_FILE: + return VOLUME_REG; + + case TEMPLATE_DIRECTORY: + case TEMPLATE_SUBVOLUME: + return VOLUME_DIR; + + default: + return _VOLUME_TYPE_INVALID; + } +} + +static int open_storage_dir(void) { + int r; + + _cleanup_free_ char *state_dir = NULL; + r = state_directory_generic(arg_runtime_scope, /* suffix= */ NULL, &state_dir); + if (r < 0) + return log_error_errno(r, "Failed to get state directory path: %m"); + + _cleanup_close_ int state_fd = chase_and_open(state_dir, /* root= */ NULL, CHASE_TRIGGER_AUTOFS|CHASE_MKDIR_0755|CHASE_MUST_BE_DIRECTORY, O_CLOEXEC|O_CREAT|O_DIRECTORY, /* ret_path= */ NULL); + if (state_fd < 0) + return log_error_errno(state_fd, "Failed to open '%s': %m", state_dir); + + /* First we try to open the storage directory. If it exists this will work and we are happy. If we + * get ENOENT we'll try to create it. If that works, great. If we get EEXIST we'll try to reopen it + * again, to deal with other instances of ourselves racing with us. We only do this exactly once + * though, under the assumption that the dir is never removed, only created during runtime. */ + _cleanup_close_ int storage_fd = chase_and_openat(XAT_FDROOT, state_fd, "storage", CHASE_TRIGGER_AUTOFS|CHASE_MUST_BE_DIRECTORY, O_CLOEXEC|O_DIRECTORY, /* ret_path= */ NULL); + if (storage_fd == -ENOENT) { + storage_fd = xopenat_full(state_fd, "storage", O_EXCL|O_CREAT|O_CLOEXEC|O_DIRECTORY|O_NOFOLLOW, XO_LABEL|XO_SUBVOLUME, 0700); + if (storage_fd == -EEXIST) + storage_fd = chase_and_openat(XAT_FDROOT, state_fd, "storage", CHASE_TRIGGER_AUTOFS|CHASE_MUST_BE_DIRECTORY, O_CLOEXEC|O_DIRECTORY, /* ret_path= */ NULL); + } + if (storage_fd < 0) + return log_error_errno(storage_fd, "Failed to open '%s/storage/': %m", state_dir); + + return TAKE_FD(storage_fd); +} + +static int vl_method_list_volumes( + sd_varlink *link, + sd_json_variant *parameters, + sd_varlink_method_flags_t flags, + void *userdata) { + + int r; + + assert(link); + assert(FLAGS_SET(flags, SD_VARLINK_METHOD_MORE)); + + struct { + const char *match_name; + } p = {}; + + static const sd_json_dispatch_field dispatch_table[] = { + { "matchName", SD_JSON_VARIANT_STRING, sd_json_dispatch_const_string, voffsetof(p, match_name), 0 }, + {} + }; + + r = sd_varlink_dispatch(link, parameters, dispatch_table, &p); + if (r != 0) + return r; + + _cleanup_close_ int fd = open_storage_dir(); + if (fd < 0) + return fd; + + _cleanup_free_ DirectoryEntries *dentries = NULL; + r = readdir_all(fd, RECURSE_DIR_SORT, &dentries); + if (r < 0) + return r; + + r = sd_varlink_set_sentinel(link, "io.systemd.StorageProvider.NoSuchVolume"); + if (r < 0) + return r; + + FOREACH_ARRAY(dp, dentries->entries, dentries->n_entries) { + struct dirent *d = *dp; + + if (!IN_SET(d->d_type, DT_REG, DT_DIR, DT_LNK, DT_BLK, DT_UNKNOWN)) + continue; + + const char *e = endswith(d->d_name, ".volume"); + if (!e) + continue; + + _cleanup_free_ char *n = strndup(d->d_name, e - d->d_name); + if (!n) + return log_oom_debug(); + + if (!storage_volume_name_is_valid(n)) + continue; + + if (p.match_name && fnmatch(p.match_name, n, FNM_NOESCAPE) != 0) + continue; + + _cleanup_close_ int pin_fd = -EBADF; + r = chaseat(XAT_FDROOT, fd, d->d_name, CHASE_TRIGGER_AUTOFS, /* ret_path= */ NULL, &pin_fd); + if (r < 0) { + log_debug_errno(r, "Failed to stat() '%s' in storage directory, ignoring: %m", d->d_name); + continue; + } + + struct stat st; + if (fstat(pin_fd, &st) < 0) + return log_debug_errno(errno, "Failed to stat() '%s' in storage directory: %m", d->d_name); + + uint64_t size = UINT64_MAX, used = UINT64_MAX; + bool ro = false; + + switch (st.st_mode & S_IFMT) { + case S_IFREG: + ro = (st.st_mode & 0222) == 0; + size = st.st_size; + used = (uint64_t) st.st_blocks * UINT64_C(512); + break; + + case S_IFDIR: + r = fd_is_read_only_fs(pin_fd); + if (r < 0) + log_debug_errno(r, "Failed to determine if '%s' is read-only, ignoring", d->d_name); + else + ro = r > 0; + break; + + case S_IFBLK: { + _cleanup_(sd_device_unrefp) sd_device *dev = NULL; + + r = sd_device_new_from_stat_rdev(&dev, &st); + if (r < 0) + log_debug_errno(r, "Failed to acquire device for '%s', ignoring: %m", d->d_name); + else { + r = device_get_sysattr_bool(dev, "ro"); + if (r < 0) + log_device_debug_errno(dev, r, "Failed to get read/only state of '%s', ignoring: %m", d->d_name); + else + ro = r > 0; + + r = device_get_sysattr_u64(dev, "size", &size); + if (r < 0) + log_device_debug_errno(dev, r, "Failed to acquire size of device '%s', ignoring: %m", d->d_name); + else + /* the 'size' sysattr is always in multiples of 512, even on 4K sector block devices! */ + assert_se(MUL_ASSIGN_SAFE(&size, 512)); /* Overflow check for coverity */ + } + + break; + } + + default: + log_debug("Volume of unexpected inode type, ignoring: %s", d->d_name); + continue; + } + + r = sd_varlink_replybo( + link, + SD_JSON_BUILD_PAIR_STRING("name", n), + SD_JSON_BUILD_PAIR_STRING("type", inode_type_to_string(st.st_mode)), + SD_JSON_BUILD_PAIR_BOOLEAN("readOnly", ro), + SD_JSON_BUILD_PAIR_CONDITION(size != UINT64_MAX, "sizeBytes", SD_JSON_BUILD_UNSIGNED(size)), + SD_JSON_BUILD_PAIR_CONDITION(used != UINT64_MAX, "usedBytes", SD_JSON_BUILD_UNSIGNED(used))); + if (r < 0) + return r; + } + + return 0; +} + +static int vl_method_list_templates( + sd_varlink *link, + sd_json_variant *parameters, + sd_varlink_method_flags_t flags, + void *userdata) { + + int r; + + assert(link); + assert(FLAGS_SET(flags, SD_VARLINK_METHOD_MORE)); + + struct { + const char *match_name; + } p = {}; + + static const sd_json_dispatch_field dispatch_table[] = { + { "matchName", SD_JSON_VARIANT_STRING, sd_json_dispatch_const_string, voffsetof(p, match_name), 0 }, + {} + }; + + r = sd_varlink_dispatch(link, parameters, dispatch_table, &p); + if (r != 0) + return r; + + r = sd_varlink_set_sentinel(link, "io.systemd.StorageProvider.NoSuchTemplate"); + if (r < 0) + return r; + + for (Template t = 0; t < _TEMPLATE_MAX; t++) { + const char *n = template_to_string(t); + + if (p.match_name && fnmatch(p.match_name, n, FNM_NOESCAPE) != 0) + continue; + + r = sd_varlink_replybo( + link, + SD_JSON_BUILD_PAIR_STRING("name", n), + SD_JSON_BUILD_PAIR_STRING("type", volume_type_to_string(volume_type_from_template(t)))); + if (r < 0) + return r; + } + + return 0; +} + +static int create_volume_dir( + int storage_fd, + const char *filename, + Template t) { + + int r; + + assert(storage_fd >= 0); + assert(filename); + + XOpenFlags xopen_flags; + switch (t) { + + case TEMPLATE_DIRECTORY: + xopen_flags = 0; + break; + + case TEMPLATE_SUBVOLUME: + xopen_flags = XO_SUBVOLUME; + break; + + default: + return -ENOMEDIUM; /* Recognizable error for: template doesn't apply here */ + } + + _cleanup_free_ char *tf = NULL; + r = tempfn_random(filename, /* extra= */ NULL, &tf); + if (r < 0) + return r; + + _cleanup_close_ int volume_fd = xopenat_full(storage_fd, tf, O_CREAT|O_EXCL|O_RDONLY|O_DIRECTORY|O_CLOEXEC|O_NOFOLLOW, xopen_flags, 0700); + if (volume_fd < 0) + return volume_fd; + + _cleanup_close_ int root_fd = xopenat_full(volume_fd, "root", O_CREAT|O_EXCL|O_RDONLY|O_DIRECTORY|O_CLOEXEC|O_NOFOLLOW, xopen_flags, 0755); + if (root_fd < 0) { + r = root_fd; + goto fail; + } + + r = RET_NERRNO(fchown(root_fd, FOREIGN_UID_MIN, FOREIGN_UID_MIN)); + if (r < 0) + goto fail; + + r = rename_noreplace(storage_fd, tf, storage_fd, filename); + if (r < 0) + goto fail; + + return TAKE_FD(root_fd); + +fail: + if (root_fd >= 0) { + assert(volume_fd >= 0); + root_fd = safe_close(root_fd); + (void) unlinkat(volume_fd, "root", AT_REMOVEDIR); + } + + if (volume_fd >= 0) { + volume_fd = safe_close(volume_fd); + (void) unlinkat(storage_fd, tf, AT_REMOVEDIR); + } + + return r; +} + +static int create_volume_reg( + int storage_fd, + const char *filename, + Template t, + uint64_t create_size) { + int r; + + assert(storage_fd >= 0); + assert(filename); + + bool sparse; + switch (t) { + + case TEMPLATE_SPARSE_FILE: + sparse = true; + break; + + case TEMPLATE_ALLOCATED_FILE: + sparse = false; + break; + + default: + return -ENOMEDIUM; /* Recognizable error for: template doesn't apply here */ + } + + _cleanup_free_ char *tf = NULL; + _cleanup_close_ int fd = open_tmpfile_linkable_at(storage_fd, filename, O_RDWR|O_CLOEXEC, &tf); + if (fd < 0) + return fd; + + CLEANUP_TMPFILE_AT(storage_fd, tf); + + r = chattr_fd(fd, FS_NOCOW_FL, FS_NOCOW_FL); + if (r < 0 && !ERRNO_IS_IOCTL_NOT_SUPPORTED(r)) + return r; + + if (create_size > 0) { + if (sparse) + r = RET_NERRNO(ftruncate(fd, create_size)); + else + r = RET_NERRNO(fallocate(fd, /* mode= */ 0, /* offset= */ 0, create_size)); + if (r < 0) + return r; + } + + r = RET_NERRNO(fchmod(fd, 0600)); + if (r < 0) + return r; + + r = link_tmpfile_at(fd, storage_fd, tf, filename, /* flags= */ 0); + if (r < 0) + return r; + + tf = mfree(tf); /* disarm clean-up */ + + return TAKE_FD(fd); +} + +static int vl_method_acquire( + sd_varlink *link, + sd_json_variant *parameters, + sd_varlink_method_flags_t flags, + void *userdata) { + + Hashmap **polkit_registry = ASSERT_PTR(userdata); + int r; + + assert(link); + + struct { + const char *name; + CreateMode create_mode; + const char *template; + int read_only; + VolumeType request_as; + uint64_t create_size; + } p = { + .create_mode = CREATE_ANY, + .read_only = -1, + .request_as = _VOLUME_TYPE_INVALID, + .create_size = UINT64_MAX, + }; + + static const sd_json_dispatch_field dispatch_table[] = { + { "name", SD_JSON_VARIANT_STRING, sd_json_dispatch_const_string, voffsetof(p, name), SD_JSON_MANDATORY }, + { "createMode", SD_JSON_VARIANT_STRING, json_dispatch_create_mode, voffsetof(p, create_mode), 0 }, + { "template", SD_JSON_VARIANT_STRING, sd_json_dispatch_const_string, voffsetof(p, template), 0 }, + { "readOnly", SD_JSON_VARIANT_BOOLEAN, sd_json_dispatch_tristate, voffsetof(p, read_only), 0 }, + { "requestAs", SD_JSON_VARIANT_STRING, json_dispatch_volume_type, voffsetof(p, request_as), 0 }, + { "createSizeBytes", _SD_JSON_VARIANT_TYPE_INVALID, sd_json_dispatch_uint64, voffsetof(p, create_size), 0 }, + VARLINK_DISPATCH_POLKIT_FIELD, + {} + }; + + r = sd_varlink_dispatch(link, parameters, dispatch_table, &p); + if (r != 0) + return r; + + if (!storage_volume_name_is_valid(p.name)) + return sd_varlink_error_invalid_parameter_name(link, "name"); + + if (!IN_SET(p.create_mode, CREATE_ANY, CREATE_OPEN, CREATE_NEW)) + return sd_varlink_error(link, "io.systemd.StorageProvider.CreateNotSupported", NULL); + + /* off_t is signed, hence refuse overly long requests */ + if (p.create_size != UINT64_MAX && p.create_size > INT64_MAX) + return sd_varlink_error_invalid_parameter_name(link, "createSizeBytes"); + + Template t = _TEMPLATE_INVALID; + if (!isempty(p.template)) { + if (!storage_template_name_is_valid(p.template)) + return sd_varlink_error_invalid_parameter_name(link, "template"); + + t = template_from_string(p.template); + if (t < 0) + return sd_varlink_error(link, "io.systemd.StorageProvider.NoSuchTemplate", NULL); + } + + if (p.read_only > 0) { + if (p.create_mode == CREATE_NEW) + return sd_varlink_error_invalid_parameter_name(link, "readOnly"); + + p.create_mode = CREATE_OPEN; + } + + /* Add a suffix so that we are never attempted to open a temporary file assuming it was a valid + * volume. */ + _cleanup_free_ char *filename = strjoin(p.name, ".volume"); + if (!filename) + return log_oom_debug(); + + if (!filename_is_valid(filename)) + return sd_varlink_error_invalid_parameter_name(link, "name"); + + if (arg_runtime_scope != RUNTIME_SCOPE_USER) { + const char *details[] = { + "name", p.name, + NULL + }; + + r = varlink_verify_polkit_async( + link, + /* bus= */ NULL, + "io.systemd.storage.fs.acquire", + details, + polkit_registry); + if (r <= 0) + return r; + } + + _cleanup_close_ int storage_fd = open_storage_dir(); + if (storage_fd < 0) + return storage_fd; + + _cleanup_close_ int pin_fd = -EBADF, real_fd = -EBADF; + r = chaseat(XAT_FDROOT, storage_fd, filename, CHASE_TRIGGER_AUTOFS, /* ret_path= */ NULL, &pin_fd); + if (r < 0) { + if (r != -ENOENT) + return r; + if (p.create_mode == CREATE_OPEN || p.read_only > 0) + return sd_varlink_error(link, "io.systemd.StorageProvider.NoSuchVolume", NULL); + + /* Doesn't exist yet: create it now */ + + if (p.request_as < 0) /* Make a choice: pick default type */ + p.request_as = t < 0 ? VOLUME_DIR : volume_type_from_template(t); + + /* Try to create the volume */ + switch (p.request_as) { + + case VOLUME_DIR: { + + if (t < 0) /* Make a choice: pick default template */ + t = TEMPLATE_SUBVOLUME; + + real_fd = create_volume_dir(storage_fd, filename, t); + break; + } + + case VOLUME_REG: { + if (p.create_size == UINT64_MAX) + return sd_varlink_error(link, "io.systemd.StorageProvider.CreateSizeRequired", NULL); + + if (t < 0) /* Make a choice: pick default template */ + t = TEMPLATE_SPARSE_FILE; + + real_fd = create_volume_reg(storage_fd, filename, t, p.create_size); + break; + } + + case VOLUME_BLK: + /* We don't support creating block devices, we only support if they are symlinked + * into the storage directory. */ + return sd_varlink_error(link, "io.systemd.StorageProvider.CreateNotSupported", NULL); + + default: + assert_not_reached(); + } + + if (real_fd == -ENOMEDIUM) + return sd_varlink_error(link, "io.systemd.StorageProvider.BadTemplate", NULL); + if (real_fd == -EEXIST) { + if (p.create_mode == CREATE_NEW) + return sd_varlink_error(link, "io.systemd.StorageProvider.VolumeExists", NULL); + + /* If we failed to open the volume and reached this point, then the volume already + * exists by now (i.e. we ran into a race). In that case, try to pin it a second time + * (but only once, let's never loop around this). */ + r = chaseat(XAT_FDROOT, storage_fd, filename, CHASE_TRIGGER_AUTOFS, /* ret_path= */ NULL, &pin_fd); + if (r < 0) + return r; + } else if (real_fd < 0) + return real_fd; + + } else if (p.create_mode == CREATE_NEW) + return sd_varlink_error(link, "io.systemd.StorageProvider.VolumeExists", NULL); + + /* At this point, we either already opened the real fd, or we managed to pin it (but not both) */ + assert((real_fd >= 0) != (pin_fd >= 0)); + + /* Let's first settle the volume type */ + struct stat st; + if (fstat(real_fd >= 0 ? real_fd : pin_fd, &st) < 0) + return -errno; + + if (p.request_as == VOLUME_REG) { + /* First, check for the other supported types and generate a nice error */ + if (IN_SET(st.st_mode & S_IFMT, S_IFDIR, S_IFBLK)) + return sd_varlink_error(link, "io.systemd.StorageProvider.WrongType", NULL); + + /* Second verify cover all other types */ + r = stat_verify_regular(&st); + if (r < 0) + return r; + } else if (p.request_as == VOLUME_DIR) { + if (IN_SET(st.st_mode & S_IFMT, S_IFREG, S_IFBLK)) + return sd_varlink_error(link, "io.systemd.StorageProvider.WrongType", NULL); + + r = stat_verify_directory(&st); + if (r < 0) + return r; + } else if (p.request_as == VOLUME_BLK) { + if (IN_SET(st.st_mode & S_IFMT, S_IFREG, S_IFDIR)) + return sd_varlink_error(link, "io.systemd.StorageProvider.WrongType", NULL); + + r = stat_verify_block(&st); + if (r < 0) + return r; + + } else if (S_ISREG(st.st_mode)) + p.request_as = VOLUME_REG; + else if (S_ISDIR(st.st_mode)) + p.request_as = VOLUME_DIR; + else if (S_ISBLK(st.st_mode)) + p.request_as = VOLUME_BLK; + else + return log_debug_errno(SYNTHETIC_ERRNO(EBADF), "Unexpected inode type, refusing."); + + /* Let's now acquire a real fd for the pinned fd, if we still need to */ + if (real_fd < 0) { + assert(pin_fd >= 0); + + XOpenFlags xopen_flags = + (p.read_only < 0 && !S_ISDIR(st.st_mode) ? XO_AUTO_RW_RO : 0); + int open_flags = + (p.read_only < 0 ? 0 : (p.read_only > 0 || S_ISDIR(st.st_mode) ? O_RDONLY : O_RDWR)); + + const char *subdir = NULL; + if (p.request_as == VOLUME_DIR) { + /* We place the root of the directory tree one level down, to separate ownership of + * the inode: the upper inode is owned by the host, the lower one by the volume. This + * matters so that the host one can be owned by the host's root, and the volume one + * by the foreign UID range. */ + subdir = "root"; + open_flags |= O_DIRECTORY|O_NOFOLLOW; + } + + real_fd = xopenat_full(pin_fd, subdir, open_flags|O_CLOEXEC, xopen_flags, /* mode= */ MODE_INVALID); + if (real_fd < 0) + return log_debug_errno(real_fd, "Failed to reopen volume fd for '%s': %m", filename); + + /* In directory mode we might be looking at a different inode node, refresh the stat data */ + if (p.request_as == VOLUME_DIR && fstat(real_fd, &st) < 0) + return -errno; + } + + assert(real_fd >= 0); + + bool ro; + switch (p.request_as) { + + case VOLUME_REG: + case VOLUME_BLK: { + assert(IN_SET(st.st_mode & S_IFMT, S_IFREG, S_IFBLK)); + + int open_flags = fcntl(real_fd, F_GETFL, 0); + if (open_flags < 0) + return -errno; + + ro = (open_flags & O_ACCMODE_STRICT) == O_RDONLY; + break; + } + + case VOLUME_DIR: { + assert(S_ISDIR(st.st_mode)); + + if (!uid_is_foreign(st.st_uid) || + !gid_is_foreign(st.st_gid)) + return log_debug_errno(SYNTHETIC_ERRNO(EPERM), "Storage directory not owned by foreign UID/GID range."); + + /* Let's now generate a new mount for the directory tree, where propagation is disabled, and the + * flags are all set to good defaults */ + _cleanup_close_ int mount_fd = open_tree_attr_with_fallback( + real_fd, + /* path= */ NULL, + OPEN_TREE_CLONE|OPEN_TREE_CLOEXEC|AT_SYMLINK_NOFOLLOW, + &(struct mount_attr) { + .attr_set = (p.read_only > 0 ? MOUNT_ATTR_RDONLY : 0), + .attr_clr = MOUNT_ATTR_NOSUID|MOUNT_ATTR_NOEXEC|MOUNT_ATTR_NODEV, + .propagation = MS_PRIVATE, + }); + if (mount_fd < 0) + return log_debug_errno(mount_fd, "Failed to generate per-volume mount: %m"); + + /* Let's turn on propagation again now that it is disconnected, simply because MS_SHARED is + * generally the default for everything we return. */ + + if (mount_setattr(mount_fd, "", AT_EMPTY_PATH|AT_SYMLINK_NOFOLLOW, + &(struct mount_attr) { + .propagation = MS_SHARED, + }, MOUNT_ATTR_SIZE_VER0) < 0) + return log_debug_errno(errno, "Failed to enable propagation on per-volume mount: %m"); + + close_and_replace(real_fd, mount_fd); + + r = fd_is_read_only_fs(real_fd); + if (r < 0) + return r; + + ro = r > 0; + break; + } + + default: + assert_not_reached(); + } + + if (p.read_only == 0 && ro) + return sd_varlink_error(link, "io.systemd.StorageProvider.ReadOnlyVolume", NULL); + + int idx = sd_varlink_push_fd(link, real_fd); + if (idx < 0) + return idx; + + TAKE_FD(real_fd); + + return sd_varlink_replybo( + link, + SD_JSON_BUILD_PAIR_INTEGER("fileDescriptorIndex", idx), + SD_JSON_BUILD_PAIR_STRING("type", inode_type_to_string(st.st_mode)), + SD_JSON_BUILD_PAIR_BOOLEAN("readOnly", ro), + SD_JSON_BUILD_PAIR_CONDITION(p.request_as == VOLUME_DIR, "baseUID", SD_JSON_BUILD_INTEGER(FOREIGN_UID_BASE)), + SD_JSON_BUILD_PAIR_CONDITION(p.request_as == VOLUME_DIR, "baseGID", SD_JSON_BUILD_INTEGER(FOREIGN_UID_BASE))); +} + +static int vl_server(void) { + int r; + + _cleanup_(hashmap_freep) Hashmap *polkit_registry = NULL; + _cleanup_(sd_varlink_server_unrefp) sd_varlink_server *varlink_server = NULL; + r = varlink_server_new( + &varlink_server, + SD_VARLINK_SERVER_HANDLE_SIGINT| + SD_VARLINK_SERVER_HANDLE_SIGTERM| + SD_VARLINK_SERVER_ALLOW_FD_PASSING_OUTPUT| + SD_VARLINK_SERVER_INHERIT_USERDATA, + &polkit_registry); + if (r < 0) + return log_error_errno(r, "Failed to allocate Varlink server: %m"); + + r = sd_varlink_server_add_interface(varlink_server, &vl_interface_io_systemd_StorageProvider); + if (r < 0) + return log_error_errno(r, "Failed to add Varlink interface: %m"); + + r = sd_varlink_server_bind_method_many( + varlink_server, + "io.systemd.StorageProvider.Acquire", vl_method_acquire, + "io.systemd.StorageProvider.ListVolumes", vl_method_list_volumes, + "io.systemd.StorageProvider.ListTemplates", vl_method_list_templates); + if (r < 0) + return log_error_errno(r, "Failed to bind Varlink methods: %m"); + + r = sd_varlink_server_loop_auto(varlink_server); + if (r < 0) + return log_error_errno(r, "Failed to run Varlink event loop: %m"); + + return 0; +} + +static int help(void) { + int r; + + help_cmdline("[OPTIONS...]"); + help_abstract("Simple file system backed storage provider"); + + _cleanup_(table_unrefp) Table *options = NULL; + r = option_parser_get_help_table(&options); + if (r < 0) + return r; + + help_section("Options:"); + + r = table_print_or_warn(options); + if (r < 0) + return r; + + help_man_page_reference("systemd-storage-fs", "8"); + return 0; +} + +static int parse_argv(int argc, char *argv[]) { + assert(argc >= 0); + assert(argv); + + OptionParser opts = { argc, argv }; + FOREACH_OPTION(c, &opts, /* on_error= */ return c) + switch (c) { + + OPTION_COMMON_HELP: + return help(); + + OPTION_COMMON_VERSION: + return version(); + + OPTION_LONG("system", NULL, "Operate in system mode"): + arg_runtime_scope = RUNTIME_SCOPE_SYSTEM; + break; + + OPTION_LONG("user", NULL, "Operate in user mode"): + arg_runtime_scope = RUNTIME_SCOPE_USER; + break; + } + + if (option_parser_get_n_args(&opts) > 0) + return log_error_errno(SYNTHETIC_ERRNO(EINVAL), "This program takes no arguments."); + + return 1; +} + +static int run(int argc, char* argv[]) { + int r; + + log_setup(); + + r = parse_argv(argc, argv); + if (r <= 0) + return r; + + return vl_server(); +} + +DEFINE_MAIN_FUNCTION(run); diff --git a/units/meson.build b/units/meson.build index 3cac3c876ae1c..0f7ce75bd8967 100644 --- a/units/meson.build +++ b/units/meson.build @@ -811,6 +811,13 @@ units = [ { 'file' : 'systemd-storage-block@.service.in', }, + { + 'file' : 'systemd-storage-fs.socket', + 'symlinks' : ['sockets.target.wants/'] + }, + { + 'file' : 'systemd-storage-fs@.service.in', + }, { 'file' : 'systemd-storagetm.service.in', 'conditions' : ['ENABLE_STORAGETM'], diff --git a/units/systemd-storage-fs.socket b/units/systemd-storage-fs.socket new file mode 100644 index 0000000000000..c83cf0a11fda8 --- /dev/null +++ b/units/systemd-storage-fs.socket @@ -0,0 +1,25 @@ +# SPDX-License-Identifier: LGPL-2.1-or-later +# +# This file is part of systemd. +# +# systemd is free software; you can redistribute it and/or modify it +# under the terms of the GNU Lesser General Public License as published by +# the Free Software Foundation; either version 2.1 of the License, or +# (at your option) any later version. + +[Unit] +Description=Simple File System Backed Storage Provider +Documentation=man:systemd-storage-fs@.service(8) +DefaultDependencies=no +RequiresMountsFor=/var/lib/storage +Before=sockets.target + +[Socket] +ListenStream=/run/systemd/io.systemd.StorageProvider/fs +FileDescriptorName=varlink +SocketMode=0666 +Accept=yes +MaxConnectionsPerSource=16 + +[Install] +WantedBy=sockets.target diff --git a/units/systemd-storage-fs@.service.in b/units/systemd-storage-fs@.service.in new file mode 100644 index 0000000000000..39b6da36ee76b --- /dev/null +++ b/units/systemd-storage-fs@.service.in @@ -0,0 +1,19 @@ +# SPDX-License-Identifier: LGPL-2.1-or-later +# +# This file is part of systemd. +# +# systemd is free software; you can redistribute it and/or modify it +# under the terms of the GNU Lesser General Public License as published by +# the Free Software Foundation; either version 2.1 of the License, or +# (at your option) any later version. + +[Unit] +Description=Simple File System Backed Storage Provider +Documentation=man:systemd-storage-fs@.service(8) +DefaultDependencies=no +RequiresMountsFor=/var/lib/storage +Conflicts=shutdown.target initrd-switch-root.target +Before=shutdown.target initrd-switch-root.target + +[Service] +ExecStart=-{{LIBEXECDIR}}/systemd-storage-fs diff --git a/units/user/meson.build b/units/user/meson.build index a9c6d44281c28..39c41a4c1cd8c 100644 --- a/units/user/meson.build +++ b/units/user/meson.build @@ -61,6 +61,13 @@ units = [ 'file' : 'systemd-journalctl.socket', 'symlinks' : ['sockets.target.wants/'], }, + { + 'file' : 'systemd-storage-fs.socket', + 'symlinks' : ['sockets.target.wants/'] + }, + { + 'file' : 'systemd-storage-fs@.service.in', + }, { 'file' : 'systemd-tmpfiles-clean.service' }, { 'file' : 'systemd-tmpfiles-clean.timer' }, { 'file' : 'systemd-tmpfiles-setup.service' }, diff --git a/units/user/systemd-storage-fs.socket b/units/user/systemd-storage-fs.socket new file mode 100644 index 0000000000000..fa8018b2e8552 --- /dev/null +++ b/units/user/systemd-storage-fs.socket @@ -0,0 +1,23 @@ +# SPDX-License-Identifier: LGPL-2.1-or-later +# +# This file is part of systemd. +# +# systemd is free software; you can redistribute it and/or modify it +# under the terms of the GNU Lesser General Public License as published by +# the Free Software Foundation; either version 2.1 of the License, or +# (at your option) any later version. + +[Unit] +Description=Simple File System Backed Storage Provider +Documentation=man:systemd-storage-fs.service(8) +Before=sockets.target + +[Socket] +ListenStream=%t/systemd/io.systemd.StorageProvider/fs +FileDescriptorName=varlink +SocketMode=0600 +Accept=yes +MaxConnectionsPerSource=16 + +[Install] +WantedBy=sockets.target diff --git a/units/user/systemd-storage-fs@.service.in b/units/user/systemd-storage-fs@.service.in new file mode 100644 index 0000000000000..95afa9165fa5f --- /dev/null +++ b/units/user/systemd-storage-fs@.service.in @@ -0,0 +1,15 @@ +# SPDX-License-Identifier: LGPL-2.1-or-later +# +# This file is part of systemd. +# +# systemd is free software; you can redistribute it and/or modify it +# under the terms of the GNU Lesser General Public License as published by +# the Free Software Foundation; either version 2.1 of the License, or +# (at your option) any later version. + +[Unit] +Description=Simple File System Backed Storage Provider +Documentation=man:systemd-storage-fs.service(8) + +[Service] +ExecStart=-{{LIBEXECDIR}}/systemd-storage-fs --user From 4002a7e8be0965b3bbc4aa1833298314464778d8 Mon Sep 17 00:00:00 2001 From: Lennart Poettering Date: Wed, 22 Apr 2026 23:44:04 +0200 Subject: [PATCH 017/242] storage: add 'storagectl' command-line tool CLI for inspecting and using storage providers. Scans /run/systemd/io.systemd.StorageProvider/ (or the user-mode equivalent) for AF_UNIX sockets and talks to each one over Varlink. Verbs: "volumes" lists volumes across all providers, "templates" lists supported creation templates, "providers" lists the endpoints themselves. Also installed as a mount.storage helper, so 'mount -t storage PROVIDER:VOLUME /mnt' (or 'mount -t storage.' to put a fresh filesystem on a block volume) acquires the volume and mounts it. Ships with bash/zsh completions and a man page. --- man/rules/meson.build | 1 + man/storagectl.xml | 281 +++++++++++ shell-completion/bash/meson.build | 1 + shell-completion/bash/storagectl | 74 +++ shell-completion/zsh/_storagectl | 35 ++ shell-completion/zsh/meson.build | 1 + src/storage/meson.build | 10 + src/storage/storagectl.c | 812 ++++++++++++++++++++++++++++++ 8 files changed, 1215 insertions(+) create mode 100644 man/storagectl.xml create mode 100644 shell-completion/bash/storagectl create mode 100644 shell-completion/zsh/_storagectl create mode 100644 src/storage/storagectl.c diff --git a/man/rules/meson.build b/man/rules/meson.build index 7f4fa07f7ba77..719838064c02f 100644 --- a/man/rules/meson.build +++ b/man/rules/meson.build @@ -972,6 +972,7 @@ manpages = [ ['sd_watchdog_enabled', '3', [], ''], ['shutdown', '8', [], ''], ['smbios-type-11', '7', [], ''], + ['storagectl', '1', ['mount.storage'], ''], ['sysctl.d', '5', [], ''], ['sysext.conf', '5', diff --git a/man/storagectl.xml b/man/storagectl.xml new file mode 100644 index 0000000000000..5fddf3ca08db5 --- /dev/null +++ b/man/storagectl.xml @@ -0,0 +1,281 @@ + + + + + + + + storagectl + systemd + + + + storagectl + 1 + + + + storagectl + mount.storage + Enumerate and mount storage volumes provided by storage providers + + + + + storagectl + OPTIONS + COMMAND + NAME + + + + mount + -t + storage + PROVIDER:VOLUME + DIRECTORY + + + + mount + -t + storage.FSTYPE + PROVIDER:VOLUME + DIRECTORY + + + + + Description + + storagectl may be used to inspect storage providers and the storage + volumes they expose. A storage provider is a service implementing the + io.systemd.StorageProvider Varlink + interface, registered as an AF_UNIX socket below the well-known socket directory + /run/systemd/io.systemd.StorageProvider/ (in system mode) or + $XDG_RUNTIME_DIR/systemd/io.systemd.StorageProvider/ (in user mode). The two + storage providers shipped with systemd are + systemd-storage-block@.service8, + which exposes the system's block devices, and + systemd-storage-fs@.service8, + which exposes regular files and directories from a backing file system. + + The tool also provides a mount8 helper + for the file system type storage, which permits mounting storage volumes to arbitrary + places. See "Use as a mount helper" below for details. + + + + Commands + + The following commands are understood: + + + + + volumes GLOB + + List storage volumes provided by all storage providers running on the + system (or, with , in the user runtime). The optional + GLOB argument is a shell-style pattern (see + fnmatch3) + that filters the result by volume name. The output is a table containing the providing + service, the volume name, its type (blk, reg or + dir), whether it is read-only, and — if known — its size and the number + of bytes used. + + This is the default command if none is specified. + + + + + + templates GLOB + + List volume templates supported by the running storage providers. Templates + encapsulate a configuration to use when creating volumes on-the-fly, when they are acquired. Template + support is an optional feature for providers, and only applies to providers that allow creation + of volumes on-the-fly. See the respective provider documentation for details, for example + systemd-storage-fs@.service8. The + optional GLOB argument filters by template name. Storage providers that do + not implement template-based volume creation (such as the block-device provider) do not contribute to + this output. + + + + + + providers + + List the storage providers known to the system. This is determined by scanning the + well-known socket directory for AF_UNIX sockets that look like + io.systemd.StorageProvider endpoints. For each provider it is also reported + whether the socket can currently be connected to. + + + + + + + + Options + + The following options are understood: + + + + + + Operate on system-wide storage providers. Sockets are looked for in + /run/systemd/io.systemd.StorageProvider/. This is the default. + + + + + + + + Operate on per-user storage providers. Sockets are looked for in + $XDG_RUNTIME_DIR/systemd/io.systemd.StorageProvider/. + + + + + + + + + + + + + + + Use as a mount helper + + The tool provides the /sbin/mount.storage alias, implementing the + mount8 + "external helper" interface, allowing storage volumes to be mounted with the regular + mount command. The volume to mount is encoded as the source of the mount, + in the form + PROVIDER:VOLUME, where + PROVIDER is the name of a storage provider (as listed by + storagectl providers) and VOLUME is the volume + name. Two file system type spellings are recognized: + + + + storage + + Acquires a directory volume and bind-mounts its directory tree onto the + target. + + + + + + storage.FSTYPE + + Acquires a regular file or block device volume and mounts it as a file system of type + FSTYPE (for example storage.ext4, + storage.btrfs, …). + + + + + + The standard mount options are forwarded to + mount. In addition, the following storage.-prefixed + options are interpreted by mount.storage itself and stripped from the + forwarded list: + + + + MODE + + Takes one of any (open if it exists, otherwise create — the + default), open (fail if the volume does not yet exist) or new + (fail if the volume already exists). + + + + + + NAME + + The template to use when creating a new volume, if it is missing and the provider + supports on-the-fly creation of volumes. + + + + + + BYTES + + When creating a new volume on-the-fly, the size in bytes to allocate. Accepts the + usual K/M/G/T suffixes + (base 1024). Required when creating a regular file volume. + + + + + + + + + Examples + + + Enumerate available storage providers, volumes and templates + + $ storagectl providers +$ storagectl volumes +$ storagectl volumes '*foo*' +$ storagectl templates + + + + Mount a directory volume from the file system provider + + # mount -t storage fs:myvol /mnt/myvol + + If the volume myvol does not yet exist, it will be created using + the default subvolume template. + + + + Create and mount an ext4 file system from a regular file. + + # mount -t storage.ext4 fs:scratch /mnt/scratch -o loop + + + + Mount a block device volume read-only + + # mount -t storage.ext4 -o ro block:/dev/disk/by-id/usb-foo /mnt/foo + + + + + Exit status + + On success, 0 is returned, a non-zero failure code otherwise. + + + + + + See Also + + systemd1 + systemd-storage-block@.service8 + systemd-storage-fs@.service8 + varlinkctl1 + mount8 + + + + diff --git a/shell-completion/bash/meson.build b/shell-completion/bash/meson.build index 154910979ea56..b0e56608e8f37 100644 --- a/shell-completion/bash/meson.build +++ b/shell-completion/bash/meson.build @@ -36,6 +36,7 @@ foreach item : [ ['portablectl', 'ENABLE_PORTABLED'], ['resolvectl', 'ENABLE_RESOLVE'], ['run0', ''], + ['storagectl', ''], ['systemd-analyze', ''], ['systemd-cat', ''], ['systemd-cgls', ''], diff --git a/shell-completion/bash/storagectl b/shell-completion/bash/storagectl new file mode 100644 index 0000000000000..5aefc30ed162d --- /dev/null +++ b/shell-completion/bash/storagectl @@ -0,0 +1,74 @@ +# shellcheck shell=bash +# storagectl(1) completion -*- shell-script -*- +# SPDX-License-Identifier: LGPL-2.1-or-later +# +# This file is part of systemd. +# +# systemd is free software; you can redistribute it and/or modify it +# under the terms of the GNU Lesser General Public License as published by +# the Free Software Foundation; either version 2.1 of the License, or +# (at your option) any later version. +# +# systemd is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +# General Public License for more details. +# +# You should have received a copy of the GNU Lesser General Public License +# along with systemd; If not, see . + +__contains_word () { + local w word=$1; shift + for w in "$@"; do + [[ $w = "$word" ]] && return + done +} + +_storagectl() { + local i verb comps + local cur=${COMP_WORDS[COMP_CWORD]} prev=${COMP_WORDS[COMP_CWORD-1]} + + local -A OPTS=( + [STANDALONE]='-h --help --version --no-pager --no-legend --no-ask-password + --system --user' + [ARG]='--json' + ) + + if __contains_word "$prev" ${OPTS[ARG]}; then + case $prev in + --json) + comps=$( storagectl --json=help 2>/dev/null ) + ;; + esac + COMPREPLY=( $(compgen -W '$comps' -- "$cur") ) + return 0 + fi + + if [[ "$cur" = -* ]]; then + COMPREPLY=( $(compgen -W '${OPTS[*]}' -- "$cur") ) + return 0 + fi + + local -A VERBS=( + [STANDALONE]='volumes templates providers help' + ) + + for ((i=0; i < COMP_CWORD; i++)); do + if __contains_word "${COMP_WORDS[i]}" ${VERBS[*]} && + ! __contains_word "${COMP_WORDS[i-1]}" ${OPTS[ARG]}; then + verb=${COMP_WORDS[i]} + break + fi + done + + if [[ -z ${verb-} ]]; then + comps=${VERBS[*]} + elif __contains_word "$verb" ${VERBS[STANDALONE]}; then + comps='' + fi + + COMPREPLY=( $(compgen -W '$comps' -- "$cur") ) + return 0 +} + +complete -F _storagectl storagectl diff --git a/shell-completion/zsh/_storagectl b/shell-completion/zsh/_storagectl new file mode 100644 index 0000000000000..b2fdf595a1076 --- /dev/null +++ b/shell-completion/zsh/_storagectl @@ -0,0 +1,35 @@ +#compdef storagectl +# SPDX-License-Identifier: LGPL-2.1-or-later + +(( $+functions[_storagectl_commands] )) || _storagectl_commands() +{ + local -a _storagectl_cmds + _storagectl_cmds=( + "volumes:List storage volumes" + "templates:List storage volume templates" + "providers:List storage providers" + "help:Prints a short help text and exits" + ) + if (( CURRENT == 1 )); then + _describe -t commands 'storagectl command' _storagectl_cmds + else + local curcontext="$curcontext" + cmd="${${_storagectl_cmds[(r)$words[1]:*]%%:*}}" + if (( $+functions[_storagectl_$cmd] )); then + _storagectl_$cmd + else + _message "no more options" + fi + fi +} + +_arguments \ + '(- *)'{-h,--help}'[Prints a short help text and exits.]' \ + '(- *)--version[Prints a short version string and exits.]' \ + '--no-pager[Do not pipe output into a pager]' \ + '--no-legend[Do not show the headers and footers]' \ + '--no-ask-password[Do not query the user for authentication]' \ + '--json=[Show output as JSON]:mode:(pretty short off help)' \ + '--system[Operate in system mode]' \ + '--user[Operate in user mode]' \ + '*::storagectl command:_storagectl_commands' diff --git a/shell-completion/zsh/meson.build b/shell-completion/zsh/meson.build index b1bff151e41a3..6cc8a2d57f83e 100644 --- a/shell-completion/zsh/meson.build +++ b/shell-completion/zsh/meson.build @@ -33,6 +33,7 @@ foreach item : [ ['_sd_machines', 'ENABLE_MACHINED'], ['_sd_outputmodes', ''], ['_sd_unit_files', ''], + ['_storagectl', ''], ['_systemd', ''], ['_systemd-analyze', ''], ['_systemd-delta', ''], diff --git a/src/storage/meson.build b/src/storage/meson.build index 05c5e24ece4ac..21456141dec8c 100644 --- a/src/storage/meson.build +++ b/src/storage/meson.build @@ -11,7 +11,17 @@ executables += [ 'sources' : files('storage-fs.c'), 'objects' : ['systemd-storage-block'], }, + executable_template + { + 'name' : 'storagectl', + 'public' : true, + 'sources' : files('storagectl.c'), + 'objects' : ['systemd-storage-block'], + }, ] +install_symlink('mount.storage', + pointing_to : sbin_to_bin + 'storagectl', + install_dir : sbindir) + install_data('io.systemd.storage.policy', install_dir : polkitpolicydir) diff --git a/src/storage/storagectl.c b/src/storage/storagectl.c new file mode 100644 index 0000000000000..a21072e78f204 --- /dev/null +++ b/src/storage/storagectl.c @@ -0,0 +1,812 @@ +/* SPDX-License-Identifier: LGPL-2.1-or-later */ + +#include "sd-varlink.h" + +#include +#include +#include +#include + +#include "alloc-util.h" +#include "ansi-color.h" +#include "argv-util.h" +#include "build.h" +#include "bus-util.h" +#include "errno-list.h" +#include "escape.h" +#include "extract-word.h" +#include "fd-util.h" +#include "format-table.h" +#include "format-util.h" +#include "help-util.h" +#include "json-util.h" +#include "main-func.h" +#include "mount-util.h" +#include "namespace-util.h" +#include "options.h" +#include "parse-argument.h" +#include "parse-util.h" +#include "path-lookup.h" +#include "path-util.h" +#include "polkit-agent.h" +#include "recurse-dir.h" +#include "runtime-scope.h" +#include "socket-util.h" +#include "stat-util.h" +#include "stdio-util.h" +#include "storage-util.h" +#include "string-util.h" +#include "strv.h" +#include "user-util.h" +#include "varlink-util.h" +#include "verbs.h" + +static sd_json_format_flags_t arg_json_format_flags = SD_JSON_FORMAT_OFF; +static PagerFlags arg_pager_flags = 0; +static bool arg_legend = true; +static bool arg_ask_password = true; +static RuntimeScope arg_runtime_scope = RUNTIME_SCOPE_SYSTEM; + +static int help(void) { + int r; + + help_cmdline("[OPTIONS...] COMMAND"); + help_abstract("Enumerate storage volumes and providers."); + + _cleanup_(table_unrefp) Table *verbs = NULL; + r = verbs_get_help_table(&verbs); + if (r < 0) + return r; + + _cleanup_(table_unrefp) Table *options = NULL; + r = option_parser_get_help_table(&options); + if (r < 0) + return r; + + (void) table_sync_column_widths(0, verbs, options); + + help_section("Commands:"); + + r = table_print_or_warn(verbs); + if (r < 0) + return r; + + help_section("Options:"); + + r = table_print_or_warn(options); + if (r < 0) + return r; + + help_man_page_reference("storagectl", "1"); + return 0; +} + +VERB_COMMON_HELP_HIDDEN(help); + +static const char *ro_color(int ro) { + if (ro > 0) + return ansi_highlight_red(); + if (ro == 0) + return ansi_highlight_green(); + + return NULL; +} + +static int on_list_reply( + sd_varlink *link, + sd_json_variant *parameters, + const char *error_id, + sd_varlink_reply_flags_t flags, + void* userdata) { + + Table *t = ASSERT_PTR(userdata); + int r; + + assert(link); + + const char *d = ASSERT_PTR(sd_varlink_get_description(link)); + + if (error_id) { + log_debug("%s: Received error '%s', ignoring.", d, error_id); + return 0; + } + + _cleanup_free_ char *provider = NULL; + r = path_extract_filename(d, &provider); + if (r < 0) + return log_error_errno(r, "Failed to extract provider name from socket path: %m"); + + struct { + const char *name; + const char *type; + int read_only; + uint64_t size_bytes; + uint64_t used_bytes; + } p = { + .read_only = -1, + .size_bytes = UINT64_MAX, + .used_bytes = UINT64_MAX, + }; + + static const sd_json_dispatch_field dispatch_table[] = { + { "name", SD_JSON_VARIANT_STRING, sd_json_dispatch_const_string, voffsetof(p, name), 0 }, + { "type", SD_JSON_VARIANT_STRING, sd_json_dispatch_const_string, voffsetof(p, type), 0 }, + { "readOnly", SD_JSON_VARIANT_BOOLEAN, sd_json_dispatch_tristate, voffsetof(p, read_only), 0 }, + { "sizeBytes", _SD_JSON_VARIANT_TYPE_INVALID, sd_json_dispatch_uint64, voffsetof(p, size_bytes), 0 }, + { "usedBytes", _SD_JSON_VARIANT_TYPE_INVALID, sd_json_dispatch_uint64, voffsetof(p, used_bytes), 0 }, + {} + }; + + r = sd_json_dispatch(parameters, dispatch_table, SD_JSON_ALLOW_EXTENSIONS, &p); + if (r < 0) + return log_error_errno(r, "Failed to decode List() reply: %m"); + + r = table_add_many( + t, + TABLE_STRING, provider, + TABLE_STRING, p.name, + TABLE_STRING, p.type, + TABLE_TRISTATE, p.read_only, + TABLE_SET_COLOR, ro_color(p.read_only)); + if (r < 0) + return table_log_add_error(r); + + if (p.size_bytes == UINT64_MAX) + r = table_add_many(t, TABLE_EMPTY, TABLE_SET_ALIGN_PERCENT, 100); + else + r = table_add_many(t, TABLE_SIZE, p.size_bytes, TABLE_SET_ALIGN_PERCENT, 100); + if (r < 0) + return table_log_add_error(r); + + if (p.used_bytes == UINT64_MAX) + r = table_add_many(t, TABLE_EMPTY, TABLE_SET_ALIGN_PERCENT, 100); + else + r = table_add_many(t, TABLE_SIZE, p.used_bytes, TABLE_SET_ALIGN_PERCENT, 100); + if (r < 0) + return table_log_add_error(r); + + return 0; +} + +VERB(verb_list_volumes, "volumes", "GLOB", /* min_args= */ VERB_ANY, /* max_args= */ 2, VERB_DEFAULT, "List storage volumes"); +static int verb_list_volumes(int argc, char *argv[], uintptr_t data, void *userdata) { + int r; + + assert(argc <= 2); + + _cleanup_free_ char *socket_path = NULL; + r = runtime_directory_generic(arg_runtime_scope, "systemd/io.systemd.StorageProvider", &socket_path); + if (r < 0) + return log_error_errno(r, "Failed to determine socket directory: %m"); + + _cleanup_(table_unrefp) Table *t = table_new("provider", "name", "type", "ro", "size", "used"); + if (!t) + return log_oom(); + + (void) table_set_sort(t, (size_t) 0, (size_t) 1); + table_set_ersatz_string(t, TABLE_ERSATZ_DASH); + + _cleanup_(sd_json_variant_unrefp) sd_json_variant *v = NULL; + if (argc >= 2) { + r = sd_json_buildo( + &v, + SD_JSON_BUILD_PAIR_STRING("matchName", argv[1])); + if (r < 0) + return log_oom(); + } + + ssize_t n = varlink_execute_directory( + socket_path, + "io.systemd.StorageProvider.ListVolumes", + v, + /* more= */ true, + /* timeout_usec= */ 0, /* 0 means default */ + on_list_reply, + t); + if (n < 0 && n != -ENOENT) + return log_error_errno(n, "Failed to enumerate storage volumes: %m"); + + if (!table_isempty(t)) { + r = table_print_with_pager(t, arg_json_format_flags, arg_pager_flags, arg_legend); + if (r < 0) + return r; + } + + if (arg_legend && FLAGS_SET(arg_json_format_flags, SD_JSON_FORMAT_OFF)) { + if (table_isempty(t)) + printf("No storage volumes.\n"); + else + printf("\n%zu storage volumes listed.\n", table_get_rows(t) - 1); + } + + return 0; +} + +static int on_list_templates_reply( + sd_varlink *link, + sd_json_variant *parameters, + const char *error_id, + sd_varlink_reply_flags_t flags, + void* userdata) { + + Table *t = ASSERT_PTR(userdata); + int r; + + assert(link); + + const char *d = ASSERT_PTR(sd_varlink_get_description(link)); + + if (error_id) { + log_debug("%s: Received error '%s', ignoring.", d, error_id); + return 0; + } + + _cleanup_free_ char *provider = NULL; + r = path_extract_filename(d, &provider); + if (r < 0) + return log_error_errno(r, "Failed to extract provider name from socket path: %m"); + + struct { + const char *name; + const char *type; + } p = { + }; + + static const sd_json_dispatch_field dispatch_table[] = { + { "name", SD_JSON_VARIANT_STRING, sd_json_dispatch_const_string, voffsetof(p, name), 0 }, + { "type", SD_JSON_VARIANT_STRING, sd_json_dispatch_const_string, voffsetof(p, type), 0 }, + {} + }; + + r = sd_json_dispatch(parameters, dispatch_table, SD_JSON_ALLOW_EXTENSIONS, &p); + if (r < 0) + return log_error_errno(r, "Failed to decode ListTemplates() reply: %m"); + + r = table_add_many( + t, + TABLE_STRING, provider, + TABLE_STRING, p.name, + TABLE_STRING, p.type); + if (r < 0) + return table_log_add_error(r); + + return 0; +} + +VERB(verb_templates, "templates", "GLOB", /* min_args= */ VERB_ANY, /* max_args= */ 2, /* flags= */ 0, "List storage volume templates"); +static int verb_templates(int argc, char *argv[], uintptr_t data, void *userdata) { + int r; + + assert(argc <= 2); + + _cleanup_free_ char *socket_path = NULL; + r = runtime_directory_generic(arg_runtime_scope, "systemd/io.systemd.StorageProvider", &socket_path); + if (r < 0) + return log_error_errno(r, "Failed to determine socket directory: %m"); + + _cleanup_(table_unrefp) Table *t = table_new("provider", "name", "type"); + if (!t) + return log_oom(); + + (void) table_set_sort(t, (size_t) 0, (size_t) 1); + table_set_ersatz_string(t, TABLE_ERSATZ_DASH); + + _cleanup_(sd_json_variant_unrefp) sd_json_variant *v = NULL; + if (argc >= 2) { + r = sd_json_buildo( + &v, + SD_JSON_BUILD_PAIR_STRING("matchName", argv[1])); + if (r < 0) + return log_oom(); + } + + ssize_t n = varlink_execute_directory( + socket_path, + "io.systemd.StorageProvider.ListTemplates", + v, + /* more= */ true, + /* timeout_usec= */ 0, /* 0 means default */ + on_list_templates_reply, + t); + if (n < 0 && n != -ENOENT) + return log_error_errno(n, "Failed to enumerate storage volume templates: %m"); + + if (!table_isempty(t)) { + r = table_print_with_pager(t, arg_json_format_flags, arg_pager_flags, arg_legend); + if (r < 0) + return r; + } + + if (arg_legend && FLAGS_SET(arg_json_format_flags, SD_JSON_FORMAT_OFF)) { + if (table_isempty(t)) + printf("No templates.\n"); + else + printf("\n%zu templates listed.\n", table_get_rows(t) - 1); + } + + return 0; +} + +VERB_NOARG(verb_providers, "providers", "List storage providers"); +static int verb_providers(int argc, char *argv[], uintptr_t data, void *userdata) { + int r; + + _cleanup_free_ char *socket_path = NULL; + r = runtime_directory_generic(arg_runtime_scope, "systemd/io.systemd.StorageProvider", &socket_path); + if (r < 0) + return log_error_errno(r, "Failed to determine socket directory: %m"); + + _cleanup_(table_unrefp) Table *t = table_new("provider", "listening"); + if (!t) + return log_oom(); + + (void) table_set_sort(t, (size_t) 0); + table_set_ersatz_string(t, TABLE_ERSATZ_DASH); + + _cleanup_close_ int fd = open(socket_path, O_RDONLY|O_CLOEXEC|O_DIRECTORY); + if (fd < 0) { + if (errno != ENOENT) + return log_error_errno(errno, "Failed to open '%s': %m", socket_path); + } else { + _cleanup_free_ DirectoryEntries *dentries = NULL; + r = readdir_all(fd, RECURSE_DIR_SORT|RECURSE_DIR_IGNORE_DOT|RECURSE_DIR_ENSURE_TYPE, &dentries); + if (r < 0) + return log_error_errno(r, "Failed to enumerate '%s': %m", socket_path); + + FOREACH_ARRAY(dp, dentries->entries, dentries->n_entries) { + struct dirent *de = *dp; + + if (de->d_type != DT_SOCK) + continue; + + if (!storage_provider_name_is_valid(de->d_name)) + continue; + + _cleanup_close_ int socket_fd = socket(AF_UNIX, SOCK_STREAM|SOCK_CLOEXEC|SOCK_NONBLOCK, 0); + if (socket_fd < 0) + return log_error_errno(errno, "Failed to allocate AF_UNIX/SOCK_STREAM socket: %m"); + + _cleanup_free_ char *no = NULL; + r = connect_unix_path(socket_fd, fd, de->d_name); + if (r < 0) { + no = strjoin("no (", ERRNO_NAME(r), ")"); + if (!no) + return log_oom(); + } + + r = table_add_many(t, + TABLE_STRING, de->d_name, + TABLE_STRING, no ?: "yes", + TABLE_SET_COLOR, ansi_highlight_green_red(!no)); + if (r < 0) + return table_log_add_error(r); + } + } + + if (!table_isempty(t)) { + r = table_print_with_pager(t, arg_json_format_flags, arg_pager_flags, arg_legend); + if (r < 0) + return r; + } + + if (arg_legend && FLAGS_SET(arg_json_format_flags, SD_JSON_FORMAT_OFF)) { + if (table_isempty(t)) + printf("No providers.\n"); + else + printf("\n%zu providers listed.\n", table_get_rows(t) - 1); + } + + return 0; +} + +static int parse_argv(int argc, char *argv[], char ***args) { + int r; + + assert(argc >= 0); + assert(argv); + + OptionParser opts = { argc, argv }; + FOREACH_OPTION(c, &opts, /* on_error= */ return c) + switch (c) { + + OPTION_COMMON_HELP: + return help(); + + OPTION_COMMON_VERSION: + return version(); + + OPTION_COMMON_NO_PAGER: + arg_pager_flags |= PAGER_DISABLE; + break; + + OPTION_COMMON_NO_LEGEND: + arg_legend = false; + break; + + OPTION_COMMON_JSON: + r = parse_json_argument(opts.arg, &arg_json_format_flags); + if (r <= 0) + return r; + break; + + OPTION_COMMON_NO_ASK_PASSWORD: + arg_ask_password = false; + break; + + OPTION_LONG("system", NULL, "Operate in system mode"): + arg_runtime_scope = RUNTIME_SCOPE_SYSTEM; + break; + + OPTION_LONG("user", NULL, "Operate in user mode"): + arg_runtime_scope = RUNTIME_SCOPE_USER; + break; + } + + *args = option_parser_get_args(&opts); + return 1; +} + +static int run_as_mount_helper(int argc, char *argv[]) { + int c, r; + + /* Implements util-linux "external helper" command line interface, as per mount(8) man page. + * + * Usage: + * + * mount -t storage fs:mydirvolume /some/place # Directory volumes + * mount -t storage.ext4 fs:myblkvolume /some/place # Block volumes + */ + + const char *fstype = NULL, *options = NULL; + bool fake = false; + + while ((c = getopt(argc, argv, "sfnvN:o:t:")) >= 0) { + switch (c) { + + case 'f': + fake = true; + break; + + case 'o': + options = optarg; + break; + + case 't': + fstype = startswith(optarg, "storage."); + if (fstype) { + /* Paranoia: don't allow "storage.storage.storage.…" chains... */ + if (startswith(fstype, "storage.") || streq(fstype, "storage")) + return log_error_errno(SYNTHETIC_ERRNO(EINVAL), "Refusing nested storage volumes."); + } else if (!streq(optarg, "storage")) + log_warning("Unexpected file system type '%s', ignoring.", optarg); + + break; + + case 's': /* sloppy mount options */ + case 'n': /* aka --no-mtab */ + case 'v': /* aka --verbose */ + log_debug("Ignoring option -%c, not implemented.", c); + break; + + case 'N': /* aka --namespace= */ + return log_error_errno(SYNTHETIC_ERRNO(EOPNOTSUPP), "Option -%c is not implemented, refusing.", c); + + case '?': + return -EINVAL; + } + } + + if (optind + 2 != argc) + return log_error_errno(SYNTHETIC_ERRNO(EINVAL), + "Expected a storage volume specification and target directory as only arguments."); + + const char *colon = strchr(argv[optind], ':'); + if (!colon) + return log_error_errno(SYNTHETIC_ERRNO(EINVAL), "Invalid storage volume specification, refusing: %s", argv[optind]); + + _cleanup_free_ char *provider = strndup(argv[optind], colon - argv[optind]); + if (!provider) + return log_oom(); + if (!storage_provider_name_is_valid(provider)) + return log_error_errno(SYNTHETIC_ERRNO(EINVAL), "Invalid storage provider name: %s", provider); + + _cleanup_free_ char *name = strdup(colon + 1); + if (!name) + return log_oom(); + if (!storage_volume_name_is_valid(name)) + return log_error_errno(SYNTHETIC_ERRNO(EINVAL), "Invalid storage volume name: %s", name); + + _cleanup_free_ char *path = NULL; + r = parse_path_argument(argv[optind+1], /* suppress_root= */ false, &path); + if (r < 0) + return r; + + _cleanup_free_ char *filtered = NULL, *template = NULL; + CreateMode create_mode = _CREATE_MODE_INVALID; + uint64_t create_size = UINT64_MAX; + int read_only = -1; + for (const char *p = options;;) { + _cleanup_free_ char *word = NULL; + + r = extract_first_word(&p, &word, ",", EXTRACT_KEEP_QUOTE|EXTRACT_UNESCAPE_SEPARATORS); + if (r < 0) + return log_error_errno(r, "Failed to extract mount option: %m"); + if (r == 0) + break; + + const char *t = startswith(word, "storage."); + if (t) { + const char *v; + if ((v = startswith(t, "create="))) { + create_mode = create_mode_from_string(v); + if (create_mode < 0) + return log_error_errno(create_mode, "Failed to parse storage.create= parameter: %s", v); + } else if ((v = startswith(t, "create-size="))) { + r = parse_size(v, /* base= */ 1024, &create_size); + if (r < 0) + return log_error_errno(r, "Failed to parse storage.create-size= parameter: %s", v); + } else if ((v = startswith(t, "template="))) { + if (!storage_template_name_is_valid(v)) + return log_error_errno(SYNTHETIC_ERRNO(EINVAL), "Invalid template name, refusing: %s", v); + + r = free_and_strdup(&template, v); + if (r < 0) + return log_oom(); + } else + return log_error_errno(SYNTHETIC_ERRNO(EINVAL), "Unknown mount option '%s', refusing.", word); + } else if (streq(word, "ro")) + read_only = true; + else if (streq(word, "rw")) + read_only = false; + else if (!strextend_with_separator(&filtered, ",", word)) + return log_oom(); + } + + if (fake) + return 0; + + _cleanup_free_ char *socket_path = NULL; + r = runtime_directory_generic(arg_runtime_scope, "systemd/io.systemd.StorageProvider", &socket_path); + if (r < 0) + return log_error_errno(r, "Failed to determine socket directory: %m"); + + if (!path_extend(&socket_path, provider)) + return log_oom(); + + _cleanup_(sd_varlink_unrefp) sd_varlink *link = NULL; + r = sd_varlink_connect_address(&link, socket_path); + if (r < 0) + return log_error_errno(r, "Failed to connect to '%s': %m", socket_path); + + r = sd_varlink_set_allow_fd_passing_input(link, true); + if (r < 0) + return log_error_errno(r, "Failed to enable file descriptor passing: %m"); + + (void) polkit_agent_open_if_enabled(BUS_TRANSPORT_LOCAL, arg_ask_password); + + sd_json_variant *mreply = NULL; + const char *merror_id = NULL, *vtype = fstype ? "reg" : "dir"; + r = sd_varlink_callbo( + link, + "io.systemd.StorageProvider.Acquire", + &mreply, + &merror_id, + SD_JSON_BUILD_PAIR_STRING("name", name), + SD_JSON_BUILD_PAIR_CONDITION(create_mode >= 0, "createMode", SD_JSON_BUILD_STRING(create_mode_to_string(create_mode))), + JSON_BUILD_PAIR_STRING_NON_EMPTY("template", template), + SD_JSON_BUILD_PAIR_CONDITION(read_only >= 0, "readOnly", SD_JSON_BUILD_BOOLEAN(read_only)), + SD_JSON_BUILD_PAIR_STRING("requestAs", vtype), + SD_JSON_BUILD_PAIR_CONDITION(create_size != UINT64_MAX, "createSizeBytes", SD_JSON_BUILD_UNSIGNED(create_size)), + SD_JSON_BUILD_PAIR_BOOLEAN("allowInteractiveAuthentication", arg_ask_password)); + if (r < 0) + return log_error_errno(r, "Failed to issue io.systemd.StorageProvider.Acquire() varlink call: %m"); + _cleanup_(sd_json_variant_unrefp) sd_json_variant *reply = sd_json_variant_ref(mreply); + if (merror_id) { + /* Copy out the error ID, as the follow-up call will invalidate it */ + _cleanup_free_ char *error_id = strdup(merror_id); + if (!error_id) + return log_oom(); + + /* Hmm, the type might not have been right for the backend or the volume? then try + * again, and switch from "reg" to "blk", maybe it works then. (We keep the original + * reply referenced, since we prefer generating an error for the first error.) */ + if (streq(vtype, "reg") && STR_IN_SET(error_id, + "io.systemd.StorageProvider.TypeNotSupported", + "io.systemd.StorageProvider.WrongType")) { + + sd_json_variant *freply = NULL; + const char *ferror_id = NULL; + r = sd_varlink_callbo( + link, + "io.systemd.StorageProvider.Acquire", + &freply, + &ferror_id, + SD_JSON_BUILD_PAIR_STRING("name", name), + SD_JSON_BUILD_PAIR_CONDITION(create_mode >= 0, "createMode", SD_JSON_BUILD_STRING(create_mode_to_string(create_mode))), + JSON_BUILD_PAIR_STRING_NON_EMPTY("template", template), + SD_JSON_BUILD_PAIR_CONDITION(read_only >= 0, "readOnly", SD_JSON_BUILD_BOOLEAN(read_only)), + SD_JSON_BUILD_PAIR_STRING("requestAs", "blk"), + SD_JSON_BUILD_PAIR_CONDITION(create_size != UINT64_MAX, "createSizeBytes", SD_JSON_BUILD_UNSIGNED(create_size)), + SD_JSON_BUILD_PAIR_BOOLEAN("allowInteractiveAuthentication", arg_ask_password)); + if (r < 0) + return log_error_errno(r, "Failed to issue io.systemd.StorageProvider.Acquire() varlink call: %m"); + if (!ferror_id) { + /* The 2nd call worked? then let's forget about the first failure */ + sd_json_variant_unref(reply); + reply = sd_json_variant_ref(freply); + error_id = mfree(error_id); + } + + /* NB: if both fail we show the Varlink error of the first call here, i.e. of the preferred type */ + } + + if (error_id) { + if (streq(error_id, "io.systemd.StorageProvider.NoSuchVolume")) + return log_error_errno(SYNTHETIC_ERRNO(ENOENT), "Volume '%s' not known.", name); + if (streq(error_id, "io.systemd.StorageProvider.NoSuchTemplate")) + return log_error_errno(SYNTHETIC_ERRNO(ENOENT), "Template '%s' not known.", template); + if (streq(error_id, "io.systemd.StorageProvider.VolumeExists")) + return log_error_errno(SYNTHETIC_ERRNO(EEXIST), "Volume '%s' exists already.", name); + if (streq(error_id, "io.systemd.StorageProvider.TypeNotSupported")) + return log_error_errno(SYNTHETIC_ERRNO(EOPNOTSUPP), "Storage provider does not support the specified volume type '%s'.", vtype); + if (streq(error_id, "io.systemd.StorageProvider.WrongType")) + return log_error_errno(SYNTHETIC_ERRNO(EADDRNOTAVAIL), "Volume '%s' is not of type '%s'.", name, vtype); + if (streq(error_id, "io.systemd.StorageProvider.CreateNotSupported")) + return log_error_errno(SYNTHETIC_ERRNO(EOPNOTSUPP), "Storage provider does not support creating volumes."); + if (streq(error_id, "io.systemd.StorageProvider.CreateSizeRequired")) + return log_error_errno(SYNTHETIC_ERRNO(ENODATA), "Storage provider requires a create size to be provided when creating volumes on-the-fly. Use 'storage.create-size=' mount option."); + if (streq(error_id, "io.systemd.StorageProvider.ReadOnlyVolume")) + return log_error_errno(SYNTHETIC_ERRNO(EROFS), "Volume '%s' is read-only.", name); + if (streq(error_id, "io.systemd.StorageProvider.BadTemplate")) + return log_error_errno(SYNTHETIC_ERRNO(EADDRNOTAVAIL), "Template does not apply to this volume type."); + + r = sd_varlink_error_to_errno(error_id, reply); /* If this is a system errno style error, output it with %m */ + if (r != -EBADR) + return log_error_errno(r, "Failed to issue io.systemd.StorageProvider.Acquire() varlink call: %m"); + + return log_error_errno(r, "Failed to issue io.systemd.StorageProvider.Acquire() varlink call: %s", error_id); + } + } + + struct { + unsigned fd_idx; + int read_only; + const char *type; + uid_t base_uid; + gid_t base_gid; + } p = { + .fd_idx = UINT_MAX, + .read_only = -1, + .base_uid = UID_INVALID, + .base_gid = GID_INVALID, + }; + + static const sd_json_dispatch_field dispatch_table[] = { + { "fileDescriptorIndex", _SD_JSON_VARIANT_TYPE_INVALID, sd_json_dispatch_uint, voffsetof(p, fd_idx), SD_JSON_MANDATORY }, + { "readOnly", SD_JSON_VARIANT_BOOLEAN, sd_json_dispatch_tristate, voffsetof(p, read_only), 0 }, + { "type", SD_JSON_VARIANT_STRING, sd_json_dispatch_const_string, voffsetof(p, type), SD_JSON_MANDATORY }, + { "baseUID", _SD_JSON_VARIANT_TYPE_INVALID, sd_json_dispatch_uid_gid, voffsetof(p, base_uid), 0 }, + { "baseGID", _SD_JSON_VARIANT_TYPE_INVALID, sd_json_dispatch_uid_gid, voffsetof(p, base_gid), 0 }, + {} + }; + + r = sd_json_dispatch(reply, dispatch_table, SD_JSON_ALLOW_EXTENSIONS, &p); + if (r < 0) + return log_error_errno(r, "Failed to decode Acquire() reply: %m"); + + _cleanup_close_ int fd = sd_varlink_take_fd(link, p.fd_idx); + if (fd < 0) + return log_error_errno(fd, "Failed to acquire fd from Varlink connection: %m"); + + struct stat st; + if (fstat(fd, &st) < 0) + return log_error_errno(errno, "Failed to stat returned file descriptor: %m"); + + _cleanup_strv_free_ char **cmdline = strv_new("mount", "-c"); + if (!cmdline) + return log_oom(); + + if (fstype) { + if (!STR_IN_SET(p.type, "reg", "blk")) + return log_error_errno(SYNTHETIC_ERRNO(EINVAL), "Mounting as file system type '%s' requested, but volume is not a block device or regular file.", fstype); + + r = stat_verify_regular_or_block(&st); + if (r < 0) + return log_error_errno(r, "File descriptor for block/regular volume is not a block or regular inode: %m"); + + if (strv_extend_strv(&cmdline, STRV_MAKE("-t", fstype), /* filter_duplicates= */ false) < 0) + return log_oom(); + } else { + if (!streq(p.type, "dir")) + return log_error_errno(SYNTHETIC_ERRNO(EINVAL), "Mount as directory requested, but volume is not a directory."); + + if (!uid_is_valid(p.base_uid) || !gid_is_valid(p.base_gid)) + return log_error_errno(SYNTHETIC_ERRNO(EINVAL), "Provider did not report base UID/GID, cannot mount."); + + if (p.base_uid > UINT32_MAX - 0x10000U || + p.base_gid > UINT32_MAX - 0x10000U) + return log_error_errno(SYNTHETIC_ERRNO(EINVAL), "Returned base UID/GID out of range."); + + r = stat_verify_directory(&st); + if (r < 0) + return log_error_errno(r, "File descriptor for directory volume is not a directory inode: %m"); + + if (st.st_uid < p.base_uid || st.st_uid >= p.base_uid + 0x10000 || + st.st_gid < p.base_gid || st.st_gid >= p.base_gid + 0x10000) + return log_error_errno(SYNTHETIC_ERRNO(EPERM), "File descriptor for directory volume is not owned by base UID/GID range, refusing."); + + /* Now move the mount into our own UID/GID range */ + _cleanup_free_ char *uid_line = asprintf_safe( + UID_FMT " " UID_FMT " " UID_FMT "\n", + p.base_uid, (uid_t) 0, (uid_t) 0x10000); + _cleanup_free_ char *gid_line = asprintf_safe( + GID_FMT " " GID_FMT " " GID_FMT "\n", + p.base_gid, (gid_t) 0, (gid_t) 0x10000); + if (!uid_line || !gid_line) + return log_oom(); + + _cleanup_close_ int userns_fd = userns_acquire(uid_line, gid_line, /* setgroups_deny= */ true); + if (userns_fd < 0) + return log_error_errno(userns_fd, "Failed to acquire new user namespace: %m"); + + _cleanup_close_ int remapped_fd = open_tree_attr_with_fallback( + fd, + /* path= */ NULL, + OPEN_TREE_CLONE | OPEN_TREE_CLOEXEC, + &(struct mount_attr) { + .attr_set = MOUNT_ATTR_IDMAP, + .userns_fd = userns_fd, + }); + if (remapped_fd < 0) + return log_error_errno(remapped_fd, "Failed to set ID mapping on returned mount: %m"); + + close_and_replace(fd, remapped_fd); + + if (strv_extend(&cmdline, "--bind") < 0) + return log_oom(); + } + + if (p.read_only > 0) + read_only = true; + + if (!strextend_with_separator(&filtered, ",", read_only > 0 ? "ro" : "rw")) + return log_oom(); + + if (strv_extend_strv(&cmdline, STRV_MAKE("-o", filtered), /* filter_duplicates= */ false) < 0) + return log_oom(); + + if (strv_extend_strv(&cmdline, STRV_MAKE(FORMAT_PROC_FD_PATH(fd), path), /* filter_duplicates= */ false) < 0) + return log_oom(); + + r = fd_cloexec(fd, false); + if (r < 0) + return log_error_errno(r, "Failed to disable O_CLOEXEC for mount fd: %m"); + + if (DEBUG_LOGGING) { + _cleanup_free_ char *q = quote_command_line(cmdline, SHELL_ESCAPE_EMPTY); + log_debug("Chain-loading: %s", strna(q)); + } + + /* NB: we do not honour $PATH here, since as plugin to /bin/mount we might be called in a setuid() + * context, and hence don't want to chain to programs potentially under user control. */ + execv("/bin/mount", cmdline); + return log_error_errno(errno, "Failed to execute mount tool: %m"); +} + +static int run(int argc, char *argv[]) { + int r; + + log_setup(); + + if (invoked_as(argv, "mount.storage")) + return run_as_mount_helper(argc, argv); + + char **args = NULL; + r = parse_argv(argc, argv, &args); + if (r <= 0) + return r; + + return dispatch_verb_with_args(args, /* userdata= */ NULL); +} + +DEFINE_MAIN_FUNCTION(run); From 804bf405d932fd1305b1c8f7a09990fb03cb8fcc Mon Sep 17 00:00:00 2001 From: Lennart Poettering Date: Wed, 29 Apr 2026 02:25:07 +0200 Subject: [PATCH 018/242] test: add integration test for storagectl and storage providers VM-only test that exercises both shipped providers through storagectl: verifies the well-known sockets exist, lists providers/volumes/ templates, creates and acquires volumes from each template (sparse-file, allocated-file, directory, subvolume), attaches a loop device to cover the block provider, and exercises the mount.storage helper. --- test/units/TEST-87-AUX-UTILS-VM.storagectl.sh | 211 ++++++++++++++++++ 1 file changed, 211 insertions(+) create mode 100755 test/units/TEST-87-AUX-UTILS-VM.storagectl.sh diff --git a/test/units/TEST-87-AUX-UTILS-VM.storagectl.sh b/test/units/TEST-87-AUX-UTILS-VM.storagectl.sh new file mode 100755 index 0000000000000..a11a952a8e8da --- /dev/null +++ b/test/units/TEST-87-AUX-UTILS-VM.storagectl.sh @@ -0,0 +1,211 @@ +#!/usr/bin/env bash +# SPDX-License-Identifier: LGPL-2.1-or-later +set -eux +set -o pipefail + +if ! command -v storagectl >/dev/null; then + echo "storagectl not found, skipping." + exit 77 +fi + +# shellcheck source=test/units/util.sh +. "$(dirname "$0")"/util.sh + +# Unset $PAGER so we don't have to use --no-pager everywhere +export PAGER= + +# storagectl runs in a VM-only test +if systemd-detect-virt -cq ; then + echo "can't run in a container, skipping." + exit 77 +fi + +at_exit() { + set +e + + if [[ -n "${MOUNT_DIR:-}" ]] && mountpoint -q "$MOUNT_DIR"; then + umount "$MOUNT_DIR" + fi + if [[ -n "${LOOP:-}" ]]; then + systemd-dissect --detach "$LOOP" + fi + if [[ -n "${WORK_DIR:-}" ]]; then + rm -fr "$WORK_DIR" + fi + rm -fr /var/lib/storage/test-87-storage-*.volume +} +trap at_exit EXIT + +# The storage providers are socket-activated by sockets.target, so the listening +# AF_UNIX sockets should already exist. +test -S /run/systemd/io.systemd.StorageProvider/block +test -S /run/systemd/io.systemd.StorageProvider/fs + +WORK_DIR="$(mktemp -d /tmp/test-storagectl.XXXXXXXXXX)" +MOUNT_DIR="$WORK_DIR/mnt" +mkdir -p "$MOUNT_DIR" + +# --- storagectl basic --- + +storagectl --help +storagectl --version +storagectl help + +# Unknown verb / option +(! storagectl this-verb-does-not-exist) +(! storagectl --no-such-option providers) + +# --- storagectl providers --- + +storagectl providers +storagectl providers --no-legend +storagectl providers --no-pager +storagectl providers --json=pretty | jq . +storagectl providers --json=short | jq . + +providers_output="$(storagectl providers --no-legend)" +assert_in 'block' "$providers_output" +assert_in 'fs' "$providers_output" +assert_in 'yes' "$providers_output" + +# --- storagectl volumes --- + +# 'volumes' is the default verb +storagectl +storagectl volumes +storagectl volumes --no-legend +storagectl volumes --no-pager +storagectl volumes --json=pretty | jq . +storagectl volumes --json=short | jq . + +# Glob filter that matches nothing should not error +storagectl volumes 'no-such-volume-*' + +# --- storagectl templates --- + +storagectl templates +storagectl templates --no-legend --no-pager +storagectl templates --json=pretty | jq . +storagectl templates --json=short | jq --seq . + +templates_output="$(storagectl templates --no-legend)" +assert_in 'sparse-file' "$templates_output" +assert_in 'allocated-file' "$templates_output" +assert_in 'directory' "$templates_output" +assert_in 'subvolume' "$templates_output" + +# Glob filter +storagectl templates 'sparse-*' --no-legend | grep sparse-file >/dev/null +(! storagectl templates 'sparse-*' --no-legend | grep allocated-file >/dev/null) +storagectl templates 'no-such-template-*' + +# --- direct varlink calls --- + +varlinkctl introspect /run/systemd/io.systemd.StorageProvider/block io.systemd.StorageProvider +varlinkctl introspect /run/systemd/io.systemd.StorageProvider/fs io.systemd.StorageProvider + +# Block provider does not expose templates +varlinkctl call --more /run/systemd/io.systemd.StorageProvider/block \ + io.systemd.StorageProvider.ListTemplates '{}' \ + --graceful=io.systemd.StorageProvider.NoSuchTemplate + +# fs provider lists the four built-in templates +varlinkctl call --more --json=short /run/systemd/io.systemd.StorageProvider/fs \ + io.systemd.StorageProvider.ListTemplates '{}' | grep '"name":"sparse-file"' >/dev/null + +# Block provider rejects names not under /dev/ +varlinkctl call /run/systemd/io.systemd.StorageProvider/block \ + io.systemd.StorageProvider.Acquire '{"name":"/tmp/no-such-dev"}' \ + --graceful=io.systemd.StorageProvider.NoSuchVolume + +# fs provider rejects bad volume names (contain '/' → not a valid filename) +varlinkctl call /run/systemd/io.systemd.StorageProvider/fs \ + io.systemd.StorageProvider.Acquire '{"name":"bad/name"}' \ + --graceful=org.varlink.service.InvalidParameter + +# --- mount.storage: regular file via fs provider --- + +TESTVOL_REG="test-87-storage-reg-$RANDOM" +truncate -s 32M "/var/lib/storage/$TESTVOL_REG.volume" +mkfs.ext4 "/var/lib/storage/$TESTVOL_REG.volume" +mount -t storage.ext4 "fs:$TESTVOL_REG" "$MOUNT_DIR" +mountpoint -q "$MOUNT_DIR" +echo "hello reg" >"$MOUNT_DIR/hello" +umount "$MOUNT_DIR" + +# Volume now appears in 'storagectl volumes' +volumes_after_create="$(storagectl volumes "$TESTVOL_REG" --no-legend)" +assert_in "$TESTVOL_REG" "$volumes_after_create" +assert_in 'reg' "$volumes_after_create" + +# Re-mount existing (default storage.create=any) +mount -t storage.ext4 "fs:$TESTVOL_REG" "$MOUNT_DIR" +test -f "$MOUNT_DIR/hello" +umount "$MOUNT_DIR" + +# storage.create=open succeeds for existing volume +mount -t storage.ext4 -o "storage.create=open" "fs:$TESTVOL_REG" "$MOUNT_DIR" +umount "$MOUNT_DIR" + +# storage.create=new on existing volume must fail +(! mount -t storage.ext4 -o "storage.create=new,storage.create-size=16M" "fs:$TESTVOL_REG" "$MOUNT_DIR") + +# Read-only mount +mount -t storage.ext4 -o ro "fs:$TESTVOL_REG" "$MOUNT_DIR" +findmnt -n -o options "$MOUNT_DIR" | grep -E '(^|,)ro(,|$)' >/dev/null +(! touch "$MOUNT_DIR/readonly-test") +umount "$MOUNT_DIR" + +rm -f "/var/lib/storage/$TESTVOL_REG.volume" + +# storage.create=open on missing volume must fail +(! mount -t storage.ext4 -o "storage.create=open" "fs:test-87-storage-missing-$RANDOM" "$MOUNT_DIR") + +# --- mount.storage: directory volume via fs provider (requires idmapped mounts) --- + +TESTVOL_DIR="test-87-storage-dir-$RANDOM" +if mount -t storage "fs:$TESTVOL_DIR" "$MOUNT_DIR"; then + mountpoint -q "$MOUNT_DIR" + test -d "/var/lib/storage/$TESTVOL_DIR.volume/root" + echo "dir test" >"$MOUNT_DIR/hello" + test -f "/var/lib/storage/$TESTVOL_DIR.volume/root/hello" + umount "$MOUNT_DIR" + rm -fr "/var/lib/storage/$TESTVOL_DIR.volume" +else + echo "Directory volume mounting failed (idmapped mounts unsupported?), skipping." + rm -fr "/var/lib/storage/$TESTVOL_DIR.volume" +fi + +# --- mount.storage: block device via block provider --- + +truncate -s 32M "$WORK_DIR/block.img" +mkfs.ext4 -L sd-storage-blk "$WORK_DIR/block.img" +LOOP="$(systemd-dissect --attach --loop-ref=test-storagectl "$WORK_DIR/block.img")" + +mount -t storage.ext4 "block:$LOOP" "$MOUNT_DIR" +mountpoint -q "$MOUNT_DIR" +echo "hello blk" >"$MOUNT_DIR/hello" +umount "$MOUNT_DIR" + +# Read-only mount of the block volume +mount -t storage.ext4 -o ro "block:$LOOP" "$MOUNT_DIR" +findmnt -n -o options "$MOUNT_DIR" | grep -E '(^|,)ro(,|$)' >/dev/null +test -f "$MOUNT_DIR/hello" +umount "$MOUNT_DIR" + +# Block volume is enumerable; matchName globs over device node and aliases +varlinkctl call --more --json=short /run/systemd/io.systemd.StorageProvider/block \ + io.systemd.StorageProvider.ListVolumes "{\"matchName\":\"$LOOP\"}" | + grep '"type":"blk"' >/dev/null + +systemd-dissect --detach "$LOOP" +unset LOOP + +# --- error cases --- + +# Bad provider name (no such socket) +(! mount -t storage.ext4 "no-such-provider:foo" "$MOUNT_DIR") +# Bad volume specification (no colon) +(! mount -t storage.ext4 "no-colon-here" "$MOUNT_DIR") +# Refuse nested storage volumes (FS type "storage.storage") +(! mount -t storage.storage "fs:something" "$MOUNT_DIR") From eccfd2c97b66694ee1b7c18f47837f1dc17ea839 Mon Sep 17 00:00:00 2001 From: Lennart Poettering Date: Thu, 23 Apr 2026 12:16:24 +0200 Subject: [PATCH 019/242] TODO: track StorageProvider follow-ups, sketch a NetworkProvider sibling Records the still-missing StorageProvider integrations (nspawn, vmspawn, service-manager BindVolume=) and replaces the now-obsolete generic "storage API via varlink" entry with a NetworkProvider proposal modelled on it. --- TODO.md | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/TODO.md b/TODO.md index e51f6734f065e..304f6c3a5ecb8 100644 --- a/TODO.md +++ b/TODO.md @@ -128,6 +128,11 @@ SPDX-License-Identifier: LGPL-2.1-or-later ## Features +- StorageProvider interface + storagectl + - hook-up in systemd-nspawn + - hook-up in systemd-vmspawn + - hook-up in service manager (BindVolume=) + - a small tool that can do basic btrfs raid policy mgmt. i.e. gets started as part of the initial transaction for some btrfs raid fs, waits for some time, then puts message on screen (plymouth, console) that some devices apparently @@ -2545,8 +2550,9 @@ SPDX-License-Identifier: LGPL-2.1-or-later - systemd-tpm2-support: add a some logic that detects if system is in DA lockout mode, and queries the user for TPM recovery PIN then. -- systemd: add storage API via varlink, where everyone can drop a socket in a - dir, similar, do the same thing for networking +- add a networking provider API, inspired by the StorageProvider. Make networkd + a provider that exposes interfaces for adding tap, tun, veth via the api, + base this on .netdev logic somehow. - $SYSTEMD_EXECPID that the service manager sets should be augmented with $SYSTEMD_EXECPIDFD (and similar for From 0d71d58da084a5548bcb762aa4b0fad9dc49fac9 Mon Sep 17 00:00:00 2001 From: Frantisek Sumsal Date: Wed, 29 Apr 2026 11:50:01 +0200 Subject: [PATCH 020/242] sd-bus: store the strv size when extending it So strv_push_with_size() doesn't have to recalculate the size every time. --- src/libsystemd/sd-bus/bus-message.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/libsystemd/sd-bus/bus-message.c b/src/libsystemd/sd-bus/bus-message.c index 94be969f7f420..017ffb7a6127a 100644 --- a/src/libsystemd/sd-bus/bus-message.c +++ b/src/libsystemd/sd-bus/bus-message.c @@ -4331,6 +4331,7 @@ int bus_message_get_blob(sd_bus_message *m, void **buffer, size_t *sz) { _public_ int sd_bus_message_read_strv_extend(sd_bus_message *m, char ***l) { char type; const char *contents, *s; + size_t n; int r; assert(m); @@ -4347,9 +4348,10 @@ _public_ int sd_bus_message_read_strv_extend(sd_bus_message *m, char ***l) { if (r <= 0) return r; + n = strv_length(*l); /* sd_bus_message_read_basic() does content validation for us. */ while ((r = sd_bus_message_read_basic(m, *contents, &s)) > 0) { - r = strv_extend(l, s); + r = strv_extend_with_size(l, &n, s); if (r < 0) return r; } From 51a88ac72330f20a030b8938b6bfce2b2215d8a0 Mon Sep 17 00:00:00 2001 From: Frantisek Sumsal Date: Wed, 29 Apr 2026 12:02:32 +0200 Subject: [PATCH 021/242] core: limit the number of units that can be requested over ListUnitsByNames --- src/core/dbus-manager.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/src/core/dbus-manager.c b/src/core/dbus-manager.c index 1bc73e7b434c9..6579708df0117 100644 --- a/src/core/dbus-manager.c +++ b/src/core/dbus-manager.c @@ -971,6 +971,10 @@ static int method_list_units_by_names(sd_bus_message *message, void *userdata, s if (r < 0) return r; + if (strv_length(units) > MAX(hashmap_size(m->units), (unsigned) MANAGER_MAX_NAMES / 2)) + return sd_bus_error_set(reterr_error, SD_BUS_ERROR_LIMITS_EXCEEDED, + "Too many unit names requested."); + r = sd_bus_message_new_method_return(message, &reply); if (r < 0) return r; From c3ace5621b0dad786fd9675914ba0f60ed69373a Mon Sep 17 00:00:00 2001 From: Lennart Poettering Date: Wed, 29 Apr 2026 12:01:59 +0200 Subject: [PATCH 022/242] cpio: move TPM PCR info into CpioTarget The PR to measure into is closely associated with where we place a resource in the initrd cpios. Hence, let's also track it in CpioTarget, thus simplifying our function parameter lists that way. No change in behaviour. --- src/boot/cpio.c | 31 +++++++++++++++++++++++-------- src/boot/cpio.h | 3 +-- src/boot/stub.c | 8 -------- 3 files changed, 24 insertions(+), 18 deletions(-) diff --git a/src/boot/cpio.c b/src/boot/cpio.c index 81792b00a89f4..31638b1c8fc22 100644 --- a/src/boot/cpio.c +++ b/src/boot/cpio.c @@ -5,6 +5,7 @@ #include "iovec-util-fundamental.h" #include "measure.h" #include "string-util-fundamental.h" +#include "tpm2-pcr.h" #include "util.h" static char *write_cpio_word(char *p, uint32_t v) { @@ -306,7 +307,6 @@ EFI_STATUS pack_cpio( const char16_t *match_suffix, const char16_t *exclude_suffix, const CpioTarget *target, - uint32_t tpm_pcr, const char16_t *tpm_description, struct iovec *ret_buffer, bool *ret_measured) { @@ -425,12 +425,16 @@ EFI_STATUS pack_cpio( return log_error_status(err, "Failed to pack cpio trailer: %m"); err = tpm_log_ipl_event( - tpm_pcr, POINTER_TO_PHYSICAL_ADDRESS(buffer), buffer_size, tpm_description, ret_measured); + target->tpm_pcr, + POINTER_TO_PHYSICAL_ADDRESS(buffer), + buffer_size, + tpm_description, + ret_measured); if (err != EFI_SUCCESS) return log_error_status( err, - "Unable to add cpio TPM measurement for PCR %u (%ls), ignoring: %m", - tpm_pcr, + "Unable to add cpio TPM measurement for PCR %u (%ls): %m", + target->tpm_pcr, tpm_description); *ret_buffer = IOVEC_MAKE(TAKE_PTR(buffer), buffer_size); @@ -450,7 +454,6 @@ EFI_STATUS pack_cpio_literal( size_t data_size, const CpioTarget *target, const char16_t *target_filename, - uint32_t tpm_pcr, const char16_t *tpm_description, struct iovec *ret_buffer, bool *ret_measured) { @@ -486,12 +489,16 @@ EFI_STATUS pack_cpio_literal( return log_error_status(err, "Failed to pack cpio trailer: %m"); err = tpm_log_ipl_event( - tpm_pcr, POINTER_TO_PHYSICAL_ADDRESS(buffer), buffer_size, tpm_description, ret_measured); + target->tpm_pcr, + POINTER_TO_PHYSICAL_ADDRESS(buffer), + buffer_size, + tpm_description, + ret_measured); if (err != EFI_SUCCESS) return log_error_status( err, - "Unable to add cpio TPM measurement for PCR %u (%ls), ignoring: %m", - tpm_pcr, + "Unable to add cpio TPM measurement for PCR %u (%ls): %m", + target->tpm_pcr, tpm_description); *ret_buffer = IOVEC_MAKE(TAKE_PTR(buffer), buffer_size); @@ -506,46 +513,54 @@ const CpioTarget cpio_target_credentials = { .directory = ".extra/credentials", .dir_mode = 0500, .access_mode = 0400, + .tpm_pcr = TPM2_PCR_KERNEL_CONFIG, }; const CpioTarget cpio_target_global_credentials = { .directory = ".extra/global_credentials", .dir_mode = 0500, .access_mode = 0400, + .tpm_pcr = TPM2_PCR_KERNEL_CONFIG, }; const CpioTarget cpio_target_sysext = { .directory = ".extra/sysext", .dir_mode = 0555, .access_mode = 0444, + .tpm_pcr = TPM2_PCR_SYSEXTS, }; const CpioTarget cpio_target_global_sysext = { .directory = ".extra/global_sysext", .dir_mode = 0555, .access_mode = 0444, + .tpm_pcr = TPM2_PCR_SYSEXTS, }; const CpioTarget cpio_target_confext = { .directory = ".extra/confext", .dir_mode = 0555, .access_mode = 0444, + .tpm_pcr = TPM2_PCR_KERNEL_CONFIG, }; const CpioTarget cpio_target_global_confext = { .directory = ".extra/global_confext", .dir_mode = 0555, .access_mode = 0444, + .tpm_pcr = TPM2_PCR_KERNEL_CONFIG, }; const CpioTarget cpio_target_meta = { .directory = ".extra", .dir_mode = 0555, .access_mode = 0444, + .tpm_pcr = UINT32_MAX, }; const CpioTarget cpio_target_meta_secret = { .directory = ".extra", .dir_mode = 0555, .access_mode = 0400, + .tpm_pcr = UINT32_MAX, }; diff --git a/src/boot/cpio.h b/src/boot/cpio.h index 3c311bc714d28..3aa525779344f 100644 --- a/src/boot/cpio.h +++ b/src/boot/cpio.h @@ -8,6 +8,7 @@ typedef struct CpioTarget { const char *directory; /* Path to directory where to place resources */ uint32_t dir_mode; /* Access mode for the directory */ uint32_t access_mode; /* Access mode for the files in the directory */ + uint32_t tpm_pcr; /* Where to measure this data into */ } CpioTarget; EFI_STATUS pack_cpio_one( @@ -35,7 +36,6 @@ EFI_STATUS pack_cpio( const char16_t *match_suffix, const char16_t *exclude_suffix, const CpioTarget *target, - uint32_t tpm_pcr, const char16_t *tpm_description, struct iovec *ret_buffer, bool *ret_measured); @@ -45,7 +45,6 @@ EFI_STATUS pack_cpio_literal( size_t data_size, const CpioTarget *target, const char16_t *target_filename, - uint32_t tpm_pcr, const char16_t *tpm_description, struct iovec *ret_buffer, bool *ret_measured); diff --git a/src/boot/stub.c b/src/boot/stub.c index 8632a603a21de..52927e91ff077 100644 --- a/src/boot/stub.c +++ b/src/boot/stub.c @@ -819,7 +819,6 @@ static void generate_sidecar_initrds( u".cred", /* exclude_suffix= */ NULL, &cpio_target_credentials, - /* tpm_pcr= */ TPM2_PCR_KERNEL_CONFIG, u"Credentials initrd", initrds + INITRD_CREDENTIAL, &m) == EFI_SUCCESS) @@ -830,7 +829,6 @@ static void generate_sidecar_initrds( u".cred", /* exclude_suffix= */ NULL, &cpio_target_global_credentials, - /* tpm_pcr= */ TPM2_PCR_KERNEL_CONFIG, u"Global credentials initrd", initrds + INITRD_GLOBAL_CREDENTIAL, &m) == EFI_SUCCESS) @@ -841,7 +839,6 @@ static void generate_sidecar_initrds( u".raw", /* ideally we'd pick up only *.sysext.raw here, but for compat we pick up *.raw instead … */ u".confext.raw", /* … but then exclude *.confext.raw again */ &cpio_target_sysext, - /* tpm_pcr= */ TPM2_PCR_SYSEXTS, u"System extension initrd", initrds + INITRD_SYSEXT, &m) == EFI_SUCCESS) @@ -852,7 +849,6 @@ static void generate_sidecar_initrds( u".raw", /* as above */ u".confext.raw", &cpio_target_global_sysext, - /* tpm_pcr= */ TPM2_PCR_SYSEXTS, u"Global system extension initrd", initrds + INITRD_GLOBAL_SYSEXT, &m) == EFI_SUCCESS) @@ -863,7 +859,6 @@ static void generate_sidecar_initrds( u".confext.raw", /* exclude_suffix= */ NULL, &cpio_target_confext, - /* tpm_pcr= */ TPM2_PCR_KERNEL_CONFIG, u"Configuration extension initrd", initrds + INITRD_CONFEXT, &m) == EFI_SUCCESS) @@ -874,7 +869,6 @@ static void generate_sidecar_initrds( u".confext.raw", /* exclude_suffix= */ NULL, &cpio_target_global_confext, - /* tpm_pcr= */ TPM2_PCR_KERNEL_CONFIG, u"Global configuration extension initrd", initrds + INITRD_GLOBAL_CONFEXT, &m) == EFI_SUCCESS) @@ -926,7 +920,6 @@ static void generate_embedded_initrds( sections[t->section].memory_size, &cpio_target_meta, t->filename, - /* tpm_pcr= */ UINT32_MAX, /* tpm_description= */ NULL, initrds + t->initrd_index, /* ret_measured= */ NULL); @@ -948,7 +941,6 @@ static void generate_boot_secret_initrd( BOOT_SECRET_SIZE, &cpio_target_meta_secret, u"boot-secret", - /* tpm_pcr= */ UINT32_MAX, /* tpm_description= */ NULL, initrds + INITRD_BOOT_SECRET, /* ret_measured= */ NULL); From feba5dcc5fdac6886a5c7e250cfd980e669ab4d7 Mon Sep 17 00:00:00 2001 From: Lennart Poettering Date: Fri, 20 Mar 2026 17:46:15 +0100 Subject: [PATCH 023/242] boot: parse 'extra' type 1 stanza too This loads the new 'extra' stanza, but doesn't actually do anything with it yet. That's added in a later commit. Replaces: #39286 Implements: https://github.com/uapi-group/specifications/pull/212 --- src/boot/boot.c | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-) diff --git a/src/boot/boot.c b/src/boot/boot.c index df5ce31fa3a3f..6ee2aded0895f 100644 --- a/src/boot/boot.c +++ b/src/boot/boot.c @@ -120,6 +120,7 @@ typedef struct BootEntry { char16_t *options; bool options_implied; /* If true, these options are implied if we invoke the PE binary without any parameters (as in: UKI). If false we must specify these options explicitly. */ char16_t **initrd; + char16_t **extras; char16_t key; EFI_STATUS (*call)(const struct BootEntry *entry, EFI_FILE *root_dir, EFI_HANDLE parent_image); int tries_done; @@ -424,6 +425,8 @@ static void print_status(Config *config, char16_t *loaded_image_path) { printf(" url: %ls\n", entry->url); STRV_FOREACH(initrd, entry->initrd) printf(" initrd: %ls\n", *initrd); + STRV_FOREACH(extra, entry->extras) + printf(" extra: %ls\n", *extra); if (entry->devicetree) printf(" devicetree: %ls\n", entry->devicetree); if (entry->options) @@ -1047,6 +1050,7 @@ static BootEntry* boot_entry_free(BootEntry *entry) { free(entry->devicetree); free(entry->options); strv_free(entry->initrd); + strv_free(entry->extras); free(entry->directory); free(entry->current_name); free(entry->next_name); @@ -1363,7 +1367,7 @@ static void boot_entry_add_type1( _cleanup_(boot_entry_freep) BootEntry *entry = NULL; char *line; - size_t pos = 0, n_initrd = 0; + size_t pos = 0, n_initrd = 0, n_extras = 0; char *key, *value; EFI_STATUS err; @@ -1492,6 +1496,14 @@ static void boot_entry_add_type1( entry->initrd[n_initrd++] = xstr8_to_path(value); entry->initrd[n_initrd] = NULL; + } else if (streq8(key, "extra")) { + entry->extras = xrealloc( + entry->extras, + n_extras == 0 ? 0 : (n_extras + 1) * sizeof(uint16_t *), + (n_extras + 2) * sizeof(uint16_t *)); + entry->extras[n_extras++] = xstr8_to_path(value); + entry->extras[n_extras] = NULL; + } else if (streq8(key, "options")) { _cleanup_free_ char16_t *new = NULL; From d5572aca2c38aa7f573e6f36e28dcf82860af8b5 Mon Sep 17 00:00:00 2001 From: Lennart Poettering Date: Wed, 25 Mar 2026 18:15:38 +0100 Subject: [PATCH 024/242] boot: load extra files for UKIs into memory and register as initrds This generates on-the-fly cpio initrds from 'extra' resources declared in Type #1 entries and installs them via the Linux initrd protocol so that they get passed to the Linux kernel. Replaces: #39286 --- src/boot/boot.c | 210 +++++++++++++++++++++++++++++++++++++++++-- src/boot/meson.build | 2 +- 2 files changed, 204 insertions(+), 8 deletions(-) diff --git a/src/boot/boot.c b/src/boot/boot.c index 6ee2aded0895f..fff35de864b8c 100644 --- a/src/boot/boot.c +++ b/src/boot/boot.c @@ -3,6 +3,7 @@ #include "bcd.h" #include "bootspec-fundamental.h" #include "console.h" +#include "cpio.h" #include "device-path-util.h" #include "devicetree.h" #include "drivers.h" @@ -38,6 +39,9 @@ #include "version.h" #include "vmm.h" +/* Safety margin, refuse larger extra files (this is not load bearing, only a safety net for robustness reasons). */ +#define EXTRA_SIZE_MAX (1024U * 1024U * 1536U) + /* Magic string for recognizing our own binaries */ #define SD_MAGIC "#### LoaderInfo: systemd-boot " GIT_VERSION " ####" DECLARE_NOALLOC_SECTION(".sdmagic", SD_MAGIC); @@ -2581,7 +2585,9 @@ static EFI_STATUS initrd_prepare( assert(ret_initrd_pages); assert(ret_initrd_size); - if (entry->type != LOADER_LINUX || strv_isempty(entry->initrd)) { + assert(entry->type == LOADER_LINUX); + + if (strv_isempty(entry->initrd)) { *ret_options = NULL; *ret_initrd_pages = (Pages) {}; *ret_initrd_size = 0; @@ -2685,6 +2691,174 @@ static EFI_STATUS initrd_prepare( return EFI_SUCCESS; } +static EFI_STATUS load_extras( + EFI_FILE *root, + const BootEntry *entry, + Pages *ret_initrd_pages, + size_t *ret_initrd_size) { + + EFI_STATUS err; + + assert(root); + assert(entry); + assert(ret_initrd_pages); + assert(ret_initrd_size); + + assert(IN_SET(entry->type, LOADER_UKI, LOADER_UKI_URL)); + + _cleanup_(iovec_done) struct iovec previous_initrd = {}, confext_initrd = {}, sysext_initrd = {}, credential_initrd = {}; + + const struct ExtraResourceInfo { + const char16_t *suffix; + const CpioTarget *target; + struct iovec *iovec; + const char16_t *tpm_description; + } table[] = { + { u".cred", &cpio_target_credentials, &credential_initrd, u"Entry credentials initrd" }, + { u".sysext.raw", &cpio_target_sysext, &sysext_initrd, u"Entry system extension initrd" }, + { u".confext.raw", &cpio_target_confext, &confext_initrd, u"Entry configuration extension initrd" }, + }; + + if (strv_isempty(entry->extras)) + goto nothing; + + uint32_t inode = 1; /* inode counter, so that each item gets a new inode */ + unsigned n = 0; + + STRV_FOREACH(i, entry->extras) { + _cleanup_file_close_ EFI_FILE *handle = NULL; + err = root->Open(root, &handle, *i, EFI_FILE_MODE_READ, /* Attributes= */ 0); + if (err != EFI_SUCCESS) { + log_warning_status(err, "Failed to open extra file '%ls', ignoring: %m", *i); + continue; + } + + _cleanup_free_ EFI_FILE_INFO *info = NULL; + err = get_file_info(handle, &info, /* ret_size= */ NULL); + if (err != EFI_SUCCESS) { + log_warning_status(err, "Failed to get information about file '%ls', ignoring: %m", *i); + continue; + } + + if (FLAGS_SET(info->Attribute, EFI_FILE_DIRECTORY)) { + log_warning("Extra file '%ls' is a directory, ignoring.", *i); + continue; + } + + if (info->FileSize == 0) { + log_warning("Extra file '%ls' is empty, ignoring.", *i); + continue; + } + if (info->FileSize > EXTRA_SIZE_MAX) { + log_warning("Extra file '%ls' is larger than allowed extra file size, ignoring.", *i); + continue; + } + + if (!is_ascii(info->FileName)) { + log_warning("Extra file name '%ls' is not valid ASCII, ignoring.", *i); + continue; + } + if (strlen16(info->FileName) > 255) { /* Max filename size on Linux */ + log_warning("Filename '%ls' too long, ignoring.", *i); + continue; + } + + const struct ExtraResourceInfo *x = NULL; + FOREACH_ELEMENT(j, table) { + if (endswith_no_case(info->FileName, j->suffix)) { + x = j; + break; + } + } + if (!x) { + log_warning("Unrecognized type of extra file '%ls', ignoring.", info->FileName); + continue; + } + + _cleanup_free_ char *content = NULL; + size_t contentsize = 0; /* avoid false maybe-uninitialized warning */ + err = file_handle_read(handle, /* offset= */ 0, info->FileSize, &content, &contentsize); + if (err != EFI_SUCCESS) { + log_warning_status(err, "Failed to read '%ls', ignoring: %m", *i); + continue; + } + + /* Generate the leading directory inodes right before adding the first files to the + * archive. Otherwise the cpio archive cannot be unpacked, since the leading dirs won't + * exist. Note that we potentially do redundant work here: a prior iteration might already + * have created the prefix for us, but to simplify this we regenerate it anyway. It's very + * little data, and simplifies the implementation here a lot. */ + err = pack_cpio_prefix(x->target, &inode, &x->iovec->iov_base, &x->iovec->iov_len); + if (err != EFI_SUCCESS) + return log_error_status(err, "Failed to pack cpio prefix '%s': %m", x->target->directory); + + err = pack_cpio_one( + info->FileName, + content, contentsize, + x->target, + &inode, + &x->iovec->iov_base, &x->iovec->iov_len); + if (err != EFI_SUCCESS) + return log_error_status(err, "Failed to pack cpio file '%ls': %m", info->FileName); + + n++; + } + + if (n == 0) /* Nothing actually loaded */ + goto nothing; + + FOREACH_ELEMENT(x, table) { + if (x->iovec->iov_len <= 0) + continue; + + err = pack_cpio_trailer(&x->iovec->iov_base, &x->iovec->iov_len); + if (err != EFI_SUCCESS) + return log_error_status(err, "Failed to pack cpio trailer: %m"); + + err = tpm_log_ipl_event( + x->target->tpm_pcr, + POINTER_TO_PHYSICAL_ADDRESS(x->iovec->iov_base), + x->iovec->iov_len, + x->tpm_description, + /* ret_measured= */ NULL); + if (err != EFI_SUCCESS) + return log_error_status( + err, + "Unable to add cpio TPM measurement for PCR %u (%ls): %m", + x->target->tpm_pcr, + x->tpm_description); + } + + /* Be nice: pick up any previously registered initrds and prepend them to what we are generating here */ + err = initrd_read_previous(&previous_initrd); + if (err == EFI_NOT_FOUND) + log_debug_status(err, "No previous initrd installed."); + else if (err != EFI_SUCCESS) + log_warning_status(err, "Failed to read previously registered initrd, ignoring."); + else + log_debug("Successfully loaded previously installed initrd (%zu bytes).", previous_initrd.iov_len); + + err = combine_initrds( + (const struct iovec[]) { + previous_initrd, + credential_initrd, + sysext_initrd, + confext_initrd, + }, + /* n_initrds= */ 4, + ret_initrd_pages, + ret_initrd_size); + if (err != EFI_SUCCESS) + return log_error_status(err, "Failed to combine previous with extra initrds: %m"); + + return EFI_SUCCESS; + +nothing: + *ret_initrd_pages = (Pages) {}; + *ret_initrd_size = 0; + return EFI_SUCCESS; +} + static EFI_STATUS expand_path( EFI_HANDLE parent_image, EFI_DEVICE_PATH *path, @@ -2833,15 +3007,11 @@ static EFI_STATUS call_image_start( return log_error_status(err, "Error loading EFI binary %ls: %m", entry->loader); } - _cleanup_(cleanup_initrd) EFI_HANDLE initrd_handle = NULL; _cleanup_free_ char16_t *options_initrd = NULL; - _cleanup_pages_ Pages initrd_pages = {}; + _cleanup_pages_ Pages initrd_pages = {}; /* Note: please keep order intact: these pages should be released after the initrd handle is released */ + _cleanup_(cleanup_initrd) EFI_HANDLE initrd_handle = NULL; size_t initrd_size = 0; if (image_root) { - err = initrd_prepare(image_root, entry, &options_initrd, &initrd_pages, &initrd_size); - if (err != EFI_SUCCESS) - return log_error_status(err, "Error preparing initrd: %m"); - /* DTBs are loaded by the kernel before ExitBootServices(), and they can be used to map and * assign arbitrary memory ranges, so skip them when secure boot is enabled as the DTB here * is unverified. */ @@ -2851,9 +3021,35 @@ static EFI_STATUS call_image_start( return log_error_status(err, "Error loading %ls: %m", entry->devicetree); } + switch (entry->type) { + + case LOADER_LINUX: + /* For traditional Linux we follow 'initrd' links, because that's how things worked in the good old days */ + err = initrd_prepare(image_root, entry, &options_initrd, &initrd_pages, &initrd_size); + if (err != EFI_SUCCESS) + return log_error_status(err, "Error preparing initrd: %m"); + + break; + + case LOADER_UKI: + case LOADER_UKI_URL: + /* For modern UKIs we'll not bother with 'initrd', but we'll instead support 'extra' + * for loading credentials, sysext and confext. */ + + err = load_extras(image_root, entry, &initrd_pages, &initrd_size); + if (err != EFI_SUCCESS) + return err; /* load_extras() logs on its own */ + break; + + default: + ; + } + err = initrd_register(&IOVEC_MAKE(PHYSICAL_ADDRESS_TO_POINTER(initrd_pages.addr), initrd_size), &initrd_handle); if (err != EFI_SUCCESS) return log_error_status(err, "Error registering initrd: %m"); + + /* NB: the initrd pages remain in our possession, we will free them if executing the image fails below */ } EFI_LOADED_IMAGE_PROTOCOL *loaded_image; diff --git a/src/boot/meson.build b/src/boot/meson.build index dfac98f034a6d..29fb64efbee1b 100644 --- a/src/boot/meson.build +++ b/src/boot/meson.build @@ -309,6 +309,7 @@ endif libefi_sources = files( 'chid.c', 'console.c', + 'cpio.c', 'device-path-util.c', 'devicetree.c', 'drivers.c', @@ -341,7 +342,6 @@ systemd_boot_sources = files( stub_sources = files( 'boot-secret.c', - 'cpio.c', 'linux.c', 'splash.c', 'stub.c', From a3d0e761d4a6e1e59844beb153f88e8daa21b2cc Mon Sep 17 00:00:00 2001 From: Lennart Poettering Date: Wed, 29 Apr 2026 12:39:26 +0200 Subject: [PATCH 025/242] boot: downgrade log level for an error we ignore --- src/boot/cpio.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/boot/cpio.c b/src/boot/cpio.c index 31638b1c8fc22..36c536681cc12 100644 --- a/src/boot/cpio.c +++ b/src/boot/cpio.c @@ -406,7 +406,7 @@ EFI_STATUS pack_cpio( err = file_read(extra_dir, items[i], 0, 0, &content, &contentsize); if (err != EFI_SUCCESS) { - log_error_status(err, "Failed to read %ls, ignoring: %m", items[i]); + log_warning_status(err, "Failed to read %ls, ignoring: %m", items[i]); continue; } From 6b1324fb867d89147585ee20160dbe8f37beefc8 Mon Sep 17 00:00:00 2001 From: Lennart Poettering Date: Wed, 29 Apr 2026 13:35:41 +0200 Subject: [PATCH 026/242] man: add a brief note about type 1 extra lines --- man/systemd-stub.xml | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/man/systemd-stub.xml b/man/systemd-stub.xml index bf23c900d026c..95f62ca66b56a 100644 --- a/man/systemd-stub.xml +++ b/man/systemd-stub.xml @@ -291,6 +291,14 @@ by systemd-creds encrypt -T (see systemd-creds1 for details); in case of the system extension images by using signed Verity images. + + Note that earlier components of the boot process might register additional initrds, and thus + additional "companion" resources such as system extensions, configuration extensions and credentials for + consumption by the kernel and OS eventually booted. For example, + systemd-boot7 does + this for resources configured in UAPI.1 Type #1 extra + lines. systemd-stub will combine any resources provided that way with the companion + file resources it acquires itself. From fb0143f1ceb03f1b8f8437f5787d5d402a0d2dd1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Zbigniew=20J=C4=99drzejewski-Szmek?= Date: Wed, 29 Apr 2026 12:01:30 +0200 Subject: [PATCH 027/242] shared/options: add option_parser_get_help_table_ns() helper It'll be used in the next commit. --- src/run/run.c | 2 +- src/shared/options.h | 2 ++ 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/src/run/run.c b/src/run/run.c index ce35b48fba4cb..5827d91e1f9e4 100644 --- a/src/run/run.c +++ b/src/run/run.c @@ -188,7 +188,7 @@ static int help_sudo_mode(void) { * sudo's short switches, hence please do not introduce new short switches unless they have a roughly * equivalent purpose on sudo. Use long options for everything private to run0. */ - r = option_parser_get_help_table_full("run0", /* group= */ NULL, &opts_table); + r = option_parser_get_help_table_ns("run0", &opts_table); if (r < 0) return r; diff --git a/src/shared/options.h b/src/shared/options.h index 5f55dd5d19fa7..f50fbdb3cb3e5 100644 --- a/src/shared/options.h +++ b/src/shared/options.h @@ -236,6 +236,8 @@ int _option_parser_get_help_table_full( Table **ret); #define option_parser_get_help_table_full(namespace, group, ret) \ _option_parser_get_help_table_full(ALIGN_PTR(__start_SYSTEMD_OPTIONS), __stop_SYSTEMD_OPTIONS, namespace, group, ret) +#define option_parser_get_help_table_ns(ns, ret) \ + option_parser_get_help_table_full(ns, /* group= */ NULL, ret) #define option_parser_get_help_table_group(group, ret) \ option_parser_get_help_table_full(/* namespace= */ NULL, group, ret) #define option_parser_get_help_table(ret) \ From c2c98878520e816c2d6535edebd3a5e233360ed7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Zbigniew=20J=C4=99drzejewski-Szmek?= Date: Sun, 26 Apr 2026 12:51:36 +0200 Subject: [PATCH 028/242] udevadm: convert option parsing to the new option parser Verb dispatch is left untouched for now. Co-developed-by: Claude Opus 4.7 (1M context) --- src/udev/udevadm.c | 70 +++++++++++++++++++++------------------------- 1 file changed, 32 insertions(+), 38 deletions(-) diff --git a/src/udev/udevadm.c b/src/udev/udevadm.c index 70ff213cb9999..23e03d6fb0e64 100644 --- a/src/udev/udevadm.c +++ b/src/udev/udevadm.c @@ -1,13 +1,13 @@ /* SPDX-License-Identifier: GPL-2.0-or-later */ -#include #include -#include "alloc-util.h" #include "argv-util.h" +#include "format-table.h" +#include "help-util.h" #include "label-util.h" #include "main-func.h" -#include "pretty-print.h" +#include "options.h" #include "udev-util.h" #include "udevadm.h" #include "udevd.h" @@ -28,60 +28,53 @@ static int help(void) { { "lock", "Lock a block device" }, }; - _cleanup_free_ char *link = NULL; + _cleanup_(table_unrefp) Table *options = NULL; int r; - r = terminal_urlify_man("udevadm", "8", &link); + r = option_parser_get_help_table_ns("udevadm", &options); if (r < 0) - return log_oom(); + return r; - printf("%s [--help] [--version] [--debug] COMMAND [COMMAND OPTIONS]\n\n" - "Send control commands or test the device manager.\n\n" - "Commands:\n", - program_invocation_short_name); + help_cmdline("[OPTIONS…] COMMAND [COMMAND OPTIONS…]"); + help_abstract("Send control commands or test the device manager."); + help_section("Commands:"); FOREACH_ELEMENT(desc, short_descriptions) printf(" %-12s %s\n", (*desc)[0], (*desc)[1]); - printf("\nSee the %s for details.\n", link); + help_section("Options:"); + r = table_print_or_warn(options); + if (r < 0) + return r; + + help_man_page_reference("udevadm", "8"); return 0; } -static int parse_argv(int argc, char *argv[]) { - static const struct option options[] = { - { "debug", no_argument, NULL, 'd' }, - { "help", no_argument, NULL, 'h' }, - { "version", no_argument, NULL, 'V' }, - {} - }; - int c; - +static int parse_argv(int argc, char *argv[], char ***remaining_args) { assert(argc >= 0); assert(argv); + assert(remaining_args); - /* Resetting to 0 forces the invocation of an internal initialization routine of getopt_long() - * that checks for GNU extensions in optstring ('-' or '+' at the beginning). */ - optind = 0; - while ((c = getopt_long(argc, argv, "+dhV", options, NULL)) >= 0) + OptionParser opts = { argc, argv, OPTION_PARSER_STOP_AT_FIRST_NONOPTION, "udevadm" }; + + FOREACH_OPTION(c, &opts, /* on_error= */ return c) switch (c) { - case 'd': - log_set_max_level(LOG_DEBUG); - break; + OPTION_NAMESPACE("udevadm"): {} - case 'h': + OPTION_COMMON_HELP: return help(); - case 'V': + OPTION_COMMON_VERSION_WITH_HIDDEN_V: return print_version(); - case '?': - return -EINVAL; - - default: - assert_not_reached(); + OPTION('d', "debug", NULL, "Enable debug logging"): + log_set_max_level(LOG_DEBUG); + break; } + *remaining_args = option_parser_get_args(&opts); return 1; /* work to do */ } @@ -99,7 +92,7 @@ static int verb_help_main(int argc, char *argv[], uintptr_t _data, void *userdat return help(); } -static int udevadm_main(int argc, char *argv[]) { +static int udevadm_main(char **args) { static const Verb verbs[] = { { "cat", VERB_ANY, VERB_ANY, 0, verb_cat_main }, { "info", VERB_ANY, VERB_ANY, 0, verb_info_main }, @@ -118,10 +111,11 @@ static int udevadm_main(int argc, char *argv[]) { {} }; - return dispatch_verb(argc, argv, verbs, NULL); + return _dispatch_verb_with_args(args, verbs, verbs + ELEMENTSOF(verbs) - 1, NULL); } static int run(int argc, char *argv[]) { + char **args = NULL; int r; if (invoked_as(argv, "udevd")) @@ -130,7 +124,7 @@ static int run(int argc, char *argv[]) { (void) udev_parse_config(); log_setup(); - r = parse_argv(argc, argv); + r = parse_argv(argc, argv, &args); if (r <= 0) return r; @@ -138,7 +132,7 @@ static int run(int argc, char *argv[]) { if (r < 0) return r; - return udevadm_main(argc, argv); + return udevadm_main(args); } DEFINE_MAIN_FUNCTION_WITH_POSITIVE_FAILURE(run); From 07fc22cd0384da5dc4a5d576cc90b7e02d38cfe8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Zbigniew=20J=C4=99drzejewski-Szmek?= Date: Mon, 27 Apr 2026 09:30:54 +0200 Subject: [PATCH 029/242] udevadm-cat: convert to OPTION macros --- src/udev/udevadm-cat.c | 90 +++++++++++++++++++----------------------- 1 file changed, 41 insertions(+), 49 deletions(-) diff --git a/src/udev/udevadm-cat.c b/src/udev/udevadm-cat.c index 9d94f5a86c652..48ca72041627f 100644 --- a/src/udev/udevadm-cat.c +++ b/src/udev/udevadm-cat.c @@ -1,14 +1,14 @@ /* SPDX-License-Identifier: GPL-2.0-or-later */ -#include - #include "alloc-util.h" #include "conf-files.h" +#include "format-table.h" +#include "help-util.h" #include "log.h" +#include "options.h" #include "parse-argument.h" #include "pretty-print.h" #include "static-destruct.h" -#include "strv.h" #include "udevadm.h" #include "udevadm-util.h" @@ -19,83 +19,75 @@ static bool arg_config = false; STATIC_DESTRUCTOR_REGISTER(arg_root, freep); static int help(void) { - _cleanup_free_ char *link = NULL; + _cleanup_(table_unrefp) Table *options = NULL; int r; - r = terminal_urlify_man("udevadm", "8", &link); + r = option_parser_get_help_table_ns("udevadm-cat", &options); if (r < 0) - return log_oom(); - - printf("%s cat [OPTIONS] [FILE...]\n" - "\n%sShow udev rules files.%s\n\n" - " -h --help Show this help\n" - " -V --version Show package version\n" - " --root=PATH Operate on an alternate filesystem root\n" - " --tldr Skip comments and empty lines\n" - " --config Show udev.conf rather than udev rules files\n" - "\nSee the %s for details.\n", - program_invocation_short_name, - ansi_highlight(), - ansi_normal(), - link); + return r; + help_cmdline("cat [OPTIONS...] [FILE...]"); + help_abstract("Show udev rules files."); + help_section("Options:"); + r = table_print_or_warn(options); + if (r < 0) + return r; + + help_man_page_reference("udevadm", "8"); return 0; } -static int parse_argv(int argc, char *argv[]) { - enum { - ARG_ROOT = 0x100, - ARG_TLDR, - ARG_CONFIG, - }; - static const struct option options[] = { - { "help", no_argument, NULL, 'h' }, - { "version", no_argument, NULL, 'V' }, - { "root", required_argument, NULL, ARG_ROOT }, - { "tldr", no_argument, NULL, ARG_TLDR }, - { "config", no_argument, NULL, ARG_CONFIG }, - {} - }; - - int r, c; +static int parse_argv(int argc, char *argv[], char ***remaining_args) { + int r; assert(argc >= 0); assert(argv); + assert(remaining_args); + + OptionParser opts = { argc, argv, .namespace = "udevadm-cat" }; - while ((c = getopt_long(argc, argv, "hVN:", options, NULL)) >= 0) + FOREACH_OPTION(c, &opts, /* on_error= */ return c) switch (c) { - case 'h': + + OPTION_NAMESPACE("udevadm-cat"): {} + + OPTION_COMMON_HELP: return help(); - case 'V': + + OPTION_COMMON_VERSION_WITH_HIDDEN_V: return print_version(); - case ARG_ROOT: - r = parse_path_argument(optarg, /* suppress_root= */ true, &arg_root); + + OPTION_LONG("root", "PATH", + "Operate on an alternate filesystem root"): + r = parse_path_argument(opts.arg, /* suppress_root= */ true, &arg_root); if (r < 0) return r; break; - case ARG_TLDR: + + OPTION_LONG("tldr", NULL, + "Skip comments and empty lines"): arg_cat_flags = CAT_TLDR; break; - case ARG_CONFIG: + + OPTION_LONG("config", NULL, + "Show udev.conf rather than udev rules files"): arg_config = true; break; - case '?': - return -EINVAL; - default: - assert_not_reached(); } - if (arg_config && optind < argc) + if (arg_config && option_parser_get_n_args(&opts) > 0) return log_error_errno(SYNTHETIC_ERRNO(EINVAL), "Combination of --config and FILEs is not supported."); + *remaining_args = option_parser_get_args(&opts); return 1; } int verb_cat_main(int argc, char *argv[], uintptr_t _data, void *userdata) { + char **args = NULL; int r; - r = parse_argv(argc, argv); + r = parse_argv(argc, argv, &args); if (r <= 0) return r; @@ -107,7 +99,7 @@ int verb_cat_main(int argc, char *argv[], uintptr_t _data, void *userdata) { CLEANUP_ARRAY(files, n_files, conf_file_free_array); - r = search_rules_files(strv_skip(argv, optind), arg_root, &files, &n_files); + r = search_rules_files(args, arg_root, &files, &n_files); if (r < 0) return r; From 765fc4125b871394580e988192c497ad5625ae90 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Zbigniew=20J=C4=99drzejewski-Szmek?= Date: Wed, 29 Apr 2026 12:02:25 +0200 Subject: [PATCH 030/242] shared/options: add OPTION_COMMON_RESOLVE_NAMES --- src/shared/options.h | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/src/shared/options.h b/src/shared/options.h index f50fbdb3cb3e5..a171f5f6a43f7 100644 --- a/src/shared/options.h +++ b/src/shared/options.h @@ -150,10 +150,13 @@ typedef struct Option { "(file, provider:PROVIDER)") /* A form used in udev code for compatibility. -V is accepted but not documented. */ -#define OPTION_COMMON_VERSION_WITH_HIDDEN_V \ - OPTION_COMMON_VERSION: {} \ +#define OPTION_COMMON_VERSION_WITH_HIDDEN_V \ + OPTION_COMMON_VERSION: {} \ OPTION_SHORT('V', NULL, /* help= */ NULL) +#define OPTION_COMMON_RESOLVE_NAMES \ + OPTION('N', "resolve-names", "MODE", \ + "When to resolve users and groups (early, late, or never)") /* This is magically mapped to the beginning and end of the section */ extern const Option __start_SYSTEMD_OPTIONS[]; From bfc07f83da24a883da843ae25fefd8376a1e217d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Zbigniew=20J=C4=99drzejewski-Szmek?= Date: Tue, 28 Apr 2026 20:28:00 +0200 Subject: [PATCH 031/242] udev: fix stale optarg use Fixup for 8623980980d3798f26f23aa56c1491cfd6ceb7b2. This didn't cause any problems until the conversion away from getopt_long(). --- src/udev/udevadm-util.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/udev/udevadm-util.c b/src/udev/udevadm-util.c index c30af47ff7c73..7e2420a77e8a9 100644 --- a/src/udev/udevadm-util.c +++ b/src/udev/udevadm-util.c @@ -144,7 +144,7 @@ int parse_resolve_name_timing(const char *str, ResolveNameTiming *ret) { if (streq(str, "help")) return DUMP_STRING_TABLE(resolve_name_timing, ResolveNameTiming, _RESOLVE_NAME_TIMING_MAX); - ResolveNameTiming v = resolve_name_timing_from_string(optarg); + ResolveNameTiming v = resolve_name_timing_from_string(str); if (v < 0) return log_error_errno(v, "--resolve-names= must be 'early', 'late', or 'never'."); From 5893cf3dcf35f8017c15e34dacdc51a695163aec Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Zbigniew=20J=C4=99drzejewski-Szmek?= Date: Tue, 28 Apr 2026 10:48:04 +0200 Subject: [PATCH 032/242] udev: convert udev-config.c to OPTION macros --timeout-signal is now documented (fixup for e209926778267cbd3e09ed8137bf45b7f370aed0). Co-developed-by: Claude Opus 4.7 --- src/udev/udev-config.c | 130 ++++++++++++++++++----------------------- 1 file changed, 56 insertions(+), 74 deletions(-) diff --git a/src/udev/udev-config.c b/src/udev/udev-config.c index 17deadfe76071..e234d6fe6d994 100644 --- a/src/udev/udev-config.c +++ b/src/udev/udev-config.c @@ -1,6 +1,5 @@ /* SPDX-License-Identifier: GPL-2.0-or-later */ -#include #include #include "conf-parser.h" @@ -8,10 +7,12 @@ #include "daemon-util.h" #include "fd-util.h" #include "fileio.h" +#include "format-table.h" #include "hashmap.h" +#include "help-util.h" #include "limits-util.h" +#include "options.h" #include "parse-util.h" -#include "pretty-print.h" #include "proc-cmdline.h" #include "serialize.h" #include "signal-util.h" @@ -149,110 +150,91 @@ static int parse_proc_cmdline_item(const char *key, const char *value, void *dat } static int help(void) { - _cleanup_free_ char *link = NULL; + _cleanup_(table_unrefp) Table *options = NULL; int r; - r = terminal_urlify_man("systemd-udevd.service", "8", &link); + r = option_parser_get_help_table_ns("udevd", &options); if (r < 0) - return log_oom(); - - printf("%s [OPTIONS...]\n\n" - "Rule-based manager for device events and files.\n\n" - " -h --help Print this message\n" - " -V --version Print version of the program\n" - " -d --daemon Detach and run in the background\n" - " -D --debug Enable debug output\n" - " -c --children-max=INT Set maximum number of workers\n" - " -e --exec-delay=SECONDS Seconds to wait before executing RUN=\n" - " -t --event-timeout=SECONDS Seconds to wait before terminating an event\n" - " -N --resolve-names=early|late|never\n" - " When to resolve users and groups\n" - "\nSee the %s for details.\n", - program_invocation_short_name, - link); + return r; + + help_cmdline("[OPTIONS...]"); + help_abstract("Rule-based manager for device events and files."); + + help_section("Options:"); + + r = table_print_or_warn(options); + if (r < 0) + return r; + help_man_page_reference("systemd-udevd.service", "8"); return 0; } static int parse_argv(int argc, char *argv[], UdevConfig *config) { - enum { - ARG_TIMEOUT_SIGNAL, - }; - - static const struct option options[] = { - { "daemon", no_argument, NULL, 'd' }, - { "debug", no_argument, NULL, 'D' }, - { "children-max", required_argument, NULL, 'c' }, - { "exec-delay", required_argument, NULL, 'e' }, - { "event-timeout", required_argument, NULL, 't' }, - { "resolve-names", required_argument, NULL, 'N' }, - { "help", no_argument, NULL, 'h' }, - { "version", no_argument, NULL, 'V' }, - { "timeout-signal", required_argument, NULL, ARG_TIMEOUT_SIGNAL }, - {} - }; - - int c, r; + int r; assert(argc >= 0); assert(argv); assert(config); - while ((c = getopt_long(argc, argv, "c:de:Dt:N:hV", options, NULL)) >= 0) { + OptionParser opts = { argc, argv, OPTION_PARSER_NORMAL, "udevd" }; + + FOREACH_OPTION(c, &opts, /* on_error= */ return c) switch (c) { - case 'd': + OPTION_NAMESPACE("udevd"): {} + + OPTION_COMMON_HELP: + return help(); + + OPTION('V', "version", NULL, "Show package version"): + printf("%s\n", GIT_VERSION); + return 0; + + OPTION('d', "daemon", NULL, "Detach and run in the background"): arg_daemonize = true; break; - case 'c': - r = safe_atou(optarg, &config->children_max); - if (r < 0) - log_warning_errno(r, "Failed to parse --children-max= value '%s', ignoring: %m", optarg); + + OPTION('D', "debug", NULL, "Enable debug output"): + arg_debug = true; + config->log_level = LOG_DEBUG; break; - case 'e': - r = parse_sec(optarg, &config->exec_delay_usec); + + OPTION('c', "children-max", "INT", "Set maximum number of workers"): + r = safe_atou(opts.arg, &config->children_max); if (r < 0) - log_warning_errno(r, "Failed to parse --exec-delay= value '%s', ignoring: %m", optarg); + log_warning_errno(r, "Failed to parse --children-max= value '%s', ignoring: %m", opts.arg); break; - case ARG_TIMEOUT_SIGNAL: - r = signal_from_string(optarg); - if (r <= 0) - log_warning_errno(r, "Failed to parse --timeout-signal= value '%s', ignoring: %m", optarg); - else - config->timeout_signal = r; - break; - case 't': - r = parse_sec(optarg, &config->timeout_usec); + OPTION('e', "exec-delay", "SECONDS", "Seconds to wait before executing RUN="): + r = parse_sec(opts.arg, &config->exec_delay_usec); if (r < 0) - log_warning_errno(r, "Failed to parse --event-timeout= value '%s', ignoring: %m", optarg); + log_warning_errno(r, "Failed to parse --exec-delay= value '%s', ignoring: %m", opts.arg); break; - case 'D': - arg_debug = true; - config->log_level = LOG_DEBUG; + + OPTION('t', "event-timeout", "SECONDS", "Seconds to wait before terminating an event"): + r = parse_sec(opts.arg, &config->timeout_usec); + if (r < 0) + log_warning_errno(r, "Failed to parse --event-timeout= value '%s', ignoring: %m", opts.arg); break; - case 'N': { - ResolveNameTiming t; - t = resolve_name_timing_from_string(optarg); + OPTION_COMMON_RESOLVE_NAMES: { + ResolveNameTiming t = resolve_name_timing_from_string(opts.arg); if (t < 0) - log_warning("Invalid --resolve-names= value '%s', ignoring.", optarg); + log_warning("Invalid --resolve-names= value '%s', ignoring.", opts.arg); else config->resolve_name_timing = t; break; } - case 'h': - return help(); - case 'V': - printf("%s\n", GIT_VERSION); - return 0; - case '?': - return -EINVAL; - default: - assert_not_reached(); + OPTION_LONG("timeout-signal", "SIGNAL", "Signal used when terminating an event"): + r = signal_from_string(opts.arg); + if (r <= 0) + log_warning_errno(r, "Failed to parse --timeout-signal= value '%s', ignoring: %m", opts.arg); + else + config->timeout_signal = r; + break; } - } return 1; } From ce4746f228085950b455c2c0c55a3ab9d17ba89e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Zbigniew=20J=C4=99drzejewski-Szmek?= Date: Tue, 28 Apr 2026 11:00:00 +0200 Subject: [PATCH 033/242] udevadm-hwdb: convert to OPTION macros Co-developed-by: Claude Opus 4.7 --- src/udev/udevadm-hwdb.c | 97 +++++++++++++++++++++-------------------- 1 file changed, 49 insertions(+), 48 deletions(-) diff --git a/src/udev/udevadm-hwdb.c b/src/udev/udevadm-hwdb.c index 5810efefd8ce2..f4060673ebfe7 100644 --- a/src/udev/udevadm-hwdb.c +++ b/src/udev/udevadm-hwdb.c @@ -1,10 +1,12 @@ /* SPDX-License-Identifier: LGPL-2.1-or-later */ -#include #include +#include "format-table.h" +#include "help-util.h" #include "hwdb-util.h" #include "log.h" +#include "options.h" #include "udevadm.h" static const char *arg_test = NULL; @@ -14,65 +16,64 @@ static bool arg_update = false; static bool arg_strict = false; static int help(void) { - printf("%s hwdb [OPTIONS]\n\n" - " -h --help Print this message\n" - " -V --version Print version of the program\n" - " -u --update Update the hardware database\n" - " -s --strict When updating, return non-zero exit value on any parsing error\n" - " --usr Generate in " UDEVLIBEXECDIR " instead of /etc/udev\n" - " -t --test=MODALIAS Query database and print result\n" - " -r --root=PATH Alternative root path in the filesystem\n\n" - "NOTE:\n" - "The sub-command 'hwdb' is deprecated, and is left for backwards compatibility.\n" - "Please use systemd-hwdb instead.\n", - program_invocation_short_name); + _cleanup_(table_unrefp) Table *options = NULL; + int r; + + r = option_parser_get_help_table_ns("udevadm-hwdb", &options); + if (r < 0) + return r; + help_cmdline("hwdb [OPTIONS]"); + help_abstract("Update or query the hardware database."); + help_section("Options:"); + r = table_print_or_warn(options); + if (r < 0) + return r; + + printf("\nNOTE:\n" + "The sub-command 'hwdb' is deprecated, and is left for backwards compatibility.\n" + "Please use systemd-hwdb instead.\n"); return 0; } static int parse_argv(int argc, char *argv[]) { - enum { - ARG_USR = 0x100, - }; - - static const struct option options[] = { - { "update", no_argument, NULL, 'u' }, - { "usr", no_argument, NULL, ARG_USR }, - { "strict", no_argument, NULL, 's' }, - { "test", required_argument, NULL, 't' }, - { "root", required_argument, NULL, 'r' }, - { "version", no_argument, NULL, 'V' }, - { "help", no_argument, NULL, 'h' }, - {} - }; - - int c; - - while ((c = getopt_long(argc, argv, "ust:r:Vh", options, NULL)) >= 0) + assert(argc >= 0); + assert(argv); + + OptionParser opts = { argc, argv, .namespace = "udevadm-hwdb" }; + + FOREACH_OPTION(c, &opts, /* on_error= */ return c) switch (c) { - case 'u': + + OPTION_NAMESPACE("udevadm-hwdb"): {} + + OPTION_COMMON_HELP: + return help(); + + OPTION('V', "version", NULL, "Show package version"): + return print_version(); + + OPTION('u', "update", NULL, "Update the hardware database"): arg_update = true; break; - case ARG_USR: - arg_hwdb_bin_dir = UDEVLIBEXECDIR; - break; - case 's': + + OPTION('s', "strict", NULL, + "When updating, return non-zero exit value on any parsing error"): arg_strict = true; break; - case 't': - arg_test = optarg; + + OPTION_LONG("usr", NULL, + "Generate in " UDEVLIBEXECDIR " instead of /etc/udev"): + arg_hwdb_bin_dir = UDEVLIBEXECDIR; + break; + + OPTION('t', "test", "MODALIAS", "Query database and print result"): + arg_test = opts.arg; break; - case 'r': - arg_root = optarg; + + OPTION('r', "root", "PATH", "Alternative root path in the filesystem"): + arg_root = opts.arg; break; - case 'V': - return print_version(); - case 'h': - return help(); - case '?': - return -EINVAL; - default: - assert_not_reached(); } return 1; From 57516477999d09f27be689a01c79360b32fb370f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Zbigniew=20J=C4=99drzejewski-Szmek?= Date: Tue, 28 Apr 2026 11:01:28 +0200 Subject: [PATCH 034/242] udevadm-test-builtin: convert to OPTION macros Co-developed-by: Claude Opus 4.7 --- src/udev/udevadm-test-builtin.c | 70 ++++++++++++++++++--------------- 1 file changed, 38 insertions(+), 32 deletions(-) diff --git a/src/udev/udevadm-test-builtin.c b/src/udev/udevadm-test-builtin.c index f17df9a7d51a2..31ac569957017 100644 --- a/src/udev/udevadm-test-builtin.c +++ b/src/udev/udevadm-test-builtin.c @@ -1,11 +1,11 @@ /* SPDX-License-Identifier: GPL-2.0-or-later */ -#include -#include - #include "device-private.h" #include "device-util.h" +#include "format-table.h" +#include "help-util.h" #include "log.h" +#include "options.h" #include "udev-builtin.h" #include "udevadm.h" #include "udevadm-util.h" @@ -15,51 +15,57 @@ static const char *arg_command = NULL; static const char *arg_syspath = NULL; static int help(void) { - printf("%s test-builtin [OPTIONS] COMMAND DEVPATH\n\n" - "Test a built-in command.\n\n" - " -h --help Print this message\n" - " -V --version Print version of the program\n" - " -a --action=ACTION|help Set action string\n" - "\nCommands:\n", - program_invocation_short_name); + _cleanup_(table_unrefp) Table *options = NULL; + int r; - udev_builtin_list(); + r = option_parser_get_help_table_ns("udevadm-test-builtin", &options); + if (r < 0) + return r; + help_cmdline("test-builtin [OPTIONS] COMMAND DEVPATH"); + help_abstract("Test a built-in command."); + help_section("Options:"); + r = table_print_or_warn(options); + if (r < 0) + return r; + + help_section("Commands:"); + udev_builtin_list(); return 0; } static int parse_argv(int argc, char *argv[]) { - static const struct option options[] = { - { "action", required_argument, NULL, 'a' }, - { "version", no_argument, NULL, 'V' }, - { "help", no_argument, NULL, 'h' }, - {} - }; + int r; - int r, c; + assert(argc >= 0); + assert(argv); - while ((c = getopt_long(argc, argv, "a:Vh", options, NULL)) >= 0) + OptionParser opts = { argc, argv, .namespace = "udevadm-test-builtin" }; + + FOREACH_OPTION(c, &opts, /* on_error= */ return c) switch (c) { - case 'a': - r = parse_device_action(optarg, &arg_action); + + OPTION_NAMESPACE("udevadm-test-builtin"): {} + + OPTION_COMMON_HELP: + return help(); + + OPTION('V', "version", NULL, "Show package version"): + return print_version(); + + OPTION('a', "action", "ACTION|help", "Set action string"): + r = parse_device_action(opts.arg, &arg_action); if (r <= 0) return r; break; - case 'V': - return print_version(); - case 'h': - return help(); - case '?': - return -EINVAL; - default: - assert_not_reached(); } - if (argc != optind + 2) + if (option_parser_get_n_args(&opts) != 2) return log_error_errno(SYNTHETIC_ERRNO(EINVAL), "Expected two arguments: command string and device path."); - arg_command = ASSERT_PTR(argv[optind]); - arg_syspath = ASSERT_PTR(argv[optind+1]); + char **args = option_parser_get_args(&opts); + arg_command = ASSERT_PTR(args[0]); + arg_syspath = ASSERT_PTR(args[1]); return 1; } From 12e5e0e90381b79f1d58d86f9ec06973f6b9b9c7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Zbigniew=20J=C4=99drzejewski-Szmek?= Date: Tue, 28 Apr 2026 11:16:04 +0200 Subject: [PATCH 035/242] udevadm-verify: convert to OPTION macros Co-developed-by: Claude Opus 4.7 --- src/udev/udevadm-verify.c | 93 +++++++++++++++++---------------------- 1 file changed, 40 insertions(+), 53 deletions(-) diff --git a/src/udev/udevadm-verify.c b/src/udev/udevadm-verify.c index 6af7f06ab05fe..1ecc1fbee9c78 100644 --- a/src/udev/udevadm-verify.c +++ b/src/udev/udevadm-verify.c @@ -1,16 +1,17 @@ /* SPDX-License-Identifier: GPL-2.0-or-later */ -#include #include #include "alloc-util.h" +#include "ansi-color.h" #include "conf-files.h" #include "errno-util.h" +#include "format-table.h" +#include "help-util.h" #include "log.h" +#include "options.h" #include "parse-argument.h" -#include "pretty-print.h" #include "static-destruct.h" -#include "strv.h" #include "udev-rules.h" #include "udevadm.h" #include "udevadm-util.h" @@ -23,81 +24,66 @@ static bool arg_style = true; STATIC_DESTRUCTOR_REGISTER(arg_root, freep); static int help(void) { - _cleanup_free_ char *link = NULL; + _cleanup_(table_unrefp) Table *options = NULL; int r; - r = terminal_urlify_man("udevadm", "8", &link); + r = option_parser_get_help_table_ns("udevadm-verify", &options); if (r < 0) - return log_oom(); - - printf("%s verify [OPTIONS] [FILE...]\n" - "\n%sVerify udev rules files.%s\n\n" - " -h --help Show this help\n" - " -V --version Show package version\n" - " -N --resolve-names=early|late|never When to resolve names\n" - " --root=PATH Operate on an alternate filesystem root\n" - " --no-summary Do not show summary\n" - " --no-style Ignore style issues\n" - "\nSee the %s for details.\n", - program_invocation_short_name, - ansi_highlight(), - ansi_normal(), - link); + return r; + + help_cmdline("verify [OPTIONS] [FILE...]"); + help_abstract("Verify udev rules files."); + help_section("Options:"); + r = table_print_or_warn(options); + if (r < 0) + return r; + help_man_page_reference("udevadm", "8"); return 0; } -static int parse_argv(int argc, char *argv[]) { - enum { - ARG_ROOT = 0x100, - ARG_NO_SUMMARY, - ARG_NO_STYLE, - }; - static const struct option options[] = { - { "help", no_argument, NULL, 'h' }, - { "version", no_argument, NULL, 'V' }, - { "resolve-names", required_argument, NULL, 'N' }, - { "root", required_argument, NULL, ARG_ROOT }, - { "no-summary", no_argument, NULL, ARG_NO_SUMMARY }, - { "no-style", no_argument, NULL, ARG_NO_STYLE }, - {} - }; - - int r, c; +static int parse_argv(int argc, char *argv[], char ***remaining_args) { + int r; assert(argc >= 0); assert(argv); + assert(remaining_args); + + OptionParser opts = { argc, argv, .namespace = "udevadm-verify" }; - while ((c = getopt_long(argc, argv, "hVN:", options, NULL)) >= 0) + FOREACH_OPTION(c, &opts, /* on_error= */ return c) switch (c) { - case 'h': + + OPTION_NAMESPACE("udevadm-verify"): {} + + OPTION_COMMON_HELP: return help(); - case 'V': + + OPTION('V', "version", NULL, "Show package version"): return print_version(); - case 'N': - r = parse_resolve_name_timing(optarg, &arg_resolve_name_timing); + + OPTION_COMMON_RESOLVE_NAMES: + r = parse_resolve_name_timing(opts.arg, &arg_resolve_name_timing); if (r <= 0) return r; break; - case ARG_ROOT: - r = parse_path_argument(optarg, /* suppress_root= */ true, &arg_root); + + OPTION_LONG("root", "PATH", "Operate on an alternate filesystem root"): + r = parse_path_argument(opts.arg, /* suppress_root= */ true, &arg_root); if (r < 0) return r; break; - case ARG_NO_SUMMARY: + + OPTION_LONG("no-summary", NULL, "Do not show summary"): arg_summary = false; break; - case ARG_NO_STYLE: + OPTION_LONG("no-style", NULL, "Ignore style issues"): arg_style = false; break; - - case '?': - return -EINVAL; - default: - assert_not_reached(); } + *remaining_args = option_parser_get_args(&opts); return 1; } @@ -158,9 +144,10 @@ static int verify_rules(UdevRules *rules, ConfFile * const *files, size_t n_file int verb_verify_main(int argc, char *argv[], uintptr_t _data, void *userdata) { _cleanup_(udev_rules_freep) UdevRules *rules = NULL; + char **args = NULL; int r; - r = parse_argv(argc, argv); + r = parse_argv(argc, argv, &args); if (r <= 0) return r; @@ -173,7 +160,7 @@ int verb_verify_main(int argc, char *argv[], uintptr_t _data, void *userdata) { CLEANUP_ARRAY(files, n_files, conf_file_free_array); - r = search_rules_files(strv_skip(argv, optind), arg_root, &files, &n_files); + r = search_rules_files(args, arg_root, &files, &n_files); if (r < 0) return r; From a517a6297e6dfc98cfd823baf692be3553088899 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Zbigniew=20J=C4=99drzejewski-Szmek?= Date: Tue, 28 Apr 2026 11:17:14 +0200 Subject: [PATCH 036/242] udevadm-test: convert to OPTION macros Co-developed-by: Claude Opus 4.7 --- src/udev/udevadm-test.c | 95 +++++++++++++++++++++-------------------- 1 file changed, 48 insertions(+), 47 deletions(-) diff --git a/src/udev/udevadm-test.c b/src/udev/udevadm-test.c index f3ac39717e946..a7841333016f9 100644 --- a/src/udev/udevadm-test.c +++ b/src/udev/udevadm-test.c @@ -3,7 +3,6 @@ * Copyright © 2003-2004 Greg Kroah-Hartman */ -#include #include #include @@ -12,7 +11,10 @@ #include "alloc-util.h" #include "device-private.h" +#include "format-table.h" +#include "help-util.h" #include "log.h" +#include "options.h" #include "parse-argument.h" #include "static-destruct.h" #include "strv.h" @@ -33,55 +35,59 @@ static sd_json_format_flags_t arg_json_format_flags = SD_JSON_FORMAT_OFF; STATIC_DESTRUCTOR_REGISTER(arg_extra_rules_dir, strv_freep); static int help(void) { + _cleanup_(table_unrefp) Table *options = NULL; + int r; + + r = option_parser_get_help_table_ns("udevadm-test", &options); + if (r < 0) + return r; - printf("%s test [OPTIONS] DEVPATH\n\n" - "Test an event run.\n\n" - " -h --help Show this help\n" - " -V --version Show package version\n" - " -a --action=ACTION|help Set action string\n" - " -N --resolve-names=early|late|never When to resolve names\n" - " -D --extra-rules-dir=DIR Also load rules from the directory\n" - " -v --verbose Show verbose logs\n" - " --json=pretty|short|off Generate JSON output\n", - program_invocation_short_name); + help_cmdline("test [OPTIONS] DEVPATH"); + help_abstract("Test an event run."); + help_section("Options:"); + r = table_print_or_warn(options); + if (r < 0) + return r; + help_man_page_reference("udevadm", "8"); return 0; } static int parse_argv(int argc, char *argv[]) { - enum { - ARG_JSON = 0x100, - }; - - static const struct option options[] = { - { "action", required_argument, NULL, 'a' }, - { "resolve-names", required_argument, NULL, 'N' }, - { "extra-rules-dir", required_argument, NULL, 'D' }, - { "verbose", no_argument, NULL, 'v' }, - { "json", required_argument, NULL, ARG_JSON }, - { "version", no_argument, NULL, 'V' }, - { "help", no_argument, NULL, 'h' }, - {} - }; - - int r, c; - - while ((c = getopt_long(argc, argv, "a:N:D:vVh", options, NULL)) >= 0) + int r; + + assert(argc >= 0); + assert(argv); + + OptionParser opts = { argc, argv, .namespace = "udevadm-test" }; + + FOREACH_OPTION(c, &opts, /* on_error= */ return c) switch (c) { - case 'a': - r = parse_device_action(optarg, &arg_action); + + OPTION_NAMESPACE("udevadm-test"): {} + + OPTION_COMMON_HELP: + return help(); + + OPTION('V', "version", NULL, "Show package version"): + return print_version(); + + OPTION('a', "action", "ACTION|help", "Set action string"): + r = parse_device_action(opts.arg, &arg_action); if (r <= 0) return r; break; - case 'N': - r = parse_resolve_name_timing(optarg, &arg_resolve_name_timing); + + OPTION_COMMON_RESOLVE_NAMES: + r = parse_resolve_name_timing(opts.arg, &arg_resolve_name_timing); if (r <= 0) return r; break; - case 'D': { + + OPTION('D', "extra-rules-dir", "DIR", "Also load rules from the directory"): { _cleanup_free_ char *p = NULL; - r = parse_path_argument(optarg, /* suppress_root= */ false, &p); + r = parse_path_argument(opts.arg, /* suppress_root= */ false, &p); if (r < 0) return r; @@ -90,25 +96,20 @@ static int parse_argv(int argc, char *argv[]) { return log_oom(); break; } - case 'v': + + OPTION('v', "verbose", NULL, "Show verbose logs"): arg_verbose = true; break; - case ARG_JSON: - r = parse_json_argument(optarg, &arg_json_format_flags); + + OPTION_COMMON_JSON: + r = parse_json_argument(opts.arg, &arg_json_format_flags); if (r <= 0) return r; break; - case 'V': - return print_version(); - case 'h': - return help(); - case '?': - return -EINVAL; - default: - assert_not_reached(); } - arg_syspath = argv[optind]; + char **args = option_parser_get_args(&opts); + arg_syspath = args[0]; if (!arg_syspath) return log_error_errno(SYNTHETIC_ERRNO(EINVAL), "syspath parameter missing."); From 5a2144f9bc78c41d670791f7c8063a4574296f45 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Zbigniew=20J=C4=99drzejewski-Szmek?= Date: Tue, 28 Apr 2026 11:22:32 +0200 Subject: [PATCH 037/242] udevadm-monitor: convert to OPTION macros Co-developed-by: Claude Opus 4.7 --- src/udev/udevadm-monitor.c | 95 ++++++++++++++++++++------------------ 1 file changed, 49 insertions(+), 46 deletions(-) diff --git a/src/udev/udevadm-monitor.c b/src/udev/udevadm-monitor.c index 6f33cc3710cca..0c165241a2e3d 100644 --- a/src/udev/udevadm-monitor.c +++ b/src/udev/udevadm-monitor.c @@ -1,7 +1,5 @@ /* SPDX-License-Identifier: GPL-2.0-or-later */ -#include - #include "sd-device.h" #include "sd-event.h" @@ -9,8 +7,11 @@ #include "device-monitor-private.h" #include "device-private.h" #include "device-util.h" +#include "format-table.h" #include "format-util.h" #include "hashmap.h" +#include "help-util.h" +#include "options.h" #include "set.h" #include "static-destruct.h" #include "string-util.h" @@ -99,60 +100,70 @@ static int setup_monitor(MonitorNetlinkGroup sender, sd_event *event, sd_device_ } static int help(void) { - printf("%s monitor [OPTIONS]\n\n" - "Listen to kernel and udev events.\n\n" - " -h --help Show this help\n" - " -V --version Show package version\n" - " -p --property Print the event properties\n" - " -k --kernel Print kernel uevents\n" - " -u --udev Print udev events\n" - " -s --subsystem-match=SUBSYSTEM[/DEVTYPE] Filter events by subsystem\n" - " -t --tag-match=TAG Filter events by tag\n", - program_invocation_short_name); + _cleanup_(table_unrefp) Table *options = NULL; + int r; + r = option_parser_get_help_table_ns("udevadm-monitor", &options); + if (r < 0) + return r; + + help_cmdline("monitor [OPTIONS]"); + help_abstract("Listen to kernel and udev events."); + help_section("Options:"); + r = table_print_or_warn(options); + if (r < 0) + return r; + + help_man_page_reference("udevadm", "8"); return 0; } static int parse_argv(int argc, char *argv[]) { - static const struct option options[] = { - { "property", no_argument, NULL, 'p' }, - { "environment", no_argument, NULL, 'e' }, /* alias for -p */ - { "kernel", no_argument, NULL, 'k' }, - { "udev", no_argument, NULL, 'u' }, - { "subsystem-match", required_argument, NULL, 's' }, - { "tag-match", required_argument, NULL, 't' }, - { "version", no_argument, NULL, 'V' }, - { "help", no_argument, NULL, 'h' }, - {} - }; - - int r, c; - - while ((c = getopt_long(argc, argv, "pekus:t:Vh", options, NULL)) >= 0) + int r; + + assert(argc >= 0); + assert(argv); + + OptionParser opts = { argc, argv, .namespace = "udevadm-monitor" }; + + FOREACH_OPTION(c, &opts, /* on_error= */ return c) switch (c) { - case 'p': - case 'e': + + OPTION_NAMESPACE("udevadm-monitor"): {} + + OPTION_COMMON_HELP: + return help(); + + OPTION('V', "version", NULL, "Show package version"): + return print_version(); + + OPTION('e', "environment", NULL, /* help= */ NULL): {} /* hidden alias for -p */ + OPTION('p', "property", NULL, "Print the event properties"): arg_show_property = true; break; - case 'k': + + OPTION('k', "kernel", NULL, "Print kernel uevents"): arg_print_kernel = true; break; - case 'u': + + OPTION('u', "udev", NULL, "Print udev events"): arg_print_udev = true; break; - case 's': { + + OPTION('s', "subsystem-match", "SUBSYSTEM[/DEVTYPE]", + "Filter events by subsystem"): { _cleanup_free_ char *subsystem = NULL, *devtype = NULL; const char *slash; - slash = strchr(optarg, '/'); + slash = strchr(opts.arg, '/'); if (slash) { devtype = strdup(slash + 1); if (!devtype) return log_oom(); - subsystem = strndup(optarg, slash - optarg); + subsystem = strndup(opts.arg, slash - opts.arg); } else - subsystem = strdup(optarg); + subsystem = strdup(opts.arg); if (!subsystem) return log_oom(); @@ -165,20 +176,12 @@ static int parse_argv(int argc, char *argv[]) { TAKE_PTR(devtype); break; } - case 't': - r = set_put_strdup(&arg_tag_filter, optarg); + + OPTION('t', "tag-match", "TAG", "Filter events by tag"): + r = set_put_strdup(&arg_tag_filter, opts.arg); if (r < 0) return log_oom(); break; - - case 'V': - return print_version(); - case 'h': - return help(); - case '?': - return -EINVAL; - default: - assert_not_reached(); } if (!arg_print_kernel && !arg_print_udev) { From ed2b92e2057a4bdae32ab7c81480a3a6c70e2487 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Zbigniew=20J=C4=99drzejewski-Szmek?= Date: Tue, 28 Apr 2026 11:28:35 +0200 Subject: [PATCH 038/242] udevadm-settle: convert to OPTION macros Co-developed-by: Claude Opus 4.7 --- src/udev/udevadm-settle.c | 89 +++++++++++++++++++++------------------ 1 file changed, 47 insertions(+), 42 deletions(-) diff --git a/src/udev/udevadm-settle.c b/src/udev/udevadm-settle.c index b71759dc818e6..1292462d28c25 100644 --- a/src/udev/udevadm-settle.c +++ b/src/udev/udevadm-settle.c @@ -4,7 +4,6 @@ * Copyright © 2009 Scott James Remnant */ -#include #include #include "sd-bus.h" @@ -14,6 +13,9 @@ #include "alloc-util.h" #include "bus-util.h" +#include "format-table.h" +#include "help-util.h" +#include "options.h" #include "path-util.h" #include "string-util.h" #include "strv.h" @@ -28,60 +30,63 @@ static usec_t arg_timeout_usec = 120 * USEC_PER_SEC; static const char *arg_exists = NULL; static int help(void) { - printf("%s settle [OPTIONS]\n\n" - "Wait for pending udev events.\n\n" - " -h --help Show this help\n" - " -V --version Show package version\n" - " -t --timeout=SEC Maximum time to wait for events\n" - " -E --exit-if-exists=FILE Stop waiting if file exists\n", - program_invocation_short_name); + _cleanup_(table_unrefp) Table *options = NULL; + int r; + + r = option_parser_get_help_table_ns("udevadm-settle", &options); + if (r < 0) + return r; + help_cmdline("settle [OPTIONS]"); + help_abstract("Wait for pending udev events."); + help_section("Options:"); + r = table_print_or_warn(options); + if (r < 0) + return r; + + help_man_page_reference("udevadm", "8"); return 0; } static int parse_argv(int argc, char *argv[]) { - static const struct option options[] = { - { "timeout", required_argument, NULL, 't' }, - { "exit-if-exists", required_argument, NULL, 'E' }, - { "version", no_argument, NULL, 'V' }, - { "help", no_argument, NULL, 'h' }, - { "seq-start", required_argument, NULL, 's' }, /* removed */ - { "seq-end", required_argument, NULL, 'e' }, /* removed */ - { "quiet", no_argument, NULL, 'q' }, /* removed */ - {} - }; - - int c, r; - - while ((c = getopt_long(argc, argv, "t:E:Vhs:e:q", options, NULL)) >= 0) { + int r; + + assert(argc >= 0); + assert(argv); + + OptionParser opts = { argc, argv, .namespace = "udevadm-settle" }; + + FOREACH_OPTION(c, &opts, /* on_error= */ return c) switch (c) { - case 't': - r = parse_sec(optarg, &arg_timeout_usec); + + OPTION_NAMESPACE("udevadm-settle"): {} + + OPTION_COMMON_HELP: + return help(); + + OPTION('V', "version", NULL, "Show package version"): + return print_version(); + + OPTION('t', "timeout", "SEC", "Maximum time to wait for events"): + r = parse_sec(opts.arg, &arg_timeout_usec); if (r < 0) - return log_error_errno(r, "Failed to parse timeout value '%s': %m", optarg); + return log_error_errno(r, "Failed to parse timeout value '%s': %m", opts.arg); break; - case 'E': - if (!path_is_valid(optarg)) - return log_error_errno(SYNTHETIC_ERRNO(EINVAL), "Invalid path: %s", optarg); - arg_exists = optarg; + OPTION('E', "exit-if-exists", "FILE", "Stop waiting if file exists"): + if (!path_is_valid(opts.arg)) + return log_error_errno(SYNTHETIC_ERRNO(EINVAL), "Invalid path: %s", opts.arg); + + arg_exists = opts.arg; break; - case 'V': - return print_version(); - case 'h': - return help(); - case 's': - case 'e': - case 'q': + + OPTION('s', "seq-start", "ARG", NULL): {} /* removed */ + OPTION('e', "seq-end", "ARG", NULL): {} /* removed */ + OPTION('q', "quiet", NULL, NULL): /* removed */ return log_info_errno(SYNTHETIC_ERRNO(EINVAL), "Option -%c no longer supported.", - c); - case '?': - return -EINVAL; - default: - assert_not_reached(); + opts.opt->short_code); } - } return 1; } From a00de0b648aa9e62daef97c8d5461be8e8d55afc Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Zbigniew=20J=C4=99drzejewski-Szmek?= Date: Tue, 28 Apr 2026 11:31:51 +0200 Subject: [PATCH 039/242] udevadm-lock: convert to OPTION macros Co-developed-by: Claude Opus 4.7 --- src/udev/udevadm-lock.c | 91 ++++++++++++++++------------------------- 1 file changed, 35 insertions(+), 56 deletions(-) diff --git a/src/udev/udevadm-lock.c b/src/udev/udevadm-lock.c index 483b64973d401..cebce08007eb0 100644 --- a/src/udev/udevadm-lock.c +++ b/src/udev/udevadm-lock.c @@ -1,6 +1,5 @@ /* SPDX-License-Identifier: GPL-2.0-or-later */ -#include #include #include #include @@ -9,11 +8,14 @@ #include "device-util.h" #include "fd-util.h" #include "fdset.h" +#include "format-table.h" +#include "glyph-util.h" #include "hash-funcs.h" +#include "help-util.h" #include "lock-util.h" +#include "options.h" #include "path-util.h" #include "pidref.h" -#include "pretty-print.h" #include "process-util.h" #include "signal-util.h" #include "sort-util.h" @@ -33,70 +35,52 @@ STATIC_DESTRUCTOR_REGISTER(arg_backing, strv_freep); STATIC_DESTRUCTOR_REGISTER(arg_cmdline, strv_freep); static int help(void) { - _cleanup_free_ char *link = NULL; + _cleanup_(table_unrefp) Table *options = NULL; int r; - r = terminal_urlify_man("udevadm", "8", &link); + r = option_parser_get_help_table_ns("udevadm-lock", &options); if (r < 0) - return log_oom(); + return r; - printf("%s [OPTIONS...] COMMAND\n" - "%s [OPTIONS...] --print\n" - "\n%sLock a block device and run a command.%s\n\n" - " -h --help Print this message\n" - " -V --version Print version of the program\n" - " -d --device=DEVICE Block device to lock\n" - " -b --backing=FILE File whose backing block device to lock\n" - " -t --timeout=SECS Block at most the specified time waiting for lock\n" - " -p --print Only show which block device the lock would be taken on\n" - "\nSee the %s for details.\n", - program_invocation_short_name, - program_invocation_short_name, - ansi_highlight(), - ansi_normal(), - link); + help_cmdline("lock [OPTIONS...] COMMAND"); + help_cmdline("lock [OPTIONS...] --print"); + help_abstract("Lock a block device and run a command."); + help_section("Options:"); + r = table_print_or_warn(options); + if (r < 0) + return r; + help_man_page_reference("udevadm", "8"); return 0; } static int parse_argv(int argc, char *argv[]) { - - static const struct option options[] = { - { "help", no_argument, NULL, 'h' }, - { "version", no_argument, NULL, 'V' }, - { "device", required_argument, NULL, 'd' }, - { "backing", required_argument, NULL, 'b' }, - { "timeout", required_argument, NULL, 't' }, - { "print", no_argument, NULL, 'p' }, - {} - }; - - int c, r; + int r; assert(argc >= 0); assert(argv); - /* Resetting to 0 forces the invocation of an internal initialization routine of getopt_long() - * that checks for GNU extensions in optstring ('-' or '+' at the beginning). */ - optind = 0; - while ((c = getopt_long(argc, argv, arg_print ? "hVd:b:t:p" : "+hVd:b:t:p", options, NULL)) >= 0) + OptionParser opts = { argc, argv, OPTION_PARSER_STOP_AT_FIRST_NONOPTION, "udevadm-lock" }; + FOREACH_OPTION(c, &opts, /* on_error= */ return c) switch (c) { - case 'h': + OPTION_NAMESPACE("udevadm-lock"): {} + + OPTION_COMMON_HELP: return help(); - case 'V': + OPTION('V', "version", NULL, "Show package version"): return print_version(); - case 'd': - case 'b': { + OPTION('d', "device", "DEVICE", "Block device to lock"): {} /* fall through */ + OPTION('b', "backing", "FILE", "File whose backing block device to lock"): { _cleanup_free_ char *s = NULL; - char ***l = c == 'd' ? &arg_devices : &arg_backing; + char ***l = opts.opt->short_code == 'd' ? &arg_devices : &arg_backing; - r = path_make_absolute_cwd(optarg, &s); + r = path_make_absolute_cwd(opts.arg, &s); if (r < 0) - return log_error_errno(r, "Failed to make path '%s' absolute: %m", optarg); + return log_error_errno(r, "Failed to make path '%s' absolute: %m", opts.arg); path_simplify(s); @@ -107,31 +91,26 @@ static int parse_argv(int argc, char *argv[]) { break; } - case 't': - r = parse_sec(optarg, &arg_timeout_usec); + OPTION('t', "timeout", "SECS", "Block at most the specified time waiting for lock"): + r = parse_sec(opts.arg, &arg_timeout_usec); if (r < 0) - return log_error_errno(r, "Failed to parse --timeout= parameter: %s", optarg); + return log_error_errno(r, "Failed to parse --timeout= parameter: %s", opts.arg); break; - case 'p': + OPTION('p', "print", NULL, "Only show which block device the lock would be taken on"): arg_print = true; break; - - case '?': - return -EINVAL; - - default: - assert_not_reached(); } + char **args = option_parser_get_args(&opts); if (arg_print) { - if (optind != argc) + if (!strv_isempty(args)) return log_error_errno(SYNTHETIC_ERRNO(EINVAL), "No arguments expected."); } else { - if (optind + 1 > argc) + if (strv_isempty(args)) return log_error_errno(SYNTHETIC_ERRNO(EINVAL), "Too few arguments, command to execute."); - arg_cmdline = strv_copy(argv + optind); + arg_cmdline = strv_copy(args); if (!arg_cmdline) return log_oom(); } From 0257deff36d1045d31144976a9c4b58e90d7bc1b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Zbigniew=20J=C4=99drzejewski-Szmek?= Date: Tue, 28 Apr 2026 11:33:16 +0200 Subject: [PATCH 040/242] udevadm-control: convert to OPTION macros Co-developed-by: Claude Opus 4.7 --- src/udev/udevadm-control.c | 141 +++++++++++++++---------------------- 1 file changed, 56 insertions(+), 85 deletions(-) diff --git a/src/udev/udevadm-control.c b/src/udev/udevadm-control.c index 964f721731ceb..ed586d5542d1f 100644 --- a/src/udev/udevadm-control.c +++ b/src/udev/udevadm-control.c @@ -1,12 +1,13 @@ /* SPDX-License-Identifier: GPL-2.0-or-later */ -#include -#include #include #include "creds-util.h" #include "errno-util.h" +#include "format-table.h" +#include "help-util.h" #include "log.h" +#include "options.h" #include "parse-argument.h" #include "parse-util.h" #include "static-destruct.h" @@ -47,151 +48,121 @@ static bool arg_has_control_commands(void) { } static int help(void) { - printf("%s control OPTION\n\n" - "Control the udev daemon.\n\n" - " -h --help Show this help\n" - " -V --version Show package version\n" - " -e --exit Instruct the daemon to cleanup and exit\n" - " -l --log-level=LEVEL Set the udev log level for the daemon\n" - " -s --stop-exec-queue Do not execute events, queue only\n" - " -S --start-exec-queue Execute events, flush queue\n" - " -R --reload Reload rules and databases\n" - " -p --property=KEY=VALUE Set a global property for all events\n" - " -m --children-max=N Maximum number of children\n" - " --ping Wait for udev to respond to a ping message\n" - " --trace=BOOL Enable/disable trace logging\n" - " --revert Revert previously set configurations\n" - " -t --timeout=SECONDS Maximum time to block for a reply\n" - " --load-credentials Load udev rules from credentials\n", - program_invocation_short_name); + _cleanup_(table_unrefp) Table *options = NULL; + int r; + + r = option_parser_get_help_table_ns("udevadm-control", &options); + if (r < 0) + return r; + + help_cmdline("control OPTION"); + help_abstract("Control the udev daemon."); + help_section("Options:"); + r = table_print_or_warn(options); + if (r < 0) + return r; + help_man_page_reference("udevadm", "8"); return 0; } static int parse_argv(int argc, char *argv[]) { - enum { - ARG_PING = 0x100, - ARG_TRACE, - ARG_REVERT, - ARG_LOAD_CREDENTIALS, - }; - - static const struct option options[] = { - { "exit", no_argument, NULL, 'e' }, - { "log-level", required_argument, NULL, 'l' }, - { "log-priority", required_argument, NULL, 'l' }, /* for backward compatibility */ - { "stop-exec-queue", no_argument, NULL, 's' }, - { "start-exec-queue", no_argument, NULL, 'S' }, - { "reload", no_argument, NULL, 'R' }, - { "reload-rules", no_argument, NULL, 'R' }, /* alias for -R */ - { "property", required_argument, NULL, 'p' }, - { "env", required_argument, NULL, 'p' }, /* alias for -p */ - { "children-max", required_argument, NULL, 'm' }, - { "ping", no_argument, NULL, ARG_PING }, - { "trace", required_argument, NULL, ARG_TRACE }, - { "revert", no_argument, NULL, ARG_REVERT }, - { "timeout", required_argument, NULL, 't' }, - { "load-credentials", no_argument, NULL, ARG_LOAD_CREDENTIALS }, - { "version", no_argument, NULL, 'V' }, - { "help", no_argument, NULL, 'h' }, - {} - }; - - int c, r; + int r; assert(argc >= 0); assert(argv); - while ((c = getopt_long(argc, argv, "el:sSRp:m:t:Vh", options, NULL)) >= 0) + OptionParser opts = { argc, argv, .namespace = "udevadm-control" }; + + FOREACH_OPTION(c, &opts, /* on_error= */ return c) switch (c) { - case 'e': + OPTION_NAMESPACE("udevadm-control"): {} + + OPTION_COMMON_HELP: + return help(); + + OPTION('V', "version", NULL, "Show package version"): + return print_version(); + + OPTION('e', "exit", NULL, "Instruct the daemon to cleanup and exit"): arg_exit = true; break; - case 'l': - arg_log_level = log_level_from_string(optarg); + OPTION_LONG("log-priority", "LEVEL", NULL): {} /* backward compat alias for --log-level */ + OPTION('l', "log-level", "LEVEL", "Set the udev log level for the daemon"): + arg_log_level = log_level_from_string(opts.arg); if (arg_log_level < 0) - return log_error_errno(arg_log_level, "Failed to parse log level '%s': %m", optarg); + return log_error_errno(arg_log_level, "Failed to parse log level '%s': %m", opts.arg); break; - case 's': + OPTION('s', "stop-exec-queue", NULL, "Do not execute events, queue only"): arg_start_exec_queue = false; break; - case 'S': + OPTION('S', "start-exec-queue", NULL, "Execute events, flush queue"): arg_start_exec_queue = true; break; - case 'R': + OPTION_LONG("reload-rules", NULL, NULL): {} /* hidden alias for -R */ + OPTION('R', "reload", NULL, "Reload rules and databases"): arg_reload = true; break; - case 'p': - if (!strchr(optarg, '=')) - return log_error_errno(SYNTHETIC_ERRNO(EINVAL), "expect = instead of '%s'", optarg); + OPTION_LONG("env", "KEY=VALUE", NULL): {} /* hidden alias for -p */ + OPTION('p', "property", "KEY=VALUE", "Set a global property for all events"): + if (!strchr(opts.arg, '=')) + return log_error_errno(SYNTHETIC_ERRNO(EINVAL), "expect = instead of '%s'", opts.arg); - r = strv_extend(&arg_env, optarg); + r = strv_extend(&arg_env, opts.arg); if (r < 0) return log_error_errno(r, "Failed to extend environment: %m"); break; - case 'm': { + OPTION('m', "children-max", "N", "Maximum number of children"): { unsigned i; - r = safe_atou(optarg, &i); + r = safe_atou(opts.arg, &i); if (r < 0) - return log_error_errno(r, "Failed to parse maximum number of children '%s': %m", optarg); + return log_error_errno(r, "Failed to parse maximum number of children '%s': %m", opts.arg); arg_max_children = i; break; } - case ARG_PING: + OPTION_LONG("ping", NULL, "Wait for udev to respond to a ping message"): arg_ping = true; break; - case ARG_TRACE: - r = parse_boolean_argument("--trace=", optarg, NULL); + OPTION_LONG("trace", "BOOL", "Enable/disable trace logging"): + r = parse_boolean_argument("--trace=", opts.arg, NULL); if (r < 0) return r; arg_trace = r; break; - case ARG_REVERT: + OPTION_LONG("revert", NULL, "Revert previously set configurations"): arg_revert = true; break; - case 't': - r = parse_sec(optarg, &arg_timeout); + OPTION('t', "timeout", "SECONDS", "Maximum time to block for a reply"): + r = parse_sec(opts.arg, &arg_timeout); if (r < 0) - return log_error_errno(r, "Failed to parse timeout value '%s': %m", optarg); + return log_error_errno(r, "Failed to parse timeout value '%s': %m", opts.arg); break; - case ARG_LOAD_CREDENTIALS: + OPTION_LONG("load-credentials", NULL, "Load udev rules from credentials"): arg_load_credentials = true; break; - - case 'V': - return print_version(); - - case 'h': - return help(); - - case '?': - return -EINVAL; - - default: - assert_not_reached(); } if (!arg_has_control_commands() && !arg_load_credentials) return log_error_errno(SYNTHETIC_ERRNO(EINVAL), "No control command option is specified."); - if (optind < argc) + if (option_parser_get_n_args(&opts) > 0) return log_error_errno(SYNTHETIC_ERRNO(EINVAL), - "Extraneous argument: %s", argv[optind]); + "This subprogram takes no positional arguments."); return 1; } From 5656636a417cd859cd5717798719e220962097ff Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Zbigniew=20J=C4=99drzejewski-Szmek?= Date: Tue, 28 Apr 2026 11:34:15 +0200 Subject: [PATCH 041/242] udevadm-wait: convert to OPTION macros Co-developed-by: Claude Opus 4.7 --- src/udev/udevadm-wait.c | 98 +++++++++++++++++++---------------------- 1 file changed, 46 insertions(+), 52 deletions(-) diff --git a/src/udev/udevadm-wait.c b/src/udev/udevadm-wait.c index 0e285fc36b247..a361bac61a3a7 100644 --- a/src/udev/udevadm-wait.c +++ b/src/udev/udevadm-wait.c @@ -1,7 +1,5 @@ /* SPDX-License-Identifier: GPL-2.0-or-later */ -#include -#include #include #include @@ -10,7 +8,10 @@ #include "device-monitor-private.h" #include "device-util.h" #include "event-util.h" +#include "format-table.h" #include "fs-util.h" +#include "help-util.h" +#include "options.h" #include "parse-util.h" #include "path-util.h" #include "static-destruct.h" @@ -297,79 +298,72 @@ static int setup_periodic_timer(sd_event *event) { } static int help(void) { - printf("%s wait [OPTIONS] DEVICE [DEVICE…]\n\n" - "Wait for devices or device symlinks being created.\n\n" - " -h --help Print this message\n" - " -V --version Print version of the program\n" - " -t --timeout=SEC Maximum time to wait for the device\n" - " --initialized=BOOL Wait for devices being initialized by systemd-udevd\n" - " --removed Wait for devices being removed\n" - " --settle Also wait for all queued events being processed\n", - program_invocation_short_name); + _cleanup_(table_unrefp) Table *options = NULL; + int r; + + r = option_parser_get_help_table_ns("udevadm-wait", &options); + if (r < 0) + return r; + + help_cmdline("wait [OPTIONS] DEVICE [DEVICE…]"); + help_abstract("Wait for devices or device symlinks being created."); + help_section("Options:"); + r = table_print_or_warn(options); + if (r < 0) + return r; + help_man_page_reference("udevadm", "8"); return 0; } static int parse_argv(int argc, char *argv[]) { - enum { - ARG_INITIALIZED = 0x100, - ARG_REMOVED, - ARG_SETTLE, - }; - - static const struct option options[] = { - { "timeout", required_argument, NULL, 't' }, - { "initialized", required_argument, NULL, ARG_INITIALIZED }, - { "removed", no_argument, NULL, ARG_REMOVED }, - { "settle", no_argument, NULL, ARG_SETTLE }, - { "help", no_argument, NULL, 'h' }, - { "version", no_argument, NULL, 'V' }, - {} - }; - - int c, r; - - while ((c = getopt_long(argc, argv, "t:hV", options, NULL)) >= 0) + int r; + + assert(argc >= 0); + assert(argv); + + OptionParser opts = { argc, argv, .namespace = "udevadm-wait" }; + + FOREACH_OPTION(c, &opts, /* on_error= */ return c) switch (c) { - case 't': - r = parse_sec(optarg, &arg_timeout_usec); + + OPTION_NAMESPACE("udevadm-wait"): {} + + OPTION_COMMON_HELP: + return help(); + + OPTION('V', "version", NULL, "Show package version"): + return print_version(); + + OPTION('t', "timeout", "SEC", "Maximum time to wait for the device"): + r = parse_sec(opts.arg, &arg_timeout_usec); if (r < 0) - return log_error_errno(r, "Failed to parse -t/--timeout= parameter: %s", optarg); + return log_error_errno(r, "Failed to parse -t/--timeout= parameter: %s", opts.arg); break; - case ARG_INITIALIZED: - r = parse_boolean(optarg); + OPTION_LONG("initialized", "BOOL", + "Wait for devices being initialized by systemd-udevd"): + r = parse_boolean(opts.arg); if (r < 0) - return log_error_errno(r, "Failed to parse --initialized= parameter: %s", optarg); + return log_error_errno(r, "Failed to parse --initialized= parameter: %s", opts.arg); arg_wait_until = r ? WAIT_UNTIL_INITIALIZED : WAIT_UNTIL_ADDED; break; - case ARG_REMOVED: + OPTION_LONG("removed", NULL, "Wait for devices being removed"): arg_wait_until = WAIT_UNTIL_REMOVED; break; - case ARG_SETTLE: + OPTION_LONG("settle", NULL, "Also wait for all queued events being processed"): arg_settle = true; break; - - case 'V': - return print_version(); - - case 'h': - return help(); - - case '?': - return -EINVAL; - - default: - assert_not_reached(); } - if (optind >= argc) + char **args = option_parser_get_args(&opts); + if (strv_isempty(args)) return log_error_errno(SYNTHETIC_ERRNO(EINVAL), "Too few arguments, expected at least one device path or device symlink."); - arg_devices = strv_copy(argv + optind); + arg_devices = strv_copy(args); if (!arg_devices) return log_oom(); From 222b417494092ed677ebfd2349605d52ae00d87d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Zbigniew=20J=C4=99drzejewski-Szmek?= Date: Tue, 28 Apr 2026 11:36:00 +0200 Subject: [PATCH 042/242] udevadm-trigger: convert to OPTION macros Co-developed-by: Claude Opus 4.7 --- src/udev/udevadm-trigger.c | 220 +++++++++++++++---------------------- 1 file changed, 89 insertions(+), 131 deletions(-) diff --git a/src/udev/udevadm-trigger.c b/src/udev/udevadm-trigger.c index afa6a84262084..62ccba37c5b8d 100644 --- a/src/udev/udevadm-trigger.c +++ b/src/udev/udevadm-trigger.c @@ -1,6 +1,5 @@ /* SPDX-License-Identifier: GPL-2.0-or-later */ -#include #include #include "sd-device.h" @@ -10,7 +9,10 @@ #include "device-enumerator-private.h" #include "device-private.h" #include "device-util.h" +#include "format-table.h" +#include "help-util.h" #include "id128-util.h" +#include "options.h" #include "set.h" #include "static-destruct.h" #include "string-table.h" @@ -320,214 +322,170 @@ static int setup_matches(sd_device_enumerator *e) { } static int help(void) { - printf("%s trigger [OPTIONS] DEVPATH\n\n" - "Request events from the kernel.\n\n" - " -h --help Show this help\n" - " -V --version Show package version\n" - " -v --verbose Print the list of devices while running\n" - " -n --dry-run Do not actually trigger the events\n" - " -q --quiet Suppress error logging in triggering events\n" - " -t --type= Type of events to trigger\n" - " devices sysfs devices (default)\n" - " subsystems sysfs subsystems and drivers\n" - " all sysfs devices, subsystems, and drivers\n" - " -c --action=ACTION|help Event action value, default is \"change\"\n" - " -s --subsystem-match=SUBSYSTEM Trigger devices from a matching subsystem\n" - " -S --subsystem-nomatch=SUBSYSTEM Exclude devices from a matching subsystem\n" - " -a --attr-match=FILE[=VALUE] Trigger devices with a matching attribute\n" - " -A --attr-nomatch=FILE[=VALUE] Exclude devices with a matching attribute\n" - " -p --property-match=KEY=VALUE Trigger devices with a matching property\n" - " -g --tag-match=TAG Trigger devices with a matching tag\n" - " -y --sysname-match=NAME Trigger devices with this /sys path\n" - " --name-match=NAME Trigger devices with this /dev name\n" - " -b --parent-match=NAME Trigger devices with that parent device\n" - " --include-parents Trigger parent devices of found devices\n" - " --initialized-match Trigger devices that are already initialized\n" - " --initialized-nomatch Trigger devices that are not initialized yet\n" - " -w --settle Wait for the triggered events to complete\n" - " --wait-daemon[=SECONDS] Wait for udevd daemon to be initialized\n" - " before triggering uevents\n" - " --uuid Print synthetic uevent UUID\n" - " --prioritized-subsystem=SUBSYSTEM[,SUBSYSTEM…]\n" - " Trigger devices from a matching subsystem first\n", - program_invocation_short_name); + _cleanup_(table_unrefp) Table *options = NULL; + int r; + + r = option_parser_get_help_table_ns("udevadm-trigger", &options); + if (r < 0) + return r; + + help_cmdline("trigger [OPTIONS] DEVPATH"); + help_abstract("Request events from the kernel."); + help_section("Options:"); + r = table_print_or_warn(options); + if (r < 0) + return r; + help_man_page_reference("udevadm", "8"); return 0; } static int parse_argv(int argc, char *argv[]) { - enum { - ARG_NAME = 0x100, - ARG_PING, - ARG_UUID, - ARG_PRIORITIZED_SUBSYSTEM, - ARG_INITIALIZED_MATCH, - ARG_INITIALIZED_NOMATCH, - ARG_INCLUDE_PARENTS, - }; - - static const struct option options[] = { - { "verbose", no_argument, NULL, 'v' }, - { "dry-run", no_argument, NULL, 'n' }, - { "quiet", no_argument, NULL, 'q' }, - { "type", required_argument, NULL, 't' }, - { "action", required_argument, NULL, 'c' }, - { "subsystem-match", required_argument, NULL, 's' }, - { "subsystem-nomatch", required_argument, NULL, 'S' }, - { "attr-match", required_argument, NULL, 'a' }, - { "attr-nomatch", required_argument, NULL, 'A' }, - { "property-match", required_argument, NULL, 'p' }, - { "tag-match", required_argument, NULL, 'g' }, - { "sysname-match", required_argument, NULL, 'y' }, - { "name-match", required_argument, NULL, ARG_NAME }, - { "parent-match", required_argument, NULL, 'b' }, - { "include-parents", no_argument, NULL, ARG_INCLUDE_PARENTS }, - { "initialized-match", no_argument, NULL, ARG_INITIALIZED_MATCH }, - { "initialized-nomatch", no_argument, NULL, ARG_INITIALIZED_NOMATCH }, - { "settle", no_argument, NULL, 'w' }, - { "wait-daemon", optional_argument, NULL, ARG_PING }, - { "version", no_argument, NULL, 'V' }, - { "help", no_argument, NULL, 'h' }, - { "uuid", no_argument, NULL, ARG_UUID }, - { "prioritized-subsystem", required_argument, NULL, ARG_PRIORITIZED_SUBSYSTEM }, - {} - }; - - int c, r; + int r; assert(argc >= 0); assert(argv); - while ((c = getopt_long(argc, argv, "vnqt:c:s:S:a:A:p:g:y:b:wVh", options, NULL)) >= 0) { + OptionParser opts = { argc, argv, .namespace = "udevadm-trigger" }; + + FOREACH_OPTION(c, &opts, /* on_error= */ return c) switch (c) { - case 'v': + + OPTION_NAMESPACE("udevadm-trigger"): {} + + OPTION_COMMON_HELP: + return help(); + + OPTION('V', "version", NULL, "Show package version"): + return print_version(); + + OPTION('v', "verbose", NULL, "Print the list of devices while running"): arg_verbose = true; break; - case 'n': + OPTION('n', "dry-run", NULL, "Do not actually trigger the events"): arg_dry_run = true; break; - case 'q': + OPTION('q', "quiet", NULL, "Suppress error logging in triggering events"): arg_quiet = true; break; - case 't': - arg_scan_type = scan_type_from_string(optarg); + OPTION('t', "type", "TYPE", "Type of sysfs events to trigger:"): {} + OPTION_HELP_VERBATIM(" devices", "- devices (default)"): {} + OPTION_HELP_VERBATIM(" subsystems", "- subsystems and drivers"): {} + OPTION_HELP_VERBATIM(" all", "- devices, subsystems, and drivers"): + arg_scan_type = scan_type_from_string(opts.arg); if (arg_scan_type < 0) - return log_error_errno(SYNTHETIC_ERRNO(EINVAL), "Unknown type --type=%s", optarg); + return log_error_errno(SYNTHETIC_ERRNO(EINVAL), "Unknown type --type=%s", opts.arg); break; - case 'c': - r = parse_device_action(optarg, &arg_action); + OPTION('c', "action", "ACTION|help", "Event action value, default is \"change\""): + r = parse_device_action(opts.arg, &arg_action); if (r <= 0) return r; break; - case 's': - r = strv_extend(&arg_subsystem_match, optarg); + OPTION('s', "subsystem-match", "SUBSYSTEM", + "Trigger devices from a matching subsystem"): + r = strv_extend(&arg_subsystem_match, opts.arg); if (r < 0) return log_oom(); break; - case 'S': - r = strv_extend(&arg_subsystem_nomatch, optarg); + OPTION('S', "subsystem-nomatch", "SUBSYSTEM", + "Exclude devices from a matching subsystem"): + r = strv_extend(&arg_subsystem_nomatch, opts.arg); if (r < 0) return log_oom(); break; - case 'a': - r = strv_extend(&arg_attr_match, optarg); + OPTION('a', "attr-match", "FILE[=VALUE]", + "Trigger devices with a matching attribute"): + r = strv_extend(&arg_attr_match, opts.arg); if (r < 0) return log_oom(); break; - case 'A': - r = strv_extend(&arg_attr_nomatch, optarg); + OPTION('A', "attr-nomatch", "FILE[=VALUE]", + "Exclude devices with a matching attribute"): + r = strv_extend(&arg_attr_nomatch, opts.arg); if (r < 0) return log_oom(); break; - case 'p': - r = strv_extend(&arg_property_match, optarg); + OPTION('p', "property-match", "KEY=VALUE", + "Trigger devices with a matching property"): + r = strv_extend(&arg_property_match, opts.arg); if (r < 0) return log_oom(); break; - case 'g': - r = strv_extend(&arg_tag_match, optarg); + OPTION('g', "tag-match", "TAG", "Trigger devices with a matching tag"): + r = strv_extend(&arg_tag_match, opts.arg); if (r < 0) return log_oom(); break; - case 'y': - r = strv_extend(&arg_sysname_match, optarg); + OPTION('y', "sysname-match", "NAME", "Trigger devices with this /sys path"): + r = strv_extend(&arg_sysname_match, opts.arg); if (r < 0) return log_oom(); break; - case 'b': - r = strv_extend(&arg_parent_match, optarg); + OPTION_LONG("name-match", "NAME", "Trigger devices with this /dev name"): + r = strv_extend(&arg_name_match, opts.arg); if (r < 0) return log_oom(); break; - case ARG_INCLUDE_PARENTS: + OPTION('b', "parent-match", "NAME", "Trigger devices with that parent device"): + r = strv_extend(&arg_parent_match, opts.arg); + if (r < 0) + return log_oom(); + break; + + OPTION_LONG("include-parents", NULL, "Trigger parent devices of found devices"): arg_include_parents = true; break; - case 'w': - arg_settle = true; + OPTION_LONG("initialized-match", NULL, + "Trigger devices that are already initialized"): + arg_initialized_match = MATCH_INITIALIZED_YES; break; - case ARG_NAME: - r = strv_extend(&arg_name_match, optarg); - if (r < 0) - return log_oom(); + OPTION_LONG("initialized-nomatch", NULL, + "Trigger devices that are not initialized yet"): + arg_initialized_match = MATCH_INITIALIZED_NO; + break; + + OPTION('w', "settle", NULL, "Wait for the triggered events to complete"): + arg_settle = true; break; - case ARG_PING: + OPTION_LONG_FLAGS(OPTION_OPTIONAL_ARG, "wait-daemon", "SECONDS", + "Wait for udevd daemon to be initialized before triggering uevents"): arg_ping = true; - if (optarg) { - r = parse_sec(optarg, &arg_ping_timeout_usec); + if (opts.arg) { + r = parse_sec(opts.arg, &arg_ping_timeout_usec); if (r < 0) - log_error_errno(r, "Failed to parse timeout value '%s', ignoring: %m", optarg); + log_error_errno(r, "Failed to parse timeout value '%s', ignoring: %m", opts.arg); } break; - case ARG_UUID: + OPTION_LONG("uuid", NULL, "Print synthetic uevent UUID"): arg_uuid = true; break; - case ARG_PRIORITIZED_SUBSYSTEM: - r = strv_split_and_extend(&arg_prioritized_subsystems, optarg, ",", /* filter_duplicates= */ false); + OPTION_LONG("prioritized-subsystem", "SUBSYSTEM[,SUBSYSTEM…]", + "Trigger devices from a matching subsystem first"): + r = strv_split_and_extend(&arg_prioritized_subsystems, opts.arg, ",", /* filter_duplicates= */ false); if (r < 0) return log_oom(); break; - - case ARG_INITIALIZED_MATCH: - arg_initialized_match = MATCH_INITIALIZED_YES; - break; - - case ARG_INITIALIZED_NOMATCH: - arg_initialized_match = MATCH_INITIALIZED_NO; - break; - - case 'V': - return print_version(); - - case 'h': - return help(); - - case '?': - return -EINVAL; - - default: - assert_not_reached(); } - } - r = strv_extend_strv(&arg_devices, argv + optind, /* filter_duplicates= */ false); + r = strv_extend_strv(&arg_devices, option_parser_get_args(&opts), /* filter_duplicates= */ false); if (r < 0) return log_error_errno(r, "Failed to build argument list: %m"); From c86ba4e037e1879e0bc7e93449b3fdfe4939dde9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Zbigniew=20J=C4=99drzejewski-Szmek?= Date: Tue, 28 Apr 2026 11:40:54 +0200 Subject: [PATCH 043/242] udevadm-info: convert to OPTION macros Co-developed-by: Claude Opus 4.7 --- src/udev/udevadm-info.c | 292 ++++++++++++++++------------------------ 1 file changed, 113 insertions(+), 179 deletions(-) diff --git a/src/udev/udevadm-info.c b/src/udev/udevadm-info.c index 62d7dce4217de..3795856592c1d 100644 --- a/src/udev/udevadm-info.c +++ b/src/udev/udevadm-info.c @@ -2,7 +2,6 @@ #include #include -#include #include #include #include @@ -20,7 +19,10 @@ #include "errno-util.h" #include "fd-util.h" #include "fileio.h" +#include "format-table.h" #include "glyph-util.h" +#include "help-util.h" +#include "options.h" #include "pager.h" #include "parse-argument.h" #include "sort-util.h" @@ -800,51 +802,21 @@ static int query_device(QueryType query, sd_device* device) { } static int help(void) { - printf("%s info [OPTIONS] [DEVPATH|FILE]\n\n" - "Query sysfs or the udev database.\n\n" - " -h --help Print this message\n" - " -V --version Print version of the program\n" - " -q --query=TYPE Query device information:\n" - " name Name of device node\n" - " symlink Pointing to node\n" - " path sysfs device path\n" - " property The device properties\n" - " all All values\n" - " --property=NAME Show only properties by this name\n" - " --value When showing properties, print only their values\n" - " -p --path=SYSPATH sysfs device path used for query or attribute walk\n" - " -n --name=NAME Node or symlink name used for query or attribute walk\n" - " -r --root Prepend dev directory to path names\n" - " -a --attribute-walk Print all key matches walking along the chain\n" - " of parent devices\n" - " -t --tree Show tree of devices\n" - " -d --device-id-of-file=FILE Print major:minor of device containing this file\n" - " -x --export Export key/value pairs\n" - " -P --export-prefix Export the key name with a prefix\n" - " -e --export-db Export the content of the udev database\n" - " -c --cleanup-db Clean up the udev database\n" - " -w --wait-for-initialization[=SECONDS]\n" - " Wait for device to be initialized\n" - " --no-pager Do not pipe output into a pager\n" - " --json=pretty|short|off Generate JSON output\n" - " --subsystem-match=SUBSYSTEM\n" - " Query devices matching a subsystem\n" - " --subsystem-nomatch=SUBSYSTEM\n" - " Query devices not matching a subsystem\n" - " --attr-match=FILE[=VALUE]\n" - " Query devices that match an attribute\n" - " --attr-nomatch=FILE[=VALUE]\n" - " Query devices that do not match an attribute\n" - " --property-match=KEY=VALUE\n" - " Query devices with matching properties\n" - " --tag-match=TAG Query devices with a matching tag\n" - " --sysname-match=NAME Query devices with this /sys path\n" - " --name-match=NAME Query devices with this /dev name\n" - " --parent-match=NAME Query devices with this parent device\n" - " --initialized-match Query devices that are already initialized\n" - " --initialized-nomatch Query devices that are not initialized yet\n", - program_invocation_short_name); + _cleanup_(table_unrefp) Table *options = NULL; + int r; + + r = option_parser_get_help_table_ns("udevadm-info", &options); + if (r < 0) + return r; + + help_cmdline("info [OPTIONS] [DEVPATH|FILE]"); + help_abstract("Query sysfs or the udev database."); + help_section("Options:"); + r = table_print_or_warn(options); + if (r < 0) + return r; + help_man_page_reference("udevadm", "8"); return 0; } @@ -1006,90 +978,64 @@ static int print_tree(sd_device* below) { } static int parse_argv(int argc, char *argv[]) { - - enum { - ARG_PROPERTY = 0x100, - ARG_VALUE, - ARG_NO_PAGER, - ARG_JSON, - ARG_SUBSYSTEM_MATCH, - ARG_SUBSYSTEM_NOMATCH, - ARG_ATTR_MATCH, - ARG_ATTR_NOMATCH, - ARG_PROPERTY_MATCH, - ARG_TAG_MATCH, - ARG_SYSNAME_MATCH, - ARG_NAME_MATCH, - ARG_PARENT_MATCH, - ARG_INITIALIZED_MATCH, - ARG_INITIALIZED_NOMATCH, - }; - - static const struct option options[] = { - { "attribute-walk", no_argument, NULL, 'a' }, - { "tree", no_argument, NULL, 't' }, - { "cleanup-db", no_argument, NULL, 'c' }, - { "device-id-of-file", required_argument, NULL, 'd' }, - { "export", no_argument, NULL, 'x' }, - { "export-db", no_argument, NULL, 'e' }, - { "export-prefix", required_argument, NULL, 'P' }, - { "help", no_argument, NULL, 'h' }, - { "name", required_argument, NULL, 'n' }, - { "path", required_argument, NULL, 'p' }, - { "property", required_argument, NULL, ARG_PROPERTY }, - { "query", required_argument, NULL, 'q' }, - { "root", no_argument, NULL, 'r' }, - { "value", no_argument, NULL, ARG_VALUE }, - { "version", no_argument, NULL, 'V' }, - { "wait-for-initialization", optional_argument, NULL, 'w' }, - { "no-pager", no_argument, NULL, ARG_NO_PAGER }, - { "json", required_argument, NULL, ARG_JSON }, - { "subsystem-match", required_argument, NULL, ARG_SUBSYSTEM_MATCH }, - { "subsystem-nomatch", required_argument, NULL, ARG_SUBSYSTEM_NOMATCH }, - { "attr-match", required_argument, NULL, ARG_ATTR_MATCH }, - { "attr-nomatch", required_argument, NULL, ARG_ATTR_NOMATCH }, - { "property-match", required_argument, NULL, ARG_PROPERTY_MATCH }, - { "tag-match", required_argument, NULL, ARG_TAG_MATCH }, - { "sysname-match", required_argument, NULL, ARG_SYSNAME_MATCH }, - { "name-match", required_argument, NULL, ARG_NAME_MATCH }, - { "parent-match", required_argument, NULL, ARG_PARENT_MATCH }, - { "initialized-match", no_argument, NULL, ARG_INITIALIZED_MATCH }, - { "initialized-nomatch", no_argument, NULL, ARG_INITIALIZED_NOMATCH }, - {} - }; - - int c, r; + int r; assert(argc >= 0); assert(argv); - while ((c = getopt_long(argc, argv, "atced:n:p:q:rxP:w::Vh", options, NULL)) >= 0) + OptionParser opts = { argc, argv, .namespace = "udevadm-info" }; + + FOREACH_OPTION(c, &opts, /* on_error= */ return c) switch (c) { - case ARG_PROPERTY: + OPTION_NAMESPACE("udevadm-info"): {} + + OPTION_COMMON_HELP: + return help(); + + OPTION('V', "version", NULL, "Show package version"): + return print_version(); + + OPTION('q', "query", "TYPE", "Query device information:"): {} + OPTION_HELP_VERBATIM(" name", "- name of device node"): {} + OPTION_HELP_VERBATIM(" symlink", "- pointing to node"): {} + OPTION_HELP_VERBATIM(" path", "- sysfs device path"): {} + OPTION_HELP_VERBATIM(" property", "- the device properties"): {} + OPTION_HELP_VERBATIM(" all", "- all values"): + arg_query = query_type_from_string(opts.arg); + if (arg_query < 0) { + if (streq(opts.arg, "env")) /* deprecated */ + arg_query = QUERY_PROPERTY; + else + return log_error_errno(SYNTHETIC_ERRNO(EINVAL), "Unknown query type '%s'", opts.arg); + } + break; + + OPTION_LONG("property", "NAME", "Show only properties by this name"): /* Make sure that if the empty property list was specified, we won't show any properties. */ - if (isempty(optarg) && !arg_properties) { + if (isempty(opts.arg) && !arg_properties) { arg_properties = new0(char*, 1); if (!arg_properties) return log_oom(); } else { - r = strv_split_and_extend(&arg_properties, optarg, ",", true); + r = strv_split_and_extend(&arg_properties, opts.arg, ",", true); if (r < 0) return log_oom(); } break; - case ARG_VALUE: + OPTION_LONG("value", NULL, + "When showing properties, print only their values"): arg_value = true; break; - case 'n': - case 'p': { - const char *prefix = c == 'n' ? "/dev/" : "/sys/"; + OPTION('p', "path", "SYSPATH", "sysfs device path used for query or attribute walk"): {} /* fall through */ + OPTION('n', "name", "NAME", "Node or symlink name used for query or attribute walk"): { + const char *prefix = opts.opt->short_code == 'n' ? "/dev/" : "/sys/"; char *path; - path = path_join(path_startswith(optarg, prefix) ? NULL : prefix, optarg); + path = path_join(path_startswith(opts.arg, prefix) ? NULL : prefix, opts.arg); if (!path) return log_oom(); @@ -1099,159 +1045,147 @@ static int parse_argv(int argc, char *argv[]) { break; } - case 'q': - arg_query = query_type_from_string(optarg); - if (arg_query < 0) { - if (streq(optarg, "env")) /* deprecated */ - arg_query = QUERY_PROPERTY; - else - return log_error_errno(SYNTHETIC_ERRNO(EINVAL), "Unknown query type '%s'", optarg); - } + OPTION('r', "root", NULL, "Prepend dev directory to path names"): + arg_root = true; break; - case 'r': - arg_root = true; + OPTION('a', "attribute-walk", NULL, + "Print all key matches walking along the chain of parent devices"): + arg_action_type = ACTION_ATTRIBUTE_WALK; + break; + + OPTION('t', "tree", NULL, "Show tree of devices"): + arg_action_type = ACTION_TREE; break; - case 'd': + OPTION('d', "device-id-of-file", "FILE", + "Print major:minor of device containing this file"): arg_action_type = ACTION_DEVICE_ID_FILE; - r = free_and_strdup(&arg_name, optarg); + r = free_and_strdup(&arg_name, opts.arg); if (r < 0) return log_oom(); break; - case 'a': - arg_action_type = ACTION_ATTRIBUTE_WALK; + OPTION('x', "export", NULL, "Export key/value pairs"): + arg_export = true; break; - case 't': - arg_action_type = ACTION_TREE; + OPTION('P', "export-prefix", "NAME", "Export the key name with a prefix"): + arg_export = true; + arg_export_prefix = opts.arg; break; - case 'e': + OPTION('e', "export-db", NULL, "Export the content of the udev database"): arg_action_type = ACTION_EXPORT; break; - case 'c': + OPTION('c', "cleanup-db", NULL, "Clean up the udev database"): arg_action_type = ACTION_CLEANUP_DB; break; - case 'x': - arg_export = true; - break; - - case 'P': - arg_export = true; - arg_export_prefix = optarg; - break; - - case 'w': - if (optarg) { - r = parse_sec(optarg, &arg_wait_for_initialization_timeout); + OPTION_FULL(OPTION_OPTIONAL_ARG, 'w', "wait-for-initialization", "SECS", + "Wait for device to be initialized"): + if (opts.arg) { + r = parse_sec(opts.arg, &arg_wait_for_initialization_timeout); if (r < 0) return log_error_errno(r, "Failed to parse timeout value: %m"); } else arg_wait_for_initialization_timeout = USEC_INFINITY; break; - case 'V': - return print_version(); - - case 'h': - return help(); - - case ARG_NO_PAGER: + OPTION_COMMON_NO_PAGER: arg_pager_flags |= PAGER_DISABLE; break; - case ARG_JSON: - r = parse_json_argument(optarg, &arg_json_format_flags); + OPTION_COMMON_JSON: + r = parse_json_argument(opts.arg, &arg_json_format_flags); if (r <= 0) return r; break; - case ARG_SUBSYSTEM_MATCH: - r = strv_extend(&arg_subsystem_match, optarg); + OPTION_LONG("subsystem-match", "SUBSYSTEM", + "Query devices matching a subsystem"): + r = strv_extend(&arg_subsystem_match, opts.arg); if (r < 0) return log_oom(); break; - case ARG_SUBSYSTEM_NOMATCH: - r = strv_extend(&arg_subsystem_nomatch, optarg); + OPTION_LONG("subsystem-nomatch", "SUBSYSTEM", + "Query devices not matching a subsystem"): + r = strv_extend(&arg_subsystem_nomatch, opts.arg); if (r < 0) return log_oom(); break; - case ARG_ATTR_MATCH: - if (!strchr(optarg, '=')) + OPTION_LONG("attr-match", "FILE[=VALUE]", + "Query devices that match an attribute"): + if (!strchr(opts.arg, '=')) return log_error_errno(SYNTHETIC_ERRNO(EINVAL), - "Expected = instead of '%s'", optarg); + "Expected = instead of '%s'", opts.arg); - r = strv_extend(&arg_attr_match, optarg); + r = strv_extend(&arg_attr_match, opts.arg); if (r < 0) return log_oom(); break; - case ARG_ATTR_NOMATCH: - if (!strchr(optarg, '=')) + OPTION_LONG("attr-nomatch", "FILE[=VALUE]", + "Query devices that do not match an attribute"): + if (!strchr(opts.arg, '=')) return log_error_errno(SYNTHETIC_ERRNO(EINVAL), - "Expected = instead of '%s'", optarg); + "Expected = instead of '%s'", opts.arg); - r = strv_extend(&arg_attr_nomatch, optarg); + r = strv_extend(&arg_attr_nomatch, opts.arg); if (r < 0) return log_oom(); break; - case ARG_PROPERTY_MATCH: - if (!strchr(optarg, '=')) + OPTION_LONG("property-match", "KEY=VALUE", + "Query devices with matching properties"): + if (!strchr(opts.arg, '=')) return log_error_errno(SYNTHETIC_ERRNO(EINVAL), - "Expected = instead of '%s'", optarg); + "Expected = instead of '%s'", opts.arg); - r = strv_extend(&arg_property_match, optarg); + r = strv_extend(&arg_property_match, opts.arg); if (r < 0) return log_oom(); break; - case ARG_TAG_MATCH: - r = strv_extend(&arg_tag_match, optarg); + OPTION_LONG("tag-match", "TAG", "Query devices with a matching tag"): + r = strv_extend(&arg_tag_match, opts.arg); if (r < 0) return log_oom(); break; - case ARG_SYSNAME_MATCH: - r = strv_extend(&arg_sysname_match, optarg); + OPTION_LONG("sysname-match", "NAME", "Query devices with this /sys path"): + r = strv_extend(&arg_sysname_match, opts.arg); if (r < 0) return log_oom(); break; - case ARG_NAME_MATCH: - r = strv_extend(&arg_name_match, optarg); + OPTION_LONG("name-match", "NAME", "Query devices with this /dev name"): + r = strv_extend(&arg_name_match, opts.arg); if (r < 0) return log_oom(); break; - case ARG_PARENT_MATCH: - r = strv_extend(&arg_parent_match, optarg); + OPTION_LONG("parent-match", "NAME", "Query devices with this parent device"): + r = strv_extend(&arg_parent_match, opts.arg); if (r < 0) return log_oom(); break; - case ARG_INITIALIZED_MATCH: + OPTION_LONG("initialized-match", NULL, + "Query devices that are already initialized"): arg_initialized_match = MATCH_INITIALIZED_YES; break; - case ARG_INITIALIZED_NOMATCH: + OPTION_LONG("initialized-nomatch", NULL, + "Query devices that are not initialized yet"): arg_initialized_match = MATCH_INITIALIZED_NO; break; - - case '?': - return -EINVAL; - - default: - assert_not_reached(); } - r = strv_extend_strv(&arg_devices, argv + optind, /* filter_duplicates= */ false); + r = strv_extend_strv(&arg_devices, option_parser_get_args(&opts), /* filter_duplicates= */ false); if (r < 0) return log_error_errno(r, "Failed to build argument list: %m"); From 408d18f4d215747f7eba352cff1ea3b8c14fb574 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Zbigniew=20J=C4=99drzejewski-Szmek?= Date: Tue, 28 Apr 2026 13:24:36 +0200 Subject: [PATCH 044/242] udev-builtin-blkid: convert to OPTION macros Co-developed-by: Claude Opus 4.7 --- src/udev/udev-builtin-blkid.c | 37 ++++++++++++++--------------------- 1 file changed, 15 insertions(+), 22 deletions(-) diff --git a/src/udev/udev-builtin-blkid.c b/src/udev/udev-builtin-blkid.c index ca40b62d782b9..16eaced0dcf46 100644 --- a/src/udev/udev-builtin-blkid.c +++ b/src/udev/udev-builtin-blkid.c @@ -10,7 +10,6 @@ #endif #include -#include #include #include #include @@ -26,6 +25,7 @@ #include "fd-util.h" #include "initrd-util.h" #include "gpt.h" +#include "options.h" #include "parse-util.h" #include "string-util.h" #include "strv.h" @@ -506,13 +506,6 @@ static int builtin_blkid(UdevEvent *event, int argc, char *argv[]) { int64_t offset = 0; int r; - static const struct option options[] = { - { "offset", required_argument, NULL, 'o' }, - { "hint", required_argument, NULL, 'H' }, - { "noraid", no_argument, NULL, 'R' }, - {} - }; - r = dlopen_libblkid(LOG_DEBUG); if (r < 0) return log_device_debug_errno(dev, r, "blkid not available: %m"); @@ -522,32 +515,32 @@ static int builtin_blkid(UdevEvent *event, int argc, char *argv[]) { if (!pr) return log_device_debug_errno(dev, errno_or_else(ENOMEM), "Failed to create blkid prober: %m"); - for (;;) { - int option; + OptionParser opts = { argc, argv, .namespace = "udev-builtin-blkid" }; - option = getopt_long(argc, argv, "o:H:R", options, NULL); - if (option == -1) - break; + FOREACH_OPTION(c, &opts, /* on_error= */ return c) + switch (c) { + + OPTION_NAMESPACE("udev-builtin-blkid"): {} - switch (option) { - case 'H': + OPTION('H', "hint", "HINT", NULL): errno = 0; - r = sym_blkid_probe_set_hint(pr, optarg, 0); + r = sym_blkid_probe_set_hint(pr, opts.arg, 0); if (r < 0) - return log_device_error_errno(dev, errno_or_else(ENOMEM), "Failed to use '%s' probing hint: %m", optarg); + return log_device_error_errno(dev, errno_or_else(ENOMEM), "Failed to use '%s' probing hint: %m", opts.arg); break; - case 'o': - r = safe_atoi64(optarg, &offset); + + OPTION('o', "offset", "OFFSET", NULL): + r = safe_atoi64(opts.arg, &offset); if (r < 0) - return log_device_error_errno(dev, r, "Failed to parse '%s' as an integer: %m", optarg); + return log_device_error_errno(dev, r, "Failed to parse '%s' as an integer: %m", opts.arg); if (offset < 0) return log_device_error_errno(dev, SYNTHETIC_ERRNO(EINVAL), "Invalid offset %"PRIi64".", offset); break; - case 'R': + + OPTION('R', "noraid", NULL, NULL): noraid = true; break; } - } r = sd_device_get_devname(dev, &devnode); if (r < 0) From ab9acf8c8cf75906ebe0827c200f5bbd31f28580 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Zbigniew=20J=C4=99drzejewski-Szmek?= Date: Wed, 29 Apr 2026 12:20:58 +0200 Subject: [PATCH 045/242] shared/options: add new helper option_parser_get_arg MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit option_parser_next_arg() is renamed to option_parser_peek_next_arg() to match option_parser_consume_next_arg(). A new helper is added option_parser_get_arg(…, n). It is a common pattern to only need a single arg, and getting an array and extracting a single item from it is too verbose. --- src/cryptenroll/cryptenroll.c | 13 +++++-------- src/growfs/growfs.c | 3 +-- src/nspawn/nspawn.c | 2 +- src/shared/options.c | 12 ++++++++++-- src/shared/options.h | 8 +++++++- src/test/test-options.c | 19 ++++++++++++------- 6 files changed, 36 insertions(+), 21 deletions(-) diff --git a/src/cryptenroll/cryptenroll.c b/src/cryptenroll/cryptenroll.c index 5d0c782689392..f7e7ff121804a 100644 --- a/src/cryptenroll/cryptenroll.c +++ b/src/cryptenroll/cryptenroll.c @@ -31,7 +31,6 @@ #include "process-util.h" #include "string-table.h" #include "string-util.h" -#include "strv.h" #include "tpm2-pcr.h" #include "tpm2-util.h" @@ -579,14 +578,12 @@ static int parse_argv(int argc, char *argv[]) { break; } - char **args = option_parser_get_args(&opts); + if (option_parser_get_n_args(&opts) > 1) + return log_error_errno(SYNTHETIC_ERRNO(EINVAL), "Too many arguments, refusing."); - if (strv_length(args) > 1) - return log_error_errno(SYNTHETIC_ERRNO(EINVAL), - "Too many arguments, refusing."); - - if (args[0]) - r = parse_path_argument(args[0], false, &arg_node); + const char *arg = option_parser_get_arg(&opts, 0); + if (arg) + r = parse_path_argument(arg, false, &arg_node); else if (!wipe_requested()) r = determine_default_node(); else diff --git a/src/growfs/growfs.c b/src/growfs/growfs.c index 3e9eb678bf038..efb94e3765053 100644 --- a/src/growfs/growfs.c +++ b/src/growfs/growfs.c @@ -181,8 +181,7 @@ static int parse_argv(int argc, char *argv[]) { "%s expects exactly one argument (the mount point).", program_invocation_short_name); - arg_target = option_parser_get_args(&opts)[0]; - + arg_target = option_parser_get_arg(&opts, 0); return 1; } diff --git a/src/nspawn/nspawn.c b/src/nspawn/nspawn.c index e4e0359ce6d79..6c9c1050c6921 100644 --- a/src/nspawn/nspawn.c +++ b/src/nspawn/nspawn.c @@ -1420,7 +1420,7 @@ static int parse_argv(int argc, char *argv[]) { * the old container user functionality. To maintain backwards compatibility * with the space-separated form (--user NAME), if the next opts.arg does not look * like an option, interpret it as a user name. */ - const char *t = option_parser_next_arg(&opts); + const char *t = option_parser_peek_next_arg(&opts); if (t && t[0] != '-') { opts.arg = option_parser_consume_next_arg(&opts); log_warning("--user NAME is deprecated, use --uid=NAME instead."); diff --git a/src/shared/options.c b/src/shared/options.c index 01684fc1fced5..85ab3155bf867 100644 --- a/src/shared/options.c +++ b/src/shared/options.c @@ -344,7 +344,7 @@ int option_parse( return r; } -char* option_parser_next_arg(const OptionParser *state) { +char* option_parser_peek_next_arg(const OptionParser *state) { /* Peek at the next argument, whatever it is (option or position arg). * May return NULL. */ @@ -360,7 +360,7 @@ char* option_parser_consume_next_arg(OptionParser *state) { * so we won't try to interpret it as an option. * May return NULL. */ - char *t = option_parser_next_arg(state); + char *t = option_parser_peek_next_arg(state); if (t) shift_arg(state->argv, state->positional_offset++, state->optind++); return t; @@ -388,6 +388,14 @@ size_t option_parser_get_n_args(const OptionParser *state) { return state->argc - state->positional_offset; } +char* option_parser_get_arg(const OptionParser *state, size_t i) { + assert(state->optind > 0); + assert(state->state == OPTION_PARSER_DONE); + assert(state->positional_offset <= state->argc); + + return (size_t) (state->argc - state->positional_offset) > i ? state->argv[state->positional_offset + i] : NULL; +} + char* option_get_synopsis(const Option *opt, const char *joiner, bool show_metavar) { assert(opt); assert(!(opt->flags & (OPTION_NAMESPACE_MARKER | diff --git a/src/shared/options.h b/src/shared/options.h index a171f5f6a43f7..5803eb120ef67 100644 --- a/src/shared/options.h +++ b/src/shared/options.h @@ -224,11 +224,17 @@ int option_parse( break; \ } else -char* option_parser_next_arg(const OptionParser *state); +/* Those helpers are used *during* option parsing and allow looking at or taking the next item in + * the argv array, either an option or a positional parameter. */ +char* option_parser_peek_next_arg(const OptionParser *state); char* option_parser_consume_next_arg(OptionParser *state); +/* Those helpers are used *after* option parsing and return the positional arguments (and unparsed + * options in case option parsing was stopped early, e.g. via "--"). */ char** option_parser_get_args(const OptionParser *state); size_t option_parser_get_n_args(const OptionParser *state); +char* option_parser_get_arg(const OptionParser *state, size_t i); + char* option_get_synopsis(const Option *opt, const char *joiner, bool show_metavar); int _option_parser_get_help_table_full( diff --git a/src/test/test-options.c b/src/test/test-options.c index 04fddc1c34700..efa3a73d69edd 100644 --- a/src/test/test-options.c +++ b/src/test/test-options.c @@ -58,7 +58,12 @@ static void test_option_parse_one( ASSERT_TRUE(strv_equal(args, remaining)); ASSERT_STREQ(argv[0], saved_argv0); - ASSERT_EQ(option_parser_get_n_args(&opts), strv_length(remaining)); + size_t l = strv_length(remaining); + ASSERT_EQ(option_parser_get_n_args(&opts), l); + ASSERT_STREQ(option_parser_get_arg(&opts, 0), l > 0 ? remaining[0] : NULL); + ASSERT_STREQ(option_parser_get_arg(&opts, 1), l > 1 ? remaining[1] : NULL); + ASSERT_STREQ(option_parser_get_arg(&opts, 2), l > 2 ? remaining[2] : NULL); + ASSERT_STREQ(option_parser_get_arg(&opts, 3), l > 3 ? remaining[3] : NULL); } static void test_option_invalid_one( @@ -1331,7 +1336,7 @@ TEST(option_optional_arg_consume) { /* --user without arg: next arg is positional (doesn't start with -). * The option parser returns NULL for the arg. The caller would then - * use option_parser_next_arg/consume_next_arg to grab it. */ + * use option_parser_peek_next_arg/consume_next_arg to grab it. */ { char **argv = STRV_MAKE("arg0", "--user", "someuser", "pos1"); int argc = strv_length(argv); @@ -1341,7 +1346,7 @@ TEST(option_optional_arg_consume) { ASSERT_OK_POSITIVE(option_parse(options, options + 3, &opts)); ASSERT_STREQ(opts.opt->long_code, "user"); ASSERT_NULL(opts.arg); - ASSERT_STREQ(option_parser_next_arg(&opts), "someuser"); + ASSERT_STREQ(option_parser_peek_next_arg(&opts), "someuser"); ASSERT_STREQ(option_parser_consume_next_arg(&opts), "someuser"); ASSERT_EQ(option_parse(options, options + 3, &opts), 0); @@ -1361,7 +1366,7 @@ TEST(option_optional_arg_consume) { ASSERT_OK_POSITIVE(option_parse(options, options + 3, &opts)); ASSERT_STREQ(opts.opt->long_code, "user"); ASSERT_NULL(opts.arg); - ASSERT_NULL(option_parser_next_arg(&opts)); + ASSERT_NULL(option_parser_peek_next_arg(&opts)); ASSERT_NULL(option_parser_consume_next_arg(&opts)); ASSERT_EQ(option_parse(options, options + 3, &opts), 0); @@ -1381,12 +1386,12 @@ TEST(option_optional_arg_consume) { ASSERT_OK_POSITIVE(option_parse(options, options + 3, &opts)); ASSERT_STREQ(opts.opt->long_code, "user"); ASSERT_NULL(opts.arg); - ASSERT_STREQ(option_parser_next_arg(&opts), "-u"); + ASSERT_STREQ(option_parser_peek_next_arg(&opts), "-u"); ASSERT_OK_POSITIVE(option_parse(options, options + 3, &opts)); ASSERT_STREQ(opts.opt->long_code, "uid"); ASSERT_STREQ(opts.arg, "nobody"); - ASSERT_NULL(option_parser_next_arg(&opts)); + ASSERT_NULL(option_parser_peek_next_arg(&opts)); ASSERT_NULL(option_parser_consume_next_arg(&opts)); ASSERT_EQ(option_parse(options, options + 3, &opts), 0); @@ -1413,7 +1418,7 @@ TEST(option_optional_arg_consume) { const char *arg = opts.arg; if (!arg) { - const char *t = option_parser_next_arg(&opts); + const char *t = option_parser_peek_next_arg(&opts); if (t && t[0] != '-') arg = option_parser_consume_next_arg(&opts); } From fcfd42a30c98743935b822f57a38a35bce060cce Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Zbigniew=20J=C4=99drzejewski-Szmek?= Date: Tue, 28 Apr 2026 13:24:40 +0200 Subject: [PATCH 046/242] udev-builtin-hwdb: convert to OPTION macros Co-developed-by: Claude Opus 4.7 --- src/udev/udev-builtin-hwdb.c | 42 +++++++++++++++--------------------- 1 file changed, 17 insertions(+), 25 deletions(-) diff --git a/src/udev/udev-builtin-hwdb.c b/src/udev/udev-builtin-hwdb.c index 082af2e6031bd..4817c3af24e20 100644 --- a/src/udev/udev-builtin-hwdb.c +++ b/src/udev/udev-builtin-hwdb.c @@ -1,7 +1,6 @@ /* SPDX-License-Identifier: LGPL-2.1-or-later */ #include -#include #include #include "sd-hwdb.h" @@ -9,6 +8,7 @@ #include "alloc-util.h" #include "device-util.h" #include "hwdb-util.h" +#include "options.h" #include "parse-util.h" #include "string-util.h" #include "udev-builtin.h" @@ -128,13 +128,6 @@ static int udev_builtin_hwdb_search( } static int builtin_hwdb(UdevEvent *event, int argc, char *argv[]) { - static const struct option options[] = { - { "filter", required_argument, NULL, 'f' }, - { "device", required_argument, NULL, 'd' }, - { "subsystem", required_argument, NULL, 's' }, - { "lookup-prefix", required_argument, NULL, 'p' }, - {} - }; const char *filter = NULL, *device = NULL, *subsystem = NULL, *prefix = NULL; _cleanup_(sd_device_unrefp) sd_device *srcdev = NULL; sd_device *dev = ASSERT_PTR(ASSERT_PTR(event)->dev); @@ -143,35 +136,34 @@ static int builtin_hwdb(UdevEvent *event, int argc, char *argv[]) { if (!hwdb) return -EINVAL; - for (;;) { - int option; + OptionParser opts = { argc, argv, .namespace = "udev-builtin-hwdb" }; - option = getopt_long(argc, argv, "f:d:s:p:", options, NULL); - if (option == -1) - break; + FOREACH_OPTION(c, &opts, /* on_error= */ return c) + switch (c) { + + OPTION_NAMESPACE("udev-builtin-hwdb"): {} - switch (option) { - case 'f': - filter = optarg; + OPTION('f', "filter", "FILTER", NULL): + filter = opts.arg; break; - case 'd': - device = optarg; + OPTION('d', "device", "DEVICE", NULL): + device = opts.arg; break; - case 's': - subsystem = optarg; + OPTION('s', "subsystem", "SUBSYSTEM", NULL): + subsystem = opts.arg; break; - case 'p': - prefix = optarg; + OPTION('p', "lookup-prefix", "PREFIX", NULL): + prefix = opts.arg; break; } - } /* query a specific key given as argument */ - if (argv[optind]) { - r = udev_builtin_hwdb_lookup(event, prefix, argv[optind], filter); + char *modalias = option_parser_get_arg(&opts, 0); + if (modalias) { + r = udev_builtin_hwdb_lookup(event, prefix, modalias, filter); if (r < 0) return log_device_debug_errno(dev, r, "Failed to look up hwdb: %m"); if (r == 0) From 13f6feda78664ef55cc92cf2c71e83d43a78b23b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Zbigniew=20J=C4=99drzejewski-Szmek?= Date: Tue, 28 Apr 2026 14:52:26 +0200 Subject: [PATCH 047/242] udevadm: convert verb dispatch to VERB macros Co-developed-by: Claude Opus 4.7 --- src/udev/udevadm.c | 79 ++++++++++++++++++---------------------------- 1 file changed, 31 insertions(+), 48 deletions(-) diff --git a/src/udev/udevadm.c b/src/udev/udevadm.c index 23e03d6fb0e64..856d1fc4c23e2 100644 --- a/src/udev/udevadm.c +++ b/src/udev/udevadm.c @@ -14,33 +14,26 @@ #include "verbs.h" static int help(void) { - static const char *const short_descriptions[][2] = { - { "info", "Query sysfs or the udev database" }, - { "trigger", "Request events from the kernel" }, - { "settle", "Wait for pending udev events" }, - { "control", "Control the udev daemon" }, - { "monitor", "Listen to kernel and udev events" }, - { "test", "Test an event run" }, - { "test-builtin", "Test a built-in command" }, - { "verify", "Verify udev rules files" }, - { "cat", "Show udev rules files" }, - { "wait", "Wait for device or device symlink" }, - { "lock", "Lock a block device" }, - }; - - _cleanup_(table_unrefp) Table *options = NULL; + _cleanup_(table_unrefp) Table *verbs = NULL, *options = NULL; int r; + r = verbs_get_help_table(&verbs); + if (r < 0) + return r; + r = option_parser_get_help_table_ns("udevadm", &options); if (r < 0) return r; + (void) table_sync_column_widths(0, verbs, options); + help_cmdline("[OPTIONS…] COMMAND [COMMAND OPTIONS…]"); help_abstract("Send control commands or test the device manager."); help_section("Commands:"); - FOREACH_ELEMENT(desc, short_descriptions) - printf(" %-12s %s\n", (*desc)[0], (*desc)[1]); + r = table_print_or_warn(verbs); + if (r < 0) + return r; help_section("Options:"); r = table_print_or_warn(options); @@ -51,6 +44,26 @@ static int help(void) { return 0; } +VERB_COMMON_HELP(help); + +VERB_SCOPE(, verb_info_main, "info", "[DEVPATH|FILE]", VERB_ANY, VERB_ANY, 0, "Query sysfs or the udev database"); +VERB_SCOPE(, verb_trigger_main, "trigger", "DEVPATH", VERB_ANY, VERB_ANY, 0, "Request events from the kernel"); +VERB_SCOPE(, verb_settle_main, "settle", NULL, VERB_ANY, VERB_ANY, 0, "Wait for pending udev events"); +VERB_SCOPE(, verb_control_main, "control", "OPTION", VERB_ANY, VERB_ANY, 0, "Control the udev daemon"); +VERB_SCOPE(, verb_monitor_main, "monitor", NULL, VERB_ANY, VERB_ANY, 0, "Listen to kernel and udev events"); +VERB_SCOPE(, verb_test_main, "test", "DEVPATH", VERB_ANY, VERB_ANY, 0, "Test an event run"); +VERB_SCOPE(, verb_builtin_main, "test-builtin", "COMMAND DEVPATH", VERB_ANY, VERB_ANY, 0, "Test a built-in command"); +VERB_SCOPE(, verb_verify_main, "verify", "[FILE…]", VERB_ANY, VERB_ANY, 0, "Verify udev rules files"); +VERB_SCOPE(, verb_cat_main, "cat", "[FILE…]", VERB_ANY, VERB_ANY, 0, "Show udev rules files"); +VERB_SCOPE(, verb_wait_main, "wait", "DEVICE [DEVICE…]", VERB_ANY, VERB_ANY, 0, "Wait for device or device symlink"); +VERB_SCOPE(, verb_lock_main, "lock", "[OPTIONS…] COMMAND", VERB_ANY, VERB_ANY, 0, "Lock a block device"); +VERB_SCOPE(, verb_hwdb_main, "hwdb", NULL, VERB_ANY, VERB_ANY, 0, /* help= */ NULL); /* deprecated */ + +VERB_NOARG(verb_version_main, "version", /* help= */ NULL); +static int verb_version_main(int argc, char *argv[], uintptr_t _data, void *userdata) { + return print_version(); +} + static int parse_argv(int argc, char *argv[], char ***remaining_args) { assert(argc >= 0); assert(argv); @@ -84,36 +97,6 @@ int print_version(void) { return 0; } -static int verb_version_main(int argc, char *argv[], uintptr_t _data, void *userdata) { - return print_version(); -} - -static int verb_help_main(int argc, char *argv[], uintptr_t _data, void *userdata) { - return help(); -} - -static int udevadm_main(char **args) { - static const Verb verbs[] = { - { "cat", VERB_ANY, VERB_ANY, 0, verb_cat_main }, - { "info", VERB_ANY, VERB_ANY, 0, verb_info_main }, - { "trigger", VERB_ANY, VERB_ANY, 0, verb_trigger_main }, - { "settle", VERB_ANY, VERB_ANY, 0, verb_settle_main }, - { "control", VERB_ANY, VERB_ANY, 0, verb_control_main }, - { "monitor", VERB_ANY, VERB_ANY, 0, verb_monitor_main }, - { "hwdb", VERB_ANY, VERB_ANY, 0, verb_hwdb_main }, - { "test", VERB_ANY, VERB_ANY, 0, verb_test_main }, - { "test-builtin", VERB_ANY, VERB_ANY, 0, verb_builtin_main }, - { "wait", VERB_ANY, VERB_ANY, 0, verb_wait_main }, - { "lock", VERB_ANY, VERB_ANY, 0, verb_lock_main }, - { "verify", VERB_ANY, VERB_ANY, 0, verb_verify_main }, - { "version", VERB_ANY, VERB_ANY, 0, verb_version_main }, - { "help", VERB_ANY, VERB_ANY, 0, verb_help_main }, - {} - }; - - return _dispatch_verb_with_args(args, verbs, verbs + ELEMENTSOF(verbs) - 1, NULL); -} - static int run(int argc, char *argv[]) { char **args = NULL; int r; @@ -132,7 +115,7 @@ static int run(int argc, char *argv[]) { if (r < 0) return r; - return udevadm_main(args); + return dispatch_verb_with_args(args, NULL); } DEFINE_MAIN_FUNCTION_WITH_POSITIVE_FAILURE(run); From 32a291abe9d22efcbb7f613b41d90c68d7c315b2 Mon Sep 17 00:00:00 2001 From: Lennart Poettering Date: Wed, 29 Apr 2026 15:52:29 +0200 Subject: [PATCH 048/242] sd-boot: minor tweaks as follow-up for #41863 This addresses some trivial points made by @keszybz in the PR review. --- src/boot/boot.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/src/boot/boot.c b/src/boot/boot.c index fff35de864b8c..a2a1becc9aaa0 100644 --- a/src/boot/boot.c +++ b/src/boot/boot.c @@ -2764,12 +2764,11 @@ static EFI_STATUS load_extras( } const struct ExtraResourceInfo *x = NULL; - FOREACH_ELEMENT(j, table) { + FOREACH_ELEMENT(j, table) if (endswith_no_case(info->FileName, j->suffix)) { x = j; break; } - } if (!x) { log_warning("Unrecognized type of extra file '%ls', ignoring.", info->FileName); continue; @@ -3034,7 +3033,7 @@ static EFI_STATUS call_image_start( case LOADER_UKI: case LOADER_UKI_URL: /* For modern UKIs we'll not bother with 'initrd', but we'll instead support 'extra' - * for loading credentials, sysext and confext. */ + * for loading credentials, sysexts, and confexts. */ err = load_extras(image_root, entry, &initrd_pages, &initrd_size); if (err != EFI_SUCCESS) From fd035af9bf9611aee6c4ded0d5f485ea95acc703 Mon Sep 17 00:00:00 2001 From: Lennart Poettering Date: Wed, 29 Apr 2026 16:05:36 +0200 Subject: [PATCH 049/242] update TODO This is mostly stuff discussed in #41776. --- TODO.md | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/TODO.md b/TODO.md index 304f6c3a5ecb8..60a11dcee3ad1 100644 --- a/TODO.md +++ b/TODO.md @@ -132,6 +132,16 @@ SPDX-License-Identifier: LGPL-2.1-or-later - hook-up in systemd-nspawn - hook-up in systemd-vmspawn - hook-up in service manager (BindVolume=) + - introduce a locking concept: right now all access to volumes is fully + shared. Let's add a basic locking concept: supporting backends can take an + additional locking flag (which has to be combined with Varlink's "more"), + in which case access would only be handed out to one client at a time, with + the lock's lifetime synced up with the Varlink connection lifetime. + - introduce a volume lifecycle concept: optionally support volumes whose + whole lifecycle is associated with the varlink connections they are tied + to: when the last varlink connection that acquired them goes away, the + volume is auto-destroyed. Would be exposed via a new flag on the Acquire + call, similar to the locking logic above. - a small tool that can do basic btrfs raid policy mgmt. i.e. gets started as part of the initial transaction for some btrfs raid fs, waits for some time, From 82614a4c6f19f0902b6fefc2988ccc54edb47eb5 Mon Sep 17 00:00:00 2001 From: Frantisek Sumsal Date: Wed, 29 Apr 2026 13:48:49 +0200 Subject: [PATCH 050/242] sd-json,user-record: store the strv size when extending it So strv_push_with_size() doesn't have to recalculate the size every time. --- src/libsystemd/sd-json/json-util.c | 3 ++- src/libsystemd/sd-json/sd-json.c | 3 ++- src/shared/user-record.c | 6 ++++-- 3 files changed, 8 insertions(+), 4 deletions(-) diff --git a/src/libsystemd/sd-json/json-util.c b/src/libsystemd/sd-json/json-util.c index c321579ef5093..27306409fe7c4 100644 --- a/src/libsystemd/sd-json/json-util.c +++ b/src/libsystemd/sd-json/json-util.c @@ -291,6 +291,7 @@ int json_dispatch_path(const char *name, sd_json_variant *variant, sd_json_dispa int json_dispatch_strv_path(const char *name, sd_json_variant *variant, sd_json_dispatch_flags_t flags, void *userdata) { _cleanup_strv_free_ char **n = NULL; char ***l = ASSERT_PTR(userdata); + size_t s = 0; int r; assert(variant); @@ -310,7 +311,7 @@ int json_dispatch_strv_path(const char *name, sd_json_variant *variant, sd_json_ if (r < 0) return r; - r = strv_extend(&n, a); + r = strv_extend_with_size(&n, &s, a); if (r < 0) return json_log_oom(variant, flags); } diff --git a/src/libsystemd/sd-json/sd-json.c b/src/libsystemd/sd-json/sd-json.c index fbc2e55d23f22..659dffb2bac7e 100644 --- a/src/libsystemd/sd-json/sd-json.c +++ b/src/libsystemd/sd-json/sd-json.c @@ -5661,6 +5661,7 @@ _public_ int sd_json_dispatch_strv(const char *name, sd_json_variant *variant, s _cleanup_strv_free_ char **l = NULL; char ***s = userdata; sd_json_variant *e; + size_t n = 0; int r; assert_return(variant, -EINVAL); @@ -5694,7 +5695,7 @@ _public_ int sd_json_dispatch_strv(const char *name, sd_json_variant *variant, s if ((flags & SD_JSON_STRICT) && !string_is_safe(sd_json_variant_string(e), STRING_ALLOW_EMPTY|STRING_ALLOW_GLOBS)) return json_log(e, flags, SYNTHETIC_ERRNO(EINVAL), "JSON field '%s' contains unsafe characters, refusing.", strna(name)); - r = strv_extend(&l, sd_json_variant_string(e)); + r = strv_extend_with_size(&l, &n, sd_json_variant_string(e)); if (r < 0) return json_log(e, flags, r, "Failed to append array element: %m"); } diff --git a/src/shared/user-record.c b/src/shared/user-record.c index 4dfb2c72d70f0..cf33d92215b8d 100644 --- a/src/shared/user-record.c +++ b/src/shared/user-record.c @@ -518,6 +518,7 @@ static int json_dispatch_locales(const char *name, sd_json_variant *variant, sd_ char ***l = userdata; const char *locale; sd_json_variant *e; + size_t s = 0; int r; if (sd_json_variant_is_null(variant)) { @@ -536,7 +537,7 @@ static int json_dispatch_locales(const char *name, sd_json_variant *variant, sd_ if (!locale_is_valid(locale)) return json_log(variant, flags, SYNTHETIC_ERRNO(EINVAL), "JSON field '%s' is not an array of valid locales.", strna(name)); - r = strv_extend(&n, locale); + r = strv_extend_with_size(&n, &s, locale); if (r < 0) return json_log_oom(variant, flags); } @@ -593,6 +594,7 @@ static int json_dispatch_weight(const char *name, sd_json_variant *variant, sd_j int json_dispatch_user_group_list(const char *name, sd_json_variant *variant, sd_json_dispatch_flags_t flags, void *userdata) { char ***list = ASSERT_PTR(userdata); _cleanup_strv_free_ char **l = NULL; + size_t s = 0; int r; if (!sd_json_variant_is_array(variant)) @@ -606,7 +608,7 @@ int json_dispatch_user_group_list(const char *name, sd_json_variant *variant, sd if (!valid_user_group_name(sd_json_variant_string(e), FLAGS_SET(flags, SD_JSON_RELAX) ? VALID_USER_RELAX : 0)) return json_log(e, flags, SYNTHETIC_ERRNO(EINVAL), "JSON array element is not a valid user/group name: %s", sd_json_variant_string(e)); - r = strv_extend(&l, sd_json_variant_string(e)); + r = strv_extend_with_size(&l, &s, sd_json_variant_string(e)); if (r < 0) return json_log(e, flags, r, "Failed to append array element: %m"); } From 994f016a7fb621d782500f54cd3b3b2a06d0d9a4 Mon Sep 17 00:00:00 2001 From: Christian Brauner Date: Wed, 29 Apr 2026 18:28:16 +0200 Subject: [PATCH 051/242] blockdev-list: fix per-element leak in block_device_array_free() (#41869) FOREACH_ARRAY declares 'i' as the iterator but the body passed 'd' (the array base) to block_device_done(). Since mfree() leaves the field NULL after the first call, element 0 is freed repeatedly while elements 1..N-1 leak their node, symlinks strv, model, vendor and subsystem. The bug predates the sanitizer-instrumented callers. PR #41776's new systemd-storage-block daemon runs blockdev_list() under ASan/LSan in TEST-87-AUX-UTILS-VM and exposes it (15 allocs / 804 bytes leaked per ListVolumes request). The fix also benefits repart and blockdev_list's internal CLEANUP_ARRAY cleanup. Follow-up for 9f6b2745eaa15be80568fde2a44d0a10ed6eb2a1 --- src/shared/blockdev-list.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/shared/blockdev-list.c b/src/shared/blockdev-list.c index 5b11c8169477f..0efc90fd54696 100644 --- a/src/shared/blockdev-list.c +++ b/src/shared/blockdev-list.c @@ -27,7 +27,7 @@ void block_device_done(BlockDevice *d) { void block_device_array_free(BlockDevice *d, size_t n_devices) { FOREACH_ARRAY(i, d, n_devices) - block_device_done(d); + block_device_done(i); free(d); } From 33ac56f46230f2c425c16eb6297b979f1bb91228 Mon Sep 17 00:00:00 2001 From: Luca Boccassi Date: Wed, 29 Apr 2026 15:36:32 +0100 Subject: [PATCH 052/242] man: add section about systemd-boot Type#1 sidecars Follow-up for 6b1324fb867d89147585ee20160dbe8f37beefc8 Co-developed-by: Claude Opus 4.7 --- man/systemd-boot.xml | 60 ++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 60 insertions(+) diff --git a/man/systemd-boot.xml b/man/systemd-boot.xml index dab10ed8ef12a..1acf5d083e580 100644 --- a/man/systemd-boot.xml +++ b/man/systemd-boot.xml @@ -406,6 +406,66 @@ loader.conf5. + + Companion Files + + For Type #1 boot loader entries (as defined in the UAPI.1 Boot Loader + Specification) systemd-boot will collect additional companion resources + declared via the extra key in the entry, dynamically generate + cpio initrd archives from them, and register those archives via the Linux initrd EFI + protocol so that they are passed to the kernel together with the entry's own initrd. This is supported + for entries referencing a Unified Kernel Image (UKI) via the uki or + uki-url keys. Each extra key references a single regular file + (relative to the root of the file system containing the entry snippet) and the key may be specified + multiple times. Companion resources are recognized by file name suffix: + + + Files with the .cred suffix are packed into a + cpio archive placed in the /.extra/credentials/ directory of + the initrd file hierarchy. This is intended to convey auxiliary, encrypted, authenticated credentials + for use with LoadCredentialEncrypted=. See + systemd.exec5 and + systemd-creds1 for + details on encrypted credentials. The generated cpio archive is measured into TPM + PCR 12 (if a TPM is present). + + Files with the .sysext.raw suffix are packed into a + cpio archive placed in the /.extra/sysext/ directory of the + initrd file hierarchy. This is intended to pass additional entry-specific system extension images to + the initrd. See + systemd-sysext8 for + details on system extension images. The generated cpio archive is measured into TPM + PCR 13 (if a TPM is present). + + Files with the .confext.raw suffix are packed into a + cpio archive placed in the /.extra/confext/ directory of the + initrd file hierarchy. This is intended to pass additional entry-specific configuration extension + images to the initrd. See + systemd-confext8 + for details on configuration extension images. The generated cpio archive is + measured into TPM PCR 12 (if a TPM is present). + + + When the booted kernel is a UKI, the systemd-stub UEFI stub embedded in it will + combine the companion resources injected here with any companion files it itself collects from the UKI's + .extra.d/ drop-in directory and from /loader/credentials/ and + /loader/extensions/, so that all sources are merged uniformly into + /.extra/ in the initrd. See + systemd-stub7 for + details. + + Example Type #1 entry making use of the extra key: + + title My OS +version 1.2.3 +machine-id 6a9857a393724b7a981ebb5b8495b9ea +uki /6a9857a393724b7a981ebb5b8495b9ea/1.2.3/img.efi +extra /6a9857a393724b7a981ebb5b8495b9ea/1.2.3/foo.cred +extra /6a9857a393724b7a981ebb5b8495b9ea/1.2.3/bar.sysext.raw +extra /6a9857a393724b7a981ebb5b8495b9ea/1.2.3/baz.confext.raw + + EFI Variables From 3f2189ca2544cce99c2aa7a35881007830dc221a Mon Sep 17 00:00:00 2001 From: Samuel Dainard Date: Tue, 28 Apr 2026 15:57:26 +0000 Subject: [PATCH 053/242] binfmt-util: handle ELOOP/EACCES from automount in read-only bind mounts When /proc is bind-mounted read-only (common in mock/Koji buildroots, containers, and other sandboxed environments), opening /proc/sys/fs/binfmt_misc returns ELOOP if it is an automount point that cannot be triggered in the read-only context. Currently binfmt_mounted_and_writable() only handles ENOENT, so ELOOP propagates as an error. This causes test-binfmt-util to fail with SIGABRT and disable_binfmt() to log a spurious warning at shutdown. Treat ELOOP and EACCES the same as ENOENT: binfmt_misc is not usably available, return false. Note: PR #37006 (merged April 2025) addressed ELOOP in the xstatfsat() path, but the open() call in binfmt_mounted_and_writable() remained unhandled. Fixes #38070 --- src/shared/binfmt-util.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/src/shared/binfmt-util.c b/src/shared/binfmt-util.c index d21fd10136fb4..0faca5966341c 100644 --- a/src/shared/binfmt-util.c +++ b/src/shared/binfmt-util.c @@ -18,6 +18,12 @@ int binfmt_mounted_and_writable(void) { fd = RET_NERRNO(open("/proc/sys/fs/binfmt_misc", O_CLOEXEC | O_DIRECTORY | O_PATH)); if (fd == -ENOENT) return false; + /* ELOOP happens when binfmt_misc is an automount point under a read-only bind mount of /proc — + * the kernel cannot trigger the automount and returns ELOOP instead. Common in mock/Koji buildroots. */ + if (fd == -ELOOP || ERRNO_IS_NEG_PRIVILEGE(fd)) { + log_debug_errno(fd, "Failed to open /proc/sys/fs/binfmt_misc, ignoring: %m"); + return false; + } if (fd < 0) return fd; From a69f0b8b28d06786581d21281665a89a4318c309 Mon Sep 17 00:00:00 2001 From: Lennart Poettering Date: Wed, 29 Apr 2026 18:47:48 +0200 Subject: [PATCH 054/242] repart: hide read-only block devices from candidates If they are read-only they are no candidates, since we cannot write to them. --- src/repart/repart.c | 9 ++++++++- src/shared/blockdev-list.c | 21 ++++++++++++++------- src/shared/blockdev-list.h | 1 + 3 files changed, 23 insertions(+), 8 deletions(-) diff --git a/src/repart/repart.c b/src/repart/repart.c index 2d35da28c2b12..b82827f869ee3 100644 --- a/src/repart/repart.c +++ b/src/repart/repart.c @@ -9896,7 +9896,13 @@ static int parse_argv(int argc, char *argv[]) { OPTION_LONG("list-devices", NULL, "List candidate block devices to operate on"): - r = blockdev_list(BLOCKDEV_LIST_REQUIRE_PARTITION_SCANNING|BLOCKDEV_LIST_SHOW_SYMLINKS|BLOCKDEV_LIST_IGNORE_ZRAM, /* ret_devices= */ NULL, /* ret_n_devices= */ NULL); + r = blockdev_list( + BLOCKDEV_LIST_SHOW_SYMLINKS| + BLOCKDEV_LIST_REQUIRE_PARTITION_SCANNING| + BLOCKDEV_LIST_IGNORE_ZRAM| + BLOCKDEV_LIST_IGNORE_READ_ONLY, + /* ret_devices= */ NULL, + /* ret_n_devices= */ NULL); if (r < 0) return r; @@ -10876,6 +10882,7 @@ static int vl_method_list_candidate_devices( BLOCKDEV_LIST_REQUIRE_PARTITION_SCANNING| BLOCKDEV_LIST_IGNORE_ZRAM| BLOCKDEV_LIST_METADATA| + BLOCKDEV_LIST_IGNORE_READ_ONLY| (p.ignore_empty ? BLOCKDEV_LIST_IGNORE_EMPTY : 0)| (p.ignore_root ? BLOCKDEV_LIST_IGNORE_ROOT : 0), &l, diff --git a/src/shared/blockdev-list.c b/src/shared/blockdev-list.c index 0efc90fd54696..181afb42890f5 100644 --- a/src/shared/blockdev-list.c +++ b/src/shared/blockdev-list.c @@ -188,6 +188,20 @@ int blockdev_list(BlockDevListFlags flags, BlockDevice **ret_devices, size_t *re } } + int ro = -1; + if (FLAGS_SET(flags, BLOCKDEV_LIST_IGNORE_READ_ONLY) || FLAGS_SET(flags, BLOCKDEV_LIST_METADATA)) { + r = device_get_sysattr_bool(dev, "ro"); + if (r < 0) + log_device_debug_errno(dev, r, "Failed to acquire read-only flag of device '%s', ignoring: %m", node); + else + ro = r; + + if (ro > 0 && FLAGS_SET(flags, BLOCKDEV_LIST_IGNORE_READ_ONLY)) { + log_device_debug(dev, "Device '%s' is read-only, skipping.", node); + continue; + } + } + _cleanup_strv_free_ char **list = NULL; if (FLAGS_SET(flags, BLOCKDEV_LIST_SHOW_SYMLINKS)) { FOREACH_DEVICE_DEVLINK(dev, sl) @@ -198,17 +212,10 @@ int blockdev_list(BlockDevListFlags flags, BlockDevice **ret_devices, size_t *re } _cleanup_free_ char *model = NULL, *vendor = NULL, *subsystem = NULL; - int ro = -1; if (FLAGS_SET(flags, BLOCKDEV_LIST_METADATA)) { (void) blockdev_get_prop(dev, "ID_MODEL_FROM_DATABASE", "ID_MODEL", &model); (void) blockdev_get_prop(dev, "ID_VENDOR_FROM_DATABASE", "ID_VENDOR", &vendor); (void) blockdev_get_subsystem(dev, &subsystem); - - r = device_get_sysattr_bool(dev, "ro"); - if (r < 0) - log_device_debug_errno(dev, r, "Failed to acquire read-only flag of device '%s', ignoring: %m", node); - else - ro = r; } if (ret_devices) { diff --git a/src/shared/blockdev-list.h b/src/shared/blockdev-list.h index d82345435f7e2..67f8efba97187 100644 --- a/src/shared/blockdev-list.h +++ b/src/shared/blockdev-list.h @@ -11,6 +11,7 @@ typedef enum BlockDevListFlags { BLOCKDEV_LIST_IGNORE_ROOT = 1 << 4, /* Ignore the block device we are currently booted from */ BLOCKDEV_LIST_IGNORE_EMPTY = 1 << 5, /* Ignore disks of zero size (usually drives without a medium) */ BLOCKDEV_LIST_METADATA = 1 << 6, /* Fill in model, vendor, subsystem, read_only */ + BLOCKDEV_LIST_IGNORE_READ_ONLY = 1 << 7, /* Ignore read-only block devices */ } BlockDevListFlags; typedef struct BlockDevice { From cd7aceeaa31e3890d02e773fb1e68769abdf5809 Mon Sep 17 00:00:00 2001 From: Dan Anderson Date: Wed, 29 Apr 2026 22:53:10 -0400 Subject: [PATCH 055/242] Improve error logging for fstat failure Small hygiene fix. r must be >= 0 as per the prior statement (otherwise we would have returned). This is really only going to be r == 0, which means return r; is return 0; I'm updating this to use log_debug_errno --- src/mountfsd/mountwork.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/mountfsd/mountwork.c b/src/mountfsd/mountwork.c index 54a5203da2cc6..9f469d6061fde 100644 --- a/src/mountfsd/mountwork.c +++ b/src/mountfsd/mountwork.c @@ -1362,7 +1362,7 @@ static int vl_method_make_directory( struct stat parent_stat; if (fstat(parent_fd, &parent_stat) < 0) - return r; + return log_debug_errno(errno, "Failed to fstat parent directory fd: %m"); r = stat_verify_directory(&parent_stat); if (r < 0) From 9d2f5b4611a47b9e5a31296cea70c2d8c6c86bbb Mon Sep 17 00:00:00 2001 From: Daan De Meyer Date: Mon, 27 Apr 2026 18:03:51 +0000 Subject: [PATCH 056/242] fundamental/cleanup: add CLEANUP_ELEMENTS() and DEFINE_POINTER_ARRAY_CLEAR_FUNC() DEFINE_POINTER_ARRAY_CLEAR_FUNC() generates a helper of the form helper_array_clear(T *array, size_t n) that drops each element but does not free the array itself, parallel to DEFINE_POINTER_ARRAY_FREE_FUNC() for cases where the array has automatic storage duration. CLEANUP_ELEMENTS() pairs with these helpers to provide a _cleanup_-like attribute for fixed-size arrays: the bound is taken from ELEMENTSOF(), and the helper is invoked across the elements at scope exit. Compared to CLEANUP_ARRAY(), the storage is neither freed nor zeroed. Migrate various logic across the tree over to the new macros. sd-device: use DEFINE_POINTER_ARRAY_CLEAR_FUNC() for sd_device_unref_array_clear() Replace the local device_unref_many() helper with the macro-generated equivalent. format-table: switch help-table arrays to CLEANUP_ELEMENTS() Generate table_unref_array_clear() via DEFINE_POINTER_ARRAY_CLEAR_FUNC() and convert the help-table arrays in bootctl, cryptenroll, nspawn, repart and vmspawn to CLEANUP_ELEMENTS(). The arrays no longer need a trailing NULL slot, so the size matches ELEMENTSOF() of the groups array. firewall-util: switch netlink message arrays to CLEANUP_ELEMENTS() Generate sd_netlink_message_unref_array_clear() via DEFINE_POINTER_ARRAY_CLEAR_FUNC() in place of the NULL-terminated sd_netlink_message_unref_many(), and convert the two stack arrays of sd_netlink_message pointers to CLEANUP_ELEMENTS(). --- src/bootctl/bootctl.c | 6 ++- src/cryptenroll/cryptenroll.c | 3 +- src/fundamental/cleanup-fundamental.h | 39 ++++++++++++++++++++ src/libsystemd/sd-device/device-enumerator.c | 13 ++----- src/nspawn/nspawn.c | 3 +- src/repart/repart.c | 3 +- src/run/run.c | 3 +- src/shared/firewall-util.c | 8 ++-- src/shared/format-table.h | 2 +- src/vmspawn/vmspawn.c | 3 +- 10 files changed, 63 insertions(+), 20 deletions(-) diff --git a/src/bootctl/bootctl.c b/src/bootctl/bootctl.c index 04213dc8e17aa..967c21458d9ee 100644 --- a/src/bootctl/bootctl.c +++ b/src/bootctl/bootctl.c @@ -298,8 +298,10 @@ static int help(void) { "Options", }; - _cleanup_(table_unref_many) Table *verb_tables[ELEMENTSOF(verb_groups) + 1] = {}; - _cleanup_(table_unref_many) Table *option_tables[ELEMENTSOF(option_groups) + 1] = {}; + Table *verb_tables[ELEMENTSOF(verb_groups)] = {}; + CLEANUP_ELEMENTS(verb_tables, table_unref_array_clear); + Table *option_tables[ELEMENTSOF(option_groups)] = {}; + CLEANUP_ELEMENTS(option_tables, table_unref_array_clear); for (size_t i = 0; i < ELEMENTSOF(verb_groups); i++) { r = verbs_get_help_table_group(verb_groups[i], &verb_tables[i]); diff --git a/src/cryptenroll/cryptenroll.c b/src/cryptenroll/cryptenroll.c index f7e7ff121804a..6561d86107843 100644 --- a/src/cryptenroll/cryptenroll.c +++ b/src/cryptenroll/cryptenroll.c @@ -241,7 +241,8 @@ static int help(void) { "TPM2 Enrollment", }; - _cleanup_(table_unref_many) Table *tables[ELEMENTSOF(groups) + 1] = {}; + Table *tables[ELEMENTSOF(groups)] = {}; + CLEANUP_ELEMENTS(tables, table_unref_array_clear); for (size_t i = 0; i < ELEMENTSOF(groups); i++) { r = option_parser_get_help_table_group(groups[i], &tables[i]); diff --git a/src/fundamental/cleanup-fundamental.h b/src/fundamental/cleanup-fundamental.h index 9094cff2331e0..8d499e5c3498b 100644 --- a/src/fundamental/cleanup-fundamental.h +++ b/src/fundamental/cleanup-fundamental.h @@ -64,6 +64,15 @@ free(array); \ } +/* Like DEFINE_POINTER_ARRAY_FREE_FUNC() but does not deallocate the array itself, useful for + * arrays with automatic storage duration (e.g. on the stack). */ +#define DEFINE_POINTER_ARRAY_CLEAR_FUNC(type, helper) \ + void helper ## _array_clear(type *array, size_t n) { \ + assert(array || n == 0); \ + FOREACH_ARRAY(item, array, n) \ + *item = helper(*item); \ + } + /* Clean up an array of objects of known size by dropping all the items in it. * Then free the array itself. */ #define DEFINE_ARRAY_FREE_FUNC(name, type, helper) \ @@ -108,3 +117,33 @@ static inline void array_cleanup(const ArrayCleanup *c) { _f; \ }), \ } + +/* An automatic _cleanup_-like logic for fixed-size arrays where the bound is known via + * ELEMENTSOF(). Unlike CLEANUP_ARRAY() this neither frees the storage nor zeroes it: it just + * invokes func() across the elements when leaving scope. */ +typedef struct ElementsCleanup { + void *array; + size_t n; + free_array_func_t pfunc; +} ElementsCleanup; + +static inline void elements_cleanup(const ElementsCleanup *c) { + assert(c); + + if (c->n == 0) + return; + + assert(c->array); + assert(c->pfunc); + c->pfunc(c->array, c->n); +} + +#define CLEANUP_ELEMENTS(_array, _func) \ + _cleanup_(elements_cleanup) _unused_ const ElementsCleanup CONCATENATE(_cleanup_elements_, UNIQ) = { \ + .array = (_array), \ + .n = ELEMENTSOF(_array), \ + .pfunc = (free_array_func_t) ({ \ + void (*_f)(typeof((_array)[0]) *a, size_t b) = _func; \ + _f; \ + }), \ + } diff --git a/src/libsystemd/sd-device/device-enumerator.c b/src/libsystemd/sd-device/device-enumerator.c index b3fe85a976167..d1a48defe906c 100644 --- a/src/libsystemd/sd-device/device-enumerator.c +++ b/src/libsystemd/sd-device/device-enumerator.c @@ -82,18 +82,13 @@ _public_ int sd_device_enumerator_new(sd_device_enumerator **ret) { return 0; } -static void device_unref_many(sd_device **devices, size_t n) { - assert(devices || n == 0); - - for (size_t i = 0; i < n; i++) - sd_device_unref(devices[i]); -} +static DEFINE_POINTER_ARRAY_CLEAR_FUNC(sd_device*, sd_device_unref); static void device_enumerator_unref_devices(sd_device_enumerator *enumerator) { assert(enumerator); hashmap_clear(enumerator->devices_by_syspath); - device_unref_many(enumerator->devices, enumerator->n_devices); + sd_device_unref_array_clear(enumerator->devices, enumerator->n_devices); enumerator->devices = mfree(enumerator->devices); enumerator->n_devices = 0; } @@ -461,7 +456,7 @@ static int enumerator_sort_devices(sd_device_enumerator *enumerator) { typesafe_qsort(devices + n_sorted, n - n_sorted, device_compare); - device_unref_many(enumerator->devices, enumerator->n_devices); + sd_device_unref_array_clear(enumerator->devices, enumerator->n_devices); enumerator->n_devices = n; free_and_replace(enumerator->devices, devices); @@ -470,7 +465,7 @@ static int enumerator_sort_devices(sd_device_enumerator *enumerator) { return 0; failed: - device_unref_many(devices, n); + sd_device_unref_array_clear(devices, n); free(devices); return r; } diff --git a/src/nspawn/nspawn.c b/src/nspawn/nspawn.c index 6c9c1050c6921..f96a6b08b981c 100644 --- a/src/nspawn/nspawn.c +++ b/src/nspawn/nspawn.c @@ -405,7 +405,8 @@ static int help(void) { "Other", }; - _cleanup_(table_unref_many) Table* tables[ELEMENTSOF(groups) + 1] = {}; + Table* tables[ELEMENTSOF(groups)] = {}; + CLEANUP_ELEMENTS(tables, table_unref_array_clear); for (size_t i = 0; i < ELEMENTSOF(groups); i++) { r = option_parser_get_help_table_group(groups[i], &tables[i]); diff --git a/src/repart/repart.c b/src/repart/repart.c index b82827f869ee3..26588c6242b4d 100644 --- a/src/repart/repart.c +++ b/src/repart/repart.c @@ -9648,7 +9648,8 @@ static int help(void) { "El Torito boot catalog", }; - _cleanup_(table_unref_many) Table *option_tables[ELEMENTSOF(option_groups) + 1] = {}; + Table *option_tables[ELEMENTSOF(option_groups)] = {}; + CLEANUP_ELEMENTS(option_tables, table_unref_array_clear); for (size_t i = 0; i < ELEMENTSOF(option_groups); i++) { r = option_parser_get_help_table_group(option_groups[i], &option_tables[i]); diff --git a/src/run/run.c b/src/run/run.c index 5827d91e1f9e4..9d1042e845a33 100644 --- a/src/run/run.c +++ b/src/run/run.c @@ -151,7 +151,8 @@ static int help(void) { "Timer options", }; - _cleanup_(table_unref_many) Table *tables[ELEMENTSOF(groups) + 1] = {}; + Table *tables[ELEMENTSOF(groups)] = {}; + CLEANUP_ELEMENTS(tables, table_unref_array_clear); for (size_t i = 0; i < ELEMENTSOF(groups); i++) { r = option_parser_get_help_table_full("systemd-run", groups[i], &tables[i]); diff --git a/src/shared/firewall-util.c b/src/shared/firewall-util.c index 651870e369889..4693972ff2752 100644 --- a/src/shared/firewall-util.c +++ b/src/shared/firewall-util.c @@ -50,7 +50,7 @@ static const char* dnat_map_name(void) { return cached; } -static DEFINE_ARRAY_DONE_FUNC(sd_netlink_message*, sd_netlink_message_unref); +static DEFINE_POINTER_ARRAY_CLEAR_FUNC(sd_netlink_message*, sd_netlink_message_unref); static int nfnl_open_expr_container(sd_netlink_message *m, const char *name) { int r; @@ -724,7 +724,8 @@ static uint32_t concat_types2(enum nft_key_types a, enum nft_key_types b) { } static int fw_nftables_init_family(sd_netlink *nfnl, int family) { - _cleanup_(sd_netlink_message_unref_many) sd_netlink_message *messages[10] = {}; + sd_netlink_message *messages[10] = {}; + CLEANUP_ELEMENTS(messages, sd_netlink_message_unref_array_clear); size_t msgcnt = 0, ip_type_size; uint32_t set_id = 0; int ip_type, r; @@ -1045,7 +1046,8 @@ static int fw_nftables_add_local_dnat_internal( uint16_t remote_port, const union in_addr_union *previous_remote) { - _cleanup_(sd_netlink_message_unref_many) sd_netlink_message *messages[3] = {}; + sd_netlink_message *messages[3] = {}; + CLEANUP_ELEMENTS(messages, sd_netlink_message_unref_array_clear); uint32_t data[5], key[2], dlen; size_t msgcnt = 0; int r; diff --git a/src/shared/format-table.h b/src/shared/format-table.h index 5b98d49017524..ba4d33cfc6719 100644 --- a/src/shared/format-table.h +++ b/src/shared/format-table.h @@ -101,7 +101,7 @@ Table* table_new_vertical(void); Table* table_unref(Table *t); DEFINE_TRIVIAL_CLEANUP_FUNC(Table*, table_unref); -static inline DEFINE_ARRAY_DONE_FUNC(Table*, table_unref); +static inline DEFINE_POINTER_ARRAY_CLEAR_FUNC(Table*, table_unref); int table_add_cell_full(Table *t, TableCell **ret_cell, TableDataType dt, const void *data, size_t minimum_width, size_t maximum_width, unsigned weight, unsigned align_percent, unsigned ellipsize_percent); static inline int table_add_cell(Table *t, TableCell **ret_cell, TableDataType dt, const void *data) { diff --git a/src/vmspawn/vmspawn.c b/src/vmspawn/vmspawn.c index 14df0fc989f65..8e4cbf3e80611 100644 --- a/src/vmspawn/vmspawn.c +++ b/src/vmspawn/vmspawn.c @@ -235,7 +235,8 @@ static int help(void) { "Credentials", }; - _cleanup_(table_unref_many) Table* tables[ELEMENTSOF(groups) + 1] = {}; + Table* tables[ELEMENTSOF(groups)] = {}; + CLEANUP_ELEMENTS(tables, table_unref_array_clear); for (size_t i = 0; i < ELEMENTSOF(groups); i++) { r = option_parser_get_help_table_group(groups[i], &tables[i]); From 1c534452e961d4663f96b046f6242f00394f8e75 Mon Sep 17 00:00:00 2001 From: Frantisek Sumsal Date: Wed, 29 Apr 2026 19:18:17 +0200 Subject: [PATCH 057/242] dns-question: limit the number of questions per query Let's cap the number of question each query can have to something reasonable - 128 questions per query should be more than enough for any real-world scenario. --- src/shared/dns-question.c | 3 +++ src/shared/dns-question.h | 2 ++ 2 files changed, 5 insertions(+) diff --git a/src/shared/dns-question.c b/src/shared/dns-question.c index ac4cc8e998007..28840d64b948a 100644 --- a/src/shared/dns-question.c +++ b/src/shared/dns-question.c @@ -608,6 +608,9 @@ int dns_json_dispatch_question(const char *name, sd_json_variant *variant, sd_js if (!sd_json_variant_is_array(variant)) return json_log(variant, flags, SYNTHETIC_ERRNO(EINVAL), "JSON field '%s' is not an array.", strna(name)); + if (sd_json_variant_elements(variant) > DNS_QUESTION_ITEMS_MAX) + return json_log(variant, flags, SYNTHETIC_ERRNO(E2BIG), "Too many questions in a single query."); + _cleanup_(dns_question_unrefp) DnsQuestion *nq = NULL; nq = dns_question_new(sd_json_variant_elements(variant)); if (!nq) diff --git a/src/shared/dns-question.h b/src/shared/dns-question.h index 4b0fc68fd648c..85de7ad06d8d7 100644 --- a/src/shared/dns-question.h +++ b/src/shared/dns-question.h @@ -5,6 +5,8 @@ #include "shared-forward.h" +#define DNS_QUESTION_ITEMS_MAX 128U + /* A simple array of resource keys */ typedef enum DnsQuestionFlags { From 7671b43cb88532cce2aa9ad12f777922206d6a42 Mon Sep 17 00:00:00 2001 From: Frantisek Sumsal Date: Wed, 29 Apr 2026 16:50:57 +0200 Subject: [PATCH 058/242] sd-json: limit the number of env variables to something reasonable Let's start with 1024, as that should be plenty for all sane use cases. --- src/libsystemd/sd-json/json-util.c | 3 +++ src/libsystemd/sd-json/json-util.h | 2 ++ 2 files changed, 5 insertions(+) diff --git a/src/libsystemd/sd-json/json-util.c b/src/libsystemd/sd-json/json-util.c index 27306409fe7c4..40102a69989ed 100644 --- a/src/libsystemd/sd-json/json-util.c +++ b/src/libsystemd/sd-json/json-util.c @@ -653,6 +653,9 @@ int json_dispatch_strv_environment(const char *name, sd_json_variant *variant, s if (!sd_json_variant_is_array(variant)) return json_log(variant, flags, SYNTHETIC_ERRNO(EINVAL), "JSON field '%s' is not an array.", strna(name)); + if (sd_json_variant_elements(variant) > ENVIRONMENT_ASSIGNMENTS_MAX) + return json_log(variant, flags, SYNTHETIC_ERRNO(E2BIG), "Too many environment variable assignments."); + sd_json_variant *i; JSON_VARIANT_ARRAY_FOREACH(i, variant) { const char *e; diff --git a/src/libsystemd/sd-json/json-util.h b/src/libsystemd/sd-json/json-util.h index cea2d368b43db..34d79d5238aaa 100644 --- a/src/libsystemd/sd-json/json-util.h +++ b/src/libsystemd/sd-json/json-util.h @@ -9,6 +9,8 @@ #include "sd-forward.h" #include "string-util.h" /* IWYU pragma: keep */ +#define ENVIRONMENT_ASSIGNMENTS_MAX 1024U + #define JSON_VARIANT_REPLACE(v, q) \ do { \ typeof(v)* _v = &(v); \ From e5687f689f20b051420fe0154ea4391af697ed7c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Zbigniew=20J=C4=99drzejewski-Szmek?= Date: Tue, 28 Apr 2026 23:36:15 +0200 Subject: [PATCH 059/242] report: absorb "facts" into "metrics" MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This gets rid of the duality in the cmdline interface and various APIs. The general plan is collect both "facts" and "metrics" in a single list. We have various producers which respond on the io.systemd.Facts endpoint. Those will need to be adjusted to respond to io.systemd.Metrics. Cmdline interface: 'metrics' (unchanged) 'describe-metrics' → 'describe' 'facts' → merged into 'metrics' 'describe-facts' → merged into 'describe' --- man/systemd-report.xml | 10 +- src/report/report-upload.c | 10 +- src/report/report.c | 225 +----------------- src/report/report.h | 7 +- .../fake-report-server.py | 4 +- test/units/TEST-74-AUX-UTILS.report.sh | 36 +-- 6 files changed, 27 insertions(+), 265 deletions(-) diff --git a/man/systemd-report.xml b/man/systemd-report.xml index b53a50c2f8681..f14600dfe5d32 100644 --- a/man/systemd-report.xml +++ b/man/systemd-report.xml @@ -18,7 +18,7 @@ systemd-report - Generate report of system facts and metrics + Generate report of system metrics @@ -33,7 +33,7 @@ Note: this command is experimental for now. While it is likely to become a regular component of systemd, it might still change in behaviour and interface. - systemd-report requests facts and metrics from the system and writes them to + systemd-report requests metrics from the system and writes them to standard output. @@ -56,14 +56,14 @@ - describe-metrics MATCH + describe MATCH Acquire a list of metric families from all local services providing them, and write them to standard output. This returns primarily static information about metrics, their data types and human readable description, without values. - Match expressions similar to those supported by metrics are supported for - describe-metrics, too. + Match expressions supported by metrics are supported by + describe too. diff --git a/src/report/report-upload.c b/src/report/report-upload.c index c64bf86e13336..486e815e8d857 100644 --- a/src/report/report-upload.c +++ b/src/report/report-upload.c @@ -59,18 +59,10 @@ static int build_json_report(Context *context, sd_json_variant **ret) { usec_t ts = now(CLOCK_REALTIME); int r; - const char *ident; - if (IN_SET(context->action, ACTION_LIST_METRICS, ACTION_DESCRIBE_METRICS)) - ident = "metrics"; - else if (IN_SET(context->action, ACTION_LIST_FACTS, ACTION_DESCRIBE_FACTS)) - ident = "facts"; - else - assert_not_reached(); - r = sd_json_buildo(ret, SD_JSON_BUILD_PAIR("timestamp", SD_JSON_BUILD_STRING(FORMAT_TIMESTAMP_STYLE(ts, TIMESTAMP_UTC))), - SD_JSON_BUILD_PAIR(ident, + SD_JSON_BUILD_PAIR("metrics", SD_JSON_BUILD_VARIANT_ARRAY(context->metrics, context->n_metrics))); if (r < 0) return log_error_errno(r, "Failed to build JSON data: %m"); diff --git a/src/report/report.c b/src/report/report.c index fef01c094ef9f..feedcaa43a5b8 100644 --- a/src/report/report.c +++ b/src/report/report.c @@ -28,8 +28,8 @@ #include "verbs.h" #include "web-util.h" -#define METRICS_OR_FACTS_MAX 4096U -#define METRICS_OR_FACTS_LINKS_MAX 128U +#define METRICS_MAX 4096U +#define METRICS_LINKS_MAX 128U #define TIMEOUT_USEC (30 * USEC_PER_SEC) /* 30 seconds */ static PagerFlags arg_pager_flags = 0; @@ -87,8 +87,6 @@ DEFINE_PRIVATE_HASH_OPS_WITH_VALUE_DESTRUCTOR( static const char* const action_method_table[] = { [ACTION_LIST_METRICS] = "io.systemd.Metrics.List", [ACTION_DESCRIBE_METRICS] = "io.systemd.Metrics.Describe", - [ACTION_LIST_FACTS] = "io.systemd.Facts.List", - [ACTION_DESCRIBE_FACTS] = "io.systemd.Facts.Describe", }; DEFINE_PRIVATE_STRING_TABLE_LOOKUP_TO_STRING(action_method, Action); @@ -249,7 +247,7 @@ static int on_query_reply( goto finish; } - if (context->n_metrics >= METRICS_OR_FACTS_MAX) { + if (context->n_metrics >= METRICS_MAX) { context->n_skipped_metrics++; goto finish; } @@ -436,107 +434,6 @@ static int output_collected_describe(Context *context, Table **ret) { return 0; } -static int facts_output_list(Context *context, Table **ret) { - int r; - - assert(context); - assert(ret); - - _cleanup_(table_unrefp) Table *table = table_new("family", "object", "value"); - if (!table) - return log_oom(); - - table_set_ersatz_string(table, TABLE_ERSATZ_DASH); - table_set_sort(table, (size_t) 0, (size_t) 1, (size_t) 2); - - FOREACH_ARRAY(m, context->metrics, context->n_metrics) { - struct { - const char *name; - const char *object; - sd_json_variant *value; - } d = {}; - - static const sd_json_dispatch_field dispatch_table[] = { - { "name", SD_JSON_VARIANT_STRING, sd_json_dispatch_const_string, voffsetof(d, name), SD_JSON_MANDATORY }, - { "object", SD_JSON_VARIANT_STRING, sd_json_dispatch_const_string, voffsetof(d, object), 0 }, - { "value", _SD_JSON_VARIANT_TYPE_INVALID, sd_json_dispatch_variant_noref, voffsetof(d, value), SD_JSON_MANDATORY }, - {} - }; - - r = sd_json_dispatch(*m, dispatch_table, SD_JSON_ALLOW_EXTENSIONS, &d); - if (r < 0) { - _cleanup_free_ char *t = NULL; - int k = sd_json_variant_format(*m, /* flags= */ 0, &t); - if (k < 0) - return log_error_errno(k, "Failed to format JSON: %m"); - - log_warning_errno(r, "Cannot parse fact, skipping: %s", t); - continue; - } - - r = table_add_many( - table, - TABLE_STRING, d.name, - TABLE_STRING, d.object, - TABLE_JSON, d.value, - TABLE_SET_WEIGHT, 50U); - if (r < 0) - return table_log_add_error(r); - } - - *ret = TAKE_PTR(table); - return 0; -} - -static int facts_output_describe(Context *context, Table **ret) { - int r; - - assert(context); - assert(ret); - - _cleanup_(table_unrefp) Table *table = table_new("family", "description"); - if (!table) - return log_oom(); - - table_set_ersatz_string(table, TABLE_ERSATZ_DASH); - table_set_sort(table, (size_t) 0, (size_t) 1); - - FOREACH_ARRAY(m, context->metrics, context->n_metrics) { - struct { - const char *name; - const char *description; - } d = {}; - - static const sd_json_dispatch_field dispatch_table[] = { - { "name", SD_JSON_VARIANT_STRING, sd_json_dispatch_const_string, voffsetof(d, name), SD_JSON_MANDATORY }, - { "description", SD_JSON_VARIANT_STRING, sd_json_dispatch_const_string, voffsetof(d, description), 0 }, - {} - }; - - r = sd_json_dispatch(*m, dispatch_table, SD_JSON_ALLOW_EXTENSIONS, &d); - if (r < 0) { - _cleanup_free_ char *t = NULL; - int k = sd_json_variant_format(*m, /* flags= */ 0, &t); - if (k < 0) - return log_error_errno(k, "Failed to format JSON: %m"); - - log_warning_errno(r, "Cannot parse fact description, skipping: %s", t); - continue; - } - - r = table_add_many( - table, - TABLE_STRING, d.name, - TABLE_STRING, d.description, - TABLE_SET_WEIGHT, 50U); - if (r < 0) - return table_log_add_error(r); - } - - *ret = TAKE_PTR(table); - return 0; -} - static int output_collected(Context *context) { int r; @@ -555,13 +452,8 @@ static int output_collected(Context *context) { return log_error_errno(r, "Failed to write JSON: %m"); } - if (context->n_metrics == 0 && arg_legend) { - if (IN_SET(context->action, ACTION_LIST_FACTS, ACTION_DESCRIBE_FACTS)) - log_info("No facts collected."); - else - log_info("No metrics collected."); - } - + if (context->n_metrics == 0 && arg_legend) + log_info("No metrics collected."); return 0; } @@ -577,14 +469,6 @@ static int output_collected(Context *context) { r = output_collected_describe(context, &table); break; - case ACTION_LIST_FACTS: - r = facts_output_list(context, &table); - break; - - case ACTION_DESCRIBE_FACTS: - r = facts_output_describe(context, &table); - break; - default: assert_not_reached(); } @@ -598,12 +482,10 @@ static int output_collected(Context *context) { } if (arg_legend && !sd_json_format_enabled(arg_json_format_flags)) { - const char *type = IN_SET(context->action, ACTION_LIST_FACTS, ACTION_DESCRIBE_FACTS) ? "facts" : "metrics"; - if (table_isempty(table)) - printf("No %s available.\n", type); + printf("No metrics available.\n"); else - printf("\n%zu %s listed.\n", table_get_rows(table) - 1, type); + printf("\n%zu metrics listed.\n", table_get_rows(table) - 1); } return 0; @@ -707,7 +589,7 @@ static int readdir_sources(char **ret_directory, DirectoryEntries **ret) { VERB_FULL(verb_metrics, "metrics", "[MATCH…]", VERB_ANY, VERB_ANY, 0, ACTION_LIST_METRICS, "Acquire list of metrics and their values"); -VERB_FULL(verb_metrics, "describe-metrics", "[MATCH…]", VERB_ANY, VERB_ANY, 0, ACTION_DESCRIBE_METRICS, +VERB_FULL(verb_metrics, "describe", "[MATCH…]", VERB_ANY, VERB_ANY, 0, ACTION_DESCRIBE_METRICS, "Describe available metrics"); static int verb_metrics(int argc, char *argv[], uintptr_t data, void *userdata) { Action action = data; @@ -746,7 +628,7 @@ static int verb_metrics(int argc, char *argv[], uintptr_t data, void *userdata) FOREACH_ARRAY(i, de->entries, de->n_entries) { struct dirent *d = *i; - if (set_size(context.link_infos) >= METRICS_OR_FACTS_LINKS_MAX) { + if (set_size(context.link_infos) >= METRICS_LINKS_MAX) { n_skipped_sources++; break; } @@ -792,93 +674,6 @@ static int verb_metrics(int argc, char *argv[], uintptr_t data, void *userdata) return 0; } -VERB_FULL(verb_facts, "facts", "[MATCH…]", VERB_ANY, VERB_ANY, 0, ACTION_LIST_FACTS, - "Acquire list of facts and their values"); -VERB_FULL(verb_facts, "describe-facts", "[MATCH…]", VERB_ANY, VERB_ANY, 0, ACTION_DESCRIBE_FACTS, - "Describe available facts"); -static int verb_facts(int argc, char *argv[], uintptr_t data, void *userdata) { - Action action = data; - int r; - - assert(argc >= 1); - assert(argv); - assert(IN_SET(action, ACTION_LIST_FACTS, ACTION_DESCRIBE_FACTS)); - - /* Enable JSON-SEQ mode here, since we'll dump a large series of JSON objects */ - arg_json_format_flags |= SD_JSON_FORMAT_SEQ; - - r = parse_metrics_matches(argv + 1); - if (r < 0) - return r; - - _cleanup_(context_done) Context context = { - .action = action, - }; - size_t n_skipped_sources = 0; - - _cleanup_free_ DirectoryEntries *de = NULL; - _cleanup_free_ char *sources_path = NULL; - r = readdir_sources(&sources_path, &de); - if (r < 0) - return r; - if (r > 0) { - r = sd_event_default(&context.event); - if (r < 0) - return log_error_errno(r, "Failed to get event loop: %m"); - - r = sd_event_set_signal_exit(context.event, true); - if (r < 0) - return log_error_errno(r, "Failed to enable exit on SIGINT/SIGTERM: %m"); - - FOREACH_ARRAY(i, de->entries, de->n_entries) { - struct dirent *d = *i; - - if (set_size(context.link_infos) >= METRICS_OR_FACTS_LINKS_MAX) { - n_skipped_sources++; - break; - } - - _cleanup_free_ char *p = path_join(sources_path, d->d_name); - if (!p) - return log_oom(); - - (void) call_collect(&context, d->d_name, p); - } - } - - if (set_isempty(context.link_infos)) { - if (arg_legend) - log_info("No facts sources found."); - } else { - assert(context.event); - - r = sd_event_loop(context.event); - if (r < 0) - return log_error_errno(r, "Failed to run event loop: %m"); - - if (arg_url) - r = upload_collected(&context); - else - r = output_collected(&context); - if (r < 0) - return r; - } - - if (n_skipped_sources > 0) - return log_warning_errno(SYNTHETIC_ERRNO(EUCLEAN), - "Too many facts sources, only %u sources contacted, %zu sources skipped.", - set_size(context.link_infos), n_skipped_sources); - if (context.n_invalid_metrics > 0) - return log_warning_errno(SYNTHETIC_ERRNO(EUCLEAN), - "%zu facts are not valid.", - context.n_invalid_metrics); - if (context.n_skipped_metrics > 0) - return log_warning_errno(SYNTHETIC_ERRNO(EUCLEAN), - "Too many facts, only %zu facts collected, %zu facts skipped.", - context.n_metrics, context.n_skipped_metrics); - return 0; -} - VERB_NOARG(verb_list_sources, "list-sources", "Show list of known metrics sources"); static int verb_list_sources(int argc, char *argv[], uintptr_t _data, void *userdata) { int r; @@ -955,7 +750,7 @@ static int help(void) { (void) table_sync_column_widths(0, options, verbs); printf("%s [OPTIONS...] COMMAND ...\n" - "\n%sAcquire metrics and facts from local sources.%s\n" + "\n%sAcquire metrics from local sources.%s\n" "\n%sCommands:%s\n", program_invocation_short_name, ansi_highlight(), diff --git a/src/report/report.h b/src/report/report.h index 4d7b5bdd3f0bb..4adb20349514a 100644 --- a/src/report/report.h +++ b/src/report/report.h @@ -16,19 +16,16 @@ extern usec_t arg_network_timeout_usec; typedef enum Action { ACTION_LIST_METRICS, ACTION_DESCRIBE_METRICS, - ACTION_LIST_FACTS, - ACTION_DESCRIBE_FACTS, _ACTION_MAX, _ACTION_INVALID = -EINVAL, } Action; -/* The structure for collected "metrics" or "facts". The fields - * are prefixed with just "metrics" for brevity. */ +/* The structure for collected "metrics". */ typedef struct Context { Action action; sd_event *event; Set *link_infos; - sd_json_variant **metrics; /* Collected metrics or facts for sorting */ + sd_json_variant **metrics; /* Collected metrics for sorting */ size_t n_metrics, n_skipped_metrics, n_invalid_metrics; struct iovec_wrapper upload_answer; } Context; diff --git a/test/integration-tests/TEST-74-AUX-UTILS/TEST-74-AUX-UTILS.units/fake-report-server.py b/test/integration-tests/TEST-74-AUX-UTILS/TEST-74-AUX-UTILS.units/fake-report-server.py index 4875a00bada6a..6beb6383c204f 100755 --- a/test/integration-tests/TEST-74-AUX-UTILS/TEST-74-AUX-UTILS.units/fake-report-server.py +++ b/test/integration-tests/TEST-74-AUX-UTILS/TEST-74-AUX-UTILS.units/fake-report-server.py @@ -35,8 +35,8 @@ def do_POST(self): print(f"JSON: {s if len(s := str(data)) < 80 else s[:40] + '…' + s[-40:]}") - if "metrics" not in data and "facts" not in data: - self.send_error(400, "Missing 'metrics' or 'facts' field") + if "metrics" not in data: + self.send_error(400, "Missing 'metrics' field") return response = json.dumps({"status": "ok"}).encode() diff --git a/test/units/TEST-74-AUX-UTILS.report.sh b/test/units/TEST-74-AUX-UTILS.report.sh index 53b83c4dd9477..73678fcabf1f8 100755 --- a/test/units/TEST-74-AUX-UTILS.report.sh +++ b/test/units/TEST-74-AUX-UTILS.report.sh @@ -16,9 +16,9 @@ REPORT=/usr/lib/systemd/systemd-report "$REPORT" metrics "$REPORT" metrics -j "$REPORT" metrics --no-legend -"$REPORT" describe-metrics -"$REPORT" describe-metrics -j -"$REPORT" describe-metrics --no-legend +"$REPORT" describe +"$REPORT" describe -j +"$REPORT" describe --no-legend "$REPORT" list-sources "$REPORT" list-sources -j "$REPORT" list-sources --no-legend @@ -26,9 +26,9 @@ REPORT=/usr/lib/systemd/systemd-report "$REPORT" metrics io "$REPORT" metrics io.systemd piff "$REPORT" metrics piff -"$REPORT" describe-metrics io -"$REPORT" describe-metrics io.systemd piff -"$REPORT" describe-metrics piff +"$REPORT" describe io +"$REPORT" describe io.systemd piff +"$REPORT" describe piff # test io.systemd.CGroup Metrics systemctl start systemd-report-cgroup.socket @@ -46,26 +46,6 @@ varlinkctl --more call /run/systemd/report/io.systemd.Network io.systemd.Metrics # Make sure the service for "system facts" is enabled systemctl start systemd-report-basic.socket -# Test facts verbs -"$REPORT" facts -"$REPORT" facts -j -"$REPORT" facts --no-legend -"$REPORT" describe-facts -"$REPORT" describe-facts -j -"$REPORT" describe-facts --no-legend - -# Test facts with match filters -"$REPORT" facts io -"$REPORT" facts io.systemd piff -"$REPORT" facts piff -"$REPORT" describe-facts io -"$REPORT" describe-facts io.systemd piff -"$REPORT" describe-facts piff - -# Test facts via direct Varlink call on existing socket -varlinkctl --more call /run/systemd/report/io.systemd.Basic io.systemd.Facts.List {} -varlinkctl --more call /run/systemd/report/io.systemd.Basic io.systemd.Facts.Describe {} - # Test HTTP upload (plain http) FAKE_SERVER=/usr/lib/systemd/tests/integration-tests/TEST-74-AUX-UTILS/TEST-74-AUX-UTILS.units/fake-report-server.py CERTDIR=$(mktemp -d) @@ -81,7 +61,6 @@ systemd-run -p Type=notify --unit=fake-report-server "$FAKE_SERVER" systemctl status fake-report-server "$REPORT" metrics --url=http://localhost:8089/ -"$REPORT" facts --url=http://localhost:8089/ # Test HTTPS upload with generated TLS certificates openssl req -x509 -newkey rsa:2048 -keyout "$CERTDIR/server.key" -out "$CERTDIR/server.crt" \ @@ -91,6 +70,5 @@ systemd-run -p Type=notify --unit=fake-report-server-tls \ "$FAKE_SERVER" --cert="$CERTDIR/server.crt" --key="$CERTDIR/server.key" --port=8090 systemctl status fake-report-server-tls -"$REPORT" metrics --url=https://localhost:8090/ --key=- --trust="$CERTDIR/server.crt" -"$REPORT" facts --url=https://localhost:8090/ --key=- --trust="$CERTDIR/server.crt" \ +"$REPORT" metrics --url=https://localhost:8090/ --key=- --trust="$CERTDIR/server.crt" \ --extra-header='Authorization: Bearer magic string' From 99ce7a0770bd3eb53ebde3e93c35d8c684eb1abb Mon Sep 17 00:00:00 2001 From: Lennart Poettering Date: Thu, 30 Apr 2026 08:52:35 +0200 Subject: [PATCH 060/242] options: get rid of "on_error" parameter to FOREACH_OPTION I am really not a fan of full code lines passed to macros as parameters. Let's get rid of the 3rd parameter of FOREACH_OPTION() hence: 1. Let's return errors just as a regular value (though a negative one), that can be handled via a OPTION_ERROR case statement for the switch. This normalizes handling of the error, just like any other event returned by the option parser. 2. In order to avoid exploding the amount of boilerplate in each use (that just propagates the error on OPTION_ERROR), let's then introduce an explicit FOREACH_OPTION_OR_RETURN(), that returns from the calling function on its own (and makes that clear in the name). Together this cleans up, normalizes the logic and shortens the code. --- src/ac-power/ac-power.c | 2 +- src/ask-password/ask-password.c | 2 +- src/battery-check/battery-check.c | 2 +- src/binfmt/binfmt.c | 2 +- src/bless-boot/bless-boot.c | 2 +- src/bless-boot/boot-check-no-failures.c | 2 +- src/bootctl/bootctl.c | 2 +- src/cgls/cgls.c | 2 +- src/cgtop/cgtop.c | 2 +- src/core/executor.c | 2 +- src/creds/creds.c | 2 +- src/cryptenroll/cryptenroll.c | 2 +- src/cryptsetup/cryptsetup.c | 2 +- src/delta/delta.c | 2 +- src/detect-virt/detect-virt.c | 2 +- src/dissect/dissect.c | 2 +- src/escape/escape-tool.c | 2 +- src/factory-reset/factory-reset-tool.c | 2 +- src/firstboot/firstboot.c | 2 +- src/growfs/growfs.c | 2 +- src/hibernate-resume/hibernate-resume.c | 2 +- src/hostname/hostnamectl.c | 2 +- src/hwdb/hwdb.c | 2 +- src/id128/id128.c | 2 +- src/imds/imds-tool.c | 2 +- src/imds/imdsd.c | 2 +- src/import/export.c | 2 +- src/import/import-fs.c | 2 +- src/import/import.c | 2 +- src/import/importctl.c | 2 +- src/import/pull.c | 2 +- src/journal-remote/journal-gatewayd.c | 2 +- src/journal-remote/journal-remote-main.c | 2 +- src/journal/bsod.c | 2 +- src/journal/cat.c | 2 +- src/keyutil/keyutil.c | 2 +- src/libsystemd-network/test-ndisc-send.c | 2 +- src/libsystemd/sd-journal/test-journal-append.c | 2 +- src/libudev/test-libudev.c | 2 +- src/login/inhibit.c | 2 +- src/machine-id-setup/machine-id-setup-main.c | 2 +- src/measure/measure-tool.c | 2 +- src/modules-load/modules-load.c | 2 +- src/mute-console/mute-console.c | 2 +- src/network/generator/network-generator-main.c | 2 +- src/notify/notify.c | 2 +- src/nspawn/nspawn.c | 2 +- src/oom/oomctl.c | 2 +- src/oom/oomd.c | 2 +- src/path/path-tool.c | 2 +- src/pcrextend/pcrextend.c | 2 +- src/pcrlock/pcrlock.c | 2 +- src/ptyfwd/ptyfwd-tool.c | 2 +- src/random-seed/random-seed-tool.c | 2 +- src/repart/repart.c | 2 +- src/report/report-basic-server.c | 2 +- src/report/report.c | 2 +- src/run/run.c | 4 ++-- src/sbsign/sbsign.c | 2 +- src/shared/options.h | 14 ++++++++------ src/shutdown/shutdown.c | 2 +- src/sleep/sleep.c | 2 +- src/socket-activate/socket-activate.c | 2 +- src/socket-proxy/socket-proxyd.c | 2 +- src/ssh-generator/ssh-issue.c | 2 +- src/stdio-bridge/stdio-bridge.c | 2 +- src/storage/storage-block.c | 2 +- src/storage/storage-fs.c | 2 +- src/storage/storagectl.c | 2 +- src/storagetm/storagetm.c | 2 +- src/sysctl/sysctl.c | 2 +- src/sysupdate/sysupdate.c | 2 +- src/sysupdate/updatectl.c | 2 +- src/sysusers/sysusers.c | 2 +- src/test/test-chase-manual.c | 2 +- src/test/test-options.c | 5 ++++- src/timedate/timedatectl.c | 2 +- src/tmpfiles/test-offline-passwd.c | 2 +- src/tmpfiles/tmpfiles.c | 2 +- src/tpm2-setup/tpm2-clear.c | 2 +- src/tpm2-setup/tpm2-setup.c | 2 +- .../tty-ask-password-agent.c | 2 +- src/udev/ata_id/ata_id.c | 2 +- src/udev/cdrom_id/cdrom_id.c | 2 +- src/udev/dmi_memory_id/dmi_memory_id.c | 2 +- src/udev/fido_id/fido_id.c | 2 +- src/udev/iocost/iocost.c | 2 +- src/udev/mtd_probe/mtd_probe.c | 2 +- src/udev/scsi_id/scsi_id.c | 4 ++-- src/udev/udev-builtin-blkid.c | 2 +- src/udev/udev-builtin-hwdb.c | 2 +- src/udev/udev-config.c | 2 +- src/udev/udevadm-cat.c | 2 +- src/udev/udevadm-control.c | 2 +- src/udev/udevadm-hwdb.c | 2 +- src/udev/udevadm-info.c | 2 +- src/udev/udevadm-lock.c | 2 +- src/udev/udevadm-monitor.c | 2 +- src/udev/udevadm-settle.c | 2 +- src/udev/udevadm-test-builtin.c | 2 +- src/udev/udevadm-test.c | 2 +- src/udev/udevadm-trigger.c | 2 +- src/udev/udevadm-verify.c | 2 +- src/udev/udevadm-wait.c | 2 +- src/udev/udevadm.c | 2 +- src/udev/v4l_id/v4l_id.c | 2 +- src/update-done/update-done.c | 2 +- src/validatefs/validatefs.c | 2 +- src/varlinkctl/varlinkctl.c | 2 +- src/vmspawn/vmspawn.c | 2 +- src/vpick/vpick-tool.c | 2 +- 111 files changed, 123 insertions(+), 118 deletions(-) diff --git a/src/ac-power/ac-power.c b/src/ac-power/ac-power.c index 530ee82ff0665..2a9c517329321 100644 --- a/src/ac-power/ac-power.c +++ b/src/ac-power/ac-power.c @@ -43,7 +43,7 @@ static int parse_argv(int argc, char *argv[]) { OptionParser opts = { argc, argv }; - FOREACH_OPTION(c, &opts, /* on_error= */ return c) + FOREACH_OPTION_OR_RETURN(c, &opts) switch (c) { OPTION_COMMON_HELP: return help(); diff --git a/src/ask-password/ask-password.c b/src/ask-password/ask-password.c index 129fbf4d7e753..6a1abf5f999a1 100644 --- a/src/ask-password/ask-password.c +++ b/src/ask-password/ask-password.c @@ -75,7 +75,7 @@ static int parse_argv(int argc, char *argv[]) { OptionParser opts = { argc, argv }; - FOREACH_OPTION(c, &opts, /* on_error= */ return c) + FOREACH_OPTION_OR_RETURN(c, &opts) switch (c) { OPTION_COMMON_HELP: return help(); diff --git a/src/battery-check/battery-check.c b/src/battery-check/battery-check.c index 3e957d9fa71df..13dc8960f2efb 100644 --- a/src/battery-check/battery-check.c +++ b/src/battery-check/battery-check.c @@ -83,7 +83,7 @@ static int parse_argv(int argc, char *argv[]) { OptionParser opts = { argc, argv }; - FOREACH_OPTION(c, &opts, /* on_error= */ return c) + FOREACH_OPTION_OR_RETURN(c, &opts) switch (c) { OPTION_COMMON_HELP: diff --git a/src/binfmt/binfmt.c b/src/binfmt/binfmt.c index ed37fba276afb..4e24b35db744b 100644 --- a/src/binfmt/binfmt.c +++ b/src/binfmt/binfmt.c @@ -141,7 +141,7 @@ static int parse_argv(int argc, char *argv[], char ***ret_args) { OptionParser opts = { argc, argv }; - FOREACH_OPTION(c, &opts, /* on_error= */ return c) + FOREACH_OPTION_OR_RETURN(c, &opts) switch (c) { OPTION_COMMON_HELP: return help(); diff --git a/src/bless-boot/bless-boot.c b/src/bless-boot/bless-boot.c index 8d2fe21a11f66..e0afb3611c278 100644 --- a/src/bless-boot/bless-boot.c +++ b/src/bless-boot/bless-boot.c @@ -81,7 +81,7 @@ static int parse_argv(int argc, char *argv[], char ***ret_args) { OptionParser opts = { argc, argv }; - FOREACH_OPTION(c, &opts, /* on_error= */ return c) + FOREACH_OPTION_OR_RETURN(c, &opts) switch (c) { OPTION_COMMON_HELP: return help(); diff --git a/src/bless-boot/boot-check-no-failures.c b/src/bless-boot/boot-check-no-failures.c index 37b0f7fd6d2b2..9fa42a7ed6620 100644 --- a/src/bless-boot/boot-check-no-failures.c +++ b/src/bless-boot/boot-check-no-failures.c @@ -46,7 +46,7 @@ static int parse_argv(int argc, char *argv[]) { OptionParser opts = { argc, argv }; - FOREACH_OPTION(c, &opts, /* on_error= */ return c) + FOREACH_OPTION_OR_RETURN(c, &opts) switch (c) { OPTION_COMMON_HELP: diff --git a/src/bootctl/bootctl.c b/src/bootctl/bootctl.c index 967c21458d9ee..6869e838cfc4e 100644 --- a/src/bootctl/bootctl.c +++ b/src/bootctl/bootctl.c @@ -420,7 +420,7 @@ static int parse_argv(int argc, char *argv[], char ***ret_args) { OptionParser opts = { argc, argv }; - FOREACH_OPTION(c, &opts, /* on_error= */ return c) + FOREACH_OPTION_OR_RETURN(c, &opts) switch (c) { OPTION_GROUP("Block Device Discovery Commands"): {} diff --git a/src/cgls/cgls.c b/src/cgls/cgls.c index 9ed57c35cdf4f..cdb47ba8bdc57 100644 --- a/src/cgls/cgls.c +++ b/src/cgls/cgls.c @@ -72,7 +72,7 @@ static int parse_argv(int argc, char *argv[]) { OptionParser opts = { argc, argv, OPTION_PARSER_RETURN_POSITIONAL_ARGS }; int r; - FOREACH_OPTION(c, &opts, /* on_error= */ return c) + FOREACH_OPTION_OR_RETURN(c, &opts) switch (c) { OPTION_COMMON_HELP: diff --git a/src/cgtop/cgtop.c b/src/cgtop/cgtop.c index d1d20992bd159..dfee990a0f831 100644 --- a/src/cgtop/cgtop.c +++ b/src/cgtop/cgtop.c @@ -722,7 +722,7 @@ static int parse_argv(int argc, char *argv[]) { OptionParser opts = { argc, argv }; int r; - FOREACH_OPTION(c, &opts, /* on_error= */ return c) + FOREACH_OPTION_OR_RETURN(c, &opts) switch (c) { OPTION_COMMON_HELP: diff --git a/src/core/executor.c b/src/core/executor.c index 20bc65b63e6de..00761c6e3f7a6 100644 --- a/src/core/executor.c +++ b/src/core/executor.c @@ -64,7 +64,7 @@ static int parse_argv(int argc, char *argv[]) { OptionParser opts = { argc, argv }; - FOREACH_OPTION(c, &opts, /* on_error= */ return c) + FOREACH_OPTION_OR_RETURN(c, &opts) switch (c) { OPTION_COMMON_HELP: diff --git a/src/creds/creds.c b/src/creds/creds.c index e14a9a921cda7..95af91c120db7 100644 --- a/src/creds/creds.c +++ b/src/creds/creds.c @@ -826,7 +826,7 @@ static int parse_argv(int argc, char *argv[], char ***ret_args) { OptionParser opts = { argc, argv }; int r; - FOREACH_OPTION(c, &opts, /* on_error= */ return c) + FOREACH_OPTION_OR_RETURN(c, &opts) switch (c) { OPTION_COMMON_HELP: diff --git a/src/cryptenroll/cryptenroll.c b/src/cryptenroll/cryptenroll.c index 6561d86107843..bcc4b6cca66cd 100644 --- a/src/cryptenroll/cryptenroll.c +++ b/src/cryptenroll/cryptenroll.c @@ -279,7 +279,7 @@ static int parse_argv(int argc, char *argv[]) { OptionParser opts = { argc, argv }; int r; - FOREACH_OPTION(c, &opts, /* on_error= */ return c) + FOREACH_OPTION_OR_RETURN(c, &opts) switch (c) { OPTION_COMMON_HELP: diff --git a/src/cryptsetup/cryptsetup.c b/src/cryptsetup/cryptsetup.c index 43f2873da262c..2130c54047c04 100644 --- a/src/cryptsetup/cryptsetup.c +++ b/src/cryptsetup/cryptsetup.c @@ -2507,7 +2507,7 @@ static int parse_argv(int argc, char *argv[], char ***ret_args) { OptionParser opts = { argc, argv }; - FOREACH_OPTION(c, &opts, /* on_error= */ return c) + FOREACH_OPTION_OR_RETURN(c, &opts) switch (c) { OPTION_COMMON_HELP: diff --git a/src/delta/delta.c b/src/delta/delta.c index 92b77f9ddce5b..27dfc105ee7d6 100644 --- a/src/delta/delta.c +++ b/src/delta/delta.c @@ -520,7 +520,7 @@ static int parse_argv(int argc, char *argv[], char ***ret_args) { OptionParser opts = { argc, argv }; int r; - FOREACH_OPTION(c, &opts, /* on_error= */ return c) + FOREACH_OPTION_OR_RETURN(c, &opts) switch (c) { OPTION_COMMON_HELP: diff --git a/src/detect-virt/detect-virt.c b/src/detect-virt/detect-virt.c index f88528fccf992..be39634583f2c 100644 --- a/src/detect-virt/detect-virt.c +++ b/src/detect-virt/detect-virt.c @@ -55,7 +55,7 @@ static int parse_argv(int argc, char *argv[]) { OptionParser opts = { argc, argv }; - FOREACH_OPTION(c, &opts, /* on_error= */ return c) + FOREACH_OPTION_OR_RETURN(c, &opts) switch (c) { OPTION_COMMON_HELP: return help(); diff --git a/src/dissect/dissect.c b/src/dissect/dissect.c index 338aed8391d97..280d1ada5fdbf 100644 --- a/src/dissect/dissect.c +++ b/src/dissect/dissect.c @@ -232,7 +232,7 @@ static int parse_argv(int argc, char *argv[]) { OptionParser opts = { argc, argv }; - FOREACH_OPTION(c, &opts, /* on_error= */ return c) + FOREACH_OPTION_OR_RETURN(c, &opts) switch (c) { OPTION_COMMON_NO_PAGER: diff --git a/src/escape/escape-tool.c b/src/escape/escape-tool.c index 98f0b9a0146a0..09e0338c348fd 100644 --- a/src/escape/escape-tool.c +++ b/src/escape/escape-tool.c @@ -58,7 +58,7 @@ static int parse_argv(int argc, char *argv[], char ***ret_args) { OptionParser opts = { argc, argv }; - FOREACH_OPTION(c, &opts, /* on_error= */ return c) + FOREACH_OPTION_OR_RETURN(c, &opts) switch (c) { OPTION_COMMON_HELP: diff --git a/src/factory-reset/factory-reset-tool.c b/src/factory-reset/factory-reset-tool.c index 975c391fc8fae..e26e948e93416 100644 --- a/src/factory-reset/factory-reset-tool.c +++ b/src/factory-reset/factory-reset-tool.c @@ -72,7 +72,7 @@ static int parse_argv(int argc, char *argv[], char ***ret_args) { OptionParser opts = { argc, argv }; - FOREACH_OPTION(c, &opts, /* on_error= */ return c) + FOREACH_OPTION_OR_RETURN(c, &opts) switch (c) { OPTION_COMMON_HELP: return help(); diff --git a/src/firstboot/firstboot.c b/src/firstboot/firstboot.c index 721fbba21e102..3d768b491f83a 100644 --- a/src/firstboot/firstboot.c +++ b/src/firstboot/firstboot.c @@ -1270,7 +1270,7 @@ static int parse_argv(int argc, char *argv[]) { OptionParser opts = { argc, argv }; int r; - FOREACH_OPTION(c, &opts, /* on_error= */ return c) + FOREACH_OPTION_OR_RETURN(c, &opts) switch (c) { OPTION_COMMON_HELP: diff --git a/src/growfs/growfs.c b/src/growfs/growfs.c index efb94e3765053..30d371200d47f 100644 --- a/src/growfs/growfs.c +++ b/src/growfs/growfs.c @@ -162,7 +162,7 @@ static int parse_argv(int argc, char *argv[]) { OptionParser opts = { argc, argv }; - FOREACH_OPTION(c, &opts, /* on_error= */ return c) + FOREACH_OPTION_OR_RETURN(c, &opts) switch (c) { OPTION_COMMON_HELP: diff --git a/src/hibernate-resume/hibernate-resume.c b/src/hibernate-resume/hibernate-resume.c index 5f42097194e18..d2dccd59bda8c 100644 --- a/src/hibernate-resume/hibernate-resume.c +++ b/src/hibernate-resume/hibernate-resume.c @@ -57,7 +57,7 @@ static int parse_argv(int argc, char *argv[], char ***ret_args) { OptionParser opts = { argc, argv }; - FOREACH_OPTION(c, &opts, /* on_error= */ return c) + FOREACH_OPTION_OR_RETURN(c, &opts) switch (c) { OPTION_COMMON_HELP: diff --git a/src/hostname/hostnamectl.c b/src/hostname/hostnamectl.c index 9ad2a0b4ec05e..2989840b364d7 100644 --- a/src/hostname/hostnamectl.c +++ b/src/hostname/hostnamectl.c @@ -767,7 +767,7 @@ static int parse_argv(int argc, char *argv[], char ***ret_args) { OptionParser opts = { argc, argv }; - FOREACH_OPTION(c, &opts, /* on_error= */ return c) + FOREACH_OPTION_OR_RETURN(c, &opts) switch (c) { OPTION_COMMON_HELP: return help(); diff --git a/src/hwdb/hwdb.c b/src/hwdb/hwdb.c index 286ea000dbeec..5ad3bac3211ee 100644 --- a/src/hwdb/hwdb.c +++ b/src/hwdb/hwdb.c @@ -83,7 +83,7 @@ static int parse_argv(int argc, char *argv[], char ***ret_args) { OptionParser opts = { argc, argv }; - FOREACH_OPTION(c, &opts, /* on_error= */ return c) + FOREACH_OPTION_OR_RETURN(c, &opts) switch (c) { OPTION_COMMON_HELP: diff --git a/src/id128/id128.c b/src/id128/id128.c index fbcacdbe4608b..ceac8a832e5c1 100644 --- a/src/id128/id128.c +++ b/src/id128/id128.c @@ -239,7 +239,7 @@ static int parse_argv(int argc, char *argv[], char ***ret_args) { OptionParser opts = { argc, argv }; - FOREACH_OPTION(c, &opts, /* on_error= */ return c) + FOREACH_OPTION_OR_RETURN(c, &opts) switch (c) { OPTION_COMMON_HELP: return help(); diff --git a/src/imds/imds-tool.c b/src/imds/imds-tool.c index 7752e1f769cfe..06bc6c4487d73 100644 --- a/src/imds/imds-tool.c +++ b/src/imds/imds-tool.c @@ -84,7 +84,7 @@ static int parse_argv(int argc, char *argv[]) { OptionParser opts = { argc, argv }; - FOREACH_OPTION(c, &opts, /* on_error= */ return c) + FOREACH_OPTION_OR_RETURN(c, &opts) switch (c) { OPTION_COMMON_HELP: diff --git a/src/imds/imdsd.c b/src/imds/imdsd.c index c803b27829aba..a0c54ad84d7af 100644 --- a/src/imds/imdsd.c +++ b/src/imds/imdsd.c @@ -2251,7 +2251,7 @@ static int parse_argv(int argc, char *argv[]) { OptionParser opts = { argc, argv }; - FOREACH_OPTION(c, &opts, /* on_error= */ return c) + FOREACH_OPTION_OR_RETURN(c, &opts) switch (c) { OPTION_COMMON_HELP: diff --git a/src/import/export.c b/src/import/export.c index 5b233e71a5bfd..a77333643c6bc 100644 --- a/src/import/export.c +++ b/src/import/export.c @@ -240,7 +240,7 @@ static int parse_argv(int argc, char *argv[], char ***ret_args) { OptionParser opts = { argc, argv }; - FOREACH_OPTION(c, &opts, /* on_error= */ return c) + FOREACH_OPTION_OR_RETURN(c, &opts) switch (c) { OPTION_COMMON_HELP: diff --git a/src/import/import-fs.c b/src/import/import-fs.c index 878d0c5b8f14a..513a2c62d3960 100644 --- a/src/import/import-fs.c +++ b/src/import/import-fs.c @@ -316,7 +316,7 @@ static int parse_argv(int argc, char *argv[], char ***ret_args) { OptionParser opts = { argc, argv }; int r; - FOREACH_OPTION(c, &opts, /* on_error= */ return c) + FOREACH_OPTION_OR_RETURN(c, &opts) switch (c) { OPTION_COMMON_HELP: diff --git a/src/import/import.c b/src/import/import.c index 43740aeac7d46..798b6b743a21c 100644 --- a/src/import/import.c +++ b/src/import/import.c @@ -319,7 +319,7 @@ static int parse_argv(int argc, char *argv[], char ***ret_args) { OptionParser opts = { argc, argv }; - FOREACH_OPTION(c, &opts, /* on_error= */ return c) + FOREACH_OPTION_OR_RETURN(c, &opts) switch (c) { OPTION_COMMON_HELP: diff --git a/src/import/importctl.c b/src/import/importctl.c index 65fff5f5f3a43..d4a6483f36d7c 100644 --- a/src/import/importctl.c +++ b/src/import/importctl.c @@ -1115,7 +1115,7 @@ static int parse_argv(int argc, char *argv[], char ***ret_args) { OptionParser opts = { argc, argv }; - FOREACH_OPTION(c, &opts, /* on_error= */ return c) + FOREACH_OPTION_OR_RETURN(c, &opts) switch (c) { OPTION_COMMON_HELP: diff --git a/src/import/pull.c b/src/import/pull.c index 0cc23dc6ed4b2..6a1f913ff8a5c 100644 --- a/src/import/pull.c +++ b/src/import/pull.c @@ -366,7 +366,7 @@ static int parse_argv(int argc, char *argv[], char ***ret_args) { OptionParser opts = { argc, argv }; - FOREACH_OPTION(c, &opts, /* on_error= */ return c) + FOREACH_OPTION_OR_RETURN(c, &opts) switch (c) { OPTION_COMMON_HELP: diff --git a/src/journal-remote/journal-gatewayd.c b/src/journal-remote/journal-gatewayd.c index ffef7edea5ec8..e70fc4f6dbf37 100644 --- a/src/journal-remote/journal-gatewayd.c +++ b/src/journal-remote/journal-gatewayd.c @@ -1121,7 +1121,7 @@ static int parse_argv(int argc, char *argv[]) { OptionParser opts = { argc, argv }; int r; - FOREACH_OPTION(c, &opts, /* on_error= */ return c) + FOREACH_OPTION_OR_RETURN(c, &opts) switch (c) { OPTION_COMMON_HELP: diff --git a/src/journal-remote/journal-remote-main.c b/src/journal-remote/journal-remote-main.c index 5709f87f74617..614ec61be907d 100644 --- a/src/journal-remote/journal-remote-main.c +++ b/src/journal-remote/journal-remote-main.c @@ -907,7 +907,7 @@ static int parse_argv(int argc, char *argv[]) { OptionParser opts = { argc, argv }; - FOREACH_OPTION(c, &opts, /* on_error= */ return c) + FOREACH_OPTION_OR_RETURN(c, &opts) switch (c) { OPTION_COMMON_HELP: diff --git a/src/journal/bsod.c b/src/journal/bsod.c index 1701605590209..e380e08b1c20c 100644 --- a/src/journal/bsod.c +++ b/src/journal/bsod.c @@ -250,7 +250,7 @@ static int parse_argv(int argc, char *argv[]) { OptionParser opts = { argc, argv }; int r; - FOREACH_OPTION(c, &opts, /* on_error= */ return c) + FOREACH_OPTION_OR_RETURN(c, &opts) switch (c) { OPTION_COMMON_HELP: diff --git a/src/journal/cat.c b/src/journal/cat.c index f8b5e0df31727..b2b1689ff26d5 100644 --- a/src/journal/cat.c +++ b/src/journal/cat.c @@ -62,7 +62,7 @@ static int parse_argv(int argc, char *argv[], char ***ret_args) { OptionParser opts = { argc, argv, OPTION_PARSER_STOP_AT_FIRST_NONOPTION }; int r; - FOREACH_OPTION(c, &opts, /* on_error= */ return c) + FOREACH_OPTION_OR_RETURN(c, &opts) switch (c) { OPTION_COMMON_HELP: diff --git a/src/keyutil/keyutil.c b/src/keyutil/keyutil.c index 474f42fec72ec..2a66fabb19542 100644 --- a/src/keyutil/keyutil.c +++ b/src/keyutil/keyutil.c @@ -90,7 +90,7 @@ static int parse_argv(int argc, char *argv[], char ***ret_args) { OptionParser opts = { argc, argv }; int r; - FOREACH_OPTION(c, &opts, /* on_error= */ return c) + FOREACH_OPTION_OR_RETURN(c, &opts) switch (c) { OPTION_COMMON_HELP: diff --git a/src/libsystemd-network/test-ndisc-send.c b/src/libsystemd-network/test-ndisc-send.c index de04198d370c0..87b8abefd58f0 100644 --- a/src/libsystemd-network/test-ndisc-send.c +++ b/src/libsystemd-network/test-ndisc-send.c @@ -80,7 +80,7 @@ static int parse_argv(int argc, char *argv[]) { OptionParser opts = { argc, argv }; - FOREACH_OPTION(c, &opts, /* on_error= */ return c) + FOREACH_OPTION_OR_RETURN(c, &opts) switch (c) { OPTION_COMMON_VERSION: diff --git a/src/libsystemd/sd-journal/test-journal-append.c b/src/libsystemd/sd-journal/test-journal-append.c index 75a1fce6fc98c..c71240660dcd7 100644 --- a/src/libsystemd/sd-journal/test-journal-append.c +++ b/src/libsystemd/sd-journal/test-journal-append.c @@ -153,7 +153,7 @@ int main(int argc, char *argv[]) { OptionParser opts = { argc, argv }; - FOREACH_OPTION(c, &opts, /* on_error= */ return c) + FOREACH_OPTION_OR_RETURN(c, &opts) switch (c) { OPTION_COMMON_HELP: { diff --git a/src/libudev/test-libudev.c b/src/libudev/test-libudev.c index a653f0c6c8fdd..06feb1ffbc61a 100644 --- a/src/libudev/test-libudev.c +++ b/src/libudev/test-libudev.c @@ -430,7 +430,7 @@ static int parse_args(int argc, char *argv[], const char **syspath, const char * OptionParser opts = { argc, argv }; - FOREACH_OPTION(c, &opts, /* on_error= */ return c) + FOREACH_OPTION_OR_RETURN(c, &opts) switch (c) { OPTION_COMMON_HELP: diff --git a/src/login/inhibit.c b/src/login/inhibit.c index 4abfc1c6d3acd..78c784c30fad8 100644 --- a/src/login/inhibit.c +++ b/src/login/inhibit.c @@ -203,7 +203,7 @@ static int parse_argv(int argc, char *argv[], char ***remaining_args) { OptionParser opts = { argc, argv, OPTION_PARSER_STOP_AT_FIRST_NONOPTION }; int r; - FOREACH_OPTION(c, &opts, /* on_error= */ return c) + FOREACH_OPTION_OR_RETURN(c, &opts) switch (c) { OPTION_COMMON_HELP: diff --git a/src/machine-id-setup/machine-id-setup-main.c b/src/machine-id-setup/machine-id-setup-main.c index 9dd389dbaa4f9..2363427b5f54e 100644 --- a/src/machine-id-setup/machine-id-setup-main.c +++ b/src/machine-id-setup/machine-id-setup-main.c @@ -78,7 +78,7 @@ static int parse_argv(int argc, char *argv[]) { OptionParser opts = { argc, argv }; int r; - FOREACH_OPTION(c, &opts, /* on_error= */ return c) + FOREACH_OPTION_OR_RETURN(c, &opts) switch (c) { OPTION_COMMON_HELP: diff --git a/src/measure/measure-tool.c b/src/measure/measure-tool.c index a92a418f61fce..eeb001f3fed4a 100644 --- a/src/measure/measure-tool.c +++ b/src/measure/measure-tool.c @@ -139,7 +139,7 @@ static int parse_argv(int argc, char *argv[], char ***ret_args) { OptionParser opts = { argc, argv }; int r; - FOREACH_OPTION(c, &opts, /* on_error= */ return c) + FOREACH_OPTION_OR_RETURN(c, &opts) switch (c) { OPTION_COMMON_HELP: diff --git a/src/modules-load/modules-load.c b/src/modules-load/modules-load.c index f6806d604ab55..0917f800a1a84 100644 --- a/src/modules-load/modules-load.c +++ b/src/modules-load/modules-load.c @@ -361,7 +361,7 @@ static int parse_argv(int argc, char *argv[], char ***ret_args) { OptionParser opts = { argc, argv }; - FOREACH_OPTION(c, &opts, /* on_error= */ return c) + FOREACH_OPTION_OR_RETURN(c, &opts) switch (c) { OPTION_COMMON_HELP: diff --git a/src/mute-console/mute-console.c b/src/mute-console/mute-console.c index be6b5fac09166..d5788de09b3b9 100644 --- a/src/mute-console/mute-console.c +++ b/src/mute-console/mute-console.c @@ -63,7 +63,7 @@ static int parse_argv(int argc, char *argv[]) { OptionParser opts = { argc, argv }; int r; - FOREACH_OPTION(c, &opts, /* on_error= */ return c) + FOREACH_OPTION_OR_RETURN(c, &opts) switch (c) { OPTION_COMMON_HELP: diff --git a/src/network/generator/network-generator-main.c b/src/network/generator/network-generator-main.c index 721d36b831945..df9ce9265dbbb 100644 --- a/src/network/generator/network-generator-main.c +++ b/src/network/generator/network-generator-main.c @@ -174,7 +174,7 @@ static int parse_argv(int argc, char *argv[], char ***ret_args) { OptionParser opts = { argc, argv }; - FOREACH_OPTION(c, &opts, /* on_error= */ return c) + FOREACH_OPTION_OR_RETURN(c, &opts) switch (c) { OPTION_COMMON_HELP: diff --git a/src/notify/notify.c b/src/notify/notify.c index 00f915dc7bff1..6c50e4c57c394 100644 --- a/src/notify/notify.c +++ b/src/notify/notify.c @@ -155,7 +155,7 @@ static int parse_argv(int argc, char *argv[], char ***ret_args) { OptionParser opts = { argc, argv }; - FOREACH_OPTION(c, &opts, /* on_error= */ return c) + FOREACH_OPTION_OR_RETURN(c, &opts) switch (c) { OPTION_COMMON_HELP: diff --git a/src/nspawn/nspawn.c b/src/nspawn/nspawn.c index f96a6b08b981c..efe927f36e9b6 100644 --- a/src/nspawn/nspawn.c +++ b/src/nspawn/nspawn.c @@ -605,7 +605,7 @@ static int parse_argv(int argc, char *argv[]) { OptionParser opts = { argc, argv, OPTION_PARSER_STOP_AT_FIRST_NONOPTION }; - FOREACH_OPTION(c, &opts, /* on_error= */ return c) { + FOREACH_OPTION_OR_RETURN(c, &opts) { switch (c) { OPTION_COMMON_HELP: diff --git a/src/oom/oomctl.c b/src/oom/oomctl.c index b73e2eb5abfe5..82ffe0e8379fd 100644 --- a/src/oom/oomctl.c +++ b/src/oom/oomctl.c @@ -93,7 +93,7 @@ static int parse_argv(int argc, char *argv[], char ***ret_args) { OptionParser opts = { argc, argv }; - FOREACH_OPTION(c, &opts, /* on_error= */ return c) + FOREACH_OPTION_OR_RETURN(c, &opts) switch (c) { OPTION_COMMON_HELP: diff --git a/src/oom/oomd.c b/src/oom/oomd.c index 2250d7ec7f189..62eecfc065c65 100644 --- a/src/oom/oomd.c +++ b/src/oom/oomd.c @@ -53,7 +53,7 @@ static int parse_argv(int argc, char *argv[]) { OptionParser opts = { argc, argv }; - FOREACH_OPTION(c, &opts, /* on_error= */ return c) + FOREACH_OPTION_OR_RETURN(c, &opts) switch (c) { OPTION_COMMON_HELP: diff --git a/src/path/path-tool.c b/src/path/path-tool.c index 22544b9463854..29696501d03a0 100644 --- a/src/path/path-tool.c +++ b/src/path/path-tool.c @@ -206,7 +206,7 @@ static int parse_argv(int argc, char *argv[], char ***ret_args) { OptionParser opts = { argc, argv }; - FOREACH_OPTION(c, &opts, /* on_error= */ return c) + FOREACH_OPTION_OR_RETURN(c, &opts) switch (c) { OPTION_COMMON_HELP: diff --git a/src/pcrextend/pcrextend.c b/src/pcrextend/pcrextend.c index 5b846b9d3a9dc..f452363209d66 100644 --- a/src/pcrextend/pcrextend.c +++ b/src/pcrextend/pcrextend.c @@ -84,7 +84,7 @@ static int parse_argv(int argc, char *argv[], char ***ret_args) { OptionParser opts = { argc, argv }; int r; - FOREACH_OPTION(c, &opts, /* on_error= */ return c) + FOREACH_OPTION_OR_RETURN(c, &opts) switch (c) { OPTION_COMMON_HELP: diff --git a/src/pcrlock/pcrlock.c b/src/pcrlock/pcrlock.c index 752f67cbdb990..09f49b2ed250e 100644 --- a/src/pcrlock/pcrlock.c +++ b/src/pcrlock/pcrlock.c @@ -5193,7 +5193,7 @@ static int parse_argv(int argc, char *argv[], char ***ret_args) { bool auto_location = true; int r; - FOREACH_OPTION(c, &opts, /* on_error= */ return c) + FOREACH_OPTION_OR_RETURN(c, &opts) switch (c) { OPTION_COMMON_HELP: diff --git a/src/ptyfwd/ptyfwd-tool.c b/src/ptyfwd/ptyfwd-tool.c index 6d98a8e7ef09e..e7b531c873088 100644 --- a/src/ptyfwd/ptyfwd-tool.c +++ b/src/ptyfwd/ptyfwd-tool.c @@ -65,7 +65,7 @@ static int parse_argv(int argc, char *argv[], char ***remaining_args) { OptionParser opts = { argc, argv, OPTION_PARSER_STOP_AT_FIRST_NONOPTION }; int r; - FOREACH_OPTION(c, &opts, /* on_error= */ return c) + FOREACH_OPTION_OR_RETURN(c, &opts) switch (c) { OPTION_COMMON_HELP: diff --git a/src/random-seed/random-seed-tool.c b/src/random-seed/random-seed-tool.c index 2eabcea176c2a..f573e84412ffb 100644 --- a/src/random-seed/random-seed-tool.c +++ b/src/random-seed/random-seed-tool.c @@ -352,7 +352,7 @@ static int parse_argv(int argc, char *argv[]) { OptionParser opts = { argc, argv }; int r; - FOREACH_OPTION(c, &opts, /* on_error= */ return c) + FOREACH_OPTION_OR_RETURN(c, &opts) switch (c) { OPTION_COMMON_HELP: diff --git a/src/repart/repart.c b/src/repart/repart.c index 26588c6242b4d..ad19f0ab1ec7a 100644 --- a/src/repart/repart.c +++ b/src/repart/repart.c @@ -9689,7 +9689,7 @@ static int parse_argv(int argc, char *argv[]) { bool auto_public_key_pcr_mask = true, auto_pcrlock = true; int r; - FOREACH_OPTION(c, &opts, /* on_error= */ return c) + FOREACH_OPTION_OR_RETURN(c, &opts) switch (c) { OPTION_GROUP("Options"): {} diff --git a/src/report/report-basic-server.c b/src/report/report-basic-server.c index 1e2eca31eae68..bca943fd7faee 100644 --- a/src/report/report-basic-server.c +++ b/src/report/report-basic-server.c @@ -59,7 +59,7 @@ static int parse_argv(int argc, char *argv[]) { OptionParser opts = { argc, argv }; - FOREACH_OPTION(c, &opts, /* on_error= */ return c) + FOREACH_OPTION_OR_RETURN(c, &opts) switch (c) { OPTION_COMMON_HELP: return help(); diff --git a/src/report/report.c b/src/report/report.c index fef01c094ef9f..390871e942863 100644 --- a/src/report/report.c +++ b/src/report/report.c @@ -987,7 +987,7 @@ static int parse_argv(int argc, char *argv[], char ***ret_args) { OptionParser opts = { argc, argv }; - FOREACH_OPTION(c, &opts, /* on_error= */ return c) + FOREACH_OPTION_OR_RETURN(c, &opts) switch (c) { OPTION_COMMON_HELP: return help(); diff --git a/src/run/run.c b/src/run/run.c index 9d1042e845a33..46b8014e580c5 100644 --- a/src/run/run.c +++ b/src/run/run.c @@ -255,7 +255,7 @@ static int parse_argv(int argc, char *argv[]) { OptionParser opts = { argc, argv, OPTION_PARSER_STOP_AT_FIRST_NONOPTION, "systemd-run" }; - FOREACH_OPTION(c, &opts, /* on_error= */ return c) + FOREACH_OPTION_OR_RETURN(c, &opts) switch (c) { OPTION_NAMESPACE("systemd-run"): {} @@ -783,7 +783,7 @@ static int parse_argv_sudo_mode(int argc, char *argv[]) { OptionParser opts = { argc, argv, OPTION_PARSER_STOP_AT_FIRST_NONOPTION, "run0" }; - FOREACH_OPTION(c, &opts, /* on_error= */ return c) + FOREACH_OPTION_OR_RETURN(c, &opts) switch (c) { OPTION_NAMESPACE("run0"): {} diff --git a/src/sbsign/sbsign.c b/src/sbsign/sbsign.c index 7d866fde87555..f5a88b2849fe2 100644 --- a/src/sbsign/sbsign.c +++ b/src/sbsign/sbsign.c @@ -96,7 +96,7 @@ static int parse_argv(int argc, char *argv[], char ***ret_args) { OptionParser opts = { argc, argv }; int r; - FOREACH_OPTION(c, &opts, /* on_error= */ return c) + FOREACH_OPTION_OR_RETURN(c, &opts) switch (c) { OPTION_COMMON_HELP: diff --git a/src/shared/options.h b/src/shared/options.h index 5803eb120ef67..1f28dab8ad51f 100644 --- a/src/shared/options.h +++ b/src/shared/options.h @@ -216,13 +216,15 @@ int option_parse( const Option options_end[], OptionParser *state); -/* Iterate over options. */ -#define FOREACH_OPTION(c, state, on_error) \ +/* Iterate over options. Don't forget to handle errors (negative c)! */ +#define FOREACH_OPTION(c, state) \ + for (int c; (c = option_parse(ALIGN_PTR(__start_SYSTEMD_OPTIONS), __stop_SYSTEMD_OPTIONS, state)) != 0; ) + +#define FOREACH_OPTION_OR_RETURN(c, state) \ for (int c; (c = option_parse(ALIGN_PTR(__start_SYSTEMD_OPTIONS), __stop_SYSTEMD_OPTIONS, state)) != 0; ) \ - if (c < 0) { \ - on_error; \ - break; \ - } else + if (c < 0) \ + return c; \ + else /* Those helpers are used *during* option parsing and allow looking at or taking the next item in * the argv array, either an option or a positional parameter. */ diff --git a/src/shutdown/shutdown.c b/src/shutdown/shutdown.c index c572138596d38..131550c46b20d 100644 --- a/src/shutdown/shutdown.c +++ b/src/shutdown/shutdown.c @@ -66,7 +66,7 @@ static int parse_argv(int argc, char *argv[]) { OptionParser opts = { argc, argv, OPTION_PARSER_RETURN_POSITIONAL_ARGS }; int r; - FOREACH_OPTION(c, &opts, /* on_error= */ return c) + FOREACH_OPTION_OR_RETURN(c, &opts) switch (c) { OPTION_COMMON_LOG_LEVEL: diff --git a/src/sleep/sleep.c b/src/sleep/sleep.c index 3b2f9d698bb84..53f306a8faefc 100644 --- a/src/sleep/sleep.c +++ b/src/sleep/sleep.c @@ -731,7 +731,7 @@ static int parse_argv(int argc, char *argv[], char ***ret_args) { OptionParser opts = { argc, argv }; - FOREACH_OPTION(c, &opts, /* on_error= */ return c) + FOREACH_OPTION_OR_RETURN(c, &opts) switch (c) { OPTION_COMMON_HELP: diff --git a/src/socket-activate/socket-activate.c b/src/socket-activate/socket-activate.c index 03cf327b6259e..768a2a3ea7235 100644 --- a/src/socket-activate/socket-activate.c +++ b/src/socket-activate/socket-activate.c @@ -358,7 +358,7 @@ static int parse_argv(int argc, char *argv[], char ***remaining_args) { OptionParser opts = { argc, argv, OPTION_PARSER_STOP_AT_FIRST_NONOPTION }; int r; - FOREACH_OPTION(c, &opts, /* on_error= */ return c) + FOREACH_OPTION_OR_RETURN(c, &opts) switch (c) { OPTION_COMMON_HELP: diff --git a/src/socket-proxy/socket-proxyd.c b/src/socket-proxy/socket-proxyd.c index ea68009b35802..77dc903535633 100644 --- a/src/socket-proxy/socket-proxyd.c +++ b/src/socket-proxy/socket-proxyd.c @@ -423,7 +423,7 @@ static int parse_argv(int argc, char *argv[]) { OptionParser opts = { argc, argv }; int r; - FOREACH_OPTION(c, &opts, /* on_error= */ return c) + FOREACH_OPTION_OR_RETURN(c, &opts) switch (c) { OPTION_COMMON_HELP: diff --git a/src/ssh-generator/ssh-issue.c b/src/ssh-generator/ssh-issue.c index ee128b5e1811c..2028d3f942393 100644 --- a/src/ssh-generator/ssh-issue.c +++ b/src/ssh-generator/ssh-issue.c @@ -163,7 +163,7 @@ static int parse_argv(int argc, char *argv[], char ***ret_args) { const char *verb = NULL; int r; - FOREACH_OPTION(c, &opts, /* on_error= */ return c) + FOREACH_OPTION_OR_RETURN(c, &opts) switch (c) { OPTION_COMMON_HELP: diff --git a/src/stdio-bridge/stdio-bridge.c b/src/stdio-bridge/stdio-bridge.c index 4be5205d59894..01686d2cd6c0b 100644 --- a/src/stdio-bridge/stdio-bridge.c +++ b/src/stdio-bridge/stdio-bridge.c @@ -49,7 +49,7 @@ static int parse_argv(int argc, char *argv[]) { OptionParser opts = { argc, argv }; int r; - FOREACH_OPTION(c, &opts, /* on_error= */ return c) + FOREACH_OPTION_OR_RETURN(c, &opts) switch (c) { OPTION_COMMON_HELP: diff --git a/src/storage/storage-block.c b/src/storage/storage-block.c index 4c21795c360ad..e5454a29c28a0 100644 --- a/src/storage/storage-block.c +++ b/src/storage/storage-block.c @@ -408,7 +408,7 @@ static int parse_argv(int argc, char *argv[]) { assert(argv); OptionParser opts = { argc, argv }; - FOREACH_OPTION(c, &opts, /* on_error= */ return c) + FOREACH_OPTION_OR_RETURN(c, &opts) switch (c) { OPTION_COMMON_HELP: diff --git a/src/storage/storage-fs.c b/src/storage/storage-fs.c index 47ec9829c494d..c01e91a4cefe6 100644 --- a/src/storage/storage-fs.c +++ b/src/storage/storage-fs.c @@ -768,7 +768,7 @@ static int parse_argv(int argc, char *argv[]) { assert(argv); OptionParser opts = { argc, argv }; - FOREACH_OPTION(c, &opts, /* on_error= */ return c) + FOREACH_OPTION_OR_RETURN(c, &opts) switch (c) { OPTION_COMMON_HELP: diff --git a/src/storage/storagectl.c b/src/storage/storagectl.c index a21072e78f204..2bc7b7c2a3e40 100644 --- a/src/storage/storagectl.c +++ b/src/storage/storagectl.c @@ -406,7 +406,7 @@ static int parse_argv(int argc, char *argv[], char ***args) { assert(argv); OptionParser opts = { argc, argv }; - FOREACH_OPTION(c, &opts, /* on_error= */ return c) + FOREACH_OPTION_OR_RETURN(c, &opts) switch (c) { OPTION_COMMON_HELP: diff --git a/src/storagetm/storagetm.c b/src/storagetm/storagetm.c index 5129887d795fe..384e88f7b88bc 100644 --- a/src/storagetm/storagetm.c +++ b/src/storagetm/storagetm.c @@ -80,7 +80,7 @@ static int parse_argv(int argc, char *argv[]) { OptionParser opts = { argc, argv }; int r; - FOREACH_OPTION(c, &opts, /* on_error= */ return c) + FOREACH_OPTION_OR_RETURN(c, &opts) switch (c) { OPTION_COMMON_HELP: diff --git a/src/sysctl/sysctl.c b/src/sysctl/sysctl.c index 6a9e33e6e6f7b..e124b56fc9c12 100644 --- a/src/sysctl/sysctl.c +++ b/src/sysctl/sysctl.c @@ -362,7 +362,7 @@ static int parse_argv(int argc, char *argv[], char ***remaining_args) { OptionParser opts = { argc, argv }; - FOREACH_OPTION(c, &opts, /* on_error= */ return c) + FOREACH_OPTION_OR_RETURN(c, &opts) switch (c) { OPTION_COMMON_HELP: diff --git a/src/sysupdate/sysupdate.c b/src/sysupdate/sysupdate.c index 648dd093e6160..ff8829115148e 100644 --- a/src/sysupdate/sysupdate.c +++ b/src/sysupdate/sysupdate.c @@ -1865,7 +1865,7 @@ static int parse_argv(int argc, char *argv[], char ***remaining_args) { OptionParser opts = { argc, argv }; int r; - FOREACH_OPTION(c, &opts, /* on_error= */ return c) + FOREACH_OPTION_OR_RETURN(c, &opts) switch (c) { OPTION_COMMON_HELP: diff --git a/src/sysupdate/updatectl.c b/src/sysupdate/updatectl.c index 65d2c7675ed45..f09c18cd173bb 100644 --- a/src/sysupdate/updatectl.c +++ b/src/sysupdate/updatectl.c @@ -1689,7 +1689,7 @@ static int parse_argv(int argc, char *argv[], char ***ret_args) { OptionParser opts = { argc, argv }; - FOREACH_OPTION(c, &opts, /* on_error= */ return c) + FOREACH_OPTION_OR_RETURN(c, &opts) switch (c) { OPTION_LONG("reboot", NULL, "Reboot after updating to newer version"): diff --git a/src/sysusers/sysusers.c b/src/sysusers/sysusers.c index 38fe4f4515161..05a3e2db509e4 100644 --- a/src/sysusers/sysusers.c +++ b/src/sysusers/sysusers.c @@ -2103,7 +2103,7 @@ static int parse_argv(int argc, char *argv[], char ***ret_args) { OptionParser opts = { argc, argv }; - FOREACH_OPTION(c, &opts, /* on_error= */ return c) + FOREACH_OPTION_OR_RETURN(c, &opts) switch (c) { OPTION_COMMON_CAT_CONFIG: diff --git a/src/test/test-chase-manual.c b/src/test/test-chase-manual.c index daa8713f48009..410522ceb161f 100644 --- a/src/test/test-chase-manual.c +++ b/src/test/test-chase-manual.c @@ -39,7 +39,7 @@ static int parse_argv(int argc, char *argv[], char ***ret_args) { OptionParser opts = { argc, argv }; - FOREACH_OPTION(c, &opts, /* on_error= */ return c) + FOREACH_OPTION_OR_RETURN(c, &opts) switch (c) { OPTION_COMMON_HELP: diff --git a/src/test/test-options.c b/src/test/test-options.c index efa3a73d69edd..d00262fa34bb2 100644 --- a/src/test/test-options.c +++ b/src/test/test-options.c @@ -842,7 +842,10 @@ static void test_macros_parse_one( OptionParser opts = { argc, argv, mode, namespace }; - FOREACH_OPTION(c, &opts, assert_not_reached()) { + FOREACH_OPTION(c, &opts) { + + assert(c >= 0); + log_debug("%c %s: %s=%s", opts.opt->short_code != 0 ? opts.opt->short_code : ' ', opts.opt->long_code ?: "", diff --git a/src/timedate/timedatectl.c b/src/timedate/timedatectl.c index 211f7d7a6d280..c35b090035eac 100644 --- a/src/timedate/timedatectl.c +++ b/src/timedate/timedatectl.c @@ -949,7 +949,7 @@ static int parse_argv(int argc, char *argv[], char ***ret_args) { OptionParser opts = { argc, argv }; - FOREACH_OPTION(c, &opts, /* on_error= */ return c) + FOREACH_OPTION_OR_RETURN(c, &opts) switch (c) { OPTION_COMMON_HELP: return help(); diff --git a/src/tmpfiles/test-offline-passwd.c b/src/tmpfiles/test-offline-passwd.c index 21b2697ceeab6..f357ef8865d8d 100644 --- a/src/tmpfiles/test-offline-passwd.c +++ b/src/tmpfiles/test-offline-passwd.c @@ -45,7 +45,7 @@ static int parse_argv(int argc, char *argv[], char ***ret_args) { OptionParser opts = { argc, argv }; - FOREACH_OPTION(c, &opts, /* on_error= */ return c) + FOREACH_OPTION_OR_RETURN(c, &opts) switch (c) { OPTION('r', "root", "PATH", "Operate on an alternate filesystem root"): diff --git a/src/tmpfiles/tmpfiles.c b/src/tmpfiles/tmpfiles.c index 0cc06ca8ec0aa..44843f3ca77ec 100644 --- a/src/tmpfiles/tmpfiles.c +++ b/src/tmpfiles/tmpfiles.c @@ -4189,7 +4189,7 @@ static int parse_argv(int argc, char *argv[], char ***ret_args) { OptionParser opts = { argc, argv }; - FOREACH_OPTION(c, &opts, /* on_error= */ return c) + FOREACH_OPTION_OR_RETURN(c, &opts) switch (c) { OPTION_LONG("create", NULL, "Create and adjust files and directories"): diff --git a/src/tpm2-setup/tpm2-clear.c b/src/tpm2-setup/tpm2-clear.c index b65905c03dbcd..19186ecc02fd8 100644 --- a/src/tpm2-setup/tpm2-clear.c +++ b/src/tpm2-setup/tpm2-clear.c @@ -52,7 +52,7 @@ static int parse_argv(int argc, char *argv[]) { OptionParser opts = { argc, argv }; - FOREACH_OPTION(c, &opts, /* on_error= */ return c) + FOREACH_OPTION_OR_RETURN(c, &opts) switch (c) { OPTION_COMMON_HELP: diff --git a/src/tpm2-setup/tpm2-setup.c b/src/tpm2-setup/tpm2-setup.c index b8e585225be2a..bb08e31a81c87 100644 --- a/src/tpm2-setup/tpm2-setup.c +++ b/src/tpm2-setup/tpm2-setup.c @@ -78,7 +78,7 @@ static int parse_argv(int argc, char *argv[]) { OptionParser opts = { argc, argv }; - FOREACH_OPTION(c, &opts, /* on_error= */ return c) + FOREACH_OPTION_OR_RETURN(c, &opts) switch (c) { OPTION_COMMON_HELP: diff --git a/src/tty-ask-password-agent/tty-ask-password-agent.c b/src/tty-ask-password-agent/tty-ask-password-agent.c index cd49503156db7..d675e4269ac16 100644 --- a/src/tty-ask-password-agent/tty-ask-password-agent.c +++ b/src/tty-ask-password-agent/tty-ask-password-agent.c @@ -475,7 +475,7 @@ static int parse_argv(int argc, char *argv[]) { OptionParser opts = { argc, argv }; int r; - FOREACH_OPTION(c, &opts, /* on_error= */ return c) + FOREACH_OPTION_OR_RETURN(c, &opts) switch (c) { OPTION_COMMON_HELP: diff --git a/src/udev/ata_id/ata_id.c b/src/udev/ata_id/ata_id.c index ea28ad027d313..c2fabdcdb844b 100644 --- a/src/udev/ata_id/ata_id.c +++ b/src/udev/ata_id/ata_id.c @@ -379,7 +379,7 @@ static int parse_argv(int argc, char *argv[]) { OptionParser opts = { argc, argv }; - FOREACH_OPTION(c, &opts, /* on_error= */ return c) + FOREACH_OPTION_OR_RETURN(c, &opts) switch (c) { OPTION_COMMON_HELP: diff --git a/src/udev/cdrom_id/cdrom_id.c b/src/udev/cdrom_id/cdrom_id.c index b78096bde6362..27423e985155e 100644 --- a/src/udev/cdrom_id/cdrom_id.c +++ b/src/udev/cdrom_id/cdrom_id.c @@ -920,7 +920,7 @@ static int parse_argv(int argc, char *argv[]) { OptionParser opts = { argc, argv }; - FOREACH_OPTION(c, &opts, /* on_error= */ return c) + FOREACH_OPTION_OR_RETURN(c, &opts) switch (c) { OPTION_COMMON_HELP: diff --git a/src/udev/dmi_memory_id/dmi_memory_id.c b/src/udev/dmi_memory_id/dmi_memory_id.c index 269ea15252101..a1708c128c928 100644 --- a/src/udev/dmi_memory_id/dmi_memory_id.c +++ b/src/udev/dmi_memory_id/dmi_memory_id.c @@ -664,7 +664,7 @@ static int parse_argv(int argc, char *argv[]) { OptionParser opts = { argc, argv }; - FOREACH_OPTION(c, &opts, /* on_error= */ return c) + FOREACH_OPTION_OR_RETURN(c, &opts) switch (c) { OPTION_COMMON_HELP: diff --git a/src/udev/fido_id/fido_id.c b/src/udev/fido_id/fido_id.c index 6b31f49a48076..a19c7eebec6e7 100644 --- a/src/udev/fido_id/fido_id.c +++ b/src/udev/fido_id/fido_id.c @@ -49,7 +49,7 @@ static int parse_argv(int argc, char *argv[]) { OptionParser opts = { argc, argv }; - FOREACH_OPTION(c, &opts, /* on_error= */ return c) + FOREACH_OPTION_OR_RETURN(c, &opts) switch (c) { OPTION_COMMON_HELP: diff --git a/src/udev/iocost/iocost.c b/src/udev/iocost/iocost.c index 3b926fa4a24f2..eadab1cb8a091 100644 --- a/src/udev/iocost/iocost.c +++ b/src/udev/iocost/iocost.c @@ -86,7 +86,7 @@ static int parse_argv(int argc, char *argv[], char ***remaining_args) { OptionParser opts = { argc, argv }; - FOREACH_OPTION(c, &opts, /* on_error= */ return c) + FOREACH_OPTION_OR_RETURN(c, &opts) switch (c) { OPTION_COMMON_HELP: diff --git a/src/udev/mtd_probe/mtd_probe.c b/src/udev/mtd_probe/mtd_probe.c index 3e5f162343dbb..fe9924f1b6e28 100644 --- a/src/udev/mtd_probe/mtd_probe.c +++ b/src/udev/mtd_probe/mtd_probe.c @@ -56,7 +56,7 @@ static int parse_argv(int argc, char *argv[]) { OptionParser opts = { argc, argv }; - FOREACH_OPTION(c, &opts, /* on_error= */ return c) + FOREACH_OPTION_OR_RETURN(c, &opts) switch (c) { OPTION_COMMON_HELP: diff --git a/src/udev/scsi_id/scsi_id.c b/src/udev/scsi_id/scsi_id.c index f272648c420c9..d7970722848c8 100644 --- a/src/udev/scsi_id/scsi_id.c +++ b/src/udev/scsi_id/scsi_id.c @@ -228,7 +228,7 @@ static int set_options(int argc, char **argv, char *maj_min_dev) { OptionParser opts = { argc, argv }; int r; - FOREACH_OPTION(c, &opts, /* on_error= */ return c) + FOREACH_OPTION_OR_RETURN(c, &opts) switch (c) { OPTION_COMMON_HELP: @@ -319,7 +319,7 @@ static int per_dev_options(struct scsi_id_device *dev_scsi, int *good_bad, enum /* We reuse the option parser, but only a subset of the options is supported here. * If any others are encountered, return an error. */ - FOREACH_OPTION(c, &opts, /* on_error= */ return c) + FOREACH_OPTION_OR_RETURN(c, &opts) if (opts.opt->short_code == 'b') *good_bad = 0; else if (opts.opt->short_code == 'g') diff --git a/src/udev/udev-builtin-blkid.c b/src/udev/udev-builtin-blkid.c index 16eaced0dcf46..4cd22a889fcf7 100644 --- a/src/udev/udev-builtin-blkid.c +++ b/src/udev/udev-builtin-blkid.c @@ -517,7 +517,7 @@ static int builtin_blkid(UdevEvent *event, int argc, char *argv[]) { OptionParser opts = { argc, argv, .namespace = "udev-builtin-blkid" }; - FOREACH_OPTION(c, &opts, /* on_error= */ return c) + FOREACH_OPTION_OR_RETURN(c, &opts) switch (c) { OPTION_NAMESPACE("udev-builtin-blkid"): {} diff --git a/src/udev/udev-builtin-hwdb.c b/src/udev/udev-builtin-hwdb.c index 4817c3af24e20..dececd9c0377c 100644 --- a/src/udev/udev-builtin-hwdb.c +++ b/src/udev/udev-builtin-hwdb.c @@ -138,7 +138,7 @@ static int builtin_hwdb(UdevEvent *event, int argc, char *argv[]) { OptionParser opts = { argc, argv, .namespace = "udev-builtin-hwdb" }; - FOREACH_OPTION(c, &opts, /* on_error= */ return c) + FOREACH_OPTION_OR_RETURN(c, &opts) switch (c) { OPTION_NAMESPACE("udev-builtin-hwdb"): {} diff --git a/src/udev/udev-config.c b/src/udev/udev-config.c index e234d6fe6d994..541ba16dd906b 100644 --- a/src/udev/udev-config.c +++ b/src/udev/udev-config.c @@ -179,7 +179,7 @@ static int parse_argv(int argc, char *argv[], UdevConfig *config) { OptionParser opts = { argc, argv, OPTION_PARSER_NORMAL, "udevd" }; - FOREACH_OPTION(c, &opts, /* on_error= */ return c) + FOREACH_OPTION_OR_RETURN(c, &opts) switch (c) { OPTION_NAMESPACE("udevd"): {} diff --git a/src/udev/udevadm-cat.c b/src/udev/udevadm-cat.c index 48ca72041627f..62d30d0234d24 100644 --- a/src/udev/udevadm-cat.c +++ b/src/udev/udevadm-cat.c @@ -46,7 +46,7 @@ static int parse_argv(int argc, char *argv[], char ***remaining_args) { OptionParser opts = { argc, argv, .namespace = "udevadm-cat" }; - FOREACH_OPTION(c, &opts, /* on_error= */ return c) + FOREACH_OPTION_OR_RETURN(c, &opts) switch (c) { OPTION_NAMESPACE("udevadm-cat"): {} diff --git a/src/udev/udevadm-control.c b/src/udev/udevadm-control.c index ed586d5542d1f..a6ffe83cecaf6 100644 --- a/src/udev/udevadm-control.c +++ b/src/udev/udevadm-control.c @@ -74,7 +74,7 @@ static int parse_argv(int argc, char *argv[]) { OptionParser opts = { argc, argv, .namespace = "udevadm-control" }; - FOREACH_OPTION(c, &opts, /* on_error= */ return c) + FOREACH_OPTION_OR_RETURN(c, &opts) switch (c) { OPTION_NAMESPACE("udevadm-control"): {} diff --git a/src/udev/udevadm-hwdb.c b/src/udev/udevadm-hwdb.c index f4060673ebfe7..b029db2262a04 100644 --- a/src/udev/udevadm-hwdb.c +++ b/src/udev/udevadm-hwdb.c @@ -42,7 +42,7 @@ static int parse_argv(int argc, char *argv[]) { OptionParser opts = { argc, argv, .namespace = "udevadm-hwdb" }; - FOREACH_OPTION(c, &opts, /* on_error= */ return c) + FOREACH_OPTION_OR_RETURN(c, &opts) switch (c) { OPTION_NAMESPACE("udevadm-hwdb"): {} diff --git a/src/udev/udevadm-info.c b/src/udev/udevadm-info.c index 3795856592c1d..a5cbedc8deeda 100644 --- a/src/udev/udevadm-info.c +++ b/src/udev/udevadm-info.c @@ -985,7 +985,7 @@ static int parse_argv(int argc, char *argv[]) { OptionParser opts = { argc, argv, .namespace = "udevadm-info" }; - FOREACH_OPTION(c, &opts, /* on_error= */ return c) + FOREACH_OPTION_OR_RETURN(c, &opts) switch (c) { OPTION_NAMESPACE("udevadm-info"): {} diff --git a/src/udev/udevadm-lock.c b/src/udev/udevadm-lock.c index cebce08007eb0..c1c3211d34992 100644 --- a/src/udev/udevadm-lock.c +++ b/src/udev/udevadm-lock.c @@ -62,7 +62,7 @@ static int parse_argv(int argc, char *argv[]) { OptionParser opts = { argc, argv, OPTION_PARSER_STOP_AT_FIRST_NONOPTION, "udevadm-lock" }; - FOREACH_OPTION(c, &opts, /* on_error= */ return c) + FOREACH_OPTION_OR_RETURN(c, &opts) switch (c) { OPTION_NAMESPACE("udevadm-lock"): {} diff --git a/src/udev/udevadm-monitor.c b/src/udev/udevadm-monitor.c index 0c165241a2e3d..c7d1f40fc49b6 100644 --- a/src/udev/udevadm-monitor.c +++ b/src/udev/udevadm-monitor.c @@ -126,7 +126,7 @@ static int parse_argv(int argc, char *argv[]) { OptionParser opts = { argc, argv, .namespace = "udevadm-monitor" }; - FOREACH_OPTION(c, &opts, /* on_error= */ return c) + FOREACH_OPTION_OR_RETURN(c, &opts) switch (c) { OPTION_NAMESPACE("udevadm-monitor"): {} diff --git a/src/udev/udevadm-settle.c b/src/udev/udevadm-settle.c index 1292462d28c25..211a8ff1fbf8c 100644 --- a/src/udev/udevadm-settle.c +++ b/src/udev/udevadm-settle.c @@ -56,7 +56,7 @@ static int parse_argv(int argc, char *argv[]) { OptionParser opts = { argc, argv, .namespace = "udevadm-settle" }; - FOREACH_OPTION(c, &opts, /* on_error= */ return c) + FOREACH_OPTION_OR_RETURN(c, &opts) switch (c) { OPTION_NAMESPACE("udevadm-settle"): {} diff --git a/src/udev/udevadm-test-builtin.c b/src/udev/udevadm-test-builtin.c index 31ac569957017..9c0082800f37a 100644 --- a/src/udev/udevadm-test-builtin.c +++ b/src/udev/udevadm-test-builtin.c @@ -42,7 +42,7 @@ static int parse_argv(int argc, char *argv[]) { OptionParser opts = { argc, argv, .namespace = "udevadm-test-builtin" }; - FOREACH_OPTION(c, &opts, /* on_error= */ return c) + FOREACH_OPTION_OR_RETURN(c, &opts) switch (c) { OPTION_NAMESPACE("udevadm-test-builtin"): {} diff --git a/src/udev/udevadm-test.c b/src/udev/udevadm-test.c index a7841333016f9..ac368e0f00eec 100644 --- a/src/udev/udevadm-test.c +++ b/src/udev/udevadm-test.c @@ -61,7 +61,7 @@ static int parse_argv(int argc, char *argv[]) { OptionParser opts = { argc, argv, .namespace = "udevadm-test" }; - FOREACH_OPTION(c, &opts, /* on_error= */ return c) + FOREACH_OPTION_OR_RETURN(c, &opts) switch (c) { OPTION_NAMESPACE("udevadm-test"): {} diff --git a/src/udev/udevadm-trigger.c b/src/udev/udevadm-trigger.c index 62ccba37c5b8d..583d85be0b8d8 100644 --- a/src/udev/udevadm-trigger.c +++ b/src/udev/udevadm-trigger.c @@ -348,7 +348,7 @@ static int parse_argv(int argc, char *argv[]) { OptionParser opts = { argc, argv, .namespace = "udevadm-trigger" }; - FOREACH_OPTION(c, &opts, /* on_error= */ return c) + FOREACH_OPTION_OR_RETURN(c, &opts) switch (c) { OPTION_NAMESPACE("udevadm-trigger"): {} diff --git a/src/udev/udevadm-verify.c b/src/udev/udevadm-verify.c index 1ecc1fbee9c78..f4388f843adc6 100644 --- a/src/udev/udevadm-verify.c +++ b/src/udev/udevadm-verify.c @@ -51,7 +51,7 @@ static int parse_argv(int argc, char *argv[], char ***remaining_args) { OptionParser opts = { argc, argv, .namespace = "udevadm-verify" }; - FOREACH_OPTION(c, &opts, /* on_error= */ return c) + FOREACH_OPTION_OR_RETURN(c, &opts) switch (c) { OPTION_NAMESPACE("udevadm-verify"): {} diff --git a/src/udev/udevadm-wait.c b/src/udev/udevadm-wait.c index a361bac61a3a7..6017401440689 100644 --- a/src/udev/udevadm-wait.c +++ b/src/udev/udevadm-wait.c @@ -324,7 +324,7 @@ static int parse_argv(int argc, char *argv[]) { OptionParser opts = { argc, argv, .namespace = "udevadm-wait" }; - FOREACH_OPTION(c, &opts, /* on_error= */ return c) + FOREACH_OPTION_OR_RETURN(c, &opts) switch (c) { OPTION_NAMESPACE("udevadm-wait"): {} diff --git a/src/udev/udevadm.c b/src/udev/udevadm.c index 856d1fc4c23e2..47d4335baec7f 100644 --- a/src/udev/udevadm.c +++ b/src/udev/udevadm.c @@ -71,7 +71,7 @@ static int parse_argv(int argc, char *argv[], char ***remaining_args) { OptionParser opts = { argc, argv, OPTION_PARSER_STOP_AT_FIRST_NONOPTION, "udevadm" }; - FOREACH_OPTION(c, &opts, /* on_error= */ return c) + FOREACH_OPTION_OR_RETURN(c, &opts) switch (c) { OPTION_NAMESPACE("udevadm"): {} diff --git a/src/udev/v4l_id/v4l_id.c b/src/udev/v4l_id/v4l_id.c index 93ca2d3b997fe..1a53e1092fb7a 100644 --- a/src/udev/v4l_id/v4l_id.c +++ b/src/udev/v4l_id/v4l_id.c @@ -43,7 +43,7 @@ static int parse_argv(int argc, char *argv[]) { OptionParser opts = { argc, argv }; - FOREACH_OPTION(c, &opts, /* on_error= */ return c) + FOREACH_OPTION_OR_RETURN(c, &opts) switch (c) { OPTION_COMMON_HELP: diff --git a/src/update-done/update-done.c b/src/update-done/update-done.c index b55c9941a9de3..67ce353e114d2 100644 --- a/src/update-done/update-done.c +++ b/src/update-done/update-done.c @@ -98,7 +98,7 @@ static int parse_argv(int argc, char *argv[]) { OptionParser opts = { argc, argv }; - FOREACH_OPTION(c, &opts, /* on_error= */ return c) + FOREACH_OPTION_OR_RETURN(c, &opts) switch (c) { OPTION_COMMON_HELP: return help(); diff --git a/src/validatefs/validatefs.c b/src/validatefs/validatefs.c index 58f8feb805dca..506b8198709d5 100644 --- a/src/validatefs/validatefs.c +++ b/src/validatefs/validatefs.c @@ -66,7 +66,7 @@ static int parse_argv(int argc, char *argv[]) { OptionParser opts = { argc, argv }; - FOREACH_OPTION(c, &opts, /* on_error= */ return c) + FOREACH_OPTION_OR_RETURN(c, &opts) switch (c) { OPTION_COMMON_HELP: return help(); diff --git a/src/varlinkctl/varlinkctl.c b/src/varlinkctl/varlinkctl.c index fbf4b217ff691..fbd5e2499a5d5 100644 --- a/src/varlinkctl/varlinkctl.c +++ b/src/varlinkctl/varlinkctl.c @@ -126,7 +126,7 @@ static int parse_argv(int argc, char *argv[], char ***ret_args) { OptionParser opts = { argc, argv }; - FOREACH_OPTION(c, &opts, /* on_error= */ return c) + FOREACH_OPTION_OR_RETURN(c, &opts) switch (c) { OPTION_COMMON_HELP: diff --git a/src/vmspawn/vmspawn.c b/src/vmspawn/vmspawn.c index 8e4cbf3e80611..81c035c250d62 100644 --- a/src/vmspawn/vmspawn.c +++ b/src/vmspawn/vmspawn.c @@ -328,7 +328,7 @@ static int parse_argv(int argc, char *argv[]) { OptionParser opts = { argc, argv, OPTION_PARSER_STOP_AT_FIRST_NONOPTION }; - FOREACH_OPTION(c, &opts, /* on_error= */ return c) + FOREACH_OPTION_OR_RETURN(c, &opts) switch (c) { OPTION_COMMON_HELP: diff --git a/src/vpick/vpick-tool.c b/src/vpick/vpick-tool.c index f18edb263f8e9..f0b5ef44dfb67 100644 --- a/src/vpick/vpick-tool.c +++ b/src/vpick/vpick-tool.c @@ -101,7 +101,7 @@ static int parse_argv(int argc, char *argv[], char ***ret_args) { OptionParser opts = { argc, argv }; - FOREACH_OPTION(c, &opts, /* on_error= */ return c) + FOREACH_OPTION_OR_RETURN(c, &opts) switch (c) { OPTION('B', "basename", "BASENAME", "Look for specified basename"): From 4ce8cd17da3c5688a36106a1af48a16dd13daae6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Zbigniew=20J=C4=99drzejewski-Szmek?= Date: Thu, 30 Apr 2026 09:31:46 +0200 Subject: [PATCH 061/242] fundamental: drop now-unused macro Followup for 9d2f5b4611a47b9e5a31296cea70c2d8c6c86bbb. --- src/fundamental/cleanup-fundamental.h | 8 -------- 1 file changed, 8 deletions(-) diff --git a/src/fundamental/cleanup-fundamental.h b/src/fundamental/cleanup-fundamental.h index 8d499e5c3498b..b9f9c0724546b 100644 --- a/src/fundamental/cleanup-fundamental.h +++ b/src/fundamental/cleanup-fundamental.h @@ -45,14 +45,6 @@ #define DEFINE_TRIVIAL_CLEANUP_FUNC_FULL_MACRO(type, macro, empty) \ DEFINE_TRIVIAL_CLEANUP_FUNC_FULL_MACRO_RENAME(type, macro, macro##p, empty) -/* Clean up a NULL-terminated array by dropping all the items in it (up to the first NULL). - * The array itself is not deallocated. */ -#define DEFINE_ARRAY_DONE_FUNC(type, helper) \ - void helper ## _many(type (*p)[]) { \ - for (type *t = *ASSERT_PTR(p); *t; t++) \ - *t = helper(*t); \ - } - /* Clean up an array of pointers to objects by dropping all the items in it. * The size of the array is passed in as a parameter, so NULL items may appear in the middle of the array. * Free the array itself afterwards. */ From f78ba86d262adee5720a704fd6049db6354d9048 Mon Sep 17 00:00:00 2001 From: Yaping Li <202858510+YapingLi04@users.noreply.github.com> Date: Wed, 29 Apr 2026 15:17:22 -0700 Subject: [PATCH 062/242] report: report user and system CPU time per cgroup Extend io.systemd.CGroup.CpuUsage from a single per-unit nanosecond counter to three rows distinguished by a "type" field of "total", "user", or "system". The values come from cpu.stat's usage_usec, user_usec and system_usec keys, read in a single keyed-attribute fetch and cached on each CGroupInfo so each scrape only opens cpu.stat once per cgroup. --- src/report/report-cgroup.c | 106 ++++++++++++++++++++++--- test/units/TEST-74-AUX-UTILS.report.sh | 7 ++ 2 files changed, 104 insertions(+), 9 deletions(-) diff --git a/src/report/report-cgroup.c b/src/report/report-cgroup.c index c3dabe41b1016..9a52c03d17741 100644 --- a/src/report/report-cgroup.c +++ b/src/report/report-cgroup.c @@ -22,6 +22,10 @@ typedef struct CGroupInfo { uint64_t io_rbytes; uint64_t io_rios; int io_stat_cached; /* 0 = not attempted, > 0 = cached, < 0 = -errno */ + uint64_t cpu_total_nsec; + uint64_t cpu_user_nsec; + uint64_t cpu_system_nsec; + int cpu_stat_cached; /* 0 = not attempted, > 0 = cached, < 0 = -errno */ } CGroupInfo; static CGroupInfo *cgroup_info_free(CGroupInfo *info) { @@ -154,6 +158,89 @@ static int walk_cgroups(CGroupContext *ctx, CGroupInfo ***ret, size_t *ret_n) { return 0; } +/* Parse cpu.stat for a cgroup once, extracting usage_usec, user_usec and system_usec + * in a single read so each scrape only opens the file once per cgroup. */ +static int cpu_stat_parse( + const char *cgroup_path, + uint64_t *ret_total_nsec, + uint64_t *ret_user_nsec, + uint64_t *ret_system_nsec) { + + char *values[3] = {}; + uint64_t total_us, user_us, system_us; + int r; + + assert(cgroup_path); + assert(ret_total_nsec); + assert(ret_user_nsec); + assert(ret_system_nsec); + + r = cg_get_keyed_attribute( + cgroup_path, + "cpu.stat", + STRV_MAKE("usage_usec", "user_usec", "system_usec"), + values); + if (r < 0) + return r; + + r = safe_atou64(values[0], &total_us); + if (r >= 0) + r = safe_atou64(values[1], &user_us); + if (r >= 0) + r = safe_atou64(values[2], &system_us); + + free_many_charp(values, ELEMENTSOF(values)); + if (r < 0) + return r; + + *ret_total_nsec = total_us * NSEC_PER_USEC; + *ret_user_nsec = user_us * NSEC_PER_USEC; + *ret_system_nsec = system_us * NSEC_PER_USEC; + return 0; +} + +static int ensure_cpu_stat_cached(CGroupInfo *info) { + int r; + + assert(info); + + if (info->cpu_stat_cached > 0) + return 0; + if (info->cpu_stat_cached < 0) + return info->cpu_stat_cached; + + r = cpu_stat_parse(info->path, &info->cpu_total_nsec, &info->cpu_user_nsec, &info->cpu_system_nsec); + if (r < 0) { + if (r != -ENOENT) + log_debug_errno(r, "Failed to parse cpu.stat for '%s': %m", info->path); + info->cpu_stat_cached = r; + return r; + } + + info->cpu_stat_cached = 1; + return 0; +} + +static int cpu_usage_send_one( + MetricFamilyContext *context, + const char *unit, + uint64_t value_nsec, + const char *type) { + + _cleanup_(sd_json_variant_unrefp) sd_json_variant *fields = NULL; + int r; + + assert(context); + assert(unit); + assert(type); + + r = sd_json_buildo(&fields, SD_JSON_BUILD_PAIR_STRING("type", type)); + if (r < 0) + return r; + + return metric_build_send_unsigned(context, unit, value_nsec, fields); +} + static int cpu_usage_build_json(MetricFamilyContext *context, void *userdata) { CGroupContext *ctx = ASSERT_PTR(userdata); CGroupInfo **cgroups; @@ -167,17 +254,18 @@ static int cpu_usage_build_json(MetricFamilyContext *context, void *userdata) { return 0; /* Skip metric on failure */ FOREACH_ARRAY(c, cgroups, n_cgroups) { - uint64_t us; + if (ensure_cpu_stat_cached(*c) < 0) + continue; - r = cg_get_keyed_attribute_uint64((*c)->path, "cpu.stat", "usage_usec", &us); + r = cpu_usage_send_one(context, (*c)->unit, (*c)->cpu_total_nsec, "total"); if (r < 0) - continue; + return r; - r = metric_build_send_unsigned( - context, - (*c)->unit, - us * NSEC_PER_USEC, - /* fields= */ NULL); + r = cpu_usage_send_one(context, (*c)->unit, (*c)->cpu_user_nsec, "user"); + if (r < 0) + return r; + + r = cpu_usage_send_one(context, (*c)->unit, (*c)->cpu_system_nsec, "system"); if (r < 0) return r; } @@ -451,7 +539,7 @@ static const MetricFamily cgroup_metric_family_table[] = { /* Keep metrics ordered alphabetically */ { .name = METRIC_IO_SYSTEMD_CGROUP_PREFIX "CpuUsage", - .description = "Per unit metric: CPU usage in nanoseconds", + .description = "Per unit metric: CPU usage in nanoseconds (type=total|user|system)", .type = METRIC_FAMILY_TYPE_COUNTER, .generate = cpu_usage_build_json, }, diff --git a/test/units/TEST-74-AUX-UTILS.report.sh b/test/units/TEST-74-AUX-UTILS.report.sh index 53b83c4dd9477..8bca8447f3e3f 100755 --- a/test/units/TEST-74-AUX-UTILS.report.sh +++ b/test/units/TEST-74-AUX-UTILS.report.sh @@ -37,6 +37,13 @@ varlinkctl list-methods /run/systemd/report/io.systemd.CGroup varlinkctl --more call /run/systemd/report/io.systemd.CGroup io.systemd.Metrics.List {} varlinkctl --more call /run/systemd/report/io.systemd.CGroup io.systemd.Metrics.Describe {} +# CpuUsage emits one row per (cgroup, type) where type is total, user, or system. +# Confirm all three are present. +cgroup_metrics=$(varlinkctl --more --json=short call /run/systemd/report/io.systemd.CGroup io.systemd.Metrics.List {}) +echo "$cgroup_metrics" | grep '"name":"io.systemd.CGroup.CpuUsage"' | grep '"type":"total"' >/dev/null +echo "$cgroup_metrics" | grep '"name":"io.systemd.CGroup.CpuUsage"' | grep '"type":"user"' >/dev/null +echo "$cgroup_metrics" | grep '"name":"io.systemd.CGroup.CpuUsage"' | grep '"type":"system"' >/dev/null + # test io.systemd.Network Metrics varlinkctl info /run/systemd/report/io.systemd.Network varlinkctl list-methods /run/systemd/report/io.systemd.Network From 630f5a1fc4d8355caf68b82dc042ff02f56080ab Mon Sep 17 00:00:00 2001 From: Luca Boccassi Date: Thu, 30 Apr 2026 18:19:49 +0100 Subject: [PATCH 063/242] mkosi: update debian commit reference to 1302f123d9ab65bbaff5d95935eabfd659456550 * 1302f123d9 Restrict wildcard for new files * a6d0098d10 Install new files for upstream build * ce07fd7616 d/t/boot-and-services: use coreutils tunable in apparmor test (LP: #2125614) --- mkosi/mkosi.pkgenv/mkosi.conf.d/debian-ubuntu.conf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mkosi/mkosi.pkgenv/mkosi.conf.d/debian-ubuntu.conf b/mkosi/mkosi.pkgenv/mkosi.conf.d/debian-ubuntu.conf index f46a0a0372322..f72e35d6584b0 100644 --- a/mkosi/mkosi.pkgenv/mkosi.conf.d/debian-ubuntu.conf +++ b/mkosi/mkosi.pkgenv/mkosi.conf.d/debian-ubuntu.conf @@ -9,5 +9,5 @@ Environment= GIT_URL=https://salsa.debian.org/systemd-team/systemd.git GIT_SUBDIR=debian GIT_BRANCH=debian/master - GIT_COMMIT=94af257c72ac3e9bf20e324ff31c3bd5d8197f0e + GIT_COMMIT=1302f123d9ab65bbaff5d95935eabfd659456550 PKG_SUBDIR=debian From acdaececf08fa7653d3d04885ae9eea18127d206 Mon Sep 17 00:00:00 2001 From: Lennart Poettering Date: Wed, 15 Apr 2026 16:19:55 +0200 Subject: [PATCH 064/242] bootspec: add boot_config_selected_entry() helper --- src/shared/bootspec.h | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/src/shared/bootspec.h b/src/shared/bootspec.h index 951a81f08c6c4..afc5a576c9048 100644 --- a/src/shared/bootspec.h +++ b/src/shared/bootspec.h @@ -116,6 +116,16 @@ static inline const BootEntry* boot_config_default_entry(const BootConfig *confi return config->entries + config->default_entry; } +static inline const BootEntry* boot_config_selected_entry(const BootConfig *config) { + assert(config); + + if (config->selected_entry < 0) + return NULL; + + assert((size_t) config->selected_entry < config->n_entries); + return config->entries + config->selected_entry; +} + void boot_config_free(BootConfig *config); int boot_loader_read_conf(BootConfig *config, FILE *file, const char *path); From 0a46727ce7bdb3a99e43bcb321aa7df500a8f24a Mon Sep 17 00:00:00 2001 From: Lennart Poettering Date: Fri, 13 Mar 2026 22:52:18 +0100 Subject: [PATCH 065/242] bootspec: make pe_find_uki_sections() non-static --- src/shared/bootspec.c | 22 ++++++++++++++-------- src/shared/bootspec.h | 2 ++ 2 files changed, 16 insertions(+), 8 deletions(-) diff --git a/src/shared/bootspec.c b/src/shared/bootspec.c index 3338d75f660df..ebd63788c0ad1 100644 --- a/src/shared/bootspec.c +++ b/src/shared/bootspec.c @@ -963,7 +963,7 @@ static int trim_cmdline(char **cmdline) { * the ones we do care about and we are willing to load into memory have this size limit.) */ #define PE_SECTION_SIZE_MAX (4U*1024U*1024U) -static int pe_find_uki_sections( +int pe_find_uki_sections( int fd, const char *path, unsigned profile, @@ -979,9 +979,6 @@ static int pe_find_uki_sections( assert(fd >= 0); assert(path); assert(profile != UINT_MAX); - assert(ret_osrelease); - assert(ret_profile); - assert(ret_cmdline); r = pe_load_headers_and_sections(fd, path, §ions, &pe_header); if (r < 0) @@ -1038,13 +1035,22 @@ static int pe_find_uki_sections( if (trim_cmdline(&cmdline_text) < 0) return log_oom(); - *ret_osrelease = TAKE_PTR(osrelease_text); - *ret_profile = TAKE_PTR(profile_text); - *ret_cmdline = TAKE_PTR(cmdline_text); + if (ret_osrelease) + *ret_osrelease = TAKE_PTR(osrelease_text); + if (ret_profile) + *ret_profile = TAKE_PTR(profile_text); + if (ret_cmdline) + *ret_cmdline = TAKE_PTR(cmdline_text); return 1; nothing: - *ret_osrelease = *ret_profile = *ret_cmdline = NULL; + if (ret_osrelease) + *ret_osrelease = NULL; + if (ret_profile) + *ret_profile = NULL; + if (ret_cmdline) + *ret_cmdline = NULL; + return 0; } diff --git a/src/shared/bootspec.h b/src/shared/bootspec.h index afc5a576c9048..01abe05e72dd9 100644 --- a/src/shared/bootspec.h +++ b/src/shared/bootspec.h @@ -162,3 +162,5 @@ int show_boot_entries( int boot_filename_extract_tries(const char *fname, char **ret_stripped, unsigned *ret_tries_left, unsigned *ret_tries_done); int boot_entry_to_json(const BootConfig *c, size_t i, sd_json_variant **ret); + +int pe_find_uki_sections(int fd, const char *path, unsigned profile, char **ret_osrelease, char **ret_profile, char **ret_cmdline); From d292874bf6845605b733ad8e6926a5bee23ef5d3 Mon Sep 17 00:00:00 2001 From: Lennart Poettering Date: Fri, 20 Mar 2026 10:21:11 +0100 Subject: [PATCH 066/242] bootspec: add bootspec_extract_osrelease() helper --- src/shared/bootspec.c | 145 ++++++++++++++++++++++++++++++++++-------- src/shared/bootspec.h | 2 + 2 files changed, 119 insertions(+), 28 deletions(-) diff --git a/src/shared/bootspec.c b/src/shared/bootspec.c index ebd63788c0ad1..c0111d1f9c433 100644 --- a/src/shared/bootspec.c +++ b/src/shared/bootspec.c @@ -31,6 +31,7 @@ #include "string-util.h" #include "strv.h" #include "uki.h" +#include "utf8.h" static const char* const boot_entry_type_description_table[_BOOT_ENTRY_TYPE_MAX] = { [BOOT_ENTRY_TYPE1] = "Boot Loader Specification Type #1 (.conf)", @@ -711,56 +712,143 @@ static int boot_entries_find_type1( return 0; } -static int boot_entry_load_unified( - const char *root, - const BootEntrySource source, - const char *path, - unsigned profile, - const char *osrelease_text, - const char *profile_text, - const char *cmdline_text, - BootEntry *ret) { +static void mangle_osrelease_string(char **s, const char *field) { + assert(s); + assert(field); - _cleanup_free_ char *fname = NULL, *os_pretty_name = NULL, *os_image_id = NULL, *os_name = NULL, *os_id = NULL, - *os_image_version = NULL, *os_version = NULL, *os_version_id = NULL, *os_build_id = NULL; - const char *k, *good_name, *good_version, *good_sort_key; - int r; + if (!isempty(*s) && !string_has_cc(*s, /* ok= */ NULL) && utf8_is_valid(*s)) + return; - assert(root); - assert(path); - assert(osrelease_text); - assert(ret); + if (*s) { + log_debug("OS release field '%s' is not clean, suppressing.", field); + *s = mfree(*s); + } +} - k = path_startswith(path, root); - if (!k) - return log_error_errno(SYNTHETIC_ERRNO(EINVAL), "Path is not below root: %s", path); +int bootspec_extract_osrelease( + const char *text, + char **ret_good_name, + char **ret_good_version, + char **ret_good_sort_key, + char **ret_os_id, + char **ret_os_version_id, + char **ret_image_id, + char **ret_image_version) { + + int r; + + assert(text); - r = parse_env_data(osrelease_text, /* size= */ SIZE_MAX, - ".osrel", + _cleanup_free_ char *os_pretty_name = NULL, *image_id = NULL, *os_name = NULL, *os_id = NULL, + *image_version = NULL, *os_version = NULL, *os_version_id = NULL, *os_build_id = NULL; + r = parse_env_data(text, /* size= */ SIZE_MAX, + "os-release", "PRETTY_NAME", &os_pretty_name, - "IMAGE_ID", &os_image_id, + "IMAGE_ID", &image_id, "NAME", &os_name, "ID", &os_id, - "IMAGE_VERSION", &os_image_version, + "IMAGE_VERSION", &image_version, "VERSION", &os_version, "VERSION_ID", &os_version_id, "BUILD_ID", &os_build_id); if (r < 0) - return log_error_errno(r, "Failed to parse os-release data from unified kernel image %s: %m", path); + return r; + + mangle_osrelease_string(&os_pretty_name, "PRETTY_NAME"); + mangle_osrelease_string(&image_id, "IMAGE_ID"); + mangle_osrelease_string(&os_name, "NAME"); + mangle_osrelease_string(&os_id, "ID"); + mangle_osrelease_string(&image_version, "IMAGE_VERSION"); + mangle_osrelease_string(&os_version, "VERSION"); + mangle_osrelease_string(&os_version_id, "VERSION_ID"); + mangle_osrelease_string(&os_build_id, "BUILD_ID"); + const char *good_name, *good_version, *good_sort_key; if (!bootspec_pick_name_version_sort_key( os_pretty_name, - os_image_id, + image_id, os_name, os_id, - os_image_version, + image_version, os_version, os_version_id, os_build_id, &good_name, &good_version, &good_sort_key)) - return log_error_errno(SYNTHETIC_ERRNO(EBADMSG), "Missing fields in os-release data from unified kernel image %s, refusing.", path); + return -EBADMSG; + + _cleanup_free_ char *copy_good_name = NULL, *copy_good_version = NULL, *copy_good_sort_key = NULL; + if (ret_good_name) { + copy_good_name = strdup(good_name); + if (!copy_good_name) + return -ENOMEM; + } + + if (ret_good_version && good_version) { + copy_good_version = strdup(good_version); + if (!copy_good_version) + return -ENOMEM; + } + + if (ret_good_sort_key && good_sort_key) { + copy_good_sort_key = strdup(good_sort_key); + if (!copy_good_sort_key) + return -ENOMEM; + } + + if (ret_good_name) + *ret_good_name = TAKE_PTR(copy_good_name); + if (ret_good_version) + *ret_good_version = TAKE_PTR(copy_good_version); + if (ret_good_sort_key) + *ret_good_sort_key = TAKE_PTR(copy_good_sort_key); + + if (ret_os_id) + *ret_os_id = TAKE_PTR(os_id); + if (ret_os_version_id) + *ret_os_version_id = TAKE_PTR(os_version_id); + if (ret_image_id) + *ret_image_id = TAKE_PTR(image_id); + if (ret_image_version) + *ret_image_version = TAKE_PTR(image_version); + + return 0; +} + +static int boot_entry_load_unified( + const char *root, + const BootEntrySource source, + const char *path, + unsigned profile, + const char *osrelease_text, + const char *profile_text, + const char *cmdline_text, + BootEntry *ret) { + + int r; + + assert(root); + assert(path); + assert(osrelease_text); + assert(ret); + + const char *k = path_startswith(path, root); + if (!k) + return log_error_errno(SYNTHETIC_ERRNO(EINVAL), "Path is not below root: %s", path); + + _cleanup_free_ char *good_name = NULL, *good_version = NULL, *good_sort_key = NULL, *os_id = NULL, *os_version_id = NULL; + r = bootspec_extract_osrelease( + osrelease_text, + &good_name, + &good_version, + &good_sort_key, + &os_id, + &os_version_id, + /* ret_image_id= */ NULL, + /* ret_image_version= */ NULL); + if (r < 0) + return log_error_errno(r, "Failed to extract name/version/sort-key from os-release data from unified kernel image %s, refusing: %m", path); _cleanup_free_ char *profile_id = NULL, *profile_title = NULL; if (profile_text) { @@ -773,6 +861,7 @@ static int boot_entry_load_unified( return log_error_errno(r, "Failed to parse profile data from unified kernel image '%s': %m", path); } + _cleanup_free_ char *fname = NULL; r = path_extract_filename(path, &fname); if (r < 0) return log_error_errno(r, "Failed to extract file name from '%s': %m", path); diff --git a/src/shared/bootspec.h b/src/shared/bootspec.h index 01abe05e72dd9..d5f6930be99d1 100644 --- a/src/shared/bootspec.h +++ b/src/shared/bootspec.h @@ -164,3 +164,5 @@ int boot_filename_extract_tries(const char *fname, char **ret_stripped, unsigned int boot_entry_to_json(const BootConfig *c, size_t i, sd_json_variant **ret); int pe_find_uki_sections(int fd, const char *path, unsigned profile, char **ret_osrelease, char **ret_profile, char **ret_cmdline); + +int bootspec_extract_osrelease(const char *text, char **ret_good_name, char **ret_good_version, char **ret_good_sort_key, char **ret_os_id, char **ret_os_version_id, char **ret_image_id, char **ret_image_version); From 4d0f6ac5931c051871e46e98a2ff7eb37136ea57 Mon Sep 17 00:00:00 2001 From: Lennart Poettering Date: Wed, 15 Apr 2026 16:09:48 +0200 Subject: [PATCH 067/242] bootctl: add helpers that format a type1 menu entry filename from a commit nr --- src/bootctl/bootspec-util.c | 214 +++++++++++++++++++++++++++++++ src/bootctl/bootspec-util.h | 12 ++ src/bootctl/meson.build | 6 + src/bootctl/test-bootspec-util.c | 51 ++++++++ 4 files changed, 283 insertions(+) create mode 100644 src/bootctl/test-bootspec-util.c diff --git a/src/bootctl/bootspec-util.c b/src/bootctl/bootspec-util.c index b96687430ca32..5f9842c9d80a3 100644 --- a/src/bootctl/bootspec-util.c +++ b/src/bootctl/bootspec-util.c @@ -1,11 +1,18 @@ /* SPDX-License-Identifier: LGPL-2.1-or-later */ +#include "alloc-util.h" +#include "boot-entry.h" #include "bootspec-util.h" #include "devnum-util.h" #include "efi-loader.h" #include "errno-util.h" #include "log.h" +#include "parse-util.h" +#include "path-util.h" +#include "stdio-util.h" +#include "string-util.h" #include "strv.h" +#include "utf8.h" int boot_config_load_and_select( BootConfig *config, @@ -39,3 +46,210 @@ int boot_config_load_and_select( return boot_config_select_special_entries(config, /* skip_efivars= */ !!root); } + +int boot_entry_make_commit_filename( + const char *entry_token, + uint64_t entry_commit, + const char *version, + unsigned profile_nr, + unsigned tries_left, + char **ret) { + + assert(entry_token); + assert(ret); + + /* Generate a new entry filename from the entry token, the commit number, and (optionally) the + * image/OS version, (if non-zero) the profile number, and (unless UINT_MAX) the number of tries + * left. */ + + if (!boot_entry_token_valid(entry_token)) + return -EINVAL; + if (!entry_commit_valid(entry_commit)) + return -EINVAL; + + _cleanup_free_ char *filename = asprintf_safe("%s-commit_%" PRIu64, entry_token, entry_commit); + if (!filename) + return -ENOMEM; + if (version && !strextend(&filename, ".", version)) + return -ENOMEM; + if (profile_nr > 0 && strextendf(&filename, "@%u", profile_nr) < 0) + return -ENOMEM; + if (tries_left != UINT_MAX && strextendf(&filename, "+%u", tries_left) < 0) + return -ENOMEM; + if (!strextend(&filename, ".conf")) + return -ENOMEM; + + if (!filename_is_valid(filename) || string_has_cc(filename, /* ok= */ NULL) || !utf8_is_valid(filename)) + return -EINVAL; + + *ret = TAKE_PTR(filename); + return 0; +} + +int boot_entry_parse_commit_filename( + const char *filename, + char **ret_entry_token, + uint64_t *ret_entry_commit) { + + int r; + + assert(filename); + + if (!filename_is_valid(filename)) + return -EINVAL; + + _cleanup_free_ char *stripped = NULL; + r = boot_filename_extract_tries(filename, &stripped, /* ret_tries_left= */ NULL, /* ret_tries_done= */ NULL); + if (r < 0) + return r; + + const char *a = strrstr_no_case(stripped, "-commit_"); + if (!a) + return -EBADMSG; + + const char *c = endswith_no_case(stripped, ".conf"); + if (!c) + return -EBADMSG; + + assert(a < c); + + _cleanup_free_ char *entry_token = strndup(stripped, a - stripped); + if (!entry_token) + return -ENOMEM; + + if (!boot_entry_token_valid(entry_token)) + return -EBADMSG; + + const char *b = a + STRLEN("-commit_"); + size_t n = strspn(b, DIGITS); + if (n <= 0 || !IN_SET(b[n], '+', '.', '@')) + return -EBADMSG; + + _cleanup_free_ char *entry_commit_string = strndup(b, n); + if (!entry_commit_string) + return -ENOMEM; + + uint64_t entry_commit; + r = safe_atou64_full(entry_commit_string, 10, &entry_commit); + if (r < 0) + return r; + if (!entry_commit_valid(entry_commit)) + return -EBADMSG; + + if (ret_entry_token) + *ret_entry_token = TAKE_PTR(entry_token); + if (ret_entry_commit) + *ret_entry_commit = entry_commit; + + return 0; +} + +int boot_entry_parse_commit( + BootEntry *entry, + char **ret_entry_token, + uint64_t *ret_entry_commit) { + + int r; + + assert(entry); + + if (entry->type != BOOT_ENTRY_TYPE1) + return -EADDRNOTAVAIL; + + _cleanup_free_ char *fn = NULL; + r = path_extract_filename(entry->path, &fn); + if (r < 0) + return r; + + return boot_entry_parse_commit_filename(fn, ret_entry_token, ret_entry_commit); +} + +int boot_config_find_oldest_commit( + BootConfig *config, + const char *entry_token, + char ***ret_ids) { + + int r; + + assert(config); + assert(entry_token); + assert(ret_ids); + + uint64_t commit_oldest = UINT64_MAX, commit_2nd_oldest = UINT64_MAX, commit_blocked = UINT64_MAX; + + /* First, determine which commit is the oldest (that isn't the current one), and hence the candidate + * to be removed */ + FOREACH_ARRAY(b, config->entries, config->n_entries) { + _cleanup_free_ char *et = NULL; + uint64_t ec; + + r = boot_entry_parse_commit(b, &et, &ec); + if (r == -EADDRNOTAVAIL) + continue; + if (r < 0) { + log_debug_errno(r, "Failed to parse entry filename of '%s', ignoring: %m", strna(b->id)); + continue; + } + + if (!streq(et, entry_token)) /* Not ours? */ + continue; + + if (ec < commit_oldest) { + commit_2nd_oldest = commit_oldest; + commit_oldest = ec; + } else if (ec > commit_oldest && ec < commit_2nd_oldest) + commit_2nd_oldest = ec; + + if (boot_config_selected_entry(config) == b) { + assert(commit_blocked == UINT64_MAX); + commit_blocked = ec; + } + } + + uint64_t commit_picked; + if (commit_oldest == UINT64_MAX) + return log_debug_errno(SYNTHETIC_ERRNO(ENXIO), "No matching entry found while determining oldest entry."); + if (commit_oldest != commit_blocked) + commit_picked = commit_oldest; + else { + if (commit_2nd_oldest == UINT64_MAX) + return log_debug_errno(SYNTHETIC_ERRNO(EBUSY), "Only matching entry found while determining oldest entry is current one, skipping it."); + + assert(commit_2nd_oldest != commit_blocked); + commit_picked = commit_2nd_oldest; + } + + log_debug("Determined commit %" PRIu64 " to be oldest.", commit_picked); + + /* Second loop: actually remove all entries matching this commit (which can be multiple, since UKIs + * have profiles) */ + _cleanup_(strv_freep) char **l = NULL; + FOREACH_ARRAY(b, config->entries, config->n_entries) { + _cleanup_free_ char *et = NULL; + uint64_t ec; + + r = boot_entry_parse_commit(b, &et, &ec); + if (r == -EADDRNOTAVAIL) + continue; + if (r < 0) { + log_debug_errno(r, "Failed to parse entry filename of '%s', ignoring: %m", strna(b->id)); + continue; + } + + if (!streq(et, entry_token)) /* Not ours? */ + continue; + + if (ec != commit_picked) + continue; + + r = strv_extend(&l, b->id); + if (r < 0) + return r; + } + + /* The list cannot be empty, the first loop above and the 2nd loop must have found the same matching + * entries, and if the first loop didn't find any we'd not come this far. */ + assert(!strv_isempty(l)); + *ret_ids = TAKE_PTR(l); + return 0; +} diff --git a/src/bootctl/bootspec-util.h b/src/bootctl/bootspec-util.h index 51dac12b9f44b..0824c8040fb64 100644 --- a/src/bootctl/bootspec-util.h +++ b/src/bootctl/bootspec-util.h @@ -4,3 +4,15 @@ #include "bootspec.h" int boot_config_load_and_select(BootConfig *config, const char *root, const char *esp_path, dev_t esp_devid, const char *xbootldr_path, dev_t xbootldr_devid); + +static inline bool entry_commit_valid(uint64_t commit) { + return commit > 0 && commit < UINT64_MAX; +} + +int boot_entry_make_commit_filename(const char *entry_token, uint64_t entry_commit, const char *version, unsigned profile_nr, unsigned tries_left, char **ret); + +int boot_entry_parse_commit_filename(const char *filename, char **ret_entry_token, uint64_t *ret_entry_commit); + +int boot_entry_parse_commit(BootEntry *entry, char **ret_entry_token, uint64_t *ret_entry_commit); + +int boot_config_find_oldest_commit(BootConfig *config, const char *entry_token, char ***ret_ids); diff --git a/src/bootctl/meson.build b/src/bootctl/meson.build index f8349df7168e3..ff33cde3f615b 100644 --- a/src/bootctl/meson.build +++ b/src/bootctl/meson.build @@ -25,4 +25,10 @@ executables += [ 'link_with' : boot_link_with, 'dependencies' : [libopenssl_cflags], }, + test_template + { + 'sources' : files( + 'test-bootspec-util.c', + 'bootspec-util.c', + ), + }, ] diff --git a/src/bootctl/test-bootspec-util.c b/src/bootctl/test-bootspec-util.c new file mode 100644 index 0000000000000..1fa891469f460 --- /dev/null +++ b/src/bootctl/test-bootspec-util.c @@ -0,0 +1,51 @@ +/* SPDX-License-Identifier: LGPL-2.1-or-later */ + +#include "alloc-util.h" +#include "bootspec-util.h" +#include "tests.h" + +static void test_one( + const char *entry_token, + uint64_t entry_commit, + const char *version, + unsigned profile_nr, + unsigned tries_left, + const char *expected) { + + _cleanup_free_ char *fn = NULL; + ASSERT_OK(boot_entry_make_commit_filename(entry_token, entry_commit, version, profile_nr, tries_left, &fn)); + ASSERT_STREQ(fn, expected); + + _cleanup_free_ char *token = NULL; + uint64_t commit = 0; + ASSERT_OK(boot_entry_parse_commit_filename(fn, &token, &commit)); + ASSERT_STREQ(token, entry_token); + ASSERT_EQ(commit, entry_commit); +} + +TEST(boot_entry_commit_filename) { + test_one("foo", 1, NULL, 0, UINT_MAX, "foo-commit_1.conf"); + test_one("foo", 42, "1.0", 0, UINT_MAX, "foo-commit_42.1.0.conf"); + test_one("foo", 42, "1.0", 3, UINT_MAX, "foo-commit_42.1.0@3.conf"); + test_one("foo", 42, "1.0", 3, 5, "foo-commit_42.1.0@3+5.conf"); + test_one("foo", 42, NULL, 3, UINT_MAX, "foo-commit_42@3.conf"); + test_one("foo", 42, NULL, 3, 7, "foo-commit_42@3+7.conf"); + test_one("foo", 42, NULL, 0, 9, "foo-commit_42+9.conf"); + test_one("my-token", 123456, "v2", 0, UINT_MAX, "my-token-commit_123456.v2.conf"); + + /* Invalid inputs for make */ + _cleanup_free_ char *fn = NULL; + ASSERT_ERROR(boot_entry_make_commit_filename("foo/bar", 1, NULL, 0, UINT_MAX, &fn), EINVAL); + ASSERT_ERROR(boot_entry_make_commit_filename("foo", 0, NULL, 0, UINT_MAX, &fn), EINVAL); + ASSERT_ERROR(boot_entry_make_commit_filename("foo", UINT64_MAX, NULL, 0, UINT_MAX, &fn), EINVAL); + + /* Invalid inputs for parse */ + _cleanup_free_ char *token = NULL; + uint64_t commit = 0; + ASSERT_ERROR(boot_entry_parse_commit_filename("foo.conf", &token, &commit), EBADMSG); + ASSERT_ERROR(boot_entry_parse_commit_filename("foo-commit_.conf", &token, &commit), EBADMSG); + ASSERT_ERROR(boot_entry_parse_commit_filename("foo-commit_abc.conf", &token, &commit), EBADMSG); + ASSERT_ERROR(boot_entry_parse_commit_filename("foo-commit_0.conf", &token, &commit), EBADMSG); +} + +DEFINE_TEST_MAIN(LOG_INFO); From e68bf712be1b348ae8c13fe0e04cc1f42d57ca9e Mon Sep 17 00:00:00 2001 From: Lennart Poettering Date: Fri, 10 Apr 2026 14:48:25 +0200 Subject: [PATCH 068/242] bootctl: rework/modernize "unlink" and add Varlink API for it Among other things this changes tracking of the location of resources during GC from using the BootEntrySource enum rather than a path, since we have that and it is more efficient and easier to grok. --- man/bootctl.xml | 25 +- shell-completion/bash/bootctl | 10 +- shell-completion/zsh/_bootctl | 21 + src/bootctl/bootctl-cleanup.c | 7 +- src/bootctl/bootctl-unlink.c | 623 ++++++++++++++++---- src/bootctl/bootctl-unlink.h | 6 +- src/bootctl/bootctl.c | 15 +- src/bootctl/bootctl.h | 1 + src/shared/shared-forward.h | 2 + src/shared/varlink-io.systemd.BootControl.c | 22 +- 10 files changed, 609 insertions(+), 123 deletions(-) diff --git a/man/bootctl.xml b/man/bootctl.xml index 558891eaa1169..fb5f4b2b2a9eb 100644 --- a/man/bootctl.xml +++ b/man/bootctl.xml @@ -102,11 +102,17 @@ - ID + ID - Removes a boot loader entry including the files it refers to. Takes a single boot - loader entry ID string or a glob pattern as argument. Referenced files such as kernel or initrd are - only removed if no other entry refers to them. + Removes a boot loader entry including the files it refers to. Takes an optional boot + loader entry ID string or a glob pattern as argument. Referenced files such as the kernel, initrds, + system extensions (sysexts), configuration extensions (confexts) or credential files are only removed + if no other entry refers to them. + + If no ID argument is specified, the option + must be specified, in which case the boot loader entry with the lowest version is removed (for + robustness reasons the currently booted menu entry is never removed, nor is the last existing boot + loader entry). @@ -560,6 +566,17 @@ + + + + When used with unlink, selects the oldest installed boot loader + entry matching the boot entry token for removal (rather than passing an explicit entry ID). This is + useful for pruning older installed boot loader entries. Note that the currently booted entry is never + removed, nor is the last remaining one. + + + + diff --git a/shell-completion/bash/bootctl b/shell-completion/bash/bootctl index d7714731c2aac..792fc0c0acc83 100644 --- a/shell-completion/bash/bootctl +++ b/shell-completion/bash/bootctl @@ -40,8 +40,10 @@ _bootctl() { --dry-run' [ARG]='--esp-path --boot-path --root --image --image-policy --install-source --variables --random-seed --make-entry-directory --entry-token --json - --efi-boot-option-description --secure-boot-auto-enroll --private-key - --private-key-source --certificate --certificate-source' + --efi-boot-option-description --efi-boot-option-description-with-device + --secure-boot-auto-enroll --private-key + --private-key-source --certificate --certificate-source + --oldest' ) if __contains_word "$prev" ${OPTS[ARG]}; then @@ -67,7 +69,7 @@ _bootctl() { --install-source) comps="image host auto" ;; - --random-seed|--variables|--secure-boot-auto-enroll) + --random-seed|--variables|--secure-boot-auto-enroll|--oldest|--efi-boot-option-description-with-device) comps="yes no" ;; --json) @@ -85,7 +87,7 @@ _bootctl() { local -A VERBS=( [STANDALONE]='help status install update remove is-installed random-seed list set-timeout set-timeout-oneshot cleanup' - [BOOTENTRY]='set-default set-oneshot set-sysfail unlink' + [BOOTENTRY]='set-default set-oneshot set-sysfail set-preferred unlink' [BOOLEAN]='reboot-to-firmware' [FILE]='kernel-identify kernel-inspect' ) diff --git a/shell-completion/zsh/_bootctl b/shell-completion/zsh/_bootctl index f7ed2a8e4148a..c23c1c888dae5 100644 --- a/shell-completion/zsh/_bootctl +++ b/shell-completion/zsh/_bootctl @@ -24,10 +24,26 @@ _bootctl_set-oneshot() { _bootctl_comp_ids } +_bootctl_set-sysfail() { + _bootctl_comp_ids +} + +_bootctl_set-preferred() { + _bootctl_comp_ids +} + _bootctl_unlink() { _bootctl_comp_ids } +_bootctl_kernel-identify() { + _files +} + +_bootctl_kernel-inspect() { + _files +} + _bootctl_reboot-to-firmware() { local -a _completions _completions=( yes no ) @@ -49,10 +65,14 @@ _bootctl_reboot-to-firmware() { "list:List boot loader entries" "set-default:Set the default boot loader entry" "set-oneshot:Set the default boot loader entry only for the next boot" + "set-sysfail:Set boot loader entry used in case of a system failure" + "set-preferred:Set the preferred boot loader entry" "set-timeout:Set the menu timeout" "set-timeout-oneshot:Set the menu timeout for the next boot only" "unlink:Remove boot loader entry" "cleanup:Remove files in ESP not referenced in any boot entry" + "kernel-identify:Identify kernel image type" + "kernel-inspect:Print details about the kernel image" ) if (( CURRENT == 1 )); then _describe -t commands 'bootctl command' _bootctl_cmds || compadd "$@" @@ -79,6 +99,7 @@ _arguments \ '--no-pager[Do not pipe output into a pager]' \ '--graceful[Do not fail when locating ESP or writing fails]' \ '--dry-run[Dry run (unlink and cleanup)]' \ + '--oldest=[Delete oldest boot menu entry]:options:(yes no)' \ '--root=[Operate under the specified directory]:PATH' \ '--image=[Operate on the specified image]:PATH' \ '--install-source[Where to pick files when using --root=/--image=]:options:(image host auto)' \ diff --git a/src/bootctl/bootctl-cleanup.c b/src/bootctl/bootctl-cleanup.c index 1e8819bea1813..011567d187be6 100644 --- a/src/bootctl/bootctl-cleanup.c +++ b/src/bootctl/bootctl-cleanup.c @@ -49,6 +49,7 @@ static int list_remove_orphaned_file( static int cleanup_orphaned_files( const BootConfig *config, + BootEntrySource source, const char *root) { _cleanup_hashmap_free_ Hashmap *known_files = NULL; @@ -65,7 +66,7 @@ static int cleanup_orphaned_files( if (r < 0) return r; - r = boot_config_count_known_files(config, root, &known_files); + r = boot_config_count_known_files(config, source, &known_files); if (r < 0) return log_error_errno(r, "Failed to count files in %s: %m", root); @@ -116,10 +117,10 @@ int verb_cleanup(int argc, char *argv[], uintptr_t _data, void *userdata) { return r; r = 0; - RET_GATHER(r, cleanup_orphaned_files(&config, arg_esp_path)); + RET_GATHER(r, cleanup_orphaned_files(&config, BOOT_ENTRY_ESP, arg_esp_path)); if (arg_xbootldr_path && xbootldr_devid != esp_devid) - RET_GATHER(r, cleanup_orphaned_files(&config, arg_xbootldr_path)); + RET_GATHER(r, cleanup_orphaned_files(&config, BOOT_ENTRY_XBOOTLDR, arg_xbootldr_path)); return r; } diff --git a/src/bootctl/bootctl-unlink.c b/src/bootctl/bootctl-unlink.c index 0d0e7ad076b60..80e74926c6c76 100644 --- a/src/bootctl/bootctl-unlink.c +++ b/src/bootctl/bootctl-unlink.c @@ -3,20 +3,70 @@ #include #include +#include "sd-id128.h" +#include "sd-json.h" +#include "sd-varlink.h" + #include "alloc-util.h" +#include "boot-entry.h" #include "bootctl.h" #include "bootctl-unlink.h" #include "bootspec.h" #include "bootspec-util.h" #include "chase.h" +#include "efi-loader.h" #include "errno-util.h" +#include "fd-util.h" +#include "find-esp.h" #include "hashmap.h" +#include "id128-util.h" +#include "json-util.h" #include "log.h" #include "path-util.h" +#include "stat-util.h" +#include "string-util.h" #include "strv.h" +typedef struct UnlinkContext { + char *root; + int root_fd; + + sd_id128_t machine_id; + BootEntryTokenType entry_token_type; + char *entry_token; + + char *esp_path; + dev_t esp_devid; + int esp_fd; + + char *xbootldr_path; + dev_t xbootldr_devid; + int xbootldr_fd; +} UnlinkContext; + +#define UNLINK_CONTEXT_NULL \ + (UnlinkContext) { \ + .root_fd = -EBADF, \ + .entry_token_type = _BOOT_ENTRY_TOKEN_TYPE_INVALID, \ + .esp_fd = -EBADF, \ + .xbootldr_fd = -EBADF, \ + } + +static void unlink_context_done(UnlinkContext *c) { + assert(c); + + c->root = mfree(c->root); + c->root_fd = safe_close(c->root_fd); + + c->entry_token = mfree(c->entry_token); + + c->esp_path = mfree(c->esp_path); + c->esp_fd = safe_close(c->esp_fd); + c->xbootldr_path = mfree(c->xbootldr_path); + c->xbootldr_fd = safe_close(c->xbootldr_fd); +} + static int ref_file(Hashmap **known_files, const char *fn, int increment) { - char *k = NULL; int n, r; assert(known_files); @@ -26,13 +76,15 @@ static int ref_file(Hashmap **known_files, const char *fn, int increment) { if (!fn) return 0; + char *k = NULL; n = PTR_TO_INT(hashmap_get2(*known_files, fn, (void**)&k)); - n += increment; + if (!INC_SAFE(&n, increment)) + return -EOVERFLOW; assert(n >= 0); if (n == 0) { - (void) hashmap_remove(*known_files, fn); + (void) hashmap_remove(*known_files, k); free(k); } else if (!k) { _cleanup_free_ char *t = NULL; @@ -40,12 +92,14 @@ static int ref_file(Hashmap **known_files, const char *fn, int increment) { t = strdup(fn); if (!t) return -ENOMEM; + r = hashmap_ensure_put(known_files, &path_hash_ops_free, t, INT_TO_PTR(n)); if (r < 0) return r; + TAKE_PTR(t); } else { - r = hashmap_update(*known_files, fn, INT_TO_PTR(n)); + r = hashmap_update(*known_files, k, INT_TO_PTR(n)); if (r < 0) return r; } @@ -53,195 +107,548 @@ static int ref_file(Hashmap **known_files, const char *fn, int increment) { return n; } +static int boot_entry_ref_files( + const BootEntry *e, + Hashmap **known_files, + int increment) { + + int r; + + assert(e); + assert(known_files); + assert(increment != 0); + + r = ref_file(known_files, e->kernel, increment); + if (r < 0) + return r; + + r = ref_file(known_files, e->efi, increment); + if (r < 0) + return r; + + r = ref_file(known_files, e->uki, increment); + if (r < 0) + return r; + + STRV_FOREACH(s, e->initrd) { + r = ref_file(known_files, *s, increment); + if (r < 0) + return r; + } + + r = ref_file(known_files, e->device_tree, increment); + if (r < 0) + return r; + + STRV_FOREACH(s, e->device_tree_overlay) { + r = ref_file(known_files, *s, increment); + if (r < 0) + return r; + } + + return 0; +} + int boot_config_count_known_files( const BootConfig *config, - const char* root, + BootEntrySource source, Hashmap **ret_known_files) { - _cleanup_hashmap_free_ Hashmap *known_files = NULL; int r; assert(config); assert(ret_known_files); - for (size_t i = 0; i < config->n_entries; i++) { - const BootEntry *e = config->entries + i; + _cleanup_hashmap_free_ Hashmap *known_files = NULL; + FOREACH_ARRAY(e, config->entries, config->n_entries) { - if (!path_equal(e->root, root)) + if (e->source != source) continue; - r = ref_file(&known_files, e->kernel, +1); - if (r < 0) - return r; - r = ref_file(&known_files, e->efi, +1); - if (r < 0) - return r; - r = ref_file(&known_files, e->uki, +1); - if (r < 0) - return r; - STRV_FOREACH(s, e->initrd) { - r = ref_file(&known_files, *s, +1); - if (r < 0) - return r; - } - r = ref_file(&known_files, e->device_tree, +1); + r = boot_entry_ref_files(e, &known_files, +1); if (r < 0) return r; - STRV_FOREACH(s, e->device_tree_overlay) { - r = ref_file(&known_files, *s, +1); - if (r < 0) - return r; - } } *ret_known_files = TAKE_PTR(known_files); - return 0; } -static void deref_unlink_file(Hashmap **known_files, const char *fn, const char *root) { - _cleanup_free_ char *path = NULL; +static int unref_unlink_file( + Hashmap **known_files, + const char *root, + int root_fd, + const char *path, + bool dry_run) { + int r; assert(known_files); /* just gracefully ignore this. This way the caller doesn't have to verify whether the bootloader entry is relevant */ - if (!fn || !root) - return; + if (root_fd < 0 || !root || !path) + return 0; - r = ref_file(known_files, fn, -1); + r = ref_file(known_files, path, -1); if (r < 0) - return (void) log_warning_errno(r, "Failed to deref \"%s\", ignoring: %m", fn); + return log_error_errno(r, "Failed to unref '%s': %m", path); if (r > 0) - return; + return 0; - if (arg_dry_run) { - r = chase_and_access(fn, root, CHASE_PREFIX_ROOT|CHASE_PROHIBIT_SYMLINKS|CHASE_TRIGGER_AUTOFS, F_OK, &path); - if (r < 0) - log_info_errno(r, "Unable to determine whether \"%s\" exists, ignoring: %m", fn); - else - log_info("Would remove \"%s\"", path); - return; + if (dry_run) { + _cleanup_free_ char *resolved = NULL; + r = chase_and_accessat( + /* root_fd= */ root_fd, + /* dir_fd= */ root_fd, + path, + CHASE_PROHIBIT_SYMLINKS|CHASE_TRIGGER_AUTOFS|CHASE_MUST_BE_REGULAR, + F_OK, + &resolved); + if (r < 0) { + log_warning_errno(r, "Unable to determine whether '%s' exists, ignoring: %m", path); + return 0; + } + + log_info("Would remove '%s'", resolved); + return 1; } - r = chase_and_unlink(fn, root, CHASE_PREFIX_ROOT|CHASE_PROHIBIT_SYMLINKS|CHASE_TRIGGER_AUTOFS, 0, &path); - if (r >= 0) - log_info("Removed \"%s\"", path); - else if (r != -ENOENT) - return (void) log_warning_errno(r, "Failed to remove \"%s\", ignoring: %m", fn); - - _cleanup_free_ char *d = NULL; - if (path_extract_directory(fn, &d) >= 0 && !path_equal(d, "/")) { - r = chase_and_unlink(d, root, CHASE_PREFIX_ROOT|CHASE_PROHIBIT_SYMLINKS|CHASE_TRIGGER_AUTOFS, AT_REMOVEDIR, NULL); - if (r < 0 && !IN_SET(r, -ENOTEMPTY, -ENOENT)) - log_warning_errno(r, "Failed to remove directory \"%s\", ignoring: %m", d); + _cleanup_free_ char *resolved = NULL; + r = chase_and_unlinkat( + /* root_fd= */ root_fd, + /* dir_fd= */ root_fd, + path, + CHASE_PROHIBIT_SYMLINKS|CHASE_TRIGGER_AUTOFS, + /* unlink_flags= */ 0, + &resolved); + if (r == -ENOENT) + log_debug("Resource '%s' is already removed, skipping.", path); + else if (r < 0) { + log_warning_errno(r, "Failed to remove '%s', ignoring: %m", path); + return 0; + } else + log_info("Removed '%s'", resolved); + + _cleanup_free_ char *parent = NULL; + r = path_extract_directory(path, &parent); + if (r < 0) + log_debug_errno(r, "Failed to extract parent directory of '%s', ignoring.", path); + else { + _cleanup_free_ char *resolved_parent = NULL; + r = chase_and_unlinkat( + /* root_fd= */ root_fd, + /* dir_fd= */ root_fd, + parent, + CHASE_PROHIBIT_SYMLINKS|CHASE_TRIGGER_AUTOFS, + AT_REMOVEDIR, + &resolved_parent); + if (IN_SET(r, -ENOTEMPTY, -ENOENT)) + log_debug_errno(r, "Failed to remove directory '%s', ignoring: %m", parent); + else if (r < 0) + log_warning_errno(r, "Failed to remove directory '%s', ignoring: %m", parent); + else + log_info("Removed '%s'.", resolved_parent); } + + return 1; } -static int boot_config_find_in(const BootConfig *config, const char *root, const char *id) { +static ssize_t boot_config_find_in( + const BootConfig *config, + BootEntrySource source, + const char *id) { + assert(config); + assert(source >= 0); + assert(source < _BOOT_ENTRY_SOURCE_MAX); - if (!root || !id) + if (!id) return -ENOENT; for (size_t i = 0; i < config->n_entries; i++) - if (path_equal(config->entries[i].root, root) && + if (config->entries[i].source == source && fnmatch(id, config->entries[i].id, FNM_CASEFOLD) == 0) - return i; + return (ssize_t) i; return -ENOENT; } -static int unlink_entry(const BootConfig *config, const char *root, const char *id) { - _cleanup_hashmap_free_ Hashmap *known_files = NULL; - const BootEntry *e = NULL; - int r; - - assert(config); +int boot_entry_unlink( + const BootEntry *e, + const char *root, + int root_fd, + Hashmap *known_files, + bool dry_run) { - r = boot_config_count_known_files(config, root, &known_files); - if (r < 0) - return log_error_errno(r, "Failed to count files in %s: %m", root); - - r = boot_config_find_in(config, root, id); - if (r < 0) - return 0; /* There is nothing to remove. */ - - if (r == config->default_entry) - log_warning("%s is the default boot entry", id); - if (r == config->selected_entry) - log_warning("%s is the selected boot entry", id); + int r; - e = &config->entries[r]; + assert(e); + assert(root_fd >= 0); - deref_unlink_file(&known_files, e->kernel, e->root); - deref_unlink_file(&known_files, e->efi, e->root); - deref_unlink_file(&known_files, e->uki, e->root); + (void) unref_unlink_file(&known_files, root, root_fd, e->kernel, dry_run); + (void) unref_unlink_file(&known_files, root, root_fd, e->efi, dry_run); + (void) unref_unlink_file(&known_files, root, root_fd, e->uki, dry_run); STRV_FOREACH(s, e->initrd) - deref_unlink_file(&known_files, *s, e->root); - deref_unlink_file(&known_files, e->device_tree, e->root); + (void) unref_unlink_file(&known_files, root, root_fd, *s, dry_run); + (void) unref_unlink_file(&known_files, root, root_fd, e->device_tree, dry_run); STRV_FOREACH(s, e->device_tree_overlay) - deref_unlink_file(&known_files, *s, e->root); + (void) unref_unlink_file(&known_files, root, root_fd, *s, dry_run); - if (arg_dry_run) + if (dry_run) log_info("Would remove \"%s\"", e->path); else { - r = chase_and_unlink(e->path, root, CHASE_PROHIBIT_SYMLINKS|CHASE_TRIGGER_AUTOFS, 0, NULL); + const char *p = path_startswith(e->path, root); + if (!p) + return log_error_errno(SYNTHETIC_ERRNO(EINVAL), "File '%s' is not inside root '%s', refusing.", e->path, root); + + _cleanup_free_ char *resolved = NULL; + r = chase_and_unlinkat( + /* root_fd= */ root_fd, + /* dir_fd= */ root_fd, + p, + CHASE_PROHIBIT_SYMLINKS|CHASE_TRIGGER_AUTOFS, + /* unlink_flags= */ 0, + &resolved); if (r == -ENOENT) return 0; /* Already removed? */ if (r < 0) return log_error_errno(r, "Failed to remove \"%s\": %m", e->path); - log_info("Removed %s", e->path); + log_info("Removed '%s'.", resolved); } return 0; } -int verb_unlink(int argc, char *argv[], uintptr_t _data, void *userdata) { - dev_t esp_devid = 0, xbootldr_devid = 0; +static int unlink_entry( + const BootConfig *config, + const char *root, + int root_fd, + BootEntrySource source, + char **ids, + bool dry_run) { + + size_t n_removed = 0; + int r; + + assert(config); + + _cleanup_hashmap_free_ Hashmap *known_files = NULL; + r = boot_config_count_known_files(config, source, &known_files); + if (r < 0) + return log_error_errno(r, "Failed to count files in %s: %m", root); + + int ret = 0; + STRV_FOREACH(id, ids) { + log_debug("Unlinking '%s'", *id); + ssize_t idx = boot_config_find_in(config, source, *id); + if (idx < 0) + continue; /* There is nothing to remove. */ + + log_debug("Index %zi", idx); + + if (idx == config->default_entry) + log_warning("%s is the default boot entry", *id); + if (idx == config->selected_entry) + log_warning("%s is the selected boot entry", *id); + + r = boot_entry_unlink(config->entries + idx, root, root_fd, known_files, dry_run); + if (r < 0) + RET_GATHER(ret, r); + else + n_removed++; + } + + if (n_removed == 0) + log_info("No matching entries found or removed."); + + return ret; +} + +static int unlink_context_from_cmdline(UnlinkContext *ret) { int r; + assert(ret); + + _cleanup_(unlink_context_done) UnlinkContext b = UNLINK_CONTEXT_NULL; + b.entry_token_type = arg_entry_token_type; + + if (strdup_to(&b.entry_token, arg_entry_token) < 0) + return log_oom(); + + if (arg_root) { + b.root_fd = open(arg_root, O_CLOEXEC|O_DIRECTORY|O_PATH); + if (b.root_fd < 0) + return log_error_errno(errno, "Failed to open root directory '%s': %m", arg_root); + + if (strdup_to(&b.root, arg_root) < 0) + return log_oom(); + } else + b.root_fd = XAT_FDROOT; + r = acquire_esp(/* unprivileged_mode= */ false, /* graceful= */ false, - /* ret_fd= */ NULL, + &b.esp_fd, /* ret_part= */ NULL, /* ret_pstart= */ NULL, /* ret_psize= */ NULL, /* ret_uuid= */ NULL, - &esp_devid); - if (r == -EACCES) /* We really need the ESP path for this call, hence also log about access errors */ - return log_error_errno(r, "Failed to determine ESP location: %m"); - if (r < 0) - return r; + &b.esp_devid); + if (r < 0 && r != -ENOKEY) + return r; /* About all other errors acquire_esp() logs on its own */ + if (r > 0) { + if (arg_root) { + const char *e = path_startswith(arg_esp_path, arg_root); + if (!e) + return log_error_errno(SYNTHETIC_ERRNO(EINVAL), "ESP path '%s' not below specified root '%s', refusing.", arg_esp_path, arg_root); + + r = strdup_to(&b.esp_path, e); + } else + r = strdup_to(&b.esp_path, arg_esp_path); + if (r < 0) + return log_oom(); + } r = acquire_xbootldr( /* unprivileged_mode= */ false, - /* ret_fd= */ NULL, + &b.xbootldr_fd, /* ret_uuid= */ NULL, - &xbootldr_devid); - if (r == -EACCES) - return log_error_errno(r, "Failed to determine XBOOTLDR partition: %m"); - if (r < 0) + &b.xbootldr_devid); + if (r < 0 && r != -ENOKEY) return r; + if (r > 0) { + if (arg_root) { + const char *e = path_startswith(arg_xbootldr_path, arg_root); + if (!e) + return log_error_errno(SYNTHETIC_ERRNO(EINVAL), "XBOOTLDR path '%s' not below specified root '%s', refusing.", arg_xbootldr_path, arg_root); + + r = strdup_to(&b.xbootldr_path, e); + } else + r = strdup_to(&b.xbootldr_path, arg_xbootldr_path); + if (r < 0) + return log_oom(); + } + + /* Only if we found neither ESP nor XBOOTLDR let's fail. */ + if (!b.xbootldr_path && !b.esp_path) + return log_error_errno(SYNTHETIC_ERRNO(ENOKEY), "Neither ESP nor XBOOTLDR found, refusing."); + + *ret = TAKE_GENERIC(b, UnlinkContext, UNLINK_CONTEXT_NULL); + return 0; +} + +static int run_unlink( + UnlinkContext *c, + char **_ids, + bool dry_run) { + + int r; + assert(c); + + _cleanup_free_ char *x = NULL, *y = NULL; + if (c->root && c->esp_path) { + x = path_join(c->root, c->esp_path); + if (!x) + return log_oom(); + } + + if (c->root && c->xbootldr_path) { + y = path_join(c->root, c->xbootldr_path); + if (!y) + return log_oom(); + } _cleanup_(boot_config_free) BootConfig config = BOOT_CONFIG_NULL; r = boot_config_load_and_select( &config, - arg_root, - arg_esp_path, - esp_devid, - arg_xbootldr_path, - xbootldr_devid); + c->root, + x ?: c->esp_path, + c->esp_devid, + y ?: c->xbootldr_path, + c->xbootldr_devid); if (r < 0) return r; + _cleanup_(strv_freep) char **ids = NULL; + if (strv_isempty(_ids)) { + r = id128_get_machine_at(c->root_fd, &c->machine_id); + if (r < 0 && !ERRNO_IS_NEG_MACHINE_ID_UNSET(r)) + return log_error_errno(r, "Failed to get machine-id: %m"); + + const char *e = secure_getenv("KERNEL_INSTALL_CONF_ROOT"); + r = boot_entry_token_ensure_at( + e ? XAT_FDROOT : c->root_fd, + e, + c->machine_id, + /* machine_id_is_random= */ false, + &c->entry_token_type, + &c->entry_token); + if (r < 0) + return r; + + r = boot_config_find_oldest_commit( + &config, + c->entry_token, + &ids); + if (r == -ENXIO) + return log_error_errno(r, "No suitable boot menu entry to delete found."); + if (r == -EBUSY) + return log_error_errno(r, "Refusing to remove currently booted boot menu entry."); + if (r < 0) + return log_error_errno(r, "Failed to find suitable oldest boot menu entry: %m"); + + STRV_FOREACH(id, ids) + log_info("Will unlink '%s'.", *id); + } else { + ids = strv_copy(_ids); + if (!ids) + return log_oom(); + } + + strv_sort_uniq(ids); + r = 0; - RET_GATHER(r, unlink_entry(&config, arg_esp_path, argv[1])); + if (c->esp_path) + RET_GATHER(r, unlink_entry(&config, x ?: c->esp_path, c->esp_fd, BOOT_ENTRY_ESP, ids, dry_run)); - if (arg_xbootldr_path && xbootldr_devid != esp_devid) - RET_GATHER(r, unlink_entry(&config, arg_xbootldr_path, argv[1])); + if (c->xbootldr_path && c->xbootldr_devid != c->esp_devid) + RET_GATHER(r, unlink_entry(&config, y ?: c->xbootldr_path, c->xbootldr_fd, BOOT_ENTRY_XBOOTLDR, ids, dry_run)); return r; } + +int verb_unlink(int argc, char *argv[], uintptr_t _data, void *userdata) { + int r; + + assert(argc < 3); + + if (arg_oldest != isempty(argv[1])) + return log_error_errno(SYNTHETIC_ERRNO(EINVAL), "Exactly one of an entry ID or --oldest= must be specified."); + + const char *id = empty_to_null(argv[1]); + + _cleanup_(unlink_context_done) UnlinkContext c = UNLINK_CONTEXT_NULL; + r = unlink_context_from_cmdline(&c); + if (r < 0) + return r; + + return run_unlink(&c, STRV_MAKE(id), arg_dry_run); +} + +static JSON_DISPATCH_ENUM_DEFINE(json_dispatch_boot_entry_token_type, BootEntryTokenType, boot_entry_token_type_from_string); + +typedef struct UnlinkParameters { + UnlinkContext context; + unsigned root_fd_index; + const char *id; + bool oldest; +} UnlinkParameters; + +static void unlink_parameters_done(UnlinkParameters *p) { + assert(p); + + unlink_context_done(&p->context); +} + +int vl_method_unlink( + sd_varlink *link, + sd_json_variant *parameters, + sd_varlink_method_flags_t flags, + void *userdata) { + + int r; + + assert(link); + + _cleanup_(unlink_parameters_done) UnlinkParameters p = { + .context = UNLINK_CONTEXT_NULL, + .root_fd_index = UINT_MAX, + }; + + static const sd_json_dispatch_field dispatch_table[] = { + { "rootFileDescriptor", _SD_JSON_VARIANT_TYPE_INVALID, sd_json_dispatch_uint, voffsetof(p, root_fd_index), 0 }, + { "rootDirectory", SD_JSON_VARIANT_STRING, json_dispatch_path, voffsetof(p, context.root), 0 }, + { "bootEntryTokenType", SD_JSON_VARIANT_STRING, json_dispatch_boot_entry_token_type, voffsetof(p, context.entry_token_type), 0 }, + { "id", SD_JSON_VARIANT_STRING, sd_json_dispatch_const_string, voffsetof(p, id), 0 }, + { "oldest", SD_JSON_VARIANT_BOOLEAN, sd_json_dispatch_stdbool, voffsetof(p, oldest), 0 }, + {}, + }; + + r = sd_varlink_dispatch(link, parameters, dispatch_table, &p); + if (r != 0) + return r; + + /* Only allow oldest *or* id to be set */ + if (p.oldest == !!p.id) + return sd_varlink_error_invalid_parameter_name(link, "id"); + if (p.id && !efi_loader_entry_name_valid(p.id)) + return sd_varlink_error_invalid_parameter_name(link, "id"); + + if (p.root_fd_index != UINT_MAX) { + p.context.root_fd = sd_varlink_peek_dup_fd(link, p.root_fd_index); + if (p.context.root_fd < 0) + return log_debug_errno(p.context.root_fd, "Failed to acquire root fd from Varlink: %m"); + + r = fd_verify_safe_flags_full(p.context.root_fd, O_DIRECTORY); + if (r < 0) + return sd_varlink_error_invalid_parameter_name(link, "rootFileDescriptor"); + + r = fd_verify_directory(p.context.root_fd); + if (r < 0) + return log_debug_errno(r, "Specified file descriptor does not refer to a directory: %m"); + + if (!p.context.root) { + r = fd_get_path(p.context.root_fd, &p.context.root); + if (r < 0) + return log_debug_errno(r, "Failed to get path of file descriptor: %m"); + + if (empty_or_root(p.context.root)) + p.context.root = mfree(p.context.root); + } + } else if (p.context.root) { + p.context.root_fd = open(p.context.root, O_RDONLY|O_CLOEXEC|O_DIRECTORY); + if (p.context.root_fd < 0) + return log_debug_errno(errno, "Failed to open '%s': %m", p.context.root); + } else + p.context.root_fd = XAT_FDROOT; + + if (p.context.entry_token_type < 0) + p.context.entry_token_type = BOOT_ENTRY_TOKEN_AUTO; + + r = find_esp_and_warn_at_full( + p.context.root_fd, + /* path= */ NULL, + /* unprivileged_mode= */ false, + &p.context.esp_path, + &p.context.esp_fd, + /* ret_part= */ NULL, + /* ret_pstart= */ NULL, + /* ret_psize= */ NULL, + /* ret_uuid= */ NULL, + &p.context.esp_devid); + if (r < 0 && r != -ENOKEY) + return r; + r = find_xbootldr_and_warn_at_full( + p.context.root_fd, + /* path= */ NULL, + /* unprivileged_mode= */ false, + &p.context.xbootldr_path, + &p.context.xbootldr_fd, + /* ret_uuid= */ NULL, + &p.context.xbootldr_devid); + if (r < 0 && r != -ENOKEY) + return r; + + /* Only if we found neither ESP nor XBOOTLDR let's fail. */ + if (!p.context.xbootldr_path && !p.context.esp_path) + return sd_varlink_error(link, "io.systemd.BootControl.NoDollarBootFound", NULL); + + r = run_unlink(&p.context, STRV_MAKE(p.id), /* dry_run= */ false); + if (r == -EUNATCH) /* no boot entry token is set */ + return sd_varlink_error(link, "io.systemd.BootControl.BootEntryTokenUnavailable", NULL); + if (r < 0) + return r; + + return sd_varlink_reply(link, NULL); +} diff --git a/src/bootctl/bootctl-unlink.h b/src/bootctl/bootctl-unlink.h index 5c33088859437..728c775d26e8e 100644 --- a/src/bootctl/bootctl-unlink.h +++ b/src/bootctl/bootctl-unlink.h @@ -5,4 +5,8 @@ int verb_unlink(int argc, char *argv[], uintptr_t _data, void *userdata); -int boot_config_count_known_files(const BootConfig *config, const char* root, Hashmap **ret_known_files); +int vl_method_unlink(sd_varlink *link, sd_json_variant *parameters, sd_varlink_method_flags_t flags, void *userdata); + +int boot_config_count_known_files(const BootConfig *config, BootEntrySource source, Hashmap **ret_known_files); + +int boot_entry_unlink(const BootEntry *e, const char *root, int root_fd, Hashmap *known_files, bool dry_run); diff --git a/src/bootctl/bootctl.c b/src/bootctl/bootctl.c index 6869e838cfc4e..67a20814daf9e 100644 --- a/src/bootctl/bootctl.c +++ b/src/bootctl/bootctl.c @@ -34,6 +34,7 @@ #include "options.h" #include "pager.h" #include "parse-argument.h" +#include "parse-util.h" #include "path-util.h" #include "pretty-print.h" #include "string-table.h" @@ -80,6 +81,7 @@ char *arg_certificate_source = NULL; char *arg_private_key = NULL; KeySourceType arg_private_key_source_type = OPENSSL_KEY_SOURCE_FILE; char *arg_private_key_source = NULL; +bool arg_oldest = false; STATIC_DESTRUCTOR_REGISTER(arg_esp_path, freep); STATIC_DESTRUCTOR_REGISTER(arg_xbootldr_path, freep); @@ -361,7 +363,7 @@ VERB_GROUP("Boot Loader Specification Commands"); VERB_SCOPE_NOARG(, verb_list, "list", "List boot loader entries"); -VERB_SCOPE(, verb_unlink, "unlink", "ID", 2, 2, 0, +VERB_SCOPE(, verb_unlink, "unlink", "ID", VERB_ANY, 2, 0, "Remove boot loader entry"); VERB_SCOPE_NOARG(, verb_cleanup, "cleanup", @@ -631,6 +633,14 @@ static int parse_argv(int argc, char *argv[], char ***ret_args) { if (r < 0) return r; break; + + OPTION_LONG("oldest", "BOOL", + "Delete oldest boot menu entry"): + r = parse_boolean_argument("--oldest=", opts.arg, &arg_oldest); + if (r < 0) + return r; + + break; } char **args = option_parser_get_args(&opts); @@ -700,7 +710,8 @@ static int vl_server(void) { "io.systemd.BootControl.ListBootEntries", vl_method_list_boot_entries, "io.systemd.BootControl.SetRebootToFirmware", vl_method_set_reboot_to_firmware, "io.systemd.BootControl.GetRebootToFirmware", vl_method_get_reboot_to_firmware, - "io.systemd.BootControl.Install", vl_method_install); + "io.systemd.BootControl.Install", vl_method_install, + "io.systemd.BootControl.Unlink", vl_method_unlink); if (r < 0) return log_error_errno(r, "Failed to bind Varlink methods: %m"); diff --git a/src/bootctl/bootctl.h b/src/bootctl/bootctl.h index d0daab9dd12b3..ea097ba329753 100644 --- a/src/bootctl/bootctl.h +++ b/src/bootctl/bootctl.h @@ -51,6 +51,7 @@ extern char *arg_certificate_source; extern char *arg_private_key; extern KeySourceType arg_private_key_source_type; extern char *arg_private_key_source; +extern bool arg_oldest; static inline const char* arg_dollar_boot_path(void) { /* $BOOT shall be the XBOOTLDR partition if it exists, and otherwise the ESP */ diff --git a/src/shared/shared-forward.h b/src/shared/shared-forward.h index 1207fe8a25826..e850d8982bd30 100644 --- a/src/shared/shared-forward.h +++ b/src/shared/shared-forward.h @@ -14,6 +14,7 @@ struct in_addr_full; typedef enum AskPasswordFlags AskPasswordFlags; typedef enum BootEntryTokenType BootEntryTokenType; +typedef enum BootEntrySource BootEntrySource; typedef enum BusPrintPropertyFlags BusPrintPropertyFlags; typedef enum BusTransport BusTransport; typedef enum CatFlags CatFlags; @@ -48,6 +49,7 @@ typedef enum UserStorage UserStorage; typedef struct AskPasswordRequest AskPasswordRequest; typedef struct Bitmap Bitmap; typedef struct BootConfig BootConfig; +typedef struct BootEntry BootEntry; typedef struct BPFProgram BPFProgram; typedef struct BusObjectImplementation BusObjectImplementation; typedef struct CalendarSpec CalendarSpec; diff --git a/src/shared/varlink-io.systemd.BootControl.c b/src/shared/varlink-io.systemd.BootControl.c index 70203300ffe5e..42f7b53492e45 100644 --- a/src/shared/varlink-io.systemd.BootControl.c +++ b/src/shared/varlink-io.systemd.BootControl.c @@ -134,6 +134,19 @@ static SD_VARLINK_DEFINE_METHOD( SD_VARLINK_FIELD_COMMENT("If true the boot loader will be registered in an EFI boot entry via EFI variables, otherwise this is omitted"), SD_VARLINK_DEFINE_INPUT(touchVariables, SD_VARLINK_BOOL, SD_VARLINK_NULLABLE)); +static SD_VARLINK_DEFINE_METHOD( + Unlink, + SD_VARLINK_FIELD_COMMENT("Index into array of file descriptors passed along with this message, pointing to file descriptor to root file system to operate on"), + SD_VARLINK_DEFINE_INPUT(rootFileDescriptor, SD_VARLINK_INT, SD_VARLINK_NULLABLE), + SD_VARLINK_FIELD_COMMENT("Root directory to operate relative to. If both this and rootFileDescriptor is specified, this is purely informational. If only this is specified, it is what will be used."), + SD_VARLINK_DEFINE_INPUT(rootDirectory, SD_VARLINK_STRING, SD_VARLINK_NULLABLE), + SD_VARLINK_FIELD_COMMENT("Selects how to identify boot entries"), + SD_VARLINK_DEFINE_INPUT_BY_TYPE(bootEntryTokenType, BootEntryTokenType, SD_VARLINK_NULLABLE), + SD_VARLINK_FIELD_COMMENT("The ID of the boot loader entry to remove."), + SD_VARLINK_DEFINE_INPUT(id, SD_VARLINK_STRING, SD_VARLINK_NULLABLE), + SD_VARLINK_FIELD_COMMENT("If true, remove the oldest entry."), + SD_VARLINK_DEFINE_INPUT(oldest, SD_VARLINK_BOOL, SD_VARLINK_NULLABLE)); + static SD_VARLINK_DEFINE_ERROR( RebootToFirmwareNotSupported); @@ -143,6 +156,9 @@ static SD_VARLINK_DEFINE_ERROR( static SD_VARLINK_DEFINE_ERROR( NoESPFound); +static SD_VARLINK_DEFINE_ERROR( + NoDollarBootFound); + static SD_VARLINK_DEFINE_ERROR( BootEntryTokenUnavailable); @@ -170,11 +186,15 @@ SD_VARLINK_DEFINE_INTERFACE( &vl_type_BootEntryTokenType, SD_VARLINK_SYMBOL_COMMENT("Install the boot loader on the ESP."), &vl_method_Install, + SD_VARLINK_SYMBOL_COMMENT("Unlink a boot menu item"), + &vl_method_Unlink, SD_VARLINK_SYMBOL_COMMENT("SetRebootToFirmware() and GetRebootToFirmware() return this if the firmware does not actually support the reboot-to-firmware-UI concept."), &vl_error_RebootToFirmwareNotSupported, SD_VARLINK_SYMBOL_COMMENT("No boot entry defined."), &vl_error_NoSuchBootEntry, SD_VARLINK_SYMBOL_COMMENT("No EFI System Partition (ESP) found."), &vl_error_NoESPFound, - SD_VARLINK_SYMBOL_COMMENT("The select boot entry token could not be determined."), + SD_VARLINK_SYMBOL_COMMENT("Neither ESP nor XBOOTLDR found, hence no $BOOT location identified."), + &vl_error_NoDollarBootFound, + SD_VARLINK_SYMBOL_COMMENT("The selected boot entry token could not be determined."), &vl_error_BootEntryTokenUnavailable); From e48d8a1ea0d3a37939fc2f008a68fe173e219a4b Mon Sep 17 00:00:00 2001 From: Lennart Poettering Date: Mon, 9 Feb 2026 23:51:42 +0100 Subject: [PATCH 069/242] bootctl: add "link" command --- man/bootctl.xml | 101 ++ shell-completion/bash/bootctl | 7 +- shell-completion/zsh/_bootctl | 5 + src/bootctl/bootctl-link.c | 1206 +++++++++++++++++++ src/bootctl/bootctl-link.h | 8 + src/bootctl/bootctl.c | 122 ++ src/bootctl/bootctl.h | 10 + src/bootctl/meson.build | 1 + src/shared/efi-loader.c | 11 + src/shared/efi-loader.h | 2 + src/shared/varlink-io.systemd.BootControl.c | 47 +- 11 files changed, 1515 insertions(+), 5 deletions(-) create mode 100644 src/bootctl/bootctl-link.c create mode 100644 src/bootctl/bootctl-link.h diff --git a/man/bootctl.xml b/man/bootctl.xml index fb5f4b2b2a9eb..39ddfdd58c06a 100644 --- a/man/bootctl.xml +++ b/man/bootctl.xml @@ -117,6 +117,36 @@ + + KERNEL + + Creates one or more Type #1 boot loader entries pointing to the specified UKI. Takes + the path to a Unified Kernel Image (UKI) as argument. The UKI is copied into the ESP (or XBOOTLDR + partition if present) below the configured entry token directory, and one or more Boot Loader + Specification Type #1 entries are generated referring to it (one per UKI profile, if multiple + profiles are embedded). + + The title, version, commit number and initial try counter of the generated entries + may be overridden with , , + and . Additional sidecar resources + (system extension images, configuration extension images, credential files) to pass to the UKI at + boot may be specified with . + + If the ESP/XBOOTLDR do not have enough free space for the new boot loader entry and its + referenced resources the oldest existing boot loader entry matching the selected entry token is + removed (along with any resources referenced by it that are no longer referenced by any other + entry). This step is repeated until the new boot loader entry fits. For robustness reasons the + currently booted boot loader entry is never removed, nor is the last existing boot loader + entry. + + By default, the operation refuses to proceed if the resulting ESP/XBOOTLDR free space would + drop below a safety threshold after automatic removal of older entries completes; use + to adjust. + + + + @@ -577,6 +607,77 @@ + + + + When used with link, controls the minimum amount of free space + (in bytes) that must remain on the target partition (ESP or XBOOTLDR) after the new entry has been + materialized. The operation fails if installing the entry would drop the free space below this + threshold. Accepts the usual size suffixes (K, M, G, …). If empty, the built-in default is + restored. If set to zero no minimum amount of free space is kept. + + + + + + + + When used with link, specifies the title of the generated boot + loader entry (the title field of the Type #1 entry). If not specified, a title is + derived from the UKI's embedded metadata. + + + + + + + + When used with link, specifies the version string of the + generated boot loader entry (the version field of the Type #1 entry). If not + specified, the version is derived from the UKI's embedded metadata. Used by the boot loader to sort + and select entries. + + + + + + + + When used with link, specifies the commit number for the generated + boot loader entry. + + + + + + + + + When used with link, registers an additional sidecar resource + file that shall be passed to the UKI at boot. This may be a system extension image + (*.sysext.raw), configuration extension image + (*.confext.raw), or credential file + (*.cred). The file is copied into the ESP/XBOOTLDR alongside the UKI and the + boot loader will load and pass it to the kernel via initrd. This option may be used multiple times + to register more than one extra resource. If passed an empty argument, all previously specified + extras are cleared. + + + + + + + + When used with link, initializes the boot counting + tries-left counter for the generated entry. If set, the resulting boot entry file + is named according to the boot counting scheme described in the Automatic Boot Assessment documentation, + so that the boot loader decreases the counter on each attempted boot and eventually marks the entry + as bad. If not specified, boot counting is not enabled for the generated entry. + + + + diff --git a/shell-completion/bash/bootctl b/shell-completion/bash/bootctl index 792fc0c0acc83..3b7290c230c14 100644 --- a/shell-completion/bash/bootctl +++ b/shell-completion/bash/bootctl @@ -43,7 +43,8 @@ _bootctl() { --efi-boot-option-description --efi-boot-option-description-with-device --secure-boot-auto-enroll --private-key --private-key-source --certificate --certificate-source - --oldest' + --oldest --keep-free --entry-title --entry-version --entry-commit + -X --extra --tries-left' ) if __contains_word "$prev" ${OPTS[ARG]}; then @@ -62,7 +63,7 @@ _bootctl() { --entry-token) comps="machine-id os-id os-image-id auto literal:" ;; - --image|--root) + --image|--root|-X|--extra) compopt -o nospace comps=$( compgen -A file -- "$cur" ) ;; @@ -89,7 +90,7 @@ _bootctl() { [STANDALONE]='help status install update remove is-installed random-seed list set-timeout set-timeout-oneshot cleanup' [BOOTENTRY]='set-default set-oneshot set-sysfail set-preferred unlink' [BOOLEAN]='reboot-to-firmware' - [FILE]='kernel-identify kernel-inspect' + [FILE]='kernel-identify kernel-inspect link' ) for ((i=0; i < COMP_CWORD; i++)); do diff --git a/shell-completion/zsh/_bootctl b/shell-completion/zsh/_bootctl index c23c1c888dae5..423828ba33243 100644 --- a/shell-completion/zsh/_bootctl +++ b/shell-completion/zsh/_bootctl @@ -36,6 +36,10 @@ _bootctl_unlink() { _bootctl_comp_ids } +_bootctl_link() { + _files +} + _bootctl_kernel-identify() { _files } @@ -70,6 +74,7 @@ _bootctl_reboot-to-firmware() { "set-timeout:Set the menu timeout" "set-timeout-oneshot:Set the menu timeout for the next boot only" "unlink:Remove boot loader entry" + "link:Add boot loader entry" "cleanup:Remove files in ESP not referenced in any boot entry" "kernel-identify:Identify kernel image type" "kernel-inspect:Print details about the kernel image" diff --git a/src/bootctl/bootctl-link.c b/src/bootctl/bootctl-link.c new file mode 100644 index 0000000000000..6358189e7d2f3 --- /dev/null +++ b/src/bootctl/bootctl-link.c @@ -0,0 +1,1206 @@ +/* SPDX-License-Identifier: LGPL-2.1-or-later */ + +#include +#include + +#include "sd-json.h" +#include "sd-varlink.h" + +#include "boot-entry.h" +#include "bootctl.h" +#include "bootctl-link.h" +#include "bootctl-unlink.h" +#include "bootspec.h" +#include "bootspec-util.h" +#include "chase.h" +#include "copy.h" +#include "dirent-util.h" +#include "efi-loader.h" +#include "env-file.h" +#include "errno-util.h" +#include "fd-util.h" +#include "find-esp.h" +#include "format-util.h" +#include "fs-util.h" +#include "hashmap.h" +#include "id128-util.h" +#include "io-util.h" +#include "json-util.h" +#include "kernel-image.h" +#include "log.h" +#include "parse-argument.h" +#include "path-util.h" +#include "recurse-dir.h" +#include "stat-util.h" +#include "stdio-util.h" +#include "string-util.h" +#include "strv.h" +#include "tmpfile-util.h" +#include "uki.h" +#include "utf8.h" + +/* Keeps track of an "extra" file to associate with the type 1 entries to generate */ +typedef struct ExtraFile { + /* The source and the temporary file we copy it into */ + int source_fd, temp_fd; + char *filename, *temp_filename; +} ExtraFile; + +#define EXTRA_FILE_NULL \ + (const ExtraFile) { \ + .source_fd = -EBADF, \ + .temp_fd = -EBADF, \ + } + +/* Keeps track of a specific UKI profile we need to generate a type entry for */ +typedef struct Profile { + /* The final and the temporary file for the .conf entry file, while we write it */ + char *entry_filename, *entry_temp_filename; + int entry_temp_fd; +} Profile; + +typedef struct LinkContext { + char *root; + int root_fd; + + sd_id128_t machine_id; + BootEntryTokenType entry_token_type; + char *entry_token; + + char *entry_title; + char *entry_version; + uint64_t entry_commit; + + BootEntrySource dollar_boot_source; + char *dollar_boot_path; + int dollar_boot_fd; + int entry_token_dir_fd; + int loader_entries_dir_fd; + + /* The UKI source and temporary target while we write it. Note that for now we exclusively support + * UKIs, but let's keep things somewhat generic to keep options open for the future. */ + char *kernel_filename, *kernel_temp_filename; + int kernel_fd, kernel_temp_fd; + + ExtraFile *extra; + size_t n_extra; + + Profile *profiles; + size_t n_profiles; + + unsigned tries_left; + + uint64_t keep_free; + + char **linked_ids; +} LinkContext; + +#define LINK_CONTEXT_NULL \ + (LinkContext) { \ + .root_fd = -EBADF, \ + .entry_token_type = _BOOT_ENTRY_TOKEN_TYPE_INVALID, \ + .dollar_boot_fd = -EBADF, \ + .loader_entries_dir_fd = -EBADF, \ + .entry_token_dir_fd = -EBADF, \ + .kernel_fd = -EBADF, \ + .kernel_temp_fd = -EBADF, \ + .tries_left = UINT_MAX, \ + .keep_free = UINT64_MAX, \ + } + +static void extra_file_done(ExtraFile *x) { + assert(x); + + x->source_fd = safe_close(x->source_fd); + x->temp_fd = safe_close(x->temp_fd); + x->filename = mfree(x->filename); + x->temp_filename = mfree(x->temp_filename); +} + +static void profile_done(Profile *p) { + assert(p); + + p->entry_filename = mfree(p->entry_filename); + p->entry_temp_filename = mfree(p->entry_temp_filename); + p->entry_temp_fd = safe_close(p->entry_temp_fd); +} + +static void link_context_unlink_temporary(LinkContext *c) { + assert(c); + + if (c->kernel_temp_filename) { + if (c->entry_token_dir_fd >= 0) + (void) unlinkat(c->entry_token_dir_fd, c->kernel_temp_filename, /* flags= */ 0); + + c->kernel_temp_fd = safe_close(c->kernel_temp_fd); + c->kernel_temp_filename = mfree(c->kernel_temp_filename); + } + + FOREACH_ARRAY(x, c->extra, c->n_extra) { + if (!x->temp_filename) + continue; + + if (c->entry_token_dir_fd >= 0) + (void) unlinkat(c->entry_token_dir_fd, x->temp_filename, /* flags= */ 0); + + x->temp_fd = safe_close(x->temp_fd); + x->temp_filename = mfree(x->temp_filename); + } + + FOREACH_ARRAY(p, c->profiles, c->n_profiles) { + if (!p->entry_temp_filename) + continue; + + if (c->loader_entries_dir_fd >= 0) + (void) unlinkat(c->loader_entries_dir_fd, p->entry_temp_filename, /* flags= */ 0); + + p->entry_temp_fd = safe_close(p->entry_temp_fd); + p->entry_temp_filename = mfree(p->entry_temp_filename); + } +} + +static void link_context_clear_profiles(LinkContext *c) { + assert(c); + + FOREACH_ARRAY(p, c->profiles, c->n_profiles) + profile_done(p); + + c->profiles = mfree(c->profiles); + c->n_profiles = 0; +} + +static void link_context_done(LinkContext *c) { + assert(c); + + link_context_unlink_temporary(c); + + FOREACH_ARRAY(x, c->extra, c->n_extra) + extra_file_done(x); + + c->extra = mfree(c->extra); + c->n_extra = 0; + + link_context_clear_profiles(c); + + c->kernel_filename = mfree(c->kernel_filename); + c->kernel_fd = safe_close(c->kernel_fd); + c->kernel_temp_filename = mfree(c->kernel_temp_filename); + c->kernel_temp_fd = safe_close(c->kernel_temp_fd); + + c->root = mfree(c->root); + c->root_fd = safe_close(c->root_fd); + + c->entry_token = mfree(c->entry_token); + c->entry_title = mfree(c->entry_title); + c->entry_version = mfree(c->entry_version); + + c->dollar_boot_path = mfree(c->dollar_boot_path); + c->dollar_boot_fd = safe_close(c->dollar_boot_fd); + c->entry_token_dir_fd = safe_close(c->entry_token_dir_fd); + c->loader_entries_dir_fd = safe_close(c->loader_entries_dir_fd); + + c->linked_ids = strv_free(c->linked_ids); +} + +static int link_context_from_cmdline(LinkContext *ret, const char *kernel) { + int r; + + assert(ret); + assert(kernel); + + _cleanup_(link_context_done) LinkContext b = LINK_CONTEXT_NULL; + b.entry_token_type = arg_entry_token_type; + b.tries_left = arg_tries_left; + b.entry_commit = arg_entry_commit; + b.keep_free = arg_keep_free; + + if (strdup_to(&b.entry_token, arg_entry_token) < 0 || + strdup_to(&b.entry_title, arg_entry_title) < 0 || + strdup_to(&b.entry_version, arg_entry_version) < 0) + return log_oom(); + + if (arg_root) { + b.root_fd = open(arg_root, O_CLOEXEC|O_DIRECTORY|O_PATH); + if (b.root_fd < 0) + return log_error_errno(errno, "Failed to open root directory '%s': %m", arg_root); + + if (strdup_to(&b.root, arg_root) < 0) + return log_oom(); + } else + b.root_fd = XAT_FDROOT; + + r = path_extract_filename(kernel, &b.kernel_filename); + if (r < 0) + return log_error_errno(r, "Failed to extract filename from kernel path '%s': %m", kernel); + if (!efi_loader_entry_resource_filename_valid(b.kernel_filename)) + return log_error_errno(SYNTHETIC_ERRNO(EINVAL), "Kernel '%s' is not suitable for reference in a boot menu entry.", kernel); + b.kernel_fd = xopenat_full(AT_FDCWD, kernel, O_RDONLY|O_CLOEXEC|O_NONBLOCK|O_NOCTTY, XO_REGULAR, /* mode= */ MODE_INVALID); + if (b.kernel_fd < 0) + return log_error_errno(b.kernel_fd, "Failed to open kernel path '%s': %m", kernel); + + KernelImageType kit = _KERNEL_IMAGE_TYPE_INVALID; + r = inspect_kernel(b.kernel_fd, /* filename= */ NULL, &kit); + if (r == -EBADMSG) + return log_error_errno(r, "Kernel image '%s' is not valid.", kernel); + if (r < 0) + return log_error_errno(r, "Failed to determine kernel image type of '%s': %m", kernel); + if (kit != KERNEL_IMAGE_TYPE_UKI) + return log_error_errno(SYNTHETIC_ERRNO(EOPNOTSUPP), "Kernel image '%s' is not a UKI.", kernel); + + STRV_FOREACH(x, arg_extras) { + _cleanup_free_ char *fn = NULL; + r = path_extract_filename(*x, &fn); + if (r < 0) + return log_error_errno(r, "Failed to extract filename from path '%s': %m", *x); + if (r == O_DIRECTORY) + return log_error_errno(SYNTHETIC_ERRNO(EISDIR), "Extra file path '%s' does not refer to regular file.", *x); + + _cleanup_close_ int fd = -EBADF; + fd = xopenat_full(AT_FDCWD, *x, O_RDONLY|O_CLOEXEC|O_NONBLOCK|O_NOCTTY, XO_REGULAR, /* mode= */ MODE_INVALID); + if (fd < 0) + return log_error_errno(fd, "Failed to open '%s': %m", *x); + + if (!GREEDY_REALLOC(b.extra, b.n_extra+1)) + return log_oom(); + + b.extra[b.n_extra++] = (ExtraFile) { + .source_fd = TAKE_FD(fd), + .filename = TAKE_PTR(fn), + .temp_fd = -EBADF, + }; + } + + r = acquire_xbootldr( + /* unprivileged_mode= */ false, + &b.dollar_boot_fd, + /* ret_uuid= */ NULL, + /* ret_devid= */ NULL); + if (r < 0) + return r; + if (r > 0) { /* XBOOTLDR has been found */ + assert(arg_xbootldr_path); + + if (arg_root) { + const char *e = path_startswith(arg_xbootldr_path, arg_root); + if (!e) + return log_error_errno(SYNTHETIC_ERRNO(EINVAL), "XBOOTLDR path '%s' not below specified root '%s', refusing.", arg_xbootldr_path, arg_root); + + r = strdup_to(&b.dollar_boot_path, e); + } else + r = strdup_to(&b.dollar_boot_path, arg_xbootldr_path); + if (r < 0) + return log_oom(); + + b.dollar_boot_source = BOOT_ENTRY_XBOOTLDR; + } else { + /* No XBOOTLDR has been found, look for ESP */ + + r = acquire_esp(/* unprivileged_mode= */ false, + /* graceful= */ false, + &b.dollar_boot_fd, + /* ret_part= */ NULL, + /* ret_pstart= */ NULL, + /* ret_psize= */ NULL, + /* ret_uuid= */ NULL, + /* ret_devid= */ NULL); + if (r < 0) + return r; + + assert(arg_esp_path); + + if (arg_root) { + const char *e = path_startswith(arg_esp_path, arg_root); + if (!e) + return log_error_errno(SYNTHETIC_ERRNO(EINVAL), "ESP path '%s' not below specified root '%s', refusing.", arg_esp_path, arg_root); + + r = strdup_to(&b.dollar_boot_path, e); + } else + r = strdup_to(&b.dollar_boot_path, arg_esp_path); + if (r < 0) + return log_oom(); + + b.dollar_boot_source = BOOT_ENTRY_ESP; + } + + *ret = TAKE_GENERIC(b, LinkContext, LINK_CONTEXT_NULL); + return 0; +} + +static int link_context_load_etc_machine_id(LinkContext *c) { + int r; + + assert(c); + + r = id128_get_machine_at(c->root_fd, &c->machine_id); + if (ERRNO_IS_NEG_MACHINE_ID_UNSET(r)) /* Not set or empty */ + return 0; + if (r < 0) + return log_error_errno(r, "Failed to get machine-id: %m"); + + log_debug("Loaded machine ID %s from '%s/etc/machine-id'.", SD_ID128_TO_STRING(c->machine_id), strempty(c->root)); + return 0; +} + +static int link_context_pick_entry_token(LinkContext *c) { + int r; + + assert(c); + + r = link_context_load_etc_machine_id(c); + if (r < 0) + return r; + + const char *e = secure_getenv("KERNEL_INSTALL_CONF_ROOT"); + r = boot_entry_token_ensure_at( + e ? XAT_FDROOT : c->root_fd, + e, + c->machine_id, + /* machine_id_is_random= */ false, + &c->entry_token_type, + &c->entry_token); + if (r < 0) + return r; + + log_debug("Using entry token: %s", c->entry_token); + return 0; +} + +static int begin_copy_file( + int source_fd, + const char *filename, + int target_dir_fd, + int *ret_tmpfile_fd, + char **ret_tmpfile_filename) { + + int r; + + assert(source_fd >= 0); + assert(filename); + assert(target_dir_fd >= 0); + assert(ret_tmpfile_fd); + assert(ret_tmpfile_filename); + + if (faccessat(target_dir_fd, filename, F_OK, AT_SYMLINK_NOFOLLOW) < 0) { + if (errno != ENOENT) + return log_error_errno(errno, "Failed to check if '%s' exists already: %m", filename); + } else { + log_info("'%s' already in place, not copying.", filename); + + *ret_tmpfile_fd = -EBADF; + *ret_tmpfile_filename = NULL; + return 0; + } + + _cleanup_free_ char *t = NULL; + _cleanup_close_ int write_fd = open_tmpfile_linkable_at(target_dir_fd, filename, O_WRONLY|O_CLOEXEC, &t); + if (write_fd < 0) + return log_error_errno(write_fd, "Failed to create '%s': %m", filename); + + CLEANUP_TMPFILE_AT(target_dir_fd, t); + + r = copy_bytes(source_fd, write_fd, UINT64_MAX, COPY_REFLINK|COPY_SEEK0_SOURCE); + if (r < 0) + return log_error_errno(r, "Failed to copy data into '%s': %m", filename); + + (void) copy_times(source_fd, write_fd, /* flags= */ 0); + (void) fchmod(write_fd, 0644); + + *ret_tmpfile_fd = TAKE_FD(write_fd); + *ret_tmpfile_filename = TAKE_PTR(t); + + return 1; +} + +static int begin_write_entry_file( + LinkContext *c, + unsigned profile_nr, + const char *osrelease_text, + const char *profile_text, + Profile *ret) { + + int r; + + assert(c); + assert(osrelease_text); + assert(ret); + + assert(c->entry_token); + assert(c->kernel_filename); + assert(c->loader_entries_dir_fd >= 0); + + _cleanup_free_ char *good_name = NULL, *good_sort_key = NULL, *os_version_id = NULL, *image_version = NULL; + r = bootspec_extract_osrelease( + osrelease_text, + /* These three fields are used by systemd-stub for showing entries + sorting them */ + &good_name, /* human readable */ + /* ret_good_version= */ NULL, + &good_sort_key, + /* These four fields are the raw fields provided in os-release */ + /* ret_os_id= */ NULL, + &os_version_id, + /* ret_image_id= */ NULL, + &image_version); + if (r < 0) + return log_error_errno(r, "Failed to extract name/version/sort-key from os-release data from unified kernel image, refusing."); + + assert(good_name); /* This one is the only field guaranteed to be defined once the above succeeds */ + + _cleanup_free_ char *profile_id = NULL, *profile_title = NULL; + if (profile_text) { + r = parse_env_data( + profile_text, /* size= */ SIZE_MAX, + ".profile", + "ID", &profile_id, + "TITLE", &profile_title); + if (r < 0) + return log_error_errno(r, "Failed to parse profile data from unified kernel image: %m"); + } + + const char *version = c->entry_version ?: image_version ?: os_version_id; + + _cleanup_free_ char *filename = NULL; + r = boot_entry_make_commit_filename( + c->entry_token, + c->entry_commit, + version, + profile_nr, + c->tries_left, + &filename); + if (r < 0) + return log_error_errno(r, "Failed to generate filename for entry file: %m"); + + if (faccessat(c->loader_entries_dir_fd, filename, F_OK, AT_SYMLINK_NOFOLLOW) < 0) { + if (errno != ENOENT) + return log_error_errno(errno, "Failed to check if '%s' exists: %m", filename); + } else + return log_error_errno(SYNTHETIC_ERRNO(EEXIST), "Boot menu entry '%s' exists already, refusing.", filename); + + log_info("Writing new boot menu entry '%s/loader/entries/%s' for profile %u.", c->dollar_boot_path, filename, profile_nr); + + _cleanup_free_ char *t = NULL; + _cleanup_close_ int write_fd = open_tmpfile_linkable_at(c->loader_entries_dir_fd, filename, O_WRONLY|O_CLOEXEC, &t); + if (write_fd < 0) + return log_error_errno(write_fd, "Failed to create '%s': %m", filename); + + CLEANUP_TMPFILE_AT(c->loader_entries_dir_fd, t); + + _cleanup_free_ char *_title = NULL; + const char *title; + if (profile_title || profile_id) { + _title = strjoin(c->entry_title ?: good_name, " (", profile_title ?: profile_id, ")"); + if (!_title) + return log_oom(); + + title = _title; + } else if (profile_nr > 0) { + _title = asprintf_safe("%s (Profile #%u)", c->entry_title ?: good_name, profile_nr); + if (!_title) + return log_oom(); + + title = _title; + } else + title = c->entry_title ?: good_name; + + /* Do some validation that this will result in a valid type #1 entry before we write this out */ + if (string_has_cc(title, /* ok= */ NULL) || !utf8_is_valid(title)) + return log_error_errno(SYNTHETIC_ERRNO(EINVAL), "Failed to generate valid title for new commit: %s", title); + if (string_has_cc(c->kernel_filename, /* ok= */ NULL) || !utf8_is_valid(c->kernel_filename)) + return log_error_errno(SYNTHETIC_ERRNO(EINVAL), "UKI filename is not suitable for inclusion in new commit: %s", c->kernel_filename); + + _cleanup_free_ char *text = NULL; + if (asprintf(&text, + "title %s\n" + "uki /%s/%s\n" + "version %" PRIu64 "%s%s\n", + title, + c->entry_token, c->kernel_filename, + c->entry_commit, isempty(version) ? "" : ".", strempty(version)) < 0) + return log_oom(); + + if (good_sort_key && strextendf(&text, "sort-key %s\n", good_sort_key) < 0) + return log_oom(); + + if (profile_nr > 0 && strextendf(&text, "profile %u\n", profile_nr) < 0) + return log_oom(); + + if (!sd_id128_is_null(c->machine_id) && strextendf(&text, "machine-id " SD_ID128_FORMAT_STR "\n", SD_ID128_FORMAT_VAL(c->machine_id)) < 0) + return log_oom(); + + FOREACH_ARRAY(x, c->extra, c->n_extra) { + if (string_has_cc(x->filename, /* ok= */ NULL) || !utf8_is_valid(x->filename)) + return log_error_errno(SYNTHETIC_ERRNO(EINVAL), "Extra filename is not suitable for inclusion in new commit: %s", x->filename); + + if (strextendf(&text, + "extra /%s/%s\n", + c->entry_token, + x->filename) < 0) + return log_oom(); + } + + r = loop_write(write_fd, text, /* nbytes= */ SIZE_MAX); + if (r < 0) + return log_error_errno(r, "Failed to write entry file: %m"); + + *ret = (Profile) { + .entry_filename = TAKE_PTR(filename), + .entry_temp_filename = TAKE_PTR(t), + .entry_temp_fd = TAKE_FD(write_fd), + }; + + return 0; +} + +static int finalize_file( + const char *filename, + int target_dir_fd, + int tmpfile_fd, + const char *tmpfile_filename) { + + int r; + + assert(filename); + assert(target_dir_fd >= 0); + + if (tmpfile_fd < 0) /* If the file already existed, we don't move anything into place. */ + return 0; + + r = link_tmpfile_at(tmpfile_fd, target_dir_fd, tmpfile_filename, filename, LINK_TMPFILE_REPLACE|LINK_TMPFILE_SYNC); + if (r < 0) + return log_error_errno(r, "Failed to move from '%s' into place: %m", filename); + + log_info("Installed '%s' into place.", filename); + return 1; +} + +static int link_context_pick_entry_commit(LinkContext *c) { + int r; + + assert(c); + assert(c->loader_entries_dir_fd >= 0); + assert(c->entry_token); + + /* Already have a commit nr? */ + if (c->entry_commit != 0) + return 0; + + _cleanup_close_ int opened_fd = fd_reopen(c->loader_entries_dir_fd, O_DIRECTORY|O_CLOEXEC); + if (opened_fd < 0) + return log_error_errno(opened_fd, "Failed to reopen loader entries dir: %m"); + + _cleanup_free_ DirectoryEntries *dentries = NULL; + r = readdir_all(opened_fd, RECURSE_DIR_IGNORE_DOT, &dentries); + if (r < 0) + return log_error_errno(r, "Failed to read loader entries directory: %m"); + + uint64_t m = 0; /* largest commit number seen */ + FOREACH_ARRAY(i, dentries->entries, dentries->n_entries) { + const struct dirent *de = *i; + + /* We look for files named -commit_[.][.p].conf */ + + if (!dirent_is_file(de)) + continue; + + if (!efi_loader_entry_name_valid(de->d_name)) + continue; + + _cleanup_free_ char *et = NULL; + uint64_t ec; + r = boot_entry_parse_commit_filename(de->d_name, &et, &ec); + if (r < 0) { + log_debug_errno(r, "Cannot extract entry token/commit number from '%s', ignoring.", de->d_name); + continue; + } + + if (!streq(c->entry_token, et)) + continue; + + log_debug("Found existing commit %" PRIu64 ".", ec); + if (ec > m) + m = ec; + } + + assert(m < UINT64_MAX); + uint64_t next = m + 1; + + if (!entry_commit_valid(next)) + return log_error_errno(SYNTHETIC_ERRNO(E2BIG), "Too many commits already in place, refusing."); + + log_debug("Picking commit %" PRIu64 " for new commit.", next); + c->entry_commit = next; + return 0; +} + +static int clean_temporary_files(int fd) { + int r; + + assert(fd >= 0); + + /* Before we create any new files let's clear any possible left-overs from a previous run. We look + * specifically for all temporary files whose name starts with .# because that's what we create, via + * open_tmpfile_linkable_at(). + * + * Ideally, this would not be necessary because O_TMPFILE would ensure that files are not + * materialized before they are fully written. However, vfat currently does not support O_TMPFILE, + * hence we need to clean things up manually. */ + + _cleanup_close_ int dfd = fd_reopen(fd, O_CLOEXEC|O_DIRECTORY); + if (dfd < 0) + return log_error_errno(dfd, "Failed to open directory: %m"); + + _cleanup_free_ DirectoryEntries *de = NULL; + r = readdir_all(dfd, RECURSE_DIR_ENSURE_TYPE, &de); + if (r < 0) + return log_error_errno(r, "Failed to enumerate contents of directory: %m"); + + FOREACH_ARRAY(i, de->entries, de->n_entries) { + struct dirent *e = *i; + + if (e->d_type != DT_REG) + continue; + + if (!startswith_no_case(e->d_name, ".#")) + continue; + + if (unlinkat(dfd, e->d_name, /* flags= */ 0) < 0 && errno != ENOENT) + log_warning_errno(errno, "Failed to remove temporary file '%s', ignoring: %m", e->d_name); + } + + return 0; +} + +static int link_context_unlink_oldest(LinkContext *c) { + int r; + + assert(c); + + /* We only load the entries from the partition we want to make space on (!) */ + _cleanup_(boot_config_free) BootConfig config = BOOT_CONFIG_NULL; + r = boot_config_load_and_select( + &config, + c->root, + c->dollar_boot_source == BOOT_ENTRY_ESP ? c->dollar_boot_path : NULL, + /* esp_devid= */ 0, + c->dollar_boot_source == BOOT_ENTRY_XBOOTLDR ? c->dollar_boot_path : NULL, + /* xbootldr_devid= */ 0); + if (r < 0) + return r; + + _cleanup_(strv_freep) char **ids = NULL; + r = boot_config_find_oldest_commit( + &config, + c->entry_token, + &ids); + if (r == -ENXIO) + return log_error_errno(r, "No suitable boot menu entry to delete found."); + if (r == -EBUSY) + return log_error_errno(r, "Refusing to remove currently booted boot menu entry."); + if (r < 0) + return log_error_errno(r, "Failed to find suitable oldest boot menu entry: %m"); + + _cleanup_(hashmap_freep) Hashmap *known_files = NULL; + r = boot_config_count_known_files(&config, c->dollar_boot_source, &known_files); + if (r < 0) + return r; + + int ret = 0; + STRV_FOREACH(id, ids) { + const BootEntry *entry = boot_config_find_entry(&config, *id); + if (!entry) + continue; + + RET_GATHER(ret, boot_entry_unlink(entry, c->dollar_boot_path, c->dollar_boot_fd, known_files, /* dry_run= */ false)); + } + + if (ret < 0) + return ret; + + return 1; +} + +static int verify_keep_free(LinkContext *c) { + int r; + + assert(c); + + if (c->keep_free == 0) + return 0; + + uint64_t f; + r = vfs_free_bytes(ASSERT_FD(c->dollar_boot_fd), &f); + if (r < 0) + return log_error_errno(r, "Failed to statvfs() the $BOOT partition: %m"); + + if (f < c->keep_free) + return log_error_errno( + SYNTHETIC_ERRNO(EDQUOT), + "Not installing boot menu entry, free space after installation of %s would be below configured keep free size %s.", + FORMAT_BYTES(f), FORMAT_BYTES(c->keep_free)); + + return 0; +} + +static int run_link_now(LinkContext *c) { + int r; + + assert(c); + assert(c->dollar_boot_fd >= 0); + + _cleanup_free_ char *j = path_join(empty_to_root(c->root), c->dollar_boot_path); + if (!j) + return log_oom(); + + if (c->loader_entries_dir_fd < 0) { + r = chaseat(/* root_fd= */ c->dollar_boot_fd, + /* dir_fd= */ c->dollar_boot_fd, + "loader/entries", + CHASE_PROHIBIT_SYMLINKS|CHASE_MKDIR_0755|CHASE_MUST_BE_DIRECTORY, + /* ret_path= */ NULL, + &c->loader_entries_dir_fd); + if (r < 0) + return log_error_errno(r, "Failed to pin '/loader/entries' directory below '%s': %m", j); + } + + /* Remove any left-overs from an earlier run before we write new stuff */ + (void) clean_temporary_files(c->loader_entries_dir_fd); + + r = link_context_pick_entry_commit(c); + if (r < 0) + return r; + + log_info("Will create commit %" PRIu64 ".", c->entry_commit); + + if (c->entry_token_dir_fd < 0) { + r = chaseat(/* root_fd= */ c->dollar_boot_fd, + /* dir_fd= */ c->dollar_boot_fd, + c->entry_token, + CHASE_PROHIBIT_SYMLINKS|CHASE_MKDIR_0755|CHASE_MUST_BE_DIRECTORY, + /* ret_path= */ NULL, + &c->entry_token_dir_fd); + if (r < 0) + return log_error_errno(r, "Failed to pin '/%s' directory below '%s': %m", c->entry_token, j); + } + + /* As above */ + (void) clean_temporary_files(c->entry_token_dir_fd); + + /* Synchronize everything to disk before we verify the disk space, to ensure the counters are + * accurate (some file systems delay accurate counters) */ + (void) syncfs(c->dollar_boot_fd); + + /* Before we start copying things, let's see if there's even a remote chance to get this copied + * in. Note that we do not try to be overly smart here, i.e. we do not try to calculate how much + * extra space we'll need here. Doing that is not trivial since after all the same resources can be + * referenced by multiple entries, which makes copying them multiple times unnecessary. */ + r = verify_keep_free(c); + if (r < 0) + return r; + + for (unsigned p = 0; p < UNIFIED_PROFILES_MAX; p++) { + _cleanup_free_ char *osrelease = NULL, *profile = NULL; + r = pe_find_uki_sections(c->kernel_fd, j, p, &osrelease, &profile, /* ret_cmdline= */ NULL); + if (r < 0) + return r; + if (r == 0) /* this profile does not exist, we are done */ + break; + + if (!GREEDY_REALLOC(c->profiles, c->n_profiles+1)) + return log_oom(); + + r = begin_write_entry_file( + c, + p, + osrelease, + profile, + c->profiles + c->n_profiles); + if (r < 0) + return r; + + c->n_profiles++; + } + + if (c->n_profiles == 0) + return log_error_errno(SYNTHETIC_ERRNO(EBADMSG), "UKI with no valid profile, refusing."); + + r = begin_copy_file( + c->kernel_fd, + c->kernel_filename, + c->entry_token_dir_fd, + &c->kernel_temp_fd, + &c->kernel_temp_filename); + if (r < 0) + return r; + + FOREACH_ARRAY(x, c->extra, c->n_extra) { + r = begin_copy_file( + x->source_fd, + x->filename, + c->entry_token_dir_fd, + &x->temp_fd, + &x->temp_filename); + if (r < 0) + return r; + } + + /* We copied all files into place, but they are not materialized yet. Let's ensure the data hits the + * disk before we proceed */ + (void) syncfs(c->dollar_boot_fd); + + /* Before we materialize things, let's ensure the space to keep free is not taken */ + r = verify_keep_free(c); + if (r < 0) + return r; + + /* We successfully managed to put all resources we need into the $BOOT partition. Now, let's + * "materialize" them by linking them into the file system. Before this point we'd get rid of every + * file we created on error again. But from now on we switch modes: what we manage to move into place + * we leave in place even on error. These are not lost resources after all, the GC logic implemented + * by "bootctl cleanup" will take care of removing things again if necessary. */ + + r = finalize_file( + c->kernel_filename, + c->entry_token_dir_fd, + c->kernel_temp_fd, + c->kernel_temp_filename); + if (r < 0) + return r; + + c->kernel_temp_fd = safe_close(c->kernel_temp_fd); + c->kernel_temp_filename = mfree(c->kernel_temp_filename); + + FOREACH_ARRAY(x, c->extra, c->n_extra) { + r = finalize_file( + x->filename, + c->entry_token_dir_fd, + x->temp_fd, + x->temp_filename); + if (r < 0) + return r; + + x->temp_fd = safe_close(x->temp_fd); + x->temp_filename = mfree(x->temp_filename); + } + + /* Finally, after all our resources are in place, also materialize the menu entry files themselves */ + FOREACH_ARRAY(profile, c->profiles, c->n_profiles) { + r = finalize_file( + profile->entry_filename, + c->loader_entries_dir_fd, + profile->entry_temp_fd, + profile->entry_temp_filename); + if (r < 0) + return r; + + profile->entry_temp_fd = safe_close(profile->entry_temp_fd); + profile->entry_temp_filename = mfree(profile->entry_temp_filename); + + _cleanup_free_ char *stripped = NULL; + r = boot_filename_extract_tries( + profile->entry_filename, + &stripped, + /* ret_tries_left= */ NULL, + /* ret_tries_done= */ NULL); + if (r < 0) + return log_warning_errno(r, "Failed to extract tries counters from id '%s'", profile->entry_filename); + + if (strv_consume(&c->linked_ids, TAKE_PTR(stripped)) < 0) + return log_oom(); + } + + (void) syncfs(c->dollar_boot_fd); + return 0; +} + +static int run_link(LinkContext *c) { + int r; + + assert(c); + assert(c->dollar_boot_path); + assert(c->dollar_boot_fd >= 0); + + if (c->keep_free == UINT64_MAX) + c->keep_free = KEEP_FREE_BYTES_DEFAULT; + + r = link_context_pick_entry_token(c); + if (r < 0) + return r; + + unsigned n_removals = 0; + for (;;) { + r = run_link_now(c); + if (r < 0) { + if (!ERRNO_IS_NEG_DISK_SPACE(r)) + return r; + } else + break; + + log_notice("Attempt to link entry failed due to exhausted disk space, trying to remove oldest boot menu entry."); + + link_context_unlink_temporary(c); + link_context_clear_profiles(c); + + if (link_context_unlink_oldest(c) <= 0) { + log_warning("Attempted to make space on $BOOT, but this failed, attempt to link entry failed."); + return r; /* propagate original error */ + } + + /* Close entry token dir here, quite possible the unlinking above might have removed it too, in case it was empty */ + c->entry_token_dir_fd = safe_close(c->entry_token_dir_fd); + + log_info("Removing oldest boot menu entry succeeded, will retry to create boot loader menu entry."); + n_removals++; + } + + _cleanup_free_ char *j = strv_join(c->linked_ids, "', '"); + if (!j) + return log_oom(); + + if (n_removals > 0) + log_info("Successfully installed boot loader entries '%s', after removing %u old entries.", j, n_removals); + else + log_info("Successfully installed boot loader entries '%s'.", j); + + return 0; +} + +int verb_link(int argc, char *argv[], uintptr_t data, void *userdata) { + int r; + + assert(argc == 2); + + _cleanup_free_ char *x = NULL; + r = parse_path_argument(argv[1], /* suppress_root= */ false, &x); + if (r < 0) + return r; + + _cleanup_(link_context_done) LinkContext c = LINK_CONTEXT_NULL; + r = link_context_from_cmdline(&c, x); + if (r < 0) + return r; + + return run_link(&c); +} + +static JSON_DISPATCH_ENUM_DEFINE(json_dispatch_boot_entry_token_type, BootEntryTokenType, boot_entry_token_type_from_string); + +typedef struct LinkParameters { + LinkContext context; + unsigned root_fd_index; + unsigned kernel_fd_index; + sd_varlink *link; +} LinkParameters; + +static void link_parameters_done(LinkParameters *p) { + assert(p); + + link_context_done(&p->context); +} + +typedef struct ExtraParameters { + ExtraFile extra_file; + unsigned fd_index; +} ExtraParameters; + +static void extra_parameters_done(ExtraParameters *p) { + assert(p); + + extra_file_done(&p->extra_file); +} + +static int json_dispatch_loader_entry_resource_filename(const char *name, sd_json_variant *variant, sd_json_dispatch_flags_t flags, void *userdata) { + char **n = ASSERT_PTR(userdata); + const char *filename; + int r; + + assert(variant); + + r = json_dispatch_const_filename(name, variant, flags, &filename); + if (r < 0) + return r; + + if (filename && !efi_loader_entry_resource_filename_valid(filename)) + return json_log(variant, flags, SYNTHETIC_ERRNO(EINVAL), "JSON field '%s' is not a valid boot entry resource filename.", strna(name)); + + if (free_and_strdup(n, filename) < 0) + return json_log_oom(variant, flags); + + return 0; +} + +static int dispatch_extras(const char *name, sd_json_variant *v, sd_json_dispatch_flags_t flags, void *userdata) { + LinkParameters *c = ASSERT_PTR(userdata); + int r; + + if (!sd_json_variant_is_array(v)) + return json_log(v, flags, SYNTHETIC_ERRNO(EINVAL), "JSON field '%s' is not an array.", strna(name)); + + sd_json_variant *i; + JSON_VARIANT_ARRAY_FOREACH(i, v) { + _cleanup_(extra_parameters_done) ExtraParameters xp = { + .extra_file = EXTRA_FILE_NULL, + .fd_index = UINT_MAX, + }; + + static const sd_json_dispatch_field dispatch_table[] = { + { "filename", SD_JSON_VARIANT_STRING, json_dispatch_loader_entry_resource_filename, offsetof(ExtraParameters, extra_file.filename), SD_JSON_MANDATORY }, + { "fileDescriptor", _SD_JSON_VARIANT_TYPE_INVALID, sd_json_dispatch_uint, offsetof(ExtraParameters, fd_index), SD_JSON_MANDATORY }, + {}, + }; + + r = sd_json_dispatch(i, dispatch_table, /* flags= */ 0, &xp); + if (r < 0) + return r; + + xp.extra_file.source_fd = sd_varlink_peek_dup_fd(c->link, xp.fd_index); + if (xp.extra_file.source_fd < 0) + return log_debug_errno(xp.extra_file.source_fd, "Failed to acquire extra fd from Varlink: %m"); + + r = fd_verify_safe_flags(xp.extra_file.source_fd); + if (r < 0) + return sd_varlink_error_invalid_parameter_name(c->link, name); + + r = fd_verify_regular(xp.extra_file.source_fd); + if (r < 0) + return log_debug_errno(r, "Failed to validate that the extra file is a regular file descriptor: %m"); + + if (!GREEDY_REALLOC(c->context.extra, c->context.n_extra+1)) + return log_oom(); + + c->context.extra[c->context.n_extra++] = TAKE_GENERIC(xp.extra_file, ExtraFile, EXTRA_FILE_NULL); + } + + return 0; +} + +int vl_method_link( + sd_varlink *link, + sd_json_variant *parameters, + sd_varlink_method_flags_t flags, + void *userdata) { + + int r; + + assert(link); + + _cleanup_(link_parameters_done) LinkParameters p = { + .context = LINK_CONTEXT_NULL, + .root_fd_index = UINT_MAX, + .kernel_fd_index = UINT_MAX, + .link = link, + }; + + static const sd_json_dispatch_field dispatch_table[] = { + { "rootFileDescriptor", _SD_JSON_VARIANT_TYPE_INVALID, sd_json_dispatch_uint, voffsetof(p, root_fd_index), 0 }, + { "rootDirectory", SD_JSON_VARIANT_STRING, json_dispatch_path, voffsetof(p, context.root), 0 }, + { "bootEntryTokenType", SD_JSON_VARIANT_STRING, json_dispatch_boot_entry_token_type, voffsetof(p, context.entry_token_type), 0 }, + { "entryTitle", SD_JSON_VARIANT_STRING, sd_json_dispatch_string, voffsetof(p, context.entry_title), 0 }, + { "entryVersion", SD_JSON_VARIANT_STRING, sd_json_dispatch_string, voffsetof(p, context.entry_version), 0 }, + { "entryCommit", SD_JSON_VARIANT_INTEGER, sd_json_dispatch_uint64, voffsetof(p, context.entry_commit), 0 }, + { "kernelFilename", SD_JSON_VARIANT_STRING, json_dispatch_loader_entry_resource_filename, voffsetof(p, context.kernel_filename), SD_JSON_MANDATORY }, + { "kernelFileDescriptor", _SD_JSON_VARIANT_TYPE_INVALID, sd_json_dispatch_uint, voffsetof(p, kernel_fd_index), SD_JSON_MANDATORY }, + { "extraFiles", SD_JSON_VARIANT_ARRAY, dispatch_extras, 0, 0 }, + { "triesLeft", _SD_JSON_VARIANT_TYPE_INVALID, sd_json_dispatch_uint, voffsetof(p, context.tries_left), 0 }, + { "keepFree", _SD_JSON_VARIANT_TYPE_INVALID, sd_json_dispatch_uint64, voffsetof(p, context.keep_free), 0 }, + {}, + }; + + r = sd_varlink_dispatch(link, parameters, dispatch_table, &p); + if (r != 0) + return r; + + if (p.root_fd_index != UINT_MAX) { + p.context.root_fd = sd_varlink_peek_dup_fd(link, p.root_fd_index); + if (p.context.root_fd < 0) + return log_debug_errno(p.context.root_fd, "Failed to acquire root fd from Varlink: %m"); + + r = fd_verify_safe_flags_full(p.context.root_fd, O_DIRECTORY); + if (r < 0) + return sd_varlink_error_invalid_parameter_name(link, "rootFileDescriptor"); + + r = fd_verify_directory(p.context.root_fd); + if (r < 0) + return log_debug_errno(r, "Specified file descriptor does not refer to a directory: %m"); + + if (!p.context.root) { + r = fd_get_path(p.context.root_fd, &p.context.root); + if (r < 0) + return log_debug_errno(r, "Failed to get path of file descriptor: %m"); + + if (empty_or_root(p.context.root)) + p.context.root = mfree(p.context.root); + } + } else if (p.context.root) { + p.context.root_fd = open(p.context.root, O_RDONLY|O_CLOEXEC|O_DIRECTORY); + if (p.context.root_fd < 0) + return log_debug_errno(errno, "Failed to open '%s': %m", p.context.root); + } else + p.context.root_fd = XAT_FDROOT; + + if (p.context.entry_token_type < 0) + p.context.entry_token_type = BOOT_ENTRY_TOKEN_AUTO; + + if (p.context.entry_title && !efi_loader_entry_title_valid(p.context.entry_title)) + return sd_varlink_error_invalid_parameter_name(link, "entryTitle"); + + if (p.context.entry_version && !version_is_valid_versionspec(p.context.entry_version)) + return sd_varlink_error_invalid_parameter_name(link, "entryVersion"); + + if (p.context.entry_commit != 0 && !entry_commit_valid(p.context.entry_commit)) + return sd_varlink_error_invalid_parameter_name(link, "entryCommit"); + + p.context.kernel_fd = sd_varlink_peek_dup_fd(link, p.kernel_fd_index); + if (p.context.kernel_fd < 0) + return log_debug_errno(p.context.kernel_fd, "Failed to acquire kernel fd from Varlink: %m"); + + r = fd_verify_safe_flags(p.context.kernel_fd); + if (r < 0) + return sd_varlink_error_invalid_parameter_name(link, "kernelFileDescriptor"); + r = fd_verify_regular(p.context.kernel_fd); + if (r < 0) + return log_debug_errno(r, "Failed to validate that kernel image file is a regular file descriptor: %m"); + + /* Refuse non-UKIs for now. */ + KernelImageType kit = _KERNEL_IMAGE_TYPE_INVALID; + r = inspect_kernel(p.context.kernel_fd, /* filename= */ NULL, &kit); + if (r == -EBADMSG) + return sd_varlink_error(link, "io.systemd.BootControl.InvalidKernelImage", NULL); + if (r < 0) + return r; + if (kit != KERNEL_IMAGE_TYPE_UKI) + return sd_varlink_error(link, "io.systemd.BootControl.InvalidKernelImage", NULL); + + r = find_xbootldr_and_warn_at( + p.context.root_fd, + /* path= */ NULL, + /* unprivileged_mode= */ false, + &p.context.dollar_boot_path, + &p.context.dollar_boot_fd); + if (r < 0) { + if (r != -ENOKEY) + return r; + + /* No XBOOTLDR found, let's look for ESP then. */ + + r = find_esp_and_warn_at( + p.context.root_fd, + /* path= */ NULL, + /* unprivileged_mode= */ false, + &p.context.dollar_boot_path, + &p.context.dollar_boot_fd); + if (r == -ENOKEY) + return sd_varlink_error(link, "io.systemd.BootControl.NoDollarBootFound", NULL); + if (r < 0) + return r; + + p.context.dollar_boot_source = BOOT_ENTRY_ESP; + } else + p.context.dollar_boot_source = BOOT_ENTRY_XBOOTLDR; + + r = run_link(&p.context); + if (r == -EUNATCH) /* no boot entry token is set */ + return sd_varlink_error(link, "io.systemd.BootControl.BootEntryTokenUnavailable", NULL); + if (r < 0) + return r; + + return sd_varlink_replybo(link, SD_JSON_BUILD_PAIR_STRV("ids", p.context.linked_ids)); +} diff --git a/src/bootctl/bootctl-link.h b/src/bootctl/bootctl-link.h new file mode 100644 index 0000000000000..de64563b87c7f --- /dev/null +++ b/src/bootctl/bootctl-link.h @@ -0,0 +1,8 @@ +/* SPDX-License-Identifier: LGPL-2.1-or-later */ +#pragma once + +#include "shared-forward.h" + +int verb_link(int argc, char *argv[], uintptr_t data, void *userdata); + +int vl_method_link(sd_varlink *link, sd_json_variant *parameters, sd_varlink_method_flags_t flags, void *userdata); diff --git a/src/bootctl/bootctl.c b/src/bootctl/bootctl.c index 67a20814daf9e..c627a7dd077d5 100644 --- a/src/bootctl/bootctl.c +++ b/src/bootctl/bootctl.c @@ -9,6 +9,7 @@ #include "bootctl.h" #include "bootctl-cleanup.h" #include "bootctl-install.h" +#include "bootctl-link.h" #include "bootctl-random-seed.h" #include "bootctl-reboot-to-firmware.h" #include "bootctl-set-efivar.h" @@ -16,6 +17,7 @@ #include "bootctl-uki.h" #include "bootctl-unlink.h" #include "bootctl-util.h" +#include "bootspec-util.h" #include "build.h" #include "crypto-util.h" #include "devnum-util.h" @@ -82,6 +84,12 @@ char *arg_private_key = NULL; KeySourceType arg_private_key_source_type = OPENSSL_KEY_SOURCE_FILE; char *arg_private_key_source = NULL; bool arg_oldest = false; +uint64_t arg_keep_free = KEEP_FREE_BYTES_DEFAULT; +char *arg_entry_title = NULL; +char *arg_entry_version = NULL; +uint64_t arg_entry_commit = 0; +char **arg_extras = NULL; +unsigned arg_tries_left = UINT_MAX; STATIC_DESTRUCTOR_REGISTER(arg_esp_path, freep); STATIC_DESTRUCTOR_REGISTER(arg_xbootldr_path, freep); @@ -95,6 +103,9 @@ STATIC_DESTRUCTOR_REGISTER(arg_certificate, freep); STATIC_DESTRUCTOR_REGISTER(arg_certificate_source, freep); STATIC_DESTRUCTOR_REGISTER(arg_private_key, freep); STATIC_DESTRUCTOR_REGISTER(arg_private_key_source, freep); +STATIC_DESTRUCTOR_REGISTER(arg_entry_title, freep); +STATIC_DESTRUCTOR_REGISTER(arg_entry_version, freep); +STATIC_DESTRUCTOR_REGISTER(arg_extras, strv_freep); static const char* const install_source_table[_INSTALL_SOURCE_MAX] = { [INSTALL_SOURCE_IMAGE] = "image", @@ -366,6 +377,9 @@ VERB_SCOPE_NOARG(, verb_list, "list", VERB_SCOPE(, verb_unlink, "unlink", "ID", VERB_ANY, 2, 0, "Remove boot loader entry"); +VERB_SCOPE(, verb_link, "link", "KERNEL", 2, 2, 0, + "Create boot loader entry for specified kernel"); + VERB_SCOPE_NOARG(, verb_cleanup, "cleanup", "Remove files in ESP not referenced in any boot entry"); @@ -641,8 +655,115 @@ static int parse_argv(int argc, char *argv[], char ***ret_args) { return r; break; + + OPTION_LONG("keep-free", "BYTES", + "How much space to keep free on ESP/XBOOTLDR"): + + if (isempty(opts.arg)) + arg_keep_free = KEEP_FREE_BYTES_DEFAULT; + else { + r = parse_size(opts.arg, 1024, &arg_keep_free); + if (r < 0) + return log_error_errno(r, "Failed to parse --keep-free=: %s", opts.arg); + } + + break; + + OPTION_LONG("entry-title", "TITLE", + "Selects the entry title for the new boot menu entry"): + + if (isempty(opts.arg)) { + arg_entry_title = mfree(arg_entry_title); + break; + } + + if (!efi_loader_entry_title_valid(opts.arg)) + return log_error_errno(SYNTHETIC_ERRNO(EINVAL), "Not a valid boot menu entry title: %s", opts.arg); + + r = free_and_strdup_warn(&arg_entry_title, opts.arg); + if (r < 0) + return r; + break; + + OPTION_LONG("entry-version", "VERSION", + "Selects the entry version for the new boot menu entry"): + if (isempty(opts.arg)) { + arg_entry_version = mfree(arg_entry_version); + break; + } + + if (!version_is_valid_versionspec(opts.arg)) + return log_error_errno(SYNTHETIC_ERRNO(EINVAL), "Not a valid boot menu entry version: %s", opts.arg); + + r = free_and_strdup_warn(&arg_entry_version, opts.arg); + if (r < 0) + return r; + break; + + OPTION_LONG("entry-commit", "NR", + "Selects the entry commit version for the new boot menu entry"): { + if (isempty(opts.arg)) { + arg_entry_commit = 0; + break; + } + + uint64_t n; + r = safe_atou64(opts.arg, &n); + if (r < 0) + return log_error_errno(r, "Failed to parse --entry-commit= parameter: %s", opts.arg); + if (!entry_commit_valid(n)) + return log_error_errno(SYNTHETIC_ERRNO(EINVAL), "Not a valid entry commit number."); + + arg_entry_commit = n; + break; + } + + OPTION('X', "extra", "PATH", + "Pass extra resource (confext, sysext, credential) to the invoked UKI of the boot menu entry"): { + + if (isempty(opts.arg)) { + arg_extras = strv_free(arg_extras); + break; + } + + _cleanup_free_ char *x = NULL; + r = parse_path_argument(opts.arg, /* suppress_root= */ false, &x); + if (r < 0) + return r; + + _cleanup_free_ char *fn = NULL; + r = path_extract_filename(x, &fn); + if (r < 0) + return log_error_errno(r, "Failed to extract filename from '%s': %m", x); + if (!efi_loader_entry_resource_filename_valid(fn)) + return log_error_errno(SYNTHETIC_ERRNO(EINVAL), "Extra filename '%s' is not suitable for reference in a boot menu entry.", fn); + + r = strv_consume(&arg_extras, TAKE_PTR(x)); + if (r < 0) + return log_oom(); + + strv_uniq(arg_extras); + break; } + OPTION_LONG("tries-left", "NR", + "Set boot menu entries tries-left counter to the specified value"): { + if (isempty(opts.arg)) { + arg_tries_left = UINT_MAX; + break; + } + + unsigned u; + r = safe_atou(opts.arg, &u); + if (r < 0) + return log_error_errno(r, "Failed to parse tries left counter: %s", opts.arg); + if (u >= UINT_MAX) + return log_error_errno(SYNTHETIC_ERRNO(EINVAL), "Tries left counter too large, refusing: %u", u); + + arg_tries_left = u; + break; + }} + char **args = option_parser_get_args(&opts); if (!!arg_print_esp_path + !!arg_print_dollar_boot_path + (arg_print_root_device > 0) + arg_print_loader_path + arg_print_stub_path + arg_print_efi_architecture > 1) @@ -711,6 +832,7 @@ static int vl_server(void) { "io.systemd.BootControl.SetRebootToFirmware", vl_method_set_reboot_to_firmware, "io.systemd.BootControl.GetRebootToFirmware", vl_method_get_reboot_to_firmware, "io.systemd.BootControl.Install", vl_method_install, + "io.systemd.BootControl.Link", vl_method_link, "io.systemd.BootControl.Unlink", vl_method_unlink); if (r < 0) return log_error_errno(r, "Failed to bind Varlink methods: %m"); diff --git a/src/bootctl/bootctl.h b/src/bootctl/bootctl.h index ea097ba329753..b478ae0ec1d1a 100644 --- a/src/bootctl/bootctl.h +++ b/src/bootctl/bootctl.h @@ -52,6 +52,12 @@ extern char *arg_private_key; extern KeySourceType arg_private_key_source_type; extern char *arg_private_key_source; extern bool arg_oldest; +extern uint64_t arg_keep_free; +extern char *arg_entry_title; +extern char *arg_entry_version; +extern uint64_t arg_entry_commit; +extern char **arg_extras; +extern unsigned arg_tries_left; static inline const char* arg_dollar_boot_path(void) { /* $BOOT shall be the XBOOTLDR partition if it exists, and otherwise the ESP */ @@ -68,3 +74,7 @@ int acquire_xbootldr(int unprivileged_mode, int *ret_fd, sd_id128_t *ret_uuid, d * string, but we limit the length to something reasonable to prevent from the firmware * having to deal with a potentially too long string. */ #define EFI_BOOT_OPTION_DESCRIPTION_MAX ((size_t) 255) + +/* Before we "materialize" a new entry, let's ensure we have this much space free still on the partition, by + * default */ +#define KEEP_FREE_BYTES_DEFAULT (5U * U64_MB) diff --git a/src/bootctl/meson.build b/src/bootctl/meson.build index ff33cde3f615b..06137bdae00ce 100644 --- a/src/bootctl/meson.build +++ b/src/bootctl/meson.build @@ -3,6 +3,7 @@ bootctl_sources = files( 'bootctl.c', 'bootctl-install.c', + 'bootctl-link.c', 'bootctl-random-seed.c', 'bootctl-reboot-to-firmware.c', 'bootctl-set-efivar.c', diff --git a/src/shared/efi-loader.c b/src/shared/efi-loader.c index ce10a44d34ccc..20e4719bb067e 100644 --- a/src/shared/efi-loader.c +++ b/src/shared/efi-loader.c @@ -433,3 +433,14 @@ bool efi_loader_entry_name_valid(const char *s) { return in_charset(s, ALPHANUMERICAL "+-_.@"); } + +bool efi_loader_entry_title_valid(const char *s) { + return string_is_safe(s, /* flags= */ 0); +} + +bool efi_loader_entry_resource_filename_valid(const char *s) { + /* Validates file names so that they are safe for their inclusion in boot loader type #1 + * entries. i.e. may not contain CCs, and should be ASCII */ + + return string_is_safe(s, STRING_ASCII|STRING_FILENAME); +} diff --git a/src/shared/efi-loader.h b/src/shared/efi-loader.h index abf8bdc49ef04..c51c2dbb2f918 100644 --- a/src/shared/efi-loader.h +++ b/src/shared/efi-loader.h @@ -23,3 +23,5 @@ int efi_loader_update_entry_one_shot_cache(char **cache, struct stat *cache_stat int efi_get_variable_id128(const char *variable, sd_id128_t *ret); bool efi_loader_entry_name_valid(const char *s); +bool efi_loader_entry_title_valid(const char *s); +bool efi_loader_entry_resource_filename_valid(const char *s); diff --git a/src/shared/varlink-io.systemd.BootControl.c b/src/shared/varlink-io.systemd.BootControl.c index 42f7b53492e45..c16a295d8979f 100644 --- a/src/shared/varlink-io.systemd.BootControl.c +++ b/src/shared/varlink-io.systemd.BootControl.c @@ -27,7 +27,7 @@ static SD_VARLINK_DEFINE_STRUCT_TYPE( SD_VARLINK_FIELD_COMMENT("The location of the local addon."), SD_VARLINK_DEFINE_FIELD(localAddon, SD_VARLINK_STRING, SD_VARLINK_NULLABLE), SD_VARLINK_FIELD_COMMENT("The command line options by the addon."), - SD_VARLINK_DEFINE_FIELD(options, SD_VARLINK_STRING, 0)); + SD_VARLINK_DEFINE_FIELD(options, SD_VARLINK_STRING, SD_VARLINK_NULLABLE)); static SD_VARLINK_DEFINE_STRUCT_TYPE( BootEntry, @@ -147,6 +147,40 @@ static SD_VARLINK_DEFINE_METHOD( SD_VARLINK_FIELD_COMMENT("If true, remove the oldest entry."), SD_VARLINK_DEFINE_INPUT(oldest, SD_VARLINK_BOOL, SD_VARLINK_NULLABLE)); +static SD_VARLINK_DEFINE_STRUCT_TYPE( + BootEntryExtraFile, + SD_VARLINK_FIELD_COMMENT("The name of the extra file"), + SD_VARLINK_DEFINE_FIELD(filename, SD_VARLINK_STRING, 0), + SD_VARLINK_FIELD_COMMENT("Index into array of file descriptors, pointing to a file descriptor referencing the extra file."), + SD_VARLINK_DEFINE_FIELD(fileDescriptor, SD_VARLINK_INT, 0)); + +static SD_VARLINK_DEFINE_METHOD( + Link, + SD_VARLINK_FIELD_COMMENT("Index into array of file descriptors passed along with this message, pointing to file descriptor to root file system to operate on"), + SD_VARLINK_DEFINE_INPUT(rootFileDescriptor, SD_VARLINK_INT, SD_VARLINK_NULLABLE), + SD_VARLINK_FIELD_COMMENT("Root directory to operate relative to. If both this and rootFileDescriptor is specified, this is purely informational. If only this is specified, it is what will be used."), + SD_VARLINK_DEFINE_INPUT(rootDirectory, SD_VARLINK_STRING, SD_VARLINK_NULLABLE), + SD_VARLINK_FIELD_COMMENT("Selects how to identify boot entries"), + SD_VARLINK_DEFINE_INPUT_BY_TYPE(bootEntryTokenType, BootEntryTokenType, SD_VARLINK_NULLABLE), + SD_VARLINK_FIELD_COMMENT("The entry title for the newly created boot menu entry"), + SD_VARLINK_DEFINE_INPUT(entryTitle, SD_VARLINK_STRING, SD_VARLINK_NULLABLE), + SD_VARLINK_FIELD_COMMENT("The entry version for the newly created boot menu entry"), + SD_VARLINK_DEFINE_INPUT(entryVersion, SD_VARLINK_STRING, SD_VARLINK_NULLABLE), + SD_VARLINK_FIELD_COMMENT("The commit number for the newly created boot menu entry"), + SD_VARLINK_DEFINE_INPUT(entryCommit, SD_VARLINK_INT, SD_VARLINK_NULLABLE), + SD_VARLINK_FIELD_COMMENT("Target filename for the kernel image (UKI) in the $BOOT partition"), + SD_VARLINK_DEFINE_INPUT(kernelFilename, SD_VARLINK_STRING, 0), + SD_VARLINK_FIELD_COMMENT("Index into array of file descriptors passed along with this message, pointing to file descriptor to the kernel image to copy"), + SD_VARLINK_DEFINE_INPUT(kernelFileDescriptor, SD_VARLINK_INT, 0), + SD_VARLINK_FIELD_COMMENT("An array of 'extra' files for this entry, i.e. credentials, confexts, sysexts, addons."), + SD_VARLINK_DEFINE_INPUT_BY_TYPE(extraFiles, BootEntryExtraFile, SD_VARLINK_NULLABLE|SD_VARLINK_ARRAY), + SD_VARLINK_FIELD_COMMENT("What to set the triesLeft counter of the boot menu entry to initially."), + SD_VARLINK_DEFINE_INPUT(triesLeft, SD_VARLINK_INT, SD_VARLINK_NULLABLE), + SD_VARLINK_FIELD_COMMENT("How much space to always keep free on ESP/XBOOTLDR. Defaults to 1 MiB"), + SD_VARLINK_DEFINE_INPUT(keepFree, SD_VARLINK_INT, SD_VARLINK_NULLABLE), + SD_VARLINK_FIELD_COMMENT("The IDs of the created boot loader entries."), + SD_VARLINK_DEFINE_OUTPUT(ids, SD_VARLINK_STRING, SD_VARLINK_ARRAY)); + static SD_VARLINK_DEFINE_ERROR( RebootToFirmwareNotSupported); @@ -162,6 +196,9 @@ static SD_VARLINK_DEFINE_ERROR( static SD_VARLINK_DEFINE_ERROR( BootEntryTokenUnavailable); +static SD_VARLINK_DEFINE_ERROR( + InvalidKernelImage); + SD_VARLINK_DEFINE_INTERFACE( io_systemd_BootControl, "io.systemd.BootControl", @@ -172,6 +209,8 @@ SD_VARLINK_DEFINE_INTERFACE( &vl_type_BootEntrySource, SD_VARLINK_SYMBOL_COMMENT("A structure encapsulating an addon of a boot entry"), &vl_type_BootEntryAddon, + SD_VARLINK_SYMBOL_COMMENT("An additional file to install"), + &vl_type_BootEntryExtraFile, SD_VARLINK_SYMBOL_COMMENT("A structure encapsulating a boot entry"), &vl_type_BootEntry, SD_VARLINK_SYMBOL_COMMENT("The operation to execute"), @@ -188,6 +227,8 @@ SD_VARLINK_DEFINE_INTERFACE( &vl_method_Install, SD_VARLINK_SYMBOL_COMMENT("Unlink a boot menu item"), &vl_method_Unlink, + SD_VARLINK_SYMBOL_COMMENT("Install a kernel as boot menu item"), + &vl_method_Link, SD_VARLINK_SYMBOL_COMMENT("SetRebootToFirmware() and GetRebootToFirmware() return this if the firmware does not actually support the reboot-to-firmware-UI concept."), &vl_error_RebootToFirmwareNotSupported, SD_VARLINK_SYMBOL_COMMENT("No boot entry defined."), @@ -197,4 +238,6 @@ SD_VARLINK_DEFINE_INTERFACE( SD_VARLINK_SYMBOL_COMMENT("Neither ESP nor XBOOTLDR found, hence no $BOOT location identified."), &vl_error_NoDollarBootFound, SD_VARLINK_SYMBOL_COMMENT("The selected boot entry token could not be determined."), - &vl_error_BootEntryTokenUnavailable); + &vl_error_BootEntryTokenUnavailable, + SD_VARLINK_SYMBOL_COMMENT("The specified kernel image is not valid."), + &vl_error_InvalidKernelImage); From e7128bb795949da6b785f2c78f75c8442ffb0b77 Mon Sep 17 00:00:00 2001 From: Lennart Poettering Date: Tue, 10 Feb 2026 15:13:02 +0100 Subject: [PATCH 070/242] bootspec: generalize "addon" logic for all kinds of extra sidecars Let's pick up all kinds of sidecars and show them, not just addons This also fixes some issues regarding "root" directory handling. In one context we'd resolve a directory claiming it was a "root", but it wasn't. Implements: https://github.com/uapi-group/specifications/pull/212 --- src/shared/bootspec.c | 471 ++++++++++++++------ src/shared/bootspec.h | 28 +- src/shared/varlink-io.systemd.BootControl.c | 2 + test/units/TEST-87-AUX-UTILS-VM.bootctl.sh | 2 +- 4 files changed, 346 insertions(+), 157 deletions(-) diff --git a/src/shared/bootspec.c b/src/shared/bootspec.c index c0111d1f9c433..a167e19a90b58 100644 --- a/src/shared/bootspec.c +++ b/src/shared/bootspec.c @@ -14,10 +14,12 @@ #include "efi-loader.h" #include "efivars.h" #include "env-file.h" +#include "errno-util.h" #include "extract-word.h" #include "fd-util.h" #include "fileio.h" #include "find-esp.h" +#include "json-util.h" #include "log.h" #include "parse-util.h" #include "path-util.h" @@ -65,15 +67,65 @@ static const char* const boot_entry_source_table[_BOOT_ENTRY_SOURCE_MAX] = { DEFINE_STRING_TABLE_LOOKUP_TO_STRING(boot_entry_source, BootEntrySource); -static void boot_entry_addons_done(BootEntryAddons *addons) { - assert(addons); +static BootEntryExtraType boot_entry_extra_type_from_filename(const char *path) { + if (!path) + return _BOOT_ENTRY_EXTRA_TYPE_INVALID; - FOREACH_ARRAY(addon, addons->items, addons->n_items) { - free(addon->cmdline); - free(addon->location); + if (endswith_no_case(path, ".addon.efi")) + return BOOT_ENTRY_ADDON; + if (endswith_no_case(path, ".confext.raw")) + return BOOT_ENTRY_CONFEXT; + if (endswith_no_case(path, ".sysext.raw")) + return BOOT_ENTRY_SYSEXT; + if (endswith_no_case(path, ".cred")) + return BOOT_ENTRY_CREDENTIAL; + + return _BOOT_ENTRY_EXTRA_TYPE_INVALID; +} + +static void boot_entry_extras_done(BootEntryExtras *extras) { + assert(extras); + + FOREACH_ARRAY(extra, extras->items, extras->n_items) { + free(extra->location); + free(extra->cmdline); + } + extras->items = mfree(extras->items); + extras->n_items = 0; +} + +static int boot_entry_extras_add( + BootEntryExtras *extras, + BootEntryExtraType type, + const char *path, + const char *cmdline) { + + assert(extras); + assert(type >= 0); + assert(type < _BOOT_ENTRY_EXTRA_TYPE_MAX); + assert(path); + + _cleanup_free_ char *p = strdup(path); + if (!p) + return -ENOMEM; + + _cleanup_free_ char *c = NULL; + if (cmdline) { + c = strdup(cmdline); + if (!c) + return -ENOMEM; } - addons->items = mfree(addons->items); - addons->n_items = 0; + + if (!GREEDY_REALLOC(extras->items, extras->n_items + 1)) + return -ENOMEM; + + extras->items[extras->n_items++] = (BootEntryExtra) { + .type = type, + .location = TAKE_PTR(p), + .cmdline = TAKE_PTR(c), + }; + + return 0; } static void boot_entry_free(BootEntry *entry) { @@ -91,7 +143,7 @@ static void boot_entry_free(BootEntry *entry) { free(entry->machine_id); free(entry->architecture); strv_free(entry->options); - boot_entry_addons_done(&entry->local_addons); + boot_entry_extras_done(&entry->local_extras); free(entry->kernel); free(entry->efi); free(entry->uki); @@ -213,6 +265,50 @@ static int parse_path_many( return strv_extend_strv_consume(s, TAKE_PTR(f), /* filter_duplicates= */ false); } +static int parse_extra( + const char *fname, + unsigned line, + const char *field, + BootEntryExtras *extras, + const char *p) { + + int r; + + assert(extras); + + _cleanup_strv_free_ char **l = strv_split(p, NULL); + if (!l) + return -ENOMEM; + + STRV_FOREACH(i, l) { + _cleanup_free_ char *c = NULL; + r = mangle_path(fname, line, field, *i, &c); + if (r < 0) + return r; + if (r == 0) + continue; + + BootEntryExtraType type = boot_entry_extra_type_from_filename(c); + if (type < 0) { + log_debug_errno(type, "Failed to determine boot entry extra type of '%s', skipping: %m", c); + continue; + } + + /* Let's filter out EFI addons for now. We have no protocol for passing them from sd-boot to + * sd-stub, hence supporting them would require major plumbing first. */ + if (type == BOOT_ENTRY_ADDON) { + log_debug("EFI addons are currently not supported for Type #1 entries, skipping '%s'.", c); + continue; + } + + r = boot_entry_extras_add(extras, type, c, /* cmdline= */ NULL); + if (r < 0) + return r; + } + + return 0; +} + static int parse_tries(const char *fname, const char **p, unsigned *ret) { _cleanup_free_ char *d = NULL; unsigned tries; @@ -421,6 +517,8 @@ static int boot_entry_load_type1( r = parse_path_one(tmp.path, line, field, &tmp.device_tree, p); else if (streq(field, "devicetree-overlay")) r = parse_path_many(tmp.path, line, field, &tmp.device_tree_overlay, p); + else if (streq(field, "extra")) + r = parse_extra(tmp.path, line, field, &tmp.local_extras, p); else { log_syntax(NULL, LOG_WARNING, tmp.path, line, 0, "Unknown line '%s', ignoring.", field); continue; @@ -458,7 +556,7 @@ int boot_config_load_type1( return r; config->n_entries++; - entry->global_addons = &config->global_addons[source]; + entry->global_extras = &config->global_extras[source]; return 0; } @@ -479,8 +577,8 @@ void boot_config_free(BootConfig *config) { boot_entry_free(i); free(config->entries); - FOREACH_ELEMENT(i, config->global_addons) - boot_entry_addons_done(i); + FOREACH_ELEMENT(i, config->global_extras) + boot_entry_extras_done(i); set_free(config->inodes_seen); } @@ -1188,126 +1286,140 @@ static int pe_find_addon_sections( return 0; } -static int insert_boot_entry_addon( - BootEntryAddons *addons, - char *location, - char *cmdline) { - - assert(addons); - - if (!GREEDY_REALLOC(addons->items, addons->n_items + 1)) - return log_oom(); - - addons->items[addons->n_items++] = (BootEntryAddon) { - .location = location, - .cmdline = cmdline, - }; - - return 0; -} - -static int boot_entries_find_unified_addons( +static int boot_entries_find_unified_extras( BootConfig *config, int d_fd, - const char *addon_dir, - const char *root, - BootEntryAddons *ret_addons) { + const char *extra_dir, + BootEntryExtraType only_type, + const char *where, + bool suppress_seen, + BootEntryExtras *extras) { - _cleanup_closedir_ DIR *d = NULL; - _cleanup_free_ char *full = NULL; - _cleanup_(boot_entry_addons_done) BootEntryAddons addons = {}; int r; - assert(ret_addons); assert(config); + assert(extras); - r = chase_and_opendirat(d_fd, d_fd, addon_dir, /* chase_flags= */ 0, &full, &d); + _cleanup_closedir_ DIR *d = NULL; + r = chase_and_opendirat( + /* root_fd= */ d_fd, + /* dir_fd= */ d_fd, + extra_dir, + /* chase_flags= */ 0, + /* ret_path= */ NULL, + &d); if (r == -ENOENT) return 0; if (r < 0) - return log_error_errno(r, "Failed to open '%s/%s': %m", root, skip_leading_slash(addon_dir)); - - FOREACH_DIRENT(de, d, return log_error_errno(errno, "Failed to read %s: %m", full)) { - _cleanup_free_ char *j = NULL, *cmdline = NULL, *location = NULL; - _cleanup_close_ int fd = -EBADF; + return log_error_errno(r, "Failed to open '%s/%s': %m", where, skip_leading_slash(extra_dir)); + FOREACH_DIRENT(de, d, return log_error_errno(errno, "Failed to read '%s': %m", extra_dir)) { if (!dirent_is_file(de)) continue; - if (!endswith_no_case(de->d_name, ".addon.efi")) + BootEntryExtraType type = boot_entry_extra_type_from_filename(de->d_name); + if (type < 0) { + log_debug_errno(type, "Unrecognized extra file '%s', skipping.", de->d_name); continue; + } + if (only_type >= 0 && type != only_type) { + log_debug("Extra file '%s' type not permitted in '%s', skipping.", de->d_name, extra_dir); + continue; + } - fd = openat(dirfd(d), de->d_name, O_RDONLY|O_CLOEXEC|O_NONBLOCK|O_NOFOLLOW|O_NOCTTY); - if (fd < 0) { - log_warning_errno(errno, "Failed to open %s/%s, ignoring: %m", full, de->d_name); + _cleanup_free_ char *location = path_join(extra_dir, de->d_name); + if (!location) + return log_oom(); + + _cleanup_close_ int pin_fd = openat(dirfd(d), de->d_name, O_PATH|O_CLOEXEC|O_NOFOLLOW); + if (pin_fd < 0) { + log_debug_errno(errno, "Failed to pin '%s', ignoring: %m", location); continue; } - r = config_check_inode_relevant_and_unseen(config, fd, de->d_name); - if (r < 0) - return r; - if (r == 0) /* inode already seen or otherwise not relevant */ + r = fd_verify_regular(pin_fd); + if (r < 0) { + log_debug_errno(r, "Unrecognized inode type of '%s', skipping.", location); continue; + } - j = path_join(full, de->d_name); - if (!j) - return log_oom(); + if (suppress_seen) { + r = config_check_inode_relevant_and_unseen(config, pin_fd, location); + if (r < 0) + return r; + if (r == 0) /* inode already seen or otherwise not relevant */ + continue; + } - if (pe_find_addon_sections(fd, j, &cmdline) <= 0) - continue; + _cleanup_free_ char *cmdline = NULL; + if (type == BOOT_ENTRY_ADDON) { + _cleanup_close_ int fd = fd_reopen(pin_fd, O_RDONLY|O_CLOEXEC|O_NONBLOCK|O_NOCTTY); + if (fd < 0) { + log_debug_errno(fd, "Failed to open '%s', ignoring: %m", location); + continue; + } - location = strdup(j); - if (!location) - return log_oom(); + /* Try to extract the command line, but let's handle any failures gracefully, but + * still mention the extra file exists. */ + (void) pe_find_addon_sections(fd, location, &cmdline); + } - r = insert_boot_entry_addon(&addons, location, cmdline); + r = boot_entry_extras_add(extras, type, location, cmdline); if (r < 0) return r; - - TAKE_PTR(location); - TAKE_PTR(cmdline); } - *ret_addons = TAKE_STRUCT(addons); return 0; } -static int boot_entries_find_unified_global_addons( +static int boot_entries_find_unified_global_extras( BootConfig *config, - const char *root, - const char *d_name, - BootEntryAddons *ret_addons) { - - int r; - _cleanup_closedir_ DIR *d = NULL; + const char *where, + const char *extra_dir, + BootEntryExtraType only_type, + BootEntryExtras *extras) { - assert(ret_addons); + assert(extras); - r = chase_and_opendir(root, NULL, CHASE_PROHIBIT_SYMLINKS, NULL, &d); - if (r == -ENOENT) + _cleanup_close_ int where_fd = RET_NERRNO(open(where, O_DIRECTORY|O_CLOEXEC)); + if (where_fd == -ENOENT) return 0; - if (r < 0) - return log_error_errno(r, "Failed to open '%s/%s': %m", root, skip_leading_slash(d_name)); - - return boot_entries_find_unified_addons(config, dirfd(d), d_name, root, ret_addons); + if (where_fd < 0) + return log_error_errno(where_fd, "Failed to open '%s': %m", where); + + return boot_entries_find_unified_extras( + config, + where_fd, + extra_dir, + only_type, + where, + /* suppress_seen= */ true, + extras); } -static int boot_entries_find_unified_local_addons( +static int boot_entries_find_unified_local_extras( BootConfig *config, int d_fd, - const char *d_name, - const char *root, + const char *uki, + const char *where, BootEntry *ret) { - _cleanup_free_ char *addon_dir = NULL; + _cleanup_free_ char *extra_dir = NULL; assert(ret); - addon_dir = strjoin(d_name, ".extra.d"); - if (!addon_dir) + extra_dir = strjoin(uki, ".extra.d"); + if (!extra_dir) return log_oom(); - return boot_entries_find_unified_addons(config, d_fd, addon_dir, root, &ret->local_addons); + return boot_entries_find_unified_extras( + config, + d_fd, + extra_dir, + /* only_type= */ _BOOT_ENTRY_EXTRA_TYPE_INVALID, + where, + /* suppress_seen= */ false, + &ret->local_extras); } static int boot_entries_find_unified( @@ -1369,11 +1481,11 @@ static int boot_entries_find_unified( if (boot_entry_load_unified(root, source, j, p, osrelease, profile, cmdline, entry) < 0) continue; - /* look for .efi.extra.d */ - (void) boot_entries_find_unified_local_addons(config, dirfd(d), de->d_name, full, entry); + /* Look for .efi.extra.d/ */ + (void) boot_entries_find_unified_local_extras(config, dirfd(d), de->d_name, full, entry); - /* Set up the backpointer, so that we can find the global addons */ - entry->global_addons = &config->global_addons[source]; + /* Set up the backpointer, so that we can find the global extras */ + entry->global_extras = &config->global_extras[source]; config->n_entries++; } @@ -1614,45 +1726,73 @@ int boot_config_finalize(BootConfig *config) { return 0; } -int boot_config_load( +static int boot_entries_load( BootConfig *config, - const char *esp_path, - const char *xbootldr_path) { + BootEntrySource source, + const char *where) { /* Mount point of ESP/XBOOTLDR */ int r; assert(config); + assert(source >= 0); + assert(source < _BOOT_ENTRY_SOURCE_MAX); - if (esp_path) { - r = boot_loader_read_conf_path(config, esp_path, "/loader/loader.conf"); - if (r < 0) - return r; + if (!where) + return 0; - r = boot_entries_find_type1(config, esp_path, BOOT_ENTRY_ESP, "/loader/entries"); - if (r < 0) - return r; + r = boot_entries_find_type1(config, where, source, "/loader/entries"); + if (r < 0) + return r; - r = boot_entries_find_unified(config, esp_path, BOOT_ENTRY_ESP, "/EFI/Linux/"); - if (r < 0) - return r; + r = boot_entries_find_unified(config, where, source, "/EFI/Linux/"); + if (r < 0) + return r; - r = boot_entries_find_unified_global_addons(config, esp_path, "/loader/addons/", - &config->global_addons[BOOT_ENTRY_ESP]); + static const struct { + BootEntryExtraType extra_type; + const char *directory; + } table[] = { + { BOOT_ENTRY_ADDON, "/loader/addons/" }, + { BOOT_ENTRY_CONFEXT, "/loader/extensions/" }, + { BOOT_ENTRY_SYSEXT, "/loader/extensions/" }, + { BOOT_ENTRY_CREDENTIAL, "/loader/credentials/" }, + }; + + FOREACH_ELEMENT(i, table) { + r = boot_entries_find_unified_global_extras( + config, + where, + i->directory, + i->extra_type, + &config->global_extras[source]); if (r < 0) return r; } - if (xbootldr_path) { - r = boot_entries_find_type1(config, xbootldr_path, BOOT_ENTRY_XBOOTLDR, "/loader/entries"); + return 0; +} + +int boot_config_load( + BootConfig *config, + const char *esp_path, + const char *xbootldr_path) { + + int r; + + assert(config); + + if (esp_path) { + r = boot_loader_read_conf_path(config, esp_path, "/loader/loader.conf"); if (r < 0) return r; - r = boot_entries_find_unified(config, xbootldr_path, BOOT_ENTRY_XBOOTLDR, "/EFI/Linux/"); + r = boot_entries_load(config, BOOT_ENTRY_ESP, esp_path); if (r < 0) return r; + } - r = boot_entries_find_unified_global_addons(config, xbootldr_path, "/loader/addons/", - &config->global_addons[BOOT_ENTRY_XBOOTLDR]); + if (xbootldr_path) { + r = boot_entries_load(config, BOOT_ENTRY_XBOOTLDR, xbootldr_path); if (r < 0) return r; } @@ -1724,7 +1864,7 @@ int boot_config_augment_from_loader( char **found_by_loader, bool auto_only) { - static const BootEntryAddons no_addons = (BootEntryAddons) {}; + static const BootEntryExtras no_extras = (BootEntryExtras) {}; static const char *const title_table[] = { /* Pretty names for a few well-known automatically discovered entries. */ "auto-osx", "macOS", @@ -1783,7 +1923,7 @@ int boot_config_augment_from_loader( .tries_left = UINT_MAX, .tries_done = UINT_MAX, .profile = UINT_MAX, - .global_addons = &no_addons, + .global_extras = &no_extras, }; } @@ -1806,10 +1946,10 @@ static void boot_entry_file_list( const char *field, const char *root, const char *p, - int *ret_status) { + int *pstatus) { assert(p); - assert(ret_status); + assert(pstatus); int status = chase_and_access(p, root, CHASE_PREFIX_ROOT|CHASE_PROHIBIT_SYMLINKS, F_OK, NULL); @@ -1824,16 +1964,23 @@ static void boot_entry_file_list( } else printf("%s\n", p); - if (*ret_status == 0 && status < 0) - *ret_status = status; + if (*pstatus == 0 && status < 0) + *pstatus = status; } -static void print_addon( - BootEntryAddon *addon, - const char *addon_str) { +static void print_extra( + const BootEntry *e, + const BootEntryExtra *extra, + const char *field, + int *status) { + + assert(e); + assert(extra); + + boot_entry_file_list(field, e->root, extra->location, status); - printf(" %s: %s\n", addon_str, addon->location); - printf(" options: %s%s\n", glyph(GLYPH_TREE_RIGHT), addon->cmdline); + if (extra->cmdline) + printf(" options: %s%s\n", glyph(GLYPH_TREE_RIGHT), extra->cmdline); } static int indent_embedded_newlines(char *cmdline, char **ret_cmdline) { @@ -1851,12 +1998,10 @@ static int indent_embedded_newlines(char *cmdline, char **ret_cmdline) { return -ENOMEM; *ret_cmdline = TAKE_PTR(t); - return 0; } -static int print_cmdline(const BootEntry *e) { - +static int print_cmdline(const BootEntry *e, int *status) { _cleanup_free_ char *options = NULL, *combined_cmdline = NULL, *t2 = NULL; assert(e); @@ -1877,17 +2022,20 @@ static int print_cmdline(const BootEntry *e) { return log_oom(); } - FOREACH_ARRAY(addon, e->global_addons->items, e->global_addons->n_items) { - print_addon(addon, "global-addon"); - if (!strextend(&t2, " ", addon->cmdline)) - return log_oom(); + FOREACH_ARRAY(extra, e->global_extras->items, e->global_extras->n_items) { + print_extra(e, extra, "extra", status); + + if (extra->cmdline) + if (!strextend(&t2, " ", extra->cmdline)) + return log_oom(); } - FOREACH_ARRAY(addon, e->local_addons.items, e->local_addons.n_items) { - /* Add space at the beginning of addon_str to align it correctly */ - print_addon(addon, " local-addon"); - if (!strextend(&t2, " ", addon->cmdline)) - return log_oom(); + FOREACH_ARRAY(extra, e->local_extras.items, e->local_extras.n_items) { + print_extra(e, extra, "extra", status); + + if (extra->cmdline) + if (!strextend(&t2, " ", extra->cmdline)) + return log_oom(); } /* Don't print the combined cmdline if it's same as options. */ @@ -1904,19 +2052,19 @@ static int print_cmdline(const BootEntry *e) { } static int json_addon( - BootEntryAddon *addon, - const char *addon_str, + const BootEntryExtra *extra, + const char *extra_str, sd_json_variant **array) { int r; - assert(addon); - assert(addon_str); + assert(extra); + assert(extra_str); r = sd_json_variant_append_arraybo( array, - SD_JSON_BUILD_PAIR_STRING(addon_str, addon->location), - SD_JSON_BUILD_PAIR_STRING("options", addon->cmdline)); + SD_JSON_BUILD_PAIR_STRING(extra_str, extra->location), + JSON_BUILD_PAIR_STRING_NON_EMPTY("options", extra->cmdline)); if (r < 0) return log_oom(); @@ -1940,20 +2088,31 @@ static int json_cmdline( return log_oom(); } - FOREACH_ARRAY(addon, e->global_addons->items, e->global_addons->n_items) { - r = json_addon(addon, "globalAddon", &addons_array); + /* NB: these JSON fields are kinda obsolete, we want the more generic 'extra' ones to be used. */ + FOREACH_ARRAY(extra, e->global_extras->items, e->global_extras->n_items) { + if (extra->type != BOOT_ENTRY_ADDON) + continue; + + r = json_addon(extra, "globalAddon", &addons_array); if (r < 0) return r; - if (!strextend(&combined_cmdline, " ", addon->cmdline)) - return log_oom(); + + if (extra->cmdline) + if (!strextend(&combined_cmdline, " ", extra->cmdline)) + return log_oom(); } - FOREACH_ARRAY(addon, e->local_addons.items, e->local_addons.n_items) { - r = json_addon(addon, "localAddon", &addons_array); + FOREACH_ARRAY(extra, e->local_extras.items, e->local_extras.n_items) { + if (extra->type != BOOT_ENTRY_ADDON) + continue; + + r = json_addon(extra, "localAddon", &addons_array); if (r < 0) return r; - if (!strextend(&combined_cmdline, " ", addon->cmdline)) - return log_oom(); + + if (extra->cmdline) + if (!strextend(&combined_cmdline, " ", extra->cmdline)) + return log_oom(); } r = sd_json_variant_merge_objectbo( @@ -2064,7 +2223,7 @@ int show_boot_entry( *s, &status); - r = print_cmdline(e); + r = print_cmdline(e, &status); if (r < 0) return r; @@ -2144,6 +2303,24 @@ int boot_entry_to_json(const BootConfig *c, size_t i, sd_json_variant **ret) { if (r < 0) return log_oom(); + _cleanup_(sd_json_variant_unrefp) sd_json_variant *jextras = NULL; + FOREACH_ARRAY(extra, e->global_extras->items, e->global_extras->n_items) { + r = sd_json_variant_append_arrayb(&jextras, SD_JSON_BUILD_STRING(extra->location)); + if (r < 0) + return log_oom(); + } + FOREACH_ARRAY(extra, e->local_extras.items, e->local_extras.n_items) { + r = sd_json_variant_append_arrayb(&jextras, SD_JSON_BUILD_STRING(extra->location)); + if (r < 0) + return log_oom(); + } + + r = sd_json_variant_merge_objectbo( + &v, + SD_JSON_BUILD_PAIR_CONDITION(!!jextras, "extras", SD_JSON_BUILD_VARIANT(jextras))); + if (r < 0) + return log_oom(); + *ret = TAKE_PTR(v); return 1; } diff --git a/src/shared/bootspec.h b/src/shared/bootspec.h index d5f6930be99d1..677667383c9de 100644 --- a/src/shared/bootspec.h +++ b/src/shared/bootspec.h @@ -20,15 +20,25 @@ typedef enum BootEntrySource { _BOOT_ENTRY_SOURCE_INVALID = -EINVAL, } BootEntrySource; -typedef struct BootEntryAddon { +typedef enum BootEntryExtraType { + BOOT_ENTRY_ADDON, + BOOT_ENTRY_CONFEXT, + BOOT_ENTRY_SYSEXT, + BOOT_ENTRY_CREDENTIAL, + _BOOT_ENTRY_EXTRA_TYPE_MAX, + _BOOT_ENTRY_EXTRA_TYPE_INVALID = -EINVAL, +} BootEntryExtraType; + +typedef struct BootEntryExtra { + BootEntryExtraType type; char *location; - char *cmdline; -} BootEntryAddon; + char *cmdline; /* only for BOOT_ENTRY_ADDON */ +} BootEntryExtra; -typedef struct BootEntryAddons { - BootEntryAddon *items; +typedef struct BootEntryExtras { + BootEntryExtra *items; size_t n_items; -} BootEntryAddons; +} BootEntryExtras; typedef struct BootEntry { BootEntryType type; @@ -46,8 +56,8 @@ typedef struct BootEntry { char *machine_id; char *architecture; char **options; - BootEntryAddons local_addons; - const BootEntryAddons *global_addons; /* Backpointer into the BootConfig; we don't own this here */ + BootEntryExtras local_extras; + const BootEntryExtras *global_extras; /* Backpointer into the BootConfig; we don't own this here */ char *kernel; /* linux is #defined to 1, yikes! */ char *efi; char *uki; @@ -84,7 +94,7 @@ typedef struct BootConfig { BootEntry *entries; size_t n_entries; - BootEntryAddons global_addons[_BOOT_ENTRY_SOURCE_MAX]; + BootEntryExtras global_extras[_BOOT_ENTRY_SOURCE_MAX]; ssize_t default_entry; ssize_t selected_entry; diff --git a/src/shared/varlink-io.systemd.BootControl.c b/src/shared/varlink-io.systemd.BootControl.c index c16a295d8979f..920b9479db0a4 100644 --- a/src/shared/varlink-io.systemd.BootControl.c +++ b/src/shared/varlink-io.systemd.BootControl.c @@ -80,6 +80,8 @@ static SD_VARLINK_DEFINE_STRUCT_TYPE( SD_VARLINK_DEFINE_FIELD(isSelected, SD_VARLINK_BOOL, SD_VARLINK_NULLABLE), SD_VARLINK_FIELD_COMMENT("Addon images of the entry."), SD_VARLINK_DEFINE_FIELD_BY_TYPE(addons, BootEntryAddon, SD_VARLINK_NULLABLE|SD_VARLINK_ARRAY), + SD_VARLINK_FIELD_COMMENT("Extra files associated with the entry."), + SD_VARLINK_DEFINE_FIELD(extras, SD_VARLINK_STRING, SD_VARLINK_NULLABLE|SD_VARLINK_ARRAY), SD_VARLINK_FIELD_COMMENT("Command line options of the entry."), SD_VARLINK_DEFINE_FIELD(cmdline, SD_VARLINK_STRING, SD_VARLINK_NULLABLE)); diff --git a/test/units/TEST-87-AUX-UTILS-VM.bootctl.sh b/test/units/TEST-87-AUX-UTILS-VM.bootctl.sh index 440c3e5edfbcc..668c0cfac4580 100755 --- a/test/units/TEST-87-AUX-UTILS-VM.bootctl.sh +++ b/test/units/TEST-87-AUX-UTILS-VM.bootctl.sh @@ -374,7 +374,7 @@ testcase_00_secureboot() { bootctl status | grep "Secure Boot: enabled" >/dev/null # Ensure the addon is fully loaded and parsed - bootctl status | grep "global-addon: loader/addons/test.addon.efi" >/dev/null + bootctl status | grep "extra: /boot//loader/addons/test.addon.efi" >/dev/null bootctl status | grep "cmdline" | grep addonfoobar >/dev/null grep -q addonfoobar /proc/cmdline } From e7ab31b4d1c90ffe41e29b60731742c3cdeed1bd Mon Sep 17 00:00:00 2001 From: Lennart Poettering Date: Fri, 20 Mar 2026 17:33:07 +0100 Subject: [PATCH 071/242] bootctl: make sure "unlink" properly tracks "extra" files --- src/bootctl/bootctl-unlink.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/src/bootctl/bootctl-unlink.c b/src/bootctl/bootctl-unlink.c index 80e74926c6c76..6e8e5c4bb1703 100644 --- a/src/bootctl/bootctl-unlink.c +++ b/src/bootctl/bootctl-unlink.c @@ -146,6 +146,12 @@ static int boot_entry_ref_files( return r; } + FOREACH_ARRAY(x, e->local_extras.items, e->local_extras.n_items) { + r = ref_file(known_files, x->location, increment); + if (r < 0) + return r; + } + return 0; } @@ -294,6 +300,8 @@ int boot_entry_unlink( (void) unref_unlink_file(&known_files, root, root_fd, e->device_tree, dry_run); STRV_FOREACH(s, e->device_tree_overlay) (void) unref_unlink_file(&known_files, root, root_fd, *s, dry_run); + FOREACH_ARRAY(x, e->local_extras.items, e->local_extras.n_items) + (void) unref_unlink_file(&known_files, root, root_fd, x->location, dry_run); if (dry_run) log_info("Would remove \"%s\"", e->path); From c68b8ff4d192b47d1b99a52b6217c1dc3f7a39ea Mon Sep 17 00:00:00 2001 From: Lennart Poettering Date: Fri, 17 Apr 2026 15:01:00 +0200 Subject: [PATCH 072/242] ci: add integration test for new bootctl functionality --- test/units/TEST-87-AUX-UTILS-VM.bootctl.sh | 271 +++++++++++++++++++++ 1 file changed, 271 insertions(+) diff --git a/test/units/TEST-87-AUX-UTILS-VM.bootctl.sh b/test/units/TEST-87-AUX-UTILS-VM.bootctl.sh index 668c0cfac4580..90daaf52eadc2 100755 --- a/test/units/TEST-87-AUX-UTILS-VM.bootctl.sh +++ b/test/units/TEST-87-AUX-UTILS-VM.bootctl.sh @@ -397,4 +397,275 @@ testcase_install_varlink() { bootctl is-installed } +cleanup_link() { + if [[ -n "${LINK_WORKDIR:-}" ]]; then + rm -rf "$LINK_WORKDIR" + unset LINK_WORKDIR + fi + restore_esp +} + +testcase_bootctl_link() { + if ! command -v ukify >/dev/null; then + echo "ukify not found, skipping." + return 0 + fi + + backup_esp + LINK_WORKDIR="$(mktemp --directory /tmp/test-bootctl-link.XXXXXXXXXX)" + trap cleanup_link RETURN ERR + + # Ensure loader/entries directory is present + bootctl install --make-entry-directory=yes + + local ESP + ESP="$(bootctl --print-esp-path)" + + # Build a minimal UKI via ukify. The .linux content does not need to be a + # real kernel — bootctl link only requires a valid PE with .osrel (and the + # systemd-stub SBAT marker that pe_is_uki() checks for). + cat >"$LINK_WORKDIR/os-release" <<'EOF' +ID=testos +NAME="Test OS" +PRETTY_NAME="Test OS" +EOF + echo "fake-kernel" >"$LINK_WORKDIR/vmlinuz" + echo "fake-initrd" >"$LINK_WORKDIR/initrd" + echo "fake-sysext-data" >"$LINK_WORKDIR/hello.sysext.raw" + echo "fake-confext-data" >"$LINK_WORKDIR/hello.confext.raw" + echo "fake-credential" >"$LINK_WORKDIR/hello.cred" + + ukify build \ + --linux "$LINK_WORKDIR/vmlinuz" \ + --initrd "$LINK_WORKDIR/initrd" \ + --os-release "@$LINK_WORKDIR/os-release" \ + --uname "1.2.3-testkernel" \ + --cmdline "quiet" \ + --output "$LINK_WORKDIR/testuki.efi" + + # Pin an explicit entry token so the resulting filenames are deterministic + local TOKEN="systemdtest" + local BOOTCTL=(bootctl "--entry-token=literal:$TOKEN") + + # --- Test 1: basic link/unlink --- + "${BOOTCTL[@]}" link "$LINK_WORKDIR/testuki.efi" + + # Exactly one entry file should exist, named "${TOKEN}-commit_1.conf" + local ENTRY="$ESP/loader/entries/${TOKEN}-commit_1.conf" + test -f "$ENTRY" + test -f "$ESP/$TOKEN/testuki.efi" + + # Verify the entry file contents + grep "^title " "$ENTRY" >/dev/null + grep "^uki /${TOKEN}/testuki.efi\$" "$ENTRY" >/dev/null + grep "^version 1\$" "$ENTRY" >/dev/null + + # Make sure bootctl list sees it + bootctl list --json=short | grep -F "${TOKEN}-commit_1.conf" >/dev/null + + # Remove it again using the ID (entry IDs include the .conf suffix) + "${BOOTCTL[@]}" unlink "${TOKEN}-commit_1.conf" + test ! -e "$ENTRY" + test ! -e "$ESP/$TOKEN/testuki.efi" + + # --- Test 2: link with --entry-title/--entry-version/--entry-commit/--tries-left --- + "${BOOTCTL[@]}" link "$LINK_WORKDIR/testuki.efi" \ + --entry-title="My Funky Entry" \ + --entry-version="9.8.7" \ + --entry-commit=42 \ + --tries-left=3 + + ENTRY="$ESP/loader/entries/${TOKEN}-commit_42.9.8.7+3.conf" + test -f "$ENTRY" + test -f "$ESP/$TOKEN/testuki.efi" + + grep "^title My Funky Entry\$" "$ENTRY" >/dev/null + grep "^version 42.9.8.7\$" "$ENTRY" >/dev/null + grep "^uki /${TOKEN}/testuki.efi\$" "$ENTRY" >/dev/null + + # Unlink using the ID (the tries counter "+3" is stripped from the canonical ID) + "${BOOTCTL[@]}" unlink "${TOKEN}-commit_42.9.8.7.conf" + test ! -e "$ENTRY" + test ! -e "$ESP/$TOKEN/testuki.efi" + + # --- Test 3: link with extras (-X and --extra=) --- + "${BOOTCTL[@]}" link "$LINK_WORKDIR/testuki.efi" \ + --entry-commit=50 \ + -X "$LINK_WORKDIR/hello.sysext.raw" \ + --extra="$LINK_WORKDIR/hello.confext.raw" \ + -X "$LINK_WORKDIR/hello.cred" + + ENTRY="$ESP/loader/entries/${TOKEN}-commit_50.conf" + test -f "$ENTRY" + test -f "$ESP/$TOKEN/testuki.efi" + test -f "$ESP/$TOKEN/hello.sysext.raw" + test -f "$ESP/$TOKEN/hello.confext.raw" + test -f "$ESP/$TOKEN/hello.cred" + + grep "^extra /${TOKEN}/hello.sysext.raw\$" "$ENTRY" >/dev/null + grep "^extra /${TOKEN}/hello.confext.raw\$" "$ENTRY" >/dev/null + grep "^extra /${TOKEN}/hello.cred\$" "$ENTRY" >/dev/null + + # Unlink must also clean up the extra resources + "${BOOTCTL[@]}" unlink "${TOKEN}-commit_50.conf" + test ! -e "$ENTRY" + test ! -e "$ESP/$TOKEN/testuki.efi" + test ! -e "$ESP/$TOKEN/hello.sysext.raw" + test ! -e "$ESP/$TOKEN/hello.confext.raw" + test ! -e "$ESP/$TOKEN/hello.cred" + + # --- Test 4: --oldest drops the lowest commit first --- + "${BOOTCTL[@]}" link "$LINK_WORKDIR/testuki.efi" --entry-commit=10 + "${BOOTCTL[@]}" link "$LINK_WORKDIR/testuki.efi" --entry-commit=20 + "${BOOTCTL[@]}" link "$LINK_WORKDIR/testuki.efi" --entry-commit=30 + + test -f "$ESP/loader/entries/${TOKEN}-commit_10.conf" + test -f "$ESP/loader/entries/${TOKEN}-commit_20.conf" + test -f "$ESP/loader/entries/${TOKEN}-commit_30.conf" + test -f "$ESP/$TOKEN/testuki.efi" + + "${BOOTCTL[@]}" unlink --oldest=yes + test ! -e "$ESP/loader/entries/${TOKEN}-commit_10.conf" + test -f "$ESP/loader/entries/${TOKEN}-commit_20.conf" + test -f "$ESP/loader/entries/${TOKEN}-commit_30.conf" + test -f "$ESP/$TOKEN/testuki.efi" + + "${BOOTCTL[@]}" unlink --oldest=yes + test ! -e "$ESP/loader/entries/${TOKEN}-commit_20.conf" + test -f "$ESP/loader/entries/${TOKEN}-commit_30.conf" + test -f "$ESP/$TOKEN/testuki.efi" + + # --- Test 5: --dry-run leaves everything in place --- + "${BOOTCTL[@]}" --dry-run unlink "${TOKEN}-commit_30.conf" + test -f "$ESP/loader/entries/${TOKEN}-commit_30.conf" + test -f "$ESP/$TOKEN/testuki.efi" + + # Actually remove it now + "${BOOTCTL[@]}" unlink "${TOKEN}-commit_30.conf" + test ! -e "$ESP/loader/entries/${TOKEN}-commit_30.conf" + test ! -e "$ESP/$TOKEN/testuki.efi" + + # --- Test 6: invalid combinations are rejected --- + # Neither an ID nor --oldest + (! "${BOOTCTL[@]}" unlink) + # Both an ID and --oldest + (! "${BOOTCTL[@]}" unlink --oldest=yes "${TOKEN}-commit_1.conf") + + # --- Test 7: refusing to link when --keep-free cannot be satisfied --- + (! "${BOOTCTL[@]}" link "$LINK_WORKDIR/testuki.efi" --entry-commit=99 --keep-free=1T) + test ! -e "$ESP/loader/entries/${TOKEN}-commit_99.conf" + + # --- Test 8: refusing to re-link the same commit number --- + "${BOOTCTL[@]}" link "$LINK_WORKDIR/testuki.efi" --entry-commit=77 + (! "${BOOTCTL[@]}" link "$LINK_WORKDIR/testuki.efi" --entry-commit=77) + "${BOOTCTL[@]}" unlink "${TOKEN}-commit_77.conf" + + # --- Test 9: passing a non-UKI is rejected --- + (! "${BOOTCTL[@]}" link "$LINK_WORKDIR/vmlinuz") + + # === Varlink coverage === + # + # Exercise io.systemd.BootControl.Link/Unlink by forking bootctl as a + # varlink server via 'varlinkctl call '. Note the Varlink schema + # has no way to supply a literal entry token (unlike --entry-token= on + # the command line), so the token is chosen by bootctl from + # machine-id/os-release — we recover it from the returned id. + local BOOTCTL_BIN vreply vid vtoken + BOOTCTL_BIN="$(type -p bootctl)" + + # --- Test 10: Link + Unlink via varlink --- + vreply="$(varlinkctl call --json=short \ + --push-fd="$LINK_WORKDIR/testuki.efi" \ + "$BOOTCTL_BIN" io.systemd.BootControl.Link \ + '{"kernelFilename":"vluki.efi","kernelFileDescriptor":0}')" + vid="$(echo "$vreply" | jq -r '.ids[0]')" + test -n "$vid" + test "$vid" != "null" + vtoken="${vid%%-commit_*}" + test -n "$vtoken" + + test -f "$ESP/loader/entries/$vid" + test -f "$ESP/$vtoken/vluki.efi" + grep "^uki /$vtoken/vluki.efi\$" "$ESP/loader/entries/$vid" >/dev/null + + varlinkctl call --quiet "$BOOTCTL_BIN" io.systemd.BootControl.Unlink \ + "{\"id\":\"$vid\"}" + test ! -e "$ESP/loader/entries/$vid" + test ! -e "$ESP/$vtoken/vluki.efi" + + # --- Test 11: Link with entryTitle/entryVersion/entryCommit/triesLeft + extraFiles via varlink --- + vreply="$(varlinkctl call --json=short \ + --push-fd="$LINK_WORKDIR/testuki.efi" \ + --push-fd="$LINK_WORKDIR/hello.sysext.raw" \ + --push-fd="$LINK_WORKDIR/hello.cred" \ + "$BOOTCTL_BIN" io.systemd.BootControl.Link \ + '{"kernelFilename":"vluki2.efi","kernelFileDescriptor":0,"entryTitle":"Varlink Title","entryVersion":"2.3.4","entryCommit":111,"triesLeft":2,"extraFiles":[{"filename":"hello.sysext.raw","fileDescriptor":1},{"filename":"hello.cred","fileDescriptor":2}]}')" + vid="$(echo "$vreply" | jq -r '.ids[0]')" + # The returned id has the tries counter ("+2") stripped + assert_eq "$vid" "$vtoken-commit_111.2.3.4.conf" + # The on-disk entry filename includes the tries counter + local VENTRY="$ESP/loader/entries/$vtoken-commit_111.2.3.4+2.conf" + test -f "$VENTRY" + test -f "$ESP/$vtoken/vluki2.efi" + test -f "$ESP/$vtoken/hello.sysext.raw" + test -f "$ESP/$vtoken/hello.cred" + + grep "^title Varlink Title\$" "$VENTRY" >/dev/null + grep "^version 111.2.3.4\$" "$VENTRY" >/dev/null + grep "^extra /$vtoken/hello.sysext.raw\$" "$VENTRY" >/dev/null + grep "^extra /$vtoken/hello.cred\$" "$VENTRY" >/dev/null + + varlinkctl call --quiet "$BOOTCTL_BIN" io.systemd.BootControl.Unlink \ + "{\"id\":\"$vid\"}" + test ! -e "$VENTRY" + test ! -e "$ESP/$vtoken/vluki2.efi" + test ! -e "$ESP/$vtoken/hello.sysext.raw" + test ! -e "$ESP/$vtoken/hello.cred" + + # --- Test 12: Unlink oldest via varlink --- + local c + for c in 210 220 230; do + varlinkctl call --quiet \ + --push-fd="$LINK_WORKDIR/testuki.efi" \ + "$BOOTCTL_BIN" io.systemd.BootControl.Link \ + "{\"kernelFilename\":\"vluki3.efi\",\"kernelFileDescriptor\":0,\"entryCommit\":$c}" + done + test -f "$ESP/loader/entries/$vtoken-commit_210.conf" + test -f "$ESP/loader/entries/$vtoken-commit_220.conf" + test -f "$ESP/loader/entries/$vtoken-commit_230.conf" + + varlinkctl call --quiet "$BOOTCTL_BIN" io.systemd.BootControl.Unlink \ + '{"oldest":true}' + test ! -e "$ESP/loader/entries/$vtoken-commit_210.conf" + test -f "$ESP/loader/entries/$vtoken-commit_220.conf" + test -f "$ESP/loader/entries/$vtoken-commit_230.conf" + test -f "$ESP/$vtoken/vluki3.efi" + + # Clean up remaining entries + varlinkctl call --quiet "$BOOTCTL_BIN" io.systemd.BootControl.Unlink \ + "{\"id\":\"$vtoken-commit_220.conf\"}" + varlinkctl call --quiet "$BOOTCTL_BIN" io.systemd.BootControl.Unlink \ + "{\"id\":\"$vtoken-commit_230.conf\"}" + test ! -e "$ESP/loader/entries/$vtoken-commit_220.conf" + test ! -e "$ESP/loader/entries/$vtoken-commit_230.conf" + test ! -e "$ESP/$vtoken/vluki3.efi" + + # --- Test 13: Link with a non-UKI via varlink returns InvalidKernelImage --- + varlinkctl call --quiet \ + --push-fd="$LINK_WORKDIR/vmlinuz" \ + --graceful=io.systemd.BootControl.InvalidKernelImage \ + "$BOOTCTL_BIN" io.systemd.BootControl.Link \ + '{"kernelFilename":"notauki.efi","kernelFileDescriptor":0}' + + # --- Test 14: Unlink with invalid argument combinations is rejected --- + # Both id and oldest=true + (! varlinkctl call "$BOOTCTL_BIN" io.systemd.BootControl.Unlink \ + '{"id":"foo.conf","oldest":true}') + # Neither id nor oldest + (! varlinkctl call "$BOOTCTL_BIN" io.systemd.BootControl.Unlink '{}') + # Invalid id characters (e.g. a glob) + (! varlinkctl call "$BOOTCTL_BIN" io.systemd.BootControl.Unlink \ + '{"id":"foo*.conf"}') +} + run_testcases From 5e74fd06ba92ac1956b29daeb2a2ecef21d01dc6 Mon Sep 17 00:00:00 2001 From: Lennart Poettering Date: Tue, 10 Feb 2026 12:39:02 +0100 Subject: [PATCH 073/242] update TODO --- TODO.md | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/TODO.md b/TODO.md index 60a11dcee3ad1..b5777556e999d 100644 --- a/TODO.md +++ b/TODO.md @@ -143,6 +143,12 @@ SPDX-License-Identifier: LGPL-2.1-or-later volume is auto-destroyed. Would be exposed via a new flag on the Acquire call, similar to the locking logic above. +- clean up credential naming a bit: let's say encrypted creds always should + carry .cred suffix, and unencrypted should not. + +- clean up naming of sidecar files in sd-stub: let's put global ones strictly + into /loader/extras/ + - a small tool that can do basic btrfs raid policy mgmt. i.e. gets started as part of the initial transaction for some btrfs raid fs, waits for some time, then puts message on screen (plymouth, console) that some devices apparently @@ -2163,10 +2169,6 @@ SPDX-License-Identifier: LGPL-2.1-or-later - run0: maybe enable utmp for run0 sessions, so that they are easily visible. -- sd-boot/sd-stub: install a uefi "handle" to a sidecar dir of bls type #1 - entries with an "uki" or "uki-url" stanza, and make sd-stub look for - that. That way we can parameterize type #1 entries nicely. - - **sd-boot:** - do something useful if we find exactly zero entries (ignoring items such as reboot/poweroff/factory reset). Show a help text or so. From 6a672f5df87ebd6b4edd1aea3a1a31fc76651313 Mon Sep 17 00:00:00 2001 From: Jim Spentzos Date: Fri, 1 May 2026 00:59:23 +0000 Subject: [PATCH 074/242] po: Translated using Weblate (Greek) Currently translated at 100.0% (266 of 266 strings) Co-authored-by: Jim Spentzos Translate-URL: https://translate.fedoraproject.org/projects/systemd/main/el/ Translation: systemd/main --- po/el.po | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/po/el.po b/po/el.po index c7b321f942d13..1e93e147b1b8d 100644 --- a/po/el.po +++ b/po/el.po @@ -10,7 +10,7 @@ msgid "" msgstr "" "Report-Msgid-Bugs-To: \n" "POT-Creation-Date: 2026-03-06 03:46+0900\n" -"PO-Revision-Date: 2026-03-03 08:58+0000\n" +"PO-Revision-Date: 2026-05-01 00:59+0000\n" "Last-Translator: Jim Spentzos \n" "Language-Team: Greek \n" @@ -19,7 +19,7 @@ msgstr "" "Content-Type: text/plain; charset=UTF-8\n" "Content-Transfer-Encoding: 8bit\n" "Plural-Forms: nplurals=2; plural=n != 1;\n" -"X-Generator: Weblate 5.16.1\n" +"X-Generator: Weblate 5.17.1\n" #: src/core/org.freedesktop.systemd1.policy.in:22 msgid "Send passphrase back to system" @@ -1078,13 +1078,12 @@ msgid "DHCP server sends force renew message" msgstr "Ο διακομιστής DHCP στέλνει μήνυμα αναγκαστικής ανανέωσης" #: src/network/org.freedesktop.network1.policy:144 -#, fuzzy -#| msgid "Authentication is required to send force renew message." msgid "" "Authentication is required to send a force renew message from the DHCP " "server." msgstr "" -"Απαιτείται ταυτοποίηση για την αποστολή μηνύματος αναγκαστικής ανανέωσης." +"Απαιτείται ταυτοποίηση για την αποστολή μηνύματος αναγκαστικής ανανέωσης από " +"τον διακομιστή DHCP." #: src/network/org.freedesktop.network1.policy:154 msgid "Renew dynamic addresses" From eb357bfff8685c10e7b1f6365b3a80cd792f0336 Mon Sep 17 00:00:00 2001 From: Frantisek Sumsal Date: Thu, 30 Apr 2026 11:59:26 +0200 Subject: [PATCH 075/242] dbus-manager: limit the number of states/patterns per query Let's cap the number of states/patterns per query to something reasonable, i.e. max 256 states and 4K patterns per query. --- src/core/dbus-manager.c | 20 ++++++++++++++++++++ src/core/manager.h | 6 +++++- 2 files changed, 25 insertions(+), 1 deletion(-) diff --git a/src/core/dbus-manager.c b/src/core/dbus-manager.c index 076a26c6fd171..37b38c6ae9ee5 100644 --- a/src/core/dbus-manager.c +++ b/src/core/dbus-manager.c @@ -1251,6 +1251,14 @@ static int list_units_filtered(sd_bus_message *message, void *userdata, sd_bus_e /* Anyone can call this method */ + if (strv_length(states) > MANAGER_MAX_STATES_PER_CALL) + return sd_bus_error_set(reterr_error, SD_BUS_ERROR_LIMITS_EXCEEDED, + "Too many states in a single query."); + + if (strv_length(patterns) > MANAGER_MAX_PATTERNS_PER_CALL) + return sd_bus_error_set(reterr_error, SD_BUS_ERROR_LIMITS_EXCEEDED, + "Too many patterns in a single query."); + r = sd_bus_message_new_method_return(message, &reply); if (r < 0) return r; @@ -1434,6 +1442,10 @@ static int dump_impl( assert(message); + if (strv_length(patterns) > MANAGER_MAX_PATTERNS_PER_CALL) + return sd_bus_error_set(reterr_error, SD_BUS_ERROR_LIMITS_EXCEEDED, + "Too many patterns in a single query."); + /* 'status' access is the bare minimum always needed for this, as the policy might straight out * forbid a client from querying any information from systemd, regardless of any rate limiting. */ r = mac_selinux_access_check(message, "status", reterr_error); @@ -2177,6 +2189,14 @@ static int list_unit_files_by_patterns(sd_bus_message *message, void *userdata, /* Anyone can call this method */ + if (strv_length(states) > MANAGER_MAX_STATES_PER_CALL) + return sd_bus_error_set(reterr_error, SD_BUS_ERROR_LIMITS_EXCEEDED, + "Too many states in a single query."); + + if (strv_length(patterns) > MANAGER_MAX_PATTERNS_PER_CALL) + return sd_bus_error_set(reterr_error, SD_BUS_ERROR_LIMITS_EXCEEDED, + "Too many patterns in a single query."); + r = mac_selinux_access_check(message, "status", reterr_error); if (r < 0) return r; diff --git a/src/core/manager.h b/src/core/manager.h index 7d58c330a1b82..3bb1a0154a399 100644 --- a/src/core/manager.h +++ b/src/core/manager.h @@ -14,9 +14,13 @@ struct libmnt_monitor; -/* Enforce upper limit how many names we allow */ +/* Enforce upper limit on how many names we allow */ #define MANAGER_MAX_NAMES 131072 /* 128K */ +/* Enforce upper limit on the number of patterns/states requested over IPC */ +#define MANAGER_MAX_PATTERNS_PER_CALL 4096U +#define MANAGER_MAX_STATES_PER_CALL 256U + /* On sigrtmin+18, private commands */ enum { MANAGER_SIGNAL_COMMAND_DUMP_JOBS = _COMMON_SIGNAL_COMMAND_PRIVATE_BASE + 0, From 49c1e1bcf2b482b6de35a4212a06ed1d8c382745 Mon Sep 17 00:00:00 2001 From: Frantisek Sumsal Date: Thu, 30 Apr 2026 14:03:47 +0200 Subject: [PATCH 076/242] dbus: limit the number of env variables to something reasonable, vol. 2 Turns out we can utilize this limit at a couple more places, so let's move the previously defined limit constant to env-util.h and use it to guard a couple more D-Bus methods. Also, bump it a bit, given it's meant to be a safety cap that can't be hit in valid scenarios. Follow-up for 7671b43cb88532cce2aa9ad12f777922206d6a42. --- src/basic/env-util.h | 2 ++ src/core/dbus-manager.c | 10 ++++++++++ src/libsystemd/sd-json/json-util.h | 2 -- src/machine/machine-dbus.c | 4 ++++ 4 files changed, 16 insertions(+), 2 deletions(-) diff --git a/src/basic/env-util.h b/src/basic/env-util.h index 28338a1458e07..4063517660b30 100644 --- a/src/basic/env-util.h +++ b/src/basic/env-util.h @@ -3,6 +3,8 @@ #include "basic-forward.h" +#define ENVIRONMENT_ASSIGNMENTS_MAX 16384U + size_t sc_arg_max(void); bool env_name_is_valid(const char *e); diff --git a/src/core/dbus-manager.c b/src/core/dbus-manager.c index 37b38c6ae9ee5..0e93bc723c092 100644 --- a/src/core/dbus-manager.c +++ b/src/core/dbus-manager.c @@ -1892,6 +1892,10 @@ static int method_set_environment(sd_bus_message *message, void *userdata, sd_bu r = sd_bus_message_read_strv(message, &plus); if (r < 0) return r; + + if (strv_length(plus) > ENVIRONMENT_ASSIGNMENTS_MAX) + return sd_bus_error_set(reterr_error, SD_BUS_ERROR_LIMITS_EXCEEDED, + "Too many environment assignments in a single query."); if (!strv_env_is_valid(plus)) return sd_bus_error_set(reterr_error, SD_BUS_ERROR_INVALID_ARGS, "Invalid environment assignments"); @@ -1923,6 +1927,9 @@ static int method_unset_environment(sd_bus_message *message, void *userdata, sd_ if (r < 0) return r; + if (strv_length(minus) > ENVIRONMENT_ASSIGNMENTS_MAX) + return sd_bus_error_set(reterr_error, SD_BUS_ERROR_LIMITS_EXCEEDED, + "Too many environment variable names in a single query."); if (!strv_env_name_or_assignment_is_valid(minus)) return sd_bus_error_set(reterr_error, SD_BUS_ERROR_INVALID_ARGS, "Invalid environment variable names or assignments"); @@ -1959,6 +1966,9 @@ static int method_unset_and_set_environment(sd_bus_message *message, void *userd if (r < 0) return r; + if (strv_length(plus) > ENVIRONMENT_ASSIGNMENTS_MAX || strv_length(minus) > ENVIRONMENT_ASSIGNMENTS_MAX) + return sd_bus_error_set(reterr_error, SD_BUS_ERROR_LIMITS_EXCEEDED, + "Too many environment variable names or assignments in a single query."); if (!strv_env_name_or_assignment_is_valid(minus)) return sd_bus_error_set(reterr_error, SD_BUS_ERROR_INVALID_ARGS, "Invalid environment variable names or assignments"); diff --git a/src/libsystemd/sd-json/json-util.h b/src/libsystemd/sd-json/json-util.h index 34d79d5238aaa..cea2d368b43db 100644 --- a/src/libsystemd/sd-json/json-util.h +++ b/src/libsystemd/sd-json/json-util.h @@ -9,8 +9,6 @@ #include "sd-forward.h" #include "string-util.h" /* IWYU pragma: keep */ -#define ENVIRONMENT_ASSIGNMENTS_MAX 1024U - #define JSON_VARIANT_REPLACE(v, q) \ do { \ typeof(v)* _v = &(v); \ diff --git a/src/machine/machine-dbus.c b/src/machine/machine-dbus.c index a9d15ca5f72b1..28f64b3c9b683 100644 --- a/src/machine/machine-dbus.c +++ b/src/machine/machine-dbus.c @@ -435,6 +435,10 @@ int bus_machine_method_open_shell(sd_bus_message *message, void *userdata, sd_bu r = sd_bus_message_read_strv(message, &env); if (r < 0) return r; + + if (strv_length(env) > ENVIRONMENT_ASSIGNMENTS_MAX) + return sd_bus_error_set(error, SD_BUS_ERROR_LIMITS_EXCEEDED, + "Too many environment assignments in a single query."); if (!strv_env_is_valid(env)) return sd_bus_error_set(error, SD_BUS_ERROR_INVALID_ARGS, "Invalid environment assignments"); From 2cfaf50cc86670b4b671dd4d4f9614b2c2cfb736 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jos=C3=A9=20Miguel=20Sarasola?= Date: Fri, 1 May 2026 19:30:42 +0200 Subject: [PATCH 077/242] hwdb: Add missing Steelseries Arctis Pro Wireless The Hub for these headsets uses the following USB entries: Bus 007 Device 002: ID 0451:2036 Texas Instruments, Inc. TUSB2036 Hub Bus 007 Device 003: ID 1038:1290 SteelSeries ApS Arctis Pro Wireless Bus 007 Device 004: ID 1038:1294 SteelSeries ApS Arctis Pro Wireless --- hwdb.d/70-sound-card.hwdb | 1 + 1 file changed, 1 insertion(+) diff --git a/hwdb.d/70-sound-card.hwdb b/hwdb.d/70-sound-card.hwdb index 03a0a5eefc28c..f9ceeacb7d79c 100644 --- a/hwdb.d/70-sound-card.hwdb +++ b/hwdb.d/70-sound-card.hwdb @@ -55,6 +55,7 @@ usb:v1038p2216* usb:v1038p2236* usb:v1038p12C2* usb:v1038p1290* +usb:v1038p1294* usb:v1038p12EC* usb:v1038p2269* usb:v1038p226D* From fa9b3b26c7421ebf39004094ecee0ca9bcea2bd9 Mon Sep 17 00:00:00 2001 From: Yu Watanabe Date: Sat, 2 May 2026 22:31:03 +0900 Subject: [PATCH 078/242] socket-util: introduce tos_to_priority() This maps from TOS, which can be used for setsockopt(IPPROTO_IP, IP_TOS), to socket priority, which can be used for setsockopt(SOL_SOCKET, SO_PRIORITY). With this, we can set priority like the following: ``` uint8_t tos = IPTOS_CLASS_CS6; setsockopt_int(fd, IPPROTO_IP, IP_TOS, tos); setsockopt_int(fd, SOL_SOCKET, SO_PRIORITY, tos_to_priority(tos)); ``` Co-authored with Google Gemini. --- src/basic/socket-util.c | 27 +++++++++++++++++++++++++++ src/basic/socket-util.h | 2 ++ src/test/test-socket-util.c | 20 ++++++++++++++++++++ 3 files changed, 49 insertions(+) diff --git a/src/basic/socket-util.c b/src/basic/socket-util.c index 698aa69a4b0f1..2e0ee684ff98b 100644 --- a/src/basic/socket-util.c +++ b/src/basic/socket-util.c @@ -3,6 +3,7 @@ #include #include #include +#include #include #include #include @@ -1873,3 +1874,29 @@ void cmsg_close_all(struct msghdr *mh) { } } } + +int tos_to_priority(uint8_t tos) { + /* Map the IP Precedence (top 3 bits of the TOS field) to Linux internal packet priorities + * (TC_PRIO_*). This exactly mirrors the standard Linux kernel IP precedence-to-priority mapping + * (rt_tos2priority) to ensure consistent behavior when explicitly setting SO_PRIORITY. */ + switch (IPTOS_PREC(tos)) { + case IPTOS_PREC_NETCONTROL: /* 0xc0 (CS7) - Network Control. Used for infrastructure control (e.g., STP, keepalives). */ + case IPTOS_PREC_INTERNETCONTROL: /* 0xe0 (CS6) - Internetwork Control. Used for routing protocols (e.g., OSPF, BGP) and DHCP. */ + return TC_PRIO_CONTROL; + + case IPTOS_PREC_CRITIC_ECP: /* 0xa0 (CS5) - Critical. Used for delay-sensitive traffic like Voice over IP (VoIP). */ + case IPTOS_PREC_FLASHOVERRIDE: /* 0x80 (CS4) - Flash Override. Used for interactive video and multimedia. */ + return TC_PRIO_INTERACTIVE; + + case IPTOS_PREC_FLASH: /* 0x60 (CS3) - Flash. Used for broadcast video and call signaling (e.g., SIP). */ + case IPTOS_PREC_IMMEDIATE: /* 0x40 (CS2) - Immediate. Used for OAM (Operations, Administration, and Management) and transactional data. */ + return TC_PRIO_INTERACTIVE_BULK; + + case IPTOS_PREC_PRIORITY: /* 0x20 (CS1) - Priority. Used for background traffic and bulk data transfers. */ + return TC_PRIO_BULK; + + case IPTOS_PREC_ROUTINE: /* 0x00 (CS0) - Routine. Best effort traffic. */ + default: + return TC_PRIO_BESTEFFORT; + } +} diff --git a/src/basic/socket-util.h b/src/basic/socket-util.h index 78b948ad461b5..208eb7ac077be 100644 --- a/src/basic/socket-util.h +++ b/src/basic/socket-util.h @@ -270,3 +270,5 @@ int netlink_socket_get_multicast_groups(int fd, size_t *ret_len, uint32_t **ret_ int socket_get_cookie(int fd, uint64_t *ret); void cmsg_close_all(struct msghdr *mh); + +int tos_to_priority(uint8_t tos); diff --git a/src/test/test-socket-util.c b/src/test/test-socket-util.c index 090839ac06842..713844b09b720 100644 --- a/src/test/test-socket-util.c +++ b/src/test/test-socket-util.c @@ -2,6 +2,8 @@ #include #include +#include +#include #include #include "alloc-util.h" @@ -530,4 +532,22 @@ TEST(getpeerpidref) { ASSERT_TRUE(!pidref_equal(&pidref0, &pidref_pid1)); } +TEST(tos_to_priority) { + ASSERT_EQ(tos_to_priority(IPTOS_CLASS_CS7), TC_PRIO_CONTROL); + ASSERT_EQ(tos_to_priority(IPTOS_CLASS_CS6), TC_PRIO_CONTROL); + ASSERT_EQ(tos_to_priority(IPTOS_CLASS_CS5), TC_PRIO_INTERACTIVE); + ASSERT_EQ(tos_to_priority(IPTOS_CLASS_CS4), TC_PRIO_INTERACTIVE); + ASSERT_EQ(tos_to_priority(IPTOS_CLASS_CS3), TC_PRIO_INTERACTIVE_BULK); + ASSERT_EQ(tos_to_priority(IPTOS_CLASS_CS2), TC_PRIO_INTERACTIVE_BULK); + ASSERT_EQ(tos_to_priority(IPTOS_CLASS_CS1), TC_PRIO_BULK); + ASSERT_EQ(tos_to_priority(IPTOS_CLASS_CS0), TC_PRIO_BESTEFFORT); + + /* check if lower bits are correctly filtered. */ + ASSERT_EQ(tos_to_priority(IPTOS_CLASS_CS7 | IPTOS_LOWDELAY), TC_PRIO_CONTROL); + ASSERT_EQ(tos_to_priority(IPTOS_CLASS_CS1 | IPTOS_LOWCOST), TC_PRIO_BULK); + + ASSERT_EQ(tos_to_priority(0x00), TC_PRIO_BESTEFFORT); + ASSERT_EQ(tos_to_priority(0xff), TC_PRIO_CONTROL); +} + DEFINE_TEST_MAIN(LOG_DEBUG); From 5b10b583581eac104805f67a87cf19f086447d86 Mon Sep 17 00:00:00 2001 From: Yu Watanabe Date: Sat, 2 May 2026 22:10:03 +0900 Subject: [PATCH 079/242] iovec-util: introduce iovec_done_and_memdup() It is similar to free_and_strdup(), but for struct iovec. --- src/basic/iovec-util.c | 15 +++++++++++++++ src/basic/iovec-util.h | 1 + src/test/test-iovec-util.c | 25 +++++++++++++++++++++++++ 3 files changed, 41 insertions(+) diff --git a/src/basic/iovec-util.c b/src/basic/iovec-util.c index dab734b9010f1..429f08dea9ad4 100644 --- a/src/basic/iovec-util.c +++ b/src/basic/iovec-util.c @@ -129,6 +129,21 @@ struct iovec* iovec_memdup(const struct iovec *source, struct iovec *ret) { return ret; } +int iovec_done_and_memdup(struct iovec *iovec, const struct iovec *source) { + assert(iovec); + + if (iovec_equal(iovec, source)) + return 0; + + struct iovec copy; + if (!iovec_memdup(source, ©)) + return -ENOMEM; + + iovec_done(iovec); + *iovec = copy; + return 1; +} + struct iovec* iovec_append(struct iovec *iovec, const struct iovec *append) { assert(iovec_is_valid(iovec)); diff --git a/src/basic/iovec-util.h b/src/basic/iovec-util.h index c8261861a0ff7..a0a059550b7db 100644 --- a/src/basic/iovec-util.h +++ b/src/basic/iovec-util.h @@ -43,5 +43,6 @@ static inline bool iovec_equal(const struct iovec *a, const struct iovec *b) { } struct iovec* iovec_memdup(const struct iovec *source, struct iovec *ret); +int iovec_done_and_memdup(struct iovec *iovec, const struct iovec *source); struct iovec* iovec_append(struct iovec *iovec, const struct iovec *append); diff --git a/src/test/test-iovec-util.c b/src/test/test-iovec-util.c index bd73be1ea76e6..e510e86ca144a 100644 --- a/src/test/test-iovec-util.c +++ b/src/test/test-iovec-util.c @@ -163,4 +163,29 @@ TEST(iovec_make_byte) { ASSERT_EQ(memcmp_nn(x.iov_base, x.iov_len, "x", 1), 0); } +TEST(iovec_done_and_memdup) { + _cleanup_(iovec_done) struct iovec iov = {}; + + ASSERT_OK_ZERO(iovec_done_and_memdup(&iov, NULL)); + ASSERT_TRUE(!iovec_is_set(&iov)); + ASSERT_OK_ZERO(iovec_done_and_memdup(&iov, &(struct iovec) {})); + ASSERT_TRUE(!iovec_is_set(&iov)); + ASSERT_OK_POSITIVE(iovec_done_and_memdup(&iov, &IOVEC_MAKE_STRING("aaa"))); + ASSERT_TRUE(iovec_equal(&iov, &IOVEC_MAKE_STRING("aaa"))); + ASSERT_OK_POSITIVE(iovec_done_and_memdup(&iov, &IOVEC_MAKE_STRING("bbbbb"))); + ASSERT_TRUE(iovec_equal(&iov, &IOVEC_MAKE_STRING("bbbbb"))); + ASSERT_OK_POSITIVE(iovec_done_and_memdup(&iov, NULL)); + ASSERT_TRUE(!iovec_is_set(&iov)); + ASSERT_OK_POSITIVE(iovec_done_and_memdup(&iov, &IOVEC_MAKE_STRING("ccc"))); + ASSERT_TRUE(iovec_equal(&iov, &IOVEC_MAKE_STRING("ccc"))); + ASSERT_OK_POSITIVE(iovec_done_and_memdup(&iov, &(struct iovec) {})); + ASSERT_TRUE(!iovec_is_set(&iov)); + ASSERT_OK_ZERO(iovec_done_and_memdup(&iov, &iov)); + ASSERT_TRUE(!iovec_is_set(&iov)); + ASSERT_OK_POSITIVE(iovec_done_and_memdup(&iov, &IOVEC_MAKE_STRING("ddd"))); + ASSERT_TRUE(iovec_equal(&iov, &IOVEC_MAKE_STRING("ddd"))); + ASSERT_OK_ZERO(iovec_done_and_memdup(&iov, &iov)); + ASSERT_TRUE(iovec_equal(&iov, &IOVEC_MAKE_STRING("ddd"))); +} + DEFINE_TEST_MAIN(LOG_INFO); From db28490c9f3c4146f1892509064bb8e43e78e590 Mon Sep 17 00:00:00 2001 From: Yu Watanabe Date: Sat, 2 May 2026 23:03:15 +0900 Subject: [PATCH 080/242] siphash24: introduce siphash24_compress_iovec() helper function --- src/basic/siphash24.c | 11 +++++++++++ src/basic/siphash24.h | 1 + 2 files changed, 12 insertions(+) diff --git a/src/basic/siphash24.c b/src/basic/siphash24.c index 72d6bbe4f0cb7..b73ede34c8bc4 100644 --- a/src/basic/siphash24.c +++ b/src/basic/siphash24.c @@ -21,6 +21,7 @@ #include +#include "iovec-util.h" #include "siphash24.h" #include "string-util.h" #include "unaligned.h" @@ -156,6 +157,16 @@ void siphash24_compress_string(const char *in, struct siphash *state) { siphash24_compress_safe(in, strlen_ptr(in), state); } +void siphash24_compress_iovec(const struct iovec *iov, struct siphash *state) { + assert(iovec_is_valid(iov)); + assert(state); + + if (!iovec_is_set(iov)) + return; + + siphash24_compress(iov->iov_base, iov->iov_len, state); +} + uint64_t siphash24_finalize(struct siphash *state) { uint64_t b; diff --git a/src/basic/siphash24.h b/src/basic/siphash24.h index d72233beda89b..772e2728b69e0 100644 --- a/src/basic/siphash24.h +++ b/src/basic/siphash24.h @@ -36,6 +36,7 @@ static inline void siphash24_compress_safe(const void *in, size_t inlen, struct } void siphash24_compress_string(const char *in, struct siphash *state); +void siphash24_compress_iovec(const struct iovec *iov, struct siphash *state); uint64_t siphash24_finalize(struct siphash *state); From d667b6b97ff45e0739d165563b896f7d80417b99 Mon Sep 17 00:00:00 2001 From: Simon Lucido Date: Mon, 20 Apr 2026 17:05:27 +0200 Subject: [PATCH 081/242] core: add ReloadCount to Manager and bump on successful reload Introduce a counter that tracks how many configuration reloads have been successfully completed by the manager. The increment lives in manager_reload() right after the "point of no return", so failed reload attempts that bail out earlier (e.g. during serialization) do not bump the counter. It is accessible as a new ReloadCount property to org.freedesktop.systemd1.Manager (D-Bus) and ReloadCount to io.systemd.Manager.Describe (Varlink). Also add an integration test for ReloadCount that verifies that the new ReloadCount property increments by one per daemon-reload, accumulates correctly across multiple reloads, and that D-Bus and Varlink return identical values. Also tests that the counter reset after a reexec. Co-developed-by: Claude Opus 4.7 Signed-off-by: Simon Lucido --- NEWS | 6 +++ man/org.freedesktop.systemd1.xml | 13 ++++++- src/core/dbus-manager.c | 1 + src/core/manager.c | 4 ++ src/core/manager.h | 3 ++ src/core/varlink-manager.c | 3 +- src/shared/varlink-io.systemd.Manager.c | 4 +- test/units/TEST-07-PID1.reload-count.sh | 51 +++++++++++++++++++++++++ 8 files changed, 81 insertions(+), 4 deletions(-) create mode 100755 test/units/TEST-07-PID1.reload-count.sh diff --git a/NEWS b/NEWS index 451e3f1b79603..49061c5e11a22 100644 --- a/NEWS +++ b/NEWS @@ -63,6 +63,12 @@ CHANGES WITH 261 in spe: require direct IMDS access. The new meson option "-Dimds-network=" can be used to change the default mode to "locked" at build-time. + * The manager exposes a new ReloadCount property on its D-Bus and + Varlink interfaces (org.freedesktop.systemd1.Manager and + io.systemd.Manager respectively). The counter increments after + each successfully completed daemon-reload. It is not preserved + across daemon-reexec. + Changes in systemd-sysext/systemd-confext: * New initrd services systemd-sysext-sysroot.service and diff --git a/man/org.freedesktop.systemd1.xml b/man/org.freedesktop.systemd1.xml index 76a8dd045f6c6..847e76f95c767 100644 --- a/man/org.freedesktop.systemd1.xml +++ b/man/org.freedesktop.systemd1.xml @@ -572,6 +572,8 @@ node /org/freedesktop/systemd1 { readonly s CtrlAltDelBurstAction = '...'; @org.freedesktop.DBus.Property.EmitsChangedSignal("const") readonly u SoftRebootsCount = ...; + @org.freedesktop.DBus.Property.EmitsChangedSignal("false") + readonly t ReloadCount = ...; @org.freedesktop.DBus.Property.EmitsChangedSignal("const") readonly b DefaultMemoryZSwapWriteback = ...; }; @@ -1279,6 +1281,8 @@ node /org/freedesktop/systemd1 { + + @@ -1866,6 +1870,10 @@ node /org/freedesktop/systemd1 { SoftRebootsCount encodes how many soft-reboots were successfully completed since the last full boot. Starts at 0. + ReloadCount encodes the number of successfully completed configuration + reloads of the manager. The counter is reset to 0 on + daemon-reexec and on the initial boot. + Virtualization contains a short ID string describing the virtualization technology the system runs in. On bare-metal hardware this is the empty string. Otherwise, it contains an identifier such as kvm, vmware and so on. For a full list of @@ -12646,8 +12654,9 @@ $ gdbus introspect --system --dest org.freedesktop.systemd1 \ DefaultMemoryZSwapWriteback, DefaultCPUPressureThresholdUSec, DefaultCPUPressureWatch, - DefaultIOPressureThresholdUSec, and - DefaultIOPressureWatch were added in version 261. + DefaultIOPressureThresholdUSec, + DefaultIOPressureWatch, and + ReloadCount were added in version 261. Unit Objects diff --git a/src/core/dbus-manager.c b/src/core/dbus-manager.c index 076a26c6fd171..88de34c4ea434 100644 --- a/src/core/dbus-manager.c +++ b/src/core/dbus-manager.c @@ -2978,6 +2978,7 @@ const sd_bus_vtable bus_manager_vtable[] = { SD_BUS_PROPERTY("DefaultRestrictSUIDSGID", "b", bus_property_get_bool, offsetof(Manager, defaults.restrict_suid_sgid), SD_BUS_VTABLE_PROPERTY_CONST), SD_BUS_PROPERTY("CtrlAltDelBurstAction", "s", bus_property_get_emergency_action, offsetof(Manager, cad_burst_action), SD_BUS_VTABLE_PROPERTY_CONST), SD_BUS_PROPERTY("SoftRebootsCount", "u", bus_property_get_unsigned, offsetof(Manager, soft_reboots_count), SD_BUS_VTABLE_PROPERTY_CONST), + SD_BUS_PROPERTY("ReloadCount", "t", NULL, offsetof(Manager, reload_count), 0), SD_BUS_PROPERTY("DefaultMemoryZSwapWriteback", "b", bus_property_get_bool, offsetof(Manager, defaults.memory_zswap_writeback), SD_BUS_VTABLE_PROPERTY_CONST), /* deprecated cgroup v1 property */ diff --git a/src/core/manager.c b/src/core/manager.c index 17908d4db864e..da4e9ca408127 100644 --- a/src/core/manager.c +++ b/src/core/manager.c @@ -3650,6 +3650,10 @@ int manager_reload(Manager *m) { /* 💀 This is the point of no return, from here on there is no way back. 💀 */ reloading = NULL; + /* Bump before sending the Reloading signal, so any client that reads + * ReloadCount in response to that signal observes the new value. */ + m->reload_count = saturate_add(m->reload_count, 1, UINT64_MAX); + bus_manager_send_reloading(m, true); /* Start by flushing out all jobs and units, all generated units, all runtime environments, all dynamic users diff --git a/src/core/manager.h b/src/core/manager.h index 7d58c330a1b82..abf1764d7859e 100644 --- a/src/core/manager.h +++ b/src/core/manager.h @@ -492,6 +492,9 @@ typedef struct Manager { unsigned soft_reboots_count; + /* The number of successfully completed configuration reloads. */ + uint64_t reload_count; + /* Original ambient capabilities when we were initialized */ uint64_t saved_ambient_set; } Manager; diff --git a/src/core/varlink-manager.c b/src/core/varlink-manager.c index 0bef5cbe9848d..384d4709c9786 100644 --- a/src/core/varlink-manager.c +++ b/src/core/varlink-manager.c @@ -193,7 +193,8 @@ static int manager_runtime_build_json(sd_json_variant **ret, const char *name, v JSON_BUILD_PAIR_DUAL_TIMESTAMP_NON_NULL("WatchdogLastPingTimestamp", watchdog_get_last_ping_as_dual_timestamp(&watchdog_last_ping)), SD_JSON_BUILD_PAIR_STRING("SystemState", manager_state_to_string(manager_state(m))), SD_JSON_BUILD_PAIR_UNSIGNED("ExitCode", m->return_value), - SD_JSON_BUILD_PAIR_UNSIGNED("SoftRebootsCount", m->soft_reboots_count)); + SD_JSON_BUILD_PAIR_UNSIGNED("SoftRebootsCount", m->soft_reboots_count), + SD_JSON_BUILD_PAIR_UNSIGNED("ReloadCount", m->reload_count)); } int vl_method_describe_manager(sd_varlink *link, sd_json_variant *parameters, sd_varlink_method_flags_t flags, void *userdata) { diff --git a/src/shared/varlink-io.systemd.Manager.c b/src/shared/varlink-io.systemd.Manager.c index 0c5ab53702b0d..81b3e894a348d 100644 --- a/src/shared/varlink-io.systemd.Manager.c +++ b/src/shared/varlink-io.systemd.Manager.c @@ -194,7 +194,9 @@ static SD_VARLINK_DEFINE_STRUCT_TYPE( SD_VARLINK_FIELD_COMMENT("Exit code of the manager"), SD_VARLINK_DEFINE_FIELD(ExitCode, SD_VARLINK_INT, 0), SD_VARLINK_FIELD_COMMENT("Encodes how many soft-reboots were successfully completed"), - SD_VARLINK_DEFINE_FIELD(SoftRebootsCount, SD_VARLINK_INT, 0)); + SD_VARLINK_DEFINE_FIELD(SoftRebootsCount, SD_VARLINK_INT, 0), + SD_VARLINK_FIELD_COMMENT("Number of successfully completed configuration reloads"), + SD_VARLINK_DEFINE_FIELD(ReloadCount, SD_VARLINK_INT, 0)); static SD_VARLINK_DEFINE_METHOD( Describe, diff --git a/test/units/TEST-07-PID1.reload-count.sh b/test/units/TEST-07-PID1.reload-count.sh new file mode 100755 index 0000000000000..7c31b65c5fc75 --- /dev/null +++ b/test/units/TEST-07-PID1.reload-count.sh @@ -0,0 +1,51 @@ +#!/usr/bin/env bash +# SPDX-License-Identifier: LGPL-2.1-or-later +set -eux +set -o pipefail + +# Verify that the manager exposes a ReloadCount property that increments on +# every daemon-reload, resets to zero across daemon-reexec (since the count +# is not serialized), and is reachable over both D-Bus and Varlink. + +read_count_dbus() { + busctl -j get-property org.freedesktop.systemd1 \ + /org/freedesktop/systemd1 \ + org.freedesktop.systemd1.Manager \ + ReloadCount | jq -r '.data' +} + +read_count_varlink() { + varlinkctl call /run/systemd/io.systemd.Manager \ + io.systemd.Manager.Describe '{}' | jq -r '.runtime.ReloadCount' +} + +# Sanity: both transports must agree. +dbus_count=$(read_count_dbus) +varlink_count=$(read_count_varlink) +(( dbus_count == varlink_count )) + +# A single reload bumps the counter by one. +before=$(read_count_dbus) +systemctl daemon-reload +(( $(read_count_dbus) == before + 1 )) + +# Multiple reloads accumulate. +systemctl daemon-reload +systemctl daemon-reload +(( $(read_count_dbus) == before + 3 )) + +# And both transports still agree after the reload. +dbus_count=$(read_count_dbus) +varlink_count=$(read_count_varlink) +(( dbus_count == varlink_count )) + +# A daemon-reexec resets the counter back to zero on both transports, since +# reload_count lives only in memory and is not carried across the reexec. +# `systemctl daemon-reexec` returns as soon as the old PID 1 closes its bus +# connection, which is before the new PID 1 has rebound /run/systemd/private. +# Use --watch-bind=yes to block on inotify until the new socket is live. +systemctl daemon-reexec +busctl --watch-bind=yes call org.freedesktop.systemd1 /org/freedesktop/systemd1 \ + org.freedesktop.DBus.Peer Ping >/dev/null +(( $(read_count_dbus) == 0 )) +(( $(read_count_varlink) == 0 )) From 6d4c714d88147f75c231b7fa004191962c4c0b1b Mon Sep 17 00:00:00 2001 From: kakolla Date: Sun, 3 May 2026 01:01:24 -0700 Subject: [PATCH 082/242] hwdb: add correct keyboard mapping for touchpad_toggle event on msi gs66 stealth laptop --- hwdb.d/60-keyboard.hwdb | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/hwdb.d/60-keyboard.hwdb b/hwdb.d/60-keyboard.hwdb index 771b7dc43e477..99006885f076f 100644 --- a/hwdb.d/60-keyboard.hwdb +++ b/hwdb.d/60-keyboard.hwdb @@ -1605,6 +1605,10 @@ evdev:atkbd:dmi:bvn*:bvr*:bd*:svnMicro-Star*:pn*:* evdev:atkbd:dmi:bvn*:bvr*:bd*:svnMICRO-STAR*:pnGF63*:* KEYBOARD_KEY_85=touchpad_toggle # Toggle touchpad, sends meta+ctrl+toggle +# MSI GS66 Stealth toggles touchpad using Fn+F3 where the keyboard key is 76 +evdev:atkbd:dmi:bvn*:bvr*:bd*:svnMICRO-STAR*:pn*Stealth GS66*:* + KEYBOARD_KEY_76=touchpad_toggle # Toggle touchpad + evdev:atkbd:dmi:bvn*:bvr*:bd*:svnMICRO-STAR*:pnGE60*:* evdev:atkbd:dmi:bvn*:bvr*:bd*:svnMICRO-STAR*:pnGE70*:* KEYBOARD_KEY_c2=ejectcd From e53fd6c8488d73abbdcd6db03163e2908b6587a3 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Fri, 1 May 2026 10:59:56 +0000 Subject: [PATCH 083/242] build(deps): bump systemd/mkosi Bumps [systemd/mkosi](https://github.com/systemd/mkosi) from 66d51024b7149f40be4702e84275c936373ace97 to 9a28ad20bbea61894ea7b971d318a71f4374cf3b. - [Release notes](https://github.com/systemd/mkosi/releases) - [Commits](https://github.com/systemd/mkosi/compare/66d51024b7149f40be4702e84275c936373ace97...9a28ad20bbea61894ea7b971d318a71f4374cf3b) --- updated-dependencies: - dependency-name: systemd/mkosi dependency-version: 9a28ad20bbea61894ea7b971d318a71f4374cf3b dependency-type: direct:production ... Signed-off-by: dependabot[bot] --- .github/workflows/coverage.yml | 2 +- .github/workflows/linter.yml | 2 +- .github/workflows/mkosi.yml | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/.github/workflows/coverage.yml b/.github/workflows/coverage.yml index c2b9493f6d8ba..ef366657fc9f7 100644 --- a/.github/workflows/coverage.yml +++ b/.github/workflows/coverage.yml @@ -27,7 +27,7 @@ jobs: - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd with: persist-credentials: false - - uses: systemd/mkosi@66d51024b7149f40be4702e84275c936373ace97 + - uses: systemd/mkosi@9a28ad20bbea61894ea7b971d318a71f4374cf3b # Freeing up disk space with rm -rf can take multiple minutes. Since we don't need the extra free space # immediately, we remove the files in the background. However, we first move them to a different location diff --git a/.github/workflows/linter.yml b/.github/workflows/linter.yml index 775b4f3f9d6fd..070f9b814e125 100644 --- a/.github/workflows/linter.yml +++ b/.github/workflows/linter.yml @@ -40,7 +40,7 @@ jobs: GITHUB_ACTIONS_CONFIG_FILE: actionlint.yml ENABLE_GITHUB_PULL_REQUEST_SUMMARY_COMMENT: false - - uses: systemd/mkosi@66d51024b7149f40be4702e84275c936373ace97 + - uses: systemd/mkosi@9a28ad20bbea61894ea7b971d318a71f4374cf3b - name: Check that tabs are not used in Python code run: sh -c '! git grep -P "\\t" -- src/core/generate-bpf-delegate-configs.py src/boot/generate-hwids-section.py src/ukify/ukify.py test/integration-tests/integration-test-wrapper.py' diff --git a/.github/workflows/mkosi.yml b/.github/workflows/mkosi.yml index 859e50a34ccc8..6c734cbdae877 100644 --- a/.github/workflows/mkosi.yml +++ b/.github/workflows/mkosi.yml @@ -169,7 +169,7 @@ jobs: - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd with: persist-credentials: false - - uses: systemd/mkosi@66d51024b7149f40be4702e84275c936373ace97 + - uses: systemd/mkosi@9a28ad20bbea61894ea7b971d318a71f4374cf3b # Freeing up disk space with rm -rf can take multiple minutes. Since we don't need the extra free space # immediately, we remove the files in the background. However, we first move them to a different location From 7d32d1227984d53cfd8de2c0307b44f54b51cd65 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Fri, 1 May 2026 10:55:59 +0000 Subject: [PATCH 084/242] build(deps): bump meson from 1.10.2 to 1.11.1 in /.github/workflows Bumps [meson](https://github.com/mesonbuild/meson) from 1.10.2 to 1.11.1. - [Release notes](https://github.com/mesonbuild/meson/releases) - [Commits](https://github.com/mesonbuild/meson/compare/1.10.2...1.11.1) --- updated-dependencies: - dependency-name: meson dependency-version: 1.11.1 dependency-type: direct:production update-type: version-update:semver-minor ... Signed-off-by: dependabot[bot] --- .github/workflows/requirements.txt | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/.github/workflows/requirements.txt b/.github/workflows/requirements.txt index 95f1bf1a6a5ad..6e412a6318a23 100644 --- a/.github/workflows/requirements.txt +++ b/.github/workflows/requirements.txt @@ -1,6 +1,5 @@ -meson==1.10.2 \ - --hash=sha256:5f84ef186e6e788d9154db63620fc61b3ece69f643b94b43c8b9203c43d89b36 \ - --hash=sha256:7890287d911dd4ee1ebd0efb61ed0321bfcd87c725df923a837cf90c6508f96b +meson==1.11.1 \ + --hash=sha256:9b3a023657e393dbc5335b95c561337d49b7a458f5541e47ec44f2cc566e0d80 ninja==1.13.0 \ --hash=sha256:11be2d22027bde06f14c343f01d31446747dbb51e72d00decca2eb99be911e2f \ --hash=sha256:1c97223cdda0417f414bf864cfb73b72d8777e57ebb279c5f6de368de0062988 \ From a3502284def364235a9c31b016738607a75ddddc Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Fri, 1 May 2026 10:59:43 +0000 Subject: [PATCH 085/242] build(deps): bump the actions group with 6 updates Bumps the actions group with 6 updates: | Package | From | To | | --- | --- | --- | | [github/codeql-action](https://github.com/github/codeql-action) | `4.32.4` | `4.35.2` | | [actions/github-script](https://github.com/actions/github-script) | `8.0.0` | `9.0.0` | | [aws-actions/configure-aws-credentials](https://github.com/aws-actions/configure-aws-credentials) | `6.0.0` | `6.1.0` | | [redhat-plumbers-in-action/gather-pull-request-metadata](https://github.com/redhat-plumbers-in-action/gather-pull-request-metadata) | `1.9.0` | `1.9.1` | | [super-linter/super-linter](https://github.com/super-linter/super-linter) | `8.5.0` | `8.6.0` | | [softprops/action-gh-release](https://github.com/softprops/action-gh-release) | `2.6.1` | `3.0.0` | Updates `github/codeql-action` from 4.32.4 to 4.35.2 - [Release notes](https://github.com/github/codeql-action/releases) - [Changelog](https://github.com/github/codeql-action/blob/main/CHANGELOG.md) - [Commits](https://github.com/github/codeql-action/compare/89a39a4e59826350b863aa6b6252a07ad50cf83e...95e58e9a2cdfd71adc6e0353d5c52f41a045d225) Updates `actions/github-script` from 8.0.0 to 9.0.0 - [Release notes](https://github.com/actions/github-script/releases) - [Commits](https://github.com/actions/github-script/compare/ed597411d8f924073f98dfc5c65a23a2325f34cd...3a2844b7e9c422d3c10d287c895573f7108da1b3) Updates `aws-actions/configure-aws-credentials` from 6.0.0 to 6.1.0 - [Release notes](https://github.com/aws-actions/configure-aws-credentials/releases) - [Changelog](https://github.com/aws-actions/configure-aws-credentials/blob/main/CHANGELOG.md) - [Commits](https://github.com/aws-actions/configure-aws-credentials/compare/8df5847569e6427dd6c4fb1cf565c83acfa8afa7...ec61189d14ec14c8efccab744f656cffd0e33f37) Updates `redhat-plumbers-in-action/gather-pull-request-metadata` from 1.9.0 to 1.9.1 - [Release notes](https://github.com/redhat-plumbers-in-action/gather-pull-request-metadata/releases) - [Commits](https://github.com/redhat-plumbers-in-action/gather-pull-request-metadata/compare/b86d1eaf7038cf88a56b26ba3e504f10e07b0ce5...62fc85c7acd15db62a0bdf007c8dbeda86eaf3b6) Updates `super-linter/super-linter` from 8.5.0 to 8.6.0 - [Release notes](https://github.com/super-linter/super-linter/releases) - [Changelog](https://github.com/super-linter/super-linter/blob/main/CHANGELOG.md) - [Commits](https://github.com/super-linter/super-linter/compare/61abc07d755095a68f4987d1c2c3d1d64408f1f9...9e863354e3ff62e0727d37183162c4a88873df41) Updates `softprops/action-gh-release` from 2.6.1 to 3.0.0 - [Release notes](https://github.com/softprops/action-gh-release/releases) - [Changelog](https://github.com/softprops/action-gh-release/blob/master/CHANGELOG.md) - [Commits](https://github.com/softprops/action-gh-release/compare/153bb8e04406b158c6c84fc1615b65b24149a1fe...b4309332981a82ec1c5618f44dd2e27cc8bfbfda) --- updated-dependencies: - dependency-name: github/codeql-action dependency-version: 4.35.2 dependency-type: direct:production update-type: version-update:semver-minor dependency-group: actions - dependency-name: actions/github-script dependency-version: 9.0.0 dependency-type: direct:production update-type: version-update:semver-major dependency-group: actions - dependency-name: aws-actions/configure-aws-credentials dependency-version: 6.1.0 dependency-type: direct:production update-type: version-update:semver-minor dependency-group: actions - dependency-name: redhat-plumbers-in-action/gather-pull-request-metadata dependency-version: 1.9.1 dependency-type: direct:production update-type: version-update:semver-patch dependency-group: actions - dependency-name: super-linter/super-linter dependency-version: 8.6.0 dependency-type: direct:production update-type: version-update:semver-minor dependency-group: actions - dependency-name: softprops/action-gh-release dependency-version: 3.0.0 dependency-type: direct:production update-type: version-update:semver-major dependency-group: actions ... Signed-off-by: dependabot[bot] --- .github/workflows/cifuzz.yml | 2 +- .github/workflows/claude-review.yml | 6 +++--- .github/workflows/gather-pr-metadata.yml | 2 +- .github/workflows/labeler.yml | 6 +++--- .github/workflows/linter.yml | 2 +- .github/workflows/make-release.yml | 2 +- 6 files changed, 10 insertions(+), 10 deletions(-) diff --git a/.github/workflows/cifuzz.yml b/.github/workflows/cifuzz.yml index d352b2c7b4028..41f06cf8f1a56 100644 --- a/.github/workflows/cifuzz.yml +++ b/.github/workflows/cifuzz.yml @@ -67,7 +67,7 @@ jobs: path: ./out/artifacts - name: Upload Sarif if: always() && steps.build.outcome == 'success' - uses: github/codeql-action/upload-sarif@89a39a4e59826350b863aa6b6252a07ad50cf83e + uses: github/codeql-action/upload-sarif@95e58e9a2cdfd71adc6e0353d5c52f41a045d225 with: # Path to SARIF file relative to the root of the repository sarif_file: cifuzz-sarif/results.sarif diff --git a/.github/workflows/claude-review.yml b/.github/workflows/claude-review.yml index 6e0c32d6aec30..516a4087b52e9 100644 --- a/.github/workflows/claude-review.yml +++ b/.github/workflows/claude-review.yml @@ -67,7 +67,7 @@ jobs: - name: Fetch PR context and create tracking comment id: context - uses: actions/github-script@ed597411d8f924073f98dfc5c65a23a2325f34cd + uses: actions/github-script@3a2844b7e9c422d3c10d287c895573f7108da1b3 with: script: | const owner = context.repo.owner; @@ -179,7 +179,7 @@ jobs: sudo apt-get update && sudo apt-get install -y bubblewrap socat - name: Configure AWS credentials - uses: aws-actions/configure-aws-credentials@8df5847569e6427dd6c4fb1cf565c83acfa8afa7 + uses: aws-actions/configure-aws-credentials@ec61189d14ec14c8efccab744f656cffd0e33f37 with: role-to-assume: arn:aws:iam::${{ secrets.AWS_ACCOUNT_ID }}:role/${{ secrets.AWS_ROLE_NAME }} role-session-name: GitHubActions-Claude-${{ github.run_id }} @@ -417,7 +417,7 @@ jobs: name: review-result.json - name: Post review comments - uses: actions/github-script@ed597411d8f924073f98dfc5c65a23a2325f34cd + uses: actions/github-script@3a2844b7e9c422d3c10d287c895573f7108da1b3 env: REVIEW_RESULT: ${{ needs.review.result }} PR_NUMBER: ${{ needs.setup.outputs.pr_number }} diff --git a/.github/workflows/gather-pr-metadata.yml b/.github/workflows/gather-pr-metadata.yml index 2ae9a098a6949..6f325c779d8ad 100644 --- a/.github/workflows/gather-pr-metadata.yml +++ b/.github/workflows/gather-pr-metadata.yml @@ -22,7 +22,7 @@ jobs: - id: metadata name: Gather Pull Request Metadata - uses: redhat-plumbers-in-action/gather-pull-request-metadata@b86d1eaf7038cf88a56b26ba3e504f10e07b0ce5 + uses: redhat-plumbers-in-action/gather-pull-request-metadata@62fc85c7acd15db62a0bdf007c8dbeda86eaf3b6 - name: Upload Pull Request Metadata artifact uses: actions/upload-artifact@bbbca2ddaa5d8feaa63e36b76fdaad77386f024f diff --git a/.github/workflows/labeler.yml b/.github/workflows/labeler.yml index 48d926a62b9a4..022d499f7f526 100644 --- a/.github/workflows/labeler.yml +++ b/.github/workflows/labeler.yml @@ -44,7 +44,7 @@ jobs: sync-labels: false - name: Set or remove labels based on systemd development workflow - uses: actions/github-script@ed597411d8f924073f98dfc5c65a23a2325f34cd + uses: actions/github-script@3a2844b7e9c422d3c10d287c895573f7108da1b3 if: startsWith(github.event_name, 'pull_request') && github.event.action != 'closed' && !github.event.pull_request.draft with: script: | @@ -85,7 +85,7 @@ jobs: } - name: Add please-review label on command in issue comment - uses: actions/github-script@ed597411d8f924073f98dfc5c65a23a2325f34cd + uses: actions/github-script@3a2844b7e9c422d3c10d287c895573f7108da1b3 if: github.event_name == 'issue_comment' && github.event.issue.pull_request && startsWith(github.event.comment.body, '/please-review') with: script: | @@ -97,7 +97,7 @@ jobs: }) - name: Remove specific labels when PR is closed or merged - uses: actions/github-script@ed597411d8f924073f98dfc5c65a23a2325f34cd + uses: actions/github-script@3a2844b7e9c422d3c10d287c895573f7108da1b3 if: startsWith(github.event_name, 'pull_request') && github.event.action == 'closed' with: script: | diff --git a/.github/workflows/linter.yml b/.github/workflows/linter.yml index 070f9b814e125..5e9b83b08edec 100644 --- a/.github/workflows/linter.yml +++ b/.github/workflows/linter.yml @@ -30,7 +30,7 @@ jobs: persist-credentials: false - name: Lint Code Base - uses: super-linter/super-linter/slim@61abc07d755095a68f4987d1c2c3d1d64408f1f9 + uses: super-linter/super-linter/slim@9e863354e3ff62e0727d37183162c4a88873df41 env: DEFAULT_BRANCH: main MULTI_STATUS: false diff --git a/.github/workflows/make-release.yml b/.github/workflows/make-release.yml index 3aa169f55ad5c..170d5d49d294e 100644 --- a/.github/workflows/make-release.yml +++ b/.github/workflows/make-release.yml @@ -18,7 +18,7 @@ jobs: steps: - name: Release - uses: softprops/action-gh-release@153bb8e04406b158c6c84fc1615b65b24149a1fe + uses: softprops/action-gh-release@b4309332981a82ec1c5618f44dd2e27cc8bfbfda with: prerelease: ${{ contains(github.ref_name, '-rc') }} draft: ${{ github.repository == 'systemd/systemd' }} From 26cba7ffee4aa19e003da704be3362ca2a8be966 Mon Sep 17 00:00:00 2001 From: Luca Boccassi Date: Sat, 2 May 2026 23:18:22 +0100 Subject: [PATCH 086/242] test: make varlink StartTransient checks compatible with jq 1.6 The new "varlinkctl --more StartTransient" subtest pipes a JSON-SEQ stream of multiple records into "jq --seq -e ...". CentOS 9 ships jq 1.6, where -e only inspects the last input record's output: when the trailing record (the final reply) doesn't match the "select()" filter, jq exits non-zero even though earlier records match, so the test fails. Use --slurp which collapses the records into an array first and returns a single bool. Follow-up for 1cde1cc3bab595fe7b4e2befbfa08a01a172db0f --- test/units/TEST-26-SYSTEMCTL.sh | 21 ++++++++++++--------- 1 file changed, 12 insertions(+), 9 deletions(-) diff --git a/test/units/TEST-26-SYSTEMCTL.sh b/test/units/TEST-26-SYSTEMCTL.sh index 32842c1e90e64..ed030031d26cf 100755 --- a/test/units/TEST-26-SYSTEMCTL.sh +++ b/test/units/TEST-26-SYSTEMCTL.sh @@ -552,32 +552,35 @@ result=$(varlinkctl call "$MANAGER_SOCKET" io.systemd.Unit.StartTransient \ echo "$result" | grep '"context"' >/dev/null # Streaming with notifyJobChanges: should get intermediate state updates and a final result +# Note: use --slurp + any() rather than 'select() -e' because in jq 1.6 (shipped on +# CentOS 9) -e checks only the last input record's output, so a select() that filters +# out the trailing record makes jq exit non-zero even when earlier records match. defer_transient_cleanup varlink-transient-test3.service result=$(varlinkctl call --more "$MANAGER_SOCKET" io.systemd.Unit.StartTransient \ '{"context":{"ID":"varlink-transient-test3.service","Service":{"Type":"oneshot","ExecStart":[{"path":"/bin/true"}]}},"notifyJobChanges":true}') -printf '%s' "$result" | jq --seq -e 'select(.job.State == "waiting")' >/dev/null -printf '%s' "$result" | jq --seq -e 'select(.job.Result == "done")' >/dev/null +printf '%s' "$result" | jq --seq --slurp -e 'any(.[]; .job.State == "waiting")' >/dev/null +printf '%s' "$result" | jq --seq --slurp -e 'any(.[]; .job.Result == "done")' >/dev/null # Fire-and-forget: --more without notify flags should return immediately with context+runtime defer_transient_cleanup varlink-transient-fireforget.service result=$(varlinkctl call --more "$MANAGER_SOCKET" io.systemd.Unit.StartTransient \ '{"context":{"ID":"varlink-transient-fireforget.service","Service":{"Type":"oneshot","ExecStart":[{"path":"/bin/true"}]}}}') -printf '%s' "$result" | jq --seq -e 'select(.context)' >/dev/null -printf '%s' "$result" | jq --seq -e 'select(.runtime)' >/dev/null +printf '%s' "$result" | jq --seq --slurp -e 'any(.[]; .context)' >/dev/null +printf '%s' "$result" | jq --seq --slurp -e 'any(.[]; .runtime)' >/dev/null # Streaming with notifyUnitChanges: should get unit state change notifications defer_transient_cleanup varlink-transient-unitnotify.service result=$(varlinkctl call --more "$MANAGER_SOCKET" io.systemd.Unit.StartTransient \ '{"context":{"ID":"varlink-transient-unitnotify.service","Service":{"Type":"oneshot","ExecStart":[{"path":"/bin/true"}]}},"notifyUnitChanges":true}') -printf '%s' "$result" | jq --seq -e 'select(.runtime.ActiveState)' >/dev/null +printf '%s' "$result" | jq --seq --slurp -e 'any(.[]; .runtime.ActiveState)' >/dev/null # Streaming with both notifyJobChanges and notifyUnitChanges defer_transient_cleanup varlink-transient-both.service result=$(varlinkctl call --more "$MANAGER_SOCKET" io.systemd.Unit.StartTransient \ '{"context":{"ID":"varlink-transient-both.service","Service":{"Type":"oneshot","ExecStart":[{"path":"/bin/true"}]}},"notifyJobChanges":true,"notifyUnitChanges":true}') -printf '%s' "$result" | jq --seq -e 'select(.job.State)' >/dev/null -printf '%s' "$result" | jq --seq -e 'select(.runtime.ActiveState)' >/dev/null -printf '%s' "$result" | jq --seq -e 'select(.job.Result == "done")' >/dev/null +printf '%s' "$result" | jq --seq --slurp -e 'any(.[]; .job.State)' >/dev/null +printf '%s' "$result" | jq --seq --slurp -e 'any(.[]; .runtime.ActiveState)' >/dev/null +printf '%s' "$result" | jq --seq --slurp -e 'any(.[]; .job.Result == "done")' >/dev/null # prepare for the error case below: create a long-running service, then try to create it again while it's active defer_transient_cleanup varlink-transient-exists.service @@ -589,7 +592,7 @@ timeout 10 bash -c 'until systemctl is-active varlink-transient-exists.service; defer_transient_cleanup varlink-transient-multi.service result=$(varlinkctl call --more "$MANAGER_SOCKET" io.systemd.Unit.StartTransient \ '{"context":{"ID":"varlink-transient-multi.service","Service":{"Type":"oneshot","ExecStart":[{"path":"/bin/true"},{"path":"/bin/true"}]}},"notifyJobChanges":true}') -printf '%s' "$result" | jq --seq -e 'select(.job.Result == "done")' >/dev/null +printf '%s' "$result" | jq --seq --slurp -e 'any(.[]; .job.Result == "done")' >/dev/null # Transient service with Description and RemainAfterExit defer_transient_cleanup varlink-transient-desc.service From afa4a559e113a0b7c570806ae655c035ae962b7d Mon Sep 17 00:00:00 2001 From: Luca Boccassi Date: Sat, 2 May 2026 23:46:46 +0100 Subject: [PATCH 087/242] test: avoid getting stuck on /dev/fuse On Fedora Rawhide checking /dev/fuse in the test is getting stuck and timing out: [ 47.930104] TEST-13-NSPAWN.sh[2588]: + testcase_fuse [ 47.930424] TEST-13-NSPAWN.sh[2589]: ++ cat [ 58.772538] audit: type=1131 audit(1777728357.726:778): pid=1 uid=0 auid=4294967295 ses=4294967295 msg='unit=systemd-importd comm="systemd" exe="/usr/lib/systemd/systemd" hostname=? addr=? terminal=? res=success' [ 901.882562] audit: type=1131 audit(1777729200.830:782): pid=1 uid=0 auid=4294967295 ses=4294967295 msg='unit=systemd-tmpfiles-clean comm="systemd" exe="/usr/lib/systemd/systemd" hostname=? addr=? terminal=? res=success' Finishing after writing 176921 entries qemu-system-x86_64: terminating on signal 15 from pid 70717 (/usr/bin/python3) Wrap it with a timeout to avoid getting stuck forever Follow-up for dc3223919f663b7c8b8d8d1d6072b4487df7709b --- test/units/TEST-13-NSPAWN.nspawn.sh | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/test/units/TEST-13-NSPAWN.nspawn.sh b/test/units/TEST-13-NSPAWN.nspawn.sh index 47c19f08c01f2..0332a12f64665 100755 --- a/test/units/TEST-13-NSPAWN.nspawn.sh +++ b/test/units/TEST-13-NSPAWN.nspawn.sh @@ -1350,7 +1350,10 @@ testcase_unpriv() { } testcase_fuse() { - if [[ "$(cat <>/dev/fuse 2>&1)" != 'cat: -: Operation not permitted' ]]; then + # On some kernels reading from /dev/fuse without an attached connection blocks indefinitely + # rather than returning EPERM, so guard the probe with a short timeout and skip the test + # whenever we don't get the expected error string. + if [[ "$(timeout --foreground 5 cat <>/dev/fuse 2>&1)" != 'cat: -: Operation not permitted' ]]; then echo "FUSE is not supported, skipping the test..." return 0 fi @@ -1381,7 +1384,7 @@ testcase_fuse() { # "cat: -: Operation not permitted" # pass the test; opened but not read # "bash: line 1: /dev/fuse: Operation not permitted" # fail the test; could not open # "" # fail the test; reading worked - [[ "$(systemd-nspawn --register=no --pipe --directory="$root" \ + [[ "$(timeout --foreground 30 systemd-nspawn --register=no --pipe --directory="$root" \ bash -c 'cat <>/dev/fuse' 2>&1)" == 'cat: -: Operation not permitted' ]] rm -fr "$root" @@ -1390,7 +1393,7 @@ testcase_fuse() { testcase_unpriv_fuse() { # Same as above, but for unprivileged operation. - if [[ "$(cat <>/dev/fuse 2>&1)" != 'cat: -: Operation not permitted' ]]; then + if [[ "$(timeout --foreground 5 cat <>/dev/fuse 2>&1)" != 'cat: -: Operation not permitted' ]]; then echo "FUSE is not supported, skipping the test..." return 0 fi @@ -1409,7 +1412,7 @@ testcase_unpriv_fuse() { create_dummy_ddi "$tmpdir" "$name" chown --recursive testuser: "$tmpdir" - [[ "$(run0 -u testuser --pipe systemd-run \ + [[ "$(timeout --foreground 60 run0 -u testuser --pipe systemd-run \ --user \ --pipe \ --property=Delegate=yes \ From 378d6bd9c68f7085448975ae9e95d20e37c6390b Mon Sep 17 00:00:00 2001 From: Luca Boccassi Date: Sun, 3 May 2026 00:06:56 +0100 Subject: [PATCH 088/242] test: fix json encoding issue due to backslashes TEST-74-AUX-UTILS.sh[3782]: + varlinkctl call /run/systemd/io.systemd.Manager io.systemd.Unit.List '{"name": "proc-sysrq\x2dtrigger.mount"}' TEST-74-AUX-UTILS.sh[3783]: + jq -e .context.Mount TEST-74-AUX-UTILS.sh[3782]: Failed to parse parameters at :1:10: Invalid argument Use jq to encode the input --- test/units/TEST-74-AUX-UTILS.varlinkctl.sh | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/test/units/TEST-74-AUX-UTILS.varlinkctl.sh b/test/units/TEST-74-AUX-UTILS.varlinkctl.sh index 9a22757067f24..3e6d4a9a257e6 100755 --- a/test/units/TEST-74-AUX-UTILS.varlinkctl.sh +++ b/test/units/TEST-74-AUX-UTILS.varlinkctl.sh @@ -235,13 +235,17 @@ varlinkctl call /run/systemd/io.systemd.Manager io.systemd.Unit.List '{"pid": {" # test for AutomountContext/Runtime automount_id=$(varlinkctl call --collect /run/systemd/io.systemd.Manager io.systemd.Unit.List '{}' | jq -r '.[] | select(.context.Type == "automount" and .runtime.LoadState == "loaded") .context.ID' | grep -v null | tail -n 1) test -n "$automount_id" -varlinkctl call /run/systemd/io.systemd.Manager io.systemd.Unit.List "{\"name\": \"$automount_id\"}" | jq -e '.context.Automount' -varlinkctl call /run/systemd/io.systemd.Manager io.systemd.Unit.List "{\"name\": \"$automount_id\"}" | jq -e '.runtime.Automount' +# Use jq to JSON-encode the unit name as it may contain backslash escapes (e.g. \x2d) that +# are not valid JSON escape sequences and would be rejected by varlinkctl's JSON parser. +automount_params=$(jq -cn --arg name "$automount_id" '{name: $name}') +varlinkctl call /run/systemd/io.systemd.Manager io.systemd.Unit.List "$automount_params" | jq -e '.context.Automount' +varlinkctl call /run/systemd/io.systemd.Manager io.systemd.Unit.List "$automount_params" | jq -e '.runtime.Automount' # test for MountContext/Runtime mount_id=$(varlinkctl call --collect /run/systemd/io.systemd.Manager io.systemd.Unit.List '{}' | jq -r '.[] | select(.context.Type == "mount" and .runtime.LoadState == "loaded") .context.ID' | grep -v null | tail -n 1) test -n "$mount_id" -varlinkctl call /run/systemd/io.systemd.Manager io.systemd.Unit.List "{\"name\": \"$mount_id\"}" | jq -e '.context.Mount' -varlinkctl call /run/systemd/io.systemd.Manager io.systemd.Unit.List "{\"name\": \"$mount_id\"}" | jq -e '.runtime.Mount' +mount_params=$(jq -cn --arg name "$mount_id" '{name: $name}') +varlinkctl call /run/systemd/io.systemd.Manager io.systemd.Unit.List "$mount_params" | jq -e '.context.Mount' +varlinkctl call /run/systemd/io.systemd.Manager io.systemd.Unit.List "$mount_params" | jq -e '.runtime.Mount' # test io.systemd.Metrics varlinkctl info /run/systemd/report/io.systemd.Manager From 8fbc93345e50d4541600e1ec9e633d4c33f7d787 Mon Sep 17 00:00:00 2001 From: Luca Boccassi Date: Sun, 3 May 2026 13:27:32 +0100 Subject: [PATCH 089/242] test: bump device timeout to 300s for TPM2 tests Booting with TPM2 has become slower recently so tests are randomly failing, try to bump the default device timeout in those test VMs --- test/integration-tests/TEST-70-TPM2/meson.build | 3 +++ test/integration-tests/TEST-86-MULTI-PROFILE-UKI/meson.build | 3 +++ 2 files changed, 6 insertions(+) diff --git a/test/integration-tests/TEST-70-TPM2/meson.build b/test/integration-tests/TEST-70-TPM2/meson.build index bf66f8f73e3ce..5932215ceb6f3 100644 --- a/test/integration-tests/TEST-70-TPM2/meson.build +++ b/test/integration-tests/TEST-70-TPM2/meson.build @@ -10,5 +10,8 @@ integration_tests += [ 'vm' : true, 'firmware' : 'auto', 'tpm' : true, + 'cmdline' : integration_test_template['cmdline'] + [ + 'systemd.default_device_timeout_sec=300', + ], }, ] diff --git a/test/integration-tests/TEST-86-MULTI-PROFILE-UKI/meson.build b/test/integration-tests/TEST-86-MULTI-PROFILE-UKI/meson.build index 51a70970906b9..acc3512e33d7e 100644 --- a/test/integration-tests/TEST-86-MULTI-PROFILE-UKI/meson.build +++ b/test/integration-tests/TEST-86-MULTI-PROFILE-UKI/meson.build @@ -7,5 +7,8 @@ integration_tests += [ 'vm' : true, 'firmware' : 'auto', 'tpm' : true, + 'cmdline' : integration_test_template['cmdline'] + [ + 'systemd.default_device_timeout_sec=300', + ], }, ] From 2fc008b9e437065c5e33b98cc7410dc91f555564 Mon Sep 17 00:00:00 2001 From: Luca Boccassi Date: Sun, 3 May 2026 16:23:41 +0100 Subject: [PATCH 090/242] test: make TEST-70-TPM2 robust against reruns The test leaves a lot of state around, and when the test is re-run, for example due to the qemu bug that makes a VM reboot instead of shutting down, it fails. Do more cleanups in the traps. [ 162.642175] TEST-70-TPM2.sh[2815]: Calculated public key name: 000b2b66edc3a466e81059286aaf38d09ea42a7a9dcdf6ba3b664c62f0cae4ce4f66 [ 162.642628] TEST-70-TPM2.sh[2815]: PolicyAuthorize calculated digest: 2caa740101f65734d50395d6abc64fa46015d40d1f5de239434578544e592a92 [ 162.643681] TEST-70-TPM2.sh[2815]: Calculated NV index name: 000b439cfa1534815bbe8d33b80c56f5a8d17d36fe94a7782b23a37b50def5fc5eaa [ 162.645111] TEST-70-TPM2.sh[2815]: PolicyAuthorizeNV calculated digest: 69ee0e89fafe6b9df2cd6a5defbf74aa46cf6d92703e645d463549da4ba5e1a4 [ 162.645407] TEST-70-TPM2.sh[2815]: Combined signed PCR policies and pcrlock policies cannot be calculated offline, currently. [ 162.649576] TEST-70-TPM2.sh[2815]: Releasing crypt device /dev/loop0 context. [ 162.652433] TEST-70-TPM2.sh[2815]: Releasing device-mapper backend. [ 162.653518] TEST-70-TPM2.sh[2815]: Closing read only fd for /dev/loop0. [ 162.654359] TEST-70-TPM2.sh[2815]: Closing read write fd for /dev/loop0. [ 162.654786] TEST-70-TPM2.sh[2815]: Failed to encrypt device: Operation not supported Fixes https://github.com/systemd/systemd/issues/38241 --- test/units/TEST-70-TPM2.creds.sh | 8 ++++++-- test/units/TEST-70-TPM2.cryptenroll.sh | 6 ++++++ test/units/TEST-70-TPM2.cryptsetup.sh | 13 +++++++++++++ test/units/TEST-70-TPM2.measure.sh | 25 +++++++++++++++++++++++++ test/units/TEST-70-TPM2.nvpcr.sh | 2 +- test/units/TEST-70-TPM2.pcrextend.sh | 10 ++++++++++ test/units/TEST-70-TPM2.pcrlock.sh | 21 ++++++++++++++++++++- 7 files changed, 81 insertions(+), 4 deletions(-) diff --git a/test/units/TEST-70-TPM2.creds.sh b/test/units/TEST-70-TPM2.creds.sh index 15899d1057899..53ff862e18cd5 100755 --- a/test/units/TEST-70-TPM2.creds.sh +++ b/test/units/TEST-70-TPM2.creds.sh @@ -5,6 +5,12 @@ set -o pipefail export SYSTEMD_LOG_LEVEL=debug +at_exit() { + rm -f /tmp/testdata /tmp/testdata.encrypted +} + +trap at_exit EXIT + # Ensure that sandboxing doesn't stop creds from being accessible echo "test" >/tmp/testdata systemd-creds encrypt /tmp/testdata /tmp/testdata.encrypted --with-key=tpm2 @@ -12,5 +18,3 @@ systemd-creds encrypt /tmp/testdata /tmp/testdata.encrypted --with-key=tpm2 systemd-run -p PrivateDevices=yes -p LoadCredentialEncrypted=testdata.encrypted:/tmp/testdata.encrypted --pipe --wait systemd-creds cat testdata.encrypted | cmp - /tmp/testdata # SetCredentialEncrypted systemd-run -p PrivateDevices=yes -p SetCredentialEncrypted=testdata.encrypted:"$(cat /tmp/testdata.encrypted)" --pipe --wait systemd-creds cat testdata.encrypted | cmp - /tmp/testdata - -rm -f /tmp/testdata diff --git a/test/units/TEST-70-TPM2.cryptenroll.sh b/test/units/TEST-70-TPM2.cryptenroll.sh index d09f702093681..07309429d749d 100755 --- a/test/units/TEST-70-TPM2.cryptenroll.sh +++ b/test/units/TEST-70-TPM2.cryptenroll.sh @@ -11,6 +11,12 @@ cryptenroll_wipe_and_check() {( grep -qE "Wiped slot [[:digit:]]+" /tmp/cryptenroll.out )} +at_exit() { + rm -f "${IMAGE:-}" /tmp/cryptenroll.out /tmp/password +} + +trap at_exit EXIT + # There is an external issue with libcryptsetup on ppc64 that hits 95% of Ubuntu ppc64 test runs, so skip it if [[ "$(uname -m)" == "ppc64le" ]]; then echo "Skipping systemd-cryptenroll tests on ppc64le, see https://github.com/systemd/systemd/issues/27716" diff --git a/test/units/TEST-70-TPM2.cryptsetup.sh b/test/units/TEST-70-TPM2.cryptsetup.sh index 24c87d0f2495c..5a7f0facfcc0c 100755 --- a/test/units/TEST-70-TPM2.cryptsetup.sh +++ b/test/units/TEST-70-TPM2.cryptsetup.sh @@ -31,10 +31,23 @@ tpm_check_failure_with_wrong_pin() { } at_exit() { + set +e + + umount /tmp/dditest.mnt + systemd-cryptsetup detach test-volume + systemd-cryptsetup detach dditest + # Evict the TPM primary key that we persisted if [[ -n "${PERSISTENT_HANDLE:-}" ]]; then tpm2_evictcontrol -c "$PERSISTENT_HANDLE" fi + + if [[ -n "${DEVICE:-}" ]]; then + systemd-dissect --detach "$DEVICE" + fi + + rm -rf /tmp/dditest /tmp/dditest.mnt + rm -f /tmp/dditest.raw "${IMAGE:-}" "${PRIMARY:-}" /tmp/passphrase /tmp/pcr.dat /tmp/srk.pub /tmp/srk2.pub } trap at_exit EXIT diff --git a/test/units/TEST-70-TPM2.measure.sh b/test/units/TEST-70-TPM2.measure.sh index 30fa51e52137c..90d6390da0964 100755 --- a/test/units/TEST-70-TPM2.measure.sh +++ b/test/units/TEST-70-TPM2.measure.sh @@ -14,6 +14,31 @@ if [[ ! -x "${SD_MEASURE:?}" ]]; then exit 0 fi +at_exit() { + set +e + + systemd-cryptsetup detach test-volume2 + rm -f "${IMAGE:-}" \ + /tmp/passphrase \ + /tmp/pcrsign-private.pem \ + /tmp/pcrsign-public.pem \ + /tmp/pcrsign.sig \ + /tmp/pcrsign.sig2 \ + /tmp/pcrsign.sig3 \ + /tmp/pcrsign.sig4 \ + /tmp/pcrsign.sig5 \ + /tmp/pcrsign.sig6 \ + /tmp/pcrsign.sig7 \ + /tmp/pcrtestdata \ + /tmp/pcrtestdata.encrypted \ + /tmp/result \ + /tmp/result.json \ + /tmp/tpmdata1 \ + /tmp/tpmdata2 +} + +trap at_exit EXIT + IMAGE="$(mktemp /tmp/systemd-measure-XXX.image)" echo HALLO >/tmp/tpmdata1 diff --git a/test/units/TEST-70-TPM2.nvpcr.sh b/test/units/TEST-70-TPM2.nvpcr.sh index 29319e601aced..571b3eea770b3 100755 --- a/test/units/TEST-70-TPM2.nvpcr.sh +++ b/test/units/TEST-70-TPM2.nvpcr.sh @@ -21,7 +21,7 @@ at_exit() { fi rm -rf /run/nvpcr /tmp/nvpcr - rm -f /var/tmp/nvpcr.raw /run/verity.d/test-79-nvpcr.crt + rm -f /var/tmp/nvpcr.raw /run/verity.d/test-70-nvpcr.crt /run/systemd/nvpcr/test.anchor } trap at_exit EXIT diff --git a/test/units/TEST-70-TPM2.pcrextend.sh b/test/units/TEST-70-TPM2.pcrextend.sh index 14808f07637bd..ec330576b2531 100755 --- a/test/units/TEST-70-TPM2.pcrextend.sh +++ b/test/units/TEST-70-TPM2.pcrextend.sh @@ -19,6 +19,16 @@ at_exit() { # Dump the event log on fail, to make debugging a bit easier jq --seq --slurp Date: Sun, 3 May 2026 16:33:38 +0100 Subject: [PATCH 091/242] test: make TEST-86-MULTI-PROFILE-UKI robust against reruns When qemu reboots instead of shutting down after the last iteration, the profile is already set to profile2 but the /root/encrypted.raw is gone so the test fails. Reset the default boot entry at the end of the test to make it robust against reruns. Fixes https://github.com/systemd/systemd/issues/39553 --- test/units/TEST-86-MULTI-PROFILE-UKI.sh | 3 +++ 1 file changed, 3 insertions(+) diff --git a/test/units/TEST-86-MULTI-PROFILE-UKI.sh b/test/units/TEST-86-MULTI-PROFILE-UKI.sh index 5518cb656149f..72fb7b6f554af 100755 --- a/test/units/TEST-86-MULTI-PROFILE-UKI.sh +++ b/test/units/TEST-86-MULTI-PROFILE-UKI.sh @@ -64,6 +64,9 @@ elif [[ "$ID" == "profile1" ]]; then elif [[ "$ID" == "profile2" ]]; then grep testprofile2=1 /proc/cmdline rm /root/encrypted.raw + # Reset the default boot entry so a subsequent re-run of the test does not + # boot straight back into @profile2 (where encrypted.raw is now gone) and fail. + bootctl set-default "" else exit 1 fi From 5580db885c08b45bec5f665cbb0b114fa2538736 Mon Sep 17 00:00:00 2001 From: Valentin David Date: Sat, 18 Apr 2026 15:09:00 +0200 Subject: [PATCH 092/242] boot: Try to load UKI from simple filesystem before LoadImage When the source buffer is NULL, the firmware is supposed to try to load the UKI with simple filesystem protocol then load file 2 protocol. But it seems on some versions of AMI, it does not use simple filesystem protocol, and then fails to load if the ESP was loaded from an El Torito boot catalog. Trying to load the source buffer from the simple filesystem protocol protocols seems work around this limitation. Shim for example, also loads the source buffer before calling LoadImage. So it seems to be a safe thing to do. We could also maybe in the future use load file 2 protocol if simple filesystem failed in the first place. --- src/boot/shim.c | 33 ++++++++++++--------------------- src/boot/util.c | 26 ++++++++++++++++++++++++++ src/boot/util.h | 1 + 3 files changed, 39 insertions(+), 21 deletions(-) diff --git a/src/boot/shim.c b/src/boot/shim.c index 97410d6669659..10a0642ac3b9f 100644 --- a/src/boot/shim.c +++ b/src/boot/shim.c @@ -8,7 +8,6 @@ * https://github.com/mjg59/efitools */ -#include "device-path-util.h" #include "efi-efivars.h" #include "secure-boot.h" #include "shim.h" @@ -56,24 +55,7 @@ static bool shim_validate( if (!device_path) return false; - EFI_HANDLE device_handle; - EFI_DEVICE_PATH *file_dp = (EFI_DEVICE_PATH *) device_path; - err = BS->LocateDevicePath( - MAKE_GUID_PTR(EFI_SIMPLE_FILE_SYSTEM_PROTOCOL), &file_dp, &device_handle); - if (err != EFI_SUCCESS) - return false; - - _cleanup_file_close_ EFI_FILE *root = NULL; - err = open_volume(device_handle, &root); - if (err != EFI_SUCCESS) - return false; - - _cleanup_free_ char16_t *dp_str = NULL; - err = device_path_to_str(file_dp, &dp_str); - if (err != EFI_SUCCESS) - return false; - - err = file_read(root, dp_str, 0, 0, &file_buffer_owned, &file_size); + err = load_file_from_simple_filesystem(device_path, &file_buffer_owned, &file_size); if (err != EFI_SUCCESS) return false; @@ -111,12 +93,21 @@ EFI_STATUS shim_load_image( if (have_shim) install_security_override(shim_validate, NULL); + _cleanup_free_ char *source_buffer = NULL; + size_t source_size = 0; + + /* For some AMI firmware, BS->LoadImage() does not read correctly when the file comes the ESP on an + * optical drive. But the simple filesystem protocol does work. So we try to load it. If that does + * not work, we let BS->LoadImage() try instead. + */ + (void) load_file_from_simple_filesystem(device_path, &source_buffer, &source_size); + EFI_STATUS ret = BS->LoadImage( /* BootPolicy= */ boot_policy, parent, (EFI_DEVICE_PATH *) device_path, - /* SourceBuffer= */ NULL, - /* SourceSize= */ 0, + source_buffer, + source_size, ret_image); if (have_shim) uninstall_security_override(); diff --git a/src/boot/util.c b/src/boot/util.c index c40a9aad65b0d..22981bc00db1f 100644 --- a/src/boot/util.c +++ b/src/boot/util.c @@ -195,6 +195,32 @@ EFI_STATUS file_read( return file_handle_read(handle, offset, size, ret, ret_size); } +EFI_STATUS load_file_from_simple_filesystem(const EFI_DEVICE_PATH *device_path, char **file_buffer, size_t *file_size) { + EFI_STATUS err; + EFI_HANDLE device_handle; + EFI_DEVICE_PATH *file_dp = (EFI_DEVICE_PATH *) device_path; + + assert(device_path); + assert(file_buffer); + assert(file_size); + + err = BS->LocateDevicePath(MAKE_GUID_PTR(EFI_SIMPLE_FILE_SYSTEM_PROTOCOL), &file_dp, &device_handle); + if (err != EFI_SUCCESS) + return err; + + _cleanup_file_close_ EFI_FILE *root = NULL; + err = open_volume(device_handle, &root); + if (err != EFI_SUCCESS) + return err; + + _cleanup_free_ char16_t *dp_str = NULL; + err = device_path_to_str(file_dp, &dp_str); + if (err != EFI_SUCCESS) + return err; + + return file_read(root, dp_str, 0, 0, file_buffer, file_size); +} + void set_attribute_safe(size_t attr) { /* Various UEFI implementations suppress color changes from a color to the same color. Often, we want * to force out the color change though, hence change the color here once, and then back. We simply diff --git a/src/boot/util.h b/src/boot/util.h index 2c8cc36ea580d..fa552d6f46c08 100644 --- a/src/boot/util.h +++ b/src/boot/util.h @@ -138,6 +138,7 @@ char16_t *mangle_stub_cmdline(char16_t *cmdline); EFI_STATUS chunked_read(EFI_FILE *file, size_t *size, void *buf); EFI_STATUS file_read(EFI_FILE *dir, const char16_t *name, uint64_t offset, size_t size, char **ret, size_t *ret_size); +EFI_STATUS load_file_from_simple_filesystem(const EFI_DEVICE_PATH *device_path, char **file_buffer, size_t *file_size); EFI_STATUS file_handle_read(EFI_FILE *handle, uint64_t offset, size_t size, char **ret, size_t *ret_size); static inline void file_closep(EFI_FILE **handle) { From 4b35847aa3395890986bf5f93236160e0cfd9f8f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Zbigniew=20J=C4=99drzejewski-Szmek?= Date: Mon, 4 May 2026 10:10:13 +0200 Subject: [PATCH 093/242] man/sd-bus: add a note that tcp: is w/o encryption --- man/sd_bus_set_address.xml | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/man/sd_bus_set_address.xml b/man/sd_bus_set_address.xml index 603f153221c80..0057cc6afe8b3 100644 --- a/man/sd_bus_set_address.xml +++ b/man/sd_bus_set_address.xml @@ -74,6 +74,11 @@ One or both of the host= and port= keys must be present, while the rest is optional. family may be either or . + + Note: connections over TCP are made without encryption. Thus, this mode + should only be used in specific situations where integrity and confidentiality of the connection is + not necessary or is ensured through some other means. For local connections, unix: + connections should be used instead. From bd9971cd25f452e164e6c5af798a73d16aadbbd9 Mon Sep 17 00:00:00 2001 From: Vitaly Kuznetsov Date: Thu, 30 Apr 2026 16:44:39 +0200 Subject: [PATCH 094/242] cryptsetup: avoid a segfault when a keyfile is passed along with a TPM device When a keyfile is passed with tpm2-device=, e.g., systemd-cryptsetup attach test_data /vol /my-pass tpm2-device=auto the logic in attach_luks_or_plain_or_bitlk_by_tpm2() tries to process it as a TPM blob first. This did not work properly because it passes n_blobs=1 to acquire_tpm2_key(), and the key_file is only read when n_blobs == 0. As a result, the code ends up calling tpm2_unseal(..., blobs=NULL, n_blobs=1, ...). Before commit 5c6aad9 ("cryptsetup-tokens: Print tpm2-primary-alg: only when it is known"), the segfault was not observed because tpm2_unseal() was bailing out early when primary_alg == 0. However, after that change, it attempts to process the blob (which is NULL) and crashes. Fix this logic by passing n_blobs=0 to acquire_tpm2_key() so that it actually reads the key_file. Additionally, assert 'blobs' in tpm2_unseal() as a safeguard. Fixes #41867 --- src/cryptsetup/cryptsetup.c | 2 +- src/shared/tpm2-util.c | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/src/cryptsetup/cryptsetup.c b/src/cryptsetup/cryptsetup.c index 2130c54047c04..bf313340bf6c2 100644 --- a/src/cryptsetup/cryptsetup.c +++ b/src/cryptsetup/cryptsetup.c @@ -2060,7 +2060,7 @@ static int attach_luks_or_plain_or_bitlk_by_tpm2( /* pcrlock_path= */ NULL, /* primary_alg= */ 0, key_file, arg_keyfile_size, arg_keyfile_offset, - key_data, /* n_blobs= */ 1, + key_data, /* n_blobs= */ iovec_is_set(key_data) ? 1 : 0, /* policy_hash= */ NULL, /* we don't know the policy hash */ /* n_policy_hash= */ 0, /* salt= */ NULL, diff --git a/src/shared/tpm2-util.c b/src/shared/tpm2-util.c index 9fe3e018693fc..091a4dba89ac3 100644 --- a/src/shared/tpm2-util.c +++ b/src/shared/tpm2-util.c @@ -5804,6 +5804,7 @@ int tpm2_unseal(Tpm2Context *c, int r; assert(n_blobs > 0); + assert(blobs); assert(iovec_is_valid(pubkey)); assert(ret_secret); From 4820d57eeec98385c25a47e427826f466590360a Mon Sep 17 00:00:00 2001 From: Vitaly Kuznetsov Date: Thu, 30 Apr 2026 16:44:45 +0200 Subject: [PATCH 095/242] TEST-70-TPM2: Test the key_file + tpm2-device= combo When key_file is passed along with tpm2-device= to systemd-cryptsetup, the logic is to try the blob as a TPM blob first, and then fall back to trying the file as a regular key file. Check that this fallback works. --- test/units/TEST-70-TPM2.cryptsetup.sh | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/test/units/TEST-70-TPM2.cryptsetup.sh b/test/units/TEST-70-TPM2.cryptsetup.sh index 5a7f0facfcc0c..a40f739689224 100755 --- a/test/units/TEST-70-TPM2.cryptsetup.sh +++ b/test/units/TEST-70-TPM2.cryptsetup.sh @@ -57,8 +57,9 @@ IMAGE="$(mktemp /tmp/systemd-cryptsetup-XXX.IMAGE)" truncate -s 20M "$IMAGE" echo -n passphrase >/tmp/passphrase +echo -n wrong_passphrase >/tmp/wrong_passphrase # Change file mode to avoid "/tmp/passphrase has 0644 mode that is too permissive" messages -chmod 0600 /tmp/passphrase +chmod 0600 /tmp/passphrase /tmp/wrong_passphrase cryptsetup luksFormat -q --pbkdf pbkdf2 --pbkdf-force-iterations 1000 --use-urandom "$IMAGE" /tmp/passphrase # Unlocking via keyfile @@ -237,4 +238,11 @@ EOF rmdir /tmp/dditest fi -rm -f "$IMAGE" "$PRIMARY" +# Key file can contain a TPM blob but in case it doesn't fallback should also work. +systemd-cryptsetup attach test-volume "$IMAGE" /tmp/passphrase tpm2-device=auto,headless=1 +systemd-cryptsetup detach test-volume + +# Negative test: invalid passphrase should not work. +(! systemd-cryptsetup attach test-volume "$IMAGE" /tmp/wrong_passphrase tpm2-device=auto,headless=1) + +rm -f "$IMAGE" "$PRIMARY" /tmp/passphrase /tmp/wrong_passphrase From 7e6507c43fd16ced45b0080aa8c1883ad62fe054 Mon Sep 17 00:00:00 2001 From: Luca Boccassi Date: Sun, 3 May 2026 23:31:59 +0100 Subject: [PATCH 096/242] test: fix flaky TEST-04-JOURNAL.reload.sh due to service name collision write_to_journal() was called via $(...) command substitution, so SERVICE_COUNTER++ ran in a subshell and never incremented in the parent: [ 1492.668302] TEST-04-JOURNAL.sh[15064]: + local service=test-0-18493.service [ 1492.725882] TEST-04-JOURNAL.sh[15064]: + local service=test-0-18009.service [ 1492.739643] TEST-04-JOURNAL.sh[15064]: + local service=test-0-18493.service [ 1492.774586] TEST-04-JOURNAL.sh[15064]: + local service=test-0-25540.service [ 1492.815664] TEST-04-JOURNAL.sh[15064]: + local service=test-0-15916.service [ 1492.867067] TEST-04-JOURNAL.sh[15064]: + local service=test-0-20327.service [ 1492.899077] TEST-04-JOURNAL.sh[15064]: + local service=test-0-86.service [ 1497.289715] TEST-04-JOURNAL.sh[15064]: + local service=test-0-10849.service [ 1497.335791] TEST-04-JOURNAL.sh[15064]: + local service=test-0-18009.service With 99999 possible unit names collisions are rare but not impossible, so every now and then a CI run fails. Have write_to_journal() set a global SERVICE_NAME variable instead and call it directly so SERVICE_COUNTER actually goes up through the test. --- test/units/TEST-04-JOURNAL.reload.sh | 42 +++++++++++++++------------- 1 file changed, 22 insertions(+), 20 deletions(-) diff --git a/test/units/TEST-04-JOURNAL.reload.sh b/test/units/TEST-04-JOURNAL.reload.sh index 44003028aa292..a58c3de59f0fd 100755 --- a/test/units/TEST-04-JOURNAL.reload.sh +++ b/test/units/TEST-04-JOURNAL.reload.sh @@ -11,12 +11,12 @@ MACHINE_ID="$(persistent)" @@ -73,7 +74,8 @@ verify_journals "$VAL1" persistent : "Add entries in runtime" journalctl --relinquish -VAL2=$(write_to_journal) +write_to_journal +VAL2="$SERVICE_NAME" verify_journals "$VAL2" runtime : "Reload journald after relinquish (persistent->persistent)" @@ -84,13 +86,13 @@ verify_journals "$VAL1" persistent verify_journals "$VAL2" runtime : "Write new message and confirm it's written to runtime." -VAL=$(write_to_journal) -verify_journals "$VAL" runtime +write_to_journal +verify_journals "$SERVICE_NAME" runtime : "Flush and confirm that messages are written to system." journalctl --flush -VAL=$(write_to_journal) -verify_journals "$VAL" persistent +write_to_journal +verify_journals "$SERVICE_NAME" persistent # Test persistent->volatile cat </run/systemd/journald.conf.d/reload.conf @@ -100,16 +102,16 @@ EOF : "Confirm old message exists where it was written to persistent journal." systemctl reload systemd-journald.service -verify_journals "$VAL" persistent +verify_journals "$SERVICE_NAME" persistent : "Confirm that new message is written to runtime journal." -VAL=$(write_to_journal) -verify_journals "$VAL" runtime +write_to_journal +verify_journals "$SERVICE_NAME" runtime : "Test volatile works and logs are NOT getting written to system journal despite flush." journalctl --flush -VAL=$(write_to_journal) -verify_journals "$VAL" runtime +write_to_journal +verify_journals "$SERVICE_NAME" runtime : "Disable compression" cat </run/systemd/journald.conf.d/reload.conf @@ -154,8 +156,8 @@ if (( total_size > max_size )) && (( num_archived_journals > 0 )); then fi : "Write a message to runtime journal" -VAL=$(write_to_journal) -verify_journals "$VAL" runtime +write_to_journal +verify_journals "$SERVICE_NAME" runtime : "Reload volatile->persistent" cat </run/systemd/journald.conf.d/reload.conf @@ -165,15 +167,15 @@ EOF systemctl reload systemd-journald.service : "Confirm that previous message is still in runtime journal." -verify_journals "$VAL" runtime +verify_journals "$SERVICE_NAME" runtime : "Confirm that new messages are written to runtime journal." -VAL=$(write_to_journal) -verify_journals "$VAL" runtime +write_to_journal +verify_journals "$SERVICE_NAME" runtime : "Confirm that flushing writes to system journal." journalctl --flush -verify_journals "$VAL" persistent +verify_journals "$SERVICE_NAME" persistent : "Disable compression" cat </run/systemd/journald.conf.d/reload.conf From 4a11c5edeb37de4fd73dc2ff059e7a6112514583 Mon Sep 17 00:00:00 2001 From: Valentin David Date: Mon, 4 May 2026 10:25:19 +0200 Subject: [PATCH 097/242] core: Open netfilter socket only when needed On initrds where nfnetlink module is missing, trying to open a NETLINK_NETFILTER netlink socket takes a lot of time then fails. This makes boot noticibly slower. Even though probably no unit in an initrd need netfilter. So here we delay opening the socket until we know we need it. --- src/core/cgroup.c | 12 ++++++------ src/core/unit.c | 12 ++++++------ 2 files changed, 12 insertions(+), 12 deletions(-) diff --git a/src/core/cgroup.c b/src/core/cgroup.c index ae5874cd99daa..acf2e8147f41b 100644 --- a/src/core/cgroup.c +++ b/src/core/cgroup.c @@ -1293,18 +1293,18 @@ static void unit_modify_nft_set(Unit *u, bool add) { if (!crt || crt->cgroup_id == 0) return; - if (!u->manager->nfnl) { - r = sd_nfnl_socket_open(&u->manager->nfnl); - if (r < 0) - return; - } - CGroupContext *c = ASSERT_PTR(unit_get_cgroup_context(u)); FOREACH_ARRAY(nft_set, c->nft_set_context.sets, c->nft_set_context.n_sets) { if (nft_set->source != NFT_SET_SOURCE_CGROUP) continue; + if (!u->manager->nfnl) { + r = sd_nfnl_socket_open(&u->manager->nfnl); + if (r < 0) + return (void) log_once_errno(LOG_WARNING, r, "Failed to open NETLINK_NETFILTER socket, ignoring: %m"); + } + uint64_t element = crt->cgroup_id; r = nft_set_element_modify_any(u->manager->nfnl, add, nft_set->nfproto, nft_set->table, nft_set->set, &element, sizeof(element)); diff --git a/src/core/unit.c b/src/core/unit.c index 0edb7e25aaa1d..8ed74b080d144 100644 --- a/src/core/unit.c +++ b/src/core/unit.c @@ -5393,16 +5393,16 @@ static void unit_modify_user_nft_set(Unit *u, bool add, NFTSetSource source, uin if (!c) return; - if (!u->manager->nfnl) { - r = sd_nfnl_socket_open(&u->manager->nfnl); - if (r < 0) - return; - } - FOREACH_ARRAY(nft_set, c->nft_set_context.sets, c->nft_set_context.n_sets) { if (nft_set->source != source) continue; + if (!u->manager->nfnl) { + r = sd_nfnl_socket_open(&u->manager->nfnl); + if (r < 0) + return (void) log_once_errno(LOG_WARNING, r, "Failed to open NETLINK_NETFILTER socket, ignoring: %m"); + } + r = nft_set_element_modify_any(u->manager->nfnl, add, nft_set->nfproto, nft_set->table, nft_set->set, &element, sizeof(element)); if (r < 0) log_warning_errno(r, "Failed to %s NFT set entry: family %s, table %s, set %s, ID %u, ignoring: %m", From 4276d3721e4bc4d8d4966b95c106fab1517f188f Mon Sep 17 00:00:00 2001 From: Luca Boccassi Date: Mon, 4 May 2026 12:58:33 +0100 Subject: [PATCH 098/242] semaphore: stop deleting all apt sources The image configuration was changed and the main sources are now in a drop-in apt sources files too, so deleting the whole drop-in directory breaks installing packages. Just delete the disabled ones and chrome. --- .semaphore/semaphore-runner.sh | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.semaphore/semaphore-runner.sh b/.semaphore/semaphore-runner.sh index 171cac8e1c702..22dc9fc4ffd73 100755 --- a/.semaphore/semaphore-runner.sh +++ b/.semaphore/semaphore-runner.sh @@ -68,8 +68,8 @@ EOF for phase in "${PHASES[@]}"; do case "$phase" in SETUP) - # remove semaphore repos, some of them don't work and cause error messages - sudo rm -rf /etc/apt/sources.list.d/* + # remove chrome repo, we don't need it + sudo rm -rf /etc/apt/sources.list.d/google-chrome.sources # enable backports for latest LXC echo "deb http://archive.ubuntu.com/ubuntu $UBUNTU_RELEASE-backports main restricted universe multiverse" | sudo tee -a /etc/apt/sources.list.d/backports.list From a1d0c58220896e483adbca7386b47f29d30dd09b Mon Sep 17 00:00:00 2001 From: Luca Boccassi Date: Sun, 3 May 2026 22:16:15 +0100 Subject: [PATCH 099/242] test: make TEST-64 mdadm_lvm cleanup robust against reruns mdadm --zero-superblock only wipes the MD metadata on the underlying disks, not the LVM PV header that lives in the array data area. When the VM is restarted and the test re-creates the array with the same UUID, /dev/md127 exposes the old data including the LVM PV header, so udev's 69-lvm.rules auto-triggers lvm-activate-mdlvm_vg.service which races with the test's own pvcreate for exclusive access on /dev/md127. Wipe the LVM signature off the MD device (and the underlying disks as a belt-and-braces measure) to avoid the race on re-run, fixing failures when the VM is rebooted instead of shut down. Co-developed-by: Claude Opus 4.7 --- test/units/TEST-64-UDEV-STORAGE.sh | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/test/units/TEST-64-UDEV-STORAGE.sh b/test/units/TEST-64-UDEV-STORAGE.sh index f7bc59ff44a1b..de2d7d267212f 100755 --- a/test/units/TEST-64-UDEV-STORAGE.sh +++ b/test/units/TEST-64-UDEV-STORAGE.sh @@ -1333,6 +1333,12 @@ testcase_mdadm_lvm() { helper_check_device_units # Cleanup lvm vgchange -an "$vgroup" + # Wipe the LVM signature off the MD device, otherwise the underlying disks + # still hold the PV header at the same offset. If the VM is restarted (e.g. + # the test gets re-run because of a reboot), mdadm --create with the same + # UUID would expose the same data and udev would auto-trigger + # lvm-activate-${vgroup}.service, racing with the test's pvcreate. + wipefs --all "$raid_dev" mdadm -v --stop "$raid_dev" # Clear superblocks to make the MD device will not be restarted even if the VM is restarted. @@ -1340,6 +1346,10 @@ testcase_mdadm_lvm() { udevadm settle --timeout=30 # shellcheck disable=SC2046 mdadm -v --zero-superblock --force $(readlink -f "${devices[@]}") + # Also wipe any leftover signatures from the underlying disks for the same + # reason as above. + # shellcheck disable=SC2046 + wipefs --all $(readlink -f "${devices[@]}") udevadm settle --timeout=30 # Check if all expected symlinks were removed after the cleanup From 74338c0bb000cd805a87ce355478c5a0eae113b9 Mon Sep 17 00:00:00 2001 From: Luca Boccassi Date: Mon, 4 May 2026 14:42:03 +0100 Subject: [PATCH 100/242] test: suppress PCR public key auto-loading in TEST-70-TPM2 dditest The dditest block calls systemd-repart with Encrypt=tpm2 but without --tpm2-public-key-pcrs=. Since systemd-stub drops /run/systemd/tpm2-pcr-public-key.pem when booting from a signed UKI systemd-repart auto-loads it and enrolls a signed PCR policy, and then systemd-cryptsetup tpm2-device=auto has no matching signature file, so unlock fails. --tpm2-public-key= is not enough as the default kicks in then. Follow-up for cd18656d47710c251a44a8f5f9d616151a909152 --- test/units/TEST-70-TPM2.cryptsetup.sh | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/test/units/TEST-70-TPM2.cryptsetup.sh b/test/units/TEST-70-TPM2.cryptsetup.sh index 5a7f0facfcc0c..63f1373474450 100755 --- a/test/units/TEST-70-TPM2.cryptsetup.sh +++ b/test/units/TEST-70-TPM2.cryptsetup.sh @@ -223,7 +223,8 @@ Format=ext4 CopyFiles=/tmp/dditest:/ Encrypt=tpm2 EOF - PASSWORD=passphrase systemd-repart --tpm2-device-key=/tmp/srk.pub --definitions=/tmp/dditest --empty=create --size=80M /tmp/dditest.raw --tpm2-pcrs= + # Use --tpm2-public-key-pcrs= to suppress auto-loading of the system PCR public key + PASSWORD=passphrase systemd-repart --tpm2-device-key=/tmp/srk.pub --tpm2-public-key-pcrs= --definitions=/tmp/dditest --empty=create --size=80M /tmp/dditest.raw --tpm2-pcrs= DEVICE="$(systemd-dissect --attach /tmp/dditest.raw)" udevadm wait --settle --timeout=10 "$DEVICE"p1 systemd-cryptsetup attach dditest "$DEVICE"p1 - tpm2-device=auto,headless=yes From 50138e36ffc6d05d8c32a5b6ac1d4c5d32377450 Mon Sep 17 00:00:00 2001 From: Frantisek Sumsal Date: Mon, 4 May 2026 21:38:56 +0200 Subject: [PATCH 101/242] resolve: enforce the search domain limit earlier The search domain limit is already enforced by dns_search_domain_new(), but in this case it's way too late. Let's enforce it during the first loop to avoid unnecessary parsing. --- src/resolve/resolved-link-bus.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/resolve/resolved-link-bus.c b/src/resolve/resolved-link-bus.c index ed4485671c8ad..f30ed5d22bac8 100644 --- a/src/resolve/resolved-link-bus.c +++ b/src/resolve/resolved-link-bus.c @@ -321,7 +321,7 @@ int bus_link_method_set_domains(sd_bus_message *message, void *userdata, sd_bus_ if (r < 0) return r; - for (;;) { + for (unsigned n_names = 0;; n_names++) { _cleanup_free_ char *prefixed = NULL; const char *name; int route_only; @@ -339,6 +339,8 @@ int bus_link_method_set_domains(sd_bus_message *message, void *userdata, sd_bus_ return sd_bus_error_setf(error, SD_BUS_ERROR_INVALID_ARGS, "Invalid search domain %s", name); if (!route_only && dns_name_is_root(name)) return sd_bus_error_set(error, SD_BUS_ERROR_INVALID_ARGS, "Root domain is not suitable as search domain"); + if (n_names >= LINK_SEARCH_DOMAINS_MAX) + return sd_bus_error_set(error, SD_BUS_ERROR_LIMITS_EXCEEDED, "Too many search domains per link"); if (route_only) { prefixed = strjoin("~", name); From 17e6a3e2a88e822b730f298ebb9fdb526a04a2e2 Mon Sep 17 00:00:00 2001 From: Frantisek Sumsal Date: Mon, 4 May 2026 22:07:46 +0200 Subject: [PATCH 102/242] resolve: limit the number NTAs to something sensible --- src/resolve/resolved-link-bus.c | 3 +++ src/resolve/resolved-link.h | 1 + 2 files changed, 4 insertions(+) diff --git a/src/resolve/resolved-link-bus.c b/src/resolve/resolved-link-bus.c index f30ed5d22bac8..ba5b00c239afb 100644 --- a/src/resolve/resolved-link-bus.c +++ b/src/resolve/resolved-link-bus.c @@ -683,6 +683,9 @@ int bus_link_method_set_dnssec_negative_trust_anchors(sd_bus_message *message, v if (r < 0) return r; + if (strv_length(ntas) > LINK_NEGATIVE_TRUST_ANCHORS_MAX) + return sd_bus_error_set(error, SD_BUS_ERROR_LIMITS_EXCEEDED, "Too many negative trust anchors per link"); + STRV_FOREACH(i, ntas) { r = dns_name_is_valid(*i); if (r < 0) diff --git a/src/resolve/resolved-link.h b/src/resolve/resolved-link.h index 44a6b511c1b67..4c81bdbe66695 100644 --- a/src/resolve/resolved-link.h +++ b/src/resolve/resolved-link.h @@ -11,6 +11,7 @@ #define LINK_SEARCH_DOMAINS_MAX 1024 #define LINK_DNS_SERVERS_MAX 256 +#define LINK_NEGATIVE_TRUST_ANCHORS_MAX 2048 typedef struct LinkAddress { Link *link; From 29b00c956fdfcc3516717eb5d7f13be237bb4f1a Mon Sep 17 00:00:00 2001 From: Lennart Poettering Date: Tue, 5 May 2026 08:02:07 +0200 Subject: [PATCH 103/242] update TODO --- TODO.md | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/TODO.md b/TODO.md index b5777556e999d..588ff720d49ee 100644 --- a/TODO.md +++ b/TODO.md @@ -128,6 +128,10 @@ SPDX-License-Identifier: LGPL-2.1-or-later ## Features +- a tool that can prep credentials, put them in the ESP, for provisioning + systems for SBC. Should be doing what sysinstall does with the credentials, + and maybe even *be* sysinstall. + - StorageProvider interface + storagectl - hook-up in systemd-nspawn - hook-up in systemd-vmspawn From a551c1bd56ef2756eb0604c8f2a7d1a1b63c9c77 Mon Sep 17 00:00:00 2001 From: Luca Boccassi Date: Mon, 4 May 2026 21:06:02 +0100 Subject: [PATCH 104/242] test: skip TEST-07-PID1.DeferReactivation with sanitizers This test relies on tight timers, and is flaky under sanitizers as everything slows down a lot. Just skip it. --- test/units/TEST-07-PID1.DeferReactivation.sh | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/test/units/TEST-07-PID1.DeferReactivation.sh b/test/units/TEST-07-PID1.DeferReactivation.sh index ff795ff002239..8d16ff114507f 100755 --- a/test/units/TEST-07-PID1.DeferReactivation.sh +++ b/test/units/TEST-07-PID1.DeferReactivation.sh @@ -4,6 +4,13 @@ set -eux set -o pipefail +if [[ -v ASAN_OPTIONS ]]; then + # Under sanitizers the service is slow enough that the calendar timer with 5s resolution ends up + # missing ticks, making the test flaky + echo "Sanitizers detected, skipping the test..." + exit 0 +fi + systemctl start defer-reactivation.timer timeout 20 bash -c 'until [[ -e /tmp/defer-reactivation.log ]]; do sleep .5; done' From ae973bb61767fce6b428f52b20cfcddf2c70c944 Mon Sep 17 00:00:00 2001 From: Luca Boccassi Date: Mon, 4 May 2026 23:41:10 +0100 Subject: [PATCH 105/242] test: avoid nspawn failure due to scope in use in TEST-06-SELINUX TEST-06-SELINUX occasionally fails because repeated nspawn invocations use the same machine name and scope: TEST-06-SELINUX.sh[598]: Failed to allocate scope: Unit H.scope was already loaded or has a fragment file. Use a different machine name/scope for each invocation in the test case to avoid hitting this issue --- test/units/TEST-06-SELINUX.sh | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/test/units/TEST-06-SELINUX.sh b/test/units/TEST-06-SELINUX.sh index 5cf6e80815b12..76f5c1652bf07 100755 --- a/test/units/TEST-06-SELINUX.sh +++ b/test/units/TEST-06-SELINUX.sh @@ -39,12 +39,13 @@ CONTEXT="$(stat -c %C /proc/sys/kernel/core_pattern)" (! systemd-run --wait --pipe -p ConditionSecurity='selinux' false) systemd-run --wait --pipe -p ConditionSecurity='!selinux' false +# Pass a unique --machine= name on each invocation to avoid "already loaded" flakiness NSPAWN_ARGS=(systemd-nspawn -q --volatile=yes --directory=/ --bind-ro=/etc --inaccessible=/etc/machine-id) -[[ "$("${NSPAWN_ARGS[@]}" cat /proc/self/attr/current | tr -d '\0')" != "$CONTEXT" ]] -[[ "$("${NSPAWN_ARGS[@]}" --selinux-context="$CONTEXT" cat /proc/self/attr/current | tr -d '\0')" == "$CONTEXT" ]] -[[ "$("${NSPAWN_ARGS[@]}" stat --printf %C /run)" != "$CONTEXT" ]] -[[ "$("${NSPAWN_ARGS[@]}" --selinux-apifs-context="$CONTEXT" stat --printf %C /run)" == "$CONTEXT" ]] -[[ "$("${NSPAWN_ARGS[@]}" --selinux-apifs-context="$CONTEXT" --tmpfs=/tmp stat --printf %C /tmp)" == "$CONTEXT" ]] +[[ "$("${NSPAWN_ARGS[@]}" --machine="nspawn-test-0" cat /proc/self/attr/current | tr -d '\0')" != "$CONTEXT" ]] +[[ "$("${NSPAWN_ARGS[@]}" --machine="nspawn-test-1" --selinux-context="$CONTEXT" cat /proc/self/attr/current | tr -d '\0')" == "$CONTEXT" ]] +[[ "$("${NSPAWN_ARGS[@]}" --machine="nspawn-test-2" stat --printf %C /run)" != "$CONTEXT" ]] +[[ "$("${NSPAWN_ARGS[@]}" --machine="nspawn-test-3" --selinux-apifs-context="$CONTEXT" stat --printf %C /run)" == "$CONTEXT" ]] +[[ "$("${NSPAWN_ARGS[@]}" --machine="nspawn-test-4" --selinux-apifs-context="$CONTEXT" --tmpfs=/tmp stat --printf %C /tmp)" == "$CONTEXT" ]] if [[ -n "${TEST_SELINUX_CHECK_AVCS:-}" ]] && ((TEST_SELINUX_CHECK_AVCS)); then (! journalctl -t audit -g AVC -o cat) From f7434671bda922781e8bc3ddaf108a7e2127bfc6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Zbigniew=20J=C4=99drzejewski-Szmek?= Date: Tue, 5 May 2026 10:40:31 +0200 Subject: [PATCH 106/242] shared/help-util: automatically append ":" in sections --- src/ac-power/ac-power.c | 2 +- src/notify/notify.c | 2 +- src/run/run.c | 8 ++------ src/shared/help-util.c | 2 +- src/storage/storage-block.c | 2 +- src/storage/storage-fs.c | 2 +- src/storage/storagectl.c | 4 ++-- src/systemctl/systemctl.c | 16 ++++++++-------- src/udev/ata_id/ata_id.c | 2 +- src/udev/cdrom_id/cdrom_id.c | 2 +- src/udev/dmi_memory_id/dmi_memory_id.c | 2 +- src/udev/fido_id/fido_id.c | 2 +- src/udev/iocost/iocost.c | 4 ++-- src/udev/mtd_probe/mtd_probe.c | 2 +- src/udev/scsi_id/scsi_id.c | 2 +- src/udev/udev-config.c | 2 +- src/udev/udevadm-cat.c | 2 +- src/udev/udevadm-control.c | 2 +- src/udev/udevadm-hwdb.c | 2 +- src/udev/udevadm-info.c | 2 +- src/udev/udevadm-lock.c | 2 +- src/udev/udevadm-monitor.c | 2 +- src/udev/udevadm-settle.c | 2 +- src/udev/udevadm-test-builtin.c | 4 ++-- src/udev/udevadm-test.c | 2 +- src/udev/udevadm-trigger.c | 2 +- src/udev/udevadm-verify.c | 2 +- src/udev/udevadm-wait.c | 2 +- src/udev/udevadm.c | 4 ++-- src/udev/v4l_id/v4l_id.c | 2 +- 30 files changed, 42 insertions(+), 46 deletions(-) diff --git a/src/ac-power/ac-power.c b/src/ac-power/ac-power.c index 2a9c517329321..87242a3b08c7f 100644 --- a/src/ac-power/ac-power.c +++ b/src/ac-power/ac-power.c @@ -27,7 +27,7 @@ static int help(void) { help_cmdline("[OPTIONS...]"); help_abstract("Report whether we are connected to an external power source."); - help_section("Options:"); + help_section("Options"); r = table_print_or_warn(options); if (r < 0) return r; diff --git a/src/notify/notify.c b/src/notify/notify.c index 6c50e4c57c394..0058eb9d3b961 100644 --- a/src/notify/notify.c +++ b/src/notify/notify.c @@ -69,7 +69,7 @@ static int help(void) { help_cmdline("[OPTIONS...] --fork -- CMDLINE..."); help_abstract("Notify the service manager about service status updates."); - help_section("Options:"); + help_section("Options"); r = table_print_or_warn(options); if (r < 0) return r; diff --git a/src/run/run.c b/src/run/run.c index 46b8014e580c5..afae5b2d94af4 100644 --- a/src/run/run.c +++ b/src/run/run.c @@ -166,11 +166,7 @@ static int help(void) { help_abstract("Run the specified command in a transient scope or service."); for (size_t i = 0; i < ELEMENTSOF(groups); i++) { - _cleanup_free_ char *title = strjoin(groups[i] ?: "Options", ":"); - if (!title) - return log_oom(); - - help_section(title); + help_section(groups[i] ?: "Options"); r = table_print_or_warn(tables[i]); if (r < 0) @@ -196,7 +192,7 @@ static int help_sudo_mode(void) { help_cmdline("[OPTIONS...] COMMAND [ARGUMENTS...]"); help_abstract("Elevate privileges interactively."); - help_section("Options:"); + help_section("Options"); r = table_print_or_warn(opts_table); if (r < 0) diff --git a/src/shared/help-util.c b/src/shared/help-util.c index 7e9d3e70be0c6..7b67bb58f1a21 100644 --- a/src/shared/help-util.c +++ b/src/shared/help-util.c @@ -47,7 +47,7 @@ void help_abstract(const char *text) { void help_section(const char *title) { assert(title); - printf("\n%s%s%s\n", + printf("\n%s%s:%s\n", ansi_underline(), title, ansi_normal()); diff --git a/src/storage/storage-block.c b/src/storage/storage-block.c index e5454a29c28a0..b33bf9ce40bd3 100644 --- a/src/storage/storage-block.c +++ b/src/storage/storage-block.c @@ -393,7 +393,7 @@ static int help(void) { if (r < 0) return r; - help_section("Options:"); + help_section("Options"); r = table_print_or_warn(options); if (r < 0) diff --git a/src/storage/storage-fs.c b/src/storage/storage-fs.c index c01e91a4cefe6..167b10dd83542 100644 --- a/src/storage/storage-fs.c +++ b/src/storage/storage-fs.c @@ -753,7 +753,7 @@ static int help(void) { if (r < 0) return r; - help_section("Options:"); + help_section("Options"); r = table_print_or_warn(options); if (r < 0) diff --git a/src/storage/storagectl.c b/src/storage/storagectl.c index 2bc7b7c2a3e40..f88dff29bc861 100644 --- a/src/storage/storagectl.c +++ b/src/storage/storagectl.c @@ -65,13 +65,13 @@ static int help(void) { (void) table_sync_column_widths(0, verbs, options); - help_section("Commands:"); + help_section("Commands"); r = table_print_or_warn(verbs); if (r < 0) return r; - help_section("Options:"); + help_section("Options"); r = table_print_or_warn(options); if (r < 0) diff --git a/src/systemctl/systemctl.c b/src/systemctl/systemctl.c index 4f76c5150021f..775188b5191cb 100644 --- a/src/systemctl/systemctl.c +++ b/src/systemctl/systemctl.c @@ -113,7 +113,7 @@ static int systemctl_help(void) { help_cmdline("[OPTIONS...] COMMAND ..."); help_abstract("Query or send control commands to the system manager."); - help_section("Unit Commands:"); + help_section("Unit Commands"); printf(" list-units [PATTERN...] List units currently in memory\n" " list-automounts [PATTERN...] List automount units currently in memory,\n" " ordered by path\n" @@ -162,7 +162,7 @@ static int systemctl_help(void) { " whoami [PID...] Return unit caller or specified PIDs are\n" " part of\n"); - help_section("Unit File Commands:"); + help_section("Unit File Commands"); printf(" list-unit-files [PATTERN...] List installed unit files\n" " enable [UNIT...|PATH...] Enable one or more unit files\n" " disable UNIT... Disable one or more unit files\n" @@ -186,27 +186,27 @@ static int systemctl_help(void) { " get-default Get the name of the default target\n" " set-default TARGET Set the default target\n"); - help_section("Machine Commands:"); + help_section("Machine Commands"); printf(" list-machines [PATTERN...] List local containers and host\n"); - help_section("Job Commands:"); + help_section("Job Commands"); printf(" list-jobs [PATTERN...] List jobs\n" " cancel [JOB...] Cancel all, one, or more jobs\n"); - help_section("Environment Commands:"); + help_section("Environment Commands"); printf(" show-environment Dump environment\n" " set-environment VARIABLE=VALUE... Set one or more environment variables\n" " unset-environment VARIABLE... Unset one or more environment variables\n" " import-environment VARIABLE... Import all or some environment variables\n"); - help_section("Manager State Commands:"); + help_section("Manager State Commands"); printf(" daemon-reload Reload systemd manager configuration\n" " daemon-reexec Reexecute systemd manager\n" " log-level [LEVEL] Get/set logging threshold for manager\n" " log-target [TARGET] Get/set logging target for manager\n" " service-watchdogs [BOOL] Get/set service watchdog state\n"); - help_section("System Commands:"); + help_section("System Commands"); printf(" is-system-running Check whether system is fully running\n" " default Enter system default mode\n" " rescue Enter system rescue mode\n" @@ -226,7 +226,7 @@ static int systemctl_help(void) { " suspend-then-hibernate Suspend the system, wake after a period of\n" " time, and hibernate\n"); - help_section("Options:"); + help_section("Options"); printf(" -h --help Show this help\n" " --version Show package version\n" " --system Connect to system manager\n" diff --git a/src/udev/ata_id/ata_id.c b/src/udev/ata_id/ata_id.c index c2fabdcdb844b..1cde89ad6f602 100644 --- a/src/udev/ata_id/ata_id.c +++ b/src/udev/ata_id/ata_id.c @@ -368,7 +368,7 @@ static int help(void) { return r; help_cmdline("[OPTIONS...] DEVICE"); - help_section("Options:"); + help_section("Options"); return table_print_or_warn(options); } diff --git a/src/udev/cdrom_id/cdrom_id.c b/src/udev/cdrom_id/cdrom_id.c index 27423e985155e..40fd7a1e77d8c 100644 --- a/src/udev/cdrom_id/cdrom_id.c +++ b/src/udev/cdrom_id/cdrom_id.c @@ -909,7 +909,7 @@ static int help(void) { return r; help_cmdline("[OPTIONS...] DEVICE"); - help_section("Options:"); + help_section("Options"); return table_print_or_warn(options); } diff --git a/src/udev/dmi_memory_id/dmi_memory_id.c b/src/udev/dmi_memory_id/dmi_memory_id.c index a1708c128c928..64af7b8028770 100644 --- a/src/udev/dmi_memory_id/dmi_memory_id.c +++ b/src/udev/dmi_memory_id/dmi_memory_id.c @@ -653,7 +653,7 @@ static int help(void) { return r; help_cmdline("[OPTIONS...]"); - help_section("Options:"); + help_section("Options"); return table_print_or_warn(options); } diff --git a/src/udev/fido_id/fido_id.c b/src/udev/fido_id/fido_id.c index a19c7eebec6e7..30bc96c526bdf 100644 --- a/src/udev/fido_id/fido_id.c +++ b/src/udev/fido_id/fido_id.c @@ -38,7 +38,7 @@ static int help(void) { help_cmdline("[OPTIONS...] SYSFS_PATH"); help_abstract("Identify FIDO security tokens."); - help_section("Options:"); + help_section("Options"); return table_print_or_warn(options); } diff --git a/src/udev/iocost/iocost.c b/src/udev/iocost/iocost.c index eadab1cb8a091..1efd4a5365e1b 100644 --- a/src/udev/iocost/iocost.c +++ b/src/udev/iocost/iocost.c @@ -68,12 +68,12 @@ static int help(void) { help_cmdline("[OPTIONS...] COMMAND"); help_abstract("Set up iocost model and qos solutions for block devices."); - help_section("Commands:"); + help_section("Commands"); r = table_print_or_warn(verbs); if (r < 0) return r; - help_section("Options:"); + help_section("Options"); return table_print_or_warn(options); } diff --git a/src/udev/mtd_probe/mtd_probe.c b/src/udev/mtd_probe/mtd_probe.c index fe9924f1b6e28..7280573646d66 100644 --- a/src/udev/mtd_probe/mtd_probe.c +++ b/src/udev/mtd_probe/mtd_probe.c @@ -45,7 +45,7 @@ static int help(void) { help_cmdline("[OPTIONS...] /dev/mtd[n]"); help_abstract("Probe MTD devices."); - help_section("Options:"); + help_section("Options"); return table_print_or_warn(options); } diff --git a/src/udev/scsi_id/scsi_id.c b/src/udev/scsi_id/scsi_id.c index d7970722848c8..295819351d19b 100644 --- a/src/udev/scsi_id/scsi_id.c +++ b/src/udev/scsi_id/scsi_id.c @@ -216,7 +216,7 @@ static int help(void) { help_cmdline("[OPTION...] DEVICE"); help_abstract("SCSI device identification."); - help_section("Options:"); + help_section("Options"); return table_print_or_warn(options); } diff --git a/src/udev/udev-config.c b/src/udev/udev-config.c index 541ba16dd906b..27a72f2ac8dcd 100644 --- a/src/udev/udev-config.c +++ b/src/udev/udev-config.c @@ -160,7 +160,7 @@ static int help(void) { help_cmdline("[OPTIONS...]"); help_abstract("Rule-based manager for device events and files."); - help_section("Options:"); + help_section("Options"); r = table_print_or_warn(options); if (r < 0) diff --git a/src/udev/udevadm-cat.c b/src/udev/udevadm-cat.c index 62d30d0234d24..fcce76663e1a5 100644 --- a/src/udev/udevadm-cat.c +++ b/src/udev/udevadm-cat.c @@ -28,7 +28,7 @@ static int help(void) { help_cmdline("cat [OPTIONS...] [FILE...]"); help_abstract("Show udev rules files."); - help_section("Options:"); + help_section("Options"); r = table_print_or_warn(options); if (r < 0) return r; diff --git a/src/udev/udevadm-control.c b/src/udev/udevadm-control.c index a6ffe83cecaf6..0a0bb35fb5dd5 100644 --- a/src/udev/udevadm-control.c +++ b/src/udev/udevadm-control.c @@ -57,7 +57,7 @@ static int help(void) { help_cmdline("control OPTION"); help_abstract("Control the udev daemon."); - help_section("Options:"); + help_section("Options"); r = table_print_or_warn(options); if (r < 0) return r; diff --git a/src/udev/udevadm-hwdb.c b/src/udev/udevadm-hwdb.c index b029db2262a04..bb6f03d540890 100644 --- a/src/udev/udevadm-hwdb.c +++ b/src/udev/udevadm-hwdb.c @@ -25,7 +25,7 @@ static int help(void) { help_cmdline("hwdb [OPTIONS]"); help_abstract("Update or query the hardware database."); - help_section("Options:"); + help_section("Options"); r = table_print_or_warn(options); if (r < 0) return r; diff --git a/src/udev/udevadm-info.c b/src/udev/udevadm-info.c index a5cbedc8deeda..73ed70f4bffb5 100644 --- a/src/udev/udevadm-info.c +++ b/src/udev/udevadm-info.c @@ -811,7 +811,7 @@ static int help(void) { help_cmdline("info [OPTIONS] [DEVPATH|FILE]"); help_abstract("Query sysfs or the udev database."); - help_section("Options:"); + help_section("Options"); r = table_print_or_warn(options); if (r < 0) return r; diff --git a/src/udev/udevadm-lock.c b/src/udev/udevadm-lock.c index c1c3211d34992..fd4b6a9059a4b 100644 --- a/src/udev/udevadm-lock.c +++ b/src/udev/udevadm-lock.c @@ -45,7 +45,7 @@ static int help(void) { help_cmdline("lock [OPTIONS...] COMMAND"); help_cmdline("lock [OPTIONS...] --print"); help_abstract("Lock a block device and run a command."); - help_section("Options:"); + help_section("Options"); r = table_print_or_warn(options); if (r < 0) return r; diff --git a/src/udev/udevadm-monitor.c b/src/udev/udevadm-monitor.c index c7d1f40fc49b6..76c9d16d1f963 100644 --- a/src/udev/udevadm-monitor.c +++ b/src/udev/udevadm-monitor.c @@ -109,7 +109,7 @@ static int help(void) { help_cmdline("monitor [OPTIONS]"); help_abstract("Listen to kernel and udev events."); - help_section("Options:"); + help_section("Options"); r = table_print_or_warn(options); if (r < 0) return r; diff --git a/src/udev/udevadm-settle.c b/src/udev/udevadm-settle.c index 211a8ff1fbf8c..77882cc074a6c 100644 --- a/src/udev/udevadm-settle.c +++ b/src/udev/udevadm-settle.c @@ -39,7 +39,7 @@ static int help(void) { help_cmdline("settle [OPTIONS]"); help_abstract("Wait for pending udev events."); - help_section("Options:"); + help_section("Options"); r = table_print_or_warn(options); if (r < 0) return r; diff --git a/src/udev/udevadm-test-builtin.c b/src/udev/udevadm-test-builtin.c index 9c0082800f37a..3de1366b59f20 100644 --- a/src/udev/udevadm-test-builtin.c +++ b/src/udev/udevadm-test-builtin.c @@ -24,12 +24,12 @@ static int help(void) { help_cmdline("test-builtin [OPTIONS] COMMAND DEVPATH"); help_abstract("Test a built-in command."); - help_section("Options:"); + help_section("Options"); r = table_print_or_warn(options); if (r < 0) return r; - help_section("Commands:"); + help_section("Commands"); udev_builtin_list(); return 0; } diff --git a/src/udev/udevadm-test.c b/src/udev/udevadm-test.c index ac368e0f00eec..ba8217c8d36b3 100644 --- a/src/udev/udevadm-test.c +++ b/src/udev/udevadm-test.c @@ -44,7 +44,7 @@ static int help(void) { help_cmdline("test [OPTIONS] DEVPATH"); help_abstract("Test an event run."); - help_section("Options:"); + help_section("Options"); r = table_print_or_warn(options); if (r < 0) return r; diff --git a/src/udev/udevadm-trigger.c b/src/udev/udevadm-trigger.c index 583d85be0b8d8..e1fdf323cbc20 100644 --- a/src/udev/udevadm-trigger.c +++ b/src/udev/udevadm-trigger.c @@ -331,7 +331,7 @@ static int help(void) { help_cmdline("trigger [OPTIONS] DEVPATH"); help_abstract("Request events from the kernel."); - help_section("Options:"); + help_section("Options"); r = table_print_or_warn(options); if (r < 0) return r; diff --git a/src/udev/udevadm-verify.c b/src/udev/udevadm-verify.c index f4388f843adc6..e7f803dfb1eac 100644 --- a/src/udev/udevadm-verify.c +++ b/src/udev/udevadm-verify.c @@ -33,7 +33,7 @@ static int help(void) { help_cmdline("verify [OPTIONS] [FILE...]"); help_abstract("Verify udev rules files."); - help_section("Options:"); + help_section("Options"); r = table_print_or_warn(options); if (r < 0) return r; diff --git a/src/udev/udevadm-wait.c b/src/udev/udevadm-wait.c index 6017401440689..fa12e6c98c17c 100644 --- a/src/udev/udevadm-wait.c +++ b/src/udev/udevadm-wait.c @@ -307,7 +307,7 @@ static int help(void) { help_cmdline("wait [OPTIONS] DEVICE [DEVICE…]"); help_abstract("Wait for devices or device symlinks being created."); - help_section("Options:"); + help_section("Options"); r = table_print_or_warn(options); if (r < 0) return r; diff --git a/src/udev/udevadm.c b/src/udev/udevadm.c index 47d4335baec7f..cdc10802749ea 100644 --- a/src/udev/udevadm.c +++ b/src/udev/udevadm.c @@ -30,12 +30,12 @@ static int help(void) { help_cmdline("[OPTIONS…] COMMAND [COMMAND OPTIONS…]"); help_abstract("Send control commands or test the device manager."); - help_section("Commands:"); + help_section("Commands"); r = table_print_or_warn(verbs); if (r < 0) return r; - help_section("Options:"); + help_section("Options"); r = table_print_or_warn(options); if (r < 0) return r; diff --git a/src/udev/v4l_id/v4l_id.c b/src/udev/v4l_id/v4l_id.c index 1a53e1092fb7a..f1f40324de7f7 100644 --- a/src/udev/v4l_id/v4l_id.c +++ b/src/udev/v4l_id/v4l_id.c @@ -32,7 +32,7 @@ static int help(void) { help_cmdline("[OPTIONS...] DEVICE"); help_abstract("Video4Linux device identification."); - help_section("Options:"); + help_section("Options"); return table_print_or_warn(options); } From f94da4b4c564f8cff4b5b739456c985e036a4201 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Zbigniew=20J=C4=99drzejewski-Szmek?= Date: Tue, 5 May 2026 10:59:46 +0200 Subject: [PATCH 107/242] shared/verbs: display default verb as "[verb]" --- src/shared/verbs.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/src/shared/verbs.c b/src/shared/verbs.c index dfecf048612b7..276c6fd5be916 100644 --- a/src/shared/verbs.c +++ b/src/shared/verbs.c @@ -192,11 +192,15 @@ int _verbs_get_help_table( /* No help string — we do not show the verb */ continue; + bool is_default = FLAGS_SET(verb->flags, VERB_DEFAULT); + /* We indent the option string by two spaces. We could set the minimum cell width and * right-align for a similar result, but that'd be more work. This is only used for * display. */ - r = table_add_cell_stringf(table, NULL, " %s%s%s", + r = table_add_cell_stringf(table, NULL, " %s%s%s%s%s", + is_default ? "[" : "", verb->verb, + is_default ? "]" : "", verb->argspec ? " " : "", strempty(verb->argspec)); if (r < 0) From f8c0aaccef5388c6454f0a0b0a34a826bc882c25 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Zbigniew=20J=C4=99drzejewski-Szmek?= Date: Tue, 5 May 2026 09:59:16 +0200 Subject: [PATCH 108/242] test-options: add a check for custom logic in systemd-analyze --- src/test/test-options.c | 71 +++++++++++++++++++++++++++++++++++++++++ 1 file changed, 71 insertions(+) diff --git a/src/test/test-options.c b/src/test/test-options.c index d00262fa34bb2..21fb7a2d028c1 100644 --- a/src/test/test-options.c +++ b/src/test/test-options.c @@ -818,6 +818,77 @@ TEST(option_optional_arg) { NULL); } +/* Check that we correctly implement the behaviour of + * systemd-analyze -a -b unit-shell -c -d name -e -f + * systemd-analyze -a -b other-verb -c -d name -e -f + * systemd-analyze -a -b -c -d -e -f + * where '-a', '-b', '-c', '-d' are "our" options, but '-e -f' is part of the commandline + * for unit-shell, but not in the other cases. */ +static void test_option_parsing_stops_at_second_nonoption_one( + char **cmdline, + unsigned options_to_see, + unsigned verbs_to_see, + char **args_to_see) { + + static const Option options[] = { + { 1, .short_code = 'a' }, + { 2, .short_code = 'b' }, + { 3, .short_code = 'c' }, + { 4, .short_code = 'd' }, + { 5, .short_code = 'e' }, + { 6, .short_code = 'f' }, + { 7, .long_code = "(positional)", .flags = OPTION_POSITIONAL_ENTRY }, + {}, + }; + + OptionParser opts = { strv_length(cmdline), cmdline, + .mode = OPTION_PARSER_RETURN_POSITIONAL_ARGS }; + unsigned options_seen = 0; + unsigned verbs_seen = 0; + for (int c; (c = option_parse(options, options + ELEMENTSOF(options) - 1, &opts)) != 0; ) { + ASSERT_OK(c); + ASSERT_NOT_NULL(opts.opt); + + switch (opts.opt->id) { + case 1 ... 6: + options_seen++; + break; + case 7: + verbs_seen++; + ASSERT_EQ(opts.mode, (OptionParserMode) OPTION_PARSER_RETURN_POSITIONAL_ARGS); + + if (streq(opts.arg, "unit-shell")) + opts.mode = OPTION_PARSER_STOP_AT_FIRST_NONOPTION; + else if (streq(opts.arg, "other-verb")) + opts.mode = OPTION_PARSER_NORMAL; + else + assert_not_reached(); + break; + default: + assert_not_reached(); + } + } + + ASSERT_EQ(options_seen, options_to_see); + ASSERT_EQ(verbs_seen, verbs_to_see); + ASSERT_TRUE(strv_equal(option_parser_get_args(&opts), args_to_see)); +} + +TEST(option_parsing_stops_at_second_nonoption) { + test_option_parsing_stops_at_second_nonoption_one( + STRV_MAKE("systemd-analyze", "-a", "-b", "unit-shell", "-c", "-d", "name", "-e", "-f"), + 4, 1, + STRV_MAKE("name", "-e", "-f")); + test_option_parsing_stops_at_second_nonoption_one( + STRV_MAKE("systemd-analyze", "-a", "-b", "other-verb", "-c", "-d", "name", "-e", "-f"), + 6, 1, + STRV_MAKE("name")); + test_option_parsing_stops_at_second_nonoption_one( + STRV_MAKE("systemd-analyze", "-a", "-b", "-c", "-d", "-e"), + 5, 0, + STRV_EMPTY); +} + /* Test the OPTION, OPTION_LONG, OPTION_SHORT, OPTION_FULL, OPTION_GROUP macros * by using them in a FOREACH_OPTION switch, as they would be used in real code. */ From 10b97bbb98b7e3eb472d683d10f84b468bf6c624 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Zbigniew=20J=C4=99drzejewski-Szmek?= Date: Mon, 4 May 2026 08:00:32 +0200 Subject: [PATCH 109/242] analyze: convert to OPTION and VERB macros The logic that was tested in the previous commit is used to implement the behaviour for unit-shell and other verbs without changes. The compare-versions synopsis is shortened to "V1 [OP] V2" to make the verb synopsis fit. Unusual capitalizaition of "Command" is changed to "COMMAND" (it's a replace arg, not a fixed string), and some help strings are adjusted. The order of options in --help is based on the existing order in parse_argv(). The old order in --help was mostly random. I think it might be good to figure out something more rational here, but I'm leaving that as a separate step. The urlification of dot(1) in the --help string is lost. It's hard to do this with the help string being stored in a read-only section. I think this is not worth the trouble to reimplement in the current scheme. --- man/systemd-analyze.xml | 2 +- src/analyze/analyze.c | 695 ++++++++++++++++------------------------ 2 files changed, 276 insertions(+), 421 deletions(-) diff --git a/man/systemd-analyze.xml b/man/systemd-analyze.xml index f3dfd7479f87f..4f3057f725d24 100644 --- a/man/systemd-analyze.xml +++ b/man/systemd-analyze.xml @@ -81,7 +81,7 @@ OPTIONS unit-shell SERVICE - Command + COMMAND systemd-analyze diff --git a/src/analyze/analyze.c b/src/analyze/analyze.c index e23b0038a9944..0f848264c73f0 100644 --- a/src/analyze/analyze.c +++ b/src/analyze/analyze.c @@ -3,7 +3,6 @@ Copyright © 2013 Simon Peeters ***/ -#include #include #include #include @@ -57,11 +56,14 @@ #include "calendarspec.h" #include "dissect-image.h" #include "extract-word.h" +#include "format-table.h" +#include "help-util.h" #include "image-policy.h" #include "log.h" #include "loop-util.h" #include "main-func.h" #include "mount-util.h" +#include "options.h" #include "pager.h" #include "parse-argument.h" #include "parse-util.h" @@ -196,553 +198,455 @@ static int verb_transient_settings(int argc, char *argv[], uintptr_t _data, void } static int help(void) { - _cleanup_free_ char *link = NULL, *dot_link = NULL; + static const char *const vgroups[] = { + "Boot Analysis", + "Dependency Analysis", + "Configuration Files and Search Paths", + "Enumerate OS Concepts", + "Expression Evaluation", + "Clock & Time", + "Unit & Service Analysis", + "Executable Analysis", + "TPM Operations", + }; + + Table *vtables[ELEMENTSOF(vgroups)] = {}; + CLEANUP_ELEMENTS(vtables, table_unref_array_clear); + _cleanup_(table_unrefp) Table *options = NULL; int r; pager_open(arg_pager_flags); - r = terminal_urlify_man("systemd-analyze", "1", &link); - if (r < 0) - return log_oom(); + for (size_t i = 0; i < ELEMENTSOF(vgroups); i++) { + r = verbs_get_help_table_group(vgroups[i], &vtables[i]); + if (r < 0) + return r; + } - /* Not using terminal_urlify_man() for this, since we don't want the "man page" text suffix in this case. */ - r = terminal_urlify("man:dot(1)", "dot(1)", &dot_link); + r = option_parser_get_help_table(&options); if (r < 0) - return log_oom(); + return r; - printf("%1$s [OPTIONS...] COMMAND ...\n\n" - "%5$sProfile systemd, show unit dependencies, check unit files.%6$s\n" - "\n%3$sBoot Analysis:%4$s\n" - " [time] Print time required to boot the machine\n" - " blame Print list of running units ordered by\n" - " time to init\n" - " critical-chain [UNIT...] Print a tree of the time critical chain\n" - " of units\n" - "\n%3$sDependency Analysis:%4$s\n" - " plot Output SVG graphic showing service\n" - " initialization\n" - " dot [UNIT...] Output dependency graph in %7$s format\n" - " dump [PATTERN...] Output state serialization of service\n" - " manager\n" - "\n%3$sConfiguration Files and Search Paths:%4$s\n" - " cat-config NAME|PATH... Show configuration file and drop-ins\n" - " unit-files List files and symlinks for units\n" - " unit-paths List load directories for units\n" - "\n%3$sEnumerate OS Concepts:%4$s\n" - " exit-status [STATUS...] List exit status definitions\n" - " capability [CAP...] List capability definitions\n" - " syscall-filter [NAME...] List syscalls in seccomp filters\n" - " filesystems [NAME...] List known filesystems\n" - " architectures [NAME...] List known architectures\n" - " smbios11 List strings passed via SMBIOS Type #11\n" - " chid List local CHIDs\n" - " transient-settings TYPE... List transient settings for unit TYPE\n" - "\n%3$sExpression Evaluation:%4$s\n" - " condition CONDITION... Evaluate conditions and asserts\n" - " compare-versions VERSION1 [OP] VERSION2\n" - " Compare two version strings\n" - " image-policy POLICY... Analyze image policy string\n" - "\n%3$sClock & Time:%4$s\n" - " calendar SPEC... Validate repetitive calendar time\n" - " events\n" - " timestamp TIMESTAMP... Validate a timestamp\n" - " timespan SPAN... Validate a time span\n" - "\n%3$sUnit & Service Analysis:%4$s\n" - " verify FILE... Check unit files for correctness\n" - " security [UNIT...] Analyze security of unit\n" - " fdstore SERVICE... Show file descriptor store contents of service\n" - " malloc [D-BUS SERVICE...] Dump malloc stats of a D-Bus service\n" - " unit-gdb SERVICE Attach a debugger to the given running service\n" - " unit-shell SERVICE [Command]\n" - " Run command on the namespace of the service\n" - "\n%3$sExecutable Analysis:%4$s\n" - " inspect-elf FILE... Parse and print ELF package metadata\n" - " dlopen-metadata FILE Parse and print ELF dlopen metadata\n" - "\n%3$sTPM Operations:%4$s\n" - " has-tpm2 Report whether TPM2 support is available\n" - " identify-tpm2 Show TPM2 vendor information\n" - " pcrs [PCR...] Show TPM2 PCRs and their names\n" - " nvpcrs [NVPCR...] Show additional TPM2 PCRs stored in NV indexes\n" - " srk [>FILE] Write TPM2 SRK (to FILE)\n" - "\n%3$sOptions:%4$s\n" - " --recursive-errors=MODE Control which units are verified\n" - " --offline=BOOL Perform a security review on unit file(s)\n" - " --threshold=N Exit with a non-zero status when overall\n" - " exposure level is over threshold value\n" - " --security-policy=PATH Use custom JSON security policy instead\n" - " of built-in one\n" - " --json=pretty|short|off Generate JSON output of the security\n" - " analysis table, or plot's raw time data\n" - " --no-pager Do not pipe output into a pager\n" - " --no-legend Disable column headers and hints in plot\n" - " with either --table or --json=\n" - " --system Operate on system systemd instance\n" - " --user Operate on user systemd instance\n" - " --global Operate on global user configuration\n" - " -H --host=[USER@]HOST Operate on remote host\n" - " -M --machine=CONTAINER Operate on local container\n" - " --order Show only order in the graph\n" - " --require Show only requirement in the graph\n" - " --from-pattern=GLOB Show only origins in the graph\n" - " --to-pattern=GLOB Show only destinations in the graph\n" - " --fuzz=SECONDS Also print services which finished SECONDS\n" - " earlier than the latest in the branch\n" - " --man[=BOOL] Do [not] check for existence of man pages\n" - " --generators[=BOOL] Do [not] run unit generators\n" - " (requires privileges)\n" - " --instance=NAME Specify fallback instance name for template units\n" - " --iterations=N Show the specified number of iterations\n" - " --base-time=TIMESTAMP Calculate calendar times relative to\n" - " specified time\n" - " --profile=name|PATH Include the specified profile in the\n" - " security review of the unit(s)\n" - " --unit=UNIT Evaluate conditions and asserts of unit\n" - " --table Output plot's raw time data as a table\n" - " --scale-svg=FACTOR Stretch x-axis of plot by FACTOR (default: 1.0)\n" - " --detailed Add more details to SVG plot,\n" - " e.g. show activation timestamps\n" - " -h --help Show this help\n" - " --version Show package version\n" - " -q --quiet Do not emit hints\n" - " --tldr Skip comments and empty lines\n" - " --root=PATH Operate on an alternate filesystem root\n" - " --image=PATH Operate on disk image as filesystem root\n" - " --image-policy=POLICY Specify disk image dissection policy\n" - " -m --mask Parse parameter as numeric capability mask\n" - " --drm-device=PATH Use this DRM device sysfs path to get EDID\n" - " --debugger=DEBUGGER Use the given debugger\n" - " -A --debugger-arguments=ARGS\n" - " Pass the given arguments to the debugger\n" - "\nSee the %2$s for details.\n", - program_invocation_short_name, - link, - ansi_underline(), - ansi_normal(), - ansi_highlight(), - ansi_normal(), - dot_link); - - /* When updating this list, including descriptions, apply changes to - * shell-completion/bash/systemd-analyze and shell-completion/zsh/_systemd-analyze too. */ + assert_se(ELEMENTSOF(vtables) == 9); + (void) table_sync_column_widths(0, options, vtables[0], vtables[1], vtables[2], + vtables[3], vtables[4], vtables[5], vtables[6], + vtables[7], vtables[8]); - return 0; -} + help_cmdline("[OPTIONS...] COMMAND ..."); + help_abstract("Profile systemd, show unit dependencies, check unit files."); -static int verb_help(int argc, char *argv[], uintptr_t _data, void *userdata) { - return help(); -} + for (size_t i = 0; i < ELEMENTSOF(vgroups); i++) { + help_section(vgroups[i]); + r = table_print_or_warn(vtables[i]); + if (r < 0) + return r; + } -static int parse_argv(int argc, char *argv[]) { - enum { - ARG_VERSION = 0x100, - ARG_ORDER, - ARG_REQUIRE, - ARG_ROOT, - ARG_IMAGE, - ARG_IMAGE_POLICY, - ARG_SYSTEM, - ARG_USER, - ARG_GLOBAL, - ARG_DOT_FROM_PATTERN, - ARG_DOT_TO_PATTERN, - ARG_FUZZ, - ARG_NO_PAGER, - ARG_MAN, - ARG_GENERATORS, - ARG_INSTANCE, - ARG_ITERATIONS, - ARG_BASE_TIME, - ARG_RECURSIVE_ERRORS, - ARG_OFFLINE, - ARG_THRESHOLD, - ARG_SECURITY_POLICY, - ARG_JSON, - ARG_PROFILE, - ARG_TABLE, - ARG_NO_LEGEND, - ARG_TLDR, - ARG_SCALE_FACTOR_SVG, - ARG_DETAILED_SVG, - ARG_DRM_DEVICE_PATH, - ARG_DEBUGGER, - }; + help_section("Options"); + r = table_print_or_warn(options); + if (r < 0) + return r; - static const struct option options[] = { - { "help", no_argument, NULL, 'h' }, - { "version", no_argument, NULL, ARG_VERSION }, - { "quiet", no_argument, NULL, 'q' }, - { "order", no_argument, NULL, ARG_ORDER }, - { "require", no_argument, NULL, ARG_REQUIRE }, - { "root", required_argument, NULL, ARG_ROOT }, - { "image", required_argument, NULL, ARG_IMAGE }, - { "image-policy", required_argument, NULL, ARG_IMAGE_POLICY }, - { "recursive-errors" , required_argument, NULL, ARG_RECURSIVE_ERRORS }, - { "offline", required_argument, NULL, ARG_OFFLINE }, - { "threshold", required_argument, NULL, ARG_THRESHOLD }, - { "security-policy", required_argument, NULL, ARG_SECURITY_POLICY }, - { "system", no_argument, NULL, ARG_SYSTEM }, - { "user", no_argument, NULL, ARG_USER }, - { "global", no_argument, NULL, ARG_GLOBAL }, - { "from-pattern", required_argument, NULL, ARG_DOT_FROM_PATTERN }, - { "to-pattern", required_argument, NULL, ARG_DOT_TO_PATTERN }, - { "fuzz", required_argument, NULL, ARG_FUZZ }, - { "no-pager", no_argument, NULL, ARG_NO_PAGER }, - { "man", optional_argument, NULL, ARG_MAN }, - { "generators", optional_argument, NULL, ARG_GENERATORS }, - { "instance", required_argument, NULL, ARG_INSTANCE }, - { "host", required_argument, NULL, 'H' }, - { "machine", required_argument, NULL, 'M' }, - { "iterations", required_argument, NULL, ARG_ITERATIONS }, - { "base-time", required_argument, NULL, ARG_BASE_TIME }, - { "unit", required_argument, NULL, 'U' }, - { "json", required_argument, NULL, ARG_JSON }, - { "profile", required_argument, NULL, ARG_PROFILE }, - { "table", optional_argument, NULL, ARG_TABLE }, - { "no-legend", optional_argument, NULL, ARG_NO_LEGEND }, - { "tldr", no_argument, NULL, ARG_TLDR }, - { "mask", no_argument, NULL, 'm' }, - { "scale-svg", required_argument, NULL, ARG_SCALE_FACTOR_SVG }, - { "detailed", no_argument, NULL, ARG_DETAILED_SVG }, - { "drm-device", required_argument, NULL, ARG_DRM_DEVICE_PATH }, - { "debugger", required_argument, NULL, ARG_DEBUGGER }, - { "debugger-arguments", required_argument, NULL, 'A' }, - {} - }; + help_man_page_reference("systemd-analyze", "1"); - bool reorder = false; - int r, c, unit_shell = -1; + return 0; +} + +VERB_COMMON_HELP_HIDDEN(help); + +/* When updating this list, including descriptions, apply changes to + * shell-completion/bash/systemd-analyze and shell-completion/zsh/_systemd-analyze too. */ + +VERB_GROUP("Boot Analysis"); +VERB_SCOPE(, verb_time, "time", NULL, VERB_ANY, 1, VERB_DEFAULT, + "Print time required to boot the machine"); +VERB_SCOPE(, verb_blame, "blame", NULL, VERB_ANY, 1, 0, + "Print list of running units ordered by time to init"); +VERB_SCOPE(, verb_critical_chain, "critical-chain", "[UNIT...]", VERB_ANY, VERB_ANY, 0, + "Print a tree of the time critical chain of units"); + +VERB_GROUP("Dependency Analysis"); +VERB_SCOPE(, verb_plot, "plot", NULL, VERB_ANY, 1, 0, + "Output SVG graphic showing service initialization"); +VERB_SCOPE(, verb_dot, "dot", "[UNIT...]", VERB_ANY, VERB_ANY, 0, + "Output dependency graph in dot(1) format"); +VERB_SCOPE(, verb_dump, "dump", "[PATTERN...]", VERB_ANY, VERB_ANY, 0, + "Output state serialization of service manager"); + +VERB_GROUP("Configuration Files and Search Paths"); +VERB_SCOPE(, verb_cat_config, "cat-config", "NAME|PATH...", 2, VERB_ANY, 0, + "Show configuration file and drop-ins"); +VERB_SCOPE(, verb_unit_files, "unit-files", NULL, VERB_ANY, VERB_ANY, 0, + "List files and symlinks for units"); +VERB_SCOPE(, verb_unit_paths, "unit-paths", NULL, 1, 1, 0, + "List load directories for units"); + +VERB_GROUP("Enumerate OS Concepts"); +VERB_SCOPE(, verb_exit_status, "exit-status", "[STATUS...]", VERB_ANY, VERB_ANY, 0, + "List exit status definitions"); +VERB_SCOPE(, verb_capabilities, "capability", "[CAP...]", VERB_ANY, VERB_ANY, 0, + "List capability definitions"); +VERB_SCOPE(, verb_syscall_filters, "syscall-filter", "[NAME...]", VERB_ANY, VERB_ANY, 0, + "List syscalls in seccomp filters"); +VERB_SCOPE(, verb_filesystems, "filesystems", "[NAME...]", VERB_ANY, VERB_ANY, 0, + "List known filesystems"); +VERB_SCOPE(, verb_architectures, "architectures", "[NAME...]", VERB_ANY, VERB_ANY, 0, + "List known architectures"); +VERB_SCOPE(, verb_smbios11, "smbios11", NULL, VERB_ANY, 1, 0, + "List strings passed via SMBIOS Type #11"); +VERB_SCOPE(, verb_chid, "chid", NULL, VERB_ANY, VERB_ANY, 0, + "List local CHIDs"); +VERB(verb_transient_settings, "transient-settings", "TYPE...", 2, VERB_ANY, 0, + "List transient settings for unit TYPE"); + +VERB_GROUP("Expression Evaluation"); +VERB_SCOPE(, verb_condition, "condition", "CONDITION...", VERB_ANY, VERB_ANY, 0, + "Evaluate conditions and asserts"); +VERB_SCOPE(, verb_compare_versions, "compare-versions", "V1 [OP] V2", 3, 4, 0, + "Compare two version strings"); +VERB_SCOPE(, verb_image_policy, "image-policy", "POLICY...", 2, 2, 0, + "Analyze image policy string"); + +VERB_GROUP("Clock & Time"); +VERB_SCOPE(, verb_calendar, "calendar", "SPEC...", 2, VERB_ANY, 0, + "Validate repetitive calendar time events"); +VERB_SCOPE(, verb_timestamp, "timestamp", "TIMESTAMP...", 2, VERB_ANY, 0, + "Validate a timestamp"); +VERB_SCOPE(, verb_timespan, "timespan", "SPAN...", 2, VERB_ANY, 0, + "Validate a time span"); + +VERB_GROUP("Unit & Service Analysis"); +VERB_SCOPE(, verb_verify, "verify", "FILE...", 2, VERB_ANY, 0, + "Check unit files for correctness"); +VERB_SCOPE(, verb_security, "security", "[UNIT...]", VERB_ANY, VERB_ANY, 0, + "Analyze security of unit"); +VERB_SCOPE(, verb_fdstore, "fdstore", "SERVICE...", 2, VERB_ANY, 0, + "Show file descriptor store contents of service"); +VERB_SCOPE(, verb_malloc, "malloc", "[D-BUS SERVICE...]", VERB_ANY, VERB_ANY, 0, + "Dump malloc stats of a D-Bus service"); +VERB_SCOPE(, verb_unit_gdb, "unit-gdb", "SERVICE", 2, VERB_ANY, 0, + "Attach a debugger to the given running service"); +VERB_SCOPE(, verb_unit_shell, "unit-shell", "SERVICE [COMMAND ...]", 2, VERB_ANY, 0, + "Run command on the namespace of the service"); + +VERB_GROUP("Executable Analysis"); +VERB_SCOPE(, verb_elf_inspection, "inspect-elf", "FILE...", 2, VERB_ANY, 0, + "Parse and print ELF package metadata"); +VERB_SCOPE(, verb_dlopen_metadata, "dlopen-metadata", "FILE", 2, 2, 0, + "Parse and print ELF dlopen metadata"); + +VERB_GROUP("TPM Operations"); +VERB_SCOPE(, verb_has_tpm2, "has-tpm2", NULL, VERB_ANY, 1, 0, + "Report whether TPM2 support is available"); +VERB_SCOPE(, verb_identify_tpm2, "identify-tpm2", NULL, VERB_ANY, 1, 0, + "Show TPM2 vendor information"); +VERB_SCOPE(, verb_pcrs, "pcrs", "[PCR...]", VERB_ANY, VERB_ANY, 0, + "Show TPM2 PCRs and their names"); +VERB_SCOPE(, verb_nvpcrs, "nvpcrs", "[NVPCR...]", VERB_ANY, VERB_ANY, 0, + "Show additional TPM2 PCRs stored in NV indexes"); +VERB_SCOPE(, verb_srk, "srk", "[>FILE]", VERB_ANY, 1, 0, + "Write TPM2 SRK (to FILE)"); + +/* The following are deprecated and not shown in --help. */ +VERB_SCOPE(, verb_log_control, "log-level", NULL, VERB_ANY, 2, 0, /* help= */ NULL); +VERB_SCOPE(, verb_log_control, "log-target", NULL, VERB_ANY, 2, 0, /* help= */ NULL); +VERB_SCOPE(, verb_log_control, "set-log-level", NULL, 2, 2, 0, /* help= */ NULL); +VERB_SCOPE(, verb_log_control, "get-log-level", NULL, VERB_ANY, 1, 0, /* help= */ NULL); +VERB_SCOPE(, verb_log_control, "set-log-target", NULL, 2, 2, 0, /* help= */ NULL); +VERB_SCOPE(, verb_log_control, "get-log-target", NULL, VERB_ANY, 1, 0, /* help= */ NULL); +VERB_SCOPE(, verb_service_watchdogs, "service-watchdogs", NULL, VERB_ANY, 2, 0, /* help= */ NULL); + +static int parse_argv(int argc, char *argv[], char ***ret_args) { + int r; assert(argc >= 0); assert(argv); + assert(ret_args); - /* Resetting to 0 forces the invocation of an internal initialization routine of getopt_long() - * that checks for GNU extensions in optstring ('-' or '+; at the beginning). */ - optind = 0; - - for (;;) { - static const char option_string[] = "-hqH:M:U:mA:"; - - c = getopt_long(argc, argv, option_string + reorder, options, NULL); - if (c < 0) - break; + /* For "unit-shell" the switches specified after the service name are part of the commandline + * to execute and are not processed by us. For other verbs, we consume all options as usual. + * To make this work, start with mode==OPTION_PARSER_RETURN_POSITIONAL_ARGS and switch to + * either OPTION_PARSER_STOP_AT_FIRST_NONOPTION or OPTION_PARSER_NORMAL after we've seen + * the verb. */ + OptionParser opts = { argc, argv, OPTION_PARSER_RETURN_POSITIONAL_ARGS }; + const char *verb = NULL; + FOREACH_OPTION_OR_RETURN(c, &opts) switch (c) { - case 1: /* getopt_long() returns 1 if "-" was the first character of the option string, and a - * non-option argument was discovered. */ - - assert(!reorder); - - /* We generally are fine with the fact that getopt_long() reorders the command line, and looks - * for switches after the main verb. However, for "unit-shell" we really don't want that, since we - * want that switches specified after the service name are passed to the program to execute, - * and not processed by us. To make this possible, we'll first invoke getopt_long() with - * reordering disabled (i.e. with the "-" prefix in the option string), looking for the first - * non-option parameter. If it's the verb "unit-shell" we remember its position and continue - * processing options. In this case, as soon as we hit the next non-option argument we found - * the service name, and stop further processing. If the first non-option argument is any other - * verb than "unit-shell" we switch to normal reordering mode and continue processing arguments - * normally. */ - - if (unit_shell >= 0) { - optind--; /* don't process this argument, go one step back */ - goto done; - } - if (streq(optarg, "unit-shell")) - /* Remember the position of the "unit_shell" verb, and continue processing normally. */ - unit_shell = optind - 1; - else { - int saved_optind; - - /* Ok, this is some other verb. In this case, turn on reordering again, and continue - * processing normally. */ - reorder = true; - - /* We changed the option string. getopt_long() only looks at it again if we invoke it - * at least once with a reset option index. Hence, let's reset the option index here, - * then invoke getopt_long() again (ignoring what it has to say, after all we most - * likely already processed it), and the bump the option index so that we read the - * intended argument again. */ - saved_optind = optind; - optind = 0; - (void) getopt_long(argc, argv, option_string + reorder, options, NULL); - optind = saved_optind - 1; /* go one step back, process this argument again */ - } + OPTION_POSITIONAL: + verb = opts.arg; + assert(opts.mode == OPTION_PARSER_RETURN_POSITIONAL_ARGS); + if (streq(verb, "unit-shell")) + opts.mode = OPTION_PARSER_STOP_AT_FIRST_NONOPTION; + else + opts.mode = OPTION_PARSER_NORMAL; break; - case 'h': + OPTION_COMMON_HELP: return help(); - case ARG_VERSION: + OPTION_COMMON_VERSION: return version(); - case 'q': + OPTION('q', "quiet", NULL, "Do not emit hints"): arg_quiet = true; break; - case ARG_RECURSIVE_ERRORS: - if (streq(optarg, "help")) + OPTION_LONG("recursive-errors", "MODE", "Control which units are verified"): + if (streq(opts.arg, "help")) return DUMP_STRING_TABLE(recursive_errors, RecursiveErrors, _RECURSIVE_ERRORS_MAX); - r = recursive_errors_from_string(optarg); + r = recursive_errors_from_string(opts.arg); if (r < 0) - return log_error_errno(r, "Unknown mode passed to --recursive-errors='%s'.", optarg); + return log_error_errno(r, "Unknown mode passed to --recursive-errors='%s'.", opts.arg); arg_recursive_errors = r; break; - case ARG_ROOT: - r = parse_path_argument(optarg, /* suppress_root= */ true, &arg_root); + OPTION_LONG("root", "PATH", "Operate on an alternate filesystem root"): + r = parse_path_argument(opts.arg, /* suppress_root= */ true, &arg_root); if (r < 0) return r; break; - case ARG_IMAGE: - r = parse_path_argument(optarg, /* suppress_root= */ false, &arg_image); + OPTION_LONG("image", "PATH", "Operate on disk image as filesystem root"): + r = parse_path_argument(opts.arg, /* suppress_root= */ false, &arg_image); if (r < 0) return r; break; - case ARG_IMAGE_POLICY: - r = parse_image_policy_argument(optarg, &arg_image_policy); + OPTION_LONG("image-policy", "POLICY", "Specify disk image dissection policy"): + r = parse_image_policy_argument(opts.arg, &arg_image_policy); if (r < 0) return r; break; - case ARG_SYSTEM: + OPTION_LONG("system", NULL, "Operate on system systemd instance"): arg_runtime_scope = RUNTIME_SCOPE_SYSTEM; break; - case ARG_USER: + OPTION_LONG("user", NULL, "Operate on user systemd instance"): arg_runtime_scope = RUNTIME_SCOPE_USER; break; - case ARG_GLOBAL: + OPTION_LONG("global", NULL, "Operate on global user configuration"): arg_runtime_scope = RUNTIME_SCOPE_GLOBAL; break; - case ARG_ORDER: + OPTION_LONG("order", NULL, "Show only order in the graph"): arg_dot = DEP_ORDER; break; - case ARG_REQUIRE: + OPTION_LONG("require", NULL, "Show only requirement in the graph"): arg_dot = DEP_REQUIRE; break; - case ARG_DOT_FROM_PATTERN: - if (strv_extend(&arg_dot_from_patterns, optarg) < 0) + OPTION_LONG("from-pattern", "GLOB", "Show only origins in the graph"): + if (strv_extend(&arg_dot_from_patterns, opts.arg) < 0) return log_oom(); - break; - case ARG_DOT_TO_PATTERN: - if (strv_extend(&arg_dot_to_patterns, optarg) < 0) + OPTION_LONG("to-pattern", "GLOB", "Show only destinations in the graph"): + if (strv_extend(&arg_dot_to_patterns, opts.arg) < 0) return log_oom(); - break; - case ARG_FUZZ: - r = parse_sec(optarg, &arg_fuzz); + OPTION_LONG("fuzz", "SECONDS", + "Also print services which finished SECONDS earlier than the latest in the branch"): + r = parse_sec(opts.arg, &arg_fuzz); if (r < 0) return r; break; - case ARG_NO_PAGER: + OPTION_COMMON_NO_PAGER: arg_pager_flags |= PAGER_DISABLE; break; - case 'H': + OPTION_COMMON_HOST: arg_transport = BUS_TRANSPORT_REMOTE; - arg_host = optarg; + arg_host = opts.arg; break; - case 'M': - r = parse_machine_argument(optarg, &arg_host, &arg_transport); + OPTION_COMMON_MACHINE: + r = parse_machine_argument(opts.arg, &arg_host, &arg_transport); if (r < 0) return r; break; - case ARG_MAN: - r = parse_boolean_argument("--man", optarg, &arg_man); + OPTION_LONG_FLAGS(OPTION_OPTIONAL_ARG, "man", "BOOL", "Whether to check for existence of man pages"): + r = parse_boolean_argument("--man", opts.arg, &arg_man); if (r < 0) return r; break; - case ARG_GENERATORS: - r = parse_boolean_argument("--generators", optarg, &arg_generators); + OPTION_LONG_FLAGS(OPTION_OPTIONAL_ARG, "generators", "BOOL", + "Whether to run unit generators (which requires privileges)"): + r = parse_boolean_argument("--generators", opts.arg, &arg_generators); if (r < 0) return r; break; - case ARG_INSTANCE: - arg_instance = optarg; + OPTION_LONG("instance", "NAME", "Specify fallback instance name for template units"): + arg_instance = opts.arg; break; - case ARG_OFFLINE: - r = parse_boolean_argument("--offline", optarg, &arg_offline); + OPTION_LONG("offline", "BOOL", "Perform a security review on unit files"): + r = parse_boolean_argument("--offline", opts.arg, &arg_offline); if (r < 0) return r; break; - case ARG_THRESHOLD: - r = safe_atou(optarg, &arg_threshold); + OPTION_LONG("threshold", "N", + "Exit with a non-zero status when overall exposure level is over threshold value"): + r = safe_atou(opts.arg, &arg_threshold); if (r < 0 || arg_threshold > 100) - return log_error_errno(r < 0 ? r : SYNTHETIC_ERRNO(EINVAL), "Failed to parse threshold: %s", optarg); - + return log_error_errno(r < 0 ? r : SYNTHETIC_ERRNO(EINVAL), + "Failed to parse threshold: %s", opts.arg); break; - case ARG_SECURITY_POLICY: - r = parse_path_argument(optarg, /* suppress_root= */ false, &arg_security_policy); + OPTION_LONG("security-policy", "PATH", + "Use custom JSON security policy instead of built-in one"): + r = parse_path_argument(opts.arg, /* suppress_root= */ false, &arg_security_policy); if (r < 0) return r; break; - case ARG_JSON: - r = parse_json_argument(optarg, &arg_json_format_flags); + OPTION_COMMON_JSON: + r = parse_json_argument(opts.arg, &arg_json_format_flags); if (r <= 0) return r; break; - case ARG_ITERATIONS: - r = safe_atou(optarg, &arg_iterations); + OPTION_LONG("iterations", "N", "Show the specified number of iterations"): + r = safe_atou(opts.arg, &arg_iterations); if (r < 0) - return log_error_errno(r, "Failed to parse iterations: %s", optarg); + return log_error_errno(r, "Failed to parse iterations: %s", opts.arg); break; - case ARG_BASE_TIME: - r = parse_timestamp(optarg, &arg_base_time); + OPTION_LONG("base-time", "TIMESTAMP", + "Calculate calendar times relative to specified time"): + r = parse_timestamp(opts.arg, &arg_base_time); if (r < 0) - return log_error_errno(r, "Failed to parse --base-time= parameter: %s", optarg); + return log_error_errno(r, "Failed to parse --base-time= parameter: %s", opts.arg); break; - case ARG_PROFILE: - if (isempty(optarg)) + OPTION_LONG("profile", "name|PATH", + "Include the specified profile in the security review of the units"): + if (isempty(opts.arg)) return log_error_errno(SYNTHETIC_ERRNO(EINVAL), "Profile file name is empty"); - if (is_path(optarg)) { - r = parse_path_argument(optarg, /* suppress_root= */ false, &arg_profile); + if (is_path(opts.arg)) { + r = parse_path_argument(opts.arg, /* suppress_root= */ false, &arg_profile); if (r < 0) return r; if (!endswith(arg_profile, ".conf")) - return log_error_errno(SYNTHETIC_ERRNO(EINVAL), "Profile file name must end with .conf: %s", arg_profile); + return log_error_errno(SYNTHETIC_ERRNO(EINVAL), + "Profile file name must end with .conf: %s", arg_profile); } else { - r = free_and_strdup(&arg_profile, optarg); + r = free_and_strdup(&arg_profile, opts.arg); if (r < 0) return log_oom(); } - break; - case 'U': { + OPTION('U', "unit", "UNIT", "Evaluate conditions and asserts of unit"): { _cleanup_free_ char *mangled = NULL; - r = unit_name_mangle(optarg, UNIT_NAME_MANGLE_WARN, &mangled); + r = unit_name_mangle(opts.arg, UNIT_NAME_MANGLE_WARN, &mangled); if (r < 0) - return log_error_errno(r, "Failed to mangle unit name %s: %m", optarg); + return log_error_errno(r, "Failed to mangle unit name %s: %m", opts.arg); free_and_replace(arg_unit, mangled); break; } - case ARG_TABLE: + OPTION_LONG_FLAGS(OPTION_OPTIONAL_ARG, "table", NULL, + "Output plot's raw time data as a table"): arg_table = true; break; - case ARG_NO_LEGEND: + OPTION_LONG_FLAGS(OPTION_OPTIONAL_ARG, "no-legend", NULL, + "Disable column headers and hints in plot with either --table or --json="): arg_legend = false; break; - case ARG_TLDR: + OPTION_LONG("tldr", NULL, "Skip comments and empty lines"): arg_cat_flags = CAT_TLDR; break; - case 'm': + OPTION('m', "mask", NULL, "Parse parameter as numeric capability mask"): arg_capability = CAPABILITY_MASK; break; - case ARG_SCALE_FACTOR_SVG: - arg_svg_timescale = strtod(optarg, NULL); + OPTION_LONG("scale-svg", "FACTOR", "Stretch x-axis of plot by FACTOR (default: 1.0)"): + arg_svg_timescale = strtod(opts.arg, NULL); break; - case ARG_DETAILED_SVG: + OPTION_LONG("detailed", NULL, + "Add more details to SVG plot, e.g. show activation timestamps"): arg_detailed_svg = true; break; - case ARG_DRM_DEVICE_PATH: - r = parse_path_argument(optarg, /* suppress_root= */ false, &arg_drm_device_path); + OPTION_LONG("drm-device", "PATH", "Use this DRM device sysfs path to get EDID"): + r = parse_path_argument(opts.arg, /* suppress_root= */ false, &arg_drm_device_path); if (r < 0) return r; break; - case ARG_DEBUGGER: - r = free_and_strdup_warn(&arg_debugger, optarg); + OPTION_LONG("debugger", "DEBUGGER", "Use the given debugger"): + r = free_and_strdup_warn(&arg_debugger, opts.arg); if (r < 0) return r; break; - case 'A': { + OPTION('A', "debugger-arguments", "ARGS", "Pass the given arguments to the debugger"): { _cleanup_strv_free_ char **l = NULL; - r = strv_split_full(&l, optarg, WHITESPACE, EXTRACT_UNQUOTE); + r = strv_split_full(&l, opts.arg, WHITESPACE, EXTRACT_UNQUOTE); if (r < 0) - return log_error_errno(r, "Failed to parse debugger arguments '%s': %m", optarg); + return log_error_errno(r, "Failed to parse debugger arguments '%s': %m", opts.arg); strv_free_and_replace(arg_debugger_args, l); break; } - - case '?': - return -EINVAL; - - default: - assert_not_reached(); } - } - -done: - if (unit_shell >= 0) { - char *t; - - /* We found the "unit-shell" verb while processing the argument list. Since we turned off reordering of the - * argument list initially let's readjust it now, and move the "unit-shell" verb to the back. */ - - optind -= 1; /* place the option index where the "unit-shell" verb will be placed */ - t = argv[unit_shell]; - for (int i = unit_shell; i < optind; i++) - argv[i] = argv[i+1]; - argv[optind] = t; - } + _cleanup_strv_free_ char **args = strv_copy(option_parser_get_args(&opts)); /* args is [arg1, arg2, …] */ + if (!args || strv_prepend(&args, verb) < 0) /* args is now [arg0, arg1, arg2, …] */ + return log_oom(); - if (arg_offline && !streq_ptr(argv[optind], "security")) + if (arg_offline && !streq_ptr(verb, "security")) return log_error_errno(SYNTHETIC_ERRNO(EINVAL), "Option --offline= is only supported for security right now."); - if (arg_offline && optind >= argc - 1) + if (arg_offline && strv_length(args) < 2) return log_error_errno(SYNTHETIC_ERRNO(EINVAL), "Option --offline= requires one or more units to perform a security review."); - if (arg_json_format_flags != SD_JSON_FORMAT_OFF && !STRPTR_IN_SET(argv[optind], "security", "inspect-elf", "dlopen-metadata", "plot", "fdstore", "pcrs", "nvpcrs", "architectures", "capability", "exit-status")) + if (arg_json_format_flags != SD_JSON_FORMAT_OFF && + !STRPTR_IN_SET(verb, "security", "inspect-elf", "dlopen-metadata", "plot", "fdstore", "pcrs", "nvpcrs", "architectures", "capability", "exit-status")) return log_error_errno(SYNTHETIC_ERRNO(EINVAL), "Option --json= is only supported for security, inspect-elf, dlopen-metadata, plot, fdstore, pcrs, nvpcrs, architectures, capability, exit-status right now."); - if (arg_threshold != 100 && !streq_ptr(argv[optind], "security")) + if (arg_threshold != 100 && !streq_ptr(verb, "security")) return log_error_errno(SYNTHETIC_ERRNO(EINVAL), "Option --threshold= is only supported for security right now."); - if (arg_runtime_scope == RUNTIME_SCOPE_GLOBAL && !streq_ptr(argv[optind], "unit-paths")) + if (arg_runtime_scope == RUNTIME_SCOPE_GLOBAL && !streq_ptr(verb, "unit-paths")) return log_error_errno(SYNTHETIC_ERRNO(EINVAL), "Option --global only makes sense with verb unit-paths."); - if (streq_ptr(argv[optind], "cat-config") && arg_runtime_scope == RUNTIME_SCOPE_USER) + if (streq_ptr(verb, "cat-config") && arg_runtime_scope == RUNTIME_SCOPE_USER) return log_error_errno(SYNTHETIC_ERRNO(EINVAL), "Option --user is not supported for cat-config right now."); - if (arg_security_policy && !streq_ptr(argv[optind], "security")) + if (arg_security_policy && !streq_ptr(verb, "security")) return log_error_errno(SYNTHETIC_ERRNO(EINVAL), "Option --security-policy= is only supported for security."); - if ((arg_root || arg_image) && (!STRPTR_IN_SET(argv[optind], "cat-config", "verify", "condition", "inspect-elf", "unit-gdb")) && - (!(streq_ptr(argv[optind], "security") && arg_offline))) + if ((arg_root || arg_image) && + !STRPTR_IN_SET(verb, "cat-config", "verify", "condition", "inspect-elf", "unit-gdb") && + (!(streq_ptr(verb, "security") && arg_offline))) return log_error_errno(SYNTHETIC_ERRNO(EINVAL), "Options --root= and --image= are only supported for cat-config, verify, condition, unit-gdb, and security when used with --offline= right now."); @@ -750,84 +654,35 @@ static int parse_argv(int argc, char *argv[]) { if (arg_root && arg_image) return log_error_errno(SYNTHETIC_ERRNO(EINVAL), "Please specify either --root= or --image=, the combination of both is not supported."); - if (arg_unit && !streq_ptr(argv[optind], "condition")) + if (arg_unit && !streq_ptr(verb, "condition")) return log_error_errno(SYNTHETIC_ERRNO(EINVAL), "Option --unit= is only supported for condition"); - if (streq_ptr(argv[optind], "condition") && !arg_unit && optind >= argc - 1) + if (streq_ptr(verb, "condition") && !arg_unit && strv_length(args) < 2) return log_error_errno(SYNTHETIC_ERRNO(EINVAL), "Too few arguments for condition"); - if (streq_ptr(argv[optind], "condition") && arg_unit && optind < argc - 1) + if (streq_ptr(verb, "condition") && arg_unit && strv_length(args) > 1) return log_error_errno(SYNTHETIC_ERRNO(EINVAL), "No conditions can be passed if --unit= is used."); - if (arg_table && !streq_ptr(argv[optind], "plot")) + if (arg_table && !streq_ptr(verb, "plot")) return log_error_errno(SYNTHETIC_ERRNO(EINVAL), "Option --table is only supported for plot right now."); if (arg_table && sd_json_format_enabled(arg_json_format_flags)) return log_error_errno(SYNTHETIC_ERRNO(EINVAL), "--table and --json= are mutually exclusive."); - if (arg_capability != CAPABILITY_LITERAL && !streq_ptr(argv[optind], "capability")) + if (arg_capability != CAPABILITY_LITERAL && !streq_ptr(verb, "capability")) return log_error_errno(SYNTHETIC_ERRNO(EINVAL), "Option --mask is only supported for capability."); - if (arg_drm_device_path && !streq_ptr(argv[optind], "chid")) + if (arg_drm_device_path && !streq_ptr(verb, "chid")) return log_error_errno(SYNTHETIC_ERRNO(EINVAL), "Option --drm-device is only supported for chid right now."); + *ret_args = TAKE_PTR(args); return 1; /* work to do */ } static int run(int argc, char *argv[]) { _cleanup_(loop_device_unrefp) LoopDevice *loop_device = NULL; _cleanup_(umount_and_freep) char *mounted_dir = NULL; - - static const Verb verbs[] = { - { "help", VERB_ANY, VERB_ANY, 0, verb_help }, - { "time", VERB_ANY, 1, VERB_DEFAULT, verb_time }, - { "blame", VERB_ANY, 1, 0, verb_blame }, - { "critical-chain", VERB_ANY, VERB_ANY, 0, verb_critical_chain }, - { "plot", VERB_ANY, 1, 0, verb_plot }, - { "dot", VERB_ANY, VERB_ANY, 0, verb_dot }, - /* ↓ The following seven verbs are deprecated, from here … ↓ */ - { "log-level", VERB_ANY, 2, 0, verb_log_control }, - { "log-target", VERB_ANY, 2, 0, verb_log_control }, - { "set-log-level", 2, 2, 0, verb_log_control }, - { "get-log-level", VERB_ANY, 1, 0, verb_log_control }, - { "set-log-target", 2, 2, 0, verb_log_control }, - { "get-log-target", VERB_ANY, 1, 0, verb_log_control }, - { "service-watchdogs", VERB_ANY, 2, 0, verb_service_watchdogs }, - /* ↑ … until here ↑ */ - { "dump", VERB_ANY, VERB_ANY, 0, verb_dump }, - { "cat-config", 2, VERB_ANY, 0, verb_cat_config }, - { "unit-files", VERB_ANY, VERB_ANY, 0, verb_unit_files }, - { "unit-gdb", 2, VERB_ANY, 0, verb_unit_gdb }, - { "unit-paths", 1, 1, 0, verb_unit_paths }, - { "unit-shell", 2, VERB_ANY, 0, verb_unit_shell }, - { "exit-status", VERB_ANY, VERB_ANY, 0, verb_exit_status }, - { "syscall-filter", VERB_ANY, VERB_ANY, 0, verb_syscall_filters }, - { "capability", VERB_ANY, VERB_ANY, 0, verb_capabilities }, - { "filesystems", VERB_ANY, VERB_ANY, 0, verb_filesystems }, - { "condition", VERB_ANY, VERB_ANY, 0, verb_condition }, - { "compare-versions", 3, 4, 0, verb_compare_versions }, - { "verify", 2, VERB_ANY, 0, verb_verify }, - { "calendar", 2, VERB_ANY, 0, verb_calendar }, - { "timestamp", 2, VERB_ANY, 0, verb_timestamp }, - { "timespan", 2, VERB_ANY, 0, verb_timespan }, - { "security", VERB_ANY, VERB_ANY, 0, verb_security }, - { "inspect-elf", 2, VERB_ANY, 0, verb_elf_inspection }, - { "dlopen-metadata", 2, 2, 0, verb_dlopen_metadata }, - { "malloc", VERB_ANY, VERB_ANY, 0, verb_malloc }, - { "fdstore", 2, VERB_ANY, 0, verb_fdstore }, - { "image-policy", 2, 2, 0, verb_image_policy }, - { "has-tpm2", VERB_ANY, 1, 0, verb_has_tpm2 }, - { "identify-tpm2", VERB_ANY, 1, 0, verb_identify_tpm2 }, - { "pcrs", VERB_ANY, VERB_ANY, 0, verb_pcrs }, - { "nvpcrs", VERB_ANY, VERB_ANY, 0, verb_nvpcrs }, - { "srk", VERB_ANY, 1, 0, verb_srk }, - { "architectures", VERB_ANY, VERB_ANY, 0, verb_architectures }, - { "smbios11", VERB_ANY, 1, 0, verb_smbios11 }, - { "chid", VERB_ANY, VERB_ANY, 0, verb_chid }, - { "transient-settings", 2, VERB_ANY, 0, verb_transient_settings }, - {} - }; - + _cleanup_strv_free_ char **args = NULL; int r; setlocale(LC_ALL, ""); @@ -835,7 +690,7 @@ static int run(int argc, char *argv[]) { log_setup(); - r = parse_argv(argc, argv); + r = parse_argv(argc, argv, &args); if (r <= 0) return r; @@ -861,7 +716,7 @@ static int run(int argc, char *argv[]) { return log_oom(); } - return dispatch_verb(argc, argv, verbs, NULL); + return dispatch_verb_with_args(args, NULL); } DEFINE_MAIN_FUNCTION_WITH_POSITIVE_FAILURE(run); From 71ebb276861e28b63c9a1b86bc8fee4ca7de5500 Mon Sep 17 00:00:00 2001 From: Simon Lucido Date: Mon, 4 May 2026 11:40:41 +0200 Subject: [PATCH 110/242] core/varlink-metrics: expose ReloadCount as a metric Add ReloadCount to the io.systemd.Metrics family table so it can be queried alongside other manager-level metrics via systemd-report. Also extend the existing integration test to cross-check the value returned by systemd-report against the D-Bus and Varlink transports on every assertion. Co-developed-by: Claude Opus 4.7 Signed-off-by: Simon Lucido --- src/core/varlink-metrics.c | 18 ++++++++++++++ test/units/TEST-07-PID1.reload-count.sh | 33 +++++++++++++++++++++---- 2 files changed, 46 insertions(+), 5 deletions(-) diff --git a/src/core/varlink-metrics.c b/src/core/varlink-metrics.c index 82bc3cf4cba15..f1ac0791bc914 100644 --- a/src/core/varlink-metrics.c +++ b/src/core/varlink-metrics.c @@ -189,6 +189,18 @@ static int nrestarts_build_json(MetricFamilyContext *context, void *userdata) { return 0; } +static int reload_count_build_json(MetricFamilyContext *context, void *userdata) { + Manager *manager = ASSERT_PTR(userdata); + + assert(context); + + return metric_build_send_unsigned( + context, + /* object= */ NULL, + manager->reload_count, + /* fields= */ NULL); +} + static int units_by_type_total_build_json(MetricFamilyContext *context, void *userdata) { Manager *manager = ASSERT_PTR(userdata); int r; @@ -364,6 +376,12 @@ static const MetricFamily metric_family_table[] = { .type = METRIC_FAMILY_TYPE_COUNTER, .generate = nrestarts_build_json, }, + { + .name = METRIC_IO_SYSTEMD_MANAGER_PREFIX "ReloadCount", + .description = "Number of successful manager reloads since startup; resets across daemon-reexec", + .type = METRIC_FAMILY_TYPE_COUNTER, + .generate = reload_count_build_json, + }, { .name = METRIC_IO_SYSTEMD_MANAGER_PREFIX "StateChangeTimestamp", .description = "Per unit metric: timestamp of the last state change in microseconds; 0 indicates no state change has occurred", diff --git a/test/units/TEST-07-PID1.reload-count.sh b/test/units/TEST-07-PID1.reload-count.sh index 7c31b65c5fc75..a41a4e467d233 100755 --- a/test/units/TEST-07-PID1.reload-count.sh +++ b/test/units/TEST-07-PID1.reload-count.sh @@ -5,7 +5,13 @@ set -o pipefail # Verify that the manager exposes a ReloadCount property that increments on # every daemon-reload, resets to zero across daemon-reexec (since the count -# is not serialized), and is reachable over both D-Bus and Varlink. +# is not serialized), and is reachable over D-Bus, Varlink Describe, and the +# io.systemd.Metrics interface (queried via systemd-report). + +# systemd-report silently returns empty if the metrics source is missing, +# which would falsely pass the cross-checks below. Assert the socket exists +# so any failure points at the real problem. +test -S /run/systemd/report/io.systemd.Manager read_count_dbus() { busctl -j get-property org.freedesktop.systemd1 \ @@ -19,10 +25,23 @@ read_count_varlink() { io.systemd.Manager.Describe '{}' | jq -r '.runtime.ReloadCount' } -# Sanity: both transports must agree. +read_count_report() { + local out + # Strip the RS separator that jq --seq re-emits on output. + out=$(/usr/lib/systemd/systemd-report metrics --json=short \ + io.systemd.Manager.ReloadCount \ + | jq --seq -r 'select(.name == "io.systemd.Manager.ReloadCount") | .value' \ + | tr -d '\036') + [[ -n "$out" ]] || { echo "ReloadCount metric missing from systemd-report output" >&2; return 1; } + echo "$out" +} + +# Sanity: all three transports must agree. dbus_count=$(read_count_dbus) varlink_count=$(read_count_varlink) +report_count=$(read_count_report) (( dbus_count == varlink_count )) +(( dbus_count == report_count )) # A single reload bumps the counter by one. before=$(read_count_dbus) @@ -34,18 +53,22 @@ systemctl daemon-reload systemctl daemon-reload (( $(read_count_dbus) == before + 3 )) -# And both transports still agree after the reload. +# And all three transports still agree after the reload. dbus_count=$(read_count_dbus) varlink_count=$(read_count_varlink) +report_count=$(read_count_report) (( dbus_count == varlink_count )) +(( dbus_count == report_count )) -# A daemon-reexec resets the counter back to zero on both transports, since -# reload_count lives only in memory and is not carried across the reexec. +# A daemon-reexec resets the counter back to zero on all three transports, +# since the counter lives only in memory and is not carried across the reexec. # `systemctl daemon-reexec` returns as soon as the old PID 1 closes its bus # connection, which is before the new PID 1 has rebound /run/systemd/private. # Use --watch-bind=yes to block on inotify until the new socket is live. systemctl daemon-reexec busctl --watch-bind=yes call org.freedesktop.systemd1 /org/freedesktop/systemd1 \ org.freedesktop.DBus.Peer Ping >/dev/null + (( $(read_count_dbus) == 0 )) (( $(read_count_varlink) == 0 )) +(( $(read_count_report) == 0 )) From 45725d6434d552330adfa84d7cf314e1716990d3 Mon Sep 17 00:00:00 2001 From: Luca Boccassi Date: Tue, 5 May 2026 09:52:29 +0100 Subject: [PATCH 111/242] test: skip TEST-70-TPM2.nvpcr check if pcrextend socket inactive systemd-dissect --mtree calls io.systemd.PCRExtend over Varlink to extend the verity NvPCR after activation, and the test then diffs the measure log to find the new entry. But systemd-pcrextend.socket has ConditionSecurity=measured-os, which fails when the firmware did not initialize PCRs, so the test fails. [ 10.056930] systemd[1]: systemd-pcrextend.socket - TPM PCR Measurements skipped, unmet condition check ConditionSecurity=measured-os Follow-up for 521a523ce0cdcf0d529bd566f3d64ae93f10419d --- test/units/TEST-70-TPM2.nvpcr.sh | 15 ++++++++++++--- 1 file changed, 12 insertions(+), 3 deletions(-) diff --git a/test/units/TEST-70-TPM2.nvpcr.sh b/test/units/TEST-70-TPM2.nvpcr.sh index 571b3eea770b3..05ae378d849e5 100755 --- a/test/units/TEST-70-TPM2.nvpcr.sh +++ b/test/units/TEST-70-TPM2.nvpcr.sh @@ -54,6 +54,18 @@ DIGEST_MEASURED2="$(echo -n "schnurz" | openssl dgst -sha256 -hex -r | cut -d' ' DIGEST_EXPECTED2="$(echo "$DIGEST_EXPECTED$DIGEST_MEASURED2" | tr '[:lower:]' '[:upper:]' | basenc --base16 -d | openssl dgst -sha256 -hex -r | cut -d' ' -f1)" test "$DIGEST_ACTUAL2" = "$DIGEST_EXPECTED2" +systemd-analyze identify-tpm2 +udevadm test-builtin 'tpm2_id identify' /dev/tpmrm0 + +# systemd-dissect calls io.systemd.PCRExtend over Varlink to extend the verity NvPCR after activation, +# but systemd-pcrextend.socket has ConditionSecurity=measured-os which fails when the firmware did not +# initialize PCRs (e.g. when not booting via a signed UKI). Skip the rest in that case, otherwise the +# 'diff | grep' below would find no new measurement and fail. +if ! systemctl is-active --quiet systemd-pcrextend.socket; then + echo "systemd-pcrextend.socket not active, skipping verity NvPCR measurement check" + exit 0 +fi + mkdir -p /tmp/nvpcr/tree touch /tmp/nvpcr/tree/file @@ -103,6 +115,3 @@ systemd-dissect --image-policy='root=signed:=absent+unused' --mtree /var/tmp/nvp set +o pipefail diff /tmp/nvpcr/log-before /run/log/systemd/tpm2-measure.log | grep -F '"content":{"nvIndexName":"verity","string":"verity:' - -systemd-analyze identify-tpm2 -udevadm test-builtin 'tpm2_id identify' /dev/tpmrm0 From 090f9b5a9587e98243cfe8b23df7694d277ff7c5 Mon Sep 17 00:00:00 2001 From: Nandakumar Raghavan Date: Mon, 4 May 2026 09:31:59 +0000 Subject: [PATCH 112/242] systemd-dissect: do not fail dissection on LUKS v1 partitions partition_is_luks2_integrity() was returning -EINVAL when it encountered a non-LUKS2 header (e.g. LUKS v1), which caused the caller to abort the entire disk dissection. A LUKS v1 partition simply isn't LUKS2-with-integrity, so return 0 instead and let dissection continue normally. --- src/shared/dissect-image.c | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/src/shared/dissect-image.c b/src/shared/dissect-image.c index 662739ea0fe9c..e69c644f58eeb 100644 --- a/src/shared/dissect-image.c +++ b/src/shared/dissect-image.c @@ -430,11 +430,15 @@ static int partition_is_luks2_integrity(int part_fd, uint64_t offset, uint64_t s if (sz != sizeof(header)) return log_error_errno(SYNTHETIC_ERRNO(EIO), "Failed to read LUKS header."); - if (memcmp(header.luks_magic, LUKS2_MAGIC, sizeof(header.luks_magic)) != 0) - return log_error_errno(SYNTHETIC_ERRNO(EINVAL), "Partition's magic is not LUKS."); + if (memcmp(header.luks_magic, LUKS2_MAGIC, sizeof(header.luks_magic)) != 0) { + log_debug("Partition does not have a LUKS magic header, assuming no integrity."); + return 0; + } - if (be16toh(header.version) != 2) - return log_error_errno(SYNTHETIC_ERRNO(EINVAL), "Unsupported LUKS header version: %" PRIu16 ".", be16toh(header.version)); + if (be16toh(header.version) != 2) { + log_debug("Partition is LUKS v%" PRIu16 ", not LUKS2, assuming no integrity.", be16toh(header.version)); + return 0; + } if (be64toh(header.hdr_len) > size) return log_error_errno(SYNTHETIC_ERRNO(EINVAL), "LUKS header length exceeds partition size."); From 1041160cabddd0e8deefee370b5f5c9d4040cd58 Mon Sep 17 00:00:00 2001 From: Max Chernoff Date: Fri, 1 May 2026 02:15:07 -0600 Subject: [PATCH 113/242] github: rename "systemd-import" to "importctl" The user-facing components are the "systemd-importd.service" unit and the "importctl" binary, so using these names makes more sense. There _is_ a "systemd-import" binary, but it's in "/usr/lib/systemd/", so this is a confusing name for a user-facing form. --- .github/ISSUE_TEMPLATE/bug_report.yml | 3 ++- .github/ISSUE_TEMPLATE/feature_request.yml | 3 ++- .github/advanced-issue-labeler.yml | 2 +- 3 files changed, 5 insertions(+), 3 deletions(-) diff --git a/.github/ISSUE_TEMPLATE/bug_report.yml b/.github/ISSUE_TEMPLATE/bug_report.yml index 313234f82a791..a0547b8b52ae1 100644 --- a/.github/ISSUE_TEMPLATE/bug_report.yml +++ b/.github/ISSUE_TEMPLATE/bug_report.yml @@ -82,6 +82,7 @@ body: - 'homectl' - 'hostnamectl' - 'hardware database files' + - 'importctl' - 'journalctl' - 'kernel-install' - 'loginctl' @@ -112,7 +113,7 @@ body: - 'systemd-homed' - 'systemd-hostnamed' - 'systemd-hwdb' - - 'systemd-import' + - 'systemd-importd' - 'systemd-journal-gatewayd' - 'systemd-journal-remote' - 'systemd-journal-upload' diff --git a/.github/ISSUE_TEMPLATE/feature_request.yml b/.github/ISSUE_TEMPLATE/feature_request.yml index f6f8ac0f75210..5e5996d7574ed 100644 --- a/.github/ISSUE_TEMPLATE/feature_request.yml +++ b/.github/ISSUE_TEMPLATE/feature_request.yml @@ -21,6 +21,7 @@ body: - 'homectl' - 'hostnamectl' - 'hardware database files' + - 'importctl' - 'journalctl' - 'kernel-install' - 'loginctl' @@ -51,7 +52,7 @@ body: - 'systemd-homed' - 'systemd-hostnamed' - 'systemd-hwdb' - - 'systemd-import' + - 'systemd-importd' - 'systemd-journal-gatewayd' - 'systemd-journal-remote' - 'systemd-journal-upload' diff --git a/.github/advanced-issue-labeler.yml b/.github/advanced-issue-labeler.yml index 4e19392598cb5..b06cc20292f6f 100644 --- a/.github/advanced-issue-labeler.yml +++ b/.github/advanced-issue-labeler.yml @@ -53,7 +53,7 @@ policy: keys: ['systemd-hwdb', 'hardware database files'] - name: import - keys: ['systemd-import'] + keys: ['systemd-importd', 'importctl'] - name: journal keys: ['systemd-journald', 'journalctl'] From e8e3bd1dc3d79af1c391039a21d640a2aa2e2d0e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Zbigniew=20J=C4=99drzejewski-Szmek?= Date: Tue, 28 Apr 2026 20:31:14 +0200 Subject: [PATCH 114/242] socket-activate: fix comment "optargs" is not a thing. --- src/socket-activate/socket-activate.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/socket-activate/socket-activate.c b/src/socket-activate/socket-activate.c index 768a2a3ea7235..be0179ccc9eeb 100644 --- a/src/socket-activate/socket-activate.c +++ b/src/socket-activate/socket-activate.c @@ -422,7 +422,7 @@ static int parse_argv(int argc, char *argv[], char ***remaining_args) { log_warning("File descriptor name \"%s\" is not valid.", esc); } - /* Empty optargs means one empty name */ + /* Empty argument means one empty name */ r = strv_extend_strv(&arg_fdnames, strv_isempty(names) ? STRV_MAKE("") : names, false); From e235b6a280f14f303aceaa1d5d7f3f1d1e1170bf Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Zbigniew=20J=C4=99drzejewski-Szmek?= Date: Tue, 28 Apr 2026 20:36:27 +0200 Subject: [PATCH 115/242] report-basic-server: use accessor function This is the API for "external" users. --- src/report/report-basic-server.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/report/report-basic-server.c b/src/report/report-basic-server.c index bca943fd7faee..bcab79d8f4c25 100644 --- a/src/report/report-basic-server.c +++ b/src/report/report-basic-server.c @@ -68,7 +68,7 @@ static int parse_argv(int argc, char *argv[]) { return version(); } - if (opts.optind < argc) + if (option_parser_get_n_args(&opts) > 0) return log_error_errno(SYNTHETIC_ERRNO(EINVAL), "This program takes no arguments."); From e71074f761a9a9ab984f96f530fce20eb8fbc171 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Zbigniew=20J=C4=99drzejewski-Szmek?= Date: Wed, 29 Apr 2026 23:22:45 +0200 Subject: [PATCH 116/242] various: convert "services" to option macros MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Here we have the unusual situation that the option list is conditionalized. I thought about embedding some "tag" information in individual options to allow the options to be filtered by some arbitrary conditions. But it seems that using groups works quite well. It wouldn't scale well if there was a lot more options and conditions, but for the current set it's good enough. For options that are not supported in a given service, we print a custom message ("This service does not support [this] option"), instead of the generic "Unknown option …". I think this is actually better: we don't have to pretent that we don't know about the option, and can directly say that the it's a valid option in general but this service does not support it (yet). This converts systemd-homed, systemd-hostnamed, systemd-importd, systemd-localed, systemd-logind, systemd-machined, systemd-networkd, systemd-portabled, systemd-resolved, systemd-sysupdated, systemd-timedated, and systemd-timesyncd. When we add introspection of the option data, we'll somehow have to deal with conditionalization. But let's cross that bridge when we need to. --- src/shared/service-util.c | 142 +++++++++++++++++--------------------- 1 file changed, 64 insertions(+), 78 deletions(-) diff --git a/src/shared/service-util.c b/src/shared/service-util.c index 70d59af6c51d1..7c1df8e73adad 100644 --- a/src/shared/service-util.c +++ b/src/shared/service-util.c @@ -1,53 +1,53 @@ /* SPDX-License-Identifier: LGPL-2.1-or-later */ -#include #include -#include "alloc-util.h" #include "build.h" #include "bus-object.h" +#include "format-table.h" +#include "help-util.h" #include "log.h" -#include "pretty-print.h" +#include "options.h" #include "runtime-scope.h" #include "service-util.h" -typedef enum HelpFlags { - HELP_WITH_BUS_INTROSPECT = 1 << 0, - HELP_WITH_RUNTIME_SCOPE = 1 << 1, -} HelpFlags; - -static int help(const char *program_path, - const char *service, +static int help(const char *service, const char *description, - HelpFlags flags) { + bool with_bus_introspect, + bool with_runtime_scope) { - _cleanup_free_ char *link = NULL; + static const char* const groups[] = { + NULL, + "Bus introspection", + "Runtime scope", + }; + + bool conds[ELEMENTSOF(groups)] = { true, with_bus_introspect, with_runtime_scope }; + Table* tables[ELEMENTSOF(groups)] = {}; + CLEANUP_ELEMENTS(tables, table_unref_array_clear); int r; - r = terminal_urlify_man(service, "8", &link); - if (r < 0) - return log_oom(); - - printf("%1$s [OPTIONS...]\n" - "\n%5$s%7$s%6$s\n" - "\nThis program takes no positional arguments.\n" - "\n%3$sOptions:%4$s\n" - " -h --help Show this help\n" - " --version Show package version\n" - "%8$s" - "%9$s" - "\nSee the %2$s for details.\n", - program_path, - link, - ansi_underline(), - ansi_normal(), - ansi_highlight(), - ansi_normal(), - description, - FLAGS_SET(flags, HELP_WITH_BUS_INTROSPECT) ? " --bus-introspect=PATH Write D-Bus XML introspection data\n" : "", - FLAGS_SET(flags, HELP_WITH_RUNTIME_SCOPE) ? " --system Start service in system mode\n" - " --user Start service in user mode\n" : ""); + for (size_t i = 0; i < ELEMENTSOF(groups); i++) + if (conds[i]) { + r = option_parser_get_help_table_group(groups[i], &tables[i]); + if (r < 0) + return r; + } + + (void) table_sync_column_widths(0, tables[0], tables[1] ?: tables[2], tables[1] ? tables[2] : NULL); + + help_cmdline("[OPTIONS...]"); + help_abstract(description); + help_section("Options"); + for (size_t i = 0; i < ELEMENTSOF(groups); i++) + if (conds[i]) { + r = table_print_or_warn(tables[i]); + if (r < 0) + return r; + } + + help_man_page_reference(service, "8"); return 0; /* No further action */ } @@ -58,64 +58,50 @@ int service_parse_argv( RuntimeScope *runtime_scope, int argc, char *argv[]) { - enum { - ARG_VERSION = 0x100, - ARG_BUS_INTROSPECT, - ARG_SYSTEM, - ARG_USER, - }; - - static const struct option options[] = { - { "help", no_argument, NULL, 'h' }, - { "version", no_argument, NULL, ARG_VERSION }, - { "bus-introspect", required_argument, NULL, ARG_BUS_INTROSPECT }, - { "system", no_argument, NULL, ARG_SYSTEM }, - { "user", no_argument, NULL, ARG_USER }, - {} - }; - - int c; - assert(argc >= 0); assert(argv); - while ((c = getopt_long(argc, argv, "h", options, NULL)) >= 0) + OptionParser opts = { argc, argv }; + + FOREACH_OPTION_OR_RETURN(c, &opts) switch (c) { - case 'h': - return help(argv[0], - service, + OPTION_COMMON_HELP: + return help(service, description, - (bus_objects ? HELP_WITH_BUS_INTROSPECT : 0) | - (runtime_scope ? HELP_WITH_RUNTIME_SCOPE : 0)); + /* with_bus_introspect= */ bus_objects, + /* with_runtime_scope= */ runtime_scope); - case ARG_VERSION: + OPTION_COMMON_VERSION: return version(); - case ARG_BUS_INTROSPECT: - return bus_introspect_implementations( - stdout, - optarg, - bus_objects); + OPTION_GROUP("Bus introspection"): {} - case ARG_SYSTEM: - case ARG_USER: - if (!runtime_scope) - return log_error_errno(SYNTHETIC_ERRNO(EINVAL), "This service cannot be run in --system or --user mode, refusing."); + OPTION_LONG("bus-introspect", "PATH", "Write D-Bus XML introspection data"): + /* The option is defined in the shared option table, but it's not supported in this binary, + * so we pretend it doesn't exist. */ + if (!bus_objects) + return log_error_errno(SYNTHETIC_ERRNO(EINVAL), + "This service does not support the --bus-introspect= option."); - *runtime_scope = c == ARG_SYSTEM ? RUNTIME_SCOPE_SYSTEM : RUNTIME_SCOPE_USER; - break; + return bus_introspect_implementations(stdout, opts.arg, bus_objects); - case '?': - return -EINVAL; + OPTION_GROUP("Runtime scope"): {} - default: - assert_not_reached(); + OPTION_LONG_DATA("system", NULL, /* data= */ RUNTIME_SCOPE_SYSTEM, + "Start service in system mode"): {} + OPTION_LONG_DATA("user", NULL, /* data= */ RUNTIME_SCOPE_USER, + "Start service in user mode"): + if (!runtime_scope) + return log_error_errno(SYNTHETIC_ERRNO(EINVAL), + "This service does not support the --system/--user options."); + + *runtime_scope = opts.opt->data; + break; } - if (optind < argc) - return log_error_errno(SYNTHETIC_ERRNO(EINVAL), - "This program takes no arguments."); + if (option_parser_get_n_args(&opts) > 0) + return log_error_errno(SYNTHETIC_ERRNO(EINVAL), "This program takes no arguments."); return 1; /* Further action */ } From 9cfad502f4aa103ef0d2191cbb6b82fecfbc5044 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Zbigniew=20J=C4=99drzejewski-Szmek?= Date: Thu, 30 Apr 2026 00:34:19 +0200 Subject: [PATCH 117/242] sysext: move stuff around The verb implementation functions are reordered to match the listing in --help. The option are reorded a bit to have the "important" options that determine behaviour first, and various display options and tweaks later. The cases in parse_argv are ordered in the same way. No functional change. --- src/sysext/sysext.c | 831 ++++++++++++++++++++++---------------------- 1 file changed, 415 insertions(+), 416 deletions(-) diff --git a/src/sysext/sysext.c b/src/sysext/sysext.c index 5f97f43ffb4c2..e4d9e7e0c355d 100644 --- a/src/sysext/sysext.c +++ b/src/sysext/sysext.c @@ -424,353 +424,6 @@ static int daemon_reload(void) { return bus_service_manager_reload(bus); } -static int unmerge_hierarchy(ImageClass image_class, const char *p, const char *submounts_path) { - - _cleanup_free_ char *dot_dir = NULL, *work_dir_info_file = NULL; - int n_unmerged = 0; - int r; - - assert(p); - - dot_dir = path_join(p, image_class_info[image_class].dot_directory_name); - if (!dot_dir) - return log_oom(); - - work_dir_info_file = path_join(dot_dir, "work_dir"); - if (!work_dir_info_file) - return log_oom(); - - for (;;) { - _cleanup_free_ char *escaped_work_dir_in_root = NULL, *work_dir = NULL; - - /* We only unmount /usr/ if it is a mount point and really one of ours, in order not to break - * systems where /usr/ is a mount point of its own already. */ - - r = is_our_mount_point(image_class, p); - if (r < 0) - return r; - if (r == 0) - break; - - r = read_one_line_file(work_dir_info_file, &escaped_work_dir_in_root); - if (r < 0) { - if (r != -ENOENT) - return log_error_errno(r, "Failed to read '%s': %m", work_dir_info_file); - } else { - _cleanup_free_ char *work_dir_in_root = NULL; - ssize_t l; - - l = cunescape_length(escaped_work_dir_in_root, r, 0, &work_dir_in_root); - if (l < 0) - return log_error_errno(l, "Failed to unescape work directory path: %m"); - work_dir = path_join(arg_root, work_dir_in_root); - if (!work_dir) - return log_oom(); - } - - r = umount_verbose(LOG_DEBUG, dot_dir, MNT_DETACH|UMOUNT_NOFOLLOW); - if (r < 0) { - /* EINVAL is possibly "not a mount point". Let it slide as it's expected to occur if - * the whole hierarchy was read-only, so the dot directory inside it was not - * bind-mounted as read-only. */ - if (r != -EINVAL) - return log_error_errno(r, "Failed to unmount '%s': %m", dot_dir); - } - - /* After we've unmounted the metadata directory, save all other submounts so we can restore - * them after unmerging the hierarchy. */ - r = move_submounts(p, submounts_path); - if (r < 0) - return r; - - r = umount_verbose(LOG_ERR, p, MNT_DETACH|UMOUNT_NOFOLLOW); - if (r < 0) - return r; - - if (work_dir) { - r = rm_rf(work_dir, REMOVE_ROOT | REMOVE_MISSING_OK | REMOVE_PHYSICAL); - if (r < 0) - return log_error_errno(r, "Failed to remove '%s': %m", work_dir); - } - - log_info("Unmerged '%s'.", p); - n_unmerged++; - } - - return n_unmerged; -} - -static int unmerge_subprocess( - ImageClass image_class, - char **hierarchies, - const char *workspace) { - - int r, ret = 0; - - assert(workspace); - assert(path_startswith(workspace, "/run/")); - - /* Mark the whole of /run as MS_SLAVE, so that we can mount stuff below it that doesn't show up on - * the host otherwise. */ - r = mount_nofollow_verbose(LOG_ERR, NULL, "/run", NULL, MS_SLAVE|MS_REC, NULL); - if (r < 0) - return r; - - /* Let's create the workspace if it's missing */ - r = mkdir_p(workspace, 0700); - if (r < 0) - return log_error_errno(r, "Failed to create '%s': %m", workspace); - - STRV_FOREACH(h, hierarchies) { - _cleanup_free_ char *submounts_path = NULL, *resolved = NULL; - - submounts_path = path_join(workspace, "submounts", *h); - if (!submounts_path) - return log_oom(); - - r = chase(*h, arg_root, CHASE_PREFIX_ROOT, &resolved, NULL); - if (r == -ENOENT) { - log_debug_errno(r, "Hierarchy '%s%s' does not exist, ignoring.", strempty(arg_root), *h); - continue; - } - if (r < 0) { - RET_GATHER(ret, log_error_errno(r, "Failed to resolve path to hierarchy '%s%s': %m", strempty(arg_root), *h)); - continue; - } - - r = unmerge_hierarchy(image_class, resolved, submounts_path); - if (r < 0) { - RET_GATHER(ret, r); - continue; - } - if (r == 0) - continue; - - /* If we unmerged something, then we have to move the submounts from the hierarchy back into - * place in the host's original hierarchy. */ - - r = move_submounts(submounts_path, resolved); - if (r < 0) - return r; - } - - return ret; -} - -static int unmerge( - ImageClass image_class, - char **hierarchies, - bool no_reload) { - - bool need_to_reload; - int r; - - (void) dlopen_libmount(LOG_DEBUG); - - r = need_reload(image_class, hierarchies, no_reload); - if (r < 0) - return r; - need_to_reload = r > 0; - - r = pidref_safe_fork( - "(sd-unmerge)", - FORK_WAIT|FORK_DEATHSIG_SIGTERM|FORK_LOG|FORK_NEW_MOUNTNS, - /* ret= */ NULL); - if (r < 0) - return r; - if (r == 0) { - /* Child with its own mount namespace */ - - r = unmerge_subprocess(image_class, hierarchies, "/run/systemd/sysext"); - - /* Our namespace ceases to exist here, also implicitly detaching all temporary mounts we - * created below /run. Nice! */ - - _exit(r < 0 ? EXIT_FAILURE : EXIT_SUCCESS); - } - - if (need_to_reload) { - r = daemon_reload(); - if (r < 0) - return r; - } - - return 0; -} - -static int verb_unmerge(int argc, char *argv[], uintptr_t _data, void *userdata) { - int r; - - r = have_effective_cap(CAP_SYS_ADMIN); - if (r < 0) - return log_error_errno(r, "Failed to check if we have enough privileges: %m"); - if (r == 0) - return log_error_errno(SYNTHETIC_ERRNO(EPERM), "Need to be privileged."); - - return unmerge(arg_image_class, - arg_hierarchies, - arg_no_reload); -} - -static int parse_image_class_parameter(sd_varlink *link, const char *value, ImageClass *image_class, char ***hierarchies) { - _cleanup_strv_free_ char **h = NULL; - ImageClass c; - int r; - - assert(link); - assert(image_class); - - if (!value) - return 0; - - c = image_class_from_string(value); - if (!IN_SET(c, IMAGE_SYSEXT, IMAGE_CONFEXT)) - return sd_varlink_error_invalid_parameter_name(link, "class"); - - if (hierarchies) { - r = parse_env_extension_hierarchies(&h, image_class_info[c].name_env); - if (r < 0) - return log_error_errno(r, "Failed to parse environment variable: %m"); - - strv_free_and_replace(*hierarchies, h); - } - - *image_class = c; - return 0; -} - -typedef struct MethodUnmergeParameters { - const char *class; - int no_reload; -} MethodUnmergeParameters; - -static int vl_method_unmerge(sd_varlink *link, sd_json_variant *parameters, sd_varlink_method_flags_t flags, void *userdata) { - - static const sd_json_dispatch_field dispatch_table[] = { - { "class", SD_JSON_VARIANT_STRING, sd_json_dispatch_const_string, offsetof(MethodUnmergeParameters, class), 0 }, - { "noReload", SD_JSON_VARIANT_BOOLEAN, sd_json_dispatch_stdbool, offsetof(MethodUnmergeParameters, no_reload), 0 }, - VARLINK_DISPATCH_POLKIT_FIELD, - {} - }; - MethodUnmergeParameters p = { - .no_reload = -1, - }; - Hashmap **polkit_registry = ASSERT_PTR(userdata); - _cleanup_strv_free_ char **hierarchies = NULL; - ImageClass image_class = arg_image_class; - bool no_reload; - int r; - - assert(link); - - r = sd_varlink_dispatch(link, parameters, dispatch_table, &p); - if (r != 0) - return r; - - no_reload = p.no_reload >= 0 ? p.no_reload : arg_no_reload; - - r = parse_image_class_parameter(link, p.class, &image_class, &hierarchies); - if (r < 0) - return r; - - r = varlink_verify_polkit_async( - link, - /* bus= */ NULL, - image_class_info[image_class].polkit_rw_action_id, - (const char**) STRV_MAKE( - "verb", "unmerge", - "noReload", one_zero(no_reload)), - polkit_registry); - if (r <= 0) - return r; - - r = unmerge(image_class, hierarchies ?: arg_hierarchies, no_reload); - if (r < 0) - return r; - - return sd_varlink_reply(link, NULL); -} - -static int verb_status(int argc, char *argv[], uintptr_t _data, void *userdata) { - _cleanup_(table_unrefp) Table *t = NULL; - int r, ret = 0; - - t = table_new("hierarchy", "extensions", "since"); - if (!t) - return log_oom(); - - table_set_ersatz_string(t, TABLE_ERSATZ_DASH); - - STRV_FOREACH(p, arg_hierarchies) { - _cleanup_free_ char *resolved = NULL, *f = NULL, *buf = NULL; - _cleanup_strv_free_ char **l = NULL; - struct stat st; - - r = chase(*p, arg_root, CHASE_PREFIX_ROOT, &resolved, NULL); - if (r == -ENOENT) { - log_debug_errno(r, "Hierarchy '%s%s' does not exist, ignoring.", strempty(arg_root), *p); - continue; - } - if (r < 0) { - log_error_errno(r, "Failed to resolve path to hierarchy '%s%s': %m", strempty(arg_root), *p); - goto inner_fail; - } - - r = is_our_mount_point(arg_image_class, resolved); - if (r < 0) - goto inner_fail; - if (r == 0) { - r = table_add_many( - t, - TABLE_PATH, *p, - TABLE_STRING, "none", - TABLE_SET_COLOR, ansi_grey(), - TABLE_EMPTY); - if (r < 0) - return table_log_add_error(r); - - continue; - } - - f = path_join(resolved, image_class_info[arg_image_class].dot_directory_name, image_class_info[arg_image_class].short_identifier_plural); - if (!f) - return log_oom(); - - r = read_full_file(f, &buf, NULL); - if (r < 0) - return log_error_errno(r, "Failed to open '%s': %m", f); - - l = strv_split_newlines(buf); - if (!l) - return log_oom(); - - if (stat(*p, &st) < 0) - return log_error_errno(errno, "Failed to stat() '%s': %m", *p); - - r = table_add_many( - t, - TABLE_PATH, *p, - TABLE_STRV, l, - TABLE_TIMESTAMP, timespec_load(&st.st_mtim)); - if (r < 0) - return table_log_add_error(r); - - continue; - - inner_fail: - if (ret == 0) - ret = r; - } - - (void) table_set_sort(t, (size_t) 0); - - r = table_print_with_pager(t, arg_json_format_flags, arg_pager_flags, arg_legend); - if (r < 0) - return r; - - return ret; -} - static int append_overlayfs_path_option( char **options, const char *separator, @@ -964,6 +617,63 @@ static OverlayFSPaths *overlayfs_paths_free(OverlayFSPaths *op) { } DEFINE_TRIVIAL_CLEANUP_FUNC(OverlayFSPaths *, overlayfs_paths_free); +static int parse_env(void) { + const char *env_var; + int r; + + env_var = secure_getenv(image_class_info[arg_image_class].mode_env); + if (env_var) { + r = parse_mutable_mode(env_var); + if (r < 0) + log_warning("Failed to parse %s environment variable value '%s'. Ignoring.", + image_class_info[arg_image_class].mode_env, env_var); + else { + arg_mutable = r; + arg_mutable_set = true; + } + } + + env_var = secure_getenv(image_class_info[arg_image_class].opts_env); + if (env_var) + arg_overlayfs_mount_options = env_var; + + /* For debugging purposes it might make sense to do this for other hierarchies than /usr/ and + * /opt/, but let's make that a hacker/debugging feature, i.e. env var instead of cmdline + * switch. */ + r = parse_env_extension_hierarchies(&arg_hierarchies, image_class_info[arg_image_class].name_env); + if (r < 0) + return log_error_errno(r, "Failed to parse %s environment variable: %m", image_class_info[arg_image_class].name_env); + + return 0; +} + +static int parse_image_class_parameter(sd_varlink *link, const char *value, ImageClass *image_class, char ***hierarchies) { + _cleanup_strv_free_ char **h = NULL; + ImageClass c; + int r; + + assert(link); + assert(image_class); + + if (!value) + return 0; + + c = image_class_from_string(value); + if (!IN_SET(c, IMAGE_SYSEXT, IMAGE_CONFEXT)) + return sd_varlink_error_invalid_parameter_name(link, "class"); + + if (hierarchies) { + r = parse_env_extension_hierarchies(&h, image_class_info[c].name_env); + if (r < 0) + return log_error_errno(r, "Failed to parse environment variable: %m"); + + strv_free_and_replace(*hierarchies, h); + } + + *image_class = c; + return 0; +} + static int resolve_hierarchy(const char *hierarchy, char **ret_resolved_hierarchy) { _cleanup_free_ char *resolved_path = NULL; int r; @@ -1793,7 +1503,7 @@ static int strverscmp_improvedp(char *const* a, char *const* b) { return strverscmp_improved(*a, *b); } -static const ImagePolicy *pick_image_policy(const Image *img) { +static const ImagePolicy* pick_image_policy(const Image *img) { assert(img); assert(img->path); @@ -1817,12 +1527,185 @@ static const ImagePolicy *pick_image_policy(const Image *img) { if (path_startswith(img->path, "/.extra/global_confext/")) return &image_policy_confext_strict; - /* Better safe than sorry, refuse everything else passed in via the untrusted /.extra/ dir */ - if (path_startswith(img->path, "/.extra/")) - return &image_policy_deny; + /* Better safe than sorry, refuse everything else passed in via the untrusted /.extra/ dir */ + if (path_startswith(img->path, "/.extra/")) + return &image_policy_deny; + } + + return image_class_info[img->class].default_image_policy; +} + +static int unmerge_hierarchy(ImageClass image_class, const char *p, const char *submounts_path) { + _cleanup_free_ char *dot_dir = NULL, *work_dir_info_file = NULL; + int n_unmerged = 0; + int r; + + assert(p); + + dot_dir = path_join(p, image_class_info[image_class].dot_directory_name); + if (!dot_dir) + return log_oom(); + + work_dir_info_file = path_join(dot_dir, "work_dir"); + if (!work_dir_info_file) + return log_oom(); + + for (;;) { + _cleanup_free_ char *escaped_work_dir_in_root = NULL, *work_dir = NULL; + + /* We only unmount /usr/ if it is a mount point and really one of ours, in order not to break + * systems where /usr/ is a mount point of its own already. */ + + r = is_our_mount_point(image_class, p); + if (r < 0) + return r; + if (r == 0) + break; + + r = read_one_line_file(work_dir_info_file, &escaped_work_dir_in_root); + if (r < 0) { + if (r != -ENOENT) + return log_error_errno(r, "Failed to read '%s': %m", work_dir_info_file); + } else { + _cleanup_free_ char *work_dir_in_root = NULL; + ssize_t l; + + l = cunescape_length(escaped_work_dir_in_root, r, 0, &work_dir_in_root); + if (l < 0) + return log_error_errno(l, "Failed to unescape work directory path: %m"); + work_dir = path_join(arg_root, work_dir_in_root); + if (!work_dir) + return log_oom(); + } + + r = umount_verbose(LOG_DEBUG, dot_dir, MNT_DETACH|UMOUNT_NOFOLLOW); + if (r < 0) { + /* EINVAL is possibly "not a mount point". Let it slide as it's expected to occur if + * the whole hierarchy was read-only, so the dot directory inside it was not + * bind-mounted as read-only. */ + if (r != -EINVAL) + return log_error_errno(r, "Failed to unmount '%s': %m", dot_dir); + } + + /* After we've unmounted the metadata directory, save all other submounts so we can restore + * them after unmerging the hierarchy. */ + r = move_submounts(p, submounts_path); + if (r < 0) + return r; + + r = umount_verbose(LOG_ERR, p, MNT_DETACH|UMOUNT_NOFOLLOW); + if (r < 0) + return r; + + if (work_dir) { + r = rm_rf(work_dir, REMOVE_ROOT | REMOVE_MISSING_OK | REMOVE_PHYSICAL); + if (r < 0) + return log_error_errno(r, "Failed to remove '%s': %m", work_dir); + } + + log_info("Unmerged '%s'.", p); + n_unmerged++; + } + + return n_unmerged; +} + +static int unmerge_subprocess( + ImageClass image_class, + char **hierarchies, + const char *workspace) { + + int r, ret = 0; + + assert(workspace); + assert(path_startswith(workspace, "/run/")); + + /* Mark the whole of /run as MS_SLAVE, so that we can mount stuff below it that doesn't show up on + * the host otherwise. */ + r = mount_nofollow_verbose(LOG_ERR, NULL, "/run", NULL, MS_SLAVE|MS_REC, NULL); + if (r < 0) + return r; + + /* Let's create the workspace if it's missing */ + r = mkdir_p(workspace, 0700); + if (r < 0) + return log_error_errno(r, "Failed to create '%s': %m", workspace); + + STRV_FOREACH(h, hierarchies) { + _cleanup_free_ char *submounts_path = NULL, *resolved = NULL; + + submounts_path = path_join(workspace, "submounts", *h); + if (!submounts_path) + return log_oom(); + + r = chase(*h, arg_root, CHASE_PREFIX_ROOT, &resolved, NULL); + if (r == -ENOENT) { + log_debug_errno(r, "Hierarchy '%s%s' does not exist, ignoring.", strempty(arg_root), *h); + continue; + } + if (r < 0) { + RET_GATHER(ret, log_error_errno(r, "Failed to resolve path to hierarchy '%s%s': %m", strempty(arg_root), *h)); + continue; + } + + r = unmerge_hierarchy(image_class, resolved, submounts_path); + if (r < 0) { + RET_GATHER(ret, r); + continue; + } + if (r == 0) + continue; + + /* If we unmerged something, then we have to move the submounts from the hierarchy back into + * place in the host's original hierarchy. */ + + r = move_submounts(submounts_path, resolved); + if (r < 0) + return r; + } + + return ret; +} + +static int unmerge( + ImageClass image_class, + char **hierarchies, + bool no_reload) { + + bool need_to_reload; + int r; + + (void) dlopen_libmount(LOG_DEBUG); + + r = need_reload(image_class, hierarchies, no_reload); + if (r < 0) + return r; + need_to_reload = r > 0; + + r = pidref_safe_fork( + "(sd-unmerge)", + FORK_WAIT|FORK_DEATHSIG_SIGTERM|FORK_LOG|FORK_NEW_MOUNTNS, + /* ret= */ NULL); + if (r < 0) + return r; + if (r == 0) { + /* Child with its own mount namespace */ + + r = unmerge_subprocess(image_class, hierarchies, "/run/systemd/sysext"); + + /* Our namespace ceases to exist here, also implicitly detaching all temporary mounts we + * created below /run. Nice! */ + + _exit(r < 0 ? EXIT_FAILURE : EXIT_SUCCESS); } - return image_class_info[img->class].default_image_policy; + if (need_to_reload) { + r = daemon_reload(); + if (r < 0) + return r; + } + + return 0; } static int merge_subprocess( @@ -2421,6 +2304,86 @@ static int merge(ImageClass image_class, return 1; } +static int verb_status(int argc, char *argv[], uintptr_t _data, void *userdata) { + _cleanup_(table_unrefp) Table *t = NULL; + int r, ret = 0; + + t = table_new("hierarchy", "extensions", "since"); + if (!t) + return log_oom(); + + table_set_ersatz_string(t, TABLE_ERSATZ_DASH); + + STRV_FOREACH(p, arg_hierarchies) { + _cleanup_free_ char *resolved = NULL, *f = NULL, *buf = NULL; + _cleanup_strv_free_ char **l = NULL; + struct stat st; + + r = chase(*p, arg_root, CHASE_PREFIX_ROOT, &resolved, NULL); + if (r == -ENOENT) { + log_debug_errno(r, "Hierarchy '%s%s' does not exist, ignoring.", strempty(arg_root), *p); + continue; + } + if (r < 0) { + log_error_errno(r, "Failed to resolve path to hierarchy '%s%s': %m", strempty(arg_root), *p); + goto inner_fail; + } + + r = is_our_mount_point(arg_image_class, resolved); + if (r < 0) + goto inner_fail; + if (r == 0) { + r = table_add_many( + t, + TABLE_PATH, *p, + TABLE_STRING, "none", + TABLE_SET_COLOR, ansi_grey(), + TABLE_EMPTY); + if (r < 0) + return table_log_add_error(r); + + continue; + } + + f = path_join(resolved, image_class_info[arg_image_class].dot_directory_name, image_class_info[arg_image_class].short_identifier_plural); + if (!f) + return log_oom(); + + r = read_full_file(f, &buf, NULL); + if (r < 0) + return log_error_errno(r, "Failed to open '%s': %m", f); + + l = strv_split_newlines(buf); + if (!l) + return log_oom(); + + if (stat(*p, &st) < 0) + return log_error_errno(errno, "Failed to stat() '%s': %m", *p); + + r = table_add_many( + t, + TABLE_PATH, *p, + TABLE_STRV, l, + TABLE_TIMESTAMP, timespec_load(&st.st_mtim)); + if (r < 0) + return table_log_add_error(r); + + continue; + + inner_fail: + if (ret == 0) + ret = r; + } + + (void) table_set_sort(t, (size_t) 0); + + r = table_print_with_pager(t, arg_json_format_flags, arg_pager_flags, arg_legend); + if (r < 0) + return r; + + return ret; +} + static int image_discover_and_read_metadata(ImageClass image_class, Hashmap **ret_images) { _cleanup_hashmap_free_ Hashmap *images = NULL; Image *img; @@ -2597,6 +2560,72 @@ static int vl_method_merge(sd_varlink *link, sd_json_variant *parameters, sd_var return sd_varlink_reply(link, NULL); } +static int verb_unmerge(int argc, char *argv[], uintptr_t _data, void *userdata) { + int r; + + r = have_effective_cap(CAP_SYS_ADMIN); + if (r < 0) + return log_error_errno(r, "Failed to check if we have enough privileges: %m"); + if (r == 0) + return log_error_errno(SYNTHETIC_ERRNO(EPERM), "Need to be privileged."); + + return unmerge(arg_image_class, + arg_hierarchies, + arg_no_reload); +} + +typedef struct MethodUnmergeParameters { + const char *class; + int no_reload; +} MethodUnmergeParameters; + +static int vl_method_unmerge(sd_varlink *link, sd_json_variant *parameters, sd_varlink_method_flags_t flags, void *userdata) { + + static const sd_json_dispatch_field dispatch_table[] = { + { "class", SD_JSON_VARIANT_STRING, sd_json_dispatch_const_string, offsetof(MethodUnmergeParameters, class), 0 }, + { "noReload", SD_JSON_VARIANT_BOOLEAN, sd_json_dispatch_stdbool, offsetof(MethodUnmergeParameters, no_reload), 0 }, + VARLINK_DISPATCH_POLKIT_FIELD, + {} + }; + MethodUnmergeParameters p = { + .no_reload = -1, + }; + Hashmap **polkit_registry = ASSERT_PTR(userdata); + _cleanup_strv_free_ char **hierarchies = NULL; + ImageClass image_class = arg_image_class; + bool no_reload; + int r; + + assert(link); + + r = sd_varlink_dispatch(link, parameters, dispatch_table, &p); + if (r != 0) + return r; + + no_reload = p.no_reload >= 0 ? p.no_reload : arg_no_reload; + + r = parse_image_class_parameter(link, p.class, &image_class, &hierarchies); + if (r < 0) + return r; + + r = varlink_verify_polkit_async( + link, + /* bus= */ NULL, + image_class_info[image_class].polkit_rw_action_id, + (const char**) STRV_MAKE( + "verb", "unmerge", + "noReload", one_zero(no_reload)), + polkit_registry); + if (r <= 0) + return r; + + r = unmerge(image_class, hierarchies ?: arg_hierarchies, no_reload); + if (r < 0) + return r; + + return sd_varlink_reply(link, NULL); +} + static int refresh( ImageClass image_class, char **hierarchies, @@ -2816,20 +2845,20 @@ static int help(void) { " -h --help Show this help\n" " --version Show package version\n" "\n%3$sOptions:%4$s\n" + " --root=PATH Operate relative to root path\n" " --mutable=yes|no|auto|import|ephemeral|ephemeral-import|help\n" " Specify a mutability mode of the merged hierarchy\n" - " --no-pager Do not pipe output into a pager\n" - " --no-legend Do not show the headers and footers\n" - " --root=PATH Operate relative to root path\n" - " --json=pretty|short|off\n" - " Generate JSON output\n" + " --image-policy=POLICY\n" + " Specify disk image dissection policy\n" + " --noexec=BOOL Whether to mount extension overlay with noexec\n" " --force Ignore version incompatibilities\n" " --no-reload Do not reload the service manager\n" " --always-refresh=yes|no\n" " Do not skip refresh when no changes were found\n" - " --image-policy=POLICY\n" - " Specify disk image dissection policy\n" - " --noexec=BOOL Whether to mount extension overlay with noexec\n" + " --no-pager Do not pipe output into a pager\n" + " --no-legend Do not show the headers and footers\n" + " --json=pretty|short|off\n" + " Generate JSON output\n" "\nSee the %2$s for details.\n", program_invocation_short_name, link, @@ -2892,14 +2921,6 @@ static int parse_argv(int argc, char *argv[]) { case ARG_VERSION: return version(); - case ARG_NO_PAGER: - arg_pager_flags |= PAGER_DISABLE; - break; - - case ARG_NO_LEGEND: - arg_legend = false; - break; - case ARG_ROOT: r = parse_path_argument(optarg, false, &arg_root); if (r < 0) @@ -2908,15 +2929,19 @@ static int parse_argv(int argc, char *argv[]) { arg_no_reload = true; break; - case ARG_JSON: - r = parse_json_argument(optarg, &arg_json_format_flags); - if (r <= 0) - return r; + case ARG_MUTABLE: + if (streq(optarg, "help")) { + if (arg_legend) + puts("Known mutability modes:"); - break; + return DUMP_STRING_TABLE(mutable_mode, MutableMode, _MUTABLE_MAX); + } - case ARG_FORCE: - arg_force = true; + r = parse_mutable_mode(optarg); + if (r < 0) + return log_error_errno(r, "Failed to parse argument to --mutable=: %s", optarg); + arg_mutable = r; + arg_mutable_set = true; break; case ARG_IMAGE_POLICY: @@ -2936,6 +2961,10 @@ static int parse_argv(int argc, char *argv[]) { arg_noexec = r; break; + case ARG_FORCE: + arg_force = true; + break; + case ARG_NO_RELOAD: arg_no_reload = true; break; @@ -2946,19 +2975,19 @@ static int parse_argv(int argc, char *argv[]) { return r; break; - case ARG_MUTABLE: - if (streq(optarg, "help")) { - if (arg_legend) - puts("Known mutability modes:"); + case ARG_NO_PAGER: + arg_pager_flags |= PAGER_DISABLE; + break; - return DUMP_STRING_TABLE(mutable_mode, MutableMode, _MUTABLE_MAX); - } + case ARG_NO_LEGEND: + arg_legend = false; + break; + + case ARG_JSON: + r = parse_json_argument(optarg, &arg_json_format_flags); + if (r <= 0) + return r; - r = parse_mutable_mode(optarg); - if (r < 0) - return log_error_errno(r, "Failed to parse argument to --mutable=: %s", optarg); - arg_mutable = r; - arg_mutable_set = true; break; case '?': @@ -2977,36 +3006,6 @@ static int parse_argv(int argc, char *argv[]) { return 1; } -static int parse_env(void) { - const char *env_var; - int r; - - env_var = secure_getenv(image_class_info[arg_image_class].mode_env); - if (env_var) { - r = parse_mutable_mode(env_var); - if (r < 0) - log_warning("Failed to parse %s environment variable value '%s'. Ignoring.", - image_class_info[arg_image_class].mode_env, env_var); - else { - arg_mutable = r; - arg_mutable_set = true; - } - } - - env_var = secure_getenv(image_class_info[arg_image_class].opts_env); - if (env_var) - arg_overlayfs_mount_options = env_var; - - /* For debugging purposes it might make sense to do this for other hierarchies than /usr/ and - * /opt/, but let's make that a hacker/debugging feature, i.e. env var instead of cmdline - * switch. */ - r = parse_env_extension_hierarchies(&arg_hierarchies, image_class_info[arg_image_class].name_env); - if (r < 0) - return log_error_errno(r, "Failed to parse %s environment variable: %m", image_class_info[arg_image_class].name_env); - - return 0; -} - static int sysext_main(int argc, char *argv[]) { static const Verb verbs[] = { From a8971f6c918711939413aafa93a234ba479fa016 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Zbigniew=20J=C4=99drzejewski-Szmek?= Date: Thu, 30 Apr 2026 00:45:32 +0200 Subject: [PATCH 118/242] sysext: convert to option and verb macros Co-developed-by: Claude Opus 4.7 --- src/sysext/sysext.c | 188 +++++++++++++++++--------------------------- 1 file changed, 70 insertions(+), 118 deletions(-) diff --git a/src/sysext/sysext.c b/src/sysext/sysext.c index e4d9e7e0c355d..40d39b7654d2c 100644 --- a/src/sysext/sysext.c +++ b/src/sysext/sysext.c @@ -1,7 +1,6 @@ /* SPDX-License-Identifier: LGPL-2.1-or-later */ #include -#include #include #include #include @@ -11,6 +10,7 @@ #include "sd-json.h" #include "sd-varlink.h" +#include "ansi-color.h" #include "argv-util.h" #include "blkid-util.h" #include "blockdev-util.h" @@ -34,6 +34,7 @@ #include "format-table.h" #include "fs-util.h" #include "hashmap.h" +#include "help-util.h" #include "image-policy.h" #include "initrd-util.h" #include "label-util.h" /* IWYU pragma: keep */ @@ -44,13 +45,13 @@ #include "mkdir.h" #include "mount-util.h" #include "mountpoint-util.h" +#include "options.h" #include "os-util.h" #include "pager.h" #include "parse-argument.h" #include "parse-util.h" #include "path-util.h" #include "pidref.h" -#include "pretty-print.h" #include "proc-cmdline.h" #include "process-util.h" #include "rm-rf.h" @@ -2304,6 +2305,7 @@ static int merge(ImageClass image_class, return 1; } +VERB(verb_status, "status", NULL, VERB_ANY, 1, VERB_DEFAULT, "Show current merge status (default)"); static int verb_status(int argc, char *argv[], uintptr_t _data, void *userdata) { _cleanup_(table_unrefp) Table *t = NULL; int r, ret = 0; @@ -2440,6 +2442,7 @@ static int look_for_merged_hierarchies( return 0; } +VERB_NOARG(verb_merge, "merge", "Merge extensions into relevant hierarchies"); static int verb_merge(int argc, char *argv[], uintptr_t _data, void *userdata) { _cleanup_hashmap_free_ Hashmap *images = NULL; const char *which; @@ -2560,6 +2563,7 @@ static int vl_method_merge(sd_varlink *link, sd_json_variant *parameters, sd_var return sd_varlink_reply(link, NULL); } +VERB_NOARG(verb_unmerge, "unmerge", "Unmerge extensions from relevant hierarchies"); static int verb_unmerge(int argc, char *argv[], uintptr_t _data, void *userdata) { int r; @@ -2669,6 +2673,7 @@ static int refresh( return r; } +VERB_NOARG(verb_refresh, "refresh", "Unmerge/merge extensions again"); static int verb_refresh(int argc, char *argv[], uintptr_t _data, void *userdata) { int r; @@ -2735,6 +2740,7 @@ static int vl_method_refresh(sd_varlink *link, sd_json_variant *parameters, sd_v return sd_varlink_reply(link, NULL); } +VERB_NOARG(verb_list, "list", "List installed extensions"); static int verb_list(int argc, char *argv[], uintptr_t _data, void *userdata) { _cleanup_hashmap_free_ Hashmap *images = NULL; _cleanup_(table_unrefp) Table *t = NULL; @@ -2827,125 +2833,91 @@ static int vl_method_list(sd_varlink *link, sd_json_variant *parameters, sd_varl } static int help(void) { - _cleanup_free_ char *link = NULL; + _cleanup_(table_unrefp) Table *verbs = NULL, *commands = NULL, *options = NULL; int r; - r = terminal_urlify_man(image_class_info[arg_image_class].full_identifier, "8", &link); + r = verbs_get_help_table(&verbs); if (r < 0) - return log_oom(); + return r; - printf("%1$s [OPTIONS...] COMMAND\n" - "\n%5$s%7$s%6$s\n" - "\n%3$sCommands:%4$s\n" - " status Show current merge status (default)\n" - " merge Merge extensions into relevant hierarchies\n" - " unmerge Unmerge extensions from relevant hierarchies\n" - " refresh Unmerge/merge extensions again\n" - " list List installed extensions\n" - " -h --help Show this help\n" - " --version Show package version\n" - "\n%3$sOptions:%4$s\n" - " --root=PATH Operate relative to root path\n" - " --mutable=yes|no|auto|import|ephemeral|ephemeral-import|help\n" - " Specify a mutability mode of the merged hierarchy\n" - " --image-policy=POLICY\n" - " Specify disk image dissection policy\n" - " --noexec=BOOL Whether to mount extension overlay with noexec\n" - " --force Ignore version incompatibilities\n" - " --no-reload Do not reload the service manager\n" - " --always-refresh=yes|no\n" - " Do not skip refresh when no changes were found\n" - " --no-pager Do not pipe output into a pager\n" - " --no-legend Do not show the headers and footers\n" - " --json=pretty|short|off\n" - " Generate JSON output\n" - "\nSee the %2$s for details.\n", - program_invocation_short_name, - link, - ansi_underline(), - ansi_normal(), - ansi_highlight(), - ansi_normal(), - image_class_info[arg_image_class].blurb); + r = option_parser_get_help_table(&commands); + if (r < 0) + return r; - return 0; -} + r = option_parser_get_help_table_group("Options", &options); + if (r < 0) + return r; -static int verb_help(int argc, char *argv[], uintptr_t _data, void *userdata) { - return help(); -} + (void) table_sync_column_widths(0, verbs, commands, options); -static int parse_argv(int argc, char *argv[]) { - enum { - ARG_VERSION = 0x100, - ARG_NO_PAGER, - ARG_NO_LEGEND, - ARG_ROOT, - ARG_JSON, - ARG_FORCE, - ARG_IMAGE_POLICY, - ARG_NOEXEC, - ARG_NO_RELOAD, - ARG_ALWAYS_REFRESH, - ARG_MUTABLE, - }; + help_cmdline("[OPTIONS...] COMMAND"); + help_abstract(image_class_info[arg_image_class].blurb); - static const struct option options[] = { - { "help", no_argument, NULL, 'h' }, - { "version", no_argument, NULL, ARG_VERSION }, - { "no-pager", no_argument, NULL, ARG_NO_PAGER }, - { "no-legend", no_argument, NULL, ARG_NO_LEGEND }, - { "root", required_argument, NULL, ARG_ROOT }, - { "json", required_argument, NULL, ARG_JSON }, - { "force", no_argument, NULL, ARG_FORCE }, - { "image-policy", required_argument, NULL, ARG_IMAGE_POLICY }, - { "noexec", required_argument, NULL, ARG_NOEXEC }, - { "no-reload", no_argument, NULL, ARG_NO_RELOAD }, - { "always-refresh", required_argument, NULL, ARG_ALWAYS_REFRESH }, - { "mutable", required_argument, NULL, ARG_MUTABLE }, - {} - }; + help_section("Commands"); + r = table_print_or_warn(verbs); + if (r < 0) + return r; + r = table_print_or_warn(commands); + if (r < 0) + return r; + + help_section("Options"); + r = table_print_or_warn(options); + if (r < 0) + return r; - int c, r; + help_man_page_reference(image_class_info[arg_image_class].full_identifier, "8"); + return 0; +} + +VERB_COMMON_HELP_HIDDEN(help); + +static int parse_argv(int argc, char *argv[], char ***ret_args) { + int r; assert(argc >= 0); assert(argv); + assert(ret_args); - while ((c = getopt_long(argc, argv, "h", options, NULL)) >= 0) + OptionParser opts = { argc, argv }; + FOREACH_OPTION_OR_RETURN(c, &opts) switch (c) { - case 'h': + OPTION_COMMON_HELP: return help(); - case ARG_VERSION: + OPTION_COMMON_VERSION: return version(); - case ARG_ROOT: - r = parse_path_argument(optarg, false, &arg_root); + OPTION_GROUP("Options"): {} + + OPTION_LONG("root", "PATH", "Operate relative to root PATH"): + r = parse_path_argument(opts.arg, false, &arg_root); if (r < 0) return r; /* If --root= is provided, do not reload the service manager */ arg_no_reload = true; break; - case ARG_MUTABLE: - if (streq(optarg, "help")) { + OPTION_LONG("mutable", "MODE", + "Specify a mutability mode (yes, no, auto, import, ephemeral, ephemeral-import, help)"): + if (streq(opts.arg, "help")) { if (arg_legend) puts("Known mutability modes:"); return DUMP_STRING_TABLE(mutable_mode, MutableMode, _MUTABLE_MAX); } - r = parse_mutable_mode(optarg); + r = parse_mutable_mode(opts.arg); if (r < 0) - return log_error_errno(r, "Failed to parse argument to --mutable=: %s", optarg); + return log_error_errno(r, "Failed to parse argument to --mutable=: %s", opts.arg); arg_mutable = r; arg_mutable_set = true; break; - case ARG_IMAGE_POLICY: - r = parse_image_policy_argument(optarg, &arg_image_policy); + OPTION_LONG("image-policy", "POLICY", "Specify disk image dissection policy"): + r = parse_image_policy_argument(opts.arg, &arg_image_policy); if (r < 0) return r; /* When the CLI flag is given we initialize even if NULL @@ -2953,48 +2925,41 @@ static int parse_argv(int argc, char *argv[]) { arg_image_policy_set = true; break; - case ARG_NOEXEC: - r = parse_boolean_argument("--noexec", optarg, NULL); + OPTION_LONG("noexec", "BOOL", "Whether to mount extension overlay with noexec"): + r = parse_boolean_argument("--noexec", opts.arg, NULL); if (r < 0) return r; arg_noexec = r; break; - case ARG_FORCE: + OPTION_LONG("force", NULL, "Ignore version incompatibilities"): arg_force = true; break; - case ARG_NO_RELOAD: + OPTION_LONG("no-reload", NULL, "Do not reload the service manager"): arg_no_reload = true; break; - case ARG_ALWAYS_REFRESH: - r = parse_boolean_argument("--always-refresh", optarg, &arg_always_refresh); + OPTION_LONG("always-refresh", "BOOL", "Whether to refresh when no changes were found"): + r = parse_boolean_argument("--always-refresh", opts.arg, &arg_always_refresh); if (r < 0) return r; break; - case ARG_NO_PAGER: + OPTION_COMMON_NO_PAGER: arg_pager_flags |= PAGER_DISABLE; break; - case ARG_NO_LEGEND: + OPTION_COMMON_NO_LEGEND: arg_legend = false; break; - case ARG_JSON: - r = parse_json_argument(optarg, &arg_json_format_flags); + OPTION_COMMON_JSON: + r = parse_json_argument(opts.arg, &arg_json_format_flags); if (r <= 0) return r; - break; - - case '?': - return -EINVAL; - - default: - assert_not_reached(); } r = sd_varlink_invocation(SD_VARLINK_ALLOW_ACCEPT); @@ -3003,25 +2968,12 @@ static int parse_argv(int argc, char *argv[]) { if (r > 0) arg_varlink = true; + *ret_args = option_parser_get_args(&opts); return 1; } -static int sysext_main(int argc, char *argv[]) { - - static const Verb verbs[] = { - { "status", VERB_ANY, 1, VERB_DEFAULT, verb_status }, - { "merge", VERB_ANY, 1, 0, verb_merge }, - { "unmerge", VERB_ANY, 1, 0, verb_unmerge }, - { "refresh", VERB_ANY, 1, 0, verb_refresh }, - { "list", VERB_ANY, 1, 0, verb_list }, - { "help", VERB_ANY, 1, 0, verb_help }, - {} - }; - - return dispatch_verb(argc, argv, verbs, NULL); -} - static int run(int argc, char *argv[]) { + char **args = NULL; int r; log_setup(); @@ -3034,7 +2986,7 @@ static int run(int argc, char *argv[]) { return r; /* Parse command line */ - r = parse_argv(argc, argv); + r = parse_argv(argc, argv, &args); if (r <= 0) return r; @@ -3096,7 +3048,7 @@ static int run(int argc, char *argv[]) { return EXIT_SUCCESS; } - return sysext_main(argc, argv); + return dispatch_verb_with_args(args, NULL); } DEFINE_MAIN_FUNCTION(run); From 4ce94c3bb4769606a991a081f1827a467736b939 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Zbigniew=20J=C4=99drzejewski-Szmek?= Date: Thu, 30 Apr 2026 00:00:47 +0200 Subject: [PATCH 119/242] test-modem-manager-mock: convert to OPTION macros --help and --version are moved to the beginning of the option list. This is the usual location. Custom '-v' alias for --version is dropped. It is not used by anything and it's better to follow the usual style. Co-developed-by: Claude Opus 4.7 --- src/network/test-modem-manager-mock.c | 113 ++++++++++++-------------- 1 file changed, 53 insertions(+), 60 deletions(-) diff --git a/src/network/test-modem-manager-mock.c b/src/network/test-modem-manager-mock.c index 60f0dfa8d4ea2..7ddb18828af11 100644 --- a/src/network/test-modem-manager-mock.c +++ b/src/network/test-modem-manager-mock.c @@ -10,16 +10,17 @@ * - Simple.Connect on /org/freedesktop/ModemManager1/Modem/0 */ -#include - #include "sd-bus.h" #include "sd-daemon.h" #include "sd-event.h" #include "alloc-util.h" #include "build.h" +#include "format-table.h" +#include "help-util.h" #include "log.h" #include "main-func.h" +#include "options.h" #include "parse-util.h" #include "string-util.h" @@ -363,82 +364,74 @@ static int filter_handler(sd_bus_message *m, void *userdata, sd_bus_error *error return 0; } +static int help(void) { + _cleanup_(table_unrefp) Table *options = NULL; + int r; + + r = option_parser_get_help_table(&options); + if (r < 0) + return r; + + help_cmdline("[OPTIONS...]"); + help_abstract("Mock ModemManager D-Bus service for testing."); + + help_section("Options"); + return table_print_or_warn(options); +} + static int parse_argv(int argc, char *argv[]) { - enum { - ARG_IFNAME = 0x100, - ARG_IPV4_ADDRESS, - ARG_IPV4_GATEWAY, - ARG_IPV4_PREFIX, - ARG_IPV6_ADDRESS, - ARG_IPV6_GATEWAY, - ARG_IPV6_PREFIX, - }; - - static const struct option options[] = { - { "ifname", required_argument, NULL, ARG_IFNAME }, - { "ipv4-address", required_argument, NULL, ARG_IPV4_ADDRESS }, - { "ipv4-gateway", required_argument, NULL, ARG_IPV4_GATEWAY }, - { "ipv4-prefix", required_argument, NULL, ARG_IPV4_PREFIX }, - { "ipv6-address", required_argument, NULL, ARG_IPV6_ADDRESS }, - { "ipv6-gateway", required_argument, NULL, ARG_IPV6_GATEWAY }, - { "ipv6-prefix", required_argument, NULL, ARG_IPV6_PREFIX }, - { "version", no_argument, NULL, 'v' }, - { "help", no_argument, NULL, 'h' }, - {} - }; - - int c, r; - - while ((c = getopt_long(argc, argv, "vh", options, NULL)) >= 0) + int r; + + assert(argc >= 0); + assert(argv); + + OptionParser opts = { argc, argv }; + + FOREACH_OPTION_OR_RETURN(c, &opts) switch (c) { - case ARG_IFNAME: - if (free_and_strdup(&arg_ifname, optarg) < 0) + + OPTION_COMMON_HELP: + return help(); + + OPTION_COMMON_VERSION: + return version(); + + OPTION_LONG("ifname", "NAME", "Interface name"): + if (free_and_strdup(&arg_ifname, opts.arg) < 0) return log_oom(); break; - case ARG_IPV4_ADDRESS: - if (free_and_strdup(&arg_ipv4_address, optarg) < 0) + + OPTION_LONG("ipv4-address", "ADDR", "IPv4 address"): + if (free_and_strdup(&arg_ipv4_address, opts.arg) < 0) return log_oom(); break; - case ARG_IPV4_GATEWAY: - if (free_and_strdup(&arg_ipv4_gateway, optarg) < 0) + + OPTION_LONG("ipv4-gateway", "ADDR", "IPv4 gateway"): + if (free_and_strdup(&arg_ipv4_gateway, opts.arg) < 0) return log_oom(); break; - case ARG_IPV4_PREFIX: - r = safe_atou32(optarg, &arg_ipv4_prefix); + + OPTION_LONG("ipv4-prefix", "LEN", "IPv4 prefix length"): + r = safe_atou32(opts.arg, &arg_ipv4_prefix); if (r < 0) return log_error_errno(r, "Failed to parse IPv4 prefix length: %m"); break; - case ARG_IPV6_ADDRESS: - if (free_and_strdup(&arg_ipv6_address, optarg) < 0) + + OPTION_LONG("ipv6-address", "ADDR", "IPv6 address"): + if (free_and_strdup(&arg_ipv6_address, opts.arg) < 0) return log_oom(); break; - case ARG_IPV6_GATEWAY: - if (free_and_strdup(&arg_ipv6_gateway, optarg) < 0) + + OPTION_LONG("ipv6-gateway", "ADDR", "IPv6 gateway"): + if (free_and_strdup(&arg_ipv6_gateway, opts.arg) < 0) return log_oom(); break; - case ARG_IPV6_PREFIX: - r = safe_atou32(optarg, &arg_ipv6_prefix); + + OPTION_LONG("ipv6-prefix", "LEN", "IPv6 prefix length"): + r = safe_atou32(opts.arg, &arg_ipv6_prefix); if (r < 0) return log_error_errno(r, "Failed to parse IPv6 prefix length: %m"); break; - case 'v': - return version(); - case 'h': - printf("Usage: %s [OPTIONS...]\n\n" - "Mock ModemManager D-Bus service for testing.\n\n" - " --ifname=NAME Interface name\n" - " --ipv4-address=ADDR IPv4 address\n" - " --ipv4-gateway=ADDR IPv4 gateway\n" - " --ipv4-prefix=LEN IPv4 prefix length\n" - " --ipv6-address=ADDR IPv6 address\n" - " --ipv6-gateway=ADDR IPv6 gateway\n" - " --ipv6-prefix=LEN IPv6 prefix length\n" - " -h, --help Show this help\n" - " -v, --version Show version\n", - program_invocation_short_name); - return 0; - default: - return -EINVAL; } if (!arg_ifname) From d5301b635f91226d4f1be9671bf79aff8f59899b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Zbigniew=20J=C4=99drzejewski-Szmek?= Date: Thu, 30 Apr 2026 00:02:27 +0200 Subject: [PATCH 120/242] networkd-wait-online: convert to OPTION macros Also fix a latent bug in parse_interface_with_operstate_range() where the global 'optarg' was used instead of the 'str' parameter when extracting the interface name; with getopt removed they would have diverged. The help strings are adjusted a bit to be grammatical and short so that the table formatting is easier. Co-developed-by: Claude Opus 4.7 --- src/network/wait-online/wait-online.c | 120 +++++++++----------------- 1 file changed, 42 insertions(+), 78 deletions(-) diff --git a/src/network/wait-online/wait-online.c b/src/network/wait-online/wait-online.c index b1d0b9cde212d..e566f89d02e57 100644 --- a/src/network/wait-online/wait-online.c +++ b/src/network/wait-online/wait-online.c @@ -1,6 +1,5 @@ /* SPDX-License-Identifier: LGPL-2.1-or-later */ -#include #include #include "sd-event.h" @@ -8,11 +7,13 @@ #include "alloc-util.h" #include "build.h" #include "daemon-util.h" +#include "format-table.h" #include "hashmap.h" +#include "help-util.h" #include "log.h" #include "main-func.h" +#include "options.h" #include "parse-argument.h" -#include "pretty-print.h" #include "socket-util.h" #include "strv.h" #include "time-util.h" @@ -31,32 +32,22 @@ STATIC_DESTRUCTOR_REGISTER(arg_interfaces, hashmap_freep); STATIC_DESTRUCTOR_REGISTER(arg_ignore, strv_freep); static int help(void) { - _cleanup_free_ char *link = NULL; + _cleanup_(table_unrefp) Table *options = NULL; int r; - r = terminal_urlify_man("systemd-networkd-wait-online.service", "8", &link); + r = option_parser_get_help_table(&options); if (r < 0) - return log_oom(); + return r; - printf("%s [OPTIONS...]\n\n" - "Block until network is configured.\n\n" - " -h --help Show this help\n" - " --version Print version string\n" - " -q --quiet Do not show status information\n" - " -i --interface=INTERFACE[:MIN_OPERSTATE[:MAX_OPERSTATE]]\n" - " Block until at least these interfaces have appeared\n" - " --ignore=INTERFACE Don't take these interfaces into account\n" - " -o --operational-state=MIN_OPERSTATE[:MAX_OPERSTATE]\n" - " Required operational state\n" - " -4 --ipv4 Requires at least one IPv4 address\n" - " -6 --ipv6 Requires at least one IPv6 address\n" - " --any Wait until at least one of the interfaces is online\n" - " --timeout=SECS Maximum time to wait for network connectivity\n" - " --dns Requires at least one DNS server to be accessible\n" - "\nSee the %s for details.\n", - program_invocation_short_name, - link); + help_cmdline("[OPTIONS...]"); + help_abstract("Block until network is configured."); + help_section("Options"); + r = table_print_or_warn(options); + if (r < 0) + return r; + + help_man_page_reference("systemd-networkd-wait-online.service", "8"); return 0; } @@ -78,7 +69,7 @@ static int parse_interface_with_operstate_range(const char *str) { if (r < 0) return log_error_errno(r, "Invalid operational state range: %s", p + 1); - ifname = strndup(optarg, p - optarg); + ifname = strndup(str, p - str); } else { *range = LINK_OPERSTATE_RANGE_INVALID; ifname = strdup(str); @@ -105,97 +96,70 @@ static int parse_interface_with_operstate_range(const char *str) { } static int parse_argv(int argc, char *argv[]) { - - enum { - ARG_VERSION = 0x100, - ARG_IGNORE, - ARG_ANY, - ARG_TIMEOUT, - ARG_DNS, - }; - - static const struct option options[] = { - { "help", no_argument, NULL, 'h' }, - { "version", no_argument, NULL, ARG_VERSION }, - { "quiet", no_argument, NULL, 'q' }, - { "interface", required_argument, NULL, 'i' }, - { "ignore", required_argument, NULL, ARG_IGNORE }, - { "operational-state", required_argument, NULL, 'o' }, - { "ipv4", no_argument, NULL, '4' }, - { "ipv6", no_argument, NULL, '6' }, - { "any", no_argument, NULL, ARG_ANY }, - { "timeout", required_argument, NULL, ARG_TIMEOUT }, - { "dns", optional_argument, NULL, ARG_DNS }, - {} - }; - - int c, r; + int r; assert(argc >= 0); assert(argv); - while ((c = getopt_long(argc, argv, "hi:qo:46", options, NULL)) >= 0) + OptionParser opts = { argc, argv }; + FOREACH_OPTION_OR_RETURN(c, &opts) switch (c) { - case 'h': - help(); - return 0; + OPTION_COMMON_HELP: + return help(); - case 'q': + OPTION_COMMON_VERSION: + return version(); + + OPTION('q', "quiet", NULL, "Do not show status information"): arg_quiet = true; break; - case ARG_VERSION: - return version(); - - case 'i': - r = parse_interface_with_operstate_range(optarg); + OPTION('i', "interface", "IFNAME[:MIN[:MAX]]", + "Block until at least these interfaces have appeared, " + "in the operational state between MIN and MAX"): + r = parse_interface_with_operstate_range(opts.arg); if (r < 0) return r; break; - case ARG_IGNORE: - if (strv_extend(&arg_ignore, optarg) < 0) + OPTION_LONG("ignore", "IFNAME", "Don't take these interfaces into account"): + if (strv_extend(&arg_ignore, opts.arg) < 0) return log_oom(); - break; - case 'o': - r = parse_operational_state_range(optarg, &arg_required_operstate); + OPTION('o', "operational-state", "MIN[:MAX]", + "Require operational state between MIN and MAX"): + r = parse_operational_state_range(opts.arg, &arg_required_operstate); if (r < 0) - return log_error_errno(r, "Invalid operational state range '%s'", optarg); + return log_error_errno(r, "Invalid operational state range '%s'", opts.arg); break; - case '4': + OPTION('4', "ipv4", NULL, "Require at least one IPv4 address"): arg_required_family |= ADDRESS_FAMILY_IPV4; break; - case '6': + OPTION('6', "ipv6", NULL, "Require at least one IPv6 address"): arg_required_family |= ADDRESS_FAMILY_IPV6; break; - case ARG_ANY: + OPTION_LONG("any", NULL, "Wait until at least one of the interfaces is online"): arg_any = true; break; - case ARG_TIMEOUT: - r = parse_sec(optarg, &arg_timeout); + OPTION_LONG("timeout", "SECS", "Maximum time to wait for network connectivity"): + r = parse_sec(opts.arg, &arg_timeout); if (r < 0) return r; break; - case ARG_DNS: - r = parse_boolean_argument("--dns", optarg, &arg_requires_dns); + OPTION_LONG_FLAGS(OPTION_OPTIONAL_ARG, "dns", "BOOL", + "Require at least one DNS server to be accessible"): + r = parse_boolean_argument("--dns", opts.arg, &arg_requires_dns); if (r < 0) return r; break; - - case '?': - return -EINVAL; - - default: - assert_not_reached(); } return 1; From 6557a98705f67e39a2eb262b6a5d9e0641ef1e31 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Zbigniew=20J=C4=99drzejewski-Szmek?= Date: Thu, 30 Apr 2026 00:05:02 +0200 Subject: [PATCH 121/242] journal-upload: convert to OPTION macros The help strings are adjusted a tiny bit. Co-developed-by: Claude Opus 4.7 --- src/journal-remote/journal-upload.c | 189 +++++++++++----------------- 1 file changed, 72 insertions(+), 117 deletions(-) diff --git a/src/journal-remote/journal-upload.c b/src/journal-remote/journal-upload.c index 23bb48f687620..f4b698ae415d6 100644 --- a/src/journal-remote/journal-upload.c +++ b/src/journal-remote/journal-upload.c @@ -1,7 +1,6 @@ /* SPDX-License-Identifier: LGPL-2.1-or-later */ #include -#include #include #include @@ -17,10 +16,12 @@ #include "extract-word.h" #include "fd-util.h" #include "fileio.h" +#include "format-table.h" #include "format-util.h" #include "fs-util.h" #include "glob-util.h" #include "hashmap.h" +#include "help-util.h" #include "journal-header-util.h" #include "journal-upload.h" #include "journal-util.h" @@ -28,9 +29,9 @@ #include "logs-show.h" #include "main-func.h" #include "mkdir.h" +#include "options.h" #include "parse-argument.h" #include "parse-helpers.h" -#include "pretty-print.h" #include "process-util.h" #include "string-util.h" #include "strv.h" @@ -683,194 +684,145 @@ static int parse_config(void) { } static int help(void) { - _cleanup_free_ char *link = NULL; + _cleanup_(table_unrefp) Table *options = NULL; int r; - r = terminal_urlify_man("systemd-journal-upload.service", "8", &link); + r = option_parser_get_help_table(&options); if (r < 0) - return log_oom(); + return r; - printf("%s -u URL {FILE|-}...\n\n" - "Upload journal events to a remote server.\n\n" - " -h --help Show this help\n" - " --version Show package version\n" - " -u --url=URL Upload to this address (default port " - STRINGIFY(DEFAULT_PORT) ")\n" - " --key=FILENAME Specify key in PEM format (default:\n" - " \"" PRIV_KEY_FILE "\")\n" - " --cert=FILENAME Specify certificate in PEM format (default:\n" - " \"" CERT_FILE "\")\n" - " --trust=FILENAME|all Specify CA certificate or disable checking (default:\n" - " \"" TRUST_FILE "\")\n" - " --system Use the system journal\n" - " --user Use the user journal for the current user\n" - " -m --merge Use all available journals\n" - " -M --machine=CONTAINER Operate on local container\n" - " --namespace=NAMESPACE Use journal files from namespace\n" - " -D --directory=PATH Use journal files from directory\n" - " --file=PATH Use this journal file\n" - " --cursor=CURSOR Start at the specified cursor\n" - " --after-cursor=CURSOR Start after the specified cursor\n" - " --follow[=BOOL] Do [not] wait for input\n" - " --save-state[=FILE] Save uploaded cursors (default \n" - " " STATE_FILE ")\n" - "\nSee the %s for details.\n", - program_invocation_short_name, - link); + help_cmdline("-u URL {FILE|-}..."); + help_abstract("Upload journal events to a remote server."); + + help_section("Options"); + r = table_print_or_warn(options); + if (r < 0) + return r; + help_man_page_reference("systemd-journal-upload.service", "8"); return 0; } -static int parse_argv(int argc, char *argv[]) { - enum { - ARG_VERSION = 0x100, - ARG_KEY, - ARG_CERT, - ARG_TRUST, - ARG_USER, - ARG_SYSTEM, - ARG_FILE, - ARG_CURSOR, - ARG_AFTER_CURSOR, - ARG_FOLLOW, - ARG_SAVE_STATE, - ARG_NAMESPACE, - }; - - static const struct option options[] = { - { "help", no_argument, NULL, 'h' }, - { "version", no_argument, NULL, ARG_VERSION }, - { "url", required_argument, NULL, 'u' }, - { "key", required_argument, NULL, ARG_KEY }, - { "cert", required_argument, NULL, ARG_CERT }, - { "trust", required_argument, NULL, ARG_TRUST }, - { "system", no_argument, NULL, ARG_SYSTEM }, - { "user", no_argument, NULL, ARG_USER }, - { "merge", no_argument, NULL, 'm' }, - { "machine", required_argument, NULL, 'M' }, - { "namespace", required_argument, NULL, ARG_NAMESPACE }, - { "directory", required_argument, NULL, 'D' }, - { "file", required_argument, NULL, ARG_FILE }, - { "cursor", required_argument, NULL, ARG_CURSOR }, - { "after-cursor", required_argument, NULL, ARG_AFTER_CURSOR }, - { "follow", optional_argument, NULL, ARG_FOLLOW }, - { "save-state", optional_argument, NULL, ARG_SAVE_STATE }, - {} - }; - - int c, r; +static int parse_argv(int argc, char *argv[], char ***ret_args) { + int r; assert(argc >= 0); assert(argv); + assert(ret_args); + + OptionParser opts = { argc, argv }; - while ((c = getopt_long(argc, argv, "hu:mM:D:", options, NULL)) >= 0) + FOREACH_OPTION_OR_RETURN(c, &opts) switch (c) { - case 'h': + + OPTION_COMMON_HELP: return help(); - case ARG_VERSION: + OPTION_COMMON_VERSION: return version(); - case 'u': - r = free_and_strdup_warn(&arg_url, optarg); + OPTION('u', "url", "URL", + "Upload to this address (default port " STRINGIFY(DEFAULT_PORT) ")"): + r = free_and_strdup_warn(&arg_url, opts.arg); if (r < 0) return r; break; - case ARG_KEY: - r = free_and_strdup_warn(&arg_key, optarg); + OPTION_LONG("key", "FILENAME", + "Specify key in PEM format (default: \"" PRIV_KEY_FILE "\")"): + r = free_and_strdup_warn(&arg_key, opts.arg); if (r < 0) return r; break; - case ARG_CERT: - r = free_and_strdup_warn(&arg_cert, optarg); + OPTION_LONG("cert", "FILENAME", + "Specify certificate in PEM format (default: \"" CERT_FILE "\")"): + r = free_and_strdup_warn(&arg_cert, opts.arg); if (r < 0) return r; break; - case ARG_TRUST: - r = free_and_strdup_warn(&arg_trust, optarg); + OPTION_LONG("trust", "FILENAME|all", + "Specify CA certificate or disable checking (default: \"" TRUST_FILE "\")"): + r = free_and_strdup_warn(&arg_trust, opts.arg); if (r < 0) return r; break; - case ARG_SYSTEM: + OPTION_LONG("system", NULL, "Use the system journal"): arg_journal_type |= SD_JOURNAL_SYSTEM; break; - case ARG_USER: + OPTION_LONG("user", NULL, "Use the user journal for the current user"): arg_journal_type |= SD_JOURNAL_CURRENT_USER; break; - case 'm': + OPTION('m', "merge", NULL, "Use all available journals"): arg_merge = true; break; - case 'M': - r = free_and_strdup_warn(&arg_machine, optarg); + OPTION_COMMON_MACHINE: + r = free_and_strdup_warn(&arg_machine, opts.arg); if (r < 0) return r; break; - case ARG_NAMESPACE: - if (streq(optarg, "*")) { + OPTION_LONG("namespace", "NAMESPACE", "Use journal files from namespace"): + if (streq(opts.arg, "*")) { arg_namespace_flags = SD_JOURNAL_ALL_NAMESPACES; arg_namespace = mfree(arg_namespace); r = 0; - } else if (startswith(optarg, "+")) { + } else if (startswith(opts.arg, "+")) { arg_namespace_flags = SD_JOURNAL_INCLUDE_DEFAULT_NAMESPACE; - r = free_and_strdup_warn(&arg_namespace, optarg + 1); - } else if (isempty(optarg)) { + r = free_and_strdup_warn(&arg_namespace, opts.arg + 1); + } else if (isempty(opts.arg)) { arg_namespace_flags = 0; arg_namespace = mfree(arg_namespace); r = 0; } else { arg_namespace_flags = 0; - r = free_and_strdup_warn(&arg_namespace, optarg); + r = free_and_strdup_warn(&arg_namespace, opts.arg); } if (r < 0) return r; break; - case 'D': - r = free_and_strdup_warn(&arg_directory, optarg); + OPTION('D', "directory", "PATH", "Use journal files from this directory"): + r = free_and_strdup_warn(&arg_directory, opts.arg); if (r < 0) return r; break; - case ARG_FILE: - r = glob_extend(&arg_file, optarg, GLOB_NOCHECK); + OPTION_LONG("file", "PATH", "Use this journal file"): + r = glob_extend(&arg_file, opts.arg, GLOB_NOCHECK); if (r < 0) return log_error_errno(r, "Failed to add paths: %m"); break; - case ARG_CURSOR: - case ARG_AFTER_CURSOR: - r = free_and_strdup_warn(&arg_cursor, optarg); + OPTION_LONG_DATA("after-cursor", "CURSOR", /* data= */ true, + "Start after the specified cursor"): {} + OPTION_LONG_DATA("cursor", "CURSOR", /* data= */ false, + "Start at the specified cursor"): + r = free_and_strdup_warn(&arg_cursor, opts.arg); if (r < 0) return r; - arg_after_cursor = c == ARG_AFTER_CURSOR; + arg_after_cursor = opts.opt->data; break; - case ARG_FOLLOW: - r = parse_boolean_argument("--follow", optarg, NULL); + OPTION_LONG_FLAGS(OPTION_OPTIONAL_ARG, "follow", "BOOL", + "Whether to wait for input"): + r = parse_boolean_argument("--follow", opts.arg, NULL); if (r < 0) return r; arg_follow = r; break; - case ARG_SAVE_STATE: - r = free_and_strdup_warn(&arg_save_state, optarg ?: STATE_FILE); + OPTION_LONG_FLAGS(OPTION_OPTIONAL_ARG, "save-state", "FILE", + "Save uploaded cursors (default " STATE_FILE ")"): + r = free_and_strdup_warn(&arg_save_state, opts.arg ?: STATE_FILE); if (r < 0) return r; break; - - case '?': - return -EINVAL; - - default: - assert_not_reached(); } if (!arg_url) @@ -881,10 +833,12 @@ static int parse_argv(int argc, char *argv[]) { return log_error_errno(SYNTHETIC_ERRNO(EINVAL), "Options --key= and --cert= must be used together."); - if (optind < argc && (arg_directory || arg_file || arg_machine || arg_journal_type)) + char **args = option_parser_get_args(&opts); + if (!strv_isempty(args) && (arg_directory || arg_file || arg_machine || arg_journal_type)) return log_error_errno(SYNTHETIC_ERRNO(EINVAL), "Input arguments make no sense with journal input."); + *ret_args = args; return 1; } @@ -911,6 +865,7 @@ static int open_journal(sd_journal **j) { static int run(int argc, char **argv) { _cleanup_(destroy_uploader) Uploader u = {}; _unused_ _cleanup_(notify_on_cleanup) const char *notify_message = NULL; + char **args = NULL; bool use_journal; int r; @@ -920,7 +875,7 @@ static int run(int argc, char **argv) { if (r < 0) return r; - r = parse_argv(argc, argv); + r = parse_argv(argc, argv, &args); if (r <= 0) return r; @@ -947,7 +902,7 @@ static int run(int argc, char **argv) { log_debug("%s running as pid "PID_FMT, program_invocation_short_name, getpid_cached()); - use_journal = optind >= argc; + use_journal = strv_isempty(args); if (use_journal) { sd_journal *j; r = open_journal(&j); @@ -965,7 +920,7 @@ static int run(int argc, char **argv) { "STATUS=Processing input...", NOTIFY_STOPPING_MESSAGE); - for (;;) { + for (size_t i = 0;;) { r = sd_event_get_state(u.event); if (r < 0) return r; @@ -978,11 +933,11 @@ static int run(int argc, char **argv) { r = check_journal_input(&u); } else if (u.input < 0 && !use_journal) { - if (optind >= argc) + if (!args[i]) return 0; - log_debug("Using %s as input.", argv[optind]); - r = open_file_for_upload(&u, argv[optind++]); + log_debug("Using %s as input.", args[i]); + r = open_file_for_upload(&u, args[i++]); } if (r < 0) return r; From d9fbe513a9700a52845dd2ebbf0f583bab7519f4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Zbigniew=20J=C4=99drzejewski-Szmek?= Date: Thu, 30 Apr 2026 00:07:29 +0200 Subject: [PATCH 122/242] mstack-tool: convert to OPTION macros Both the main parser and the util-linux mount-helper-mode parser (invoked as mount.mstack) are converted with "systmed-mstack" and "mount.mstack" as namespaces. The latter has no help. For systemd-mstack, Commands are listed first, and then Options. And --no-pager, --no-legend, --json= are moved to the end. Co-developed-by: Claude Opus 4.7 --- src/mstack/mstack-tool.c | 232 +++++++++++++++++---------------------- 1 file changed, 99 insertions(+), 133 deletions(-) diff --git a/src/mstack/mstack-tool.c b/src/mstack/mstack-tool.c index 2e8946ab72a7e..244e7dc682dcd 100644 --- a/src/mstack/mstack-tool.c +++ b/src/mstack/mstack-tool.c @@ -1,7 +1,6 @@ /* SPDX-License-Identifier: LGPL-2.1-or-later */ #include -#include #include #include "argv-util.h" @@ -11,13 +10,14 @@ #include "extract-word.h" #include "fd-util.h" #include "format-table.h" +#include "help-util.h" #include "image-policy.h" #include "main-func.h" #include "mount-util.h" #include "mountpoint-util.h" #include "mstack.h" +#include "options.h" #include "parse-argument.h" -#include "pretty-print.h" #include "string-util.h" static enum { @@ -41,191 +41,155 @@ STATIC_DESTRUCTOR_REGISTER(arg_image_policy, image_policy_freep); STATIC_DESTRUCTOR_REGISTER(arg_image_filter, image_filter_freep); static int help(void) { - _cleanup_free_ char *link = NULL; + _cleanup_(table_unrefp) Table *options = NULL, *commands = NULL; int r; - r = terminal_urlify_man("systemd-mstack", "1", &link); + r = option_parser_get_help_table_ns("systemd-mstack", &options); if (r < 0) - return log_oom(); + return r; - printf("%1$s [OPTIONS...] WHAT\n" - "%1$s [OPTIONS...] --mount WHAT WHERE\n" - "%1$s [OPTIONS...] --umount WHERE\n" - "\n%5$sInspect or apply mount stack.%6$s\n\n" - "%3$sOptions:%4$s\n" - " --no-pager Do not pipe output into a pager\n" - " --no-legend Do not print the column headers\n" - " --json=pretty|short|off Generate JSON output\n" - " -r --read-only Mount read-only\n" - " --mkdir Make mount directory before mounting, if missing\n" - " --rmdir Remove mount directory after unmounting\n" - " --image-policy=POLICY\n" - " Specify image dissection policy\n" - " --image-filter=FILTER\n" - " Specify image dissection filter\n" - "\n%3$sCommands:%4$s\n" - " -h --help Show this help\n" - " --version Show package version\n" - " -m --mount Mount the mstack to the specified directory\n" - " -M Shortcut for --mount --mkdir\n" - " -u --umount Unmount the image from the specified directory\n" - " -U Shortcut for --umount --rmdir\n" - "\nSee the %2$s for details.\n", - program_invocation_short_name, - link, - ansi_underline(), ansi_normal(), - ansi_highlight(), ansi_normal()); + r = option_parser_get_help_table_full("systemd-mstack", "Commands", &commands); + if (r < 0) + return r; + (void) table_sync_column_widths(0, options, commands); + + help_cmdline("[OPTIONS...] WHAT"); + help_cmdline("[OPTIONS...] --mount WHAT WHERE"); + help_cmdline("[OPTIONS...] --umount WHERE"); + help_abstract("Inspect or apply mount stack."); + + help_section("Commands"); + r = table_print_or_warn(commands); + if (r < 0) + return r; + + help_section("Options"); + r = table_print_or_warn(options); + if (r < 0) + return r; + + help_man_page_reference("systemd-mstack", "1"); return 0; } static int parse_argv(int argc, char *argv[]) { - - enum { - ARG_VERSION = 0x100, - ARG_NO_PAGER, - ARG_NO_LEGEND, - ARG_JSON, - ARG_MKDIR, - ARG_RMDIR, - ARG_IMAGE_POLICY, - ARG_IMAGE_FILTER, - }; - - static const struct option options[] = { - { "help", no_argument, NULL, 'h' }, - { "version", no_argument, NULL, ARG_VERSION }, - { "no-pager", no_argument, NULL, ARG_NO_PAGER }, - { "no-legend", no_argument, NULL, ARG_NO_LEGEND }, - { "mount", no_argument, NULL, 'm' }, - { "umount", no_argument, NULL, 'u' }, - { "json", required_argument, NULL, ARG_JSON }, - { "read-only", no_argument, NULL, 'r' }, - { "mkdir", no_argument, NULL, ARG_MKDIR }, - { "rmdir", no_argument, NULL, ARG_RMDIR }, - { "image-policy", required_argument, NULL, ARG_IMAGE_POLICY }, - { "image-filter", required_argument, NULL, ARG_IMAGE_FILTER }, - {} - }; - - int c, r; + int r; assert(argc >= 0); assert(argv); - while ((c = getopt_long(argc, argv, "hmMuUr", options, NULL)) >= 0) { + OptionParser opts = { argc, argv, .namespace = "systemd-mstack" }; + FOREACH_OPTION_OR_RETURN(c, &opts) switch (c) { - case 'h': - return help(); - - case ARG_VERSION: - return version(); - - case ARG_NO_PAGER: - arg_pager_flags |= PAGER_DISABLE; - break; + OPTION_NAMESPACE("systemd-mstack"): {} - case ARG_NO_LEGEND: - arg_legend = false; + OPTION('r', "read-only", NULL, "Mount read-only"): + arg_mstack_flags |= MSTACK_RDONLY; break; - case ARG_JSON: - r = parse_json_argument(optarg, &arg_json_format_flags); - if (r <= 0) - return r; - + OPTION_LONG("mkdir", NULL, "Make mount directory before mounting, if missing"): + arg_mstack_flags |= MSTACK_MKDIR; break; - case 'r': - arg_mstack_flags |= MSTACK_RDONLY; + OPTION_LONG("rmdir", NULL, "Remove mount directory after unmounting"): + arg_rmdir = true; break; - case ARG_IMAGE_POLICY: - r = parse_image_policy_argument(optarg, &arg_image_policy); + OPTION_LONG("image-policy", "POLICY", "Specify image dissection policy"): + r = parse_image_policy_argument(opts.arg, &arg_image_policy); if (r < 0) return r; break; - case ARG_IMAGE_FILTER: { + OPTION_LONG("image-filter", "FILTER", "Specify image dissection filter"): { _cleanup_(image_filter_freep) ImageFilter *f = NULL; - r = image_filter_parse(optarg, &f); + r = image_filter_parse(opts.arg, &f); if (r < 0) - return log_error_errno(r, "Failed to parse image filter expression: %s", optarg); + return log_error_errno(r, "Failed to parse image filter expression: %s", opts.arg); image_filter_free(arg_image_filter); arg_image_filter = TAKE_PTR(f); break; } - case ARG_MKDIR: - arg_mstack_flags |= MSTACK_MKDIR; + OPTION_COMMON_NO_PAGER: + arg_pager_flags |= PAGER_DISABLE; break; - case ARG_RMDIR: - arg_rmdir = true; + OPTION_COMMON_NO_LEGEND: + arg_legend = false; break; - case 'm': + OPTION_COMMON_JSON: + r = parse_json_argument(opts.arg, &arg_json_format_flags); + if (r <= 0) + return r; + break; + + OPTION_GROUP("Commands"): {} + + OPTION_COMMON_HELP: + return help(); + + OPTION_COMMON_VERSION: + return version(); + + OPTION('m', "mount", NULL, "Mount the mstack to the specified directory"): arg_action = ACTION_MOUNT; break; - case 'M': - /* Shortcut combination of --mkdir + --mount */ + OPTION_SHORT('M', NULL, "Shortcut for --mount --mkdir"): arg_action = ACTION_MOUNT; arg_mstack_flags |= MSTACK_MKDIR; break; - case 'u': + OPTION('u', "umount", NULL, "Unmount the image from the specified directory"): arg_action = ACTION_UMOUNT; break; - case 'U': - /* Shortcut combination of --rmdir + --umount */ + OPTION_SHORT('U', NULL, "Shortcut for --umount --rmdir"): arg_action = ACTION_UMOUNT; arg_rmdir = true; break; - - case '?': - return -EINVAL; - - default: - assert_not_reached(); } - } + + char **args = option_parser_get_args(&opts); + size_t n_args = option_parser_get_n_args(&opts); switch (arg_action) { case ACTION_INSPECT: - if (optind + 1 != argc) + if (n_args != 1) return log_error_errno(SYNTHETIC_ERRNO(EINVAL), "Expected one argument."); - r = parse_path_argument(argv[optind], /* suppress_root= */ false, &arg_what); + r = parse_path_argument(args[0], /* suppress_root= */ false, &arg_what); if (r < 0) return r; break; case ACTION_MOUNT: - if (optind + 2 != argc) + if (n_args != 2) return log_error_errno(SYNTHETIC_ERRNO(EINVAL), "Expected two arguments."); - r = parse_path_argument(argv[optind], /* suppress_root= */ false, &arg_what); + r = parse_path_argument(args[0], /* suppress_root= */ false, &arg_what); if (r < 0) return r; - r = parse_path_argument(argv[optind+1], /* suppress_root= */ false, &arg_where); + r = parse_path_argument(args[1], /* suppress_root= */ false, &arg_where); if (r < 0) return r; break; case ACTION_UMOUNT: - if (optind + 1 != argc) + if (n_args != 1) return log_error_errno(SYNTHETIC_ERRNO(EINVAL), "Expected one argument."); - r = parse_path_argument(argv[optind], /* suppress_root= */ false, &arg_where); + r = parse_path_argument(args[0], /* suppress_root= */ false, &arg_where); if (r < 0) return r; @@ -239,47 +203,49 @@ static int parse_argv(int argc, char *argv[]) { } static int parse_argv_as_mount_helper(int argc, char *argv[]) { - const char *options = NULL; + const char *mount_options = NULL; bool fake = false; - int c, r; + int r; /* Implements util-linux "external helper" command line interface, as per mount(8) man page. */ - while ((c = getopt(argc, argv, "sfnvN:o:t:")) >= 0) { + OptionParser opts = { argc, argv, .namespace = "mount.mstack" }; + + FOREACH_OPTION_OR_RETURN(c, &opts) switch (c) { - case 'f': + OPTION_NAMESPACE("mount.mstack"): {} + + OPTION_SHORT('f', NULL, NULL): fake = true; break; - case 'o': - options = optarg; + OPTION_SHORT('o', "OPTIONS", NULL): + mount_options = opts.arg; break; - case 't': - if (!streq(optarg, "mstack")) - log_debug("Unexpected file system type '%s', ignoring.", optarg); + OPTION_SHORT('t', "TYPE", NULL): + if (!streq(opts.arg, "mstack")) + log_debug("Unexpected file system type '%s', ignoring.", opts.arg); break; - case 's': /* sloppy mount options */ - case 'n': /* aka --no-mtab */ - case 'v': /* aka --verbose */ - log_debug("Ignoring option -%c, not implemented.", c); + OPTION_SHORT('s', NULL, NULL): {} /* sloppy mount options, fall-through */ + OPTION_SHORT('n', NULL, NULL): {} /* aka --no-mtab, fall-through */ + OPTION_SHORT('v', NULL, NULL): /* aka --verbose */ + log_debug("Ignoring option -%c, not implemented.", opts.opt->short_code); break; - case 'N': /* aka --namespace= */ - return log_error_errno(SYNTHETIC_ERRNO(EOPNOTSUPP), "Option -%c is not implemented, refusing.", c); - - case '?': - return -EINVAL; + OPTION_SHORT('N', "NAMESPACE", NULL): /* aka --namespace= */ + return log_error_errno(SYNTHETIC_ERRNO(EOPNOTSUPP), + "Option -%c is not implemented, refusing.", opts.opt->short_code); } - } - if (optind + 2 != argc) + char **args = option_parser_get_args(&opts); + if (option_parser_get_n_args(&opts) != 2) return log_error_errno(SYNTHETIC_ERRNO(EINVAL), - "Expected an image file path and target directory as only argument."); + "Expected an image file path and target directory as arguments."); - for (const char *p = options;;) { + for (const char *p = mount_options;;) { _cleanup_free_ char *word = NULL; r = extract_first_word(&p, &word, ",", EXTRACT_KEEP_QUOTE); @@ -300,11 +266,11 @@ static int parse_argv_as_mount_helper(int argc, char *argv[]) { if (fake) return 0; - r = parse_path_argument(argv[optind], /* suppress_root= */ false, &arg_what); + r = parse_path_argument(args[0], /* suppress_root= */ false, &arg_what); if (r < 0) return r; - r = parse_path_argument(argv[optind+1], /* suppress_root= */ false, &arg_where); + r = parse_path_argument(args[1], /* suppress_root= */ false, &arg_where); if (r < 0) return r; From c97084d5b0cf19e4895b05469e0c595333b9574b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Zbigniew=20J=C4=99drzejewski-Szmek?= Date: Thu, 30 Apr 2026 00:10:00 +0200 Subject: [PATCH 123/242] networkctl: convert to OPTION and VERB macros --help output is identical except for common options strings and whitespace. Co-developed-by: Claude Opus 4.7 --- src/network/networkctl.c | 261 ++++++++++++++++----------------------- 1 file changed, 109 insertions(+), 152 deletions(-) diff --git a/src/network/networkctl.c b/src/network/networkctl.c index 7fe34ac1eb778..e77950bbe8c4f 100644 --- a/src/network/networkctl.c +++ b/src/network/networkctl.c @@ -1,11 +1,11 @@ /* SPDX-License-Identifier: LGPL-2.1-or-later */ -#include - #include "sd-json.h" #include "alloc-util.h" #include "build.h" +#include "format-table.h" +#include "help-util.h" #include "log.h" #include "logs-show.h" #include "main-func.h" @@ -16,10 +16,10 @@ #include "networkctl-lldp.h" #include "networkctl-misc.h" #include "networkctl-status-link.h" +#include "options.h" #include "parse-argument.h" #include "parse-util.h" #include "path-util.h" -#include "pretty-print.h" #include "string-util.h" #include "verbs.h" @@ -38,144 +38,146 @@ bool arg_ask_password = true; STATIC_DESTRUCTOR_REGISTER(arg_drop_in, freep); +VERB_SCOPE(, verb_list_links, "list", "[PATTERN...]", VERB_ANY, VERB_ANY, VERB_DEFAULT|VERB_ONLINE_ONLY, + "List links"); +VERB_SCOPE(, verb_link_status, "status", "[PATTERN...]", VERB_ANY, VERB_ANY, VERB_ONLINE_ONLY, + "Show link status"); +VERB_SCOPE(, verb_link_lldp_status, "lldp", "[PATTERN...]", VERB_ANY, VERB_ANY, 0, + "Show LLDP neighbors"); +VERB_SCOPE(, verb_list_address_labels, "label", NULL, 1, 1, 0, + "Show current address label entries in the kernel"); +VERB_SCOPE(, verb_link_delete, "delete", "DEVICES...", 2, VERB_ANY, 0, + "Delete virtual netdevs"); +VERB_SCOPE(, verb_link_varlink_simple_method, "up", "DEVICES...", 2, VERB_ANY, 0, + "Bring devices up"); +VERB_SCOPE(, verb_link_varlink_simple_method, "down", "DEVICES...", 2, VERB_ANY, 0, + "Bring devices down"); +VERB_SCOPE(, verb_link_varlink_simple_method, "renew", "DEVICES...", 2, VERB_ANY, VERB_ONLINE_ONLY, + "Renew dynamic configurations"); +VERB_SCOPE(, verb_link_varlink_simple_method, "forcerenew", "DEVICES...", 2, VERB_ANY, VERB_ONLINE_ONLY, + "Trigger DHCP reconfiguration of all connected clients"); +VERB_SCOPE(, verb_link_varlink_simple_method, "reconfigure", "DEVICES...", 2, VERB_ANY, VERB_ONLINE_ONLY, + "Reconfigure interfaces"); +VERB_SCOPE(, verb_reload, "reload", NULL, 1, 1, VERB_ONLINE_ONLY, + "Reload .network and .netdev files"); +VERB_SCOPE(, verb_edit, "edit", "FILES|DEVICES...", 2, VERB_ANY, 0, + "Edit network configuration files"); +VERB_SCOPE(, verb_cat, "cat", "[FILES|DEVICES...]", 1, VERB_ANY, 0, + "Show network configuration files"); +VERB_SCOPE(, verb_mask, "mask", "FILES...", 2, VERB_ANY, 0, + "Mask network configuration files"); +VERB_SCOPE(, verb_unmask, "unmask", "FILES...", 2, VERB_ANY, 0, + "Unmask network configuration files"); +VERB_SCOPE(, verb_persistent_storage, "persistent-storage", "BOOL", 2, 2, 0, + "Notify systemd-networkd if persistent storage is ready"); + static int help(void) { - _cleanup_free_ char *link = NULL; + _cleanup_(table_unrefp) Table *verbs = NULL, *options = NULL; int r; - r = terminal_urlify_man("networkctl", "1", &link); + r = verbs_get_help_table(&verbs); + if (r < 0) + return r; + + r = option_parser_get_help_table(&options); + if (r < 0) + return r; + + (void) table_sync_column_widths(0, verbs, options); + + help_cmdline("[OPTIONS...] COMMAND"); + help_abstract("Query and control the networking subsystem."); + + help_section("Commands"); + r = table_print_or_warn(verbs); + if (r < 0) + return r; + + help_section("Options"); + r = table_print_or_warn(options); if (r < 0) - return log_oom(); - - printf("%s [OPTIONS...] COMMAND\n\n" - "%sQuery and control the networking subsystem.%s\n" - "\nCommands:\n" - " list [PATTERN...] List links\n" - " status [PATTERN...] Show link status\n" - " lldp [PATTERN...] Show LLDP neighbors\n" - " label Show current address label entries in the kernel\n" - " delete DEVICES... Delete virtual netdevs\n" - " up DEVICES... Bring devices up\n" - " down DEVICES... Bring devices down\n" - " renew DEVICES... Renew dynamic configurations\n" - " forcerenew DEVICES... Trigger DHCP reconfiguration of all connected clients\n" - " reconfigure DEVICES... Reconfigure interfaces\n" - " reload Reload .network and .netdev files\n" - " edit FILES|DEVICES... Edit network configuration files\n" - " cat [FILES|DEVICES...] Show network configuration files\n" - " mask FILES... Mask network configuration files\n" - " unmask FILES... Unmask network configuration files\n" - " persistent-storage BOOL\n" - " Notify systemd-networkd if persistent storage is ready\n" - "\nOptions:\n" - " -h --help Show this help\n" - " --version Show package version\n" - " --no-pager Do not pipe output into a pager\n" - " --no-legend Do not show the headers and footers\n" - " --no-ask-password Do not prompt for password\n" - " -a --all Show status for all links\n" - " -s --stats Show detailed link statistics\n" - " -l --full Do not ellipsize output\n" - " -n --lines=INTEGER Number of journal entries to show\n" - " --json=pretty|short|off\n" - " Generate JSON output\n" - " --no-reload Do not reload systemd-networkd or systemd-udevd\n" - " after editing network config\n" - " --drop-in=NAME Edit specified drop-in instead of main config file\n" - " --runtime Edit runtime config files\n" - " --stdin Read new contents of edited file from stdin\n" - "\nSee the %s for details.\n", - program_invocation_short_name, - ansi_highlight(), - ansi_normal(), - link); + return r; + help_man_page_reference("networkctl", "1"); return 0; } -static int parse_argv(int argc, char *argv[]) { - enum { - ARG_VERSION = 0x100, - ARG_NO_PAGER, - ARG_NO_LEGEND, - ARG_NO_ASK_PASSWORD, - ARG_JSON, - ARG_NO_RELOAD, - ARG_DROP_IN, - ARG_RUNTIME, - ARG_STDIN, - }; - - static const struct option options[] = { - { "help", no_argument, NULL, 'h' }, - { "version", no_argument, NULL, ARG_VERSION }, - { "no-pager", no_argument, NULL, ARG_NO_PAGER }, - { "no-legend", no_argument, NULL, ARG_NO_LEGEND }, - { "no-ask-password", no_argument, NULL, ARG_NO_ASK_PASSWORD }, - { "all", no_argument, NULL, 'a' }, - { "stats", no_argument, NULL, 's' }, - { "full", no_argument, NULL, 'l' }, - { "lines", required_argument, NULL, 'n' }, - { "json", required_argument, NULL, ARG_JSON }, - { "no-reload", no_argument, NULL, ARG_NO_RELOAD }, - { "drop-in", required_argument, NULL, ARG_DROP_IN }, - { "runtime", no_argument, NULL, ARG_RUNTIME }, - { "stdin", no_argument, NULL, ARG_STDIN }, - {} - }; - - int c, r; +VERB_COMMON_HELP_HIDDEN(help); + +static int parse_argv(int argc, char *argv[], char ***remaining_args) { + int r; assert(argc >= 0); assert(argv); + assert(remaining_args); - while ((c = getopt_long(argc, argv, "hasln:", options, NULL)) >= 0) { + OptionParser opts = { argc, argv }; + FOREACH_OPTION_OR_RETURN(c, &opts) switch (c) { - case 'h': + OPTION_COMMON_HELP: return help(); - case ARG_VERSION: + OPTION_COMMON_VERSION: return version(); - case ARG_NO_PAGER: + OPTION_COMMON_NO_PAGER: arg_pager_flags |= PAGER_DISABLE; break; - case ARG_NO_LEGEND: + OPTION_COMMON_NO_LEGEND: arg_legend = false; break; - case ARG_NO_RELOAD: - arg_no_reload = true; + OPTION_COMMON_NO_ASK_PASSWORD: + arg_ask_password = false; break; - case ARG_NO_ASK_PASSWORD: - arg_ask_password = false; + OPTION('a', "all", NULL, "Show status for all links"): + arg_all = true; break; - case ARG_RUNTIME: - arg_runtime = true; + OPTION('s', "stats", NULL, "Show detailed link statistics"): + arg_stats = true; break; - case ARG_STDIN: - arg_stdin = true; + OPTION('l', "full", NULL, "Do not ellipsize output"): + arg_full = true; + break; + + OPTION('n', "lines", "INTEGER", "Number of journal entries to show"): + if (safe_atou(opts.arg, &arg_lines) < 0) + return log_error_errno(SYNTHETIC_ERRNO(EINVAL), + "Failed to parse lines '%s'", opts.arg); break; - case ARG_DROP_IN: - if (isempty(optarg)) + OPTION_COMMON_JSON: + r = parse_json_argument(opts.arg, &arg_json_format_flags); + if (r <= 0) + return r; + break; + + OPTION_LONG("no-reload", NULL, + "Do not reload systemd-networkd or systemd-udevd after editing network config"): + arg_no_reload = true; + break; + + OPTION_LONG("drop-in", "NAME", + "Edit specified drop-in instead of main config file"): + if (isempty(opts.arg)) return log_error_errno(SYNTHETIC_ERRNO(EINVAL), "Empty drop-in file name."); - if (!endswith(optarg, ".conf")) { + if (!endswith(opts.arg, ".conf")) { char *conf; - conf = strjoin(optarg, ".conf"); + conf = strjoin(opts.arg, ".conf"); if (!conf) return log_oom(); free_and_replace(arg_drop_in, conf); } else { - r = free_and_strdup(&arg_drop_in, optarg); + r = free_and_strdup(&arg_drop_in, opts.arg); if (r < 0) return log_oom(); } @@ -186,77 +188,32 @@ static int parse_argv(int argc, char *argv[]) { break; - case 'a': - arg_all = true; - break; - - case 's': - arg_stats = true; - break; - - case 'l': - arg_full = true; - break; - - case 'n': - if (safe_atou(optarg, &arg_lines) < 0) - return log_error_errno(SYNTHETIC_ERRNO(EINVAL), - "Failed to parse lines '%s'", optarg); + OPTION_LONG("runtime", NULL, "Edit runtime config files"): + arg_runtime = true; break; - case ARG_JSON: - r = parse_json_argument(optarg, &arg_json_format_flags); - if (r <= 0) - return r; + OPTION_LONG("stdin", NULL, "Read new contents of edited file from stdin"): + arg_stdin = true; break; - - case '?': - return -EINVAL; - - default: - assert_not_reached(); } - } + *remaining_args = option_parser_get_args(&opts); return 1; } -static int networkctl_main(int argc, char *argv[]) { - static const Verb verbs[] = { - { "list", VERB_ANY, VERB_ANY, VERB_DEFAULT|VERB_ONLINE_ONLY, verb_list_links }, - { "status", VERB_ANY, VERB_ANY, VERB_ONLINE_ONLY, verb_link_status }, - { "lldp", VERB_ANY, VERB_ANY, 0, verb_link_lldp_status }, - { "label", 1, 1, 0, verb_list_address_labels }, - { "delete", 2, VERB_ANY, 0, verb_link_delete }, - { "up", 2, VERB_ANY, 0, verb_link_varlink_simple_method }, - { "down", 2, VERB_ANY, 0, verb_link_varlink_simple_method }, - { "renew", 2, VERB_ANY, VERB_ONLINE_ONLY, verb_link_varlink_simple_method }, - { "forcerenew", 2, VERB_ANY, VERB_ONLINE_ONLY, verb_link_varlink_simple_method }, - { "reconfigure", 2, VERB_ANY, VERB_ONLINE_ONLY, verb_link_varlink_simple_method }, - { "reload", 1, 1, VERB_ONLINE_ONLY, verb_reload }, - { "edit", 2, VERB_ANY, 0, verb_edit }, - { "cat", 1, VERB_ANY, 0, verb_cat }, - { "mask", 2, VERB_ANY, 0, verb_mask }, - { "unmask", 2, VERB_ANY, 0, verb_unmask }, - { "persistent-storage", 2, 2, 0, verb_persistent_storage }, - {} - }; - - return dispatch_verb(argc, argv, verbs, NULL); -} - static int run(int argc, char* argv[]) { + char **args = NULL; int r; log_setup(); - r = parse_argv(argc, argv); + r = parse_argv(argc, argv, &args); if (r <= 0) return r; journal_browse_prepare(); - return networkctl_main(argc, argv); + return dispatch_verb_with_args(args, NULL); } DEFINE_MAIN_FUNCTION(run); From 203409bb9ffe78aae0c2a0a71eb4e910484a47c1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Zbigniew=20J=C4=99drzejewski-Szmek?= Date: Thu, 30 Apr 2026 09:20:02 +0200 Subject: [PATCH 124/242] networkctl: use proper errno in mesage $ build/networkctl --lines=4883284838483883838383 Failed to parse --lines setting '4883284838483883838383': Numerical result out of range --- src/network/networkctl.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/network/networkctl.c b/src/network/networkctl.c index e77950bbe8c4f..b64c8b17fc7d0 100644 --- a/src/network/networkctl.c +++ b/src/network/networkctl.c @@ -147,9 +147,9 @@ static int parse_argv(int argc, char *argv[], char ***remaining_args) { break; OPTION('n', "lines", "INTEGER", "Number of journal entries to show"): - if (safe_atou(opts.arg, &arg_lines) < 0) - return log_error_errno(SYNTHETIC_ERRNO(EINVAL), - "Failed to parse lines '%s'", opts.arg); + r = safe_atou(opts.arg, &arg_lines); + if (r < 0) + return log_error_errno(r, "Failed to parse --lines value '%s': %m", opts.arg); break; OPTION_COMMON_JSON: From 83436d4b7e9e2514bb32b89765ce33ad57f53224 Mon Sep 17 00:00:00 2001 From: Luca Boccassi Date: Tue, 5 May 2026 13:55:00 +0100 Subject: [PATCH 125/242] scsi_id: fix memory leak of option_get_synopsis() return value option_get_synopsis() returns a heap-allocated string. Capture it in a _cleanup_free_ variable so it is freed after being used in the log message. CID#1657828 Follow-up for 05fea7df1bd6579dc382455626e0e84acb2a8912 --- src/udev/scsi_id/scsi_id.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/src/udev/scsi_id/scsi_id.c b/src/udev/scsi_id/scsi_id.c index 295819351d19b..93b65816e497e 100644 --- a/src/udev/scsi_id/scsi_id.c +++ b/src/udev/scsi_id/scsi_id.c @@ -328,10 +328,13 @@ static int per_dev_options(struct scsi_id_device *dev_scsi, int *good_bad, enum r = parse_page_code(opts.arg, page_code); if (r < 0) return r; - } else + } else { + _cleanup_free_ char *synopsis = + option_get_synopsis(opts.opt, "/", /* show_metavar=*/ false); return log_error_errno(SYNTHETIC_ERRNO(EINVAL), "Option %s not supported in the config file.", - strnull(option_get_synopsis(opts.opt, "/", /* show_metavar=*/ false))); + strnull(synopsis)); + } return 0; } From ea07d7fec36d738dfd9f03bd6dce58051e58e739 Mon Sep 17 00:00:00 2001 From: Lennart Poettering Date: Mon, 20 Apr 2026 13:12:44 +0200 Subject: [PATCH 126/242] bootctl: allow extra files on 'link' be specified as literal data --- src/bootctl/bootctl-link.c | 49 ++++++++++++++------- src/shared/varlink-io.systemd.BootControl.c | 6 ++- 2 files changed, 37 insertions(+), 18 deletions(-) diff --git a/src/bootctl/bootctl-link.c b/src/bootctl/bootctl-link.c index 6358189e7d2f3..b8372752b5649 100644 --- a/src/bootctl/bootctl-link.c +++ b/src/bootctl/bootctl-link.c @@ -25,6 +25,7 @@ #include "hashmap.h" #include "id128-util.h" #include "io-util.h" +#include "iovec-util.h" #include "json-util.h" #include "kernel-image.h" #include "log.h" @@ -44,6 +45,7 @@ typedef struct ExtraFile { /* The source and the temporary file we copy it into */ int source_fd, temp_fd; char *filename, *temp_filename; + struct iovec data; /* Alternative to 'source_fd': literal data */ } ExtraFile; #define EXTRA_FILE_NULL \ @@ -115,6 +117,7 @@ static void extra_file_done(ExtraFile *x) { x->temp_fd = safe_close(x->temp_fd); x->filename = mfree(x->filename); x->temp_filename = mfree(x->temp_filename); + iovec_done(&x->data); } static void profile_done(Profile *p) { @@ -366,7 +369,8 @@ static int link_context_pick_entry_token(LinkContext *c) { } static int begin_copy_file( - int source_fd, + int source_fd, /* Either the source fd is specified, or the 'data' below, not both */ + const struct iovec *data, const char *filename, int target_dir_fd, int *ret_tmpfile_fd, @@ -374,7 +378,6 @@ static int begin_copy_file( int r; - assert(source_fd >= 0); assert(filename); assert(target_dir_fd >= 0); assert(ret_tmpfile_fd); @@ -398,11 +401,18 @@ static int begin_copy_file( CLEANUP_TMPFILE_AT(target_dir_fd, t); - r = copy_bytes(source_fd, write_fd, UINT64_MAX, COPY_REFLINK|COPY_SEEK0_SOURCE); - if (r < 0) - return log_error_errno(r, "Failed to copy data into '%s': %m", filename); + if (source_fd >= 0) { + r = copy_bytes(source_fd, write_fd, UINT64_MAX, COPY_REFLINK|COPY_SEEK0_SOURCE); + if (r < 0) + return log_error_errno(r, "Failed to copy data into '%s': %m", filename); + + (void) copy_times(source_fd, write_fd, /* flags= */ 0); + } else if (iovec_is_set(data)) { + r = loop_write(write_fd, data->iov_base, data->iov_len); + if (r < 0) + return log_error_errno(r, "Failed to write data into '%s': %m", filename); + } - (void) copy_times(source_fd, write_fd, /* flags= */ 0); (void) fchmod(write_fd, 0644); *ret_tmpfile_fd = TAKE_FD(write_fd); @@ -824,6 +834,7 @@ static int run_link_now(LinkContext *c) { r = begin_copy_file( c->kernel_fd, + /* data= */ NULL, c->kernel_filename, c->entry_token_dir_fd, &c->kernel_temp_fd, @@ -834,6 +845,7 @@ static int run_link_now(LinkContext *c) { FOREACH_ARRAY(x, c->extra, c->n_extra) { r = begin_copy_file( x->source_fd, + &x->data, x->filename, c->entry_token_dir_fd, &x->temp_fd, @@ -1043,7 +1055,8 @@ static int dispatch_extras(const char *name, sd_json_variant *v, sd_json_dispatc static const sd_json_dispatch_field dispatch_table[] = { { "filename", SD_JSON_VARIANT_STRING, json_dispatch_loader_entry_resource_filename, offsetof(ExtraParameters, extra_file.filename), SD_JSON_MANDATORY }, - { "fileDescriptor", _SD_JSON_VARIANT_TYPE_INVALID, sd_json_dispatch_uint, offsetof(ExtraParameters, fd_index), SD_JSON_MANDATORY }, + { "fileDescriptor", _SD_JSON_VARIANT_TYPE_INVALID, sd_json_dispatch_uint, offsetof(ExtraParameters, fd_index), 0 }, + { "data", SD_JSON_VARIANT_STRING, json_dispatch_unbase64_iovec, offsetof(ExtraParameters, extra_file.data), 0 }, {}, }; @@ -1051,17 +1064,21 @@ static int dispatch_extras(const char *name, sd_json_variant *v, sd_json_dispatc if (r < 0) return r; - xp.extra_file.source_fd = sd_varlink_peek_dup_fd(c->link, xp.fd_index); - if (xp.extra_file.source_fd < 0) - return log_debug_errno(xp.extra_file.source_fd, "Failed to acquire extra fd from Varlink: %m"); - - r = fd_verify_safe_flags(xp.extra_file.source_fd); - if (r < 0) + if (iovec_is_set(&xp.extra_file.data) == (xp.fd_index != UINT_MAX)) return sd_varlink_error_invalid_parameter_name(c->link, name); + if (xp.fd_index != UINT_MAX) { + xp.extra_file.source_fd = sd_varlink_peek_dup_fd(c->link, xp.fd_index); + if (xp.extra_file.source_fd < 0) + return log_debug_errno(xp.extra_file.source_fd, "Failed to acquire extra fd from Varlink: %m"); - r = fd_verify_regular(xp.extra_file.source_fd); - if (r < 0) - return log_debug_errno(r, "Failed to validate that the extra file is a regular file descriptor: %m"); + r = fd_verify_safe_flags(xp.extra_file.source_fd); + if (r < 0) + return sd_varlink_error_invalid_parameter_name(c->link, name); + + r = fd_verify_regular(xp.extra_file.source_fd); + if (r < 0) + return log_debug_errno(r, "Failed to validate that the extra file is a regular file descriptor: %m"); + } if (!GREEDY_REALLOC(c->context.extra, c->context.n_extra+1)) return log_oom(); diff --git a/src/shared/varlink-io.systemd.BootControl.c b/src/shared/varlink-io.systemd.BootControl.c index 920b9479db0a4..62306f5d79377 100644 --- a/src/shared/varlink-io.systemd.BootControl.c +++ b/src/shared/varlink-io.systemd.BootControl.c @@ -153,8 +153,10 @@ static SD_VARLINK_DEFINE_STRUCT_TYPE( BootEntryExtraFile, SD_VARLINK_FIELD_COMMENT("The name of the extra file"), SD_VARLINK_DEFINE_FIELD(filename, SD_VARLINK_STRING, 0), - SD_VARLINK_FIELD_COMMENT("Index into array of file descriptors, pointing to a file descriptor referencing the extra file."), - SD_VARLINK_DEFINE_FIELD(fileDescriptor, SD_VARLINK_INT, 0)); + SD_VARLINK_FIELD_COMMENT("Index into array of file descriptors, pointing to a file descriptor referencing the extra file to copy in. Either this or the 'data' field below must be set – not both, not neither."), + SD_VARLINK_DEFINE_FIELD(fileDescriptor, SD_VARLINK_INT, SD_VARLINK_NULLABLE), + SD_VARLINK_FIELD_COMMENT("Literal data to place in the extra file."), + SD_VARLINK_DEFINE_FIELD(data, SD_VARLINK_STRING, SD_VARLINK_NULLABLE)); static SD_VARLINK_DEFINE_METHOD( Link, From 5fbc7a7b126d37e1179e5651ae72d831d92eb464 Mon Sep 17 00:00:00 2001 From: Lennart Poettering Date: Thu, 28 Aug 2025 11:51:11 +0200 Subject: [PATCH 127/242] sysinstall: new component --- man/rules/meson.build | 4 + man/systemd-sysinstall.xml | 292 +++++ meson.build | 3 + meson_options.txt | 2 + shell-completion/bash/meson.build | 1 + shell-completion/bash/systemd-sysinstall | 90 ++ shell-completion/zsh/_systemd-sysinstall | 29 + shell-completion/zsh/meson.build | 1 + src/basic/time-util.c | 10 + src/basic/time-util.h | 1 + src/sysinstall/meson.build | 10 + src/sysinstall/sysinstall.c | 1425 ++++++++++++++++++++++ units/meson.build | 6 + units/system-install.target | 15 + units/systemd-sysinstall.service | 22 + 15 files changed, 1911 insertions(+) create mode 100644 man/systemd-sysinstall.xml create mode 100644 shell-completion/bash/systemd-sysinstall create mode 100644 shell-completion/zsh/_systemd-sysinstall create mode 100644 src/sysinstall/meson.build create mode 100644 src/sysinstall/sysinstall.c create mode 100644 units/system-install.target create mode 100644 units/systemd-sysinstall.service diff --git a/man/rules/meson.build b/man/rules/meson.build index 719838064c02f..c42a8d47f8e27 100644 --- a/man/rules/meson.build +++ b/man/rules/meson.build @@ -1218,6 +1218,10 @@ manpages = [ 'systemd-sysext-sysroot.service', 'systemd-sysext.service'], 'ENABLE_SYSEXT'], + ['systemd-sysinstall', + '8', + ['systemd-sysinstall.service'], + 'ENABLE_SYSINSTALL'], ['systemd-system-update-generator', '8', [], ''], ['systemd-system.conf', '5', diff --git a/man/systemd-sysinstall.xml b/man/systemd-sysinstall.xml new file mode 100644 index 0000000000000..228e7e2bff17f --- /dev/null +++ b/man/systemd-sysinstall.xml @@ -0,0 +1,292 @@ + + + + + + + + systemd-sysinstall + systemd + + + + systemd-sysinstall + 8 + + + + systemd-sysinstall + systemd-sysinstall.service + Simple OS installer + + + + + systemd-sysinstall + OPTIONS + BLOCKDEVICE + + + systemd-sysinstall.service + + + + Description + + systemd-sysinstall is a simple terminal and command line based operating system + installer tool. Its primary use-case is to act as an automatically started interactive interface when + booting from an installer medium (e.g. a USB stick), in order to install an OS onto a target + disk. However, it may also be invoked directly from a shell. It executes the following steps: + + + It prompts the user for the target disk to install the OS on. (Unless the block device + is already specified on the command line.) + + It validates whether the disk is suitable (i.e. large enough, and with enough + free/unpartitioned space) for an OS installation. If it is generally suitable the user is prompted if they + want to erase the disk before installation, or if the OS shall be added to the existing partitions on + the disk (the latter only if enough free/unpartitioned disk space is available). + + It prompts the user whether to register the newly installed OS with the firmware boot option menu. + + It requests confirmation from the user, after showing a summary of the planned OS installation. + + It invokes + systemd-creds1's + encrypt command in order to generate encrypted (TPM locked, if available) system + credential files for a few, very basic system settings of the currently booted system (locale, keymap, + timezone), which it will install on the target disk, parameterizing the invoked kernel. (Or in other + words, it prepares that some settings already in effect on the installer system are propagated securely + onto the new installation.) + + It invokes + systemd-repart8 with + a definitions directory of /usr/lib/repart.sysinstall.d/ (only if populated – if + not will use the default of /usr/lib/repart.d/). This is supposed to set up the + basic OS partition structure on the target disk and copies in basic OS partitions (most importantly the + /usr/ hierarchy). + + It invokes + bootctl1's + link command to install an OS kernel image onto the target disk's ESP/XBOOTLDR, + together with the credential files prepared earlier. + + It invokes + bootctl1's + install command to install the + systemd-boot7 boot + loader onto the target disk's ESP. + + After confirmation, it reboots the system. + + + Note that the prompts/confirmation may be disabled via the command line, enabling fully automatic, + non-interactive installation. See below. + + Note this tool does not interactively query the user for a user to create or a root password to be + set on the target system, under the assumption these questions are better prompted from within the newly + installed system's first boot process, for example via the + systemd-firstboot1 or + systemd-homed-firstboot.service components. Note that if required such settings + may be propagated explicitly via the switch below. + + + + Options + + The following options are understood: + + + + + + + Overrides the directory where systemd-repart shall read its + partition definitions from, in place of the default of + /usr/lib/repart.sysinstall.d/. + + + + + + + + Takes a boolean argument. Controls whether to show the brief welcome text normally + displayed at the beginning of the installation. Defaults to true. + + + + + + + + Takes a boolean argument. Controls whether to show the colored bars at the top and + bottom of the terminal interface. Defaults to true. + + + + + + + + Takes a boolean argument. Controls whether to erase the current contents of the + target disk. If this switch is not used the user is prompted. + + + + + + + + Takes a boolean argument. Controls whether to interactively query the user for + confirmation before initiating the OS installation. Defaults to true. + + + + + + + + Takes a boolean argument. Controls whether to reboot the system after completing the + installation. Defaults to false. + + + + + + + + Takes a boolean argument. Controls whether to register the installed boot loader in + the firmware's boot options database. If not specified the user will be prompted. + + + + + + + + Takes a boolean argument. Controls whether to show a summary of the choices made + before asking for confirmation to proceed with the OS installation. Defaults to true. + + + + + + + + Takes a path to a unified kernel image (UKI). Explicitly selects the kernel image to + install on the target disk. If unspecified the currently booted kernel image is installed on the + target disk. + + + + + + + + Accepts an additional system credential to encrypt (with a key generated on the local + TPM, if available, and the null key otherwise) and place next to the installed kernel image in the + ESP. This may be used to parameterize the installed kernel with arbitrary system credentials. Do not + use this switch for sensitive data (such as passwords), use + instead, see below. May be used multiple times to configure multiple credentials. + + Note that three system credentials are propagated in similar fashion to the target system: + the locale, keymap and timezone. This may be controlled by the relevant + , and + options below. + + See + systemd.system-credentials7 + for a list of well-known system credentials that may be propagated this way. (Note that you may pass + arbitrary additional credentials this way, that can be consumed by any service of your + choice, via the usual system credentials logic.) + + + + + + + + Similar to but reads the credential value from a + file on disk or an AF_UNIX socket in the file system. This is generally + preferable for sensitive data, such as passwords. + + + + + + + + + + These options take boolean parameters. They control whether the indicated system + settings shall be propagated from the currently running system into the new target OS + installation. These options default to true. + + Typically, these three settings are the minimal settings that need to be configured during early + boot of an installer medium in order to make the installer tool accessible to the user. The + systemd-firstboot1 + tool may be used to query the user interactively when the OS install medium is booted for these + properties. By propagating these settings to the target installation via system credentials they do + not need to be queried again on first boot of the new installation. + + + + + + + + Takes a boolean argument. Controls whether to disable kernel and service manager log + output to the console the installer is invoked on temporarily while running, in order to avoid + interleaved output. Defaults to false. + + + + + + + + + + + Exit status + + On success, 0 is returned, and a non-zero failure code otherwise. + + + + Example + + + Invoke the tool for a fully automatic non-interactive OS installation + + systemd-sysinstall \ + /dev/disk/by-id/nvme-Micron_MTFDKBA1T0TFH_214532D0CDA5 \ + --erase=yes \ + --confirm=no \ + --variables=yes \ + --load-credential=ssh.authorized_keys.root:my-ssh-key + + + This installs the OS on the selected disk, erasing any previous contents, without confirmation, + registers it in the firmware, and drops in the SSH key for the root user, read from the + my-ssh-key file in the current directory. + + + + + See Also + + systemd1 + systemd-creds1 + systemd-repart8 + bootctl1 + systemd-firstboot1 + systemd-boot7 + systemd.system-credentials7 + + + + diff --git a/meson.build b/meson.build index 325b954a78b24..d6fbd7c2b7ea6 100644 --- a/meson.build +++ b/meson.build @@ -1582,6 +1582,7 @@ foreach tuple : [ ['rfkill'], ['smack'], ['sysext'], + ['sysinstall'], ['sysusers'], ['timedated'], ['timesyncd'], @@ -2144,6 +2145,7 @@ subdir('src/storagetm') subdir('src/sulogin-shell') subdir('src/sysctl') subdir('src/sysext') +subdir('src/sysinstall') subdir('src/system-update-generator') subdir('src/systemctl') subdir('src/sysupdate') @@ -2928,6 +2930,7 @@ foreach tuple : [ ['resolve'], ['rfkill'], ['sysext'], + ['sysinstall'], ['systemd-analyze', conf.get('ENABLE_ANALYZE') == 1], ['sysupdate'], ['sysupdated'], diff --git a/meson_options.txt b/meson_options.txt index d61afac519d84..1917268d2ce4d 100644 --- a/meson_options.txt +++ b/meson_options.txt @@ -109,6 +109,8 @@ option('sysupdate', type : 'feature', deprecated : { 'true' : 'enabled', 'false' option('sysupdated', type: 'combo', value : 'auto', choices : ['auto', 'enabled', 'disabled'], description : 'install the systemd-sysupdated service') +option('sysinstall', type : 'boolean', + description : 'install the systemd-sysinstall tool') option('coredump', type : 'boolean', description : 'install the coredump handler') diff --git a/shell-completion/bash/meson.build b/shell-completion/bash/meson.build index b0e56608e8f37..cddf742059d51 100644 --- a/shell-completion/bash/meson.build +++ b/shell-completion/bash/meson.build @@ -54,6 +54,7 @@ foreach item : [ ['systemd-resolve', 'ENABLE_RESOLVE'], ['systemd-run', ''], ['systemd-sysext', 'ENABLE_SYSEXT'], + ['systemd-sysinstall', 'ENABLE_SYSINSTALL'], ['systemd-vmspawn', 'ENABLE_VMSPAWN'], ['systemd-vpick', ''], ['timedatectl', 'ENABLE_TIMEDATED'], diff --git a/shell-completion/bash/systemd-sysinstall b/shell-completion/bash/systemd-sysinstall new file mode 100644 index 0000000000000..600e2aa939542 --- /dev/null +++ b/shell-completion/bash/systemd-sysinstall @@ -0,0 +1,90 @@ +# shellcheck shell=bash +# systemd-sysinstall(8) completion -*- shell-script -*- +# SPDX-License-Identifier: LGPL-2.1-or-later +# +# This file is part of systemd. +# +# systemd is free software; you can redistribute it and/or modify it +# under the terms of the GNU Lesser General Public License as published by +# the Free Software Foundation; either version 2.1 of the License, or +# (at your option) any later version. +# +# systemd is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +# General Public License for more details. +# +# You should have received a copy of the GNU Lesser General Public License +# along with systemd; If not, see . + +__contains_word() { + local w word=$1; shift + for w in "$@"; do + [[ $w = "$word" ]] && return + done +} + +__get_block_devices() { + systemd-repart --list-devices 2>/dev/null +} + +_systemd_sysinstall() { + local comps + local cur=${COMP_WORDS[COMP_CWORD]} prev=${COMP_WORDS[COMP_CWORD-1]} words cword + local -A OPTS=( + [STANDALONE]='-h --help --version' + [ARG]='--welcome + --chrome + --erase + --confirm + --summary + --reboot + --variables + --mute-console + --copy-locale + --copy-keymap + --copy-timezone + --definitions + --kernel + --set-credential + --load-credential' + ) + + _init_completion || return + + if __contains_word "$prev" ${OPTS[ARG]}; then + case $prev in + --welcome|--chrome|--confirm|--summary|--reboot|--mute-console|--copy-locale|--copy-keymap|--copy-timezone) + comps='yes no' + ;; + --erase|--variables) + comps='yes no auto' + ;; + --definitions) + comps=$(compgen -A directory -- "$cur") + compopt -o filenames + ;; + --kernel|--load-credential) + comps=$(compgen -A file -- "$cur") + compopt -o filenames + ;; + --set-credential) + comps='' + ;; + esac + COMPREPLY=( $(compgen -W '$comps' -- "$cur") ) + return 0 + fi + + if [[ "$cur" = -* ]]; then + COMPREPLY=( $(compgen -W '${OPTS[*]}' -- "$cur") ) + return 0 + fi + + comps=$(__get_block_devices) + COMPREPLY=( $(compgen -W '$comps' -- "$cur") ) + compopt -o filenames + return 0 +} + +complete -F _systemd_sysinstall systemd-sysinstall diff --git a/shell-completion/zsh/_systemd-sysinstall b/shell-completion/zsh/_systemd-sysinstall new file mode 100644 index 0000000000000..039e4bf2a7912 --- /dev/null +++ b/shell-completion/zsh/_systemd-sysinstall @@ -0,0 +1,29 @@ +#compdef systemd-sysinstall +# SPDX-License-Identifier: LGPL-2.1-or-later + +(( $+functions[_systemd-sysinstall_devices] )) || +_systemd-sysinstall_devices() { + local -a _devices + _devices=( ${(f)"$(systemd-repart --list-devices 2>/dev/null)"} ) + _wanted devices expl 'block device' compadd -a _devices +} + +_arguments \ + '(- *)'{-h,--help}'[Show help text]' \ + '(- *)--version[Show package version]' \ + '--welcome=[Show welcome text]:boolean:(yes no)' \ + '--chrome=[Show colored bars at top and bottom of the terminal]:boolean:(yes no)' \ + '--erase=[Erase target disk before installation]:boolean:(yes no auto)' \ + '--confirm=[Query for confirmation before installation]:boolean:(yes no)' \ + '--summary=[Show summary before installation]:boolean:(yes no)' \ + '--reboot=[Reboot system after installation]:boolean:(yes no)' \ + '--variables=[Register installation in firmware variables]:boolean:(yes no auto)' \ + '--mute-console=[Mute kernel/PID 1 console output during installation]:boolean:(yes no)' \ + '--copy-locale=[Copy current locale to target system]:boolean:(yes no)' \ + '--copy-keymap=[Copy current keymap to target system]:boolean:(yes no)' \ + '--copy-timezone=[Copy current timezone to target system]:boolean:(yes no)' \ + '--definitions=[Find partition definitions in directory]:directory:_directories' \ + '--kernel=[Kernel image to install]:kernel image:_files' \ + '--set-credential=[Install a credential with a literal value]: : _message "ID:VALUE"' \ + '--load-credential=[Load credential from a file or AF_UNIX socket]: : _message "ID:PATH"' \ + '*::block device:_systemd-sysinstall_devices' diff --git a/shell-completion/zsh/meson.build b/shell-completion/zsh/meson.build index 6cc8a2d57f83e..f10ba7be617cc 100644 --- a/shell-completion/zsh/meson.build +++ b/shell-completion/zsh/meson.build @@ -43,6 +43,7 @@ foreach item : [ ['_systemd-nspawn', ''], ['_systemd-path', ''], ['_systemd-run', ''], + ['_systemd-sysinstall', 'ENABLE_SYSINSTALL'], ['_systemd-tmpfiles', 'ENABLE_TMPFILES'], ['_timedatectl', 'ENABLE_TIMEDATED'], ['_udevadm', ''], diff --git a/src/basic/time-util.c b/src/basic/time-util.c index 78c33c7553ce6..eb74de32c2db8 100644 --- a/src/basic/time-util.c +++ b/src/basic/time-util.c @@ -1697,6 +1697,16 @@ int get_timezone(char **ret) { return strdup_to(ret, e); } +int get_timezone_prefer_env(char **ret) { + assert(ret); + + const char *e = getenv("TZ"); + if (e && e[0] == ':' && timezone_is_valid(e + 1, LOG_DEBUG)) + return strdup_to(ret, e + 1); + + return get_timezone(ret); +} + const char* etc_localtime(void) { static const char *cached = NULL; diff --git a/src/basic/time-util.h b/src/basic/time-util.h index 9a66a90859d67..fdaf11edcbf6c 100644 --- a/src/basic/time-util.h +++ b/src/basic/time-util.h @@ -178,6 +178,7 @@ bool clock_supported(clockid_t clock); usec_t usec_shift_clock(usec_t x, clockid_t from, clockid_t to); int get_timezone(char **ret); +int get_timezone_prefer_env(char **ret); const char* etc_localtime(void); int mktime_or_timegm_usec(struct tm *tm, bool utc, usec_t *ret); diff --git a/src/sysinstall/meson.build b/src/sysinstall/meson.build new file mode 100644 index 0000000000000..1d8be6036564b --- /dev/null +++ b/src/sysinstall/meson.build @@ -0,0 +1,10 @@ +# SPDX-License-Identifier: LGPL-2.1-or-later + +executables += [ + executable_template + { + 'name' : 'systemd-sysinstall', + 'public' : true, + 'conditions' : ['ENABLE_SYSINSTALL'], + 'sources' : files('sysinstall.c'), + }, +] diff --git a/src/sysinstall/sysinstall.c b/src/sysinstall/sysinstall.c new file mode 100644 index 0000000000000..d8f5cbee3c93f --- /dev/null +++ b/src/sysinstall/sysinstall.c @@ -0,0 +1,1425 @@ +/* SPDX-License-Identifier: LGPL-2.1-or-later */ + +#include +#include +#include + +#include "sd-varlink.h" + +#include "alloc-util.h" +#include "ansi-color.h" +#include "blockdev-list.h" +#include "build.h" +#include "build-path.h" +#include "chase.h" +#include "conf-files.h" +#include "constants.h" +#include "efi-loader.h" +#include "efivars.h" +#include "env-file.h" +#include "escape.h" +#include "fd-util.h" +#include "find-esp.h" +#include "format-table.h" +#include "format-util.h" +#include "fs-util.h" +#include "glyph-util.h" +#include "help-util.h" +#include "image-policy.h" +#include "json-util.h" +#include "locale-setup.h" +#include "log.h" +#include "loop-util.h" +#include "machine-credential.h" +#include "main-func.h" +#include "mount-util.h" +#include "options.h" +#include "os-util.h" +#include "parse-argument.h" +#include "parse-util.h" +#include "path-util.h" +#include "prompt-util.h" +#include "strv.h" +#include "terminal-util.h" +#include "varlink-util.h" + +static char *arg_node = NULL; +static bool arg_welcome = true; +static int arg_erase = -1; /* tri-state */ +static bool arg_confirm = true; +static bool arg_summary = true; +static char **arg_definitions = NULL; +static char *arg_kernel_image = NULL; +static bool arg_reboot = false; +static int arg_touch_variables = -1; /* tri-state */ +static MachineCredentialContext arg_credentials = {}; +static bool arg_copy_locale = true; +static bool arg_copy_keymap = true; +static bool arg_copy_timezone = true; +static bool arg_chrome = true; +static bool arg_mute_console = false; + +STATIC_DESTRUCTOR_REGISTER(arg_node, freep); +STATIC_DESTRUCTOR_REGISTER(arg_definitions, strv_freep); +STATIC_DESTRUCTOR_REGISTER(arg_kernel_image, freep); +STATIC_DESTRUCTOR_REGISTER(arg_credentials, machine_credential_context_done); + +static int help(void) { + int r; + + _cleanup_(table_unrefp) Table *options = NULL; + r = option_parser_get_help_table(&options); + if (r < 0) + return r; + + help_cmdline("[OPTIONS...] [DEVICE]"); + help_abstract("Installs the OS to another block device."); + help_section("Options:"); + + r = table_print_or_warn(options); + if (r < 0) + return r; + + help_man_page_reference("systemd-sysinstall", "8"); + + return 0; +} + +static int parse_argv(int argc, char *argv[]) { + int r; + + assert(argc >= 0); + assert(argv); + + OptionParser opts = { argc, argv }; + + FOREACH_OPTION_OR_RETURN(c, &opts) + switch (c) { + + OPTION_COMMON_HELP: + return help(); + + OPTION_COMMON_VERSION: + return version(); + + OPTION_LONG("welcome", "no", "Disable the welcome text"): + r = parse_boolean_argument("--welcome=", opts.arg, &arg_welcome); + if (r < 0) + return r; + + break; + + OPTION_LONG("erase", "BOOL", "Whether to erase the target disk"): + r = parse_tristate_argument_with_auto("--erase=", opts.arg, &arg_erase); + if (r < 0) + return r; + break; + + OPTION_LONG("confirm", "no", "Disable query for confirmation"): + r = parse_boolean_argument("--confirm=", opts.arg, &arg_confirm); + if (r < 0) + return r; + break; + + OPTION_LONG("summary", "no", "Disable summary before beginning operation"): + r = parse_boolean_argument("--summary=", opts.arg, &arg_summary); + if (r < 0) + return r; + break; + + OPTION_LONG("definitions", "DIR", "Find partition definitions in specified directory"): { + _cleanup_free_ char *path = NULL; + r = parse_path_argument(opts.arg, /* suppress_root= */ false, &path); + if (r < 0) + return r; + if (strv_consume(&arg_definitions, TAKE_PTR(path)) < 0) + return log_oom(); + break; + } + + OPTION_LONG("reboot", "BOOL", "Whether to reboot after installation is complete"): + r = parse_boolean_argument("--reboot=", opts.arg, &arg_reboot); + if (r < 0) + return r; + break; + + OPTION_LONG("variables", "BOOL", "Whether to modify EFI variables"): + r = parse_tristate_argument_with_auto("--variables=", opts.arg, &arg_touch_variables); + if (r < 0) + return r; + break; + + OPTION_LONG("kernel", "IMAGE", "Explicitly pick kernel image to install"): + r = parse_path_argument(opts.arg, /* suppress_root= */ false, &arg_kernel_image); + if (r < 0) + return r; + break; + + OPTION_LONG("set-credential", "ID:VALUE", "Install a credential with literal value to target system"): + r = machine_credential_set(&arg_credentials, opts.arg); + if (r < 0) + return r; + break; + + OPTION_LONG("load-credential", "ID:PATH", "Load a credential to install to new system from file or AF_UNIX stream socket"): + r = machine_credential_load(&arg_credentials, opts.arg); + if (r < 0) + return r; + + break; + + OPTION_LONG("copy-locale", "no", "Don't copy current locale to target system"): + r = parse_boolean_argument("--copy-locale=", opts.arg, &arg_copy_locale); + if (r < 0) + return r; + break; + + OPTION_LONG("copy-keymap", "no", "Don't copy current keymap to target system"): + r = parse_boolean_argument("--copy-keymap=", opts.arg, &arg_copy_keymap); + if (r < 0) + return r; + break; + + OPTION_LONG("copy-timezone", "no", "Don't copy current timezone to target system"): + r = parse_boolean_argument("--copy-timezone=", opts.arg, &arg_copy_timezone); + if (r < 0) + return r; + break; + + OPTION_LONG("chrome", "no", "Whether to show a color bar at top and bottom of terminal"): + r = parse_boolean_argument("--chrome=", opts.arg, &arg_chrome); + if (r < 0) + return r; + + break; + + OPTION_LONG("mute-console", "BOOL", "Whether to disallow kernel/PID 1 writes to the console while running"): + r = parse_boolean_argument("--mute-console=", opts.arg, &arg_mute_console); + if (r < 0) + return r; + break; + } + + char **args = option_parser_get_args(&opts); + + if (strv_length(args) > 1) + return log_error_errno(SYNTHETIC_ERRNO(EINVAL), "Too many arguments."); + if (!strv_isempty(args)) { + arg_node = strdup(args[0]); + if (!arg_node) + return log_oom(); + } + + return 1; +} + +static int print_welcome(sd_varlink **mute_console_link) { + _cleanup_free_ char *pretty_name = NULL, *os_name = NULL, *ansi_color = NULL; + const char *pn, *ac; + int r; + + assert(mute_console_link); + + if (!*mute_console_link && arg_mute_console) + (void) mute_console(mute_console_link); + + if (!arg_welcome) + return 0; + + r = parse_os_release( + /* root= */ NULL, + "PRETTY_NAME", &pretty_name, + "NAME", &os_name, + "ANSI_COLOR", &ansi_color); + if (r < 0) + log_full_errno(r == -ENOENT ? LOG_DEBUG : LOG_WARNING, r, + "Failed to read os-release file, ignoring: %m"); + + pn = os_release_pretty_name(pretty_name, os_name); + ac = isempty(ansi_color) ? "0" : ansi_color; + + if (colors_enabled()) + printf(ANSI_HIGHLIGHT "Welcome to the " ANSI_NORMAL "\x1B[%sm%s" ANSI_HIGHLIGHT " Installer!" ANSI_NORMAL "\n", ac, pn); + else + printf("Welcome to the %s Installer!\n", pn); + + putchar('\n'); + + return 0; +} + +static int connect_to_repart(sd_varlink **link) { + int r; + + assert(link); + + if (*link) { + /* Reset the time-out to default here, since we are reusing the connection, but might enqueue + * a different operation */ + r = sd_varlink_set_relative_timeout(*link, 0); + if (r < 0) + return r; + + return 0; + } + + _cleanup_close_ int fd = -EBADF; + _cleanup_free_ char *repart = NULL; + fd = pin_callout_binary("systemd-repart", &repart); + if (fd < 0) + return log_error_errno(fd, "Failed to find systemd-repart binary: %m"); + + r = sd_varlink_connect_exec(link, repart, /* argv= */ NULL); + if (r < 0) + return log_error_errno(r, "Failed to connect to systemd-repart: %m"); + + return 1; +} + +static int acquire_device_list( + sd_varlink **link, + char ***ret_menu, + char ***ret_accepted) { + int r; + + r = connect_to_repart(link); + if (r < 0) + return r; + + _cleanup_strv_free_ char **menu = NULL, **accepted = NULL; + + sd_json_variant *reply = NULL; + const char *error_id = NULL; + r = sd_varlink_collectbo( + *link, + "io.systemd.Repart.ListCandidateDevices", + &reply, + &error_id, + SD_JSON_BUILD_PAIR_BOOLEAN("ignoreRoot", true)); + if (r < 0) + return log_error_errno(r, "Failed to issue io.systemd.Repart.ListCandidateDevices() varlink call: %m"); + if (streq_ptr(error_id, "io.systemd.Repart.NoCandidateDevices")) + log_debug("No candidate devices found."); + else if (error_id) { + r = sd_varlink_error_to_errno(error_id, reply); /* If this is a system errno style error, output it with %m */ + if (r != -EBADR) + return log_error_errno(r, "Failed to issue io.systemd.Repart.ListCandidateDevices() varlink call: %m"); + + return log_error_errno(r, "Failed to issue io.systemd.Repart.ListCandidateDevices() varlink call: %s", error_id); + } else { + sd_json_variant *i; + JSON_VARIANT_ARRAY_FOREACH(i, reply) { + _cleanup_(block_device_done) BlockDevice bd = BLOCK_DEVICE_NULL; + + static const sd_json_dispatch_field dispatch_table[] = { + { "node", SD_JSON_VARIANT_STRING, sd_json_dispatch_string, offsetof(BlockDevice, node), SD_JSON_MANDATORY }, + { "symlinks", SD_JSON_VARIANT_ARRAY, sd_json_dispatch_strv, offsetof(BlockDevice, symlinks), 0 }, + {} + }; + + r = sd_json_dispatch(i, dispatch_table, SD_JSON_LOG|SD_JSON_ALLOW_EXTENSIONS, &bd); + if (r < 0) + return r; + + if (strv_extend(&accepted, bd.node) < 0) + return log_oom(); + if (strv_extend_strv(&accepted, bd.symlinks, /* filter_duplicates= */ true) < 0) + return log_oom(); + + /* Prefer the by-id and by-loop-ref because they typically contain the strings most + * directly understood by the user */ + const char *n = strv_find_prefix(bd.symlinks, "/dev/disk/by-id/"); + if (!n) + n = strv_find_prefix(bd.symlinks, "/dev/disk/by-loop-ref/"); + if (!n) + n = bd.node; + + if (strv_extend(&menu, n) < 0) + return log_oom(); + } + } + + *ret_menu = TAKE_PTR(menu); + *ret_accepted = TAKE_PTR(accepted); + return 0; +} + +static int device_is_valid(const char *node, void *userdata) { + + if (!path_is_valid(node) || !path_is_absolute(node)) { + log_error("Not a valid absolute file system path, refusing: %s", node); + return false; + } + + struct stat st; + if (stat(node, &st) < 0) { + log_error_errno(errno, "Failed to check if '%s' is a valid block device node: %m", node); + return false; + } + if (!S_ISBLK(st.st_mode)) { + log_error("Path '%s' does not refer to a valid block device node, refusing.", node); + return false; + } + + return true; +} + +static int refresh_devices(char ***ret_menu, char ***ret_accepted, void *userdata) { + sd_varlink **repart_link = ASSERT_PTR(userdata); + + (void) acquire_device_list(repart_link, ret_menu, ret_accepted); + return 0; +} + +static int prompt_block_device(sd_varlink **repart_link, char **ret_node) { + int r; + + putchar('\n'); + + _cleanup_strv_free_ char **menu = NULL, **accepted = NULL; + (void) acquire_device_list(repart_link, &menu, &accepted); + + r = prompt_loop("Please enter target disk device", + GLYPH_COMPUTER_DISK, + menu, + accepted, + /* ellipsize_percentage= */ 20, + /* n_columns= */ 1, + /* column_width= */ 80, + device_is_valid, + refresh_devices, + /* userdata= */ repart_link, + PROMPT_SHOW_MENU|PROMPT_SHOW_MENU_NOW|PROMPT_MAY_SKIP|PROMPT_HIDE_SKIP_HINT|PROMPT_HIDE_MENU_HINT, + ret_node); + if (r < 0) + return r; + if (r == 0) + return log_error_errno(SYNTHETIC_ERRNO(ECANCELED), "Installation cancelled."); + + return 0; +} + +static int read_space_metrics( + sd_json_variant *v, + uint64_t *min_size, + uint64_t *current_size, + uint64_t *need_free) { + + int r; + + struct { + uint64_t min_size; + uint64_t current_size; + uint64_t need_free; + } p = { + .min_size = UINT64_MAX, + .current_size = UINT64_MAX, + .need_free = UINT64_MAX, + }; + + static const sd_json_dispatch_field dispatch_table[] = { + { "minimalSizeBytes", _SD_JSON_VARIANT_TYPE_INVALID, sd_json_dispatch_uint64, voffsetof(p, min_size), 0 }, + { "currentSizeBytes", _SD_JSON_VARIANT_TYPE_INVALID, sd_json_dispatch_uint64, voffsetof(p, current_size), 0 }, + { "needFreeBytes", _SD_JSON_VARIANT_TYPE_INVALID, sd_json_dispatch_uint64, voffsetof(p, need_free), 0 }, + {} + }; + + r = sd_json_dispatch(v, dispatch_table, SD_JSON_ALLOW_EXTENSIONS, &p); + if (r < 0) + return r; + + if (min_size) + *min_size = p.min_size; + if (current_size) + *current_size = p.current_size; + if (need_free) + *need_free = p.need_free; + + return 0; +} + +static int invoke_repart( + sd_varlink **link, + const char *node, + bool erase, + bool dry_run, + uint64_t *min_size, /* initialized both on success and error */ + uint64_t *current_size, /* ditto */ + uint64_t *need_free) { /* ditto */ + + int r; + + assert(link); + + /* Note, if dry_run is true, then ENOSPC, E2BIG, EHWPOISON will not be logged about beyond LOG_DEBUG, + * but all other errors will be */ + + r = connect_to_repart(link); + if (r < 0) { + read_space_metrics(/* v= */ NULL, min_size, current_size, need_free); + return r; + } + + if (!dry_run) { + /* Seeding the partitions might be very slow, disable timeout */ + r = sd_varlink_set_relative_timeout(*link, UINT64_MAX); + if (r < 0) + return log_error_errno(r, "Failed to disable IPC timeout: %m"); + } + + sd_json_variant *reply = NULL; + const char *error_id = NULL; + r = sd_varlink_callbo( + *link, + "io.systemd.Repart.Run", + &reply, + &error_id, + SD_JSON_BUILD_PAIR_STRING("node", node), + SD_JSON_BUILD_PAIR_STRING("empty", erase ? "force" : "allow"), + SD_JSON_BUILD_PAIR_BOOLEAN("dryRun", dry_run), + SD_JSON_BUILD_PAIR_CONDITION(!!arg_definitions, "definitions", SD_JSON_BUILD_STRV(arg_definitions)), + SD_JSON_BUILD_PAIR_BOOLEAN("deferPartitionsEmpty", true), + SD_JSON_BUILD_PAIR_BOOLEAN("deferPartitionsFactoryReset", true)); + if (r < 0) { + read_space_metrics(/* v= */ NULL, min_size, current_size, need_free); + return log_error_errno(r, "Failed to issue io.systemd.Repart.Run() varlink call: %m"); + } + if (error_id) { + if (streq(error_id, "io.systemd.Repart.InsufficientFreeSpace")) { + (void) read_space_metrics(reply, min_size, current_size, need_free); + return log_full_errno( + dry_run ? LOG_DEBUG : LOG_ERR, + SYNTHETIC_ERRNO(ENOSPC), + "Not enough free space on disk, cannot install."); + } + if (streq(error_id, "io.systemd.Repart.DiskTooSmall")) { + (void) read_space_metrics(reply, min_size, current_size, need_free); + return log_full_errno( + dry_run ? LOG_DEBUG : LOG_ERR, + SYNTHETIC_ERRNO(E2BIG), + "Disk too small for installation, cannot install."); + } + + /* For all other errors reset the metrics */ + read_space_metrics(/* v= */ NULL, min_size, current_size, need_free); + + if (streq(error_id, "io.systemd.Repart.ConflictingDiskLabelPresent")) + return log_full_errno( + dry_run ? LOG_DEBUG : LOG_ERR, + SYNTHETIC_ERRNO(EHWPOISON), + "A conflicting disk label is already present on the target disk, cannot install unless disk is erased."); + + r = sd_varlink_error_to_errno(error_id, reply); /* If this is a system errno style error, output it with %m */ + if (r != -EBADR) + return log_error_errno(r, "Failed to issue io.systemd.Repart.Run() varlink call: %m"); + + return log_error_errno(r, "Failed to issue io.systemd.Repart.Run() varlink call: %s", error_id); + } + + (void) read_space_metrics(reply, min_size, current_size, need_free); + + return 0; +} + +static int prompt_erase( + bool can_add, + int *ret_erase) { + int r; + + assert(ret_erase); + + putchar('\n'); + + char **l = can_add ? STRV_MAKE("keep", "erase") : STRV_MAKE("erase"); + + _cleanup_free_ char *reply = NULL; + r = prompt_loop(can_add ? + "Please type 'keep' to install the OS in addition to what the disk already contains, or 'erase' to erase all data on the disk" : + "Please type 'erase' to confirm that all data on the disk shall be erased", + GLYPH_BROOM, + /* menu= */ l, + /* accepted= */ l, + /* ellipsize_percentage= */ 20, + /* n_columns= */ 2, + /* column_width= */ 40, + /* is_valid= */ NULL, + /* refresh= */ NULL, + /* userdata= */ NULL, + PROMPT_SHOW_MENU|PROMPT_MAY_SKIP|PROMPT_HIDE_MENU_HINT|PROMPT_HIDE_SKIP_HINT, + &reply); + if (r < 0) + return r; + if (r == 0) + return log_error_errno(SYNTHETIC_ERRNO(ECANCELED), "Installation cancelled."); + + if (streq(reply, "erase")) + *ret_erase = true; + else if (streq(reply, "keep")) + *ret_erase = false; + else + assert_not_reached(); + + return 0; +} + +static int prompt_touch_variables(void) { + int r; + + if (arg_touch_variables >= 0) + return 0; + + putchar('\n'); + + char **l = STRV_MAKE("yes", "no"); + + _cleanup_free_ char *reply = NULL; + r = prompt_loop("Type 'yes' to register OS installation in firmware variables of the local system, 'no' otherwise", + GLYPH_ROCKET, + /* menu= */ l, + /* accepted= */ l, + /* ellipsize_percentage= */ 20, + /* n_columns= */ 2, + /* column_width= */ 40, + /* is_valid= */ NULL, + /* refresh= */ NULL, + /* userdata= */ NULL, + PROMPT_SHOW_MENU|PROMPT_MAY_SKIP|PROMPT_HIDE_MENU_HINT|PROMPT_HIDE_SKIP_HINT, + &reply); + if (r < 0) + return r; + if (r == 0) + return log_error_errno(SYNTHETIC_ERRNO(ECANCELED), "Installation cancelled."); + + r = parse_boolean(reply); + if (r < 0) + return log_error_errno(r, "Failed to parse reply: %s", reply); + + arg_touch_variables = r; + + return 0; +} + +static int prompt_confirm(void) { + int r; + + if (!arg_confirm) + return 0; + + putchar('\n'); + + char **l = STRV_MAKE("yes", "no"); + + _cleanup_free_ char *reply = NULL; + r = prompt_loop(arg_summary ? "Please type 'yes' to confirm the choices above and begin the installation" : + "Please type 'yes' to begin the installation", + GLYPH_WARNING_SIGN, + /* menu= */ l, + /* accepted= */ l, + /* ellipsize_percentage= */ 20, + /* n_columns= */ 2, + /* column_width= */ 40, + /* is_valid= */ NULL, + /* refresh= */ NULL, + /* userdata= */ NULL, + PROMPT_SHOW_MENU|PROMPT_MAY_SKIP|PROMPT_HIDE_MENU_HINT|PROMPT_HIDE_SKIP_HINT, + &reply); + if (r < 0) + return r; + if (r == 0) + return log_error_errno(SYNTHETIC_ERRNO(ECANCELED), "Installation cancelled."); + + if (!streq(reply, "yes")) + return log_error_errno(SYNTHETIC_ERRNO(ECANCELED), "Installation not confirmed, cancelling."); + + return 0; +} + +static int validate_run(sd_varlink **repart_link, const char *node) { + int r; + + assert(repart_link); + assert(node); + + /* First loop: either with explicitly configured --erase= value, or false. A second loop only if not configured explicitly. */ + bool try_erase = arg_erase > 0, conflicting_disk_label = false; + for (;;) { + uint64_t min_size = UINT64_MAX, current_size = UINT64_MAX, need_free = UINT64_MAX; + r = invoke_repart( + repart_link, + node, + try_erase, + /* dry_run= */ true, + &min_size, + ¤t_size, + &need_free); + if (r == -ENOSPC) { + /* The disk is large enough, but there's not enough unallocated space. Hence proceed, but require erasing */ + if (try_erase || arg_erase >= 0) + return log_error_errno(r, "The selected disk is big enough for the installation but does not have enough free space."); + + log_notice("The selected disk is big enough for the installation but does not have enough free space. Installation will require erasing."); + if (need_free != UINT64_MAX) + log_info("Required free space is %s.", FORMAT_BYTES(need_free)); + + try_erase = true; + } else if (r == -E2BIG) { + /* Won't fit, whatever we do */ + log_error_errno(r, "The selected disk is not large enough for an OS installation."); + if (current_size != UINT64_MAX) + log_info("The size of the selected disk is %s, but a minimal size of %s is required.", + FORMAT_BYTES(current_size), + FORMAT_BYTES(min_size)); + return r; + } else if (r == -EHWPOISON) { + if (try_erase || arg_erase >= 0) + return log_error_errno(r, "The selected disk contains a conflicting disk label, refusing."); + + log_debug("Disk contains a conflicting disk label, checking if we could install the OS after erasing it."); + try_erase = true; + conflicting_disk_label = true; + continue; + } else if (r < 0) + /* invoke_repart() already logged about all other errors */ + return r; + else + /* Nice, we can add the OS to the disk, without erasing anything. */ + log_info("The selected disk has enough free space for an installation of the OS."); + + if (conflicting_disk_label) + log_warning("A conflicting disk label has been found, and must be erased for installation."); + + if (arg_erase < 0) { + r = prompt_erase(/* can_add= */ !try_erase, &arg_erase); + if (r < 0) + return r; + } + + return 0; + } +} + +static int show_summary(void) { + int r; + + if (!arg_summary) + return 0; + + printf("\n" + "%sSummary:%s\n", ansi_underline(), ansi_normal()); + + _cleanup_(table_unrefp) Table *table = table_new_vertical(); + if (!table) + return log_oom(); + + r = table_add_many( + table, + TABLE_FIELD, "Selected Disk", + TABLE_STRING, arg_node, + TABLE_FIELD, "Erase Disk", + TABLE_BOOLEAN, arg_erase, + TABLE_SET_COLOR, arg_erase ? ansi_highlight_red() : NULL, + TABLE_FIELD, "Register in Firmware", + TABLE_BOOLEAN, arg_touch_variables); + if (r < 0) + return table_log_add_error(r); + + static const char * const map[] = { + "firstboot.keymap", "Keyboard Map", + "firstboot.locale", "Locale", + "firstboot.locale-messages", "Locale (Messages)", + "firstboot.timezone", "Timezone", + NULL + }; + + STRV_FOREACH_PAIR(id, text, map) { + MachineCredential *c = machine_credential_find(&arg_credentials, *id); + if (!c) + continue; + + _cleanup_free_ char *escaped = cescape_length(c->data, c->size); + if (!escaped) + return log_oom(); + + r = table_add_many( + table, + TABLE_FIELD, *text, + TABLE_STRING, escaped); + if (r < 0) + return table_log_add_error(r); + } + + unsigned n_extra_credentials = 0; + FOREACH_ARRAY(cred, arg_credentials.credentials, arg_credentials.n_credentials) { + bool covered = false; + + STRV_FOREACH_PAIR(id, text, map) + if (streq(*id, cred->id)) { + covered = true; + break; + } + + if (!covered) + n_extra_credentials++; + } + + if (n_extra_credentials > 0) { + r = table_add_many( + table, + TABLE_FIELD, "Extra Credentials", + TABLE_UINT, n_extra_credentials); + if (r < 0) + return table_log_add_error(r); + } + + r = table_print(table); + if (r < 0) + return r; + + return 0; +} + +static int find_current_kernel( + char **ret_filename, + int *ret_fd) { + + int r; + + sd_id128_t uuid; + r = efi_stub_get_device_part_uuid(&uuid); + if (r == -ENOENT) + return log_error_errno(r, "Cannot find current kernel, no stub partition UUID passed via EFI variables."); + if (r < 0) + return log_error_errno(r, "Unable to determine stub partition UUID: %m"); + + _cleanup_free_ char *image = NULL; + r = efi_get_variable_path(EFI_LOADER_VARIABLE_STR("StubImageIdentifier"), &image); + if (r == -ENOENT) + return log_error_errno(r, "Cannot find current kernel, no stub EFI binary path passed."); + if (r < 0) + return log_error_errno(r, "Unable to determine stub EFI binary path: %m"); + + /* Note: we search for the *host* ESP here (i.e. the one the current EFI paths relate to), not the + * one of the target image */ + + _cleanup_free_ char *partition_path = NULL; + _cleanup_close_ int partition_fd = -EBADF; + sd_id128_t partition_uuid; + r = find_esp_and_warn_full( + /* root= */ NULL, + /* path= */ NULL, + /* unprivileged_mode= */ false, + &partition_path, + &partition_fd, + /* ret_part= */ NULL, + /* ret_pstart= */ NULL, + /* ret_psize= */ NULL, + &partition_uuid, + /* ret_devid= */ NULL); + if (r < 0 && r != -ENOKEY) + return r; + if (r < 0 || !sd_id128_equal(uuid, partition_uuid)) { + partition_path = mfree(partition_path); + partition_fd = safe_close(partition_fd); + + r = find_xbootldr_and_warn_full( + /* root= */ NULL, + /* path= */ NULL, + /* unprivileged_mode= */ false, + &partition_path, + &partition_fd, + &partition_uuid, + /* ret_devid= */ NULL); + if (r < 0 && r != -ENOKEY) + return r; + + if (r < 0 || !sd_id128_equal(uuid, partition_uuid)) + return log_error_errno(SYNTHETIC_ERRNO(ENOENT), "Unable to find UKI on ESP/XBOOTLDR partitions."); + } + + _cleanup_free_ char *resolved = NULL; + _cleanup_close_ int fd = chase_and_openat( + /* root_fd= */ partition_fd, + /* dir_fd= */ partition_fd, + image, + CHASE_PROHIBIT_SYMLINKS|CHASE_MUST_BE_REGULAR, + O_RDONLY|O_CLOEXEC, + &resolved); + if (fd < 0) + return log_error_errno(fd, "Failed to find EFI binary '%s' on partition '%s': %m", image, partition_path); + + _cleanup_free_ char *fn = NULL; + r = path_extract_filename(resolved, &fn); + if (r < 0) + return log_error_errno(r, "Failed to extract UKI file name from '%s': %m", resolved); + + if (ret_filename) + *ret_filename = TAKE_PTR(fn); + if (ret_fd) + *ret_fd = TAKE_FD(fd); + + return 0; +} + +static int connect_to_bootctl(sd_varlink **link) { + int r; + + assert(link); + + if (*link) + return 0; + + _cleanup_close_ int fd = -EBADF; + _cleanup_free_ char *bootctl = NULL; + fd = pin_callout_binary("bootctl", &bootctl); + if (fd < 0) + return log_error_errno(fd, "Failed to find bootctl binary: %m"); + + r = sd_varlink_connect_exec(link, bootctl, /* argv= */ NULL); + if (r < 0) + return log_error_errno(r, "Failed to connect to bootctl: %m"); + + r = sd_varlink_set_allow_fd_passing_output(*link, true); + if (r < 0) + return log_error_errno(r, "Failed to enable fd passing to bootctl: %m"); + + return 1; +} + +static int invoke_bootctl_install( + sd_varlink **link, + const char *root_dir, + int root_fd) { + int r; + + assert(link); + assert(root_dir); + assert(root_fd >= 0); + + r = connect_to_bootctl(link); + if (r < 0) + return r; + + int fd_idx = sd_varlink_push_dup_fd(*link, root_fd); + if (fd_idx < 0) + return log_error_errno(fd_idx, "Failed to submit root fd onto Varlink connection: %m"); + + const char *error_id = NULL; + r = varlink_callbo_and_log( + *link, + "io.systemd.BootControl.Install", + /* reply= */ NULL, + &error_id, + SD_JSON_BUILD_PAIR_STRING("operation", "new"), + SD_JSON_BUILD_PAIR_INTEGER("rootFileDescriptor", fd_idx), + SD_JSON_BUILD_PAIR_STRING("rootDirectory", root_dir), + SD_JSON_BUILD_PAIR_BOOLEAN("touchVariables", arg_touch_variables)); + if (r < 0) + return r; + + return 0; +} + +static int invoke_bootctl_link( + sd_varlink **link, + const char *root_dir, + int root_fd, + char **encrypted_credentials) { + int r; + + assert(link); + assert(root_dir); + assert(root_fd >= 0); + + r = connect_to_bootctl(link); + if (r < 0) + return r; + + _cleanup_(sd_json_variant_unrefp) sd_json_variant *array = NULL; + STRV_FOREACH_PAIR(name, value, encrypted_credentials) { + _cleanup_free_ char *j = strjoin(*name, ".cred"); + if (!j) + return log_oom(); + + r = sd_json_variant_append_arraybo( + &array, + SD_JSON_BUILD_PAIR_STRING("filename", j), + SD_JSON_BUILD_PAIR_BASE64("data", *value, strlen(*value))); + if (r < 0) + return log_error_errno(r, "Failed to append credential to message: %m"); + } + + int root_fd_idx = sd_varlink_push_dup_fd(*link, root_fd); + if (root_fd_idx < 0) + return log_error_errno(root_fd_idx, "Failed to submit root fd onto Varlink connection: %m"); + + _cleanup_free_ char *kernel_filename = NULL; + _cleanup_close_ int kernel_fd = -EBADF; + if (arg_kernel_image) { + r = path_extract_filename(arg_kernel_image, &kernel_filename); + if (r < 0) + return log_error_errno(r, "Failed to extract filename from kernel path '%s': %m", arg_kernel_image); + if (r == O_DIRECTORY) + return log_error_errno(SYNTHETIC_ERRNO(EISDIR), "Kernel path '%s' refers to directory, must be regular file, refusing.", arg_kernel_image); + + kernel_fd = xopenat_full(XAT_FDROOT, arg_kernel_image, O_RDONLY|O_CLOEXEC, XO_REGULAR, MODE_INVALID); + if (kernel_fd < 0) + return log_error_errno(kernel_fd, "Failed to open kernel image '%s': %m", arg_kernel_image); + + } else { + r = find_current_kernel(&kernel_filename, &kernel_fd); + if (r < 0) + return r; + } + + int kernel_fd_idx = sd_varlink_push_dup_fd(*link, kernel_fd); + if (kernel_fd_idx < 0) + return log_error_errno(kernel_fd_idx, "Failed to submit kernel fd onto Varlink connection: %m"); + + const char *error_id = NULL; + r = varlink_callbo_and_log( + *link, + "io.systemd.BootControl.Link", + /* reply= */ NULL, + &error_id, + SD_JSON_BUILD_PAIR_INTEGER("rootFileDescriptor", root_fd_idx), + SD_JSON_BUILD_PAIR_STRING("rootDirectory", root_dir), + JSON_BUILD_PAIR_STRING_NON_EMPTY("kernelFilename", kernel_filename), + SD_JSON_BUILD_PAIR_INTEGER("kernelFileDescriptor", kernel_fd_idx), + SD_JSON_BUILD_PAIR_CONDITION(!!array, "extraFiles", SD_JSON_BUILD_VARIANT(array))); + if (r < 0) + return r; + + return 0; +} + +static int maybe_reboot(void) { + int r; + + if (!arg_reboot) + return 0; + + log_notice("%s%sSystem will reboot now.", + emoji_enabled() ? glyph(GLYPH_CIRCLE_ARROW) : "", emoji_enabled() ? " " : ""); + + if (!any_key_to_proceed()) + return 0; + + log_notice("%s%sInitiating reboot.", + emoji_enabled() ? glyph(GLYPH_CIRCLE_ARROW) : "", emoji_enabled() ? " " : ""); + + _cleanup_(sd_varlink_unrefp) sd_varlink *link = NULL; + r = sd_varlink_connect_address(&link, "/run/systemd/io.systemd.Shutdown"); + if (r < 0) + return log_error_errno(r, "Failed to connect to systemd-logind: %m"); + + sd_json_variant *reply = NULL; + const char *error_id = NULL; + r = varlink_callbo_and_log( + link, + "io.systemd.Shutdown.Reboot", + &reply, + &error_id); + if (r < 0) + return r; + + return 0; +} + +static int read_credential_locale(void) { + int r; + + if (!arg_copy_locale) + return 0; + + if (machine_credential_find(&arg_credentials, "firstboot.locale") || + machine_credential_find(&arg_credentials, "firstboot.locale-messages")) + return 0; + + /* For the main locale we check the two env vars, and if neither is there, we use LC_NUMERIC, since + * it seems to be one of the most fundamental ones, and is not LC_MESSAGES for which we have a + * separate setting after all */ + const char *l = getenv("LC_ALL") ?: getenv("LANG") ?: setlocale(LC_NUMERIC, NULL); + if (l) { + r = machine_credential_add(&arg_credentials, "firstboot.locale", l, /* size= */ SIZE_MAX); + if (r < 0) + return log_oom(); + } + + const char *m = setlocale(LC_MESSAGES, NULL); + if (m && !streq_ptr(m, l)) { + r = machine_credential_add(&arg_credentials, "firstboot.locale-messages", m, /* size= */ SIZE_MAX); + if (r < 0) + return log_oom(); + } + + return 0; +} + +static int read_credential_keymap(void) { + int r; + + if (!arg_copy_keymap) + return 0; + + if (machine_credential_find(&arg_credentials, "firstboot.keymap")) + return 0; + + _cleanup_free_ char *keymap = NULL; + r = parse_env_file( + /* f= */ NULL, + etc_vconsole_conf(), + "KEYMAP", &keymap); + if (r < 0 && r != -ENOENT) + return log_error_errno(r, "Failed to parse '%s': %m", etc_vconsole_conf()); + + if (!isempty(keymap)) { + r = machine_credential_add(&arg_credentials, "firstboot.keymap", keymap, /* size= */ SIZE_MAX); + if (r < 0) + return log_oom(); + } + + return 0; +} + +static int read_credential_timezone(void) { + int r; + + if (!arg_copy_timezone) + return 0; + + if (machine_credential_find(&arg_credentials, "firstboot.timezone")) + return 0; + + _cleanup_free_ char *tz = NULL; + r = get_timezone_prefer_env(&tz); + if (r < 0) + log_warning_errno(r, "Failed to read timezone, skipping timezone propagation: %m"); + else { + r = machine_credential_add(&arg_credentials, "firstboot.timezone", tz, /* size= */ SIZE_MAX); + if (r < 0) + return log_oom(); + } + + return 0; +} + +static int read_credentials(void) { + int r; + + r = read_credential_locale(); + if (r < 0) + return r; + + r = read_credential_keymap(); + if (r < 0) + return r; + + r = read_credential_timezone(); + if (r < 0) + return r; + + return 0; +} + +static int connect_to_creds(sd_varlink **link) { + int r; + + assert(link); + + if (*link) + return 0; + + _cleanup_close_ int fd = -EBADF; + _cleanup_free_ char *creds = NULL; + fd = pin_callout_binary("systemd-creds", &creds); + if (fd < 0) + return log_error_errno(fd, "Failed to find systemd-creds binary: %m"); + + r = sd_varlink_connect_exec(link, creds, /* argv= */ NULL); + if (r < 0) + return log_error_errno(r, "Failed to connect to systemd-creds: %m"); + + return 1; +} + +static int encrypt_one_credential(sd_varlink **link, const MachineCredential *input, char ***encrypted) { + int r; + + assert(link); + assert(input); + assert(encrypted); + + log_info("Encrypting credential '%s'...", input->id); + + r = connect_to_creds(link); + if (r < 0) + return r; + + sd_json_variant *reply = NULL; + const char *error_id = NULL; + r = varlink_callbo_and_log( + *link, + "io.systemd.Credentials.Encrypt", + &reply, + &error_id, + SD_JSON_BUILD_PAIR_STRING("name", input->id), + SD_JSON_BUILD_PAIR_BASE64("data", input->data, input->size), + SD_JSON_BUILD_PAIR_STRING("scope", "system"), + /* We pick the 'auto_initrd' key for this, since we want TPM if available, but are fine with NULL if not */ + SD_JSON_BUILD_PAIR_STRING("withKey", "auto_initrd")); + if (r < 0) + return r; + + static const sd_json_dispatch_field dispatch_table[] = { + { "blob", SD_JSON_VARIANT_STRING, sd_json_dispatch_const_string, 0, 0 }, + {} + }; + + const char *blob = NULL; + r = sd_json_dispatch(reply, dispatch_table, SD_JSON_ALLOW_EXTENSIONS, &blob); + if (r < 0) + return r; + + r = strv_extend_many(encrypted, input->id, blob); + if (r < 0) + return r; + + return 0; +} + +static int encrypt_credentials(sd_varlink **link, char ***encrypted) { + int r; + + assert(link); + assert(encrypted); + + FOREACH_ARRAY(cred, arg_credentials.credentials, arg_credentials.n_credentials) { + r = encrypt_one_credential(link, cred, encrypted); + if (r < 0) + return r; + } + + return 0; +} + +static const ImagePolicy image_policy = { + .n_policies = 4, + .policies = { + /* We mount / and /usr/ so that we can get access to /etc/machine-id and /etc/kernel/ */ + { PARTITION_ROOT, PARTITION_POLICY_VERITY|PARTITION_POLICY_SIGNED|PARTITION_POLICY_UNPROTECTED|PARTITION_POLICY_ABSENT }, + { PARTITION_USR, PARTITION_POLICY_VERITY|PARTITION_POLICY_SIGNED|PARTITION_POLICY_UNPROTECTED|PARTITION_POLICY_ABSENT }, + { PARTITION_ESP, PARTITION_POLICY_UNPROTECTED|PARTITION_POLICY_ABSENT }, + { PARTITION_XBOOTLDR, PARTITION_POLICY_UNPROTECTED|PARTITION_POLICY_ABSENT }, + }, + .default_flags = PARTITION_POLICY_IGNORE, +}; + +static int settle_definitions(void) { + int r; + + if (arg_definitions) + return 0; + + /* If /usr/lib/repart.sysinstall.d/ is populated, use it, otherwise use the regular definition + * files */ + + _cleanup_strv_free_ char **files = NULL; + r = conf_files_list_strv( + &files, + ".conf", + /* root= */ NULL, + CONF_FILES_REGULAR|CONF_FILES_FILTER_MASKED|CONF_FILES_WARN|CONF_FILES_DONT_PREFIX_ROOT, + (const char**) CONF_PATHS_STRV("repart.sysinstall.d")); + if (r < 0) + return log_error_errno(r, "Failed to enumerate *.conf files: %m"); + + if (!strv_isempty(files)) { + arg_definitions = strv_copy(CONF_PATHS_STRV("repart.sysinstall.d")); + if (!arg_definitions) + return log_oom(); + } + + return 0; +} + +static int run(int argc, char *argv[]) { + int r; + + setlocale(LC_ALL, ""); + + r = parse_argv(argc, argv); + if (r <= 0) + return r; + + log_setup(); + + r = settle_definitions(); + if (r < 0) + return r; + + _cleanup_(sd_varlink_flush_close_unrefp) sd_varlink *mute_console_link = NULL; + if (arg_welcome) { + if (arg_mute_console) + (void) mute_console(&mute_console_link); + + (void) terminal_reset_defensive_locked(STDOUT_FILENO, /* flags= */ 0); + + if (arg_chrome) + chrome_show("Operating System Installer", /* bottom= */ NULL); + } + + DEFER_VOID_CALL(chrome_hide); + + _cleanup_(sd_varlink_flush_close_unrefp) sd_varlink *repart_link = NULL; + if (arg_node) { + r = print_welcome(&mute_console_link); + if (r < 0) + return r; + + r = validate_run(&repart_link, arg_node); + if (r < 0) + return r; + } else { + /* Determine the minimum disk size */ + uint64_t min_size = UINT64_MAX; + r = invoke_repart( + &repart_link, + /* node= */ NULL, + /* erase= */ true, + /* dry_run= */ true, + &min_size, + /* current_size= */ NULL, + /* need_free= */ NULL); + if (r < 0) + return r; + + r = print_welcome(&mute_console_link); + if (r < 0) + return r; + + log_info("Required minimal installation disk size is %s.", FORMAT_BYTES(min_size)); + + for (;;) { + _cleanup_free_ char *node = NULL; + r = prompt_block_device(&repart_link, &node); + if (r < 0) + return r; + + r = validate_run(&repart_link, node); + if (IN_SET(r, -ENOSPC, -E2BIG, -EHWPOISON)) /* Device is no fit, pick other */ + continue; + if (r < 0) + return r; + + arg_node = TAKE_PTR(node); + break; + } + } + + r = prompt_touch_variables(); + if (r < 0) + return r; + + r = read_credentials(); + if (r < 0) + return r; + + /* Verify we have everything we need */ + assert(arg_node); + assert(arg_erase >= 0); + assert(arg_touch_variables >= 0); + + r = show_summary(); + if (r < 0) + return r; + + r = prompt_confirm(); + if (r < 0) + return r; + + putchar('\n'); + + log_notice("%s%sEncrypting credentials...", + emoji_enabled() ? glyph(GLYPH_LOCK_AND_KEY) : "", emoji_enabled() ? " " : ""); + + _cleanup_(sd_varlink_flush_close_unrefp) sd_varlink *creds_link = NULL; + _cleanup_strv_free_ char **encrypted_credentials = NULL; + r = encrypt_credentials(&creds_link, &encrypted_credentials); + if (r < 0) + return r; + + log_notice("%s%sInstalling partitions...", + emoji_enabled() ? glyph(GLYPH_COMPUTER_DISK) : "", emoji_enabled() ? " " : ""); + + /* Do the main part of the installation */ + r = invoke_repart( + &repart_link, + arg_node, + arg_erase, + /* dry_run= */ false, + /* min_size= */ NULL, + /* current_size= */ NULL, + /* need_free= */ NULL); + if (r < 0) + return r; + + log_notice("%s%sMounting partitions...", + emoji_enabled() ? glyph(GLYPH_COMPUTER_DISK) : "", emoji_enabled() ? " " : ""); + + _cleanup_(loop_device_unrefp) LoopDevice *loop_device = NULL; + _cleanup_(umount_and_freep) char *root_dir = NULL; + _cleanup_close_ int root_fd = -EBADF; + r = mount_image_privately_interactively( + arg_node, + &image_policy, + DISSECT_IMAGE_REQUIRE_ROOT | + DISSECT_IMAGE_RELAX_VAR_CHECK | + DISSECT_IMAGE_ALLOW_USERSPACE_VERITY | + DISSECT_IMAGE_DISCARD_ANY | + DISSECT_IMAGE_GPT_ONLY | + DISSECT_IMAGE_FSCK | + DISSECT_IMAGE_USR_NO_ROOT | + DISSECT_IMAGE_ADD_PARTITION_DEVICES | + DISSECT_IMAGE_PIN_PARTITION_DEVICES, + &root_dir, + &root_fd, + &loop_device); + if (r < 0) + return log_error_errno(r, "Failed to mount new image: %m"); + + log_notice("%s%sInstalling kernel...", + emoji_enabled() ? glyph(GLYPH_COMPUTER_DISK) : "", emoji_enabled() ? " " : ""); + + _cleanup_(sd_varlink_flush_close_unrefp) sd_varlink *bootctl_link = NULL; + r = invoke_bootctl_link(&bootctl_link, root_dir, root_fd, encrypted_credentials); + if (r < 0) + return r; + + log_notice("%s%sInstalling boot loader...", + emoji_enabled() ? glyph(GLYPH_COMPUTER_DISK) : "", emoji_enabled() ? " " : ""); + + r = invoke_bootctl_install(&bootctl_link, root_dir, root_fd); + if (r < 0) + return r; + + log_notice("%s%sUnmounting partitions...", + emoji_enabled() ? glyph(GLYPH_COMPUTER_DISK) : "", emoji_enabled() ? " " : ""); + + root_fd = safe_close(root_fd); + r = umount_recursive(root_dir, /* flags= */ 0); + if (r < 0) + log_warning_errno(r, "Failed to unmount target disk, proceeding anyway: %m"); + loop_device = loop_device_unref(loop_device); + sync(); + + log_notice("%s%sInstallation succeeded.", + emoji_enabled() ? glyph(GLYPH_SPARKLES) : "", emoji_enabled() ? " " : ""); + + r = maybe_reboot(); + if (r < 0) + return r; + + return 0; +} + +DEFINE_MAIN_FUNCTION(run); diff --git a/units/meson.build b/units/meson.build index 0f7ce75bd8967..fca299aa8465a 100644 --- a/units/meson.build +++ b/units/meson.build @@ -240,6 +240,7 @@ units = [ }, { 'file' : 'sysinit.target' }, { 'file' : 'syslog.socket' }, + { 'file' : 'system-install.target' }, { 'file' : 'system-systemd\\x2dcryptsetup.slice', 'conditions' : ['HAVE_LIBCRYPTSETUP'], @@ -778,6 +779,11 @@ units = [ 'file' : 'systemd-sysext@.service', 'conditions' : ['ENABLE_SYSEXT'], }, + { + 'file' : 'systemd-sysinstall.service', + 'conditions' : ['ENABLE_SYSINSTALL'], + 'symlinks' : ['system-install.target.wants/'], + }, { 'file' : 'systemd-sysupdate-reboot.service.in', 'conditions' : ['ENABLE_SYSUPDATE'], diff --git a/units/system-install.target b/units/system-install.target new file mode 100644 index 0000000000000..660110dcea36a --- /dev/null +++ b/units/system-install.target @@ -0,0 +1,15 @@ +# SPDX-License-Identifier: LGPL-2.1-or-later +# +# This file is part of systemd. +# +# systemd is free software; you can redistribute it and/or modify it +# under the terms of the GNU Lesser General Public License as published by +# the Free Software Foundation; either version 2.1 of the License, or +# (at your option) any later version. + +[Unit] +Description=System Installer +Documentation=man:systemd-sysinstall(8) +Requires=sysinit.target +After=sysinit.target +AllowIsolate=yes diff --git a/units/systemd-sysinstall.service b/units/systemd-sysinstall.service new file mode 100644 index 0000000000000..a330db2ef3054 --- /dev/null +++ b/units/systemd-sysinstall.service @@ -0,0 +1,22 @@ +# SPDX-License-Identifier: LGPL-2.1-or-later +# +# This file is part of systemd. +# +# systemd is free software; you can redistribute it and/or modify it +# under the terms of the GNU Lesser General Public License as published by +# the Free Software Foundation; either version 2.1 of the License, or +# (at your option) any later version. + +[Unit] +Description=System Install Tool +Documentation=man:systemd-sysinstall(8) +Wants=systemd-logind.service +After=systemd-logind.service + +[Service] +ExecStart=systemd-sysinstall --variables=yes --reboot=yes --mute-console=yes +StandardOutput=tty +StandardInput=tty +StandardError=tty +TTYReset=yes +FailureAction=halt From ca5b4f3f705ebceb136baea74dd45e625edb8dcc Mon Sep 17 00:00:00 2001 From: Lennart Poettering Date: Wed, 29 Apr 2026 21:49:58 +0200 Subject: [PATCH 128/242] ci: add CI test for systemd-sysinstall --- test/units/TEST-87-AUX-UTILS-VM.sysinstall.sh | 183 ++++++++++++++++++ 1 file changed, 183 insertions(+) create mode 100755 test/units/TEST-87-AUX-UTILS-VM.sysinstall.sh diff --git a/test/units/TEST-87-AUX-UTILS-VM.sysinstall.sh b/test/units/TEST-87-AUX-UTILS-VM.sysinstall.sh new file mode 100755 index 0000000000000..d4ca6e0a0d163 --- /dev/null +++ b/test/units/TEST-87-AUX-UTILS-VM.sysinstall.sh @@ -0,0 +1,183 @@ +#!/usr/bin/env bash +# SPDX-License-Identifier: LGPL-2.1-or-later +set -eux +set -o pipefail + +if ! command -v systemd-sysinstall >/dev/null; then + echo "systemd-sysinstall not found, skipping." + exit 0 +fi + +if ! command -v systemd-repart >/dev/null; then + echo "systemd-repart not found, skipping." + exit 0 +fi + +if ! command -v bootctl >/dev/null; then + echo "bootctl not found, skipping." + exit 0 +fi + +if ! command -v ukify >/dev/null; then + echo "ukify not found, skipping." + exit 0 +fi + +if [[ ! -d /usr/lib/systemd/boot/efi ]]; then + echo "sd-boot is not installed, skipping." + exit 0 +fi + +# We need a real environment to fiddle with loop devices. +if systemd-detect-virt -cq; then + echo "Running in a container, skipping." + exit 0 +fi + +# shellcheck source=test/units/util.sh +. "$(dirname "$0")"/util.sh + +WORKDIR="$(mktemp --directory /tmp/test-sysinstall.XXXXXXXXXX)" +LOOPDEV="" +MOUNTED=0 + +cleanup() { + set +e + if [[ "$MOUNTED" -eq 1 ]]; then + umount -R "$WORKDIR/mnt" + MOUNTED=0 + fi + if [[ -n "$LOOPDEV" ]]; then + systemd-dissect --detach "$LOOPDEV" + LOOPDEV="" + fi + rm -rf "$WORKDIR" +} +trap cleanup EXIT + +# 1) Build a small fake "OS source" tree. systemd-sysinstall picks this up via +# the repart.sysinstall.d definitions: CopyFiles= seeds the new root +# partition with these files. +SOURCE_ROOT="$WORKDIR/sourceroot" +mkdir -p "$SOURCE_ROOT/usr/lib" "$SOURCE_ROOT/etc" + +cat >"$SOURCE_ROOT/usr/lib/os-release" <<'EOF' +ID=testos +NAME="Test OS" +PRETTY_NAME="Test OS for systemd-sysinstall" +VERSION_ID=1 +EOF +ln -s ../usr/lib/os-release "$SOURCE_ROOT/etc/os-release" + +# 2) Build a minimal UKI. bootctl link only requires a valid PE with .osrel and +# the systemd-stub SBAT marker, so the .linux/.initrd contents do not need +# to be a real kernel. +echo "fake-kernel" >"$WORKDIR/vmlinuz" +echo "fake-initrd" >"$WORKDIR/initrd" + +ukify build \ + --linux "$WORKDIR/vmlinuz" \ + --initrd "$WORKDIR/initrd" \ + --os-release "@$SOURCE_ROOT/usr/lib/os-release" \ + --uname "1.2.3-testkernel" \ + --cmdline "quiet" \ + --output "$WORKDIR/testuki.efi" + +# 3) Build a sysinstall partition definition: a single ESP plus a root +# partition seeded from the fake source tree. +DEFS="$WORKDIR/sysinstall.d" +mkdir -p "$DEFS" + +cat >"$DEFS/10-esp.conf" <"$DEFS/20-root.conf" </dev/null + +# The UKI file referenced in the entry must exist on the ESP. +UKI_PATH=$(awk '/^uki / { print $2 }' "$ENTRY") +test -n "$UKI_PATH" +test -f "$ESP$UKI_PATH" + +# bootctl install should have placed sd-boot on the ESP. +find "$ESP/EFI/systemd" -type f -iname 'systemd-boot*.efi' | grep . >/dev/null + +# The credential we passed via --set-credential= must have been encrypted and +# placed next to the UKI, and must be referenced as 'extra' from the entry. +UKI_DIR="$(dirname "$ESP$UKI_PATH")" +TOKEN_DIR="$(basename "$UKI_DIR")" +test -s "$UKI_DIR/marker.cred" +grep -E "^extra /$TOKEN_DIR/marker\.cred$" "$ENTRY" >/dev/null + +# Locale/keymap/timezone propagation is off, so those .cred files must NOT +# exist on the ESP. +test ! -e "$UKI_DIR/firstboot.locale.cred" +test ! -e "$UKI_DIR/firstboot.keymap.cred" +test ! -e "$UKI_DIR/firstboot.timezone.cred" + +# 8) The seeded files from the fake source tree must end up in the new root. +test -f "$MNT/usr/lib/os-release" +grep '^ID=testos$' "$MNT/usr/lib/os-release" >/dev/null From 1cac85a6af7b748fc055ebbaef6d8334457b57e7 Mon Sep 17 00:00:00 2001 From: Lennart Poettering Date: Mon, 20 Apr 2026 12:34:48 +0200 Subject: [PATCH 129/242] update TODO --- TODO.md | 23 +++++++++++------------ 1 file changed, 11 insertions(+), 12 deletions(-) diff --git a/TODO.md b/TODO.md index 588ff720d49ee..bcfe489151b5a 100644 --- a/TODO.md +++ b/TODO.md @@ -186,6 +186,17 @@ SPDX-License-Identifier: LGPL-2.1-or-later use as additional search condition. Use case: images that combined a sysext partition with a portable service partition in one. +- **systemd-sysinstall:** + - make systemd-sysinstall itself a varlink service + - read installation definition from json file + - polkit support in sysinstall + - sysinstall: permit driving installer via credentials + - add --offline=no mode where we talk to socket based services rather than forking off + - if a user doesn't pick a locale during boot into installer, don't ask again after install, because we suppressed credential propagation + +- repart: add MatchLabel= which matches against partition label, so that we + truly can install different images in parallel + - add "systemctl wait" or so, which does what "systemd-run --wait" does, but for all units. It should be both a way to pin units into memory as well as a wait to retrieve their exit data. @@ -1200,14 +1211,6 @@ SPDX-License-Identifier: LGPL-2.1-or-later - introduce a new group to own TPM devices -- introduce a small "systemd-installer" tool or so, that glues - systemd-repart-as-installer and bootctl-install into one. Would just - interactively ask user for target disk (with completion and so on), and then do - two varlink calls to the the two tools with the right parameters. To support - "offline" operation, optionally invoke the two tools directly as child - processes with varlink communication over socketpair(). This all should be - useful as blueprint for graphical installers which should do the same. - - introduce an option (or replacement) for "systemctl show" that outputs all properties as JSON, similar to busctl's new JSON output. In contrast to that it should skip the variant type string though. @@ -2491,10 +2494,6 @@ SPDX-License-Identifier: LGPL-2.1-or-later that we can sanely copy ESP contents, /usr/ images, and then set up btrfs raid for the root fs to extend/mirror the existing install. This would be very similar to the concept of live-install-through-btrfs-migration. - - add --installer or so, that will interactively ask for a - target disk, maybe ask for confirmation, and install something on disk. Then, - hook that into installer.target or so, so that it can be used to - install/replicate installs - should probably enable btrfs' "temp_fsid" feature for all file systems it creates, as we have no interest in RAID for repart, and it should make sure that we can mount them trivially everywhere. From 5eb256c99b309a86f7836a8397622662376b50f7 Mon Sep 17 00:00:00 2001 From: Luca Boccassi Date: Tue, 5 May 2026 13:55:54 +0100 Subject: [PATCH 130/242] vmspawn-qmp: take temporary ref in drive_info_add_fail drive_info_add_fail() calls bridge_unregister_drive() followed by drive_info_unref(), then continues to access the DriveInfo object. While all current callers hold their own reference, it is a bit fragile and it trips static analyzers. Take a local reference. CID#1655804 Follow-up for 1d0a8e5dbd267c803e100d9030d70d327eddf8b1 --- src/vmspawn/vmspawn-qmp.c | 21 ++++++++++++--------- 1 file changed, 12 insertions(+), 9 deletions(-) diff --git a/src/vmspawn/vmspawn-qmp.c b/src/vmspawn/vmspawn-qmp.c index 621c5e781a7a6..d09576213a163 100644 --- a/src/vmspawn/vmspawn-qmp.c +++ b/src/vmspawn/vmspawn-qmp.c @@ -730,23 +730,26 @@ static int drive_info_add_fail(DriveInfo *d, int error, const char *error_desc) if (FLAGS_SET(d->state, BLOCK_DEVICE_STATE_ADD_FAILED)) return 0; - vmspawn_qmp_block_device_teardown(d->bridge->qmp, d->qmp_node_name, d->state); - d->state = BLOCK_DEVICE_STATE_ADD_FAILED; + /* Pin the object alive across bridge_unregister_drive() + drive_info_unref() below. */ + _cleanup_(drive_info_unrefp) DriveInfo *ref = drive_info_ref(d); - if (bridge_unregister_drive(d->bridge, d)) - drive_info_unref(d); + vmspawn_qmp_block_device_teardown(ref->bridge->qmp, ref->qmp_node_name, ref->state); + ref->state = BLOCK_DEVICE_STATE_ADD_FAILED; - if (d->link) { - (void) reply_qmp_error(d->link, error_desc, error); - d->link = sd_varlink_unref(d->link); + if (bridge_unregister_drive(ref->bridge, ref)) + drive_info_unref(ref); + + if (ref->link) { + (void) reply_qmp_error(ref->link, error_desc, error); + ref->link = sd_varlink_unref(ref->link); return 0; } log_error_errno(error, "Block device '%s' setup failed: %s", - strna(d->id), strna(error_desc)); + strna(ref->id), strna(error_desc)); /* Boot-time (link == NULL) is always fatal — even for late-arriving ephemeral replies. */ - return sd_event_exit(qmp_client_get_event(d->bridge->qmp), error); + return sd_event_exit(qmp_client_get_event(ref->bridge->qmp), error); } /* Rolls back the up-front registry insert on a sync error path. */ From 5d7d54fc30eb91d9e225d54e5437691a2184ffd3 Mon Sep 17 00:00:00 2001 From: Luca Boccassi Date: Tue, 5 May 2026 10:43:45 +0100 Subject: [PATCH 131/242] test: make TEST-64 btrfs_basic cleanup robust against reruns The LUKS subtest in testcase_btrfs_basic leaves stale LUKS headers on the underlying SCSI devices, so if the VM is rebooted the test fails because the LUKS signature is still there and blkid finds it. [ 7.683] + udevadm lock ... mkfs.btrfs -f -L btrfs_root -U deadbeef-dead-dead-beef-000000000000 /dev/disk/by-id/scsi-0systemd_foobar_deadbeefbtrfs0 [ 7.729] Label: btrfs_root [ 7.729] UUID: deadbeef-dead-dead-beef-000000000000 [ 7.743] + udevadm wait --settle --timeout=30 /dev/disk/by-id/scsi-0systemd_foobar_deadbeefbtrfs0 /dev/disk/by-uuid/deadbeef-dead-dead-beef-000000000000 /dev/disk/by-label/btrfs_root [ 7.788] sda: ... SYMLINK+="disk/by-label/$env{ID_FS_LABEL_ENC}": Added device node symlink "disk/by-label/encdisk0". [ 37.998] Timed out for waiting devices being initialized. [ 38.002] TEST-64-UDEV-STORAGE-btrfs_basic.service: Main process exited, code=exited, status=1/FAILURE Likewise for the BTRFS UUID: ERROR: non-unique UUID: deadbeef-dead-dead-beef-000000000001 So wipe that too. --- test/units/TEST-64-UDEV-STORAGE.sh | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/test/units/TEST-64-UDEV-STORAGE.sh b/test/units/TEST-64-UDEV-STORAGE.sh index de2d7d267212f..e175f6c3d7848 100755 --- a/test/units/TEST-64-UDEV-STORAGE.sh +++ b/test/units/TEST-64-UDEV-STORAGE.sh @@ -849,6 +849,10 @@ EOF btrfs filesystem show helper_check_device_symlinks helper_check_device_units + # Wipe the btrfs signature from each partition first, otherwise the superblocks remain inside + # the disk's data area and would be discovered again as duplicate UUIDs after re-partitioning, + # which breaks subsequent runs of this test (e.g. after a VM reboot). + udevadm lock --timeout=30 --device="${devices[0]}" wipefs -a /dev/disk/by-partlabel/diskpart{1..4} udevadm lock --timeout=30 --device="${devices[0]}" wipefs -a "${devices[0]}" udevadm wait --settle --timeout=30 --removed /dev/disk/by-partlabel/diskpart{1..4} @@ -866,6 +870,12 @@ EOF btrfs filesystem show helper_check_device_symlinks helper_check_device_units + # Wipe the btrfs signatures so that subsequent sections (and runs of the test, e.g. after a VM + # reboot) don't see the stale UUID. + for ((i = 0; i < ${#devices[@]}; i++)); do + udevadm lock --timeout=30 --device="${devices[$i]}" wipefs -a "${devices[$i]}" + done + udevadm settle --timeout=30 echo "Multiple devices: using LUKS encrypted disks, data: raid1, metadata: raid1, mixed mode" uuid="deadbeef-dead-dead-beef-000000000003" @@ -941,7 +951,13 @@ EOF sed -i "/${mpoint##*/}/d" /etc/fstab : >/etc/crypttab rm -fr "$mpoint" + rm -f /etc/btrfs_keyfile systemctl daemon-reload + # Wipe LUKS headers from the underlying devices, so that if the VM is rebooted the disks don't retain + # stale LUKS signatures that would interfere with a re-run of the test. + for ((i = 0; i < ${#devices[@]}; i++)); do + udevadm lock --timeout=30 --device="${devices[$i]}" wipefs -a "${devices[$i]}" + done udevadm settle --timeout=30 } From d508bfb51dd0cc827138d74bb25281f69c458d68 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Zbigniew=20J=C4=99drzejewski-Szmek?= Date: Wed, 29 Apr 2026 00:53:20 +0200 Subject: [PATCH 132/242] man/systemd-report: document --url= and other options 5bbbe210a4e3856385d95e16074d8aa98cff909b added the options but not the documentation. --- man/systemd-report.xml | 75 ++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 73 insertions(+), 2 deletions(-) diff --git a/man/systemd-report.xml b/man/systemd-report.xml index f14600dfe5d32..0974244a8f563 100644 --- a/man/systemd-report.xml +++ b/man/systemd-report.xml @@ -1,6 +1,9 @@ - + + "http://www.oasis-open.org/docbook/xml/4.5/docbookx.dtd" [ + +%entities; +]> + + + + + Upload the collected report to the specified address instead of writing it to + standard output. URL must point to a server accepting POST requests with + a JSON-formatted report body. + + Note: both http:// and https:// URLs are supported, but + connections over plain HTTP are made without encryption. Thus, this mode should + only be used in specific situations where integrity and confidentiality of the report is not required + or is ensured through some other means. Using https:// is recommended. + + + + + + + + Takes a path to a SSL key file in PEM format, used for client certificate + authentication when uploading. Can also be set to -, to disable client certificate + authentication. Defaults to + &CERTIFICATE_ROOT;/private/systemd-report.pem. + + + + + + + + Takes a path to a SSL certificate file in PEM format, used for client certificate + authentication when uploading. Defaults to + &CERTIFICATE_ROOT;/certs/systemd-report.pem. + + + + + + + + Takes a path to a SSL CA certificate file in PEM format used to verify the server + certificate, or the literal string all to disable certificate checking + entirely. Defaults to &CERTIFICATE_ROOT;/ca/trusted.pem. + + + + + + + + Timeout for the network upload operation. Takes a value in seconds (or in other + time units if suffixed with ms, min, h, + etc.); see + systemd.time7 + for details. Defaults to 30 seconds. + + + + + + + + Inject an additional HTTP header into the upload request. May be specified multiple + times to add several headers. Passing an empty string clears any headers added by previous + uses. + + + From fc8d547642808be3aee9a9a01cf1c85add2460b3 Mon Sep 17 00:00:00 2001 From: Lennart Poettering Date: Tue, 5 May 2026 12:43:42 +0200 Subject: [PATCH 133/242] udev-builtin-tpm2-id: gracefully skip tpm2 identification if tss2-libs are not installed Fixes: #41714 --- src/udev/udev-builtin-tpm2_id.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/src/udev/udev-builtin-tpm2_id.c b/src/udev/udev-builtin-tpm2_id.c index 6edf618e11112..968677a7342bc 100644 --- a/src/udev/udev-builtin-tpm2_id.c +++ b/src/udev/udev-builtin-tpm2_id.c @@ -1,6 +1,7 @@ /* SPDX-License-Identifier: GPL-2.0-or-later */ #include "device-util.h" +#include "errno-util.h" #include "string-util.h" #include "tpm2-util.h" #include "udev-builtin.h" @@ -20,6 +21,10 @@ static int builtin_tpm2_id(UdevEvent *event, int argc, char *argv[]) { _cleanup_(tpm2_context_unrefp) Tpm2Context *c = NULL; r = tpm2_context_new(dn, &c); + if (ERRNO_IS_NEG_NOT_SUPPORTED(r)) { + log_device_debug_errno(dev, r, "Full TPM2 support is not available, skipping identification of TPM2 device '%s'.", dn); + return 0; + } if (r < 0) return log_device_error_errno(dev, r, "Failed to open device node '%s': %m", dn); From 5d17215c7ee46c82401b10b765e7b6840e9d61c7 Mon Sep 17 00:00:00 2001 From: Diego Viola Date: Tue, 5 May 2026 08:22:55 -0300 Subject: [PATCH 134/242] treewide: fix typos in the entire codebase Signed-off-by: Diego Viola --- NEWS | 8 ++++---- TODO.md | 10 +++++----- catalog/systemd.catalog.in | 2 +- man/sd_id128_to_string.xml | 2 +- po/es.po | 2 +- rules.d/82-net-auto-link-local.rules | 2 +- shell-completion/zsh/_resolvectl | 2 +- src/core/dbus.c | 2 +- src/core/ipe-setup.c | 2 +- src/home/homework-luks.c | 2 +- src/libsystemd/sd-varlink/sd-varlink-idl.c | 2 +- src/locale/localed-util.c | 2 +- src/network/netdev/bareudp.c | 2 +- src/network/networkd-wwan-bus.c | 4 ++-- src/network/wait-online/wait-online-manager.c | 2 +- src/nspawn/nspawn-network.c | 2 +- src/nsresourced/nsresourcework.c | 2 +- src/pcrlock/pcrlock.c | 2 +- src/portable/portable.c | 2 +- src/resolve/resolved-dns-cache.c | 2 +- src/resolve/test-dns-packet-extract.c | 2 +- src/run/run.c | 2 +- src/shared/bus-polkit.c | 2 +- src/shared/bus-unit-util.c | 2 +- src/shared/calendarspec.c | 2 +- src/shared/copy.c | 2 +- src/shared/dissect-image.c | 2 +- src/shared/firewall-util.c | 2 +- src/shared/ipvlan-util.c | 2 +- src/shared/ipvlan-util.h | 2 +- src/shared/libfido2-util.c | 2 +- src/shared/specifier.c | 2 +- src/shared/tests.c | 2 +- src/shared/tpm2-util.c | 2 +- src/shared/user-record.c | 2 +- src/shared/varlink-io.systemd.Import.c | 2 +- src/systemctl/systemctl-util.c | 2 +- test/test-network/systemd-networkd-tests.py | 2 +- test/units/TEST-36-NUMAPOLICY.sh | 2 +- .../TEST-53-TIMER.RandomizedDelaySec-persistent.sh | 2 +- test/units/TEST-87-AUX-UTILS-VM.pstore.sh | 2 +- 41 files changed, 49 insertions(+), 49 deletions(-) diff --git a/NEWS b/NEWS index 49061c5e11a22..bc7ee1e2bf702 100644 --- a/NEWS +++ b/NEWS @@ -3433,7 +3433,7 @@ CHANGES WITH 257: systemd-importd: - * A new generator sytemd-import-generator has been added to synthesize + * A new generator systemd-import-generator has been added to synthesize image download jobs. This provides functionality similar to importctl, but is configured via the kernel command line and system credentials. It may be used to automatically download sysext, @@ -4610,7 +4610,7 @@ CHANGES WITH 256: OpenSSH 9.4 or newer. * systemd-sysext gained support for enabling system extensions in - mutable fashion, where a writeable upperdir is stored under + mutable fashion, where a writable upperdir is stored under /var/lib/extensions.mutable/, and a new --mutable= option to configure this behaviour. An "ephemeral" mode is not also supported where the mutable layer is configured to be a tmpfs that is @@ -15312,7 +15312,7 @@ CHANGES WITH 231: * The InaccessableDirectories=, ReadOnlyDirectories= and ReadWriteDirectories= unit file settings have been renamed to - InaccessablePaths=, ReadOnlyPaths= and ReadWritePaths= and may now be + InaccessiblePaths=, ReadOnlyPaths= and ReadWritePaths= and may now be applied to all kinds of file nodes, and not just directories, with the exception of symlinks. Specifically these settings may now be used on block and character device nodes, UNIX sockets and FIFOS as @@ -20923,7 +20923,7 @@ CHANGES WITH 189: udev_device_new_from_device_id() call. * The logic for file system namespace (ReadOnlyDirectory=, - ReadWriteDirectoy=, PrivateTmp=) has been reworked not to + ReadWriteDirectories=, PrivateTmp=) has been reworked not to require pivot_root() anymore. This means fewer temporary directories are created below /tmp for this feature. diff --git a/TODO.md b/TODO.md index bcfe489151b5a..b785ac6178cdb 100644 --- a/TODO.md +++ b/TODO.md @@ -222,8 +222,8 @@ SPDX-License-Identifier: LGPL-2.1-or-later - download list + report updates in motd – but do not auto update - download list + download new version – but do not apply it - download list + download new version + apply it – but do not reboot - - download list + donwload new version + apply it + reboot - Other things the policy shoudl contain is when to place the reboot. + - download list + download new version + apply it + reboot + Other things the policy should contain is when to place the reboot. This would all decouple the updating of the package list from the application of it. Which is great for "countme" style stuff. @@ -359,7 +359,7 @@ SPDX-License-Identifier: LGPL-2.1-or-later - add bus API to retrieve current unit file contents (i.e. implement "systemctl cat" on the bus only) -- Add ConditionDirectoryNotEmpty= handle non-absoute paths as a search path or add +- Add ConditionDirectoryNotEmpty= handle non-absolute paths as a search path or add ConditionConfigSearchPathNotEmpty= or different syntax? See the discussion starting at https://github.com/systemd/systemd/pull/15109#issuecomment-607740136. @@ -1546,7 +1546,7 @@ SPDX-License-Identifier: LGPL-2.1-or-later and stick around for the whole system runtime (i.e. root fs storage daemons, the bpf loader daemon discussed above, and such) are placed. maybe protected.slice or so? Then write docs that suggest that services like this - set Slice=protected.sice, RefuseManualStart=yes, RefuseManualStop=yes and a + set Slice=protected.slice, RefuseManualStart=yes, RefuseManualStop=yes and a couple of other things. - maybe add call sd_journal_set_block_timeout() or so to set SO_SNDTIMEO for @@ -1840,7 +1840,7 @@ SPDX-License-Identifier: LGPL-2.1-or-later - oci: add support for "importctl import-oci" which implements the "OCI layout" spec (i.e. acquiring via local fs access), as opposed to the current - "importctl pull-oci" which focusses on the "OCI image spec", i.e. downloads + "importctl pull-oci" which focuses on the "OCI image spec", i.e. downloads from the web (i.e. acquiring via URLs). - oci: add support for blake hashes for layers diff --git a/catalog/systemd.catalog.in b/catalog/systemd.catalog.in index 68ef28d1974c0..30da7bd5aa98b 100644 --- a/catalog/systemd.catalog.in +++ b/catalog/systemd.catalog.in @@ -455,7 +455,7 @@ Support: %SUPPORT_URL% The directory @WHERE@ is specified as the mount point (second field in /etc/fstab or Where= field in systemd unit file) and is not empty. -This does not interfere with mounting, but the pre-exisiting files in +This does not interfere with mounting, but the pre-existing files in this directory become inaccessible. To see those over-mounted files, please manually mount the underlying file system to a secondary location. diff --git a/man/sd_id128_to_string.xml b/man/sd_id128_to_string.xml index 1d6301ec61581..f5b6c9490ff2b 100644 --- a/man/sd_id128_to_string.xml +++ b/man/sd_id128_to_string.xml @@ -72,7 +72,7 @@ sd_id128_to_uuid_string() and SD_ID128_TO_UUID_STRING() are similar to these two functions/macros, but format the 128-bit values as RFC4122 UUIDs, i.e. a series - of 36 lowercase hexadeciaml digits and dashes, terminated by a NUL byte. + of 36 lowercase hexadecimal digits and dashes, terminated by a NUL byte. sd_id128_from_string() implements the reverse operation: it takes a 33 character string with 32 hexadecimal digits (either lowercase or uppercase, terminated by diff --git a/po/es.po b/po/es.po index 6e6304658278f..7be1c80413602 100644 --- a/po/es.po +++ b/po/es.po @@ -1054,7 +1054,7 @@ msgstr "Necesita autenticarse para restablecer la configuración de DNS." #: src/network/org.freedesktop.network1.policy:143 msgid "DHCP server sends force renew message" -msgstr "El servidor DCHP envía un mensaje de renovación forzada" +msgstr "El servidor DHCP envía un mensaje de renovación forzada" #: src/network/org.freedesktop.network1.policy:144 #, fuzzy diff --git a/rules.d/82-net-auto-link-local.rules b/rules.d/82-net-auto-link-local.rules index 88ac7bc1be05e..88e581c16ca8e 100644 --- a/rules.d/82-net-auto-link-local.rules +++ b/rules.d/82-net-auto-link-local.rules @@ -4,7 +4,7 @@ ACTION=="remove", GOTO="net_link_local_end" SUBSYSTEM!="net", GOTO="net_link_local_end" # Network interfaces for which only Link-Local communication (i.e. IPv4LL, …) -# makes sense, because they almost certainy will point to another host, not an +# makes sense, because they almost certainly will point to another host, not an # internet router. # (Note: matches against VID/PID go into 82-net-auto-link-local.hwdb instead) diff --git a/shell-completion/zsh/_resolvectl b/shell-completion/zsh/_resolvectl index 3d28f1b410e04..dc9a9f1e06dfa 100644 --- a/shell-completion/zsh/_resolvectl +++ b/shell-completion/zsh/_resolvectl @@ -56,7 +56,7 @@ query:"Resolve domain names, IPv4 and IPv6 addresses" reset-server-features:"Flushes all feature level information the resolver has learned about specific servers" reset-statistics:"Resets the statistics counter shown in statistics to zero" - revert:"Revert the per-interfce DNS configuration" + revert:"Revert the per-interface DNS configuration" service:"Resolve DNS-SD and SRV services" show-cache:"Show the current cache contents" show-server-state:"Show servers state" diff --git a/src/core/dbus.c b/src/core/dbus.c index dba79b860266f..659965e4e6c50 100644 --- a/src/core/dbus.c +++ b/src/core/dbus.c @@ -1060,7 +1060,7 @@ int bus_fdset_add_all(Manager *m, FDSet *fds) { /* When we are about to reexecute we add all D-Bus fds to the * set to pass over to the newly executed systemd. They won't - * be used there however, except thatt they are closed at the + * be used there however, except that they are closed at the * very end of deserialization, those making it possible for * clients to synchronously wait for systemd to reexec by * simply waiting for disconnection */ diff --git a/src/core/ipe-setup.c b/src/core/ipe-setup.c index f263117018ef3..7b684e34b72a3 100644 --- a/src/core/ipe-setup.c +++ b/src/core/ipe-setup.c @@ -23,7 +23,7 @@ int ipe_setup(void) { _cleanup_strv_free_ char **policies = NULL; int r; - /* Very quick smoke tests first: this is in the citical, sequential boot path, and in most cases it + /* Very quick smoke tests first: this is in the critical, sequential boot path, and in most cases it * is unlikely this will be configured, so do the fastest existence checks first and immediately * return if there's nothing to do. */ diff --git a/src/home/homework-luks.c b/src/home/homework-luks.c index e85153d61dbc2..9cd0f2dc00438 100644 --- a/src/home/homework-luks.c +++ b/src/home/homework-luks.c @@ -631,7 +631,7 @@ static int fs_validate( sd_id128_t *ret_found_uuid) { _cleanup_free_ char *fstype = NULL; - sd_id128_t u = SD_ID128_NULL; /* avoid false maybe-unitialized warning */ + sd_id128_t u = SD_ID128_NULL; /* avoid false maybe-uninitialized warning */ int r; assert(dm_node); diff --git a/src/libsystemd/sd-varlink/sd-varlink-idl.c b/src/libsystemd/sd-varlink/sd-varlink-idl.c index be66fb34afc39..55f9b8b1d0f4e 100644 --- a/src/libsystemd/sd-varlink/sd-varlink-idl.c +++ b/src/libsystemd/sd-varlink/sd-varlink-idl.c @@ -389,7 +389,7 @@ static int varlink_idl_format_symbol( * https://github.com/varlink/varlink.github.io/issues/26 – but for now export this as a * comment. * - * Until this is resolved upsteam, consider this comment part of the API (i.e. don't change + * Until this is resolved upstream, consider this comment part of the API (i.e. don't change * only extend). It is used by tools like varlink-http-bridge. */ if ((symbol->symbol_flags & (SD_VARLINK_REQUIRES_MORE|SD_VARLINK_SUPPORTS_MORE)) != 0) { fputs(colors[COLOR_COMMENT], f); diff --git a/src/locale/localed-util.c b/src/locale/localed-util.c index 4cddfc32d9103..25c361d400409 100644 --- a/src/locale/localed-util.c +++ b/src/locale/localed-util.c @@ -412,7 +412,7 @@ static bool locale_encoding_is_utf8_or_unspecified(const char *locale) { } static int locale_gen_locale_supported(const char *locale_entry) { - /* Returns an error valus <= 0 if the locale-gen entry is invalid or unsupported, + /* Returns an error value <= 0 if the locale-gen entry is invalid or unsupported, * 1 in case the locale entry is valid, and -EOPNOTSUPP specifically in case * the distributor has not provided us with a SUPPORTED file to check * locale for validity. */ diff --git a/src/network/netdev/bareudp.c b/src/network/netdev/bareudp.c index 9dd70296bc6c6..2183e11661c77 100644 --- a/src/network/netdev/bareudp.c +++ b/src/network/netdev/bareudp.c @@ -49,7 +49,7 @@ static int netdev_bare_udp_verify(NetDev *netdev, const char *filename) { if (u->dest_port == 0) return log_netdev_warning_errno(netdev, SYNTHETIC_ERRNO(EINVAL), - "%s: BareUDP DesinationPort= is not set. Ignoring.", filename); + "%s: BareUDP DestinationPort= is not set. Ignoring.", filename); if (u->iftype == _BARE_UDP_PROTOCOL_INVALID) return log_netdev_warning_errno(netdev, SYNTHETIC_ERRNO(EINVAL), diff --git a/src/network/networkd-wwan-bus.c b/src/network/networkd-wwan-bus.c index 19fcf081ae188..13a860e2dc012 100644 --- a/src/network/networkd-wwan-bus.c +++ b/src/network/networkd-wwan-bus.c @@ -1182,7 +1182,7 @@ int manager_match_mm_signals(Manager *manager) { /* install_callback= */ NULL, manager); if (r < 0) - return log_error_errno(r, "Failed to request signal for IntefaceAdded"); + return log_error_errno(r, "Failed to request signal for InterfaceAdded"); r = sd_bus_match_signal_async( manager->bus, @@ -1195,7 +1195,7 @@ int manager_match_mm_signals(Manager *manager) { /* install_callback= */ NULL, manager); if (r < 0) - return log_error_errno(r, "Failed to request signal for IntefaceRemoved"); + return log_error_errno(r, "Failed to request signal for InterfaceRemoved"); /* N.B. We need "path_namespace" for bearers, not "path", */ r = sd_bus_add_match_async( diff --git a/src/network/wait-online/wait-online-manager.c b/src/network/wait-online/wait-online-manager.c index b5ca38cbe2403..70e2b45f00d20 100644 --- a/src/network/wait-online/wait-online-manager.c +++ b/src/network/wait-online/wait-online-manager.c @@ -68,7 +68,7 @@ static const LinkOperationalStateRange* get_state_range(Manager *m, Link *l, con if (operational_state_range_is_valid(range)) return range; - /* l->requred_operstate should be always valid. */ + /* l->required_operstate should be always valid. */ assert_not_reached(); } diff --git a/src/nspawn/nspawn-network.c b/src/nspawn/nspawn-network.c index 6949ecac724a9..ad506cfaf4b68 100644 --- a/src/nspawn/nspawn-network.c +++ b/src/nspawn/nspawn-network.c @@ -499,7 +499,7 @@ static int netns_child_begin(int netns_fd, int *ret_original_netns_fd) { if (r < 0) return log_error_errno(r, "Failed to mount sysfs on /sys/: %m"); - /* udev_avaliable() might be called previously and the result may be cached. + /* udev_available() might be called previously and the result may be cached. * Now, we (re-)mount sysfs. Hence, we need to reset the cache. */ reset_cached_udev_availability(); diff --git a/src/nsresourced/nsresourcework.c b/src/nsresourced/nsresourcework.c index 3b2450529c3ac..91b3645809a92 100644 --- a/src/nsresourced/nsresourcework.c +++ b/src/nsresourced/nsresourcework.c @@ -1925,7 +1925,7 @@ static void hash_ether_addr(UserNamespaceInfo *userns_info, const char *ifname, siphash24_compress_byte(0, &state); /* separator */ siphash24_compress_string(strempty(ifname), &state); siphash24_compress_byte(0, &state); /* separator */ - n = htole64(n); /* add the 'index' to the mix in an endianess-independent fashion */ + n = htole64(n); /* add the 'index' to the mix in an endianness-independent fashion */ siphash24_compress_typesafe(n, &state); h = htole64(siphash24_finalize(&state)); diff --git a/src/pcrlock/pcrlock.c b/src/pcrlock/pcrlock.c index 09f49b2ed250e..63af144fe0327 100644 --- a/src/pcrlock/pcrlock.c +++ b/src/pcrlock/pcrlock.c @@ -1947,7 +1947,7 @@ static int event_log_match_component_variant( return r; if (assign) { - /* Take ownership (Note we allow multiple components and variants to take owneship of the same record!) */ + /* Take ownership (Note we allow multiple components and variants to take ownership of the same record!) */ if (!GREEDY_REALLOC(el->records[i]->mapped, el->records[i]->n_mapped+1)) return log_oom(); diff --git a/src/portable/portable.c b/src/portable/portable.c index 3f3c0cda48891..f9f47f2e0fd89 100644 --- a/src/portable/portable.c +++ b/src/portable/portable.c @@ -2431,7 +2431,7 @@ int portable_detach( portable_changes_add_with_prefix(changes, n_changes, PORTABLE_UNLINK, where, md, NULL); } - /* Now, also drop any image symlink or copy, for images outside of the sarch path */ + /* Now, also drop any image symlink or copy, for images outside of the search path */ SET_FOREACH(item, markers) { _cleanup_free_ char *target = NULL; diff --git a/src/resolve/resolved-dns-cache.c b/src/resolve/resolved-dns-cache.c index 5540ce5c0593d..6a7967842dbe4 100644 --- a/src/resolve/resolved-dns-cache.c +++ b/src/resolve/resolved-dns-cache.c @@ -68,7 +68,7 @@ struct DnsCacheItem { }; /* Returns true if this is a cache item created as result of an explicit lookup, or created as "side-effect" - * of another request. "Primary" entries will carry the full answer data (with NSEC, …) that can aso prove + * of another request. "Primary" entries will carry the full answer data (with NSEC, …) that can also prove * wildcard expansion, non-existence and such, while entries that were created as "side-effect" just contain * immediate RR data for the specified RR key, but nothing else. */ #define DNS_CACHE_ITEM_IS_PRIMARY(item) (!!(item)->answer) diff --git a/src/resolve/test-dns-packet-extract.c b/src/resolve/test-dns-packet-extract.c index ca90afb7eb1dc..28a510dea6467 100644 --- a/src/resolve/test-dns-packet-extract.c +++ b/src/resolve/test-dns-packet-extract.c @@ -400,7 +400,7 @@ TEST(packet_validate_query_too_many_questions) { ASSERT_ERROR(dns_packet_validate_query(packet), EBADMSG); } -TEST(packet_validate_query_with_anwser) { +TEST(packet_validate_query_with_answer) { _cleanup_(dns_packet_unrefp) DnsPacket *packet = NULL; ASSERT_OK(dns_packet_new(&packet, DNS_PROTOCOL_DNS, 0, DNS_PACKET_SIZE_MAX)); diff --git a/src/run/run.c b/src/run/run.c index afae5b2d94af4..60f3bf7405ebd 100644 --- a/src/run/run.c +++ b/src/run/run.c @@ -962,7 +962,7 @@ static int parse_argv_sudo_mode(int argc, char *argv[]) { arg_stdio = isatty_safe(STDIN_FILENO) && isatty_safe(STDOUT_FILENO) && isatty_safe(STDERR_FILENO) ? ARG_STDIO_PTY : ARG_STDIO_DIRECT; log_debug("Using %s stdio mode.", arg_stdio == ARG_STDIO_PTY ? "pty" : "direct"); if (arg_pty_late < 0) - arg_pty_late = arg_ask_password; /* for run0 this defaults to on, except if --no-ask-pasword is used */ + arg_pty_late = arg_ask_password; /* for run0 this defaults to on, except if --no-ask-password is used */ arg_expand_environment = false; arg_send_sighup = true; diff --git a/src/shared/bus-polkit.c b/src/shared/bus-polkit.c index 78e9ed377f384..1373dba0ed213 100644 --- a/src/shared/bus-polkit.c +++ b/src/shared/bus-polkit.c @@ -346,7 +346,7 @@ static int async_polkit_process_reply(sd_bus_message *reply, AsyncPolkitQuery *q if (r < 0) return r; - /* Now, let's dispatch the original message a second time be re-enqueing. This will then traverse the + /* Now, let's dispatch the original message a second time be re-enqueuing. This will then traverse the * whole message processing again, and thus re-validating and re-retrieving the "userdata" field * again. * diff --git a/src/shared/bus-unit-util.c b/src/shared/bus-unit-util.c index 440c6ced290ea..7076322d2d315 100644 --- a/src/shared/bus-unit-util.c +++ b/src/shared/bus-unit-util.c @@ -2144,7 +2144,7 @@ static int bus_append_protect_hostname(sd_bus_message *m, const char *field, con int r; /* The command-line field is called "ProtectHostname". We also accept "ProtectHostnameEx" as the - * field name for backward compatibility. We set ProtectHostame or ProtectHostnameEx. */ + * field name for backward compatibility. We set ProtectHostname or ProtectHostnameEx. */ r = parse_boolean(eq); if (r >= 0) diff --git a/src/shared/calendarspec.c b/src/shared/calendarspec.c index 771363517b39e..225d811d19280 100644 --- a/src/shared/calendarspec.c +++ b/src/shared/calendarspec.c @@ -1127,7 +1127,7 @@ static int find_matching_component( assert(val); - /* Finds the *earliest* matching time specified by one of the CalendarCompoment items in chain c. + /* Finds the *earliest* matching time specified by one of the CalendarComponent items in chain c. * If no matches can be found, returns -ENOENT. * Otherwise, updates *val to the matching time. 1 is returned if *val was changed, 0 otherwise. */ diff --git a/src/shared/copy.c b/src/shared/copy.c index 3ac05c15b7b2a..a8e2b68db1414 100644 --- a/src/shared/copy.c +++ b/src/shared/copy.c @@ -652,7 +652,7 @@ static int hardlink_context_setup( * <= 0, because in that case we will not actually allocate the hardlink inode lookup table directory * on disk (we do so lazily, when the first candidate with .n_link > 1 is seen). This means, in the * common case where hardlinks are not used at all or only for few files the fact that we store the - * table on disk shouldn't matter perfomance-wise. */ + * table on disk shouldn't matter performance-wise. */ if (!FLAGS_SET(copy_flags, COPY_HARDLINKS)) return 0; diff --git a/src/shared/dissect-image.c b/src/shared/dissect-image.c index e69c644f58eeb..fcd809b0e61b1 100644 --- a/src/shared/dissect-image.c +++ b/src/shared/dissect-image.c @@ -2298,7 +2298,7 @@ int partition_pick_mount_options( case PARTITION_XBOOTLDR: flags |= MS_NOSUID|MS_NOEXEC|MS_NOSYMFOLLOW; - /* The ESP might contain a pre-boot random seed. Let's make this unaccessible to regular + /* The ESP might contain a pre-boot random seed. Let's make this inaccessible to regular * userspace. ESP/XBOOTLDR is almost certainly VFAT, hence if we don't know assume it is. */ if (!fstype || fstype_can_fmask_dmask(fstype)) if (!strextend_with_separator(&options, ",", "fmask=0177,dmask=0077")) diff --git a/src/shared/firewall-util.c b/src/shared/firewall-util.c index 4693972ff2752..441e137ec03cd 100644 --- a/src/shared/firewall-util.c +++ b/src/shared/firewall-util.c @@ -1026,7 +1026,7 @@ int fw_nftables_add_masquerade( * Note that this doesn't protect against external sabotage such as a * 'while true; nft flush ruleset; done'. There is nothing that could be done about that short * of extending the kernel to allow tables to be owned by stystemd-networkd and making them - * non-deleteable except by the 'owning process'. */ + * non-deletable except by the 'owning process'. */ r = fw_nftables_init_family(nfnl, af); if (r < 0) diff --git a/src/shared/ipvlan-util.c b/src/shared/ipvlan-util.c index 1906c8026f470..5c271ba4b688b 100644 --- a/src/shared/ipvlan-util.c +++ b/src/shared/ipvlan-util.c @@ -12,7 +12,7 @@ static const char* const ipvlan_mode_table[_NETDEV_IPVLAN_MODE_MAX] = { DEFINE_STRING_TABLE_LOOKUP(ipvlan_mode, IPVlanMode); static const char* const ipvlan_flags_table[_NETDEV_IPVLAN_FLAGS_MAX] = { - [NETDEV_IPVLAN_FLAGS_BRIGDE] = "bridge", + [NETDEV_IPVLAN_FLAGS_BRIDGE] = "bridge", [NETDEV_IPVLAN_FLAGS_PRIVATE] = "private", [NETDEV_IPVLAN_FLAGS_VEPA] = "vepa", }; diff --git a/src/shared/ipvlan-util.h b/src/shared/ipvlan-util.h index 4cb74f3dcbb92..194e5cea04289 100644 --- a/src/shared/ipvlan-util.h +++ b/src/shared/ipvlan-util.h @@ -14,7 +14,7 @@ typedef enum IPVlanMode { } IPVlanMode; typedef enum IPVlanFlags { - NETDEV_IPVLAN_FLAGS_BRIGDE, + NETDEV_IPVLAN_FLAGS_BRIDGE, NETDEV_IPVLAN_FLAGS_PRIVATE = IPVLAN_F_PRIVATE, NETDEV_IPVLAN_FLAGS_VEPA = IPVLAN_F_VEPA, _NETDEV_IPVLAN_FLAGS_MAX, diff --git a/src/shared/libfido2-util.c b/src/shared/libfido2-util.c index cc00006af9d54..c25019e6b01d8 100644 --- a/src/shared/libfido2-util.c +++ b/src/shared/libfido2-util.c @@ -361,7 +361,7 @@ static int fido2_is_cred_in_specific_token( /* According to CTAP 2.1 specification, to do pre-flight we need to set up option to false * with optionally pinUvAuthParam in assertion[1]. But for authenticator that doesn't support * user presence, once up option is present, the authenticator may return CTAP2_ERR_UNSUPPORTED_OPTION[2]. - * So we simplely omit the option in that case. + * So we simply omit the option in that case. * Reference: * 1: https://fidoalliance.org/specs/fido-v2.1-ps-20210615/fido-client-to-authenticator-protocol-v2.1-ps-20210615.html#pre-flight * 2: https://fidoalliance.org/specs/fido-v2.0-ps-20190130/fido-client-to-authenticator-protocol-v2.0-ps-20190130.html#authenticatorGetAssertion (in step 5) diff --git a/src/shared/specifier.c b/src/shared/specifier.c index 595160352225b..9d2066899f2fe 100644 --- a/src/shared/specifier.c +++ b/src/shared/specifier.c @@ -347,7 +347,7 @@ int specifier_user_name(char specifier, const void *data, const char *root, cons * to be able to run this in PID 1, where our user ID is 0, but where NSS lookups are not allowed. * We don't use getusername_malloc() here, because we don't want to look at $USER, to remain - * consistent with specifer_user_id() below. + * consistent with specifier_user_id() below. */ t = uid_to_name(uid); diff --git a/src/shared/tests.c b/src/shared/tests.c index 8111d481e4d6a..84945343a4650 100644 --- a/src/shared/tests.c +++ b/src/shared/tests.c @@ -361,7 +361,7 @@ const char* ci_environment(void) { if (getenv("SALSA_CI_IMAGES")) return (ans = "salsa-ci"); - FOREACH_STRING(var, "CI", "CONTINOUS_INTEGRATION") { + FOREACH_STRING(var, "CI", "CONTINUOUS_INTEGRATION") { /* Those vars are booleans according to Semaphore and Travis docs: * https://docs.travis-ci.com/user/environment-variables/#default-environment-variables * https://docs.semaphoreci.com/ci-cd-environment/environment-variables/#ci diff --git a/src/shared/tpm2-util.c b/src/shared/tpm2-util.c index 091a4dba89ac3..4557ad3f41456 100644 --- a/src/shared/tpm2-util.c +++ b/src/shared/tpm2-util.c @@ -819,7 +819,7 @@ static bool tpm2_supports_tpmt_sym_def(Tpm2Context *c, const TPMT_SYM_DEF *param assert(c); assert(parameters); - /* Unfortunately, TPMT_SYM_DEF and TPMT_SYM_DEF_OBEJECT are separately defined, even though they are + /* Unfortunately, TPMT_SYM_DEF and TPMT_SYM_DEF_OBJECT are separately defined, even though they are * functionally identical. */ TPMT_SYM_DEF_OBJECT object = { .algorithm = parameters->algorithm, diff --git a/src/shared/user-record.c b/src/shared/user-record.c index cf33d92215b8d..191870c8ea62d 100644 --- a/src/shared/user-record.c +++ b/src/shared/user-record.c @@ -2489,7 +2489,7 @@ int user_record_self_changes_allowed(UserRecord *current, UserRecord *incoming) * `selfModifiableFields` fields are unset in their record. * 2) This user crafts a request to add the following to their record: * { "memberOf": ["wheel"], "selfModifiableFields": ["memberOf", "selfModifiableFields"] } - * 3) We remove the `mebmerOf` and `selfModifiabileFields` fields from `incoming` + * 3) We remove the `memberOf` and `selfModifiabileFields` fields from `incoming` * 4) `current` and `incoming` compare as equal, so we let the change happen * 5) the user has granted themselves administrator privileges */ diff --git a/src/shared/varlink-io.systemd.Import.c b/src/shared/varlink-io.systemd.Import.c index b4e2ab03e2d4e..13d239b9dacb2 100644 --- a/src/shared/varlink-io.systemd.Import.c +++ b/src/shared/varlink-io.systemd.Import.c @@ -87,7 +87,7 @@ static SD_VARLINK_DEFINE_METHOD_FULL( SD_VARLINK_DEFINE_INPUT_BY_TYPE(verify, ImageVerify, SD_VARLINK_NULLABLE), SD_VARLINK_FIELD_COMMENT("If true, an existing image by the local name is deleted. Defaults to false."), SD_VARLINK_DEFINE_INPUT(force, SD_VARLINK_BOOL, SD_VARLINK_NULLABLE), - SD_VARLINK_FIELD_COMMENT("Whether to make the image read-only after downloading. Defaults ot false."), + SD_VARLINK_FIELD_COMMENT("Whether to make the image read-only after downloading. Defaults to false."), SD_VARLINK_DEFINE_INPUT(readOnly, SD_VARLINK_BOOL, SD_VARLINK_NULLABLE), SD_VARLINK_FIELD_COMMENT("Whether to keep a pristine copy of the download separate from the locally installed image. Defaults to false."), SD_VARLINK_DEFINE_INPUT(keepDownload, SD_VARLINK_BOOL, SD_VARLINK_NULLABLE), diff --git a/src/systemctl/systemctl-util.c b/src/systemctl/systemctl-util.c index b278f784ba3ec..8e8d7181121e3 100644 --- a/src/systemctl/systemctl-util.c +++ b/src/systemctl/systemctl-util.c @@ -508,7 +508,7 @@ int unit_find_paths( * Finds where the unit is defined on disk. Returns 0 if the unit is not found. Returns 1 if it is * found, and sets: * - * - the path to the unit in *ret_frament_path, if it exists on disk, + * - the path to the unit in *ret_fragment_path, if it exists on disk, * * - and a strv of existing drop-ins in *ret_dropin_paths, if the arg is not NULL and any dropins * were found. diff --git a/test/test-network/systemd-networkd-tests.py b/test/test-network/systemd-networkd-tests.py index 0db4830848535..6917c2f697b72 100755 --- a/test/test-network/systemd-networkd-tests.py +++ b/test/test-network/systemd-networkd-tests.py @@ -9332,7 +9332,7 @@ def get_dhcp_6rd_prefix(link): # ipv4masklen: 8 # 6rd-prefix: 2001:db8::/32 - # br-addresss: 10.0.0.1 + # br-address: 10.0.0.1 start_dnsmasq('--dhcp-option=212,08:20:20:01:0d:b8:00:00:00:00:00:00:00:00:00:00:00:00:0a:00:00:01', ipv4_range='10.100.100.100,10.100.100.200', diff --git a/test/units/TEST-36-NUMAPOLICY.sh b/test/units/TEST-36-NUMAPOLICY.sh index 71fcdb4524870..1e2d7ac8ac133 100755 --- a/test/units/TEST-36-NUMAPOLICY.sh +++ b/test/units/TEST-36-NUMAPOLICY.sh @@ -34,7 +34,7 @@ testUnitNUMAConf="$testUnitFile.d/numa.conf" sleepAfterStart=3 # Journal cursor for easier navigation -journalCursorFile="jounalCursorFile" +journalCursorFile="journalCursorFile" startStrace() { coproc strace -qq -p 1 -o "$straceLog" -e set_mempolicy -s 1024 ${1:+"$1"} diff --git a/test/units/TEST-53-TIMER.RandomizedDelaySec-persistent.sh b/test/units/TEST-53-TIMER.RandomizedDelaySec-persistent.sh index af22daecc7668..e3005fe0f5df3 100755 --- a/test/units/TEST-53-TIMER.RandomizedDelaySec-persistent.sh +++ b/test/units/TEST-53-TIMER.RandomizedDelaySec-persistent.sh @@ -1,7 +1,7 @@ #!/usr/bin/env bash # SPDX-License-Identifier: LGPL-2.1-or-later # -# Persistent timers (i.e. timers with Persitent=yes) save their last trigger timestamp to a persistent +# Persistent timers (i.e. timers with Persistent=yes) save their last trigger timestamp to a persistent # storage (a stamp file), which is loaded during subsequent boots. As mentioned in the man page, such timers # should be still affected by RandomizedDelaySec= during boot even if they already elapsed and would be then # triggered immediately. diff --git a/test/units/TEST-87-AUX-UTILS-VM.pstore.sh b/test/units/TEST-87-AUX-UTILS-VM.pstore.sh index be5297fa52da9..d05c99b15076c 100755 --- a/test/units/TEST-87-AUX-UTILS-VM.pstore.sh +++ b/test/units/TEST-87-AUX-UTILS-VM.pstore.sh @@ -44,7 +44,7 @@ random_efi_timestamp() { printf "%0.10d" "$((1000000000 + RANDOM))"; } # The dmesg- filename contains the backend-type and the Common Platform Error Record, CPER, # record id, a 64-bit number. # -# Files are processed in reverse lexigraphical order so as to properly reconstruct original dmesg. +# Files are processed in reverse lexicographical order so as to properly reconstruct original dmesg. prepare_efi_logs() { local file="${1:?}" From 5afd344438833535b541beb8065b4ce98fc73d6d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Zbigniew=20J=C4=99drzejewski-Szmek?= Date: Tue, 28 Apr 2026 15:26:47 +0200 Subject: [PATCH 135/242] bootctl,mute-console,pcrextend,pcrlock,repart: allow connections from self MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit With SD_VARLINK_SERVER_ROOT_ONLY, we refuse all unprivileged operations. This is silly, the user can and should be able to do anything that doesn't require privileges. E.g.: $ SYSTEMD_LOG_LEVEL=debug varlinkctl introspect /usr/lib/systemd/systemd-pcrextend Forking off Varlink child process '/usr/lib/systemd/systemd-pcrextend'. Successfully forked off '(sd-vlexec)' as PID 568993. varlink: Setting state idle-client json-stream: Sending message: {"method":"org.varlink.service.GetInterfaceDescription","parameters":{"interface":"io.systemd.PCRExtend"}} Skipping PR_SET_MM, as we don't have privileges. varlink: Changing state idle-client → calling varlink: Unprivileged client attempted connection, refusing. Failed to run Varlink event loop: Operation not permitted json-stream: Got POLLHUP from socket. varlink: Changing state calling → pending-disconnect varlink: Connection was closed. Failed to issue org.varlink.service.GetInterfaceDescription() varlink call: Connection reset by peer This and similar commands now work, e.g. $ SYSTEMD_LOG_LEVEL=debug varlinkctl call --more ./build/bootctl io.systemd.BootControl.ListBootEntries {} ... Failed to open directory "/efi": No such file or directory File system "/boot" is not a FAT EFI System Partition (ESP) file system. ... Method call failed: Permission denied { "origin" : "linux", "errno" : 13, "errnoName" : "EACCES" } Which is fine — we lack privileges to actually return a useful answer, but the call itself should go through. I didn't touch udevd, which refuses to run if it is not root, and does a lot of privileged setup, so would refuse to start even if the check was removed. --- src/bootctl/bootctl.c | 4 +++- src/mute-console/mute-console.c | 5 +++-- src/pcrextend/pcrextend.c | 4 +++- src/pcrlock/pcrlock.c | 4 +++- src/repart/repart.c | 7 +++---- 5 files changed, 15 insertions(+), 9 deletions(-) diff --git a/src/bootctl/bootctl.c b/src/bootctl/bootctl.c index c627a7dd077d5..881bdfb60ffe0 100644 --- a/src/bootctl/bootctl.c +++ b/src/bootctl/bootctl.c @@ -817,7 +817,9 @@ static int vl_server(void) { r = varlink_server_new( &varlink_server, - SD_VARLINK_SERVER_ROOT_ONLY|SD_VARLINK_SERVER_ALLOW_FD_PASSING_INPUT, + SD_VARLINK_SERVER_ROOT_ONLY | + SD_VARLINK_SERVER_MYSELF_ONLY | + SD_VARLINK_SERVER_ALLOW_FD_PASSING_INPUT, /* userdata= */ NULL); if (r < 0) return log_error_errno(r, "Failed to allocate Varlink server: %m"); diff --git a/src/mute-console/mute-console.c b/src/mute-console/mute-console.c index d5788de09b3b9..b64c66e14f07f 100644 --- a/src/mute-console/mute-console.c +++ b/src/mute-console/mute-console.c @@ -308,8 +308,9 @@ static int vl_server(void) { r = varlink_server_new( &varlink_server, - SD_VARLINK_SERVER_ROOT_ONLY| - SD_VARLINK_SERVER_HANDLE_SIGINT| + SD_VARLINK_SERVER_ROOT_ONLY | + SD_VARLINK_SERVER_MYSELF_ONLY | + SD_VARLINK_SERVER_HANDLE_SIGINT | SD_VARLINK_SERVER_HANDLE_SIGTERM, /* userdata= */ NULL); if (r < 0) diff --git a/src/pcrextend/pcrextend.c b/src/pcrextend/pcrextend.c index f452363209d66..278b3b730c6a4 100644 --- a/src/pcrextend/pcrextend.c +++ b/src/pcrextend/pcrextend.c @@ -427,7 +427,9 @@ static int vl_server(void) { _cleanup_(sd_varlink_server_unrefp) sd_varlink_server *varlink_server = NULL; int r; - r = varlink_server_new(&varlink_server, SD_VARLINK_SERVER_ROOT_ONLY, /* userdata= */ NULL); + r = varlink_server_new(&varlink_server, + SD_VARLINK_SERVER_ROOT_ONLY | SD_VARLINK_SERVER_MYSELF_ONLY, + /* userdata= */ NULL); if (r < 0) return log_error_errno(r, "Failed to allocate Varlink server: %m"); diff --git a/src/pcrlock/pcrlock.c b/src/pcrlock/pcrlock.c index 63af144fe0327..4ebe3f995bdd8 100644 --- a/src/pcrlock/pcrlock.c +++ b/src/pcrlock/pcrlock.c @@ -5480,7 +5480,9 @@ static int run(int argc, char *argv[]) { /* Invocation as Varlink service */ - r = varlink_server_new(&varlink_server, SD_VARLINK_SERVER_ROOT_ONLY, NULL); + r = varlink_server_new(&varlink_server, + SD_VARLINK_SERVER_ROOT_ONLY | SD_VARLINK_SERVER_MYSELF_ONLY, + /* userdata= */ NULL); if (r < 0) return log_error_errno(r, "Failed to allocate Varlink server: %m"); diff --git a/src/repart/repart.c b/src/repart/repart.c index ad19f0ab1ec7a..84aaf60b5f790 100644 --- a/src/repart/repart.c +++ b/src/repart/repart.c @@ -11094,10 +11094,9 @@ static int vl_server(void) { /* Invocation as Varlink service */ - r = varlink_server_new( - &varlink_server, - SD_VARLINK_SERVER_ROOT_ONLY, - /* userdata= */ NULL); + r = varlink_server_new(&varlink_server, + SD_VARLINK_SERVER_ROOT_ONLY | SD_VARLINK_SERVER_MYSELF_ONLY, + /* userdata= */ NULL); if (r < 0) return log_error_errno(r, "Failed to allocate Varlink server: %m"); From 63eb3cf57b67ca8abc22e61d9dda1f572f4f7562 Mon Sep 17 00:00:00 2001 From: Lennart Poettering Date: Tue, 5 May 2026 17:24:48 +0200 Subject: [PATCH 136/242] update TODO --- TODO.md | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/TODO.md b/TODO.md index b785ac6178cdb..61114235f3290 100644 --- a/TODO.md +++ b/TODO.md @@ -132,6 +132,10 @@ SPDX-License-Identifier: LGPL-2.1-or-later systems for SBC. Should be doing what sysinstall does with the credentials, and maybe even *be* sysinstall. +- make sure we always pass O_NOFOLLOW on O_CREAT + +- xopenat(): maybe imply O_NOFOLLOW on O_CREAT + - StorageProvider interface + storagectl - hook-up in systemd-nspawn - hook-up in systemd-vmspawn From 4de3f59774b7afeaa6a70d6f072aeb5e5e1222f0 Mon Sep 17 00:00:00 2001 From: Simran Singh Date: Sun, 3 May 2026 00:22:10 +0530 Subject: [PATCH 137/242] man: EnvironmentFile= honors %h, not \$HOME --- man/systemd.exec.xml | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/man/systemd.exec.xml b/man/systemd.exec.xml index 809cc285fdce1..6524ba631a7cc 100644 --- a/man/systemd.exec.xml +++ b/man/systemd.exec.xml @@ -3294,6 +3294,10 @@ SystemCallErrorNumber=EPERM If the empty string is assigned to this option, the list of files to read is reset, all prior assignments have no effect. + Note that shell variables such as $HOME are not expanded in this path. + Use %-specifiers instead; for example, %h expands to the + user's home directory. + The files listed with this directive will be read shortly before the process is executed (more specifically, after all processes from a previous unit state terminated. This means you can generate these files in one unit state, and read it with this option in the next. The files are read from the file From c0aa351ba966bb89bab11673cae72ccba2e16bc4 Mon Sep 17 00:00:00 2001 From: Luca Boccassi Date: Tue, 5 May 2026 15:33:49 +0100 Subject: [PATCH 138/242] test-oomd: fix flakiness under sanitizers The test asserts that pgscan is 0, but under sanitizers this sometimes fails and shows up as 1. We cannot control what the kernel scans, and with sanitizers the runtime can be slow enough it's possible that the kernel does a pass on the cgroup of the unit test. Instead of asserting that it's 0, assert that it's between 0 and 9, which seems a reasonable range. Fixes https://github.com/systemd/systemd/issues/37710 --- src/oom/test-oomd-util.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/oom/test-oomd-util.c b/src/oom/test-oomd-util.c index 7332f532a8aaa..c78315c56b6b0 100644 --- a/src/oom/test-oomd-util.c +++ b/src/oom/test-oomd-util.c @@ -291,7 +291,7 @@ TEST(oomd_cgroup_context_acquire_and_insert) { ASSERT_EQ(ctx->memory_low, 0u); ASSERT_EQ(ctx->swap_usage, 0u); ASSERT_EQ(ctx->last_pgscan, 0u); - ASSERT_EQ(ctx->pgscan, 0u); + ASSERT_LT(ctx->pgscan, 10u); ASSERT_NULL(ctx = oomd_cgroup_context_unref(ctx)); ASSERT_OK(oomd_cgroup_context_acquire("", &ctx)); From fc05165fce386c8cf991f18abecc5098e7261b6f Mon Sep 17 00:00:00 2001 From: Lennart Poettering Date: Tue, 5 May 2026 10:45:14 +0200 Subject: [PATCH 139/242] terminal-util: when prompting for a choice from a list, preselect longest prefix If all entries of a menu prompt start with the same prefix, let's preselect the prefix to enhance user experience. This is particularly relevant when prompting for a disk to install things on, as typically they all start with the same prefix /dev/, and if there's only a single target medium discoverable, then we can even fill it out fully. --- src/basic/terminal-util.c | 25 +++++++++++++++++++++---- src/basic/terminal-util.h | 8 +++++++- src/home/homectl.c | 21 +++++++++++++-------- src/shared/prompt-util.c | 30 ++++++++++++++++++++++-------- 4 files changed, 63 insertions(+), 21 deletions(-) diff --git a/src/basic/terminal-util.c b/src/basic/terminal-util.c index e5e66a1864777..d241f5e7f8998 100644 --- a/src/basic/terminal-util.c +++ b/src/basic/terminal-util.c @@ -293,17 +293,33 @@ int ask_string_full( assert(ret); assert(text); + _cleanup_free_ char *string = NULL; + size_t n = 0; + + if (get_completions) { + /* Figure out what string to preselect the query with */ + _cleanup_strv_free_ char **completions = NULL; + r = get_completions("", GET_COMPLETIONS_PRESELECT, &completions, userdata); + if (r < 0) + return r; + + CompletionResult cr = pick_completion(string, completions, &string); + if (cr < 0) + return cr; + + n = strlen_ptr(string); + } + /* Output the prompt */ fputs(ansi_highlight(), stdout); va_start(ap, text); vprintf(text, ap); va_end(ap); fputs(ansi_normal(), stdout); + if (string) + fputs(string, stdout); fflush(stdout); - _cleanup_free_ char *string = NULL; - size_t n = 0; - /* Do interactive logic only if stdin + stdout are connected to the same place. And yes, we could use * STDIN_FILENO and STDOUT_FILENO here, but let's be overly correct for once, after all libc allows * swapping out stdin/stdout. */ @@ -344,7 +360,7 @@ int ask_string_full( _cleanup_strv_free_ char **completions = NULL; if (get_completions) { - r = get_completions(string, &completions, userdata); + r = get_completions(string, /* flags= */ 0, &completions, userdata); if (r < 0) return r; } @@ -450,6 +466,7 @@ int ask_string_full( fallback: /* A simple fallback without TTY magic */ + string = mfree(string); r = read_line(stdin, LONG_LINE_MAX, &string); if (r < 0) return r; diff --git a/src/basic/terminal-util.h b/src/basic/terminal-util.h index 7ac5661104159..abf999e6d5562 100644 --- a/src/basic/terminal-util.h +++ b/src/basic/terminal-util.h @@ -88,7 +88,13 @@ int chvt(int vt); int read_one_char(FILE *f, char *ret, usec_t timeout, bool echo, bool *need_nl); int ask_char(char *ret, const char *replies, const char *fmt, ...) _printf_(3, 4); -typedef int (*GetCompletionsCallback)(const char *key, char ***ret_list, void *userdata); +typedef enum GetCompletionsFlags { + /* Only return the items subject to preselection: typically you want to suppress meta entries such as + * "list" or alias entries if this flag is set. */ + GET_COMPLETIONS_PRESELECT = 1 << 0, +} GetCompletionsFlags; + +typedef int (*GetCompletionsCallback)(const char *key, GetCompletionsFlags flags, char ***ret_list, void *userdata); int ask_string_full(char **ret, GetCompletionsCallback get_completions, void *userdata, const char *text, ...) _printf_(4, 5); #define ask_string(ret, text, ...) ask_string_full(ret, NULL, NULL, text, ##__VA_ARGS__) diff --git a/src/home/homectl.c b/src/home/homectl.c index 271e03587502b..37714aa2b0185 100644 --- a/src/home/homectl.c +++ b/src/home/homectl.c @@ -2697,7 +2697,7 @@ static int acquire_group_list(char ***ret) { return !!*ret; } -static int group_completion_callback(const char *key, char ***ret_list, void *userdata) { +static int group_completion_callback(const char *key, GetCompletionsFlags flags, char ***ret_list, void *userdata) { char ***available = userdata; int r; @@ -2711,9 +2711,11 @@ static int group_completion_callback(const char *key, char ***ret_list, void *us if (!l) return -ENOMEM; - r = strv_extend(&l, "list"); - if (r < 0) - return r; + if (!FLAGS_SET(flags, GET_COMPLETIONS_PRESELECT)) { + r = strv_extend(&l, "list"); + if (r < 0) + return r; + } *ret_list = TAKE_PTR(l); return 0; @@ -2745,10 +2747,13 @@ static int prompt_groups(const char *username, char ***ret_groups) { } _cleanup_free_ char *s = NULL; - r = ask_string_full(&s, - group_completion_callback, &available, - "%s Please enter an auxiliary group for user %s (empty to continue, \"list\" to list available groups): ", - glyph(GLYPH_LABEL), username); + r = ask_string_full( + &s, + group_completion_callback, + &available, + "%s Please enter an auxiliary group for user %s (empty to continue, \"list\" to list available groups): ", + glyph(GLYPH_LABEL), + username); if (r < 0) return log_error_errno(r, "Failed to query user for auxiliary group: %m"); diff --git a/src/shared/prompt-util.c b/src/shared/prompt-util.c index 9811cb1527873..7cead706fd95f 100644 --- a/src/shared/prompt-util.c +++ b/src/shared/prompt-util.c @@ -16,27 +16,41 @@ #include "strv.h" #include "terminal-util.h" +typedef struct CompletionData { + char **menu; /* What to show in menu */ + char **accepted; /* What to accept (usually larger than the menu, but may be NULL if same) */ +} CompletionData; + static int get_completions( const char *key, + GetCompletionsFlags flags, char ***ret_list, void *userdata) { + CompletionData *data = ASSERT_PTR(userdata); int r; assert(ret_list); - if (!userdata) { + /* Figure out the list to operate on. We'll generally work based on the "accepted" list, if it is + * set. If not we'll operate with the full menu. When doing pre-selection we'll also pick the menu */ + char **l = data->accepted && !FLAGS_SET(flags, GET_COMPLETIONS_PRESELECT) ? data->accepted : data->menu; + + if (strv_isempty(l)) { *ret_list = NULL; return 0; } - _cleanup_strv_free_ char **copy = strv_copy(userdata); + _cleanup_strv_free_ char **copy = strv_copy(l); if (!copy) return -ENOMEM; - r = strv_extend(©, "list"); - if (r < 0) - return r; + /* Never consider "list" for preselecting an item, but do consider it when doing a regular completion */ + if (!FLAGS_SET(flags, GET_COMPLETIONS_PRESELECT)) { + r = strv_extend(©, "list"); + if (r < 0) + return r; + } *ret_list = TAKE_PTR(copy); return 0; @@ -45,8 +59,8 @@ static int get_completions( int prompt_loop( const char *text, Glyph emoji, - char **menu, /* if non-NULL: choices to suggest */ - char **accepted, /* if non-NULL: choices to accept (should be a superset of 'menu') */ + char **menu, /* if non-NULL: choices to suggest */ + char **accepted, /* if non-NULL: choices to accept (should be a superset of 'menu') */ unsigned ellipsize_percentage, size_t n_columns, size_t column_width, @@ -102,7 +116,7 @@ int prompt_loop( r = ask_string_full( &p, get_completions, - accepted ?: menu, + &(CompletionData) { menu, accepted }, "%s%s%s%s: ", emoji >= 0 ? glyph(emoji) : "", emoji >= 0 ? " " : "", From afa3eb821d4442d3b9d9e693be5322ffbdf594ad Mon Sep 17 00:00:00 2001 From: Luca Boccassi Date: Tue, 5 May 2026 16:21:16 +0100 Subject: [PATCH 140/242] test: drop more 'grep -q' instances Follow-up for ee6b3d1aa2329cddb5867bbc86a4b62983ee56fe --- test/units/TEST-04-JOURNAL.journalctl-varlink.sh | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/test/units/TEST-04-JOURNAL.journalctl-varlink.sh b/test/units/TEST-04-JOURNAL.journalctl-varlink.sh index 2d7b19990c815..16cce6f0d4718 100755 --- a/test/units/TEST-04-JOURNAL.journalctl-varlink.sh +++ b/test/units/TEST-04-JOURNAL.journalctl-varlink.sh @@ -38,14 +38,14 @@ systemd-run --unit="$UNIT_NAME_2" --wait bash -c 'echo hello-from-varlink-test-2 journalctl --sync # single unit filter -varlinkctl call --more "$VARLINK_SOCKET" io.systemd.JournalAccess.GetEntries "{\"units\": [\"$UNIT_NAME_1\"]}" | grep -q "hello-from-varlink-test-1" +varlinkctl call --more "$VARLINK_SOCKET" io.systemd.JournalAccess.GetEntries "{\"units\": [\"$UNIT_NAME_1\"]}" | grep "hello-from-varlink-test-1" >/dev/null (! varlinkctl call --more "$VARLINK_SOCKET" io.systemd.JournalAccess.GetEntries "{\"units\": [\"$UNIT_NAME_1\"]}" | grep "hello-from-varlink-test-2") # multi unit filter -varlinkctl call --more "$VARLINK_SOCKET" io.systemd.JournalAccess.GetEntries "{\"units\": [\"$UNIT_NAME_1\", \"$UNIT_NAME_2\"]}" | grep -q "hello-from-varlink-test-1" -varlinkctl call --more "$VARLINK_SOCKET" io.systemd.JournalAccess.GetEntries "{\"units\": [\"$UNIT_NAME_1\", \"$UNIT_NAME_2\"]}" | grep -q "hello-from-varlink-test-2" +varlinkctl call --more "$VARLINK_SOCKET" io.systemd.JournalAccess.GetEntries "{\"units\": [\"$UNIT_NAME_1\", \"$UNIT_NAME_2\"]}" | grep "hello-from-varlink-test-1" >/dev/null +varlinkctl call --more "$VARLINK_SOCKET" io.systemd.JournalAccess.GetEntries "{\"units\": [\"$UNIT_NAME_1\", \"$UNIT_NAME_2\"]}" | grep "hello-from-varlink-test-2" >/dev/null # check priority filter: priority 4 (warning) should include our warning message -varlinkctl call --more "$VARLINK_SOCKET" io.systemd.JournalAccess.GetEntries '{"priority": 4, "limit": 1000}' | grep -q "varlink-test-warning" +varlinkctl call --more "$VARLINK_SOCKET" io.systemd.JournalAccess.GetEntries '{"priority": 4, "limit": 1000}' | grep "varlink-test-warning" >/dev/null # check priority filter: priority 3 (error) should NOT include our warning (priority 4) (! varlinkctl call --more "$VARLINK_SOCKET" io.systemd.JournalAccess.GetEntries '{"priority": 3, "limit": 1000}' | grep "varlink-test-warning") From 88cc10e507f637fd073e13cda5d9177e36761159 Mon Sep 17 00:00:00 2001 From: Luca Boccassi Date: Tue, 5 May 2026 16:50:40 +0100 Subject: [PATCH 141/242] test: reduce number of identical io.systemd.JournalAccess.GetEntries calls This test is sometimes flaky under sanitizers, and it does repeated calls with the same parameters to run through different greps, and the second one sometimes fails. Store the result and grep it twice instead to try and reduce flakiness. [ 2089.891152] TEST-04-JOURNAL.sh[22392]: + systemd-run --unit=test-journalctl-varlink-2-18237.service --wait bash -c 'echo hello-from-varlink-test-2' [ 2090.066050] TEST-04-JOURNAL.sh[22460]: + varlinkctl call --more /run/systemd/io.systemd.JournalAccess io.systemd.JournalAccess.GetEntries '{"units": ["test-journalctl-varlink-1-22690.service"]}' [ 2090.067075] TEST-04-JOURNAL.sh[22461]: + grep -q hello-from-varlink-test-1 [ 2090.384551] TEST-04-JOURNAL.sh[22466]: + varlinkctl call --more /run/systemd/io.systemd.JournalAccess io.systemd.JournalAccess.GetEntries '{"units": ["test-journalctl-varlink-1-22690.service"]}' [ 2090.385373] TEST-04-JOURNAL.sh[22467]: + grep hello-from-varlink-test-2 [ 2090.723461] TEST-04-JOURNAL.sh[22474]: + grep -q hello-from-varlink-test-1 [ 2090.724294] TEST-04-JOURNAL.sh[22473]: + varlinkctl call --more /run/systemd/io.systemd.JournalAccess io.systemd.JournalAccess.GetEntries '{"units": ["test-journalctl-varlink-1-22690.service", "test-journalctl-varlink-2-18237.service"]}' [ 2091.135655] TEST-04-JOURNAL.sh[22480]: + varlinkctl call --more /run/systemd/io.systemd.JournalAccess io.systemd.JournalAccess.GetEntries '{"units": ["test-journalctl-varlink-1-22690.service", "test-journalctl-varlink-2-18237.service"]}' [ 2091.136605] TEST-04-JOURNAL.sh[22481]: + grep -q hello-from-varlink-test-2 [ 2091.479930] TEST-04-JOURNAL.sh[22480]: Method call failed: io.systemd.JournalAccess.NoEntries --- test/units/TEST-04-JOURNAL.journalctl-varlink.sh | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/test/units/TEST-04-JOURNAL.journalctl-varlink.sh b/test/units/TEST-04-JOURNAL.journalctl-varlink.sh index 16cce6f0d4718..4f86fa2a541ff 100755 --- a/test/units/TEST-04-JOURNAL.journalctl-varlink.sh +++ b/test/units/TEST-04-JOURNAL.journalctl-varlink.sh @@ -38,11 +38,13 @@ systemd-run --unit="$UNIT_NAME_2" --wait bash -c 'echo hello-from-varlink-test-2 journalctl --sync # single unit filter -varlinkctl call --more "$VARLINK_SOCKET" io.systemd.JournalAccess.GetEntries "{\"units\": [\"$UNIT_NAME_1\"]}" | grep "hello-from-varlink-test-1" >/dev/null -(! varlinkctl call --more "$VARLINK_SOCKET" io.systemd.JournalAccess.GetEntries "{\"units\": [\"$UNIT_NAME_1\"]}" | grep "hello-from-varlink-test-2") +SINGLE_OUTPUT="$(varlinkctl call --more "$VARLINK_SOCKET" io.systemd.JournalAccess.GetEntries "{\"units\": [\"$UNIT_NAME_1\"]}")" +grep "hello-from-varlink-test-1" >/dev/null <<<"$SINGLE_OUTPUT" +(! grep "hello-from-varlink-test-2" >/dev/null <<<"$SINGLE_OUTPUT") # multi unit filter -varlinkctl call --more "$VARLINK_SOCKET" io.systemd.JournalAccess.GetEntries "{\"units\": [\"$UNIT_NAME_1\", \"$UNIT_NAME_2\"]}" | grep "hello-from-varlink-test-1" >/dev/null -varlinkctl call --more "$VARLINK_SOCKET" io.systemd.JournalAccess.GetEntries "{\"units\": [\"$UNIT_NAME_1\", \"$UNIT_NAME_2\"]}" | grep "hello-from-varlink-test-2" >/dev/null +MULTI_OUTPUT="$(varlinkctl call --more "$VARLINK_SOCKET" io.systemd.JournalAccess.GetEntries "{\"units\": [\"$UNIT_NAME_1\", \"$UNIT_NAME_2\"]}")" +grep "hello-from-varlink-test-1" >/dev/null <<<"$MULTI_OUTPUT" +grep "hello-from-varlink-test-2" >/dev/null <<<"$MULTI_OUTPUT" # check priority filter: priority 4 (warning) should include our warning message varlinkctl call --more "$VARLINK_SOCKET" io.systemd.JournalAccess.GetEntries '{"priority": 4, "limit": 1000}' | grep "varlink-test-warning" >/dev/null From a2186070b79b2dcce332465696ad31241f27cf11 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Zbigniew=20J=C4=99drzejewski-Szmek?= Date: Tue, 28 Apr 2026 23:55:48 +0200 Subject: [PATCH 142/242] report: upload reports using a "varlink socket directory" Two new verbs are added: "generate" and "upload". The first one just creates a "report", i.e. puts the metrics into a structured JSON object that in the future is intended to carry additional data like a signature: $ build/systemd-report generate io.systemd.Manager.UnitsTotal { "mediaType" : "application/vnd.io.systemd.report", "timestamp" : "Tue 2026-04-28 22:30:09 UTC", "metrics" : [ { "name" : "io.systemd.Manager.UnitsTotal", "value" : 520 } ] } The second verb can be used to upload or otherwise process the report. It builds on the code added in 0a8560eed873a5f89487630a19db550fdbee3c15. In /run/systemd/metrics-upload/ we expect a set of sockets. We'll call out to each one of them. This allows the data to be processed in custom ways, incl. writing to storage or sending over the network. Each socket must provide a single interface: io.systemd.Metrics.Upload {"report":$data} --- man/systemd-report.xml | 25 ++++++ src/report/report-upload.c | 105 +++++++++++++++++++++---- src/report/report.c | 30 +++---- src/report/report.h | 9 ++- test/units/TEST-74-AUX-UTILS.report.sh | 8 +- 5 files changed, 145 insertions(+), 32 deletions(-) diff --git a/man/systemd-report.xml b/man/systemd-report.xml index 0974244a8f563..560d9406240f0 100644 --- a/man/systemd-report.xml +++ b/man/systemd-report.xml @@ -71,6 +71,31 @@ + + generate MATCH + + Acquire a list of metrics and build a JSON report. + + Match expressions supported by metrics are supported here too. + + + + + + upload MATCH + + This command can be used to send the report built by generate + to an external server. Two upload mechanisms are supported. If an http:// or + https:// URL is specified with , an HTTP upload will be + performed to the specified location. Otherwise, any sockets under + /run/systemd/metrics-upload/ will be used to call + io.systemd.Report.Upload(). + + Match expressions supported by metrics are supported here too. + + + + list-sources diff --git a/src/report/report-upload.c b/src/report/report-upload.c index 486e815e8d857..fce0c0e551396 100644 --- a/src/report/report-upload.c +++ b/src/report/report-upload.c @@ -2,12 +2,15 @@ #include "sd-json.h" +#include "alloc-util.h" +#include "errno-util.h" #include "log.h" #include "report.h" #include "string-util.h" #include "strv.h" #include "time-util.h" #include "utf8.h" +#include "varlink-util.h" #include "version.h" #if HAVE_LIBCURL @@ -49,6 +52,7 @@ static size_t output_callback(char *buf, return nmemb; } +#endif static int build_json_report(Context *context, sd_json_variant **ret) { /* Convert the variant array to a JSON report. */ @@ -60,6 +64,7 @@ static int build_json_report(Context *context, sd_json_variant **ret) { int r; r = sd_json_buildo(ret, + SD_JSON_BUILD_PAIR_STRING("mediaType", "application/vnd.io.systemd.report"), SD_JSON_BUILD_PAIR("timestamp", SD_JSON_BUILD_STRING(FORMAT_TIMESTAMP_STYLE(ts, TIMESTAMP_UTC))), SD_JSON_BUILD_PAIR("metrics", @@ -68,9 +73,8 @@ static int build_json_report(Context *context, sd_json_variant **ret) { return log_error_errno(r, "Failed to build JSON data: %m"); return 0; } -#endif -int upload_collected(Context *context) { +static int http_upload_collected(Context *context, sd_json_variant *report) { #if HAVE_LIBCURL _cleanup_(curl_slist_free_allp) struct curl_slist *header = NULL; char error[CURL_ERROR_SIZE] = {}; @@ -81,19 +85,11 @@ int upload_collected(Context *context) { if (r < 0) return r; - { - /* Convert our variant array to a JSON report. - * We won't need the JSON structure again, so free it quickly. */ - - _cleanup_(sd_json_variant_unrefp) sd_json_variant *vl = NULL; - r = build_json_report(context, &vl); - if (r < 0) - return r; + /* Upload a JSON report in text form as a single JSON object, instead of a JSON-SEQ list. */ - r = sd_json_variant_format(vl, /* flags= */ 0, &json); - if (r < 0) - return log_error_errno(r, "Failed to format JSON data: %m"); - } + r = sd_json_variant_format(report, /* flags= */ 0, &json); + if (r < 0) + return log_error_errno(r, "Failed to format JSON data: %m"); r = curl_append_to_header(&header, STRV_MAKE("Content-Type: application/json", @@ -206,3 +202,84 @@ int upload_collected(Context *context) { "Compiled without libcurl."); #endif } + +static int execute_dir_reply( + sd_varlink *link, + sd_json_variant *reply, + const char *error_id, + sd_varlink_reply_flags_t flags, + void *userdata) { + + assert(link); + + Context *context = ASSERT_PTR(userdata); + int r; + + if (error_id) { + r = sd_varlink_error_to_errno(error_id, reply); + RET_GATHER(context->upload_result, r); + return log_error_errno(r, "Upload via Varlink failed: %s", error_id); + } + + printf("Upload via Varlink was successful; reply: "); + // TODO: once we know what we want to put in the reply, replace the JSON dump by + // some formatted output. + r = sd_json_variant_dump(reply, arg_json_format_flags, stderr, /* prefix= */ ">>> "); + if (r < 0) + return log_error_errno(r, "Failed to dump json object: %m"); + + return 0; +} + +static int upload_collected(Context *context, sd_json_variant *report) { + int r; + + if (arg_url) + return http_upload_collected(context, report); + + _cleanup_(sd_json_variant_unrefp) sd_json_variant *params = NULL; + r = sd_json_buildo(¶ms, + SD_JSON_BUILD_PAIR_VARIANT("report", report)); + if (r < 0) + return log_error_errno(r, "Failed to build JSON data: %m"); + + ssize_t jobs = varlink_execute_directory( + REPORT_UPLOAD_DIR, + "io.systemd.Report.Upload", + params, + /* more= */ false, + arg_network_timeout_usec, + execute_dir_reply, + /* userdata= */ context); + if (jobs < 0) + return log_error_errno(jobs, "Failed to execute upload via %s: %m", REPORT_UPLOAD_DIR); + if (jobs == 0) + return log_error_errno(SYNTHETIC_ERRNO(ENOPKG), + "No upload mechanism found via %s.", REPORT_UPLOAD_DIR); + if (context->upload_result < 0) + /* The details were printed at error level by execute_dir_reply above. */ + return log_debug_errno(context->upload_result, "Upload via %s failed: %m", REPORT_UPLOAD_DIR); + + log_debug("Upload via %s finished successfully.", REPORT_UPLOAD_DIR); + return 0; +} + +/* Make a structured report and either print it or upload it. */ +int report_collected(Context *context) { + _cleanup_(sd_json_variant_unrefp) sd_json_variant *report = NULL; + int r; + + r = build_json_report(context, &report); + if (r < 0) + return r; + + if (context->action == ACTION_UPLOAD) + return upload_collected(context, report); + + /* Just print the report for now. */ + assert(context->action == ACTION_GENERATE); + r = sd_json_variant_dump(report, arg_json_format_flags, /* f= */ NULL, /* prefix= */ NULL); + if (r < 0) + return log_error_errno(r, "Failed to dump json object: %m"); + return 0; +} diff --git a/src/report/report.c b/src/report/report.c index feedcaa43a5b8..c23417afb5b29 100644 --- a/src/report/report.c +++ b/src/report/report.c @@ -20,7 +20,6 @@ #include "runtime-scope.h" #include "set.h" #include "sort-util.h" -#include "string-table.h" #include "string-util.h" #include "strv.h" #include "time-util.h" @@ -35,8 +34,8 @@ static PagerFlags arg_pager_flags = 0; static bool arg_legend = true; static RuntimeScope arg_runtime_scope = RUNTIME_SCOPE_SYSTEM; -static sd_json_format_flags_t arg_json_format_flags = SD_JSON_FORMAT_OFF|SD_JSON_FORMAT_PRETTY_AUTO|SD_JSON_FORMAT_COLOR_AUTO; static char **arg_matches = NULL; +sd_json_format_flags_t arg_json_format_flags = SD_JSON_FORMAT_OFF|SD_JSON_FORMAT_PRETTY_AUTO|SD_JSON_FORMAT_COLOR_AUTO; char *arg_url = NULL; char *arg_key = NULL; char *arg_cert = NULL; @@ -84,13 +83,6 @@ DEFINE_PRIVATE_HASH_OPS_WITH_VALUE_DESTRUCTOR( void, trivial_hash_func, trivial_compare_func, LinkInfo, link_info_free); -static const char* const action_method_table[] = { - [ACTION_LIST_METRICS] = "io.systemd.Metrics.List", - [ACTION_DESCRIBE_METRICS] = "io.systemd.Metrics.Describe", -}; - -DEFINE_PRIVATE_STRING_TABLE_LOOKUP_TO_STRING(action_method, Action); - static int metric_compare(sd_json_variant *const *a, sd_json_variant *const *b) { const char *name_a, *name_b, *object_a, *object_b; sd_json_variant *fields_a, *fields_b; @@ -300,7 +292,9 @@ static int call_collect(Context *context, const char *name, const char *path) { if (r < 0) return log_error_errno(r, "Failed to bind reply callback: %m"); - const char *method = ASSERT_PTR(action_method_to_string(context->action)); + const char *method = context->action == ACTION_DESCRIBE_METRICS ? + "io.systemd.Metrics.Describe" : + "io.systemd.Metrics.List"; /* This is the method for all other actions. */ r = sd_varlink_observe(vl, method, /* parameters= */ NULL); if (r < 0) @@ -591,16 +585,22 @@ VERB_FULL(verb_metrics, "metrics", "[MATCH…]", VERB_ANY, VERB_ANY, 0, ACTION_L "Acquire list of metrics and their values"); VERB_FULL(verb_metrics, "describe", "[MATCH…]", VERB_ANY, VERB_ANY, 0, ACTION_DESCRIBE_METRICS, "Describe available metrics"); +VERB_FULL(verb_metrics, "generate", "[MATCH…]", VERB_ANY, VERB_ANY, 0, ACTION_GENERATE, + "Build a report with metrics"); +VERB_FULL(verb_metrics, "upload", "[MATCH…]", VERB_ANY, VERB_ANY, 0, ACTION_UPLOAD, + "Upload a report with metrics"); static int verb_metrics(int argc, char *argv[], uintptr_t data, void *userdata) { Action action = data; int r; assert(argc >= 1); assert(argv); - assert(IN_SET(action, ACTION_LIST_METRICS, ACTION_DESCRIBE_METRICS)); + assert(IN_SET(action, ACTION_LIST_METRICS, ACTION_DESCRIBE_METRICS, ACTION_GENERATE, ACTION_UPLOAD)); - /* Enable JSON-SEQ mode here, since we'll dump a large series of JSON objects */ - arg_json_format_flags |= SD_JSON_FORMAT_SEQ; + if (IN_SET(action, ACTION_LIST_METRICS, ACTION_DESCRIBE_METRICS)) + /* Enable JSON-SEQ mode for the first two verbs, since we'll dump a large series of JSON + * objects. In the report format, we return a single JSON object, so don't do this. */ + arg_json_format_flags |= SD_JSON_FORMAT_SEQ; r = parse_metrics_matches(argv + 1); if (r < 0) @@ -651,8 +651,8 @@ static int verb_metrics(int argc, char *argv[], uintptr_t data, void *userdata) if (r < 0) return log_error_errno(r, "Failed to run event loop: %m"); - if (arg_url) - r = upload_collected(&context); + if (IN_SET(action, ACTION_GENERATE, ACTION_UPLOAD)) + r = report_collected(&context); else r = output_collected(&context); if (r < 0) diff --git a/src/report/report.h b/src/report/report.h index 4adb20349514a..196a3daf577d5 100644 --- a/src/report/report.h +++ b/src/report/report.h @@ -9,6 +9,9 @@ #define REPORT_CERT_FILE CERTIFICATE_ROOT "/certs/systemd-report.pem" #define REPORT_TRUST_FILE CERTIFICATE_ROOT "/ca/trusted.pem" +#define REPORT_UPLOAD_DIR "/run/systemd/metrics-upload" + +extern sd_json_format_flags_t arg_json_format_flags; extern char *arg_url, *arg_key, *arg_cert, *arg_trust; extern char **arg_extra_headers; extern usec_t arg_network_timeout_usec; @@ -16,6 +19,8 @@ extern usec_t arg_network_timeout_usec; typedef enum Action { ACTION_LIST_METRICS, ACTION_DESCRIBE_METRICS, + ACTION_GENERATE, + ACTION_UPLOAD, _ACTION_MAX, _ACTION_INVALID = -EINVAL, } Action; @@ -27,7 +32,9 @@ typedef struct Context { Set *link_infos; sd_json_variant **metrics; /* Collected metrics for sorting */ size_t n_metrics, n_skipped_metrics, n_invalid_metrics; + + int upload_result; struct iovec_wrapper upload_answer; } Context; -int upload_collected(Context *context); +int report_collected(Context *context); diff --git a/test/units/TEST-74-AUX-UTILS.report.sh b/test/units/TEST-74-AUX-UTILS.report.sh index 73678fcabf1f8..e9332806b3791 100755 --- a/test/units/TEST-74-AUX-UTILS.report.sh +++ b/test/units/TEST-74-AUX-UTILS.report.sh @@ -60,7 +60,11 @@ trap at_exit EXIT systemd-run -p Type=notify --unit=fake-report-server "$FAKE_SERVER" systemctl status fake-report-server -"$REPORT" metrics --url=http://localhost:8089/ +"$REPORT" generate io.systemd.Manager.UnitsTotal + +"$REPORT" generate io.systemd.Manager.UnitsTotal | jq . + +"$REPORT" upload --url=http://localhost:8089/ # Test HTTPS upload with generated TLS certificates openssl req -x509 -newkey rsa:2048 -keyout "$CERTDIR/server.key" -out "$CERTDIR/server.crt" \ @@ -70,5 +74,5 @@ systemd-run -p Type=notify --unit=fake-report-server-tls \ "$FAKE_SERVER" --cert="$CERTDIR/server.crt" --key="$CERTDIR/server.key" --port=8090 systemctl status fake-report-server-tls -"$REPORT" metrics --url=https://localhost:8090/ --key=- --trust="$CERTDIR/server.crt" \ +"$REPORT" upload --url=https://localhost:8090/ --key=- --trust="$CERTDIR/server.crt" \ --extra-header='Authorization: Bearer magic string' From 9c336ef6eb9357de71df3163c7a511c9fb470474 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Zbigniew=20J=C4=99drzejewski-Szmek?= Date: Wed, 29 Apr 2026 11:42:00 +0200 Subject: [PATCH 143/242] report: set description on varlink sockets We make multiple connections and without this it's hard to know socket we're talking to. --- src/report/report.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/src/report/report.c b/src/report/report.c index c23417afb5b29..c0cd92042617a 100644 --- a/src/report/report.c +++ b/src/report/report.c @@ -280,6 +280,10 @@ static int call_collect(Context *context, const char *name, const char *path) { if (r < 0) return log_error_errno(r, "Unable to connect to %s: %m", path); + r = sd_varlink_set_description(vl, name); + if (r < 0) + return log_error_errno(r, "Failed to set varlink description: %m"); + r = sd_varlink_set_relative_timeout(vl, TIMEOUT_USEC); if (r < 0) return log_error_errno(r, "Failed to set varlink timeout: %m"); From ffd4e7ab0148f67c685f18c585ac220fa562079b Mon Sep 17 00:00:00 2001 From: Michael Vogt Date: Wed, 29 Apr 2026 17:52:50 +0200 Subject: [PATCH 144/242] report: when a report fails, print the json error details When a report upload fails the backend often provides useful details via the varlink error. Show them as part of the upload error message. For now we just dump the json because we have no structure that the backends should follow. We may want to consider adding one (like check for an "error_message" key in the json). But for now this is a nice step forward. --- src/report/report-upload.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/src/report/report-upload.c b/src/report/report-upload.c index fce0c0e551396..471bcb760600e 100644 --- a/src/report/report-upload.c +++ b/src/report/report-upload.c @@ -218,7 +218,11 @@ static int execute_dir_reply( if (error_id) { r = sd_varlink_error_to_errno(error_id, reply); RET_GATHER(context->upload_result, r); - return log_error_errno(r, "Upload via Varlink failed: %s", error_id); + log_error_errno(r, "Upload via Varlink failed: %s", error_id); + if (reply) + (void) sd_json_variant_dump(reply, arg_json_format_flags, + /* f= */ NULL, /* prefix= */ NULL); + return r; } printf("Upload via Varlink was successful; reply: "); From 628ab0040e163e650a5094e6678a56a2a6b234b8 Mon Sep 17 00:00:00 2001 From: Michael Vogt Date: Tue, 5 May 2026 14:55:18 +0200 Subject: [PATCH 145/242] report: fold io.systemd.Basic facts into metrics We removed the concept of facts, so we need to update the existing io.systemd.Basic facts provider to metrics. This commit does just that. Its mostly mechanical. This also means that facts.{c,h} and varlink-io.systemd.Facts.{c,h} are gone now. --- src/report/report-basic-server.c | 13 ++- src/report/report-basic.c | 74 +++++++----- src/report/report-basic.h | 6 +- src/shared/facts.c | 151 ------------------------- src/shared/facts.h | 31 ----- src/shared/meson.build | 2 - src/shared/varlink-io.systemd.Facts.c | 37 ------ src/shared/varlink-io.systemd.Facts.h | 6 - test/units/TEST-74-AUX-UTILS.report.sh | 6 +- units/systemd-report-basic.socket | 2 +- units/systemd-report-basic@.service.in | 2 +- 11 files changed, 63 insertions(+), 267 deletions(-) delete mode 100644 src/shared/facts.c delete mode 100644 src/shared/facts.h delete mode 100644 src/shared/varlink-io.systemd.Facts.c delete mode 100644 src/shared/varlink-io.systemd.Facts.h diff --git a/src/report/report-basic-server.c b/src/report/report-basic-server.c index 1e2eca31eae68..51de33efe1783 100644 --- a/src/report/report-basic-server.c +++ b/src/report/report-basic-server.c @@ -4,12 +4,12 @@ #include "ansi-color.h" #include "build.h" -#include "facts.h" #include "format-table.h" #include "log.h" #include "main-func.h" #include "options.h" #include "report-basic.h" +#include "varlink-io.systemd.Metrics.h" #include "varlink-util.h" static int vl_server(void) { @@ -20,9 +20,16 @@ static int vl_server(void) { if (r < 0) return log_error_errno(r, "Failed to allocate Varlink server: %m"); - r = facts_add_to_varlink_server(vs, vl_method_list_facts, vl_method_describe_facts); + r = sd_varlink_server_add_interface(vs, &vl_interface_io_systemd_Metrics); if (r < 0) - return log_error_errno(r, "Failed to register Facts varlink interface: %m"); + return log_error_errno(r, "Failed to add Varlink interface: %m"); + + r = sd_varlink_server_bind_method_many( + vs, + "io.systemd.Metrics.List", vl_method_list_metrics, + "io.systemd.Metrics.Describe", vl_method_describe_metrics); + if (r < 0) + return log_error_errno(r, "Failed to bind Varlink methods: %m"); r = sd_varlink_server_loop_auto(vs); if (r < 0) diff --git a/src/report/report-basic.c b/src/report/report-basic.c index 381262dfd4909..50a4dfaaf1301 100644 --- a/src/report/report-basic.c +++ b/src/report/report-basic.c @@ -8,21 +8,22 @@ #include "alloc-util.h" #include "architecture.h" -#include "facts.h" #include "hostname-setup.h" +#include "metrics.h" #include "report-basic.h" #include "virt.h" -static int architecture_generate(FactFamilyContext *context, void *userdata) { +static int architecture_generate(MetricFamilyContext *context, void *userdata) { assert(context); - return fact_build_send_string( + return metric_build_send_string( context, /* object= */ NULL, - architecture_to_string(uname_architecture())); + architecture_to_string(uname_architecture()), + /* fields= */ NULL); } -static int boot_id_generate(FactFamilyContext *context, void *userdata) { +static int boot_id_generate(MetricFamilyContext *context, void *userdata) { sd_id128_t id; int r; @@ -32,13 +33,14 @@ static int boot_id_generate(FactFamilyContext *context, void *userdata) { if (r < 0) return r; - return fact_build_send_string( + return metric_build_send_string( context, /* object= */ NULL, - SD_ID128_TO_STRING(id)); + SD_ID128_TO_STRING(id), + /* fields= */ NULL); } -static int hostname_generate(FactFamilyContext *context, void *userdata) { +static int hostname_generate(MetricFamilyContext *context, void *userdata) { _cleanup_free_ char *hostname = NULL; int r; @@ -48,26 +50,28 @@ static int hostname_generate(FactFamilyContext *context, void *userdata) { if (r < 0) return r; - return fact_build_send_string( + return metric_build_send_string( context, /* object= */ NULL, - hostname); + hostname, + /* fields= */ NULL); } -static int kernel_version_generate(FactFamilyContext *context, void *userdata) { +static int kernel_version_generate(MetricFamilyContext *context, void *userdata) { struct utsname u; assert(context); assert_se(uname(&u) >= 0); - return fact_build_send_string( + return metric_build_send_string( context, /* object= */ NULL, - u.release); + u.release, + /* fields= */ NULL); } -static int machine_id_generate(FactFamilyContext *context, void *userdata) { +static int machine_id_generate(MetricFamilyContext *context, void *userdata) { sd_id128_t id; int r; @@ -77,13 +81,14 @@ static int machine_id_generate(FactFamilyContext *context, void *userdata) { if (r < 0) return r; - return fact_build_send_string( + return metric_build_send_string( context, /* object= */ NULL, - SD_ID128_TO_STRING(id)); + SD_ID128_TO_STRING(id), + /* fields= */ NULL); } -static int virtualization_generate(FactFamilyContext *context, void *userdata) { +static int virtualization_generate(MetricFamilyContext *context, void *userdata) { Virtualization v; assert(context); @@ -92,51 +97,58 @@ static int virtualization_generate(FactFamilyContext *context, void *userdata) { if (v < 0) return v; - return fact_build_send_string( + return metric_build_send_string( context, /* object= */ NULL, - virtualization_to_string(v)); + virtualization_to_string(v), + /* fields= */ NULL); } -static const FactFamily fact_family_table[] = { - /* Keep facts ordered alphabetically */ +static const MetricFamily metric_family_table[] = { + /* Keep entries ordered alphabetically */ { - .name = FACT_IO_SYSTEMD_BASIC "Architecture", + .name = METRIC_IO_SYSTEMD_BASIC_PREFIX "Architecture", .description = "CPU architecture", + .type = METRIC_FAMILY_TYPE_STRING, .generate = architecture_generate, }, { - .name = FACT_IO_SYSTEMD_BASIC "BootID", + .name = METRIC_IO_SYSTEMD_BASIC_PREFIX "BootID", .description = "Current boot ID", + .type = METRIC_FAMILY_TYPE_STRING, .generate = boot_id_generate, }, { - .name = FACT_IO_SYSTEMD_BASIC "Hostname", + .name = METRIC_IO_SYSTEMD_BASIC_PREFIX "Hostname", .description = "System hostname", + .type = METRIC_FAMILY_TYPE_STRING, .generate = hostname_generate, }, { - .name = FACT_IO_SYSTEMD_BASIC "KernelVersion", + .name = METRIC_IO_SYSTEMD_BASIC_PREFIX "KernelVersion", .description = "Kernel version", + .type = METRIC_FAMILY_TYPE_STRING, .generate = kernel_version_generate, }, { - .name = FACT_IO_SYSTEMD_BASIC "MachineID", + .name = METRIC_IO_SYSTEMD_BASIC_PREFIX "MachineID", .description = "Machine ID", + .type = METRIC_FAMILY_TYPE_STRING, .generate = machine_id_generate, }, { - .name = FACT_IO_SYSTEMD_BASIC "Virtualization", + .name = METRIC_IO_SYSTEMD_BASIC_PREFIX "Virtualization", .description = "Virtualization type", + .type = METRIC_FAMILY_TYPE_STRING, .generate = virtualization_generate, }, {} }; -int vl_method_describe_facts(sd_varlink *link, sd_json_variant *parameters, sd_varlink_method_flags_t flags, void *userdata) { - return facts_method_describe(fact_family_table, link, parameters, flags, userdata); +int vl_method_describe_metrics(sd_varlink *link, sd_json_variant *parameters, sd_varlink_method_flags_t flags, void *userdata) { + return metrics_method_describe(metric_family_table, link, parameters, flags, userdata); } -int vl_method_list_facts(sd_varlink *link, sd_json_variant *parameters, sd_varlink_method_flags_t flags, void *userdata) { - return facts_method_list(fact_family_table, link, parameters, flags, userdata); +int vl_method_list_metrics(sd_varlink *link, sd_json_variant *parameters, sd_varlink_method_flags_t flags, void *userdata) { + return metrics_method_list(metric_family_table, link, parameters, flags, userdata); } diff --git a/src/report/report-basic.h b/src/report/report-basic.h index b24613edb62ff..8f123cb17fe3e 100644 --- a/src/report/report-basic.h +++ b/src/report/report-basic.h @@ -3,7 +3,7 @@ #include "shared-forward.h" -#define FACT_IO_SYSTEMD_BASIC "io.systemd.Basic." +#define METRIC_IO_SYSTEMD_BASIC_PREFIX "io.systemd.Basic." -int vl_method_list_facts(sd_varlink *link, sd_json_variant *parameters, sd_varlink_method_flags_t flags, void *userdata); -int vl_method_describe_facts(sd_varlink *link, sd_json_variant *parameters, sd_varlink_method_flags_t flags, void *userdata); +int vl_method_list_metrics(sd_varlink *link, sd_json_variant *parameters, sd_varlink_method_flags_t flags, void *userdata); +int vl_method_describe_metrics(sd_varlink *link, sd_json_variant *parameters, sd_varlink_method_flags_t flags, void *userdata); diff --git a/src/shared/facts.c b/src/shared/facts.c deleted file mode 100644 index fa16c7b7cb8ff..0000000000000 --- a/src/shared/facts.c +++ /dev/null @@ -1,151 +0,0 @@ -/* SPDX-License-Identifier: LGPL-2.1-or-later */ - -#include "sd-varlink.h" - -#include "facts.h" -#include "json-util.h" -#include "log.h" -#include "varlink-io.systemd.Facts.h" - -int facts_add_to_varlink_server( - sd_varlink_server *server, - sd_varlink_method_t vl_method_list_cb, - sd_varlink_method_t vl_method_describe_cb) { - - int r; - - assert(server); - assert(vl_method_list_cb); - assert(vl_method_describe_cb); - - r = sd_varlink_server_add_interface(server, &vl_interface_io_systemd_Facts); - if (r < 0) - return log_debug_errno(r, "Failed to add varlink facts interface to varlink server: %m"); - - r = sd_varlink_server_bind_method_many( - server, - "io.systemd.Facts.List", vl_method_list_cb, - "io.systemd.Facts.Describe", vl_method_describe_cb); - if (r < 0) - return log_debug_errno(r, "Failed to register varlink facts methods: %m"); - - return 0; -} - -static int fact_family_build_json(const FactFamily *ff, sd_json_variant **ret) { - assert(ff); - - return sd_json_buildo( - ret, - SD_JSON_BUILD_PAIR_STRING("name", ff->name), - SD_JSON_BUILD_PAIR_STRING("description", ff->description)); -} - -int facts_method_describe( - const FactFamily fact_family_table[], - sd_varlink *link, - sd_json_variant *parameters, - sd_varlink_method_flags_t flags, - void *userdata) { - - int r; - - assert(fact_family_table); - assert(link); - assert(parameters); - assert(FLAGS_SET(flags, SD_VARLINK_METHOD_MORE)); - - r = sd_varlink_dispatch(link, parameters, /* dispatch_table= */ NULL, /* userdata= */ NULL); - if (r != 0) - return r; - - r = sd_varlink_set_sentinel(link, "io.systemd.Facts.NoSuchFact"); - if (r < 0) - return r; - - for (const FactFamily *ff = fact_family_table; ff && ff->name; ff++) { - _cleanup_(sd_json_variant_unrefp) sd_json_variant *v = NULL; - - r = fact_family_build_json(ff, &v); - if (r < 0) - return log_debug_errno(r, "Failed to describe fact family '%s': %m", ff->name); - - r = sd_varlink_reply(link, v); - if (r < 0) - return log_debug_errno(r, "Failed to send varlink reply: %m"); - } - - return 0; -} - -int facts_method_list( - const FactFamily fact_family_table[], - sd_varlink *link, - sd_json_variant *parameters, - sd_varlink_method_flags_t flags, - void *userdata) { - - int r; - - assert(fact_family_table); - assert(link); - assert(parameters); - assert(FLAGS_SET(flags, SD_VARLINK_METHOD_MORE)); - - r = sd_varlink_dispatch(link, parameters, /* dispatch_table= */ NULL, /* userdata= */ NULL); - if (r != 0) - return r; - - r = sd_varlink_set_sentinel(link, "io.systemd.Facts.NoSuchFact"); - if (r < 0) - return r; - - FactFamilyContext ctx = { .link = link }; - for (const FactFamily *ff = fact_family_table; ff && ff->name; ff++) { - assert(ff->generate); - - ctx.fact_family = ff; - r = ff->generate(&ctx, userdata); - if (r < 0) - return log_debug_errno( - r, "Failed to list facts for fact family '%s': %m", ff->name); - } - - return 0; -} - -static int fact_build_send(FactFamilyContext *context, const char *object, sd_json_variant *value) { - assert(context); - assert(value); - assert(context->link); - assert(context->fact_family); - - return sd_varlink_replybo(context->link, - SD_JSON_BUILD_PAIR_STRING("name", context->fact_family->name), - JSON_BUILD_PAIR_STRING_NON_EMPTY("object", object), - SD_JSON_BUILD_PAIR_VARIANT("value", value)); -} - -int fact_build_send_string(FactFamilyContext *context, const char *object, const char *value) { - _cleanup_(sd_json_variant_unrefp) sd_json_variant *v = NULL; - int r; - - assert(value); - - r = sd_json_variant_new_string(&v, value); - if (r < 0) - return log_debug_errno(r, "Failed to allocate JSON string: %m"); - - return fact_build_send(context, object, v); -} - -int fact_build_send_unsigned(FactFamilyContext *context, const char *object, uint64_t value) { - _cleanup_(sd_json_variant_unrefp) sd_json_variant *v = NULL; - int r; - - r = sd_json_variant_new_unsigned(&v, value); - if (r < 0) - return log_debug_errno(r, "Failed to allocate JSON unsigned: %m"); - - return fact_build_send(context, object, v); -} diff --git a/src/shared/facts.h b/src/shared/facts.h deleted file mode 100644 index 8a8a94cd91f77..0000000000000 --- a/src/shared/facts.h +++ /dev/null @@ -1,31 +0,0 @@ -/* SPDX-License-Identifier: LGPL-2.1-or-later */ -#pragma once - -#include "shared-forward.h" - -typedef struct FactFamily FactFamily; - -typedef struct FactFamilyContext { - const FactFamily *fact_family; - sd_varlink *link; -} FactFamilyContext; - -typedef int (*fact_family_generate_func_t)(FactFamilyContext *ffc, void *userdata); - -typedef struct FactFamily { - const char *name; - const char *description; - fact_family_generate_func_t generate; -} FactFamily; - -/* Add io.systemd.Facts interface + methods to an existing varlink server */ -int facts_add_to_varlink_server( - sd_varlink_server *server, - sd_varlink_method_t vl_method_list_cb, - sd_varlink_method_t vl_method_describe_cb); - -int facts_method_describe(const FactFamily fact_family_table[], sd_varlink *link, sd_json_variant *parameters, sd_varlink_method_flags_t flags, void *userdata); -int facts_method_list(const FactFamily fact_family_table[], sd_varlink *link, sd_json_variant *parameters, sd_varlink_method_flags_t flags, void *userdata); - -int fact_build_send_string(FactFamilyContext *context, const char *object, const char *value); -int fact_build_send_unsigned(FactFamilyContext *context, const char *object, uint64_t value); diff --git a/src/shared/meson.build b/src/shared/meson.build index 84acaf698b9c4..3072bf2dc7124 100644 --- a/src/shared/meson.build +++ b/src/shared/meson.build @@ -80,7 +80,6 @@ shared_sources = files( 'exit-status.c', 'extension-util.c', 'factory-reset.c', - 'facts.c', 'fdisk-util.c', 'fdset.c', 'fido2-util.c', @@ -219,7 +218,6 @@ shared_sources = files( 'varlink-io.systemd.BootControl.c', 'varlink-io.systemd.Credentials.c', 'varlink-io.systemd.FactoryReset.c', - 'varlink-io.systemd.Facts.c', 'varlink-io.systemd.Hostname.c', 'varlink-io.systemd.Import.c', 'varlink-io.systemd.InstanceMetadata.c', diff --git a/src/shared/varlink-io.systemd.Facts.c b/src/shared/varlink-io.systemd.Facts.c deleted file mode 100644 index dad1271c7248b..0000000000000 --- a/src/shared/varlink-io.systemd.Facts.c +++ /dev/null @@ -1,37 +0,0 @@ -/* SPDX-License-Identifier: LGPL-2.1-or-later */ - -#include "sd-varlink-idl.h" - -#include "varlink-io.systemd.Facts.h" - -static SD_VARLINK_DEFINE_ERROR(NoSuchFact); - -static SD_VARLINK_DEFINE_METHOD_FULL( - List, - SD_VARLINK_REQUIRES_MORE, - SD_VARLINK_FIELD_COMMENT("Fact family name, e.g. io.systemd.Basic.Hostname"), - SD_VARLINK_DEFINE_OUTPUT(name, SD_VARLINK_STRING, 0), - /* This is currently an unused placeholder. Add examples when we have them. */ - SD_VARLINK_FIELD_COMMENT("Fact object name"), - SD_VARLINK_DEFINE_OUTPUT(object, SD_VARLINK_STRING, SD_VARLINK_NULLABLE), - SD_VARLINK_FIELD_COMMENT("Fact value"), - SD_VARLINK_DEFINE_OUTPUT(value, SD_VARLINK_ANY, 0)); - -static SD_VARLINK_DEFINE_METHOD_FULL( - Describe, - SD_VARLINK_REQUIRES_MORE, - SD_VARLINK_FIELD_COMMENT("Fact family name, e.g. io.systemd.Basic.Hostname"), - SD_VARLINK_DEFINE_OUTPUT(name, SD_VARLINK_STRING, 0), - SD_VARLINK_FIELD_COMMENT("Fact family description"), - SD_VARLINK_DEFINE_OUTPUT(description, SD_VARLINK_STRING, 0)); - -SD_VARLINK_DEFINE_INTERFACE( - io_systemd_Facts, - "io.systemd.Facts", - SD_VARLINK_INTERFACE_COMMENT("Facts APIs"), - SD_VARLINK_SYMBOL_COMMENT("Method to get a list of facts and their values"), - &vl_method_List, - SD_VARLINK_SYMBOL_COMMENT("Method to get the fact families"), - &vl_method_Describe, - SD_VARLINK_SYMBOL_COMMENT("No such fact found"), - &vl_error_NoSuchFact); diff --git a/src/shared/varlink-io.systemd.Facts.h b/src/shared/varlink-io.systemd.Facts.h deleted file mode 100644 index ce07de32fb9df..0000000000000 --- a/src/shared/varlink-io.systemd.Facts.h +++ /dev/null @@ -1,6 +0,0 @@ -/* SPDX-License-Identifier: LGPL-2.1-or-later */ -#pragma once - -#include "sd-varlink-idl.h" - -extern const sd_varlink_interface vl_interface_io_systemd_Facts; diff --git a/test/units/TEST-74-AUX-UTILS.report.sh b/test/units/TEST-74-AUX-UTILS.report.sh index e9332806b3791..321d8b1b79a87 100755 --- a/test/units/TEST-74-AUX-UTILS.report.sh +++ b/test/units/TEST-74-AUX-UTILS.report.sh @@ -43,8 +43,12 @@ varlinkctl list-methods /run/systemd/report/io.systemd.Network varlinkctl --more call /run/systemd/report/io.systemd.Network io.systemd.Metrics.List {} varlinkctl --more call /run/systemd/report/io.systemd.Network io.systemd.Metrics.Describe {} -# Make sure the service for "system facts" is enabled +# test io.systemd.Basic Metrics systemctl start systemd-report-basic.socket +varlinkctl info /run/systemd/report/io.systemd.Basic +varlinkctl list-methods /run/systemd/report/io.systemd.Basic +varlinkctl --more call /run/systemd/report/io.systemd.Basic io.systemd.Metrics.List {} +varlinkctl --more call /run/systemd/report/io.systemd.Basic io.systemd.Metrics.Describe {} # Test HTTP upload (plain http) FAKE_SERVER=/usr/lib/systemd/tests/integration-tests/TEST-74-AUX-UTILS/TEST-74-AUX-UTILS.units/fake-report-server.py diff --git a/units/systemd-report-basic.socket b/units/systemd-report-basic.socket index bfa4ea72568fe..ba5d88c8e7e21 100644 --- a/units/systemd-report-basic.socket +++ b/units/systemd-report-basic.socket @@ -7,7 +7,7 @@ # the Free Software Foundation; either version 2.1 of the License, or # (at your option) any later version. [Unit] -Description=Report System Basic Facts Socket +Description=Report System Basic Metrics Socket DefaultDependencies=no Before=sockets.target diff --git a/units/systemd-report-basic@.service.in b/units/systemd-report-basic@.service.in index 043324b5c3987..1e4798ac04bc0 100644 --- a/units/systemd-report-basic@.service.in +++ b/units/systemd-report-basic@.service.in @@ -7,7 +7,7 @@ # the Free Software Foundation; either version 2.1 of the License, or # (at your option) any later version. [Unit] -Description=Report System Basic Facts +Description=Report System Basic Metrics DefaultDependencies=no Conflicts=shutdown.target From 4ffb60319bcea88b09afe24736208bd0e0e03618 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jonas=20Dre=C3=9Fler?= Date: Thu, 30 Apr 2026 20:27:43 +0200 Subject: [PATCH 146/242] sysupdate: Ensure that end of the MatchPattern is matched correctly An error snuck into the pattern parsing of the `MatchPattern` key in the sysupdate transfer files. If there's two files "part1-v2.raw", and "part1-v2.raw.tar" in the source folder, and MatchPattern="part1-@v.raw", sysupdate will incorrectly choose "part1-v2.raw.tar" instead of "part1-v2.raw". While the pattern matching works perfectly fine, after the full pattern is successfully matched to the string, we don't ensure that the string actually ends when the pattern just did. This means we can end up choosing a wrong file for the update, if the filename/path happens to start with the same MatchPattern. Fix it by ensuring the string ends after our match pattern ended. --- src/sysupdate/sysupdate-pattern.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/src/sysupdate/sysupdate-pattern.c b/src/sysupdate/sysupdate-pattern.c index 76155dcd2924d..58aa524a5d1ba 100644 --- a/src/sysupdate/sysupdate-pattern.c +++ b/src/sysupdate/sysupdate-pattern.c @@ -426,6 +426,10 @@ int pattern_match(const char *pattern, const char *s, InstanceMetadata *ret) { p = n; } + /* We matched the whole pattern, but if the string continues over the end of the pattern, refuse */ + if (*p != '\0') + goto nope; + if (ret) { *ret = found; found = (InstanceMetadata) INSTANCE_METADATA_NULL; From a92b60ae17dc44e8f3777a933e3a901bca986ba5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Zbigniew=20J=C4=99drzejewski-Szmek?= Date: Tue, 5 May 2026 18:37:13 +0200 Subject: [PATCH 147/242] homectl: fix error handling in shell_is_ok() Fixes f233132a67a4c2c6dff053afac2385f570e8e3fe. --- src/home/homectl.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/home/homectl.c b/src/home/homectl.c index 37714aa2b0185..454aa5bfe6bb3 100644 --- a/src/home/homectl.c +++ b/src/home/homectl.c @@ -2837,7 +2837,7 @@ static int shell_is_ok(const char *path, void *userdata) { return false; } - r = chase_and_access(path, /* root= */ NULL, CHASE_MUST_BE_REGULAR, X_OK, /* ret_path= */ NULL) >= 0; + r = chase_and_access(path, /* root= */ NULL, CHASE_MUST_BE_REGULAR, X_OK, /* ret_path= */ NULL); if (r == -ENOENT) { log_error_errno(r, "Shell '%s' does not exist, try again.", path); return false; From 764d9d5ddbcc355c2f895b6e87c6916e5495ddca Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Zbigniew=20J=C4=99drzejewski-Szmek?= Date: Tue, 5 May 2026 13:01:16 +0200 Subject: [PATCH 148/242] homectl: split out two prompt functions MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit homectl.c is too long… --- src/home/homectl-prompts.c | 244 ++++++++++++++++++++++++++++++++ src/home/homectl-prompts.h | 5 + src/home/homectl.c | 283 ++++--------------------------------- src/home/meson.build | 1 + 4 files changed, 277 insertions(+), 256 deletions(-) create mode 100644 src/home/homectl-prompts.c create mode 100644 src/home/homectl-prompts.h diff --git a/src/home/homectl-prompts.c b/src/home/homectl-prompts.c new file mode 100644 index 0000000000000..71640377e3888 --- /dev/null +++ b/src/home/homectl-prompts.c @@ -0,0 +1,244 @@ +/* SPDX-License-Identifier: LGPL-2.1-or-later */ + +#include +#include + +#include "alloc-util.h" +#include "bitfield.h" +#include "chase.h" +#include "glyph-util.h" +#include "group-record.h" +#include "homectl-prompts.h" +#include "log.h" +#include "parse-util.h" +#include "prompt-util.h" +#include "string-util.h" +#include "strv.h" +#include "terminal-util.h" +#include "user-util.h" +#include "userdb.h" + +static int acquire_group_list(char ***ret) { + _cleanup_(userdb_iterator_freep) UserDBIterator *iterator = NULL; + _cleanup_strv_free_ char **groups = NULL; + UserDBMatch match = USERDB_MATCH_NULL; + int r; + + assert(ret); + + match.disposition_mask = INDEXES_TO_MASK(uint64_t, USER_REGULAR, USER_SYSTEM); + + r = groupdb_all(&match, USERDB_SUPPRESS_SHADOW, &iterator); + if (r == -ENOLINK) + log_debug_errno(r, "No groups found. (Didn't check via Varlink.)"); + else if (r == -ESRCH) + log_debug_errno(r, "No groups found."); + else if (r < 0) + return log_debug_errno(r, "Failed to enumerate groups, ignoring: %m"); + else + for (;;) { + _cleanup_(group_record_unrefp) GroupRecord *gr = NULL; + + r = groupdb_iterator_get(iterator, &match, &gr); + if (r == -ESRCH) + break; + if (r < 0) + return log_debug_errno(r, "Failed to acquire next group: %m"); + + if (group_record_disposition(gr) == USER_REGULAR) { + _cleanup_(user_record_unrefp) UserRecord *ur = NULL; + + /* Filter groups here that belong to a specific user, and are named like them */ + + UserDBMatch user_match = USERDB_MATCH_NULL; + user_match.disposition_mask = INDEX_TO_MASK(uint64_t, USER_REGULAR); + + r = userdb_by_name(gr->group_name, &user_match, USERDB_SUPPRESS_SHADOW, &ur); + if (r < 0 && r != -ESRCH) + return log_debug_errno(r, "Failed to check if matching user exists for group '%s': %m", gr->group_name); + + if (r >= 0 && user_record_gid(ur) == gr->gid) + continue; + } + + r = strv_extend(&groups, gr->group_name); + if (r < 0) + return log_oom(); + } + + strv_sort(groups); + + *ret = TAKE_PTR(groups); + return !!*ret; +} + +static int group_completion_callback(const char *key, GetCompletionsFlags flags, char ***ret_list, void *userdata) { + char ***available = userdata; + int r; + + if (!*available) { + r = acquire_group_list(available); + if (r < 0) + log_debug_errno(r, "Failed to enumerate available groups, ignoring: %m"); + } + + _cleanup_strv_free_ char **l = strv_copy(*available); + if (!l) + return -ENOMEM; + + if (!FLAGS_SET(flags, GET_COMPLETIONS_PRESELECT)) { + r = strv_extend(&l, "list"); + if (r < 0) + return r; + } + + *ret_list = TAKE_PTR(l); + return 0; +} + +int prompt_groups(const char *username, char ***ret_groups) { + int r; + + assert(username); + assert(ret_groups); + + _cleanup_strv_free_ char **available = NULL, **groups = NULL; + for (;;) { + strv_sort_uniq(groups); + + if (!strv_isempty(groups)) { + _cleanup_free_ char *j = strv_join(groups, ", "); + if (!j) + return log_oom(); + + log_info("Currently selected groups: %s", j); + } + + _cleanup_free_ char *s = NULL; + r = ask_string_full( + &s, + group_completion_callback, + &available, + "%s Please enter an auxiliary group for user %s (empty to continue, \"list\" to list available groups): ", + glyph(GLYPH_LABEL), + username); + if (r < 0) + return log_error_errno(r, "Failed to query user for auxiliary group: %m"); + + if (isempty(s)) + break; + + if (streq(s, "list")) { + if (!available) { + r = acquire_group_list(&available); + if (r < 0) + log_warning_errno(r, "Failed to enumerate available groups, ignoring: %m"); + if (r == 0) + log_notice("Did not find any available groups"); + if (r <= 0) + continue; + } + + r = show_menu(available, + /* n_columns= */ 3, + /* column_width= */ 20, + /* ellipsize_percentage= */ 60, + /* grey_prefix= */ NULL, + /* with_numbers= */ true); + if (r < 0) + return log_error_errno(r, "Failed to show menu: %m"); + + putchar('\n'); + continue; + } + + if (!strv_isempty(available)) { + unsigned u; + r = safe_atou(s, &u); + if (r >= 0) { + if (u <= 0 || u > strv_length(available)) { + log_error("Specified entry number out of range."); + continue; + } + + log_info("Selected '%s'.", available[u-1]); + + r = strv_extend(&groups, available[u-1]); + if (r < 0) + return log_oom(); + + continue; + } + } + + if (!valid_user_group_name(s, /* flags= */ 0)) { + log_notice("Specified group name is not a valid UNIX group name, try again: %s", s); + continue; + } + + r = groupdb_by_name(s, /* match= */ NULL, USERDB_SUPPRESS_SHADOW|USERDB_EXCLUDE_DYNAMIC_USER, /* ret= */ NULL); + if (r == -ESRCH) { + log_notice("Specified auxiliary group does not exist, try again: %s", s); + continue; + } + if (r < 0) + return log_error_errno(r, "Failed to check if specified group '%s' already exists: %m", s); + + log_info("Selected '%s'.", s); + + r = strv_extend(&groups, s); + if (r < 0) + return log_oom(); + } + + *ret_groups = TAKE_PTR(groups); + return 0; +} + +static int shell_is_ok(const char *path, void *userdata) { + int r; + + assert(path); + + if (!valid_shell(path)) { + log_error("String '%s' is not a valid path to a shell, refusing.", path); + return false; + } + + r = chase_and_access(path, /* root= */ NULL, CHASE_MUST_BE_REGULAR, X_OK, /* ret_path= */ NULL); + if (r == -ENOENT) { + log_error_errno(r, "Shell '%s' does not exist, try again.", path); + return false; + } + if (ERRNO_IS_NEG_PRIVILEGE(r)) { + log_error_errno(r, "File '%s' is not executable, try again.", path); + return false; + } + if (r < 0) + return log_error_errno(r, "Failed to check if shell '%s' exists and is executable: %m", path); + + return true; +} + +int prompt_shell(const char *username, char **ret_shell) { + assert(username); + assert(ret_shell); + + _cleanup_free_ char *q = strjoin("Please enter the shell to use for user ", username); + if (!q) + return log_oom(); + + return prompt_loop( + q, + GLYPH_SHELL, + /* menu= */ NULL, + /* accepted= */ NULL, + /* ellipsize_percentage= */ 0, + /* n_columns= */ 3, + /* column_width= */ 20, + shell_is_ok, + /* refresh= */ NULL, + /* userdata= */ NULL, + PROMPT_MAY_SKIP|PROMPT_SILENT_VALIDATE, + ret_shell); +} diff --git a/src/home/homectl-prompts.h b/src/home/homectl-prompts.h new file mode 100644 index 0000000000000..04d6460058279 --- /dev/null +++ b/src/home/homectl-prompts.h @@ -0,0 +1,5 @@ +/* SPDX-License-Identifier: LGPL-2.1-or-later */ +#pragma once + +int prompt_groups(const char *username, char ***ret_groups); +int prompt_shell(const char *username, char **ret_shell); diff --git a/src/home/homectl.c b/src/home/homectl.c index 454aa5bfe6bb3..56e723fbfef9f 100644 --- a/src/home/homectl.c +++ b/src/home/homectl.c @@ -16,7 +16,6 @@ #include "capability-list.h" #include "capability-util.h" #include "cgroup-util.h" -#include "chase.h" #include "creds-util.h" #include "crypto-util.h" #include "dirent-util.h" @@ -35,6 +34,7 @@ #include "home-util.h" #include "homectl-fido2.h" #include "homectl-pkcs11.h" +#include "homectl-prompts.h" #include "homectl-recovery-key.h" #include "json-util.h" #include "libfido2-util.h" @@ -2643,245 +2643,6 @@ static int has_regular_user(void) { return true; } -static int acquire_group_list(char ***ret) { - _cleanup_(userdb_iterator_freep) UserDBIterator *iterator = NULL; - _cleanup_strv_free_ char **groups = NULL; - UserDBMatch match = USERDB_MATCH_NULL; - int r; - - assert(ret); - - match.disposition_mask = INDEXES_TO_MASK(uint64_t, USER_REGULAR, USER_SYSTEM); - - r = groupdb_all(&match, USERDB_SUPPRESS_SHADOW, &iterator); - if (r == -ENOLINK) - log_debug_errno(r, "No groups found. (Didn't check via Varlink.)"); - else if (r == -ESRCH) - log_debug_errno(r, "No groups found."); - else if (r < 0) - return log_debug_errno(r, "Failed to enumerate groups, ignoring: %m"); - else - for (;;) { - _cleanup_(group_record_unrefp) GroupRecord *gr = NULL; - - r = groupdb_iterator_get(iterator, &match, &gr); - if (r == -ESRCH) - break; - if (r < 0) - return log_debug_errno(r, "Failed to acquire next group: %m"); - - if (group_record_disposition(gr) == USER_REGULAR) { - _cleanup_(user_record_unrefp) UserRecord *ur = NULL; - - /* Filter groups here that belong to a specific user, and are named like them */ - - UserDBMatch user_match = USERDB_MATCH_NULL; - user_match.disposition_mask = INDEX_TO_MASK(uint64_t, USER_REGULAR); - - r = userdb_by_name(gr->group_name, &user_match, USERDB_SUPPRESS_SHADOW, &ur); - if (r < 0 && r != -ESRCH) - return log_debug_errno(r, "Failed to check if matching user exists for group '%s': %m", gr->group_name); - - if (r >= 0 && user_record_gid(ur) == gr->gid) - continue; - } - - r = strv_extend(&groups, gr->group_name); - if (r < 0) - return log_oom(); - } - - strv_sort(groups); - - *ret = TAKE_PTR(groups); - return !!*ret; -} - -static int group_completion_callback(const char *key, GetCompletionsFlags flags, char ***ret_list, void *userdata) { - char ***available = userdata; - int r; - - if (!*available) { - r = acquire_group_list(available); - if (r < 0) - log_debug_errno(r, "Failed to enumerate available groups, ignoring: %m"); - } - - _cleanup_strv_free_ char **l = strv_copy(*available); - if (!l) - return -ENOMEM; - - if (!FLAGS_SET(flags, GET_COMPLETIONS_PRESELECT)) { - r = strv_extend(&l, "list"); - if (r < 0) - return r; - } - - *ret_list = TAKE_PTR(l); - return 0; -} - -static int prompt_groups(const char *username, char ***ret_groups) { - int r; - - assert(username); - assert(ret_groups); - - if (!arg_prompt_groups) { - *ret_groups = NULL; - return 0; - } - - putchar('\n'); - - _cleanup_strv_free_ char **available = NULL, **groups = NULL; - for (;;) { - strv_sort_uniq(groups); - - if (!strv_isempty(groups)) { - _cleanup_free_ char *j = strv_join(groups, ", "); - if (!j) - return log_oom(); - - log_info("Currently selected groups: %s", j); - } - - _cleanup_free_ char *s = NULL; - r = ask_string_full( - &s, - group_completion_callback, - &available, - "%s Please enter an auxiliary group for user %s (empty to continue, \"list\" to list available groups): ", - glyph(GLYPH_LABEL), - username); - if (r < 0) - return log_error_errno(r, "Failed to query user for auxiliary group: %m"); - - if (isempty(s)) - break; - - if (streq(s, "list")) { - if (!available) { - r = acquire_group_list(&available); - if (r < 0) - log_warning_errno(r, "Failed to enumerate available groups, ignoring: %m"); - if (r == 0) - log_notice("Did not find any available groups"); - if (r <= 0) - continue; - } - - r = show_menu(available, - /* n_columns= */ 3, - /* column_width= */ 20, - /* ellipsize_percentage= */ 60, - /* grey_prefix= */ NULL, - /* with_numbers= */ true); - if (r < 0) - return log_error_errno(r, "Failed to show menu: %m"); - - putchar('\n'); - continue; - }; - - if (!strv_isempty(available)) { - unsigned u; - r = safe_atou(s, &u); - if (r >= 0) { - if (u <= 0 || u > strv_length(available)) { - log_error("Specified entry number out of range."); - continue; - } - - log_info("Selected '%s'.", available[u-1]); - - r = strv_extend(&groups, available[u-1]); - if (r < 0) - return log_oom(); - - continue; - } - } - - if (!valid_user_group_name(s, /* flags= */ 0)) { - log_notice("Specified group name is not a valid UNIX group name, try again: %s", s); - continue; - } - - r = groupdb_by_name(s, /* match= */ NULL, USERDB_SUPPRESS_SHADOW|USERDB_EXCLUDE_DYNAMIC_USER, /* ret= */ NULL); - if (r == -ESRCH) { - log_notice("Specified auxiliary group does not exist, try again: %s", s); - continue; - } - if (r < 0) - return log_error_errno(r, "Failed to check if specified group '%s' already exists: %m", s); - - log_info("Selected '%s'.", s); - - r = strv_extend(&groups, s); - if (r < 0) - return log_oom(); - } - - *ret_groups = TAKE_PTR(groups); - return 0; -} - -static int shell_is_ok(const char *path, void *userdata) { - int r; - - assert(path); - - if (!valid_shell(path)) { - log_error("String '%s' is not a valid path to a shell, refusing.", path); - return false; - } - - r = chase_and_access(path, /* root= */ NULL, CHASE_MUST_BE_REGULAR, X_OK, /* ret_path= */ NULL); - if (r == -ENOENT) { - log_error_errno(r, "Shell '%s' does not exist, try again.", path); - return false; - } - if (ERRNO_IS_NEG_PRIVILEGE(r)) { - log_error_errno(r, "File '%s' is not executable, try again.", path); - return false; - } - if (r < 0) - return log_error_errno(r, "Failed to check if shell '%s' exists and is executable: %m", path); - - return true; -} - -static int prompt_shell(const char *username, char **ret_shell) { - assert(username); - assert(ret_shell); - - if (!arg_prompt_shell) { - *ret_shell = NULL; - return 0; - } - - putchar('\n'); - - _cleanup_free_ char *q = strjoin("Please enter the shell to use for user ", username); - if (!q) - return log_oom(); - - return prompt_loop( - q, - GLYPH_SHELL, - /* menu= */ NULL, - /* accepted= */ NULL, - /* ellipsize_percentage= */ 0, - /* n_columns= */ 3, - /* column_width= */ 20, - shell_is_ok, - /* refresh= */ NULL, - /* userdata= */ NULL, - PROMPT_MAY_SKIP|PROMPT_SILENT_VALIDATE, - ret_shell); -} - static int username_is_ok(const char *name, void *userdata) { int r; @@ -2963,30 +2724,40 @@ static int create_interactively(void) { if (r < 0) return log_error_errno(r, "Failed to set enforcePasswordPolicy field: %m"); - _cleanup_strv_free_ char **groups = NULL; - r = prompt_groups(username, &groups); - if (r < 0) - return r; + if (arg_prompt_groups) { + _cleanup_strv_free_ char **groups = NULL; - if (!strv_isempty(groups)) { - strv_sort_uniq(groups); + putchar('\n'); - r = sd_json_variant_set_field_strv(&arg_identity_extra, "memberOf", groups); + r = prompt_groups(username, &groups); if (r < 0) - return log_error_errno(r, "Failed to set memberOf field: %m"); + return r; + + if (!strv_isempty(groups)) { + strv_sort_uniq(groups); + + r = sd_json_variant_set_field_strv(&arg_identity_extra, "memberOf", groups); + if (r < 0) + return log_error_errno(r, "Failed to set memberOf field: %m"); + } } - _cleanup_free_ char *shell = NULL; - r = prompt_shell(username, &shell); - if (r < 0) - return r; + if (arg_prompt_shell) { + _cleanup_free_ char *shell = NULL; - if (!isempty(shell)) { - log_info("Selected %s as the shell for user %s", shell, username); + putchar('\n'); - r = sd_json_variant_set_field_string(&arg_identity_extra, "shell", shell); + r = prompt_shell(username, &shell); if (r < 0) - return log_error_errno(r, "Failed to set shell field: %m"); + return r; + + if (!isempty(shell)) { + log_info("Selected %s as the shell for user %s", shell, username); + + r = sd_json_variant_set_field_string(&arg_identity_extra, "shell", shell); + if (r < 0) + return log_error_errno(r, "Failed to set shell field: %m"); + } } putchar('\n'); diff --git a/src/home/meson.build b/src/home/meson.build index 53c5675c83f88..8c644842c1470 100644 --- a/src/home/meson.build +++ b/src/home/meson.build @@ -47,6 +47,7 @@ systemd_homed_sources += [homed_gperf_c] homectl_sources = files( 'homectl-fido2.c', 'homectl-pkcs11.c', + 'homectl-prompts.c', 'homectl-recovery-key.c', 'homectl.c', ) From 392846b370dcde7142f854ac38c15292522966a2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Zbigniew=20J=C4=99drzejewski-Szmek?= Date: Tue, 5 May 2026 13:21:30 +0200 Subject: [PATCH 149/242] test-homectl-prompts: add manual test to exercise prompt functionality The prompt for groups is nice. The prompt for a shell could use some love. Looking at this is much easier if we can invoke the code outside in isolation. I wrote this when looking at https://github.com/systemd/systemd/pull/41947, where I wanted to see how the homectl prompt works with the changes. --- src/home/meson.build | 11 +++- src/home/test-homectl-prompts.c | 106 ++++++++++++++++++++++++++++++++ 2 files changed, 116 insertions(+), 1 deletion(-) create mode 100644 src/home/test-homectl-prompts.c diff --git a/src/home/meson.build b/src/home/meson.build index 8c644842c1470..600f00b4ac997 100644 --- a/src/home/meson.build +++ b/src/home/meson.build @@ -47,11 +47,14 @@ systemd_homed_sources += [homed_gperf_c] homectl_sources = files( 'homectl-fido2.c', 'homectl-pkcs11.c', - 'homectl-prompts.c', 'homectl-recovery-key.c', 'homectl.c', ) +homectl_extract = files( + 'homectl-prompts.c', +) + pam_systemd_home_sources = files( 'home-util.c', 'pam_systemd_home.c', @@ -86,6 +89,7 @@ executables += [ 'name' : 'homectl', 'public' : true, 'sources' : homectl_sources, + 'extract' : homectl_extract, 'objects' : ['systemd-homed'], 'dependencies' : [ libdl, @@ -94,6 +98,11 @@ executables += [ threads, ], }, + test_template + { + 'sources' : files('test-homectl-prompts.c'), + 'objects' : ['homectl'], + 'type' : 'manual', + }, test_template + { 'sources' : files('test-homed-regression-31896.c'), 'type' : 'manual', diff --git a/src/home/test-homectl-prompts.c b/src/home/test-homectl-prompts.c new file mode 100644 index 0000000000000..aaa81cc78a89d --- /dev/null +++ b/src/home/test-homectl-prompts.c @@ -0,0 +1,106 @@ +/* SPDX-License-Identifier: LGPL-2.1-or-later */ + +#include "alloc-util.h" +#include "format-table.h" +#include "help-util.h" +#include "homectl-prompts.h" +#include "main-func.h" +#include "options.h" +#include "string-util.h" +#include "strv.h" +#include "tests.h" +#include "verbs.h" + +static int help(void) { + _cleanup_(table_unrefp) Table *options = NULL, *verbs = NULL; + int r; + + r = option_parser_get_help_table(&options); + if (r < 0) + return r; + + r = verbs_get_help_table(&verbs); + if (r < 0) + return r; + + (void) table_sync_column_widths(0, options, verbs); + + help_cmdline("[OPTIONS...] VERB [USERNAME]"); + help_abstract("Exercise homectl prompt functions in isolation."); + + help_section("Verbs"); + r = table_print_or_warn(verbs); + if (r < 0) + return r; + + help_section("Options"); + return table_print_or_warn(options); +} + +VERB(verb_groups, "groups", "[USER]", VERB_ANY, 2, 0, "Select groups"); +static int verb_groups(int argc, char *argv[], uintptr_t _data, void *userdata) { + assert(argv); + + const char *username = argv[1] ?: "test"; + int r; + + _cleanup_strv_free_ char **t = NULL; + + r = prompt_groups(username, &t); + if (r < 0) + return r; + + _cleanup_free_ char *s = ASSERT_PTR(strv_join(t, ", ")); + log_info("groups: %s → %s", username, s); + return 0; +} + +VERB(verb_shell, "shell", "[USER]", VERB_ANY, 2, 0, "Select shell"); +static int verb_shell(int argc, char *argv[], uintptr_t _data, void *userdata) { + assert(argv); + + const char *username = argv[1] ?: "test"; + int r; + + _cleanup_free_ char *s = NULL; + + r = prompt_shell(username, &s); + if (r < 0) + return r; + + log_info("shell: %s → %s", username, strnull(s)); + return 0; +} + +static int parse_argv(int argc, char **argv, char ***remaining_args) { + assert(argc >= 0); + assert(argv); + assert(remaining_args); + + OptionParser opts = { argc, argv }; + + FOREACH_OPTION_OR_RETURN(c, &opts) + switch (c) { + + OPTION_COMMON_HELP: + return help(); + } + + *remaining_args = option_parser_get_args(&opts); + return 1; +} + +static int run(int argc, char **argv) { + int r; + + test_setup_logging(LOG_DEBUG); + + char **args = NULL; + r = parse_argv(argc, argv, &args); + if (r <= 0) + return r; + + return dispatch_verb_with_args(args, /* userdata= */ NULL); +} + +DEFINE_MAIN_FUNCTION(run); From c379621426a9a1b244320d5b331f634d9a46126d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Zbigniew=20J=C4=99drzejewski-Szmek?= Date: Tue, 5 May 2026 22:08:19 +0200 Subject: [PATCH 150/242] homectl: drop redunant sort Claude points out that prompt_groups() already does the sort in every loop, including the last. --- src/home/homectl.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/src/home/homectl.c b/src/home/homectl.c index 56e723fbfef9f..ce54da0d8c2b7 100644 --- a/src/home/homectl.c +++ b/src/home/homectl.c @@ -2734,8 +2734,6 @@ static int create_interactively(void) { return r; if (!strv_isempty(groups)) { - strv_sort_uniq(groups); - r = sd_json_variant_set_field_strv(&arg_identity_extra, "memberOf", groups); if (r < 0) return log_error_errno(r, "Failed to set memberOf field: %m"); From 852fcf5134db04fccc4988decc4c5e8a33914700 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jos=C3=A9=20Miguel=20Sarasola?= Date: Tue, 5 May 2026 21:20:59 +0200 Subject: [PATCH 151/242] hwdb: add SOUND_FORM_FACTOR for Edifier M60 and Fractal Scape Dongle --- hwdb.d/70-sound-card.hwdb | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/hwdb.d/70-sound-card.hwdb b/hwdb.d/70-sound-card.hwdb index f9ceeacb7d79c..340e6b54e3c5e 100644 --- a/hwdb.d/70-sound-card.hwdb +++ b/hwdb.d/70-sound-card.hwdb @@ -20,6 +20,13 @@ # Allowed properties are: # SOUND_FORM_FACTOR +########################################################### +# Bestechnic (Edifier) +########################################################### +# Edifier M60 +usb:v2D99pA094* + SOUND_FORM_FACTOR=speaker + ########################################################### # Corsair ########################################################### @@ -27,6 +34,13 @@ usb:v1B1Cp0A51* SOUND_FORM_FACTOR=headset +########################################################### +# Fractal +########################################################### +# Fractal Scape Dongle +usb:v36BCp0001* + SOUND_FORM_FACTOR=headset + ########################################################### # Microsoft ########################################################### From fc68ee611886c4f2d7d5bccdcfd700c5f28ed2d1 Mon Sep 17 00:00:00 2001 From: Sergei Trofimovich Date: Tue, 5 May 2026 21:55:30 +0100 Subject: [PATCH 152/242] sd-boot: efi-log: fix `__stack_chk_guard` type In https://gcc.gnu.org/PR121911 `gcc` started enforcing the type of `__stack_chk_guard` to `uintptr_t` and broke `systemd` build as: ``` ../src/boot/efi-log.c:136:17: error: conflicting types for '__stack_chk_guard'; have 'intptr_t' {aka 'long int'} 136 | _used_ intptr_t __stack_chk_guard = (intptr_t) 0x70f6967de78acae3; | ^~~~~~~~~~~~~~~~~ cc1: note: previous declaration of '__stack_chk_guard' with type 'long unsigned int' ../src/boot/efi-log.c:136:17: error: declaration of '__stack_chk_guard' shadows a global declaration [-Werror=shadow] 136 | _used_ intptr_t __stack_chk_guard = (intptr_t) 0x70f6967de78acae3; | ^~~~~~~~~~~~~~~~~ ``` Let's match the declaration to unsigned type as suggested by upstream in https://gcc.gnu.org/PR121911#c6. --- src/boot/efi-log.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/boot/efi-log.c b/src/boot/efi-log.c index ed0a2746933e0..520f985389c55 100644 --- a/src/boot/efi-log.c +++ b/src/boot/efi-log.c @@ -133,7 +133,7 @@ void log_wait(void) { } // NOLINTNEXTLINE(misc-use-internal-linkage) -_used_ intptr_t __stack_chk_guard = (intptr_t) 0x70f6967de78acae3; +_used_ uintptr_t __stack_chk_guard = (uintptr_t) 0x70f6967de78acae3; /* We can only set a random stack canary if this function attribute is available, * otherwise this may create a stack check fail. */ @@ -144,7 +144,7 @@ void __stack_chk_guard_init(void) { (void) rng->GetRNG(rng, NULL, sizeof(__stack_chk_guard), (void *) &__stack_chk_guard); else /* Better than no extra entropy. */ - __stack_chk_guard ^= (intptr_t) __executable_start; + __stack_chk_guard ^= (uintptr_t) __executable_start; } #endif From eaa0073027b06c384b5f5e9cb57ec850ea024728 Mon Sep 17 00:00:00 2001 From: Christian Brauner Date: Fri, 1 May 2026 14:58:53 +0200 Subject: [PATCH 153/242] user-util,storagectl: introduce USERNS_RANGE_SIZE macro MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The mount.storage helper open-codes the conventional 64K UID/GID delegation block size as 0x10000 / 0x10000U in four places. Several other places in the tree do the same (nspawn's arg_uid_range default, homed's mount setup, …), but with no shared name. Add USERNS_RANGE_SIZE in user-util.h alongside UID_NOBODY and friends, and switch storagectl over to it. Other call sites can adopt it incrementally. Signed-off-by: Christian Brauner (Amutable) --- src/basic/user-util.h | 4 ++++ src/shared/nsresource.h | 3 ++- src/storage/storagectl.c | 12 ++++++------ 3 files changed, 12 insertions(+), 7 deletions(-) diff --git a/src/basic/user-util.h b/src/basic/user-util.h index 003420dbe3b0d..a8902aca6150b 100644 --- a/src/basic/user-util.h +++ b/src/basic/user-util.h @@ -90,6 +90,10 @@ int take_etc_passwd_lock(const char *root); #define UID_NOBODY ((uid_t) 65534U) #define GID_NOBODY ((gid_t) 65534U) +/* Conventional size of a user-namespace UID/GID delegation block (64K). + * Untyped so it can be used in both UID and GID contexts without casts. */ +#define USERNS_RANGE_SIZE 0x10000U + /* If REMOUNT_IDMAPPING_HOST_ROOT is set for remount_idmap() we'll include a mapping here that maps the host * root user accessing the idmapped mount to the this user ID on the backing fs. This is the last valid UID in * the *signed* 32-bit range. You might wonder why precisely use this specific UID for this purpose? Well, we diff --git a/src/shared/nsresource.h b/src/shared/nsresource.h index 5633fd9bf35bc..c26dd4f8a553f 100644 --- a/src/shared/nsresource.h +++ b/src/shared/nsresource.h @@ -2,9 +2,10 @@ #pragma once #include "shared-forward.h" +#include "user-util.h" /* Helpful constants for the only numbers of UIDs that can currently be allocated */ -#define NSRESOURCE_UIDS_64K 0x10000U +#define NSRESOURCE_UIDS_64K USERNS_RANGE_SIZE #define NSRESOURCE_UIDS_1 1U int nsresource_connect(sd_varlink **ret); diff --git a/src/storage/storagectl.c b/src/storage/storagectl.c index f88dff29bc861..23d91d6ba12ee 100644 --- a/src/storage/storagectl.c +++ b/src/storage/storagectl.c @@ -723,25 +723,25 @@ static int run_as_mount_helper(int argc, char *argv[]) { if (!uid_is_valid(p.base_uid) || !gid_is_valid(p.base_gid)) return log_error_errno(SYNTHETIC_ERRNO(EINVAL), "Provider did not report base UID/GID, cannot mount."); - if (p.base_uid > UINT32_MAX - 0x10000U || - p.base_gid > UINT32_MAX - 0x10000U) + if (p.base_uid > UINT32_MAX - USERNS_RANGE_SIZE || + p.base_gid > UINT32_MAX - USERNS_RANGE_SIZE) return log_error_errno(SYNTHETIC_ERRNO(EINVAL), "Returned base UID/GID out of range."); r = stat_verify_directory(&st); if (r < 0) return log_error_errno(r, "File descriptor for directory volume is not a directory inode: %m"); - if (st.st_uid < p.base_uid || st.st_uid >= p.base_uid + 0x10000 || - st.st_gid < p.base_gid || st.st_gid >= p.base_gid + 0x10000) + if (st.st_uid < p.base_uid || st.st_uid >= p.base_uid + USERNS_RANGE_SIZE || + st.st_gid < p.base_gid || st.st_gid >= p.base_gid + USERNS_RANGE_SIZE) return log_error_errno(SYNTHETIC_ERRNO(EPERM), "File descriptor for directory volume is not owned by base UID/GID range, refusing."); /* Now move the mount into our own UID/GID range */ _cleanup_free_ char *uid_line = asprintf_safe( UID_FMT " " UID_FMT " " UID_FMT "\n", - p.base_uid, (uid_t) 0, (uid_t) 0x10000); + p.base_uid, (uid_t) 0, USERNS_RANGE_SIZE); _cleanup_free_ char *gid_line = asprintf_safe( GID_FMT " " GID_FMT " " GID_FMT "\n", - p.base_gid, (gid_t) 0, (gid_t) 0x10000); + p.base_gid, (gid_t) 0, USERNS_RANGE_SIZE); if (!uid_line || !gid_line) return log_oom(); From efb0a0fa7a4c4f25a5abb5df60f4813951279772 Mon Sep 17 00:00:00 2001 From: Christian Brauner Date: Fri, 1 May 2026 13:29:56 +0200 Subject: [PATCH 154/242] shared: move storage-util to libshared The storage backend providers (block, fs) and storagectl currently each extract storage-util.c into their target. Several upcoming consumers (machine-util's BindVolume parser, vmspawn's hotplug glue, machinectl's new bind-volume verbs) need the StorageProvider type/string-table helpers and a future shared Acquire client helper. Move storage-util.{c,h} to src/shared so libshared exports the symbols once and every consumer (storage providers, storagectl, libshared itself) picks them up by linking libshared. Drop the now-redundant 'extract'/'objects' wiring in src/storage/meson.build. No code changes; this is purely a relocation. Signed-off-by: Christian Brauner (Amutable) --- src/shared/meson.build | 1 + src/{storage => shared}/storage-util.c | 0 src/{storage => shared}/storage-util.h | 0 src/storage/meson.build | 3 --- 4 files changed, 1 insertion(+), 3 deletions(-) rename src/{storage => shared}/storage-util.c (100%) rename src/{storage => shared}/storage-util.h (100%) diff --git a/src/shared/meson.build b/src/shared/meson.build index 0529d35d26586..d4cd5ca431233 100644 --- a/src/shared/meson.build +++ b/src/shared/meson.build @@ -199,6 +199,7 @@ shared_sources = files( 'socket-netlink.c', 'specifier.c', 'ssl-util.c', + 'storage-util.c', 'switch-root.c', 'swtpm-util.c', 'tar-util.c', diff --git a/src/storage/storage-util.c b/src/shared/storage-util.c similarity index 100% rename from src/storage/storage-util.c rename to src/shared/storage-util.c diff --git a/src/storage/storage-util.h b/src/shared/storage-util.h similarity index 100% rename from src/storage/storage-util.h rename to src/shared/storage-util.h diff --git a/src/storage/meson.build b/src/storage/meson.build index 21456141dec8c..bcd68e612da9b 100644 --- a/src/storage/meson.build +++ b/src/storage/meson.build @@ -4,18 +4,15 @@ executables += [ libexec_template + { 'name' : 'systemd-storage-block', 'sources' : files('storage-block.c'), - 'extract' : files('storage-util.c') }, libexec_template + { 'name' : 'systemd-storage-fs', 'sources' : files('storage-fs.c'), - 'objects' : ['systemd-storage-block'], }, executable_template + { 'name' : 'storagectl', 'public' : true, 'sources' : files('storagectl.c'), - 'objects' : ['systemd-storage-block'], }, ] From 32a80416b67a44c8944d2b20496003caf6d9add3 Mon Sep 17 00:00:00 2001 From: Christian Brauner Date: Fri, 1 May 2026 13:32:27 +0200 Subject: [PATCH 155/242] shared: add BindVolume parser in machine-util MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add a universal parser for the colon-separated grammar 'PROVIDER:VOLUME[:CONFIG][:K=V,K=V,…]' that backs --bind-volume on systemd-vmspawn (next), machinectl bind-volume, and the future nspawn + service-manager BindVolume= integrations. The 'config' field is opaque to shared code and interpreted per backend (vmspawn: a DiskType name, future nspawn: a mount path). The trailing key=value list is parsed into the io.systemd.StorageProvider .Acquire() parameters (template, create, read-only/ro, size/create-size and request-as), with values validated against the existing storage-util enums and validators. Provider/volume names are checked with storage_provider_name_is_valid() and storage_volume_name_is_valid(); the combined ":" string is also validated as string_is_safe so it is safe to use as a QEMU device id. Add a test-machine-util unit test covering the happy paths plus a handful of malformed inputs. Signed-off-by: Christian Brauner (Amutable) --- src/shared/machine-util.c | 177 +++++++++++++++++++++++++++++++++++ src/shared/machine-util.h | 54 +++++++++++ src/test/meson.build | 1 + src/test/test-machine-util.c | 144 ++++++++++++++++++++++++++++ 4 files changed, 376 insertions(+) create mode 100644 src/test/test-machine-util.c diff --git a/src/shared/machine-util.c b/src/shared/machine-util.c index fa5e46ace1e53..43a4fdfdd81b9 100644 --- a/src/shared/machine-util.c +++ b/src/shared/machine-util.c @@ -4,7 +4,11 @@ #include "extract-word.h" #include "machine-util.h" #include "parse-argument.h" +#include "parse-util.h" +#include "storage-util.h" #include "string-table.h" +#include "string-util.h" +#include "strv.h" static const char *const image_format_table[_IMAGE_FORMAT_MAX] = { [IMAGE_FORMAT_RAW] = "raw", @@ -13,6 +17,14 @@ static const char *const image_format_table[_IMAGE_FORMAT_MAX] = { DEFINE_STRING_TABLE_LOOKUP(image_format, ImageFormat); +static const char *const read_only_mode_table[_READ_ONLY_MAX] = { + [READ_ONLY_NO] = "no", + [READ_ONLY_YES] = "yes", + [READ_ONLY_AUTO] = "auto", +}; + +DEFINE_STRING_TABLE_LOOKUP(read_only_mode, ReadOnlyMode); + static const char *const disk_type_table[_DISK_TYPE_MAX] = { [DISK_TYPE_VIRTIO_BLK] = "virtio-blk", [DISK_TYPE_VIRTIO_SCSI] = "virtio-scsi", @@ -100,3 +112,168 @@ int parse_disk_spec( *ret_path = TAKE_PTR(path); return 0; } + +BindVolume* bind_volume_free(BindVolume *v) { + if (!v) + return NULL; + + free(v->provider); + free(v->volume); + free(v->config); + free(v->template); + + return mfree(v); +} + +static int bind_volume_apply_extra(BindVolume *v, const char *key, const char *value) { + int r; + + assert(v); + assert(key); + assert(value); + + if (streq(key, "template")) { + if (v->template) + return -EINVAL; + if (!storage_template_name_is_valid(value)) + return -EINVAL; + r = free_and_strdup(&v->template, value); + if (r < 0) + return r; + return 0; + } + + if (streq(key, "create")) { + if (v->create_mode >= 0) + return -EINVAL; + CreateMode m = create_mode_from_string(value); + if (m < 0) + return m; + v->create_mode = m; + return 0; + } + + if (STR_IN_SET(key, "read-only", "ro")) { + if (v->read_only >= 0) + return -EINVAL; + ReadOnlyMode m = read_only_mode_from_string(value); + if (m < 0) { + r = parse_boolean(value); + if (r < 0) + return r; + m = r ? READ_ONLY_YES : READ_ONLY_NO; + } + v->read_only = m; + return 0; + } + + if (STR_IN_SET(key, "size", "create-size")) { + if (v->create_size_bytes != UINT64_MAX) + return -EINVAL; + uint64_t sz; + r = parse_size(value, 1024, &sz); + if (r < 0) + return r; + if (sz == 0) + return -EINVAL; + v->create_size_bytes = sz; + return 0; + } + + if (streq(key, "request-as")) { + if (v->request_as >= 0) + return -EINVAL; + VolumeType t = volume_type_from_string(value); + if (t < 0) + return t; + v->request_as = t; + return 0; + } + + return -EINVAL; +} + +int bind_volume_parse(const char *arg, BindVolume **ret) { + _cleanup_(bind_volume_freep) BindVolume *v = NULL; + int r; + + assert(arg); + assert(ret); + + v = new(BindVolume, 1); + if (!v) + return -ENOMEM; + + *v = BIND_VOLUME_INIT; + + const char *p = arg; + _cleanup_free_ char *provider = NULL, *volume = NULL, *config = NULL; + + r = extract_first_word(&p, &provider, ":", EXTRACT_DONT_COALESCE_SEPARATORS); + if (r < 0) + return r; + if (r == 0 || isempty(provider) || !storage_provider_name_is_valid(provider)) + return -EINVAL; + + r = extract_first_word(&p, &volume, ":", EXTRACT_DONT_COALESCE_SEPARATORS); + if (r < 0) + return r; + if (r == 0 || isempty(volume) || !storage_volume_name_is_valid(volume)) + return -EINVAL; + + r = extract_first_word(&p, &config, ":", EXTRACT_DONT_COALESCE_SEPARATORS); + if (r < 0) + return r; + + v->provider = TAKE_PTR(provider); + v->volume = TAKE_PTR(volume); + if (!isempty(config)) { + if (!string_is_safe(config, /* flags= */ 0)) + return -EINVAL; + v->config = TAKE_PTR(config); + } + + for (;;) { + _cleanup_free_ char *kv = NULL, *key = NULL, *value = NULL; + + r = extract_first_word(&p, &kv, ",", 0); + if (r < 0) + return r; + if (r == 0) + break; + + r = split_pair(kv, "=", &key, &value); + if (r < 0) + return r; + if (isempty(key)) + return -EINVAL; + + r = bind_volume_apply_extra(v, key, value); + if (r < 0) + return r; + } + + *ret = TAKE_PTR(v); + return 0; +} + +int machine_storage_name_split(const char *s, char **ret_provider, char **ret_volume) { + _cleanup_free_ char *p = NULL, *v = NULL; + int r; + + if (isempty(s)) + return -EINVAL; + + r = split_pair(s, ":", &p, &v); + if (r < 0) + return r; + + if (!storage_provider_name_is_valid(p) || !storage_volume_name_is_valid(v)) + return -EINVAL; + + if (ret_provider) + *ret_provider = TAKE_PTR(p); + if (ret_volume) + *ret_volume = TAKE_PTR(v); + return 0; +} diff --git a/src/shared/machine-util.h b/src/shared/machine-util.h index 3937ce170377e..a992e24448076 100644 --- a/src/shared/machine-util.h +++ b/src/shared/machine-util.h @@ -2,6 +2,7 @@ #pragma once #include "shared-forward.h" +#include "storage-util.h" typedef enum ImageFormat { IMAGE_FORMAT_RAW, @@ -30,3 +31,56 @@ int parse_disk_spec( ImageFormat *format, DiskType *disk_type, char **ret_path); + +typedef enum ReadOnlyMode { + READ_ONLY_NO, + READ_ONLY_YES, + READ_ONLY_AUTO, + _READ_ONLY_MAX, + _READ_ONLY_INVALID = -EINVAL, +} ReadOnlyMode; + +DECLARE_STRING_TABLE_LOOKUP(read_only_mode, ReadOnlyMode); + +/* Map ReadOnlyMode onto the Acquire() wire tristate (-1 unset/auto, 0 no, 1 yes). */ +static inline int read_only_mode_to_tristate(ReadOnlyMode m) { + switch (m) { + case READ_ONLY_NO: return 0; + case READ_ONLY_YES: return 1; + default: return -1; + } +} + +/* Parsed "PROVIDER:VOLUME[:CONFIG][:K=V,K=V,...]" used by --bind-volume, + * machinectl bind-volume, and (future) the BindVolume= unit setting. The 'config' + * field is opaque here and interpreted per-backend (vmspawn: a DiskType name; + * nspawn: a mount path). */ +typedef struct BindVolume { + char *provider; + char *volume; + char *config; + + /* Acquire() parameters parsed from the trailing key=value list. */ + char *template; + CreateMode create_mode; + ReadOnlyMode read_only; + uint64_t create_size_bytes; + VolumeType request_as; +} BindVolume; + +#define BIND_VOLUME_INIT \ + (BindVolume) { \ + .create_mode = _CREATE_MODE_INVALID, \ + .read_only = _READ_ONLY_INVALID, \ + .create_size_bytes = UINT64_MAX, \ + .request_as = _VOLUME_TYPE_INVALID, \ + } + +BindVolume* bind_volume_free(BindVolume *v); +DEFINE_TRIVIAL_CLEANUP_FUNC(BindVolume*, bind_volume_free); + +int bind_volume_parse(const char *arg, BindVolume **ret); + +/* Validate a ":" binding name as used by AddStorage/RemoveStorage. + * ret_provider/ret_volume may each be NULL when the caller only wants validation. */ +int machine_storage_name_split(const char *s, char **ret_provider, char **ret_volume); diff --git a/src/test/meson.build b/src/test/meson.build index 09c367d3074f3..f4288119f94ba 100644 --- a/src/test/meson.build +++ b/src/test/meson.build @@ -141,6 +141,7 @@ simple_tests += files( 'test-log.c', 'test-logarithm.c', 'test-login-util.c', + 'test-machine-util.c', 'test-macro.c', 'test-memfd-util.c', 'test-memory-util.c', diff --git a/src/test/test-machine-util.c b/src/test/test-machine-util.c new file mode 100644 index 0000000000000..8774be2189228 --- /dev/null +++ b/src/test/test-machine-util.c @@ -0,0 +1,144 @@ +/* SPDX-License-Identifier: LGPL-2.1-or-later */ + +#include "machine-util.h" +#include "tests.h" + +TEST(bind_volume_parse_minimal) { + _cleanup_(bind_volume_freep) BindVolume *v = NULL; + + ASSERT_OK(bind_volume_parse("block:/dev/sda", &v)); + ASSERT_STREQ(v->provider, "block"); + ASSERT_STREQ(v->volume, "/dev/sda"); + ASSERT_NULL(v->config); + ASSERT_NULL(v->template); + ASSERT_EQ(v->create_mode, _CREATE_MODE_INVALID); + ASSERT_EQ(v->request_as, _VOLUME_TYPE_INVALID); + ASSERT_EQ(v->read_only, _READ_ONLY_INVALID); + ASSERT_EQ(v->create_size_bytes, UINT64_MAX); +} + +TEST(bind_volume_parse_with_config) { + _cleanup_(bind_volume_freep) BindVolume *v = NULL; + + ASSERT_OK(bind_volume_parse("block:/dev/sda:virtio-scsi", &v)); + ASSERT_STREQ(v->provider, "block"); + ASSERT_STREQ(v->volume, "/dev/sda"); + ASSERT_STREQ(v->config, "virtio-scsi"); +} + +TEST(bind_volume_parse_empty_config) { + _cleanup_(bind_volume_freep) BindVolume *v = NULL; + + ASSERT_OK(bind_volume_parse("fs:vol-1::create=new,size=64M,template=sparse-file", &v)); + ASSERT_STREQ(v->provider, "fs"); + ASSERT_STREQ(v->volume, "vol-1"); + ASSERT_NULL(v->config); + ASSERT_EQ(v->create_mode, CREATE_NEW); + ASSERT_STREQ(v->template, "sparse-file"); + ASSERT_EQ(v->create_size_bytes, UINT64_C(64) * 1024 * 1024); +} + +TEST(bind_volume_parse_full) { + _cleanup_(bind_volume_freep) BindVolume *v = NULL; + + ASSERT_OK(bind_volume_parse( + "fs:vol-2:nvme:create=any,template=allocated-file,size=128M,ro=auto,request-as=blk", + &v)); + ASSERT_STREQ(v->provider, "fs"); + ASSERT_STREQ(v->volume, "vol-2"); + ASSERT_STREQ(v->config, "nvme"); + ASSERT_EQ(v->create_mode, CREATE_ANY); + ASSERT_STREQ(v->template, "allocated-file"); + ASSERT_EQ(v->request_as, VOLUME_BLK); + ASSERT_EQ(v->create_size_bytes, UINT64_C(128) * 1024 * 1024); + ASSERT_EQ(v->read_only, READ_ONLY_AUTO); +} + +TEST(bind_volume_parse_read_only) { + _cleanup_(bind_volume_freep) BindVolume *v = NULL; + + ASSERT_OK(bind_volume_parse("block:/dev/sdb:scsi-cd:read-only=yes", &v)); + ASSERT_EQ(v->read_only, READ_ONLY_YES); + + v = bind_volume_free(v); + ASSERT_OK(bind_volume_parse("block:/dev/sdb:scsi-cd:ro=no", &v)); + ASSERT_EQ(v->read_only, READ_ONLY_NO); +} + +TEST(bind_volume_parse_invalid) { + BindVolume *v = NULL; + + /* Missing provider */ + ASSERT_ERROR(bind_volume_parse(":vol", &v), EINVAL); + ASSERT_NULL(v); + + /* Missing volume */ + ASSERT_ERROR(bind_volume_parse("block:", &v), EINVAL); + ASSERT_NULL(v); + + /* Provider with control char */ + ASSERT_ERROR(bind_volume_parse("bl\x01ock:vol", &v), EINVAL); + ASSERT_NULL(v); + + /* Config with control char */ + ASSERT_ERROR(bind_volume_parse("block:vol:nv\x01me", &v), EINVAL); + ASSERT_NULL(v); + + /* Unknown extras key */ + ASSERT_ERROR(bind_volume_parse("block:vol::bogus=foo", &v), EINVAL); + ASSERT_NULL(v); + + /* Bogus create mode */ + ASSERT_ERROR(bind_volume_parse("block:vol::create=bogus", &v), EINVAL); + ASSERT_NULL(v); + + /* Bogus request-as */ + ASSERT_ERROR(bind_volume_parse("block:vol::request-as=bogus", &v), EINVAL); + ASSERT_NULL(v); + + /* Extras entry without '=' */ + ASSERT_ERROR(bind_volume_parse("block:vol::nokey", &v), EINVAL); + ASSERT_NULL(v); + + /* Empty key (=value with no key) */ + ASSERT_ERROR(bind_volume_parse("block:vol::=value", &v), EINVAL); + ASSERT_NULL(v); + + /* Duplicate key */ + ASSERT_ERROR(bind_volume_parse("block:vol::create=new,create=any", &v), EINVAL); + ASSERT_NULL(v); + + /* Aliased duplicate (size / create-size) */ + ASSERT_ERROR(bind_volume_parse("block:vol::size=64M,create-size=128M", &v), EINVAL); + ASSERT_NULL(v); + + /* Zero-byte size */ + ASSERT_ERROR(bind_volume_parse("block:vol::size=0", &v), EINVAL); + ASSERT_NULL(v); + + /* Duplicate read-only with explicit yes/no values */ + ASSERT_ERROR(bind_volume_parse("block:vol::read-only=yes,read-only=no", &v), EINVAL); + ASSERT_NULL(v); + ASSERT_ERROR(bind_volume_parse("block:vol::read-only=yes,ro=auto", &v), EINVAL); + ASSERT_NULL(v); +} + +TEST(machine_storage_name_split) { + _cleanup_free_ char *p = NULL, *v = NULL; + + ASSERT_OK(machine_storage_name_split("block:/dev/sda", &p, &v)); + ASSERT_STREQ(p, "block"); + ASSERT_STREQ(v, "/dev/sda"); + + /* NULL outputs — validate-only mode */ + ASSERT_OK(machine_storage_name_split("fs:vol-1", NULL, NULL)); + + ASSERT_ERROR(machine_storage_name_split(NULL, NULL, NULL), EINVAL); + ASSERT_ERROR(machine_storage_name_split("", NULL, NULL), EINVAL); + ASSERT_ERROR(machine_storage_name_split("no-colon", NULL, NULL), EINVAL); + ASSERT_ERROR(machine_storage_name_split(":vol", NULL, NULL), EINVAL); + ASSERT_ERROR(machine_storage_name_split("block:", NULL, NULL), EINVAL); + ASSERT_ERROR(machine_storage_name_split("bl\x01ock:vol", NULL, NULL), EINVAL); +} + +DEFINE_TEST_MAIN(LOG_INFO); From fad897b8bf5f940f0ba04c4f3742f16d2bb05390 Mon Sep 17 00:00:00 2001 From: Christian Brauner Date: Fri, 1 May 2026 13:31:21 +0200 Subject: [PATCH 156/242] shared: add storage_acquire_volume() helper MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit storagectl's mount.storage helper bundles "open StorageProvider socket + Acquire() + dispatch reply + take fd" inline. Future consumers (systemd-vmspawn boot-time --bind-volume, machinectl bind-volume) need the same dance. Factor it into a single libshared helper that takes the Acquire() parameters by value and returns the fd plus the actual type/read-only flags. Library code, so no logging — varlink errors are surfaced via sd_varlink_error_to_errno() and the StorageProvider error_id is returned to the caller via reterr_error_id (caller decides how to format messages). Signed-off-by: Christian Brauner (Amutable) --- src/libsystemd/sd-json/json-util.h | 2 + src/shared/storage-util.c | 107 +++++++++++++++++++++++++++++ src/shared/storage-util.h | 29 ++++++++ 3 files changed, 138 insertions(+) diff --git a/src/libsystemd/sd-json/json-util.h b/src/libsystemd/sd-json/json-util.h index cea2d368b43db..0db1e445e62ac 100644 --- a/src/libsystemd/sd-json/json-util.h +++ b/src/libsystemd/sd-json/json-util.h @@ -268,6 +268,8 @@ enum { SD_JSON_BUILD_PAIR_CONDITION(condition, name, SD_JSON_BUILD_UNSIGNED(value)) #define JSON_BUILD_PAIR_CONDITION_BOOLEAN(condition, name, value) \ SD_JSON_BUILD_PAIR_CONDITION(condition, name, SD_JSON_BUILD_BOOLEAN(value)) +#define JSON_BUILD_PAIR_CONDITION_STRING(condition, name, value) \ + SD_JSON_BUILD_PAIR_CONDITION(condition, name, SD_JSON_BUILD_STRING(value)) #define JSON_BUILD_PAIR_CONDITION_STRV(condition, name, value) \ SD_JSON_BUILD_PAIR_CONDITION(condition, name, SD_JSON_BUILD_STRV(value)) diff --git a/src/shared/storage-util.c b/src/shared/storage-util.c index 793946c03a63e..3a3c1e6c57d3f 100644 --- a/src/shared/storage-util.c +++ b/src/shared/storage-util.c @@ -1,6 +1,14 @@ /* SPDX-License-Identifier: LGPL-2.1-or-later */ +#include "sd-varlink.h" + +#include "alloc-util.h" +#include "fd-util.h" #include "json-util.h" +#include "machine-util.h" +#include "path-lookup.h" +#include "path-util.h" +#include "runtime-scope.h" #include "string-table.h" #include "storage-util.h" @@ -21,3 +29,102 @@ DEFINE_STRING_TABLE_LOOKUP(create_mode, CreateMode); JSON_DISPATCH_ENUM_DEFINE(json_dispatch_volume_type, VolumeType, volume_type_from_string); JSON_DISPATCH_ENUM_DEFINE(json_dispatch_create_mode, CreateMode, create_mode_from_string); + +void storage_acquire_reply_done(StorageAcquireReply *reply) { + if (!reply) + return; + + reply->fd = safe_close(reply->fd); +} + +int storage_acquire_volume( + RuntimeScope scope, + const BindVolume *bv, + bool allow_interactive_auth, + char **reterr_error_id, + StorageAcquireReply *ret) { + + int r; + + assert(bv); + assert(bv->provider); + assert(bv->volume); + assert(ret); + + /* Defense-in-depth: this is a libshared helper that may grow new callers; reject + * provider names that could escape the StorageProvider runtime directory. */ + if (!storage_provider_name_is_valid(bv->provider)) + return -EINVAL; + + _cleanup_free_ char *socket_path = NULL; + r = runtime_directory_generic(scope, "systemd/io.systemd.StorageProvider", &socket_path); + if (r < 0) + return r; + + if (!path_extend(&socket_path, bv->provider)) + return -ENOMEM; + + _cleanup_(sd_varlink_unrefp) sd_varlink *link = NULL; + r = sd_varlink_connect_address(&link, socket_path); + if (r < 0) + return r; + + r = sd_varlink_set_allow_fd_passing_input(link, true); + if (r < 0) + return r; + + sd_json_variant *mreply = NULL; + const char *merror_id = NULL; + r = sd_varlink_callbo( + link, + "io.systemd.StorageProvider.Acquire", + &mreply, + &merror_id, + SD_JSON_BUILD_PAIR_STRING("name", bv->volume), + JSON_BUILD_PAIR_CONDITION_STRING(bv->create_mode >= 0, "createMode", create_mode_to_string(bv->create_mode)), + JSON_BUILD_PAIR_STRING_NON_EMPTY("template", bv->template), + JSON_BUILD_PAIR_TRISTATE_NON_NULL("readOnly", read_only_mode_to_tristate(bv->read_only)), + JSON_BUILD_PAIR_CONDITION_STRING(bv->request_as >= 0, "requestAs", volume_type_to_string(bv->request_as)), + JSON_BUILD_PAIR_UNSIGNED_NOT_EQUAL("createSizeBytes", bv->create_size_bytes, UINT64_MAX), + SD_JSON_BUILD_PAIR_BOOLEAN("allowInteractiveAuthentication", allow_interactive_auth)); + if (r < 0) + return r; + + if (merror_id) { + if (reterr_error_id) { + char *copy = strdup(merror_id); + if (!copy) + return -ENOMEM; + *reterr_error_id = copy; + } + + r = sd_varlink_error_to_errno(merror_id, mreply); + return r == -EBADR ? -EPROTO : r; + } + + /* tmp.fd holds the JSON fd index until sd_varlink_take_fd() swaps it for the real fd. */ + StorageAcquireReply tmp = STORAGE_ACQUIRE_REPLY_INIT; + + static const sd_json_dispatch_field dispatch_table[] = { + { "fileDescriptorIndex", _SD_JSON_VARIANT_TYPE_INVALID, sd_json_dispatch_int, voffsetof(StorageAcquireReply, fd), SD_JSON_MANDATORY }, + { "readOnly", SD_JSON_VARIANT_BOOLEAN, sd_json_dispatch_tristate, voffsetof(StorageAcquireReply, read_only), 0 }, + { "type", SD_JSON_VARIANT_STRING, json_dispatch_volume_type, voffsetof(StorageAcquireReply, type), SD_JSON_MANDATORY }, + { "baseUID", _SD_JSON_VARIANT_TYPE_INVALID, sd_json_dispatch_uid_gid, voffsetof(StorageAcquireReply, base_uid), 0 }, + { "baseGID", _SD_JSON_VARIANT_TYPE_INVALID, sd_json_dispatch_uid_gid, voffsetof(StorageAcquireReply, base_gid), 0 }, + {} + }; + + r = sd_json_dispatch(mreply, dispatch_table, SD_JSON_ALLOW_EXTENSIONS, &tmp); + if (r < 0) + return r; + if (tmp.fd < 0) + return -EBADMSG; + + _cleanup_close_ int fd = sd_varlink_take_fd(link, tmp.fd); + if (fd < 0) + return fd; + + tmp.fd = TAKE_FD(fd); + *ret = tmp; + return 0; +} diff --git a/src/shared/storage-util.h b/src/shared/storage-util.h index f7a62aeec0835..b37515bedbd43 100644 --- a/src/shared/storage-util.h +++ b/src/shared/storage-util.h @@ -41,3 +41,32 @@ static inline bool storage_template_name_is_valid(const char *n) { static inline bool storage_provider_name_is_valid(const char *n) { return string_is_safe(n, STRING_FILENAME); } + +typedef struct StorageAcquireReply { + int fd; + VolumeType type; + int read_only; + uid_t base_uid; + gid_t base_gid; +} StorageAcquireReply; + +#define STORAGE_ACQUIRE_REPLY_INIT \ + (StorageAcquireReply) { \ + .fd = -EBADF, \ + .type = _VOLUME_TYPE_INVALID, \ + .read_only = -1, \ + .base_uid = UID_INVALID, \ + .base_gid = GID_INVALID, \ + } + +void storage_acquire_reply_done(StorageAcquireReply *reply); + +/* On varlink failure, reterr_error_id (if non-NULL) is set to the io.systemd.StorageProvider.* + * error name. The reply is untouched on any error. */ +typedef struct BindVolume BindVolume; +int storage_acquire_volume( + RuntimeScope scope, + const BindVolume *bv, + bool allow_interactive_auth, + char **reterr_error_id, + StorageAcquireReply *ret); From a0faa6a798ae4364726a5676dd5455decad2e7d1 Mon Sep 17 00:00:00 2001 From: Christian Brauner Date: Fri, 1 May 2026 13:32:01 +0200 Subject: [PATCH 157/242] storagectl: refactor mount.storage helper to use storage_acquire_volume() Drop the inline socket-build + sd_varlink_callbo() + reply-dispatch + take_fd block from run_as_mount_helper() in favour of the shared helper. Preserves the type-fallback retry (TypeNotSupported / WrongType re-tries with requestAs="blk") and the per-error-id message mapping; the helper just reports the io.systemd.StorageProvider.* error name back to the caller. Net effect: ~50 lines of dedup, no functional change. Signed-off-by: Christian Brauner (Amutable) --- src/storage/storagectl.c | 224 +++++++++++++-------------------------- 1 file changed, 76 insertions(+), 148 deletions(-) diff --git a/src/storage/storagectl.c b/src/storage/storagectl.c index 23d91d6ba12ee..bbe09e01b1d70 100644 --- a/src/storage/storagectl.c +++ b/src/storage/storagectl.c @@ -19,7 +19,7 @@ #include "format-table.h" #include "format-util.h" #include "help-util.h" -#include "json-util.h" +#include "machine-util.h" #include "main-func.h" #include "mount-util.h" #include "namespace-util.h" @@ -524,7 +524,7 @@ static int run_as_mount_helper(int argc, char *argv[]) { _cleanup_free_ char *filtered = NULL, *template = NULL; CreateMode create_mode = _CREATE_MODE_INVALID; uint64_t create_size = UINT64_MAX; - int read_only = -1; + ReadOnlyMode read_only = _READ_ONLY_INVALID; for (const char *p = options;;) { _cleanup_free_ char *word = NULL; @@ -555,9 +555,9 @@ static int run_as_mount_helper(int argc, char *argv[]) { } else return log_error_errno(SYNTHETIC_ERRNO(EINVAL), "Unknown mount option '%s', refusing.", word); } else if (streq(word, "ro")) - read_only = true; + read_only = READ_ONLY_YES; else if (streq(word, "rw")) - read_only = false; + read_only = READ_ONLY_NO; else if (!strextend_with_separator(&filtered, ",", word)) return log_oom(); } @@ -565,141 +565,69 @@ static int run_as_mount_helper(int argc, char *argv[]) { if (fake) return 0; - _cleanup_free_ char *socket_path = NULL; - r = runtime_directory_generic(arg_runtime_scope, "systemd/io.systemd.StorageProvider", &socket_path); - if (r < 0) - return log_error_errno(r, "Failed to determine socket directory: %m"); - - if (!path_extend(&socket_path, provider)) - return log_oom(); - - _cleanup_(sd_varlink_unrefp) sd_varlink *link = NULL; - r = sd_varlink_connect_address(&link, socket_path); - if (r < 0) - return log_error_errno(r, "Failed to connect to '%s': %m", socket_path); - - r = sd_varlink_set_allow_fd_passing_input(link, true); - if (r < 0) - return log_error_errno(r, "Failed to enable file descriptor passing: %m"); - (void) polkit_agent_open_if_enabled(BUS_TRANSPORT_LOCAL, arg_ask_password); - sd_json_variant *mreply = NULL; - const char *merror_id = NULL, *vtype = fstype ? "reg" : "dir"; - r = sd_varlink_callbo( - link, - "io.systemd.StorageProvider.Acquire", - &mreply, - &merror_id, - SD_JSON_BUILD_PAIR_STRING("name", name), - SD_JSON_BUILD_PAIR_CONDITION(create_mode >= 0, "createMode", SD_JSON_BUILD_STRING(create_mode_to_string(create_mode))), - JSON_BUILD_PAIR_STRING_NON_EMPTY("template", template), - SD_JSON_BUILD_PAIR_CONDITION(read_only >= 0, "readOnly", SD_JSON_BUILD_BOOLEAN(read_only)), - SD_JSON_BUILD_PAIR_STRING("requestAs", vtype), - SD_JSON_BUILD_PAIR_CONDITION(create_size != UINT64_MAX, "createSizeBytes", SD_JSON_BUILD_UNSIGNED(create_size)), - SD_JSON_BUILD_PAIR_BOOLEAN("allowInteractiveAuthentication", arg_ask_password)); - if (r < 0) - return log_error_errno(r, "Failed to issue io.systemd.StorageProvider.Acquire() varlink call: %m"); - _cleanup_(sd_json_variant_unrefp) sd_json_variant *reply = sd_json_variant_ref(mreply); - if (merror_id) { - /* Copy out the error ID, as the follow-up call will invalidate it */ - _cleanup_free_ char *error_id = strdup(merror_id); - if (!error_id) - return log_oom(); - - /* Hmm, the type might not have been right for the backend or the volume? then try - * again, and switch from "reg" to "blk", maybe it works then. (We keep the original - * reply referenced, since we prefer generating an error for the first error.) */ - if (streq(vtype, "reg") && STR_IN_SET(error_id, - "io.systemd.StorageProvider.TypeNotSupported", - "io.systemd.StorageProvider.WrongType")) { - - sd_json_variant *freply = NULL; - const char *ferror_id = NULL; - r = sd_varlink_callbo( - link, - "io.systemd.StorageProvider.Acquire", - &freply, - &ferror_id, - SD_JSON_BUILD_PAIR_STRING("name", name), - SD_JSON_BUILD_PAIR_CONDITION(create_mode >= 0, "createMode", SD_JSON_BUILD_STRING(create_mode_to_string(create_mode))), - JSON_BUILD_PAIR_STRING_NON_EMPTY("template", template), - SD_JSON_BUILD_PAIR_CONDITION(read_only >= 0, "readOnly", SD_JSON_BUILD_BOOLEAN(read_only)), - SD_JSON_BUILD_PAIR_STRING("requestAs", "blk"), - SD_JSON_BUILD_PAIR_CONDITION(create_size != UINT64_MAX, "createSizeBytes", SD_JSON_BUILD_UNSIGNED(create_size)), - SD_JSON_BUILD_PAIR_BOOLEAN("allowInteractiveAuthentication", arg_ask_password)); - if (r < 0) - return log_error_errno(r, "Failed to issue io.systemd.StorageProvider.Acquire() varlink call: %m"); - if (!ferror_id) { - /* The 2nd call worked? then let's forget about the first failure */ - sd_json_variant_unref(reply); - reply = sd_json_variant_ref(freply); - error_id = mfree(error_id); - } - - /* NB: if both fail we show the Varlink error of the first call here, i.e. of the preferred type */ - } - - if (error_id) { - if (streq(error_id, "io.systemd.StorageProvider.NoSuchVolume")) - return log_error_errno(SYNTHETIC_ERRNO(ENOENT), "Volume '%s' not known.", name); - if (streq(error_id, "io.systemd.StorageProvider.NoSuchTemplate")) - return log_error_errno(SYNTHETIC_ERRNO(ENOENT), "Template '%s' not known.", template); - if (streq(error_id, "io.systemd.StorageProvider.VolumeExists")) - return log_error_errno(SYNTHETIC_ERRNO(EEXIST), "Volume '%s' exists already.", name); - if (streq(error_id, "io.systemd.StorageProvider.TypeNotSupported")) - return log_error_errno(SYNTHETIC_ERRNO(EOPNOTSUPP), "Storage provider does not support the specified volume type '%s'.", vtype); - if (streq(error_id, "io.systemd.StorageProvider.WrongType")) - return log_error_errno(SYNTHETIC_ERRNO(EADDRNOTAVAIL), "Volume '%s' is not of type '%s'.", name, vtype); - if (streq(error_id, "io.systemd.StorageProvider.CreateNotSupported")) - return log_error_errno(SYNTHETIC_ERRNO(EOPNOTSUPP), "Storage provider does not support creating volumes."); - if (streq(error_id, "io.systemd.StorageProvider.CreateSizeRequired")) - return log_error_errno(SYNTHETIC_ERRNO(ENODATA), "Storage provider requires a create size to be provided when creating volumes on-the-fly. Use 'storage.create-size=' mount option."); - if (streq(error_id, "io.systemd.StorageProvider.ReadOnlyVolume")) - return log_error_errno(SYNTHETIC_ERRNO(EROFS), "Volume '%s' is read-only.", name); - if (streq(error_id, "io.systemd.StorageProvider.BadTemplate")) - return log_error_errno(SYNTHETIC_ERRNO(EADDRNOTAVAIL), "Template does not apply to this volume type."); - - r = sd_varlink_error_to_errno(error_id, reply); /* If this is a system errno style error, output it with %m */ - if (r != -EBADR) - return log_error_errno(r, "Failed to issue io.systemd.StorageProvider.Acquire() varlink call: %m"); - - return log_error_errno(r, "Failed to issue io.systemd.StorageProvider.Acquire() varlink call: %s", error_id); + VolumeType requested_type = fstype ? VOLUME_REG : VOLUME_DIR; + + BindVolume bv = BIND_VOLUME_INIT; + bv.provider = provider; + bv.volume = name; + bv.create_mode = create_mode; + bv.template = template; + bv.read_only = read_only; + bv.request_as = requested_type; + bv.create_size_bytes = create_size; + + _cleanup_(storage_acquire_reply_done) StorageAcquireReply reply = STORAGE_ACQUIRE_REPLY_INIT; + _cleanup_free_ char *acquire_error_id = NULL; + r = storage_acquire_volume(arg_runtime_scope, &bv, arg_ask_password, &acquire_error_id, &reply); + if (r < 0 && fstype && + STR_IN_SET(strna(acquire_error_id), + "io.systemd.StorageProvider.TypeNotSupported", + "io.systemd.StorageProvider.WrongType")) { + _cleanup_(storage_acquire_reply_done) StorageAcquireReply retry = STORAGE_ACQUIRE_REPLY_INIT; + assert(bv.request_as == VOLUME_REG); + bv.request_as = VOLUME_BLK; + int k = storage_acquire_volume(arg_runtime_scope, &bv, arg_ask_password, /* reterr_error_id= */ NULL, &retry); + if (k >= 0) { + storage_acquire_reply_done(&reply); + reply = retry; + retry = STORAGE_ACQUIRE_REPLY_INIT; + acquire_error_id = mfree(acquire_error_id); + requested_type = VOLUME_BLK; + r = 0; } } - struct { - unsigned fd_idx; - int read_only; - const char *type; - uid_t base_uid; - gid_t base_gid; - } p = { - .fd_idx = UINT_MAX, - .read_only = -1, - .base_uid = UID_INVALID, - .base_gid = GID_INVALID, - }; - - static const sd_json_dispatch_field dispatch_table[] = { - { "fileDescriptorIndex", _SD_JSON_VARIANT_TYPE_INVALID, sd_json_dispatch_uint, voffsetof(p, fd_idx), SD_JSON_MANDATORY }, - { "readOnly", SD_JSON_VARIANT_BOOLEAN, sd_json_dispatch_tristate, voffsetof(p, read_only), 0 }, - { "type", SD_JSON_VARIANT_STRING, sd_json_dispatch_const_string, voffsetof(p, type), SD_JSON_MANDATORY }, - { "baseUID", _SD_JSON_VARIANT_TYPE_INVALID, sd_json_dispatch_uid_gid, voffsetof(p, base_uid), 0 }, - { "baseGID", _SD_JSON_VARIANT_TYPE_INVALID, sd_json_dispatch_uid_gid, voffsetof(p, base_gid), 0 }, - {} - }; - - r = sd_json_dispatch(reply, dispatch_table, SD_JSON_ALLOW_EXTENSIONS, &p); - if (r < 0) - return log_error_errno(r, "Failed to decode Acquire() reply: %m"); - - _cleanup_close_ int fd = sd_varlink_take_fd(link, p.fd_idx); - if (fd < 0) - return log_error_errno(fd, "Failed to acquire fd from Varlink connection: %m"); + if (r < 0) { + const char *eid = acquire_error_id; + + if (streq_ptr(eid, "io.systemd.StorageProvider.NoSuchVolume")) + return log_error_errno(SYNTHETIC_ERRNO(ENOENT), "Volume '%s' not known.", name); + if (streq_ptr(eid, "io.systemd.StorageProvider.NoSuchTemplate")) + return log_error_errno(SYNTHETIC_ERRNO(ENOENT), "Template '%s' not known.", template); + if (streq_ptr(eid, "io.systemd.StorageProvider.VolumeExists")) + return log_error_errno(SYNTHETIC_ERRNO(EEXIST), "Volume '%s' exists already.", name); + if (streq_ptr(eid, "io.systemd.StorageProvider.TypeNotSupported")) + return log_error_errno(SYNTHETIC_ERRNO(EOPNOTSUPP), "Storage provider does not support the specified volume type '%s'.", volume_type_to_string(requested_type)); + if (streq_ptr(eid, "io.systemd.StorageProvider.WrongType")) + return log_error_errno(SYNTHETIC_ERRNO(EADDRNOTAVAIL), "Volume '%s' is not of type '%s'.", name, volume_type_to_string(requested_type)); + if (streq_ptr(eid, "io.systemd.StorageProvider.CreateNotSupported")) + return log_error_errno(SYNTHETIC_ERRNO(EOPNOTSUPP), "Storage provider does not support creating volumes."); + if (streq_ptr(eid, "io.systemd.StorageProvider.CreateSizeRequired")) + return log_error_errno(SYNTHETIC_ERRNO(ENODATA), "Storage provider requires a create size to be provided when creating volumes on-the-fly. Use 'storage.create-size=' mount option."); + if (streq_ptr(eid, "io.systemd.StorageProvider.ReadOnlyVolume")) + return log_error_errno(SYNTHETIC_ERRNO(EROFS), "Volume '%s' is read-only.", name); + if (streq_ptr(eid, "io.systemd.StorageProvider.BadTemplate")) + return log_error_errno(SYNTHETIC_ERRNO(EADDRNOTAVAIL), "Template does not apply to this volume type."); + + if (eid) + return log_error_errno(r, "Failed to issue io.systemd.StorageProvider.Acquire() varlink call (%s): %m", eid); + return log_error_errno(r, "Failed to issue io.systemd.StorageProvider.Acquire() varlink call: %m"); + } struct stat st; - if (fstat(fd, &st) < 0) + if (fstat(reply.fd, &st) < 0) return log_error_errno(errno, "Failed to stat returned file descriptor: %m"); _cleanup_strv_free_ char **cmdline = strv_new("mount", "-c"); @@ -707,7 +635,7 @@ static int run_as_mount_helper(int argc, char *argv[]) { return log_oom(); if (fstype) { - if (!STR_IN_SET(p.type, "reg", "blk")) + if (!IN_SET(reply.type, VOLUME_REG, VOLUME_BLK)) return log_error_errno(SYNTHETIC_ERRNO(EINVAL), "Mounting as file system type '%s' requested, but volume is not a block device or regular file.", fstype); r = stat_verify_regular_or_block(&st); @@ -717,31 +645,31 @@ static int run_as_mount_helper(int argc, char *argv[]) { if (strv_extend_strv(&cmdline, STRV_MAKE("-t", fstype), /* filter_duplicates= */ false) < 0) return log_oom(); } else { - if (!streq(p.type, "dir")) + if (reply.type != VOLUME_DIR) return log_error_errno(SYNTHETIC_ERRNO(EINVAL), "Mount as directory requested, but volume is not a directory."); - if (!uid_is_valid(p.base_uid) || !gid_is_valid(p.base_gid)) + if (!uid_is_valid(reply.base_uid) || !gid_is_valid(reply.base_gid)) return log_error_errno(SYNTHETIC_ERRNO(EINVAL), "Provider did not report base UID/GID, cannot mount."); - if (p.base_uid > UINT32_MAX - USERNS_RANGE_SIZE || - p.base_gid > UINT32_MAX - USERNS_RANGE_SIZE) + if (reply.base_uid > UINT32_MAX - USERNS_RANGE_SIZE || + reply.base_gid > UINT32_MAX - USERNS_RANGE_SIZE) return log_error_errno(SYNTHETIC_ERRNO(EINVAL), "Returned base UID/GID out of range."); r = stat_verify_directory(&st); if (r < 0) return log_error_errno(r, "File descriptor for directory volume is not a directory inode: %m"); - if (st.st_uid < p.base_uid || st.st_uid >= p.base_uid + USERNS_RANGE_SIZE || - st.st_gid < p.base_gid || st.st_gid >= p.base_gid + USERNS_RANGE_SIZE) + if (st.st_uid < reply.base_uid || st.st_uid >= reply.base_uid + USERNS_RANGE_SIZE || + st.st_gid < reply.base_gid || st.st_gid >= reply.base_gid + USERNS_RANGE_SIZE) return log_error_errno(SYNTHETIC_ERRNO(EPERM), "File descriptor for directory volume is not owned by base UID/GID range, refusing."); /* Now move the mount into our own UID/GID range */ _cleanup_free_ char *uid_line = asprintf_safe( UID_FMT " " UID_FMT " " UID_FMT "\n", - p.base_uid, (uid_t) 0, USERNS_RANGE_SIZE); + reply.base_uid, (uid_t) 0, USERNS_RANGE_SIZE); _cleanup_free_ char *gid_line = asprintf_safe( GID_FMT " " GID_FMT " " GID_FMT "\n", - p.base_gid, (gid_t) 0, USERNS_RANGE_SIZE); + reply.base_gid, (gid_t) 0, USERNS_RANGE_SIZE); if (!uid_line || !gid_line) return log_oom(); @@ -750,7 +678,7 @@ static int run_as_mount_helper(int argc, char *argv[]) { return log_error_errno(userns_fd, "Failed to acquire new user namespace: %m"); _cleanup_close_ int remapped_fd = open_tree_attr_with_fallback( - fd, + reply.fd, /* path= */ NULL, OPEN_TREE_CLONE | OPEN_TREE_CLOEXEC, &(struct mount_attr) { @@ -760,25 +688,25 @@ static int run_as_mount_helper(int argc, char *argv[]) { if (remapped_fd < 0) return log_error_errno(remapped_fd, "Failed to set ID mapping on returned mount: %m"); - close_and_replace(fd, remapped_fd); + close_and_replace(reply.fd, remapped_fd); if (strv_extend(&cmdline, "--bind") < 0) return log_oom(); } - if (p.read_only > 0) - read_only = true; + if (reply.read_only > 0) + read_only = READ_ONLY_YES; - if (!strextend_with_separator(&filtered, ",", read_only > 0 ? "ro" : "rw")) + if (!strextend_with_separator(&filtered, ",", read_only == READ_ONLY_YES ? "ro" : "rw")) return log_oom(); if (strv_extend_strv(&cmdline, STRV_MAKE("-o", filtered), /* filter_duplicates= */ false) < 0) return log_oom(); - if (strv_extend_strv(&cmdline, STRV_MAKE(FORMAT_PROC_FD_PATH(fd), path), /* filter_duplicates= */ false) < 0) + if (strv_extend_strv(&cmdline, STRV_MAKE(FORMAT_PROC_FD_PATH(reply.fd), path), /* filter_duplicates= */ false) < 0) return log_oom(); - r = fd_cloexec(fd, false); + r = fd_cloexec(reply.fd, false); if (r < 0) return log_error_errno(r, "Failed to disable O_CLOEXEC for mount fd: %m"); From aa02ad284e6d98fdd594c0f4e4d1765f8b4574d4 Mon Sep 17 00:00:00 2001 From: Christian Brauner Date: Fri, 1 May 2026 13:32:58 +0200 Subject: [PATCH 158/242] shared: add AddStorage / RemoveStorage to io.systemd.MachineInstance MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Define two new methods on the generic 'MachineInstance' Varlink interface that systemd-vmspawn (this series) and (future) systemd-nspawn implement on their per-machine control sockets: AddStorage(fileDescriptorIndex, name, config?) -> () Attach a storage volume — the caller passes an fd previously acquired from a StorageProvider, plus a unique name of the form ':' that identifies this binding for later removal, plus a backend-specific 'config' field (vmspawn: guest device type; future nspawn: mount path). RemoveStorage(name) -> () Detach a previously-added storage volume. Plus errors NoSuchStorage, StorageExists, StorageImmutable (the volume was attached at boot and cannot be removed), BadConfig, and ConfigNotSupported. Names follow the io.systemd.StorageProvider vocabulary (NoSuchVolume, BadTemplate, TypeNotSupported, etc.) so the two interfaces are visually consistent. Signed-off-by: Christian Brauner (Amutable) --- .../varlink-io.systemd.MachineInstance.c | 35 ++++++++++++++++++- 1 file changed, 34 insertions(+), 1 deletion(-) diff --git a/src/shared/varlink-io.systemd.MachineInstance.c b/src/shared/varlink-io.systemd.MachineInstance.c index 365b6f5f9e1af..6630402709870 100644 --- a/src/shared/varlink-io.systemd.MachineInstance.c +++ b/src/shared/varlink-io.systemd.MachineInstance.c @@ -25,8 +25,27 @@ static SD_VARLINK_DEFINE_METHOD_FULL( SD_VARLINK_FIELD_COMMENT("Event-specific payload"), SD_VARLINK_DEFINE_OUTPUT(data, SD_VARLINK_OBJECT, SD_VARLINK_NULLABLE)); +static SD_VARLINK_DEFINE_METHOD( + AddStorage, + SD_VARLINK_FIELD_COMMENT("Index of the attached file descriptor for the storage volume"), + SD_VARLINK_DEFINE_INPUT(fileDescriptorIndex, SD_VARLINK_INT, 0), + SD_VARLINK_FIELD_COMMENT("Unique storage name of the form ':' identifying this binding for later removal"), + SD_VARLINK_DEFINE_INPUT(name, SD_VARLINK_STRING, 0), + SD_VARLINK_FIELD_COMMENT("Backend-specific configuration"), + SD_VARLINK_DEFINE_INPUT(config, SD_VARLINK_STRING, SD_VARLINK_NULLABLE)); + +static SD_VARLINK_DEFINE_METHOD( + RemoveStorage, + SD_VARLINK_FIELD_COMMENT("Unique storage name ':' to detach"), + SD_VARLINK_DEFINE_INPUT(name, SD_VARLINK_STRING, 0)); + static SD_VARLINK_DEFINE_ERROR(NotConnected); static SD_VARLINK_DEFINE_ERROR(NotSupported); +static SD_VARLINK_DEFINE_ERROR(NoSuchStorage); +static SD_VARLINK_DEFINE_ERROR(StorageExists); +static SD_VARLINK_DEFINE_ERROR(StorageImmutable); +static SD_VARLINK_DEFINE_ERROR(BadConfig); +static SD_VARLINK_DEFINE_ERROR(ConfigNotSupported); SD_VARLINK_DEFINE_INTERFACE( io_systemd_MachineInstance, @@ -45,7 +64,21 @@ SD_VARLINK_DEFINE_INTERFACE( &vl_method_Describe, SD_VARLINK_SYMBOL_COMMENT("Subscribe to machine events. Returns a stream of events as they occur."), &vl_method_SubscribeEvents, + SD_VARLINK_SYMBOL_COMMENT("Attach a storage volume (passed via file descriptor) to the running machine"), + &vl_method_AddStorage, + SD_VARLINK_SYMBOL_COMMENT("Detach a previously-attached storage volume from the running machine"), + &vl_method_RemoveStorage, SD_VARLINK_SYMBOL_COMMENT("The connection to the machine backend is not available"), &vl_error_NotConnected, SD_VARLINK_SYMBOL_COMMENT("The requested operation is not supported"), - &vl_error_NotSupported); + &vl_error_NotSupported, + SD_VARLINK_SYMBOL_COMMENT("The named storage binding does not exist"), + &vl_error_NoSuchStorage, + SD_VARLINK_SYMBOL_COMMENT("A storage binding with this name already exists"), + &vl_error_StorageExists, + SD_VARLINK_SYMBOL_COMMENT("The storage binding cannot be detached at runtime (e.g. attached at boot)"), + &vl_error_StorageImmutable, + SD_VARLINK_SYMBOL_COMMENT("The supplied 'config' value is not valid for this backend"), + &vl_error_BadConfig, + SD_VARLINK_SYMBOL_COMMENT("The supplied 'config' value is recognized but not supported by this backend"), + &vl_error_ConfigNotSupported); From 0e911e41649d46ff529522ba2ed036e779c13098 Mon Sep 17 00:00:00 2001 From: Christian Brauner Date: Fri, 1 May 2026 13:34:06 +0200 Subject: [PATCH 159/242] vmspawn: track removability as a QmpDriveFlags bit and expose add_block_device MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Drives attached at boot via the existing CLI options (--image, --extra-drive) must not be detachable at runtime via the upcoming RemoveStorage Varlink method, while drives added at runtime via AddStorage must be. Track this distinction with a new QMP_DRIVE_REMOVABLE property flag — placed alongside QMP_DRIVE_BLOCK_DEVICE, not in the transient BlockDeviceStateFlags state-machine, since "may be removed" is a permanent property of the drive. vmspawn_qmp_remove_block_device() now early-rejects unknown ids with io.systemd.MachineInstance.NoSuchStorage and immutable drives with io.systemd.MachineInstance.StorageImmutable. vmspawn_qmp_add_block_device() loses its 'static' qualifier and gets a declaration in the header, so the runtime hotplug path (vmspawn-bind-volume.c, next) can dispatch into it directly. Signed-off-by: Christian Brauner (Amutable) --- src/vmspawn/vmspawn-qmp.c | 9 +++++---- src/vmspawn/vmspawn-qmp.h | 2 ++ 2 files changed, 7 insertions(+), 4 deletions(-) diff --git a/src/vmspawn/vmspawn-qmp.c b/src/vmspawn/vmspawn-qmp.c index d09576213a163..52d9e11b9f89e 100644 --- a/src/vmspawn/vmspawn-qmp.c +++ b/src/vmspawn/vmspawn-qmp.c @@ -826,7 +826,7 @@ static int on_add_device_add_complete( return 0; if (d->link) { - (void) sd_varlink_replybo(d->link, SD_JSON_BUILD_PAIR_STRING("id", d->id)); + (void) sd_varlink_reply(d->link, NULL); d->link = sd_varlink_unref(d->link); } @@ -882,7 +882,7 @@ static int qmp_setup_scsi_controller(VmspawnQmpBridge *bridge, const char *pcie_ return 0; } -static int vmspawn_qmp_add_block_device(VmspawnQmpBridge *bridge, DriveInfo *drive) { +int vmspawn_qmp_add_block_device(VmspawnQmpBridge *bridge, DriveInfo *drive) { int r; assert(bridge); @@ -989,7 +989,6 @@ static int qmp_setup_regular_drive(VmspawnQmpBridge *bridge, DriveInfo *drive) { assert(bridge); assert(drive); assert(drive->fd >= 0); - assert(!drive->id); return vmspawn_qmp_add_block_device(bridge, drive); } @@ -1028,7 +1027,9 @@ int vmspawn_qmp_remove_block_device(VmspawnQmpBridge *bridge, sd_varlink *link, DriveInfo *drive = hashmap_get(bridge->block_devices, id); if (!drive) - return reply_qmp_error(link, "Unknown block device id", -ENOENT); + return sd_varlink_error(link, "io.systemd.MachineInstance.NoSuchStorage", NULL); + if (!FLAGS_SET(drive->flags, QMP_DRIVE_REMOVABLE)) + return sd_varlink_error(link, "io.systemd.MachineInstance.StorageImmutable", NULL); if (!FLAGS_SET(drive->state, BLOCK_DEVICE_STATE_BLOCKDEV_ADDED)) return reply_qmp_error(link, "Block device add pending", -EBUSY); if (FLAGS_SET(drive->state, BLOCK_DEVICE_STATE_REMOVE_PENDING)) diff --git a/src/vmspawn/vmspawn-qmp.h b/src/vmspawn/vmspawn-qmp.h index d8403520c9afe..a2f929732086e 100644 --- a/src/vmspawn/vmspawn-qmp.h +++ b/src/vmspawn/vmspawn-qmp.h @@ -72,6 +72,7 @@ typedef enum QmpDriveFlags { QMP_DRIVE_BOOT = 1u << 4, QMP_DRIVE_IO_URING = 1u << 5, QMP_DRIVE_DISCARD_NO_UNREF = 1u << 6, /* qcow2 only */ + QMP_DRIVE_REMOVABLE = 1u << 7, /* may be detached at runtime via RemoveStorage */ } QmpDriveFlags; typedef enum BlockDeviceStateFlags { @@ -177,5 +178,6 @@ int vmspawn_qmp_setup_drives(VmspawnQmpBridge *bridge, DriveInfos *drives); int vmspawn_qmp_setup_network(VmspawnQmpBridge *bridge, NetworkInfo *network); int vmspawn_qmp_setup_virtiofs(VmspawnQmpBridge *bridge, const VirtiofsInfos *virtiofs); int vmspawn_qmp_setup_vsock(VmspawnQmpBridge *bridge, VsockInfo *vsock); +int vmspawn_qmp_add_block_device(VmspawnQmpBridge *bridge, DriveInfo *drive); int vmspawn_qmp_remove_block_device(VmspawnQmpBridge *bridge, sd_varlink *link, const char *id); int vmspawn_qmp_dispatch_device_deleted(VmspawnQmpBridge *bridge, sd_json_variant *data); From a34ad7f7b01ba39f2a7fd4a3b1ba6b1eff49c6f2 Mon Sep 17 00:00:00 2001 From: Christian Brauner Date: Fri, 1 May 2026 13:34:54 +0200 Subject: [PATCH 160/242] vmspawn: add vmspawn-bind-volume glue This is vmspawn's per-backend code for the StorageProvider integration. Other backends (future systemd-nspawn, future service-manager BindVolume=) consume the same shared parser and Acquire helper but each provides its own attach/detach glue; this is vmspawn's. - disk_type_from_bind_volume_config() turns the opaque BindVolume 'config' field (e.g. "scsi-cd") into a DiskType. Empty defaults to virtio-blk to match the --bind-volume CLI grammar. - vmspawn_bind_volume_acquire() takes a parsed BindVolume, calls storage_acquire_volume() for the fd, and builds a DriveInfo ready for vmspawn_qmp_setup_drives() (boot) or vmspawn_qmp_add_block_device() (hotplug). Rejects directory-typed volumes (vmspawn block devices need a regular file or a host block device). - vmspawn_bind_volume_attach_fd() is the runtime path: takes a fd that was already pushed across by an AddStorage caller plus the name+config it specified, builds the DriveInfo with QMP_DRIVE_REMOVABLE set and a varlink link, and dispatches to vmspawn_qmp_add_block_device(). Reply is delivered asynchronously by the existing on_add_device_add_complete() callback. - vmspawn_bind_volume_prepare_boot() is a thin loop the boot-time path uses to populate DriveInfos. Signed-off-by: Christian Brauner (Amutable) --- src/vmspawn/meson.build | 1 + src/vmspawn/vmspawn-bind-volume.c | 202 ++++++++++++++++++++++++++++++ src/vmspawn/vmspawn-bind-volume.h | 39 ++++++ 3 files changed, 242 insertions(+) create mode 100644 src/vmspawn/vmspawn-bind-volume.c create mode 100644 src/vmspawn/vmspawn-bind-volume.h diff --git a/src/vmspawn/meson.build b/src/vmspawn/meson.build index 6d08755fedf8b..6bc31c77c692d 100644 --- a/src/vmspawn/meson.build +++ b/src/vmspawn/meson.build @@ -6,6 +6,7 @@ endif vmspawn_sources = files( 'vmspawn.c', + 'vmspawn-bind-volume.c', 'vmspawn-qemu-config.c', 'vmspawn-qmp.c', 'vmspawn-varlink.c', diff --git a/src/vmspawn/vmspawn-bind-volume.c b/src/vmspawn/vmspawn-bind-volume.c new file mode 100644 index 0000000000000..d67fc61de02e5 --- /dev/null +++ b/src/vmspawn/vmspawn-bind-volume.c @@ -0,0 +1,202 @@ +/* SPDX-License-Identifier: LGPL-2.1-or-later */ + +#include +#include + +#include "sd-varlink.h" + +#include "alloc-util.h" +#include "fd-util.h" +#include "log.h" +#include "runtime-scope.h" +#include "stat-util.h" +#include "storage-util.h" +#include "string-util.h" +#include "vmspawn-bind-volume.h" +#include "vmspawn-qmp.h" + +DiskType disk_type_from_bind_volume_config(const char *config) { + if (isempty(config)) + return DISK_TYPE_VIRTIO_BLK; + return disk_type_from_string(config); +} + +int vmspawn_bind_volume_acquire( + RuntimeScope scope, + const BindVolume *v, + bool removable, + sd_varlink *link, + DriveInfo **ret, + char **reterr_error_id) { + + _cleanup_(storage_acquire_reply_done) StorageAcquireReply reply = STORAGE_ACQUIRE_REPLY_INIT; + _cleanup_(drive_info_unrefp) DriveInfo *d = NULL; + _cleanup_free_ char *err = NULL; + int r; + + assert(v); + assert(ret); + + DiskType dt = disk_type_from_bind_volume_config(v->config); + if (dt < 0) { + r = dt; + goto fail; + } + + r = storage_acquire_volume(scope, v, /* allow_interactive_auth= */ false, &err, &reply); + if (r < 0) + goto fail; + + if (reply.type == VOLUME_DIR) { + r = log_debug_errno(SYNTHETIC_ERRNO(EOPNOTSUPP), + "Directory volumes are not supported for vmspawn block devices."); + goto fail; + } + + struct stat st; + if (fstat(reply.fd, &st) < 0) { + r = -errno; + goto fail; + } + r = stat_verify_regular_or_block(&st); + if (r < 0) + goto fail; + + d = drive_info_new(); + if (!d) { + r = -ENOMEM; + goto fail; + } + + d->id = strjoin(v->provider, ":", v->volume); + d->disk_driver = strdup(ASSERT_PTR(qemu_device_driver_to_string(dt))); + d->format = strdup("raw"); + d->path = strdup(v->volume); + if (!d->id || !d->disk_driver || !d->format || !d->path) { + r = -ENOMEM; + goto fail; + } + + d->disk_type = dt; + d->fd = TAKE_FD(reply.fd); + + if (reply.type == VOLUME_BLK || S_ISBLK(st.st_mode)) + d->flags |= QMP_DRIVE_BLOCK_DEVICE; + if (reply.read_only > 0 || dt == DISK_TYPE_VIRTIO_SCSI_CDROM) + d->flags |= QMP_DRIVE_READ_ONLY; + if (removable) + d->flags |= QMP_DRIVE_REMOVABLE; + d->link = sd_varlink_ref(link); + + *ret = TAKE_PTR(d); + return 0; + +fail: + if (reterr_error_id) + *reterr_error_id = TAKE_PTR(err); + return r; +} + +/* Takes ownership of fd unconditionally — it is closed on every error path too. */ +int vmspawn_bind_volume_attach_fd( + VmspawnQmpBridge *bridge, + sd_varlink *link, + int fd, + const char *name, + const char *config) { + + _cleanup_close_ int owned_fd = fd; + int r; + + assert(bridge); + assert(link); + assert(fd >= 0); + assert(name); + + DiskType dt = disk_type_from_bind_volume_config(config); + if (dt < 0) + return dt; + + struct stat st; + if (fstat(owned_fd, &st) < 0) + return -errno; + r = stat_verify_regular_or_block(&st); + if (r < 0) + return r; + + _cleanup_(drive_info_unrefp) DriveInfo *d = drive_info_new(); + if (!d) + return -ENOMEM; + + d->id = strdup(name); + d->disk_driver = strdup(ASSERT_PTR(qemu_device_driver_to_string(dt))); + d->format = strdup("raw"); + d->path = strdup(name); + if (!d->id || !d->disk_driver || !d->format || !d->path) + return -ENOMEM; + + int oflags = fcntl(owned_fd, F_GETFL); + if (oflags < 0) + return -errno; + + d->disk_type = dt; + d->fd = TAKE_FD(owned_fd); + if (S_ISBLK(st.st_mode)) + d->flags |= QMP_DRIVE_BLOCK_DEVICE; + if (dt == DISK_TYPE_VIRTIO_SCSI_CDROM || (oflags & O_ACCMODE_STRICT) == O_RDONLY) + d->flags |= QMP_DRIVE_READ_ONLY; + d->flags |= QMP_DRIVE_REMOVABLE; + d->link = sd_varlink_ref(link); + + return vmspawn_qmp_add_block_device(bridge, TAKE_PTR(d)); +} + +void bind_volumes_done(BindVolumes *bv) { + assert(bv); + FOREACH_ARRAY(v, bv->items, bv->n_items) + bind_volume_free(*v); + bv->items = mfree(bv->items); + bv->n_items = 0; +} + +int vmspawn_bind_volume_prepare_boot( + RuntimeScope scope, + const BindVolumes *bv, + DriveInfos *drives) { + + int r; + + assert(bv); + assert(drives); + + if (bv->n_items == 0) + return 0; + + if (!GREEDY_REALLOC(drives->drives, drives->n_drives + bv->n_items)) + return log_oom(); + + FOREACH_ARRAY(it, bv->items, bv->n_items) { + BindVolume *v = *it; + _cleanup_(drive_info_unrefp) DriveInfo *d = NULL; + _cleanup_free_ char *error_id = NULL; + + r = vmspawn_bind_volume_acquire( + scope, v, + /* removable= */ false, + /* link= */ NULL, + &d, &error_id); + if (r < 0) { + if (error_id) + return log_error_errno(r, + "Failed to acquire storage volume '%s:%s' (%s): %m", + v->provider, v->volume, error_id); + return log_error_errno(r, + "Failed to acquire storage volume '%s:%s': %m", + v->provider, v->volume); + } + + drives->drives[drives->n_drives++] = TAKE_PTR(d); + } + + return 0; +} diff --git a/src/vmspawn/vmspawn-bind-volume.h b/src/vmspawn/vmspawn-bind-volume.h new file mode 100644 index 0000000000000..23b3ff52f3cb3 --- /dev/null +++ b/src/vmspawn/vmspawn-bind-volume.h @@ -0,0 +1,39 @@ +/* SPDX-License-Identifier: LGPL-2.1-or-later */ +#pragma once + +#include "machine-util.h" +#include "shared-forward.h" +#include "vmspawn-qmp.h" + +/* Empty/NULL defaults to virtio-blk; otherwise delegates to disk_type_from_string(). */ +DiskType disk_type_from_bind_volume_config(const char *config); + +/* Acquires the volume and builds a DriveInfo with id=":" (the + * bridge-visible name; QMP-side names are still allocated by add_block_device). */ +int vmspawn_bind_volume_acquire( + RuntimeScope scope, + const BindVolume *v, + bool removable, + sd_varlink *link, + DriveInfo **ret, + char **reterr_error_id); + +typedef struct BindVolumes { + BindVolume **items; + size_t n_items; +} BindVolumes; + +void bind_volumes_done(BindVolumes *bv); + +int vmspawn_bind_volume_prepare_boot( + RuntimeScope scope, + const BindVolumes *bv, + DriveInfos *drives); + +/* Takes ownership of fd unconditionally. */ +int vmspawn_bind_volume_attach_fd( + VmspawnQmpBridge *bridge, + sd_varlink *link, + int fd, + const char *name, + const char *config); From 413fd62dd79c1983d4c2ec93570fb85f8167d242 Mon Sep 17 00:00:00 2001 From: Christian Brauner Date: Fri, 1 May 2026 13:35:44 +0200 Subject: [PATCH 161/242] vmspawn: add --bind-volume= command line option MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit systemd-vmspawn --bind-volume=PROVIDER:VOLUME[:CONFIG][:K=V,...] For each --bind-volume passed at startup, vmspawn calls Acquire() on the named StorageProvider and attaches the resulting fd to the VM as an additional drive. The drive is identified by the user-visible name ':' on the bridge — that is also the handle used later when machinectl unbind-volume detaches drives at runtime (though boot-time drives like these are NOT removable; that is the StorageImmutable behaviour added earlier). The colon grammar is parsed by the shared bind_volume_parse() helper. The 3rd 'config' field selects the guest device type from the disk_type_table[] vocabulary (virtio-blk, virtio-scsi, nvme, scsi-cd); empty defaults to virtio-blk per the TASK grammar. Wiring lives next to the existing --extra-drive setup: parse_argv() appends a parsed BindVolume to arg_bind_volumes, and prepare_device_info() hands the array to vmspawn_bind_volume_prepare_boot() which Acquires each volume and pushes a DriveInfo onto the existing drives array. PCIe port assignment (assign_pcie_ports()) and the QMP setup loop pick them up automatically. Signed-off-by: Christian Brauner (Amutable) --- shell-completion/bash/systemd-vmspawn | 2 +- src/vmspawn/vmspawn.c | 33 ++++++++++++++++++++++++++- 2 files changed, 33 insertions(+), 2 deletions(-) diff --git a/shell-completion/bash/systemd-vmspawn b/shell-completion/bash/systemd-vmspawn index efa0dae58de04..62fa5ab52065d 100644 --- a/shell-completion/bash/systemd-vmspawn +++ b/shell-completion/bash/systemd-vmspawn @@ -38,7 +38,7 @@ _systemd_vmspawn() { [BIND]='--bind --bind-ro' [SSH_KEY]='--ssh-key' [CONSOLE]='--console' - [ARG]='--cpus --ram --vsock-cid -M --machine --uuid --private-users --background --set-credential --load-credential --forward-journal-max-use --forward-journal-keep-free --forward-journal-max-file-size --forward-journal-max-files' + [ARG]='--cpus --ram --vsock-cid -M --machine --uuid --private-users --background --set-credential --load-credential --forward-journal-max-use --forward-journal-keep-free --forward-journal-max-file-size --forward-journal-max-files --bind-volume' [IMAGE_FORMAT]='--image-format' [IMAGE_DISK_TYPE]='--image-disk-type' ) diff --git a/src/vmspawn/vmspawn.c b/src/vmspawn/vmspawn.c index 81c035c250d62..ee8ae518f0330 100644 --- a/src/vmspawn/vmspawn.c +++ b/src/vmspawn/vmspawn.c @@ -88,6 +88,7 @@ #include "user-record.h" #include "user-util.h" #include "utf8.h" +#include "vmspawn-bind-volume.h" #include "vmspawn-mount.h" #include "vmspawn-qemu-config.h" #include "vmspawn-qmp.h" @@ -163,6 +164,7 @@ static bool arg_keep_unit = false; static sd_id128_t arg_uuid = {}; static char **arg_kernel_cmdline_extra = NULL; static ExtraDriveContext arg_extra_drives = {}; +static BindVolumes arg_bind_volumes = {}; static char *arg_background = NULL; static bool arg_pass_ssh_key = true; static char *arg_ssh_key_type = NULL; @@ -200,6 +202,7 @@ STATIC_DESTRUCTOR_REGISTER(arg_runtime_mounts, runtime_mount_context_done); STATIC_DESTRUCTOR_REGISTER(arg_forward_journal, freep); STATIC_DESTRUCTOR_REGISTER(arg_kernel_cmdline_extra, strv_freep); STATIC_DESTRUCTOR_REGISTER(arg_extra_drives, extra_drive_context_done); +STATIC_DESTRUCTOR_REGISTER(arg_bind_volumes, bind_volumes_done); STATIC_DESTRUCTOR_REGISTER(arg_background, freep); STATIC_DESTRUCTOR_REGISTER(arg_ssh_key_type, freep); STATIC_DESTRUCTOR_REGISTER(arg_smbios11, strv_freep); @@ -766,6 +769,30 @@ static int parse_argv(int argc, char *argv[]) { break; } + OPTION_LONG("bind-volume", "PROVIDER:VOLUME[:CONFIG][:KEY=VALUE,...]", + "Acquire a storage volume from a StorageProvider and attach it to the VM"): { + _cleanup_(bind_volume_freep) BindVolume *bv = NULL; + + r = bind_volume_parse(opts.arg, &bv); + if (r < 0) + return log_error_errno(r, "Failed to parse --bind-volume= argument '%s': %m", opts.arg); + + if (disk_type_from_bind_volume_config(bv->config) < 0) { + _cleanup_free_ char *valid = NULL; + for (DiskType t = 0; t < _DISK_TYPE_MAX; t++) + if (!strextend_with_separator(&valid, ", ", disk_type_to_string(t))) + return log_oom(); + return log_error_errno(SYNTHETIC_ERRNO(EINVAL), + "Unknown device type '%s' for --bind-volume=. Valid values: %s.", + bv->config, valid); + } + + if (!GREEDY_REALLOC(arg_bind_volumes.items, arg_bind_volumes.n_items + 1)) + return log_oom(); + arg_bind_volumes.items[arg_bind_volumes.n_items++] = TAKE_PTR(bv); + break; + } + OPTION_LONG("bind-user", "NAME", "Bind user from host to virtual machine"): if (!valid_user_group_name(opts.arg, /* flags= */ 0)) return log_error_errno(SYNTHETIC_ERRNO(EINVAL), "Invalid user name to bind: %s", opts.arg); @@ -2475,7 +2502,7 @@ static int prepare_device_info(const char *runtime_dir, MachineConfig *c) { /* Build drive info for QMP-based setup. vmspawn opens all image files and * passes fds to QEMU via add-fd — QEMU never needs filesystem access. */ - drives->drives = new0(DriveInfo*, 1 + arg_extra_drives.n_drives); + drives->drives = new0(DriveInfo*, 1 + arg_extra_drives.n_drives + arg_bind_volumes.n_items); if (!drives->drives) return log_oom(); @@ -2487,6 +2514,10 @@ static int prepare_device_info(const char *runtime_dir, MachineConfig *c) { if (r < 0) return r; + r = vmspawn_bind_volume_prepare_boot(arg_runtime_scope, &arg_bind_volumes, drives); + if (r < 0) + return r; + return assign_pcie_ports(c); } From e7eac392ef87879451fdca337bb2ccd0113c1a86 Mon Sep 17 00:00:00 2001 From: Christian Brauner Date: Fri, 1 May 2026 13:36:21 +0200 Subject: [PATCH 162/242] vmspawn: implement io.systemd.MachineInstance.AddStorage / RemoveStorage MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Wire up the runtime hotplug Varlink methods on the per-VM control socket: AddStorage → take fd from the link, look up the DiskType from the 'config' field, build a DriveInfo flagged QMP_DRIVE_REMOVABLE, dispatch to vmspawn_qmp_add_block_device(). Reply delivered async by on_add_device_add_complete() once the guest sees the device. RemoveStorage → forward the user-visible name to vmspawn_qmp_remove_block_device(); the existing device_del / DEVICE_DELETED / blockdev-del chain replies on the link. Add SD_VARLINK_SERVER_ALLOW_FD_PASSING_INPUT to the server flags so clients can push storage fds across via sd_varlink_push_fd(). Maps -EEXIST → StorageExists and -EOPNOTSUPP/-EINVAL → ConfigNotSupported in the AddStorage handler so callers see the specific MachineInstance errors. Signed-off-by: Christian Brauner (Amutable) --- src/vmspawn/vmspawn-varlink.c | 82 +++++++++++++++++++++++++++++++++-- 1 file changed, 79 insertions(+), 3 deletions(-) diff --git a/src/vmspawn/vmspawn-varlink.c b/src/vmspawn/vmspawn-varlink.c index 9aa6afeae0385..a782a5c9c1a4f 100644 --- a/src/vmspawn/vmspawn-varlink.c +++ b/src/vmspawn/vmspawn-varlink.c @@ -2,6 +2,7 @@ #include "alloc-util.h" #include "errno-util.h" +#include "fd-util.h" #include "hashmap.h" #include "log.h" #include "path-util.h" @@ -11,6 +12,7 @@ #include "varlink-io.systemd.MachineInstance.h" #include "varlink-io.systemd.VirtualMachineInstance.h" #include "varlink-util.h" +#include "vmspawn-bind-volume.h" #include "vmspawn-qmp.h" #include "vmspawn-varlink.h" @@ -168,6 +170,77 @@ static int vl_method_describe(sd_varlink *link, sd_json_variant *parameters, sd_ return qmp_execute_varlink_async(ctx, link, "query-status", /* arguments= */ NULL, on_qmp_describe_complete); } +static int vl_method_add_storage(sd_varlink *link, sd_json_variant *parameters, sd_varlink_method_flags_t flags, void *userdata) { + VmspawnVarlinkContext *ctx = ASSERT_PTR(userdata); + int r; + + struct { + int fd_index; + const char *name; + const char *config; + } p = { + .fd_index = -1, + }; + + static const sd_json_dispatch_field dispatch_table[] = { + { "fileDescriptorIndex", _SD_JSON_VARIANT_TYPE_INVALID, sd_json_dispatch_int, voffsetof(p, fd_index), SD_JSON_MANDATORY }, + { "name", SD_JSON_VARIANT_STRING, sd_json_dispatch_const_string, voffsetof(p, name), SD_JSON_MANDATORY }, + { "config", SD_JSON_VARIANT_STRING, sd_json_dispatch_const_string, voffsetof(p, config), 0 }, + {} + }; + + r = sd_varlink_dispatch(link, parameters, dispatch_table, &p); + if (r != 0) + return r; + + if (machine_storage_name_split(p.name, /* ret_provider= */ NULL, /* ret_volume= */ NULL) < 0) + return sd_varlink_error_invalid_parameter_name(link, "name"); + + if (disk_type_from_bind_volume_config(p.config) < 0) + return sd_varlink_error(link, "io.systemd.MachineInstance.BadConfig", NULL); + + if (p.fd_index < 0) + return sd_varlink_error_invalid_parameter_name(link, "fileDescriptorIndex"); + + _cleanup_close_ int fd = sd_varlink_take_fd(link, p.fd_index); + if (fd < 0) + return sd_varlink_error_errno(link, fd); + + r = vmspawn_bind_volume_attach_fd(ctx->bridge, link, TAKE_FD(fd), p.name, p.config); + if (r == -EEXIST) + return sd_varlink_error(link, "io.systemd.MachineInstance.StorageExists", NULL); + if (r == -EOPNOTSUPP) + return sd_varlink_error(link, "io.systemd.MachineInstance.ConfigNotSupported", NULL); + if (r < 0) + return sd_varlink_error_errno(link, r); + + /* Async reply via on_add_device_add_complete. */ + return 0; +} + +static int vl_method_remove_storage(sd_varlink *link, sd_json_variant *parameters, sd_varlink_method_flags_t flags, void *userdata) { + VmspawnVarlinkContext *ctx = ASSERT_PTR(userdata); + int r; + + struct { + const char *name; + } p = {}; + + static const sd_json_dispatch_field dispatch_table[] = { + { "name", SD_JSON_VARIANT_STRING, sd_json_dispatch_const_string, voffsetof(p, name), SD_JSON_MANDATORY }, + {} + }; + + r = sd_varlink_dispatch(link, parameters, dispatch_table, &p); + if (r != 0) + return r; + + if (machine_storage_name_split(p.name, /* ret_provider= */ NULL, /* ret_volume= */ NULL) < 0) + return sd_varlink_error_invalid_parameter_name(link, "name"); + + return vmspawn_qmp_remove_block_device(ctx->bridge, link, p.name); +} + static int vl_method_subscribe_events(sd_varlink *link, sd_json_variant *parameters, sd_varlink_method_flags_t flags, void *userdata) { VmspawnVarlinkContext *ctx = ASSERT_PTR(userdata); _cleanup_strv_free_ char **filter = NULL; @@ -380,9 +453,10 @@ int vmspawn_varlink_setup( if (!ctx) return log_oom(); - /* Create varlink server for VM control */ + /* AddStorage receives an fd from the caller. */ r = varlink_server_new(&ctx->varlink_server, - SD_VARLINK_SERVER_INHERIT_USERDATA, + SD_VARLINK_SERVER_INHERIT_USERDATA | + SD_VARLINK_SERVER_ALLOW_FD_PASSING_INPUT, ctx); if (r < 0) return log_error_errno(r, "Failed to create varlink server: %m"); @@ -402,7 +476,9 @@ int vmspawn_varlink_setup( "io.systemd.MachineInstance.Resume", vl_method_resume, "io.systemd.MachineInstance.Reboot", vl_method_reboot, "io.systemd.MachineInstance.Describe", vl_method_describe, - "io.systemd.MachineInstance.SubscribeEvents", vl_method_subscribe_events); + "io.systemd.MachineInstance.SubscribeEvents", vl_method_subscribe_events, + "io.systemd.MachineInstance.AddStorage", vl_method_add_storage, + "io.systemd.MachineInstance.RemoveStorage", vl_method_remove_storage); if (r < 0) return log_error_errno(r, "Failed to bind varlink methods: %m"); From c9f461a8067996c6b0c3ac3bf6f9097aedbf4734 Mon Sep 17 00:00:00 2001 From: Christian Brauner Date: Fri, 1 May 2026 13:37:33 +0200 Subject: [PATCH 163/242] machinectl: add bind-volume / unbind-volume verbs machinectl bind-volume MACHINE PROVIDER:VOLUME[:CONFIG][:K=V,...] machinectl unbind-volume MACHINE PROVIDER:VOLUME For bind-volume, machinectl parses the SPEC with the shared bind_volume_parse(), Acquires the storage volume from the named provider on the machinectl side, locates the target machine's io.systemd.MachineInstance control socket via machine_get_control_address(), pushes the fd across, and calls io.systemd.MachineInstance.AddStorage with name=':' and the user-supplied config string. For unbind-volume, machinectl just forwards the name string to io.systemd.MachineInstance.RemoveStorage. Volumes attached at machine startup (e.g. via systemd-vmspawn's --bind-volume=) are rejected with StorageImmutable when the user attempts to unbind them at runtime. Signed-off-by: Christian Brauner (Amutable) --- shell-completion/bash/machinectl | 2 +- shell-completion/zsh/_machinectl | 8 +++ src/machine/machinectl.c | 120 +++++++++++++++++++++++++++++++ 3 files changed, 129 insertions(+), 1 deletion(-) diff --git a/shell-completion/bash/machinectl b/shell-completion/bash/machinectl index 50b46fb27925b..d6627e6ba7e63 100644 --- a/shell-completion/bash/machinectl +++ b/shell-completion/bash/machinectl @@ -48,7 +48,7 @@ _machinectl() { [MACHINES]='status show start stop login shell enable disable poweroff reboot pause resume terminate kill image-status show-image remove export-tar export-raw' [MACHINES_OR_FILES]='edit cat' - [MACHINE_ONLY]='clone rename set-limit' + [MACHINE_ONLY]='clone rename set-limit bind-volume unbind-volume' [READONLY]='read-only' [FILE]='import-tar import-raw' [MACHINES_AND_FILES]='copy-to copy-from bind' diff --git a/shell-completion/zsh/_machinectl b/shell-completion/zsh/_machinectl index 31ddf4fca571d..d61a62ee27681 100644 --- a/shell-completion/zsh/_machinectl +++ b/shell-completion/zsh/_machinectl @@ -45,6 +45,8 @@ "copy-to:Copy files from the host to a container" "copy-from:Copy files from a container to the host" "bind:Bind mount a path from the host into a container" + "bind-volume:Attach a storage volume to a running machine" + "unbind-volume:Detach a storage volume from a running machine" "list-images:Show available container and VM images" "image-status:Show image details" @@ -115,6 +117,12 @@ else stop=1 fi ;; + bind-volume|unbind-volume) + if (( CURRENT == 2 )); then _sd_machines + elif (( CURRENT == 3 )); then _message "volume spec" + else stop=1 + fi ;; + read-only) if (( CURRENT == 2 )); then _machinectl_images elif (( CURRENT == 3 )); then _values 'read-only flag' 'true' 'false' diff --git a/src/machine/machinectl.c b/src/machine/machinectl.c index ae5473aa99d50..3d7f782a8c775 100644 --- a/src/machine/machinectl.c +++ b/src/machine/machinectl.c @@ -29,16 +29,19 @@ #include "cgroup-util.h" #include "edit-util.h" #include "env-util.h" +#include "fd-util.h" #include "format-ifname.h" #include "format-table.h" #include "format-util.h" #include "hostname-util.h" #include "import-util.h" #include "in-addr-util.h" +#include "json-util.h" #include "label-util.h" #include "log.h" #include "logs-show.h" #include "machine-dbus.h" +#include "machine-util.h" #include "main-func.h" #include "nulstr-util.h" #include "osc-context.h" @@ -55,6 +58,7 @@ #include "ptyfwd.h" #include "runtime-scope.h" #include "stdio-util.h" +#include "storage-util.h" #include "string-table.h" #include "string-util.h" #include "strv.h" @@ -1338,6 +1342,120 @@ static int verb_copy_files(int argc, char *argv[], uintptr_t _data, void *userda return 0; } +static int verb_bind_volume(int argc, char *argv[], uintptr_t _data, void *userdata) { + int r; + + if (arg_transport != BUS_TRANSPORT_LOCAL) + return log_error_errno(SYNTHETIC_ERRNO(EOPNOTSUPP), + "bind-volume is only supported on the local transport."); + + _cleanup_(bind_volume_freep) BindVolume *bv = NULL; + r = bind_volume_parse(argv[2], &bv); + if (r < 0) + return log_error_errno(r, "Failed to parse bind-volume argument '%s': %m", argv[2]); + + (void) polkit_agent_open_if_enabled(arg_transport, arg_ask_password); + + /* Locate and connect to the target machine before acquiring storage, so a missing + * machine doesn't trigger 'create=new' side effects on the StorageProvider. */ + _cleanup_free_ char *address = NULL; + r = machine_get_control_address(argv[1], &address); + if (r == -EOPNOTSUPP) + return log_error_errno(r, "Machine '%s' does not expose a varlink control socket.", argv[1]); + if (r < 0) + return r; + + _cleanup_(sd_varlink_unrefp) sd_varlink *vl = NULL; + r = sd_varlink_connect_address(&vl, address); + if (r < 0) + return log_error_errno(r, "Failed to connect to machine control socket %s: %m", address); + + r = sd_varlink_set_allow_fd_passing_output(vl, true); + if (r < 0) + return log_error_errno(r, "Failed to enable fd passing on varlink connection: %m"); + + _cleanup_(storage_acquire_reply_done) StorageAcquireReply reply = STORAGE_ACQUIRE_REPLY_INIT; + _cleanup_free_ char *acquire_error_id = NULL; + r = storage_acquire_volume(arg_runtime_scope, bv, arg_ask_password, &acquire_error_id, &reply); + if (r < 0) { + if (acquire_error_id) + return log_error_errno(r, "Failed to acquire storage volume '%s:%s' from provider: %s", + bv->provider, bv->volume, acquire_error_id); + return log_error_errno(r, "Failed to acquire storage volume '%s:%s' from provider: %m", + bv->provider, bv->volume); + } + + int fd_index = sd_varlink_push_fd(vl, reply.fd); + if (fd_index < 0) + return log_error_errno(fd_index, "Failed to push storage fd onto varlink connection: %m"); + TAKE_FD(reply.fd); + + _cleanup_free_ char *name = strjoin(bv->provider, ":", bv->volume); + if (!name) + return log_oom(); + + sd_json_variant *vl_reply = NULL; + const char *error_id = NULL; + r = sd_varlink_callbo( + vl, + "io.systemd.MachineInstance.AddStorage", + &vl_reply, &error_id, + SD_JSON_BUILD_PAIR_INTEGER("fileDescriptorIndex", fd_index), + SD_JSON_BUILD_PAIR_STRING("name", name), + JSON_BUILD_PAIR_STRING_NON_EMPTY("config", bv->config)); + if (r < 0) + return log_error_errno(r, "Failed to call io.systemd.MachineInstance.AddStorage: %m"); + if (error_id) + return log_error_errno(sd_varlink_error_to_errno(error_id, vl_reply), + "AddStorage failed for '%s': %s", name, error_id); + + return 0; +} + +static int verb_unbind_volume(int argc, char *argv[], uintptr_t _data, void *userdata) { + int r; + + if (arg_transport != BUS_TRANSPORT_LOCAL) + return log_error_errno(SYNTHETIC_ERRNO(EOPNOTSUPP), + "unbind-volume is only supported on the local transport."); + + r = machine_storage_name_split(argv[2], /* ret_provider= */ NULL, /* ret_volume= */ NULL); + if (r == -ENOMEM) + return log_oom(); + if (r < 0) + return log_error_errno(SYNTHETIC_ERRNO(EINVAL), + "Invalid unbind-volume name '%s', expected ':'.", argv[2]); + + (void) polkit_agent_open_if_enabled(arg_transport, arg_ask_password); + + _cleanup_free_ char *address = NULL; + r = machine_get_control_address(argv[1], &address); + if (r == -EOPNOTSUPP) + return log_error_errno(r, "Machine '%s' does not expose a varlink control socket.", argv[1]); + if (r < 0) + return r; + + _cleanup_(sd_varlink_unrefp) sd_varlink *vl = NULL; + r = sd_varlink_connect_address(&vl, address); + if (r < 0) + return log_error_errno(r, "Failed to connect to machine control socket %s: %m", address); + + sd_json_variant *reply = NULL; + const char *error_id = NULL; + r = sd_varlink_callbo( + vl, + "io.systemd.MachineInstance.RemoveStorage", + &reply, &error_id, + SD_JSON_BUILD_PAIR_STRING("name", argv[2])); + if (r < 0) + return log_error_errno(r, "Failed to call io.systemd.MachineInstance.RemoveStorage: %m"); + if (error_id) + return log_error_errno(sd_varlink_error_to_errno(error_id, reply), + "RemoveStorage failed for '%s': %s", argv[2], error_id); + + return 0; +} + static int verb_bind_mount(int argc, char *argv[], uintptr_t _data, void *userdata) { _cleanup_(sd_bus_error_free) sd_bus_error error = SD_BUS_ERROR_NULL; sd_bus *bus = ASSERT_PTR(userdata); @@ -2606,6 +2724,8 @@ static int machinectl_main(int argc, char *argv[], sd_bus *bus) { { "login", VERB_ANY, 2, 0, verb_login_machine }, { "shell", VERB_ANY, VERB_ANY, 0, verb_shell_machine }, { "bind", 3, 4, 0, verb_bind_mount }, + { "bind-volume", 3, 3, 0, verb_bind_volume }, + { "unbind-volume", 3, 3, 0, verb_unbind_volume }, { "edit", 2, VERB_ANY, 0, verb_edit_settings }, { "cat", 2, VERB_ANY, 0, verb_cat_settings }, { "copy-to", 3, 4, 0, verb_copy_files }, From 8a4451d2f0d994562ec01f88283b23986565126e Mon Sep 17 00:00:00 2001 From: Christian Brauner Date: Fri, 1 May 2026 13:38:12 +0200 Subject: [PATCH 164/242] docs,test: --bind-volume / bind-volume / unbind-volume - Document the new --bind-volume= option in systemd-vmspawn(1) and the new bind-volume / unbind-volume verbs in machinectl(1). - Add an integration test (TEST-87-AUX-UTILS-VM.bind-volume.sh) covering boot-time attach via --bind-volume, runtime attach via 'machinectl bind-volume', runtime detach via 'machinectl unbind-volume', the StorageImmutable rejection of attempts to detach boot-time volumes, and the NoSuchStorage rejection of detach on unknown names. - Strike "hook-up in systemd-vmspawn" from TODO.md; the nspawn and service-manager hookups remain. Signed-off-by: Christian Brauner (Amutable) --- TODO.md | 1 - man/machinectl.xml | 32 ++++ man/systemd-vmspawn.xml | 33 ++++ .../units/TEST-87-AUX-UTILS-VM.bind-volume.sh | 166 ++++++++++++++++++ 4 files changed, 231 insertions(+), 1 deletion(-) create mode 100755 test/units/TEST-87-AUX-UTILS-VM.bind-volume.sh diff --git a/TODO.md b/TODO.md index 61114235f3290..f3d1070c6c746 100644 --- a/TODO.md +++ b/TODO.md @@ -138,7 +138,6 @@ SPDX-License-Identifier: LGPL-2.1-or-later - StorageProvider interface + storagectl - hook-up in systemd-nspawn - - hook-up in systemd-vmspawn - hook-up in service manager (BindVolume=) - introduce a locking concept: right now all access to volumes is fully shared. Let's add a basic locking concept: supporting backends can take an diff --git a/man/machinectl.xml b/man/machinectl.xml index b4fb15b4f93a3..10ab225074dfc 100644 --- a/man/machinectl.xml +++ b/man/machinectl.xml @@ -357,6 +357,38 @@ + + bind-volume NAME SPEC + + Acquire a storage volume from a + storagectl1 + provider and attach it to the running machine. SPEC is a string of the form + PROVIDER:VOLUME[:CONFIG][:K=V,…], + identical in grammar to the argument of + systemd-vmspawn1. + + The attached volume is identified by the name PROVIDER:VOLUME + and may be detached at runtime via unbind-volume. Currently only supported for + systemd-vmspawn machines that expose an + io.systemd.MachineInstance control socket. + + + + + + unbind-volume NAME STORAGE-NAME + + Detach a storage volume from the running machine. STORAGE-NAME + is the PROVIDER:VOLUME + identifier that was specified at bind-volume time. Volumes that were attached at machine + startup (e.g. via on + systemd-vmspawn1) + cannot be detached and will fail with + io.systemd.MachineInstance.StorageImmutable. + + + + copy-to NAME PATH [PATH] diff --git a/man/systemd-vmspawn.xml b/man/systemd-vmspawn.xml index 5c5ec4ccbcd55..b23c66514221d 100644 --- a/man/systemd-vmspawn.xml +++ b/man/systemd-vmspawn.xml @@ -566,6 +566,39 @@ + + + + Acquire a storage volume from a + storagectl1 + provider and attach it to the virtual machine. PROVIDER is the + provider name (typically block or fs). VOLUME + is the volume name passed to the provider's Acquire() method. + CONFIG selects the guest device type and takes one of + virtio-blk, virtio-scsi, nvme, or + scsi-cd. If empty or omitted, defaults to virtio-blk. + + The trailing comma-separated K=V list passes parameters to + io.systemd.StorageProvider.Acquire(): template=, + create= (one of any, new, open), + read-only= (or ro=; takes a boolean or auto), + size= / create-size= (size for created volumes), + request-as= (one of blk, reg, + dir; dir is rejected by vmspawn). + + Each attached volume is identified by the name PROVIDER:VOLUME. + Volumes attached at startup via this option cannot be detached at runtime via + machinectl unbind-volume; only volumes added at runtime via + machinectl bind-volume are removable. + + The provider is looked up under + /run/systemd/io.systemd.StorageProvider/ for system mode (or + $XDG_RUNTIME_DIR/systemd/io.systemd.StorageProvider/ for user mode), matching + the runtime scope chosen via / . + + + + diff --git a/test/units/TEST-87-AUX-UTILS-VM.bind-volume.sh b/test/units/TEST-87-AUX-UTILS-VM.bind-volume.sh new file mode 100755 index 0000000000000..6339e390936bb --- /dev/null +++ b/test/units/TEST-87-AUX-UTILS-VM.bind-volume.sh @@ -0,0 +1,166 @@ +#!/usr/bin/env bash +# SPDX-License-Identifier: LGPL-2.1-or-later +# Test --bind-volume / machinectl bind-volume / unbind-volume integration with the +# StorageProvider Varlink interface. +# +# Exercises: +# - --bind-volume parser + runtime_directory_generic + Acquire round-trip +# - boot-time attach via DriveInfo (non-removable) +# - runtime hotplug via io.systemd.MachineInstance.AddStorage (removable) +# - runtime hot-remove via io.systemd.MachineInstance.RemoveStorage +# - StorageImmutable rejection for boot-time attached volumes +set -eux +set -o pipefail + +# shellcheck source=test/units/util.sh +. "$(dirname "$0")"/util.sh + +if [[ -v ASAN_OPTIONS ]]; then + echo "vmspawn launches QEMU which doesn't work under ASan, skipping" + exit 0 +fi + +if ! command -v systemd-vmspawn >/dev/null 2>&1; then + echo "systemd-vmspawn not found, skipping" + exit 0 +fi + +if ! command -v storagectl >/dev/null 2>&1; then + echo "storagectl not found, skipping" + exit 0 +fi + +if ! find_qemu_binary; then + echo "QEMU not found, skipping" + exit 0 +fi + +if ! command -v mke2fs >/dev/null 2>&1; then + echo "mke2fs not found, skipping" + exit 0 +fi + +# Storage providers are socket-activated; skip if the fs provider socket isn't present. +if ! test -S /run/systemd/io.systemd.StorageProvider/fs; then + echo "StorageProvider fs socket not found, skipping" + exit 0 +fi + +# Find a kernel for direct boot +KERNEL="" +for k in /usr/lib/modules/"$(uname -r)"/vmlinuz /boot/vmlinuz-"$(uname -r)" /boot/vmlinuz; do + if [[ -f "$k" ]]; then + KERNEL="$k" + break + fi +done + +if [[ -z "$KERNEL" ]]; then + echo "No kernel found for direct VM boot, skipping" + exit 0 +fi + +WORKDIR="$(mktemp -d /tmp/test-bind-volume.XXXXXXXXXX)" + +at_exit() { + set +e + if [[ -n "${MACHINE:-}" ]]; then + if machinectl status "$MACHINE" &>/dev/null; then + machinectl terminate "$MACHINE" 2>/dev/null + timeout 10 bash -c "while machinectl status '$MACHINE' &>/dev/null; do sleep .5; done" 2>/dev/null + fi + fi + [[ -n "${VMSPAWN_PID:-}" ]] && { kill "$VMSPAWN_PID" 2>/dev/null; wait "$VMSPAWN_PID" 2>/dev/null; } + rm -rf "$WORKDIR" + rm -f /var/lib/storage/test-bind-volume-*.volume +} +trap at_exit EXIT + +# Build a minimal root for direct boot — guest just sleeps. +mkdir -p "$WORKDIR/rootfs/sbin" +cat >"$WORKDIR/rootfs/sbin/init" <<'INITEOF' +#!/bin/sh +exec sleep infinity +INITEOF +chmod +x "$WORKDIR/rootfs/sbin/init" + +truncate -s 256M "$WORKDIR/root.raw" +mke2fs -t ext4 -q -d "$WORKDIR/rootfs" "$WORKDIR/root.raw" + +BOOT_VOL="test-bind-volume-boot-$$" +RUNTIME_VOL="test-bind-volume-runtime-$$" + +wait_for_machine() { + local machine="$1" pid="$2" log="$3" + timeout 30 bash -c " + while ! machinectl list --no-legend 2>/dev/null | grep >/dev/null '$machine'; do + if ! kill -0 $pid 2>/dev/null; then + echo 'vmspawn exited before machine registration' + cat '$log' + exit 77 + fi + sleep .5 + done + " || { + local rc=$? + if [[ $rc -eq 77 ]]; then exit 0; fi + exit "$rc" + } +} + +# --- Boot the VM with one boot-time bind-volume --- +MACHINE="test-bind-volume-$$" +systemd-vmspawn \ + --machine="$MACHINE" \ + --ram=256M \ + --image="$WORKDIR/root.raw" \ + --bind-volume="fs:${BOOT_VOL}::create=new,size=64M,template=sparse-file" \ + --linux="$KERNEL" \ + --tpm=no \ + --console=headless \ + root=/dev/vda rw \ + &>"$WORKDIR/vmspawn.log" & +VMSPAWN_PID=$! + +wait_for_machine "$MACHINE" "$VMSPAWN_PID" "$WORKDIR/vmspawn.log" +echo "Machine '$MACHINE' registered" + +VARLINK_ADDR=$(varlinkctl call /run/systemd/machine/io.systemd.Machine \ + io.systemd.Machine.List "{\"name\":\"$MACHINE\"}" | jq -r '.controlAddress') +assert_neq "$VARLINK_ADDR" "null" + +varlinkctl call "$VARLINK_ADDR" io.systemd.MachineInstance.Describe '{}' \ + | jq -e '.running == true' >/dev/null +echo "VM running with boot-time bind-volume attached" + +# --- Hot-add a second volume via machinectl bind-volume (must succeed) --- +machinectl bind-volume "$MACHINE" \ + "fs:${RUNTIME_VOL}:virtio-scsi:create=new,size=32M,template=sparse-file" +echo "Hot-added runtime bind-volume succeeded" + +# --- Hot-remove the runtime-added volume (must succeed) --- +machinectl unbind-volume "$MACHINE" "fs:${RUNTIME_VOL}" +echo "Hot-removed runtime bind-volume succeeded" + +# --- Removing the boot-time volume must fail with StorageImmutable --- +if machinectl unbind-volume "$MACHINE" "fs:${BOOT_VOL}" 2>"$WORKDIR/unbind.err"; then + echo "ERROR: unbind-volume of boot-time volume should have failed" + cat "$WORKDIR/unbind.err" + exit 1 +fi +grep StorageImmutable "$WORKDIR/unbind.err" >/dev/null +echo "Boot-time bind-volume correctly rejected with StorageImmutable" + +# --- Removing a non-existent name must fail with NoSuchStorage --- +if machinectl unbind-volume "$MACHINE" "fs:no-such-volume-$$" 2>"$WORKDIR/unbind-noexist.err"; then + echo "ERROR: unbind-volume of non-existent name should have failed" + cat "$WORKDIR/unbind-noexist.err" + exit 1 +fi +grep NoSuchStorage "$WORKDIR/unbind-noexist.err" >/dev/null +echo "Non-existent unbind-volume correctly rejected with NoSuchStorage" + +machinectl terminate "$MACHINE" +timeout 10 bash -c "while machinectl status '$MACHINE' &>/dev/null; do sleep .5; done" +timeout 10 bash -c "while kill -0 '$VMSPAWN_PID' 2>/dev/null; do sleep .5; done" +echo "All bind-volume tests passed" From 5a41d43e1b33746fb59114b5dabd3ad1d5409af9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Zbigniew=20J=C4=99drzejewski-Szmek?= Date: Wed, 6 May 2026 12:54:12 +0200 Subject: [PATCH 165/242] portablectl: reorder verb functions The order in --help is changed to move have 'list', 'inspect' (query operations), 'attach'/'detach'/'reattach' (main ops), and then the other more specialized verbs. The functions are then reordered to match this. --- src/portable/portablectl.c | 254 ++++++++++++++++++------------------- 1 file changed, 127 insertions(+), 127 deletions(-) diff --git a/src/portable/portablectl.c b/src/portable/portablectl.c index 2c555ee359877..0c0f03ff166ac 100644 --- a/src/portable/portablectl.c +++ b/src/portable/portablectl.c @@ -251,6 +251,80 @@ static int maybe_reload(sd_bus **bus) { return bus_service_manager_reload(*bus); } +static int verb_list_images(int argc, char *argv[], uintptr_t _data, void *userdata) { + _cleanup_(sd_bus_error_free) sd_bus_error error = SD_BUS_ERROR_NULL; + _cleanup_(sd_bus_message_unrefp) sd_bus_message *reply = NULL; + _cleanup_(sd_bus_flush_close_unrefp) sd_bus *bus = NULL; + _cleanup_(table_unrefp) Table *table = NULL; + int r; + + r = acquire_bus(&bus); + if (r < 0) + return r; + + r = bus_call_method(bus, bus_portable_mgr, "ListImages", &error, &reply, NULL); + if (r < 0) + return log_error_errno(r, "Failed to list images: %s", bus_error_message(&error, r)); + + table = table_new("name", "type", "ro", "crtime", "mtime", "usage", "state"); + if (!table) + return log_oom(); + + r = sd_bus_message_enter_container(reply, 'a', "(ssbtttso)"); + if (r < 0) + return bus_log_parse_error(r); + + for (;;) { + const char *name, *type, *state; + uint64_t crtime, mtime, usage; + int ro_int; + + r = sd_bus_message_read(reply, "(ssbtttso)", &name, &type, &ro_int, &crtime, &mtime, &usage, &state, NULL); + if (r < 0) + return bus_log_parse_error(r); + if (r == 0) + break; + + r = table_add_many(table, + TABLE_STRING, name, + TABLE_STRING, type, + TABLE_BOOLEAN, ro_int, + TABLE_SET_COLOR, ro_int ? ansi_highlight_red() : NULL, + TABLE_TIMESTAMP, crtime, + TABLE_TIMESTAMP, mtime, + TABLE_SIZE, usage, + TABLE_STRING, state, + TABLE_SET_COLOR, !streq(state, "detached") ? ansi_highlight_green() : NULL); + if (r < 0) + return table_log_add_error(r); + } + + r = sd_bus_message_exit_container(reply); + if (r < 0) + return bus_log_parse_error(r); + + if (!table_isempty(table)) { + r = table_set_sort(table, (size_t) 0); + if (r < 0) + return table_log_sort_error(r); + + table_set_header(table, arg_legend); + + r = table_print_or_warn(table); + if (r < 0) + return r; + } + + if (arg_legend) { + if (table_isempty(table)) + printf("No images.\n"); + else + printf("\n%zu images listed.\n", table_get_rows(table) - 1); + } + + return 0; +} + static int get_image_metadata(sd_bus *bus, const char *image, char **matches, sd_bus_message **reply) { _cleanup_(sd_bus_message_unrefp) sd_bus_message *m = NULL; _cleanup_(sd_bus_error_free) sd_bus_error error = SD_BUS_ERROR_NULL; @@ -960,10 +1034,6 @@ static int verb_attach_image(int argc, char *argv[], uintptr_t _data, void *user return attach_reattach_image(argc, argv, strv_isempty(arg_extension_images) && !arg_force ? "AttachImage" : "AttachImageWithExtensions"); } -static int verb_reattach_image(int argc, char *argv[], uintptr_t _data, void *userdata) { - return attach_reattach_image(argc, argv, strv_isempty(arg_extension_images) && !arg_force ? "ReattachImage" : "ReattachImageWithExtensions"); -} - static int verb_detach_image(int argc, char *argv[], uintptr_t _data, void *userdata) { _cleanup_(sd_bus_message_unrefp) sd_bus_message *m = NULL, *reply = NULL; _cleanup_(sd_bus_error_free) sd_bus_error error = SD_BUS_ERROR_NULL; @@ -1018,77 +1088,81 @@ static int verb_detach_image(int argc, char *argv[], uintptr_t _data, void *user return 0; } -static int verb_list_images(int argc, char *argv[], uintptr_t _data, void *userdata) { +static int verb_reattach_image(int argc, char *argv[], uintptr_t _data, void *userdata) { + return attach_reattach_image(argc, argv, strv_isempty(arg_extension_images) && !arg_force ? "ReattachImage" : "ReattachImageWithExtensions"); +} + +static int verb_is_image_attached(int argc, char *argv[], uintptr_t _data, void *userdata) { + _cleanup_(sd_bus_message_unrefp) sd_bus_message *m = NULL, *reply = NULL; _cleanup_(sd_bus_error_free) sd_bus_error error = SD_BUS_ERROR_NULL; - _cleanup_(sd_bus_message_unrefp) sd_bus_message *reply = NULL; _cleanup_(sd_bus_flush_close_unrefp) sd_bus *bus = NULL; - _cleanup_(table_unrefp) Table *table = NULL; + _cleanup_free_ char *image = NULL; + const char *state, *method; int r; - r = acquire_bus(&bus); + r = determine_image(argv[1], true, &image); if (r < 0) return r; - r = bus_call_method(bus, bus_portable_mgr, "ListImages", &error, &reply, NULL); + r = acquire_bus(&bus); if (r < 0) - return log_error_errno(r, "Failed to list images: %s", bus_error_message(&error, r)); + return r; - table = table_new("name", "type", "ro", "crtime", "mtime", "usage", "state"); - if (!table) - return log_oom(); + method = strv_isempty(arg_extension_images) ? "GetImageState" : "GetImageStateWithExtensions"; - r = sd_bus_message_enter_container(reply, 'a', "(ssbtttso)"); + r = bus_message_new_method_call(bus, &m, bus_portable_mgr, method); if (r < 0) - return bus_log_parse_error(r); + return bus_log_create_error(r); - for (;;) { - const char *name, *type, *state; - uint64_t crtime, mtime, usage; - int ro_int; + r = sd_bus_message_append(m, "s", image); + if (r < 0) + return bus_log_create_error(r); - r = sd_bus_message_read(reply, "(ssbtttso)", &name, &type, &ro_int, &crtime, &mtime, &usage, &state, NULL); - if (r < 0) - return bus_log_parse_error(r); - if (r == 0) - break; + r = attach_extensions_to_message(m, method, arg_extension_images); + if (r < 0) + return r; - r = table_add_many(table, - TABLE_STRING, name, - TABLE_STRING, type, - TABLE_BOOLEAN, ro_int, - TABLE_SET_COLOR, ro_int ? ansi_highlight_red() : NULL, - TABLE_TIMESTAMP, crtime, - TABLE_TIMESTAMP, mtime, - TABLE_SIZE, usage, - TABLE_STRING, state, - TABLE_SET_COLOR, !streq(state, "detached") ? ansi_highlight_green() : NULL); + if (!strv_isempty(arg_extension_images)) { + r = sd_bus_message_append(m, "t", UINT64_C(0)); if (r < 0) - return table_log_add_error(r); + return bus_log_create_error(r); } - r = sd_bus_message_exit_container(reply); + r = sd_bus_call(bus, m, 0, &error, &reply); if (r < 0) - return bus_log_parse_error(r); + return log_error_errno(r, "%s failed: %s", method, bus_error_message(&error, r)); - if (!table_isempty(table)) { - r = table_set_sort(table, (size_t) 0); - if (r < 0) - return table_log_sort_error(r); + r = sd_bus_message_read(reply, "s", &state); + if (r < 0) + return r; - table_set_header(table, arg_legend); + if (!arg_quiet) + puts(state); - r = table_print_or_warn(table); - if (r < 0) - return r; - } + return streq(state, "detached"); +} - if (arg_legend) { - if (table_isempty(table)) - printf("No images.\n"); - else - printf("\n%zu images listed.\n", table_get_rows(table) - 1); +static int verb_read_only_image(int argc, char *argv[], uintptr_t _data, void *userdata) { + _cleanup_(sd_bus_error_free) sd_bus_error error = SD_BUS_ERROR_NULL; + _cleanup_(sd_bus_flush_close_unrefp) sd_bus *bus = NULL; + int b = true, r; + + if (argc > 2) { + b = parse_boolean(argv[2]); + if (b < 0) + return log_error_errno(b, "Failed to parse boolean argument: %s", argv[2]); } + r = acquire_bus(&bus); + if (r < 0) + return r; + + (void) polkit_agent_open_if_enabled(arg_transport, arg_ask_password); + + r = bus_call_method(bus, bus_portable_mgr, "MarkImageReadOnly", &error, NULL, "sb", argv[1], b); + if (r < 0) + return log_error_errno(r, "Could not mark image read-only: %s", bus_error_message(&error, r)); + return 0; } @@ -1123,30 +1197,6 @@ static int verb_remove_image(int argc, char *argv[], uintptr_t _data, void *user return 0; } -static int verb_read_only_image(int argc, char *argv[], uintptr_t _data, void *userdata) { - _cleanup_(sd_bus_error_free) sd_bus_error error = SD_BUS_ERROR_NULL; - _cleanup_(sd_bus_flush_close_unrefp) sd_bus *bus = NULL; - int b = true, r; - - if (argc > 2) { - b = parse_boolean(argv[2]); - if (b < 0) - return log_error_errno(b, "Failed to parse boolean argument: %s", argv[2]); - } - - r = acquire_bus(&bus); - if (r < 0) - return r; - - (void) polkit_agent_open_if_enabled(arg_transport, arg_ask_password); - - r = bus_call_method(bus, bus_portable_mgr, "MarkImageReadOnly", &error, NULL, "sb", argv[1], b); - if (r < 0) - return log_error_errno(r, "Could not mark image read-only: %s", bus_error_message(&error, r)); - - return 0; -} - static int verb_set_limit(int argc, char *argv[], uintptr_t _data, void *userdata) { _cleanup_(sd_bus_error_free) sd_bus_error error = SD_BUS_ERROR_NULL; _cleanup_(sd_bus_flush_close_unrefp) sd_bus *bus = NULL; @@ -1180,56 +1230,6 @@ static int verb_set_limit(int argc, char *argv[], uintptr_t _data, void *userdat return 0; } -static int verb_is_image_attached(int argc, char *argv[], uintptr_t _data, void *userdata) { - _cleanup_(sd_bus_message_unrefp) sd_bus_message *m = NULL, *reply = NULL; - _cleanup_(sd_bus_error_free) sd_bus_error error = SD_BUS_ERROR_NULL; - _cleanup_(sd_bus_flush_close_unrefp) sd_bus *bus = NULL; - _cleanup_free_ char *image = NULL; - const char *state, *method; - int r; - - r = determine_image(argv[1], true, &image); - if (r < 0) - return r; - - r = acquire_bus(&bus); - if (r < 0) - return r; - - method = strv_isempty(arg_extension_images) ? "GetImageState" : "GetImageStateWithExtensions"; - - r = bus_message_new_method_call(bus, &m, bus_portable_mgr, method); - if (r < 0) - return bus_log_create_error(r); - - r = sd_bus_message_append(m, "s", image); - if (r < 0) - return bus_log_create_error(r); - - r = attach_extensions_to_message(m, method, arg_extension_images); - if (r < 0) - return r; - - if (!strv_isempty(arg_extension_images)) { - r = sd_bus_message_append(m, "t", UINT64_C(0)); - if (r < 0) - return bus_log_create_error(r); - } - - r = sd_bus_call(bus, m, 0, &error, &reply); - if (r < 0) - return log_error_errno(r, "%s failed: %s", method, bus_error_message(&error, r)); - - r = sd_bus_message_read(reply, "s", &state); - if (r < 0) - return r; - - if (!arg_quiet) - puts(state); - - return streq(state, "detached"); -} - static int dump_profiles(void) { _cleanup_(sd_bus_error_free) sd_bus_error error = SD_BUS_ERROR_NULL; _cleanup_(sd_bus_flush_close_unrefp) sd_bus *bus = NULL; @@ -1269,14 +1269,14 @@ static int help(void) { "%sAttach or detach portable services from the local system.%s\n" "\nCommands:\n" " list List available portable service images\n" + " inspect NAME|PATH [PREFIX...]\n" + " Show details of specified portable service image\n" " attach NAME|PATH [PREFIX...]\n" " Attach the specified portable service image\n" " detach NAME|PATH [PREFIX...]\n" " Detach the specified portable service image\n" " reattach NAME|PATH [PREFIX...]\n" " Reattach the specified portable service image\n" - " inspect NAME|PATH [PREFIX...]\n" - " Show details of specified portable service image\n" " is-attached NAME|PATH Query if portable service image is attached\n" " read-only NAME|PATH [BOOL] Mark or unmark portable service image read-only\n" " remove NAME|PATH... Remove a portable service image\n" From 0731b6c4d947d5eb6cb6f1cfd7c4890965ee00c2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Zbigniew=20J=C4=99drzejewski-Szmek?= Date: Tue, 5 May 2026 14:36:12 +0200 Subject: [PATCH 166/242] portablectl: convert to OPTION and VERB macros s|attach/detach from the local system|in the local system|, because "attach from" doesn't work. The synopses for 'list' and 'set-limit' are fixed. Co-developed-by: Claude Opus 4.7 --- src/portable/portablectl.c | 273 ++++++++++++++----------------------- 1 file changed, 106 insertions(+), 167 deletions(-) diff --git a/src/portable/portablectl.c b/src/portable/portablectl.c index 0c0f03ff166ac..575ab4149aa63 100644 --- a/src/portable/portablectl.c +++ b/src/portable/portablectl.c @@ -1,10 +1,9 @@ /* SPDX-License-Identifier: LGPL-2.1-or-later */ -#include - #include "sd-bus.h" #include "alloc-util.h" +#include "ansi-color.h" #include "build.h" #include "bus-error.h" #include "bus-locator.h" @@ -15,8 +14,11 @@ #include "env-file.h" #include "format-table.h" #include "fs-util.h" +#include "glyph-util.h" +#include "help-util.h" #include "install.h" #include "main-func.h" +#include "options.h" #include "os-util.h" #include "pager.h" #include "parse-argument.h" @@ -24,7 +26,6 @@ #include "path-util.h" #include "polkit-agent.h" #include "portable.h" -#include "pretty-print.h" #include "string-util.h" #include "strv.h" #include "verbs.h" @@ -155,8 +156,9 @@ static int extract_prefix(const char *path, char **ret) { if (!name) return -ENOMEM; - /* A slightly reduced version of what's permitted in unit names. With ':' and '\' are removed, as well as '_' - * which we use as delimiter for the second part of the image string, which we ignore for now. */ + /* A slightly reduced version of what's permitted in unit names. With ':' and '\' are removed, as + * well as '_' which we use as delimiter for the second part of the image string, which we ignore for + * now. */ if (!in_charset(name, ALPHANUMERICAL "-.")) return -EINVAL; @@ -171,9 +173,9 @@ static int determine_matches(const char *image, char **l, bool allow_any, char * _cleanup_strv_free_ char **k = NULL; int r; - /* Determine the matches to apply. If the list is empty we derive the match from the image name. If the list - * contains exactly the "-" we return a wildcard list (which is the empty list), but only if this is expressly - * permitted. */ + /* Determine the matches to apply. If the list is empty we derive the match from the image name. If + * the list contains exactly the "-" we return a wildcard list (which is the empty list), but only if + * this is expressly permitted. */ if (strv_isempty(l)) { char *prefix; @@ -251,6 +253,8 @@ static int maybe_reload(sd_bus **bus) { return bus_service_manager_reload(*bus); } +VERB(verb_list_images, "list", NULL, VERB_ANY, 1, VERB_DEFAULT, + "List available portable service images (default)"); static int verb_list_images(int argc, char *argv[], uintptr_t _data, void *userdata) { _cleanup_(sd_bus_error_free) sd_bus_error error = SD_BUS_ERROR_NULL; _cleanup_(sd_bus_message_unrefp) sd_bus_message *reply = NULL; @@ -366,6 +370,8 @@ static int get_image_metadata(sd_bus *bus, const char *image, char **matches, sd return 0; } +VERB(verb_inspect_image, "inspect", "NAME|PATH [PREFIX…]", 2, VERB_ANY, 0, + "Show details of specified portable service image"); static int verb_inspect_image(int argc, char *argv[], uintptr_t _data, void *userdata) { _cleanup_(sd_bus_message_unrefp) sd_bus_message *reply = NULL; _cleanup_(sd_bus_flush_close_unrefp) sd_bus *bus = NULL; @@ -1030,10 +1036,14 @@ static int attach_reattach_image(int argc, char *argv[], const char *method) { return 0; } +VERB(verb_attach_image, "attach", "NAME|PATH [PREFIX…]", 2, VERB_ANY, 0, + "Attach the specified portable service image"); static int verb_attach_image(int argc, char *argv[], uintptr_t _data, void *userdata) { return attach_reattach_image(argc, argv, strv_isempty(arg_extension_images) && !arg_force ? "AttachImage" : "AttachImageWithExtensions"); } +VERB(verb_detach_image, "detach", "NAME|PATH [PREFIX…]", 2, VERB_ANY, 0, + "Detach the specified portable service image"); static int verb_detach_image(int argc, char *argv[], uintptr_t _data, void *userdata) { _cleanup_(sd_bus_message_unrefp) sd_bus_message *m = NULL, *reply = NULL; _cleanup_(sd_bus_error_free) sd_bus_error error = SD_BUS_ERROR_NULL; @@ -1088,10 +1098,14 @@ static int verb_detach_image(int argc, char *argv[], uintptr_t _data, void *user return 0; } +VERB(verb_reattach_image, "reattach", "NAME|PATH [PREFIX…]", 2, VERB_ANY, 0, + "Reattach the specified portable service image"); static int verb_reattach_image(int argc, char *argv[], uintptr_t _data, void *userdata) { return attach_reattach_image(argc, argv, strv_isempty(arg_extension_images) && !arg_force ? "ReattachImage" : "ReattachImageWithExtensions"); } +VERB(verb_is_image_attached, "is-attached", "NAME|PATH", 2, 2, 0, + "Query if portable service image is attached"); static int verb_is_image_attached(int argc, char *argv[], uintptr_t _data, void *userdata) { _cleanup_(sd_bus_message_unrefp) sd_bus_message *m = NULL, *reply = NULL; _cleanup_(sd_bus_error_free) sd_bus_error error = SD_BUS_ERROR_NULL; @@ -1142,6 +1156,8 @@ static int verb_is_image_attached(int argc, char *argv[], uintptr_t _data, void return streq(state, "detached"); } +VERB(verb_read_only_image, "read-only", "NAME|PATH [BOOL]", 2, 3, 0, + "Mark or unmark portable service image read-only"); static int verb_read_only_image(int argc, char *argv[], uintptr_t _data, void *userdata) { _cleanup_(sd_bus_error_free) sd_bus_error error = SD_BUS_ERROR_NULL; _cleanup_(sd_bus_flush_close_unrefp) sd_bus *bus = NULL; @@ -1166,6 +1182,8 @@ static int verb_read_only_image(int argc, char *argv[], uintptr_t _data, void *u return 0; } +VERB(verb_remove_image, "remove", "NAME|PATH…", 2, VERB_ANY, 0, + "Remove a portable service image"); static int verb_remove_image(int argc, char *argv[], uintptr_t _data, void *userdata) { _cleanup_(sd_bus_flush_close_unrefp) sd_bus *bus = NULL; int r, i; @@ -1197,6 +1215,8 @@ static int verb_remove_image(int argc, char *argv[], uintptr_t _data, void *user return 0; } +VERB(verb_set_limit, "set-limit", "[NAME|PATH] LIMIT", 3, 3, 0, + "Set image or pool size limit (disk quota)"); static int verb_set_limit(int argc, char *argv[], uintptr_t _data, void *userdata) { _cleanup_(sd_bus_error_free) sd_bus_error error = SD_BUS_ERROR_NULL; _cleanup_(sd_bus_flush_close_unrefp) sd_bus *bus = NULL; @@ -1256,176 +1276,104 @@ static int dump_profiles(void) { } static int help(void) { - _cleanup_free_ char *link = NULL; + _cleanup_(table_unrefp) Table *verbs = NULL, *options = NULL; int r; pager_open(arg_pager_flags); - r = terminal_urlify_man("portablectl", "1", &link); + r = verbs_get_help_table(&verbs); if (r < 0) - return log_oom(); + return r; + + r = option_parser_get_help_table(&options); + if (r < 0) + return r; - printf("%s [OPTIONS...] COMMAND ...\n\n" - "%sAttach or detach portable services from the local system.%s\n" - "\nCommands:\n" - " list List available portable service images\n" - " inspect NAME|PATH [PREFIX...]\n" - " Show details of specified portable service image\n" - " attach NAME|PATH [PREFIX...]\n" - " Attach the specified portable service image\n" - " detach NAME|PATH [PREFIX...]\n" - " Detach the specified portable service image\n" - " reattach NAME|PATH [PREFIX...]\n" - " Reattach the specified portable service image\n" - " is-attached NAME|PATH Query if portable service image is attached\n" - " read-only NAME|PATH [BOOL] Mark or unmark portable service image read-only\n" - " remove NAME|PATH... Remove a portable service image\n" - " set-limit [NAME|PATH] Set image or pool size limit (disk quota)\n" - "\nOptions:\n" - " -h --help Show this help\n" - " --version Show package version\n" - " --no-pager Do not pipe output into a pager\n" - " --no-legend Do not show the headers and footers\n" - " --no-ask-password Do not ask for system passwords\n" - " -H --host=[USER@]HOST Operate on remote host\n" - " -M --machine=CONTAINER Operate on local container\n" - " -q --quiet Suppress informational messages\n" - " -p --profile=PROFILE Pick security profile for portable service\n" - " --copy=copy|auto|symlink|mixed\n" - " Pick copying or symlinking of resources\n" - " --runtime Attach portable service until next reboot only\n" - " --no-reload Don't reload the system and service manager\n" - " --cat When inspecting include unit and os-release file\n" - " contents\n" - " --enable Immediately enable/disable the portable service\n" - " after attach/detach\n" - " --now Immediately start/stop the portable service after\n" - " attach/before detach\n" - " --no-block Don't block waiting for attach --now to complete\n" - " --extension=PATH Extend the image with an overlay\n" - " --force Skip 'already active' check when attaching or\n" - " detaching an image (with extensions)\n" - " --clean When detaching, also remove configuration, state,\n" - " cache, logs or runtime data of the portable\n" - " service(s)\n" - "\nSee the %s for details.\n", - program_invocation_short_name, - ansi_highlight(), - ansi_normal(), - link); + (void) table_sync_column_widths(0, verbs, options); - return 0; -} + help_cmdline("[OPTIONS…] COMMAND …"); + help_abstract("Attach or detach portable services in the local system."); + + help_section("Commands"); + r = table_print_or_warn(verbs); + if (r < 0) + return r; -static int verb_help(int argc, char *argv[], uintptr_t _data, void *userdata) { - return help(); + help_section("Options"); + r = table_print_or_warn(options); + if (r < 0) + return r; + + help_man_page_reference("portablectl", "1"); + return 0; } -static int parse_argv(int argc, char *argv[]) { - - enum { - ARG_VERSION = 0x100, - ARG_NO_PAGER, - ARG_NO_LEGEND, - ARG_NO_ASK_PASSWORD, - ARG_COPY, - ARG_RUNTIME, - ARG_NO_RELOAD, - ARG_CAT, - ARG_ENABLE, - ARG_NOW, - ARG_NO_BLOCK, - ARG_EXTENSION, - ARG_FORCE, - ARG_CLEAN, - ARG_USER, - ARG_SYSTEM, - }; - - static const struct option options[] = { - { "help", no_argument, NULL, 'h' }, - { "version", no_argument, NULL, ARG_VERSION }, - { "no-pager", no_argument, NULL, ARG_NO_PAGER }, - { "no-legend", no_argument, NULL, ARG_NO_LEGEND }, - { "no-ask-password", no_argument, NULL, ARG_NO_ASK_PASSWORD }, - { "host", required_argument, NULL, 'H' }, - { "machine", required_argument, NULL, 'M' }, - { "quiet", no_argument, NULL, 'q' }, - { "profile", required_argument, NULL, 'p' }, - { "copy", required_argument, NULL, ARG_COPY }, - { "runtime", no_argument, NULL, ARG_RUNTIME }, - { "no-reload", no_argument, NULL, ARG_NO_RELOAD }, - { "cat", no_argument, NULL, ARG_CAT }, - { "enable", no_argument, NULL, ARG_ENABLE }, - { "now", no_argument, NULL, ARG_NOW }, - { "no-block", no_argument, NULL, ARG_NO_BLOCK }, - { "extension", required_argument, NULL, ARG_EXTENSION }, - { "force", no_argument, NULL, ARG_FORCE }, - { "clean", no_argument, NULL, ARG_CLEAN }, - { "user", no_argument, NULL, ARG_USER }, - { "system", no_argument, NULL, ARG_SYSTEM }, - {} - }; - - int r, c; +VERB_COMMON_HELP_HIDDEN(help); +static int parse_argv(int argc, char *argv[], char ***remaining_args) { assert(argc >= 0); assert(argv); + assert(remaining_args); - while ((c = getopt_long(argc, argv, "hH:M:qp:", options, NULL)) >= 0) + OptionParser opts = { argc, argv }; + int r; + FOREACH_OPTION_OR_RETURN(c, &opts) switch (c) { - case 'h': + OPTION_COMMON_HELP: return help(); - case ARG_VERSION: + OPTION_COMMON_VERSION: return version(); - case ARG_NO_PAGER: + OPTION_COMMON_NO_PAGER: arg_pager_flags |= PAGER_DISABLE; break; - case ARG_NO_LEGEND: + OPTION_COMMON_NO_LEGEND: arg_legend = false; break; - case ARG_NO_ASK_PASSWORD: + OPTION_COMMON_NO_ASK_PASSWORD: arg_ask_password = false; break; - case 'H': + OPTION_COMMON_HOST: arg_transport = BUS_TRANSPORT_REMOTE; - arg_host = optarg; + arg_host = opts.arg; break; - case 'M': - r = parse_machine_argument(optarg, &arg_host, &arg_transport); + OPTION_COMMON_MACHINE: + r = parse_machine_argument(opts.arg, &arg_host, &arg_transport); if (r < 0) return r; break; - case 'q': + OPTION('q', "quiet", NULL, "Suppress informational messages"): arg_quiet = true; break; - case 'p': - if (streq(optarg, "help")) + OPTION('p', "profile", "PROFILE", + "Pick security profile for portable service"): + if (streq(opts.arg, "help")) return dump_profiles(); - if (!filename_is_valid(optarg)) + if (!filename_is_valid(opts.arg)) return log_error_errno(SYNTHETIC_ERRNO(EINVAL), - "Unit profile name not valid: %s", optarg); + "Unit profile name not valid: %s", opts.arg); - arg_profile = optarg; + arg_profile = opts.arg; break; - case ARG_COPY: - if (streq(optarg, "auto")) + OPTION_LONG("copy", "MODE", + "Pick copying or symlinking of resources " + "(copy, auto, symlink, mixed)"): + if (streq(opts.arg, "auto")) arg_copy_mode = NULL; - else if (STR_IN_SET(optarg, "copy", "symlink", "mixed")) - arg_copy_mode = optarg; - else if (streq(optarg, "help")) { + else if (STR_IN_SET(opts.arg, "copy", "symlink", "mixed")) + arg_copy_mode = opts.arg; + else if (streq(opts.arg, "help")) { puts("auto\n" "copy\n" "symlink\n" @@ -1433,90 +1381,81 @@ static int parse_argv(int argc, char *argv[]) { return 0; } else return log_error_errno(SYNTHETIC_ERRNO(EINVAL), - "Failed to parse --copy= argument: %s", optarg); - + "Failed to parse --copy= argument: %s", opts.arg); break; - case ARG_RUNTIME: + OPTION_LONG("runtime", NULL, + "Attach portable service until next reboot only"): arg_runtime = true; break; - case ARG_NO_RELOAD: + OPTION_LONG("no-reload", NULL, + "Don't reload the system and service manager"): arg_reload = false; break; - case ARG_CAT: + OPTION_LONG("cat", NULL, + "When inspecting include unit and os-release file contents"): arg_cat = true; break; - case ARG_ENABLE: + OPTION_LONG("enable", NULL, + "Immediately enable/disable the portable service after attach/detach"): arg_enable = true; break; - case ARG_NOW: + OPTION_LONG("now", NULL, + "Immediately start/stop the portable service after attach/before detach"): arg_now = true; break; - case ARG_NO_BLOCK: + OPTION_LONG("no-block", NULL, + "Don't block waiting for attach --now to complete"): arg_no_block = true; break; - case ARG_EXTENSION: - r = strv_extend(&arg_extension_images, optarg); + OPTION_LONG("extension", "PATH", + "Extend the image with an overlay"): + r = strv_extend(&arg_extension_images, opts.arg); if (r < 0) return log_oom(); break; - case ARG_FORCE: + OPTION_LONG("force", NULL, + "Skip 'already active' check when attaching or detaching an image (with extensions)"): arg_force = true; break; - case ARG_CLEAN: + OPTION_LONG("clean", NULL, + "When detaching, also remove configuration, state, " + "cache, logs or runtime data of the portable service(s)"): arg_clean = true; break; - case ARG_USER: + OPTION_LONG("user", NULL, /* help= */ NULL): arg_runtime_scope = RUNTIME_SCOPE_USER; break; - case ARG_SYSTEM: + OPTION_LONG("system", NULL, /* help= */ NULL): arg_runtime_scope = RUNTIME_SCOPE_SYSTEM; break; - - case '?': - return -EINVAL; - - default: - assert_not_reached(); } + *remaining_args = option_parser_get_args(&opts); return 1; } static int run(int argc, char *argv[]) { - static const Verb verbs[] = { - { "help", VERB_ANY, VERB_ANY, 0, verb_help }, - { "list", VERB_ANY, 1, VERB_DEFAULT, verb_list_images }, - { "attach", 2, VERB_ANY, 0, verb_attach_image }, - { "detach", 2, VERB_ANY, 0, verb_detach_image }, - { "inspect", 2, VERB_ANY, 0, verb_inspect_image }, - { "is-attached", 2, 2, 0, verb_is_image_attached }, - { "read-only", 2, 3, 0, verb_read_only_image }, - { "remove", 2, VERB_ANY, 0, verb_remove_image }, - { "set-limit", 3, 3, 0, verb_set_limit }, - { "reattach", 2, VERB_ANY, 0, verb_reattach_image }, - {} - }; - + char **args = NULL; int r; log_setup(); - r = parse_argv(argc, argv); + r = parse_argv(argc, argv, &args); if (r <= 0) return r; - return dispatch_verb(argc, argv, verbs, NULL); + return dispatch_verb_with_args(args, NULL); } DEFINE_MAIN_FUNCTION(run); From cbadda5e60bae8858f38379aae3556ed3b03721f Mon Sep 17 00:00:00 2001 From: Christian Brauner Date: Wed, 6 May 2026 12:40:26 +0200 Subject: [PATCH 167/242] vmspawn: reject --bind-volume= duplicates at parse time MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit bind_volume_parse() does not look at peers, so passing the same PROVIDER:VOLUME twice on the command line silently produces two parsed entries in arg_bind_volumes. vmspawn_bind_volume_acquire() then builds two DriveInfo with identical d->id (":"). At boot, bridge_register_drive() puts d->id into the b->block_devices hashmap; the second insert returns -EEXIST and the user sees a bare "File exists" with no context for which volume is responsible. Reject the collision at the parse site with a linear scan over the existing array — n_items is small (one entry per --bind-volume on the command line), and a clear error message naming the offending volume is much more useful than the late EEXIST from the QMP setup loop. Signed-off-by: Christian Brauner (Amutable) --- src/vmspawn/vmspawn.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/src/vmspawn/vmspawn.c b/src/vmspawn/vmspawn.c index ee8ae518f0330..57b7697079ee4 100644 --- a/src/vmspawn/vmspawn.c +++ b/src/vmspawn/vmspawn.c @@ -787,6 +787,12 @@ static int parse_argv(int argc, char *argv[]) { bv->config, valid); } + FOREACH_ARRAY(it, arg_bind_volumes.items, arg_bind_volumes.n_items) + if (streq((*it)->provider, bv->provider) && streq((*it)->volume, bv->volume)) + return log_error_errno(SYNTHETIC_ERRNO(EINVAL), + "Volume '%s:%s' specified more than once for --bind-volume=.", + bv->provider, bv->volume); + if (!GREEDY_REALLOC(arg_bind_volumes.items, arg_bind_volumes.n_items + 1)) return log_oom(); arg_bind_volumes.items[arg_bind_volumes.n_items++] = TAKE_PTR(bv); From a5004653acd25d166ab8c10d987173f3bae0420a Mon Sep 17 00:00:00 2001 From: Lennart Poettering Date: Wed, 6 May 2026 14:46:19 +0200 Subject: [PATCH 168/242] update TODO --- TODO.md | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/TODO.md b/TODO.md index f3d1070c6c746..466eb223426dc 100644 --- a/TODO.md +++ b/TODO.md @@ -128,6 +128,12 @@ SPDX-License-Identifier: LGPL-2.1-or-later ## Features +- bootctl link + sysupdate integration + - make sysupdate call out to a special varlink dir on completion + - bind bootctl link socket in there, which when invoked goes to new dir in + /var/ where downloaded kernels+confext+sysext are dropped in (place in + .v/) and then does "bootctl link" on them. + - a tool that can prep credentials, put them in the ESP, for provisioning systems for SBC. Should be doing what sysinstall does with the credentials, and maybe even *be* sysinstall. From a7e276a82f7e2bf080685afca83d30215b1307a5 Mon Sep 17 00:00:00 2001 From: Frantisek Sumsal Date: Wed, 6 May 2026 14:03:29 +0200 Subject: [PATCH 169/242] dbus: limit the number of env variables to something reasonable, vol. 3 Let's limit the number of environment variables when creating a transient unit via StartTransientUnit as well, since validating the environment variable names/assignments is expensive. Follow-up for 49c1e1bcf2b482b6de35a4212a06ed1d8c382745. --- src/core/dbus-execute.c | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/src/core/dbus-execute.c b/src/core/dbus-execute.c index aea9fbb304e22..2329762f16b68 100644 --- a/src/core/dbus-execute.c +++ b/src/core/dbus-execute.c @@ -3456,6 +3456,9 @@ int bus_exec_context_set_transient_property( if (r < 0) return r; + if (strv_length(l) > ENVIRONMENT_ASSIGNMENTS_MAX) + return sd_bus_error_set(reterr_error, SD_BUS_ERROR_LIMITS_EXCEEDED, + "Too many environment assignments."); if (!strv_env_is_valid(l)) return sd_bus_error_set(reterr_error, SD_BUS_ERROR_INVALID_ARGS, "Invalid environment block."); @@ -3490,6 +3493,9 @@ int bus_exec_context_set_transient_property( if (r < 0) return r; + if (strv_length(l) > ENVIRONMENT_ASSIGNMENTS_MAX) + return sd_bus_error_set(reterr_error, SD_BUS_ERROR_LIMITS_EXCEEDED, + "Too many environment variable names or assignments."); if (!strv_env_name_or_assignment_is_valid(l)) return sd_bus_error_set(reterr_error, SD_BUS_ERROR_INVALID_ARGS, "Invalid UnsetEnvironment= list."); @@ -3642,6 +3648,9 @@ int bus_exec_context_set_transient_property( if (r < 0) return r; + if (strv_length(l) > ENVIRONMENT_ASSIGNMENTS_MAX) + return sd_bus_error_set(reterr_error, SD_BUS_ERROR_LIMITS_EXCEEDED, + "Too many environment variable names."); if (!strv_env_name_is_valid(l)) return sd_bus_error_set(reterr_error, SD_BUS_ERROR_INVALID_ARGS, "Invalid PassEnvironment= block."); From f0a171d1d8d511bbdc8fd1a5cb9ec2b2b156adbc Mon Sep 17 00:00:00 2001 From: Nita Vesa Date: Wed, 6 May 2026 19:37:14 +0300 Subject: [PATCH 170/242] hwdb: add Fn-key mappings for MSI GE76 Raider 10UG --- hwdb.d/60-keyboard.hwdb | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/hwdb.d/60-keyboard.hwdb b/hwdb.d/60-keyboard.hwdb index 99006885f076f..8c1605930da97 100644 --- a/hwdb.d/60-keyboard.hwdb +++ b/hwdb.d/60-keyboard.hwdb @@ -1613,6 +1613,14 @@ evdev:atkbd:dmi:bvn*:bvr*:bd*:svnMICRO-STAR*:pnGE60*:* evdev:atkbd:dmi:bvn*:bvr*:bd*:svnMICRO-STAR*:pnGE70*:* KEYBOARD_KEY_c2=ejectcd +# MSI GE76 Raider 10UG uses Fn+F3 for touchpad and Fn+F6 and Fn+F7 +# for crosshair-mode and gaming-mode respectively but the latter +# two were previously not generating any keycodes under Linux +evdev:atkbd:dmi:bvn*:bvr*:bd*:svnMicro-Star*:pnGE76Raider10UG:* + KEYBOARD_KEY_76=touchpad_toggle + KEYBOARD_KEY_f3=prog2 + KEYBOARD_KEY_8a=prog3 + # some MSI models generate ACPI/input events on the LNXVIDEO input devices, # plus some extra synthesized ones on atkbd as an echo of actually changing the # brightness; so ignore those atkbd ones, to avoid loops From 67e23e0e050fca0279fc6045de20ed05b0b97bee Mon Sep 17 00:00:00 2001 From: Paul Meyer Date: Wed, 6 May 2026 17:35:48 +0200 Subject: [PATCH 171/242] vmspawn: search XDG_DATA_DIRS for QEMU firmware get_firmware_search_dirs() previously hardcoded /usr/share/qemu/firmware as the only system-wide search path. That assumption breaks on distributions that deliberately do not populate /usr/share, making vmspawn fail: "Failed to find OVMF config: No such file or directory". NixOS exposes those firmware locations through XDG_DATA_DIRS. Extend the search list with XDG_DATA_HOME/XDG_DATA_DIRS. This is the standard XDG mechanism and is already what QEMU itself uses for the same descriptors, so behavior matches user expectations across tooling. To avoid regressing setups where user has set XDG_DATA_DIRS to a custom value that omits /usr/share, keep /usr/share/qemu/firmware as an unconditional fallback. Precedence is unchanged: XDG_CONFIG_HOME/qemu/firmware still wins over /etc/qemu/firmware, which still wins over any shared-data dir. Co-developed-by: Claude Opus 4.7 Signed-off-by: Paul Meyer --- src/vmspawn/vmspawn-util.c | 32 +++++++++++++++++++++++--------- 1 file changed, 23 insertions(+), 9 deletions(-) diff --git a/src/vmspawn/vmspawn-util.c b/src/vmspawn/vmspawn-util.c index 72187b6731a0a..c5ab40a0d2055 100644 --- a/src/vmspawn/vmspawn-util.c +++ b/src/vmspawn/vmspawn-util.c @@ -254,7 +254,9 @@ static int get_firmware_search_dirs(char ***ret) { /* Search in: * - $XDG_CONFIG_HOME/qemu/firmware * - /etc/qemu/firmware - * - /usr/share/qemu/firmware + * - $XDG_DATA_HOME/qemu/firmware (default: ~/.local/share/qemu/firmware) + * - each entry in $XDG_DATA_DIRS suffixed with /qemu/firmware + * (default: /usr/local/share/qemu/firmware, /usr/share/qemu/firmware) * * Prioritising entries in "more specific" directories */ @@ -264,10 +266,27 @@ static int get_firmware_search_dirs(char ***ret) { return r; _cleanup_strv_free_ char **l = NULL; - l = strv_new(user_firmware_dir, "/etc/qemu/firmware", "/usr/share/qemu/firmware"); + l = strv_new(user_firmware_dir, "/etc/qemu/firmware"); if (!l) return log_oom_debug(); + _cleanup_strv_free_ char **data_dirs = NULL; + r = sd_path_lookup_strv(SD_PATH_SEARCH_SHARED, "/qemu/firmware", &data_dirs); + if (r < 0) + return r; + + r = strv_extend_strv(&l, data_dirs, /* filter_duplicates = */ true); + if (r < 0) + return log_oom_debug(); + + /* Always include /usr/share/qemu/firmware as a final fallback, + * even if a custom $XDG_DATA_DIRS replaced it. */ + r = strv_extend(&l, "/usr/share/qemu/firmware"); + if (r < 0) + return log_oom_debug(); + + strv_uniq(l); + *ret = TAKE_PTR(l); return 0; } @@ -424,13 +443,8 @@ int find_ovmf_config( if (r < 0) return r; - /* Search in: - * - $XDG_CONFIG_HOME/qemu/firmware - * - /etc/qemu/firmware - * - /usr/share/qemu/firmware - * - * Prioritising entries in "more specific" directories - */ + /* Search paths are constructed by get_firmware_search_dirs(), + * prioritising entries in "more specific" directories. */ r = list_ovmf_config(&conf_files); if (r < 0) From de29e618434ad53fee7e60cfc2e3d186968b21cb Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Zbigniew=20J=C4=99drzejewski-Szmek?= Date: Thu, 7 May 2026 13:07:52 +0200 Subject: [PATCH 172/242] portablectl: actually allow set-limit with one arg In the man page and in the actual code, the first arg is optional. But the arg limit in the verbs table did not allow only one arg to be specified. Fixes: 61d0578b07b97cbffebfd350bac481274e310d39 --- src/portable/portablectl.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/portable/portablectl.c b/src/portable/portablectl.c index 575ab4149aa63..c70d50634676a 100644 --- a/src/portable/portablectl.c +++ b/src/portable/portablectl.c @@ -1215,7 +1215,7 @@ static int verb_remove_image(int argc, char *argv[], uintptr_t _data, void *user return 0; } -VERB(verb_set_limit, "set-limit", "[NAME|PATH] LIMIT", 3, 3, 0, +VERB(verb_set_limit, "set-limit", "[NAME|PATH] LIMIT", 2, 3, 0, "Set image or pool size limit (disk quota)"); static int verb_set_limit(int argc, char *argv[], uintptr_t _data, void *userdata) { _cleanup_(sd_bus_error_free) sd_bus_error error = SD_BUS_ERROR_NULL; From a42f1ebb94062a9c11c072b6ca8ff84c509f9445 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Zbigniew=20J=C4=99drzejewski-Szmek?= Date: Tue, 5 May 2026 18:23:55 +0200 Subject: [PATCH 173/242] report-cgroup-server: convert to OPTION macros Co-developed-by: Claude Opus 4.7 --- src/report/report-cgroup-server.c | 60 +++++++++++-------------------- 1 file changed, 20 insertions(+), 40 deletions(-) diff --git a/src/report/report-cgroup-server.c b/src/report/report-cgroup-server.c index eef2ec05fcbfd..9f5ac9370694a 100644 --- a/src/report/report-cgroup-server.c +++ b/src/report/report-cgroup-server.c @@ -1,15 +1,14 @@ /* SPDX-License-Identifier: LGPL-2.1-or-later */ -#include - #include "sd-varlink.h" #include "alloc-util.h" -#include "ansi-color.h" #include "build.h" +#include "format-table.h" +#include "help-util.h" #include "log.h" #include "main-func.h" -#include "pretty-print.h" +#include "options.h" #include "report-cgroup.h" #include "varlink-io.systemd.Metrics.h" #include "varlink-util.h" @@ -46,63 +45,44 @@ static int vl_server(void) { } static int help(void) { - _cleanup_free_ char *url = NULL; + _cleanup_(table_unrefp) Table *options = NULL; int r; - r = terminal_urlify_man("systemd-report-cgroup", "8", &url); + r = option_parser_get_help_table(&options); if (r < 0) - return log_oom(); + return r; - printf("%s [OPTIONS...]\n" - "\n%sReport cgroup metrics.%s\n" - "\n%sOptions:%s\n" - " -h --help Show this help\n" - " --version Show package version\n" - "\nSee the %s for details.\n", - program_invocation_short_name, - ansi_highlight(), - ansi_normal(), - ansi_underline(), - ansi_normal(), - url); + help_cmdline("[OPTIONS...]"); + help_abstract("Report cgroup metrics."); + + help_section("Options"); + r = table_print_or_warn(options); + if (r < 0) + return r; + help_man_page_reference("systemd-report-cgroup", "8"); return 0; } static int parse_argv(int argc, char *argv[]) { - enum { - ARG_VERSION = 0x100, - }; - - static const struct option options[] = { - { "help", no_argument, NULL, 'h' }, - { "version", no_argument, NULL, ARG_VERSION }, - {} - }; - - int c, r; + int r; assert(argc >= 0); assert(argv); - while ((c = getopt_long(argc, argv, "h", options, NULL)) >= 0) + OptionParser opts = { argc, argv }; + FOREACH_OPTION_OR_RETURN(c, &opts) switch (c) { - case 'h': + OPTION_COMMON_HELP: return help(); - case ARG_VERSION: + OPTION_COMMON_VERSION: return version(); - - case '?': - return -EINVAL; - - default: - assert_not_reached(); } - if (optind < argc) + if (option_parser_get_n_args(&opts) > 0) return log_error_errno(SYNTHETIC_ERRNO(EINVAL), "This program takes no arguments."); From b49a4daa1b118cdb55bf0389dea09375b2a4e577 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Zbigniew=20J=C4=99drzejewski-Szmek?= Date: Wed, 6 May 2026 13:39:25 +0200 Subject: [PATCH 174/242] coredumpctl: convert to OPTION and VERB macros The order of options is changed (to what was present in parse_argv). I don't the order in --help was mostly random, as is the new one, so I didn't try to preserve the old order. Some help strings are reworded/adjusted. Co-developed-by: Claude Opus 4.7 --- src/coredump/coredumpctl.c | 230 ++++++++++++++----------------------- 1 file changed, 85 insertions(+), 145 deletions(-) diff --git a/src/coredump/coredumpctl.c b/src/coredump/coredumpctl.c index b6ca0f8b7dd23..d89da78755f35 100644 --- a/src/coredump/coredumpctl.c +++ b/src/coredump/coredumpctl.c @@ -1,7 +1,6 @@ /* SPDX-License-Identifier: LGPL-2.1-or-later */ #include -#include #include #include #include @@ -14,6 +13,7 @@ #include "sd-messages.h" #include "alloc-util.h" +#include "ansi-color.h" #include "build.h" #include "bus-error.h" #include "bus-locator.h" @@ -29,6 +29,7 @@ #include "format-util.h" #include "fs-util.h" #include "glob-util.h" +#include "help-util.h" #include "image-policy.h" #include "io-util.h" #include "journal-internal.h" @@ -39,12 +40,12 @@ #include "loop-util.h" #include "main-func.h" #include "mount-util.h" +#include "options.h" #include "pager.h" #include "parse-argument.h" #include "parse-util.h" #include "path-util.h" #include "pidref.h" -#include "pretty-print.h" #include "process-util.h" #include "signal-util.h" #include "string-util.h" @@ -177,224 +178,166 @@ static int acquire_journal(sd_journal **ret, char **matches) { } static int help(void) { - _cleanup_free_ char *link = NULL; + _cleanup_(table_unrefp) Table *verbs = NULL, *options = NULL; int r; - r = terminal_urlify_man("coredumpctl", "1", &link); + r = verbs_get_help_table(&verbs); if (r < 0) - return log_oom(); + return r; + + r = option_parser_get_help_table(&options); + if (r < 0) + return r; + + (void) table_sync_column_widths(0, verbs, options); + + help_cmdline("[OPTIONS…] COMMAND …"); + help_abstract("List or retrieve coredumps from the journal."); - printf("%1$s [OPTIONS...] COMMAND ...\n\n" - "%5$sList or retrieve coredumps from the journal.%6$s\n" - "\n%3$sCommands:%4$s\n" - " list [MATCHES...] List available coredumps (default)\n" - " info [MATCHES...] Show detailed information about one or more coredumps\n" - " dump [MATCHES...] Print first matching coredump to stdout\n" - " debug [MATCHES...] Start a debugger for the first matching coredump\n" - "\n%3$sOptions:%4$s\n" - " -h --help Show this help\n" - " --version Print version string\n" - " --no-pager Do not pipe output into a pager\n" - " --no-legend Do not print the column headers\n" - " --json=pretty|short|off Generate JSON output\n" - " --debugger=DEBUGGER Use the given debugger\n" - " -A --debugger-arguments=ARGS Pass the given arguments to the debugger\n" - " -n INT Show maximum number of rows\n" - " -1 Show information about most recent entry only\n" - " -S --since=DATE Only print coredumps since the date\n" - " -U --until=DATE Only print coredumps until the date\n" - " -r --reverse Show the newest entries first\n" - " -F --field=FIELD List all values a certain field takes\n" - " -o --output=FILE Write output to FILE\n" - " --file=PATH Use journal file\n" - " -D --directory=DIR Use journal files from directory\n\n" - " -q --quiet Do not show info messages and privilege warning\n" - " --all Look at all journal files instead of local ones\n" - " --root=PATH Operate on an alternate filesystem root\n" - " --image=PATH Operate on disk image as filesystem root\n" - " --image-policy=POLICY Specify disk image dissection policy\n" - "\nSee the %2$s for details.\n", - program_invocation_short_name, - link, - ansi_underline(), - ansi_normal(), - ansi_highlight(), - ansi_normal()); + help_section("Commands"); + r = table_print_or_warn(verbs); + if (r < 0) + return r; + help_section("Options"); + r = table_print_or_warn(options); + if (r < 0) + return r; + + help_man_page_reference("coredumpctl", "1"); return 0; } -static int verb_help(int argc, char *argv[], uintptr_t _data, void *userdata) { - return help(); -} +VERB_COMMON_HELP_HIDDEN(help); -static int parse_argv(int argc, char *argv[]) { - enum { - ARG_VERSION = 0x100, - ARG_NO_PAGER, - ARG_NO_LEGEND, - ARG_JSON, - ARG_DEBUGGER, - ARG_FILE, - ARG_ROOT, - ARG_IMAGE, - ARG_IMAGE_POLICY, - ARG_ALL, - }; - - int c, r; - - static const struct option options[] = { - { "help", no_argument, NULL, 'h' }, - { "version" , no_argument, NULL, ARG_VERSION }, - { "no-pager", no_argument, NULL, ARG_NO_PAGER }, - { "no-legend", no_argument, NULL, ARG_NO_LEGEND }, - { "debugger", required_argument, NULL, ARG_DEBUGGER }, - { "debugger-arguments", required_argument, NULL, 'A' }, - { "output", required_argument, NULL, 'o' }, - { "field", required_argument, NULL, 'F' }, - { "file", required_argument, NULL, ARG_FILE }, - { "directory", required_argument, NULL, 'D' }, - { "reverse", no_argument, NULL, 'r' }, - { "since", required_argument, NULL, 'S' }, - { "until", required_argument, NULL, 'U' }, - { "quiet", no_argument, NULL, 'q' }, - { "json", required_argument, NULL, ARG_JSON }, - { "root", required_argument, NULL, ARG_ROOT }, - { "image", required_argument, NULL, ARG_IMAGE }, - { "image-policy", required_argument, NULL, ARG_IMAGE_POLICY }, - { "all", no_argument, NULL, ARG_ALL }, - {} - }; +static int parse_argv(int argc, char *argv[], char ***remaining_args) { + int r; assert(argc >= 0); assert(argv); + assert(remaining_args); + + OptionParser opts = { argc, argv }; - while ((c = getopt_long(argc, argv, "hA:o:F:1D:rS:U:qn:", options, NULL)) >= 0) + FOREACH_OPTION_OR_RETURN(c, &opts) switch (c) { - case 'h': + + OPTION_COMMON_HELP: return help(); - case ARG_VERSION: + OPTION_COMMON_VERSION: return version(); - case ARG_NO_PAGER: + OPTION_COMMON_NO_PAGER: arg_pager_flags |= PAGER_DISABLE; break; - case ARG_NO_LEGEND: + OPTION_COMMON_NO_LEGEND: arg_legend = false; break; - case ARG_DEBUGGER: - arg_debugger = optarg; + OPTION_LONG("debugger", "DEBUGGER", "Use the given debugger"): + arg_debugger = opts.arg; break; - case 'A': { + OPTION('A', "debugger-arguments", "…", "Pass the given arguments to the debugger"): { _cleanup_strv_free_ char **l = NULL; - r = strv_split_full(&l, optarg, WHITESPACE, EXTRACT_UNQUOTE); + r = strv_split_full(&l, opts.arg, WHITESPACE, EXTRACT_UNQUOTE); if (r < 0) - return log_error_errno(r, "Failed to parse debugger arguments '%s': %m", optarg); + return log_error_errno(r, "Failed to parse debugger arguments '%s': %m", opts.arg); strv_free_and_replace(arg_debugger_args, l); break; } - case ARG_FILE: - r = glob_extend(&arg_file, optarg, GLOB_NOCHECK); + OPTION_LONG("file", "PATH", "Use journal file"): + r = glob_extend(&arg_file, opts.arg, GLOB_NOCHECK); if (r < 0) return log_error_errno(r, "Failed to add paths: %m"); break; - case 'o': + OPTION('o', "output", "FILE", "Write output to FILE"): if (arg_output) return log_error_errno(SYNTHETIC_ERRNO(EINVAL), "Cannot set output more than once."); - arg_output = optarg; + arg_output = opts.arg; break; - case 'S': - r = parse_timestamp(optarg, &arg_since); + OPTION('S', "since", "DATE", "Only print coredumps since the date"): + r = parse_timestamp(opts.arg, &arg_since); if (r < 0) - return log_error_errno(r, "Failed to parse timestamp '%s': %m", optarg); + return log_error_errno(r, "Failed to parse timestamp '%s': %m", opts.arg); break; - case 'U': - r = parse_timestamp(optarg, &arg_until); + OPTION('U', "until", "DATE", "Only print coredumps until the date"): + r = parse_timestamp(opts.arg, &arg_until); if (r < 0) - return log_error_errno(r, "Failed to parse timestamp '%s': %m", optarg); + return log_error_errno(r, "Failed to parse timestamp '%s': %m", opts.arg); break; - case 'F': + OPTION('F', "field", "FIELD", "List all values a certain field takes"): if (arg_field) return log_error_errno(SYNTHETIC_ERRNO(EINVAL), "Cannot use --field/-F more than once."); - arg_field = optarg; + arg_field = opts.arg; break; - case '1': + OPTION_SHORT('1', NULL, "Show information about most recent entry only"): arg_rows_max = 1; arg_reverse = true; break; - case 'n': { + OPTION_SHORT('n', "INT", "Show at most this many rows"): { unsigned n; - r = safe_atou(optarg, &n); + r = safe_atou(opts.arg, &n); if (r < 0 || n < 1) return log_error_errno(r < 0 ? r : SYNTHETIC_ERRNO(EINVAL), - "Invalid numeric parameter to -n: %s", optarg); + "Invalid numeric parameter to -n: %s", opts.arg); arg_rows_max = n; break; } - case 'D': - arg_directory = optarg; + OPTION('D', "directory", "DIR", "Use journal files from directory"): + arg_directory = opts.arg; break; - case ARG_ROOT: - r = parse_path_argument(optarg, false, &arg_root); + OPTION_LONG("root", "PATH", "Operate on an alternate filesystem root"): + r = parse_path_argument(opts.arg, false, &arg_root); if (r < 0) return r; break; - case ARG_IMAGE: - r = parse_path_argument(optarg, false, &arg_image); + OPTION_LONG("image", "PATH", "Operate on disk image as filesystem root"): + r = parse_path_argument(opts.arg, false, &arg_image); if (r < 0) return r; break; - case ARG_IMAGE_POLICY: - r = parse_image_policy_argument(optarg, &arg_image_policy); + OPTION_LONG("image-policy", "POLICY", "Specify disk image dissection policy"): + r = parse_image_policy_argument(opts.arg, &arg_image_policy); if (r < 0) return r; break; - case 'r': + OPTION('r', "reverse", NULL, "Show the newest entries first"): arg_reverse = true; break; - case 'q': + OPTION('q', "quiet", NULL, "Do not show info messages and privilege warning"): arg_quiet = true; break; - case ARG_JSON: - r = parse_json_argument(optarg, &arg_json_format_flags); + OPTION_COMMON_JSON: + r = parse_json_argument(opts.arg, &arg_json_format_flags); if (r <= 0) return r; - break; - case ARG_ALL: + OPTION_LONG("all", NULL, "Look at all journal files instead of local ones"): arg_all = true; break; - - case '?': - return -EINVAL; - - default: - assert_not_reached(); } if (arg_since != USEC_INFINITY && arg_until != USEC_INFINITY && @@ -405,6 +348,7 @@ static int parse_argv(int argc, char *argv[]) { if ((!!arg_directory + !!arg_image + !!arg_root) > 1) return log_error_errno(SYNTHETIC_ERRNO(EINVAL), "Please specify either --root=, --image= or -D/--directory=, the combination of these options is not supported."); + *remaining_args = option_parser_get_args(&opts); return 1; } @@ -902,6 +846,10 @@ static int print_entry( return print_info(stdout, j, n_found > 0); } +VERB(verb_dump_list, "list", "[MATCHES…]", VERB_ANY, VERB_ANY, VERB_DEFAULT, + "List available coredumps"); +VERB(verb_dump_list, "info", "[MATCHES…]", VERB_ANY, VERB_ANY, 0, + "Show detailed information about one or more coredumps"); static int verb_dump_list(int argc, char *argv[], uintptr_t _data, void *userdata) { _cleanup_(sd_journal_closep) sd_journal *j = NULL; _cleanup_(table_unrefp) Table *t = NULL; @@ -1146,6 +1094,8 @@ static int save_core(sd_journal *j, FILE *file, char **path, bool *unlink_temp) return r; } +VERB(verb_dump_core, "dump", "[MATCHES…]", VERB_ANY, VERB_ANY, 0, + "Print first matching coredump to stdout"); static int verb_dump_core(int argc, char *argv[], uintptr_t _data, void *userdata) { _cleanup_(sd_journal_closep) sd_journal *j = NULL; _cleanup_fclose_ FILE *f = NULL; @@ -1182,6 +1132,10 @@ static int verb_dump_core(int argc, char *argv[], uintptr_t _data, void *userdat return 0; } +VERB(verb_run_debug, "debug", "[MATCHES…]", VERB_ANY, VERB_ANY, 0, + "Start a debugger for the first matching coredump"); +VERB(verb_run_debug, "gdb", "[MATCHES…]", VERB_ANY, VERB_ANY, 0, + /* help= */ NULL); static int verb_run_debug(int argc, char *argv[], uintptr_t _data, void *userdata) { static const struct sigaction sa = { .sa_sigaction = sigterm_process_group_handler, @@ -1372,30 +1326,16 @@ static int check_units_active(void) { return c; } -static int coredumpctl_main(int argc, char *argv[]) { - - static const Verb verbs[] = { - { "list", VERB_ANY, VERB_ANY, VERB_DEFAULT, verb_dump_list }, - { "info", VERB_ANY, VERB_ANY, 0, verb_dump_list }, - { "dump", VERB_ANY, VERB_ANY, 0, verb_dump_core }, - { "debug", VERB_ANY, VERB_ANY, 0, verb_run_debug }, - { "gdb", VERB_ANY, VERB_ANY, 0, verb_run_debug }, - { "help", VERB_ANY, 1, 0, verb_help }, - {} - }; - - return dispatch_verb(argc, argv, verbs, NULL); -} - static int run(int argc, char *argv[]) { _cleanup_(loop_device_unrefp) LoopDevice *loop_device = NULL; _cleanup_(umount_and_freep) char *mounted_dir = NULL; + char **args = NULL; int r, units_active; setlocale(LC_ALL, ""); log_setup(); - r = parse_argv(argc, argv); + r = parse_argv(argc, argv, &args); if (r <= 0) return r; @@ -1425,7 +1365,7 @@ static int run(int argc, char *argv[]) { return log_oom(); } - r = coredumpctl_main(argc, argv); + r = dispatch_verb_with_args(args, NULL); if (units_active > 0) printf("%s-- Notice: %d systemd-coredump@.service %s, output may be incomplete.%s\n", From 561f3a65da050dedaf96854c460bbe97258e3d07 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Zbigniew=20J=C4=99drzejewski-Szmek?= Date: Wed, 6 May 2026 13:59:44 +0200 Subject: [PATCH 175/242] mount: convert to OPTION macros Previously, we'd show a partial synopsis for systemd-mount in --help for systemd-umount. I don't think it makes sense to do that. So now the --help for systemd-umount is separate, with just its syntax and a new blurb. "transiently" is dropped from the description. Mount points generally are transient, so no need to say that. (E.g. the man page for mount just says "attach" and "detach".) Co-developed-by: Claude Opus 4.7 --- src/mount/mount-tool.c | 300 +++++++++++++++-------------------------- 1 file changed, 106 insertions(+), 194 deletions(-) diff --git a/src/mount/mount-tool.c b/src/mount/mount-tool.c index f1c4c90d76883..80846c1c2a929 100644 --- a/src/mount/mount-tool.c +++ b/src/mount/mount-tool.c @@ -1,10 +1,9 @@ /* SPDX-License-Identifier: LGPL-2.1-or-later */ -#include - #include "sd-bus.h" #include "sd-device.h" +#include "ansi-color.h" #include "argv-util.h" #include "build.h" #include "bus-error.h" @@ -20,15 +19,16 @@ #include "format-table.h" #include "format-util.h" #include "fstab-util.h" +#include "help-util.h" #include "libmount-util.h" #include "main-func.h" #include "mountpoint-util.h" +#include "options.h" #include "pager.h" #include "parse-argument.h" #include "parse-util.h" #include "path-util.h" #include "polkit-agent.h" -#include "pretty-print.h" #include "process-util.h" #include "runtime-scope.h" #include "stat-util.h" @@ -108,296 +108,206 @@ static int parse_where(const char *input, char **ret_where) { return 0; } -static int help(void) { - _cleanup_free_ char *link = NULL; +static int help(char *argv[]) { + _cleanup_(table_unrefp) Table *options = NULL; int r; - r = terminal_urlify_man("systemd-mount", "1", &link); + r = option_parser_get_help_table(&options); if (r < 0) - return log_oom(); + return r; + + if (invoked_as(argv, "systemd-umount")) { + help_cmdline("[OPTIONS…] WHAT|WHERE…"); + help_abstract("Unmount one or more mount points."); + } else { + help_cmdline("[OPTIONS…] WHAT [WHERE]"); + help_cmdline("[OPTIONS…] --tmpfs [NAME] WHERE"); + help_cmdline("[OPTIONS…] --list"); + help_cmdline("[OPTIONS…] --umount WHAT|WHERE…"); + help_abstract("Establish a mount or auto-mount point."); + } - printf("systemd-mount [OPTIONS...] WHAT [WHERE]\n" - "systemd-mount [OPTIONS...] --tmpfs [NAME] WHERE\n" - "systemd-mount [OPTIONS...] --list\n" - "%1$s [OPTIONS...] %7$sWHAT|WHERE...\n" - "\n%5$sEstablish a mount or auto-mount point transiently.%6$s\n" - "\n%3$sOptions:%4$s\n" - " -h --help Show this help\n" - " --version Show package version\n" - " --no-block Do not wait until operation finished\n" - " --no-pager Do not pipe output into a pager\n" - " --no-legend Do not show the headers\n" - " -l --full Do not ellipsize output\n" - " --no-ask-password Do not prompt for password\n" - " -q --quiet Suppress information messages during runtime\n" - " --json=pretty|short|off Generate JSON output\n" - " --user Run as user unit\n" - " -H --host=[USER@]HOST Operate on remote host\n" - " -M --machine=CONTAINER Operate on local container\n" - " --discover Discover mount device metadata\n" - " -t --type=TYPE File system type\n" - " -o --options=OPTIONS Mount options\n" - " --owner=USER Add uid= and gid= options for USER\n" - " --fsck=no Don't run file system check before mount\n" - " --description=TEXT Description for unit\n" - " -p --property=NAME=VALUE Set mount unit property\n" - " --automount=BOOL Create an automount point\n" - " -A Same as --automount=yes\n" - " --timeout-idle-sec=SEC Specify automount idle timeout\n" - " --automount-property=NAME=VALUE\n" - " Set automount unit property\n" - " --bind-device Bind automount unit to device\n" - " --list List mountable block devices\n" - " -u --umount Unmount mount points\n" - " -G --collect Unload unit after it stopped, even when failed\n" - " -T --tmpfs Create a new tmpfs on the mount point\n" - " --canonicalize=BOOL Controls whether to canonicalize path before\n" - " operation\n" - "\nSee the %2$s for details.\n", - program_invocation_short_name, - link, - ansi_underline(), - ansi_normal(), - ansi_highlight(), - ansi_normal(), - streq(program_invocation_short_name, "systemd-umount") ? "" : "--umount "); + help_section("Options"); + r = table_print_or_warn(options); + if (r < 0) + return r; + help_man_page_reference("systemd-mount", "1"); return 0; } -static int parse_argv(int argc, char *argv[]) { - - enum { - ARG_VERSION = 0x100, - ARG_NO_BLOCK, - ARG_NO_PAGER, - ARG_NO_LEGEND, - ARG_NO_ASK_PASSWORD, - ARG_USER, - ARG_SYSTEM, - ARG_DISCOVER, - ARG_MOUNT_TYPE, - ARG_MOUNT_OPTIONS, - ARG_OWNER, - ARG_FSCK, - ARG_DESCRIPTION, - ARG_TIMEOUT_IDLE, - ARG_AUTOMOUNT, - ARG_AUTOMOUNT_PROPERTY, - ARG_BIND_DEVICE, - ARG_LIST, - ARG_JSON, - ARG_CANONICALIZE, - }; - - static const struct option options[] = { - { "help", no_argument, NULL, 'h' }, - { "version", no_argument, NULL, ARG_VERSION }, - { "no-block", no_argument, NULL, ARG_NO_BLOCK }, - { "no-pager", no_argument, NULL, ARG_NO_PAGER }, - { "no-legend", no_argument, NULL, ARG_NO_LEGEND }, - { "full", no_argument, NULL, 'l' }, - { "no-ask-password", no_argument, NULL, ARG_NO_ASK_PASSWORD }, - { "quiet", no_argument, NULL, 'q' }, - { "user", no_argument, NULL, ARG_USER }, - { "system", no_argument, NULL, ARG_SYSTEM }, - { "host", required_argument, NULL, 'H' }, - { "machine", required_argument, NULL, 'M' }, - { "discover", no_argument, NULL, ARG_DISCOVER }, - { "type", required_argument, NULL, 't' }, - { "options", required_argument, NULL, 'o' }, - { "owner", required_argument, NULL, ARG_OWNER }, - { "fsck", required_argument, NULL, ARG_FSCK }, - { "description", required_argument, NULL, ARG_DESCRIPTION }, - { "property", required_argument, NULL, 'p' }, - { "automount", required_argument, NULL, ARG_AUTOMOUNT }, - { "timeout-idle-sec", required_argument, NULL, ARG_TIMEOUT_IDLE }, - { "automount-property", required_argument, NULL, ARG_AUTOMOUNT_PROPERTY }, - { "bind-device", no_argument, NULL, ARG_BIND_DEVICE }, - { "list", no_argument, NULL, ARG_LIST }, - { "umount", no_argument, NULL, 'u' }, - { "unmount", no_argument, NULL, 'u' }, /* Compat spelling */ - { "collect", no_argument, NULL, 'G' }, - { "tmpfs", no_argument, NULL, 'T' }, - { "json", required_argument, NULL, ARG_JSON }, - { "canonicalize", required_argument, NULL, ARG_CANONICALIZE }, - {}, - }; - - int r, c; +static int parse_argv(int argc, char *argv[], char ***remaining_args) { + int r; assert(argc >= 0); assert(argv); + assert(remaining_args); if (invoked_as(argv, "systemd-umount")) arg_action = ACTION_UMOUNT; - while ((c = getopt_long(argc, argv, "hqH:M:t:o:p:AuGlT", options, NULL)) >= 0) + OptionParser opts = { argc, argv }; + FOREACH_OPTION_OR_RETURN(c, &opts) switch (c) { - case 'h': - return help(); + OPTION_COMMON_HELP: + return help(argv); - case ARG_VERSION: + OPTION_COMMON_VERSION: return version(); - case ARG_NO_BLOCK: + OPTION_LONG("no-block", NULL, "Do not wait until operation finished"): arg_no_block = true; break; - case ARG_NO_PAGER: + OPTION_COMMON_NO_PAGER: arg_pager_flags |= PAGER_DISABLE; break; - case ARG_NO_LEGEND: + OPTION_COMMON_NO_LEGEND: arg_legend = false; break; - case 'l': + OPTION('l', "full", NULL, "Do not ellipsize output"): arg_full = true; break; - case ARG_NO_ASK_PASSWORD: + OPTION_COMMON_NO_ASK_PASSWORD: arg_ask_password = false; break; - case 'q': + OPTION('q', "quiet", NULL, "Suppress information messages during runtime"): arg_quiet = true; break; - case ARG_USER: + OPTION_LONG("user", NULL, "Run as user unit"): arg_runtime_scope = RUNTIME_SCOPE_USER; break; - case ARG_SYSTEM: + OPTION_LONG("system", NULL, /* help= */ NULL): arg_runtime_scope = RUNTIME_SCOPE_SYSTEM; break; - case 'H': + OPTION_COMMON_HOST: arg_transport = BUS_TRANSPORT_REMOTE; - arg_host = optarg; + arg_host = opts.arg; break; - case 'M': - r = parse_machine_argument(optarg, &arg_host, &arg_transport); + OPTION_COMMON_MACHINE: + r = parse_machine_argument(opts.arg, &arg_host, &arg_transport); if (r < 0) return r; break; - case ARG_DISCOVER: + OPTION_LONG("discover", NULL, "Discover mount device metadata"): arg_discover = true; break; - case 't': - r = free_and_strdup_warn(&arg_mount_type, optarg); + OPTION('t', "type", "TYPE", "File system type"): + r = free_and_strdup_warn(&arg_mount_type, opts.arg); if (r < 0) return r; break; - case 'o': - r = free_and_strdup_warn(&arg_mount_options, optarg); + OPTION('o', "options", "OPTIONS", "Mount options"): + r = free_and_strdup_warn(&arg_mount_options, opts.arg); if (r < 0) return r; break; - case ARG_OWNER: { - const char *user = optarg; + OPTION_LONG("owner", "USER", "Add uid= and gid= options for USER"): { + const char *user = opts.arg; r = get_user_creds(&user, &arg_uid, &arg_gid, NULL, NULL, 0); if (r < 0) return log_error_errno(r, r == -EBADMSG ? "UID or GID of user %s are invalid." : "Cannot use \"%s\" as owner: %m", - optarg); + opts.arg); break; } - case ARG_FSCK: - r = parse_boolean_argument("--fsck=", optarg, &arg_fsck); + OPTION_LONG("fsck", "BOOL", "Run a file system check before mount"): + r = parse_boolean_argument("--fsck=", opts.arg, &arg_fsck); if (r < 0) return r; break; - case ARG_DESCRIPTION: - r = free_and_strdup_warn(&arg_description, optarg); + OPTION_LONG("description", "TEXT", "Description for unit"): + r = free_and_strdup_warn(&arg_description, opts.arg); if (r < 0) return r; break; - case 'p': - if (strv_extend(&arg_property, optarg) < 0) + OPTION('p', "property", "NAME=VALUE", "Set mount unit property"): + if (strv_extend(&arg_property, opts.arg) < 0) return log_oom(); - break; - case 'A': + OPTION_SHORT('A', NULL, "Same as --automount=yes"): arg_action = ACTION_AUTOMOUNT; break; - case ARG_AUTOMOUNT: - r = parse_boolean_argument("--automount=", optarg, NULL); + OPTION_LONG("automount", "BOOL", "Create an automount point"): + r = parse_boolean_argument("--automount=", opts.arg, NULL); if (r < 0) return r; arg_action = r ? ACTION_AUTOMOUNT : ACTION_MOUNT; break; - case ARG_TIMEOUT_IDLE: - r = parse_sec(optarg, &arg_timeout_idle); + OPTION_LONG("timeout-idle-sec", "SEC", "Specify automount idle timeout"): + r = parse_sec(opts.arg, &arg_timeout_idle); if (r < 0) - return log_error_errno(r, "Failed to parse timeout: %s", optarg); + return log_error_errno(r, "Failed to parse timeout: %s", opts.arg); arg_timeout_idle_set = true; break; - case ARG_AUTOMOUNT_PROPERTY: - if (strv_extend(&arg_automount_property, optarg) < 0) + OPTION_LONG("automount-property", "NAME=VALUE", "Set automount unit property"): + if (strv_extend(&arg_automount_property, opts.arg) < 0) return log_oom(); - break; - case ARG_BIND_DEVICE: + OPTION_LONG("bind-device", NULL, "Bind automount unit to device"): arg_bind_device = true; break; - case ARG_LIST: + OPTION_LONG("list", NULL, "List mountable block devices"): arg_action = ACTION_LIST; break; - case 'u': + OPTION('u', "umount", NULL, "Unmount mount points"): {} + OPTION_LONG("unmount", NULL, /* help= */ NULL): /* compat spelling */ arg_action = ACTION_UMOUNT; break; - case 'G': + OPTION('G', "collect", NULL, "Unload unit after it stopped, even when failed"): arg_aggressive_gc = true; break; - case 'T': + OPTION('T', "tmpfs", NULL, "Create a new tmpfs on the mount point"): arg_tmpfs = true; break; - case ARG_JSON: - r = parse_json_argument(optarg, &arg_json_format_flags); + OPTION_COMMON_JSON: + r = parse_json_argument(opts.arg, &arg_json_format_flags); if (r <= 0) return r; - break; - case ARG_CANONICALIZE: - r = parse_boolean_argument("--canonicalize=", optarg, &arg_canonicalize); + OPTION_LONG("canonicalize", "BOOL", + "Controls whether to canonicalize path before operation"): + r = parse_boolean_argument("--canonicalize=", opts.arg, &arg_canonicalize); if (r < 0) return r; - break; - - case '?': - return -EINVAL; - - default: - assert_not_reached(); } + char **args = option_parser_get_args(&opts); + size_t n_args = option_parser_get_n_args(&opts); + if (arg_runtime_scope == RUNTIME_SCOPE_USER) { arg_ask_password = false; @@ -407,7 +317,7 @@ static int parse_argv(int argc, char *argv[]) { } if (arg_action == ACTION_LIST) { - if (optind < argc) + if (n_args > 0) return log_error_errno(SYNTHETIC_ERRNO(EINVAL), "Too many arguments."); @@ -415,22 +325,22 @@ static int parse_argv(int argc, char *argv[]) { return log_error_errno(SYNTHETIC_ERRNO(EOPNOTSUPP), "Listing devices only supported locally."); } else if (arg_action == ACTION_UMOUNT) { - if (optind >= argc) + if (n_args == 0) return log_error_errno(SYNTHETIC_ERRNO(EINVAL), "At least one argument required."); if (arg_transport != BUS_TRANSPORT_LOCAL || !arg_canonicalize) - for (int i = optind; i < argc; i++) - if (!path_is_absolute(argv[i])) + STRV_FOREACH(a, args) + if (!path_is_absolute(*a)) return log_error_errno(SYNTHETIC_ERRNO(EINVAL), "Path must be absolute when operating remotely or when canonicalization is turned off: %s", - argv[i]); + *a); } else { - if (optind >= argc) + if (n_args == 0) return log_error_errno(SYNTHETIC_ERRNO(EINVAL), "At least one argument required."); - if (argc > optind+2) + if (n_args > 2) return log_error_errno(SYNTHETIC_ERRNO(EINVAL), "More than two arguments are not allowed."); @@ -439,16 +349,16 @@ static int parse_argv(int argc, char *argv[]) { return log_error_errno(SYNTHETIC_ERRNO(EINVAL), "--discover cannot be used in conjunction with --tmpfs."); - if (argc <= optind+1) { + if (n_args == 1) { arg_mount_what = strdup("tmpfs"); if (!arg_mount_what) return log_oom(); - r = parse_where(argv[optind], &arg_mount_where); + r = parse_where(args[0], &arg_mount_where); if (r < 0) return r; } else { - arg_mount_what = strdup(argv[optind]); + arg_mount_what = strdup(args[0]); if (!arg_mount_what) return log_oom(); } @@ -463,12 +373,12 @@ static int parse_argv(int argc, char *argv[]) { arg_mount_type); } else { if (arg_mount_type && !fstype_is_blockdev_backed(arg_mount_type)) { - arg_mount_what = strdup(argv[optind]); + arg_mount_what = strdup(args[0]); if (!arg_mount_what) return log_oom(); } else { _cleanup_free_ char *u = NULL; - const char *p = argv[optind]; + const char *p = args[0]; if (arg_canonicalize) { u = fstab_node_to_udev_node(p); @@ -494,8 +404,8 @@ static int parse_argv(int argc, char *argv[]) { } } - if (argc > optind+1) { - r = parse_where(argv[optind+1], &arg_mount_where); + if (n_args >= 2) { + r = parse_where(args[1], &arg_mount_where); if (r < 0) return r; } else if (!arg_tmpfs) @@ -524,6 +434,7 @@ static int parse_argv(int argc, char *argv[]) { } } + *remaining_args = args; return 1; } @@ -1077,18 +988,18 @@ static int umount_loop(sd_bus *bus, const char *backing_file) { return umount_by_device(bus, dev); } -static int action_umount(sd_bus *bus, int argc, char **argv) { +static int action_umount(sd_bus *bus, char **args) { int r, ret = 0; assert(bus); - assert(argv); - assert(argc > optind); + assert(args); + assert(!strv_isempty(args)); if (arg_transport != BUS_TRANSPORT_LOCAL || !arg_canonicalize) { - for (int i = optind; i < argc; i++) { + STRV_FOREACH(arg, args) { _cleanup_free_ char *p = NULL; - r = path_simplify_alloc(argv[i], &p); + r = path_simplify_alloc(*arg, &p); if (r < 0) return r; @@ -1097,10 +1008,10 @@ static int action_umount(sd_bus *bus, int argc, char **argv) { return ret; } - for (int i = optind; i < argc; i++) { + STRV_FOREACH(arg, args) { _cleanup_free_ char *u = NULL, *p = NULL; - u = fstab_node_to_udev_node(argv[i]); + u = fstab_node_to_udev_node(*arg); if (!u) return log_oom(); @@ -1113,7 +1024,7 @@ static int action_umount(sd_bus *bus, int argc, char **argv) { struct stat st; if (fstat(fd, &st) < 0) - return log_error_errno(errno, "Can't stat '%s' (from %s): %m", p, argv[i]); + return log_error_errno(errno, "Can't stat '%s' (from %s): %m", p, *arg); r = is_mount_point_at(fd, /* path= */ NULL, /* flags= */ 0); fd = safe_close(fd); /* before continuing make sure the dir is not keeping anything busy */ @@ -1135,7 +1046,7 @@ static int action_umount(sd_bus *bus, int argc, char **argv) { else r = log_error_errno(SYNTHETIC_ERRNO(EINVAL), "Unknown file type for unmounting: %s (from %s)", - p, argv[i]); + p, *arg); RET_GATHER(ret, r); } } @@ -1552,11 +1463,12 @@ static int list_devices(void) { static int run(int argc, char* argv[]) { _cleanup_(sd_bus_flush_close_unrefp) sd_bus *bus = NULL; + char **args = NULL; int r; log_setup(); - r = parse_argv(argc, argv); + r = parse_argv(argc, argv, &args); if (r <= 0) return r; @@ -1570,7 +1482,7 @@ static int run(int argc, char* argv[]) { (void) sd_bus_set_allow_interactive_authorization(bus, arg_ask_password); if (arg_action == ACTION_UMOUNT) - return action_umount(bus, argc, argv); + return action_umount(bus, args); if ((!arg_mount_type || fstype_is_blockdev_backed(arg_mount_type)) && !path_is_normalized(arg_mount_what)) From 355f2bec79c40e276a8dc3c089875399a9d3a123 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Zbigniew=20J=C4=99drzejewski-Szmek?= Date: Wed, 6 May 2026 14:24:37 +0200 Subject: [PATCH 176/242] mount: stop showing mount options for systemd-unmount This only serves to confuse the user. --- src/mount/mount-tool.c | 80 +++++++++++++++++++++++++----------------- 1 file changed, 47 insertions(+), 33 deletions(-) diff --git a/src/mount/mount-tool.c b/src/mount/mount-tool.c index 80846c1c2a929..636930d277176 100644 --- a/src/mount/mount-tool.c +++ b/src/mount/mount-tool.c @@ -109,10 +109,10 @@ static int parse_where(const char *input, char **ret_where) { } static int help(char *argv[]) { - _cleanup_(table_unrefp) Table *options = NULL; + _cleanup_(table_unrefp) Table *options_common = NULL, *options_mount = NULL; int r; - r = option_parser_get_help_table(&options); + r = option_parser_get_help_table(&options_common); if (r < 0) return r; @@ -125,13 +125,25 @@ static int help(char *argv[]) { help_cmdline("[OPTIONS…] --list"); help_cmdline("[OPTIONS…] --umount WHAT|WHERE…"); help_abstract("Establish a mount or auto-mount point."); + + r = option_parser_get_help_table_group("Mount options", &options_mount); + if (r < 0) + return r; + + (void) table_sync_column_widths(0, options_common, options_mount); } help_section("Options"); - r = table_print_or_warn(options); + r = table_print_or_warn(options_common); if (r < 0) return r; + if (options_mount) { + r = table_print_or_warn(options_mount); + if (r < 0) + return r; + } + help_man_page_reference("systemd-mount", "1"); return 0; } @@ -157,6 +169,32 @@ static int parse_argv(int argc, char *argv[], char ***remaining_args) { OPTION_COMMON_VERSION: return version(); + OPTION_LONG("user", NULL, "Run as user unit"): + arg_runtime_scope = RUNTIME_SCOPE_USER; + break; + + OPTION_LONG("system", NULL, /* help= */ NULL): + arg_runtime_scope = RUNTIME_SCOPE_SYSTEM; + break; + + OPTION_COMMON_HOST: + arg_transport = BUS_TRANSPORT_REMOTE; + arg_host = opts.arg; + break; + + OPTION_COMMON_MACHINE: + r = parse_machine_argument(opts.arg, &arg_host, &arg_transport); + if (r < 0) + return r; + break; + + OPTION_LONG("canonicalize", "BOOL", + "Whether to canonicalize path before operation"): + r = parse_boolean_argument("--canonicalize=", opts.arg, &arg_canonicalize); + if (r < 0) + return r; + break; + OPTION_LONG("no-block", NULL, "Do not wait until operation finished"): arg_no_block = true; break; @@ -177,29 +215,18 @@ static int parse_argv(int argc, char *argv[], char ***remaining_args) { arg_ask_password = false; break; - OPTION('q', "quiet", NULL, "Suppress information messages during runtime"): + OPTION('q', "quiet", NULL, "Suppress informational messages during runtime"): arg_quiet = true; break; - OPTION_LONG("user", NULL, "Run as user unit"): - arg_runtime_scope = RUNTIME_SCOPE_USER; - break; - - OPTION_LONG("system", NULL, /* help= */ NULL): - arg_runtime_scope = RUNTIME_SCOPE_SYSTEM; - break; - - OPTION_COMMON_HOST: - arg_transport = BUS_TRANSPORT_REMOTE; - arg_host = opts.arg; - break; - - OPTION_COMMON_MACHINE: - r = parse_machine_argument(opts.arg, &arg_host, &arg_transport); - if (r < 0) + OPTION_COMMON_JSON: + r = parse_json_argument(opts.arg, &arg_json_format_flags); + if (r <= 0) return r; break; + OPTION_GROUP("Mount options"): {} + OPTION_LONG("discover", NULL, "Discover mount device metadata"): arg_discover = true; break; @@ -290,19 +317,6 @@ static int parse_argv(int argc, char *argv[], char ***remaining_args) { OPTION('T', "tmpfs", NULL, "Create a new tmpfs on the mount point"): arg_tmpfs = true; break; - - OPTION_COMMON_JSON: - r = parse_json_argument(opts.arg, &arg_json_format_flags); - if (r <= 0) - return r; - break; - - OPTION_LONG("canonicalize", "BOOL", - "Controls whether to canonicalize path before operation"): - r = parse_boolean_argument("--canonicalize=", opts.arg, &arg_canonicalize); - if (r < 0) - return r; - break; } char **args = option_parser_get_args(&opts); From d4bc62713e09df09281f26f4bf385801a3ee2897 Mon Sep 17 00:00:00 2001 From: Luca Boccassi Date: Wed, 6 May 2026 18:04:51 +0100 Subject: [PATCH 177/242] test: fix flaky testcase_15_wait_online_dns in TEST-75-RESOLVED The test used `timeout 30 bash -c "journalctl -b -u $unit -f | grep -m1 ..."` to wait for systemd-networkd-wait-online to log that no DNS server is accessible. The expected message is actually emitted ~1s after the unit starts, but `grep -m1` exiting doesn't tear down `journalctl -f`: journalctl only notices the closed pipe on its next write, which may never happen for an otherwise idle unit. The pipeline therefore hangs until the 30s timeout fires, eventually causing the test to fail. Replace the follow+pipe with a polling `journalctl --grep` loop, which exits cleanly as soon as the message lands in the journal. Logs from the failing run: [ 2650.871441] systemd-networkd-wait-online[2190]: dns0: No DNS configuration yet [ 2651.723180] systemd-networkd-wait-online[2190]: dns0: No DNS server is accessible. [ 2680.909048] systemd-networkd-wait-online[2190]: json-stream: Got POLLHUP from socket. [ 2680.909092] systemd-networkd-wait-online[2190]: DNS configuration monitor disconnected, reconnecting... [ 2680.914368] systemd-networkd-wait-online[2190]: Failed to connect to io.systemd.Resolve.Monitor: Connection refused [ 2681.966674] systemd-networkd-wait-online[2190]: dns0: No DNS server is accessible. [ 2681.969527] systemd-networkd-wait-online[2190]: Failed to connect to io.systemd.Resolve.Monitor: Connection refused [ 2682.077032] systemd[1]: Stopping wait-online-dns-0f9e4f6d-8b34-4cff-b2da-03612ca731e8.service - [systemd-run] /usr/lib/systemd/systemd-networkd-wait-online --timeout=0 --dns --interface=dns0... Co-developed-by: Claude Opus 4.7 --- test/units/TEST-75-RESOLVED.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/units/TEST-75-RESOLVED.sh b/test/units/TEST-75-RESOLVED.sh index bb1cf9576c292..7b5670a4bc726 100755 --- a/test/units/TEST-75-RESOLVED.sh +++ b/test/units/TEST-75-RESOLVED.sh @@ -1421,7 +1421,7 @@ testcase_15_wait_online_dns() { /usr/lib/systemd/systemd-networkd-wait-online --timeout=0 --dns --interface=dns0 # Wait until it blocks waiting for updated DNS config - timeout 30 bash -c "journalctl -b -u $unit -f | grep -m1 'dns0: No.*DNS server is accessible'" >/dev/null + timeout 30 bash -c "until journalctl -b -u $unit --grep 'dns0: No.*DNS server is accessible' >/dev/null 2>&1; do sleep 0.5; done" # Update the global configuration. Restart rather than reload systemd-resolved so that # systemd-networkd-wait-online has to re-connect to the varlink service. From 16c3c69be3c4c5d0e86c051b66153eff3b2ac98a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Zbigniew=20J=C4=99drzejewski-Szmek?= Date: Wed, 6 May 2026 14:41:08 +0200 Subject: [PATCH 178/242] resolvectl: split out parse_protocol --- src/resolve/resolvectl.c | 91 +++++++++++++++++----------------------- 1 file changed, 38 insertions(+), 53 deletions(-) diff --git a/src/resolve/resolvectl.c b/src/resolve/resolvectl.c index 1452131b59e7d..7b7cae68d437f 100644 --- a/src/resolve/resolvectl.c +++ b/src/resolve/resolvectl.c @@ -3139,16 +3139,38 @@ static int verb_show_server_state(int argc, char *argv[], uintptr_t _data, void return sd_json_variant_dump(d, arg_json_format_flags, NULL, NULL); } -static void help_protocol_types(void) { - if (arg_legend) - puts("Known protocol types:"); - puts("dns\n" - "llmnr\n" - "llmnr-ipv4\n" - "llmnr-ipv6\n" - "mdns\n" - "mdns-ipv4\n" - "mdns-ipv6"); +static int parse_protocol(const char *arg) { + if (streq(arg, "help")) { + if (arg_legend) + puts("Known protocol types:"); + puts("dns\n" + "llmnr\n" + "llmnr-ipv4\n" + "llmnr-ipv6\n" + "mdns\n" + "mdns-ipv4\n" + "mdns-ipv6"); + return 0; + } + + if (streq(arg, "dns")) + arg_flags |= SD_RESOLVED_DNS; + else if (streq(arg, "llmnr")) + arg_flags |= SD_RESOLVED_LLMNR; + else if (streq(arg, "llmnr-ipv4")) + arg_flags |= SD_RESOLVED_LLMNR_IPV4; + else if (streq(arg, "llmnr-ipv6")) + arg_flags |= SD_RESOLVED_LLMNR_IPV6; + else if (streq(arg, "mdns")) + arg_flags |= SD_RESOLVED_MDNS; + else if (streq(arg, "mdns-ipv4")) + arg_flags |= SD_RESOLVED_MDNS_IPV4; + else if (streq(arg, "mdns-ipv6")) + arg_flags |= SD_RESOLVED_MDNS_IPV6; + else + return log_error_errno(SYNTHETIC_ERRNO(EINVAL), + "Unknown protocol specifier: %s", arg); + return 1; } static void help_dns_types(void) { @@ -3434,27 +3456,9 @@ static int compat_parse_argv(int argc, char *argv[]) { break; case 'p': - if (streq(optarg, "help")) { - help_protocol_types(); - return 0; - } else if (streq(optarg, "dns")) - arg_flags |= SD_RESOLVED_DNS; - else if (streq(optarg, "llmnr")) - arg_flags |= SD_RESOLVED_LLMNR; - else if (streq(optarg, "llmnr-ipv4")) - arg_flags |= SD_RESOLVED_LLMNR_IPV4; - else if (streq(optarg, "llmnr-ipv6")) - arg_flags |= SD_RESOLVED_LLMNR_IPV6; - else if (streq(optarg, "mdns")) - arg_flags |= SD_RESOLVED_MDNS; - else if (streq(optarg, "mdns-ipv4")) - arg_flags |= SD_RESOLVED_MDNS_IPV4; - else if (streq(optarg, "mdns-ipv6")) - arg_flags |= SD_RESOLVED_MDNS_IPV6; - else - return log_error_errno(SYNTHETIC_ERRNO(EINVAL), - "Unknown protocol specifier: %s", optarg); - + r = parse_protocol(optarg); + if (r <= 0) + return r; break; case ARG_SERVICE: @@ -3736,28 +3740,9 @@ static int native_parse_argv(int argc, char *argv[]) { break; case 'p': - if (streq(optarg, "help")) { - help_protocol_types(); - return 0; - } else if (streq(optarg, "dns")) - arg_flags |= SD_RESOLVED_DNS; - else if (streq(optarg, "llmnr")) - arg_flags |= SD_RESOLVED_LLMNR; - else if (streq(optarg, "llmnr-ipv4")) - arg_flags |= SD_RESOLVED_LLMNR_IPV4; - else if (streq(optarg, "llmnr-ipv6")) - arg_flags |= SD_RESOLVED_LLMNR_IPV6; - else if (streq(optarg, "mdns")) - arg_flags |= SD_RESOLVED_MDNS; - else if (streq(optarg, "mdns-ipv4")) - arg_flags |= SD_RESOLVED_MDNS_IPV4; - else if (streq(optarg, "mdns-ipv6")) - arg_flags |= SD_RESOLVED_MDNS_IPV6; - else - return log_error_errno(SYNTHETIC_ERRNO(EINVAL), - "Unknown protocol specifier: %s", - optarg); - + r = parse_protocol(optarg); + if (r <= 0) + return r; break; case ARG_RAW: From df24074bb51a72ee5beb8c16c5b6660058892e16 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Zbigniew=20J=C4=99drzejewski-Szmek?= Date: Thu, 7 May 2026 09:30:37 +0200 Subject: [PATCH 179/242] resolvectl: move things around in --help Move the "display options" to the end of both --help strings and then reorder the implementation to match. --- src/resolve/resolvectl.c | 187 ++++++++++++++++++++------------------- 1 file changed, 94 insertions(+), 93 deletions(-) diff --git a/src/resolve/resolvectl.c b/src/resolve/resolvectl.c index 7b7cae68d437f..fdce91344774b 100644 --- a/src/resolve/resolvectl.c +++ b/src/resolve/resolvectl.c @@ -3206,7 +3206,6 @@ static int compat_help(void) { "%2$sResolve domain names, IPv4 and IPv6 addresses, DNS records, and services.%3$s\n\n" " -h --help Show this help\n" " --version Show package version\n" - " --no-pager Do not pipe output into a pager\n" " -4 Resolve IPv4 addresses\n" " -6 Resolve IPv6 addresses\n" " -i --interface=INTERFACE Look on interface\n" @@ -3221,8 +3220,6 @@ static int compat_help(void) { " --cname=BOOL Follow CNAME redirects (default: yes)\n" " --search=BOOL Use search domains for single-label names\n" " (default: yes)\n" - " --raw[=payload|packet] Dump the answer as binary data\n" - " --legend=BOOL Print headers and additional info (default: yes)\n" " --statistics Show resolver statistics\n" " --reset-statistics Reset resolver statistics\n" " --status Show link and server status\n" @@ -3237,6 +3234,9 @@ static int compat_help(void) { " --set-dnssec=MODE Set per-interface DNSSEC mode\n" " --set-nta=DOMAIN Set per-interface DNSSEC NTA\n" " --revert Revert per-interface configuration\n" + " --raw[=payload|packet] Dump the answer as binary data\n" + " --no-pager Do not pipe output into a pager\n" + " --legend=BOOL Print headers and additional info (default: yes)\n" "\nSee the %4$s for details.\n", program_invocation_short_name, ansi_highlight(), @@ -3286,8 +3286,6 @@ static int native_help(void) { "\n%3$sOptions:%4$s\n" " -h --help Show this help\n" " --version Show package version\n" - " --no-pager Do not pipe output into a pager\n" - " --no-ask-password Do not prompt for password\n" " -4 Resolve IPv4 addresses\n" " -6 Resolve IPv6 addresses\n" " -i --interface=INTERFACE Look on interface\n" @@ -3310,6 +3308,8 @@ static int native_help(void) { " --search=BOOL Use search domains for single-label names (default:\n" " yes)\n" " --raw[=payload|packet] Dump the answer as binary data\n" + " --no-pager Do not pipe output into a pager\n" + " --no-ask-password Do not prompt for password\n" " --legend=BOOL Print headers and additional info (default: yes)\n" " --json=MODE Output as JSON\n" " -j Same as --json=pretty on tty, --json=short\n" @@ -3418,6 +3418,12 @@ static int compat_parse_argv(int argc, char *argv[]) { return r; break; + case 'p': + r = parse_protocol(optarg); + if (r <= 0) + return r; + break; + case 't': if (streq(optarg, "help")) { help_dns_types(); @@ -3449,20 +3455,22 @@ static int compat_parse_argv(int argc, char *argv[]) { break; - case ARG_LEGEND: - r = parse_boolean_argument("--legend=", optarg, &arg_legend); - if (r < 0) - return r; + case ARG_SERVICE: + arg_mode = MODE_RESOLVE_SERVICE; break; - case 'p': - r = parse_protocol(optarg); - if (r <= 0) + case ARG_SERVICE_ADDRESS: + r = parse_boolean_argument("--service-address=", optarg, NULL); + if (r < 0) return r; + SET_FLAG(arg_flags, SD_RESOLVED_NO_ADDRESS, r == 0); break; - case ARG_SERVICE: - arg_mode = MODE_RESOLVE_SERVICE; + case ARG_SERVICE_TXT: + r = parse_boolean_argument("--service-txt=", optarg, NULL); + if (r < 0) + return r; + SET_FLAG(arg_flags, SD_RESOLVED_NO_TXT, r == 0); break; case ARG_OPENPGP: @@ -3478,23 +3486,6 @@ static int compat_parse_argv(int argc, char *argv[]) { "Unknown service family \"%s\".", optarg); break; - case ARG_RAW: - if (on_tty()) - return log_error_errno(SYNTHETIC_ERRNO(ENOTTY), - "Refusing to write binary data to tty."); - - if (optarg == NULL || streq(optarg, "payload")) - arg_raw = RAW_PAYLOAD; - else if (streq(optarg, "packet")) - arg_raw = RAW_PACKET; - else - return log_error_errno(SYNTHETIC_ERRNO(EINVAL), - "Unknown --raw specifier \"%s\".", - optarg); - - arg_legend = false; - break; - case ARG_CNAME: r = parse_boolean_argument("--cname=", optarg, NULL); if (r < 0) @@ -3502,19 +3493,6 @@ static int compat_parse_argv(int argc, char *argv[]) { SET_FLAG(arg_flags, SD_RESOLVED_NO_CNAME, r == 0); break; - case ARG_SERVICE_ADDRESS: - r = parse_boolean_argument("--service-address=", optarg, NULL); - if (r < 0) - return r; - SET_FLAG(arg_flags, SD_RESOLVED_NO_ADDRESS, r == 0); - break; - - case ARG_SERVICE_TXT: - r = parse_boolean_argument("--service-txt=", optarg, NULL); - if (r < 0) - return r; - SET_FLAG(arg_flags, SD_RESOLVED_NO_TXT, r == 0); - break; case ARG_SEARCH: r = parse_boolean_argument("--search=", optarg, NULL); @@ -3531,6 +3509,10 @@ static int compat_parse_argv(int argc, char *argv[]) { arg_mode = MODE_RESET_STATISTICS; break; + case ARG_STATUS: + arg_mode = MODE_STATUS; + break; + case ARG_FLUSH_CACHES: arg_mode = MODE_FLUSH_CACHES; break; @@ -3539,14 +3521,6 @@ static int compat_parse_argv(int argc, char *argv[]) { arg_mode = MODE_RESET_SERVER_FEATURES; break; - case ARG_STATUS: - arg_mode = MODE_STATUS; - break; - - case ARG_NO_PAGER: - arg_pager_flags |= PAGER_DISABLE; - break; - case ARG_SET_DNS: r = strv_extend(&arg_set_dns, optarg); if (r < 0) @@ -3595,6 +3569,33 @@ static int compat_parse_argv(int argc, char *argv[]) { arg_mode = MODE_REVERT_LINK; break; + case ARG_RAW: + if (on_tty()) + return log_error_errno(SYNTHETIC_ERRNO(ENOTTY), + "Refusing to write binary data to tty."); + + if (optarg == NULL || streq(optarg, "payload")) + arg_raw = RAW_PAYLOAD; + else if (streq(optarg, "packet")) + arg_raw = RAW_PACKET; + else + return log_error_errno(SYNTHETIC_ERRNO(EINVAL), + "Unknown --raw specifier \"%s\".", + optarg); + + arg_legend = false; + break; + + case ARG_NO_PAGER: + arg_pager_flags |= PAGER_DISABLE; + break; + + case ARG_LEGEND: + r = parse_boolean_argument("--legend=", optarg, &arg_legend); + if (r < 0) + return r; + break; + case '?': return -EINVAL; @@ -3703,6 +3704,12 @@ static int native_parse_argv(int argc, char *argv[]) { return r; break; + case 'p': + r = parse_protocol(optarg); + if (r <= 0) + return r; + break; + case 't': if (streq(optarg, "help")) { help_dns_types(); @@ -3733,33 +3740,18 @@ static int native_parse_argv(int argc, char *argv[]) { break; - case ARG_LEGEND: - r = parse_boolean_argument("--legend=", optarg, &arg_legend); + case ARG_SERVICE_ADDRESS: + r = parse_boolean_argument("--service-address=", optarg, NULL); if (r < 0) return r; + SET_FLAG(arg_flags, SD_RESOLVED_NO_ADDRESS, r == 0); break; - case 'p': - r = parse_protocol(optarg); - if (r <= 0) + case ARG_SERVICE_TXT: + r = parse_boolean_argument("--service-txt=", optarg, NULL); + if (r < 0) return r; - break; - - case ARG_RAW: - if (on_tty()) - return log_error_errno(SYNTHETIC_ERRNO(ENOTTY), - "Refusing to write binary data to tty."); - - if (optarg == NULL || streq(optarg, "payload")) - arg_raw = RAW_PAYLOAD; - else if (streq(optarg, "packet")) - arg_raw = RAW_PACKET; - else - return log_error_errno(SYNTHETIC_ERRNO(EINVAL), - "Unknown --raw specifier \"%s\".", - optarg); - - arg_legend = false; + SET_FLAG(arg_flags, SD_RESOLVED_NO_TXT, r == 0); break; case ARG_CNAME: @@ -3797,6 +3789,13 @@ static int native_parse_argv(int argc, char *argv[]) { SET_FLAG(arg_flags, SD_RESOLVED_NO_STALE, r == 0); break; + case ARG_RELAX_SINGLE_LABEL: + r = parse_boolean_argument("--relax-single-label=", optarg, NULL); + if (r < 0) + return r; + SET_FLAG(arg_flags, SD_RESOLVED_RELAX_SINGLE_LABEL, r > 0); + break; + case ARG_ZONE: r = parse_boolean_argument("--zone=", optarg, NULL); if (r < 0) @@ -3818,20 +3817,6 @@ static int native_parse_argv(int argc, char *argv[]) { SET_FLAG(arg_flags, SD_RESOLVED_NO_NETWORK, r == 0); break; - case ARG_SERVICE_ADDRESS: - r = parse_boolean_argument("--service-address=", optarg, NULL); - if (r < 0) - return r; - SET_FLAG(arg_flags, SD_RESOLVED_NO_ADDRESS, r == 0); - break; - - case ARG_SERVICE_TXT: - r = parse_boolean_argument("--service-txt=", optarg, NULL); - if (r < 0) - return r; - SET_FLAG(arg_flags, SD_RESOLVED_NO_TXT, r == 0); - break; - case ARG_SEARCH: r = parse_boolean_argument("--search=", optarg, NULL); if (r < 0) @@ -3839,11 +3824,21 @@ static int native_parse_argv(int argc, char *argv[]) { SET_FLAG(arg_flags, SD_RESOLVED_NO_SEARCH, r == 0); break; - case ARG_RELAX_SINGLE_LABEL: - r = parse_boolean_argument("--relax-single-label=", optarg, NULL); - if (r < 0) - return r; - SET_FLAG(arg_flags, SD_RESOLVED_RELAX_SINGLE_LABEL, r > 0); + case ARG_RAW: + if (on_tty()) + return log_error_errno(SYNTHETIC_ERRNO(ENOTTY), + "Refusing to write binary data to tty."); + + if (optarg == NULL || streq(optarg, "payload")) + arg_raw = RAW_PAYLOAD; + else if (streq(optarg, "packet")) + arg_raw = RAW_PACKET; + else + return log_error_errno(SYNTHETIC_ERRNO(EINVAL), + "Unknown --raw specifier \"%s\".", + optarg); + + arg_legend = false; break; case ARG_NO_PAGER: @@ -3854,6 +3849,12 @@ static int native_parse_argv(int argc, char *argv[]) { arg_ask_password = false; break; + case ARG_LEGEND: + r = parse_boolean_argument("--legend=", optarg, &arg_legend); + if (r < 0) + return r; + break; + case ARG_JSON: r = parse_json_argument(optarg, &arg_json_format_flags); if (r <= 0) From d865863929bd010392330a4358b005c9eb902880 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Zbigniew=20J=C4=99drzejewski-Szmek?= Date: Thu, 7 May 2026 09:31:28 +0200 Subject: [PATCH 180/242] resolvectl: move verb implementations to match order in --help --- src/resolve/resolvectl.c | 2533 +++++++++++++++++++------------------- 1 file changed, 1266 insertions(+), 1267 deletions(-) diff --git a/src/resolve/resolvectl.c b/src/resolve/resolvectl.c index fdce91344774b..4088c39b1da60 100644 --- a/src/resolve/resolvectl.c +++ b/src/resolve/resolvectl.c @@ -122,6 +122,18 @@ static const char* const status_mode_json_field_table[_STATUS_MAX] = { DEFINE_PRIVATE_STRING_TABLE_LOOKUP_TO_STRING(status_mode_json_field, StatusMode); +static int strv_extend_extended_bool(char ***strv, const char *name, const char *value) { + int r; + + if (value) { + r = parse_boolean(value); + if (r >= 0) + return strv_extendf(strv, "%s%s", plus_minus(r), name); + } + + return strv_extendf(strv, "%s=%s", name, value ?: "???"); +} + static int acquire_bus(sd_bus **ret) { _cleanup_(sd_bus_unrefp) sd_bus *bus = NULL; int r; @@ -1163,430 +1175,114 @@ static int verb_tlsa(int argc, char *argv[], uintptr_t _data, void *userdata) { return ret; } -static int verb_show_statistics(int argc, char *argv[], uintptr_t _data, void *userdata) { - _cleanup_(table_unrefp) Table *table = NULL; - sd_json_variant *reply = NULL; +static int varlink_dump_dns_configuration(sd_json_variant **ret) { _cleanup_(sd_varlink_unrefp) sd_varlink *vl = NULL; + _cleanup_(sd_json_variant_unrefp) sd_json_variant *reply = NULL; + sd_json_variant *v; int r; - (void) polkit_agent_open_if_enabled(BUS_TRANSPORT_LOCAL, arg_ask_password); + assert(ret); - r = sd_varlink_connect_address(&vl, "/run/systemd/resolve/io.systemd.Resolve.Monitor"); + r = sd_varlink_connect_address(&vl, "/run/systemd/resolve/io.systemd.Resolve"); if (r < 0) - return log_error_errno(r, "Failed to connect to query monitoring service /run/systemd/resolve/io.systemd.Resolve.Monitor: %m"); + return log_error_errno(r, "Failed to connect to service /run/systemd/resolve/io.systemd.Resolve: %m"); - r = varlink_callbo_and_log( - vl, - "io.systemd.Resolve.Monitor.DumpStatistics", - &reply, - SD_JSON_BUILD_PAIR_BOOLEAN("allowInteractiveAuthentication", arg_ask_password)); + r = varlink_call_and_log(vl, "io.systemd.Resolve.DumpDNSConfiguration", /* parameters= */ NULL, &reply); if (r < 0) return r; - if (sd_json_format_enabled(arg_json_format_flags)) - return sd_json_variant_dump(reply, arg_json_format_flags, NULL, NULL); - - struct statistics { - sd_json_variant *transactions; - sd_json_variant *cache; - sd_json_variant *dnssec; - } statistics; - - static const sd_json_dispatch_field statistics_dispatch_table[] = { - { "transactions", SD_JSON_VARIANT_OBJECT, sd_json_dispatch_variant_noref, offsetof(struct statistics, transactions), SD_JSON_MANDATORY }, - { "cache", SD_JSON_VARIANT_OBJECT, sd_json_dispatch_variant_noref, offsetof(struct statistics, cache), SD_JSON_MANDATORY }, - { "dnssec", SD_JSON_VARIANT_OBJECT, sd_json_dispatch_variant_noref, offsetof(struct statistics, dnssec), SD_JSON_MANDATORY }, - {}, - }; - - r = sd_json_dispatch(reply, statistics_dispatch_table, SD_JSON_LOG, &statistics); - if (r < 0) - return r; + v = sd_json_variant_by_key(reply, "configuration"); - struct transactions { - uint64_t n_current_transactions; - uint64_t n_transactions_total; - uint64_t n_timeouts_total; - uint64_t n_timeouts_served_stale_total; - uint64_t n_failure_responses_total; - uint64_t n_failure_responses_served_stale_total; - } transactions; + if (!sd_json_variant_is_array(v)) + return log_error_errno(SYNTHETIC_ERRNO(ENODATA), "DumpDNSConfiguration() response missing 'configuration' key."); - static const sd_json_dispatch_field transactions_dispatch_table[] = { - { "currentTransactions", _SD_JSON_VARIANT_TYPE_INVALID, sd_json_dispatch_uint64, offsetof(struct transactions, n_current_transactions), SD_JSON_MANDATORY }, - { "totalTransactions", _SD_JSON_VARIANT_TYPE_INVALID, sd_json_dispatch_uint64, offsetof(struct transactions, n_transactions_total), SD_JSON_MANDATORY }, - { "totalTimeouts", _SD_JSON_VARIANT_TYPE_INVALID, sd_json_dispatch_uint64, offsetof(struct transactions, n_timeouts_total), SD_JSON_MANDATORY }, - { "totalTimeoutsServedStale", _SD_JSON_VARIANT_TYPE_INVALID, sd_json_dispatch_uint64, offsetof(struct transactions, n_timeouts_served_stale_total), SD_JSON_MANDATORY }, - { "totalFailedResponses", _SD_JSON_VARIANT_TYPE_INVALID, sd_json_dispatch_uint64, offsetof(struct transactions, n_failure_responses_total), SD_JSON_MANDATORY }, - { "totalFailedResponsesServedStale", _SD_JSON_VARIANT_TYPE_INVALID, sd_json_dispatch_uint64, offsetof(struct transactions, n_failure_responses_served_stale_total), SD_JSON_MANDATORY }, - {}, - }; + TAKE_PTR(reply); + *ret = sd_json_variant_ref(v); + return 0; +} - r = sd_json_dispatch(statistics.transactions, transactions_dispatch_table, SD_JSON_LOG, &transactions); - if (r < 0) - return r; +static int status_json_filter_links(sd_json_variant **configuration, char **links) { + _cleanup_set_free_ Set *links_by_index = NULL; + _cleanup_(sd_json_variant_unrefp) sd_json_variant *v = NULL; + sd_json_variant *w; + int r; - struct cache { - uint64_t cache_size; - uint64_t n_cache_hit; - uint64_t n_cache_miss; - } cache; + assert(configuration); - static const sd_json_dispatch_field cache_dispatch_table[] = { - { "size", _SD_JSON_VARIANT_TYPE_INVALID, sd_json_dispatch_uint64, offsetof(struct cache, cache_size), SD_JSON_MANDATORY }, - { "hits", _SD_JSON_VARIANT_TYPE_INVALID, sd_json_dispatch_uint64, offsetof(struct cache, n_cache_hit), SD_JSON_MANDATORY }, - { "misses", _SD_JSON_VARIANT_TYPE_INVALID, sd_json_dispatch_uint64, offsetof(struct cache, n_cache_miss), SD_JSON_MANDATORY }, - {}, - }; + if (links) + STRV_FOREACH(ifname, links) { + int ifindex = rtnl_resolve_interface_or_warn(/* rtnl= */ NULL, *ifname); + if (ifindex < 0) + return ifindex; - r = sd_json_dispatch(statistics.cache, cache_dispatch_table, SD_JSON_LOG, &cache); - if (r < 0) - return r; + r = set_ensure_put(&links_by_index, NULL, INT_TO_PTR(ifindex)); + if (r < 0) + return r; + } - struct dnsssec { - uint64_t n_dnssec_secure; - uint64_t n_dnssec_insecure; - uint64_t n_dnssec_bogus; - uint64_t n_dnssec_indeterminate; - } dnsssec; + JSON_VARIANT_ARRAY_FOREACH(w, *configuration) { + int ifindex = sd_json_variant_unsigned(sd_json_variant_by_key(w, "ifindex")); - static const sd_json_dispatch_field dnssec_dispatch_table[] = { - { "secure", _SD_JSON_VARIANT_TYPE_INVALID, sd_json_dispatch_uint64, offsetof(struct dnsssec, n_dnssec_secure), SD_JSON_MANDATORY }, - { "insecure", _SD_JSON_VARIANT_TYPE_INVALID, sd_json_dispatch_uint64, offsetof(struct dnsssec, n_dnssec_insecure), SD_JSON_MANDATORY }, - { "bogus", _SD_JSON_VARIANT_TYPE_INVALID, sd_json_dispatch_uint64, offsetof(struct dnsssec, n_dnssec_bogus), SD_JSON_MANDATORY }, - { "indeterminate", _SD_JSON_VARIANT_TYPE_INVALID, sd_json_dispatch_uint64, offsetof(struct dnsssec, n_dnssec_indeterminate), SD_JSON_MANDATORY }, - {}, - }; + if (links_by_index) { + if (ifindex <= 0) + /* Possibly invalid, but most likely unset because this is global + * or delegate configuration. */ + continue; - r = sd_json_dispatch(statistics.dnssec, dnssec_dispatch_table, SD_JSON_LOG, &dnsssec); - if (r < 0) - return r; + if (!set_contains(links_by_index, INT_TO_PTR(ifindex))) + continue; - table = table_new_vertical(); - if (!table) - return log_oom(); + } else if (ifindex == LOOPBACK_IFINDEX) + /* By default, exclude the loopback interface. */ + continue; - r = table_add_many(table, - TABLE_STRING, "Transactions", - TABLE_SET_COLOR, ansi_highlight(), - TABLE_SET_ALIGN_PERCENT, 0, - TABLE_EMPTY, - TABLE_FIELD, "Current Transactions", - TABLE_SET_ALIGN_PERCENT, 100, - TABLE_UINT64, transactions.n_current_transactions, - TABLE_SET_ALIGN_PERCENT, 100, - TABLE_FIELD, "Total Transactions", - TABLE_UINT64, transactions.n_transactions_total, - TABLE_EMPTY, TABLE_EMPTY, - TABLE_STRING, "Cache", - TABLE_SET_COLOR, ansi_highlight(), - TABLE_SET_ALIGN_PERCENT, 0, - TABLE_EMPTY, - TABLE_FIELD, "Current Cache Size", - TABLE_SET_ALIGN_PERCENT, 100, - TABLE_UINT64, cache.cache_size, - TABLE_FIELD, "Cache Hits", - TABLE_UINT64, cache.n_cache_hit, - TABLE_FIELD, "Cache Misses", - TABLE_UINT64, cache.n_cache_miss, - TABLE_EMPTY, TABLE_EMPTY, - TABLE_STRING, "Failure Transactions", - TABLE_SET_COLOR, ansi_highlight(), - TABLE_SET_ALIGN_PERCENT, 0, - TABLE_EMPTY, - TABLE_FIELD, "Total Timeouts", - TABLE_SET_ALIGN_PERCENT, 100, - TABLE_UINT64, transactions.n_timeouts_total, - TABLE_FIELD, "Total Timeouts (Stale Data Served)", - TABLE_UINT64, transactions.n_timeouts_served_stale_total, - TABLE_FIELD, "Total Failure Responses", - TABLE_UINT64, transactions.n_failure_responses_total, - TABLE_FIELD, "Total Failure Responses (Stale Data Served)", - TABLE_UINT64, transactions.n_failure_responses_served_stale_total, - TABLE_EMPTY, TABLE_EMPTY, - TABLE_STRING, "DNSSEC Verdicts", - TABLE_SET_COLOR, ansi_highlight(), - TABLE_SET_ALIGN_PERCENT, 0, - TABLE_EMPTY, - TABLE_FIELD, "Secure", - TABLE_SET_ALIGN_PERCENT, 100, - TABLE_UINT64, dnsssec.n_dnssec_secure, - TABLE_FIELD, "Insecure", - TABLE_UINT64, dnsssec.n_dnssec_insecure, - TABLE_FIELD, "Bogus", - TABLE_UINT64, dnsssec.n_dnssec_bogus, - TABLE_FIELD, "Indeterminate", - TABLE_UINT64, dnsssec.n_dnssec_indeterminate - ); - if (r < 0) - return table_log_add_error(r); + r = sd_json_variant_append_array(&v, w); + if (r < 0) + return r; + } - return table_print_or_warn(table); + JSON_VARIANT_REPLACE(*configuration, TAKE_PTR(v)); + return 0; } -static int verb_reset_statistics(int argc, char *argv[], uintptr_t _data, void *userdata) { - sd_json_variant *reply = NULL; - _cleanup_(sd_varlink_unrefp) sd_varlink *vl = NULL; +static int status_json_filter_fields(sd_json_variant **configuration, StatusMode mode) { + _cleanup_(sd_json_variant_unrefp) sd_json_variant *v = NULL; + sd_json_variant *w; + const char *field; int r; - (void) polkit_agent_open_if_enabled(BUS_TRANSPORT_LOCAL, arg_ask_password); - - r = sd_varlink_connect_address(&vl, "/run/systemd/resolve/io.systemd.Resolve.Monitor"); - if (r < 0) - return log_error_errno(r, "Failed to connect to query monitoring service /run/systemd/resolve/io.systemd.Resolve.Monitor: %m"); + assert(configuration); - r = varlink_callbo_and_log( - vl, - "io.systemd.Resolve.Monitor.ResetStatistics", - &reply, - SD_JSON_BUILD_PAIR_BOOLEAN("allowInteractiveAuthentication", arg_ask_password)); - if (r < 0) - return r; + field = status_mode_json_field_to_string(mode); + if (!field) + /* Nothing to filter for this mode. */ + return 0; - if (sd_json_format_enabled(arg_json_format_flags)) - return sd_json_variant_dump(reply, arg_json_format_flags, NULL, NULL); + JSON_VARIANT_ARRAY_FOREACH(w, *configuration) { + /* Always include identifier fields like ifname or delegate, and include the requested + * field even if it is empty in the configuration. */ + r = sd_json_variant_append_arraybo( + &v, + JSON_BUILD_PAIR_VARIANT_NON_NULL("ifname", sd_json_variant_by_key(w, "ifname")), + JSON_BUILD_PAIR_VARIANT_NON_NULL("ifindex", sd_json_variant_by_key(w, "ifindex")), + JSON_BUILD_PAIR_VARIANT_NON_NULL("delegate", sd_json_variant_by_key(w, "delegate")), + SD_JSON_BUILD_PAIR_VARIANT(field, sd_json_variant_by_key(w, field))); + if (r < 0) + return r; + } + JSON_VARIANT_REPLACE(*configuration, TAKE_PTR(v)); return 0; } -static int verb_flush_caches(int argc, char *argv[], uintptr_t _data, void *userdata) { - _cleanup_(sd_bus_flush_close_unrefp) sd_bus *bus = NULL; - _cleanup_(sd_bus_error_free) sd_bus_error error = SD_BUS_ERROR_NULL; +static int format_dns_server_one(DNSConfiguration *configuration, DNSServer *s, char **ret) { + bool global; int r; - r = acquire_bus(&bus); - if (r < 0) - return r; - - r = bus_call_method(bus, bus_resolve_mgr, "FlushCaches", &error, NULL, NULL); - if (r < 0) - return log_error_errno(r, "Failed to flush caches: %s", bus_error_message(&error, r)); + assert(s); + assert(ret); - return 0; -} - -static int verb_reset_server_features(int argc, char *argv[], uintptr_t _data, void *userdata) { - _cleanup_(sd_bus_flush_close_unrefp) sd_bus *bus = NULL; - _cleanup_(sd_bus_error_free) sd_bus_error error = SD_BUS_ERROR_NULL; - int r; - - r = acquire_bus(&bus); - if (r < 0) - return r; - - r = bus_call_method(bus, bus_resolve_mgr, "ResetServerFeatures", &error, NULL, NULL); - if (r < 0) - return log_error_errno(r, "Failed to reset server features: %s", bus_error_message(&error, r)); - - return 0; -} - -static int status_print_strv(DNSConfiguration *c, char **p) { - const unsigned indent = strlen("Global: "); /* Use the same indentation everywhere to make things nice */ - int pos1, pos2; - - assert(c); - - if (c->ifname) - printf("%s%nLink %i (%s)%n%s:", ansi_highlight(), &pos1, c->ifindex, c->ifname, &pos2, ansi_normal()); - else if (c->delegate) - printf("%s%nDelegate %s%n%s:", ansi_highlight(), &pos1, c->delegate, &pos2, ansi_normal()); - else - printf("%s%nGlobal%n%s:", ansi_highlight(), &pos1, &pos2, ansi_normal()); - - size_t cols = columns(), position = pos2 - pos1 + 2; - - STRV_FOREACH(i, p) { - size_t our_len = utf8_console_width(*i); /* This returns -1 on invalid utf-8 (which shouldn't happen). - * If that happens, we'll just print one item per line. */ - - if (position <= indent || size_add(size_add(position, 1), our_len) < cols) { - printf(" %s", *i); - position = size_add(size_add(position, 1), our_len); - } else { - printf("\n%*s%s", (int) indent, "", *i); - position = size_add(our_len, indent); - } - } - - printf("\n"); - - return 0; -} - -static void status_print_string(DNSConfiguration *c, const char *p) { - assert(c); - - if (c->ifname) - printf("%sLink %i (%s)%s: %s\n", - ansi_highlight(), - c->ifindex, - c->ifname, - ansi_normal(), - p); - else if (c->delegate) - printf("%sDelegate %s%s: %s\n", - ansi_highlight(), - c->delegate, - ansi_normal(), - p); - else - printf("%sGlobal%s: %s\n", ansi_highlight(), ansi_normal(), p); -} - -static void status_print_header(DNSConfiguration *c) { - assert(c); - - if (c->ifname) - printf("%sLink %i (%s)%s\n", - ansi_highlight(), - c->ifindex, - c->ifname, - ansi_normal()); - else if (c->delegate) - printf("%sDelegate %s%s\n", - ansi_highlight(), - c->delegate, - ansi_normal()); - else - printf("%sGlobal%s\n", ansi_highlight(), ansi_normal()); -} - -static int dump_list(Table *table, const char *field, char * const *l) { - int r; - - if (strv_isempty(l)) - return 0; - - r = table_add_many(table, - TABLE_FIELD, field, - TABLE_STRV_WRAPPED, l); - if (r < 0) - return table_log_add_error(r); - - return 0; -} - -static int strv_extend_extended_bool(char ***strv, const char *name, const char *value) { - int r; - - if (value) { - r = parse_boolean(value); - if (r >= 0) - return strv_extendf(strv, "%s%s", plus_minus(r), name); - } - - return strv_extendf(strv, "%s=%s", name, value ?: "???"); -} - -static int status_json_filter_fields(sd_json_variant **configuration, StatusMode mode) { - _cleanup_(sd_json_variant_unrefp) sd_json_variant *v = NULL; - sd_json_variant *w; - const char *field; - int r; - - assert(configuration); - - field = status_mode_json_field_to_string(mode); - if (!field) - /* Nothing to filter for this mode. */ - return 0; - - JSON_VARIANT_ARRAY_FOREACH(w, *configuration) { - /* Always include identifier fields like ifname or delegate, and include the requested - * field even if it is empty in the configuration. */ - r = sd_json_variant_append_arraybo( - &v, - JSON_BUILD_PAIR_VARIANT_NON_NULL("ifname", sd_json_variant_by_key(w, "ifname")), - JSON_BUILD_PAIR_VARIANT_NON_NULL("ifindex", sd_json_variant_by_key(w, "ifindex")), - JSON_BUILD_PAIR_VARIANT_NON_NULL("delegate", sd_json_variant_by_key(w, "delegate")), - SD_JSON_BUILD_PAIR_VARIANT(field, sd_json_variant_by_key(w, field))); - if (r < 0) - return r; - } - - JSON_VARIANT_REPLACE(*configuration, TAKE_PTR(v)); - return 0; -} - -static int status_json_filter_links(sd_json_variant **configuration, char **links) { - _cleanup_set_free_ Set *links_by_index = NULL; - _cleanup_(sd_json_variant_unrefp) sd_json_variant *v = NULL; - sd_json_variant *w; - int r; - - assert(configuration); - - if (links) - STRV_FOREACH(ifname, links) { - int ifindex = rtnl_resolve_interface_or_warn(/* rtnl= */ NULL, *ifname); - if (ifindex < 0) - return ifindex; - - r = set_ensure_put(&links_by_index, NULL, INT_TO_PTR(ifindex)); - if (r < 0) - return r; - } - - JSON_VARIANT_ARRAY_FOREACH(w, *configuration) { - int ifindex = sd_json_variant_unsigned(sd_json_variant_by_key(w, "ifindex")); - - if (links_by_index) { - if (ifindex <= 0) - /* Possibly invalid, but most likely unset because this is global - * or delegate configuration. */ - continue; - - if (!set_contains(links_by_index, INT_TO_PTR(ifindex))) - continue; - - } else if (ifindex == LOOPBACK_IFINDEX) - /* By default, exclude the loopback interface. */ - continue; - - r = sd_json_variant_append_array(&v, w); - if (r < 0) - return r; - } - - JSON_VARIANT_REPLACE(*configuration, TAKE_PTR(v)); - return 0; -} - -static int varlink_dump_dns_configuration(sd_json_variant **ret) { - _cleanup_(sd_varlink_unrefp) sd_varlink *vl = NULL; - _cleanup_(sd_json_variant_unrefp) sd_json_variant *reply = NULL; - sd_json_variant *v; - int r; - - assert(ret); - - r = sd_varlink_connect_address(&vl, "/run/systemd/resolve/io.systemd.Resolve"); - if (r < 0) - return log_error_errno(r, "Failed to connect to service /run/systemd/resolve/io.systemd.Resolve: %m"); - - r = varlink_call_and_log(vl, "io.systemd.Resolve.DumpDNSConfiguration", /* parameters= */ NULL, &reply); - if (r < 0) - return r; - - v = sd_json_variant_by_key(reply, "configuration"); - - if (!sd_json_variant_is_array(v)) - return log_error_errno(SYNTHETIC_ERRNO(ENODATA), "DumpDNSConfiguration() response missing 'configuration' key."); - - TAKE_PTR(reply); - *ret = sd_json_variant_ref(v); - return 0; -} - -static int format_dns_server_one(DNSConfiguration *configuration, DNSServer *s, char **ret) { - bool global; - int r; - - assert(s); - assert(ret); - - global = !(configuration->ifindex > 0 || configuration->delegate); + global = !(configuration->ifindex > 0 || configuration->delegate); if (global && s->ifindex > 0 && s->ifindex != LOOPBACK_IFINDEX) { /* This one has an (non-loopback) ifindex set, and we were told to suppress those. Hence do so. */ @@ -1735,27 +1431,113 @@ static int format_scopes_string(DNSConfiguration *configuration, char **ret) { return 0; } -static int print_configuration(DNSConfiguration *configuration, StatusMode mode, bool *empty_line) { - _cleanup_(table_unrefp) Table *table = NULL; - int r; - - assert(configuration); - - pager_open(arg_pager_flags); +static void status_print_header(DNSConfiguration *c) { + assert(c); - bool global = !(configuration->ifindex > 0 || configuration->delegate); - if (mode == STATUS_DNS) { - _cleanup_strv_free_ char **l = NULL; - r = format_dns_servers(configuration, configuration->dns_servers, &l); - if (r < 0) - return r; + if (c->ifname) + printf("%sLink %i (%s)%s\n", + ansi_highlight(), + c->ifindex, + c->ifname, + ansi_normal()); + else if (c->delegate) + printf("%sDelegate %s%s\n", + ansi_highlight(), + c->delegate, + ansi_normal()); + else + printf("%sGlobal%s\n", ansi_highlight(), ansi_normal()); +} - return status_print_strv(configuration, l); +static void status_print_string(DNSConfiguration *c, const char *p) { + assert(c); - } else if (mode == STATUS_DOMAIN) { - _cleanup_strv_free_ char **l = NULL; - r = format_search_domains(configuration, configuration->search_domains, &l); - if (r < 0) + if (c->ifname) + printf("%sLink %i (%s)%s: %s\n", + ansi_highlight(), + c->ifindex, + c->ifname, + ansi_normal(), + p); + else if (c->delegate) + printf("%sDelegate %s%s: %s\n", + ansi_highlight(), + c->delegate, + ansi_normal(), + p); + else + printf("%sGlobal%s: %s\n", ansi_highlight(), ansi_normal(), p); +} + +static int status_print_strv(DNSConfiguration *c, char **p) { + const unsigned indent = strlen("Global: "); /* Use the same indentation everywhere to make things nice */ + int pos1, pos2; + + assert(c); + + if (c->ifname) + printf("%s%nLink %i (%s)%n%s:", ansi_highlight(), &pos1, c->ifindex, c->ifname, &pos2, ansi_normal()); + else if (c->delegate) + printf("%s%nDelegate %s%n%s:", ansi_highlight(), &pos1, c->delegate, &pos2, ansi_normal()); + else + printf("%s%nGlobal%n%s:", ansi_highlight(), &pos1, &pos2, ansi_normal()); + + size_t cols = columns(), position = pos2 - pos1 + 2; + + STRV_FOREACH(i, p) { + size_t our_len = utf8_console_width(*i); /* This returns -1 on invalid utf-8 (which shouldn't happen). + * If that happens, we'll just print one item per line. */ + + if (position <= indent || size_add(size_add(position, 1), our_len) < cols) { + printf(" %s", *i); + position = size_add(size_add(position, 1), our_len); + } else { + printf("\n%*s%s", (int) indent, "", *i); + position = size_add(our_len, indent); + } + } + + printf("\n"); + + return 0; +} + +static int dump_list(Table *table, const char *field, char * const *l) { + int r; + + if (strv_isempty(l)) + return 0; + + r = table_add_many(table, + TABLE_FIELD, field, + TABLE_STRV_WRAPPED, l); + if (r < 0) + return table_log_add_error(r); + + return 0; +} + +static int print_configuration(DNSConfiguration *configuration, StatusMode mode, bool *empty_line) { + _cleanup_(table_unrefp) Table *table = NULL; + int r; + + assert(configuration); + + pager_open(arg_pager_flags); + + bool global = !(configuration->ifindex > 0 || configuration->delegate); + if (mode == STATUS_DNS) { + _cleanup_strv_free_ char **l = NULL; + r = format_dns_servers(configuration, configuration->dns_servers, &l); + if (r < 0) + return r; + + return status_print_strv(configuration, l); + + } else if (mode == STATUS_DOMAIN) { + _cleanup_strv_free_ char **l = NULL; + r = format_search_domains(configuration, configuration->search_domains, &l); + if (r < 0) return r; return status_print_strv(configuration, l); @@ -1985,165 +1767,193 @@ static int verb_status(int argc, char *argv[], uintptr_t _data, void *userdata) return status_full(STATUS_ALL, strv_skip(argv, 1)); } -static int call_dns(sd_bus *bus, char **dns, const BusLocator *locator, sd_bus_error *error, bool extended) { - _cleanup_(sd_bus_message_unrefp) sd_bus_message *req = NULL; +static int verb_show_statistics(int argc, char *argv[], uintptr_t _data, void *userdata) { + _cleanup_(table_unrefp) Table *table = NULL; + sd_json_variant *reply = NULL; + _cleanup_(sd_varlink_unrefp) sd_varlink *vl = NULL; int r; (void) polkit_agent_open_if_enabled(BUS_TRANSPORT_LOCAL, arg_ask_password); - r = bus_message_new_method_call(bus, &req, locator, extended ? "SetLinkDNSEx" : "SetLinkDNS"); - if (r < 0) - return bus_log_create_error(r); - - r = sd_bus_message_append(req, "i", arg_ifindex); + r = sd_varlink_connect_address(&vl, "/run/systemd/resolve/io.systemd.Resolve.Monitor"); if (r < 0) - return bus_log_create_error(r); + return log_error_errno(r, "Failed to connect to query monitoring service /run/systemd/resolve/io.systemd.Resolve.Monitor: %m"); - r = sd_bus_message_open_container(req, 'a', extended ? "(iayqs)" : "(iay)"); + r = varlink_callbo_and_log( + vl, + "io.systemd.Resolve.Monitor.DumpStatistics", + &reply, + SD_JSON_BUILD_PAIR_BOOLEAN("allowInteractiveAuthentication", arg_ask_password)); if (r < 0) - return bus_log_create_error(r); - - /* If only argument is the empty string, then call SetLinkDNS() with an - * empty list, which will clear the list of domains for an interface. */ - if (!strv_equal(dns, STRV_MAKE(""))) - STRV_FOREACH(p, dns) { - _cleanup_free_ char *name = NULL; - struct in_addr_data data; - uint16_t port; - int ifindex; - - r = in_addr_port_ifindex_name_from_string_auto(*p, &data.family, &data.address, &port, &ifindex, &name); - if (r < 0) - return log_error_errno(r, "Failed to parse DNS server address: %s", *p); - - if (ifindex != 0 && ifindex != arg_ifindex) - return log_error_errno(SYNTHETIC_ERRNO(EINVAL), "Invalid ifindex: %i", ifindex); - - r = sd_bus_message_open_container(req, 'r', extended ? "iayqs" : "iay"); - if (r < 0) - return bus_log_create_error(r); - - r = sd_bus_message_append(req, "i", data.family); - if (r < 0) - return bus_log_create_error(r); - - r = sd_bus_message_append_array(req, 'y', &data.address, FAMILY_ADDRESS_SIZE(data.family)); - if (r < 0) - return bus_log_create_error(r); + return r; - if (extended) { - r = sd_bus_message_append(req, "q", port); - if (r < 0) - return bus_log_create_error(r); + if (sd_json_format_enabled(arg_json_format_flags)) + return sd_json_variant_dump(reply, arg_json_format_flags, NULL, NULL); - r = sd_bus_message_append(req, "s", name); - if (r < 0) - return bus_log_create_error(r); - } + struct statistics { + sd_json_variant *transactions; + sd_json_variant *cache; + sd_json_variant *dnssec; + } statistics; - r = sd_bus_message_close_container(req); - if (r < 0) - return bus_log_create_error(r); - } + static const sd_json_dispatch_field statistics_dispatch_table[] = { + { "transactions", SD_JSON_VARIANT_OBJECT, sd_json_dispatch_variant_noref, offsetof(struct statistics, transactions), SD_JSON_MANDATORY }, + { "cache", SD_JSON_VARIANT_OBJECT, sd_json_dispatch_variant_noref, offsetof(struct statistics, cache), SD_JSON_MANDATORY }, + { "dnssec", SD_JSON_VARIANT_OBJECT, sd_json_dispatch_variant_noref, offsetof(struct statistics, dnssec), SD_JSON_MANDATORY }, + {}, + }; - r = sd_bus_message_close_container(req); + r = sd_json_dispatch(reply, statistics_dispatch_table, SD_JSON_LOG, &statistics); if (r < 0) - return bus_log_create_error(r); + return r; - r = sd_bus_call(bus, req, 0, error, NULL); - if (r < 0 && extended && sd_bus_error_has_name(error, SD_BUS_ERROR_UNKNOWN_METHOD)) { - sd_bus_error_free(error); - return call_dns(bus, dns, locator, error, false); - } - return r; -} + struct transactions { + uint64_t n_current_transactions; + uint64_t n_transactions_total; + uint64_t n_timeouts_total; + uint64_t n_timeouts_served_stale_total; + uint64_t n_failure_responses_total; + uint64_t n_failure_responses_served_stale_total; + } transactions; -static int verb_dns(int argc, char *argv[], uintptr_t _data, void *userdata) { - _cleanup_(sd_bus_flush_close_unrefp) sd_bus *bus = NULL; - _cleanup_(sd_bus_error_free) sd_bus_error error = SD_BUS_ERROR_NULL; - int r; + static const sd_json_dispatch_field transactions_dispatch_table[] = { + { "currentTransactions", _SD_JSON_VARIANT_TYPE_INVALID, sd_json_dispatch_uint64, offsetof(struct transactions, n_current_transactions), SD_JSON_MANDATORY }, + { "totalTransactions", _SD_JSON_VARIANT_TYPE_INVALID, sd_json_dispatch_uint64, offsetof(struct transactions, n_transactions_total), SD_JSON_MANDATORY }, + { "totalTimeouts", _SD_JSON_VARIANT_TYPE_INVALID, sd_json_dispatch_uint64, offsetof(struct transactions, n_timeouts_total), SD_JSON_MANDATORY }, + { "totalTimeoutsServedStale", _SD_JSON_VARIANT_TYPE_INVALID, sd_json_dispatch_uint64, offsetof(struct transactions, n_timeouts_served_stale_total), SD_JSON_MANDATORY }, + { "totalFailedResponses", _SD_JSON_VARIANT_TYPE_INVALID, sd_json_dispatch_uint64, offsetof(struct transactions, n_failure_responses_total), SD_JSON_MANDATORY }, + { "totalFailedResponsesServedStale", _SD_JSON_VARIANT_TYPE_INVALID, sd_json_dispatch_uint64, offsetof(struct transactions, n_failure_responses_served_stale_total), SD_JSON_MANDATORY }, + {}, + }; - r = acquire_bus(&bus); + r = sd_json_dispatch(statistics.transactions, transactions_dispatch_table, SD_JSON_LOG, &transactions); if (r < 0) return r; - if (argc >= 2) { - r = ifname_mangle(argv[1]); - if (r < 0) - return r; - } - - if (arg_ifindex <= 0) - return status_all(STATUS_DNS); + struct cache { + uint64_t cache_size; + uint64_t n_cache_hit; + uint64_t n_cache_miss; + } cache; - if (argc < 3) - return status_ifindex(arg_ifindex, STATUS_DNS); + static const sd_json_dispatch_field cache_dispatch_table[] = { + { "size", _SD_JSON_VARIANT_TYPE_INVALID, sd_json_dispatch_uint64, offsetof(struct cache, cache_size), SD_JSON_MANDATORY }, + { "hits", _SD_JSON_VARIANT_TYPE_INVALID, sd_json_dispatch_uint64, offsetof(struct cache, n_cache_hit), SD_JSON_MANDATORY }, + { "misses", _SD_JSON_VARIANT_TYPE_INVALID, sd_json_dispatch_uint64, offsetof(struct cache, n_cache_miss), SD_JSON_MANDATORY }, + {}, + }; - char **args = strv_skip(argv, 2); - r = call_dns(bus, args, bus_resolve_mgr, &error, true); - if (r < 0 && sd_bus_error_has_name(&error, BUS_ERROR_LINK_BUSY)) { - sd_bus_error_free(&error); + r = sd_json_dispatch(statistics.cache, cache_dispatch_table, SD_JSON_LOG, &cache); + if (r < 0) + return r; - r = call_dns(bus, args, bus_network_mgr, &error, true); - } - if (r < 0) { - if (arg_ifindex_permissive && - sd_bus_error_has_name(&error, BUS_ERROR_NO_SUCH_LINK)) - return 0; + struct dnsssec { + uint64_t n_dnssec_secure; + uint64_t n_dnssec_insecure; + uint64_t n_dnssec_bogus; + uint64_t n_dnssec_indeterminate; + } dnsssec; - return log_error_errno(r, "Failed to set DNS configuration: %s", bus_error_message(&error, r)); - } + static const sd_json_dispatch_field dnssec_dispatch_table[] = { + { "secure", _SD_JSON_VARIANT_TYPE_INVALID, sd_json_dispatch_uint64, offsetof(struct dnsssec, n_dnssec_secure), SD_JSON_MANDATORY }, + { "insecure", _SD_JSON_VARIANT_TYPE_INVALID, sd_json_dispatch_uint64, offsetof(struct dnsssec, n_dnssec_insecure), SD_JSON_MANDATORY }, + { "bogus", _SD_JSON_VARIANT_TYPE_INVALID, sd_json_dispatch_uint64, offsetof(struct dnsssec, n_dnssec_bogus), SD_JSON_MANDATORY }, + { "indeterminate", _SD_JSON_VARIANT_TYPE_INVALID, sd_json_dispatch_uint64, offsetof(struct dnsssec, n_dnssec_indeterminate), SD_JSON_MANDATORY }, + {}, + }; - return 0; -} + r = sd_json_dispatch(statistics.dnssec, dnssec_dispatch_table, SD_JSON_LOG, &dnsssec); + if (r < 0) + return r; -static int call_domain(sd_bus *bus, char **domain, const BusLocator *locator, sd_bus_error *error) { - _cleanup_(sd_bus_message_unrefp) sd_bus_message *req = NULL; - int r; + table = table_new_vertical(); + if (!table) + return log_oom(); - (void) polkit_agent_open_if_enabled(BUS_TRANSPORT_LOCAL, arg_ask_password); - - r = bus_message_new_method_call(bus, &req, locator, "SetLinkDomains"); - if (r < 0) - return bus_log_create_error(r); - - r = sd_bus_message_append(req, "i", arg_ifindex); - if (r < 0) - return bus_log_create_error(r); - - r = sd_bus_message_open_container(req, 'a', "(sb)"); + r = table_add_many(table, + TABLE_STRING, "Transactions", + TABLE_SET_COLOR, ansi_highlight(), + TABLE_SET_ALIGN_PERCENT, 0, + TABLE_EMPTY, + TABLE_FIELD, "Current Transactions", + TABLE_SET_ALIGN_PERCENT, 100, + TABLE_UINT64, transactions.n_current_transactions, + TABLE_SET_ALIGN_PERCENT, 100, + TABLE_FIELD, "Total Transactions", + TABLE_UINT64, transactions.n_transactions_total, + TABLE_EMPTY, TABLE_EMPTY, + TABLE_STRING, "Cache", + TABLE_SET_COLOR, ansi_highlight(), + TABLE_SET_ALIGN_PERCENT, 0, + TABLE_EMPTY, + TABLE_FIELD, "Current Cache Size", + TABLE_SET_ALIGN_PERCENT, 100, + TABLE_UINT64, cache.cache_size, + TABLE_FIELD, "Cache Hits", + TABLE_UINT64, cache.n_cache_hit, + TABLE_FIELD, "Cache Misses", + TABLE_UINT64, cache.n_cache_miss, + TABLE_EMPTY, TABLE_EMPTY, + TABLE_STRING, "Failure Transactions", + TABLE_SET_COLOR, ansi_highlight(), + TABLE_SET_ALIGN_PERCENT, 0, + TABLE_EMPTY, + TABLE_FIELD, "Total Timeouts", + TABLE_SET_ALIGN_PERCENT, 100, + TABLE_UINT64, transactions.n_timeouts_total, + TABLE_FIELD, "Total Timeouts (Stale Data Served)", + TABLE_UINT64, transactions.n_timeouts_served_stale_total, + TABLE_FIELD, "Total Failure Responses", + TABLE_UINT64, transactions.n_failure_responses_total, + TABLE_FIELD, "Total Failure Responses (Stale Data Served)", + TABLE_UINT64, transactions.n_failure_responses_served_stale_total, + TABLE_EMPTY, TABLE_EMPTY, + TABLE_STRING, "DNSSEC Verdicts", + TABLE_SET_COLOR, ansi_highlight(), + TABLE_SET_ALIGN_PERCENT, 0, + TABLE_EMPTY, + TABLE_FIELD, "Secure", + TABLE_SET_ALIGN_PERCENT, 100, + TABLE_UINT64, dnsssec.n_dnssec_secure, + TABLE_FIELD, "Insecure", + TABLE_UINT64, dnsssec.n_dnssec_insecure, + TABLE_FIELD, "Bogus", + TABLE_UINT64, dnsssec.n_dnssec_bogus, + TABLE_FIELD, "Indeterminate", + TABLE_UINT64, dnsssec.n_dnssec_indeterminate + ); if (r < 0) - return bus_log_create_error(r); + return table_log_add_error(r); - /* If only argument is the empty string, then call SetLinkDomains() with an - * empty list, which will clear the list of domains for an interface. */ - if (!strv_equal(domain, STRV_MAKE(""))) - STRV_FOREACH(p, domain) { - const char *n; + return table_print_or_warn(table); +} - n = **p == '~' ? *p + 1 : *p; +static int verb_reset_statistics(int argc, char *argv[], uintptr_t _data, void *userdata) { + sd_json_variant *reply = NULL; + _cleanup_(sd_varlink_unrefp) sd_varlink *vl = NULL; + int r; - r = dns_name_is_valid(n); - if (r < 0) - return log_error_errno(r, "Failed to validate specified domain %s: %m", n); - if (r == 0) - return log_error_errno(SYNTHETIC_ERRNO(EINVAL), - "Domain not valid: %s", - n); + (void) polkit_agent_open_if_enabled(BUS_TRANSPORT_LOCAL, arg_ask_password); - r = sd_bus_message_append(req, "(sb)", n, **p == '~'); - if (r < 0) - return bus_log_create_error(r); - } + r = sd_varlink_connect_address(&vl, "/run/systemd/resolve/io.systemd.Resolve.Monitor"); + if (r < 0) + return log_error_errno(r, "Failed to connect to query monitoring service /run/systemd/resolve/io.systemd.Resolve.Monitor: %m"); - r = sd_bus_message_close_container(req); + r = varlink_callbo_and_log( + vl, + "io.systemd.Resolve.Monitor.ResetStatistics", + &reply, + SD_JSON_BUILD_PAIR_BOOLEAN("allowInteractiveAuthentication", arg_ask_password)); if (r < 0) - return bus_log_create_error(r); + return r; - return sd_bus_call(bus, req, 0, error, NULL); + if (sd_json_format_enabled(arg_json_format_flags)) + return sd_json_variant_dump(reply, arg_json_format_flags, NULL, NULL); + + return 0; } -static int verb_domain(int argc, char *argv[], uintptr_t _data, void *userdata) { +static int verb_flush_caches(int argc, char *argv[], uintptr_t _data, void *userdata) { _cleanup_(sd_bus_flush_close_unrefp) sd_bus *bus = NULL; _cleanup_(sd_bus_error_free) sd_bus_error error = SD_BUS_ERROR_NULL; int r; @@ -2152,295 +1962,268 @@ static int verb_domain(int argc, char *argv[], uintptr_t _data, void *userdata) if (r < 0) return r; - if (argc >= 2) { - r = ifname_mangle(argv[1]); - if (r < 0) - return r; - } - - if (arg_ifindex <= 0) - return status_all(STATUS_DOMAIN); - - if (argc < 3) - return status_ifindex(arg_ifindex, STATUS_DOMAIN); - - char **args = strv_skip(argv, 2); - r = call_domain(bus, args, bus_resolve_mgr, &error); - if (r < 0 && sd_bus_error_has_name(&error, BUS_ERROR_LINK_BUSY)) { - sd_bus_error_free(&error); - - r = call_domain(bus, args, bus_network_mgr, &error); - } - if (r < 0) { - if (arg_ifindex_permissive && - sd_bus_error_has_name(&error, BUS_ERROR_NO_SUCH_LINK)) - return 0; - - return log_error_errno(r, "Failed to set domain configuration: %s", bus_error_message(&error, r)); - } + r = bus_call_method(bus, bus_resolve_mgr, "FlushCaches", &error, NULL, NULL); + if (r < 0) + return log_error_errno(r, "Failed to flush caches: %s", bus_error_message(&error, r)); return 0; } -static int verb_default_route(int argc, char *argv[], uintptr_t _data, void *userdata) { +static int verb_reset_server_features(int argc, char *argv[], uintptr_t _data, void *userdata) { _cleanup_(sd_bus_flush_close_unrefp) sd_bus *bus = NULL; _cleanup_(sd_bus_error_free) sd_bus_error error = SD_BUS_ERROR_NULL; - int r, b; + int r; r = acquire_bus(&bus); if (r < 0) return r; - if (argc >= 2) { - r = ifname_mangle(argv[1]); - if (r < 0) - return r; - } - - if (arg_ifindex <= 0) - return status_all(STATUS_DEFAULT_ROUTE); + r = bus_call_method(bus, bus_resolve_mgr, "ResetServerFeatures", &error, NULL, NULL); + if (r < 0) + return log_error_errno(r, "Failed to reset server features: %s", bus_error_message(&error, r)); - if (argc < 3) - return status_ifindex(arg_ifindex, STATUS_DEFAULT_ROUTE); + return 0; +} - b = parse_boolean(argv[2]); - if (b < 0) - return log_error_errno(b, "Failed to parse boolean argument: %s", argv[2]); +static int print_question(char prefix, const char *color, sd_json_variant *question) { + sd_json_variant *q = NULL; + int r; - (void) polkit_agent_open_if_enabled(BUS_TRANSPORT_LOCAL, arg_ask_password); + assert(color); - r = bus_call_method(bus, bus_resolve_mgr, "SetLinkDefaultRoute", &error, NULL, "ib", arg_ifindex, b); - if (r < 0 && sd_bus_error_has_name(&error, BUS_ERROR_LINK_BUSY)) { - sd_bus_error_free(&error); + JSON_VARIANT_ARRAY_FOREACH(q, question) { + _cleanup_(dns_resource_key_unrefp) DnsResourceKey *key = NULL; + char buf[DNS_RESOURCE_KEY_STRING_MAX]; - r = bus_call_method(bus, bus_network_mgr, "SetLinkDefaultRoute", &error, NULL, "ib", arg_ifindex, b); - } - if (r < 0) { - if (arg_ifindex_permissive && - sd_bus_error_has_name(&error, BUS_ERROR_NO_SUCH_LINK)) - return 0; + r = dns_resource_key_from_json(q, &key); + if (r < 0) { + log_warning_errno(r, "Received monitor message with invalid question key, ignoring: %m"); + continue; + } - return log_error_errno(r, "Failed to set default route configuration: %s", bus_error_message(&error, r)); + printf("%s%s %c%s: %s\n", + color, + glyph(GLYPH_ARROW_RIGHT), + prefix, + ansi_normal(), + dns_resource_key_to_string(key, buf, sizeof(buf))); } return 0; } -static int verb_llmnr(int argc, char *argv[], uintptr_t _data, void *userdata) { - _cleanup_(sd_bus_flush_close_unrefp) sd_bus *bus = NULL; - _cleanup_(sd_bus_error_free) sd_bus_error error = SD_BUS_ERROR_NULL; - _cleanup_free_ char *global_llmnr_support_str = NULL; - ResolveSupport global_llmnr_support, llmnr_support; +static int print_answer(sd_json_variant *answer) { + sd_json_variant *a; int r; - r = acquire_bus(&bus); - if (r < 0) - return r; - - if (argc >= 2) { - r = ifname_mangle(argv[1]); - if (r < 0) - return r; - } - - if (arg_ifindex <= 0) - return status_all(STATUS_LLMNR); - - if (argc < 3) - return status_ifindex(arg_ifindex, STATUS_LLMNR); + JSON_VARIANT_ARRAY_FOREACH(a, answer) { + _cleanup_(dns_resource_record_unrefp) DnsResourceRecord *rr = NULL; + _cleanup_free_ void *d = NULL; + sd_json_variant *jraw; + const char *s; + size_t l; - llmnr_support = resolve_support_from_string(argv[2]); - if (llmnr_support < 0) - return log_error_errno(llmnr_support, "Invalid LLMNR setting: %s", argv[2]); + jraw = sd_json_variant_by_key(a, "raw"); + if (!jraw) { + log_warning("Received monitor answer lacking valid raw data, ignoring."); + continue; + } - r = bus_get_property_string(bus, bus_resolve_mgr, "LLMNR", &error, &global_llmnr_support_str); - if (r < 0) - return log_error_errno(r, "Failed to get the global LLMNR support state: %s", bus_error_message(&error, r)); + r = sd_json_variant_unbase64(jraw, &d, &l); + if (r < 0) { + log_warning_errno(r, "Failed to undo base64 encoding of monitor answer raw data, ignoring."); + continue; + } - global_llmnr_support = resolve_support_from_string(global_llmnr_support_str); - if (global_llmnr_support < 0) - return log_error_errno(global_llmnr_support, "Received invalid global LLMNR setting: %s", global_llmnr_support_str); + r = dns_resource_record_new_from_raw(&rr, d, l); + if (r < 0) { + log_warning_errno(r, "Failed to parse monitor answer RR, ignoring: %m"); + continue; + } - if (global_llmnr_support < llmnr_support) - log_warning("Setting LLMNR support level \"%s\" for \"%s\", but the global support level is \"%s\".", - argv[2], arg_ifname, global_llmnr_support_str); + s = dns_resource_record_to_string(rr); + if (!s) + return log_oom(); - (void) polkit_agent_open_if_enabled(BUS_TRANSPORT_LOCAL, arg_ask_password); + printf("%s%s A%s: %s\n", + ansi_highlight_yellow(), + glyph(GLYPH_ARROW_LEFT), + ansi_normal(), + s); + } - r = bus_call_method(bus, bus_resolve_mgr, "SetLinkLLMNR", &error, NULL, "is", arg_ifindex, argv[2]); - if (r < 0 && sd_bus_error_has_name(&error, BUS_ERROR_LINK_BUSY)) { - sd_bus_error_free(&error); + return 0; +} - r = bus_call_method(bus, bus_network_mgr, "SetLinkLLMNR", &error, NULL, "is", arg_ifindex, argv[2]); - } - if (r < 0) { - if (arg_ifindex_permissive && - sd_bus_error_has_name(&error, BUS_ERROR_NO_SUCH_LINK)) - return 0; +typedef struct MonitorQueryParams { + sd_json_variant *question; + sd_json_variant *answer; + sd_json_variant *collected_questions; + int rcode; + int error; + int ede_code; + const char *state; + const char *result; + const char *ede_msg; +} MonitorQueryParams; - return log_error_errno(r, "Failed to set LLMNR configuration: %s", bus_error_message(&error, r)); - } +static void monitor_query_params_done(MonitorQueryParams *p) { + assert(p); - return 0; + sd_json_variant_unref(p->question); + sd_json_variant_unref(p->answer); + sd_json_variant_unref(p->collected_questions); } -static int verb_mdns(int argc, char *argv[], uintptr_t _data, void *userdata) { - _cleanup_(sd_bus_flush_close_unrefp) sd_bus *bus = NULL; - _cleanup_(sd_bus_error_free) sd_bus_error error = SD_BUS_ERROR_NULL; - _cleanup_free_ char *global_mdns_support_str = NULL; - ResolveSupport global_mdns_support, mdns_support; - int r; - - r = acquire_bus(&bus); - if (r < 0) - return r; - - if (argc >= 2) { - r = ifname_mangle(argv[1]); - if (r < 0) - return r; - } - - if (arg_ifindex <= 0) - return status_all(STATUS_MDNS); +static void monitor_query_dump(sd_json_variant *v) { + static const sd_json_dispatch_field dispatch_table[] = { + { "question", SD_JSON_VARIANT_ARRAY, sd_json_dispatch_variant, offsetof(MonitorQueryParams, question), SD_JSON_MANDATORY }, + { "answer", SD_JSON_VARIANT_ARRAY, sd_json_dispatch_variant, offsetof(MonitorQueryParams, answer), 0 }, + { "collectedQuestions", SD_JSON_VARIANT_ARRAY, sd_json_dispatch_variant, offsetof(MonitorQueryParams, collected_questions), 0 }, + { "state", SD_JSON_VARIANT_STRING, sd_json_dispatch_const_string, offsetof(MonitorQueryParams, state), SD_JSON_MANDATORY }, + { "result", SD_JSON_VARIANT_STRING, sd_json_dispatch_const_string, offsetof(MonitorQueryParams, result), 0 }, + { "rcode", _SD_JSON_VARIANT_TYPE_INVALID, sd_json_dispatch_int, offsetof(MonitorQueryParams, rcode), 0 }, + { "errno", _SD_JSON_VARIANT_TYPE_INVALID, sd_json_dispatch_int, offsetof(MonitorQueryParams, error), 0 }, + { "extendedDNSErrorCode", _SD_JSON_VARIANT_TYPE_INVALID, sd_json_dispatch_int, offsetof(MonitorQueryParams, ede_code), 0 }, + { "extendedDNSErrorMessage", SD_JSON_VARIANT_STRING, sd_json_dispatch_const_string, offsetof(MonitorQueryParams, ede_msg), 0 }, + {} + }; - if (argc < 3) - return status_ifindex(arg_ifindex, STATUS_MDNS); + _cleanup_(monitor_query_params_done) MonitorQueryParams p = { + .rcode = -1, + .ede_code = -1, + }; - mdns_support = resolve_support_from_string(argv[2]); - if (mdns_support < 0) - return log_error_errno(mdns_support, "Invalid mDNS setting: %s", argv[2]); + assert(v); - r = bus_get_property_string(bus, bus_resolve_mgr, "MulticastDNS", &error, &global_mdns_support_str); - if (r < 0) - return log_error_errno(r, "Failed to get the global mDNS support state: %s", bus_error_message(&error, r)); + if (sd_json_dispatch(v, dispatch_table, SD_JSON_LOG|SD_JSON_ALLOW_EXTENSIONS, &p) < 0) + return; - global_mdns_support = resolve_support_from_string(global_mdns_support_str); - if (global_mdns_support < 0) - return log_error_errno(global_mdns_support, "Received invalid global mDNS setting: %s", global_mdns_support_str); + /* First show the current question */ + print_question('Q', ansi_highlight_cyan(), p.question); - if (global_mdns_support < mdns_support) - log_warning("Setting mDNS support level \"%s\" for \"%s\", but the global support level is \"%s\".", - argv[2], arg_ifname, global_mdns_support_str); + /* And then show the questions that led to this one in case this was a CNAME chain */ + print_question('C', ansi_highlight_grey(), p.collected_questions); - (void) polkit_agent_open_if_enabled(BUS_TRANSPORT_LOCAL, arg_ask_password); + printf("%s%s S%s: %s", + streq_ptr(p.state, "success") ? ansi_highlight_green() : ansi_highlight_red(), + glyph(GLYPH_ARROW_LEFT), + ansi_normal(), + streq_ptr(p.state, "errno") ? ERRNO_NAME(p.error) : + streq_ptr(p.state, "rcode-failure") ? strna(dns_rcode_to_string(p.rcode)) : + strna(p.state)); - r = bus_call_method(bus, bus_resolve_mgr, "SetLinkMulticastDNS", &error, NULL, "is", arg_ifindex, argv[2]); - if (r < 0 && sd_bus_error_has_name(&error, BUS_ERROR_LINK_BUSY)) { - sd_bus_error_free(&error); + if (!isempty(p.result)) + printf(": %s", p.result); - r = bus_call_method( - bus, - bus_network_mgr, - "SetLinkMulticastDNS", - &error, - NULL, - "is", arg_ifindex, argv[2]); - } - if (r < 0) { - if (arg_ifindex_permissive && - sd_bus_error_has_name(&error, BUS_ERROR_NO_SUCH_LINK)) - return 0; + if (p.ede_code >= 0) + printf(" (%s%s%s)", + FORMAT_DNS_EDE_RCODE(p.ede_code), + !isempty(p.ede_msg) ? ": " : "", + strempty(p.ede_msg)); - return log_error_errno(r, "Failed to set MulticastDNS configuration: %s", bus_error_message(&error, r)); - } + puts(""); - return 0; + print_answer(p.answer); } -static int verb_dns_over_tls(int argc, char *argv[], uintptr_t _data, void *userdata) { - _cleanup_(sd_bus_flush_close_unrefp) sd_bus *bus = NULL; - _cleanup_(sd_bus_error_free) sd_bus_error error = SD_BUS_ERROR_NULL; - int r; - - r = acquire_bus(&bus); - if (r < 0) - return r; - - if (argc >= 2) { - r = ifname_mangle(argv[1]); - if (r < 0) - return r; - } - - if (arg_ifindex <= 0) - return status_all(STATUS_DNS_OVER_TLS); +static int monitor_reply( + sd_varlink *link, + sd_json_variant *parameters, + const char *error_id, + sd_varlink_reply_flags_t flags, + void *userdata) { - if (argc < 3) - return status_ifindex(arg_ifindex, STATUS_DNS_OVER_TLS); + assert(link); - (void) polkit_agent_open_if_enabled(BUS_TRANSPORT_LOCAL, arg_ask_password); + if (error_id) { + bool disconnect; - r = bus_call_method(bus, bus_resolve_mgr, "SetLinkDNSOverTLS", &error, NULL, "is", arg_ifindex, argv[2]); - if (r < 0 && sd_bus_error_has_name(&error, BUS_ERROR_LINK_BUSY)) { - sd_bus_error_free(&error); + disconnect = streq(error_id, SD_VARLINK_ERROR_DISCONNECTED); + if (disconnect) + log_info("Disconnected."); + else + log_error("Varlink error: %s", error_id); - r = bus_call_method( - bus, - bus_network_mgr, - "SetLinkDNSOverTLS", - &error, - NULL, - "is", arg_ifindex, argv[2]); + (void) sd_event_exit(ASSERT_PTR(sd_varlink_get_event(link)), disconnect ? EXIT_SUCCESS : EXIT_FAILURE); + return 0; } - if (r < 0) { - if (arg_ifindex_permissive && - sd_bus_error_has_name(&error, BUS_ERROR_NO_SUCH_LINK)) - return 0; - return log_error_errno(r, "Failed to set DNSOverTLS configuration: %s", bus_error_message(&error, r)); + if (sd_json_variant_by_key(parameters, "ready")) { + /* The first message coming in will just indicate that we are now subscribed. We let our + * caller know if they asked for it. Once the caller sees this they should know that we are + * not going to miss any queries anymore. */ + (void) sd_notify(/* unset_environment=false */ false, "READY=1"); + return 0; } + if (!sd_json_format_enabled(arg_json_format_flags)) { + monitor_query_dump(parameters); + printf("\n"); + } else + sd_json_variant_dump(parameters, arg_json_format_flags, NULL, NULL); + + fflush(stdout); + return 0; } -static int verb_dnssec(int argc, char *argv[], uintptr_t _data, void *userdata) { - _cleanup_(sd_bus_flush_close_unrefp) sd_bus *bus = NULL; - _cleanup_(sd_bus_error_free) sd_bus_error error = SD_BUS_ERROR_NULL; - int r; +static int verb_monitor(int argc, char *argv[], uintptr_t _data, void *userdata) { + _cleanup_(sd_event_unrefp) sd_event *event = NULL; + _cleanup_(sd_varlink_unrefp) sd_varlink *vl = NULL; + int r, c; - r = acquire_bus(&bus); + (void) polkit_agent_open_if_enabled(BUS_TRANSPORT_LOCAL, arg_ask_password); + + r = sd_event_default(&event); if (r < 0) - return r; + return log_error_errno(r, "Failed to get event loop: %m"); - if (argc >= 2) { - r = ifname_mangle(argv[1]); - if (r < 0) - return r; - } + r = sd_event_set_signal_exit(event, true); + if (r < 0) + return log_error_errno(r, "Failed to enable exit on SIGINT/SIGTERM: %m"); - if (arg_ifindex <= 0) - return status_all(STATUS_DNSSEC); + r = sd_varlink_connect_address(&vl, "/run/systemd/resolve/io.systemd.Resolve.Monitor"); + if (r < 0) + return log_error_errno(r, "Failed to connect to query monitoring service /run/systemd/resolve/io.systemd.Resolve.Monitor: %m"); - if (argc < 3) - return status_ifindex(arg_ifindex, STATUS_DNSSEC); + r = sd_varlink_set_relative_timeout(vl, USEC_INFINITY); /* We want the monitor to run basically forever */ + if (r < 0) + return log_error_errno(r, "Failed to set varlink timeout: %m"); - (void) polkit_agent_open_if_enabled(BUS_TRANSPORT_LOCAL, arg_ask_password); + r = sd_varlink_attach_event(vl, event, SD_EVENT_PRIORITY_NORMAL); + if (r < 0) + return log_error_errno(r, "Failed to attach varlink connection to event loop: %m"); - r = bus_call_method(bus, bus_resolve_mgr, "SetLinkDNSSEC", &error, NULL, "is", arg_ifindex, argv[2]); - if (r < 0 && sd_bus_error_has_name(&error, BUS_ERROR_LINK_BUSY)) { - sd_bus_error_free(&error); + r = sd_varlink_bind_reply(vl, monitor_reply); + if (r < 0) + return log_error_errno(r, "Failed to bind reply callback to varlink connection: %m"); - r = bus_call_method(bus, bus_network_mgr, "SetLinkDNSSEC", &error, NULL, "is", arg_ifindex, argv[2]); - } - if (r < 0) { - if (arg_ifindex_permissive && - sd_bus_error_has_name(&error, BUS_ERROR_NO_SUCH_LINK)) - return 0; + r = sd_varlink_observebo( + vl, + "io.systemd.Resolve.Monitor.SubscribeQueryResults", + SD_JSON_BUILD_PAIR_BOOLEAN("allowInteractiveAuthentication", arg_ask_password)); + if (r < 0) + return log_error_errno(r, "Failed to issue SubscribeQueryResults() varlink call: %m"); - return log_error_errno(r, "Failed to set DNSSEC configuration: %s", bus_error_message(&error, r)); - } + r = sd_event_loop(event); + if (r < 0) + return log_error_errno(r, "Failed to run event loop: %m"); - return 0; + r = sd_event_get_exit_code(event, &c); + if (r < 0) + return log_error_errno(r, "Failed to get exit code: %m"); + + return c; } -static int call_nta(sd_bus *bus, char **nta, const BusLocator *locator, sd_bus_error *error) { +static int call_dns(sd_bus *bus, char **dns, const BusLocator *locator, sd_bus_error *error, bool extended) { _cleanup_(sd_bus_message_unrefp) sd_bus_message *req = NULL; int r; (void) polkit_agent_open_if_enabled(BUS_TRANSPORT_LOCAL, arg_ask_password); - r = bus_message_new_method_call(bus, &req, locator, "SetLinkDNSSECNegativeTrustAnchors"); + r = bus_message_new_method_call(bus, &req, locator, extended ? "SetLinkDNSEx" : "SetLinkDNS"); if (r < 0) return bus_log_create_error(r); @@ -2448,695 +2231,911 @@ static int call_nta(sd_bus *bus, char **nta, const BusLocator *locator, sd_bus_ if (r < 0) return bus_log_create_error(r); - r = sd_bus_message_append_strv(req, nta); + r = sd_bus_message_open_container(req, 'a', extended ? "(iayqs)" : "(iay)"); if (r < 0) return bus_log_create_error(r); - return sd_bus_call(bus, req, 0, error, NULL); -} - -static int verb_nta(int argc, char *argv[], uintptr_t _data, void *userdata) { - _cleanup_(sd_bus_flush_close_unrefp) sd_bus *bus = NULL; - _cleanup_(sd_bus_error_free) sd_bus_error error = SD_BUS_ERROR_NULL; - char **args; - bool clear; - int r; + /* If only argument is the empty string, then call SetLinkDNS() with an + * empty list, which will clear the list of domains for an interface. */ + if (!strv_equal(dns, STRV_MAKE(""))) + STRV_FOREACH(p, dns) { + _cleanup_free_ char *name = NULL; + struct in_addr_data data; + uint16_t port; + int ifindex; - r = acquire_bus(&bus); - if (r < 0) - return r; + r = in_addr_port_ifindex_name_from_string_auto(*p, &data.family, &data.address, &port, &ifindex, &name); + if (r < 0) + return log_error_errno(r, "Failed to parse DNS server address: %s", *p); - if (argc >= 2) { - r = ifname_mangle(argv[1]); - if (r < 0) - return r; - } + if (ifindex != 0 && ifindex != arg_ifindex) + return log_error_errno(SYNTHETIC_ERRNO(EINVAL), "Invalid ifindex: %i", ifindex); - if (arg_ifindex <= 0) - return status_all(STATUS_NTA); + r = sd_bus_message_open_container(req, 'r', extended ? "iayqs" : "iay"); + if (r < 0) + return bus_log_create_error(r); - if (argc < 3) - return status_ifindex(arg_ifindex, STATUS_NTA); + r = sd_bus_message_append(req, "i", data.family); + if (r < 0) + return bus_log_create_error(r); - (void) polkit_agent_open_if_enabled(BUS_TRANSPORT_LOCAL, arg_ask_password); + r = sd_bus_message_append_array(req, 'y', &data.address, FAMILY_ADDRESS_SIZE(data.family)); + if (r < 0) + return bus_log_create_error(r); - /* If only argument is the empty string, then call SetLinkDNSSECNegativeTrustAnchors() - * with an empty list, which will clear the list of domains for an interface. */ - args = strv_skip(argv, 2); - clear = strv_equal(args, STRV_MAKE("")); + if (extended) { + r = sd_bus_message_append(req, "q", port); + if (r < 0) + return bus_log_create_error(r); - if (!clear) - STRV_FOREACH(p, args) { - r = dns_name_is_valid(*p); + r = sd_bus_message_append(req, "s", name); + if (r < 0) + return bus_log_create_error(r); + } + + r = sd_bus_message_close_container(req); if (r < 0) - return log_error_errno(r, "Failed to validate specified domain %s: %m", *p); - if (r == 0) - return log_error_errno(SYNTHETIC_ERRNO(EINVAL), - "Domain not valid: %s", - *p); + return bus_log_create_error(r); } - r = call_nta(bus, clear ? NULL : args, bus_resolve_mgr, &error); - if (r < 0 && sd_bus_error_has_name(&error, BUS_ERROR_LINK_BUSY)) { - sd_bus_error_free(&error); + r = sd_bus_message_close_container(req); + if (r < 0) + return bus_log_create_error(r); - r = call_nta(bus, clear ? NULL : args, bus_network_mgr, &error); + r = sd_bus_call(bus, req, 0, error, NULL); + if (r < 0 && extended && sd_bus_error_has_name(error, SD_BUS_ERROR_UNKNOWN_METHOD)) { + sd_bus_error_free(error); + return call_dns(bus, dns, locator, error, false); } - if (r < 0) { - if (arg_ifindex_permissive && - sd_bus_error_has_name(&error, BUS_ERROR_NO_SUCH_LINK)) - return 0; + return r; +} - return log_error_errno(r, "Failed to set DNSSEC NTA configuration: %s", bus_error_message(&error, r)); - } +static int dump_cache_item(sd_json_variant *item) { - return 0; -} + struct item_info { + sd_json_variant *key; + sd_json_variant *rrs; + const char *type; + uint64_t until; + } item_info = {}; -static int verb_revert_link(int argc, char *argv[], uintptr_t _data, void *userdata) { - _cleanup_(sd_bus_flush_close_unrefp) sd_bus *bus = NULL; - _cleanup_(sd_bus_error_free) sd_bus_error error = SD_BUS_ERROR_NULL; - int r; + static const sd_json_dispatch_field dispatch_table[] = { + { "key", SD_JSON_VARIANT_OBJECT, sd_json_dispatch_variant_noref, offsetof(struct item_info, key), SD_JSON_MANDATORY }, + { "rrs", SD_JSON_VARIANT_ARRAY, sd_json_dispatch_variant_noref, offsetof(struct item_info, rrs), 0 }, + { "type", SD_JSON_VARIANT_STRING, sd_json_dispatch_const_string, offsetof(struct item_info, type), 0 }, + { "until", _SD_JSON_VARIANT_TYPE_INVALID, sd_json_dispatch_uint64, offsetof(struct item_info, until), 0 }, + {}, + }; - r = acquire_bus(&bus); + _cleanup_(dns_resource_key_unrefp) DnsResourceKey *k = NULL; + int r, c = 0; + + r = sd_json_dispatch(item, dispatch_table, SD_JSON_LOG|SD_JSON_ALLOW_EXTENSIONS, &item_info); if (r < 0) return r; - if (argc >= 2) { - r = ifname_mangle(argv[1]); - if (r < 0) - return r; - } + r = dns_resource_key_from_json(item_info.key, &k); + if (r < 0) + return log_error_errno(r, "Failed to turn JSON data to resource key: %m"); - if (arg_ifindex <= 0) - return log_error_errno(SYNTHETIC_ERRNO(EINVAL), "Interface argument required."); + if (item_info.type) + printf("%s %s%s%s\n", DNS_RESOURCE_KEY_TO_STRING(k), ansi_highlight_red(), item_info.type, ansi_normal()); + else { + sd_json_variant *i; - (void) polkit_agent_open_if_enabled(BUS_TRANSPORT_LOCAL, arg_ask_password); + JSON_VARIANT_ARRAY_FOREACH(i, item_info.rrs) { + _cleanup_(dns_resource_record_unrefp) DnsResourceRecord *rr = NULL; + _cleanup_free_ void *data = NULL; + sd_json_variant *raw; + size_t size; - r = bus_call_method(bus, bus_resolve_mgr, "RevertLink", &error, NULL, "i", arg_ifindex); - if (r < 0 && sd_bus_error_has_name(&error, BUS_ERROR_LINK_BUSY)) { - sd_bus_error_free(&error); + raw = sd_json_variant_by_key(i, "raw"); + if (!raw) + return log_error_errno(SYNTHETIC_ERRNO(ENOTRECOVERABLE), "raw field missing from RR JSON data."); - r = bus_call_method(bus, bus_network_mgr, "RevertLinkDNS", &error, NULL, "i", arg_ifindex); - } - if (r < 0) { - if (arg_ifindex_permissive && - sd_bus_error_has_name(&error, BUS_ERROR_NO_SUCH_LINK)) - return 0; + r = sd_json_variant_unbase64(raw, &data, &size); + if (r < 0) + return log_error_errno(r, "Unable to decode raw RR JSON data: %m"); - return log_error_errno(r, "Failed to revert interface configuration: %s", bus_error_message(&error, r)); + r = dns_resource_record_new_from_raw(&rr, data, size); + if (r < 0) + return log_error_errno(r, "Failed to parse DNS data: %m"); + + printf("%s\n", dns_resource_record_to_string(rr)); + c++; + } } - return 0; + return c; } -static int verb_log_level(int argc, char *argv[], uintptr_t _data, void *userdata) { - _cleanup_(sd_bus_flush_close_unrefp) sd_bus *bus = NULL; - int r; +static int dump_cache_scope(sd_json_variant *scope) { + struct scope_info { + const char *protocol; + int family; + int ifindex; + const char *ifname; + sd_json_variant *cache; + const char *dnssec_mode; + const char *dns_over_tls_mode; + } scope_info = { + .family = AF_UNSPEC, + }; + sd_json_variant *i; + int r, c = 0; - r = acquire_bus(&bus); + static const sd_json_dispatch_field dispatch_table[] = { + { "protocol", SD_JSON_VARIANT_STRING, sd_json_dispatch_const_string, offsetof(struct scope_info, protocol), SD_JSON_MANDATORY }, + { "family", _SD_JSON_VARIANT_TYPE_INVALID, sd_json_dispatch_int, offsetof(struct scope_info, family), 0 }, + { "ifindex", _SD_JSON_VARIANT_TYPE_INVALID, json_dispatch_ifindex, offsetof(struct scope_info, ifindex), SD_JSON_RELAX }, + { "ifname", SD_JSON_VARIANT_STRING, sd_json_dispatch_const_string, offsetof(struct scope_info, ifname), 0 }, + { "cache", SD_JSON_VARIANT_ARRAY, sd_json_dispatch_variant_noref, offsetof(struct scope_info, cache), SD_JSON_MANDATORY }, + { "dnssec", SD_JSON_VARIANT_STRING, sd_json_dispatch_const_string, offsetof(struct scope_info, dnssec_mode), 0 }, + { "dnsOverTLS", SD_JSON_VARIANT_STRING, sd_json_dispatch_const_string, offsetof(struct scope_info, dns_over_tls_mode), 0 }, + {}, + }; + + r = sd_json_dispatch(scope, dispatch_table, SD_JSON_LOG|SD_JSON_ALLOW_EXTENSIONS, &scope_info); if (r < 0) return r; - assert(IN_SET(argc, 1, 2)); + printf("%sScope protocol=%s", ansi_underline(), scope_info.protocol); - return verb_log_control_common(bus, "org.freedesktop.resolve1", argv[0], argc == 2 ? argv[1] : NULL); -} + if (scope_info.family != AF_UNSPEC) + printf(" family=%s", af_to_name(scope_info.family)); -static int print_question(char prefix, const char *color, sd_json_variant *question) { - sd_json_variant *q = NULL; - int r; + if (scope_info.ifindex > 0) + printf(" ifindex=%i", scope_info.ifindex); + if (scope_info.ifname) + printf(" ifname=%s", scope_info.ifname); - assert(color); + if (dns_protocol_from_string(scope_info.protocol) == DNS_PROTOCOL_DNS) { + if (scope_info.dnssec_mode) + printf(" DNSSEC=%s", scope_info.dnssec_mode); + if (scope_info.dns_over_tls_mode) + printf(" DNSOverTLS=%s", scope_info.dns_over_tls_mode); + } - JSON_VARIANT_ARRAY_FOREACH(q, question) { - _cleanup_(dns_resource_key_unrefp) DnsResourceKey *key = NULL; - char buf[DNS_RESOURCE_KEY_STRING_MAX]; + printf("%s\n", ansi_normal()); - r = dns_resource_key_from_json(q, &key); - if (r < 0) { - log_warning_errno(r, "Received monitor message with invalid question key, ignoring: %m"); - continue; - } + JSON_VARIANT_ARRAY_FOREACH(i, scope_info.cache) { + r = dump_cache_item(i); + if (r < 0) + return r; - printf("%s%s %c%s: %s\n", - color, - glyph(GLYPH_ARROW_RIGHT), - prefix, - ansi_normal(), - dns_resource_key_to_string(key, buf, sizeof(buf))); + c += r; } + if (c == 0) + printf("%sNo entries.%s\n\n", ansi_grey(), ansi_normal()); + else + printf("\n"); + return 0; } -static int print_answer(sd_json_variant *answer) { - sd_json_variant *a; +static int verb_show_cache(int argc, char *argv[], uintptr_t _data, void *userdata) { + sd_json_variant *reply = NULL, *d = NULL; + _cleanup_(sd_varlink_unrefp) sd_varlink *vl = NULL; int r; - JSON_VARIANT_ARRAY_FOREACH(a, answer) { - _cleanup_(dns_resource_record_unrefp) DnsResourceRecord *rr = NULL; - _cleanup_free_ void *d = NULL; - sd_json_variant *jraw; - const char *s; - size_t l; - - jraw = sd_json_variant_by_key(a, "raw"); - if (!jraw) { - log_warning("Received monitor answer lacking valid raw data, ignoring."); - continue; - } + (void) polkit_agent_open_if_enabled(BUS_TRANSPORT_LOCAL, arg_ask_password); - r = sd_json_variant_unbase64(jraw, &d, &l); - if (r < 0) { - log_warning_errno(r, "Failed to undo base64 encoding of monitor answer raw data, ignoring."); - continue; - } + r = sd_varlink_connect_address(&vl, "/run/systemd/resolve/io.systemd.Resolve.Monitor"); + if (r < 0) + return log_error_errno(r, "Failed to connect to query monitoring service /run/systemd/resolve/io.systemd.Resolve.Monitor: %m"); - r = dns_resource_record_new_from_raw(&rr, d, l); - if (r < 0) { - log_warning_errno(r, "Failed to parse monitor answer RR, ignoring: %m"); - continue; - } + r = varlink_callbo_and_log( + vl, + "io.systemd.Resolve.Monitor.DumpCache", + &reply, + SD_JSON_BUILD_PAIR_BOOLEAN("allowInteractiveAuthentication", arg_ask_password)); + if (r < 0) + return r; - s = dns_resource_record_to_string(rr); - if (!s) - return log_oom(); + d = sd_json_variant_by_key(reply, "dump"); + if (!d) + return log_error_errno(SYNTHETIC_ERRNO(ENOTRECOVERABLE), + "DumpCache() response is missing 'dump' key."); - printf("%s%s A%s: %s\n", - ansi_highlight_yellow(), - glyph(GLYPH_ARROW_LEFT), - ansi_normal(), - s); + if (!sd_json_variant_is_array(d)) + return log_error_errno(SYNTHETIC_ERRNO(ENOTRECOVERABLE), + "DumpCache() response 'dump' field not an array"); + + if (!sd_json_format_enabled(arg_json_format_flags)) { + sd_json_variant *i; + + JSON_VARIANT_ARRAY_FOREACH(i, d) { + r = dump_cache_scope(i); + if (r < 0) + return r; + } + + return 0; } - return 0; + return sd_json_variant_dump(d, arg_json_format_flags, NULL, NULL); } -typedef struct MonitorQueryParams { - sd_json_variant *question; - sd_json_variant *answer; - sd_json_variant *collected_questions; - int rcode; - int error; - int ede_code; - const char *state; - const char *result; - const char *ede_msg; -} MonitorQueryParams; +static int dump_server_state(sd_json_variant *server) { + _cleanup_(table_unrefp) Table *table = NULL; + TableCell *cell; -static void monitor_query_params_done(MonitorQueryParams *p) { - assert(p); + struct server_state { + const char *server_name; + const char *type; + const char *ifname; + int ifindex; + const char *verified_feature_level; + const char *possible_feature_level; + const char *dnssec_mode; + bool dnssec_supported; + size_t received_udp_fragment_max; + uint64_t n_failed_udp; + uint64_t n_failed_tcp; + bool packet_truncated; + bool packet_bad_opt; + bool packet_rrsig_missing; + bool packet_invalid; + bool packet_do_off; + } server_state = { + .ifindex = -1, + }; - sd_json_variant_unref(p->question); - sd_json_variant_unref(p->answer); - sd_json_variant_unref(p->collected_questions); -} + int r; -static void monitor_query_dump(sd_json_variant *v) { static const sd_json_dispatch_field dispatch_table[] = { - { "question", SD_JSON_VARIANT_ARRAY, sd_json_dispatch_variant, offsetof(MonitorQueryParams, question), SD_JSON_MANDATORY }, - { "answer", SD_JSON_VARIANT_ARRAY, sd_json_dispatch_variant, offsetof(MonitorQueryParams, answer), 0 }, - { "collectedQuestions", SD_JSON_VARIANT_ARRAY, sd_json_dispatch_variant, offsetof(MonitorQueryParams, collected_questions), 0 }, - { "state", SD_JSON_VARIANT_STRING, sd_json_dispatch_const_string, offsetof(MonitorQueryParams, state), SD_JSON_MANDATORY }, - { "result", SD_JSON_VARIANT_STRING, sd_json_dispatch_const_string, offsetof(MonitorQueryParams, result), 0 }, - { "rcode", _SD_JSON_VARIANT_TYPE_INVALID, sd_json_dispatch_int, offsetof(MonitorQueryParams, rcode), 0 }, - { "errno", _SD_JSON_VARIANT_TYPE_INVALID, sd_json_dispatch_int, offsetof(MonitorQueryParams, error), 0 }, - { "extendedDNSErrorCode", _SD_JSON_VARIANT_TYPE_INVALID, sd_json_dispatch_int, offsetof(MonitorQueryParams, ede_code), 0 }, - { "extendedDNSErrorMessage", SD_JSON_VARIANT_STRING, sd_json_dispatch_const_string, offsetof(MonitorQueryParams, ede_msg), 0 }, - {} + { "Server", SD_JSON_VARIANT_STRING, sd_json_dispatch_const_string, offsetof(struct server_state, server_name), SD_JSON_MANDATORY }, + { "Type", SD_JSON_VARIANT_STRING, sd_json_dispatch_const_string, offsetof(struct server_state, type), SD_JSON_MANDATORY }, + { "Interface", SD_JSON_VARIANT_STRING, sd_json_dispatch_const_string, offsetof(struct server_state, ifname), 0 }, + { "InterfaceIndex", _SD_JSON_VARIANT_TYPE_INVALID, json_dispatch_ifindex, offsetof(struct server_state, ifindex), SD_JSON_RELAX }, + { "VerifiedFeatureLevel", SD_JSON_VARIANT_STRING, sd_json_dispatch_const_string, offsetof(struct server_state, verified_feature_level), 0 }, + { "PossibleFeatureLevel", SD_JSON_VARIANT_STRING, sd_json_dispatch_const_string, offsetof(struct server_state, possible_feature_level), 0 }, + { "DNSSECMode", SD_JSON_VARIANT_STRING, sd_json_dispatch_const_string, offsetof(struct server_state, dnssec_mode), SD_JSON_MANDATORY }, + { "DNSSECSupported", SD_JSON_VARIANT_BOOLEAN, sd_json_dispatch_stdbool, offsetof(struct server_state, dnssec_supported), SD_JSON_MANDATORY }, + { "ReceivedUDPFragmentMax", _SD_JSON_VARIANT_TYPE_INVALID, sd_json_dispatch_uint64, offsetof(struct server_state, received_udp_fragment_max), SD_JSON_MANDATORY }, + { "FailedUDPAttempts", _SD_JSON_VARIANT_TYPE_INVALID, sd_json_dispatch_uint64, offsetof(struct server_state, n_failed_udp), SD_JSON_MANDATORY }, + { "FailedTCPAttempts", _SD_JSON_VARIANT_TYPE_INVALID, sd_json_dispatch_uint64, offsetof(struct server_state, n_failed_tcp), SD_JSON_MANDATORY }, + { "PacketTruncated", SD_JSON_VARIANT_BOOLEAN, sd_json_dispatch_stdbool, offsetof(struct server_state, packet_truncated), SD_JSON_MANDATORY }, + { "PacketBadOpt", SD_JSON_VARIANT_BOOLEAN, sd_json_dispatch_stdbool, offsetof(struct server_state, packet_bad_opt), SD_JSON_MANDATORY }, + { "PacketRRSIGMissing", SD_JSON_VARIANT_BOOLEAN, sd_json_dispatch_stdbool, offsetof(struct server_state, packet_rrsig_missing), SD_JSON_MANDATORY }, + { "PacketInvalid", SD_JSON_VARIANT_BOOLEAN, sd_json_dispatch_stdbool, offsetof(struct server_state, packet_invalid), SD_JSON_MANDATORY }, + { "PacketDoOff", SD_JSON_VARIANT_BOOLEAN, sd_json_dispatch_stdbool, offsetof(struct server_state, packet_do_off), SD_JSON_MANDATORY }, + {}, }; - _cleanup_(monitor_query_params_done) MonitorQueryParams p = { - .rcode = -1, - .ede_code = -1, - }; + r = sd_json_dispatch(server, dispatch_table, SD_JSON_LOG|SD_JSON_ALLOW_EXTENSIONS, &server_state); + if (r < 0) + return r; - assert(v); + table = table_new_vertical(); + if (!table) + return log_oom(); - if (sd_json_dispatch(v, dispatch_table, SD_JSON_LOG|SD_JSON_ALLOW_EXTENSIONS, &p) < 0) - return; + assert_se(cell = table_get_cell(table, 0, 0)); + (void) table_set_ellipsize_percent(table, cell, 100); + (void) table_set_align_percent(table, cell, 0); - /* First show the current question */ - print_question('Q', ansi_highlight_cyan(), p.question); + r = table_add_cell_stringf(table, NULL, "Server: %s", server_state.server_name); + if (r < 0) + return table_log_add_error(r); - /* And then show the questions that led to this one in case this was a CNAME chain */ - print_question('C', ansi_highlight_grey(), p.collected_questions); + r = table_add_many(table, + TABLE_EMPTY, + TABLE_FIELD, "Type", + TABLE_SET_ALIGN_PERCENT, 100, + TABLE_STRING, server_state.type); + if (r < 0) + return table_log_add_error(r); - printf("%s%s S%s: %s", - streq_ptr(p.state, "success") ? ansi_highlight_green() : ansi_highlight_red(), - glyph(GLYPH_ARROW_LEFT), - ansi_normal(), - streq_ptr(p.state, "errno") ? ERRNO_NAME(p.error) : - streq_ptr(p.state, "rcode-failure") ? strna(dns_rcode_to_string(p.rcode)) : - strna(p.state)); + if (server_state.ifname) { + r = table_add_many(table, + TABLE_FIELD, "Interface", + TABLE_STRING, server_state.ifname); + if (r < 0) + return table_log_add_error(r); + } - if (!isempty(p.result)) - printf(": %s", p.result); + if (server_state.ifindex >= 0) { + r = table_add_many(table, + TABLE_FIELD, "Interface Index", + TABLE_INT, server_state.ifindex); + if (r < 0) + return table_log_add_error(r); + } - if (p.ede_code >= 0) - printf(" (%s%s%s)", - FORMAT_DNS_EDE_RCODE(p.ede_code), - !isempty(p.ede_msg) ? ": " : "", - strempty(p.ede_msg)); + if (server_state.verified_feature_level) { + r = table_add_many(table, + TABLE_FIELD, "Verified feature level", + TABLE_STRING, server_state.verified_feature_level); + if (r < 0) + return table_log_add_error(r); + } - puts(""); + if (server_state.possible_feature_level) { + r = table_add_many(table, + TABLE_FIELD, "Possible feature level", + TABLE_STRING, server_state.possible_feature_level); + if (r < 0) + return table_log_add_error(r); + } - print_answer(p.answer); + r = table_add_many(table, + TABLE_FIELD, "DNSSEC Mode", + TABLE_STRING, server_state.dnssec_mode, + TABLE_FIELD, "DNSSEC Supported", + TABLE_STRING, yes_no(server_state.dnssec_supported), + TABLE_FIELD, "Maximum UDP fragment size received", + TABLE_UINT64, server_state.received_udp_fragment_max, + TABLE_FIELD, "Failed UDP attempts", + TABLE_UINT64, server_state.n_failed_udp, + TABLE_FIELD, "Failed TCP attempts", + TABLE_UINT64, server_state.n_failed_tcp, + TABLE_FIELD, "Seen truncated packet", + TABLE_STRING, yes_no(server_state.packet_truncated), + TABLE_FIELD, "Seen OPT RR getting lost", + TABLE_STRING, yes_no(server_state.packet_bad_opt), + TABLE_FIELD, "Seen RRSIG RR missing", + TABLE_STRING, yes_no(server_state.packet_rrsig_missing), + TABLE_FIELD, "Seen invalid packet", + TABLE_STRING, yes_no(server_state.packet_invalid), + TABLE_FIELD, "Server dropped DO flag", + TABLE_STRING, yes_no(server_state.packet_do_off), + TABLE_SET_ALIGN_PERCENT, 0, + TABLE_EMPTY, TABLE_EMPTY); + + if (r < 0) + return table_log_add_error(r); + + return table_print_or_warn(table); } -static int monitor_reply( - sd_varlink *link, - sd_json_variant *parameters, - const char *error_id, - sd_varlink_reply_flags_t flags, - void *userdata) { +static int verb_show_server_state(int argc, char *argv[], uintptr_t _data, void *userdata) { + sd_json_variant *reply = NULL, *d = NULL; + _cleanup_(sd_varlink_unrefp) sd_varlink *vl = NULL; + int r; - assert(link); + (void) polkit_agent_open_if_enabled(BUS_TRANSPORT_LOCAL, arg_ask_password); - if (error_id) { - bool disconnect; + r = sd_varlink_connect_address(&vl, "/run/systemd/resolve/io.systemd.Resolve.Monitor"); + if (r < 0) + return log_error_errno(r, "Failed to connect to query monitoring service /run/systemd/resolve/io.systemd.Resolve.Monitor: %m"); - disconnect = streq(error_id, SD_VARLINK_ERROR_DISCONNECTED); - if (disconnect) - log_info("Disconnected."); - else - log_error("Varlink error: %s", error_id); + r = varlink_callbo_and_log( + vl, + "io.systemd.Resolve.Monitor.DumpServerState", + &reply, + SD_JSON_BUILD_PAIR_BOOLEAN("allowInteractiveAuthentication", arg_ask_password)); + if (r < 0) + return r; + + d = sd_json_variant_by_key(reply, "dump"); + if (!d) + return log_error_errno(SYNTHETIC_ERRNO(ENOTRECOVERABLE), + "DumpCache() response is missing 'dump' key."); + + if (!sd_json_variant_is_array(d)) + return log_error_errno(SYNTHETIC_ERRNO(ENOTRECOVERABLE), + "DumpCache() response 'dump' field not an array"); + + if (!sd_json_format_enabled(arg_json_format_flags)) { + sd_json_variant *i; + + JSON_VARIANT_ARRAY_FOREACH(i, d) { + r = dump_server_state(i); + if (r < 0) + return r; + } - (void) sd_event_exit(ASSERT_PTR(sd_varlink_get_event(link)), disconnect ? EXIT_SUCCESS : EXIT_FAILURE); return 0; } - if (sd_json_variant_by_key(parameters, "ready")) { - /* The first message coming in will just indicate that we are now subscribed. We let our - * caller know if they asked for it. Once the caller sees this they should know that we are - * not going to miss any queries anymore. */ - (void) sd_notify(/* unset_environment=false */ false, "READY=1"); - return 0; + return sd_json_variant_dump(d, arg_json_format_flags, NULL, NULL); +} + +static int verb_dns(int argc, char *argv[], uintptr_t _data, void *userdata) { + _cleanup_(sd_bus_flush_close_unrefp) sd_bus *bus = NULL; + _cleanup_(sd_bus_error_free) sd_bus_error error = SD_BUS_ERROR_NULL; + int r; + + r = acquire_bus(&bus); + if (r < 0) + return r; + + if (argc >= 2) { + r = ifname_mangle(argv[1]); + if (r < 0) + return r; } - if (!sd_json_format_enabled(arg_json_format_flags)) { - monitor_query_dump(parameters); - printf("\n"); - } else - sd_json_variant_dump(parameters, arg_json_format_flags, NULL, NULL); + if (arg_ifindex <= 0) + return status_all(STATUS_DNS); + + if (argc < 3) + return status_ifindex(arg_ifindex, STATUS_DNS); + + char **args = strv_skip(argv, 2); + r = call_dns(bus, args, bus_resolve_mgr, &error, true); + if (r < 0 && sd_bus_error_has_name(&error, BUS_ERROR_LINK_BUSY)) { + sd_bus_error_free(&error); + + r = call_dns(bus, args, bus_network_mgr, &error, true); + } + if (r < 0) { + if (arg_ifindex_permissive && + sd_bus_error_has_name(&error, BUS_ERROR_NO_SUCH_LINK)) + return 0; + + return log_error_errno(r, "Failed to set DNS configuration: %s", bus_error_message(&error, r)); + } + + return 0; +} + +static int call_domain(sd_bus *bus, char **domain, const BusLocator *locator, sd_bus_error *error) { + _cleanup_(sd_bus_message_unrefp) sd_bus_message *req = NULL; + int r; + + (void) polkit_agent_open_if_enabled(BUS_TRANSPORT_LOCAL, arg_ask_password); + + r = bus_message_new_method_call(bus, &req, locator, "SetLinkDomains"); + if (r < 0) + return bus_log_create_error(r); + + r = sd_bus_message_append(req, "i", arg_ifindex); + if (r < 0) + return bus_log_create_error(r); + + r = sd_bus_message_open_container(req, 'a', "(sb)"); + if (r < 0) + return bus_log_create_error(r); + + /* If only argument is the empty string, then call SetLinkDomains() with an + * empty list, which will clear the list of domains for an interface. */ + if (!strv_equal(domain, STRV_MAKE(""))) + STRV_FOREACH(p, domain) { + const char *n; + + n = **p == '~' ? *p + 1 : *p; + + r = dns_name_is_valid(n); + if (r < 0) + return log_error_errno(r, "Failed to validate specified domain %s: %m", n); + if (r == 0) + return log_error_errno(SYNTHETIC_ERRNO(EINVAL), + "Domain not valid: %s", + n); + + r = sd_bus_message_append(req, "(sb)", n, **p == '~'); + if (r < 0) + return bus_log_create_error(r); + } + + r = sd_bus_message_close_container(req); + if (r < 0) + return bus_log_create_error(r); + + return sd_bus_call(bus, req, 0, error, NULL); +} + +static int verb_domain(int argc, char *argv[], uintptr_t _data, void *userdata) { + _cleanup_(sd_bus_flush_close_unrefp) sd_bus *bus = NULL; + _cleanup_(sd_bus_error_free) sd_bus_error error = SD_BUS_ERROR_NULL; + int r; + + r = acquire_bus(&bus); + if (r < 0) + return r; + + if (argc >= 2) { + r = ifname_mangle(argv[1]); + if (r < 0) + return r; + } + + if (arg_ifindex <= 0) + return status_all(STATUS_DOMAIN); + + if (argc < 3) + return status_ifindex(arg_ifindex, STATUS_DOMAIN); + + char **args = strv_skip(argv, 2); + r = call_domain(bus, args, bus_resolve_mgr, &error); + if (r < 0 && sd_bus_error_has_name(&error, BUS_ERROR_LINK_BUSY)) { + sd_bus_error_free(&error); + + r = call_domain(bus, args, bus_network_mgr, &error); + } + if (r < 0) { + if (arg_ifindex_permissive && + sd_bus_error_has_name(&error, BUS_ERROR_NO_SUCH_LINK)) + return 0; + + return log_error_errno(r, "Failed to set domain configuration: %s", bus_error_message(&error, r)); + } + + return 0; +} + +static int verb_default_route(int argc, char *argv[], uintptr_t _data, void *userdata) { + _cleanup_(sd_bus_flush_close_unrefp) sd_bus *bus = NULL; + _cleanup_(sd_bus_error_free) sd_bus_error error = SD_BUS_ERROR_NULL; + int r, b; + + r = acquire_bus(&bus); + if (r < 0) + return r; + + if (argc >= 2) { + r = ifname_mangle(argv[1]); + if (r < 0) + return r; + } + + if (arg_ifindex <= 0) + return status_all(STATUS_DEFAULT_ROUTE); + + if (argc < 3) + return status_ifindex(arg_ifindex, STATUS_DEFAULT_ROUTE); + + b = parse_boolean(argv[2]); + if (b < 0) + return log_error_errno(b, "Failed to parse boolean argument: %s", argv[2]); + + (void) polkit_agent_open_if_enabled(BUS_TRANSPORT_LOCAL, arg_ask_password); + + r = bus_call_method(bus, bus_resolve_mgr, "SetLinkDefaultRoute", &error, NULL, "ib", arg_ifindex, b); + if (r < 0 && sd_bus_error_has_name(&error, BUS_ERROR_LINK_BUSY)) { + sd_bus_error_free(&error); + + r = bus_call_method(bus, bus_network_mgr, "SetLinkDefaultRoute", &error, NULL, "ib", arg_ifindex, b); + } + if (r < 0) { + if (arg_ifindex_permissive && + sd_bus_error_has_name(&error, BUS_ERROR_NO_SUCH_LINK)) + return 0; + + return log_error_errno(r, "Failed to set default route configuration: %s", bus_error_message(&error, r)); + } + + return 0; +} + +static int verb_llmnr(int argc, char *argv[], uintptr_t _data, void *userdata) { + _cleanup_(sd_bus_flush_close_unrefp) sd_bus *bus = NULL; + _cleanup_(sd_bus_error_free) sd_bus_error error = SD_BUS_ERROR_NULL; + _cleanup_free_ char *global_llmnr_support_str = NULL; + ResolveSupport global_llmnr_support, llmnr_support; + int r; + + r = acquire_bus(&bus); + if (r < 0) + return r; + + if (argc >= 2) { + r = ifname_mangle(argv[1]); + if (r < 0) + return r; + } + + if (arg_ifindex <= 0) + return status_all(STATUS_LLMNR); + + if (argc < 3) + return status_ifindex(arg_ifindex, STATUS_LLMNR); + + llmnr_support = resolve_support_from_string(argv[2]); + if (llmnr_support < 0) + return log_error_errno(llmnr_support, "Invalid LLMNR setting: %s", argv[2]); + + r = bus_get_property_string(bus, bus_resolve_mgr, "LLMNR", &error, &global_llmnr_support_str); + if (r < 0) + return log_error_errno(r, "Failed to get the global LLMNR support state: %s", bus_error_message(&error, r)); + + global_llmnr_support = resolve_support_from_string(global_llmnr_support_str); + if (global_llmnr_support < 0) + return log_error_errno(global_llmnr_support, "Received invalid global LLMNR setting: %s", global_llmnr_support_str); + + if (global_llmnr_support < llmnr_support) + log_warning("Setting LLMNR support level \"%s\" for \"%s\", but the global support level is \"%s\".", + argv[2], arg_ifname, global_llmnr_support_str); + + (void) polkit_agent_open_if_enabled(BUS_TRANSPORT_LOCAL, arg_ask_password); + + r = bus_call_method(bus, bus_resolve_mgr, "SetLinkLLMNR", &error, NULL, "is", arg_ifindex, argv[2]); + if (r < 0 && sd_bus_error_has_name(&error, BUS_ERROR_LINK_BUSY)) { + sd_bus_error_free(&error); + + r = bus_call_method(bus, bus_network_mgr, "SetLinkLLMNR", &error, NULL, "is", arg_ifindex, argv[2]); + } + if (r < 0) { + if (arg_ifindex_permissive && + sd_bus_error_has_name(&error, BUS_ERROR_NO_SUCH_LINK)) + return 0; - fflush(stdout); + return log_error_errno(r, "Failed to set LLMNR configuration: %s", bus_error_message(&error, r)); + } return 0; } -static int verb_monitor(int argc, char *argv[], uintptr_t _data, void *userdata) { - _cleanup_(sd_event_unrefp) sd_event *event = NULL; - _cleanup_(sd_varlink_unrefp) sd_varlink *vl = NULL; - int r, c; - - (void) polkit_agent_open_if_enabled(BUS_TRANSPORT_LOCAL, arg_ask_password); +static int verb_mdns(int argc, char *argv[], uintptr_t _data, void *userdata) { + _cleanup_(sd_bus_flush_close_unrefp) sd_bus *bus = NULL; + _cleanup_(sd_bus_error_free) sd_bus_error error = SD_BUS_ERROR_NULL; + _cleanup_free_ char *global_mdns_support_str = NULL; + ResolveSupport global_mdns_support, mdns_support; + int r; - r = sd_event_default(&event); + r = acquire_bus(&bus); if (r < 0) - return log_error_errno(r, "Failed to get event loop: %m"); + return r; - r = sd_event_set_signal_exit(event, true); - if (r < 0) - return log_error_errno(r, "Failed to enable exit on SIGINT/SIGTERM: %m"); + if (argc >= 2) { + r = ifname_mangle(argv[1]); + if (r < 0) + return r; + } - r = sd_varlink_connect_address(&vl, "/run/systemd/resolve/io.systemd.Resolve.Monitor"); - if (r < 0) - return log_error_errno(r, "Failed to connect to query monitoring service /run/systemd/resolve/io.systemd.Resolve.Monitor: %m"); + if (arg_ifindex <= 0) + return status_all(STATUS_MDNS); - r = sd_varlink_set_relative_timeout(vl, USEC_INFINITY); /* We want the monitor to run basically forever */ - if (r < 0) - return log_error_errno(r, "Failed to set varlink timeout: %m"); + if (argc < 3) + return status_ifindex(arg_ifindex, STATUS_MDNS); - r = sd_varlink_attach_event(vl, event, SD_EVENT_PRIORITY_NORMAL); - if (r < 0) - return log_error_errno(r, "Failed to attach varlink connection to event loop: %m"); + mdns_support = resolve_support_from_string(argv[2]); + if (mdns_support < 0) + return log_error_errno(mdns_support, "Invalid mDNS setting: %s", argv[2]); - r = sd_varlink_bind_reply(vl, monitor_reply); + r = bus_get_property_string(bus, bus_resolve_mgr, "MulticastDNS", &error, &global_mdns_support_str); if (r < 0) - return log_error_errno(r, "Failed to bind reply callback to varlink connection: %m"); + return log_error_errno(r, "Failed to get the global mDNS support state: %s", bus_error_message(&error, r)); - r = sd_varlink_observebo( - vl, - "io.systemd.Resolve.Monitor.SubscribeQueryResults", - SD_JSON_BUILD_PAIR_BOOLEAN("allowInteractiveAuthentication", arg_ask_password)); - if (r < 0) - return log_error_errno(r, "Failed to issue SubscribeQueryResults() varlink call: %m"); + global_mdns_support = resolve_support_from_string(global_mdns_support_str); + if (global_mdns_support < 0) + return log_error_errno(global_mdns_support, "Received invalid global mDNS setting: %s", global_mdns_support_str); - r = sd_event_loop(event); - if (r < 0) - return log_error_errno(r, "Failed to run event loop: %m"); + if (global_mdns_support < mdns_support) + log_warning("Setting mDNS support level \"%s\" for \"%s\", but the global support level is \"%s\".", + argv[2], arg_ifname, global_mdns_support_str); - r = sd_event_get_exit_code(event, &c); - if (r < 0) - return log_error_errno(r, "Failed to get exit code: %m"); + (void) polkit_agent_open_if_enabled(BUS_TRANSPORT_LOCAL, arg_ask_password); - return c; -} + r = bus_call_method(bus, bus_resolve_mgr, "SetLinkMulticastDNS", &error, NULL, "is", arg_ifindex, argv[2]); + if (r < 0 && sd_bus_error_has_name(&error, BUS_ERROR_LINK_BUSY)) { + sd_bus_error_free(&error); -static int dump_cache_item(sd_json_variant *item) { + r = bus_call_method( + bus, + bus_network_mgr, + "SetLinkMulticastDNS", + &error, + NULL, + "is", arg_ifindex, argv[2]); + } + if (r < 0) { + if (arg_ifindex_permissive && + sd_bus_error_has_name(&error, BUS_ERROR_NO_SUCH_LINK)) + return 0; - struct item_info { - sd_json_variant *key; - sd_json_variant *rrs; - const char *type; - uint64_t until; - } item_info = {}; + return log_error_errno(r, "Failed to set MulticastDNS configuration: %s", bus_error_message(&error, r)); + } - static const sd_json_dispatch_field dispatch_table[] = { - { "key", SD_JSON_VARIANT_OBJECT, sd_json_dispatch_variant_noref, offsetof(struct item_info, key), SD_JSON_MANDATORY }, - { "rrs", SD_JSON_VARIANT_ARRAY, sd_json_dispatch_variant_noref, offsetof(struct item_info, rrs), 0 }, - { "type", SD_JSON_VARIANT_STRING, sd_json_dispatch_const_string, offsetof(struct item_info, type), 0 }, - { "until", _SD_JSON_VARIANT_TYPE_INVALID, sd_json_dispatch_uint64, offsetof(struct item_info, until), 0 }, - {}, - }; + return 0; +} - _cleanup_(dns_resource_key_unrefp) DnsResourceKey *k = NULL; - int r, c = 0; +static int verb_dns_over_tls(int argc, char *argv[], uintptr_t _data, void *userdata) { + _cleanup_(sd_bus_flush_close_unrefp) sd_bus *bus = NULL; + _cleanup_(sd_bus_error_free) sd_bus_error error = SD_BUS_ERROR_NULL; + int r; - r = sd_json_dispatch(item, dispatch_table, SD_JSON_LOG|SD_JSON_ALLOW_EXTENSIONS, &item_info); + r = acquire_bus(&bus); if (r < 0) return r; - r = dns_resource_key_from_json(item_info.key, &k); - if (r < 0) - return log_error_errno(r, "Failed to turn JSON data to resource key: %m"); + if (argc >= 2) { + r = ifname_mangle(argv[1]); + if (r < 0) + return r; + } - if (item_info.type) - printf("%s %s%s%s\n", DNS_RESOURCE_KEY_TO_STRING(k), ansi_highlight_red(), item_info.type, ansi_normal()); - else { - sd_json_variant *i; + if (arg_ifindex <= 0) + return status_all(STATUS_DNS_OVER_TLS); - JSON_VARIANT_ARRAY_FOREACH(i, item_info.rrs) { - _cleanup_(dns_resource_record_unrefp) DnsResourceRecord *rr = NULL; - _cleanup_free_ void *data = NULL; - sd_json_variant *raw; - size_t size; + if (argc < 3) + return status_ifindex(arg_ifindex, STATUS_DNS_OVER_TLS); - raw = sd_json_variant_by_key(i, "raw"); - if (!raw) - return log_error_errno(SYNTHETIC_ERRNO(ENOTRECOVERABLE), "raw field missing from RR JSON data."); + (void) polkit_agent_open_if_enabled(BUS_TRANSPORT_LOCAL, arg_ask_password); - r = sd_json_variant_unbase64(raw, &data, &size); - if (r < 0) - return log_error_errno(r, "Unable to decode raw RR JSON data: %m"); + r = bus_call_method(bus, bus_resolve_mgr, "SetLinkDNSOverTLS", &error, NULL, "is", arg_ifindex, argv[2]); + if (r < 0 && sd_bus_error_has_name(&error, BUS_ERROR_LINK_BUSY)) { + sd_bus_error_free(&error); - r = dns_resource_record_new_from_raw(&rr, data, size); - if (r < 0) - return log_error_errno(r, "Failed to parse DNS data: %m"); + r = bus_call_method( + bus, + bus_network_mgr, + "SetLinkDNSOverTLS", + &error, + NULL, + "is", arg_ifindex, argv[2]); + } + if (r < 0) { + if (arg_ifindex_permissive && + sd_bus_error_has_name(&error, BUS_ERROR_NO_SUCH_LINK)) + return 0; - printf("%s\n", dns_resource_record_to_string(rr)); - c++; - } + return log_error_errno(r, "Failed to set DNSOverTLS configuration: %s", bus_error_message(&error, r)); } - return c; + return 0; } -static int dump_cache_scope(sd_json_variant *scope) { - - struct scope_info { - const char *protocol; - int family; - int ifindex; - const char *ifname; - sd_json_variant *cache; - const char *dnssec_mode; - const char *dns_over_tls_mode; - } scope_info = { - .family = AF_UNSPEC, - }; - sd_json_variant *i; - int r, c = 0; - - static const sd_json_dispatch_field dispatch_table[] = { - { "protocol", SD_JSON_VARIANT_STRING, sd_json_dispatch_const_string, offsetof(struct scope_info, protocol), SD_JSON_MANDATORY }, - { "family", _SD_JSON_VARIANT_TYPE_INVALID, sd_json_dispatch_int, offsetof(struct scope_info, family), 0 }, - { "ifindex", _SD_JSON_VARIANT_TYPE_INVALID, json_dispatch_ifindex, offsetof(struct scope_info, ifindex), SD_JSON_RELAX }, - { "ifname", SD_JSON_VARIANT_STRING, sd_json_dispatch_const_string, offsetof(struct scope_info, ifname), 0 }, - { "cache", SD_JSON_VARIANT_ARRAY, sd_json_dispatch_variant_noref, offsetof(struct scope_info, cache), SD_JSON_MANDATORY }, - { "dnssec", SD_JSON_VARIANT_STRING, sd_json_dispatch_const_string, offsetof(struct scope_info, dnssec_mode), 0 }, - { "dnsOverTLS", SD_JSON_VARIANT_STRING, sd_json_dispatch_const_string, offsetof(struct scope_info, dns_over_tls_mode), 0 }, - {}, - }; +static int verb_dnssec(int argc, char *argv[], uintptr_t _data, void *userdata) { + _cleanup_(sd_bus_flush_close_unrefp) sd_bus *bus = NULL; + _cleanup_(sd_bus_error_free) sd_bus_error error = SD_BUS_ERROR_NULL; + int r; - r = sd_json_dispatch(scope, dispatch_table, SD_JSON_LOG|SD_JSON_ALLOW_EXTENSIONS, &scope_info); + r = acquire_bus(&bus); if (r < 0) return r; - printf("%sScope protocol=%s", ansi_underline(), scope_info.protocol); + if (argc >= 2) { + r = ifname_mangle(argv[1]); + if (r < 0) + return r; + } - if (scope_info.family != AF_UNSPEC) - printf(" family=%s", af_to_name(scope_info.family)); + if (arg_ifindex <= 0) + return status_all(STATUS_DNSSEC); - if (scope_info.ifindex > 0) - printf(" ifindex=%i", scope_info.ifindex); - if (scope_info.ifname) - printf(" ifname=%s", scope_info.ifname); + if (argc < 3) + return status_ifindex(arg_ifindex, STATUS_DNSSEC); - if (dns_protocol_from_string(scope_info.protocol) == DNS_PROTOCOL_DNS) { - if (scope_info.dnssec_mode) - printf(" DNSSEC=%s", scope_info.dnssec_mode); - if (scope_info.dns_over_tls_mode) - printf(" DNSOverTLS=%s", scope_info.dns_over_tls_mode); - } + (void) polkit_agent_open_if_enabled(BUS_TRANSPORT_LOCAL, arg_ask_password); - printf("%s\n", ansi_normal()); + r = bus_call_method(bus, bus_resolve_mgr, "SetLinkDNSSEC", &error, NULL, "is", arg_ifindex, argv[2]); + if (r < 0 && sd_bus_error_has_name(&error, BUS_ERROR_LINK_BUSY)) { + sd_bus_error_free(&error); - JSON_VARIANT_ARRAY_FOREACH(i, scope_info.cache) { - r = dump_cache_item(i); - if (r < 0) - return r; + r = bus_call_method(bus, bus_network_mgr, "SetLinkDNSSEC", &error, NULL, "is", arg_ifindex, argv[2]); + } + if (r < 0) { + if (arg_ifindex_permissive && + sd_bus_error_has_name(&error, BUS_ERROR_NO_SUCH_LINK)) + return 0; - c += r; + return log_error_errno(r, "Failed to set DNSSEC configuration: %s", bus_error_message(&error, r)); } - if (c == 0) - printf("%sNo entries.%s\n\n", ansi_grey(), ansi_normal()); - else - printf("\n"); + return 0; +} + +static int call_nta(sd_bus *bus, char **nta, const BusLocator *locator, sd_bus_error *error) { + _cleanup_(sd_bus_message_unrefp) sd_bus_message *req = NULL; + int r; + + (void) polkit_agent_open_if_enabled(BUS_TRANSPORT_LOCAL, arg_ask_password); + + r = bus_message_new_method_call(bus, &req, locator, "SetLinkDNSSECNegativeTrustAnchors"); + if (r < 0) + return bus_log_create_error(r); + + r = sd_bus_message_append(req, "i", arg_ifindex); + if (r < 0) + return bus_log_create_error(r); + + r = sd_bus_message_append_strv(req, nta); + if (r < 0) + return bus_log_create_error(r); - return 0; + return sd_bus_call(bus, req, 0, error, NULL); } -static int verb_show_cache(int argc, char *argv[], uintptr_t _data, void *userdata) { - sd_json_variant *reply = NULL, *d = NULL; - _cleanup_(sd_varlink_unrefp) sd_varlink *vl = NULL; +static int verb_nta(int argc, char *argv[], uintptr_t _data, void *userdata) { + _cleanup_(sd_bus_flush_close_unrefp) sd_bus *bus = NULL; + _cleanup_(sd_bus_error_free) sd_bus_error error = SD_BUS_ERROR_NULL; + char **args; + bool clear; int r; - (void) polkit_agent_open_if_enabled(BUS_TRANSPORT_LOCAL, arg_ask_password); - - r = sd_varlink_connect_address(&vl, "/run/systemd/resolve/io.systemd.Resolve.Monitor"); - if (r < 0) - return log_error_errno(r, "Failed to connect to query monitoring service /run/systemd/resolve/io.systemd.Resolve.Monitor: %m"); - - r = varlink_callbo_and_log( - vl, - "io.systemd.Resolve.Monitor.DumpCache", - &reply, - SD_JSON_BUILD_PAIR_BOOLEAN("allowInteractiveAuthentication", arg_ask_password)); + r = acquire_bus(&bus); if (r < 0) return r; - d = sd_json_variant_by_key(reply, "dump"); - if (!d) - return log_error_errno(SYNTHETIC_ERRNO(ENOTRECOVERABLE), - "DumpCache() response is missing 'dump' key."); + if (argc >= 2) { + r = ifname_mangle(argv[1]); + if (r < 0) + return r; + } - if (!sd_json_variant_is_array(d)) - return log_error_errno(SYNTHETIC_ERRNO(ENOTRECOVERABLE), - "DumpCache() response 'dump' field not an array"); + if (arg_ifindex <= 0) + return status_all(STATUS_NTA); - if (!sd_json_format_enabled(arg_json_format_flags)) { - sd_json_variant *i; + if (argc < 3) + return status_ifindex(arg_ifindex, STATUS_NTA); - JSON_VARIANT_ARRAY_FOREACH(i, d) { - r = dump_cache_scope(i); + (void) polkit_agent_open_if_enabled(BUS_TRANSPORT_LOCAL, arg_ask_password); + + /* If only argument is the empty string, then call SetLinkDNSSECNegativeTrustAnchors() + * with an empty list, which will clear the list of domains for an interface. */ + args = strv_skip(argv, 2); + clear = strv_equal(args, STRV_MAKE("")); + + if (!clear) + STRV_FOREACH(p, args) { + r = dns_name_is_valid(*p); if (r < 0) - return r; + return log_error_errno(r, "Failed to validate specified domain %s: %m", *p); + if (r == 0) + return log_error_errno(SYNTHETIC_ERRNO(EINVAL), + "Domain not valid: %s", + *p); } - return 0; - } + r = call_nta(bus, clear ? NULL : args, bus_resolve_mgr, &error); + if (r < 0 && sd_bus_error_has_name(&error, BUS_ERROR_LINK_BUSY)) { + sd_bus_error_free(&error); - return sd_json_variant_dump(d, arg_json_format_flags, NULL, NULL); -} + r = call_nta(bus, clear ? NULL : args, bus_network_mgr, &error); + } + if (r < 0) { + if (arg_ifindex_permissive && + sd_bus_error_has_name(&error, BUS_ERROR_NO_SUCH_LINK)) + return 0; -static int dump_server_state(sd_json_variant *server) { - _cleanup_(table_unrefp) Table *table = NULL; - TableCell *cell; + return log_error_errno(r, "Failed to set DNSSEC NTA configuration: %s", bus_error_message(&error, r)); + } - struct server_state { - const char *server_name; - const char *type; - const char *ifname; - int ifindex; - const char *verified_feature_level; - const char *possible_feature_level; - const char *dnssec_mode; - bool dnssec_supported; - size_t received_udp_fragment_max; - uint64_t n_failed_udp; - uint64_t n_failed_tcp; - bool packet_truncated; - bool packet_bad_opt; - bool packet_rrsig_missing; - bool packet_invalid; - bool packet_do_off; - } server_state = { - .ifindex = -1, - }; + return 0; +} +static int verb_revert_link(int argc, char *argv[], uintptr_t _data, void *userdata) { + _cleanup_(sd_bus_flush_close_unrefp) sd_bus *bus = NULL; + _cleanup_(sd_bus_error_free) sd_bus_error error = SD_BUS_ERROR_NULL; int r; - static const sd_json_dispatch_field dispatch_table[] = { - { "Server", SD_JSON_VARIANT_STRING, sd_json_dispatch_const_string, offsetof(struct server_state, server_name), SD_JSON_MANDATORY }, - { "Type", SD_JSON_VARIANT_STRING, sd_json_dispatch_const_string, offsetof(struct server_state, type), SD_JSON_MANDATORY }, - { "Interface", SD_JSON_VARIANT_STRING, sd_json_dispatch_const_string, offsetof(struct server_state, ifname), 0 }, - { "InterfaceIndex", _SD_JSON_VARIANT_TYPE_INVALID, json_dispatch_ifindex, offsetof(struct server_state, ifindex), SD_JSON_RELAX }, - { "VerifiedFeatureLevel", SD_JSON_VARIANT_STRING, sd_json_dispatch_const_string, offsetof(struct server_state, verified_feature_level), 0 }, - { "PossibleFeatureLevel", SD_JSON_VARIANT_STRING, sd_json_dispatch_const_string, offsetof(struct server_state, possible_feature_level), 0 }, - { "DNSSECMode", SD_JSON_VARIANT_STRING, sd_json_dispatch_const_string, offsetof(struct server_state, dnssec_mode), SD_JSON_MANDATORY }, - { "DNSSECSupported", SD_JSON_VARIANT_BOOLEAN, sd_json_dispatch_stdbool, offsetof(struct server_state, dnssec_supported), SD_JSON_MANDATORY }, - { "ReceivedUDPFragmentMax", _SD_JSON_VARIANT_TYPE_INVALID, sd_json_dispatch_uint64, offsetof(struct server_state, received_udp_fragment_max), SD_JSON_MANDATORY }, - { "FailedUDPAttempts", _SD_JSON_VARIANT_TYPE_INVALID, sd_json_dispatch_uint64, offsetof(struct server_state, n_failed_udp), SD_JSON_MANDATORY }, - { "FailedTCPAttempts", _SD_JSON_VARIANT_TYPE_INVALID, sd_json_dispatch_uint64, offsetof(struct server_state, n_failed_tcp), SD_JSON_MANDATORY }, - { "PacketTruncated", SD_JSON_VARIANT_BOOLEAN, sd_json_dispatch_stdbool, offsetof(struct server_state, packet_truncated), SD_JSON_MANDATORY }, - { "PacketBadOpt", SD_JSON_VARIANT_BOOLEAN, sd_json_dispatch_stdbool, offsetof(struct server_state, packet_bad_opt), SD_JSON_MANDATORY }, - { "PacketRRSIGMissing", SD_JSON_VARIANT_BOOLEAN, sd_json_dispatch_stdbool, offsetof(struct server_state, packet_rrsig_missing), SD_JSON_MANDATORY }, - { "PacketInvalid", SD_JSON_VARIANT_BOOLEAN, sd_json_dispatch_stdbool, offsetof(struct server_state, packet_invalid), SD_JSON_MANDATORY }, - { "PacketDoOff", SD_JSON_VARIANT_BOOLEAN, sd_json_dispatch_stdbool, offsetof(struct server_state, packet_do_off), SD_JSON_MANDATORY }, - {}, - }; - - r = sd_json_dispatch(server, dispatch_table, SD_JSON_LOG|SD_JSON_ALLOW_EXTENSIONS, &server_state); + r = acquire_bus(&bus); if (r < 0) return r; - table = table_new_vertical(); - if (!table) - return log_oom(); - - assert_se(cell = table_get_cell(table, 0, 0)); - (void) table_set_ellipsize_percent(table, cell, 100); - (void) table_set_align_percent(table, cell, 0); - - r = table_add_cell_stringf(table, NULL, "Server: %s", server_state.server_name); - if (r < 0) - return table_log_add_error(r); - - r = table_add_many(table, - TABLE_EMPTY, - TABLE_FIELD, "Type", - TABLE_SET_ALIGN_PERCENT, 100, - TABLE_STRING, server_state.type); - if (r < 0) - return table_log_add_error(r); - - if (server_state.ifname) { - r = table_add_many(table, - TABLE_FIELD, "Interface", - TABLE_STRING, server_state.ifname); + if (argc >= 2) { + r = ifname_mangle(argv[1]); if (r < 0) - return table_log_add_error(r); + return r; } - if (server_state.ifindex >= 0) { - r = table_add_many(table, - TABLE_FIELD, "Interface Index", - TABLE_INT, server_state.ifindex); - if (r < 0) - return table_log_add_error(r); - } + if (arg_ifindex <= 0) + return log_error_errno(SYNTHETIC_ERRNO(EINVAL), "Interface argument required."); - if (server_state.verified_feature_level) { - r = table_add_many(table, - TABLE_FIELD, "Verified feature level", - TABLE_STRING, server_state.verified_feature_level); - if (r < 0) - return table_log_add_error(r); - } + (void) polkit_agent_open_if_enabled(BUS_TRANSPORT_LOCAL, arg_ask_password); - if (server_state.possible_feature_level) { - r = table_add_many(table, - TABLE_FIELD, "Possible feature level", - TABLE_STRING, server_state.possible_feature_level); - if (r < 0) - return table_log_add_error(r); - } + r = bus_call_method(bus, bus_resolve_mgr, "RevertLink", &error, NULL, "i", arg_ifindex); + if (r < 0 && sd_bus_error_has_name(&error, BUS_ERROR_LINK_BUSY)) { + sd_bus_error_free(&error); - r = table_add_many(table, - TABLE_FIELD, "DNSSEC Mode", - TABLE_STRING, server_state.dnssec_mode, - TABLE_FIELD, "DNSSEC Supported", - TABLE_STRING, yes_no(server_state.dnssec_supported), - TABLE_FIELD, "Maximum UDP fragment size received", - TABLE_UINT64, server_state.received_udp_fragment_max, - TABLE_FIELD, "Failed UDP attempts", - TABLE_UINT64, server_state.n_failed_udp, - TABLE_FIELD, "Failed TCP attempts", - TABLE_UINT64, server_state.n_failed_tcp, - TABLE_FIELD, "Seen truncated packet", - TABLE_STRING, yes_no(server_state.packet_truncated), - TABLE_FIELD, "Seen OPT RR getting lost", - TABLE_STRING, yes_no(server_state.packet_bad_opt), - TABLE_FIELD, "Seen RRSIG RR missing", - TABLE_STRING, yes_no(server_state.packet_rrsig_missing), - TABLE_FIELD, "Seen invalid packet", - TABLE_STRING, yes_no(server_state.packet_invalid), - TABLE_FIELD, "Server dropped DO flag", - TABLE_STRING, yes_no(server_state.packet_do_off), - TABLE_SET_ALIGN_PERCENT, 0, - TABLE_EMPTY, TABLE_EMPTY); + r = bus_call_method(bus, bus_network_mgr, "RevertLinkDNS", &error, NULL, "i", arg_ifindex); + } + if (r < 0) { + if (arg_ifindex_permissive && + sd_bus_error_has_name(&error, BUS_ERROR_NO_SUCH_LINK)) + return 0; - if (r < 0) - return table_log_add_error(r); + return log_error_errno(r, "Failed to revert interface configuration: %s", bus_error_message(&error, r)); + } - return table_print_or_warn(table); + return 0; } -static int verb_show_server_state(int argc, char *argv[], uintptr_t _data, void *userdata) { - sd_json_variant *reply = NULL, *d = NULL; - _cleanup_(sd_varlink_unrefp) sd_varlink *vl = NULL; +static int verb_log_level(int argc, char *argv[], uintptr_t _data, void *userdata) { + _cleanup_(sd_bus_flush_close_unrefp) sd_bus *bus = NULL; int r; - (void) polkit_agent_open_if_enabled(BUS_TRANSPORT_LOCAL, arg_ask_password); - - r = sd_varlink_connect_address(&vl, "/run/systemd/resolve/io.systemd.Resolve.Monitor"); - if (r < 0) - return log_error_errno(r, "Failed to connect to query monitoring service /run/systemd/resolve/io.systemd.Resolve.Monitor: %m"); - - r = varlink_callbo_and_log( - vl, - "io.systemd.Resolve.Monitor.DumpServerState", - &reply, - SD_JSON_BUILD_PAIR_BOOLEAN("allowInteractiveAuthentication", arg_ask_password)); + r = acquire_bus(&bus); if (r < 0) return r; - d = sd_json_variant_by_key(reply, "dump"); - if (!d) - return log_error_errno(SYNTHETIC_ERRNO(ENOTRECOVERABLE), - "DumpCache() response is missing 'dump' key."); - - if (!sd_json_variant_is_array(d)) - return log_error_errno(SYNTHETIC_ERRNO(ENOTRECOVERABLE), - "DumpCache() response 'dump' field not an array"); - - if (!sd_json_format_enabled(arg_json_format_flags)) { - sd_json_variant *i; - - JSON_VARIANT_ARRAY_FOREACH(i, d) { - r = dump_server_state(i); - if (r < 0) - return r; - } - - return 0; - } + assert(IN_SET(argc, 1, 2)); - return sd_json_variant_dump(d, arg_json_format_flags, NULL, NULL); + return verb_log_control_common(bus, "org.freedesktop.resolve1", argv[0], argc == 2 ? argv[1] : NULL); } static int parse_protocol(const char *arg) { From 4e20d61c65c232a25dd30ce0aad4235dec3f634c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Zbigniew=20J=C4=99drzejewski-Szmek?= Date: Thu, 7 May 2026 13:38:04 +0200 Subject: [PATCH 181/242] shared/verbs: when showing default verb, put [] around the args too The verb cannot be omitted but the args kept, so: resolvectl [status] [link] is wrong, we need: resolvectl [status [link]] Fixes f94da4b4c564f8cff4b5b739456c985e036a4201. --- src/shared/verbs.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/shared/verbs.c b/src/shared/verbs.c index 276c6fd5be916..274945e5c5977 100644 --- a/src/shared/verbs.c +++ b/src/shared/verbs.c @@ -200,9 +200,9 @@ int _verbs_get_help_table( r = table_add_cell_stringf(table, NULL, " %s%s%s%s%s", is_default ? "[" : "", verb->verb, - is_default ? "]" : "", verb->argspec ? " " : "", - strempty(verb->argspec)); + strempty(verb->argspec), + is_default ? "]" : ""); if (r < 0) return table_log_add_error(r); From 7cea71e4c112177e74140b39898e9836f334e588 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Zbigniew=20J=C4=99drzejewski-Szmek?= Date: Thu, 7 May 2026 09:57:43 +0200 Subject: [PATCH 182/242] resolvectl: convert to OPTION and VERB macros Use OPTION_NAMESPACE() to keep the resolvectl and systemd-resolve option sets separate. The resolvconf-compat path (resolvconf invocation) keeps its own getopt-based parsing. --help output has the expected changes to formatting. Synopis for [status] is now shows that the verb is optional. Co-developed-by: Claude Opus 4.7 --- src/resolve/resolvectl.c | 668 +++++++++++++++------------------------ 1 file changed, 249 insertions(+), 419 deletions(-) diff --git a/src/resolve/resolvectl.c b/src/resolve/resolvectl.c index 4088c39b1da60..015345fdba7d9 100644 --- a/src/resolve/resolvectl.c +++ b/src/resolve/resolvectl.c @@ -1,6 +1,5 @@ /* SPDX-License-Identifier: LGPL-2.1-or-later */ -#include #include #include @@ -13,6 +12,7 @@ #include "af-list.h" #include "alloc-util.h" +#include "ansi-color.h" #include "argv-util.h" #include "build.h" #include "bus-common-errors.h" @@ -30,17 +30,19 @@ #include "escape.h" #include "format-ifname.h" #include "format-table.h" +#include "glyph-util.h" +#include "help-util.h" #include "hostname-util.h" #include "json-util.h" #include "main-func.h" #include "missing-network.h" #include "netlink-util.h" +#include "options.h" #include "ordered-set.h" #include "pager.h" #include "parse-argument.h" #include "parse-util.h" #include "polkit-agent.h" -#include "pretty-print.h" #include "resolvconf-compat.h" #include "resolve-util.h" #include "resolvectl.h" @@ -799,6 +801,8 @@ static int resolve_rfc4501(sd_bus *bus, const char *name) { "Invalid DNS URI: %s", name); } +VERB(verb_query, "query", "HOSTNAME|ADDRESS…", 2, VERB_ANY, 0, + "Resolve domain names, IPv4 and IPv6 addresses"); static int verb_query(int argc, char *argv[], uintptr_t _data, void *userdata) { _cleanup_(sd_bus_flush_close_unrefp) sd_bus *bus = NULL; int ret = 0, r; @@ -1009,6 +1013,8 @@ static int resolve_service(sd_bus *bus, const char *name, const char *type, cons return 0; } +VERB(verb_service, "service", "[[NAME] TYPE] DOMAIN", 2, 4, 0, + "Resolve service (SRV)"); static int verb_service(int argc, char *argv[], uintptr_t _data, void *userdata) { _cleanup_(sd_bus_flush_close_unrefp) sd_bus *bus = NULL; int r; @@ -1091,6 +1097,8 @@ static int resolve_openpgp(sd_bus *bus, const char *address) { } #endif +VERB(verb_openpgp, "openpgp", "EMAIL@DOMAIN…", 2, VERB_ANY, 0, + "Query OpenPGP public key"); static int verb_openpgp(int argc, char *argv[], uintptr_t _data, void *userdata) { #if HAVE_OPENSSL _cleanup_(sd_bus_flush_close_unrefp) sd_bus *bus = NULL; @@ -1148,6 +1156,8 @@ static bool service_family_is_valid(const char *s) { return STR_IN_SET(s, "tcp", "udp", "sctp"); } +VERB(verb_tlsa, "tlsa", "DOMAIN[:PORT]…", 2, VERB_ANY, 0, + "Query TLS public key"); static int verb_tlsa(int argc, char *argv[], uintptr_t _data, void *userdata) { _cleanup_(sd_bus_flush_close_unrefp) sd_bus *bus = NULL; const char *family = "tcp"; @@ -1763,10 +1773,14 @@ static int status_ifindex(int ifindex, StatusMode mode) { return status_full(mode, STRV_MAKE(ifname)); } +VERB(verb_status, "status", "[LINK…]", VERB_ANY, VERB_ANY, VERB_DEFAULT, + "Show link and server status"); static int verb_status(int argc, char *argv[], uintptr_t _data, void *userdata) { return status_full(STATUS_ALL, strv_skip(argv, 1)); } +VERB(verb_show_statistics, "statistics", NULL, VERB_ANY, 1, 0, + "Show resolver statistics"); static int verb_show_statistics(int argc, char *argv[], uintptr_t _data, void *userdata) { _cleanup_(table_unrefp) Table *table = NULL; sd_json_variant *reply = NULL; @@ -1928,6 +1942,8 @@ static int verb_show_statistics(int argc, char *argv[], uintptr_t _data, void *u return table_print_or_warn(table); } +VERB(verb_reset_statistics, "reset-statistics", NULL, VERB_ANY, 1, 0, + "Reset resolver statistics"); static int verb_reset_statistics(int argc, char *argv[], uintptr_t _data, void *userdata) { sd_json_variant *reply = NULL; _cleanup_(sd_varlink_unrefp) sd_varlink *vl = NULL; @@ -1953,6 +1969,8 @@ static int verb_reset_statistics(int argc, char *argv[], uintptr_t _data, void * return 0; } +VERB(verb_flush_caches, "flush-caches", NULL, VERB_ANY, 1, 0, + "Flush all local DNS caches"); static int verb_flush_caches(int argc, char *argv[], uintptr_t _data, void *userdata) { _cleanup_(sd_bus_flush_close_unrefp) sd_bus *bus = NULL; _cleanup_(sd_bus_error_free) sd_bus_error error = SD_BUS_ERROR_NULL; @@ -1969,6 +1987,8 @@ static int verb_flush_caches(int argc, char *argv[], uintptr_t _data, void *user return 0; } +VERB(verb_reset_server_features, "reset-server-features", NULL, VERB_ANY, 1, 0, + "Forget learnt DNS server feature levels"); static int verb_reset_server_features(int argc, char *argv[], uintptr_t _data, void *userdata) { _cleanup_(sd_bus_flush_close_unrefp) sd_bus *bus = NULL; _cleanup_(sd_bus_error_free) sd_bus_error error = SD_BUS_ERROR_NULL; @@ -2168,6 +2188,8 @@ static int monitor_reply( return 0; } +VERB(verb_monitor, "monitor", NULL, VERB_ANY, 1, 0, + "Monitor DNS queries"); static int verb_monitor(int argc, char *argv[], uintptr_t _data, void *userdata) { _cleanup_(sd_event_unrefp) sd_event *event = NULL; _cleanup_(sd_varlink_unrefp) sd_varlink *vl = NULL; @@ -2414,6 +2436,8 @@ static int dump_cache_scope(sd_json_variant *scope) { return 0; } +VERB(verb_show_cache, "show-cache", NULL, VERB_ANY, 1, 0, + "Show cache contents"); static int verb_show_cache(int argc, char *argv[], uintptr_t _data, void *userdata) { sd_json_variant *reply = NULL, *d = NULL; _cleanup_(sd_varlink_unrefp) sd_varlink *vl = NULL; @@ -2590,6 +2614,8 @@ static int dump_server_state(sd_json_variant *server) { return table_print_or_warn(table); } +VERB(verb_show_server_state, "show-server-state", NULL, VERB_ANY, 1, 0, + "Show servers state"); static int verb_show_server_state(int argc, char *argv[], uintptr_t _data, void *userdata) { sd_json_variant *reply = NULL, *d = NULL; _cleanup_(sd_varlink_unrefp) sd_varlink *vl = NULL; @@ -2633,6 +2659,8 @@ static int verb_show_server_state(int argc, char *argv[], uintptr_t _data, void return sd_json_variant_dump(d, arg_json_format_flags, NULL, NULL); } +VERB(verb_dns, "dns", "[LINK [SERVER…]]", VERB_ANY, VERB_ANY, 0, + "Get/set per-interface DNS server address"); static int verb_dns(int argc, char *argv[], uintptr_t _data, void *userdata) { _cleanup_(sd_bus_flush_close_unrefp) sd_bus *bus = NULL; _cleanup_(sd_bus_error_free) sd_bus_error error = SD_BUS_ERROR_NULL; @@ -2718,6 +2746,8 @@ static int call_domain(sd_bus *bus, char **domain, const BusLocator *locator, sd return sd_bus_call(bus, req, 0, error, NULL); } +VERB(verb_domain, "domain", "[LINK [DOMAIN…]]", VERB_ANY, VERB_ANY, 0, + "Get/set per-interface search domain"); static int verb_domain(int argc, char *argv[], uintptr_t _data, void *userdata) { _cleanup_(sd_bus_flush_close_unrefp) sd_bus *bus = NULL; _cleanup_(sd_bus_error_free) sd_bus_error error = SD_BUS_ERROR_NULL; @@ -2757,6 +2787,8 @@ static int verb_domain(int argc, char *argv[], uintptr_t _data, void *userdata) return 0; } +VERB(verb_default_route, "default-route", "[LINK [BOOL]]", VERB_ANY, 3, 0, + "Get/set per-interface default route flag"); static int verb_default_route(int argc, char *argv[], uintptr_t _data, void *userdata) { _cleanup_(sd_bus_flush_close_unrefp) sd_bus *bus = NULL; _cleanup_(sd_bus_error_free) sd_bus_error error = SD_BUS_ERROR_NULL; @@ -2801,6 +2833,8 @@ static int verb_default_route(int argc, char *argv[], uintptr_t _data, void *use return 0; } +VERB(verb_llmnr, "llmnr", "[LINK [MODE]]", VERB_ANY, 3, 0, + "Get/set per-interface LLMNR mode"); static int verb_llmnr(int argc, char *argv[], uintptr_t _data, void *userdata) { _cleanup_(sd_bus_flush_close_unrefp) sd_bus *bus = NULL; _cleanup_(sd_bus_error_free) sd_bus_error error = SD_BUS_ERROR_NULL; @@ -2859,6 +2893,8 @@ static int verb_llmnr(int argc, char *argv[], uintptr_t _data, void *userdata) { return 0; } +VERB(verb_mdns, "mdns", "[LINK [MODE]]", VERB_ANY, 3, 0, + "Get/set per-interface MulticastDNS mode"); static int verb_mdns(int argc, char *argv[], uintptr_t _data, void *userdata) { _cleanup_(sd_bus_flush_close_unrefp) sd_bus *bus = NULL; _cleanup_(sd_bus_error_free) sd_bus_error error = SD_BUS_ERROR_NULL; @@ -2923,6 +2959,8 @@ static int verb_mdns(int argc, char *argv[], uintptr_t _data, void *userdata) { return 0; } +VERB(verb_dns_over_tls, "dnsovertls", "[LINK [MODE]]", VERB_ANY, 3, 0, + "Get/set per-interface DNS-over-TLS mode"); static int verb_dns_over_tls(int argc, char *argv[], uintptr_t _data, void *userdata) { _cleanup_(sd_bus_flush_close_unrefp) sd_bus *bus = NULL; _cleanup_(sd_bus_error_free) sd_bus_error error = SD_BUS_ERROR_NULL; @@ -2969,6 +3007,8 @@ static int verb_dns_over_tls(int argc, char *argv[], uintptr_t _data, void *user return 0; } +VERB(verb_dnssec, "dnssec", "[LINK [MODE]]", VERB_ANY, 3, 0, + "Get/set per-interface DNSSEC mode"); static int verb_dnssec(int argc, char *argv[], uintptr_t _data, void *userdata) { _cleanup_(sd_bus_flush_close_unrefp) sd_bus *bus = NULL; _cleanup_(sd_bus_error_free) sd_bus_error error = SD_BUS_ERROR_NULL; @@ -3030,6 +3070,8 @@ static int call_nta(sd_bus *bus, char **nta, const BusLocator *locator, sd_bus_ return sd_bus_call(bus, req, 0, error, NULL); } +VERB(verb_nta, "nta", "[LINK [DOMAIN…]]", VERB_ANY, VERB_ANY, 0, + "Get/set per-interface DNSSEC NTA"); static int verb_nta(int argc, char *argv[], uintptr_t _data, void *userdata) { _cleanup_(sd_bus_flush_close_unrefp) sd_bus *bus = NULL; _cleanup_(sd_bus_error_free) sd_bus_error error = SD_BUS_ERROR_NULL; @@ -3088,6 +3130,8 @@ static int verb_nta(int argc, char *argv[], uintptr_t _data, void *userdata) { return 0; } +VERB(verb_revert_link, "revert", "LINK", VERB_ANY, 2, 0, + "Revert per-interface configuration"); static int verb_revert_link(int argc, char *argv[], uintptr_t _data, void *userdata) { _cleanup_(sd_bus_flush_close_unrefp) sd_bus *bus = NULL; _cleanup_(sd_bus_error_free) sd_bus_error error = SD_BUS_ERROR_NULL; @@ -3125,6 +3169,8 @@ static int verb_revert_link(int argc, char *argv[], uintptr_t _data, void *userd return 0; } +VERB(verb_log_level, "log-level", "[LEVEL]", VERB_ANY, 2, 0, + "Get/set logging threshold for systemd-resolved"); static int verb_log_level(int argc, char *argv[], uintptr_t _data, void *userdata) { _cleanup_(sd_bus_flush_close_unrefp) sd_bus *bus = NULL; int r; @@ -3187,251 +3233,116 @@ static void help_dns_classes(void) { } static int compat_help(void) { - _cleanup_free_ char *link = NULL; + _cleanup_(table_unrefp) Table *options = NULL; int r; - r = terminal_urlify_man("resolvectl", "1", &link); + r = option_parser_get_help_table_ns("systemd-resolve", &options); if (r < 0) - return log_oom(); + return r; pager_open(arg_pager_flags); - printf("%1$s [OPTIONS...] HOSTNAME|ADDRESS...\n" - "%1$s [OPTIONS...] --service [[NAME] TYPE] DOMAIN\n" - "%1$s [OPTIONS...] --openpgp EMAIL@DOMAIN...\n" - "%1$s [OPTIONS...] --statistics\n" - "%1$s [OPTIONS...] --reset-statistics\n" - "\n" - "%2$sResolve domain names, IPv4 and IPv6 addresses, DNS records, and services.%3$s\n\n" - " -h --help Show this help\n" - " --version Show package version\n" - " -4 Resolve IPv4 addresses\n" - " -6 Resolve IPv6 addresses\n" - " -i --interface=INTERFACE Look on interface\n" - " -p --protocol=PROTO|help Look via protocol\n" - " -t --type=TYPE|help Query RR with DNS type\n" - " -c --class=CLASS|help Query RR with DNS class\n" - " --service Resolve service (SRV)\n" - " --service-address=BOOL Resolve address for services (default: yes)\n" - " --service-txt=BOOL Resolve TXT records for services (default: yes)\n" - " --openpgp Query OpenPGP public key\n" - " --tlsa Query TLS public key\n" - " --cname=BOOL Follow CNAME redirects (default: yes)\n" - " --search=BOOL Use search domains for single-label names\n" - " (default: yes)\n" - " --statistics Show resolver statistics\n" - " --reset-statistics Reset resolver statistics\n" - " --status Show link and server status\n" - " --flush-caches Flush all local DNS caches\n" - " --reset-server-features\n" - " Forget learnt DNS server feature levels\n" - " --set-dns=SERVER Set per-interface DNS server address\n" - " --set-domain=DOMAIN Set per-interface search domain\n" - " --set-llmnr=MODE Set per-interface LLMNR mode\n" - " --set-mdns=MODE Set per-interface MulticastDNS mode\n" - " --set-dnsovertls=MODE Set per-interface DNS-over-TLS mode\n" - " --set-dnssec=MODE Set per-interface DNSSEC mode\n" - " --set-nta=DOMAIN Set per-interface DNSSEC NTA\n" - " --revert Revert per-interface configuration\n" - " --raw[=payload|packet] Dump the answer as binary data\n" - " --no-pager Do not pipe output into a pager\n" - " --legend=BOOL Print headers and additional info (default: yes)\n" - "\nSee the %4$s for details.\n", - program_invocation_short_name, - ansi_highlight(), - ansi_normal(), - link); + help_cmdline("[OPTIONS…] HOSTNAME|ADDRESS…"); + help_cmdline("[OPTIONS…] --service [[NAME] TYPE] DOMAIN"); + help_cmdline("[OPTIONS…] --openpgp EMAIL@DOMAIN…"); + help_cmdline("[OPTIONS…] --statistics"); + help_cmdline("[OPTIONS…] --reset-statistics"); + help_abstract("Resolve domain names, IPv4 and IPv6 addresses, DNS records, and services."); + + help_section("Options"); + r = table_print_or_warn(options); + if (r < 0) + return r; + help_man_page_reference("resolvectl", "1"); return 0; } static int native_help(void) { - _cleanup_free_ char *link = NULL; + _cleanup_(table_unrefp) Table *verbs = NULL, *options = NULL; int r; - r = terminal_urlify_man("resolvectl", "1", &link); + r = verbs_get_help_table(&verbs); if (r < 0) - return log_oom(); + return r; + + r = option_parser_get_help_table_ns("resolvectl", &options); + if (r < 0) + return r; + + (void) table_sync_column_widths(0, verbs, options); pager_open(arg_pager_flags); - printf("%1$s [OPTIONS...] COMMAND ...\n" - "\n" - "%5$sSend control commands to the network name resolution manager, or%6$s\n" - "%5$sresolve domain names, IPv4 and IPv6 addresses, DNS records, and services.%6$s\n" - "\n%3$sCommands:%4$s\n" - " query HOSTNAME|ADDRESS... Resolve domain names, IPv4 and IPv6 addresses\n" - " service [[NAME] TYPE] DOMAIN Resolve service (SRV)\n" - " openpgp EMAIL@DOMAIN... Query OpenPGP public key\n" - " tlsa DOMAIN[:PORT]... Query TLS public key\n" - " status [LINK...] Show link and server status\n" - " statistics Show resolver statistics\n" - " reset-statistics Reset resolver statistics\n" - " flush-caches Flush all local DNS caches\n" - " reset-server-features Forget learnt DNS server feature levels\n" - " monitor Monitor DNS queries\n" - " show-cache Show cache contents\n" - " show-server-state Show servers state\n" - " dns [LINK [SERVER...]] Get/set per-interface DNS server address\n" - " domain [LINK [DOMAIN...]] Get/set per-interface search domain\n" - " default-route [LINK [BOOL]] Get/set per-interface default route flag\n" - " llmnr [LINK [MODE]] Get/set per-interface LLMNR mode\n" - " mdns [LINK [MODE]] Get/set per-interface MulticastDNS mode\n" - " dnsovertls [LINK [MODE]] Get/set per-interface DNS-over-TLS mode\n" - " dnssec [LINK [MODE]] Get/set per-interface DNSSEC mode\n" - " nta [LINK [DOMAIN...]] Get/set per-interface DNSSEC NTA\n" - " revert LINK Revert per-interface configuration\n" - " log-level [LEVEL] Get/set logging threshold for systemd-resolved\n" - "\n%3$sOptions:%4$s\n" - " -h --help Show this help\n" - " --version Show package version\n" - " -4 Resolve IPv4 addresses\n" - " -6 Resolve IPv6 addresses\n" - " -i --interface=INTERFACE Look on interface\n" - " -p --protocol=PROTO|help Look via protocol\n" - " -t --type=TYPE|help Query RR with DNS type\n" - " -c --class=CLASS|help Query RR with DNS class\n" - " --service-address=BOOL Resolve address for services (default: yes)\n" - " --service-txt=BOOL Resolve TXT records for services (default: yes)\n" - " --cname=BOOL Follow CNAME redirects (default: yes)\n" - " --validate=BOOL Allow DNSSEC validation (default: yes)\n" - " --synthesize=BOOL Allow synthetic response (default: yes)\n" - " --cache=BOOL Allow response from cache (default: yes)\n" - " --stale-data=BOOL Allow response from cache with stale data (default: yes)\n" - " --relax-single-label=BOOL Allow single label lookups to go upstream (default: no)\n" - " --zone=BOOL Allow response from locally registered mDNS/LLMNR\n" - " records (default: yes)\n" - " --trust-anchor=BOOL Allow response from local trust anchor (default:\n" - " yes)\n" - " --network=BOOL Allow response from network (default: yes)\n" - " --search=BOOL Use search domains for single-label names (default:\n" - " yes)\n" - " --raw[=payload|packet] Dump the answer as binary data\n" - " --no-pager Do not pipe output into a pager\n" - " --no-ask-password Do not prompt for password\n" - " --legend=BOOL Print headers and additional info (default: yes)\n" - " --json=MODE Output as JSON\n" - " -j Same as --json=pretty on tty, --json=short\n" - " otherwise\n" - "\nSee the %2$s for details.\n", - program_invocation_short_name, - link, - ansi_underline(), - ansi_normal(), - ansi_highlight(), - ansi_normal()); + help_cmdline("[OPTIONS…] COMMAND …"); + help_abstract("Send control commands to the network name resolution manager, or\n" + "resolve domain names, IPv4 and IPv6 addresses, DNS records, and services."); + + help_section("Commands"); + r = table_print_or_warn(verbs); + if (r < 0) + return r; + help_section("Options"); + r = table_print_or_warn(options); + if (r < 0) + return r; + + help_man_page_reference("resolvectl", "1"); return 0; } -static int verb_help(int argc, char *argv[], uintptr_t _data, void *userdata) { - return native_help(); -} - -static int compat_parse_argv(int argc, char *argv[]) { - enum { - ARG_VERSION = 0x100, - ARG_LEGEND, - ARG_SERVICE, - ARG_CNAME, - ARG_SERVICE_ADDRESS, - ARG_SERVICE_TXT, - ARG_OPENPGP, - ARG_TLSA, - ARG_RAW, - ARG_SEARCH, - ARG_STATISTICS, - ARG_RESET_STATISTICS, - ARG_STATUS, - ARG_FLUSH_CACHES, - ARG_RESET_SERVER_FEATURES, - ARG_NO_PAGER, - ARG_SET_DNS, - ARG_SET_DOMAIN, - ARG_SET_LLMNR, - ARG_SET_MDNS, - ARG_SET_DNS_OVER_TLS, - ARG_SET_DNSSEC, - ARG_SET_NTA, - ARG_REVERT_LINK, - }; - - static const struct option options[] = { - { "help", no_argument, NULL, 'h' }, - { "version", no_argument, NULL, ARG_VERSION }, - { "type", required_argument, NULL, 't' }, - { "class", required_argument, NULL, 'c' }, - { "legend", required_argument, NULL, ARG_LEGEND }, - { "interface", required_argument, NULL, 'i' }, - { "protocol", required_argument, NULL, 'p' }, - { "cname", required_argument, NULL, ARG_CNAME }, - { "service", no_argument, NULL, ARG_SERVICE }, - { "service-address", required_argument, NULL, ARG_SERVICE_ADDRESS }, - { "service-txt", required_argument, NULL, ARG_SERVICE_TXT }, - { "openpgp", no_argument, NULL, ARG_OPENPGP }, - { "tlsa", optional_argument, NULL, ARG_TLSA }, - { "raw", optional_argument, NULL, ARG_RAW }, - { "search", required_argument, NULL, ARG_SEARCH }, - { "statistics", no_argument, NULL, ARG_STATISTICS, }, - { "reset-statistics", no_argument, NULL, ARG_RESET_STATISTICS }, - { "status", no_argument, NULL, ARG_STATUS }, - { "flush-caches", no_argument, NULL, ARG_FLUSH_CACHES }, - { "reset-server-features", no_argument, NULL, ARG_RESET_SERVER_FEATURES }, - { "no-pager", no_argument, NULL, ARG_NO_PAGER }, - { "set-dns", required_argument, NULL, ARG_SET_DNS }, - { "set-domain", required_argument, NULL, ARG_SET_DOMAIN }, - { "set-llmnr", required_argument, NULL, ARG_SET_LLMNR }, - { "set-mdns", required_argument, NULL, ARG_SET_MDNS }, - { "set-dnsovertls", required_argument, NULL, ARG_SET_DNS_OVER_TLS }, - { "set-dnssec", required_argument, NULL, ARG_SET_DNSSEC }, - { "set-nta", required_argument, NULL, ARG_SET_NTA }, - { "revert", no_argument, NULL, ARG_REVERT_LINK }, - {} - }; +VERB_COMMON_HELP_HIDDEN(native_help); - int c, r; +static int compat_parse_argv(int argc, char *argv[], char ***remaining_args) { + int r; assert(argc >= 0); assert(argv); + assert(remaining_args); + + OptionParser opts = { argc, argv, .namespace = "systemd-resolve" }; - while ((c = getopt_long(argc, argv, "h46i:t:c:p:", options, NULL)) >= 0) + FOREACH_OPTION_OR_RETURN(c, &opts) switch (c) { - case 'h': + OPTION_NAMESPACE("systemd-resolve"): {} + + OPTION_COMMON_HELP: return compat_help(); - case ARG_VERSION: + OPTION_COMMON_VERSION: return version(); - case '4': + OPTION_SHORT('4', NULL, "Resolve IPv4 addresses"): arg_family = AF_INET; break; - case '6': + OPTION_SHORT('6', NULL, "Resolve IPv6 addresses"): arg_family = AF_INET6; break; - case 'i': - r = ifname_mangle(optarg); + OPTION('i', "interface", "INTERFACE", "Look on interface"): + r = ifname_mangle(opts.arg); if (r < 0) return r; break; - case 'p': - r = parse_protocol(optarg); + OPTION('p', "protocol", "PROTO|help", "Look via protocol"): + r = parse_protocol(opts.arg); if (r <= 0) return r; break; - case 't': - if (streq(optarg, "help")) { + OPTION('t', "type", "TYPE|help", "Query RR with DNS type"): + if (streq(opts.arg, "help")) { help_dns_types(); return 0; } - r = dns_type_from_string(optarg); + r = dns_type_from_string(opts.arg); if (r < 0) - return log_error_errno(r, "Failed to parse RR record type %s: %m", optarg); + return log_error_errno(r, "Failed to parse RR record type %s: %m", opts.arg); arg_type = (uint16_t) r; assert((int) arg_type == r); @@ -3439,167 +3350,162 @@ static int compat_parse_argv(int argc, char *argv[]) { arg_mode = MODE_RESOLVE_RECORD; break; - case 'c': - if (streq(optarg, "help")) { + OPTION('c', "class", "CLASS|help", "Query RR with DNS class"): + if (streq(opts.arg, "help")) { help_dns_classes(); return 0; } - r = dns_class_from_string(optarg); + r = dns_class_from_string(opts.arg); if (r < 0) - return log_error_errno(r, "Failed to parse RR record class %s: %m", optarg); + return log_error_errno(r, "Failed to parse RR record class %s: %m", opts.arg); arg_class = (uint16_t) r; assert((int) arg_class == r); break; - case ARG_SERVICE: + OPTION_LONG("service", NULL, "Resolve service (SRV)"): arg_mode = MODE_RESOLVE_SERVICE; break; - case ARG_SERVICE_ADDRESS: - r = parse_boolean_argument("--service-address=", optarg, NULL); + OPTION_LONG("service-address", "BOOL", "Resolve address for services (default: yes)"): + r = parse_boolean_argument("--service-address=", opts.arg, NULL); if (r < 0) return r; SET_FLAG(arg_flags, SD_RESOLVED_NO_ADDRESS, r == 0); break; - case ARG_SERVICE_TXT: - r = parse_boolean_argument("--service-txt=", optarg, NULL); + OPTION_LONG("service-txt", "BOOL", "Resolve TXT records for services (default: yes)"): + r = parse_boolean_argument("--service-txt=", opts.arg, NULL); if (r < 0) return r; SET_FLAG(arg_flags, SD_RESOLVED_NO_TXT, r == 0); break; - case ARG_OPENPGP: + OPTION_LONG("openpgp", NULL, "Query OpenPGP public key"): arg_mode = MODE_RESOLVE_OPENPGP; break; - case ARG_TLSA: + OPTION_LONG_FLAGS(OPTION_OPTIONAL_ARG, "tlsa", "FAMILY", "Query TLS public key"): arg_mode = MODE_RESOLVE_TLSA; - if (!optarg || service_family_is_valid(optarg)) - arg_service_family = optarg; + if (!opts.arg || service_family_is_valid(opts.arg)) + arg_service_family = opts.arg; else return log_error_errno(SYNTHETIC_ERRNO(EINVAL), - "Unknown service family \"%s\".", optarg); + "Unknown service family \"%s\".", opts.arg); break; - case ARG_CNAME: - r = parse_boolean_argument("--cname=", optarg, NULL); + OPTION_LONG("cname", "BOOL", "Follow CNAME redirects (default: yes)"): + r = parse_boolean_argument("--cname=", opts.arg, NULL); if (r < 0) return r; SET_FLAG(arg_flags, SD_RESOLVED_NO_CNAME, r == 0); break; - - case ARG_SEARCH: - r = parse_boolean_argument("--search=", optarg, NULL); + OPTION_LONG("search", "BOOL", "Use search domains for single-label names (default: yes)"): + r = parse_boolean_argument("--search=", opts.arg, NULL); if (r < 0) return r; SET_FLAG(arg_flags, SD_RESOLVED_NO_SEARCH, r == 0); break; - case ARG_STATISTICS: + OPTION_LONG("statistics", NULL, "Show resolver statistics"): arg_mode = MODE_STATISTICS; break; - case ARG_RESET_STATISTICS: + OPTION_LONG("reset-statistics", NULL, "Reset resolver statistics"): arg_mode = MODE_RESET_STATISTICS; break; - case ARG_STATUS: + OPTION_LONG("status", NULL, "Show link and server status"): arg_mode = MODE_STATUS; break; - case ARG_FLUSH_CACHES: + OPTION_LONG("flush-caches", NULL, "Flush all local DNS caches"): arg_mode = MODE_FLUSH_CACHES; break; - case ARG_RESET_SERVER_FEATURES: + OPTION_LONG("reset-server-features", NULL, + "Forget learnt DNS server feature levels"): arg_mode = MODE_RESET_SERVER_FEATURES; break; - case ARG_SET_DNS: - r = strv_extend(&arg_set_dns, optarg); + OPTION_LONG("set-dns", "SERVER", "Set per-interface DNS server address"): + r = strv_extend(&arg_set_dns, opts.arg); if (r < 0) return log_oom(); arg_mode = MODE_SET_LINK; break; - case ARG_SET_DOMAIN: - r = strv_extend(&arg_set_domain, optarg); + OPTION_LONG("set-domain", "DOMAIN", "Set per-interface search domain"): + r = strv_extend(&arg_set_domain, opts.arg); if (r < 0) return log_oom(); arg_mode = MODE_SET_LINK; break; - case ARG_SET_LLMNR: - arg_set_llmnr = optarg; + OPTION_LONG("set-llmnr", "MODE", "Set per-interface LLMNR mode"): + arg_set_llmnr = opts.arg; arg_mode = MODE_SET_LINK; break; - case ARG_SET_MDNS: - arg_set_mdns = optarg; + OPTION_LONG("set-mdns", "MODE", "Set per-interface MulticastDNS mode"): + arg_set_mdns = opts.arg; arg_mode = MODE_SET_LINK; break; - case ARG_SET_DNS_OVER_TLS: - arg_set_dns_over_tls = optarg; + OPTION_LONG("set-dnsovertls", "MODE", "Set per-interface DNS-over-TLS mode"): + arg_set_dns_over_tls = opts.arg; arg_mode = MODE_SET_LINK; break; - case ARG_SET_DNSSEC: - arg_set_dnssec = optarg; + OPTION_LONG("set-dnssec", "MODE", "Set per-interface DNSSEC mode"): + arg_set_dnssec = opts.arg; arg_mode = MODE_SET_LINK; break; - case ARG_SET_NTA: - r = strv_extend(&arg_set_nta, optarg); + OPTION_LONG("set-nta", "DOMAIN", "Set per-interface DNSSEC NTA"): + r = strv_extend(&arg_set_nta, opts.arg); if (r < 0) return log_oom(); arg_mode = MODE_SET_LINK; break; - case ARG_REVERT_LINK: + OPTION_LONG("revert", NULL, "Revert per-interface configuration"): arg_mode = MODE_REVERT_LINK; break; - case ARG_RAW: + OPTION_LONG_FLAGS(OPTION_OPTIONAL_ARG, "raw", "payload|packet", + "Dump the answer as binary data"): if (on_tty()) return log_error_errno(SYNTHETIC_ERRNO(ENOTTY), "Refusing to write binary data to tty."); - if (optarg == NULL || streq(optarg, "payload")) + if (opts.arg == NULL || streq(opts.arg, "payload")) arg_raw = RAW_PAYLOAD; - else if (streq(optarg, "packet")) + else if (streq(opts.arg, "packet")) arg_raw = RAW_PACKET; else return log_error_errno(SYNTHETIC_ERRNO(EINVAL), "Unknown --raw specifier \"%s\".", - optarg); + opts.arg); arg_legend = false; break; - case ARG_NO_PAGER: + OPTION_COMMON_NO_PAGER: arg_pager_flags |= PAGER_DISABLE; break; - case ARG_LEGEND: - r = parse_boolean_argument("--legend=", optarg, &arg_legend); + OPTION_LONG("legend", "BOOL", "Print headers and additional info (default: yes)"): + r = parse_boolean_argument("--legend=", opts.arg, &arg_legend); if (r < 0) return r; break; - - case '?': - return -EINVAL; - - default: - assert_not_reached(); } if (arg_type == 0 && arg_class != 0) @@ -3623,253 +3529,209 @@ static int compat_parse_argv(int argc, char *argv[]) { "--set-dns=, --set-domain=, --set-llmnr=, --set-mdns=, --set-dnsovertls=, --set-dnssec=, --set-nta= and --revert require --interface=."); } + *remaining_args = option_parser_get_args(&opts); return 1 /* work to do */; } -static int native_parse_argv(int argc, char *argv[]) { - enum { - ARG_VERSION = 0x100, - ARG_LEGEND, - ARG_CNAME, - ARG_VALIDATE, - ARG_SYNTHESIZE, - ARG_CACHE, - ARG_ZONE, - ARG_TRUST_ANCHOR, - ARG_NETWORK, - ARG_SERVICE_ADDRESS, - ARG_SERVICE_TXT, - ARG_RAW, - ARG_SEARCH, - ARG_NO_PAGER, - ARG_NO_ASK_PASSWORD, - ARG_JSON, - ARG_STALE_DATA, - ARG_RELAX_SINGLE_LABEL, - }; - - static const struct option options[] = { - { "help", no_argument, NULL, 'h' }, - { "version", no_argument, NULL, ARG_VERSION }, - { "type", required_argument, NULL, 't' }, - { "class", required_argument, NULL, 'c' }, - { "legend", required_argument, NULL, ARG_LEGEND }, - { "interface", required_argument, NULL, 'i' }, - { "protocol", required_argument, NULL, 'p' }, - { "cname", required_argument, NULL, ARG_CNAME }, - { "validate", required_argument, NULL, ARG_VALIDATE }, - { "synthesize", required_argument, NULL, ARG_SYNTHESIZE }, - { "cache", required_argument, NULL, ARG_CACHE }, - { "zone", required_argument, NULL, ARG_ZONE }, - { "trust-anchor", required_argument, NULL, ARG_TRUST_ANCHOR }, - { "network", required_argument, NULL, ARG_NETWORK }, - { "service-address", required_argument, NULL, ARG_SERVICE_ADDRESS }, - { "service-txt", required_argument, NULL, ARG_SERVICE_TXT }, - { "raw", optional_argument, NULL, ARG_RAW }, - { "search", required_argument, NULL, ARG_SEARCH }, - { "no-pager", no_argument, NULL, ARG_NO_PAGER }, - { "no-ask-password", no_argument, NULL, ARG_NO_ASK_PASSWORD }, - { "json", required_argument, NULL, ARG_JSON }, - { "stale-data", required_argument, NULL, ARG_STALE_DATA }, - { "relax-single-label", required_argument, NULL, ARG_RELAX_SINGLE_LABEL }, - {} - }; - - int c, r; +static int native_parse_argv(int argc, char *argv[], char ***remaining_args) { + int r; assert(argc >= 0); assert(argv); + assert(remaining_args); + + OptionParser opts = { argc, argv, .namespace = "resolvectl" }; - while ((c = getopt_long(argc, argv, "h46i:t:c:p:j", options, NULL)) >= 0) + FOREACH_OPTION_OR_RETURN(c, &opts) switch (c) { - case 'h': + OPTION_NAMESPACE("resolvectl"): {} + + OPTION_COMMON_HELP: return native_help(); - case ARG_VERSION: + OPTION_COMMON_VERSION: return version(); - case '4': + OPTION_SHORT('4', NULL, "Resolve IPv4 addresses"): arg_family = AF_INET; break; - case '6': + OPTION_SHORT('6', NULL, "Resolve IPv6 addresses"): arg_family = AF_INET6; break; - case 'i': - r = ifname_mangle(optarg); + OPTION('i', "interface", "INTERFACE", "Look on interface"): + r = ifname_mangle(opts.arg); if (r < 0) return r; break; - case 'p': - r = parse_protocol(optarg); + OPTION('p', "protocol", "PROTO|help", "Look via protocol"): + r = parse_protocol(opts.arg); if (r <= 0) return r; break; - case 't': - if (streq(optarg, "help")) { + OPTION('t', "type", "TYPE|help", "Query RR with DNS type"): + if (streq(opts.arg, "help")) { help_dns_types(); return 0; } - r = dns_type_from_string(optarg); + r = dns_type_from_string(opts.arg); if (r < 0) - return log_error_errno(r, "Failed to parse RR record type %s: %m", optarg); + return log_error_errno(r, "Failed to parse RR record type %s: %m", opts.arg); arg_type = (uint16_t) r; assert((int) arg_type == r); break; - case 'c': - if (streq(optarg, "help")) { + OPTION('c', "class", "CLASS|help", "Query RR with DNS class"): + if (streq(opts.arg, "help")) { help_dns_classes(); return 0; } - r = dns_class_from_string(optarg); + r = dns_class_from_string(opts.arg); if (r < 0) - return log_error_errno(r, "Failed to parse RR record class %s: %m", optarg); + return log_error_errno(r, "Failed to parse RR record class %s: %m", opts.arg); arg_class = (uint16_t) r; assert((int) arg_class == r); break; - case ARG_SERVICE_ADDRESS: - r = parse_boolean_argument("--service-address=", optarg, NULL); + OPTION_LONG("service-address", "BOOL", "Resolve address for services (default: yes)"): + r = parse_boolean_argument("--service-address=", opts.arg, NULL); if (r < 0) return r; SET_FLAG(arg_flags, SD_RESOLVED_NO_ADDRESS, r == 0); break; - case ARG_SERVICE_TXT: - r = parse_boolean_argument("--service-txt=", optarg, NULL); + OPTION_LONG("service-txt", "BOOL", "Resolve TXT records for services (default: yes)"): + r = parse_boolean_argument("--service-txt=", opts.arg, NULL); if (r < 0) return r; SET_FLAG(arg_flags, SD_RESOLVED_NO_TXT, r == 0); break; - case ARG_CNAME: - r = parse_boolean_argument("--cname=", optarg, NULL); + OPTION_LONG("cname", "BOOL", "Follow CNAME redirects (default: yes)"): + r = parse_boolean_argument("--cname=", opts.arg, NULL); if (r < 0) return r; SET_FLAG(arg_flags, SD_RESOLVED_NO_CNAME, r == 0); break; - case ARG_VALIDATE: - r = parse_boolean_argument("--validate=", optarg, NULL); + OPTION_LONG("validate", "BOOL", "Allow DNSSEC validation (default: yes)"): + r = parse_boolean_argument("--validate=", opts.arg, NULL); if (r < 0) return r; SET_FLAG(arg_flags, SD_RESOLVED_NO_VALIDATE, r == 0); break; - case ARG_SYNTHESIZE: - r = parse_boolean_argument("--synthesize=", optarg, NULL); + OPTION_LONG("synthesize", "BOOL", "Allow synthetic response (default: yes)"): + r = parse_boolean_argument("--synthesize=", opts.arg, NULL); if (r < 0) return r; SET_FLAG(arg_flags, SD_RESOLVED_NO_SYNTHESIZE, r == 0); break; - case ARG_CACHE: - r = parse_boolean_argument("--cache=", optarg, NULL); + OPTION_LONG("cache", "BOOL", "Allow response from cache (default: yes)"): + r = parse_boolean_argument("--cache=", opts.arg, NULL); if (r < 0) return r; SET_FLAG(arg_flags, SD_RESOLVED_NO_CACHE, r == 0); break; - case ARG_STALE_DATA: - r = parse_boolean_argument("--stale-data=", optarg, NULL); + OPTION_LONG("stale-data", "BOOL", + "Allow response from cache with stale data (default: yes)"): + r = parse_boolean_argument("--stale-data=", opts.arg, NULL); if (r < 0) return r; SET_FLAG(arg_flags, SD_RESOLVED_NO_STALE, r == 0); break; - case ARG_RELAX_SINGLE_LABEL: - r = parse_boolean_argument("--relax-single-label=", optarg, NULL); + OPTION_LONG("relax-single-label", "BOOL", + "Allow single label lookups to go upstream (default: no)"): + r = parse_boolean_argument("--relax-single-label=", opts.arg, NULL); if (r < 0) return r; SET_FLAG(arg_flags, SD_RESOLVED_RELAX_SINGLE_LABEL, r > 0); break; - case ARG_ZONE: - r = parse_boolean_argument("--zone=", optarg, NULL); + OPTION_LONG("zone", "BOOL", + "Allow response from locally registered mDNS/LLMNR records (default: yes)"): + r = parse_boolean_argument("--zone=", opts.arg, NULL); if (r < 0) return r; SET_FLAG(arg_flags, SD_RESOLVED_NO_ZONE, r == 0); break; - case ARG_TRUST_ANCHOR: - r = parse_boolean_argument("--trust-anchor=", optarg, NULL); + OPTION_LONG("trust-anchor", "BOOL", + "Allow response from local trust anchor (default: yes)"): + r = parse_boolean_argument("--trust-anchor=", opts.arg, NULL); if (r < 0) return r; SET_FLAG(arg_flags, SD_RESOLVED_NO_TRUST_ANCHOR, r == 0); break; - case ARG_NETWORK: - r = parse_boolean_argument("--network=", optarg, NULL); + OPTION_LONG("network", "BOOL", "Allow response from network (default: yes)"): + r = parse_boolean_argument("--network=", opts.arg, NULL); if (r < 0) return r; SET_FLAG(arg_flags, SD_RESOLVED_NO_NETWORK, r == 0); break; - case ARG_SEARCH: - r = parse_boolean_argument("--search=", optarg, NULL); + OPTION_LONG("search", "BOOL", "Use search domains for single-label names (default: yes)"): + r = parse_boolean_argument("--search=", opts.arg, NULL); if (r < 0) return r; SET_FLAG(arg_flags, SD_RESOLVED_NO_SEARCH, r == 0); break; - case ARG_RAW: + OPTION_LONG_FLAGS(OPTION_OPTIONAL_ARG, "raw", "payload|packet", + "Dump the answer as binary data"): if (on_tty()) return log_error_errno(SYNTHETIC_ERRNO(ENOTTY), "Refusing to write binary data to tty."); - if (optarg == NULL || streq(optarg, "payload")) + if (opts.arg == NULL || streq(opts.arg, "payload")) arg_raw = RAW_PAYLOAD; - else if (streq(optarg, "packet")) + else if (streq(opts.arg, "packet")) arg_raw = RAW_PACKET; else return log_error_errno(SYNTHETIC_ERRNO(EINVAL), "Unknown --raw specifier \"%s\".", - optarg); + opts.arg); arg_legend = false; break; - case ARG_NO_PAGER: + OPTION_COMMON_NO_PAGER: arg_pager_flags |= PAGER_DISABLE; break; - case ARG_NO_ASK_PASSWORD: + OPTION_COMMON_NO_ASK_PASSWORD: arg_ask_password = false; break; - case ARG_LEGEND: - r = parse_boolean_argument("--legend=", optarg, &arg_legend); + OPTION_LONG("legend", "BOOL", "Print headers and additional info (default: yes)"): + r = parse_boolean_argument("--legend=", opts.arg, &arg_legend); if (r < 0) return r; break; - case ARG_JSON: - r = parse_json_argument(optarg, &arg_json_format_flags); + OPTION_COMMON_JSON: + r = parse_json_argument(opts.arg, &arg_json_format_flags); if (r <= 0) return r; - break; - case 'j': + OPTION_COMMON_LOWERCASE_J: arg_json_format_flags = SD_JSON_FORMAT_PRETTY_AUTO|SD_JSON_FORMAT_COLOR_AUTO; break; - - case '?': - return -EINVAL; - - default: - assert_not_reached(); } if (arg_type == 0 && arg_class != 0) @@ -3882,140 +3744,107 @@ static int native_parse_argv(int argc, char *argv[]) { if (arg_class != 0 && arg_type == 0) arg_type = DNS_TYPE_A; + *remaining_args = option_parser_get_args(&opts); return 1 /* work to do */; } -static int native_main(int argc, char *argv[]) { - - static const Verb verbs[] = { - { "help", VERB_ANY, VERB_ANY, 0, verb_help }, - { "status", VERB_ANY, VERB_ANY, VERB_DEFAULT, verb_status }, - { "query", 2, VERB_ANY, 0, verb_query }, - { "service", 2, 4, 0, verb_service }, - { "openpgp", 2, VERB_ANY, 0, verb_openpgp }, - { "tlsa", 2, VERB_ANY, 0, verb_tlsa }, - { "statistics", VERB_ANY, 1, 0, verb_show_statistics }, - { "reset-statistics", VERB_ANY, 1, 0, verb_reset_statistics }, - { "flush-caches", VERB_ANY, 1, 0, verb_flush_caches }, - { "reset-server-features", VERB_ANY, 1, 0, verb_reset_server_features }, - { "dns", VERB_ANY, VERB_ANY, 0, verb_dns }, - { "domain", VERB_ANY, VERB_ANY, 0, verb_domain }, - { "default-route", VERB_ANY, 3, 0, verb_default_route }, - { "llmnr", VERB_ANY, 3, 0, verb_llmnr }, - { "mdns", VERB_ANY, 3, 0, verb_mdns }, - { "dnsovertls", VERB_ANY, 3, 0, verb_dns_over_tls }, - { "dnssec", VERB_ANY, 3, 0, verb_dnssec }, - { "nta", VERB_ANY, VERB_ANY, 0, verb_nta }, - { "revert", VERB_ANY, 2, 0, verb_revert_link }, - { "log-level", VERB_ANY, 2, 0, verb_log_level }, - { "monitor", VERB_ANY, 1, 0, verb_monitor }, - { "show-cache", VERB_ANY, 1, 0, verb_show_cache }, - { "show-server-state", VERB_ANY, 1, 0, verb_show_server_state }, - {} - }; - - return dispatch_verb(argc, argv, verbs, /* userdata= */ NULL); -} - -static int translate(const char *verb, const char *single_arg, size_t num_args, char **args) { +static int translate(const char *verb, const char *single_arg, char **args) { char **fake, **p; size_t num; assert(verb); - assert(num_args == 0 || args); - num = !!single_arg + num_args + 1; + num = !!single_arg + strv_length(args) + 1; p = fake = newa0(char *, num + 1); *p++ = (char *) verb; if (single_arg) *p++ = (char *) single_arg; - FOREACH_ARRAY(arg, args, num_args) - *p++ = *arg; + STRV_FOREACH(a, args) + *p++ = *a; - optind = 0; - return native_main((int) num, fake); + return dispatch_verb_with_args(fake, /* userdata= */ NULL); } -static int compat_main(int argc, char *argv[]) { +static int compat_main(char **args) { int r = 0; switch (arg_mode) { case MODE_RESOLVE_HOST: case MODE_RESOLVE_RECORD: - return translate("query", NULL, argc - optind, argv + optind); + return translate("query", NULL, args); case MODE_RESOLVE_SERVICE: - return translate("service", NULL, argc - optind, argv + optind); + return translate("service", NULL, args); case MODE_RESOLVE_OPENPGP: - return translate("openpgp", NULL, argc - optind, argv + optind); + return translate("openpgp", NULL, args); case MODE_RESOLVE_TLSA: - return translate("tlsa", arg_service_family, argc - optind, argv + optind); + return translate("tlsa", arg_service_family, args); case MODE_STATISTICS: - return translate("statistics", NULL, 0, NULL); + return translate("statistics", NULL, NULL); case MODE_RESET_STATISTICS: - return translate("reset-statistics", NULL, 0, NULL); + return translate("reset-statistics", NULL, NULL); case MODE_FLUSH_CACHES: - return translate("flush-caches", NULL, 0, NULL); + return translate("flush-caches", NULL, NULL); case MODE_RESET_SERVER_FEATURES: - return translate("reset-server-features", NULL, 0, NULL); + return translate("reset-server-features", NULL, NULL); case MODE_STATUS: - return translate("status", NULL, argc - optind, argv + optind); + return translate("status", NULL, args); case MODE_SET_LINK: assert(arg_ifname); if (arg_disable_default_route) { - r = translate("default-route", arg_ifname, 1, STRV_MAKE("no")); + r = translate("default-route", arg_ifname, STRV_MAKE("no")); if (r < 0) return r; } if (arg_set_dns) { - r = translate("dns", arg_ifname, strv_length(arg_set_dns), arg_set_dns); + r = translate("dns", arg_ifname, arg_set_dns); if (r < 0) return r; } if (arg_set_domain) { - r = translate("domain", arg_ifname, strv_length(arg_set_domain), arg_set_domain); + r = translate("domain", arg_ifname, arg_set_domain); if (r < 0) return r; } if (arg_set_nta) { - r = translate("nta", arg_ifname, strv_length(arg_set_nta), arg_set_nta); + r = translate("nta", arg_ifname, arg_set_nta); if (r < 0) return r; } if (arg_set_llmnr) { - r = translate("llmnr", arg_ifname, 1, (char **) &arg_set_llmnr); + r = translate("llmnr", arg_ifname, STRV_MAKE(arg_set_llmnr)); if (r < 0) return r; } if (arg_set_mdns) { - r = translate("mdns", arg_ifname, 1, (char **) &arg_set_mdns); + r = translate("mdns", arg_ifname, STRV_MAKE(arg_set_mdns)); if (r < 0) return r; } if (arg_set_dns_over_tls) { - r = translate("dnsovertls", arg_ifname, 1, (char **) &arg_set_dns_over_tls); + r = translate("dnsovertls", arg_ifname, STRV_MAKE(arg_set_dns_over_tls)); if (r < 0) return r; } if (arg_set_dnssec) { - r = translate("dnssec", arg_ifname, 1, (char **) &arg_set_dnssec); + r = translate("dnssec", arg_ifname, STRV_MAKE(arg_set_dnssec)); if (r < 0) return r; } @@ -4025,7 +3854,7 @@ static int compat_main(int argc, char *argv[]) { case MODE_REVERT_LINK: assert(arg_ifname); - return translate("revert", arg_ifname, 0, NULL); + return translate("revert", arg_ifname, NULL); case _MODE_INVALID: assert_not_reached(); @@ -4035,6 +3864,7 @@ static int compat_main(int argc, char *argv[]) { } static int run(int argc, char **argv) { + char **args = NULL; bool compat = false; int r; @@ -4046,16 +3876,16 @@ static int run(int argc, char **argv) { r = resolvconf_parse_argv(argc, argv); } else if (invoked_as(argv, "systemd-resolve")) { compat = true; - r = compat_parse_argv(argc, argv); + r = compat_parse_argv(argc, argv, &args); } else - r = native_parse_argv(argc, argv); + r = native_parse_argv(argc, argv, &args); if (r <= 0) return r; if (compat) - return compat_main(argc, argv); + return compat_main(args); - return native_main(argc, argv); + return dispatch_verb_with_args(args, /* userdata= */ NULL); } DEFINE_MAIN_FUNCTION(run); From 366071143d60987f44d6173730ffc3fb875fd804 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Zbigniew=20J=C4=99drzejewski-Szmek?= Date: Thu, 7 May 2026 13:54:12 +0200 Subject: [PATCH 183/242] resolvconf-compat: convert to OPTION macros Use the "resolvconf" namespace to keep these options separate from the resolvectl/systemd-resolve sets. Co-developed-by: Claude Opus 4.7 --- src/resolve/resolvconf-compat.c | 131 +++++++++++++------------------- 1 file changed, 53 insertions(+), 78 deletions(-) diff --git a/src/resolve/resolvconf-compat.c b/src/resolve/resolvconf-compat.c index d8e68d524a094..d3ddec755a244 100644 --- a/src/resolve/resolvconf-compat.c +++ b/src/resolve/resolvconf-compat.c @@ -1,14 +1,15 @@ /* SPDX-License-Identifier: LGPL-2.1-or-later */ -#include #include #include "alloc-util.h" #include "build.h" #include "extract-word.h" #include "fileio.h" +#include "format-table.h" +#include "help-util.h" #include "log.h" -#include "pretty-print.h" +#include "options.h" #include "resolvconf-compat.h" #include "resolvectl.h" #include "string-util.h" @@ -21,36 +22,32 @@ typedef enum LookupType { } LookupType; static int resolvconf_help(void) { - _cleanup_free_ char *link = NULL; + _cleanup_(table_unrefp) Table *options = NULL; int r; - r = terminal_urlify_man("resolvectl", "1", &link); + r = option_parser_get_help_table_ns("resolvconf", &options); if (r < 0) - return log_oom(); - - printf("%1$s -a INTERFACE < FILE\n" - "%1$s -d INTERFACE\n" - "\n" - "Register DNS server and domain configuration with systemd-resolved.\n\n" - " -h --help Show this help\n" - " --version Show package version\n" - " -a Register per-interface DNS server and domain data\n" - " -d Unregister per-interface DNS server and domain data\n" - " -p Do not use this interface as default route\n" - " -f Ignore if specified interface does not exist\n" - " -x Send DNS traffic preferably over this interface\n" - "\n" + return r; + + help_cmdline("-a INTERFACE = 0); assert(argv); @@ -204,89 +182,86 @@ int resolvconf_parse_argv(int argc, char *argv[]) { arg_mode = _MODE_INVALID; - while ((c = getopt_long(argc, argv, "hadxpfm:uIi:l:Rr:vV", options, NULL)) >= 0) + OptionParser opts = { argc, argv, .namespace = "resolvconf" }; + + FOREACH_OPTION_OR_RETURN(c, &opts) switch (c) { - case 'h': + OPTION_NAMESPACE("resolvconf"): {} + + OPTION_COMMON_HELP: return resolvconf_help(); - case ARG_VERSION: + OPTION_COMMON_VERSION: return version(); /* -a and -d is what everybody can agree on */ - case 'a': + OPTION_SHORT('a', NULL, "Register per-interface DNS server and domain data"): arg_mode = MODE_SET_LINK; break; - case 'd': + OPTION_SHORT('d', NULL, "Unregister per-interface DNS server and domain data"): arg_mode = MODE_REVERT_LINK; break; - /* The exclusive/private/force stuff is an openresolv invention, we support in some skewed way */ - case 'x': - lookup_type = LOOKUP_TYPE_EXCLUSIVE; - break; - - case 'p': + OPTION_SHORT('p', NULL, "Do not use this interface as default route"): lookup_type = LOOKUP_TYPE_PRIVATE; break; - case 'f': + OPTION_SHORT('f', NULL, "Ignore if specified interface does not exist"): arg_ifindex_permissive = true; break; + /* The exclusive/private/force stuff is an openresolv invention, we support in some skewed way */ + OPTION_SHORT('x', NULL, "Send DNS traffic preferably over this interface"): + lookup_type = LOOKUP_TYPE_EXCLUSIVE; + break; + /* The metrics stuff is an openresolv invention we ignore (and don't really need) */ - case 'm': - log_debug("Switch -%c ignored.", c); + OPTION_SHORT('m', "ARG", /* help= */ NULL): + log_debug("Switch -%c ignored.", opts.opt->short_code); break; /* -u supposedly should "update all subscribers". We have no subscribers, hence let's make this a NOP, and exit immediately, cleanly. */ - case 'u': - log_info("Switch -%c ignored.", c); + OPTION_SHORT('u', NULL, /* help= */ NULL): + log_info("Switch -%c ignored.", opts.opt->short_code); return 0; /* The following options are openresolv inventions we don't support. */ - case 'I': - case 'i': - case 'l': - case 'R': - case 'r': - case 'v': - case 'V': + OPTION_SHORT('I', NULL, /* help= */ NULL): {} + OPTION_SHORT('i', "ARG", /* help= */ NULL): {} + OPTION_SHORT('l', "ARG", /* help= */ NULL): {} + OPTION_SHORT('R', NULL, /* help= */ NULL): {} + OPTION_SHORT('r', "ARG", /* help= */ NULL): {} + OPTION_SHORT('v', NULL, /* help= */ NULL): {} + OPTION_SHORT('V', NULL, /* help= */ NULL): return log_error_errno(SYNTHETIC_ERRNO(EINVAL), - "Switch -%c not supported.", c); + "Switch -%c not supported.", opts.opt->short_code); /* The Debian resolvconf commands we don't support. */ - case ARG_ENABLE_UPDATES: + OPTION_LONG("enable-updates", NULL, /* help= */ NULL): return log_error_errno(SYNTHETIC_ERRNO(EINVAL), "Switch --enable-updates not supported."); - case ARG_DISABLE_UPDATES: + OPTION_LONG("disable-updates", NULL, /* help= */ NULL): return log_error_errno(SYNTHETIC_ERRNO(EINVAL), "Switch --disable-updates not supported."); - case ARG_UPDATES_ARE_ENABLED: + OPTION_LONG("updates-are-enabled", NULL, /* help= */ NULL): return log_error_errno(SYNTHETIC_ERRNO(EINVAL), "Switch --updates-are-enabled not supported."); - - case '?': - return -EINVAL; - - default: - assert_not_reached(); } if (arg_mode == _MODE_INVALID) return log_error_errno(SYNTHETIC_ERRNO(EINVAL), "Expected either -a or -d on the command line."); - if (optind+1 != argc) + if (option_parser_get_n_args(&opts) != 1) return log_error_errno(SYNTHETIC_ERRNO(EINVAL), "Expected interface name as argument."); - r = ifname_resolvconf_mangle(argv[optind]); + r = ifname_resolvconf_mangle(option_parser_get_arg(&opts, 0)); if (r <= 0) return r; - optind++; if (arg_mode == MODE_SET_LINK) { r = parse_stdin(lookup_type); From 62a489fbf97ae3cbaf36cc1b2ad5260032423385 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Zbigniew=20J=C4=99drzejewski-Szmek?= Date: Thu, 7 May 2026 21:43:32 +0200 Subject: [PATCH 184/242] tree-wide: rename unref_and_replace_full to unref_and_replace_new_ref We have a number of *_unref_and_replace macros. One could think that they are like the various free_and_replace variants, but they actually create a new ref to the passed object. The free_and_replace variants take ownership of the argument. This inconsistency is surprising. Rename all those functions to have "_new_ref" at the end to make the difference clear. --- src/basic/cleanup-util.h | 14 +++++++------- src/libsystemd-network/dhcp-lease-internal.h | 4 ++-- src/libsystemd-network/sd-dhcp-client.c | 6 +++--- src/libsystemd-network/sd-dhcp6-client.c | 2 +- src/libsystemd/sd-device/device-util.h | 4 ++-- src/network/networkd-dhcp6.c | 2 +- src/network/networkd-link.c | 2 +- src/network/networkd-wiphy.c | 4 ++-- src/shared/blockdev-util.c | 2 +- src/udev/udev-manager.c | 2 +- 10 files changed, 21 insertions(+), 21 deletions(-) diff --git a/src/basic/cleanup-util.h b/src/basic/cleanup-util.h index 041c37530aaa9..e3a2ade4ed984 100644 --- a/src/basic/cleanup-util.h +++ b/src/basic/cleanup-util.h @@ -19,13 +19,13 @@ typedef void* (*mfree_func_t)(void *p); /* This is similar to free_and_replace_full(), but NULL is not assigned to 'b', and its reference counter is * increased. */ -#define unref_and_replace_full(a, b, ref_func, unref_func) \ - ({ \ - typeof(a)* _a = &(a); \ - typeof(b) _b = ref_func(b); \ - unref_func(*_a); \ - *_a = _b; \ - 0; \ +#define unref_and_replace_new_ref(a, b, ref_func, unref_func) \ + ({ \ + typeof(a)* _a = &(a); \ + typeof(b) _b = ref_func(b); \ + unref_func(*_a); \ + *_a = _b; \ + 0; \ }) #define _DEFINE_TRIVIAL_REF_FUNC(type, name, scope) \ diff --git a/src/libsystemd-network/dhcp-lease-internal.h b/src/libsystemd-network/dhcp-lease-internal.h index 744580fc48a88..1eab7f89b616c 100644 --- a/src/libsystemd-network/dhcp-lease-internal.h +++ b/src/libsystemd-network/dhcp-lease-internal.h @@ -97,5 +97,5 @@ void dhcp_lease_set_timestamp(sd_dhcp_lease *lease, const triple_timestamp *time int dhcp_lease_set_default_subnet_mask(sd_dhcp_lease *lease); int dhcp_lease_set_client_id(sd_dhcp_lease *lease, const sd_dhcp_client_id *client_id); -#define dhcp_lease_unref_and_replace(a, b) \ - unref_and_replace_full(a, b, sd_dhcp_lease_ref, sd_dhcp_lease_unref) +#define dhcp_lease_unref_and_replace_new_ref(a, b) \ + unref_and_replace_new_ref(a, b, sd_dhcp_lease_ref, sd_dhcp_lease_unref) diff --git a/src/libsystemd-network/sd-dhcp-client.c b/src/libsystemd-network/sd-dhcp-client.c index 9742ab833bd5b..922dd5881ca9e 100644 --- a/src/libsystemd-network/sd-dhcp-client.c +++ b/src/libsystemd-network/sd-dhcp-client.c @@ -1594,7 +1594,7 @@ static int client_handle_offer_or_rapid_ack(sd_dhcp_client *client, DHCPMessage dhcp_lease_set_timestamp(lease, timestamp); - dhcp_lease_unref_and_replace(client->lease, lease); + dhcp_lease_unref_and_replace_new_ref(client->lease, lease); if (client->lease->rapid_commit) { log_dhcp_client(client, "ACK"); @@ -1678,7 +1678,7 @@ static int client_handle_ack(sd_dhcp_client *client, DHCPMessage *message, size_ else r = SD_DHCP_CLIENT_EVENT_IP_CHANGE; - dhcp_lease_unref_and_replace(client->lease, lease); + dhcp_lease_unref_and_replace_new_ref(client->lease, lease); log_dhcp_client(client, "ACK"); return r; @@ -2281,7 +2281,7 @@ sd_event* sd_dhcp_client_get_event(sd_dhcp_client *client) { int sd_dhcp_client_attach_device(sd_dhcp_client *client, sd_device *dev) { assert_return(client, -EINVAL); - return device_unref_and_replace(client->dev, dev); + return device_unref_and_replace_new_ref(client->dev, dev); } static sd_dhcp_client* dhcp_client_free(sd_dhcp_client *client) { diff --git a/src/libsystemd-network/sd-dhcp6-client.c b/src/libsystemd-network/sd-dhcp6-client.c index ee67664364c9f..5e71d63de5a43 100644 --- a/src/libsystemd-network/sd-dhcp6-client.c +++ b/src/libsystemd-network/sd-dhcp6-client.c @@ -1533,7 +1533,7 @@ sd_event *sd_dhcp6_client_get_event(sd_dhcp6_client *client) { int sd_dhcp6_client_attach_device(sd_dhcp6_client *client, sd_device *dev) { assert_return(client, -EINVAL); - return device_unref_and_replace(client->dev, dev); + return device_unref_and_replace_new_ref(client->dev, dev); } static sd_dhcp6_client *dhcp6_client_free(sd_dhcp6_client *client) { diff --git a/src/libsystemd/sd-device/device-util.h b/src/libsystemd/sd-device/device-util.h index 3bbe321f61649..e4350a679b690 100644 --- a/src/libsystemd/sd-device/device-util.h +++ b/src/libsystemd/sd-device/device-util.h @@ -6,8 +6,8 @@ #include "sd-forward.h" #include "log.h" -#define device_unref_and_replace(a, b) \ - unref_and_replace_full(a, b, sd_device_ref, sd_device_unref) +#define device_unref_and_replace_new_ref(a, b) \ + unref_and_replace_new_ref(a, b, sd_device_ref, sd_device_unref) #define FOREACH_DEVICE_PROPERTY(device, key, value) \ for (const char *value, *key = sd_device_get_property_first(device, &value); \ diff --git a/src/network/networkd-dhcp6.c b/src/network/networkd-dhcp6.c index c230a86587464..799caeb15b312 100644 --- a/src/network/networkd-dhcp6.c +++ b/src/network/networkd-dhcp6.c @@ -364,7 +364,7 @@ static int dhcp6_lease_information_acquired(sd_dhcp6_client *client, Link *link) if (r < 0) return log_link_error_errno(link, r, "Failed to get DHCPv6 lease: %m"); - unref_and_replace_full(link->dhcp6_lease, lease, sd_dhcp6_lease_ref, sd_dhcp6_lease_unref); + unref_and_replace_new_ref(link->dhcp6_lease, lease, sd_dhcp6_lease_ref, sd_dhcp6_lease_unref); link_dirty(link); return 0; diff --git a/src/network/networkd-link.c b/src/network/networkd-link.c index c6bc8fc4b1c4d..dcd081039bf55 100644 --- a/src/network/networkd-link.c +++ b/src/network/networkd-link.c @@ -1678,7 +1678,7 @@ static int link_initialized(Link *link, sd_device *device) { /* Always replace with the new sd_device object. As the sysname (and possibly other properties * or sysattrs) may be outdated. */ - device_unref_and_replace(link->dev, device); + device_unref_and_replace_new_ref(link->dev, device); r = link_managed_by_us(link); if (r <= 0) diff --git a/src/network/networkd-wiphy.c b/src/network/networkd-wiphy.c index 1dde69a43b44d..f715f244d6b29 100644 --- a/src/network/networkd-wiphy.c +++ b/src/network/networkd-wiphy.c @@ -469,7 +469,7 @@ int manager_udev_process_wiphy(Manager *m, sd_device *device, sd_device_action_t return 0; } - return device_unref_and_replace(w->dev, action == SD_DEVICE_REMOVE ? NULL : device); + return device_unref_and_replace_new_ref(w->dev, action == SD_DEVICE_REMOVE ? NULL : device); } int manager_udev_process_rfkill(Manager *m, sd_device *device, sd_device_action_t action) { @@ -501,5 +501,5 @@ int manager_udev_process_rfkill(Manager *m, sd_device *device, sd_device_action_ return 0; } - return device_unref_and_replace(w->rfkill, action == SD_DEVICE_REMOVE ? NULL : device); + return device_unref_and_replace_new_ref(w->rfkill, action == SD_DEVICE_REMOVE ? NULL : device); } diff --git a/src/shared/blockdev-util.c b/src/shared/blockdev-util.c index 12a4f59c28a69..83eb67c54152d 100644 --- a/src/shared/blockdev-util.c +++ b/src/shared/blockdev-util.c @@ -207,7 +207,7 @@ int block_device_new_from_fd(int fd, BlockDeviceLookupFlags flags, sd_device **r r = block_device_get_originating(dev_whole_disk, &dev_origin, /* recursive= */ false); if (r >= 0) - device_unref_and_replace(dev, dev_origin); + device_unref_and_replace_new_ref(dev, dev_origin); else if (r != -ENOENT) return r; } diff --git a/src/udev/udev-manager.c b/src/udev/udev-manager.c index 7c2530f17fec8..44d2ee6400362 100644 --- a/src/udev/udev-manager.c +++ b/src/udev/udev-manager.c @@ -683,7 +683,7 @@ static void event_find_blocker(Event *event) { log_device_debug(event->dev, "SEQNUM=%" PRIu64 " blocked by SEQNUM=%" PRIu64, event->seqnum, e->seqnum); - unref_and_replace_full(event->blocker, e, event_ref, event_unref); + unref_and_replace_new_ref(event->blocker, e, event_ref, event_unref); return; } From f3ee3510a3ce67bcde3016c6f26faf3a6a9f5150 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Zbigniew=20J=C4=99drzejewski-Szmek?= Date: Thu, 7 May 2026 22:34:11 +0200 Subject: [PATCH 185/242] sd-dhcp-client: avoid taking and dropping a reference The helper would create a new ref, even though we had one handy and didn't need to create a new ref. So change the helper to take an existing reference. --- src/libsystemd-network/dhcp-lease-internal.h | 4 ++-- src/libsystemd-network/sd-dhcp-client.c | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/src/libsystemd-network/dhcp-lease-internal.h b/src/libsystemd-network/dhcp-lease-internal.h index 1eab7f89b616c..3f5d638d67d39 100644 --- a/src/libsystemd-network/dhcp-lease-internal.h +++ b/src/libsystemd-network/dhcp-lease-internal.h @@ -97,5 +97,5 @@ void dhcp_lease_set_timestamp(sd_dhcp_lease *lease, const triple_timestamp *time int dhcp_lease_set_default_subnet_mask(sd_dhcp_lease *lease); int dhcp_lease_set_client_id(sd_dhcp_lease *lease, const sd_dhcp_client_id *client_id); -#define dhcp_lease_unref_and_replace_new_ref(a, b) \ - unref_and_replace_new_ref(a, b, sd_dhcp_lease_ref, sd_dhcp_lease_unref) +#define dhcp_lease_unref_and_replace(a, b) \ + free_and_replace_full(a, b, sd_dhcp_lease_unref) diff --git a/src/libsystemd-network/sd-dhcp-client.c b/src/libsystemd-network/sd-dhcp-client.c index 922dd5881ca9e..c6a4d02f965a4 100644 --- a/src/libsystemd-network/sd-dhcp-client.c +++ b/src/libsystemd-network/sd-dhcp-client.c @@ -1594,7 +1594,7 @@ static int client_handle_offer_or_rapid_ack(sd_dhcp_client *client, DHCPMessage dhcp_lease_set_timestamp(lease, timestamp); - dhcp_lease_unref_and_replace_new_ref(client->lease, lease); + dhcp_lease_unref_and_replace(client->lease, lease); if (client->lease->rapid_commit) { log_dhcp_client(client, "ACK"); @@ -1678,7 +1678,7 @@ static int client_handle_ack(sd_dhcp_client *client, DHCPMessage *message, size_ else r = SD_DHCP_CLIENT_EVENT_IP_CHANGE; - dhcp_lease_unref_and_replace_new_ref(client->lease, lease); + dhcp_lease_unref_and_replace(client->lease, lease); log_dhcp_client(client, "ACK"); return r; From f2361469e41454c573200c562b9d89481205f76b Mon Sep 17 00:00:00 2001 From: Dirga Yuza Date: Fri, 8 May 2026 07:10:40 +0700 Subject: [PATCH 186/242] hwdb: add force-release to Nitro AN515-58 backlight keys This fixes an incomplete mapping introduced in PR #39769 for the Acer Nitro 5 AN515-58. The previous PR mapped the physical keyboard backlight keys (scancodes `0xef` and `0xf0`) to `kbdillumup` and `kbdillumdown` to prevent them from dropping screen brightness. However, the embedded controller on this Acer model only emits "make" (press) scancodes and fails to emit "break" (release) scancodes for these specific keys. Without a release event, the input subsystem registers the keys as continously held down (auto-repeat). In desktop environments like KDE Plasma, pressing the key once causes the brightness UI slider to get stuck in an infinite adjustment loop. This issue is previously unnoticed as this model did not expose any keyboard backlight control. The fix is done by prepending the `!` (force-release) flag to the keycodes. This instructs `evdev` to synthesize a key release event. The fix is verified locally on an Acer Nitro AN515-58. `evtest` now correctly reports `value 1` immediately followed by `value` 0, and KDE Plasma brightness OSD no longer gets stuck. Signed-off-by: Dirga Yuza --- hwdb.d/60-keyboard.hwdb | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/hwdb.d/60-keyboard.hwdb b/hwdb.d/60-keyboard.hwdb index 8c1605930da97..ebc08560fe9e8 100644 --- a/hwdb.d/60-keyboard.hwdb +++ b/hwdb.d/60-keyboard.hwdb @@ -237,8 +237,8 @@ evdev:atkbd:dmi:bvn*:bvr*:bd*:svnAcer*:pnNitro*AN*515-47:pvr* # Nitro AN515-58 evdev:atkbd:dmi:bvn*:bvr*:bd*:svnAcer*:pnNitro*AN*515-58:pvr* - KEYBOARD_KEY_ef=kbdillumup # Fn+F10 - KEYBOARD_KEY_f0=kbdillumdown # Fn+F9 + KEYBOARD_KEY_ef=!kbdillumup # Fn+F10 + KEYBOARD_KEY_f0=!kbdillumdown # Fn+F9 KEYBOARD_KEY_8a=micmute # Microphone mute button KEYBOARD_KEY_55=power From 88d7e4439216b1ad2e3ba061dcf6ef99d9af9574 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Zbigniew=20J=C4=99drzejewski-Szmek?= Date: Thu, 7 May 2026 22:53:27 +0200 Subject: [PATCH 187/242] Add json_variant_unref_and_replace and use it where appropriate JSON_VARIANT_REPLACE is similar, but doesn't try to nullify the second arg, so it can be used with an inline expression. --- src/home/homectl.c | 8 ++----- src/home/user-record-util.c | 32 +++++--------------------- src/libsystemd/sd-json/json-util.h | 13 +++++++---- src/libsystemd/sd-json/sd-json.c | 16 ++++++------- src/libsystemd/sd-json/test-json.c | 3 +-- src/libsystemd/sd-varlink/sd-varlink.c | 5 +--- src/resolve/resolvectl.c | 6 ++--- src/resolve/resolved-manager.c | 2 +- src/shared/user-record.c | 6 ++--- 9 files changed, 30 insertions(+), 61 deletions(-) diff --git a/src/home/homectl.c b/src/home/homectl.c index ce54da0d8c2b7..80ee6e2155a94 100644 --- a/src/home/homectl.c +++ b/src/home/homectl.c @@ -1038,8 +1038,7 @@ static int apply_identity_changes(sd_json_variant **_v) { if (r < 0) return log_error_errno(r, "Failed to allocate new perMachine array: %m"); - sd_json_variant_unref(per_machine); - per_machine = TAKE_PTR(npm); + json_variant_unref_and_replace(per_machine, npm); } else { _cleanup_(sd_json_variant_unrefp) sd_json_variant *positive = sd_json_variant_ref(arg_identity_extra_this_machine), *negative = sd_json_variant_ref(arg_identity_extra_other_machines); @@ -1122,10 +1121,7 @@ static int apply_identity_changes(sd_json_variant **_v) { } } - sd_json_variant_unref(*_v); - *_v = TAKE_PTR(v); - - return 0; + return json_variant_unref_and_replace(*_v, v); } static int add_disposition(sd_json_variant **v) { diff --git a/src/home/user-record-util.c b/src/home/user-record-util.c index b27d993922a60..b419a85b6842d 100644 --- a/src/home/user-record-util.c +++ b/src/home/user-record-util.c @@ -376,8 +376,7 @@ int user_record_add_binding( if (r < 0) return r; - sd_json_variant_unref(new_binding_entry); - new_binding_entry = TAKE_PTR(be); + json_variant_unref_and_replace(new_binding_entry, be); } } @@ -787,12 +786,8 @@ int user_record_update_last_changed(UserRecord *h, bool with_password) { } h->last_change_usec = n; - - sd_json_variant_unref(h->json); - h->json = TAKE_PTR(v); - h->mask |= USER_RECORD_REGULAR; - return 0; + return json_variant_unref_and_replace(h->json, v); } int user_record_make_hashed_password(UserRecord *h, char **secret, bool extend) { @@ -1131,12 +1126,8 @@ int user_record_set_password_change_now(UserRecord *h, int b) { SET_FLAG(h->mask, USER_RECORD_PER_MACHINE, !sd_json_variant_is_blank_array(array)); } - sd_json_variant_unref(h->json); - h->json = TAKE_PTR(w); - h->password_change_now = b; - - return 0; + return json_variant_unref_and_replace(h->json, w); } int user_record_merge_secret(UserRecord *h, UserRecord *secret) { @@ -1227,14 +1218,10 @@ int user_record_good_authentication(UserRecord *h) { if (r < 0) return r; - sd_json_variant_unref(h->json); - h->json = TAKE_PTR(v); - h->good_authentication_counter = counter; h->last_good_authentication_usec = usec; - h->mask |= USER_RECORD_STATUS; - return 0; + return json_variant_unref_and_replace(h->json, v); } int user_record_bad_authentication(UserRecord *h) { @@ -1282,14 +1269,10 @@ int user_record_bad_authentication(UserRecord *h) { if (r < 0) return r; - sd_json_variant_unref(h->json); - h->json = TAKE_PTR(v); - h->bad_authentication_counter = counter; h->last_bad_authentication_usec = usec; - h->mask |= USER_RECORD_STATUS; - return 0; + return json_variant_unref_and_replace(h->json, v); } int user_record_ratelimit(UserRecord *h) { @@ -1344,13 +1327,10 @@ int user_record_ratelimit(UserRecord *h) { if (r < 0) return r; - sd_json_variant_unref(h->json); - h->json = TAKE_PTR(v); - h->ratelimit_begin_usec = new_ratelimit_begin_usec; h->ratelimit_count = new_ratelimit_count; - h->mask |= USER_RECORD_STATUS; + json_variant_unref_and_replace(h->json, v); return 1; } diff --git a/src/libsystemd/sd-json/json-util.h b/src/libsystemd/sd-json/json-util.h index 0db1e445e62ac..6f06f6fb63500 100644 --- a/src/libsystemd/sd-json/json-util.h +++ b/src/libsystemd/sd-json/json-util.h @@ -9,14 +9,17 @@ #include "sd-forward.h" #include "string-util.h" /* IWYU pragma: keep */ -#define JSON_VARIANT_REPLACE(v, q) \ - do { \ - typeof(v)* _v = &(v); \ - typeof(q) _q = (q); \ +#define JSON_VARIANT_REPLACE(v, q) \ + do { \ + typeof(v)* _v = &(v); \ + typeof(q) _q = (q); \ sd_json_variant_unref(*_v); \ - *_v = _q; \ + *_v = _q; \ } while(false) +#define json_variant_unref_and_replace(a, b) \ + free_and_replace_full(a, b, sd_json_variant_unref) + static inline int json_variant_set_field_non_null(sd_json_variant **v, const char *field, sd_json_variant *value) { return value && !sd_json_variant_is_null(value) ? sd_json_variant_set_field(v, field, value) : 0; } diff --git a/src/libsystemd/sd-json/sd-json.c b/src/libsystemd/sd-json/sd-json.c index 659dffb2bac7e..fe8b8225c96d3 100644 --- a/src/libsystemd/sd-json/sd-json.c +++ b/src/libsystemd/sd-json/sd-json.c @@ -1974,7 +1974,7 @@ _public_ int sd_json_variant_filter(sd_json_variant **v, char **to_remove) { return r; json_variant_propagate_sensitive(*v, w); - JSON_VARIANT_REPLACE(*v, TAKE_PTR(w)); + json_variant_unref_and_replace(*v, w); return (int) n; } @@ -2043,7 +2043,7 @@ _public_ int sd_json_variant_set_field(sd_json_variant **v, const char *field, s return r; json_variant_propagate_sensitive(*v, w); - JSON_VARIANT_REPLACE(*v, TAKE_PTR(w)); + json_variant_unref_and_replace(*v, w); return 1; } @@ -2183,7 +2183,7 @@ _public_ int sd_json_variant_merge_object(sd_json_variant **v, sd_json_variant * json_variant_propagate_sensitive(*v, w); json_variant_propagate_sensitive(m, w); - JSON_VARIANT_REPLACE(*v, TAKE_PTR(w)); + json_variant_unref_and_replace(*v, w); return 1; } @@ -2262,9 +2262,7 @@ _public_ int sd_json_variant_append_array(sd_json_variant **v, sd_json_variant * } json_variant_propagate_sensitive(*v, nv); - JSON_VARIANT_REPLACE(*v, TAKE_PTR(nv)); - - return 0; + return json_variant_unref_and_replace(*v, nv); } _public_ int sd_json_variant_append_arrayb(sd_json_variant **v, ...) { @@ -2511,7 +2509,7 @@ static int json_variant_set_source(sd_json_variant **v, JsonSource *source, unsi w->line = line; w->column = column; - JSON_VARIANT_REPLACE(*v, w); + json_variant_unref_and_replace(*v, w); return 1; } @@ -5855,7 +5853,7 @@ _public_ int sd_json_variant_sort(sd_json_variant **v) { if (!n->sorted) /* Check if this worked. This will fail if there are multiple identical keys used. */ return -ENOTUNIQ; - JSON_VARIANT_REPLACE(*v, TAKE_PTR(n)); + json_variant_unref_and_replace(*v, n); return 1; } @@ -5910,7 +5908,7 @@ _public_ int sd_json_variant_normalize(sd_json_variant **v) { goto finish; } - JSON_VARIANT_REPLACE(*v, TAKE_PTR(n)); + json_variant_unref_and_replace(*v, n); r = 1; diff --git a/src/libsystemd/sd-json/test-json.c b/src/libsystemd/sd-json/test-json.c index 1785ef416f5b1..3be4b09660b14 100644 --- a/src/libsystemd/sd-json/test-json.c +++ b/src/libsystemd/sd-json/test-json.c @@ -548,8 +548,7 @@ TEST(depth) { assert_se(r >= 0); - sd_json_variant_unref(v); - v = TAKE_PTR(w); + json_variant_unref_and_replace(v, w); } sd_json_variant_dump(v, 0, stdout, NULL); diff --git a/src/libsystemd/sd-varlink/sd-varlink.c b/src/libsystemd/sd-varlink/sd-varlink.c index 2a5f677ef37d0..8e43e38800bde 100644 --- a/src/libsystemd/sd-varlink/sd-varlink.c +++ b/src/libsystemd/sd-varlink/sd-varlink.c @@ -772,10 +772,7 @@ static int varlink_sanitize_incoming_parameters(sd_json_variant **v) { r = sd_json_variant_new_object(&empty, NULL, 0); if (r < 0) return r; - /* sd_json_variant_unref() is a NOP if *v is NULL */ - sd_json_variant_unref(*v); - *v = TAKE_PTR(empty); - return 0; + return json_variant_unref_and_replace(*v, empty); } /* Ensure we have an object */ diff --git a/src/resolve/resolvectl.c b/src/resolve/resolvectl.c index 015345fdba7d9..e8ae7c5412acc 100644 --- a/src/resolve/resolvectl.c +++ b/src/resolve/resolvectl.c @@ -1251,8 +1251,7 @@ static int status_json_filter_links(sd_json_variant **configuration, char **link return r; } - JSON_VARIANT_REPLACE(*configuration, TAKE_PTR(v)); - return 0; + return json_variant_unref_and_replace(*configuration, v); } static int status_json_filter_fields(sd_json_variant **configuration, StatusMode mode) { @@ -1281,8 +1280,7 @@ static int status_json_filter_fields(sd_json_variant **configuration, StatusMode return r; } - JSON_VARIANT_REPLACE(*configuration, TAKE_PTR(v)); - return 0; + return json_variant_unref_and_replace(*configuration, v); } static int format_dns_server_one(DNSConfiguration *configuration, DNSServer *s, char **ret) { diff --git a/src/resolve/resolved-manager.c b/src/resolve/resolved-manager.c index 0a52e922c4ff5..d7d707726587d 100644 --- a/src/resolve/resolved-manager.c +++ b/src/resolve/resolved-manager.c @@ -2335,7 +2335,7 @@ int manager_send_dns_configuration_changed(Manager *m, Link *l, bool reset) { if (sd_json_variant_equal(configuration, m->dns_configuration_json)) return 0; - JSON_VARIANT_REPLACE(m->dns_configuration_json, TAKE_PTR(configuration)); + json_variant_unref_and_replace(m->dns_configuration_json, configuration); r = varlink_many_notify(m->varlink_dns_configuration_subscription, m->dns_configuration_json); if (r < 0) diff --git a/src/shared/user-record.c b/src/shared/user-record.c index 191870c8ea62d..7c64171bf58ec 100644 --- a/src/shared/user-record.c +++ b/src/shared/user-record.c @@ -2374,8 +2374,7 @@ static int remove_self_modifiable_json_fields_common(UserRecord *current, sd_jso return r; } - JSON_VARIANT_REPLACE(*target, TAKE_PTR(v)); - return 0; + return json_variant_unref_and_replace(*target, v); } static int remove_self_modifiable_json_fields(UserRecord *current, UserRecord *h, sd_json_variant **ret) { @@ -2460,8 +2459,7 @@ static int remove_self_modifiable_json_fields(UserRecord *current, UserRecord *h return r; } - JSON_VARIANT_REPLACE(*ret, TAKE_PTR(v)); - return 0; + return json_variant_unref_and_replace(*ret, v); } int user_record_self_changes_allowed(UserRecord *current, UserRecord *incoming) { From 576ca62b9b1b76c5621d29b9a3895ab21e645740 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Zbigniew=20J=C4=99drzejewski-Szmek?= Date: Thu, 7 May 2026 22:55:00 +0200 Subject: [PATCH 188/242] userdbctl: split out parse_from_file() parse_from_file doesn't set arg_from_file itself, but returns a sd_json_variant ref to the caller. I think the change of arg_from_file is more readable with this structure. --- src/userdb/userdbctl.c | 40 +++++++++++++++++++++++++++++----------- 1 file changed, 29 insertions(+), 11 deletions(-) diff --git a/src/userdb/userdbctl.c b/src/userdb/userdbctl.c index 6b4371aa509b1..75b30215b1da3 100644 --- a/src/userdb/userdbctl.c +++ b/src/userdb/userdbctl.c @@ -19,6 +19,7 @@ #include "format-util.h" #include "fs-util.h" #include "io-util.h" +#include "json-util.h" #include "log.h" #include "main-func.h" #include "mkdir.h" @@ -1590,6 +1591,30 @@ static int verb_help(int argc, char *argv[], uintptr_t _data, void *userdata) { return help(); } +static int parse_from_file(const char *arg, sd_json_variant **ret) { + sd_json_variant *v = NULL; + int r; + + assert(ret); + + if (!isempty(arg)) { + const char *fn = streq(arg, "-") ? NULL : arg; + unsigned line = 0; + r = sd_json_parse_file( + fn ? NULL : stdin, + fn ?: "", + SD_JSON_PARSE_MUST_BE_OBJECT | SD_JSON_PARSE_SENSITIVE, + &v, + &line, + /* reterr_column= */ NULL); + if (r < 0) + return log_syntax(/* unit= */ NULL, LOG_ERR, fn ?: "", line, r, "JSON parse failure."); + } + + *ret = v; + return 0; +} + static int parse_argv(int argc, char *argv[]) { enum { @@ -1832,20 +1857,13 @@ static int parse_argv(int argc, char *argv[]) { break; case 'F': { - if (isempty(optarg)) { - arg_from_file = sd_json_variant_unref(arg_from_file); - break; - } + sd_json_variant *v = NULL; /* initialization to appease gcc-14 */ - _cleanup_(sd_json_variant_unrefp) sd_json_variant *v = NULL; - const char *fn = streq(optarg, "-") ? NULL : optarg; - unsigned line = 0; - r = sd_json_parse_file(fn ? NULL : stdin, fn ?: "", SD_JSON_PARSE_MUST_BE_OBJECT|SD_JSON_PARSE_SENSITIVE, &v, &line, /* reterr_column= */ NULL); + r = parse_from_file(optarg, &v); if (r < 0) - return log_syntax(/* unit= */ NULL, LOG_ERR, fn ?: "", line, r, "JSON parse failure."); + return r; - sd_json_variant_unref(arg_from_file); - arg_from_file = TAKE_PTR(v); + json_variant_unref_and_replace(arg_from_file, v); break; } From 244d80bbf85c7a3ac2834b426e9bd5d8c9b73144 Mon Sep 17 00:00:00 2001 From: Luca Boccassi Date: Wed, 6 May 2026 19:57:19 +0100 Subject: [PATCH 189/242] test: try to make TEST-04-JOURNAL.journalctl-varlink less flaky The io.systemd.JournalAccess server occasionally returns NoEntries for a unit-filter query right after the unit logged its message, e.g. from a failing CI run: [ 1204.967910] TEST-04-JOURNAL.sh[15025]: ++ varlinkctl call --more /run/systemd/io.systemd.JournalAccess io.systemd.JournalAccess.GetEntries '{"units": ["test-journalctl-varlink-1-13583.service", "test-journalctl-varlink-2-25039.service"]}' [ 1205.017361] journalctl[15026]: varlink-3-3: Received message: {"method":"io.systemd.JournalAccess.GetEntries","parameters":{"units":["test-journalctl-varlink-1-13583.service","test-journalctl-varlink-2-25039.service"]},"more":true} [ 1205.017498] journalctl[15026]: Failed to open journal file /var/log/journal/ce54feb228124e639f3b7779beeaff60/system.journal: No data available [ 1205.017823] journalctl[15026]: varlink-3-3: Sending message: {"error":"io.systemd.JournalAccess.NoEntries"} [ 1205.017936] TEST-04-JOURNAL.sh[15025]: Method call failed: io.systemd.JournalAccess.NoEntries [ 1205.499083] TEST-04-JOURNAL.sh[146]: Subtest /usr/lib/systemd/tests/testdata/units/TEST-04-JOURNAL.journalctl-varlink.sh failed Wrap the calls that expect data in a helper that retries up to 3 times on NoEntries, syncing the journal between attempts. Follow-up for a109189fabe6a4c307528459f891c2d545361622 Co-developed-by: Claude Opus 4.7 --- .../TEST-04-JOURNAL.journalctl-varlink.sh | 34 +++++++++++++++---- 1 file changed, 27 insertions(+), 7 deletions(-) diff --git a/test/units/TEST-04-JOURNAL.journalctl-varlink.sh b/test/units/TEST-04-JOURNAL.journalctl-varlink.sh index 4f86fa2a541ff..d1738487f5df3 100755 --- a/test/units/TEST-04-JOURNAL.journalctl-varlink.sh +++ b/test/units/TEST-04-JOURNAL.journalctl-varlink.sh @@ -5,6 +5,26 @@ set -o pipefail VARLINK_SOCKET="/run/systemd/io.systemd.JournalAccess" +# Wrapper around varlinkctl that retries up to 3 times when the server returns +# NoEntries to avoid spurious flaky failures +varlinkctl_get_entries() { + local output rc + for _ in 1 2 3; do + output="$(varlinkctl call --more "$VARLINK_SOCKET" io.systemd.JournalAccess.GetEntries "$@" 2>&1)" && rc=0 || rc=$? + if [[ $rc -eq 0 ]]; then + printf '%s\n' "$output" + return 0 + fi + if ! grep -q 'io.systemd.JournalAccess.NoEntries' <<<"$output"; then + printf '%s\n' "$output" >&2 + return $rc + fi + journalctl --sync || true + done + printf '%s\n' "$output" >&2 + return $rc +} + # ensure the varlink basics work varlinkctl list-interfaces "$VARLINK_SOCKET" | grep io.systemd.JournalAccess varlinkctl introspect "$VARLINK_SOCKET" | grep "method GetEntries(" @@ -16,18 +36,18 @@ systemd-cat -t "$TAG" -p warning echo "varlink-test-warning" journalctl --sync # most basic call works -varlinkctl call --more "$VARLINK_SOCKET" io.systemd.JournalAccess.GetEntries '{}' | jq --seq . +varlinkctl_get_entries '{}' | jq --seq . # validate the JSON has some basic properties (similar to journalctls json output) -varlinkctl call --more "$VARLINK_SOCKET" io.systemd.JournalAccess.GetEntries '{}' | jq --seq '.entry | {MESSAGE, PRIORITY, _UID}' +varlinkctl_get_entries '{}' | jq --seq '.entry | {MESSAGE, PRIORITY, _UID}' # check that default limit works (100), we don't know how many entries we have so we just check # bounds -ENTRIES=$(varlinkctl call --more "$VARLINK_SOCKET" io.systemd.JournalAccess.GetEntries '{}' | wc -l) +ENTRIES=$(varlinkctl_get_entries '{}' | wc -l) test "$ENTRIES" -gt 0 test "$ENTRIES" -le 100 # check explicit limit -ENTRIES=$(varlinkctl call --more "$VARLINK_SOCKET" io.systemd.JournalAccess.GetEntries '{"limit": 3}' | wc -l) +ENTRIES=$(varlinkctl_get_entries '{"limit": 3}' | wc -l) test "$ENTRIES" -le 3 # check unit filter: use transient units to get deterministic results @@ -38,16 +58,16 @@ systemd-run --unit="$UNIT_NAME_2" --wait bash -c 'echo hello-from-varlink-test-2 journalctl --sync # single unit filter -SINGLE_OUTPUT="$(varlinkctl call --more "$VARLINK_SOCKET" io.systemd.JournalAccess.GetEntries "{\"units\": [\"$UNIT_NAME_1\"]}")" +SINGLE_OUTPUT="$(varlinkctl_get_entries "{\"units\": [\"$UNIT_NAME_1\"]}")" grep "hello-from-varlink-test-1" >/dev/null <<<"$SINGLE_OUTPUT" (! grep "hello-from-varlink-test-2" >/dev/null <<<"$SINGLE_OUTPUT") # multi unit filter -MULTI_OUTPUT="$(varlinkctl call --more "$VARLINK_SOCKET" io.systemd.JournalAccess.GetEntries "{\"units\": [\"$UNIT_NAME_1\", \"$UNIT_NAME_2\"]}")" +MULTI_OUTPUT="$(varlinkctl_get_entries "{\"units\": [\"$UNIT_NAME_1\", \"$UNIT_NAME_2\"]}")" grep "hello-from-varlink-test-1" >/dev/null <<<"$MULTI_OUTPUT" grep "hello-from-varlink-test-2" >/dev/null <<<"$MULTI_OUTPUT" # check priority filter: priority 4 (warning) should include our warning message -varlinkctl call --more "$VARLINK_SOCKET" io.systemd.JournalAccess.GetEntries '{"priority": 4, "limit": 1000}' | grep "varlink-test-warning" >/dev/null +varlinkctl_get_entries '{"priority": 4, "limit": 1000}' | grep "varlink-test-warning" >/dev/null # check priority filter: priority 3 (error) should NOT include our warning (priority 4) (! varlinkctl call --more "$VARLINK_SOCKET" io.systemd.JournalAccess.GetEntries '{"priority": 3, "limit": 1000}' | grep "varlink-test-warning") From cb8359877ecb4b361a4ff74ac839a4a55986560c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Zbigniew=20J=C4=99drzejewski-Szmek?= Date: Fri, 8 May 2026 08:25:10 +0200 Subject: [PATCH 190/242] userdbctl: convert to OPTION and VERB macros MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The situation with --chain is complicated. The old code tried to use "+…" in getopt_long() to stop option parsing. But it didn't actually work. This logic was originally added in 8072a7e6a9eaf2de120797dd16c5e0baea606219. ef9c12b157a50d63e8a8eb710c013d16c2cea319 added an comment about 'optind=0' which explains why the code doesn't work, but the code wasn't changed. To wit: $ userdbctl.old --no-pager --chain ssh-authorized-keys zbyszek -- /bin/echo --asdf --asdf $ userdbctl.old --no-pager --chain ssh-authorized-keys zbyszek /bin/echo -- --asdf --asdf $ userdbctl.old --no-pager --chain ssh-authorized-keys zbyszek /bin/echo --asdf userdbctl.old: unrecognized option '--asdf' (Basically, if "--" is used, it can be anywhere, since getopt_long() doesn't do anything special after --chain and looks for the next option. There were some tests of --chain, but they all used the username as the positional argument, so it wasn't misinterpreted as an option.) This behaviour is preserved in the conversion. --help is generally the same except for expected formatting changes. --json= is moved above between --output= and -j. For some reason it was further down. Co-developed-by: Claude Opus 4.7 --- src/userdb/userdbctl.c | 328 ++++++++++++++++------------------------- 1 file changed, 128 insertions(+), 200 deletions(-) diff --git a/src/userdb/userdbctl.c b/src/userdb/userdbctl.c index 75b30215b1da3..60a6ff051a3b8 100644 --- a/src/userdb/userdbctl.c +++ b/src/userdb/userdbctl.c @@ -1,10 +1,10 @@ /* SPDX-License-Identifier: LGPL-2.1-or-later */ -#include #include #include #include "alloc-util.h" +#include "ansi-color.h" #include "bitfield.h" #include "build.h" #include "copy.h" @@ -18,14 +18,16 @@ #include "format-table.h" #include "format-util.h" #include "fs-util.h" +#include "glyph-util.h" +#include "help-util.h" #include "io-util.h" #include "json-util.h" #include "log.h" #include "main-func.h" #include "mkdir.h" +#include "options.h" #include "pager.h" #include "parse-argument.h" -#include "pretty-print.h" #include "recurse-dir.h" #include "socket-util.h" #include "string-table.h" @@ -408,6 +410,8 @@ static int table_add_uid_map( return n_added; } +VERB(verb_display_user, "user", "[USER…]", VERB_ANY, VERB_ANY, VERB_DEFAULT, + "Inspect user"); static int verb_display_user(int argc, char *argv[], uintptr_t _data, void *userdata) { _cleanup_(table_unrefp) Table *table = NULL; bool draw_separator = false; @@ -751,6 +755,8 @@ static int add_unavailable_gid(Table *table, uid_t start, uid_t end) { return 2; } +VERB(verb_display_group, "group", "[GROUP…]", VERB_ANY, VERB_ANY, 0, + "Inspect group"); static int verb_display_group(int argc, char *argv[], uintptr_t _data, void *userdata) { _cleanup_(table_unrefp) Table *table = NULL; bool draw_separator = false; @@ -952,6 +958,10 @@ static int show_membership(const char *user, const char *group, Table *table) { return 0; } +VERB(verb_display_memberships, "users-in-group", "[GROUP…]", VERB_ANY, VERB_ANY, 0, + "Show users that are members of specified groups"); +VERB(verb_display_memberships, "groups-of-user", "[USER…]", VERB_ANY, VERB_ANY, 0, + "Show groups the specified users are members of"); static int verb_display_memberships(int argc, char *argv[], uintptr_t _data, void *userdata) { _cleanup_(table_unrefp) Table *table = NULL; int ret = 0, r; @@ -1048,6 +1058,8 @@ static int verb_display_memberships(int argc, char *argv[], uintptr_t _data, voi return ret; } +VERB(verb_display_services, "services", NULL, VERB_ANY, 1, 0, + "Show enabled database services"); static int verb_display_services(int argc, char *argv[], uintptr_t _data, void *userdata) { _cleanup_(table_unrefp) Table *t = NULL; _cleanup_closedir_ DIR *d = NULL; @@ -1111,8 +1123,11 @@ static int verb_display_services(int argc, char *argv[], uintptr_t _data, void * return 0; } +VERB(verb_ssh_authorized_keys, "ssh-authorized-keys", "USER", 2, VERB_ANY, 0, + "Show SSH authorized keys for user"); static int verb_ssh_authorized_keys(int argc, char *argv[], uintptr_t _data, void *userdata) { _cleanup_(user_record_unrefp) UserRecord *ur = NULL; + const char *username; char **chain_invocation; int r; @@ -1121,6 +1136,8 @@ static int verb_ssh_authorized_keys(int argc, char *argv[], uintptr_t _data, voi if (arg_from_file) return log_error_errno(SYNTHETIC_ERRNO(EINVAL), "--from-file= not supported when showing SSH authorized keys, refusing."); + username = argv[1]; + if (arg_chain) { /* If --chain is specified, the rest of the command line is the chain command */ @@ -1147,18 +1164,18 @@ static int verb_ssh_authorized_keys(int argc, char *argv[], uintptr_t _data, voi chain_invocation = NULL; } - r = userdb_by_name(argv[1], /* match= */ NULL, arg_userdb_flags, &ur); + r = userdb_by_name(username, /* match= */ NULL, arg_userdb_flags, &ur); if (r == -ESRCH) - log_error_errno(r, "User %s does not exist.", argv[1]); + log_error_errno(r, "User %s does not exist.", username); else if (r == -EHOSTDOWN) log_error_errno(r, "Selected user database service is not available for this request."); else if (r == -EINVAL) - log_error_errno(r, "Failed to find user %s: %m (Invalid user name?)", argv[1]); + log_error_errno(r, "Failed to find user %s: %m (Invalid user name?)", username); else if (r < 0) - log_error_errno(r, "Failed to find user %s: %m", argv[1]); + log_error_errno(r, "Failed to find user %s: %m", username); else { if (strv_isempty(ur->ssh_authorized_keys)) - log_debug("User record for %s has no public SSH keys.", argv[1]); + log_debug("User record for %s has no public SSH keys.", username); else STRV_FOREACH(i, ur->ssh_authorized_keys) printf("%s\n", *i); @@ -1494,6 +1511,8 @@ static int load_credential_one( return 0; } +VERB(verb_load_credentials, "load-credentials", NULL, VERB_ANY, 1, 0, + "Write static user/group records from credentials"); static int verb_load_credentials(int argc, char *argv[], uintptr_t _data, void *userdata) { int r; @@ -1530,66 +1549,39 @@ static int verb_load_credentials(int argc, char *argv[], uintptr_t _data, void * } static int help(void) { - _cleanup_free_ char *link = NULL; + _cleanup_(table_unrefp) Table *verbs = NULL, *options = NULL; int r; + r = verbs_get_help_table(&verbs); + if (r < 0) + return r; + + r = option_parser_get_help_table(&options); + if (r < 0) + return r; + + (void) table_sync_column_widths(0, verbs, options); + pager_open(arg_pager_flags); - r = terminal_urlify_man("userdbctl", "1", &link); + help_cmdline("[OPTIONS…] COMMAND …"); + help_abstract("Show user and group information."); + + help_section("Commands"); + r = table_print_or_warn(verbs); if (r < 0) - return log_oom(); + return r; - printf("%s [OPTIONS...] COMMAND ...\n\n" - "%sShow user and group information.%s\n" - "\nCommands:\n" - " user [USER…] Inspect user\n" - " group [GROUP…] Inspect group\n" - " users-in-group [GROUP…] Show users that are members of specified groups\n" - " groups-of-user [USER…] Show groups the specified users are members of\n" - " services Show enabled database services\n" - " ssh-authorized-keys USER Show SSH authorized keys for user\n" - " load-credentials Write static user/group records from credentials\n" - "\nOptions:\n" - " -h --help Show this help\n" - " --version Show package version\n" - " --no-pager Do not pipe output into a pager\n" - " --no-legend Do not show the headers and footers\n" - " --output=MODE Select output mode (classic, friendly, table, json)\n" - " -j Equivalent to --output=json\n" - " -s --service=SERVICE[:SERVICE…]\n" - " Query the specified service\n" - " --with-nss=BOOL Control whether to include glibc NSS data\n" - " -N Do not synthesize or include glibc NSS data\n" - " (Same as --synthesize=no --with-nss=no)\n" - " --synthesize=BOOL Synthesize root/nobody user\n" - " --with-dropin=BOOL Control whether to include drop-in records\n" - " --with-varlink=BOOL Control whether to talk to services at all\n" - " --multiplexer=BOOL Control whether to use the multiplexer\n" - " --json=pretty|short JSON output mode\n" - " --chain Chain another command\n" - " --uid-min=ID Filter by minimum UID/GID (default 0)\n" - " --uid-max=ID Filter by maximum UID/GID (default 4294967294)\n" - " --uuid=UUID Filter by UUID\n" - " -z --fuzzy Do a fuzzy name search\n" - " --disposition=VALUE Filter by disposition\n" - " -I Equivalent to --disposition=intrinsic\n" - " -S Equivalent to --disposition=system\n" - " -R Equivalent to --disposition=regular\n" - " --boundaries=BOOL Show/hide UID/GID range boundaries in output\n" - " -B Equivalent to --boundaries=no\n" - " -F --from-file=PATH Read JSON record from file\n" - "\nSee the %s for details.\n", - program_invocation_short_name, - ansi_highlight(), - ansi_normal(), - link); + help_section("Options"); + r = table_print_or_warn(options); + if (r < 0) + return r; + help_man_page_reference("userdbctl", "1"); return 0; } -static int verb_help(int argc, char *argv[], uintptr_t _data, void *userdata) { - return help(); -} +VERB_COMMON_HELP_HIDDEN(help); static int parse_from_file(const char *arg, sd_json_variant **ret) { sd_json_variant *v = NULL; @@ -1615,56 +1607,13 @@ static int parse_from_file(const char *arg, sd_json_variant **ret) { return 0; } -static int parse_argv(int argc, char *argv[]) { - - enum { - ARG_VERSION = 0x100, - ARG_NO_PAGER, - ARG_NO_LEGEND, - ARG_OUTPUT, - ARG_WITH_NSS, - ARG_WITH_DROPIN, - ARG_WITH_VARLINK, - ARG_SYNTHESIZE, - ARG_MULTIPLEXER, - ARG_JSON, - ARG_CHAIN, - ARG_UID_MIN, - ARG_UID_MAX, - ARG_UUID, - ARG_DISPOSITION, - ARG_BOUNDARIES, - }; - - static const struct option options[] = { - { "help", no_argument, NULL, 'h' }, - { "version", no_argument, NULL, ARG_VERSION }, - { "no-pager", no_argument, NULL, ARG_NO_PAGER }, - { "no-legend", no_argument, NULL, ARG_NO_LEGEND }, - { "output", required_argument, NULL, ARG_OUTPUT }, - { "service", required_argument, NULL, 's' }, - { "with-nss", required_argument, NULL, ARG_WITH_NSS }, - { "with-dropin", required_argument, NULL, ARG_WITH_DROPIN }, - { "with-varlink", required_argument, NULL, ARG_WITH_VARLINK }, - { "synthesize", required_argument, NULL, ARG_SYNTHESIZE }, - { "multiplexer", required_argument, NULL, ARG_MULTIPLEXER }, - { "json", required_argument, NULL, ARG_JSON }, - { "chain", no_argument, NULL, ARG_CHAIN }, - { "uid-min", required_argument, NULL, ARG_UID_MIN }, - { "uid-max", required_argument, NULL, ARG_UID_MAX }, - { "uuid", required_argument, NULL, ARG_UUID }, - { "fuzzy", no_argument, NULL, 'z' }, - { "disposition", required_argument, NULL, ARG_DISPOSITION }, - { "boundaries", required_argument, NULL, ARG_BOUNDARIES }, - { "from-file", required_argument, NULL, 'F' }, - {} - }; - +static int parse_argv(int argc, char *argv[], char ***remaining_args) { const char *e; int r; assert(argc >= 0); assert(argv); + assert(remaining_args); /* We are going to update this environment variable with our own, hence let's first read what is already set */ e = getenv("SYSTEMD_ONLY_USERDB"); @@ -1679,122 +1628,137 @@ static int parse_argv(int argc, char *argv[]) { arg_services = l; } - /* Resetting to 0 forces the invocation of an internal initialization routine of getopt_long() - * that checks for GNU extensions in optstring ('-' or '+' at the beginning). */ - optind = 0; - - for (;;) { - int c; - - c = getopt_long(argc, argv, - arg_chain ? "+hjs:NISRzBF:" : "hjs:NISRzBF:", /* When --chain was used disable parsing of further switches */ - options, NULL); - if (c < 0) - break; + OptionParser opts = { argc, argv }; + FOREACH_OPTION_OR_RETURN(c, &opts) switch (c) { - case 'h': + OPTION_COMMON_HELP: return help(); - case ARG_VERSION: + OPTION_COMMON_VERSION: return version(); - case ARG_NO_PAGER: + OPTION_COMMON_NO_PAGER: arg_pager_flags |= PAGER_DISABLE; break; - case ARG_NO_LEGEND: + OPTION_COMMON_NO_LEGEND: arg_legend = false; break; - case ARG_OUTPUT: - if (streq(optarg, "help")) + OPTION_LONG("output", "MODE", + "Select output mode (classic, friendly, table, json)"): + if (streq(opts.arg, "help")) return DUMP_STRING_TABLE(output, Output, _OUTPUT_MAX); - arg_output = output_from_string(optarg); + arg_output = output_from_string(opts.arg); if (arg_output < 0) - return log_error_errno(arg_output, "Invalid --output= mode: %s", optarg); + return log_error_errno(arg_output, "Invalid --output= mode: %s", opts.arg); arg_json_format_flags = arg_output == OUTPUT_JSON ? SD_JSON_FORMAT_PRETTY|SD_JSON_FORMAT_COLOR_AUTO : SD_JSON_FORMAT_OFF; break; - case ARG_JSON: - r = parse_json_argument(optarg, &arg_json_format_flags); + OPTION_COMMON_JSON: + r = parse_json_argument(opts.arg, &arg_json_format_flags); if (r <= 0) return r; arg_output = sd_json_format_enabled(arg_json_format_flags) ? OUTPUT_JSON : _OUTPUT_INVALID; break; - case 'j': + OPTION_SHORT('j', NULL, "Equivalent to --output=json"): arg_json_format_flags = SD_JSON_FORMAT_PRETTY|SD_JSON_FORMAT_COLOR_AUTO; arg_output = OUTPUT_JSON; break; - case 's': - if (isempty(optarg)) + OPTION('s', "service", "SERVICE[:SERVICE…]", "Query the specified service"): + if (isempty(opts.arg)) arg_services = strv_free(arg_services); else { - r = strv_split_and_extend(&arg_services, optarg, ":", /* filter_duplicates= */ true); + r = strv_split_and_extend(&arg_services, opts.arg, ":", /* filter_duplicates= */ true); if (r < 0) return log_error_errno(r, "Failed to parse -s/--service= argument: %m"); } break; - case 'N': + OPTION_LONG("with-nss", "BOOL", "Control whether to include glibc NSS data"): + r = parse_boolean_argument("--with-nss=", opts.arg, NULL); + if (r < 0) + return r; + + SET_FLAG(arg_userdb_flags, USERDB_EXCLUDE_NSS, !r); + break; + + OPTION_SHORT('N', NULL, + "Do not synthesize or include glibc NSS data " + "(Same as --synthesize=no --with-nss=no)"): arg_userdb_flags |= USERDB_EXCLUDE_NSS|USERDB_DONT_SYNTHESIZE_INTRINSIC|USERDB_DONT_SYNTHESIZE_FOREIGN; break; - case ARG_WITH_NSS: - r = parse_boolean_argument("--with-nss=", optarg, NULL); + OPTION_LONG("synthesize", "BOOL", "Synthesize root/nobody user"): + r = parse_boolean_argument("--synthesize=", opts.arg, NULL); if (r < 0) return r; - SET_FLAG(arg_userdb_flags, USERDB_EXCLUDE_NSS, !r); + SET_FLAG(arg_userdb_flags, USERDB_DONT_SYNTHESIZE_INTRINSIC|USERDB_DONT_SYNTHESIZE_FOREIGN, !r); break; - case ARG_WITH_DROPIN: - r = parse_boolean_argument("--with-dropin=", optarg, NULL); + OPTION_LONG("with-dropin", "BOOL", "Control whether to include drop-in records"): + r = parse_boolean_argument("--with-dropin=", opts.arg, NULL); if (r < 0) return r; SET_FLAG(arg_userdb_flags, USERDB_EXCLUDE_DROPIN, !r); break; - case ARG_WITH_VARLINK: - r = parse_boolean_argument("--with-varlink=", optarg, NULL); + OPTION_LONG("with-varlink", "BOOL", "Control whether to talk to services at all"): + r = parse_boolean_argument("--with-varlink=", opts.arg, NULL); if (r < 0) return r; SET_FLAG(arg_userdb_flags, USERDB_EXCLUDE_VARLINK, !r); break; - case ARG_SYNTHESIZE: - r = parse_boolean_argument("--synthesize=", optarg, NULL); + OPTION_LONG("multiplexer", "BOOL", "Control whether to use the multiplexer"): + r = parse_boolean_argument("--multiplexer=", opts.arg, NULL); if (r < 0) return r; - SET_FLAG(arg_userdb_flags, USERDB_DONT_SYNTHESIZE_INTRINSIC|USERDB_DONT_SYNTHESIZE_FOREIGN, !r); + SET_FLAG(arg_userdb_flags, USERDB_AVOID_MULTIPLEXER, !r); break; - case ARG_MULTIPLEXER: - r = parse_boolean_argument("--multiplexer=", optarg, NULL); + OPTION_LONG("chain", NULL, "Chain another command"): + arg_chain = true; + break; + + OPTION_LONG("uid-min", "ID", "Filter by minimum UID/GID (default 0)"): + r = parse_uid(opts.arg, &arg_uid_min); if (r < 0) - return r; + return log_error_errno(r, "Failed to parse --uid-min= value: %s", opts.arg); + break; - SET_FLAG(arg_userdb_flags, USERDB_AVOID_MULTIPLEXER, !r); + OPTION_LONG("uid-max", "ID", "Filter by maximum UID/GID (default 4294967294)"): + r = parse_uid(opts.arg, &arg_uid_max); + if (r < 0) + return log_error_errno(r, "Failed to parse --uid-max= value: %s", opts.arg); break; - case ARG_CHAIN: - arg_chain = true; + OPTION_LONG("uuid", "UUID", "Filter by UUID"): + r = sd_id128_from_string(opts.arg, &arg_uuid); + if (r < 0) + return log_error_errno(r, "Failed to parse --uuid= value: %s", opts.arg); + break; + + OPTION('z', "fuzzy", NULL, "Do a fuzzy name search"): + arg_fuzzy = true; break; - case ARG_DISPOSITION: { - UserDisposition d = user_disposition_from_string(optarg); + OPTION_LONG("disposition", "VALUE", "Filter by disposition"): { + UserDisposition d = user_disposition_from_string(opts.arg); if (d < 0) - return log_error_errno(d, "Unknown user disposition: %s", optarg); + return log_error_errno(d, "Unknown user disposition: %s", opts.arg); if (arg_disposition_mask == UINT64_MAX) arg_disposition_mask = 0; @@ -1803,80 +1767,54 @@ static int parse_argv(int argc, char *argv[]) { break; } - case 'I': + OPTION_SHORT('I', NULL, "Equivalent to --disposition=intrinsic"): if (arg_disposition_mask == UINT64_MAX) arg_disposition_mask = 0; arg_disposition_mask |= UINT64_C(1) << USER_INTRINSIC; break; - case 'S': + OPTION_SHORT('S', NULL, "Equivalent to --disposition=system"): if (arg_disposition_mask == UINT64_MAX) arg_disposition_mask = 0; arg_disposition_mask |= UINT64_C(1) << USER_SYSTEM; break; - case 'R': + OPTION_SHORT('R', NULL, "Equivalent to --disposition=regular"): if (arg_disposition_mask == UINT64_MAX) arg_disposition_mask = 0; arg_disposition_mask |= UINT64_C(1) << USER_REGULAR; break; - case ARG_UID_MIN: - r = parse_uid(optarg, &arg_uid_min); - if (r < 0) - return log_error_errno(r, "Failed to parse --uid-min= value: %s", optarg); - break; - - case ARG_UID_MAX: - r = parse_uid(optarg, &arg_uid_max); - if (r < 0) - return log_error_errno(r, "Failed to parse --uid-max= value: %s", optarg); - break; - - case ARG_UUID: - r = sd_id128_from_string(optarg, &arg_uuid); - if (r < 0) - return log_error_errno(r, "Failed to parse --uuid= value: %s", optarg); - break; - - case 'z': - arg_fuzzy = true; - break; - - case ARG_BOUNDARIES: - r = parse_boolean_argument("boundaries", optarg, &arg_boundaries); + OPTION_LONG("boundaries", "BOOL", + "Show/hide UID/GID range boundaries in output"): + r = parse_boolean_argument("boundaries", opts.arg, &arg_boundaries); if (r < 0) return r; break; - case 'B': + OPTION_SHORT('B', NULL, "Equivalent to --boundaries=no"): arg_boundaries = false; break; - case 'F': { + OPTION('F', "from-file", "PATH", "Read JSON record from file"): { sd_json_variant *v = NULL; /* initialization to appease gcc-14 */ - r = parse_from_file(optarg, &v); + r = parse_from_file(opts.arg, &v); if (r < 0) return r; json_variant_unref_and_replace(arg_from_file, v); break; } - - case '?': - return -EINVAL; - - default: - assert_not_reached(); } - } if (arg_uid_min > arg_uid_max) - return log_error_errno(SYNTHETIC_ERRNO(EINVAL), "Minimum UID/GID " UID_FMT " is above maximum UID/GID " UID_FMT ", refusing.", arg_uid_min, arg_uid_max); + return log_error_errno(SYNTHETIC_ERRNO(EINVAL), + "Minimum UID/GID " UID_FMT " is above maximum UID/GID " UID_FMT ", refusing.", + arg_uid_min, arg_uid_max); /* If not mask was specified, use the all bits on mask */ if (arg_disposition_mask == UINT64_MAX) @@ -1885,27 +1823,17 @@ static int parse_argv(int argc, char *argv[]) { if (arg_from_file) arg_boundaries = false; + *remaining_args = option_parser_get_args(&opts); return 1; } static int run(int argc, char *argv[]) { - static const Verb verbs[] = { - { "help", VERB_ANY, VERB_ANY, 0, verb_help }, - { "user", VERB_ANY, VERB_ANY, VERB_DEFAULT, verb_display_user }, - { "group", VERB_ANY, VERB_ANY, 0, verb_display_group }, - { "users-in-group", VERB_ANY, VERB_ANY, 0, verb_display_memberships }, - { "groups-of-user", VERB_ANY, VERB_ANY, 0, verb_display_memberships }, - { "services", VERB_ANY, 1, 0, verb_display_services }, - { "ssh-authorized-keys", 2, VERB_ANY, 0, verb_ssh_authorized_keys }, - { "load-credentials", VERB_ANY, 1, 0, verb_load_credentials }, - {} - }; - + char **args = NULL; int r; log_setup(); - r = parse_argv(argc, argv); + r = parse_argv(argc, argv, &args); if (r <= 0) return r; @@ -1923,7 +1851,7 @@ static int run(int argc, char *argv[]) { } else assert_se(unsetenv("SYSTEMD_ONLY_USERDB") == 0); - return dispatch_verb(argc, argv, verbs, NULL); + return dispatch_verb_with_args(args, NULL); } DEFINE_MAIN_FUNCTION(run); From 0a5e09404d56081049266124f586c9a47889c063 Mon Sep 17 00:00:00 2001 From: Daan De Meyer Date: Thu, 30 Apr 2026 21:18:19 +0200 Subject: [PATCH 191/242] curl-util: Make some curl symbols private --- src/shared/curl-util.c | 18 ++++++++++-------- src/shared/curl-util.h | 9 --------- 2 files changed, 10 insertions(+), 17 deletions(-) diff --git a/src/shared/curl-util.c b/src/shared/curl-util.c index 9254b83dd74fb..0a6bdeefe5589 100644 --- a/src/shared/curl-util.c +++ b/src/shared/curl-util.c @@ -29,17 +29,19 @@ DLSYM_PROTOTYPE(curl_easy_strerror) = NULL; DLSYM_PROTOTYPE(curl_easy_header) = NULL; #endif DLSYM_PROTOTYPE(curl_getdate) = NULL; -DLSYM_PROTOTYPE(curl_multi_add_handle) = NULL; -DLSYM_PROTOTYPE(curl_multi_assign) = NULL; -DLSYM_PROTOTYPE(curl_multi_cleanup) = NULL; -DLSYM_PROTOTYPE(curl_multi_info_read) = NULL; -DLSYM_PROTOTYPE(curl_multi_init) = NULL; -DLSYM_PROTOTYPE(curl_multi_remove_handle) = NULL; -DLSYM_PROTOTYPE(curl_multi_setopt) = NULL; -DLSYM_PROTOTYPE(curl_multi_socket_action) = NULL; +static DLSYM_PROTOTYPE(curl_multi_add_handle) = NULL; +static DLSYM_PROTOTYPE(curl_multi_assign) = NULL; +static DLSYM_PROTOTYPE(curl_multi_cleanup) = NULL; +static DLSYM_PROTOTYPE(curl_multi_info_read) = NULL; +static DLSYM_PROTOTYPE(curl_multi_init) = NULL; +static DLSYM_PROTOTYPE(curl_multi_remove_handle) = NULL; +static DLSYM_PROTOTYPE(curl_multi_setopt) = NULL; +static DLSYM_PROTOTYPE(curl_multi_socket_action) = NULL; DLSYM_PROTOTYPE(curl_slist_append) = NULL; DLSYM_PROTOTYPE(curl_slist_free_all) = NULL; +DEFINE_TRIVIAL_CLEANUP_FUNC_FULL_RENAME(CURLM*, sym_curl_multi_cleanup, curl_multi_cleanupp, NULL); + static void curl_glue_check_finished(CurlGlue *g) { int r; diff --git a/src/shared/curl-util.h b/src/shared/curl-util.h index 112649f371ba7..33ab0a5fb204b 100644 --- a/src/shared/curl-util.h +++ b/src/shared/curl-util.h @@ -18,14 +18,6 @@ extern DLSYM_PROTOTYPE(curl_easy_strerror); extern DLSYM_PROTOTYPE(curl_easy_header); #endif extern DLSYM_PROTOTYPE(curl_getdate); -extern DLSYM_PROTOTYPE(curl_multi_add_handle); -extern DLSYM_PROTOTYPE(curl_multi_assign); -extern DLSYM_PROTOTYPE(curl_multi_cleanup); -extern DLSYM_PROTOTYPE(curl_multi_info_read); -extern DLSYM_PROTOTYPE(curl_multi_init); -extern DLSYM_PROTOTYPE(curl_multi_remove_handle); -extern DLSYM_PROTOTYPE(curl_multi_setopt); -extern DLSYM_PROTOTYPE(curl_multi_socket_action); extern DLSYM_PROTOTYPE(curl_slist_append); extern DLSYM_PROTOTYPE(curl_slist_free_all); @@ -66,7 +58,6 @@ int curl_parse_http_time(const char *t, usec_t *ret); int curl_append_to_header(struct curl_slist **list, char **headers); DEFINE_TRIVIAL_CLEANUP_FUNC_FULL_RENAME(CURL*, sym_curl_easy_cleanup, curl_easy_cleanupp, NULL); -DEFINE_TRIVIAL_CLEANUP_FUNC_FULL_RENAME(CURLM*, sym_curl_multi_cleanup, curl_multi_cleanupp, NULL); DEFINE_TRIVIAL_CLEANUP_FUNC_FULL_RENAME(struct curl_slist*, sym_curl_slist_free_all, curl_slist_free_allp, NULL); #endif From 87cec65cae656f6ac2e702bd60dad6dd4fdae636 Mon Sep 17 00:00:00 2001 From: Daan De Meyer Date: Fri, 1 May 2026 09:08:35 +0000 Subject: [PATCH 192/242] curl-util: bring CurlGlue/CurlSlot in line with sd-bus and qmp-client Refactor curl-util to use the same per-request, refcounted, cancellable slot model as sd-bus, sd-varlink and qmp-client. CurlGlue becomes opaque and refcounted, and dispatches per-slot completion callbacks through CURLOPT_PRIVATE instead of a single g->on_finished demux that every caller had to switch on. The new curl_glue_perform_async(g, easy, cb, userdata, &slot) replaces curl_glue_add + the on_finished/userdata wiring. CurlSlot is the per-request handle: it owns the easy handle, curl_slot_unref does curl_multi_remove_handle + curl_easy_cleanup (which doubles as cancel since remove aborts in-flight transfers without queuing CURLMSG_DONE), and floating slots (ret_slot=NULL) are kept alive in the glue's slot set until the callback fires. Drop the userdata parameter from curl_glue_make: CURLOPT_PRIVATE is now used internally to route completions to the slot. Migrate pull-job and the pull-{oci,raw,tar} drivers, and imdsd, to the new shape. PullJob.curl becomes PullJob.slot; pull_job_curl_on_finished becomes a per-slot callback. imdsd routes its token-vs-data branch off slot identity rather than easy-handle pointer comparison. Both daemons drop the global on_finished/userdata wiring on the glue. pull_job_finish and context_fail{,_full} now return int (always 0) so the callbacks stay in the `return finish(...);` style. Add test-curl-util covering glue lifecycle, easy-handle defaults, floating and non-floating perform paths, cancel-via-slot-unref (verified by a sentinel request that drives the loop to completion), and three concurrent requests on a single glue. Tests fetch local files via file:// URLs so no network is needed; libcurl availability is probed once via dlopen_curl in intro(). --- src/imds/imdsd.c | 101 ++++++------- src/import/pull-job.c | 162 +++++++++------------ src/import/pull-job.h | 5 +- src/import/pull-oci.c | 3 - src/import/pull-raw.c | 3 - src/import/pull-tar.c | 3 - src/shared/curl-util.c | 176 ++++++++++++++++++++--- src/shared/curl-util.h | 45 ++++-- src/shared/shared-forward.h | 2 + src/test/meson.build | 4 + src/test/test-curl-util.c | 280 ++++++++++++++++++++++++++++++++++++ 11 files changed, 585 insertions(+), 199 deletions(-) create mode 100644 src/test/test-curl-util.c diff --git a/src/imds/imdsd.c b/src/imds/imdsd.c index a0c54ad84d7af..9c194c09005a0 100644 --- a/src/imds/imdsd.c +++ b/src/imds/imdsd.c @@ -182,8 +182,8 @@ struct Context { /* Mode 1 "direct": we go directly to the network (this is done if we know the interface index to * use) */ - CURL *curl_token; - CURL *curl_data; + CurlSlot *slot_token; + CurlSlot *slot_data; struct curl_slist *request_header_token, *request_header_data; sd_event_source *retry_source; unsigned n_retry; @@ -247,15 +247,8 @@ static void context_reset_for_refresh(Context *c) { /* Flush out all fields, up to the point we can restart the current request */ - if (c->curl_token) { - curl_glue_remove_and_free(c->glue, c->curl_token); - c->curl_token = NULL; - } - - if (c->curl_data) { - curl_glue_remove_and_free(c->glue, c->curl_data); - c->curl_data = NULL; - } + c->slot_token = curl_slot_unref(c->slot_token); + c->slot_data = curl_slot_unref(c->slot_data); sym_curl_slist_free_all(c->request_header_token); c->request_header_token = NULL; @@ -325,11 +318,12 @@ static void context_done(Context *c) { c->system_bus = sd_bus_flush_close_unref(c->system_bus); } -static void context_fail_full(Context *c, int r, const char *varlink_error) { +static int context_fail_full(Context *c, int r, const char *varlink_error) { assert(c); assert(r != 0); - /* Called whenever the current retrieval fails asynchronously */ + /* Called whenever the current retrieval fails asynchronously. Returns 0 so callers in + * int-returning paths can `return context_fail_full(...)` directly. */ r = -abs(r); @@ -349,10 +343,11 @@ static void context_fail_full(Context *c, int r, const char *varlink_error) { sd_event_exit(c->event, r); context_reset_full(c); + return 0; } -static void context_fail(Context *c, int r) { - context_fail_full(c, r, /* varlink_error= */ NULL); +static int context_fail(Context *c, int r) { + return context_fail_full(c, r, /* varlink_error= */ NULL); } static void context_success(Context *c) { @@ -898,17 +893,12 @@ static int context_save_data(Context *c) { return 0; } -static void curl_glue_on_finished(CurlGlue *g, CURL *curl, CURLcode result) { +static int curl_on_finished(CurlSlot *slot, CURL *curl, CURLcode result, void *userdata) { + Context *c = ASSERT_PTR(userdata); int r; - assert(g); - /* Called whenever libcurl did its thing and reports a download being complete or having failed */ - Context *c = NULL; - if (sym_curl_easy_getinfo(curl, CURLINFO_PRIVATE, (char**) &c) != CURLE_OK) - return; - switch (result) { case CURLE_OK: /* yay! */ @@ -934,7 +924,7 @@ static void curl_glue_on_finished(CurlGlue *g, CURL *curl, CURLcode result) { if (r < 0) return context_fail(c, r); - return; + return 0; default: return context_fail_full( @@ -951,12 +941,12 @@ static void curl_glue_on_finished(CurlGlue *g, CURL *curl, CURLcode result) { return context_fail(c, r); if (r == 0) { /* We shall retry */ (void) context_schedule_retry(c); - return; + return 0; } if (result != CURLE_OK) /* if getting the HTTP status didn't work, propagate a generic error */ return context_fail(c, SYNTHETIC_ERRNO(ENOTRECOVERABLE)); - if (curl == c->curl_token) { + if (slot == c->slot_token) { r = context_validate_token_http_status(c, status); if (r < 0) return context_fail(c, r); @@ -975,7 +965,7 @@ static void curl_glue_on_finished(CurlGlue *g, CURL *curl, CURLcode result) { if (r < 0) return context_fail(c, r); - } else if (curl == c->curl_data) { + } else if (slot == c->slot_data) { r = context_validate_data_http_status(c, status); if (r == -ENOENT) @@ -983,7 +973,7 @@ static void curl_glue_on_finished(CurlGlue *g, CURL *curl, CURLcode result) { if (r < 0) return context_fail(c, r); if (r == 0) /* Immediately restarted */ - return; + return 0; context_log(c, LOG_DEBUG, "Data download successful."); @@ -994,6 +984,8 @@ static void curl_glue_on_finished(CurlGlue *g, CURL *curl, CURLcode result) { context_success(c); } else assert_not_reached(); + + return 0; } static int context_acquire_glue(Context *c) { @@ -1010,9 +1002,6 @@ static int context_acquire_glue(Context *c) { if (r < 0) return context_log_errno(c, LOG_ERR, r, "Failed to allocate curl glue: %m"); - c->glue->on_finished = curl_glue_on_finished; - c->glue->userdata = c; - return 0; } @@ -1028,13 +1017,13 @@ static size_t data_write_callback(void *contents, size_t size, size_t nmemb, voi (void) context_save_ifname(c); /* Before we use the acquired data, let's verify the HTTP status, if there's a failure or we need to - * restart, abort the write here. Note that the curl_glue_on_finished() call will then check the HTTP + * restart, abort the write here. Note that the curl_on_finished() call will then check the HTTP * status again and act on it. */ long status; - r = context_acquire_http_status(c, c->curl_data, &status); + r = context_acquire_http_status(c, curl_slot_get_easy(c->slot_data), &status); if (r <= 0) - return 0; /* fail the thing, so that curl_glue_on_finished() can handle this failure or retry request */ - if (status >= 300) /* any status equal or above 300 needs to be handled by curl_glue_on_finished() too */ + return 0; /* fail the thing, so that curl_on_finished() can handle this failure or retry request */ + if (status >= 300) /* any status equal or above 300 needs to be handled by curl_on_finished() too */ return 0; if (sz > UINT64_MAX - c->data_size || @@ -1103,7 +1092,8 @@ static int context_acquire_data(Context *c) { if (!url) return context_log_oom(c); - r = curl_glue_make(&c->curl_data, url, c); + _cleanup_(curl_easy_cleanupp) CURL *easy = NULL; + r = curl_glue_make(&easy, url); if (r < 0) return context_log_errno(c, LOG_ERR, r, "Failed to create CURL request for data: %m"); @@ -1122,30 +1112,31 @@ static int context_acquire_data(Context *c) { return context_log_errno(c, LOG_ERR, r, "Failed to create curl header: %m"); if (c->request_header_data) - if (sym_curl_easy_setopt(c->curl_data, CURLOPT_HTTPHEADER, c->request_header_data) != CURLE_OK) + if (sym_curl_easy_setopt(easy, CURLOPT_HTTPHEADER, c->request_header_data) != CURLE_OK) return context_log_errno(c, LOG_ERR, SYNTHETIC_ERRNO(EIO), "Failed to set HTTP request header."); - if (sym_curl_easy_setopt(c->curl_data, CURLOPT_WRITEFUNCTION, data_write_callback) != CURLE_OK) + if (sym_curl_easy_setopt(easy, CURLOPT_WRITEFUNCTION, data_write_callback) != CURLE_OK) return context_log_errno(c, LOG_ERR, SYNTHETIC_ERRNO(EIO), "Failed to set CURL write function."); - if (sym_curl_easy_setopt(c->curl_data, CURLOPT_WRITEDATA, c) != CURLE_OK) + if (sym_curl_easy_setopt(easy, CURLOPT_WRITEDATA, c) != CURLE_OK) return context_log_errno(c, LOG_ERR, SYNTHETIC_ERRNO(EIO), "Failed to set CURL write function userdata."); - if (sym_curl_easy_setopt(c->curl_data, CURLOPT_SOCKOPTFUNCTION, setsockopt_callback) != CURLE_OK) + if (sym_curl_easy_setopt(easy, CURLOPT_SOCKOPTFUNCTION, setsockopt_callback) != CURLE_OK) return context_log_errno(c, LOG_ERR, SYNTHETIC_ERRNO(EIO), "Failed to set CURL setsockopt function."); - if (sym_curl_easy_setopt(c->curl_data, CURLOPT_SOCKOPTDATA, c) != CURLE_OK) + if (sym_curl_easy_setopt(easy, CURLOPT_SOCKOPTDATA, c) != CURLE_OK) return context_log_errno(c, LOG_ERR, SYNTHETIC_ERRNO(EIO), "Failed to set CURL setsockopt function userdata."); - if (sym_curl_easy_setopt(c->curl_data, CURLOPT_LOCALPORT, 1L) != CURLE_OK) + if (sym_curl_easy_setopt(easy, CURLOPT_LOCALPORT, 1L) != CURLE_OK) return context_log_errno(c, LOG_ERR, SYNTHETIC_ERRNO(EIO), "Failed to set CURL setsockopt local port"); - if (sym_curl_easy_setopt(c->curl_data, CURLOPT_LOCALPORTRANGE, 1023L) != CURLE_OK) + if (sym_curl_easy_setopt(easy, CURLOPT_LOCALPORTRANGE, 1023L) != CURLE_OK) return context_log_errno(c, LOG_ERR, SYNTHETIC_ERRNO(EIO), "Failed to set CURL setsockopt local port range"); - r = curl_glue_add(c->glue, c->curl_data); + r = curl_glue_perform_async(c->glue, easy, curl_on_finished, c, &c->slot_data); if (r < 0) return context_log_errno(c, LOG_ERR, r, "Failed to add CURL request to glue: %m"); + TAKE_PTR(easy); return 0; } @@ -1163,10 +1154,10 @@ static size_t token_write_callback(void *contents, size_t size, size_t nmemb, vo /* Before we use acquired data, let's verify the HTTP status */ long status; - r = context_acquire_http_status(c, c->curl_token, &status); + r = context_acquire_http_status(c, curl_slot_get_easy(c->slot_token), &status); if (r <= 0) - return 0; /* fail the thing, so that curl_glue_on_finished() can handle this failure or retry request */ - if (status >= 300) /* any status equal or above 300 needs to be handled by curl_glue_on_finished() */ + return 0; /* fail the thing, so that curl_on_finished() can handle this failure or retry request */ + if (status >= 300) /* any status equal or above 300 needs to be handled by curl_on_finished() */ return 0; if (sz > SIZE_MAX - c->token.iov_len || @@ -1199,7 +1190,8 @@ static int context_acquire_token(Context *c) { if (r < 0) return r; - r = curl_glue_make(&c->curl_token, arg_token_url, c); + _cleanup_(curl_easy_cleanupp) CURL *easy = NULL; + r = curl_glue_make(&easy, arg_token_url); if (r < 0) return context_log_errno(c, LOG_ERR, r, "Failed to create CURL request for API token: %m"); @@ -1216,27 +1208,28 @@ static int context_acquire_token(Context *c) { return context_log_oom(c); } - if (sym_curl_easy_setopt(c->curl_token, CURLOPT_HTTPHEADER, c->request_header_token) != CURLE_OK) + if (sym_curl_easy_setopt(easy, CURLOPT_HTTPHEADER, c->request_header_token) != CURLE_OK) return context_log_errno(c, LOG_ERR, SYNTHETIC_ERRNO(EIO), "Failed to set HTTP request header."); - if (sym_curl_easy_setopt(c->curl_token, CURLOPT_CUSTOMREQUEST, "PUT") != CURLE_OK) + if (sym_curl_easy_setopt(easy, CURLOPT_CUSTOMREQUEST, "PUT") != CURLE_OK) return context_log_errno(c, LOG_ERR, SYNTHETIC_ERRNO(EIO), "Failed to set HTTP request method."); - if (sym_curl_easy_setopt(c->curl_token, CURLOPT_WRITEFUNCTION, token_write_callback) != CURLE_OK) + if (sym_curl_easy_setopt(easy, CURLOPT_WRITEFUNCTION, token_write_callback) != CURLE_OK) return context_log_errno(c, LOG_ERR, SYNTHETIC_ERRNO(EIO), "Failed to set CURL write function."); - if (sym_curl_easy_setopt(c->curl_token, CURLOPT_WRITEDATA, c) != CURLE_OK) + if (sym_curl_easy_setopt(easy, CURLOPT_WRITEDATA, c) != CURLE_OK) return context_log_errno(c, LOG_ERR, SYNTHETIC_ERRNO(EIO), "Failed to set CURL write function userdata."); - if (sym_curl_easy_setopt(c->curl_token, CURLOPT_SOCKOPTFUNCTION, setsockopt_callback) != CURLE_OK) + if (sym_curl_easy_setopt(easy, CURLOPT_SOCKOPTFUNCTION, setsockopt_callback) != CURLE_OK) return context_log_errno(c, LOG_ERR, SYNTHETIC_ERRNO(EIO), "Failed to set CURL setsockopt function."); - if (sym_curl_easy_setopt(c->curl_token, CURLOPT_SOCKOPTDATA, c) != CURLE_OK) + if (sym_curl_easy_setopt(easy, CURLOPT_SOCKOPTDATA, c) != CURLE_OK) return context_log_errno(c, LOG_ERR, SYNTHETIC_ERRNO(EIO), "Failed to set CURL setsockopt function userdata."); - r = curl_glue_add(c->glue, c->curl_token); + r = curl_glue_perform_async(c->glue, easy, curl_on_finished, c, &c->slot_token); if (r < 0) return context_log_errno(c, LOG_ERR, r, "Failed to add CURL request to glue: %m"); + TAKE_PTR(easy); return 0; } diff --git a/src/import/pull-job.c b/src/import/pull-job.c index 4c3fb05dd3533..5b8aa6da26942 100644 --- a/src/import/pull-job.c +++ b/src/import/pull-job.c @@ -53,7 +53,7 @@ PullJob* pull_job_unref(PullJob *j) { pull_job_close_disk_fd(j); - curl_glue_remove_and_free(j->glue, j->curl); + curl_slot_unref(j->slot); sym_curl_slist_free_all(j->request_header); j->compress = compressor_free(j->compress); @@ -83,11 +83,13 @@ static const char* pull_job_description(PullJob *j) { return j->description ?: j->url; } -static void pull_job_finish(PullJob *j, int ret) { +static int pull_job_finish(PullJob *j, int ret) { assert(j); + /* Returns 0 so callers in int-returning paths can `return pull_job_finish(...)` directly. */ + if (IN_SET(j->state, PULL_JOB_DONE, PULL_JOB_FAILED)) - return; + return 0; if (ret == 0) { j->state = PULL_JOB_DONE; @@ -100,6 +102,8 @@ static void pull_job_finish(PullJob *j, int ret) { if (j->on_finished) j->on_finished(j); + + return 0; } int pull_job_restart(PullJob *j, const char *new_url) { @@ -134,8 +138,7 @@ int pull_job_restart(PullJob *j, const char *new_url) { j->expected_content_length = UINT64_MAX; } - curl_glue_remove_and_free(j->glue, j->curl); - j->curl = NULL; + j->slot = curl_slot_unref(j->slot); j->compress = compressor_free(j->compress); @@ -160,23 +163,18 @@ static uint64_t pull_job_content_length_effective(PullJob *j) { return j->content_length; } -void pull_job_curl_on_finished(CurlGlue *g, CURL *curl, CURLcode result) { - PullJob *j = NULL; +static int pull_job_curl_on_finished(CurlSlot *slot, CURL *curl, CURLcode result, void *userdata) { + PullJob *j = ASSERT_PTR(userdata); char *scheme = NULL; CURLcode code; int r; - if (sym_curl_easy_getinfo(curl, CURLINFO_PRIVATE, (char **)&j) != CURLE_OK) - return; - - if (!j || IN_SET(j->state, PULL_JOB_DONE, PULL_JOB_FAILED)) - return; + if (IN_SET(j->state, PULL_JOB_DONE, PULL_JOB_FAILED)) + return 0; code = sym_curl_easy_getinfo(curl, CURLINFO_SCHEME, &scheme); - if (code != CURLE_OK || !scheme) { - r = log_error_errno(SYNTHETIC_ERRNO(EIO), "Failed to retrieve URL scheme."); - goto finish; - } + if (code != CURLE_OK || !scheme) + return pull_job_finish(j, log_error_errno(SYNTHETIC_ERRNO(EIO), "Failed to retrieve URL scheme.")); if (strcaseeq(scheme, "FILE") && result == CURLE_FILE_COULDNT_READ_FILE && j->on_not_found) { _cleanup_free_ char *new_url = NULL; @@ -184,43 +182,37 @@ void pull_job_curl_on_finished(CurlGlue *g, CURL *curl, CURLcode result) { /* This resource wasn't found, but the implementer wants to maybe let us know a new URL, query for it. */ r = j->on_not_found(j, &new_url); if (r < 0) - goto finish; + return pull_job_finish(j, r); if (r > 0) { /* A new url to use */ assert(new_url); r = pull_job_restart(j, new_url); if (r < 0) - goto finish; + return pull_job_finish(j, r); - return; + return 0; } /* if this didn't work, handle like any other error below */ } - if (result != CURLE_OK) { - r = log_error_errno(SYNTHETIC_ERRNO(EIO), "Transfer failed: %s", sym_curl_easy_strerror(result)); - goto finish; - } + if (result != CURLE_OK) + return pull_job_finish(j, log_error_errno(SYNTHETIC_ERRNO(EIO), "Transfer failed: %s", sym_curl_easy_strerror(result))); if (STRCASE_IN_SET(scheme, "HTTP", "HTTPS")) { long status; code = sym_curl_easy_getinfo(curl, CURLINFO_RESPONSE_CODE, &status); - if (code != CURLE_OK) { - r = log_error_errno(SYNTHETIC_ERRNO(EIO), "Failed to retrieve response code: %s", sym_curl_easy_strerror(code)); - goto finish; - } + if (code != CURLE_OK) + return pull_job_finish(j, log_error_errno(SYNTHETIC_ERRNO(EIO), "Failed to retrieve response code: %s", sym_curl_easy_strerror(code))); if (http_status_etag_exists(status)) { log_info("Image already downloaded. Skipping download."); j->etag_exists = true; - r = 0; - goto finish; + return pull_job_finish(j, 0); } else if (http_status_need_authentication(status)) { log_info("Access to image requires authentication."); - r = -ENOKEY; - goto finish; + return pull_job_finish(j, -ENOKEY); } else if (status >= 300) { if (status == 404 && j->on_not_found) { @@ -229,81 +221,64 @@ void pull_job_curl_on_finished(CurlGlue *g, CURL *curl, CURLcode result) { /* This resource wasn't found, but the implementer wants to maybe let us know a new URL, query for it. */ r = j->on_not_found(j, &new_url); if (r < 0) - goto finish; + return pull_job_finish(j, r); if (r > 0) { /* A new url to use */ assert(new_url); r = pull_job_restart(j, new_url); if (r < 0) - goto finish; + return pull_job_finish(j, r); - code = sym_curl_easy_getinfo(j->curl, CURLINFO_RESPONSE_CODE, &status); - if (code != CURLE_OK) { - r = log_error_errno(SYNTHETIC_ERRNO(EIO), "Failed to retrieve response code: %s", sym_curl_easy_strerror(code)); - goto finish; - } + code = sym_curl_easy_getinfo(curl_slot_get_easy(j->slot), CURLINFO_RESPONSE_CODE, &status); + if (code != CURLE_OK) + return pull_job_finish(j, log_error_errno(SYNTHETIC_ERRNO(EIO), "Failed to retrieve response code: %s", sym_curl_easy_strerror(code))); if (status == 0) - return; + return 0; } } - r = log_notice_errno( + return pull_job_finish(j, log_notice_errno( status == 404 ? SYNTHETIC_ERRNO(ENOMEDIUM) : SYNTHETIC_ERRNO(EIO), /* Make the most common error recognizable */ - "HTTP request to %s failed with code %li.", j->url, status); - goto finish; - } else if (status < 200) { - r = log_error_errno(SYNTHETIC_ERRNO(EIO), "HTTP request to %s finished with unexpected code %li.", j->url, status); - goto finish; - } + "HTTP request to %s failed with code %li.", j->url, status)); + } else if (status < 200) + return pull_job_finish(j, log_error_errno(SYNTHETIC_ERRNO(EIO), "HTTP request to %s finished with unexpected code %li.", j->url, status)); } - if (j->state != PULL_JOB_RUNNING) { - r = log_error_errno(SYNTHETIC_ERRNO(EIO), "Premature connection termination."); - goto finish; - } + if (j->state != PULL_JOB_RUNNING) + return pull_job_finish(j, log_error_errno(SYNTHETIC_ERRNO(EIO), "Premature connection termination.")); uint64_t cl = pull_job_content_length_effective(j); if (cl != UINT64_MAX && - cl != j->written_compressed) { - r = log_error_errno(SYNTHETIC_ERRNO(EIO), "Download truncated."); - goto finish; - } + cl != j->written_compressed) + return pull_job_finish(j, log_error_errno(SYNTHETIC_ERRNO(EIO), "Download truncated.")); if (j->checksum_ctx) { unsigned checksum_len; iovec_done(&j->checksum); j->checksum.iov_base = malloc(EVP_MAX_MD_SIZE); - if (!j->checksum.iov_base) { - r = log_oom(); - goto finish; - } + if (!j->checksum.iov_base) + return pull_job_finish(j, log_oom()); r = sym_EVP_DigestFinal_ex(j->checksum_ctx, j->checksum.iov_base, &checksum_len); - if (r == 0) { - r = log_error_errno(SYNTHETIC_ERRNO(EIO), "Failed to get checksum."); - goto finish; - } + if (r == 0) + return pull_job_finish(j, log_error_errno(SYNTHETIC_ERRNO(EIO), "Failed to get checksum.")); assert(checksum_len <= EVP_MAX_MD_SIZE); j->checksum.iov_len = checksum_len; if (DEBUG_LOGGING) { _cleanup_free_ char *h = hexmem(j->checksum.iov_base, j->checksum.iov_len); - if (!h) { - r = log_oom(); - goto finish; - } + if (!h) + return pull_job_finish(j, log_oom()); log_debug("%s of %s is %s.", sym_EVP_MD_CTX_get0_name(j->checksum_ctx), pull_job_description(j), h); } if (iovec_is_set(&j->expected_checksum) && - !iovec_equal(&j->checksum, &j->expected_checksum)) { - r = log_error_errno(SYNTHETIC_ERRNO(EBADMSG), "Checksum of downloaded resource does not match expected checksum, yikes."); - goto finish; - } + !iovec_equal(&j->checksum, &j->expected_checksum)) + return pull_job_finish(j, log_error_errno(SYNTHETIC_ERRNO(EBADMSG), "Checksum of downloaded resource does not match expected checksum, yikes.")); } /* Do a couple of finishing disk operations, but only if we are the sole owner of the file (i.e. no @@ -318,10 +293,8 @@ void pull_job_curl_on_finished(CurlGlue *g, CURL *curl, CURLcode result) { if (j->written_compressed > 0) { /* Make sure the file size is right, in case the file was sparse and * we just moved to the last part. */ - if (ftruncate(j->disk_fd, j->written_uncompressed) < 0) { - r = log_error_errno(errno, "Failed to truncate file: %m"); - goto finish; - } + if (ftruncate(j->disk_fd, j->written_uncompressed) < 0) + return pull_job_finish(j, log_error_errno(errno, "Failed to truncate file: %m")); } if (j->etag) @@ -345,27 +318,20 @@ void pull_job_curl_on_finished(CurlGlue *g, CURL *curl, CURLcode result) { if (j->sync) { r = fsync_full(j->disk_fd); - if (r < 0) { - log_error_errno(r, "Failed to synchronize file to disk: %m"); - goto finish; - } + if (r < 0) + return pull_job_finish(j, log_error_errno(r, "Failed to synchronize file to disk: %m")); } } else if (S_ISBLK(j->disk_stat.st_mode) && j->sync) { - if (fsync(j->disk_fd) < 0) { - r = log_error_errno(errno, "Failed to synchronize block device: %m"); - goto finish; - } + if (fsync(j->disk_fd) < 0) + return pull_job_finish(j, log_error_errno(errno, "Failed to synchronize block device: %m")); } } log_info("Acquired %s for %s.", FORMAT_BYTES(j->written_uncompressed), pull_job_description(j)); - r = 0; - -finish: - pull_job_finish(j, r); + return pull_job_finish(j, 0); } static int pull_job_write_uncompressed(const void *p, size_t sz, void *userdata) { @@ -595,7 +561,7 @@ static size_t pull_job_header_callback(void *contents, size_t size, size_t nmemb assert(j->state == PULL_JOB_ANALYZING); - code = sym_curl_easy_getinfo(j->curl, CURLINFO_RESPONSE_CODE, &status); + code = sym_curl_easy_getinfo(curl_slot_get_easy(j->slot), CURLINFO_RESPONSE_CODE, &status); if (code != CURLE_OK) { r = log_error_errno(SYNTHETIC_ERRNO(EIO), "Failed to retrieve response code: %s", sym_curl_easy_strerror(code)); goto fail; @@ -809,7 +775,8 @@ int pull_job_begin(PullJob *j) { if (j->state != PULL_JOB_INIT) return -EBUSY; - r = curl_glue_make(&j->curl, j->url, j); + _cleanup_(curl_easy_cleanupp) CURL *easy = NULL; + r = curl_glue_make(&easy, j->url); if (r < 0) return r; @@ -830,34 +797,35 @@ int pull_job_begin(PullJob *j) { } if (j->request_header) { - if (sym_curl_easy_setopt(j->curl, CURLOPT_HTTPHEADER, j->request_header) != CURLE_OK) + if (sym_curl_easy_setopt(easy, CURLOPT_HTTPHEADER, j->request_header) != CURLE_OK) return -EIO; } - if (sym_curl_easy_setopt(j->curl, CURLOPT_WRITEFUNCTION, pull_job_write_callback) != CURLE_OK) + if (sym_curl_easy_setopt(easy, CURLOPT_WRITEFUNCTION, pull_job_write_callback) != CURLE_OK) return -EIO; - if (sym_curl_easy_setopt(j->curl, CURLOPT_WRITEDATA, j) != CURLE_OK) + if (sym_curl_easy_setopt(easy, CURLOPT_WRITEDATA, j) != CURLE_OK) return -EIO; - if (sym_curl_easy_setopt(j->curl, CURLOPT_HEADERFUNCTION, pull_job_header_callback) != CURLE_OK) + if (sym_curl_easy_setopt(easy, CURLOPT_HEADERFUNCTION, pull_job_header_callback) != CURLE_OK) return -EIO; - if (sym_curl_easy_setopt(j->curl, CURLOPT_HEADERDATA, j) != CURLE_OK) + if (sym_curl_easy_setopt(easy, CURLOPT_HEADERDATA, j) != CURLE_OK) return -EIO; - if (sym_curl_easy_setopt(j->curl, CURLOPT_XFERINFOFUNCTION, pull_job_progress_callback) != CURLE_OK) + if (sym_curl_easy_setopt(easy, CURLOPT_XFERINFOFUNCTION, pull_job_progress_callback) != CURLE_OK) return -EIO; - if (sym_curl_easy_setopt(j->curl, CURLOPT_XFERINFODATA, j) != CURLE_OK) + if (sym_curl_easy_setopt(easy, CURLOPT_XFERINFODATA, j) != CURLE_OK) return -EIO; - if (sym_curl_easy_setopt(j->curl, CURLOPT_NOPROGRESS, 0L) != CURLE_OK) + if (sym_curl_easy_setopt(easy, CURLOPT_NOPROGRESS, 0L) != CURLE_OK) return -EIO; - r = curl_glue_add(j->glue, j->curl); + r = curl_glue_perform_async(j->glue, easy, pull_job_curl_on_finished, j, &j->slot); if (r < 0) return r; + TAKE_PTR(easy); j->state = PULL_JOB_ANALYZING; diff --git a/src/import/pull-job.h b/src/import/pull-job.h index 0b878292f096b..00d001680ff20 100644 --- a/src/import/pull-job.h +++ b/src/import/pull-job.h @@ -7,7 +7,6 @@ #include "shared-forward.h" -typedef struct CurlGlue CurlGlue; typedef struct PullJob PullJob; typedef void (*PullJobFinished)(PullJob *job); @@ -46,7 +45,7 @@ typedef struct PullJob { PullJobNotFound on_not_found; CurlGlue *glue; - CURL *curl; + CurlSlot *slot; struct curl_slist *request_header; char *etag; @@ -95,8 +94,6 @@ PullJob* pull_job_unref(PullJob *job); int pull_job_begin(PullJob *j); -void pull_job_curl_on_finished(CurlGlue *g, CURL *curl, CURLcode result); - void pull_job_close_disk_fd(PullJob *j); int pull_job_add_request_header(PullJob *j, const char *hdr); diff --git a/src/import/pull-oci.c b/src/import/pull-oci.c index acea93b09de9a..c1c76fc898017 100644 --- a/src/import/pull-oci.c +++ b/src/import/pull-oci.c @@ -195,9 +195,6 @@ int oci_pull_new( .userns_fd = -EBADF, }; - i->glue->on_finished = pull_job_curl_on_finished; - i->glue->userdata = i; - *ret = TAKE_PTR(i); return 0; diff --git a/src/import/pull-raw.c b/src/import/pull-raw.c index 0ddde7c091962..c63a453177cff 100644 --- a/src/import/pull-raw.c +++ b/src/import/pull-raw.c @@ -149,9 +149,6 @@ int raw_pull_new( .offset = UINT64_MAX, }; - p->glue->on_finished = pull_job_curl_on_finished; - p->glue->userdata = p; - *ret = TAKE_PTR(p); return 0; diff --git a/src/import/pull-tar.c b/src/import/pull-tar.c index fe18636eb7d9d..453ad1187cf7b 100644 --- a/src/import/pull-tar.c +++ b/src/import/pull-tar.c @@ -153,9 +153,6 @@ int tar_pull_new( .progress_ratelimit = { 100 * USEC_PER_MSEC, 1 }, }; - p->glue->on_finished = pull_job_curl_on_finished; - p->glue->userdata = p; - *ret = TAKE_PTR(p); return 0; diff --git a/src/shared/curl-util.c b/src/shared/curl-util.c index 0a6bdeefe5589..e438ddf61a4d2 100644 --- a/src/shared/curl-util.c +++ b/src/shared/curl-util.c @@ -12,6 +12,7 @@ #include "dlfcn-util.h" #include "fd-util.h" #include "hashmap.h" +#include "set.h" #include "string-util.h" #include "strv.h" #include "time-util.h" @@ -42,6 +43,77 @@ DLSYM_PROTOTYPE(curl_slist_free_all) = NULL; DEFINE_TRIVIAL_CLEANUP_FUNC_FULL_RENAME(CURLM*, sym_curl_multi_cleanup, curl_multi_cleanupp, NULL); +struct CurlGlue { + unsigned n_ref; + sd_event *event; + CURLM *curl; + sd_event_source *timer; + Hashmap *ios; + sd_event_source *defer; + Set *slots; /* CurlSlot* — back-pointer set; floating slots are kept alive here */ +}; + +struct CurlSlot { + unsigned n_ref; + CurlGlue *glue; /* NULL once disconnected (callback fired, cancelled, or glue died) */ + CURL *easy; /* owned; cleared once the easy handle has been freed */ + bool floating; + curl_finished_t callback; + void *userdata; +}; + +static void curl_slot_disconnect(CurlSlot *slot, bool unref) { + assert(slot); + + /* Tear down the slot's connection to the glue: pull the easy handle out of the multi, + * curl_easy_cleanup() it, and remove the slot from the glue's lookup set. Floating + * slots are owned by that set, so on disconnect we drop the implicit ref (when + * unref=true; the recursive call from curl_slot_free passes false to avoid infinite + * recursion). Non-floating slots release the back-ref they held on the glue. + * + * Idempotent: once slot->glue is NULL, subsequent calls are no-ops. */ + + if (!slot->glue) + return; + + CurlGlue *glue = slot->glue; + + if (slot->easy) { + if (glue->curl) + (void) sym_curl_multi_remove_handle(glue->curl, slot->easy); + sym_curl_easy_cleanup(slot->easy); + slot->easy = NULL; + } + + set_remove(glue->slots, slot); + slot->glue = NULL; + + if (!slot->floating) + curl_glue_unref(glue); + else if (unref) + curl_slot_unref(slot); +} + +static CurlSlot* curl_slot_free(CurlSlot *slot) { + if (!slot) + return NULL; + + curl_slot_disconnect(slot, /* unref= */ false); + return mfree(slot); +} + +DEFINE_TRIVIAL_REF_UNREF_FUNC(CurlSlot, curl_slot, curl_slot_free); + +CURL* curl_slot_get_easy(CurlSlot *slot) { + assert(slot); + return slot->easy; +} + +CurlGlue* curl_slot_get_glue(CurlSlot *slot) { + assert(slot); + return slot->glue; +} + static void curl_glue_check_finished(CurlGlue *g) { int r; @@ -60,8 +132,27 @@ static void curl_glue_check_finished(CurlGlue *g) { if (!msg) return; - if (msg->msg == CURLMSG_DONE && g->on_finished) - g->on_finished(g, msg->easy_handle, msg->data.result); + if (msg->msg == CURLMSG_DONE) { + CURL *easy = msg->easy_handle; + CURLcode code = msg->data.result; + CurlSlot *slot = NULL; + + if (sym_curl_easy_getinfo(easy, CURLINFO_PRIVATE, (char**) &slot) == CURLE_OK && slot) { + /* Pin the slot across the callback: a floating slot's only + * reference is the one held via the glue's slots set, and + * disconnect drops it. */ + curl_slot_ref(slot); + + if (slot->callback) { + r = slot->callback(slot, easy, code, slot->userdata); + if (r < 0) + log_debug_errno(r, "Curl finished callback returned error, ignoring: %m"); + } + + curl_slot_disconnect(slot, /* unref= */ true); + curl_slot_unref(slot); + } + } /* This is a queue, process another item soon, but do so in a later event loop iteration. */ (void) sd_event_source_set_enabled(g->defer, SD_EVENT_ONESHOT); @@ -212,12 +303,22 @@ static int curl_glue_on_defer(sd_event_source *s, void *userdata) { return 0; } -CurlGlue *curl_glue_unref(CurlGlue *g) { +static CurlGlue* curl_glue_free(CurlGlue *g) { sd_event_source *io; + CurlSlot *slot; if (!g) return NULL; + /* Drain any slots still hanging off us. By construction only floating slots can + * be here: connected non-floating slots hold a glue back-ref, so glue's last ref + * couldn't have dropped while one was attached. disconnect(unref=true) does the + * floating slot's free as part of its work. set_steal_first() pops up front so + * forward progress doesn't depend on disconnect's internal set_remove(). */ + while ((slot = set_steal_first(g->slots))) + curl_slot_disconnect(slot, /* unref= */ true); + g->slots = set_free(g->slots); + if (g->curl) sym_curl_multi_cleanup(g->curl); @@ -232,6 +333,8 @@ CurlGlue *curl_glue_unref(CurlGlue *g) { return mfree(g); } +DEFINE_TRIVIAL_REF_UNREF_FUNC(CurlGlue, curl_glue, curl_glue_free); + int curl_glue_new(CurlGlue **glue, sd_event *event) { _cleanup_(curl_glue_unrefp) CurlGlue *g = NULL; _cleanup_(curl_multi_cleanupp) CURLM *c = NULL; @@ -261,6 +364,7 @@ int curl_glue_new(CurlGlue **glue, sd_event *event) { return -ENOMEM; *g = (CurlGlue) { + .n_ref = 1, .event = TAKE_PTR(e), .curl = TAKE_PTR(c), }; @@ -288,7 +392,7 @@ int curl_glue_new(CurlGlue **glue, sd_event *event) { return 0; } -int curl_glue_make(CURL **ret, const char *url, void *userdata) { +int curl_glue_make(CURL **ret, const char *url) { _cleanup_(curl_easy_cleanupp) CURL *c = NULL; const char *useragent; int r; @@ -310,9 +414,6 @@ int curl_glue_make(CURL **ret, const char *url, void *userdata) { if (sym_curl_easy_setopt(c, CURLOPT_URL, url) != CURLE_OK) return -EIO; - if (sym_curl_easy_setopt(c, CURLOPT_PRIVATE, userdata) != CURLE_OK) - return -EIO; - useragent = strjoina(program_invocation_short_name, "/" GIT_VERSION); if (sym_curl_easy_setopt(c, CURLOPT_USERAGENT, useragent) != CURLE_OK) return -EIO; @@ -342,26 +443,61 @@ int curl_glue_make(CURL **ret, const char *url, void *userdata) { return 0; } -int curl_glue_add(CurlGlue *g, CURL *c) { +int curl_glue_perform_async( + CurlGlue *g, + CURL *easy, + curl_finished_t cb, + void *userdata, + CurlSlot **ret_slot) { + + int r; + assert(g); - assert(c); + assert(easy); - if (sym_curl_multi_add_handle(g->curl, c) != CURLM_OK) - return -EIO; + _cleanup_(curl_slot_unrefp) CurlSlot *slot = new(CurlSlot, 1); + if (!slot) + return -ENOMEM; - return 0; -} + *slot = (CurlSlot) { + .n_ref = 1, + .glue = NULL, /* wired up below, after we've committed to the multi */ + .easy = easy, + .floating = !ret_slot, + .callback = cb, + .userdata = userdata, + }; -void curl_glue_remove_and_free(CurlGlue *g, CURL *c) { - assert(g); + r = set_ensure_put(&g->slots, &trivial_hash_ops, slot); + if (r < 0) + return r; + assert(r > 0); - if (!c) - return; + if (sym_curl_multi_add_handle(g->curl, easy) != CURLM_OK) { + set_remove(g->slots, slot); + return -EIO; + } - if (g->curl) - sym_curl_multi_remove_handle(g->curl, c); + /* Stash the slot pointer on the easy handle so curl_glue_check_finished() can recover + * it on completion. Set this only after we've fully committed to the multi, so that + * error paths above don't leave a dangling pointer on the easy handle. */ + if (sym_curl_easy_setopt(easy, CURLOPT_PRIVATE, slot) != CURLE_OK) { + sym_curl_multi_remove_handle(g->curl, easy); + set_remove(g->slots, slot); + return -EIO; + } + + slot->glue = g; + if (!slot->floating) + curl_glue_ref(g); - sym_curl_easy_cleanup(c); + /* Transfer the slot's single reference: to the caller for non-floating slots, or to + * the glue's slot set (implicitly, until disconnect drops it) for floating ones. */ + if (ret_slot) + *ret_slot = slot; + + TAKE_PTR(slot); + return 0; } struct curl_slist *curl_slist_new(const char *first, ...) { diff --git a/src/shared/curl-util.h b/src/shared/curl-util.h index 33ab0a5fb204b..3436188952fbc 100644 --- a/src/shared/curl-util.h +++ b/src/shared/curl-util.h @@ -30,27 +30,42 @@ extern DLSYM_PROTOTYPE(curl_slist_free_all); code == CURLE_OK; \ }) -typedef struct CurlGlue CurlGlue; - -typedef struct CurlGlue { - sd_event *event; - CURLM *curl; - sd_event_source *timer; - Hashmap *ios; - sd_event_source *defer; - - void (*on_finished)(CurlGlue *g, CURL *curl, CURLcode code); - void *userdata; -} CurlGlue; +typedef int (*curl_finished_t)(CurlSlot *slot, CURL *curl, CURLcode code, void *userdata); int curl_glue_new(CurlGlue **glue, sd_event *event); +CurlGlue* curl_glue_ref(CurlGlue *glue); CurlGlue* curl_glue_unref(CurlGlue *glue); DEFINE_TRIVIAL_CLEANUP_FUNC(CurlGlue*, curl_glue_unref); -int curl_glue_make(CURL **ret, const char *url, void *userdata); -int curl_glue_add(CurlGlue *g, CURL *c); -void curl_glue_remove_and_free(CurlGlue *g, CURL *c); +/* Build a CURL easy handle with sane defaults. The caller configures any + * additional options (headers, write callbacks, …) before handing it off to + * curl_glue_perform_async(). */ +int curl_glue_make(CURL **ret, const char *url); + +/* Hand a configured CURL easy handle off to the multi for execution. The slot + * takes ownership of the easy handle: once the slot is released (the callback + * has fired, the caller has dropped its last ref, or the glue is being freed), + * the handle is removed from the multi and freed. + * + * If ret_slot is NULL the slot is allocated as floating: the glue keeps it + * alive until the callback fires or the glue is torn down. Otherwise a + * reference is returned to the caller; releasing that reference cancels the + * call. */ +int curl_glue_perform_async( + CurlGlue *g, + CURL *easy, + curl_finished_t cb, + void *userdata, + CurlSlot **ret_slot); + +CURL* curl_slot_get_easy(CurlSlot *slot); +CurlGlue* curl_slot_get_glue(CurlSlot *slot); + +CurlSlot* curl_slot_ref(CurlSlot *slot); +CurlSlot* curl_slot_unref(CurlSlot *slot); + +DEFINE_TRIVIAL_CLEANUP_FUNC(CurlSlot*, curl_slot_unref); struct curl_slist *curl_slist_new(const char *first, ...) _sentinel_; int curl_header_strdup(const void *contents, size_t sz, const char *field, char **value); diff --git a/src/shared/shared-forward.h b/src/shared/shared-forward.h index e850d8982bd30..751a6f71dc359 100644 --- a/src/shared/shared-forward.h +++ b/src/shared/shared-forward.h @@ -57,6 +57,8 @@ typedef struct Condition Condition; typedef struct ConfigSection ConfigSection; typedef struct ConfigTableItem ConfigTableItem; typedef struct CPUSet CPUSet; +typedef struct CurlGlue CurlGlue; +typedef struct CurlSlot CurlSlot; typedef struct DissectedImage DissectedImage; typedef struct DnsAnswer DnsAnswer; typedef struct DnsPacket DnsPacket; diff --git a/src/test/meson.build b/src/test/meson.build index f4288119f94ba..ba890e7341017 100644 --- a/src/test/meson.build +++ b/src/test/meson.build @@ -348,6 +348,10 @@ executables += [ 'sources' : files('test-kexec.c'), 'link_with' : [libshared], }, + test_template + { + 'sources' : files('test-curl-util.c'), + 'conditions' : ['HAVE_LIBCURL'], + }, test_template + { 'sources' : files('test-libcrypt-util.c'), 'conditions' : ['HAVE_LIBCRYPT'], diff --git a/src/test/test-curl-util.c b/src/test/test-curl-util.c new file mode 100644 index 0000000000000..fb3d278200671 --- /dev/null +++ b/src/test/test-curl-util.c @@ -0,0 +1,280 @@ +/* SPDX-License-Identifier: LGPL-2.1-or-later */ + +#include + +#include "sd-event.h" + +#include "alloc-util.h" +#include "curl-util.h" +#include "fd-util.h" +#include "fs-util.h" +#include "io-util.h" +#include "string-util.h" +#include "tests.h" +#include "tmpfile-util.h" + +#define ASSERT_CURL_OK(expr) \ + ({ \ + CURLcode _code = (expr); \ + if (_code != CURLE_OK) \ + log_test_failed("Expected \"%s\" to be CURLE_OK, but got %d/%s",\ + #expr, (int) _code, sym_curl_easy_strerror(_code)); \ + }) + +/* Per-request context: the write callback appends bytes to ->body, and the + * on_finished callback stashes the CURLcode plus a "fired" flag. Each test + * uses one or more of these and cleans them up via context_done(). */ +typedef struct Context { + sd_event *event; + char *body; + size_t body_len; + bool finished; + CURLcode result; +} Context; + +static void context_done(Context *f) { + f->event = sd_event_unref(f->event); + f->body = mfree(f->body); +} + +static size_t write_callback(void *contents, size_t size, size_t nmemb, void *userdata) { + Context *f = ASSERT_PTR(userdata); + size_t sz = size * nmemb; + + if (!GREEDY_REALLOC(f->body, f->body_len + sz + 1)) + return 0; + memcpy(f->body + f->body_len, contents, sz); + f->body[f->body_len + sz] = 0; + f->body_len += sz; + return sz; +} + +static int on_finished(CurlSlot *slot, CURL *curl, CURLcode code, void *userdata) { + Context *f = ASSERT_PTR(userdata); + + f->finished = true; + f->result = code; + + return sd_event_exit(f->event, 0); +} + +static int make_tmp_url(char **ret_path, char **ret_url, const char *body) { + const char *t; + ASSERT_OK(tmp_dir(&t)); + + _cleanup_(unlink_and_freep) char *path = ASSERT_NOT_NULL(strjoin(t, "/test-curl-util.XXXXXX")); + + _cleanup_close_ int fd = ASSERT_OK(mkostemp_safe(path)); + ASSERT_OK(loop_write(fd, body, strlen(body))); + + char *url = ASSERT_NOT_NULL(strjoin("file://", path)); + + *ret_url = url; + *ret_path = TAKE_PTR(path); + return 0; +} + +static int build_easy(const char *url, Context *f, CURL **ret) { + _cleanup_(curl_easy_cleanupp) CURL *easy = NULL; + ASSERT_OK(curl_glue_make(&easy, url)); + + ASSERT_CURL_OK(sym_curl_easy_setopt(easy, CURLOPT_WRITEFUNCTION, write_callback)); + ASSERT_CURL_OK(sym_curl_easy_setopt(easy, CURLOPT_WRITEDATA, f)); + + *ret = TAKE_PTR(easy); + return 0; +} + +TEST(curl_glue_lifecycle) { + _cleanup_(sd_event_unrefp) sd_event *event = NULL; + ASSERT_OK(sd_event_default(&event)); + + _cleanup_(curl_glue_unrefp) CurlGlue *g = NULL; + ASSERT_OK(curl_glue_new(&g, event)); + + /* ref/unref roundtrip */ + ASSERT_PTR_EQ(curl_glue_ref(g), g); + ASSERT_NULL(curl_glue_unref(g)); +} + +TEST(curl_glue_make) { + _cleanup_(curl_easy_cleanupp) CURL *easy = NULL; + ASSERT_OK(curl_glue_make(&easy, "file:///dev/null")); + ASSERT_NOT_NULL(easy); +} + +TEST(curl_perform_floating) { + _cleanup_(sd_event_unrefp) sd_event *event = NULL; + ASSERT_OK(sd_event_default(&event)); + + _cleanup_(curl_glue_unrefp) CurlGlue *g = NULL; + ASSERT_OK(curl_glue_new(&g, event)); + + _cleanup_(unlink_and_freep) char *path = NULL; + _cleanup_free_ char *url = NULL; + ASSERT_OK(make_tmp_url(&path, &url, "hello world")); + + _cleanup_(context_done) Context f = { .event = sd_event_ref(event) }; + + _cleanup_(curl_easy_cleanupp) CURL *easy = NULL; + ASSERT_OK(build_easy(url, &f, &easy)); + + /* Floating: pass NULL for ret_slot. The glue owns the slot until completion. */ + ASSERT_OK(curl_glue_perform_async(g, easy, on_finished, &f, /* ret_slot= */ NULL)); + TAKE_PTR(easy); + + ASSERT_OK(sd_event_loop(event)); + + ASSERT_TRUE(f.finished); + ASSERT_CURL_OK(f.result); + ASSERT_STREQ(f.body, "hello world"); +} + +TEST(curl_perform_slot) { + _cleanup_(sd_event_unrefp) sd_event *event = NULL; + ASSERT_OK(sd_event_default(&event)); + + _cleanup_(curl_glue_unrefp) CurlGlue *g = NULL; + ASSERT_OK(curl_glue_new(&g, event)); + + _cleanup_(unlink_and_freep) char *path = NULL; + _cleanup_free_ char *url = NULL; + ASSERT_OK(make_tmp_url(&path, &url, "slot test")); + + _cleanup_(context_done) Context f = { .event = sd_event_ref(event) }; + + _cleanup_(curl_easy_cleanupp) CURL *easy = NULL; + ASSERT_OK(build_easy(url, &f, &easy)); + + _cleanup_(curl_slot_unrefp) CurlSlot *slot = NULL; + ASSERT_OK(curl_glue_perform_async(g, easy, on_finished, &f, &slot)); + TAKE_PTR(easy); + + ASSERT_NOT_NULL(slot); + ASSERT_NOT_NULL(curl_slot_get_easy(slot)); + ASSERT_PTR_EQ(curl_slot_get_glue(slot), g); + + ASSERT_OK(sd_event_loop(event)); + + ASSERT_TRUE(f.finished); + ASSERT_CURL_OK(f.result); + ASSERT_STREQ(f.body, "slot test"); + + /* After completion, disconnect has cleared the slot's back-pointers; the slot itself + * is still alive because we hold a ref. Releasing it must be a clean no-op. */ + ASSERT_NULL(curl_slot_get_easy(slot)); + ASSERT_NULL(curl_slot_get_glue(slot)); +} + +TEST(curl_perform_cancel) { + _cleanup_(sd_event_unrefp) sd_event *event = NULL; + ASSERT_OK(sd_event_default(&event)); + + _cleanup_(curl_glue_unrefp) CurlGlue *g = NULL; + ASSERT_OK(curl_glue_new(&g, event)); + + _cleanup_(unlink_and_freep) char *path = NULL; + _cleanup_free_ char *url = NULL; + ASSERT_OK(make_tmp_url(&path, &url, "payload")); + + /* Two requests: cancelled is unref'd before we run the loop; sentinel runs to + * completion and exits the loop. After the loop returns we know the dispatcher had + * an opportunity to fire any pending completion — so cancelled.finished staying false + * means our cancel actually prevented the callback from running, not just outraced it. */ + _cleanup_(context_done) Context cancelled = { .event = sd_event_ref(event) }; + _cleanup_(context_done) Context sentinel = { .event = sd_event_ref(event) }; + + _cleanup_(curl_easy_cleanupp) CURL *easy_cancelled = NULL, *easy_sentinel = NULL; + ASSERT_OK(build_easy(url, &cancelled, &easy_cancelled)); + ASSERT_OK(build_easy(url, &sentinel, &easy_sentinel)); + + _cleanup_(curl_slot_unrefp) CurlSlot *slot = NULL; + ASSERT_OK(curl_glue_perform_async(g, easy_cancelled, on_finished, &cancelled, &slot)); + TAKE_PTR(easy_cancelled); + + /* Cancel by dropping our only reference: removes the easy handle from the multi and + * cleans it up. The callback must not fire afterwards. */ + slot = curl_slot_unref(slot); + + /* The sentinel runs as floating; its callback will exit the loop on completion. */ + ASSERT_OK(curl_glue_perform_async(g, easy_sentinel, on_finished, &sentinel, /* ret_slot= */ NULL)); + TAKE_PTR(easy_sentinel); + + ASSERT_OK(sd_event_loop(event)); + + ASSERT_TRUE(sentinel.finished); + ASSERT_FALSE(cancelled.finished); +} + +typedef struct ConcurrentReq { + Context ctx; + const char *expected; + unsigned *remaining; +} ConcurrentReq; + +static int concurrent_on_finished(CurlSlot *slot, CURL *curl, CURLcode code, void *userdata) { + ConcurrentReq *cr = ASSERT_PTR(userdata); + + cr->ctx.finished = true; + cr->ctx.result = code; + + (*cr->remaining)--; + if (*cr->remaining == 0) + return sd_event_exit(cr->ctx.event, 0); + return 0; +} + +TEST(curl_concurrent) { + _cleanup_(sd_event_unrefp) sd_event *event = NULL; + ASSERT_OK(sd_event_default(&event)); + + _cleanup_(curl_glue_unrefp) CurlGlue *g = NULL; + ASSERT_OK(curl_glue_new(&g, event)); + + _cleanup_(unlink_and_freep) char *path_a = NULL, *path_b = NULL, *path_c = NULL; + _cleanup_free_ char *url_a = NULL, *url_b = NULL, *url_c = NULL; + ASSERT_OK(make_tmp_url(&path_a, &url_a, "alpha")); + ASSERT_OK(make_tmp_url(&path_b, &url_b, "bravo")); + ASSERT_OK(make_tmp_url(&path_c, &url_c, "charlie")); + + unsigned remaining = 3; + ConcurrentReq reqs[3] = { + { .ctx = { .event = sd_event_ref(event) }, .expected = "alpha", .remaining = &remaining }, + { .ctx = { .event = sd_event_ref(event) }, .expected = "bravo", .remaining = &remaining }, + { .ctx = { .event = sd_event_ref(event) }, .expected = "charlie", .remaining = &remaining }, + }; + + _cleanup_(curl_easy_cleanupp) CURL *ea = NULL, *eb = NULL, *ec = NULL; + ASSERT_OK(build_easy(url_a, &reqs[0].ctx, &ea)); + ASSERT_OK(build_easy(url_b, &reqs[1].ctx, &eb)); + ASSERT_OK(build_easy(url_c, &reqs[2].ctx, &ec)); + + /* All three fire as floating slots; the only way the loop exits is through the + * remaining-counter hitting zero, which means every callback fired with the right + * userdata routed to its respective body. */ + ASSERT_OK(curl_glue_perform_async(g, ea, concurrent_on_finished, &reqs[0], NULL)); + TAKE_PTR(ea); + ASSERT_OK(curl_glue_perform_async(g, eb, concurrent_on_finished, &reqs[1], NULL)); + TAKE_PTR(eb); + ASSERT_OK(curl_glue_perform_async(g, ec, concurrent_on_finished, &reqs[2], NULL)); + TAKE_PTR(ec); + + ASSERT_OK(sd_event_loop(event)); + + ASSERT_EQ(remaining, 0u); + + FOREACH_ARRAY(r, reqs, ELEMENTSOF(reqs)) { + ASSERT_TRUE(r->ctx.finished); + ASSERT_CURL_OK(r->ctx.result); + ASSERT_STREQ(r->ctx.body, r->expected); + context_done(&r->ctx); + } +} + +static int intro(void) { + if (dlopen_curl(LOG_DEBUG) < 0) + return log_tests_skipped("libcurl not available"); + return EXIT_SUCCESS; +} + +DEFINE_TEST_MAIN_WITH_INTRO(LOG_DEBUG, intro); From b2c6cc6a8e0c11bb59b46999ab4c865dd448ccba Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Zbigniew=20J=C4=99drzejewski-Szmek?= Date: Fri, 8 May 2026 15:47:18 +0200 Subject: [PATCH 193/242] userdbctl: actually implement option parsing stop after --chain The basic idea is that --chain should stop option parsing. But previously this didn't work, so --chain could be specified anywhere in the command line. To maintain with compatibility with that, allow --chain to be specified anywhere until the first positional arg or option in the command string. This allows options to be passed in the expected fashion: userdbctl --chain ssh-authorized-keys user cmd --opt1 --opt2 userdbctl --chain ssh-authorized-keys user -- cmd --opt1 --opt2 but also allows the invocations which worked previously: userdbctl ssh-authorized-keys user --chain cmd userdbctl ssh-authorized-keys user cmd --chain Fixes 8072a7e6a9eaf2de120797dd16c5e0baea606219. The error messages are extended a bit. "binary path" is misleading: we support all kinds of executables, not only compiled programs. --- src/userdb/userdbctl.c | 34 ++++++++++++++++++++++++++++------ test/units/TEST-46-HOMED.sh | 9 +++++++++ 2 files changed, 37 insertions(+), 6 deletions(-) diff --git a/src/userdb/userdbctl.c b/src/userdb/userdbctl.c index 60a6ff051a3b8..35a17c50217da 100644 --- a/src/userdb/userdbctl.c +++ b/src/userdb/userdbctl.c @@ -1148,11 +1148,13 @@ static int verb_ssh_authorized_keys(int argc, char *argv[], uintptr_t _data, voi /* Make similar restrictions on the chain command as OpenSSH itself makes on the primary command. */ if (!path_is_absolute(argv[2])) return log_error_errno(SYNTHETIC_ERRNO(EINVAL), - "Chain invocation of ssh-authorized-keys commands requires an absolute binary path argument."); + "Chain invocation of ssh-authorized-keys commands requires an absolute program path (got '%s').", + argv[2]); if (!path_is_normalized(argv[2])) return log_error_errno(SYNTHETIC_ERRNO(EINVAL), - "Chain invocation of ssh-authorized-keys commands requires an normalized binary path argument."); + "Chain invocation of ssh-authorized-keys commands requires a normalized program path (got '%s').", + argv[2]); chain_invocation = argv + 2; } else { @@ -1628,9 +1630,10 @@ static int parse_argv(int argc, char *argv[], char ***remaining_args) { arg_services = l; } - OptionParser opts = { argc, argv }; + OptionParser opts = { argc, argv, OPTION_PARSER_RETURN_POSITIONAL_ARGS }; + _cleanup_strv_free_ char **args = NULL; - FOREACH_OPTION_OR_RETURN(c, &opts) + FOREACH_OPTION_OR_RETURN(c, &opts) { switch (c) { OPTION_COMMON_HELP: @@ -1733,6 +1736,12 @@ static int parse_argv(int argc, char *argv[], char ***remaining_args) { arg_chain = true; break; + OPTION_POSITIONAL: + r = strv_extend(&args, opts.arg); + if (r < 0) + return log_oom(); + break; + OPTION_LONG("uid-min", "ID", "Filter by minimum UID/GID (default 0)"): r = parse_uid(opts.arg, &arg_uid_min); if (r < 0) @@ -1811,6 +1820,15 @@ static int parse_argv(int argc, char *argv[], char ***remaining_args) { } } + /* When --chain was seen, stop parsing switches after the second positional argument: + * [OPTS0…, VERB, OPTS1…, USERNAME, OPTS2…, COMMAND, OPTS3…] + * We shall parse OPTS0, OPTS1, OPTS2, but OPTS3 are for COMMAND. + * --chain can be anywhere in OPTS0, OPTS1, OPTS2, or first in OPTS3. + */ + if (arg_chain && strv_length(args) >= 3) + opts.state = OPTION_PARSER_DONE; + } + if (arg_uid_min > arg_uid_max) return log_error_errno(SYNTHETIC_ERRNO(EINVAL), "Minimum UID/GID " UID_FMT " is above maximum UID/GID " UID_FMT ", refusing.", @@ -1823,12 +1841,16 @@ static int parse_argv(int argc, char *argv[], char ***remaining_args) { if (arg_from_file) arg_boundaries = false; - *remaining_args = option_parser_get_args(&opts); + /* We gathered some positional args in 'args' ourselves. Append the remaining ones. */ + if (strv_extend_strv(&args, option_parser_get_args(&opts), /* filter_duplicates= */ false) < 0) + return log_oom(); + + *remaining_args = TAKE_PTR(args); return 1; } static int run(int argc, char *argv[]) { - char **args = NULL; + _cleanup_strv_free_ char **args = NULL; int r; log_setup(); diff --git a/test/units/TEST-46-HOMED.sh b/test/units/TEST-46-HOMED.sh index 4b81799ef3dea..5afa42d73968e 100755 --- a/test/units/TEST-46-HOMED.sh +++ b/test/units/TEST-46-HOMED.sh @@ -586,6 +586,15 @@ EOF (! userdbctl ssh-authorized-keys dropin-user --chain '') (! SYSTEMD_LOG_LEVEL=debug userdbctl ssh-authorized-keys dropin-user --chain /usr/bin/false) + # Check that invocations with --chain work as expected + userdbctl ssh-authorized-keys --chain dropin-user /bin/echo --asdf | grep -e --asdf + userdbctl ssh-authorized-keys dropin-user --chain /bin/echo --asdf | grep -e --asdf + userdbctl ssh-authorized-keys dropin-user /bin/echo --chain --asdf | grep -e --asdf + userdbctl ssh-authorized-keys --chain dropin-user -- /bin/echo --asdf | grep -e --asdf + userdbctl ssh-authorized-keys --chain -- dropin-user /bin/echo --asdf | grep -e --asdf + userdbctl --chain -- ssh-authorized-keys dropin-user /bin/echo --asdf | grep -e --asdf + (! userdbctl --chain -- ssh-authorized-keys dropin-user -- /bin/echo --asdf) + (! userdbctl '') for opt in json multiplexer output synthesize with-dropin with-nss with-varlink; do (! userdbctl "--$opt=''") From 480449db851f1ec8875ccbf992357d418dd75779 Mon Sep 17 00:00:00 2001 From: Frantisek Sumsal Date: Thu, 7 May 2026 12:59:18 +0200 Subject: [PATCH 194/242] machine: require normalized paths in Copy* D-Bus/Varlink methods Also, do the same for the Varlink BindMount method, since its D-Bus counterpart already does it. --- src/machine/machine-dbus.c | 8 ++++---- src/machine/machine-varlink.c | 18 +++++++++--------- 2 files changed, 13 insertions(+), 13 deletions(-) diff --git a/src/machine/machine-dbus.c b/src/machine/machine-dbus.c index 28f64b3c9b683..624ec4848ae08 100644 --- a/src/machine/machine-dbus.c +++ b/src/machine/machine-dbus.c @@ -593,13 +593,13 @@ int bus_machine_method_copy(sd_bus_message *message, void *userdata, sd_bus_erro copy_flags |= COPY_REPLACE; } - if (!path_is_absolute(src)) - return sd_bus_error_set(error, SD_BUS_ERROR_INVALID_ARGS, "Source path must be absolute."); + if (!path_is_absolute(src) || !path_is_normalized(src)) + return sd_bus_error_set(error, SD_BUS_ERROR_INVALID_ARGS, "Source path must be absolute and normalized."); if (isempty(dest)) dest = src; - else if (!path_is_absolute(dest)) - return sd_bus_error_set(error, SD_BUS_ERROR_INVALID_ARGS, "Destination path must be absolute."); + else if (!path_is_absolute(dest) || !path_is_normalized(dest)) + return sd_bus_error_set(error, SD_BUS_ERROR_INVALID_ARGS, "Destination path must be absolute and normalized."); if (manager->runtime_scope != RUNTIME_SCOPE_USER) { const char *details[] = { diff --git a/src/machine/machine-varlink.c b/src/machine/machine-varlink.c index fcdeeb7ae8b10..d9524c75e1747 100644 --- a/src/machine/machine-varlink.c +++ b/src/machine/machine-varlink.c @@ -821,10 +821,10 @@ static void machine_mount_paramaters_done(MachineMountParameters *p) { int vl_method_bind_mount(sd_varlink *link, sd_json_variant *parameters, sd_varlink_method_flags_t flags, void *userdata) { static const sd_json_dispatch_field dispatch_table[] = { VARLINK_DISPATCH_MACHINE_LOOKUP_FIELDS(MachineMountParameters), - { "source", SD_JSON_VARIANT_STRING, json_dispatch_const_path, offsetof(MachineMountParameters, src), SD_JSON_MANDATORY }, - { "destination", SD_JSON_VARIANT_STRING, json_dispatch_const_path, offsetof(MachineMountParameters, dest), 0 }, - { "readOnly", SD_JSON_VARIANT_BOOLEAN, sd_json_dispatch_stdbool, offsetof(MachineMountParameters, read_only), 0 }, - { "mkdir", SD_JSON_VARIANT_BOOLEAN, sd_json_dispatch_stdbool, offsetof(MachineMountParameters, mkdir), 0 }, + { "source", SD_JSON_VARIANT_STRING, json_dispatch_const_path, offsetof(MachineMountParameters, src), SD_JSON_MANDATORY|SD_JSON_STRICT }, + { "destination", SD_JSON_VARIANT_STRING, json_dispatch_const_path, offsetof(MachineMountParameters, dest), SD_JSON_STRICT }, + { "readOnly", SD_JSON_VARIANT_BOOLEAN, sd_json_dispatch_stdbool, offsetof(MachineMountParameters, read_only), 0 }, + { "mkdir", SD_JSON_VARIANT_BOOLEAN, sd_json_dispatch_stdbool, offsetof(MachineMountParameters, mkdir), 0 }, VARLINK_DISPATCH_POLKIT_FIELD, {} }; @@ -844,7 +844,7 @@ int vl_method_bind_mount(sd_varlink *link, sd_json_variant *parameters, sd_varli if (r != 0) return r; - /* There is no need for extra validation since json_dispatch_const_path() does path_is_valid() and path_is_absolute(). */ + /* There is no need for extra validation since json_dispatch_const_path() with SD_JSON_STRICT does path_is_normalized() and path_is_absolute(). */ const char *dest = p.dest ?: p.src; Machine *machine; @@ -931,9 +931,9 @@ static int copy_done(Operation *operation, int ret, sd_bus_error *error) { int vl_method_copy_internal(sd_varlink *link, sd_json_variant *parameters, sd_varlink_method_flags_t flags, void *userdata, bool copy_from) { static const sd_json_dispatch_field dispatch_table[] = { VARLINK_DISPATCH_MACHINE_LOOKUP_FIELDS(MachineCopyParameters), - { "source", SD_JSON_VARIANT_STRING, json_dispatch_const_path, offsetof(MachineCopyParameters, src), SD_JSON_MANDATORY }, - { "destination", SD_JSON_VARIANT_STRING, json_dispatch_const_path, offsetof(MachineCopyParameters, dest), 0 }, - { "replace", SD_JSON_VARIANT_BOOLEAN, sd_json_dispatch_stdbool, offsetof(MachineCopyParameters, replace), 0 }, + { "source", SD_JSON_VARIANT_STRING, json_dispatch_const_path, offsetof(MachineCopyParameters, src), SD_JSON_MANDATORY|SD_JSON_STRICT }, + { "destination", SD_JSON_VARIANT_STRING, json_dispatch_const_path, offsetof(MachineCopyParameters, dest), SD_JSON_STRICT }, + { "replace", SD_JSON_VARIANT_BOOLEAN, sd_json_dispatch_stdbool, offsetof(MachineCopyParameters, replace), 0 }, VARLINK_DISPATCH_POLKIT_FIELD, {} }; @@ -954,7 +954,7 @@ int vl_method_copy_internal(sd_varlink *link, sd_json_variant *parameters, sd_va if (r != 0) return r; - /* There is no need for extra validation since json_dispatch_const_path() does path_is_valid() and path_is_absolute(). */ + /* There is no need for extra validation since json_dispatch_const_path() with SD_JSON_STRICT does path_is_normalized() and path_is_absolute(). */ const char *dest = p.dest ?: p.src; const char *container_path = copy_from ? p.src : dest; const char *host_path = copy_from ? dest : p.src; From d0c912899a33436d6676b2564eb1ac506f378571 Mon Sep 17 00:00:00 2001 From: Ivan Kruglov Date: Thu, 7 May 2026 02:16:51 -0700 Subject: [PATCH 195/242] test: add missing varlink IDL enum tests for Job and ServiceType PR #41583 (io.systemd.Unit.StartTransient) introduced several new varlink IDL enum types without corresponding enum consistency tests: - JobType, JobState, JobResult in the new io.systemd.Job interface - ServiceType in the Unit interface's ServiceContext Add a new test-varlink-idl-job test file covering all three Job enums, and add ServiceType coverage to the existing test-varlink-idl-unit test. Export vl_type_ServiceType (was static) so it can be referenced from the test. Co-developed-by: Claude Opus 4.6 --- src/shared/varlink-io.systemd.Unit.c | 2 +- src/shared/varlink-io.systemd.Unit.h | 1 + src/test/meson.build | 5 ++++- src/test/test-varlink-idl-job.c | 14 ++++++++++++++ src/test/test-varlink-idl-unit.c | 4 ++++ 5 files changed, 24 insertions(+), 2 deletions(-) create mode 100644 src/test/test-varlink-idl-job.c diff --git a/src/shared/varlink-io.systemd.Unit.c b/src/shared/varlink-io.systemd.Unit.c index 2b1f0f2b1058b..be5c942fcc717 100644 --- a/src/shared/varlink-io.systemd.Unit.c +++ b/src/shared/varlink-io.systemd.Unit.c @@ -976,7 +976,7 @@ static SD_VARLINK_DEFINE_STRUCT_TYPE( /* Service-specific types */ /* Keep in sync with service_type_table[] in src/core/service.c */ -static SD_VARLINK_DEFINE_ENUM_TYPE( +SD_VARLINK_DEFINE_ENUM_TYPE( ServiceType, SD_VARLINK_DEFINE_ENUM_VALUE(simple), SD_VARLINK_DEFINE_ENUM_VALUE(exec), diff --git a/src/shared/varlink-io.systemd.Unit.h b/src/shared/varlink-io.systemd.Unit.h index a39407133844c..f12ac60701ee6 100644 --- a/src/shared/varlink-io.systemd.Unit.h +++ b/src/shared/varlink-io.systemd.Unit.h @@ -32,3 +32,4 @@ extern const sd_varlink_symbol vl_type_AutomountResult; extern const sd_varlink_symbol vl_type_MountResult; extern const sd_varlink_symbol vl_type_CollectMode; extern const sd_varlink_symbol vl_type_JobMode; +extern const sd_varlink_symbol vl_type_ServiceType; diff --git a/src/test/meson.build b/src/test/meson.build index f4288119f94ba..828e309c19ef4 100644 --- a/src/test/meson.build +++ b/src/test/meson.build @@ -491,11 +491,14 @@ executables += [ 'conditions' : ['ENABLE_UTMP'], }, core_test_template + { - 'sources' : files('test-varlink-idl-unit.c'), + 'sources' : files('test-varlink-idl-job.c'), }, core_test_template + { 'sources' : files('test-varlink-idl-manager.c'), }, + core_test_template + { + 'sources' : files('test-varlink-idl-unit.c'), + }, test_template + { 'sources' : files('test-varlink-idl-machine.c'), 'objects' : ['systemd-machined'], diff --git a/src/test/test-varlink-idl-job.c b/src/test/test-varlink-idl-job.c new file mode 100644 index 0000000000000..23c75d573b50f --- /dev/null +++ b/src/test/test-varlink-idl-job.c @@ -0,0 +1,14 @@ +/* SPDX-License-Identifier: LGPL-2.1-or-later */ + +#include "job.h" +#include "tests.h" +#include "test-varlink-idl-util.h" +#include "varlink-io.systemd.Job.h" + +TEST(job_enums_idl) { + TEST_IDL_ENUM(JobType, job_type, vl_type_JobType); + TEST_IDL_ENUM(JobState, job_state, vl_type_JobState); + TEST_IDL_ENUM(JobResult, job_result, vl_type_JobResult); +} + +DEFINE_TEST_MAIN(LOG_DEBUG); diff --git a/src/test/test-varlink-idl-unit.c b/src/test/test-varlink-idl-unit.c index 28b49a0659258..2469859411931 100644 --- a/src/test/test-varlink-idl-unit.c +++ b/src/test/test-varlink-idl-unit.c @@ -7,6 +7,7 @@ #include "mount.h" #include "numa-util.h" #include "process-util.h" +#include "service.h" #include "tests.h" #include "test-varlink-idl-util.h" #include "unit.h" @@ -59,6 +60,9 @@ TEST(unit_enums_idl) { /* MountRuntime enums */ TEST_IDL_ENUM(MountResult, mount_result, vl_type_MountResult); + /* ServiceContext enums */ + TEST_IDL_ENUM(ServiceType, service_type, vl_type_ServiceType); + /* UnitContext enums */ TEST_IDL_ENUM(CollectMode, collect_mode, vl_type_CollectMode); TEST_IDL_ENUM(EmergencyAction, emergency_action, vl_type_EmergencyAction); From c0cda5d91c8a6e08839ee004044c0bdea2faafa7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Zbigniew=20J=C4=99drzejewski-Szmek?= Date: Fri, 8 May 2026 17:25:41 +0200 Subject: [PATCH 196/242] userdbctl: fix erroneous errno Fixes 1604937f83d3154fb1c3b5ef053f7fccd0825ce6. --- src/userdb/userdbctl.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/userdb/userdbctl.c b/src/userdb/userdbctl.c index 35a17c50217da..dd6fc0f5c12f1 100644 --- a/src/userdb/userdbctl.c +++ b/src/userdb/userdbctl.c @@ -1866,7 +1866,8 @@ static int run(int argc, char *argv[]) { if (!e) return log_oom(); - if (setenv("SYSTEMD_ONLY_USERDB", e, true) < 0) + r = RET_NERRNO(setenv("SYSTEMD_ONLY_USERDB", e, true)); + if (r < 0) return log_error_errno(r, "Failed to set $SYSTEMD_ONLY_USERDB: %m"); log_info("Enabled services: %s", e); From 7af304d601e63b892bd965e5d0c7536d6ab31621 Mon Sep 17 00:00:00 2001 From: Luca Boccassi Date: Fri, 8 May 2026 18:02:40 +0100 Subject: [PATCH 197/242] test: make TEST-07-PID1.user-namespace-path more robust The test occasionally fails because lsns returns empty output for the transient unit, even though the process is running. e.g.: [ 1843.556046] TEST-07-PID1.sh[8560]: + systemd-run --unit=newservice --property=Type=exec --property=UserNamespacePath=/proc/8608/ns/user --property=NetworkNamespacePath=/proc/8608/ns/net sleep 3600 [ 1844.205927] TEST-07-PID1.sh[8616]: ++ systemctl show newservice -p MainPID [ 1844.221425] TEST-07-PID1.sh[8618]: ++ lsns -p 8608 -o NS -t net -n [ 1844.229653] TEST-07-PID1.sh[8619]: ++ lsns -p 8614 -o NS -t net -n [ 1844.235563] TEST-07-PID1.sh[8620]: FAIL: expected: '' actual: '4026532522' This could be a race, so switch to Type=notify to try and make it more robust. --- test/units/TEST-07-PID1.user-namespace-path.sh | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/test/units/TEST-07-PID1.user-namespace-path.sh b/test/units/TEST-07-PID1.user-namespace-path.sh index fda83a9566f80..f868b0ce6804d 100755 --- a/test/units/TEST-07-PID1.user-namespace-path.sh +++ b/test/units/TEST-07-PID1.user-namespace-path.sh @@ -7,10 +7,10 @@ set -o pipefail . "$(dirname "$0")"/util.sh # Only reuse the user namespace -systemd-run --unit=oldservice --property=Type=exec --property=PrivateUsers=true sleep 3600 +systemd-run --unit=oldservice --property=Type=notify --property=NotifyAccess=all --property=PrivateUsers=true bash -c 'systemd-notify --ready; exec sleep 3600' OLD_PID=$(systemctl show oldservice -p MainPID | awk -F= '{print $2}') -systemd-run --unit=newservice --property=Type=exec --property=UserNamespacePath=/proc/"$OLD_PID"/ns/user --property=PrivateNetwork=true sleep 3600 +systemd-run --unit=newservice --property=Type=notify --property=NotifyAccess=all --property=UserNamespacePath=/proc/"$OLD_PID"/ns/user --property=PrivateNetwork=true bash -c 'systemd-notify --ready; exec sleep 3600' NEW_PID=$(systemctl show newservice -p MainPID | awk -F= '{print $2}') assert_neq "$(lsns -p "$OLD_PID" -o NS -t net -n)" "$(lsns -p "$NEW_PID" -o NS -t net -n)" @@ -19,10 +19,10 @@ assert_eq "$(lsns -p "$OLD_PID" -o NS -t user -n)" "$(lsns -p "$NEW_PID" -o NS - systemctl stop oldservice newservice # Reuse the user and network namespaces -systemd-run --unit=oldservice --property=Type=exec --property=PrivateUsers=true --property=PrivateNetwork=true sleep 3600 +systemd-run --unit=oldservice --property=Type=notify --property=NotifyAccess=all --property=PrivateUsers=true --property=PrivateNetwork=true bash -c 'systemd-notify --ready; exec sleep 3600' OLD_PID=$(systemctl show oldservice -p MainPID | awk -F= '{print $2}') -systemd-run --unit=newservice --property=Type=exec --property=UserNamespacePath=/proc/"$OLD_PID"/ns/user --property=NetworkNamespacePath=/proc/"$OLD_PID"/ns/net sleep 3600 +systemd-run --unit=newservice --property=Type=notify --property=NotifyAccess=all --property=UserNamespacePath=/proc/"$OLD_PID"/ns/user --property=NetworkNamespacePath=/proc/"$OLD_PID"/ns/net bash -c 'systemd-notify --ready; exec sleep 3600' NEW_PID=$(systemctl show newservice -p MainPID | awk -F= '{print $2}') assert_eq "$(lsns -p "$OLD_PID" -o NS -t net -n)" "$(lsns -p "$NEW_PID" -o NS -t net -n)" @@ -31,10 +31,10 @@ assert_eq "$(lsns -p "$OLD_PID" -o NS -t user -n)" "$(lsns -p "$NEW_PID" -o NS - systemctl stop oldservice newservice # Delegate the network namespace -systemd-run --unit=oldservice --property=Type=exec --property=PrivateUsers=true sleep 3600 +systemd-run --unit=oldservice --property=Type=notify --property=NotifyAccess=all --property=PrivateUsers=true bash -c 'systemd-notify --ready; exec sleep 3600' OLD_PID=$(systemctl show oldservice -p MainPID | awk -F= '{print $2}') -systemd-run --unit=newservice --property=Type=exec --property=UserNamespacePath=/proc/"$OLD_PID"/ns/user --property=DelegateNamespaces=net --property=PrivateNetwork=true sleep 3600 +systemd-run --unit=newservice --property=Type=notify --property=NotifyAccess=all --property=UserNamespacePath=/proc/"$OLD_PID"/ns/user --property=DelegateNamespaces=net --property=PrivateNetwork=true bash -c 'systemd-notify --ready; exec sleep 3600' NEW_PID=$(systemctl show newservice -p MainPID | awk -F= '{print $2}') assert_neq "$(lsns -p "$OLD_PID" -o NS -t net -n)" "$(lsns -p "$NEW_PID" -o NS -t net -n)" From 9bbadefe432ab6e7c2778631349e307bde5bef22 Mon Sep 17 00:00:00 2001 From: albertescanes <225928304+albertescanes@users.noreply.github.com> Date: Fri, 8 May 2026 15:43:34 +0200 Subject: [PATCH 198/242] man: update Fedora image name in vmspawn example Update the Fedora Cloud image name to the current one and use importctl instead of machinectl. --- man/systemd-vmspawn.xml | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/man/systemd-vmspawn.xml b/man/systemd-vmspawn.xml index b23c66514221d..d75993846f754 100644 --- a/man/systemd-vmspawn.xml +++ b/man/systemd-vmspawn.xml @@ -852,16 +852,16 @@ $ systemd-vmspawn --image=image.raw - Import and run a Fedora &fedora_latest_version; Cloud image using machinectl + Import and run a Fedora &fedora_latest_version; Cloud image using importctl $ curl -L \ - -O https://download.fedoraproject.org/pub/fedora/linux/releases/&fedora_latest_version;/Cloud/x86_64/images/Fedora-Cloud-Base-&fedora_latest_version;-&fedora_cloud_release;.x86_64.raw.xz \ + -O https://download.fedoraproject.org/pub/fedora/linux/releases/&fedora_latest_version;/Cloud/x86_64/images/Fedora-Cloud-Base-Generic-&fedora_latest_version;-&fedora_cloud_release;.x86_64.qcow2 \ -O https://download.fedoraproject.org/pub/fedora/linux/releases/&fedora_latest_version;/Cloud/x86_64/images/Fedora-Cloud-&fedora_latest_version;-&fedora_cloud_release;-x86_64-CHECKSUM \ -O https://fedoraproject.org/fedora.gpg $ gpgv --keyring ./fedora.gpg Fedora-Cloud-&fedora_latest_version;-&fedora_cloud_release;-x86_64-CHECKSUM $ sha256sum -c Fedora-Cloud-&fedora_latest_version;-&fedora_cloud_release;-x86_64-CHECKSUM -# machinectl import-raw Fedora-Cloud-Base-&fedora_latest_version;-&fedora_cloud_release;.x86_64.raw.xz fedora-&fedora_latest_version;-cloud +# importctl import-raw -m Fedora-Cloud-Base-Generic-&fedora_latest_version;-&fedora_cloud_release;.x86_64.qcow2 fedora-&fedora_latest_version;-cloud # systemd-vmspawn -M fedora-&fedora_latest_version;-cloud From 73fd578f8667ad4cd4a71eb31ca144aa29e36656 Mon Sep 17 00:00:00 2001 From: Daan De Meyer Date: Fri, 8 May 2026 21:28:36 +0200 Subject: [PATCH 199/242] mkosi: drop libucontext again Turns out it's possible to implement fibers without unnecessary system calls and without ucontext.h so there's no need for libucontext anymore, so drop it from the package list. --- mkosi/mkosi.conf.d/arch/mkosi.conf | 1 - mkosi/mkosi.conf.d/centos-fedora/mkosi.conf | 1 - mkosi/mkosi.images/build/mkosi.conf.d/arch/mkosi.conf | 1 - mkosi/mkosi.images/build/mkosi.conf.d/centos-fedora/mkosi.conf | 1 - mkosi/mkosi.tools.conf/mkosi.conf.d/arch.conf | 1 - mkosi/mkosi.tools.conf/mkosi.conf.d/centos-fedora.conf | 1 - 6 files changed, 6 deletions(-) diff --git a/mkosi/mkosi.conf.d/arch/mkosi.conf b/mkosi/mkosi.conf.d/arch/mkosi.conf index d3c284a2f4bc4..f3503b3789381 100644 --- a/mkosi/mkosi.conf.d/arch/mkosi.conf +++ b/mkosi/mkosi.conf.d/arch/mkosi.conf @@ -29,7 +29,6 @@ Packages= iproute iputils knot - libucontext liburing linux man-db diff --git a/mkosi/mkosi.conf.d/centos-fedora/mkosi.conf b/mkosi/mkosi.conf.d/centos-fedora/mkosi.conf index 4bf316eb89201..925078fbb76a4 100644 --- a/mkosi/mkosi.conf.d/centos-fedora/mkosi.conf +++ b/mkosi/mkosi.conf.d/centos-fedora/mkosi.conf @@ -44,7 +44,6 @@ Packages= knot libcap-ng-utils libmicrohttpd - libucontext liburing man-db nmap-ncat diff --git a/mkosi/mkosi.images/build/mkosi.conf.d/arch/mkosi.conf b/mkosi/mkosi.images/build/mkosi.conf.d/arch/mkosi.conf index 59c7ed6cae9ab..d7a79d11c1051 100644 --- a/mkosi/mkosi.images/build/mkosi.conf.d/arch/mkosi.conf +++ b/mkosi/mkosi.images/build/mkosi.conf.d/arch/mkosi.conf @@ -10,4 +10,3 @@ Packages= diffutils erofs-utils git - libucontext diff --git a/mkosi/mkosi.images/build/mkosi.conf.d/centos-fedora/mkosi.conf b/mkosi/mkosi.images/build/mkosi.conf.d/centos-fedora/mkosi.conf index 35bc886f40c11..4d0ca8917d83f 100644 --- a/mkosi/mkosi.images/build/mkosi.conf.d/centos-fedora/mkosi.conf +++ b/mkosi/mkosi.images/build/mkosi.conf.d/centos-fedora/mkosi.conf @@ -12,6 +12,5 @@ Packages= git-core libasan libubsan - libucontext-devel rpm-build which diff --git a/mkosi/mkosi.tools.conf/mkosi.conf.d/arch.conf b/mkosi/mkosi.tools.conf/mkosi.conf.d/arch.conf index 2fcf333324afb..99592efc01960 100644 --- a/mkosi/mkosi.tools.conf/mkosi.conf.d/arch.conf +++ b/mkosi/mkosi.tools.conf/mkosi.conf.d/arch.conf @@ -10,7 +10,6 @@ Packages= clang-tools-extra github-cli lcov - libucontext liburing musl mypy diff --git a/mkosi/mkosi.tools.conf/mkosi.conf.d/centos-fedora.conf b/mkosi/mkosi.tools.conf/mkosi.conf.d/centos-fedora.conf index 06ff1b66258f2..2715d1494e488 100644 --- a/mkosi/mkosi.tools.conf/mkosi.conf.d/centos-fedora.conf +++ b/mkosi/mkosi.tools.conf/mkosi.conf.d/centos-fedora.conf @@ -12,6 +12,5 @@ Packages= rpm-build libasan libubsan - libucontext-devel liburing-devel compiler-rt From ba932407580677225a3d126332deb5087f1e1aa8 Mon Sep 17 00:00:00 2001 From: Todd Zullinger Date: Fri, 8 May 2026 19:12:43 -0400 Subject: [PATCH 200/242] man/tmpfiles: fix missing 'as' in %t details column This was missing when the details were added in 5a8575ef013 (tmpfiles: also add %t/%S/%C/%L specifiers, 2017-11-23). --- man/tmpfiles.d.xml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/man/tmpfiles.d.xml b/man/tmpfiles.d.xml index 39fcad850d71e..8a908c412519f 100644 --- a/man/tmpfiles.d.xml +++ b/man/tmpfiles.d.xml @@ -776,7 +776,7 @@ d /tmp/foo/bar - - - bmA:1h - %t System or user runtime directory - In mode, this is the same $XDG_RUNTIME_DIR, and /run/ otherwise. + In mode, this is the same as $XDG_RUNTIME_DIR, and /run/ otherwise. From 89148192f8fb6fef31616af458a942ad0fdd5acc Mon Sep 17 00:00:00 2001 From: Valentin David Date: Sat, 28 Mar 2026 19:48:36 +0100 Subject: [PATCH 201/242] tmpfiles: Add commands for file capabilites --- man/tmpfiles.d.xml | 42 +++- src/tmpfiles/tmpfiles.c | 314 ++++++++++++++++++++++++++++++ test/units/TEST-22-TMPFILES.22.sh | 63 ++++++ 3 files changed, 416 insertions(+), 3 deletions(-) create mode 100755 test/units/TEST-22-TMPFILES.22.sh diff --git a/man/tmpfiles.d.xml b/man/tmpfiles.d.xml index 8a908c412519f..e5f694611a60d 100644 --- a/man/tmpfiles.d.xml +++ b/man/tmpfiles.d.xml @@ -76,6 +76,10 @@ a /path-or-glob/to/set/acls - - - - POSIX a+ /path-or-glob/to/append/acls - - - - POSIX ACLs A /path-or-glob/to/set/acls/recursively - - - - POSIX ACLs A+ /path-or-glob/to/append/acls/recursively - - - - POSIX ACLs +k /path-or-glob/to/set/caps - - - - file capabilities +k+ /path-or-glob/to/adjust/caps - - - - file capabilities +K /path-or-glob/to/set/caps/recursively - - - - file capabilities +K+ /path-or-glob/to/adjust/caps/recursively - - - - file capabilities @@ -484,6 +488,37 @@ L /tmp/foobar - - - - /dev/null + + + k + k+ + Set file capabilities, see capabilities7. + Lines of this type accept shell-style globs in place of normal path names. Does not follow + symlinks. + + The syntax follows cap_text_formats7. It + also supports rootuid=INT for the user namespace root + user ID. + + If suffixed with +, current capabilities on the file that are not touched by the expression + will be kept. For example, if all cap_setuid capabilities need to be removed but + others should be kept, one can use k+ with cap_setuid= or + cap_setuid-eip. + + + + + + K + K+ + Same as k and + k+, but recursive. Does not follow + symlinks. + + + @@ -565,8 +600,8 @@ w- /proc/sys/vm/swappiness - - - - 10 -, the default is used: 0755 for directories, 0644 for all other file objects. For z, Z lines, if omitted or when set to -, the file access mode will not be modified. This parameter is ignored for x, - r, R, L, t, and - a lines. + r, R, L, t, + a, and k lines. Optionally, if prefixed with ~, the access mode is masked based on the already set access bits for existing file or directories: if the existing file has all executable bits unset, @@ -707,7 +742,8 @@ d /tmp/foo/bar - - - bmA:1h - suffixed by a newline. For C, specifies the source file or directory. For t and T, determines extended attributes to be set. For a and A, determines ACL attributes to be set. For h and H, - determines the file attributes to set. Ignored for all other lines. + determines the file attributes to set. For k and K, determines + file capabilities to be set. Ignored for all other lines. This field can contain specifiers, see below. diff --git a/src/tmpfiles/tmpfiles.c b/src/tmpfiles/tmpfiles.c index 44843f3ca77ec..b6007f6207ed6 100644 --- a/src/tmpfiles/tmpfiles.c +++ b/src/tmpfiles/tmpfiles.c @@ -14,6 +14,7 @@ #include "bitfield.h" #include "btrfs-util.h" #include "build.h" +#include "capability-list.h" #include "capability-util.h" #include "chase.h" #include "chattr-util.h" @@ -104,6 +105,8 @@ typedef enum ItemType { RECURSIVE_SET_XATTR = 'T', SET_ACL = 'a', RECURSIVE_SET_ACL = 'A', + SET_FCAPS = 'k', + RECURSIVE_SET_FCAPS = 'K', SET_ATTRIBUTE = 'h', RECURSIVE_SET_ATTRIBUTE = 'H', IGNORE_PATH = 'x', @@ -126,6 +129,18 @@ typedef enum AgeBy { AGE_BY_DEFAULT_DIR = AGE_BY_ATIME | AGE_BY_BTIME | AGE_BY_MTIME, } AgeBy; +typedef struct FCapsPatch { + uint64_t mask; + uint64_t set; +} FCapsPatch; + +typedef struct FCapsUpdate { + uid_t rootuid; + FCapsPatch inheritable; + FCapsPatch permitted; + FCapsPatch effective; +} FCapsUpdate; + typedef struct Item { ItemType type; @@ -139,6 +154,7 @@ typedef struct Item { acl_t acl_access_exec; acl_t acl_default; #endif + FCapsUpdate fcaps; uid_t uid; gid_t gid; mode_t mode; @@ -171,6 +187,8 @@ typedef struct Item { bool ignore_if_target_missing:1; + bool fcaps_set:1; + OperationMask done; } Item; @@ -408,6 +426,8 @@ static bool needs_glob(ItemType t) { RECURSIVE_SET_XATTR, SET_ACL, RECURSIVE_SET_ACL, + SET_FCAPS, + RECURSIVE_SET_FCAPS, SET_ATTRIBUTE, RECURSIVE_SET_ATTRIBUTE, IGNORE_PATH, @@ -1502,6 +1522,271 @@ static int path_set_acls( return r; } +static int capability_vfs_from_string(const char *s, FCapsUpdate *ret) { + FCapsUpdate set = { + .rootuid = UID_INVALID, + }; + + assert(s); + assert(ret); + + for (const char *p = s;;) { + _cleanup_free_ char *word = NULL, *keys = NULL; + char *value, sep; + int r; + + r = extract_first_word(&p, &word, NULL, EXTRACT_UNQUOTE|EXTRACT_RELAX); + if (r < 0) + return log_debug_errno(r, "Failed to split words from '%s': %m", p); + if (r == 0) + break; + + value = strpbrk(word, "=+-"); + if (!value) + return log_debug_errno(SYNTHETIC_ERRNO(EINVAL), "Failed to parse key/value '%s': %m", word); + keys = strndup(word, value - word); + if (!keys) + return log_oom(); + sep = *value; + value++; + + if (sep == '=' && streq(keys, "rootuid")) { + r = parse_uid(value, &set.rootuid); + if (r < 0) + return log_debug_errno(r, "Failed to parse rootuid value '%s': %m", value); + } else { + uint64_t caps = 0; + + if (!in_charset(value, "eip")) + return log_debug_errno(SYNTHETIC_ERRNO(EINVAL), "Failed to parse value '%s': %m", value); + + if (STR_IN_SET(keys, "all", "")) + caps = all_capabilities(); + else + for (const char *remaining_keys = keys; remaining_keys && *remaining_keys;) { + _cleanup_free_ char *key = NULL; + + r = extract_first_word(&remaining_keys, &key, ",", /* flags= */0); + if (r < 0) + return log_debug_errno(r, "Failed to parse capability list '%s': %m", keys); + if (r == 0) + break; + if (streq(key, "all")) + caps = all_capabilities(); + else { + r = capability_from_name(key); + if (r < 0) + return log_debug_errno(r, "Failed to parse capability '%s': %m", key); + caps |= UINT64_C(1) << r; + } + } + + if (sep == '=') { + set.permitted.mask |= caps; + set.inheritable.mask |= caps; + set.effective.mask |= caps; + } + if (IN_SET(sep, '=', '+')) { + if (strchr(value, 'p')) + set.permitted.set |= caps; + if (strchr(value, 'i')) + set.inheritable.set |= caps; + if (strchr(value, 'e')) + set.effective.set |= caps; + } else { + if (strchr(value, 'p')) { + set.permitted.mask |= caps; + set.permitted.set &= ~caps; + } + if (strchr(value, 'i')) { + set.inheritable.mask |= caps; + set.inheritable.set &= ~caps; + } + if (strchr(value, 'e')) { + set.effective.mask |= caps; + set.effective.set &= ~caps; + } + } + } + } + + *ret = set; + + return 0; +} + +static size_t cap_data_size(uint32_t revision) { + switch (revision) { + case VFS_CAP_REVISION_1: + return XATTR_CAPS_SZ_1; + case VFS_CAP_REVISION_2: + return XATTR_CAPS_SZ_2; + case VFS_CAP_REVISION_3: + return XATTR_CAPS_SZ_3; + default: + return SIZE_MAX; + } +} + +static bool inode_type_can_fcaps(mode_t mode) { + return S_ISREG(mode); +} + +static int apply_fcaps(int fd, const char *path, bool append, const FCapsUpdate *set) { + struct vfs_ns_cap_data val = { + .magic_etc = htole32(VFS_CAP_REVISION), + }; + le32_t effective[VFS_CAP_U32] = {}; + struct stat st; + int r; + + assert(fd >= 0); + assert(path); + assert(set); + + if (fstat(fd, &st) < 0) + return log_error_errno(errno, "Failed to fstat(%s): %m", path); + + if (hardlink_vulnerable(&st)) + return log_error_errno(SYNTHETIC_ERRNO(EPERM), + "Refusing to set file capabilities on hardlinked file %s while the fs.protected_hardlinks sysctl is turned off.", + path); + + if (!inode_type_can_fcaps(st.st_mode)) { + log_debug("Skipping file capabilities for '%s' (inode type does not support file capabilities).", path); + return 0; + } + + if (append) { + _cleanup_free_ char *xattr_data = NULL; + size_t xattr_data_len; + + r = fgetxattr_malloc(fd, "security.capability", &xattr_data, &xattr_data_len); + if (r == -ENODATA) + log_debug("No capabilities found for '%s'", path); + else if (r < 0) + return log_error_errno(r, "Failed to read capabilities of '%s': %m", path); + else { + _cleanup_free_ struct vfs_ns_cap_data *original = NULL; + + if (xattr_data_len < endoffsetof_field(struct vfs_ns_cap_data, magic_etc)) + return log_error_errno(SYNTHETIC_ERRNO(EINVAL), "Extended attributes for capabilities are too small"); + + original = realloc0(xattr_data, sizeof(struct vfs_ns_cap_data)); + if (!original) + return log_oom(); + xattr_data = NULL; + + size_t expected_size = cap_data_size(le32toh(original->magic_etc) & VFS_CAP_REVISION_MASK); + if (expected_size == SIZE_MAX) + return log_error_errno(SYNTHETIC_ERRNO(EINVAL), "Unknown type of file capabilities"); + if (xattr_data_len != expected_size) + return log_error_errno(SYNTHETIC_ERRNO(EINVAL), "Size of file capabilities does not match its type"); + + if (FLAGS_SET(le32toh(original->magic_etc), VFS_CAP_FLAGS_EFFECTIVE)) + for (size_t n = 0; n < VFS_CAP_U32; n++) + effective[n] = original->data[n].permitted | original->data[n].inheritable; + + for (size_t n = 0; n < VFS_CAP_U32; n++) { + val.data[n].permitted = original->data[n].permitted; + val.data[n].inheritable = original->data[n].inheritable; + } + + val.rootid = original->rootid; + } + } + + for (size_t n = 0; n < VFS_CAP_U32; n++) { + size_t bit_shift = 32*n; + val.data[n].inheritable &= htole32(~(set->inheritable.mask >> bit_shift) & UINT32_C(0xffffffff)); + val.data[n].inheritable |= htole32((set->inheritable.set >> bit_shift) & UINT32_C(0xffffffff)); + val.data[n].permitted &= htole32(~(set->permitted.mask >> bit_shift) & UINT32_C(0xffffffff)); + val.data[n].permitted |= htole32((set->permitted.set >> bit_shift) & UINT32_C(0xffffffff)); + effective[n] &= htole32(~(set->effective.mask >> bit_shift) & UINT32_C(0xffffffff)); + effective[n] |= htole32((set->effective.set >> bit_shift) & UINT32_C(0xffffffff)); + if (effective[n] != 0) + val.magic_etc |= htole32(VFS_CAP_FLAGS_EFFECTIVE); + } + + if (FLAGS_SET(le32toh(val.magic_etc), VFS_CAP_FLAGS_EFFECTIVE)) + for (size_t n = 0; n < VFS_CAP_U32; n++) + if ((val.data[n].permitted | val.data[n].inheritable) != effective[n]) + return log_error_errno(SYNTHETIC_ERRNO(EINVAL), "Inconsistent effective bits"); + + if (set->rootuid != UID_INVALID) + val.rootid = htole32(set->rootuid); + + log_action("Would try to set", "Trying to set", + "%s capabilities on %s", path); + + if (!arg_dry_run) { + r = xsetxattr_full(fd, /* path= */ NULL, AT_EMPTY_PATH, "security.capability", (void*)&val, sizeof(val), /* xattr_flags= */ 0); + if (r < 0) + return log_error_errno(r, "Failed to setcap '%s': %m", path); + } + + return 0; +} + +static int parse_caps_from_arg(Item *item) { + FCapsUpdate fcaps; + int r; + + assert(item); + + r = capability_vfs_from_string(item->argument, &fcaps); + if (r < 0) { + log_full_errno(arg_graceful ? LOG_DEBUG : LOG_WARNING, + r, "Failed to parse capabilities \"%s\", ignoring: %m", item->argument); + return 0; + } + + item->fcaps_set = true; + item->fcaps = fcaps; + + return 0; +} + +static int fd_set_caps( + Context *c, + Item *item, + int fd, + const char *path, + const struct stat *st, + CreationMode creation) { + assert(c); + assert(item); + assert(fd >= 0); + assert(path); + + if (!item->fcaps_set) + return 0; + return apply_fcaps(fd, path, item->append_or_force, &item->fcaps); +} + +static int path_set_caps( + Context *c, + Item *item, + const char *path, + CreationMode creation) { + _cleanup_close_ int fd = -EBADF; + + assert(c); + assert(item); + assert(path); + + if (!item->fcaps_set) + return 0; + + fd = path_open_safe(path); + if (fd == -ENOENT) + return 0; + if (fd < 0) + return fd; + + return apply_fcaps(fd, path, item->append_or_force, &item->fcaps); +} + static int parse_attribute_from_arg(Item *item) { static const struct { char character; @@ -2955,6 +3240,18 @@ static int create_item(Context *c, Item *i) { return r; break; + case SET_FCAPS: + r = glob_item(c, i, path_set_caps); + if (r < 0) + return r; + break; + + case RECURSIVE_SET_FCAPS: + r = glob_item_recursively(c, i, fd_set_caps); + if (r < 0) + return r; + break; + case SET_ATTRIBUTE: r = glob_item(c, i, path_set_attribute); if (r < 0) @@ -3816,6 +4113,23 @@ static int parse_line( return r; break; + case SET_FCAPS: + case RECURSIVE_SET_FCAPS: + if (unbase64) { + *invalid_config = true; + return log_syntax(NULL, LOG_ERR, fname, line, SYNTHETIC_ERRNO(EBADMSG), + "base64 decoding not supported for capabilities."); + } + if (!i.argument) { + *invalid_config = true; + return log_syntax(NULL, LOG_ERR, fname, line, SYNTHETIC_ERRNO(EBADMSG), + "Set capabilities requires argument."); + } + r = parse_caps_from_arg(&i); + if (r < 0) + return r; + break; + case SET_ATTRIBUTE: case RECURSIVE_SET_ATTRIBUTE: if (unbase64) { diff --git a/test/units/TEST-22-TMPFILES.22.sh b/test/units/TEST-22-TMPFILES.22.sh new file mode 100755 index 0000000000000..37c9f709b75ce --- /dev/null +++ b/test/units/TEST-22-TMPFILES.22.sh @@ -0,0 +1,63 @@ +#!/usr/bin/env bash +# SPDX-License-Identifier: LGPL-2.1-or-later +# +set -eux +set -o pipefail + +# shellcheck source=test/units/util.sh +. "$(dirname "$0")"/util.sh + +rm -f /tmp/setcap +touch /tmp/setcap + +systemd-tmpfiles --dry-run --create - < Date: Sat, 9 May 2026 05:59:10 +0000 Subject: [PATCH 202/242] po: Translated using Weblate (Romanian) Currently translated at 68.7% (183 of 266 strings) Co-authored-by: Petru Rebeja Translate-URL: https://translate.fedoraproject.org/projects/systemd/main/ro/ Translation: systemd/main --- po/ro.po | 25 ++++++++++--------------- 1 file changed, 10 insertions(+), 15 deletions(-) diff --git a/po/ro.po b/po/ro.po index 876dc27773b09..97dcf594331a3 100644 --- a/po/ro.po +++ b/po/ro.po @@ -4,21 +4,22 @@ # va511e , 2015. # Daniel Șerbănescu , 2015, 2017. # Vlad , 2020, 2021. +# Petru Rebeja , 2026. msgid "" msgstr "" "Report-Msgid-Bugs-To: \n" "POT-Creation-Date: 2026-03-06 03:46+0900\n" -"PO-Revision-Date: 2021-01-12 17:36+0000\n" -"Last-Translator: Vlad \n" +"PO-Revision-Date: 2026-05-09 05:59+0000\n" +"Last-Translator: Petru Rebeja \n" "Language-Team: Romanian \n" +"systemd/main/ro/>\n" "Language: ro\n" "MIME-Version: 1.0\n" "Content-Type: text/plain; charset=UTF-8\n" "Content-Transfer-Encoding: 8bit\n" "Plural-Forms: nplurals=3; plural=n==1 ? 0 : (n==0 || (n%100 > 0 && n%100 < " "20)) ? 1 : 2;\n" -"X-Generator: Weblate 4.4\n" +"X-Generator: Weblate 5.17.1\n" #: src/core/org.freedesktop.systemd1.policy.in:22 msgid "Send passphrase back to system" @@ -125,16 +126,12 @@ msgstr "" "utilizator." #: src/home/org.freedesktop.home1.policy:53 -#, fuzzy msgid "Update your home area" -msgstr "Actualizează un spațiu personal" +msgstr "Actualizați-vă spațiu personal" #: src/home/org.freedesktop.home1.policy:54 -#, fuzzy msgid "Authentication is required to update your home area." -msgstr "" -"Autentificarea este necesară pentru a actualiza spațiul personal al unui " -"utilizator." +msgstr "Pentru a-ți actualiza spațiul personal, este necesară autentificarea." #: src/home/org.freedesktop.home1.policy:63 msgid "Resize a home area" @@ -158,16 +155,14 @@ msgstr "" "al unui utilizator." #: src/home/org.freedesktop.home1.policy:83 -#, fuzzy msgid "Activate a home area" -msgstr "Crează un spațiu personal" +msgstr "Activează un spațiu personal" #: src/home/org.freedesktop.home1.policy:84 -#, fuzzy msgid "Authentication is required to activate a user's home area." msgstr "" -"Autentificarea este necesară pentru a crea spațiul personal al unui " -"utilizator." +"Pentru a activa spațiul personal al unui utilizator este necesară " +"autentificarea." #: src/home/org.freedesktop.home1.policy:93 msgid "Manage Home Directory Signing Keys" From b7be9ccc8f4299269f72bde49e426a7a9d484da9 Mon Sep 17 00:00:00 2001 From: Matheus Afonso Martins Moreira Date: Sat, 9 May 2026 08:53:01 -0300 Subject: [PATCH 203/242] hwdb/keyboard: fix KP_Enter on Clevo PA70ES The ITE keyboard controller firmware (version 0xAB83) is shared between the Clevo PA70ES and the X+ piccolo series. The piccolo's hwdb rule matches by input device ID (evdev:input:b0011v0001p0001eAB83*) and remaps scan code 0x9c (KP_Enter) to Enter, since the piccolo has no numpad and its main Enter key sends the wrong scan code. The Clevo PA70ES has a real numpad. The piccolo rule matches it because both laptops use the same ITE controller firmware, which breaks KP_Enter on the PA70ES. Add a DMI-specific override that restores KEY_KPENTER for 0x9c on the PA70ES. The piccolo rule should ideally be narrowed to use DMI matching instead of input device ID to avoid catching other laptops with the same ITE controller firmware. --- hwdb.d/60-keyboard.hwdb | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/hwdb.d/60-keyboard.hwdb b/hwdb.d/60-keyboard.hwdb index ebc08560fe9e8..23023740901ac 100644 --- a/hwdb.d/60-keyboard.hwdb +++ b/hwdb.d/60-keyboard.hwdb @@ -344,6 +344,15 @@ evdev:atkbd:dmi:bvn*:bvr*:bd*:svn*BenQ*:pn*Joybook*R22*:* # Clevo ########################################################### +# Clevo PA70ES (Avell C73) +# The ITE keyboard controller firmware (version 0xAB83) is shared with +# the X+ piccolo. The piccolo rule (below) matches by input device ID +# and remaps KP_Enter to Enter since the piccolo has no numpad and its +# main Enter sends the wrong scan code. The PA70ES has a real numpad, +# so the remap breaks KP_Enter. This restores the correct mapping. +evdev:atkbd:dmi:bvn*:bvr*:bd*:svnNotebook:pnPA70ES:* + KEYBOARD_KEY_9c=kpenter + evdev:atkbd:dmi:bvn*:bvr*:bd*:svnNotebook:pnW65_67SZ:* KEYBOARD_KEY_a0=!mute KEYBOARD_KEY_a2=!playpause From cd57308303ecffaef01a5b27d7fa4ef7e7d6a9ce Mon Sep 17 00:00:00 2001 From: Luca Boccassi Date: Fri, 8 May 2026 15:09:25 +0100 Subject: [PATCH 204/242] test: workaroud flaky TEST-53-TIMER.restart-trigger against journald cgroup attribution race The restart-trigger subtest occasionally fails on CI with: + assert_eq 0 1 FAIL: expected: '1' actual: '0' even though the timer fires correctly and the echo message is in fact written to the journal. The failure happens because the test relies on `journalctl --unit=$UNIT_NAME` to find the message, and that filter is based on the cgroup journald looks up for the writer PID at the time the stdout message is received. For very short-lived processes spawned via systemd-executor (like `echo`), that lookup is racy: the writer's `/proc/$PID/cgroup` can still resolve to `/init.scope` (systemd-executor's own cgroup) rather than the service's cgroup, so the message ends up attributed to `init.scope` and `--unit=` filtering misses it. __CURSOR=s=6f90ff5b6a0e47c3a527a9b4892af965;i=f8ed;b=3dad0cc689a04781879e4dd846d24432;m=17703dc;t=6513be1be2506;x=8d3009a687724b5e __REALTIME_TIMESTAMP=1778167492519174 __MONOTONIC_TIMESTAMP=24576988 __SEQNUM=63725 __SEQNUM_ID=6f90ff5b6a0e47c3a527a9b4892af965 _BOOT_ID=3dad0cc689a04781879e4dd846d24432 _HOSTNAME=H PRIORITY=6 SYSLOG_FACILITY=3 _UID=0 _GID=0 _CAP_EFFECTIVE=1ffffffffff _SYSTEMD_CGROUP=/init.scope _SYSTEMD_UNIT=init.scope _SYSTEMD_SLICE=-.slice _EXE=/usr/lib/systemd/systemd-executor _TRANSPORT=stdout _COMM=18 _MACHINE_ID=89ef83adc0bc4a33a83a227201b57203 _RUNTIME_SCOPE=system _PID=816 _CMDLINE=/usr/lib/systemd/systemd-executor --deserialize 50 --log-level debug,console:info --log-target journal-or-kmsg _STREAM_ID=8e8e4166c99e40afaa58bcd04a50a7f4 SYSLOG_IDENTIFIER=echo MESSAGE=Hello from timer 29581 Note _SYSTEMD_UNIT=init.scope / _SYSTEMD_CGROUP=/init.scope on the echo output: this is what causes `--unit=timer-restart-14362` to return 0 hits. The test failure logs from the same run confirm this: + JOURNAL_TS=1778160292 + journalctl -p info --since=@1778160292 --unit=timer-restart-14362 '--grep=Hello from timer 29581' -- No entries -- + systemctl restart timer-restart-14362.timer ... + date '--set=+2 hours' Thu May 7 15:24:52 UTC 2026 + sleep 1 ... echo[816]: Hello from timer 29581 ... ++ journalctl -q -p info --since=@1778160292 --unit=timer-restart-14362 '--grep=Hello from timer 29581' ++ wc -l + assert_eq 0 1 FAIL: expected: '1' actual: '0' For comparison, in a passing local run the same message is attributed correctly to the service unit (_SYSTEMD_UNIT=timer-restart-24147.service), so `--unit=` matches. Work around the underlying journald race in the test by setting an explicit `SyslogIdentifier=` on the service and matching with `-t` plus the unique grep pattern: `SyslogIdentifier` is carried over the stdout stream protocol and is not affected by the cgroup lookup race. Co-developed-by: Claude Opus 4.7 --- test/units/TEST-53-TIMER.restart-trigger.sh | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/test/units/TEST-53-TIMER.restart-trigger.sh b/test/units/TEST-53-TIMER.restart-trigger.sh index 71de99cf2762e..6a4add9d638d5 100755 --- a/test/units/TEST-53-TIMER.restart-trigger.sh +++ b/test/units/TEST-53-TIMER.restart-trigger.sh @@ -25,13 +25,15 @@ EOF cat >"/run/systemd/system/$UNIT_NAME.service" < Date: Fri, 8 May 2026 16:16:04 +0100 Subject: [PATCH 205/242] test: fix flaky TEST-07-PID1.socket-defer.sh The socket's SubState transitions from 'running' to 'listening' shortly after the triggered service becomes inactive, so the assert can race and observe the stale 'running' state: [ 1882.425335] systemd[1]: TEST-07-PID1-socket-defer-23279.service: Changed dead -> running [ 1882.495150] TEST-07-PID1.sh[20535]: ++ systemctl show TEST-07-PID1-socket-defer-23279.socket -P SubState [ 1882.514239] TEST-07-PID1.sh[20509]: + assert_eq running listening [ 1882.510529] systemd[1]: TEST-07-PID1-socket-defer-23279.socket: Flushing socket before listening. [ 1882.510559] systemd[1]: TEST-07-PID1-socket-defer-23279.socket: Changed running -> listening Poll for 30s instead of directly asserting to try and make it more robust --- test/units/TEST-07-PID1.socket-defer.sh | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/test/units/TEST-07-PID1.socket-defer.sh b/test/units/TEST-07-PID1.socket-defer.sh index 304c3bce3d90c..d8a8ff02e0499 100755 --- a/test/units/TEST-07-PID1.socket-defer.sh +++ b/test/units/TEST-07-PID1.socket-defer.sh @@ -60,7 +60,9 @@ wait_for_start() { wait_for_stop() { timeout 30 bash -c "while systemctl -q is-active '$UNIT_NAME.service'; do sleep .5; done" - assert_eq "$(systemctl show "$UNIT_NAME.socket" -P SubState)" "listening" + # The socket's SubState transitions from 'running' to 'listening' shortly after the triggered + # service becomes inactive, so wait for that transition instead of checking once and racing. + timeout 30 bash -c "until [[ \$(systemctl show '$UNIT_NAME.socket' -P SubState) == 'listening' ]]; do sleep .5; done" } # DeferTrigger=no: job mode replace From 0bf094b7636471d4c25df5b60d1e8fe2512601e5 Mon Sep 17 00:00:00 2001 From: Luca Boccassi Date: Fri, 8 May 2026 20:25:56 +0100 Subject: [PATCH 206/242] test: bump TEST-58-REPART timeouts with sanitizers The test is flaky under sanitizers as the timeouts seem to be too short, bump them like we do in other tests to try and make it more robust when running with sanitizers --- test/units/TEST-58-REPART.sh | 27 +++++++++++++++++---------- 1 file changed, 17 insertions(+), 10 deletions(-) diff --git a/test/units/TEST-58-REPART.sh b/test/units/TEST-58-REPART.sh index 7b536fa09209f..df6f935c98035 100755 --- a/test/units/TEST-58-REPART.sh +++ b/test/units/TEST-58-REPART.sh @@ -20,6 +20,13 @@ export PAGER=cat # Disable use of special glyphs such as → export SYSTEMD_UTF8=0 +# Sanitizer runs are significantly slower, so give udevadm wait 3 times longer timeouts +if [[ -v ASAN_OPTIONS || -v UBSAN_OPTIONS ]]; then + UDEVADM_WAIT_TIMEOUT=180 +else + UDEVADM_WAIT_TIMEOUT=60 +fi + seed=750b6cd5c4ae4012a15e7be3c29e6a47 esp_guid=C12A7328-F81F-11D2-BA4B-00A0C93EC93B @@ -378,7 +385,7 @@ $imgs/zzz7 : start= 6291416, size= 131072, type=3B8F8425-20E0-4F3B-907F fi loop="$(losetup -P --show --find "$imgs/zzz")" - udevadm wait --timeout=60 --settle "${loop:?}p7" + udevadm wait --timeout="$UDEVADM_WAIT_TIMEOUT" --settle "${loop:?}p7" cryptsetup luksDump "${loop}p7" | grep 'Flags:[[:space:]]*allow-discards' >/dev/null @@ -438,7 +445,7 @@ $imgs/zzz7 : start= 6291416, size= 131072, type=3B8F8425-20E0-4F3B-907F $imgs/zzz8 : start= 6422488, size= 131072, type=4D21B016-B534-45C2-A9FB-5C16E091FD2D, uuid=329B9DB2-DFD9-4F39-8EBF-53B582B05FCD, name=\"luks-no-discards\", attrs=\"GUID:59\"" loop="$(losetup -P --show --find "$imgs/zzz")" - udevadm wait --timeout=60 --settle "${loop:?}p8" + udevadm wait --timeout="$UDEVADM_WAIT_TIMEOUT" --settle "${loop:?}p8" cryptsetup luksDump "${loop}p8" | grep 'Flags:[[:space:]]*(no flags)' >/dev/null losetup -d "$loop" @@ -1058,7 +1065,7 @@ EOF # shellcheck disable=SC2064 trap "rm -rf '$defs' '$imgs' ; losetup -d '$loop'" RETURN ERR - udevadm wait --timeout=60 --settle "${loop:?}p1" "${loop:?}p2" + udevadm wait --timeout="$UDEVADM_WAIT_TIMEOUT" --settle "${loop:?}p1" "${loop:?}p2" # Check that the verity block sizes are as expected veritysetup dump "${loop}p2" | grep 'Data block size:' | grep '4096' >/dev/null @@ -1118,7 +1125,7 @@ EOF # shellcheck disable=SC2064 trap "rm -rf '$defs' '$imgs' ; losetup -d '$loop'" RETURN ERR - udevadm wait --timeout=60 --settle "${loop:?}p1" "${loop:?}p2" + udevadm wait --timeout="$UDEVADM_WAIT_TIMEOUT" --settle "${loop:?}p1" "${loop:?}p2" output=$(sfdisk -J "$loop") @@ -1200,7 +1207,7 @@ EOF fi loop=$(losetup -P --show -f "$imgs/zzz") - udevadm wait --timeout=60 --settle "${loop:?}p1" "${loop:?}p2" + udevadm wait --timeout="$UDEVADM_WAIT_TIMEOUT" --settle "${loop:?}p1" "${loop:?}p2" # Test that /usr/def did not end up in the root partition but other files did. mkdir "$imgs/mnt" @@ -1425,7 +1432,7 @@ EOF truncate -s 100m "$imgs/$sector.img" loop=$(losetup -b "$sector" -P --show -f "$imgs/$sector.img" ) - udevadm wait --timeout=60 --settle "${loop:?}" + udevadm wait --timeout="$UDEVADM_WAIT_TIMEOUT" --settle "${loop:?}" systemd-repart --offline="$OFFLINE" \ --pretty=yes \ @@ -1808,7 +1815,7 @@ EOF # shellcheck disable=SC2064 trap "umount '$imgs/mount' 2>/dev/null || true; losetup -d '$loop' 2>/dev/null || true; rm -rf '$defs' '$imgs'" RETURN echo "Loop device: $loop" - udevadm wait --timeout=60 --settle "${loop:?}p1" + udevadm wait --timeout="$UDEVADM_WAIT_TIMEOUT" --settle "${loop:?}p1" mkdir -p "$imgs/mount" mount -t btrfs "${loop:?}p1" "$imgs/mount" @@ -1936,7 +1943,7 @@ EOF "$imgs/encint.img" loop="$(losetup -P --show --find "$imgs/encint.img")" - udevadm wait --timeout=60 --settle "${loop:?}p1" + udevadm wait --timeout="$UDEVADM_WAIT_TIMEOUT" --settle "${loop:?}p1" volume="test-repart-luksint-$RANDOM" dmstatus="$imgs/dmsetup-$RANDOM" @@ -2041,7 +2048,7 @@ EOF "$imgs/enckeyhash.img" loop="$(losetup -P --show --find "$imgs/enckeyhash.img")" - udevadm wait --timeout=60 --settle "${loop:?}p1" + udevadm wait --timeout="$UDEVADM_WAIT_TIMEOUT" --settle "${loop:?}p1" touch "$imgs/empty-password" @@ -2101,7 +2108,7 @@ EOF "$imgs/fstabcrypttabrepart.img" loop="$(losetup -P --show --find "$imgs/fstabcrypttabrepart.img")" - udevadm wait --timeout=60 --settle "${loop:?}p1" + udevadm wait --timeout="$UDEVADM_WAIT_TIMEOUT" --settle "${loop:?}p1" touch "$imgs/empty-password" From 87d282a73d9db5cfb2885f37d7fb06194a8ca65e Mon Sep 17 00:00:00 2001 From: favilances Date: Sat, 9 May 2026 21:52:04 +0300 Subject: [PATCH 207/242] test-path-util: add coverage for path edge cases Path utility helpers are used throughout systemd for validation, comparison and manipulation of filesystem paths. Add coverage for additional corner cases around absolute path detection, normalization and prefix matching so regressions in these common helpers are easier to catch. Co-developed-by: OpenAI Codex Signed-off-by: favilances --- src/test/test-path-util.c | 22 ++++++++++++++++++++++ 1 file changed, 22 insertions(+) diff --git a/src/test/test-path-util.c b/src/test/test-path-util.c index ef4f29172d838..1dc260a46732e 100644 --- a/src/test/test-path-util.c +++ b/src/test/test-path-util.c @@ -24,8 +24,12 @@ TEST(print_paths) { } TEST(path) { + assert_se(!path_is_absolute(NULL)); + assert_se(!path_is_absolute("")); assert_se( path_is_absolute("/")); + assert_se( path_is_absolute("//")); assert_se(!path_is_absolute("./")); + assert_se(!path_is_absolute("foo/bar")); assert_se( PATH_IN_SET("/bin", "/", "/bin", "/foo")); assert_se( PATH_IN_SET("/bin", "/bin")); @@ -42,6 +46,21 @@ TEST(path) { assert_se(!path_equal(NULL, "a")); } +TEST(path_is_normalized) { + assert_se( path_is_normalized("/")); + assert_se( path_is_normalized("/usr/bin")); + assert_se( path_is_normalized("usr/bin")); + + assert_se(!path_is_normalized("")); + assert_se(!path_is_normalized(".")); + assert_se(!path_is_normalized("./usr/bin")); + assert_se(!path_is_normalized("/usr//bin")); + assert_se(!path_is_normalized("/usr/./bin")); + assert_se(!path_is_normalized("/usr/bin/.")); + assert_se(!path_is_normalized("../usr/bin")); + assert_se(!path_is_normalized("/usr/../bin")); +} + TEST(is_path) { assert_se(!is_path("foo")); assert_se(!is_path("dos.ext")); @@ -760,6 +779,9 @@ TEST(path_startswith) { test_path_startswith_one("/foo/bar/barfoo/", "/foo/bar/barfo", NULL, NULL); test_path_startswith_one("/foo/bar/barfoo/", "/foo/bar/bar", NULL, NULL); test_path_startswith_one("/foo/bar/barfoo/", "/fo", NULL, NULL); + test_path_startswith_one("/usr/binary", "/usr/bin", NULL, NULL); + test_path_startswith_one("/foo/barista", "/foo/bar", NULL, NULL); + test_path_startswith_one("foo/barista", "foo/bar", NULL, NULL); } static void test_path_startswith_return_leading_slash_one(const char *path, const char *prefix, const char *expected) { From 20594fdd6e11dcfef1b833f63c9b5a0d44dbc175 Mon Sep 17 00:00:00 2001 From: Ambareesh Balaji Date: Sun, 10 May 2026 23:25:13 +0000 Subject: [PATCH 208/242] elf-util: pass exectable path to dwfl_core_file_report Without it, stack trace symbols fail to resolve with .gnu_debuglink split debug info. --- src/shared/elf-util.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/src/shared/elf-util.c b/src/shared/elf-util.c index 97188ca41483e..7c3d10f9e3374 100644 --- a/src/shared/elf-util.c +++ b/src/shared/elf-util.c @@ -619,6 +619,7 @@ static int module_callback(Dwfl_Module *mod, void **userdata, const char *name, static int parse_core( int fd, + const char *executable, const char *root, char **ret, sd_json_variant **ret_package_metadata, @@ -667,7 +668,7 @@ static int parse_core( log_warning("Compiled without dwfl_set_sysroot() support, ignoring provided root directory."); #endif - if (sym_dwfl_core_file_report(c.dwfl, c.elf, NULL) < 0) + if (sym_dwfl_core_file_report(c.dwfl, c.elf, executable) < 0) return log_warning_errno(SYNTHETIC_ERRNO(EINVAL), "Could not parse core file, dwfl_core_file_report() failed: %s", sym_dwfl_errmsg(sym_dwfl_errno())); if (sym_dwfl_report_end(c.dwfl, NULL, NULL) != 0) @@ -734,7 +735,7 @@ static int parse_elf( if (elf_header.e_type == ET_CORE) { _cleanup_free_ char *out = NULL; - r = parse_core(fd, root, ret ? &out : NULL, &package_metadata, &dlopen_metadata); + r = parse_core(fd, executable, root, ret ? &out : NULL, &package_metadata, &dlopen_metadata); if (r < 0) return log_warning_errno(r, "Failed to inspect core file: %m"); From 1b8a2b6abe28a105493a456a9e2aa4ae3960e136 Mon Sep 17 00:00:00 2001 From: Lennart Poettering Date: Mon, 11 May 2026 11:34:22 +0200 Subject: [PATCH 209/242] fileio: teach read_one_line_file_at() XAT_FDROOT support --- src/basic/fileio.c | 18 ++++++---- src/basic/socket-util.c | 17 +++++++-- src/test/test-fileio.c | 76 +++++++++++++++++++++++++++++++++++++++++ 3 files changed, 102 insertions(+), 9 deletions(-) diff --git a/src/basic/fileio.c b/src/basic/fileio.c index 661667a6b2a1e..31e9af2e4e03f 100644 --- a/src/basic/fileio.c +++ b/src/basic/fileio.c @@ -420,11 +420,11 @@ int read_one_line_file_at(int dir_fd, const char *filename, char **ret) { _cleanup_fclose_ FILE *f = NULL; int r; - assert(dir_fd >= 0 || dir_fd == AT_FDCWD); + assert(dir_fd >= 0 || IN_SET(dir_fd, AT_FDCWD, XAT_FDROOT)); assert(filename); assert(ret); - r = fopen_unlocked_at(dir_fd, filename, "re", 0, &f); + r = fopen_unlocked_at(dir_fd, filename, "re", /* open_flags= */ 0, &f); if (r < 0) return r; @@ -1010,13 +1010,19 @@ static int xfopenat_regular(int dir_fd, const char *path, const char *mode, int /* A combination of fopen() with openat() */ - assert(dir_fd >= 0 || dir_fd == AT_FDCWD); + assert(dir_fd >= 0 || IN_SET(dir_fd, AT_FDCWD, XAT_FDROOT)); assert(mode); assert(ret); if (dir_fd == AT_FDCWD && path && open_flags == 0) f = fopen(path, mode); - else { + else if (dir_fd == XAT_FDROOT && path && open_flags == 0) { + _cleanup_free_ char *j = strjoin("/", path); + if (!j) + return -ENOMEM; + + f = fopen(j, mode); + } else { _cleanup_close_ int fd = -EBADF; int mode_flags; @@ -1051,7 +1057,7 @@ static int xfopenat_unix_socket(int dir_fd, const char *path, const char *bind_n FILE *f; int r; - assert(dir_fd >= 0 || dir_fd == AT_FDCWD); + assert(dir_fd >= 0 || IN_SET(dir_fd, AT_FDCWD, XAT_FDROOT)); assert(ret); sk = socket(AF_UNIX, SOCK_STREAM|SOCK_CLOEXEC, 0); @@ -1099,7 +1105,7 @@ int xfopenat_full( FILE *f = NULL; /* avoid false maybe-uninitialized warning */ int r; - assert(dir_fd >= 0 || dir_fd == AT_FDCWD); + assert(dir_fd >= 0 || IN_SET(dir_fd, AT_FDCWD, XAT_FDROOT)); assert(mode); assert(ret); diff --git a/src/basic/socket-util.c b/src/basic/socket-util.c index 2e0ee684ff98b..d53208f138990 100644 --- a/src/basic/socket-util.c +++ b/src/basic/socket-util.c @@ -1619,13 +1619,17 @@ int connect_unix_path(int fd, int dir_fd, const char *path) { _cleanup_close_ int inode_fd = -EBADF; assert(fd >= 0); - assert(dir_fd == AT_FDCWD || dir_fd >= 0); + assert(IN_SET(dir_fd, AT_FDCWD, XAT_FDROOT) || dir_fd >= 0); /* Connects to the specified AF_UNIX socket in the file system. Works around the 108 byte size limit * in sockaddr_un, by going via O_PATH if needed. This hence works for any kind of path. */ - if (!path) + if (!path) { + if (dir_fd < 0) + return -EISDIR; + return connect_unix_inode(fd, dir_fd); /* If no path is specified, then dir_fd refers to the socket inode to connect to. */ + } /* Refuse zero length path early, to make sure AF_UNIX stack won't mistake this for an abstract * namespace path, since first char is NUL */ @@ -1640,7 +1644,14 @@ int connect_unix_path(int fd, int dir_fd, const char *path) { * exist. If the path is too long, we also need to take the indirect route, since we can't fit this * into a sockaddr_un directly. */ - inode_fd = openat(dir_fd, path, O_PATH|O_CLOEXEC); + if (dir_fd == XAT_FDROOT) { + _cleanup_free_ char *j = strjoin("/", path); + if (!j) + return -ENOMEM; + + inode_fd = open(j, O_PATH|O_CLOEXEC); + } else + inode_fd = openat(dir_fd, path, O_PATH|O_CLOEXEC); if (inode_fd < 0) return -errno; diff --git a/src/test/test-fileio.c b/src/test/test-fileio.c index a752b1c7cda23..d27bf7ab5d824 100644 --- a/src/test/test-fileio.c +++ b/src/test/test-fileio.c @@ -747,4 +747,80 @@ TEST(write_data_file_atomic_at) { ASSERT_OK_ERRNO(rmdir("/tmp/zzz")); } +TEST(read_one_line_file_at_xat_fdroot) { + _cleanup_(rm_rf_physical_and_freep) char *t = NULL; + _cleanup_free_ char *fn = NULL, *buf = NULL; + + ASSERT_OK(mkdtemp_malloc("/tmp/test-r1lf-xatfd-XXXXXX", &t)); + ASSERT_TRUE(path_is_absolute(t)); + + ASSERT_NOT_NULL(fn = path_join(t, "hello")); + ASSERT_OK(write_string_file(fn, "first line\nsecond line", WRITE_STRING_FILE_CREATE)); + + /* XAT_FDROOT is supposed to root the path at the host's "/"; the implementation prepends a "/" so + * we pass the path without leading slash. */ + ASSERT_OK_EQ(read_one_line_file_at(XAT_FDROOT, fn + 1, &buf), (int) STRLEN("first line\n")); + ASSERT_STREQ(buf, "first line"); + buf = mfree(buf); + + /* Sanity check: AT_FDCWD with the absolute path gives the same result. */ + ASSERT_OK_EQ(read_one_line_file_at(AT_FDCWD, fn, &buf), (int) STRLEN("first line\n")); + ASSERT_STREQ(buf, "first line"); + buf = mfree(buf); + + /* /proc/version should always be readable via XAT_FDROOT (some build envs may restrict it; tolerate + * that). */ + int r = read_one_line_file_at(XAT_FDROOT, "proc/version", &buf); + if (!ERRNO_IS_NEG_PRIVILEGE(r)) { + ASSERT_OK(r); + ASSERT_FALSE(isempty(buf)); + buf = mfree(buf); + } + + /* Non-existent path through XAT_FDROOT should yield -ENOENT. */ + ASSERT_ERROR(read_one_line_file_at(XAT_FDROOT, "tmp/this/path/really/should/not/exist", &buf), ENOENT); + + /* Now create a Unix socket in the same temp dir, and verify that read_one_line_file_at() returns + * -ENXIO when pointed at it via XAT_FDROOT — read_one_line_file_at() does not enable the socket + * fallback. */ + _cleanup_free_ char *sockpath = NULL; + ASSERT_NOT_NULL(sockpath = path_join(t, "socket")); + + _cleanup_close_ int listener = -EBADF; + ASSERT_OK(listener = socket(AF_UNIX, SOCK_STREAM|SOCK_CLOEXEC, /* protocol= */ 0)); + + union sockaddr_union sa; + ASSERT_OK(sockaddr_un_set_path(&sa.un, sockpath)); + ASSERT_OK_ERRNO(bind(listener, &sa.sa, sockaddr_un_len(&sa.un))); + ASSERT_OK_ERRNO(listen(listener, 1)); + + ASSERT_ERROR(read_one_line_file_at(XAT_FDROOT, sockpath + 1, &buf), ENXIO); + + /* But read_full_file_full() with READ_FULL_FILE_CONNECT_SOCKET *does* enable the socket fallback, + * which routes through xfopenat_unix_socket() and connect_unix_path() — both now teach the + * XAT_FDROOT codepath. Use that to exercise the socket open via XAT_FDROOT. */ + static const char test_sock_str[] = "hello via xat_fdroot socket\n"; + + _cleanup_(pidref_done) PidRef pidref = PIDREF_NULL; + int pr = ASSERT_OK(pidref_safe_fork("(server)", FORK_DEATHSIG_SIGTERM|FORK_LOG, &pidref)); + if (pr == 0) { + _cleanup_close_ int rfd = -EBADF; + ASSERT_OK(rfd = accept4(listener, /* addr= */ NULL, /* addrlen= */ NULL, SOCK_CLOEXEC)); + ASSERT_OK_EQ_ERRNO(write(rfd, test_sock_str, sizeof(test_sock_str) - 1), + (ssize_t) sizeof(test_sock_str) - 1); + _exit(EXIT_SUCCESS); + } + + _cleanup_free_ char *data = NULL; + size_t size; + ASSERT_OK(read_full_file_full(XAT_FDROOT, sockpath + 1, + /* offset= */ UINT64_MAX, /* size= */ SIZE_MAX, + READ_FULL_FILE_CONNECT_SOCKET, /* bind_name= */ NULL, + &data, &size)); + ASSERT_EQ(size, sizeof(test_sock_str) - 1); + ASSERT_STREQ(data, test_sock_str); + + ASSERT_OK(pidref_wait_for_terminate_and_check("(server)", &pidref, WAIT_LOG)); +} + DEFINE_TEST_MAIN(LOG_DEBUG); From db8046535b419142e2850499552600e2fb0ac558 Mon Sep 17 00:00:00 2001 From: Lennart Poettering Date: Wed, 6 May 2026 15:17:36 +0200 Subject: [PATCH 210/242] fileio: add new read_boolean_file() helper --- src/basic/fileio.c | 14 +++++++++++ src/basic/fileio.h | 4 ++++ src/test/test-fileio.c | 54 ++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 72 insertions(+) diff --git a/src/basic/fileio.c b/src/basic/fileio.c index 31e9af2e4e03f..8149bb8b0dd44 100644 --- a/src/basic/fileio.c +++ b/src/basic/fileio.c @@ -431,6 +431,20 @@ int read_one_line_file_at(int dir_fd, const char *filename, char **ret) { return read_line(f, LONG_LINE_MAX, ret); } +int read_boolean_file_at(int dir_fd, const char *filename) { + _cleanup_free_ char *s = NULL; + int r; + + assert(dir_fd >= 0 || IN_SET(dir_fd, AT_FDCWD, XAT_FDROOT)); + assert(filename); + + r = read_one_line_file_at(dir_fd, filename, &s); + if (r < 0) + return r; + + return parse_boolean(s); +} + int verify_file_at(int dir_fd, const char *fn, const char *blob, bool accept_extra_nl) { _cleanup_fclose_ FILE *f = NULL; _cleanup_free_ char *buf = NULL; diff --git a/src/basic/fileio.h b/src/basic/fileio.h index 274fdfbd7c89a..5e6e3de1fef57 100644 --- a/src/basic/fileio.h +++ b/src/basic/fileio.h @@ -63,6 +63,10 @@ int read_one_line_file_at(int dir_fd, const char *filename, char **ret); static inline int read_one_line_file(const char *filename, char **ret) { return read_one_line_file_at(AT_FDCWD, filename, ret); } +int read_boolean_file_at(int dir_fd, const char *filename); +static inline int read_boolean_file(const char *filename) { + return read_boolean_file_at(AT_FDCWD, filename); +} int read_full_file_full(int dir_fd, const char *filename, uint64_t offset, size_t size, ReadFullFileFlags flags, const char *bind_name, char **ret_contents, size_t *ret_size); static inline int read_full_file_at(int dir_fd, const char *filename, char **ret_contents, size_t *ret_size) { return read_full_file_full(dir_fd, filename, UINT64_MAX, SIZE_MAX, 0, NULL, ret_contents, ret_size); diff --git a/src/test/test-fileio.c b/src/test/test-fileio.c index d27bf7ab5d824..d050522028f6d 100644 --- a/src/test/test-fileio.c +++ b/src/test/test-fileio.c @@ -823,4 +823,58 @@ TEST(read_one_line_file_at_xat_fdroot) { ASSERT_OK(pidref_wait_for_terminate_and_check("(server)", &pidref, WAIT_LOG)); } +TEST(read_boolean_file) { + _cleanup_(unlink_tempfilep) char fn[] = "/tmp/test-read-boolean-file-XXXXXX"; + _cleanup_close_ int fd = -EBADF, dfd = -EBADF; + const char *rel; + + ASSERT_OK(fd = mkostemp_safe(fn)); + + ASSERT_OK(write_string_file(fn, "yes", WRITE_STRING_FILE_TRUNCATE)); + ASSERT_OK_EQ(read_boolean_file(fn), true); + ASSERT_OK_EQ(read_boolean_file_at(AT_FDCWD, fn), true); + + ASSERT_OK(write_string_file(fn, "0", WRITE_STRING_FILE_TRUNCATE)); + ASSERT_OK_EQ(read_boolean_file(fn), false); + ASSERT_OK_EQ(read_boolean_file_at(AT_FDCWD, fn), false); + + ASSERT_OK(write_string_file(fn, "true\nignored\n", WRITE_STRING_FILE_TRUNCATE)); + ASSERT_OK_EQ(read_boolean_file(fn), true); + + ASSERT_OK(write_string_file(fn, "garbage", WRITE_STRING_FILE_TRUNCATE)); + ASSERT_ERROR(read_boolean_file(fn), EINVAL); + + ASSERT_ERROR(read_boolean_file("/tmp/this-file-better-not-exist-XXX"), ENOENT); + + /* Now test XAT_FDROOT: filename is relative, looked up against "/" */ + ASSERT_TRUE(path_startswith(fn, "/")); + rel = fn + 1; + + ASSERT_OK(write_string_file(fn, "on", WRITE_STRING_FILE_TRUNCATE)); + ASSERT_OK_EQ(read_boolean_file_at(XAT_FDROOT, rel), true); + + ASSERT_OK(write_string_file(fn, "off", WRITE_STRING_FILE_TRUNCATE)); + ASSERT_OK_EQ(read_boolean_file_at(XAT_FDROOT, rel), false); + + ASSERT_ERROR(read_boolean_file_at(XAT_FDROOT, "tmp/this-file-better-not-exist-XXX"), ENOENT); + + /* And confirm XAT_FDROOT ignores the cwd: chdir somewhere unrelated, then look up + * the same relative-to-/ path. */ + _cleanup_free_ char *cwd = NULL; + ASSERT_OK(safe_getcwd(&cwd)); + ASSERT_OK_ERRNO(chdir("/usr")); + + ASSERT_OK(write_string_file(fn, "yes", WRITE_STRING_FILE_TRUNCATE)); + ASSERT_OK_EQ(read_boolean_file_at(XAT_FDROOT, rel), true); + + ASSERT_OK_ERRNO(chdir(cwd)); + + /* Also test the dir_fd >= 0 path using an actual fd for /tmp. */ + ASSERT_OK(dfd = open("/tmp", O_DIRECTORY|O_CLOEXEC)); + ASSERT_OK(write_string_file(fn, "1", WRITE_STRING_FILE_TRUNCATE)); + _cleanup_free_ char *bn = NULL; + ASSERT_OK(path_extract_filename(fn, &bn)); + ASSERT_OK_EQ(read_boolean_file_at(dfd, bn), true); +} + DEFINE_TEST_MAIN(LOG_DEBUG); From 4f63cff1f47f78c063680745e4ebc33c7d82057e Mon Sep 17 00:00:00 2001 From: Lennart Poettering Date: Mon, 11 May 2026 11:48:11 +0200 Subject: [PATCH 211/242] tree-wide: port various places to read_boolean_file() --- src/basic/terminal-util.c | 10 +--------- src/journal/journald-console.c | 10 ++-------- src/shared/apparmor-util.c | 13 +++---------- 3 files changed, 6 insertions(+), 27 deletions(-) diff --git a/src/basic/terminal-util.c b/src/basic/terminal-util.c index d241f5e7f8998..5b3fc8990d696 100644 --- a/src/basic/terminal-util.c +++ b/src/basic/terminal-util.c @@ -869,16 +869,8 @@ int vt_disallocate(const char *tty_path) { } static int vt_default_utf8(void) { - _cleanup_free_ char *b = NULL; - int r; - /* Read the default VT UTF8 setting from the kernel */ - - r = read_one_line_file("/sys/module/vt/parameters/default_utf8", &b); - if (r < 0) - return r; - - return parse_boolean(b); + return read_boolean_file("/sys/module/vt/parameters/default_utf8"); } static int vt_reset_keyboard(int fd) { diff --git a/src/journal/journald-console.c b/src/journal/journald-console.c index 0f376f9e5a45c..0cd2215bd21ad 100644 --- a/src/journal/journald-console.c +++ b/src/journal/journald-console.c @@ -12,7 +12,6 @@ #include "journald-console.h" #include "journald-manager.h" #include "log.h" -#include "parse-util.h" #include "process-util.h" #include "stdio-util.h" #include "terminal-util.h" @@ -22,13 +21,8 @@ static bool prefix_timestamp(void) { static int cached_printk_time = -1; - if (_unlikely_(cached_printk_time < 0)) { - _cleanup_free_ char *p = NULL; - - cached_printk_time = - read_one_line_file("/sys/module/printk/parameters/time", &p) >= 0 - && parse_boolean(p) > 0; - } + if (_unlikely_(cached_printk_time < 0)) + cached_printk_time = read_boolean_file("/sys/module/printk/parameters/time") > 0; return cached_printk_time; } diff --git a/src/shared/apparmor-util.c b/src/shared/apparmor-util.c index 5f01bfae01651..b784d26c5baf6 100644 --- a/src/shared/apparmor-util.c +++ b/src/shared/apparmor-util.c @@ -9,9 +9,7 @@ #include "sd-dlopen.h" -#include "alloc-util.h" #include "fileio.h" -#include "parse-util.h" static void *libapparmor_dl = NULL; @@ -31,18 +29,13 @@ bool mac_apparmor_use(void) { if (cached_use >= 0) return cached_use; - _cleanup_free_ char *p = NULL; - r = read_one_line_file("/sys/module/apparmor/parameters/enabled", &p); + r = read_boolean_file("/sys/module/apparmor/parameters/enabled"); if (r < 0) { if (r != -ENOENT) - log_debug_errno(r, "Failed to read /sys/module/apparmor/parameters/enabled, assuming AppArmor is not available: %m"); + log_debug_errno(r, "Failed to read and parse /sys/module/apparmor/parameters/enabled, assuming AppArmor is not available: %m"); return (cached_use = false); } - - r = parse_boolean(p); - if (r < 0) - log_debug_errno(r, "Failed to parse /sys/module/apparmor/parameters/enabled, assuming AppArmor is not available: %m"); - if (r <= 0) + if (r == 0) return (cached_use = false); if (dlopen_libapparmor(LOG_DEBUG) < 0) From 263335518b29d90cc3745414c02c3a9de20b2070 Mon Sep 17 00:00:00 2001 From: Daan De Meyer Date: Sun, 10 May 2026 21:24:26 +0200 Subject: [PATCH 212/242] clang-tidy: Drop unknown gcc compiler args clang-tidy recently gained support to allow dropping compiler args from the entries parsed from the compilation database. Let's make use of this to drop the two compiler args we use with gcc that clang doesn't support so we can run clang-tidy on meson build trees configured to use gcc without getting tons of false positives. --- .clang-tidy | 1 + 1 file changed, 1 insertion(+) diff --git a/.clang-tidy b/.clang-tidy index 82681fda39923..8d69a69cbf971 100644 --- a/.clang-tidy +++ b/.clang-tidy @@ -70,6 +70,7 @@ CheckOptions: ^getmntent$,mnt_table_next_fs(),libmount parser should be used instead ' misc-header-include-cycle.IgnoredFilesList: 'glib-2.0' +RemovedArgs: ['-fwide-exec-charset=UCS2', '-maccumulate-outgoing-args'] WarningsAsErrors: '*' ExcludeHeaderFilterRegex: 'blkid\.h|gmessages\.h|gstring\.h' HeaderFileExtensions: From f68fa99a0748383eb10339f22ad0be0ed825722a Mon Sep 17 00:00:00 2001 From: Luca Boccassi Date: Mon, 11 May 2026 12:58:13 +0100 Subject: [PATCH 213/242] TEST-67-INTEGRITY: pre-load crypto modules and skip unsupported algorithms The test occasionally fails on GHA CI when formatting with xxhash64 because dm-integrity's crypto_alloc_shash() -> request_module() path flakily fails to load the algorithm: [ 29.172664] TEST-67-INTEGRITY.sh[447]: + for a in crc32c crc32 xxhash64 sha1 sha256 [ 29.172664] TEST-67-INTEGRITY.sh[447]: + [[ xxhash64 == crc32 ]] [ 29.172664] TEST-67-INTEGRITY.sh[447]: + test_one xxhash64 0 [ 29.172664] TEST-67-INTEGRITY.sh[447]: + integritysetup format /dev/loop0 --batch-mode -I xxhash64 '' [ 29.223383] TEST-67-INTEGRITY.sh[1220]: device-mapper: reload ioctl on temporary-cryptsetup-fa8bebe3-1d87-4796-91e8-abc02c487bb5 (254:0) failed: No such file or directory [ 29.226916] kernel: device-mapper: table: 254:0: integrity: Invalid internal hash (-ENOENT) [ 29.227415] kernel: device-mapper: ioctl: error adding target to table [ 29.231586] TEST-67-INTEGRITY.sh[1220]: Cannot format integrity for device /dev/loop0. Preload each algorithm's crypto module before use, and skip algorithms that are not registered in /proc/crypto. Co-developed-by: Claude Opus 4.7 --- test/units/TEST-67-INTEGRITY.sh | 29 +++++++++++++++++------------ 1 file changed, 17 insertions(+), 12 deletions(-) diff --git a/test/units/TEST-67-INTEGRITY.sh b/test/units/TEST-67-INTEGRITY.sh index 667fa56343401..a57d7d9db279f 100755 --- a/test/units/TEST-67-INTEGRITY.sh +++ b/test/units/TEST-67-INTEGRITY.sh @@ -2,8 +2,6 @@ # SPDX-License-Identifier: LGPL-2.1-or-later set -euxo pipefail -. /etc/os-release - DM_NAME="integrity_test" DM_NODE="/dev/mapper/${DM_NAME}" DM_SERVICE="systemd-integritysetup@${DM_NAME}.service" @@ -119,16 +117,23 @@ EOF } for a in crc32c crc32 xxhash64 sha1 sha256; do - if [[ "$a" == crc32 && "${ID_LIKE:-}" == alpine ]]; then - # crc32 is not supported on alpine/postmarketos ?? - # -------- - # [ 22.419458] TEST-67-INTEGRITY.sh[3085]: + integritysetup format /dev/loop0 --batch-mode -I crc32 '' - # [ 22.433168] kernel: device-mapper: table: 253:0: integrity: Invalid internal hash (-ENOENT) - # [ 22.433220] TEST-67-INTEGRITY.sh[3475]: device-mapper: reload ioctl on temporary-cryptsetup-6b3b80ef-6854-4102-8239-6360f15af0c3 (253:0) failed: No such file or directory - # [ 22.433220] TEST-67-INTEGRITY.sh[3475]: Cannot format integrity for device /dev/loop0. - # [ 22.433835] kernel: device-mapper: ioctl: error adding target to table - # -------- - continue; + # dm-integrity uses crypto_alloc_shash() which triggers request_module() + # for the underlying hash algorithm when needed. That auto-load has been + # observed to fail flakily in some test environments, leading to errors + # like: + # kernel: device-mapper: table: NNN:N: integrity: Invalid internal hash (-ENOENT) + # integritysetup: Cannot format integrity for device /dev/loopN. + # Try to load the kernel module ahead of time to avoid that. Failure is + # acceptable here: the algorithm might be built-in (no module to load) or + # genuinely unsupported, in which case the next check will skip it. + modprobe -q "crypto-$a" || : + + # Some algorithms are not supported on certain platforms (e.g. crc32 is + # missing on Alpine/postmarketOS). Skip them at runtime to avoid spurious + # failures. + if ! grep -q -E "^name\s+: $a\$" /proc/crypto; then + echo "Algorithm '$a' is not supported on this system, skipping." + continue fi test_one "$a" 0 From 6e7f59884ebc8c8e2205fafd4d789fb73b389029 Mon Sep 17 00:00:00 2001 From: Lennart Poettering Date: Mon, 11 May 2026 15:54:12 +0200 Subject: [PATCH 214/242] repart: make definitions varlink parameter actually optional The Varlink iterface said the definitions directory was mandatory, and so did the dispatch table. But that's nonsense, the code is completely fine to operate without (same as cmdline repart invocations): it will just use the standard definitions dir. Fix that. --- src/repart/repart.c | 14 +++++++------- src/shared/varlink-io.systemd.Repart.c | 2 +- 2 files changed, 8 insertions(+), 8 deletions(-) diff --git a/src/repart/repart.c b/src/repart/repart.c index 84aaf60b5f790..168092d43a8eb 100644 --- a/src/repart/repart.c +++ b/src/repart/repart.c @@ -10937,13 +10937,13 @@ static int vl_method_run( void *userdata) { static const sd_json_dispatch_field dispatch_table[] = { - { "node", SD_JSON_VARIANT_STRING, sd_json_dispatch_string, offsetof(RunParameters, node), SD_JSON_NULLABLE }, - { "empty", SD_JSON_VARIANT_STRING, json_dispatch_empty_mode, offsetof(RunParameters, empty), SD_JSON_MANDATORY }, - { "seed", SD_JSON_VARIANT_STRING, sd_json_dispatch_id128, offsetof(RunParameters, seed), SD_JSON_NULLABLE }, - { "dryRun", SD_JSON_VARIANT_BOOLEAN, sd_json_dispatch_stdbool, offsetof(RunParameters, dry_run), SD_JSON_MANDATORY }, - { "definitions", SD_JSON_VARIANT_ARRAY, json_dispatch_strv_path, offsetof(RunParameters, definitions), SD_JSON_MANDATORY|SD_JSON_STRICT }, - { "deferPartitionsEmpty", SD_JSON_VARIANT_BOOLEAN, sd_json_dispatch_stdbool, offsetof(RunParameters, defer_partitions_empty), SD_JSON_NULLABLE }, - { "deferPartitionsFactoryReset", SD_JSON_VARIANT_BOOLEAN, sd_json_dispatch_stdbool, offsetof(RunParameters, defer_partitions_factory_reset), SD_JSON_NULLABLE }, + { "node", SD_JSON_VARIANT_STRING, sd_json_dispatch_string, offsetof(RunParameters, node), SD_JSON_NULLABLE }, + { "empty", SD_JSON_VARIANT_STRING, json_dispatch_empty_mode, offsetof(RunParameters, empty), SD_JSON_MANDATORY }, + { "seed", SD_JSON_VARIANT_STRING, sd_json_dispatch_id128, offsetof(RunParameters, seed), SD_JSON_NULLABLE }, + { "dryRun", SD_JSON_VARIANT_BOOLEAN, sd_json_dispatch_stdbool, offsetof(RunParameters, dry_run), SD_JSON_MANDATORY }, + { "definitions", SD_JSON_VARIANT_ARRAY, json_dispatch_strv_path, offsetof(RunParameters, definitions), SD_JSON_STRICT }, + { "deferPartitionsEmpty", SD_JSON_VARIANT_BOOLEAN, sd_json_dispatch_stdbool, offsetof(RunParameters, defer_partitions_empty), SD_JSON_NULLABLE }, + { "deferPartitionsFactoryReset", SD_JSON_VARIANT_BOOLEAN, sd_json_dispatch_stdbool, offsetof(RunParameters, defer_partitions_factory_reset), SD_JSON_NULLABLE }, {} }; diff --git a/src/shared/varlink-io.systemd.Repart.c b/src/shared/varlink-io.systemd.Repart.c index dbfb8d0360d2f..287af1f15024d 100644 --- a/src/shared/varlink-io.systemd.Repart.c +++ b/src/shared/varlink-io.systemd.Repart.c @@ -41,7 +41,7 @@ static SD_VARLINK_DEFINE_METHOD_FULL( SD_VARLINK_FIELD_COMMENT("The seed value to derive partition and file system UUIDs from"), SD_VARLINK_DEFINE_INPUT(seed, SD_VARLINK_STRING, SD_VARLINK_NULLABLE), SD_VARLINK_FIELD_COMMENT("Path to directory containing definition files."), - SD_VARLINK_DEFINE_INPUT(definitions, SD_VARLINK_STRING, SD_VARLINK_ARRAY), + SD_VARLINK_DEFINE_INPUT(definitions, SD_VARLINK_STRING, SD_VARLINK_ARRAY|SD_VARLINK_NULLABLE), SD_VARLINK_FIELD_COMMENT("If true, automatically defer creation of all partitions whose label is \"empty\"."), SD_VARLINK_DEFINE_INPUT(deferPartitionsEmpty, SD_VARLINK_BOOL, SD_VARLINK_NULLABLE), SD_VARLINK_FIELD_COMMENT("If true, automatically defer creation of all partitions which are marked for factory reset."), From 4409e52494d803426a365b6636a66fd2dfc70b62 Mon Sep 17 00:00:00 2001 From: Michael Vogt Date: Fri, 8 May 2026 16:37:52 +0200 Subject: [PATCH 215/242] units: enable systemd-report-basic.socket by default In https://github.com/systemd/systemd/pull/41688 we merged metrics and facts for systemd-report. However while some metric sources are enabled by default (like `io.systemd.{Manager,Network}`) the `io.systemd.Basic` service is not enabled by default. This commit changes this and enables it by default. We could also enable the systemd-report-cgroup.socket but that sends a lot more data not sure that is a good default. --- presets/90-systemd.preset | 1 + test/units/TEST-74-AUX-UTILS.report.sh | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/presets/90-systemd.preset b/presets/90-systemd.preset index cd7afb5df2523..4a668fb8ce93b 100644 --- a/presets/90-systemd.preset +++ b/presets/90-systemd.preset @@ -31,6 +31,7 @@ enable systemd-networkd.service enable systemd-networkd-wait-online.service enable systemd-nsresourced.socket enable systemd-pstore.service +enable systemd-report-basic.socket enable systemd-resolved.service enable systemd-sysext.service enable systemd-timesyncd.service diff --git a/test/units/TEST-74-AUX-UTILS.report.sh b/test/units/TEST-74-AUX-UTILS.report.sh index 61d2b10d0b7c6..7475978336f14 100755 --- a/test/units/TEST-74-AUX-UTILS.report.sh +++ b/test/units/TEST-74-AUX-UTILS.report.sh @@ -51,7 +51,7 @@ varlinkctl --more call /run/systemd/report/io.systemd.Network io.systemd.Metrics varlinkctl --more call /run/systemd/report/io.systemd.Network io.systemd.Metrics.Describe {} # test io.systemd.Basic Metrics -systemctl start systemd-report-basic.socket +[[ "$(systemctl is-enabled systemd-report-basic.socket)" == enabled ]] varlinkctl info /run/systemd/report/io.systemd.Basic varlinkctl list-methods /run/systemd/report/io.systemd.Basic varlinkctl --more call /run/systemd/report/io.systemd.Basic io.systemd.Metrics.List {} From 6f3fa7177598cc586273ff4e3c753a5bf5bb104a Mon Sep 17 00:00:00 2001 From: Daan De Meyer Date: Mon, 11 May 2026 15:00:19 +0200 Subject: [PATCH 216/242] vmspawn: Attach a USB keyboard in GUI mode MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit EDK2's UsbKbDxe is the only driver that registers a default HII keyboard layout via the HII database protocol; the PS/2 driver does not. Adding a USB xHCI controller and usb-kbd in CONSOLE_GUI mode gives us a layout to query, which systemd-boot exports through the LoaderKeyboardLayout EFI variable — useful for exercising that codepath end-to-end. --- src/vmspawn/vmspawn.c | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/src/vmspawn/vmspawn.c b/src/vmspawn/vmspawn.c index 57b7697079ee4..f49b36af1e866 100644 --- a/src/vmspawn/vmspawn.c +++ b/src/vmspawn/vmspawn.c @@ -2978,6 +2978,23 @@ static int run_virtual_machine(int kvm_device_fd, int vhost_device_fd) { if (r < 0) return r; + /* Attach a USB xHCI controller and a USB keyboard. We prefer USB over the implicit PS/2 + * keyboard so that EDK2's UsbKbDxe driver runs, which registers the default HII keyboard + * layout package — the PS/2 driver does not. That makes + * EFI_HII_DATABASE_PROTOCOL.GetKeyboardLayout() return a usable layout, which systemd-boot + * then exports via the LoaderKeyboardLayout EFI variable, which is useful for testing that + * codepath actually works. */ + r = qemu_config_section(config_file, "device", "xhci0", + "driver", "qemu-xhci"); + if (r < 0) + return r; + + r = qemu_config_section(config_file, "device", "usb-kbd0", + "driver", "usb-kbd", + "bus", "xhci0.0"); + if (r < 0) + return r; + break; case CONSOLE_HEADLESS: From 033be1a41b5f75a3f2c8f4fe212512062bc4d5f3 Mon Sep 17 00:00:00 2001 From: Christian Hesse Date: Mon, 11 May 2026 10:12:47 +0200 Subject: [PATCH 217/242] hwdb/keyboard: use vendor/product specific match for X+ Piccolo The controller is used in other devices, and some of these do have a separate keypad with enter key. Fixes: 7ae0a588154ad279deaa98f82c15470684189856 --- hwdb.d/60-keyboard.hwdb | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/hwdb.d/60-keyboard.hwdb b/hwdb.d/60-keyboard.hwdb index 23023740901ac..f965d2c6ec6fb 100644 --- a/hwdb.d/60-keyboard.hwdb +++ b/hwdb.d/60-keyboard.hwdb @@ -2228,7 +2228,7 @@ evdev:atkbd:dmi:bvnTIMI*:bvr*:bd*:svnTIMI*:pnMiNoteBookPro*:* ########################################################### # X+ piccolo series 81X (Intel N305, possibly more) -evdev:input:b0011v0001p0001eAB83* +evdev:atkbd:dmi:bvn*:bvr*:bd*:svnX-Plus.tech:pnX+ Piccolo:* KEYBOARD_KEY_9c=enter # KP_enter in the main area is wrong ########################################################### From 96a61b5483ef5650b41ccbfce9d5b229eece1adf Mon Sep 17 00:00:00 2001 From: Christian Hesse Date: Mon, 11 May 2026 10:10:06 +0200 Subject: [PATCH 218/242] Revert "hwdb/keyboard: fix KP_Enter on Clevo PA70ES" Fixed in previous commit. This reverts commit b7be9ccc8f4299269f72bde49e426a7a9d484da9. --- hwdb.d/60-keyboard.hwdb | 9 --------- 1 file changed, 9 deletions(-) diff --git a/hwdb.d/60-keyboard.hwdb b/hwdb.d/60-keyboard.hwdb index f965d2c6ec6fb..c9c0ea1db1afd 100644 --- a/hwdb.d/60-keyboard.hwdb +++ b/hwdb.d/60-keyboard.hwdb @@ -344,15 +344,6 @@ evdev:atkbd:dmi:bvn*:bvr*:bd*:svn*BenQ*:pn*Joybook*R22*:* # Clevo ########################################################### -# Clevo PA70ES (Avell C73) -# The ITE keyboard controller firmware (version 0xAB83) is shared with -# the X+ piccolo. The piccolo rule (below) matches by input device ID -# and remaps KP_Enter to Enter since the piccolo has no numpad and its -# main Enter sends the wrong scan code. The PA70ES has a real numpad, -# so the remap breaks KP_Enter. This restores the correct mapping. -evdev:atkbd:dmi:bvn*:bvr*:bd*:svnNotebook:pnPA70ES:* - KEYBOARD_KEY_9c=kpenter - evdev:atkbd:dmi:bvn*:bvr*:bd*:svnNotebook:pnW65_67SZ:* KEYBOARD_KEY_a0=!mute KEYBOARD_KEY_a2=!playpause From 5173116fdb1198b40b4f0386d20ada5c4729e0a4 Mon Sep 17 00:00:00 2001 From: Daan De Meyer Date: Mon, 11 May 2026 19:17:53 +0200 Subject: [PATCH 219/242] curl-util: Fix clang-tidy warnings (#42030) Use the ref/unref macros to make sure the parameter names match Follow-up for 87cec65cae656f6ac2e702bd60dad6dd4fdae636 --- src/shared/curl-util.h | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/src/shared/curl-util.h b/src/shared/curl-util.h index 3436188952fbc..c35de1cb7b254 100644 --- a/src/shared/curl-util.h +++ b/src/shared/curl-util.h @@ -33,9 +33,7 @@ extern DLSYM_PROTOTYPE(curl_slist_free_all); typedef int (*curl_finished_t)(CurlSlot *slot, CURL *curl, CURLcode code, void *userdata); int curl_glue_new(CurlGlue **glue, sd_event *event); -CurlGlue* curl_glue_ref(CurlGlue *glue); -CurlGlue* curl_glue_unref(CurlGlue *glue); - +DECLARE_TRIVIAL_REF_UNREF_FUNC(CurlGlue, curl_glue); DEFINE_TRIVIAL_CLEANUP_FUNC(CurlGlue*, curl_glue_unref); /* Build a CURL easy handle with sane defaults. The caller configures any @@ -62,9 +60,7 @@ int curl_glue_perform_async( CURL* curl_slot_get_easy(CurlSlot *slot); CurlGlue* curl_slot_get_glue(CurlSlot *slot); -CurlSlot* curl_slot_ref(CurlSlot *slot); -CurlSlot* curl_slot_unref(CurlSlot *slot); - +DECLARE_TRIVIAL_REF_UNREF_FUNC(CurlSlot, curl_slot); DEFINE_TRIVIAL_CLEANUP_FUNC(CurlSlot*, curl_slot_unref); struct curl_slist *curl_slist_new(const char *first, ...) _sentinel_; From 7b9d76cba72680ffad2672b7143d0dc1cea70231 Mon Sep 17 00:00:00 2001 From: Daan De Meyer Date: Mon, 11 May 2026 13:03:49 +0000 Subject: [PATCH 220/242] boot,vconsole: Propagate UEFI HII keyboard layout to the OS UEFI firmware can report the currently-active keyboard layout via EFI_HII_DATABASE_PROTOCOL.GetKeyboardLayout(). The layout descriptor includes an RFC 4646 / BCP 47 language tag (e.g. "en-US"). Query this from sd-boot/sd-stub and write it to a new LoaderKeyboardLayout EFI variable, advertised through a new EFI_LOADER_FEATURE_KEYBOARD_LAYOUT feature bit. On the OS side, systemd-vconsole-setup reads the variable as a lowest-priority fallback for the console keymap. To map the BCP 47 tag to a vconsole keymap we extend /usr/share/systemd/kbd-model-map with an optional sixth column listing the comma-separated BCP 47 tags each row covers; a new find_vconsole_keymap_for_bcp47() helper walks the file, preferring an exact tag match and otherwise falling back to the row whose tag matches the input's primary subtag. Credentials, /etc/vconsole.conf, and vconsole.keymap= on the kernel command line continue to take precedence. bootctl status surfaces the new variable, printing the language tag or "n/a (not reported by firmware)" when sd-boot advertises the feature but the firmware HII database didn't expose a layout (common on QEMU without a USB keyboard, since EDK2's PS/2 driver does not register an HII keyboard layout). --- docs/BOOT_LOADER_INTERFACE.md | 13 +++ man/systemd-vconsole-setup.service.xml | 14 +++ man/vconsole.conf.xml | 7 ++ src/boot/boot.c | 1 + src/boot/export-vars.c | 10 ++ src/boot/hii.c | 80 ++++++++++++++ src/boot/hii.h | 9 ++ src/boot/meson.build | 1 + src/boot/proto/hii-database.h | 126 +++++++++++++++++++++ src/bootctl/bootctl-status.c | 80 +++++++------- src/fundamental/efivars-fundamental.h | 1 + src/locale/kbd-model-map | 147 +++++++++++++------------ src/shared/vconsole-util.c | 77 +++++++++++++ src/shared/vconsole-util.h | 2 + src/vconsole/vconsole-setup.c | 37 ++++++- 15 files changed, 494 insertions(+), 111 deletions(-) create mode 100644 src/boot/hii.c create mode 100644 src/boot/hii.h create mode 100644 src/boot/proto/hii-database.h diff --git a/docs/BOOT_LOADER_INTERFACE.md b/docs/BOOT_LOADER_INTERFACE.md index 5c2e74f29011d..36380f38f3328 100644 --- a/docs/BOOT_LOADER_INTERFACE.md +++ b/docs/BOOT_LOADER_INTERFACE.md @@ -143,6 +143,8 @@ Variables will be listed below using the Linux efivarfs naming, * `1 << 18` → The boot loader reports active TPM2 PCR banks in the EFI variable `LoaderTpm2ActivePcrBanks-4a67b082-0a4c-41cf-b6c7-440b29bb8c4f`. * `1 << 19` → The boot loader supports the `LoaderEntryPreferred` variable when set. + * `1 << 20` → The boot loader reports the firmware-configured keyboard layout in the + EFI variable `LoaderKeyboardLayout-4a67b082-0a4c-41cf-b6c7-440b29bb8c4f`. * The EFI variable `LoaderSystemToken-4a67b082-0a4c-41cf-b6c7-440b29bb8c4f` contains binary random data, @@ -171,6 +173,17 @@ Variables will be listed below using the Linux efivarfs naming, the TCG EFI ProtocolSpecification for TPM 2.0 as `EFI_TCG2_BOOT_HASH_ALG_*`. If no TPM2 support or no active banks were detected, will be set to `0`. +* The EFI variable `LoaderKeyboardLayout-4a67b082-0a4c-41cf-b6c7-440b29bb8c4f` + contains the RFC 4646 (BCP 47) language tag of the currently-active keyboard + layout as reported by the UEFI HII database (e.g. `en-US`, `de-DE`). + It is formatted as a NUL-terminated UTF-16 string. + The boot loader sets this variable from the layout returned by + `EFI_HII_DATABASE_PROTOCOL.GetKeyboardLayout()`, + if that protocol is implemented by the firmware. + Userspace (notably `systemd-vconsole-setup`) + uses this as a lowest-priority fallback keyboard layout + when no explicit configuration is provided. + If `LoaderTimeInitUSec` and `LoaderTimeExecUSec` are set, `systemd-analyze` will include them in its boot-time analysis. If `LoaderDevicePartUUID` is set, systemd will mount the ESP that was used for the boot to `/boot`, but only if diff --git a/man/systemd-vconsole-setup.service.xml b/man/systemd-vconsole-setup.service.xml index 87cb9e4777bb4..e6656eb578545 100644 --- a/man/systemd-vconsole-setup.service.xml +++ b/man/systemd-vconsole-setup.service.xml @@ -95,6 +95,20 @@ + + Firmware-provided keyboard layout + + If the boot loader reports the firmware-configured keyboard layout via the + LoaderKeyboardLayout EFI variable (see the + Boot Loader Interface), + systemd-vconsole-setup uses it as the lowest-priority fallback for the + keymap. The RFC 4646 / BCP 47 language tag reported by the firmware (e.g. de-DE) is + matched against the optional sixth column of /usr/share/systemd/kbd-model-map, + which lists the language tags each virtual-console keymap covers. Credentials, + /etc/vconsole.conf, and kernel command line options all override this + firmware-provided default. + + See Also diff --git a/man/vconsole.conf.xml b/man/vconsole.conf.xml index e5e160cf3d55d..20b30d39948f4 100644 --- a/man/vconsole.conf.xml +++ b/man/vconsole.conf.xml @@ -56,6 +56,13 @@ might be checked for configuration of the virtual console as well, however only as fallback. + If the boot loader reports the firmware-configured keyboard layout via the + LoaderKeyboardLayout EFI variable (see the + Boot Loader Interface), + it is used as the lowest-priority fallback for KEYMAP=. + Any setting from credentials, /etc/vconsole.conf, or the kernel + command line overrides it. + /etc/vconsole.conf is usually created and updated using systemd-localed.service8. diff --git a/src/boot/boot.c b/src/boot/boot.c index a2a1becc9aaa0..8660814aaebd6 100644 --- a/src/boot/boot.c +++ b/src/boot/boot.c @@ -3266,6 +3266,7 @@ static void export_loader_variables( EFI_LOADER_FEATURE_TYPE1_UKI | EFI_LOADER_FEATURE_TYPE1_UKI_URL | EFI_LOADER_FEATURE_TPM2_ACTIVE_PCR_BANKS | + EFI_LOADER_FEATURE_KEYBOARD_LAYOUT | 0; assert(loaded_image); diff --git a/src/boot/export-vars.c b/src/boot/export-vars.c index 5c037bdd25235..aa49666f57ce4 100644 --- a/src/boot/export-vars.c +++ b/src/boot/export-vars.c @@ -3,6 +3,7 @@ #include "device-path-util.h" #include "efi-efivars.h" #include "export-vars.h" +#include "hii.h" #include "measure.h" #include "part-discovery.h" #include "url-discovery.h" @@ -60,4 +61,13 @@ void export_common_variables(EFI_LOADED_IMAGE_PROTOCOL *loaded_image) { s = xasprintf("0x%08x", active_pcr_banks); efivar_set_str16(MAKE_GUID_PTR(LOADER), u"LoaderTpm2ActivePcrBanks", s, 0); } + + /* Report the firmware's currently-active HII keyboard layout (as an RFC 4646 language tag, e.g. + * "de-DE"), so the OS can pick a matching console keymap. Best-effort: many firmwares do not + * implement the HII database protocol or expose no keyboard layout. */ + if (efivar_get_raw(MAKE_GUID_PTR(LOADER), u"LoaderKeyboardLayout", /* ret_data= */ NULL, /* ret_size= */ NULL) != EFI_SUCCESS) { + _cleanup_free_ char16_t *lang = hii_query_keyboard_layout_language(); + if (lang) + efivar_set_str16(MAKE_GUID_PTR(LOADER), u"LoaderKeyboardLayout", lang, /* flags= */ 0); + } } diff --git a/src/boot/hii.c b/src/boot/hii.c new file mode 100644 index 0000000000000..84bf299c4247b --- /dev/null +++ b/src/boot/hii.c @@ -0,0 +1,80 @@ +/* SPDX-License-Identifier: LGPL-2.1-or-later */ + +#include "efi-log.h" +#include "hii.h" +#include "proto/hii-database.h" +#include "util.h" + +char16_t *hii_query_keyboard_layout_language(void) { + EFI_HII_DATABASE_PROTOCOL *hii_db = NULL; + EFI_STATUS err; + + err = BS->LocateProtocol(MAKE_GUID_PTR(EFI_HII_DATABASE_PROTOCOL), /* Registration= */ NULL, (void **) &hii_db); + if (err != EFI_SUCCESS) { + log_debug_status(err, "HII database protocol not available, ignoring: %m"); + return NULL; + } + + /* First call sizes the layout. We pass length=0 / buffer=NULL and expect EFI_BUFFER_TOO_SMALL. */ + uint16_t length = 0; + err = hii_db->GetKeyboardLayout(hii_db, /* KeyGuid= */ NULL, &length, /* KeyboardLayout= */ NULL); + if (err != EFI_BUFFER_TOO_SMALL) { + log_debug_status(err, "Initial GetKeyboardLayout did not report required buffer size, ignoring: %m"); + return NULL; + } + if (length < sizeof(EFI_HII_KEYBOARD_LAYOUT)) { + log_debug("Reported keyboard layout size %u is smaller than the header, ignoring.", length); + return NULL; + } + + _cleanup_free_ EFI_HII_KEYBOARD_LAYOUT *layout = xmalloc(length); + err = hii_db->GetKeyboardLayout(hii_db, /* KeyGuid= */ NULL, &length, layout); + if (err != EFI_SUCCESS) { + log_debug_status(err, "Failed to retrieve current keyboard layout, ignoring: %m"); + return NULL; + } + + if (length < sizeof(EFI_HII_KEYBOARD_LAYOUT)) { + log_debug("Reported keyboard layout size %u shrank below the header, ignoring.", length); + return NULL; + } + + if (layout->LayoutLength != length) { + log_debug("Keyboard layout reports inconsistent LayoutLength %u vs %u, ignoring.", + layout->LayoutLength, length); + return NULL; + } + + uint32_t off = layout->LayoutDescriptorStringOffset; + if (off > length || length - off < sizeof(EFI_DESCRIPTION_STRING_BUNDLE)) { + log_debug("Keyboard layout descriptor string offset %u out of bounds (length %u), ignoring.", + off, length); + return NULL; + } + + const EFI_DESCRIPTION_STRING_BUNDLE *bundle = (const EFI_DESCRIPTION_STRING_BUNDLE *) ((const uint8_t *) layout + off); + if (bundle->DescriptionCount == 0) { + log_debug("Keyboard layout has no description strings, ignoring."); + return NULL; + } + + /* Walk Strings[] looking for the U+0020 that terminates the first language tag. */ + size_t max_chars = (length - off - sizeof(EFI_DESCRIPTION_STRING_BUNDLE)) / sizeof(char16_t); + size_t n; + for (n = 0; n < max_chars; n++) + if (bundle->Strings[n] == u' ') + break; + if (n == max_chars) { + log_debug("Keyboard layout language tag is not terminated by a space, ignoring."); + return NULL; + } + if (n == 0) { + log_debug("Keyboard layout language tag is empty, ignoring."); + return NULL; + } + + char16_t *s = xnew(char16_t, n + 1); + memcpy(s, bundle->Strings, n * sizeof(char16_t)); + s[n] = u'\0'; + return s; +} diff --git a/src/boot/hii.h b/src/boot/hii.h new file mode 100644 index 0000000000000..23ebcd303208f --- /dev/null +++ b/src/boot/hii.h @@ -0,0 +1,9 @@ +/* SPDX-License-Identifier: LGPL-2.1-or-later */ +#pragma once + +#include "efi.h" + +/* Queries the firmware's HII database for the currently-active keyboard layout and returns the RFC 4646 + * language tag (e.g. u"de-DE") embedded in the layout description bundle. Returns NULL if the protocol + * is not provided, the table is malformed, or no language tag is present. */ +char16_t *hii_query_keyboard_layout_language(void); diff --git a/src/boot/meson.build b/src/boot/meson.build index 29fb64efbee1b..1b8f94e58a247 100644 --- a/src/boot/meson.build +++ b/src/boot/meson.build @@ -320,6 +320,7 @@ libefi_sources = files( 'efi-string.c', 'export-vars.c', 'graphics.c', + 'hii.c', 'initrd.c', 'measure.c', 'part-discovery.c', diff --git a/src/boot/proto/hii-database.h b/src/boot/proto/hii-database.h new file mode 100644 index 0000000000000..7284704f00e23 --- /dev/null +++ b/src/boot/proto/hii-database.h @@ -0,0 +1,126 @@ +/* SPDX-License-Identifier: LGPL-2.1-or-later */ +#pragma once + +#include "efi.h" + +#define EFI_HII_DATABASE_PROTOCOL_GUID \ + GUID_DEF(0xef9fc172, 0xa1b2, 0x4693, 0xb3, 0x27, 0x6d, 0x32, 0xfc, 0x41, 0x60, 0x42) + +typedef void *EFI_HII_HANDLE; + +typedef struct { + EFI_GUID PackageListGuid; + uint32_t PackageLength; +} EFI_HII_PACKAGE_LIST_HEADER; + +typedef struct _packed_ { + uint32_t LengthAndType; /* Length:24 | Type:8 (little-endian) */ +} EFI_HII_PACKAGE_HEADER; + +typedef size_t EFI_HII_DATABASE_NOTIFY_TYPE; + +typedef EFI_STATUS (EFIAPI *EFI_HII_DATABASE_NOTIFY)( + uint8_t PackageType, + EFI_GUID *PackageGuid, + EFI_HII_PACKAGE_HEADER *Package, + EFI_HII_HANDLE Handle, + EFI_HII_DATABASE_NOTIFY_TYPE NotifyType); + +typedef struct EFI_HII_DATABASE_PROTOCOL EFI_HII_DATABASE_PROTOCOL; + +struct EFI_HII_DATABASE_PROTOCOL { + EFI_STATUS (EFIAPI *NewPackageList)( + EFI_HII_DATABASE_PROTOCOL *This, + EFI_HII_PACKAGE_LIST_HEADER *PackageList, + EFI_HANDLE DriverHandle, + EFI_HII_HANDLE *Handle); + + EFI_STATUS (EFIAPI *RemovePackageList)( + EFI_HII_DATABASE_PROTOCOL *This, + EFI_HII_HANDLE Handle); + + EFI_STATUS (EFIAPI *UpdatePackageList)( + EFI_HII_DATABASE_PROTOCOL *This, + EFI_HII_HANDLE Handle, + EFI_HII_PACKAGE_LIST_HEADER *PackageList); + + EFI_STATUS (EFIAPI *ListPackageLists)( + EFI_HII_DATABASE_PROTOCOL *This, + uint8_t PackageType, + EFI_GUID *PackageGuid, + size_t *HandleBufferLength, + EFI_HII_HANDLE *Handle); + + EFI_STATUS (EFIAPI *ExportPackageLists)( + EFI_HII_DATABASE_PROTOCOL *This, + EFI_HII_HANDLE Handle, + size_t *BufferSize, + EFI_HII_PACKAGE_LIST_HEADER *Buffer); + + EFI_STATUS (EFIAPI *RegisterPackageNotify)( + EFI_HII_DATABASE_PROTOCOL *This, + uint8_t PackageType, + EFI_GUID *PackageGuid, + EFI_HII_DATABASE_NOTIFY PackageNotifyFn, + EFI_HII_DATABASE_NOTIFY_TYPE NotifyType, + EFI_HANDLE *NotifyHandle); + + EFI_STATUS (EFIAPI *UnregisterPackageNotify)( + EFI_HII_DATABASE_PROTOCOL *This, + EFI_HANDLE NotificationHandle); + + EFI_STATUS (EFIAPI *FindKeyboardLayouts)( + EFI_HII_DATABASE_PROTOCOL *This, + uint16_t *KeyGuidBufferLength, + EFI_GUID *KeyGuidBuffer); + + EFI_STATUS (EFIAPI *GetKeyboardLayout)( + EFI_HII_DATABASE_PROTOCOL *This, + EFI_GUID *KeyGuid, + uint16_t *KeyboardLayoutLength, + void *KeyboardLayout); + + EFI_STATUS (EFIAPI *SetKeyboardLayout)( + EFI_HII_DATABASE_PROTOCOL *This, + EFI_GUID *KeyGuid); + + EFI_STATUS (EFIAPI *GetPackageListHandle)( + EFI_HII_DATABASE_PROTOCOL *This, + EFI_HII_HANDLE PackageListHandle, + EFI_HANDLE *DriverHandle); +}; + +/* EFI_HII_KEYBOARD_LAYOUT and EFI_KEY_DESCRIPTOR are packed: LayoutDescriptorStringOffset follows + * a 16-byte EFI_GUID at offset 2, so it is at offset 18 — *not* a natural 4-byte alignment. */ +typedef struct _packed_ { + uint16_t LayoutLength; + EFI_GUID Guid; + uint32_t LayoutDescriptorStringOffset; + uint8_t DescriptorCount; + /* EFI_KEY_DESCRIPTOR Descriptors[DescriptorCount] follows here, then at + * LayoutDescriptorStringOffset (from the start of this struct) the description-string bundle. */ +} EFI_HII_KEYBOARD_LAYOUT; + +typedef struct _packed_ { + uint32_t Key; + char16_t Unicode; + char16_t ShiftedUnicode; + char16_t AltGrUnicode; + char16_t ShiftedAltGrUnicode; + uint16_t Modifier; + uint16_t AffectedAttribute; +} EFI_KEY_DESCRIPTOR; + +/* The description-string bundle that LayoutDescriptorStringOffset points to. After DescriptionCount, + * each of the DescriptionCount entries is laid out as: + * + * CHAR16 Language[]; // RFC 4646 tag, terminated by the Space below (no NUL) + * CHAR16 Space; // U+0020 + * CHAR16 DescriptionString[]; // NUL-terminated UTF-16 description + * + * Despite what the UEFI spec text says, Language is encoded as UTF-16 (CHAR16) in practice — see EDK2 + * MdeModulePkg/Bus/Usb/UsbKbDxe/KeyBoard.h USB_KEYBOARD_LAYOUT_PACK_BIN. */ +typedef struct _packed_ { + uint16_t DescriptionCount; + char16_t Strings[]; +} EFI_DESCRIPTION_STRING_BUNDLE; diff --git a/src/bootctl/bootctl-status.c b/src/bootctl/bootctl-status.c index 2c0eb4d1d00d4..4cd0e3aca0e75 100644 --- a/src/bootctl/bootctl-status.c +++ b/src/bootctl/bootctl-status.c @@ -320,7 +320,7 @@ static int efi_get_variable_path_and_warn(const char *variable, char **ret) { static void print_yes_no_line(bool first, bool good, const char *name) { printf("%s%s %s\n", - first ? " Features: " : " ", + first ? " Features: " : " ", COLOR_MARK_BOOL(good), name); } @@ -387,25 +387,26 @@ int verb_status(int argc, char *argv[], uintptr_t _data, void *userdata) { uint64_t flag; const char *name; } loader_flags[] = { - { EFI_LOADER_FEATURE_BOOT_COUNTING, "Boot counting" }, - { EFI_LOADER_FEATURE_CONFIG_TIMEOUT, "Menu timeout control" }, - { EFI_LOADER_FEATURE_CONFIG_TIMEOUT_ONE_SHOT, "One-shot menu timeout control" }, - { EFI_LOADER_FEATURE_ENTRY_DEFAULT, "Default entry control" }, - { EFI_LOADER_FEATURE_ENTRY_ONESHOT, "One-shot entry control" }, - { EFI_LOADER_FEATURE_XBOOTLDR, "Support for XBOOTLDR partition" }, - { EFI_LOADER_FEATURE_RANDOM_SEED, "Support for passing random seed to OS" }, - { EFI_LOADER_FEATURE_LOAD_DRIVER, "Load drop-in drivers" }, - { EFI_LOADER_FEATURE_SORT_KEY, "Support Type #1 sort-key field" }, - { EFI_LOADER_FEATURE_SAVED_ENTRY, "Support @saved pseudo-entry" }, - { EFI_LOADER_FEATURE_DEVICETREE, "Support Type #1 devicetree field" }, - { EFI_LOADER_FEATURE_SECUREBOOT_ENROLL, "Enroll SecureBoot keys" }, - { EFI_LOADER_FEATURE_RETAIN_SHIM, "Retain SHIM protocols" }, - { EFI_LOADER_FEATURE_MENU_DISABLE, "Menu can be disabled" }, - { EFI_LOADER_FEATURE_MULTI_PROFILE_UKI, "Multi-Profile UKIs are supported" }, - { EFI_LOADER_FEATURE_REPORT_URL, "Loader reports network boot URL" }, - { EFI_LOADER_FEATURE_TYPE1_UKI, "Support Type #1 uki field" }, - { EFI_LOADER_FEATURE_TYPE1_UKI_URL, "Support Type #1 uki-url field" }, - { EFI_LOADER_FEATURE_TPM2_ACTIVE_PCR_BANKS, "Loader reports active TPM2 PCR banks" }, + { EFI_LOADER_FEATURE_BOOT_COUNTING, "Boot counting" }, + { EFI_LOADER_FEATURE_CONFIG_TIMEOUT, "Menu timeout control" }, + { EFI_LOADER_FEATURE_CONFIG_TIMEOUT_ONE_SHOT, "One-shot menu timeout control" }, + { EFI_LOADER_FEATURE_ENTRY_DEFAULT, "Default entry control" }, + { EFI_LOADER_FEATURE_ENTRY_ONESHOT, "One-shot entry control" }, + { EFI_LOADER_FEATURE_XBOOTLDR, "Support for XBOOTLDR partition" }, + { EFI_LOADER_FEATURE_RANDOM_SEED, "Support for passing random seed to OS" }, + { EFI_LOADER_FEATURE_LOAD_DRIVER, "Load drop-in drivers" }, + { EFI_LOADER_FEATURE_SORT_KEY, "Support Type #1 sort-key field" }, + { EFI_LOADER_FEATURE_SAVED_ENTRY, "Support @saved pseudo-entry" }, + { EFI_LOADER_FEATURE_DEVICETREE, "Support Type #1 devicetree field" }, + { EFI_LOADER_FEATURE_SECUREBOOT_ENROLL, "Enroll SecureBoot keys" }, + { EFI_LOADER_FEATURE_RETAIN_SHIM, "Retain SHIM protocols" }, + { EFI_LOADER_FEATURE_MENU_DISABLE, "Menu can be disabled" }, + { EFI_LOADER_FEATURE_MULTI_PROFILE_UKI, "Multi-Profile UKIs are supported" }, + { EFI_LOADER_FEATURE_REPORT_URL, "Loader reports network boot URL" }, + { EFI_LOADER_FEATURE_TYPE1_UKI, "Support Type #1 uki field" }, + { EFI_LOADER_FEATURE_TYPE1_UKI_URL, "Support Type #1 uki-url field" }, + { EFI_LOADER_FEATURE_TPM2_ACTIVE_PCR_BANKS, "Loader reports active TPM2 PCR banks" }, + { EFI_LOADER_FEATURE_KEYBOARD_LAYOUT, "Loader reports firmware keyboard layout" }, }; static const struct { uint64_t flag; @@ -426,7 +427,7 @@ int verb_status(int argc, char *argv[], uintptr_t _data, void *userdata) { }; _cleanup_free_ char *fw_type = NULL, *fw_info = NULL, *loader = NULL, *loader_path = NULL, *stub = NULL, *stub_path = NULL, *current_entry = NULL, *oneshot_entry = NULL, *preferred_entry = NULL, *default_entry = NULL, *sysfail_entry = NULL, - *sysfail_reason = NULL; + *sysfail_reason = NULL, *keyboard_layout = NULL; uint64_t loader_features = 0, stub_features = 0; int have; @@ -444,6 +445,7 @@ int verb_status(int argc, char *argv[], uintptr_t _data, void *userdata) { (void) efi_get_variable_string_and_warn(EFI_LOADER_VARIABLE_STR("LoaderEntryDefault"), &default_entry); (void) efi_get_variable_string_and_warn(EFI_LOADER_VARIABLE_STR("LoaderEntrySysFail"), &sysfail_entry); (void) efi_get_variable_string_and_warn(EFI_LOADER_VARIABLE_STR("LoaderSysFailReason"), &sysfail_reason); + (void) efi_get_variable_string_and_warn(EFI_LOADER_VARIABLE_STR("LoaderKeyboardLayout"), &keyboard_layout); SecureBootMode secure = efi_get_secure_boot_mode(); printf("%sSystem:%s\n", ansi_underline(), ansi_normal()); @@ -503,7 +505,7 @@ int verb_status(int argc, char *argv[], uintptr_t _data, void *userdata) { if (loader) { printf("%sCurrent Boot Loader:%s\n", ansi_underline(), ansi_normal()); - printf(" Product: %s%s%s\n", ansi_highlight(), loader, ansi_normal()); + printf(" Product: %s%s%s\n", ansi_highlight(), loader, ansi_normal()); for (size_t i = 0; i < ELEMENTSOF(loader_flags); i++) print_yes_no_line(i == 0, FLAGS_SET(loader_features, loader_flags[i].flag), loader_flags[i].name); @@ -521,38 +523,42 @@ int verb_status(int argc, char *argv[], uintptr_t _data, void *userdata) { SD_ID128_FORMAT_VAL(loader_partition_uuid), SD_ID128_FORMAT_VAL(esp_uuid)); - printf(" Partition: /dev/disk/by-partuuid/" SD_ID128_UUID_FORMAT_STR "\n", + printf(" Partition: /dev/disk/by-partuuid/" SD_ID128_UUID_FORMAT_STR "\n", SD_ID128_FORMAT_VAL(loader_partition_uuid)); } else if (loader_path) - printf(" Partition: n/a\n"); + printf(" Partition: n/a\n"); if (loader_path) - printf(" Loader: %s%s%s/%s%s\n", + printf(" Loader: %s%s%s/%s%s\n", glyph(GLYPH_TREE_RIGHT), ansi_grey(), arg_esp_path, ansi_normal(), loader_path); if (loader_url) - printf(" Net Boot URL: %s\n", loader_url); + printf(" Net Boot URL: %s\n", loader_url); + + if (FLAGS_SET(loader_features, EFI_LOADER_FEATURE_KEYBOARD_LAYOUT)) + printf("Keyboard Layout: %s\n", + keyboard_layout ?: "n/a (not reported by firmware)"); if (sysfail_entry) - printf("SysFail Reason: %s\n", sysfail_reason); + printf(" SysFail Reason: %s\n", sysfail_reason); if (current_entry) - printf(" Current Entry: %s\n", current_entry); + printf(" Current Entry: %s\n", current_entry); if (preferred_entry) - printf(" Preferred Entry: %s\n", preferred_entry); + printf("Preferred Entry: %s\n", preferred_entry); if (default_entry) - printf(" Default Entry: %s\n", default_entry); + printf(" Default Entry: %s\n", default_entry); if (oneshot_entry && !streq_ptr(oneshot_entry, default_entry)) - printf(" OneShot Entry: %s\n", oneshot_entry); + printf(" OneShot Entry: %s\n", oneshot_entry); if (sysfail_entry) - printf(" SysFail Entry: %s\n", sysfail_entry); + printf(" SysFail Entry: %s\n", sysfail_entry); printf("\n"); } if (stub) { printf("%sCurrent Stub:%s\n", ansi_underline(), ansi_normal()); - printf(" Product: %s%s%s\n", ansi_highlight(), stub, ansi_normal()); + printf(" Product: %s%s%s\n", ansi_highlight(), stub, ansi_normal()); for (size_t i = 0; i < ELEMENTSOF(stub_flags); i++) print_yes_no_line(i == 0, FLAGS_SET(stub_features, stub_flags[i].flag), stub_flags[i].name); @@ -573,16 +579,16 @@ int verb_status(int argc, char *argv[], uintptr_t _data, void *userdata) { SD_ID128_FORMAT_VAL(esp_uuid), SD_ID128_FORMAT_VAL(xbootldr_uuid)); - printf(" Partition: /dev/disk/by-partuuid/" SD_ID128_UUID_FORMAT_STR "\n", + printf(" Partition: /dev/disk/by-partuuid/" SD_ID128_UUID_FORMAT_STR "\n", SD_ID128_FORMAT_VAL(stub_partition_uuid)); } else if (stub_path) - printf(" Partition: n/a\n"); + printf(" Partition: n/a\n"); if (stub_path) - printf(" Stub: %s%s\n", glyph(GLYPH_TREE_RIGHT), strna(stub_path)); + printf(" Stub: %s%s\n", glyph(GLYPH_TREE_RIGHT), strna(stub_path)); if (stub_url) - printf(" Net Boot URL: %s\n", stub_url); + printf(" Net Boot URL: %s\n", stub_url); printf("\n"); } diff --git a/src/fundamental/efivars-fundamental.h b/src/fundamental/efivars-fundamental.h index 15be52119a0a2..fea23fa29c182 100644 --- a/src/fundamental/efivars-fundamental.h +++ b/src/fundamental/efivars-fundamental.h @@ -29,6 +29,7 @@ #define EFI_LOADER_FEATURE_TYPE1_UKI_URL (UINT64_C(1) << 17) #define EFI_LOADER_FEATURE_TPM2_ACTIVE_PCR_BANKS (UINT64_C(1) << 18) #define EFI_LOADER_FEATURE_ENTRY_PREFERRED (UINT64_C(1) << 19) +#define EFI_LOADER_FEATURE_KEYBOARD_LAYOUT (UINT64_C(1) << 20) /* Features of the stub, i.e. systemd-stub */ #define EFI_STUB_FEATURE_REPORT_BOOT_PARTITION (UINT64_C(1) << 0) diff --git a/src/locale/kbd-model-map b/src/locale/kbd-model-map index 612f6d749a76f..c0ef480530ac5 100644 --- a/src/locale/kbd-model-map +++ b/src/locale/kbd-model-map @@ -1,73 +1,76 @@ # Originally generated from system-config-keyboard's model list. -# consolelayout xlayout xmodel xvariant xoptions -sg ch pc105 de_nodeadkeys terminate:ctrl_alt_bksp -nl nl pc105 - terminate:ctrl_alt_bksp -mk-utf mk,us pc105 - terminate:ctrl_alt_bksp,grp:shifts_toggle,grp_led:scroll -trq tr pc105 - terminate:ctrl_alt_bksp -uk gb pc105 - terminate:ctrl_alt_bksp -is-latin1 is pc105 - terminate:ctrl_alt_bksp -de de pc105 - terminate:ctrl_alt_bksp -la-latin1 latam pc105 - terminate:ctrl_alt_bksp -us us pc105+inet - terminate:ctrl_alt_bksp -ko kr pc105 - terminate:ctrl_alt_bksp -ro-std ro pc105 std terminate:ctrl_alt_bksp -de-latin1 de pc105 - terminate:ctrl_alt_bksp -slovene si pc105 - terminate:ctrl_alt_bksp -hu hu pc105 - terminate:ctrl_alt_bksp -jp106 jp jp106 - terminate:ctrl_alt_bksp -croat hr pc105 - terminate:ctrl_alt_bksp -it2 it pc105 - terminate:ctrl_alt_bksp -hu101 hu pc105 qwerty terminate:ctrl_alt_bksp -sr-latin rs pc105 latin terminate:ctrl_alt_bksp -fi fi pc105 - terminate:ctrl_alt_bksp -fr_CH ch pc105 fr terminate:ctrl_alt_bksp -dk-latin1 dk pc105 - terminate:ctrl_alt_bksp -fr fr pc105 - terminate:ctrl_alt_bksp -it it pc105 - terminate:ctrl_alt_bksp -ua-utf ua,us pc105 - terminate:ctrl_alt_bksp,grp:shifts_toggle,grp_led:scroll -fr-latin1 fr pc105 - terminate:ctrl_alt_bksp -sg-latin1 ch pc105 de_nodeadkeys terminate:ctrl_alt_bksp -be-latin1 be pc105 - terminate:ctrl_alt_bksp -dk dk pc105 - terminate:ctrl_alt_bksp -fr-pc fr pc105 - terminate:ctrl_alt_bksp -bg_pho-utf8 bg,us pc105 ,phonetic terminate:ctrl_alt_bksp,grp:shifts_toggle,grp_led:scroll -it-ibm it pc105 - terminate:ctrl_alt_bksp -cz-us-qwertz cz,us pc105 - terminate:ctrl_alt_bksp,grp:shifts_toggle,grp_led:scroll -cz-qwerty cz,us pc105 qwerty, terminate:ctrl_alt_bksp,grp:shifts_toggle,grp_led:scroll -br-abnt2 br abnt2 - terminate:ctrl_alt_bksp -ro ro pc105 - terminate:ctrl_alt_bksp -us-acentos us pc105 intl terminate:ctrl_alt_bksp -pt-latin1 pt pc105 - terminate:ctrl_alt_bksp -ro-std-cedilla ro pc105 std_cedilla terminate:ctrl_alt_bksp -tj_alt-UTF8 tj pc105 - terminate:ctrl_alt_bksp -de-latin1-nodeadkeys de pc105 nodeadkeys terminate:ctrl_alt_bksp -no no pc105 - terminate:ctrl_alt_bksp -bg_bds-utf8 bg,us pc105 - terminate:ctrl_alt_bksp,grp:shifts_toggle,grp_led:scroll -dvorak us pc105 dvorak terminate:ctrl_alt_bksp -dvorak us pc105 dvorak-alt-intl terminate:ctrl_alt_bksp -ru ru,us pc105 - terminate:ctrl_alt_bksp,grp:shifts_toggle,grp_led:scroll -cz-lat2 cz pc105 qwerty terminate:ctrl_alt_bksp -pl2 pl pc105 - terminate:ctrl_alt_bksp -es es pc105 - terminate:ctrl_alt_bksp -ro-cedilla ro pc105 cedilla terminate:ctrl_alt_bksp -ie ie pc105 - terminate:ctrl_alt_bksp -et ee pc105 - terminate:ctrl_alt_bksp -sk-qwerty sk pc105 qwerty terminate:ctrl_alt_bksp -sk-qwertz sk pc105 - terminate:ctrl_alt_bksp -fr-latin9 fr pc105 latin9 terminate:ctrl_alt_bksp -fr_CH-latin1 ch pc105 fr terminate:ctrl_alt_bksp -cf ca pc105 - terminate:ctrl_alt_bksp -sv-latin1 se pc105 - terminate:ctrl_alt_bksp -sr-cy rs pc105 - terminate:ctrl_alt_bksp -gr gr,us pc105 - terminate:ctrl_alt_bksp,grp:shifts_toggle,grp_led:scroll -by by,us pc105 - terminate:ctrl_alt_bksp,grp:shifts_toggle,grp_led:scroll -il il pc105 - terminate:ctrl_alt_bksp -kazakh kz,us pc105 - terminate:ctrl_alt_bksp,grp:shifts_toggle,grp_led:scroll -lt.baltic lt pc105 - terminate:ctrl_alt_bksp -lt.l4 lt pc105 - terminate:ctrl_alt_bksp -lt lt pc105 - terminate:ctrl_alt_bksp -khmer kh,us pc105 - terminate:ctrl_alt_bksp -es-dvorak es microsoftpro dvorak terminate:ctrl_alt_bksp -lv lv pc105 apostrophe terminate:ctrl_alt_bksp -lv-tilde lv pc105 tilde terminate:ctrl_alt_bksp -ge ge,us pc105 - terminate:ctrl_alt_bksp +# The sixth column is an optional comma-separated list of RFC 4646 / BCP 47 +# language tags the row matches; used to map a firmware-reported keyboard +# layout to a vconsole keymap. Use "-" or omit when no tags apply. +# consolelayout xlayout xmodel xvariant xoptions bcp47 +sg ch pc105 de_nodeadkeys terminate:ctrl_alt_bksp de-CH +nl nl pc105 - terminate:ctrl_alt_bksp nl-NL,nl +mk-utf mk,us pc105 - terminate:ctrl_alt_bksp,grp:shifts_toggle,grp_led:scroll mk-MK,mk +trq tr pc105 - terminate:ctrl_alt_bksp tr-TR,tr +uk gb pc105 - terminate:ctrl_alt_bksp en-GB +is-latin1 is pc105 - terminate:ctrl_alt_bksp is-IS,is +de de pc105 - terminate:ctrl_alt_bksp de-DE,de-AT,de +la-latin1 latam pc105 - terminate:ctrl_alt_bksp es-419,es-MX,es-AR,es-CO,es-CL,es-PE,es-VE +us us pc105+inet - terminate:ctrl_alt_bksp en-US,en +ko kr pc105 - terminate:ctrl_alt_bksp ko-KR,ko +ro-std ro pc105 std terminate:ctrl_alt_bksp - +de-latin1 de pc105 - terminate:ctrl_alt_bksp - +slovene si pc105 - terminate:ctrl_alt_bksp sl-SI,sl +hu hu pc105 - terminate:ctrl_alt_bksp hu-HU,hu +jp106 jp jp106 - terminate:ctrl_alt_bksp ja-JP,ja +croat hr pc105 - terminate:ctrl_alt_bksp hr-HR,hr +it2 it pc105 - terminate:ctrl_alt_bksp - +hu101 hu pc105 qwerty terminate:ctrl_alt_bksp - +sr-latin rs pc105 latin terminate:ctrl_alt_bksp sr-Latn-RS,sr-Latn +fi fi pc105 - terminate:ctrl_alt_bksp fi-FI,fi +fr_CH ch pc105 fr terminate:ctrl_alt_bksp fr-CH +dk-latin1 dk pc105 - terminate:ctrl_alt_bksp da-DK,da +fr fr pc105 - terminate:ctrl_alt_bksp fr-FR,fr +it it pc105 - terminate:ctrl_alt_bksp it-IT,it-CH,it +ua-utf ua,us pc105 - terminate:ctrl_alt_bksp,grp:shifts_toggle,grp_led:scroll uk-UA,uk +fr-latin1 fr pc105 - terminate:ctrl_alt_bksp - +sg-latin1 ch pc105 de_nodeadkeys terminate:ctrl_alt_bksp - +be-latin1 be pc105 - terminate:ctrl_alt_bksp fr-BE,nl-BE +dk dk pc105 - terminate:ctrl_alt_bksp - +fr-pc fr pc105 - terminate:ctrl_alt_bksp - +bg_pho-utf8 bg,us pc105 ,phonetic terminate:ctrl_alt_bksp,grp:shifts_toggle,grp_led:scroll bg-BG,bg +it-ibm it pc105 - terminate:ctrl_alt_bksp - +cz-us-qwertz cz,us pc105 - terminate:ctrl_alt_bksp,grp:shifts_toggle,grp_led:scroll cs-CZ,cs +cz-qwerty cz,us pc105 qwerty, terminate:ctrl_alt_bksp,grp:shifts_toggle,grp_led:scroll - +br-abnt2 br abnt2 - terminate:ctrl_alt_bksp pt-BR +ro ro pc105 - terminate:ctrl_alt_bksp ro-RO,ro +us-acentos us pc105 intl terminate:ctrl_alt_bksp - +pt-latin1 pt pc105 - terminate:ctrl_alt_bksp pt-PT,pt +ro-std-cedilla ro pc105 std_cedilla terminate:ctrl_alt_bksp - +tj_alt-UTF8 tj pc105 - terminate:ctrl_alt_bksp tg-TJ,tg +de-latin1-nodeadkeys de pc105 nodeadkeys terminate:ctrl_alt_bksp - +no no pc105 - terminate:ctrl_alt_bksp nb-NO,nn-NO,no +bg_bds-utf8 bg,us pc105 - terminate:ctrl_alt_bksp,grp:shifts_toggle,grp_led:scroll - +dvorak us pc105 dvorak terminate:ctrl_alt_bksp - +dvorak us pc105 dvorak-alt-intl terminate:ctrl_alt_bksp - +ru ru,us pc105 - terminate:ctrl_alt_bksp,grp:shifts_toggle,grp_led:scroll ru-RU,ru +cz-lat2 cz pc105 qwerty terminate:ctrl_alt_bksp - +pl2 pl pc105 - terminate:ctrl_alt_bksp pl-PL,pl +es es pc105 - terminate:ctrl_alt_bksp es-ES,es +ro-cedilla ro pc105 cedilla terminate:ctrl_alt_bksp - +ie ie pc105 - terminate:ctrl_alt_bksp en-IE,ga-IE,ga +et ee pc105 - terminate:ctrl_alt_bksp et-EE,et +sk-qwerty sk pc105 qwerty terminate:ctrl_alt_bksp - +sk-qwertz sk pc105 - terminate:ctrl_alt_bksp sk-SK,sk +fr-latin9 fr pc105 latin9 terminate:ctrl_alt_bksp - +fr_CH-latin1 ch pc105 fr terminate:ctrl_alt_bksp - +cf ca pc105 - terminate:ctrl_alt_bksp fr-CA +sv-latin1 se pc105 - terminate:ctrl_alt_bksp sv-SE,sv +sr-cy rs pc105 - terminate:ctrl_alt_bksp sr-Cyrl-RS,sr-Cyrl,sr-RS,sr +gr gr,us pc105 - terminate:ctrl_alt_bksp,grp:shifts_toggle,grp_led:scroll el-GR,el +by by,us pc105 - terminate:ctrl_alt_bksp,grp:shifts_toggle,grp_led:scroll be-BY,be +il il pc105 - terminate:ctrl_alt_bksp he-IL,he +kazakh kz,us pc105 - terminate:ctrl_alt_bksp,grp:shifts_toggle,grp_led:scroll kk-KZ,kk +lt.baltic lt pc105 - terminate:ctrl_alt_bksp - +lt.l4 lt pc105 - terminate:ctrl_alt_bksp - +lt lt pc105 - terminate:ctrl_alt_bksp lt-LT,lt +khmer kh,us pc105 - terminate:ctrl_alt_bksp km-KH,km +es-dvorak es microsoftpro dvorak terminate:ctrl_alt_bksp - +lv lv pc105 apostrophe terminate:ctrl_alt_bksp lv-LV,lv +lv-tilde lv pc105 tilde terminate:ctrl_alt_bksp - +ge ge,us pc105 - terminate:ctrl_alt_bksp ka-GE,ka diff --git a/src/shared/vconsole-util.c b/src/shared/vconsole-util.c index 6e8c17561e8a7..aa156f4736fc6 100644 --- a/src/shared/vconsole-util.c +++ b/src/shared/vconsole-util.c @@ -578,6 +578,83 @@ int find_language_fallback(const char *lang, char **ret) { } } +int find_vconsole_keymap_for_bcp47(const char *tag, char **ret) { + _cleanup_fclose_ FILE *f = NULL; + _cleanup_free_ char *fallback = NULL; + const char *map; + int r; + + /* Look up a vconsole keymap by RFC 4646 / BCP 47 language tag (e.g. "de-DE") using the optional + * sixth column of /usr/share/systemd/kbd-model-map. That column lists comma-separated tags the + * row matches. An exact (case-insensitive) tag match returns immediately; if no exact match + * exists, the first row whose tag matches the input's primary subtag wins. Returns 1 on match, + * 0 otherwise. */ + + assert(tag); + assert(ret); + + if (isempty(tag)) { + *ret = NULL; + return 0; + } + + size_t primary_len = strcspn(tag, "-"); + if (primary_len == 0) { + *ret = NULL; + return 0; + } + + map = systemd_kbd_model_map(); + f = fopen(map, "re"); + if (!f) + return -errno; + + for (unsigned n = 0;;) { + _cleanup_strv_free_ char **a = NULL, **tags = NULL; + + r = read_next_mapping(map, 5, UINT_MAX, f, &n, &a); + if (r < 0) + return r; + if (r == 0) + break; + + /* The BCP 47 tag list is the optional 6th column. "-" / empty means "no tags". */ + if (strv_length(a) < 6 || isempty(a[5]) || streq(a[5], "-")) + continue; + + r = strv_split_full(&tags, a[5], ",", /* flags= */ 0); + if (r < 0) + return r; + + STRV_FOREACH(t, tags) { + if (strcaseeq(*t, tag)) { + log_debug("Found vconsole keymap '%s' for BCP 47 tag '%s' (exact match).", + a[0], tag); + + r = strdup_to(ret, a[0]); + if (r < 0) + return r; + + return 1; + } + if (!fallback && strlen(*t) == primary_len && !strchr(*t, '-') && strncaseeq(*t, tag, primary_len)) { + fallback = strdup(a[0]); + if (!fallback) + return -ENOMEM; + } + } + } + + if (!fallback) { + *ret = NULL; + return 0; + } + + log_debug("Found vconsole keymap '%s' for BCP 47 tag '%s' (primary subtag match).", fallback, tag); + *ret = TAKE_PTR(fallback); + return 1; +} + int vconsole_serialize(const VCContext *vc, const X11Context *xc, char ***env) { int r; diff --git a/src/shared/vconsole-util.h b/src/shared/vconsole-util.h index 494bc6ea489e6..3d52ec9866ce8 100644 --- a/src/shared/vconsole-util.h +++ b/src/shared/vconsole-util.h @@ -39,4 +39,6 @@ typedef int (*X11VerifyCallback)(const X11Context *xc); int vconsole_convert_to_x11(const VCContext *vc, X11VerifyCallback verify, X11Context *ret); int x11_convert_to_vconsole(const X11Context *xc, VCContext *ret); +int find_vconsole_keymap_for_bcp47(const char *tag, char **ret); + int vconsole_serialize(const VCContext *vc, const X11Context *xc, char ***env); diff --git a/src/vconsole/vconsole-setup.c b/src/vconsole/vconsole-setup.c index 73bf240cf5130..c1bf2b0f4f5d9 100644 --- a/src/vconsole/vconsole-setup.c +++ b/src/vconsole/vconsole-setup.c @@ -15,6 +15,7 @@ #include "alloc-util.h" #include "creds-util.h" +#include "efivars.h" #include "env-file.h" #include "errno-util.h" #include "fd-util.h" @@ -30,6 +31,7 @@ #include "string-util.h" #include "strv.h" #include "terminal-util.h" +#include "vconsole-util.h" typedef struct Context { char *keymap; @@ -72,6 +74,34 @@ static void context_merge_config( context_merge(dst, src, src_compat, font_unimap); } +static int context_read_efi(Context *c) { + _cleanup_(context_done) Context v = {}; + _cleanup_free_ char *tag = NULL; + int r; + + assert(c); + + if (!is_efi_boot()) + return 0; + + r = efi_get_variable_string(EFI_LOADER_VARIABLE_STR("LoaderKeyboardLayout"), &tag); + if (r == -ENOENT) + return 0; + if (r < 0) + return log_debug_errno(r, "Failed to read LoaderKeyboardLayout EFI variable, ignoring: %m"); + + r = find_vconsole_keymap_for_bcp47(tag, &v.keymap); + if (r < 0) + return log_debug_errno(r, "Failed to look up vconsole keymap for firmware tag '%s', ignoring: %m", tag); + if (r == 0) { + log_debug("No vconsole keymap matches firmware-provided keyboard layout '%s', ignoring.", tag); + return 0; + } + + context_merge_config(c, &v, /* src_compat= */ NULL); + return 0; +} + static int context_read_creds(Context *c) { _cleanup_(context_done) Context v = {}; int r; @@ -144,10 +174,13 @@ static int context_read_proc_cmdline(Context *c) { static void context_load_config(Context *c) { assert(c); - /* Load data from credentials (lowest priority) */ + /* Pick up the firmware-provided keyboard layout if any (lowest priority) */ + (void) context_read_efi(c); + + /* Load data from credentials */ (void) context_read_creds(c); - /* Load data from configuration file (middle priority) */ + /* Load data from configuration file */ (void) context_read_env(c); /* Let the kernel command line override /etc/vconsole.conf (highest priority) */ From 54776b5fcee5dc50f214fc7bba5faa8c29a74f00 Mon Sep 17 00:00:00 2001 From: Daan De Meyer Date: Mon, 11 May 2026 13:42:53 +0200 Subject: [PATCH 221/242] vmspawn: Use builtin vdagent instead of spicevmc The builtin one also makes the clipboard and such work. spicevmc is only required for remote desktop use cases, so let's use the builtin one instead. --- src/vmspawn/vmspawn.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/vmspawn/vmspawn.c b/src/vmspawn/vmspawn.c index f49b36af1e866..27fa3501ee5f3 100644 --- a/src/vmspawn/vmspawn.c +++ b/src/vmspawn/vmspawn.c @@ -2965,9 +2965,9 @@ static int run_virtual_machine(int kvm_device_fd, int vhost_device_fd) { return r; r = qemu_config_section(config_file, "chardev", "vdagent", - "backend", "spicevmc", - "debug", "0", - "name", "vdagent"); + "backend", "qemu-vdagent", + "clipboard", "on", + "debug", "0"); if (r < 0) return r; From f0064f1b05886e47d947cac8b18efd89c45ec47f Mon Sep 17 00:00:00 2001 From: Daan De Meyer Date: Mon, 11 May 2026 14:59:39 +0200 Subject: [PATCH 222/242] Add liburing to build image packages --- mkosi/mkosi.images/build/mkosi.conf.d/arch/mkosi.conf | 1 + mkosi/mkosi.images/build/mkosi.conf.d/centos-fedora/mkosi.conf | 1 + mkosi/mkosi.images/build/mkosi.conf.d/debian-ubuntu/mkosi.conf | 1 + mkosi/mkosi.images/build/mkosi.conf.d/opensuse/mkosi.conf | 1 + 4 files changed, 4 insertions(+) diff --git a/mkosi/mkosi.images/build/mkosi.conf.d/arch/mkosi.conf b/mkosi/mkosi.images/build/mkosi.conf.d/arch/mkosi.conf index d7a79d11c1051..c199288d94bd9 100644 --- a/mkosi/mkosi.images/build/mkosi.conf.d/arch/mkosi.conf +++ b/mkosi/mkosi.images/build/mkosi.conf.d/arch/mkosi.conf @@ -10,3 +10,4 @@ Packages= diffutils erofs-utils git + liburing diff --git a/mkosi/mkosi.images/build/mkosi.conf.d/centos-fedora/mkosi.conf b/mkosi/mkosi.images/build/mkosi.conf.d/centos-fedora/mkosi.conf index 4d0ca8917d83f..472e6b66927b1 100644 --- a/mkosi/mkosi.images/build/mkosi.conf.d/centos-fedora/mkosi.conf +++ b/mkosi/mkosi.images/build/mkosi.conf.d/centos-fedora/mkosi.conf @@ -11,6 +11,7 @@ Packages= gdb git-core libasan + liburing-devel libubsan rpm-build which diff --git a/mkosi/mkosi.images/build/mkosi.conf.d/debian-ubuntu/mkosi.conf b/mkosi/mkosi.images/build/mkosi.conf.d/debian-ubuntu/mkosi.conf index b3fd0836597cf..d762bf861a193 100644 --- a/mkosi/mkosi.images/build/mkosi.conf.d/debian-ubuntu/mkosi.conf +++ b/mkosi/mkosi.images/build/mkosi.conf.d/debian-ubuntu/mkosi.conf @@ -11,5 +11,6 @@ Packages= erofs-utils git-core ?exact-name(libclang-rt-dev) + liburing-dev dpkg-dev mount diff --git a/mkosi/mkosi.images/build/mkosi.conf.d/opensuse/mkosi.conf b/mkosi/mkosi.images/build/mkosi.conf.d/opensuse/mkosi.conf index 70a1b31b64196..6604b3bf2f45c 100644 --- a/mkosi/mkosi.images/build/mkosi.conf.d/opensuse/mkosi.conf +++ b/mkosi/mkosi.images/build/mkosi.conf.d/opensuse/mkosi.conf @@ -12,6 +12,7 @@ Packages= git-core grep gzip + liburing-devel patterns-base-minimal_base rpm-build sed From a19d92003d5703fce35bdd81ed7dce978fc814d1 Mon Sep 17 00:00:00 2001 From: Lennart Poettering Date: Mon, 11 May 2026 17:17:35 +0200 Subject: [PATCH 223/242] units: pull in basic.target rather than sysinit.target from system-install.target Many of our services are nowadays implemented via socket activation, and hence require sockets.target to be active to be accessible. One of them is mute-console.socket, which we typically want to use from systemd-firstboot.service, systemd-sysinstall.service and other related services. Hence let's pull in basic.target rather than sysinit.target from system-install.target since it pulls sockets.target in too. Effectively, this doesn't change much except for pulling in a bunch more sockets, and frankly going for sysinit.target was really a bug to begin width. --- units/system-install.target | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/units/system-install.target b/units/system-install.target index 660110dcea36a..d60f9d8137ee7 100644 --- a/units/system-install.target +++ b/units/system-install.target @@ -10,6 +10,6 @@ [Unit] Description=System Installer Documentation=man:systemd-sysinstall(8) -Requires=sysinit.target -After=sysinit.target +Requires=basic.target +After=basic.target AllowIsolate=yes From d0168f4db613610b674dcf41afababde57b8d56b Mon Sep 17 00:00:00 2001 From: Daan De Meyer Date: Mon, 11 May 2026 21:58:24 +0200 Subject: [PATCH 224/242] mkosi: Drop CPUs= limit Limiting VMs to 2 cpus was cargo culting without any actual data that this benefits performance. The host OS has a scheduler, let's make use of it and give the VM access to all the CPUs. This doesn't mean they become inaccessible to the host, it just means the VM gets as many virtual CPUs as the host has CPU cores (threads). How they get scheduled is still up to the host OS. --- mkosi/mkosi.conf | 1 - 1 file changed, 1 deletion(-) diff --git a/mkosi/mkosi.conf b/mkosi/mkosi.conf index 2fc087cb73f40..0fbb81eeed23e 100644 --- a/mkosi/mkosi.conf +++ b/mkosi/mkosi.conf @@ -141,7 +141,6 @@ Credentials= tty.virtual.tty1.agetty.autologin=root tty.virtual.tty1.login.noauth=yes RuntimeBuildSources=yes -CPUs=2 TPM=yes VSock=yes KVM=yes From 324fab7ed665df0a2e3395b75c7b88f33805ccb8 Mon Sep 17 00:00:00 2001 From: Lennart Poettering Date: Wed, 6 May 2026 16:13:12 +0200 Subject: [PATCH 225/242] firstboot,sysinstall,hostnamed: always show FANCY_NAME= This makes sure that whenever we want to show the OS name we can show the fancy name. Thus this moves the escaping/validation of the fancy name out of hostnamed into generic code, and then makes use of it in sysinstall,firstboot,prompt-util. --- src/basic/os-util.c | 45 +++++++++++++++++++++++++++++++++++++ src/basic/os-util.h | 3 +++ src/firstboot/firstboot.c | 7 ++++-- src/hostname/hostnamectl.c | 4 ++-- src/hostname/hostnamed.c | 16 +------------ src/shared/prompt-util.c | 10 +++++++-- src/sysinstall/sysinstall.c | 7 ++++-- 7 files changed, 69 insertions(+), 23 deletions(-) diff --git a/src/basic/os-util.c b/src/basic/os-util.c index 06b476f1344a8..bb87fe371c216 100644 --- a/src/basic/os-util.c +++ b/src/basic/os-util.c @@ -3,10 +3,12 @@ #include #include "alloc-util.h" +#include "ansi-color.h" #include "chase.h" #include "dirent-util.h" #include "env-file.h" #include "errno-util.h" +#include "escape.h" #include "fd-util.h" #include "fs-util.h" #include "glyph-util.h" @@ -512,3 +514,46 @@ const char* os_release_pretty_name(const char *pretty_name, const char *name) { return empty_to_null(pretty_name) ?: empty_to_null(name) ?: "Linux"; } + +char *unescape_fancy_name(char **fancy_name) { + assert(fancy_name); + + /* Checks if the fancy name is valid, unescapes if it is, nullifies it if not */ + + _cleanup_free_ char *unescaped_fancy_name = NULL; + + if (isempty(*fancy_name)) + goto clear; + + /* We undo one level of C escapes on this */ + ssize_t n = cunescape(*fancy_name, /* flags= */ 0, &unescaped_fancy_name); + if (n < 0) { + log_debug_errno((int) n, "Failed to unescape FANCY_NAME= string, suppressing: %m"); + goto clear; + } + + if (!utf8_is_valid(unescaped_fancy_name)) { + log_debug("Unescaped FANCY_NAME= string is not valid UTF-8, suppressing."); + goto clear; + } + + free_and_replace(*fancy_name, unescaped_fancy_name); + return *fancy_name; + +clear: + *fancy_name = mfree(*fancy_name); + return NULL; +} + +bool use_fancy_name(const char *fancy_name) { + + /* Decides whether to show the specified fancy name */ + + if (isempty(fancy_name)) + return false; + + if (!colors_enabled()) + return false; + + return emoji_enabled() || ascii_is_valid(fancy_name); +} diff --git a/src/basic/os-util.h b/src/basic/os-util.h index 02d2c9540f2de..336c17ec21aa0 100644 --- a/src/basic/os-util.h +++ b/src/basic/os-util.h @@ -55,3 +55,6 @@ int load_os_release_pairs_with_prefix(const char *root, const char *prefix, char int os_release_support_ended(const char *support_end, bool quiet, usec_t *ret_eol); const char* os_release_pretty_name(const char *pretty_name, const char *name); + +bool use_fancy_name(const char *fancy_name); +char *unescape_fancy_name(char **fancy_name); diff --git a/src/firstboot/firstboot.c b/src/firstboot/firstboot.c index 3d768b491f83a..8048ec0e810ca 100644 --- a/src/firstboot/firstboot.c +++ b/src/firstboot/firstboot.c @@ -103,7 +103,7 @@ STATIC_DESTRUCTOR_REGISTER(arg_kernel_cmdline, freep); STATIC_DESTRUCTOR_REGISTER(arg_image_policy, image_policy_freep); static void print_welcome(int rfd, sd_varlink **mute_console_link) { - _cleanup_free_ char *pretty_name = NULL, *os_name = NULL, *ansi_color = NULL; + _cleanup_free_ char *pretty_name = NULL, *os_name = NULL, *ansi_color = NULL, *fancy_name = NULL; static bool done = false; const char *pn, *ac; int r; @@ -133,6 +133,7 @@ static void print_welcome(int rfd, sd_varlink **mute_console_link) { r = parse_os_release_at(rfd, "PRETTY_NAME", &pretty_name, + "FANCY_NAME", &fancy_name, "NAME", &os_name, "ANSI_COLOR", &ansi_color); if (r < 0) @@ -142,7 +143,9 @@ static void print_welcome(int rfd, sd_varlink **mute_console_link) { pn = os_release_pretty_name(pretty_name, os_name); ac = isempty(ansi_color) ? "0" : ansi_color; - if (colors_enabled()) + if (use_fancy_name(unescape_fancy_name(&fancy_name))) + printf(ANSI_HIGHLIGHT "Welcome to " ANSI_NORMAL "%s" ANSI_HIGHLIGHT "!" ANSI_NORMAL "\n", fancy_name); + else if (colors_enabled()) printf(ANSI_HIGHLIGHT "Welcome to " ANSI_NORMAL "\x1B[%sm%s" ANSI_HIGHLIGHT "!" ANSI_NORMAL "\n", ac, pn); else printf("Welcome to %s!\n", pn); diff --git a/src/hostname/hostnamectl.c b/src/hostname/hostnamectl.c index 2989840b364d7..257b31fb88dda 100644 --- a/src/hostname/hostnamectl.c +++ b/src/hostname/hostnamectl.c @@ -23,13 +23,13 @@ #include "log.h" #include "main-func.h" #include "options.h" +#include "os-util.h" #include "parse-argument.h" #include "polkit-agent.h" #include "pretty-print.h" #include "runtime-scope.h" #include "string-util.h" #include "time-util.h" -#include "utf8.h" #include "verbs.h" static bool arg_ask_password = true; @@ -236,7 +236,7 @@ static int print_status_info(StatusInfo *i) { return table_log_add_error(r); } - if (!isempty(i->os_fancy_name) && (emoji_enabled() || ascii_is_valid(i->os_fancy_name)) && colors_enabled()) { + if (use_fancy_name(i->os_fancy_name)) { r = table_add_many(table, TABLE_FIELD, "Operating System", TABLE_STRING_WITH_ANSI, i->os_fancy_name, diff --git a/src/hostname/hostnamed.c b/src/hostname/hostnamed.c index 60b48112449cf..5ec7b2fea992b 100644 --- a/src/hostname/hostnamed.c +++ b/src/hostname/hostnamed.c @@ -22,7 +22,6 @@ #include "device-private.h" #include "env-file.h" #include "env-util.h" -#include "escape.h" #include "extract-word.h" #include "fileio.h" #include "hashmap.h" @@ -230,20 +229,7 @@ static void context_read_os_release(Context *c) { if (free_and_strdup(&c->data[PROP_OS_PRETTY_NAME], os_release_pretty_name(os_pretty_name, os_name)) < 0) log_oom(); - if (!isempty(os_fancy_name)) { - _cleanup_free_ char *unescaped = NULL; - - /* We undo one level of C escapes on this */ - ssize_t l = cunescape(os_fancy_name, /* flags= */ 0, &unescaped); - if (l < 0) { - log_warning_errno(l, "Failed to unescape fancy OS name, ignoring: %m"); - os_fancy_name = mfree(os_fancy_name); - } else if (!utf8_is_valid(unescaped)) { - log_warning("Unescaped fancy OS name contains invalid UTF-8, ignoring."); - os_fancy_name = mfree(os_fancy_name); - } else - free_and_replace(os_fancy_name, unescaped); - } + unescape_fancy_name(&os_fancy_name); if (isempty(os_fancy_name)) { free(os_fancy_name); /* free if empty string */ diff --git a/src/shared/prompt-util.c b/src/shared/prompt-util.c index 7cead706fd95f..2f334f9b52832 100644 --- a/src/shared/prompt-util.c +++ b/src/shared/prompt-util.c @@ -12,6 +12,7 @@ #include "parse-util.h" #include "pretty-print.h" #include "prompt-util.h" +#include "stdio-util.h" #include "string-util.h" #include "strv.h" #include "terminal-util.h" @@ -234,11 +235,12 @@ int chrome_show( _cleanup_free_ char *b = NULL, *ansi_color_reverse = NULL; if (!bottom) { - _cleanup_free_ char *pretty_name = NULL, *os_name = NULL, *ansi_color = NULL, *documentation_url = NULL; + _cleanup_free_ char *pretty_name = NULL, *os_name = NULL, *ansi_color = NULL, *documentation_url = NULL, *fancy_name = NULL; r = parse_os_release( /* root= */ NULL, "PRETTY_NAME", &pretty_name, + "FANCY_NAME", &fancy_name, "NAME", &os_name, "ANSI_COLOR", &ansi_color, "ANSI_COLOR_REVERSE", &ansi_color_reverse, @@ -258,7 +260,11 @@ int chrome_show( free_and_replace(ansi_color_reverse, j); } - if (asprintf(&b, "\x1B[0;%sm %s %s", c, m, ansi_color_reverse ?: ANSI_COLOR_CHROME) < 0) + if (use_fancy_name(unescape_fancy_name(&fancy_name))) + b = asprintf_safe("\x1B[0;%sm \x1B[0m%s\x1B[0;%sm %s", c, fancy_name, c, ansi_color_reverse ?: ANSI_COLOR_CHROME); + else + b = asprintf_safe("\x1B[0;%sm %s %s", c, m, ansi_color_reverse ?: ANSI_COLOR_CHROME); + if (!b) return log_oom_debug(); if (documentation_url) { diff --git a/src/sysinstall/sysinstall.c b/src/sysinstall/sysinstall.c index d8f5cbee3c93f..7d08b4866b272 100644 --- a/src/sysinstall/sysinstall.c +++ b/src/sysinstall/sysinstall.c @@ -214,7 +214,7 @@ static int parse_argv(int argc, char *argv[]) { } static int print_welcome(sd_varlink **mute_console_link) { - _cleanup_free_ char *pretty_name = NULL, *os_name = NULL, *ansi_color = NULL; + _cleanup_free_ char *pretty_name = NULL, *os_name = NULL, *ansi_color = NULL, *fancy_name = NULL; const char *pn, *ac; int r; @@ -229,6 +229,7 @@ static int print_welcome(sd_varlink **mute_console_link) { r = parse_os_release( /* root= */ NULL, "PRETTY_NAME", &pretty_name, + "FANCY_NAME", &fancy_name, "NAME", &os_name, "ANSI_COLOR", &ansi_color); if (r < 0) @@ -238,7 +239,9 @@ static int print_welcome(sd_varlink **mute_console_link) { pn = os_release_pretty_name(pretty_name, os_name); ac = isempty(ansi_color) ? "0" : ansi_color; - if (colors_enabled()) + if (use_fancy_name(unescape_fancy_name(&fancy_name))) + printf(ANSI_HIGHLIGHT "Welcome to the " ANSI_NORMAL "%s" ANSI_HIGHLIGHT " Installer!" ANSI_NORMAL "\n", fancy_name); + else if (colors_enabled()) printf(ANSI_HIGHLIGHT "Welcome to the " ANSI_NORMAL "\x1B[%sm%s" ANSI_HIGHLIGHT " Installer!" ANSI_NORMAL "\n", ac, pn); else printf("Welcome to the %s Installer!\n", pn); From 2fcf96327370defdd8bd7dca71f7b34832c32d40 Mon Sep 17 00:00:00 2001 From: Daan De Meyer Date: Mon, 11 May 2026 21:58:24 +0200 Subject: [PATCH 226/242] vmspawn: Add missing error logging --- src/vmspawn/vmspawn.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/vmspawn/vmspawn.c b/src/vmspawn/vmspawn.c index 27fa3501ee5f3..97920aecc0efc 100644 --- a/src/vmspawn/vmspawn.c +++ b/src/vmspawn/vmspawn.c @@ -3103,7 +3103,7 @@ static int run_virtual_machine(int kvm_device_fd, int vhost_device_fd) { &tree_local_lock, &snapshot_directory); if (r < 0) - return r; + return log_error_errno(r, "Failed to create ephemeral snapshot of '%s': %m", arg_directory); arg_directory = strdup(snapshot_directory); if (!arg_directory) From 58863bdaec9ba4f6be5a171bdc5ffa42f22580a7 Mon Sep 17 00:00:00 2001 From: Yu Watanabe Date: Fri, 24 Apr 2026 03:27:36 +0900 Subject: [PATCH 227/242] iovec-wrapper: make iovw_size() take NULL again This partially reverts 267b16f33c5636617927f15d7ae6b945c862a587. We usually make xyz_size() take NULL, e.g. hashmap_size(). --- src/basic/iovec-wrapper.c | 3 ++- src/test/test-iovec-wrapper.c | 2 ++ 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/src/basic/iovec-wrapper.c b/src/basic/iovec-wrapper.c index 59b1addaaf266..c5d1a878ff3cb 100644 --- a/src/basic/iovec-wrapper.c +++ b/src/basic/iovec-wrapper.c @@ -237,7 +237,8 @@ void iovw_rebase(struct iovec_wrapper *iovw, void *old, void *new) { } size_t iovw_size(const struct iovec_wrapper *iovw) { - assert(iovw); + if (iovw_isempty(iovw)) + return 0; return iovec_total_size(iovw->iovec, iovw->count); } diff --git a/src/test/test-iovec-wrapper.c b/src/test/test-iovec-wrapper.c index 791a041ab8d6f..d38806e75e543 100644 --- a/src/test/test-iovec-wrapper.c +++ b/src/test/test-iovec-wrapper.c @@ -393,6 +393,8 @@ TEST(iovw_size) { ASSERT_OK(iovw_put(&iovw, (char*) "efghij", 6)); ASSERT_OK(iovw_put(&iovw, (char*) "kl", 2)); ASSERT_EQ(iovw_size(&iovw), 12U); + + ASSERT_EQ(iovw_size(NULL), 0U); } TEST(iovw_concat) { From fe6fed5d9730de40bd328ee11678ab235d067b94 Mon Sep 17 00:00:00 2001 From: Yu Watanabe Date: Sat, 25 Apr 2026 09:57:41 +0900 Subject: [PATCH 228/242] iovec-wrapper: introduce iovw_put_full() and friends to make them accept zero length entry These will be used later. Preparation for later commits. --- src/basic/iovec-wrapper.c | 65 ++++++++++++++++++---------- src/basic/iovec-wrapper.h | 40 ++++++++++++++---- src/test/test-iovec-wrapper.c | 80 +++++++++++++++++++++++++++++++++-- 3 files changed, 151 insertions(+), 34 deletions(-) diff --git a/src/basic/iovec-wrapper.c b/src/basic/iovec-wrapper.c index c5d1a878ff3cb..375ecbdaaa23d 100644 --- a/src/basic/iovec-wrapper.c +++ b/src/basic/iovec-wrapper.c @@ -45,14 +45,13 @@ int iovw_compare(const struct iovec_wrapper *a, const struct iovec_wrapper *b) { return CMP(a->count, b->count); } -int iovw_put(struct iovec_wrapper *iovw, void *data, size_t len) { +int iovw_put_full(struct iovec_wrapper *iovw, bool accept_zero, void *data, size_t len) { assert(iovw); + assert(data || len == 0); - if (len == 0) + if (len == 0 && !accept_zero) return 0; - assert(data); - if (iovw->count >= IOV_MAX) return -E2BIG; @@ -63,16 +62,18 @@ int iovw_put(struct iovec_wrapper *iovw, void *data, size_t len) { return 1; } -int iovw_put_iov(struct iovec_wrapper *iovw, const struct iovec *iov) { +int iovw_put_iov_full(struct iovec_wrapper *iovw, bool accept_zero, const struct iovec *iov) { assert(iovw); if (!iov) return 0; - return iovw_put(iovw, iov->iov_base, iov->iov_len); + return iovw_put_full(iovw, accept_zero, iov->iov_base, iov->iov_len); } -int iovw_put_iovw(struct iovec_wrapper *iovw, const struct iovec_wrapper *source) { +int iovw_put_iovw_full(struct iovec_wrapper *iovw, bool accept_zero, const struct iovec_wrapper *source) { + int r; + assert(iovw); if (iovw_isempty(source)) @@ -87,24 +88,41 @@ int iovw_put_iovw(struct iovec_wrapper *iovw, const struct iovec_wrapper *source if (iovw->count + source->count > IOV_MAX) return -E2BIG; - if (!GREEDY_REALLOC_APPEND(iovw->iovec, iovw->count, source->iovec, source->count)) - return -ENOMEM; + if (accept_zero) { + if (!GREEDY_REALLOC_APPEND(iovw->iovec, iovw->count, source->iovec, source->count)) + return -ENOMEM; + + return 0; + } + + /* When accept_zero is false, we need to filter zero length iovec in source. */ + size_t original_count = iovw->count; + + FOREACH_ARRAY(iovec, source->iovec, source->count) { + r = iovw_put_iov_full(iovw, accept_zero, iovec); + if (r < 0) + goto rollback; + } return 0; + +rollback: + iovw->count = original_count; + return r; } -int iovw_consume(struct iovec_wrapper *iovw, void *data, size_t len) { +int iovw_consume_full(struct iovec_wrapper *iovw, bool accept_zero, void *data, size_t len) { /* Move data into iovw or free on error */ int r; - r = iovw_put(iovw, data, len); + r = iovw_put_full(iovw, accept_zero, data, len); if (r <= 0) free(data); return r; } -int iovw_consume_iov(struct iovec_wrapper *iovw, struct iovec *iov) { +int iovw_consume_iov_full(struct iovec_wrapper *iovw, bool accept_zero, struct iovec *iov) { int r; assert(iovw); @@ -112,7 +130,7 @@ int iovw_consume_iov(struct iovec_wrapper *iovw, struct iovec *iov) { if (!iov) return 0; - r = iovw_put_iov(iovw, iov); + r = iovw_put_iov_full(iovw, accept_zero, iov); if (r <= 0) iovec_done(iov); else @@ -123,27 +141,30 @@ int iovw_consume_iov(struct iovec_wrapper *iovw, struct iovec *iov) { return r; } -int iovw_extend(struct iovec_wrapper *iovw, const void *data, size_t len) { +int iovw_extend_full(struct iovec_wrapper *iovw, bool accept_zero, const void *data, size_t len) { + assert(iovw); + assert(data || len == 0); + if (len == 0) - return 0; + return iovw_put_full(iovw, accept_zero, /* data= */ NULL, /* len= */ 0); void *c = memdup(data, len); if (!c) return -ENOMEM; - return iovw_consume(iovw, c, len); + return iovw_consume_full(iovw, accept_zero, c, len); } -int iovw_extend_iov(struct iovec_wrapper *iovw, const struct iovec *iov) { +int iovw_extend_iov_full(struct iovec_wrapper *iovw, bool accept_zero, const struct iovec *iov) { assert(iovw); - if (!iovec_is_set(iov)) + if (!iov) return 0; - return iovw_extend(iovw, iov->iov_base, iov->iov_len); + return iovw_extend_full(iovw, accept_zero, iov->iov_base, iov->iov_len); } -int iovw_extend_iovw(struct iovec_wrapper *iovw, const struct iovec_wrapper *source) { +int iovw_extend_iovw_full(struct iovec_wrapper *iovw, bool accept_zero, const struct iovec_wrapper *source) { int r; assert(iovw); @@ -165,7 +186,7 @@ int iovw_extend_iovw(struct iovec_wrapper *iovw, const struct iovec_wrapper *sou size_t original_count = iovw->count; FOREACH_ARRAY(iovec, source->iovec, source->count) { - r = iovw_extend_iov(iovw, iovec); + r = iovw_extend_iov_full(iovw, accept_zero, iovec); if (r < 0) goto rollback; } @@ -260,7 +281,7 @@ int iovw_concat(const struct iovec_wrapper *iovw, struct iovec *ret) { uint8_t *p = buf; FOREACH_ARRAY(i, iovw->iovec, iovw->count) - p = mempcpy(p, i->iov_base, i->iov_len); + p = mempcpy_safe(p, i->iov_base, i->iov_len); *p = 0; diff --git a/src/basic/iovec-wrapper.h b/src/basic/iovec-wrapper.h index 26b7f5ad4be30..c860eaf3339e7 100644 --- a/src/basic/iovec-wrapper.h +++ b/src/basic/iovec-wrapper.h @@ -16,14 +16,38 @@ static inline bool iovw_equal(const struct iovec_wrapper *a, const struct iovec_ return iovw_compare(a, b) == 0; } -int iovw_put(struct iovec_wrapper *iovw, void *data, size_t len); -int iovw_put_iov(struct iovec_wrapper *iovw, const struct iovec *iov); -int iovw_put_iovw(struct iovec_wrapper *iovw, const struct iovec_wrapper *source); -int iovw_consume(struct iovec_wrapper *iovw, void *data, size_t len); -int iovw_consume_iov(struct iovec_wrapper *iovw, struct iovec *iov); -int iovw_extend(struct iovec_wrapper *iovw, const void *data, size_t len); -int iovw_extend_iov(struct iovec_wrapper *iovw, const struct iovec *iov); -int iovw_extend_iovw(struct iovec_wrapper *iovw, const struct iovec_wrapper *source); +int iovw_put_full(struct iovec_wrapper *iovw, bool accept_zero, void *data, size_t len); +static inline int iovw_put(struct iovec_wrapper *iovw, void *data, size_t len) { + return iovw_put_full(iovw, false, data, len); +} +int iovw_put_iov_full(struct iovec_wrapper *iovw, bool accept_zero, const struct iovec *iov); +static inline int iovw_put_iov(struct iovec_wrapper *iovw, const struct iovec *iov) { + return iovw_put_iov_full(iovw, false, iov); +} +int iovw_put_iovw_full(struct iovec_wrapper *iovw, bool accept_zero, const struct iovec_wrapper *source); +static inline int iovw_put_iovw(struct iovec_wrapper *iovw, const struct iovec_wrapper *source) { + return iovw_put_iovw_full(iovw, false, source); +} +int iovw_consume_full(struct iovec_wrapper *iovw, bool accept_zero, void *data, size_t len); +static inline int iovw_consume(struct iovec_wrapper *iovw, void *data, size_t len) { + return iovw_consume_full(iovw, false, data, len); +} +int iovw_consume_iov_full(struct iovec_wrapper *iovw, bool accept_zero, struct iovec *iov); +static inline int iovw_consume_iov(struct iovec_wrapper *iovw, struct iovec *iov) { + return iovw_consume_iov_full(iovw, false, iov); +} +int iovw_extend_full(struct iovec_wrapper *iovw, bool accept_zero, const void *data, size_t len); +static inline int iovw_extend(struct iovec_wrapper *iovw, const void *data, size_t len) { + return iovw_extend_full(iovw, false, data, len); +} +int iovw_extend_iov_full(struct iovec_wrapper *iovw, bool accept_zero, const struct iovec *iov); +static inline int iovw_extend_iov(struct iovec_wrapper *iovw, const struct iovec *iov) { + return iovw_extend_iov_full(iovw, false, iov); +} +int iovw_extend_iovw_full(struct iovec_wrapper *iovw, bool accept_zero, const struct iovec_wrapper *source); +static inline int iovw_extend_iovw(struct iovec_wrapper *iovw, const struct iovec_wrapper *source) { + return iovw_extend_iovw_full(iovw, false, source); +} static inline bool iovw_isempty(const struct iovec_wrapper *iovw) { return !iovw || iovw->count == 0; diff --git a/src/test/test-iovec-wrapper.c b/src/test/test-iovec-wrapper.c index d38806e75e543..3bd2123c3c95a 100644 --- a/src/test/test-iovec-wrapper.c +++ b/src/test/test-iovec-wrapper.c @@ -82,6 +82,12 @@ TEST(iovw_put) { ASSERT_EQ(memcmp(iovw.iovec[1].iov_base, "barbar", 6), 0); ASSERT_EQ(iovw.iovec[2].iov_len, 1U); ASSERT_EQ(memcmp(iovw.iovec[2].iov_base, "q", 1), 0); + + ASSERT_OK(iovw_put_full(&iovw, /* accept_zero= */ false, NULL, 0)); + ASSERT_EQ(iovw.count, 3U); + ASSERT_OK(iovw_put_full(&iovw, /* accept_zero= */ true, NULL, 0)); + ASSERT_EQ(iovw.count, 4U); + ASSERT_TRUE(iovec_equal(&iovw.iovec[3], &(struct iovec) {})); } TEST(iovw_put_iov) { @@ -100,6 +106,12 @@ TEST(iovw_put_iov) { ASSERT_TRUE(iovec_equal(&iovw.iovec[0], &IOVEC_MAKE_STRING("aaa"))); ASSERT_TRUE(iovec_equal(&iovw.iovec[1], &IOVEC_MAKE_STRING("bbb"))); ASSERT_TRUE(iovec_equal(&iovw.iovec[2], &IOVEC_MAKE_STRING("ccc"))); + + ASSERT_OK(iovw_put_iov_full(&iovw, /* accept_zero= */ false, &(struct iovec) {})); + ASSERT_EQ(iovw.count, 3U); + ASSERT_OK(iovw_put_iov_full(&iovw, /* accept_zero= */ true, &(struct iovec) {})); + ASSERT_EQ(iovw.count, 4U); + ASSERT_TRUE(iovec_equal(&iovw.iovec[3], &(struct iovec) {})); } TEST(iovw_put_iovw) { @@ -113,7 +125,8 @@ TEST(iovw_put_iovw) { ASSERT_OK(iovw_put_iov(&source, &IOVEC_MAKE_STRING("aaa"))); ASSERT_OK(iovw_put_iov(&source, &IOVEC_MAKE_STRING("bbb"))); ASSERT_OK(iovw_put_iov(&source, &IOVEC_MAKE_STRING("ccc"))); - ASSERT_EQ(source.count, 3U); + ASSERT_OK(iovw_put_iov_full(&source, /* accept_zero= */ true, &(struct iovec) {})); + ASSERT_EQ(source.count, 4U); /* Pre-seed target with one entry to check that append adds on top rather than replacing */ ASSERT_OK(iovw_put_iov(&target, &IOVEC_MAKE_STRING("xxx"))); @@ -136,10 +149,24 @@ TEST(iovw_put_iovw) { ASSERT_PTR_EQ(target.iovec[5].iov_base, source.iovec[2].iov_base); /* Source is unchanged */ - ASSERT_EQ(source.count, 3U); + ASSERT_EQ(source.count, 4U); ASSERT_TRUE(iovec_equal(&source.iovec[0], &IOVEC_MAKE_STRING("aaa"))); ASSERT_TRUE(iovec_equal(&source.iovec[1], &IOVEC_MAKE_STRING("bbb"))); ASSERT_TRUE(iovec_equal(&source.iovec[2], &IOVEC_MAKE_STRING("ccc"))); + ASSERT_TRUE(iovec_equal(&source.iovec[3], &(struct iovec) {})); + + ASSERT_OK(iovw_put_iovw_full(&target, /* accept_zero= */ true, &source)); + ASSERT_EQ(target.count, 10U); + ASSERT_TRUE(iovec_equal(&target.iovec[0], &IOVEC_MAKE_STRING("xxx"))); + ASSERT_TRUE(iovec_equal(&target.iovec[1], &IOVEC_MAKE_STRING("yyy"))); + ASSERT_TRUE(iovec_equal(&target.iovec[2], &IOVEC_MAKE_STRING("zzz"))); + ASSERT_TRUE(iovec_equal(&target.iovec[3], &IOVEC_MAKE_STRING("aaa"))); + ASSERT_TRUE(iovec_equal(&target.iovec[4], &IOVEC_MAKE_STRING("bbb"))); + ASSERT_TRUE(iovec_equal(&target.iovec[5], &IOVEC_MAKE_STRING("ccc"))); + ASSERT_TRUE(iovec_equal(&target.iovec[6], &IOVEC_MAKE_STRING("aaa"))); + ASSERT_TRUE(iovec_equal(&target.iovec[7], &IOVEC_MAKE_STRING("bbb"))); + ASSERT_TRUE(iovec_equal(&target.iovec[8], &IOVEC_MAKE_STRING("ccc"))); + ASSERT_TRUE(iovec_equal(&target.iovec[9], &(struct iovec) {})); /* Cannot pass the same objects */ ASSERT_ERROR(iovw_put_iovw(&target, &target), EINVAL); @@ -169,6 +196,12 @@ TEST(iovw_extend) { /* Mutating the caller's buffer does not affect what's stored */ memset(buf, 'X', sizeof buf); ASSERT_EQ(memcmp(iovw.iovec[0].iov_base, "one", 3), 0); + + ASSERT_OK(iovw_extend_full(&iovw, /* accept_zero= */ false, NULL, 0)); + ASSERT_EQ(iovw.count, 2U); + ASSERT_OK(iovw_extend_full(&iovw, /* accept_zero= */ true, NULL, 0)); + ASSERT_EQ(iovw.count, 3U); + ASSERT_TRUE(iovec_equal(&iovw.iovec[2], &(struct iovec) {})); } TEST(iovw_extend_iov) { @@ -186,6 +219,12 @@ TEST(iovw_extend_iov) { ASSERT_TRUE(iovec_equal(&iovw.iovec[0], &IOVEC_MAKE_STRING("aaa"))); ASSERT_TRUE(iovec_equal(&iovw.iovec[1], &IOVEC_MAKE_STRING("bbb"))); ASSERT_TRUE(iovec_equal(&iovw.iovec[2], &IOVEC_MAKE_STRING("ccc"))); + + ASSERT_OK(iovw_extend_iov_full(&iovw, /* accept_zero= */ false, &(struct iovec) {})); + ASSERT_EQ(iovw.count, 3U); + ASSERT_OK(iovw_extend_iov_full(&iovw, /* accept_zero= */ true, &(struct iovec) {})); + ASSERT_EQ(iovw.count, 4U); + ASSERT_TRUE(iovec_equal(&iovw.iovec[3], &(struct iovec) {})); } TEST(iovw_extend_iovw) { @@ -199,7 +238,8 @@ TEST(iovw_extend_iovw) { ASSERT_OK(iovw_put(&source, (char*) "one", 3)); ASSERT_OK(iovw_put(&source, (char*) "twotwo", 6)); - ASSERT_EQ(source.count, 2U); + ASSERT_OK(iovw_put_full(&source, /* accept_zero= */ true, NULL, 0)); + ASSERT_EQ(source.count, 3U); /* Pre-seed target with one entry to check that append adds on top rather than replacing */ char *seed = strdup("zero"); @@ -218,8 +258,17 @@ TEST(iovw_extend_iovw) { ASSERT_EQ(target.iovec[2].iov_len, 6U); ASSERT_EQ(memcmp(target.iovec[2].iov_base, "twotwo", 6), 0); + ASSERT_OK(iovw_extend_iovw_full(&target, /* accept_zero= */ true, &source)); + ASSERT_EQ(target.count, 6U); + ASSERT_TRUE(iovec_equal(&target.iovec[0], &IOVEC_MAKE_STRING("zero"))); + ASSERT_TRUE(iovec_equal(&target.iovec[1], &IOVEC_MAKE_STRING("one"))); + ASSERT_TRUE(iovec_equal(&target.iovec[2], &IOVEC_MAKE_STRING("twotwo"))); + ASSERT_TRUE(iovec_equal(&target.iovec[3], &IOVEC_MAKE_STRING("one"))); + ASSERT_TRUE(iovec_equal(&target.iovec[4], &IOVEC_MAKE_STRING("twotwo"))); + ASSERT_TRUE(iovec_equal(&target.iovec[5], &(struct iovec) {})); + /* Source is unchanged */ - ASSERT_EQ(source.count, 2U); + ASSERT_EQ(source.count, 3U); /* Cannot pass the same objects */ ASSERT_ERROR(iovw_extend_iovw(&target, &target), EINVAL); @@ -240,6 +289,16 @@ TEST(iovw_consume) { char *q = ASSERT_NOT_NULL(strdup("")); ASSERT_OK_ZERO(iovw_consume(&iovw, q, 0)); ASSERT_EQ(iovw.count, 1U); + + ASSERT_OK(iovw_consume_full(&iovw, /* accept_zero= */ false, NULL, 0)); + ASSERT_EQ(iovw.count, 1U); + ASSERT_OK(iovw_consume_full(&iovw, /* accept_zero= */ true, NULL, 0)); + ASSERT_EQ(iovw.count, 2U); + ASSERT_TRUE(iovec_equal(&iovw.iovec[1], &(struct iovec) {})); + q = ASSERT_NOT_NULL(strdup("")); + ASSERT_OK(iovw_consume_full(&iovw, /* accept_zero= */ true, q, 0)); + ASSERT_EQ(iovw.count, 3U); + ASSERT_TRUE(iovec_equal(&iovw.iovec[2], &(struct iovec) {})); } TEST(iovw_consume_iov) { @@ -270,6 +329,19 @@ TEST(iovw_consume_iov) { /* zero length iovec is also freed */ ASSERT_NULL(iov.iov_base); ASSERT_EQ(iov.iov_len, 0U); + + ASSERT_OK(iovw_consume_iov_full(&iovw, /* accept_zero= */ false, &(struct iovec) {})); + ASSERT_EQ(iovw.count, 1U); + ASSERT_OK(iovw_consume_iov_full(&iovw, /* accept_zero= */ true, &(struct iovec) {})); + ASSERT_EQ(iovw.count, 2U); + ASSERT_TRUE(iovec_equal(&iovw.iovec[1], &(struct iovec) {})); + iov = (struct iovec) { + .iov_base = ASSERT_NOT_NULL(strdup("")), + .iov_len = 0, + }; + ASSERT_OK(iovw_consume_iov_full(&iovw, /* accept_zero= */ true, &iov)); + ASSERT_EQ(iovw.count, 3U); + ASSERT_TRUE(iovec_equal(&iovw.iovec[2], &(struct iovec) {})); } TEST(iovw_isempty) { From 32729ae92f53f78f5ffb94d22c56b3781e8e6154 Mon Sep 17 00:00:00 2001 From: Yu Watanabe Date: Fri, 24 Apr 2026 03:33:36 +0900 Subject: [PATCH 229/242] iovec-wrapper: introduce iovec_split() and iovw_merge() In many network protocols, the length-prefixed data format is often used. Let's add a simple parser and builder for the format. --- src/basic/iovec-wrapper.c | 119 +++++++++++++++++++++ src/basic/iovec-wrapper.h | 3 + src/test/test-iovec-wrapper.c | 193 ++++++++++++++++++++++++++++++++++ 3 files changed, 315 insertions(+) diff --git a/src/basic/iovec-wrapper.c b/src/basic/iovec-wrapper.c index 375ecbdaaa23d..6b4b006c059dc 100644 --- a/src/basic/iovec-wrapper.c +++ b/src/basic/iovec-wrapper.c @@ -6,6 +6,7 @@ #include "iovec-util.h" #include "iovec-wrapper.h" #include "string-util.h" +#include "unaligned.h" void iovw_done(struct iovec_wrapper *iovw) { assert(iovw); @@ -302,3 +303,121 @@ char* iovw_to_cstring(const struct iovec_wrapper *iovw) { assert(!memchr(iov.iov_base, 0, iov.iov_len)); return TAKE_PTR(iov.iov_base); } + +int iovec_split(const struct iovec *iov, size_t length_size, struct iovec_wrapper *ret) { + int r; + + assert(IN_SET(length_size, 1, 2, 4)); + assert(ret); + + /* This parses the input iovec as length-prefixed data, and stores the result as iovec_wrapper. + * Note, zero-length entries are silently dropped. */ + + if (!iovec_is_set(iov)) { + *ret = (struct iovec_wrapper) {}; + return 0; + } + + _cleanup_(iovw_done_free) struct iovec_wrapper iovw = {}; + for (struct iovec i = *iov; iovec_is_set(&i); ) { + if (i.iov_len < length_size) + return -EBADMSG; + + size_t len; + switch (length_size) { + case 1: + len = *(uint8_t*) i.iov_base; + break; + case 2: + len = unaligned_read_be16(i.iov_base); + break; + case 4: + len = unaligned_read_be32(i.iov_base); + break; + default: + assert_not_reached(); + } + + iovec_inc(&i, length_size); + + if (len == 0) + continue; + + if (i.iov_len < len) + return -EBADMSG; + + r = iovw_extend(&iovw, i.iov_base, len); + if (r < 0) + return r; + + iovec_inc(&i, len); + } + + *ret = TAKE_STRUCT(iovw); + return 0; +} + +int iovw_merge(const struct iovec_wrapper *iovw, size_t length_size, struct iovec *ret) { + assert(IN_SET(length_size, 1, 2, 4)); + assert(ret); + + /* This is the inverse of iovec_split(), and builds a length-prefixed data from iovec_wrapper. + * Note, zero-length entries are silently dropped. */ + + size_t sz = iovw_size(iovw); + if (sz == 0) { + *ret = (struct iovec) {}; + return 0; + } + if (sz == SIZE_MAX) + return -E2BIG; + + if (size_multiply_overflow(length_size, iovw->count)) + return -E2BIG; + + sz = size_add(sz, iovw->count * length_size); + if (sz == SIZE_MAX) + return -E2BIG; + + _cleanup_free_ uint8_t *buf = new(uint8_t, sz); + if (!buf) + return -ENOMEM; + + uint8_t *p = buf; + FOREACH_ARRAY(iov, iovw->iovec, iovw->count) { + if (iov->iov_len == 0) + continue; + + switch (length_size) { + case 1: + if (iov->iov_len > UINT8_MAX) + return -ERANGE; + + *p = iov->iov_len; + break; + case 2: + if (iov->iov_len > UINT16_MAX) + return -ERANGE; + + unaligned_write_be16(p, iov->iov_len); + break; + case 4: + if (iov->iov_len > UINT32_MAX) + return -ERANGE; + + unaligned_write_be32(p, iov->iov_len); + break; + default: + assert_not_reached(); + } + p += length_size; + + p = mempcpy(p, iov->iov_base, iov->iov_len); + } + + assert(sz >= (size_t) (p - buf)); + sz = p - buf; + + *ret = IOVEC_MAKE(TAKE_PTR(buf), sz); + return 0; +} diff --git a/src/basic/iovec-wrapper.h b/src/basic/iovec-wrapper.h index c860eaf3339e7..c2a0cff1aeed6 100644 --- a/src/basic/iovec-wrapper.h +++ b/src/basic/iovec-wrapper.h @@ -68,3 +68,6 @@ void iovw_rebase(struct iovec_wrapper *iovw, void *old, void *new); size_t iovw_size(const struct iovec_wrapper *iovw); int iovw_concat(const struct iovec_wrapper *iovw, struct iovec *ret); char* iovw_to_cstring(const struct iovec_wrapper *iovw); + +int iovec_split(const struct iovec *iov, size_t length_size, struct iovec_wrapper *ret); +int iovw_merge(const struct iovec_wrapper *iovw, size_t length_size, struct iovec *ret); diff --git a/src/test/test-iovec-wrapper.c b/src/test/test-iovec-wrapper.c index 3bd2123c3c95a..523089058707c 100644 --- a/src/test/test-iovec-wrapper.c +++ b/src/test/test-iovec-wrapper.c @@ -5,6 +5,7 @@ #include "alloc-util.h" #include "iovec-util.h" #include "iovec-wrapper.h" +#include "random-util.h" #include "tests.h" TEST(iovw_compare) { @@ -506,4 +507,196 @@ TEST(iovw_to_cstring) { ASSERT_STREQ(s, "foo/bar"); } +TEST(iovw_merge_and_iovec_split) { + _cleanup_(iovw_done_free) struct iovec_wrapper iovw = {}, iovw2 = {}; + _cleanup_(iovec_done) struct iovec v = {}, v2 = {}; + uint8_t *p; + + struct iovec + a = IOVEC_MAKE_STRING("aaa"), + b = IOVEC_MAKE_STRING("bbbb"), + c = IOVEC_MAKE_STRING("ccccc"); + + /* single entry */ + ASSERT_OK(iovw_extend_iov(&iovw, &a)); + + ASSERT_OK(iovw_merge(&iovw, sizeof(uint8_t), &v)); + ASSERT_EQ(v.iov_len, 1 + a.iov_len); + p = ASSERT_NOT_NULL(v.iov_base); + ASSERT_EQ(*p++, a.iov_len); + ASSERT_EQ(memcmp(p, a.iov_base, a.iov_len), 0); + p += a.iov_len; + ASSERT_OK(iovec_split(&v, sizeof(uint8_t), &iovw2)); + ASSERT_TRUE(iovw_equal(&iovw, &iovw2)); + + iovec_done(&v); + iovw_done_free(&iovw2); + + ASSERT_OK(iovw_merge(&iovw, sizeof(uint16_t), &v)); + ASSERT_EQ(v.iov_len, sizeof(uint16_t) + a.iov_len); + p = ASSERT_NOT_NULL(v.iov_base); + ASSERT_EQ(*p++, 0); + ASSERT_EQ(*p++, a.iov_len); + ASSERT_EQ(memcmp(p, a.iov_base, a.iov_len), 0); + p += a.iov_len; + ASSERT_OK(iovec_split(&v, sizeof(uint16_t), &iovw2)); + ASSERT_TRUE(iovw_equal(&iovw, &iovw2)); + + iovec_done(&v); + iovw_done_free(&iovw2); + + ASSERT_OK(iovw_merge(&iovw, sizeof(uint32_t), &v)); + ASSERT_EQ(v.iov_len, sizeof(uint32_t) + a.iov_len); + p = ASSERT_NOT_NULL(v.iov_base); + ASSERT_EQ(*p++, 0); + ASSERT_EQ(*p++, 0); + ASSERT_EQ(*p++, 0); + ASSERT_EQ(*p++, a.iov_len); + ASSERT_EQ(memcmp(p, a.iov_base, a.iov_len), 0); + p += a.iov_len; + ASSERT_OK(iovec_split(&v, sizeof(uint32_t), &iovw2)); + ASSERT_TRUE(iovw_equal(&iovw, &iovw2)); + + iovec_done(&v); + iovw_done_free(&iovw2); + + /* multiple entries */ + ASSERT_OK(iovw_extend_iov(&iovw, &b)); + ASSERT_OK(iovw_extend_iov(&iovw, &c)); + + ASSERT_OK(iovw_merge(&iovw, sizeof(uint8_t), &v)); + ASSERT_EQ(v.iov_len, 3 + a.iov_len + b.iov_len + c.iov_len); + p = ASSERT_NOT_NULL(v.iov_base); + ASSERT_EQ(*p++, a.iov_len); + ASSERT_EQ(memcmp(p, a.iov_base, a.iov_len), 0); + p += a.iov_len; + ASSERT_EQ(*p++, b.iov_len); + ASSERT_EQ(memcmp(p, b.iov_base, b.iov_len), 0); + p += b.iov_len; + ASSERT_EQ(*p++, c.iov_len); + ASSERT_EQ(memcmp(p, c.iov_base, c.iov_len), 0); + ASSERT_OK(iovec_split(&v, sizeof(uint8_t), &iovw2)); + ASSERT_TRUE(iovw_equal(&iovw, &iovw2)); + + iovec_done(&v); + iovw_done_free(&iovw2); + + ASSERT_OK(iovw_merge(&iovw, sizeof(uint16_t), &v)); + ASSERT_EQ(v.iov_len, 3 * sizeof(uint16_t) + a.iov_len + b.iov_len + c.iov_len); + p = ASSERT_NOT_NULL(v.iov_base); + ASSERT_EQ(*p++, 0); + ASSERT_EQ(*p++, a.iov_len); + ASSERT_EQ(memcmp(p, a.iov_base, a.iov_len), 0); + p += a.iov_len; + ASSERT_EQ(*p++, 0); + ASSERT_EQ(*p++, b.iov_len); + ASSERT_EQ(memcmp(p, b.iov_base, b.iov_len), 0); + p += b.iov_len; + ASSERT_EQ(*p++, 0); + ASSERT_EQ(*p++, c.iov_len); + ASSERT_EQ(memcmp(p, c.iov_base, c.iov_len), 0); + ASSERT_OK(iovec_split(&v, sizeof(uint16_t), &iovw2)); + ASSERT_TRUE(iovw_equal(&iovw, &iovw2)); + + iovec_done(&v); + iovw_done_free(&iovw2); + + ASSERT_OK(iovw_merge(&iovw, sizeof(uint32_t), &v)); + ASSERT_EQ(v.iov_len, 3 * sizeof(uint32_t) + a.iov_len + b.iov_len + c.iov_len); + p = ASSERT_NOT_NULL(v.iov_base); + ASSERT_EQ(*p++, 0); + ASSERT_EQ(*p++, 0); + ASSERT_EQ(*p++, 0); + ASSERT_EQ(*p++, a.iov_len); + ASSERT_EQ(memcmp(p, a.iov_base, a.iov_len), 0); + p += a.iov_len; + ASSERT_EQ(*p++, 0); + ASSERT_EQ(*p++, 0); + ASSERT_EQ(*p++, 0); + ASSERT_EQ(*p++, b.iov_len); + ASSERT_EQ(memcmp(p, b.iov_base, b.iov_len), 0); + p += b.iov_len; + ASSERT_EQ(*p++, 0); + ASSERT_EQ(*p++, 0); + ASSERT_EQ(*p++, 0); + ASSERT_EQ(*p++, c.iov_len); + ASSERT_EQ(memcmp(p, c.iov_base, c.iov_len), 0); + ASSERT_OK(iovec_split(&v, sizeof(uint32_t), &iovw2)); + ASSERT_TRUE(iovw_equal(&iovw, &iovw2)); + + iovec_done(&v); + iovw_done_free(&iovw2); + + /* with empty entries */ + _cleanup_(iovw_done) struct iovec_wrapper with_empty = { + .iovec = ASSERT_PTR(new0(struct iovec, 6)), + .count = 6, + }; + with_empty.iovec[0] = a; + with_empty.iovec[2] = b; + with_empty.iovec[4] = c; + ASSERT_OK(iovw_merge(&iovw, sizeof(uint8_t), &v)); + ASSERT_OK(iovw_merge(&with_empty, sizeof(uint8_t), &v2)); + ASSERT_TRUE(iovec_equal(&v, &v2)); + + iovec_done(&v); + iovec_done(&v2); + + size_t sz = 6 + a.iov_len + b.iov_len + c.iov_len; + _cleanup_free_ uint8_t *buf = ASSERT_PTR(new(uint8_t, sz)); + p = buf; + *p++ = a.iov_len; + p = mempcpy(p, a.iov_base, a.iov_len); + *p++ = 0; + *p++ = b.iov_len; + p = mempcpy(p, b.iov_base, b.iov_len); + *p++ = 0; + *p++ = c.iov_len; + p = mempcpy(p, c.iov_base, c.iov_len); + *p++ = 0; + ASSERT_OK(iovec_split(&IOVEC_MAKE(buf, sz), sizeof(uint8_t), &iovw2)); + ASSERT_TRUE(iovw_equal(&iovw, &iovw2)); + + iovw_done_free(&iovw2); + + /* truncated */ + ASSERT_OK(iovw_merge(&iovw, sizeof(uint8_t), &v)); + ASSERT_ERROR(iovec_split(&IOVEC_MAKE(v.iov_base, v.iov_len - 1), sizeof(uint8_t), &iovw2), EBADMSG); + + iovec_done(&v); + + /* too long */ + _cleanup_(iovec_done) struct iovec large = {}; + ASSERT_OK(random_bytes_allocate_iovec(256, &large)); + ASSERT_ERROR(iovw_merge(&(struct iovec_wrapper) { .iovec = &large, .count = 1, }, sizeof(uint8_t), &v), ERANGE); + ASSERT_OK(iovw_merge(&(struct iovec_wrapper) { .iovec = &large, .count = 1, }, sizeof(uint16_t), &v)); + ASSERT_OK(iovec_split(&v, sizeof(uint16_t), &iovw2)); + ASSERT_EQ(iovw2.count, 1u); + ASSERT_TRUE(iovec_equal(&iovw2.iovec[0], &large)); + + iovec_done(&v); + iovw_done_free(&iovw2); + + /* No entry */ + ASSERT_OK(iovw_merge(&(struct iovec_wrapper) {}, sizeof(uint8_t), &v)); + ASSERT_FALSE(iovec_is_set(&v)); + + ASSERT_OK(iovw_merge(NULL, sizeof(uint8_t), &v)); + ASSERT_FALSE(iovec_is_set(&v)); + + ASSERT_OK(iovec_split(&(struct iovec) {}, sizeof(uint8_t), &iovw2)); + ASSERT_TRUE(iovw_isempty(&iovw2)); + + ASSERT_OK(iovec_split(NULL, sizeof(uint8_t), &iovw2)); + ASSERT_TRUE(iovw_isempty(&iovw2)); + + /* empty entry only */ + ASSERT_OK(iovw_merge(&(struct iovec_wrapper) { .iovec = &(struct iovec) {}, .count = 1, }, sizeof(uint8_t), &v)); + ASSERT_FALSE(iovec_is_set(&v)); + + ASSERT_OK(iovec_split(&IOVEC_MAKE("", 1), sizeof(uint8_t), &iovw2)); + ASSERT_TRUE(iovw_isempty(&iovw2)); + +} + DEFINE_TEST_MAIN(LOG_INFO); From 4fc58bf62111ae164bbbbf2f276af24f1c4b0efd Mon Sep 17 00:00:00 2001 From: Yu Watanabe Date: Fri, 24 Apr 2026 03:10:16 +0900 Subject: [PATCH 230/242] iovec-wrapper: reintroduce iovw_free() and iovw_free_free() They were dropped by the commit 267b16f33c5636617927f15d7ae6b945c862a587, but will be used later. Hence, let's reintroduce them. --- src/basic/iovec-wrapper.c | 16 ++++++++++++++++ src/basic/iovec-wrapper.h | 6 ++++++ 2 files changed, 22 insertions(+) diff --git a/src/basic/iovec-wrapper.c b/src/basic/iovec-wrapper.c index 6b4b006c059dc..da217170c573c 100644 --- a/src/basic/iovec-wrapper.c +++ b/src/basic/iovec-wrapper.c @@ -24,6 +24,22 @@ void iovw_done_free(struct iovec_wrapper *iovw) { iovw_done(iovw); } +struct iovec_wrapper* iovw_free(struct iovec_wrapper *iovw) { + if (!iovw) + return NULL; + + iovw_done(iovw); + return mfree(iovw); +} + +struct iovec_wrapper* iovw_free_free(struct iovec_wrapper *iovw) { + if (!iovw) + return NULL; + + iovw_done_free(iovw); + return mfree(iovw); +} + int iovw_compare(const struct iovec_wrapper *a, const struct iovec_wrapper *b) { int r; diff --git a/src/basic/iovec-wrapper.h b/src/basic/iovec-wrapper.h index c2a0cff1aeed6..a4f93f1fdb94b 100644 --- a/src/basic/iovec-wrapper.h +++ b/src/basic/iovec-wrapper.h @@ -11,6 +11,12 @@ struct iovec_wrapper { void iovw_done_free(struct iovec_wrapper *iovw); void iovw_done(struct iovec_wrapper *iovw); +struct iovec_wrapper* iovw_free(struct iovec_wrapper *iovw); +DEFINE_TRIVIAL_CLEANUP_FUNC(struct iovec_wrapper*, iovw_free); + +struct iovec_wrapper* iovw_free_free(struct iovec_wrapper *iovw); +DEFINE_TRIVIAL_CLEANUP_FUNC(struct iovec_wrapper*, iovw_free_free); + int iovw_compare(const struct iovec_wrapper *a, const struct iovec_wrapper *b) _pure_; static inline bool iovw_equal(const struct iovec_wrapper *a, const struct iovec_wrapper *b) { return iovw_compare(a, b) == 0; From 320ae462b1315564e0ab97f1d1b13e6e05b489af Mon Sep 17 00:00:00 2001 From: Yu Watanabe Date: Tue, 21 Apr 2026 05:05:32 +0900 Subject: [PATCH 231/242] tlv-util: introduce tlv-util that handles Tag-Length-Value data format In many network protocols e.g. DHCP, the TLV format is used. Let's introduce a simple parser and builder of the data format. --- src/libsystemd-network/meson.build | 4 + src/libsystemd-network/test-tlv-util.c | 207 ++++++++++ src/libsystemd-network/tlv-util.c | 505 +++++++++++++++++++++++++ src/libsystemd-network/tlv-util.h | 82 ++++ 4 files changed, 798 insertions(+) create mode 100644 src/libsystemd-network/test-tlv-util.c create mode 100644 src/libsystemd-network/tlv-util.c create mode 100644 src/libsystemd-network/tlv-util.h diff --git a/src/libsystemd-network/meson.build b/src/libsystemd-network/meson.build index b0443c3695206..6239056e3b4b1 100644 --- a/src/libsystemd-network/meson.build +++ b/src/libsystemd-network/meson.build @@ -37,6 +37,7 @@ libsystemd_network_sources = files( 'sd-ndisc-router.c', 'sd-ndisc-router-solicit.c', 'sd-radv.c', + 'tlv-util.c', ) sources += libsystemd_network_sources @@ -113,6 +114,9 @@ executables += [ network_test_template + { 'sources' : files('test-sd-dhcp-lease.c'), }, + network_test_template + { + 'sources' : files('test-tlv-util.c'), + }, network_fuzz_template + { 'sources' : files('fuzz-dhcp-client.c'), }, diff --git a/src/libsystemd-network/test-tlv-util.c b/src/libsystemd-network/test-tlv-util.c new file mode 100644 index 0000000000000..4747afa98dfe5 --- /dev/null +++ b/src/libsystemd-network/test-tlv-util.c @@ -0,0 +1,207 @@ +/* SPDX-License-Identifier: LGPL-2.1-or-later */ + +#include "sd-dhcp-protocol.h" + +#include "hashmap.h" +#include "iovec-util.h" +#include "iovec-wrapper.h" +#include "random-util.h" +#include "tests.h" +#include "tlv-util.h" + +TEST(tlv_constant) { + ASSERT_EQ(TLV_TAG_PAD, (uint32_t) SD_DHCP_OPTION_PAD); + ASSERT_EQ(TLV_TAG_END, (uint32_t) SD_DHCP_OPTION_END); +} + +TEST(tlv) { + _cleanup_(tlv_done) TLV tlv = TLV_INIT(TLV_DHCP4); + + _cleanup_(iovec_done) struct iovec data0 = {}, data1 = {}, data2a = {}, data2b = {}, data3 = {}, data4 = {}; + ASSERT_OK(random_bytes_allocate_iovec(0, &data0)); + ASSERT_OK(random_bytes_allocate_iovec(111, &data1)); + ASSERT_OK(random_bytes_allocate_iovec(123, &data2a)); + ASSERT_OK(random_bytes_allocate_iovec(321, &data2b)); + ASSERT_OK(random_bytes_allocate_iovec(333, &data3)); + ASSERT_OK(random_bytes_allocate_iovec(444, &data4)); + + /* tlv_append() */ + ASSERT_OK(tlv_append(&tlv, 10, data0.iov_len, data0.iov_base)); + ASSERT_OK(tlv_append(&tlv, 11, data1.iov_len, data1.iov_base)); + ASSERT_OK(tlv_append(&tlv, 22, data2a.iov_len, data2a.iov_base)); + ASSERT_OK(tlv_append(&tlv, 22, data2b.iov_len, data2b.iov_base)); + ASSERT_OK(tlv_append(&tlv, 33, data3.iov_len, data3.iov_base)); + ASSERT_OK(tlv_append(&tlv, 44, data4.iov_len, data4.iov_base)); + ASSERT_ERROR(tlv_append(&tlv, 0x00, data4.iov_len, data4.iov_base), EINVAL); + ASSERT_ERROR(tlv_append(&tlv, 0xFF, data4.iov_len, data4.iov_base), EINVAL); + ASSERT_EQ(hashmap_size(tlv.entries), 5u); + + /* tlv_remove() */ + tlv_remove(&tlv, 44); + ASSERT_EQ(hashmap_size(tlv.entries), 4u); + tlv_remove(&tlv, 55); + ASSERT_EQ(hashmap_size(tlv.entries), 4u); + + /* tlv_append_tlv() */ + _cleanup_(tlv_done) TLV tlv_copy = TLV_INIT(TLV_DHCP4); + ASSERT_ERROR(tlv_append_tlv(&tlv_copy, &tlv_copy), EINVAL); + ASSERT_OK(tlv_append_tlv(&tlv_copy, NULL)); + ASSERT_OK(tlv_append_tlv(&tlv_copy, &tlv)); + ASSERT_EQ(hashmap_size(tlv_copy.entries), hashmap_size(tlv.entries)); + + /* tlv_isempty() */ + ASSERT_TRUE(tlv_isempty(NULL)); + ASSERT_TRUE(tlv_isempty(&TLV_INIT(TLV_DHCP4))); + ASSERT_FALSE(tlv_isempty(&tlv)); + + /* tlv_contains() */ + ASSERT_TRUE(tlv_contains(&tlv, 10)); + ASSERT_TRUE(tlv_contains(&tlv, 11)); + ASSERT_TRUE(tlv_contains(&tlv, 22)); + ASSERT_TRUE(tlv_contains(&tlv, 33)); + ASSERT_FALSE(tlv_contains(&tlv, 44)); + + /* tlv_get_all() */ + struct iovec_wrapper *iovw; + + iovw = ASSERT_NOT_NULL(tlv_get_all(&tlv, 10)); + ASSERT_EQ(iovw->count, 1u); + ASSERT_TRUE(iovec_equal(&iovw->iovec[0], &data0)); + + iovw = ASSERT_NOT_NULL(tlv_get_all(&tlv, 11)); + ASSERT_EQ(iovw->count, 1u); + ASSERT_TRUE(iovec_equal(&iovw->iovec[0], &data1)); + + iovw = ASSERT_NOT_NULL(tlv_get_all(&tlv, 22)); + ASSERT_EQ(iovw->count, 3u); + ASSERT_TRUE(iovec_equal(&iovw->iovec[0], &data2a)); + ASSERT_TRUE(iovec_equal(&iovw->iovec[1], &IOVEC_MAKE(data2b.iov_base, UINT8_MAX))); + ASSERT_TRUE(iovec_equal(&iovw->iovec[2], &IOVEC_SHIFT(&data2b, UINT8_MAX))); + + iovw = ASSERT_NOT_NULL(tlv_get_all(&tlv, 33)); + ASSERT_EQ(iovw->count, 2u); + ASSERT_TRUE(iovec_equal(&iovw->iovec[0], &IOVEC_MAKE(data3.iov_base, UINT8_MAX))); + ASSERT_TRUE(iovec_equal(&iovw->iovec[1], &IOVEC_SHIFT(&data3, UINT8_MAX))); + + ASSERT_NULL(tlv_get_all(&tlv, 44)); + + /* tlv_get_full() */ + struct iovec iov; + + ASSERT_OK(tlv_get(&tlv, 10, &iov)); + ASSERT_TRUE(iovec_equal(&iov, &data0)); + ASSERT_OK(tlv_get_full(&tlv, 10, data0.iov_len, &iov)); + ASSERT_TRUE(iovec_equal(&iov, &data0)); + ASSERT_ERROR(tlv_get_full(&tlv, 10, 123, &iov), ENODATA); + + ASSERT_OK(tlv_get(&tlv, 11, &iov)); + ASSERT_TRUE(iovec_equal(&iov, &data1)); + ASSERT_OK(tlv_get_full(&tlv, 11, data1.iov_len, &iov)); + ASSERT_TRUE(iovec_equal(&iov, &data1)); + ASSERT_ERROR(tlv_get_full(&tlv, 11, 123, &iov), ENODATA); + + ASSERT_OK(tlv_get(&tlv, 22, &iov)); + ASSERT_TRUE(iovec_equal(&iov, &data2a)); + ASSERT_OK(tlv_get_full(&tlv, 22, data2a.iov_len, &iov)); + ASSERT_TRUE(iovec_equal(&iov, &data2a)); + ASSERT_ERROR(tlv_get_full(&tlv, 22, data2b.iov_len, &iov), ENODATA); + ASSERT_OK(tlv_get_full(&tlv, 22, UINT8_MAX, &iov)); + ASSERT_TRUE(iovec_equal(&iov, &IOVEC_MAKE(data2b.iov_base, UINT8_MAX))); + ASSERT_OK(tlv_get_full(&tlv, 22, data2b.iov_len - UINT8_MAX, &iov)); + ASSERT_TRUE(iovec_equal(&iov, &IOVEC_SHIFT(&data2b, UINT8_MAX))); + + ASSERT_OK(tlv_get(&tlv, 33, &iov)); + ASSERT_TRUE(iovec_equal(&iov, &IOVEC_MAKE(data3.iov_base, UINT8_MAX))); + ASSERT_ERROR(tlv_get_full(&tlv, 33, data3.iov_len, &iov), ENODATA); + ASSERT_OK(tlv_get_full(&tlv, 33, UINT8_MAX, &iov)); + ASSERT_TRUE(iovec_equal(&iov, &IOVEC_MAKE(data3.iov_base, UINT8_MAX))); + ASSERT_OK(tlv_get_full(&tlv, 33, data3.iov_len - UINT8_MAX, &iov)); + ASSERT_TRUE(iovec_equal(&iov, &IOVEC_SHIFT(&data3, UINT8_MAX))); + + ASSERT_ERROR(tlv_get(&tlv, 44, NULL), ENODATA); + + /* tlv_get_alloc() */ + _cleanup_(iovec_done) struct iovec v = {}; + + ASSERT_OK(tlv_get_alloc(&tlv, 10, &v)); + ASSERT_TRUE(iovec_equal(&v, &data0)); + iovec_done(&v); + + ASSERT_OK(tlv_get_alloc(&tlv, 11, &v)); + ASSERT_TRUE(iovec_equal(&v, &data1)); + iovec_done(&v); + + ASSERT_OK(tlv_get_alloc(&tlv, 22, &v)); + ASSERT_EQ(v.iov_len, data2a.iov_len + data2b.iov_len); + ASSERT_EQ(memcmp(v.iov_base, data2a.iov_base, data2a.iov_len), 0); + ASSERT_EQ(memcmp((uint8_t*) v.iov_base + data2a.iov_len, data2b.iov_base, data2b.iov_len), 0); + iovec_done(&v); + + ASSERT_OK(tlv_get_alloc(&tlv, 33, &v)); + ASSERT_TRUE(iovec_equal(&v, &data3)); + iovec_done(&v); + + ASSERT_ERROR(tlv_get_alloc(&tlv, 44, NULL), ENODATA); + + /* tlv_size() */ + size_t sz = tlv_size(&tlv); + /* The tlv contains the 7 entries with a 2-byte header: + * tag 10: 1 entry, tag 11: 1 entry, tag 22: 3 entries, tag 33: 2 entries = 7 entries total. */ + ASSERT_EQ(sz, 7 * 2 + data0.iov_len + data1.iov_len + data2a.iov_len + data2b.iov_len + data3.iov_len + 1); + + /* tlv_build() */ + ASSERT_OK(tlv_build(&tlv, &v)); + ASSERT_EQ(v.iov_len, sz); + uint8_t *p = v.iov_base; + ASSERT_EQ(*p++, 10u); + ASSERT_EQ(*p++, data0.iov_len); + + ASSERT_EQ(*p++, 11u); + ASSERT_EQ(*p++, data1.iov_len); + ASSERT_EQ(memcmp(p, data1.iov_base, data1.iov_len), 0); + p += data1.iov_len; + + ASSERT_EQ(*p++, 22u); + ASSERT_EQ(*p++, data2a.iov_len); + ASSERT_EQ(memcmp(p, data2a.iov_base, data2a.iov_len), 0); + p += data2a.iov_len; + + ASSERT_EQ(*p++, 22u); + ASSERT_EQ(*p++, UINT8_MAX); + ASSERT_EQ(memcmp(p, data2b.iov_base, UINT8_MAX), 0); + p += UINT8_MAX; + + ASSERT_EQ(*p++, 22u); + ASSERT_EQ(*p++, data2b.iov_len - UINT8_MAX); + ASSERT_EQ(memcmp(p, (uint8_t*) data2b.iov_base + UINT8_MAX, data2b.iov_len - UINT8_MAX), 0); + p += data2b.iov_len - UINT8_MAX; + + ASSERT_EQ(*p++, 33u); + ASSERT_EQ(*p++, UINT8_MAX); + ASSERT_EQ(memcmp(p, data3.iov_base, UINT8_MAX), 0); + p += UINT8_MAX; + + ASSERT_EQ(*p++, 33u); + ASSERT_EQ(*p++, data3.iov_len - UINT8_MAX); + ASSERT_EQ(memcmp(p, (uint8_t*) data3.iov_base + UINT8_MAX, data3.iov_len - UINT8_MAX), 0); + p += data3.iov_len - UINT8_MAX; + + ASSERT_EQ(*p, 255u); + + /* tlv_new() and tlv_parse() */ + _cleanup_(tlv_unrefp) TLV *tlv2 = ASSERT_NOT_NULL(tlv_new(TLV_DHCP4 | TLV_TEMPORARY)); + ASSERT_OK(tlv_parse(tlv2, &v)); + ASSERT_EQ(hashmap_size(tlv.entries), hashmap_size(tlv2->entries)); + void *tagp; + HASHMAP_FOREACH_KEY(iovw, tagp, tlv.entries) { + struct iovec_wrapper *iovw2 = ASSERT_PTR(hashmap_get(tlv2->entries, tagp)); + ASSERT_TRUE(iovw_equal(iovw, iovw2)); + } + + /* tlv_build() again, and check the reproducibility. */ + _cleanup_(iovec_done) struct iovec v2 = {}; + ASSERT_OK(tlv_build(tlv2, &v2)); + ASSERT_TRUE(iovec_equal(&v, &v2)); +} + +DEFINE_TEST_MAIN(LOG_DEBUG); diff --git a/src/libsystemd-network/tlv-util.c b/src/libsystemd-network/tlv-util.c new file mode 100644 index 0000000000000..68574e718c741 --- /dev/null +++ b/src/libsystemd-network/tlv-util.c @@ -0,0 +1,505 @@ +/* SPDX-License-Identifier: LGPL-2.1-or-later */ + +#include "alloc-util.h" +#include "hashmap.h" +#include "iovec-util.h" +#include "iovec-wrapper.h" +#include "tlv-util.h" +#include "unaligned.h" + +#define TLV_MAX_ENTRIES 4096u + +TLVFlag tlv_flags_verify(TLVFlag flags) { + assert(IN_SET(flags & _TLV_TAG_MASK, TLV_TAG_U8, TLV_TAG_U16, TLV_TAG_U32)); + assert(IN_SET(flags & _TLV_LENGTH_MASK, TLV_LENGTH_U8, TLV_LENGTH_U16, TLV_LENGTH_U32)); + + /* TLV_PAD and TLV_END are for DHCPv4 options, hence here we assume TLV_TAG_U8 is set. */ + assert(!FLAGS_SET(flags, TLV_PAD) || FLAGS_SET(flags, TLV_TAG_U8)); + assert(!FLAGS_SET(flags, TLV_END) || FLAGS_SET(flags, TLV_TAG_U8)); + + /* When we requested to append the END tag, then we should understand the END tag on parse. */ + assert(!FLAGS_SET(flags, TLV_APPEND_END) || FLAGS_SET(flags, TLV_END)); + + return flags; +} + +void tlv_done(TLV *tlv) { + assert(tlv); + + tlv->entries = hashmap_free(tlv->entries); + tlv->n_entries = 0; +} + +static TLV* tlv_free(TLV *tlv) { + if (!tlv) + return NULL; + + tlv_done(tlv); + return mfree(tlv); +} + +DEFINE_TRIVIAL_REF_UNREF_FUNC(TLV, tlv, tlv_free); + +TLV* tlv_new(TLVFlag flags) { + TLV *tlv = new(TLV, 1); + if (!tlv) + return NULL; + + *tlv = TLV_INIT(flags); + return tlv; +} + +bool tlv_isempty(const TLV *tlv) { + return !tlv || hashmap_isempty(tlv->entries); +} + +struct iovec_wrapper* tlv_get_all(const TLV *tlv, uint32_t tag) { + assert(tlv); + return hashmap_get(tlv->entries, UINT32_TO_PTR(tag)); +} + +int tlv_get_full(const TLV *tlv, uint32_t tag, size_t length, struct iovec *ret) { + assert(tlv); + + /* Do not free the result iovec, the data is still owned by TLV (or the original input data when + * TLV_TEMPORARY is set). */ + + struct iovec_wrapper *iovw = tlv_get_all(tlv, tag); + if (iovw_isempty(iovw)) + return -ENODATA; + + /* When multiple entries exist, use the first one matching the length. */ + FOREACH_ARRAY(iov, iovw->iovec, iovw->count) { + if (length != SIZE_MAX && iov->iov_len != length) + continue; + + if (ret) + *ret = *iov; + return 0; + } + + return -ENODATA; +} + +int tlv_get_alloc(const TLV *tlv, uint32_t tag, struct iovec *ret) { + assert(tlv); + + /* Free the result iovec. */ + + struct iovec_wrapper *iovw = tlv_get_all(tlv, tag); + if (iovw_isempty(iovw)) + return -ENODATA; + + if (!ret) + return 0; + + if (FLAGS_SET(tlv->flags, TLV_MERGE)) + return iovw_concat(iovw, ret); + + /* When TLV_MERGE is unset, provides the first entry. */ + if (!iovec_memdup(&iovw->iovec[0], ret)) + return -ENOMEM; + + return 0; +} + +void tlv_remove(TLV *tlv, uint32_t tag) { + assert(tlv); + + struct iovec_wrapper *iovw = hashmap_remove(tlv->entries, UINT32_TO_PTR(tag)); + if (!iovw) + return; + + assert(tlv->n_entries >= iovw->count); + tlv->n_entries -= iovw->count; + + if (FLAGS_SET(tlv->flags, TLV_TEMPORARY)) + iovw_free(iovw); + else + iovw_free_free(iovw); +} + +DEFINE_PRIVATE_HASH_OPS_WITH_VALUE_DESTRUCTOR( + tlv_hash_ops, + void, + trivial_hash_func, + trivial_compare_func, + struct iovec_wrapper, + iovw_free); + +DEFINE_PRIVATE_HASH_OPS_WITH_VALUE_DESTRUCTOR( + tlv_hash_ops_free, + void, + trivial_hash_func, + trivial_compare_func, + struct iovec_wrapper, + iovw_free_free); + +static int tlv_append_impl(TLV *tlv, uint32_t tag, size_t length, const void *data) { + int r; + + assert(tlv); + assert(length == 0 || data); + + if (tlv->n_entries >= TLV_MAX_ENTRIES) + return -E2BIG; + + if (FLAGS_SET(tlv->flags, TLV_TEMPORARY)) { + struct iovec_wrapper *e = tlv_get_all(tlv, tag); + if (e) { + r = iovw_put_full(e, /* accept_zero= */ true, (void*) data, length); + if (r < 0) + return r; + } else { + _cleanup_(iovw_freep) struct iovec_wrapper *v = new0(struct iovec_wrapper, 1); + if (!v) + return -ENOMEM; + + r = iovw_put_full(v, /* accept_zero= */ true, (void*) data, length); + if (r < 0) + return r; + + r = hashmap_ensure_put(&tlv->entries, &tlv_hash_ops, UINT32_TO_PTR(tag), v); + if (r < 0) + return r; + + TAKE_PTR(v); + } + } else { + struct iovec_wrapper *e = tlv_get_all(tlv, tag); + if (e) { + r = iovw_extend_full(e, /* accept_zero= */ true, data, length); + if (r < 0) + return r; + } else { + _cleanup_(iovw_free_freep) struct iovec_wrapper *v = new0(struct iovec_wrapper, 1); + if (!v) + return -ENOMEM; + + r = iovw_extend_full(v, /* accept_zero= */ true, data, length); + if (r < 0) + return r; + + r = hashmap_ensure_put(&tlv->entries, &tlv_hash_ops_free, UINT32_TO_PTR(tag), v); + if (r < 0) + return r; + + TAKE_PTR(v); + } + } + + tlv->n_entries++; + return 0; +} + +int tlv_append(TLV *tlv, uint32_t tag, size_t length, const void *data) { + int r; + + assert(tlv); + assert(length == 0 || data); + + switch (tlv->flags & _TLV_TAG_MASK) { + case TLV_TAG_U8: + if (tag > UINT8_MAX) + return -EINVAL; + break; + case TLV_TAG_U16: + if (tag > UINT16_MAX) + return -EINVAL; + break; + case TLV_TAG_U32: + break; + default: + assert_not_reached(); + } + + if ((FLAGS_SET(tlv->flags, TLV_PAD) && tag == TLV_TAG_PAD) || + (FLAGS_SET(tlv->flags, TLV_END) && tag == TLV_TAG_END)) + return -EINVAL; + + size_t max_length; + switch (tlv->flags & _TLV_LENGTH_MASK) { + case TLV_LENGTH_U8: + max_length = UINT8_MAX; + break; + case TLV_LENGTH_U16: + max_length = UINT16_MAX; + break; + case TLV_LENGTH_U32: + max_length = UINT32_MAX; + break; + default: + assert_not_reached(); + } + + if (FLAGS_SET(tlv->flags, TLV_MERGE)) { + /* If TLV_MERGE is set and the length is larger than the allowed maximum, then split the data + * and store them in multiple entries. + * + * Note, if tlv_append_impl() fails below, we do not rollback the entries, hence the caller + * of this function needs to discard the entire data in that case. */ + const uint8_t *p = data; + while (length > max_length) { + r = tlv_append_impl(tlv, tag, max_length, p); + if (r < 0) + return r; + + p += max_length; + length -= max_length; + } + + return tlv_append_impl(tlv, tag, length, p); + } + + /* Otherwise, refuse too long data. */ + if (length > max_length) + return -EINVAL; + + return tlv_append_impl(tlv, tag, length, data); +} + +int tlv_append_iov(TLV *tlv, uint32_t tag, const struct iovec *iov) { + assert(tlv); + assert(iovec_is_valid(iov)); + + return tlv_append(tlv, tag, iov ? iov->iov_len : 0, iov ? iov->iov_base : NULL); +} + +int tlv_append_tlv(TLV *tlv, const TLV *source) { + int r; + + assert(tlv); + + /* Note, this does not rollback entries on failure, hence the caller of this function needs to + * discard the entire data in that case. */ + + if (!source) + return 0; + + if (source == tlv) + return -EINVAL; + + void *tagp; + struct iovec_wrapper *iovw; + HASHMAP_FOREACH_KEY(iovw, tagp, source->entries) { + uint32_t tag = PTR_TO_UINT32(tagp); + + FOREACH_ARRAY(iov, iovw->iovec, iovw->count) { + r = tlv_append(tlv, tag, iov->iov_len, iov->iov_base); + if (r < 0) + return r; + } + } + + return 0; +} + +int tlv_parse(TLV *tlv, const struct iovec *iov) { + int r; + + assert(tlv); + assert(iovec_is_valid(iov)); + + /* Note, this does not rollback entries on failure, hence the caller of this function needs to + * discard the entire data in that case. */ + + if (!iovec_is_set(iov)) + return 0; + + for (struct iovec i = *iov; iovec_is_set(&i); ) { + uint32_t tag; + switch (tlv->flags & _TLV_TAG_MASK) { + case TLV_TAG_U8: + if (i.iov_len < sizeof(uint8_t)) + return -EBADMSG; + tag = *(uint8_t*) i.iov_base; + iovec_inc(&i, sizeof(uint8_t)); + break; + case TLV_TAG_U16: + if (i.iov_len < sizeof(uint16_t)) + return -EBADMSG; + tag = unaligned_read_be16(i.iov_base); + iovec_inc(&i, sizeof(uint16_t)); + break; + case TLV_TAG_U32: + if (i.iov_len < sizeof(uint32_t)) + return -EBADMSG; + tag = unaligned_read_be32(i.iov_base); + iovec_inc(&i, sizeof(uint32_t)); + break; + default: + assert_not_reached(); + } + + if (FLAGS_SET(tlv->flags, TLV_PAD) && tag == TLV_TAG_PAD) + continue; + if (FLAGS_SET(tlv->flags, TLV_END) && tag == TLV_TAG_END) + break; + + size_t len; + switch (tlv->flags & _TLV_LENGTH_MASK) { + case TLV_LENGTH_U8: + if (i.iov_len < sizeof(uint8_t)) + return -EBADMSG; + len = *(uint8_t*) i.iov_base; + iovec_inc(&i, sizeof(uint8_t)); + break; + case TLV_LENGTH_U16: + if (i.iov_len < sizeof(uint16_t)) + return -EBADMSG; + len = unaligned_read_be16(i.iov_base); + iovec_inc(&i, sizeof(uint16_t)); + break; + case TLV_LENGTH_U32: + if (i.iov_len < sizeof(uint32_t)) + return -EBADMSG; + len = unaligned_read_be32(i.iov_base); + iovec_inc(&i, sizeof(uint32_t)); + break; + default: + assert_not_reached(); + } + + if (i.iov_len < len) + return -EBADMSG; + + r = tlv_append_impl(tlv, tag, len, i.iov_base); + if (r < 0) + return r; + + iovec_inc(&i, len); + } + + return 0; +} + +size_t tlv_size(const TLV *tlv) { + assert(tlv); + + size_t header_sz; + switch (tlv->flags & _TLV_TAG_MASK) { + case TLV_TAG_U8: + header_sz = sizeof(uint8_t); + break; + case TLV_TAG_U16: + header_sz = sizeof(uint16_t); + break; + case TLV_TAG_U32: + header_sz = sizeof(uint32_t); + break; + default: + assert_not_reached(); + } + + switch (tlv->flags & _TLV_LENGTH_MASK) { + case TLV_LENGTH_U8: + header_sz += sizeof(uint8_t); + break; + case TLV_LENGTH_U16: + header_sz += sizeof(uint16_t); + break; + case TLV_LENGTH_U32: + header_sz += sizeof(uint32_t); + break; + default: + assert_not_reached(); + } + + size_t sz = FLAGS_SET(tlv->flags, TLV_APPEND_END); + + struct iovec_wrapper *iovw; + HASHMAP_FOREACH(iovw, tlv->entries) { + if (size_multiply_overflow(header_sz, iovw->count)) + return SIZE_MAX; + + sz = size_add(sz, size_add(header_sz * iovw->count, iovw_size(iovw))); + } + + return sz; +} + +int tlv_build(const TLV *tlv, struct iovec *ret) { + int r; + + assert(tlv); + assert(ret); + + size_t sz = tlv_size(tlv); + if (sz == SIZE_MAX) + return -ENOBUFS; + + _cleanup_free_ uint8_t *buf = new(uint8_t, sz); + if (!buf) + return -ENOMEM; + + /* Sort by tags, for reproducibility. */ + _cleanup_free_ void **sorted = NULL; + size_t n; + r = hashmap_dump_keys_sorted(tlv->entries, &sorted, &n); + if (r < 0) + return r; + + uint8_t *p = buf; + FOREACH_ARRAY(tagp, sorted, n) { + uint32_t tag = PTR_TO_UINT32(*tagp); + struct iovec_wrapper *iovw = ASSERT_PTR(tlv_get_all(tlv, tag)); + + if ((FLAGS_SET(tlv->flags, TLV_PAD) && tag == TLV_TAG_PAD) || + (FLAGS_SET(tlv->flags, TLV_END) && tag == TLV_TAG_END)) + return -EINVAL; + + FOREACH_ARRAY(iov, iovw->iovec, iovw->count) { + switch (tlv->flags & _TLV_TAG_MASK) { + case TLV_TAG_U8: + if (tag > UINT8_MAX) + return -EINVAL; + *p++ = tag; + break; + case TLV_TAG_U16: + if (tag > UINT16_MAX) + return -EINVAL; + unaligned_write_be16(p, tag); + p += sizeof(uint16_t); + break; + case TLV_TAG_U32: + unaligned_write_be32(p, tag); + p += sizeof(uint32_t); + break; + default: + assert_not_reached(); + } + + switch (tlv->flags & _TLV_LENGTH_MASK) { + case TLV_LENGTH_U8: + if (iov->iov_len > UINT8_MAX) + return -EINVAL; + *p++ = iov->iov_len; + break; + case TLV_LENGTH_U16: + if (iov->iov_len > UINT16_MAX) + return -EINVAL; + unaligned_write_be16(p, iov->iov_len); + p += sizeof(uint16_t); + break; + case TLV_LENGTH_U32: + if (iov->iov_len > UINT32_MAX) + return -EINVAL; + unaligned_write_be32(p, iov->iov_len); + p += sizeof(uint32_t); + break; + default: + assert_not_reached(); + } + + p = mempcpy_safe(p, iov->iov_base, iov->iov_len); + } + } + + if (FLAGS_SET(tlv->flags, TLV_APPEND_END)) + *p++ = TLV_TAG_END; + + assert(sz == (size_t) (p - buf)); + + *ret = IOVEC_MAKE(TAKE_PTR(buf), sz); + return 0; +} diff --git a/src/libsystemd-network/tlv-util.h b/src/libsystemd-network/tlv-util.h new file mode 100644 index 0000000000000..5344c28703244 --- /dev/null +++ b/src/libsystemd-network/tlv-util.h @@ -0,0 +1,82 @@ +/* SPDX-License-Identifier: LGPL-2.1-or-later */ +#pragma once + +#include "sd-forward.h" + +#define TLV_TAG_PAD UINT32_C(0) +#define TLV_TAG_END UINT32_C(0xFF) + +typedef enum TLVFlag { + TLV_TAG_U8 = 1 << 0, + TLV_TAG_U16 = 1 << 1, + TLV_TAG_U32 = 1 << 2, + _TLV_TAG_MASK = TLV_TAG_U8 | TLV_TAG_U16 | TLV_TAG_U32, + TLV_LENGTH_U8 = 1 << 3, + TLV_LENGTH_U16 = 1 << 4, + TLV_LENGTH_U32 = 1 << 5, + _TLV_LENGTH_MASK = TLV_LENGTH_U8 | TLV_LENGTH_U16 | TLV_LENGTH_U32, + TLV_PAD = 1 << 6, /* If set, tag == 0 is a pad, and does not have the length field. */ + TLV_END = 1 << 7, /* If set, tag == 0xFF is a sign of the end of the sequence. */ + TLV_APPEND_END = 1 << 8, /* If set, append the END tag at the end of the sequence on build. */ + TLV_MERGE = 1 << 9, /* If set, tlv_get_alloc() merges them, and tlv_append() split long data. */ + TLV_TEMPORARY = 1 << 10, /* If set, tlv_append() and tlv_parse() do not copy the data. */ + + /* DHCPv4 options. */ + TLV_DHCP4 = TLV_TAG_U8 | TLV_LENGTH_U8 | TLV_PAD | TLV_END | TLV_APPEND_END | TLV_MERGE, + /* Used for DHCPv4 sub-options, e.g. + * DHCPv4 Vendor Specific Information sub-option (43), + * DHCPv4 Relay Agent Information sub-option (82), or + * DHCPv4 Vendor-Identifying Vendor Specific Information sub-sub-option (125). + * Note that the PAD is not mentioned in RFC, but some implementations use it, hence let's gracefully + * handle it. Also note that the END tag is prohibited in most options, but we also gracefully handle + * it on parse, but of course do not append it on build. */ + TLV_DHCP4_SUBOPTION + = TLV_TAG_U8 | TLV_LENGTH_U8 | TLV_PAD | TLV_END, + /* DHCPv4 Vendor-Identifying Vendor Class sub-option (124) and + * DHCPv4 Vendor-Identifying Vendor Specific Information sub-option (125). + * The tag is called 'enterprise-number', and in uint32. */ + TLV_DHCP4_VENDOR_IDENTIFYING_OPTION + = TLV_TAG_U32 | TLV_LENGTH_U8 | TLV_MERGE, +} TLVFlag; + +typedef struct TLV { + unsigned n_ref; + TLVFlag flags; + unsigned n_entries; + Hashmap *entries; +} TLV; + +#define TLV_INIT(f) \ + (TLV) { \ + .n_ref = 1, \ + .flags = tlv_flags_verify(f), \ + } + +TLVFlag tlv_flags_verify(TLVFlag flags); + +void tlv_done(TLV *tlv); +TLV* tlv_ref(TLV *p); +TLV* tlv_unref(TLV *p); +DEFINE_TRIVIAL_CLEANUP_FUNC(TLV*, tlv_unref); +TLV* tlv_new(TLVFlag flags); + +bool tlv_isempty(const TLV *tlv); + +struct iovec_wrapper* tlv_get_all(const TLV *tlv, uint32_t tag); +static inline bool tlv_contains(const TLV *tlv, uint32_t tag) { + return tlv_get_all(tlv, tag); +} +int tlv_get_full(const TLV *tlv, uint32_t tag, size_t length, struct iovec *ret); +static inline int tlv_get(const TLV *tlv, uint32_t tag, struct iovec *ret) { + return tlv_get_full(tlv, tag, SIZE_MAX, ret); +} +int tlv_get_alloc(const TLV *tlv, uint32_t tag, struct iovec *ret); + +void tlv_remove(TLV *tlv, uint32_t tag); +int tlv_append(TLV *tlv, uint32_t tag, size_t length, const void *data); +int tlv_append_iov(TLV *tlv, uint32_t tag, const struct iovec *iov); +int tlv_append_tlv(TLV *tlv, const TLV *source); + +int tlv_parse(TLV *tlv, const struct iovec *iov); +size_t tlv_size(const TLV *tlv); +int tlv_build(const TLV *tlv, struct iovec *ret); From 53b16b68e14275b9e0ea2eb2df51461f648d47ea Mon Sep 17 00:00:00 2001 From: Luca Boccassi Date: Mon, 11 May 2026 22:36:29 +0100 Subject: [PATCH 232/242] test: start systemd-report-basic.socket again SUSE uses a different preset, so don't just assert in the test, instead just start the socket in case it is not enabled TEST-74-AUX-UTILS.sh[1594]: ++ systemctl is-enabled systemd-report-basic.socket TEST-74-AUX-UTILS.sh[1540]: + [[ disabled == enabled ]] TEST-74-AUX-UTILS.sh[120]: + echo 'Subtest /usr/lib/systemd/tests/testdata/units/TEST-74-AUX-UTILS.report.sh failed' Follow-up for 4409e52494d803426a365b6636a66fd2dfc70b62 --- test/units/TEST-74-AUX-UTILS.report.sh | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/test/units/TEST-74-AUX-UTILS.report.sh b/test/units/TEST-74-AUX-UTILS.report.sh index 7475978336f14..456132c04df92 100755 --- a/test/units/TEST-74-AUX-UTILS.report.sh +++ b/test/units/TEST-74-AUX-UTILS.report.sh @@ -51,7 +51,8 @@ varlinkctl --more call /run/systemd/report/io.systemd.Network io.systemd.Metrics varlinkctl --more call /run/systemd/report/io.systemd.Network io.systemd.Metrics.Describe {} # test io.systemd.Basic Metrics -[[ "$(systemctl is-enabled systemd-report-basic.socket)" == enabled ]] +# ensure the socket is running, as some distros don't enable it by default +systemctl start systemd-report-basic.socket varlinkctl info /run/systemd/report/io.systemd.Basic varlinkctl list-methods /run/systemd/report/io.systemd.Basic varlinkctl --more call /run/systemd/report/io.systemd.Basic io.systemd.Metrics.List {} From dc6b6e9d649f0738fa20862bd9daee76cc336783 Mon Sep 17 00:00:00 2001 From: Artem Proskurnev Date: Tue, 12 May 2026 11:07:39 +0300 Subject: [PATCH 233/242] hwdb/keyboard: Map f21 key on Wareus B15 Addition to PR https://github.com/systemd/systemd/pull/41181 Plasma-workspace OSD notifications about turning the touchpad on and off are guided by f21. When this match is specified, KDE notifies on this laptop that the on/off switch of the atchpad state is pressed. Fix dmesg: atkbd serio0: Unknown key pressed (translated set 2, code 0xc1 on isa0060/serio0). --- hwdb.d/60-keyboard.hwdb | 1 + 1 file changed, 1 insertion(+) diff --git a/hwdb.d/60-keyboard.hwdb b/hwdb.d/60-keyboard.hwdb index c9c0ea1db1afd..fa3e4fb154eac 100644 --- a/hwdb.d/60-keyboard.hwdb +++ b/hwdb.d/60-keyboard.hwdb @@ -2186,6 +2186,7 @@ evdev:name:SIPODEV USB Composite Device:dmi:bvn*:bvr*:bd*:svnVIOS:pnLTH17:* # Wareus B15 (8AD5A) evdev:atkbd:dmi:bvn*:bvr*:bd*:svnWareus*:pnB15*:* KEYBOARD_KEY_55=fn + KEYBOARD_KEY_c1=f21 ########################################################### # WeiHeng From 60d6f70ca4ced0a426800c55eaefa17438b89c3e Mon Sep 17 00:00:00 2001 From: Daan De Meyer Date: Mon, 11 May 2026 21:58:24 +0200 Subject: [PATCH 234/242] btrfs-util: Make nested subvolume operations work unpriv BTRFS_IOC_SEARCH is only available to root in the initial userns. This means we fail to recursively snapshot even if a subvolume has no nested subvolumes at the moment. Let's fix this by using the newer btrfs ioctls which do work even if we don't have CAP_SYS_ADMIN in the initial userns. --- src/shared/btrfs-util.c | 150 ++++++++++++---------------------------- 1 file changed, 46 insertions(+), 104 deletions(-) diff --git a/src/shared/btrfs-util.c b/src/shared/btrfs-util.c index bb3e28f6bbba4..9c86e49f0f86c 100644 --- a/src/shared/btrfs-util.c +++ b/src/shared/btrfs-util.c @@ -863,19 +863,6 @@ int btrfs_qgroup_unassign(int fd, uint64_t child, uint64_t parent) { } static int subvol_remove_children(int fd, const char *subvolume, uint64_t subvol_id, BtrfsRemoveFlags flags) { - struct btrfs_ioctl_search_args args = { - .key.tree_id = BTRFS_ROOT_TREE_OBJECTID, - - .key.min_objectid = BTRFS_FIRST_FREE_OBJECTID, - .key.max_objectid = BTRFS_LAST_FREE_OBJECTID, - - .key.min_type = BTRFS_ROOT_BACKREF_KEY, - .key.max_type = BTRFS_ROOT_BACKREF_KEY, - - .key.min_transid = 0, - .key.max_transid = UINT64_MAX, - }; - struct btrfs_ioctl_vol_args vol_args = {}; _cleanup_close_ int subvol_fd = -EBADF; bool made_writable = false; @@ -923,43 +910,29 @@ static int subvol_remove_children(int fd, const char *subvolume, uint64_t subvol if (!(flags & BTRFS_REMOVE_RECURSIVE) || errno != ENOTEMPTY) return -errno; - /* OK, the subvolume is not empty, let's look for child - * subvolumes, and remove them, first */ - - args.key.min_offset = args.key.max_offset = subvol_id; + /* OK, the subvolume is not empty, let's look for child subvolumes, and remove them, first. + * BTRFS_IOC_GET_SUBVOL_ROOTREF and BTRFS_IOC_INO_LOOKUP_USER (kernel 4.18+) enumerate child + * subvolumes without requiring CAP_SYS_ADMIN in the initial user namespace, unlike the older + * BTRFS_IOC_TREE_SEARCH ioctl. */ - while (btrfs_ioctl_search_args_compare(&args) <= 0) { - struct btrfs_ioctl_search_header sh; - const void *body; + struct btrfs_ioctl_get_subvol_rootref_args rootref_args = {}; - args.key.nr_items = 256; - if (ioctl(fd, BTRFS_IOC_TREE_SEARCH, &args) < 0) + for (;;) { + /* BTRFS_IOC_GET_SUBVOL_ROOTREF returns up to BTRFS_MAX_ROOTREF_BUFFER_NUM entries per + * call. If more are available, it returns -EOVERFLOW with num_items filled in and + * min_treeid advanced so we can resume on the next iteration. */ + int ioctl_ret = ioctl(subvol_fd, BTRFS_IOC_GET_SUBVOL_ROOTREF, &rootref_args); + if (ioctl_ret < 0 && errno != EOVERFLOW) return -errno; - if (args.key.nr_items <= 0) - break; - - FOREACH_BTRFS_IOCTL_SEARCH_HEADER(sh, body, args) { - _cleanup_free_ char *p = NULL; - - btrfs_ioctl_search_args_set(&args, &sh); - - if (sh.type != BTRFS_ROOT_BACKREF_KEY) - continue; - if (sh.offset != subvol_id) - continue; - - const struct btrfs_root_ref *ref = body; - p = memdup_suffix0((char*) ref + sizeof(struct btrfs_root_ref), le64toh(ref->name_len)); - if (!p) - return -ENOMEM; + for (uint8_t i = 0; i < rootref_args.num_items; i++) { + uint64_t child_subvol_id = rootref_args.rootref[i].treeid; - struct btrfs_ioctl_ino_lookup_args ino_args = { - .treeid = subvol_id, - .objectid = htole64(ref->dirid), + struct btrfs_ioctl_ino_lookup_user_args lookup_args = { + .dirid = rootref_args.rootref[i].dirid, + .treeid = child_subvol_id, }; - - if (ioctl(fd, BTRFS_IOC_INO_LOOKUP, &ino_args) < 0) + if (ioctl(subvol_fd, BTRFS_IOC_INO_LOOKUP_USER, &lookup_args) < 0) return -errno; if (!made_writable) { @@ -970,29 +943,26 @@ static int subvol_remove_children(int fd, const char *subvolume, uint64_t subvol made_writable = true; } - if (isempty(ino_args.name)) - /* Subvolume is in the top-level - * directory of the subvolume. */ - r = subvol_remove_children(subvol_fd, p, sh.objectid, flags); + if (isempty(lookup_args.path)) + /* Subvolume is in the top-level directory of the subvolume. */ + r = subvol_remove_children(subvol_fd, lookup_args.name, child_subvol_id, flags); else { _cleanup_close_ int child_fd = -EBADF; - /* Subvolume is somewhere further down, - * hence we need to open the + /* Subvolume is somewhere further down, hence we need to open the * containing directory first */ - child_fd = openat(subvol_fd, ino_args.name, O_RDONLY|O_NOCTTY|O_CLOEXEC|O_DIRECTORY|O_NOFOLLOW); + child_fd = openat(subvol_fd, lookup_args.path, O_RDONLY|O_NOCTTY|O_CLOEXEC|O_DIRECTORY|O_NOFOLLOW); if (child_fd < 0) return -errno; - r = subvol_remove_children(child_fd, p, sh.objectid, flags); + r = subvol_remove_children(child_fd, lookup_args.name, child_subvol_id, flags); } if (r < 0) return r; } - /* Increase search key by one, to read the next item, if we can. */ - if (!btrfs_ioctl_search_args_inc(&args)) + if (ioctl_ret >= 0) break; } @@ -1223,19 +1193,6 @@ static int subvol_snapshot_children( uint64_t old_subvol_id, BtrfsSnapshotFlags flags) { - struct btrfs_ioctl_search_args args = { - .key.tree_id = BTRFS_ROOT_TREE_OBJECTID, - - .key.min_objectid = BTRFS_FIRST_FREE_OBJECTID, - .key.max_objectid = BTRFS_LAST_FREE_OBJECTID, - - .key.min_type = BTRFS_ROOT_BACKREF_KEY, - .key.max_type = BTRFS_ROOT_BACKREF_KEY, - - .key.min_transid = 0, - .key.max_transid = UINT64_MAX, - }; - struct btrfs_ioctl_vol_args_v2 vol_args = { .flags = flags & BTRFS_SNAPSHOT_READ_ONLY ? BTRFS_SUBVOL_RDONLY : 0, .fd = old_fd, @@ -1293,50 +1250,36 @@ static int subvol_snapshot_children( return flags & BTRFS_SNAPSHOT_LOCK_BSD ? TAKE_FD(subvolume_fd) : 0; } - args.key.min_offset = args.key.max_offset = old_subvol_id; + /* Enumerate child subvolumes via BTRFS_IOC_GET_SUBVOL_ROOTREF + BTRFS_IOC_INO_LOOKUP_USER + * (kernel 4.18+), neither of which requires CAP_SYS_ADMIN, unlike BTRFS_IOC_TREE_SEARCH. */ - while (btrfs_ioctl_search_args_compare(&args) <= 0) { - struct btrfs_ioctl_search_header sh; - const void *body; + struct btrfs_ioctl_get_subvol_rootref_args rootref_args = {}; - args.key.nr_items = 256; - if (ioctl(old_fd, BTRFS_IOC_TREE_SEARCH, &args) < 0) + for (;;) { + /* BTRFS_IOC_GET_SUBVOL_ROOTREF returns up to BTRFS_MAX_ROOTREF_BUFFER_NUM entries per + * call. If more are available, it returns -EOVERFLOW with num_items filled in and + * min_treeid advanced so we can resume on the next iteration. */ + int ioctl_ret = ioctl(old_fd, BTRFS_IOC_GET_SUBVOL_ROOTREF, &rootref_args); + if (ioctl_ret < 0 && errno != EOVERFLOW) return -errno; - if (args.key.nr_items <= 0) - break; - - FOREACH_BTRFS_IOCTL_SEARCH_HEADER(sh, body, args) { - _cleanup_free_ char *p = NULL, *c = NULL, *np = NULL; + for (uint8_t i = 0; i < rootref_args.num_items; i++) { + _cleanup_free_ char *c = NULL, *np = NULL; _cleanup_close_ int old_child_fd = -EBADF, new_child_fd = -EBADF; - - btrfs_ioctl_search_args_set(&args, &sh); - - if (sh.type != BTRFS_ROOT_BACKREF_KEY) - continue; - - /* Avoid finding the source subvolume a second time */ - if (sh.offset != old_subvol_id) - continue; + uint64_t child_subvol_id = rootref_args.rootref[i].treeid; /* Avoid running into loops if the new subvolume is below the old one. */ - if (sh.objectid == new_subvol_id) + if (child_subvol_id == new_subvol_id) continue; - const struct btrfs_root_ref *ref = body; - p = memdup_suffix0((char*) ref + sizeof(struct btrfs_root_ref), le64toh(ref->name_len)); - if (!p) - return -ENOMEM; - - struct btrfs_ioctl_ino_lookup_args ino_args = { - .treeid = old_subvol_id, - .objectid = htole64(ref->dirid), + struct btrfs_ioctl_ino_lookup_user_args lookup_args = { + .dirid = rootref_args.rootref[i].dirid, + .treeid = child_subvol_id, }; - - if (ioctl(old_fd, BTRFS_IOC_INO_LOOKUP, &ino_args) < 0) + if (ioctl(old_fd, BTRFS_IOC_INO_LOOKUP_USER, &lookup_args) < 0) return -errno; - c = path_join(ino_args.name, p); + c = path_join(lookup_args.path, lookup_args.name); if (!c) return -ENOMEM; @@ -1344,7 +1287,7 @@ static int subvol_snapshot_children( if (old_child_fd < 0) return -errno; - np = path_join(subvolume, ino_args.name); + np = path_join(subvolume, lookup_args.path); if (!np) return -ENOMEM; @@ -1369,7 +1312,7 @@ static int subvol_snapshot_children( /* When btrfs clones the subvolumes, child subvolumes appear as empty * directories. Remove them, so that we can create a new snapshot in their place */ - if (unlinkat(new_child_fd, p, AT_REMOVEDIR) < 0) { + if (unlinkat(new_child_fd, lookup_args.name, AT_REMOVEDIR) < 0) { int k = -errno; if (flags & BTRFS_SNAPSHOT_READ_ONLY) @@ -1378,7 +1321,7 @@ static int subvol_snapshot_children( return k; } - r = subvol_snapshot_children(old_child_fd, new_child_fd, p, sh.objectid, + r = subvol_snapshot_children(old_child_fd, new_child_fd, lookup_args.name, child_subvol_id, flags & ~(BTRFS_SNAPSHOT_FALLBACK_COPY|BTRFS_SNAPSHOT_LOCK_BSD)); /* Restore the readonly flag */ @@ -1394,8 +1337,7 @@ static int subvol_snapshot_children( return r; } - /* Increase search key by one, to read the next item, if we can. */ - if (!btrfs_ioctl_search_args_inc(&args)) + if (ioctl_ret >= 0) break; } From 6de004d99adbc5dc916672d6d2971eea80232114 Mon Sep 17 00:00:00 2001 From: Daan De Meyer Date: Wed, 12 Nov 2025 17:53:47 +0100 Subject: [PATCH 235/242] Introduce support for running code in fibers MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Traditionally, asynchronous programming in systemd has been achieved using sd-event along with the asynchronous interfaces of sd-bus and sd-varlink. This works well when the system is reacting to events and all code triggered by those events can run without blocking. In these scenarios, the global Manager object is passed as userdata to the callback, and the callback can use the stack as usual, declaring local state and ensuring proper cleanup via _cleanup_. Control flow structures, such as loops, work as expected, and everything runs smoothly. However, challenges arise when the code needs to perform long-running operations within these callbacks. Since the system cannot block execution within the callback, we can't directly invoke a long-running operation and wait for its result without introducing complexities. Instead, we need to initiate the long-running task, register for completion with sd-event, sd-bus, or sd-varlink, and provide a callback to be invoked when the operation completes. This callback, however, only receives a single userdata pointer, which forces us to bundle all local variables into a struct and pass it along as part of the callback. On top of that, after queuing the asynchronous operation, the caller continues executing. As the caller's stack unwinds when the function exits, the resources and state within the local scope may be prematurely cleaned up. Therefore, the struct must store copies of the local variables or ensure proper reference counting to prevent premature resource cleanup. When multiple long-running operations need to be initiated within a loop, the complexity grows further. We must introduce additional shared state to track the completion of all operations before we can run any code that depends on their results. Furthermore, since the daemon may be shut down at any time, we must track the lifecycle of each long-running operation in the global Manager struct, ensuring proper cleanup even when stack unwinding can no longer manage the resources for us. Fibers, or green threads, provide a more natural way of handling asynchronous operations. By enabling cooperative multitasking within a single thread, fibers allow us to write code that looks like it’s running synchronously, but with the ability to yield control at predefined points, such as when waiting for long-running tasks to complete. With fibers, we can simplify the control flow by running asynchronous operations within a fiber, allowing us to "pause" execution while waiting for the long-running operation to finish and then "resume" the operation once it's complete. This eliminates the need for multiple callback chains, extensive state tracking, and the potential pitfalls of stack unwinding. This commit introduces the ability to execute long-running operations in a non-blocking manner while maintaining the simplicity and readability of synchronous code. The fiber-based approach will significantly improve the handling of complex workflows, making the code easier to write and maintain. The implementation is based on ucontext.h's makecontext() (with a fallback to the venerable sigaltstack() approach on musl), sigsetjmp()/siglongjmp() and sd-event. ucontext.h provides us with alternate stacks that we can switch between. We use sigsetjmp()/siglongjmp() instead of swapcontext() because the latter forcibly saves/restores a per context signal mask every time it is called. Using sigsetjmp()/siglongjmp(), we can avoid the unnecessary syscall and maintain a per thread signal mask, which makes much more sense than having a per fiber signal mask. The default stack size is the same as a regular thread. Because we use mmap() to allocate the stack, the memory won't actually be used until it is paged in by the kernel, so we don't actually use 8MB per fiber. To integrate fibers with the event loop, each fiber is assigned a deferred event source which resumes the fiber when enabled. The deferred event source is oneshot by default so the fiber will run immediately until it yields or suspends. If it yields, the deferred event source is enabled again (oneshot) immediately. If it suspends, before it suspends, one or more event sources are registered with sd-event that will enable the deferred event source (oneshot) to resume the fiber once the operation it is waiting for completes. Yielding or suspending the fiber is done by calling sd_fiber_yield() or sd_fiber_suspend() respectively. Both of these return zero on success or any error value from the async operation that caused the fiber to resume. This is also how fiber cancellation is implemented. When a fiber is cancelled, sd_fiber_yield() and sd_fiber_suspend() will return ECANCELED when the fiber is resumed, allowing the fiber to unwind its stack (which allows cleanup to happen automatically) and finish. Instead of having applications work directly with fibers, we hide them behind a generic futures interface to represent long-running operations, regardless of whether those operations are running on a fiber or not. Aside from fibers, the futures library (sd-future) will for example allow waiting for sd-event sources and doing sd-bus calls in the background as well. Fibers can suspend until a future is ready with sd_fiber_await() or by having the future wake up the fiber explicitly in its callback. A future always defaults to waking up the current fiber. Each future kind plugs into the library by providing an sd_future_ops vtable (alloc, free, cancel, set_priority). The library treats the impl pointer returned by alloc() as a black box. Future Implementations retrieve it via sd_future_get_private(). A future starts in SD_FUTURE_PENDING and transitions exactly once to SD_FUTURE_RESOLVED, carrying an integer result. Consumers can react to that transition either by installing a one-shot callback with sd_future_set_callback() (callback-style code) or by waiting on it from a fiber via sd_fiber_await() (synchronous-looking fiber code). sd_fiber_await() is itself built on a "wait future" that resolves when its target resolves; sd_future_new_wait() exposes the same primitive directly so non-fiber callers can chain futures without involving a fiber. Cancellation is cooperative: sd_future_cancel() invokes the future impl's cancel callback, which is responsible for tearing down its work and ultimately resolving the promise with -ECANCELED. For fiber futures this is what surfaces as the ECANCELED return from sd_fiber_yield()/sd_fiber_suspend() mentioned above. Fire-and-forget fibers — created by passing a NULL ret to sd_fiber_new() — take a self-reference on their future so they outlive the caller's scope. The self-ref is dropped when the fiber resolves. This floating mechanism (sd_fiber_set_floating()) is restricted to fiber futures because they uniquely guarantee resolution; allowing it for arbitrary future kinds would risk silent leaks for kinds that may never resolve. Note that fiber cleanup depends on the runtime operating normally. Each fiber's _cleanup_-style cleanups live on the fiber's own stack and run only when the fiber is resumed and allowed to unwind, which requires a working event loop to drive it to completion. The exit event source registered for top-level fibers ensures unwind on a normal sd_event_exit(), but if the event loop itself terminates abnormally (e.g. an unrecoverable allocation failure mid-dispatch) before all fibers have resolved, their stacks never unwind and any resources they own leak. The code lives in libsystemd as sd-future (not exported) for the following reasons: - We may want to make this a public libsystemd API in the future - The code can't live in src/basic as it makes heavy use of sd-event - The code can't live in src/shared as sd-bus and sd-event make use of it The log and log-context headers are updated with functions to allow fibers to have their own log prefix and log context. --- meson.build | 1 + src/basic/basic-forward.h | 2 + src/basic/log-context.c | 13 + src/basic/log-context.h | 4 + src/basic/log.c | 8 + src/basic/log.h | 4 + src/include/override/sys/mman.h | 7 + src/libsystemd/meson.build | 14 +- src/libsystemd/sd-common/sd-forward.h | 6 + src/libsystemd/sd-event/event-future.c | 129 ++ src/libsystemd/sd-event/event-future.h | 7 + src/libsystemd/sd-future/fiber.c | 947 +++++++++++++ src/libsystemd/sd-future/sd-future.c | 257 ++++ src/libsystemd/sd-future/test-fiber.c | 1167 +++++++++++++++++ src/systemd/_sd-common.h | 14 + src/systemd/meson.build | 1 + src/systemd/sd-future.h | 101 ++ .../TEST-02-UNITTESTS/meson.build | 2 +- 18 files changed, 2681 insertions(+), 3 deletions(-) create mode 100644 src/libsystemd/sd-event/event-future.c create mode 100644 src/libsystemd/sd-event/event-future.h create mode 100644 src/libsystemd/sd-future/fiber.c create mode 100644 src/libsystemd/sd-future/sd-future.c create mode 100644 src/libsystemd/sd-future/test-fiber.c create mode 100644 src/systemd/sd-future.h diff --git a/meson.build b/meson.build index d6fbd7c2b7ea6..562c3eac907b8 100644 --- a/meson.build +++ b/meson.build @@ -1798,6 +1798,7 @@ libsystemd_includes = [basic_includes, include_directories( 'src/libsystemd/sd-common', 'src/libsystemd/sd-device', 'src/libsystemd/sd-event', + 'src/libsystemd/sd-future', 'src/libsystemd/sd-hwdb', 'src/libsystemd/sd-id128', 'src/libsystemd/sd-journal', diff --git a/src/basic/basic-forward.h b/src/basic/basic-forward.h index 396056a8e55eb..2536ccda0133b 100644 --- a/src/basic/basic-forward.h +++ b/src/basic/basic-forward.h @@ -110,10 +110,12 @@ typedef enum UnitNameMangle UnitNameMangle; typedef enum UnitType UnitType; typedef enum WaitFlags WaitFlags; +typedef struct Fiber Fiber; typedef struct Hashmap Hashmap; typedef struct HashmapBase HashmapBase; typedef struct IteratedCache IteratedCache; typedef struct Iterator Iterator; +typedef struct LogContext LogContext; typedef struct OrderedHashmap OrderedHashmap; typedef struct OrderedSet OrderedSet; typedef struct Set Set; diff --git a/src/basic/log-context.c b/src/basic/log-context.c index a05b4b1980e6b..799920dec9de7 100644 --- a/src/basic/log-context.c +++ b/src/basic/log-context.c @@ -177,6 +177,19 @@ size_t log_context_num_fields(void) { return _log_context_num_fields; } +void log_context_swap(LogContext **log_context, size_t *num_fields) { + assert(log_context); + assert(num_fields); + + LogContext *prev_log_context = _log_context; + _log_context = *log_context; + *log_context = prev_log_context; + + size_t prev_log_context_num_fields = _log_context_num_fields; + _log_context_num_fields = *num_fields; + *num_fields = prev_log_context_num_fields; +} + void _reset_log_level(int *saved_log_level) { assert(saved_log_level); diff --git a/src/basic/log-context.h b/src/basic/log-context.h index ca112fa862acf..6f38131dc137f 100644 --- a/src/basic/log-context.h +++ b/src/basic/log-context.h @@ -66,6 +66,10 @@ size_t log_context_num_contexts(void); /* Returns the number of fields in all attached log contexts. */ size_t log_context_num_fields(void); +/* Atomically swap the thread-local log context list and field count with the values pointed to by *list and + * *num_fields. Used by sd-fiber to stash/restore the caller's log context when entering/leaving a fiber. */ +void log_context_swap(LogContext **log_context, size_t *num_fields); + void _reset_log_level(int *saved_log_level); #define _LOG_CONTEXT_SET_LOG_LEVEL(level, l) \ diff --git a/src/basic/log.c b/src/basic/log.c index d8b441bfadf21..539507dd0fab1 100644 --- a/src/basic/log.c +++ b/src/basic/log.c @@ -87,6 +87,14 @@ bool _log_message_dummy = false; /* Always false */ } \ } while (false) +void log_prefix_swap(const char **prefix) { + assert(prefix); + + const char *prev = log_prefix; + log_prefix = *prefix; + *prefix = prev; +} + static void log_close_console(void) { /* See comment in log_close_journal() */ (void) safe_close_above_stdio(TAKE_FD(console_fd)); diff --git a/src/basic/log.h b/src/basic/log.h index 46a4339de5565..d714e21a9a34b 100644 --- a/src/basic/log.h +++ b/src/basic/log.h @@ -380,6 +380,10 @@ int log_syntax_parse_error_internal( void log_setup(void); const char* _log_set_prefix(const char *prefix, bool force); + +/* Atomically swap the thread-local log prefix with the value pointed to by *prefix. Used by sd-fiber to + * stash/restore the caller's log prefix when entering/leaving a fiber. */ +void log_prefix_swap(const char **prefix); static inline const char* _log_unset_prefixp(const char **p) { assert(p); _log_set_prefix(*p, true); diff --git a/src/include/override/sys/mman.h b/src/include/override/sys/mman.h index 30ef92b83538e..9961ea4b21d93 100644 --- a/src/include/override/sys/mman.h +++ b/src/include/override/sys/mman.h @@ -18,3 +18,10 @@ static_assert(MFD_NOEXEC_SEAL == 0x0008U, ""); #else static_assert(MFD_EXEC == 0x0010U, ""); #endif + +/* since Linux 6.13 / glibc-2.42 */ +#ifndef MADV_GUARD_INSTALL +# define MADV_GUARD_INSTALL 102 +#else +static_assert(MADV_GUARD_INSTALL == 102, ""); +#endif diff --git a/src/libsystemd/meson.build b/src/libsystemd/meson.build index 2fab54719474c..9b50e7e79ba3f 100644 --- a/src/libsystemd/meson.build +++ b/src/libsystemd/meson.build @@ -33,6 +33,7 @@ sd_daemon_sources = files('sd-daemon/sd-daemon.c') ############################################################ sd_event_sources = files( + 'sd-event/event-future.c', 'sd-event/event-util.c', 'sd-event/sd-event.c', ) @@ -75,6 +76,13 @@ sd_device_sources = files( ############################################################ +sd_future_sources = files( + 'sd-future/fiber.c', + 'sd-future/sd-future.c', +) + +############################################################ + sd_login_sources = files('sd-login/sd-login.c') ############################################################ @@ -135,8 +143,9 @@ libsystemd_sources = files( 'sd-resolve/sd-resolve.c', ) + sd_journal_sources + sd_id128_sources + sd_daemon_sources \ + sd_event_sources + sd_bus_sources + sd_device_sources \ - + sd_login_sources + sd_json_sources + sd_varlink_sources \ - + sd_path_sources + sd_netlink_sources + sd_network_sources + + sd_future_sources + sd_login_sources + sd_json_sources \ + + sd_varlink_sources + sd_path_sources + sd_netlink_sources \ + + sd_network_sources sources += libsystemd_sources @@ -181,6 +190,7 @@ simple_tests += files( 'sd-bus/test-bus-vtable.c', 'sd-device/test-device-util.c', 'sd-device/test-sd-device-monitor.c', + 'sd-future/test-fiber.c', 'sd-hwdb/test-sd-hwdb.c', 'sd-id128/test-id128.c', 'sd-journal/test-audit-type.c', diff --git a/src/libsystemd/sd-common/sd-forward.h b/src/libsystemd/sd-common/sd-forward.h index 8abe655209dec..96ab84e982886 100644 --- a/src/libsystemd/sd-common/sd-forward.h +++ b/src/libsystemd/sd-common/sd-forward.h @@ -127,3 +127,9 @@ typedef struct sd_resolve sd_resolve; typedef struct sd_resolve_query sd_resolve_query; typedef struct sd_hwdb sd_hwdb; + +typedef struct sd_future sd_future; + +typedef int (*sd_future_func_t)(sd_future *f); +typedef int (*sd_fiber_func_t)(void *userdata); +typedef _sd_destroy_t sd_fiber_destroy_t; diff --git a/src/libsystemd/sd-event/event-future.c b/src/libsystemd/sd-event/event-future.c new file mode 100644 index 0000000000000..e34dbe0c05252 --- /dev/null +++ b/src/libsystemd/sd-event/event-future.c @@ -0,0 +1,129 @@ +/* SPDX-License-Identifier: LGPL-2.1-or-later */ + +#include "sd-event.h" +#include "sd-future.h" + +#include "alloc-util.h" +#include "errno-util.h" +#include "event-future.h" + +typedef struct TimeFuture { + sd_event_source *source; + uint64_t usec; + + /* Result the future resolves with on natural expiry (vs. cancellation). 0 for normal sleep, + * non-zero (e.g. -ETIMEDOUT) lets a fiber waiting on this future resume with that error. */ + int result; +} TimeFuture; + +static void* time_future_alloc(void) { + return new0(TimeFuture, 1); +} + +static void time_future_free(sd_future *f) { + TimeFuture *tf = sd_future_get_private(ASSERT_PTR(f)); + sd_event_source_unref(tf->source); + free(tf); +} + +static int time_future_cancel(sd_future *f) { + TimeFuture *tf = sd_future_get_private(ASSERT_PTR(f)); + int r = sd_event_source_set_enabled(tf->source, SD_EVENT_OFF); + RET_GATHER(r, sd_future_resolve(f, -ECANCELED)); + return r; +} + +static int time_future_set_priority(sd_future *f, int64_t priority) { + TimeFuture *tf = sd_future_get_private(ASSERT_PTR(f)); + return sd_event_source_set_priority(tf->source, priority); +} + +static const sd_future_ops time_future_ops = { + .size = sizeof(sd_future_ops), + .alloc = time_future_alloc, + .free = time_future_free, + .cancel = time_future_cancel, + .set_priority = time_future_set_priority, +}; + +static int time_handler(sd_event_source *s, usec_t usec, void *userdata) { + sd_future *f = ASSERT_PTR(userdata); + TimeFuture *tf = sd_future_get_private(f); + + tf->usec = usec; + return sd_future_resolve(f, tf->result); +} + +int future_new_time(sd_event *e, clockid_t clock, uint64_t usec, uint64_t accuracy, int result, sd_future **ret) { + int r; + + assert(e); + assert(ret); + + if (IN_SET(sd_event_get_state(e), SD_EVENT_EXITING, SD_EVENT_FINISHED)) + return -ECANCELED; + + _cleanup_(sd_future_unrefp) sd_future *f = NULL; + r = sd_future_new(&time_future_ops, &f); + if (r < 0) + return r; + + TimeFuture *tf = sd_future_get_private(f); + tf->result = result; + + r = sd_event_add_time(e, &tf->source, clock, usec, accuracy, time_handler, f); + if (r < 0) + return r; + + if (sd_fiber_is_running()) { + int64_t priority; + + r = sd_fiber_get_priority(&priority); + if (r < 0) + return r; + + r = sd_event_source_set_priority(tf->source, priority); + if (r < 0) + return r; + } + + *ret = TAKE_PTR(f); + return 0; +} + +int future_new_time_relative(sd_event *e, clockid_t clock, uint64_t usec, uint64_t accuracy, int result, sd_future **ret) { + int r; + + assert(e); + assert(ret); + + if (IN_SET(sd_event_get_state(e), SD_EVENT_EXITING, SD_EVENT_FINISHED)) + return -ECANCELED; + + _cleanup_(sd_future_unrefp) sd_future *f = NULL; + r = sd_future_new(&time_future_ops, &f); + if (r < 0) + return r; + + TimeFuture *tf = sd_future_get_private(f); + tf->result = result; + + r = sd_event_add_time_relative(e, &tf->source, clock, usec, accuracy, time_handler, f); + if (r < 0) + return r; + + if (sd_fiber_is_running()) { + int64_t priority; + + r = sd_fiber_get_priority(&priority); + if (r < 0) + return r; + + r = sd_event_source_set_priority(tf->source, priority); + if (r < 0) + return r; + } + + *ret = TAKE_PTR(f); + return 0; +} diff --git a/src/libsystemd/sd-event/event-future.h b/src/libsystemd/sd-event/event-future.h new file mode 100644 index 0000000000000..7e956906ebf74 --- /dev/null +++ b/src/libsystemd/sd-event/event-future.h @@ -0,0 +1,7 @@ +/* SPDX-License-Identifier: LGPL-2.1-or-later */ +#pragma once + +#include "sd-forward.h" + +int future_new_time(sd_event *e, clockid_t clock, uint64_t usec, uint64_t accuracy, int result, sd_future **ret); +int future_new_time_relative(sd_event *e, clockid_t clock, uint64_t usec, uint64_t accuracy, int result, sd_future **ret); diff --git a/src/libsystemd/sd-future/fiber.c b/src/libsystemd/sd-future/fiber.c new file mode 100644 index 0000000000000..7ee94c709910b --- /dev/null +++ b/src/libsystemd/sd-future/fiber.c @@ -0,0 +1,947 @@ +/* SPDX-License-Identifier: LGPL-2.1-or-later */ + +/* glibc's _FORTIFY_SOURCE wraps siglongjmp with a check that the target SP is below the current SP. + * That check is incompatible with fiber switching, where the target SP lives on a separately-mmap'd + * stack and can be at any address relative to the caller. Disable fortify here so siglongjmp resolves + * to the plain glibc entry point. */ +#undef _FORTIFY_SOURCE +#define _FORTIFY_SOURCE 0 + +#include +#include +#include +#include +#include +#include +#include +#include + +#if HAVE_VALGRIND_VALGRIND_H +#include +#endif + +#include "sd-event.h" +#include "sd-future.h" + +#include "alloc-util.h" +#include "env-util.h" +#include "errno-util.h" +#include "event-future.h" +#include "log-context.h" +#include "log.h" +#include "memory-util.h" +#include "pthread-util.h" +#include "time-util.h" + +#if HAS_FEATURE_ADDRESS_SANITIZER +#include +#endif + +/* musl libc deliberately does not provide the ucontext.h functions. Declaring them weak lets us link on + * musl without libucontext and pick the sigaltstack-based bootstrap at runtime. On + * glibc / libucontext-on-musl the symbols resolve normally and we use the cheaper ucontext path. */ +#pragma weak getcontext +#pragma weak makecontext +#pragma weak swapcontext + +static thread_local Fiber *current_fiber = NULL; + +typedef enum FiberState { + FIBER_STATE_INITIAL, + FIBER_STATE_READY, + FIBER_STATE_SUSPENDED, + FIBER_STATE_CANCELLED, + FIBER_STATE_COMPLETED, + _FIBER_STATE_MAX, + _FIBER_STATE_INVALID = -EINVAL, +} FiberState; + +typedef struct Fiber { + struct iovec stack; + sigjmp_buf context; /* Where to jump to when entering or resuming the fiber. */ + sigjmp_buf resume_context; /* Where to jump back to when the fiber yields or completes. */ + + /* Caller's stack range, recorded by fiber_run() on each entry so the fiber's siglongjmp back + * out (in fiber_swap() or the trampoline's terminate path) can hand AddressSanitizer the + * destination stack info. With ucontext this comes for free via uc_link/uc_stack; sigjmp_buf + * is opaque and doesn't carry it. */ + struct iovec resume_stack; + + FiberState state; + int result; /* Either resume error code or final return value */ + + sd_future *floating; /* Self-ref held while the fiber is floating; dropped on resolve. */ + + sd_event *event; + sd_event_source *defer_event_source; + sd_event_source *exit_event_source; + + char *name; + int64_t priority; + sd_fiber_func_t func; + void *userdata; + sd_fiber_destroy_t destroy; + + /* Storage for the swap performed in fiber_run(): while the fiber is suspended these hold the + * fiber's own log state; while it is running they hold the caller's log state. The active state + * always lives in the thread-locals in log.c / log-context.c. */ + LIST_HEAD(LogContext, log_context); + size_t log_context_num_fields; + const char *log_prefix; + +#if HAVE_VALGRIND_VALGRIND_H + unsigned stack_id; +#endif +} Fiber; + +static Fiber* fiber_get_current(void) { + return current_fiber; +} + +static void fiber_set_current(Fiber *f) { + current_fiber = f; +} + +static int fiber_allocate_stack(size_t size, void **ret) { + void *stack = NULL; + int r; + + assert(size > 0 && size % page_size() == 0); + assert(ret); + + stack = mmap(/* addr= */ NULL, size, + PROT_READ | PROT_WRITE, + MAP_PRIVATE | MAP_ANONYMOUS | MAP_STACK, + /* fd= */ -EBADF, /* offset= */ 0); + if (stack == MAP_FAILED) + return -errno; + + /* Place the guard page where stack overflow will hit it: the high end on PA-RISC (the + * only arch where glibc defines _STACK_GROWS_UP, see sysdeps/hppa/stackinfo.h), the low + * end everywhere else. fiber_stack_usable() mirrors this with the inverse offset. */ +#if defined(__hppa__) || defined(__hppa64__) + void *guard = (uint8_t*) stack + size - page_size(); +#else + void *guard = stack; +#endif + + /* Prefer MADV_GUARD_INSTALL (Linux 6.13+): unlike mprotect(PROT_NONE) it doesn't split + * the VMA, so guard installation skips the mmap-lock contention and per-guard VMA cost. + * Fall back to mprotect on older kernels, which return EINVAL for unknown advice. */ + r = RET_NERRNO(madvise(guard, page_size(), MADV_GUARD_INSTALL)); + if (r == -EINVAL) + r = RET_NERRNO(mprotect(guard, page_size(), PROT_NONE)); + if (r < 0) { + (void) munmap(stack, size); + return r; + } + + *ret = TAKE_PTR(stack); + return 0; +} + +/* Usable stack range of a fiber: the full mmap region minus the guard page. Single source of + * truth for the layout assumed by fiber_allocate_stack(); every consumer (ucontext ss_sp, + * sigaltstack ss_sp, ASAN handoff iovecs, Valgrind stack registration) goes through here. + * + * iov_base is the lowest usable byte regardless of growth direction — that matches POSIX's + * definition of stack_t.ss_sp, so libc's makecontext()/sigaltstack() handle the direction for + * us. Only the guard page placement (and hence iov_base's offset within the mapping) varies. */ +static struct iovec fiber_stack_usable(const struct iovec *stack) { + assert(stack); + assert(stack->iov_len > page_size()); + return (struct iovec) { +#if defined(__hppa__) || defined(__hppa64__) + .iov_base = stack->iov_base, +#else + .iov_base = (uint8_t*) stack->iov_base + page_size(), +#endif + .iov_len = stack->iov_len - page_size(), + }; +} + +static inline void start_switch_stack(void **fake_stack_save, const struct iovec *dest) { +#if HAS_FEATURE_ADDRESS_SANITIZER + __sanitizer_start_switch_fiber(fake_stack_save, + dest ? dest->iov_base : NULL, + dest ? dest->iov_len : 0); +#else + (void) fake_stack_save; + (void) dest; +#endif +} + +static inline void finish_switch_stack(void *fake_stack_save) { +#if HAS_FEATURE_ADDRESS_SANITIZER + __sanitizer_finish_switch_fiber(fake_stack_save, NULL, NULL); +#else + (void) fake_stack_save; +#endif +} + +/* Refresh f->resume_stack from whoever is currently the running fiber, so the next siglongjmp out + * of f (in the trampoline or fiber_swap()) can hand the right destination stack to ASAN. Must be + * called before fiber_set_current(f) — relies on fiber_get_current() returning the caller. */ +static void fiber_set_resume_stack(Fiber *f, Fiber *resume) { + if (resume) + f->resume_stack = fiber_stack_usable(&resume->stack); + else + f->resume_stack = (struct iovec) {}; +} + +_noreturn_ static void fiber_entry_point(void) { + Fiber *f = ASSERT_PTR(fiber_get_current()); + void *fake_stack_save = NULL; + + assert(f->func); + assert(IN_SET(f->state, FIBER_STATE_INITIAL, FIBER_STATE_READY, FIBER_STATE_CANCELLED)); + + /* swapcontext() from fiber_bootstrap() got us here. */ + finish_switch_stack(NULL); + + /* Capture our resumable point on the fiber's stack, then bounce back to whoever last set + * f->resume_context. On bootstrap that's fiber_bootstrap(); on every subsequent yield it's + * the most recent fiber_run(). sigsetjmp(buf, 0) skips the signal-mask save: switching is + * thread-shared with respect to signal masks. */ + if (sigsetjmp(f->context, 0) == 0) { + start_switch_stack(&fake_stack_save, &f->resume_stack); + siglongjmp(f->resume_context, 1); + } + + /* Re-entered for real via fiber_run()'s siglongjmp(f->context). */ + finish_switch_stack(fake_stack_save); + + /* Block scope so the cleanups attached to LOG_SET_PREFIX / LOG_CONTEXT_PUSH_KEY_VALUE fire + * before the siglongjmp below — siglongjmp skips _cleanup_ attributes, so we have to make + * sure the scope ends via a normal control-flow path first. */ + { + LOG_SET_PREFIX(f->name); + LOG_CONTEXT_PUSH_KEY_VALUE("FIBER=", f->name); + + f->result = f->state == FIBER_STATE_CANCELLED ? -ECANCELED : f->func(f->userdata); + f->state = FIBER_STATE_COMPLETED; + } + + /* Pass NULL fake_stack_save to discard the fiber's fake stack since the fiber is done. */ + start_switch_stack(NULL, &f->resume_stack); + + /* Bounce back to whichever fiber_run() call most recently entered us. resume_context is + * per-fiber so nested fiber_run() — e.g. a bus method dispatched as a fiber handler while + * sd_event_loop() itself runs in a fiber — is safe. */ + siglongjmp(f->resume_context, 1); + assert_not_reached(); +} + +static int fiber_init_ucontext(Fiber *f) { + ucontext_t old_uc, uc; + void *fake_stack_save = NULL; + + assert(f); + assert(getcontext); + + if (getcontext(&uc) < 0) + return -errno; + + struct iovec fiber_stack = fiber_stack_usable(&f->stack); + + uc.uc_link = NULL; /* Unused: trampoline siglongjmps out instead of returning. */ + uc.uc_stack.ss_sp = fiber_stack.iov_base; + uc.uc_stack.ss_size = fiber_stack.iov_len; + uc.uc_stack.ss_flags = 0; + + Fiber *prev = fiber_get_current(); + fiber_set_current(f); + + makecontext(&uc, fiber_entry_point, 0); + + fiber_set_resume_stack(f, prev); + if (sigsetjmp(f->resume_context, 0) == 0) { + start_switch_stack(&fake_stack_save, &fiber_stack); + if (swapcontext(&old_uc, &uc) < 0) { + finish_switch_stack(fake_stack_save); + fiber_set_current(prev); + return -errno; + } + assert_not_reached(); /* Trampoline siglongjmps back; swapcontext doesn't return. */ + } + + finish_switch_stack(fake_stack_save); + + fiber_set_current(prev); + return 0; +} + +/* Per-thread state for the sigaltstack bootstrap. The signal handler captures its own resumable + * point in fiber_tr_reenter and returns; fiber_init_sigaltstack() then siglongjmps to that point + * to land on the alt stack outside signal context, falling through to fiber_entry_point(). */ +static thread_local sigjmp_buf fiber_tr_reenter; +static thread_local volatile sig_atomic_t fiber_tr_called; + +static void fiber_sigaltstack_trampoline(int sig) { + fiber_tr_called = 1; + if (sigsetjmp(fiber_tr_reenter, 0) == 0) + return; /* First entry: signal handler returns normally so the kernel can + * tear down the signal frame before we reuse this stack. */ + + /* siglongjmp(fiber_tr_reenter, 1) from fiber_init_sigaltstack() landed us here. We're on + * the alt stack (= fiber's stack), no longer in signal context. Fall through to the + * shared trampoline. */ + fiber_entry_point(); +} + +static int fiber_init_sigaltstack(Fiber *f) { + DISABLE_WARNING_ZERO_AS_NULL_POINTER_CONSTANT; + static pthread_mutex_t sigurg_mutex = PTHREAD_MUTEX_INITIALIZER; + REENABLE_WARNING; + struct iovec fiber_stack = fiber_stack_usable(&f->stack); + int r; + + assert(f); + + /* Block SIGURG on this thread so we can deliver it precisely via pthread_kill+sigsuspend + * once the alt stack is in place. */ + sigset_t sigs, osigs; + assert_se(sigemptyset(&sigs) >= 0); + assert_se(sigaddset(&sigs, SIGURG) >= 0); + r = pthread_sigmask(SIG_BLOCK, &sigs, &osigs); + if (r != 0) + return -r; + + struct sigaction sa = { + .sa_handler = fiber_sigaltstack_trampoline, + .sa_flags = SA_ONSTACK, + }; + assert_se(sigfillset(&sa.sa_mask) >= 0); + + _unused_ _cleanup_(pthread_mutex_unlock_assertp) pthread_mutex_t *_l = pthread_mutex_lock_assert(&sigurg_mutex); + + struct sigaction osa; + r = RET_NERRNO(sigaction(SIGURG, &sa, &osa)); + if (r < 0) { + (void) pthread_sigmask(SIG_SETMASK, &osigs, /* oldset= */ NULL); + return r; + } + + stack_t ss = { .ss_sp = fiber_stack.iov_base, .ss_size = fiber_stack.iov_len }; + stack_t oss; + r = RET_NERRNO(sigaltstack(&ss, &oss)); + if (r < 0) { + (void) sigaction(SIGURG, &osa, /* oldact= */ NULL); + (void) pthread_sigmask(SIG_SETMASK, &osigs, /* oldset= */ NULL); + return r; + } + + /* Send SIGURG to ourselves; the handler runs on the alt stack. Loop on sigsuspend until + * we observe the handler ran (sigsuspend can return spuriously on EINTR-like wakeups). */ + fiber_tr_called = 0; + assert_se(pthread_kill(pthread_self(), SIGURG) == 0); + + sigset_t suspend_mask; + assert_se(sigfillset(&suspend_mask) >= 0); + assert_se(sigdelset(&suspend_mask, SIGURG) >= 0); + while (!fiber_tr_called) + (void) sigsuspend(&suspend_mask); + + /* Disable our alt stack before reinstalling the previous one (POSIX: must disable first). */ + stack_t disable = { .ss_flags = SS_DISABLE }; + r = RET_NERRNO(sigaltstack(&disable, /* old_ss= */ NULL)); + if (!FLAGS_SET(oss.ss_flags, SS_DISABLE)) + RET_GATHER(r, RET_NERRNO(sigaltstack(&oss, /* old_ss= */ NULL))); + + RET_GATHER(r, RET_NERRNO(sigaction(SIGURG, &osa, /* oldact= */ NULL))); + RET_GATHER(r, -pthread_sigmask(SIG_SETMASK, &osigs, /* oldset= */ NULL)); + + if (r < 0) + return r; + + /* The handler captured its resumable point in fiber_tr_reenter and returned. Now we siglongjmp + * back into it from outside signal context — control resumes past the sigsetjmp and falls + * through to fiber_entry_point(), which does the same f->context capture / siglongjmp-back + * dance as the ucontext path. */ + Fiber *prev = fiber_get_current(); + fiber_set_current(f); + fiber_set_resume_stack(f, prev); + + void *fake_stack_save = NULL; + if (sigsetjmp(f->resume_context, 0) == 0) { + start_switch_stack(&fake_stack_save, &fiber_stack); + siglongjmp(fiber_tr_reenter, 1); + } + + finish_switch_stack(fake_stack_save); + fiber_set_current(prev); + return 0; +} + +static int fiber_init(Fiber *f) { + /* Pick the bootstrap backend based on whether libc actually provides the ucontext functions. + * Weak symbols at the top of this file mean these resolve to NULL on musl-without-libucontext, + * where we fall back to the sigaltstack path (originally described in Engelschall, + * "Portable Multithreading", USENIX ATC 2000: + * https://usenix.org/legacy/publications/library/proceedings/usenix2000/general/full_papers/engelschall/engelschall_html/index.html). + * SYSTEMD_FIBER_FORCE_SIGALTSTACK=1 forces the sigaltstack path on glibc for testing. */ + int r = secure_getenv_bool("SYSTEMD_FIBER_FORCE_SIGALTSTACK"); + if (r < 0 && r != -ENXIO) + log_debug_errno(r, "Failed to parse $SYSTEMD_FIBER_FORCE_SIGALTSTACK, ignoring: %m"); + + if (!getcontext || r > 0) + return fiber_init_sigaltstack(f); + + return fiber_init_ucontext(f); +} + +/* Swap the thread-local log prefix and log context with the values stashed in f. While the fiber is + * suspended, f holds the fiber's own log state; while it's running, f holds the caller's log state. The + * swap is its own inverse, so the same call drives both directions. */ +static void fiber_swap_log_state(Fiber *f) { + assert(f); + log_prefix_swap(&f->log_prefix); + log_context_swap(&f->log_context, &f->log_context_num_fields); +} + +static void reset_current_fiber(void) { + /* Restore the caller's log state stashed in the running fiber (if any) before clearing + * current_fiber. Without this, the child of a fork() that happened mid-fiber would inherit the + * fiber's log prefix / context list in its thread-locals even though no fiber is running. */ + Fiber *f = fiber_get_current(); + if (f) { + fiber_swap_log_state(f); + } + fiber_set_current(NULL); +} + +static sd_event_source* fiber_current_event_source(Fiber *f) { + assert(f); + assert(f->state != FIBER_STATE_COMPLETED); + assert(f->event); + + return sd_event_get_state(f->event) == SD_EVENT_EXITING ? f->exit_event_source : f->defer_event_source; +} + +static int atfork_ret; + +static void install_atfork(void) { + /* __register_atfork() either returns 0 or -ENOMEM, in its glibc implementation. Since it's + * only half-documented (glibc doesn't document it but LSB does — though only superficially) + * we'll check for errors only in the most generic fashion possible. */ + atfork_ret = pthread_atfork(/* prepare= */ NULL, /* parent= */ NULL, reset_current_fiber); + if (atfork_ret != 0) + log_debug_errno(atfork_ret, "pthread_atfork() failed: %m"); +} + +static void fiber_resolve(sd_future *f) { + Fiber *fiber = sd_future_get_private(ASSERT_PTR(f)); + + fiber->defer_event_source = sd_event_source_disable_unref(fiber->defer_event_source); + fiber->exit_event_source = sd_event_source_disable_unref(fiber->exit_event_source); + /* The floating self-ref (if any) is potentially the last ref keeping the fiber alive — moving it + * into a local _cleanup_ slot ensures sd_future_resolve() runs callbacks and waiters while f is + * still valid; the local's cleanup drops the ref afterwards, at which point no further f->... + * access can happen. */ + _unused_ _cleanup_(sd_future_unrefp) sd_future *floating = TAKE_PTR(fiber->floating); + sd_future_resolve(f, fiber->result); +} + +static void fiber_enter(Fiber *fiber, Fiber *prev, void **fake_stack_save) { + fiber_set_current(fiber); + fiber_swap_log_state(fiber); + + struct iovec fiber_stack = fiber_stack_usable(&fiber->stack); + start_switch_stack(fake_stack_save, &fiber_stack); + fiber_set_resume_stack(fiber, prev); +} + +static void fiber_leave(Fiber *fiber, Fiber *prev, void *fake_stack_save) { + finish_switch_stack(fake_stack_save); + fiber_swap_log_state(fiber); + fiber_set_current(prev); +} + +static int fiber_run(sd_future *f) { + Fiber *fiber = sd_future_get_private(ASSERT_PTR(f)); + int r; + + if (fiber->state == FIBER_STATE_COMPLETED) + return -ESTALE; + + assert(IN_SET(fiber->state, FIBER_STATE_INITIAL, FIBER_STATE_READY, FIBER_STATE_CANCELLED)); + + static pthread_once_t atfork_once = PTHREAD_ONCE_INIT; + r = pthread_once(&atfork_once, install_atfork); + if (r != 0) + return -r; + if (atfork_ret != 0) + return -atfork_ret; + + LOG_SET_PREFIX(fiber->name); + LOG_CONTEXT_PUSH_KEY_VALUE("FIBER=", fiber->name); + + log_debug("Scheduling fiber"); + + /* Save the previously-current fiber (if any) so we can restore it when this fiber yields or + * completes. This matters when fiber_run() is invoked from within another fiber (e.g. an + * sd-event dispatch that happens to be running inside a fiber context itself): the + * LOG_SET_PREFIX/LOG_CONTEXT_PUSH above attached to whichever fiber was current at that moment, + * and their scope-level cleanup must see the same fiber_get_current() when it runs to detach + * them from the correct list. */ + Fiber *prev = fiber_get_current(); + void *fake_stack_save = NULL; + fiber_enter(fiber, prev, &fake_stack_save); + + /* This is where we start executing the fiber. Once it yields, we continue here as if nothing + * happened. resume_context captures this point; the fiber siglongjmps back to it. */ + if (sigsetjmp(fiber->resume_context, 0) == 0) + siglongjmp(fiber->context, 1); + + fiber_leave(fiber, prev, fake_stack_save); + + switch (fiber->state) { + + case FIBER_STATE_COMPLETED: + if (fiber->result < 0 && fiber->result != -ECANCELED) + log_debug_errno(fiber->result, "Fiber failed with error: %m"); + else + log_debug("Fiber finished executing"); + + fiber_resolve(f); + break; + + case FIBER_STATE_CANCELLED: + case FIBER_STATE_READY: + log_debug("Fiber yielded execution"); + + r = sd_event_source_set_enabled(fiber_current_event_source(fiber), SD_EVENT_ONESHOT); + if (r < 0) + return r; + break; + + case FIBER_STATE_SUSPENDED: + log_debug("Fiber suspended execution"); + /* Fiber is waiting for something - don't re-queue it */ + break; + + default: + assert_not_reached(); + } + + return 0; +} + +static int fiber_cancel(sd_future *f) { + Fiber *fiber = sd_future_get_private(ASSERT_PTR(f)); + int r; + + assert(fiber != fiber_get_current()); + + if (IN_SET(fiber->state, FIBER_STATE_COMPLETED, FIBER_STATE_CANCELLED)) + return 0; + + if (fiber->state == FIBER_STATE_INITIAL) { + /* The fiber's stack was allocated but never entered, so there are no scope-level cleanups + * waiting to run. Skip the dispatch round-trip that would just have fiber_entry_point() + * fall straight through with -ECANCELED, and settle the future right here — mirroring the + * FIBER_STATE_COMPLETED branch of fiber_run(). */ + fiber->result = -ECANCELED; + fiber->state = FIBER_STATE_COMPLETED; + fiber_resolve(f); + return 1; + } + + /* Once we cancel a fiber, we want to immediately resume it with -ECANCELED. */ + r = sd_event_source_set_enabled(fiber_current_event_source(fiber), SD_EVENT_ONESHOT); + if (r < 0) + return r; + + fiber->state = FIBER_STATE_CANCELLED; + + return 1; +} + +static int fiber_on_defer(sd_event_source *s, void *userdata) { + sd_future *f = ASSERT_PTR(userdata); + return fiber_run(f); +} + +static int fiber_on_exit(sd_event_source *s, void *userdata) { + sd_future *f = ASSERT_PTR(userdata); + Fiber *fiber = sd_future_get_private(f); + int r; + + /* The fiber may already have completed via the regular defer path before sd_event_exit() + * fires the exit source; in that case there's nothing left to drive and we'd otherwise + * trip fiber_run()'s -ESTALE return, which sd_event would log spuriously and disable the + * source for. */ + if (fiber->state == FIBER_STATE_COMPLETED) + return 0; + + /* If fiber_cancel() returned 1 the fiber was just marked cancelled and its deferred/exit event + * source was re-armed; we let the event loop dispatch that source on the next iteration so it goes + * through the normal fiber_on_defer/fiber_on_exit path rather than running it recursively here. */ + r = fiber_cancel(f); + if (r != 0) + return r; + + return fiber_run(f); +} + +static void* fiber_alloc(void) { + return new0(Fiber, 1); +} + +static void fiber_free(sd_future *f) { + Fiber *fiber = sd_future_get_private(f); + + /* To make sure all memory is deallocated, the fiber has to have completed by the time we free it to + * make sure its stack has finished unwinding (which will invoke the registered cleanup functions). + * As this function may get called when not running on a fiber ourselves, we can't guarantee here + * that we can run the fiber to completion ourselves, so we insist that this happens before we get + * here. To ensure fibers are cleaned up before exiting the event loop, exit handlers are added for + * fibers created outside of existing fibers. For fibers created within running fibers, unwinding the + * outer fiber should take care of cleaning up any created child fibers (for example using + * sd_future_cancel_wait_unref()). + * + * FIBER_STATE_INITIAL is also accepted: the stack was allocated but never entered, so there are no + * registered cleanups to run. This covers the partial-construction failure path in sd_fiber_new() + * as well as fibers that are unrefed before the event loop ever dispatches them. */ + assert(IN_SET(fiber->state, FIBER_STATE_INITIAL, FIBER_STATE_COMPLETED)); + + if (fiber->destroy) + fiber->destroy(fiber->userdata); + +#if HAVE_VALGRIND_VALGRIND_H + if (fiber->stack.iov_base) + VALGRIND_STACK_DEREGISTER(fiber->stack_id); +#endif + + if (fiber->stack.iov_base) + (void) munmap(fiber->stack.iov_base, fiber->stack.iov_len); + + sd_event_source_disable_unref(fiber->defer_event_source); + sd_event_source_disable_unref(fiber->exit_event_source); + sd_event_unref(fiber->event); + + free(fiber->name); + free(fiber); +} + +sd_future* sd_fiber_get_current(void) { + Fiber *f = fiber_get_current(); + if (!f) + return NULL; + + return sd_event_source_get_userdata(fiber_current_event_source(f)); +} + +int sd_fiber_is_running(void) { + return !!fiber_get_current(); +} + +sd_event* sd_fiber_get_event(void) { + Fiber *f = fiber_get_current(); + assert_return(f, NULL); + return f->event; +} + +int sd_fiber_get_priority(int64_t *ret) { + Fiber *f = fiber_get_current(); + + assert_return(ret, -EINVAL); + assert_return(f, -ESRCH); + + *ret = f->priority; + return 0; +} + +static int fiber_swap(FiberState state) { + Fiber *f = ASSERT_PTR(fiber_get_current()); + + f->state = state; + + void *fake_stack_save = NULL; + + if (sigsetjmp(f->context, 0) == 0) { + start_switch_stack(&fake_stack_save, &f->resume_stack); + siglongjmp(f->resume_context, 1); + } + + finish_switch_stack(fake_stack_save); + + /* When we get here, we've been resumed. */ + + if (f->state == FIBER_STATE_CANCELLED) + return -ECANCELED; + + /* sd_fiber_resume() stashes the resumer's value (an async wakeup error from a deadline + * timer, an io_uring CQE result, etc.) into f->result for us to surface here. Consume it + * unconditionally so it doesn't pollute subsequent suspends or the fiber's eventual return + * value — both negative errors and positive payloads (byte counts, accepted fds, revents + * masks) are valid resume values. */ + return TAKE_GENERIC(f->result, int, 0); +} + +int sd_fiber_yield(void) { + assert_return(fiber_get_current(), -ESRCH); + return fiber_swap(FIBER_STATE_READY); +} + +int sd_fiber_suspend(void) { + assert_return(fiber_get_current(), -ESRCH); + return fiber_swap(FIBER_STATE_SUSPENDED); +} + +static int fiber_set_priority(sd_future *f, int64_t priority) { + Fiber *fiber = sd_future_get_private(ASSERT_PTR(f)); + int r = 0; + + if (fiber->defer_event_source) + RET_GATHER(r, sd_event_source_set_priority(fiber->defer_event_source, priority)); + + if (fiber->exit_event_source) + RET_GATHER(r, sd_event_source_set_priority(fiber->exit_event_source, priority)); + + if (r >= 0) + fiber->priority = priority; + + return r; +} + +static const sd_future_ops fiber_future_ops; + +int sd_fiber_resume(sd_future *f, int result) { + assert_return(f, -EINVAL); + assert_return(sd_future_get_ops(f) == &fiber_future_ops, -EINVAL); + + Fiber *fiber = sd_future_get_private(f); + + if (fiber->state != FIBER_STATE_SUSPENDED) + return 0; + + /* Stash the result so fiber_swap() returns it from sd_fiber_suspend(). */ + fiber->result = result; + fiber->state = FIBER_STATE_READY; + return sd_event_source_set_enabled(fiber_current_event_source(fiber), SD_EVENT_ONESHOT); +} + +/* The fiber_future ops pass the Fiber pointer through as the future's private state. The fiber resolves + * its own future once it finishes running, so fiber_cancel() intentionally does not resolve. */ +static const sd_future_ops fiber_future_ops = { + .size = sizeof(sd_future_ops), + .alloc = fiber_alloc, + .free = fiber_free, + .cancel = fiber_cancel, + .set_priority = fiber_set_priority, +}; + +int sd_fiber_new(sd_event *e, const char *name, sd_fiber_func_t func, void *userdata, sd_fiber_destroy_t destroy, sd_future **ret) { + int r; + + assert_return(e, -EINVAL); + assert_return(name, -EINVAL); + assert_return(func, -EINVAL); + + if (IN_SET(sd_event_get_state(e), SD_EVENT_EXITING, SD_EVENT_FINISHED)) + return -ECANCELED; + + _cleanup_(sd_future_unrefp) sd_future *f = NULL; + r = sd_future_new(&fiber_future_ops, &f); + if (r < 0) + return r; + + Fiber *fiber = sd_future_get_private(f); + + struct rlimit rl = { RLIM_INFINITY, RLIM_INFINITY }; + if (getrlimit(RLIMIT_STACK, &rl) < 0) + log_debug_errno(errno, "Reading RLIMIT_STACK failed, ignoring: %m"); + if (rl.rlim_cur == RLIM_INFINITY) + rl.rlim_cur = 8U * U64_MB; /* Same as the default thread stack size */ + + /* Reserve room for the guard page so the usable region stays above PTHREAD_STACK_MIN, which + * is what libc/pthread routines (e.g. sigaltstack, TLS setup on musl) assume. */ + size_t stack_len = ROUND_UP(rl.rlim_cur, page_size()); + if (stack_len < (size_t) PTHREAD_STACK_MIN + page_size()) + stack_len = ROUND_UP((size_t) PTHREAD_STACK_MIN + page_size(), page_size()); + + *fiber = (Fiber) { + .stack.iov_len = stack_len, + .state = FIBER_STATE_INITIAL, + .name = strdup(name), + .func = func, + .userdata = userdata, + .event = sd_event_ref(e), + }; + if (!fiber->name) + return -ENOMEM; + + r = fiber_allocate_stack(fiber->stack.iov_len, &fiber->stack.iov_base); + if (r < 0) + return r; + +#if HAVE_VALGRIND_VALGRIND_H + /* Register the usable stack range (above the guard page) before fiber_bootstrap() so the + * trampoline's first sigsetjmp doesn't trip Valgrind's stack-tracking heuristics. */ + struct iovec usable = fiber_stack_usable(&fiber->stack); + fiber->stack_id = VALGRIND_STACK_REGISTER( + usable.iov_base, + (uint8_t*) usable.iov_base + usable.iov_len); +#endif + + r = fiber_init(fiber); + if (r < 0) + return r; + + /* Execution of the fiber is driven by two event sources, one deferred, one exit. The exit event + * source kicks in when sd_event_exit() is called, as from that point onwards only exit event + * sources will be dispatched. */ + + r = sd_event_add_defer(e, &fiber->defer_event_source, fiber_on_defer, f); + if (r < 0) + return r; + + r = sd_event_source_set_description(fiber->defer_event_source, fiber->name); + if (r < 0) + return r; + + r = sd_event_add_exit(e, &fiber->exit_event_source, fiber_on_exit, f); + if (r < 0) + return r; + + r = sd_event_source_set_description(fiber->exit_event_source, fiber->name); + if (r < 0) + return r; + + /* If we're on a fiber, we'll rely on the parent fiber to cancel this fiber if the event loop is + * exiting. Otherwise, we'll trigger cancellation of this fiber via the exit event source. Why cancel + * via the exit event source? We can only run the fiber while the event loop is active, so we need to + * make sure all fibers finish running before the event loop is finished, which an exit event source + * allows us to do. */ + r = sd_event_source_set_enabled(fiber->exit_event_source, sd_fiber_is_running() ? SD_EVENT_OFF : SD_EVENT_ONESHOT); + if (r < 0) + return r; + + /* Stays in FIBER_STATE_INITIAL until the event loop first dispatches it via fiber_run(). */ + + if (ret) + *ret = TAKE_PTR(f); + else { + /* Fire-and-forget: the fiber is guaranteed to resolve (via completion, cancellation, or + * the event loop exit handler), so making the future floating cleans it up. */ + r = sd_fiber_set_floating(f, true); + if (r < 0) + return r; + } + + /* We only take ownership of the given userdata pointer on success so assign the destroy callback + * at the very end so we don't clean up the userdata pointer on failure. */ + fiber->destroy = destroy; + + return 0; +} + +int sd_fiber_set_floating(sd_future *f, int b) { + assert_return(f, -EINVAL); + assert_return(sd_future_get_ops(f) == &fiber_future_ops, -EINVAL); + + Fiber *fiber = sd_future_get_private(f); + + if (!!fiber->floating == !!b) + return 0; + + /* The floating self-ref keeps the future alive until the fiber resolves; fiber_run() drops it + * in the COMPLETED branch. Only valid for fiber futures because fibers uniquely guarantee + * resolution (via completion, cancellation, or the event loop exit handler). */ + if (b) + fiber->floating = sd_future_ref(f); + else + fiber->floating = sd_future_unref(fiber->floating); + + return 0; +} + +int sd_fiber_get_floating(sd_future *f) { + assert_return(f, -EINVAL); + assert_return(sd_future_get_ops(f) == &fiber_future_ops, -EINVAL); + + Fiber *fiber = sd_future_get_private(f); + return !!fiber->floating; +} + +int sd_fiber_sleep(uint64_t usec) { + Fiber *f = fiber_get_current(); + int r; + + if (!f) + return usleep_safe(usec); + + if (usec == 0) + return sd_fiber_yield(); + + /* Match usleep_safe(USEC_INFINITY): suspend indefinitely. Passing USEC_INFINITY to + * sd_event_add_time_relative() would overflow into -EOVERFLOW. */ + if (usec == USEC_INFINITY) + return sd_fiber_suspend(); + + assert(f->event); + + _cleanup_(sd_future_cancel_wait_unrefp) sd_future *timer = NULL; + r = future_new_time_relative( + f->event, + CLOCK_MONOTONIC, + usec, + /* accuracy= */ 1, + /* result= */ 0, + &timer); + if (r < 0) + return r; + + return sd_fiber_suspend(); +} + +int sd_fiber_await(sd_future *target) { + int r; + + assert_return(fiber_get_current(), -ESRCH); + assert_return(target, -EINVAL); + assert_return(target != sd_fiber_get_current(), -EDEADLK); + + Fiber *f = fiber_get_current(); + + if (sd_future_state(target) == SD_FUTURE_RESOLVED) + return sd_future_result(target); + + /* Note that we do allow waiting for other fibers when the event loop is exiting, since waiting for + * other fibers does not require adding new event sources to the event loop. */ + if (sd_event_get_state(f->event) == SD_EVENT_FINISHED) + return -ECANCELED; + + _cleanup_(sd_future_cancel_wait_unrefp) sd_future *wait = NULL; + r = sd_future_new_wait(target, &wait); + if (r < 0) + return r; + + return sd_fiber_suspend(); +} + +sd_future* sd_fiber_timeout(uint64_t timeout) { + int r; + + assert_return(fiber_get_current(), NULL); + + Fiber *f = fiber_get_current(); + + if (timeout == USEC_INFINITY) + return NULL; + + sd_future *timer; + r = future_new_time_relative( + f->event, + CLOCK_MONOTONIC, + timeout, + /* accuracy= */ 1, + /* result= */ -ETIME, + &timer); + if (r < 0) + return NULL; /* On allocation failure no timer is armed and the scope becomes a no-op. + * Errors here are rare; if the caller cares they can compare to NULL. */ + + return timer; +} diff --git a/src/libsystemd/sd-future/sd-future.c b/src/libsystemd/sd-future/sd-future.c new file mode 100644 index 0000000000000..bfd322f615f20 --- /dev/null +++ b/src/libsystemd/sd-future/sd-future.c @@ -0,0 +1,257 @@ +/* SPDX-License-Identifier: LGPL-2.1-or-later */ + +#include "sd-future.h" + +#include "alloc-util.h" +#include "errno-util.h" +#include "log.h" +#include "macro.h" +#include "set.h" + +struct sd_future { + unsigned n_ref; + + int state; + int result; + + Set *waiters; + + sd_future_func_t callback; + void *userdata; + + const sd_future_ops *ops; + + /* Opaque per-future state owned by the future implementation (the code that called + * sd_future_new()). The ops vtable above receives this pointer in its callbacks, and + * external code can fetch it via sd_future_get_private(). */ + void *private; +}; + +static int fiber_resume_trampoline(sd_future *f) { + /* The future's result is what the fiber should resume with. Impls choose the value at + * resolution time — e.g. a deadline timer resolves with -ETIME, a wait future resolves + * with the target's result, a normal IO/sleep future resolves with 0 on success. */ + return sd_fiber_resume(sd_future_get_userdata(f), sd_future_result(f)); +} + +int sd_future_resolve(sd_future *f, int result) { + int r = 0; + + assert_return(f, -EINVAL); + + if (f->state != SD_FUTURE_PENDING) + return 0; + + f->state = SD_FUTURE_RESOLVED; + f->result = result; + + if (f->callback) + RET_GATHER(r, f->callback(f)); + + /* We'd like the set to not be modified while iterating over it, hence take ownership over it in + * a local variable. Otherwise code invoked via sd_future_resolve() could try to modify the set while + * we're iterating over it (for example wait_future_free()). */ + Set *waiters = TAKE_PTR(f->waiters); + sd_future *w; + SET_FOREACH(w, waiters) + RET_GATHER(r, sd_future_resolve(w, result)); + + set_free(waiters); + + return r; +} + +static sd_future* sd_future_free(sd_future *f) { + if (!f) + return NULL; + + if (f->state == SD_FUTURE_PENDING) + sd_future_resolve(f, -ECANCELED); + + set_free(f->waiters); + + if (f->ops->free) + f->ops->free(f); + + return mfree(f); +} + +DEFINE_TRIVIAL_REF_UNREF_FUNC(sd_future, sd_future, sd_future_free); +DEFINE_POINTER_ARRAY_CLEAR_FUNC(sd_future*, sd_future_unref); +DEFINE_POINTER_ARRAY_FREE_FUNC(sd_future*, sd_future_unref); + +sd_future* sd_future_cancel_wait_unref(sd_future *f) { + int r; + + if (!f) + return NULL; + + /* We have to be able to suspend until the fiber we're waiting for finishes, and that's only + * possible if we're running on a fiber ourselves. */ + if (!sd_fiber_is_running()) + return sd_future_unref(f); + + r = sd_future_cancel(f); + if (r < 0) + log_debug_errno(r, "Failed to cancel future, ignoring: %m"); + + if (f->state == SD_FUTURE_PENDING) { + /* Fast path: when f's resolve callback already targets the current fiber (the default for + * futures created on this fiber), we can suspend directly and let the existing trampoline + * wake us up — no need to allocate a wait future just to learn about the resolution. + * Otherwise fall back to sd_fiber_await() which sets up an explicit waiter. */ + if (f->callback == fiber_resume_trampoline && f->userdata == sd_fiber_get_current()) + r = sd_fiber_suspend(); + else + r = sd_fiber_await(f); + if (r < 0 && r != -ECANCELED) + log_debug_errno(r, "Failed to wait for future to finish, ignoring: %m"); + } + + return sd_future_unref(f); +} + +DEFINE_POINTER_ARRAY_CLEAR_FUNC(sd_future*, sd_future_cancel_wait_unref); +DEFINE_POINTER_ARRAY_FREE_FUNC(sd_future*, sd_future_cancel_wait_unref); + +int sd_future_new(const sd_future_ops *ops, sd_future **ret) { + assert_return(ops, -EINVAL); + assert_return(ops->size >= endoffsetof_field(sd_future_ops, set_priority), -EINVAL); + assert_return(ops->alloc, -EINVAL); + assert_return(ops->free, -EINVAL); + assert_return(ret, -EINVAL); + + sd_future *f = new(sd_future, 1); + if (!f) + return -ENOMEM; + + *f = (sd_future) { + .n_ref = 1, + .state = SD_FUTURE_PENDING, + .ops = ops, + }; + + f->private = ops->alloc(); + if (!f->private) { + free(f); + return -ENOMEM; + } + + /* If we're being created on a fiber, default the callback to resuming that fiber on resolve — + * this is almost always what you want, and it saves the usual set_callback boilerplate before + * sd_fiber_suspend(). Callers that want different behavior can override with + * sd_future_set_callback(). */ + sd_future *fiber = sd_fiber_get_current(); + if (fiber) + (void) sd_future_set_callback(f, fiber_resume_trampoline, fiber); + + *ret = f; + return 0; +} + +int sd_future_state(sd_future *f) { + assert_return(f, -EINVAL); + return f->state; +} + +int sd_future_result(sd_future *f) { + assert_return(f, -EINVAL); + assert_return(f->state == SD_FUTURE_RESOLVED, -EBUSY); + return f->result; +} + +void* sd_future_get_userdata(sd_future *f) { + assert_return(f, NULL); + return f->userdata; +} + +void* sd_future_get_private(sd_future *f) { + assert_return(f, NULL); + return f->private; +} + +const sd_future_ops* sd_future_get_ops(sd_future *f) { + assert_return(f, NULL); + return f->ops; +} + +int sd_future_set_callback(sd_future *f, sd_future_func_t callback, void *userdata) { + assert_return(f, -EINVAL); + + f->callback = callback; + f->userdata = userdata; + return 0; +} + +int sd_future_set_priority(sd_future *f, int64_t priority) { + assert_return(f, -EINVAL); + assert_return(f->state == SD_FUTURE_PENDING, -ESTALE); + assert_return(f->ops->set_priority, -EOPNOTSUPP); + + return f->ops->set_priority(f, priority); +} + +int sd_future_cancel(sd_future *f) { + assert_return(f, -EINVAL); + assert_return(f->ops->cancel, -EOPNOTSUPP); + + if (f->state == SD_FUTURE_RESOLVED) + return 0; + + return f->ops->cancel(f); +} + +typedef struct WaitFuture { + sd_future *target; +} WaitFuture; + +static void* wait_future_alloc(void) { + return new0(WaitFuture, 1); +} + +static void wait_future_free(sd_future *f) { + WaitFuture *wf = sd_future_get_private(f); + + set_remove(wf->target->waiters, f); + sd_future_unref(wf->target); + free(wf); +} + +static int wait_future_cancel(sd_future *f) { + WaitFuture *wf = sd_future_get_private(ASSERT_PTR(f)); + + set_remove(wf->target->waiters, f); + return sd_future_resolve(f, -ECANCELED); +} + +static const sd_future_ops wait_future_ops = { + .size = sizeof(sd_future_ops), + .alloc = wait_future_alloc, + .free = wait_future_free, + .cancel = wait_future_cancel, +}; + +int sd_future_new_wait(sd_future *target, sd_future **ret) { + int r; + + assert_return(target, -EINVAL); + assert_return(ret, -EINVAL); + + _cleanup_(sd_future_unrefp) sd_future *f = NULL; + r = sd_future_new(&wait_future_ops, &f); + if (r < 0) + return r; + + WaitFuture *wf = sd_future_get_private(f); + wf->target = sd_future_ref(target); + + if (target->state == SD_FUTURE_RESOLVED) + r = sd_future_resolve(f, target->result); + else + r = set_ensure_put(&target->waiters, &trivial_hash_ops, f); + if (r < 0) + return r; + + *ret = TAKE_PTR(f); + return 0; +} diff --git a/src/libsystemd/sd-future/test-fiber.c b/src/libsystemd/sd-future/test-fiber.c new file mode 100644 index 0000000000000..130fddd282fe9 --- /dev/null +++ b/src/libsystemd/sd-future/test-fiber.c @@ -0,0 +1,1167 @@ +/* SPDX-License-Identifier: LGPL-2.1-or-later */ + +#include + +#if HAVE_VALGRIND_VALGRIND_H +# include +#endif + +#include "sd-event.h" +#include "sd-future.h" + +#include "log-context.h" +#include "memory-util.h" +#include "pidref.h" +#include "process-util.h" +#include "tests.h" +#include "time-util.h" + +static int simple_fiber(void *userdata) { + int *value = ASSERT_PTR(userdata); + return *value; +} + +TEST(fiber_simple) { + _cleanup_(sd_event_unrefp) sd_event *e = NULL; + ASSERT_OK(sd_event_new(&e)); + ASSERT_OK(sd_event_set_exit_on_idle(e, true)); + + _cleanup_(sd_future_unrefp) sd_future *f = NULL; + int value = 5; + ASSERT_OK(sd_fiber_new(e, "simple", simple_fiber, &value, NULL, &f)); + ASSERT_OK(sd_event_loop(e)); + ASSERT_EQ(sd_future_result(f), 5); +} + +/* Fiber that yields once */ +static int yielding_fiber(void *userdata) { + int *counter = userdata; + (*counter)++; + + sd_fiber_yield(); + + (*counter)++; + return 0; +} + +/* Test: Single fiber that yields */ +TEST(fiber_single_yield) { + _cleanup_(sd_event_unrefp) sd_event *e = NULL; + ASSERT_OK(sd_event_new(&e)); + ASSERT_OK(sd_event_set_exit_on_idle(e, true)); + + _cleanup_(sd_future_unrefp) sd_future *f = NULL; + int counter = 0; + ASSERT_OK(sd_fiber_new(e, "yielding", yielding_fiber, &counter, NULL, &f)); + + /* First iteration: fiber runs until first yield */ + ASSERT_EQ(counter, 0); + ASSERT_OK_POSITIVE(sd_event_run(e, 0)); + ASSERT_EQ(counter, 1); + + /* Second iteration: fiber runs from yield to completion */ + ASSERT_OK_POSITIVE(sd_event_run(e, 0)); + ASSERT_EQ(counter, 2); + + /* No more fibers to run */ + ASSERT_OK_ZERO(sd_event_loop(e)); +} + +static int counting_fiber(void *userdata) { + int counter = 0; + + for (int i = 0; i < 5; i++) { + counter++; + sd_fiber_yield(); + } + + return counter; +} + +/* Test: Multiple fibers yielding cooperatively */ +TEST(fiber_multiple_yield) { + _cleanup_(sd_event_unrefp) sd_event *e = NULL; + ASSERT_OK(sd_event_new(&e)); + ASSERT_OK(sd_event_set_exit_on_idle(e, true)); + + sd_future *fibers[5] = {}; + CLEANUP_ELEMENTS(fibers, sd_future_unref_array_clear); + + for (size_t i = 0; i < ELEMENTSOF(fibers); i++) { + _cleanup_free_ char *name = NULL; + ASSERT_OK(asprintf(&name, "counting-%zu", i)); + ASSERT_OK(sd_fiber_new(e, name, counting_fiber, NULL, NULL, &fibers[i])); + } + + ASSERT_OK(sd_event_loop(e)); + + for (size_t i = 0; i < ELEMENTSOF(fibers); i++) + ASSERT_OK_EQ(sd_future_result(fibers[i]), 5); +} + +static int priority_fiber(void *userdata) { + int *counter = ASSERT_PTR(userdata); + + (*counter)++; + sd_fiber_yield(); + + return *counter; +} + +/* Test: Priority-based scheduling */ +TEST(fiber_priority_ascending) { + _cleanup_(sd_event_unrefp) sd_event *e = NULL; + ASSERT_OK(sd_event_new(&e)); + ASSERT_OK(sd_event_set_exit_on_idle(e, true)); + + sd_future *fibers[5] = {}; + CLEANUP_ELEMENTS(fibers, sd_future_unref_array_clear); + int counter = 0; + + for (size_t i = 0; i < ELEMENTSOF(fibers); i++) { + _cleanup_free_ char *name = NULL; + ASSERT_OK(asprintf(&name, "priority-%zu", i)); + ASSERT_OK(sd_fiber_new(e, name, priority_fiber, &counter, NULL, &fibers[i])); + ASSERT_OK(sd_future_set_priority(fibers[i], i)); + } + + ASSERT_OK(sd_event_loop(e)); + + /* The fibers have ascending priorities, so we the first one to run to completion, + * followed by the second one, etc. */ + + for (size_t i = 0; i < ELEMENTSOF(fibers); i++) + ASSERT_EQ(sd_future_result(fibers[i]), (int) i + 1); +} + +TEST(fiber_priority_identical) { + _cleanup_(sd_event_unrefp) sd_event *e = NULL; + ASSERT_OK(sd_event_new(&e)); + ASSERT_OK(sd_event_set_exit_on_idle(e, true)); + + sd_future *fibers[5] = {}; + CLEANUP_ELEMENTS(fibers, sd_future_unref_array_clear); + int counter = 0; + + for (size_t i = 0; i < ELEMENTSOF(fibers); i++) { + _cleanup_free_ char *name = NULL; + ASSERT_OK(asprintf(&name, "priority-%zu", i)); + ASSERT_OK(sd_fiber_new(e, name, priority_fiber, &counter, NULL, &fibers[i])); + } + + ASSERT_OK(sd_event_loop(e)); + + /* The fibers have the same priorities, so we expect all of them to run once first, and then they'll + * all run again another time, so they should all return the same value. */ + + for (size_t i = 0; i < ELEMENTSOF(fibers); i++) + ASSERT_EQ(sd_future_result(fibers[i]), (int) 5); +} + +static int error_fiber(void *userdata) { + return -ENOENT; +} + +TEST(fiber_error_return) { + _cleanup_(sd_event_unrefp) sd_event *e = NULL; + ASSERT_OK(sd_event_new(&e)); + ASSERT_OK(sd_event_set_exit_on_idle(e, true)); + + _cleanup_(sd_future_unrefp) sd_future *f = NULL; + ASSERT_OK(sd_fiber_new(e, "error", error_fiber, NULL, NULL, &f)); + + ASSERT_OK(sd_event_loop(e)); + ASSERT_EQ(sd_future_result(f), -ENOENT); +} + +static int cancel_fiber(void *userdata) { + return sd_fiber_yield(); +} + +TEST(fiber_cancel_basic) { + _cleanup_(sd_event_unrefp) sd_event *e = NULL; + ASSERT_OK(sd_event_new(&e)); + ASSERT_OK(sd_event_set_exit_on_idle(e, true)); + + _cleanup_(sd_future_unrefp) sd_future *f = NULL; + int value = 42; + ASSERT_OK(sd_fiber_new(e, "cancel", cancel_fiber, &value, NULL, &f)); + + ASSERT_OK(sd_future_cancel(f)); + ASSERT_OK(sd_event_loop(e)); + ASSERT_ERROR(sd_future_result(f), ECANCELED); +} + +static int fiber_that_yields(void *userdata) { + int *yield_count = userdata; + int r; + + for (int i = 0; i < 5; i++) { + (*yield_count)++; + r = sd_fiber_yield(); + if (r < 0) + return r; /* Propagate cancellation error */ + } + + return 0; +} + +/* Test: fiber_yield() returns error when fiber is cancelled externally */ +TEST(fiber_cancel_propagation_via_yield) { + _cleanup_(sd_event_unrefp) sd_event *e = NULL; + ASSERT_OK(sd_event_new(&e)); + ASSERT_OK(sd_event_set_exit_on_idle(e, true)); + + _cleanup_(sd_future_unrefp) sd_future *f = NULL; + int yield_count = 0; + ASSERT_OK(sd_fiber_new(e, "yielding", fiber_that_yields, &yield_count, NULL, &f)); + + ASSERT_OK_POSITIVE(sd_event_run(e, 0)); + ASSERT_EQ(yield_count, 1); + ASSERT_OK_POSITIVE(sd_event_run(e, 0)); + ASSERT_EQ(yield_count, 2); + + ASSERT_OK(sd_future_cancel(f)); + + ASSERT_OK(sd_event_loop(e)); + + /* sd_fiber should have been cancelled */ + ASSERT_ERROR(sd_future_result(f), ECANCELED); + ASSERT_EQ(yield_count, 2); +} + +/* Test: Cancel a fiber that has already completed */ +TEST(fiber_cancel_completed) { + _cleanup_(sd_event_unrefp) sd_event *e = NULL; + ASSERT_OK(sd_event_new(&e)); + ASSERT_OK(sd_event_set_exit_on_idle(e, true)); + + _cleanup_(sd_future_unrefp) sd_future *f = NULL; + int value = 42; + ASSERT_OK(sd_fiber_new(e, "simple", simple_fiber, &value, NULL, &f)); + + /* Run the fiber to completion */ + ASSERT_OK(sd_event_loop(e)); + + /* Canceling a completed fiber should be a no-op */ + ASSERT_OK(sd_future_cancel(f)); + ASSERT_EQ(sd_future_result(f), 42); +} + +static int multiple_yield_fiber(void *userdata) { + int *counter = userdata; + int r; + + for (int i = 0; i < 3; i++) { + (*counter)++; + r = sd_fiber_yield(); + if (r < 0) + return r; + } + + return 0; +} + +/* Test: Cancel one fiber among multiple */ +TEST(fiber_cancel_one_of_many) { + _cleanup_(sd_event_unrefp) sd_event *e = NULL; + ASSERT_OK(sd_event_new(&e)); + ASSERT_OK(sd_event_set_exit_on_idle(e, true)); + + sd_future *fibers[3] = {}; + CLEANUP_ELEMENTS(fibers, sd_future_unref_array_clear); + int counters[3] = {0, 0, 0}; + for (size_t i = 0; i < ELEMENTSOF(fibers); i++) + ASSERT_OK(sd_fiber_new(e, "multiple-yield", multiple_yield_fiber, &counters[i], NULL, &fibers[i])); + + /* Run one iteration - all fibers yield after incrementing once */ + ASSERT_OK_POSITIVE(sd_event_run(e, 0)); + ASSERT_OK_POSITIVE(sd_event_run(e, 0)); + ASSERT_OK_POSITIVE(sd_event_run(e, 0)); + ASSERT_EQ(counters[0], 1); + ASSERT_EQ(counters[1], 1); + ASSERT_EQ(counters[2], 1); + + /* Cancel the second fiber */ + ASSERT_OK(sd_future_cancel(fibers[1])); + + /* Run to completion */ + ASSERT_OK(sd_event_loop(e)); + + /* First and third fibers should complete normally */ + ASSERT_EQ(counters[0], 3); + ASSERT_EQ(counters[2], 3); + ASSERT_EQ(sd_future_result(fibers[0]), 0); + ASSERT_EQ(sd_future_result(fibers[2]), 0); + + /* Second fiber should be canceled with counter at 1 */ + ASSERT_EQ(counters[1], 1); + ASSERT_EQ(sd_future_result(fibers[1]), -ECANCELED); +} + +/* Test: sd_fiber_await() - wait for a fiber to complete */ +static int slow_fiber(void *userdata) { + int *counter = userdata; + + for (int i = 0; i < 3; i++) { + (*counter)++; + sd_fiber_yield(); + } + + return 42; +} + +static int waiting_fiber(void *userdata) { + sd_future *target = userdata; + int r; + + r = sd_fiber_await(target); + if (r < 0) + return r; + + r = sd_future_result(target); + return r == 42 ? 0 : -EIO; +} + +TEST(fiber_wait_for_basic) { + _cleanup_(sd_event_unrefp) sd_event *e = NULL; + ASSERT_OK(sd_event_new(&e)); + ASSERT_OK(sd_event_set_exit_on_idle(e, true)); + + /* Create target fiber with lower priority (runs second) */ + _cleanup_(sd_future_unrefp) sd_future *target = NULL, *waiter = NULL; + int counter = 0; + ASSERT_OK(sd_fiber_new(e, "slow", slow_fiber, &counter, NULL, &target)); + ASSERT_OK(sd_future_set_priority(target, 1)); + + /* Create waiter fiber with higher priority (runs first) */ + ASSERT_OK(sd_fiber_new(e, "waiting", waiting_fiber, target, NULL, &waiter)); + ASSERT_OK(sd_future_set_priority(waiter, 0)); + + ASSERT_OK(sd_event_loop(e)); + + ASSERT_OK(sd_future_result(waiter)); + ASSERT_OK_EQ(sd_future_result(target), 42); + ASSERT_EQ(counter, 3); +} + +/* Test: wait for already completed fiber */ +static int wait_for_completed_fiber(void *userdata) { + sd_future *target = userdata; + int r; + + r = sd_fiber_await(target); + if (r < 0) + return r; + + return sd_future_result(target); +} + +TEST(fiber_wait_for_completed) { + _cleanup_(sd_event_unrefp) sd_event *e = NULL; + ASSERT_OK(sd_event_new(&e)); + ASSERT_OK(sd_event_set_exit_on_idle(e, true)); + + _cleanup_(sd_future_unrefp) sd_future *target = NULL, *waiter = NULL; + int value = 100; + + /* Create target fiber with higher priority (runs first) */ + ASSERT_OK(sd_fiber_new(e, "simple", simple_fiber, &value, NULL, &target)); + ASSERT_OK(sd_future_set_priority(target, 0)); + /* Create waiter fiber with lower priority (runs second, after target completes) */ + ASSERT_OK(sd_fiber_new(e, "wait-for-completed", wait_for_completed_fiber, target, NULL, &waiter)); + ASSERT_OK(sd_future_set_priority(waiter, 1)); + + ASSERT_OK(sd_event_loop(e)); + + ASSERT_OK_EQ(sd_future_result(waiter), 100); + ASSERT_OK_EQ(sd_future_result(target), 100); +} + +/* Test: awaiting an already-resolved future returns the future's result directly */ +static int await_resolved_fiber(void *userdata) { + sd_future *target = userdata; + + ASSERT_EQ((int) sd_future_state(target), (int) SD_FUTURE_RESOLVED); + ASSERT_OK_EQ(sd_fiber_await(target), 77); + return 0; +} + +TEST(fiber_await_resolved_returns_result) { + _cleanup_(sd_event_unrefp) sd_event *e = NULL; + ASSERT_OK(sd_event_new(&e)); + ASSERT_OK(sd_event_set_exit_on_idle(e, true)); + + _cleanup_(sd_future_unrefp) sd_future *target = NULL, *waiter = NULL; + int value = 77; + + /* Higher-priority target runs to completion before the waiter starts. */ + ASSERT_OK(sd_fiber_new(e, "target", simple_fiber, &value, NULL, &target)); + ASSERT_OK(sd_future_set_priority(target, 0)); + ASSERT_OK(sd_fiber_new(e, "await-resolved", await_resolved_fiber, target, NULL, &waiter)); + ASSERT_OK(sd_future_set_priority(waiter, 1)); + + ASSERT_OK(sd_event_loop(e)); + + ASSERT_OK(sd_future_result(waiter)); + ASSERT_OK_EQ(sd_future_result(target), 77); +} + +/* Test: wait for cancelled fiber */ +static int wait_for_cancelled_fiber(void *userdata) { + sd_future *target = userdata; + int r; + + r = sd_fiber_await(target); + if (r < 0) + return r; + + return sd_future_result(target); +} + +TEST(fiber_wait_for_cancelled) { + _cleanup_(sd_event_unrefp) sd_event *e = NULL; + ASSERT_OK(sd_event_new(&e)); + ASSERT_OK(sd_event_set_exit_on_idle(e, true)); + + _cleanup_(sd_future_unrefp) sd_future *target = NULL, *waiter = NULL; + int counter = 0; + ASSERT_OK(sd_fiber_new(e, "yielding", fiber_that_yields, &counter, NULL, &target)); + ASSERT_OK(sd_fiber_new(e, "wait-for-cancelled", wait_for_cancelled_fiber, target, NULL, &waiter)); + + ASSERT_OK_POSITIVE(sd_event_run(e, 0)); + ASSERT_OK_POSITIVE(sd_event_run(e, 0)); + + ASSERT_OK(sd_future_cancel(target)); + + ASSERT_OK(sd_event_loop(e)); + + ASSERT_ERROR(sd_future_result(waiter), ECANCELED); + ASSERT_ERROR(sd_future_result(target), ECANCELED); +} + +/* Test: multiple fibers waiting for the same target */ +static int multi_waiter_fiber(void *userdata) { + sd_future *target = userdata; + int r; + + r = sd_fiber_await(target); + if (r < 0) + return r; + + return sd_future_result(target); +} + +TEST(fiber_wait_for_multiple_waiters) { + _cleanup_(sd_event_unrefp) sd_event *e = NULL; + ASSERT_OK(sd_event_new(&e)); + ASSERT_OK(sd_event_set_exit_on_idle(e, true)); + + _cleanup_(sd_future_unrefp) sd_future *target = NULL; + int counter = 0; + ASSERT_OK(sd_fiber_new(e, "slow", slow_fiber, &counter, NULL, &target)); + + sd_future *waiters[3] = {}; + CLEANUP_ELEMENTS(waiters, sd_future_unref_array_clear); + for (size_t i = 0; i < ELEMENTSOF(waiters); i++) + ASSERT_OK(sd_fiber_new(e, "multi-waiter", multi_waiter_fiber, target, NULL, &waiters[i])); + + ASSERT_OK(sd_event_loop(e)); + + for (size_t i = 0; i < ELEMENTSOF(waiters); i++) + ASSERT_OK_EQ(sd_future_result(waiters[i]), 42); + + ASSERT_OK_EQ(sd_future_result(target), 42); + ASSERT_EQ(counter, 3); +} + +/* Test: chain of waiting fibers */ +static int chain_waiter_fiber(void *userdata) { + sd_future *target = userdata; + int r; + + r = sd_fiber_await(target); + if (r < 0) + return r; + + r = sd_future_result(target); + return r + 1; +} + +TEST(fiber_wait_for_chain) { + _cleanup_(sd_event_unrefp) sd_event *e = NULL; + ASSERT_OK(sd_event_new(&e)); + ASSERT_OK(sd_event_set_exit_on_idle(e, true)); + + sd_future *fibers[5] = {}; + CLEANUP_ELEMENTS(fibers, sd_future_unref_array_clear); + int value = 10; + + ASSERT_OK(sd_fiber_new(e, "simple", simple_fiber, &value, NULL, &fibers[0])); + + /* Each subsequent fiber waits for the previous and adds 1 */ + for (size_t i = 1; i < ELEMENTSOF(fibers); i++) + ASSERT_OK(sd_fiber_new(e, "chain-waiter", chain_waiter_fiber, fibers[i - 1], NULL, &fibers[i])); + + ASSERT_OK(sd_event_loop(e)); + + /* Check results: 10, 11, 12, 13, 14 */ + for (size_t i = 0; i < ELEMENTSOF(fibers); i++) + ASSERT_OK_EQ(sd_future_result(fibers[i]), 10 + (int) i); +} + +static int nested_run_inner_fiber(void *userdata) { + int *counter = ASSERT_PTR(userdata); + + (*counter)++; + int r = sd_fiber_yield(); + if (r < 0) + return r; + (*counter)++; + + return 0; +} + +static int nested_run_outer_fiber(void *userdata) { + int *counter = ASSERT_PTR(userdata); + _cleanup_(sd_event_unrefp) sd_event *inner = NULL; + _cleanup_(sd_future_unrefp) sd_future *nested = NULL; + int r; + + /* Yield once before the nested loop: this forces the outer fiber to later resume through its own + * siglongjmp back to its resume_context after the inner fiber_run() has executed, which is + * exactly the path that breaks when the resume context is stored thread-globally instead of + * per-fiber. */ + r = sd_fiber_yield(); + if (r < 0) + return r; + + r = sd_event_new(&inner); + if (r < 0) + return r; + + r = sd_event_set_exit_on_idle(inner, true); + if (r < 0) + return r; + + /* Spawn a fiber on the inner event loop. Driving it via sd_event_loop(inner) causes fiber_run() to + * be invoked while we are already executing inside fiber_run() for the outer fiber. */ + r = sd_fiber_new(inner, "inner", nested_run_inner_fiber, counter, NULL, &nested); + if (r < 0) + return r; + + r = sd_event_loop(inner); + if (r < 0) + return r; + + r = sd_future_result(nested); + if (r < 0) + return r; + + /* Yield again after the inner loop has returned. If the outer fiber's resume context was clobbered + * by the nested fiber_run(), the siglongjmp underneath this yield would jump into an already + * unwound stack frame. */ + return sd_fiber_yield(); +} + +TEST(fiber_nested_run) { + _cleanup_(sd_event_unrefp) sd_event *e = NULL; + ASSERT_OK(sd_event_new(&e)); + ASSERT_OK(sd_event_set_exit_on_idle(e, true)); + + _cleanup_(sd_future_unrefp) sd_future *outer = NULL; + int counter = 0; + ASSERT_OK(sd_fiber_new(e, "outer", nested_run_outer_fiber, &counter, NULL, &outer)); + + ASSERT_OK(sd_event_loop(e)); + ASSERT_OK(sd_future_result(outer)); + + /* The inner fiber incremented the counter once before yielding and once after resuming. */ + ASSERT_EQ(counter, 2); +} + +static int nested_current_check_inner_fiber(void *userdata) { + sd_future **slots = ASSERT_PTR(userdata); + + slots[1] = sd_fiber_get_current(); + int r = sd_fiber_yield(); + if (r < 0) + return r; + /* After resuming, the current fiber must still be us, not the outer fiber that was current when + * fiber_run() re-entered. */ + if (sd_fiber_get_current() != slots[1]) + return -EBADF; + + return 0; +} + +static int nested_current_check_outer_fiber(void *userdata) { + sd_future **slots = ASSERT_PTR(userdata); + _cleanup_(sd_event_unrefp) sd_event *inner = NULL; + _cleanup_(sd_future_unrefp) sd_future *nested = NULL; + int r; + + slots[0] = sd_fiber_get_current(); + + r = sd_event_new(&inner); + if (r < 0) + return r; + + r = sd_event_set_exit_on_idle(inner, true); + if (r < 0) + return r; + + r = sd_fiber_new(inner, "inner", nested_current_check_inner_fiber, slots, NULL, &nested); + if (r < 0) + return r; + + r = sd_event_loop(inner); + if (r < 0) + return r; + + r = sd_future_result(nested); + if (r < 0) + return r; + + /* After the nested fiber_run() has returned, the current fiber must have been restored to the + * outer fiber rather than left as NULL or pointing at the (now freed) inner fiber. */ + if (sd_fiber_get_current() != slots[0]) + return -EBADF; + + return 0; +} + +TEST(fiber_nested_run_current_restored) { + _cleanup_(sd_event_unrefp) sd_event *e = NULL; + ASSERT_OK(sd_event_new(&e)); + ASSERT_OK(sd_event_set_exit_on_idle(e, true)); + + sd_future *slots[2] = {}; + _cleanup_(sd_future_unrefp) sd_future *outer = NULL; + ASSERT_OK(sd_fiber_new(e, "outer", nested_current_check_outer_fiber, slots, NULL, &outer)); + + ASSERT_OK(sd_event_loop(e)); + ASSERT_OK(sd_future_result(outer)); + + ASSERT_NOT_NULL(slots[0]); + ASSERT_NOT_NULL(slots[1]); + ASSERT_TRUE(slots[0] != slots[1]); +} + +static int nested_cancellation_fiber(void *userdata) { + int *counter = ASSERT_PTR(userdata); + _cleanup_(sd_future_cancel_wait_unrefp) sd_future *nested = NULL; + int r; + + if (*counter >= 5) + return sd_fiber_sleep(10 * USEC_PER_SEC); + + (*counter)++; + + _cleanup_free_ char *name = NULL; + if (asprintf(&name, "nested-cancellation-%i", *counter) < 0) + return -ENOMEM; + + /* Create a nested fiber within this fiber */ + r = sd_fiber_new(sd_fiber_get_event(), name, nested_cancellation_fiber, counter, NULL, &nested); + if (r < 0) + return r; + + /* Wait for the nested fiber to complete */ + r = sd_fiber_await(nested); + if (r < 0) + return r; + + /* If we got here without cancellation, verify the nested fiber completed */ + return sd_future_result(nested); +} + +static int exit_loop_fiber(void *userdata) { + /* Just exit the event loop, causing the outer fiber to be cancelled */ + return sd_event_exit(sd_fiber_get_event(), 0); +} + +TEST(fiber_nested_cancellation) { + _cleanup_(sd_event_unrefp) sd_event *e = NULL; + ASSERT_OK(sd_event_new(&e)); + + int counter = 0; + + /* Create outer fiber with higher priority (runs first) */ + _cleanup_(sd_future_unrefp) sd_future *outer = NULL; + ASSERT_OK(sd_fiber_new(e, "outer", nested_cancellation_fiber, &counter, NULL, &outer)); + + /* Create exit fiber with lower priority (runs after all nested fibers have suspended) */ + _cleanup_(sd_future_unrefp) sd_future *exit_fiber = NULL; + ASSERT_OK(sd_fiber_new(e, "exit-loop", exit_loop_fiber, NULL, NULL, &exit_fiber)); + ASSERT_OK(sd_future_set_priority(exit_fiber, 1)); + + /* Run the event loop - the exit fiber should cause it to exit, + * which should cancel the outer fiber, which should cancel the nested fiber, and so forth. */ + ASSERT_OK(sd_event_loop(e)); + + /* The exit fiber should have completed successfully */ + ASSERT_OK(sd_future_result(exit_fiber)); + + /* The outer fiber should have been cancelled */ + ASSERT_ERROR(sd_future_result(outer), ECANCELED); + + /* The nested fiber was created and incremented counter once before being cancelled */ + ASSERT_GT(counter, 0); +} + +static int nested_fiber_cleanup_nested_fiber(void *userdata) { + int *counter = ASSERT_PTR(userdata); + int r; + + r = sd_fiber_sleep(10 * USEC_PER_SEC); + if (r == -ECANCELED) + (*counter)++; + else if (r < 0) + return r; + + return 0; +} + +static int nested_fiber_cleanup_fiber(void *userdata) { + _cleanup_(sd_future_cancel_wait_unrefp) sd_future *nested = NULL; + int r; + + /* Create a nested fiber within this fiber. */ + r = sd_fiber_new(sd_fiber_get_event(), "nested", nested_fiber_cleanup_nested_fiber, userdata, NULL, &nested); + if (r < 0) + return r; + + /* Yield and then exit, the nested fiber should be cancelled. */ + return sd_fiber_yield(); +} + +TEST(nested_fiber_cleanup) { + _cleanup_(sd_event_unrefp) sd_event *e = NULL; + ASSERT_OK(sd_event_new(&e)); + ASSERT_OK(sd_event_set_exit_on_idle(e, true)); + + _cleanup_(sd_future_unrefp) sd_future *outer = NULL; + int counter = 0; + ASSERT_OK(sd_fiber_new(e, "outer", nested_fiber_cleanup_fiber, &counter, NULL, &outer)); + + ASSERT_OK(sd_event_loop(e)); + + /* The outer fiber should have finished normally */ + ASSERT_OK(sd_future_result(outer)); + + /* The nested fiber was created and incremented its counter once when it was cancelled. */ + ASSERT_GT(counter, 0); +} + +static int priority_check_fiber(void *userdata) { + int64_t *ret = ASSERT_PTR(userdata); + + /* Verify that sd_fiber_get_priority() returns the value set via sd_future_set_priority() */ + ASSERT_OK(sd_fiber_get_priority(ret)); + + /* Exercise sd_fiber_sleep() which internally creates a time future. This verifies that the priority + * is correctly propagated to the time event source (via f->time.source, not f->io.source). */ + return sd_fiber_sleep(1); +} + +TEST(fiber_priority_get) { + _cleanup_(sd_event_unrefp) sd_event *e = NULL; + ASSERT_OK(sd_event_new(&e)); + ASSERT_OK(sd_event_set_exit_on_idle(e, true)); + + int64_t got_priority = 0; + _cleanup_(sd_future_unrefp) sd_future *f = NULL; + ASSERT_OK(sd_fiber_new(e, "priority-check", priority_check_fiber, &got_priority, NULL, &f)); + ASSERT_OK(sd_future_set_priority(f, 10)); + + ASSERT_OK(sd_event_loop(e)); + ASSERT_OK(sd_future_result(f)); + + /* Verify priority was stored and retrievable */ + ASSERT_EQ(got_priority, 10); +} + +static int floating_fiber(void *userdata) { + int *counter = ASSERT_PTR(userdata); + + (*counter)++; + int r = sd_fiber_yield(); + if (r < 0) + return r; + (*counter)++; + + return 0; +} + +TEST(fiber_floating) { + _cleanup_(sd_event_unrefp) sd_event *e = NULL; + ASSERT_OK(sd_event_new(&e)); + ASSERT_OK(sd_event_set_exit_on_idle(e, true)); + + _cleanup_(sd_future_unrefp) sd_future *f = NULL; + int counter = 0; + ASSERT_OK(sd_fiber_new(e, "floating", floating_fiber, &counter, NULL, &f)); + + ASSERT_OK_ZERO(sd_fiber_get_floating(f)); + ASSERT_OK(sd_fiber_set_floating(f, true)); + ASSERT_OK_POSITIVE(sd_fiber_get_floating(f)); + + /* Drop our handle: the floating ref keeps the future alive until the fiber resolves, after + * which the self-unref frees it. If this didn't work we'd either leak (visible under ASan) or + * trip fiber_free()'s "state == COMPLETED" assertion. */ + f = sd_future_unref(f); + + ASSERT_OK(sd_event_loop(e)); + ASSERT_EQ(counter, 2); +} + +static int drop_extra_ref(sd_future *f) { + /* Drop an extra ref the test installed before the callback fires. After this returns, the + * floating self-ref is the only thing keeping the future alive — exercising the path where + * the floating unref in fiber_run() is the last unref. */ + sd_future_unref(f); + return 0; +} + +TEST(fiber_floating_callback_drops_ref) { + _cleanup_(sd_event_unrefp) sd_event *e = NULL; + ASSERT_OK(sd_event_new(&e)); + ASSERT_OK(sd_event_set_exit_on_idle(e, true)); + + sd_future *f = NULL; + int counter = 0; + ASSERT_OK(sd_fiber_new(e, "floating-cb", floating_fiber, &counter, NULL, &f)); + + ASSERT_OK(sd_fiber_set_floating(f, true)); + + /* Bump the ref for the callback to drop, then install the callback. */ + sd_future_ref(f); + ASSERT_OK(sd_future_set_callback(f, drop_extra_ref, NULL)); + + /* Drop our handle. Refs remaining: floating self-ref + the extra ref the callback will drop. */ + f = sd_future_unref(f); + + ASSERT_OK(sd_event_loop(e)); + ASSERT_EQ(counter, 2); +} + +TEST(fiber_floating_toggle) { + _cleanup_(sd_event_unrefp) sd_event *e = NULL; + ASSERT_OK(sd_event_new(&e)); + ASSERT_OK(sd_event_set_exit_on_idle(e, true)); + + _cleanup_(sd_future_unrefp) sd_future *f = NULL; + int counter = 0; + ASSERT_OK(sd_fiber_new(e, "floating-toggle", floating_fiber, &counter, NULL, &f)); + + /* Toggling floating on and off again should leave the refcount unchanged: set_floating(true) + * takes a ref and set_floating(false) drops it. If the accounting were off, the subsequent + * event loop would either free the future while the fiber still runs (fiber_free assertion) + * or leak it. */ + ASSERT_OK(sd_fiber_set_floating(f, true)); + ASSERT_OK(sd_fiber_set_floating(f, false)); + ASSERT_OK_ZERO(sd_fiber_get_floating(f)); + + /* Setting floating to the same value twice should be a no-op. */ + ASSERT_OK(sd_fiber_set_floating(f, false)); + ASSERT_OK(sd_fiber_set_floating(f, true)); + ASSERT_OK(sd_fiber_set_floating(f, true)); + + /* Drop our handle; the still-floating ref drives cleanup. */ + f = sd_future_unref(f); + + ASSERT_OK(sd_event_loop(e)); + ASSERT_EQ(counter, 2); +} + +/* Test: SD_FIBER_TIMEOUT scope expires while the fiber is suspended with no other wakeup source. */ +static int timeout_suspend_fiber(void *userdata) { + SD_FIBER_TIMEOUT(50 * USEC_PER_MSEC); + + /* Plain suspend with no other future to wake us — only the deadline timer can resume. */ + return sd_fiber_suspend(); +} + +TEST(fiber_timeout_suspend_expires) { + _cleanup_(sd_event_unrefp) sd_event *e = NULL; + ASSERT_OK(sd_event_new(&e)); + ASSERT_OK(sd_event_set_exit_on_idle(e, true)); + + _cleanup_(sd_future_unrefp) sd_future *f = NULL; + ASSERT_OK(sd_fiber_new(e, "timeout-suspend", timeout_suspend_fiber, NULL, NULL, &f)); + + ASSERT_OK(sd_event_loop(e)); + ASSERT_ERROR(sd_future_result(f), ETIME); +} + +/* Test: SD_FIBER_TIMEOUT scope around a sleep that finishes before the deadline expires; the + * cleanup must cancel the timer cleanly without leaving a stale wakeup. */ +static int timeout_in_time_fiber(void *userdata) { + SD_FIBER_TIMEOUT(1 * USEC_PER_SEC); + return sd_fiber_sleep(10 * USEC_PER_MSEC); +} + +TEST(fiber_timeout_sleep_in_time) { + _cleanup_(sd_event_unrefp) sd_event *e = NULL; + ASSERT_OK(sd_event_new(&e)); + ASSERT_OK(sd_event_set_exit_on_idle(e, true)); + + _cleanup_(sd_future_unrefp) sd_future *f = NULL; + ASSERT_OK(sd_fiber_new(e, "in-time", timeout_in_time_fiber, NULL, NULL, &f)); + + ASSERT_OK(sd_event_loop(e)); + ASSERT_OK_ZERO(sd_future_result(f)); +} + +/* Test: SD_FIBER_TIMEOUT(USEC_INFINITY) is a no-op — no timer is created and the fiber completes + * normally. */ +static int timeout_infinite_fiber(void *userdata) { + SD_FIBER_TIMEOUT(USEC_INFINITY); + return sd_fiber_sleep(10 * USEC_PER_MSEC); +} + +TEST(fiber_timeout_infinite_no_op) { + _cleanup_(sd_event_unrefp) sd_event *e = NULL; + ASSERT_OK(sd_event_new(&e)); + ASSERT_OK(sd_event_set_exit_on_idle(e, true)); + + _cleanup_(sd_future_unrefp) sd_future *f = NULL; + ASSERT_OK(sd_fiber_new(e, "infinite", timeout_infinite_fiber, NULL, NULL, &f)); + + ASSERT_OK(sd_event_loop(e)); + ASSERT_OK_ZERO(sd_future_result(f)); +} + +/* Test: SD_FIBER_WITH_TIMEOUT block form returns -ETIME from the suspend inside it. */ +static int with_timeout_block_fiber(void *userdata) { + int r = 0; + SD_FIBER_WITH_TIMEOUT(50 * USEC_PER_MSEC) + r = sd_fiber_suspend(); + return r; +} + +TEST(fiber_with_timeout_block) { + _cleanup_(sd_event_unrefp) sd_event *e = NULL; + ASSERT_OK(sd_event_new(&e)); + ASSERT_OK(sd_event_set_exit_on_idle(e, true)); + + _cleanup_(sd_future_unrefp) sd_future *f = NULL; + ASSERT_OK(sd_fiber_new(e, "with-timeout", with_timeout_block_fiber, NULL, NULL, &f)); + + ASSERT_OK(sd_event_loop(e)); + ASSERT_ERROR(sd_future_result(f), ETIME); +} + +/* Test: nested SD_FIBER_TIMEOUT — inner scope's timer fires first; once we're back in just the + * outer scope, suspending again must time out via the still-armed outer timer. */ +static int nested_timeout_fiber(void *userdata) { + int *fired = ASSERT_PTR(userdata); + + SD_FIBER_TIMEOUT(50 * USEC_PER_MSEC); /* outer */ + + SD_FIBER_WITH_TIMEOUT(20 * USEC_PER_MSEC) { /* inner — expires first */ + int r = sd_fiber_suspend(); + if (r != -ETIME) + return -ENOTRECOVERABLE; + (*fired)++; + } + + /* Inner scope is gone, but the outer timer is still armed (it only used ~20ms of its + * 100ms budget). Suspending again must eventually wake us with -ETIME. */ + int r = sd_fiber_suspend(); + if (r != -ETIME) + return -ENOTRECOVERABLE; + (*fired)++; + + return 0; +} + +TEST(fiber_timeout_nested) { + _cleanup_(sd_event_unrefp) sd_event *e = NULL; + ASSERT_OK(sd_event_new(&e)); + ASSERT_OK(sd_event_set_exit_on_idle(e, true)); + + int fired = 0; + _cleanup_(sd_future_unrefp) sd_future *f = NULL; + ASSERT_OK(sd_fiber_new(e, "nested-timeout", nested_timeout_fiber, &fired, NULL, &f)); + + ASSERT_OK(sd_event_loop(e)); + ASSERT_OK_ZERO(sd_future_result(f)); + ASSERT_EQ(fired, 2); +} + +/* Test: signal mask is per-thread, not per-fiber. Changes one fiber makes via pthread_sigmask + * must be visible to other fibers on the same thread, both while the modifying fiber is + * suspended and after it resumes. The fiber switch (sigsetjmp/siglongjmp with savesigs=0) + * deliberately doesn't save or restore the mask. */ +static int sigmask_peer_fiber(void *userdata) { + sigset_t set, current; + + /* The waiter blocked SIGUSR1 before await'ing us; the per-thread mask should still + * have it blocked here. */ + ASSERT_OK_ZERO(-pthread_sigmask(SIG_SETMASK, NULL, ¤t)); + ASSERT_TRUE(sigismember(¤t, SIGUSR1)); + + ASSERT_OK(sigemptyset(&set)); + ASSERT_OK(sigaddset(&set, SIGUSR1)); + ASSERT_OK_ZERO(-pthread_sigmask(SIG_UNBLOCK, &set, NULL)); + + return 0; +} + +static int sigmask_waiter_fiber(void *userdata) { + sd_future *peer = ASSERT_PTR(userdata); + sigset_t set, current; + + ASSERT_OK(sigemptyset(&set)); + ASSERT_OK(sigaddset(&set, SIGUSR1)); + ASSERT_OK_ZERO(-pthread_sigmask(SIG_BLOCK, &set, NULL)); + + ASSERT_OK_ZERO(-pthread_sigmask(SIG_SETMASK, NULL, ¤t)); + ASSERT_TRUE(sigismember(¤t, SIGUSR1)); + + int r = sd_fiber_await(peer); + if (r < 0) + return r; + + /* The peer unblocked SIGUSR1 while we were suspended. The change is per-thread, so + * we must observe it here. */ + ASSERT_OK_ZERO(-pthread_sigmask(SIG_SETMASK, NULL, ¤t)); + ASSERT_FALSE(sigismember(¤t, SIGUSR1)); + + return 0; +} + +TEST(fiber_signal_mask_is_per_thread) { + _cleanup_(sd_event_unrefp) sd_event *e = NULL; + ASSERT_OK(sd_event_new(&e)); + ASSERT_OK(sd_event_set_exit_on_idle(e, true)); + + sigset_t saved; + ASSERT_OK_ZERO(-pthread_sigmask(SIG_SETMASK, NULL, &saved)); + + _cleanup_(sd_future_unrefp) sd_future *waiter = NULL, *peer = NULL; + ASSERT_OK(sd_fiber_new(e, "sigmask-peer", sigmask_peer_fiber, NULL, NULL, &peer)); + ASSERT_OK(sd_future_set_priority(peer, 1)); + ASSERT_OK(sd_fiber_new(e, "sigmask-waiter", sigmask_waiter_fiber, peer, NULL, &waiter)); + ASSERT_OK(sd_future_set_priority(waiter, 0)); + + ASSERT_OK(sd_event_loop(e)); + ASSERT_OK(sd_future_result(waiter)); + ASSERT_OK(sd_future_result(peer)); + + ASSERT_OK_ZERO(-pthread_sigmask(SIG_SETMASK, &saved, NULL)); +} + +/* Test: log context is per-fiber. fiber_run() swaps the thread-local log context (and prefix) with + * a per-fiber stash on entry and exit, so fields pushed by one fiber must not leak into another + * fiber that runs while the first is suspended, and must be restored when the first resumes. */ +static int log_context_peer_fiber(void *userdata) { + size_t *peer_observed = ASSERT_PTR(userdata); + + /* The waiter pushed a field before await'ing us. If log context were shared across fibers, + * we would observe it here. Record what we see and let the caller verify. */ + *peer_observed = log_context_num_fields(); + + return 0; +} + +static int log_context_waiter_fiber(void *userdata) { + sd_future *peer = ASSERT_PTR(userdata); + + size_t before_push = log_context_num_fields(); + + LOG_CONTEXT_PUSH("WAITER=here"); + size_t after_push = log_context_num_fields(); + if (after_push != before_push + 1) + return -EBADF; + + int r = sd_fiber_await(peer); + if (r < 0) + return r; + + /* Our pushed field must be visible again after the peer ran and resumed us. */ + if (log_context_num_fields() != after_push) + return -EBADF; + + return 0; +} + +TEST(fiber_log_context_per_fiber) { + _cleanup_(sd_event_unrefp) sd_event *e = NULL; + ASSERT_OK(sd_event_new(&e)); + ASSERT_OK(sd_event_set_exit_on_idle(e, true)); + + size_t baseline = log_context_num_fields(); + + size_t peer_observed = 0; + _cleanup_(sd_future_unrefp) sd_future *waiter = NULL, *peer = NULL; + ASSERT_OK(sd_fiber_new(e, "log-peer", log_context_peer_fiber, &peer_observed, NULL, &peer)); + ASSERT_OK(sd_future_set_priority(peer, 1)); + ASSERT_OK(sd_fiber_new(e, "log-waiter", log_context_waiter_fiber, peer, NULL, &waiter)); + ASSERT_OK(sd_future_set_priority(waiter, 0)); + + ASSERT_OK(sd_event_loop(e)); + ASSERT_OK(sd_future_result(waiter)); + ASSERT_OK(sd_future_result(peer)); + + /* Inside the peer, only the peer's own FIBER= field (pushed by fiber_run) should have been + * active — the waiter's WAITER= push must have been swapped out. */ + ASSERT_EQ(peer_observed, baseline + 1); + + /* The thread-local log context should be exactly as it was before the test ran. */ + ASSERT_EQ(log_context_num_fields(), baseline); +} + +static int stack_overflow_fiber(void *userdata) { + volatile char anchor; + size_t pagesz = page_size(); + + /* Walk one page at a time below the fiber's current SP, writing one byte per page, + * until the kernel raises a fatal signal — either by hitting the guard page at the + * base of the fiber's mapping (downward stacks) or by walking off the end of the + * mapping into unmapped memory (upward stacks like hppa, where SP starts near the + * low end). The 64 MiB ceiling is purely a safety net so the test fails loudly + * instead of looping if the guard isn't there. */ + for (size_t i = 1; i < (64U * U64_MB) / pagesz; i++) { + volatile char *p = (volatile char *) ((uintptr_t) &anchor - i * pagesz); + *p = 0; + } + return 0; +} + +TEST(fiber_stack_guard) { +#if HAS_FEATURE_ADDRESS_SANITIZER + (void) log_tests_skipped("ASan intercepts deliberate stack OOB writes"); + return; +#endif +#if HAVE_VALGRIND_VALGRIND_H + if (RUNNING_ON_VALGRIND) { + (void) log_tests_skipped("Valgrind intercepts deliberate stack OOB writes"); + return; + } +#endif + + _cleanup_(pidref_done) PidRef pidref = PIDREF_NULL; + int r = pidref_safe_fork("(stack-overflow)", FORK_RESET_SIGNALS|FORK_LOG, &pidref); + ASSERT_OK(r); + + if (r == 0) { + _cleanup_(sd_event_unrefp) sd_event *e = NULL; + ASSERT_OK(sd_event_new(&e)); + ASSERT_OK(sd_event_set_exit_on_idle(e, true)); + + _cleanup_(sd_future_unrefp) sd_future *f = NULL; + ASSERT_OK(sd_fiber_new(e, "overflow", stack_overflow_fiber, NULL, NULL, &f)); + (void) sd_event_loop(e); + _exit(EXIT_SUCCESS); /* unreachable if the guard fires */ + } + + siginfo_t si; + ASSERT_OK(pidref_wait_for_terminate(&pidref, &si)); + ASSERT_TRUE(IN_SET(si.si_code, CLD_KILLED, CLD_DUMPED)); + ASSERT_TRUE(IN_SET(si.si_status, SIGSEGV, SIGBUS)); +} + +DEFINE_TEST_MAIN(LOG_DEBUG); diff --git a/src/systemd/_sd-common.h b/src/systemd/_sd-common.h index f9c9a2627d55c..8da080bf18e61 100644 --- a/src/systemd/_sd-common.h +++ b/src/systemd/_sd-common.h @@ -69,6 +69,20 @@ typedef void (*_sd_destroy_t)(void *userdata); # define _SD_STRINGIFY(x) _SD_XSTRINGIFY(x) #endif +/* Mirror of CONCATENATE / UNIQ from macro-fundamental.h, available to public sd-* headers. */ +#ifndef _SD_CONCATENATE +# define _SD_XCONCATENATE(x, y) x ## y +# define _SD_CONCATENATE(x, y) _SD_XCONCATENATE(x, y) +#endif + +#ifndef _SD_UNIQ +# ifdef __COUNTER__ +# define _SD_UNIQ __COUNTER__ +# else +# define _SD_UNIQ __LINE__ +# endif +#endif + #ifndef _SD_BEGIN_DECLARATIONS # ifdef __cplusplus # define _SD_BEGIN_DECLARATIONS \ diff --git a/src/systemd/meson.build b/src/systemd/meson.build index d7335cee558de..0d0e9ab68f174 100644 --- a/src/systemd/meson.build +++ b/src/systemd/meson.build @@ -37,6 +37,7 @@ _not_installed_headers = [ 'sd-dhcp6-option.h', 'sd-dhcp6-protocol.h', 'sd-dns-resolver.h', + 'sd-future.h', 'sd-ipv4acd.h', 'sd-ipv4ll.h', 'sd-lldp-rx.h', diff --git a/src/systemd/sd-future.h b/src/systemd/sd-future.h new file mode 100644 index 0000000000000..9d0d03acf48a7 --- /dev/null +++ b/src/systemd/sd-future.h @@ -0,0 +1,101 @@ +/* SPDX-License-Identifier: LGPL-2.1-or-later */ +#ifndef foosdfuturefoo +#define foosdfuturefoo + +/*** + systemd is free software; you can redistribute it and/or modify it + under the terms of the GNU Lesser General Public License as published by + the Free Software Foundation; either version 2.1 of the License, or + (at your option) any later version. + + systemd is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public License + along with systemd; If not, see . +***/ + +#include "_sd-common.h" + +_SD_BEGIN_DECLARATIONS; + +typedef struct sd_event sd_event; +typedef struct sd_future sd_future; +typedef struct sd_future_ops sd_future_ops; +typedef int (*sd_future_func_t)(sd_future *f); +typedef int (*sd_fiber_func_t)(void *userdata); +typedef _sd_destroy_t sd_fiber_destroy_t; + +struct sd_future_ops { + size_t size; + void* (*alloc)(void); + void (*free)(sd_future *f); + int (*cancel)(sd_future *f); + int (*set_priority)(sd_future *f, int64_t priority); +}; + +__extension__ typedef enum _SD_ENUM_TYPE_S64(sd_future_state_t) { + SD_FUTURE_PENDING, + SD_FUTURE_RESOLVED, + _SD_ENUM_FORCE_S64(SD_FUTURE_STATE) +} sd_future_state_t; + +int sd_future_new(const sd_future_ops *ops, sd_future **ret); +int sd_future_cancel(sd_future *f); +int sd_future_resolve(sd_future *f, int result); + +_SD_DECLARE_TRIVIAL_REF_UNREF_FUNC(sd_future); +_SD_DEFINE_POINTER_CLEANUP_FUNC(sd_future, sd_future_unref); +void sd_future_unref_array_clear(sd_future *array[], size_t n); +void sd_future_unref_array(sd_future *array[], size_t n); + +sd_future* sd_future_cancel_wait_unref(sd_future *f); +_SD_DEFINE_POINTER_CLEANUP_FUNC(sd_future, sd_future_cancel_wait_unref); +void sd_future_cancel_wait_unref_array_clear(sd_future *array[], size_t n); +void sd_future_cancel_wait_unref_array(sd_future *array[], size_t n); + +int sd_future_state(sd_future *f); +int sd_future_result(sd_future *f); +void* sd_future_get_userdata(sd_future *f); +void* sd_future_get_private(sd_future *f); +const sd_future_ops* sd_future_get_ops(sd_future *f); + +int sd_future_set_callback(sd_future *f, sd_future_func_t callback, void *userdata); +int sd_future_set_priority(sd_future *f, int64_t priority); + +int sd_future_new_wait(sd_future *target, sd_future **ret); + +int sd_fiber_new(sd_event *e, const char *name, sd_fiber_func_t func, void *userdata, sd_fiber_destroy_t destroy, sd_future **ret); + +int sd_fiber_set_floating(sd_future *f, int b); +int sd_fiber_get_floating(sd_future *f); + +int sd_fiber_is_running(void); +sd_future* sd_fiber_get_current(void); +int sd_fiber_get_priority(int64_t *ret); +sd_event* sd_fiber_get_event(void); + +int sd_fiber_yield(void); +int sd_fiber_sleep(uint64_t usec); +int sd_fiber_await(sd_future *target); +int sd_fiber_suspend(void); +int sd_fiber_resume(sd_future *f, int result); + +sd_future* sd_fiber_timeout(uint64_t timeout); + +#define SD_FIBER_TIMEOUT(timeout) _SD_FIBER_TIMEOUT(_SD_UNIQ, (timeout)) +#define _SD_FIBER_TIMEOUT(uniq, timeout) \ + sd_future *_SD_CONCATENATE(_sd_fto_, uniq) __attribute__((cleanup(sd_future_cancel_wait_unrefp), unused)) = sd_fiber_timeout(timeout) + +#define SD_FIBER_WITH_TIMEOUT(timeout) _SD_FIBER_WITH_TIMEOUT(_SD_UNIQ, (timeout)) +#define _SD_FIBER_WITH_TIMEOUT(uniq, timeout) \ + for (sd_future *_SD_CONCATENATE(_sd_fto_, uniq) __attribute__((cleanup(sd_future_cancel_wait_unrefp), unused)) = sd_fiber_timeout(timeout), \ + *_SD_CONCATENATE(_sd_fto_b_, uniq) = (sd_future*) (uintptr_t) 1; \ + _SD_CONCATENATE(_sd_fto_b_, uniq); \ + _SD_CONCATENATE(_sd_fto_b_, uniq) = NULL) + +_SD_END_DECLARATIONS; + +#endif diff --git a/test/integration-tests/TEST-02-UNITTESTS/meson.build b/test/integration-tests/TEST-02-UNITTESTS/meson.build index 9be6f2a6d9532..b074903c00ac9 100644 --- a/test/integration-tests/TEST-02-UNITTESTS/meson.build +++ b/test/integration-tests/TEST-02-UNITTESTS/meson.build @@ -3,7 +3,7 @@ integration_tests += [ integration_test_template + { 'name' : fs.name(meson.current_source_dir()), - 'coredump-exclude-regex' : '/(bash|python3.[0-9]+|systemd-executor)$', + 'coredump-exclude-regex' : '/(bash|python3.[0-9]+|systemd-executor|test-fiber)$', 'cmdline' : integration_test_template['cmdline'] + [ ''' From 5264cb213b671dfabcfca3408f2216cfa4c6e8ab Mon Sep 17 00:00:00 2001 From: Daan De Meyer Date: Sat, 25 Apr 2026 22:31:58 +0200 Subject: [PATCH 236/242] sd-future: add fiber-aware non-blocking I/O wrappers Add a family of sd_fiber_*() I/O wrappers that, when called from a fiber, behave like blocking I/O from the caller's perspective but yield to the event loop instead of blocking the thread: sd_fiber_read / sd_fiber_write sd_fiber_readv / sd_fiber_writev sd_fiber_recv / sd_fiber_send sd_fiber_connect sd_fiber_recvmsg / sd_fiber_sendmsg sd_fiber_recvfrom / sd_fiber_sendto sd_fiber_accept sd_fiber_ppoll Most of them share a single helper, fiber_io_operation(), which when invoked outside a fiber falls through to the underlying syscall directly, preserving the regular blocking behaviour. Inside a fiber the helper flips the fd to non-blocking (restoring its original mode on return), tries the syscall once on the fast path, and on EAGAIN/ EWOULDBLOCK creates an sd-event-backed IO future via future_new_io(), suspends the fiber, and retries the syscall once the event source fires. future_new_io() itself is added to sd-event/event-future.{c,h} as a new IoFuture kind. It wraps sd_event_add_io() into an sd_future: oneshot enable, EPOLLERR translated via SO_ERROR (suppressed for non-sockets), and the fd duplicated with F_DUPFD_CLOEXEC to avoid EEXIST when multiple sources watch the same descriptor. Together these let fiber-using code write straight-line socket and pipe I/O without bundling state into callbacks. --- src/basic/io-util.c | 9 + src/basic/io-util.h | 6 + src/libsystemd/meson.build | 2 + src/libsystemd/sd-event/event-future.c | 118 ++ src/libsystemd/sd-event/event-future.h | 1 + src/libsystemd/sd-future/fiber-io.c | 459 +++++++ src/libsystemd/sd-future/test-fiber-io.c | 1388 ++++++++++++++++++++++ src/systemd/sd-future.h | 25 + 8 files changed, 2008 insertions(+) create mode 100644 src/libsystemd/sd-future/fiber-io.c create mode 100644 src/libsystemd/sd-future/test-fiber-io.c diff --git a/src/basic/io-util.c b/src/basic/io-util.c index 103aa2a7cde03..b4f643b5721aa 100644 --- a/src/basic/io-util.c +++ b/src/basic/io-util.c @@ -3,6 +3,7 @@ #include #include #include +#include /* IWYU pragma: keep */ #include #include @@ -10,6 +11,14 @@ #include "io-util.h" #include "time-util.h" +/* EPOLL_POLL_COMMON_MASK in io-util.h treats POLL* and EPOLL* as interchangeable; verify it. */ +assert_cc((uint32_t) POLLIN == EPOLLIN); +assert_cc((uint32_t) POLLOUT == EPOLLOUT); +assert_cc((uint32_t) POLLERR == EPOLLERR); +assert_cc((uint32_t) POLLHUP == EPOLLHUP); +assert_cc((uint32_t) POLLPRI == EPOLLPRI); +assert_cc((uint32_t) POLLRDHUP == EPOLLRDHUP); + int flush_fd(int fd) { int count = 0; diff --git a/src/basic/io-util.h b/src/basic/io-util.h index 918108c023b68..ab52dc8db6506 100644 --- a/src/basic/io-util.h +++ b/src/basic/io-util.h @@ -3,6 +3,12 @@ #include "basic-forward.h" +/* The intersection of poll() and epoll_wait() event masks. Linux defines POLL* and EPOLL* with the + * same numeric values for these — see the assert_cc()s in io-util.c — so this mask can be used + * interchangeably as a `revents` (poll) or `events` (epoll) bitset. */ +#define EPOLL_POLL_COMMON_MASK \ + (EPOLLIN | EPOLLOUT | EPOLLERR | EPOLLHUP | EPOLLPRI | EPOLLRDHUP) + int flush_fd(int fd); ssize_t loop_read(int fd, void *buf, size_t nbytes, bool do_poll); diff --git a/src/libsystemd/meson.build b/src/libsystemd/meson.build index 9b50e7e79ba3f..bc3d54eb12f6f 100644 --- a/src/libsystemd/meson.build +++ b/src/libsystemd/meson.build @@ -77,6 +77,7 @@ sd_device_sources = files( ############################################################ sd_future_sources = files( + 'sd-future/fiber-io.c', 'sd-future/fiber.c', 'sd-future/sd-future.c', ) @@ -191,6 +192,7 @@ simple_tests += files( 'sd-device/test-device-util.c', 'sd-device/test-sd-device-monitor.c', 'sd-future/test-fiber.c', + 'sd-future/test-fiber-io.c', 'sd-hwdb/test-sd-hwdb.c', 'sd-id128/test-id128.c', 'sd-journal/test-audit-type.c', diff --git a/src/libsystemd/sd-event/event-future.c b/src/libsystemd/sd-event/event-future.c index e34dbe0c05252..2902c047c0f37 100644 --- a/src/libsystemd/sd-event/event-future.c +++ b/src/libsystemd/sd-event/event-future.c @@ -6,6 +6,124 @@ #include "alloc-util.h" #include "errno-util.h" #include "event-future.h" +#include "fd-util.h" + +typedef struct IoFuture { + sd_event_source *source; +} IoFuture; + +static void* io_future_alloc(void) { + return new0(IoFuture, 1); +} + +static void io_future_free(sd_future *f) { + IoFuture *iof = sd_future_get_private(f); + sd_event_source_unref(iof->source); + free(iof); +} + +static int io_future_cancel(sd_future *f) { + IoFuture *iof = sd_future_get_private(ASSERT_PTR(f)); + int r = 0; + + RET_GATHER(r, sd_event_source_set_enabled(iof->source, SD_EVENT_OFF)); + RET_GATHER(r, sd_future_resolve(f, -ECANCELED)); + return r; +} + +static int io_future_set_priority(sd_future *f, int64_t priority) { + IoFuture *iof = sd_future_get_private(ASSERT_PTR(f)); + return sd_event_source_set_priority(iof->source, priority); +} + +static const sd_future_ops io_future_ops = { + .size = sizeof(sd_future_ops), + .alloc = io_future_alloc, + .free = io_future_free, + .cancel = io_future_cancel, + .set_priority = io_future_set_priority, +}; + +static int io_handler(sd_event_source *s, int fd, uint32_t revents, void *userdata) { + sd_future *f = ASSERT_PTR(userdata); + + /* Resolve with the revents mask on success (matching io_uring poll_add's CQE convention) so + * callers can read it directly off the future result. EPOLLERR is the one exception: surface + * the actual socket error via SO_ERROR so callers like sd_fiber_connect() can return -errno + * directly without re-querying. */ + if (FLAGS_SET(revents, EPOLLERR)) { + int error = 0; + socklen_t len = sizeof(error); + + int r = RET_NERRNO(getsockopt(fd, SOL_SOCKET, SO_ERROR, &error, &len)); + if (r == -ENOTSOCK) + return sd_future_resolve(f, (int) revents); + if (r >= 0 && error != 0) + return sd_future_resolve(f, -error); + if (r >= 0) + /* EPOLLERR was reported but SO_ERROR returned no pending error (e.g. + * already consumed elsewhere). Surface the revents mask so the caller + * still sees the error condition rather than mistaking it for success. */ + return sd_future_resolve(f, (int) revents); + /* On any other getsockopt() error fall through and resolve the future with that + * error so the waiting fiber wakes up rather than hanging forever. */ + return sd_future_resolve(f, r); + } + + return sd_future_resolve(f, (int) revents); +} + +int future_new_io(sd_event *e, int fd, uint32_t events, sd_future **ret) { + int r; + + assert(e); + assert(fd >= 0); + assert(ret); + + if (IN_SET(sd_event_get_state(e), SD_EVENT_EXITING, SD_EVENT_FINISHED)) + return -ECANCELED; + + _cleanup_(sd_future_unrefp) sd_future *f = NULL; + r = sd_future_new(&io_future_ops, &f); + if (r < 0) + return r; + + IoFuture *iof = sd_future_get_private(f); + + /* Duplicate fd to avoid EEXIST from epoll when adding the same fd multiple times */ + _cleanup_close_ int fd_copy = fcntl(fd, F_DUPFD_CLOEXEC, 3); + if (fd_copy < 0) + return -errno; + + r = sd_event_add_io(e, &iof->source, fd_copy, events, io_handler, f); + if (r < 0) + return r; + + r = sd_event_source_set_io_fd_own(iof->source, true); + if (r < 0) + return r; + + TAKE_FD(fd_copy); + + r = sd_event_source_set_enabled(iof->source, SD_EVENT_ONESHOT); + if (r < 0) + return r; + + if (sd_fiber_is_running()) { + int64_t priority; + + r = sd_fiber_get_priority(&priority); + if (r < 0) + return r; + + r = sd_event_source_set_priority(iof->source, priority); + if (r < 0) + return r; + } + + *ret = TAKE_PTR(f); + return 0; +} typedef struct TimeFuture { sd_event_source *source; diff --git a/src/libsystemd/sd-event/event-future.h b/src/libsystemd/sd-event/event-future.h index 7e956906ebf74..3bc275e7b7ac9 100644 --- a/src/libsystemd/sd-event/event-future.h +++ b/src/libsystemd/sd-event/event-future.h @@ -3,5 +3,6 @@ #include "sd-forward.h" +int future_new_io(sd_event *e, int fd, uint32_t events, sd_future **ret); int future_new_time(sd_event *e, clockid_t clock, uint64_t usec, uint64_t accuracy, int result, sd_future **ret); int future_new_time_relative(sd_event *e, clockid_t clock, uint64_t usec, uint64_t accuracy, int result, sd_future **ret); diff --git a/src/libsystemd/sd-future/fiber-io.c b/src/libsystemd/sd-future/fiber-io.c new file mode 100644 index 0000000000000..acee73f078bca --- /dev/null +++ b/src/libsystemd/sd-future/fiber-io.c @@ -0,0 +1,459 @@ +/* SPDX-License-Identifier: LGPL-2.1-or-later */ + +#include +#include /* IWYU pragma: keep */ +#include +#include +#include +#include + +#include "sd-event.h" +#include "sd-future.h" + +#include "alloc-util.h" +#include "errno-util.h" +#include "event-future.h" +#include "fd-util.h" +#include "io-util.h" +#include "time-util.h" + +typedef ssize_t (*FiberIOFunc)(int fd, void *args); + +static ssize_t fiber_io_operation( + int fd, + uint32_t events, + FiberIOFunc func, + void *args) { + _cleanup_(nonblock_resetp) int reset_fd = -EBADF; + int r; + + assert(fd >= 0); + assert(func); + + if (!sd_fiber_is_running()) + return func(fd, args); + + sd_event *e = sd_fiber_get_event(); + assert(e); + + r = fd_nonblock(fd, true); + if (r < 0) + return r; + if (r > 0) + reset_fd = fd; + + ssize_t n = func(fd, args); + if (n >= 0 || !ERRNO_IS_NEG_TRANSIENT(n)) + return n; + + _cleanup_(sd_future_cancel_wait_unrefp) sd_future *io = NULL; + r = future_new_io(e, fd, events, &io); + if (r < 0) + return r; + + r = sd_fiber_suspend(); + if (r < 0) + return r; + + return func(fd, args); +} + +typedef struct ReadArgs { + void *buf; + size_t count; +} ReadArgs; + +static ssize_t read_callback(int fd, void *args) { + ReadArgs *a = ASSERT_PTR(args); + ssize_t n; + + n = read(fd, a->buf, a->count); + return n >= 0 ? n : -errno; +} + +ssize_t sd_fiber_read(int fd, void *buf, size_t count) { + assert_return(fd >= 0, -EBADF); + assert_return(buf || count == 0, -EINVAL); + + return fiber_io_operation(fd, EPOLLIN, read_callback, &(ReadArgs) { + .buf = buf, + .count = count, + }); +} + +typedef struct WriteArgs { + const void *buf; + size_t count; +} WriteArgs; + +static ssize_t write_callback(int fd, void *args) { + WriteArgs *a = ASSERT_PTR(args); + ssize_t n; + + n = write(fd, a->buf, a->count); + return n >= 0 ? n : -errno; +} + +ssize_t sd_fiber_write(int fd, const void *buf, size_t count) { + assert_return(fd >= 0, -EBADF); + assert_return(buf || count == 0, -EINVAL); + + return fiber_io_operation(fd, EPOLLOUT, write_callback, &(WriteArgs) { + .buf = buf, + .count = count, + }); +} + +typedef struct ReadvArgs { + const struct iovec *iov; + int iovcnt; +} ReadvArgs; + +static ssize_t readv_callback(int fd, void *args) { + ReadvArgs *a = ASSERT_PTR(args); + ssize_t n; + + n = readv(fd, a->iov, a->iovcnt); + return n >= 0 ? n : -errno; +} + +ssize_t sd_fiber_readv(int fd, const struct iovec *iov, int iovcnt) { + assert_return(fd >= 0, -EBADF); + assert_return(iov || iovcnt == 0, -EINVAL); + + return fiber_io_operation(fd, EPOLLIN, readv_callback, &(ReadvArgs) { + .iov = iov, + .iovcnt = iovcnt, + }); +} + +typedef struct WritevArgs { + const struct iovec *iov; + int iovcnt; +} WritevArgs; + +static ssize_t writev_callback(int fd, void *args) { + WritevArgs *a = ASSERT_PTR(args); + ssize_t n; + + n = writev(fd, a->iov, a->iovcnt); + return n >= 0 ? n : -errno; +} + +ssize_t sd_fiber_writev(int fd, const struct iovec *iov, int iovcnt) { + assert_return(fd >= 0, -EBADF); + assert_return(iov || iovcnt == 0, -EINVAL); + + return fiber_io_operation(fd, EPOLLOUT, writev_callback, &(WritevArgs) { + .iov = iov, + .iovcnt = iovcnt, + }); +} + +typedef struct RecvArgs { + void *buf; + size_t len; + int flags; +} RecvArgs; + +static ssize_t recv_callback(int fd, void *args) { + RecvArgs *a = ASSERT_PTR(args); + ssize_t n; + + n = recv(fd, a->buf, a->len, a->flags); + return n >= 0 ? n : -errno; +} + +ssize_t sd_fiber_recv(int sockfd, void *buf, size_t len, int flags) { + assert_return(sockfd >= 0, -EBADF); + assert_return(buf || len == 0, -EINVAL); + + return fiber_io_operation(sockfd, EPOLLIN, recv_callback, &(RecvArgs) { + .buf = buf, + .len = len, + .flags = flags, + }); +} + +typedef struct SendArgs { + const void *buf; + size_t len; + int flags; +} SendArgs; + +static ssize_t send_callback(int fd, void *args) { + SendArgs *a = ASSERT_PTR(args); + ssize_t n; + + n = send(fd, a->buf, a->len, a->flags); + return n >= 0 ? n : -errno; +} + +ssize_t sd_fiber_send(int sockfd, const void *buf, size_t len, int flags) { + assert_return(sockfd >= 0, -EBADF); + assert_return(buf || len == 0, -EINVAL); + + return fiber_io_operation(sockfd, EPOLLOUT, send_callback, &(SendArgs) { + .buf = buf, + .len = len, + .flags = flags, + }); +} + +int sd_fiber_connect(int sockfd, const struct sockaddr *addr, socklen_t addrlen) { + _cleanup_(nonblock_resetp) int reset_fd = -EBADF; + int r; + + assert_return(sockfd >= 0, -EBADF); + assert_return(addr, -EINVAL); + + if (!sd_fiber_is_running()) + return RET_NERRNO(connect(sockfd, addr, addrlen)); + + sd_event *e = sd_fiber_get_event(); + assert(e); + + r = fd_nonblock(sockfd, true); + if (r < 0) + return r; + if (r > 0) + reset_fd = sockfd; + + r = RET_NERRNO(connect(sockfd, addr, addrlen)); + if (r != -EINPROGRESS) + return r; + + _cleanup_(sd_future_cancel_wait_unrefp) sd_future *io = NULL; + r = future_new_io(e, sockfd, EPOLLOUT, &io); + if (r < 0) + return r; + + /* future_new_io resolves with the revents mask on success; translate any positive value + * (e.g. POLLOUT) back to the connect(2) success status. */ + r = sd_fiber_suspend(); + return r > 0 ? 0 : r; +} + +typedef struct RecvmsgArgs { + struct msghdr *msg; + int flags; +} RecvmsgArgs; + +static ssize_t recvmsg_callback(int fd, void *args) { + RecvmsgArgs *a = ASSERT_PTR(args); + ssize_t n; + + n = recvmsg(fd, a->msg, a->flags); + return n >= 0 ? n : -errno; +} + +ssize_t sd_fiber_recvmsg(int sockfd, struct msghdr *msg, int flags) { + assert_return(sockfd >= 0, -EBADF); + assert_return(msg, -EINVAL); + + return fiber_io_operation(sockfd, EPOLLIN, recvmsg_callback, &(RecvmsgArgs) { + .msg = msg, + .flags = flags, + }); +} + +typedef struct SendmsgArgs { + const struct msghdr *msg; + int flags; +} SendmsgArgs; + +static ssize_t sendmsg_callback(int fd, void *args) { + SendmsgArgs *a = ASSERT_PTR(args); + ssize_t n; + + n = sendmsg(fd, a->msg, a->flags); + return n >= 0 ? n : -errno; +} + +ssize_t sd_fiber_sendmsg(int sockfd, const struct msghdr *msg, int flags) { + assert_return(sockfd >= 0, -EBADF); + assert_return(msg, -EINVAL); + + return fiber_io_operation(sockfd, EPOLLOUT, sendmsg_callback, &(SendmsgArgs) { + .msg = msg, + .flags = flags, + }); +} + +static ssize_t recvfrom_callback(int fd, void *args) { + RecvmsgArgs *a = ASSERT_PTR(args); + ssize_t n; + + n = recvmsg(fd, a->msg, a->flags); + return n >= 0 ? n : -errno; +} + +ssize_t sd_fiber_recvfrom(int sockfd, void *buf, size_t len, int flags, struct sockaddr *src_addr, socklen_t *addrlen) { + ssize_t n; + + assert_return(sockfd >= 0, -EBADF); + assert_return(buf || len == 0, -EINVAL); + assert_return(!src_addr || addrlen, -EINVAL); + + /* io_uring has no direct recvfrom prep helper, so emulate via recvmsg with a single-iovec + * msghdr. The kernel updates msg_namelen in place; we copy it back to *addrlen below. */ + struct iovec iov = { .iov_base = buf, .iov_len = len }; + struct msghdr msg = { + .msg_name = src_addr, + .msg_namelen = src_addr ? *addrlen : 0, + .msg_iov = &iov, + .msg_iovlen = 1, + }; + + n = fiber_io_operation(sockfd, EPOLLIN, recvfrom_callback, &(RecvmsgArgs) { + .msg = &msg, + .flags = flags, + }); + if (n < 0) + return n; + + if (addrlen) + *addrlen = msg.msg_namelen; + + return n; +} + +static ssize_t sendto_callback(int fd, void *args) { + SendmsgArgs *a = ASSERT_PTR(args); + ssize_t n; + + n = sendmsg(fd, a->msg, a->flags); + return n >= 0 ? n : -errno; +} + +ssize_t sd_fiber_sendto(int sockfd, const void *buf, size_t len, int flags, const struct sockaddr *dest_addr, socklen_t addrlen) { + assert_return(sockfd >= 0, -EBADF); + assert_return(buf || len == 0, -EINVAL); + + struct iovec iov = { .iov_base = (void *) buf, .iov_len = len }; + struct msghdr msg = { + .msg_name = (void *) dest_addr, + .msg_namelen = dest_addr ? addrlen : 0, + .msg_iov = &iov, + .msg_iovlen = 1, + }; + + return fiber_io_operation(sockfd, EPOLLOUT, sendto_callback, &(SendmsgArgs) { + .msg = &msg, + .flags = flags, + }); +} + +typedef struct AcceptArgs { + struct sockaddr *addr; + socklen_t *addrlen; + int flags; +} AcceptArgs; + +static ssize_t accept_callback(int fd, void *args) { + AcceptArgs *a = ASSERT_PTR(args); + + return RET_NERRNO(accept4(fd, a->addr, a->addrlen, a->flags)); +} + +int sd_fiber_accept(int sockfd, struct sockaddr *addr, socklen_t *addrlen, int flags) { + assert_return(sockfd >= 0, -EBADF); + + return fiber_io_operation(sockfd, EPOLLIN, accept_callback, &(AcceptArgs) { + .addr = addr, + .addrlen = addrlen, + .flags = flags, + }); +} + +int sd_fiber_ppoll(struct pollfd *fds, size_t n_fds, const struct timespec *timeout, const sigset_t *sigmask) { + int r; + + assert_return(fds || n_fds == 0, -EINVAL); + + if (!sd_fiber_is_running()) + return RET_NERRNO(ppoll(fds, n_fds, timeout, sigmask)); + + /* When on a fiber signals are handled via sd-event hence we should never mess around with the + * signal mask when running on a fiber. */ + assert_return(!sigmask, -EOPNOTSUPP); + + sd_event *e = sd_fiber_get_event(); + assert(e); + + /* No fds to wait on and no timeout means there's nothing that could ever wake the fiber up, + * since unlike raw ppoll() we cannot use signal delivery as a wakeup. Signals received while + * the fiber is suspended are handled by sd-event via signalfd, in which case the signal handler + * is expected to cancel the fiber via sd_future_cancel() if a wakeup is desired. */ + if (n_fds == 0 && !timeout) + return -EINVAL; + + bool zero_timeout = timeout && timeout->tv_sec == 0 && timeout->tv_nsec == 0; + + /* Try polling with zero timeout first to see if any are immediately ready. */ + r = RET_NERRNO(ppoll(fds, n_fds, &(const struct timespec) {}, /* sigmask= */ NULL)); + if (zero_timeout || r != 0) /* Either error or some fds are ready */ + return r; + + sd_future **futures = NULL; + CLEANUP_ARRAY(futures, n_fds, sd_future_cancel_wait_unref_array); + + futures = new0(sd_future*, n_fds); + if (!futures) + return -ENOMEM; + + /* Set up I/O event sources for all valid fds. POLL* and EPOLL* share their bit values (see + * EPOLL_POLL_COMMON_MASK in io-util.h), so we can pass the user-supplied event mask through + * to either backend without translation. */ + size_t n_io_futures = 0; + for (size_t i = 0; i < n_fds; i++) { + if (fds[i].fd < 0) + continue; + + uint32_t events = fds[i].events & EPOLL_POLL_COMMON_MASK; + if (events == 0) + continue; + + r = future_new_io(e, fds[i].fd, events, &futures[i]); + if (r < 0) + return r; + + n_io_futures++; + } + + /* If every fd was skipped (negative or empty event mask) and no timeout was given, there's + * nothing that could ever wake the fiber up — same situation as n_fds == 0 && !timeout, just + * not detectable upfront. Refuse rather than suspend forever. */ + if (n_io_futures == 0 && !timeout) + return -EINVAL; + + _cleanup_(sd_future_cancel_wait_unrefp) sd_future *timer = NULL; + if (timeout) { + r = future_new_time_relative( + e, + CLOCK_MONOTONIC, + timespec_load(timeout), + /* accuracy= */ 1, + /* result= */ 0, + &timer); + if (r < 0) + return r; + } + + r = sd_fiber_suspend(); + if (r < 0 && r != -ETIME) + return r; + + /* Always sweep fds with a non-blocking ppoll(): the timer and an fd readiness can resolve in + * the same event-loop tick (or the fd can become ready between the timer firing and us being + * scheduled), and ppoll() semantics give events precedence over the timeout in that case. */ + int n = RET_NERRNO(ppoll(fds, n_fds, &(const struct timespec) {}, /* sigmask= */ NULL)); + if (n != 0) + return n; + + /* No fds ready: distinguish our own timer from an external -ETIME. */ + if (timer && sd_future_state(timer) == SD_FUTURE_RESOLVED) + return 0; + + return r; +} diff --git a/src/libsystemd/sd-future/test-fiber-io.c b/src/libsystemd/sd-future/test-fiber-io.c new file mode 100644 index 0000000000000..aef38950b6332 --- /dev/null +++ b/src/libsystemd/sd-future/test-fiber-io.c @@ -0,0 +1,1388 @@ +/* SPDX-License-Identifier: LGPL-2.1-or-later */ + +#include +#include +#include +#include +#include + +#include "sd-event.h" +#include "sd-future.h" + +#include "fd-util.h" +#include "tests.h" +#include "time-util.h" + +/* Test: Basic pipe I/O with sd-event */ + +typedef struct PipeIOContext { + int *pipefd; + int order; +} PipeIOContext; + +static int pipe_read_fiber(void *userdata) { + PipeIOContext *ctx = ASSERT_PTR(userdata); + char buf[64]; + ssize_t n; + + n = sd_fiber_read(ctx->pipefd[0], buf, sizeof(buf)); + if (n < 0) + return (int) n; + + /* Verify we read "hello" */ + if (n != 5 || memcmp(buf, "hello", 5) != 0) + return -EIO; + + return (int) n; +} + +TEST(fiber_io_basic) { + _cleanup_(sd_event_unrefp) sd_event *e = NULL; + ASSERT_OK(sd_event_new(&e)); + ASSERT_OK(sd_event_set_exit_on_idle(e, true)); + + _cleanup_close_pair_ int pipefd[2] = EBADF_PAIR; + ASSERT_OK_ERRNO(pipe2(pipefd, O_CLOEXEC | O_NONBLOCK)); + + PipeIOContext ctx = { .pipefd = pipefd }; + + _cleanup_(sd_future_unrefp) sd_future *f = NULL; + ASSERT_OK(sd_fiber_new(e, "pipe-read", pipe_read_fiber, &ctx, NULL, &f)); + + /* Write data to the pipe */ + ASSERT_OK_EQ_ERRNO(write(pipefd[1], "hello", 5), 5); + + /* Run the scheduler - should process the I/O */ + ASSERT_OK(sd_event_loop(e)); + + /* Verify fiber read the data */ + ASSERT_OK_EQ(sd_future_result(f), 5); +} + +static int pipe_read_order_fiber(void *userdata) { + PipeIOContext *ctx = ASSERT_PTR(userdata); + char buf[64]; + ssize_t n; + + /* Record that the read fiber started before attempting the blocking read */ + ASSERT_EQ(ctx->order, 0); + ctx->order = 1; + + n = sd_fiber_read(ctx->pipefd[0], buf, sizeof(buf)); + if (n < 0) + return (int) n; + + /* After resuming, verify the write fiber ran while we were suspended */ + ASSERT_EQ(ctx->order, 2); + + /* Verify we read "hello" */ + if (n != 5 || memcmp(buf, "hello", 5) != 0) + return -EIO; + + return (int) n; +} + +static int pipe_write_order_fiber(void *userdata) { + PipeIOContext *ctx = ASSERT_PTR(userdata); + + /* Verify the read fiber already ran and suspended before we started */ + ASSERT_EQ(ctx->order, 1); + ctx->order = 2; + + return sd_fiber_write(ctx->pipefd[1], "hello", STRLEN("hello")); +} + +TEST(fiber_io_read_write) { + _cleanup_(sd_event_unrefp) sd_event *e = NULL; + ASSERT_OK(sd_event_new(&e)); + ASSERT_OK(sd_event_set_exit_on_idle(e, true)); + + _cleanup_close_pair_ int pipefd[2] = EBADF_PAIR; + ASSERT_OK_ERRNO(pipe2(pipefd, O_CLOEXEC | O_NONBLOCK)); + + PipeIOContext ctx = { .pipefd = pipefd }; + + /* Higher priority for the read fiber, which will run first and then suspend because no data is + * available. The write fiber will run second, write data to the pipe, causing the read fiber to get + * resumed. */ + _cleanup_(sd_future_unrefp) sd_future *fr = NULL, *fw = NULL; + ASSERT_OK(sd_fiber_new(e, "pipe-read", pipe_read_order_fiber, &ctx, NULL, &fr)); + ASSERT_OK(sd_future_set_priority(fr, 0)); + ASSERT_OK(sd_fiber_new(e, "pipe-write", pipe_write_order_fiber, &ctx, NULL, &fw)); + ASSERT_OK(sd_future_set_priority(fw, 1)); + + /* Run the scheduler - should process the I/O */ + ASSERT_OK(sd_event_loop(e)); + + /* Verify both fibers completed and the full read->suspend->write->resume sequence occurred */ + ASSERT_OK_EQ(sd_future_result(fr), 5); + ASSERT_OK_EQ(sd_future_result(fw), 5); +} + +/* Test: Multiple concurrent reads */ +static int concurrent_read_fiber(void *userdata) { + int *args = userdata; + int fd = args[0]; + int expected = args[1]; + char buf[64]; + ssize_t n; + + n = sd_fiber_read(fd, buf, sizeof buf); + if (n < 0) + return (int) n; + + if (n != 1 || buf[0] != (char) expected) + return -EIO; + + return 0; +} + +TEST(fiber_io_concurrent) { + _cleanup_(sd_event_unrefp) sd_event *e = NULL; + ASSERT_OK(sd_event_new(&e)); + ASSERT_OK(sd_event_set_exit_on_idle(e, true)); + + sd_future *fibers[3] = {}; + CLEANUP_ELEMENTS(fibers, sd_future_unref_array_clear); + + /* Create 3 pipes and 3 fibers */ + int pipes[3][2]; + int args[3][2]; + for (size_t i = 0; i < ELEMENTSOF(fibers); i++) { + ASSERT_OK_ERRNO(pipe2(pipes[i], O_CLOEXEC | O_NONBLOCK)); + args[i][0] = pipes[i][0]; + args[i][1] = 'A' + i; + ASSERT_OK(sd_fiber_new(e, "concurrent-read", concurrent_read_fiber, args[i], NULL, &fibers[i])); + } + + /* Write data in reverse order */ + ASSERT_EQ(write(pipes[2][1], "C", 1), 1); + ASSERT_EQ(write(pipes[1][1], "B", 1), 1); + ASSERT_EQ(write(pipes[0][1], "A", 1), 1); + + /* Run until all complete */ + ASSERT_OK(sd_event_loop(e)); + + /* All should complete successfully */ + for (size_t i = 0; i < ELEMENTSOF(fibers); i++) { + ASSERT_OK(sd_future_result(fibers[i])); + safe_close_pair(pipes[i]); + } +} + +/* Test: Cancel fiber during I/O */ +static int blocking_read_fiber(void *userdata) { + int fd = PTR_TO_INT(userdata); + char buf[64]; + ssize_t n; + + n = sd_fiber_read(fd, buf, sizeof(buf)); + return (int) n; +} + +TEST(fiber_io_cancel) { + _cleanup_(sd_event_unrefp) sd_event *e = NULL; + ASSERT_OK(sd_event_new(&e)); + ASSERT_OK(sd_event_set_exit_on_idle(e, true)); + + _cleanup_close_pair_ int pipefd[2] = EBADF_PAIR; + ASSERT_OK_ERRNO(pipe2(pipefd, O_CLOEXEC | O_NONBLOCK)); + + _cleanup_(sd_future_unrefp) sd_future *f = NULL; + ASSERT_OK(sd_fiber_new(e, "blocking-read", blocking_read_fiber, INT_TO_PTR(pipefd[0]), NULL, &f)); + + /* Run once - fiber will suspend on read */ + ASSERT_OK_POSITIVE(sd_event_run(e, 0)); + + /* Fiber should be suspended now - add explicit check via state tracking */ + + /* Cancel the fiber */ + ASSERT_OK(sd_future_cancel(f)); + + /* Run to completion */ + ASSERT_OK(sd_event_loop(e)); + + /* Should be cancelled */ + ASSERT_ERROR(sd_future_result(f), ECANCELED); +} + +TEST(fiber_io_fallback) { + _cleanup_close_pair_ int pipefd[2] = EBADF_PAIR; + ASSERT_OK_ERRNO(pipe2(pipefd, O_CLOEXEC)); /* Note: blocking pipe */ + + char buf[STRLEN("fallback")] = {}; + ASSERT_OK_EQ(sd_fiber_write(pipefd[1], "fallback", sizeof(buf)), (ssize_t) sizeof(buf)); + ASSERT_OK_EQ(sd_fiber_read(pipefd[0], buf, sizeof(buf)), (ssize_t) sizeof(buf)); +} + +static int pipe_readv_order_fiber(void *userdata) { + PipeIOContext *ctx = ASSERT_PTR(userdata); + char buf1[5], buf2[5]; + struct iovec iov[] = { + { .iov_base = buf1, .iov_len = sizeof(buf1) }, + { .iov_base = buf2, .iov_len = sizeof(buf2) }, + }; + ssize_t n; + + /* Record that the read fiber started before attempting the blocking read */ + ASSERT_EQ(ctx->order, 0); + ctx->order = 1; + + /* This will initially block since no data is available */ + n = sd_fiber_readv(ctx->pipefd[0], iov, ELEMENTSOF(iov)); + if (n < 0) + return (int) n; + + /* After resuming, verify the write fiber ran while we were suspended */ + ASSERT_EQ(ctx->order, 2); + + if (n != 10 || memcmp(buf1, "fiber", 5) != 0 || memcmp(buf2, "readv", 5) != 0) + return -EIO; + + return (int) n; +} + +static int pipe_writev_order_fiber(void *userdata) { + PipeIOContext *ctx = ASSERT_PTR(userdata); + const char *part1 = "fiber"; + const char *part2 = "readv"; + struct iovec iov[] = { + { .iov_base = (void*) part1, .iov_len = 5 }, + { .iov_base = (void*) part2, .iov_len = 5 }, + }; + + /* Verify the read fiber already ran and suspended before we started */ + ASSERT_EQ(ctx->order, 1); + ctx->order = 2; + + return sd_fiber_writev(ctx->pipefd[1], iov, ELEMENTSOF(iov)); +} + +TEST(fiber_io_readv_writev) { + _cleanup_(sd_event_unrefp) sd_event *e = NULL; + ASSERT_OK(sd_event_new(&e)); + ASSERT_OK(sd_event_set_exit_on_idle(e, true)); + + _cleanup_close_pair_ int pipefd[2] = EBADF_PAIR; + ASSERT_OK_ERRNO(pipe2(pipefd, O_CLOEXEC | O_NONBLOCK)); + + PipeIOContext ctx = { .pipefd = pipefd }; + + /* Higher priority for the read fiber, which will run first and then suspend because no data is + * available. The write fiber will run second, write data to the pipe, causing the read fiber to get + * resumed. */ + _cleanup_(sd_future_unrefp) sd_future *fr = NULL, *fw = NULL; + ASSERT_OK(sd_fiber_new(e, "pipe-readv", pipe_readv_order_fiber, &ctx, NULL, &fr)); + ASSERT_OK(sd_future_set_priority(fr, 0)); + ASSERT_OK(sd_fiber_new(e, "pipe-writev", pipe_writev_order_fiber, &ctx, NULL, &fw)); + ASSERT_OK(sd_future_set_priority(fw, 1)); + + /* Run the scheduler - should process the I/O */ + ASSERT_OK(sd_event_loop(e)); + + /* Verify both fibers completed and the full read->suspend->write->resume sequence occurred */ + ASSERT_OK_EQ(sd_future_result(fr), 10); + ASSERT_OK_EQ(sd_future_result(fw), 10); +} + +static int concurrent_readv_fiber(void *userdata) { + int *args = userdata; + int fd = args[0]; + int expected1 = args[1]; + int expected2 = args[2]; + char buf1[1], buf2[1]; + struct iovec iov[] = { + { .iov_base = buf1, .iov_len = sizeof(buf1) }, + { .iov_base = buf2, .iov_len = sizeof(buf2) }, + }; + ssize_t n; + + n = sd_fiber_readv(fd, iov, ELEMENTSOF(iov)); + if (n < 0) + return (int) n; + + if (n != 2 || buf1[0] != (char) expected1 || buf2[0] != (char) expected2) + return -EIO; + + return 0; +} + +TEST(fiber_io_readv_concurrent) { + _cleanup_(sd_event_unrefp) sd_event *e = NULL; + ASSERT_OK(sd_event_new(&e)); + ASSERT_OK(sd_event_set_exit_on_idle(e, true)); + + sd_future *fibers[3] = {}; + CLEANUP_ELEMENTS(fibers, sd_future_unref_array_clear); + + /* Create 3 pipes and 3 fibers */ + int pipes[3][2]; + int args[3][3]; + for (size_t i = 0; i < ELEMENTSOF(fibers); i++) { + ASSERT_OK_ERRNO(pipe2(pipes[i], O_CLOEXEC | O_NONBLOCK)); + args[i][0] = pipes[i][0]; + args[i][1] = 'A' + i; + args[i][2] = 'a' + i; + ASSERT_OK(sd_fiber_new(e, "concurrent-readv", concurrent_readv_fiber, args[i], NULL, &fibers[i])); + } + + /* Write data in reverse order */ + ASSERT_EQ(write(pipes[2][1], "Cc", 2), 2); + ASSERT_EQ(write(pipes[1][1], "Bb", 2), 2); + ASSERT_EQ(write(pipes[0][1], "Aa", 2), 2); + + /* Run until all complete */ + ASSERT_OK(sd_event_loop(e)); + + /* All should complete successfully */ + for (size_t i = 0; i < ELEMENTSOF(fibers); i++) { + ASSERT_OK(sd_future_result(fibers[i])); + safe_close_pair(pipes[i]); + } +} + +typedef struct SocketIOContext { + int *sockfd; + int order; +} SocketIOContext; + +static int socket_send_order_fiber(void *userdata) { + SocketIOContext *ctx = ASSERT_PTR(userdata); + + /* Verify the recv fiber already ran and suspended before we started */ + ASSERT_EQ(ctx->order, 1); + ctx->order = 2; + + return sd_fiber_send(ctx->sockfd[0], "socket", STRLEN("socket"), 0); +} + +static int socket_recv_order_fiber(void *userdata) { + SocketIOContext *ctx = ASSERT_PTR(userdata); + char buf[64]; + ssize_t n; + + /* Record that the recv fiber started before attempting the blocking recv */ + ASSERT_EQ(ctx->order, 0); + ctx->order = 1; + + n = sd_fiber_recv(ctx->sockfd[1], buf, sizeof(buf), 0); + if (n < 0) + return (int) n; + + /* After resuming, verify the send fiber ran while we were suspended */ + ASSERT_EQ(ctx->order, 2); + + /* Verify we received "socket" */ + if (n != 6 || memcmp(buf, "socket", 6) != 0) + return -EIO; + + return (int) n; +} + +TEST(fiber_io_recv_send) { + _cleanup_(sd_event_unrefp) sd_event *e = NULL; + ASSERT_OK(sd_event_new(&e)); + ASSERT_OK(sd_event_set_exit_on_idle(e, true)); + + _cleanup_close_pair_ int sockfd[2] = EBADF_PAIR; + ASSERT_OK_ERRNO(socketpair(AF_UNIX, SOCK_STREAM | SOCK_CLOEXEC | SOCK_NONBLOCK, 0, sockfd)); + + SocketIOContext ctx = { .sockfd = sockfd }; + + /* Higher priority for the recv fiber, which will run first and suspend */ + _cleanup_(sd_future_unrefp) sd_future *fs = NULL, *fr = NULL; + ASSERT_OK(sd_fiber_new(e, "socket-recv", socket_recv_order_fiber, &ctx, NULL, &fr)); + ASSERT_OK(sd_future_set_priority(fr, 0)); + ASSERT_OK(sd_fiber_new(e, "socket-send", socket_send_order_fiber, &ctx, NULL, &fs)); + ASSERT_OK(sd_future_set_priority(fs, 1)); + + ASSERT_OK(sd_event_loop(e)); + + /* Verify both fibers completed and the full recv->suspend->send->resume sequence occurred */ + ASSERT_OK_EQ(sd_future_result(fr), 6); + ASSERT_OK_EQ(sd_future_result(fs), 6); +} + +static int socket_recv_peek_fiber(void *userdata) { + int sockfd = PTR_TO_INT(userdata); + char buf1[64], buf2[64]; + ssize_t n1, n2; + + /* First peek at the data */ + n1 = sd_fiber_recv(sockfd, buf1, sizeof(buf1), MSG_PEEK); + if (n1 < 0) + return (int) n1; + + /* Then actually read it */ + n2 = sd_fiber_recv(sockfd, buf2, sizeof(buf2), 0); + if (n2 < 0) + return (int) n2; + + /* Both should have read the same data */ + if (n1 != n2 || memcmp(buf1, buf2, n1) != 0) + return -EIO; + + if (n1 != 4 || memcmp(buf1, "peek", 4) != 0) + return -EIO; + + return 0; +} + +TEST(fiber_io_recv_peek) { + _cleanup_(sd_event_unrefp) sd_event *e = NULL; + ASSERT_OK(sd_event_new(&e)); + ASSERT_OK(sd_event_set_exit_on_idle(e, true)); + + _cleanup_close_pair_ int sockfd[2] = EBADF_PAIR; + ASSERT_OK_ERRNO(socketpair(AF_UNIX, SOCK_STREAM | SOCK_CLOEXEC | SOCK_NONBLOCK, 0, sockfd)); + + _cleanup_(sd_future_unrefp) sd_future *f = NULL; + ASSERT_OK(sd_fiber_new(e, "socket-recv-peek", socket_recv_peek_fiber, INT_TO_PTR(sockfd[1]), NULL, &f)); + + /* Write data to the socket */ + ASSERT_OK_EQ_ERRNO(write(sockfd[0], "peek", 4), 4); + + ASSERT_OK(sd_event_loop(e)); + ASSERT_OK(sd_future_result(f)); +} + +static int socket_connect_fiber(void *userdata) { + struct sockaddr_un *addr = userdata; + _cleanup_close_ int sockfd = -EBADF; + + sockfd = socket(AF_UNIX, SOCK_STREAM | SOCK_CLOEXEC | SOCK_NONBLOCK, 0); + if (sockfd < 0) + return -errno; + + return sd_fiber_connect(sockfd, (struct sockaddr*) addr, sizeof(*addr)); +} + +TEST(fiber_io_connect) { + _cleanup_(sd_event_unrefp) sd_event *e = NULL; + ASSERT_OK(sd_event_new(&e)); + ASSERT_OK(sd_event_set_exit_on_idle(e, true)); + + /* Create listening socket with abstract namespace */ + _cleanup_close_ int listen_fd = socket(AF_UNIX, SOCK_STREAM | SOCK_CLOEXEC | SOCK_NONBLOCK, 0); + ASSERT_OK(listen_fd); + + /* Use abstract socket (starts with null byte) */ + struct sockaddr_un addr = { + .sun_family = AF_UNIX, + }; + addr.sun_path[0] = '\0'; + snprintf(addr.sun_path + 1, sizeof(addr.sun_path) - 1, "test-fiber-connect-%d", getpid()); + + ASSERT_OK(bind(listen_fd, (struct sockaddr*) &addr, sizeof(addr))); + ASSERT_OK(listen(listen_fd, 1)); + + /* Create fiber to connect */ + _cleanup_(sd_future_unrefp) sd_future *f = NULL; + ASSERT_OK(sd_fiber_new(e, "socket-connect", socket_connect_fiber, &addr, NULL, &f)); + + /* Run the event loop - connection should complete */ + ASSERT_OK(sd_event_loop(e)); + ASSERT_OK(sd_future_result(f)); +} + +static int socket_sendmsg_fiber(void *userdata) { + int sockfd = PTR_TO_INT(userdata); + struct iovec iov = { + .iov_base = (void*) "message", + .iov_len = STRLEN("message"), + }; + struct msghdr msg = { + .msg_iov = &iov, + .msg_iovlen = 1, + }; + + return sd_fiber_sendmsg(sockfd, &msg, 0); +} + +static int socket_recvmsg_fiber(void *userdata) { + int sockfd = PTR_TO_INT(userdata); + char buf[64]; + struct iovec iov = { + .iov_base = buf, + .iov_len = sizeof(buf), + }; + struct msghdr msg = { + .msg_iov = &iov, + .msg_iovlen = 1, + }; + ssize_t n; + + n = sd_fiber_recvmsg(sockfd, &msg, 0); + if (n < 0) + return (int) n; + + if (n != 7 || memcmp(buf, "message", 7) != 0) + return -EIO; + + return (int) n; +} + +TEST(fiber_io_recvmsg_sendmsg) { + _cleanup_(sd_event_unrefp) sd_event *e = NULL; + ASSERT_OK(sd_event_new(&e)); + ASSERT_OK(sd_event_set_exit_on_idle(e, true)); + + _cleanup_close_pair_ int sockfd[2] = EBADF_PAIR; + ASSERT_OK_ERRNO(socketpair(AF_UNIX, SOCK_STREAM | SOCK_CLOEXEC | SOCK_NONBLOCK, 0, sockfd)); + + _cleanup_(sd_future_unrefp) sd_future *fs = NULL, *fr = NULL; + ASSERT_OK(sd_fiber_new(e, "socket-recvmsg", socket_recvmsg_fiber, INT_TO_PTR(sockfd[1]), NULL, &fr)); + ASSERT_OK(sd_future_set_priority(fr, 1)); + ASSERT_OK(sd_fiber_new(e, "socket-sendmsg", socket_sendmsg_fiber, INT_TO_PTR(sockfd[0]), NULL, &fs)); + ASSERT_OK(sd_future_set_priority(fs, 0)); + + ASSERT_OK(sd_event_loop(e)); + + ASSERT_OK_EQ(sd_future_result(fr), 7); + ASSERT_OK_EQ(sd_future_result(fs), 7); +} + +static int socket_sendto_fiber(void *userdata) { + int sockfd = PTR_TO_INT(userdata); + + /* For socketpair dgram sockets, we can use NULL address since they're connected */ + return sd_fiber_sendto(sockfd, "datagram", STRLEN("datagram"), 0, NULL, 0); +} + +static int socket_recvfrom_fiber(void *userdata) { + int sockfd = PTR_TO_INT(userdata); + char buf[64]; + struct sockaddr_un addr; + socklen_t addr_len = sizeof(addr); + ssize_t n; + + n = sd_fiber_recvfrom(sockfd, buf, sizeof(buf), 0, + (struct sockaddr*) &addr, &addr_len); + if (n < 0) + return (int) n; + + if (n != 8 || memcmp(buf, "datagram", 8) != 0) + return -EIO; + + return (int) n; +} + +TEST(fiber_io_recvfrom_sendto) { + _cleanup_(sd_event_unrefp) sd_event *e = NULL; + ASSERT_OK(sd_event_new(&e)); + ASSERT_OK(sd_event_set_exit_on_idle(e, true)); + + _cleanup_close_pair_ int sockfd[2] = EBADF_PAIR; + ASSERT_OK_ERRNO(socketpair(AF_UNIX, SOCK_DGRAM | SOCK_CLOEXEC | SOCK_NONBLOCK, 0, sockfd)); + + _cleanup_(sd_future_unrefp) sd_future *fs = NULL, *fr = NULL; + ASSERT_OK(sd_fiber_new(e, "socket-recvfrom", socket_recvfrom_fiber, INT_TO_PTR(sockfd[1]), NULL, &fr)); + ASSERT_OK(sd_future_set_priority(fr, 1)); + ASSERT_OK(sd_fiber_new(e, "socket-sendto", socket_sendto_fiber, INT_TO_PTR(sockfd[0]), NULL, &fs)); + ASSERT_OK(sd_future_set_priority(fs, 0)); + + ASSERT_OK(sd_event_loop(e)); + + ASSERT_OK_EQ(sd_future_result(fr), 8); + ASSERT_OK_EQ(sd_future_result(fs), 8); +} + +static int socket_sendmsg_fd_fiber(void *userdata) { + int *args = userdata; + int sockfd = args[0]; + int fd_to_send = args[1]; + struct iovec iov = { + .iov_base = (void*) "X", + .iov_len = 1, + }; + union { + struct cmsghdr cmsghdr; + uint8_t buf[CMSG_SPACE(sizeof(int))]; + } control = {}; + struct msghdr msg = { + .msg_iov = &iov, + .msg_iovlen = 1, + .msg_control = &control, + .msg_controllen = sizeof(control), + }; + struct cmsghdr *cmsg; + + cmsg = CMSG_FIRSTHDR(&msg); + cmsg->cmsg_level = SOL_SOCKET; + cmsg->cmsg_type = SCM_RIGHTS; + cmsg->cmsg_len = CMSG_LEN(sizeof(int)); + memcpy(CMSG_DATA(cmsg), &fd_to_send, sizeof(int)); + + return sd_fiber_sendmsg(sockfd, &msg, 0); +} + +static int socket_recvmsg_fd_fiber(void *userdata) { + int sockfd = PTR_TO_INT(userdata); + char buf[1]; + struct iovec iov = { + .iov_base = buf, + .iov_len = sizeof(buf), + }; + union { + struct cmsghdr cmsghdr; + uint8_t buf[CMSG_SPACE(sizeof(int))]; + } control = {}; + struct msghdr msg = { + .msg_iov = &iov, + .msg_iovlen = 1, + .msg_control = &control, + .msg_controllen = sizeof(control), + }; + struct cmsghdr *cmsg; + int received_fd; + ssize_t n; + + n = sd_fiber_recvmsg(sockfd, &msg, 0); + if (n < 0) + return (int) n; + + if (n != 1 || buf[0] != 'X') + return -EIO; + + /* Extract the file descriptor */ + cmsg = CMSG_FIRSTHDR(&msg); + if (!cmsg || cmsg->cmsg_level != SOL_SOCKET || cmsg->cmsg_type != SCM_RIGHTS) + return -EIO; + + memcpy(&received_fd, CMSG_DATA(cmsg), sizeof(int)); + + /* Verify we can use the fd */ + if (fcntl(received_fd, F_GETFD) < 0) + return -errno; + + close(received_fd); + return 0; +} + +TEST(fiber_io_sendmsg_recvmsg_fd) { + _cleanup_(sd_event_unrefp) sd_event *e = NULL; + ASSERT_OK(sd_event_new(&e)); + ASSERT_OK(sd_event_set_exit_on_idle(e, true)); + + _cleanup_close_pair_ int sockfd[2] = EBADF_PAIR; + ASSERT_OK_ERRNO(socketpair(AF_UNIX, SOCK_STREAM | SOCK_CLOEXEC | SOCK_NONBLOCK, 0, sockfd)); + + /* Create a test file descriptor to send */ + _cleanup_close_ int test_fd = open("/dev/null", O_RDONLY | O_CLOEXEC); + ASSERT_OK_ERRNO(test_fd); + + _cleanup_(sd_future_unrefp) sd_future *fs = NULL, *fr = NULL; + int args[2] = { sockfd[0], test_fd }; + ASSERT_OK(sd_fiber_new(e, "socket-recvmsg-fd", socket_recvmsg_fd_fiber, INT_TO_PTR(sockfd[1]), NULL, &fr)); + ASSERT_OK(sd_future_set_priority(fr, 1)); + ASSERT_OK(sd_fiber_new(e, "socket-sendmsg-fd", socket_sendmsg_fd_fiber, args, NULL, &fs)); + ASSERT_OK(sd_future_set_priority(fs, 0)); + + ASSERT_OK(sd_event_loop(e)); + + ASSERT_OK(sd_future_result(fr)); + ASSERT_OK_EQ(sd_future_result(fs), 1); +} + +TEST(fiber_io_socket_fallback) { + _cleanup_close_pair_ int sockfd[2] = EBADF_PAIR; + char buf[STRLEN("fallback")] = {}; + + ASSERT_OK_ERRNO(socketpair(AF_UNIX, SOCK_STREAM | SOCK_CLOEXEC, 0, sockfd)); + + /* Test send/recv without fiber context */ + ASSERT_OK_EQ(sd_fiber_send(sockfd[0], "fallback", sizeof(buf), 0), (ssize_t) sizeof(buf)); + ASSERT_OK_EQ(sd_fiber_recv(sockfd[1], buf, sizeof(buf), 0), (ssize_t) sizeof(buf)); + + /* Test sendto/recvfrom without fiber context */ + ASSERT_OK_EQ(sd_fiber_sendto(sockfd[0], "fallback", sizeof(buf), 0, NULL, 0), (ssize_t) sizeof(buf)); + ASSERT_OK_EQ(sd_fiber_recvfrom(sockfd[1], buf, sizeof(buf), 0, NULL, NULL), (ssize_t) sizeof(buf)); +} + +static int blocking_recv_fiber(void *userdata) { + int sockfd = PTR_TO_INT(userdata); + char buf[64]; + + return sd_fiber_recv(sockfd, buf, sizeof(buf), 0); +} + +TEST(fiber_io_socket_cancel) { + _cleanup_(sd_event_unrefp) sd_event *e = NULL; + ASSERT_OK(sd_event_new(&e)); + ASSERT_OK(sd_event_set_exit_on_idle(e, true)); + + _cleanup_close_pair_ int sockfd[2] = EBADF_PAIR; + ASSERT_OK_ERRNO(socketpair(AF_UNIX, SOCK_STREAM | SOCK_CLOEXEC | SOCK_NONBLOCK, 0, sockfd)); + + _cleanup_(sd_future_unrefp) sd_future *f = NULL; + ASSERT_OK(sd_fiber_new(e, "blocking-recv", blocking_recv_fiber, INT_TO_PTR(sockfd[0]), NULL, &f)); + + /* Run once - fiber will suspend on recv */ + ASSERT_OK_POSITIVE(sd_event_run(e, 0)); + + /* Cancel the fiber */ + ASSERT_OK(sd_future_cancel(f)); + + /* Run to completion */ + ASSERT_OK(sd_event_loop(e)); + + /* Should be cancelled */ + ASSERT_ERROR(sd_future_result(f), ECANCELED); +} + +/* Test: Basic accept operation */ +static int accept_fiber(void *userdata) { + int listen_fd = PTR_TO_INT(userdata); + struct sockaddr_un addr; + socklen_t addr_len = sizeof(addr); + int client_fd; + + client_fd = sd_fiber_accept(listen_fd, (struct sockaddr*) &addr, &addr_len, SOCK_CLOEXEC); + if (client_fd < 0) + return client_fd; + + close(client_fd); + return 0; +} + +TEST(fiber_io_accept_basic) { + _cleanup_(sd_event_unrefp) sd_event *e = NULL; + ASSERT_OK(sd_event_new(&e)); + ASSERT_OK(sd_event_set_exit_on_idle(e, true)); + + /* Create listening socket with abstract namespace */ + _cleanup_close_ int listen_fd = -EBADF; + ASSERT_OK_ERRNO(listen_fd = socket(AF_UNIX, SOCK_STREAM | SOCK_CLOEXEC | SOCK_NONBLOCK, 0)); + + struct sockaddr_un addr = { + .sun_family = AF_UNIX, + }; + addr.sun_path[0] = '\0'; + snprintf(addr.sun_path + 1, sizeof(addr.sun_path) - 1, "test-fiber-accept-%d", getpid()); + + ASSERT_OK_ERRNO(bind(listen_fd, (struct sockaddr*) &addr, sizeof(addr))); + ASSERT_OK_ERRNO(listen(listen_fd, 1)); + + /* Create fiber to accept connection */ + _cleanup_(sd_future_unrefp) sd_future *f = NULL; + ASSERT_OK(sd_fiber_new(e, "accept", accept_fiber, INT_TO_PTR(listen_fd), NULL, &f)); + + /* Connect from outside fiber context */ + _cleanup_close_ int connect_fd = -EBADF; + ASSERT_OK(connect_fd = socket(AF_UNIX, SOCK_STREAM | SOCK_CLOEXEC | SOCK_NONBLOCK, 0)); + ASSERT_OK(connect(connect_fd, (struct sockaddr*) &addr, sizeof(addr))); + + /* Run the event loop - accept should complete */ + ASSERT_OK(sd_event_loop(e)); + ASSERT_OK(sd_future_result(f)); +} + +/* Test: Multiple sequential accepts */ +static int accept_multiple_fiber(void *userdata) { + int listen_fd = PTR_TO_INT(userdata); + struct sockaddr_un addr; + socklen_t addr_len; + int count = 0; + + for (int i = 0; i < 3; i++) { + _cleanup_close_ int client_fd = -EBADF; + + addr_len = sizeof(addr); + client_fd = sd_fiber_accept(listen_fd, (struct sockaddr*) &addr, &addr_len, SOCK_CLOEXEC); + if (client_fd < 0) + return client_fd; + + count++; + } + + return count; +} + +TEST(fiber_io_accept_multiple) { + _cleanup_(sd_event_unrefp) sd_event *e = NULL; + ASSERT_OK(sd_event_new(&e)); + ASSERT_OK(sd_event_set_exit_on_idle(e, true)); + + /* Create listening socket */ + _cleanup_close_ int listen_fd = -EBADF; + ASSERT_OK(listen_fd = socket(AF_UNIX, SOCK_STREAM | SOCK_CLOEXEC | SOCK_NONBLOCK, 0)); + + struct sockaddr_un addr = { + .sun_family = AF_UNIX, + }; + addr.sun_path[0] = '\0'; + snprintf(addr.sun_path + 1, sizeof(addr.sun_path) - 1, "test-fiber-accept-multi-%d", getpid()); + + ASSERT_OK(bind(listen_fd, (struct sockaddr*) &addr, sizeof(addr))); + ASSERT_OK(listen(listen_fd, 5)); + + /* Create fiber to accept multiple connections */ + _cleanup_(sd_future_unrefp) sd_future *f = NULL; + ASSERT_OK(sd_fiber_new(e, "accept-multiple", accept_multiple_fiber, INT_TO_PTR(listen_fd), NULL, &f)); + + /* Connect multiple times */ + int connect_fds[3] = { -EBADF, -EBADF, -EBADF }; + for (size_t i = 0; i < 3; i++) { + connect_fds[i] = socket(AF_UNIX, SOCK_STREAM | SOCK_CLOEXEC | SOCK_NONBLOCK, 0); + ASSERT_OK(connect_fds[i]); + ASSERT_OK(connect(connect_fds[i], (struct sockaddr*) &addr, sizeof(addr))); + } + + /* Run the event loop */ + ASSERT_OK(sd_event_loop(e)); + ASSERT_OK_EQ(sd_future_result(f), 3); + + /* Clean up connection fds */ + for (size_t i = 0; i < 3; i++) + safe_close(connect_fds[i]); +} + +/* Test: Accept and exchange data */ +static int accept_and_read_fiber(void *userdata) { + int listen_fd = PTR_TO_INT(userdata); + _cleanup_close_ int client_fd = -EBADF; + char buf[64]; + ssize_t n; + + client_fd = sd_fiber_accept(listen_fd, NULL, NULL, SOCK_CLOEXEC); + if (client_fd < 0) + return client_fd; + + n = sd_fiber_read(client_fd, buf, sizeof(buf)); + if (n < 0) + return (int) n; + + if (n != 5 || memcmp(buf, "hello", 5) != 0) + return -EIO; + + return 0; +} + +TEST(fiber_io_accept_and_read) { + _cleanup_(sd_event_unrefp) sd_event *e = NULL; + ASSERT_OK(sd_event_new(&e)); + ASSERT_OK(sd_event_set_exit_on_idle(e, true)); + + /* Create listening socket */ + _cleanup_close_ int listen_fd = -EBADF; + ASSERT_OK(listen_fd = socket(AF_UNIX, SOCK_STREAM | SOCK_CLOEXEC | SOCK_NONBLOCK, 0)); + + struct sockaddr_un addr = { + .sun_family = AF_UNIX, + }; + addr.sun_path[0] = '\0'; + snprintf(addr.sun_path + 1, sizeof(addr.sun_path) - 1, "test-fiber-accept-read-%d", getpid()); + + ASSERT_OK(bind(listen_fd, (struct sockaddr*) &addr, sizeof(addr))); + ASSERT_OK(listen(listen_fd, 1)); + + /* Create fiber to accept and read */ + _cleanup_(sd_future_unrefp) sd_future *f = NULL; + ASSERT_OK(sd_fiber_new(e, "accept-and-read", accept_and_read_fiber, INT_TO_PTR(listen_fd), NULL, &f)); + + /* Connect and send data */ + _cleanup_close_ int connect_fd = -EBADF; + ASSERT_OK(connect_fd = socket(AF_UNIX, SOCK_STREAM | SOCK_CLOEXEC | SOCK_NONBLOCK, 0)); + ASSERT_OK(connect(connect_fd, (struct sockaddr*) &addr, sizeof(addr))); + ASSERT_OK_EQ_ERRNO(write(connect_fd, "hello", 5), 5); + + /* Run the event loop */ + ASSERT_OK(sd_event_loop(e)); + ASSERT_OK(sd_future_result(f)); +} + +/* Test: poll with single fd ready immediately */ +static int poll_immediate_fiber(void *userdata) { + int *pipefd = userdata; + struct pollfd fds[] = { + { .fd = pipefd[0], .events = POLLIN }, + }; + int r; + + r = sd_fiber_ppoll(fds, ELEMENTSOF(fds), NULL, NULL); + if (r < 0) + return r; + + /* Should have one fd ready */ + if (r != 1) + return -EIO; + + if (!(fds[0].revents & POLLIN)) + return -EIO; + + return 0; +} + +TEST(fiber_poll_immediate) { + _cleanup_(sd_event_unrefp) sd_event *e = NULL; + ASSERT_OK(sd_event_new(&e)); + ASSERT_OK(sd_event_set_exit_on_idle(e, true)); + + _cleanup_close_pair_ int pipefd[2] = EBADF_PAIR; + ASSERT_OK_ERRNO(pipe2(pipefd, O_CLOEXEC | O_NONBLOCK)); + + /* Write data before creating fiber */ + ASSERT_OK_EQ_ERRNO(write(pipefd[1], "X", 1), 1); + + _cleanup_(sd_future_unrefp) sd_future *f = NULL; + ASSERT_OK(sd_fiber_new(e, "poll-immediate", poll_immediate_fiber, pipefd, NULL, &f)); + + ASSERT_OK(sd_event_loop(e)); + ASSERT_OK(sd_future_result(f)); +} + +/* Test: poll with fd that becomes ready after suspension */ +static int poll_fiber(void *userdata) { + int *pipefd = userdata; + struct pollfd fds[] = { + { .fd = pipefd[0], .events = POLLIN }, + }; + int r; + + r = sd_fiber_ppoll(fds, ELEMENTSOF(fds), NULL, NULL); + if (r < 0) + return r; + + if (r != 1 || !(fds[0].revents & POLLIN)) + return -EIO; + + /* Read the data */ + char buf[1]; + if (read(pipefd[0], buf, 1) != 1 || buf[0] != 'Y') + return -EIO; + + return 0; +} + +TEST(fiber_poll) { + _cleanup_(sd_event_unrefp) sd_event *e = NULL; + ASSERT_OK(sd_event_new(&e)); + ASSERT_OK(sd_event_set_exit_on_idle(e, true)); + + _cleanup_close_pair_ int pipefd[2] = EBADF_PAIR; + ASSERT_OK_ERRNO(pipe2(pipefd, O_CLOEXEC | O_NONBLOCK)); + + _cleanup_(sd_future_unrefp) sd_future *f = NULL; + ASSERT_OK(sd_fiber_new(e, "poll-suspend", poll_fiber, pipefd, NULL, &f)); + + /* Run once - fiber will suspend on poll */ + ASSERT_OK_POSITIVE(sd_event_run(e, 0)); + + /* Write data to wake it up */ + ASSERT_OK_EQ_ERRNO(write(pipefd[1], "Y", 1), 1); + + /* Complete execution */ + ASSERT_OK(sd_event_loop(e)); + ASSERT_OK(sd_future_result(f)); +} + +/* Test: poll with multiple fds */ +static int poll_multiple_fiber(void *userdata) { + int (*pipes)[2] = userdata; + struct pollfd fds[] = { + { .fd = pipes[0][0], .events = POLLIN }, + { .fd = pipes[1][0], .events = POLLIN }, + { .fd = pipes[2][0], .events = POLLIN }, + }; + int r; + + r = sd_fiber_ppoll(fds, ELEMENTSOF(fds), NULL, NULL); + if (r < 0) + return r; + + /* Should have all three ready */ + if (r != 3) + return -EIO; + + for (size_t i = 0; i < 3; i++) { + if (!(fds[i].revents & POLLIN)) + return -EIO; + + char buf[1]; + if (read(fds[i].fd, buf, 1) != 1 || buf[0] != (char) ('A' + i)) + return -EIO; + } + + return 0; +} + +TEST(fiber_poll_multiple) { + _cleanup_(sd_event_unrefp) sd_event *e = NULL; + ASSERT_OK(sd_event_new(&e)); + ASSERT_OK(sd_event_set_exit_on_idle(e, true)); + + /* Create three pipes */ + int pipes[3][2]; + for (size_t i = 0; i < 3; i++) + ASSERT_OK_ERRNO(pipe2(pipes[i], O_CLOEXEC | O_NONBLOCK)); + + _cleanup_(sd_future_unrefp) sd_future *f = NULL; + ASSERT_OK(sd_fiber_new(e, "poll-multiple", poll_multiple_fiber, pipes, NULL, &f)); + + /* Run once - fiber will suspend waiting for data */ + ASSERT_OK_POSITIVE(sd_event_run(e, 0)); + + /* Write to all three pipes in different order */ + ASSERT_OK_EQ_ERRNO(write(pipes[2][1], "C", 1), 1); + ASSERT_OK_EQ_ERRNO(write(pipes[0][1], "A", 1), 1); + ASSERT_OK_EQ_ERRNO(write(pipes[1][1], "B", 1), 1); + + ASSERT_OK(sd_event_loop(e)); + ASSERT_OK(sd_future_result(f)); + + for (size_t i = 0; i < 3; i++) + safe_close_pair(pipes[i]); +} + +/* Test: poll with POLLOUT (write readiness) */ +static int poll_pollout_fiber(void *userdata) { + int *pipefd = userdata; + struct pollfd fds[] = { + { .fd = pipefd[1], .events = POLLOUT }, + }; + int r; + + r = sd_fiber_ppoll(fds, ELEMENTSOF(fds), NULL, NULL); + if (r < 0) + return r; + + if (r != 1 || !(fds[0].revents & POLLOUT)) + return -EIO; + + /* Pipe should be writable */ + if (write(pipefd[1], "Z", 1) != 1) + return -errno; + + return 0; +} + +TEST(fiber_poll_pollout) { + _cleanup_(sd_event_unrefp) sd_event *e = NULL; + ASSERT_OK(sd_event_new(&e)); + ASSERT_OK(sd_event_set_exit_on_idle(e, true)); + + _cleanup_close_pair_ int pipefd[2] = EBADF_PAIR; + ASSERT_OK_ERRNO(pipe2(pipefd, O_CLOEXEC | O_NONBLOCK)); + + _cleanup_(sd_future_unrefp) sd_future *f = NULL; + ASSERT_OK(sd_fiber_new(e, "poll-pollout", poll_pollout_fiber, pipefd, NULL, &f)); + + ASSERT_OK(sd_event_loop(e)); + ASSERT_OK(sd_future_result(f)); + + /* Verify data was written */ + char buf[1]; + ASSERT_OK_EQ_ERRNO(read(pipefd[0], buf, 1), 1); + ASSERT_EQ(buf[0], 'Z'); +} + +/* Test: poll with timeout that expires */ +static int poll_timeout_fiber(void *userdata) { + int *pipefd = userdata; + struct pollfd fds[] = { + { .fd = pipefd[0], .events = POLLIN }, + }; + int r; + + /* Poll with 100ms timeout - no data will arrive */ + r = sd_fiber_ppoll(fds, ELEMENTSOF(fds), &(struct timespec) { .tv_nsec = 100 * NSEC_PER_MSEC }, NULL); + if (r < 0) + return r; + + /* Should timeout with no fds ready */ + if (r != 0) + return -EIO; + + return 0; +} + +TEST(fiber_poll_timeout) { + _cleanup_(sd_event_unrefp) sd_event *e = NULL; + ASSERT_OK(sd_event_new(&e)); + ASSERT_OK(sd_event_set_exit_on_idle(e, true)); + + _cleanup_close_pair_ int pipefd[2] = EBADF_PAIR; + ASSERT_OK_ERRNO(pipe2(pipefd, O_CLOEXEC | O_NONBLOCK)); + + _cleanup_(sd_future_unrefp) sd_future *f = NULL; + ASSERT_OK(sd_fiber_new(e, "poll-timeout", poll_timeout_fiber, pipefd, NULL, &f)); + + ASSERT_OK(sd_event_loop(e)); + ASSERT_OK(sd_future_result(f)); +} + +/* Test: poll with zero timeout (should not block) */ +static int poll_zero_timeout_fiber(void *userdata) { + int *pipefd = userdata; + struct pollfd fds[] = { + { .fd = pipefd[0], .events = POLLIN }, + }; + int r; + + /* Poll with zero timeout - should return immediately */ + r = sd_fiber_ppoll(fds, ELEMENTSOF(fds), &(struct timespec) {}, NULL); + if (r < 0) + return r; + + /* No data available, so should return 0 */ + if (r != 0) + return -EIO; + + /* Now write data */ + if (write(pipefd[1], "Q", 1) != 1) + return -errno; + + /* Poll again with zero timeout - should see data */ + r = sd_fiber_ppoll(fds, ELEMENTSOF(fds), NULL, NULL); + if (r < 0) + return r; + + if (r != 1 || !(fds[0].revents & POLLIN)) + return -EIO; + + return 0; +} + +TEST(fiber_poll_zero_timeout) { + _cleanup_(sd_event_unrefp) sd_event *e = NULL; + ASSERT_OK(sd_event_new(&e)); + ASSERT_OK(sd_event_set_exit_on_idle(e, true)); + + _cleanup_close_pair_ int pipefd[2] = EBADF_PAIR; + ASSERT_OK_ERRNO(pipe2(pipefd, O_CLOEXEC | O_NONBLOCK)); + + _cleanup_(sd_future_unrefp) sd_future *f = NULL; + ASSERT_OK(sd_fiber_new(e, "poll-zero-timeout", poll_zero_timeout_fiber, pipefd, NULL, &f)); + + ASSERT_OK(sd_event_loop(e)); + ASSERT_OK(sd_future_result(f)); +} + +/* Test: poll with zero fds and zero timeout (should return immediately) */ +static int poll_zero_fds_fiber(void *userdata) { + return sd_fiber_ppoll(NULL, 0, &(struct timespec) {}, NULL); +} + +TEST(fiber_poll_zero_fds) { + _cleanup_(sd_event_unrefp) sd_event *e = NULL; + ASSERT_OK(sd_event_new(&e)); + ASSERT_OK(sd_event_set_exit_on_idle(e, true)); + + _cleanup_(sd_future_unrefp) sd_future *f = NULL; + ASSERT_OK(sd_fiber_new(e, "poll-zero-fds", poll_zero_fds_fiber, NULL, NULL, &f)); + + ASSERT_OK(sd_event_loop(e)); + ASSERT_OK_EQ(sd_future_result(f), 0); +} + +/* Test: poll with zero fds and no timeout has no possible wakeup, must reject with -EINVAL */ +static int poll_zero_fds_no_timeout_fiber(void *userdata) { + return sd_fiber_ppoll(NULL, 0, NULL, NULL); +} + +TEST(fiber_poll_zero_fds_no_timeout) { + _cleanup_(sd_event_unrefp) sd_event *e = NULL; + ASSERT_OK(sd_event_new(&e)); + ASSERT_OK(sd_event_set_exit_on_idle(e, true)); + + _cleanup_(sd_future_unrefp) sd_future *f = NULL; + ASSERT_OK(sd_fiber_new(e, "poll-zero-fds-no-timeout", poll_zero_fds_no_timeout_fiber, NULL, NULL, &f)); + + ASSERT_OK(sd_event_loop(e)); + ASSERT_ERROR(sd_future_result(f), EINVAL); +} + +/* Test: poll with negative fd (should be ignored) */ +static int poll_negative_fd_fiber(void *userdata) { + int *pipefd = userdata; + struct pollfd fds[] = { + { .fd = -1, .events = POLLIN }, + { .fd = pipefd[0], .events = POLLIN }, + }; + int r; + + r = sd_fiber_ppoll(fds, ELEMENTSOF(fds), NULL, NULL); + if (r < 0) + return r; + + /* Only the second fd should be ready */ + if (r != 1 || !(fds[1].revents & POLLIN)) + return -EIO; + + /* First fd should have no events */ + if (fds[0].revents != 0) + return -EIO; + + return 0; +} + +TEST(fiber_poll_negative_fd) { + _cleanup_(sd_event_unrefp) sd_event *e = NULL; + ASSERT_OK(sd_event_new(&e)); + ASSERT_OK(sd_event_set_exit_on_idle(e, true)); + + _cleanup_close_pair_ int pipefd[2] = EBADF_PAIR; + ASSERT_OK_ERRNO(pipe2(pipefd, O_CLOEXEC | O_NONBLOCK)); + + /* Write data before creating fiber */ + ASSERT_OK_EQ_ERRNO(write(pipefd[1], "N", 1), 1); + + _cleanup_(sd_future_unrefp) sd_future *f = NULL; + ASSERT_OK(sd_fiber_new(e, "poll-negative-fd", poll_negative_fd_fiber, pipefd, NULL, &f)); + + ASSERT_OK(sd_event_loop(e)); + ASSERT_OK(sd_future_result(f)); +} + +/* Test: Multiple fibers waiting on the same fd */ +typedef struct SharedFdArgs { + int pipefd; + int *counter; +} SharedFdArgs; + +static int shared_fd_read_fiber(void *userdata) { + SharedFdArgs *args = ASSERT_PTR(userdata); + char buf[1]; + ssize_t n; + + n = sd_fiber_read(args->pipefd, buf, sizeof(buf)); + if (n < 0) + return (int) n; + + if (n != 1) + return -EIO; + + /* Increment counter to track successful reads */ + (*args->counter)++; + + return 0; +} + +TEST(fiber_io_same_fd_multiple_fibers) { + _cleanup_(sd_event_unrefp) sd_event *e = NULL; + ASSERT_OK(sd_event_new(&e)); + ASSERT_OK(sd_event_set_exit_on_idle(e, true)); + + _cleanup_close_pair_ int pipefd[2] = EBADF_PAIR; + ASSERT_OK_ERRNO(pipe2(pipefd, O_CLOEXEC | O_NONBLOCK)); + + /* Create 3 fibers all waiting on the same pipe read end */ + sd_future *fibers[3] = {}; + CLEANUP_ELEMENTS(fibers, sd_future_unref_array_clear); + SharedFdArgs args[3]; + int counter = 0; + + for (size_t i = 0; i < ELEMENTSOF(fibers); i++) { + args[i].pipefd = pipefd[0]; + args[i].counter = &counter; + ASSERT_OK(sd_fiber_new(e, "shared-fd-read", shared_fd_read_fiber, &args[i], NULL, &fibers[i])); + } + + /* All fibers should suspend waiting for data */ + for (size_t i = 0; i < ELEMENTSOF(fibers); i++) + ASSERT_OK_POSITIVE(sd_event_run(e, 0)); + + /* Write 3 bytes - each byte will wake one fiber */ + ASSERT_OK_EQ_ERRNO(write(pipefd[1], "ABC", 3), 3); + + /* Run until all fibers complete */ + ASSERT_OK(sd_event_loop(e)); + + /* All should complete successfully and each should have read one byte */ + for (size_t i = 0; i < ELEMENTSOF(fibers); i++) + ASSERT_OK(sd_future_result(fibers[i])); + + ASSERT_EQ(counter, 3); +} + +static int blocking_fd_preserve_fiber(void *userdata) { + int *pipefd = ASSERT_PTR(userdata); + char buf[8] = {}; + ssize_t n; + + /* The pipe has data pre-filled, so this should succeed immediately on the fast path. + * This exercises the fd blocking state restore: fiber_io_operation() temporarily sets the fd + * to nonblocking, and must restore it to blocking on the success path. */ + n = sd_fiber_read(pipefd[0], buf, sizeof(buf)); + if (n < 0) + return (int) n; + + if ((size_t) n != sizeof(buf) || memcmp(buf, "blocking", sizeof(buf)) != 0) + return -EIO; + + return 0; +} + +TEST(fiber_io_blocking_fd_preserved) { + _cleanup_(sd_event_unrefp) sd_event *e = NULL; + ASSERT_OK(sd_event_new(&e)); + ASSERT_OK(sd_event_set_exit_on_idle(e, true)); + + /* Create a blocking pipe (no O_NONBLOCK) */ + _cleanup_close_pair_ int pipefd[2] = EBADF_PAIR; + ASSERT_OK_ERRNO(pipe2(pipefd, O_CLOEXEC)); + + /* Pre-fill the pipe so the read will succeed immediately */ + ASSERT_OK_EQ_ERRNO(write(pipefd[1], "blocking", 8), 8); + + _cleanup_(sd_future_unrefp) sd_future *f = NULL; + ASSERT_OK(sd_fiber_new(e, "blocking-fd-preserve", blocking_fd_preserve_fiber, pipefd, NULL, &f)); + + ASSERT_OK(sd_event_loop(e)); + ASSERT_OK(sd_future_result(f)); + + /* Verify the read end is still in blocking mode after the fiber completed */ + ASSERT_OK_ZERO(fd_nonblock(pipefd[0], false)); +} + +static int socket_connect_blocking_fiber(void *userdata) { + struct sockaddr_un *addr = userdata; + _cleanup_close_ int sockfd = -EBADF; + + /* Use a blocking socket (no SOCK_NONBLOCK). sd_fiber_connect() should temporarily set it + * to nonblocking, handle the EINPROGRESS path with getsockopt(SO_ERROR), and restore + * the blocking state. */ + sockfd = socket(AF_UNIX, SOCK_STREAM | SOCK_CLOEXEC, 0); + if (sockfd < 0) + return -errno; + + int r = sd_fiber_connect(sockfd, (struct sockaddr*) addr, sizeof(*addr)); + if (r < 0) + return r; + + /* Verify the socket is back in blocking mode */ + r = fd_nonblock(sockfd, false); + if (r < 0) + return r; + if (r > 0) + return -EBUSY; /* fd was nonblocking, but should have been restored to blocking */ + + return 0; +} + +TEST(fiber_io_connect_blocking) { + _cleanup_(sd_event_unrefp) sd_event *e = NULL; + ASSERT_OK(sd_event_new(&e)); + ASSERT_OK(sd_event_set_exit_on_idle(e, true)); + + /* Create listening socket */ + _cleanup_close_ int listen_fd = socket(AF_UNIX, SOCK_STREAM | SOCK_CLOEXEC | SOCK_NONBLOCK, 0); + ASSERT_OK(listen_fd); + + struct sockaddr_un addr = { + .sun_family = AF_UNIX, + }; + addr.sun_path[0] = '\0'; + snprintf(addr.sun_path + 1, sizeof(addr.sun_path) - 1, "test-fiber-connect-blocking-%d", getpid()); + + ASSERT_OK(bind(listen_fd, (struct sockaddr*) &addr, sizeof(addr))); + ASSERT_OK(listen(listen_fd, 1)); + + _cleanup_(sd_future_unrefp) sd_future *f = NULL; + ASSERT_OK(sd_fiber_new(e, "connect-blocking", socket_connect_blocking_fiber, &addr, NULL, &f)); + + ASSERT_OK(sd_event_loop(e)); + ASSERT_OK(sd_future_result(f)); +} + +DEFINE_TEST_MAIN(LOG_DEBUG); diff --git a/src/systemd/sd-future.h b/src/systemd/sd-future.h index 9d0d03acf48a7..5e0fa22525615 100644 --- a/src/systemd/sd-future.h +++ b/src/systemd/sd-future.h @@ -17,10 +17,18 @@ along with systemd; If not, see . ***/ +#include + #include "_sd-common.h" _SD_BEGIN_DECLARATIONS; +struct iovec; +struct pollfd; +struct sockaddr; +struct msghdr; +struct timespec; + typedef struct sd_event sd_event; typedef struct sd_future sd_future; typedef struct sd_future_ops sd_future_ops; @@ -96,6 +104,23 @@ sd_future* sd_fiber_timeout(uint64_t timeout); _SD_CONCATENATE(_sd_fto_b_, uniq); \ _SD_CONCATENATE(_sd_fto_b_, uniq) = NULL) +/* Fiber I/O operations - use sd-event for non-blocking I/O when in fiber context */ +ssize_t sd_fiber_read(int fd, void *buf, size_t count); +ssize_t sd_fiber_write(int fd, const void *buf, size_t count); +ssize_t sd_fiber_readv(int fd, const struct iovec *iov, int iovcnt); +ssize_t sd_fiber_writev(int fd, const struct iovec *iov, int iovcnt); +ssize_t sd_fiber_recv(int sockfd, void *buf, size_t len, int flags); +ssize_t sd_fiber_send(int sockfd, const void *buf, size_t len, int flags); +int sd_fiber_connect(int sockfd, const struct sockaddr *addr, socklen_t addrlen); +ssize_t sd_fiber_recvmsg(int sockfd, struct msghdr *msg, int flags); +ssize_t sd_fiber_sendmsg(int sockfd, const struct msghdr *msg, int flags); +ssize_t sd_fiber_recvfrom(int sockfd, void *buf, size_t len, int flags, struct sockaddr *src_addr, socklen_t *addrlen); +ssize_t sd_fiber_sendto(int sockfd, const void *buf, size_t len, int flags, const struct sockaddr *dest_addr, socklen_t addrlen); +int sd_fiber_accept(int sockfd, struct sockaddr *addr, socklen_t *addrlen, int flags); +#ifndef __STRICT_ANSI__ +int sd_fiber_ppoll(struct pollfd *fds, size_t n_fds, const struct timespec *timeout, const sigset_t *sigmask); +#endif + _SD_END_DECLARATIONS; #endif From 5137cbbf0cac6c29119704954db4cfca813d75df Mon Sep 17 00:00:00 2001 From: Daan De Meyer Date: Sat, 25 Apr 2026 22:06:54 +0200 Subject: [PATCH 237/242] sd-future: make src/basic blocking helpers fiber-aware MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Some helpers in src/basic — ppoll_usec_full() (used by fd_wait_for_event()), loop_read(), loop_read_exact(), loop_write_full() and pidref_wait_for_terminate_full() — block the calling thread. That's the right behaviour outside a fiber but not inside one, where blocking the thread also stalls every other fiber running on the same event loop. Rewriting every caller to pick a fiber or non-fiber variant explicitly would be a lot of churn and would split otherwise-shared code paths in two. Instead, the helpers detect at runtime whether they're running on a fiber and dispatch to a suspending variant when they are. FiberOps in fiber-ops.h holds five function pointers (ppoll, read, write, timeout, cancel_wait_unref); a fiber_ops global constant is populated whenever we enter a fiber with functions that delegate to suspending variants of common syscalls. With this approach, the variants themselves stay in libsystemd which is required because they make use of sd-event. - loop_read()/loop_read_exact() take the fiber read hook on a fiber unless the caller asked for a non-blocking attempt (do_poll=false) and the fd is already non-blocking — in that case we fall through to read() to preserve the existing return-EAGAIN-immediately semantic. The hook itself suspends on EAGAIN until data is available, so neither the do_poll knob nor the explicit fd_wait_for_event() retry loop are needed on the fiber path. - loop_write_full() likewise takes the fiber write hook on a fiber, except when timeout=0 with an already-non-blocking fd (preserving the fast-return-EAGAIN semantic). The fiber path runs inside a FIBER_OPS_TIMEOUT() scope so the caller's timeout is honoured via a deadline future, mirroring SD_FIBER_TIMEOUT() but reachable from src/basic without pulling in sd-future.h. - pidref_wait_for_terminate_full() polls the pidfd via fd_wait_for_event() before each waitid() when either a finite timeout is set or we're on a fiber, and requires pidref->fd >= 0 in those cases (returning -ENOMEDIUM otherwise — extending the rule that already applied to finite timeouts). The poll suspends the fiber via the ppoll hook above; the subsequent waitid() doesn't block because the pidfd is already signalled. --- src/basic/basic-forward.h | 1 + src/basic/fiber-ops.c | 50 ++ src/basic/fiber-ops.h | 34 ++ src/basic/io-util.c | 90 +++- src/basic/meson.build | 1 + src/basic/pidref.c | 23 +- src/libsystemd/meson.build | 1 + src/libsystemd/sd-future/fiber.c | 14 + src/libsystemd/sd-future/test-fiber-ops.c | 574 ++++++++++++++++++++++ 9 files changed, 763 insertions(+), 25 deletions(-) create mode 100644 src/basic/fiber-ops.c create mode 100644 src/basic/fiber-ops.h create mode 100644 src/libsystemd/sd-future/test-fiber-ops.c diff --git a/src/basic/basic-forward.h b/src/basic/basic-forward.h index 2536ccda0133b..17fa50fa19ae8 100644 --- a/src/basic/basic-forward.h +++ b/src/basic/basic-forward.h @@ -111,6 +111,7 @@ typedef enum UnitType UnitType; typedef enum WaitFlags WaitFlags; typedef struct Fiber Fiber; +typedef struct FiberOps FiberOps; typedef struct Hashmap Hashmap; typedef struct HashmapBase HashmapBase; typedef struct IteratedCache IteratedCache; diff --git a/src/basic/fiber-ops.c b/src/basic/fiber-ops.c new file mode 100644 index 0000000000000..27b795f2c7e58 --- /dev/null +++ b/src/basic/fiber-ops.c @@ -0,0 +1,50 @@ +/* SPDX-License-Identifier: LGPL-2.1-or-later */ +#include +#include +#include + +#include "errno-util.h" +#include "fiber-ops.h" + +static thread_local const FiberOps *fiber_ops = NULL; + +bool fiber_ops_is_set(void) { + return fiber_ops != NULL; +} + +void fiber_ops_set(const FiberOps *ops) { + fiber_ops = ops; +} + +int fiber_ops_ppoll(struct pollfd *fds, size_t n_fds, const struct timespec *timeout, const sigset_t *sigmask) { + if (fiber_ops) + return fiber_ops->ppoll(fds, n_fds, timeout, sigmask); + + return RET_NERRNO(ppoll(fds, n_fds, timeout, sigmask)); +} + +ssize_t fiber_ops_read(int fd, void *buf, size_t count) { + if (fiber_ops) + return fiber_ops->read(fd, buf, count); + + return RET_NERRNO(read(fd, buf, count)); +} + +ssize_t fiber_ops_write(int fd, const void *buf, size_t count) { + if (fiber_ops) + return fiber_ops->write(fd, buf, count); + + return RET_NERRNO(write(fd, buf, count)); +} + +sd_future* fiber_ops_timeout(uint64_t timeout) { + assert(fiber_ops); + + return fiber_ops->timeout(timeout); +} + +sd_future* fiber_ops_cancel_wait_unref(sd_future *f) { + assert(fiber_ops); + + return fiber_ops->cancel_wait_unref(f); +} diff --git a/src/basic/fiber-ops.h b/src/basic/fiber-ops.h new file mode 100644 index 0000000000000..64bd82353ba16 --- /dev/null +++ b/src/basic/fiber-ops.h @@ -0,0 +1,34 @@ +/* SPDX-License-Identifier: LGPL-2.1-or-later */ +#pragma once + +#include "basic-forward.h" + +typedef struct sd_future sd_future; + +/* Hooks installed on a fiber so that functions in src/basic can transparently defer to the suspending + * variants in sd-future when invoked from a running fiber. Populated by sd_fiber_new() with pointers to the + * implementations in fiber-ops.c. */ +typedef struct FiberOps { + int (*ppoll)(struct pollfd *fds, size_t n_fds, const struct timespec *timeout, const sigset_t *sigmask); + ssize_t (*read)(int fd, void *buf, size_t count); + ssize_t (*write)(int fd, const void *buf, size_t count); + sd_future* (*timeout)(uint64_t timeout); + sd_future* (*cancel_wait_unref)(sd_future *f); +} FiberOps; + +bool fiber_ops_is_set(void); +void fiber_ops_set(const FiberOps *fiber_ops); + +int fiber_ops_ppoll(struct pollfd *fds, size_t n_fds, const struct timespec *timeout, const sigset_t *sigmask); +ssize_t fiber_ops_read(int fd, void *buf, size_t count); +ssize_t fiber_ops_write(int fd, const void *buf, size_t count); + +/* Mirror of SD_FIBER_TIMEOUT() for code under src/basic that doesn't include sd-future.h: dispatches + * through FiberOps so the actual sd_fiber_timeout() implementation lives in libsystemd. */ +sd_future* fiber_ops_timeout(uint64_t timeout); +sd_future* fiber_ops_cancel_wait_unref(sd_future *f); +DEFINE_TRIVIAL_CLEANUP_FUNC(sd_future*, fiber_ops_cancel_wait_unref); + +#define FIBER_OPS_TIMEOUT(timeout) _FIBER_OPS_TIMEOUT(UNIQ, (timeout)) +#define _FIBER_OPS_TIMEOUT(uniq, timeout) \ + _unused_ _cleanup_(fiber_ops_cancel_wait_unrefp) sd_future *UNIQ_T(_fot_, uniq) = fiber_ops_timeout(timeout) diff --git a/src/basic/io-util.c b/src/basic/io-util.c index b4f643b5721aa..3dbc3670a4a85 100644 --- a/src/basic/io-util.c +++ b/src/basic/io-util.c @@ -1,5 +1,6 @@ /* SPDX-License-Identifier: LGPL-2.1-or-later */ +#include #include #include #include @@ -8,6 +9,7 @@ #include #include "errno-util.h" +#include "fiber-ops.h" #include "io-util.h" #include "time-util.h" @@ -69,25 +71,44 @@ ssize_t loop_read(int fd, void *buf, size_t nbytes, bool do_poll) { if (nbytes > (size_t) SSIZE_MAX) return -EINVAL; + /* do_poll == false means "don't wait, return what we have if EAGAIN". If the fd is already + * non-blocking, read() can't block the thread, so the non-fiber path satisfies that semantic + * correctly even from a fiber. Only use the fiber path when the fd is blocking (where read() + * would otherwise block the entire event loop). */ + int flags = 0; + if (fiber_ops_is_set() && !do_poll) { + flags = fcntl(fd, F_GETFL); + if (flags < 0) + return -errno; + } + do { ssize_t k; - k = read(fd, p, nbytes); - if (k < 0) { - if (errno == EINTR) - continue; - - if (errno == EAGAIN && do_poll) { - - /* We knowingly ignore any return value here, - * and expect that any error/EOF is reported - * via read() */ - - (void) fd_wait_for_event(fd, POLLIN, USEC_INFINITY); - continue; + if (fiber_ops_is_set() && (do_poll || !FLAGS_SET(flags, O_NONBLOCK))) { + /* On a fiber the read op suspends on EAGAIN until data is available, so we don't + * need a separate poll step or the do_poll knob. */ + k = fiber_ops_read(fd, p, nbytes); + if (k < 0) + return n > 0 ? n : k; + } else { + k = read(fd, p, nbytes); + if (k < 0) { + if (errno == EINTR) + continue; + + if (errno == EAGAIN && do_poll) { + + /* We knowingly ignore any return value here, + * and expect that any error/EOF is reported + * via read() */ + + (void) fd_wait_for_event(fd, POLLIN, USEC_INFINITY); + continue; + } + + return n > 0 ? n : -errno; } - - return n > 0 ? n : -errno; } if (k == 0) @@ -137,6 +158,37 @@ int loop_write_full(int fd, const void *buf, size_t nbytes, usec_t timeout) { p = buf; } + /* timeout == 0 means "don't wait, return -EAGAIN if not ready". If the fd is already + * non-blocking, write() can't block the thread, so the non-fiber path satisfies that + * semantic correctly even from a fiber. Only use the fiber path when the fd is blocking + * (where write() would otherwise block the entire event loop). */ + int flags = 0; + if (fiber_ops_is_set() && timeout == 0) { + flags = fcntl(fd, F_GETFL); + if (flags < 0) + return -errno; + } + + if (fiber_ops_is_set() && !FLAGS_SET(flags, O_NONBLOCK)) { + /* On a fiber the write op suspends on EAGAIN until the fd is writable; honor the + * caller's timeout via a deadline scope. */ + FIBER_OPS_TIMEOUT(timestamp_is_set(timeout) ? timeout : USEC_INFINITY); + + while (nbytes > 0) { + ssize_t k = fiber_ops_write(fd, p, nbytes); + if (k < 0) + return (int) k; + if (_unlikely_(nbytes > 0 && k == 0)) /* Can't really happen */ + return -EIO; + + assert((size_t) k <= nbytes); + p += k; + nbytes -= k; + } + + return 0; + } + /* When timeout is 0 or USEC_INFINITY this is not used. But we initialize it to a sensible value. */ end = timestamp_is_set(timeout) ? usec_add(now(CLOCK_MONOTONIC), timeout) : USEC_INFINITY; @@ -220,11 +272,9 @@ int ppoll_usec_full(struct pollfd *fds, size_t n_fds, usec_t timeout, const sigs if (n_fds == 0 && timeout == 0) return 0; - r = ppoll(fds, n_fds, timeout == USEC_INFINITY ? NULL : TIMESPEC_STORE(timeout), ss); - if (r < 0) - return -errno; - if (r == 0) - return 0; + r = fiber_ops_ppoll(fds, n_fds, timeout == USEC_INFINITY ? NULL : TIMESPEC_STORE(timeout), ss); + if (r <= 0) + return r; for (size_t i = 0, n = r; i < n_fds && n > 0; i++) { if (fds[i].revents == 0) diff --git a/src/basic/meson.build b/src/basic/meson.build index f847b175b61f0..f8968111fea04 100644 --- a/src/basic/meson.build +++ b/src/basic/meson.build @@ -36,6 +36,7 @@ basic_sources = files( 'ether-addr-util.c', 'extract-word.c', 'fd-util.c', + 'fiber-ops.c', 'fileio.c', 'filesystems.c', 'format-ifname.c', diff --git a/src/basic/pidref.c b/src/basic/pidref.c index 10ff9a63b12bc..33131c0586d12 100644 --- a/src/basic/pidref.c +++ b/src/basic/pidref.c @@ -7,6 +7,7 @@ #include "alloc-util.h" #include "errno-util.h" #include "fd-util.h" +#include "fiber-ops.h" #include "format-util.h" #include "hash-funcs.h" #include "io-util.h" @@ -466,16 +467,28 @@ int pidref_wait_for_terminate_full(PidRef *pidref, usec_t timeout, siginfo_t *re if (pidref->pid == 1 || pidref_is_self(pidref)) return -ECHILD; - if (timeout != USEC_INFINITY && pidref->fd < 0) + if (pidref->fd < 0 && (timeout != USEC_INFINITY || fiber_ops_is_set())) return -ENOMEDIUM; usec_t ts = timeout == USEC_INFINITY ? USEC_INFINITY : usec_add(now(CLOCK_MONOTONIC), timeout); + /* Poll the pidfd before waitid() if either there's a finite timeout (so we can honor it) or + * we're on a fiber (so fd_wait_for_event() can suspend us instead of blocking the event loop + * inside waitid()). Otherwise let waitid() block directly. The precondition above guarantees + * pidref->fd >= 0 in both cases. */ + bool poll_first = ts != USEC_INFINITY || fiber_ops_is_set(); + for (;;) { - if (ts != USEC_INFINITY) { - usec_t left = usec_sub_unsigned(ts, now(CLOCK_MONOTONIC)); - if (left == 0) - return -ETIMEDOUT; + if (poll_first) { + usec_t left; + + if (ts == USEC_INFINITY) + left = USEC_INFINITY; + else { + left = usec_sub_unsigned(ts, now(CLOCK_MONOTONIC)); + if (left == 0) + return -ETIMEDOUT; + } r = fd_wait_for_event(pidref->fd, POLLIN, left); if (r == 0) diff --git a/src/libsystemd/meson.build b/src/libsystemd/meson.build index bc3d54eb12f6f..d3b54a2f2c43d 100644 --- a/src/libsystemd/meson.build +++ b/src/libsystemd/meson.build @@ -193,6 +193,7 @@ simple_tests += files( 'sd-device/test-sd-device-monitor.c', 'sd-future/test-fiber.c', 'sd-future/test-fiber-io.c', + 'sd-future/test-fiber-ops.c', 'sd-hwdb/test-sd-hwdb.c', 'sd-id128/test-id128.c', 'sd-journal/test-audit-type.c', diff --git a/src/libsystemd/sd-future/fiber.c b/src/libsystemd/sd-future/fiber.c index 7ee94c709910b..3c4b348efb9e5 100644 --- a/src/libsystemd/sd-future/fiber.c +++ b/src/libsystemd/sd-future/fiber.c @@ -27,6 +27,7 @@ #include "env-util.h" #include "errno-util.h" #include "event-future.h" +#include "fiber-ops.h" #include "log-context.h" #include "log.h" #include "memory-util.h" @@ -406,6 +407,7 @@ static void reset_current_fiber(void) { Fiber *f = fiber_get_current(); if (f) { fiber_swap_log_state(f); + fiber_ops_set(NULL); } fiber_set_current(NULL); } @@ -442,9 +444,19 @@ static void fiber_resolve(sd_future *f) { sd_future_resolve(f, fiber->result); } +static const FiberOps fiber_ops = { + .ppoll = sd_fiber_ppoll, + .read = sd_fiber_read, + .write = sd_fiber_write, + .timeout = sd_fiber_timeout, + .cancel_wait_unref = sd_future_cancel_wait_unref, +}; + static void fiber_enter(Fiber *fiber, Fiber *prev, void **fake_stack_save) { fiber_set_current(fiber); fiber_swap_log_state(fiber); + if (!prev) + fiber_ops_set(&fiber_ops); struct iovec fiber_stack = fiber_stack_usable(&fiber->stack); start_switch_stack(fake_stack_save, &fiber_stack); @@ -453,6 +465,8 @@ static void fiber_enter(Fiber *fiber, Fiber *prev, void **fake_stack_save) { static void fiber_leave(Fiber *fiber, Fiber *prev, void *fake_stack_save) { finish_switch_stack(fake_stack_save); + if (!prev) + fiber_ops_set(NULL); fiber_swap_log_state(fiber); fiber_set_current(prev); } diff --git a/src/libsystemd/sd-future/test-fiber-ops.c b/src/libsystemd/sd-future/test-fiber-ops.c new file mode 100644 index 0000000000000..f2c603fda0b87 --- /dev/null +++ b/src/libsystemd/sd-future/test-fiber-ops.c @@ -0,0 +1,574 @@ +/* SPDX-License-Identifier: LGPL-2.1-or-later */ + +#include +#include +#include + +#include "sd-event.h" +#include "sd-future.h" + +#include "alloc-util.h" +#include "cleanup-util.h" +#include "fd-util.h" +#include "io-util.h" +#include "pidref.h" +#include "process-util.h" +#include "tests.h" +#include "time-util.h" + +/* Test: wait_for_terminate basic functionality */ +static int wait_simple_fiber(void *userdata) { + _cleanup_(pidref_done_sigkill_wait) PidRef pidref = PIDREF_NULL; + siginfo_t si; + int r; + + /* Fork a child that exits immediately */ + r = pidref_safe_fork("(test-child)", FORK_RESET_SIGNALS|FORK_DEATHSIG_SIGTERM|FORK_LOG, &pidref); + if (r < 0) + return r; + + if (r == 0) + _exit(42); + + /* Parent - wait for child */ + r = pidref_wait_for_terminate(&pidref, &si); + if (r < 0) + return r; + + pidref_done(&pidref); + + /* Verify child exited with status 42 */ + if (si.si_code != CLD_EXITED || si.si_status != 42) + return -EIO; + + return 0; +} + +TEST(wait_for_terminate_fiber_basic) { + _cleanup_(sd_event_unrefp) sd_event *e = NULL; + ASSERT_OK(sd_event_new(&e)); + ASSERT_OK(sd_event_set_exit_on_idle(e, true)); + + _cleanup_(sd_future_unrefp) sd_future *f = NULL; + ASSERT_OK(sd_fiber_new(e, "wait-simple", wait_simple_fiber, NULL, /* destroy= */ NULL, &f)); + + ASSERT_OK(sd_event_loop(e)); + ASSERT_OK(sd_future_result(f)); +} + +/* Test: wait_for_terminate with multiple children */ +static int wait_multiple_fiber(void *userdata) { + PidRef pidrefs[3] = { PIDREF_NULL, PIDREF_NULL, PIDREF_NULL }; + siginfo_t si; + int r; + + /* Fork three children with different exit codes */ + for (size_t i = 0; i < 3; i++) { + r = pidref_safe_fork("(test-child)", FORK_RESET_SIGNALS|FORK_DEATHSIG_SIGTERM|FORK_LOG, &pidrefs[i]); + if (r < 0) + goto cleanup; + + if (r == 0) + /* Child process */ + _exit(10 + i); + } + + /* Wait for all three in order */ + for (size_t i = 0; i < 3; i++) { + r = pidref_wait_for_terminate(&pidrefs[i], &si); + if (r < 0) + goto cleanup; + + pidref_done(&pidrefs[i]); + + if (si.si_code != CLD_EXITED || si.si_status != (int) (10 + i)) { + r = -EIO; + goto cleanup; + } + } + + return 0; + +cleanup: + for (size_t i = 0; i < 3; i++) + pidref_done(&pidrefs[i]); + + return r; +} + +TEST(wait_for_terminate_fiber_multiple) { + _cleanup_(sd_event_unrefp) sd_event *e = NULL; + ASSERT_OK(sd_event_new(&e)); + ASSERT_OK(sd_event_set_exit_on_idle(e, true)); + + _cleanup_(sd_future_unrefp) sd_future *f = NULL; + ASSERT_OK(sd_fiber_new(e, "wait-multiple", wait_multiple_fiber, NULL, /* destroy= */ NULL, &f)); + + ASSERT_OK(sd_event_loop(e)); + ASSERT_OK(sd_future_result(f)); +} + +static int concurrent_wait_fiber(void *userdata) { + _cleanup_(pidref_done_sigkill_wait) PidRef pidref = PIDREF_NULL; + siginfo_t si; + int r; + + r = pidref_safe_fork("(test-child)", FORK_RESET_SIGNALS|FORK_DEATHSIG_SIGTERM|FORK_LOG, &pidref); + if (r < 0) + return r; + + if (r == 0) + /* Child exits with specified status */ + _exit(PTR_TO_INT(userdata)); + + r = pidref_wait_for_terminate(&pidref, &si); + if (r < 0) + return r; + + pidref_done(&pidref); + + if (si.si_code != CLD_EXITED || si.si_status != PTR_TO_INT(userdata)) + return -EIO; + + return 0; +} + +TEST(wait_for_terminate_fiber_concurrent) { + _cleanup_(sd_event_unrefp) sd_event *e = NULL; + ASSERT_OK(sd_event_new(&e)); + ASSERT_OK(sd_event_set_exit_on_idle(e, true)); + + sd_future *fibers[3] = {}; + CLEANUP_ELEMENTS(fibers, sd_future_unref_array_clear); + + /* Create 3 fibers, each waiting for a different child */ + for (size_t i = 0; i < ELEMENTSOF(fibers); i++) + ASSERT_OK(sd_fiber_new(e, "concurrent-wait", concurrent_wait_fiber, INT_TO_PTR(20 + i), /* destroy= */ NULL, &fibers[i])); + + ASSERT_OK(sd_event_loop(e)); + + /* All fibers should complete successfully */ + for (size_t i = 0; i < ELEMENTSOF(fibers); i++) + ASSERT_OK(sd_future_result(fibers[i])); +} + +typedef struct LoopIOContext { + int *pipefd; + const char *data; + size_t len; + int order; +} LoopIOContext; + +static int loop_read_suspend_fiber(void *userdata) { + LoopIOContext *ctx = ASSERT_PTR(userdata); + char buf[64]; + + ASSERT_EQ(ctx->order, 0); + ctx->order = 1; + + ssize_t n = loop_read(ctx->pipefd[0], buf, sizeof(buf), /* do_poll= */ true); + + /* While we were suspended, the writer fiber should have run. */ + ASSERT_EQ(ctx->order, 2); + + if (n < 0) + return (int) n; + if ((size_t) n != ctx->len || memcmp(buf, ctx->data, ctx->len) != 0) + return -EIO; + + return (int) n; +} + +static int loop_write_suspend_fiber(void *userdata) { + LoopIOContext *ctx = ASSERT_PTR(userdata); + + ASSERT_EQ(ctx->order, 1); + ctx->order = 2; + + int r = loop_write(ctx->pipefd[1], ctx->data, ctx->len); + if (r < 0) + return r; + + /* Close the write end so the reader sees EOF after reading the data. */ + ctx->pipefd[1] = safe_close(ctx->pipefd[1]); + return 0; +} + +/* Test: two fibers cooperatively pass a small payload through a blocking pipe using the suspending + * loop helpers. Exercises the non-blocking flip, event-loop yielding, and the blocking-mode restore. */ +TEST(loop_read_write_suspend) { + _cleanup_(sd_event_unrefp) sd_event *e = NULL; + ASSERT_OK(sd_event_new(&e)); + ASSERT_OK(sd_event_set_exit_on_idle(e, true)); + + _cleanup_close_pair_ int pipefd[2] = EBADF_PAIR; + ASSERT_OK_ERRNO(pipe2(pipefd, O_CLOEXEC)); + + static const char payload[] = "loop-suspend"; + LoopIOContext ctx = { + .pipefd = pipefd, + .data = payload, + .len = sizeof(payload) - 1, + }; + + _cleanup_(sd_future_unrefp) sd_future *fr = NULL, *fw = NULL; + ASSERT_OK(sd_fiber_new(e, "loop-read", loop_read_suspend_fiber, &ctx, /* destroy= */ NULL, &fr)); + ASSERT_OK(sd_future_set_priority(fr, 0)); + ASSERT_OK(sd_fiber_new(e, "loop-write", loop_write_suspend_fiber, &ctx, /* destroy= */ NULL, &fw)); + ASSERT_OK(sd_future_set_priority(fw, 1)); + + ASSERT_OK(sd_event_loop(e)); + + ASSERT_OK_EQ(sd_future_result(fr), (int) ctx.len); + ASSERT_OK_ZERO(sd_future_result(fw)); + + /* The read fd started out blocking and loop_read() must have restored it before returning. */ + ASSERT_OK_ZERO(fcntl(pipefd[0], F_GETFL) & O_NONBLOCK); +} + +static int loop_read_exact_short_fiber(void *userdata) { + int fd = PTR_TO_INT(userdata); + char buf[16]; + + /* Requesting more bytes than the peer writes should return -EIO once EOF is hit. */ + return loop_read_exact(fd, buf, sizeof(buf), /* do_poll= */ true); +} + +/* Test: loop_read_exact() returns -EIO when the peer closes early. */ +TEST(loop_read_exact_short) { + _cleanup_(sd_event_unrefp) sd_event *e = NULL; + ASSERT_OK(sd_event_new(&e)); + ASSERT_OK(sd_event_set_exit_on_idle(e, true)); + + _cleanup_close_pair_ int pipefd[2] = EBADF_PAIR; + ASSERT_OK_ERRNO(pipe2(pipefd, O_CLOEXEC)); + + _cleanup_(sd_future_unrefp) sd_future *f = NULL; + ASSERT_OK(sd_fiber_new(e, "loop-read-exact", loop_read_exact_short_fiber, + INT_TO_PTR(pipefd[0]), /* destroy= */ NULL, &f)); + + /* Write a few bytes and close the write end — less than the fiber asked for. */ + ASSERT_OK_EQ_ERRNO(write(pipefd[1], "abc", 3), (ssize_t) 3); + pipefd[1] = safe_close(pipefd[1]); + + ASSERT_OK(sd_event_loop(e)); + + ASSERT_ERROR(sd_future_result(f), EIO); +} + +typedef struct LoopWriteTimeoutContext { + int fd; + int result; +} LoopWriteTimeoutContext; + +static int loop_write_timeout_fiber(void *userdata) { + LoopWriteTimeoutContext *ctx = ASSERT_PTR(userdata); + + /* Try to write much more than the pipe buffer can hold with a short timeout. The write will + * succeed partially and then hit -ETIME after exhausting the timeout while blocked. */ + static const char big_buf[128 * 1024] = { 0 }; + ctx->result = loop_write_full(ctx->fd, big_buf, sizeof(big_buf), 100 * USEC_PER_MSEC); + return 0; +} + +/* Test: loop_write_full() returns -ETIME when the peer never drains. */ +TEST(loop_write_full_timeout) { + _cleanup_(sd_event_unrefp) sd_event *e = NULL; + ASSERT_OK(sd_event_new(&e)); + ASSERT_OK(sd_event_set_exit_on_idle(e, true)); + + _cleanup_close_pair_ int pipefd[2] = EBADF_PAIR; + ASSERT_OK_ERRNO(pipe2(pipefd, O_CLOEXEC)); + + /* Shrink the pipe buffer to its minimum (one page) so the 128K write below is guaranteed to block + * regardless of the architecture's page size. The default pipe buffer is 16 pages, which on + * 64K-page architectures (e.g. ppc64le) is 1 MiB — enough to absorb the entire write without ever + * blocking, defeating the purpose of the timeout. */ + ASSERT_OK_ERRNO(fcntl(pipefd[1], F_SETPIPE_SZ, 1)); + + LoopWriteTimeoutContext ctx = { .fd = pipefd[1], .result = 0 }; + _cleanup_(sd_future_unrefp) sd_future *f = NULL; + ASSERT_OK(sd_fiber_new(e, "loop-write-timeout", loop_write_timeout_fiber, &ctx, /* destroy= */ NULL, &f)); + + ASSERT_OK(sd_event_loop(e)); + + ASSERT_OK_ZERO(sd_future_result(f)); + ASSERT_ERROR(ctx.result, ETIME); +} + +typedef struct PpollDispatchContext { + int *pipefd; + int order; +} PpollDispatchContext; + +static int ppoll_dispatch_read_fiber(void *userdata) { + PpollDispatchContext *ctx = ASSERT_PTR(userdata); + struct pollfd pfd = { + .fd = ctx->pipefd[0], + .events = POLLIN, + }; + + ASSERT_EQ(ctx->order, 0); + ctx->order = 1; + + /* Direct ppoll_usec() call from a fiber must dispatch through sd_fiber_poll(), suspending the + * fiber instead of blocking the entire thread. If dispatch fails, the writer fiber never gets a + * chance to run and the test deadlocks. */ + int r = ppoll_usec(&pfd, 1, USEC_INFINITY); + if (r < 0) + return r; + + ASSERT_EQ(ctx->order, 2); + + if (r != 1 || !FLAGS_SET(pfd.revents, POLLIN)) + return -EIO; + + return 0; +} + +static int ppoll_dispatch_write_fiber(void *userdata) { + PpollDispatchContext *ctx = ASSERT_PTR(userdata); + + ASSERT_EQ(ctx->order, 1); + ctx->order = 2; + + if (write(ctx->pipefd[1], "x", 1) < 0) + return -errno; + + return 0; +} + +/* Test: ppoll_usec() called from a fiber dispatches through the FiberOps hook to sd_fiber_poll(), + * yielding to the event loop instead of blocking. */ +TEST(ppoll_usec_dispatch) { + _cleanup_(sd_event_unrefp) sd_event *e = NULL; + ASSERT_OK(sd_event_new(&e)); + ASSERT_OK(sd_event_set_exit_on_idle(e, true)); + + _cleanup_close_pair_ int pipefd[2] = EBADF_PAIR; + ASSERT_OK_ERRNO(pipe2(pipefd, O_CLOEXEC | O_NONBLOCK)); + + PpollDispatchContext ctx = { .pipefd = pipefd }; + + _cleanup_(sd_future_unrefp) sd_future *fr = NULL, *fw = NULL; + ASSERT_OK(sd_fiber_new(e, "ppoll-read", ppoll_dispatch_read_fiber, &ctx, /* destroy= */ NULL, &fr)); + ASSERT_OK(sd_future_set_priority(fr, 0)); + ASSERT_OK(sd_fiber_new(e, "ppoll-write", ppoll_dispatch_write_fiber, &ctx, /* destroy= */ NULL, &fw)); + ASSERT_OK(sd_future_set_priority(fw, 1)); + + ASSERT_OK(sd_event_loop(e)); + ASSERT_OK(sd_future_result(fr)); + ASSERT_OK(sd_future_result(fw)); +} + +static int loop_write_zero_timeout_nonblock_fiber(void *userdata) { + int fd = PTR_TO_INT(userdata); + + /* Fill the pipe so the next write would block. The fd is non-blocking, so on a fiber + * loop_write_full(timeout=0) must take the non-fiber path and return -EAGAIN immediately + * rather than suspending. */ + static const char big_buf[128 * 1024] = { 0 }; + return loop_write_full(fd, big_buf, sizeof(big_buf), /* timeout= */ 0); +} + +/* Test: timeout == 0 on a non-blocking fd from a fiber preserves the "don't wait" semantic and + * returns -EAGAIN when the pipe buffer is full, instead of suspending the fiber. */ +TEST(loop_write_zero_timeout_nonblock) { + _cleanup_(sd_event_unrefp) sd_event *e = NULL; + ASSERT_OK(sd_event_new(&e)); + ASSERT_OK(sd_event_set_exit_on_idle(e, true)); + + _cleanup_close_pair_ int pipefd[2] = EBADF_PAIR; + ASSERT_OK_ERRNO(pipe2(pipefd, O_CLOEXEC | O_NONBLOCK)); + ASSERT_OK_ERRNO(fcntl(pipefd[1], F_SETPIPE_SZ, 1)); + + _cleanup_(sd_future_unrefp) sd_future *f = NULL; + ASSERT_OK(sd_fiber_new(e, "loop-write-zt-nb", loop_write_zero_timeout_nonblock_fiber, + INT_TO_PTR(pipefd[1]), /* destroy= */ NULL, &f)); + + ASSERT_OK(sd_event_loop(e)); + ASSERT_ERROR(sd_future_result(f), EAGAIN); +} + +typedef struct LoopWriteZeroBlockingContext { + int *pipefd; + size_t total; + int order; +} LoopWriteZeroBlockingContext; + +static int loop_write_zero_blocking_writer_fiber(void *userdata) { + LoopWriteZeroBlockingContext *ctx = ASSERT_PTR(userdata); + + ASSERT_EQ(ctx->order, 0); + ctx->order = 1; + + /* timeout == 0 on a *blocking* fd from a fiber: the fast EAGAIN return isn't possible, so + * loop_write_full() takes the fiber path. The reader fiber drains the pipe, letting our + * write complete via fiber suspension/resume. */ + _cleanup_free_ char *big_buf = malloc0(ctx->total); + ASSERT_NOT_NULL(big_buf); + int r = loop_write_full(ctx->pipefd[1], big_buf, ctx->total, /* timeout= */ 0); + + ASSERT_EQ(ctx->order, 2); + return r; +} + +static int loop_write_zero_blocking_reader_fiber(void *userdata) { + LoopWriteZeroBlockingContext *ctx = ASSERT_PTR(userdata); + + ASSERT_EQ(ctx->order, 1); + ctx->order = 2; + + _cleanup_free_ char *buf = malloc(ctx->total); + ASSERT_NOT_NULL(buf); + ssize_t n = loop_read(ctx->pipefd[0], buf, ctx->total, /* do_poll= */ true); + if (n < 0) + return (int) n; + return (int) n; +} + +/* Test: timeout == 0 on a blocking fd from a fiber takes the fiber path (suspends until the peer + * drains) instead of blocking the entire thread. */ +TEST(loop_write_zero_timeout_blocking) { + _cleanup_(sd_event_unrefp) sd_event *e = NULL; + ASSERT_OK(sd_event_new(&e)); + ASSERT_OK(sd_event_set_exit_on_idle(e, true)); + + _cleanup_close_pair_ int pipefd[2] = EBADF_PAIR; + ASSERT_OK_ERRNO(pipe2(pipefd, O_CLOEXEC)); + ASSERT_OK_ERRNO(fcntl(pipefd[1], F_SETPIPE_SZ, 1)); + + /* F_SETPIPE_SZ rounds up to the kernel's pipe minimum (typically a page); query the actual + * size and write more than that, so the write must wait on the reader regardless of page size. */ + int pipe_sz = fcntl(pipefd[1], F_GETPIPE_SZ); + ASSERT_OK_ERRNO(pipe_sz); + + LoopWriteZeroBlockingContext ctx = { .pipefd = pipefd, .total = (size_t) pipe_sz * 2 }; + + _cleanup_(sd_future_unrefp) sd_future *fw = NULL, *fr = NULL; + ASSERT_OK(sd_fiber_new(e, "loop-write-zt-blk", loop_write_zero_blocking_writer_fiber, + &ctx, /* destroy= */ NULL, &fw)); + ASSERT_OK(sd_future_set_priority(fw, 0)); + ASSERT_OK(sd_fiber_new(e, "loop-read-zt-blk", loop_write_zero_blocking_reader_fiber, + &ctx, /* destroy= */ NULL, &fr)); + ASSERT_OK(sd_future_set_priority(fr, 1)); + + ASSERT_OK(sd_event_loop(e)); + ASSERT_OK(sd_future_result(fw)); + ASSERT_OK_EQ(sd_future_result(fr), (int) ctx.total); +} + +static int loop_read_no_poll_nonblock_fiber(void *userdata) { + int fd = PTR_TO_INT(userdata); + char buf[64]; + + /* Empty non-blocking pipe + do_poll=false: on a fiber loop_read() must take the non-fiber + * path and return -EAGAIN immediately rather than suspending. */ + return (int) loop_read(fd, buf, sizeof(buf), /* do_poll= */ false); +} + +/* Test: do_poll == false on a non-blocking fd from a fiber preserves the "don't wait" semantic + * and returns -EAGAIN when no data is available, instead of suspending the fiber. */ +TEST(loop_read_no_poll_nonblock) { + _cleanup_(sd_event_unrefp) sd_event *e = NULL; + ASSERT_OK(sd_event_new(&e)); + ASSERT_OK(sd_event_set_exit_on_idle(e, true)); + + _cleanup_close_pair_ int pipefd[2] = EBADF_PAIR; + ASSERT_OK_ERRNO(pipe2(pipefd, O_CLOEXEC | O_NONBLOCK)); + + _cleanup_(sd_future_unrefp) sd_future *f = NULL; + ASSERT_OK(sd_fiber_new(e, "loop-read-np-nb", loop_read_no_poll_nonblock_fiber, + INT_TO_PTR(pipefd[0]), /* destroy= */ NULL, &f)); + + ASSERT_OK(sd_event_loop(e)); + ASSERT_ERROR(sd_future_result(f), EAGAIN); +} + +typedef struct LoopReadNoPollBlockingContext { + int *pipefd; + const char *data; + size_t len; + int order; +} LoopReadNoPollBlockingContext; + +static int loop_read_no_poll_blocking_reader_fiber(void *userdata) { + LoopReadNoPollBlockingContext *ctx = ASSERT_PTR(userdata); + char buf[64]; + + ASSERT_EQ(ctx->order, 0); + ctx->order = 1; + + /* do_poll == false on a *blocking* fd from a fiber: the fast EAGAIN return isn't possible, + * so loop_read() takes the fiber path and suspends until the writer fiber feeds data. */ + ssize_t n = loop_read(ctx->pipefd[0], buf, sizeof(buf), /* do_poll= */ false); + + ASSERT_EQ(ctx->order, 2); + + if (n < 0) + return (int) n; + if ((size_t) n != ctx->len || memcmp(buf, ctx->data, ctx->len) != 0) + return -EIO; + + return (int) n; +} + +static int loop_read_no_poll_blocking_writer_fiber(void *userdata) { + LoopReadNoPollBlockingContext *ctx = ASSERT_PTR(userdata); + + ASSERT_EQ(ctx->order, 1); + ctx->order = 2; + + int r = loop_write(ctx->pipefd[1], ctx->data, ctx->len); + if (r < 0) + return r; + + ctx->pipefd[1] = safe_close(ctx->pipefd[1]); + return 0; +} + +/* Test: do_poll == false on a blocking fd from a fiber takes the fiber path (suspends until the + * peer feeds data) instead of blocking the entire thread. */ +TEST(loop_read_no_poll_blocking) { + _cleanup_(sd_event_unrefp) sd_event *e = NULL; + ASSERT_OK(sd_event_new(&e)); + ASSERT_OK(sd_event_set_exit_on_idle(e, true)); + + _cleanup_close_pair_ int pipefd[2] = EBADF_PAIR; + ASSERT_OK_ERRNO(pipe2(pipefd, O_CLOEXEC)); + + static const char payload[] = "no-poll"; + LoopReadNoPollBlockingContext ctx = { + .pipefd = pipefd, + .data = payload, + .len = sizeof(payload) - 1, + }; + + _cleanup_(sd_future_unrefp) sd_future *fr = NULL, *fw = NULL; + ASSERT_OK(sd_fiber_new(e, "loop-read-np-blk", loop_read_no_poll_blocking_reader_fiber, + &ctx, /* destroy= */ NULL, &fr)); + ASSERT_OK(sd_future_set_priority(fr, 0)); + ASSERT_OK(sd_fiber_new(e, "loop-write-np-blk", loop_read_no_poll_blocking_writer_fiber, + &ctx, /* destroy= */ NULL, &fw)); + ASSERT_OK(sd_future_set_priority(fw, 1)); + + ASSERT_OK(sd_event_loop(e)); + ASSERT_OK_EQ(sd_future_result(fr), (int) ctx.len); + ASSERT_OK_ZERO(sd_future_result(fw)); +} + +/* Test: loop_*() helpers transparently fall back to blocking I/O when called outside any + * fiber context. */ +TEST(loop_read_write_fallback) { + _cleanup_close_pair_ int pipefd[2] = EBADF_PAIR; + ASSERT_OK_ERRNO(pipe2(pipefd, O_CLOEXEC)); + + ASSERT_OK(loop_write(pipefd[1], "fallback", STRLEN("fallback"))); + + char buf[16]; + ssize_t n = loop_read(pipefd[0], buf, STRLEN("fallback"), /* do_poll= */ true); + ASSERT_OK_EQ(n, (ssize_t) STRLEN("fallback")); + ASSERT_EQ(memcmp(buf, "fallback", STRLEN("fallback")), 0); +} + +DEFINE_TEST_MAIN(LOG_DEBUG); From 036b53c16651f06c4ac234d639ced93ad6221fb8 Mon Sep 17 00:00:00 2001 From: Daan De Meyer Date: Mon, 23 Mar 2026 10:15:27 +0100 Subject: [PATCH 238/242] sd-event: suspend instead of blocking when sd_event_run() runs on a fiber MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit sd_event_run() blocks the calling thread on the event loop's epoll fd until something happens. When the caller is a fiber, that's the wrong behaviour: blocking the thread also stalls every other fiber and the outer event loop driving them. The most common way to hit this is a fiber that creates its own inner event loop (e.g. a server-style fiber that wants to dispatch its own sources independently of whatever loop the test or supervising fiber is running on) — with the existing implementation the inner sd_event_run() would hold the thread while the outer scheduler should be free to advance other fibers. Add an event_run_suspend() variant in sd-event/event-future.c that performs the same prepare/wait/dispatch dance, but when the fast path finds nothing ready it (a) creates an IO future watching the inner event loop's epoll fd on the *outer* event loop, (b) optionally creates a time future for the timeout, and (c) suspends the fiber. When either future fires the fiber is resumed and the prepare/wait/dispatch sequence runs once more to actually dispatch what's pending. sd_event_run() checks sd_fiber_is_running() and delegates to this variant when on a fiber; profile_delays accounting is intentionally skipped on that path since the underlying prepare/wait/dispatch primitives already account for themselves. --- src/libsystemd/meson.build | 1 + src/libsystemd/sd-event/event-future.c | 71 ++++ src/libsystemd/sd-event/event-future.h | 2 + src/libsystemd/sd-event/event-util.h | 3 + src/libsystemd/sd-event/sd-event.c | 13 +- src/libsystemd/sd-event/test-event-future.c | 358 ++++++++++++++++++++ 6 files changed, 445 insertions(+), 3 deletions(-) create mode 100644 src/libsystemd/sd-event/test-event-future.c diff --git a/src/libsystemd/meson.build b/src/libsystemd/meson.build index d3b54a2f2c43d..3365869625592 100644 --- a/src/libsystemd/meson.build +++ b/src/libsystemd/meson.build @@ -191,6 +191,7 @@ simple_tests += files( 'sd-bus/test-bus-vtable.c', 'sd-device/test-device-util.c', 'sd-device/test-sd-device-monitor.c', + 'sd-event/test-event-future.c', 'sd-future/test-fiber.c', 'sd-future/test-fiber-io.c', 'sd-future/test-fiber-ops.c', diff --git a/src/libsystemd/sd-event/event-future.c b/src/libsystemd/sd-event/event-future.c index 2902c047c0f37..bd0ec7e5212a7 100644 --- a/src/libsystemd/sd-event/event-future.c +++ b/src/libsystemd/sd-event/event-future.c @@ -6,6 +6,7 @@ #include "alloc-util.h" #include "errno-util.h" #include "event-future.h" +#include "event-util.h" #include "fd-util.h" typedef struct IoFuture { @@ -245,3 +246,73 @@ int future_new_time_relative(sd_event *e, clockid_t clock, uint64_t usec, uint64 *ret = TAKE_PTR(f); return 0; } + +int event_run_suspend(sd_event *e, uint64_t timeout) { + int r; + + assert(e); + assert(sd_fiber_is_running()); + assert(sd_fiber_get_event()); + assert(e != sd_fiber_get_event()); + + /* Make sure that none of the preparation callbacks ends up freeing the event source under our feet */ + PROTECT_EVENT(e); + + r = sd_event_prepare(e); + if (r < 0) + return r; + if (r == 0) { + r = sd_event_wait(e, 0); + if (r < 0) + return r; + } + if (r > 0) { + r = sd_event_dispatch(e); + if (r < 0) + return r; + + return 1; + } + + if (timeout == 0) + return 0; + + int fd = sd_event_get_fd(e); + if (fd < 0) + return fd; + + _cleanup_(sd_future_cancel_wait_unrefp) sd_future *io = NULL; + r = future_new_io(sd_fiber_get_event(), fd, EPOLLIN, &io); + if (r < 0) + return r; + + _cleanup_(sd_future_cancel_wait_unrefp) sd_future *timer = NULL; + if (timeout != USEC_INFINITY) { + r = future_new_time_relative( + sd_fiber_get_event(), + CLOCK_MONOTONIC, + timeout, + /* accuracy= */ 1, + /* result= */ 0, + &timer); + if (r < 0) + return r; + } + + r = sd_fiber_suspend(); + if (r < 0) + return r; + + r = sd_event_prepare(e); + if (r == 0) + r = sd_event_wait(e, 0); + if (r > 0) { + r = sd_event_dispatch(e); + if (r < 0) + return r; + + return 1; + } + + return r; +} diff --git a/src/libsystemd/sd-event/event-future.h b/src/libsystemd/sd-event/event-future.h index 3bc275e7b7ac9..83d5939d6b02d 100644 --- a/src/libsystemd/sd-event/event-future.h +++ b/src/libsystemd/sd-event/event-future.h @@ -6,3 +6,5 @@ int future_new_io(sd_event *e, int fd, uint32_t events, sd_future **ret); int future_new_time(sd_event *e, clockid_t clock, uint64_t usec, uint64_t accuracy, int result, sd_future **ret); int future_new_time_relative(sd_event *e, clockid_t clock, uint64_t usec, uint64_t accuracy, int result, sd_future **ret); + +int event_run_suspend(sd_event *e, uint64_t timeout); diff --git a/src/libsystemd/sd-event/event-util.h b/src/libsystemd/sd-event/event-util.h index dc3b3ed70ff12..ce213b9c9e4d9 100644 --- a/src/libsystemd/sd-event/event-util.h +++ b/src/libsystemd/sd-event/event-util.h @@ -5,6 +5,9 @@ #include "sd-forward.h" +#define PROTECT_EVENT(e) \ + _unused_ _cleanup_(sd_event_unrefp) sd_event *_ref = sd_event_ref(e); + extern const struct hash_ops event_source_hash_ops; int event_reset_time( diff --git a/src/libsystemd/sd-event/sd-event.c b/src/libsystemd/sd-event/sd-event.c index 9e7ba7813cde9..95ecf8fe144b1 100644 --- a/src/libsystemd/sd-event/sd-event.c +++ b/src/libsystemd/sd-event/sd-event.c @@ -10,12 +10,15 @@ #include "sd-daemon.h" #include "sd-event.h" +#include "sd-future.h" #include "sd-id128.h" #include "sd-messages.h" #include "alloc-util.h" #include "errno-util.h" +#include "event-future.h" #include "event-source.h" +#include "event-util.h" #include "fd-util.h" #include "format-util.h" #include "glyph-util.h" @@ -474,9 +477,6 @@ _public_ sd_event* sd_event_unref(sd_event *e) { return event_free(e); } -#define PROTECT_EVENT(e) \ - _unused_ _cleanup_(sd_event_unrefp) sd_event *_ref = sd_event_ref(e); - _public_ sd_event_source* sd_event_source_disable_unref(sd_event_source *s) { int r; @@ -4943,6 +4943,13 @@ _public_ int sd_event_run(sd_event *e, uint64_t timeout) { assert_return(e->state != SD_EVENT_FINISHED, -ESTALE); assert_return(e->state == SD_EVENT_INITIAL, -EBUSY); + /* When running on a fiber, delegate to the suspending implementation. Note that the + * profile_delays accounting below is intentionally skipped on that path: the suspending variant + * drives the event loop via sd_event_prepare()/sd_event_wait()/sd_event_dispatch() itself, which + * are the same primitives profile_delays tracks when called directly. */ + if (sd_fiber_is_running()) + return event_run_suspend(e, timeout); + if (e->profile_delays && e->last_run_usec != 0) { usec_t this_run; unsigned l; diff --git a/src/libsystemd/sd-event/test-event-future.c b/src/libsystemd/sd-event/test-event-future.c new file mode 100644 index 0000000000000..754daf0df2fc4 --- /dev/null +++ b/src/libsystemd/sd-event/test-event-future.c @@ -0,0 +1,358 @@ +/* SPDX-License-Identifier: LGPL-2.1-or-later */ + +#include +#include + +#include "sd-event.h" +#include "sd-future.h" + +#include "fd-util.h" +#include "tests.h" +#include "time-util.h" + +static int timer_callback(sd_event_source *s, uint64_t usec, void *userdata) { + int *count = ASSERT_PTR(userdata); + int r; + + (*count)++; + + r = sd_event_source_set_time_relative(s, 5 * USEC_PER_MSEC); + if (r < 0) + return r; + + if (sd_fiber_is_running() && *count >= 3) + return sd_event_exit(sd_event_source_get_event(s), 0); + + return 0; +} + +static int event_run_fiber_func(void *userdata) { + _cleanup_(sd_event_unrefp) sd_event *inner = NULL; + _cleanup_(sd_event_source_unrefp) sd_event_source *inner_timer = NULL; + int r; + + /* Create inner event loop from within the fiber */ + r = sd_event_new(&inner); + if (r < 0) + return r; + + /* Add a timer to the inner event loop that fires every 5ms */ + r = sd_event_add_time_relative(inner, &inner_timer, CLOCK_MONOTONIC, + 5 * USEC_PER_MSEC, 0, timer_callback, + userdata); + if (r < 0) + return r; + + r = sd_event_source_set_enabled(inner_timer, SD_EVENT_ON); + if (r < 0) + return r; + + return sd_event_loop(inner); +} + +TEST(sd_event_loop_fiber) { + /* Create outer event loop for the fiber scheduler */ + _cleanup_(sd_event_unrefp) sd_event *outer = NULL; + ASSERT_OK(sd_event_new(&outer)); + ASSERT_OK(sd_event_set_exit_on_idle(outer, true)); + + /* Add a timer to the outer event loop that fires every 5ms */ + _cleanup_(sd_event_source_unrefp) sd_event_source *outer_timer = NULL; + int outer_timer_count = 0; + ASSERT_OK(sd_event_add_time_relative(outer, &outer_timer, CLOCK_MONOTONIC, + 5 * USEC_PER_MSEC, 0, timer_callback, + &outer_timer_count)); + + /* Create a fiber that will create and run the inner event loop */ + _cleanup_(sd_future_unrefp) sd_future *f = NULL; + int inner_timer_count = 0; + ASSERT_OK(sd_fiber_new(outer, "event-runner", event_run_fiber_func, &inner_timer_count, /* destroy= */ NULL, &f)); + + /* Run the outer event loop */ + ASSERT_OK(sd_event_loop(outer)); + + /* Fiber should have completed successfully */ + ASSERT_OK(sd_future_result(f)); + + /* Both timers should have fired at least once */ + ASSERT_EQ(inner_timer_count, 3); + ASSERT_GT(outer_timer_count, 0); +} + +static int event_run_fiber_timeout_func(void *userdata) { + _cleanup_(sd_event_unrefp) sd_event *inner = NULL; + int r; + + /* Create inner event loop from within the fiber */ + r = sd_event_new(&inner); + if (r < 0) + return r; + + /* Run with a short timeout - should timeout since there are no events */ + return sd_event_run(inner, 10 * USEC_PER_MSEC); +} + +TEST(sd_event_run_fiber_timeout) { + /* Create outer event loop for the fiber scheduler */ + _cleanup_(sd_event_unrefp) sd_event *outer = NULL; + ASSERT_OK(sd_event_new(&outer)); + ASSERT_OK(sd_event_set_exit_on_idle(outer, true)); + + /* Create a fiber that will run sd_event_run() with timeout */ + _cleanup_(sd_future_unrefp) sd_future *f = NULL; + ASSERT_OK(sd_fiber_new(outer, "event-timeout", event_run_fiber_timeout_func, NULL, /* destroy= */ NULL, &f)); + + /* Run the outer event loop */ + ASSERT_OK(sd_event_loop(outer)); + + /* Fiber should have completed successfully (timeout returns 0) */ + ASSERT_OK_ZERO(sd_future_result(f)); +} + +/* Test: sd_event_run() with zero timeout returns immediately */ +static int sd_event_run_zero_timeout_fiber(void *userdata) { + _cleanup_(sd_event_unrefp) sd_event *inner = NULL; + int r; + + r = sd_event_new(&inner); + if (r < 0) + return r; + + /* With zero timeout on an empty event loop, should return 0 immediately */ + r = sd_event_run(inner, 0); + if (r != 0) + return r < 0 ? r : -EIO; + + return 0; +} + +TEST(sd_event_run_zero_timeout) { + _cleanup_(sd_event_unrefp) sd_event *outer = NULL; + ASSERT_OK(sd_event_new(&outer)); + ASSERT_OK(sd_event_set_exit_on_idle(outer, true)); + + _cleanup_(sd_future_unrefp) sd_future *f = NULL; + ASSERT_OK(sd_fiber_new(outer, "run-suspend-zero", sd_event_run_zero_timeout_fiber, NULL, /* destroy= */ NULL, &f)); + + ASSERT_OK(sd_event_loop(outer)); + ASSERT_OK_ZERO(sd_future_result(f)); +} + +/* Test: sd_event_run() dispatches immediately pending IO */ +static int io_callback(sd_event_source *s, int fd, uint32_t revents, void *userdata) { + int *counter = ASSERT_PTR(userdata); + char buf[64]; + + (*counter)++; + + /* Drain the fd */ + (void) read(fd, buf, sizeof(buf)); + + return sd_event_exit(sd_event_source_get_event(s), 0); +} + +static int sd_event_run_immediate_fiber(void *userdata) { + int *pipefd = ASSERT_PTR(userdata); + _cleanup_(sd_event_unrefp) sd_event *inner = NULL; + _cleanup_(sd_event_source_unrefp) sd_event_source *source = NULL; + int counter = 0, r; + + r = sd_event_new(&inner); + if (r < 0) + return r; + + /* Add IO source watching the read end of the pipe */ + r = sd_event_add_io(inner, &source, pipefd[0], EPOLLIN, io_callback, &counter); + if (r < 0) + return r; + + /* Data is already available on the pipe (written before fiber started), so + * sd_event_run() should dispatch immediately without suspending */ + r = sd_event_run(inner, USEC_INFINITY); + if (r < 0) + return r; + + /* The IO callback should have fired */ + if (counter != 1) + return -EIO; + + return 0; +} + +TEST(sd_event_run_immediate) { + _cleanup_(sd_event_unrefp) sd_event *outer = NULL; + ASSERT_OK(sd_event_new(&outer)); + ASSERT_OK(sd_event_set_exit_on_idle(outer, true)); + + _cleanup_close_pair_ int pipefd[2] = EBADF_PAIR; + ASSERT_OK_ERRNO(pipe2(pipefd, O_CLOEXEC | O_NONBLOCK)); + + /* Write data before starting the fiber so it's immediately available */ + ASSERT_OK_EQ_ERRNO(write(pipefd[1], "X", 1), 1); + + _cleanup_(sd_future_unrefp) sd_future *f = NULL; + ASSERT_OK(sd_fiber_new(outer, "run-suspend-immediate", sd_event_run_immediate_fiber, pipefd, /* destroy= */ NULL, &f)); + + ASSERT_OK(sd_event_loop(outer)); + ASSERT_OK_ZERO(sd_future_result(f)); +} + +/* Test: sd_event_run() with IO arriving during suspension */ +static int sd_event_run_io_fiber(void *userdata) { + int *pipefd = ASSERT_PTR(userdata); + _cleanup_(sd_event_unrefp) sd_event *inner = NULL; + _cleanup_(sd_event_source_unrefp) sd_event_source *source = NULL; + int counter = 0, r; + + r = sd_event_new(&inner); + if (r < 0) + return r; + + r = sd_event_add_io(inner, &source, pipefd[0], EPOLLIN, io_callback, &counter); + if (r < 0) + return r; + + /* No data available yet, so this will suspend the fiber until IO arrives */ + r = sd_event_run(inner, USEC_INFINITY); + if (r < 0) + return r; + + if (counter != 1) + return -EIO; + + return 0; +} + +TEST(sd_event_run_io) { + _cleanup_(sd_event_unrefp) sd_event *outer = NULL; + ASSERT_OK(sd_event_new(&outer)); + ASSERT_OK(sd_event_set_exit_on_idle(outer, true)); + + _cleanup_close_pair_ int pipefd[2] = EBADF_PAIR; + ASSERT_OK_ERRNO(pipe2(pipefd, O_CLOEXEC | O_NONBLOCK)); + + _cleanup_(sd_future_unrefp) sd_future *f = NULL; + ASSERT_OK(sd_fiber_new(outer, "run-suspend-io", sd_event_run_io_fiber, pipefd, /* destroy= */ NULL, &f)); + + /* First iteration: fiber runs, adds IO source, suspends because no data */ + ASSERT_OK_POSITIVE(sd_event_run(outer, 0)); + + /* Write data to the pipe to wake the inner event loop */ + ASSERT_OK_EQ_ERRNO(write(pipefd[1], "Y", 1), 1); + + /* Complete: fiber resumes, dispatches IO, finishes */ + ASSERT_OK(sd_event_loop(outer)); + ASSERT_OK_ZERO(sd_future_result(f)); +} + +/* Test: event_run called in a loop keeps event loop state consistent. + * This is a regression test for a bug where error paths after sd_event_prepare() + * could leave the inner event loop stuck in SD_EVENT_ARMED state. */ +static int sd_event_run_loop_fiber(void *userdata) { + int *pipefd = ASSERT_PTR(userdata); + _cleanup_(sd_event_unrefp) sd_event *inner = NULL; + _cleanup_(sd_event_source_unrefp) sd_event_source *source = NULL; + int counter = 0, r; + + r = sd_event_new(&inner); + if (r < 0) + return r; + + r = sd_event_add_io(inner, &source, pipefd[0], EPOLLIN, io_callback, &counter); + if (r < 0) + return r; + + /* Call sd_event_run() multiple times with short timeouts. + * Each call should leave the inner event loop in a clean state for the next call. */ + for (int i = 0; i < 5; i++) { + r = sd_event_run(inner, 10 * USEC_PER_MSEC); + if (r < 0) + return r; + if (r > 0) + break; + } + + /* After multiple timeouts, the event loop should still be usable. + * Write data and do one more run to verify. */ + if (counter == 0) { + /* Data wasn't written yet, do a final run with longer timeout */ + r = sd_event_run(inner, USEC_INFINITY); + if (r < 0) + return r; + } + + if (counter != 1) + return -EIO; + + return 0; +} + +TEST(sd_event_run_loop) { + _cleanup_(sd_event_unrefp) sd_event *outer = NULL; + ASSERT_OK(sd_event_new(&outer)); + ASSERT_OK(sd_event_set_exit_on_idle(outer, true)); + + _cleanup_close_pair_ int pipefd[2] = EBADF_PAIR; + ASSERT_OK_ERRNO(pipe2(pipefd, O_CLOEXEC | O_NONBLOCK)); + + _cleanup_(sd_future_unrefp) sd_future *f = NULL; + ASSERT_OK(sd_fiber_new(outer, "run-suspend-loop", sd_event_run_loop_fiber, pipefd, /* destroy= */ NULL, &f)); + + /* Let the fiber run through a few timeout iterations */ + for (int i = 0; i < 10; i++) + ASSERT_OK(sd_event_run(outer, 50 * USEC_PER_MSEC)); + + /* Write data to unblock the fiber */ + ASSERT_OK_EQ_ERRNO(write(pipefd[1], "Z", 1), 1); + + ASSERT_OK(sd_event_loop(outer)); + ASSERT_OK_ZERO(sd_future_result(f)); +} + +/* Test: sd_event_run() with an inner timer that fires during suspension */ +static int inner_timer_handler(sd_event_source *s, uint64_t usec, void *userdata) { + int *counter = ASSERT_PTR(userdata); + (*counter)++; + return sd_event_exit(sd_event_source_get_event(s), 0); +} + +static int sd_event_run_timer_fiber(void *userdata) { + _cleanup_(sd_event_unrefp) sd_event *inner = NULL; + _cleanup_(sd_event_source_unrefp) sd_event_source *source = NULL; + int counter = 0, r; + + r = sd_event_new(&inner); + if (r < 0) + return r; + + /* Add a timer that fires after 10ms */ + r = sd_event_add_time_relative(inner, &source, CLOCK_MONOTONIC, + 10 * USEC_PER_MSEC, 0, inner_timer_handler, + &counter); + if (r < 0) + return r; + + /* Should suspend, then resume when the timer fires */ + r = sd_event_run(inner, USEC_INFINITY); + if (r < 0) + return r; + + if (counter != 1) + return -EIO; + + return 0; +} + +TEST(sd_event_run_timer) { + _cleanup_(sd_event_unrefp) sd_event *outer = NULL; + ASSERT_OK(sd_event_new(&outer)); + ASSERT_OK(sd_event_set_exit_on_idle(outer, true)); + + _cleanup_(sd_future_unrefp) sd_future *f = NULL; + ASSERT_OK(sd_fiber_new(outer, "run-suspend-timer", sd_event_run_timer_fiber, NULL, /* destroy= */ NULL, &f)); + + ASSERT_OK(sd_event_loop(outer)); + ASSERT_OK_ZERO(sd_future_result(f)); +} + +DEFINE_TEST_MAIN(LOG_DEBUG); From e9c49dd1e612f8a176751a33f1ff182816252d5f Mon Sep 17 00:00:00 2001 From: Daan De Meyer Date: Mon, 11 May 2026 16:27:34 +0200 Subject: [PATCH 239/242] sd-bus: make sd-bus fiber-aware MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Two changes to teach sd-bus how to behave when called from a fiber, in order of increasing depth: 2. sd_bus_call() now redirects to a new bus_call_suspend() helper when the caller is a fiber whose event loop is the same one the bus is attached to. The plain bus_poll() path serializes all bus traffic on the slot's reply (only one method call can be in flight per sd_bus*), which would defeat the point of running multiple fibers against one bus. bus_call_suspend() builds on the async sd-bus API: it wraps the call in a new BusFuture (sd-bus/bus-future.{c,h}) that resolves when the reply or method-error arrives, lets the fiber await that future, and surfaces the reply to the caller via future_get_bus_reply(). Because the futures live on the event loop rather than a per-bus slot, multiple fibers can drive concurrent method calls against the same bus. 3. A new private SD_BUS_VTABLE_METHOD_FIBER flag dispatches a vtable method handler on its own fiber, so handlers are free to use sd_bus_call() against the same bus, sd_fiber_sleep(), loop_read(), etc. without stalling the event loop for other connections or handlers. The flag stays out of sd-bus-vtable.h (its bit value is reserved there to prevent collisions) — the fiber runtime is a systemd-internal implementation detail. Lifecycle of fiber-dispatched handlers is tracked on the bus itself: a new bus->fiber_futures set holds a ref to each in-flight handler. bus_enter_closing() cancels every entry and process_closing() returns with the bus still in CLOSING state until the set drains, so we can be sure no fiber handler outlives the bus. bus_fiber_resolved() removes the entry on completion. bus_free()'s assert(set_isempty()) makes the invariant load-bearing. Note that plain sd_bus_call() already works correctly on a fiber as it calls ppoll_usec() which has already been modified to suspend when running on a fiber. To exercise these changes the existing thread-based client/server sd-bus tests (test-bus-chat, test-bus-objects, test-bus-peersockaddr, test-bus-server, test-bus-watch-bind) are migrated to fibers, and a new test-bus-fiber is added that covers SD_BUS_VTABLE_METHOD_FIBER — including handlers that issue nested sd_bus_call() on the same bus, the cancel-on-close path, and concurrent dispatches across multiple fibers. --- src/libsystemd/meson.build | 2 + src/libsystemd/sd-bus/bus-future.c | 124 +++++++++++ src/libsystemd/sd-bus/bus-future.h | 14 ++ src/libsystemd/sd-bus/bus-internal.h | 13 ++ src/libsystemd/sd-bus/bus-objects.c | 111 ++++++++++ src/libsystemd/sd-bus/sd-bus.c | 50 ++++- src/libsystemd/sd-bus/test-bus-chat.c | 51 ++--- src/libsystemd/sd-bus/test-bus-fiber.c | 194 ++++++++++++++++++ src/libsystemd/sd-bus/test-bus-objects.c | 62 ++---- src/libsystemd/sd-bus/test-bus-peersockaddr.c | 33 +-- src/libsystemd/sd-bus/test-bus-server.c | 48 +++-- src/libsystemd/sd-bus/test-bus-watch-bind.c | 102 +++++---- src/systemd/sd-bus-vtable.h | 1 + 13 files changed, 653 insertions(+), 152 deletions(-) create mode 100644 src/libsystemd/sd-bus/bus-future.c create mode 100644 src/libsystemd/sd-bus/bus-future.h create mode 100644 src/libsystemd/sd-bus/test-bus-fiber.c diff --git a/src/libsystemd/meson.build b/src/libsystemd/meson.build index 3365869625592..0fa3847a20748 100644 --- a/src/libsystemd/meson.build +++ b/src/libsystemd/meson.build @@ -49,6 +49,7 @@ sd_bus_sources = files( 'sd-bus/bus-dump.c', 'sd-bus/bus-dump-json.c', 'sd-bus/bus-error.c', + 'sd-bus/bus-future.c', 'sd-bus/bus-internal.c', 'sd-bus/bus-introspect.c', 'sd-bus/bus-kernel.c', @@ -186,6 +187,7 @@ libsystemd_pc = custom_target( simple_tests += files( 'sd-bus/test-bus-creds.c', + 'sd-bus/test-bus-fiber.c', 'sd-bus/test-bus-introspect.c', 'sd-bus/test-bus-match.c', 'sd-bus/test-bus-vtable.c', diff --git a/src/libsystemd/sd-bus/bus-future.c b/src/libsystemd/sd-bus/bus-future.c new file mode 100644 index 0000000000000..d2a0cba81f0a9 --- /dev/null +++ b/src/libsystemd/sd-bus/bus-future.c @@ -0,0 +1,124 @@ +/* SPDX-License-Identifier: LGPL-2.1-or-later */ + +#include "sd-bus.h" +#include "sd-future.h" + +#include "alloc-util.h" +#include "bus-future.h" +#include "bus-internal.h" +#include "bus-message.h" + +typedef struct BusFuture { + sd_bus_slot *slot; + sd_bus_message *reply; +} BusFuture; + +static void* bus_future_alloc(void) { + return new0(BusFuture, 1); +} + +static void bus_future_free(sd_future *f) { + BusFuture *bf = sd_future_get_private(f); + sd_bus_slot_unref(bf->slot); + sd_bus_message_unref(bf->reply); + free(bf); +} + +static int bus_future_cancel(sd_future *f) { + BusFuture *bf = sd_future_get_private(ASSERT_PTR(f)); + + bf->slot = sd_bus_slot_unref(bf->slot); + return sd_future_resolve(f, -ECANCELED); +} + +static const sd_future_ops bus_future_ops = { + .size = sizeof(sd_future_ops), + .alloc = bus_future_alloc, + .free = bus_future_free, + .cancel = bus_future_cancel, +}; + +static int bus_future_handler(sd_bus_message *m, void *userdata, sd_bus_error *reterr_error) { + sd_future *f = ASSERT_PTR(userdata); + BusFuture *bf = sd_future_get_private(f); + + /* Resolve with 0 on any reply (including error replies). The reply itself carries + * success/error information via future_get_bus_reply(); the future's resolution result is + * reserved for cancellation (-ECANCELED), so callers can distinguish "got a reply" from + * "no reply will arrive". */ + bf->slot = sd_bus_slot_unref(bf->slot); + bf->reply = sd_bus_message_ref(m); + return sd_future_resolve(f, 0); +} + +int bus_call_future(sd_bus *bus, sd_bus_message *m, uint64_t usec, sd_future **ret) { + int r; + + assert(bus); + assert(m); + assert(ret); + + _cleanup_(sd_future_unrefp) sd_future *f = NULL; + r = sd_future_new(&bus_future_ops, &f); + if (r < 0) + return r; + + BusFuture *bf = sd_future_get_private(f); + + r = sd_bus_call_async(bus, &bf->slot, m, bus_future_handler, f, usec); + if (r < 0) + return r; + + *ret = TAKE_PTR(f); + return 0; +} + +int future_get_bus_reply(sd_future *f, sd_bus_error *reterr_error, sd_bus_message **ret_reply) { + BusFuture *bf = sd_future_get_private(ASSERT_PTR(f)); + sd_bus_message *reply = ASSERT_PTR(bf->reply); + + assert(sd_future_get_ops(f) == &bus_future_ops); + assert(sd_future_state(f) == SD_FUTURE_RESOLVED); + + if (sd_bus_message_is_method_error(reply, NULL)) { + if (reterr_error) + return sd_bus_error_copy(reterr_error, sd_bus_message_get_error(reply)); + return -sd_bus_message_get_errno(reply); + } + + if (reply->n_fds > 0 && !sd_bus_message_get_bus(reply)->accept_fd) + return sd_bus_error_set(reterr_error, SD_BUS_ERROR_INCONSISTENT_MESSAGE, + "Reply message contained file descriptors which I couldn't accept. Sorry."); + + if (reterr_error) + *reterr_error = SD_BUS_ERROR_NULL; + if (ret_reply) + *ret_reply = sd_bus_message_ref(reply); + + return 1; +} + +int bus_call_suspend( + sd_bus *bus, + sd_bus_message *m, + uint64_t usec, + sd_bus_error *reterr_error, + sd_bus_message **ret_reply) { + + int r; + + assert(bus); + assert(m); + assert(sd_fiber_is_running()); + + _cleanup_(sd_future_cancel_wait_unrefp) sd_future *call = NULL; + r = bus_call_future(bus, m, usec, &call); + if (r < 0) + return sd_bus_error_set_errno(reterr_error, r); + + r = sd_fiber_suspend(); + if (r < 0) + return sd_bus_error_set_errno(reterr_error, r); + + return future_get_bus_reply(call, reterr_error, ret_reply); +} diff --git a/src/libsystemd/sd-bus/bus-future.h b/src/libsystemd/sd-bus/bus-future.h new file mode 100644 index 0000000000000..ec9bd80b1598a --- /dev/null +++ b/src/libsystemd/sd-bus/bus-future.h @@ -0,0 +1,14 @@ +/* SPDX-License-Identifier: LGPL-2.1-or-later */ +#pragma once + +#include "sd-forward.h" + +int bus_call_future(sd_bus *bus, sd_bus_message *m, uint64_t usec, sd_future **ret); +int future_get_bus_reply(sd_future *f, sd_bus_error *reterr_error, sd_bus_message **ret_reply); + +int bus_call_suspend( + sd_bus *bus, + sd_bus_message *m, + uint64_t usec, + sd_bus_error *reterr_error, + sd_bus_message **ret_reply); diff --git a/src/libsystemd/sd-bus/bus-internal.h b/src/libsystemd/sd-bus/bus-internal.h index 19a3b67d12f6a..3a52f738d6bd7 100644 --- a/src/libsystemd/sd-bus/bus-internal.h +++ b/src/libsystemd/sd-bus/bus-internal.h @@ -17,6 +17,13 @@ #define DEFAULT_SYSTEM_BUS_ADDRESS "unix:path=/run/dbus/system_bus_socket" #define DEFAULT_USER_BUS_ADDRESS_FMT "unix:path=%s/bus" +/* Private vtable flag: dispatch the method handler on its own fiber, so it can use suspending + * primitives (sd_bus_call() on a fiber, sd_fiber_sleep(), loop_read_suspend(), ...) without + * blocking the event loop for other connections or method calls. Kept out of the public + * sd-bus-vtable.h so the fiber runtime stays an implementation detail of systemd. The bit value is + * reserved in sd-bus-vtable.h to make sure it never collides with a future public flag. */ +#define SD_BUS_VTABLE_METHOD_FIBER (UINT64_C(1) << 10) + typedef struct BusReplyCallback { sd_bus_message_handler_t callback; usec_t timeout_usec; /* this is a relative timeout until we reach the BUS_HELLO state, and an absolute one right after */ @@ -222,6 +229,12 @@ typedef struct sd_bus { Set *vtable_methods; Set *vtable_properties; + /* Futures for outstanding SD_BUS_VTABLE_METHOD_FIBER dispatches. Entries are added as the + * dispatcher spawns each fiber and removed when the fiber resolves. On bus_enter_closing() + * we cancel everything in here and then wait in process_closing() until the set drains, + * before tearing down the rest of the bus. */ + Set *fiber_futures; + union sockaddr_union sockaddr; socklen_t sockaddr_size; diff --git a/src/libsystemd/sd-bus/bus-objects.c b/src/libsystemd/sd-bus/bus-objects.c index 83ba3a523992b..76948f0cb489a 100644 --- a/src/libsystemd/sd-bus/bus-objects.c +++ b/src/libsystemd/sd-bus/bus-objects.c @@ -3,6 +3,7 @@ #include #include "sd-bus.h" +#include "sd-future.h" #include "alloc-util.h" #include "bus-internal.h" @@ -337,6 +338,69 @@ static int check_access(sd_bus *bus, sd_bus_message *m, BusVTableMember *c, sd_b return sd_bus_error_setf(reterr_error, SD_BUS_ERROR_ACCESS_DENIED, "Access to %s.%s() not permitted.", c->interface, c->member); } +typedef struct BusFiberData { + sd_bus *bus; + sd_bus_message *message; + sd_bus_slot *slot; + sd_bus_message_handler_t handler; + void *userdata; +} BusFiberData; + +static BusFiberData* bus_fiber_data_free(BusFiberData *d) { + if (!d) + return NULL; + + sd_bus_slot_unref(d->slot); + sd_bus_message_unref(d->message); + sd_bus_unref(d->bus); + return mfree(d); +} + +DEFINE_TRIVIAL_CLEANUP_FUNC(BusFiberData*, bus_fiber_data_free); + +static void bus_fiber_data_destroy(void *userdata) { + bus_fiber_data_free(userdata); +} + +static void bus_fiber_future_unref(void *p) { + sd_future_unref(p); +} + +DEFINE_PRIVATE_HASH_OPS_WITH_KEY_DESTRUCTOR( + bus_fiber_future_hash_ops, + void, + trivial_hash_func, + trivial_compare_func, + bus_fiber_future_unref); + +static int bus_fiber_resolved(sd_future *f) { + sd_bus *bus = ASSERT_PTR(sd_future_get_userdata(f)); + + /* Remove the future from the bus' tracking set. set_remove() calls sd_future_unref() via the + * hash_ops destructor; fiber_run() holds an extra ref across the resolve path so the future + * itself isn't freed mid-resolution even if our ref was the last one. */ + assert_se(set_remove(bus->fiber_futures, f) == f); + return 0; +} + +static int bus_fiber_entry(void *userdata) { + BusFiberData *d = ASSERT_PTR(userdata); + _cleanup_(sd_bus_error_free) sd_bus_error error = SD_BUS_ERROR_NULL; + int r; + + /* Note: unlike the synchronous dispatch path, we deliberately do NOT set + * bus->current_slot/handler/userdata around the callback. Those fields track the slot of the + * message currently being dispatched inline and must be NULL at each entry into + * bus_process_internal(). Because a fiber handler can yield and let the event loop dispatch + * other messages before it resumes, leaving current_slot non-NULL across yields would trip + * that invariant. Fiber handlers receive their slot's userdata via the handler argument, so + * sd_bus_get_current_slot()/handler()/userdata() simply aren't meaningful inside them — the + * handler should use the message/userdata parameters directly instead. */ + r = d->handler(d->message, d->userdata, &error); + + return bus_maybe_reply_error(d->message, r, &error); +} + static int method_callbacks_run( sd_bus *bus, sd_bus_message *m, @@ -407,6 +471,53 @@ static int method_callbacks_run( slot = container_of(c->parent, sd_bus_slot, node_vtable); + if (FLAGS_SET(c->vtable->flags, SD_BUS_VTABLE_METHOD_FIBER)) { + /* A fiber-dispatched method requires an event loop to spawn the fiber on. + * By the time a method call actually arrives the bus is running, so the + * event loop should already be attached — if not, the caller set up the bus + * wrong and there's no meaningful recovery. */ + assert(bus->event); + + _cleanup_(bus_fiber_data_freep) BusFiberData *d = new(BusFiberData, 1); + if (!d) + return -ENOMEM; + + *d = (BusFiberData) { + .bus = sd_bus_ref(bus), + .message = sd_bus_message_ref(m), + .slot = sd_bus_slot_ref(slot), + .handler = c->vtable->x.method.handler, + .userdata = u, + }; + + _cleanup_(sd_future_unrefp) sd_future *f = NULL; + r = sd_fiber_new(bus->event, c->member, bus_fiber_entry, d, bus_fiber_data_destroy, &f); + if (r < 0) + return bus_maybe_reply_error(m, r, NULL); + + /* The fiber now owns d via bus_fiber_data_destroy. Drop our cleanup before any + * further fallible calls, so a later failure unwinding f doesn't double-free d. */ + TAKE_PTR(d); + + r = set_ensure_put(&bus->fiber_futures, &bus_fiber_future_hash_ops, f); + if (r < 0) + return bus_maybe_reply_error(m, r, NULL); + assert(r > 0); + + /* Track the future on the bus so shutdown can cancel it and wait for it. */ + r = sd_future_set_callback(f, bus_fiber_resolved, bus); + if (r < 0) { + /* TAKE_PTR(f) hasn't run yet, so our cleanup attribute still owns the + * ref; set_remove() returns the raw pointer without firing the hash_ops + * destructor, and the cleanup will unref f on return. */ + assert_se(set_remove(bus->fiber_futures, f) == f); + return bus_maybe_reply_error(m, r, NULL); + } + + TAKE_PTR(f); + return 1; + } + bus->current_slot = sd_bus_slot_ref(slot); bus->current_handler = c->vtable->x.method.handler; bus->current_userdata = u; diff --git a/src/libsystemd/sd-bus/sd-bus.c b/src/libsystemd/sd-bus/sd-bus.c index 27f788d995576..e44c439fad862 100644 --- a/src/libsystemd/sd-bus/sd-bus.c +++ b/src/libsystemd/sd-bus/sd-bus.c @@ -10,12 +10,14 @@ #include "sd-bus.h" #include "sd-event.h" +#include "sd-future.h" #include "af-list.h" #include "alloc-util.h" #include "bus-container.h" #include "bus-control.h" #include "bus-error.h" +#include "bus-future.h" #include "bus-internal.h" #include "bus-kernel.h" #include "bus-label.h" @@ -222,6 +224,12 @@ static sd_bus* bus_free(sd_bus *b) { ordered_hashmap_free(b->reply_callbacks); prioq_free(b->reply_callbacks_prioq); + /* Outstanding fiber handlers pin the bus via their BusFiberData ref, so by the time refcount + * reaches zero and bus_free() runs, every fiber has already resolved and removed itself from + * this set. */ + assert(set_isempty(b->fiber_futures)); + set_free(b->fiber_futures); + assert(b->match_callbacks.type == BUS_MATCH_ROOT); bus_match_free(&b->match_callbacks); @@ -1809,6 +1817,9 @@ _public_ sd_bus* sd_bus_flush_close_unref(sd_bus *bus) { } void bus_enter_closing(sd_bus *bus, int exit_code) { + sd_future *f; + int r; + assert(bus); if (!IN_SET(bus->state, BUS_WATCH_BIND, BUS_OPENING, BUS_AUTHENTICATING, BUS_HELLO, BUS_RUNNING)) @@ -1816,6 +1827,19 @@ void bus_enter_closing(sd_bus *bus, int exit_code) { bus_set_state(bus, BUS_CLOSING); bus->exit_code = exit_code; + + /* Cancel all outstanding fiber-dispatched method handlers. Most cancellations are scheduled + * asynchronously (fibers resolve with -ECANCELED the next time they run), but a fiber still + * in FIBER_STATE_INITIAL resolves synchronously, which fires bus_fiber_resolved() and + * removes f from this set mid-iteration. That's safe because SET_FOREACH permits removal of + * exactly the current entry — see the assertion in hashmap_iterate_entry(). Either way this + * doesn't block here: process_closing() waits for the fiber_futures set to drain before it + * continues tearing down the rest of the bus. */ + SET_FOREACH(f, bus->fiber_futures) { + r = sd_future_cancel(f); + if (r < 0) + log_debug_errno(r, "Failed to cancel outstanding fiber method handler, ignoring: %m"); + } } /* Define manually so we can add the PID check */ @@ -2388,23 +2412,30 @@ _public_ int sd_bus_call( sd_bus_error *reterr_error, sd_bus_message **ret_reply) { - _cleanup_(sd_bus_message_unrefp) sd_bus_message *m = sd_bus_message_ref(_m); usec_t timeout; uint64_t cookie; size_t i; int r; - bus_assert_return(m, -EINVAL, reterr_error); - bus_assert_return(m->header->type == SD_BUS_MESSAGE_METHOD_CALL, -EINVAL, reterr_error); - bus_assert_return(!(m->header->flags & BUS_MESSAGE_NO_REPLY_EXPECTED), -EINVAL, reterr_error); + bus_assert_return(_m, -EINVAL, reterr_error); + bus_assert_return(_m->header->type == SD_BUS_MESSAGE_METHOD_CALL, -EINVAL, reterr_error); + bus_assert_return(!(_m->header->flags & BUS_MESSAGE_NO_REPLY_EXPECTED), -EINVAL, reterr_error); bus_assert_return(!bus_error_is_dirty(reterr_error), -EINVAL, reterr_error); if (bus) assert_return(bus = bus_resolve(bus), -ENOPKG); else - assert_return(bus = m->bus, -ENOTCONN); + assert_return(bus = _m->bus, -ENOTCONN); bus_assert_return(!bus_origin_changed(bus), -ECHILD, reterr_error); + /* If the current fiber and the bus share their event loop, we can use sd_bus_call_suspend() + * instead which does an async method call. This allows multiple invocations of sd_bus_call() to + * happen across multiple fibers at once. */ + if (sd_fiber_is_running() && bus->event == sd_fiber_get_event()) + return bus_call_suspend(bus, _m, usec, reterr_error, ret_reply); + + _cleanup_(sd_bus_message_unrefp) sd_bus_message *m = sd_bus_message_ref(_m); + if (!BUS_IS_OPEN(bus->state)) { r = -ENOTCONN; goto fail; @@ -3177,7 +3208,14 @@ static int process_closing(sd_bus *bus, sd_bus_message **ret) { assert(bus); assert(bus->state == BUS_CLOSING); - /* First, fail all outstanding method calls */ + /* Wait for any still-running fiber method handlers to finish unwinding their cancellation + * before tearing down the rest of the bus. bus_enter_closing() scheduled the cancel; each + * fiber resolves asynchronously and bus_fiber_resolved() removes it from the set. Returning + * 1 here keeps the bus in CLOSING state so the event loop drives the fibers to completion. */ + if (!set_isempty(bus->fiber_futures)) + return 1; + + /* Then, fail all outstanding method calls */ c = ordered_hashmap_first(bus->reply_callbacks); if (c) return process_closing_reply_callback(bus, c); diff --git a/src/libsystemd/sd-bus/test-bus-chat.c b/src/libsystemd/sd-bus/test-bus-chat.c index 1f358ccd3396e..d6f4860c41511 100644 --- a/src/libsystemd/sd-bus/test-bus-chat.c +++ b/src/libsystemd/sd-bus/test-bus-chat.c @@ -1,11 +1,11 @@ /* SPDX-License-Identifier: LGPL-2.1-or-later */ #include -#include #include #include #include "sd-bus.h" +#include "sd-future.h" #include "alloc-util.h" #include "bus-error.h" @@ -102,7 +102,8 @@ static int server_init(sd_bus **ret) { return 0; } -static int server(sd_bus *bus) { +static int server(void *userdata) { + sd_bus *bus = ASSERT_PTR(userdata); bool client1_gone = false, client2_gone = false; int r; @@ -178,7 +179,9 @@ static int server(sd_bus *bus) { client2_gone = true; } else if (sd_bus_message_is_method_call(m, "org.freedesktop.systemd.test", "Slow")) { - sleep(1); + r = sd_fiber_sleep(1 * USEC_PER_SEC); + if (r < 0) + return r; r = sd_bus_reply_method_return(m, NULL); if (r < 0) @@ -194,10 +197,10 @@ static int server(sd_bus *bus) { log_info("Received fd=%d", fd); - if (write(fd, &x, 1) < 0) { - r = log_error_errno(errno, "Failed to write to fd: %m"); + ssize_t n = sd_fiber_write(fd, &x, 1); + if (n < 0) { safe_close(fd); - return r; + return log_error_errno(n, "Failed to write to fd: %m"); } r = sd_bus_reply_method_return(m, NULL); @@ -217,7 +220,7 @@ static int server(sd_bus *bus) { return 0; } -static void* client1(void *p) { +static int client1(void *userdata) { _cleanup_(sd_bus_message_unrefp) sd_bus_message *reply = NULL; _cleanup_(sd_bus_flush_close_unrefp) sd_bus *bus = NULL; _cleanup_(sd_bus_error_free) sd_bus_error error = SD_BUS_ERROR_NULL; @@ -277,9 +280,9 @@ static void* client1(void *p) { goto finish; } - errno = 0; - if (read(pp[0], &x, 1) <= 0) { - log_error("Failed to read from pipe: %s", STRERROR_OR_EOF(errno)); + ssize_t n = sd_fiber_read(pp[0], &x, 1); + if (n <= 0) { + log_error("Failed to read from pipe: %s", STRERROR_OR_EOF(n)); goto finish; } @@ -303,7 +306,7 @@ static void* client1(void *p) { } - return INT_TO_PTR(r); + return r; } static int quit_callback(sd_bus_message *m, void *userdata, sd_bus_error *ret_error) { @@ -315,7 +318,7 @@ static int quit_callback(sd_bus_message *m, void *userdata, sd_bus_error *ret_er return 1; } -static void* client2(void *p) { +static int client2(void *userdata) { _cleanup_(sd_bus_message_unrefp) sd_bus_message *m = NULL, *reply = NULL; _cleanup_(sd_bus_flush_close_unrefp) sd_bus *bus = NULL; _cleanup_(sd_bus_error_free) sd_bus_error error = SD_BUS_ERROR_NULL; @@ -494,7 +497,7 @@ static void* client2(void *p) { (void) sd_bus_send(bus, q, NULL); } - return INT_TO_PTR(r); + return r; } static ino_t get_inode(int fd) { @@ -626,9 +629,9 @@ TEST(ctrunc) { } TEST(chat) { + _cleanup_(sd_event_unrefp) sd_event *e = NULL; + _cleanup_(sd_future_unrefp) sd_future *f_server = NULL, *f_client1 = NULL, *f_client2 = NULL; _cleanup_(sd_bus_flush_close_unrefp) sd_bus *bus = NULL; - pthread_t c1, c2; - void *p; int r; test_setup_logging(LOG_INFO); @@ -639,16 +642,18 @@ TEST(chat) { log_info("Initialized..."); - ASSERT_OK(-pthread_create(&c1, NULL, client1, NULL)); - ASSERT_OK(-pthread_create(&c2, NULL, client2, NULL)); + ASSERT_OK(sd_event_new(&e)); + ASSERT_OK(sd_event_set_exit_on_idle(e, true)); + + ASSERT_OK(sd_fiber_new(e, "client-1", client1, NULL, /* destroy= */ NULL, &f_client1)); + ASSERT_OK(sd_fiber_new(e, "client-2", client2, NULL, /* destroy= */ NULL, &f_client2)); + ASSERT_OK(sd_fiber_new(e, "server", server, bus, /* destroy= */ NULL, &f_server)); - r = server(bus); + ASSERT_OK(sd_event_loop(e)); - ASSERT_OK(-pthread_join(c1, &p)); - ASSERT_OK(PTR_TO_INT(p)); - ASSERT_OK(-pthread_join(c2, &p)); - ASSERT_OK(PTR_TO_INT(p)); - ASSERT_OK(r); + ASSERT_OK(sd_future_result(f_client1)); + ASSERT_OK(sd_future_result(f_client2)); + ASSERT_OK(sd_future_result(f_server)); } DEFINE_TEST_MAIN(LOG_INFO); diff --git a/src/libsystemd/sd-bus/test-bus-fiber.c b/src/libsystemd/sd-bus/test-bus-fiber.c new file mode 100644 index 0000000000000..3c3509f2ca2d4 --- /dev/null +++ b/src/libsystemd/sd-bus/test-bus-fiber.c @@ -0,0 +1,194 @@ +/* SPDX-License-Identifier: LGPL-2.1-or-later */ + +#include + +#include "sd-bus.h" +#include "sd-event.h" +#include "sd-future.h" + +#include "bus-internal.h" +#include "tests.h" +#include "time-util.h" + +typedef struct Context { + /* Counters for the concurrency check: every Concurrent invocation bumps in_flight on entry + * and drops it on exit, and tracks the maximum observed concurrency. If fiber dispatch + * works, two overlapping client calls must both be inside the handler at the same time, + * giving a max of at least 2. */ + int in_flight; + int max_in_flight; +} Context; + +static int method_concurrent(sd_bus_message *m, void *userdata, sd_bus_error *reterr_error) { + Context *c = ASSERT_PTR(userdata); + + ASSERT_OK_POSITIVE(sd_fiber_is_running()); + + c->in_flight++; + if (c->in_flight > c->max_in_flight) + c->max_in_flight = c->in_flight; + + ASSERT_OK(sd_fiber_sleep(10 * USEC_PER_MSEC)); + + c->in_flight--; + + return sd_bus_reply_method_return(m, NULL); +} + +static int method_fail_errno(sd_bus_message *m, void *userdata, sd_bus_error *reterr_error) { + ASSERT_OK_POSITIVE(sd_fiber_is_running()); + + /* Yielding first exercises the deferred-error path in the fiber entry: the handler returns + * a negative errno after suspending, and bus_maybe_reply_error() must still turn that into + * a matching sd_bus error reply. */ + ASSERT_OK(sd_fiber_sleep(1 * USEC_PER_MSEC)); + + return -EACCES; +} + +static int method_fail_error(sd_bus_message *m, void *userdata, sd_bus_error *reterr_error) { + ASSERT_OK_POSITIVE(sd_fiber_is_running()); + + ASSERT_OK(sd_fiber_sleep(1 * USEC_PER_MSEC)); + + return sd_bus_error_set(reterr_error, SD_BUS_ERROR_INVALID_ARGS, "bad arguments from fiber"); +} + +static const sd_bus_vtable vtable[] = { + SD_BUS_VTABLE_START(0), + SD_BUS_METHOD("Concurrent", NULL, NULL, method_concurrent, + SD_BUS_VTABLE_UNPRIVILEGED|SD_BUS_VTABLE_METHOD_FIBER), + SD_BUS_METHOD("FailErrno", NULL, NULL, method_fail_errno, + SD_BUS_VTABLE_UNPRIVILEGED|SD_BUS_VTABLE_METHOD_FIBER), + SD_BUS_METHOD("FailError", NULL, NULL, method_fail_error, + SD_BUS_VTABLE_UNPRIVILEGED|SD_BUS_VTABLE_METHOD_FIBER), + SD_BUS_VTABLE_END, +}; + +typedef struct Setup { + int fds[2]; + Context *c; +} Setup; + +static int attach_pair(Setup *s, sd_bus **ret_server, sd_bus **ret_client) { + _cleanup_(sd_bus_flush_close_unrefp) sd_bus *server = NULL, *client = NULL; + sd_id128_t id; + + assert(ret_server); + assert(ret_client); + + ASSERT_OK(sd_id128_randomize(&id)); + ASSERT_OK(sd_bus_new(&server)); + ASSERT_OK(sd_bus_set_description(server, "server")); + ASSERT_OK(sd_bus_set_fd(server, s->fds[0], s->fds[0])); + ASSERT_OK(sd_bus_set_server(server, true, id)); + ASSERT_OK(sd_bus_attach_event(server, sd_fiber_get_event(), 0)); + ASSERT_OK(sd_bus_add_object_vtable(server, NULL, "/test", "test.Fiber", vtable, s->c)); + ASSERT_OK(sd_bus_start(server)); + + ASSERT_OK(sd_bus_new(&client)); + ASSERT_OK(sd_bus_set_description(client, "client")); + ASSERT_OK(sd_bus_set_fd(client, s->fds[1], s->fds[1])); + ASSERT_OK(sd_bus_attach_event(client, sd_fiber_get_event(), 0)); + ASSERT_OK(sd_bus_start(client)); + + *ret_server = TAKE_PTR(server); + *ret_client = TAKE_PTR(client); + return 0; +} + +static int call_concurrent_fiber(void *userdata) { + sd_bus *client = ASSERT_PTR(userdata); + + /* A plain suspending sd_bus_call() — on a fiber this goes through sd_bus_call_suspend() + * which multiplexes onto the single client connection, so multiple caller fibers can have + * calls in flight at the same time. */ + return sd_bus_call_method(client, NULL, "/test", "test.Fiber", "Concurrent", + NULL, NULL, NULL); +} + +static int concurrency_fiber(void *userdata) { + Setup *s = ASSERT_PTR(userdata); + _cleanup_(sd_bus_flush_close_unrefp) sd_bus *server = NULL, *client = NULL; + _cleanup_(sd_future_cancel_wait_unrefp) sd_future *f_a = NULL, *f_b = NULL; + + ASSERT_OK(attach_pair(s, &server, &client)); + + /* Two concurrent calls on the shared client bus. Each lands in method_concurrent which + * sleeps 10ms; if fiber dispatch works the second is entered while the first is suspended, + * so max_in_flight on the context reaches 2. */ + ASSERT_OK(sd_fiber_new(sd_fiber_get_event(), "call-a", call_concurrent_fiber, client, + /* destroy= */ NULL, &f_a)); + ASSERT_OK(sd_fiber_new(sd_fiber_get_event(), "call-b", call_concurrent_fiber, client, + /* destroy= */ NULL, &f_b)); + + ASSERT_OK(sd_fiber_await(f_a)); + ASSERT_OK(sd_fiber_await(f_b)); + + ASSERT_OK(sd_future_result(f_a)); + ASSERT_OK(sd_future_result(f_b)); + return 0; +} + +TEST(fiber_method_concurrency) { + _cleanup_(sd_event_unrefp) sd_event *e = NULL; + _cleanup_(sd_future_unrefp) sd_future *f = NULL; + Context c = {}; + Setup s = { .c = &c }; + + ASSERT_OK_ERRNO(socketpair(AF_UNIX, SOCK_STREAM, 0, s.fds)); + + ASSERT_OK(sd_event_new(&e)); + ASSERT_OK(sd_event_set_exit_on_idle(e, true)); + + ASSERT_OK(sd_fiber_new(e, "concurrency", concurrency_fiber, &s, /* destroy= */ NULL, &f)); + + ASSERT_OK(sd_event_loop(e)); + + ASSERT_OK(sd_future_result(f)); + ASSERT_GE(c.max_in_flight, 2); +} + +static int errors_fiber(void *userdata) { + Setup *s = ASSERT_PTR(userdata); + _cleanup_(sd_bus_flush_close_unrefp) sd_bus *server = NULL, *client = NULL; + + ASSERT_OK(attach_pair(s, &server, &client)); + + /* A fiber handler that returns a negative errno gets turned into a matching sd_bus error + * reply (bus_maybe_reply_error → sd_bus_reply_method_errno). */ + _cleanup_(sd_bus_error_free) sd_bus_error e1 = SD_BUS_ERROR_NULL; + ASSERT_ERROR(sd_bus_call_method(client, NULL, "/test", "test.Fiber", "FailErrno", + &e1, NULL, NULL), + EACCES); + ASSERT_TRUE(sd_bus_error_has_name(&e1, SD_BUS_ERROR_ACCESS_DENIED)); + + /* A fiber handler that populates sd_bus_error directly propagates both name and message. */ + _cleanup_(sd_bus_error_free) sd_bus_error e2 = SD_BUS_ERROR_NULL; + ASSERT_FAIL(sd_bus_call_method(client, NULL, "/test", "test.Fiber", "FailError", + &e2, NULL, NULL)); + ASSERT_TRUE(sd_bus_error_has_name(&e2, SD_BUS_ERROR_INVALID_ARGS)); + ASSERT_STREQ(e2.message, "bad arguments from fiber"); + + return 0; +} + +TEST(fiber_method_errors) { + _cleanup_(sd_event_unrefp) sd_event *e = NULL; + _cleanup_(sd_future_unrefp) sd_future *f = NULL; + Context c = {}; + Setup s = { .c = &c }; + + ASSERT_OK_ERRNO(socketpair(AF_UNIX, SOCK_STREAM, 0, s.fds)); + + ASSERT_OK(sd_event_new(&e)); + ASSERT_OK(sd_event_set_exit_on_idle(e, true)); + + ASSERT_OK(sd_fiber_new(e, "errors", errors_fiber, &s, /* destroy= */ NULL, &f)); + + ASSERT_OK(sd_event_loop(e)); + + ASSERT_OK(sd_future_result(f)); +} + +DEFINE_TEST_MAIN(LOG_DEBUG); diff --git a/src/libsystemd/sd-bus/test-bus-objects.c b/src/libsystemd/sd-bus/test-bus-objects.c index 4ad60f0d58225..ac33086a6f374 100644 --- a/src/libsystemd/sd-bus/test-bus-objects.c +++ b/src/libsystemd/sd-bus/test-bus-objects.c @@ -1,8 +1,7 @@ /* SPDX-License-Identifier: LGPL-2.1-or-later */ -#include - #include "sd-bus.h" +#include "sd-future.h" #include "alloc-util.h" #include "bus-internal.h" @@ -211,9 +210,9 @@ static int enumerator3_callback(sd_bus *bus, const char *path, void *userdata, c return 1; } -static void* server(void *p) { - struct context *c = p; - sd_bus *bus = NULL; +static int server(void *userdata) { + struct context *c = ASSERT_PTR(userdata); + _cleanup_(sd_bus_flush_close_unrefp) sd_bus *bus = NULL; sd_id128_t id; int r; @@ -242,36 +241,25 @@ static void* server(void *p) { log_error("Loop!"); r = sd_bus_process(bus, NULL); - if (r < 0) { - log_error_errno(r, "Failed to process requests: %m"); - goto fail; - } + if (r < 0) + return log_error_errno(r, "Failed to process requests: %m"); if (r == 0) { r = sd_bus_wait(bus, UINT64_MAX); - if (r < 0) { - log_error_errno(r, "Failed to wait: %m"); - goto fail; - } + if (r < 0) + return log_error_errno(r, "Failed to wait: %m"); continue; } } - r = 0; - -fail: - if (bus) { - sd_bus_flush(bus); - sd_bus_unref(bus); - } - - return INT_TO_PTR(r); + return 0; } -static int client(struct context *c) { +static int client(void *p) { + struct context *c = ASSERT_PTR(p); _cleanup_(sd_bus_message_unrefp) sd_bus_message *reply = NULL; - _cleanup_(sd_bus_unrefp) sd_bus *bus = NULL; + _cleanup_(sd_bus_flush_close_unrefp) sd_bus *bus = NULL; _cleanup_(sd_bus_error_free) sd_bus_error error = SD_BUS_ERROR_NULL; _cleanup_strv_free_ char **lines = NULL; const char *s; @@ -575,16 +563,13 @@ static int client(struct context *c) { ASSERT_OK(sd_bus_call_method(bus, "org.freedesktop.systemd.test", "/foo", "org.freedesktop.systemd.test", "Exit", &error, NULL, NULL)); - sd_bus_flush(bus); - return 0; } int main(int argc, char *argv[]) { + _cleanup_(sd_event_unrefp) sd_event *e = NULL; + _cleanup_(sd_future_unrefp) sd_future *f_server = NULL, *f_client = NULL; struct context c = {}; - pthread_t s; - void *p; - int r, q; test_setup_logging(LOG_DEBUG); @@ -593,21 +578,16 @@ int main(int argc, char *argv[]) { ASSERT_OK_ERRNO(socketpair(AF_UNIX, SOCK_STREAM, 0, c.fds)); - r = pthread_create(&s, NULL, server, &c); - if (r != 0) - return -r; - - r = client(&c); + ASSERT_OK(sd_event_new(&e)); + ASSERT_OK(sd_event_set_exit_on_idle(e, true)); - q = pthread_join(s, &p); - if (q != 0) - return -q; + ASSERT_OK(sd_fiber_new(e, "server", server, &c, /* destroy= */ NULL, &f_server)); + ASSERT_OK(sd_fiber_new(e, "client", client, &c, /* destroy= */ NULL, &f_client)); - if (r < 0) - return r; + ASSERT_OK(sd_event_loop(e)); - if (PTR_TO_INT(p) < 0) - return PTR_TO_INT(p); + ASSERT_OK(sd_future_result(f_server)); + ASSERT_OK(sd_future_result(f_client)); free(c.something); free(c.automatic_string_property); diff --git a/src/libsystemd/sd-bus/test-bus-peersockaddr.c b/src/libsystemd/sd-bus/test-bus-peersockaddr.c index 2cac35dde4033..bee76c9b10ca7 100644 --- a/src/libsystemd/sd-bus/test-bus-peersockaddr.c +++ b/src/libsystemd/sd-bus/test-bus-peersockaddr.c @@ -1,9 +1,9 @@ /* SPDX-License-Identifier: LGPL-2.1-or-later */ -#include #include #include "sd-bus.h" +#include "sd-future.h" #include "bus-dump.h" #include "fd-util.h" @@ -38,9 +38,9 @@ static bool gid_list_same(const gid_t *a, size_t n, const gid_t *b, size_t m) { gid_list_contained(b, m, a, n); } -static void* server(void *p) { +static int server(void *userdata) { _cleanup_(sd_bus_flush_close_unrefp) sd_bus *bus = NULL; - _cleanup_close_ int listen_fd = PTR_TO_INT(p), fd = -EBADF; + _cleanup_close_ int listen_fd = PTR_TO_INT(userdata), fd = -EBADF; _cleanup_(sd_bus_creds_unrefp) sd_bus_creds *c = NULL; _cleanup_free_ char *our_comm = NULL; sd_id128_t id; @@ -48,7 +48,7 @@ static void* server(void *p) { ASSERT_OK(sd_id128_randomize(&id)); - ASSERT_OK_ERRNO(fd = accept4(listen_fd, NULL, NULL, SOCK_CLOEXEC|SOCK_NONBLOCK)); + ASSERT_OK(fd = sd_fiber_accept(listen_fd, NULL, NULL, SOCK_CLOEXEC|SOCK_NONBLOCK)); ASSERT_OK(sd_bus_new(&bus)); ASSERT_OK(sd_bus_set_fd(bus, fd, fd)); @@ -114,17 +114,18 @@ static void* server(void *p) { } } - return NULL; + return 0; } -static void* client(void *p) { +static int client(void *userdata) { _cleanup_(sd_bus_flush_close_unrefp) sd_bus *bus = NULL; _cleanup_(sd_bus_message_unrefp) sd_bus_message *reply = NULL; const char *z; ASSERT_OK(sd_bus_new(&bus)); ASSERT_OK(sd_bus_set_description(bus, "wuffwuff")); - ASSERT_OK(sd_bus_set_address(bus, p)); + ASSERT_OK(sd_bus_set_address(bus, userdata)); + ASSERT_OK(sd_bus_attach_event(bus, sd_fiber_get_event(), 0)); ASSERT_OK(sd_bus_start(bus)); ASSERT_OK(sd_bus_call_method(bus, "foo.foo", "/foo", "foo.foo", "Foo", NULL, &reply, "s", "foo")); @@ -132,17 +133,18 @@ static void* client(void *p) { ASSERT_OK(sd_bus_message_read(reply, "s", &z)); ASSERT_STREQ(z, "bar"); - return NULL; + return 0; } TEST(description) { + _cleanup_(sd_event_unrefp) sd_event *e = NULL; + _cleanup_(sd_future_unrefp) sd_future *f_server = NULL, *f_client = NULL; _cleanup_free_ char *a = NULL; _cleanup_close_ int fd = -EBADF; union sockaddr_union sa = { .un.sun_family = AF_UNIX, }; socklen_t salen; - pthread_t s, c; ASSERT_OK_ERRNO(fd = socket(AF_UNIX, SOCK_STREAM|SOCK_CLOEXEC, 0)); ASSERT_OK_ERRNO(bind(fd, &sa.sa, offsetof(struct sockaddr_un, sun_path))); /* force auto-bind */ @@ -155,13 +157,18 @@ TEST(description) { ASSERT_OK(asprintf(&a, "unix:abstract=%s", sa.un.sun_path + 1)); - ASSERT_OK(-pthread_create(&s, NULL, server, INT_TO_PTR(fd))); + ASSERT_OK(sd_event_new(&e)); + ASSERT_OK(sd_event_set_exit_on_idle(e, true)); + + ASSERT_OK(sd_fiber_new(e, "server", server, INT_TO_PTR(fd), /* destroy= */ NULL, &f_server)); TAKE_FD(fd); - ASSERT_OK(-pthread_create(&c, NULL, client, a)); + ASSERT_OK(sd_fiber_new(e, "client", client, a, /* destroy= */ NULL, &f_client)); + + ASSERT_OK(sd_event_loop(e)); - ASSERT_OK(-pthread_join(s, NULL)); - ASSERT_OK(-pthread_join(c, NULL)); + ASSERT_OK(sd_future_result(f_server)); + ASSERT_OK(sd_future_result(f_client)); } DEFINE_TEST_MAIN(LOG_INFO); diff --git a/src/libsystemd/sd-bus/test-bus-server.c b/src/libsystemd/sd-bus/test-bus-server.c index 989d2bf10dcaa..1edcec858f2ac 100644 --- a/src/libsystemd/sd-bus/test-bus-server.c +++ b/src/libsystemd/sd-bus/test-bus-server.c @@ -1,10 +1,12 @@ /* SPDX-License-Identifier: LGPL-2.1-or-later */ -#include #include #include "sd-bus.h" +#include "sd-event.h" +#include "sd-future.h" +#include "errno-util.h" #include "log.h" #include "memory-util.h" #include "string-util.h" @@ -20,7 +22,8 @@ struct context { bool server_anonymous_auth; }; -static int _server(struct context *c) { +static int server(void *userdata) { + struct context *c = ASSERT_PTR(userdata); _cleanup_(sd_bus_flush_close_unrefp) sd_bus *bus = NULL; sd_id128_t id; bool quit = false; @@ -29,6 +32,7 @@ static int _server(struct context *c) { ASSERT_OK(sd_id128_randomize(&id)); ASSERT_OK(sd_bus_new(&bus)); + ASSERT_OK(sd_bus_set_description(bus, "server")); ASSERT_OK(sd_bus_set_fd(bus, c->fds[0], c->fds[0])); ASSERT_OK(sd_bus_set_server(bus, 1, id)); ASSERT_OK(sd_bus_set_anonymous(bus, c->server_anonymous_auth)); @@ -74,17 +78,16 @@ static int _server(struct context *c) { return 0; } -static void* server(void *p) { - return INT_TO_PTR(_server(p)); -} - -static int client(struct context *c) { +static int client(void *userdata) { + struct context *c = ASSERT_PTR(userdata); _cleanup_(sd_bus_message_unrefp) sd_bus_message *m = NULL, *reply = NULL; - _cleanup_(sd_bus_unrefp) sd_bus *bus = NULL; + _cleanup_(sd_bus_flush_close_unrefp) sd_bus *bus = NULL; _cleanup_(sd_bus_error_free) sd_bus_error error = SD_BUS_ERROR_NULL; ASSERT_OK(sd_bus_new(&bus)); + ASSERT_OK(sd_bus_set_description(bus, "client")); ASSERT_OK(sd_bus_set_fd(bus, c->fds[1], c->fds[1])); + ASSERT_OK(sd_bus_attach_event(bus, sd_fiber_get_event(), 0)); ASSERT_OK(sd_bus_negotiate_fds(bus, c->client_negotiate_unix_fds)); ASSERT_OK(sd_bus_set_anonymous(bus, c->client_anonymous_auth)); ASSERT_OK(sd_bus_start(bus)); @@ -103,10 +106,10 @@ static int client(struct context *c) { static int test_one(bool client_negotiate_unix_fds, bool server_negotiate_unix_fds, bool client_anonymous_auth, bool server_anonymous_auth) { + _cleanup_(sd_event_unrefp) sd_event *e = NULL; + _cleanup_(sd_future_unrefp) sd_future *f_server = NULL, *f_client = NULL; struct context c; - pthread_t s; - void *p; - int r, q; + int r = 0; zero(c); @@ -117,23 +120,18 @@ static int test_one(bool client_negotiate_unix_fds, bool server_negotiate_unix_f c.client_anonymous_auth = client_anonymous_auth; c.server_anonymous_auth = server_anonymous_auth; - r = pthread_create(&s, NULL, server, &c); - if (r != 0) - return -r; + ASSERT_OK(sd_event_new(&e)); + ASSERT_OK(sd_event_set_exit_on_idle(e, true)); - r = client(&c); + ASSERT_OK(sd_fiber_new(e, "server", server, &c, /* destroy= */ NULL, &f_server)); + ASSERT_OK(sd_fiber_new(e, "client", client, &c, /* destroy= */ NULL, &f_client)); - q = pthread_join(s, &p); - if (q != 0) - return -q; + ASSERT_OK(sd_event_loop(e)); - if (r < 0) - return r; + RET_GATHER(r, sd_future_result(f_client)); + RET_GATHER(r, sd_future_result(f_server)); - if (PTR_TO_INT(p) < 0) - return PTR_TO_INT(p); - - return 0; + return r; } int main(int argc, char *argv[]) { @@ -145,7 +143,7 @@ int main(int argc, char *argv[]) { ASSERT_OK(test_one(false, false, false, false)); ASSERT_OK(test_one(true, true, true, true)); ASSERT_OK(test_one(true, true, false, true)); - ASSERT_ERROR(test_one(true, true, true, false), EPERM); + ASSERT_ERROR(test_one(true, true, true, false), EACCES); return EXIT_SUCCESS; } diff --git a/src/libsystemd/sd-bus/test-bus-watch-bind.c b/src/libsystemd/sd-bus/test-bus-watch-bind.c index 1bf4ee7017119..6561633b8a823 100644 --- a/src/libsystemd/sd-bus/test-bus-watch-bind.c +++ b/src/libsystemd/sd-bus/test-bus-watch-bind.c @@ -1,10 +1,8 @@ /* SPDX-License-Identifier: LGPL-2.1-or-later */ -#include -#include - #include "sd-bus.h" #include "sd-event.h" +#include "sd-future.h" #include "sd-id128.h" #include "alloc-util.h" @@ -44,33 +42,33 @@ static const sd_bus_vtable vtable[] = { SD_BUS_VTABLE_END, }; -static void* thread_server(void *p) { +static int server(void *userdata) { _cleanup_free_ char *suffixed = NULL, *suffixed_basename = NULL, *suffixed2 = NULL, *d = NULL; _cleanup_close_ int fd = -EBADF; union sockaddr_union u; - const char *path = p; + const char *path = ASSERT_PTR(userdata); int r; log_debug("Initializing server"); /* Let's play some games, by slowly creating the socket directory, and renaming it in the middle */ - usleep_safe(100 * USEC_PER_MSEC); + ASSERT_OK(sd_fiber_sleep(100 * USEC_PER_MSEC)); ASSERT_OK(mkdir_parents(path, 0755)); - usleep_safe(100 * USEC_PER_MSEC); + ASSERT_OK(sd_fiber_sleep(100 * USEC_PER_MSEC)); ASSERT_OK(path_extract_directory(path, &d)); ASSERT_OK(asprintf(&suffixed, "%s.%" PRIx64, d, random_u64())); ASSERT_OK_ERRNO(rename(d, suffixed)); - usleep_safe(100 * USEC_PER_MSEC); + ASSERT_OK(sd_fiber_sleep(100 * USEC_PER_MSEC)); ASSERT_OK(asprintf(&suffixed2, "%s.%" PRIx64, d, random_u64())); ASSERT_OK_ERRNO(symlink(suffixed2, d)); - usleep_safe(100 * USEC_PER_MSEC); + ASSERT_OK(sd_fiber_sleep(100 * USEC_PER_MSEC)); ASSERT_OK(path_extract_filename(suffixed, &suffixed_basename)); ASSERT_OK_ERRNO(symlink(suffixed_basename, suffixed2)); - usleep_safe(100 * USEC_PER_MSEC); + ASSERT_OK(sd_fiber_sleep(100 * USEC_PER_MSEC)); socklen_t sa_len; r = sockaddr_un_set_path(&u.un, path); @@ -81,13 +79,13 @@ static void* thread_server(void *p) { ASSERT_OK_ERRNO(fd); ASSERT_OK_ERRNO(bind(fd, &u.sa, sa_len)); - usleep_safe(100 * USEC_PER_MSEC); + ASSERT_OK(sd_fiber_sleep(100 * USEC_PER_MSEC)); ASSERT_OK_ERRNO(listen(fd, SOMAXCONN_DELUXE)); - usleep_safe(100 * USEC_PER_MSEC); + ASSERT_OK(sd_fiber_sleep(100 * USEC_PER_MSEC)); ASSERT_OK(touch(path)); - usleep_safe(100 * USEC_PER_MSEC); + ASSERT_OK(sd_fiber_sleep(100 * USEC_PER_MSEC)); log_debug("Initialized server"); @@ -101,8 +99,7 @@ static void* thread_server(void *p) { ASSERT_OK(sd_event_new(&event)); - bus_fd = accept4(fd, NULL, NULL, SOCK_NONBLOCK|SOCK_CLOEXEC); - ASSERT_OK_ERRNO(bus_fd); + ASSERT_OK(bus_fd = sd_fiber_accept(fd, NULL, NULL, SOCK_NONBLOCK|SOCK_CLOEXEC)); log_debug("Accepted server connection"); @@ -129,13 +126,13 @@ static void* thread_server(void *p) { log_debug("Server done"); - return NULL; + return 0; } -static void* thread_client1(void *p) { +static int client1(void *userdata) { _cleanup_(sd_bus_error_free) sd_bus_error error = SD_BUS_ERROR_NULL; _cleanup_(sd_bus_flush_close_unrefp) sd_bus *bus = NULL; - const char *path = p, *t; + const char *path = ASSERT_PTR(userdata), *t; log_debug("Initializing client1"); @@ -151,59 +148,65 @@ static void* thread_client1(void *p) { log_debug("Client1 done"); - return NULL; -} - -static int client2_callback(sd_bus_message *m, void *userdata, sd_bus_error *ret_error) { - ASSERT_OK_ZERO(sd_bus_message_is_method_error(m, NULL)); - ASSERT_OK(sd_event_exit(sd_bus_get_event(sd_bus_message_get_bus(m)), 0)); return 0; } -static void* thread_client2(void *p) { +static int client2(void *userdata) { _cleanup_(sd_bus_flush_close_unrefp) sd_bus *bus = NULL; - _cleanup_(sd_event_unrefp) sd_event *event = NULL; - const char *path = p, *t; + const char *path = ASSERT_PTR(userdata), *t; log_debug("Initializing client2"); - ASSERT_OK(sd_event_new(&event)); ASSERT_OK(sd_bus_new(&bus)); ASSERT_OK(sd_bus_set_description(bus, "client2")); t = strjoina("unix:path=", path); ASSERT_OK(sd_bus_set_address(bus, t)); ASSERT_OK(sd_bus_set_watch_bind(bus, true)); - ASSERT_OK(sd_bus_attach_event(bus, event, 0)); + ASSERT_OK(sd_bus_attach_event(bus, sd_fiber_get_event(), 0)); ASSERT_OK(sd_bus_start(bus)); - ASSERT_OK(sd_bus_call_method_async(bus, NULL, "foo.bar", "/foo", "foo.TestInterface", "Foobar", client2_callback, NULL, NULL)); - - ASSERT_OK(sd_event_loop(event)); + _cleanup_(sd_bus_message_unrefp) sd_bus_message *m = NULL; + ASSERT_OK(sd_bus_call_method(bus, "foo.bar", "/foo", "foo.TestInterface", "Foobar", NULL, &m, NULL)); + ASSERT_OK_ZERO(sd_bus_message_is_method_error(m, NULL)); log_debug("Client2 done"); - return NULL; + return 0; } -static void request_exit(const char *path) { +typedef struct RequestExitArgs { + const char *path; + sd_future *client1; + sd_future *client2; +} RequestExitArgs; + +static int request_exit(void *userdata) { _cleanup_(sd_bus_flush_close_unrefp) sd_bus *bus = NULL; + RequestExitArgs *args = ASSERT_PTR(userdata); const char *t; + /* Wait for all client fibers to complete before requesting exit */ + ASSERT_OK(sd_fiber_await(args->client1)); + ASSERT_OK(sd_fiber_await(args->client2)); + ASSERT_OK(sd_bus_new(&bus)); - t = strjoina("unix:path=", path); + t = strjoina("unix:path=", args->path); ASSERT_OK(sd_bus_set_address(bus, t)); ASSERT_OK(sd_bus_set_watch_bind(bus, true)); ASSERT_OK(sd_bus_set_description(bus, "request-exit")); ASSERT_OK(sd_bus_start(bus)); ASSERT_OK(sd_bus_call_method(bus, "foo.bar", "/foo", "foo.TestInterface", "Exit", NULL, NULL, NULL)); + + return 0; } int main(int argc, char *argv[]) { _cleanup_(rm_rf_physical_and_freep) char *d = NULL; - pthread_t server, client1, client2; + _cleanup_(sd_event_unrefp) sd_event *e = NULL; + _cleanup_(sd_future_unrefp) sd_future *f_server = NULL, *f_client1 = NULL, *f_client2 = NULL, *f_exit = NULL; char *path; test_setup_logging(LOG_DEBUG); @@ -214,16 +217,27 @@ int main(int argc, char *argv[]) { path = strjoina(d, "/this/is/a/socket"); - ASSERT_OK(-pthread_create(&server, NULL, thread_server, path)); - ASSERT_OK(-pthread_create(&client1, NULL, thread_client1, path)); - ASSERT_OK(-pthread_create(&client2, NULL, thread_client2, path)); + ASSERT_OK(sd_event_new(&e)); + ASSERT_OK(sd_event_set_exit_on_idle(e, true)); - ASSERT_OK(-pthread_join(client1, NULL)); - ASSERT_OK(-pthread_join(client2, NULL)); + ASSERT_OK(sd_fiber_new(e, "server", server, path, /* destroy= */ NULL, &f_server)); - request_exit(path); + ASSERT_OK(sd_fiber_new(e, "client-1", client1, path, /* destroy= */ NULL, &f_client1)); + ASSERT_OK(sd_fiber_new(e, "client-2", client2, path, /* destroy= */ NULL, &f_client2)); - ASSERT_OK(-pthread_join(server, NULL)); + RequestExitArgs args = { + .path = path, + .client1 = f_client1, + .client2 = f_client2, + }; + ASSERT_OK(sd_fiber_new(e, "request-exit", request_exit, &args, /* destroy= */ NULL, &f_exit)); - return 0; + ASSERT_OK(sd_event_loop(e)); + + ASSERT_OK(sd_future_result(f_client1)); + ASSERT_OK(sd_future_result(f_client2)); + ASSERT_OK(sd_future_result(f_exit)); + ASSERT_OK(sd_future_result(f_server)); + + return EXIT_SUCCESS; } diff --git a/src/systemd/sd-bus-vtable.h b/src/systemd/sd-bus-vtable.h index 5c11ca8ae5b71..036bda3fe47e9 100644 --- a/src/systemd/sd-bus-vtable.h +++ b/src/systemd/sd-bus-vtable.h @@ -44,6 +44,7 @@ __extension__ enum { SD_BUS_VTABLE_PROPERTY_EXPLICIT = 1ULL << 7, SD_BUS_VTABLE_SENSITIVE = 1ULL << 8, /* covers both directions: method call + reply */ SD_BUS_VTABLE_ABSOLUTE_OFFSET = 1ULL << 9, + /* Bit 10 is reserved for the private SD_BUS_VTABLE_METHOD_FIBER flag (see bus-internal.h). */ _SD_BUS_VTABLE_CAPABILITY_MASK = 0xFFFFULL << 40 }; From 9aa2ae0d97bb6b5e74b8d8d8544550453adf1fb1 Mon Sep 17 00:00:00 2001 From: Daan De Meyer Date: Tue, 14 Apr 2026 08:54:49 +0000 Subject: [PATCH 240/242] sd-varlink: make sd-varlink fiber-aware MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add varlink_server_bind_fiber() and varlink_server_bind_fiber_many() in varlink-util.{c,h} for registering a method handler that should run on a dedicated fiber per dispatch. The fiber-bound methods live in a separate s->fiber_methods map alongside the regular s->methods; bind_internal()/bind_many_internal() are factored out so the regular and fiber bind variants share their parsing/insertion code. Registering the same method in both maps is rejected because the dispatcher consults the regular map first and would otherwise silently shadow the fiber binding. varlink_dispatch_fiber() builds a VarlinkFiberData (refs to the connection, parameters, and method name), spawns a fiber via sd_fiber_new(), and makes the future floating so the fiber self-manages its lifetime — neither the dispatcher nor the connection has to track it. The fiber's priority is set to one below the connection's quit event source so that on graceful shutdown the fiber's exit handler fires (and runs its cleanup) before varlink's quit_callback() closes the connection underneath it; this is what lets a fiber-bound handler reply or flush its sentinel on a still-open connection during shutdown. The connection state transitions are reordered so they happen before the fiber spawn rather than after the synchronous callback returns: the fiber runs after dispatch has already moved past PROCESSING, which matches the behaviour expected for a deferred reply (the fiber may either reply immediately, or stash the connection and reply later, in which case the post-callback logic treats it as a PENDING_METHOD). Note that all the synchronous varlink APIs (sd_varlink_call() and friends) already behave properly when on a fiber because they call json_stream_wait() which calls ppoll_usec() which we already fixed to suspend when called from a fiber. The client/server varlink tests are migrated to fibers (threads → mock server fibers on the same event loop) to exercise the new paths. --- src/libsystemd/sd-varlink/sd-varlink.c | 301 +++++++++++++--- src/libsystemd/sd-varlink/test-varlink.c | 360 ++++++++++++++----- src/libsystemd/sd-varlink/varlink-internal.h | 3 +- src/libsystemd/sd-varlink/varlink-util.h | 4 + 4 files changed, 528 insertions(+), 140 deletions(-) diff --git a/src/libsystemd/sd-varlink/sd-varlink.c b/src/libsystemd/sd-varlink/sd-varlink.c index 8e43e38800bde..3427a61f71a2c 100644 --- a/src/libsystemd/sd-varlink/sd-varlink.c +++ b/src/libsystemd/sd-varlink/sd-varlink.c @@ -6,6 +6,7 @@ #include "sd-daemon.h" #include "sd-event.h" +#include "sd-future.h" #include "sd-varlink.h" #include "alloc-util.h" @@ -37,6 +38,7 @@ #include "varlink-internal.h" #include "varlink-io.systemd.h" #include "varlink-org.varlink.service.h" +#include "varlink-util.h" #define VARLINK_DEFAULT_CONNECTIONS_MAX 4096U #define VARLINK_DEFAULT_CONNECTIONS_PER_UID_MAX 128U @@ -956,6 +958,178 @@ static int generic_method_get_interface_description( SD_JSON_BUILD_PAIR_STRING("description", text)); } +static int varlink_dispatch_sentinel(sd_varlink *v) { + int r; + + assert(v); + assert(v->sentinel); + + if (v->previous) { + r = json_stream_enqueue_full(&v->stream, v->previous, v->previous_fds, v->n_previous_fds); + if (r >= 0) { + v->previous = sd_json_variant_unref(v->previous); + v->previous_fds = mfree(v->previous_fds); + v->n_previous_fds = 0; + /* Mirror sd_varlink_reply()'s post-enqueue state machine: PENDING_* means we're + * outside the dispatch stack frame (e.g. called from varlink_fiber_entry after + * the fiber returned), so we go straight to IDLE_SERVER ourselves. PROCESSING_* + * means we're inside varlink_dispatch_method(), which will transition us. */ + if (IN_SET(v->state, VARLINK_PENDING_METHOD, VARLINK_PENDING_METHOD_MORE)) { + varlink_clear_current(v); + varlink_set_state(v, VARLINK_IDLE_SERVER); + } else + varlink_set_state(v, VARLINK_PROCESSED_METHOD); + } + + return r; + } + + char *sentinel = TAKE_PTR(v->sentinel); + + /* Propagate the sentinel to the client if one was configured and no replies were enqueued by + * the callback. */ + if (sentinel == POINTER_MAX) + r = sd_varlink_reply(v, NULL); + else { + r = sd_varlink_error(v, sentinel, NULL); + /* sd_varlink_error() deliberately returns a negative + * errno mapped from the error id on success (so method + * callbacks can `return sd_varlink_error(...);` to + * enqueue a reply and propagate a matching errno in one + * go). For sentinel dispatch we don't care about that + * mapping — the reply is either enqueued or not, which + * we detect via the state transition instead. */ + if (IN_SET(v->state, VARLINK_PROCESSED_METHOD, VARLINK_IDLE_SERVER)) + r = 0; + } + + if (sentinel != POINTER_MAX) + free(sentinel); + + return r; +} + +typedef struct VarlinkFiberData { + sd_varlink *link; + sd_json_variant *parameters; + sd_varlink_method_flags_t flags; + void *userdata; + sd_varlink_method_t callback; +} VarlinkFiberData; + +static VarlinkFiberData* varlink_fiber_data_free(VarlinkFiberData *d) { + if (!d) + return NULL; + + sd_json_variant_unref(d->parameters); + sd_varlink_unref(d->link); + return mfree(d); +} + +DEFINE_TRIVIAL_CLEANUP_FUNC(VarlinkFiberData*, varlink_fiber_data_free); + +static void varlink_fiber_data_destroy(void *userdata) { + varlink_fiber_data_free(userdata); +} + +static int varlink_fiber_entry(void *userdata) { + VarlinkFiberData *d = ASSERT_PTR(userdata); + sd_varlink *v = d->link; + int r; + + r = d->callback(v, d->parameters, d->flags, d->userdata); + + /* The fiber runs after varlink_dispatch_method() has already transitioned the state from + * VARLINK_PROCESSING_METHOD{,_MORE} to VARLINK_PENDING_METHOD{,_MORE}, so that's what we match + * here to decide whether the call still needs a reply. Any other state (e.g. IDLE_SERVER after + * the callback replied, or DISCONNECTED after sd_varlink_close()) means no fixup is needed. */ + if (!IN_SET(v->state, VARLINK_PENDING_METHOD, VARLINK_PENDING_METHOD_MORE)) + return r; + + if (r < 0) { + varlink_log_errno(v, r, "Fiber returned error: %m"); + + /* Propagate error to the client if the method call remains unanswered. */ + r = sd_varlink_error_errno(v, r); + } else if (v->sentinel) { + r = varlink_dispatch_sentinel(v); + if (r < 0) + varlink_log_errno(v, r, "Failed to process sentinel: %m"); + } else if (v->n_ref <= 2) { + /* Bare minimum refs (server + fiber data) means the connection wasn't stashed + * to reply later, so the fiber was supposed to reply itself but didn't. */ + r = varlink_log_errno(v, SYNTHETIC_ERRNO(EPROTO), + "Fiber returned without enqueuing a reply or stashing connection, failing."); + goto fail; + } else + r = 0; + + /* If we didn't manage to enqueue a response, then fail the connection completely. */ + if (r < 0 && IN_SET(v->state, VARLINK_PENDING_METHOD, VARLINK_PENDING_METHOD_MORE)) + goto fail; + + return r; + +fail: + varlink_set_state(v, VARLINK_PROCESSING_FAILURE); + varlink_dispatch_local_error(v, SD_VARLINK_ERROR_PROTOCOL); + sd_varlink_close(v); + + return r; +} + +static int varlink_dispatch_fiber(sd_varlink *v, const char *method, sd_varlink_method_t callback, sd_json_variant *parameters, sd_varlink_method_flags_t flags) { + int r; + + assert(v); + assert(v->server); + assert(method); + assert(callback); + + if (!v->server->event) + return varlink_log_errno(v, SYNTHETIC_ERRNO(ENOTCONN), + "Cannot dispatch fiber method without event loop."); + + _cleanup_(varlink_fiber_data_freep) VarlinkFiberData *d = new(VarlinkFiberData, 1); + if (!d) + return log_oom_debug(); + + *d = (VarlinkFiberData) { + .link = sd_varlink_ref(v), + .parameters = sd_json_variant_ref(parameters), + .flags = flags, + .userdata = v->userdata, + .callback = callback, + }; + + _cleanup_(sd_future_unrefp) sd_future *f = NULL; + r = sd_fiber_new(v->server->event, method, varlink_fiber_entry, d, varlink_fiber_data_destroy, &f); + if (r < 0) + return r; + + TAKE_PTR(d); /* The fiber owns the data now. */ + + /* Run the fiber at a higher priority than the connection's quit event source, so that on event + * loop exit the fiber's exit source (which cancels it and drives its cleanup) fires before + * varlink's quit_callback closes the connection. This lets a fiber handler reply with an error + * or flush its sentinel on a still-open connection during graceful shutdown. */ + int64_t priority; + r = sd_event_source_get_priority(v->quit_event_source, &priority); + if (r < 0) + return r; + + r = sd_future_set_priority(f, priority > INT64_MIN ? priority - 1 : priority); + if (r < 0) + return r; + + /* Hand the future's lifetime over to the event loop: it'll auto-unref on resolve. */ + r = sd_fiber_set_floating(f, true); + if (r < 0) + return r; + + return 0; +} + static int varlink_dispatch_method(sd_varlink *v) { _cleanup_(sd_json_variant_unrefp) sd_json_variant *parameters = NULL; sd_varlink_method_flags_t flags = 0; @@ -1053,7 +1227,13 @@ static int varlink_dispatch_method(sd_varlink *v) { v->protocol_upgrade || FLAGS_SET(v->server->flags, SD_VARLINK_SERVER_UPGRADABLE)); /* First consult user supplied method implementations */ + bool is_fiber = false; callback = hashmap_get(v->server->methods, method); + if (!callback) { + callback = hashmap_get(v->server->fiber_methods, method); + if (callback) + is_fiber = true; + } if (!callback) { if (streq(method, "org.varlink.service.GetInfo")) callback = generic_method_get_info; @@ -1105,7 +1285,13 @@ static int varlink_dispatch_method(sd_varlink *v) { } if (!invalid) { - r = callback(v, parameters, flags, v->userdata); + if (is_fiber) + /* Spawn a fiber to run the callback. The VarlinkFiberData takes a ref on the + * connection (bumping n_ref above 2), so the post-callback logic below treats + * this as a deferred reply and moves state to PENDING_METHOD. */ + r = varlink_dispatch_fiber(v, method, callback, parameters, flags); + else + r = callback(v, parameters, flags, v->userdata); if (VARLINK_STATE_WANTS_REPLY(v->state)) { if (r < 0) { varlink_log_errno(v, r, "Callback for '%s' returned error: %m", method); @@ -1114,37 +1300,7 @@ static int varlink_dispatch_method(sd_varlink *v) { * if the method call remains unanswered. */ r = sd_varlink_error_errno(v, r); } else if (v->sentinel) { - if (v->previous) { - r = json_stream_enqueue_full(&v->stream, v->previous, v->previous_fds, v->n_previous_fds); - if (r >= 0) { - v->previous = sd_json_variant_unref(v->previous); - v->previous_fds = mfree(v->previous_fds); - v->n_previous_fds = 0; - varlink_set_state(v, VARLINK_PROCESSED_METHOD); - } - } else { - char *sentinel = TAKE_PTR(v->sentinel); - - /* Propagate the sentinel to the client if one was configured - * and no replies were enqueued by the callback. */ - if (sentinel == POINTER_MAX) - r = sd_varlink_reply(v, NULL); - else { - r = sd_varlink_error(v, sentinel, NULL); - /* sd_varlink_error() deliberately returns a negative - * errno mapped from the error id on success (so method - * callbacks can `return sd_varlink_error(...);` to - * enqueue a reply and propagate a matching errno in one - * go). For sentinel dispatch we don't care about that - * mapping — the reply is either enqueued or not, which - * we detect via the state transition instead. */ - if (v->state == VARLINK_PROCESSED_METHOD) - r = 0; - } - - if (sentinel != POINTER_MAX) - free(sentinel); - } + r = varlink_dispatch_sentinel(v); if (r < 0) varlink_log_errno(v, r, "Failed to process sentinel for method '%s': %m", method); } else { @@ -2596,8 +2752,12 @@ _public_ int sd_varlink_set_sentinel(sd_varlink *v, const char *error_id) { if (v->state == VARLINK_PROCESSING_METHOD_ONEWAY) return 0; - /* This has to be called during a callback, and not after it has exited. */ - assert_return(IN_SET(v->state, VARLINK_PROCESSING_METHOD, VARLINK_PROCESSING_METHOD_MORE), + /* This has to be called during a callback, and not after it has exited. The PENDING states + * apply to fiber callbacks, which run after varlink_dispatch_method() has already transitioned + * the state from PROCESSING to PENDING. */ + assert_return(IN_SET(v->state, + VARLINK_PROCESSING_METHOD, VARLINK_PROCESSING_METHOD_MORE, + VARLINK_PENDING_METHOD, VARLINK_PENDING_METHOD_MORE), -EUCLEAN); char *s = NULL; @@ -2899,7 +3059,11 @@ static sd_varlink_server* varlink_server_destroy(sd_varlink_server *s) { while ((m = hashmap_steal_first_key(s->methods))) free(m); + while ((m = hashmap_steal_first_key(s->fiber_methods))) + free(m); + hashmap_free(s->methods); + hashmap_free(s->fiber_methods); hashmap_free(s->interfaces); hashmap_free(s->symbols); hashmap_free(s->by_uid); @@ -3590,23 +3754,32 @@ static bool varlink_symbol_in_interface(const char *method, const char *interfac return !strchr(p+1, '.'); } -_public_ int sd_varlink_server_bind_method(sd_varlink_server *s, const char *method, sd_varlink_method_t callback) { +static int varlink_server_bind_internal(sd_varlink_server *s, Hashmap **methods, const char *method, sd_varlink_method_t callback) { _cleanup_free_ char *m = NULL; int r; - assert_return(s, -EINVAL); - assert_return(method, -EINVAL); - assert_return(callback, -EINVAL); + assert(s); + assert(methods); + assert(method); + assert(callback); if (varlink_symbol_in_interface(method, "org.varlink.service") || varlink_symbol_in_interface(method, "io.systemd")) return varlink_server_log_errno(s, SYNTHETIC_ERRNO(EEXIST), "Cannot bind server to '%s'.", method); + /* Refuse to register the same method in both the regular and fiber method maps: the dispatcher + * always consults methods first and would silently ignore a shadowed fiber_methods entry (or vice + * versa), hiding the misconfiguration. */ + Hashmap *other = methods == &s->methods ? s->fiber_methods : s->methods; + if (hashmap_contains(other, method)) + return varlink_server_log_errno(s, SYNTHETIC_ERRNO(EEXIST), + "Method '%s' is already bound in the other method map.", method); + m = strdup(method); if (!m) return log_oom_debug(); - r = hashmap_ensure_put(&s->methods, &string_hash_ops, m, callback); + r = hashmap_ensure_put(methods, &string_hash_ops, m, callback); if (r == -ENOMEM) return log_oom_debug(); if (r < 0) @@ -3617,13 +3790,12 @@ _public_ int sd_varlink_server_bind_method(sd_varlink_server *s, const char *met return 0; } -_public_ int sd_varlink_server_bind_method_many_internal(sd_varlink_server *s, ...) { - va_list ap; +static int varlink_server_bind_many_internal(sd_varlink_server *s, Hashmap **methods, va_list ap) { int r = 0; - assert_return(s, -EINVAL); + assert(s); + assert(methods); - va_start(ap, s); for (;;) { sd_varlink_method_t callback; const char *method; @@ -3634,10 +3806,51 @@ _public_ int sd_varlink_server_bind_method_many_internal(sd_varlink_server *s, . callback = va_arg(ap, sd_varlink_method_t); - r = sd_varlink_server_bind_method(s, method, callback); + r = varlink_server_bind_internal(s, methods, method, callback); if (r < 0) break; } + + return r; +} + +_public_ int sd_varlink_server_bind_method(sd_varlink_server *s, const char *method, sd_varlink_method_t callback) { + assert_return(s, -EINVAL); + assert_return(method, -EINVAL); + assert_return(callback, -EINVAL); + + return varlink_server_bind_internal(s, &s->methods, method, callback); +} + +_public_ int sd_varlink_server_bind_method_many_internal(sd_varlink_server *s, ...) { + va_list ap; + int r; + + assert_return(s, -EINVAL); + + va_start(ap, s); + r = varlink_server_bind_many_internal(s, &s->methods, ap); + va_end(ap); + + return r; +} + +int varlink_server_bind_fiber(sd_varlink_server *s, const char *method, sd_varlink_method_t callback) { + assert_return(s, -EINVAL); + assert_return(method, -EINVAL); + assert_return(callback, -EINVAL); + + return varlink_server_bind_internal(s, &s->fiber_methods, method, callback); +} + +int varlink_server_bind_fiber_many_internal(sd_varlink_server *s, ...) { + va_list ap; + int r; + + assert_return(s, -EINVAL); + + va_start(ap, s); + r = varlink_server_bind_many_internal(s, &s->fiber_methods, ap); va_end(ap); return r; diff --git a/src/libsystemd/sd-varlink/test-varlink.c b/src/libsystemd/sd-varlink/test-varlink.c index 72edc033dd068..a628b9f701953 100644 --- a/src/libsystemd/sd-varlink/test-varlink.c +++ b/src/libsystemd/sd-varlink/test-varlink.c @@ -2,12 +2,12 @@ #include #include -#include #include #include #include #include "sd-event.h" +#include "sd-future.h" #include "sd-json.h" #include "sd-varlink.h" @@ -214,7 +214,12 @@ static void flood_test(const char *address) { /* Block the main event loop while we flood */ ASSERT_OK_EQ_ERRNO(write(block_write_fd, &x, sizeof(x)), (ssize_t) sizeof(x)); - ASSERT_OK(sd_event_default(&e)); + /* Create a fresh event loop for the flood test — we can't reuse the default event because the + * main test (and the fiber we're running in) is already running it, and sd_event_loop() asserts + * the event is in the INITIAL state. Exit-on-idle so the nested loop terminates once the + * overload reply has been received and all other work is quiesced. */ + ASSERT_OK(sd_event_new(&e)); + ASSERT_OK(sd_event_set_exit_on_idle(e, true)); /* Flood the server with connections */ ASSERT_NOT_NULL(connections = new0(sd_varlink*, OVERLOAD_CONNECTIONS)); @@ -249,7 +254,7 @@ static void flood_test(const char *address) { connections[k] = sd_varlink_unref(connections[k]); } -static void *thread(void *arg) { +static int client_fiber(void *arg) { _cleanup_(sd_varlink_flush_close_unrefp) sd_varlink *c = NULL; _cleanup_(sd_json_variant_unrefp) sd_json_variant *i = NULL; _cleanup_(sd_json_variant_unrefp) sd_json_variant *wrong = NULL; @@ -261,7 +266,7 @@ static void *thread(void *arg) { SD_JSON_BUILD_PAIR_INTEGER("b", 99)))); ASSERT_OK(sd_varlink_connect_address(&c, arg)); - ASSERT_OK(sd_varlink_set_description(c, "thread-client")); + ASSERT_OK(sd_varlink_set_description(c, "fiber-client")); ASSERT_OK(sd_varlink_set_allow_fd_passing_input(c, true)); ASSERT_OK(sd_varlink_set_allow_fd_passing_output(c, true)); @@ -319,7 +324,7 @@ static void *thread(void *arg) { ASSERT_OK(sd_varlink_send(c, "io.test.Done", NULL)); - return NULL; + return 0; } static int block_fd_handler(sd_event_source *s, int fd, uint32_t revents, void *userdata) { @@ -346,8 +351,8 @@ TEST(chat) { _cleanup_(rm_rf_physical_and_freep) char *tmpdir = NULL; _cleanup_(sd_json_variant_unrefp) sd_json_variant *v = NULL; _cleanup_(sd_event_unrefp) sd_event *e = NULL; + _cleanup_(sd_future_unrefp) sd_future *f = NULL; _cleanup_close_pair_ int block_fds[2] = EBADF_PAIR; - pthread_t t; const char *sp; ASSERT_OK(mkdtemp_malloc("/tmp/varlink-test-XXXXXX", &tmpdir)); @@ -386,11 +391,11 @@ TEST(chat) { ASSERT_OK(sd_varlink_attach_event(c, e, 0)); - ASSERT_OK(-pthread_create(&t, NULL, thread, (void*) sp)); + ASSERT_OK(sd_fiber_new(e, "client", client_fiber, (void*) sp, /* destroy= */ NULL, &f)); ASSERT_OK(sd_event_loop(e)); - ASSERT_OK(-pthread_join(t, NULL)); + ASSERT_OK(sd_future_result(f)); } static int method_invalid(sd_varlink *link, sd_json_variant *parameters, sd_varlink_method_flags_t flags, void *userdata) { @@ -616,6 +621,173 @@ TEST(sentinel_oneway) { ASSERT_OK(sd_event_loop(e)); } +static int method_fiber_sentinel_error(sd_varlink *link, sd_json_variant *parameters, sd_varlink_method_flags_t flags, void *userdata) { + /* Set an error sentinel from a fiber callback and return without sending a reply. The sentinel + * error should still be propagated by the fiber's post-callback logic, even though the varlink + * state has already been transitioned to VARLINK_PENDING_METHOD by the time the fiber runs. */ + ASSERT_OK(sd_varlink_set_sentinel(link, "io.test.SentinelError")); + return 0; +} + +TEST(fiber_sentinel_error) { + _cleanup_(sd_event_unrefp) sd_event *e = NULL; + ASSERT_OK(sd_event_default(&e)); + + _cleanup_(sd_varlink_server_unrefp) sd_varlink_server *s = NULL; + ASSERT_OK(sd_varlink_server_new(&s, 0)); + + ASSERT_OK(sd_varlink_server_attach_event(s, e, 0)); + + ASSERT_OK(varlink_server_bind_fiber(s, "io.test.FiberSentinelError", method_fiber_sentinel_error)); + + int connfd[2]; + ASSERT_OK_ERRNO(socketpair(AF_UNIX, SOCK_STREAM|SOCK_NONBLOCK|SOCK_CLOEXEC, 0, connfd)); + ASSERT_OK(sd_varlink_server_add_connection(s, connfd[0], /* ret= */ NULL)); + + _cleanup_(sd_varlink_unrefp) sd_varlink *c = NULL; + ASSERT_OK(sd_varlink_connect_fd(&c, connfd[1])); + + ASSERT_OK(sd_varlink_attach_event(c, e, 0)); + + ASSERT_OK(sd_varlink_bind_reply(c, reply_sentinel_error)); + + ASSERT_OK(sd_varlink_invoke(c, "io.test.FiberSentinelError", /* parameters= */ NULL)); + + ASSERT_OK(sd_event_loop(e)); +} + +static int method_fiber_errno(sd_varlink *link, sd_json_variant *parameters, sd_varlink_method_flags_t flags, void *userdata) { + /* Return a negative errno without sending a reply. The fiber's post-callback logic should + * convert this into a SD_VARLINK_ERROR_SYSTEM reply. */ + return -ENOSYS; +} + +static int reply_fiber_errno(sd_varlink *link, sd_json_variant *parameters, const char *error_id, sd_varlink_reply_flags_t flags, void *userdata) { + ASSERT_STREQ(error_id, SD_VARLINK_ERROR_SYSTEM); + ASSERT_EQ(sd_json_variant_integer(sd_json_variant_by_key(parameters, "errno")), ENOSYS); + ASSERT_OK(sd_event_exit(sd_varlink_get_event(link), EXIT_SUCCESS)); + return 0; +} + +TEST(fiber_errno) { + _cleanup_(sd_event_unrefp) sd_event *e = NULL; + ASSERT_OK(sd_event_default(&e)); + + _cleanup_(sd_varlink_server_unrefp) sd_varlink_server *s = NULL; + ASSERT_OK(sd_varlink_server_new(&s, 0)); + + ASSERT_OK(sd_varlink_server_attach_event(s, e, 0)); + + ASSERT_OK(varlink_server_bind_fiber(s, "io.test.FiberErrno", method_fiber_errno)); + + int connfd[2]; + ASSERT_OK_ERRNO(socketpair(AF_UNIX, SOCK_STREAM|SOCK_NONBLOCK|SOCK_CLOEXEC, 0, connfd)); + ASSERT_OK(sd_varlink_server_add_connection(s, connfd[0], /* ret= */ NULL)); + + _cleanup_(sd_varlink_unrefp) sd_varlink *c = NULL; + ASSERT_OK(sd_varlink_connect_fd(&c, connfd[1])); + + ASSERT_OK(sd_varlink_attach_event(c, e, 0)); + + ASSERT_OK(sd_varlink_bind_reply(c, reply_fiber_errno)); + + ASSERT_OK(sd_varlink_invoke(c, "io.test.FiberErrno", /* parameters= */ NULL)); + + ASSERT_OK(sd_event_loop(e)); +} + +static int method_fiber_no_reply(sd_varlink *link, sd_json_variant *parameters, sd_varlink_method_flags_t flags, void *userdata) { + /* Return success without replying and without stashing a ref. The fiber's post-callback + * logic should detect this and fail the connection. */ + return 0; +} + +static int reply_fiber_no_reply(sd_varlink *link, sd_json_variant *parameters, const char *error_id, sd_varlink_reply_flags_t flags, void *userdata) { + ASSERT_STREQ(error_id, SD_VARLINK_ERROR_DISCONNECTED); + ASSERT_OK(sd_event_exit(sd_varlink_get_event(link), EXIT_SUCCESS)); + return 0; +} + +TEST(fiber_no_reply) { + _cleanup_(sd_event_unrefp) sd_event *e = NULL; + ASSERT_OK(sd_event_default(&e)); + + _cleanup_(sd_varlink_server_unrefp) sd_varlink_server *s = NULL; + ASSERT_OK(sd_varlink_server_new(&s, 0)); + + ASSERT_OK(sd_varlink_server_attach_event(s, e, 0)); + + ASSERT_OK(varlink_server_bind_fiber(s, "io.test.FiberNoReply", method_fiber_no_reply)); + + int connfd[2]; + ASSERT_OK_ERRNO(socketpair(AF_UNIX, SOCK_STREAM|SOCK_NONBLOCK|SOCK_CLOEXEC, 0, connfd)); + ASSERT_OK(sd_varlink_server_add_connection(s, connfd[0], /* ret= */ NULL)); + + _cleanup_(sd_varlink_unrefp) sd_varlink *c = NULL; + ASSERT_OK(sd_varlink_connect_fd(&c, connfd[1])); + + ASSERT_OK(sd_varlink_attach_event(c, e, 0)); + + ASSERT_OK(sd_varlink_bind_reply(c, reply_fiber_no_reply)); + + ASSERT_OK(sd_varlink_invoke(c, "io.test.FiberNoReply", /* parameters= */ NULL)); + + ASSERT_OK(sd_event_loop(e)); +} + +static int fiber_stashed_deferred_reply(sd_event_source *s, void *userdata) { + _cleanup_(sd_varlink_unrefp) sd_varlink *link = ASSERT_PTR(userdata); + + sd_event_source_disable_unref(s); + ASSERT_OK(sd_varlink_replybo(link, SD_JSON_BUILD_PAIR_STRING("result", "stashed"))); + return 0; +} + +static int method_fiber_stash(sd_varlink *link, sd_json_variant *parameters, sd_varlink_method_flags_t flags, void *userdata) { + /* Stash a ref on the connection so n_ref > 2 when the fiber returns, and reply later from a + * deferred event source. The fiber's post-callback logic should see the extra ref and treat + * this as a valid deferred-reply case instead of failing the connection. */ + sd_event_source *source; + + ASSERT_OK(sd_event_add_defer(sd_varlink_get_event(link), &source, fiber_stashed_deferred_reply, sd_varlink_ref(link))); + ASSERT_OK(sd_event_source_set_enabled(source, SD_EVENT_ONESHOT)); + return 0; +} + +static int reply_fiber_stash(sd_varlink *link, sd_json_variant *parameters, const char *error_id, sd_varlink_reply_flags_t flags, void *userdata) { + ASSERT_NULL(error_id); + ASSERT_STREQ(sd_json_variant_string(sd_json_variant_by_key(parameters, "result")), "stashed"); + ASSERT_OK(sd_event_exit(sd_varlink_get_event(link), EXIT_SUCCESS)); + return 0; +} + +TEST(fiber_stash) { + _cleanup_(sd_event_unrefp) sd_event *e = NULL; + ASSERT_OK(sd_event_default(&e)); + + _cleanup_(sd_varlink_server_unrefp) sd_varlink_server *s = NULL; + ASSERT_OK(sd_varlink_server_new(&s, 0)); + + ASSERT_OK(sd_varlink_server_attach_event(s, e, 0)); + + ASSERT_OK(varlink_server_bind_fiber(s, "io.test.FiberStash", method_fiber_stash)); + + int connfd[2]; + ASSERT_OK_ERRNO(socketpair(AF_UNIX, SOCK_STREAM|SOCK_NONBLOCK|SOCK_CLOEXEC, 0, connfd)); + ASSERT_OK(sd_varlink_server_add_connection(s, connfd[0], /* ret= */ NULL)); + + _cleanup_(sd_varlink_unrefp) sd_varlink *c = NULL; + ASSERT_OK(sd_varlink_connect_fd(&c, connfd[1])); + + ASSERT_OK(sd_varlink_attach_event(c, e, 0)); + + ASSERT_OK(sd_varlink_bind_reply(c, reply_fiber_stash)); + + ASSERT_OK(sd_varlink_invoke(c, "io.test.FiberStash", /* parameters= */ NULL)); + + ASSERT_OK(sd_event_loop(e)); +} + static int method_with_fd_sentinel(sd_varlink *link, sd_json_variant *parameters, sd_varlink_method_flags_t flags, void *userdata) { _cleanup_close_ int fd1 = -EBADF, fd2 = -EBADF; @@ -766,8 +938,9 @@ static int method_upgrade(sd_varlink *link, sd_json_variant *parameters, sd_varl if (r < 0) return r; - /* After upgrade, do raw I/O: read until EOF, reverse, write back. - * The client shuts down its write side after sending, so we get a clean EOF. */ + /* After upgrade, do raw I/O: read until the client shuts down its write side (giving us a clean + * EOF), reverse what we got, and write it back. Use suspending I/O so other fibers (the client) + * can make progress while we're waiting on the socket. */ char buf[64] = {}; ssize_t n = ASSERT_OK(loop_read(input_fd, buf, sizeof(buf) - 1, /* do_poll= */ true)); ASSERT_GT(n, 0); @@ -787,12 +960,10 @@ static int method_upgrade_without_flag(sd_varlink *link, sd_json_variant *parame /* Calling reply_and_upgrade without the client requesting it should fail with -EPROTO */ ASSERT_ERROR(sd_varlink_reply_and_upgrade(link, /* parameters= */ NULL, &input_fd, &output_fd), EPROTO); - sd_event_exit(sd_varlink_get_event(link), EXIT_SUCCESS); - return sd_varlink_reply(link, /* parameters= */ NULL); } -static void *upgrade_thread(void *arg) { +static int upgrade_client_fiber(void *arg) { _cleanup_(sd_varlink_flush_close_unrefp) sd_varlink *c = NULL; _cleanup_close_ int input_fd = -EBADF, output_fd = -EBADF; sd_json_variant *o = NULL; @@ -825,14 +996,15 @@ static void *upgrade_thread(void *arg) { ASSERT_OK(sd_varlink_call(c2, "io.test.UpgradeWithoutFlag", /* parameters= */ NULL, &o, &error_id)); ASSERT_NULL(error_id); - return NULL; + ASSERT_OK(sd_event_exit(sd_fiber_get_event(), EXIT_SUCCESS)); + return 0; } TEST(upgrade) { _cleanup_(sd_varlink_server_unrefp) sd_varlink_server *s = NULL; _cleanup_(rm_rf_physical_and_freep) char *tmpdir = NULL; _cleanup_(sd_event_unrefp) sd_event *e = NULL; - pthread_t t; + _cleanup_(sd_future_unrefp) sd_future *f = NULL; const char *sp; ASSERT_OK(mkdtemp_malloc("/tmp/varlink-test-XXXXXX", &tmpdir)); @@ -842,31 +1014,23 @@ TEST(upgrade) { ASSERT_OK(sd_varlink_server_new(&s, SD_VARLINK_SERVER_UPGRADABLE)); ASSERT_OK(sd_varlink_server_set_description(s, "upgrade-server")); - ASSERT_OK(sd_varlink_server_bind_method(s, "io.test.Upgrade", method_upgrade)); + /* The method does raw I/O on the upgraded socket — bind it as a fiber method so it can + * suspend on loop_read()/loop_write() and the client fiber can make progress concurrently. */ + ASSERT_OK(varlink_server_bind_fiber(s, "io.test.Upgrade", method_upgrade)); ASSERT_OK(sd_varlink_server_bind_method(s, "io.test.UpgradeWithoutFlag", method_upgrade_without_flag)); ASSERT_OK(sd_varlink_server_listen_address(s, sp, 0600)); ASSERT_OK(sd_varlink_server_attach_event(s, e, 0)); - ASSERT_OK(-pthread_create(&t, NULL, upgrade_thread, (void*) sp)); + ASSERT_OK(sd_fiber_new(e, "upgrade-client", upgrade_client_fiber, (void*) sp, /* destroy= */ NULL, &f)); - /* Run the event loop until no more connections (the thread will disconnect when done) */ + /* Run the event loop. Exits on idle once the client fiber completes and all server connections + * have been torn down. */ ASSERT_OK(sd_event_loop(e)); - ASSERT_OK(-pthread_join(t, NULL)); + ASSERT_OK(sd_future_result(f)); } -static int method_upgrade_and_exit(sd_varlink *link, sd_json_variant *parameters, sd_varlink_method_flags_t flags, void *userdata) { - sd_event *event = ASSERT_PTR(userdata); - - int r = method_upgrade(link, parameters, flags, /* userdata= */ NULL); - - /* Exit the event loop after the upgrade is handled. We can't use sd_varlink_get_event() - * here because the connection is already disconnected after reply_and_upgrade. */ - (void) sd_event_exit(event, r < 0 ? r : EXIT_SUCCESS); - return r; -} - -static void *upgrade_pipelining_thread(void *arg) { +static int upgrade_pipelining_client_fiber(void *arg) { union sockaddr_union sa = {}; _cleanup_close_ int fd = -EBADF; @@ -893,8 +1057,8 @@ static void *upgrade_pipelining_thread(void *arg) { /* Shut down write side so server's method_upgrade sees EOF after raw payload */ ASSERT_OK_ERRNO(shutdown(fd, SHUT_WR)); - /* Read everything: upgrade reply (JSON + \0) + reversed raw payload. The server closes - * the connection after writing, so loop_read() reads until EOF and gets it all. */ + /* Read everything: upgrade reply (JSON + \0) + reversed raw payload. The server closes the + * connection after writing, so loop_read_suspend() reads until EOF and gets it all. */ char buf[256] = {}; ssize_t n = ASSERT_OK(loop_read(fd, buf, sizeof(buf) - 1, /* do_poll= */ true)); ASSERT_GT(n, 0); @@ -909,14 +1073,15 @@ static void *upgrade_pipelining_thread(void *arg) { ASSERT_EQ(raw_size, strlen(raw_payload)); ASSERT_STREQ(strndupa_safe(raw, raw_size), "!denilepiP"); - return NULL; + ASSERT_OK(sd_event_exit(sd_fiber_get_event(), EXIT_SUCCESS)); + return 0; } TEST(upgrade_pipelining) { _cleanup_(sd_varlink_server_unrefp) sd_varlink_server *s = NULL; _cleanup_(rm_rf_physical_and_freep) char *tmpdir = NULL; _cleanup_(sd_event_unrefp) sd_event *e = NULL; - pthread_t t; + _cleanup_(sd_future_unrefp) sd_future *f = NULL; const char *sp; ASSERT_OK(mkdtemp_malloc("/tmp/varlink-test-XXXXXX", &tmpdir)); @@ -924,25 +1089,23 @@ TEST(upgrade_pipelining) { ASSERT_OK(sd_event_new(&e)); - ASSERT_OK(sd_varlink_server_new(&s, SD_VARLINK_SERVER_UPGRADABLE|SD_VARLINK_SERVER_INHERIT_USERDATA)); + ASSERT_OK(sd_varlink_server_new(&s, SD_VARLINK_SERVER_UPGRADABLE)); ASSERT_OK(sd_varlink_server_set_description(s, "upgrade-pipelining-server")); - ASSERT_OK(sd_varlink_server_bind_method(s, "io.test.Upgrade", method_upgrade_and_exit)); + /* method_upgrade does raw I/O on the upgraded socket, so bind as a fiber method. */ + ASSERT_OK(varlink_server_bind_fiber(s, "io.test.Upgrade", method_upgrade)); ASSERT_OK(sd_varlink_server_listen_address(s, sp, 0600)); ASSERT_OK(sd_varlink_server_attach_event(s, e, 0)); - sd_varlink_server_set_userdata(s, e); - ASSERT_OK(-pthread_create(&t, NULL, upgrade_pipelining_thread, (void*) sp)); + ASSERT_OK(sd_fiber_new(e, "upgrade-pipelining-client", upgrade_pipelining_client_fiber, (void*) sp, /* destroy= */ NULL, &f)); ASSERT_OK(sd_event_loop(e)); - ASSERT_OK(-pthread_join(t, NULL)); + ASSERT_OK(sd_future_result(f)); } typedef struct ExecDirServer { sd_varlink_server *server; - sd_event *event; const char *name; - pthread_t thread; } ExecDirServer; static int method_execute_dir_ping(sd_varlink *link, sd_json_variant *parameters, sd_varlink_method_flags_t flags, void *userdata) { @@ -951,20 +1114,6 @@ static int method_execute_dir_ping(sd_varlink *link, sd_json_variant *parameters return sd_varlink_replybo(link, SD_JSON_BUILD_PAIR_STRING("name", srv->name)); } -static void on_execute_dir_disconnect(sd_varlink_server *s, sd_varlink *link, void *userdata) { - ExecDirServer *srv = ASSERT_PTR(userdata); - - /* Only one client (from varlink_execute_directory()) connects per server — once it's gone, we're done. */ - ASSERT_OK(sd_event_exit(srv->event, 0)); -} - -static void *execute_dir_server_thread(void *arg) { - ExecDirServer *srv = arg; - - ASSERT_OK(sd_event_loop(srv->event)); - return NULL; -} - static int execute_dir_reply(sd_varlink *link, sd_json_variant *parameters, const char *error_id, sd_varlink_reply_flags_t flags, void *userdata) { size_t *count = ASSERT_PTR(userdata); @@ -975,51 +1124,28 @@ static int execute_dir_reply(sd_varlink *link, sd_json_variant *parameters, cons return 0; } -TEST(execute_directory) { - _cleanup_(rm_rf_physical_and_freep) char *tmpdir = NULL; - static const char * const names[] = { "alpha", "beta", "gamma" }; - ExecDirServer servers[ELEMENTSOF(names)] = {}; - size_t reply_count = 0; +typedef struct ExecDirClientArgs { + const char *tmpdir; + size_t n_servers; + size_t *reply_count; +} ExecDirClientArgs; - ASSERT_OK(mkdtemp_malloc("/tmp/varlink-execdir-XXXXXX", &tmpdir)); - - for (size_t i = 0; i < ELEMENTSOF(names); i++) { - ExecDirServer *eds = servers + i; - servers[i].name = names[i]; - - _cleanup_free_ char *j = ASSERT_PTR(path_join(tmpdir, names[i])); - - ASSERT_OK(sd_event_new(&eds->event)); - ASSERT_OK(varlink_server_new(&eds->server, - SD_VARLINK_SERVER_INHERIT_USERDATA, - eds)); - ASSERT_OK(sd_varlink_server_bind_method(eds->server, "io.test.ExecDirPing", method_execute_dir_ping)); - ASSERT_OK(sd_varlink_server_bind_disconnect(eds->server, on_execute_dir_disconnect)); - ASSERT_OK(sd_varlink_server_listen_address(eds->server, j, 0600)); - ASSERT_OK(sd_varlink_server_attach_event(eds->server, eds->event, 0)); - - ASSERT_OK(-pthread_create(&eds->thread, NULL, execute_dir_server_thread, eds)); - } +static int execute_dir_client_fiber(void *arg) { + ExecDirClientArgs *a = ASSERT_PTR(arg); ASSERT_OK_EQ(varlink_execute_directory( - tmpdir, + a->tmpdir, "io.test.ExecDirPing", /* parameters= */ NULL, /* more= */ false, /* timeout_usec= */ USEC_INFINITY, execute_dir_reply, - &reply_count), (ssize_t) ELEMENTSOF(names)); - ASSERT_EQ(reply_count, ELEMENTSOF(names)); - - FOREACH_ELEMENT(eds, servers) { - ASSERT_OK(-pthread_join(eds->thread, NULL)); - eds->server = sd_varlink_server_unref(eds->server); - eds->event = sd_event_unref(eds->event); - } + a->reply_count), (ssize_t) a->n_servers); + ASSERT_EQ(*a->reply_count, a->n_servers); /* Calling the helper against a non-existent directory must fail. */ _cleanup_free_ char *nope = NULL; - ASSERT_OK(asprintf(&nope, "%s/does-not-exist", tmpdir)); + ASSERT_OK(asprintf(&nope, "%s/does-not-exist", a->tmpdir)); ASSERT_FAIL(varlink_execute_directory( nope, "io.test.ExecDirPing", @@ -1027,13 +1153,13 @@ TEST(execute_directory) { /* more= */ false, /* timeout_usec= */ USEC_INFINITY, execute_dir_reply, - &reply_count)); + a->reply_count)); /* An empty directory must simply return 0 and not invoke the reply callback. */ - _cleanup_free_ char *empty = ASSERT_PTR(path_join(tmpdir, "empty")); + _cleanup_free_ char *empty = ASSERT_PTR(path_join(a->tmpdir, "empty")); ASSERT_OK_ERRNO(mkdir(empty, 0755)); - size_t count_before = reply_count; + size_t count_before = *a->reply_count; ASSERT_OK_ZERO(varlink_execute_directory( empty, "io.test.ExecDirPing", @@ -1041,8 +1167,52 @@ TEST(execute_directory) { /* more= */ false, /* timeout_usec= */ USEC_INFINITY, execute_dir_reply, - &reply_count)); - ASSERT_EQ(reply_count, count_before); + a->reply_count)); + ASSERT_EQ(*a->reply_count, count_before); + + ASSERT_OK(sd_event_exit(sd_fiber_get_event(), EXIT_SUCCESS)); + return 0; +} + +TEST(execute_directory) { + _cleanup_(rm_rf_physical_and_freep) char *tmpdir = NULL; + _cleanup_(sd_event_unrefp) sd_event *e = NULL; + _cleanup_(sd_future_unrefp) sd_future *f = NULL; + static const char * const names[] = { "alpha", "beta", "gamma" }; + ExecDirServer servers[ELEMENTSOF(names)] = {}; + size_t reply_count = 0; + + ASSERT_OK(mkdtemp_malloc("/tmp/varlink-execdir-XXXXXX", &tmpdir)); + + ASSERT_OK(sd_event_new(&e)); + + for (size_t i = 0; i < ELEMENTSOF(names); i++) { + ExecDirServer *eds = servers + i; + servers[i].name = names[i]; + + _cleanup_free_ char *j = ASSERT_PTR(path_join(tmpdir, names[i])); + + ASSERT_OK(varlink_server_new(&eds->server, + SD_VARLINK_SERVER_INHERIT_USERDATA, + eds)); + ASSERT_OK(sd_varlink_server_bind_method(eds->server, "io.test.ExecDirPing", method_execute_dir_ping)); + ASSERT_OK(sd_varlink_server_listen_address(eds->server, j, 0600)); + ASSERT_OK(sd_varlink_server_attach_event(eds->server, e, 0)); + } + + ExecDirClientArgs args = { + .tmpdir = tmpdir, + .n_servers = ELEMENTSOF(names), + .reply_count = &reply_count, + }; + ASSERT_OK(sd_fiber_new(e, "execute-dir-client", execute_dir_client_fiber, &args, NULL, &f)); + + ASSERT_OK(sd_event_loop(e)); + + ASSERT_OK(sd_future_result(f)); + + FOREACH_ELEMENT(eds, servers) + eds->server = sd_varlink_server_unref(eds->server); } DEFINE_TEST_MAIN(LOG_DEBUG); diff --git a/src/libsystemd/sd-varlink/varlink-internal.h b/src/libsystemd/sd-varlink/varlink-internal.h index 32d6d5983a75f..beec5be42c709 100644 --- a/src/libsystemd/sd-varlink/varlink-internal.h +++ b/src/libsystemd/sd-varlink/varlink-internal.h @@ -135,7 +135,8 @@ typedef struct sd_varlink_server { LIST_HEAD(VarlinkServerSocket, sockets); - Hashmap *methods; /* Fully qualified symbol name of a method → VarlinkMethod */ + Hashmap *methods; /* Fully qualified symbol name of a method → sd_varlink_method_t */ + Hashmap *fiber_methods; /* Fully qualified symbol name of a fiber method → sd_varlink_method_t */ Hashmap *interfaces; /* Fully qualified interface name → VarlinkInterface* */ Hashmap *symbols; /* Fully qualified symbol name of method/error → VarlinkSymbol* */ sd_varlink_connect_t connect_callback; diff --git a/src/libsystemd/sd-varlink/varlink-util.h b/src/libsystemd/sd-varlink/varlink-util.h index d6ecb03c54533..d5765ca2c72f1 100644 --- a/src/libsystemd/sd-varlink/varlink-util.h +++ b/src/libsystemd/sd-varlink/varlink-util.h @@ -19,6 +19,10 @@ int varlink_many_notifyb(Set *s, ...); int varlink_many_reply(Set *s, sd_json_variant *parameters); int varlink_many_error(Set *s, const char *error_id, sd_json_variant *parameters); +int varlink_server_bind_fiber(sd_varlink_server *s, const char *method, sd_varlink_method_t callback); +int varlink_server_bind_fiber_many_internal(sd_varlink_server *s, ...); +#define varlink_server_bind_fiber_many(s, ...) varlink_server_bind_fiber_many_internal(s, __VA_ARGS__, NULL) + int varlink_set_info_systemd(sd_varlink_server *server); int varlink_server_new( From 5eee6a4aeb928fe4b6fd957859d55c867d7b6f96 Mon Sep 17 00:00:00 2001 From: Daan De Meyer Date: Fri, 24 Apr 2026 09:49:02 +0000 Subject: [PATCH 241/242] qmp-client: add fiber-aware call paths The synchronous qmp_client_call() pumps the event loop until its reply arrives, pinning the parsed reply on c->current so it can hand out borrowed pointers to the caller. That model only fits one in-flight sync call: a second qmp_client_call() on the same client clears c->current before issuing its own send, invalidating the first caller's borrowed pointers. On a single-threaded event loop that was fine, but with fibers two concurrent calls on the same client can interleave through the pump (json_stream_wait() suspends the running fiber) and trample each other. To fix this, make qmp_client_call() detect when it's running on a fiber whose event loop matches the client and transparently delegate to qmp_client_call_suspend(), which makes use of a new QmpFuture to allow multiple concurrent calls to qmp_client_call(). To make this work concurrently, we also change qmp_client_call() to hand out references and copies of errors so that we don't have to store the borrowed pointers we hand out in the QmpClient struct. --- src/shared/qmp-client.c | 209 +++++++++++++++++++++++++++++++++++----- src/shared/qmp-client.h | 16 ++- 2 files changed, 199 insertions(+), 26 deletions(-) diff --git a/src/shared/qmp-client.c b/src/shared/qmp-client.c index 41b0c6dd57034..1af2d0d9e5645 100644 --- a/src/shared/qmp-client.c +++ b/src/shared/qmp-client.c @@ -1,6 +1,7 @@ /* SPDX-License-Identifier: LGPL-2.1-or-later */ #include "sd-event.h" +#include "sd-future.h" #include "sd-json.h" #include "alloc-util.h" @@ -226,19 +227,23 @@ static int qmp_extract_response_id(sd_json_variant *v, uint64_t *ret) { return 1; } -/* Returns 0 on success (ret_result = "return" value), -EIO on QMP error (reterr_desc set). */ -static int qmp_parse_response(sd_json_variant *v, sd_json_variant **ret_result, const char **reterr_desc) { +/* Returns 0 on success (ret_result = freshly reffed "return" value), -EIO on QMP error + * (ret_error_desc set to a freshly allocated string). Caller owns both outputs. */ +static int qmp_parse_response(sd_json_variant *v, sd_json_variant **ret_result, char **ret_error_desc) { const char *desc; desc = qmp_extract_error_description(v); if (desc) { - if (reterr_desc) - *reterr_desc = desc; + if (ret_error_desc) { + *ret_error_desc = strdup(desc); + if (!*ret_error_desc) + return -ENOMEM; + } return -EIO; } if (ret_result) - *ret_result = sd_json_variant_by_key(v, "return"); + *ret_result = sd_json_variant_ref(sd_json_variant_by_key(v, "return")); return 0; } @@ -273,8 +278,8 @@ static int qmp_client_build_command( /* Route c->current to event callback or matching async slot. Returns 1 on dispatch. */ static int qmp_client_dispatch(QmpClient *c) { - sd_json_variant *result = NULL; - const char *desc = NULL; + _cleanup_(sd_json_variant_unrefp) sd_json_variant *result = NULL; + _cleanup_free_ char *desc = NULL; uint64_t id; int error, r; @@ -318,8 +323,8 @@ static int qmp_client_dispatch(QmpClient *c) { } /* Synchronous slot (no callback): leave c->current pinned so qmp_client_call() can - * pick up the reply and hand out borrowed pointers into it. The sync caller owns a - * ref on the slot and detects completion by observing slot->client turning NULL. */ + * pick the reply up after its pump loop. The sync caller owns a ref on the slot and + * detects completion by observing slot->client turning NULL. */ if (!slot->callback) { qmp_slot_disconnect(slot, /* unref= */ true); return 1; @@ -574,6 +579,10 @@ static void qmp_client_clear(QmpClient *c) { qmp_client_detach_event(c); qmp_client_clear_current(c); json_stream_done(&c->stream); + /* qmp_client_handle_disconnect() above drained every entry via qmp_client_fail_pending(); + * the set is borrow-only for non-floating slots, so set_free() can't safely run a + * destructor over leftovers — enforce the drain invariant instead. */ + assert(set_isempty(c->slots)); c->slots = set_free(c->slots); } @@ -745,7 +754,7 @@ DEFINE_TRIVIAL_CLEANUP_FUNC(QmpClientArgs*, qmp_client_args_close_fds); /* Shared send path for qmp_client_invoke() and qmp_client_call(). A NULL callback registers * a "synchronous" slot: dispatch_reply leaves c->current pinned on match instead of invoking - * a callback, so qmp_client_call() can hand out borrowed pointers into the reply. If ret_slot + * a callback, so qmp_client_call() can pick the reply up after its pump loop. If ret_slot * is NULL the slot is allocated as floating (owned by c->slots); otherwise a reference is * handed back to the caller. */ static int qmp_client_send( @@ -810,21 +819,176 @@ int qmp_client_invoke( return qmp_client_send(c, command, args, callback, userdata, ret_slot); } +typedef struct QmpFuture { + QmpSlot *slot; /* owned, non-floating; NULL once disconnected */ + sd_json_variant *result; + char *error_desc; +} QmpFuture; + +static void* qmp_future_alloc(void) { + return new0(QmpFuture, 1); +} + +static void qmp_future_free(sd_future *f) { + QmpFuture *qf = sd_future_get_private(f); + qmp_slot_unref(qf->slot); + sd_json_variant_unref(qf->result); + free(qf->error_desc); + free(qf); +} + +static int qmp_future_cancel(sd_future *f) { + QmpFuture *qf = sd_future_get_private(ASSERT_PTR(f)); + + /* Drop the pending slot so dispatch_reply won't try to fire our callback (and touch + * freed memory) when the reply eventually arrives. */ + qf->slot = qmp_slot_unref(qf->slot); + return sd_future_resolve(f, -ECANCELED); +} + +static const sd_future_ops qmp_call_future_ops = { + .size = sizeof(sd_future_ops), + .alloc = qmp_future_alloc, + .free = qmp_future_free, + .cancel = qmp_future_cancel, +}; + +static int qmp_future_callback( + QmpClient *c, + sd_json_variant *result, + const char *desc, + int error, + void *userdata) { + + sd_future *f = ASSERT_PTR(userdata); + QmpFuture *qf = sd_future_get_private(f); + + assert(result || desc || error); + + if (result) + qf->result = sd_json_variant_ref(result); + if (desc) { + qf->error_desc = strdup(desc); + if (!qf->error_desc) + /* No usable reply payload to surface — propagate as transport-style + * failure so suspend() / sd_future_result() see the OOM. */ + return sd_future_resolve(f, -ENOMEM); + } + + /* Resolve with 0 whenever a reply landed (success or QMP-level error) so the future's + * result encodes only "no reply will arrive" — i.e. transport failure or cancellation. + * The reply payload is dispatched in future_get_qmp_reply(). */ + return sd_future_resolve(f, (result || desc) ? 0 : error); +} + +int qmp_client_call_future( + QmpClient *c, + const char *command, + QmpClientArgs *args, + sd_future **ret) { + + int r; + + assert(c); + assert(command); + assert(ret); + + _cleanup_(sd_future_unrefp) sd_future *f = NULL; + r = sd_future_new(&qmp_call_future_ops, &f); + if (r < 0) + return r; + + QmpFuture *qf = sd_future_get_private(f); + + r = qmp_client_send(c, command, args, qmp_future_callback, f, &qf->slot); + if (r < 0) + return r; + + *ret = TAKE_PTR(f); + return 0; +} + +/* Extract the reply from a resolved qmp_client_call_future(). On success *ret_result is a fresh + * reference (caller unrefs) and *ret_error_desc is a freshly allocated string (caller frees). + * Returns -EIO when a QMP-level error was returned but the caller passed a NULL ret_error_desc. */ +int future_get_qmp_reply(sd_future *f, sd_json_variant **ret_result, char **ret_error_desc) { + assert(f); + assert(sd_future_get_ops(f) == &qmp_call_future_ops); + assert(sd_future_state(f) == SD_FUTURE_RESOLVED); + + if (sd_future_result(f) < 0) + return sd_future_result(f); + + QmpFuture *qf = sd_future_get_private(f); + + if (qf->error_desc && !ret_error_desc) + return -EIO; + + if (ret_error_desc && qf->error_desc) { + char *desc = strdup(qf->error_desc); + if (!desc) + return -ENOMEM; + *ret_error_desc = desc; + } else if (ret_error_desc) + *ret_error_desc = NULL; + + if (ret_result) + *ret_result = sd_json_variant_ref(qf->result); + return 0; +} + +static int qmp_client_call_suspend( + QmpClient *c, + const char *command, + QmpClientArgs *args, + sd_json_variant **ret_result, + char **ret_error_desc) { + + int r; + + assert(c); + assert(command); + assert(sd_fiber_is_running()); + + _cleanup_(sd_future_cancel_wait_unrefp) sd_future *call = NULL; + r = qmp_client_call_future(c, command, args, &call); + if (r < 0) + return r; + + /* The call future resolves with 0 once a reply (success or QMP-level error) lands, + * negative on transport failure or cancellation; sd_fiber_suspend() propagates that. */ + r = sd_fiber_suspend(); + if (r < 0) + return r; + + r = future_get_qmp_reply(call, ret_result, ret_error_desc); + if (r < 0) + return r; + + return 1; +} + int qmp_client_call( QmpClient *c, const char *command, QmpClientArgs *args, sd_json_variant **ret_result, - const char **ret_error_desc) { + char **ret_error_desc) { - _cleanup_(qmp_slot_unrefp) QmpSlot *slot = NULL; + _cleanup_(sd_json_variant_unrefp) sd_json_variant *result = NULL; + _cleanup_free_ char *desc = NULL; int r; assert_return(c, -EINVAL); assert_return(command, -EINVAL); - /* Drop any reply pinned by a previous qmp_client_call() before we pin a new one. */ - qmp_client_clear_current(c); + /* If we're on a fiber sharing the QMP client's event loop, use the async + suspend path so + * multiple concurrent qmp_client_call() invocations across fibers don't deadlock each other + * on the process+wait pump. */ + if (sd_fiber_is_running() && qmp_client_get_event(c) == sd_fiber_get_event()) + return qmp_client_call_suspend(c, command, args, ret_result, ret_error_desc); + + _cleanup_(qmp_slot_unrefp) QmpSlot *slot = NULL; /* NULL callback marks this as a synchronous slot: dispatch_reply matches on id like * any other slot (so stray unknown-id replies still get logged and dropped), but @@ -855,18 +1019,19 @@ int qmp_client_call( return r; } - sd_json_variant *result = NULL; - const char *desc = NULL; - int error = qmp_parse_response(c->current, &result, &desc); + _cleanup_(sd_json_variant_unrefp) sd_json_variant *current = TAKE_PTR(c->current); + r = qmp_parse_response(current, &result, &desc); + if (r < 0 && r != -EIO) + return r; - /* If caller doesn't ask for the error string, surface the error as the return code. */ - if (!ret_error_desc && error < 0) - return error; + /* If caller doesn't ask for the error string, surface QMP errors as -EIO. */ + if (desc && !ret_error_desc) + return -EIO; if (ret_result) - *ret_result = result; + *ret_result = TAKE_PTR(result); if (ret_error_desc) - *ret_error_desc = desc; + *ret_error_desc = TAKE_PTR(desc); return 1; } diff --git a/src/shared/qmp-client.h b/src/shared/qmp-client.h index 7dcd53355d06c..e5bd4eb8564b5 100644 --- a/src/shared/qmp-client.h +++ b/src/shared/qmp-client.h @@ -68,15 +68,23 @@ int qmp_client_invoke( qmp_command_callback_t callback, void *userdata); -/* Synchronous send + receive. Pumps the event loop until the reply arrives. *ret_result and - * *ret_error_desc are borrowed pointers into the last reply, valid until the next - * qmp_client_call(). Same contract as sd_varlink_call(). */ int qmp_client_call( QmpClient *client, const char *command, QmpClientArgs *args, sd_json_variant **ret_result, - const char **ret_error_desc); + char **ret_error_desc); + +int qmp_client_call_future( + QmpClient *client, + const char *command, + QmpClientArgs *args, + sd_future **ret); + +int future_get_qmp_reply( + sd_future *f, + sd_json_variant **ret_result, + char **ret_error_desc); void qmp_client_bind_event(QmpClient *c, qmp_event_callback_t callback, void *userdata); void qmp_client_bind_disconnect(QmpClient *c, qmp_disconnect_callback_t callback, void *userdata); From 55159e0189827901599734263bbd42b46bd7173b Mon Sep 17 00:00:00 2001 From: Daan De Meyer Date: Fri, 24 Apr 2026 09:49:10 +0000 Subject: [PATCH 242/242] test-qmp-client: run mock QMP servers as fibers on the shared event loop The mock servers used to be driven out-of-band: each test created a socketpair, forked a child, ran a hand-coded request/response script against the raw fd, and sent SIGTERM to tear it down. That worked but required pidref/process-util/signal plumbing in every test, two distinct execution contexts that couldn't share state, and a JsonStream attached to the mock side that pretended to be event-loop-driven while actually being driven manually via blocking reads. Now that JsonStream suspends when on a fiber, the mocks can live inside the same process and event loop as the client. Each mock is rewritten as an sd-fiber that runs alongside the client fiber: so the mock fiber yields on I/O and the event loop schedules the client in the meantime. Both sides progress cooperatively, no fork/SIGTERM/PID tracking, no manual phase tracking. Two cleanups fall out of the rewrite: - A QMP_TEST(name, mock_fn) { ... } macro encapsulates the per-test scaffolding (event loop, socketpair, mock fiber spawn, exit-on-idle shim) and injects an already-connected QmpClient *client into the test body. Each test now reads as a flat sequence of qmp_client_call() invocations against that client. - Repeated mock command/reply scripting is factored into mock_qmp_expect(), mock_qmp_reply(), mock_qmp_expect_and_reply(), mock_qmp_handshake(), and mock_qmp_query_status_running(). The greeting JSON is built with sd_json_buildo() instead of being parsed from a literal. The file shrinks from 756 to 494 lines, mostly through deletions. --- src/test/test-qmp-client.c | 799 +++++++++++++------------------------ 1 file changed, 272 insertions(+), 527 deletions(-) diff --git a/src/test/test-qmp-client.c b/src/test/test-qmp-client.c index befee02484588..e8ea61d7fcc1a 100644 --- a/src/test/test-qmp-client.c +++ b/src/test/test-qmp-client.c @@ -1,33 +1,27 @@ /* SPDX-License-Identifier: LGPL-2.1-or-later */ -#include -#include +#include #include #include "sd-event.h" +#include "sd-future.h" #include "sd-json.h" #include "errno-util.h" #include "fd-util.h" #include "json-stream.h" -#include "pidref.h" -#include "process-util.h" #include "qmp-client.h" #include "string-util.h" #include "tests.h" -/* Mock QMP server: runs in the child process of a fork, communicates via one end of a socketpair. - * Uses JsonStream as the transport so framing (CRLF delimiter, message queuing, SCM_RIGHTS) is - * handled the same way as on the client side — individual recv() syscalls may coalesce multiple - * messages, and the parser must re-emit each one on its own. */ +/* Mock QMP server runs as an sd-fiber alongside the client on the same event loop. Its + * JsonStream uses the suspending json_stream_wait()/json_stream_flush() helpers, so the mock + * fiber yields whenever it's blocked on I/O and the client makes progress in the meantime. */ -/* We drive the stream manually via read/parse/wait; always report READING so json_stream_wait() - * asks for POLLIN. */ static JsonStreamPhase mock_qmp_phase(void *userdata) { return JSON_STREAM_PHASE_READING; } -/* Never reached — we don't wire the mock stream up to sd-event — but required at init. */ static int mock_qmp_dispatch(void *userdata) { return 0; } @@ -43,9 +37,6 @@ static void mock_qmp_init(JsonStream *s, int fd) { ASSERT_OK(json_stream_connect_fd_pair(s, fd, fd)); } -/* Read one complete JSON message, blocking until available. Handles the case where multiple - * client messages arrived coalesced into a single recv(): the parser walks the input buffer - * one CRLF-delimited message at a time. */ static void mock_qmp_recv(JsonStream *s, sd_json_variant **ret) { int r; @@ -62,142 +53,137 @@ static void mock_qmp_recv(JsonStream *s, sd_json_variant **ret) { } } -/* Enqueue one JSON variant and block until it has been fully written. */ static void mock_qmp_send(JsonStream *s, sd_json_variant *v) { ASSERT_OK(json_stream_enqueue(s, v)); ASSERT_OK(json_stream_flush(s)); } -/* Parse a literal JSON string and send it. Used for fixed greetings and unsolicited events. */ -static void mock_qmp_send_literal(JsonStream *s, const char *msg) { +static void mock_qmp_send_greeting(JsonStream *s) { _cleanup_(sd_json_variant_unrefp) sd_json_variant *v = NULL; - ASSERT_OK(sd_json_parse(msg, 0, &v, NULL, NULL)); + ASSERT_OK(sd_json_buildo(&v, + SD_JSON_BUILD_PAIR("QMP", SD_JSON_BUILD_OBJECT( + SD_JSON_BUILD_PAIR("version", SD_JSON_BUILD_OBJECT( + SD_JSON_BUILD_PAIR("qemu", SD_JSON_BUILD_OBJECT( + SD_JSON_BUILD_PAIR_UNSIGNED("micro", 0), + SD_JSON_BUILD_PAIR_UNSIGNED("minor", 2), + SD_JSON_BUILD_PAIR_UNSIGNED("major", 9))))), + SD_JSON_BUILD_PAIR("capabilities", SD_JSON_BUILD_STRV(STRV_MAKE("oob"))))))); mock_qmp_send(s, v); } -/* Read a command from the client, verify it contains the expected command name, and send a - * reply carrying the same id. If reply_data is NULL, an empty return object is sent. */ -static void mock_qmp_expect_and_reply(JsonStream *s, const char *expected_command, sd_json_variant *reply_data) { - _cleanup_(sd_json_variant_unrefp) sd_json_variant *cmd = NULL, *reply_obj = NULL, *response = NULL; - - mock_qmp_recv(s, &cmd); - - sd_json_variant *execute = ASSERT_NOT_NULL(sd_json_variant_by_key(cmd, "execute")); +/* Receive one command, assert it matches `expected_command`, return its id (borrowed from *cmd). */ +static sd_json_variant* mock_qmp_expect(JsonStream *s, const char *expected_command, sd_json_variant **cmd) { + mock_qmp_recv(s, cmd); + sd_json_variant *execute = ASSERT_NOT_NULL(sd_json_variant_by_key(*cmd, "execute")); ASSERT_STREQ(sd_json_variant_string(execute), expected_command); + return ASSERT_NOT_NULL(sd_json_variant_by_key(*cmd, "id")); +} - sd_json_variant *id = ASSERT_NOT_NULL(sd_json_variant_by_key(cmd, "id")); +/* Send a reply for a previously-received command id. Passing NULL reply_data sends {}. */ +static void mock_qmp_reply(JsonStream *s, sd_json_variant *id, sd_json_variant *reply_data) { + _cleanup_(sd_json_variant_unrefp) sd_json_variant *empty = NULL, *response = NULL; - if (!reply_data) - ASSERT_OK(sd_json_variant_new_object(&reply_obj, NULL, 0)); + if (!reply_data) { + ASSERT_OK(sd_json_build(&empty, SD_JSON_BUILD_EMPTY_OBJECT)); + reply_data = empty; + } - ASSERT_OK(sd_json_buildo( - &response, - SD_JSON_BUILD_PAIR("return", SD_JSON_BUILD_VARIANT(reply_data ?: reply_obj)), - SD_JSON_BUILD_PAIR("id", SD_JSON_BUILD_VARIANT(id)))); + ASSERT_OK(sd_json_buildo(&response, + SD_JSON_BUILD_PAIR("return", SD_JSON_BUILD_VARIANT(reply_data)), + SD_JSON_BUILD_PAIR("id", SD_JSON_BUILD_VARIANT(id)))); mock_qmp_send(s, response); } -/* Same shape as mock_qmp_expect_and_reply() but replies with a QMP error object. */ -static void mock_qmp_expect_and_reply_error(JsonStream *s, const char *expected_command, const char *error_desc) { - _cleanup_(sd_json_variant_unrefp) sd_json_variant *cmd = NULL, *error_obj = NULL, *response = NULL; - - mock_qmp_recv(s, &cmd); - - sd_json_variant *execute = ASSERT_NOT_NULL(sd_json_variant_by_key(cmd, "execute")); - ASSERT_STREQ(sd_json_variant_string(execute), expected_command); +static void mock_qmp_expect_and_reply(JsonStream *s, const char *expected_command, sd_json_variant *reply_data) { + _cleanup_(sd_json_variant_unrefp) sd_json_variant *cmd = NULL; + mock_qmp_reply(s, mock_qmp_expect(s, expected_command, &cmd), reply_data); +} - sd_json_variant *id = ASSERT_NOT_NULL(sd_json_variant_by_key(cmd, "id")); +static void mock_qmp_expect_and_reply_error(JsonStream *s, const char *expected_command, const char *error_desc) { + _cleanup_(sd_json_variant_unrefp) sd_json_variant *cmd = NULL, *response = NULL; + sd_json_variant *id = mock_qmp_expect(s, expected_command, &cmd); - ASSERT_OK(sd_json_buildo( - &error_obj, + ASSERT_OK(sd_json_buildo(&response, + SD_JSON_BUILD_PAIR("error", SD_JSON_BUILD_OBJECT( SD_JSON_BUILD_PAIR_STRING("class", "GenericError"), - SD_JSON_BUILD_PAIR_STRING("desc", error_desc))); - - ASSERT_OK(sd_json_buildo( - &response, - SD_JSON_BUILD_PAIR("error", SD_JSON_BUILD_VARIANT(error_obj)), - SD_JSON_BUILD_PAIR("id", SD_JSON_BUILD_VARIANT(id)))); + SD_JSON_BUILD_PAIR_STRING("desc", error_desc))), + SD_JSON_BUILD_PAIR("id", SD_JSON_BUILD_VARIANT(id)))); mock_qmp_send(s, response); } -static _noreturn_ void mock_qmp_server(int fd) { - _cleanup_(json_stream_done) JsonStream s = {}; - _cleanup_(sd_json_variant_unrefp) sd_json_variant *status_return = NULL; - - mock_qmp_init(&s, fd); +static void mock_qmp_handshake(JsonStream *s) { + mock_qmp_send_greeting(s); + mock_qmp_expect_and_reply(s, "qmp_capabilities", NULL); +} - /* Send QMP greeting */ - mock_qmp_send_literal(&s, - "{\"QMP\": {\"version\": {\"qemu\": {\"micro\": 0, \"minor\": 2, \"major\": 9}}, \"capabilities\": [\"oob\"]}}"); +/* Reply to query-status with a running=true/status="running" payload. */ +static void mock_qmp_query_status_running(JsonStream *s) { + _cleanup_(sd_json_variant_unrefp) sd_json_variant *v = NULL; - /* Accept qmp_capabilities */ - mock_qmp_expect_and_reply(&s, "qmp_capabilities", NULL); + ASSERT_OK(sd_json_buildo(&v, + SD_JSON_BUILD_PAIR_BOOLEAN("running", true), + SD_JSON_BUILD_PAIR_STRING("status", "running"))); + mock_qmp_expect_and_reply(s, "query-status", v); +} - /* Accept query-status, reply with running state */ - ASSERT_OK(sd_json_buildo( - &status_return, - SD_JSON_BUILD_PAIR_BOOLEAN("running", true), - SD_JSON_BUILD_PAIR_STRING("status", "running"))); - mock_qmp_expect_and_reply(&s, "query-status", status_return); +/* Drive a mock+client pair on a single event loop. The client fiber runs as userdata=client, + * the mock fiber as userdata=fd (the server-side socket). */ +static void run_qmp_test(sd_fiber_func_t mock_fn, sd_fiber_func_t client_fn) { + _cleanup_(sd_event_unrefp) sd_event *event = NULL; + _cleanup_(sd_future_unrefp) sd_future *client_f = NULL, *mock_f = NULL; + _cleanup_(qmp_client_unrefp) QmpClient *client = NULL; + _cleanup_close_pair_ int qmp_fds[2] = EBADF_PAIR; - /* Accept stop */ - mock_qmp_expect_and_reply(&s, "stop", NULL); + ASSERT_OK(sd_event_new(&event)); + ASSERT_OK_ERRNO(socketpair(AF_UNIX, SOCK_STREAM|SOCK_CLOEXEC, 0, qmp_fds)); - /* Send a STOP event */ - mock_qmp_send_literal(&s, - "{\"event\": \"STOP\", \"timestamp\": {\"seconds\": 1234, \"microseconds\": 5678}}"); + ASSERT_OK(qmp_client_connect_fd(&client, TAKE_FD(qmp_fds[0]))); + ASSERT_OK(qmp_client_attach_event(client, event, SD_EVENT_PRIORITY_NORMAL)); - /* Accept cont */ - mock_qmp_expect_and_reply(&s, "cont", NULL); + ASSERT_OK(sd_fiber_new(event, "mock", mock_fn, FD_TO_PTR(TAKE_FD(qmp_fds[1])), NULL, &mock_f)); + ASSERT_OK(sd_fiber_new(event, "client", client_fn, client, NULL, &client_f)); - /* json_stream_done() on cleanup closes our fd and signals EOF. */ - _exit(EXIT_SUCCESS); + ASSERT_OK(sd_event_loop(event)); + ASSERT_OK(sd_future_result(client_f)); + ASSERT_OK(sd_future_result(mock_f)); } -/* Test helper: tracks an async QMP command result and signals completion. */ -typedef struct { - sd_json_variant *result; - char *error_desc; - int error; - bool done; -} QmpTestResult; - -static int on_test_result( - QmpClient *client, - sd_json_variant *result, - const char *error_desc, - int error, - void *userdata) { - - QmpTestResult *t = ASSERT_PTR(userdata); - - t->error = error; - if (result) - t->result = sd_json_variant_ref(result); - if (error_desc) - t->error_desc = strdup(error_desc); - t->done = true; - return 0; -} +/* Define a test whose body runs as the client fiber on an event loop shared with `mock_fn`. + * The body receives `QmpClient *client` as its argument. */ +#define QMP_TEST(name, mock_fn) \ + static int test_##name##_body(QmpClient *client); \ + static int test_##name##_fiber(void *userdata) { \ + int r = test_##name##_body(userdata); \ + ASSERT_OK(sd_event_exit(sd_fiber_get_event(), 0)); \ + return r; \ + } \ + TEST(name) { \ + run_qmp_test(mock_fn, test_##name##_fiber); \ + } \ + static int test_##name##_body(QmpClient *client) + +static int mock_qmp_basic_fiber(void *userdata) { + _cleanup_(json_stream_done) JsonStream s = {}; + _cleanup_(sd_json_variant_unrefp) sd_json_variant *stop_event = NULL; -/* Run the event loop until the test result callback fires. */ -static void qmp_test_wait(sd_event *event, QmpTestResult *t) { - assert(event); - assert(t); + mock_qmp_init(&s, PTR_TO_FD(userdata)); + mock_qmp_handshake(&s); - while (!t->done) - ASSERT_OK(sd_event_run(event, UINT64_MAX)); -} + mock_qmp_query_status_running(&s); + mock_qmp_expect_and_reply(&s, "stop", NULL); -static void qmp_test_result_done(QmpTestResult *t) { - assert(t); + ASSERT_OK(sd_json_buildo(&stop_event, + SD_JSON_BUILD_PAIR_STRING("event", "STOP"), + SD_JSON_BUILD_PAIR("timestamp", SD_JSON_BUILD_OBJECT( + SD_JSON_BUILD_PAIR_UNSIGNED("seconds", 1234), + SD_JSON_BUILD_PAIR_UNSIGNED("microseconds", 5678))))); + mock_qmp_send(&s, stop_event); - sd_json_variant_unref(t->result); - free(t->error_desc); - *t = (QmpTestResult) {}; + mock_qmp_expect_and_reply(&s, "cont", NULL); + return 0; } static int test_event_callback( @@ -208,516 +194,281 @@ static int test_event_callback( bool *event_received = ASSERT_PTR(userdata); - /* We may also receive a synthetic SHUTDOWN event when the mock server closes the connection; - * only validate the STOP event we actually care about. */ + /* Ignore the synthetic SHUTDOWN emitted when the mock closes the connection. */ if (streq(event, "STOP")) *event_received = true; return 0; } -TEST(qmp_client_basic) { - _cleanup_(qmp_client_unrefp) QmpClient *client = NULL; - _cleanup_(sd_event_unrefp) sd_event *event = NULL; - _cleanup_(pidref_done) PidRef pid = PIDREF_NULL; - QmpTestResult t = {}; - sd_json_variant *running, *status; - int qmp_fds[2]; - int r; - - ASSERT_OK(sd_event_new(&event)); - - ASSERT_OK_ERRNO(socketpair(AF_UNIX, SOCK_STREAM|SOCK_CLOEXEC, 0, qmp_fds)); - - r = ASSERT_OK(pidref_safe_fork("(mock-qmp)", FORK_DEATHSIG_SIGKILL|FORK_LOG, &pid)); - - if (r == 0) { - safe_close(qmp_fds[0]); - mock_qmp_server(qmp_fds[1]); - } - - safe_close(qmp_fds[1]); - - /* Connect then attach to event loop — handshake completes transparently - * inside the first call()/invoke(). */ - ASSERT_OK(qmp_client_connect_fd(&client, qmp_fds[0])); - ASSERT_OK(qmp_client_attach_event(client, event, SD_EVENT_PRIORITY_NORMAL)); - - /* Set event callback to catch STOP event during cont */ +QMP_TEST(qmp_client_basic, mock_qmp_basic_fiber) { + _cleanup_(sd_json_variant_unrefp) sd_json_variant *result = NULL; + _cleanup_free_ char *error_desc = NULL; bool event_received = false; + qmp_client_bind_event(client, test_event_callback, &event_received); - /* Execute query-status */ - ASSERT_OK(qmp_client_invoke(client, /* ret_slot= */ NULL, "query-status", NULL, on_test_result, &t)); - qmp_test_wait(event, &t); - ASSERT_EQ(t.error, 0); - ASSERT_NOT_NULL(t.result); + ASSERT_OK_POSITIVE(qmp_client_call(client, "query-status", NULL, &result, &error_desc)); + ASSERT_NULL(error_desc); - running = ASSERT_NOT_NULL(sd_json_variant_by_key(t.result, "running")); + sd_json_variant *running = ASSERT_NOT_NULL(sd_json_variant_by_key(result, "running")); ASSERT_TRUE(sd_json_variant_boolean(running)); - - status = ASSERT_NOT_NULL(sd_json_variant_by_key(t.result, "status")); + sd_json_variant *status = ASSERT_NOT_NULL(sd_json_variant_by_key(result, "status")); ASSERT_STREQ(sd_json_variant_string(status), "running"); - qmp_test_result_done(&t); + ASSERT_OK_POSITIVE(qmp_client_call(client, "stop", NULL, NULL, NULL)); + ASSERT_OK_POSITIVE(qmp_client_call(client, "cont", NULL, NULL, NULL)); - /* Execute stop */ - ASSERT_OK(qmp_client_invoke(client, /* ret_slot= */ NULL, "stop", NULL, on_test_result, &t)); - qmp_test_wait(event, &t); - ASSERT_EQ(t.error, 0); - qmp_test_result_done(&t); + ASSERT_TRUE(event_received); + return 0; +} - /* Execute cont -- the STOP event should be dispatched by the IO callback */ - ASSERT_OK(qmp_client_invoke(client, /* ret_slot= */ NULL, "cont", NULL, on_test_result, &t)); - qmp_test_wait(event, &t); - ASSERT_EQ(t.error, 0); - qmp_test_result_done(&t); +static int mock_qmp_eof_fiber(void *userdata) { + _cleanup_(json_stream_done) JsonStream s = {}; - /* Verify the STOP event was received */ - ASSERT_TRUE(event_received); + mock_qmp_init(&s, PTR_TO_FD(userdata)); + mock_qmp_handshake(&s); + /* Return; _cleanup_ closes the fd → client sees EOF. */ + return 0; +} - /* Wait for child and verify clean exit */ - siginfo_t si = {}; - ASSERT_OK(pidref_wait_for_terminate(&pid, &si)); - ASSERT_EQ(si.si_code, CLD_EXITED); - ASSERT_EQ(si.si_status, EXIT_SUCCESS); +QMP_TEST(qmp_client_eof, mock_qmp_eof_fiber) { + int r = qmp_client_call(client, "query-status", NULL, NULL, NULL); + ASSERT_TRUE(ERRNO_IS_NEG_DISCONNECT(r)); + return 0; } -TEST(qmp_client_eof) { - _cleanup_(qmp_client_unrefp) QmpClient *client = NULL; - _cleanup_(sd_event_unrefp) sd_event *event = NULL; - _cleanup_(pidref_done) PidRef pid = PIDREF_NULL; - QmpTestResult t = {}; - int qmp_fds[2]; - int r; +static int mock_qmp_call_fiber(void *userdata) { + _cleanup_(json_stream_done) JsonStream s = {}; - ASSERT_OK(sd_event_new(&event)); - ASSERT_OK_ERRNO(socketpair(AF_UNIX, SOCK_STREAM|SOCK_CLOEXEC, 0, qmp_fds)); + mock_qmp_init(&s, PTR_TO_FD(userdata)); + mock_qmp_handshake(&s); - r = ASSERT_OK(pidref_safe_fork("(mock-qmp-eof)", FORK_DEATHSIG_SIGKILL|FORK_LOG, &pid)); + mock_qmp_query_status_running(&s); + mock_qmp_expect_and_reply_error(&s, "stop", "not running"); + mock_qmp_expect_and_reply_error(&s, "stop", "still not running"); + return 0; +} - if (r == 0) { - _cleanup_(json_stream_done) JsonStream s = {}; +QMP_TEST(qmp_client_call, mock_qmp_call_fiber) { + _cleanup_(sd_future_cancel_wait_unrefp) sd_future *f = NULL; + _cleanup_(sd_json_variant_unrefp) sd_json_variant *result = NULL; + _cleanup_free_ char *error_desc = NULL; - safe_close(qmp_fds[0]); - mock_qmp_init(&s, qmp_fds[1]); + /* Exercise qmp_client_call_future() + sd_fiber_await() + future_get_qmp_reply() + * directly — success path. */ + ASSERT_OK(qmp_client_call_future(client, "query-status", NULL, &f)); + ASSERT_OK(sd_fiber_await(f)); + ASSERT_OK(sd_future_result(f)); + ASSERT_OK(future_get_qmp_reply(f, &result, &error_desc)); - /* Send greeting and accept capabilities, then die */ - mock_qmp_send_literal(&s, - "{\"QMP\": {\"version\": {\"qemu\": {\"micro\": 0, \"minor\": 0, \"major\": 9}}, \"capabilities\": []}}"); + ASSERT_NULL(error_desc); + sd_json_variant *running = ASSERT_NOT_NULL(sd_json_variant_by_key(result, "running")); + ASSERT_TRUE(sd_json_variant_boolean(running)); + sd_json_variant *status = ASSERT_NOT_NULL(sd_json_variant_by_key(result, "status")); + ASSERT_STREQ(sd_json_variant_string(status), "running"); - mock_qmp_expect_and_reply(&s, "qmp_capabilities", NULL); + /* QMP-level error: future resolves with 0 (the reply landed); future_get_qmp_reply() + * surfaces the error via error_desc, with result left NULL. */ + f = sd_future_unref(f); + result = sd_json_variant_unref(result); + error_desc = mfree(error_desc); - /* _exit() closes our fd via kernel teardown, signalling EOF to the peer. */ - _exit(EXIT_SUCCESS); - } + ASSERT_OK(qmp_client_call_future(client, "stop", NULL, &f)); + ASSERT_OK(sd_fiber_await(f)); + ASSERT_OK(sd_future_result(f)); + ASSERT_OK(future_get_qmp_reply(f, &result, &error_desc)); - safe_close(qmp_fds[1]); + ASSERT_NULL(result); + ASSERT_STREQ(error_desc, "not running"); - ASSERT_OK(qmp_client_connect_fd(&client, qmp_fds[0])); - ASSERT_OK(qmp_client_attach_event(client, event, SD_EVENT_PRIORITY_NORMAL)); + /* qmp_client_call() surfaces QMP errors as -EIO when the caller doesn't ask for the desc. */ + ASSERT_ERROR(qmp_client_call(client, "stop", NULL, NULL, NULL), EIO); + return 0; +} - /* Executing a command should fail with a disconnect error because the server - * closed. The handshake may succeed or fail inside invoke() — either way the - * invoke itself or the async callback should report a disconnect. */ - r = qmp_client_invoke(client, /* ret_slot= */ NULL, "query-status", NULL, on_test_result, &t); - if (r < 0) - ASSERT_TRUE(ERRNO_IS_NEG_DISCONNECT(r)); - else { - qmp_test_wait(event, &t); - ASSERT_TRUE(ERRNO_IS_NEG_DISCONNECT(t.error)); - qmp_test_result_done(&t); - } +static int mock_qmp_call_disconnect_fiber(void *userdata) { + _cleanup_(json_stream_done) JsonStream s = {}; + _cleanup_(sd_json_variant_unrefp) sd_json_variant *stop_cmd = NULL; + + mock_qmp_init(&s, PTR_TO_FD(userdata)); + mock_qmp_handshake(&s); - siginfo_t si = {}; - ASSERT_OK(pidref_wait_for_terminate(&pid, &si)); - ASSERT_EQ(si.si_code, CLD_EXITED); - ASSERT_EQ(si.si_status, EXIT_SUCCESS); + /* Consume the stop command but don't reply — cleanup closes the fd and the client + * sees a disconnect while suspended. */ + mock_qmp_recv(&s, &stop_cmd); + return 0; } -/* Mock QMP server for the fd-passing test. Drives the wire dance: - * greeting → recv qmp_capabilities → reply → recv add-fd → reply - * Asserts that exactly one SCM_RIGHTS fd arrives total across the two recvs. We can't - * require the fd to come attached to add-fd specifically: AF_UNIX coalesces the client's - * non-SCM cap sendmsg forward into the SCM-bearing add-fd sendmsg, so the fd may surface - * with either recv depending on kernel scheduling. QEMU's FIFO fd queue doesn't care. */ -static _noreturn_ void mock_qmp_server_fd(int fd) { +QMP_TEST(qmp_client_call_disconnect, mock_qmp_call_disconnect_fiber) { + int r = qmp_client_call(client, "stop", NULL, NULL, NULL); + ASSERT_TRUE(ERRNO_IS_NEG_DISCONNECT(r)); + return 0; +} + +static int mock_qmp_fd_fiber(void *userdata) { _cleanup_(json_stream_done) JsonStream s = {}; - _cleanup_(sd_json_variant_unrefp) sd_json_variant *cap_cmd = NULL, - *addfd_cmd = NULL, - *cap_reply = NULL, - *addfd_return = NULL, - *addfd_reply = NULL; + _cleanup_(sd_json_variant_unrefp) sd_json_variant *cap_cmd = NULL, *addfd_cmd = NULL, + *addfd_return = NULL; - mock_qmp_init(&s, fd); - ASSERT_OK(json_stream_set_allow_fd_passing_input(&s, true, /* with_sockopt= */ true)); + mock_qmp_init(&s, PTR_TO_FD(userdata)); + ASSERT_OK(json_stream_set_allow_fd_passing_input(&s, true, true)); - /* Greeting */ - mock_qmp_send_literal(&s, - "{\"QMP\": {\"version\": {\"qemu\": {\"micro\": 0, \"minor\": 0, \"major\": 9}}, \"capabilities\": []}}"); + mock_qmp_send_greeting(&s); - /* Receive qmp_capabilities (may or may not carry the fd depending on coalescing). */ - mock_qmp_recv(&s, &cap_cmd); + /* The fd may ride with either command depending on AF_UNIX coalescing; count across both. */ + sd_json_variant *cap_id = mock_qmp_expect(&s, "qmp_capabilities", &cap_cmd); size_t n_fds_total = json_stream_get_n_input_fds(&s); - ASSERT_STREQ(sd_json_variant_string(sd_json_variant_by_key(cap_cmd, "execute")), "qmp_capabilities"); json_stream_close_input_fds(&s); + mock_qmp_reply(&s, cap_id, NULL); - sd_json_variant *cap_id = ASSERT_NOT_NULL(sd_json_variant_by_key(cap_cmd, "id")); - ASSERT_OK(sd_json_buildo( - &cap_reply, - SD_JSON_BUILD_PAIR("return", SD_JSON_BUILD_EMPTY_OBJECT), - SD_JSON_BUILD_PAIR("id", SD_JSON_BUILD_VARIANT(cap_id)))); - mock_qmp_send(&s, cap_reply); - - /* Receive add-fd (fd may already have been consumed with cap's recv). */ - mock_qmp_recv(&s, &addfd_cmd); + sd_json_variant *addfd_id = mock_qmp_expect(&s, "add-fd", &addfd_cmd); n_fds_total += json_stream_get_n_input_fds(&s); - ASSERT_STREQ(sd_json_variant_string(sd_json_variant_by_key(addfd_cmd, "execute")), "add-fd"); json_stream_close_input_fds(&s); - ASSERT_EQ(n_fds_total, (size_t) 1); - sd_json_variant *addfd_id = ASSERT_NOT_NULL(sd_json_variant_by_key(addfd_cmd, "id")); - ASSERT_OK(sd_json_buildo( - &addfd_return, - SD_JSON_BUILD_PAIR_UNSIGNED("fdset-id", 0), - SD_JSON_BUILD_PAIR_UNSIGNED("fd", 42))); - ASSERT_OK(sd_json_buildo( - &addfd_reply, - SD_JSON_BUILD_PAIR("return", SD_JSON_BUILD_VARIANT(addfd_return)), - SD_JSON_BUILD_PAIR("id", SD_JSON_BUILD_VARIANT(addfd_id)))); - mock_qmp_send(&s, addfd_reply); - - _exit(EXIT_SUCCESS); + ASSERT_OK(sd_json_buildo(&addfd_return, + SD_JSON_BUILD_PAIR_UNSIGNED("fdset-id", 0), + SD_JSON_BUILD_PAIR_UNSIGNED("fd", 42))); + mock_qmp_reply(&s, addfd_id, addfd_return); + return 0; } -/* End-to-end fd-passing through qmp_client_invoke() with QMP_CLIENT_ARGS_FD(): open a real - * fd, send add-fd, confirm the mock received a single SCM_RIGHTS fd and replied successfully. */ -TEST(qmp_client_invoke_with_fd) { - _cleanup_(qmp_client_unrefp) QmpClient *client = NULL; - _cleanup_(sd_event_unrefp) sd_event *event = NULL; - _cleanup_(pidref_done) PidRef pid = PIDREF_NULL; +QMP_TEST(qmp_client_invoke_with_fd, mock_qmp_fd_fiber) { _cleanup_(sd_json_variant_unrefp) sd_json_variant *args = NULL; _cleanup_close_ int fd_to_pass = -EBADF; - QmpTestResult t = {}; - int qmp_fds[2]; - int r; - - ASSERT_OK(sd_event_new(&event)); - ASSERT_OK_ERRNO(socketpair(AF_UNIX, SOCK_STREAM|SOCK_CLOEXEC, 0, qmp_fds)); - - r = ASSERT_OK(pidref_safe_fork("(mock-qmp-fd)", FORK_DEATHSIG_SIGKILL|FORK_LOG, &pid)); - - if (r == 0) { - safe_close(qmp_fds[0]); - mock_qmp_server_fd(qmp_fds[1]); - } - - safe_close(qmp_fds[1]); + _cleanup_(sd_json_variant_unrefp) sd_json_variant *result = NULL; - /* Open a real fd to pass — /dev/null is universally available. */ - fd_to_pass = open("/dev/null", O_RDWR|O_CLOEXEC); - ASSERT_OK(fd_to_pass); - - ASSERT_OK(qmp_client_connect_fd(&client, qmp_fds[0])); - ASSERT_OK(qmp_client_attach_event(client, event, SD_EVENT_PRIORITY_NORMAL)); + fd_to_pass = ASSERT_OK_ERRNO(eventfd(0, EFD_CLOEXEC)); ASSERT_OK(sd_json_buildo(&args, SD_JSON_BUILD_PAIR_UNSIGNED("fdset-id", 0))); - ASSERT_OK(qmp_client_invoke(client, /* ret_slot= */ NULL, "add-fd", - QMP_CLIENT_ARGS_FD(args, TAKE_FD(fd_to_pass)), - on_test_result, &t)); + ASSERT_OK_POSITIVE(qmp_client_call(client, "add-fd", + QMP_CLIENT_ARGS_FD(args, TAKE_FD(fd_to_pass)), + &result, NULL)); + ASSERT_NOT_NULL(result); + return 0; +} + +static int on_dead_peer_reply( + QmpClient *client, + sd_json_variant *result, + const char *error_desc, + int error, + void *userdata) { - qmp_test_wait(event, &t); - ASSERT_EQ(t.error, 0); - ASSERT_NOT_NULL(t.result); - qmp_test_result_done(&t); + bool *fired = ASSERT_PTR(userdata); - /* Wait for the mock. If its fd-count assertion tripped, si.si_status is non-zero. */ - siginfo_t si = {}; - ASSERT_OK(pidref_wait_for_terminate(&pid, &si)); - ASSERT_EQ(si.si_code, CLD_EXITED); - ASSERT_EQ(si.si_status, EXIT_SUCCESS); + /* Peer was closed before the write hit the wire; expect a disconnect. */ + ASSERT_TRUE(ERRNO_IS_NEG_DISCONNECT(error)); + *fired = true; + return 0; } -/* Regression: the caller-supplied fds — already TAKE_FD()'d through QMP_CLIENT_ARGS_FD() — - * must never leak, regardless of whether the invoke reaches the wire. Verified here via a - * dead peer: invoke enqueues (non-blocking), the queue item owns the fd, and client teardown - * must close it. */ +/* Verify caller-supplied fds passed through QMP_CLIENT_ARGS_FD() are closed on client teardown + * even when the peer is already dead: invoke enqueues, the queue item owns the fd, unref closes. */ TEST(qmp_client_invoke_failure_closes_fds) { _cleanup_(sd_json_variant_unrefp) sd_json_variant *args = NULL; _cleanup_close_ int fd_to_pass = -EBADF; QmpClient *client = NULL; - QmpTestResult t = {}; - int qmp_fds[2]; + _cleanup_close_pair_ int qmp_fds[2] = EBADF_PAIR; int saved_fd_value; + bool callback_fired = false; ASSERT_OK_ERRNO(socketpair(AF_UNIX, SOCK_STREAM|SOCK_CLOEXEC, 0, qmp_fds)); + qmp_fds[1] = safe_close(qmp_fds[1]); - /* Close the peer end immediately so any write attempt sees EPIPE. */ - safe_close(qmp_fds[1]); - - fd_to_pass = open("/dev/null", O_RDWR|O_CLOEXEC); - ASSERT_OK(fd_to_pass); - saved_fd_value = fd_to_pass; /* remember the int value for the closed-check */ + fd_to_pass = ASSERT_OK_ERRNO(eventfd(0, EFD_CLOEXEC)); + saved_fd_value = fd_to_pass; ASSERT_OK(sd_json_buildo(&args, SD_JSON_BUILD_PAIR_UNSIGNED("fdset-id", 0))); - ASSERT_OK(qmp_client_connect_fd(&client, qmp_fds[0])); + ASSERT_OK(qmp_client_connect_fd(&client, TAKE_FD(qmp_fds[0]))); - /* invoke no longer blocks on the handshake — it just enqueues. The fd is now - * owned by the underlying JsonStream output queue. */ - ASSERT_OK(qmp_client_invoke(client, /* ret_slot= */ NULL, "add-fd", + ASSERT_OK(qmp_client_invoke(client, NULL, "add-fd", QMP_CLIENT_ARGS_FD(args, TAKE_FD(fd_to_pass)), - on_test_result, &t)); - ASSERT_EQ(fd_to_pass, -EBADF); /* TAKE_FD cleared our local handle */ - - /* The fd is still open here (held in JsonStream's queue). */ + on_dead_peer_reply, &callback_fired)); + ASSERT_EQ(fd_to_pass, -EBADF); ASSERT_OK_ERRNO(fcntl(saved_fd_value, F_GETFD)); - /* Client teardown (json_stream_done) must close queued output fds, otherwise the - * saved fd number would still be valid. */ client = qmp_client_unref(client); - ASSERT_EQ(fcntl(saved_fd_value, F_GETFD), -1); - ASSERT_EQ(errno, EBADF); + ASSERT_ERROR_ERRNO(fcntl(saved_fd_value, F_GETFD), EBADF); + ASSERT_TRUE(callback_fired); } -/* Mock for the slot lifecycle + cancel tests: greets, accepts capabilities, then accepts - * query-status and stop, replying with dummy returns. A cancelled query-status still gets - * sent on the wire (cancel merely removes the pending slot), so the server must be prepared - * to read and reply to it. */ -static _noreturn_ void mock_qmp_server_slot(int fd) { +/* Shared mock for the two slot tests: the follow-up stop is what drives the event loop long + * enough to dispatch the query-status reply. */ +static int mock_qmp_slot_fiber(void *userdata) { _cleanup_(json_stream_done) JsonStream s = {}; - _cleanup_(sd_json_variant_unrefp) sd_json_variant *status_return = NULL; - - mock_qmp_init(&s, fd); - mock_qmp_send_literal(&s, - "{\"QMP\": {\"version\": {\"qemu\": {\"micro\": 0, \"minor\": 0, \"major\": 9}}, \"capabilities\": []}}"); - - mock_qmp_expect_and_reply(&s, "qmp_capabilities", NULL); - - ASSERT_OK(sd_json_buildo( - &status_return, - SD_JSON_BUILD_PAIR_BOOLEAN("running", true), - SD_JSON_BUILD_PAIR_STRING("status", "running"))); - mock_qmp_expect_and_reply(&s, "query-status", status_return); + mock_qmp_init(&s, PTR_TO_FD(userdata)); + mock_qmp_handshake(&s); + mock_qmp_query_status_running(&s); mock_qmp_expect_and_reply(&s, "stop", NULL); - - _exit(EXIT_SUCCESS); + return 0; } -/* Verify that when qmp_client_invoke() returns a slot, qmp_slot_get_client() tracks the - * connection state: the client pointer is reported while the call is in flight, and flipped - * back to NULL once the reply has been dispatched. The caller must still be able to drop its - * ref safely after that. */ -TEST(qmp_client_invoke_slot_lifecycle) { - _cleanup_(qmp_client_unrefp) QmpClient *client = NULL; - _cleanup_(sd_event_unrefp) sd_event *event = NULL; - _cleanup_(pidref_done_sigkill_wait) PidRef pid = PIDREF_NULL; - _cleanup_(qmp_slot_unrefp) QmpSlot *slot = NULL; - QmpTestResult t = {}; - int qmp_fds[2]; - int r; +static int nop_callback( + QmpClient *client, + sd_json_variant *result, + const char *error_desc, + int error, + void *userdata) { - ASSERT_OK(sd_event_new(&event)); - ASSERT_OK_ERRNO(socketpair(AF_UNIX, SOCK_STREAM|SOCK_CLOEXEC, 0, qmp_fds)); + return 0; +} - r = ASSERT_OK(pidref_safe_fork("(mock-qmp-slot-life)", FORK_DEATHSIG_SIGKILL|FORK_LOG, &pid)); - if (r == 0) { - safe_close(qmp_fds[0]); - mock_qmp_server_slot(qmp_fds[1]); - } - safe_close(qmp_fds[1]); +/* Tripwire for the cancel test: if it fires, the cancel didn't do its job. */ +static int tripwire_callback( + QmpClient *client, + sd_json_variant *result, + const char *error_desc, + int error, + void *userdata) { - ASSERT_OK(qmp_client_connect_fd(&client, qmp_fds[0])); - ASSERT_OK(qmp_client_attach_event(client, event, SD_EVENT_PRIORITY_NORMAL)); + bool *fired = ASSERT_PTR(userdata); + *fired = true; + return 0; +} - ASSERT_OK(qmp_client_invoke(client, &slot, "query-status", NULL, on_test_result, &t)); +QMP_TEST(qmp_client_invoke_slot_lifecycle, mock_qmp_slot_fiber) { + _cleanup_(qmp_slot_unrefp) QmpSlot *slot = NULL; - /* While in flight the slot still references its client. */ - ASSERT_NOT_NULL(slot); + ASSERT_OK(qmp_client_invoke(client, &slot, "query-status", NULL, nop_callback, NULL)); ASSERT_PTR_EQ(qmp_slot_get_client(slot), client); - qmp_test_wait(event, &t); - ASSERT_EQ(t.error, 0); - ASSERT_NOT_NULL(t.result); + /* Drive the loop via a follow-up stop; its suspending call lets both replies dispatch. */ + ASSERT_OK_POSITIVE(qmp_client_call(client, "stop", NULL, NULL, NULL)); - /* Once dispatched, the slot is disconnected from the client but still owned by us. */ + /* After dispatch the slot is disconnected from the client but still owned by us. */ ASSERT_NULL(qmp_slot_get_client(slot)); - qmp_test_result_done(&t); - - /* Drop our ref explicitly (out of order w.r.t. cleanup) to exercise the - * already-disconnected path in qmp_slot_free(). */ + /* Explicit out-of-order unref exercises the already-disconnected path in qmp_slot_free(). */ slot = qmp_slot_unref(slot); - ASSERT_NULL(slot); + return 0; } -/* Verify that dropping the only reference on a pending slot before the reply arrives cancels - * the callback. The command is already enqueued on the stream at that point, so the server - * still sees it and replies — but the reply lands on an unknown id and is discarded. */ -TEST(qmp_client_invoke_slot_cancel) { - _cleanup_(qmp_client_unrefp) QmpClient *client = NULL; - _cleanup_(pidref_done_sigkill_wait) PidRef pid = PIDREF_NULL; - QmpTestResult t_cancelled = {}; +QMP_TEST(qmp_client_invoke_slot_cancel, mock_qmp_slot_fiber) { QmpSlot *slot = NULL; - int qmp_fds[2]; - int r; + bool fired = false; - ASSERT_OK_ERRNO(socketpair(AF_UNIX, SOCK_STREAM|SOCK_CLOEXEC, 0, qmp_fds)); - - r = ASSERT_OK(pidref_safe_fork("(mock-qmp-slot-cancel)", FORK_DEATHSIG_SIGKILL|FORK_LOG, &pid)); - if (r == 0) { - safe_close(qmp_fds[0]); - mock_qmp_server_slot(qmp_fds[1]); - } - safe_close(qmp_fds[1]); - - /* Drive without an event loop so the subsequent qmp_client_call() owns all pumping; - * it serializes write→read round-trips, which avoids the mock server seeing the - * cancelled query-status and the follow-up stop concatenated into a single recv(). */ - ASSERT_OK(qmp_client_connect_fd(&client, qmp_fds[0])); + ASSERT_OK(qmp_client_invoke(client, &slot, "query-status", NULL, tripwire_callback, &fired)); - ASSERT_OK(qmp_client_invoke(client, &slot, "query-status", NULL, on_test_result, &t_cancelled)); - ASSERT_NOT_NULL(slot); - - /* Drop our sole ref → slot disconnects itself from the client's pending set. The - * enqueued query-status is still on the wire; when its reply arrives, dispatch_reply - * won't find a matching slot and will log-and-discard it. */ + /* Drop our sole ref → slot disconnects from the client's pending set. The enqueued + * query-status is still on the wire; its reply lands on an unknown id and is discarded. */ slot = qmp_slot_unref(slot); - ASSERT_NULL(slot); - - /* Synchronous call drives its own process+wait pump: it first drains the already- - * enqueued query-status write, consumes (and discards) its reply, then sends stop - * and waits for that reply. Any improper fire of the cancelled callback would have - * happened during that process() pass. */ - ASSERT_EQ(qmp_client_call(client, "stop", NULL, NULL, NULL), 1); - /* The cancelled callback must never have fired. */ - ASSERT_FALSE(t_cancelled.done); - ASSERT_NULL(t_cancelled.result); - ASSERT_NULL(t_cancelled.error_desc); -} + ASSERT_OK_POSITIVE(qmp_client_call(client, "stop", NULL, NULL, NULL)); -/* Drives a small wire dance for the sync call test: greeting, capabilities, one successful - * command reply, and two error replies (one for the ret_error_desc path, one for the -EIO - * path). */ -static _noreturn_ void mock_qmp_server_call(int fd) { - _cleanup_(json_stream_done) JsonStream s = {}; - _cleanup_(sd_json_variant_unrefp) sd_json_variant *status_return = NULL; - - mock_qmp_init(&s, fd); - - mock_qmp_send_literal(&s, - "{\"QMP\": {\"version\": {\"qemu\": {\"micro\": 0, \"minor\": 0, \"major\": 9}}, \"capabilities\": []}}"); - - mock_qmp_expect_and_reply(&s, "qmp_capabilities", NULL); - - ASSERT_OK(sd_json_buildo( - &status_return, - SD_JSON_BUILD_PAIR_BOOLEAN("running", true), - SD_JSON_BUILD_PAIR_STRING("status", "running"))); - mock_qmp_expect_and_reply(&s, "query-status", status_return); - - mock_qmp_expect_and_reply_error(&s, "stop", "not running"); - mock_qmp_expect_and_reply_error(&s, "stop", "still not running"); - - _exit(EXIT_SUCCESS); -} - -TEST(qmp_client_call) { - _cleanup_(qmp_client_unrefp) QmpClient *client = NULL; - _cleanup_(pidref_done_sigkill_wait) PidRef pid = PIDREF_NULL; - int qmp_fds[2]; - int r; - - ASSERT_OK_ERRNO(socketpair(AF_UNIX, SOCK_STREAM|SOCK_CLOEXEC, 0, qmp_fds)); - - r = ASSERT_OK(pidref_safe_fork("(mock-qmp-call)", FORK_DEATHSIG_SIGKILL|FORK_LOG, &pid)); - if (r == 0) { - safe_close(qmp_fds[0]); - mock_qmp_server_call(qmp_fds[1]); - } - safe_close(qmp_fds[1]); - - /* qmp_client_call() drives its own process()+wait() pump, so no event loop needed. */ - ASSERT_OK(qmp_client_connect_fd(&client, qmp_fds[0])); - - /* Successful call: borrowed result pointer is valid until the next call. */ - sd_json_variant *result = NULL; - const char *error_desc = NULL; - ASSERT_EQ(qmp_client_call(client, "query-status", NULL, &result, &error_desc), 1); - ASSERT_NULL(error_desc); - ASSERT_NOT_NULL(result); - - sd_json_variant *running = ASSERT_NOT_NULL(sd_json_variant_by_key(result, "running")); - ASSERT_TRUE(sd_json_variant_boolean(running)); - sd_json_variant *status = ASSERT_NOT_NULL(sd_json_variant_by_key(result, "status")); - ASSERT_STREQ(sd_json_variant_string(status), "running"); - - /* QMP error with ret_error_desc provided: returns 1, result NULL, desc set. */ - result = (sd_json_variant*) 0x1; /* poison to catch lack-of-write */ - error_desc = NULL; - ASSERT_EQ(qmp_client_call(client, "stop", NULL, &result, &error_desc), 1); - ASSERT_NULL(result); - ASSERT_STREQ(error_desc, "not running"); - - /* QMP error without ret_error_desc: surfaces as -EIO. */ - ASSERT_EQ(qmp_client_call(client, "stop", NULL, NULL, NULL), -EIO); -} - -/* Server variant for the sync-call disconnect test: greets, accepts capabilities, reads one - * command without replying, then closes the socket so the client sees EOF mid-wait. */ -static _noreturn_ void mock_qmp_server_call_disconnect(int fd) { - _cleanup_(json_stream_done) JsonStream s = {}; - _cleanup_(sd_json_variant_unrefp) sd_json_variant *stop_cmd = NULL; - - mock_qmp_init(&s, fd); - - mock_qmp_send_literal(&s, - "{\"QMP\": {\"version\": {\"qemu\": {\"micro\": 0, \"minor\": 0, \"major\": 9}}, \"capabilities\": []}}"); - - mock_qmp_expect_and_reply(&s, "qmp_capabilities", NULL); - - /* Consume the stop command but don't reply — json_stream_done() on cleanup closes - * our fd, triggering EOF while the client is blocked in qmp_client_call()'s - * process+wait pump. */ - mock_qmp_recv(&s, &stop_cmd); - - _exit(EXIT_SUCCESS); -} - -TEST(qmp_client_call_disconnect) { - _cleanup_(qmp_client_unrefp) QmpClient *client = NULL; - _cleanup_(pidref_done_sigkill_wait) PidRef pid = PIDREF_NULL; - int qmp_fds[2]; - int r; - - ASSERT_OK_ERRNO(socketpair(AF_UNIX, SOCK_STREAM|SOCK_CLOEXEC, 0, qmp_fds)); - - r = ASSERT_OK(pidref_safe_fork("(mock-qmp-call-disc)", FORK_DEATHSIG_SIGKILL|FORK_LOG, &pid)); - if (r == 0) { - safe_close(qmp_fds[0]); - mock_qmp_server_call_disconnect(qmp_fds[1]); - } - safe_close(qmp_fds[1]); - - ASSERT_OK(qmp_client_connect_fd(&client, qmp_fds[0])); - - /* The server reads our stop command and closes without replying. qmp_client_call() - * is driving its own pump, so it must notice the EOF, transition to DISCONNECTED, - * and return a disconnect error rather than hanging. */ - r = qmp_client_call(client, "stop", NULL, NULL, NULL); - ASSERT_TRUE(r < 0); - ASSERT_TRUE(ERRNO_IS_NEG_DISCONNECT(r)); + ASSERT_FALSE(fired); + return 0; } TEST(qmp_schema_has_member) { _cleanup_(sd_json_variant_unrefp) sd_json_variant *schema = NULL; - /* QEMU introspection uses opaque numeric type ids ("0", "1", ...) — only member names are - * the actual QAPI strings. Verify we walk all object entries and find the member by name. */ + /* QEMU introspection uses opaque numeric type ids ("0", "1", ...); only member names + * are the real QAPI strings. Verify we walk all object entries to find members by name. */ ASSERT_OK(sd_json_build(&schema, SD_JSON_BUILD_ARRAY( SD_JSON_BUILD_OBJECT( @@ -747,10 +498,4 @@ TEST(qmp_schema_has_member) { ASSERT_FALSE(qmp_schema_has_member(NULL, "discard-no-unref")); } -static int intro(void) { - /* Ignore SIGPIPE so that write() to a closed socket returns EPIPE instead of killing us */ - ASSERT_TRUE(signal(SIGPIPE, SIG_IGN) != SIG_ERR); - return 0; -} - -DEFINE_TEST_MAIN_FULL(LOG_DEBUG, intro, NULL); +DEFINE_TEST_MAIN(LOG_DEBUG);