From 427bc4aa46a82b9ffcb8471f5659603035973dbe Mon Sep 17 00:00:00 2001 From: Dan Nicholson Date: Thu, 29 Aug 2024 18:19:30 -0600 Subject: [PATCH] switchroot: Stop making /sysroot mount private Back in 2b8d586c5, /sysroot was changed to be a private mount so that submounts of /var do not propagate back to the stateroot /var. That's laudible, but it makes /sysroot different than every other shared mount in the root namespace. In particular, it means that submounts of /sysroot do not propagate into separate mount namespaces. Rather than make /sysroot private, make /var a slave+shared mount so that it receives mount events from /sysroot but not vice versa. That achieves the same effect of preventing /var submount events from propagating back to /sysroot while allowing /sysroot mount events to propagate forward like every other system mount. The mount propagation flags are applied as options in the generated var.mount unit. This depends on a mount(8) feature that has been present since util-linux 2.23. That's available in RHEL 7 and every non-EOL Debian and Ubuntu release. Applying the propagation from var.mount fixes a small race, too. Previously, if a /var submount was added before /sysroot was made private, it would have propagated back into /sysroot. That was possible since ostree-remount.service orders itself after var.mount but not before any /var submounts. Fixes: #2086 --- src/libostree/ostree-impl-system-generator.c | 9 ++++++++- src/switchroot/ostree-prepare-root.c | 11 ----------- src/switchroot/ostree-remount.c | 9 --------- tests/kolainst/destructive/mount-propagation.sh | 5 ++--- 4 files changed, 10 insertions(+), 24 deletions(-) diff --git a/src/libostree/ostree-impl-system-generator.c b/src/libostree/ostree-impl-system-generator.c index c5fe503287..eefde23a96 100644 --- a/src/libostree/ostree-impl-system-generator.c +++ b/src/libostree/ostree-impl-system-generator.c @@ -229,6 +229,13 @@ fstab_generator (const char *ostree_target, const bool is_aboot, const char *nor g_autoptr (GOutputStream) outstream = g_unix_output_stream_new (tmpf.fd, FALSE); gsize bytes_written; /* This code is inspired by systemd's fstab-generator.c. + * + * To avoid having submounts of /var propagate into + * /sysroot/ostree/deploy/$stateroot/var, the mount is made with slave+shared + * propagation. This means that /var will receive mount events from the + * parent /sysroot mount, but not vice versa. Adding a shared peer group + * below the slave group means that submounts of /var will inherit normal + * shared propagation. See https://github.com/ostreedev/ostree/issues/2086. * * Note that our unit doesn't run if systemd.volatile is enabled; * see https://github.com/ostreedev/ostree/pull/856 @@ -243,7 +250,7 @@ fstab_generator (const char *ostree_target, const bool is_aboot, const char *nor "[Mount]\n" "Where=%s\n" "What=%s\n" - "Options=bind\n", + "Options=bind,slave,shared\n", var_path, stateroot_var_path)) return FALSE; if (!g_output_stream_flush (outstream, cancellable, error)) diff --git a/src/switchroot/ostree-prepare-root.c b/src/switchroot/ostree-prepare-root.c index 70bf78f630..f02ad3c967 100644 --- a/src/switchroot/ostree-prepare-root.c +++ b/src/switchroot/ostree-prepare-root.c @@ -687,16 +687,5 @@ main (int argc, char *argv[]) err (EXIT_FAILURE, "failed to make /sysroot read-only"); } - /* The /sysroot mount needs to be private to avoid having a mount for e.g. /var/cache - * also propagate to /sysroot/ostree/deploy/$stateroot/var/cache - * - * Now in reality, today this is overridden by systemd: the *actual* way we fix this up - * is in ostree-remount.c. But let's do it here to express the semantics we want - * at the very start (perhaps down the line systemd will have compile/runtime option - * to say that the initramfs environment did everything right from the start). - */ - if (mount ("none", "sysroot", NULL, MS_PRIVATE | MS_SILENT, NULL) < 0) - err (EXIT_FAILURE, "remounting 'sysroot' private"); - exit (EXIT_SUCCESS); } diff --git a/src/switchroot/ostree-remount.c b/src/switchroot/ostree-remount.c index 3babb75141..f0a4b3d925 100644 --- a/src/switchroot/ostree-remount.c +++ b/src/switchroot/ostree-remount.c @@ -167,15 +167,6 @@ main (int argc, char *argv[]) } g_autoptr (GVariantDict) ostree_run_metadata = g_variant_dict_new (ostree_run_metadata_v); - /* The /sysroot mount needs to be private to avoid having a mount for e.g. /var/cache - * also propagate to /sysroot/ostree/deploy/$stateroot/var/cache - * - * Today systemd remounts / (recursively) as shared, so we're undoing that as early - * as possible. See also a copy of this in ostree-prepare-root.c. - */ - if (mount ("none", "/sysroot", NULL, MS_REC | MS_PRIVATE, NULL) < 0) - perror ("warning: While remounting /sysroot MS_PRIVATE"); - const char *transient_etc = NULL; g_variant_dict_lookup (ostree_run_metadata, OTCORE_RUN_BOOTED_KEY_TRANSIENT_ETC, "&s", &transient_etc); diff --git a/tests/kolainst/destructive/mount-propagation.sh b/tests/kolainst/destructive/mount-propagation.sh index 1c2395fdf6..e49cee7aa1 100755 --- a/tests/kolainst/destructive/mount-propagation.sh +++ b/tests/kolainst/destructive/mount-propagation.sh @@ -54,10 +54,9 @@ test_mounts() { assert_has_mount /sysroot/bar assert_not_has_mount "/sysroot/ostree/deploy/${stateroot}/var/foo" - # Repeat with the sub mount namespace. Since /sysroot is marked private, - # /sysroot/bar will not be propagated into it. + # Repeat with the sub mount namespace. assert_has_mount /var/foo "${ns_pid}" - assert_not_has_mount /sysroot/bar "${ns_pid}" + assert_has_mount /sysroot/bar "${ns_pid}" assert_not_has_mount "/sysroot/ostree/deploy/${stateroot}/var/foo" "${ns_pid}" }