From f617a341f371fdfde89abe0ec546fec66c4489fb Mon Sep 17 00:00:00 2001 From: Alexander Larsson Date: Fri, 29 Sep 2023 13:37:22 +0200 Subject: [PATCH] Support transient /etc If the `prepare-root.conf` file contains: ``` [etc] transient=yes ``` Then during prepare-root, an overlayfs is mounted as /etc, with the upper dir being in /run. If composefs is used, the lower dir is `usr/etc` from the composefs image; otherwise it is the deployed `$deploydir/usr/etc`. Note that for this to work with selinux, the commit must have been built with OSTREE_REPO_COMMIT_MODIFIER_FLAGS_USRETC_AS_ETC. Otherwise the lowerdir (/usr/etc) will have the wrong selinux contexts for the final location of the mount (/etc). We also set the transient-etc key in the ostree-booted file, pointing it to the directory that is used for the overlayfs. There is some additional work happening in ostree-remount, mostly related to selinux (as this needs to happen post selinux policy load): * Recent versions of selinux-policy have issues with the overlayfs mount being kernel_t, and that is not allowed to manage files as needed. This is fixed in https://github.com/fedora-selinux/selinux-policy/pull/1893 * Any /etc files created in the initramfs will not be labeled, because the selinux policy has not been loaded. In addition, the upper dir is on a tmpfs, and any manually set xattr-based selinux labels on those are reset during policy load. To work around this, ostree-remount will relabel all files on /etc that have corresponding files in the overlayfs upper dir. * During early boot, systemd mounts /run/machine-id on top of /etc/machine-id (as /etc is readonly). Later during boot, when etc is readwrite, systemd-machine-id-commit.service will remove the mount and update the real file under it with the right content. To ensure that this keeps working, we need to ensure that when we relabel /etc/machine-id we relabel the real (covered) file, not the temporary bind-mount. 
* ostree-remount no longer needs to remount /etc read-only in the transient-etc case. Signed-off-by: Alexander Larsson --- Makefile-switchroot.am | 5 ++ man/ostree-prepare-root.xml | 4 ++ src/libotcore/otcore.h | 2 + src/switchroot/ostree-prepare-root.c | 55 ++++++++++++++-- src/switchroot/ostree-remount.c | 98 +++++++++++++++++++++++++++- 5 files changed, 155 insertions(+), 9 deletions(-) diff --git a/Makefile-switchroot.am b/Makefile-switchroot.am index 71a3cbda57..1e458e0e2c 100644 --- a/Makefile-switchroot.am +++ b/Makefile-switchroot.am @@ -63,6 +63,11 @@ ostree_remount_SOURCES = \ ostree_remount_CPPFLAGS = $(AM_CPPFLAGS) $(OT_INTERNAL_GIO_UNIX_CFLAGS) -Isrc/switchroot -I$(srcdir)/src/libotcore -I$(srcdir)/src/libotutil -I$(srcdir)/libglnx ostree_remount_LDADD = $(AM_LDFLAGS) $(OT_INTERNAL_GIO_UNIX_LIBS) libotcore.la libotutil.la libglnx.la +if USE_SELINUX +ostree_remount_CPPFLAGS += $(OT_DEP_SELINUX_CFLAGS) +ostree_remount_LDADD += $(OT_DEP_SELINUX_LIBS) +endif + if USE_COMPOSEFS ostree_prepare_root_LDADD += libcomposefs.la endif diff --git a/man/ostree-prepare-root.xml b/man/ostree-prepare-root.xml index 820e6a278e..03bf022e27 100644 --- a/man/ostree-prepare-root.xml +++ b/man/ostree-prepare-root.xml @@ -113,6 +113,10 @@ License along with this library. If not, see . sysroot.readonly A boolean value; the default is false. If this is set to true, then the /sysroot mount point is mounted read-only. + + etc.transient + A boolean value; the default is false. If this is set to true, then the /etc mount point is mounted transiently i.e. a non-persistent location. + composefs.enabled This can be yes, no. 
maybe or diff --git a/src/libotcore/otcore.h b/src/libotcore/otcore.h index ba162b8d14..1593e7b77f 100644 --- a/src/libotcore/otcore.h +++ b/src/libotcore/otcore.h @@ -72,3 +72,5 @@ GKeyFile *otcore_load_config (int rootfs, const char *filename, GError **error); #define OTCORE_RUN_BOOTED_KEY_COMPOSEFS_SIGNATURE "composefs.signed" // This key will be present if the sysroot-ro flag was found #define OTCORE_RUN_BOOTED_KEY_SYSROOT_RO "sysroot-ro" + +#define OTCORE_RUN_BOOTED_KEY_TRANSIENT_ETC "transient-etc" diff --git a/src/switchroot/ostree-prepare-root.c b/src/switchroot/ostree-prepare-root.c index ca4ebb9914..27d06fa7f8 100644 --- a/src/switchroot/ostree-prepare-root.c +++ b/src/switchroot/ostree-prepare-root.c @@ -87,6 +87,9 @@ #define SYSROOT_KEY "sysroot" #define READONLY_KEY "readonly" +#define ETC_KEY "etc" +#define TRANSIENT_KEY "transient" + #define COMPOSEFS_KEY "composefs" #define ENABLED_KEY "enabled" #define KEYPATH_KEY "keypath" @@ -547,13 +550,51 @@ main (int argc, char *argv[]) * the deployment needs to be created and remounted as read/write. */ if (sysroot_readonly || using_composefs) { - /* Bind-mount /etc (at deploy path), and remount as writable. 
*/ - if (mount ("etc", TMP_SYSROOT "/etc", NULL, MS_BIND | MS_SILENT, NULL) < 0) - err (EXIT_FAILURE, "failed to prepare /etc bind-mount at /sysroot.tmp/etc"); - if (mount (TMP_SYSROOT "/etc", TMP_SYSROOT "/etc", NULL, MS_BIND | MS_REMOUNT | MS_SILENT, - NULL) - < 0) - err (EXIT_FAILURE, "failed to make writable /etc bind-mount at /sysroot.tmp/etc"); + gboolean etc_transient = FALSE; + if (!ot_keyfile_get_boolean_with_default (config, ETC_KEY, TRANSIENT_KEY, FALSE, + &etc_transient, &error)) + errx (EXIT_FAILURE, "Failed to parse etc.transient value: %s", error->message); + + if (etc_transient) + { + char *ovldir = "/run/ostree/transient-etc"; + + g_variant_builder_add (&metadata_builder, "{sv}", OTCORE_RUN_BOOTED_KEY_TRANSIENT_ETC, + g_variant_new_string (ovldir)); + + char *lowerdir = "usr/etc"; + if (using_composefs) + lowerdir = TMP_SYSROOT "/usr/etc"; + + g_autofree char *upperdir = g_build_filename (ovldir, "upper", NULL); + g_autofree char *workdir = g_build_filename (ovldir, "work", NULL); + + struct + { + const char *path; + int mode; + } subdirs[] = { { ovldir, 0700 }, { upperdir, 0755 }, { workdir, 0755 } }; + for (int i = 0; i < G_N_ELEMENTS (subdirs); i++) + { + if (mkdirat (AT_FDCWD, subdirs[i].path, subdirs[i].mode) < 0) + err (EXIT_FAILURE, "Failed to create dir %s", subdirs[i].path); + } + + g_autofree char *ovl_options + = g_strdup_printf ("lowerdir=%s,upperdir=%s,workdir=%s", lowerdir, upperdir, workdir); + if (mount ("overlay", TMP_SYSROOT "/etc", "overlay", MS_SILENT, ovl_options) < 0) + err (EXIT_FAILURE, "failed to mount transient etc overlayfs"); + } + else + { + /* Bind-mount /etc (at deploy path), and remount as writable. 
*/ + if (mount ("etc", TMP_SYSROOT "/etc", NULL, MS_BIND | MS_SILENT, NULL) < 0) + err (EXIT_FAILURE, "failed to prepare /etc bind-mount at /sysroot.tmp/etc"); + if (mount (TMP_SYSROOT "/etc", TMP_SYSROOT "/etc", NULL, MS_BIND | MS_REMOUNT | MS_SILENT, + NULL) + < 0) + err (EXIT_FAILURE, "failed to make writable /etc bind-mount at /sysroot.tmp/etc"); + } } /* Prepare /usr. diff --git a/src/switchroot/ostree-remount.c b/src/switchroot/ostree-remount.c index d8b01f6858..497603e9d9 100644 --- a/src/switchroot/ostree-remount.c +++ b/src/switchroot/ostree-remount.c @@ -35,6 +35,9 @@ #include #include #include +#ifdef HAVE_SELINUX +#include +#endif #include "ostree-mount-util.h" #include "otcore.h" @@ -76,6 +79,50 @@ do_remount (const char *target, bool writable) printf ("Remounted %s: %s\n", writable ? "rw" : "ro", target); } +/* Relabel the directory $real_path, which is going to be an overlayfs mount, + * based on the content of an overlayfs upperdirectory that is in use by the mount. + * The goal is that we relabel in the overlay mount all the files that have been + * modified (directly or via parent copyup operations) since the overlayfs was + * mounted. This will be used for the /etc overlayfs mount where no selinux labels + * are set before selinux policy is loaded. 
+ */ +static void +relabel_dir_for_upper (const char *upper_path, const char *real_path, gboolean is_dir) +{ +#ifdef HAVE_SELINUX + if (selinux_restorecon (real_path, 0)) + err (EXIT_FAILURE, "Failed to relabel %s", real_path); + + if (!is_dir) + return; + + g_auto (GLnxDirFdIterator) dfd_iter = { + 0, + }; + + if (!glnx_dirfd_iterator_init_at (AT_FDCWD, upper_path, FALSE, &dfd_iter, NULL)) + err (EXIT_FAILURE, "Failed to open upper directory %s for relabeling", upper_path); + + while (TRUE) + { + struct dirent *dent; + + if (!glnx_dirfd_iterator_next_dent_ensure_dtype (&dfd_iter, &dent, NULL, NULL)) + { + err (EXIT_FAILURE, "Failed to read upper directory %s for relabelin", upper_path); + break; + } + + if (dent == NULL) + break; + + g_autofree char *upper_child = g_build_filename (upper_path, dent->d_name, NULL); + g_autofree char *real_child = g_build_filename (real_path, dent->d_name, NULL); + relabel_dir_for_upper (upper_child, real_child, dent->d_type == DT_DIR); + } +#endif +} + int main (int argc, char *argv[]) { @@ -119,6 +166,52 @@ main (int argc, char *argv[]) if (mount ("none", "/sysroot", NULL, MS_REC | MS_PRIVATE, NULL) < 0) perror ("warning: While remounting /sysroot MS_PRIVATE"); + const char *transient_etc = NULL; + g_variant_dict_lookup (ostree_run_metadata, OTCORE_RUN_BOOTED_KEY_TRANSIENT_ETC, "&s", + &transient_etc); + + if (transient_etc) + { + /* If the initramfs created any files in /etc (directly or via overlay copy-up) they + * will be unlabeled, because the selinux policy is not loaded until after the + * pivot-root. So, for all files in the upper dir, relabel the corresponding overlay + * file. + * + * Also, note that during boot systemd will create a /run/machine-id -> + * /etc/machine-id bind mount (as /etc is read-only early on). It will then later + * replace this mount with a real one (in systemd-machine-id-commit.service). + * + * We need to label the actual overlayfs file, not the temporary bind-mount. 
To do + * this we unmount the covering mount before relabeling, but we do so in a temporary + * private namespace to avoid affecting other parts of the system. + */ + + glnx_autofd int initial_ns_fd = -1; + if (g_file_test ("/run/machine-id", G_FILE_TEST_EXISTS) + && g_file_test ("/etc/machine-id", G_FILE_TEST_EXISTS)) + { + initial_ns_fd = open ("/proc/self/ns/mnt", O_RDONLY | O_NOCTTY | O_CLOEXEC); + if (initial_ns_fd < 0) + err (EXIT_FAILURE, "Failed to open initial namespace"); + + if (unshare (CLONE_NEWNS) < 0) + err (EXIT_FAILURE, "Failed to unshare initial namespace"); + + /* Ensure unmount is not propagated */ + if (mount ("none", "/etc", NULL, MS_REC | MS_PRIVATE, NULL) < 0) + err (EXIT_FAILURE, "warning: While remounting /etc MS_PRIVATE"); + + if (umount2 ("/etc/machine-id", MNT_DETACH) < 0) + err (EXIT_FAILURE, "Failed to unmount machine-id"); + } + + g_autofree char *upper = g_build_filename (transient_etc, "upper", NULL); + relabel_dir_for_upper (upper, "/etc", TRUE); + + if (initial_ns_fd != -1 && setns (initial_ns_fd, CLONE_NEWNS) < 0) + err (EXIT_FAILURE, "Failed to join initial namespace"); + } + gboolean root_is_composefs = FALSE; g_variant_dict_lookup (ostree_run_metadata, OTCORE_RUN_BOOTED_KEY_COMPOSEFS, "b", &root_is_composefs); @@ -140,8 +233,9 @@ main (int argc, char *argv[]) do_remount ("/sysroot", !sysroot_configured_readonly); /* And also make sure to make /etc rw again. We make this conditional on - * sysroot_configured_readonly because only in that case is it a bind-mount. */ - if (sysroot_configured_readonly) + * sysroot_configured_readonly && !transient_etc because only in that case is it a + * bind-mount. */ + if (sysroot_configured_readonly && !transient_etc) do_remount ("/etc", true); /* If /var was created as as an OSTree default bind mount (instead of being a separate