From 1124c804ddee3c6ee3e591907c1a330d0cb9dc79 Mon Sep 17 00:00:00 2001 From: Alexander Larsson Date: Fri, 29 Sep 2023 13:37:22 +0200 Subject: [PATCH] Support transient /etc If the `prepare-root.conf` file contains: ``` [etc] transient=yes ``` Then during prepare-root, an overlayfs is mounted as /etc, with the upper dir being in /run. If composefs is used, the lower dir is `usr/etc` from the composefs image (which is relabeled to work as /etc); otherwise it is the deployed `$deploydir/usr/etc`. Note that for this to work with selinux, the commit must have been built with OSTREE_REPO_COMMIT_MODIFIER_FLAGS_USRETC_AS_ETC. Otherwise the lower dir will have the wrong selinux contexts for the final location. We also set the transient-etc key in the ostree-booted file, pointing it to the directory that contains the overlayfs upper dir that is used. There are some additional complexities here: * Semi-recent versions of selinux-policy have issues with the overlayfs mount being kernel_t, and that is not allowed to manage files. This should be mostly fixed in selinux-policy-38.21 but some further details need to be ironed out. * Any /etc files created in the initramfs will not be labeled, because the selinux policy has not been loaded. In addition, the upper dir is on a tmpfs, and any manually set xattr-based selinux labels on those are reset during policy load. To work around this we hook into ostree-remount and relabel all files on /etc that are from the overlayfs upper dir. * During the initramfs, systemd mounts /run/machine-id on top of /etc/machine-id, and if this mount exists during later boot, then systemd-machine-id-commit.service will remove it and update the real file with its content once etc is writable. To ensure that this keeps working, we need to re-add this bind mount in the remounted /etc if /run/machine-id exists. * ostree-remount no longer needs to remount /etc read-write in the transient-etc case. 
Signed-off-by: Alexander Larsson --- Makefile-switchroot.am | 5 ++ man/ostree-prepare-root.xml | 4 ++ src/libotcore/otcore.h | 2 + src/switchroot/ostree-prepare-root.c | 55 +++++++++++++++--- src/switchroot/ostree-remount.c | 85 +++++++++++++++++++++++++++- 5 files changed, 143 insertions(+), 8 deletions(-) diff --git a/Makefile-switchroot.am b/Makefile-switchroot.am index 71a3cbda57..1e458e0e2c 100644 --- a/Makefile-switchroot.am +++ b/Makefile-switchroot.am @@ -63,6 +63,11 @@ ostree_remount_SOURCES = \ ostree_remount_CPPFLAGS = $(AM_CPPFLAGS) $(OT_INTERNAL_GIO_UNIX_CFLAGS) -Isrc/switchroot -I$(srcdir)/src/libotcore -I$(srcdir)/src/libotutil -I$(srcdir)/libglnx ostree_remount_LDADD = $(AM_LDFLAGS) $(OT_INTERNAL_GIO_UNIX_LIBS) libotcore.la libotutil.la libglnx.la +if USE_SELINUX +ostree_remount_CPPFLAGS += $(OT_DEP_SELINUX_CFLAGS) +ostree_remount_LDADD += $(OT_DEP_SELINUX_LIBS) +endif + if USE_COMPOSEFS ostree_prepare_root_LDADD += libcomposefs.la endif diff --git a/man/ostree-prepare-root.xml b/man/ostree-prepare-root.xml index 820e6a278e..03bf022e27 100644 --- a/man/ostree-prepare-root.xml +++ b/man/ostree-prepare-root.xml @@ -113,6 +113,10 @@ License along with this library. If not, see . sysroot.readonly A boolean value; the default is false. If this is set to true, then the /sysroot mount point is mounted read-only. + + etc.transient + A boolean value; the default is false. If this is set to true, then the /etc mount point is mounted transiently i.e. a non-persistent location. + composefs.enabled This can be yes, no. 
maybe or diff --git a/src/libotcore/otcore.h b/src/libotcore/otcore.h index ba162b8d14..1593e7b77f 100644 --- a/src/libotcore/otcore.h +++ b/src/libotcore/otcore.h @@ -72,3 +72,5 @@ GKeyFile *otcore_load_config (int rootfs, const char *filename, GError **error); #define OTCORE_RUN_BOOTED_KEY_COMPOSEFS_SIGNATURE "composefs.signed" // This key will be present if the sysroot-ro flag was found #define OTCORE_RUN_BOOTED_KEY_SYSROOT_RO "sysroot-ro" + +#define OTCORE_RUN_BOOTED_KEY_TRANSIENT_ETC "transient-etc" diff --git a/src/switchroot/ostree-prepare-root.c b/src/switchroot/ostree-prepare-root.c index ca4ebb9914..27d06fa7f8 100644 --- a/src/switchroot/ostree-prepare-root.c +++ b/src/switchroot/ostree-prepare-root.c @@ -87,6 +87,9 @@ #define SYSROOT_KEY "sysroot" #define READONLY_KEY "readonly" +#define ETC_KEY "etc" +#define TRANSIENT_KEY "transient" + #define COMPOSEFS_KEY "composefs" #define ENABLED_KEY "enabled" #define KEYPATH_KEY "keypath" @@ -547,13 +550,51 @@ main (int argc, char *argv[]) * the deployment needs to be created and remounted as read/write. */ if (sysroot_readonly || using_composefs) { - /* Bind-mount /etc (at deploy path), and remount as writable. 
*/ - if (mount ("etc", TMP_SYSROOT "/etc", NULL, MS_BIND | MS_SILENT, NULL) < 0) - err (EXIT_FAILURE, "failed to prepare /etc bind-mount at /sysroot.tmp/etc"); - if (mount (TMP_SYSROOT "/etc", TMP_SYSROOT "/etc", NULL, MS_BIND | MS_REMOUNT | MS_SILENT, - NULL) - < 0) - err (EXIT_FAILURE, "failed to make writable /etc bind-mount at /sysroot.tmp/etc"); + gboolean etc_transient = FALSE; + if (!ot_keyfile_get_boolean_with_default (config, ETC_KEY, TRANSIENT_KEY, FALSE, + &etc_transient, &error)) + errx (EXIT_FAILURE, "Failed to parse etc.transient value: %s", error->message); + + if (etc_transient) + { + char *ovldir = "/run/ostree/transient-etc"; + + g_variant_builder_add (&metadata_builder, "{sv}", OTCORE_RUN_BOOTED_KEY_TRANSIENT_ETC, + g_variant_new_string (ovldir)); + + char *lowerdir = "usr/etc"; + if (using_composefs) + lowerdir = TMP_SYSROOT "/usr/etc"; + + g_autofree char *upperdir = g_build_filename (ovldir, "upper", NULL); + g_autofree char *workdir = g_build_filename (ovldir, "work", NULL); + + struct + { + const char *path; + int mode; + } subdirs[] = { { ovldir, 0700 }, { upperdir, 0755 }, { workdir, 0755 } }; + for (int i = 0; i < G_N_ELEMENTS (subdirs); i++) + { + if (mkdirat (AT_FDCWD, subdirs[i].path, subdirs[i].mode) < 0) + err (EXIT_FAILURE, "Failed to create dir %s", subdirs[i].path); + } + + g_autofree char *ovl_options + = g_strdup_printf ("lowerdir=%s,upperdir=%s,workdir=%s", lowerdir, upperdir, workdir); + if (mount ("overlay", TMP_SYSROOT "/etc", "overlay", MS_SILENT, ovl_options) < 0) + err (EXIT_FAILURE, "failed to mount transient etc overlayfs"); + } + else + { + /* Bind-mount /etc (at deploy path), and remount as writable. 
*/ + if (mount ("etc", TMP_SYSROOT "/etc", NULL, MS_BIND | MS_SILENT, NULL) < 0) + err (EXIT_FAILURE, "failed to prepare /etc bind-mount at /sysroot.tmp/etc"); + if (mount (TMP_SYSROOT "/etc", TMP_SYSROOT "/etc", NULL, MS_BIND | MS_REMOUNT | MS_SILENT, + NULL) + < 0) + err (EXIT_FAILURE, "failed to make writable /etc bind-mount at /sysroot.tmp/etc"); + } } /* Prepare /usr. diff --git a/src/switchroot/ostree-remount.c b/src/switchroot/ostree-remount.c index d8b01f6858..795e1ef3a2 100644 --- a/src/switchroot/ostree-remount.c +++ b/src/switchroot/ostree-remount.c @@ -35,6 +35,9 @@ #include #include #include +#ifdef HAVE_SELINUX +#include +#endif #include "ostree-mount-util.h" #include "otcore.h" @@ -76,6 +79,43 @@ do_remount (const char *target, bool writable) printf ("Remounted %s: %s\n", writable ? "rw" : "ro", target); } +static void +relabel_dir_for_upper (const char *upper_path, const char *real_path, gboolean is_dir) +{ +#ifdef HAVE_SELINUX + if (selinux_restorecon (real_path, 0)) + g_printerr ("Failed to relabel %s\n", real_path); + + if (!is_dir) + return; + + g_auto (GLnxDirFdIterator) dfd_iter = { + 0, + }; + + if (!glnx_dirfd_iterator_init_at (AT_FDCWD, upper_path, FALSE, &dfd_iter, NULL)) + g_printerr ("Failed to open directory %s\n", upper_path); + + while (TRUE) + { + struct dirent *dent; + + if (!glnx_dirfd_iterator_next_dent_ensure_dtype (&dfd_iter, &dent, NULL, NULL)) + { + g_printerr ("Failed to read directory %s\n", upper_path); + break; + } + + if (dent == NULL) + break; + + g_autofree char *upper_child = g_build_filename (upper_path, dent->d_name, NULL); + g_autofree char *real_child = g_build_filename (real_path, dent->d_name, NULL); + relabel_dir_for_upper (upper_child, real_child, dent->d_type == DT_DIR); + } +#endif +} + int main (int argc, char *argv[]) { @@ -119,6 +159,49 @@ main (int argc, char *argv[]) if (mount ("none", "/sysroot", NULL, MS_REC | MS_PRIVATE, NULL) < 0) perror ("warning: While remounting /sysroot MS_PRIVATE"); + const char 
*transient_etc = NULL; + g_variant_dict_lookup (ostree_run_metadata, OTCORE_RUN_BOOTED_KEY_TRANSIENT_ETC, "&s", + &transient_etc); + + if (transient_etc) + { + /* Systemd will create a /run/machine-id -> /etc/machine-id bind mount if /etc is + * read-only, and then it will later replace this mount (if it exist) with a real one. + * We need to relabel the file on the overlayfs, below the bind mount, so we unmount + * the covering mount. However, we do so in a temporary private namespace to avoid + * affecting other parts of the system. + */ + + glnx_autofd int initial_ns_fd = -1; + if (g_file_test ("/run/machine-id", G_FILE_TEST_EXISTS)) + { + initial_ns_fd = open ("/proc/self/ns/mnt", O_RDONLY | O_NOCTTY | O_CLOEXEC); + if (initial_ns_fd < 0) + perror ("Failed to open initial namespace"); + + if (unshare (CLONE_NEWNS) < 0) + perror ("Failed to unshare initial namespace"); + + /* Ensure unmount is not propagated */ + if (mount ("none", "/etc", NULL, MS_REC | MS_PRIVATE, NULL) < 0) + perror ("warning: While remounting /etc MS_PRIVATE"); + + if (umount2 ("/etc/machine-id", MNT_DETACH) < 0) + perror ("Failed to unmount machine-id"); + } + + /* If the initramfs created any files in /etc (directly or via overlay copy-up) + * they will be unlabeled, because the selinux policy is not loaded until after + * the pivot-root. So, for all files in the upper dir, relabel the corresponding + * overlay file. + */ + g_autofree char *upper = g_build_filename (transient_etc, "upper", NULL); + relabel_dir_for_upper (upper, "/etc", TRUE); + + if (initial_ns_fd != -1 && setns (initial_ns_fd, CLONE_NEWNS) < 0) + perror ("Failed to join initial namespace"); + } + gboolean root_is_composefs = FALSE; g_variant_dict_lookup (ostree_run_metadata, OTCORE_RUN_BOOTED_KEY_COMPOSEFS, "b", &root_is_composefs); @@ -141,7 +224,7 @@ main (int argc, char *argv[]) /* And also make sure to make /etc rw again. 
We make this conditional on * sysroot_configured_readonly because only in that case is it a bind-mount. */ - if (sysroot_configured_readonly) + if (sysroot_configured_readonly && !transient_etc) do_remount ("/etc", true); /* If /var was created as as an OSTree default bind mount (instead of being a separate