From 96e2fb8717eadf187c8f7c8b749ff7578bce8392 Mon Sep 17 00:00:00 2001 From: Misaki Kasumi Date: Sat, 21 Dec 2024 02:43:40 +0800 Subject: [PATCH] sysroot: Do not require enter mount namespace for ensure_visible and ensure_writable --- src/libostree/ostree-sysroot-private.h | 11 -- src/libostree/ostree-sysroot.c | 210 ++++++++++++++++--------- src/libotutil/ot-fs-utils.c | 4 +- src/libotutil/ot-fs-utils.h | 2 +- src/ostree/ot-admin-builtin-nsenter.c | 4 +- src/ostree/ot-main.c | 11 +- src/ostree/ot-main.h | 1 + 7 files changed, 155 insertions(+), 88 deletions(-) diff --git a/src/libostree/ostree-sysroot-private.h b/src/libostree/ostree-sysroot-private.h index 4cf8414ad4..bc24168484 100644 --- a/src/libostree/ostree-sysroot-private.h +++ b/src/libostree/ostree-sysroot-private.h @@ -69,12 +69,6 @@ struct OstreeSysroot GLnxLockFile lock; OstreeSysrootLoadState loadstate; - /* - * XXX: It's very bad that mount namespaces are per thread, not per process. - * In a multi-threading environment, it's troublesome to ensure current thread is always in the ns. - * So, do not use OstreeSysroot from another thread if you want mount namespace. 
- */ - gboolean mount_namespace_in_use; /* TRUE if caller has told us they used CLONE_NEWNS */ gboolean root_is_ostree_booted; /* TRUE if sysroot is / and we are booted via ostree */ /* The device/inode for / and /etc, used to detect booted deployment */ dev_t root_device; @@ -119,13 +113,8 @@ struct OstreeSysroot // Relative to /boot, consumed by ostree-boot-complete.service #define _OSTREE_FINALIZE_STAGED_FAILURE_PATH "ostree/finalize-failure.stamp" -gboolean _ostree_sysroot_ensure_visible (OstreeSysroot *self, GError **error); - gboolean _ostree_sysroot_ensure_writable (OstreeSysroot *self, GError **error); -gboolean -_ostree_sysroot_enter_mount_namespace (OstreeSysroot *self, GError **error); - void _ostree_sysroot_emit_journal_msg (OstreeSysroot *self, const char *msg); gboolean _ostree_sysroot_read_boot_loader_configs (OstreeSysroot *self, int bootversion, diff --git a/src/libostree/ostree-sysroot.c b/src/libostree/ostree-sysroot.c index 0a8aa9149e..8723a44b43 100644 --- a/src/libostree/ostree-sysroot.c +++ b/src/libostree/ostree-sysroot.c @@ -262,6 +262,8 @@ _ostree_in_root_mount_namespace (gboolean *out_val, GError **error) * If you invoke this function, it must be before ostree_sysroot_load(); it may * be invoked before or after ostree_sysroot_initialize(). * + * This function is now a stub. 
+ * + * Since: 2020.1 */ void @@ -273,23 +275,57 @@ ostree_sysroot_set_mount_namespace_in_use (OstreeSysroot *self) gboolean in_root; g_autoptr (GError) local_error = NULL; g_assert (_ostree_in_root_mount_namespace (&in_root, &local_error) && !in_root); - self->mount_namespace_in_use = TRUE; } +static gboolean +ensure_sysroot_fd (OstreeSysroot *self, GError **error); + gboolean +_ostree_sysroot_ensure_boot_fd (OstreeSysroot *self, GError **error); + +static gboolean +_ostree_sysroot_invisible (const OstreeSysroot *self, gboolean *out_val, GError **error); + +static gboolean _ostree_sysroot_enter_mount_namespace (OstreeSysroot *self, GError **error) { - /* Do nothing if we're not privileged */ - if (!ot_util_process_privileged ()) - return TRUE; - /* We also assume operating on non-booted roots won't have a readonly sysroot */ if (!self->root_is_ostree_booted) return TRUE; - // If the mount namespaces are the same, we need to unshare(). gboolean in_root; - g_return_val_if_fail (_ostree_in_root_mount_namespace (&in_root, error), FALSE); + if (!_ostree_in_root_mount_namespace (&in_root, error)) + return FALSE; + + /* Back up the tree fds of sysroot_fd and boot_fd */ + glnx_autofd int sysroot_tree_fd = -1; + if ((sysroot_tree_fd = (int)syscall (SYS_open_tree, self->sysroot_fd, "", 1 /* OPEN_TREE_CLONE */ | O_CLOEXEC | AT_EMPTY_PATH)) < 0) + { + if (errno == EINVAL) + { + /* This means sysroot_fd is already a fd obtained by open_tree */ + sysroot_tree_fd = g_steal_fd (&self->sysroot_fd); + } + else + return glnx_throw_errno_prefix (error, "open_tree"); + } + + glnx_autofd int boot_tree_fd = -1; + if (self->boot_fd >= 0) + { + if ((boot_tree_fd = (int)syscall (SYS_open_tree, self->boot_fd, "", 1 /* OPEN_TREE_CLONE */ | O_CLOEXEC | AT_EMPTY_PATH)) < 0) + { + if (errno == EINVAL) + { + /* This means boot_fd is already a fd obtained by open_tree */ + boot_tree_fd = g_steal_fd (&self->boot_fd); + } + else + return glnx_throw_errno_prefix (error, "open_tree"); + } + } + + // If the 
mount namespaces are the same, we need to unshare(). if (in_root) { if (unshare (CLONE_NEWNS) < 0) @@ -300,7 +336,42 @@ _ostree_sysroot_enter_mount_namespace (OstreeSysroot *self, GError **error) return glnx_throw_errno_prefix (error, "Failed to set the mount propagation to private"); } - ostree_sysroot_set_mount_namespace_in_use (self); + /* Mount sysroot and boot back */ + ostree_sysroot_unload (self); + if (!ensure_sysroot_fd (self, error)) + return FALSE; + + gboolean invisible; + if (!_ostree_sysroot_invisible (self, &invisible, error)) + return FALSE; + + if (invisible) + { + glnx_autofd int old_sysroot_fd = g_steal_fd (&self->sysroot_fd); + + if (syscall (SYS_move_mount, sysroot_tree_fd, "", old_sysroot_fd, "sysroot", 4 /* MOVE_MOUNT_F_EMPTY_PATH */) < 0) + return glnx_throw_errno_prefix (error, "move_mount"); + + if (!glnx_opendirat (old_sysroot_fd, "sysroot", TRUE, &self->sysroot_fd, error)) + return FALSE; + + if (boot_tree_fd >= 0) + { + if (syscall (SYS_move_mount, boot_tree_fd, "", old_sysroot_fd, "boot", 4 /* MOVE_MOUNT_F_EMPTY_PATH */) < 0) + return glnx_throw_errno_prefix (error, "move_mount"); + + if (!glnx_opendirat (old_sysroot_fd, "boot", TRUE, &self->boot_fd, error)) + return FALSE; + } + } + else + { + if (boot_tree_fd >= 0) + { + if (!_ostree_sysroot_ensure_boot_fd (self, error)) + return FALSE; + } + } return TRUE; } @@ -401,23 +472,27 @@ remount_writable (const char *path, gboolean *did_remount, GError **error) } static gboolean -_ostree_sysroot_invisible (gboolean *out_val, GError **error) +_ostree_sysroot_invisible (const OstreeSysroot *self, gboolean *out_val, GError **error) { gboolean exists; - if (!ot_path_exists (OTCORE_RUN_OSTREE_PRIVATE "/sysroot-ns", &exists, error)) + g_assert (self->sysroot_fd >= 0); + g_assert (self->root_is_ostree_booted); + + if (!ot_path_exists (self->sysroot_fd, "sysroot/ostree", &exists, error)) return FALSE; - if (!exists) + if (exists) { *out_val = FALSE; return TRUE; } - if (!ot_path_exists 
("/sysroot/ostree", &exists, error)) + // root_is_ostree_booted is true so we can use AT_FDCWD here + if (!ot_path_exists (AT_FDCWD, OTCORE_RUN_OSTREE_PRIVATE "/sysroot-ns", &exists, error)) return FALSE; - if (exists) + if (!exists) { *out_val = FALSE; return TRUE; @@ -428,54 +503,46 @@ _ostree_sysroot_invisible (gboolean *out_val, GError **error) } /* Make /sysroot visible */ -gboolean +static gboolean _ostree_sysroot_ensure_visible (OstreeSysroot *self, GError **error) { - if (!ostree_sysroot_initialize (self, error)) + gboolean invisible; + if (!_ostree_sysroot_invisible (self, &invisible, error)) return FALSE; - /* Do nothing if no mount namespace is in use */ - if (!self->mount_namespace_in_use) + if (!invisible) return TRUE; - /* If we aren't operating on a booted system, then we don't - * do anything with mounts. + /* Boot may reside on the original sysroot. + * To prevent from losing it, try ensuring it now. */ - if (!self->root_is_ostree_booted) - return TRUE; - - gboolean invisible; - g_return_val_if_fail (_ostree_sysroot_invisible (&invisible, error), FALSE); - /* Handle invisible sysroot */ - if (invisible) + if (!_ostree_sysroot_ensure_boot_fd (self, error)) { - glnx_autofd int sysroot_ns_fd = -1; - if (!glnx_openat_rdonly (AT_FDCWD, OTCORE_RUN_OSTREE_PRIVATE "/sysroot-ns", TRUE, &sysroot_ns_fd, error)) - return FALSE; + // ignore failure + } - g_autofree char *cur_ns = g_strdup_printf ("/proc/%d/ns/mnt", gettid ()); - glnx_autofd int cur_ns_fd = -1; - if (!glnx_openat_rdonly (AT_FDCWD, cur_ns, TRUE, &cur_ns_fd, error)) - return FALSE; + glnx_autofd int sysroot_ns_fd = -1; + if (!glnx_openat_rdonly (AT_FDCWD, OTCORE_RUN_OSTREE_PRIVATE "/sysroot-ns", TRUE, &sysroot_ns_fd, error)) + return FALSE; - if (setns (sysroot_ns_fd, CLONE_NEWNS) < 0) - return glnx_throw_errno_prefix (error, "setns"); + g_autofree char *cur_ns = g_strdup_printf ("/proc/%d/ns/mnt", gettid ()); + glnx_autofd int cur_ns_fd = -1; + if (!glnx_openat_rdonly (AT_FDCWD, cur_ns, TRUE, 
&cur_ns_fd, error)) + return FALSE; - glnx_autofd int tree_fd = (int)syscall (SYS_open_tree, AT_FDCWD, "/", 1 /* OPEN_TREE_CLONE */ | O_CLOEXEC); - if (tree_fd < 0) - return glnx_throw_errno_prefix (error, "open_tree"); + /* Because namespace is per-thread, there is no race here */ + if (setns (sysroot_ns_fd, CLONE_NEWNS) < 0) + return glnx_throw_errno_prefix (error, "setns"); - if (setns (cur_ns_fd, CLONE_NEWNS) < 0) - abort (); // it's unsafe to continue if we cannot switch back + glnx_autofd int tree_fd = (int)syscall (SYS_open_tree, AT_FDCWD, "/", 1 /* OPEN_TREE_CLONE */ | O_CLOEXEC); + if (tree_fd < 0) + return glnx_throw_errno_prefix (error, "open_tree"); - if (syscall (SYS_move_mount, tree_fd, "", AT_FDCWD, "/sysroot", 4 /* MOVE_MOUNT_F_EMPTY_PATH */) < 0) - return glnx_throw_errno_prefix (error, "move_mount"); - } + if (setns (cur_ns_fd, CLONE_NEWNS) < 0) + return glnx_throw_errno_prefix (error, "setns"); - /* Now close and reopen our file descriptors */ - ostree_sysroot_unload (self); - if (!ensure_sysroot_fd (self, error)) - return FALSE; + glnx_close_fd (&self->sysroot_fd); + self->sysroot_fd = g_steal_fd (&tree_fd); return TRUE; } @@ -484,13 +551,9 @@ _ostree_sysroot_ensure_visible (OstreeSysroot *self, GError **error) gboolean _ostree_sysroot_ensure_writable (OstreeSysroot *self, GError **error) { - if (!_ostree_sysroot_ensure_visible (self, error)) + if (!ostree_sysroot_initialize (self, error)) return FALSE; - /* Do nothing if no mount namespace is in use */ - if (!self->mount_namespace_in_use) - return TRUE; - /* If we aren't operating on a booted system, then we don't * do anything with mounts. 
*/ @@ -501,20 +564,34 @@ _ostree_sysroot_ensure_writable (OstreeSysroot *self, GError **error) if (!_ostree_sysroot_ensure_boot_fd (self, error)) return FALSE; + glnx_autofd int cur_ns_fd = -1; + g_autofree char *cur_ns = g_strdup_printf ("/proc/%d/ns/mnt", gettid ()); + if (!glnx_openat_rdonly (AT_FDCWD, cur_ns, TRUE, &cur_ns_fd, error)) + return FALSE; + + if (!_ostree_sysroot_enter_mount_namespace (self, error)) + return FALSE; + + ostree_sysroot_unload (self); + + const char *path = gs_file_get_path_cached (self->path); + g_autofree char *sysroot_path = g_strdup_printf ("%s/sysroot", path); gboolean did_remount_sysroot = FALSE; - if (!remount_writable ("/sysroot", &did_remount_sysroot, error)) + if (!remount_writable (sysroot_path, &did_remount_sysroot, error)) return FALSE; + g_autofree char *boot_path = g_strdup_printf ("%s/boot", path); gboolean did_remount_boot = FALSE; - if (!remount_writable ("/boot", &did_remount_boot, error)) + if (!remount_writable (boot_path, &did_remount_boot, error)) return FALSE; - /* Now close and reopen our file descriptors */ - ostree_sysroot_unload (self); if (!ensure_sysroot_fd (self, error)) return FALSE; if (!_ostree_sysroot_ensure_boot_fd (self, error)) return FALSE; + if (setns (cur_ns_fd, CLONE_NEWNS) < 0) + return glnx_throw_errno_prefix (error, "setns"); + return TRUE; } @@ -1167,21 +1244,14 @@ ostree_sysroot_initialize (OstreeSysroot *self, GError **error) self->root_is_ostree_booted = (ostree_booted && root_is_sysroot); g_debug ("root_is_ostree_booted: %d", self->root_is_ostree_booted); - self->loadstate = OSTREE_SYSROOT_LOAD_STATE_INIT; - } - else - { - return TRUE; - } - gboolean invisible; - g_return_val_if_fail (_ostree_sysroot_invisible (&invisible, error), FALSE); - if (invisible) - { - if (!_ostree_sysroot_enter_mount_namespace (self, error)) - return FALSE; - if (!_ostree_sysroot_ensure_visible (self, error)) - return FALSE; + if (self->root_is_ostree_booted) + { + if (!_ostree_sysroot_ensure_visible (self, 
error)) + return FALSE; + } + + self->loadstate = OSTREE_SYSROOT_LOAD_STATE_INIT; } return TRUE; diff --git a/src/libotutil/ot-fs-utils.c b/src/libotutil/ot-fs-utils.c index 31097a5b08..f986f8d75e 100644 --- a/src/libotutil/ot-fs-utils.c +++ b/src/libotutil/ot-fs-utils.c @@ -280,12 +280,12 @@ ot_get_dir_size (int dfd, const char *path, guint64 blocksize, guint64 *out_size /* Check whether a path exists */ gboolean -ot_path_exists (const char *path, gboolean *out_val, GError **error) +ot_path_exists (int dfd, const char *path, gboolean *out_val, GError **error) { g_autoptr (GError) local_error = NULL; struct stat stbuf; - if (glnx_fstatat (AT_FDCWD, path, &stbuf, 0, &local_error)) + if (glnx_fstatat (dfd, path, &stbuf, 0, &local_error)) { *out_val = TRUE; return TRUE; diff --git a/src/libotutil/ot-fs-utils.h b/src/libotutil/ot-fs-utils.h index 671988177a..52f0f6219b 100644 --- a/src/libotutil/ot-fs-utils.h +++ b/src/libotutil/ot-fs-utils.h @@ -78,6 +78,6 @@ gboolean ot_parse_file_by_line (const char *path, gboolean (*cb) (const char *, gboolean ot_get_dir_size (int dfd, const char *path, guint64 blocksize, guint64 *out_size, GCancellable *cancellable, GError **error); -gboolean ot_path_exists (const char *path, gboolean *out_val, GError **error); +gboolean ot_path_exists (int dfd, const char *path, gboolean *out_val, GError **error); G_END_DECLS diff --git a/src/ostree/ot-admin-builtin-nsenter.c b/src/ostree/ot-admin-builtin-nsenter.c index fab7964ec8..bf6a14772a 100644 --- a/src/ostree/ot-admin-builtin-nsenter.c +++ b/src/ostree/ot-admin-builtin-nsenter.c @@ -65,8 +65,8 @@ ot_admin_builtin_nsenter (int argc, char **argv, OstreeCommandInvocation *invoca } if (!ostree_admin_option_context_parse (context, options, &argc, &argv, - OSTREE_ADMIN_BUILTIN_FLAG_UNLOCKED, invocation, &sysroot, - cancellable, error)) + OSTREE_ADMIN_BUILTIN_FLAG_UNLOCKED | OSTREE_ADMIN_BUILTIN_FLAG_ENTER_NS, + invocation, &sysroot, cancellable, error)) return FALSE; argc = new_argc; diff --git 
a/src/ostree/ot-main.c b/src/ostree/ot-main.c index d47a59cad5..e69b4f7498 100644 --- a/src/ostree/ot-main.c +++ b/src/ostree/ot-main.c @@ -559,12 +559,19 @@ gboolean ostree_admin_sysroot_load (OstreeSysroot *sysroot, OstreeAdminBuiltinFlags flags, GCancellable *cancellable, GError **error) { - if ((flags & OSTREE_ADMIN_BUILTIN_FLAG_UNLOCKED) == 0) + if (flags & OSTREE_ADMIN_BUILTIN_FLAG_ENTER_NS) { - /* Set up the mount namespace, if applicable */ if (!ostree_sysroot_initialize_with_mount_namespace (sysroot, cancellable, error)) return FALSE; + } + else + { + if (!ostree_sysroot_initialize (sysroot, error)) + return FALSE; + } + if ((flags & OSTREE_ADMIN_BUILTIN_FLAG_UNLOCKED) == 0) + { /* Released when sysroot is finalized, or on process exit */ if (!ot_admin_sysroot_lock (sysroot, error)) return FALSE; diff --git a/src/ostree/ot-main.h b/src/ostree/ot-main.h index 8df1ca8e9a..e4cbc2427a 100644 --- a/src/ostree/ot-main.h +++ b/src/ostree/ot-main.h @@ -39,6 +39,7 @@ typedef enum OSTREE_ADMIN_BUILTIN_FLAG_UNLOCKED = (1 << 1), OSTREE_ADMIN_BUILTIN_FLAG_NO_SYSROOT = (1 << 2), OSTREE_ADMIN_BUILTIN_FLAG_NO_LOAD = (1 << 3), + OSTREE_ADMIN_BUILTIN_FLAG_ENTER_NS = (1 << 4), } OstreeAdminBuiltinFlags; typedef struct OstreeCommandInvocation OstreeCommandInvocation;