Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[RFC] Hide /sysroot in a private mount namespace #3358

Draft
wants to merge 32 commits into
base: main
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
32 commits
Select commit Hold shift + click to select a range
1db98c0
prepare-sysroot: Bind var under /run instead of inplace
ruihe774 Dec 18, 2024
bfb84a3
prepare-root: Unmount temporary var mount after /var is mounted
ruihe774 Dec 19, 2024
cb03e44
prepare-root: It's not necessary to make /var slave anymore
ruihe774 Dec 20, 2024
91dda71
prepare-sysroot: Add sysroot.invisible
ruihe774 Dec 18, 2024
412ec41
remount: Skip remount /sysroot if invisible
ruihe774 Dec 18, 2024
a762cae
sysroot: Handle invisible sysroot
ruihe774 Dec 18, 2024
b7e46b4
ostree: Add command admin nsenter
ruihe774 Dec 18, 2024
c5c414a
chore: Use geteuid() instead of getuid() to check privilege
ruihe774 Dec 18, 2024
83438a1
sysroot: Assert not in root mount namespace
ruihe774 Dec 18, 2024
bf10b40
sysroot: Do not trust mount_namespace_in_use as it's per-thread
ruihe774 Dec 18, 2024
fb8a0ce
sysroot: Fix _ostree_sysroot_ensure_visible ns
ruihe774 Dec 18, 2024
52d03d7
prepare-root: Use sysroot.readonly=invisible
ruihe774 Dec 19, 2024
a5c64da
prepare-root: Add sysroot.protect as alias of sysroot.readonly
ruihe774 Dec 19, 2024
1d4dc03
prepare-root: Make leftover /sysroot immutable
ruihe774 Dec 19, 2024
6626f51
prepare-root: Add some comments
ruihe774 Dec 19, 2024
3dc167e
prepare-root: Fail if sysroot-ns already exists
ruihe774 Dec 19, 2024
300d21c
sysroot: _ostree_sysroot_invisible & _ostree_in_root_mount_namespace
ruihe774 Dec 19, 2024
646cc34
sysroot: rework _ostree_sysroot_ensure_visible
ruihe774 Dec 19, 2024
96e2fb8
sysroot: Do not require enter mount namespace for ensure_visible and …
ruihe774 Dec 20, 2024
01de3ea
nsenter: Fix argument parsing
ruihe774 Dec 20, 2024
627b4f8
prepare-root: Tidy code for root_upperdir and root_workdir
ruihe774 Dec 20, 2024
c7caee9
sysroot: Use glnx_fstatat_allow_noent, drop ot_path_exists
ruihe774 Dec 20, 2024
124035b
Revert "prepare-root: Make leftover /sysroot immutable"
ruihe774 Dec 20, 2024
2bd95ea
sysroot: Fix ostree in ostree nsenter
ruihe774 Dec 20, 2024
04d36fa
sysroot: Use ostree/repo to check visibility
ruihe774 Dec 21, 2024
2960f0a
sysroot: Tidy code for _ostree_sysroot_ensure_writable
ruihe774 Dec 21, 2024
e808a69
finalize-staged: Add a comment
ruihe774 Dec 21, 2024
b3851de
sysroot: Fix _ostree_sysroot_ensure_writable
ruihe774 Dec 22, 2024
e8bd540
sysroot-deploy: Fix _ostree_sysroot_run_in_deployment
ruihe774 Dec 23, 2024
cc10e37
sysroot-deploy: Fix ostree_sysroot_write_deployments_with_options
ruihe774 Dec 23, 2024
979ac47
sysroot-deploy: Fix full_system_sync
ruihe774 Dec 23, 2024
594d873
sysroot-deploy: Fix swap_bootloader
ruihe774 Dec 23, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions Makefile-ostree.am
Original file line number Diff line number Diff line change
Expand Up @@ -86,6 +86,7 @@ ostree_SOURCES += \
src/ostree/ot-admin-builtin-upgrade.c \
src/ostree/ot-admin-builtin-unlock.c \
src/ostree/ot-admin-builtin-state-overlay.c \
src/ostree/ot-admin-builtin-nsenter.c \
src/ostree/ot-admin-builtins.h \
src/ostree/ot-admin-instutil-builtin-selinux-ensure-labeled.c \
src/ostree/ot-admin-instutil-builtin-set-kargs.c \
Expand Down
4 changes: 2 additions & 2 deletions src/libostree/ostree-bootloader-zipl.c
Original file line number Diff line number Diff line change
Expand Up @@ -432,7 +432,7 @@ _ostree_bootloader_zipl_post_bls_sync (OstreeBootloader *bootloader, int bootver
// This can happen in a unit testing environment; at some point what we want to do here
// is move all of the zipl logic to a systemd unit instead that's keyed off
// ostree-finalize-staged.service.
if (getuid () != 0)
if (!ot_util_process_privileged ())
return TRUE;

// If we're in a booted deployment, we don't need to spawn a container.
Expand Down Expand Up @@ -490,7 +490,7 @@ _ostree_bootloader_zipl_post_bls_sync (OstreeBootloader *bootloader, int bootver
g_autofree char *sysroot_boot
= g_build_filename (gs_file_get_path_cached (self->sysroot->path), "boot", NULL);
const char *bwrap_args[] = { "--bind", sysroot_boot, "/boot", NULL };
if (!_ostree_sysroot_run_in_deployment (deployment_dfd, bwrap_args, zipl_argv, &estatus, NULL,
if (!_ostree_sysroot_run_in_deployment (self->sysroot, deployment_dfd, bwrap_args, zipl_argv, &estatus, NULL,
error))
return glnx_prefix_error (error, "Failed to invoke zipl");
}
Expand Down
156 changes: 66 additions & 90 deletions src/libostree/ostree-impl-system-generator.c
Original file line number Diff line number Diff line change
Expand Up @@ -126,31 +126,32 @@ require_internal_units (const char *normal_dir, const char *early_dir, const cha
#endif
}

// Resolve symlink to return osname
static gboolean
ruihe774 marked this conversation as resolved.
Show resolved Hide resolved
_ostree_sysroot_parse_bootlink_aboot (const char *bootlink, char **out_osname, GError **error)
write_unit_file (int dir_fd, const char *path, GCancellable *cancellable, GError **error, const char *fmt, ...)
{
static gsize regex_initialized;
static GRegex *regex;
g_autofree char *symlink_val = glnx_readlinkat_malloc (-1, bootlink, NULL, error);
if (!symlink_val)
return glnx_prefix_error (error, "Failed to read '%s' symlink", bootlink);

if (g_once_init_enter (&regex_initialized))
{
regex = g_regex_new ("^deploy/([^/]+)/", 0, 0, NULL);
g_assert (regex);
g_once_init_leave (&regex_initialized, 1);
}

g_autoptr (GMatchInfo) match = NULL;
if (!g_regex_match (regex, symlink_val, 0, &match))
return glnx_throw (error,
"Invalid aboot symlink in /ostree, expected symlink to resolve to "
"deploy/OSNAME/... instead it resolves to '%s'",
symlink_val);

*out_osname = g_match_info_fetch (match, 1);
g_auto (GLnxTmpfile) tmpf = {
0,
};
if (!glnx_open_tmpfile_linkable_at (dir_fd, ".", O_WRONLY | O_CLOEXEC, &tmpf, error))
return FALSE;
g_autoptr (GOutputStream) outstream = g_unix_output_stream_new (tmpf.fd, FALSE);
gsize bytes_written;
va_list args;
va_start (args, fmt);
const gboolean r = g_output_stream_vprintf (outstream, &bytes_written, cancellable, error, fmt, args);
va_end (args);
if (!r)
return FALSE;
if (!g_output_stream_flush (outstream, cancellable, error))
return FALSE;
g_clear_object (&outstream);
/* It should be readable */
if (!glnx_fchmod (tmpf.fd, 0644, error))
return FALSE;
/* Error out if somehow it already exists, that'll help us debug conflicts */
if (!glnx_link_tmpfile_at (&tmpf, GLNX_LINK_TMPFILE_NOREPLACE, dir_fd, path,
error))
return FALSE;
return TRUE;
}

Expand All @@ -163,22 +164,37 @@ fstab_generator (const char *ostree_target, const bool is_aboot, const char *nor
/* Not currently cancellable, but define a var in case we care later */
GCancellable *cancellable = NULL;
/* Some path constants to avoid typos */
static const char fstab_path[] = "/etc/fstab";
static const char var_path[] = "/var";
const char *fstab_path = "/etc/fstab";
const char *var_dst = "/var";
const char *var_src = OTCORE_RUN_OSTREE_PRIVATE "/var";

/* Written by ostree-sysroot-deploy.c. We parse out the stateroot here since we
* need to know it to mount /var. Unfortunately we can't easily use the
* libostree API to find the booted deployment since /boot might not have been
* mounted yet.
/* Prepare to write to the output unit dir; we use the "normal" dir
* that overrides /usr, but not /etc.
*/
g_autofree char *stateroot = NULL;
if (is_aboot)
{
if (!_ostree_sysroot_parse_bootlink_aboot (ostree_target, &stateroot, error))
return glnx_prefix_error (error, "Parsing aboot stateroot");
}
else if (!_ostree_sysroot_parse_bootlink (ostree_target, NULL, &stateroot, NULL, NULL, error))
return glnx_prefix_error (error, "Parsing stateroot");
glnx_autofd int normal_dir_dfd = -1;
if (!glnx_opendirat (AT_FDCWD, normal_dir, TRUE, &normal_dir_dfd, error))
return FALSE;

/* Generate a unit to unmount var_src */
if (!write_unit_file (normal_dir_dfd, "ostree-unmount-temp-var.service", cancellable, error,
"##\n# Automatically generated by ostree-system-generator\n##\n\n"
"[Unit]\n"
"Documentation=man:ostree(1)\n"
"ConditionPathIsMountPoint=%s\n"
"After=var.mount\n"
"\n"
"[Service]\n"
"Type=oneshot\n"
"ExecStart=/usr/bin/umount --lazy %s\n",
var_src, var_src))
return FALSE;

if (!glnx_shutil_mkdir_p_at (normal_dir_dfd, "local-fs.target.wants", 0755, cancellable,
error))
return FALSE;
if (symlinkat ("../ostree-unmount-temp-var.service", normal_dir_dfd,
"local-fs.target.wants/ostree-unmount-temp-var.service") < 0)
return glnx_throw_errno_prefix (error, "symlinkat");

/* Load /etc/fstab if it exists, and look for a /var mount */
g_autoptr (OtLibMountFile) fstab = setmntent (fstab_path, "re");
Expand All @@ -199,7 +215,7 @@ fstab_generator (const char *ostree_target, const bool is_aboot, const char *nor
path_kill_slashes (where);

/* We're only looking for /var here */
if (strcmp (where, var_path) != 0)
if (strcmp (where, var_dst) != 0)
continue;

found_var_mnt = TRUE;
Expand All @@ -211,59 +227,19 @@ fstab_generator (const char *ostree_target, const bool is_aboot, const char *nor
if (found_var_mnt)
return TRUE;

/* Prepare to write to the output unit dir; we use the "normal" dir
* that overrides /usr, but not /etc.
*/
glnx_autofd int normal_dir_dfd = -1;
if (!glnx_opendirat (AT_FDCWD, normal_dir, TRUE, &normal_dir_dfd, error))
return FALSE;

/* Generate our bind mount unit */
const char *stateroot_var_path = glnx_strjoina ("/sysroot/ostree/deploy/", stateroot, "/var");

g_auto (GLnxTmpfile) tmpf = {
0,
};
if (!glnx_open_tmpfile_linkable_at (normal_dir_dfd, ".", O_WRONLY | O_CLOEXEC, &tmpf, error))
return FALSE;
g_autoptr (GOutputStream) outstream = g_unix_output_stream_new (tmpf.fd, FALSE);
gsize bytes_written;
/* This code is inspired by systemd's fstab-generator.c.
*
* Note that our unit doesn't run if systemd.volatile is enabled;
* see https://github.com/ostreedev/ostree/pull/856
*
* To avoid having submounts of /var propagate into $stateroot/var, the mount
* is made with slave+shared propagation. This means that /var will receive
* mount events from the parent /sysroot mount, but not vice versa. Adding a
* shared peer group below the slave group means that submounts of /var will
* inherit normal shared propagation. See mount_namespaces(7), Linux
* Documentation/filesystems/sharedsubtree.txt and
* https://github.com/ostreedev/ostree/issues/2086. This also happens in
* ostree-prepare-root.c for the INITRAMFS_MOUNT_VAR case.
*/
if (!g_output_stream_printf (outstream, &bytes_written, cancellable, error,
"##\n# Automatically generated by ostree-system-generator\n##\n\n"
"[Unit]\n"
"Documentation=man:ostree(1)\n"
"ConditionKernelCommandLine=!systemd.volatile\n"
"Before=local-fs.target\n"
"\n"
"[Mount]\n"
"Where=%s\n"
"What=%s\n"
"Options=bind,slave,shared\n",
var_path, stateroot_var_path))
return FALSE;
if (!g_output_stream_flush (outstream, cancellable, error))
return FALSE;
g_clear_object (&outstream);
/* It should be readable */
if (!glnx_fchmod (tmpf.fd, 0644, error))
return FALSE;
/* Error out if somehow it already exists, that'll help us debug conflicts */
if (!glnx_link_tmpfile_at (&tmpf, GLNX_LINK_TMPFILE_NOREPLACE, normal_dir_dfd, "var.mount",
error))
if (!write_unit_file (normal_dir_dfd, "var.mount", cancellable, error,
"##\n# Automatically generated by ostree-system-generator\n##\n\n"
"[Unit]\n"
"Documentation=man:ostree(1)\n"
"ConditionKernelCommandLine=!systemd.volatile\n"
"Before=local-fs.target\n"
"\n"
"[Mount]\n"
"Where=%s\n"
"What=%s\n"
"Options=bind\n",
var_dst, var_src))
return FALSE;

/* And ensure it's required; newer systemd will auto-inject fs dependencies
Expand Down
2 changes: 1 addition & 1 deletion src/libostree/ostree-repo-commit.c
Original file line number Diff line number Diff line change
Expand Up @@ -1658,7 +1658,7 @@ ostree_repo_prepare_transaction (OstreeRepo *self, gboolean *out_transaction_res
self->reserved_blocks = reserved_bytes / self->txn.blocksize;

/* Use the appropriate free block count if we're unprivileged */
guint64 bfree = (getuid () != 0 ? stvfsbuf.f_bavail : stvfsbuf.f_bfree);
guint64 bfree = (ot_util_process_privileged () ? stvfsbuf.f_bfree : stvfsbuf.f_bavail);
if (bfree > self->reserved_blocks)
self->txn.max_blocks = bfree - self->reserved_blocks;
else
Expand Down
Loading
Loading