diff --git a/hack/lldb/deploy.sh b/hack/lldb/deploy.sh
index 99adad6a7..5c6e0e896 100755
--- a/hack/lldb/deploy.sh
+++ b/hack/lldb/deploy.sh
@@ -11,7 +11,7 @@ sudo podman build --build-arg "sshpubkey=$(cat ~/.ssh/id_rsa.pub)" -f Containerf
 mkdir -p ~/.cache/bootc-dev/disks
 rm -f ~/.cache/bootc-dev/disks/lldb.raw
 truncate -s 10G ~/.cache/bootc-dev/disks/lldb.raw
-sudo podman run --pid=host --network=host --privileged --security-opt label=type:unconfined_t -v /dev:/dev -v /var/lib/containers:/var/lib/containers -v ~/.cache/bootc-dev/disks:/output -v /dev:/dev localhost/bootc-lldb bootc install to-disk --via-loopback --generic-image --skip-fetch-check /output/lldb.raw
+sudo podman run --pid=host --network=host --privileged --security-opt label=type:unconfined_t -v ~/.cache/bootc-dev/disks:/output localhost/bootc-lldb bootc install to-disk --via-loopback --generic-image --skip-fetch-check /output/lldb.raw
 
 # create a new VM in libvirt
 set +e
diff --git a/lib/src/install.rs b/lib/src/install.rs
index 845379a63..34a333a00 100644
--- a/lib/src/install.rs
+++ b/lib/src/install.rs
@@ -56,6 +56,8 @@ use crate::utils::sigpolicy_from_opts;
 const BOOT: &str = "boot";
 /// Directory for transient runtime state
 const RUN_BOOTC: &str = "/run/bootc";
+/// The default path for the host rootfs
+const ALONGSIDE_ROOT_MOUNT: &str = "/target";
 /// This is an ext4 special directory we need to ignore.
 const LOST_AND_FOUND: &str = "lost+found";
 /// The filename of the composefs EROFS superblock; TODO move this into ostree
@@ -316,9 +318,10 @@ pub(crate) struct InstallToExistingRootOpts {
     #[clap(long)]
     pub(crate) acknowledge_destructive: bool,
 
-    /// Path to the mounted root; it's expected to invoke podman with
-    /// `-v /:/target`, then supplying this argument is unnecessary.
-    #[clap(default_value = "/target")]
+    /// Path to the mounted root; this is now not necessary to provide.
+    /// Historically it was necessary to ensure the host rootfs was mounted here
+    /// via e.g. `-v /:/target`.
+    #[clap(default_value = ALONGSIDE_ROOT_MOUNT)]
     pub(crate) root_path: Utf8PathBuf,
 }
 
@@ -333,8 +336,6 @@ pub(crate) struct SourceInfo {
     pub(crate) selinux: bool,
     /// Whether the source is available in the host mount namespace
     pub(crate) in_host_mountns: bool,
-    /// Whether we were invoked with -v /var/lib/containers:/var/lib/containers
-    pub(crate) have_host_container_storage: bool,
 }
 
 // Shared read-only global state
@@ -516,38 +517,13 @@ impl SourceInfo {
         tracing::debug!("Finding digest for image ID {}", container_info.imageid);
         let digest = crate::podman::imageid_to_digest(&container_info.imageid)?;
 
-        let have_host_container_storage = Utf8Path::new(crate::podman::CONTAINER_STORAGE)
-            .try_exists()?
-            && ostree_ext::mountutil::is_mountpoint(
-                &root,
-                crate::podman::CONTAINER_STORAGE.trim_start_matches('/'),
-            )?
-            .unwrap_or_default();
-
-        // Verify up front we can do the fetch
-        if have_host_container_storage {
-            tracing::debug!("Host container storage found");
-        } else {
-            tracing::debug!(
-                "No {} mount available, checking skopeo",
-                crate::podman::CONTAINER_STORAGE
-            );
-            require_skopeo_with_containers_storage()?;
-        }
-
-        Self::new(
-            imageref,
-            Some(digest),
-            root,
-            true,
-            have_host_container_storage,
-        )
+        Self::new(imageref, Some(digest), root, true)
     }
 
     #[context("Creating source info from a given imageref")]
     pub(crate) fn from_imageref(imageref: &str, root: &Dir) -> Result {
         let imageref = ostree_container::ImageReference::try_from(imageref)?;
-        Self::new(imageref, None, root, false, false)
+        Self::new(imageref, None, root, false)
     }
 
     fn have_selinux_from_repo(root: &Dir) -> Result {
@@ -573,7 +549,6 @@ impl SourceInfo {
         digest: Option,
         root: &Dir,
         in_host_mountns: bool,
-        have_host_container_storage: bool,
     ) -> Result {
         let selinux = if Path::new("/ostree/repo").try_exists()? {
             Self::have_selinux_from_repo(root)?
@@ -585,7 +560,6 @@ impl SourceInfo {
             digest,
             selinux,
             in_host_mountns,
-            have_host_container_storage,
         })
     }
 }
@@ -716,19 +690,7 @@ async fn install_container(
             }
         };
 
-        // We need to fetch the container image from the root mount namespace. If
-        // we don't have /var/lib/containers mounted in this image, fork off skopeo
-        // in the host mountnfs.
-        let skopeo_cmd = if !state.source.have_host_container_storage {
-            Some(run_in_host_mountns("skopeo"))
-        } else {
-            None
-        };
-        let proxy_cfg = ostree_container::store::ImageProxyConfig {
-            skopeo_cmd,
-            ..Default::default()
-        };
-
+        let proxy_cfg = ostree_container::store::ImageProxyConfig::default();
         (src_imageref, Some(proxy_cfg))
     };
     let src_imageref = ostree_container::OstreeImageReference {
@@ -895,32 +857,6 @@ pub(crate) fn exec_in_host_mountns(args: &[std::ffi::OsString]) -> Result<()> {
     Err(Command::new(cmd).args(args).exec()).context("exec")?
 }
 
-#[context("Querying skopeo version")]
-fn require_skopeo_with_containers_storage() -> Result<()> {
-    let out = Task::new_cmd("skopeo --version", run_in_host_mountns("skopeo"))
-        .args(["--version"])
-        .quiet()
-        .read()
-        .context("Failed to run skopeo (it currently must be installed in the host root)")?;
-    let mut v = out
-        .strip_prefix("skopeo version ")
-        .map(|v| v.split('.'))
-        .ok_or_else(|| anyhow::anyhow!("Unexpected output from skopeo version"))?;
-    let major = v
-        .next()
-        .ok_or_else(|| anyhow::anyhow!("Missing major version"))?;
-    let minor = v
-        .next()
-        .ok_or_else(|| anyhow::anyhow!("Missing minor version"))?;
-    let (major, minor) = (major.parse::()?, minor.parse::()?);
-    let supported = major > 1 || minor > 10;
-    if supported {
-        Ok(())
-    } else {
-        anyhow::bail!("skopeo >= 1.11 is required on host")
-    }
-}
-
 pub(crate) struct RootSetup {
     luks_device: Option,
     device_info: crate::blockdev::PartitionTable,
@@ -1269,6 +1205,8 @@ async fn prepare_install(
     tracing::debug!("Target image reference: {target_imgref}");
 
     // A bit of basic global state setup
+    crate::mount::ensure_mirrored_host_mount("/dev")?;
+    crate::mount::ensure_mirrored_host_mount("/var/lib/containers")?;
     ensure_var()?;
     setup_tmp_mounts()?;
     // Allocate a temporary directory we can use in various places to avoid
@@ -1454,12 +1392,6 @@ async fn install_to_filesystem_impl(state: &State, rootfs: &mut RootSetup) -> Re
         .ok_or_else(|| anyhow!("No uuid for boot/root"))?;
     tracing::debug!("boot uuid={boot_uuid}");
 
-    // If we're doing an alongside install, then the /dev bootupd sees needs to be the host's.
-    ensure!(
-        crate::mount::is_same_as_host(Utf8Path::new("/dev"))?,
-        "Missing /dev mount to host /dev"
-    );
-
     let bound_images = BoundImages::from_state(state).await?;
 
     // Initialize the ostree sysroot (repo, stateroot, etc.)
@@ -1514,9 +1446,6 @@ pub(crate) async fn install_to_disk(mut opts: InstallToDiskOpts) -> Result<()> {
                 block_opts.device
             );
         }
-        if !crate::mount::is_same_as_host(Utf8Path::new("/dev"))? {
-            anyhow::bail!("Loopback mounts (--via-loopback) require host devices (-v /dev:/dev)");
-        }
     } else if !target_blockdev_meta.file_type().is_block_device() {
         anyhow::bail!("Not a block device: {}", block_opts.device);
     }
@@ -1705,6 +1634,23 @@ pub(crate) async fn install_to_filesystem(
     // And the last bit of state here is the fsopts, which we also destructure now.
     let mut fsopts = opts.filesystem_opts;
 
+    // If we're doing an alongside install, automatically set up the host rootfs
+    // mount if it wasn't done already.
+    if targeting_host_root
+        && fsopts.root_path.as_str() == ALONGSIDE_ROOT_MOUNT
+        && !fsopts.root_path.try_exists()?
+    {
+        tracing::debug!("Mounting host / to {ALONGSIDE_ROOT_MOUNT}");
+        std::fs::create_dir(ALONGSIDE_ROOT_MOUNT)?;
+        crate::mount::bind_mount_from_pidns(
+            crate::mount::PID1,
+            "/".into(),
+            ALONGSIDE_ROOT_MOUNT.into(),
+            true,
+        )
+        .with_context(|| format!("Mounting host / to {ALONGSIDE_ROOT_MOUNT}"))?;
+    }
+
     // Check that the target is a directory
     {
         let root_path = &fsopts.root_path;
diff --git a/lib/src/mount.rs b/lib/src/mount.rs
index 825229339..037dbef56 100644
--- a/lib/src/mount.rs
+++ b/lib/src/mount.rs
@@ -1,15 +1,36 @@
 //! Helpers for interacting with mountpoints
 
-use std::process::Command;
+use std::{
+    fs,
+    os::fd::{AsFd, OwnedFd},
+    process::Command,
+};
 
-use anyhow::{anyhow, Result};
+use anyhow::{anyhow, Context, Result};
 use bootc_utils::CommandRunExt;
 use camino::Utf8Path;
 use fn_error_context::context;
+use rustix::{
+    mount::{MoveMountFlags, OpenTreeFlags},
+    net::{
+        AddressFamily, RecvFlags, SendAncillaryBuffer, SendAncillaryMessage, SendFlags,
+        SocketFlags, SocketType,
+    },
+    process::WaitOptions,
+    thread::Pid,
+};
 use serde::Deserialize;
 
 use crate::task::Task;
 
+/// Well known identifier for pid 1
+pub(crate) const PID1: Pid = const {
+    match Pid::from_raw(1) {
+        Some(v) => v,
+        None => panic!("Expected to parse pid1"),
+    }
+};
+
 #[derive(Deserialize, Debug)]
 #[serde(rename_all = "kebab-case")]
 #[allow(dead_code)]
@@ -124,3 +145,155 @@ pub(crate) fn is_same_as_host(path: &Utf8Path) -> Result {
     );
     Ok(devstat.f_fsid == hostdevstat.f_fsid)
 }
+
+/// Given a pid, enter its mount namespace and acquire a file descriptor
+/// for a mount from that namespace.
+///
+/// A helper process is forked to do the `setns` + `open_tree`; it passes the
+/// resulting mount file descriptor back over a socketpair. When `recursive` is
+/// set, `AT_RECURSIVE` is added to the `open_tree` flags.
+#[allow(unsafe_code)]
+#[context("Opening mount tree from pid")]
+pub(crate) fn open_tree_from_pidns(
+    pid: rustix::process::Pid,
+    path: &Utf8Path,
+    recursive: bool,
+) -> Result<OwnedFd> {
+    // Allocate a socket pair to use for sending file descriptors.
+    let (sock_parent, sock_child) = rustix::net::socketpair(
+        AddressFamily::UNIX,
+        SocketType::STREAM,
+        SocketFlags::CLOEXEC,
+        None,
+    )
+    .context("socketpair")?;
+    const DUMMY_DATA: &[u8] = &[b'!'];
+    match unsafe { libc::fork() } {
+        0 => {
+            // We're in the child. At this point we know we don't have multiple threads, so we
+            // can safely `setns`.
+            //
+            // Errors must not escape this arm via `?`: that would return into the caller
+            // and continue running the rest of the program inside the forked child.
+            let send_mount_fd = || -> Result<()> {
+                // Open up the namespace of the target process as a file descriptor, and enter it.
+                let pidlink = fs::File::open(format!("/proc/{}/ns/mnt", pid.as_raw_nonzero()))?;
+                rustix::thread::move_into_link_name_space(
+                    pidlink.as_fd(),
+                    Some(rustix::thread::LinkNameSpaceType::Mount),
+                )
+                .context("setns")?;
+
+                // Open the target mount path as a file descriptor.
+                let recursive = if recursive {
+                    OpenTreeFlags::AT_RECURSIVE
+                } else {
+                    OpenTreeFlags::empty()
+                };
+                let fd = rustix::mount::open_tree(
+                    rustix::fs::CWD,
+                    path.as_std_path(),
+                    OpenTreeFlags::OPEN_TREE_CLOEXEC | OpenTreeFlags::OPEN_TREE_CLONE | recursive,
+                )
+                .context("open_tree")?;
+
+                // And send that file descriptor via fd passing over the socketpair.
+                let fd = fd.as_fd();
+                let fds = [fd];
+                let mut buffer = [0u8; rustix::cmsg_space!(ScmRights(1))];
+                let mut control = SendAncillaryBuffer::new(&mut buffer);
+                let pushed = control.push(SendAncillaryMessage::ScmRights(&fds));
+                assert!(pushed);
+                let ios = std::io::IoSlice::new(DUMMY_DATA);
+                rustix::net::sendmsg(&sock_child, &[ios], &mut control, SendFlags::empty())?;
+                Ok(())
+            };
+            // Then we're done; failure is reported through the exit status.
+            match send_mount_fd() {
+                Ok(()) => std::process::exit(0),
+                Err(e) => {
+                    eprintln!("error: {e:#}");
+                    std::process::exit(1)
+                }
+            }
+        }
+        -1 => {
+            // fork failed
+            let e = std::io::Error::last_os_error();
+            anyhow::bail!("failed to fork: {e}");
+        }
+        n => {
+            // We're in the parent; create a pid (checking that n > 0).
+            let pid = rustix::process::Pid::from_raw(n).unwrap();
+            // Close our copy of the child's socket end, so that if the child exits without
+            // sending anything we observe EOF instead of blocking forever in recvmsg.
+            drop(sock_child);
+            // Receive the mount file descriptor from the child
+            let mut cmsg_space = vec![0; rustix::cmsg_space!(ScmRights(1))];
+            let mut cmsg_buffer = rustix::net::RecvAncillaryBuffer::new(&mut cmsg_space);
+            let mut buf = [0u8; DUMMY_DATA.len()];
+            let iov = std::io::IoSliceMut::new(buf.as_mut());
+            let mut iov = [iov];
+            let received = rustix::net::recvmsg(
+                sock_parent,
+                &mut iov,
+                &mut cmsg_buffer,
+                RecvFlags::CMSG_CLOEXEC,
+            )
+            .context("recvmsg");
+            // Always reap the child, even when receiving failed.
+            rustix::process::waitpid(Some(pid), WaitOptions::empty())?;
+            let nread = received?.bytes;
+            anyhow::ensure!(
+                nread == DUMMY_DATA.len() && buf == DUMMY_DATA,
+                "Helper process did not send the mount file descriptor"
+            );
+            // And extract the file descriptor
+            let r = cmsg_buffer
+                .drain()
+                .filter_map(|m| match m {
+                    rustix::net::RecvAncillaryMessage::ScmRights(f) => Some(f),
+                    _ => None,
+                })
+                .flatten()
+                .next()
+                .ok_or_else(|| anyhow::anyhow!("Did not receive a file descriptor"))?;
+            Ok(r)
+        }
+    }
+}
+
+/// Create a bind mount from the mount namespace of the target pid
+/// into our mount namespace.
+pub(crate) fn bind_mount_from_pidns(
+    pid: Pid,
+    src: &Utf8Path,
+    target: &Utf8Path,
+    recursive: bool,
+) -> Result<()> {
+    let src = open_tree_from_pidns(pid, src, recursive)?;
+    rustix::mount::move_mount(
+        src.as_fd(),
+        "",
+        rustix::fs::CWD,
+        target.as_std_path(),
+        MoveMountFlags::MOVE_MOUNT_F_EMPTY_PATH,
+    )
+    .context("Moving mount")?;
+    Ok(())
+}
+
+/// If the target path is not already mirrored from the host (e.g. via -v /dev:/dev)
+/// then recursively mount it.
+pub(crate) fn ensure_mirrored_host_mount(path: impl AsRef<Utf8Path>) -> Result<()> {
+    let path = path.as_ref();
+    // If we didn't have this in our filesystem already (e.g. for /var/lib/containers)
+    // then create it now.
+    std::fs::create_dir_all(path)?;
+    if is_same_as_host(path)? {
+        tracing::debug!("Already mounted from host: {path}");
+        return Ok(());
+    }
+    tracing::debug!("Propagating host mount: {path}");
+    bind_mount_from_pidns(PID1, path, path, true)
+}
diff --git a/ostree-ext/.github/workflows/bootc.yml b/ostree-ext/.github/workflows/bootc.yml
index 2f0d0b22c..8e8dbe78c 100644
--- a/ostree-ext/.github/workflows/bootc.yml
+++ b/ostree-ext/.github/workflows/bootc.yml
@@ -59,7 +59,7 @@ jobs:
       - name: Integration tests
         run: |
           set -xeuo pipefail
-          sudo podman run --rm -ti --privileged -v /:/target -v /var/lib/containers:/var/lib/containers -v ./usr/bin/bootc:/usr/bin/bootc --pid=host --security-opt label=disable \
+          sudo podman run --rm -ti --privileged -v ./usr/bin/bootc:/usr/bin/bootc --pid=host --security-opt label=disable \
             quay.io/centos-bootc/centos-bootc-dev:stream9 bootc install to-filesystem \
             --karg=foo=bar --disable-selinux --replace=alongside /target
 
diff --git a/tests-integration/src/install.rs b/tests-integration/src/install.rs
index b21886daa..c4eb31dd9 100644
--- a/tests-integration/src/install.rs
+++ b/tests-integration/src/install.rs
@@ -16,10 +16,6 @@ pub(crate) const BASE_ARGS: &[&str] = &[
     "run",
     "--rm",
     "--privileged",
-    "-v",
-    "/dev:/dev",
-    "-v",
-    "/var/lib/containers:/var/lib/containers",
     "--pid=host",
     "--security-opt",
     "label=disable",
@@ -149,7 +145,7 @@ pub(crate) fn run_alongside(image: &str, mut testargs: libtest_mimic::Arguments)
         Trial::test("Install and verify selinux state", move || {
             let sh = &xshell::Shell::new()?;
             reset_root(sh, image)?;
-            cmd!(sh, "sudo {BASE_ARGS...} {target_args...} {image} bootc install to-existing-root --acknowledge-destructive {generic_inst_args...}").run()?;
+            cmd!(sh, "sudo {BASE_ARGS...} {image} bootc install to-existing-root --acknowledge-destructive {generic_inst_args...}").run()?;
             generic_post_install_verification()?;
             let root = &Dir::open_ambient_dir("/ostree", cap_std::ambient_authority()).unwrap();
             let mut path = PathBuf::from(".");
@@ -159,7 +155,7 @@ pub(crate) fn run_alongside(image: &str, mut testargs: libtest_mimic::Arguments)
         Trial::test("Install to non-default stateroot", move || {
             let sh = &xshell::Shell::new()?;
             reset_root(sh, image)?;
-            cmd!(sh, "sudo {BASE_ARGS...} {target_args...} {image} bootc install to-existing-root --stateroot {NON_DEFAULT_STATEROOT} --acknowledge-destructive {generic_inst_args...}").run()?;
+            cmd!(sh, "sudo {BASE_ARGS...} {image} bootc install to-existing-root --stateroot {NON_DEFAULT_STATEROOT} --acknowledge-destructive {generic_inst_args...}").run()?;
             generic_post_install_verification()?;
             assert!(
                 Utf8Path::new(&format!("/ostree/deploy/{NON_DEFAULT_STATEROOT}")).try_exists()?
@@ -171,7 +167,7 @@ pub(crate) fn run_alongside(image: &str, mut testargs: libtest_mimic::Arguments)
             reset_root(sh, image)?;
             let empty = sh.create_temp_dir()?;
             let empty = empty.path().to_str().unwrap();
-            cmd!(sh, "sudo {BASE_ARGS...} {target_args...} -v {empty}:/usr/lib/bootc/install {image} bootc install to-existing-root {generic_inst_args...}").run()?;
+            cmd!(sh, "sudo {BASE_ARGS...} -v {empty}:/usr/lib/bootc/install {image} bootc install to-existing-root {generic_inst_args...}").run()?;
             generic_post_install_verification()?;
             Ok(())
         }),
diff --git a/tests/e2e/bootc-install.sh b/tests/e2e/bootc-install.sh
index c2ad7a4d2..52b186d9b 100755
--- a/tests/e2e/bootc-install.sh
+++ b/tests/e2e/bootc-install.sh
@@ -234,8 +234,6 @@ case "$TEST_CASE" in
             --privileged \
             --pid=host \
             --security-opt label=type:unconfined_t \
-            -v /var/lib/containers:/var/lib/containers \
-            -v /dev:/dev \
             -v .:/output \
             "$TEST_IMAGE_URL" \
             bootc install to-disk --filesystem "$ROOTFS" --generic-image --via-loopback /output/disk.raw
diff --git a/tests/e2e/playbooks/install.yaml b/tests/e2e/playbooks/install.yaml
index 876c0ddeb..8fe7809a8 100644
--- a/tests/e2e/playbooks/install.yaml
+++ b/tests/e2e/playbooks/install.yaml
@@ -54,9 +54,6 @@
            --privileged \
            --tls-verify=false \
            --pid=host \
-           -v /dev:/dev \
-           -v /:/target \
-           -v /var/lib/containers:/var/lib/containers \
            --security-opt label=type:unconfined_t \
            {{ test_image_url }} \
            bootc install to-existing-root"