From 23063f308b7c500069342abcfc3095fe0bb5283b Mon Sep 17 00:00:00 2001 From: Allison Karlitskaya Date: Thu, 19 Dec 2024 14:39:08 +0100 Subject: [PATCH] lints: add check for non-utf-8 filesystem content As mentioned in #975, we should add a lint to ensure that all filenames are UTF-8, giving nice errors (with full pathnames) in the event that we encounter invalid filenames. We also check symlink targets. Add a unit test that tries various valid and invalid scenarios. Signed-off-by: Allison Karlitskaya Signed-off-by: Colin Walters --- lib/src/lints.rs | 144 ++++++++++++++++++++++++++++++++++++++++++++++- lib/src/utils.rs | 31 +++++++++- 2 files changed, 172 insertions(+), 3 deletions(-) diff --git a/lib/src/lints.rs b/lib/src/lints.rs index 6417b660..935d71d7 100644 --- a/lib/src/lints.rs +++ b/lib/src/lints.rs @@ -4,18 +4,26 @@ use std::env::consts::ARCH; -use anyhow::Result; +use anyhow::{bail, ensure, Result}; use cap_std::fs::Dir; use cap_std_ext::cap_std; use cap_std_ext::dirext::CapStdExtDirExt as _; use fn_error_context::context; +use crate::utils::openat2_with_retry; + /// check for the existence of the /var/run directory /// if it exists we need to check that it links to /run if not error /// if it does not exist error. #[context("Linting")] pub(crate) fn lint(root: &Dir) -> Result<()> { - let lints = [check_var_run, check_kernel, check_parse_kargs, check_usretc]; + let lints = [ + check_var_run, + check_kernel, + check_parse_kargs, + check_usretc, + check_utf8, + ]; for lint in lints { lint(&root)?; } @@ -59,12 +67,72 @@ fn check_kernel(root: &Dir) -> Result<()> { Ok(()) } +/// Open the target directory, but return Ok(None) if this would cross a mount point. +fn open_dir_noxdev( + parent: &Dir, + path: impl AsRef, +) -> std::io::Result> { + use rustix::fs::{Mode, OFlags, ResolveFlags}; + match openat2_with_retry( + parent, + path, + OFlags::CLOEXEC | OFlags::DIRECTORY | OFlags::NOFOLLOW, + Mode::empty(), + ResolveFlags::NO_XDEV | ResolveFlags::BENEATH, + ) { + Ok(r) => Ok(Some(Dir::reopen_dir(&r)?)), + Err(e) if e == rustix::io::Errno::XDEV => Ok(None), + Err(e) => return Err(e.into()), + } +} + +fn check_utf8(dir: &Dir) -> Result<()> { + for entry in dir.entries()? { + let entry = entry?; + let name = entry.file_name(); + + let Some(strname) = &name.to_str() else { + // will escape nicely like "abc\xFFdéf" + bail!("/: Found non-utf8 filename {name:?}"); + }; + + let ifmt = entry.file_type()?; + if ifmt.is_symlink() { + let target = dir.read_link_contents(&name)?; + ensure!( + target.to_str().is_some(), + "/{strname}: Found non-utf8 symlink target" + ); + } else if ifmt.is_dir() { + let Some(subdir) = open_dir_noxdev(dir, entry.file_name())? else { + continue; + }; + if let Err(err) = check_utf8(&subdir) { + // Try to do the path pasting only in the event of an error + bail!("/{strname}{err:?}"); + } + } + } + Ok(()) +} + #[cfg(test)] fn fixture() -> Result { let tempdir = cap_std_ext::cap_tempfile::tempdir(cap_std::ambient_authority())?; Ok(tempdir) } +#[test] +fn test_open_noxdev() -> Result<()> { + let root = Dir::open_ambient_dir("/", cap_std::ambient_authority())?; + // This hard requires the host setup to have /usr/bin on the same filesystem as / + let usr = Dir::open_ambient_dir("/usr", cap_std::ambient_authority())?; + assert!(open_dir_noxdev(&usr, "bin").unwrap().is_some()); + // Requires a mounted /proc, but that also seems ane. + assert!(open_dir_noxdev(&root, "proc").unwrap().is_none()); + Ok(()) +} + #[test] fn test_var_run() -> Result<()> { let root = &fixture()?; @@ -117,3 +185,75 @@ fn test_usr_etc() -> Result<()> { check_usretc(root).unwrap(); Ok(()) } + +#[test] +fn test_non_utf8() { + use std::{ffi::OsStr, os::unix::ffi::OsStrExt}; + + let root = &fixture().unwrap(); + + // Try to create some adversarial symlink situations to ensure the walk doesn't crash + root.create_dir("subdir").unwrap(); + // Self-referential symlinks + root.symlink("self", "self").unwrap(); + // Infinitely looping dir symlinks + root.symlink("..", "subdir/parent").unwrap(); + // Broken symlinks + root.symlink("does-not-exist", "broken").unwrap(); + // Out-of-scope symlinks + root.symlink("../../x", "escape").unwrap(); + // Should be fine + check_utf8(root).unwrap(); + + // But this will cause an issue + let baddir = OsStr::from_bytes(b"subdir/2/bad\xffdir"); + root.create_dir("subdir/2").unwrap(); + root.create_dir(baddir).unwrap(); + let Err(err) = check_utf8(root) else { + unreachable!("Didn't fail"); + }; + assert_eq!( + err.to_string(), + r#"/subdir/2/: Found non-utf8 filename "bad\xFFdir""# + ); + root.remove_dir(baddir).unwrap(); // Get rid of the problem + check_utf8(root).unwrap(); // Check it + + // Create a new problem in the form of a regular file + let badfile = OsStr::from_bytes(b"regular\xff"); + root.write(badfile, b"Hello, world!\n").unwrap(); + let Err(err) = check_utf8(root) else { + unreachable!("Didn't fail"); + }; + assert_eq!( + err.to_string(), + r#"/: Found non-utf8 filename "regular\xFF""# + ); + root.remove_file(badfile).unwrap(); // Get rid of the problem + check_utf8(root).unwrap(); // Check it + + // And now test invalid symlink targets + root.symlink(badfile, "subdir/good-name").unwrap(); + let Err(err) = check_utf8(root) else { + unreachable!("Didn't fail"); + }; + assert_eq!( + err.to_string(), + r#"/subdir/good-name: Found non-utf8 symlink target"# + ); + root.remove_file("subdir/good-name").unwrap(); // Get rid of the problem + check_utf8(root).unwrap(); // Check it + + // Finally, test a self-referential symlink with an invalid name. + // We should spot the invalid name before we check the target. + root.symlink(badfile, badfile).unwrap(); + let Err(err) = check_utf8(root) else { + unreachable!("Didn't fail"); + }; + assert_eq!( + err.to_string(), + r#"/: Found non-utf8 filename "regular\xFF""# + ); + root.remove_file(badfile).unwrap(); // Get rid of the problem + check_utf8(root).unwrap(); // Check it +} diff --git a/lib/src/utils.rs b/lib/src/utils.rs index b809f016..d5d1006a 100644 --- a/lib/src/utils.rs +++ b/lib/src/utils.rs @@ -1,6 +1,7 @@ use std::future::Future; use std::io::Write; -use std::os::fd::BorrowedFd; +use std::os::fd::{AsFd, BorrowedFd, OwnedFd}; +use std::path::Path; use std::process::Command; use std::time::Duration; @@ -20,6 +21,7 @@ use libsystemd::logging::journal_print; use ostree::glib; use ostree_ext::container::SignatureSource; use ostree_ext::ostree; +use rustix::path::Arg; /// Try to look for keys injected by e.g. rpm-ostree requesting machine-local /// changes; if any are present, return `true`. @@ -56,6 +58,33 @@ pub(crate) fn deployment_fd( sysroot_dir.open_dir(&dirpath).map_err(Into::into) } +/// A thin wrapper for [`openat2`] but that retries on interruption. +pub fn openat2_with_retry( + dirfd: impl AsFd, + path: impl AsRef, + oflags: rustix::fs::OFlags, + mode: rustix::fs::Mode, + resolve: rustix::fs::ResolveFlags, +) -> rustix::io::Result { + let dirfd = dirfd.as_fd(); + let path = path.as_ref(); + // We loop forever on EAGAIN right now. The cap-std version loops just 4 times, + // which seems really arbitrary. + path.into_with_c_str(|path_c_str| 'start: loop { + match rustix::fs::openat2(dirfd, path_c_str, oflags, mode, resolve) { + Ok(file) => { + return Ok(file); + } + Err(rustix::io::Errno::AGAIN | rustix::io::Errno::INTR) => { + continue 'start; + } + Err(e) => { + return Err(e); + } + } + }) +} + /// Given an mount option string list like foo,bar=baz,something=else,ro parse it and find /// the first entry like $optname= /// This will not match a bare `optname` without an equals.