From 74ad5d83f03dc2bc4eeceb449ee29feb2b801b28 Mon Sep 17 00:00:00 2001 From: Sean Olson Date: Wed, 8 Nov 2023 22:55:34 -0800 Subject: [PATCH] Refactor integration tests into unit tests. This change moves the integration tests for `Glob::walk` into unit tests in the `walk` module. There is no particular reason that these tests were separated beyond reading and writing to a temporary directory and it was not possible to write tests against internal components. The `walk_glob_with_not` test now examines the feed (from `SeparatingFilter`) to ensure that filters properly cancel traversals into directories that can be ignored. This change also implements various standard traits for types in the `walk::filter` module. --- src/capture.rs | 13 +- src/walk/filter.rs | 66 +++++++++- src/walk/glob.rs | 11 +- src/walk/mod.rs | 321 ++++++++++++++++++++++++++++++++++++++++++++- tests/walk.rs | 264 ------------------------------------- 5 files changed, 398 insertions(+), 277 deletions(-) delete mode 100644 tests/walk.rs diff --git a/src/capture.rs b/src/capture.rs index 83c8a3b..fae4601 100644 --- a/src/capture.rs +++ b/src/capture.rs @@ -124,8 +124,9 @@ impl<'t> MatchedText<'t> { /// Clones any borrowed data to an owning instance. /// /// This function is similar to [`into_owned`], but does not consume its receiver. Due to a - /// technical limitation, `MatchedText` cannot implement [`Clone`], so this function is - /// provided as a stop gap that allows a distinct instance to be created that owns its data. + /// technical limitation, `MatchedText` cannot properly implement [`Clone`] with borrowed data + /// , so this function is provided as a stop gap that allows a distinct instance to be created + /// that owns its data. /// /// [`Clone`]: std::clone::Clone /// [`into_owned`]: crate::MatchedText::into_owned @@ -175,6 +176,14 @@ impl<'t> MatchedText<'t> { } } +impl Clone for MatchedText<'static> { + fn clone(&self) -> Self { + MatchedText { + inner: self.inner.to_owned(), + } + } +} + // TODO: This probably shouldn't be part of the public API. impl<'t> From> for MatchedText<'t> { fn from(captures: BorrowedText<'t>) -> Self { diff --git a/src/walk/filter.rs b/src/walk/filter.rs index 8d07e23..e97034e 100644 --- a/src/walk/filter.rs +++ b/src/walk/filter.rs @@ -19,6 +19,8 @@ //! [`Iterator::filter`]: std::iter::Iterator::filter //! [`SeparatingFilter`]: crate::walk::filter::SeparatingFilter +use std::cmp::{Eq, PartialEq}; +use std::hash::{Hash, Hasher}; use std::marker::PhantomData; mod kind { @@ -88,6 +90,29 @@ where } } +impl Eq for Product where T: Eq {} + +impl Hash for Product +where + T: Hash, +{ + fn hash(&self, state: &mut H) + where + H: Hasher, + { + self.inner.hash(state); + } +} + +impl PartialEq for Product +where + T: PartialEq, +{ + fn eq(&self, other: &Self) -> bool { + self.inner.eq(&other.inner) + } +} + pub type Filtrate = Product; pub type Residue = Product; @@ -137,7 +162,7 @@ impl Filtrate { impl AsRef for Residue> { fn as_ref(&self) -> &T { - self.get().as_ref() + self.get().get() } } @@ -175,10 +200,14 @@ pub trait Isomeric: Feed { fn substituent(separation: &Separation) -> Self::Substituent<'_>; } +// TODO: The derived trait implementations are likely incorrect and imply a bound on `S`. It just +// so happens that `S` is typically a tuple and tuples will implement these same traits if +// the composed types implement them. Instead, the bounds ought to be on the associated +// `Filtrate` and `Residue` types. /// The separated output of a [`SeparatingFilter`] feed. /// /// [`SeparatingFilter`]: crate::walk::filter::SeparatingFilter -#[derive(Clone, Copy, Debug)] +#[derive(Clone, Copy, Debug, Eq, Hash, PartialEq)] pub enum Separation where S: Feed + ?Sized, @@ -221,6 +250,16 @@ where } } + pub fn map_residue(self, f: F) -> Separation<(S::Filtrate, U)> + where + F: FnOnce(S::Residue) -> U, + { + match self { + Separation::Filtrate(filtrate) => filtrate.into(), + Separation::Residue(residue) => residue.map(f).into(), + } + } + pub fn filtrate(self) -> Option> { match self { Separation::Filtrate(filtrate) => Some(filtrate), @@ -417,14 +456,31 @@ where /// Residue of a [`SeparatingFilter`] over a tree data structure. /// /// [`SeparatingFilter`]: crate::walk::filter::SeparatingFilter -#[derive(Clone, Copy, Debug)] +#[derive(Clone, Copy, Debug, Eq, Hash, PartialEq)] pub enum TreeResidue { Node(T), Tree(T), } -impl AsRef for TreeResidue { - fn as_ref(&self) -> &T { +impl TreeResidue { + pub fn map(self, f: F) -> TreeResidue + where + F: FnOnce(T) -> U, + { + match self { + TreeResidue::Node(residue) => TreeResidue::Node(f(residue)), + TreeResidue::Tree(residue) => TreeResidue::Tree(f(residue)), + } + } + + pub fn as_ref(&self) -> TreeResidue<&T> { + match self { + TreeResidue::Node(ref residue) => TreeResidue::Node(residue), + TreeResidue::Tree(ref residue) => TreeResidue::Tree(residue), + } + } + + pub fn get(&self) -> &T { match self { TreeResidue::Node(ref residue) | TreeResidue::Tree(ref residue) => residue, } diff --git a/src/walk/glob.rs b/src/walk/glob.rs index c9736cd..dc739a3 100644 --- a/src/walk/glob.rs +++ b/src/walk/glob.rs @@ -159,6 +159,7 @@ impl<'t> Glob<'t> { return Separation::from(error.map(Err)); }, }, + // `Path::walk_with_behavior` yields no residue. _ => unreachable!(), }; let entry = filtrate.as_ref(); @@ -177,8 +178,8 @@ impl<'t> Glob<'t> { match (position, candidate) { (First | Middle, Both(candidate, pattern)) => { if !pattern.is_match(candidate.as_ref()) { - // Do not descend into directories that do not match the - // corresponding component pattern. + // Do not walk directories that do not match the corresponding + // component pattern. return filtrate.filter_tree(cancellation).into(); } }, @@ -199,8 +200,8 @@ impl<'t> Glob<'t> { } } else { - // Do not descend into directories that do not match the - // corresponding component pattern. + // Do not walk directories that do not match the corresponding + // component pattern. filtrate.filter_tree(cancellation).into() }; }, @@ -406,7 +407,7 @@ impl FilterAny { /// /// [`Glob`]: crate::Glob #[cfg_attr(docsrs, doc(cfg(feature = "walk")))] -#[derive(Debug)] +#[derive(Clone, Debug)] pub struct GlobEntry { entry: TreeEntry, matched: MatchedText<'static>, diff --git a/src/walk/mod.rs b/src/walk/mod.rs index 67d5d91..367b50e 100644 --- a/src/walk/mod.rs +++ b/src/walk/mod.rs @@ -35,7 +35,7 @@ where fn substituent(separation: &Separation) -> Self::Substituent<'_> { match separation { Separation::Filtrate(ref filtrate) => filtrate.get().as_ref(), - Separation::Residue(ref residue) => residue.get().as_ref(), + Separation::Residue(ref residue) => residue.get().get(), } } } @@ -704,3 +704,322 @@ impl From for TreeResidue<()> { } } } + +// TODO: Rust's testing framework does not provide a mechanism for maintaining shared state. This +// means that tests that write to the file system must do so individually rather than writing +// before and after all tests have run. This should probably be avoided. +#[cfg(test)] +mod tests { + use build_fs_tree::{dir, file, Build, FileSystemTree}; + use std::collections::HashSet; + use std::path::PathBuf; + use tempfile::{self, TempDir}; + + use crate::walk::filter::{HierarchicalIterator, Separation, TreeResidue}; + use crate::walk::{FileIterator, LinkBehavior, WalkBehavior}; + use crate::Glob; + + /// Writes a testing directory tree to a temporary location on the file system. + fn temptree() -> (TempDir, PathBuf) { + let root = tempfile::tempdir().unwrap(); + let tree: FileSystemTree<&str, &str> = dir! { + "doc" => dir! { + "guide.md" => file!(""), + }, + "src" => dir! { + "glob.rs" => file!(""), + "lib.rs" => file!(""), + }, + "tests" => dir! { + "harness" => dir! { + "mod.rs" => file!(""), + }, + "walk.rs" => file!(""), + }, + "README.md" => file!(""), + }; + let path = root.path().join("project"); + tree.build(&path).unwrap(); + (root, path) + } + + /// Writes a testing directory tree that includes a reentrant symbolic link to a temporary + /// location on the file system. + #[cfg(any(unix, windows))] + fn temptree_with_cyclic_link() -> (TempDir, PathBuf) { + use std::io; + use std::path::Path; + + #[cfg(unix)] + fn link(target: impl AsRef, link: impl AsRef) -> io::Result<()> { + std::os::unix::fs::symlink(target, link) + } + + #[cfg(windows)] + fn link(target: impl AsRef, link: impl AsRef) -> io::Result<()> { + std::os::windows::fs::symlink_dir(target, link) + } + + // Get a temporary tree and create a reentrant symbolic link. + let (root, path) = temptree(); + link(path.as_path(), path.join("tests/cycle")).unwrap(); + (root, path) + } + + #[test] + fn walk_glob_with_tree() { + let (_root, path) = temptree(); + + let glob = Glob::new("**").unwrap(); + let paths: HashSet<_> = glob + .walk(&path) + .flatten() + .map(|entry| entry.into_path()) + .collect(); + assert_eq!( + paths, + [ + #[allow(clippy::redundant_clone)] + path.to_path_buf(), + path.join("doc"), + path.join("doc/guide.md"), + path.join("src"), + path.join("src/glob.rs"), + path.join("src/lib.rs"), + path.join("tests"), + path.join("tests/harness"), + path.join("tests/harness/mod.rs"), + path.join("tests/walk.rs"), + path.join("README.md"), + ] + .into_iter() + .collect(), + ); + } + + #[test] + fn walk_glob_with_invariant_terminating_component() { + let (_root, path) = temptree(); + + let glob = Glob::new("**/*.md").unwrap(); + let paths: HashSet<_> = glob + .walk(&path) + .flatten() + .map(|entry| entry.into_path()) + .collect(); + assert_eq!( + paths, + [path.join("doc/guide.md"), path.join("README.md"),] + .into_iter() + .collect(), + ); + } + + #[test] + fn walk_glob_with_invariant_intermediate_component() { + let (_root, path) = temptree(); + + let glob = Glob::new("**/src/**/*.rs").unwrap(); + let paths: HashSet<_> = glob + .walk(&path) + .flatten() + .map(|entry| entry.into_path()) + .collect(); + assert_eq!( + paths, + [path.join("src/glob.rs"), path.join("src/lib.rs"),] + .into_iter() + .collect(), + ); + } + + #[test] + fn walk_glob_with_only_invariant() { + let (_root, path) = temptree(); + + let glob = Glob::new("src/lib.rs").unwrap(); + let paths: HashSet<_> = glob + .walk(&path) + .flatten() + .map(|entry| entry.into_path()) + .collect(); + assert_eq!(paths, [path.join("src/lib.rs")].into_iter().collect()); + } + + #[test] + fn walk_glob_with_only_invariant_partitioned() { + let (_root, path) = temptree(); + + let (prefix, glob) = Glob::new("src/lib.rs").unwrap().partition(); + let paths: HashSet<_> = glob + .walk(path.join(prefix)) + .flatten() + .map(|entry| entry.into_path()) + .collect(); + assert_eq!(paths, [path.join("src/lib.rs")].into_iter().collect()); + } + + #[test] + fn walk_glob_with_not() { + #[derive(Debug, Eq, Hash, PartialEq)] + enum TestSeparation { + Filtrate(T), + Residue(TreeResidue), + } + use TestSeparation::{Filtrate, Residue}; + use TreeResidue::{Node, Tree}; + + let (_root, path) = temptree(); + + let glob = Glob::new("**/*.{md,rs}").unwrap(); + let mut paths = HashSet::new(); + glob.walk(&path) + .not(["**/harness/**"]) + .unwrap() + // Inspect the feed rather than the `Iterator` output (filtrate). While it is trivial + // to provide a way to collect the feed, it is difficult to inspect its contents. In + // particular, it is not possible to construct `Product`s outside of the `filter` + // module (by design). Instead, the feed is collected into a simpler format in + // `filter_map_tree`. + .filter_map_tree(|_, separation| { + paths.insert(match separation.as_ref() { + Separation::Filtrate(ref filtrate) => Filtrate( + filtrate + .get() + .as_ref() + .expect("failed to read file") + .path() + .to_path_buf(), + ), + Separation::Residue(ref residue) => Residue( + residue + .get() + .as_ref() + .map(|residue| residue.path().to_path_buf()), + ), + }); + separation + }) + .for_each(drop); + assert_eq!( + paths, + [ + Residue(Node(path.to_path_buf())), + Residue(Node(path.join("doc"))), + Filtrate(path.join("doc/guide.md")), + Residue(Node(path.join("src"))), + Filtrate(path.join("src/glob.rs")), + Filtrate(path.join("src/lib.rs")), + Residue(Node(path.join("tests"))), + // This entry is important. The glob does **not** match this path and will separate + // it into node residue (not tree residue, because the pattern is **not** + // exhaustive). The glob **does** match paths beneath it. The `not` iterator must + // subsequently observe the residue and map it from node to tree and cancel the + // walk. Nothing beneath this directory must be present at all, even as residue. + Residue(Tree(path.join("tests/harness"))), + Filtrate(path.join("tests/walk.rs")), + Filtrate(path.join("README.md")), + ] + .into_iter() + .collect(), + ); + } + + #[test] + fn walk_glob_with_depth() { + let (_root, path) = temptree(); + + let glob = Glob::new("**").unwrap(); + let paths: HashSet<_> = glob + .walk_with_behavior( + &path, + WalkBehavior { + depth: 1, + ..Default::default() + }, + ) + .flatten() + .map(|entry| entry.into_path()) + .collect(); + assert_eq!( + paths, + [ + #[allow(clippy::redundant_clone)] + path.to_path_buf(), + path.join("doc"), + path.join("src"), + path.join("tests"), + path.join("README.md"), + ] + .into_iter() + .collect(), + ); + } + + #[test] + #[cfg(any(unix, windows))] + fn walk_glob_with_cyclic_link_file() { + let (_root, path) = temptree_with_cyclic_link(); + + let glob = Glob::new("**").unwrap(); + let paths: HashSet<_> = glob + .walk_with_behavior(&path, LinkBehavior::ReadFile) + .flatten() + .map(|entry| entry.into_path()) + .collect(); + assert_eq!( + paths, + [ + #[allow(clippy::redundant_clone)] + path.to_path_buf(), + path.join("README.md"), + path.join("doc"), + path.join("doc/guide.md"), + path.join("src"), + path.join("src/glob.rs"), + path.join("src/lib.rs"), + path.join("tests"), + path.join("tests/cycle"), + path.join("tests/harness"), + path.join("tests/harness/mod.rs"), + path.join("tests/walk.rs"), + ] + .into_iter() + .collect(), + ); + } + + #[test] + #[cfg(any(unix, windows))] + fn walk_glob_with_cyclic_link_target() { + let (_root, path) = temptree_with_cyclic_link(); + + // Collect paths into `Vec`s so that duplicates can be detected. + let expected = vec![ + #[allow(clippy::redundant_clone)] + path.to_path_buf(), + path.join("README.md"), + path.join("doc"), + path.join("doc/guide.md"), + path.join("src"), + path.join("src/glob.rs"), + path.join("src/lib.rs"), + path.join("tests"), + path.join("tests/harness"), + path.join("tests/harness/mod.rs"), + path.join("tests/walk.rs"), + ]; + let glob = Glob::new("**").unwrap(); + let mut paths: Vec<_> = glob + .walk_with_behavior(&path, LinkBehavior::ReadTarget) + .flatten() + // Take an additional item. This prevents an infinite loop if there is a problem with + // detecting the cycle while also introducing unexpected files so that the error can be + // detected. + .take(expected.len() + 1) + .map(|entry| entry.into_path()) + .collect(); + paths.sort_unstable(); + assert_eq!(paths, expected); + } +} diff --git a/tests/walk.rs b/tests/walk.rs deleted file mode 100644 index 9852010..0000000 --- a/tests/walk.rs +++ /dev/null @@ -1,264 +0,0 @@ -#![cfg(feature = "walk")] - -use build_fs_tree::{dir, file, Build, FileSystemTree}; -use std::collections::HashSet; -use std::path::PathBuf; -use tempfile::{self, TempDir}; - -use wax::walk::{FileIterator, LinkBehavior, WalkBehavior}; -use wax::Glob; - -// TODO: Rust's testing framework does not provide a mechanism for maintaining -// shared state. This means that tests that write to the file system must -// do so individually rather than writing before and after all tests have -// run. This should probably be avoided. - -/// Writes a testing directory tree to a temporary location on the file system. -fn temptree() -> (TempDir, PathBuf) { - let root = tempfile::tempdir().unwrap(); - let tree: FileSystemTree<&str, &str> = dir! { - "doc" => dir! { - "guide.md" => file!(""), - }, - "src" => dir! { - "glob.rs" => file!(""), - "lib.rs" => file!(""), - }, - "tests" => dir! { - "walk.rs" => file!(""), - }, - "README.md" => file!(""), - }; - let path = root.path().join("project"); - tree.build(&path).unwrap(); - (root, path) -} - -/// Writes a testing directory tree that includes a reentrant symbolic link to a -/// temporary location on the file system. -#[cfg(any(unix, windows))] -fn temptree_with_cyclic_link() -> (TempDir, PathBuf) { - use std::io; - use std::path::Path; - - #[cfg(unix)] - fn link(target: impl AsRef, link: impl AsRef) -> io::Result<()> { - std::os::unix::fs::symlink(target, link) - } - - #[cfg(windows)] - fn link(target: impl AsRef, link: impl AsRef) -> io::Result<()> { - std::os::windows::fs::symlink_dir(target, link) - } - - // Get a temporary tree and create a reentrant symbolic link. - let (root, path) = temptree(); - link(path.as_path(), path.join("tests/cycle")).unwrap(); - (root, path) -} - -#[test] -fn walk_with_tree() { - let (_root, path) = temptree(); - - let glob = Glob::new("**").unwrap(); - let paths: HashSet<_> = glob - .walk(&path) - .flatten() - .map(|entry| entry.into_path()) - .collect(); - assert_eq!( - paths, - [ - #[allow(clippy::redundant_clone)] - path.to_path_buf(), - path.join("doc"), - path.join("doc/guide.md"), - path.join("src"), - path.join("src/glob.rs"), - path.join("src/lib.rs"), - path.join("tests"), - path.join("tests/walk.rs"), - path.join("README.md"), - ] - .into_iter() - .collect(), - ); -} - -#[test] -fn walk_with_invariant_terminating_component() { - let (_root, path) = temptree(); - - let glob = Glob::new("**/*.md").unwrap(); - let paths: HashSet<_> = glob - .walk(&path) - .flatten() - .map(|entry| entry.into_path()) - .collect(); - assert_eq!( - paths, - IntoIterator::into_iter([path.join("doc/guide.md"), path.join("README.md"),]).collect(), - ); -} - -#[test] -fn walk_with_invariant_intermediate_component() { - let (_root, path) = temptree(); - - let glob = Glob::new("**/src/**/*.rs").unwrap(); - let paths: HashSet<_> = glob - .walk(&path) - .flatten() - .map(|entry| entry.into_path()) - .collect(); - assert_eq!( - paths, - IntoIterator::into_iter([path.join("src/glob.rs"), path.join("src/lib.rs"),]).collect(), - ); -} - -#[test] -fn walk_with_invariant_glob() { - let (_root, path) = temptree(); - - let glob = Glob::new("src/lib.rs").unwrap(); - let paths: HashSet<_> = glob - .walk(&path) - .flatten() - .map(|entry| entry.into_path()) - .collect(); - assert_eq!(paths, [path.join("src/lib.rs")].into_iter().collect(),); -} - -#[test] -fn walk_with_invariant_partitioned_glob() { - let (_root, path) = temptree(); - - let (prefix, glob) = Glob::new("src/lib.rs").unwrap().partition(); - let paths: HashSet<_> = glob - .walk(path.join(prefix)) - .flatten() - .map(|entry| entry.into_path()) - .collect(); - assert_eq!(paths, [path.join("src/lib.rs")].into_iter().collect(),); -} - -#[test] -fn walk_with_not() { - let (_root, path) = temptree(); - - let glob = Glob::new("**/*.{md,rs}").unwrap(); - let paths: HashSet<_> = glob - .walk(&path) - .not(["tests/**"]) - .unwrap() - .flatten() - .map(|entry| entry.into_path()) - .collect(); - assert_eq!( - paths, - [ - path.join("doc/guide.md"), - path.join("src/glob.rs"), - path.join("src/lib.rs"), - path.join("README.md"), - ] - .into_iter() - .collect(), - ); -} - -#[test] -fn walk_with_depth() { - let (_root, path) = temptree(); - - let glob = Glob::new("**").unwrap(); - let paths: HashSet<_> = glob - .walk_with_behavior( - &path, - WalkBehavior { - depth: 1, - ..Default::default() - }, - ) - .flatten() - .map(|entry| entry.into_path()) - .collect(); - assert_eq!( - paths, - [ - #[allow(clippy::redundant_clone)] - path.to_path_buf(), - path.join("doc"), - path.join("src"), - path.join("tests"), - path.join("README.md"), - ] - .into_iter() - .collect(), - ); -} - -#[test] -#[cfg(any(unix, windows))] -fn walk_with_cyclic_link_file() { - let (_root, path) = temptree_with_cyclic_link(); - - let glob = Glob::new("**").unwrap(); - let paths: HashSet<_> = glob - .walk_with_behavior(&path, LinkBehavior::ReadFile) - .flatten() - .map(|entry| entry.into_path()) - .collect(); - assert_eq!( - paths, - [ - #[allow(clippy::redundant_clone)] - path.to_path_buf(), - path.join("README.md"), - path.join("doc"), - path.join("doc/guide.md"), - path.join("src"), - path.join("src/glob.rs"), - path.join("src/lib.rs"), - path.join("tests"), - path.join("tests/cycle"), - path.join("tests/walk.rs"), - ] - .into_iter() - .collect(), - ); -} - -#[test] -#[cfg(any(unix, windows))] -fn walk_with_cyclic_link_target() { - let (_root, path) = temptree_with_cyclic_link(); - - // Collect paths into `Vec`s so that duplicates can be detected. - let expected = vec![ - #[allow(clippy::redundant_clone)] - path.to_path_buf(), - path.join("README.md"), - path.join("doc"), - path.join("doc/guide.md"), - path.join("src"), - path.join("src/glob.rs"), - path.join("src/lib.rs"), - path.join("tests"), - path.join("tests/walk.rs"), - ]; - let glob = Glob::new("**").unwrap(); - let mut paths: Vec<_> = glob - .walk_with_behavior(&path, LinkBehavior::ReadTarget) - .flatten() - // Take an additional item. This prevents an infinite loop if there is a - // problem with detecting the cycle while also introducing unexpected - // files so that the error can be detected. - .take(expected.len() + 1) - .map(|entry| entry.into_path()) - .collect(); - paths.sort_unstable(); - assert_eq!(paths, expected); -}