Skip to content

Commit

Permalink
Refactor Anchor into a consistent GlobWalker.
Browse files Browse the repository at this point in the history
`Anchor` was previously exposed in public APIs so that client code could
query roots and prefixes. However, this type and its usage in `walk`
APIs were disjoint, allowing an `Anchor` constructed from one `Glob` to
be used to construct an iterator using a different `Glob`.

Instead, this change exposes an intermediate `GlobWalker`, which
provides `walk` APIs as well as accessors to query the root and prefix
of a walk pattern in the specified path. Most users can likely ignore
this type, but it can be used if and when this information is needed.
  • Loading branch information
olson-sean-k committed Nov 9, 2023
1 parent f3df644 commit fb8a995
Show file tree
Hide file tree
Showing 2 changed files with 102 additions and 89 deletions.
189 changes: 101 additions & 88 deletions src/walk/glob.rs
Original file line number Diff line number Diff line change
Expand Up @@ -83,8 +83,8 @@ impl<'t> Glob<'t> {
/// [`WalkBehavior`]: crate::WalkBehavior
/// [`WalkEntry`]: crate::WalkEntry
#[cfg_attr(docsrs, doc(cfg(feature = "walk")))]
pub fn walk(&self, anchor: impl IntoAnchor) -> impl FileIterator<Entry = GlobEntry> {
self.walk_with_behavior(anchor, WalkBehavior::default())
pub fn walk(&self, directory: impl Into<PathBuf>) -> impl FileIterator<Entry = GlobEntry> {
self.walk_with_behavior(directory, WalkBehavior::default())
}

/// Gets an iterator over matching files in a directory tree.
Expand Down Expand Up @@ -136,13 +136,97 @@ impl<'t> Glob<'t> {
#[cfg_attr(docsrs, doc(cfg(feature = "walk")))]
pub fn walk_with_behavior(
&self,
anchor: impl IntoAnchor,
directory: impl Into<PathBuf>,
behavior: impl Into<WalkBehavior>,
) -> impl FileIterator<Entry = GlobEntry> {
let Anchor { root, prefix } = anchor.into_anchor(self);
let component_patterns =
compile(self.tree.as_ref().tokens()).expect("failed to compile glob sub-expressions");
let complete_pattern = self.pattern.clone();
self.walker(directory).walk_with_behavior(behavior)
}

/// Gets a [`GlobWalker`] that applies this glob to the given directory.
///
/// The walker pairs the root and prefix paths computed for `directory` with the compiled
/// patterns of the glob. It can be queried for those paths and consumed to walk the tree.
///
/// # Panics
///
/// Panics if the sub-expressions of the glob fail to compile.
#[cfg_attr(docsrs, doc(cfg(feature = "walk")))]
pub fn walker(&self, directory: impl Into<PathBuf>) -> GlobWalker {
    let anchor = self.anchor(directory);
    // The complete pattern is cloned so that the walker does not borrow from the glob.
    let complete = self.pattern.clone();
    let components =
        compile(self.tree.as_ref().tokens()).expect("failed to compile glob sub-expressions");
    GlobWalker {
        anchor,
        pattern: WalkPattern {
            complete,
            components,
        },
    }
}

/// Computes the root and prefix paths of this glob within the given directory.
///
/// Traversal begins at the root, which is the directory joined with any invariant prefix of
/// the glob pattern. Patterns are only applied to the path components that follow this prefix
/// in the root.
fn anchor(&self, directory: impl Into<PathBuf>) -> Anchor {
    /// Gets the invariant text prefix of the tokens as a path, or `None` if it is empty.
    fn invariant_path_prefix<'t, A, I>(tokens: I) -> Option<PathBuf>
    where
        A: 't,
        I: IntoIterator<Item = &'t Token<'t, A>>,
    {
        let text = token::invariant_text_prefix(tokens);
        if text.is_empty() {
            return None;
        }
        Some(text.into())
    }

    let directory = directory.into();
    match invariant_path_prefix(self.tree.as_ref().tokens()) {
        Some(invariant) => {
            let root = directory.join(&invariant);
            // An absolute path replaces the path with which it is joined, so in that case
            // nothing of `directory` remains in the root and there is no prefix.
            let prefix = if invariant.is_absolute() {
                PathBuf::new()
            }
            else {
                directory
            };
            Anchor { root, prefix }
        },
        None => Anchor {
            root: directory.clone(),
            prefix: directory,
        },
    }
}
}

/// Root and prefix paths of a glob applied to a particular directory.
///
/// Constructed by `Glob::anchor` and consumed when walking via `GlobWalker`.
#[derive(Clone, Debug)]
struct Anchor {
// Path from which the directory tree is traversed: the given directory joined with any
// invariant prefix of the glob pattern.
root: PathBuf,
// The given directory, or an empty path when the invariant prefix of the glob is absolute
// (an absolute prefix replaces the directory when joined).
prefix: PathBuf,
}

/// Compiled patterns used to match paths while walking a directory tree.
#[derive(Clone, Debug)]
struct WalkPattern {
// Clone of the glob's complete pattern; used to capture matched text from a whole candidate
// path.
complete: Regex,
// Patterns compiled from the glob's tokens; matched one at a time against the individual
// components of a candidate path.
components: Vec<Regex>,
}

/// Intermediate type that applies a `Glob` to a particular directory.
///
/// Provides `walk` APIs as well as accessors for the root and prefix paths of the walk. A
/// `GlobWalker` is obtained from `Glob::walker`; most client code can ignore this type and call
/// `Glob::walk` directly.
#[derive(Clone, Debug)]
pub struct GlobWalker {
// Root and prefix paths computed from the glob and the directory.
anchor: Anchor,
// Compiled patterns of the glob from which this walker was constructed.
pattern: WalkPattern,
}

impl GlobWalker {
/// Gets the path from which the directory tree is traversed.
///
/// This is the directory given to the walker joined with any invariant prefix of the glob
/// pattern.
pub fn root(&self) -> &Path {
    self.anchor.root.as_path()
}

/// Gets the prefix of the walk.
///
/// This is the directory given to the walker, or an empty path when the invariant prefix of
/// the glob pattern is an absolute path.
pub fn prefix(&self) -> &Path {
    self.anchor.prefix.as_path()
}

/// Consumes the walker and gets an iterator over matching files using the default
/// [`WalkBehavior`].
pub fn walk(self) -> impl FileIterator<Entry = GlobEntry> {
    let behavior = WalkBehavior::default();
    self.walk_with_behavior(behavior)
}

pub fn walk_with_behavior(
self,
behavior: impl Into<WalkBehavior>,
) -> impl FileIterator<Entry = GlobEntry> {
let Anchor { root, prefix } = self.anchor;
root.walk_with_behavior(behavior)
.filter_map_tree(move |cancellation, separation| {
use itertools::EitherOrBoth::{Both, Left, Right};
Expand Down Expand Up @@ -172,7 +256,7 @@ impl<'t> Glob<'t> {
Component::Normal(component) => Some(CandidatePath::from(component)),
_ => None,
})
.zip_longest(component_patterns.iter().skip(depth))
.zip_longest(self.pattern.components.iter().skip(depth))
.with_position()
{
match (position, candidate) {
Expand All @@ -186,7 +270,9 @@ impl<'t> Glob<'t> {
(Last | Only, Both(candidate, pattern)) => {
return if pattern.is_match(candidate.as_ref()) {
let candidate = CandidatePath::from(path);
if let Some(matched) = complete_pattern
if let Some(matched) = self
.pattern
.complete
.captures(candidate.as_ref())
.map(MatchedText::from)
.map(MatchedText::into_owned)
Expand All @@ -207,7 +293,9 @@ impl<'t> Glob<'t> {
},
(_, Left(_candidate)) => {
let candidate = CandidatePath::from(path);
return if let Some(matched) = complete_pattern
return if let Some(matched) = self
.pattern
.complete
.captures(candidate.as_ref())
.map(MatchedText::from)
.map(MatchedText::into_owned)
Expand All @@ -228,7 +316,9 @@ impl<'t> Glob<'t> {
// If the component loop is not entered, then check for a match. This may indicate
// that the `Glob` is empty and a single invariant path may be matched.
let candidate = CandidatePath::from(path);
if let Some(matched) = complete_pattern
if let Some(matched) = self
.pattern
.complete
.captures(candidate.as_ref())
.map(MatchedText::from)
.map(MatchedText::into_owned)
Expand All @@ -240,83 +330,6 @@ impl<'t> Glob<'t> {
filtrate.filter_node().into()
})
}

/// Computes the root and prefix paths of this glob within the given directory.
///
/// Traversal begins at the root, which is the directory joined with any invariant prefix of
/// the glob pattern. Patterns are only applied to the path components that follow this prefix
/// in the root.
#[cfg_attr(docsrs, doc(cfg(feature = "walk")))]
pub fn anchor(&self, directory: impl Into<PathBuf>) -> Anchor {
    /// Gets the invariant text prefix of the tokens as a path, or `None` if it is empty.
    fn invariant_path_prefix<'t, A, I>(tokens: I) -> Option<PathBuf>
    where
        A: 't,
        I: IntoIterator<Item = &'t Token<'t, A>>,
    {
        let text = token::invariant_text_prefix(tokens);
        if text.is_empty() {
            return None;
        }
        Some(text.into())
    }

    let directory = directory.into();
    match invariant_path_prefix(self.tree.as_ref().tokens()) {
        Some(invariant) => {
            let root = directory.join(&invariant);
            // An absolute path replaces the path with which it is joined, so in that case
            // nothing of `directory` remains in the root and there is no prefix.
            let prefix = if invariant.is_absolute() {
                PathBuf::new()
            }
            else {
                directory
            };
            Anchor { root, prefix }
        },
        None => Anchor {
            root: directory.clone(),
            prefix: directory,
        },
    }
}
}

/// Conversion into an `Anchor` with respect to a particular `Glob`.
pub trait IntoAnchor {
/// Converts `self` into an `Anchor`, given the `Glob` for which the anchor is requested.
fn into_anchor(self, glob: &Glob<'_>) -> Anchor;
}

/// An `Anchor` converts into itself.
impl IntoAnchor for Anchor {
    /// Returns the anchor unchanged; the glob is not consulted.
    fn into_anchor(self, _glob: &Glob<'_>) -> Anchor {
        self
    }
}

impl<P> IntoAnchor for P
where
P: Into<PathBuf>,
{
fn into_anchor(self, glob: &Glob<'_>) -> Anchor {
glob.anchor(self)
}
}

/// Root and prefix paths of a glob applied to a particular directory.
///
/// Constructed by `Glob::anchor`.
#[derive(Clone, Debug, Hash, PartialEq)]
pub struct Anchor {
// Path from which the directory tree is traversed: the given directory joined with any
// invariant prefix of the glob pattern.
root: PathBuf,
// The given directory, or an empty path when the invariant prefix of the glob is absolute
// (an absolute prefix replaces the directory when joined).
prefix: PathBuf,
}

impl Anchor {
pub fn into_root_prefix(self) -> (PathBuf, PathBuf) {
let Anchor { root, prefix } = self;
(root, prefix)
}

pub fn root(&self) -> &Path {
self.root.as_ref()
}

pub fn prefix(&self) -> &Path {
self.prefix.as_ref()
}
}

/// Negated glob combinator that efficiently filters [`WalkEntry`]s.
Expand Down
2 changes: 1 addition & 1 deletion src/walk/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@ use crate::walk::filter::{
use crate::walk::glob::FilterAny;
use crate::{BuildError, Combine};

pub use crate::walk::glob::{Anchor, GlobEntry, IntoAnchor};
pub use crate::walk::glob::{GlobEntry, GlobWalker};

pub type FileFiltrate<T> = Result<T, WalkError>;
pub type FileResidue = TreeResidue<TreeEntry>;
Expand Down

0 comments on commit fb8a995

Please sign in to comment.