diff --git a/src/capture.rs b/src/capture.rs index ab40c81..83c8a3b 100644 --- a/src/capture.rs +++ b/src/capture.rs @@ -78,20 +78,18 @@ impl From for MaybeOwnedText<'static> { /// Text that has been matched by a [`Pattern`] and its captures. /// -/// To match a [`Glob`] or other [`Pattern`] against a [`CandidatePath`] and get -/// the matched text, use the [`Pattern::matched`] function. +/// To match a [`Glob`] or other [`Pattern`] against a [`CandidatePath`] and get the matched text, +/// use the [`Pattern::matched`] function. /// -/// All [`Pattern`]s provide an implicit capture of the complete text of a -/// match. This implicit capture has index zero, and is exposed via the -/// [`complete`] function as well as the [`get`] function using index zero. -/// Capturing tokens are indexed starting at one, and can be used to isolate -/// more specific sub-text. +/// All [`Pattern`]s provide an implicit capture of the complete text of a match. This implicit +/// capture has index zero, and is exposed via the [`complete`] function as well as the [`get`] +/// function using index zero. Capturing tokens are indexed starting at one, and can be used to +/// isolate more specific sub-text. /// /// # Examples /// -/// Capturing tokens and matched text can be used to isolate sub-text in a -/// match. For example, the file name of a match can be extracted using an -/// alternative to group patterns. +/// Capturing tokens and matched text can be used to isolate sub-text in a match. For example, the +/// file name of a match can be extracted using an alternative to group patterns. /// /// ```rust /// use wax::{CandidatePath, Glob, Pattern}; @@ -125,10 +123,9 @@ impl<'t> MatchedText<'t> { /// Clones any borrowed data to an owning instance. /// - /// This function is similar to [`into_owned`], but does not consume its - /// receiver. 
Due to a technical limitation, `MatchedText` cannot implement - /// [`Clone`], so this function is provided as a stop gap that allows a - /// distinct instance to be created that owns its data. + /// This function is similar to [`into_owned`], but does not consume its receiver. Due to a + /// technical limitation, `MatchedText` cannot implement [`Clone`], so this function is + /// provided as a stop gap that allows a distinct instance to be created that owns its data. /// /// [`Clone`]: std::clone::Clone /// [`into_owned`]: crate::MatchedText::into_owned @@ -142,9 +139,8 @@ impl<'t> MatchedText<'t> { /// Gets the complete text of a match. /// - /// All [`Pattern`]s have an implicit capture of the complete text at index - /// zero. This function is therefore equivalent to unwrapping the output of - /// the [`get`] function with index zero. + /// All [`Pattern`]s have an implicit capture of the complete text at index zero. This function + /// is therefore equivalent to unwrapping the output of the [`get`] function with index zero. /// /// [`get`]: crate::MatchedText::get /// [`Pattern`]: crate::Pattern @@ -154,16 +150,14 @@ impl<'t> MatchedText<'t> { /// Gets the matched text of a capture at the given index. /// - /// All [`Pattern`]s have an implicit capture of the complete text at index - /// zero. Capturing tokens are indexed from one, so any capturing - /// sub-expression will be indexed after the implicit complete text. For - /// example, the sub-expression `*` in the glob expression `*.txt` is at - /// index one and will exclude the suffix `.txt` in its matched text. + /// All [`Pattern`]s have an implicit capture of the complete text at index zero. Capturing + /// tokens are indexed from one, so any capturing sub-expression will be indexed after the + /// implicit complete text. For example, the sub-expression `*` in the glob expression `*.txt` + /// is at index one and will exclude the suffix `.txt` in its matched text. 
/// - /// Alternative and repetition patterns group their sub-globs into a single - /// capture, so it is not possible to isolate matched text from their - /// sub-globs. This can be used to explicitly group matched text, such as - /// isolating an entire matched file name using an expression like + /// Alternative and repetition patterns group their sub-globs into a single capture, so it is + /// not possible to isolate matched text from their sub-globs. This can be used to explicitly + /// group matched text, such as isolating an entire matched file name using an expression like /// `{*.{go,rs}}`. /// /// [`Pattern`]: crate::Pattern diff --git a/src/diagnostics/miette.rs b/src/diagnostics/miette.rs index 3889848..95a62ae 100644 --- a/src/diagnostics/miette.rs +++ b/src/diagnostics/miette.rs @@ -65,8 +65,8 @@ pub fn diagnose<'i, 't>( mod tests { use crate::Glob; - // It is non-trivial to downcast `&dyn Diagnostic`, so diagnostics are - // identified in tests by their code. + // It is non-trivial to downcast `&dyn Diagnostic`, so diagnostics are identified in tests by + // their code. const CODE_SEMANTIC_LITERAL: &str = "wax::glob::semantic_literal"; const CODE_TERMINATING_SEPARATOR: &str = "wax::glob::terminating_separator"; diff --git a/src/diagnostics/mod.rs b/src/diagnostics/mod.rs index 8071e3a..3a5430f 100644 --- a/src/diagnostics/mod.rs +++ b/src/diagnostics/mod.rs @@ -10,9 +10,9 @@ pub use crate::diagnostics::miette::diagnose; /// Location and length of a token within a glob expression. /// -/// Spans are encoded as a tuple of `usize`s, where the first element is the -/// location or position and the second element is the length. Both position and -/// length are measured in bytes and **not** code points, graphemes, etc. +/// Spans are encoded as a tuple of `usize`s, where the first element is the location or position +/// and the second element is the length. Both position and length are measured in bytes and +/// **not** code points, graphemes, etc. 
/// /// # Examples /// @@ -44,18 +44,16 @@ impl SpanExt for Span { /// Error associated with a [`Span`] within a glob expression. /// -/// Located errors describe specific instances of an error within a glob -/// expression. Types that implement this trait provide a location within a glob -/// expression via the [`LocatedError::span`] function as well as a description -/// via the [`Display`] trait. See [`BuildError::locations`]. +/// Located errors describe specific instances of an error within a glob expression. Types that +/// implement this trait provide a location within a glob expression via the [`LocatedError::span`] +/// function as well as a description via the [`Display`] trait. See [`BuildError::locations`]. /// /// [`BuildError::locations`]: crate::BuildError::locations /// [`Display`]: std::fmt::Display /// [`LocatedError::span`]: crate::LocatedError::span /// [`Span`]: crate::Span pub trait LocatedError: Display { - /// Gets the span within the glob expression with which the error is - /// associated. + /// Gets the span within the glob expression with which the error is associated. fn span(&self) -> Span; } diff --git a/src/encode.rs b/src/encode.rs index 250315a..c6f2762 100644 --- a/src/encode.rs +++ b/src/encode.rs @@ -15,13 +15,12 @@ const SEPARATOR_CLASS_EXPRESSION: &str = "/\\\\"; #[cfg(unix)] const SEPARATOR_CLASS_EXPRESSION: &str = "/"; -// This only encodes the platform's main separator, so any additional separators -// will be missed. It may be better to have explicit platform support and invoke -// `compile_error!` on unsupported platforms, as this could cause very aberrant -// behavior. Then again, it seems that platforms using more than one separator -// are rare. GS/OS, OS/2, and Windows are likely the best known examples and of -// those only Windows is a supported Rust target at the time of writing (and is -// already supported by Wax). +// This only encodes the platform's main separator, so any additional separators will be missed. 
It +// may be better to have explicit platform support and invoke `compile_error!` on unsupported +// platforms, as this could cause very aberrant behavior. Then again, it seems that platforms using +// more than one separator are rare. GS/OS, OS/2, and Windows are likely the best known examples +// and of those only Windows is a supported Rust target at the time of writing (and is already +// supported by Wax). #[cfg(not(any(windows, unix)))] const SEPARATOR_CLASS_EXPRESSION: &str = main_separator_class_expression(); @@ -29,8 +28,8 @@ const SEPARATOR_CLASS_EXPRESSION: &str = main_separator_class_expression(); const fn main_separator_class_expression() -> &'static str { use std::path::MAIN_SEPARATOR; - // TODO: This is based upon `regex_syntax::is_meta_character`, but that - // function is not `const`. Perhaps that can be changed upstream. + // TODO: This is based upon `regex_syntax::is_meta_character`, but that function is not + // `const`. Perhaps that can be changed upstream. const fn escape(x: char) -> &'static str { match x { '\\' | '.' | '+' | '*' | '?' | '(' | ')' | '|' | '[' | ']' | '{' | '}' | '^' | '$' @@ -56,9 +55,8 @@ macro_rules! nsepexpr { /// Describes errors that occur when compiling a glob expression. /// -/// **This error only occurs when the size of the compiled program is too -/// large.** All other compilation errors are considered internal bugs and will -/// panic. +/// **This error only occurs when the size of the compiled program is too large.** All other +/// compilation errors are considered internal bugs and will panic. #[derive(Clone, Debug, Error)] #[error("failed to compile glob: {kind}")] pub struct CompileError { @@ -176,8 +174,8 @@ fn encode<'t, A, T>( pattern.push(')'); } - // TODO: Use `Grouping` everywhere a group is encoded. For invariant groups - // that ignore `grouping`, construct a local `Grouping` instead. + // TODO: Use `Grouping` everywhere a group is encoded. 
For invariant groups that ignore + // `grouping`, construct a local `Grouping` instead. for (position, token) in tokens.into_iter().with_position() { match (position, token.borrow().kind()) { (_, Literal(literal)) => { @@ -261,13 +259,11 @@ fn encode<'t, A, T>( pattern.push_str(nsepexpr!("&&{0}")); } pattern.push(']'); - // Compile the character class sub-expression. This may fail - // if the subtraction of the separator pattern yields an - // empty character class (meaning that the glob expression - // matches only separator characters on the target - // platform). If compilation fails, then use the null - // character class, which matches nothing on supported - // platforms. + // Compile the character class sub-expression. This may fail if the subtraction + // of the separator pattern yields an empty character class (meaning that the + // glob expression matches only separator characters on the target platform). + // If compilation fails, then use the null character class, which matches + // nothing on supported platforms. if Regex::new(&pattern).is_ok() { pattern.into() } diff --git a/src/lib.rs b/src/lib.rs index f4e5791..d4d14f6 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -1,9 +1,8 @@ -//! Wax provides opinionated and portable globs that can be matched against file -//! paths and directory trees. Globs use a familiar syntax and support -//! expressive features with semantics that emphasize component boundaries. +//! Wax provides opinionated and portable globs that can be matched against file paths and +//! directory trees. Globs use a familiar syntax and support expressive features with semantics +//! that emphasize component boundaries. //! -//! See the [repository -//! documentation](https://github.com/olson-sean-k/wax/blob/master/README.md) +//! See the [repository documentation](https://github.com/olson-sean-k/wax/blob/master/README.md) //! for details about glob expressions and patterns. 
#![cfg_attr(docsrs, feature(doc_cfg))] @@ -115,9 +114,9 @@ pub struct CapturingToken { impl CapturingToken { /// Gets the index of the capture. /// - /// Captures are one-indexed and the index zero always represents the - /// implicit capture of the complete match, so the index of - /// `CapturingToken`s is always one or greater. See [`MatchedText`]. + /// Captures are one-indexed and the index zero always represents the implicit capture of the + /// complete match, so the index of `CapturingToken`s is always one or greater. See + /// [`MatchedText`]. /// /// [`MatchedText`]: crate::MatchedText pub fn index(&self) -> usize { @@ -130,24 +129,21 @@ impl CapturingToken { } } -// This type is similar to `token::Variance>`, but is -// simplified for the public API. Invariant text is always expressed as a path -// and no variant bounds are provided. +// This type is similar to `token::Variance>`, but is simplified for the public +// API. Invariant text is always expressed as a path and no variant bounds are provided. /// Variance of a [`Pattern`]. /// -/// The variance of a pattern describes the kinds of paths it can match with -/// respect to the platform file system APIs. [`Pattern`]s are either variant or -/// invariant. +/// The variance of a pattern describes the kinds of paths it can match with respect to the +/// platform file system APIs. [`Pattern`]s are either variant or invariant. /// -/// An invariant [`Pattern`] can be represented and completely described by an -/// equivalent path using the platform's file system APIs. For example, the glob -/// expression `path/to/file.txt` resolves identically to the paths -/// `path/to/file.txt` and `path\to\file.txt` on Unix and Windows, respectively. +/// An invariant [`Pattern`] can be represented and completely described by an equivalent path +/// using the platform's file system APIs. 
For example, the glob expression `path/to/file.txt` +/// resolves identically to the paths `path/to/file.txt` and `path\to\file.txt` on Unix and +/// Windows, respectively. /// -/// A variant [`Pattern`] resolves differently than any particular path used -/// with the platform's file system APIs. Such an expression cannot be -/// represented by a single path. This is typically because the expression -/// matches multiple texts using a regular pattern, such as in the glob +/// A variant [`Pattern`] resolves differently than any particular path used with the platform's +/// file system APIs. Such an expression cannot be represented by a single path. This is typically +/// because the expression matches multiple texts using a regular pattern, such as in the glob /// expression `**/*.rs`. /// /// [`Pattern`]: crate::Pattern @@ -157,25 +153,23 @@ pub enum Variance { /// A [`Pattern`] is invariant and equivalent to a path. /// /// Some non-literal expressions may be invariant, such as in the expression - /// `path/[t][o]/{file,file}.txt`, which is invariant on Unix (but not on - /// Windows, because the character class expressions do not match with case - /// folding). + /// `path/[t][o]/{file,file}.txt`, which is invariant on Unix (but not on Windows, because the + /// character class expressions do not match with case folding). /// /// [`Pattern`]: crate::Pattern Invariant( - /// An equivalent path that completely describes the invariant - /// [`Pattern`] with respect to platform file system APIs. + /// An equivalent path that completely describes the invariant [`Pattern`] with respect to + /// platform file system APIs. /// /// [`Pattern`]: crate::Pattern PathBuf, ), /// A [`Pattern`] is variant and cannot be completely described by a path. /// - /// Variant expressions may be formed from literals or other **seemingly** - /// invariant expressions. 
For example, the variance of literals considers - /// the case sensitivity of the platform's file system APIs, so the - /// expression `(?i)path/to/file.txt` is variant on Unix but not on Windows. - /// Similarly, the expression `path/[t][o]/file.txt` is variant on Windows + /// Variant expressions may be formed from literals or other **seemingly** invariant + /// expressions. For example, the variance of literals considers the case sensitivity of the + /// platform's file system APIs, so the expression `(?i)path/to/file.txt` is variant on Unix + /// but not on Windows. Similarly, the expression `path/[t][o]/file.txt` is variant on Windows /// but not on Unix. /// /// [`Pattern`]: crate::Pattern @@ -217,10 +211,9 @@ impl From>> for Variance { /// A compiled glob expression that can be inspected and matched against paths. /// -/// Matching is a logical operation and does **not** interact with a file -/// system. To handle path operations, use [`Path`] and/or [`PathBuf`] and their -/// associated functions. See [`Glob::partition`] for more about globs and path -/// operations. +/// Matching is a logical operation and does **not** interact with a file system. To handle path +/// operations, use [`Path`] and/or [`PathBuf`] and their associated functions. See +/// [`Glob::partition`] for more about globs and path operations. /// /// [`Glob::partition`]: crate::Glob::partition /// [`Path`]: std::path::Path @@ -243,22 +236,21 @@ pub trait Pattern<'t>: Combine<'t, Error = Infallible> { /// Gets the variance of the pattern. /// - /// The variance of a pattern describes the kinds of paths it can match with - /// respect to the platform file system APIs. + /// The variance of a pattern describes the kinds of paths it can match with respect to the + /// platform file system APIs. fn variance(&self) -> Variance; /// Returns `true` if the pattern is exhaustive. 
/// - /// A glob expression is exhaustive if its terminating component matches any - /// and all sub-trees, such as in the expressions `/home/**` and - /// `local/</>*`. + /// A glob expression is exhaustive if its terminating component matches any and all sub-trees, + /// such as in the expressions `/home/**` and `local/</>*`. fn is_exhaustive(&self) -> bool; } /// A glob expression representation that can be incorporated into a combinator. /// -/// This trait is implemented by types that can be (fallibly) converted into a -/// [`Pattern`] and incorporated into a combinator. See [`any`]. +/// This trait is implemented by types that can be (fallibly) converted into a [`Pattern`] and +/// incorporated into a combinator. See [`any`]. /// /// [`any`]: crate::any /// [`Pattern`]: crate::Pattern @@ -276,15 +268,14 @@ impl<'t> Combine<'t> for &'t str { /// General errors concerning [`Pattern`]s. /// -/// This is the most general error and each of its variants exposes a particular -/// error type that describes the details of its associated error condition. -/// This error is not used in any Wax APIs directly, but can be used to -/// encapsulate the more specific errors that are. +/// This is the most general error and each of its variants exposes a particular error type that +/// describes the details of its associated error condition. This error is not used in any Wax APIs +/// directly, but can be used to encapsulate the more specific errors that are. /// /// # Examples /// -/// To encapsulate different errors in the Wax API behind a function, convert -/// them into a `GlobError` via `?`. +/// To encapsulate different errors in the Wax API behind a function, convert them into a +/// `GlobError` via `?`. /// /// ```rust,no_run,ignore /// use std::path::Path; @@ -327,26 +318,22 @@ impl From for GlobError { } } -// TODO: `Diagnostic` is implemented with macros for brevity and to ensure -// complete coverage of features. 
However, this means that documentation -// does not annotate the implementation with a feature flag requirement. -// If possible, perhaps in a later version of Rust, close this gap. -/// Describes errors that occur when building a [`Pattern`] from a glob -/// expression. -/// -/// Glob expressions may fail to build if they cannot be parsed, violate rules, -/// or cannot be compiled. Parsing errors occur when a glob expression has -/// invalid syntax. Patterns must also follow rules as described in the -/// [repository -/// documentation](https://github.com/olson-sean-k/wax/blob/master/README.md), -/// which are designed to avoid nonsense expressions and ambiguity. Lastly, -/// compilation errors occur **only if the size of the compiled program is too -/// large** (all other compilation errors are considered internal bugs and will -/// panic). -/// -/// When the `miette` feature is enabled, this and other error types implement -/// the [`Diagnostic`] trait. Due to a technical limitation, this may not be -/// properly annotated in API documentation. +// TODO: `Diagnostic` is implemented with macros for brevity and to ensure complete coverage of +// features. However, this means that documentation does not annotate the implementation with +// a feature flag requirement. If possible, perhaps in a later version of Rust, close this +// gap. +/// Describes errors that occur when building a [`Pattern`] from a glob expression. +/// +/// Glob expressions may fail to build if they cannot be parsed, violate rules, or cannot be +/// compiled. Parsing errors occur when a glob expression has invalid syntax. Patterns must also +/// follow rules as described in the [repository +/// documentation](https://github.com/olson-sean-k/wax/blob/master/README.md), which are designed +/// to avoid nonsense expressions and ambiguity. 
Lastly, compilation errors occur **only if the +/// size of the compiled program is too large** (all other compilation errors are considered +/// internal bugs and will panic). +/// +/// When the `miette` feature is enabled, this and other error types implement the [`Diagnostic`] +/// trait. Due to a technical limitation, this may not be properly annotated in API documentation. /// /// [`Diagnostic`]: miette::Diagnostic /// [`Pattern`]: crate::Pattern @@ -361,22 +348,21 @@ pub struct BuildError { impl BuildError { /// Gets [`LocatedError`]s detailing the errors within a glob expression. /// - /// This function returns an [`Iterator`] over the [`LocatedError`]s that - /// detail where and why an error occurred when the error has associated - /// [`Span`]s within a glob expression. For errors with no such associated - /// information, the [`Iterator`] yields no items, such as compilation - /// errors. + /// This function returns an [`Iterator`] over the [`LocatedError`]s that detail where and why + /// an error occurred when the error has associated [`Span`]s within a glob expression. For + /// errors with no such associated information, the [`Iterator`] yields no items, such as + /// compilation errors. /// /// # Examples /// - /// [`LocatedError`]s can be used to provide information to users about - /// which parts of a glob expression are associated with an error. + /// [`LocatedError`]s can be used to provide information to users about which parts of a glob + /// expression are associated with an error. /// /// ```rust /// use wax::Glob; /// - /// // This glob expression violates rules. The error handling code prints details - /// // about the alternative where the violation occurred. + /// // This glob expression violates rules. The error handling code prints details about the + /// // alternative where the violation occurred. 
/// let expression = "**/{foo,**/bar,baz}"; /// match Glob::new(expression) { /// Ok(glob) => { @@ -469,11 +455,10 @@ enum BuildErrorKind { /// Path that can be matched against a [`Pattern`]. /// -/// `CandidatePath`s are always UTF-8 encoded. On some platforms this requires a -/// lossy conversion that uses Unicode replacement codepoints `�` whenever a -/// part of a path cannot be represented as valid UTF-8 (such as Windows). This -/// means that some byte sequences cannot be matched, though this is uncommon in -/// practice. +/// `CandidatePath`s are always UTF-8 encoded. On some platforms (such as Windows) this requires a +/// lossy conversion that uses Unicode replacement codepoints `�` whenever a part of a path cannot +/// be represented as valid UTF-8. This means that some byte sequences cannot be matched, though +/// this is uncommon in practice. /// /// [`Pattern`]: crate::Pattern #[derive(Clone)] @@ -530,16 +515,14 @@ impl<'b> From<&'b str> for CandidatePath<'b> { /// Pattern that can be matched against paths and directory trees. /// -/// `Glob`s are constructed from strings called glob expressions that resemble -/// Unix paths consisting of nominal components delimited by separators. Glob -/// expressions support various patterns that match and capture specified text -/// in a path. These patterns can be used to logically match individual paths -/// and to semantically match and walk directory trees. +/// `Glob`s are constructed from strings called glob expressions that resemble Unix paths +/// consisting of nominal components delimited by separators. Glob expressions support various +/// patterns that match and capture specified text in a path. These patterns can be used to +/// logically match individual paths and to semantically match and walk directory trees. /// /// # Examples /// -/// A `Glob` can be used to determine if a path matches a pattern via the -/// [`Pattern`] trait. 
+/// A `Glob` can be used to determine if a path matches a pattern via the [`Pattern`] trait. /// /// ```rust /// use wax::{Glob, Pattern}; @@ -558,8 +541,8 @@ impl<'b> From<&'b str> for CandidatePath<'b> { /// assert_eq!("lib.rs", glob.matched(&candidate).unwrap().get(2).unwrap()); /// ``` /// -/// To match a `Glob` against a directory tree, the [`walk`] function can be -/// used to get an iterator over matching paths. +/// To match a `Glob` against a directory tree, the [`walk`] function can be used to get an +/// iterator over matching paths. /// /// ```rust,no_run,ignore /// use wax::Glob; @@ -587,18 +570,15 @@ impl<'t> Glob<'t> { encode::compile(tokens) } - // TODO: Document pattern syntax in the crate documentation and refer to it - // here. + // TODO: Document pattern syntax in the crate documentation and refer to it here. /// Constructs a [`Glob`] from a glob expression. /// - /// A glob expression is UTF-8 encoded text that resembles a Unix path - /// consisting of nominal components delimited by separators and patterns - /// that can be matched against native paths. + /// A glob expression is UTF-8 encoded text that resembles a Unix path consisting of nominal + /// components delimited by separators and patterns that can be matched against native paths. /// /// # Errors /// - /// Returns an error if the glob expression fails to build. See - /// [`BuildError`]. + /// Returns an error if the glob expression fails to build. See [`BuildError`]. /// /// [`Glob`]: crate::Glob /// [`BuildError`]: crate::BuildError @@ -610,8 +590,8 @@ impl<'t> Glob<'t> { /// Constructs a [`Glob`] from a glob expression with diagnostics. /// - /// This function is the same as [`Glob::new`], but additionally returns - /// detailed diagnostics on both success and failure. + /// This function is the same as [`Glob::new`], but additionally returns detailed diagnostics + /// on both success and failure. /// /// See [`Glob::diagnose`]. 
/// @@ -641,39 +621,33 @@ impl<'t> Glob<'t> { }) } - /// Partitions a [`Glob`] into an invariant [`PathBuf`] prefix and variant - /// [`Glob`] postfix. + /// Partitions a [`Glob`] into an invariant [`PathBuf`] prefix and variant [`Glob`] postfix. /// - /// The invariant prefix contains no glob patterns nor other variant - /// components and therefore can be interpreted as a native path. The - /// [`Glob`] postfix is variant and contains the remaining components that - /// follow the prefix. For example, the glob expression `.local/**/*.log` - /// would produce the path `.local` and glob `**/*.log`. It is possible for + /// The invariant prefix contains no glob patterns nor other variant components and therefore + /// can be interpreted as a native path. The [`Glob`] postfix is variant and contains the + /// remaining components that follow the prefix. For example, the glob expression + /// `.local/**/*.log` would produce the path `.local` and glob `**/*.log`. It is possible for /// either partition to be empty. /// - /// Literal components may be considered variant if they contain characters - /// with casing and the configured case sensitivity differs from the target - /// platform's file system. For example, the case-insensitive literal - /// expression `(?i)photos` is considered variant on Unix and invariant on - /// Windows, because the literal `photos` resolves differently in Unix file - /// system APIs. + /// Literal components may be considered variant if they contain characters with casing and the + /// configured case sensitivity differs from the target platform's file system. For example, + /// the case-insensitive literal expression `(?i)photos` is considered variant on Unix and + /// invariant on Windows, because the literal `photos` resolves differently in Unix file system + /// APIs. 
/// - /// Partitioning a [`Glob`] allows any invariant prefix to be used as a - /// native path to establish a working directory or to interpret semantic - /// components that are not recognized by globs, such as parent directory - /// `..` components. + /// Partitioning a [`Glob`] allows any invariant prefix to be used as a native path to + /// establish a working directory or to interpret semantic components that are not recognized + /// by globs, such as parent directory `..` components. /// - /// Partitioned [`Glob`]s are never rooted. If the glob expression has a - /// root component, then it is always included in the invariant [`PathBuf`] - /// prefix. + /// Partitioned [`Glob`]s are never rooted. If the glob expression has a root component, then + /// it is always included in the invariant [`PathBuf`] prefix. /// /// # Examples /// - /// To match paths against a [`Glob`] while respecting semantic components, - /// the invariant prefix and candidate path can be canonicalized. The - /// following example canonicalizes both the working directory joined with - /// the prefix as well as the candidate path and then attempts to match the - /// [`Glob`] if the candidate path contains the prefix. + /// To match paths against a [`Glob`] while respecting semantic components, the invariant + /// prefix and candidate path can be canonicalized. The following example canonicalizes both + /// the working directory joined with the prefix as well as the candidate path and then + /// attempts to match the [`Glob`] if the candidate path contains the prefix. /// /// ```rust,no_run /// use dunce; // Avoids UNC paths on Windows. @@ -713,9 +687,8 @@ impl<'t> Glob<'t> { /// /// # Examples /// - /// `Glob`s borrow data in the corresponding glob expression. To move a - /// `Glob` beyond the scope of a glob expression, clone the data with this - /// function. + /// `Glob`s borrow data in the corresponding glob expression. 
To move a `Glob` beyond the scope + /// of a glob expression, clone the data with this function. /// /// ```rust /// use wax::{BuildError, Glob}; @@ -735,32 +708,29 @@ impl<'t> Glob<'t> { /// Gets an iterator over matching files in a directory tree. /// - /// This function matches a [`Glob`] against a directory tree, returning - /// each matching file as a [`WalkEntry`]. [`Glob`]s are the only patterns - /// that support this semantic operation; it is not possible to match - /// combinators over directory trees. + /// This function matches a [`Glob`] against a directory tree, returning each matching file as + /// a [`WalkEntry`]. [`Glob`]s are the only patterns that support this semantic operation; it + /// is not possible to match combinators over directory trees. /// - /// As with [`Path::join`] and [`PathBuf::push`], the base directory can be - /// escaped or overridden by rooted [`Glob`]s. In many cases, the current - /// working directory `.` is an appropriate base directory and will be - /// intuitively ignored if the [`Glob`] is rooted, such as in - /// `/mnt/media/**/*.mp4`. The [`has_root`] function can be used to check if - /// a [`Glob`] is rooted and the [`Walk::root`] function can be used to get - /// the resulting root directory of the traversal. + /// As with [`Path::join`] and [`PathBuf::push`], the base directory can be escaped or + /// overridden by rooted [`Glob`]s. In many cases, the current working directory `.` is an + /// appropriate base directory and will be intuitively ignored if the [`Glob`] is rooted, such + /// as in `/mnt/media/**/*.mp4`. The [`has_root`] function can be used to check if a [`Glob`] + /// is rooted and the [`Walk::root`] function can be used to get the resulting root directory + /// of the traversal. /// /// The [root directory][`Walk::root`] is established via the [invariant - /// prefix][`Glob::partition`] of the [`Glob`]. 
**The prefix and any - /// [semantic literals][`Glob::has_semantic_literals`] in this prefix are - /// interpreted semantically as a path**, so components like `.` and `..` - /// that precede variant patterns interact with the base directory - /// semantically. This means that expressions like `../**` escape the base + /// prefix][`Glob::partition`] of the [`Glob`]. **The prefix and any [semantic + /// literals][`Glob::has_semantic_literals`] in this prefix are interpreted semantically as a + /// path**, so components like `.` and `..` that precede variant patterns interact with the + /// base directory semantically. This means that expressions like `../**` escape the base /// directory as expected on Unix and Windows, for example. /// - /// This function uses the default [`WalkBehavior`]. To configure the - /// behavior of the traversal, see [`Glob::walk_with_behavior`]. + /// This function uses the default [`WalkBehavior`]. To configure the behavior of the + /// traversal, see [`Glob::walk_with_behavior`]. /// - /// Unlike functions in [`Pattern`], **this operation is semantic and - /// interacts with the file system**. + /// Unlike functions in [`Pattern`], **this operation is semantic and interacts with the file + /// system**. /// /// # Examples /// @@ -774,10 +744,9 @@ impl<'t> Glob<'t> { /// } /// ``` /// - /// Glob expressions do not support general negations, but the [`not`] - /// iterator adaptor can be used when walking a directory tree to filter - /// [`WalkEntry`]s using arbitary patterns. **This should generally be - /// preferred over functions like [`Iterator::filter`], because it avoids + /// Glob expressions do not support general negations, but the [`not`] iterator adaptor can be + /// used when walking a directory tree to filter [`WalkEntry`]s using arbitary patterns. 
**This + /// should generally be preferred over functions like [`Iterator::filter`], because it avoids /// unnecessary reads of directory trees when matching [exhaustive /// negations][`Pattern::is_exhaustive`].** /// @@ -815,13 +784,13 @@ impl<'t> Glob<'t> { /// Gets an iterator over matching files in a directory tree. /// - /// This function is the same as [`Glob::walk`], but it additionally accepts - /// a [`WalkBehavior`]. This can be used to configure how the traversal - /// interacts with symbolic links, the maximum depth from the root, etc. + /// This function is the same as [`Glob::walk`], but it additionally accepts a + /// [`WalkBehavior`]. This can be used to configure how the traversal interacts with symbolic + /// links, the maximum depth from the root, etc. /// - /// Depth is relative to the [root directory][`Walk::root`] of the - /// traversal, which is determined by joining the given path and any - /// [invariant prefix][`Glob::partition`] of the [`Glob`]. + /// Depth is relative to the [root directory][`Walk::root`] of the traversal, which is + /// determined by joining the given path and any [invariant prefix][`Glob::partition`] of the + /// [`Glob`]. /// /// See [`Glob::walk`] for more information. /// @@ -837,9 +806,9 @@ impl<'t> Glob<'t> { /// } /// ``` /// - /// By default, symbolic links are read as normal files and their targets - /// are ignored. To follow symbolic links and traverse any directories that - /// they reference, specify a [`LinkBehavior`]. + /// By default, symbolic links are read as normal files and their targets are ignored. To + /// follow symbolic links and traverse any directories that they reference, specify a + /// [`LinkBehavior`]. /// /// ```rust,no_run /// use wax::{Glob, LinkBehavior}; @@ -869,9 +838,9 @@ impl<'t> Glob<'t> { /// Gets **non-error** [`Diagnostic`]s. /// - /// This function requires a receiving [`Glob`] and so does not report - /// error-level [`Diagnostic`]s. 
It can be used to get non-error diagnostics - /// after constructing or [partitioning][`Glob::partition`] a [`Glob`]. + /// This function requires a receiving [`Glob`] and so does not report error-level + /// [`Diagnostic`]s. It can be used to get non-error diagnostics after constructing or + /// [partitioning][`Glob::partition`] a [`Glob`]. /// /// See [`Glob::diagnosed`]. /// @@ -887,11 +856,9 @@ impl<'t> Glob<'t> { /// Gets metadata for capturing sub-expressions. /// - /// This function returns an iterator over capturing tokens, which describe - /// the index and location of sub-expressions that capture [matched - /// text][`MatchedText`]. For example, in the expression `src/**/*.rs`, both - /// `**` and `*` form - /// captures. + /// This function returns an iterator over capturing tokens, which describe the index and + /// location of sub-expressions that capture [matched text][`MatchedText`]. For example, in the + /// expression `src/**/*.rs`, both `**` and `*` form captures. /// /// [`MatchedText`]: crate::MatchedText pub fn captures(&self) -> impl '_ + Clone + Iterator { @@ -909,9 +876,8 @@ impl<'t> Glob<'t> { /// Returns `true` if the glob has a root. /// - /// As with Unix paths, a glob expression has a root if it begins with a - /// separator `/`. Patterns other than separators may also root an - /// expression, such as `/**` or ``. + /// As with Unix paths, a glob expression has a root if it begins with a separator `/`. + /// Patterns other than separators may also root an expression, such as `/**` or ``. pub fn has_root(&self) -> bool { self.tree .as_ref() @@ -920,14 +886,13 @@ impl<'t> Glob<'t> { .map_or(false, Token::has_root) } - /// Returns `true` if the glob has literals that have non-nominal semantics - /// on the target platform. + /// Returns `true` if the glob has literals that have non-nominal semantics on the target + /// platform. 
/// - /// The most notable semantic literals are the relative path components `.` - /// and `..`, which refer to a current and parent directory on Unix and - /// Windows operating systems, respectively. These are interpreted as - /// literals in glob expressions, and so only logically match paths that - /// contain these exact nominal components (semantic meaning is lost). + /// The most notable semantic literals are the relative path components `.` and `..`, which + /// refer to a current and parent directory on Unix and Windows operating systems, + /// respectively. These are interpreted as literals in glob expressions, and so only logically + /// match paths that contain these exact nominal components (semantic meaning is lost). /// /// See [`Glob::partition`]. /// @@ -986,8 +951,8 @@ impl<'t> Combine<'t> for Glob<'t> { /// Combinator that matches any of its component [`Pattern`]s. /// -/// An instance of `Any` is constructed using the [`any`] function, which -/// combines multiple [`Pattern`]s for more ergonomic and efficient matching. +/// An instance of `Any` is constructed using the [`any`] function, which combines multiple +/// [`Pattern`]s for more ergonomic and efficient matching. /// /// [`any`]: crate::any /// [`Pattern`]: crate::Pattern @@ -1027,27 +992,23 @@ impl<'t> Combine<'t> for Any<'t> { type Error = Infallible; } -// TODO: It may be useful to use dynamic dispatch via trait objects instead. -// This would allow for a variety of types to be composed in an `any` call -// and would be especially useful if additional combinators are -// introduced. +// TODO: It may be useful to use dynamic dispatch via trait objects instead. This would allow for a +// variety of types to be composed in an `any` call and would be especially useful if +// additional combinators are introduced. /// Constructs a combinator that matches if any of its input [`Pattern`]s match. 
/// -/// This function accepts an [`IntoIterator`] with items that implement -/// [`Combine`], such as [`Glob`] and `&str`. The output [`Any`] implements -/// [`Pattern`] by matching its component [`Pattern`]s. [`Any`] is often more -/// ergonomic and efficient than matching individually against multiple -/// [`Pattern`]s. +/// This function accepts an [`IntoIterator`] with items that implement [`Combine`], such as +/// [`Glob`] and `&str`. The output [`Any`] implements [`Pattern`] by matching its component +/// [`Pattern`]s. [`Any`] is often more ergonomic and efficient than matching individually against +/// multiple [`Pattern`]s. /// -/// [`Any`] groups all captures and therefore only exposes the complete text of -/// a match. It is not possible to index a particular capturing token in the -/// component patterns. Combinators only support logical matching and cannot be -/// used to semantically match (walk) a directory tree. +/// [`Any`] groups all captures and therefore only exposes the complete text of a match. It is not +/// possible to index a particular capturing token in the component patterns. Combinators only +/// support logical matching and cannot be used to semantically match (walk) a directory tree. /// /// # Examples /// -/// To match a path against multiple patterns, the patterns can first be -/// combined into an [`Any`]. +/// To match a path against multiple patterns, the patterns can first be combined into an [`Any`]. /// /// ```rust /// use wax::{Glob, Pattern}; @@ -1062,8 +1023,7 @@ impl<'t> Combine<'t> for Any<'t> { /// assert!(any.is_match("src/lib.rs")); /// ``` /// -/// [`Glob`]s and other compiled [`Pattern`]s can also be composed into an -/// [`Any`]. +/// [`Glob`]s and other compiled [`Pattern`]s can also be composed into an [`Any`]. 
/// /// ```rust /// use wax::{Glob, Pattern}; @@ -1073,8 +1033,8 @@ impl<'t> Combine<'t> for Any<'t> { /// assert!(wax::any([red, blue]).unwrap().is_match("red/potion.txt")); /// ``` /// -/// This function can only combine patterns of the same type, but intermediate -/// combinators can be used to combine different types into a single combinator. +/// This function can only combine patterns of the same type, but intermediate combinators can be +/// used to combine different types into a single combinator. /// /// ```rust /// use wax::{Glob, Pattern}; @@ -1099,9 +1059,8 @@ impl<'t> Combine<'t> for Any<'t> { /// /// # Errors /// -/// Returns an error if any of the inputs fail to build. If the inputs are a -/// compiled [`Pattern`] type such as [`Glob`], then this only occurs if the -/// compiled program is too large. +/// Returns an error if any of the inputs fail to build. If the inputs are a compiled [`Pattern`] +/// type such as [`Glob`], then this only occurs if the compiled program is too large. /// /// [`Any`]: crate::Any /// [`Combine`]: crate::Combine @@ -1126,14 +1085,13 @@ where /// Escapes text as a literal glob expression. /// -/// This function escapes any and all meta-characters in the given string, such -/// that all text is interpreted as a literal or separator when read as a glob -/// expression. +/// This function escapes any and all meta-characters in the given string, such that all text is +/// interpreted as a literal or separator when read as a glob expression. /// /// # Examples /// -/// This function can be used to escape opaque strings, such as a string -/// obtained from a user that must be interpreted literally. +/// This function can be used to escape opaque strings, such as a string obtained from a user that +/// must be interpreted literally. /// /// ```rust /// use wax::Glob; @@ -1149,9 +1107,8 @@ where /// if let Ok(glob) = Glob::new(&expression) { /* ... */ } /// ``` /// -/// Sometimes part of a path contains numerous meta-characters. 
This function -/// can be used to reliably escape them while making the unescaped part of the -/// expression a bit easier to read. +/// Sometimes part of a path contains numerous meta-characters. This function can be used to +/// reliably escape them while making the unescaped part of the expression a bit easier to read. /// /// ```rust /// use wax::Glob; @@ -1159,8 +1116,8 @@ where /// let expression = format!("{}{}", "logs/**/", wax::escape("ingest[01](L).txt")); /// let glob = Glob::new(&expression).unwrap(); /// ``` -// It is possible to call this function using a mutable reference, which may -// appear to mutate the parameter in place. +// It is possible to call this function using a mutable reference, which may appear to mutate the +// parameter in place. #[must_use] pub fn escape(unescaped: &str) -> Cow { const ESCAPE: char = '\\'; @@ -1183,10 +1140,9 @@ pub fn escape(unescaped: &str) -> Cow { // TODO: Is it possible for `:` and `,` to be contextual meta-characters? /// Returns `true` if the given character is a meta-character. /// -/// This function does **not** return `true` for contextual meta-characters that -/// may only be escaped in particular contexts, such as hyphens `-` in character -/// class expressions. To detect these characters, use -/// [`is_contextual_meta_character`]. +/// This function does **not** return `true` for contextual meta-characters that may only be +/// escaped in particular contexts, such as hyphens `-` in character class expressions. To detect +/// these characters, use [`is_contextual_meta_character`]. /// /// [`is_contextual_meta_character`]: crate::is_contextual_meta_character pub const fn is_meta_character(x: char) -> bool { @@ -1198,10 +1154,9 @@ pub const fn is_meta_character(x: char) -> bool { /// Returns `true` if the given character is a contextual meta-character. /// -/// Contextual meta-characters may only be escaped in particular contexts, such -/// as hyphens `-` in character class expressions. 
Elsewhere, they are -/// interpreted as literals. To detect non-contextual meta-characters, use -/// [`is_meta_character`]. +/// Contextual meta-characters may only be escaped in particular contexts, such as hyphens `-` in +/// character class expressions. Elsewhere, they are interpreted as literals. To detect +/// non-contextual meta-characters, use [`is_meta_character`]. /// /// [`is_meta_character`]: crate::is_meta_character pub const fn is_contextual_meta_character(x: char) -> bool { @@ -1227,9 +1182,8 @@ fn parse_and_diagnose(expression: &str) -> DiagnosticResult> }) } -// TODO: Construct paths from components in tests. In practice, using string -// literals works, but is technically specific to platforms that support -// `/` as a separator. +// TODO: Construct paths from components in tests. In practice, using string literals works, but is +// technically specific to platforms that support `/` as a separator. #[cfg(test)] mod tests { use std::path::Path; @@ -1455,9 +1409,9 @@ mod tests { assert!(Glob::new("a/[a-z-]/c").is_err()); assert!(Glob::new("a/[-a-z]/c").is_err()); assert!(Glob::new("a/[-]/c").is_err()); - // NOTE: Without special attention to escaping and character parsing, - // this could be mistakenly interpreted as an empty range over the - // character `-`. This should be rejected. + // NOTE: Without special attention to escaping and character parsing, this could be + // mistakenly interpreted as an empty range over the character `-`. This should be + // rejected. assert!(Glob::new("a/[---]/c").is_err()); assert!(Glob::new("a/[[]/c").is_err()); assert!(Glob::new("a/[]]/c").is_err()); @@ -1528,8 +1482,8 @@ mod tests { assert!(Glob::new("/").is_err()); } - // Rooted repetitions are rejected if their lower bound is zero; any other - // lower bound is accepted. + // Rooted repetitions are rejected if their lower bound is zero; any other lower bound is + // accepted. 
#[test] fn reject_glob_with_rooted_repetition_tokens() { assert!(Glob::new("maybe").is_err()); @@ -1620,8 +1574,8 @@ mod tests { assert!(!glob.is_match(Path::new("a/bb"))); assert!(!glob.is_match(Path::new("a/b/c"))); - // There are no variant tokens with which to capture, but the matched - // text should always be available. + // There are no variant tokens with which to capture, but the matched text should always be + // available. assert_eq!( "a/b", glob.matched(&CandidatePath::from(Path::new("a/b"))) @@ -1725,9 +1679,8 @@ mod tests { #[cfg(any(unix, windows))] #[test] fn match_glob_with_empty_class_tokens() { - // A character class is "empty" if it only matches separators on the - // target platform. Such a character class only matches `NUL` and so - // effectively matches nothing. + // A character class is "empty" if it only matches separators on the target platform. Such + // a character class only matches `NUL` and so effectively matches nothing. let glob = Glob::new("a[/]b").unwrap(); assert!(!glob.is_match(Path::new("a/b"))); @@ -2051,13 +2004,13 @@ mod tests { assert!(Glob::new("").unwrap().has_root()); assert!(!Glob::new("").unwrap().has_root()); - // This is not rooted, because character classes may not match - // separators. This example compiles an "empty" character class, which - // attempts to match `NUL` and so effectively matches nothing. + // This is not rooted, because character classes may not match separators. This example + // compiles an "empty" character class, which attempts to match `NUL` and so effectively + // matches nothing. #[cfg(any(unix, windows))] assert!(!Glob::new("[/]root").unwrap().has_root()); - // The leading forward slash in tree tokens is meaningful. When omitted, - // at the beginning of an expression, the resulting glob is not rooted. + // The leading forward slash in tree tokens is meaningful. When omitted, at the beginning + // of an expression, the resulting glob is not rooted. 
assert!(!Glob::new("**/").unwrap().has_root()); } diff --git a/src/rule.rs b/src/rule.rs index 92416ee..cfbfe1f 100644 --- a/src/rule.rs +++ b/src/rule.rs @@ -1,17 +1,15 @@ //! Rules and limitations for token sequences. //! -//! This module provides the `check` function, which examines a token sequence -//! and emits an error if the sequence violates rules. Rules are invariants that -//! are difficult or impossible to enforce when parsing text and primarily -//! detect and reject token sequences that produce anomalous, meaningless, or -//! unexpected globs (regular expressions) when compiled. +//! This module provides the `check` function, which examines a token sequence and emits an error +//! if the sequence violates rules. Rules are invariants that are difficult or impossible to +//! enforce when parsing text and primarily detect and reject token sequences that produce +//! anomalous, meaningless, or unexpected globs (regular expressions) when compiled. //! -//! Most rules concern alternatives, which have complex interactions with -//! neighboring tokens. +//! Most rules concern alternatives, which have complex interactions with neighboring tokens. -// TODO: The `check` function fails fast and either report no errors or exactly -// one error. To better support diagnostics, `check` should probably -// perform an exhaustive analysis and report zero or more errors. +// TODO: The `check` function fails fast and either report no errors or exactly one error. To +// better support diagnostics, `check` should probably perform an exhaustive analysis and +// report zero or more errors. use itertools::Itertools as _; #[cfg(feature = "miette")] @@ -31,13 +29,12 @@ use crate::{Any, BuildError, Combine, Glob}; /// Maximum invariant size. /// -/// This size is equal to or greater than the maximum size of a path on -/// supported platforms. 
The primary purpose of this limit is to mitigate -/// malicious or mistaken expressions that encode very large invariant text, -/// namely via repetitions. +/// This size is equal to or greater than the maximum size of a path on supported platforms. The +/// primary purpose of this limit is to mitigate malicious or mistaken expressions that encode very +/// large invariant text, namely via repetitions. /// -/// This limit is independent of the back end encoding. This code does not rely -/// on errors in the encoder by design, such as size limitations. +/// This limit is independent of the back end encoding. This code does not rely on errors in the +/// encoder by design, such as size limitations. const MAX_INVARIANT_SIZE: InvariantSize = InvariantSize::new(0x10000); trait IteratorExt: Iterator + Sized { @@ -174,10 +171,9 @@ impl Terminals { /// Describes errors concerning rules and patterns in a glob expression. /// /// Patterns must follow rules described in the [repository -/// documentation](https://github.com/olson-sean-k/wax/blob/master/README.md). -/// These rules are designed to avoid nonsense glob expressions and ambiguity. -/// If a glob expression parses but violates these rules or is otherwise -/// malformed, then this error is returned by some APIs. +/// documentation](https://github.com/olson-sean-k/wax/blob/master/README.md). These rules are +/// designed to avoid nonsense glob expressions and ambiguity. If a glob expression parses but +/// violates these rules or is otherwise malformed, then this error is returned by some APIs. #[derive(Debug, Error)] #[error("malformed glob expression: {kind}")] pub struct RuleError<'t> { @@ -290,12 +286,10 @@ impl<'t> Checked> { I: IntoIterator>, { Checked { - // `token::any` constructs an alternative from the input token - // trees. The alternative is not checked, but the `any` combinator - // is explicitly allowed to ignore the subset of rules that may be - // violated by this construction. 
In particular, branches may or may - // not have roots such that the alternative can match overlapping - // directory trees. + // `token::any` constructs an alternative from the input token trees. The alternative + // is not checked, but the `any` combinator is explicitly allowed to ignore the subset + // of rules that may be violated by this construction. In particular, branches may or + // may not have roots such that the alternative can match overlapping directory trees. inner: token::any( trees .into_iter() @@ -476,8 +470,8 @@ fn group<'t>(tokenized: &Tokenized<'t>) -> Result<(), RuleError<'t>> { } fn diagnose<'i, 't>( - // This is a somewhat unusual API, but it allows the lifetime `'t` of - // the `Cow` to be properly forwarded to output values (`RuleError`). + // This is a somewhat unusual API, but it allows the lifetime `'t` of the `Cow` to be + // properly forwarded to output values (`RuleError`). #[allow(clippy::ptr_arg)] expression: &'i Cow<'t, str>, token: &'i Token<'t>, label: &'static str, @@ -495,8 +489,8 @@ fn group<'t>(tokenized: &Tokenized<'t>) -> Result<(), RuleError<'t>> { } fn recurse<'i, 't, I>( - // This is a somewhat unusual API, but it allows the lifetime `'t` of - // the `Cow` to be properly forwarded to output values (`RuleError`). + // This is a somewhat unusual API, but it allows the lifetime `'t` of the `Cow` to be + // properly forwarded to output values (`RuleError`). #[allow(clippy::ptr_arg)] expression: &Cow<'t, str>, tokens: I, outer: Outer<'i, 't>, @@ -541,8 +535,7 @@ fn group<'t>(tokenized: &Tokenized<'t>) -> Result<(), RuleError<'t>> { ) -> Result<(), CorrelatedError> { let Outer { left, right } = outer; match terminals.map(|token| (token, token.kind())) { - // The group is preceded by component boundaries; disallow leading - // separators. + // The group is preceded by component boundaries; disallow leading separators. // // For example, `foo/{bar,/}`. 
Only((inner, Separator(_))) | StartEnd((inner, Separator(_)), _) @@ -554,8 +547,7 @@ fn group<'t>(tokenized: &Tokenized<'t>) -> Result<(), RuleError<'t>> { inner, )) }, - // The group is followed by component boundaries; disallow trailing - // separators. + // The group is followed by component boundaries; disallow trailing separators. // // For example, `{foo,/}/bar`. Only((inner, Separator(_))) | StartEnd(_, (inner, Separator(_))) @@ -575,8 +567,7 @@ fn group<'t>(tokenized: &Tokenized<'t>) -> Result<(), RuleError<'t>> { None, inner, )), - // The group is preceded by component boundaries; disallow leading - // tree tokens. + // The group is preceded by component boundaries; disallow leading tree tokens. // // For example, `foo/{bar,**/baz}`. StartEnd((inner, Wildcard(Tree { .. })), _) if has_ending_component_boundary(left) => { @@ -586,8 +577,7 @@ fn group<'t>(tokenized: &Tokenized<'t>) -> Result<(), RuleError<'t>> { inner, )) }, - // The group is followed by component boundaries; disallow trailing - // tree tokens. + // The group is followed by component boundaries; disallow trailing tree tokens. // // For example, `{foo,bar/**}/baz`. StartEnd(_, (inner, Wildcard(Tree { .. }))) @@ -599,8 +589,7 @@ fn group<'t>(tokenized: &Tokenized<'t>) -> Result<(), RuleError<'t>> { inner, )) }, - // The group is prefixed by a zero-or-more token; disallow leading - // zero-or-more tokens. + // The group is prefixed by a zero-or-more token; disallow leading zero-or-more tokens. // // For example, `foo*{bar,*,baz}`. Only((inner, Wildcard(ZeroOrMore(_)))) @@ -613,8 +602,8 @@ fn group<'t>(tokenized: &Tokenized<'t>) -> Result<(), RuleError<'t>> { inner, )) }, - // The group is followed by a zero-or-more token; disallow trailing - // zero-or-more tokens. + // The group is followed by a zero-or-more token; disallow trailing zero-or-more + // tokens. // // For example, `{foo,*,bar}*baz`. 
Only((inner, Wildcard(ZeroOrMore(_)))) @@ -637,8 +626,7 @@ fn group<'t>(tokenized: &Tokenized<'t>) -> Result<(), RuleError<'t>> { ) -> Result<(), CorrelatedError> { let Outer { left, .. } = outer; match terminals.map(|token| (token, token.kind())) { - // The alternative is preceded by a termination; disallow rooted - // sub-globs. + // The alternative is preceded by a termination; disallow rooted sub-globs. // // For example, `{foo,/}` or `{foo,/bar}`. Only((inner, Separator(_))) | StartEnd((inner, Separator(_)), _) if left.is_none() => { @@ -648,8 +636,7 @@ fn group<'t>(tokenized: &Tokenized<'t>) -> Result<(), RuleError<'t>> { inner, )) }, - // The alternative is preceded by a termination; disallow rooted - // sub-globs. + // The alternative is preceded by a termination; disallow rooted sub-globs. // // For example, `{/**/foo,bar}`. Only((inner, Wildcard(Tree { has_root: true }))) @@ -674,8 +661,8 @@ fn group<'t>(tokenized: &Tokenized<'t>) -> Result<(), RuleError<'t>> { let Outer { left, .. } = outer; let (lower, _) = bounds; match terminals.map(|token| (token, token.kind())) { - // The repetition is preceded by a termination; disallow rooted - // sub-globs with a zero lower bound. + // The repetition is preceded by a termination; disallow rooted sub-globs with a zero + // lower bound. // // For example, ``. Only((inner, Separator(_))) | StartEnd((inner, Separator(_)), _) @@ -687,8 +674,8 @@ fn group<'t>(tokenized: &Tokenized<'t>) -> Result<(), RuleError<'t>> { inner, )) }, - // The repetition is preceded by a termination; disallow rooted - // sub-globs with a zero lower bound. + // The repetition is preceded by a termination; disallow rooted sub-globs with a zero + // lower bound. // // For example, ``. 
Only((inner, Wildcard(Tree { has_root: true }))) @@ -758,10 +745,9 @@ fn bounds<'t>(tokenized: &Tokenized<'t>) -> Result<(), RuleError<'t>> { fn size<'t>(tokenized: &Tokenized<'t>) -> Result<(), RuleError<'t>> { if let Some((_, token)) = tokenized .walk() - // TODO: This is expensive. For each token tree encountered, the - // tree is traversed to determine its variance. If variant, - // the tree is traversed and queried again, revisiting the - // same tokens to recompute their local variance. + // TODO: This is expensive. For each token tree encountered, the tree is traversed to + // determine its variance. If variant, the tree is traversed and queried again, + // revisiting the same tokens to recompute their local variance. .find(|(_, token)| { token .variance::() diff --git a/src/token/mod.rs b/src/token/mod.rs index 3a58c70..f171abf 100644 --- a/src/token/mod.rs +++ b/src/token/mod.rs @@ -84,18 +84,16 @@ impl<'t> Tokenized<'t, Annotation> { .map(|token| token.annotation().1) .sum(); - // Drain invariant tokens from the beginning of the token sequence and - // unroot any tokens at the beginning of the variant sequence (tree - // wildcards). Finally, translate spans and discard the corresponding - // invariant bytes in the expression. + // Drain invariant tokens from the beginning of the token sequence and unroot any tokens at + // the beginning of the variant sequence (tree wildcards). Finally, translate spans and + // discard the corresponding invariant bytes in the expression. tokens.drain(0..n); if tokens.first_mut().map_or(false, Token::unroot) { - // TODO: The relationship between roots, the unrooting operation, - // and the span in an expression that represents such a root - // (if any) is not captured by these APIs very well. Perhaps - // `unroot` should do more here? - // Pop additional bytes for the root separator expression if the - // initial token has lost a root. 
+ // TODO: The relationship between roots, the unrooting operation, and the span in an + // expression that represents such a root (if any) is not captured by these APIs + // very well. Perhaps `unroot` should do more here? + // Pop additional bytes for the root separator expression if the initial token has lost + // a root. offset += ROOT_SEPARATOR_EXPRESSION.len(); } for token in tokens.iter_mut() { @@ -508,9 +506,9 @@ impl<'i, 't> UnitVariance> for &'i Archetype { impl<'i> UnitVariance for &'i Archetype { fn unit_variance(self) -> Variance { - // This is pessimistic and assumes that the code point will require four - // bytes when encoded as UTF-8. This is technically possible, but most - // commonly only one or two bytes will be required. + // This is pessimistic and assumes that the code point will require four bytes when encoded + // as UTF-8. This is technically possible, but most commonly only one or two bytes will be + // required. self.domain_variance().map_invariance(|_| 4.into()) } } @@ -542,9 +540,9 @@ where { fn unit_variance(self) -> Variance { if self.is_negated { - // It is not feasible to encode a character class that matches all - // UTF-8 text and therefore nothing when negated, and so a character - // class must be variant if it is negated. + // It is not feasible to encode a character class that matches all UTF-8 text and + // therefore nothing when negated, and so a character class must be variant if it is + // negated. Variance::Variant(Boundedness::Closed) } else { @@ -585,9 +583,8 @@ impl<'t> Literal<'t> { } pub fn has_variant_casing(&self) -> bool { - // If path case sensitivity agrees with the literal case sensitivity, - // then the literal is not variant. Otherwise, the literal is variant if - // it contains characters with casing. + // If path case sensitivity agrees with the literal case sensitivity, then the literal is + // not variant. Otherwise, the literal is variant if it contains characters with casing. 
(PATHS_ARE_CASE_INSENSITIVE != self.is_case_insensitive) && self.text.has_casing() } } @@ -614,11 +611,10 @@ impl<'i, 't> UnitVariance for &'i Literal<'t> { pub struct Repetition<'t, A = ()> { tokens: Vec>, lower: usize, - // This representation is not ideal, as it does not statically enforce the - // invariant that the upper bound is greater than or equal to the lower - // bound. For example, this field could instead be a summand. However, - // tokens must closely resemble their glob expression representations so - // that errors in expressions can be deferred and presented more clearly. + // This representation is not ideal, as it does not statically enforce the invariant that the + // upper bound is greater than or equal to the lower bound. For example, this field could + // instead be a summand. However, tokens must closely resemble their glob expression + // representations so that errors in expressions can be deferred and presented more clearly. // Failures in the parser are difficult to describe. upper: Option, } @@ -698,10 +694,10 @@ where let variance = self .tokens() .iter() - // Coalesce tokens with open variance with separators. This isn't - // destructive and doesn't affect invariance, because this only - // happens in the presence of open variance, which means that the - // repetition is variant (and has no invariant size or text). + // Coalesce tokens with open variance with separators. This isn't destructive and + // doesn't affect invariance, because this only happens in the presence of open + // variance, which means that the repetition is variant (and has no invariant size or + // text). .coalesce(|left, right| { match ( (left.kind(), left.unit_variance()), @@ -714,13 +710,12 @@ where }) .conjunctive_variance(); match self.upper { - // Repeating invariance can cause overflows, very large allocations, - // and very inefficient comparisons (e.g., comparing very large - // strings). 
This is detected by both `encode::compile` and - // `rule::check` (in distinct but similar ways). Querying token - // trees for their invariance must be done with care (after using - // these functions) to avoid expanding pathological invariant - // expressions like ``. + // Repeating invariance can cause overflows, very large allocations, and very + // inefficient comparisons (e.g., comparing very large strings). This is detected by + // both `encode::compile` and `rule::check` (in distinct but similar ways). Querying + // token trees for their invariance must be done with care (after using these + // functions) to avoid expanding pathological invariant expressions like + // ``. Some(_) if self.is_converged() => { variance.map_invariance(|invariance| invariance * self.lower) }, diff --git a/src/token/parse.rs b/src/token/parse.rs index 4b6a85f..e883d76 100644 --- a/src/token/parse.rs +++ b/src/token/parse.rs @@ -89,9 +89,9 @@ impl LocatedError for ErrorEntry<'_> { /// Describes errors that occur when parsing a glob expression. /// -/// Common examples of glob expressions that cannot be parsed are alternative -/// and repetition patterns with missing delimiters and ambiguous patterns, such -/// as `src/***/*.rs` or `{.local,.config/**/*.toml`. +/// Common examples of glob expressions that cannot be parsed are alternative and repetition +/// patterns with missing delimiters and ambiguous patterns, such as `src/***/*.rs` or +/// `{.local,.config/**/*.toml`. #[derive(Clone, Debug, Error)] #[error("failed to parse glob expression")] pub struct ParseError<'t> { diff --git a/src/token/variance.rs b/src/token/variance.rs index bf7a6b6..249fd49 100644 --- a/src/token/variance.rs +++ b/src/token/variance.rs @@ -49,10 +49,9 @@ where T: Invariance, { fn disjunctive_variance(self) -> Variance { - // TODO: This implementation is incomplete. Unbounded variance (and - // unbounded depth) are "infectious" when disjunctive. 
If any unit - // variance is variant and unbounded (open), then the disjunctive - // variance should be the same. + // TODO: This implementation is incomplete. Unbounded variance (and unbounded depth) are + // "infectious" when disjunctive. If any unit variance is variant and unbounded + // (open), then the disjunctive variance should be the same. // There are three distinct possibilities for disjunctive variance. // // - The iterator is empty and there are no unit variances to @@ -221,10 +220,9 @@ impl Mul for InvariantSize { } } -// TODO: The derived `PartialEq` implementation is incomplete and does not -// detect contiguous like fragments that are equivalent to an aggregated -// fragment. This works, but relies on constructing `InvariantText` by -// consistently appending fragments. +// TODO: The derived `PartialEq` implementation is incomplete and does not detect contiguous like +// fragments that are equivalent to an aggregated fragment. This works, but relies on +// constructing `InvariantText` by consistently appending fragments. #[derive(Clone, Debug, Eq, PartialEq)] pub struct InvariantText<'t> { fragments: VecDeque>, @@ -384,13 +382,11 @@ impl<'t> PartialEq for InvariantFragment<'t> { match (self, other) { (Nominal(ref left), Nominal(ref right)) => { if PATHS_ARE_CASE_INSENSITIVE { - // This comparison uses Unicode simple case folding. It - // would be better to use full case folding (and better - // still to use case folding appropriate for the language of - // the text), but this approach is used to have consistent - // results with the regular expression encoding of compiled - // globs. A more comprehensive alternative would be to use - // something like the `focaccia` crate. See also + // This comparison uses Unicode simple case folding. 
It would be better to use + // full case folding (and better still to use case folding appropriate for the + // language of the text), but this approach is used to have consistent results + // with the regular expression encoding of compiled globs. A more comprehensive + // alternative would be to use something like the `focaccia` crate. See also // `CharExt::has_casing`. encode::case_folded_eq(left.as_ref(), right.as_ref()) } @@ -423,16 +419,13 @@ impl Boundedness { #[derive(Clone, Debug, Eq)] pub enum Variance { Invariant(T), - // NOTE: In this context, _boundedness_ refers to whether or not a variant - // token or expression is _constrained_ or _unconstrained_. For - // example, the expression `**` is unconstrained and matches _any and - // all_, while the expression `a*z` is constrained and matches _some_. - // Note that both expressions match an infinite number of components, - // but the constrained expression does *not* match any component. - // Boundedness does **not** consider length, only whether or not some - // part of an expression is constrained to a known set of matches. As - // such, both the expressions `?` and `*` are variant with open - // bounds. + // In this context, _boundedness_ refers to whether or not a variant token or expression is + // _constrained_ or _unconstrained_. For example, the expression `**` is unconstrained and + // matches _any and all_, while the expression `a*z` is constrained and matches _some_. Note + // that both expressions match an infinite number of components, but the constrained expression + // does *not* match any component. Boundedness does **not** consider length, only whether or + // not some part of an expression is constrained to a known set of matches. As such, both the + // expressions `?` and `*` are variant with open bounds. Variant(Boundedness), } @@ -498,8 +491,7 @@ where } } -// TODO: Is there some way to unify this with -// `invariant_text_prefix_upper_bound`? 
+// TODO: Is there some way to unify this with `invariant_text_prefix_upper_bound`? pub fn invariant_text_prefix<'t, A, I>(tokens: I) -> String where A: 't, @@ -512,13 +504,12 @@ where .peek() .map_or(false, |token| !token.has_sub_tokens() && token.has_root()) { - // Push a preceding separator if the first token has a root and is not a - // group. This ensures that initiating separators and tree wildcards - // express a root in invariant prefixes. + // Push a preceding separator if the first token has a root and is not a group. This + // ensures that initiating separators and tree wildcards express a root in invariant + // prefixes. prefix.push_str(separator); } - // TODO: Replace `map`, `take_while`, and `flatten` with `map_while` - // when it stabilizes. + // TODO: Replace `map`, `take_while`, and `flatten` with `map_while` when it stabilizes. prefix.push_str( &token::components(tokens) .map(|component| { @@ -570,8 +561,8 @@ where /// Returns `true` if the token tree is exhaustive. /// -/// A glob expression and its token tree are exhaustive if the terminal -/// component has unbounded depth and unbounded variance. +/// A glob expression and its token tree are exhaustive if the terminal component has unbounded +/// depth and unbounded variance. pub fn is_exhaustive<'i, 't, A, I>(tokens: I) -> bool where 't: 'i, diff --git a/src/walk.rs b/src/walk.rs index d02662a..ca991ee 100644 --- a/src/walk.rs +++ b/src/walk.rs @@ -16,8 +16,7 @@ use crate::{BuildError, CandidatePath, Combine, Glob}; pub type WalkItem<'e> = Result, WalkError>; -/// Describes errors that occur when matching a [`Glob`] against a directory -/// tree. +/// Describes errors that occur when matching a [`Glob`] against a directory tree. /// /// `WalkError` implements conversion into [`io::Error`]. /// @@ -39,8 +38,7 @@ impl WalkError { self.kind.path() } - /// Gets the depth from [the root][`Walk::root`] at which the error - /// occurred. 
+ /// Gets the depth from [the root][`Walk::root`] at which the error occurred. /// /// [`Walk::root`]: crate::Walk::root pub fn depth(&self) -> usize { @@ -109,20 +107,19 @@ impl WalkErrorKind { /// Traverses a directory tree via a `Walk` instance. /// -/// This macro emits an interruptable loop that executes a block of code -/// whenever a `WalkEntry` or error is encountered while traversing a directory -/// tree. The block may return from its function or otherwise interrupt and -/// subsequently resume the loop. +/// This macro emits an interruptable loop that executes a block of code whenever a `WalkEntry` or +/// error is encountered while traversing a directory tree. The block may return from its function +/// or otherwise interrupt and subsequently resume the loop. /// -/// Note that if the block attempts to emit a `WalkEntry` across a function -/// boundary, then the entry contents must be copied via `into_owned`. +/// Note that if the block attempts to emit a `WalkEntry` across a function boundary, then the +/// entry contents must be copied via `into_owned`. macro_rules! walk { ($state:expr => |$entry:ident| $f:block) => { use itertools::EitherOrBoth::{Both, Left, Right}; use itertools::Position::{First, Last, Middle, Only}; - // `while-let` avoids a mutable borrow of `walk`, which would prevent a - // subsequent call to `skip_current_dir` within the loop body. + // `while-let` avoids a mutable borrow of `walk`, which would prevent a subsequent call to + // `skip_current_dir` within the loop body. #[allow(clippy::while_let_on_iterator)] #[allow(unreachable_code)] 'walk: while let Some(entry) = $state.walk.next() { @@ -152,8 +149,8 @@ macro_rules! walk { match (position, candidate) { (First | Middle, Both(component, pattern)) => { if !pattern.is_match(component.as_ref()) { - // Do not descend into directories that do not match - // the corresponding component pattern. 
+ // Do not descend into directories that do not match the corresponding + // component pattern. if entry.file_type().is_dir() { $state.walk.skip_current_dir(); } @@ -174,8 +171,8 @@ macro_rules! walk { } } else { - // Do not descend into directories that do not match - // the corresponding component pattern. + // Do not descend into directories that do not match the corresponding + // component pattern. if entry.file_type().is_dir() { $state.walk.skip_current_dir(); } @@ -200,9 +197,8 @@ macro_rules! walk { } } } - // If the loop is not entered, check for a match. This may indicate - // that the `Glob` is empty and a single invariant path may be - // matched. + // If the loop is not entered, check for a match. This may indicate that the `Glob` is + // empty and a single invariant path may be matched. let path = CandidatePath::from(path); if let Some(matched) = $state.pattern.captures(path.as_ref()).map(MatchedText::from) { let $entry = Ok(WalkEntry { @@ -217,13 +213,12 @@ macro_rules! walk { /// An [`Iterator`] over [`WalkEntry`]s that can filter directory trees. /// -/// A `FileIterator` is a `TreeIterator` that yields [`WalkEntry`]s. This trait -/// is implemented by [`Walk`] and adaptors like [`FilterTree`]. A -/// `TreeIterator` is an iterator that reads its items from a tree and therefore -/// can meaningfully filter not only items but their corresponding sub-trees to -/// avoid unnecessary work. To that end, this trait provides the `filter_tree` -/// function, which allows directory trees to be discarded (not read from the -/// file system) when matching [`Glob`]s against directory trees. +/// A `FileIterator` is a `TreeIterator` that yields [`WalkEntry`]s. This trait is implemented by +/// [`Walk`] and adaptors like [`FilterTree`]. A `TreeIterator` is an iterator that reads its items +/// from a tree and therefore can meaningfully filter not only items but their corresponding +/// sub-trees to avoid unnecessary work. 
To that end, this trait provides the `filter_tree` +/// function, which allows directory trees to be discarded (not read from the file system) when +/// matching [`Glob`]s against directory trees. /// /// [`filter_tree`]: crate::FileIterator::filter_tree /// [`Glob`]: crate::Glob @@ -233,28 +228,24 @@ macro_rules! walk { pub trait FileIterator: Sized + TreeIterator> { /// Filters [`WalkEntry`]s and controls the traversal of directory trees. /// - /// This function creates an adaptor that filters [`WalkEntry`]s and - /// furthermore specifies how iteration proceeds to traverse directory - /// trees. The adaptor accepts a function that, when discarding a - /// [`WalkEntry`], yields a [`FilterTarget`]. **If the entry refers to a - /// directory and [`FilterTarget::Tree`] is returned by the function, then - /// iteration will not descend into that directory and the tree will not be - /// read from the file system.** Therefore, this adaptor should be preferred - /// over functions like [`Iterator::filter`] when discarded directories do - /// not need to be read. + /// This function creates an adaptor that filters [`WalkEntry`]s and furthermore specifies how + /// iteration proceeds to traverse directory trees. The adaptor accepts a function that, when + /// discarding a [`WalkEntry`], yields a [`FilterTarget`]. **If the entry refers to a directory + /// and [`FilterTarget::Tree`] is returned by the function, then iteration will not descend + /// into that directory and the tree will not be read from the file system.** Therefore, this + /// adaptor should be preferred over functions like [`Iterator::filter`] when discarded + /// directories do not need to be read. /// - /// Errors are not filtered, so if an error occurs reading a file at a path - /// that would have been discarded, then that error is still yielded by the - /// iterator. 
+ /// Errors are not filtered, so if an error occurs reading a file at a path that would have + /// been discarded, then that error is still yielded by the iterator. /// /// # Examples /// - /// The [`FilterTree`] adaptor can be used to apply additional custom - /// filtering that avoids unnecessary directory reads. The following example - /// filters out hidden files on Unix and Windows. On Unix, hidden files are - /// filtered out nominally via [`not`]. On Windows, `filter_tree` is used to - /// detect the [hidden attribute][attributes]. In both cases, the adaptor - /// does not read conventionally hidden directory trees. + /// The [`FilterTree`] adaptor can be used to apply additional custom filtering that avoids + /// unnecessary directory reads. The following example filters out hidden files on Unix and + /// Windows. On Unix, hidden files are filtered out nominally via [`not`]. On Windows, + /// `filter_tree` is used to detect the [hidden attribute][attributes]. In both cases, the + /// adaptor does not read conventionally hidden directory trees. /// /// ```rust,no_run /// use wax::Glob; @@ -263,8 +254,8 @@ pub trait FileIterator: Sized + TreeIterator> { /// /// let glob = Glob::new("**/*.(?i){jpg,jpeg}").unwrap(); /// let walk = glob.walk("./Pictures"); - /// // Filter out nominally hidden files on Unix. Like `filter_tree`, `not` - /// // does not perform unnecessary reads of directory trees. + /// // Filter out nominally hidden files on Unix. Like `filter_tree`, `not` does not perform + /// // unnecessary reads of directory trees. /// #[cfg(unix)] /// let walk = walk.not(["**/.*/**"]).unwrap(); /// // Filter out files with the hidden attribute on Windows. @@ -327,9 +318,9 @@ impl TreeIterator for walkdir::IntoIter { /// Negated combinator that efficiently filters [`WalkEntry`]s. /// /// Determines an appropriate [`FilterTarget`] for a [`WalkEntry`] based on the -/// [exhaustiveness][`Pattern::is_exhaustive`] of its component [`Pattern`]s. 
-/// This can be used with [`FilterTree`] to efficiently filter [`WalkEntry`]s -/// without reading directory trees from the file system when not necessary. +/// [exhaustiveness][`Pattern::is_exhaustive`] of its component [`Pattern`]s. This can be used with +/// [`FilterTree`] to efficiently filter [`WalkEntry`]s without reading directory trees from the +/// file system when not necessary. /// /// [`FilterTarget`]: crate::FilterTarget /// [`FilterTree`]: crate::FilterTree @@ -346,14 +337,14 @@ pub struct WalkNegation { impl WalkNegation { /// Combines glob expressions into a `WalkNegation`. /// - /// This function accepts an [`IntoIterator`] with items that implement - /// [`Combine`], such as [`Glob`] and `&str`. + /// This function accepts an [`IntoIterator`] with items that implement [`Combine`], such as + /// [`Glob`] and `&str`. /// /// # Errors /// - /// Returns an error if any of the inputs fail to build. If the inputs are a - /// compiled [`Pattern`] types such as [`Glob`], then this only occurs if - /// the compiled program is too large. + /// Returns an error if any of the inputs fail to build. If the inputs are a compiled + /// [`Pattern`] type such as [`Glob`], then this only occurs if the compiled program is too + /// large. /// /// [`Combine`]: crate::Combine /// [`Glob`]: crate::Glob @@ -380,13 +371,11 @@ impl WalkNegation { /// Gets the appropriate [`FilterTarget`] for the given [`WalkEntry`]. /// - /// This function can be used with [`FileIterator::filter_tree`] to - /// effeciently filter [`WalkEntry`]s without reading directory trees from - /// the file system when not necessary. + /// This function can be used with [`FileIterator::filter_tree`] to efficiently filter + /// [`WalkEntry`]s without reading directory trees from the file system when not necessary. /// /// Returns [`FilterTarget::Tree`] if the [`WalkEntry`] matches an - /// [exhaustive glob expression][`Pattern::is_exhaustive`], such as - /// `secret/**`.
+ /// Returns [`FilterTarget::Tree`] if the [`WalkEntry`] matches an [exhaustive glob + /// expression][`Pattern::is_exhaustive`], such as `secret/**`. /// /// [`FileIterator::filter_tree`]: crate::FileIterator::filter_tree /// [`FilterTarget`]: crate::FilterTarget @@ -396,8 +385,7 @@ impl WalkNegation { pub fn target(&self, entry: &WalkEntry) -> Option { let path = entry.to_candidate_path(); if self.exhaustive.is_match(path.as_ref()) { - // Do not descend into directories that match the exhaustive - // negation. + // Do not descend into directories that match the exhaustive negation. Some(FilterTarget::Tree) } else if self.nonexhaustive.is_match(path.as_ref()) { @@ -411,9 +399,9 @@ impl WalkNegation { /// Configuration for interpreting symbolic links. /// -/// Determines how symbolic links are interpreted when traversing directory -/// trees using functions like [`Glob::walk`]. **By default, symbolic links are -/// read as regular files and their targets are ignored.** +/// Determines how symbolic links are interpreted when traversing directory trees using functions +/// like [`Glob::walk`]. **By default, symbolic links are read as regular files and their targets +/// are ignored.** /// /// [`Glob::walk`]: crate::Glob::walk #[cfg_attr(docsrs, doc(cfg(feature = "walk")))] @@ -421,23 +409,21 @@ impl WalkNegation { pub enum LinkBehavior { /// Read the symbolic link file itself. /// - /// This behavior reads the symbolic link as a regular file. The - /// corresponding [`WalkEntry`] uses the path of the link file and its - /// metadata describes the link file itself. The target is effectively - /// ignored and traversal will **not** follow the link. + /// This behavior reads the symbolic link as a regular file. The corresponding [`WalkEntry`] + /// uses the path of the link file and its metadata describes the link file itself. The target + /// is effectively ignored and traversal will **not** follow the link. 
/// /// [`WalkEntry`]: crate::WalkEntry #[default] ReadFile, /// Read the target of the symbolic link. /// - /// This behavior reads the target of the symbolic link. The corresponding - /// [`WalkEntry`] uses the path of the link file and its metadata describes - /// the target. If the target is a directory, then traversal will follow the - /// link and descend into the target. + /// This behavior reads the target of the symbolic link. The corresponding [`WalkEntry`] uses + /// the path of the link file and its metadata describes the target. If the target is a + /// directory, then traversal will follow the link and descend into the target. /// - /// If a link is reentrant and forms a cycle, then an error will be emitted - /// instead of a [`WalkEntry`] and traversal will not follow the link. + /// If a link is reentrant and forms a cycle, then an error will be emitted instead of a + /// [`WalkEntry`] and traversal will not follow the link. /// /// [`WalkEntry`]: crate::WalkEntry ReadTarget, @@ -445,17 +431,16 @@ pub enum LinkBehavior { /// Configuration for matching [`Glob`]s against directory trees. /// -/// Determines the behavior of the traversal within a directory tree when using -/// functions like [`Glob::walk`]. `WalkBehavior` can be constructed via -/// conversions from types representing its fields. APIs generally accept `impl -/// Into`, so these conversion can be used implicitly. When -/// constructed using such a conversion, `WalkBehavior` will use defaults for -/// any remaining fields. +/// Determines the behavior of the traversal within a directory tree when using functions like +/// [`Glob::walk`]. `WalkBehavior` can be constructed via conversions from types representing its +/// fields. APIs generally accept `impl Into`, so these conversions can be used +/// implicitly. When constructed using such a conversion, `WalkBehavior` will use defaults for any +/// remaining fields.
/// /// # Examples /// -/// By default, symbolic links are interpreted as regular files and targets are -/// ignored. To read linked targets, use [`LinkBehavior::ReadTarget`]. +/// By default, symbolic links are interpreted as regular files and targets are ignored. To read +/// linked targets, use [`LinkBehavior::ReadTarget`]. /// /// ```rust /// use wax::{Glob, LinkBehavior}; @@ -474,16 +459,14 @@ pub enum LinkBehavior { #[cfg_attr(docsrs, doc(cfg(feature = "walk")))] #[derive(Clone, Copy, Debug, Eq, PartialEq)] pub struct WalkBehavior { - // TODO: Consider using a dedicated type for this field. Using primitive - // types does not interact well with conversions used in `walk` APIs. - // For example, if another `usize` field is introduced, then the - // conversions become ambiguous and confusing. + // TODO: Consider using a dedicated type for this field. Using primitive types does not + // interact well with conversions used in `walk` APIs. For example, if another `usize` + // field is introduced, then the conversions become ambiguous and confusing. /// Maximum depth. /// - /// Determines the maximum depth to which a directory tree will be traversed - /// relative to [the root][`Walk::root`]. A depth of zero corresponds to the - /// root and so using such a depth will yield at most one entry for the - /// root. + /// Determines the maximum depth to which a directory tree will be traversed relative to [the + /// root][`Walk::root`]. A depth of zero corresponds to the root and so using such a depth will + /// yield at most one entry for the root. /// /// The default value is [`usize::MAX`]. /// @@ -492,8 +475,8 @@ pub struct WalkBehavior { pub depth: usize, /// Interpretation of symbolic links. /// - /// Determines how symbolic links are interpreted when traversing a - /// directory tree. See [`LinkBehavior`]. + /// Determines how symbolic links are interpreted when traversing a directory tree. See + /// [`LinkBehavior`]. 
/// /// The default value is [`LinkBehavior::ReadFile`]. /// @@ -577,8 +560,8 @@ impl<'g> Walk<'g> { .iter() .any(|token| token.has_component_boundary()) { - // Stop at component boundaries, such as tree wildcards or any - // boundary within a group token. + // Stop at component boundaries, such as tree wildcards or any boundary within a + // group token. break; } regexes.push(Glob::compile(component.tokens().iter().copied())?); @@ -606,10 +589,9 @@ impl<'g> Walk<'g> { /// Calls a closure on each matched file or error. /// - /// This function is similar to [`for_each`], but does not clone paths and - /// [matched text][`MatchedText`] and so may be somewhat more efficient. - /// Note that the closure receives borrowing [`WalkEntry`]s rather than - /// `'static` items. + /// This function is similar to [`for_each`], but does not clone paths and [matched + /// text][`MatchedText`] and so may be somewhat more efficient. Note that the closure receives + /// borrowing [`WalkEntry`]s rather than `'static` items. /// /// [`for_each`]: std::iter::Iterator::for_each /// [`WalkEntry`]: crate::WalkEntry @@ -621,36 +603,33 @@ impl<'g> Walk<'g> { /// Filters [`WalkEntry`]s against negated glob expressions. /// - /// This function creates an adaptor that discards [`WalkEntry`]s that match - /// any of the given glob expressions. This allows for broad negations while - /// matching a [`Glob`] against a directory tree that cannot be achieved - /// using a single glob expression alone. + /// This function creates an adaptor that discards [`WalkEntry`]s that match any of the given + /// glob expressions. This allows for broad negations while matching a [`Glob`] against a + /// directory tree that cannot be achieved using a single glob expression alone. 
/// - /// The adaptor is constructed via [`FilterTree`] and [`WalkNegation`] and - /// therefore does not read directory trees from the file system when a - /// directory matches an [exhaustive glob - /// expression][`Pattern::is_exhaustive`] such as `**/private/**` or - /// `hidden/</>*`. **This function should be preferred when filtering - /// [`WalkEntry`]s against [`Glob`]s, since this avoids potentially large - /// and unnecessary reads**. + /// The adaptor is constructed via [`FilterTree`] and [`WalkNegation`] and therefore does not + /// read directory trees from the file system when a directory matches an [exhaustive glob + /// expression][`Pattern::is_exhaustive`] such as `**/private/**` or `hidden/</>*`. **This + /// function should be preferred when filtering [`WalkEntry`]s against [`Glob`]s, since this + /// avoids potentially large and unnecessary reads**. /// /// # Errors /// - /// Returns an error if any of the inputs fail to build. If the inputs are a - /// compiled [`Pattern`] type such as [`Glob`], then this only occurs if the - /// compiled program is too large. + /// Returns an error if any of the inputs fail to build. If the inputs are a compiled + /// [`Pattern`] type such as [`Glob`], then this only occurs if the compiled program is too + /// large. /// /// # Examples /// - /// Because glob expressions do not support general negations, it is - /// sometimes impossible to express patterns that deny particular text. In - /// such cases, `not` can be used to apply additional patterns as a filter. + /// Because glob expressions do not support general negations, it is sometimes impossible to + /// express patterns that deny particular text. In such cases, `not` can be used to apply + /// additional patterns as a filter. /// /// ```rust,no_run /// use wax::Glob; /// - /// // Find image files, but not if they are beneath a directory with a name that - /// // suggests that they are private. 
+ /// // Find image files, but not if they are beneath a directory with a name that suggests that + /// // they are private. /// let glob = Glob::new("**/*.(?i){jpg,jpeg,png}").unwrap(); /// for entry in glob.walk(".").not(["**/(?i)<.:0,1>private/**"]).unwrap() { /// let entry = entry.unwrap(); @@ -676,10 +655,9 @@ impl<'g> Walk<'g> { /// Gets the root directory of the traversal. /// - /// The root directory is determined by joining the directory path in - /// functions like [`Glob::walk`] with any [invariant - /// prefix](`Glob::partition`) of the [`Glob`]. When a [`Glob`] is rooted, - /// the root directory is the same as the invariant prefix. + /// The root directory is determined by joining the directory path in functions like + /// [`Glob::walk`] with any [invariant prefix](`Glob::partition`) of the [`Glob`]. When a + /// [`Glob`] is rooted, the root directory is the same as the invariant prefix. /// /// The depth specified via [`WalkBehavior`] is relative to this path. /// @@ -717,22 +695,21 @@ impl TreeIterator for Walk<'_> { pub enum FilterTarget { /// Discard the file. /// - /// The [`WalkEntry`] for the given file is discarded by the [`FilterTree`] - /// adaptor. Only this particular file is ignored and if the entry - /// represents a directory, then its tree is still read from the file - /// system. + /// The [`WalkEntry`] for the given file is discarded by the [`FilterTree`] adaptor. Only this + /// particular file is ignored and if the entry represents a directory, then its tree is still + /// read from the file system. /// /// [`FilterTree`]: crate::FilterTree /// [`WalkEntry`]: crate::WalkEntry File, /// Discard the file and its directory tree, if any. /// - /// The [`WalkEntry`] for the given file is discarded by the [`FilterTree`] - /// adaptor. If the entry represents a directory, then its entire tree is - /// ignored and is not read from the file system. + /// The [`WalkEntry`] for the given file is discarded by the [`FilterTree`] adaptor. 
If the + /// entry represents a directory, then its entire tree is ignored and is not read from the file + /// system. /// - /// When the [`WalkEntry`] represents a normal file (not a directory), then - /// this is the same as [`FilterTarget::File`]. + /// When the [`WalkEntry`] represents a normal file (not a directory), then this is the same as + /// [`FilterTarget::File`]. /// /// [`FilterTarget::File`]: crate::FilterTarget::File /// [`FilterTree`]: crate::FilterTree @@ -740,18 +717,16 @@ pub enum FilterTarget { Tree, } -/// Iterator adaptor that filters [`WalkEntry`]s and controls the traversal of -/// directory trees. +/// Iterator adaptor that filters [`WalkEntry`]s and controls the traversal of directory trees. /// -/// This adaptor is returned by [`FileIterator::filter_tree`] and in addition to -/// filtering [`WalkEntry`]s also determines how `TreeIterator`s traverse -/// directory trees. If discarded directories do not need to be read from the -/// file system, then **this adaptor should be preferred over functions like -/// [`Iterator::filter`], because it can avoid potentially large and unnecessary -/// reads.** +/// This adaptor is returned by [`FileIterator::filter_tree`] and in addition to filtering +/// [`WalkEntry`]s also determines how `TreeIterator`s traverse directory trees. If discarded +/// directories do not need to be read from the file system, then **this adaptor should be +/// preferred over functions like [`Iterator::filter`], because it can avoid potentially large and +/// unnecessary reads.** /// -/// `FilterTree` is a `TreeIterator` and supports [`FileIterator::filter_tree`] -/// so `filter_tree` may be chained. +/// `FilterTree` is a `TreeIterator` and supports [`FileIterator::filter_tree`] so `filter_tree` +/// may be chained. /// /// [`FileIterator::filter_tree`]: crate::FileIterator::filter_tree /// [`WalkEntry`]: crate::WalkEntry @@ -839,9 +814,9 @@ impl<'e> WalkEntry<'e> { /// Converts the entry to the relative [`CandidatePath`]. 
/// - /// **This differs from [`path`] and [`into_path`], which are natively - /// encoded and may be absolute.** The [`CandidatePath`] is always relative - /// to [the root][`Walk::root`] of the directory tree. + /// **This differs from [`path`] and [`into_path`], which are natively encoded and may be + /// absolute.** The [`CandidatePath`] is always relative to [the root][`Walk::root`] of the + /// directory tree. /// /// [`CandidatePath`]: crate::CandidatePath /// [`into_path`]: crate::WalkEntry::into_path @@ -859,8 +834,7 @@ impl<'e> WalkEntry<'e> { self.entry.metadata().map_err(WalkError::from) } - /// Gets the depth of the file from [the root][`Walk::root`] of the - /// directory tree. + /// Gets the depth of the file from [the root][`Walk::root`] of the directory tree. /// /// [`Walk::root`]: crate::Walk::root pub fn depth(&self) -> usize { @@ -880,9 +854,9 @@ pub fn walk<'g>( ) -> Walk<'g> { let directory = directory.as_ref(); let WalkBehavior { depth, link } = behavior.into(); - // The directory tree is traversed from `root`, which may include an - // invariant prefix from the glob pattern. `Walk` patterns are only applied - // to path components following this prefix in `root`. + // The directory tree is traversed from `root`, which may include an invariant prefix from the + // glob pattern. `Walk` patterns are only applied to path components following this prefix in + // `root`. let (root, prefix) = invariant_path_prefix(glob.tree.as_ref().tokens()).map_or_else( || { let root = Cow::from(directory); @@ -891,8 +865,8 @@ pub fn walk<'g>( |prefix| { let root = directory.join(&prefix).into(); if prefix.is_absolute() { - // Absolute paths replace paths with which they are joined, - // in which case there is no prefix. + // Absolute paths replace paths with which they are joined, in which case there is + // no prefix. (root, PathBuf::new().into()) } else {