From 076075cad4973fe16afe2e46a837795076bd597e Mon Sep 17 00:00:00 2001 From: Sean Olson Date: Wed, 14 Feb 2024 13:39:25 -0800 Subject: [PATCH] Update comments and various `token` module fixes. --- src/token/mod.rs | 19 +++++-- src/token/variance/bound.rs | 16 ++---- src/token/variance/invariant/mod.rs | 24 ++++---- src/token/variance/invariant/text.rs | 82 ++++++++++++++++++++-------- src/token/variance/mod.rs | 6 -- src/walk/glob.rs | 7 +++ 6 files changed, 96 insertions(+), 58 deletions(-) diff --git a/src/token/mod.rs b/src/token/mod.rs index 5350010..629208e 100644 --- a/src/token/mod.rs +++ b/src/token/mod.rs @@ -267,9 +267,10 @@ impl<'t, A> Tokenized<'t, A> where A: Default + Spanned, { - // TODO: This function is limited to immediate concatenations, but probably shouldn't be. As a - // result, expressions like `{.cargo/**/*.crate}` do not partition (in this case, into - // the path `.cargo` and glob `**/*.crate`). + // TODO: This function is limited to immediate concatenations and so expressions like + // `{.cargo/**/*.crate}` do not partition (into the path `.cargo` and glob + // `{**/*.crate}`). This requires a more sophisticated transformation of the token tree, + // but is probably worth supporting. Maybe this can be done via `FoldMap`. pub fn partition(self) -> (PathBuf, Option) { fn pop_expression_bytes(expression: &str, n: usize) -> &str { let n = cmp::min(expression.len(), n); @@ -473,7 +474,7 @@ impl<'t, A> Token<'t, A> { self.as_leaf().and_then(LeafKind::boundary) } - pub fn variance<'a, T>(&'a self) -> GlobVariance + pub fn variance(&self) -> GlobVariance where TreeVariance: Fold<'t, A, Term = GlobVariance>, T: Conjunction + Invariant, @@ -502,6 +503,8 @@ impl<'t, A> Token<'t, A> { let mut tokens = self.concatenation().iter().peekable(); if tokens.peek().map_or(false, |token| { + // This is a very general predicate, but at time of writing amounts to, "Is this a tree + // wildcard?" token.has_root().is_always() && token.variance::().is_variant() }) { return (0, String::from(Separator::INVARIANT_TEXT)); @@ -594,10 +597,16 @@ impl<'t, A> Token<'t, A> { } } + // TODO: Exhaustiveness should be expressed with `When` rather than `bool`. In particular, + // alternations may _sometimes_ be exhaustive. + // TODO: There is a distinction between exhaustiveness of a glob and exhaustiveness of a match + // (this is also true of other properties). The latter can be important for performance + // optimization, but may also be useful in the public API (perhaps as part of + // `MatchedText`). // NOTE: False positives in this function may cause logic errors and are completely // unacceptable. The discovery of a false positive here almost cetainly indicates a // serious bug. False positives in negative patterns cause matching to incorrectly - // discard directory trees. + // discard directory trees in the `FileIterator::not` combinator. pub fn is_exhaustive(&self) -> bool { self.fold(TreeExhaustiveness) .as_ref() diff --git a/src/token/variance/bound.rs b/src/token/variance/bound.rs index 3078413..1895eef 100644 --- a/src/token/variance/bound.rs +++ b/src/token/variance/bound.rs @@ -8,10 +8,6 @@ use crate::token::variance::Variance; pub use Boundedness::{Bounded, Unbounded}; -// TODO: Reorganize type definitions and `impl`s in this module. Reorder from specific (e.g., -// `Bound`) to general (e.g., `Bound`) or in whichever way is consistent -// with other modules in the crate. - pub trait Cobound: Sized { type Bound; @@ -53,7 +49,7 @@ pub trait OpenedUpperBound: Sized { } #[derive(Clone, Copy, Debug, Eq, Hash, PartialEq)] -pub struct Operand { +pub struct BinaryOperand { pub lhs: T, pub rhs: T, } @@ -65,7 +61,7 @@ pub struct LowerUpper { } pub type LowerUpperBound = LowerUpper, Upper>; -pub type LowerUpperOperand = LowerUpper>, Operand>>; +pub type LowerUpperOperand = LowerUpper>, BinaryOperand>>; pub type NaturalBound = Variance; pub type NaturalRange = Variance; @@ -123,8 +119,8 @@ impl NaturalRange { // here: the closed bound is considered unbounded when zero, but the open bound is only // considered unbounded when `None`. // NOTE: Given the naturals X and Y where X < Y, this defines an unconventional meaning for the - // range [Y,X] and repetitions like `<_:10,1>`: the bounds are reordered, so `<_:10,1>` and - // `<_:1,10>` are the same. + // range [Y,X] and therefore repetitions like `<_:10,1>`: the bounds are reordered, so + // `<_:10,1>` and `<_:1,10>` are the same. pub fn from_closed_and_open(closed: usize, open: T) -> Self where T: Into>, @@ -162,11 +158,11 @@ impl NaturalRange { { let lhs = self; let LowerUpper { lower, upper } = f(LowerUpper { - lower: Operand { + lower: BinaryOperand { lhs: lhs.lower(), rhs: rhs.lower(), }, - upper: Operand { + upper: BinaryOperand { lhs: lhs.upper(), rhs: rhs.upper(), }, diff --git a/src/token/variance/invariant/mod.rs b/src/token/variance/invariant/mod.rs index 6edf29c..036973e 100644 --- a/src/token/variance/invariant/mod.rs +++ b/src/token/variance/invariant/mod.rs @@ -28,26 +28,22 @@ pub trait Invariant: Sized + Identity { fn into_lower_bound(self) -> Boundedness; } -// Breadth is the size of components in a glob expression (for some notion of size, probably UTF-8 -// bytes). For example, the breadth of `{a/,a/b/}` is two. +// Breadth is the maximum size (UTF-8 bytes) of component text in a glob expression. For example, +// the breadth of `{a/,a/b/}` is two (bytes). // -// Composition (i.e., folds, conjunction, etc.) is not implemented for breadth and it is only -// queried in leaf tokens to compute exhaustiveness. No values are associated with its invariant -// nor bounds. -// -// Breadth is probably the least interesting and yet most difficult quantity to compute. -// Determining correct breadth values likely involves: +// Composition is not implemented for breadth and it is only queried from leaf tokens. No values +// are associated with its invariant nor bounds. Breadth is probably the least interesting and yet +// most difficult quantity to compute. Determining correct breadth values likely involves: // // - Complex terms that support both running sums and running maximums across boundaries. // - Searches from the start and end of sub-trees in repetitions, where terminals may interact. -// Consider `` vs. ``, which have breadths of two and one, respectively. These -// searches probably need to cross levels in the token tree somehow. +// Consider `` vs. ``, which have breadths of two and one, respectively. // - Potentially large sets for bounds. Ranges lose too much information, in particular information -// about boundaries when composing terms and bounds. +// about boundaries when composing terms. // -// There are likely other difficult composition problems, though this all assumes computation via a -// tree fold. As such, breadth is only implemented as much as needed. Any requirements on more -// complete breadth computations ought to be considered carefully and avoided. +// There are likely other difficult composition problems. As such, breadth is only implemented as +// much as needed. Any requirements on more complete breadth computations ought to be considered +// carefully. #[derive(Clone, Copy, Debug, Eq, PartialEq)] pub struct Breadth; diff --git a/src/token/variance/invariant/text.rs b/src/token/variance/invariant/text.rs index 9597655..aff358f 100644 --- a/src/token/variance/invariant/text.rs +++ b/src/token/variance/invariant/text.rs @@ -9,25 +9,27 @@ use crate::token::variance::ops::{self, Conjunction, Disjunction, Product}; use crate::token::variance::Variance; use crate::PATHS_ARE_CASE_INSENSITIVE; +use Semantic::{Nominal, Structural}; + pub trait IntoNominalText<'t> { fn into_nominal_text(self) -> Text<'t>; } impl<'t> IntoNominalText<'t> for Cow<'t, str> { fn into_nominal_text(self) -> Text<'t> { - Fragment::Nominal(self).into() + Nominal(self).into() } } impl<'t> IntoNominalText<'t> for &'t str { fn into_nominal_text(self) -> Text<'t> { - Fragment::Nominal(self.into()).into() + Nominal(self.into()).into() } } impl IntoNominalText<'static> for String { fn into_nominal_text(self) -> Text<'static> { - Fragment::Nominal(self.into()).into() + Nominal(self.into()).into() } } @@ -37,26 +39,23 @@ pub trait IntoStructuralText<'t> { impl<'t> IntoStructuralText<'t> for Cow<'t, str> { fn into_structural_text(self) -> Text<'t> { - Fragment::Structural(self).into() + Structural(self).into() } } impl<'t> IntoStructuralText<'t> for &'t str { fn into_structural_text(self) -> Text<'t> { - Fragment::Structural(self.into()).into() + Structural(self.into()).into() } } impl IntoStructuralText<'static> for String { fn into_structural_text(self) -> Text<'static> { - Fragment::Structural(self.into()).into() + Structural(self.into()).into() } } -// TODO: The derived `PartialEq` implementation is incomplete and does not detect contiguous like -// fragments that are equivalent to an aggregated fragment. This works, but relies on -// constructing `InvariantText` by consistently appending fragments. -#[derive(Clone, Debug, Eq, PartialEq)] +#[derive(Clone, Debug, Eq)] pub struct Text<'t> { fragments: VecDeque>, } @@ -96,6 +95,15 @@ impl<'t> Text<'t> { .collect(), } } + + fn bytes(&self) -> impl '_ + Iterator { + self.fragments.iter().flat_map(|fragment| { + fragment.bytes().map(match fragment { + Nominal(_) => Nominal, + Structural(_) => Structural, + }) + }) + } } impl<'t> Conjunction for Text<'t> { @@ -150,6 +158,7 @@ impl<'t> Identity for Text<'t> { } impl<'t> Invariant for Text<'t> { + // No bounds are computed for text (only boundedness). type Bound = UnitBound; fn bound(_: Self, _: Self) -> Boundedness { @@ -161,6 +170,12 @@ impl<'t> Invariant for Text<'t> { } } +impl<'t> PartialEq for Text<'t> { + fn eq(&self, other: &Self) -> bool { + self.bytes().eq(other.bytes()) + } +} + impl<'t> Product for Text<'t> { type Output = Self; @@ -185,16 +200,37 @@ impl<'t> Disjunction> for UnitBound { } } -#[derive(Clone, Debug, Eq)] -enum Fragment<'t> { - Nominal(Cow<'t, str>), - Structural(Cow<'t, str>), +#[derive(Clone, Copy, Debug)] +enum Semantic { + Nominal(T), + Structural(T), +} + +type Byte = Semantic; +type Fragment<'t> = Semantic>; + +impl AsRef for Semantic { + fn as_ref(&self) -> &T { + match self { + Nominal(ref inner) | Structural(ref inner) => inner, + } + } +} + +impl Eq for Byte {} + +impl PartialEq for Byte { + fn eq(&self, other: &Self) -> bool { + match (self, other) { + (Nominal(ref left), Nominal(ref right)) + | (Structural(ref left), Structural(ref right)) => left == right, + _ => false, + } + } } impl<'t> Fragment<'t> { pub fn into_owned(self) -> Fragment<'static> { - use Fragment::{Nominal, Structural}; - match self { Nominal(text) => Nominal(text.into_owned().into()), Structural(text) => Structural(text.into_owned().into()), @@ -202,9 +238,11 @@ impl<'t> Fragment<'t> { } pub fn as_string(&self) -> &Cow<'t, str> { - match self { - Fragment::Nominal(ref text) | Fragment::Structural(ref text) => text, - } + self.as_ref() + } + + fn bytes(&self) -> impl '_ + Iterator { + self.as_string().bytes() } } @@ -212,8 +250,6 @@ impl<'t> Conjunction for Fragment<'t> { type Output = Text<'t>; fn conjunction(self, other: Self) -> Self::Output { - use Fragment::{Nominal, Structural}; - match (self, other) { (Nominal(left), Nominal(right)) => Text { fragments: [Nominal(left + right)].into_iter().collect(), @@ -228,10 +264,10 @@ impl<'t> Conjunction for Fragment<'t> { } } +impl<'t> Eq for Fragment<'t> {} + impl<'t> PartialEq for Fragment<'t> { fn eq(&self, other: &Self) -> bool { - use Fragment::{Nominal, Structural}; - match (self, other) { (Nominal(ref left), Nominal(ref right)) => { if PATHS_ARE_CASE_INSENSITIVE { diff --git a/src/token/variance/mod.rs b/src/token/variance/mod.rs index 24cc47d..622a100 100644 --- a/src/token/variance/mod.rs +++ b/src/token/variance/mod.rs @@ -39,12 +39,6 @@ where #[derive(Clone, Copy, Debug, Eq)] pub enum Variance> { Invariant(T), - // When variant, the bound describes the constraints of that variance. For example, the - // expression `**` is unconstrained and matches _any and all_ text, breadths, and depths. On - // the other hand, the expression `a*z` is constrained and matches _some_ text and _some_ - // breadths (and is invariant w.r.t. depth). Regarding text, note that bounds do **not** - // consider the length at all (breadth). Only constraints that limit an expression to a known - // set of matches are considered, so both `?` and `*` are unbounded w.r.t. text. Variant(B), } diff --git a/src/walk/glob.rs b/src/walk/glob.rs index 5c89bc3..286d16c 100644 --- a/src/walk/glob.rs +++ b/src/walk/glob.rs @@ -445,6 +445,13 @@ impl GlobWalker { } } +// TODO: Partitioned programs are important here, because there is no other way to determine the +// exhaustiveness of a match (which is not the same as the exhaustiveness of a glob). This +// partitioning leaks into the `FileIterator::not` API, which accepts a sequence of +// `Pattern`s rather than one `Pattern` as a best effort attempt to detect exhaustive +// negations. Consider instead a decomposition of token trees that separates the branches of +// level-adjacent alternations from the root. This may allow APIs to accept `Any` and still +// partition in this way when match exhaustiveness is important. #[derive(Clone, Debug)] enum FilterAnyProgram { Empty,