From 076075cad4973fe16afe2e46a837795076bd597e Mon Sep 17 00:00:00 2001
From: Sean Olson <olson.sean.k@gmail.com>
Date: Wed, 14 Feb 2024 13:39:25 -0800
Subject: [PATCH] Update comments and various `token` module fixes.

---
 src/token/mod.rs                     | 19 +++++--
 src/token/variance/bound.rs          | 16 ++----
 src/token/variance/invariant/mod.rs  | 24 ++++----
 src/token/variance/invariant/text.rs | 82 ++++++++++++++++++++--------
 src/token/variance/mod.rs            |  6 --
 src/walk/glob.rs                     |  7 +++
 6 files changed, 96 insertions(+), 58 deletions(-)
diff --git a/src/token/mod.rs b/src/token/mod.rs
index 5350010..629208e 100644
--- a/src/token/mod.rs
+++ b/src/token/mod.rs
@@ -267,9 +267,10 @@ impl<'t, A> Tokenized<'t, A>
 where
     A: Default + Spanned,
 {
-    // TODO: This function is limited to immediate concatenations, but probably shouldn't be. As a
-    //       result, expressions like `{.cargo/**/*.crate}` do not partition (in this case, into
-    //       the path `.cargo` and glob `**/*.crate`).
+    // TODO: This function is limited to immediate concatenations and so expressions like
+    //       `{.cargo/**/*.crate}` do not partition (into the path `.cargo` and glob
+    //       `{**/*.crate}`). This requires a more sophisticated transformation of the token tree,
+    //       but is probably worth supporting. Maybe this can be done via `FoldMap`.
     pub fn partition(self) -> (PathBuf, Option<Self>) {
         fn pop_expression_bytes(expression: &str, n: usize) -> &str {
             let n = cmp::min(expression.len(), n);
@@ -473,7 +474,7 @@ impl<'t, A> Token<'t, A> {
         self.as_leaf().and_then(LeafKind::boundary)
     }
 
-    pub fn variance<'a, T>(&'a self) -> GlobVariance<T>
+    pub fn variance<T>(&self) -> GlobVariance<T>
     where
         TreeVariance<T>: Fold<'t, A, Term = GlobVariance<T>>,
         T: Conjunction<Output = T> + Invariant,
@@ -502,6 +503,8 @@ impl<'t, A> Token<'t, A> {
 
         let mut tokens = self.concatenation().iter().peekable();
         if tokens.peek().map_or(false, |token| {
+            // This is a very general predicate, but at time of writing amounts to, "Is this a tree
+            // wildcard?"
             token.has_root().is_always() && token.variance::<Text>().is_variant()
         }) {
             return (0, String::from(Separator::INVARIANT_TEXT));
@@ -594,10 +597,16 @@ impl<'t, A> Token<'t, A> {
         }
     }
 
+    // TODO: Exhaustiveness should be expressed with `When` rather than `bool`. In particular,
+    //       alternations may _sometimes_ be exhaustive.
+    // TODO: There is a distinction between exhaustiveness of a glob and exhaustiveness of a match
+    //       (this is also true of other properties). The latter can be important for performance
+    //       optimization, but may also be useful in the public API (perhaps as part of
+    //       `MatchedText`).
     // NOTE: False positives in this function may cause logic errors and are completely
     //       unacceptable. The discovery of a false positive here almost cetainly indicates a
     //       serious bug. False positives in negative patterns cause matching to incorrectly
-    //       discard directory trees.
+    //       discard directory trees in the `FileIterator::not` combinator.
     pub fn is_exhaustive(&self) -> bool {
         self.fold(TreeExhaustiveness)
             .as_ref()
diff --git a/src/token/variance/bound.rs b/src/token/variance/bound.rs
index 3078413..1895eef 100644
--- a/src/token/variance/bound.rs
+++ b/src/token/variance/bound.rs
@@ -8,10 +8,6 @@ use crate::token::variance::Variance;
 
 pub use Boundedness::{Bounded, Unbounded};
 
-// TODO: Reorganize type definitions and `impl`s in this module. Reorder from specific (e.g.,
-//       `Bound<NonZeroUsize>`) to general (e.g., `Bound<T>`) or in whichever way is consistent
-//       with other modules in the crate.
-
 pub trait Cobound: Sized {
     type Bound;
 
@@ -53,7 +49,7 @@ pub trait OpenedUpperBound: Sized {
 }
 
 #[derive(Clone, Copy, Debug, Eq, Hash, PartialEq)]
-pub struct Operand<T> {
+pub struct BinaryOperand<T> {
     pub lhs: T,
     pub rhs: T,
 }
@@ -65,7 +61,7 @@ pub struct LowerUpper<L, U> {
 }
 
 pub type LowerUpperBound<B> = LowerUpper<Lower<B>, Upper<B>>;
-pub type LowerUpperOperand<B> = LowerUpper<Operand<Lower<B>>, Operand<Upper<B>>>;
+pub type LowerUpperOperand<B> = LowerUpper<BinaryOperand<Lower<B>>, BinaryOperand<Upper<B>>>;
 
 pub type NaturalBound = Variance<Zero, NonZeroBound>;
 pub type NaturalRange = Variance<usize, VariantRange>;
@@ -123,8 +119,8 @@ impl NaturalRange {
     //       here: the closed bound is considered unbounded when zero, but the open bound is only
     //       considered unbounded when `None`.
     // NOTE: Given the naturals X and Y where X < Y, this defines an unconventional meaning for the
-    //       range [Y,X] and repetitions like `<_:10,1>`: the bounds are reordered, so `<_:10,1>` and
-    //       `<_:1,10>` are the same.
+    //       range [Y,X] and therefore repetitions like `<_:10,1>`: the bounds are reordered, so
+    //       `<_:10,1>` and `<_:1,10>` are the same.
     pub fn from_closed_and_open<T>(closed: usize, open: T) -> Self
     where
         T: Into<Option<usize>>,
@@ -162,11 +158,11 @@ impl NaturalRange {
     {
         let lhs = self;
         let LowerUpper { lower, upper } = f(LowerUpper {
-            lower: Operand {
+            lower: BinaryOperand {
                 lhs: lhs.lower(),
                 rhs: rhs.lower(),
             },
-            upper: Operand {
+            upper: BinaryOperand {
                 lhs: lhs.upper(),
                 rhs: rhs.upper(),
             },
diff --git a/src/token/variance/invariant/mod.rs b/src/token/variance/invariant/mod.rs
index 6edf29c..036973e 100644
--- a/src/token/variance/invariant/mod.rs
+++ b/src/token/variance/invariant/mod.rs
@@ -28,26 +28,22 @@ pub trait Invariant: Sized + Identity {
     fn into_lower_bound(self) -> Boundedness<Self::Bound>;
 }
 
-// Breadth is the size of components in a glob expression (for some notion of size, probably UTF-8
-// bytes). For example, the breadth of `{a/,a/b/}<c/d:2>` is two.
+// Breadth is the maximum size (UTF-8 bytes) of component text in a glob expression. For example,
+// the breadth of `{a/,a/b/}<c/d:2>` is two (bytes).
 //
-// Composition (i.e., folds, conjunction, etc.) is not implemented for breadth and it is only
-// queried in leaf tokens to compute exhaustiveness. No values are associated with its invariant
-// nor bounds.
-//
-// Breadth is probably the least interesting and yet most difficult quantity to compute.
-// Determining correct breadth values likely involves:
+// Composition is not implemented for breadth and it is only queried from leaf tokens. No values
+// are associated with its invariant nor bounds. Breadth is probably the least interesting and yet
+// most difficult quantity to compute. Determining correct breadth values likely involves:
 //
 // - Complex terms that support both running sums and running maximums across boundaries.
 // - Searches from the start and end of sub-trees in repetitions, where terminals may interact.
-//   Consider `<a/b:2>` vs. `<a/b/:2>`, which have breadths of two and one, respectively. These
-//   searches probably need to cross levels in the token tree somehow.
+//   Consider `<a/b:2>` vs. `<a/b/:2>`, which have breadths of two and one, respectively.
 // - Potentially large sets for bounds. Ranges lose too much information, in particular information
-//   about boundaries when composing terms and bounds.
+//   about boundaries when composing terms.
 //
-// There are likely other difficult composition problems, though this all assumes computation via a
-// tree fold. As such, breadth is only implemented as much as needed. Any requirements on more
-// complete breadth computations ought to be considered carefully and avoided.
+// There are likely other difficult composition problems. As such, breadth is only implemented as
+// much as needed. Any requirements on more complete breadth computations ought to be considered
+// carefully.
 #[derive(Clone, Copy, Debug, Eq, PartialEq)]
 pub struct Breadth;
 
diff --git a/src/token/variance/invariant/text.rs b/src/token/variance/invariant/text.rs
index 9597655..aff358f 100644
--- a/src/token/variance/invariant/text.rs
+++ b/src/token/variance/invariant/text.rs
@@ -9,25 +9,27 @@ use crate::token::variance::ops::{self, Conjunction, Disjunction, Product};
 use crate::token::variance::Variance;
 use crate::PATHS_ARE_CASE_INSENSITIVE;
 
+use Semantic::{Nominal, Structural};
+
 pub trait IntoNominalText<'t> {
     fn into_nominal_text(self) -> Text<'t>;
 }
 
 impl<'t> IntoNominalText<'t> for Cow<'t, str> {
     fn into_nominal_text(self) -> Text<'t> {
-        Fragment::Nominal(self).into()
+        Nominal(self).into()
     }
 }
 
 impl<'t> IntoNominalText<'t> for &'t str {
     fn into_nominal_text(self) -> Text<'t> {
-        Fragment::Nominal(self.into()).into()
+        Nominal(self.into()).into()
     }
 }
 
 impl IntoNominalText<'static> for String {
     fn into_nominal_text(self) -> Text<'static> {
-        Fragment::Nominal(self.into()).into()
+        Nominal(self.into()).into()
     }
 }
 
@@ -37,26 +39,23 @@ pub trait IntoStructuralText<'t> {
 
 impl<'t> IntoStructuralText<'t> for Cow<'t, str> {
     fn into_structural_text(self) -> Text<'t> {
-        Fragment::Structural(self).into()
+        Structural(self).into()
     }
 }
 
 impl<'t> IntoStructuralText<'t> for &'t str {
     fn into_structural_text(self) -> Text<'t> {
-        Fragment::Structural(self.into()).into()
+        Structural(self.into()).into()
     }
 }
 
 impl IntoStructuralText<'static> for String {
     fn into_structural_text(self) -> Text<'static> {
-        Fragment::Structural(self.into()).into()
+        Structural(self.into()).into()
     }
 }
 
-// TODO: The derived `PartialEq` implementation is incomplete and does not detect contiguous like
-//       fragments that are equivalent to an aggregated fragment. This works, but relies on
-//       constructing `InvariantText` by consistently appending fragments.
-#[derive(Clone, Debug, Eq, PartialEq)]
+#[derive(Clone, Debug, Eq)]
 pub struct Text<'t> {
     fragments: VecDeque<Fragment<'t>>,
 }
@@ -96,6 +95,15 @@ impl<'t> Text<'t> {
                 .collect(),
         }
     }
+
+    fn bytes(&self) -> impl '_ + Iterator<Item = Byte> {
+        self.fragments.iter().flat_map(|fragment| {
+            fragment.bytes().map(match fragment {
+                Nominal(_) => Nominal,
+                Structural(_) => Structural,
+            })
+        })
+    }
 }
 
 impl<'t> Conjunction for Text<'t> {
@@ -150,6 +158,7 @@ impl<'t> Identity for Text<'t> {
 }
 
 impl<'t> Invariant for Text<'t> {
+    // No bounds are computed for text (only boundedness).
     type Bound = UnitBound;
 
     fn bound(_: Self, _: Self) -> Boundedness<Self::Bound> {
@@ -161,6 +170,12 @@ impl<'t> Invariant for Text<'t> {
     }
 }
 
+impl<'t> PartialEq for Text<'t> {
+    fn eq(&self, other: &Self) -> bool {
+        self.bytes().eq(other.bytes())
+    }
+}
+
 impl<'t> Product<usize> for Text<'t> {
     type Output = Self;
 
@@ -185,16 +200,37 @@ impl<'t> Disjunction<Text<'t>> for UnitBound {
     }
 }
 
-#[derive(Clone, Debug, Eq)]
-enum Fragment<'t> {
-    Nominal(Cow<'t, str>),
-    Structural(Cow<'t, str>),
+#[derive(Clone, Copy, Debug)]
+enum Semantic<T> {
+    Nominal(T),
+    Structural(T),
+}
+
+type Byte = Semantic<u8>;
+type Fragment<'t> = Semantic<Cow<'t, str>>;
+
+impl<T> AsRef<T> for Semantic<T> {
+    fn as_ref(&self) -> &T {
+        match self {
+            Nominal(ref inner) | Structural(ref inner) => inner,
+        }
+    }
+}
+
+impl Eq for Byte {}
+
+impl PartialEq for Byte {
+    fn eq(&self, other: &Self) -> bool {
+        match (self, other) {
+            (Nominal(ref left), Nominal(ref right))
+            | (Structural(ref left), Structural(ref right)) => left == right,
+            _ => false,
+        }
+    }
 }
 
 impl<'t> Fragment<'t> {
     pub fn into_owned(self) -> Fragment<'static> {
-        use Fragment::{Nominal, Structural};
-
         match self {
             Nominal(text) => Nominal(text.into_owned().into()),
             Structural(text) => Structural(text.into_owned().into()),
@@ -202,9 +238,11 @@ impl<'t> Fragment<'t> {
     }
 
     pub fn as_string(&self) -> &Cow<'t, str> {
-        match self {
-            Fragment::Nominal(ref text) | Fragment::Structural(ref text) => text,
-        }
+        self.as_ref()
+    }
+
+    fn bytes(&self) -> impl '_ + Iterator<Item = u8> {
+        self.as_string().bytes()
     }
 }
 
@@ -212,8 +250,6 @@ impl<'t> Conjunction for Fragment<'t> {
     type Output = Text<'t>;
 
     fn conjunction(self, other: Self) -> Self::Output {
-        use Fragment::{Nominal, Structural};
-
         match (self, other) {
             (Nominal(left), Nominal(right)) => Text {
                 fragments: [Nominal(left + right)].into_iter().collect(),
@@ -228,10 +264,10 @@ impl<'t> Conjunction for Fragment<'t> {
     }
 }
 
+impl<'t> Eq for Fragment<'t> {}
+
 impl<'t> PartialEq for Fragment<'t> {
     fn eq(&self, other: &Self) -> bool {
-        use Fragment::{Nominal, Structural};
-
         match (self, other) {
             (Nominal(ref left), Nominal(ref right)) => {
                 if PATHS_ARE_CASE_INSENSITIVE {
diff --git a/src/token/variance/mod.rs b/src/token/variance/mod.rs
index 24cc47d..622a100 100644
--- a/src/token/variance/mod.rs
+++ b/src/token/variance/mod.rs
@@ -39,12 +39,6 @@ where
 #[derive(Clone, Copy, Debug, Eq)]
 pub enum Variance<T, B = Boundedness<UnitBound>> {
     Invariant(T),
-    // When variant, the bound describes the constraints of that variance. For example, the
-    // expression `**` is unconstrained and matches _any and all_ text, breadths, and depths. On
-    // the other hand, the expression `a*z` is constrained and matches _some_ text and _some_
-    // breadths (and is invariant w.r.t. depth). Regarding text, note that bounds do **not**
-    // consider the length at all (breadth). Only constraints that limit an expression to a known
-    // set of matches are considered, so both `?` and `*` are unbounded w.r.t. text.
     Variant(B),
 }
 
diff --git a/src/walk/glob.rs b/src/walk/glob.rs
index 5c89bc3..286d16c 100644
--- a/src/walk/glob.rs
+++ b/src/walk/glob.rs
@@ -445,6 +445,13 @@ impl GlobWalker {
     }
 }
 
+// TODO: Partitioned programs are important here, because there is no other way to determine the
+//       exhaustiveness of a match (which is not the same as the exhaustiveness of a glob). This
+//       partitioning leaks into the `FileIterator::not` API, which accepts a sequence of
+//       `Pattern`s rather than one `Pattern` as a best effort attempt to detect exhaustive
+//       negations. Consider instead a decomposition of token trees that separates the branches of
+//       level-adjacent alternations from the root. This may allow APIs to accept `Any` and still
+//       partition in this way when match exhaustiveness is important.
 #[derive(Clone, Debug)]
 enum FilterAnyProgram {
     Empty,