diff --git a/src/token/mod.rs b/src/token/mod.rs index 12fe5ac..fcce9aa 100644 --- a/src/token/mod.rs +++ b/src/token/mod.rs @@ -16,7 +16,7 @@ use crate::diagnostics::{Span, Spanned}; use crate::token::variance::bound::Bound; use crate::token::variance::invariant::{IntoNominalText, IntoStructuralText}; use crate::token::variance::ops::{self, Conjunction}; -use crate::token::variance::{TreeVariance, VarianceFold, VarianceTerm}; +use crate::token::variance::{TreeExhaustiveness, TreeVariance, VarianceFold, VarianceTerm}; use crate::token::walk::{BranchFold, Fold, FoldMap, Starting, TokenEntry}; use crate::{StrExt as _, PATHS_ARE_CASE_INSENSITIVE}; @@ -514,8 +514,13 @@ impl<'t, A> Token<'t, A> { struct IsRooting; impl<'t, A> Fold<'t, A> for IsRooting { + type Sequencer = Starting; type Term = When; + fn sequencer() -> Self::Sequencer { + Starting::default() + } + fn fold( &mut self, branch: &BranchKind<'t, A>, @@ -542,8 +547,7 @@ impl<'t, A> Token<'t, A> { } } - self.fold_with_sequence(Starting, IsRooting) - .unwrap_or(When::Never) + self.fold(IsRooting).unwrap_or(When::Never) } pub fn has_boundary(&self) -> bool { @@ -566,6 +570,12 @@ impl<'t, A> Token<'t, A> { } } + pub fn is_exhaustive1(&self) -> bool { + self.fold(TreeExhaustiveness) + .as_ref() + .map_or(false, Variance::is_exhaustive) + } + // NOTE: False positives in this function may cause logic errors and are completely // unacceptable. The discovery of a false positive here is likely a serious bug. 
pub fn is_exhaustive(&self) -> bool { @@ -1369,7 +1379,6 @@ impl<'t, A> VarianceFold for Concatenation<'t, A> { impl<'t, A> VarianceFold for Concatenation<'t, A> { fn fold(&self, terms: Vec>) -> Option> { - //terms.into_iter().reduce(ops::union) terms.into_iter().reduce(ops::conjunction) } } diff --git a/src/token/variance/invariant/mod.rs b/src/token/variance/invariant/mod.rs index aab5f52..1dfde71 100644 --- a/src/token/variance/invariant/mod.rs +++ b/src/token/variance/invariant/mod.rs @@ -78,14 +78,6 @@ impl Product for Bound { } } -//impl Union for UnitBound { -// type Output = Self; -// -// fn union(self, _: T) -> Self::Output { -// UnitBound -// } -//} - // NOTE: Breadth is probably the least "interesting" invariant to query. The variance w.r.t. // breadth is critical in determining the exhaustiveness of a glob (amongst other useful // things), but the bounds of both invariant and variant breadth are not particularly useful. @@ -278,3 +270,13 @@ macro_rules! impl_invariant_natural { } impl_invariant_natural!(Depth, once => 1); impl_invariant_natural!(Size); + +impl VarianceOf { + // TODO: Name this differently. Perhaps `has_upper_bound` (that inverts the meaning). + // TODO: Generalize this to `Variance<_, Bound<_>>`. 
+ pub fn is_exhaustive(&self) -> bool { + self.as_ref() + .variant() + .map_or(false, |bounds| bounds.upper().into_bound().is_unbounded()) + } +} diff --git a/src/token/variance/mod.rs b/src/token/variance/mod.rs index 1ff824b..2b71f2a 100644 --- a/src/token/variance/mod.rs +++ b/src/token/variance/mod.rs @@ -2,15 +2,16 @@ pub mod bound; pub mod invariant; pub mod ops; +use itertools::Itertools; use std::cmp::Ordering; use std::marker::PhantomData; use std::num::NonZeroUsize; use crate::token::variance::bound::{Bound, NaturalRange, NonEmptyRange, OpenedUpperBound}; -use crate::token::variance::invariant::{Invariant, UnitBound, VarianceOf}; +use crate::token::variance::invariant::{Breadth, Depth, Invariant, Text, UnitBound, VarianceOf}; use crate::token::variance::ops::{Conjunction, Disjunction, Product}; -use crate::token::walk::Fold; -use crate::token::{BranchKind, LeafKind}; +use crate::token::walk::{ChildToken, Fold, Forward, ParentToken, Sequencer}; +use crate::token::{Boundary, BranchKind, Composition, LeafKind}; use self::bound::BoundedNonEmptyRange; @@ -337,8 +338,13 @@ where LeafKind<'t>: VarianceTerm, T: Invariant, { + type Sequencer = Forward; type Term = VarianceOf; + fn sequencer() -> Self::Sequencer { + Forward::default() + } + fn fold(&mut self, branch: &BranchKind<'t, A>, terms: Vec) -> Option { branch.fold(terms) } @@ -352,6 +358,116 @@ where } } +#[derive(Debug, Default)] +pub struct TreeExhaustiveness; + +impl Sequencer for TreeExhaustiveness { + fn enqueue<'i, 't, A>( + &mut self, + parent: ParentToken<'i, 't, A>, + ) -> impl Iterator> { + parent.into_tokens().rev().take_while(|token| { + token.as_ref().as_leaf().map_or(true, |leaf| { + if let Some(Boundary::Separator) = leaf.boundary() { + true + } + else { + let breadth: VarianceOf = leaf.term(); + let text: VarianceOf = leaf.term(); + breadth.is_unbounded() && text.is_unbounded() + } + }) + }) + } +} + +impl<'t, A> Fold<'t, A> for TreeExhaustiveness { + type Sequencer = Self; + type Term = 
VarianceOf; + + fn sequencer() -> Self::Sequencer { + Self::default() + } + + fn fold(&mut self, branch: &BranchKind<'t, A>, terms: Vec) -> Option { + // TODO: Detect generalizations in alternation branches. This may be possible in an + // optimization step that fold maps token trees and discards unnecessary branches. + // When folding terms into an alternation, if some but not all branches are exhaustive, + // then do not sum the terms and instead return the bounded depth [0,1]. This is necessary + // to prevent false positives when the sum of exhaustiveness terms for branches is + // exhaustive but a **non-overlapping** branch is non-exhaustive. Consider `{a/**,**/b}`. + // This pattern is non-exhaustive, because matches in `**/b` are not exhaustive and are not + // necessarily sub-trees of an exhaustive branch (in this example, the only such branch + // being `a/**`). Despite this, the exhaustiveness terms for the alternation are + // unbounded and zero. These terms sum to unbounded, which is a false positive. + // + // Note that this heuristic is also applied when all non-exhaustive branches overlap with + // an exhaustive branch (that is, all non-exhaustive branches are generalized by exhaustive + // branches), which causes false negatives. Consider `{/**,/**/a}`. The branch `/**` + // generalizes the remaining branches, so this pattern is exhaustive, but this heuristic + // rejects this. However, this false negative is far more acceptable than a false positive, + // which causes errant behavior. 
+ if let BranchKind::Alternation(_) = branch { + let (all, any) = terms + .iter() + .fold_while((true, false), |sum, term| { + use itertools::FoldWhile::{Continue, Done}; + + let term = term.is_exhaustive(); + match (sum.0 && term, sum.1 || term) { + sum @ (false, true) => Done(sum), + sum => Continue(sum), + } + }) + .into_inner(); + if !all && any { + return Some(VarianceOf::::Variant(Bound::Bounded( + BoundedNonEmptyRange::Upper(unsafe { NonZeroUsize::new_unchecked(1) }), + ))); + } + } + + let n = terms.len(); + let term = VarianceFold::fold(branch, terms); + if branch.tokens().into_inner().len() == n { + term + } + else { + if term.as_ref().map_or(false, Variance::is_exhaustive) { + term + } + else { + Some(Variance::identity()) + } + } + } + + fn finalize(&mut self, branch: &BranchKind<'t, A>, term: Self::Term) -> Self::Term { + use Variance::{Invariant, Variant}; + + match branch { + branch @ BranchKind::Repetition(_) => match term { + // When folding terms into a repetition, only finalize variant terms and the + // multiplicative identity and annihilator (one and zero). This is necessary, + // because natural bounds express neither the subset nor the relationship of matched + // values within the range. Consider `<*/*/>`. This pattern is unbounded w.r.t. + // depth, but only matches paths with a depth that is a multiple of two and so is + // nonexhaustive. However, the similar pattern `<*/>` is exhaustive and matches any + // sub-tree of a match. + Invariant(Depth(0)) | Invariant(Depth(1)) | Variant(_) => { + VarianceFold::finalize(branch, term) + }, + _ => term, + }, + branch => VarianceFold::finalize(branch, term), + } + } + + fn term(&mut self, leaf: &LeafKind<'t>) -> Self::Term { + VarianceTerm::term(leaf) + } +} + // TODO: How do we sum invariant with unbounded...? The conjunction is bounded, but... what are the // bounds? 
Is this a distinct case that requires a distinct representation or is there a // "natural" way to construct a bound from an invariant in this case? @@ -549,7 +665,7 @@ mod tests { token.variance::(), token.variance::(), ); - token.is_exhaustive() + token.is_exhaustive1() } assert!(is_exhaustive("**")); @@ -568,6 +684,7 @@ mod tests { assert!(is_exhaustive("</>*")); assert!(is_exhaustive("<*{/}>")); assert!(is_exhaustive("<*{/,/}>")); + assert!(is_exhaustive("<*{/,/**}>")); assert!(is_exhaustive("<{<<{{a/b}}{{/**}}>>}>")); assert!(!is_exhaustive("")); @@ -610,11 +727,15 @@ mod tests { // matches the second branch of the alternation, but not the first. This pattern does // not match any sub-tree of this match other than more `b` components (the first // branch will never be matched). - //assert!(!is_exhaustive("{a/**,**/b}")); // BROKEN. + assert!(!is_exhaustive("{a/**,**/b}")); // BROKEN. + assert!(!is_exhaustive("<{a/**,**/b}:1>")); // BROKEN. + + // NOTE: This expression is exhaustive. However, determining this is tricky. One branch + // generalizes the other. // TODO: This is considered exhaustive, but for the wrong reasons: see the case above. // Detecting this properly involves understanding that one branch generalizes the // other and so that other term can be discarded (only the more general term // matters). 
- assert!(is_exhaustive("{/**,/**/a}")); + //assert!(is_exhaustive("{/**,/**/a}")); } } diff --git a/src/token/variance/ops.rs b/src/token/variance/ops.rs index fd2ddff..8710e8e 100644 --- a/src/token/variance/ops.rs +++ b/src/token/variance/ops.rs @@ -1,24 +1,87 @@ +use std::num::NonZeroUsize; + +use crate::token::variance::Variance; + pub trait Conjunction { type Output; fn conjunction(self, rhs: T) -> Self::Output; } +impl Conjunction for NonZeroUsize { + type Output = Self; + + fn conjunction(self, rhs: Self) -> Self::Output { + self.checked_add(rhs.into()) + .expect("overflow computing conjunction of unsigned word") + } +} + +impl Conjunction for usize { + type Output = Self; + + fn conjunction(self, rhs: Self) -> Self::Output { + self.checked_add(rhs) + .expect("overflow computing conjunction of unsigned word") + } +} + pub trait Disjunction { type Output; fn disjunction(self, rhs: T) -> Self::Output; } -// TODO: This IS fundamental. Implement this over `Variance` and `usize`. That implementation -// yields `Invariant::identity` when given zero, and otherwise forwards a `NonZeroUsize` to -// the contents of the `Variance` term! 
+impl Disjunction for NonZeroUsize { + type Output = Variance; + + fn disjunction(self, rhs: Self) -> Self::Output { + if self.get() == rhs.into() { + Variance::Invariant(self) + } + else { + Variance::Variant(()) + } + } +} + +impl Disjunction for usize { + type Output = Variance; + + fn disjunction(self, rhs: Self) -> Self::Output { + if self == rhs { + Variance::Invariant(self) + } + else { + Variance::Variant(()) + } + } +} + pub trait Product { type Output; fn product(self, rhs: T) -> Self::Output; } +impl Product for NonZeroUsize { + type Output = Self; + + fn product(self, rhs: Self) -> Self::Output { + self.checked_mul(rhs.into()) + .expect("overflow computing product of unsigned word") + } +} + +impl Product for usize { + type Output = Self; + + fn product(self, rhs: Self) -> Self::Output { + self.checked_mul(rhs) + .expect("overflow computing product of unsigned word") + } +} + pub fn conjunction(lhs: T, rhs: U) -> T::Output where T: Conjunction, diff --git a/src/token/walk.rs b/src/token/walk.rs index a1d0272..0c82984 100644 --- a/src/token/walk.rs +++ b/src/token/walk.rs @@ -30,8 +30,11 @@ impl<'i, 't, A> Isomeric for TokenFeed<'i, 't, A> { // supports aggregations that must consider the complete sequence of terms without the need for // additional state in `Fold` implementers or `Term`s. 
pub trait Fold<'t, A> { + type Sequencer: Sequencer; type Term; + fn sequencer() -> Self::Sequencer; + fn initialize(&mut self, _branch: &BranchKind<'t, A>) -> Option { None } @@ -45,6 +48,34 @@ pub trait Fold<'t, A> { fn term(&mut self, leaf: &LeafKind<'t>) -> Self::Term; } +impl<'f, 't, A, F> Fold<'t, A> for &'f mut F +where + F: Fold<'t, A>, +{ + type Sequencer = F::Sequencer; + type Term = F::Term; + + fn sequencer() -> Self::Sequencer { + F::sequencer() + } + + fn initialize(&mut self, branch: &BranchKind<'t, A>) -> Option { + F::initialize(self, branch) + } + + fn fold(&mut self, branch: &BranchKind<'t, A>, terms: Vec) -> Option { + F::fold(self, branch, terms) + } + + fn finalize(&mut self, branch: &BranchKind<'t, A>, term: Self::Term) -> Self::Term { + F::finalize(self, branch, term) + } + + fn term(&mut self, leaf: &LeafKind<'t>) -> Self::Term { + F::term(self, leaf) + } +} + pub trait FoldMap<'t, 'o, A> { type Annotation; @@ -112,12 +143,12 @@ impl<'t, A> Token<'t, A> { where F: Fold<'t, A>, { - self.fold_with_sequence(Forward::default(), f) + self.fold_with_sequence(F::sequencer(), f) } - pub fn fold_with_sequence(&self, mut sequencer: S, f: F) -> Option + fn fold_with_sequence(&self, mut sequencer: S, f: F) -> Option where - S: BranchSequencer, + S: Sequencer, F: Fold<'t, A>, { struct TokenPath<'i, 't, A, F> @@ -347,7 +378,7 @@ impl<'t, A> Token<'t, A> { #[repr(transparent)] pub struct Parent(T); -type ParentToken<'i, 't, A> = Parent<&'i BranchKind<'t, A>>; +pub type ParentToken<'i, 't, A> = Parent<&'i BranchKind<'t, A>>; impl AsRef for Parent { fn as_ref(&self) -> &T { @@ -369,7 +400,7 @@ impl<'i, 't, A> ParentToken<'i, 't, A> { #[repr(transparent)] pub struct Child(T); -type ChildToken<'i, 't, A> = Child<&'i Token<'t, A>>; +pub type ChildToken<'i, 't, A> = Child<&'i Token<'t, A>>; impl AsRef for Child { fn as_ref(&self) -> &T { @@ -377,7 +408,7 @@ impl AsRef for Child { } } -pub trait BranchSequencer { +pub trait Sequencer { fn enqueue<'i, 't, A>( 
&mut self, parent: ParentToken<'i, 't, A>, @@ -387,7 +418,7 @@ pub trait BranchSequencer { #[derive(Default)] pub struct Forward; -impl BranchSequencer for Forward { +impl Sequencer for Forward { fn enqueue<'i, 't, A>( &mut self, parent: ParentToken<'i, 't, A>, @@ -399,7 +430,7 @@ impl BranchSequencer for Forward { #[derive(Default)] pub struct Reverse; -impl BranchSequencer for Reverse { +impl Sequencer for Reverse { fn enqueue<'i, 't, A>( &mut self, parent: ParentToken<'i, 't, A>, @@ -411,7 +442,7 @@ impl BranchSequencer for Reverse { #[derive(Default)] pub struct Starting; -impl BranchSequencer for Starting { +impl Sequencer for Starting { fn enqueue<'i, 't, A>( &mut self, parent: ParentToken<'i, 't, A>, @@ -429,7 +460,7 @@ impl BranchSequencer for Starting { #[derive(Default)] pub struct Ending; -impl BranchSequencer for Ending { +impl Sequencer for Ending { fn enqueue<'i, 't, A>( &mut self, parent: ParentToken<'i, 't, A>, @@ -546,7 +577,7 @@ impl<'i, 't, A, S> CancelWalk for Walk<'i, 't, A, S> { impl<'i, 't, A, S> Iterator for Walk<'i, 't, A, S> where - S: BranchSequencer, + S: Sequencer, { type Item = TokenEntry<'i, 't, A>; @@ -591,7 +622,7 @@ where impl<'i, 't, A, S> SeparatingFilterInput for Walk<'i, 't, A, S> where - S: BranchSequencer, + S: Sequencer, { type Feed = TokenFeed<'i, 't, A>; } @@ -604,7 +635,7 @@ pub fn with_sequence<'i, 't, T, S>( where 't: 'i, T: TokenTree<'t>, - S: 'i + BranchSequencer, + S: 'i + Sequencer, { Walk { branch: None,