Skip to content

Commit

Permalink
Implement exhaustiveness as a tree fold.
Browse files Browse the repository at this point in the history
This change reimplements `Token::is_exhaustive` as a tree fold (via
`Fold`) and applies a heuristic to alternations to prevent false
positives in alternations with certain exhaustiveness terms (at the cost
of false negatives in certain kinds of patterns).
  • Loading branch information
olson-sean-k committed Feb 12, 2024
1 parent e5754fa commit 885c876
Show file tree
Hide file tree
Showing 5 changed files with 260 additions and 34 deletions.
17 changes: 13 additions & 4 deletions src/token/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@ use crate::diagnostics::{Span, Spanned};
use crate::token::variance::bound::Bound;
use crate::token::variance::invariant::{IntoNominalText, IntoStructuralText};
use crate::token::variance::ops::{self, Conjunction};
use crate::token::variance::{TreeVariance, VarianceFold, VarianceTerm};
use crate::token::variance::{TreeExhaustiveness, TreeVariance, VarianceFold, VarianceTerm};
use crate::token::walk::{BranchFold, Fold, FoldMap, Starting, TokenEntry};
use crate::{StrExt as _, PATHS_ARE_CASE_INSENSITIVE};

Expand Down Expand Up @@ -514,8 +514,13 @@ impl<'t, A> Token<'t, A> {
struct IsRooting;

impl<'t, A> Fold<'t, A> for IsRooting {
type Sequencer = Starting;
type Term = When;

fn sequencer() -> Self::Sequencer {
Starting::default()
}

fn fold(
&mut self,
branch: &BranchKind<'t, A>,
Expand All @@ -542,8 +547,7 @@ impl<'t, A> Token<'t, A> {
}
}

self.fold_with_sequence(Starting, IsRooting)
.unwrap_or(When::Never)
self.fold(IsRooting).unwrap_or(When::Never)
}

pub fn has_boundary(&self) -> bool {
Expand All @@ -566,6 +570,12 @@ impl<'t, A> Token<'t, A> {
}
}

pub fn is_exhaustive1(&self) -> bool {
self.fold(TreeExhaustiveness)
.as_ref()
.map_or(false, Variance::is_exhaustive)
}

// NOTE: False positives in this function may cause logic errors and are completely
// unacceptable. The discovery of a false positive here is likely a serious bug.
pub fn is_exhaustive(&self) -> bool {
Expand Down Expand Up @@ -1369,7 +1379,6 @@ impl<'t, A> VarianceFold<Breadth> for Concatenation<'t, A> {

impl<'t, A> VarianceFold<Depth> for Concatenation<'t, A> {
fn fold(&self, terms: Vec<VarianceOf<Depth>>) -> Option<VarianceOf<Depth>> {
//terms.into_iter().reduce(ops::union)
terms.into_iter().reduce(ops::conjunction)
}
}
Expand Down
18 changes: 10 additions & 8 deletions src/token/variance/invariant/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -78,14 +78,6 @@ impl Product<NonZeroUsize> for Bound<UnitBound> {
}
}

//impl<T> Union<T> for UnitBound {
// type Output = Self;
//
// fn union(self, _: T) -> Self::Output {
// UnitBound
// }
//}

// NOTE: Breadth is probably the least "interesting" invariant to query. The variance w.r.t.
// breadth is critical in determining the exhaustiveness of a glob (amongst other useful
// things), but the bounds of both invariant and variant breadth are not particularly useful.
Expand Down Expand Up @@ -278,3 +270,13 @@ macro_rules! impl_invariant_natural {
}
impl_invariant_natural!(Depth, once => 1);
impl_invariant_natural!(Size);

impl VarianceOf<Depth> {
// TODO: Name this differently. Perhaps `has_upper_bound` (that inverts the meaning).
// TODO: Generalize this to `Variance<_, Bound<_>>`.
pub fn is_exhaustive(&self) -> bool {
self.as_ref()
.variant()
.map_or(false, |bounds| bounds.upper().into_bound().is_unbounded())
}
}
133 changes: 127 additions & 6 deletions src/token/variance/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2,15 +2,16 @@ pub mod bound;
pub mod invariant;
pub mod ops;

use itertools::Itertools;
use std::cmp::Ordering;
use std::marker::PhantomData;
use std::num::NonZeroUsize;

use crate::token::variance::bound::{Bound, NaturalRange, NonEmptyRange, OpenedUpperBound};
use crate::token::variance::invariant::{Invariant, UnitBound, VarianceOf};
use crate::token::variance::invariant::{Breadth, Depth, Invariant, Text, UnitBound, VarianceOf};
use crate::token::variance::ops::{Conjunction, Disjunction, Product};
use crate::token::walk::Fold;
use crate::token::{BranchKind, LeafKind};
use crate::token::walk::{ChildToken, Fold, Forward, ParentToken, Sequencer};
use crate::token::{Boundary, BranchKind, Composition, LeafKind};

Check warning on line 14 in src/token/variance/mod.rs

View workflow job for this annotation

GitHub Actions / Lint

unused import: `Composition`

use self::bound::BoundedNonEmptyRange;

Expand Down Expand Up @@ -337,8 +338,13 @@ where
LeafKind<'t>: VarianceTerm<T>,
T: Invariant,
{
type Sequencer = Forward;
type Term = VarianceOf<T>;

fn sequencer() -> Self::Sequencer {
Forward::default()
}

fn fold(&mut self, branch: &BranchKind<'t, A>, terms: Vec<Self::Term>) -> Option<Self::Term> {
branch.fold(terms)
}
Expand All @@ -352,6 +358,116 @@ where
}
}

#[derive(Debug, Default)]
pub struct TreeExhaustiveness;

impl Sequencer for TreeExhaustiveness {
fn enqueue<'i, 't, A>(
&mut self,
parent: ParentToken<'i, 't, A>,
) -> impl Iterator<Item = ChildToken<'i, 't, A>> {
parent.into_tokens().rev().take_while(|token| {
token.as_ref().as_leaf().map_or(true, |leaf| {
if let Some(Boundary::Separator) = leaf.boundary() {
true
}
else {
let breadth: VarianceOf<Breadth> = leaf.term();
let text: VarianceOf<Text> = leaf.term();
breadth.is_unbounded() && text.is_unbounded()
}
})
})
}
}

impl<'t, A> Fold<'t, A> for TreeExhaustiveness {
type Sequencer = Self;
type Term = VarianceOf<Depth>;

fn sequencer() -> Self::Sequencer {
Self::default()
}

fn fold(&mut self, branch: &BranchKind<'t, A>, terms: Vec<Self::Term>) -> Option<Self::Term> {
// TODO: Detect generalizations in alternation branches. This may be possible in an
// optimization step that fold maps token trees and discards unnecessary branches.
// When folding terms into an alternation, if some but not all branches are exhaustive,
// then do not sum the terms and instead return the bounded depth [0,1]. This is necessary
// to prevent false positives when the sum of exhaustiveness terms for branches is
// exhaustive but a **non-overlapping** branch is non-exhaustive. Consider `{a/**,**/b}`.
// This pattern is nonexhaustive, because matches in `**/b` are not exhaustive and are not
// necessarily sub-trees of an exhaustive branch (in this example, the only such branch
// being `a/**`). Despite this, the terms exhaustiveness terms for the alternation are
// unbounded and zero. These terms sum to unbounded, which is a false positive.
//
// Note that this heuristic is also applied when all non-exhaustive branches overlap with
// an exhaustive branch (that is, all non-exhaustive branches are generalized by exhaustive
// branches), which causes false negatives. Consider `{/**,/**/a}`. The branch `/**`
// generalizes the remaining branches, so this pattern is exhaustive, but this heuristic
// rejects this. However, this false negative is far more acceptable than a false positive,
// which causes errant behavior.
if let BranchKind::Alternation(_) = branch {
let (all, any) = terms
.iter()
.fold_while((true, false), |sum, term| {
use itertools::FoldWhile::{Continue, Done};

let term = term.is_exhaustive();
match (sum.0 && term, sum.1 || term) {
sum @ (false, true) => Done(sum),
sum => Continue(sum),
}
})
.into_inner();
if !all && any {
return Some(VarianceOf::<Depth>::Variant(Bound::Bounded(
BoundedNonEmptyRange::Upper(unsafe { NonZeroUsize::new_unchecked(1) }),
)));
}
}

let n = terms.len();
let term = VarianceFold::fold(branch, terms);
if branch.tokens().into_inner().len() == n {
term
}
else {

Check failure on line 435 in src/token/variance/mod.rs

View workflow job for this annotation

GitHub Actions / Lint

this `else { if .. }` block can be collapsed
if term.as_ref().map_or(false, Variance::is_exhaustive) {
term
}
else {
Some(Variance::identity())
}
}
}

fn finalize(&mut self, branch: &BranchKind<'t, A>, term: Self::Term) -> Self::Term {
use Variance::{Invariant, Variant};

match branch {
branch @ BranchKind::Repetition(_) => match term {
// When folding terms into a repetition, only finalize variant terms and the
// multiplicative identity and annihilator (one and zero). This is necessary,
// because natural bounds do not express the subset nor relationship of matched
// values within the range. Consider `<*/*/>`. This pattern is unbounded w.r.t.
// depth, but only matches paths with a depth that is a multiple of two and so is
// nonexhaustive. However, the similar pattern `<*/>` is exhaustive and matches any
// sub-tree of a match.
Invariant(Depth(0)) | Invariant(Depth(1)) | Variant(_) => {
VarianceFold::finalize(branch, term)
},
_ => term,
},
branch => VarianceFold::finalize(branch, term),
}
}

fn term(&mut self, leaf: &LeafKind<'t>) -> Self::Term {
VarianceTerm::term(leaf)
}
}

// TODO: How do we sum invariant with unbounded...? The conjunction is bounded, but... what are the
// bounds? Is this a distinct case that requires a distinct representation or is there a
// "natural" way to construct a bound from an invariant in this case?
Expand Down Expand Up @@ -549,7 +665,7 @@ mod tests {
token.variance::<Size>(),
token.variance::<Text>(),
);
token.is_exhaustive()
token.is_exhaustive1()
}

assert!(is_exhaustive("**"));
Expand All @@ -568,6 +684,7 @@ mod tests {
assert!(is_exhaustive("<<?>/>*"));
assert!(is_exhaustive("<*{/}>"));
assert!(is_exhaustive("<*{/,/}>"));
assert!(is_exhaustive("<*{/,/**}>"));
assert!(is_exhaustive("<{<<{{a/b}}{{/**}}>>}>"));

assert!(!is_exhaustive(""));
Expand Down Expand Up @@ -610,11 +727,15 @@ mod tests {
// matches the second branch of the alternation, but not the first. This pattern does
// not match any sub-tree of this match other than more `b` components (the first
// branch will never be matched).
//assert!(!is_exhaustive("{a/**,**/b}")); // BROKEN.
assert!(!is_exhaustive("{a/**,**/b}")); // BROKEN.
assert!(!is_exhaustive("<{a/**,**/b}:1>")); // BROKEN.

// NOTE: This expression is exhaustive. However, determining this is tricky. One branch
// generalizes the other.
// TODO: This is considered exhaustive, but for the wrong reasons: see the case above.
// Detecting this properly involves understanding that one branch generalizes the
// other and so that other term can be discarded (only the more general term
// matters).
assert!(is_exhaustive("{/**,/**/a}"));
//assert!(is_exhaustive("{/**,/**/a}"));
}
}
69 changes: 66 additions & 3 deletions src/token/variance/ops.rs
Original file line number Diff line number Diff line change
@@ -1,24 +1,87 @@
use std::num::NonZeroUsize;

use crate::token::variance::Variance;

pub trait Conjunction<T = Self> {
type Output;

fn conjunction(self, rhs: T) -> Self::Output;
}

impl Conjunction for NonZeroUsize {
type Output = Self;

fn conjunction(self, rhs: Self) -> Self::Output {
self.checked_add(rhs.into())
.expect("overflow computing conjunction of unsigned word")
}
}

impl Conjunction for usize {
type Output = Self;

fn conjunction(self, rhs: Self) -> Self::Output {
self.checked_add(rhs)
.expect("overflow computing conjunction of unsigned word")
}
}

pub trait Disjunction<T = Self> {
type Output;

fn disjunction(self, rhs: T) -> Self::Output;
}

// TODO: This IS fundamental. Implement this over `Variance` and `usize`. That implementation
// yields `Invariant::identity` when given zero, and otherwise forwards a `NonZeroUsize` to
// the contents of the `Variance` term!
impl Disjunction for NonZeroUsize {
type Output = Variance<Self, ()>;

fn disjunction(self, rhs: Self) -> Self::Output {
if self.get() == rhs.into() {
Variance::Invariant(self)
}
else {
Variance::Variant(())
}
}
}

impl Disjunction for usize {
type Output = Variance<Self, ()>;

fn disjunction(self, rhs: Self) -> Self::Output {
if self == rhs {
Variance::Invariant(self)
}
else {
Variance::Variant(())
}
}
}

pub trait Product<T = Self> {
type Output;

fn product(self, rhs: T) -> Self::Output;
}

impl Product for NonZeroUsize {
type Output = Self;

fn product(self, rhs: Self) -> Self::Output {
self.checked_mul(rhs.into())
.expect("overflow computing product of unsigned word")
}
}

impl Product for usize {
type Output = Self;

fn product(self, rhs: Self) -> Self::Output {
self.checked_mul(rhs)
.expect("overflow computing product of unsigned word")
}
}

pub fn conjunction<T, U>(lhs: T, rhs: U) -> T::Output
where
T: Conjunction<U>,
Expand Down
Loading

0 comments on commit 885c876

Please sign in to comment.