From d5f20cf7bba70604bf61df95050ce56ecfd06e51 Mon Sep 17 00:00:00 2001 From: Sean Olson Date: Mon, 12 Feb 2024 16:29:38 -0800 Subject: [PATCH] Refactor bound types. This change renames "non-empty" bound types to "variant" bound types and generalizes `BoundedVariantRange`'s `union` and `extension` operations. This has a nice result of composing well with disjunction for naturals: there is no need to explicitly check for non-zero invariants. This arguably "fixes" the `Disjunction` implementation for `Variance` too, since the disjunction of bounded ranges ought to produce a potentially unbounded output. --- src/encode.rs | 1 + src/token/mod.rs | 20 +++ src/token/variance/bound.rs | 188 ++++++++++-------------- src/token/variance/invariant/mod.rs | 14 +- src/token/variance/invariant/natural.rs | 39 +++-- src/token/variance/invariant/text.rs | 18 +-- src/token/variance/mod.rs | 25 ++-- 7 files changed, 148 insertions(+), 157 deletions(-) diff --git a/src/encode.rs b/src/encode.rs index 6d99fa4..70842eb 100644 --- a/src/encode.rs +++ b/src/encode.rs @@ -155,6 +155,7 @@ where } // TODO: Implement this iteratively. +// TODO: Encode expressions using the HIR in `regex-syntax` rather than text. // TODO: Some versions of `const_format` in `^0.2.0` fail this lint in `formatcp`. See // https://github.com/rodrimati1992/const_format_crates/issues/38 #[allow(clippy::double_parens)] diff --git a/src/token/mod.rs b/src/token/mod.rs index 90d6e31..e7b2aaa 100644 --- a/src/token/mod.rs +++ b/src/token/mod.rs @@ -123,6 +123,26 @@ impl From for When { } } +impl From> for When { + fn from(when: Option) -> Self { + match when { + Some(true) => When::Always, + None => When::Sometimes, + Some(false) => When::Never, + } + } +} + +impl From for Option { + fn from(when: When) -> Self { + match when { + When::Always => Some(true), + When::Sometimes => None, + When::Never => Some(false), + } + } +} + #[derive(Clone, Copy, Debug, Eq, PartialEq)] pub enum Composition { Conjunctive(C), diff --git a/src/token/variance/bound.rs b/src/token/variance/bound.rs index 6c2ad4c..5933dfa 100644 --- a/src/token/variance/bound.rs +++ b/src/token/variance/bound.rs @@ -51,8 +51,23 @@ pub trait OpenedUpperBound: Sized { fn opened_upper_bound(self) -> Bound; } +#[derive(Clone, Copy, Debug, Eq, Hash, PartialEq)] +pub struct Operand { + pub lhs: T, + pub rhs: T, +} + +#[derive(Clone, Copy, Debug, Eq, Hash, PartialEq)] +pub struct LowerUpper { + pub lower: L, + pub upper: U, +} + +pub type LowerUpperBound = LowerUpper, Upper>; +pub type LowerUpperOperand = LowerUpper>, Operand>>; + pub type NaturalBound = Variance; -pub type NaturalRange = Variance; +pub type NaturalRange = Variance; impl Conjunction for NaturalBound { type Output = Self; @@ -125,7 +140,7 @@ impl NaturalRange { }; match (lower, upper) { (0, None) => Variance::Variant(Bound::Unbounded), - (lower, upper) => BoundedNonEmptyRange::try_from_lower_and_upper(lower, upper) + (lower, upper) => BoundedVariantRange::try_from_lower_and_upper(lower, upper) .ok() .map(Bound::Bounded) .map_or_else(|| Variance::Invariant(lower), Variance::Variant), @@ -142,6 +157,24 @@ impl NaturalRange { Self::from_closed_and_open(lower.into_usize(), upper.into_usize()) } + fn by_lower_and_upper_with(self, rhs: Self, mut f: F) -> Self + where + F: FnMut(LowerUpperOperand) -> LowerUpperBound, + { + let lhs = self; + let LowerUpper { lower, upper } = f(LowerUpper { + lower: Operand { + lhs: lhs.lower(), + rhs: rhs.lower(), + }, + upper: Operand { + lhs: lhs.upper(), + rhs: rhs.upper(), + }, + }); + Self::from_closed_and_open(lower.into_usize(), upper.into_usize()) + } + pub fn lower(&self) -> Lower { match self { Variance::Invariant(ref n) => NaturalBound::from(*n).into_lower(), @@ -157,24 +190,24 @@ impl NaturalRange { } } -impl From for NaturalRange { - fn from(range: BoundedNonEmptyRange) -> Self { +impl From for NaturalRange { + fn from(range: BoundedVariantRange) -> Self { Variance::Variant(range.into()) } } -impl From for NaturalRange { - fn from(range: NonEmptyRange) -> Self { - Variance::Variant(range) - } -} - impl From for NaturalRange { fn from(n: usize) -> Self { Variance::Invariant(n) } } +impl From for NaturalRange { + fn from(range: VariantRange) -> Self { + Variance::Variant(range) + } +} + // NOTE: Given the naturals X and Y where X < Y, this defines an unconventional meaning for the // range [Y,X] and repetitions like `<_:10,1>`: the bounds are reordered, so `<_:10,1>` and // `<_:1,10>` are the same. @@ -215,7 +248,7 @@ pub enum Bound { } pub type NonZeroBound = Bound; -pub type NonEmptyRange = Bound; +pub type VariantRange = Bound; impl Bound { pub const BOUNDED: Self = Bound::Bounded(UnitBound); @@ -273,23 +306,21 @@ impl From for NonZeroBound { } } -impl NonEmptyRange { +impl VariantRange { pub fn lower(&self) -> Lower { - self.as_ref().bounded().map_or_else( - || Bound::Unbounded.into_lower(), - BoundedNonEmptyRange::lower, - ) + self.as_ref() + .bounded() + .map_or_else(|| Bound::Unbounded.into_lower(), BoundedVariantRange::lower) } pub fn upper(&self) -> Upper { - self.as_ref().bounded().map_or_else( - || Bound::Unbounded.into_upper(), - BoundedNonEmptyRange::upper, - ) + self.as_ref() + .bounded() + .map_or_else(|| Bound::Unbounded.into_upper(), BoundedVariantRange::upper) } } -impl Product for NonEmptyRange { +impl Product for VariantRange { type Output = Self; fn product(self, rhs: NonZeroUsize) -> Self::Output { @@ -460,29 +491,23 @@ impl NonZeroUpper { } } -// TODO: Consider using the term "variant" rather than "non-empty". -// TODO: This implementation uses a non-trivial amount of unsafe code. Try to reduce this as much -// as possible to as few constructors as possible or consider `expect`ing outputs instead. -// TODO: Especially considering the unsafe code, implement tests for this type. -// NOTE: By design, this cannot represent an invariant range, such as exactly three. This is what -// "empty" refers to here. #[derive(Clone, Copy, Debug, Eq, Hash, PartialEq)] -pub enum BoundedNonEmptyRange { +pub enum BoundedVariantRange { Lower(NonZeroUsize), Upper(NonZeroUsize), Both { lower: NonZeroUsize, - // The extent must also not be zero, as this represents an invariant (empty) range. + // The extent must also be non-zero, as that represents an invariant range. extent: NonZeroUsize, }, } -impl BoundedNonEmptyRange { +impl BoundedVariantRange { pub fn try_from_lower_and_upper( lower: usize, upper: impl Into>, ) -> Result { - use BoundedNonEmptyRange::{Both, Lower, Upper}; + use BoundedVariantRange::{Both, Lower, Upper}; // SAFETY: The invariant that the value used to construct a `NonZeroUsize` is not zero is // explicitly checked here. @@ -500,56 +525,22 @@ impl BoundedNonEmptyRange { } } - unsafe fn from_non_empty_lower_and_upper(lower: NonZeroBound, upper: NonZeroBound) -> Self { - use Bound::{Bounded, Unbounded}; - use BoundedNonEmptyRange::{Lower, Upper}; - - match (lower, upper) { - (Bounded(lower), Bounded(upper)) => Self::from_bounded_lower_and_upper(lower, upper), - (Bounded(lower), Unbounded) => Lower(lower), - (Unbounded, Bounded(upper)) => Upper(upper), - (Unbounded, Unbounded) => unreachable!(), - } - } - - // SAFETY: `upper` must be greater than `lower` so that their difference is non-zero. - unsafe fn from_bounded_lower_and_upper(lower: NonZeroUsize, upper: NonZeroUsize) -> Self { - BoundedNonEmptyRange::Both { - lower, - extent: NonZeroUsize::new_unchecked(upper.get() - lower.get()), - } - } - - pub fn union(self, other: Self) -> Self { - let lower = cmp::min(self.lower(), other.lower()).into_bound(); - let upper = cmp::max(self.upper(), other.upper()).into_bound(); - // SAFETY: The difference between `upper` and `lower` can only become larger here and so - // cannot become zero. - unsafe { Self::from_non_empty_lower_and_upper(lower, upper) } - } - - pub fn extension(self, point: NonZeroUsize) -> Self { - use BoundedNonEmptyRange::{Both, Lower, Upper}; - - let upper = self.upper().into_bound().bounded(); - match self { - Both { lower, .. } => { - let lower = cmp::min(lower, point); - let upper = cmp::max(upper.unwrap(), point); - Both { - lower, - // SAFETY: The difference between `upper` and `lower` can only become larger - // here and so cannot become zero. - extent: unsafe { NonZeroUsize::new_unchecked(upper.get() - lower.get()) }, - } + pub fn union(self, other: impl Into) -> VariantRange { + match NaturalRange::by_lower_and_upper_with( + self.into(), + other.into(), + |LowerUpper { lower, upper }| LowerUpper { + lower: cmp::min(lower.lhs, lower.rhs), + upper: cmp::max(upper.lhs, upper.rhs), }, - Lower(lower) => Lower(cmp::min(lower, point)), - Upper(upper) => Lower(cmp::max(upper, point)), + ) { + Variance::Variant(range) => range, + _ => unreachable!(), } } pub fn translation(self, vector: usize) -> Self { - use BoundedNonEmptyRange::{Both, Lower, Upper}; + use BoundedVariantRange::{Both, Lower, Upper}; let expect_add = move |m: NonZeroUsize| { m.checked_add(vector) @@ -565,25 +556,8 @@ impl BoundedNonEmptyRange { } } - pub fn repeated(self, n: NonZeroUsize) -> Self { - use BoundedNonEmptyRange::{Both, Lower, Upper}; - - let expect_mul = move |m: NonZeroUsize| { - m.checked_mul(n) - .expect("overflow determining repetition of range") - }; - match self { - Both { lower, extent } => Both { - lower: expect_mul(lower), - extent: expect_mul(extent), - }, - Lower(lower) => Lower(expect_mul(lower)), - Upper(upper) => Lower(expect_mul(upper)), - } - } - - pub fn opened_lower_bound(self) -> NonEmptyRange { - use BoundedNonEmptyRange::{Both, Lower, Upper}; + pub fn opened_lower_bound(self) -> VariantRange { + use BoundedVariantRange::{Both, Lower, Upper}; match &self { Both { .. } => Upper( @@ -593,13 +567,13 @@ impl BoundedNonEmptyRange { .expect("closed range has no upper bound"), ) .into(), - Lower(_) => NonEmptyRange::Unbounded, + Lower(_) => VariantRange::Unbounded, _ => self.into(), } } pub fn lower(&self) -> Lower { - use BoundedNonEmptyRange::{Both, Lower, Upper}; + use BoundedVariantRange::{Both, Lower, Upper}; match self { Lower(ref lower) | Both { ref lower, .. } => Bound::Bounded(*lower), @@ -609,7 +583,7 @@ impl BoundedNonEmptyRange { } pub fn upper(&self) -> Upper { - use BoundedNonEmptyRange::{Both, Lower, Upper}; + use BoundedVariantRange::{Both, Lower, Upper}; match self { Both { @@ -629,7 +603,7 @@ impl BoundedNonEmptyRange { } } -impl Conjunction for BoundedNonEmptyRange { +impl Conjunction for BoundedVariantRange { type Output = Self; fn conjunction(self, rhs: Self) -> Self::Output { @@ -640,17 +614,17 @@ impl Conjunction for BoundedNonEmptyRange { } } -impl Disjunction for BoundedNonEmptyRange { - type Output = Self; +impl Disjunction for BoundedVariantRange { + type Output = VariantRange; fn disjunction(self, rhs: Self) -> Self::Output { self.union(rhs) } } -impl OpenedUpperBound for BoundedNonEmptyRange { +impl OpenedUpperBound for BoundedVariantRange { fn opened_upper_bound(self) -> Bound { - use BoundedNonEmptyRange::{Both, Lower, Upper}; + use BoundedVariantRange::{Both, Lower, Upper}; match &self { Both { .. } => Lower( @@ -660,14 +634,14 @@ impl OpenedUpperBound for BoundedNonEmptyRange { .expect("closed range has no lower bound"), ) .into(), - Upper(_) => NonEmptyRange::Unbounded, + Upper(_) => VariantRange::Unbounded, _ => self.into(), } } } -impl Product for BoundedNonEmptyRange { - type Output = NonEmptyRange; +impl Product for BoundedVariantRange { + type Output = VariantRange; fn product(self, rhs: Self) -> Self::Output { match NaturalRange::by_bound_with(self.into(), rhs.into(), ops::product) { @@ -677,7 +651,7 @@ impl Product for BoundedNonEmptyRange { } } -impl Product for BoundedNonEmptyRange { +impl Product for BoundedVariantRange { type Output = Self; fn product(self, rhs: NonZeroUsize) -> Self::Output { diff --git a/src/token/variance/invariant/mod.rs b/src/token/variance/invariant/mod.rs index 4258656..7cdedf3 100644 --- a/src/token/variance/invariant/mod.rs +++ b/src/token/variance/invariant/mod.rs @@ -3,7 +3,7 @@ mod text; use std::num::NonZeroUsize; -use crate::token::variance::bound::{Bound, BoundedNonEmptyRange, OpenedUpperBound}; +use crate::token::variance::bound::{Bound, BoundedVariantRange, OpenedUpperBound}; use crate::token::variance::ops::{Conjunction, Disjunction, Product}; use crate::token::variance::Variance; @@ -87,10 +87,10 @@ impl Conjunction for UnitBound { } impl Disjunction for UnitBound { - type Output = Self; + type Output = Bound; fn disjunction(self, _: Self) -> Self::Output { - self + self.into() } } @@ -100,8 +100,8 @@ impl From<()> for UnitBound { } } -impl From for UnitBound { - fn from(_: BoundedNonEmptyRange) -> Self { +impl From for UnitBound { + fn from(_: BoundedVariantRange) -> Self { UnitBound } } @@ -112,10 +112,10 @@ impl OpenedUpperBound for UnitBound { } } -impl Product for UnitBound { +impl Product for UnitBound { type Output = Bound; - fn product(self, _: BoundedNonEmptyRange) -> Self::Output { + fn product(self, _: BoundedVariantRange) -> Self::Output { self.into() } } diff --git a/src/token/variance/invariant/natural.rs b/src/token/variance/invariant/natural.rs index cc370ea..082ef9e 100644 --- a/src/token/variance/invariant/natural.rs +++ b/src/token/variance/invariant/natural.rs @@ -1,7 +1,7 @@ use std::cmp::Ordering; use std::num::NonZeroUsize; -use crate::token::variance::bound::{Bound, BoundedNonEmptyRange, NonEmptyRange}; +use crate::token::variance::bound::{Bound, BoundedVariantRange, VariantRange}; use crate::token::variance::invariant::{Identity, Invariant, VarianceOf}; use crate::token::variance::ops::{self, Conjunction, Disjunction, Product}; use crate::token::variance::Variance; @@ -71,7 +71,7 @@ macro_rules! impl_invariant_natural { } impl Invariant for $name { - type Bound = BoundedNonEmptyRange; + type Bound = BoundedVariantRange; fn once() -> Self { $name($once) @@ -79,13 +79,13 @@ macro_rules! impl_invariant_natural { fn bound(lhs: Self, rhs: Self) -> Bound { let (lower, upper) = self::minmax(lhs, rhs); - BoundedNonEmptyRange::try_from_lower_and_upper(lower.0, upper.0) + BoundedVariantRange::try_from_lower_and_upper(lower.0, upper.0) .map_or(Bound::Unbounded, Bound::Bounded) } fn into_lower_bound(self) -> Bound { NonZeroUsize::new(self.0) - .map(BoundedNonEmptyRange::Lower) + .map(BoundedVariantRange::Lower) .map(From::from) .unwrap_or(Bound::Unbounded) } @@ -97,10 +97,18 @@ macro_rules! impl_invariant_natural { } } - impl Product for $name { + impl Product for $name { + type Output = Self; + + fn product(self, rhs: usize) -> Self::Output { + self.map(|lhs| ops::product(lhs, rhs)) + } + } + + impl Product for $name { type Output = VarianceOf; - fn product(self, rhs: NonEmptyRange) -> Self::Output { + fn product(self, rhs: VariantRange) -> Self::Output { NonZeroUsize::new(self.0) .map_or_else( VarianceOf::::zero, @@ -109,15 +117,7 @@ macro_rules! impl_invariant_natural { } } - impl Product for $name { - type Output = Self; - - fn product(self, rhs: usize) -> Self::Output { - self.map(|lhs| ops::product(lhs, rhs)) - } - } - - impl Conjunction<$name> for BoundedNonEmptyRange { + impl Conjunction<$name> for BoundedVariantRange { type Output = Self; fn conjunction(self, rhs: $name) -> Self::Output { @@ -125,14 +125,11 @@ macro_rules! impl_invariant_natural { } } - impl Disjunction<$name> for BoundedNonEmptyRange { - type Output = NonEmptyRange; + impl Disjunction<$name> for BoundedVariantRange { + type Output = VariantRange; fn disjunction(self, rhs: $name) -> Self::Output { - match NonZeroUsize::new(rhs.0) { - Some(point) => self.extension(point).into(), - _ => self.opened_lower_bound(), - } + self.union(rhs.0) } } }; diff --git a/src/token/variance/invariant/text.rs b/src/token/variance/invariant/text.rs index e40da97..7b9ace8 100644 --- a/src/token/variance/invariant/text.rs +++ b/src/token/variance/invariant/text.rs @@ -3,7 +3,7 @@ use std::collections::VecDeque; use std::num::NonZeroUsize; use crate::encode; -use crate::token::variance::bound::{Bound, NonEmptyRange}; +use crate::token::variance::bound::{Bound, VariantRange}; use crate::token::variance::invariant::{Identity, Invariant, UnitBound, VarianceOf}; use crate::token::variance::ops::{self, Conjunction, Disjunction, Product}; use crate::token::variance::Variance; @@ -161,14 +161,6 @@ impl<'t> Invariant for Text<'t> { } } -impl<'t> Product for Text<'t> { - type Output = VarianceOf; - - fn product(self, rhs: NonEmptyRange) -> Self::Output { - Variance::Variant(rhs.map_bounded(|_| UnitBound)) - } -} - impl<'t> Product for Text<'t> { type Output = Self; @@ -177,6 +169,14 @@ impl<'t> Product for Text<'t> { } } +impl<'t> Product for Text<'t> { + type Output = VarianceOf; + + fn product(self, rhs: VariantRange) -> Self::Output { + Variance::Variant(rhs.map_bounded(|_| UnitBound)) + } +} + impl<'t> Disjunction> for UnitBound { type Output = Bound; diff --git a/src/token/variance/mod.rs b/src/token/variance/mod.rs index 61b266c..8bbc6b8 100644 --- a/src/token/variance/mod.rs +++ b/src/token/variance/mod.rs @@ -6,7 +6,7 @@ use itertools::Itertools; use std::marker::PhantomData; use std::num::NonZeroUsize; -use crate::token::variance::bound::{Bound, NaturalRange, NonEmptyRange, OpenedUpperBound}; +use crate::token::variance::bound::{Bound, NaturalRange, OpenedUpperBound, VariantRange}; use crate::token::variance::invariant::{ Breadth, Depth, Identity, Invariant, Text, UnitBound, VarianceOf, }; @@ -14,7 +14,7 @@ use crate::token::variance::ops::{Conjunction, Disjunction, Product}; use crate::token::walk::{ChildToken, Fold, Forward, ParentToken, Sequencer}; use crate::token::{Boundary, BranchKind, LeafKind}; -use self::bound::BoundedNonEmptyRange; +use self::bound::BoundedVariantRange; pub trait VarianceTerm where @@ -127,7 +127,7 @@ impl Variance> { } } -impl Variance { +impl Variance { pub fn has_upper_bound(&self) -> bool { self.as_ref() .variant() @@ -156,7 +156,6 @@ where }, // Both terms are unbounded. Conjunction is unbounded. (Variant(Unbounded), Variant(Unbounded)) => Variant(Unbounded), - // One term is bounded and the other is invariant. Conjunction is bounded over the sum // of the bound and the invariant. (Variant(Bounded(bound)), Invariant(invariant)) @@ -179,7 +178,7 @@ impl Disjunction for VarianceOf where Self: PartialEq, T: Invariant, - T::Bound: Disjunction + Disjunction>, + T::Bound: Disjunction> + Disjunction>, { type Output = Self; @@ -196,11 +195,8 @@ where (Invariant(lhs), Invariant(rhs)) => Variant(T::bound(lhs, rhs)), // A term is unbounded. Disjunction is unbounded. (Variant(Unbounded), _) | (_, Variant(Unbounded)) => Variant(Unbounded), - - // Both terms are bounded. Disjunction is bounded over the sum of the bounds. - (Variant(Bounded(lhs)), Variant(Bounded(rhs))) => { - Variant(Bounded(ops::disjunction(lhs, rhs))) - }, + // Both terms are bounded. Disjunction is variant over the sum of the bounds. + (Variant(Bounded(lhs)), Variant(Bounded(rhs))) => Variant(ops::disjunction(lhs, rhs)), // One term is bounded and the other is invariant. Disjunction is variant over the sum // (and bound) of the bound and the invariant. (Variant(Bounded(bound)), Invariant(invariant)) @@ -228,8 +224,8 @@ where impl Product for VarianceOf where Bound: Product>, - T: Invariant + Product> + Product, - T::Bound: Product>, + T: Invariant + Product> + Product, + T::Bound: Product>, { type Output = Self; @@ -353,7 +349,7 @@ impl<'t, A> Fold<'t, A> for TreeExhaustiveness { .into_inner(); if !all && any { return Some(VarianceOf::::Variant(Bound::Bounded( - BoundedNonEmptyRange::Upper(unsafe { NonZeroUsize::new_unchecked(1) }), + BoundedVariantRange::Upper(unsafe { NonZeroUsize::new_unchecked(1) }), ))); } } @@ -495,6 +491,8 @@ mod tests { assert!(is_exhaustive("a/<*/>*")); assert!(is_exhaustive("a/</>*")); assert!(is_exhaustive("{a/**,b/**}")); + assert!(is_exhaustive("{{a/**,b/**},c/**}")); + assert!(is_exhaustive("<{{a/**,b/**},c/**}:2,4>")); assert!(is_exhaustive("{a/**,b/**,[xyz]<*/>}")); assert!(is_exhaustive("</>")); assert!(is_exhaustive("</>*")); @@ -527,6 +525,7 @@ mod tests { // matched). assert!(!is_exhaustive("{a/**,**/b}")); assert!(!is_exhaustive("<{a/**,**/b}:1>")); + assert!(!is_exhaustive("{{{/**},**/b},c/**}")); // TODO: This expression is exhaustive. The `/**` branch generalizes the `/**/a` branch. // However, exhaustiveness applies a heuristic that rejects this (but importantly