diff --git a/src/diagnostics/mod.rs b/src/diagnostics/mod.rs index 84ddb96..80f1776 100644 --- a/src/diagnostics/mod.rs +++ b/src/diagnostics/mod.rs @@ -39,6 +39,16 @@ impl SpanExt for Span { } } +pub trait Spanned { + fn span(&self) -> &Span; +} + +impl Spanned for Span { + fn span(&self) -> &Span { + self + } +} + /// Error associated with a [`Span`] within a glob expression. /// /// Located errors describe specific instances of an error within a glob expression. Types that diff --git a/src/token/mod.rs b/src/token/mod.rs index f171abf..dfae2eb 100644 --- a/src/token/mod.rs +++ b/src/token/mod.rs @@ -6,42 +6,131 @@ use std::borrow::Cow; use std::cmp; use std::collections::VecDeque; use std::mem; -use std::ops::Deref; use std::path::{PathBuf, MAIN_SEPARATOR}; use std::slice; use std::str; +use crate::diagnostics::{Span, Spanned}; use crate::token::variance::{ CompositeBreadth, CompositeDepth, ConjunctiveVariance, DisjunctiveVariance, IntoInvariantText, - Invariance, UnitBreadth, UnitDepth, UnitVariance, + Invariant, UnitBreadth, UnitDepth, UnitVariance, }; use crate::{StrExt as _, PATHS_ARE_CASE_INSENSITIVE}; -pub use crate::token::parse::{parse, Annotation, ParseError, ROOT_SEPARATOR_EXPRESSION}; +pub use crate::token::parse::{parse, ParseError, ROOT_SEPARATOR_EXPRESSION}; pub use crate::token::variance::{ - invariant_text_prefix, is_exhaustive, Boundedness, InvariantSize, InvariantText, Variance, + invariant_text_prefix, is_exhaustive, Bound, InvariantSize, InvariantText, Variance, }; -pub trait TokenTree<'t>: Sized { +// TODO: Expression and glob trees differ only in their annotation data. This supports the +// distinction, but is inflexible and greatly limits any intermediate representation of a +// glob. Consider completely disjoint types for expression and glob trees, perhaps using an +// unintrusive tree data structure for both. + +pub type ExpressionToken<'t> = Token<'t, ExpressionMetadata>; +pub type ExpressionTree<'t> = Tokenized<'t, ExpressionMetadata>; + +pub type GlobToken<'t> = Token<'t, GlobMetadata>; +pub type GlobTree<'t> = Tokenized<'t, GlobMetadata>; + +pub type ExpressionMetadata = Span; + +// TODO: Describe the complete variance of the associated token. +#[derive(Clone, Copy, Debug)] +pub struct GlobMetadata { + pub span: Span, +} + +impl Spanned for GlobMetadata { + fn span(&self) -> &Span { + &self.span + } +} + +// TODO: This metadata has no spans, as the expressions are disjoint. However, it should probably +// include variance. 
+#[derive(Clone, Copy, Debug)] +pub struct AnyMetadata {} + +impl From<GlobMetadata> for AnyMetadata { + fn from(metadata: GlobMetadata) -> Self { + AnyMetadata {} + } +} + +pub trait TokenTree<'t> { type Annotation; - fn into_tokens(self) -> Vec<Token<'t, Self::Annotation>>; + fn into_token(self) -> Token<'t, Self::Annotation> + where + Self: Sized; + + fn as_token(&self) -> &Token<'t, Self::Annotation>; +} + +#[derive(Clone, Copy, Debug, Eq, Hash, PartialEq)] +pub enum When { + Always, + Sometimes, + Never, +} - fn tokens(&self) -> &[Token<'t, Self::Annotation>]; +#[derive(Clone, Copy, Debug, Eq, PartialEq)] +pub enum Junction<T> { + Conjunctive(T), + Disjunctive(T), +} + +impl<T> Junction<T> { + pub fn into_inner(self) -> T { + match self { + Junction::Conjunctive(inner) | Junction::Disjunctive(inner) => inner, + } + } + + pub fn conjunctive(self) -> Option<T> { + match self { + Junction::Conjunctive(inner) => Some(inner), + _ => None, + } + } + + pub fn disjunctive(self) -> Option<T> { + match self { + Junction::Disjunctive(inner) => Some(inner), + _ => None, + } + } +} + +impl<T> AsRef<T> for Junction<T> { + fn as_ref(&self) -> &T { + match self { + Junction::Conjunctive(ref inner) => inner, + Junction::Disjunctive(ref inner) => inner, + } + } +} + +#[derive(Clone, Copy, Debug, Eq, PartialEq)] +pub enum Boundary { + Component, + Separator, } #[derive(Clone, Debug)] -pub struct Tokenized<'t, A = Annotation> { +pub struct Tokenized<'t, A> { expression: Cow<'t, str>, - tokens: Vec<Token<'t, A>>, + token: Token<'t, A>, } +// TODO: Remove as much as possible from these APIs and rely on a rich `Token` API instead. impl<'t, A> Tokenized<'t, A> { pub fn into_owned(self) -> Tokenized<'static, A> { - let Tokenized { expression, tokens } = self; + let Tokenized { expression, token } = self; Tokenized { expression: expression.into_owned().into(), - tokens: tokens.into_iter().map(Token::into_owned).collect(), + token: token.into_owned(), } } @@ -51,7 +140,7 @@ impl<'t, A> Tokenized<'t, A> { pub fn variance<T>(&self) -> Variance<T> where - T: Invariance, + T: Invariant, for<'i> &'i Token<'t, A>: UnitVariance<T>, { self.tokens().iter().conjunctive_variance() @@ -62,7 +151,10 @@ impl<'t, A> Tokenized<'t, A> { } } -impl<'t> Tokenized<'t, Annotation> { +impl<'t, A> Tokenized<'t, A> +where + A: Spanned, +{ pub fn partition(self) -> (PathBuf, Self) { fn pop_expression_bytes(expression: &str, n: usize) -> &str { let n = cmp::min(expression.len(), n); @@ -81,7 +173,7 @@ impl<'t> Tokenized<'t, Annotation> { let mut offset: usize = tokens .iter() .take(n) - .map(|token| token.annotation().1) + .map(|token| *token.annotation().span()) .sum(); // Drain invariant tokens from the beginning of the token sequence and unroot any tokens at @@ -114,76 +206,218 @@ impl<'t, A> TokenTree<'t> for Tokenized<'t, A> { type Annotation = A; - fn into_tokens(self) -> Vec<Token<'t, Self::Annotation>> { - let Tokenized { tokens, .. } = self; - tokens + fn into_token(self) -> Token<'t, Self::Annotation> { + let Tokenized { token, ..
} = self; + token } - fn tokens(&self) -> &[Token<'t, Self::Annotation>] { - &self.tokens + fn as_token(&self) -> &Token<'t, Self::Annotation> { + &self.token } } +pub trait Fold<'t, A> { + type Term; + + fn initialize(&mut self, _branch: &BranchKind<'t, A>) -> Option<Self::Term> { + None + } + + fn accumulate( + &mut self, + branch: &BranchKind<'t, A>, + accumulator: Self::Term, + term: Self::Term, + ) -> Self::Term; + + fn finalize(&mut self, _branch: &BranchKind<'t, A>, accumulator: Self::Term) -> Self::Term { + accumulator + } + + fn term(&mut self, leaf: &LeafKind<'t>) -> Self::Term; +} + +pub trait FoldMap<'t, A> { + type Annotation; + + fn fold( + &mut self, + annotation: A, + branch: BranchKind<'t, A>, + tokens: Vec<Token<'t, Self::Annotation>>, + ) -> Option<Token<'t, Self::Annotation>>; + + fn map(&mut self, annotation: A, leaf: LeafKind<'t>) -> Option<Token<'t, Self::Annotation>>; +} + #[derive(Clone, Debug)] -pub struct Token<'t, A = Annotation> { - kind: TokenKind<'t, A>, +pub struct Token<'t, A> { + topology: TokenTopology<'t, A>, annotation: A, } impl<'t, A> Token<'t, A> { - fn new(kind: TokenKind<'t, A>, annotation: A) -> Self { - Token { kind, annotation } - } - - pub fn into_owned(self) -> Token<'static, A> { - let Token { kind, annotation } = self; + fn new(topology: impl Into<TokenTopology<'t, A>>, annotation: A) -> Self { Token { - kind: kind.into_owned(), + topology: topology.into(), annotation, } } - pub fn unannotate(self) -> Token<'t, ()> { - let Token { kind, .. } = self; + pub fn into_owned(self) -> Token<'static, A> { + let Token { + topology, + annotation, + } = self; Token { - kind: kind.unannotate(), - annotation: (), + topology: topology.into_owned(), + annotation, } } pub fn unroot(&mut self) -> bool { - self.kind.unroot() + self.as_leaf_mut().map_or(false, LeafKind::unroot) } - pub fn kind(&self) -> &TokenKind<'t, A> { - self.as_ref() + pub fn fold<F>(&self, f: F) -> Option<F::Term> + where + F: Fold<'t, A>, + { + todo!() } - pub fn annotation(&self) -> &A { - self.as_ref() + pub fn fold_map<F>(self, f: F) -> Token<'t, F::Annotation> + where + F: FoldMap<'t, A>, + { + todo!() } pub fn walk(&self) -> Walk<'_, 't, A> { Walk::from(self) } + pub fn tokens(&self) -> Option<Junction<&[Token<'t, A>]>> { + self.as_branch().map(BranchKind::tokens) + } + + pub fn conjunction(&self) -> &[Self] { + if let Some(Junction::Conjunctive(tokens)) = self.tokens() { + tokens + } + else { + slice::from_ref(self) + } + } + + pub fn topology(&self) -> &TokenTopology<'t, A> { + &self.topology + } + + pub fn annotation(&self) -> &A { + &self.annotation + } + + pub fn boundary(&self) -> Option<Boundary> { + self.as_leaf().and_then(LeafKind::boundary) + } + + pub fn variance<T>(&self) -> Variance<T> + where + T: Invariant, + { + todo!() + } + pub fn has_root(&self) -> bool { self.walk().starting().any(|(_, token)| { - matches!( - token.kind(), - TokenKind::Separator(_) | TokenKind::Wildcard(Wildcard::Tree { has_root: true }), - ) + token.as_leaf().map_or(false, |leaf| { + matches!( + leaf, + LeafKind::Separator(_) | LeafKind::Wildcard(Wildcard::Tree { has_root: true }), + ) + }) }) } - pub fn has_component_boundary(&self) -> bool { - self.walk().any(|(_, token)| token.is_component_boundary()) + pub fn has_boundary(&self) -> bool { + self.walk().any(|(_, token)| token.boundary().is_some()) + } + + pub fn is_capturing(&self) -> bool { + match self.topology { + TokenTopology::Branch(ref branch) => branch.is_capturing(), + TokenTopology::Leaf(ref leaf) => leaf.is_capturing(), + } + } + + pub fn is_exhaustive(&self) -> bool { + todo!() + } } -impl<'t, A> AsRef<TokenKind<'t, A>> for Token<'t, A> { - fn as_ref(&self) -> &TokenKind<'t, A> { - &self.kind +impl<'t, A> Token<'t, A>
{ + pub fn as_branch(&self) -> Option<&BranchKind<'t, A>> { + match self.topology { + TokenTopology::Branch(ref branch) => Some(branch), + _ => None, + } + } + + pub fn as_leaf(&self) -> Option<&LeafKind<'t>> { + match self.topology { + TokenTopology::Leaf(ref leaf) => Some(leaf), + _ => None, + } + } + + fn as_leaf_mut(&mut self) -> Option<&mut LeafKind<'t>> { + match self.topology { + TokenTopology::Leaf(ref mut leaf) => Some(leaf), + _ => None, + } + } + + pub fn as_alternation(&self) -> Option<&Alternation<'t, A>> { + self.as_branch().and_then(|branch| match branch { + BranchKind::Alternation(ref alternation) => Some(alternation), + _ => None, + }) + } + + pub fn as_class(&self) -> Option<&Class> { + self.as_leaf().and_then(|leaf| match leaf { + LeafKind::Class(ref class) => Some(class), + _ => None, + }) + } + + pub fn as_concatenation(&self) -> Option<&Concatenation<'t, A>> { + self.as_branch().and_then(|branch| match branch { + BranchKind::Concatenation(ref concatenation) => Some(concatenation), + _ => None, + }) + } + + pub fn as_literal(&self) -> Option<&Literal<'t>> { + self.as_leaf().and_then(|leaf| match leaf { + LeafKind::Literal(ref literal) => Some(literal), + _ => None, + }) + } + + pub fn as_repetition(&self) -> Option<&Repetition<'t, A>> { + self.as_branch().and_then(|branch| match branch { + BranchKind::Repetition(ref repetition) => Some(repetition), + _ => None, + }) + } + + pub fn as_wildcard(&self) -> Option<&Wildcard> { + self.as_leaf().and_then(|leaf| match leaf { + LeafKind::Wildcard(ref wildcard) => Some(wildcard), + _ => None, + }) } } @@ -193,19 +427,14 @@ impl<'t, A> AsRef for Token<'t, A> { } } -impl<'t, A> Deref for Token<'t, A> { - type Target = TokenKind<'t, A>; - - fn deref(&self) -> &Self::Target { - self.as_ref() - } -} - -impl<'t> From> for Token<'t, ()> { - fn from(kind: TokenKind<'t, ()>) -> Self { +impl<'t, A> From> for Token<'t, A> +where + A: Default, +{ + fn from(topology: TokenTopology<'t, A>) -> Self { Token { - kind, - annotation: (), + topology, + annotation: A::default(), } } } @@ -213,261 +442,228 @@ impl<'t> From> for Token<'t, ()> { impl<'t, A> TokenTree<'t> for Token<'t, A> { type Annotation = A; - fn into_tokens(self) -> Vec> { - vec![self] + fn into_token(self) -> Token<'t, Self::Annotation> { + self } - fn tokens(&self) -> &[Token<'t, Self::Annotation>] { - slice::from_ref(self) + fn as_token(&self) -> &Token<'t, Self::Annotation> { + self } } -impl<'i, 't, A> UnitBreadth for &'i Token<'t, A> { - fn unit_breadth(self) -> Boundedness { - self.kind.unit_breadth() +#[derive(Clone, Copy, Debug, Eq, PartialEq)] +pub enum Topology { + Branch(B), + Leaf(L), +} + +pub type TokenTopology<'t, A> = Topology, LeafKind<'t>>; + +impl<'t, A> TokenTopology<'t, A> { + pub fn into_owned(self) -> TokenTopology<'static, A> { + match self { + TokenTopology::Branch(kind) => TokenTopology::Branch(kind.into_owned()), + TokenTopology::Leaf(kind) => TokenTopology::Leaf(kind.into_owned()), + } } } -impl<'i, 't, A> UnitDepth for &'i Token<'t, A> { - fn unit_depth(self) -> Boundedness { - self.kind.unit_depth() +impl<'t, A> From> for TokenTopology<'t, A> { + fn from(kind: BranchKind<'t, A>) -> Self { + TokenTopology::Branch(kind) } } -impl<'i, 't, A, T> UnitVariance for &'i Token<'t, A> -where - &'i TokenKind<'t, A>: UnitVariance, - T: Invariance, -{ - fn unit_variance(self) -> Variance { - self.kind.unit_variance() +impl<'t, A> From> for TokenTopology<'t, A> { + fn from(kind: LeafKind<'t>) -> Self { + TokenTopology::Leaf(kind) } } #[derive(Clone, Debug)] -pub 
enum TokenKind<'t, A = ()> { - Alternative(Alternative<'t, A>), - Class(Class), - Literal(Literal<'t>), +pub enum BranchKind<'t, A> { + Alternation(Alternation<'t, A>), + Concatenation(Concatenation<'t, A>), Repetition(Repetition<'t, A>), - Separator(Separator), - Wildcard(Wildcard), } -impl<'t, A> TokenKind<'t, A> { - pub fn into_owned(self) -> TokenKind<'static, A> { +impl<'t, A> BranchKind<'t, A> { + pub fn into_owned(self) -> BranchKind<'static, A> { match self { - TokenKind::Alternative(alternative) => alternative.into_owned().into(), - TokenKind::Class(class) => TokenKind::Class(class), - TokenKind::Literal(Literal { - text, - is_case_insensitive, - }) => TokenKind::Literal(Literal { - text: text.into_owned().into(), - is_case_insensitive, - }), - TokenKind::Repetition(repetition) => repetition.into_owned().into(), - TokenKind::Separator(_) => TokenKind::Separator(Separator), - TokenKind::Wildcard(wildcard) => TokenKind::Wildcard(wildcard), + BranchKind::Alternation(alternative) => alternative.into_owned().into(), + BranchKind::Concatenation(concatenation) => concatenation.into_owned().into(), + BranchKind::Repetition(repetition) => repetition.into_owned().into(), } } - pub fn unannotate(self) -> TokenKind<'t, ()> { - match self { - TokenKind::Alternative(alternative) => TokenKind::Alternative(alternative.unannotate()), - TokenKind::Class(class) => TokenKind::Class(class), - TokenKind::Literal(literal) => TokenKind::Literal(literal), - TokenKind::Repetition(repetition) => TokenKind::Repetition(repetition.unannotate()), - TokenKind::Separator(_) => TokenKind::Separator(Separator), - TokenKind::Wildcard(wildcard) => TokenKind::Wildcard(wildcard), - } - } + pub fn tokens(&self) -> Junction<&[Token<'t, A>]> { + use BranchKind::{Alternation, Concatenation, Repetition}; + use Junction::{Conjunctive, Disjunctive}; - pub fn unroot(&mut self) -> bool { match self { - TokenKind::Wildcard(Wildcard::Tree { ref mut has_root }) => { - mem::replace(has_root, false) - }, - _ => false, + Alternation(alternative) => Disjunctive(alternative.tokens()), + Concatenation(concatenation) => Conjunctive(concatenation.tokens()), + Repetition(repetition) => Conjunctive(repetition.tokens()), } } - pub fn variance(&self) -> Variance - where - T: Invariance, - for<'i> &'i TokenKind<'t, A>: UnitVariance, - { - self.unit_variance() - } - - pub fn has_sub_tokens(&self) -> bool { - // It is not necessary to detect empty branches or sub-expressions. - matches!(self, TokenKind::Alternative(_) | TokenKind::Repetition(_)) - } - - pub fn is_component_boundary(&self) -> bool { - matches!( - self, - TokenKind::Separator(_) | TokenKind::Wildcard(Wildcard::Tree { .. 
}) - ) - } - pub fn is_capturing(&self) -> bool { - use TokenKind::{Alternative, Class, Repetition, Wildcard}; - - matches!( - self, - Alternative(_) | Class(_) | Repetition(_) | Wildcard(_), - ) + matches!(self, BranchKind::Alternation(_) | BranchKind::Repetition(_)) } } -impl<'t, A> From> for TokenKind<'t, A> { - fn from(alternative: Alternative<'t, A>) -> Self { - TokenKind::Alternative(alternative) +impl<'t, A> From> for BranchKind<'t, A> { + fn from(alternative: Alternation<'t, A>) -> Self { + BranchKind::Alternation(alternative) } } -impl From for TokenKind<'_, A> { - fn from(class: Class) -> Self { - TokenKind::Class(class) +impl<'t, A> From> for BranchKind<'t, A> { + fn from(concatenation: Concatenation<'t, A>) -> Self { + BranchKind::Concatenation(concatenation) } } -impl<'t, A> From> for TokenKind<'t, A> { +impl<'t, A> From> for BranchKind<'t, A> { fn from(repetition: Repetition<'t, A>) -> Self { - TokenKind::Repetition(repetition) + BranchKind::Repetition(repetition) } } -impl From for TokenKind<'static, A> { - fn from(wildcard: Wildcard) -> Self { - TokenKind::Wildcard(wildcard) - } +#[derive(Clone, Debug)] +pub enum LeafKind<'t> { + Class(Class), + Literal(Literal<'t>), + Separator(Separator), + Wildcard(Wildcard), } -impl<'i, 't, A> UnitBreadth for &'i TokenKind<'t, A> { - fn unit_breadth(self) -> Boundedness { +impl<'t> LeafKind<'t> { + pub fn into_owned(self) -> LeafKind<'static> { match self { - TokenKind::Alternative(ref alternative) => alternative.unit_breadth(), - TokenKind::Class(ref class) => class.unit_breadth(), - TokenKind::Literal(ref literal) => literal.unit_breadth(), - TokenKind::Repetition(ref repetition) => repetition.unit_breadth(), - TokenKind::Separator(ref separator) => separator.unit_breadth(), - TokenKind::Wildcard(ref wildcard) => wildcard.unit_breadth(), + LeafKind::Class(class) => LeafKind::Class(class), + LeafKind::Literal(Literal { + text, + is_case_insensitive, + }) => LeafKind::Literal(Literal { + text: text.into_owned().into(), + is_case_insensitive, + }), + LeafKind::Separator(_) => LeafKind::Separator(Separator), + LeafKind::Wildcard(wildcard) => LeafKind::Wildcard(wildcard), } } -} -impl<'i, 't, A> UnitDepth for &'i TokenKind<'t, A> { - fn unit_depth(self) -> Boundedness { + pub fn unroot(&mut self) -> bool { match self { - TokenKind::Alternative(ref alternative) => alternative.unit_depth(), - TokenKind::Class(ref class) => class.unit_depth(), - TokenKind::Literal(ref literal) => literal.unit_depth(), - TokenKind::Repetition(ref repetition) => repetition.unit_depth(), - TokenKind::Separator(ref separator) => separator.unit_depth(), - TokenKind::Wildcard(ref wildcard) => wildcard.unit_depth(), + LeafKind::Wildcard(ref mut wildcard) => wildcard.unroot(), + _ => false, } } -} -impl<'i, 't, A, T> UnitVariance for &'i TokenKind<'t, A> -where - &'i Class: UnitVariance, - &'i Literal<'t>: UnitVariance, - &'i Separator: UnitVariance, - T: Invariance, -{ - fn unit_variance(self) -> Variance { + pub fn boundary(&self) -> Option { match self { - TokenKind::Alternative(ref alternative) => alternative.unit_variance(), - TokenKind::Class(ref class) => class.unit_variance(), - TokenKind::Literal(ref literal) => literal.unit_variance(), - TokenKind::Repetition(ref repetition) => repetition.unit_variance(), - TokenKind::Separator(ref separator) => separator.unit_variance(), - TokenKind::Wildcard(_) => Variance::Variant(Boundedness::Open), + LeafKind::Separator(_) => Some(Boundary::Separator), + LeafKind::Wildcard(Wildcard::ZeroOrMore(_)) => 
Some(Boundary::Component), + _ => None, } } -} -#[derive(Clone, Debug)] -pub struct Alternative<'t, A = ()>(Vec>>); - -impl<'t, A> Alternative<'t, A> { - pub fn into_owned(self) -> Alternative<'static, A> { - Alternative( - self.0 - .into_iter() - .map(|tokens| tokens.into_iter().map(Token::into_owned).collect()) - .collect(), - ) + pub fn is_capturing(&self) -> bool { + matches!(self, LeafKind::Class(_) | LeafKind::Wildcard(_)) } +} - pub fn unannotate(self) -> Alternative<'t, ()> { - let Alternative(branches) = self; - Alternative( - branches - .into_iter() - .map(|branch| branch.into_iter().map(Token::unannotate).collect()) - .collect(), - ) +impl From for LeafKind<'static> { + fn from(class: Class) -> Self { + LeafKind::Class(class) } +} - pub fn branches(&self) -> &Vec>> { - &self.0 +impl<'t> From> for LeafKind<'static> { + fn from(literal: Literal<'t>) -> Self { + LeafKind::Literal(literal) } } -impl<'t, A> From>>> for Alternative<'t, A> { - fn from(alternatives: Vec>>) -> Self { - Alternative(alternatives) +impl From for LeafKind<'static> { + fn from(separator: Separator) -> Self { + LeafKind::Separator(separator) } } -impl<'i, 't, A> UnitBreadth for &'i Alternative<'t, A> { - fn unit_breadth(self) -> Boundedness { - self.branches() - .iter() - .map(|tokens| tokens.iter().composite_breadth()) - .composite_breadth() +impl From for LeafKind<'static> { + fn from(wildcard: Wildcard) -> Self { + LeafKind::Wildcard(wildcard) } } -impl<'i, 't, A> UnitDepth for &'i Alternative<'t, A> { - fn unit_depth(self) -> Boundedness { - self.branches() - .iter() - .map(|tokens| tokens.iter().composite_depth()) - .composite_depth() +#[derive(Clone, Debug)] +pub struct Alternation<'t, A>(Vec>); + +impl<'t, A> Alternation<'t, A> { + pub fn into_owned(self) -> Alternation<'static, A> { + Alternation(self.0.into_iter().map(Token::into_owned).collect()) } -} -impl<'i, 't, A, T> UnitVariance for &'i Alternative<'t, A> -where - T: Invariance, - &'i Token<'t, A>: UnitVariance, -{ - fn unit_variance(self) -> Variance { - self.branches() - .iter() - .map(|tokens| tokens.iter().conjunctive_variance()) - .disjunctive_variance() + pub fn tokens(&self) -> &[Token<'t, A>] { + &self.0 } } +impl<'t, A> From>> for Alternation<'t, A> { + fn from(tokens: Vec>) -> Self { + Alternation(tokens) + } +} + +//impl<'i, 't, A> UnitBreadth for &'i Alternation<'t, A> { +// fn unit_breadth(self) -> Bound { +// self.branches() +// .iter() +// .map(|tokens| tokens.iter().composite_breadth()) +// .composite_breadth() +// } +//} +// +//impl<'i, 't, A> UnitDepth for &'i Alternation<'t, A> { +// fn unit_depth(self) -> Bound { +// self.branches() +// .iter() +// .map(|tokens| tokens.iter().composite_depth()) +// .composite_depth() +// } +//} +// +//impl<'i, 't, A, T> UnitVariance for &'i Alternation<'t, A> +//where +// T: Invariance, +// &'i Token<'t, A>: UnitVariance, +//{ +// fn unit_variance(self) -> Variance { +// self.branches() +// .iter() +// .map(|tokens| tokens.iter().conjunctive_variance()) +// .disjunctive_variance() +// } +//} + #[derive(Clone, Copy, Debug)] pub enum Archetype { Character(char), + // TODO: A range archetype spans Unicode code points. This should be clearly documented and + // should elegantly handle Unicode arguments that cannot be represented this way. For + // example, what happens if a user specifies a range between two grapheme clusters that + // each require more than one 32-bit code point? 
Range(char, char), } impl Archetype { - fn domain_variance(&self) -> Variance { + fn variance(&self) -> Variance { match self { Archetype::Character(x) => { if PATHS_ARE_CASE_INSENSITIVE { - Variance::Variant(Boundedness::Closed) + Variance::Variant(Bound::Bounded) } else { Variance::Invariant(*x) @@ -475,7 +671,7 @@ impl Archetype { }, Archetype::Range(a, b) => { if (a != b) || PATHS_ARE_CASE_INSENSITIVE { - Variance::Variant(Boundedness::Closed) + Variance::Variant(Bound::Bounded) } else { Variance::Invariant(*a) @@ -499,7 +695,7 @@ impl From<(char, char)> for Archetype { impl<'i, 't> UnitVariance> for &'i Archetype { fn unit_variance(self) -> Variance> { - self.domain_variance() + self.variance() .map_invariance(|invariance| invariance.to_string().into_nominal_text()) } } @@ -509,7 +705,7 @@ impl<'i> UnitVariance for &'i Archetype { // This is pessimistic and assumes that the code point will require four bytes when encoded // as UTF-8. This is technically possible, but most commonly only one or two bytes will be // required. - self.domain_variance().map_invariance(|_| 4.into()) + self.variance().map_invariance(|_| 4.into()) } } @@ -536,14 +732,14 @@ impl<'i> UnitDepth for &'i Class {} impl<'i, T> UnitVariance for &'i Class where &'i Archetype: UnitVariance, - T: Invariance, + T: Invariant, { fn unit_variance(self) -> Variance { if self.is_negated { // It is not feasible to encode a character class that matches all UTF-8 text and // therefore nothing when negated, and so a character class must be variant if it is // negated. - Variance::Variant(Boundedness::Closed) + Variance::Variant(Bound::Bounded) } else { // TODO: This ignores casing groups, such as in the pattern `[aA]`. @@ -552,6 +748,25 @@ where } } +#[derive(Clone, Debug)] +pub struct Concatenation<'t, A>(Vec>); + +impl<'t, A> Concatenation<'t, A> { + pub fn into_owned(self) -> Concatenation<'static, A> { + Concatenation(self.0.into_iter().map(Token::into_owned).collect()) + } + + pub fn tokens(&self) -> &[Token<'t, A>] { + &self.0 + } +} + +impl<'t, A> From>> for Concatenation<'t, A> { + fn from(tokens: Vec>) -> Self { + Concatenation(tokens) + } +} + #[derive(Clone, Copy, Debug)] pub enum Evaluation { Eager, @@ -569,9 +784,9 @@ impl<'t> Literal<'t> { self.text.as_ref() } - fn domain_variance(&self) -> Variance<&Cow<'t, str>> { + fn variance(&self) -> Variance<&Cow<'t, str>> { if self.has_variant_casing() { - Variance::Variant(Boundedness::Closed) + Variance::Variant(Bound::Bounded) } else { Variance::Invariant(&self.text) @@ -595,20 +810,20 @@ impl<'i, 't> UnitDepth for &'i Literal<'t> {} impl<'i, 't> UnitVariance> for &'i Literal<'t> { fn unit_variance(self) -> Variance> { - self.domain_variance() + self.variance() .map_invariance(|invariance| invariance.clone().into_nominal_text()) } } impl<'i, 't> UnitVariance for &'i Literal<'t> { fn unit_variance(self) -> Variance { - self.domain_variance() + self.variance() .map_invariance(|invariance| invariance.as_bytes().len().into()) } } #[derive(Clone, Debug)] -pub struct Repetition<'t, A = ()> { +pub struct Repetition<'t, A> { tokens: Vec>, lower: usize, // This representation is not ideal, as it does not statically enforce the invariant that the @@ -633,20 +848,7 @@ impl<'t, A> Repetition<'t, A> { } } - pub fn unannotate(self) -> Repetition<'t, ()> { - let Repetition { - tokens, - lower, - upper, - } = self; - Repetition { - tokens: tokens.into_iter().map(Token::unannotate).collect(), - lower, - upper, - } - } - - pub fn tokens(&self) -> &Vec> { + pub fn tokens(&self) -> 
&[Token<'t, A>] { &self.tokens } @@ -657,37 +859,33 @@ impl<'t, A> Repetition<'t, A> { pub fn is_converged(&self) -> bool { self.upper.map_or(false, |upper| self.lower == upper) } - - fn walk(&self) -> Walk<'_, 't, A> { - Walk::from(&self.tokens) - } } impl<'i, 't, A> UnitBreadth for &'i Repetition<'t, A> { - fn unit_breadth(self) -> Boundedness { + fn unit_breadth(self) -> Bound { self.tokens().iter().composite_breadth() } } impl<'i, 't, A> UnitDepth for &'i Repetition<'t, A> { - fn unit_depth(self) -> Boundedness { + fn unit_depth(self) -> Bound { let (_, upper) = self.bounds(); if upper.is_none() && self.walk().any(|(_, token)| token.is_component_boundary()) { - Boundedness::Open + Bound::Unbounded } else { - Boundedness::Closed + Bound::Bounded } } } impl<'i, 't, A, T> UnitVariance for &'i Repetition<'t, A> where - T: Invariance, + T: Invariant, &'i Token<'t, A>: UnitVariance, { fn unit_variance(self) -> Variance { - use Boundedness::Open; + use Bound::Unbounded; use TokenKind::Separator; use Variance::Variant; @@ -703,8 +901,8 @@ where (left.kind(), left.unit_variance()), (right.kind(), right.unit_variance()), ) { - ((Separator(_), _), (_, Variant(Open))) => Ok(right), - ((_, Variant(Open)), (Separator(_), _)) => Ok(left), + ((Separator(_), _), (_, Variant(Unbounded))) => Ok(right), + ((_, Variant(Unbounded)), (Separator(_), _)) => Ok(left), _ => Err((left, right)), } }) @@ -719,7 +917,7 @@ where Some(_) if self.is_converged() => { variance.map_invariance(|invariance| invariance * self.lower) }, - _ => variance + Variant(Open), + _ => variance + Variant(Unbounded), } } } @@ -756,25 +954,34 @@ pub enum Wildcard { Tree { has_root: bool }, } +impl Wildcard { + pub fn unroot(&mut self) -> bool { + match self { + Wildcard::Tree { ref mut has_root } => mem::replace(has_root, false), + _ => false, + } + } +} + impl<'i> UnitBreadth for &'i Wildcard { - fn unit_breadth(self) -> Boundedness { + fn unit_breadth(self) -> Bound { match self { - Wildcard::One => Boundedness::Closed, - _ => Boundedness::Open, + Wildcard::One => Bound::Bounded, + _ => Bound::Unbounded, } } } impl<'i> UnitDepth for &'i Wildcard { - fn unit_depth(self) -> Boundedness { + fn unit_depth(self) -> Bound { match self { - Wildcard::Tree { .. } => Boundedness::Open, - _ => Boundedness::Closed, + Wildcard::Tree { .. } => Bound::Unbounded, + _ => Bound::Bounded, } } } -#[derive(Clone, Copy, Debug, Eq, PartialEq)] +#[derive(Clone, Copy, Debug, Eq, Hash, PartialEq)] pub enum Position { Conjunctive { depth: usize }, Disjunctive { depth: usize, branch: usize }, @@ -883,27 +1090,23 @@ where fn next(&mut self) -> Option { if let Some((position, token)) = self.buffer.pop_front() { - match token.kind() { - TokenKind::Alternative(ref alternative) => { - self.buffer - .extend(alternative.branches().iter().enumerate().flat_map( - |(branch, tokens)| { - tokens - .iter() - .map(move |token| (position.diverge(branch), token)) - }, - )); - }, - TokenKind::Repetition(ref repetition) => { - self.buffer.extend( - repetition - .tokens() - .iter() - .map(|token| (position.converge(), token)), - ); - }, + match token.tokens() { + Some(Junction::Conjunctive(tokens)) => self + .buffer + .extend(tokens.iter().map(|token| (position.converge(), token))), + // TODO: Previously, this pushed a divergent position **for each token in an + // implicitly concatenated branch**. This code does this for each token + // within an alternation, but these are (almost?) always themselves + // concatenations now. 
The tokens within such a concatenation will be + // conjunctive. Some code may break due to bad assumptions here. + Some(Junction::Disjunctive(tokens)) => self.buffer.extend( + tokens + .iter() + .enumerate() + .map(|(branch, token)| (position.diverge(branch), token)), + ), _ => {}, - } + }; Some((position, token)) } else { @@ -949,7 +1152,7 @@ pub struct Component<'i, 't, A = ()>(Vec<&'i Token<'t, A>>); impl<'i, 't, A> Component<'i, 't, A> { pub fn tokens(&self) -> &[&'i Token<'t, A>] { - self.0.as_ref() + &self.0 } pub fn literal(&self) -> Option> { @@ -976,13 +1179,13 @@ impl<'i, 't, A> Component<'i, 't, A> { pub fn variance(&self) -> Variance where - T: Invariance, + T: Invariant, &'i Token<'t, A>: UnitVariance, { self.0.iter().copied().conjunctive_variance() } - pub fn depth(&self) -> Boundedness { + pub fn depth(&self) -> Bound { self.0.iter().copied().composite_depth() } } @@ -999,7 +1202,7 @@ where I::Item: IntoIterator>, { Token { - kind: Alternative( + topology: Alternation( tokens .into_iter() .map(|tokens| tokens.into_iter().map(Token::unannotate).collect()) @@ -1069,6 +1272,8 @@ where }) } +// TODO: Test against token trees constructed directly from tokens (rather than parsed from +// expressions). #[cfg(test)] mod tests { use crate::token::{self, TokenKind, TokenTree}; @@ -1079,7 +1284,7 @@ mod tests { let literals: Vec<_> = tokenized .tokens() .iter() - .filter_map(|token| match token.kind { + .filter_map(|token| match token.topology { TokenKind::Literal(ref literal) => Some(literal), _ => None, }) diff --git a/src/token/parse.rs b/src/token/parse.rs index e883d76..bb3f281 100644 --- a/src/token/parse.rs +++ b/src/token/parse.rs @@ -9,13 +9,11 @@ use thiserror::Error; use crate::diagnostics::{LocatedError, Span}; use crate::token::{ - Alternative, Archetype, Class, Evaluation, Literal, Repetition, Separator, Token, TokenKind, - Tokenized, Wildcard, + Alternation, Archetype, Class, Evaluation, ExpressionMetadata, Literal, Repetition, Separator, + Token, TokenKind, Tokenized, Wildcard, }; use crate::PATHS_ARE_CASE_INSENSITIVE; -pub type Annotation = Span; - type Expression<'i> = Located<'i, str>; type Input<'i> = Stateful, ParserState>; type ErrorStack<'i> = NomError>; @@ -235,7 +233,7 @@ pub fn parse(expression: &str) -> Result { flags(move |_| move |input: Input<'i>| Ok((input, ())))(input) } - fn literal(input: Input) -> ParseResult> { + fn literal(input: Input) -> ParseResult> { combinator::map( combinator::verify( bytes::escaped_transform( @@ -268,13 +266,13 @@ pub fn parse(expression: &str) -> Result { )(input) } - fn separator(input: Input) -> ParseResult> { + fn separator(input: Input) -> ParseResult> { combinator::value(TokenKind::Separator(Separator), bytes::tag("/"))(input) } fn wildcard<'i>( terminator: impl Clone + Parser, Input<'i>, ErrorStack<'i>>, - ) -> impl FnMut(Input<'i>) -> ParseResult<'i, TokenKind<'i, Annotation>> { + ) -> impl FnMut(Input<'i>) -> ParseResult<'i, TokenKind<'i, ExpressionMetadata>> { branch::alt(( error::context( "exactly-one", @@ -358,7 +356,7 @@ pub fn parse(expression: &str) -> Result { )) } - fn repetition(input: Input) -> ParseResult> { + fn repetition(input: Input) -> ParseResult> { fn bounds(input: Input) -> ParseResult<(usize, Option)> { type BoundResult = Result::Err>; @@ -421,7 +419,7 @@ pub fn parse(expression: &str) -> Result { )(input) } - fn class(input: Input) -> ParseResult> { + fn class(input: Input) -> ParseResult> { fn archetypes(input: Input) -> ParseResult> { let escaped_character = |input| { branch::alt(( @@ -459,7 
+457,7 @@ pub fn parse(expression: &str) -> Result { )(input) } - fn alternative(input: Input) -> ParseResult> { + fn alternative(input: Input) -> ParseResult> { sequence::delimited( bytes::tag("{"), combinator::map( @@ -472,7 +470,7 @@ pub fn parse(expression: &str) -> Result { }), ), ), - |alternatives: Vec>| Alternative::from(alternatives).into(), + |alternatives: Vec>| Alternation::from(alternatives).into(), ), bytes::tag("}"), )(input) @@ -480,12 +478,12 @@ pub fn parse(expression: &str) -> Result { fn glob<'i>( terminator: impl 'i + Clone + Parser, Input<'i>, ErrorStack<'i>>, - ) -> impl Parser, Vec>, ErrorStack<'i>> { + ) -> impl Parser, Vec>, ErrorStack<'i>> { fn annotate<'i, F>( parser: F, - ) -> impl FnMut(Input<'i>) -> ParseResult<'i, Token<'i, Annotation>> + ) -> impl FnMut(Input<'i>) -> ParseResult<'i, Token<'i, ExpressionMetadata>> where - F: 'i + Parser, TokenKind<'i, Annotation>, ErrorStack<'i>>, + F: 'i + Parser, TokenKind<'i, ExpressionMetadata>, ErrorStack<'i>>, { combinator::map(pori::span(parser), |(span, kind)| Token::new(kind, span)) } diff --git a/src/token/variance.rs b/src/token/variance/mod.rs similarity index 59% rename from src/token/variance.rs rename to src/token/variance/mod.rs index 249fd49..70f126a 100644 --- a/src/token/variance.rs +++ b/src/token/variance/mod.rs @@ -1,18 +1,29 @@ +mod text; + use itertools::Itertools as _; use std::borrow::Cow; -use std::collections::VecDeque; +use std::cmp::Ordering; use std::ops::{Add, Mul}; -use crate::encode; use crate::token::{self, Separator, Token}; -use crate::PATHS_ARE_CASE_INSENSITIVE; -pub trait Invariance: +pub use crate::token::variance::text::{IntoInvariantText, InvariantText}; + +pub trait Invariant: Add + Eq + Mul + PartialEq + Sized { fn empty() -> Self; } +pub trait VarianceTerm { + fn term(&self) -> Variance; +} + +pub trait VarianceFold { + fn fold(&self) -> Variance; +} + +// TODO: Replace with `VarianceTerm`. pub trait UnitVariance { fn unit_variance(self) -> Variance; } @@ -26,7 +37,7 @@ impl UnitVariance for Variance { pub trait ConjunctiveVariance: Iterator + Sized where Self::Item: UnitVariance, - T: Invariance, + T: Invariant, { fn conjunctive_variance(self) -> Variance { self.map(UnitVariance::unit_variance) @@ -39,14 +50,14 @@ impl ConjunctiveVariance for I where I: Iterator, I::Item: UnitVariance, - T: Invariance, + T: Invariant, { } pub trait DisjunctiveVariance: Iterator + Sized where Self::Item: UnitVariance, - T: Invariance, + T: Invariant, { fn disjunctive_variance(self) -> Variance { // TODO: This implementation is incomplete. 
Unbounded variance (and unbounded depth) are @@ -69,7 +80,7 @@ where first } else { - Variance::Variant(Boundedness::Closed) + Variance::Variant(Bound::Bounded) } } } @@ -78,24 +89,24 @@ impl DisjunctiveVariance for I where I: Iterator, I::Item: UnitVariance, - T: Invariance, + T: Invariant, { } pub trait UnitDepth: Sized { - fn unit_depth(self) -> Boundedness { - Boundedness::Closed + fn unit_depth(self) -> Bound { + Bound::Bounded } } -impl UnitDepth for Boundedness { - fn unit_depth(self) -> Boundedness { +impl UnitDepth for Bound { + fn unit_depth(self) -> Bound { self } } pub trait CompositeDepth: Iterator + Sized { - fn composite_depth(self) -> Boundedness; + fn composite_depth(self) -> Bound; } impl CompositeDepth for I @@ -103,30 +114,33 @@ where I: Iterator, I::Item: UnitDepth, { - fn composite_depth(self) -> Boundedness { - if self.map(UnitDepth::unit_depth).any(|depth| depth.is_open()) { - Boundedness::Open + fn composite_depth(self) -> Bound { + if self + .map(UnitDepth::unit_depth) + .any(|depth| depth.is_unbounded()) + { + Bound::Unbounded } else { - Boundedness::Closed + Bound::Bounded } } } pub trait UnitBreadth: Sized { - fn unit_breadth(self) -> Boundedness { - Boundedness::Closed + fn unit_breadth(self) -> Bound { + Bound::Bounded } } -impl UnitBreadth for Boundedness { - fn unit_breadth(self) -> Boundedness { +impl UnitBreadth for Bound { + fn unit_breadth(self) -> Bound { self } } pub trait CompositeBreadth: Iterator + Sized { - fn composite_breadth(self) -> Boundedness; + fn composite_breadth(self) -> Bound; } impl CompositeBreadth for I @@ -134,45 +148,19 @@ where I: Iterator, I::Item: UnitBreadth, { - fn composite_breadth(self) -> Boundedness { + fn composite_breadth(self) -> Bound { if self .map(UnitBreadth::unit_breadth) - .any(|breadth| breadth.is_open()) + .any(|breadth| breadth.is_unbounded()) { - Boundedness::Open + Bound::Unbounded } else { - Boundedness::Closed + Bound::Bounded } } } -pub trait IntoInvariantText<'t> { - fn into_nominal_text(self) -> InvariantText<'t>; - - fn into_structural_text(self) -> InvariantText<'t>; -} - -impl<'t> IntoInvariantText<'t> for Cow<'t, str> { - fn into_nominal_text(self) -> InvariantText<'t> { - InvariantFragment::Nominal(self).into() - } - - fn into_structural_text(self) -> InvariantText<'t> { - InvariantFragment::Structural(self).into() - } -} - -impl IntoInvariantText<'static> for String { - fn into_nominal_text(self) -> InvariantText<'static> { - InvariantFragment::Nominal(self.into()).into() - } - - fn into_structural_text(self) -> InvariantText<'static> { - InvariantFragment::Structural(self.into()).into() - } -} - #[derive(Clone, Copy, Debug, Eq, Ord, PartialEq, PartialOrd)] pub struct InvariantSize(usize); @@ -202,7 +190,7 @@ impl From for InvariantSize { } } -impl Invariance for InvariantSize { +impl Invariant for InvariantSize { fn empty() -> Self { InvariantSize(0) } @@ -220,199 +208,64 @@ impl Mul for InvariantSize { } } -// TODO: The derived `PartialEq` implementation is incomplete and does not detect contiguous like -// fragments that are equivalent to an aggregated fragment. This works, but relies on -// constructing `InvariantText` by consistently appending fragments. 
-#[derive(Clone, Debug, Eq, PartialEq)] -pub struct InvariantText<'t> { - fragments: VecDeque>, -} - -impl<'t> InvariantText<'t> { - pub fn new() -> Self { - InvariantText { - fragments: VecDeque::new(), - } - } - - pub fn into_owned(self) -> InvariantText<'static> { - let InvariantText { fragments } = self; - InvariantText { - fragments: fragments - .into_iter() - .map(InvariantFragment::into_owned) - .collect(), - } - } - - pub fn to_string(&self) -> Cow<'t, str> { - self.fragments - .iter() - .map(|fragment| fragment.as_string().clone()) - .reduce(|text, fragment| text + fragment) - .unwrap_or(Cow::Borrowed("")) - } - - pub fn repeat(self, n: usize) -> Self { - if n == 0 { - self - } - else { - let InvariantText { fragments } = self; - let n = (n - 1) - .checked_mul(fragments.len()) - .expect("overflow determining invariant text"); - let first = fragments.clone(); - InvariantText { - fragments: first - .into_iter() - .chain(fragments.into_iter().cycle().take(n)) - .collect(), - } - } - } -} - -impl<'t> Add for InvariantText<'t> { - type Output = Self; - - fn add(self, other: Self) -> Self::Output { - let InvariantText { - fragments: mut left, - } = self; - let InvariantText { - fragments: mut right, - } = other; - let end = left.pop_back(); - let start = right.pop_front(); - let InvariantText { fragments: middle } = match (end, start) { - (Some(left), Some(right)) => left + right, - (Some(middle), None) | (None, Some(middle)) => middle.into(), - (None, None) => InvariantText::new(), - }; - InvariantText { - fragments: left.into_iter().chain(middle).chain(right).collect(), - } - } -} - -impl<'t> Add> for InvariantText<'t> { - type Output = Self; - - fn add(self, fragment: InvariantFragment<'t>) -> Self::Output { - self + Self::from(fragment) - } -} - -impl<'t> Default for InvariantText<'t> { - fn default() -> Self { - Self::new() - } -} - -impl<'t> From> for InvariantText<'t> { - fn from(fragment: InvariantFragment<'t>) -> Self { - InvariantText { - fragments: [fragment].into_iter().collect(), - } - } -} - -impl<'t> Invariance for InvariantText<'t> { - fn empty() -> Self { - InvariantText::new() - } -} - -impl<'t> Mul for InvariantText<'t> { - type Output = Self; - - fn mul(self, n: usize) -> Self::Output { - self.repeat(n) - } -} - -#[derive(Clone, Debug, Eq)] -enum InvariantFragment<'t> { - Nominal(Cow<'t, str>), - Structural(Cow<'t, str>), +// TODO: Remove the default for type parameter `T` and introduce types that can represent the +// bounds of invariants. 
+#[derive(Clone, Copy, Debug, Eq, Hash, PartialEq)] +pub enum Bound { + Bounded(T), + Unbounded, } -impl<'t> InvariantFragment<'t> { - pub fn into_owned(self) -> InvariantFragment<'static> { - use InvariantFragment::{Nominal, Structural}; - +impl Bound { + pub fn map_bounded(self, f: F) -> Bound + where + F: FnOnce(T) -> U, + { match self { - Nominal(text) => Nominal(text.into_owned().into()), - Structural(text) => Structural(text.into_owned().into()), + Bound::Bounded(bound) => Bound::Bounded(f(bound)), + _ => Bound::Unbounded, } } - pub fn as_string(&self) -> &Cow<'t, str> { + pub fn as_ref(&self) -> Bound<&T> { match self { - InvariantFragment::Nominal(ref text) | InvariantFragment::Structural(ref text) => text, + Bound::Bounded(ref bound) => Bound::Bounded(bound), + _ => Bound::Unbounded, } } -} - -impl<'t> Add for InvariantFragment<'t> { - type Output = InvariantText<'t>; - - fn add(self, other: Self) -> Self::Output { - use InvariantFragment::{Nominal, Structural}; - match (self, other) { - (Nominal(left), Nominal(right)) => InvariantText { - fragments: [Nominal(left + right)].into_iter().collect(), - }, - (Structural(left), Structural(right)) => InvariantText { - fragments: [Structural(left + right)].into_iter().collect(), - }, - (left, right) => InvariantText { - fragments: [left, right].into_iter().collect(), - }, - } + pub fn is_bounded(&self) -> bool { + matches!(self, Bound::Bounded(_)) } -} -impl<'t> PartialEq for InvariantFragment<'t> { - fn eq(&self, other: &Self) -> bool { - use InvariantFragment::{Nominal, Structural}; - - match (self, other) { - (Nominal(ref left), Nominal(ref right)) => { - if PATHS_ARE_CASE_INSENSITIVE { - // This comparison uses Unicode simple case folding. It would be better to use - // full case folding (and better still to use case folding appropriate for the - // language of the text), but this approach is used to have consistent results - // with the regular expression encoding of compiled globs. A more comprehensive - // alternative would be to use something like the `focaccia` crate. See also - // `CharExt::has_casing`. - encode::case_folded_eq(left.as_ref(), right.as_ref()) - } - else { - left == right - } - }, - (Structural(ref left), Structural(ref right)) => left == right, - _ => false, - } + pub fn is_unbounded(&self) -> bool { + matches!(self, Bound::Unbounded) } } -#[derive(Clone, Copy, Debug, Eq, Hash, PartialEq)] -pub enum Boundedness { - Closed, - Open, +impl Ord for Bound +where + T: Ord, +{ + fn cmp(&self, rhs: &Self) -> Ordering { + self.partial_cmp(rhs).unwrap() + } } -impl Boundedness { - pub fn is_closed(&self) -> bool { - matches!(self, Boundedness::Closed) - } +impl PartialOrd for Bound +where + T: PartialOrd, +{ + fn partial_cmp(&self, rhs: &Self) -> Option { + use Bound::{Bounded, Unbounded}; + use Ordering::{Equal, Greater, Less}; - pub fn is_open(&self) -> bool { - matches!(self, Boundedness::Open) + match (self, rhs) { + (Unbounded, Bounded(_)) => Some(Greater), + (Bounded(_), Unbounded) => Some(Less), + (Bounded(ref lhs), Bounded(ref rhs)) => lhs.partial_cmp(rhs), + _ => Some(Equal), + } } } @@ -426,7 +279,7 @@ pub enum Variance { // does *not* match any component. Boundedness does **not** consider length, only whether or // not some part of an expression is constrained to a known set of matches. As such, both the // expressions `?` and `*` are variant with open bounds. 
- Variant(Boundedness), + Variant(Bound), } impl Variance { @@ -444,10 +297,10 @@ impl Variance { } } - pub fn boundedness(&self) -> Boundedness { + pub fn boundedness(&self) -> Bound { match self { Variance::Variant(ref boundedness) => *boundedness, - _ => Boundedness::Closed, + _ => Bound::Bounded, } } @@ -467,13 +320,15 @@ where type Output = Self; fn add(self, rhs: Self) -> Self::Output { - use Boundedness::{Closed, Open}; + use Bound::{Bounded, Unbounded}; use Variance::{Invariant, Variant}; match (self, rhs) { (Invariant(left), Invariant(right)) => Invariant(left + right), - (Variant(Open), Variant(Open)) => Variant(Open), - (Invariant(_) | Variant(_), Variant(_)) | (Variant(_), Invariant(_)) => Variant(Closed), + (Variant(Unbounded), Variant(Unbounded)) => Variant(Unbounded), + (Invariant(_) | Variant(_), Variant(_)) | (Variant(_), Invariant(_)) => { + Variant(Bounded) + }, } } } @@ -577,7 +432,7 @@ where component.variance::().boundedness(), ) }), - Some((Boundedness::Open, Boundedness::Open)), + Some((Bound::Unbounded, Bound::Unbounded)), ) } @@ -585,7 +440,7 @@ where mod tests { use std::path::{Path, PathBuf}; - use crate::token::variance::{self, Boundedness, InvariantSize, Variance}; + use crate::token::variance::{self, Bound, InvariantSize, Variance}; use crate::token::{self, TokenTree}; #[test] @@ -616,34 +471,34 @@ mod tests { #[test] fn tree_expression_variance() { - use Boundedness::{Closed, Open}; + use Bound::{Bounded, Unbounded}; use Variance::Variant; let tokenized = token::parse("**").unwrap(); assert!(matches!( tokenized.variance::(), - Variant(Open) + Variant(Unbounded) )); let tokenized = token::parse("<*/>*").unwrap(); assert!(matches!( tokenized.variance::(), - Variant(Open) + Variant(Unbounded) )); let tokenized = token::parse("</>*").unwrap(); assert!(matches!( tokenized.variance::(), - Variant(Open) + Variant(Unbounded) )); let tokenized = token::parse("foo/**").unwrap(); assert!(matches!( tokenized.variance::(), - Variant(Closed) + Variant(Bounded) )); let tokenized = token::parse("*").unwrap(); assert!(matches!( tokenized.variance::(), - Variant(Closed) + Variant(Bounded) )); } diff --git a/src/token/variance/text.rs b/src/token/variance/text.rs new file mode 100644 index 0000000..9edb93d --- /dev/null +++ b/src/token/variance/text.rs @@ -0,0 +1,213 @@ +use std::borrow::Cow; +use std::collections::VecDeque; +use std::ops::{Add, Mul}; + +use crate::encode; +use crate::token::variance::Invariant; +use crate::PATHS_ARE_CASE_INSENSITIVE; + +pub trait IntoInvariantText<'t> { + fn into_nominal_text(self) -> InvariantText<'t>; + + fn into_structural_text(self) -> InvariantText<'t>; +} + +impl<'t> IntoInvariantText<'t> for Cow<'t, str> { + fn into_nominal_text(self) -> InvariantText<'t> { + InvariantFragment::Nominal(self).into() + } + + fn into_structural_text(self) -> InvariantText<'t> { + InvariantFragment::Structural(self).into() + } +} + +impl IntoInvariantText<'static> for String { + fn into_nominal_text(self) -> InvariantText<'static> { + InvariantFragment::Nominal(self.into()).into() + } + + fn into_structural_text(self) -> InvariantText<'static> { + InvariantFragment::Structural(self.into()).into() + } +} + +// TODO: The derived `PartialEq` implementation is incomplete and does not detect contiguous like +// fragments that are equivalent to an aggregated fragment. This works, but relies on +// constructing `InvariantText` by consistently appending fragments. 
+#[derive(Clone, Debug, Eq, PartialEq)] +pub struct InvariantText<'t> { + fragments: VecDeque>, +} + +impl<'t> InvariantText<'t> { + pub fn new() -> Self { + InvariantText { + fragments: VecDeque::new(), + } + } + + pub fn into_owned(self) -> InvariantText<'static> { + let InvariantText { fragments } = self; + InvariantText { + fragments: fragments + .into_iter() + .map(InvariantFragment::into_owned) + .collect(), + } + } + + pub fn to_string(&self) -> Cow<'t, str> { + self.fragments + .iter() + .map(|fragment| fragment.as_string().clone()) + .reduce(|text, fragment| text + fragment) + .unwrap_or(Cow::Borrowed("")) + } + + pub fn repeat(self, n: usize) -> Self { + if n == 0 { + self + } + else { + let InvariantText { fragments } = self; + let n = (n - 1) + .checked_mul(fragments.len()) + .expect("overflow determining invariant text"); + let first = fragments.clone(); + InvariantText { + fragments: first + .into_iter() + .chain(fragments.into_iter().cycle().take(n)) + .collect(), + } + } + } +} + +impl<'t> Add for InvariantText<'t> { + type Output = Self; + + fn add(self, other: Self) -> Self::Output { + let InvariantText { + fragments: mut left, + } = self; + let InvariantText { + fragments: mut right, + } = other; + let end = left.pop_back(); + let start = right.pop_front(); + let InvariantText { fragments: middle } = match (end, start) { + (Some(left), Some(right)) => left + right, + (Some(middle), None) | (None, Some(middle)) => middle.into(), + (None, None) => InvariantText::new(), + }; + InvariantText { + fragments: left.into_iter().chain(middle).chain(right).collect(), + } + } +} + +impl<'t> Add> for InvariantText<'t> { + type Output = Self; + + fn add(self, fragment: InvariantFragment<'t>) -> Self::Output { + self + Self::from(fragment) + } +} + +impl<'t> Default for InvariantText<'t> { + fn default() -> Self { + Self::new() + } +} + +impl<'t> From> for InvariantText<'t> { + fn from(fragment: InvariantFragment<'t>) -> Self { + InvariantText { + fragments: [fragment].into_iter().collect(), + } + } +} + +impl<'t> Invariant for InvariantText<'t> { + fn empty() -> Self { + InvariantText::new() + } +} + +impl<'t> Mul for InvariantText<'t> { + type Output = Self; + + fn mul(self, n: usize) -> Self::Output { + self.repeat(n) + } +} + +#[derive(Clone, Debug, Eq)] +enum InvariantFragment<'t> { + Nominal(Cow<'t, str>), + Structural(Cow<'t, str>), +} + +impl<'t> InvariantFragment<'t> { + pub fn into_owned(self) -> InvariantFragment<'static> { + use InvariantFragment::{Nominal, Structural}; + + match self { + Nominal(text) => Nominal(text.into_owned().into()), + Structural(text) => Structural(text.into_owned().into()), + } + } + + pub fn as_string(&self) -> &Cow<'t, str> { + match self { + InvariantFragment::Nominal(ref text) | InvariantFragment::Structural(ref text) => text, + } + } +} + +impl<'t> Add for InvariantFragment<'t> { + type Output = InvariantText<'t>; + + fn add(self, other: Self) -> Self::Output { + use InvariantFragment::{Nominal, Structural}; + + match (self, other) { + (Nominal(left), Nominal(right)) => InvariantText { + fragments: [Nominal(left + right)].into_iter().collect(), + }, + (Structural(left), Structural(right)) => InvariantText { + fragments: [Structural(left + right)].into_iter().collect(), + }, + (left, right) => InvariantText { + fragments: [left, right].into_iter().collect(), + }, + } + } +} + +impl<'t> PartialEq for InvariantFragment<'t> { + fn eq(&self, other: &Self) -> bool { + use InvariantFragment::{Nominal, Structural}; + + match (self, other) { + 
(Nominal(ref left), Nominal(ref right)) => { + if PATHS_ARE_CASE_INSENSITIVE { + // This comparison uses Unicode simple case folding. It would be better to use + // full case folding (and better still to use case folding appropriate for the + // language of the text), but this approach is used to have consistent results + // with the regular expression encoding of compiled globs. A more comprehensive + // alternative would be to use something like the `focaccia` crate. See also + // `CharExt::has_casing`. + encode::case_folded_eq(left.as_ref(), right.as_ref()) + } + else { + left == right + } + }, + (Structural(ref left), Structural(ref right)) => left == right, + _ => false, + } + } +}
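The new `Fold` trait is only wired into `Token::fold` as `todo!()` in this patch; the following is a minimal sketch of a possible implementor, assuming the trait as defined above (the `BoundaryCount` type is hypothetical and not part of the patch).

// Counts component and separator boundaries in a token tree by folding over its leaves.
struct BoundaryCount;

impl<'t, A> Fold<'t, A> for BoundaryCount {
    type Term = usize;

    // `initialize` and `finalize` use their default implementations.
    fn accumulate(
        &mut self,
        _branch: &BranchKind<'t, A>,
        accumulator: Self::Term,
        term: Self::Term,
    ) -> Self::Term {
        accumulator + term
    }

    fn term(&mut self, leaf: &LeafKind<'t>) -> Self::Term {
        usize::from(leaf.boundary().is_some())
    }
}

// Intended usage once `Token::fold` is implemented:
// let boundaries = token.fold(BoundaryCount).unwrap_or(0);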
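Likewise a sketch, assuming the generic `Junction<T>` and `Bound<T>` as defined in the patch: the `Junction` accessors tag how grouped tokens combine, and the `Ord`/`PartialOrd` implementations order `Unbounded` above any `Bounded(_)`.

let disjunction = Junction::Disjunctive(vec!['a', 'b']);
assert_eq!(disjunction.as_ref().len(), 2);
assert!(disjunction.conjunctive().is_none());

assert!(Bound::Unbounded > Bound::Bounded(usize::MAX));
assert_eq!(Bound::Bounded(1u64).map_bounded(|n| n + 1), Bound::Bounded(2));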
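Also illustrative, assuming `InvariantSize` and the `Add` implementation for `Variance` behave as shown in the patch: invariant terms add under conjunction, while mixing an invariant term with any variant term collapses to a bounded variant.

use Variance::{Invariant, Variant};

let sum = Invariant(InvariantSize::from(2)) + Invariant(InvariantSize::from(3));
assert!(matches!(sum, Invariant(size) if size == InvariantSize::from(5)));
assert!(matches!(
    Invariant(InvariantSize::from(2)) + Variant(Bound::Unbounded),
    Variant(_),
));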