Skip to content

Commit

Permalink
Implement Token::fold_map.
Browse files Browse the repository at this point in the history
This change highlights a serious disadvantage to an intrusive tree
representation: it is cumbersome and fragile to separate the
topological data (i.e., adjacent nodes) from the domain data in a branch.
To implement `fold_map`, which consumes and moves its data, bespoke
intermediate branch token representations and de/compositions are
needed.

This is fairly gross. There are a few options for a cleaner
implementation:

  1. Use an unintrusive tree implementation.
  2. Always copy the tree (implement `fold_map` over `&self`).
  3. Drain and fill branches instead of extracting tokens.

Ultimately, [1] is probably the way to go. In the meantime, [2] may look
a bit odd, betray the term "map", and perhaps compile to less optimal
code (not too sure about that last one though). [3] eliminates the need
for abstracting over branch composition (including strongly coupled
types like `BranchFold`), but requires making `Repetition` drainable
(`token` becomes `Option<Token<_>>`) and presents a somewhat unusual API
where `FoldMap::fold` receives a _drained_ `BranchKind` (using normal
looking APIs like `BranchKind::tokens` becomes an error in this
context). Making the APIs in [3] robust likely requires at least as much
code as is used in this change.
  • Loading branch information
olson-sean-k committed Jan 17, 2024
1 parent 5d737b9 commit c140681
Showing 1 changed file with 257 additions and 3 deletions.
260 changes: 257 additions & 3 deletions src/token/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,9 @@ pub use crate::token::variance::{Bound, Cardinality, Variance};
// TODO: Expression and glob trees differ only in their annotation data. This supports the
// distinction, but is inflexible and greatly limits any intermediate representation of a
// glob. Consider completely disjoint types for expression and glob trees, perhaps using an
// unintrusive tree data structure for both.
// unintrusive tree data structure for both. An unintrusive tree implementation would better
// support mapping operations like `Token::fold_map` and could share code if this crate is
// ever decomposed into intermediate crates for syntax, semantics, and automata.

pub type ExpressionToken<'t> = Token<'t, ExpressionMetadata>;
pub type ExpressionTree<'t> = Tokenized<'t, ExpressionMetadata>;
Expand Down Expand Up @@ -270,13 +272,36 @@ pub trait FoldMap<'t, A> {
fn fold(
&mut self,
annotation: A,
branch: BranchKind<'t, A>,
branch: BranchFold<'t, A>,
tokens: Vec<Token<'t, Self::Annotation>>,
) -> Option<Token<'t, Self::Annotation>>;

fn map(&mut self, annotation: A, leaf: LeafKind<'t>) -> Option<Token<'t, Self::Annotation>>;
}

/// Any function that maps one annotation to another is a `FoldMap`: tokens keep their topology
/// and only their annotations are replaced by the output of the function.
impl<'t, A, B, F> FoldMap<'t, A> for F
where
    F: FnMut(A) -> B,
{
    type Annotation = B;

    /// Recomposes the branch from its mapped child tokens and annotates it with the mapped
    /// annotation. Yields `None` (without calling the function) if recomposition fails.
    fn fold(
        &mut self,
        annotation: A,
        branch: BranchFold<'t, A>,
        tokens: Vec<Token<'t, Self::Annotation>>,
    ) -> Option<Token<'t, Self::Annotation>> {
        let branch = branch.fold(tokens).ok()?;
        Some(Token::new(branch, (self)(annotation)))
    }

    /// Rebuilds the leaf token with the mapped annotation. Leaves are never discarded.
    fn map(&mut self, annotation: A, leaf: LeafKind<'t>) -> Option<Token<'t, Self::Annotation>> {
        let token = Token::new(leaf, (self)(annotation));
        Some(token)
    }
}

#[derive(Clone, Debug)]
pub struct Token<'t, A> {
topology: TokenTopology<'t, A>,
Expand Down Expand Up @@ -342,6 +367,8 @@ impl<'t, A> Token<'t, A> {
.map_or(false, |leaf| leaf.unroot(annotation))
}

// TODO: The use of terms like "fold", "accumulate", "summand", "sum", and "term" is unclear
// and maybe inconsistent here (and elsewhere). Revisit such names.
pub fn fold<F>(&self, f: F) -> Option<F::Term>
where
F: Fold<'t, A>,
Expand Down Expand Up @@ -448,7 +475,117 @@ impl<'t, A> Token<'t, A> {
where
F: FoldMap<'t, A>,
{
todo!()
// Traversal state for `fold_map`: the stack of branches that are currently open (decomposed
// but not yet recomposed) together with the `FoldMap` implementation that rebuilds tokens.
#[derive(Debug)]
struct Path<'t, A, F>
where
F: FoldMap<'t, A>,
{
// Open branches ordered from the outermost (first) to the innermost (last).
branches: Vec<BranchNode<'t, A, F>>,
// The fold-map implementation applied to every leaf and every completed branch.
f: F,
}

impl<'t, A, F> Path<'t, A, F>
where
    F: FoldMap<'t, A>,
{
    /// Opens a new innermost branch on the path.
    ///
    /// `hint`, when given, is used as the initial capacity for the branch's mapped child tokens.
    pub fn push(&mut self, annotation: A, branch: BranchFold<'t, A>, hint: Option<usize>) {
        self.branches.push(BranchNode {
            annotation,
            branch,
            tokens: match hint {
                Some(hint) => Vec::with_capacity(hint),
                _ => vec![],
            },
        });
    }

    /// Folds the innermost branches into their parents until the path is no deeper than
    /// `depth`. The outermost branch is never folded here (see `fold_n`).
    pub fn pop(&mut self, depth: usize) {
        if let Some(n) = self.branches.len().checked_sub(depth) {
            self.fold_n(n);
        }
    }

    /// Folds every remaining branch and returns the token produced by the outermost branch, if
    /// any. Returns `None` if the path is empty or the outermost fold yields no token.
    pub fn fold(mut self) -> Option<Token<'t, F::Annotation>> {
        self.fold_n(usize::MAX);
        self.branches
            .pop()
            .and_then(|branch| branch.fold(&mut self.f))
    }

    /// Maps a leaf and appends the resulting token to the innermost open branch.
    ///
    /// `FoldMap::map` may yield `None` to discard a leaf; in that case nothing is appended and
    /// `Ok(())` is returned, just as `fold_n` drops branches that fold into `None`.
    ///
    /// # Errors
    ///
    /// Returns the mapped token as `Err` if there is no open branch to receive it (that is, the
    /// leaf has no parent on the path).
    pub fn map(
        &mut self,
        annotation: A,
        leaf: LeafKind<'t>,
    ) -> Result<(), Token<'t, F::Annotation>> {
        let token = match self.f.map(annotation, leaf) {
            Some(token) => token,
            None => return Ok(()),
        };
        match self.branches.last_mut() {
            Some(branch) => {
                branch.push(token);
                Ok(())
            },
            None => Err(token),
        }
    }

    /// Folds up to `n` of the innermost branches, appending each resulting token to the branch
    /// that becomes innermost afterwards. At least one branch always remains on a non-empty
    /// path. Branches that fold into `None` are dropped.
    fn fold_n(&mut self, n: usize) {
        for _ in 0..cmp::min(n, self.branches.len().saturating_sub(1)) {
            // The loop bound guarantees at least two branches here, so both the popped branch
            // and its parent exist.
            let branch = self
                .branches
                .pop()
                .expect("path has at least two branches");
            if let Some(token) = branch.fold(&mut self.f) {
                self.branches
                    .last_mut()
                    .expect("path has a parent branch")
                    .push(token);
            }
        }
    }
}

// An open branch on the traversal path: the data needed to rebuild the branch plus the child
// tokens that have been mapped so far.
#[derive(Debug)]
struct BranchNode<'t, A, F>
where
F: FoldMap<'t, A>,
{
// Annotation taken from the original branch token; handed to `FoldMap::fold`.
annotation: A,
// Branch data obtained by decomposing the original branch (its child tokens removed).
branch: BranchFold<'t, A>,
// Mapped child tokens collected for this branch.
tokens: Vec<Token<'t, F::Annotation>>,
}

impl<'t, A, F> BranchNode<'t, A, F>
where
    F: FoldMap<'t, A>,
{
    /// Appends a mapped child token to this branch.
    fn push(&mut self, token: Token<'t, F::Annotation>) {
        self.tokens.push(token);
    }

    /// Consumes the node and asks `f` to rebuild the branch from its annotation, branch data
    /// and collected child tokens.
    fn fold(self, f: &mut F) -> Option<Token<'t, F::Annotation>> {
        f.fold(self.annotation, self.branch, self.tokens)
    }
}

let mut path = Path {
    branches: vec![],
    f,
};
// Depth-first traversal with an explicit stack. Each entry pairs a token with its depth so
// that completed branches can be folded before a token at a shallower depth is visited.
let mut tokens = vec![(self, 0usize)];
while let Some((token, depth)) = tokens.pop() {
    path.pop(depth);
    match token.topology {
        Topology::Branch(branch) => {
            let (branch, children) = branch.decompose();
            let n = children.len();
            // The stack is LIFO: push the children in reverse so that they are popped, mapped,
            // and collected into the rebuilt branch in their original order.
            tokens.extend(
                children
                    .into_iter()
                    .rev()
                    .map(|token| (token, depth + 1)),
            );
            path.push(token.annotation, branch, Some(n));
        },
        Topology::Leaf(leaf) => {
            // A leaf with no open parent branch is the root: its mapped token is the result.
            if let Err(token) = path.map(token.annotation, leaf) {
                return Some(token);
            }
        },
    }
}
path.fold()
}

pub fn walk(&self) -> Walk<'_, 't, A> {
Expand Down Expand Up @@ -756,6 +893,43 @@ impl<'t, A> From<LeafKind<'t>> for TokenTopology<'t, A> {
}
}

// TODO: The use of this trait and `BranchFold` in `Token::fold_map` is a very unfortunate
// consequence of using an intrusive tree. Remove these APIs if and when the tree is
// implemented via an unintrusive data structure. See other TODO comments near the beginning
// of this module.
trait BranchComposition<'t> {
type Annotation;
type BranchData;

fn compose(
data: Self::BranchData,
tokens: Vec<Token<'t, Self::Annotation>>,
) -> Result<Self, ()>;

fn decompose(self) -> (Self::BranchData, Vec<Token<'t, Self::Annotation>>);
}

/// The data of a decomposed branch without its child tokens.
///
/// Produced by `BranchKind::decompose` and turned back into a `BranchKind` by
/// `BranchFold::fold` once child tokens are supplied.
#[derive(Debug)]
pub enum BranchFold<'t, A> {
/// Branch data of an `Alternation`.
Alternation(<Alternation<'t, A> as BranchComposition<'t>>::BranchData),
/// Branch data of a `Concatenation`.
Concatenation(<Concatenation<'t, A> as BranchComposition<'t>>::BranchData),
/// Branch data of a `Repetition`.
Repetition(<Repetition<'t, A> as BranchComposition<'t>>::BranchData),
}

impl<'t, A> BranchFold<'t, A> {
    /// Recomposes a branch of the same kind from this branch data and the given child tokens.
    ///
    /// The child tokens may carry a different annotation type `B` than the decomposed branch
    /// did. This is what allows `FoldMap` to change annotations: the branch data of every
    /// branch kind is independent of the annotation type.
    ///
    /// # Errors
    ///
    /// Returns `Err(())` if the branch cannot be composed from `tokens` (for example, a
    /// repetition composed from no tokens).
    pub fn fold<B>(self, tokens: Vec<Token<'t, B>>) -> Result<BranchKind<'t, B>, ()> {
        match self {
            BranchFold::Alternation(data) => {
                BranchKind::compose::<Alternation<'t, B>>(data, tokens)
            },
            BranchFold::Concatenation(data) => {
                BranchKind::compose::<Concatenation<'t, B>>(data, tokens)
            },
            BranchFold::Repetition(data) => BranchKind::compose::<Repetition<'t, B>>(data, tokens),
        }
    }
}

#[derive(Clone, Debug)]
pub enum BranchKind<'t, A> {
Alternation(Alternation<'t, A>),
Expand All @@ -764,6 +938,31 @@ pub enum BranchKind<'t, A> {
}

impl<'t, A> BranchKind<'t, A> {
fn compose<T>(data: T::BranchData, tokens: Vec<Token<'t, A>>) -> Result<Self, ()>
where
Self: From<T>,
T: BranchComposition<'t, Annotation = A>,
{
T::compose(data, tokens).into()
}

/// Consumes the branch and splits it into its kind-specific data (as a `BranchFold`) and its
/// child tokens.
fn decompose(self) -> (BranchFold<'t, A>, Vec<Token<'t, A>>) {
    match self {
        BranchKind::Alternation(branch) => {
            let parts = branch.decompose();
            (BranchFold::Alternation(parts.0), parts.1)
        },
        BranchKind::Concatenation(branch) => {
            let parts = branch.decompose();
            (BranchFold::Concatenation(parts.0), parts.1)
        },
        BranchKind::Repetition(branch) => {
            let parts = branch.decompose();
            (BranchFold::Repetition(parts.0), parts.1)
        },
    }
}

pub fn into_owned(self) -> BranchKind<'static, A> {
match self {
BranchKind::Alternation(alternation) => alternation.into_owned().into(),
Expand Down Expand Up @@ -944,6 +1143,19 @@ impl<'t, A> Alternation<'t, A> {
}
}

impl<'t, A> BranchComposition<'t> for Alternation<'t, A> {
type Annotation = A;
type BranchData = ();

fn compose(_: Self::BranchData, tokens: Vec<Token<'t, Self::Annotation>>) -> Result<Self, ()> {
Ok(Alternation(tokens))
}

fn decompose(self) -> (Self::BranchData, Vec<Token<'t, Self::Annotation>>) {
((), self.0)
}
}

impl<'t, A> From<Vec<Token<'t, A>>> for Alternation<'t, A> {
fn from(tokens: Vec<Token<'t, A>>) -> Self {
Alternation(tokens)
Expand Down Expand Up @@ -1107,6 +1319,19 @@ impl<'t, A> Concatenation<'t, A> {
}
}

impl<'t, A> BranchComposition<'t> for Concatenation<'t, A> {
type Annotation = A;
type BranchData = ();

fn compose(_: Self::BranchData, tokens: Vec<Token<'t, Self::Annotation>>) -> Result<Self, ()> {
Ok(Alternation(tokens))
}

fn decompose(self) -> (Self::BranchData, Vec<Token<'t, Self::Annotation>>) {
((), self.0)
}
}

impl<'t, A> From<Vec<Token<'t, A>>> for Concatenation<'t, A> {
fn from(tokens: Vec<Token<'t, A>>) -> Self {
Concatenation(tokens)
Expand Down Expand Up @@ -1285,6 +1510,30 @@ impl<'t, A> Repetition<'t, A> {
}
}

/// A repetition is decomposed into its cardinality (branch data) and its single child token.
impl<'t, A> BranchComposition<'t> for Repetition<'t, A> {
    type Annotation = A;
    type BranchData = Cardinality<usize>;

    /// Builds a repetition of the first token in `tokens` with bounds taken from `cardinality`.
    ///
    /// Only the first token is used. Returns `Err(())` if `tokens` is empty.
    fn compose(
        cardinality: Self::BranchData,
        tokens: Vec<Token<'t, Self::Annotation>>,
    ) -> Result<Self, ()> {
        let token = tokens.into_iter().next().ok_or(())?;
        let lower = *cardinality.lower();
        // Any upper bound other than `Bound::Bounded` is represented as `None`.
        let upper = match cardinality.upper() {
            Bound::Bounded(upper) => Some(*upper),
            _ => None,
        };
        Ok(Repetition {
            token,
            lower,
            upper,
        })
    }

    /// Returns the repetition's cardinality and a vector containing its only token.
    fn decompose(self) -> (Self::BranchData, Vec<Token<'t, Self::Annotation>>) {
        (self.cardinality(), vec![self.token])
    }
}

impl<'t, A> VarianceFold<Breadth> for Repetition<'t, A> {
fn fold(&self, terms: Vec<Variance<Breadth>>) -> Option<Variance<Breadth>> {
terms.into_iter().reduce(variance::disjunction)
Expand Down Expand Up @@ -1459,6 +1708,11 @@ impl Default for Position {
}
}

// TODO: Move this iterator and its associated types into a `walk` module.
// TODO: Consider implementing `HierarchicalIterator`. Note that filters like `starting` and
// `ending` read the entire tree. The `rule` module could also leverage this. For example,
// `rule::group` is only interested in non-concatenation branch tokens, which is probably a
// good case for `HierarchicalIterator::filter_map_tree`.
#[derive(Clone, Debug)]
pub struct Walk<'i, 't, A> {
buffer: VecDeque<(Position, &'i Token<'t, A>)>,
Expand Down

0 comments on commit c140681

Please sign in to comment.