From 0620b103e1c9a8eaa71a7bc9e5c1b75ee2d5aef0 Mon Sep 17 00:00:00 2001 From: Sean Olson Date: Fri, 29 Mar 2024 17:44:34 -0700 Subject: [PATCH] Consider adjacent tokens when constructing a HIR. --- src/hir.rs | 34 ++++++++++++++++++++-------------- src/lib.rs | 4 ++++ src/token/walk.rs | 20 +++++++++++++++++--- 3 files changed, 41 insertions(+), 17 deletions(-) diff --git a/src/hir.rs b/src/hir.rs index d689909..a6f09c9 100644 --- a/src/hir.rs +++ b/src/hir.rs @@ -128,11 +128,12 @@ where term } - fn term(&mut self, _: impl FoldPosition<'t, A>, leaf: &LeafKind<'t>) -> Self::Term { + fn term(&mut self, position: impl FoldPosition<'t, A>, leaf: &LeafKind<'t>) -> Self::Term { use token::Wildcard::{One, Tree, ZeroOrMore}; use Archetype::{Character, Range}; use LeafKind::{Class, Literal, Separator, Wildcard}; + let adjacency = position.adjacency(); match leaf { Class(ref class) => { let is_negated = class.is_negated(); @@ -161,11 +162,17 @@ where Hir::literal(literal.text().as_bytes()) } }, - // TODO: Separators should probably also match the end of text when they are at the - // end of a glob expression. This may not be possible in a fold with simple - // terms though, since that positional information isn't available until - // reaching the root of the token tree. - Separator(_) => self::separator().into_hir(), + Separator(_) => { + if adjacency.right.is_some() { + self::separator().into_hir() + } + else { + Hir::alternation(vec![ + self::separator().into_hir(), + Hir::look(hir::Look::End), + ]) + } + }, Wildcard(ref wildcard) => match wildcard { One => Hir::class(hir::Class::Unicode(self::not_separator())), Tree { has_root } => Hir::alternation(vec![ @@ -190,12 +197,13 @@ where self::separator().into_hir(), Hir::empty(), ]), - // TODO: Zero or more wildcards should match **one** or more if they comprise - // the entirety of a component, such as in `a/*/b`. This may not be - // possible in a fold with simple terms though, since adjacency - // information isn't available until reaching the root of the token tree. ZeroOrMore(ref evaluation) => Hir::repetition(hir::Repetition { - min: 0, + min: if adjacency.is_open() || adjacency.is_closed_boundary() { + 1 + } + else { + 0 + }, max: None, greedy: evaluation.is_eager(), sub: Box::new(self::not_separator().into_hir()), @@ -215,9 +223,7 @@ where .iter() .adjacent() .map(|token| { - let hir = token - .fold_with_adjacent(Compile::default()) - .unwrap_or_else(Hir::empty); + let hir = token.fold_with_adjacent(Compile).unwrap_or_else(Hir::empty); if token.into_item().is_capturing() { let index = capture_group_index; capture_group_index = capture_group_index diff --git a/src/lib.rs b/src/lib.rs index 1e3d411..7f44172 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -268,6 +268,10 @@ pub trait SliceProjection: Index { fn get(&self, index: usize) -> Option<&Self::Item>; fn len(&self) -> usize; + + fn is_empty(&self) -> bool { + self.len() == 0 + } } impl SliceProjection for [T] { diff --git a/src/token/walk.rs b/src/token/walk.rs index e4876c3..5c40b55 100644 --- a/src/token/walk.rs +++ b/src/token/walk.rs @@ -38,7 +38,7 @@ impl<'i, 't, A> ParentToken<'i, 't, A> for &'i BranchKind<'t, A> { type Child = &'i Token<'t, A>; fn as_ref(&self) -> &BranchKind<'t, A> { - *self + self } fn into_tokens(self) -> impl DoubleEndedIterator { @@ -67,8 +67,8 @@ impl<'t, A> ChildToken<'t, A> for Token<'t, A> { #[derive(Debug)] pub struct Adjacency<'i, 't, A> { - left: Option<&'i Token<'t, A>>, - right: Option<&'i Token<'t, A>>, + pub left: Option<&'i Token<'t, A>>, + pub right: Option<&'i Token<'t, A>>, } impl<'i, 't, A> Adjacency<'i, 't, A> { @@ -80,6 +80,20 @@ impl<'i, 't, A> Adjacency<'i, 't, A> { right: right.or(self.right), } } + + pub fn is_open(&self) -> bool { + self.left.is_none() && self.right.is_none() + } + + pub fn is_closed(&self) -> bool { + self.left.is_some() && self.right.is_some() + } + + pub fn is_closed_boundary(&self) -> bool { + let is_boundary = + |token: Option<&Token<'_, _>>| token.map_or(false, |token| token.boundary().is_some()); + is_boundary(self.left) && is_boundary(self.right) + } } impl<'i, 't, A> Clone for Adjacency<'i, 't, A> {