diff --git a/src/token/parse.rs b/src/token/parse.rs
index 4b6a85f..fdd7746 100644
--- a/src/token/parse.rs
+++ b/src/token/parse.rs
@@ -1,6 +1,7 @@
 #[cfg(feature = "miette")]
 use miette::{self, Diagnostic, LabeledSpan, SourceCode};
-use nom::error::{VerboseError as NomError, VerboseErrorKind as NomErrorKind};
+use nom::{InputTakeAtPosition, FindToken};
+use nom::error::{VerboseError as NomError, VerboseErrorKind as NomErrorKind, ErrorKind};
 use pori::{Located, Location, Stateful};
 use std::borrow::Cow;
 use std::fmt::{self, Display, Formatter};
@@ -235,37 +236,58 @@ pub fn parse(expression: &str) -> Result {
         flags(move |_| move |input: Input<'i>| Ok((input, ())))(input)
     }
 
-    fn literal(input: Input) -> ParseResult> {
-        combinator::map(
-            combinator::verify(
-                bytes::escaped_transform(
-                    bytes::is_not("/?*$:<>()[]{},\\"),
-                    '\\',
-                    branch::alt((
-                        combinator::value("?", bytes::tag("?")),
-                        combinator::value("*", bytes::tag("*")),
-                        combinator::value("$", bytes::tag("$")),
-                        combinator::value(":", bytes::tag(":")),
-                        combinator::value("<", bytes::tag("<")),
-                        combinator::value(">", bytes::tag(">")),
-                        combinator::value("(", bytes::tag("(")),
-                        combinator::value(")", bytes::tag(")")),
-                        combinator::value("[", bytes::tag("[")),
-                        combinator::value("]", bytes::tag("]")),
-                        combinator::value("{", bytes::tag("{")),
-                        combinator::value("}", bytes::tag("}")),
-                        combinator::value(",", bytes::tag(",")),
-                    )),
+    /// Builds a parser for a literal token.
+    ///
+    /// Unescaped text ends at any of `/?*$<>()[]{}\` or at any character in
+    /// `cant_contain`, which lets each caller decide whether `:` and `,` are
+    /// delimiters or ordinary text. Every character listed in the `branch::alt`
+    /// below may still be written with a leading backslash.
+    fn literal<'i>(
+        cant_contain: &'i str,
+    ) -> impl FnMut(Input<'i>) -> ParseResult<'i, TokenKind<'i, Annotation>> {
+        /// Stands in for the former `bytes::is_not` call, rejecting two character sets.
+        fn is_not_both<'i>(
+            arr1: &'i str,
+            arr2: &'i str,
+        ) -> impl Fn(Input<'i>) -> ParseResult<'i, Input<'i>> {
+            move |i: Input| {
+                let e: ErrorKind = ErrorKind::IsNot;
+                i.split_at_position1_complete(|c| arr1.find_token(c) || arr2.find_token(c), e)
+            }
+        }
+
+        move |input: Input| {
+            combinator::map(
+                combinator::verify(
+                    bytes::escaped_transform(
+                        is_not_both("/?*$<>()[]{}\\", cant_contain),
+                        '\\',
+                        branch::alt((
+                            combinator::value("?", bytes::tag("?")),
+                            combinator::value("*", bytes::tag("*")),
+                            combinator::value("$", bytes::tag("$")),
+                            combinator::value("<", bytes::tag("<")),
+                            combinator::value(">", bytes::tag(">")),
+                            combinator::value("(", bytes::tag("(")),
+                            combinator::value(")", bytes::tag(")")),
+                            combinator::value("[", bytes::tag("[")),
+                            combinator::value("]", bytes::tag("]")),
+                            combinator::value("{", bytes::tag("{")),
+                            combinator::value("}", bytes::tag("}")),
+                            combinator::value(",", bytes::tag(",")),
+                            combinator::value(":", bytes::tag(":")),
+                        )),
+                    ),
+                    |text: &str| !text.is_empty(),
                 ),
-                |text: &str| !text.is_empty(),
-            ),
-            move |text| {
-                TokenKind::Literal(Literal {
-                    text: text.into(),
-                    is_case_insensitive: input.state.flags.is_case_insensitive,
-                })
-            },
-        )(input)
+                move |text| {
+                    TokenKind::Literal(Literal {
+                        text: text.into(),
+                        is_case_insensitive: input.state.flags.is_case_insensitive,
+                    })
+                },
+            )(input)
+        }
     }
 
     fn separator(input: Input) -> ParseResult> {
@@ -404,7 +426,7 @@ pub fn parse(expression: &str) -> Result {
                         "sub-glob",
                         glob(move |input| {
                             combinator::peek(branch::alt((bytes::tag(":"), bytes::tag(">"))))(input)
-                        }),
+                        }, ":"),
                     ),
                     error::context("bounds", bounds),
                 )),
@@ -469,7 +491,7 @@ pub fn parse(expression: &str) -> Result {
                         "sub-glob",
                         glob(move |input| {
                             combinator::peek(branch::alt((bytes::tag(","), bytes::tag("}"))))(input)
-                        }),
+                        }, ","),
                     ),
                 ),
                 |alternatives: Vec>| Alternative::from(alternatives).into(),
@@ -480,6 +502,7 @@ pub fn parse(expression: &str) -> Result {
 
     fn glob<'i>(
         terminator: impl 'i + Clone + Parser, Input<'i>, ErrorStack<'i>>,
+        literal_cant_contain: &'i str,
     ) -> impl Parser, Vec>, ErrorStack<'i>> {
         fn annotate<'i, F>(
             parser: F,
@@ -496,7 +519,7 @@ pub fn parse(expression: &str) -> Result {
             multi::many1(branch::alt((
                 annotate(error::context(
                     "literal",
-                    sequence::preceded(flags_with_state, literal),
+                    sequence::preceded(flags_with_state, literal(literal_cant_contain)),
                 )),
                 annotate(error::context(
                     "repetition",
@@ -533,7 +556,7 @@ pub fn parse(expression: &str) -> Result {
     }
     else {
         let input = Input::new(Expression::from(expression), ParserState::default());
-        let tokens = combinator::all_consuming(glob(combinator::eof))(input)
+        let tokens = combinator::all_consuming(glob(combinator::eof, ""))(input)
             .map(|(_, tokens)| tokens)
             .map_err(|error| ParseError::new(expression, error))?;
         Ok(Tokenized {
diff --git a/tests/parse.rs b/tests/parse.rs
new file mode 100644
index 0000000..99c1b01
--- /dev/null
+++ b/tests/parse.rs
@@ -0,0 +1,140 @@
+use std::path::{Path, PathBuf};
+
+use build_fs_tree::{dir, file, Build, FileSystemTree};
+use tempfile::{self, TempDir};
+use wax::Glob;
+
+#[test]
+fn test_parse_literal() {
+    let (_root, path) = temptree();
+
+    test("README.md", &["README.md"], &path);
+    test("tests/walk.rs", &["walk.rs"], &path);
+
+    test("extra:dots.txt", &["extra:dots.txt"], &path);
+    test("extra,comma.txt", &["extra,comma.txt"], &path);
+}
+
+#[test]
+fn test_parse_wildcards() {
+    let (_root, path) = temptree();
+
+    test("RE*.md", &["README.md"], &path);
+    test("READM?.md", &["README.md"], &path);
+    test("**/*.md", &["guide.md", "README.md"], &path);
+
+    test("*:dots.txt", &["extra:dots.txt"], &path);
+    test("*,comma.txt", &["extra,comma.txt"], &path);
+}
+
+#[test]
+fn test_parse_char_classes() {
+    let (_root, path) = temptree();
+
+    test("READM[AEIOU].md", &["README.md"], &path);
+    test("READM[A-Z].md", &["README.md"], &path);
+    test("READM[a-zA-Z].md", &["README.md"], &path);
+
+    test("READM[!AIOU].md", &["README.md"], &path);
+
+    test("extr[a-z]:dots.txt", &["extra:dots.txt"], &path);
+    test("extr[a-z],comma.txt", &["extra,comma.txt"], &path);
+}
+
+#[test]
+fn test_parse_alternatives() {
+    let (_root, path) = temptree();
+
+    test("READM{A,E}.md", &["README.md"], &path);
+    test("READ{M?,Z?}.md", &["README.md"], &path);
+    test("README.{txt,md}", &["README.md"], &path);
+
+    test("extra{:dots,:nots}.txt", &["extra:dots.txt"], &path);
+    test("extra{\\,comma,\\,nots}.txt", &["extra,comma.txt"], &path);
+}
+
+#[test]
+fn test_parse_repetitions() {
+    let (_root, path) = temptree();
+
+    // NOTE(review): none of the `walk.rs`, `extra:dots.txt` or
+    // `extra,comma.txt` expressions in this test contains a `<...>` repetition.
+    // The angle-bracket groups look like they were lost from this patch; restore
+    // them from the original change.
+    test("walk.rs", &["walk.rs"], &path);
+    test("walk.rs", &["walk.rs"], &path);
+    test("walk.rs", &["walk.rs"], &path);
+    // NOTE(review): reconstructed. A literal `flt.txt` cannot match `fleet.txt`,
+    // so these presumably exercised an exact and an open-ended bound — confirm.
+    test("fl<e:2>t.txt", &["fleet.txt"], &path);
+    test("fl<e:1,>t.txt", &["fleet.txt"], &path);
+
+    test("extra:dots.txt", &["extra:dots.txt"], &path);
+    test("extra,comma.txt", &["extra,comma.txt"], &path);
+}
+
+#[test]
+fn test_parse_combined() {
+    let (_root, path) = temptree();
+
+    test("extra{[:,\\-]dots,:nots}.txt", &["extra:dots.txt"], &path);
+}
+
+/// Asserts that walking `path` with the glob `expression` yields exactly the file
+/// names in `expected`.
+///
+/// Both lists are sorted before comparison so the assertion does not depend on the
+/// order in which the walk happens to yield entries.
+fn test(expression: &str, expected: &[&str], path: &Path) {
+    println!("\n\n\nTest {expression} {:?}", expected);
+    let glob = Glob::new(expression).unwrap();
+    println!("Captures: {:?}", glob.captures().collect::<Vec<_>>());
+    println!("Glob: {:#?}", glob);
+
+    let mut names: Vec<String> = glob
+        .walk(path)
+        .map(|entry| {
+            let entry = entry.unwrap();
+            entry.path().file_name().unwrap().to_string_lossy().into_owned()
+        })
+        .collect();
+    names.sort_unstable();
+
+    let mut expected = expected.to_vec();
+    expected.sort_unstable();
+
+    assert_eq!(expected, names);
+}
+
+// TODO: Rust's testing framework does not provide a mechanism for maintaining
+//       shared state. This means that tests that write to the file system must
+//       do so individually rather than writing before and after all tests have
+//       run. This should probably be avoided.
+
+/// Writes a testing directory tree to a temporary location on the file system.
+///
+/// Returns the `TempDir` handle and the path under it where the tree was built.
+fn temptree() -> (TempDir, PathBuf) {
+    let root = tempfile::tempdir().unwrap();
+    // NOTE(review): `extra:dots.txt` contains `:`, which is presumably not usable
+    // in file names on Windows — confirm which targets run these tests.
+    let tree: FileSystemTree<&str, &str> = dir! {
+        "doc" => dir! {
+            "guide.md" => file!(""),
+        },
+        "src" => dir! {
+            "glob.rs" => file!(""),
+            "lib.rs" => file!(""),
+        },
+        "tests" => dir! {
+            "walk.rs" => file!(""),
+        },
+        "README.md" => file!(""),
+        "fleet.txt" => file!(""),
+        "extra:dots.txt" => file!(""),
+        "extra,comma.txt" => file!(""),
+    };
+    let path = root.path().join("project");
+    tree.build(&path).unwrap();
+    (root, path)
+}