olson-sean-k · brunoschmidt · Mar 14, 2023
diff --git a/src/token/parse.rs b/src/token/parse.rs
@@ -1,6 +1,8 @@
 #[cfg(feature = "miette")]
 use miette::{self, Diagnostic, LabeledSpan, SourceCode};
-use nom::error::{VerboseError as NomError, VerboseErrorKind as NomErrorKind};
+use nom::{InputTakeAtPosition, FindToken};
+use nom::character::complete::none_of;
+use nom::error::{VerboseError as NomError, VerboseErrorKind as NomErrorKind, ErrorKind};
 use pori::{Located, Location, Stateful};
 use std::borrow::Cow;
 use std::fmt::{self, Display, Formatter};
@@ -235,37 +237,54 @@ pub fn parse(expression: &str) -> Result<Tokenized, ParseError> {
         flags(move |_| move |input: Input<'i>| Ok((input, ())))(input)
     }
 
-    fn literal(input: Input) -> ParseResult<TokenKind<Annotation>> {
-        combinator::map(
-            combinator::verify(
-                bytes::escaped_transform(
-                    bytes::is_not("/?*$:<>()[]{},\\"),
-                    '\\',
-                    branch::alt((
-                        combinator::value("?", bytes::tag("?")),
-                        combinator::value("*", bytes::tag("*")),
-                        combinator::value("$", bytes::tag("$")),
-                        combinator::value(":", bytes::tag(":")),
-                        combinator::value("<", bytes::tag("<")),
-                        combinator::value(">", bytes::tag(">")),
-                        combinator::value("(", bytes::tag("(")),
-                        combinator::value(")", bytes::tag(")")),
-                        combinator::value("[", bytes::tag("[")),
-                        combinator::value("]", bytes::tag("]")),
-                        combinator::value("{", bytes::tag("{")),
-                        combinator::value("}", bytes::tag("}")),
-                        combinator::value(",", bytes::tag(",")),
-                    )),
+    /// Builds a parser for a literal token; the characters in `cant_contain` end the literal in addition to the fixed glob meta-characters.
+    fn literal<'i>(
+        cant_contain: &'i str,
+    ) -> impl FnMut(Input<'i>) -> ParseResult<'i, TokenKind<'i, Annotation>>
+    {
+        fn is_not_both<'i>( // like `bytes::is_not`, but with two exclusion sets
+            arr1: &'i str,
+            arr2: &'i str,
+        ) -> impl Fn(Input<'i>) -> ParseResult<'i,Input<'i>>
+        {
+            move |i: Input| {
+                let e: ErrorKind = ErrorKind::IsNot;
+                i.split_at_position1_complete(|c| arr1.find_token(c) || arr2.find_token(c), e) // split before the first char found in either set
+            }
+        }
+        // The returned parser un-escapes `\`-prefixed characters and rejects an empty result.
+        move |input: Input| {
+            combinator::map(
+                combinator::verify(
+                    bytes::escaped_transform(
+                        is_not_both("/?*$<>()[]{}\\", cant_contain), // `,` and `:` are not in the fixed set; callers supply them via `cant_contain`
+                        '\\',
+                        branch::alt((
+                            combinator::value("?", bytes::tag("?")),
+                            combinator::value("*", bytes::tag("*")),
+                            combinator::value("$", bytes::tag("$")),
+                            combinator::value("<", bytes::tag("<")),
+                            combinator::value(">", bytes::tag(">")),
+                            combinator::value("(", bytes::tag("(")),
+                            combinator::value(")", bytes::tag(")")),
+                            combinator::value("[", bytes::tag("[")),
+                            combinator::value("]", bytes::tag("]")),
+                            combinator::value("{", bytes::tag("{")),
+                            combinator::value("}", bytes::tag("}")),
+                            combinator::value(",", bytes::tag(",")), // `\,` and `\:` remain valid escapes
+                            combinator::value(":", bytes::tag(":")),
+                        )),
+                    ),
+                    |text: &str| !text.is_empty(),
                 ),
-                |text: &str| !text.is_empty(),
-            ),
-            move |text| {
-                TokenKind::Literal(Literal {
-                    text: text.into(),
-                    is_case_insensitive: input.state.flags.is_case_insensitive,
-                })
-            },
-        )(input)
+                move |text| {
+                    TokenKind::Literal(Literal {
+                        text: text.into(),
+                        is_case_insensitive: input.state.flags.is_case_insensitive, // flag is read from the parser state carried by the input
+                    })
+                },
+            )(input)
+        }
     }
 
     fn separator(input: Input) -> ParseResult<TokenKind<Annotation>> {
@@ -404,7 +423,7 @@ pub fn parse(expression: &str) -> Result<Tokenized, ParseError> {
                         "sub-glob",
                         glob(move |input| {
                             combinator::peek(branch::alt((bytes::tag(":"), bytes::tag(">"))))(input)
-                        }),
+                        }, ":"),
                     ),
                     error::context("bounds", bounds),
                 )),
@@ -469,7 +488,7 @@ pub fn parse(expression: &str) -> Result<Tokenized, ParseError> {
                         "sub-glob",
                         glob(move |input| {
                             combinator::peek(branch::alt((bytes::tag(","), bytes::tag("}"))))(input)
-                        }),
+                        }, ","),
                     ),
                 ),
                 |alternatives: Vec<Vec<_>>| Alternative::from(alternatives).into(),
@@ -480,6 +499,7 @@ pub fn parse(expression: &str) -> Result<Tokenized, ParseError> {
 
     fn glob<'i>(
         terminator: impl 'i + Clone + Parser<Input<'i>, Input<'i>, ErrorStack<'i>>,
+        literal_cant_contain: &'i str,
     ) -> impl Parser<Input<'i>, Vec<Token<'i, Annotation>>, ErrorStack<'i>> {
         fn annotate<'i, F>(
             parser: F,
@@ -496,7 +516,7 @@ pub fn parse(expression: &str) -> Result<Tokenized, ParseError> {
                 multi::many1(branch::alt((
                     annotate(error::context(
                         "literal",
-                        sequence::preceded(flags_with_state, literal),
+                        sequence::preceded(flags_with_state, literal(literal_cant_contain)),
                     )),
                     annotate(error::context(
                         "repetition",
@@ -533,7 +553,7 @@ pub fn parse(expression: &str) -> Result<Tokenized, ParseError> {
     }
     else {
         let input = Input::new(Expression::from(expression), ParserState::default());
-        let tokens = combinator::all_consuming(glob(combinator::eof))(input)
+        let tokens = combinator::all_consuming(glob(combinator::eof, ""))(input)
             .map(|(_, tokens)| tokens)
             .map_err(|error| ParseError::new(expression, error))?;
         Ok(Tokenized {

diff --git a/tests/parse.rs b/tests/parse.rs
@@ -0,0 +1,124 @@
+use std::path::PathBuf;
+
+use build_fs_tree::{FileSystemTree, dir};
+
+use wax::{Glob};
+
+use build_fs_tree::{file, Build};
+use tempfile::{self, TempDir};
+
+
+#[test]
+fn test_parse_litteral(){
+    let (_root, path) = temptree();
+    // Literal-only expressions (no wildcards, classes or groups), one with a path separator.
+    test("README.md", &["README.md"], &path);
+    test("tests/walk.rs", &["walk.rs"], &path);
+    // `:` and `,` appear unescaped at the top level of the expression.
+    test("extra:dots.txt", &["extra:dots.txt"], &path);
+    test("extra,comma.txt", &["extra,comma.txt"], &path);
+}
+
+#[test]
+fn test_parse_wildcards(){
+    let (_root, path) = temptree();
+    // `*`, `?` and `**` wildcards matched against the fixture tree.
+    test("RE*.md", &["README.md"], &path);
+    test("READM?.md", &["README.md"], &path);
+    test("**/*.md", &["guide.md", "README.md"], &path);
+    // A wildcard followed by a literal that contains an unescaped `:` or `,`.
+    test("*:dots.txt", &["extra:dots.txt"], &path);
+    test("*,comma.txt", &["extra,comma.txt"], &path);
+}
+
+#[test]
+fn test_parse_char_classes(){
+    let (_root, path) = temptree();
+    // Character classes: an explicit set, a single range, and two ranges.
+    test("READM[AEIOU].md", &["README.md"], &path);
+    test("READM[A-Z].md", &["README.md"], &path);
+    test("READM[a-zA-Z].md", &["README.md"], &path);
+    // Class starting with `!`: expected to match because `E` is not among the listed characters.
+    test("READM[!AIOU].md", &["README.md"], &path);
+    // A class followed by a literal that contains an unescaped `:` or `,`.
+    test("extr[a-z]:dots.txt", &["extra:dots.txt"], &path);
+    test("extr[a-z],comma.txt", &["extra,comma.txt"], &path);
+}
+
+#[test]
+fn test_parse_alternatives(){
+    let (_root, path) = temptree();
+    // Alternatives `{a,b}` whose branches hold literals or wildcards.
+    test("READM{A,E}.md", &["README.md"], &path);
+    test("READ{M?,Z?}.md", &["README.md"], &path);
+    test("README.{txt,md}", &["README.md"], &path);
+    // Inside a branch `:` is written unescaped, while a literal `,` is escaped as `\,` (an unescaped `,` separates branches).
+    test("extra{:dots,:nots}.txt", &["extra:dots.txt"], &path);
+    test("extra{\\,comma,\\,nots}.txt", &["extra,comma.txt"], &path);
+}
+
+#[test]
+fn test_parse_repetitions(){
+    let (_root, path) = temptree();
+    // Repetitions `<...>` in the forms used below: no bounds, `:1`, `:0,1` and `:2`.
+    test("<t*/>walk.rs", &["walk.rs"], &path);
+    test("<t*/:1>walk.rs", &["walk.rs"], &path);
+    test("<t*/:0,1>walk.rs", &["walk.rs"], &path);
+    test("fl<?:2>t.txt", &["fleet.txt"], &path);
+    test("fl<e:2>t.txt", &["fleet.txt"], &path);
+    // NOTE(review): these two expressions contain no repetition and repeat the plain-literal cases — confirm intent.
+    test("extra:dots.txt", &["extra:dots.txt"], &path);
+    test("extra,comma.txt", &["extra,comma.txt"], &path);
+}
+
+#[test]
+fn test_parse_combined(){
+    let (_root, path) = temptree();
+    // An alternative whose first branch starts with a class containing `:`, `,` and an escaped `-`.
+    test("extra{[:,\\-]dots,:nots}.txt", &["extra:dots.txt"], &path);
+}
+
+/// Asserts that walking `path` with the glob `expression` yields exactly the file names in
+/// `expected`; both sides are sorted so the check does not depend on the order of the walk.
+fn test(expression: &str, expected: &[&str], path: &std::path::Path){
+    println!("\n\n\nTest {expression} {:?}", expected);
+    let glob = Glob::new(expression).unwrap();
+    println!("Captures: {:?}", glob.captures().collect::<Vec<_>>());
+    println!("Glob: {:#?}", glob);
+    let mut names: Vec<String> = glob.walk(path)
+        .map(|entry| entry.unwrap().path().file_name().unwrap().to_string_lossy().into_owned())
+        .collect();
+    names.sort();
+    let mut expected = expected.to_vec();
+    expected.sort();
+    assert_eq!(expected, names);
+}
+
+// TODO: Rust's testing framework does not provide a mechanism for maintaining
+//       shared state. This means that tests that write to the file system must
+//       do so individually rather than writing before and after all tests have
+//       run. This should probably be avoided.
+
+/// Writes a testing directory tree to a temporary location on the file system and returns the temporary root together with the path of the tree.
+fn temptree() -> (TempDir, PathBuf) {
+    let root = tempfile::tempdir().unwrap();
+    let tree: FileSystemTree<&str, &str> = dir! {
+        "doc" => dir! {
+            "guide.md" => file!(""),
+        },
+        "src" => dir! {
+            "glob.rs" => file!(""),
+            "lib.rs" => file!(""),
+        },
+        "tests" => dir! {
+            "walk.rs" => file!(""),
+        },
+        "README.md" => file!(""),
+        "fleet.txt" => file!(""),
+        "extra:dots.txt" => file!(""), // file name containing `:`
+        "extra,comma.txt" => file!(""), // file name containing `,`
+    };
+    let path = root.path().join("project"); // the tree is built under `<root>/project`
+    tree.build(&path).unwrap();
+    (root, path) // callers bind the `TempDir` as `_root`; presumably this keeps the directory alive for the test — confirm against tempfile docs
+}