Testing: Set up basic example parse tests (#404)
* Indentation

* Build testing for example files

* Fmt
Cypher1 authored May 1, 2024
1 parent d3e2c3c commit 6e32f77
Showing 6 changed files with 142 additions and 45 deletions.
1 change: 1 addition & 0 deletions Cargo.lock

Some generated files are not rendered by default.

1 change: 1 addition & 0 deletions takolib/Cargo.toml
@@ -58,3 +58,4 @@ strum = "0.26.2"
strum_macros = "0.26.2"
pretty_assertions = "1.0"
rand = "0.9.0-alpha.1"
+walkdir = "2.5.0"
2 changes: 2 additions & 0 deletions takolib/src/lib.rs
@@ -11,6 +11,8 @@ pub mod lowerer;
pub mod parser;
pub mod primitives;
pub mod tasks;
+#[cfg(test)]
+pub mod test;
pub mod ui;

pub const VERSION: &str = env!("CARGO_PKG_VERSION");
52 changes: 32 additions & 20 deletions takolib/src/parser/mod.rs
@@ -490,8 +490,10 @@ impl<'src, 'toks, T: Iterator<Item = &'toks Token>> ParseState<'src, 'toks, T> {
self.call_or_definition(token, binding)?
} else if let Ok(token) = self.token_of_type(TokenType::Atom) {
self.atom(token, location)
-} else if let Ok(token) = self.token_of_type(TokenType::NumLit) {
+} else if let Ok(token) = self.token_of_type(TokenType::NumberLit) {
self.number_literal(token, location)
+} else if let Ok(token) = self.token_of_type(TokenType::StringLit) {
+self.string_literal(token, location)
} else {
return Err(ParseError::UnexpectedTokenTypeInExpression {
got: token.kind,
@@ -577,8 +579,18 @@ impl<'src, 'toks, T: Iterator<Item = &'toks Token>> ParseState<'src, 'toks, T> {
self.ast.add_atom(Atom { name }, location)
}

+fn string_literal(&mut self, res: Token, location: Location) -> NodeId {
+assert!(res.kind == TokenType::StringLit);
+trace!("Saving literal: {res:?}");
+let _id = self
+.ast
+.string_interner
+.register_str_by_loc(res.get_src(self.contents), location.start);
+self.ast.add_literal(Literal::Text, location)
+}

fn number_literal(&mut self, res: Token, location: Location) -> NodeId {
-assert!(res.kind == TokenType::NumLit);
+assert!(res.kind == TokenType::NumberLit);
trace!("Saving literal: {res:?}");
let _id = self
.ast
@@ -1013,29 +1025,29 @@ pub mod tests {
}

/*
TODO(testing): Type annotations:
- "12 : Int"
- "3 * 4 : Int"
- "3 * (4 : Int)"
- "(3 * 4) : 12"
- "\"hello world\" : String"
TODO(testing): String literals:
- "\"hello world\""
TODO(testing): Numeric literals:
- "-12"
TODO(testing): Operations:
- "14-12"
- "\"hello\"+\" world\""
TODO(testing): Errors:
- "\"hello world\"\n7"
TODO(testing): Definitions:
- "f(arg=\"hello world\")"
- "mul(x, y)= x*y"
- "x()= !\"hello world\";\n7"
*/
}
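The new string_literal path above can be exercised end to end through the same entry points the integration tests below use. A minimal sketch, assuming the lex/parse call shapes from takolib/src/test.rs; the test name and file name are illustrative only:

#[test]
fn parse_lone_string_literal() {
    // "\"hello world\"" lexes to a single StringLit token, which the new
    // string_literal branch turns into a Literal::Text node.
    let contents = "\"hello world\"";
    let tokens = crate::parser::tokens::lex(contents).expect("Should lex a string literal");
    let _ast = crate::parser::parse(&std::path::PathBuf::from("example.tk"), contents, &tokens)
        .expect("Should parse a string literal");
}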
50 changes: 25 additions & 25 deletions takolib/src/parser/tokens.rs
@@ -383,7 +383,7 @@ pub enum TokenType {
Ident, // A named value.
Atom, // A symbol starting with a '$', used differently to symbols which have values.
// Literals (i.e. tokens representing values):
-NumLit,
+NumberLit,
ColorLit,
// Short strings can be stored as symbols.
StringLit,
@@ -402,7 +402,7 @@ impl fmt::Display for TokenType {
Self::Op(sym) => write!(f, "a '{sym:?}' symbol"),
Self::Ident => write!(f, "an identifier"),
Self::Atom => write!(f, "an atom"),
-Self::NumLit => write!(f, "a number"),
+Self::NumberLit => write!(f, "a number"),
Self::ColorLit => write!(f, "a color"),
Self::StringLit => write!(f, "a string literal"),
Self::FmtStringLitStart => write!(f, "the start of a format string literal"),
@@ -457,7 +457,7 @@ const _MULTI_COMMENT: &str = "/*";
#[inline]
fn classify_char(ch: char) -> CharacterType {
use CharacterType::{AtomHead, HexSym, PartialToken, Whitespace};
-use TokenType::{Ident, NumLit, Op, StringLit};
+use TokenType::{Ident, NumberLit, Op, StringLit};
PartialToken(match ch {
'\n' | '\r' | '\t' | ' ' => return Whitespace,
'$' => return AtomHead,
@@ -494,7 +494,7 @@ fn classify_char(ch: char) -> CharacterType {
'Σ' => Op(Symbol::Sigma),
'∀' => Op(Symbol::Forall),
'∃' => Op(Symbol::Exists),
-'0'..='9' => NumLit,
+'0'..='9' => NumberLit,
'A'..='Z' | 'a'..='z' | '_' => Ident, // Overlapped by colors.
'"' | '\'' => StringLit,
_ => panic!("Unknown token character {ch}"),
@@ -617,7 +617,7 @@ pub fn lex_head(characters: &mut Characters<'_>, tokens: &mut Vec<Token>) -> bool {
}
// TODO(usability): Work out a better way of printing pretty spaces.
use CharacterType::{AtomHead, HexSym, PartialToken};
-use TokenType::{Atom, ColorLit, Ident, NumLit, Op, StringLit};
+use TokenType::{Atom, ColorLit, Ident, NumberLit, Op, StringLit};
let chr = if let Some(chr) = characters.next() {
chr
} else {
@@ -663,17 +663,17 @@
(_, _) => break,
}))
}
-(PartialToken(Op(Symbol::Hash)), HexSym | PartialToken(NumLit)) => {
+(PartialToken(Op(Symbol::Hash)), HexSym | PartialToken(NumberLit)) => {
PartialToken(ColorLit)
} // Color Literal.
-(PartialToken(ColorLit), HexSym | PartialToken(NumLit)) => PartialToken(ColorLit), // Color Literal.
-(AtomHead, HexSym | PartialToken(NumLit | Ident)) => PartialToken(Atom), // Atom.
-(PartialToken(Atom), HexSym | PartialToken(NumLit | Ident)) => PartialToken(Atom), // Atom.
-(HexSym | PartialToken(Ident), HexSym | PartialToken(NumLit | Ident)) => {
+(PartialToken(ColorLit), HexSym | PartialToken(NumberLit)) => PartialToken(ColorLit), // Color Literal.
+(AtomHead, HexSym | PartialToken(NumberLit | Ident)) => PartialToken(Atom), // Atom.
+(PartialToken(Atom), HexSym | PartialToken(NumberLit | Ident)) => PartialToken(Atom), // Atom.
+(HexSym | PartialToken(Ident), HexSym | PartialToken(NumberLit | Ident)) => {
PartialToken(Ident)
} // Symbol.
-(PartialToken(NumLit), PartialToken(NumLit)) => PartialToken(NumLit), // Continuation
-(PartialToken(NumLit), PartialToken(Ident)) => PartialToken(NumLit), // Number with suffix.
+(PartialToken(NumberLit), PartialToken(NumberLit)) => PartialToken(NumberLit), // Continuation
+(PartialToken(NumberLit), PartialToken(Ident)) => PartialToken(NumberLit), // Number with suffix.
_ => break, // Token finished can't continue here.
};
characters.next(); // Continue past the character.
@@ -777,7 +777,7 @@ mod tests {
use super::*;
use super::{
CharacterType::{PartialToken, Whitespace},
-TokenType::{Atom, ColorLit, Ident, NumLit, Op, StringLit},
+TokenType::{Atom, ColorLit, Ident, NumberLit, Op, StringLit},
};
use better_std::assert_eq;
use strum::IntoEnumIterator; // TODO(cleanup): Make these test only
@@ -805,9 +805,9 @@

#[test]
fn classify_number() {
-assert_eq!(classify_char('0'), PartialToken(NumLit));
-assert_eq!(classify_char('1'), PartialToken(NumLit));
-assert_eq!(classify_char('2'), PartialToken(NumLit));
+assert_eq!(classify_char('0'), PartialToken(NumberLit));
+assert_eq!(classify_char('1'), PartialToken(NumberLit));
+assert_eq!(classify_char('2'), PartialToken(NumberLit));
}

#[test]
@@ -816,7 +816,7 @@
assert_eq!(
tokens,
vec![Token {
-kind: NumLit,
+kind: NumberLit,
start: 0,
length: 3
}]
@@ -914,7 +914,7 @@
assert_eq!(
tokens,
vec![Token {
-kind: NumLit,
+kind: NumberLit,
start: 1,
length: 2
}]
@@ -927,7 +927,7 @@
assert_eq!(
tokens,
vec![Token {
-kind: NumLit,
+kind: NumberLit,
start: 2,
length: 2
}]
@@ -941,7 +941,7 @@
assert_eq!(
tokens,
vec![Token {
-kind: NumLit,
+kind: NumberLit,
start: 1,
length: 2
}]
@@ -1010,7 +1010,7 @@
length: 13
},
Token {
-kind: NumLit,
+kind: NumberLit,
start: 15,
length: 1
},
@@ -1092,7 +1092,7 @@
fn lex_strings_with_operators() {
let contents = "!\"hello world\"\n7";
let tokens = setup(contents);
-let expected = vec![Op(Symbol::LogicalNot), StringLit, NumLit];
+let expected = vec![Op(Symbol::LogicalNot), StringLit, NumberLit];
assert_eq!(
tokens
.iter()
@@ -1138,7 +1138,7 @@
assert_eq!(
tokens,
vec![Token {
-kind: NumLit,
+kind: NumberLit,
start: length as IndexIntoFile,
length: 3,
},]
@@ -1157,7 +1157,7 @@
assert_eq!(
tokens,
vec![Token {
-kind: NumLit,
+kind: NumberLit,
start: length as IndexIntoFile,
length: 3,
},]
@@ -1213,7 +1213,7 @@
length: length as SymbolLength,
},
Token {
-kind: NumLit,
+kind: NumberLit,
start: length as IndexIntoFile,
length: 3,
},
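After the NumLit → NumberLit rename, every numeric token carries the NumberLit kind. A small sketch of the resulting token stream, patterned on the lex_strings_with_operators test above and assuming the same setup helper and Token fields:

// '!' lexes as an operator, the quoted text as one StringLit token,
// and the trailing 7 as a NumberLit token.
let tokens = setup("!\"hello world\"\n7");
let kinds: Vec<TokenType> = tokens.iter().map(|tok| tok.kind).collect();
assert_eq!(kinds, vec![Op(Symbol::LogicalNot), StringLit, NumberLit]);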
81 changes: 81 additions & 0 deletions takolib/src/test.rs
@@ -0,0 +1,81 @@
// Integration tests
use log::info;
use std::fs;
use std::path::PathBuf;
use walkdir::{DirEntry, WalkDir};

const EXAMPLES_DIR: &str = "../examples";

const EXCEPTIONS: &[&str] = &[
"contexts.tk",
"enums.tk",
"fib.tk",
"fib_acc.tk",
"generic_abstract_data_types.tk",
"instances.tk",
"vector_transpose.tk",
"vector_transpose_failing.tk",
];

fn is_hidden(entry: &DirEntry) -> bool {
entry
.file_name()
.to_str()
.map(|s| s.starts_with("."))
.unwrap_or(false)
}

fn find_files() -> Result<Vec<PathBuf>, walkdir::Error> {
let walker = WalkDir::new(EXAMPLES_DIR).into_iter();
let mut paths = vec![];
for entry in walker.filter_entry(|e| !is_hidden(e)) {
let entry = entry?;
let meta = entry.metadata()?;
if !meta.is_file() {
continue;
}
let path = entry.path();
let name = path
.file_name()
.expect("Require that file names can be printed")
.to_str()
.expect("OSString to str");
if EXCEPTIONS.contains(&name) {
info!("Skipping exception: {name}");
continue;
}
paths.push(path.to_path_buf());
}
Ok(paths)
}

#[test]
fn find_example_files() {
let files = find_files().expect("Should be able to walk files");
info!("Files: {files:#?}");
let num_files = files.len();
assert!(
    num_files >= 1,
    "Example files appear to be missing, found {num_files}"
);
}

#[test]
fn parse_example_files() {
let mut files = find_files().expect("Should be able to walk files");
files.sort();

for file in files {
info!("Start: {file:#?}");
let contents = fs::read_to_string(&file).expect("Should have been able to read the file");

let tokens = crate::parser::tokens::lex(&contents)
.unwrap_or_else(|err| panic!("Should be able to lex {file:?}: {err:?}"));

info!("Tokens: {tokens:#?}");

let _ast = crate::parser::parse(&file, &contents, &tokens)
.unwrap_or_else(|err| panic!("Should be able to parse {file:?}: {err:?}"));
info!("Done: {file:#?}");
}
}
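The directory walk in find_files can also be factored into a standalone helper. A sketch against the walkdir 2.5.0 API added to Cargo.toml above; the function name and signature are illustrative, not part of this commit:

use std::path::PathBuf;
use walkdir::WalkDir;

// Collect every visible (non dot-prefixed) file under `dir`, mirroring the
// is_hidden + filter_entry combination used by find_files.
fn visible_files(dir: &str) -> Vec<PathBuf> {
    WalkDir::new(dir)
        .into_iter()
        .filter_entry(|entry| {
            !entry
                .file_name()
                .to_str()
                .map(|name| name.starts_with('.'))
                .unwrap_or(false)
        })
        .filter_map(Result::ok)
        .filter(|entry| entry.file_type().is_file())
        .map(|entry| entry.path().to_path_buf())
        .collect()
}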
