From 95fb5c80312f6b62f4276c37a82c2c85067a1f45 Mon Sep 17 00:00:00 2001 From: JohnnyMorganz Date: Sat, 26 Oct 2024 13:50:32 -0500 Subject: [PATCH] Fix attachment of trailing trivia with tab characters --- CHANGELOG.md | 5 + full-moon/src/tokenizer/lexer.rs | 5 +- .../cases/pass/table-constructors-9/ast.snap | 342 ++++++++++++++++++ .../pass/table-constructors-9/source.lua | 6 + .../pass/table-constructors-9/tokens.snap | 301 +++++++++++++++ 5 files changed, 658 insertions(+), 1 deletion(-) create mode 100644 full-moon/tests/cases/pass/table-constructors-9/ast.snap create mode 100644 full-moon/tests/cases/pass/table-constructors-9/source.lua create mode 100644 full-moon/tests/cases/pass/table-constructors-9/tokens.snap diff --git a/CHANGELOG.md b/CHANGELOG.md index 18b9606a..25e281b5 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -4,6 +4,11 @@ All notable changes to this project will be documented in this file. The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). +## [Unreleased] + +### Fixed +- Fixed regression in trivia attachment causing trivia that begins with `\t` tab characters to be attached as leading trivia of the next token rather than trailing trivia of the current token. + ## [1.1.0] - 2024-10-12 ### Added - Added `access` fields that contain a `Option` to `TypeInfo::Array` and `TypeField`. diff --git a/full-moon/src/tokenizer/lexer.rs b/full-moon/src/tokenizer/lexer.rs index 9680d1b2..47a2e7d1 100644 --- a/full-moon/src/tokenizer/lexer.rs +++ b/full-moon/src/tokenizer/lexer.rs @@ -218,7 +218,10 @@ impl Lexer { let start_position: LexerPosition = self.source.lexer_position; // All things that can start trivia, so we can avoid when something definitely can't - if matches!(self.source.current(), Some('\n' | '\r' | '#' | '-' | ' ')) { + if matches!( + self.source.current(), + Some('\n' | '\r' | '\t' | '#' | '-' | ' ') + ) { if let Some(LexerResult::Ok(token)) = self.process_next() { if token.token_type().is_trivia() { // Take all trivia up to and including the newline character. If we see a newline character diff --git a/full-moon/tests/cases/pass/table-constructors-9/ast.snap b/full-moon/tests/cases/pass/table-constructors-9/ast.snap new file mode 100644 index 00000000..93e57034 --- /dev/null +++ b/full-moon/tests/cases/pass/table-constructors-9/ast.snap @@ -0,0 +1,342 @@ +--- +source: full-moon/tests/pass_cases.rs +expression: ast.nodes() +input_file: full-moon/tests/cases/pass/table-constructors-9 +--- +stmts: + - - LocalAssignment: + local_token: + leading_trivia: + - start_position: + bytes: 0 + line: 1 + character: 1 + end_position: + bytes: 102 + line: 1 + character: 103 + token_type: + type: SingleLineComment + comment: " comments separated by tab chars, should be parsed as trailing trivia of the tokens they are next to" + - start_position: + bytes: 102 + line: 1 + character: 103 + end_position: + bytes: 103 + line: 1 + character: 103 + token_type: + type: Whitespace + characters: "\n" + - start_position: + bytes: 103 + line: 2 + character: 1 + end_position: + bytes: 120 + line: 2 + character: 18 + token_type: + type: SingleLineComment + comment: " stylua: ignore" + - start_position: + bytes: 120 + line: 2 + character: 18 + end_position: + bytes: 121 + line: 2 + character: 18 + token_type: + type: Whitespace + characters: "\n" + token: + start_position: + bytes: 121 + line: 3 + character: 1 + end_position: + bytes: 126 + line: 3 + character: 6 + token_type: + type: Symbol + symbol: local + trailing_trivia: + - start_position: + bytes: 126 + line: 3 + character: 6 + end_position: + bytes: 127 + line: 3 + character: 7 + token_type: + type: Whitespace + characters: " " + name_list: + pairs: + - End: + leading_trivia: [] + token: + start_position: + bytes: 127 + line: 3 + character: 7 + end_position: + bytes: 130 + line: 3 + character: 10 + token_type: + type: Identifier + identifier: too + trailing_trivia: + - start_position: + bytes: 130 + line: 3 + character: 10 + end_position: + bytes: 131 + line: 3 + character: 11 + token_type: + type: Whitespace + characters: " " + equal_token: + leading_trivia: [] + token: + start_position: + bytes: 131 + line: 3 + character: 11 + end_position: + bytes: 132 + line: 3 + character: 12 + token_type: + type: Symbol + symbol: "=" + trailing_trivia: + - start_position: + bytes: 132 + line: 3 + character: 12 + end_position: + bytes: 133 + line: 3 + character: 13 + token_type: + type: Whitespace + characters: " " + expr_list: + pairs: + - End: + TableConstructor: + braces: + tokens: + - leading_trivia: [] + token: + start_position: + bytes: 133 + line: 3 + character: 13 + end_position: + bytes: 134 + line: 3 + character: 14 + token_type: + type: Symbol + symbol: "{" + trailing_trivia: + - start_position: + bytes: 134 + line: 3 + character: 14 + end_position: + bytes: 135 + line: 3 + character: 14 + token_type: + type: Whitespace + characters: "\n" + - leading_trivia: [] + token: + start_position: + bytes: 165 + line: 6 + character: 1 + end_position: + bytes: 166 + line: 6 + character: 2 + token_type: + type: Symbol + symbol: "}" + trailing_trivia: + - start_position: + bytes: 166 + line: 6 + character: 2 + end_position: + bytes: 167 + line: 6 + character: 2 + token_type: + type: Whitespace + characters: "\n" + fields: + pairs: + - Punctuated: + - NoKey: + Var: + Name: + leading_trivia: + - start_position: + bytes: 135 + line: 4 + character: 1 + end_position: + bytes: 136 + line: 4 + character: 2 + token_type: + type: Whitespace + characters: "\t" + token: + start_position: + bytes: 136 + line: 4 + character: 2 + end_position: + bytes: 137 + line: 4 + character: 3 + token_type: + type: Identifier + identifier: x + trailing_trivia: [] + - leading_trivia: [] + token: + start_position: + bytes: 137 + line: 4 + character: 3 + end_position: + bytes: 138 + line: 4 + character: 4 + token_type: + type: Symbol + symbol: "," + trailing_trivia: + - start_position: + bytes: 138 + line: 4 + character: 4 + end_position: + bytes: 140 + line: 4 + character: 6 + token_type: + type: Whitespace + characters: "\t\t" + - start_position: + bytes: 140 + line: 4 + character: 6 + end_position: + bytes: 149 + line: 4 + character: 15 + token_type: + type: SingleLineComment + comment: " string" + - start_position: + bytes: 149 + line: 4 + character: 15 + end_position: + bytes: 150 + line: 4 + character: 15 + token_type: + type: Whitespace + characters: "\n" + - Punctuated: + - NoKey: + Var: + Name: + leading_trivia: + - start_position: + bytes: 150 + line: 5 + character: 1 + end_position: + bytes: 151 + line: 5 + character: 2 + token_type: + type: Whitespace + characters: "\t" + token: + start_position: + bytes: 151 + line: 5 + character: 2 + end_position: + bytes: 152 + line: 5 + character: 3 + token_type: + type: Identifier + identifier: y + trailing_trivia: [] + - leading_trivia: [] + token: + start_position: + bytes: 152 + line: 5 + character: 3 + end_position: + bytes: 153 + line: 5 + character: 4 + token_type: + type: Symbol + symbol: "," + trailing_trivia: + - start_position: + bytes: 153 + line: 5 + character: 4 + end_position: + bytes: 155 + line: 5 + character: 6 + token_type: + type: Whitespace + characters: "\t\t" + - start_position: + bytes: 155 + line: 5 + character: 6 + end_position: + bytes: 164 + line: 5 + character: 15 + token_type: + type: SingleLineComment + comment: " string" + - start_position: + bytes: 164 + line: 5 + character: 15 + end_position: + bytes: 165 + line: 5 + character: 15 + token_type: + type: Whitespace + characters: "\n" + - ~ diff --git a/full-moon/tests/cases/pass/table-constructors-9/source.lua b/full-moon/tests/cases/pass/table-constructors-9/source.lua new file mode 100644 index 00000000..2ec60090 --- /dev/null +++ b/full-moon/tests/cases/pass/table-constructors-9/source.lua @@ -0,0 +1,6 @@ +-- comments separated by tab chars, should be parsed as trailing trivia of the tokens they are next to +-- stylua: ignore +local too = { + x, -- string + y, -- string +} diff --git a/full-moon/tests/cases/pass/table-constructors-9/tokens.snap b/full-moon/tests/cases/pass/table-constructors-9/tokens.snap new file mode 100644 index 00000000..b3c138cc --- /dev/null +++ b/full-moon/tests/cases/pass/table-constructors-9/tokens.snap @@ -0,0 +1,301 @@ +--- +source: full-moon/tests/pass_cases.rs +expression: tokens +input_file: full-moon/tests/cases/pass/table-constructors-9 +--- +- start_position: + bytes: 0 + line: 1 + character: 1 + end_position: + bytes: 102 + line: 1 + character: 103 + token_type: + type: SingleLineComment + comment: " comments separated by tab chars, should be parsed as trailing trivia of the tokens they are next to" +- start_position: + bytes: 102 + line: 1 + character: 103 + end_position: + bytes: 103 + line: 1 + character: 103 + token_type: + type: Whitespace + characters: "\n" +- start_position: + bytes: 103 + line: 2 + character: 1 + end_position: + bytes: 120 + line: 2 + character: 18 + token_type: + type: SingleLineComment + comment: " stylua: ignore" +- start_position: + bytes: 120 + line: 2 + character: 18 + end_position: + bytes: 121 + line: 2 + character: 18 + token_type: + type: Whitespace + characters: "\n" +- start_position: + bytes: 121 + line: 3 + character: 1 + end_position: + bytes: 126 + line: 3 + character: 6 + token_type: + type: Symbol + symbol: local +- start_position: + bytes: 126 + line: 3 + character: 6 + end_position: + bytes: 127 + line: 3 + character: 7 + token_type: + type: Whitespace + characters: " " +- start_position: + bytes: 127 + line: 3 + character: 7 + end_position: + bytes: 130 + line: 3 + character: 10 + token_type: + type: Identifier + identifier: too +- start_position: + bytes: 130 + line: 3 + character: 10 + end_position: + bytes: 131 + line: 3 + character: 11 + token_type: + type: Whitespace + characters: " " +- start_position: + bytes: 131 + line: 3 + character: 11 + end_position: + bytes: 132 + line: 3 + character: 12 + token_type: + type: Symbol + symbol: "=" +- start_position: + bytes: 132 + line: 3 + character: 12 + end_position: + bytes: 133 + line: 3 + character: 13 + token_type: + type: Whitespace + characters: " " +- start_position: + bytes: 133 + line: 3 + character: 13 + end_position: + bytes: 134 + line: 3 + character: 14 + token_type: + type: Symbol + symbol: "{" +- start_position: + bytes: 134 + line: 3 + character: 14 + end_position: + bytes: 135 + line: 3 + character: 14 + token_type: + type: Whitespace + characters: "\n" +- start_position: + bytes: 135 + line: 4 + character: 1 + end_position: + bytes: 136 + line: 4 + character: 2 + token_type: + type: Whitespace + characters: "\t" +- start_position: + bytes: 136 + line: 4 + character: 2 + end_position: + bytes: 137 + line: 4 + character: 3 + token_type: + type: Identifier + identifier: x +- start_position: + bytes: 137 + line: 4 + character: 3 + end_position: + bytes: 138 + line: 4 + character: 4 + token_type: + type: Symbol + symbol: "," +- start_position: + bytes: 138 + line: 4 + character: 4 + end_position: + bytes: 140 + line: 4 + character: 6 + token_type: + type: Whitespace + characters: "\t\t" +- start_position: + bytes: 140 + line: 4 + character: 6 + end_position: + bytes: 149 + line: 4 + character: 15 + token_type: + type: SingleLineComment + comment: " string" +- start_position: + bytes: 149 + line: 4 + character: 15 + end_position: + bytes: 150 + line: 4 + character: 15 + token_type: + type: Whitespace + characters: "\n" +- start_position: + bytes: 150 + line: 5 + character: 1 + end_position: + bytes: 151 + line: 5 + character: 2 + token_type: + type: Whitespace + characters: "\t" +- start_position: + bytes: 151 + line: 5 + character: 2 + end_position: + bytes: 152 + line: 5 + character: 3 + token_type: + type: Identifier + identifier: y +- start_position: + bytes: 152 + line: 5 + character: 3 + end_position: + bytes: 153 + line: 5 + character: 4 + token_type: + type: Symbol + symbol: "," +- start_position: + bytes: 153 + line: 5 + character: 4 + end_position: + bytes: 155 + line: 5 + character: 6 + token_type: + type: Whitespace + characters: "\t\t" +- start_position: + bytes: 155 + line: 5 + character: 6 + end_position: + bytes: 164 + line: 5 + character: 15 + token_type: + type: SingleLineComment + comment: " string" +- start_position: + bytes: 164 + line: 5 + character: 15 + end_position: + bytes: 165 + line: 5 + character: 15 + token_type: + type: Whitespace + characters: "\n" +- start_position: + bytes: 165 + line: 6 + character: 1 + end_position: + bytes: 166 + line: 6 + character: 2 + token_type: + type: Symbol + symbol: "}" +- start_position: + bytes: 166 + line: 6 + character: 2 + end_position: + bytes: 167 + line: 6 + character: 2 + token_type: + type: Whitespace + characters: "\n" +- start_position: + bytes: 167 + line: 7 + character: 1 + end_position: + bytes: 167 + line: 7 + character: 1 + token_type: + type: Eof