From 95fb5c80312f6b62f4276c37a82c2c85067a1f45 Mon Sep 17 00:00:00 2001
From: JohnnyMorganz <johnnymorganz@outlook.com>
Date: Sat, 26 Oct 2024 13:50:32 -0500
Subject: [PATCH] Fix attachment of trailing trivia with tab characters

---
 CHANGELOG.md                                  |   5 +
 full-moon/src/tokenizer/lexer.rs              |   5 +-
 .../cases/pass/table-constructors-9/ast.snap  | 342 ++++++++++++++++++
 .../pass/table-constructors-9/source.lua      |   6 +
 .../pass/table-constructors-9/tokens.snap     | 301 +++++++++++++++
 5 files changed, 658 insertions(+), 1 deletion(-)
 create mode 100644 full-moon/tests/cases/pass/table-constructors-9/ast.snap
 create mode 100644 full-moon/tests/cases/pass/table-constructors-9/source.lua
 create mode 100644 full-moon/tests/cases/pass/table-constructors-9/tokens.snap
diff --git a/CHANGELOG.md b/CHANGELOG.md
index 18b9606a..25e281b5 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -4,6 +4,11 @@ All notable changes to this project will be documented in this file.
 The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
 and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
 
+## [Unreleased]
+
+### Fixed
+- Fixed a regression in trivia attachment where trivia beginning with a tab character (`\t`) was attached as leading trivia of the next token rather than as trailing trivia of the current token.
+
 ## [1.1.0] - 2024-10-12
 ### Added
 - Added `access` fields that contain a `Option<TokenReference>` to `TypeInfo::Array` and `TypeField`.
diff --git a/full-moon/src/tokenizer/lexer.rs b/full-moon/src/tokenizer/lexer.rs
index 9680d1b2..47a2e7d1 100644
--- a/full-moon/src/tokenizer/lexer.rs
+++ b/full-moon/src/tokenizer/lexer.rs
@@ -218,7 +218,10 @@ impl Lexer {
             let start_position: LexerPosition = self.source.lexer_position;
 
             // All things that can start trivia, so we can avoid when something definitely can't
-            if matches!(self.source.current(), Some('\n' | '\r' | '#' | '-' | ' ')) {
+            if matches!(
+                self.source.current(),
+                Some('\n' | '\r' | '\t' | '#' | '-' | ' ')
+            ) {
                 if let Some(LexerResult::Ok(token)) = self.process_next() {
                     if token.token_type().is_trivia() {
                         // Take all trivia up to and including the newline character. If we see a newline character
diff --git a/full-moon/tests/cases/pass/table-constructors-9/ast.snap b/full-moon/tests/cases/pass/table-constructors-9/ast.snap
new file mode 100644
index 00000000..93e57034
--- /dev/null
+++ b/full-moon/tests/cases/pass/table-constructors-9/ast.snap
@@ -0,0 +1,342 @@
+---
+source: full-moon/tests/pass_cases.rs
+expression: ast.nodes()
+input_file: full-moon/tests/cases/pass/table-constructors-9
+---
+stmts:
+  - - LocalAssignment:
+        local_token:
+          leading_trivia:
+            - start_position:
+                bytes: 0
+                line: 1
+                character: 1
+              end_position:
+                bytes: 102
+                line: 1
+                character: 103
+              token_type:
+                type: SingleLineComment
+                comment: " comments separated by tab chars, should be parsed as trailing trivia of the tokens they are next to"
+            - start_position:
+                bytes: 102
+                line: 1
+                character: 103
+              end_position:
+                bytes: 103
+                line: 1
+                character: 103
+              token_type:
+                type: Whitespace
+                characters: "\n"
+            - start_position:
+                bytes: 103
+                line: 2
+                character: 1
+              end_position:
+                bytes: 120
+                line: 2
+                character: 18
+              token_type:
+                type: SingleLineComment
+                comment: " stylua: ignore"
+            - start_position:
+                bytes: 120
+                line: 2
+                character: 18
+              end_position:
+                bytes: 121
+                line: 2
+                character: 18
+              token_type:
+                type: Whitespace
+                characters: "\n"
+          token:
+            start_position:
+              bytes: 121
+              line: 3
+              character: 1
+            end_position:
+              bytes: 126
+              line: 3
+              character: 6
+            token_type:
+              type: Symbol
+              symbol: local
+          trailing_trivia:
+            - start_position:
+                bytes: 126
+                line: 3
+                character: 6
+              end_position:
+                bytes: 127
+                line: 3
+                character: 7
+              token_type:
+                type: Whitespace
+                characters: " "
+        name_list:
+          pairs:
+            - End:
+                leading_trivia: []
+                token:
+                  start_position:
+                    bytes: 127
+                    line: 3
+                    character: 7
+                  end_position:
+                    bytes: 130
+                    line: 3
+                    character: 10
+                  token_type:
+                    type: Identifier
+                    identifier: too
+                trailing_trivia:
+                  - start_position:
+                      bytes: 130
+                      line: 3
+                      character: 10
+                    end_position:
+                      bytes: 131
+                      line: 3
+                      character: 11
+                    token_type:
+                      type: Whitespace
+                      characters: " "
+        equal_token:
+          leading_trivia: []
+          token:
+            start_position:
+              bytes: 131
+              line: 3
+              character: 11
+            end_position:
+              bytes: 132
+              line: 3
+              character: 12
+            token_type:
+              type: Symbol
+              symbol: "="
+          trailing_trivia:
+            - start_position:
+                bytes: 132
+                line: 3
+                character: 12
+              end_position:
+                bytes: 133
+                line: 3
+                character: 13
+              token_type:
+                type: Whitespace
+                characters: " "
+        expr_list:
+          pairs:
+            - End:
+                TableConstructor:
+                  braces:
+                    tokens:
+                      - leading_trivia: []
+                        token:
+                          start_position:
+                            bytes: 133
+                            line: 3
+                            character: 13
+                          end_position:
+                            bytes: 134
+                            line: 3
+                            character: 14
+                          token_type:
+                            type: Symbol
+                            symbol: "{"
+                        trailing_trivia:
+                          - start_position:
+                              bytes: 134
+                              line: 3
+                              character: 14
+                            end_position:
+                              bytes: 135
+                              line: 3
+                              character: 14
+                            token_type:
+                              type: Whitespace
+                              characters: "\n"
+                      - leading_trivia: []
+                        token:
+                          start_position:
+                            bytes: 165
+                            line: 6
+                            character: 1
+                          end_position:
+                            bytes: 166
+                            line: 6
+                            character: 2
+                          token_type:
+                            type: Symbol
+                            symbol: "}"
+                        trailing_trivia:
+                          - start_position:
+                              bytes: 166
+                              line: 6
+                              character: 2
+                            end_position:
+                              bytes: 167
+                              line: 6
+                              character: 2
+                            token_type:
+                              type: Whitespace
+                              characters: "\n"
+                  fields:
+                    pairs:
+                      - Punctuated:
+                          - NoKey:
+                              Var:
+                                Name:
+                                  leading_trivia:
+                                    - start_position:
+                                        bytes: 135
+                                        line: 4
+                                        character: 1
+                                      end_position:
+                                        bytes: 136
+                                        line: 4
+                                        character: 2
+                                      token_type:
+                                        type: Whitespace
+                                        characters: "\t"
+                                  token:
+                                    start_position:
+                                      bytes: 136
+                                      line: 4
+                                      character: 2
+                                    end_position:
+                                      bytes: 137
+                                      line: 4
+                                      character: 3
+                                    token_type:
+                                      type: Identifier
+                                      identifier: x
+                                  trailing_trivia: []
+                          - leading_trivia: []
+                            token:
+                              start_position:
+                                bytes: 137
+                                line: 4
+                                character: 3
+                              end_position:
+                                bytes: 138
+                                line: 4
+                                character: 4
+                              token_type:
+                                type: Symbol
+                                symbol: ","
+                            trailing_trivia:
+                              - start_position:
+                                  bytes: 138
+                                  line: 4
+                                  character: 4
+                                end_position:
+                                  bytes: 140
+                                  line: 4
+                                  character: 6
+                                token_type:
+                                  type: Whitespace
+                                  characters: "\t\t"
+                              - start_position:
+                                  bytes: 140
+                                  line: 4
+                                  character: 6
+                                end_position:
+                                  bytes: 149
+                                  line: 4
+                                  character: 15
+                                token_type:
+                                  type: SingleLineComment
+                                  comment: " string"
+                              - start_position:
+                                  bytes: 149
+                                  line: 4
+                                  character: 15
+                                end_position:
+                                  bytes: 150
+                                  line: 4
+                                  character: 15
+                                token_type:
+                                  type: Whitespace
+                                  characters: "\n"
+                      - Punctuated:
+                          - NoKey:
+                              Var:
+                                Name:
+                                  leading_trivia:
+                                    - start_position:
+                                        bytes: 150
+                                        line: 5
+                                        character: 1
+                                      end_position:
+                                        bytes: 151
+                                        line: 5
+                                        character: 2
+                                      token_type:
+                                        type: Whitespace
+                                        characters: "\t"
+                                  token:
+                                    start_position:
+                                      bytes: 151
+                                      line: 5
+                                      character: 2
+                                    end_position:
+                                      bytes: 152
+                                      line: 5
+                                      character: 3
+                                    token_type:
+                                      type: Identifier
+                                      identifier: y
+                                  trailing_trivia: []
+                          - leading_trivia: []
+                            token:
+                              start_position:
+                                bytes: 152
+                                line: 5
+                                character: 3
+                              end_position:
+                                bytes: 153
+                                line: 5
+                                character: 4
+                              token_type:
+                                type: Symbol
+                                symbol: ","
+                            trailing_trivia:
+                              - start_position:
+                                  bytes: 153
+                                  line: 5
+                                  character: 4
+                                end_position:
+                                  bytes: 155
+                                  line: 5
+                                  character: 6
+                                token_type:
+                                  type: Whitespace
+                                  characters: "\t\t"
+                              - start_position:
+                                  bytes: 155
+                                  line: 5
+                                  character: 6
+                                end_position:
+                                  bytes: 164
+                                  line: 5
+                                  character: 15
+                                token_type:
+                                  type: SingleLineComment
+                                  comment: " string"
+                              - start_position:
+                                  bytes: 164
+                                  line: 5
+                                  character: 15
+                                end_position:
+                                  bytes: 165
+                                  line: 5
+                                  character: 15
+                                token_type:
+                                  type: Whitespace
+                                  characters: "\n"
+    - ~
diff --git a/full-moon/tests/cases/pass/table-constructors-9/source.lua b/full-moon/tests/cases/pass/table-constructors-9/source.lua
new file mode 100644
index 00000000..2ec60090
--- /dev/null
+++ b/full-moon/tests/cases/pass/table-constructors-9/source.lua
@@ -0,0 +1,6 @@
+-- comments separated by tab chars, should be parsed as trailing trivia of the tokens they are next to
+-- stylua: ignore
+local too = {
+	x,		-- string
+	y,		-- string
+}
diff --git a/full-moon/tests/cases/pass/table-constructors-9/tokens.snap b/full-moon/tests/cases/pass/table-constructors-9/tokens.snap
new file mode 100644
index 00000000..b3c138cc
--- /dev/null
+++ b/full-moon/tests/cases/pass/table-constructors-9/tokens.snap
@@ -0,0 +1,301 @@
+---
+source: full-moon/tests/pass_cases.rs
+expression: tokens
+input_file: full-moon/tests/cases/pass/table-constructors-9
+---
+- start_position:
+    bytes: 0
+    line: 1
+    character: 1
+  end_position:
+    bytes: 102
+    line: 1
+    character: 103
+  token_type:
+    type: SingleLineComment
+    comment: " comments separated by tab chars, should be parsed as trailing trivia of the tokens they are next to"
+- start_position:
+    bytes: 102
+    line: 1
+    character: 103
+  end_position:
+    bytes: 103
+    line: 1
+    character: 103
+  token_type:
+    type: Whitespace
+    characters: "\n"
+- start_position:
+    bytes: 103
+    line: 2
+    character: 1
+  end_position:
+    bytes: 120
+    line: 2
+    character: 18
+  token_type:
+    type: SingleLineComment
+    comment: " stylua: ignore"
+- start_position:
+    bytes: 120
+    line: 2
+    character: 18
+  end_position:
+    bytes: 121
+    line: 2
+    character: 18
+  token_type:
+    type: Whitespace
+    characters: "\n"
+- start_position:
+    bytes: 121
+    line: 3
+    character: 1
+  end_position:
+    bytes: 126
+    line: 3
+    character: 6
+  token_type:
+    type: Symbol
+    symbol: local
+- start_position:
+    bytes: 126
+    line: 3
+    character: 6
+  end_position:
+    bytes: 127
+    line: 3
+    character: 7
+  token_type:
+    type: Whitespace
+    characters: " "
+- start_position:
+    bytes: 127
+    line: 3
+    character: 7
+  end_position:
+    bytes: 130
+    line: 3
+    character: 10
+  token_type:
+    type: Identifier
+    identifier: too
+- start_position:
+    bytes: 130
+    line: 3
+    character: 10
+  end_position:
+    bytes: 131
+    line: 3
+    character: 11
+  token_type:
+    type: Whitespace
+    characters: " "
+- start_position:
+    bytes: 131
+    line: 3
+    character: 11
+  end_position:
+    bytes: 132
+    line: 3
+    character: 12
+  token_type:
+    type: Symbol
+    symbol: "="
+- start_position:
+    bytes: 132
+    line: 3
+    character: 12
+  end_position:
+    bytes: 133
+    line: 3
+    character: 13
+  token_type:
+    type: Whitespace
+    characters: " "
+- start_position:
+    bytes: 133
+    line: 3
+    character: 13
+  end_position:
+    bytes: 134
+    line: 3
+    character: 14
+  token_type:
+    type: Symbol
+    symbol: "{"
+- start_position:
+    bytes: 134
+    line: 3
+    character: 14
+  end_position:
+    bytes: 135
+    line: 3
+    character: 14
+  token_type:
+    type: Whitespace
+    characters: "\n"
+- start_position:
+    bytes: 135
+    line: 4
+    character: 1
+  end_position:
+    bytes: 136
+    line: 4
+    character: 2
+  token_type:
+    type: Whitespace
+    characters: "\t"
+- start_position:
+    bytes: 136
+    line: 4
+    character: 2
+  end_position:
+    bytes: 137
+    line: 4
+    character: 3
+  token_type:
+    type: Identifier
+    identifier: x
+- start_position:
+    bytes: 137
+    line: 4
+    character: 3
+  end_position:
+    bytes: 138
+    line: 4
+    character: 4
+  token_type:
+    type: Symbol
+    symbol: ","
+- start_position:
+    bytes: 138
+    line: 4
+    character: 4
+  end_position:
+    bytes: 140
+    line: 4
+    character: 6
+  token_type:
+    type: Whitespace
+    characters: "\t\t"
+- start_position:
+    bytes: 140
+    line: 4
+    character: 6
+  end_position:
+    bytes: 149
+    line: 4
+    character: 15
+  token_type:
+    type: SingleLineComment
+    comment: " string"
+- start_position:
+    bytes: 149
+    line: 4
+    character: 15
+  end_position:
+    bytes: 150
+    line: 4
+    character: 15
+  token_type:
+    type: Whitespace
+    characters: "\n"
+- start_position:
+    bytes: 150
+    line: 5
+    character: 1
+  end_position:
+    bytes: 151
+    line: 5
+    character: 2
+  token_type:
+    type: Whitespace
+    characters: "\t"
+- start_position:
+    bytes: 151
+    line: 5
+    character: 2
+  end_position:
+    bytes: 152
+    line: 5
+    character: 3
+  token_type:
+    type: Identifier
+    identifier: y
+- start_position:
+    bytes: 152
+    line: 5
+    character: 3
+  end_position:
+    bytes: 153
+    line: 5
+    character: 4
+  token_type:
+    type: Symbol
+    symbol: ","
+- start_position:
+    bytes: 153
+    line: 5
+    character: 4
+  end_position:
+    bytes: 155
+    line: 5
+    character: 6
+  token_type:
+    type: Whitespace
+    characters: "\t\t"
+- start_position:
+    bytes: 155
+    line: 5
+    character: 6
+  end_position:
+    bytes: 164
+    line: 5
+    character: 15
+  token_type:
+    type: SingleLineComment
+    comment: " string"
+- start_position:
+    bytes: 164
+    line: 5
+    character: 15
+  end_position:
+    bytes: 165
+    line: 5
+    character: 15
+  token_type:
+    type: Whitespace
+    characters: "\n"
+- start_position:
+    bytes: 165
+    line: 6
+    character: 1
+  end_position:
+    bytes: 166
+    line: 6
+    character: 2
+  token_type:
+    type: Symbol
+    symbol: "}"
+- start_position:
+    bytes: 166
+    line: 6
+    character: 2
+  end_position:
+    bytes: 167
+    line: 6
+    character: 2
+  token_type:
+    type: Whitespace
+    characters: "\n"
+- start_position:
+    bytes: 167
+    line: 7
+    character: 1
+  end_position:
+    bytes: 167
+    line: 7
+    character: 1
+  token_type:
+    type: Eof