From 8910d5a8a025ee589ba2526e2dfa8b2ec86f65f5 Mon Sep 17 00:00:00 2001
From: adamhutchings <adam.abahot@gmail.com>
Date: Wed, 26 Jun 2024 00:22:21 -0400
Subject: [PATCH 1/6] Replace all getc/ungetc with new wrapper

---
 src/lexer/lex.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/lexer/lex.c b/src/lexer/lex.c
index 0fd7146..15c2569 100644
--- a/src/lexer/lex.c
+++ b/src/lexer/lex.c
@@ -248,11 +248,11 @@ int real_lex(Lexer *l, Token *t) {
     // return./
     if (starts_operator(init)) {
         while (valid_operator_sequence(t->contents)) {
-            t->contents[pos++] = (c = getc(l->fp));
+            t->contents[pos++] = (c = lexer_getchar(l->fp));
         }
         // We've ended!
         // Can we reduce this code duplication from above in a smart way?
-        ungetc(c, l->fp);
+        lexer_ungetchar(l);
         t->contents[pos - 1] = '\0';
         t->type = ttype_from_string(t->contents);
         t->length = pos;

From 7fd0973e21e747f0370a724998984111f522a1e0 Mon Sep 17 00:00:00 2001
From: Jake <jakeroggenbuck2@gmail.com>
Date: Tue, 25 Jun 2024 22:40:45 -0700
Subject: [PATCH 2/6] Add basic parsing code

---
 src/codegen/x86/codegen.c | 50 ++++++++++++---------
 src/codegen/x86/codegen.h |  2 +
 src/driver/main.c         | 35 ++++++++-------
 src/parser/parse.c        | 94 +++++++++++++++++++++++++++++++++++++++
 src/parser/parse.h        |  5 +++
 tests/simplemain.c        |  3 ++
 6 files changed, 153 insertions(+), 36 deletions(-)
 create mode 100644 src/parser/parse.c
 create mode 100644 src/parser/parse.h
 create mode 100644 tests/simplemain.c

diff --git a/src/codegen/x86/codegen.c b/src/codegen/x86/codegen.c
index e0b317d..fae49b3 100644
--- a/src/codegen/x86/codegen.c
+++ b/src/codegen/x86/codegen.c
@@ -21,41 +21,49 @@ void code_gen_init() {
 
 char *start_main() {
     static char start[256] = "\
-global _start\
-section .text\
-\
-_start:";
+global _start\n\
+section .text\n\
+\n\
+_start:\n";
 
     return start;
 }
 
 char *end_main() {
     static char end[256] = "\
-mov rax, 60\
-mov rdi, 0\
-syscall";
+	mov rax, 60\n\
+	mov rdi, 0\n\
+	syscall\n";
+
+    return end;
+}
+
+char *end_main_custom_return(int val) {
+    char *end = malloc(256);
+    if (end) // stays NULL for the caller when the allocation fails
+        snprintf(end, 256, "	mov rax, 60\n	mov rdi, %d\n	syscall\n", val);
 
     return end;
 }
 
 char *start_func() {
     static char start[256] = "\
-sub rsp, 32\
-mov [rsp], r12\
-mov [rsp+8], r13\
-mov [rsp+16], r14\
-mov [rsp+24], r15";
+	sub rsp, 32\n\
+	mov [rsp], r12\n\
+	mov [rsp+8], r13\n\
+	mov [rsp+16], r14\n\
+	mov [rsp+24], r15\n";
 
     return start;
 }
 
 char *end_func() {
     static char end[256] = "\
-mov r12, [rsp]\
-mov r13, [rsp+8]\
-mov r14, [rsp+16]\
-mov r15, [rsp+24]\
-add rsp, 32";
+	mov r12, [rsp]\n\
+	mov r13, [rsp+8]\n\
+	mov r14, [rsp+16]\n\
+	mov r15, [rsp+24]\n\
+	add rsp, 32\n";
 
     return end;
 }
@@ -65,16 +73,16 @@ char *init_int_literal(int val) {
 
     char *init;
     init = (char *)malloc(256 * sizeof(char));
-    sprintf(init, "mov [rsp+%d], %d", GEN_STATE.rsp_offset, val);
+    sprintf(init, "	mov [rsp+%d], %d", GEN_STATE.rsp_offset, val);
 
     return init;
 }
 
 int test_init_int_literal() {
-	testing_func_setup();
+    testing_func_setup();
     code_gen_init();
 
-    tassert(strcmp(init_int_literal(100), "mov [rsp+8], 100") == 0);
+    tassert(strcmp(init_int_literal(100), "	mov [rsp+8], 100") == 0);
 
-	return 0;
+    return 0;
 }
diff --git a/src/codegen/x86/codegen.h b/src/codegen/x86/codegen.h
index 4af672d..5e3a489 100644
--- a/src/codegen/x86/codegen.h
+++ b/src/codegen/x86/codegen.h
@@ -8,6 +8,8 @@ char *end_main();
 
 char *start_func();
 
+char *end_main_custom_return(int val);
+
 char *end_func();
 
 char *init_int_literal(int val);
diff --git a/src/driver/main.c b/src/driver/main.c
index 42ee68b..4a7d3ea 100644
--- a/src/driver/main.c
+++ b/src/driver/main.c
@@ -6,13 +6,14 @@
 #include <string.h> // strcmp
 
 #include <lexer/lex.h>
+#include <parser/parse.h>
 #include <util/out.h>
 
-int lexer_dump(const char* filename) {
+int lexer_dump(const char *filename) {
 
     // Initialization of everything
     Lexer lexer;
-    FILE * fp = fopen(filename, "r");
+    FILE *fp = fopen(filename, "r");
     if (!fp) {
         PRINT_ERROR("File %s not found", filename);
         return 1;
@@ -24,12 +25,13 @@ int lexer_dump(const char* filename) {
     Token t;
     do {
         // Return if some non-zero (error) code is returned
-        if (lex(&lexer, &t)) return 1;
-        printf("Contents: %20s, type: %20s, position: %d/%d\n", t.contents, ttype_name(t.type), t.line, t.column);
+        if (lex(&lexer, &t))
+            return 1;
+        printf("Contents: %20s, type: %20s, position: %d/%d\n", t.contents,
+               ttype_name(t.type), t.line, t.column);
     } while (t.type != TT_EOF);
 
     return 0;
-
 }
 
 int main(int argc, char **argv) {
@@ -38,14 +40,16 @@ int main(int argc, char **argv) {
 
     // Skip the name of the executable.
     --argc, ++argv;
-    
+
     if (argc == 0) {
         PRINT_DEFAULT("Usage: --token-dump <filename> to see all tokens");
         return 0;
     }
 
     if (argc == 1) {
-        PRINT_DEFAULT("default compilation not supported yet -- try 'jccc --token-dump %s' instead.", argv[0]);
+        PRINT_DEFAULT("default compilation not supported yet -- try 'jccc "
+                      "--token-dump %s' instead.",
+                      argv[0]);
         return 1;
     }
 
@@ -55,13 +59,14 @@ int main(int argc, char **argv) {
     }
 
     // Two arguments now.
-    if (strcmp(argv[0], "--token-dump")) {
-        PRINT_ERROR("option %s not recognized.", argv[1]);
-        return 1;
-    }
-
-    // Finally, we can do the lexer test properly!
-    return lexer_dump(argv[1]);
+    if (strcmp(argv[0], "--token-dump") == 0) {
+        // Finally, we can do the lexer test properly!
+        return lexer_dump(argv[1]);
+    } else if (strcmp(argv[0], "--test-parse") == 0) {
+        // Hand the parser's status back to the shell as the exit code.
+        return parse(argv[1]);
+    }
 
-    return 0;
+    PRINT_ERROR("option %s not recognized.", argv[0]); // argv[0] is the flag
+    return 1;
 }
diff --git a/src/parser/parse.c b/src/parser/parse.c
new file mode 100644
index 0000000..d1da408
--- /dev/null
+++ b/src/parser/parse.c
@@ -0,0 +1,94 @@
+/* Parser
+ *
+ */
+
+#include <codegen/x86/codegen.h>
+#include <lexer/lex.h>
+#include <lexer/token.h>
+#include <stdlib.h> // calloc
+#include <string.h> // strcmp
+#include <util/out.h>
+
+int parse(const char *filename) {
+    // Tokenize `filename`, then recognise `int main() { return <int>; }`.
+    FILE *fp = fopen(filename, "r");
+    if (!fp) {
+        PRINT_ERROR("File %s not found", filename);
+        return 1;
+    }
+
+    Lexer lexer;
+    lexer.fp = fp;
+    lexer.unlexed_count = 0;
+    lexer.column = lexer.line = 1;
+
+    Token t;
+    int i = 0;
+    int buffer_size = 16;
+    Token *tokens = calloc(buffer_size, sizeof(Token));
+
+    do {
+        // Grow with realloc so the tokens read so far survive the resize.
+        Token *grown = tokens;
+        if (tokens && buffer_size <= i) {
+            buffer_size *= 2;
+            grown = realloc(tokens, buffer_size * sizeof(Token));
+        }
+
+        // Allocation or lexer failure: release the buffer and the file.
+        if (!grown || lex(&lexer, &t)) {
+            free(grown ? grown : tokens);
+            fclose(fp);
+            return 1;
+        }
+        tokens = grown;
+        tokens[i] = t;
+        printf("Contents: %20s, type: %20s, position: %d/%d\n", t.contents,
+               ttype_name(t.type), t.line, t.column);
+        i++;
+    } while (t.type != TT_EOF);
+
+    // Main function
+    if (tokens[0].type == TT_INT && tokens[1].type == TT_IDENTIFIER &&
+        (strcmp(tokens[1].contents, "main") == 0)) {
+
+        // Correct empty function body
+        if (tokens[2].type == TT_OPAREN && tokens[3].type == TT_CPAREN &&
+            tokens[4].type == TT_OBRACE) {
+
+            // Return value
+            if (tokens[5].type == TT_RETURN && tokens[6].type == TT_LITERAL &&
+                tokens[7].type == TT_SEMI) {
+
+                // Correct matched closed brace
+                if (tokens[8].type == TT_CBRACE) {
+					printf("\n");
+					
+					// Generate preamble main code
+                    char *code_start = start_main();
+
+					printf(code_start);
+
+					// Add custom return code
+                    char *code_end =
+                        end_main_custom_return(atoi(tokens[6].contents));
+
+                    printf(code_end);
+
+                } else {
+                    PRINT_ERROR("Wrong closing brace.\n");
+                }
+            } else {
+                PRINT_ERROR("Return value is wrong.\n");
+            }
+        } else {
+            PRINT_ERROR("Wrong main function body.\n");
+        }
+    } else {
+        PRINT_ERROR("Not correct main function.\n");
+    }
+
+    return 0;
+}
+
+int parse_simple_main_func() {}
diff --git a/src/parser/parse.h b/src/parser/parse.h
new file mode 100644
index 0000000..8ea9ad8
--- /dev/null
+++ b/src/parser/parse.h
@@ -0,0 +1,5 @@
+/* Parser
+ *
+ */
+
+int parse(const char *filename);
diff --git a/tests/simplemain.c b/tests/simplemain.c
new file mode 100644
index 0000000..aa5fd90
--- /dev/null
+++ b/tests/simplemain.c
@@ -0,0 +1,3 @@
+int main() {
+	return 5;
+}

From ef78539671f7566417acb0501a5d6ef1f284f3a3 Mon Sep 17 00:00:00 2001
From: adamhutchings <adam.abahot@gmail.com>
Date: Wed, 26 Jun 2024 13:19:32 -0400
Subject: [PATCH 3/6] Add structures we'll need for the first parser iteration

---
 src/parser/cst.h | 62 ++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 62 insertions(+)
 create mode 100644 src/parser/cst.h

diff --git a/src/parser/cst.h b/src/parser/cst.h
new file mode 100644
index 0000000..59fa469
--- /dev/null
+++ b/src/parser/cst.h
@@ -0,0 +1,62 @@
+/**
+ * The structures for a concrete syntax tree.
+ * For now, the subset of C we are parsing is quite simple:
+ * - Parameterless functions.
+ * - Return statements, which accept integers or function calls.
+ */
+
+#pragma once
+
+#include "list.h"
+
+// A list of all node types.
+typedef enum {
+    NT_STMT,
+    NT_EXPR,
+    NT_BLOCK_STMT,
+    NT_RETURN_STMT,
+    NT_FUNCDECL,
+    NT_FUNCCALL,
+    NT_LITERAL,
+} NodeType;
+
+// A block statement is just a list of statements.
+typedef struct {
+    List* stmts; // A list of Statement structs.
+} BlockStatement;
+
+typedef struct {
+    // TODO -- add parameters whe we get there
+    BlockStatement body;
+    const char name[256]; // The actual name of the function.
+} FunctionDeclaration;
+
+// An entire program is just a list of top level declarations.
+// For now, such declarations are only functions.
+typedef struct {
+    union {
+        FunctionDeclaration fd;
+        // VariableDeclaration vd; when we get there
+    } u;
+    NodeType type;
+} TopLevelDeclaration;
+
+// Right now, a function call doesn't have any parameters so it's just the name
+// of the function being called.
+typedef struct {
+    const char name[256];
+} FunctionCall;
+
+// An expression for now is an integer or a function call.
+typedef struct {
+    union {
+        FunctionCall fc;
+        const char literal[256];
+    } u;
+    NodeType type;
+} Expression;
+
+// Finally, an entire source file is a list of top-level declarations.
+typedef struct {
+    List* decls; // list of TopLevelDeclaration
+} ConcreteFileTree;

From e669ebf8f2f0305d8a8ed04df118674b3af5bc9f Mon Sep 17 00:00:00 2001
From: adamhutchings <adam.abahot@gmail.com>
Date: Wed, 26 Jun 2024 13:21:48 -0400
Subject: [PATCH 4/6] Remove consts (because we need to set these names
 somehow)

---
 src/parser/cst.h | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/src/parser/cst.h b/src/parser/cst.h
index 59fa469..3184cf5 100644
--- a/src/parser/cst.h
+++ b/src/parser/cst.h
@@ -28,7 +28,7 @@ typedef struct {
 typedef struct {
     // TODO -- add parameters whe we get there
     BlockStatement body;
-    const char name[256]; // The actual name of the function.
+    char name[256]; // The actual name of the function.
 } FunctionDeclaration;
 
 // An entire program is just a list of top level declarations.
@@ -44,14 +44,14 @@ typedef struct {
 // Right now, a function call doesn't have any parameters so it's just the name
 // of the function being called.
 typedef struct {
-    const char name[256];
+    char name[256];
 } FunctionCall;
 
 // An expression for now is an integer or a function call.
 typedef struct {
     union {
         FunctionCall fc;
-        const char literal[256];
+        char literal[256];
     } u;
     NodeType type;
 } Expression;

From 16ee5342f97fae19890b167119d0eb3b7959174e Mon Sep 17 00:00:00 2001
From: Jake <jakeroggenbuck2@gmail.com>
Date: Wed, 26 Jun 2024 15:21:31 -0700
Subject: [PATCH 5/6] Close files and check for numeric

---
 src/driver/main.c  |  2 ++
 src/parser/parse.c | 15 +++++++++------
 2 files changed, 11 insertions(+), 6 deletions(-)

diff --git a/src/driver/main.c b/src/driver/main.c
index 4a7d3ea..d963669 100644
--- a/src/driver/main.c
+++ b/src/driver/main.c
@@ -31,6 +31,8 @@ int lexer_dump(const char *filename) {
                ttype_name(t.type), t.line, t.column);
     } while (t.type != TT_EOF);
 
+	fclose(fp);
+
     return 0;
 }
 
diff --git a/src/parser/parse.c b/src/parser/parse.c
index d1da408..5a7190c 100644
--- a/src/parser/parse.c
+++ b/src/parser/parse.c
@@ -7,6 +7,7 @@
 #include <lexer/token.h>
 #include <stdlib.h> // calloc
 #include <string.h> // strcmp
+#include <ctype.h> // isdigit
 #include <util/out.h>
 
 int parse(const char *filename) {
@@ -58,18 +59,18 @@ int parse(const char *filename) {
 
             // Return value
             if (tokens[5].type == TT_RETURN && tokens[6].type == TT_LITERAL &&
-                tokens[7].type == TT_SEMI) {
+                isdigit(tokens[6].contents[0]) && tokens[7].type == TT_SEMI) {
 
                 // Correct matched closed brace
                 if (tokens[8].type == TT_CBRACE) {
-					printf("\n");
-					
-					// Generate preamble main code
+                    printf("\n");
+
+                    // Generate preamble main code
                     char *code_start = start_main();
 
-					printf(code_start);
+                    printf(code_start);
 
-					// Add custom return code
+                    // Add custom return code
                     char *code_end =
                         end_main_custom_return(atoi(tokens[6].contents));
 
@@ -88,6 +89,8 @@ int parse(const char *filename) {
         PRINT_ERROR("Not correct main function.\n");
     }
 
+    fclose(fp);
+
     return 0;
 }
 

From 336bee13eca8dc6c445198575034b3741a4f985a Mon Sep 17 00:00:00 2001
From: adamhutchings <adam.abahot@gmail.com>
Date: Thu, 27 Jun 2024 17:04:14 -0400
Subject: [PATCH 6/6] Add parser framework

---
 src/lexer/lex.c    |  2 +-
 src/parser/cst.h   |  2 +-
 src/parser/parse.c | 35 ++++++++++++++++++++++++++++++++---
 3 files changed, 34 insertions(+), 5 deletions(-)

diff --git a/src/lexer/lex.c b/src/lexer/lex.c
index c94a9cc..7d6a61e 100644
--- a/src/lexer/lex.c
+++ b/src/lexer/lex.c
@@ -248,7 +248,7 @@ int real_lex(Lexer *l, Token *t) {
     // return./
     if (starts_operator(init)) {
         while (valid_operator_sequence(t->contents)) {
-            t->contents[pos++] = (c = lexer_getchar(l->fp));
+            t->contents[pos++] = (c = lexer_getchar(l));
         }
         // We've ended!
         // Can we reduce this code duplication from above in a smart way?
diff --git a/src/parser/cst.h b/src/parser/cst.h
index 3184cf5..f68ec16 100644
--- a/src/parser/cst.h
+++ b/src/parser/cst.h
@@ -7,7 +7,7 @@
 
 #pragma once
 
-#include "list.h"
+#include <util/list.h>
 
 // A list of all node types.
 typedef enum {
diff --git a/src/parser/parse.c b/src/parser/parse.c
index 5a7190c..6176d52 100644
--- a/src/parser/parse.c
+++ b/src/parser/parse.c
@@ -2,12 +2,14 @@
  *
  */
 
-#include <codegen/x86/codegen.h>
-#include <lexer/lex.h>
-#include <lexer/token.h>
 #include <stdlib.h> // calloc
 #include <string.h> // strcmp
 #include <ctype.h> // isdigit
+
+#include <codegen/x86/codegen.h>
+#include <lexer/lex.h>
+#include <lexer/token.h>
+#include <parser/cst.h>
 #include <util/out.h>
 
 int parse(const char *filename) {
@@ -95,3 +97,30 @@ int parse(const char *filename) {
 }
 
 int parse_simple_main_func() {}
+
+/**
+ * Proper parsing code below -- producing a concrete syntax tree from a file.
+ * Each of these functions will probably reference the others numerous times.
+ */
+
+int parse_expr(Lexer *l, Expression *ex) {
+    return 1; // TODO: literal or function call; fails until implemented.
+}
+
+int parse_funccall(Lexer *l, Expression *ex) {
+    return 1; // TODO: not implemented yet; non-zero reports failure.
+}
+
+int parse_blockstmt(Lexer *l, BlockStatement *bs) {
+    return 1; // TODO: not implemented yet; non-zero reports failure.
+}
+
+int parse_funcdecl(Lexer *l, FunctionDeclaration *fd) {
+    return 1; // TODO: not implemented yet; non-zero reports failure.
+}
+
+// Parse function -- takes a lexer and fills the caller-provided concrete
+// syntax tree with the data. Returns non-zero on failure, like lex().
+int make_cst(Lexer *l, ConcreteFileTree *tree) {
+    return 1; // TODO: not implemented yet, so always reports failure.
+}