From 7fd0973e21e747f0370a724998984111f522a1e0 Mon Sep 17 00:00:00 2001 From: Jake Date: Tue, 25 Jun 2024 22:40:45 -0700 Subject: [PATCH 1/2] Add basic parsing code --- src/codegen/x86/codegen.c | 50 ++++++++++++--------- src/codegen/x86/codegen.h | 2 + src/driver/main.c | 35 ++++++++------- src/parser/parse.c | 94 +++++++++++++++++++++++++++++++++++++++ src/parser/parse.h | 5 +++ tests/simplemain.c | 3 ++ 6 files changed, 153 insertions(+), 36 deletions(-) create mode 100644 src/parser/parse.c create mode 100644 src/parser/parse.h create mode 100644 tests/simplemain.c diff --git a/src/codegen/x86/codegen.c b/src/codegen/x86/codegen.c index e0b317d..fae49b3 100644 --- a/src/codegen/x86/codegen.c +++ b/src/codegen/x86/codegen.c @@ -21,41 +21,49 @@ void code_gen_init() { char *start_main() { static char start[256] = "\ -global _start\ -section .text\ -\ -_start:"; +global _start\n\ +section .text\n\ +\n\ +_start:\n"; return start; } char *end_main() { static char end[256] = "\ -mov rax, 60\ -mov rdi, 0\ -syscall"; + mov rax, 60\ + mov rdi, 0\ + syscall"; + + return end; +} + +char *end_main_custom_return(int val) { + char *end; + end = (char *)malloc(256 * sizeof(char)); + sprintf(end, " mov rax, 60\n mov rdi, %d\n syscall\n", val); return end; } char *start_func() { static char start[256] = "\ -sub rsp, 32\ -mov [rsp], r12\ -mov [rsp+8], r13\ -mov [rsp+16], r14\ -mov [rsp+24], r15"; + sub rsp, 32\ + mov [rsp], r12\ + mov [rsp+8], r13\ + mov [rsp+16], r14\ + mov [rsp+24], r15"; return start; } char *end_func() { static char end[256] = "\ -mov r12, [rsp]\ -mov r13, [rsp+8]\ -mov r14, [rsp+16]\ -mov r15, [rsp+24]\ -add rsp, 32"; + mov r12, [rsp]\ + mov r13, [rsp+8]\ + mov r14, [rsp+16]\ + mov r15, [rsp+24]\ + add rsp, 32"; return end; } @@ -65,16 +73,16 @@ char *init_int_literal(int val) { char *init; init = (char *)malloc(256 * sizeof(char)); - sprintf(init, "mov [rsp+%d], %d", GEN_STATE.rsp_offset, val); + sprintf(init, " mov [rsp+%d], %d", GEN_STATE.rsp_offset, val); return init; } int test_init_int_literal() { - testing_func_setup(); + testing_func_setup(); code_gen_init(); - tassert(strcmp(init_int_literal(100), "mov [rsp+8], 100") == 0); + tassert(strcmp(init_int_literal(100), " mov [rsp+8], 100") == 0); - return 0; + return 0; } diff --git a/src/codegen/x86/codegen.h b/src/codegen/x86/codegen.h index 4af672d..5e3a489 100644 --- a/src/codegen/x86/codegen.h +++ b/src/codegen/x86/codegen.h @@ -8,6 +8,8 @@ char *end_main(); char *start_func(); +char *end_main_custom_return(int val); + char *end_func(); char *init_int_literal(int val); diff --git a/src/driver/main.c b/src/driver/main.c index 42ee68b..4a7d3ea 100644 --- a/src/driver/main.c +++ b/src/driver/main.c @@ -6,13 +6,14 @@ #include // strcmp #include +#include #include -int lexer_dump(const char* filename) { +int lexer_dump(const char *filename) { // Initialization of everything Lexer lexer; - FILE * fp = fopen(filename, "r"); + FILE *fp = fopen(filename, "r"); if (!fp) { PRINT_ERROR("File %s not found", filename); return 1; @@ -24,12 +25,13 @@ int lexer_dump(const char* filename) { Token t; do { // Return if some non-zero (error) code is returned - if (lex(&lexer, &t)) return 1; - printf("Contents: %20s, type: %20s, position: %d/%d\n", t.contents, ttype_name(t.type), t.line, t.column); + if (lex(&lexer, &t)) + return 1; + printf("Contents: %20s, type: %20s, position: %d/%d\n", t.contents, + ttype_name(t.type), t.line, t.column); } while (t.type != TT_EOF); return 0; - } int main(int argc, char **argv) { @@ -38,14 +40,16 @@ int main(int argc, char **argv) { // Skip the name of the executable. --argc, ++argv; - + if (argc == 0) { PRINT_DEFAULT("Usage: --token-dump to see all tokens"); return 0; } if (argc == 1) { - PRINT_DEFAULT("default compilation not supported yet -- try 'jccc --token-dump %s' instead.", argv[0]); + PRINT_DEFAULT("default compilation not supported yet -- try 'jccc " + "--token-dump %s' instead.", + argv[0]); return 1; } @@ -55,13 +59,14 @@ int main(int argc, char **argv) { } // Two arguments now. - if (strcmp(argv[0], "--token-dump")) { - PRINT_ERROR("option %s not recognized.", argv[1]); - return 1; - } - - // Finally, we can do the lexer test properly! - return lexer_dump(argv[1]); + if (strcmp(argv[0], "--token-dump") == 0) { + // Finally, we can do the lexer test properly! + return lexer_dump(argv[1]); + } else if (strcmp(argv[0], "--test-parse") == 0) { + parse(argv[1]); + return 0; + } - return 0; + PRINT_ERROR("option %s not recognized.", argv[1]); + return 1; } diff --git a/src/parser/parse.c b/src/parser/parse.c new file mode 100644 index 0000000..d1da408 --- /dev/null +++ b/src/parser/parse.c @@ -0,0 +1,94 @@ +/* Parser + * + */ + +#include +#include +#include +#include // calloc +#include // strcmp +#include + +int parse(const char *filename) { + + Lexer lexer; + + FILE *fp = fopen(filename, "r"); + + if (!fp) { + PRINT_ERROR("File %s not found", filename); + return 1; + } + + lexer.fp = fp; + lexer.unlexed_count = 0; + lexer.column = lexer.line = 1; + + Token t; + + int i = 0; + int buffer_size = 16; + Token *tokens = calloc(buffer_size, sizeof(Token)); + + do { + if (lex(&lexer, &t)) { + return 1; + } + + if (buffer_size <= i) { + buffer_size *= 2; + tokens = calloc(buffer_size, sizeof(Token)); + } + + tokens[i] = t; + + printf("Contents: %20s, type: %20s, position: %d/%d\n", t.contents, + ttype_name(t.type), t.line, t.column); + + i++; + } while (t.type != TT_EOF); + + // Main function + if (tokens[0].type == TT_INT && tokens[1].type == TT_IDENTIFIER && + (strcmp(tokens[1].contents, "main") == 0)) { + + // Correct empty function body + if (tokens[2].type == TT_OPAREN && tokens[3].type == TT_CPAREN && + tokens[4].type == TT_OBRACE) { + + // Return value + if (tokens[5].type == TT_RETURN && tokens[6].type == TT_LITERAL && + tokens[7].type == TT_SEMI) { + + // Correct matched closed brace + if (tokens[8].type == TT_CBRACE) { + printf("\n"); + + // Generate preamble main code + char *code_start = start_main(); + + printf(code_start); + + // Add custom return code + char *code_end = + end_main_custom_return(atoi(tokens[6].contents)); + + printf(code_end); + + } else { + PRINT_ERROR("Wrong closing brace.\n"); + } + } else { + PRINT_ERROR("Return value is wrong.\n"); + } + } else { + PRINT_ERROR("Wrong main function body.\n"); + } + } else { + PRINT_ERROR("Not correct main function.\n"); + } + + return 0; +} + +int parse_simple_main_func() {} diff --git a/src/parser/parse.h b/src/parser/parse.h new file mode 100644 index 0000000..8ea9ad8 --- /dev/null +++ b/src/parser/parse.h @@ -0,0 +1,5 @@ +/* Parser + * + */ + +int parse(const char *filename); diff --git a/tests/simplemain.c b/tests/simplemain.c new file mode 100644 index 0000000..aa5fd90 --- /dev/null +++ b/tests/simplemain.c @@ -0,0 +1,3 @@ +int main() { + return 5; +} From 16ee5342f97fae19890b167119d0eb3b7959174e Mon Sep 17 00:00:00 2001 From: Jake Date: Wed, 26 Jun 2024 15:21:31 -0700 Subject: [PATCH 2/2] Close files and check for numeric --- src/driver/main.c | 2 ++ src/parser/parse.c | 15 +++++++++------ 2 files changed, 11 insertions(+), 6 deletions(-) diff --git a/src/driver/main.c b/src/driver/main.c index 4a7d3ea..d963669 100644 --- a/src/driver/main.c +++ b/src/driver/main.c @@ -31,6 +31,8 @@ int lexer_dump(const char *filename) { ttype_name(t.type), t.line, t.column); } while (t.type != TT_EOF); + fclose(fp); + return 0; } diff --git a/src/parser/parse.c b/src/parser/parse.c index d1da408..5a7190c 100644 --- a/src/parser/parse.c +++ b/src/parser/parse.c @@ -7,6 +7,7 @@ #include #include // calloc #include // strcmp +#include // isdigit #include int parse(const char *filename) { @@ -58,18 +59,18 @@ int parse(const char *filename) { // Return value if (tokens[5].type == TT_RETURN && tokens[6].type == TT_LITERAL && - tokens[7].type == TT_SEMI) { + isdigit(tokens[6].contents[0]) && tokens[7].type == TT_SEMI) { // Correct matched closed brace if (tokens[8].type == TT_CBRACE) { - printf("\n"); - - // Generate preamble main code + printf("\n"); + + // Generate preamble main code char *code_start = start_main(); - printf(code_start); + printf(code_start); - // Add custom return code + // Add custom return code char *code_end = end_main_custom_return(atoi(tokens[6].contents)); @@ -88,6 +89,8 @@ int parse(const char *filename) { PRINT_ERROR("Not correct main function.\n"); } + fclose(fp); + return 0; }