/*
 * Patch summary (this text precedes the first "diff --git" header).
 *
 * What the patch does, per file:
 *  - src/codegen/x86/codegen.c: adds "\n" escapes to the start_main() string,
 *    re-indents the end_main()/start_func()/end_func() strings, adds
 *    end_main_custom_return(int val), which malloc()s 256 bytes and sprintf()s
 *    an exit sequence whose rdi operand is `val`, and prefixes the
 *    init_int_literal() output (and its test expectation) with whitespace.
 *  - src/codegen/x86/codegen.h: declares end_main_custom_return().
 *  - src/driver/main.c: includes parse.h, fclose()s the file at the end of
 *    lexer_dump(), and adds a "--test-parse" option that calls parse(argv[1]).
 *  - src/lexer/lex.c: operator lexing calls lexer_getchar()/lexer_ungetchar()
 *    instead of getc()/ungetc() on l->fp.
 *  - src/parser/cst.h, parse.c, parse.h: new files. parse() lexes the whole
 *    file into a Token array, prints every token, and, when the tokens match
 *    `int main() { return <literal starting with a digit>; }`, prints
 *    start_main() followed by end_main_custom_return(atoi(literal)).
 *  - tests/simplemain.c: sample input `int main() { return 5; }`.
 *
 * NOTE(review): items visible in the hunks below, to be confirmed by the author:
 *  - end_main(), start_func() and end_func() still join their source lines
 *    with backslash continuations and contain no "\n", unlike start_main()
 *    and end_main_custom_return().
 *  - lexer_dump() and parse() both return 1 on a lex() failure before reaching
 *    fclose(fp).
 *  - main() tests argv[0] against the option names, but the "not recognized"
 *    message prints argv[1] (carried over from the removed lines).
 *  - main() returns 0 after parse() without looking at its result, and parse()
 *    itself returns 0 after every PRINT_ERROR branch.
 *  - When the token buffer is full, parse() assigns a fresh calloc() block to
 *    `tokens`; no copy of the earlier tokens is visible, and neither `tokens`
 *    nor the buffer returned by end_main_custom_return() is freed.
 *  - parse() passes the generated strings to printf() as the format argument.
 *  - parse_simple_main_func(), parse_expr(), parse_funccall(),
 *    parse_blockstmt(), parse_funcdecl() and make_cst() are declared int but
 *    their bodies contain no return statement.
 *  - Several #include lines have no header name in this copy of the patch;
 *    presumably the <...> names were lost in extraction -- TODO confirm.
 */
diff --git a/src/codegen/x86/codegen.c b/src/codegen/x86/codegen.c index 3c118db..4b11fed 100644 --- a/src/codegen/x86/codegen.c +++ b/src/codegen/x86/codegen.c @@ -21,41 +21,49 @@ void code_gen_init() { char *start_main() { static char start[256] = "\ -global _start\ -section .text\ -\ -_start:"; +global _start\n\ +section .text\n\ +\n\ +_start:\n"; return start; } char *end_main() { static char end[256] = "\ -mov rax, 60\ -mov rdi, 0\ -syscall"; + mov rax, 60\ + mov rdi, 0\ + syscall"; + + return end; +} + +char *end_main_custom_return(int val) { + char *end; + end = (char *)malloc(256 * sizeof(char)); + sprintf(end, " mov rax, 60\n mov rdi, %d\n syscall\n", val); return end; } char *start_func() { static char start[256] = "\ -sub rsp, 32\ -mov [rsp], r12\ -mov [rsp+8], r13\ -mov [rsp+16], r14\ -mov [rsp+24], r15"; + sub rsp, 32\ + mov [rsp], r12\ + mov [rsp+8], r13\ + mov [rsp+16], r14\ + mov [rsp+24], r15"; return start; } char *end_func() { static char end[256] = "\ -mov r12, [rsp]\ -mov r13, [rsp+8]\ -mov r14, [rsp+16]\ -mov r15, [rsp+24]\ -add rsp, 32"; + mov r12, [rsp]\ + mov r13, [rsp+8]\ + mov r14, [rsp+16]\ + mov r15, [rsp+24]\ + add rsp, 32"; return end; } @@ -65,16 +73,16 @@ char *init_int_literal(int val) { char *init; init = (char *)malloc(256 * sizeof(char)); - sprintf(init, "mov [rsp+%d], %d", GEN_STATE.rsp_offset, val); + sprintf(init, " mov [rsp+%d], %d", GEN_STATE.rsp_offset, val); return init; } int test_init_int_literal() { - testing_func_setup(); + testing_func_setup(); code_gen_init(); - tassert(strcmp(init_int_literal(100), "mov [rsp+8], 100") == 0); + tassert(strcmp(init_int_literal(100), " mov [rsp+8], 100") == 0); - return 0; + return 0; } diff --git a/src/codegen/x86/codegen.h b/src/codegen/x86/codegen.h index 604d87a..4559d91 100644 --- a/src/codegen/x86/codegen.h +++ b/src/codegen/x86/codegen.h @@ -8,6 +8,8 @@ char *end_main(); char *start_func(); +char *end_main_custom_return(int val); + char *end_func(); char *init_int_literal(int 
val); diff --git a/src/driver/main.c b/src/driver/main.c index 960c42b..87b82bb 100644 --- a/src/driver/main.c +++ b/src/driver/main.c @@ -6,13 +6,14 @@ #include // strcmp #include "../lexer/lex.h" +#include "../parser/parse.h" #include "../util/out.h" -int lexer_dump(const char* filename) { +int lexer_dump(const char *filename) { // Initialization of everything Lexer lexer; - FILE * fp = fopen(filename, "r"); + FILE *fp = fopen(filename, "r"); if (!fp) { PRINT_ERROR("File %s not found", filename); return 1; @@ -24,12 +25,15 @@ int lexer_dump(const char* filename) { Token t; do { // Return if some non-zero (error) code is returned - if (lex(&lexer, &t)) return 1; - printf("Contents: %20s, type: %20s, position: %d/%d\n", t.contents, ttype_name(t.type), t.line, t.column); + if (lex(&lexer, &t)) + return 1; + printf("Contents: %20s, type: %20s, position: %d/%d\n", t.contents, + ttype_name(t.type), t.line, t.column); } while (t.type != TT_EOF); - return 0; + fclose(fp); + return 0; } int main(int argc, char **argv) { @@ -38,14 +42,16 @@ int main(int argc, char **argv) { // Skip the name of the executable. --argc, ++argv; - + if (argc == 0) { PRINT_DEFAULT("Usage: --token-dump to see all tokens"); return 0; } if (argc == 1) { - PRINT_DEFAULT("default compilation not supported yet -- try 'jccc --token-dump %s' instead.", argv[0]); + PRINT_DEFAULT("default compilation not supported yet -- try 'jccc " + "--token-dump %s' instead.", + argv[0]); return 1; } @@ -55,13 +61,14 @@ int main(int argc, char **argv) { } // Two arguments now. - if (strcmp(argv[0], "--token-dump")) { - PRINT_ERROR("option %s not recognized.", argv[1]); - return 1; - } - - // Finally, we can do the lexer test properly! - return lexer_dump(argv[1]); + if (strcmp(argv[0], "--token-dump") == 0) { + // Finally, we can do the lexer test properly! 
+ return lexer_dump(argv[1]); + } else if (strcmp(argv[0], "--test-parse") == 0) { + parse(argv[1]); + return 0; + } - return 0; + PRINT_ERROR("option %s not recognized.", argv[1]); + return 1; } diff --git a/src/lexer/lex.c b/src/lexer/lex.c index 1dcb78e..f798e56 100644 --- a/src/lexer/lex.c +++ b/src/lexer/lex.c @@ -248,11 +248,11 @@ int real_lex(Lexer *l, Token *t) { // return./ if (starts_operator(init)) { while (valid_operator_sequence(t->contents)) { - t->contents[pos++] = (c = getc(l->fp)); + t->contents[pos++] = (c = lexer_getchar(l)); } // We've ended! // Can we reduce this code duplication from above in a smart way? - ungetc(c, l->fp); + lexer_ungetchar(l); t->contents[pos - 1] = '\0'; t->type = ttype_from_string(t->contents); t->length = pos; diff --git a/src/parser/cst.h b/src/parser/cst.h new file mode 100644 index 0000000..f68ec16 --- /dev/null +++ b/src/parser/cst.h @@ -0,0 +1,62 @@ +/** + * The structures for a concrete syntax tree. + * For now, the subset of C we are parsing is quite simple: + * - Parameterless functions. + * - Return statements, which accept integers or function calls. + */ + +#pragma once + +#include + +// A list of all node types. +typedef enum { + NT_STMT, + NT_EXPR, + NT_BLOCK_STMT, + NT_RETURN_STMT, + NT_FUNCDECL, + NT_FUNCCALL, + NT_LITERAL, +} NodeType; + +// A block statement is just a list of statements. +typedef struct { + List* stmts; // A list of Statement structs. +} BlockStatement; + +typedef struct { + // TODO -- add parameters when we get there + BlockStatement body; + char name[256]; // The actual name of the function. +} FunctionDeclaration; + +// An entire program is just a list of top level declarations. +// For now, such declarations are only functions. 
+typedef struct { + union { + FunctionDeclaration fd; + // VariableDeclaration vd; when we get there + } u; + NodeType type; +} TopLevelDeclaration; + +// Right now, a function call doesn't have any parameters so it's just the name +// of the function being called. +typedef struct { + char name[256]; +} FunctionCall; + +// An expression for now is an integer or a function call. +typedef struct { + union { + FunctionCall fc; + char literal[256]; + } u; + NodeType type; +} Expression; + +// Finally, an entire source file is a list of top-level declarations. +typedef struct { + List* decls; // list of TopLevelDeclaration +} ConcreteFileTree; diff --git a/src/parser/parse.c b/src/parser/parse.c new file mode 100644 index 0000000..6176d52 --- /dev/null +++ b/src/parser/parse.c @@ -0,0 +1,126 @@ +/* Parser + * + */ + +#include // calloc +#include // strcmp +#include // isdigit + +#include +#include +#include +#include +#include + +int parse(const char *filename) { + + Lexer lexer; + + FILE *fp = fopen(filename, "r"); + + if (!fp) { + PRINT_ERROR("File %s not found", filename); + return 1; + } + + lexer.fp = fp; + lexer.unlexed_count = 0; + lexer.column = lexer.line = 1; + + Token t; + + int i = 0; + int buffer_size = 16; + Token *tokens = calloc(buffer_size, sizeof(Token)); + + do { + if (lex(&lexer, &t)) { + return 1; + } + + if (buffer_size <= i) { + buffer_size *= 2; + tokens = calloc(buffer_size, sizeof(Token)); + } + + tokens[i] = t; + + printf("Contents: %20s, type: %20s, position: %d/%d\n", t.contents, + ttype_name(t.type), t.line, t.column); + + i++; + } while (t.type != TT_EOF); + + // Main function + if (tokens[0].type == TT_INT && tokens[1].type == TT_IDENTIFIER && + (strcmp(tokens[1].contents, "main") == 0)) { + + // Correct empty function body + if (tokens[2].type == TT_OPAREN && tokens[3].type == TT_CPAREN && + tokens[4].type == TT_OBRACE) { + + // Return value + if (tokens[5].type == TT_RETURN && tokens[6].type == TT_LITERAL && + 
isdigit(tokens[6].contents[0]) && tokens[7].type == TT_SEMI) { + + // Correct matched closed brace + if (tokens[8].type == TT_CBRACE) { + printf("\n"); + + // Generate preamble main code + char *code_start = start_main(); + + printf(code_start); + + // Add custom return code + char *code_end = + end_main_custom_return(atoi(tokens[6].contents)); + + printf(code_end); + + } else { + PRINT_ERROR("Wrong closing brace.\n"); + } + } else { + PRINT_ERROR("Return value is wrong.\n"); + } + } else { + PRINT_ERROR("Wrong main function body.\n"); + } + } else { + PRINT_ERROR("Not correct main function.\n"); + } + + fclose(fp); + + return 0; +} + +int parse_simple_main_func() {} + +/** + * Proper parsing code below -- producing a concrete syntax tree from a file. + * Each of these functions will probably reference the others numerous times. + */ + +int parse_expr(Lexer* l, Expression* ex) { + // TODO (just a literal or a function call for now). +} + +int parse_funccall(Lexer* l, Expression* ex) { + // TODO +} + +int parse_blockstmt(Lexer* l, BlockStatement* bs) { + // TODO +} + +int parse_funcdecl(Lexer* l, FunctionDeclaration* fd) { + // TODO +} + +// Parse function -- takes a lexer and produces a concrete syntax tree. Fill the +// struct which we have given with the data. +int make_cst(Lexer* l, ConcreteFileTree* tree) { + // TODO +} diff --git a/src/parser/parse.h b/src/parser/parse.h new file mode 100644 index 0000000..8ea9ad8 --- /dev/null +++ b/src/parser/parse.h @@ -0,0 +1,5 @@ +/* Parser + * + */ + +int parse(const char *filename); diff --git a/tests/simplemain.c b/tests/simplemain.c new file mode 100644 index 0000000..aa5fd90 --- /dev/null +++ b/tests/simplemain.c @@ -0,0 +1,3 @@ +int main() { + return 5; +}