-
Notifications
You must be signed in to change notification settings - Fork 4
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge branch 'main' of https://github.com/jabacat/jccc
- Loading branch information
Showing
8 changed files
with
251 additions
and
38 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,62 @@ | ||
/** | ||
* The structures for a concrete syntax tree. | ||
* For now, the subset of C we are parsing is quite simple: | ||
* - Parameterless functions. | ||
* - Return statements, which accept integers or function calls. | ||
*/ | ||
|
||
#pragma once | ||
|
||
#include <util/list.h> | ||
|
||
/**
 * Tag naming the kind of syntax-tree node.
 * One enumerator exists per node kind the parser currently knows about; the
 * values are the default consecutive ones, starting at zero.
 */
typedef enum {
    NT_STMT = 0,
    NT_EXPR,
    NT_BLOCK_STMT,
    NT_RETURN_STMT,
    NT_FUNCDECL,
    NT_FUNCCALL,
    NT_LITERAL,
} NodeType;
|
||
// A block statement is just a list of statements. | ||
typedef struct { | ||
List* stmts; // A list of Statement structs. | ||
} BlockStatement; | ||
|
||
typedef struct { | ||
// TODO -- add parameters whe we get there | ||
BlockStatement body; | ||
char name[256]; // The actual name of the function. | ||
} FunctionDeclaration; | ||
|
||
// An entire program is just a list of top level declarations. | ||
// For now, such declarations are only functions. | ||
typedef struct { | ||
union { | ||
FunctionDeclaration fd; | ||
// VariableDeclaration vd; when we get there | ||
} u; | ||
NodeType type; | ||
} TopLevelDeclaration; | ||
|
||
/**
 * A function call. Calls take no arguments yet, so the only thing recorded
 * is the name of the function being called.
 */
typedef struct {
    char name[256];
} FunctionCall;
|
||
// An expression for now is an integer or a function call. | ||
typedef struct { | ||
union { | ||
FunctionCall fc; | ||
char literal[256]; | ||
} u; | ||
NodeType type; | ||
} Expression; | ||
|
||
// Finally, an entire source file is a list of top-level declarations. | ||
typedef struct { | ||
List* decls; // list of TopLevelDeclaration | ||
} ConcreteFileTree; |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,126 @@ | ||
/* Parser | ||
* | ||
*/ | ||
|
||
#include <stdlib.h> // calloc | ||
#include <string.h> // strcmp | ||
#include <ctype.h> // isdigit | ||
|
||
#include <codegen/x86/codegen.h> | ||
#include <lexer/lex.h> | ||
#include <lexer/token.h> | ||
#include <parser/cst.h> | ||
#include <util/out.h> | ||
|
||
int parse(const char *filename) { | ||
|
||
Lexer lexer; | ||
|
||
FILE *fp = fopen(filename, "r"); | ||
|
||
if (!fp) { | ||
PRINT_ERROR("File %s not found", filename); | ||
return 1; | ||
} | ||
|
||
lexer.fp = fp; | ||
lexer.unlexed_count = 0; | ||
lexer.column = lexer.line = 1; | ||
|
||
Token t; | ||
|
||
int i = 0; | ||
int buffer_size = 16; | ||
Token *tokens = calloc(buffer_size, sizeof(Token)); | ||
|
||
do { | ||
if (lex(&lexer, &t)) { | ||
return 1; | ||
} | ||
|
||
if (buffer_size <= i) { | ||
buffer_size *= 2; | ||
tokens = calloc(buffer_size, sizeof(Token)); | ||
} | ||
|
||
tokens[i] = t; | ||
|
||
printf("Contents: %20s, type: %20s, position: %d/%d\n", t.contents, | ||
ttype_name(t.type), t.line, t.column); | ||
|
||
i++; | ||
} while (t.type != TT_EOF); | ||
|
||
// Main function | ||
if (tokens[0].type == TT_INT && tokens[1].type == TT_IDENTIFIER && | ||
(strcmp(tokens[1].contents, "main") == 0)) { | ||
|
||
// Correct empty function body | ||
if (tokens[2].type == TT_OPAREN && tokens[3].type == TT_CPAREN && | ||
tokens[4].type == TT_OBRACE) { | ||
|
||
// Return value | ||
if (tokens[5].type == TT_RETURN && tokens[6].type == TT_LITERAL && | ||
isdigit(tokens[6].contents[0]) && tokens[7].type == TT_SEMI) { | ||
|
||
// Correct matched closed brace | ||
if (tokens[8].type == TT_CBRACE) { | ||
printf("\n"); | ||
|
||
// Generate preamble main code | ||
char *code_start = start_main(); | ||
|
||
printf(code_start); | ||
|
||
// Add custom return code | ||
char *code_end = | ||
end_main_custom_return(atoi(tokens[6].contents)); | ||
|
||
printf(code_end); | ||
|
||
} else { | ||
PRINT_ERROR("Wrong closing brace.\n"); | ||
} | ||
} else { | ||
PRINT_ERROR("Return value is wrong.\n"); | ||
} | ||
} else { | ||
PRINT_ERROR("Wrong main function body.\n"); | ||
} | ||
} else { | ||
PRINT_ERROR("Not correct main function.\n"); | ||
} | ||
|
||
fclose(fp); | ||
|
||
return 0; | ||
} | ||
|
||
/**
 * Placeholder -- not implemented yet.
 *
 * @return Always 1. The empty body used to fall off the end of a non-void
 *         function, leaving the result indeterminate; non-zero matches the
 *         failure status parse() uses in this file.
 */
int parse_simple_main_func(void) {
    // TODO
    return 1;
}
|
||
/** | ||
* Proper parsing code below -- producing a concrete syntax tree from a file. | ||
* Each of these functions will probably reference the others numerous times. | ||
*/ | ||
|
||
/**
 * Parse an expression (for now just a literal or a function call).
 *
 * Not implemented yet: neither argument is touched.
 *
 * @param l   Lexer to read tokens from.
 * @param ex  Expression to fill in.
 * @return Always 1 for now. The stub used to fall off the end of a non-void
 *         function, leaving the result indeterminate; non-zero matches the
 *         failure status parse() uses in this file.
 */
int parse_expr(Lexer *l, Expression *ex) {
    // TODO (just a literal or a function call for now).
    (void)l;
    (void)ex;
    return 1;
}
|
||
/**
 * Parse a function call into an expression.
 *
 * Not implemented yet: neither argument is touched.
 *
 * @param l   Lexer to read tokens from.
 * @param ex  Expression to fill in.
 * @return Always 1 for now. The stub used to fall off the end of a non-void
 *         function, leaving the result indeterminate; non-zero matches the
 *         failure status parse() uses in this file.
 */
int parse_funccall(Lexer *l, Expression *ex) {
    // TODO
    (void)l;
    (void)ex;
    return 1;
}
|
||
/**
 * Parse a block statement.
 *
 * Not implemented yet: neither argument is touched.
 *
 * @param l   Lexer to read tokens from.
 * @param bs  Block statement to fill in.
 * @return Always 1 for now. The stub used to fall off the end of a non-void
 *         function, leaving the result indeterminate; non-zero matches the
 *         failure status parse() uses in this file.
 */
int parse_blockstmt(Lexer *l, BlockStatement *bs) {
    // TODO
    (void)l;
    (void)bs;
    return 1;
}
|
||
/**
 * Parse a function declaration.
 *
 * Not implemented yet: neither argument is touched.
 *
 * @param l   Lexer to read tokens from.
 * @param fd  Function declaration to fill in.
 * @return Always 1 for now. The stub used to fall off the end of a non-void
 *         function, leaving the result indeterminate; non-zero matches the
 *         failure status parse() uses in this file.
 */
int parse_funcdecl(Lexer *l, FunctionDeclaration *fd) {
    // TODO
    (void)l;
    (void)fd;
    return 1;
}
|
||
/**
 * Parse function -- takes a lexer and produces a concrete syntax tree by
 * filling in the struct supplied by the caller.
 *
 * Not implemented yet: neither argument is touched.
 *
 * @param l     Lexer to read tokens from.
 * @param tree  Tree to fill in with the parsed data.
 * @return Always 1 for now. The stub used to fall off the end of a non-void
 *         function, leaving the result indeterminate; non-zero matches the
 *         failure status parse() uses in this file.
 */
int make_cst(Lexer *l, ConcreteFileTree *tree) {
    // TODO
    (void)l;
    (void)tree;
    return 1;
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,5 @@ | ||
/* Parser | ||
* | ||
*/ | ||
|
||
int parse(const char *filename); |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,3 @@ | ||
int main() { | ||
return 5; | ||
} |