Skip to content

Commit

Permalink
Merge branch 'main' of https://github.com/jabacat/jccc
Browse files Browse the repository at this point in the history
  • Loading branch information
NicoBliss committed Jul 8, 2024
2 parents d0aa0ee + 3056da3 commit 1632e0f
Show file tree
Hide file tree
Showing 8 changed files with 251 additions and 38 deletions.
50 changes: 29 additions & 21 deletions src/codegen/x86/codegen.c
Original file line number Diff line number Diff line change
Expand Up @@ -21,41 +21,49 @@ void code_gen_init() {

/**
 * Return the assembly text that opens the program's entry point.
 *
 * The text declares `_start` as global, selects `section .text`, and opens
 * the `_start:` label.
 *
 * @return Pointer to a function-local static 256-byte buffer. The same
 *         buffer is returned on every call, so the caller must not free it.
 */
char *start_main() {
static char start[256] = "\
global _start\
section .text\
\
_start:";
global _start\n\
section .text\n\
\n\
_start:\n";

return start;
}

/**
 * Return the assembly text that ends the program's entry point.
 *
 * The text loads 60 into rax and 0 into rdi, then executes `syscall`.
 * NOTE(review): 60 is presumably the Linux x86-64 `exit` syscall number,
 * which would make rdi the exit status -- confirm against the target ABI.
 *
 * @return Pointer to a function-local static 256-byte buffer. The same
 *         buffer is returned on every call, so the caller must not free it.
 */
char *end_main() {
static char end[256] = "\
mov rax, 60\
mov rdi, 0\
syscall";
mov rax, 60\
mov rdi, 0\
syscall";

return end;
}

/**
 * Build the assembly text that ends the entry point with a chosen value in
 * rdi: loads 60 into rax, `val` into rdi, then executes `syscall`.
 *
 * @param val  Integer written into the `mov rdi, ...` instruction.
 * @return     Heap-allocated, NUL-terminated string owned by the caller
 *             (release it with free()), or NULL if the allocation fails or
 *             the text does not fit in the buffer.
 */
char *end_main_custom_return(int val) {
    enum { END_BUF_SIZE = 256 };

    char *end = malloc(END_BUF_SIZE);
    if (end == NULL) {
        return NULL;
    }

    // snprintf bounds the write and reports truncation, unlike sprintf.
    int written = snprintf(end, END_BUF_SIZE,
                           " mov rax, 60\n mov rdi, %d\n syscall\n", val);
    if (written < 0 || written >= END_BUF_SIZE) {
        free(end);
        return NULL;
    }

    return end;
}

/**
 * Return the assembly text emitted at the start of a function.
 *
 * The text subtracts 32 from rsp and stores r12, r13, r14 and r15 at
 * [rsp], [rsp+8], [rsp+16] and [rsp+24] respectively. end_func() emits the
 * matching loads.
 *
 * @return Pointer to a function-local static 256-byte buffer. The same
 *         buffer is returned on every call, so the caller must not free it.
 */
char *start_func() {
static char start[256] = "\
sub rsp, 32\
mov [rsp], r12\
mov [rsp+8], r13\
mov [rsp+16], r14\
mov [rsp+24], r15";
sub rsp, 32\
mov [rsp], r12\
mov [rsp+8], r13\
mov [rsp+16], r14\
mov [rsp+24], r15";

return start;
}

/**
 * Return the assembly text emitted at the end of a function.
 *
 * The text reloads r12, r13, r14 and r15 from [rsp], [rsp+8], [rsp+16] and
 * [rsp+24], then adds 32 back to rsp -- the reverse of what start_func()
 * emits.
 *
 * @return Pointer to a function-local static 256-byte buffer. The same
 *         buffer is returned on every call, so the caller must not free it.
 */
char *end_func() {
static char end[256] = "\
mov r12, [rsp]\
mov r13, [rsp+8]\
mov r14, [rsp+16]\
mov r15, [rsp+24]\
add rsp, 32";
mov r12, [rsp]\
mov r13, [rsp+8]\
mov r14, [rsp+16]\
mov r15, [rsp+24]\
add rsp, 32";

return end;
}
Expand All @@ -65,16 +73,16 @@ char *init_int_literal(int val) {

char *init;
init = (char *)malloc(256 * sizeof(char));
sprintf(init, "mov [rsp+%d], %d", GEN_STATE.rsp_offset, val);
sprintf(init, " mov [rsp+%d], %d", GEN_STATE.rsp_offset, val);

return init;
}

/**
 * Unit test for init_int_literal().
 *
 * Calls testing_func_setup() and code_gen_init(), then uses tassert to check
 * that init_int_literal(100) yields the expected `mov [rsp+8], 100` text.
 * The `+8` presumably reflects GEN_STATE.rsp_offset right after
 * code_gen_init() -- confirm against that function.
 *
 * NOTE(review): init_int_literal() returns malloc'd memory, which this test
 * never frees.
 *
 * @return Always 0.
 */
int test_init_int_literal() {
testing_func_setup();
testing_func_setup();
code_gen_init();

tassert(strcmp(init_int_literal(100), "mov [rsp+8], 100") == 0);
tassert(strcmp(init_int_literal(100), " mov [rsp+8], 100") == 0);

return 0;
return 0;
}
2 changes: 2 additions & 0 deletions src/codegen/x86/codegen.h
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,8 @@ char *end_main();

char *start_func();

char *end_main_custom_return(int val);

char *end_func();

char *init_int_literal(int val);
Expand Down
37 changes: 22 additions & 15 deletions src/driver/main.c
Original file line number Diff line number Diff line change
Expand Up @@ -6,13 +6,14 @@
#include <string.h> // strcmp

#include "../lexer/lex.h"
#include "../parser/parse.h"
#include "../util/out.h"

int lexer_dump(const char* filename) {
int lexer_dump(const char *filename) {

// Initialization of everything
Lexer lexer;
FILE * fp = fopen(filename, "r");
FILE *fp = fopen(filename, "r");
if (!fp) {
PRINT_ERROR("File %s not found", filename);
return 1;
Expand All @@ -24,12 +25,15 @@ int lexer_dump(const char* filename) {
Token t;
do {
// Return if some non-zero (error) code is returned
if (lex(&lexer, &t)) return 1;
printf("Contents: %20s, type: %20s, position: %d/%d\n", t.contents, ttype_name(t.type), t.line, t.column);
if (lex(&lexer, &t))
return 1;
printf("Contents: %20s, type: %20s, position: %d/%d\n", t.contents,
ttype_name(t.type), t.line, t.column);
} while (t.type != TT_EOF);

return 0;
fclose(fp);

return 0;
}

int main(int argc, char **argv) {
Expand All @@ -38,14 +42,16 @@ int main(int argc, char **argv) {

// Skip the name of the executable.
--argc, ++argv;

if (argc == 0) {
PRINT_DEFAULT("Usage: --token-dump <filename> to see all tokens");
return 0;
}

if (argc == 1) {
PRINT_DEFAULT("default compilation not supported yet -- try 'jccc --token-dump %s' instead.", argv[0]);
PRINT_DEFAULT("default compilation not supported yet -- try 'jccc "
"--token-dump %s' instead.",
argv[0]);
return 1;
}

Expand All @@ -55,13 +61,14 @@ int main(int argc, char **argv) {
}

// Two arguments now.
if (strcmp(argv[0], "--token-dump")) {
PRINT_ERROR("option %s not recognized.", argv[1]);
return 1;
}

// Finally, we can do the lexer test properly!
return lexer_dump(argv[1]);
if (strcmp(argv[0], "--token-dump") == 0) {
// Finally, we can do the lexer test properly!
return lexer_dump(argv[1]);
} else if (strcmp(argv[0], "--test-parse") == 0) {
parse(argv[1]);
return 0;
}

return 0;
PRINT_ERROR("option %s not recognized.", argv[1]);
return 1;
}
4 changes: 2 additions & 2 deletions src/lexer/lex.c
Original file line number Diff line number Diff line change
Expand Up @@ -248,11 +248,11 @@ int real_lex(Lexer *l, Token *t) {
// return./
if (starts_operator(init)) {
while (valid_operator_sequence(t->contents)) {
t->contents[pos++] = (c = getc(l->fp));
t->contents[pos++] = (c = lexer_getchar(l));
}
// We've ended!
// Can we reduce this code duplication from above in a smart way?
ungetc(c, l->fp);
lexer_ungetchar(l);
t->contents[pos - 1] = '\0';
t->type = ttype_from_string(t->contents);
t->length = pos;
Expand Down
62 changes: 62 additions & 0 deletions src/parser/cst.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,62 @@
/**
* The structures for a concrete syntax tree.
* For now, the subset of C we are parsing is quite simple:
* - Parameterless functions.
* - Return statements, which accept integers or function calls.
*/

#pragma once

#include <util/list.h>

// Every kind of node the syntax tree can contain. The values are spelled out
// explicitly; they are the same ones the compiler would assign implicitly.
typedef enum {
    NT_STMT = 0,
    NT_EXPR = 1,
    NT_BLOCK_STMT = 2,
    NT_RETURN_STMT = 3,
    NT_FUNCDECL = 4,
    NT_FUNCCALL = 5,
    NT_LITERAL = 6,
} NodeType;

// A block statement is just a list of statements.
// `List` is the container type provided by <util/list.h>.
// NOTE(review): no `Statement` struct is declared in this header yet, so the
// element type of `stmts` is still to be defined.
typedef struct {
List* stmts; // A list of Statement structs.
} BlockStatement;

// A function declaration: the function's name together with its body.
// Functions take no parameters in the subset of C parsed so far.
typedef struct {
// TODO -- add parameters when we get there
BlockStatement body; // The statements making up the function body.
char name[256]; // The actual name of the function.
} FunctionDeclaration;

// An entire program is just a list of top level declarations.
// For now, such declarations are only functions.
// `type` is the tag for the union `u`; with a single member today it is
// presumably always NT_FUNCDECL -- confirm once the parser fills these in.
typedef struct {
union {
FunctionDeclaration fd;
// VariableDeclaration vd; when we get there
} u;
NodeType type;
} TopLevelDeclaration;

// Right now, a function call doesn't have any parameters so it's just the name
// of the function being called.
typedef struct {
char name[256]; // Name of the callee, stored inline in a fixed 256-byte buffer.
} FunctionCall;

// An expression for now is an integer or a function call.
// `type` tells which member of `u` is in use -- presumably NT_FUNCCALL for
// `fc` and NT_LITERAL for `literal`; confirm against the parser.
typedef struct {
union {
FunctionCall fc;
char literal[256]; // The literal kept as text, not as a converted number.
} u;
NodeType type;
} Expression;

// Finally, an entire source file is a list of top-level declarations.
// This is the structure that make_cst() in parse.c is meant to fill in.
typedef struct {
List* decls; // list of TopLevelDeclaration
} ConcreteFileTree;
126 changes: 126 additions & 0 deletions src/parser/parse.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,126 @@
/* Parser
*
*/

#include <stdlib.h> // calloc
#include <string.h> // strcmp
#include <ctype.h> // isdigit

#include <codegen/x86/codegen.h>
#include <lexer/lex.h>
#include <lexer/token.h>
#include <parser/cst.h>
#include <util/out.h>

/**
 * Parse `filename` as the minimal program
 * `int main() { return <integer literal>; }` and print assembly for it.
 *
 * Every token is lexed up front and dumped to stdout as it is read. The
 * token sequence is then matched against that single shape. On a match, the
 * text from start_main() followed by the text from
 * end_main_custom_return(<literal>) is written to stdout.
 *
 * @param filename  Path of the source file to read.
 * @return 0 when assembly was emitted; 1 if the file cannot be opened,
 *         lexing fails, memory runs out, or the tokens do not match.
 */
int parse(const char *filename) {
    int rc = 1;
    size_t count = 0;
    size_t capacity = 16;
    Token *tokens = NULL;
    char *code_end = NULL;
    Token t;
    Lexer lexer;

    FILE *fp = fopen(filename, "r");
    if (!fp) {
        PRINT_ERROR("File %s not found", filename);
        return 1;
    }

    lexer.fp = fp;
    lexer.unlexed_count = 0;
    lexer.column = lexer.line = 1;

    // Zero-initialised so that the fixed indices inspected below always refer
    // to initialised storage, even for very short inputs.
    tokens = calloc(capacity, sizeof *tokens);
    if (!tokens) {
        PRINT_ERROR("Out of memory while storing tokens");
        goto cleanup;
    }

    do {
        if (lex(&lexer, &t)) {
            goto cleanup;
        }

        if (count >= capacity) {
            // Grow with realloc so the tokens collected so far are kept; a
            // fresh calloc here would discard them and leak the old buffer.
            size_t new_capacity = capacity * 2;
            Token *grown = realloc(tokens, new_capacity * sizeof *tokens);
            if (!grown) {
                PRINT_ERROR("Out of memory while storing tokens");
                goto cleanup;
            }
            tokens = grown;
            capacity = new_capacity;
        }

        tokens[count] = t;

        printf("Contents: %20s, type: %20s, position: %d/%d\n", t.contents,
               ttype_name(t.type), t.line, t.column);

        count++;
    } while (t.type != TT_EOF);

    // Each check below only passes if the tokens it inspects are not TT_EOF,
    // so the next check never reads past the final (EOF) token.

    // Main function
    if (!(tokens[0].type == TT_INT && tokens[1].type == TT_IDENTIFIER &&
          strcmp(tokens[1].contents, "main") == 0)) {
        PRINT_ERROR("Not correct main function.\n");
        goto cleanup;
    }

    // Correct empty parameter list and opening brace
    if (!(tokens[2].type == TT_OPAREN && tokens[3].type == TT_CPAREN &&
          tokens[4].type == TT_OBRACE)) {
        PRINT_ERROR("Wrong main function body.\n");
        goto cleanup;
    }

    // Return value; the cast keeps isdigit() defined for negative char values.
    if (!(tokens[5].type == TT_RETURN && tokens[6].type == TT_LITERAL &&
          isdigit((unsigned char)tokens[6].contents[0]) &&
          tokens[7].type == TT_SEMI)) {
        PRINT_ERROR("Return value is wrong.\n");
        goto cleanup;
    }

    // Correct matched closed brace
    if (tokens[8].type != TT_CBRACE) {
        PRINT_ERROR("Wrong closing brace.\n");
        goto cleanup;
    }

    printf("\n");

    // Generate preamble main code. The generated text is written with fputs
    // so it is never interpreted as a printf format string.
    fputs(start_main(), stdout);

    // Add custom return code
    code_end = end_main_custom_return(atoi(tokens[6].contents));
    if (!code_end) {
        PRINT_ERROR("Out of memory while generating code");
        goto cleanup;
    }
    fputs(code_end, stdout);

    rc = 0;

cleanup:
    free(code_end);
    free(tokens);
    fclose(fp);

    return rc;
}

// Placeholder for parsing a simple `main` function; not implemented yet.
// Returns 1 (failure) so that a caller never reads an indeterminate value
// from a non-void function that falls off its end.
int parse_simple_main_func() {
    return 1;
}

/**
* Proper parsing code below -- producing a concrete syntax tree from a file.
* Each of these functions will probably reference the others numerous times.
*/

// Parse an expression from `l` into `ex`. Not implemented yet: nothing is
// read or written, and 1 (failure) is returned so callers never consume an
// indeterminate return value.
int parse_expr(Lexer *l, Expression *ex) {
    // TODO (just a literal or a function call for now).
    (void)l;
    (void)ex;
    return 1;
}

// Parse a function call from `l` into `ex`. Not implemented yet: nothing is
// read or written, and 1 (failure) is returned so callers never consume an
// indeterminate return value.
int parse_funccall(Lexer *l, Expression *ex) {
    // TODO
    (void)l;
    (void)ex;
    return 1;
}

// Parse a block statement from `l` into `bs`. Not implemented yet: nothing
// is read or written, and 1 (failure) is returned so callers never consume
// an indeterminate return value.
int parse_blockstmt(Lexer *l, BlockStatement *bs) {
    // TODO
    (void)l;
    (void)bs;
    return 1;
}

// Parse a function declaration from `l` into `fd`. Not implemented yet:
// nothing is read or written, and 1 (failure) is returned so callers never
// consume an indeterminate return value.
int parse_funcdecl(Lexer *l, FunctionDeclaration *fd) {
    // TODO
    (void)l;
    (void)fd;
    return 1;
}

// Parse function -- takes a lexer and produces a concrete syntax tree. Fill the
// struct which we have given with the data.
// Not implemented yet: `tree` is left untouched and 1 (failure) is returned,
// so a caller cannot mistake the stub for a successful parse.
int make_cst(Lexer *l, ConcreteFileTree *tree) {
    // TODO
    (void)l;
    (void)tree;
    return 1;
}
5 changes: 5 additions & 0 deletions src/parser/parse.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
/* Parser
*
* Public entry point of the parser.
*/

/**
 * Open `filename`, lex it (dumping each token to stdout), and, if the tokens
 * form `int main() { return <integer literal>; }`, print the generated
 * assembly to stdout.
 *
 * @param filename  Path of the source file to read.
 * @return 1 if the file cannot be opened or lexing fails.
 */
int parse(const char *filename);
3 changes: 3 additions & 0 deletions tests/simplemain.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
int main() {
return 5;
}

0 comments on commit 1632e0f

Please sign in to comment.