diff --git a/.gitignore b/.gitignore index cd531cf..b0ab508 100644 --- a/.gitignore +++ b/.gitignore @@ -1,6 +1,6 @@ -# ---> C -# Prerequisites -*.d +# ASM files +*.s +*.ll # Object files *.o @@ -23,7 +23,7 @@ *.la *.lo -# Shared objects (inc. Windows DLLs) +# Shared objects *.dll *.so *.so.* @@ -37,18 +37,10 @@ *.x86_64 *.hex -# Debug files -*.dSYM/ -*.su -*.idb -*.pdb - -# Kernel Module Compile Results -*.mod* -*.cmd -.tmp_versions/ -modules.order -Module.symvers -Mkfile.old -dkms.conf +# editors +.vscode/ +*.swp +# builder +.xmake/ +build/ \ No newline at end of file diff --git a/README.md b/README.md index 623963d..30a6949 100644 --- a/README.md +++ b/README.md @@ -1,3 +1,33 @@ # acc +A zero-dependency (sub) C compiler, a work in progress, written in pure ISO C11. -another C Compiler \ No newline at end of file +## Build from source + +Make sure you have `gcc` and `make` on your PATH. +If you don't, here's the command for Ubuntu: + +``` +sudo apt install build-essential +``` + +To build, run: +``` +make +``` + +## Usage + +``` +acc target inputfile (outputfile) +``` + +Output targets currently include: +- `x86_64`: Intel's x86-64 ASM +- `llvm`: LLVM's IR +- `ast`: (used for debugging) Abstract Syntax Tree + +Example: + +``` +acc x86_64 test.c +``` diff --git a/include/ast.h b/include/ast.h new file mode 100644 index 0000000..89c26f1 --- /dev/null +++ b/include/ast.h @@ -0,0 +1,84 @@ +#ifndef ACC_AST_H +#define ACC_AST_H + +#include "util/linklist.h" + +// AST operation types +enum { + A_ASSIGN, + A_ADD, A_SUB, A_MUL, A_DIV, + A_EQ, A_NE, A_LT, A_GT, A_LE, A_GE, + A_INTLIT, A_VAR, + A_BLOCK, + A_PRINT, A_IF, A_WHILE, + A_SOUL // what? 
+};
+
+// AST node types
+enum {
+    N_BIN, N_UN, N_MULTI, N_LEAF, N_ASSIGN
+};
+
+// AST structure (common)
+struct ASTnode {
+    int op; //operator
+};
+
+// AST binary operation node
+struct ASTbinnode {
+    int op;
+    struct ASTnode *left;
+    struct ASTnode *right;
+};
+
+// AST if statement node
+struct ASTifnode {
+    int op;
+    struct ASTnode *left;  // condition true branch
+    struct ASTnode *right; // condition false branch
+    struct ASTnode *cond;
+};
+
+// AST unary operation node
+struct ASTunnode {
+    int op;
+    struct ASTnode *c;
+};
+
+// AST block node
+struct ASTblocknode {
+    int op;
+    struct linklist st; // statements linklist
+};
+
+// AST int literal node
+struct ASTintnode {
+    int op;
+    int val;
+};
+
+// AST assign literal node
+struct ASTassignnode {
+    int op;
+    int left;
+    struct ASTnode* right;
+};
+
+// AST variable value node
+struct ASTvarnode {
+    int op;
+    int id;
+};
+
+struct ASTnode* ast_make_binary(int op, struct ASTnode *left, struct ASTnode *right);
+struct ASTnode* ast_make_intlit(int val);
+struct ASTnode* ast_make_unary(int op, struct ASTnode *c);
+struct ASTnode* ast_make_block(void);
+struct ASTnode* ast_make_var(int id);
+struct ASTnode* ast_make_assign(int op, int left, struct ASTnode *right);
+struct ASTnode* ast_make_if(struct ASTnode *left, struct ASTnode *right, struct ASTnode *cond);
+int ast_type(int t);
+void ast_free(struct ASTnode *x);
+
+#endif
+
diff --git a/include/cg.h b/include/cg.h
new file mode 100644
index 0000000..bcce7ee
--- /dev/null
+++ b/include/cg.h
@@ -0,0 +1,30 @@
+#ifndef ACC_CG_H
+#define ACC_CG_H
+
+#include <stdio.h> // FILE, used by the Outfile declaration below
+#include "ast.h"
+
+extern FILE *Outfile;
+
+// cg.c
+void cg_main(int target, struct ASTnode *rt);
+void open_outputfile(char *filename);
+void cg_unload(void);
+
+// cg_x64.c
+void cgx64_generate(struct ASTnode *rt);
+
+// cg_llvm.c
+void cgllvm_generate(struct ASTnode *rt);
+
+// cg_ast.c
+void cgast_generate(struct ASTnode *rt);
+
+// targets
+enum {
+    CG_X64,     // Intel x86_64
+    CG_LLVM,    // LLVM IR
+    CG_AST,     // 
Abstruct Syntax Tree +}; + +#endif diff --git a/include/fatals.h b/include/fatals.h new file mode 100644 index 0000000..335cd43 --- /dev/null +++ b/include/fatals.h @@ -0,0 +1,12 @@ +#ifndef ACC_FATALS_H +#define ACC_FATALS_H + +#include "noreturn.h" + +noreturn void fail_malloc(const char *func_name); +noreturn void fail_ast_op(int op, const char *func_name); +noreturn void fail_ce_expect(const char *expected, const char *got); +noreturn void fail_ce(const char *reason); +noreturn void fail_char(int c); + +#endif diff --git a/include/noreturn.h b/include/noreturn.h new file mode 100644 index 0000000..169bcc8 --- /dev/null +++ b/include/noreturn.h @@ -0,0 +1,6 @@ +#ifndef ACC_NORETURN_H +#define ACC_NORETURN_H + +#define noreturn _Noreturn + +#endif \ No newline at end of file diff --git a/include/parse.h b/include/parse.h new file mode 100644 index 0000000..5515f10 --- /dev/null +++ b/include/parse.h @@ -0,0 +1,6 @@ +#ifndef ACC_PARSE_H +#define ACC_PARSE_H + +struct ASTnode* parse(const char *name); + +#endif diff --git a/include/scan.h b/include/scan.h new file mode 100644 index 0000000..be4468d --- /dev/null +++ b/include/scan.h @@ -0,0 +1,9 @@ +#ifndef ACC_SCAN_H +#define ACC_SCAN_H + +#include "token.h" + +extern int Line; +struct linklist scan_tokens(const char *name); + +#endif diff --git a/include/symbol.h b/include/symbol.h new file mode 100644 index 0000000..edc5cb3 --- /dev/null +++ b/include/symbol.h @@ -0,0 +1,13 @@ +#ifndef ACC_SYMBOL_H +#define ACC_SYMBOL_H + +#include "util/array.h" + +extern struct array Gsym; + +void symbol_init(void); +void symbol_unload(void); +int findglob(char *s); +int addglob(char *s); + +#endif diff --git a/include/token.h b/include/token.h new file mode 100644 index 0000000..54e16a6 --- /dev/null +++ b/include/token.h @@ -0,0 +1,29 @@ +#ifndef ACC_TOKEN_H +#define ACC_TOKEN_H + +#include "util/linklist.h" + +// Token structure +struct token { + int type; // token type + void* val; // hold the value of the literal that we 
scanned in +}; + +// Tokens +enum { + T_EOF, + T_SEMI, + T_LB, T_RB, T_LP, T_RP, + T_ASSIGN, + T_PLUS, T_MINUS, T_STAR, T_SLASH, + T_EQ, T_NE, T_LT, T_GT, T_LE, T_GE, + T_INT, T_VOID, T_CHAR, T_LONG, + T_PRINT, T_IF, T_ELSE, T_WHILE, T_FOR, + T_INTLIT, T_LONGLIT, T_INDENT, +}; +extern const char *token_typename[29]; + +void token_free(struct token *t); +struct token token_make_eof(void); + +#endif diff --git a/include/util/array.h b/include/util/array.h new file mode 100644 index 0000000..5369e40 --- /dev/null +++ b/include/util/array.h @@ -0,0 +1,16 @@ +#ifndef ACC_UTIL_ARRAY_H +#define ACC_UTIL_ARRAY_H + +struct array { + int length; + int cap; + void **begin; +}; + +void array_init(struct array *a); +void array_pushback(struct array *a, void *val); +void array_free(struct array *a); +void* array_get(struct array *a, int index); +void array_set(struct array *a, int index, void *val); + +#endif diff --git a/include/util/linklist.h b/include/util/linklist.h new file mode 100644 index 0000000..80c95bc --- /dev/null +++ b/include/util/linklist.h @@ -0,0 +1,25 @@ +#ifndef ACC_UTIL_LINKLIST_H +#define ACC_UTIL_LINKLIST_H + +struct llist_node { + void *val; + struct llist_node *nxt; +}; + +struct linklist { + int length; + struct llist_node *head; + struct llist_node *tail; +}; + +struct llist_node* llist_createnode(void *val); +void llist_pushback(struct linklist *l, void *val); +void llist_pushback_notnull(struct linklist *l, void *val); +void* llist_get(struct linklist *l, int x); +void llist_set(struct linklist *l, int x, void *val); +void llist_init(struct linklist *l); +void llist_free(struct linklist *l); +void llist_insert(struct linklist *l, int x, void *val); +void llist_popfront(struct linklist *l); + +#endif diff --git a/include/util/misc.h b/include/util/misc.h new file mode 100644 index 0000000..f2ddde6 --- /dev/null +++ b/include/util/misc.h @@ -0,0 +1,7 @@ +#ifndef ACC_UTIL_MISC_H +#define ACC_UTIL_MISC_H + +int strequal(const char *s1, const char *s2); 
+char *strclone(const char *s);
+
+#endif
diff --git a/main.c b/main.c
new file mode 100644
index 0000000..e72f12d
--- /dev/null
+++ b/main.c
@@ -0,0 +1,59 @@
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include "scan.h"
+#include "parse.h"
+#include "cg.h"
+#include "ast.h"
+#include "symbol.h"
+
+// Print out a usage if started incorrectly
+static void usage(char *prog) {
+    fprintf(stderr, "ACC the C compiler. built on: %s.\n", __DATE__);
+    fprintf(stderr, "Usage: %s target infile (outfile)\n", prog);
+    exit(1);
+}
+
+// Do clean up job (registered with atexit, so it also runs on exit() paths)
+void unload(void) {
+    cg_unload();
+    symbol_unload();
+}
+
+// Entry point: acc target infile (outfile)
+int main(int argc, char *argv[]) {
+    atexit(unload);
+    if (argc < 3) {
+        usage(argv[0]);
+    }
+
+    // Resolve the target (and its default output name) before opening
+    // anything, so an unknown target never creates or truncates a file.
+    int target;
+    char *outname;
+    if (!strcmp(argv[1], "x86_64")) {
+        target = CG_X64;
+        outname = "out.s";
+    } else if (!strcmp(argv[1], "llvm")) {
+        target = CG_LLVM;
+        outname = "out.ll";
+    } else if (!strcmp(argv[1], "ast")) {
+        target = CG_AST;
+        outname = "out.txt";
+    } else {
+        fprintf(stderr, "Unknow target %s.\n", argv[1]);
+        exit(1);
+    }
+
+    // An explicit output file name overrides the per-target default.
+    if (argc >= 4) {
+        outname = argv[3];
+    }
+    open_outputfile(outname);
+
+    symbol_init();
+    struct ASTnode *rt = parse(argv[2]);
+    cg_main(target, rt);
+    ast_free(rt);
+    return (0);
+}
diff --git a/src/ast.c b/src/ast.c
new file mode 100644
index 0000000..81b7e3b
--- /dev/null
+++ b/src/ast.c
@@ -0,0 +1,146 @@
+#include <stdio.h>
+#include <stdlib.h>
+#include "ast.h"
+#include "fatals.h"
+#include "util/linklist.h"
+
+// Build and return a binary AST node
+struct ASTnode* ast_make_binary(int op, struct ASTnode *left, struct ASTnode *right) {
+    struct ASTbinnode *x = malloc(sizeof(struct ASTbinnode));
+    if (x == NULL) {
+        fail_malloc(__FUNCTION__);
+    }
+
+    x->op = op;
+    x->left = left;
+    x->right = right;
+    return ((struct ASTnode*)x);
+}
+
+// Make an AST int literal node
+struct ASTnode* ast_make_intlit(int val) {
+    struct ASTintnode *x = 
malloc(sizeof(struct ASTintnode)); + if (x == NULL) { + fail_malloc(__FUNCTION__); + } + + x->op = A_INTLIT; + x->val = val; + return ((struct ASTnode*)x); +} + +// Make an AST variable value node +struct ASTnode* ast_make_var(int id) { + struct ASTvarnode *x = malloc(sizeof(struct ASTvarnode)); + if (x == NULL) { + fail_malloc(__FUNCTION__); + } + + x->op = A_VAR; + x->id = id; + return ((struct ASTnode*)x); +} + +// Make a unary AST node: only one child +struct ASTnode* ast_make_unary(int op, struct ASTnode *c) { + struct ASTunnode *x = malloc(sizeof(struct ASTunnode)); + if (x == NULL) { + fail_malloc(__FUNCTION__); + } + + x->op = op; + x->c = c; + return ((struct ASTnode*)x); +} + +// Make a block ast node +struct ASTnode* ast_make_block() { + struct ASTblocknode *x = malloc(sizeof(struct ASTblocknode)); + if (x == NULL) { + fail_malloc(__FUNCTION__); + } + + x->op = A_BLOCK; + llist_init(&x->st); + return ((struct ASTnode*)x); +} + +// Make a assignment ast node +struct ASTnode* ast_make_assign(int op, int left, struct ASTnode *right) { + struct ASTassignnode *x = malloc(sizeof(struct ASTassignnode)); + if (x == NULL) { + fail_malloc(__FUNCTION__); + } + + x->op = op; + x->left = left; + x->right = right; + return ((struct ASTnode*)x); +} + +// Make a if statement ast node +struct ASTnode* ast_make_if(struct ASTnode *left, struct ASTnode *right, struct ASTnode *cond) { + struct ASTifnode *x = malloc(sizeof(struct ASTifnode)); + if (x == NULL) { + fail_malloc(__FUNCTION__); + } + + x->op = A_IF; + x->left = left; + x->right = right; + x->cond = cond; + return ((struct ASTnode*)x); +} + +// Translate ast operation type to ast node type +int ast_type(int t) { + switch (t) { + case A_ADD: case A_SUB: case A_MUL: case A_DIV: + case A_EQ: case A_NE: case A_GT: case A_LT: case A_GE: case A_LE: + case A_IF: case A_WHILE: + return (N_BIN); + case A_ASSIGN: + return (N_ASSIGN); + case A_INTLIT: case A_VAR: + return (N_LEAF); + case A_BLOCK: + return (N_MULTI); + case 
A_PRINT: + return (N_UN); + default: + fprintf(stderr, "%s: unknown operation type %d.\n", __FUNCTION__, t); + exit(1); + } +} + +// free an AST's memory +void ast_free(struct ASTnode *x) { + if (x == NULL) { + return; + } + + int nt = ast_type(x->op); + if (nt == N_ASSIGN) { + struct ASTassignnode *t = (struct ASTassignnode*)x; + ast_free(t->right); + } else if (nt == N_BIN) { + struct ASTbinnode *t = (struct ASTbinnode*)x; + ast_free(t->left); + ast_free(t->right); + if (x->op == A_IF) { + ast_free(((struct ASTifnode*)x)->cond); + } + } else if (nt == N_UN) { + struct ASTunnode *t = (struct ASTunnode*)x; + ast_free(t->c); + } else if (nt == N_MULTI) { + struct ASTblocknode *t = (struct ASTblocknode*)x; + struct llist_node *p = t->st.head; + while (p) { + ast_free(p->val); + p = p->nxt; + } + llist_free(&t->st); + } + free(x); +} diff --git a/src/cg.c b/src/cg.c new file mode 100644 index 0000000..edf15e4 --- /dev/null +++ b/src/cg.c @@ -0,0 +1,38 @@ +#include +#include +#include +#include +#include "cg.h" + +FILE *Outfile; + +// open output file of generated code +void open_outputfile(char *filename) { + Outfile = fopen(filename, "w"); + if (Outfile == NULL) { + fprintf(stderr, "Unable to open %s: %s\n", filename, strerror(errno)); + exit(1); + } +} + +// close Outfile at exit. 
+void cg_unload(void) { + if (Outfile) { + fclose(Outfile); + } +} + +// generates code +void cg_main(int target, struct ASTnode *rt) { + if (target == CG_X64) { + cgx64_generate(rt); + } else if (target == CG_LLVM) { + cgllvm_generate(rt); + } else if (target == CG_AST) { + cgast_generate(rt); + } else { + fprintf(stderr, "Unknow target %d.\n", target); + exit(1); + } +} + diff --git a/src/cg_ast.c b/src/cg_ast.c new file mode 100644 index 0000000..8e2d468 --- /dev/null +++ b/src/cg_ast.c @@ -0,0 +1,109 @@ +#include +#include +#include "ast.h" +#include "cg.h" +#include "symbol.h" +#include "fatals.h" +#include "util/array.h" + +static const char *ast_opname[] = { + "=", + "+", "-", "*", "/", + "==", "!=", "<", ">", "<=", ">=", + "int", "var", + "block", + "print", "if", "while" +}; + +static int tabs; + +static void cgprint_tabs() { + for (int i = 0; i < tabs; ++i) { + fprintf(Outfile, "\t"); + } +} + +static void cgenerate_dfs(struct ASTnode *x) { + if (x == NULL) { + cgprint_tabs(); + fprintf(Outfile, "--->NULL.\n"); + return; + } + + int nt = ast_type(x->op); + if (nt == N_LEAF) { + if (x->op == A_INTLIT) { + struct ASTintnode *t = (struct ASTintnode*)x; + cgprint_tabs(); + fprintf(Outfile, "--->INT %d.\n", t->val); + } else if (x->op == A_VAR) { + struct ASTvarnode *t = (struct ASTvarnode*)x; + cgprint_tabs(); + fprintf(Outfile, "--->VAR @%s.\n", (char*)array_get(&Gsym, t->id)); + } else { + fail_ast_op(x->op, __FUNCTION__); + } + } else if (nt == N_ASSIGN) { + struct ASTassignnode *t = (struct ASTassignnode*)x; + cgprint_tabs(); + fprintf(Outfile, "--->ASSIGN(%s) to @%s (right)\n", ast_opname[t->op], (char*)array_get(&Gsym, t->left)); + tabs += 1; + cgenerate_dfs(t->right); + tabs -= 1; + } else if (nt == N_BIN) { + if (x->op == A_IF) { + struct ASTifnode *t = (struct ASTifnode*)x; + cgprint_tabs(); + fprintf(Outfile, "--->IF (cond left right)\n"); + tabs += 1; + cgenerate_dfs(t->cond); + cgenerate_dfs(t->left); + cgenerate_dfs(t->right); + tabs -= 1; + } 
else if (x->op == A_WHILE) { + struct ASTbinnode *t = (struct ASTbinnode*)x; + cgprint_tabs(); + fprintf(Outfile, "--->WHILE(%s) (cond body)\n", ast_opname[t->op]); + tabs += 1; + cgenerate_dfs(t->left); + cgenerate_dfs(t->right); + tabs -= 1; + } else { + struct ASTbinnode *t = (struct ASTbinnode*)x; + cgprint_tabs(); + fprintf(Outfile, "--->BINOP(%s) (left right)\n", ast_opname[t->op]); + tabs += 1; + cgenerate_dfs(t->left); + cgenerate_dfs(t->right); + tabs -= 1; + } + } else if (nt == N_UN) { + if (x->op == A_PRINT) { + struct ASTunnode *t = (struct ASTunnode*)x; + cgprint_tabs(); + fprintf(Outfile, "--->PRINT (value)\n"); + tabs += 1; + cgenerate_dfs(t->c); + tabs -= 1; + } else { + fail_ast_op(x->op, __FUNCTION__); + } + } else if (nt == N_MULTI) { + struct ASTblocknode *t = (struct ASTblocknode*)x; + cgprint_tabs(); + fprintf(Outfile, "--->BLOCK (%d childs)\n", t->st.length); + tabs += 1; + struct llist_node *p = t->st.head; + while (p) { + cgenerate_dfs(p->val); + p = p->nxt; + } + tabs -= 1; + } +} + +void cgast_generate(struct ASTnode *rt) { + tabs = 0; + cgenerate_dfs(rt); +} + diff --git a/src/cg_llvm.c b/src/cg_llvm.c new file mode 100644 index 0000000..7a4977b --- /dev/null +++ b/src/cg_llvm.c @@ -0,0 +1,216 @@ +#include +#include +#include "ast.h" +#include "cg.h" +#include "symbol.h" +#include "fatals.h" +#include "util/array.h" +#include "util/linklist.h" + +static int alloc_tag(void) { + static int id = 0; + return (id++); +} + +static int alloc_label(void) { + static int id = 0; + return (id++); +} + +// Print out the ir preamble +static void cgpreamble(void) { + fputs( "@.printint.format = constant [4 x i8] c\"%d\\0A\\00\", align 1\n" + "declare dso_local i32 @printf(i8* readonly nocapture, ...)\n" + "define dso_local void @printint(i32 %0) {\n" + "\t%2 = getelementptr inbounds [4 x i8], [4 x i8]* @.printint.format, i32 0, i32 0\n" + "\t%3 = call i32 (i8*, ...) 
@printf(i8* %2, i32 %0)\n"
+        "\tret void\n"
+        "}\n"
+        "\n"
+        "define dso_local i32 @main() {\n"
+        "entry:\n", Outfile);
+}
+
+// Print out the ir postamble
+static void cgpostamble(void) {
+    fputs("\tret i32 0\n}\n", Outfile);
+}
+
+// init as global value
+static void cginit_glob(char *name) {
+    fprintf(Outfile, "@%s = dso_local global i32 0, align 4\n", name);
+}
+
+// Perform arithmetic operation between two i32
+static int cgarith_i32(int x, int y, char *op) {
+    int r = alloc_tag();
+    fprintf(Outfile, "\t%%%d = %s i32 %%%d, %%%d\n", r, op, x, y);
+    return (r);
+}
+
+// Perform comparison between integers of type `ty`.
+// icmp yields an i1; unless `ty` is itself "i1" the result is zero-extended
+// back to `ty`. Returns the tag that holds the final value.
+static int cgcomp_i(int x, int y, char *op, char *ty) {
+    int r = alloc_tag();
+    fprintf(Outfile, "\t%%%d = icmp %s %s %%%d, %%%d\n", r, op, ty, x, y);
+    if (ty[0] != 'i' || ty[1] != '1' || ty[2] != '\0') {
+        // Take a second tag only when the zext is really emitted, so the
+        // returned tag is always one that has been defined in the output.
+        int wide = alloc_tag();
+        fprintf(Outfile, "\t%%%d = zext i1 %%%d to %s\n", wide, r, ty);
+        r = wide;
+    }
+    return (r);
+}
+
+// Perform comparison between two i32
+static int cgcomp_i32(int x, int y, char *op) {
+    return (cgcomp_i(x, y, op, "i32"));
+}
+
+// Load an int literal
+static int cgload_lit_i32(int val) {
+    int r = alloc_tag();
+    fprintf(Outfile, "\t%%%d = select i1 true, i32 %d, i32 undef\n", r, val);
+    return (r);
+}
+
+// Load an int from a global variable
+static int cgload_glob_i32(char *name) {
+    int r = alloc_tag();
+    fprintf(Outfile, "\t%%%d = load i32, i32* @%s, align 4\n", r, name);
+    return (r);
+}
+
+// Store an int into a global variable
+static int cgstore_glob_i32(int x, char *name) {
+    fprintf(Outfile, "\tstore i32 %%%d, i32* @%s, align 4\n", x, name);
+    return (x);
+}
+
+// Print a i32
+static void cgprint(int x) {
+    fprintf(Outfile, "\tcall void (i32) @printint(i32 %%%d)\n", x);
+}
+
+// Jump to a label no matter what
+static void cgjmp_always(int x) {
+    fprintf(Outfile, "\tbr label %%L%d\n", x);
+}
+
+// Conditional jump
+static void cgjmp_if_i32(int cond, int Lthen, int Lelse) {
+    int rc = alloc_tag();
+    fprintf(Outfile, "\t%%%d = 
icmp ne i32 0, %%%d\n", rc, cond);
+    fprintf(Outfile, "\tbr i1 %%%d, label %%L%d, label %%L%d\n", rc, Lthen, Lelse);
+}
+
+static void cgprint_label(int L) {
+    fprintf(Outfile, "L%d:\n", L);
+}
+
+// generates llvm ir from ast
+// Returns the tag of the value produced by the node, or -1 when the node
+// produces no value (if / while / print, or an absent node).
+static int cgenerate_ast(struct ASTnode *rt) {
+    // The parser hands out NULL for absent parts (an `if` without `else`,
+    // an empty block, a declaration): there is nothing to emit for those.
+    if (rt == NULL) {
+        return (-1);
+    }
+
+    int nt = ast_type(rt->op);
+    if (nt == N_LEAF) {
+        if (rt->op == A_INTLIT) {
+            return (cgload_lit_i32(((struct ASTintnode*)rt)->val));
+        } else if (rt->op == A_VAR) {
+            return (cgload_glob_i32(array_get(&Gsym, ((struct ASTvarnode*)rt)->id)));
+        }
+        fail_ast_op(rt->op, __FUNCTION__);
+    } else if (nt == N_BIN) {
+        if (rt->op == A_IF) {
+            struct ASTifnode *x = (struct ASTifnode*)rt;
+            int Lthen = alloc_label(), Lelse = alloc_label(), Lend = alloc_label();
+            int condv = cgenerate_ast(x->cond);
+            cgjmp_if_i32(condv, Lthen, Lelse);
+
+            cgprint_label(Lthen);
+            cgenerate_ast(x->left);
+            cgjmp_always(Lend);
+
+            cgprint_label(Lelse);
+            cgenerate_ast(x->right);
+            cgjmp_always(Lend);
+
+            cgprint_label(Lend);
+            return (-1);
+        } else if (rt->op == A_WHILE) {
+            struct ASTbinnode *x = (struct ASTbinnode*)rt;
+            int Lstart = alloc_label(), Lbody = alloc_label(), Lend = alloc_label();
+            cgjmp_always(Lstart);
+
+            cgprint_label(Lstart);
+            int condv = cgenerate_ast(x->left);
+            cgjmp_if_i32(condv, Lbody, Lend);
+
+            cgprint_label(Lbody);
+            cgenerate_ast(x->right);
+            cgjmp_always(Lstart);
+
+            cgprint_label(Lend);
+            return (-1);
+        }
+
+        struct ASTbinnode *x = (struct ASTbinnode*)rt;
+        int lc = cgenerate_ast(x->left);
+        int rc = cgenerate_ast(x->right);
+
+        if (rt->op == A_ADD) {
+            return (cgarith_i32(lc, rc, "add nsw"));
+        } else if (rt->op == A_SUB) {
+            return (cgarith_i32(lc, rc, "sub nsw"));
+        } else if (rt->op == A_MUL) {
+            return (cgarith_i32(lc, rc, "mul nsw"));
+        } else if (rt->op == A_DIV) {
+            return (cgarith_i32(lc, rc, "sdiv"));
+        } else if (rt->op == A_EQ) {
+            return (cgcomp_i32(lc, rc, "eq"));
+        } else if (rt->op == A_NE) {
+            return (cgcomp_i32(lc, rc, "ne"));
+        } else if (rt->op == A_GT) {
+            return 
(cgcomp_i32(lc, rc, "sgt")); + } else if (rt->op == A_GE) { + return (cgcomp_i32(lc, rc, "sge")); + } else if (rt->op == A_LT) { + return (cgcomp_i32(lc, rc, "slt")); + } else if (rt->op == A_LE) { + return (cgcomp_i32(lc, rc, "sle")); + } + fail_ast_op(rt->op, __FUNCTION__); + } else if (nt == N_UN) { + struct ASTunnode *x = (struct ASTunnode*)rt; + int cv = cgenerate_ast(x->c); + + if (rt->op == A_PRINT) { + cgprint(cv); + return (-1); + } + fail_ast_op(rt->op, __FUNCTION__); + } else if (nt == N_ASSIGN) { + struct ASTassignnode *x = (struct ASTassignnode*)rt; + int cv = cgenerate_ast(x->right); + + if (rt->op == A_ASSIGN) { + return (cgstore_glob_i32(cv, array_get(&Gsym, x->left))); + } + fail_ast_op(rt->op, __FUNCTION__); + } else if (nt == N_MULTI) { + struct ASTblocknode *x = (struct ASTblocknode*)rt; + int val = -1; + struct llist_node *p = x->st.head; + while (p) { + val = cgenerate_ast(p->val); + p = p->nxt; + } + return val; + } + fail_ast_op(rt->op, __FUNCTION__); +} + +// generate and write ir to Outfile +void cgllvm_generate(struct ASTnode* rt) { + for (int i = 0; i < Gsym.length; ++i) { + cginit_glob(array_get(&Gsym, i)); + } + cgpreamble(); + cgenerate_ast(rt); + cgpostamble(); +} diff --git a/src/cg_x64.c b/src/cg_x64.c new file mode 100644 index 0000000..e2f0452 --- /dev/null +++ b/src/cg_x64.c @@ -0,0 +1,308 @@ +#include +#include +#include +#include "cg.h" +#include "ast.h" +#include "symbol.h" +#include "fatals.h" +#include "util/array.h" +#include "util/linklist.h" + +// List of available registers +// and their names +static const int reg_count = 4; +static char *reglist[4] = { "%r8", "%r9", "%r10", "%r11" }; +static int usedreg[4]; + +// Get a label number for jump +static int alloc_label(void) { + static int id = 0; + return (id++); +} + +// Set all registers free +static void free_all_reg(void) { + for (int i = 0; i < reg_count; ++i) { + usedreg[i] = 0; + } +} + +// Allocate a new register to use or report "Out of registers" +static int 
alloc_reg(void) {
+    for (int i = 0; i < reg_count; ++i) {
+        if (!usedreg[i]) {
+            usedreg[i] = 1;
+            return (i);
+        }
+    }
+    // Running out of registers is a failure: report it with a non-zero
+    // status so callers of the compiler do not mistake it for success.
+    fprintf(stderr, "Out of registers.\n");
+    exit(1);
+}
+
+// Return a register to the list of available registers.
+// Check to see if it's not already there.
+// -1 means "no register" and is silently ignored.
+static void free_reg(int r) {
+    if (r == -1) {
+        return;
+    }
+
+    // Any other id outside the register table would index usedreg[] out of
+    // bounds, so reject it explicitly.
+    if (r < 0 || r >= reg_count) {
+        fprintf(stderr, "Error trying to free register %d: no such register.\n", r);
+        exit(1);
+    }
+    if (!usedreg[r]) {
+        fprintf(stderr, "Error trying to free register %d: not allocated.\n", r);
+        exit(1);
+    }
+    usedreg[r] = 0;
+}
+
+// Print out the assembly preamble
+static void cgpreamble(void) {
+    free_all_reg();
+    fputs( "\t.text\n"
+        ".LC0:\n"
+        "\t.string\t\"%d\\n\"\n"
+        "printint:\n"
+        "\tpushq\t%rbp\n"
+        "\tmovq\t%rsp, %rbp\n"
+        "\tsubq\t$16, %rsp\n"
+        "\tmovl\t%edi, -4(%rbp)\n"
+        "\tmovl\t-4(%rbp), %eax\n"
+        "\tmovl\t%eax, %esi\n"
+        "\tleaq .LC0(%rip), %rdi\n"
+        "\tmovl $0, %eax\n"
+        "\tcall printf@PLT\n"
+        "\tnop\n"
+        "\tleave\n"
+        "\tret\n"
+        "\n"
+        "\t.globl\tmain\n"
+        "main:\n"
+        "\tpushq\t%rbp\n"
+        "\tmovq %rsp, %rbp\n", Outfile);
+}
+
+// Print out the assembly postamble
+static void cgpostamble(void) {
+    fputs( "\tmovl $0, %eax\n"
+        "\tpopq %rbp\n"
+        "\tret\n", Outfile);
+}
+
+// Load an integer literal to a register. 
+// Return the id of the register +static int cgload_int(int val) { + int r = alloc_reg(); + fprintf(Outfile, "\tmovl\t$%d, %sd\n", val, reglist[r]); + return (r); +} + +// Add two registers together and return +// the number of the register with the result +static int cgadd(int r1, int r2) { + fprintf(Outfile, "\taddq\t%s, %s\n", reglist[r1], reglist[r2]); + free_reg(r1); + return (r2); +} + +// Subtract the second register from the first and +// return the number of the register with the result +static int cgsub(int r1, int r2) { + fprintf(Outfile, "\tsubq\t%s, %s\n", reglist[r2], reglist[r1]); + free_reg(r2); + return (r1); +} + +// Multiply two registers together and return +// the number of the register with the result +static int cgmul(int r1, int r2) { + fprintf(Outfile, "\timulq\t%s, %s\n", reglist[r1], reglist[r2]); + free_reg(r1); + return (r2); +} + +// Divide the first register by the second and +// return the number of the register with the result +static int cgdiv(int r1, int r2) { + fprintf(Outfile, "\tmovq\t%s,%%rax\n", reglist[r1]); + fprintf(Outfile, "\tcqo\n"); + fprintf(Outfile, "\tidivq\t%s\n", reglist[r2]); + fprintf(Outfile, "\tmovq\t%%rax,%s\n", reglist[r1]); + free_reg(r2); + return (r1); +} + +// Call printint() with the given register +static void cgprint(int r) { + fprintf(Outfile, "\tmovq\t%s, %%rdi\n", reglist[r]); + fprintf(Outfile, "\tcall\tprintint\n"); + free_reg(r); +} + +// Load a global variable into a register +static int cgload_glob(char *name) { + int r = alloc_reg(); + fprintf(Outfile, "\tmovq\t%s(%%rip), %s\n", name, reglist[r]); + return (r); +} + +// Store a register's value into a variable +static int cgstore_glob(int r, char *name) { + fprintf(Outfile, "\tmovq\t%s, %s(%%rip)\n", reglist[r], name); + return (r); +} + +// Compare two registers. 
+static int cgcompare(int r1, int r2, int op) { + static const int map_s[] = { A_EQ, A_NE, A_LT, A_LE, A_GT, A_GE, 0}; + static const char *map_t[] = { "sete", "setne","setl", "setle","setg", "setge",NULL}; + int how = -1; + for (int i = 0; map_t[i] != NULL; ++i) { + if (map_s[i] == op) { + how = i; + break; + } + } + + if (how == -1) { + fprintf(stderr, "%s: unknown compare operator %d.\n", __FUNCTION__, op); + exit(1); + } + + fprintf(Outfile, "\tcmpq\t%s, %s\n", reglist[r2], reglist[r1]); + fprintf(Outfile, "\t%s\t%sb\n", map_t[how], reglist[r2]); + fprintf(Outfile, "\tandq\t$255,%s\n", reglist[r2]); + free_reg(r1); + return (r2); +} + +// Jump to label when condition register is false(0). +static void cgjmp_condfalse(int Lt, int x) { + fprintf(Outfile, "\tcmpq\t$0, %s\n", reglist[x]); + fprintf(Outfile, "\tje\t.L%d\n", Lt); + free_reg(x); +} + +// Jump to label no matter what. +static void cgjmp_always(int Lt) { + fprintf(Outfile, "\tjmp\t.L%d\n", Lt); +} + +// Print label to Outfile +static void cgprint_label(int Lt) { + fprintf(Outfile, ".L%d:\n", Lt); +} + +// init a global variable +static void cginit_glob(char *name) { + fprintf(Outfile, "\t.comm\t%s,8,8\n", name); +} + +// Given a AST('s root) +// Generate ASM code. +// Return value register id. 
+static int cgenerate_ast(struct ASTnode *rt) {
+    // The parser hands out NULL for absent parts (an `if` without `else`,
+    // an empty block, a declaration). Nothing is emitted for those and -1
+    // ("no register") is returned, which free_reg() accepts.
+    if (rt == NULL) {
+        return (-1);
+    }
+
+    int nt = ast_type(rt->op);
+
+    if (nt == N_LEAF) {
+        if (rt->op == A_INTLIT) {
+            struct ASTintnode *x = (struct ASTintnode*)rt;
+            return (cgload_int(x->val));
+        } else if (rt->op == A_VAR) {
+            struct ASTvarnode *x = (struct ASTvarnode*)rt;
+            return (cgload_glob(array_get(&Gsym, x->id)));
+        } else {
+            fail_ast_op(rt->op, __FUNCTION__);
+        }
+    } else if (nt == N_BIN) {
+        if (rt->op == A_IF) {
+            struct ASTifnode *x = (struct ASTifnode*)rt;
+            int Lelse = alloc_label();
+            int Lend = alloc_label();
+            int condv = cgenerate_ast(x->cond);
+
+            cgjmp_condfalse(Lelse, condv);
+            free_reg(cgenerate_ast(x->left));
+            cgjmp_always(Lend);
+
+            cgprint_label(Lelse);
+            free_reg(cgenerate_ast(x->right));
+
+            cgprint_label(Lend);
+            return (-1);
+        } else if (rt->op == A_WHILE) {
+            struct ASTbinnode *x = (struct ASTbinnode*)rt;
+            int Lstart = alloc_label();
+            int Lend = alloc_label();
+
+            cgprint_label(Lstart);
+            int condv = cgenerate_ast(x->left);
+            cgjmp_condfalse(Lend, condv);
+            free_reg(cgenerate_ast(x->right));
+            cgjmp_always(Lstart);
+
+            cgprint_label(Lend);
+            return (-1);
+        }
+
+        struct ASTbinnode *x = (struct ASTbinnode*)rt;
+        int lv = cgenerate_ast(x->left);
+        int rv = cgenerate_ast(x->right);
+
+        if (rt->op == A_ADD) {
+            return (cgadd(lv, rv));
+        } else if (rt->op == A_SUB) {
+            return (cgsub(lv, rv));
+        } else if (rt->op == A_MUL) {
+            return (cgmul(lv, rv));
+        } else if (rt->op == A_DIV) {
+            return (cgdiv(lv, rv));
+        } else if (A_EQ <= rt->op && rt->op <= A_GE) {
+            // a compare operator
+            return (cgcompare(lv, rv, rt->op));
+        } else {
+            fail_ast_op(rt->op, __FUNCTION__);
+        }
+    } else if (nt == N_UN) {
+        struct ASTunnode *x = (struct ASTunnode*)rt;
+        int cv = cgenerate_ast(x->c);
+
+        if (rt->op == A_PRINT) {
+            cgprint(cv);
+            return (-1);
+        } else {
+            fail_ast_op(rt->op, __FUNCTION__);
+        }
+    } else if (nt == N_ASSIGN) {
+        struct ASTassignnode *x = (struct ASTassignnode*)rt;
+        int cv = cgenerate_ast(x->right);
+
+        if (rt->op == 
A_ASSIGN) { + return (cgstore_glob(cv, array_get(&Gsym, x->left))); + } else { + fail_ast_op(rt->op, __FUNCTION__); + } + } else if (nt == N_MULTI) { + struct ASTblocknode *x = (struct ASTblocknode*)rt; + int val = -1; + struct llist_node *p = x->st.head; + while (p) { + val = cgenerate_ast(p->val); + if (p->nxt) { + free_reg(val); + } + p = p->nxt; + } + return val; + } else { + fail_ast_op(rt->op, __FUNCTION__); + } +} + +// generates code +void cgx64_generate(struct ASTnode *rt) { + for (int i = 0; i < Gsym.length; ++i) { + cginit_glob(array_get(&Gsym, i)); + } + + cgpreamble(); + free_reg(cgenerate_ast(rt)); + cgpostamble(); +} diff --git a/src/fatals.c b/src/fatals.c new file mode 100644 index 0000000..f9dba62 --- /dev/null +++ b/src/fatals.c @@ -0,0 +1,29 @@ +#include +#include +#include "scan.h" + +void fail_malloc(const char *func_name) { + fprintf(stderr, "%s: unable to malloc.\n", func_name); + exit(1); +} + +void fail_ast_op(int op, const char *func_name) { + fprintf(stderr, "%s: unknown ast operator %d.\n", func_name, op); + exit(1); +} + +void fail_ce_expect(const char *expected, const char *got) { + fprintf(stderr, "syntax error on line %d: expected %s, got %s.\n", Line, expected, got); + exit(1); +} + +void fail_ce(const char *reason) { + fprintf(stderr, "syntax error on line %d: %s.\n", Line, reason); + exit(1); +} + +void fail_char(int c) { + fprintf(stderr, "Unrecognised character %c on line %d.\n", c, Line); + exit(1); +} + diff --git a/src/parse.c b/src/parse.c new file mode 100644 index 0000000..2259866 --- /dev/null +++ b/src/parse.c @@ -0,0 +1,336 @@ +#include +#include +#include "scan.h" +#include "token.h" +#include "ast.h" +#include "symbol.h" +#include "fatals.h" + +static struct linklist Tokens; // current token for parsing +static int skip_semi = 0; // can skip statement semi (after block) + +// Check that we have a binary operator and return its precedence. 
+// operators with larger precedence value will be evaluated first
+// The relative order follows C: assignment < equality < relational <
+// additive < multiplicative.
+static int op_precedence(int t) {
+    switch (t) {
+        case T_ASSIGN:
+            return (20);
+        case T_EQ: case T_NE:
+            // In C, == and != bind less tightly than the relational operators
+            return (30);
+        case T_GT: case T_GE: case T_LT: case T_LE:
+            return (40);
+        case T_PLUS: case T_MINUS:
+            return (70);
+        case T_STAR: case T_SLASH:
+            return (90);
+        default:
+            fail_ce_expect("an operator", token_typename[t]);
+    }
+}
+
+// Convert a arithmetic token into an AST operation.
+static int arithop(int t) {
+    // {token, AST operation} pairs; the list ends at the T_EOF entry
+    static const int map[][2] = {
+        {T_PLUS,    A_ADD},
+        {T_MINUS,   A_SUB},
+        {T_STAR,    A_MUL},
+        {T_SLASH,   A_DIV},
+        {T_EQ,      A_EQ},
+        {T_NE,      A_NE},
+        {T_LT,      A_LT},
+        {T_LE,      A_LE},
+        {T_GT,      A_GT},
+        {T_GE,      A_GE},
+        {T_ASSIGN,  A_ASSIGN},
+        {T_EOF}
+    };
+
+    for (int i = 0; map[i][0] != T_EOF; ++i) {
+        if (t == map[i][0]) {
+            return map[i][1];
+        }
+    }
+    fail_ce_expect("an binary operator", token_typename[t]);
+}
+
+// operator associativity direction
+// Return 0 if left to right, e.g. +
+//        1 if right to left, e.g. =
+static int direction_rtl(int t) {
+    switch(t) {
+        case T_ASSIGN:
+            return (1);
+        default:
+            return (0);
+    }
+}
+
+// Next token
+static void next(void) {
+    if (Tokens.head) {
+        token_free(Tokens.head->val);
+        llist_popfront(&Tokens);
+    }
+}
+
+// preview next kth token from input stream
+// (an EOF token is returned when fewer than k+1 tokens remain)
+static struct token preview(int k) {
+    if (Tokens.length <= k) {
+        return (token_make_eof());
+    }
+    return (*((struct token*)llist_get(&Tokens, k)));
+}
+
+// return current token from input stream
+static struct token current(void) {
+    return (preview(0));
+}
+
+// match a token or report syntax error
+// A pending skip_semi lets one expected ';' be satisfied without consuming
+// a token.
+static void match(int t) {
+    if (t == T_SEMI && skip_semi) {
+        skip_semi = 0;
+    } else if (current().type == t) {
+        next();
+    } else {
+        // fail_ce_expect(expected, got): `t` is what the grammar requires,
+        // the current token is what was actually found.
+        fail_ce_expect(token_typename[t], token_typename[current().type]);
+    }
+}
+
+// check current token's type or report syntax error. 
+static void check(int t) { + if (current().type != t) { + fail_ce_expect(token_typename[current().type], token_typename[t]); + } +} + +static struct ASTnode* statement(void); +static struct ASTnode* expression(void); + +// Parse a primary factor and return an +// AST node representing it. +static struct ASTnode* primary(void) { + struct ASTnode *res; + + if (current().type == T_LP) { + // ( expr ) considered as primary + next(); + res = expression(); + match(T_RP); + } else if (current().type == T_INTLIT) { + res = ast_make_intlit(*((int*)current().val)); + next(); + } else if (current().type == T_LONGLIT) { + // todo + fprintf(stderr, "TOOD.\n"); + exit(1); + } else if (current().type == T_INDENT) { + int id = findglob((char*)current().val); + if (id == -1) { + fprintf(stderr, "syntax error on line %d: unknown indentifier %s.\n", Line, (char*)current().val); + exit(1); + } + next(); + return (ast_make_var(id)); + } else { + fprintf(stderr, "syntax error on line %d: primary expression excpeted.\n", Line); + exit(1); + } + return (res); +} + +// Check if it is binary operator +static int is_binop(int t) { + return (T_ASSIGN <= t && t <= T_GE); +} + +// Return an AST tree whose root is a binary operator +static struct ASTnode* binexpr(int precedence) { + struct ASTnode *left, *right; + + left = primary(); + int tt = current().type; + if (!is_binop(tt)) { + return (left); + } + + int tp = op_precedence(tt); + while (tp > precedence) { + next(); + + if (direction_rtl(tt)) { + right = binexpr(precedence); + left = ast_make_assign(arithop(tt), ((struct ASTvarnode*)left)->id, right); + } else { + right = binexpr(tp); + left = ast_make_binary(arithop(tt), left, right); // join right into left + } + + tt = current().type; + if (!is_binop(tt)) { + return (left); + } + tp = op_precedence(tt); + } + return (left); +} + +// parse one block of code, e.g. 
{ a; b; } +static struct ASTnode* parse_block(void) { + match(T_LB); + if (current().type == T_RB) { + next(); + return NULL; + } + + struct ASTblocknode* res = (struct ASTblocknode*)ast_make_block(); + while (current().type != T_RB) { + struct ASTnode *x; + x = statement(); + llist_pushback_notnull(&res->st, x); + + if (current().type == T_EOF) { + break; + } + } + match(T_RB); + skip_semi = 1; + return ((struct ASTnode*)res); +} + +// parse an expression +static struct ASTnode* expression(void) { + if (current().type == T_LB) { + return (parse_block()); + } + return (binexpr(0)); +} + +// parse one print statement +static struct ASTnode* print_statement(void) { + match(T_PRINT); + struct ASTnode *res = ast_make_unary(A_PRINT, expression()); + match(T_SEMI); + return (res); +} + +// parse variable declaration statement +static struct ASTnode* var_declaration(void) { + match(T_INT); + check(T_INDENT); + if (findglob((char*)current().val) != -1) { + fail_ce("variable declared twice."); + } + addglob((char*)current().val); + next(); + match(T_SEMI); + return (NULL); +} + +// parse an if statement +static struct ASTnode* if_statement(void) { + match(T_IF); // if + match(T_LP); // ( + struct ASTnode* cond = expression(); + match(T_RP); // ) + struct ASTnode* then = statement(); + struct ASTnode* else_then; + if (current().type == T_ELSE) { + next(); // else + else_then = statement(); + } else { + else_then = NULL; // empty block + } + return (ast_make_if(then, else_then, cond)); +} + +// parse an while statement +static struct ASTnode* while_statement(void) { + match(T_WHILE); + match(T_LP); + struct ASTnode* cond = expression(); + match(T_RP); + struct ASTnode* body = statement(); + return (ast_make_binary(A_WHILE, cond, body)); +} + +// parse a for statement (into a while loop) +static struct ASTnode* for_statement(void) { + match(T_FOR); + match(T_LP); + struct ASTnode *init = statement(); + + struct ASTnode *cond; + if (current().type != T_SEMI) { + cond = 
expression(); + } else { + cond = ast_make_intlit(1); + } + next(); // skip the ; + + struct ASTnode *inc; + if (current().type != T_RP) { + inc = expression(); + } else { + inc = NULL; + } + + match(T_RP); + struct ASTnode *body = statement(); + struct ASTblocknode *container = (struct ASTblocknode*)ast_make_block(); + struct ASTnode *wbody; + + if (body == NULL && inc == NULL) { + wbody = NULL; + } else { + struct ASTblocknode* wt = (struct ASTblocknode*)ast_make_block(); + llist_pushback_notnull(&wt->st, body); + llist_pushback_notnull(&wt->st, inc); + wbody = (struct ASTnode*)wt; + } + + llist_pushback_notnull(&container->st, init); + llist_pushback(&container->st, ast_make_binary(A_WHILE, cond, wbody)); + return (struct ASTnode*)container; +} + +// parse one statement +static struct ASTnode* statement(void) { + if (current().type == T_SEMI) { + return (NULL); + } + else if (current().type == T_PRINT) { + return (print_statement()); + } + else if (current().type == T_INT) { + return (var_declaration()); + } + else if (current().type == T_IF) { + return (if_statement()); + } + else if (current().type == T_WHILE) { + return (while_statement()); + } + else if (current().type == T_FOR) { + return (for_statement()); + } + else { + skip_semi = 0; + struct ASTnode* res = expression(); + match(T_SEMI); + return (res); + } +} + +// Parse ans return the full ast +struct ASTnode* parse(const char *name) { + Tokens = scan_tokens(name); + struct ASTnode* res = statement(); + struct llist_node *p = Tokens.head; + while (p != Tokens.tail) { + free(p->val); + p = p->nxt; + } + llist_free(&Tokens); + return (res); +} diff --git a/src/scan.c b/src/scan.c new file mode 100644 index 0000000..392cc07 --- /dev/null +++ b/src/scan.c @@ -0,0 +1,266 @@ +#include +#include +#include +#include +#include +#include "token.h" +#include "fatals.h" +#include "util/misc.h" + +int Line = 1; +static int Preview; +static FILE *Infile; + +const char *token_typename[] = { + "EOF", + ";", + "{", 
"}", "(", ")", + "=", + "+", "-", "*", "/", + "==", "!=", "<", ">", "<=", ">=", + "int", "void", "char", "long", + "print", "if", "else", "while", "for", + "an integer literal (type int)", "an integer literal (type long)", "an indentifier" +}; + +// preview one char, not getting it out from the stream +static int preview(void) { + if (!Preview) { + Preview = fgetc(Infile); + } + return (Preview); +} + +// Get the next char from the input file +static void next(void) { + int c = preview(); + Preview = 0; + if (c == '\n') { + Line += 1; + } +} + +// Skip past input that we don't need to deal with, +// i.e. whitespace, newlines. +static void skip_whitespaces(void) { + int c; + + c = preview(); + while (c == ' ' || c == '\t' || c == '\n' || c == '\r' || c == '\f') { + next(); + c = preview(); + } +} + +// Scan and return an integer literal value from the input file. +static void scanint(struct token *t) { + long long res = 0; + int c = preview(); + while ('0' <= c && c <= '9') { + res = res * 10 + (c - '0'); + next(); + c = preview(); + } + + if (INT_MIN <= res && res <= INT_MAX) { + t->type = T_INTLIT; + t->val = malloc(sizeof(int)); + if (t->val == NULL) { + fail_malloc(__FUNCTION__); + } + *((int *)t->val) = (int)res; + } else { + t->type = T_LONGLIT; + t->val = malloc(sizeof(long long)); + if (t->val == NULL) { + fail_malloc(__FUNCTION__); + } + *((long long *)t->val) = res; + } +} + +// Scan an identifier from the input file and +// Return the identifier (char*) +static char* scan_indentifier(int *n) { + int sz = 128, len = 0; + + char *res = malloc(sz * sizeof(char)); + memset(res, 0, sz * sizeof(char)); + + int c = preview(); + while (isdigit(c) || isalpha(c) || c == '_') { + if (len >= sz - 1) { + sz *= 2; + char *old = res; + res = malloc(sz * sizeof(char)); + memcpy(res, old, len * sizeof(char)); + memset(res + len * sizeof(char), 0, (sz - len) * sizeof(char)); + free(old); + } + res[len++] = c; + next(); + c = preview(); + } + + if (n) { + *n = len; + } + 
return (res); +} + +// Given a word from the input, scan if it is a keyword +static int scan_keyword(struct token *t, char *s) { + static const char *map_s[] = { + "print", + "int", + "void", + "long", + "if", + "else", + "while", + "for", + NULL + }; + + static const int map_t[] = { + T_PRINT, + T_INT, + T_VOID, + T_LONG, + T_IF, + T_ELSE, + T_WHILE, + T_FOR, + -1 + }; + + for (int i = 0; map_s[i] != NULL; ++i) { + if (strequal(map_s[i], s)) { + t->type = map_t[i]; + return (1); + } + } + return (0); +} + +// Scan one char token +// Return 1 if found +static int scan_1c(struct token *t) { + static const int map[][2] = { + {'+', T_PLUS}, + {'-', T_MINUS}, + {'*', T_STAR}, + {'/', T_SLASH}, + {'{', T_LB}, + {'}', T_RB}, + {'(', T_LP}, + {')', T_RP}, + {';', T_SEMI}, + {'\0', -1} + }; + + int c = preview(); + for (int i = 0; map[i][0] != '\0'; ++i) { + if (map[i][0] == c) { + t->type = map[i][1]; + next(); + return (1); + } + } + return (0); +} + +// Scan and return the next token found in the input. 
+static struct token* scan(void) { + struct token *t = malloc(sizeof(struct token)); + if (t == NULL) { + fail_malloc(__FUNCTION__); + } + t->val = NULL; + + skip_whitespaces(); + int c = preview(); + if (c == EOF) { + t->type = T_EOF; + return (t); + } + + if (scan_1c(t)) { + return (t); + } + + if (c == '=') { + t->type = T_ASSIGN; + next(); + c = preview(); + if (c == '=') { + t->type = T_EQ; + next(); + } + } else if (c == '!') { + next(); + c = preview(); + if (c == '=') { + t->type = T_NE; + next(); + } else { + fprintf(stderr, "Unrecognised character %c on line %d.\n", c, Line); + exit(1); + } + } else if (c == '<') { + t->type = T_LT; + next(); + c = preview(); + if (c == '=') { + t->type = T_LE; + next(); + } + } else if (c == '>') { + t->type = T_GT; + next(); + c = preview(); + if (c == '=') { + t->type = T_GE; + next(); + } + } else { + // If it's a digit, scan the integer literal value in + if (isdigit(c)) { + scanint(t); + } else if (isalpha(c) || c == '_') { + t->val = scan_indentifier(NULL); + if (scan_keyword(t, t->val)) { + // got a keyword + free(t->val); + t->val = NULL; + } else { + // not a keyword, so it should be an indentifier. 
+ t->type = T_INDENT; + } + } else { + fail_char(c); + } + } + return (t); +} + +struct linklist scan_tokens(const char *name) { + Infile = fopen(name, "r"); + if (Infile == NULL) { + fprintf(stderr, "Cannot open file %s.\n", name); + exit(1); + } + + struct linklist res; + llist_init(&res); + while (1) { + struct token *t = scan(); + llist_pushback(&res, t); + if (t->type == T_EOF) { + break; + } + } + + fclose(Infile); + return (res); +} \ No newline at end of file diff --git a/src/symbol.c b/src/symbol.c new file mode 100644 index 0000000..9c46b87 --- /dev/null +++ b/src/symbol.c @@ -0,0 +1,122 @@ +#include +#include +#include +#include "util/misc.h" +#include "util/array.h" + +#define INDENT_CHARS 63 // 10 digits + 26 * 2(cap) alpha + 1 _ + +static int indent_char_id(char c) { + if ('0' <= c && c <= '9') { + return (c - '0'); + } + if ('a' <= c && c <= 'z') { + return (c - 'a' + 10); + } + if ('A' <= c && c <= 'Z') { + return (c - 'A' + 36); + } + if (c == '_') { + return (62); + } + fprintf(stderr, "%c is not a acceptable char in indentifier.\n", c); + exit(1); +} + +struct trie_node { + struct trie_node *c[INDENT_CHARS]; // childs + int sz; // size + int val; // endpoint value, -1 for not ended +}; + +static struct trie_node *root; +struct array Gsym; // symbol id to name map + +static void trie_free(struct trie_node *p) { + for (int i = 0; i < INDENT_CHARS; ++i) { + if (p->c[i]) { + trie_free(p->c[i]); + } + } + free(p); +} + +static struct trie_node* trie_createnode(void) { + struct trie_node *x = malloc(sizeof(struct trie_node)); + if (x == NULL) { + fprintf(stderr, "%s: failed to malloc trie node in symbol table.\n", __FUNCTION__); + exit(1); + } + + memset(x, 0, sizeof(struct trie_node)); + x->val = -1; + return (x); +} + +static void trie_set(char *str, int val) { + struct trie_node *p = root; + + int n = strlen(str); + for (int i = 0; i < n; ++i) { + int x = indent_char_id(str[i]); + if (!p->c[x]) { + p->c[x] = trie_createnode(); + } + p->sz += 1; + p 
= p->c[x]; + } + p->val = val; +} + +static int trie_get(char *str) { + struct trie_node *p = root; + + int n = strlen(str); + for (int i = 0; i < n; ++i) { + int x = indent_char_id(str[i]); + if (!p->c[x]) { // not found + return (-1); + } + p = p->c[x]; + } + return (p->val); +} + +static int IsSymbolListLoaded = 0; + +// init global symbol table +void symbol_init(void) { + IsSymbolListLoaded = 1; + array_init(&Gsym); + root = trie_createnode(); +} + +// unload global symbol table +void symbol_unload(void) { + if (!IsSymbolListLoaded) { + return; + } + + for (int i = 0; i < Gsym.length; ++i) { + free(array_get(&Gsym, i)); + } + array_free(&Gsym); + trie_free(root); +} + +// Determine if the symbol s is in the global symbol table. +// Return its slot position or -1 if not found. +int findglob(char *s) { + return (trie_get(s)); +} + +// Add a global symbol to the symbol table. +// Return the slot number in the symbol table. +int addglob(char *s) { + char *ss = strclone(s); + array_pushback(&Gsym, ss); + int res = Gsym.length - 1; + trie_set(ss, res); + return (res); +} + diff --git a/src/token.c b/src/token.c new file mode 100644 index 0000000..fdd07c4 --- /dev/null +++ b/src/token.c @@ -0,0 +1,17 @@ +#include +#include +#include "token.h" + +void token_free(struct token *t) { + if (t->val) { + free(t->val); + } + free(t); +} + +struct token token_make_eof(void) { + struct token res; + res.type = T_EOF; + res.val = NULL; + return (res); +} \ No newline at end of file diff --git a/src/util/array.c b/src/util/array.c new file mode 100644 index 0000000..e002d03 --- /dev/null +++ b/src/util/array.c @@ -0,0 +1,58 @@ +#include +#include +#include +#include "util/array.h" + +void array_init(struct array *a) { + a->length = 0; + a->cap = 0; + a->begin = NULL; +} + +void array_free(struct array *a) { + free(a->begin); + a->begin = NULL; + a->length = 0; + a->cap = 0; +} + +static void array_enlarge(struct array *a) { + if (a->cap == 0) { + a->cap = 128; + } else { + a->cap 
*= 2; + } + + void **old = a->begin; + a->begin = malloc(sizeof(void*) * a->cap); + if (old) { + memcpy(a->begin, old, a->length * sizeof(void*)); + } +} + +void array_pushback(struct array *a, void *val) { + if (a->length == a->cap) { + array_enlarge(a); + } + + a->begin[a->length] = val; + a->length += 1; +} + +void* array_get(struct array *a, int index) { + if (index >= a->length) { + fprintf(stderr, "%s: out of range.\n", __FUNCTION__); + abort(); + } + + return (a->begin[index]); +} + +void array_set(struct array *a, int index, void *val) { + if (index >= a->length) { + fprintf(stderr, "%s: out of range.\n", __FUNCTION__); + abort(); + } + + a->begin[index] = val; +} diff --git a/src/util/linklist.c b/src/util/linklist.c new file mode 100644 index 0000000..3aba276 --- /dev/null +++ b/src/util/linklist.c @@ -0,0 +1,111 @@ +#include +#include +#include "fatals.h" +#include "util/linklist.h" + +struct llist_node* llist_createnode(void *val) { + struct llist_node *res = malloc(sizeof(struct llist_node)); + if (res == NULL) { + fail_malloc(__FUNCTION__); + } + res->nxt = NULL; + res->val = val; + return (res); +} + +void llist_pushback(struct linklist *l, void *val) { + l->length += 1; + if (!l->tail) { + l->head = l->tail = llist_createnode(val); + return; + } + l->tail->nxt = llist_createnode(val); + l->tail = l->tail->nxt; +} + +void llist_pushback_notnull(struct linklist *l, void *val) { + if (val) { + llist_pushback(l, val); + } +} + +void* llist_get(struct linklist *l, int x) { + if (x >= l->length) { + fprintf(stderr, "linklist out of range.\n"); + abort(); + } + + struct llist_node *p = l->head; + for (int i = 0; i < x; ++i) { + p = p->nxt; + } + return (p->val); +} + +void llist_set(struct linklist *l, int x, void *val) { + if (x >= l->length) { + fprintf(stderr, "linklist out of range.\n"); + abort(); + } + + struct llist_node *p = l->head; + for (int i = 0; i < x; ++i) { + p = p->nxt; + } + p->val = val; +} + +void llist_init(struct linklist *l) { + 
l->length = 0; + l->head = NULL; + l->tail = NULL; +} + +void llist_free(struct linklist *l) { + struct llist_node *p = l->head; + struct llist_node *nxt; + while (p) { + nxt = p->nxt; + free(p); + p = nxt; + } + llist_init(l); +} + +void llist_insert(struct linklist *l, int index, void *val) { + if (index >= l->length) { + llist_pushback(l, val); + return; + } + + l->length += 1; + struct llist_node *x = llist_createnode(val); + if (index == 0) { + x->nxt = l->head; + l->head = x; + return; + } + + struct llist_node *p = l->head; + for (int i = 0; i < index - 1; ++i) { + p = p->nxt; + } + x->nxt = p->nxt; + p->nxt = x; +} + +void llist_popfront(struct linklist *l) { + if (l->head == NULL) { + return; + } + + l->length -= 1; + if (l->length == 0) { + free(l->head); + l->head = l->tail = NULL; + } + + struct llist_node *p = l->head; + l->head = p->nxt; + free(p); +} diff --git a/src/util/misc.c b/src/util/misc.c new file mode 100644 index 0000000..cf6a2f7 --- /dev/null +++ b/src/util/misc.c @@ -0,0 +1,26 @@ +#include +#include + +// check if two string are the same +int strequal(const char *s1, const char *s2) { + for (int i = 1; ; ++i) { + if (s1[i] != s2[i]) { + return (0); + } + + if (s1[i] == '\0') { + break; + } + } + return (1); +} + +// A impl of C23 strdup() +char* strclone(char *s) { + int n = strlen(s); + char *res = malloc(n + 1); + memcpy(res, s, n * sizeof(char)); + res[n] = '\0'; + return res; +} + diff --git a/tests/input01 b/tests/input01 new file mode 100644 index 0000000..ed710cf --- /dev/null +++ b/tests/input01 @@ -0,0 +1,4 @@ +{ print 12 * 3; + print 18 - 2 * 4; + print 1 + 2 + 9 - 5/2 + 3*5; +} diff --git a/tests/input02 b/tests/input02 new file mode 100644 index 0000000..8627485 --- /dev/null +++ b/tests/input02 @@ -0,0 +1,7 @@ +{ + int fred; + int jim; + fred= 5; + jim= 12; + print fred + jim; +} diff --git a/tests/input03 b/tests/input03 new file mode 100644 index 0000000..9882a08 --- /dev/null +++ b/tests/input03 @@ -0,0 +1,8 @@ +{ + int x; 
+ x= 1; print x; + x= x + 1; print x; + x= x + 1; print x; + x= x + 1; print x; + x= x + 1; print x; +} diff --git a/tests/input04 b/tests/input04 new file mode 100644 index 0000000..d5edc09 --- /dev/null +++ b/tests/input04 @@ -0,0 +1,12 @@ +{ + int x; + x= 7 < 9; print x; + x= 7 <= 9; print x; + x= 7 != 9; print x; + x= 7 == 7; print x; + x= 7 >= 7; print x; + x= 7 <= 7; print x; + x= 9 > 7; print x; + x= 9 >= 7; print x; + x= 9 != 7; print x; +} diff --git a/tests/input05 b/tests/input05 new file mode 100644 index 0000000..e6fab21 --- /dev/null +++ b/tests/input05 @@ -0,0 +1,9 @@ +{ + int i; int j; + i=6; j=12; + if (i < j) { + print i; + } else { + print j; + } +} diff --git a/tests/input06 b/tests/input06 new file mode 100644 index 0000000..173722d --- /dev/null +++ b/tests/input06 @@ -0,0 +1,7 @@ +{ int i; + i=1; + while (i <= 10) { + print i; + i= i + 1; + } +} diff --git a/tests/input07 b/tests/input07 new file mode 100644 index 0000000..dfa01ad --- /dev/null +++ b/tests/input07 @@ -0,0 +1,6 @@ +{ + int i; + for (i= 1; i <= 10; i= i + 1) { + print i; + } +} diff --git a/tests/out.input01 b/tests/out.input01 new file mode 100644 index 0000000..4502630 --- /dev/null +++ b/tests/out.input01 @@ -0,0 +1,3 @@ +36 +10 +25 diff --git a/tests/out.input02 b/tests/out.input02 new file mode 100644 index 0000000..98d9bcb --- /dev/null +++ b/tests/out.input02 @@ -0,0 +1 @@ +17 diff --git a/tests/out.input03 b/tests/out.input03 new file mode 100644 index 0000000..8a1218a --- /dev/null +++ b/tests/out.input03 @@ -0,0 +1,5 @@ +1 +2 +3 +4 +5 diff --git a/tests/out.input04 b/tests/out.input04 new file mode 100644 index 0000000..bb08505 --- /dev/null +++ b/tests/out.input04 @@ -0,0 +1,9 @@ +1 +1 +1 +1 +1 +1 +1 +1 +1 diff --git a/tests/out.input05 b/tests/out.input05 new file mode 100644 index 0000000..1e8b314 --- /dev/null +++ b/tests/out.input05 @@ -0,0 +1 @@ +6 diff --git a/tests/out.input06 b/tests/out.input06 new file mode 100644 index 0000000..f00c965 --- /dev/null +++ 
b/tests/out.input06 @@ -0,0 +1,10 @@ +1 +2 +3 +4 +5 +6 +7 +8 +9 +10 diff --git a/tests/out.input07 b/tests/out.input07 new file mode 100644 index 0000000..f00c965 --- /dev/null +++ b/tests/out.input07 @@ -0,0 +1,10 @@ +1 +2 +3 +4 +5 +6 +7 +8 +9 +10 diff --git a/tests/test_llvm.sh b/tests/test_llvm.sh new file mode 100644 index 0000000..dfca175 --- /dev/null +++ b/tests/test_llvm.sh @@ -0,0 +1,30 @@ +#!/bin/sh +# Run each test and compare +# against known good output + +set -e + +echo "Testing for target llvm..." + +if [ ! -f ../acc ] +then echo "Need to build ../acc first!"; exit 1 +fi + +for i in input* +do if [ ! -f "out.$i" ] + then echo "Can't run test on $i, no answer file!" + else + echo -n $i + ../acc llvm $i + clang -o out out.ll -w + ./out > trial.$i + cmp -s "out.$i" "trial.$i" + if [ "$?" -eq "1" ] + then echo ": failed" + diff -c "out.$i" "trial.$i" + echo + else echo ": OK" + fi + rm -f out out.ll "trial.$i" + fi +done diff --git a/tests/test_x64.sh b/tests/test_x64.sh new file mode 100644 index 0000000..f081768 --- /dev/null +++ b/tests/test_x64.sh @@ -0,0 +1,30 @@ +#!/bin/sh +# Run each test and compare +# against known good output + +set -e + +echo "Testing for target x86_64..." + +if [ ! -f ../acc ] +then echo "Need to build ../acc first!"; exit 1 +fi + +for i in input* +do if [ ! -f "out.$i" ] + then echo "Can't run test on $i, no answer file!" + else + echo -n $i + ../acc x86_64 $i + gcc -o out out.s + ./out > trial.$i + cmp -s "out.$i" "trial.$i" + if [ "$?" 
-eq "1" ] + then echo ": failed" + diff -c "out.$i" "trial.$i" + echo + else echo ": OK" + fi + rm -f out out.s "trial.$i" + fi +done diff --git a/xmake.lua b/xmake.lua new file mode 100644 index 0000000..cbce619 --- /dev/null +++ b/xmake.lua @@ -0,0 +1,23 @@ +set_project("acc") +set_version("0.1a1") +set_basename("acc") +set_languages("c11") +set_targetdir(".") + +target("build") + set_kind("binary") + set_default(true) + set_warnings("allextra") + add_files("src/**.c") + add_files("main.c") + add_includedirs("include/") + if is_mode("release") then + set_strip("all") + set_optimize("O2") + end + + if is_mode("debug") then + set_optimize("none") + set_symbols("debug") + add_defines("DEBUG") + end