From a8d5e0ebf4d393b1732fa4c397d0820c75f88ae0 Mon Sep 17 00:00:00 2001 From: szdytom Date: Wed, 21 Jun 2023 16:10:06 +0800 Subject: [PATCH] add types to IR --- include/acir.h | 55 +++++++++++++--- include/fatals.h | 1 + include/target.h | 23 ++++++- include/token.h | 1 - include/vtype.h | 6 +- main.c | 16 +++-- src/acir.c | 160 +++++++++++++++++++++++++++++++++++++-------- src/fatals.c | 5 ++ src/target.c | 46 +++++++++++-- src/util/critbit.c | 7 +- 10 files changed, 260 insertions(+), 60 deletions(-) diff --git a/include/acir.h b/include/acir.h index 1105ec7..63d396f 100644 --- a/include/acir.h +++ b/include/acir.h @@ -9,11 +9,16 @@ // Operation code definations in the ACC IR(ACIR). enum { // Loads - IR_IMM_I32, // immediate integer (32bits) + IR_IMM, // load immediate // SSA IR_PHI, // phi node in SSA + // Type casts + IR_ZEXT, // zero extend an integer + IR_SEXT, // signed extend an integer + IR_TRUNC, // truncate an integer + // Arithmetic operations IR_NEG, // negation IR_NOT, // bitwise not @@ -28,6 +33,18 @@ enum { IR_NULL, }; +// Defination of IR type code, which simplier than VType. +enum { + IRT_VOID, // void + IRT_I1, // bool + IRT_I32, // 32bits integer + IRT_I64, // 64bits integer + IRT_PTR, // pointer + + // Guard + IRT_EXCEED +}; + // Argument of phi function. struct IRphi_arg { struct llist_node n; // linklist header @@ -41,13 +58,16 @@ struct IRinstruction { struct llist_node n; // linklist header int op; // operation code int id; // value identifier + int type; // value type in IR type code struct IRblock *owner; // the basic block containing this instruction union { struct { struct IRinstruction *left, *right; }; // left/right operands for calculations struct { struct IRinstruction *cond; // jump condition struct IRblock *bt, *bf; }; // true branch & false branch for conditional jump struct linklist phi; // Phi instruction argument list - int32_t val_i32; // immediate: integer (32bits) + int32_t val_i32; // immediate: 32bits integer + int64_t val_i64; // immediate: 64bits integer + bool val_i1; // immediate: bool }; }; @@ -64,20 +84,26 @@ struct IRblock { // Function containing IR instructions. // TODO: paramaters struct IRfunction { - struct llist_node n; // linklist header - char *name; // function name - struct linklist bs; // basic blocks - int ins_count; // number of instructions, used for allocating instruction identifier. + struct llist_node n; // linklist header + char *name; // function name + struct linklist bs; // basic blocks + int ins_count; // number of instructions, used for allocating instruction identifier. + struct IRinstruction *null; // an instruction with a value void, considered to have instruction id 0. + // This is used for the null object patern. }; // Constructs an IRinstruction with an operator, and two operands. -struct IRinstruction* IRinstruction_new(struct IRblock *owner, int op, +struct IRinstruction* IRinstruction_new(struct IRblock *owner, int op, int type, struct IRinstruction *left, struct IRinstruction *right); -// Constructs a IRinstruction with an integer immediate (32bits). +// Constructs an IRinstruction with an integer immediate (32bits). struct IRinstruction* IRinstruction_new_i32(struct IRblock *owner, int32_t v); -// Constructs a IRinstuction with instruction Q_JMP or Q_BR (which is conditional jump). +// Contructs an IRinstruction with an void immediate only. +// Use IRfunction.null instead of contructing a new void immediate. +struct IRinstruction* IRinstruction_new_void(struct IRblock *owner); + +// Constructs a IRinstuction with instruction IR_JMP or IR_BR (which is conditional jump). struct IRinstruction* IRinstruction_new_jmp(struct IRblock *owner, int op, struct IRinstruction *cond, struct IRblock *bt, struct IRblock *bf); @@ -94,7 +120,16 @@ struct IRblock* IRblock_new(struct IRfunction *owner); // Allocating instruction identifiers in IRfunction int IRfunction_alloc_ins(struct IRfunction *self); -// Generates Quad Repersentation from an AST +// Translates a VType into an IR type code. +int IRTypecode_from_VType(const struct VType *v); + +// Returns a string identifier for the given type. +const char *IRTypecode_stringify(int self); + +// Returns a string identifier for the given operation code. +const char* IRopcode_stringify(int op); + +// Generates IR Repersentation from an AST struct IRfunction* IRfunction_from_ast(struct Afunction *afunc); // Frees a IRinstruction and all its components. diff --git a/include/fatals.h b/include/fatals.h index 2d74ed7..78d66ed 100644 --- a/include/fatals.h +++ b/include/fatals.h @@ -4,6 +4,7 @@ #include #include +noreturn void fail_unreachable(const char *func_name); noreturn void fail_type(int line); noreturn void fail_todo(const char *func_name); noreturn void fail_target(const char *target_name); diff --git a/include/target.h b/include/target.h index bca3716..3dc4f8e 100644 --- a/include/target.h +++ b/include/target.h @@ -1,13 +1,30 @@ #ifndef ACC_TARGET_H #define ACC_TARGET_H -// Target types +#include "vtype.h" + +// Defination of targets. enum { - TARGET_AST, - TARGET_ACIR, + TARGET_X86_64, // Intel/AMD x86-64 + TARGET_X86_32, // Intel/AMD x86-32 + TARGET_UNKNOWN_16, // unspecified 16bit instruction set + TARGET_UNKNOWN_32, // unspecified 32bit instruction set + TARGET_RISCV_32, // RISC-V 32 bits (32 registers) + TARGET_RISCV_64, // RISC-V 64 bits (32 registers) + + // Guard TARGET_NULL, }; +// Target archtechture infomation. +struct target_info { + int int_size; // size of int(in bytes). + int long_size; // size of long(in bytes). +}; + +extern struct target_info Tinfo; + int target_parse(const char *target_string); +void Tinfo_load(int target); #endif diff --git a/include/token.h b/include/token.h index 693fa31..25ef677 100644 --- a/include/token.h +++ b/include/token.h @@ -10,7 +10,6 @@ struct token { int line; // token location line number int type; // token type union { // hold the value of the literal that we scanned in - int16_t val_i16; int32_t val_i32; int64_t val_i64; char *val_s; diff --git a/include/vtype.h b/include/vtype.h index a1326aa..3eed394 100644 --- a/include/vtype.h +++ b/include/vtype.h @@ -7,8 +7,8 @@ enum { VT_VOID, // void VT_BOOL, // bool - VT_I32, // int32_t - VT_I64, // int64_t + VT_I32, // signed 32 bits integer + VT_I64, // signed 64 bits integer // Guard VT_EXCEED @@ -16,7 +16,7 @@ enum { // Value type in C. struct VType { - int bt; // base type(first class) + int bt; // base type. }; // Find out the type after appling the give ast operator(unary arithmetic variant). diff --git a/main.c b/main.c index 992efde..ce82cb8 100644 --- a/main.c +++ b/main.c @@ -5,11 +5,12 @@ #include "ast.h" #include "target.h" #include "acir.h" +#include "util/misc.h" // Print out a usage if started incorrectly static void usage(char *prog) { fprintf(stderr, "ACC the C compiler. built on: %s.\n", __DATE__); - fprintf(stderr, "Usage: %s target infile (outfile)\n", prog); + fprintf(stderr, "Usage: %s target format infile (outfile)\n", prog); exit(1); } @@ -24,21 +25,22 @@ void unload(void) { int main(int argc, char *argv[]) { atexit(unload); - if (argc < 3) { + if (argc < 4) { usage(argv[0]); } - if (argc >= 4) { - Outfile = fopen(argv[3], "w"); + if (argc >= 5) { + Outfile = fopen(argv[4], "w"); } else { Outfile = stdout; } int target = target_parse(argv[1]); - struct Afunction *afunc = Afunction_from_source(argv[2]); - if (target == TARGET_AST) { + Tinfo_load(target); + struct Afunction *afunc = Afunction_from_source(argv[3]); + if (strequal(argv[2], "_ast")) { Afunction_print(Outfile, afunc); - } else if (target == TARGET_ACIR) { + } else if (strequal(argv[2], "_ir")) { struct IRfunction *ir = IRfunction_from_ast(afunc); IRfunction_print(ir, Outfile); IRfunction_free(ir); diff --git a/src/acir.c b/src/acir.c index 21cb93f..e533ed2 100644 --- a/src/acir.c +++ b/src/acir.c @@ -19,35 +19,48 @@ static void IRblock_add_ins(struct IRblock *self, struct IRinstruction *x) { } // Constructs an IRinstruction with an operator, and two operands. -struct IRinstruction* IRinstruction_new(struct IRblock *owner, int op, +struct IRinstruction* IRinstruction_new(struct IRblock *owner, int op, int type, struct IRinstruction *left, struct IRinstruction *right) { IRinstruction_constructor_shared_code self->op = op; + self->type = type; self->left = left; self->right = right; - + if (IRis_terminate(self->op)) { owner->is_complete = true; } return (self); } -// Constructs a IRinstruction with an integer immediate (32bits). +// Constructs an IRinstruction with an integer immediate (32bits). struct IRinstruction* IRinstruction_new_i32(struct IRblock *owner, int32_t v) { IRinstruction_constructor_shared_code - self->op = IR_IMM_I32; + self->op = IR_IMM; + self->type = IRT_I32; self->val_i32 = v; return (self); } -// Constructs a IRinstuction with instruction Q_JMP or Q_BR (which is conditional jump). +// Contructs an IRinstruction with an void immediate only. +// Use IRfunction.null instead of contructing a new void immediate. +struct IRinstruction* IRinstruction_new_void(struct IRblock *owner) { + IRinstruction_constructor_shared_code + + self->op = IR_IMM; + self->type = IRT_VOID; + return (self); +} + +// Constructs a IRinstuction with instruction IR_JMP or IR_BR (which is conditional jump). struct IRinstruction* IRinstruction_new_jmp(struct IRblock *owner, int op, struct IRinstruction *cond, struct IRblock *bt, struct IRblock *bf) { IRinstruction_constructor_shared_code self->op = op; + self->type = IRT_VOID; self->cond = cond; self->bt = bt; self->bf = bf; @@ -103,8 +116,52 @@ int IRfunction_alloc_ins(struct IRfunction *self) { return self->ins_count++; } +// Translates a VType into an IR type code. +int IRTypecode_from_VType(const struct VType *v) { + int map[][2] = { + {VT_VOID, IRT_VOID}, + {VT_BOOL, IRT_I1}, + {VT_I32, IRT_I32}, + {VT_I64, IRT_I64}, + {VT_EXCEED} + }; + + for (int i = 0; map[i][0] != VT_EXCEED; ++i) { + if (map[i][0] == v->bt) { + return (map[i][1]); + } + } + + fail_unreachable(__FUNCTION__); +} + +int IRTypecode_integer_promote(int self) { + switch(self) { + case IRT_I1: case IRT_I32: + return (IRT_I32); + case IRT_I64: + return (IRT_I64); + default: + fail_unreachable(__FUNCTION__); + } +} + +// Returns a string identifier for the given type. +const char *IRTypecode_stringify(int self) { + static const char *map[] = { + "void", + "i1", + "i32", + "i64", + "ptr", + NULL + }; + + return map[self]; +} + // Translate an AST unary arithmetic opcode to a IR opcode. -static int translate_ast_unary_op(int op) { +static int IRopcode_from_ast_unary(int op) { switch (op) { case A_NEG: return (IR_NEG); case A_BNOT: return (IR_NOT); @@ -112,18 +169,40 @@ static int translate_ast_unary_op(int op) { } } +// Returns a string identifier for the given operation code. +const char* IRopcode_stringify(int self) { + static const char *map[] = { + "imm", + "phi", + "zext", + "sext", + "trunc", + "neg", + "not", + "eq", + "ret", + "jmp", + "br", + NULL + }; + + return map[self]; +} + +// DFS on an AST and build IR. static struct IRinstruction* IRcg_dfs(struct ASTnode *x, struct IRfunction *f, struct IRblock *b) { + // nothing to do, return the null object. if (x == NULL) { - return (NULL); + return (f->null); } switch (x->op) { case A_RETURN: { struct ASTunnode *t = (void*)x; struct IRinstruction *value = IRcg_dfs(t->left, f, b); - IRinstruction_new(b, IR_RET, value, NULL); + IRinstruction_new(b, IR_RET, IRT_VOID, value, NULL); b->is_complete = true; - return (NULL); + return (f->null); } case A_BLOCK: { @@ -133,7 +212,7 @@ static struct IRinstruction* IRcg_dfs(struct ASTnode *x, struct IRfunction *f, s IRcg_dfs((struct ASTnode*)p, f, b); p = p->nxt; } - return (NULL); + return (f->null); } case A_LIT_I32: { @@ -144,14 +223,21 @@ static struct IRinstruction* IRcg_dfs(struct ASTnode *x, struct IRfunction *f, s case A_NEG: case A_BNOT: { struct ASTunnode *t = (void*)x; struct IRinstruction *value = IRcg_dfs(t->left, f, b); - return (IRinstruction_new(b, translate_ast_unary_op(x->op), value, NULL)); + + int type = IRTypecode_integer_promote(value->type); + if (type != value->type) { + value = IRinstruction_new(b, IR_SEXT, type, value, NULL); + } + + return (IRinstruction_new(b, IRopcode_from_ast_unary(x->op), type, value, NULL)); } case A_LNOT: { struct ASTunnode *t = (void*)x; + // A logical not operation is basicly equivlant to comparing the value to 0. struct IRinstruction *value = IRcg_dfs(t->left, f, b), *zero = IRinstruction_new_i32(b, 0); - return (IRinstruction_new(b, IR_CMP_EQ, value, zero)); + return (IRinstruction_new(b, IR_CMP_EQ, IRT_I1, value, zero)); } default: { @@ -160,7 +246,7 @@ static struct IRinstruction* IRcg_dfs(struct ASTnode *x, struct IRfunction *f, s } } -// Generates Quad Repersentation from an AST +// Generates IR Repersentation from an AST struct IRfunction* IRfunction_from_ast(struct Afunction *afunc) { struct IRfunction *self = try_malloc(sizeof(struct IRfunction), __FUNCTION__); @@ -170,8 +256,9 @@ struct IRfunction* IRfunction_from_ast(struct Afunction *afunc) { self->ins_count = 0; llist_init(&self->bs); - struct IRblock *entry = IRblock_new(self); - IRcg_dfs(afunc->rt, self, entry); + struct IRblock *entry = IRblock_new(self); // construct the function entry block. + self->null = IRinstruction_new_void(entry); // initialize the null object. + IRcg_dfs(afunc->rt, self, entry); // generate code by doing a DFS in our AST. return (self); } @@ -213,28 +300,41 @@ void IRfunction_free(struct IRfunction *self) { // Outputs the instruction. void IRinstruction_print(struct IRinstruction *self, FILE *Outfile) { switch(self->op) { - case IR_IMM_I32: { - fprintf(Outfile, "\t$%d = i32 %d;\n", self->id, self->val_i32); + case IR_IMM: { + fprintf(Outfile, "\t$%d = %s", self->id, IRTypecode_stringify(self->type)); + switch (self->type) { + case IRT_VOID: { + } break; + + case IRT_I1: { + fprintf(Outfile, " %s", self->val_i1 ? "true" : "false"); + } break; + + case IRT_I32: { + fprintf(Outfile, " %d", self->val_i32); + } break; + + case IRT_I64: { + fprintf(Outfile, " %lld", self->val_i64); + } break; + } + fputs(";\n", Outfile); } break; case IR_RET: { - if (self->left) { - fprintf(Outfile, "\tret $%d.\n", self->left->id); - } else { - fputs("\tret.", Outfile); - } + fprintf(Outfile, "\tret $%d.\n", self->left->id); } break; - case IR_NEG: { - fprintf(Outfile, "\t$%d = neg $%d;\n", self->id, self->left->id); - } break; - - case IR_NOT: { - fprintf(Outfile, "\t$%d = not $%d;\n", self->id, self->left->id); + case IR_SEXT: case IR_ZEXT: case IR_TRUNC: + case IR_NEG: case IR_NOT: { + fprintf(Outfile, "\t$%d = %s %s $%d;\n", self->id, + IRTypecode_stringify(self->type), IRopcode_stringify(self->op), self->left->id); } break; case IR_CMP_EQ: { - fprintf(Outfile, "\t$%d = eq $%d, $%d;\n", self->id, self->left->id, self->right->id); + fprintf(Outfile, "\t$%d = %s %s $%d $%d;\n", self->id + , IRTypecode_stringify(self->type), IRopcode_stringify(self->op), self->left->id + , self->right->id); } break; default: { @@ -246,6 +346,7 @@ void IRinstruction_print(struct IRinstruction *self, FILE *Outfile) { // Outputs the containing instructions of the IRblock. void IRblock_print(struct IRblock *self, FILE *Outfile) { fprintf(Outfile, "L%d:\n", self->id); + struct llist_node *p = self->ins.head; while (p) { IRinstruction_print((void*)p, Outfile); @@ -256,6 +357,7 @@ void IRblock_print(struct IRblock *self, FILE *Outfile) { // Outputs the containing instructions of the IRfunction. void IRfunction_print(struct IRfunction *self, FILE *Outfile) { fprintf(Outfile, "%s:\n", self->name); + struct llist_node *p = self->bs.head; while (p) { IRblock_print((void*)p, Outfile); diff --git a/src/fatals.c b/src/fatals.c index 18c9fc9..9f5c7e8 100644 --- a/src/fatals.c +++ b/src/fatals.c @@ -10,6 +10,11 @@ void fail_todo(const char *func_name) { exit(1); } +void fail_unreachable(const char *func_name) { + fprintf(stderr, "%s: Unreachable reached.", func_name); + exit(1); +} + void fail_type(int line) { fprintf(stderr, "sytax error on line %d: incorrect or incomplete type.\n", line); exit(1); diff --git a/src/target.c b/src/target.c index 65790f9..a436ba7 100644 --- a/src/target.c +++ b/src/target.c @@ -6,14 +6,22 @@ // Parse the target string int target_parse(const char *target_string) { static const char *target_map_k[] = { - "_ast", - "_acir", + "x86_64", + "x86", + "unknown16", + "unknown32", + "riscv_32", + "riscv_64", NULL }; static const int target_map_v[] = { - TARGET_AST, - TARGET_ACIR, + TARGET_X86_64, + TARGET_X86_32, + TARGET_UNKNOWN_16, + TARGET_UNKNOWN_32, + TARGET_RISCV_32, + TARGET_RISCV_64, }; for (int i = 0; target_map_k[i]; ++i) { @@ -24,3 +32,33 @@ int target_parse(const char *target_string) { fail_target(target_string); } + +struct target_info Tinfo; + +void Tinfo_load(int target) { + static struct target_info map[] = { + { // x86-64 + .int_size = 4, + .long_size = 8, + }, { // x84 + .int_size = 4, + .long_size = 4, + }, { // unknown16 + .int_size = 2, + .long_size = 2, + }, { // unknown32 + .int_size = 4, + .long_size = 4, + }, { // riscv_32 + .int_size = 4, + .long_size = 4, + }, { // riscv_64 + .int_size = 4, + .long_size = 8 + }}; + + if (target < 0 || target >= TARGET_NULL) { + fail_unreachable(__FUNCTION__); + } + Tinfo = map[target]; +} \ No newline at end of file diff --git a/src/util/critbit.c b/src/util/critbit.c index ac409f4..edba713 100644 --- a/src/util/critbit.c +++ b/src/util/critbit.c @@ -4,6 +4,7 @@ #include "util/critbit.h" #include "util/misc.h" #include +#include // critbit tree internal node struct critbit_intern { @@ -179,7 +180,7 @@ struct critbit_node *critbit_erase(struct critbit_tree *self, const char *key) { void **wherep = &self->rt; void **whereq = NULL; struct critbit_intern *p = self->rt, *q; - const size_t len = strlen(str); + const size_t len = strlen(key); while (p->type == 0) { whereq = wherep; @@ -197,10 +198,10 @@ struct critbit_node *critbit_erase(struct critbit_tree *self, const char *key) { if (!whereq) { self->rt = NULL; - return (p); + return ((void*)p); } *whereq = q->child[1 - dir]; free(q); - return (p); + return ((void*)p); }