acc/src/parse.c
szdytom e1211ebf61 add value type in ast
Signed-off-by: szdytom <szdytom@163.com>
2023-06-15 14:53:14 +08:00

466 lines
10 KiB
C

#include <stdio.h>
#include <stdlib.h>
#include <stdbool.h>
#include <stdint.h>
#include "scan.h"
#include "token.h"
#include "ast.h"
#include "fatals.h"
// Parsing context: the mutable state handed to every parser routine in this file.
struct Pcontext {
struct linklist tokens; // token list (filled from scan_tokens() when parsing starts)
struct llist_node *cur; // current token; when NULL, current() reports a T_EOF token instead
struct VType func_type; // current function return type (written by function() through parse_type())
};
// Checks that we have a binary operator and return its precedence.
// Operators with larger precedence value will be evaluated first.
static int op_precedence(struct token *t) {
switch (t->type) {
case T_ASSIGN:
return (20);
case T_GT: case T_GE: case T_LT: case T_LE:
return (40);
case T_EQ: case T_NE:
return (50);
case T_PLUS: case T_MINUS:
return (70);
case T_STAR: case T_SLASH:
return (90);
default:
fail_ce_expect(t->line, "an operator", token_typename[t->type]);
}
}
// Maps a binary operator token onto the matching AST operation code.
// A token type with no mapping is reported through fail_ce_expect().
static int binary_arithop(struct token *t) {
	switch (t->type) {
		case T_PLUS:
			return (A_ADD);
		case T_MINUS:
			return (A_SUB);
		case T_STAR:
			return (A_MUL);
		case T_SLASH:
			return (A_DIV);
		case T_EQ:
			return (A_EQ);
		case T_NE:
			return (A_NE);
		case T_LT:
			return (A_LT);
		case T_LE:
			return (A_LE);
		case T_GT:
			return (A_GT);
		case T_GE:
			return (A_GE);
		case T_ASSIGN:
			return (A_ASSIGN);
		case T_LAND:
			return (A_LAND);
		case T_LOR:
			return (A_LOR);
		default:
			break;
	}
	// Not a binary operator: raise a compile error.
	fail_ce_expect(t->line, "an binary operator", token_typename[t->type]);
}
// Maps a unary (prefix) operator token onto the matching AST operation code.
// A token type with no mapping is reported through fail_ce_expect().
static int unary_arithop(struct token *t) {
	switch (t->type) {
		case T_MINUS:
			return (A_NEG);
		case T_LNOT:
			return (A_LNOT);
		case T_BNOT:
			return (A_BNOT);
		default:
			break;
	}
	// Not a unary operator: raise a compile error.
	fail_ce_expect(t->line, "an unary operator", token_typename[t->type]);
}
// Associativity of the operator with token type `t`.
// Returns true for right-to-left operators (only assignment, `=`),
// false for left-to-right ones (everything else, e.g. `+`).
static bool direction_rtl(int t) {
	return (t == T_ASSIGN);
}
// Advances the context to the following token.
// Does nothing once the end of the token list has been reached.
static void next(struct Pcontext *ctx) {
	if (!ctx->cur) {
		return;
	}
	ctx->cur = ctx->cur->nxt;
}
// return current token from input stream
static struct token* current(struct Pcontext *ctx) {
static struct token token_eof = {
.type = T_EOF
};
if (ctx->cur) {
return ((void*)ctx->cur);
}
return (&token_eof);
}
// match a token or report syntax error
static void match(struct Pcontext *ctx, int t) {
if (current(ctx)->type == t) {
next(ctx);
} else {
fail_ce_expect(current(ctx)->line, token_typename[t], token_typename[current(ctx)->type]);
}
}
// check current token's type or report syntax error.
static void expect(struct Pcontext *ctx, int t) {
if (current(ctx)->type != t) {
fail_ce_expect(current(ctx)->line, token_typename[t], token_typename[current(ctx)->type]);
}
}
// Forward declarations: statement() and expression() are called by parser
// routines that are defined before them (e.g. block() and primary()).
static struct ASTnode* statement(struct Pcontext *ctx);
static struct ASTnode* expression(struct Pcontext *ctx);
// Parses a primary factor and returns the AST node representing it.
//
// Accepted forms:
//   ( expr )      - a parenthesised expression
//   T_I32_LIT     - a 32-bit integer literal
//   T_I64_LIT     - a 64-bit integer literal
// Identifiers are not supported yet and are reported through fail_ce(), as
// is any other token. On those error paths the result is NULL.
static struct ASTnode* primary(struct Pcontext *ctx) {
	struct ASTnode *res = NULL; // never returned indeterminate, even if fail_ce() returns
	struct token *t = current(ctx);
	switch (t->type) {
		case T_LP:
			// ( expr ) is considered a primary
			next(ctx);
			res = expression(ctx);
			match(ctx, T_RP);
			break;
		case T_I32_LIT:
			res = ASTi32node_new(t->val_i32);
			next(ctx);
			break;
		case T_I64_LIT:
			res = ASTi64node_new(t->val_i64);
			next(ctx);
			break;
		case T_ID:
			// TODO: identifier lookup (look the name up and build a variable node).
			fail_ce(t->line, "got an identifier");
			break;
		default:
			fail_ce(t->line, "primary expression expected");
			break;
	}
	return (res);
}
// Tells whether token type `op` may appear as a prefix operator:
// negation (-), logical not, or bitwise not.
static bool is_prefix_op(int op) {
	return (op == T_MINUS || op == T_LNOT || op == T_BNOT);
}
// Parses a primary expression preceded by any number of prefix operators,
// e.g. ~10 or -!x. Each prefix wraps the operand parsed after it in a unary node.
static struct ASTnode* prefixed_primary(struct Pcontext *ctx) {
	struct token *prefix = current(ctx);
	if (!is_prefix_op(prefix->type)) {
		// No prefix: plain primary.
		return (primary(ctx));
	}
	next(ctx); // consume the prefix operator
	struct ASTnode *operand = prefixed_primary(ctx);
	int unary_op = unary_arithop(prefix);
	return (ASTunnode_new(unary_op, operand, prefix->line));
}
// Tells whether token type `t` may appear as a binary operator:
// assignment, arithmetic, logical and/or, and the six comparisons.
static bool is_binop(int t) {
	const bool assignment = (t == T_ASSIGN);
	const bool arithmetic = (t == T_PLUS || t == T_MINUS || t == T_STAR || t == T_SLASH);
	const bool logical = (t == T_LAND || t == T_LOR);
	const bool comparison = (t == T_EQ || t == T_NE || t == T_LT
		|| t == T_GT || t == T_LE || t == T_GE);
	return (assignment || arithmetic || logical || comparison);
}
// Parses a binary expression and returns the root of its AST.
// Only operators whose precedence is strictly greater than `precedence`
// are consumed at this level; parsing stops at the first token that is not
// a binary operator or whose precedence is not high enough.
static struct ASTnode* binexpr(struct Pcontext *ctx, int precedence) {
	struct ASTnode *lhs = prefixed_primary(ctx);
	for (;;) {
		struct token *op = current(ctx);
		if (!is_binop(op->type)) {
			break;
		}
		int op_prec = op_precedence(op);
		if (op_prec <= precedence) {
			break;
		}
		next(ctx); // consume the operator
		if (direction_rtl(op->type)) {
			// Right-to-left: the right operand is parsed with the caller's
			// own precedence limit and joined through an assignment node.
			struct ASTnode *rhs = binexpr(ctx, precedence);
			lhs = ASTassignnode_new(binary_arithop(op), lhs, rhs);
		} else {
			// Left-to-right: the right operand may only take operators that
			// bind tighter than this one; the result becomes the new left.
			struct ASTnode *rhs = binexpr(ctx, op_prec);
			lhs = ASTbinnode_new(binary_arithop(op), lhs, rhs);
		}
	}
	return (lhs);
}
// Parses one brace-delimited block of statements, e.g. { a; b; }.
// Returns NULL for an empty block `{}`, otherwise a block node holding the
// non-NULL statements in source order.
static struct ASTnode* block(struct Pcontext *ctx) {
	match(ctx, T_LB);
	if (current(ctx)->type == T_RB) {
		// Empty block: nothing to build.
		next(ctx);
		return (NULL);
	}
	struct ASTblocknode *node = (struct ASTblocknode*)ASTblocknode_new();
	// The block is known to be non-empty here, so at least one statement is parsed.
	do {
		struct ASTnode *stmt = statement(ctx);
		llist_pushback_notnull(&node->st, stmt);
		// Stop at the closing brace, or at end of input so that the
		// match() below reports the missing brace.
	} while (current(ctx)->type != T_RB && current(ctx)->type != T_EOF);
	match(ctx, T_RB);
	return ((struct ASTnode*)node);
}
// Parses an expression.
// Returns NULL (consuming nothing) when the current token is a semicolon,
// i.e. the expression is empty; otherwise parses a full binary expression.
static struct ASTnode* expression(struct Pcontext *ctx) {
	if (current(ctx)->type != T_SEMI) {
		return (binexpr(ctx, 0));
	}
	return (NULL);
}
/*
// parse variable declaration statement
static struct ASTnode* var_declaration(void) {
match(ctx, T_INT);
expect(ctx, T_IDENT);
if (findglob((char*)current(ctx)->val) != -1) {
fail_ce("variable declared twice.");
}
addglob((char*)current(ctx)->val);
next(ctx);
match(ctx, T_SEMI);
return (NULL);
}
*/
// Parses `if ( cond ) statement [ else statement ]` and returns an if-node.
// The else branch is NULL when no `else` follows.
static struct ASTnode* if_statement(struct Pcontext *ctx) {
	match(ctx, T_IF);
	match(ctx, T_LP);
	struct ASTnode *condition = expression(ctx);
	match(ctx, T_RP);
	struct ASTnode *on_true = statement(ctx);
	struct ASTnode *on_false = NULL; // stays NULL without an else branch
	if (current(ctx)->type == T_ELSE) {
		next(ctx); // consume `else`
		on_false = statement(ctx);
	}
	return (ASTifnode_new(on_true, on_false, condition));
}
// Parses `while ( cond ) statement` and returns an A_WHILE binary node
// whose left child is the condition and whose right child is the body.
static struct ASTnode* while_statement(struct Pcontext *ctx) {
	struct ASTnode *condition;
	struct ASTnode *loop_body;

	match(ctx, T_WHILE);
	match(ctx, T_LP);
	condition = expression(ctx);
	match(ctx, T_RP);
	loop_body = statement(ctx);
	return (ASTbinnode_new(A_WHILE, condition, loop_body));
}
// Parses `for ( init ; cond ; inc ) body` and lowers it into a while loop:
//
//   { init; while (cond) { body; inc; } }
//
// Every clause may be empty. A missing condition is replaced by the
// constant 1 (loop forever). Returns the outer block node.
static struct ASTnode* for_statement(struct Pcontext *ctx) {
	match(ctx, T_FOR);
	match(ctx, T_LP);

	// Init clause. A non-empty init is parsed as a statement, which consumes
	// its own terminating ';'. An empty init is handled here so that its ';'
	// is consumed as well; otherwise `for (;;)` would leave the clause
	// separators out of step and fail on the closing parenthesis.
	struct ASTnode *init = NULL;
	if (current(ctx)->type == T_SEMI) {
		next(ctx);
	} else {
		init = statement(ctx);
	}

	// Condition clause; empty means "always true".
	struct ASTnode *cond;
	if (current(ctx)->type != T_SEMI) {
		cond = expression(ctx);
	} else {
		cond = ASTi32node_new(1);
	}
	match(ctx, T_SEMI);

	// Increment clause; may be empty.
	struct ASTnode *inc = NULL;
	if (current(ctx)->type != T_RP) {
		inc = expression(ctx);
	}
	match(ctx, T_RP);

	struct ASTnode *body = statement(ctx);

	// Loop body of the generated while: body followed by inc. A block is
	// only needed when both are present.
	struct ASTnode *wbody;
	if (body != NULL && inc != NULL) {
		struct ASTblocknode *wt = (void*)ASTblocknode_new();
		llist_pushback_notnull(&wt->st, body);
		llist_pushback_notnull(&wt->st, inc);
		wbody = (void*)wt;
	} else if (body != NULL) {
		wbody = body;
	} else {
		wbody = inc; // may be NULL when both are empty
	}

	struct ASTblocknode *container = (void*)ASTblocknode_new();
	llist_pushback_notnull(&container->st, init);
	llist_pushback(&container->st, ASTbinnode_new(A_WHILE, cond, wbody));
	return ((void*)container);
}
static struct ASTnode* return_statement(struct Pcontext *ctx) {
match(ctx, T_RETURN);
struct ASTnode *res = expression(ctx);
match(ctx, T_SEMI);
return (ASTunnode_new(A_RETURN, res, current(ctx)->line));
}
// Parses one statement and returns its AST, or NULL for an empty statement
// (a lone `;`) or an empty block.
// Every branch consumes the whole statement, including its terminator.
static struct ASTnode* statement(struct Pcontext *ctx) {
	switch (current(ctx)->type) {
		case T_LB:
			return (block(ctx));
		case T_SEMI:
			// Empty statement: the ';' must be consumed here. Leaving it in
			// place makes block() call statement() on the same token forever.
			next(ctx);
			return (NULL);
		// TODO: variable declarations (T_INT) are not supported yet.
		case T_IF:
			return (if_statement(ctx));
		case T_WHILE:
			return (while_statement(ctx));
		case T_FOR:
			return (for_statement(ctx));
		case T_RETURN:
			return (return_statement(ctx));
		default: {
			// Expression statement: expr ;
			struct ASTnode* res = expression(ctx);
			match(ctx, T_SEMI);
			return (res);
		}
	}
}
// Parses a type name at the current token into `self`.
//   T_INT  -> VT_I32,  T_VOID -> VT_VOID,  T_LONG -> VT_I64
// On success the token is consumed and true is returned. When the current
// token is not a type: with `ce` set a compile error is reported, otherwise
// false is returned and nothing is consumed.
static bool parse_type(struct VType *self, struct Pcontext *ctx, bool ce) {
	struct token *t = current(ctx);
	switch (t->type) {
		case T_INT:
			self->bt = VT_I32;
			break;
		case T_VOID:
			self->bt = VT_VOID;
			break;
		case T_LONG:
			self->bt = VT_I64;
			break;
		default:
			if (!ce) {
				return (false);
			}
			fail_ce_expect(t->line, "a typename or type classifier", token_typename[t->type]);
			return (true);
	}
	next(ctx); // consume the type keyword
	return (true);
}
// Parse one top-level function
// Sets the func_name param.
static struct Afunction* function(struct Pcontext *ctx) {
struct Afunction *res = Afunction_new();
parse_type(&ctx->func_type, ctx, true);
expect(ctx, T_ID);
res->name = current(ctx)->val_s; // transfer ownership of the identifier string to caller
current(ctx)->val_s = NULL; // prevent it from being freed in token_free() called by next(ctx).
next(ctx);
match(ctx, T_LP);
if (current(ctx)->type == T_VOID) {
next(ctx);
goto END_PARAM_LIST;
}
// TODO: parameter list
END_PARAM_LIST:
match(ctx, T_RP);
res->rt = block(ctx);
return (res);
}
// Releases every token held by the context's token list.
// The Pcontext structure itself is not freed.
static void Pcontext_free(struct Pcontext *ctx) {
	struct llist_node *following;
	for (struct llist_node *node = ctx->tokens.head; node; node = following) {
		// Read the successor first: the node is gone after token_free().
		following = node->nxt;
		token_free((void*)node);
	}
}
// Scans the file `filename` into tokens, parses one function from them and
// returns its AST. The tokens are released before returning.
struct Afunction* Afunction_from_source(const char *filename) {
	struct Pcontext ctx = {
		.tokens = scan_tokens(filename),
	};
	// Start at the first token of the freshly scanned list.
	ctx.cur = ctx.tokens.head;

	struct Afunction *parsed = function(&ctx);
	Pcontext_free(&ctx);
	return (parsed);
}