466 lines
10 KiB
C
466 lines
10 KiB
C
#include <stdio.h>
|
|
#include <stdlib.h>
|
|
#include <stdbool.h>
|
|
#include <stdint.h>
|
|
#include "scan.h"
|
|
#include "token.h"
|
|
#include "ast.h"
|
|
#include "fatals.h"
|
|
|
|
// Parsing Context
|
|
struct Pcontext {
|
|
struct linklist tokens; // token list
|
|
struct llist_node *cur; // current token
|
|
struct VType func_type; // current function return type
|
|
};
|
|
|
|
// Checks that we have a binary operator and return its precedence.
|
|
// Operators with larger precedence value will be evaluated first.
|
|
static int op_precedence(struct token *t) {
|
|
switch (t->type) {
|
|
case T_ASSIGN:
|
|
return (20);
|
|
case T_GT: case T_GE: case T_LT: case T_LE:
|
|
return (40);
|
|
case T_EQ: case T_NE:
|
|
return (50);
|
|
case T_PLUS: case T_MINUS:
|
|
return (70);
|
|
case T_STAR: case T_SLASH:
|
|
return (90);
|
|
default:
|
|
fail_ce_expect(t->line, "an operator", token_typename[t->type]);
|
|
}
|
|
}
|
|
|
|
// Converts a binary arithmetic token into an AST operation.
|
|
static int binary_arithop(struct token *t) {
|
|
static const int map[][2] = {
|
|
{T_PLUS, A_ADD},
|
|
{T_MINUS, A_SUB},
|
|
{T_STAR, A_MUL},
|
|
{T_SLASH, A_DIV},
|
|
{T_EQ, A_EQ},
|
|
{T_NE, A_NE},
|
|
{T_LT, A_LT},
|
|
{T_LE, A_LE},
|
|
{T_GT, A_GT},
|
|
{T_GE, A_GE},
|
|
{T_ASSIGN, A_ASSIGN},
|
|
{T_LAND, A_LAND},
|
|
{T_LOR, A_LOR},
|
|
{-1}
|
|
};
|
|
|
|
for (int i = 0; map[i][0] != -1; ++i) {
|
|
if (t->type == map[i][0]) {
|
|
return map[i][1];
|
|
}
|
|
}
|
|
fail_ce_expect(t->line, "an binary operator", token_typename[t->type]);
|
|
}
|
|
|
|
// Converts a unary arithmetic token into an AST operation.
|
|
static int unary_arithop(struct token *t) {
|
|
static const int map[][2] = {
|
|
{T_MINUS, A_NEG},
|
|
{T_LNOT, A_LNOT},
|
|
{T_BNOT, A_BNOT},
|
|
{-1}
|
|
};
|
|
|
|
for (int i = 0; map[i][0] != -1; ++i) {
|
|
if (t->type == map[i][0]) {
|
|
return map[i][1];
|
|
}
|
|
}
|
|
|
|
fail_ce_expect(t->line, "an unary operator", token_typename[t->type]);
|
|
}
|
|
|
|
// Operator associativity direction
|
|
// Returns false if left to right, e.g. +
|
|
// true if right to left, e.g. =
|
|
static bool direction_rtl(int t) {
|
|
switch(t) {
|
|
case T_ASSIGN:
|
|
return (true);
|
|
default:
|
|
return (false);
|
|
}
|
|
}
|
|
|
|
// Next token
|
|
static void next(struct Pcontext *ctx) {
|
|
if (ctx->cur) {
|
|
ctx->cur = ctx->cur->nxt;
|
|
}
|
|
}
|
|
|
|
// return current token from input stream
|
|
static struct token* current(struct Pcontext *ctx) {
|
|
static struct token token_eof = {
|
|
.type = T_EOF
|
|
};
|
|
|
|
if (ctx->cur) {
|
|
return ((void*)ctx->cur);
|
|
}
|
|
return (&token_eof);
|
|
}
|
|
|
|
// match a token or report syntax error
|
|
static void match(struct Pcontext *ctx, int t) {
|
|
if (current(ctx)->type == t) {
|
|
next(ctx);
|
|
} else {
|
|
fail_ce_expect(current(ctx)->line, token_typename[t], token_typename[current(ctx)->type]);
|
|
}
|
|
}
|
|
|
|
// check current token's type or report syntax error.
|
|
static void expect(struct Pcontext *ctx, int t) {
|
|
if (current(ctx)->type != t) {
|
|
fail_ce_expect(current(ctx)->line, token_typename[t], token_typename[current(ctx)->type]);
|
|
}
|
|
}
|
|
|
|
static struct ASTnode* statement(struct Pcontext *ctx);
|
|
static struct ASTnode* expression(struct Pcontext *ctx);
|
|
|
|
// Parse a primary factor and return an
|
|
// AST node representing it.
|
|
static struct ASTnode* primary(struct Pcontext *ctx) {
|
|
struct ASTnode *res;
|
|
struct token *t = current(ctx);
|
|
|
|
if (t->type == T_LP) {
|
|
// ( expr ) considered as primary
|
|
next(ctx);
|
|
res = expression(ctx);
|
|
match(ctx, T_RP);
|
|
} else if (t->type == T_I32_LIT) {
|
|
res = ASTi32node_new(t->val_i32);
|
|
next(ctx);
|
|
} else if (t->type == T_I64_LIT) {
|
|
res = ASTi64node_new(current(ctx)->val_i64);
|
|
next(ctx);
|
|
} else if (t->type == T_ID) {
|
|
// TODO: identifier.
|
|
fail_ce(t->line, "got an identifier");
|
|
/*
|
|
int id = findglob((char*)current(ctx)->val);
|
|
if (id == -1) {
|
|
fprintf(stderr, "syntax error on line %d: unknown indentifier %s.\n", Line, (char*)current(ctx)->val);
|
|
exit(1);
|
|
}
|
|
next(ctx);
|
|
return (ASTvarnode_new(id));
|
|
*/
|
|
} else {
|
|
fail_ce(t->line, "primary expression expected");
|
|
}
|
|
return (res);
|
|
}
|
|
|
|
// Returns whether the given token type can be a prefix operator (negation, logical not, bitwise not)
|
|
static bool is_prefix_op(int op) {
|
|
switch (op) {
|
|
case T_MINUS: case T_LNOT: case T_BNOT:
|
|
return (true);
|
|
|
|
default:
|
|
return (false);
|
|
}
|
|
}
|
|
|
|
// Parses a primary expression with prefixes, e.g. ~10
|
|
static struct ASTnode* prefixed_primary(struct Pcontext *ctx) {
|
|
struct token *t = current(ctx);
|
|
|
|
if (is_prefix_op(t->type)) {
|
|
next(ctx);
|
|
struct ASTnode *child = prefixed_primary(ctx);
|
|
return (ASTunnode_new(unary_arithop(t), child, t->line));
|
|
}
|
|
|
|
return (primary(ctx));
|
|
}
|
|
|
|
// Returns whether the given token type can be a binary operator.
|
|
static bool is_binop(int t) {
|
|
switch (t) {
|
|
case T_ASSIGN:
|
|
case T_PLUS: case T_MINUS: case T_STAR: case T_SLASH:
|
|
case T_LAND: case T_LOR:
|
|
case T_EQ: case T_NE: case T_LT:
|
|
case T_GT: case T_LE: case T_GE:
|
|
return (true);
|
|
|
|
default:
|
|
return (false);
|
|
}
|
|
}
|
|
|
|
// Return an AST tree whose root is a binary operator
|
|
static struct ASTnode* binexpr(struct Pcontext *ctx, int precedence) {
|
|
struct ASTnode *left, *right;
|
|
|
|
left = prefixed_primary(ctx);
|
|
struct token *op = current(ctx);
|
|
if (!is_binop(op->type)) {
|
|
return (left);
|
|
}
|
|
|
|
int tp = op_precedence(op);
|
|
while (tp > precedence) {
|
|
next(ctx);
|
|
|
|
if (direction_rtl(op->type)) {
|
|
right = binexpr(ctx, precedence);
|
|
left = ASTassignnode_new(binary_arithop(op), left, right);
|
|
} else {
|
|
right = binexpr(ctx, tp);
|
|
left = ASTbinnode_new(binary_arithop(op), left, right); // join right into left
|
|
}
|
|
|
|
op = current(ctx);
|
|
if (!is_binop(op->type)) {
|
|
return (left);
|
|
}
|
|
tp = op_precedence(op);
|
|
}
|
|
return (left);
|
|
}
|
|
|
|
// parse one block of code, e.g. { a; b; }
|
|
static struct ASTnode* block(struct Pcontext *ctx) {
|
|
match(ctx, T_LB);
|
|
if (current(ctx)->type == T_RB) {
|
|
next(ctx);
|
|
return (NULL);
|
|
}
|
|
|
|
struct ASTblocknode* res = (struct ASTblocknode*)ASTblocknode_new();
|
|
while (current(ctx)->type != T_RB) {
|
|
struct ASTnode *x;
|
|
x = statement(ctx);
|
|
llist_pushback_notnull(&res->st, x);
|
|
|
|
if (current(ctx)->type == T_EOF) {
|
|
break;
|
|
}
|
|
}
|
|
match(ctx, T_RB);
|
|
return ((struct ASTnode*)res);
|
|
}
|
|
|
|
// parse an expression
|
|
static struct ASTnode* expression(struct Pcontext *ctx) {
|
|
if (current(ctx)->type == T_SEMI) {
|
|
return (NULL);
|
|
}
|
|
|
|
return (binexpr(ctx, 0));
|
|
}
|
|
|
|
/*
|
|
// parse variable declaration statement
|
|
static struct ASTnode* var_declaration(void) {
|
|
match(ctx, T_INT);
|
|
expect(ctx, T_IDENT);
|
|
if (findglob((char*)current(ctx)->val) != -1) {
|
|
fail_ce("variable declared twice.");
|
|
}
|
|
addglob((char*)current(ctx)->val);
|
|
next(ctx);
|
|
match(ctx, T_SEMI);
|
|
return (NULL);
|
|
}
|
|
*/
|
|
|
|
// parse an if statement
|
|
static struct ASTnode* if_statement(struct Pcontext *ctx) {
|
|
match(ctx, T_IF); // if
|
|
match(ctx, T_LP); // (
|
|
struct ASTnode* cond = expression(ctx);
|
|
match(ctx, T_RP); // )
|
|
struct ASTnode* then = statement(ctx);
|
|
struct ASTnode* else_then;
|
|
if (current(ctx)->type == T_ELSE) {
|
|
next(ctx); // else
|
|
else_then = statement(ctx);
|
|
} else {
|
|
else_then = NULL; // empty block
|
|
}
|
|
return (ASTifnode_new(then, else_then, cond));
|
|
}
|
|
|
|
// parse an while statement
|
|
static struct ASTnode* while_statement(struct Pcontext *ctx) {
|
|
match(ctx, T_WHILE);
|
|
match(ctx, T_LP);
|
|
struct ASTnode* cond = expression(ctx);
|
|
match(ctx, T_RP);
|
|
struct ASTnode* body = statement(ctx);
|
|
return (ASTbinnode_new(A_WHILE, cond, body));
|
|
}
|
|
|
|
// parse a for statement (into a while loop)
|
|
static struct ASTnode* for_statement(struct Pcontext *ctx) {
|
|
match(ctx, T_FOR);
|
|
match(ctx, T_LP);
|
|
struct ASTnode *init = statement(ctx);
|
|
|
|
struct ASTnode *cond;
|
|
if (current(ctx)->type != T_SEMI) {
|
|
cond = expression(ctx);
|
|
} else {
|
|
cond = ASTi32node_new(1);
|
|
}
|
|
match(ctx, T_SEMI);
|
|
|
|
struct ASTnode *inc;
|
|
if (current(ctx)->type != T_RP) {
|
|
inc = expression(ctx);
|
|
} else {
|
|
inc = NULL;
|
|
}
|
|
|
|
match(ctx, T_RP);
|
|
struct ASTnode *body = statement(ctx);
|
|
struct ASTblocknode *container = (void*)ASTblocknode_new();
|
|
struct ASTnode *wbody;
|
|
|
|
if (body == NULL && inc == NULL) {
|
|
wbody = NULL;
|
|
} else if (body == NULL) {
|
|
wbody = inc;
|
|
} else if (inc == NULL) {
|
|
wbody = body;
|
|
} else {
|
|
struct ASTblocknode* wt = (void*)ASTblocknode_new();
|
|
llist_pushback_notnull(&wt->st, body);
|
|
llist_pushback_notnull(&wt->st, inc);
|
|
wbody = (void*)wt;
|
|
}
|
|
|
|
llist_pushback_notnull(&container->st, init);
|
|
llist_pushback(&container->st, ASTbinnode_new(A_WHILE, cond, wbody));
|
|
return ((void*)container);
|
|
}
|
|
|
|
static struct ASTnode* return_statement(struct Pcontext *ctx) {
|
|
match(ctx, T_RETURN);
|
|
struct ASTnode *res = expression(ctx);
|
|
match(ctx, T_SEMI);
|
|
return (ASTunnode_new(A_RETURN, res, current(ctx)->line));
|
|
}
|
|
|
|
// parse one statement
|
|
static struct ASTnode* statement(struct Pcontext *ctx) {
|
|
switch (current(ctx)->type) {
|
|
case T_LB:
|
|
return (block(ctx));
|
|
|
|
case T_SEMI:
|
|
return (NULL);
|
|
|
|
// case T_INT:
|
|
// return (var_declaration());
|
|
case T_IF:
|
|
return (if_statement(ctx));
|
|
|
|
case T_WHILE:
|
|
return (while_statement(ctx));
|
|
|
|
case T_FOR:
|
|
return (for_statement(ctx));
|
|
|
|
case T_RETURN:
|
|
return (return_statement(ctx));
|
|
|
|
default: {
|
|
struct ASTnode* res = expression(ctx);
|
|
match(ctx, T_SEMI);
|
|
return (res);
|
|
}
|
|
}
|
|
}
|
|
|
|
static bool parse_type(struct VType *self, struct Pcontext *ctx, bool ce) {
|
|
struct token *t = current(ctx);
|
|
switch (t->type) {
|
|
case T_INT: {
|
|
self->bt = VT_I32;
|
|
next(ctx);
|
|
} break;
|
|
|
|
case T_VOID: {
|
|
self->bt = VT_VOID;
|
|
next(ctx);
|
|
} break;
|
|
|
|
case T_LONG: {
|
|
self->bt = VT_I64;
|
|
next(ctx);
|
|
} break;
|
|
|
|
default: {
|
|
if (ce) {
|
|
fail_ce_expect(t->line, "a typename or type classifier", token_typename[t->type]);
|
|
} else {
|
|
return (false);
|
|
}
|
|
}
|
|
}
|
|
|
|
return (true);
|
|
}
|
|
|
|
// Parse one top-level function
|
|
// Sets the func_name param.
|
|
static struct Afunction* function(struct Pcontext *ctx) {
|
|
struct Afunction *res = Afunction_new();
|
|
|
|
parse_type(&ctx->func_type, ctx, true);
|
|
expect(ctx, T_ID);
|
|
res->name = current(ctx)->val_s; // transfer ownership of the identifier string to caller
|
|
current(ctx)->val_s = NULL; // prevent it from being freed in token_free() called by next(ctx).
|
|
next(ctx);
|
|
|
|
match(ctx, T_LP);
|
|
if (current(ctx)->type == T_VOID) {
|
|
next(ctx);
|
|
goto END_PARAM_LIST;
|
|
}
|
|
// TODO: parameter list
|
|
|
|
END_PARAM_LIST:
|
|
match(ctx, T_RP);
|
|
res->rt = block(ctx);
|
|
return (res);
|
|
}
|
|
|
|
// Frees a Pcontext and all its components.
|
|
static void Pcontext_free(struct Pcontext *ctx) {
|
|
struct llist_node *p = ctx->tokens.head, *nxt;
|
|
while (p) {
|
|
nxt = p->nxt;
|
|
token_free((void*)p);
|
|
p = nxt;
|
|
}
|
|
}
|
|
|
|
// Parse source into AST.
|
|
struct Afunction* Afunction_from_source(const char *filename) {
|
|
struct Pcontext ctx = {
|
|
.tokens = scan_tokens(filename),
|
|
};
|
|
ctx.cur = ctx.tokens.head;
|
|
|
|
struct Afunction* res = function(&ctx);
|
|
Pcontext_free(&ctx);
|
|
return (res);
|
|
}
|