629 lines
13 KiB
C

/* Assembler for Y86-64 instruction set */
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include "yas.h"
#include "isa.h"
/* Forward declarations for symbol-table helpers defined later in this file */
void add_symbol(char *, int);
int find_symbol(char *);
/* Declared here but not defined in the code shown in this file */
int instr_size(char *);
/* Not referenced in the code shown here.
   NOTE(review): presumably consumed by shared simulator code - confirm */
int gui_mode = 0;
/* Destination of the generated listing; main() sets it to stdout in
   Verilog mode and to the ".yo" file otherwise */
FILE *outfile;
/* When nonzero, yywrap() prints the symbol table at the end of pass 2 */
int verbose = 0;
/* Generate initialized memory for Verilog? (set by the -V option) */
int vcode = 0;
/* Should it generate code for banked memory? Holds the blocking factor n
   from -Vn; 0 means a single flat "mem" array */
int block_factor = 0;
int lineno = 1; /* Line number of input file */
int bytepos = 0; /* Address of current instruction being processed */
int error_mode = 0; /* Am I trying to finish off a line with an error? */
int hit_error = 0; /* Have I hit any errors? */
int pass = 1; /* Am I in pass 1 or 2? */
/* General strategy is to read the tokens for a complete line and then
process them all at once in finish_line().
*/
/* Capacity of the per-line token array */
#define TOK_PER_LINE 12
/* Token types. TOK_ERR doubles as the "empty slot" marker: start_line()
   resets every entry of tokens[] to it. */
typedef enum{ TOK_IDENT, TOK_NUM, TOK_REG, TOK_INSTR, TOK_PUNCT, TOK_ERR }
token_t;
/* Token representation. Which of sval/ival/cval is meaningful depends on
   type: sval for identifiers, registers and instructions, ival for
   numbers, cval for punctuation. */
typedef struct {
char *sval; /* String */
word_t ival; /* Integer */
char cval; /* Character */
token_t type; /* Type */
} token_rec, *token_ptr;
/* Information about current input line */
token_rec tokens[TOK_PER_LINE];
/* lineno and bytepos are already defined (with initializers) above; these
   two lines are repeat declarations of the same objects */
int lineno; /* What line number am I processing? */
int bytepos; /* What byte address is the current instruction */
int tcount; /* How many tokens are there in this line? */
int tpos; /* What token am I currently processing */
/* Storage for strings in current line: add_token() copies each token's
   text into strbuf and advances strpos; start_line() resets strpos to 0 */
#define STRMAX 4096
char strbuf[STRMAX];
int strpos;
/* Storage of current line (filled by save_line(), echoed by print_code()
   and fail()) */
char input_line[STRMAX];
/*
 * Record the text of the current source line in input_line so that it can
 * be echoed in listings and error messages.
 *
 * s: NUL-terminated line text, possibly ending in '\n' and/or '\r'.
 *
 * A line that does not fit in input_line is truncated to STRMAX-1
 * characters and reported through fail(). Trailing line terminators are
 * stripped from the stored copy.
 */
void save_line(char *s)
{
    size_t len = strlen(s);
    int too_long = 0;

    if (len >= STRMAX) {
        /* fail() returns to its caller, so the copy itself must be bounded */
        len = STRMAX - 1;
        too_long = 1;
    }
    memcpy(input_line, s, len);
    input_line[len] = '\0';

    /* Remove terminators. The len > 0 bound keeps an empty or
       all-terminator line from reading input_line[-1]. */
    while (len > 0 &&
           (input_line[len-1] == '\n' || input_line[len-1] == '\r'))
        input_line[--len] = '\0';

    /* Report after storing, so the message echoes the offending line */
    if (too_long)
        fail("Input Line too long");
}
/* Information about current instruction being generated */
char code[10]; /* Byte encoding */
/* Reset to 0 by finish_line(); the encoders below take their own
   position argument, which shadows this global */
int codepos = 0; /* Current position in byte encoding */
int bcount = 0; /* Length of current instruction */
/* Debugging information */
/* One-letter tag for each token_t value, indexed by the enum:
   Ident, Num, Reg, instr (X), Punct, Err. There must be one entry per
   enumerator, TOK_ERR included. */
char token_type_names[] = {'I', 'N', 'R', 'X', 'P', 'E'};

/*
 * Print one token as " [<tag> <value>]" for debugging.
 *
 * out: stream to write to
 * t:   token to print
 *
 * A token whose type is outside token_t is printed with tag '?' and
 * reported through fail().
 */
void print_token(FILE *out, token_ptr t)
{
    size_t ntags = sizeof token_type_names / sizeof token_type_names[0];
    char tag = '?';

    /* Look the tag up only when the type is a valid table index */
    if ((size_t) t->type < ntags)
        tag = token_type_names[t->type];
    fprintf(out, " [%c ", tag);

    switch (t->type) {
    case TOK_IDENT:
    case TOK_REG:
    case TOK_INSTR:
        fprintf(out, "%s]", t->sval);
        break;
    case TOK_NUM:
        /* Cast so the argument always matches %lld */
        fprintf(out, "%lld]", (long long) t->ival);
        break;
    case TOK_PUNCT:
        fprintf(out, "%c]", t->cval);
        break;
    case TOK_ERR:
        fprintf(out, "ERR]");
        break;
    default:
        fprintf(out, "?]");
        fail("Unknown token type");
    }
}
/* Debugging aid: dump the current line's position, its tokens and the
   bytes generated for it */
void print_instruction(FILE *out)
{
    int tok_idx = 0;
    int byte_idx = 0;

    fprintf(out, "Line %d, Byte %d: ", lineno, bytepos);

    while (tok_idx < tcount) {
        print_token(out, &tokens[tok_idx]);
        tok_idx++;
    }

    fprintf(out, " Code: ");
    while (byte_idx < bcount) {
        /* Mask so a negative char prints as two hex digits */
        fprintf(out, "%.2x ", code[byte_idx] & 0xFF);
        byte_idx++;
    }
    fprintf(out, "\n");
}
/* Store the len low-order hex digits of value in dest[0..len-1], most
   significant digit first, using lowercase letters.
   No terminating NUL is written. */
static void hexstuff(char *dest, word_t value, int len)
{
    static const char digits[] = "0123456789abcdef";
    int slot;
    int shift = 0;

    /* Fill from the rightmost slot, consuming one nibble per step */
    for (slot = len - 1; slot >= 0; slot--) {
        dest[slot] = digits[(value >> shift) & 0xF];
        shift += 4;
    }
}
/*
 * Emit the listing for the current source line.
 *
 * out: stream to write to
 * pos: byte address at which this line's code starts
 *
 * Listing prefix:
 *   0xHHH: cccccccccccccccccccc | <line>    when pos <= 0xFFF
 *   0xHHHH: cccccccccccccccccccc | <line>   when pos >  0xFFF
 * where HHH/HHHH is the address and the c's are the instruction bytes in
 * hex, left-justified in a 20-column field. A line with no tokens gets a
 * blank prefix of the same width, so the '|' separators line up.
 *
 * In Verilog mode (vcode) the prefixed line is written as a "//" comment,
 * followed by one memory assignment per generated byte.
 *
 * An address above 0xFFFF on a line that has tokens is reported through
 * fail() and terminates the program.
 */
void print_code(FILE *out, int pos)
{
    enum { CODE_HEX = 20 };   /* width of the code field in hex digits */
    char outstring[33];
    int addr_digits = pos > 0xFFF ? 4 : 3;
    int code_col = 2 + addr_digits + 2;       /* after "0x", digits, ": " */
    int bar_col = code_col + CODE_HEX + 1;    /* column of the '|' */
    int i;

    /* Build the prefix from explicit column positions instead of relying
       on a hand-padded template: start with blanks and the separator... */
    memset(outstring, ' ', bar_col);
    outstring[bar_col] = '|';
    outstring[bar_col+1] = ' ';
    outstring[bar_col+2] = '\0';

    /* ...then drop in the address and code bytes if the line has any */
    if (tcount) {
        if (pos > 0xFFFF) {
            fail("Code address limit exceeded");
            exit(1);
        }
        outstring[0] = '0';
        outstring[1] = 'x';
        hexstuff(outstring+2, pos, addr_digits);
        outstring[2+addr_digits] = ':';
        /* Never write past the code field, whatever bcount says */
        for (i = 0; i < bcount && i < CODE_HEX/2; i++)
            hexstuff(outstring+code_col+2*i, code[i]&0xFF, 2);
    }

    if (vcode) {
        fprintf(out, "//%s%s\n", outstring, input_line);
        if (tcount) {
            for (i = 0; i < bcount; i++) {
                if (block_factor) {
                    /* Byte i goes to bank (addr mod n), row (addr div n) */
                    fprintf(out, " bank%d[%d] = 8\'h%.2x;\n", (pos+i)%block_factor, (pos+i)/block_factor, code[i] & 0xFF);
                } else {
                    fprintf(out, " mem[%d] = 8\'h%.2x;\n", pos+i, code[i] & 0xFF);
                }
            }
        }
    } else {
        fprintf(out, "%s%s\n", outstring, input_line);
    }
}
/* Report an assembly error for the current line.
   Only the first error on a line is printed (to stderr, together with the
   line's text); later calls on the same line just keep the error flags
   set. error_mode is cleared again by start_line(). */
void fail(char *message)
{
    int already_reported = error_mode;

    error_mode = 1;
    hit_error = 1;
    if (already_reported)
        return;

    fprintf(stderr, "Error on line %d: %s\n", lineno, message);
    fprintf(stderr, "Line %d, Byte 0x%.4x: %s\n",
            lineno, bytepos, input_line);
}
/* Parse Register from set of tokens and put into high or low
4 bits of code[codepos] */
void get_reg(int codepos, int hi)
{
int rval = REG_NONE;
char c;
if (tokens[tpos].type != TOK_REG) {
fail("Expecting Register ID");
return;
} else {
rval = find_register(tokens[tpos].sval);
}
/* Insert into output */
c = code[codepos];
if (hi)
c = (c & 0x0F) | (rval << 4);
else
c = (c & 0xF0) | rval;
code[codepos] = c;
tpos++;
}
/* Consume a number or identifier token and store its value, least
   significant byte first, in code[codepos .. codepos+bytes-1]. */
/* offset is subtracted from the value before storing (for PC relative) */
void get_num(int codepos, int bytes, int offset)
{
    word_t val = 0;
    char *dst = &code[codepos];
    int shift = 0;
    int n;

    switch (tokens[tpos].type) {
    case TOK_NUM:
        val = tokens[tpos].ival;
        break;
    case TOK_IDENT:
        /* A label: use the address recorded in the symbol table */
        val = find_symbol(tokens[tpos].sval);
        break;
    default:
        fail("Number Expected");
        return;
    }

    val -= offset;
    for (n = 0; n < bytes; n++) {
        dst[n] = (val >> shift) & 0xFF;
        shift += 8;
    }
    tpos++;
}
/* Get memory reference.
Can be of form:
Num(Reg)
(Reg)
Num
Ident
Ident(Reg)
Put Reg in low position of current byte, and Number in following bytes
*/
void get_mem(int codepos)
{
char rval = REG_NONE;
word_t val = 0;
int i;
char c;
token_t type = tokens[tpos].type;
/* Deal with optional displacement */
if (type == TOK_NUM) {
val = tokens[tpos++].ival;
type = tokens[tpos].type;
} else if (type == TOK_IDENT) {
val = find_symbol(tokens[tpos++].sval);
type = tokens[tpos].type;
}
/* Check for optional register */
if (type == TOK_PUNCT) {
if (tokens[tpos].cval == '(') {
tpos++;
if (tokens[tpos].type == TOK_REG)
rval = find_register(tokens[tpos++].sval);
else {
fail("Expecting Register Id");
return;
}
if (tokens[tpos].type != TOK_PUNCT ||
tokens[tpos++].cval != ')') {
fail("Expecting ')'");
return;
}
}
}
c = (code[codepos] & 0xF0) | (rval & 0xF);
code[codepos++] = c;
for (i = 0; i < 8; i++)
code[codepos+i] = (val >> (i*8)) & 0xFF;
}
void start_line()
{
int t;
error_mode = 0;
tpos = 0;
tcount = 0;
bcount = 0;
strpos = 0;
for (t = 0; t < TOK_PER_LINE; t++)
tokens[t].type = TOK_ERR;
}
/* Process the tokens collected for one input line.
   In both passes: handle an optional leading "label:", the .pos and
   .align directives, and advance bytepos by the instruction's size.
   In pass 1 labels are entered into the symbol table and nothing is
   emitted. In pass 2 the operands are encoded into code[] and the listing
   line is written with print_code().
   Every exit path calls start_line() to reset state for the next line. */
void finish_line()
{
int size;
instr_ptr instr;
/* Address at the start of the line; bytepos itself is advanced below,
   but the listing must show where the instruction begins */
int savebytepos = bytepos;
tpos = 0;
codepos = 0;
if (tcount == 0) {
/* Pass 2 still echoes the line so that the listing is complete */
if (pass > 1)
print_code(outfile, savebytepos);
start_line();
return; /* Empty line */
}
/* Completion of an erroneous line */
if (error_mode) {
start_line();
return;
}
/* See if this is a labeled line */
if (tokens[0].type == TOK_IDENT) {
/* Unused token slots hold TOK_ERR, so a lone identifier fails here too */
if (tokens[1].type != TOK_PUNCT ||
tokens[1].cval != ':') {
fail("Missing Colon");
start_line();
return;
} else {
/* Labels are recorded only once, during pass 1 */
if (pass == 1)
add_symbol(tokens[0].sval, bytepos);
/* Skip the identifier and the colon */
tpos+=2;
if (tcount == 2) {
/* That's all for this line */
if (pass > 1)
print_code(outfile, savebytepos);
start_line();
return;
}
}
}
/* Get instruction */
if (tokens[tpos].type != TOK_INSTR) {
fail("Bad Instruction");
start_line();
return;
}
/* Process .pos: set the current address to the given number */
if (strcmp(tokens[tpos].sval, ".pos") == 0) {
if (tokens[++tpos].type != TOK_NUM) {
fail("Invalid Address");
start_line();
return;
}
bytepos = tokens[tpos].ival;
if (pass > 1) {
/* The listing shows the new address, not the one before the directive */
print_code(outfile, bytepos);
}
start_line();
return;
}
/* Process .align: the argument must be a positive number */
if (strcmp(tokens[tpos].sval, ".align") == 0) {
int a;
if (tokens[++tpos].type != TOK_NUM || (a=tokens[tpos].ival) <= 0) {
fail("Invalid Alignment");
start_line();
return;
}
/* Round bytepos up to the next multiple of a */
bytepos = ((bytepos+a-1)/a)*a;
if (pass > 1) {
print_code(outfile, bytepos);
}
start_line();
return;
}
/* Get instruction size */
instr = find_instr(tokens[tpos++].sval);
if (instr == NULL) {
fail("Invalid Instruction");
/* NOTE(review): bad_instr() presumably returns a placeholder entry so
   that the fields read below are valid - confirm in isa.h */
instr = bad_instr();
}
size = instr->bytes;
bytepos += size;
bcount = size;
/* If this is pass 1, then we're done */
if (pass == 1) {
start_line();
return;
}
/* Here's where we really process the instructions */
code[0] = instr->code;
/* Default register byte; overwritten nibble by nibble by the operands */
code[1] = HPACK(REG_NONE, REG_NONE);
switch(instr->arg1) {
case R_ARG:
get_reg(instr->arg1pos, instr->arg1hi);
break;
case M_ARG:
get_mem(instr->arg1pos);
break;
case I_ARG:
/* For an immediate, arg1hi is passed as get_num()'s byte count.
   NOTE(review): presumably the instruction table stores the immediate's
   size in this field - confirm against isa.h */
get_num(instr->arg1pos, instr->arg1hi, 0);
break;
case NO_ARG:
default:
break;
}
if (instr->arg2 != NO_ARG) {
/* Get comma */
if (tokens[tpos].type != TOK_PUNCT ||
tokens[tpos].cval != ',') {
fail("Expecting Comma");
start_line();
return;
}
tpos++;
/* Get second argument */
switch(instr->arg2) {
case R_ARG:
get_reg(instr->arg2pos, instr->arg2hi);
break;
case M_ARG:
get_mem(instr->arg2pos);
break;
case I_ARG:
/* As for the first operand, arg2hi is used as the byte count */
get_num(instr->arg2pos, instr->arg2hi, 0);
break;
case NO_ARG:
default:
break;
}
}
/* Tokens left over after the last operand are not checked here */
print_code(outfile, savebytepos);
start_line();
}
/*
 * Append one token to the current line.
 *
 * type: token type
 * s:    token text, or NULL if the token has none; it is copied into
 *       strbuf, so the caller's buffer may be reused afterwards
 * i:    numeric value (meaningful for TOK_NUM)
 * c:    punctuation character (meaningful for TOK_PUNCT)
 *
 * The first token of a line resets the per-line state via start_line().
 * If the line has too many tokens, or its strings do not fit in strbuf,
 * the token is dropped and "Line too long" is reported through fail().
 */
void add_token(token_t type, char *s, word_t i, char c)
{
    char *t = NULL;

    if (!tcount)
        start_line();

    /* The limit must be checked against tcount: tpos stays 0 while tokens
       are being collected and only advances in finish_line(). The last
       slot is kept free so that it still reads as TOK_ERR, which the
       parser relies on when it looks one token past the end. */
    if (tcount >= TOK_PER_LINE-1) {
        fail("Line too long");
        return;
    }

    if (s) {
        int len = strlen(s)+1;   /* include the terminating NUL */
        if (strpos + len > STRMAX) {
            fail("Line too long");
            return;
        }
        t = strcpy(strbuf+strpos, s);
        strpos += len;
    }

    tokens[tcount].type = type;
    tokens[tcount].sval = t;
    tokens[tcount].ival = i;
    tokens[tcount].cval = c;
    tcount++;
}
/* Append an identifier token with text s to the current line */
void add_ident(char *s)
{
    const char no_punct = ' ';   /* filler for the unused character field */

    add_token(TOK_IDENT, s, 0, no_punct);
}
/* Append an instruction (or directive) token with text s to the current
   line */
void add_instr(char *s)
{
    const char no_punct = ' ';   /* filler for the unused character field */

    add_token(TOK_INSTR, s, 0, no_punct);
}
/* Append a register token with text s to the current line */
void add_reg(char *s)
{
    const char no_punct = ' ';   /* filler for the unused character field */

    add_token(TOK_REG, s, 0, no_punct);
}
/* Append a numeric token with value i to the current line; it carries no
   text */
void add_num(long long i)
{
    char *no_text = NULL;
    const char no_punct = ' ';   /* filler for the unused character field */

    add_token(TOK_NUM, no_text, i, no_punct);
}
/* Append a punctuation token for character c to the current line; it
   carries no text and no numeric value */
void add_punct(char c)
{
    char *no_text = NULL;

    add_token(TOK_PUNCT, no_text, 0, c);
}
/* Capacity of the symbol table */
#define STAB 1000
/* Index of the first table entry; also the starting value of symbol_cnt */
#define INIT_CNT 0
/* Number of entries currently in symbol_table */
int symbol_cnt = INIT_CNT;
/* Label table: add_symbol() appends entries, find_symbol() searches them
   linearly by name */
struct {
char *name; /* Heap-allocated copy of the label text */
int pos; /* Byte address the label refers to */
} symbol_table[STAB];
void add_symbol(char *name, int p)
{
char *t = (char *) malloc(strlen(name)+1);
strcpy(t, name);
symbol_table[symbol_cnt].name = t;
symbol_table[symbol_cnt].pos = p;
symbol_cnt++;
}
/* Look up a label by name.
   Returns the address recorded for the first matching entry. If there is
   none, reports "Can't find label" through fail() and returns -1. */
int find_symbol(char *name)
{
    int idx = 0;

    while (idx < symbol_cnt) {
        if (!strcmp(name, symbol_table[idx].name))
            return symbol_table[idx].pos;
        idx++;
    }

    fail("Can't find label");
    return -1;
}
/* End-of-input hook for the lexer.
   Reports an error if tokens of an unfinished line are still pending,
   and in verbose mode dumps the symbol table at the end of pass 2.
   Always returns 1. */
int yywrap()
{
    int sym;

    if (tcount > 0)
        fail("Missing end-of-line on final line\n");

    /* The table is only printed for verbose runs, and only in pass 2 */
    if (!verbose || pass <= 1)
        return 1;

    printf("Symbol Table:\n");
    for (sym = INIT_CNT; sym < symbol_cnt; sym++) {
        printf(" %s\t0x%x\n", symbol_table[sym].name, symbol_table[sym].pos);
    }
    return 1;
}
/* Lexer interface, defined outside this file: main() points yyin at the
   source file and calls yylex() once per pass */
extern FILE *yyin;
int yylex();
/* Print the command-line synopsis to stdout and exit with status 0.
   pname is the program name to show. Does not return. */
static void usage(char *pname)
{
    printf("Usage: %s [-V[n]] file.ys\n", pname);
    fputs(" -V[n] Generate memory initialization in Verilog format (n-way blocking)\n",
          stdout);
    exit(0);
}
/*
 * Entry point: assemble one ".ys" source file.
 *
 * Command line: [-V[n]] file.ys
 *   -V    write Verilog memory initialization to stdout instead of
 *         creating a ".yo" listing next to the source file
 *   -Vn   same, for n-way banked memory (only n == 8 is accepted)
 *
 * The source is scanned twice: pass 1 collects labels and sizes, pass 2
 * generates the output. Pass 2 is skipped if pass 1 found errors.
 *
 * Returns 0 on success and nonzero if any error was reported.
 */
int main(int argc, char *argv[])
{
    size_t namelen;
    int rootlen;
    char infname[512];
    char outfname[512];
    int nextarg = 1;

    if (argc < 2)
        usage(argv[0]);

    if (argv[nextarg][0] == '-') {
        char flag = argv[nextarg][1];
        switch (flag) {
        case 'V':
            vcode = 1;
            if (argv[nextarg][2]) {
                block_factor = atoi(argv[nextarg]+2);
                if (block_factor != 8) {
                    fprintf(stderr, "Unknown blocking factor %d\n", block_factor);
                    exit(1);
                }
            }
            nextarg++;
            break;
        default:
            usage(argv[0]);
        }
    }

    /* An option with no file name after it: argv[nextarg] would be NULL */
    if (nextarg >= argc)
        usage(argv[0]);

    /* The name must end in ".ys". Check the length first so that the
       suffix comparison never starts before the beginning of the string. */
    namelen = strlen(argv[nextarg]);
    if (namelen < 3 || strcmp(argv[nextarg]+namelen-3, ".ys"))
        usage(argv[0]);
    /* Leave room in infname/outfname for the suffix and the NUL */
    if (namelen-3 > 500) {
        fprintf(stderr, "File name too long\n");
        exit(1);
    }
    rootlen = (int) (namelen-3);

    memcpy(infname, argv[nextarg], rootlen);
    strcpy(infname+rootlen, ".ys");
    yyin = fopen(infname, "r");
    if (!yyin) {
        fprintf(stderr, "Can't open input file '%s'\n", infname);
        exit(1);
    }

    if (vcode) {
        outfile = stdout;
    } else {
        /* Same root as the input, with ".yo" in place of ".ys" */
        memcpy(outfname, argv[nextarg], rootlen);
        strcpy(outfname+rootlen, ".yo");
        outfile = fopen(outfname, "w");
        if (!outfile) {
            fprintf(stderr, "Can't open output file '%s'\n", outfname);
            exit(1);
        }
    }

    /* Pass 1: collect labels and instruction sizes */
    pass = 1;
    yylex();
    fclose(yyin);
    if (hit_error)
        exit(1);

    /* Pass 2: rewind all position state and re-read the source to
       generate the output */
    pass = 2;
    lineno = 1;
    error_mode = 0;
    bytepos = 0;
    yyin = fopen(infname, "r");
    if (!yyin) {
        fprintf(stderr, "Can't open input file '%s'\n", infname);
        exit(1);
    }
    yylex();
    fclose(yyin);
    fclose(outfile);
    return hit_error;
}
/* Convert the hexadecimal text at p to an unsigned 64-bit value.
   Parsing follows strtoull() with base 16: leading white space and an
   optional "0x" prefix are accepted, and conversion stops at the first
   character that is not a hex digit. Text with no digits yields 0. */
unsigned long long atollh(const char *p)
{
    enum { HEX_BASE = 16 };
    char *end_unused = NULL;   /* where parsing stopped; not needed here */

    return strtoull(p, &end_unused, HEX_BASE);
}