RVBTCC/boot.c
2024-12-24 16:16:03 +08:00

2017 lines
51 KiB
C

/*
* RVBTCC By Yaossg
* A lightweight bootstrapping compiler in 2000 lines.
*
* It aims to demonstrate how to write a bootstrapping compiler in no time.
* Syntax is similar to C, output is RISC-V assembly.
* Only dependent on some glibc functions for I/O.
* Purely for educational purposes. Do not use in production.
*/
// glibc dependency
extern void* stdin;
extern void* stdout;
extern void* stderr;
int printf(char* format, ...);
int getchar();
void exit(int status);
int fprintf(void* file, char* format, ...);
int ungetc(int ch, void* file);
void ungetchar(int ch) {
ungetc(ch, stdin);
}
// limitations
enum {
STRING_TABLE_SIZE = 65536,
STRING_LUT_SIZE = 4096,
ID_TABLE_SIZE = 65536,
ID_LUT_SIZE = 4096,
LOCAL_SIZE = 4096,
REG_SIZE = 4096,
};
// constants
enum {
TOKEN_EOF = -1,
TOKEN_ADD,
TOKEN_SUB,
TOKEN_MUL,
TOKEN_DIV,
TOKEN_REM,
TOKEN_AND,
TOKEN_OR,
TOKEN_XOR,
TOKEN_LSHIFT,
TOKEN_RSHIFT,
TOKEN_ASSIGN,
TOKEN_ADD_ASSIGN,
TOKEN_SUB_ASSIGN,
TOKEN_MUL_ASSIGN,
TOKEN_DIV_ASSIGN,
TOKEN_REM_ASSIGN,
TOKEN_AND_ASSIGN,
TOKEN_OR_ASSIGN,
TOKEN_XOR_ASSIGN,
TOKEN_LSHIFT_ASSIGN,
TOKEN_RSHIFT_ASSIGN,
TOKEN_INV,
TOKEN_NOT,
TOKEN_LAND,
TOKEN_LOR,
TOKEN_INC,
TOKEN_DEC,
TOKEN_QUESTION,
TOKEN_COLON,
TOKEN_SEMICOLON,
TOKEN_COMMA,
TOKEN_ELLIPSIS,
TOKEN_EQ,
TOKEN_NE,
TOKEN_LT,
TOKEN_GT,
TOKEN_LE,
TOKEN_GE,
TOKEN_PAREN_LEFT = 50,
TOKEN_PAREN_RIGHT,
TOKEN_BRACKET_LEFT,
TOKEN_BRACKET_RIGHT,
TOKEN_BRACE_LEFT,
TOKEN_BRACE_RIGHT,
TOKEN_STRING = 99,
TOKEN_NUMBER,
TOKEN_ID,
TOKEN_IF,
TOKEN_ELSE,
TOKEN_WHILE,
TOKEN_FOR,
TOKEN_DO,
TOKEN_BREAK,
TOKEN_CONTINUE,
TOKEN_RETURN,
TOKEN_ENUM,
TOKEN_EXTERN,
TOKEN_VOID = 128,
TOKEN_INT,
TOKEN_CHAR,
};
enum {
TYPE_VOID,
TYPE_INT,
TYPE_CHAR,
TYPE_VOID_PTR = 16,
TYPE_INT_PTR,
TYPE_CHAR_PTR,
TYPE_PTR_MASK = TYPE_VOID_PTR,
TYPE_TOKEN_MASK = TOKEN_VOID,
};
enum {
KIND_TEMP,
KIND_SCALAR,
KIND_ARRAY,
KIND_FUNCTION,
};
enum {
REG_ZERO,
REG_RA,
REG_SP,
REG_GP,
REG_TP,
REG_T0,
REG_T1,
REG_T2,
REG_FP,
REG_S1,
REG_A0,
REG_A1,
REG_A2,
REG_A3,
REG_A4,
REG_A5,
REG_A6,
REG_A7,
REG_S2,
REG_S3,
REG_S4,
REG_S5,
REG_S6,
REG_S7,
REG_S8,
REG_S9,
REG_S10,
REG_S11,
REG_T3,
REG_T4,
REG_T5,
REG_T6,
};
char* reg_name(int reg) {
// special begin
if (reg == REG_ZERO) return "zero";
if (reg == REG_RA) return "ra";
if (reg == REG_SP) return "sp";
if (reg == REG_GP) return "gp";
if (reg == REG_TP) return "tp";
if (reg == REG_T0) return "t0";
if (reg == REG_T1) return "t1";
if (reg == REG_T2) return "t2";
if (reg == REG_FP) return "fp";
if (reg == REG_S1) return "s1";
if (reg == REG_A0) return "a0";
if (reg == REG_A1) return "a1";
if (reg == REG_A2) return "a2";
if (reg == REG_A3) return "a3";
if (reg == REG_A4) return "a4";
if (reg == REG_A5) return "a5";
if (reg == REG_A6) return "a6";
if (reg == REG_A7) return "a7";
// allocation begin
if (reg == REG_S2) return "s2";
if (reg == REG_S3) return "s3";
if (reg == REG_S4) return "s4";
if (reg == REG_S5) return "s5";
if (reg == REG_S6) return "s6";
if (reg == REG_S7) return "s7";
if (reg == REG_S8) return "s8";
if (reg == REG_S9) return "s9";
if (reg == REG_S10) return "s10";
if (reg == REG_S11) return "s11";
if (reg == REG_T3) return "t3";
if (reg == REG_T4) return "t4";
if (reg == REG_T5) return "t5";
if (reg == REG_T6) return "t6";
// overflow begin
return 0;
}
// lexer
int streq(char* s1, char* s2) {
while (*s1 && *s2 && *s1 == *s2) {
s1++;
s2++;
}
return *s1 == *s2;
}
int is_digit(int ch) {
return '0' <= ch && ch <= '9';
}
int is_id_start(int ch) {
return 'a' <= ch && ch <= 'z' || 'A' <= ch && ch <= 'Z' || ch == '_';
}
int is_id_cont(int ch) {
return is_id_start(ch) || is_digit(ch);
}
int parse_int(int ch) {
int num = ch - '0';
while (is_digit(ch = getchar())) {
num *= 10;
num += ch - '0';
}
ungetchar(ch);
return num;
}
int get_escaped_char() {
int ch = getchar();
if (ch == 'n') {
ch = '\n';
} else if (ch == 't') {
ch = '\t';
} else if (ch == 'r') {
ch = '\r';
} else if (ch == '0') {
ch = '\0';
} else if (ch == '\\') {
ch = '\\';
} else if (ch == '\'') {
ch = '\'';
} else if (ch == '\"') {
ch = '\"';
} else {
fprintf(stderr, "unexpected escaped character: '\\%c'\n", ch);
exit(1);
}
return ch;
}
int token_state;
int token_type;
int token_data;
char string_table[STRING_TABLE_SIZE];
int string_offset;
int string_lut[STRING_LUT_SIZE];
int string_lut_size;
int parse_string() {
int offset = string_offset;
int ch;
while ((ch = getchar()) != '"') {
if (ch == -1 || ch == '\n') {
fprintf(stderr, "expecting '\"'\n");
exit(1);
}
if (ch == '\\') {
ch = get_escaped_char();
}
string_table[string_offset++] = ch;
}
string_table[string_offset++] = 0;
string_lut[string_lut_size] = offset;
return string_lut_size++;
}
void rewind_string(int new_data) {
string_offset = string_lut[token_data];
token_data = new_data;
--string_lut_size;
}
void dedup_string() {
int last_string = string_lut_size - 1;
char* latest = string_table + string_lut[last_string];
for (int i = 0; i < last_string; i++) {
char* candidate = string_table + string_lut[i];
if (streq(candidate, latest)) {
rewind_string(i);
return;
}
}
}
char id_table[ID_TABLE_SIZE];
int id_offset;
int id_lut[ID_LUT_SIZE];
int id_lut_size;
int parse_id(int ch) {
int offset = id_offset;
id_table[id_offset++] = ch;
while (is_id_cont(ch = getchar())) {
id_table[id_offset++] = ch;
}
ungetchar(ch);
id_table[id_offset++] = 0;
id_lut[id_lut_size] = offset;
return id_lut_size++;
}
void rewind_id(int new_data) {
id_offset = id_lut[token_data];
token_data = new_data;
--id_lut_size;
}
void dedup_id() {
int last_id = id_lut_size - 1;
char* latest = id_table + id_lut[last_id];
for (int i = 0; i < last_id; i++) {
char* candidate = id_table + id_lut[i];
if (streq(candidate, latest)) {
rewind_id(i);
return;
}
}
}
void parse_id_like(int ch) {
token_type = TOKEN_ID;
token_data = parse_id(ch);
char* id = id_table + id_lut[token_data];
if (streq(id, "int")) {
token_type = TOKEN_INT;
} else if (streq(id, "if")) {
token_type = TOKEN_IF;
} else if (streq(id, "else")) {
token_type = TOKEN_ELSE;
} else if (streq(id, "while")) {
token_type = TOKEN_WHILE;
} else if (streq(id, "break")) {
token_type = TOKEN_BREAK;
} else if (streq(id, "continue")) {
token_type = TOKEN_CONTINUE;
} else if (streq(id, "return")) {
token_type = TOKEN_RETURN;
} else if (streq(id, "void")) {
token_type = TOKEN_VOID;
} else if (streq(id, "char")) {
token_type = TOKEN_CHAR;
} else if (streq(id, "for")) {
token_type = TOKEN_FOR;
} else if (streq(id, "do")) {
token_type = TOKEN_DO;
} else if (streq(id, "extern")) {
token_type = TOKEN_EXTERN;
} else if (streq(id, "enum")) {
token_type = TOKEN_ENUM;
}
if (token_type != TOKEN_ID) {
rewind_id(0);
} else {
dedup_id();
}
}
void unget_token() {
token_state = 1;
}
void next_token() {
if (token_state) {
token_state = 0;
return;
}
int ch = getchar();
while (ch == ' ' || ch == '\t' || ch == '\r' || ch == '\n') {
ch = getchar();
}
if (ch == -1) {
token_type = TOKEN_EOF;
} else if (ch == '(') {
token_type = TOKEN_PAREN_LEFT;
} else if (ch == ')') {
token_type = TOKEN_PAREN_RIGHT;
} else if (ch == '[') {
token_type = TOKEN_BRACKET_LEFT;
} else if (ch == ']') {
token_type = TOKEN_BRACKET_RIGHT;
} else if (ch == '{') {
token_type = TOKEN_BRACE_LEFT;
} else if (ch == '}') {
token_type = TOKEN_BRACE_RIGHT;
} else if (ch == '+') {
int ch2 = getchar();
if (ch2 == '+') {
token_type = TOKEN_INC;
} else if (ch2 == '=') {
token_type = TOKEN_ADD_ASSIGN;
} else {
ungetchar(ch2);
token_type = TOKEN_ADD;
}
} else if (ch == '-') {
int ch2 = getchar();
if (ch2 == '-') {
token_type = TOKEN_DEC;
} else if (ch2 == '=') {
token_type = TOKEN_SUB_ASSIGN;
} else {
ungetchar(ch2);
token_type = TOKEN_SUB;
}
} else if (ch == '*') {
int ch2 = getchar();
if (ch2 == '=') {
token_type = TOKEN_MUL_ASSIGN;
} else {
ungetchar(ch2);
token_type = TOKEN_MUL;
}
} else if (ch == '/') {
int ch2 = getchar();
if (ch2 == '=') {
token_type = TOKEN_DIV_ASSIGN;
} else if (ch2 == '/') {
do ch = getchar(); while (ch != -1 && ch != '\n');
next_token();
return;
} else if (ch2 == '*') {
while (1) {
ch = getchar();
if (ch == -1) {
fprintf(stderr, "expecting '*/'\n");
exit(1);
}
if (ch == '*') {
ch = getchar();
if (ch == '/') {
break;
}
}
}
next_token();
return;
} else {
ungetchar(ch2);
token_type = TOKEN_DIV;
}
} else if (ch == '%') {
int ch2 = getchar();
if (ch2 == '=') {
token_type = TOKEN_REM_ASSIGN;
} else {
ungetchar(ch2);
token_type = TOKEN_REM;
}
} else if (ch == ';') {
token_type = TOKEN_SEMICOLON;
} else if (ch == '?') {
token_type = TOKEN_QUESTION;
} else if (ch == ':') {
token_type = TOKEN_COLON;
} else if (ch == ',') {
token_type = TOKEN_COMMA;
} else if (ch == '<') {
int ch2 = getchar();
if (ch2 == '=') {
token_type = TOKEN_LE;
} else if (ch2 == '<') {
int ch3 = getchar();
if (ch3 == '=') {
token_type = TOKEN_LSHIFT_ASSIGN;
} else {
ungetchar(ch3);
token_type = TOKEN_LSHIFT;
}
} else {
ungetchar(ch2);
token_type = TOKEN_LT;
}
} else if (ch == '>') {
int ch2 = getchar();
if (ch2 == '=') {
token_type = TOKEN_GE;
} else if (ch2 == '>') {
int ch3 = getchar();
if (ch3 == '=') {
token_type = TOKEN_RSHIFT_ASSIGN;
} else {
ungetchar(ch3);
token_type = TOKEN_RSHIFT;
}
} else {
ungetchar(ch2);
token_type = TOKEN_GT;
}
} else if (ch == '=') {
int ch2 = getchar();
if (ch2 == '=') {
token_type = TOKEN_EQ;
} else {
ungetchar(ch2);
token_type = TOKEN_ASSIGN;
}
} else if (ch == '!') {
int ch2 = getchar();
if (ch2 == '=') {
token_type = TOKEN_NE;
} else {
ungetchar(ch2);
token_type = TOKEN_NOT;
}
} else if (ch == '&') {
int ch2 = getchar();
if (ch2 == '=') {
token_type = TOKEN_AND_ASSIGN;
} else if (ch2 == '&') {
token_type = TOKEN_LAND;
} else {
ungetchar(ch2);
token_type = TOKEN_AND;
}
} else if (ch == '|') {
int ch2 = getchar();
if (ch2 == '=') {
token_type = TOKEN_OR_ASSIGN;
} else if (ch2 == '|') {
token_type = TOKEN_LOR;
} else {
ungetchar(ch2);
token_type = TOKEN_OR;
}
} else if (ch == '^') {
int ch2 = getchar();
if (ch2 == '=') {
token_type = TOKEN_XOR_ASSIGN;
} else {
ungetchar(ch2);
token_type = TOKEN_XOR;
}
} else if (ch == '~') {
token_type = TOKEN_INV;
} else if (ch == '\'') {
token_type = TOKEN_NUMBER;
token_data = getchar();
if (token_data == '\\') {
token_data = get_escaped_char();
}
if (getchar() != '\'') {
fprintf(stderr, "expecting '\n");
exit(1);
}
} else if (ch == '"') {
token_type = TOKEN_STRING;
token_data = parse_string();
dedup_string();
} else if (ch == '.') {
token_type = 0;
if (getchar() == '.') {
if (getchar() == '.') {
token_type = TOKEN_ELLIPSIS;
}
}
if (token_type != TOKEN_ELLIPSIS) {
fprintf(stderr, "expecting '...'\n");
exit(1);
}
} else if (is_digit(ch)) {
token_type = TOKEN_NUMBER;
token_data = parse_int(ch);
} else if (is_id_start(ch)) {
parse_id_like(ch);
} else {
fprintf(stderr, "unexpected character: %c(%d)\n", ch, ch);
exit(1);
}
// debug info
if (0) {
fprintf(stderr, "token: %d\n", token_type);
if (token_type == TOKEN_ID) {
char* name = id_table + id_lut[token_data];
fprintf(stderr, " id: %s\n", name);
} else if (token_type == TOKEN_NUMBER) {
fprintf(stderr, " number: %d\n", token_data);
}
}
}
void expect_token(int expected_type) {
next_token();
if (token_type != expected_type) {
fprintf(stderr, "unexpected token: %d, should be %d\n", token_type, expected_type);
exit(1);
}
}
int parse_type() {
if (token_type == TOKEN_INT || token_type == TOKEN_CHAR || token_type == TOKEN_VOID) {
int type = token_type & ~TYPE_TOKEN_MASK;
next_token();
if (token_type == TOKEN_MUL) {
next_token();
type |= TYPE_PTR_MASK;
}
unget_token();
return type;
}
return -1;
}
// assembly context
// use id as index
int local_table[ID_LUT_SIZE]; // id -> local id
// use local id as index
int next_local_id = 1;
int max_local_id = 1;
int local_kind[LOCAL_SIZE];
int local_type[LOCAL_SIZE];
// use id as index
int global_kind[ID_LUT_SIZE];
int global_type[ID_LUT_SIZE];
// use reg id as index
int next_reg_id = REG_S2;
int max_reg_id = REG_S2;
int reg_type[REG_SIZE];
char indirection[REG_SIZE];
int overflow[REG_SIZE]; // reg -> local id
// use id as index
int const_table[ID_LUT_SIZE]; // id -> value
char is_const[ID_LUT_SIZE];
int expect_const() {
next_token();
int value = 1;
if (token_type == TOKEN_ADD) {
next_token();
} else if (token_type == TOKEN_SUB) {
value = -1;
next_token();
}
if (token_type == TOKEN_NUMBER) {
value *= token_data;
} else if (token_type == TOKEN_ID && !local_table[token_data] && is_const[token_data]) {
value *= const_table[token_data];
} else {
fprintf(stderr, "expecting a constant\n");
exit(1);
}
return value;
}
int expect_array_size() {
int size = expect_const();
if (size <= 0) {
fprintf(stderr, "array size must be positive\n");
exit(1);
}
return size;
}
void reset_reg() {
next_reg_id = REG_S2;
for (int i = 0; i < REG_SIZE; ++i) {
reg_type[i] = TYPE_VOID;
indirection[i] = 0;
overflow[i] = 0;
}
reg_type[REG_ZERO] = TYPE_INT;
}
void reset_local_table() {
for (int i = 0; i < ID_LUT_SIZE; ++i) {
local_table[i] = 0;
}
}
void reset_local() {
next_local_id = 1;
max_local_id = 1;
max_reg_id = REG_S2;
for (int i = 0; i < LOCAL_SIZE; ++i) {
local_kind[i] = KIND_TEMP;
local_type[i] = TYPE_VOID;
}
reset_reg();
}
void reset_temp() {
while (next_local_id > 1 && local_kind[next_local_id - 1] == KIND_TEMP) {
--next_local_id;
}
reset_reg();
}
int next_local_slot(int type) {
int slot = next_local_id++;
local_type[slot] = type;
if (next_local_id > max_local_id) {
max_local_id = next_local_id;
}
return slot;
}
int declare_local(int id, int type) {
if (local_table[id] != 0) return local_table[id];
int slot = next_local_slot(type);
local_kind[slot] = KIND_SCALAR;
return local_table[id] = slot;
}
int array_size_of(int type, int size) {
return type == TYPE_CHAR ? size : 4 * size;
}
int declare_local_array(int id, int type, int size) {
if (local_table[id] != 0) return local_table[id];
int slot = next_local_slot(type);
int array_size = array_size_of(type, size);
int slot_size = (array_size + 7) / 8;
local_kind[slot] = KIND_ARRAY;
for (int i = 1; i < slot_size; ++i) local_kind[next_local_slot(type)] = KIND_ARRAY;
return local_table[id] = slot;
}
void declare_global(int id, int kind, int type) {
global_kind[id] = kind;
global_type[id] = type;
}
int is_overflow(int reg) {
return reg > REG_T6;
}
int next_reg(int type) {
int reg = next_reg_id++;
if (is_overflow(reg)) {
int slot = next_local_slot(type);
local_kind[slot] = KIND_TEMP;
overflow[reg] = slot;
}
reg_type[reg] = type;
if (next_reg_id > max_reg_id) {
max_reg_id = next_reg_id;
}
return reg;
}
// prolog & epilog helpers
int check_itype_immediate(int value) {
return value >= -2048 && value <= 2047;
}
void asm_ld(char* rd, int imm, char* rs) {
if (check_itype_immediate(imm)) {
printf(" ld %s, %d(%s)\n", rd, imm, rs);
} else {
printf(" li t0, %d\n", imm);
printf(" add t0, %s, t0\n", rs);
printf(" ld %s, 0(t0)\n", rd);
}
}
void asm_sd(char* rs1, int imm, char* rs2) {
if (check_itype_immediate(imm)) {
printf(" sd %s, %d(%s)\n", rs1, imm, rs2);
} else {
printf(" li t0, %d\n", imm);
printf(" add t0, %s, t0\n", rs2);
printf(" sd %s, 0(t0)\n", rs1);
}
}
void asm_addi(char* rd, char* rs, int imm) {
if (check_itype_immediate(imm)) {
printf(" addi %s, %s, %d\n", rd, rs, imm);
} else {
printf(" li t0, %d\n", imm);
printf(" add %s, %s, t0\n", rd, rs);
}
}
// assembly helpers
char* load_op_of_type(int type) {
if (type & TYPE_PTR_MASK) {
return "ld";
} else if (type == TYPE_CHAR) {
return "lb";
} else { // int
return "lw";
}
}
char* store_op_of_type(int type) {
if (type & TYPE_PTR_MASK) {
return "sd";
} else if (type == TYPE_CHAR) {
return "sb";
} else { // int
return "sw";
}
}
// address loaders
// rd must be one of t0, t1, t2
void load_local_address(int rd, int slot_id) {
int offset = slot_id * 8 - 8;
char* rd_name = reg_name(rd);
if (check_itype_immediate(offset)) {
printf(" addi %s, sp, %d\n", rd_name, offset);
} else {
printf(" li %s, %d\n", rd_name, offset);
printf(" add %s, sp, %s\n", rd_name, rd_name);
}
}
// load a non-trivial register into trivial one
void load(int rd, int rs) {
char* op = load_op_of_type(reg_type[rs]);
char* rd_name = reg_name(rd);
if (is_overflow(rs)) {
load_local_address(rd, overflow[rs]);
if (indirection[rs]) {
printf(" ld %s, 0(%s)\n", rd_name, rd_name);
}
rs = rd;
}
printf(" %s %s, 0(%s) # load non-trivial register\n", op, rd_name, reg_name(rs));
}
// store a trivial register into a non-trivial one
void store(char* rs, int reg) {
char* op = store_op_of_type(reg_type[reg]);
if (is_overflow(reg)) {
load_local_address(REG_T2, overflow[reg]);
if (indirection[reg]) {
printf(" ld t2, 0(t2)\n");
}
reg = REG_T2;
}
printf(" %s %s, 0(%s) # store non-trivial register\n", op, rs, reg_name(reg));
}
int is_nontrivial(int reg) {
return is_overflow(reg) || indirection[reg];
}
char* trivialize(int rs, int t) {
if (is_nontrivial(rs)) {
load(t, rs);
return reg_name(t);
}
return reg_name(rs);
}
void _asm_r(char* op, int rd, int rs1) {
char* rd_name = reg_name(rd);
if (is_nontrivial(rd)) rd_name = "t0";
char* rs1_name = trivialize(rs1, REG_T0);
printf(" %s %s, %s\n", op, rd_name, rs1_name);
if (is_nontrivial(rd)) {
store("t0", rd);
}
}
void asm_mv(int rd, int rs1) {
char* rs1_name = trivialize(rs1, REG_T0);
if (is_nontrivial(rd)) {
store(rs1_name, rd);
} else {
char* rd_name = reg_name(rd);
if (!streq(rd_name, rs1_name))
printf(" mv %s, %s\n", rd_name, rs1_name);
}
}
void _asm_rr(char* op, int rd, int rs1, int rs2) {
char* rd_name = reg_name(rd);
char* rs1_name = trivialize(rs1, REG_T0);
char* rs2_name = trivialize(rs2, REG_T1);
if (is_nontrivial(rd)) rd_name = "t0";
printf(" %s %s, %s, %s\n", op, rd_name, rs1_name, rs2_name);
if (is_nontrivial(rd)) {
store("t0", rd);
}
}
void _asm_ri(char* op, int rd, int rs1, int imm) {
char* rd_name = reg_name(rd);
if (is_nontrivial(rd)) rd_name = "t0";
char* rs1_name = trivialize(rs1, REG_T0);
printf(" %s %s, %s, %d\n", op, rd_name, rs1_name, imm);
if (is_nontrivial(rd)) {
store("t0", rd);
}
}
void asm_branch(char* op, int rs1, int label) {
char* rs1_name = trivialize(rs1, REG_T0);
printf(" %s %s, L%d\n", op, rs1_name, label);
}
void _asm_i(char* op, int rd, char* prefix1, char* prefix2, int imm) {
char* rd_name = reg_name(rd);
if (is_nontrivial(rd)) rd_name = "t0";
printf(" %s %s, %s%s%d\n", op, rd_name, prefix1, prefix2, imm);
if (is_nontrivial(rd)) {
store("t0", rd);
}
}
int is_not_reusable(int rs1, int expected_type) {
return indirection[rs1] || reg_type[rs1] != expected_type || rs1 == REG_ZERO;
}
int asm_r(int type, char* op, int rs1) {
int rd = rs1;
if (is_not_reusable(rs1, type)) rd = next_reg(type);
_asm_r(op, rd, rs1);
return rd;
}
int asm_rr(int type, char* op, int rs1, int rs2) {
int rd = rs1;
if (is_not_reusable(rs1, type)) rd = rs2;
if (is_not_reusable(rs2, type)) rd = next_reg(type);
_asm_rr(op, rd, rs1, rs2);
return rd;
}
void store_into_local(int rs1, int slot) {
char* rs1_name = trivialize(rs1, REG_T0);
load_local_address(REG_T2, slot);
printf(" %s %s, 0(t2)\n", store_op_of_type(local_type[slot]), rs1_name);
}
int materialize_address(int rd, int type, int kind) {
if (kind == KIND_ARRAY) {
type |= TYPE_PTR_MASK;
}
reg_type[rd] = type;
indirection[rd] = kind == KIND_SCALAR;
return rd;
}
int lookup_from_slot(int slot) {
int rd = next_reg(TYPE_VOID_PTR);
if (is_nontrivial(rd)) {
load_local_address(REG_T0, slot);
asm_mv(rd, REG_T0);
} else {
load_local_address(rd, slot);
}
return materialize_address(rd, local_type[slot], local_kind[slot]);
}
int load_imm(int imm) {
if (imm == 0) return REG_ZERO;
int reg = next_reg(TYPE_INT);
_asm_i("li", reg, "", "", imm);
return reg;
}
int lookup(int id) {
if (local_table[id]) {
return lookup_from_slot(local_table[id]);
}
if (is_const[id]) {
return load_imm(const_table[id]);
}
char* name = id_table + id_lut[id];
if (global_kind[id]) {
if (global_kind[id] == KIND_FUNCTION) {
fprintf(stderr, "function name must not appear outside function call: %s\n", name);
exit(1);
}
int rd = next_reg(TYPE_VOID_PTR);
_asm_i("la", rd, name, " # id: ", id);
return materialize_address(rd, global_type[id], global_kind[id]);
}
fprintf(stderr, "unresolved identifier: %s\n", name);
exit(1);
}
int asm_r_arith(char* op, int rs1) {
if (reg_type[rs1] & TYPE_PTR_MASK) {
fprintf(stderr, "pointer cannot be arithmetically operated by %s\n", op);
exit(1);
}
return asm_r(TYPE_INT, op, rs1);
}
int asm_rr_arith(char* op, int rs1, int rs2) {
if (reg_type[rs1] & TYPE_PTR_MASK || reg_type[rs2] & TYPE_PTR_MASK) {
fprintf(stderr, "pointer cannot be arithmetically operated by %s\n", op);
exit(1);
}
return asm_rr(TYPE_INT, op, rs1, rs2);
}
int asm_rr_cmp(char* op, int rs1, int rs2) {
// since NULL is virtually 0, it is considered a valid example of a pointer comparing with an integer
return asm_rr(TYPE_INT, op, rs1, rs2);
}
void asm_beqz(int rs1, int label) {
asm_branch("beqz", rs1, label);
}
void asm_bnez(int rs1, int label) {
asm_branch("bnez", rs1, label);
}
void asm_j(int label) {
printf(" j L%d\n", label);
}
int next_label_id = 0;
int next_label() {
return next_label_id++;
}
int asm_label(int label) {
printf("L%d:\n", label);
return label;
}
int break_label_stack[4096];
int cont_label_stack[4096];
int break_label_stack_size;
int cont_label_stack_size;
void asm_break() {
if (break_label_stack_size == 0) {
fprintf(stderr, "break without loop\n");
exit(1);
}
asm_j(break_label_stack[break_label_stack_size - 1]);
}
void asm_continue() {
if (cont_label_stack_size == 0) {
fprintf(stderr, "continue without loop\n");
exit(1);
}
asm_j(cont_label_stack[cont_label_stack_size - 1]);
}
void asm_push_label(int break_label, int cont_label) {
break_label_stack[break_label_stack_size++] = break_label;
cont_label_stack[cont_label_stack_size++] = cont_label;
}
void asm_pop_label() {
--break_label_stack_size;
--cont_label_stack_size;
}
int epilog_label;
void asm_return() {
asm_j(epilog_label);
}
int log_step_of(int type) {
return type == TYPE_INT_PTR ? 2 : 0;
}
int step_of(int type) {
return 1 << log_step_of(type);
}
int asm_add(int lhs, int rhs) {
int type1 = reg_type[lhs] & TYPE_PTR_MASK;
int type2 = reg_type[rhs] & TYPE_PTR_MASK;
if (type1 != type2) {
int ptr;
int idx;
if (type1) {
ptr = lhs;
idx = rhs;
} else {
ptr = rhs;
idx = lhs;
}
int ptr_type = reg_type[ptr];
int offset = next_reg(TYPE_INT);
_asm_ri("slli", offset, idx, log_step_of(ptr_type));
return asm_rr(ptr_type, "add", ptr, offset);
}
if (type1 && type2) {
fprintf(stderr, "operands of addition cannot be both pointers\n");
exit(1);
}
return asm_rr(TYPE_INT, "add", lhs, rhs);
}
int asm_sub(int lhs, int rhs) {
int lhs_type = reg_type[lhs];
int rhs_type = reg_type[rhs];
int type1 = lhs_type & TYPE_PTR_MASK;
int type2 = rhs_type & TYPE_PTR_MASK;
if (type1 && type2) {
if (lhs_type != rhs_type) {
fprintf(stderr, "pointer type mismatch\n");
exit(1);
}
int diff = asm_rr(TYPE_INT, "sub", lhs, rhs);
_asm_ri("srai", diff, diff, log_step_of(lhs_type));
return diff;
}
if (type1) {
int neg = asm_r_arith("neg", rhs);
return asm_add(lhs, neg);
}
return asm_rr_arith("sub", lhs, rhs);
}
int dereference(int reg) {
if (indirection[reg]) {
load(reg, reg);
} else {
indirection[reg] = 1;
}
reg_type[reg] = reg_type[reg] & ~TYPE_PTR_MASK;
return reg;
}
int addressof(int reg) {
if (indirection[reg] && !(reg_type[reg] & TYPE_PTR_MASK)) {
reg_type[reg] = reg_type[reg] | TYPE_PTR_MASK;
indirection[reg] = 0;
} else {
printf("cannot take address of this expression");
exit(1);
}
return reg;
}
// parser
int parse_expr();
int parse_assign_expr();
int parse_function_call(int id) {
char* name = id_table + id_lut[id];
if (global_kind[id] != KIND_FUNCTION) {
fprintf(stderr, "not a function name: %s\n", name);
exit(1);
}
int arg = 0;
int args[8];
while (1) {
next_token();
if (token_type == TOKEN_PAREN_RIGHT) {
break;
}
unget_token();
if (arg >= 8) {
fprintf(stderr, "too many arguments\n");
exit(1);
}
args[arg++] = parse_assign_expr();
next_token();
if (token_type == TOKEN_COMMA) {
// continue;
} else if (token_type == TOKEN_PAREN_RIGHT) {
break;
} else {
fprintf(stderr, "expecting ',' or ')'\n");
exit(1);
}
}
for (int i = 0; i < arg; ++i) {
asm_mv(i + REG_A0, args[i]);
}
for (int i = REG_T3; i <= REG_T6; ++i) {
if (i < max_reg_id) {
asm_sd(reg_name(i), (REG_S2 - i) * 8 - 24, "fp");
}
}
printf(" call %s\n", name);
for (int i = REG_T3; i <= REG_T6; ++i) {
if (i < max_reg_id) {
asm_ld(reg_name(i), (REG_S2 - i) * 8 - 24, "fp");
}
}
int type = global_type[id];
if (type != TYPE_VOID) {
int rd = next_reg(type);
asm_mv(rd, REG_A0);
return rd;
}
return REG_ZERO;
}
int parse_primary_expr() {
next_token();
if (token_type == TOKEN_NUMBER) {
return load_imm(token_data);
} else if (token_type == TOKEN_ID) {
next_token();
if (token_type == TOKEN_PAREN_LEFT) {
return parse_function_call(token_data);
}
unget_token();
return lookup(token_data);
} else if (token_type == TOKEN_STRING) {
int reg = next_reg(TYPE_CHAR_PTR);
_asm_i("la", reg, ".LC", "", token_data);
return reg;
} else if (token_type == TOKEN_PAREN_LEFT) {
int reg = parse_expr();
expect_token(TOKEN_PAREN_RIGHT);
return reg;
} else {
fprintf(stderr, "unexpected token in primary expression: %d\n", token_type);
exit(1);
}
}
int parse_postfix_expr() {
int lhs = parse_primary_expr();
while (1) {
next_token();
if (token_type == TOKEN_INC) {
int type = reg_type[lhs];
int reg = next_reg(type);
asm_mv(reg, lhs);
_asm_ri("addi", lhs, lhs, step_of(type));
lhs = reg;
} else if (token_type == TOKEN_DEC) {
int type = reg_type[lhs];
int reg = next_reg(type);
asm_mv(reg, lhs);
_asm_ri("addi", lhs, lhs, -step_of(type));
lhs = reg;
} else if (token_type == TOKEN_BRACKET_LEFT) {
int rhs = parse_expr();
expect_token(TOKEN_BRACKET_RIGHT);
lhs = dereference(asm_add(lhs, rhs));
} else {
unget_token();
break;
}
}
return lhs;
}
int parse_prefix_expr() {
next_token();
if (token_type == TOKEN_AND) {
int reg = parse_prefix_expr();
int type = reg_type[reg];
if (type & TYPE_PTR_MASK) {
fprintf(stderr, "cannot take address of a pointer\n");
exit(1);
}
return addressof(reg);
} else if (token_type == TOKEN_MUL) {
int reg = parse_prefix_expr();
int type = reg_type[reg];
if (!(type & TYPE_PTR_MASK)) {
fprintf(stderr, "cannot dereference a non-pointer\n");
exit(1);
}
if (type == TYPE_VOID_PTR) {
fprintf(stderr, "cannot dereference void pointer\n");
exit(1);
}
return dereference(reg);
} else if (token_type == TOKEN_ADD) {
return parse_prefix_expr();
} else if (token_type == TOKEN_SUB) {
int reg = parse_prefix_expr();
return asm_r_arith("neg", reg);
} else if (token_type == TOKEN_INV) {
int reg = parse_prefix_expr();
return asm_r_arith("not", reg);
} else if (token_type == TOKEN_NOT) {
int reg = parse_prefix_expr();
return asm_r(TYPE_INT, "seqz", reg);
} else if (token_type == TOKEN_INC) {
int reg = parse_prefix_expr();
_asm_ri("addi", reg, reg, step_of(reg_type[reg]));
return reg;
} else if (token_type == TOKEN_DEC) {
int reg = parse_prefix_expr();
_asm_ri("addi", reg, reg, -step_of(reg_type[reg]));
return reg;
} else {
unget_token();
return parse_postfix_expr();
}
}
int parse_mul_expr() {
int lhs = parse_prefix_expr();
while (1) {
next_token();
if (token_type == TOKEN_MUL) {
int rhs = parse_prefix_expr();
lhs = asm_rr_arith("mul", lhs, rhs);
} else if (token_type == TOKEN_DIV) {
int rhs = parse_prefix_expr();
lhs = asm_rr_arith("div", lhs, rhs);
} else if (token_type == TOKEN_REM) {
int rhs = parse_prefix_expr();
lhs = asm_rr_arith("rem", lhs, rhs);
} else {
unget_token();
break;
}
}
return lhs;
}
int parse_add_expr() {
int lhs = parse_mul_expr();
while (1) {
next_token();
if (token_type == TOKEN_ADD) {
int rhs = parse_mul_expr();
lhs = asm_add(lhs, rhs);
} else if (token_type == TOKEN_SUB) {
int rhs = parse_mul_expr();
lhs = asm_sub(lhs, rhs);
} else {
unget_token();
break;
}
}
return lhs;
}
int parse_shift_expr() {
int lhs = parse_add_expr();
while (1) {
next_token();
if (token_type == TOKEN_LSHIFT) {
int rhs = parse_add_expr();
lhs = asm_rr_arith("sll", lhs, rhs);
} else if (token_type == TOKEN_RSHIFT) {
int rhs = parse_add_expr();
lhs = asm_rr_arith("sra", lhs, rhs);
} else {
unget_token();
break;
}
}
return lhs;
}
int parse_cmp_expr() {
int lhs = parse_shift_expr();
while (1) {
next_token();
if (token_type == TOKEN_LT) {
int rhs = parse_shift_expr();
lhs = asm_rr_cmp("slt", lhs, rhs);
} else if (token_type == TOKEN_GT) {
int rhs = parse_shift_expr();
lhs = asm_rr_cmp("sgt", lhs, rhs);
} else if (token_type == TOKEN_LE) {
int rhs = parse_shift_expr();
int sgt = asm_rr_cmp("sgt", lhs, rhs);
lhs = asm_r(TYPE_INT, "seqz", sgt);
} else if (token_type == TOKEN_GE) {
int rhs = parse_shift_expr();
int slt = asm_rr_cmp("slt", lhs, rhs);
lhs = asm_r(TYPE_INT, "seqz", slt);
} else {
unget_token();
break;
}
}
return lhs;
}
int parse_eq_expr() {
int lhs = parse_cmp_expr();
while (1) {
next_token();
if (token_type == TOKEN_EQ) {
int rhs = parse_cmp_expr();
int xor = asm_rr_cmp("xor", lhs, rhs);
lhs = asm_r(TYPE_INT, "seqz", xor);
} else if (token_type == TOKEN_NE) {
int rhs = parse_cmp_expr();
int xor = asm_rr_cmp("xor", lhs, rhs);
lhs = asm_r(TYPE_INT, "snez", xor);
} else {
unget_token();
break;
}
}
return lhs;
}
int parse_bitwise_and_expr() {
int lhs = parse_eq_expr();
while (1) {
next_token();
if (token_type == TOKEN_AND) {
int rhs = parse_eq_expr();
lhs = asm_rr_arith("and", lhs, rhs);
} else {
unget_token();
break;
}
}
return lhs;
}
int parse_bitwise_xor_expr() {
int lhs = parse_bitwise_and_expr();
while (1) {
next_token();
if (token_type == TOKEN_XOR) {
int rhs = parse_bitwise_and_expr();
lhs = asm_rr_arith("xor", lhs, rhs);
} else {
unget_token();
break;
}
}
return lhs;
}
int parse_bitwise_or_expr() {
int lhs = parse_bitwise_xor_expr();
while (1) {
next_token();
if (token_type == TOKEN_OR) {
int rhs = parse_bitwise_xor_expr();
lhs = asm_rr_arith("or", lhs, rhs);
} else {
unget_token();
break;
}
}
return lhs;
}
int parse_logical_and_expr() {
int lhs = parse_bitwise_or_expr();
int logical = 0;
int label;
int result;
while (1) {
next_token();
if (token_type == TOKEN_LAND) {
if (!logical) {
logical = 1;
label = next_label();
result = next_reg(TYPE_INT);
_asm_r("snez", result, lhs);
}
asm_beqz(result, label);
int rhs = parse_bitwise_or_expr();
_asm_r("snez", result, rhs);
} else {
unget_token();
break;
}
}
if (logical) {
asm_label(label);
return result;
}
return lhs;
}
int parse_logical_or_expr() {
int lhs = parse_logical_and_expr();
int logical = 0;
int label;
int result;
while (1) {
next_token();
if (token_type == TOKEN_LOR) {
if (!logical) {
logical = 1;
label = next_label();
result = next_reg(TYPE_INT);
_asm_r("snez", result, lhs);
}
asm_bnez(result, label);
int rhs = parse_logical_and_expr();
_asm_r("snez", result, rhs);
} else {
unget_token();
break;
}
}
if (logical) {
asm_label(label);
return result;
}
return lhs;
}
int parse_conditional_expr() {
int cond = parse_logical_or_expr();
next_token();
if (token_type == TOKEN_QUESTION) {
int label1 = next_label();
int label2 = next_label();
asm_beqz(cond, label1);
int lhs = parse_expr();
int result = next_reg(reg_type[lhs]);
asm_mv(result, lhs);
asm_j(label2);
expect_token(TOKEN_COLON);
asm_label(label1);
int rhs = parse_conditional_expr();
if (reg_type[lhs] != reg_type[rhs]) {
fprintf(stderr, "type mismatch in conditional expression\n");
exit(1);
}
asm_mv(result, rhs);
asm_label(label2);
return result;
} else {
unget_token();
return cond;
}
}
int parse_assign_expr() {
int lhs = parse_conditional_expr();
next_token();
if (token_type == TOKEN_ASSIGN) {
int rhs = parse_assign_expr();
asm_mv(lhs, rhs);
return lhs;
} else if (token_type == TOKEN_ADD_ASSIGN) {
int rhs = parse_assign_expr();
int sum = asm_add(lhs, rhs);
asm_mv(lhs, sum);
return lhs;
} else if (token_type == TOKEN_SUB_ASSIGN) {
int rhs = parse_assign_expr();
int diff = asm_sub(lhs, rhs);
asm_mv(lhs, diff);
return lhs;
} else if (token_type == TOKEN_MUL_ASSIGN) {
int rhs = parse_assign_expr();
int prod = asm_rr_arith("mul", lhs, rhs);
asm_mv(lhs, prod);
return lhs;
} else if (token_type == TOKEN_DIV_ASSIGN) {
int rhs = parse_assign_expr();
int quot = asm_rr_arith("div", lhs, rhs);
asm_mv(lhs, quot);
return lhs;
} else if (token_type == TOKEN_REM_ASSIGN) {
int rhs = parse_assign_expr();
int rem = asm_rr_arith("rem", lhs, rhs);
asm_mv(lhs, rem);
return lhs;
} else if (token_type == TOKEN_LSHIFT_ASSIGN) {
int rhs = parse_assign_expr();
int lshift = asm_rr_arith("sll", lhs, rhs);
asm_mv(lhs, lshift);
return lhs;
} else if (token_type == TOKEN_RSHIFT_ASSIGN) {
int rhs = parse_assign_expr();
int rshift = asm_rr_arith("sra", lhs, rhs);
asm_mv(lhs, rshift);
return lhs;
} else if (token_type == TOKEN_AND_ASSIGN) {
int rhs = parse_assign_expr();
int and = asm_rr_arith("and", lhs, rhs);
asm_mv(lhs, and);
return lhs;
} else if (token_type == TOKEN_XOR_ASSIGN) {
int rhs = parse_assign_expr();
int xor = asm_rr_arith("xor", lhs, rhs);
asm_mv(lhs, xor);
return lhs;
} else if (token_type == TOKEN_OR_ASSIGN) {
int rhs = parse_assign_expr();
int or = asm_rr_arith("or", lhs, rhs);
asm_mv(lhs, or);
return lhs;
} else {
unget_token();
return lhs;
}
}
int parse_expr() {
int lhs = parse_assign_expr();
while (1) {
next_token();
if (token_type == TOKEN_COMMA) {
int rhs = parse_assign_expr();
lhs = rhs;
} else {
unget_token();
break;
}
}
return lhs;
}
void parse_local_variable(int type) {
if (type == TYPE_VOID) {
fprintf(stderr, "variable cannot be of void type\n");
exit(1);
}
expect_token(TOKEN_ID);
int id = token_data;
next_token();
if (token_type == TOKEN_BRACKET_LEFT) {
if (type & TYPE_PTR_MASK) {
fprintf(stderr, "array of pointers is not supported\n");
exit(1);
}
int size = expect_array_size();
expect_token(TOKEN_BRACKET_RIGHT);
declare_local_array(id, type, size);
return;
}
int slot = declare_local(id, type);
if (token_type == TOKEN_SEMICOLON) {
unget_token();
return;
}
unget_token();
expect_token(TOKEN_ASSIGN);
int reg = parse_expr();
store_into_local(reg, slot);
}
void parse_stmt();
void parse_if() {
expect_token(TOKEN_PAREN_LEFT);
int cond = parse_expr();
int label1 = next_label();
int label2 = next_label();
asm_beqz(cond, label1);
reset_temp();
expect_token(TOKEN_PAREN_RIGHT);
parse_stmt();
asm_j(label2);
asm_label(label1);
next_token();
if (token_type == TOKEN_ELSE) {
parse_stmt();
} else {
unget_token();
}
asm_label(label2);
}
void parse_while() {
expect_token(TOKEN_PAREN_LEFT);
int break_label = next_label();
int cont_label = next_label();
asm_push_label(break_label, cont_label);
asm_label(cont_label);
int cond = parse_expr();
asm_beqz(cond, break_label);
reset_temp();
expect_token(TOKEN_PAREN_RIGHT);
parse_stmt();
asm_j(cont_label);
asm_label(break_label);
asm_pop_label();
}
void parse_for() {
expect_token(TOKEN_PAREN_LEFT);
int cont_label = next_label();
int break_label = next_label();
int cond_label = next_label();
int body_label = next_label();
asm_push_label(break_label, cont_label);
parse_stmt(); // init
asm_label(cond_label);
int cond = parse_expr();
asm_beqz(cond, break_label);
asm_j(body_label);
reset_temp();
expect_token(TOKEN_SEMICOLON);
asm_label(cont_label);
parse_expr(); // update
reset_temp();
expect_token(TOKEN_PAREN_RIGHT);
asm_j(cond_label);
asm_label(body_label);
parse_stmt(); // body
asm_j(cont_label);
asm_label(break_label);
asm_pop_label();
}
void parse_do_while() {
int cont_label = next_label();
int break_label = next_label();
asm_push_label(break_label, cont_label);
asm_label(cont_label);
parse_stmt(); // body
expect_token(TOKEN_WHILE);
expect_token(TOKEN_PAREN_LEFT);
int cond = parse_expr();
asm_bnez(cond, cont_label);
expect_token(TOKEN_PAREN_RIGHT);
asm_label(break_label);
asm_pop_label();
}
void parse_stmt() {
next_token();
int decl_type;
if (token_type == TOKEN_IF) {
parse_if();
return;
} else if (token_type == TOKEN_WHILE) {
parse_while();
return;
} else if (token_type == TOKEN_FOR) {
parse_for();
return;
} else if (token_type == TOKEN_DO) {
parse_do_while();
} else if (token_type == TOKEN_BRACE_LEFT) {
while (1) {
next_token();
if (token_type == TOKEN_BRACE_RIGHT) {
break;
}
unget_token();
parse_stmt();
}
return;
} else if (token_type == TOKEN_RETURN) {
next_token();
if (token_type == TOKEN_SEMICOLON) {
asm_return();
return;
}
unget_token();
int rs1 = parse_expr();
asm_mv(REG_A0, rs1);
asm_return();
} else if (token_type == TOKEN_BREAK) {
asm_break();
} else if (token_type == TOKEN_CONTINUE) {
asm_continue();
} else if (token_type == TOKEN_SEMICOLON) {
unget_token();
} else if ((decl_type = parse_type()) >= 0) {
parse_local_variable(decl_type);
} else {
unget_token();
parse_expr();
}
expect_token(TOKEN_SEMICOLON);
reset_temp();
}
void parse_function(char* name) {
reset_local();
int arg = 0;
int args[8];
while (1) {
next_token();
if (token_type == TOKEN_PAREN_RIGHT) {
break;
}
if (token_type == TOKEN_ELLIPSIS) {
expect_token(TOKEN_PAREN_RIGHT);
break;
}
int arg_type = parse_type();
if (arg_type < 0 || arg_type == TYPE_VOID) {
fprintf(stderr, "expecting a non-void argument type: %d\n", arg_type);
exit(1);
}
expect_token(TOKEN_ID);
int arg_name = token_data;
next_token();
if (token_type == TOKEN_BRACKET_LEFT) {
expect_token(TOKEN_BRACKET_RIGHT);
next_token();
if (arg_type & TYPE_PTR_MASK) {
fprintf(stderr, "array of pointers is not supported\n");
exit(1);
}
arg_type |= TYPE_PTR_MASK;
}
if (arg >= 8) {
fprintf(stderr, "too many arguments\n");
exit(1);
}
args[arg++] = declare_local(token_data, arg_type);
if (token_type == TOKEN_COMMA) {
// continue;
} else if (token_type == TOKEN_PAREN_RIGHT) {
break;
} else {
fprintf(stderr, "expecting ',' or ')'\n");
exit(1);
}
}
next_token();
if (token_type == TOKEN_SEMICOLON) {
reset_local_table();
return;
}
unget_token();
expect_token(TOKEN_BRACE_LEFT);
printf(".text\n");
printf(".global %s\n", name);
printf("%s:\n", name);
int label = next_label();
int prolog_label = next_label();
epilog_label = next_label();
asm_j(prolog_label);
asm_label(label);
while (1) {
next_token();
if (token_type == TOKEN_BRACE_RIGHT) {
break;
}
unget_token();
parse_stmt();
}
if (streq(name, "main")) {
asm_mv(REG_A0, REG_ZERO);
}
asm_return();
int reg_used = max_reg_id - REG_S2;
if (reg_used > 14) reg_used = 14;
int frame_size = (max_local_id - 1 + reg_used + 2) * 8;
if (reg_used > 10) reg_used = 10;
if (frame_size % 16 != 0) {
frame_size += 8;
}
// prolog
asm_label(prolog_label);
asm_addi("sp", "sp", -frame_size);
asm_sd("ra", frame_size - 8, "sp");
asm_sd("fp", frame_size - 16, "sp");
for (int i = 0; i < reg_used; ++i) {
int reg = REG_S2 + i;
asm_sd(reg_name(reg), frame_size - 24 - i * 8, "sp");
}
asm_addi("fp", "sp", frame_size);
for (int i = 0; i < arg; ++i) {
store_into_local(REG_A0 + i, args[i]);
}
asm_j(label);
// epilog
asm_label(epilog_label);
asm_ld("ra", frame_size - 8, "sp");
asm_ld("fp", frame_size - 16, "sp");
for (int i = 0; i < reg_used; ++i) {
int reg = REG_S2 + i;
asm_ld(reg_name(reg), frame_size - 24 - i * 8, "sp");
}
asm_addi("sp", "sp", frame_size);
printf(" ret\n");
reset_local_table();
}
void parse_global_variable(int id, char* name, int type) {
printf(".data\n");
printf(".globl %s\n", name);
printf(".align 5\n");
printf("%s:\n", name);
if (token_type == TOKEN_ASSIGN) {
printf(" .dword %d\n", expect_const());
} else if (token_type == TOKEN_BRACKET_LEFT) {
if (type & TYPE_PTR_MASK) {
fprintf(stderr, "array of pointers is not supported\n");
exit(1);
}
int size = expect_array_size();
expect_token(TOKEN_BRACKET_RIGHT);
int array_size = array_size_of(type, size);
printf(" .zero %d\n", array_size);
declare_global(id, KIND_ARRAY, type);
} else {
printf(" .zero %d\n", 8);
unget_token();
}
}
void parse_global_declaration() {
int external = 0;
if (token_type == TOKEN_EXTERN) {
external = 1;
next_token();
}
int type = parse_type();
if (type < 0) {
fprintf(stderr, "expecting type for global declaration\n");
exit(1);
}
expect_token(TOKEN_ID);
int id = token_data;
char* name = id_table + id_lut[id];
next_token();
if (token_type == TOKEN_PAREN_LEFT) {
declare_global(id, KIND_FUNCTION, type);
parse_function(name);
} else {
if (type == TYPE_VOID) {
fprintf(stderr, "variable cannot be of void type\n");
exit(1);
}
declare_global(id, KIND_SCALAR, type);
if (external) {
unget_token();
} else {
parse_global_variable(id, name, type);
}
expect_token(TOKEN_SEMICOLON);
}
}
void parse_enum() {
expect_token(TOKEN_BRACE_LEFT);
int value = 0;
while (1) {
next_token();
if (token_type == TOKEN_BRACE_RIGHT) {
break;
}
if (token_type != TOKEN_ID) {
fprintf(stderr, "expecting identifier in enum\n");
exit(1);
}
int id = token_data;
next_token();
if (token_type == TOKEN_ASSIGN) {
value = expect_const();
} else {
unget_token();
}
const_table[id] = value++;
is_const[id] = 1;
next_token();
if (token_type == TOKEN_COMMA) {
// continue;
} else if (token_type == TOKEN_BRACE_RIGHT) {
break;
} else {
fprintf(stderr, "expecting ',' or '}'\n");
exit(1);
}
}
expect_token(TOKEN_SEMICOLON);
}
void parse_top_level() {
next_token();
if (token_type == TOKEN_EOF)
return;
if (token_type == TOKEN_ENUM) {
parse_enum();
} else {
parse_global_declaration();
}
parse_top_level();
}
void dump_string_table() {
printf(".data\n");
for (int i = 0; i < string_lut_size; ++i) {
printf(".LC%d: .string \"", i);
char* p = string_table + string_lut[i];
int ch;
while ((ch = *p++) != 0) {
if (ch == '\n') {
printf("\\n");
} else if (ch == '\t') {
printf("\\t");
} else if (ch == '\r') {
printf("\\r");
} else if (ch == '\\') {
printf("\\\\");
} else if (ch == '\'') {
printf("\\'");
} else if (ch == '\"') {
printf("\\\"");
} else {
printf("%c", ch);
}
}
printf("\"\n");
}
}
int main() {
parse_top_level();
dump_string_table();
}