1356 lines
34 KiB
C
1356 lines
34 KiB
C
#include "boot-lib.h"
|
|
|
|
// lexer
|
|
|
|
// Returns 1 when ch is an ASCII decimal digit ('0'..'9'), otherwise 0.
int is_digit(int ch) {
    if (ch < '0') {
        return 0;
    }
    return ch <= '9';
}
|
|
|
|
// Returns 1 when ch may begin an identifier (ASCII letter or '_'), otherwise 0.
int is_id_start(int ch) {
    if (ch == '_') {
        return 1;
    }
    if ('a' <= ch && ch <= 'z') {
        return 1;
    }
    return 'A' <= ch && ch <= 'Z';
}
|
|
|
|
// Returns 1 when ch may appear after the first character of an identifier
// (anything is_id_start accepts, or a decimal digit), otherwise 0.
int is_id_cont(int ch) {
    if (is_digit(ch)) {
        return 1;
    }
    return is_id_start(ch);
}
|
|
|
|
// Lexer state shared with the parser.
// token_state: set to 1 by unget_token(); the next call to next_token()
//              clears it and leaves the current token in place.
// token_type:  one of the TOKEN_* constants.
// token_data:  payload of the token: the value for TOKEN_NUMBER, an index
//              into id_lut for TOKEN_ID, an index into string_lut for
//              TOKEN_STRING.
int token_state;
int token_type;
int token_data;
|
|
|
|
// Token kinds stored in token_type.

// End of input, punctuation and arithmetic/bitwise/logical operators.
const int TOKEN_EOF = 0;
const int TOKEN_SEMICOLON = 1;
const int TOKEN_ADD = 2;
const int TOKEN_MINUS = 3;
const int TOKEN_STAR = 4;
const int TOKEN_DIV = 5;
const int TOKEN_REM = 6;
const int TOKEN_ASSIGN = 7;
const int TOKEN_COMMA = 8;
const int TOKEN_DOT = 9;
const int TOKEN_LSHIFT = 10; // unused
const int TOKEN_RSHIFT = 11; // unused
const int TOKEN_AND = 12;
const int TOKEN_OR = 13;
const int TOKEN_XOR = 14;
const int TOKEN_COMPL = 15;
const int TOKEN_NOT = 16;
const int TOKEN_LAND = 17;
const int TOKEN_LOR = 18;
const int TOKEN_ELLIPSIS = 19;
const int TOKEN_INC = 20;
const int TOKEN_DEC = 21;

// Comparison operators.
const int TOKEN_EQ = 40;
const int TOKEN_NE = 41;
const int TOKEN_LT = 42;
const int TOKEN_GT = 43;
const int TOKEN_LE = 44;
const int TOKEN_GE = 45;

// Brackets.
const int TOKEN_PAREN_LEFT = 50;
const int TOKEN_PAREN_RIGHT = 51;
const int TOKEN_BRACKET_LEFT = 52;
const int TOKEN_BRACKET_RIGHT = 53;
const int TOKEN_BRACE_LEFT = 54;
const int TOKEN_BRACE_RIGHT = 55;

// Literals, identifiers and keywords.
const int TOKEN_NUMBER = 100;
const int TOKEN_ID = 101;
const int TOKEN_INT = 102;
const int TOKEN_IF = 103;
const int TOKEN_ELSE = 104;
const int TOKEN_WHILE = 105;
const int TOKEN_BREAK = 106;
const int TOKEN_CONTINUE = 107;
const int TOKEN_RETURN = 108;
const int TOKEN_VOID = 109;
const int TOKEN_CONST = 110;
const int TOKEN_CHAR = 111;
const int TOKEN_FOR = 112;
const int TOKEN_STRING = 150;
|
|
|
|
// Reads the rest of a decimal literal whose first digit, ch, has already been
// consumed. The first non-digit character is pushed back with ungetchar().
// Returns the literal's value.
int parse_int(int ch) {
    int value = ch - '0';
    int next = getchar();
    while (is_digit(next)) {
        value = value * 10 + next - '0';
        next = getchar();
    }
    ungetchar(next);
    return value;
}
|
|
|
|
// Reads the character following a backslash and returns the character the
// escape denotes. Supported escapes: \n \t \r \0 \\ \' \".
// Any other character is reported on stderr and terminates the program.
int get_escaped_char() {
    int ch = getchar();
    if (ch == 'n') {
        return '\n';
    }
    if (ch == 't') {
        return '\t';
    }
    if (ch == 'r') {
        return '\r';
    }
    if (ch == '0') {
        return '\0';
    }
    // These three escapes stand for themselves.
    if (ch == '\\' || ch == '\'' || ch == '\"') {
        return ch;
    }
    eprintf("unexpected escaped character: %c\n", ch);
    exit(1);
    return ch;
}
|
|
|
|
// Returns 1 when the NUL-terminated strings s1 and s2 are equal, otherwise 0.
int streq(const char* s1, const char* s2) {
    while (*s1 == *s2) {
        if (*s1 == 0) {
            // Both strings ended at the same position.
            return 1;
        }
        s1++;
        s2++;
    }
    return 0;
}
|
|
|
|
// String literal storage.
// string_table holds the decoded bytes of every literal, each followed by a
// NUL; string_offset is the next free position in it.
// string_lut[i] is the offset in string_table where literal i starts;
// string_lut_size is the number of literals recorded so far.
char string_table[65536];
int string_offset;
int string_lut[4096];
int string_lut_size;

// Appends one byte to string_table, aborting instead of writing past its end.
void append_string_char(int ch) {
    if (string_offset >= 65536) {
        eprintf("string table overflow\n");
        exit(1);
    }
    string_table[string_offset++] = ch;
}

// Reads a string literal whose opening '"' has already been consumed, up to
// and including the closing '"'. Escape sequences are decoded through
// get_escaped_char(). End of input or a newline inside the literal is an
// error. Returns the index of the new literal in string_lut.
int parse_string() {
    if (string_lut_size >= 4096) {
        eprintf("too many string literals\n");
        exit(1);
    }
    int offset = string_offset;
    int ch;
    while ((ch = getchar()) != '"') {
        if (ch == -1 || ch == '\n') {
            eprintf("expecting '\"'\n");
            exit(1);
        }
        if (ch == '\\') {
            ch = get_escaped_char();
        }
        append_string_char(ch);
    }
    append_string_char(0);
    string_lut[string_lut_size] = offset;
    return string_lut_size++;
}
|
|
|
|
// Identifier storage.
// id_table holds the spelling of every identifier, each followed by a NUL;
// id_offset is the next free position in it.
// id_lut[i] is the offset in id_table where identifier i starts;
// id_lut_size is the number of identifiers recorded so far.
char id_table[65536];
int id_offset;
int id_lut[4096];
int id_lut_size;

// Appends one byte to id_table, aborting instead of writing past its end.
void append_id_char(int ch) {
    if (id_offset >= 65536) {
        eprintf("identifier table overflow\n");
        exit(1);
    }
    id_table[id_offset++] = ch;
}

// Reads an identifier whose first character, ch, has already been consumed.
// The first character that cannot continue the identifier is pushed back.
// The spelling is always appended as a new entry; removing keywords and
// duplicates is left to the caller (see rewind_id / dedup_id).
// Returns the index of the new entry in id_lut.
int parse_id(int ch) {
    if (id_lut_size >= 4096) {
        eprintf("too many identifiers\n");
        exit(1);
    }
    int offset = id_offset;
    append_id_char(ch);
    while (is_id_cont(ch = getchar())) {
        append_id_char(ch);
    }
    ungetchar(ch);
    append_id_char(0);
    id_lut[id_lut_size] = offset;
    return id_lut_size++;
}
|
|
|
|
void rewind_id(int new_data) {
|
|
id_offset = id_lut[token_data];
|
|
token_data = new_data;
|
|
--id_lut_size;
|
|
}
|
|
|
|
void dedup_id() {
|
|
int last_id = id_lut_size - 1;
|
|
char* latest = id_table + id_lut[last_id];
|
|
for (int i = 0; i < last_id; i++) {
|
|
char* candidate = id_table + id_lut[i];
|
|
if (streq(candidate, latest)) {
|
|
rewind_id(i);
|
|
return;
|
|
}
|
|
}
|
|
}
|
|
|
|
void parse_id_like(int ch) {
|
|
token_type = TOKEN_ID;
|
|
token_data = parse_id(ch);
|
|
char* id = id_table + id_lut[token_data];
|
|
if (streq(id, "int")) {
|
|
token_type = TOKEN_INT;
|
|
} else if (streq(id, "if")) {
|
|
token_type = TOKEN_IF;
|
|
} else if (streq(id, "else")) {
|
|
token_type = TOKEN_ELSE;
|
|
} else if (streq(id, "while")) {
|
|
token_type = TOKEN_WHILE;
|
|
} else if (streq(id, "break")) {
|
|
token_type = TOKEN_BREAK;
|
|
} else if (streq(id, "continue")) {
|
|
token_type = TOKEN_CONTINUE;
|
|
} else if (streq(id, "return")) {
|
|
token_type = TOKEN_RETURN;
|
|
} else if (streq(id, "void")) {
|
|
token_type = TOKEN_VOID;
|
|
} else if (streq(id, "const")) {
|
|
token_type = TOKEN_CONST;
|
|
} else if (streq(id, "char")) {
|
|
token_type = TOKEN_CHAR;
|
|
} else if (streq(id, "for")) {
|
|
token_type = TOKEN_FOR;
|
|
}
|
|
if (token_type != TOKEN_ID) {
|
|
rewind_id(0);
|
|
} else {
|
|
dedup_id();
|
|
}
|
|
}
|
|
|
|
void unget_token() {
|
|
token_state = 1;
|
|
}
|
|
|
|
void next_token() {
|
|
if (token_state) {
|
|
token_state = 0;
|
|
return;
|
|
}
|
|
int ch = getchar();
|
|
while (ch == ' ' || ch == '\t' || ch == '\r' || ch == '\n') {
|
|
ch = getchar();
|
|
}
|
|
if (ch == -1) {
|
|
token_type = TOKEN_EOF;
|
|
} else if (ch == '(') {
|
|
token_type = TOKEN_PAREN_LEFT;
|
|
} else if (ch == ')') {
|
|
token_type = TOKEN_PAREN_RIGHT;
|
|
} else if (ch == '[') {
|
|
token_type = TOKEN_BRACKET_LEFT;
|
|
} else if (ch == ']') {
|
|
token_type = TOKEN_BRACKET_RIGHT;
|
|
} else if (ch == '{') {
|
|
token_type = TOKEN_BRACE_LEFT;
|
|
} else if (ch == '}') {
|
|
token_type = TOKEN_BRACE_RIGHT;
|
|
} else if (ch == '+') {
|
|
int ch2 = getchar();
|
|
if (ch2 == '+') {
|
|
token_type = TOKEN_INC;
|
|
} else {
|
|
ungetchar(ch2);
|
|
token_type = TOKEN_ADD;
|
|
}
|
|
} else if (ch == '-') {
|
|
int ch2 = getchar();
|
|
if (ch2 == '-') {
|
|
token_type = TOKEN_DEC;
|
|
} else {
|
|
ungetchar(ch2);
|
|
token_type = TOKEN_MINUS;
|
|
}
|
|
} else if (ch == '*') {
|
|
token_type = TOKEN_STAR;
|
|
} else if (ch == '/') {
|
|
int ch2 = getchar();
|
|
if (ch2 == '/') {
|
|
while ((ch = getchar()) != '\n');
|
|
next_token();
|
|
return;
|
|
} else if (ch2 == '*') {
|
|
while (1) {
|
|
ch = getchar();
|
|
if (ch == '*') {
|
|
ch = getchar();
|
|
if (ch == '/') {
|
|
break;
|
|
}
|
|
}
|
|
}
|
|
next_token();
|
|
return;
|
|
} else {
|
|
ungetchar(ch2);
|
|
token_type = TOKEN_DIV;
|
|
}
|
|
} else if (ch == '%') {
|
|
token_type = TOKEN_REM;
|
|
} else if (ch == ';') {
|
|
token_type = TOKEN_SEMICOLON;
|
|
} else if (ch == ',') {
|
|
token_type = TOKEN_COMMA;
|
|
} else if (ch == '<') {
|
|
int ch2 = getchar();
|
|
if (ch2 == '=') {
|
|
token_type = TOKEN_LE;
|
|
} else if (ch2 == '<') {
|
|
token_type = TOKEN_LSHIFT;
|
|
} else {
|
|
ungetchar(ch2);
|
|
token_type = TOKEN_LT;
|
|
}
|
|
} else if (ch == '>') {
|
|
int ch2 = getchar();
|
|
if (ch2 == '=') {
|
|
token_type = TOKEN_GE;
|
|
} else if (ch2 == '>') {
|
|
token_type = TOKEN_RSHIFT;
|
|
} else {
|
|
ungetchar(ch2);
|
|
token_type = TOKEN_GT;
|
|
}
|
|
} else if (ch == '=') {
|
|
int ch2 = getchar();
|
|
if (ch2 == '=') {
|
|
token_type = TOKEN_EQ;
|
|
} else {
|
|
ungetchar(ch2);
|
|
token_type = TOKEN_ASSIGN;
|
|
}
|
|
} else if (ch == '!') {
|
|
int ch2 = getchar();
|
|
if (ch2 == '=') {
|
|
token_type = TOKEN_NE;
|
|
} else {
|
|
ungetchar(ch2);
|
|
token_type = TOKEN_NOT;
|
|
}
|
|
} else if (ch == '&') {
|
|
int ch2 = getchar();
|
|
if (ch2 == '&') {
|
|
token_type = TOKEN_LAND;
|
|
} else {
|
|
ungetchar(ch2);
|
|
token_type = TOKEN_AND;
|
|
}
|
|
} else if (ch == '|') {
|
|
int ch2 = getchar();
|
|
if (ch2 == '|') {
|
|
token_type = TOKEN_LOR;
|
|
} else {
|
|
ungetchar(ch2);
|
|
token_type = TOKEN_OR;
|
|
}
|
|
} else if (ch == '^') {
|
|
token_type = TOKEN_XOR;
|
|
} else if (ch == '~') {
|
|
token_type = TOKEN_COMPL;
|
|
} else if (ch == '\'') {
|
|
token_type = TOKEN_NUMBER;
|
|
token_data = getchar();
|
|
if (token_data == '\\') {
|
|
token_data = get_escaped_char();
|
|
}
|
|
if (getchar() != '\'') {
|
|
eprintf("expecting '\n");
|
|
exit(1);
|
|
}
|
|
} else if (ch == '"') {
|
|
token_type = TOKEN_STRING;
|
|
token_data = parse_string();
|
|
} else if (ch == '.') {
|
|
int ch2 = getchar();
|
|
if (ch2 == '.') {
|
|
int ch3 = getchar();
|
|
if (ch3 == '.') {
|
|
token_type = TOKEN_ELLIPSIS;
|
|
} else {
|
|
eprintf("unexpected character: %c\n", ch3);
|
|
exit(1);
|
|
}
|
|
} else {
|
|
eprintf("unexpected character: %c\n", ch2);
|
|
exit(1);
|
|
}
|
|
} else if (is_digit(ch)) {
|
|
token_type = TOKEN_NUMBER;
|
|
token_data = parse_int(ch);
|
|
} else if (is_id_start(ch)) {
|
|
parse_id_like(ch);
|
|
} else {
|
|
eprintf("unexpected character: %c(%d)\n", ch, ch);
|
|
exit(1);
|
|
}
|
|
eprintf("token: %d\n", token_type);
|
|
if (token_type == TOKEN_ID) {
|
|
const char* name = id_table + id_lut[token_data];
|
|
eprintf(" id: %s\n", name);
|
|
} else if (token_type == TOKEN_NUMBER) {
|
|
eprintf(" number: %d\n", token_data);
|
|
}
|
|
}
|
|
|
|
void expect_token(int expected_type) {
|
|
next_token();
|
|
if (token_type != expected_type) {
|
|
eprintf("unexpected token: %d, should be %d\n", token_type, expected_type);
|
|
exit(1);
|
|
}
|
|
}
|
|
|
|
// Value types. A pointer type is its base type with the TYPE_PTR_MASK bit set;
// only one level of pointer can be represented.
const int TYPE_VOID = 0;
const int TYPE_INT = 1;
const int TYPE_CHAR = 2;
const int TYPE_VOID_PTR = 16;
const int TYPE_INT_PTR = 17;
const int TYPE_CHAR_PTR = 18;
const int TYPE_PTR_MASK = 16;
|
|
|
|
int parse_type() {
|
|
if (token_type == TOKEN_CONST) {
|
|
next_token();
|
|
}
|
|
if (token_type == TOKEN_INT) {
|
|
next_token();
|
|
if (token_type == TOKEN_STAR) {
|
|
return TYPE_INT_PTR;
|
|
}
|
|
unget_token();
|
|
return TYPE_INT;
|
|
} else if (token_type == TOKEN_CHAR) {
|
|
next_token();
|
|
if (token_type == TOKEN_STAR) {
|
|
return TYPE_CHAR_PTR;
|
|
}
|
|
unget_token();
|
|
return TYPE_CHAR;
|
|
} else if (token_type == TOKEN_VOID) {
|
|
next_token();
|
|
if (token_type == TOKEN_STAR) {
|
|
return TYPE_VOID_PTR;
|
|
}
|
|
unget_token();
|
|
return TYPE_VOID;
|
|
} else {
|
|
return -1;
|
|
}
|
|
}
|
|
|
|
// asm
|
|
|
|
|
|
// Label of the epilogue of the function being compiled; 'return' jumps here.
int epilog_label;

// Maps an identifier index to its local slot id; 0 means "not a local".
int local_table[4096]; // id -> local id
// Next free slot id. Slot ids start at 2.
int next_local_id = 2;
// One past the highest slot id handed out in the current function.
int max_local_id = 2;

// What a local slot or a global identifier denotes.
const int MARKER_TEMP = 0;
const int MARKER_SCALAR = 1;
const int MARKER_ARRAY = 2;
const int MARKER_FUNCTION = 3;

// Marker and TYPE_* value per local slot id / per global identifier index.
int local_marker[4096];
int global_marker[4096];
int local_type[4096];
int global_type[4096];
|
|
|
|
void reset_local() {
|
|
next_local_id = 2;
|
|
max_local_id = 2;
|
|
for (int i = 0; i < 4096; ++i) {
|
|
local_table[i] = 0;
|
|
local_marker[i] = MARKER_TEMP;
|
|
local_type[i] = TYPE_VOID;
|
|
}
|
|
}
|
|
|
|
void reset_temp() {
|
|
while (next_local_id > 2 && local_marker[next_local_id - 1] == MARKER_TEMP) {
|
|
--next_local_id;
|
|
}
|
|
}
|
|
|
|
int next_reg(int type) {
|
|
int reg = next_local_id++;
|
|
local_type[reg] = type;
|
|
if (next_local_id > max_local_id) {
|
|
max_local_id = next_local_id;
|
|
}
|
|
return reg;
|
|
}
|
|
|
|
int declare_local(int id, int type) {
|
|
if (local_table[id] != 0) return local_table[id];
|
|
int reg = next_reg(type);
|
|
local_marker[reg] = MARKER_SCALAR;
|
|
return local_table[id] = reg;
|
|
}
|
|
|
|
int declare_local_array(int id, int type, int size) {
|
|
if (local_table[id] != 0) return local_table[id];
|
|
int reg;
|
|
for (int i = 0; i < size; ++i) local_marker[reg = next_reg(type)] = MARKER_ARRAY;
|
|
return local_table[id] = reg;
|
|
}
|
|
|
|
void declare_global(int id, int marker, int type) {
|
|
global_marker[id] = marker;
|
|
global_type[id] = type;
|
|
}
|
|
|
|
// Returns 1 when value lies in -2048..2047 (the range of a 12-bit signed
// immediate), otherwise 0.
int check_itype_immediate(int value) {
    if (value < -2048) {
        return 0;
    }
    return value <= 2047;
}
|
|
|
|
// Emits a load of rd from address rs + imm.
// When imm does not pass check_itype_immediate() the address is first formed
// in t0, which is therefore clobbered.
void asm_ld(const char* rd, int imm, const char* rs) {
    if (!check_itype_immediate(imm)) {
        printf(" li t0, %d\n", imm);
        printf(" add t0, %s, t0\n", rs);
        printf(" ld %s, 0(t0)\n", rd);
        return;
    }
    printf(" ld %s, %d(%s)\n", rd, imm, rs);
}
|
|
|
|
// Emits a store of rs1 to address rs2 + imm.
// When imm does not pass check_itype_immediate() the address is first formed
// in t0, so in that case rs1 must not be t0.
void asm_sd(const char* rs1, int imm, const char* rs2) {
    if (!check_itype_immediate(imm)) {
        printf(" li t0, %d\n", imm);
        printf(" add t0, %s, t0\n", rs2);
        printf(" sd %s, 0(t0)\n", rs1);
        return;
    }
    printf(" sd %s, %d(%s)\n", rs1, imm, rs2);
}
|
|
|
|
// Emits rd = rs + imm.
// When imm does not pass check_itype_immediate() the constant is first loaded
// into t0, which is therefore clobbered.
void asm_addi(const char* rd, const char* rs, int imm) {
    if (!check_itype_immediate(imm)) {
        printf(" li t0, %d\n", imm);
        printf(" add %s, %s, t0\n", rd, rs);
        return;
    }
    printf(" addi %s, %s, %d\n", rd, rs, imm);
}
|
|
|
|
const int INDIRECTION = 1048576; // 2**20
|
|
|
|
int local_type_of(int rs1) {
|
|
return local_type[rs1 & ~INDIRECTION];
|
|
}
|
|
|
|
void load_address(int rd, int id) {
|
|
if (id & INDIRECTION) {
|
|
id = id & ~INDIRECTION;
|
|
int offset = -id * 8 - 8;
|
|
if (check_itype_immediate(offset)) {
|
|
printf(" ld t%d, %d(fp) # indirection\n", rd, offset);
|
|
} else {
|
|
printf(" li t%d, %d\n", rd, offset);
|
|
printf(" add t%d, fp, t%d\n", rd, rd);
|
|
printf(" ld t%d, 0(t%d) # indirection\n", rd, rd);
|
|
}
|
|
} else {
|
|
int offset = -id * 8 - 8;
|
|
if (check_itype_immediate(offset)) {
|
|
printf(" addi t%d, fp, %d\n", rd, offset);
|
|
} else {
|
|
printf(" li t%d, %d\n", rd, offset);
|
|
printf(" add t%d, fp, t%d\n", rd, rd);
|
|
}
|
|
}
|
|
}
|
|
|
|
void load(int rd, int id) {
|
|
load_address(rd, id);
|
|
int type = local_type_of(id);
|
|
const char* op = "lw"; // int
|
|
if (type == TYPE_CHAR) {
|
|
op = "lb";
|
|
}
|
|
if (type & TYPE_PTR_MASK) {
|
|
op = "ld";
|
|
}
|
|
printf(" %s t%d, 0(t%d) # id: type %d\n", op, rd, rd, type);
|
|
}
|
|
|
|
void store_t0(int id) {
|
|
load_address(1, id);
|
|
int type = local_type_of(id);
|
|
const char* op = "sw"; // int
|
|
if (type == TYPE_CHAR) {
|
|
op = "sb";
|
|
}
|
|
if (type & TYPE_PTR_MASK) {
|
|
op = "sd";
|
|
}
|
|
printf(" %s t0, 0(t1) # id: type %d\n", op, type);
|
|
}
|
|
|
|
// Spills t0 into a newly allocated slot of the given type and returns the
// slot id.
int materialize_t0(int type) {
    int slot = next_reg(type);
    store_t0(slot);
    return slot;
}
|
|
|
|
int indirection_of(int reg) {
|
|
local_type[reg] = local_type[reg] & ~TYPE_PTR_MASK;
|
|
return reg | INDIRECTION;
|
|
}
|
|
|
|
int lookup(int id) {
|
|
int local = local_table[id];
|
|
if (local) {
|
|
if (local_marker[local] == MARKER_ARRAY) {
|
|
load_address(0, local);
|
|
return materialize_t0(local_type[local] | TYPE_PTR_MASK);
|
|
}
|
|
return local;
|
|
}
|
|
const char* name = id_table + id_lut[id];
|
|
if (global_marker[id]) {
|
|
printf(" la t0, %s # id: %d\n", name, id);
|
|
int reg = materialize_t0(global_type[id] | TYPE_PTR_MASK);
|
|
if (global_marker[id] == MARKER_SCALAR) {
|
|
reg = indirection_of(reg);
|
|
}
|
|
if (global_marker[id] == MARKER_FUNCTION) {
|
|
reg = reg | INDIRECTION;
|
|
}
|
|
return reg;
|
|
}
|
|
eprintf("unresolved identifier: %s\n", name);
|
|
exit(1);
|
|
}
|
|
|
|
|
|
// Counter behind next_label(); labels are numbered from 0.
int next_label_id = 0;

// Returns a label number that has not been handed out before.
int next_label() {
    int label = next_label_id;
    next_label_id = label + 1;
    return label;
}
|
|
|
|
// Emits the definition "L<label>:" and returns label.
int asm_label(int label) {
    printf("L%d:\n", label);
    return label;
}
|
|
|
|
int is_not_reusable(int rs1) {
|
|
return (rs1 & INDIRECTION) || local_marker[rs1] != MARKER_TEMP;
|
|
}
|
|
|
|
// Emits the unary instruction "op t0, t0" on the value of rs1 and stores the
// result. The result overwrites rs1 when rs1 is a reusable temporary,
// otherwise it goes to a new slot of the same type. Returns the result slot.
int asm_r(const char* op, int rs1) {
    load(0, rs1);
    printf(" %s t0, t0\n", op);
    if (is_not_reusable(rs1)) {
        int fresh = next_reg(local_type_of(rs1));
        store_t0(fresh);
        return fresh;
    }
    store_t0(rs1);
    return rs1;
}
|
|
|
|
// Emits the binary instruction "op t0, t0, t1" on the values of rs1 and rs2
// and stores the result. Destination choice:
//  - rs2 not reusable: a new slot with the type of rs1;
//  - otherwise rs1 not reusable: rs2;
//  - otherwise: rs1.
// Returns the result slot.
int asm_rr(const char* op, int rs1, int rs2) {
    load(0, rs1);
    load(1, rs2);
    printf(" %s t0, t0, t1\n", op);
    int rd;
    if (is_not_reusable(rs2)) {
        rd = next_reg(local_type_of(rs1));
    } else if (is_not_reusable(rs1)) {
        rd = rs2;
    } else {
        rd = rs1;
    }
    store_t0(rd);
    return rd;
}
|
|
|
|
// Emits a branch to L<label> taken when the value of rs1 is zero.
// The value is loaded into t0 first.
void asm_beqz(int rs1, int label) {
    load(0, rs1);
    printf(" beqz t0, L%d\n", label);
}
|
|
|
|
// Emits a branch to L<label> taken when the value of rs1 is non-zero.
// The value is loaded into t0 first.
void asm_bnez(int rs1, int label) {
    load(0, rs1);
    printf(" bnez t0, L%d\n", label);
}
|
|
|
|
// Emits an unconditional jump to L<label>.
void asm_j(int label) {
    printf(" j L%d\n", label);
}
|
|
|
|
// Stacks of the labels that 'break' and 'continue' jump to, one entry per
// enclosing loop; the innermost loop is on top. Both stacks are pushed and
// popped together.
int break_label_stack[4096];
int cont_label_stack[4096];
int break_label_stack_size;
int cont_label_stack_size;
|
|
|
|
int asm_get_break_label() {
|
|
return break_label_stack[break_label_stack_size - 1];
|
|
}
|
|
|
|
int asm_get_cont_label() {
|
|
return cont_label_stack[cont_label_stack_size - 1];
|
|
}
|
|
|
|
void asm_push_label(int break_label, int cont_label) {
|
|
break_label_stack[break_label_stack_size++] = break_label;
|
|
cont_label_stack[cont_label_stack_size++] = cont_label;
|
|
}
|
|
|
|
void asm_pop_label() {
|
|
--break_label_stack_size;
|
|
--cont_label_stack_size;
|
|
}
|
|
|
|
int step_of(int type) {
|
|
if (type == TYPE_INT_PTR) {
|
|
return 4;
|
|
}
|
|
return 1;
|
|
}
|
|
|
|
void asm_slli_t0(int type) {
|
|
if (type == TYPE_INT_PTR) {
|
|
printf(" slli t0, t0, 2\n");
|
|
}
|
|
}
|
|
|
|
int asm_add(int lhs, int rhs) {
|
|
int type1 = local_type_of(lhs) & TYPE_PTR_MASK;
|
|
int type2 = local_type_of(rhs) & TYPE_PTR_MASK;
|
|
if (type1 != type2) {
|
|
int ptr;
|
|
int idx;
|
|
if (type1) {
|
|
ptr = lhs;
|
|
idx = rhs;
|
|
} else {
|
|
ptr = rhs;
|
|
idx = lhs;
|
|
}
|
|
int ptr_type = local_type_of(ptr);
|
|
load(0, idx);
|
|
load(1, ptr);
|
|
asm_slli_t0(ptr_type);
|
|
printf(" add t0, t0, t1\n");
|
|
return materialize_t0(ptr_type);
|
|
}
|
|
if (type1 && type2) {
|
|
eprintf("operands cannot be both pointers\n");
|
|
exit(1);
|
|
}
|
|
return asm_rr("add", lhs, rhs);
|
|
}
|
|
|
|
// parser
|
|
int parse_expr();
|
|
|
|
int parse_primary_expr() {
|
|
next_token();
|
|
if (token_type == TOKEN_EOF) {
|
|
exit(1);
|
|
} else if (token_type == TOKEN_NUMBER) {
|
|
printf(" li t0, %d\n", token_data);
|
|
return materialize_t0(TYPE_INT);
|
|
} else if (token_type == TOKEN_ID) {
|
|
return lookup(token_data);
|
|
} else if (token_type == TOKEN_STRING) {
|
|
printf(" la t0, .LC%d\n", token_data);
|
|
return materialize_t0(TYPE_CHAR_PTR);
|
|
} else if (token_type == TOKEN_PAREN_LEFT) {
|
|
int reg = parse_expr();
|
|
expect_token(TOKEN_PAREN_RIGHT);
|
|
return reg;
|
|
} else {
|
|
eprintf("unexpected token: %d\n", token_type);
|
|
exit(1);
|
|
}
|
|
}
|
|
|
|
int parse_postfix_expr() {
|
|
int lhs = parse_primary_expr();
|
|
while (1) {
|
|
next_token();
|
|
if (token_type == TOKEN_INC) {
|
|
int type = local_type_of(lhs);
|
|
int reg = next_reg(type);
|
|
load(0, lhs);
|
|
store_t0(reg);
|
|
printf(" addi t0, t0, %d\n", step_of(type));
|
|
store_t0(lhs);
|
|
return reg;
|
|
} else if (token_type == TOKEN_DEC) {
|
|
int type = local_type_of(lhs);
|
|
int reg = next_reg(type);
|
|
load(0, lhs);
|
|
store_t0(reg);
|
|
printf(" addi t0, t0, -%d\n", step_of(type));
|
|
store_t0(lhs);
|
|
return reg;
|
|
} else if (token_type == TOKEN_BRACKET_LEFT) {
|
|
int rhs = parse_expr();
|
|
expect_token(TOKEN_BRACKET_RIGHT);
|
|
return indirection_of(asm_add(lhs, rhs));
|
|
} else if (token_type == TOKEN_PAREN_LEFT) {
|
|
int arg = 0;
|
|
int args[8];
|
|
while (1) {
|
|
next_token();
|
|
if (token_type == TOKEN_PAREN_RIGHT) {
|
|
break;
|
|
}
|
|
unget_token();
|
|
if (arg >= 8) {
|
|
eprintf("too many arguments\n");
|
|
exit(1);
|
|
}
|
|
args[arg++] = parse_expr();
|
|
next_token();
|
|
if (token_type == TOKEN_COMMA) {
|
|
// continue;
|
|
} else if (token_type == TOKEN_PAREN_RIGHT) {
|
|
break;
|
|
} else {
|
|
eprintf("expecting ',' or ')'\n");
|
|
exit(1);
|
|
}
|
|
}
|
|
for (int i = 0; i < arg; ++i) {
|
|
load(0, args[i]);
|
|
printf(" mv a%d, t0\n", i);
|
|
}
|
|
load_address(0, lhs);
|
|
printf(" jalr t0\n");
|
|
printf(" mv t0, a0\n");
|
|
return materialize_t0(local_type_of(lhs));
|
|
} else {
|
|
unget_token();
|
|
return lhs;
|
|
}
|
|
}
|
|
}
|
|
|
|
int parse_prefix_expr() {
|
|
next_token();
|
|
if (token_type == TOKEN_AND) {
|
|
int reg = parse_postfix_expr();
|
|
load_address(0, reg);
|
|
return materialize_t0(local_type_of(reg) | TYPE_PTR_MASK);
|
|
} else if (token_type == TOKEN_STAR) {
|
|
int reg = parse_postfix_expr();
|
|
load(0, reg);
|
|
return indirection_of(materialize_t0(local_type_of(reg)));
|
|
} else if (token_type == TOKEN_MINUS) {
|
|
int reg = parse_postfix_expr();
|
|
return asm_r("neg", reg);
|
|
} else if (token_type == TOKEN_COMPL) {
|
|
int reg = parse_postfix_expr();
|
|
return asm_r("not", reg);
|
|
} else if (token_type == TOKEN_NOT) {
|
|
int reg = parse_postfix_expr();
|
|
return asm_r("seqz", reg);
|
|
} else if (token_type == TOKEN_INC) {
|
|
int reg = parse_postfix_expr();
|
|
load(0, reg);
|
|
printf(" addi t0, t0, %d\n", step_of(local_type_of(reg)));
|
|
store_t0(reg);
|
|
return reg;
|
|
} else if (token_type == TOKEN_DEC) {
|
|
int reg = parse_postfix_expr();
|
|
load(0, reg);
|
|
printf(" addi t0, t0, -%d\n", step_of(local_type_of(reg)));
|
|
store_t0(reg);
|
|
return reg;
|
|
} else {
|
|
unget_token();
|
|
return parse_postfix_expr();
|
|
}
|
|
}
|
|
|
|
int parse_mul_expr() {
|
|
int lhs = parse_prefix_expr();
|
|
while (1) {
|
|
next_token();
|
|
if (token_type == TOKEN_STAR) {
|
|
int rhs = parse_prefix_expr();
|
|
lhs = asm_rr("mul", lhs, rhs);
|
|
} else if (token_type == TOKEN_DIV) {
|
|
int rhs = parse_prefix_expr();
|
|
lhs = asm_rr("div", lhs, rhs);
|
|
} else if (token_type == TOKEN_REM) {
|
|
int rhs = parse_prefix_expr();
|
|
lhs = asm_rr("rem", lhs, rhs);
|
|
} else {
|
|
unget_token();
|
|
break;
|
|
}
|
|
}
|
|
return lhs;
|
|
}
|
|
|
|
int parse_add_expr() {
|
|
int lhs = parse_mul_expr();
|
|
while (1) {
|
|
next_token();
|
|
if (token_type == TOKEN_ADD) {
|
|
int rhs = parse_mul_expr();
|
|
lhs = asm_add(lhs, rhs);
|
|
} else if (token_type == TOKEN_MINUS) {
|
|
int rhs = parse_mul_expr();
|
|
lhs = asm_rr("sub", lhs, rhs);
|
|
} else {
|
|
unget_token();
|
|
break;
|
|
}
|
|
}
|
|
return lhs;
|
|
}
|
|
|
|
int parse_cmp_expr() {
|
|
int lhs = parse_add_expr();
|
|
while (1) {
|
|
next_token();
|
|
if (token_type == TOKEN_LT) {
|
|
int rhs = parse_add_expr();
|
|
lhs = asm_rr("slt", lhs, rhs);
|
|
} else if (token_type == TOKEN_GT) {
|
|
int rhs = parse_add_expr();
|
|
lhs = asm_rr("sgt", lhs, rhs);
|
|
} else if (token_type == TOKEN_LE) {
|
|
int rhs = parse_add_expr();
|
|
int sgt = asm_rr("sgt", lhs, rhs);
|
|
lhs = asm_r("seqz", sgt);
|
|
} else if (token_type == TOKEN_GE) {
|
|
int rhs = parse_add_expr();
|
|
int slt = asm_rr("slt", lhs, rhs);
|
|
lhs = asm_r("seqz", slt);
|
|
} else {
|
|
unget_token();
|
|
break;
|
|
}
|
|
}
|
|
return lhs;
|
|
}
|
|
|
|
int parse_eq_expr() {
|
|
int lhs = parse_cmp_expr();
|
|
while (1) {
|
|
next_token();
|
|
if (token_type == TOKEN_EQ) {
|
|
int rhs = parse_cmp_expr();
|
|
int xor0 = asm_rr("xor", lhs, rhs);
|
|
lhs = asm_r("seqz", xor0);
|
|
} else if (token_type == TOKEN_NE) {
|
|
int rhs = parse_cmp_expr();
|
|
int xor0 = asm_rr("xor", lhs, rhs);
|
|
lhs = asm_r("snez", xor0);
|
|
} else {
|
|
unget_token();
|
|
break;
|
|
}
|
|
}
|
|
return lhs;
|
|
}
|
|
|
|
int parse_bitwise_and_expr() {
|
|
int lhs = parse_eq_expr();
|
|
while (1) {
|
|
next_token();
|
|
if (token_type == TOKEN_AND) {
|
|
int rhs = parse_eq_expr();
|
|
lhs = asm_rr("and", lhs, rhs);
|
|
} else {
|
|
unget_token();
|
|
break;
|
|
}
|
|
}
|
|
return lhs;
|
|
}
|
|
|
|
|
|
int parse_bitwise_xor_expr() {
|
|
int lhs = parse_bitwise_and_expr();
|
|
while (1) {
|
|
next_token();
|
|
if (token_type == TOKEN_XOR) {
|
|
int rhs = parse_bitwise_and_expr();
|
|
lhs = asm_rr("xor", lhs, rhs);
|
|
} else {
|
|
unget_token();
|
|
break;
|
|
}
|
|
}
|
|
return lhs;
|
|
}
|
|
|
|
int parse_bitwise_or_expr() {
|
|
int lhs = parse_bitwise_xor_expr();
|
|
while (1) {
|
|
next_token();
|
|
if (token_type == TOKEN_OR) {
|
|
int rhs = parse_bitwise_xor_expr();
|
|
lhs = asm_rr("or", lhs, rhs);
|
|
} else {
|
|
unget_token();
|
|
break;
|
|
}
|
|
}
|
|
return lhs;
|
|
}
|
|
|
|
int parse_logical_and_expr() {
|
|
int lhs = parse_bitwise_or_expr();
|
|
int label = next_label();
|
|
int label_used = 0;
|
|
while (1) {
|
|
next_token();
|
|
if (token_type == TOKEN_LAND) {
|
|
lhs = asm_r("snez", lhs);
|
|
asm_beqz(lhs, label);
|
|
int rhs = parse_bitwise_or_expr();
|
|
rhs = asm_r("snez", rhs);
|
|
lhs = asm_rr("and", lhs, rhs);
|
|
label_used = 1;
|
|
} else {
|
|
unget_token();
|
|
break;
|
|
}
|
|
}
|
|
if (label_used) {
|
|
asm_label(label);
|
|
}
|
|
return lhs;
|
|
}
|
|
|
|
int parse_logical_or_expr() {
|
|
int lhs = parse_logical_and_expr();
|
|
int label = next_label();
|
|
int label_used = 0;
|
|
while (1) {
|
|
next_token();
|
|
if (token_type == TOKEN_LOR) {
|
|
lhs = asm_r("snez", lhs);
|
|
asm_bnez(lhs, label);
|
|
int rhs = parse_logical_and_expr();
|
|
rhs = asm_r("snez", rhs);
|
|
lhs = asm_rr("or", lhs, rhs);
|
|
label_used = 1;
|
|
} else {
|
|
unget_token();
|
|
break;
|
|
}
|
|
}
|
|
if (label_used) {
|
|
asm_label(label);
|
|
}
|
|
return lhs;
|
|
}
|
|
|
|
int parse_assign_expr() {
|
|
int lhs = parse_logical_or_expr();
|
|
next_token();
|
|
if (token_type == TOKEN_ASSIGN) {
|
|
int rhs = parse_assign_expr();
|
|
load(0, rhs);
|
|
store_t0(lhs);
|
|
return lhs;
|
|
} else {
|
|
unget_token();
|
|
return lhs;
|
|
}
|
|
}
|
|
|
|
// Parses a full expression (an assignment expression; there is no comma
// operator) and returns the slot of its value.
int parse_expr() {
    int reg = parse_assign_expr();
    return reg;
}
|
|
|
|
void parse_local_variable(int type) {
|
|
expect_token(TOKEN_ID);
|
|
int id = token_data;
|
|
next_token();
|
|
if (token_type == TOKEN_BRACKET_LEFT) {
|
|
if (type & TYPE_PTR_MASK) {
|
|
eprintf("local variable of array of pointers is not supported\n");
|
|
exit(1);
|
|
}
|
|
expect_token(TOKEN_NUMBER);
|
|
int size = token_data;
|
|
expect_token(TOKEN_BRACKET_RIGHT);
|
|
declare_local_array(id, type, size);
|
|
next_token();
|
|
} else {
|
|
declare_local(id, type);
|
|
}
|
|
if (token_type == TOKEN_SEMICOLON) {
|
|
unget_token();
|
|
return;
|
|
}
|
|
unget_token();
|
|
expect_token(TOKEN_ASSIGN);
|
|
int reg = parse_expr();
|
|
load(0, reg);
|
|
store_t0(local_table[id]);
|
|
}
|
|
|
|
void parse_stmt();
|
|
|
|
void parse_if() {
|
|
expect_token(TOKEN_PAREN_LEFT);
|
|
int cond = parse_expr();
|
|
int label1 = next_label();
|
|
int label2 = next_label();
|
|
asm_beqz(cond, label1);
|
|
reset_temp();
|
|
expect_token(TOKEN_PAREN_RIGHT);
|
|
parse_stmt();
|
|
asm_j(label2);
|
|
asm_label(label1);
|
|
next_token();
|
|
if (token_type == TOKEN_ELSE) {
|
|
parse_stmt();
|
|
} else {
|
|
unget_token();
|
|
}
|
|
asm_label(label2);
|
|
}
|
|
|
|
void parse_while() {
|
|
expect_token(TOKEN_PAREN_LEFT);
|
|
int break_label = next_label();
|
|
int cont_label = next_label();
|
|
asm_push_label(break_label, cont_label);
|
|
asm_label(cont_label);
|
|
int cond = parse_expr();
|
|
asm_beqz(cond, break_label);
|
|
reset_temp();
|
|
expect_token(TOKEN_PAREN_RIGHT);
|
|
parse_stmt();
|
|
asm_j(cont_label);
|
|
asm_label(break_label);
|
|
asm_pop_label();
|
|
}
|
|
|
|
void parse_for() {
|
|
expect_token(TOKEN_PAREN_LEFT);
|
|
int cont_label = next_label();
|
|
int break_label = next_label();
|
|
int cond_label = next_label();
|
|
int body_label = next_label();
|
|
asm_push_label(break_label, cont_label);
|
|
parse_stmt(); // init
|
|
asm_label(cond_label);
|
|
int cond = parse_expr();
|
|
asm_beqz(cond, break_label);
|
|
asm_j(body_label);
|
|
reset_temp();
|
|
expect_token(TOKEN_SEMICOLON);
|
|
asm_label(cont_label);
|
|
parse_expr(); // update
|
|
reset_temp();
|
|
expect_token(TOKEN_PAREN_RIGHT);
|
|
asm_j(cond_label);
|
|
asm_label(body_label);
|
|
parse_stmt(); // body
|
|
asm_j(cont_label);
|
|
asm_label(break_label);
|
|
asm_pop_label();
|
|
}
|
|
|
|
void parse_stmt() {
|
|
next_token();
|
|
int decl_type;
|
|
if (token_type == TOKEN_IF) {
|
|
parse_if();
|
|
return;
|
|
} else if (token_type == TOKEN_WHILE) {
|
|
parse_while();
|
|
return;
|
|
} else if (token_type == TOKEN_FOR) {
|
|
parse_for();
|
|
return;
|
|
} else if (token_type == TOKEN_BRACE_LEFT) {
|
|
while (1) {
|
|
next_token();
|
|
if (token_type == TOKEN_BRACE_RIGHT) {
|
|
break;
|
|
}
|
|
unget_token();
|
|
parse_stmt();
|
|
}
|
|
return;
|
|
} else if (token_type == TOKEN_RETURN) {
|
|
next_token();
|
|
if (token_type == TOKEN_SEMICOLON) {
|
|
asm_j(epilog_label);
|
|
return;
|
|
}
|
|
unget_token();
|
|
int reg = parse_expr();
|
|
load(0, reg);
|
|
printf(" mv a0, t0\n");
|
|
asm_j(epilog_label);
|
|
} else if (token_type == TOKEN_BREAK) {
|
|
int label = asm_get_break_label();
|
|
asm_j(label);
|
|
} else if (token_type == TOKEN_CONTINUE) {
|
|
int label = asm_get_cont_label();
|
|
asm_j(label);
|
|
} else if (token_type == TOKEN_SEMICOLON) {
|
|
unget_token();
|
|
} else if ((decl_type = parse_type()) >= 0) {
|
|
parse_local_variable(decl_type);
|
|
} else {
|
|
unget_token();
|
|
parse_expr();
|
|
}
|
|
expect_token(TOKEN_SEMICOLON);
|
|
reset_temp();
|
|
}
|
|
|
|
void parse_function(const char* name) {
|
|
reset_local();
|
|
int arg = 0;
|
|
int args[8];
|
|
while (1) {
|
|
next_token();
|
|
if (token_type == TOKEN_PAREN_RIGHT) {
|
|
break;
|
|
}
|
|
if (token_type == TOKEN_ELLIPSIS) {
|
|
expect_token(TOKEN_PAREN_RIGHT);
|
|
break;
|
|
}
|
|
int decl_type = parse_type();
|
|
expect_token(TOKEN_ID);
|
|
args[arg++] = declare_local(token_data, decl_type);
|
|
next_token();
|
|
if (token_type == TOKEN_BRACKET_LEFT) {
|
|
expect_token(TOKEN_BRACKET_RIGHT);
|
|
next_token();
|
|
}
|
|
if (token_type == TOKEN_COMMA) {
|
|
// continue;
|
|
} else if (token_type == TOKEN_PAREN_RIGHT) {
|
|
break;
|
|
} else {
|
|
eprintf("expecting ',' or ')'\n");
|
|
exit(1);
|
|
}
|
|
}
|
|
next_token();
|
|
if (token_type == TOKEN_SEMICOLON) {
|
|
return;
|
|
}
|
|
unget_token();
|
|
expect_token(TOKEN_BRACE_LEFT);
|
|
printf(".text\n");
|
|
printf(".global %s\n", name);
|
|
printf("%s:\n", name);
|
|
int label = next_label();
|
|
int prolog_label = next_label();
|
|
epilog_label = next_label();
|
|
asm_j(prolog_label);
|
|
asm_label(label);
|
|
while (1) {
|
|
next_token();
|
|
if (token_type == TOKEN_BRACE_RIGHT) {
|
|
break;
|
|
}
|
|
unget_token();
|
|
parse_stmt();
|
|
}
|
|
asm_j(epilog_label);
|
|
int shift = max_local_id * 8;
|
|
if (shift % 16 != 0) {
|
|
shift = shift + 8;
|
|
}
|
|
// prolog
|
|
asm_label(prolog_label);
|
|
asm_addi("sp", "sp", -shift);
|
|
asm_sd("ra", shift - 8, "sp");
|
|
asm_sd("fp", shift - 16, "sp");
|
|
asm_addi("fp", "sp", shift);
|
|
for (int i = 0; i < arg; ++i) {
|
|
printf(" mv t0, a%d\n", i);
|
|
store_t0(args[i]);
|
|
}
|
|
asm_j(label);
|
|
// epilog
|
|
asm_label(epilog_label);
|
|
asm_ld("fp", shift - 16, "sp");
|
|
asm_ld("ra", shift - 8, "sp");
|
|
asm_addi("sp", "sp", shift);
|
|
printf(" ret\n");
|
|
}
|
|
|
|
void parse_global_variable(int id, const char* name, int type) {
|
|
if (type & TYPE_PTR_MASK) {
|
|
eprintf("global variable of pointer is not supported\n");
|
|
exit(1);
|
|
}
|
|
printf(".data\n");
|
|
printf(".globl %s\n", name);
|
|
printf(".align 5\n");
|
|
printf("%s:\n", name);
|
|
if (token_type == TOKEN_ASSIGN) {
|
|
expect_token(TOKEN_NUMBER);
|
|
printf(" .word %d\n", token_data);
|
|
} else if (token_type == TOKEN_BRACKET_LEFT) {
|
|
expect_token(TOKEN_NUMBER);
|
|
int size = token_data;
|
|
expect_token(TOKEN_BRACKET_RIGHT);
|
|
int array_size = 4 * size;
|
|
if (type == TYPE_CHAR) {
|
|
array_size = size;
|
|
}
|
|
printf(" .zero %d\n", array_size);
|
|
declare_global(id, MARKER_ARRAY, type);
|
|
} else {
|
|
printf(" .zero %d\n", 4);
|
|
unget_token();
|
|
}
|
|
expect_token(TOKEN_SEMICOLON);
|
|
}
|
|
|
|
void parse_decl(int type) {
|
|
expect_token(TOKEN_ID);
|
|
int id = token_data;
|
|
char* name = id_table + id_lut[id];
|
|
next_token();
|
|
if (token_type == TOKEN_PAREN_LEFT) {
|
|
declare_global(id, MARKER_FUNCTION, type);
|
|
parse_function(name);
|
|
} else {
|
|
declare_global(id, MARKER_SCALAR, type);
|
|
parse_global_variable(id, name, type);
|
|
}
|
|
}
|
|
|
|
void parse_top_level() {
|
|
next_token();
|
|
int decl_type;
|
|
if (token_type == TOKEN_EOF) {
|
|
return;
|
|
} else if ((decl_type = parse_type()) >= 0) {
|
|
parse_decl(decl_type);
|
|
} else {
|
|
eprintf("unexpected token: %d\n", token_type);
|
|
exit(1);
|
|
}
|
|
parse_top_level();
|
|
}
|
|
|
|
void dump_string_table() {
|
|
printf(".data\n");
|
|
for (int i = 0; i < string_lut_size; ++i) {
|
|
printf(".LC%d: .string \"", i);
|
|
int offset = 0;
|
|
int ch;
|
|
while ((ch = string_table[string_lut[i] + offset]) != 0) {
|
|
if (ch == '\n') {
|
|
printf("\\n");
|
|
} else if (ch == '\t') {
|
|
printf("\\t");
|
|
} else if (ch == '\r') {
|
|
printf("\\r");
|
|
} else if (ch == '\0') {
|
|
printf("\\0");
|
|
} else if (ch == '\\') {
|
|
printf("\\\\");
|
|
} else if (ch == '\'') {
|
|
printf("\\'");
|
|
} else if (ch == '\"') {
|
|
printf("\\\"");
|
|
} else {
|
|
printf("%c", ch);
|
|
}
|
|
offset++;
|
|
}
|
|
printf("\"\n");
|
|
}
|
|
}
|
|
|
|
// Entry point: compiles the whole input, then emits the string literals
// collected along the way.
int main() {
    parse_top_level();
    dump_string_table();
    return 0;
}
|