// Combined listing of three files that arrived as one whitespace-collapsed
// patch, in this order: boot-lib.c, boot-lib.h, boot.c.
//
// NOTE(review): boot.c appears to be written in the same C subset that it
// compiles (no compound assignment, no ternary, no switch, `()` instead of
// `(void)`). New code below stays inside that subset on purpose; confirm
// before modernising.

// ===== boot-lib.c =====
// NOTE(review): the header names of the two #include lines were lost in
// extraction; <stdarg.h> and <stdio.h> are what the code below requires.
#include <stdarg.h>
#include <stdio.h>

// printf-style formatting to stderr; returns vfprintf's result.
int eprintf(const char format[], ...) {
    va_list args;
    va_start(args, format);
    int ret = vfprintf(stderr, format, args);
    va_end(args);
    return ret;
}

// Pushes ch back onto stdin.
void ungetchar(int ch) {
    ungetc(ch, stdin);
}

// Non-zero once a read on stdin has hit end-of-file.
int eof() {
    return feof(stdin);
}

// Byte-granular accessors for char arrays.
int CA_get(char array[], int index) {
    return array[index];
}

void CA_set(char array[], int index, int value) {
    array[index] = value;
}

// this may be unnecessary
char* CA_offset(char array[], int offset) {
    return array + offset;
}

// ===== boot-lib.h =====
// NOTE(review): three #include lines here lost their header names in
// extraction. The headers below are a guess based on the std functions
// declared underneath - confirm against the real file.
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

// std
int printf(const char* format, ...);
int getchar();
int putchar(int ch);
int strcmp(const char* s1, const char* s2);
void exit(int status);

// ext
void ungetchar(int ch);
int eof();
int CA_get(char array[], int index);
void CA_set(char array[], int index, int value);
char* CA_offset(char array[], int offset);
int eprintf(const char* format, ...);

// ===== boot.c =====
// In the three-file layout boot.c starts with:  #include "boot-lib.h"
// The declarations are already inlined above in this combined listing.

// lexer

int is_digit(int ch) {
    return '0' <= ch && ch <= '9';
}

int is_id_start(int ch) {
    return ('a' <= ch && ch <= 'z') || ('A' <= ch && ch <= 'Z') || ch == '_';
}

int is_id_cont(int ch) {
    return is_id_start(ch) || is_digit(ch);
}

// token_state: 1 when the current token has been pushed back (unget_token).
// token_type:  one of the TOKEN_* constants.
// token_data:  number value, or index into id_lut / string_lut.
int token_state;
int token_type;
int token_data;

const int TOKEN_EOF = 0;
const int TOKEN_SEMICOLON = 1;
const int TOKEN_ADD = 2;
const int TOKEN_MINUS = 3;
const int TOKEN_STAR = 4;
const int TOKEN_DIV = 5;
const int TOKEN_REM = 6;
const int TOKEN_ASSIGN = 7;
const int TOKEN_COMMA = 8;
const int TOKEN_DOT = 9;
const int TOKEN_LSHIFT = 10; // unused
const int TOKEN_RSHIFT = 11; // unused
const int TOKEN_AND = 12;
const int TOKEN_OR = 13;
const int TOKEN_XOR = 14;
const int TOKEN_COMPL = 15;
const int TOKEN_NOT = 16;
const int TOKEN_LAND = 17;
const int TOKEN_LOR = 18;
const int TOKEN_ELLIPSIS = 19;
const int TOKEN_INC = 20;
const int TOKEN_DEC = 21;

const int TOKEN_EQ = 40;
const int TOKEN_NE = 41;
const int TOKEN_LT = 42;
const int TOKEN_GT = 43;
const int TOKEN_LE = 44;
const int TOKEN_GE = 45;

const int TOKEN_PAREN_LEFT = 50;
const int TOKEN_PAREN_RIGHT = 51;
const int TOKEN_BRACKET_LEFT = 52;
const int TOKEN_BRACKET_RIGHT = 53;
const int TOKEN_BRACE_LEFT = 54;
const int TOKEN_BRACE_RIGHT = 55;

const int TOKEN_NUMBER = 100;
const int TOKEN_ID = 101;
const int TOKEN_INT = 102;
const int TOKEN_IF = 103;
const int TOKEN_ELSE = 104;
const int TOKEN_WHILE = 105;
const int TOKEN_BREAK = 106;
const int TOKEN_CONTINUE = 107;
const int TOKEN_RETURN = 108;
const int TOKEN_VOID = 109;
const int TOKEN_CONST = 110;
const int TOKEN_CHAR = 111;
const int TOKEN_FOR = 112;
const int TOKEN_STRING = 150;

// Reads the rest of a decimal literal whose first digit is ch.
// The first non-digit character is pushed back.
int parse_int(int ch) {
    int num = ch - '0';
    while (!eof() && is_digit(ch = getchar())) {
        num = num * 10;
        num = num + ch - '0';
    }
    ungetchar(ch);
    return num;
}

// Reads the character following a backslash and returns the character it
// denotes; exits on an escape that is not recognised.
int get_escaped_char() {
    int ch = getchar();
    if (ch == 'n') {
        ch = '\n';
    } else if (ch == 't') {
        ch = '\t';
    } else if (ch == 'r') {
        ch = '\r';
    } else if (ch == '0') {
        ch = '\0';
    } else if (ch == '\\') {
        ch = '\\';
    } else if (ch == '\'') {
        ch = '\'';
    } else if (ch == '\"') {
        ch = '\"';
    } else {
        eprintf("unexpected escaped character: %c\n", ch);
        exit(1);
    }
    return ch;
}


// String literals are stored back to back (NUL terminated) in string_table;
// string_lut[i] is the start offset of literal number i.
char string_table[65536];
int string_offset;
int string_lut[4096];
int string_lut_size;

// Reads a string literal body (the opening quote is already consumed) and
// returns its index in string_lut.
int parse_string() {
    int offset = string_offset;
    // ch is an int so that end of input (-1) is distinguishable from data.
    int ch = getchar();
    while (ch != '"') {
        if (ch == -1) {
            eprintf("unterminated string literal\n");
            exit(1);
        }
        if (ch == '\\') {
            ch = get_escaped_char();
        }
        CA_set(string_table, string_offset++, ch);
        ch = getchar();
    }
    CA_set(string_table, string_offset++, 0);
    string_lut[string_lut_size] = offset;
    return string_lut_size++;
}

// Identifiers are stored like strings: id_table holds the text,
// id_lut[i] is the start offset of identifier number i.
char id_table[65536];
int id_offset;
int id_lut[4096];
int id_lut_size;

// Reads an identifier whose first character is ch, appends it to id_table
// and returns its new index in id_lut.
int parse_id(int ch) {
    int offset = id_offset;
    CA_set(id_table, id_offset++, ch);
    while (!eof() && is_id_cont(ch = getchar())) {
        CA_set(id_table, id_offset++, ch);
    }
    ungetchar(ch);
    CA_set(id_table, id_offset++, 0);
    id_lut[id_lut_size] = offset;
    return id_lut_size++;
}

// Drops the entry token_data currently points at (expected to be the newest
// one): id_offset is reset to its start, the table shrinks by one and
// token_data becomes new_data.
void rewind_id(int new_data) {
    id_offset = id_lut[token_data];
    token_data = new_data;
    --id_lut_size;
}

// If the newest identifier already exists earlier in the table, discard the
// new copy and make token_data refer to the earlier entry.
void dedup_id() {
    char* latest = CA_offset(id_table, id_lut[id_lut_size - 1]);
    for (int i = 0; i < id_lut_size - 1; i++) {
        char* candidate = CA_offset(id_table, id_lut[i]);
        if (!strcmp(candidate, latest)) {
            rewind_id(i);
            return;
        }
    }
}

// Lexes an identifier or keyword starting with ch and sets
// token_type / token_data accordingly.
void parse_id_like(int ch) {
    token_type = TOKEN_ID;
    token_data = parse_id(ch);
    char* term = CA_offset(id_table, id_lut[token_data]);
    if (!strcmp(term, "int")) {
        token_type = TOKEN_INT;
    } else if (!strcmp(term, "if")) {
        token_type = TOKEN_IF;
    } else if (!strcmp(term, "else")) {
        token_type = TOKEN_ELSE;
    } else if (!strcmp(term, "while")) {
        token_type = TOKEN_WHILE;
    } else if (!strcmp(term, "break")) {
        token_type = TOKEN_BREAK;
    } else if (!strcmp(term, "continue")) {
        token_type = TOKEN_CONTINUE;
    } else if (!strcmp(term, "return")) {
        token_type = TOKEN_RETURN;
    } else if (!strcmp(term, "void")) {
        token_type = TOKEN_VOID;
    } else if (!strcmp(term, "const")) {
        token_type = TOKEN_CONST;
    } else if (!strcmp(term, "char")) {
        token_type = TOKEN_CHAR;
    } else if (!strcmp(term, "for")) {
        token_type = TOKEN_FOR;
    }
    if (token_type != TOKEN_ID) {
        // keywords do not occupy an identifier slot
        rewind_id(0);
    } else {
        dedup_id();
    }
}

// Makes the next call to next_token() return the current token again.
void unget_token() {
    token_state = 1;
}

// Reads the next token from stdin into token_type / token_data.
// Whitespace and comments are skipped. Every token produced is also traced
// to stderr.
void next_token() {
    if (token_state) {
        token_state = 0;
        return;
    }
    int ch = getchar();
    while (ch == ' ' || ch == '\t' || ch == '\r' || ch == '\n') {
        ch = getchar();
    }
    if (ch == -1) {
        token_type = TOKEN_EOF;
    } else if (ch == '(') {
        token_type = TOKEN_PAREN_LEFT;
    } else if (ch == ')') {
        token_type = TOKEN_PAREN_RIGHT;
    } else if (ch == '[') {
        token_type = TOKEN_BRACKET_LEFT;
    } else if (ch == ']') {
        token_type = TOKEN_BRACKET_RIGHT;
    } else if (ch == '{') {
        token_type = TOKEN_BRACE_LEFT;
    } else if (ch == '}') {
        token_type = TOKEN_BRACE_RIGHT;
    } else if (ch == '+') {
        int ch2 = getchar();
        if (ch2 == '+') {
            token_type = TOKEN_INC;
        } else {
            ungetchar(ch2);
            token_type = TOKEN_ADD;
        }
    } else if (ch == '-') {
        int ch2 = getchar();
        if (ch2 == '-') {
            token_type = TOKEN_DEC;
        } else {
            ungetchar(ch2);
            token_type = TOKEN_MINUS;
        }
    } else if (ch == '*') {
        token_type = TOKEN_STAR;
    } else if (ch == '/') {
        int ch2 = getchar();
        if (ch2 == '/') {
            // Line comment. Also stop at end of input, otherwise a file
            // whose last line is a comment without a newline never ends.
            ch = getchar();
            while (ch != '\n' && ch != -1) {
                ch = getchar();
            }
            next_token();
            return;
        } else if (ch2 == '*') {
            // Block comment. Tracking the previous character (instead of
            // reading one ahead after each '*') also recognises a
            // terminator that is preceded by extra stars.
            int prev = 0;
            ch = getchar();
            while (!(prev == '*' && ch == '/')) {
                if (ch == -1) {
                    eprintf("unterminated comment\n");
                    exit(1);
                }
                prev = ch;
                ch = getchar();
            }
            next_token();
            return;
        } else {
            ungetchar(ch2);
            token_type = TOKEN_DIV;
        }
    } else if (ch == '%') {
        token_type = TOKEN_REM;
    } else if (ch == ';') {
        token_type = TOKEN_SEMICOLON;
    } else if (ch == ',') {
        token_type = TOKEN_COMMA;
    } else if (ch == '<') {
        int ch2 = getchar();
        if (ch2 == '=') {
            token_type = TOKEN_LE;
        } else if (ch2 == '<') {
            token_type = TOKEN_LSHIFT;
        } else {
            ungetchar(ch2);
            token_type = TOKEN_LT;
        }
    } else if (ch == '>') {
        int ch2 = getchar();
        if (ch2 == '=') {
            token_type = TOKEN_GE;
        } else if (ch2 == '>') {
            token_type = TOKEN_RSHIFT;
        } else {
            ungetchar(ch2);
            token_type = TOKEN_GT;
        }
    } else if (ch == '=') {
        int ch2 = getchar();
        if (ch2 == '=') {
            token_type = TOKEN_EQ;
        } else {
            ungetchar(ch2);
            token_type = TOKEN_ASSIGN;
        }
    } else if (ch == '!') {
        int ch2 = getchar();
        if (ch2 == '=') {
            token_type = TOKEN_NE;
        } else {
            ungetchar(ch2);
            token_type = TOKEN_NOT;
        }
    } else if (ch == '&') {
        int ch2 = getchar();
        if (ch2 == '&') {
            token_type = TOKEN_LAND;
        } else {
            ungetchar(ch2);
            token_type = TOKEN_AND;
        }
    } else if (ch == '|') {
        int ch2 = getchar();
        if (ch2 == '|') {
            token_type = TOKEN_LOR;
        } else {
            ungetchar(ch2);
            token_type = TOKEN_OR;
        }
    } else if (ch == '^') {
        token_type = TOKEN_XOR;
    } else if (ch == '~') {
        token_type = TOKEN_COMPL;
    } else if (ch == '\'') {
        // character literal: lexed as a number
        token_type = TOKEN_NUMBER;
        token_data = getchar();
        if (token_data == '\\') {
            token_data = get_escaped_char();
        }
        if (getchar() != '\'') {
            eprintf("expecting '\n");
            exit(1);
        }
    } else if (ch == '"') {
        token_type = TOKEN_STRING;
        token_data = parse_string();
    } else if (ch == '.') {
        // only "..." is accepted
        int ch2 = getchar();
        if (ch2 == '.') {
            int ch3 = getchar();
            if (ch3 == '.') {
                token_type = TOKEN_ELLIPSIS;
            } else {
                eprintf("unexpected character: %c\n", ch3);
                exit(1);
            }
        } else {
            eprintf("unexpected character: %c\n", ch2);
            exit(1);
        }
    } else if (is_digit(ch)) {
        token_type = TOKEN_NUMBER;
        token_data = parse_int(ch);
    } else if (is_id_start(ch)) {
        parse_id_like(ch);
    } else {
        eprintf("unexpected character: %c(%d)\n", ch, ch);
        exit(1);
    }
    // token trace on stderr
    eprintf("token: %d\n", token_type);
    if (token_type == TOKEN_ID) {
        const char* name = CA_offset(id_table, id_lut[token_data]);
        eprintf(" id: %s\n", name);
    } else if (token_type == TOKEN_NUMBER) {
        eprintf(" number: %d\n", token_data);
    }
}

// Reads the next token and exits unless it has the expected type.
void expect_token(int expected_type) {
    next_token();
    if (token_type != expected_type) {
        eprintf("unexpected token: %d, should be %d\n", token_type, expected_type);
        exit(1);
    }
}

const int TYPE_VOID = 0;
const int TYPE_INT = 1;
const int TYPE_CHAR = 2;
const int TYPE_VOID_PTR = 16;
const int TYPE_INT_PTR = 17;
const int TYPE_CHAR_PTR = 18;

// Parses [const] (int|char|void) [*] starting at the CURRENT token.
// Returns a TYPE_* constant, or -1 when the current token does not start a
// type. On success the last token of the type is the current token.
int parse_type() {
    if (token_type == TOKEN_CONST) {
        next_token();
    }
    if (token_type == TOKEN_INT) {
        next_token();
        if (token_type == TOKEN_STAR) {
            return TYPE_INT_PTR;
        }
        unget_token();
        return TYPE_INT;
    } else if (token_type == TOKEN_CHAR) {
        next_token();
        if (token_type == TOKEN_STAR) {
            return TYPE_CHAR_PTR;
        }
        unget_token();
        return TYPE_CHAR;
    } else if (token_type == TOKEN_VOID) {
        next_token();
        if (token_type == TOKEN_STAR) {
            return TYPE_VOID_PTR;
        }
        unget_token();
        return TYPE_VOID;
    } else {
        return -1;
    }
}

// asm

// Label of the current function's epilog; `return` jumps here.
int epilog_label;

// Every local and temporary lives in an 8-byte stack slot identified by a
// "local id"; slot n sits at fp - 8*n - 8 (see load_address). Ids 0 and 1
// are where the prolog saves ra and fp, so allocation starts at 2.
int local_table[4096]; // id -> local id
int next_local_id = 2;
int max_local_id = 2;

const int NONE = 0;
const int SCALAR = 1;
const int ARRAY = 2;

// local_marker is indexed by local id, global_marker by identifier id.
// NONE in local_marker means the slot is a temporary.
int local_marker[4096];
int global_marker[4096];


// Forgets all locals; called at the start of every function.
void reset_local() {
    next_local_id = 2;
    max_local_id = 2;
    for (int i = 0; i < 4096; ++i) {
        local_table[i] = 0;
        local_marker[i] = NONE;
    }
}

// Releases the temporaries at the top of the slot stack, stopping at the
// first slot that belongs to a declared variable.
void reset_temp() {
    while (next_local_id > 2 && !local_marker[next_local_id - 1]) {
        --next_local_id;
    }
}

// Allocates the next slot and keeps track of the high-water mark that
// determines the frame size.
int next_reg() {
    int reg = next_local_id++;
    if (next_local_id > max_local_id) {
        max_local_id = next_local_id;
    }
    return reg;
}

// Returns the slot of local `id`, allocating one on first use.
int declare_local(int id) {
    if (local_table[id] != 0) return local_table[id];
    int reg = next_reg();
    local_marker[reg] = SCALAR;
    return local_table[id] = reg;
}

// Allocates `size` consecutive slots for a local array and records the LAST
// one (the lowest address) as the array's slot.
int declare_local_array(int id, int size) {
    if (local_table[id] != 0) return local_table[id];
    // With size 0 the loop below would never assign reg.
    if (size <= 0) {
        eprintf("array size must be positive\n");
        exit(1);
    }
    int reg = 0;
    for (int i = 0; i < size; ++i) {
        reg = next_reg();
        local_marker[reg] = ARRAY;
    }
    return local_table[id] = reg;
}

void declare_global(int id, int marker) {
    global_marker[id] = marker;
}

// Flag bits or-ed into a slot/identifier number to form an operand:
// INDIRECTION - the slot holds the ADDRESS of the operand;
// GLOBAL      - the low bits are an identifier id naming a global symbol.
const int INDIRECTION = 1048576; // 2**20
const int GLOBAL = 2097152; // 2**21

// Emits code that puts the address of operand `id` into t<rd>.
void load_address(int rd, int id) {
    if (id & GLOBAL) {
        id = id & ~GLOBAL;
        const char* name = CA_offset(id_table, id_lut[id]);
        printf(" la t%d, %s # id: %d\n", rd, name, id);
    } else {
        if (id & INDIRECTION) {
            id = id & ~INDIRECTION;
            int offset = -id * 8 - 8;
            printf(" ld t%d, %d(fp) # indirection\n", rd, offset);
        } else {
            int offset = -id * 8 - 8;
            printf(" addi t%d, fp, %d\n", rd, offset);
        }
    }
}

// Emits code that puts the 8-byte value of operand `id` into t<rd>.
void load(int rd, int id) {
    load_address(rd, id);
    printf(" ld t%d, 0(t%d)\n", rd, rd);
}

// Emits code that stores t<rs1> to the address held in t<rs2>.
void store(int rs1, int rs2) {
    printf(" sd t%d, 0(t%d)\n", rs1, rs2);
}

// Resolves identifier `id` to an operand: locals first, then globals.
// For an array, its address is written to a fresh temporary and that
// temporary is returned. Exits when the name is unknown.
int lookup(int id) {
    int local = local_table[id];
    if (local) {
        if (local_marker[local] == ARRAY) {
            int reg = next_reg();
            load_address(0, local);
            load_address(1, reg);
            store(0, 1);
            return reg;
        }
        return local;
    }
    if (global_marker[id]) {
        if (global_marker[id] == ARRAY) {
            int reg = next_reg();
            load_address(0, id | GLOBAL);
            load_address(1, reg);
            store(0, 1);
            return reg;
        }
        return id | GLOBAL;
    }
    const char* name = CA_offset(id_table, id_lut[id]);
    eprintf("unresolved identifier: %s\n", name);
    exit(1);
}


int next_label_id = 0;
int next_label() {
    return next_label_id++;
}

int asm_label(int label) {
    printf("L%d:\n", label);
    return label;
}

// An operand may be overwritten with a result only if it is a plain
// temporary: not global, not indirect, and not a declared variable.
int is_not_reusable(int rs1) {
    return (rs1 & GLOBAL) || (rs1 & INDIRECTION) || local_marker[rs1];
}

// Emits `op t0, t0` on operand rs1 and returns the operand holding the
// result (rs1 itself when it is a reusable temporary).
int asm_r(const char* op, int rs1) {
    load(0, rs1);
    printf(" %s t0, t0\n", op);
    int rd = rs1;
    if (is_not_reusable(rs1)) rd = next_reg();
    load_address(1, rd);
    store(0, 1);
    return rd;
}

// Emits `op t0, t0, t1` on operands rs1, rs2 and returns the operand holding
// the result.
int asm_rr(const char* op, int rs1, int rs2) {
    load(0, rs1);
    load(1, rs2);
    printf(" %s t0, t0, t1\n", op);
    int rd = rs1;
    if (is_not_reusable(rs1)) rd = rs2;
    if (is_not_reusable(rs2)) rd = next_reg();
    load_address(1, rd);
    store(0, 1);
    return rd;
}

// Materialises the constant imm in a fresh temporary.
int asm_li(int imm) {
    printf(" li t0, %d\n", imm);
    int rd = next_reg();
    load_address(1, rd);
    store(0, 1);
    return rd;
}

void asm_beqz(int rs1, int label) {
    load(0, rs1);
    printf(" beqz t0, L%d\n", label);
}

void asm_bnez(int rs1, int label) {
    load(0, rs1);
    printf(" bnez t0, L%d\n", label);
}

void asm_j(int label) {
    printf(" j L%d\n", label);
}

// Parallel stacks of the break / continue targets of the enclosing loops.
int break_label_stack[4096];
int cont_label_stack[4096];
int break_label_stack_size;
int cont_label_stack_size;

// Target of `break` for the innermost loop; exits when there is no loop.
int asm_get_break_label() {
    if (break_label_stack_size <= 0) {
        eprintf("break outside of a loop\n");
        exit(1);
    }
    return break_label_stack[break_label_stack_size - 1];
}

// Target of `continue` for the innermost loop; exits when there is no loop.
int asm_get_cont_label() {
    if (cont_label_stack_size <= 0) {
        eprintf("continue outside of a loop\n");
        exit(1);
    }
    return cont_label_stack[cont_label_stack_size - 1];
}

void asm_push_label(int break_label, int cont_label) {
    break_label_stack[break_label_stack_size++] = break_label;
    cont_label_stack[cont_label_stack_size++] = cont_label;
}

void asm_pop_label() {
    --break_label_stack_size;
    --cont_label_stack_size;
}

// parser
//
// Every parse_*_expr function emits code for the expression it parses and
// returns the operand (slot number plus optional flag bits) of its value.
int parse_expr();

int parse_primary_expr() {
    next_token();
    if (token_type == TOKEN_EOF) {
        eprintf("unexpected end of input\n");
        exit(1);
    } else if (token_type == TOKEN_NUMBER) {
        return asm_li(token_data);
    } else if (token_type == TOKEN_ID) {
        return lookup(token_data);
    } else if (token_type == TOKEN_STRING) {
        // .LC<n> labels are emitted by dump_string_table
        int reg = next_reg();
        printf(" la t0, .LC%d\n", token_data);
        load_address(1, reg);
        store(0, 1);
        return reg;
    } else if (token_type == TOKEN_PAREN_LEFT) {
        int reg = parse_expr();
        expect_token(TOKEN_PAREN_RIGHT);
        return reg;
    } else {
        eprintf("unexpected primary token: %d\n", token_type);
        exit(1);
    }
}

// Handles at most ONE postfix operator (x++, x--, x[i], f(args)): every
// branch of the loop body returns.
int parse_postfix_expr() {
    int lhs = parse_primary_expr();
    while (1) {
        next_token();
        if (token_type == TOKEN_INC) {
            // save the old value in a temporary, then store value + 1 back
            int reg = next_reg();
            load(0, lhs);
            load_address(1, reg);
            store(0, 1);
            printf(" addi t0, t0, 1\n");
            load_address(1, lhs);
            store(0, 1);
            return reg;
        } else if (token_type == TOKEN_DEC) {
            int reg = next_reg();
            load(0, lhs);
            load_address(1, reg);
            store(0, 1);
            printf(" addi t0, t0, -1\n");
            load_address(1, lhs);
            store(0, 1);
            return reg;
        } else if (token_type == TOKEN_BRACKET_LEFT) {
            // element address = base + index * 8, kept in a temporary that
            // is returned as an indirect operand
            int reg = next_reg();
            int rhs = parse_expr();
            expect_token(TOKEN_BRACKET_RIGHT);
            load(0, rhs);
            load(1, lhs);
            printf(" slli t0, t0, 3\n");
            printf(" add t0, t0, t1\n");
            load_address(1, reg);
            store(0, 1);
            return reg | INDIRECTION;
        } else if (token_type == TOKEN_PAREN_LEFT) {
            // call: evaluate up to 8 arguments, then move them to a0..a7
            int arg = 0;
            int args[8];
            while (1) {
                next_token();
                if (token_type == TOKEN_PAREN_RIGHT) {
                    break;
                }
                unget_token();
                if (arg >= 8) {
                    eprintf("too many arguments\n");
                    exit(1);
                }
                args[arg++] = parse_expr();
                next_token();
                if (token_type == TOKEN_COMMA) {
                    // continue;
                } else if (token_type == TOKEN_PAREN_RIGHT) {
                    break;
                } else {
                    eprintf("expecting ',' or ')'\n");
                    exit(1);
                }
            }
            for (int i = 0; i < arg; ++i) {
                load(0, args[i]);
                printf(" mv a%d, t0\n", i);
            }
            load_address(0, lhs);
            printf(" jalr t0\n");
            printf(" mv t0, a0\n");
            int reg = next_reg();
            load_address(1, reg);
            store(0, 1);
            return reg;
        } else {
            unget_token();
            return lhs;
        }
    }
}

int parse_prefix_expr() {
    next_token();
    if (token_type == TOKEN_AND) {
        // address-of
        int id = parse_postfix_expr();
        int reg = next_reg();
        load_address(0, id);
        load_address(1, reg);
        store(0, 1);
        return reg;
    } else if (token_type == TOKEN_STAR) {
        // dereference
        int reg = parse_postfix_expr();
        return reg | INDIRECTION;
    } else if (token_type == TOKEN_MINUS) {
        int reg = parse_postfix_expr();
        return asm_r("neg", reg);
    } else if (token_type == TOKEN_COMPL) {
        int reg = parse_postfix_expr();
        return asm_r("not", reg);
    } else if (token_type == TOKEN_NOT) {
        int reg = parse_postfix_expr();
        return asm_r("seqz", reg);
    } else if (token_type == TOKEN_INC) {
        int reg = parse_postfix_expr();
        load(0, reg);
        printf(" addi t0, t0, 1\n");
        load_address(1, reg);
        store(0, 1);
        return reg;
    } else if (token_type == TOKEN_DEC) {
        int reg = parse_postfix_expr();
        load(0, reg);
        printf(" addi t0, t0, -1\n");
        load_address(1, reg);
        store(0, 1);
        return reg;
    } else {
        unget_token();
        return parse_postfix_expr();
    }
}

int parse_mul_expr() {
    int lhs = parse_prefix_expr();
    while (1) {
        next_token();
        if (token_type == TOKEN_STAR) {
            int rhs = parse_prefix_expr();
            lhs = asm_rr("mul", lhs, rhs);
        } else if (token_type == TOKEN_DIV) {
            int rhs = parse_prefix_expr();
            lhs = asm_rr("div", lhs, rhs);
        } else if (token_type == TOKEN_REM) {
            int rhs = parse_prefix_expr();
            lhs = asm_rr("rem", lhs, rhs);
        } else {
            unget_token();
            break;
        }
    }
    return lhs;
}

int parse_add_expr() {
    int lhs = parse_mul_expr();
    while (1) {
        next_token();
        if (token_type == TOKEN_ADD) {
            int rhs = parse_mul_expr();
            lhs = asm_rr("add", lhs, rhs);
        } else if (token_type == TOKEN_MINUS) {
            int rhs = parse_mul_expr();
            lhs = asm_rr("sub", lhs, rhs);
        } else {
            unget_token();
            break;
        }
    }
    return lhs;
}

int parse_cmp_expr() {
    int lhs = parse_add_expr();
    while (1) {
        next_token();
        if (token_type == TOKEN_LT) {
            int rhs = parse_add_expr();
            lhs = asm_rr("slt", lhs, rhs);
        } else if (token_type == TOKEN_GT) {
            int rhs = parse_add_expr();
            lhs = asm_rr("sgt", lhs, rhs);
        } else if (token_type == TOKEN_LE) {
            // a <= b  is  !(a > b)
            int rhs = parse_add_expr();
            int sgt = asm_rr("sgt", lhs, rhs);
            lhs = asm_r("seqz", sgt);
        } else if (token_type == TOKEN_GE) {
            // a >= b  is  !(a < b)
            int rhs = parse_add_expr();
            int slt = asm_rr("slt", lhs, rhs);
            lhs = asm_r("seqz", slt);
        } else {
            unget_token();
            break;
        }
    }
    return lhs;
}

int parse_eq_expr() {
    int lhs = parse_cmp_expr();
    while (1) {
        next_token();
        if (token_type == TOKEN_EQ) {
            int rhs = parse_cmp_expr();
            int xor0 = asm_rr("xor", lhs, rhs);
            lhs = asm_r("seqz", xor0);
        } else if (token_type == TOKEN_NE) {
            int rhs = parse_cmp_expr();
            int xor0 = asm_rr("xor", lhs, rhs);
            lhs = asm_r("snez", xor0);
        } else {
            unget_token();
            break;
        }
    }
    return lhs;
}

int parse_bitwise_and_expr() {
    int lhs = parse_eq_expr();
    while (1) {
        next_token();
        if (token_type == TOKEN_AND) {
            int rhs = parse_eq_expr();
            lhs = asm_rr("and", lhs, rhs);
        } else {
            unget_token();
            break;
        }
    }
    return lhs;
}


int parse_bitwise_xor_expr() {
    int lhs = parse_bitwise_and_expr();
    while (1) {
        next_token();
        if (token_type == TOKEN_XOR) {
            int rhs = parse_bitwise_and_expr();
            lhs = asm_rr("xor", lhs, rhs);
        } else {
            unget_token();
            break;
        }
    }
    return lhs;
}

int parse_bitwise_or_expr() {
    int lhs = parse_bitwise_xor_expr();
    while (1) {
        next_token();
        if (token_type == TOKEN_OR) {
            int rhs = parse_bitwise_xor_expr();
            lhs = asm_rr("or", lhs, rhs);
        } else {
            unget_token();
            break;
        }
    }
    return lhs;
}

// a && b: normalise a to 0/1 and jump past the right-hand side when it is 0.
// One label is always allocated but only emitted when an && was seen.
int parse_logical_and_expr() {
    int lhs = parse_bitwise_or_expr();
    int label = next_label();
    int label_used = 0;
    while (1) {
        next_token();
        if (token_type == TOKEN_LAND) {
            lhs = asm_r("snez", lhs);
            asm_beqz(lhs, label);
            int rhs = parse_bitwise_or_expr();
            rhs = asm_r("snez", rhs);
            lhs = asm_rr("and", lhs, rhs);
            label_used = 1;
        } else {
            unget_token();
            break;
        }
    }
    if (label_used) {
        asm_label(label);
    }
    return lhs;
}

// a || b: normalise a to 0/1 and jump past the right-hand side when it is 1.
int parse_logical_or_expr() {
    int lhs = parse_logical_and_expr();
    int label = next_label();
    int label_used = 0;
    while (1) {
        next_token();
        if (token_type == TOKEN_LOR) {
            lhs = asm_r("snez", lhs);
            asm_bnez(lhs, label);
            int rhs = parse_logical_and_expr();
            rhs = asm_r("snez", rhs);
            lhs = asm_rr("or", lhs, rhs);
            label_used = 1;
        } else {
            unget_token();
            break;
        }
    }
    if (label_used) {
        asm_label(label);
    }
    return lhs;
}

// Assignment is right-associative; its value is the left operand.
int parse_assign_expr() {
    int lhs = parse_logical_or_expr();
    next_token();
    if (token_type == TOKEN_ASSIGN) {
        int rhs = parse_assign_expr();
        load(0, rhs);
        load_address(1, lhs);
        store(0, 1);
        return lhs;
    } else {
        unget_token();
        return lhs;
    }
}

int parse_expr() {
    return parse_assign_expr();
}

// Parses `name`, `name[N]` or `name = expr` after the type has been read.
// The terminating ';' is left for the caller.
void parse_local_variable() {
    expect_token(TOKEN_ID);
    int id = token_data;
    next_token();
    if (token_type == TOKEN_BRACKET_LEFT) {
        expect_token(TOKEN_NUMBER);
        int size = token_data;
        expect_token(TOKEN_BRACKET_RIGHT);
        declare_local_array(id, size);
        next_token();
    } else {
        declare_local(id);
    }
    if (token_type == TOKEN_SEMICOLON) {
        unget_token();
        return;
    }
    unget_token();
    expect_token(TOKEN_ASSIGN);
    int reg = parse_expr();
    load(0, reg);
    load_address(1, local_table[id]);
    store(0, 1);
}

void parse_stmt();

// `if` has already been consumed.
void parse_if() {
    expect_token(TOKEN_PAREN_LEFT);
    int cond = parse_expr();
    int label1 = next_label(); // start of the else part
    int label2 = next_label(); // end of the whole statement
    asm_beqz(cond, label1);
    reset_temp();
    expect_token(TOKEN_PAREN_RIGHT);
    parse_stmt();
    asm_j(label2);
    asm_label(label1);
    next_token();
    if (token_type == TOKEN_ELSE) {
        parse_stmt();
    } else {
        unget_token();
    }
    asm_label(label2);
}

// `while` has already been consumed.
void parse_while() {
    expect_token(TOKEN_PAREN_LEFT);
    int break_label = next_label();
    int cont_label = next_label();
    asm_push_label(break_label, cont_label);
    asm_label(cont_label);
    int cond = parse_expr();
    asm_beqz(cond, break_label);
    reset_temp();
    expect_token(TOKEN_PAREN_RIGHT);
    parse_stmt();
    asm_j(cont_label);
    asm_label(break_label);
    asm_pop_label();
}

// `for` has already been consumed. Code is emitted in source order
// (init, cond, update, body), so jumps route control:
// cond -> body -> update (cont_label) -> cond.
void parse_for() {
    expect_token(TOKEN_PAREN_LEFT);
    int cont_label = next_label();
    int break_label = next_label();
    int cond_label = next_label();
    int body_label = next_label();
    asm_push_label(break_label, cont_label);
    parse_stmt(); // init
    asm_label(cond_label);
    int cond = parse_expr();
    asm_beqz(cond, break_label);
    asm_j(body_label);
    reset_temp();
    expect_token(TOKEN_SEMICOLON);
    asm_label(cont_label);
    parse_expr(); // update
    reset_temp();
    expect_token(TOKEN_PAREN_RIGHT);
    asm_j(cond_label);
    asm_label(body_label);
    parse_stmt(); // body
    asm_j(cont_label);
    asm_label(break_label);
    asm_pop_label();
}

// Parses one statement. Compound statements return early; every other form
// falls through to the shared "expect ';' and release temporaries" tail.
void parse_stmt() {
    next_token();
    if (token_type == TOKEN_IF) {
        parse_if();
        return;
    } else if (token_type == TOKEN_WHILE) {
        parse_while();
        return;
    } else if (token_type == TOKEN_FOR) {
        parse_for();
        return;
    } else if (token_type == TOKEN_BRACE_LEFT) {
        while (1) {
            next_token();
            if (token_type == TOKEN_BRACE_RIGHT) {
                break;
            }
            unget_token();
            parse_stmt();
        }
        return;
    } else if (token_type == TOKEN_RETURN) {
        next_token();
        if (token_type == TOKEN_SEMICOLON) {
            asm_j(epilog_label);
            return;
        }
        unget_token();
        int reg = parse_expr();
        load(0, reg);
        printf(" mv a0, t0\n");
        asm_j(epilog_label);
    } else if (token_type == TOKEN_BREAK) {
        int label = asm_get_break_label();
        asm_j(label);
    } else if (token_type == TOKEN_CONTINUE) {
        int label = asm_get_cont_label();
        asm_j(label);
    } else if (parse_type() >= 0) {
        parse_local_variable();
    } else if (token_type == TOKEN_SEMICOLON) {
        // empty statement: hand the ';' to expect_token below
        unget_token();
    } else {
        unget_token();
        parse_expr();
    }
    expect_token(TOKEN_SEMICOLON);
    reset_temp();
}

// Parses a parameter list and then either ';' (declaration only, nothing is
// emitted) or a body. The '(' has already been consumed.
//
// The body is emitted BEFORE the prolog because the frame size is only known
// once the body has been parsed: the entry point jumps to the prolog, which
// sets up the frame, spills the arguments and jumps back to the body.
void parse_function(const char* name) {
    reset_local();
    int arg = 0;
    int args[8];
    while (1) {
        next_token();
        if (token_type == TOKEN_PAREN_RIGHT) {
            break;
        }
        if (token_type == TOKEN_ELLIPSIS) {
            expect_token(TOKEN_PAREN_RIGHT);
            break;
        }
        parse_type();
        expect_token(TOKEN_ID);
        // args[] has 8 entries, matching argument registers a0..a7
        if (arg >= 8) {
            eprintf("too many parameters\n");
            exit(1);
        }
        args[arg++] = declare_local(token_data);
        next_token();
        if (token_type == TOKEN_BRACKET_LEFT) {
            // `type name[]` parameter
            expect_token(TOKEN_BRACKET_RIGHT);
            next_token();
        }
        if (token_type == TOKEN_COMMA) {
            // continue;
        } else if (token_type == TOKEN_PAREN_RIGHT) {
            break;
        } else {
            eprintf("expecting ',' or ')'\n");
            exit(1);
        }
    }
    next_token();
    if (token_type == TOKEN_SEMICOLON) {
        return;
    }
    unget_token();
    expect_token(TOKEN_BRACE_LEFT);
    printf(".text\n");
    printf(".global %s\n", name);
    printf("%s:\n", name);
    int label = next_label();
    int prolog_label = next_label();
    epilog_label = next_label();
    asm_j(prolog_label);
    asm_label(label);
    while (1) {
        next_token();
        if (token_type == TOKEN_BRACE_RIGHT) {
            break;
        }
        unget_token();
        parse_stmt();
    }
    asm_j(epilog_label);
    // frame size: 8 bytes per slot, rounded up to a multiple of 16
    int shift = max_local_id * 8;
    if (shift % 16 != 0) {
        shift = shift + 8;
    }
    // prolog
    asm_label(prolog_label);
    printf(" addi sp, sp, %d\n", -shift);
    printf(" sd ra, %d(sp)\n", shift - 8);
    printf(" sd fp, %d(sp)\n", shift - 16);
    printf(" addi fp, sp, %d\n", shift);
    for (int i = 0; i < arg; ++i) {
        load_address(1, args[i]);
        printf(" mv t0, a%d\n", i);
        store(0, 1);
    }
    asm_j(label);
    // epilog
    asm_label(epilog_label);
    printf(" ld fp, %d(sp)\n", shift - 16);
    printf(" ld ra, %d(sp)\n", shift - 8);
    printf(" addi sp, sp, %d\n", shift);
    printf(" ret\n");
}

// Emits the definition of global `name`. The current token is the one that
// follows the name: '=' (initialised scalar), '[' (array) or anything else
// (zero-initialised scalar).
void parse_global_variable(int id, const char* name) {
    printf(".data\n");
    printf(".globl %s\n", name);
    printf(".align 5\n");
    printf("%s:\n", name);
    if (token_type == TOKEN_ASSIGN) {
        expect_token(TOKEN_NUMBER);
        // 8 bytes: globals are always accessed with ld / sd (see load and
        // store), and an uninitialised scalar reserves 8 bytes as well.
        printf(" .dword %d\n", token_data);
    } else if (token_type == TOKEN_BRACKET_LEFT) {
        expect_token(TOKEN_NUMBER);
        int size = token_data;
        expect_token(TOKEN_BRACKET_RIGHT);
        printf(" .zero %d\n", 8 * size);
        declare_global(id, ARRAY);
    } else {
        printf(" .zero %d\n", 8);
        unget_token();
    }
    expect_token(TOKEN_SEMICOLON);
}

// Parses what follows a top-level type: a function when the name is followed
// by '(', a global variable otherwise.
void parse_decl() {
    expect_token(TOKEN_ID);
    int id = token_data;
    declare_global(id, SCALAR);
    char* name = CA_offset(id_table, id_lut[id]);
    next_token();
    if (token_type == TOKEN_PAREN_LEFT) {
        parse_function(name);
    } else {
        parse_global_variable(id, name);
    }
}

void parse_top_level() {
    next_token();
    if (token_type == TOKEN_EOF) {
        return;
    } else if (parse_type() >= 0) {
        parse_decl();
    } else {
        eprintf("unexpected token: %d\n", token_type);
        exit(1);
    }
}

// Emits every string literal as `.LC<n>: .string "..."`, re-escaping the
// characters that get_escaped_char decodes. A literal ends at its first NUL
// byte, so NUL itself never needs to be written.
void dump_string_table() {
    printf(".data\n");
    for (int i = 0; i < string_lut_size; ++i) {
        printf(".LC%d: .string \"", i);
        int offset = 0;
        int ch;
        while ((ch = CA_get(string_table, string_lut[i] + offset)) != 0) {
            if (ch == '\n') {
                printf("\\n");
            } else if (ch == '\t') {
                printf("\\t");
            } else if (ch == '\r') {
                printf("\\r");
            } else if (ch == '\\') {
                printf("\\\\");
            } else if (ch == '\'') {
                printf("\\'");
            } else if (ch == '\"') {
                printf("\\\"");
            } else {
                putchar(ch);
            }
            offset++;
        }
        printf("\"\n");
    }
}

// Reads a program from stdin and writes assembly to stdout; the string
// table follows the code.
int main() {
    while (!eof()) {
        parse_top_level();
    }
    dump_string_table();
    return 0;
}