#include "boot-lib.h" // lexer int is_digit(int ch) { return '0' <= ch && ch <= '9'; } int is_id_start(int ch) { return 'a' <= ch && ch <= 'z' || 'A' <= ch && ch <= 'Z' || ch == '_'; } int is_id_cont(int ch) { return is_id_start(ch) || is_digit(ch); } int token_state; int token_type; int token_data; const int TOKEN_EOF = 0; const int TOKEN_SEMICOLON = 1; const int TOKEN_ADD = 2; const int TOKEN_MINUS = 3; const int TOKEN_STAR = 4; const int TOKEN_DIV = 5; const int TOKEN_REM = 6; const int TOKEN_ASSIGN = 7; const int TOKEN_COMMA = 8; const int TOKEN_DOT = 9; const int TOKEN_LSHIFT = 10; const int TOKEN_RSHIFT = 11; const int TOKEN_AND = 12; const int TOKEN_OR = 13; const int TOKEN_XOR = 14; const int TOKEN_COMPL = 15; const int TOKEN_NOT = 16; const int TOKEN_LAND = 17; const int TOKEN_LOR = 18; const int TOKEN_ELLIPSIS = 19; const int TOKEN_INC = 20; const int TOKEN_DEC = 21; const int TOKEN_EQ = 40; const int TOKEN_NE = 41; const int TOKEN_LT = 42; const int TOKEN_GT = 43; const int TOKEN_LE = 44; const int TOKEN_GE = 45; const int TOKEN_PAREN_LEFT = 50; const int TOKEN_PAREN_RIGHT = 51; const int TOKEN_BRACKET_LEFT = 52; const int TOKEN_BRACKET_RIGHT = 53; const int TOKEN_BRACE_LEFT = 54; const int TOKEN_BRACE_RIGHT = 55; const int TOKEN_STRING = 99; const int TOKEN_NUMBER = 100; const int TOKEN_ID = 101; const int TOKEN_IF = 102; const int TOKEN_ELSE = 103; const int TOKEN_WHILE = 104; const int TOKEN_FOR = 105; const int TOKEN_DO = 106; const int TOKEN_BREAK = 107; const int TOKEN_CONTINUE = 108; const int TOKEN_RETURN = 109; const int TOKEN_CONST = 127; const int TOKEN_VOID = 128; const int TOKEN_INT = 129; const int TOKEN_CHAR = 130; const int TYPE_VOID = 0; const int TYPE_INT = 1; const int TYPE_CHAR = 2; const int TYPE_VOID_PTR = 16; const int TYPE_INT_PTR = 17; const int TYPE_CHAR_PTR = 18; const int TYPE_PTR_MASK = 16; const int TYPE_TOKEN_MASK = 128; int parse_int(int ch) { int num = ch - '0'; while (is_digit(ch = getchar())) { num = num * 10; num = num + ch - '0'; } ungetchar(ch); return num; } int get_escaped_char() { int ch = getchar(); if (ch == 'n') { ch = '\n'; } else if (ch == 't') { ch = '\t'; } else if (ch == 'r') { ch = '\r'; } else if (ch == '0') { ch = '\0'; } else if (ch == '\\') { ch = '\\'; } else if (ch == '\'') { ch = '\''; } else if (ch == '\"') { ch = '\"'; } else { eprintf("unexpected escaped character: %c\n", ch); exit(1); } return ch; } int streq(const char* s1, const char* s2) { while (*s1 && *s2 && *s1 == *s2) { s1++; s2++; } return *s1 == *s2; } char string_table[65536]; int string_offset; int string_lut[4096]; int string_lut_size; int parse_string() { int offset = string_offset; int ch; while ((ch = getchar()) != '"') { if (ch == -1 || ch == '\n') { eprintf("expecting '\"'\n"); exit(1); } if (ch == '\\') { ch = get_escaped_char(); } string_table[string_offset++] = ch; } string_table[string_offset++] = 0; string_lut[string_lut_size] = offset; return string_lut_size++; } void rewind_string(int new_data) { string_offset = string_lut[token_data]; token_data = new_data; --string_lut_size; } void dedup_string() { int last_string = string_lut_size - 1; char* latest = string_table + string_lut[last_string]; for (int i = 0; i < last_string; i++) { char* candidate = string_table + string_lut[i]; if (streq(candidate, latest)) { rewind_string(i); return; } } } char id_table[65536]; int id_offset; int id_lut[4096]; int id_lut_size; int parse_id(int ch) { int offset = id_offset; id_table[id_offset++] = ch; while (is_id_cont(ch = getchar())) { id_table[id_offset++] = ch; } ungetchar(ch); id_table[id_offset++] = 0; id_lut[id_lut_size] = offset; return id_lut_size++; } void rewind_id(int new_data) { id_offset = id_lut[token_data]; token_data = new_data; --id_lut_size; } void dedup_id() { int last_id = id_lut_size - 1; char* latest = id_table + id_lut[last_id]; for (int i = 0; i < last_id; i++) { char* candidate = id_table + id_lut[i]; if (streq(candidate, latest)) { rewind_id(i); return; } } } void parse_id_like(int ch) { token_type = TOKEN_ID; token_data = parse_id(ch); char* id = id_table + id_lut[token_data]; if (streq(id, "int")) { token_type = TOKEN_INT; } else if (streq(id, "if")) { token_type = TOKEN_IF; } else if (streq(id, "else")) { token_type = TOKEN_ELSE; } else if (streq(id, "while")) { token_type = TOKEN_WHILE; } else if (streq(id, "break")) { token_type = TOKEN_BREAK; } else if (streq(id, "continue")) { token_type = TOKEN_CONTINUE; } else if (streq(id, "return")) { token_type = TOKEN_RETURN; } else if (streq(id, "void")) { token_type = TOKEN_VOID; } else if (streq(id, "const")) { token_type = TOKEN_CONST; } else if (streq(id, "char")) { token_type = TOKEN_CHAR; } else if (streq(id, "for")) { token_type = TOKEN_FOR; } else if (streq(id, "do")) { token_type = TOKEN_DO; } if (token_type != TOKEN_ID) { rewind_id(0); } else { dedup_id(); } } void unget_token() { token_state = 1; } void next_token() { if (token_state) { token_state = 0; return; } int ch = getchar(); while (ch == ' ' || ch == '\t' || ch == '\r' || ch == '\n') { ch = getchar(); } if (ch == -1) { token_type = TOKEN_EOF; } else if (ch == '(') { token_type = TOKEN_PAREN_LEFT; } else if (ch == ')') { token_type = TOKEN_PAREN_RIGHT; } else if (ch == '[') { token_type = TOKEN_BRACKET_LEFT; } else if (ch == ']') { token_type = TOKEN_BRACKET_RIGHT; } else if (ch == '{') { token_type = TOKEN_BRACE_LEFT; } else if (ch == '}') { token_type = TOKEN_BRACE_RIGHT; } else if (ch == '+') { int ch2 = getchar(); if (ch2 == '+') { token_type = TOKEN_INC; } else { ungetchar(ch2); token_type = TOKEN_ADD; } } else if (ch == '-') { int ch2 = getchar(); if (ch2 == '-') { token_type = TOKEN_DEC; } else { ungetchar(ch2); token_type = TOKEN_MINUS; } } else if (ch == '*') { token_type = TOKEN_STAR; } else if (ch == '/') { int ch2 = getchar(); if (ch2 == '/') { do ch = getchar(); while (ch != -1 && ch != '\n'); next_token(); return; } else if (ch2 == '*') { while (1) { ch = getchar(); if (ch == -1) { eprintf("expecting '*/'\n"); exit(1); } if (ch == '*') { ch = getchar(); if (ch == '/') { break; } } } next_token(); return; } else { ungetchar(ch2); token_type = TOKEN_DIV; } } else if (ch == '%') { token_type = TOKEN_REM; } else if (ch == ';') { token_type = TOKEN_SEMICOLON; } else if (ch == ',') { token_type = TOKEN_COMMA; } else if (ch == '<') { int ch2 = getchar(); if (ch2 == '=') { token_type = TOKEN_LE; } else if (ch2 == '<') { token_type = TOKEN_LSHIFT; } else { ungetchar(ch2); token_type = TOKEN_LT; } } else if (ch == '>') { int ch2 = getchar(); if (ch2 == '=') { token_type = TOKEN_GE; } else if (ch2 == '>') { token_type = TOKEN_RSHIFT; } else { ungetchar(ch2); token_type = TOKEN_GT; } } else if (ch == '=') { int ch2 = getchar(); if (ch2 == '=') { token_type = TOKEN_EQ; } else { ungetchar(ch2); token_type = TOKEN_ASSIGN; } } else if (ch == '!') { int ch2 = getchar(); if (ch2 == '=') { token_type = TOKEN_NE; } else { ungetchar(ch2); token_type = TOKEN_NOT; } } else if (ch == '&') { int ch2 = getchar(); if (ch2 == '&') { token_type = TOKEN_LAND; } else { ungetchar(ch2); token_type = TOKEN_AND; } } else if (ch == '|') { int ch2 = getchar(); if (ch2 == '|') { token_type = TOKEN_LOR; } else { ungetchar(ch2); token_type = TOKEN_OR; } } else if (ch == '^') { token_type = TOKEN_XOR; } else if (ch == '~') { token_type = TOKEN_COMPL; } else if (ch == '\'') { token_type = TOKEN_NUMBER; token_data = getchar(); if (token_data == '\\') { token_data = get_escaped_char(); } if (getchar() != '\'') { eprintf("expecting '\n"); exit(1); } } else if (ch == '"') { token_type = TOKEN_STRING; token_data = parse_string(); dedup_string(); } else if (ch == '.') { token_type = 0; if (getchar() == '.') { if (getchar() == '.') { token_type = TOKEN_ELLIPSIS; } } if (token_type != TOKEN_ELLIPSIS) { eprintf("expecting '...'\n"); exit(1); } } else if (is_digit(ch)) { token_type = TOKEN_NUMBER; token_data = parse_int(ch); } else if (is_id_start(ch)) { parse_id_like(ch); } else { eprintf("unexpected character: %c(%d)\n", ch, ch); exit(1); } eprintf("token: %d\n", token_type); if (token_type == TOKEN_ID) { const char* name = id_table + id_lut[token_data]; eprintf(" id: %s\n", name); } else if (token_type == TOKEN_NUMBER) { eprintf(" number: %d\n", token_data); } } void expect_token(int expected_type) { next_token(); if (token_type != expected_type) { eprintf("unexpected token: %d, should be %d\n", token_type, expected_type); exit(1); } } void ignore_const() { if (token_type == TOKEN_CONST) { next_token(); } } int parse_type() { ignore_const(); if (token_type == TOKEN_INT || token_type == TOKEN_CHAR || token_type == TOKEN_VOID) { int type = token_type & ~TYPE_TOKEN_MASK; next_token(); ignore_const(); if (token_type == TOKEN_STAR) { next_token(); ignore_const(); type = type | TYPE_PTR_MASK; } unget_token(); return type; } return -1; } // asm int local_table[4096]; // id -> local id int next_local_id = 1; int max_local_id = 1; const int MARKER_TEMP = 0; const int MARKER_SCALAR = 1; const int MARKER_ARRAY = 2; const int MARKER_FUNCTION = 3; int local_marker[4096]; int global_marker[4096]; int local_type[4096]; int global_type[4096]; int reg_type[4096]; int next_reg_id = 18; int max_reg_id = 18; int indirection[4096]; int overflow[4096]; int const_table[4096]; // id -> value int is_const[4096]; const int REG_ZERO = 0; const int REG_RA = 1; const int REG_SP = 2; const int REG_GP = 3; const int REG_TP = 4; const int REG_T0 = 5; const int REG_T1 = 6; const int REG_T2 = 7; const int REG_FP = 8; const int REG_S1 = 9; const int REG_A0 = 10; const int REG_A1 = 11; const int REG_A2 = 12; const int REG_A3 = 13; const int REG_A4 = 14; const int REG_A5 = 15; const int REG_A6 = 16; const int REG_A7 = 17; const int REG_S2 = 18; const int REG_S3 = 19; const int REG_S4 = 20; const int REG_S5 = 21; const int REG_S6 = 22; const int REG_S7 = 23; const int REG_S8 = 24; const int REG_S9 = 25; const int REG_S10 = 26; const int REG_S11 = 27; const int REG_T3 = 28; const int REG_T4 = 29; const int REG_T5 = 30; const int REG_T6 = 31; void reset_reg() { next_reg_id = REG_S2; for (int i = 0; i < 4096; ++i) { reg_type[i] = TYPE_VOID; indirection[i] = 0; overflow[i] = 0; } reg_type[REG_ZERO] = TYPE_INT; } const char* reg_name(int reg) { if (reg == 0) return "zero"; if (reg == 1) return "ra"; if (reg == 2) return "sp"; if (reg == 3) return "gp"; if (reg == 4) return "tp"; if (reg == 5) return "t0"; if (reg == 6) return "t1"; if (reg == 7) return "t2"; if (reg == 8) return "fp"; // reserved begin if (reg == 9) return "s1"; if (reg == 10) return "a0"; if (reg == 11) return "a1"; if (reg == 12) return "a2"; if (reg == 13) return "a3"; if (reg == 14) return "a4"; if (reg == 15) return "a5"; if (reg == 16) return "a6"; if (reg == 17) return "a7"; // allocation begin if (reg == 18) return "s2"; if (reg == 19) return "s3"; if (reg == 20) return "s4"; if (reg == 21) return "s5"; if (reg == 22) return "s6"; if (reg == 23) return "s7"; if (reg == 24) return "s8"; if (reg == 25) return "s9"; if (reg == 26) return "s10"; if (reg == 27) return "s11"; if (reg == 28) return "t3"; if (reg == 29) return "t4"; if (reg == 30) return "t5"; if (reg == 31) return "t6"; // overflow begin return 0; } int is_overflow(int reg) { return reg > REG_T6; } void reset_local() { next_local_id = 1; max_local_id = 1; max_reg_id = REG_S2; for (int i = 0; i < 4096; ++i) { local_table[i] = 0; local_marker[i] = MARKER_TEMP; local_type[i] = TYPE_VOID; } reset_reg(); } void reset_temp() { while (next_local_id > 1 && local_marker[next_local_id - 1] == MARKER_TEMP) { --next_local_id; } reset_reg(); } int next_local_slot(int type) { int slot = next_local_id++; local_type[slot] = type; if (next_local_id > max_local_id) { max_local_id = next_local_id; } return slot; } int declare_local(int id, int type) { if (local_table[id] != 0) return local_table[id]; int slot = next_local_slot(type); local_marker[slot] = MARKER_SCALAR; return local_table[id] = slot; } int declare_local_array(int id, int type, int size) { if (local_table[id] != 0) return local_table[id]; int slot = next_local_slot(type); local_marker[slot] = MARKER_ARRAY; for (int i = 1; i < size; ++i) local_marker[next_local_slot(type)] = MARKER_ARRAY; return local_table[id] = slot; } void declare_global(int id, int marker, int type) { global_marker[id] = marker; global_type[id] = type; } int next_reg(int type) { int reg = next_reg_id++; if (is_overflow(reg)) { int slot = next_local_slot(type); local_marker[slot] = MARKER_TEMP; overflow[reg] = slot; } reg_type[reg] = type; if (next_reg_id > max_reg_id) { max_reg_id = next_reg_id; } return reg; } // prolog & epilog helpers int check_itype_immediate(int value) { return value >= -2048 && value <= 2047; } void asm_ld(const char* rd, int imm, const char* rs) { if (check_itype_immediate(imm)) { printf(" ld %s, %d(%s)\n", rd, imm, rs); } else { printf(" li t0, %d\n", imm); printf(" add t0, %s, t0\n", rs); printf(" ld %s, 0(t0)\n", rd); } } void asm_sd(const char* rs1, int imm, const char* rs2) { if (check_itype_immediate(imm)) { printf(" sd %s, %d(%s)\n", rs1, imm, rs2); } else { printf(" li t0, %d\n", imm); printf(" add t0, %s, t0\n", rs2); printf(" sd %s, 0(t0)\n", rs1); } } void asm_addi(const char* rd, const char* rs, int imm) { if (check_itype_immediate(imm)) { printf(" addi %s, %s, %d\n", rd, rs, imm); } else { printf(" li t0, %d\n", imm); printf(" add %s, %s, t0\n", rd, rs); } } // assembly helpers const char* load_op_of_type(int type) { if (type & TYPE_PTR_MASK) { return "ld"; } else if (type == TYPE_CHAR) { return "lb"; } else { // int return "lw"; } } const char* store_op_of_type(int type) { if (type & TYPE_PTR_MASK) { return "sd"; } else if (type == TYPE_CHAR) { return "sb"; } else { // int return "sw"; } } // address loaders // rd must be one of t0, t1, t2 void load_local_address(int rd, int slot_id) { asm_addi(reg_name(rd), "sp", slot_id * 8 - 8); } // load a non-trivial register into trivial one void load(int rd, int rs) { const char* op = load_op_of_type(reg_type[rs]); const char* rd_name = reg_name(rd); if (is_overflow(rs)) { load_local_address(rd, overflow[rs]); if (indirection[rs]) { printf(" ld %s, 0(%s)\n", rd_name, rd_name); } rs = rd; } printf(" %s %s, 0(%s) # load non-trivial register\n", op, rd_name, reg_name(rs)); } // store a trivial register into a non-trivial one void store(const char* rs, int reg) { const char* op = store_op_of_type(reg_type[reg]); if (is_overflow(reg)) { load_local_address(REG_T2, overflow[reg]); if (indirection[reg]) { printf(" ld t2, 0(t2)\n"); } reg = REG_T2; } printf(" %s %s, 0(%s) # store non-trivial register\n", op, rs, reg_name(reg)); } int is_nontrivial(int reg) { return is_overflow(reg) || indirection[reg]; } const char* trivialize(int rs, int t) { if (is_nontrivial(rs)) { load(t, rs); return reg_name(t); } return reg_name(rs); } void _asm_r(const char* op, int rd, int rs1) { const char* rd_name = reg_name(rd); if (is_nontrivial(rd)) rd_name = "t0"; const char* rs1_name = trivialize(rs1, REG_T0); printf(" %s %s, %s\n", op, rd_name, rs1_name); if (is_nontrivial(rd)) { store("t0", rd); } } void asm_mv(int rd, int rs1) { const char* rs1_name = trivialize(rs1, REG_T0); if (is_nontrivial(rd)) { store(rs1_name, rd); } else { const char* rd_name = reg_name(rd); if (!streq(rd_name, rs1_name)) printf(" mv %s, %s\n", rd_name, rs1_name); } } void _asm_rr(const char* op, int rd, int rs1, int rs2) { const char* rd_name = reg_name(rd); const char* rs1_name = trivialize(rs1, REG_T0); const char* rs2_name = trivialize(rs2, REG_T1); if (is_nontrivial(rd)) rd_name = "t0"; printf(" %s %s, %s, %s\n", op, rd_name, rs1_name, rs2_name); if (is_nontrivial(rd)) { store("t0", rd); } } void _asm_ri(const char* op, int rd, int rs1, int imm) { const char* rd_name = reg_name(rd); if (is_nontrivial(rd)) rd_name = "t0"; const char* rs1_name = trivialize(rs1, REG_T0); printf(" %s %s, %s, %d\n", op, rd_name, rs1_name, imm); if (is_nontrivial(rd)) { store("t0", rd); } } void asm_branch(const char* op, int rs1, int label) { const char* rs1_name = trivialize(rs1, REG_T0); printf(" %s %s, L%d\n", op, rs1_name, label); } void _asm_i(const char* op, int rd, const char* prefix1, const char* prefix2, int imm) { const char* rd_name = reg_name(rd); if (is_nontrivial(rd)) rd_name = "t0"; printf(" %s %s, %s%s%d\n", op, rd_name, prefix1, prefix2, imm); if (is_nontrivial(rd)) { store("t0", rd); } } int is_not_reusable(int rs1, int expected_type) { return indirection[rs1] || reg_type[rs1] != expected_type || rs1 == REG_ZERO; } int asm_r(int type, const char* op, int rs1) { int rd = rs1; if (is_not_reusable(rs1, type)) rd = next_reg(type); _asm_r(op, rd, rs1); return rd; } int asm_rr(int type, const char* op, int rs1, int rs2) { int rd = rs1; if (is_not_reusable(rs1, type)) rd = rs2; if (is_not_reusable(rs2, type)) rd = next_reg(type); _asm_rr(op, rd, rs1, rs2); return rd; } void store_into_local(int rs1, int slot) { const char* rs1_name = trivialize(rs1, REG_T0); load_local_address(REG_T2, slot); printf(" %s %s, 0(t2)\n", store_op_of_type(local_type[slot]), rs1_name); } int materialize_address(int rd, int type, int marker) { if (marker == MARKER_ARRAY) { type = type | TYPE_PTR_MASK; } reg_type[rd] = type; indirection[rd] = marker == MARKER_SCALAR; return rd; } int lookup_from_slot(int slot) { int rd = next_reg(TYPE_VOID_PTR); if (is_nontrivial(rd)) { load_local_address(REG_T0, slot); asm_mv(rd, REG_T0); } else { load_local_address(rd, slot); } return materialize_address(rd, local_type[slot], local_marker[slot]); } int load_imm(int imm) { if (imm == 0) return REG_ZERO; int reg = next_reg(TYPE_INT); _asm_i("li", reg, "", "", imm); return reg; } int lookup(int id) { if (local_table[id]) { return lookup_from_slot(local_table[id]); } if (is_const[id]) { return load_imm(const_table[id]); } const char* name = id_table + id_lut[id]; if (global_marker[id]) { if (global_marker[id] == MARKER_FUNCTION) { eprintf("function name must not appear outside function call: %s\n", name); exit(1); } int rd = next_reg(TYPE_VOID_PTR); _asm_i("la", rd, name, " # id: ", id); return materialize_address(rd, global_type[id], global_marker[id]); } eprintf("unresolved identifier: %s\n", name); exit(1); } int asm_r_arith(const char* op, int rs1) { if (reg_type[rs1] & TYPE_PTR_MASK) { eprintf("pointer cannot be arithmetically operated by %s\n", op); exit(1); } return asm_r(TYPE_INT, op, rs1); } int asm_rr_arith(const char* op, int rs1, int rs2) { if (reg_type[rs1] & TYPE_PTR_MASK || reg_type[rs2] & TYPE_PTR_MASK) { eprintf("pointer cannot be arithmetically operated by %s\n", op); exit(1); } return asm_rr(TYPE_INT, op, rs1, rs2); } int asm_rr_cmp(const char* op, int rs1, int rs2) { // since NULL is virtually 0, it is considered a valid example of a pointer comparing with an integer return asm_rr(TYPE_INT, op, rs1, rs2); } void asm_beqz(int rs1, int label) { asm_branch("beqz", rs1, label); } void asm_bnez(int rs1, int label) { asm_branch("bnez", rs1, label); } void asm_j(int label) { printf(" j L%d\n", label); } int next_label_id = 0; int next_label() { return next_label_id++; } int asm_label(int label) { printf("L%d:\n", label); return label; } int break_label_stack[4096]; int cont_label_stack[4096]; int break_label_stack_size; int cont_label_stack_size; void asm_break() { if (break_label_stack_size == 0) { eprintf("break without loop\n"); exit(1); } asm_j(break_label_stack[break_label_stack_size - 1]); } void asm_continue() { if (cont_label_stack_size == 0) { eprintf("continue without loop\n"); exit(1); } asm_j(cont_label_stack[cont_label_stack_size - 1]); } void asm_push_label(int break_label, int cont_label) { break_label_stack[break_label_stack_size++] = break_label; cont_label_stack[cont_label_stack_size++] = cont_label; } void asm_pop_label() { --break_label_stack_size; --cont_label_stack_size; } int epilog_label; void asm_return() { asm_j(epilog_label); } int log_step_of(int type) { return 2 * (type == TYPE_INT_PTR); } int step_of(int type) { return 1 << log_step_of(type); } int asm_add(int lhs, int rhs) { int type1 = reg_type[lhs] & TYPE_PTR_MASK; int type2 = reg_type[rhs] & TYPE_PTR_MASK; if (type1 != type2) { int ptr; int idx; if (type1) { ptr = lhs; idx = rhs; } else { ptr = rhs; idx = lhs; } int ptr_type = reg_type[ptr]; if (ptr_type == TYPE_VOID_PTR) { eprintf("void pointer cannot be arithmetically operated\n"); exit(1); } int offset = next_reg(TYPE_INT); _asm_ri("slli", offset, idx, log_step_of(ptr_type)); return asm_rr(ptr_type, "add", ptr, offset); } if (type1 && type2) { eprintf("operands of addition cannot be both pointers\n"); exit(1); } return asm_rr(TYPE_INT, "add", lhs, rhs); } int asm_sub(int lhs, int rhs) { int lhs_type = reg_type[lhs]; int rhs_type = reg_type[rhs]; int type1 = lhs_type & TYPE_PTR_MASK; int type2 = rhs_type & TYPE_PTR_MASK; if (type1 && type2) { if (lhs_type != rhs_type) { eprintf("pointer type mismatch\n"); exit(1); } if (lhs_type == TYPE_VOID_PTR) { eprintf("void pointer cannot be arithmetically operated\n"); exit(1); } int difference = asm_rr(TYPE_INT, "sub", lhs, rhs); _asm_ri("slli", difference, difference, log_step_of(lhs_type)); return difference; } if (type1) { int neg = asm_r_arith("neg", rhs); return asm_add(lhs, neg); } return asm_rr_arith("sub", lhs, rhs); } int dereference(int reg) { if (indirection[reg]) { load(reg, reg); } else { indirection[reg] = 1; } reg_type[reg] = reg_type[reg] & ~TYPE_PTR_MASK; return reg; } int addressof(int reg) { if (indirection[reg] && !(reg_type[reg] & TYPE_PTR_MASK)) { reg_type[reg] = reg_type[reg] | TYPE_PTR_MASK; indirection[reg] = 0; } else { printf("cannot take address of this expression"); } return reg; } // parser int parse_expr(); int parse_function_call(int id) { const char* name = id_table + id_lut[id]; if (global_marker[id] != MARKER_FUNCTION) { eprintf("not a function name: %s\n", name); exit(1); } int arg = 0; int args[8]; while (1) { next_token(); if (token_type == TOKEN_PAREN_RIGHT) { break; } unget_token(); if (arg >= 8) { eprintf("too many arguments\n"); exit(1); } args[arg++] = parse_expr(); next_token(); if (token_type == TOKEN_COMMA) { // continue; } else if (token_type == TOKEN_PAREN_RIGHT) { break; } else { eprintf("expecting ',' or ')'\n"); exit(1); } } for (int i = 0; i < arg; ++i) { asm_mv(i + REG_A0, args[i]); } for (int i = REG_T3; i <= REG_T6; ++i) { if (i < max_reg_id) { asm_sd(reg_name(i), (REG_S2 - i) * 8 - 24, "fp"); } } printf(" call %s\n", name); for (int i = REG_T3; i <= REG_T6; ++i) { if (i < max_reg_id) { asm_ld(reg_name(i), (REG_S2 - i) * 8 - 24, "fp"); } } int type = global_type[id]; if (type != TYPE_VOID) { int rd = next_reg(type); asm_mv(rd, REG_A0); return rd; } return REG_ZERO; } int parse_primary_expr() { next_token(); if (token_type == TOKEN_EOF) { exit(1); } else if (token_type == TOKEN_NUMBER) { return load_imm(token_data); } else if (token_type == TOKEN_ID) { next_token(); if (token_type == TOKEN_PAREN_LEFT) { return parse_function_call(token_data); } unget_token(); return lookup(token_data); } else if (token_type == TOKEN_STRING) { int reg = next_reg(TYPE_CHAR_PTR); _asm_i("la", reg, ".LC", "", token_data); return reg; } else if (token_type == TOKEN_PAREN_LEFT) { int reg = parse_expr(); expect_token(TOKEN_PAREN_RIGHT); return reg; } else { eprintf("unexpected token in primary expression: %d\n", token_type); exit(1); } } int parse_postfix_expr() { int lhs = parse_primary_expr(); while (1) { next_token(); if (token_type == TOKEN_INC) { int type = reg_type[lhs]; int reg = next_reg(type); asm_mv(reg, lhs); _asm_ri("addi", lhs, lhs, step_of(type)); lhs = reg; } else if (token_type == TOKEN_DEC) { int type = reg_type[lhs]; int reg = next_reg(type); asm_mv(reg, lhs); _asm_ri("addi", lhs, lhs, -step_of(type)); lhs = reg; } else if (token_type == TOKEN_BRACKET_LEFT) { int rhs = parse_expr(); expect_token(TOKEN_BRACKET_RIGHT); lhs = dereference(asm_add(lhs, rhs)); } else { unget_token(); break; } } return lhs; } int parse_prefix_expr() { next_token(); if (token_type == TOKEN_AND) { int reg = parse_postfix_expr(); int type = reg_type[reg]; if (type & TYPE_PTR_MASK) { eprintf("cannot take address of a pointer\n"); exit(1); } return addressof(reg); } else if (token_type == TOKEN_STAR) { int reg = parse_postfix_expr(); int type = reg_type[reg]; if (!(type & TYPE_PTR_MASK)) { eprintf("cannot dereference a non-pointer\n"); exit(1); } if (type == TYPE_VOID_PTR) { eprintf("cannot dereference void pointer\n"); exit(1); } return dereference(reg); } else if (token_type == TOKEN_MINUS) { int reg = parse_postfix_expr(); return asm_r_arith("neg", reg); } else if (token_type == TOKEN_COMPL) { int reg = parse_postfix_expr(); return asm_r_arith("not", reg); } else if (token_type == TOKEN_NOT) { int reg = parse_postfix_expr(); return asm_r(TYPE_INT, "seqz", reg); } else if (token_type == TOKEN_INC) { int reg = parse_postfix_expr(); _asm_ri("addi", reg, reg, step_of(reg_type[reg])); return reg; } else if (token_type == TOKEN_DEC) { int reg = parse_postfix_expr(); _asm_ri("addi", reg, reg, -step_of(reg_type[reg])); return reg; } else { unget_token(); return parse_postfix_expr(); } } int parse_mul_expr() { int lhs = parse_prefix_expr(); while (1) { next_token(); if (token_type == TOKEN_STAR) { int rhs = parse_prefix_expr(); lhs = asm_rr_arith("mul", lhs, rhs); } else if (token_type == TOKEN_DIV) { int rhs = parse_prefix_expr(); lhs = asm_rr_arith("div", lhs, rhs); } else if (token_type == TOKEN_REM) { int rhs = parse_prefix_expr(); lhs = asm_rr_arith("rem", lhs, rhs); } else { unget_token(); break; } } return lhs; } int parse_add_expr() { int lhs = parse_mul_expr(); while (1) { next_token(); if (token_type == TOKEN_ADD) { int rhs = parse_mul_expr(); lhs = asm_add(lhs, rhs); } else if (token_type == TOKEN_MINUS) { int rhs = parse_mul_expr(); lhs = asm_sub(lhs, rhs); } else { unget_token(); break; } } return lhs; } int parse_shift_expr() { int lhs = parse_add_expr(); while (1) { next_token(); if (token_type == TOKEN_LSHIFT) { int rhs = parse_add_expr(); lhs = asm_rr_arith("sll", lhs, rhs); } else if (token_type == TOKEN_RSHIFT) { int rhs = parse_add_expr(); lhs = asm_rr_arith("sra", lhs, rhs); } else { unget_token(); break; } } return lhs; } int parse_cmp_expr() { int lhs = parse_shift_expr(); while (1) { next_token(); if (token_type == TOKEN_LT) { int rhs = parse_shift_expr(); lhs = asm_rr_cmp("slt", lhs, rhs); } else if (token_type == TOKEN_GT) { int rhs = parse_shift_expr(); lhs = asm_rr_cmp("sgt", lhs, rhs); } else if (token_type == TOKEN_LE) { int rhs = parse_shift_expr(); int sgt = asm_rr_cmp("sgt", lhs, rhs); lhs = asm_r(TYPE_INT, "seqz", sgt); } else if (token_type == TOKEN_GE) { int rhs = parse_shift_expr(); int slt = asm_rr_cmp("slt", lhs, rhs); lhs = asm_r(TYPE_INT, "seqz", slt); } else { unget_token(); break; } } return lhs; } int parse_eq_expr() { int lhs = parse_cmp_expr(); while (1) { next_token(); if (token_type == TOKEN_EQ) { int rhs = parse_cmp_expr(); int xor0 = asm_rr_cmp("xor", lhs, rhs); lhs = asm_r(TYPE_INT, "seqz", xor0); } else if (token_type == TOKEN_NE) { int rhs = parse_cmp_expr(); int xor0 = asm_rr_cmp("xor", lhs, rhs); lhs = asm_r(TYPE_INT, "snez", xor0); } else { unget_token(); break; } } return lhs; } int parse_bitwise_and_expr() { int lhs = parse_eq_expr(); while (1) { next_token(); if (token_type == TOKEN_AND) { int rhs = parse_eq_expr(); lhs = asm_rr_arith("and", lhs, rhs); } else { unget_token(); break; } } return lhs; } int parse_bitwise_xor_expr() { int lhs = parse_bitwise_and_expr(); while (1) { next_token(); if (token_type == TOKEN_XOR) { int rhs = parse_bitwise_and_expr(); lhs = asm_rr_arith("xor", lhs, rhs); } else { unget_token(); break; } } return lhs; } int parse_bitwise_or_expr() { int lhs = parse_bitwise_xor_expr(); while (1) { next_token(); if (token_type == TOKEN_OR) { int rhs = parse_bitwise_xor_expr(); lhs = asm_rr_arith("or", lhs, rhs); } else { unget_token(); break; } } return lhs; } int parse_logical_and_expr() { int lhs = parse_bitwise_or_expr(); int logical = 0; int label; int result; while (1) { next_token(); if (token_type == TOKEN_LAND) { if (!logical) { logical = 1; label = next_label(); result = next_reg(TYPE_INT); _asm_r("snez", result, lhs); } asm_beqz(result, label); int rhs = parse_bitwise_or_expr(); _asm_r("snez", result, rhs); } else { unget_token(); break; } } if (logical) { asm_label(label); return result; } return lhs; } int parse_logical_or_expr() { int lhs = parse_logical_and_expr(); int logical = 0; int label; int result; while (1) { next_token(); if (token_type == TOKEN_LOR) { if (!logical) { logical = 1; label = next_label(); result = next_reg(TYPE_INT); _asm_r("snez", result, lhs); } asm_bnez(result, label); int rhs = parse_logical_and_expr(); _asm_r("snez", result, rhs); } else { unget_token(); break; } } if (logical) { asm_label(label); return result; } return lhs; } int parse_assign_expr() { int lhs = parse_logical_or_expr(); next_token(); if (token_type == TOKEN_ASSIGN) { int rhs = parse_assign_expr(); asm_mv(lhs, rhs); return lhs; } else { unget_token(); return lhs; } } int parse_expr() { return parse_assign_expr(); } void parse_local_variable(int type) { if (type == TYPE_VOID) { eprintf("variable cannot be of void type\n"); exit(1); } expect_token(TOKEN_ID); int id = token_data; next_token(); if (token_type == TOKEN_BRACKET_LEFT) { if (type & TYPE_PTR_MASK) { eprintf("array of pointers is not supported\n"); exit(1); } expect_token(TOKEN_NUMBER); int size = token_data; expect_token(TOKEN_BRACKET_RIGHT); declare_local_array(id, type, size); return; } int slot = declare_local(id, type); if (token_type == TOKEN_SEMICOLON) { unget_token(); return; } unget_token(); expect_token(TOKEN_ASSIGN); int reg = parse_expr(); if (type != reg_type[reg]) { eprintf("type mismatch in assignment\n"); exit(1); } store_into_local(reg, slot); } void parse_stmt(); void parse_if() { expect_token(TOKEN_PAREN_LEFT); int cond = parse_expr(); int label1 = next_label(); int label2 = next_label(); asm_beqz(cond, label1); reset_temp(); expect_token(TOKEN_PAREN_RIGHT); parse_stmt(); asm_j(label2); asm_label(label1); next_token(); if (token_type == TOKEN_ELSE) { parse_stmt(); } else { unget_token(); } asm_label(label2); } void parse_while() { expect_token(TOKEN_PAREN_LEFT); int break_label = next_label(); int cont_label = next_label(); asm_push_label(break_label, cont_label); asm_label(cont_label); int cond = parse_expr(); asm_beqz(cond, break_label); reset_temp(); expect_token(TOKEN_PAREN_RIGHT); parse_stmt(); asm_j(cont_label); asm_label(break_label); asm_pop_label(); } void parse_for() { expect_token(TOKEN_PAREN_LEFT); int cont_label = next_label(); int break_label = next_label(); int cond_label = next_label(); int body_label = next_label(); asm_push_label(break_label, cont_label); parse_stmt(); // init asm_label(cond_label); int cond = parse_expr(); asm_beqz(cond, break_label); asm_j(body_label); reset_temp(); expect_token(TOKEN_SEMICOLON); asm_label(cont_label); parse_expr(); // update reset_temp(); expect_token(TOKEN_PAREN_RIGHT); asm_j(cond_label); asm_label(body_label); parse_stmt(); // body asm_j(cont_label); asm_label(break_label); asm_pop_label(); } void parse_do_while() { int cont_label = next_label(); int break_label = next_label(); asm_push_label(break_label, cont_label); asm_label(cont_label); parse_stmt(); // body expect_token(TOKEN_WHILE); expect_token(TOKEN_PAREN_LEFT); int cond = parse_expr(); asm_bnez(cond, cont_label); expect_token(TOKEN_PAREN_RIGHT); asm_label(break_label); asm_pop_label(); } void parse_stmt() { next_token(); int decl_type; if (token_type == TOKEN_IF) { parse_if(); return; } else if (token_type == TOKEN_WHILE) { parse_while(); return; } else if (token_type == TOKEN_FOR) { parse_for(); return; } else if (token_type == TOKEN_DO) { parse_do_while(); } else if (token_type == TOKEN_BRACE_LEFT) { while (1) { next_token(); if (token_type == TOKEN_BRACE_RIGHT) { break; } unget_token(); parse_stmt(); } return; } else if (token_type == TOKEN_RETURN) { next_token(); if (token_type == TOKEN_SEMICOLON) { asm_return(); return; } unget_token(); int rs1 = parse_expr(); asm_mv(REG_A0, rs1); asm_return(); } else if (token_type == TOKEN_BREAK) { asm_break(); } else if (token_type == TOKEN_CONTINUE) { asm_continue(); } else if (token_type == TOKEN_SEMICOLON) { unget_token(); } else if ((decl_type = parse_type()) >= 0) { parse_local_variable(decl_type); } else { unget_token(); parse_expr(); } expect_token(TOKEN_SEMICOLON); reset_temp(); } void parse_function(const char* name) { reset_local(); int arg = 0; int args[8]; while (1) { next_token(); if (token_type == TOKEN_PAREN_RIGHT) { break; } if (token_type == TOKEN_ELLIPSIS) { expect_token(TOKEN_PAREN_RIGHT); break; } if (token_type == TOKEN_VOID) { if (arg != 0) { eprintf("void should be the only argument\n"); exit(1); } expect_token(TOKEN_PAREN_RIGHT); break; } int arg_type = parse_type(); if (arg_type < 0 || arg_type == TYPE_VOID) { eprintf("expecting a non-void argument type: %d\n", arg_type); exit(1); } expect_token(TOKEN_ID); int arg_name = token_data; next_token(); if (token_type == TOKEN_BRACKET_LEFT) { expect_token(TOKEN_BRACKET_RIGHT); next_token(); if (arg_type & TYPE_PTR_MASK) { eprintf("array of pointers is not supported\n"); exit(1); } arg_type = arg_type | TYPE_PTR_MASK; } if (arg >= 8) { eprintf("too many arguments\n"); exit(1); } args[arg++] = declare_local(token_data, arg_type); if (token_type == TOKEN_COMMA) { // continue; } else if (token_type == TOKEN_PAREN_RIGHT) { break; } else { eprintf("expecting ',' or ')'\n"); exit(1); } } next_token(); if (token_type == TOKEN_SEMICOLON) { return; } unget_token(); expect_token(TOKEN_BRACE_LEFT); printf(".text\n"); printf(".global %s\n", name); printf("%s:\n", name); int label = next_label(); int prolog_label = next_label(); epilog_label = next_label(); asm_j(prolog_label); asm_label(label); while (1) { next_token(); if (token_type == TOKEN_BRACE_RIGHT) { break; } unget_token(); parse_stmt(); } if (streq(name, "main")) { asm_mv(REG_A0, REG_ZERO); } asm_return(); int reg_used = max_reg_id - REG_S2; if (reg_used > 14) reg_used = 14; int frame_size = (max_local_id - 1 + reg_used + 2) * 8; if (reg_used > 10) reg_used = 10; if (frame_size % 16 != 0) { frame_size = frame_size + 8; } // prolog asm_label(prolog_label); asm_addi("sp", "sp", -frame_size); asm_sd("ra", frame_size - 8, "sp"); asm_sd("fp", frame_size - 16, "sp"); for (int i = 0; i < reg_used; ++i) { int reg = REG_S2 + i; asm_sd(reg_name(reg), frame_size - 24 - i * 8, "sp"); } asm_addi("fp", "sp", frame_size); for (int i = 0; i < arg; ++i) { store_into_local(REG_A0 + i, args[i]); } asm_j(label); // epilog asm_label(epilog_label); asm_ld("ra", frame_size - 8, "sp"); asm_ld("fp", frame_size - 16, "sp"); for (int i = 0; i < reg_used; ++i) { int reg = REG_S2 + i; asm_ld(reg_name(reg), frame_size - 24 - i * 8, "sp"); } asm_addi("sp", "sp", frame_size); printf(" ret\n"); } void parse_global_variable(int id, const char* name, int type) { if (type == TYPE_VOID) { eprintf("variable cannot be of void type\n"); exit(1); } printf(".data\n"); printf(".globl %s\n", name); printf(".align 5\n"); printf("%s:\n", name); if (token_type == TOKEN_ASSIGN) { expect_token(TOKEN_NUMBER); printf(" .word %d\n", token_data); } else if (token_type == TOKEN_BRACKET_LEFT) { if (type & TYPE_PTR_MASK) { eprintf("array of pointers is not supported\n"); exit(1); } expect_token(TOKEN_NUMBER); int size = token_data; expect_token(TOKEN_BRACKET_RIGHT); int array_size = 4 * size; if (type == TYPE_CHAR) { array_size = size; } printf(" .zero %d\n", array_size); declare_global(id, MARKER_ARRAY, type); } else { printf(" .zero %d\n", 4); unget_token(); } expect_token(TOKEN_SEMICOLON); } void parse_global_declaration() { int is_const_int = 1; if (token_type != TOKEN_CONST) { is_const_int = 0; } int type = parse_type(); if (type < 0) { eprintf("expecting type for global declaration\n"); exit(1); } if (type != TYPE_INT) { is_const_int = 0; } expect_token(TOKEN_ID); int id = token_data; char* name = id_table + id_lut[id]; next_token(); if (is_const_int && token_type == TOKEN_ASSIGN) { expect_token(TOKEN_NUMBER); const_table[id] = token_data; is_const[id] = 1; expect_token(TOKEN_SEMICOLON); } else if (token_type == TOKEN_PAREN_LEFT) { declare_global(id, MARKER_FUNCTION, type); parse_function(name); } else { declare_global(id, MARKER_SCALAR, type); parse_global_variable(id, name, type); } } void parse_top_level() { next_token(); if (token_type == TOKEN_EOF) return; parse_global_declaration(); parse_top_level(); } void dump_string_table() { printf(".data\n"); for (int i = 0; i < string_lut_size; ++i) { printf(".LC%d: .string \"", i); char* p = string_table + string_lut[i]; int ch; while ((ch = *p++) != 0) { if (ch == '\n') { printf("\\n"); } else if (ch == '\t') { printf("\\t"); } else if (ch == '\r') { printf("\\r"); } else if (ch == '\0') { printf("\\0"); } else if (ch == '\\') { printf("\\\\"); } else if (ch == '\'') { printf("\\'"); } else if (ch == '\"') { printf("\\\""); } else { printf("%c", ch); } } printf("\"\n"); } } int main() { parse_top_level(); dump_string_table(); }