diff --git a/README.md b/README.md index 7b37e2a..f2c2324 100644 --- a/README.md +++ b/README.md @@ -1,6 +1,6 @@ # RVBTCC -- 约 1800 行的轻量级自举编译器。 +- 约 1500 行的轻量级自举编译器。 - 编译器和自举编译器行为一致。 - 语法类似 C,输出 RISC-V 汇编。 - 依赖几个 libc 函数用于输入输出。 @@ -64,21 +64,7 @@ $ sh boot.sh 本语言包含的关键字即为支持的标量类型的关键字和流程控制的关键字,还有 `const`。 -### `const` 关键字 - -`const` 关键字可以在类型中使用,在大部分情况下会被直接忽略。支持它是为了更好兼容 C 程序。 - -但是当在出现 - -- 全局,标量(即不是数组) -- 类型为 `const int` 或 `const int const` -- 带有初始化 - -的声明时,将会被解析为整数常量。 - -整数常量在使用的时候会被直接替换为对应的右值,失去作为全局变量左值的性质。 - -使用 `int const` 或 `int` 可以避免这样的特殊处理。 +`const` 关键字可以在类型中使用,但会被直接忽略。支持它是为了更好兼容 C 程序。 ### 支持六个基本类型 @@ -89,11 +75,13 @@ $ sh boot.sh | `int` | `int*` | - 注意指针类型不是复合得来的,而是被视作整体。因此也不存在二重指针。 + - 函数和数组不是类型系统的一部分。 - 可以认为数组的类型就是其元素对应的指针类型。 - 函数的参数类型和个数不会检查,返回值会参与类型检查。 - 函数名只能被用于调用,函数调用被视为初等表达式。 - 数组只支持一维数组,且数组的元素不能是指针类型。 +- 全局变量不能是指针类型。 - 整数和字符字面量的类型是 `int`,字符串字面量的类型是 `char*` ### 支持的流程控制 @@ -128,6 +116,8 @@ $ sh boot.sh - 算术运算的结果总是被提升为 `int` 类型。布尔值用 `int` 类型表示。 - 由于空指针就是 `0`,因此指针和整数之间的比较运算没有禁止。 - 逻辑与和逻辑或支持短路求值。 +- 表达式没有左值和右值之分。可以认为右值总是存在一个临时的变量中。 +- 赋值不检查类型。强制类型转换可以用赋值给特定类型的变量实现。 ### 其它支持与不支持 diff --git a/boot.c b/boot.c index 0586617..5fbab63 100644 --- a/boot.c +++ b/boot.c @@ -145,24 +145,6 @@ int parse_string() { return string_lut_size++; } -void rewind_string(int new_data) { - string_offset = string_lut[token_data]; - token_data = new_data; - --string_lut_size; -} - -void dedup_string() { - int last_string = string_lut_size - 1; - char* latest = string_table + string_lut[last_string]; - for (int i = 0; i < last_string; i++) { - char* candidate = string_table + string_lut[i]; - if (streq(candidate, latest)) { - rewind_string(i); - return; - } - } -} - char id_table[65536]; int id_offset; int id_lut[4096]; @@ -379,16 +361,18 @@ void next_token() { } else if (ch == '"') { token_type = TOKEN_STRING; token_data = parse_string(); - dedup_string(); } else if (ch == '.') { - token_type = 0; - if (getchar() == '.') { - if (getchar() == '.') { + int ch2 = getchar(); + if (ch2 == '.') { + int ch3 = getchar(); + if (ch3 == '.') { token_type = TOKEN_ELLIPSIS; + } else { + eprintf("unexpected character: %c\n", ch3); + exit(1); } - } - if (token_type != TOKEN_ELLIPSIS) { - eprintf("expecting '...'\n"); + } else { + eprintf("unexpected character: %c\n", ch2); exit(1); } } else if (is_digit(ch)) { @@ -445,8 +429,8 @@ int parse_type() { int epilog_label; int local_table[4096]; // id -> local id -int next_local_id = 1; -int max_local_id = 1; +int next_local_id = 2; +int max_local_id = 2; const int MARKER_TEMP = 0; const int MARKER_SCALAR = 1; @@ -457,143 +441,47 @@ int local_marker[4096]; int global_marker[4096]; int local_type[4096]; int global_type[4096]; - -int reg_type[4096]; -int next_reg_id = 18; -int max_reg_id = 18; int indirection[4096]; -int overflow[4096]; - -int const_table[4096]; // id -> value -int is_const[4096]; - -const int REG_ZERO = 0; -const int REG_RA = 1; -const int REG_SP = 2; -const int REG_GP = 3; -const int REG_TP = 4; -const int REG_T0 = 5; -const int REG_T1 = 6; -const int REG_T2 = 7; -const int REG_FP = 8; -const int REG_S1 = 9; -const int REG_A0 = 10; -const int REG_A1 = 11; -const int REG_A2 = 12; -const int REG_A3 = 13; -const int REG_A4 = 14; -const int REG_A5 = 15; -const int REG_A6 = 16; -const int REG_A7 = 17; -const int REG_S2 = 18; -const int REG_S3 = 19; -const int REG_S4 = 20; -const int REG_S5 = 21; -const int REG_S6 = 22; -const int REG_S7 = 23; -const int REG_S8 = 24; -const int REG_S9 = 25; -const int REG_S10 = 26; -const int REG_S11 = 27; -const int REG_T3 = 28; -const int REG_T4 = 29; -const int REG_T5 = 30; -const int REG_T6 = 31; - -void reset_reg() { - next_reg_id = REG_S2; - for (int i = 0; i < 4096; ++i) { - reg_type[i] = TYPE_VOID; - indirection[i] = 0; - overflow[i] = 0; - } - reg_type[REG_ZERO] = TYPE_INT; -} - -const char* reg_name(int reg) { - if (reg == 0) return "zero"; - if (reg == 1) return "ra"; - if (reg == 2) return "sp"; - if (reg == 3) return "gp"; - if (reg == 4) return "tp"; - if (reg == 5) return "t0"; - if (reg == 6) return "t1"; - if (reg == 7) return "t2"; - if (reg == 8) return "fp"; - // reserved begin - if (reg == 9) return "s1"; - if (reg == 10) return "a0"; - if (reg == 11) return "a1"; - if (reg == 12) return "a2"; - if (reg == 13) return "a3"; - if (reg == 14) return "a4"; - if (reg == 15) return "a5"; - if (reg == 16) return "a6"; - if (reg == 17) return "a7"; - // allocation begin - if (reg == 18) return "s2"; - if (reg == 19) return "s3"; - if (reg == 20) return "s4"; - if (reg == 21) return "s5"; - if (reg == 22) return "s6"; - if (reg == 23) return "s7"; - if (reg == 24) return "s8"; - if (reg == 25) return "s9"; - if (reg == 26) return "s10"; - if (reg == 27) return "s11"; - if (reg == 28) return "t3"; - if (reg == 29) return "t4"; - if (reg == 30) return "t5"; - if (reg == 31) return "t6"; - // overflow begin - return 0; -} - -int is_overflow(int reg) { - return reg > REG_T6; -} void reset_local() { - next_local_id = 1; - max_local_id = 1; - max_reg_id = REG_S2; + next_local_id = 2; + max_local_id = 2; for (int i = 0; i < 4096; ++i) { local_table[i] = 0; local_marker[i] = MARKER_TEMP; local_type[i] = TYPE_VOID; + indirection[i] = 0; } - reset_reg(); } void reset_temp() { - while (next_local_id > 1 && local_marker[next_local_id - 1] == MARKER_TEMP) { + while (next_local_id > 2 && local_marker[next_local_id - 1] == MARKER_TEMP) { --next_local_id; } - reset_reg(); } -int next_local_slot(int type) { - int slot = next_local_id++; - local_type[slot] = type; +int next_reg(int type) { + int reg = next_local_id++; + local_type[reg] = type; + indirection[reg] = 0; if (next_local_id > max_local_id) { max_local_id = next_local_id; } - return slot; + return reg; } int declare_local(int id, int type) { if (local_table[id] != 0) return local_table[id]; - int slot = next_local_slot(type); - local_marker[slot] = MARKER_SCALAR; - return local_table[id] = slot; + int reg = next_reg(type); + local_marker[reg] = MARKER_SCALAR; + return local_table[id] = reg; } int declare_local_array(int id, int type, int size) { if (local_table[id] != 0) return local_table[id]; - int slot = next_local_slot(type); - local_marker[slot] = MARKER_ARRAY; - for (int i = 1; i < size; ++i) local_marker[next_local_slot(type)] = MARKER_ARRAY; - return local_table[id] = slot; + int reg; + for (int i = 0; i < size; ++i) local_marker[reg = next_reg(type)] = MARKER_ARRAY; + return local_table[id] = reg; } void declare_global(int id, int marker, int type) { @@ -601,23 +489,6 @@ void declare_global(int id, int marker, int type) { global_type[id] = type; } -int next_reg(int type) { - int reg = next_reg_id++; - if (is_overflow(reg)) { - int slot = next_local_slot(type); - local_marker[slot] = MARKER_TEMP; - overflow[reg] = slot; - } - reg_type[reg] = type; - if (next_reg_id > max_reg_id) { - max_reg_id = next_reg_id; - } - return reg; -} - - -// prolog & epilog helpers - int check_itype_immediate(int value) { return value >= -2048 && value <= 2047; } @@ -651,215 +522,87 @@ void asm_addi(const char* rd, const char* rs, int imm) { } } -// assembly helpers - -// address loaders -// rd must be one of t0, t1, t2 -void load_local_address(int rd, int slot_id) { - int offset = slot_id * 8 - 8; - const char* rd_name = reg_name(rd); - if (check_itype_immediate(offset)) { - printf(" addi %s, sp, %d\n", rd_name, offset); - } else { - printf(" li %s, %d\n", rd_name, offset); - printf(" add %s, sp, %s\n", rd_name, rd_name); - } -} - -const char* load_op_of_type(int type) { - if (type & TYPE_PTR_MASK) { - return "ld"; - } else if (type == TYPE_CHAR) { - return "lb"; - } else { // int - return "lw"; +void load_address(int rd, int id) { + if (id == -1) { + eprintf("void cannot be arithmetically operated\n"); + exit(1); } -} - -const char* store_op_of_type(int type) { - if (type & TYPE_PTR_MASK) { - return "sd"; - } else if (type == TYPE_CHAR) { - return "sb"; - } else { // int - return "sw"; - } -} - -// load a non-trivial register into t0, t1 or t2 -// rd must be one of t0, t1, t2 -void load(int rd, int reg) { - const char* op = load_op_of_type(reg_type[reg]); - const char* rd_name = reg_name(rd); - if (is_overflow(reg)) { - load_local_address(rd, overflow[reg]); - if (indirection[reg]) { - printf(" ld %s, 0(%s)\n", rd_name, rd_name); + int offset = -id * 8 - 8; + if (indirection[id]) { + if (check_itype_immediate(offset)) { + printf(" ld t%d, %d(fp) # indirection\n", rd, offset); + } else { + printf(" li t%d, %d\n", rd, offset); + printf(" add t%d, fp, t%d\n", rd, rd); + printf(" ld t%d, 0(t%d) # indirection\n", rd, rd); } - reg = rd; - } - printf(" %s %s, 0(%s) # load non-trivial register\n", op, rd_name, reg_name(reg)); -} - -// store t0 into a non-trivial register -void store_t0(int reg) { - const char* op = store_op_of_type(reg_type[reg]); - if (is_overflow(reg)) { - load_local_address(REG_T2, overflow[reg]); - if (indirection[reg]) { - printf(" ld t2, 0(t2)\n"); - } - reg = REG_T2; - } - printf(" %s t0, 0(%s) # store non-trivial register\n", op, reg_name(reg)); -} - -int is_nontrivial(int reg) { - return is_overflow(reg) || indirection[reg]; -} - -void _asm_r(const char* op, int rd, int rs1) { - const char* rd_name = reg_name(rd); - const char* rs1_name = reg_name(rs1); - if (is_nontrivial(rd)) rd_name = "t0"; - if (is_nontrivial(rs1)) { - rs1_name = "t0"; - load(REG_T0, rs1); - } - if (!(streq(op, "mv") && streq(rd_name, rs1_name))) - printf(" %s %s, %s\n", op, rd_name, rs1_name); - if (is_nontrivial(rd)) { - store_t0(rd); - } -} - -void _asm_rr(const char* op, int rd, int rs1, int rs2) { - const char* rd_name = reg_name(rd); - const char* rs1_name = reg_name(rs1); - const char* rs2_name = reg_name(rs2); - if (is_nontrivial(rd)) rd_name = "t0"; - if (is_nontrivial(rs1)) { - rs1_name = "t0"; - load(REG_T0, rs1); - } - if (is_nontrivial(rs2)) { - rs2_name = "t1"; - load(REG_T1, rs2); - } - printf(" %s %s, %s, %s\n", op, rd_name, rs1_name, rs2_name); - if (is_nontrivial(rd)) { - store_t0(rd); - } -} - -void _asm_ri(const char* op, int rd, int rs1, int imm) { - const char* rd_name = reg_name(rd); - const char* rs1_name = reg_name(rs1); - if (is_nontrivial(rd)) rd_name = "t0"; - if (is_nontrivial(rs1)) { - rs1_name = "t0"; - load(REG_T0, rs1); - } - printf(" %s %s, %s, %d\n", op, rd_name, rs1_name, imm); - if (is_nontrivial(rd)) { - store_t0(rd); - } -} - -void _asm_branch(const char* op, int rs1, int label) { - const char* rs1_name = reg_name(rs1); - if (is_nontrivial(rs1)) { - rs1_name = "t0"; - load(REG_T0, rs1); - } - printf(" %s %s, L%d\n", op, rs1_name, label); -} - -void _asm_i(const char* op, int rd, const char* prefix1, const char* prefix2, int imm) { - const char* rd_name = reg_name(rd); - if (is_nontrivial(rd)) rd_name = "t0"; - printf(" %s %s, %s%s%d\n", op, rd_name, prefix1, prefix2, imm); - if (is_nontrivial(rd)) { - store_t0(rd); - } -} - -int is_not_reusable(int rs1, int expected_type) { - return indirection[rs1] || reg_type[rs1] != expected_type || rs1 == REG_ZERO; -} - -int asm_r(int type, const char* op, int rs1) { - int rd = rs1; - if (is_not_reusable(rs1, type)) rd = next_reg(type); - _asm_r(op, rd, rs1); - return rd; -} - -int asm_rr(int type, const char* op, int rs1, int rs2) { - int rd = rs1; - if (is_not_reusable(rs1, type)) rd = rs2; - if (is_not_reusable(rs2, type)) rd = next_reg(type); - _asm_rr(op, rd, rs1, rs2); - return rd; -} - -void asm_mv(int rd, int rs1) { - _asm_r("mv", rd, rs1); -} - -void store_into_local(int rs1, int slot) { - const char* rs1_name = reg_name(rs1); - if (is_nontrivial(rs1)) { - rs1_name = "t0"; - load(REG_T0, rs1); - } - load_local_address(REG_T2, slot); - printf(" %s %s, 0(t2)\n", store_op_of_type(local_type[slot]), rs1_name); -} - -int materialize_address(int rd, int type, int marker) { - if (marker == MARKER_ARRAY) { - type = type | TYPE_PTR_MASK; - } - reg_type[rd] = type; - indirection[rd] = marker == MARKER_SCALAR; - return rd; -} - -int lookup_from_slot(int slot) { - int reg = next_reg(TYPE_VOID_PTR); - if (is_nontrivial(reg)) { - load_local_address(REG_T0, slot); - asm_mv(reg, REG_T0); } else { - load_local_address(reg, slot); + if (check_itype_immediate(offset)) { + printf(" addi t%d, fp, %d\n", rd, offset); + } else { + printf(" li t%d, %d\n", rd, offset); + printf(" add t%d, fp, t%d\n", rd, rd); + } } - return materialize_address(reg, local_type[slot], local_marker[slot]); } -int load_imm(int imm) { - if (imm == 0) return REG_ZERO; - int reg = next_reg(TYPE_INT); - _asm_i("li", reg, "", "", imm); +void load(int rd, int id) { + load_address(rd, id); + int type = local_type[id]; + const char* op = "lw"; // int + if (type == TYPE_CHAR) { + op = "lb"; + } else if (type & TYPE_PTR_MASK) { + op = "ld"; + } + printf(" %s t%d, 0(t%d) # id: type %d\n", op, rd, rd, type); +} + +void store_t0(int id) { + load_address(1, id); + int type = local_type[id]; + const char* op = "sw"; // int + if (type == TYPE_CHAR) { + op = "sb"; + } else if (type & TYPE_PTR_MASK) { + op = "sd"; + } + printf(" %s t0, 0(t1) # id: type %d\n", op, type); +} + +int materialize_t0(int type) { + int reg = next_reg(type); + store_t0(reg); + return reg; +} + +int dereference(int reg) { + local_type[reg] = local_type[reg] & ~TYPE_PTR_MASK; + indirection[reg] = 1; return reg; } int lookup(int id) { - if (local_table[id]) { - return lookup_from_slot(local_table[id]); - } - if (is_const[id]) { - return load_imm(const_table[id]); + int local = local_table[id]; + if (local) { + if (local_marker[local] == MARKER_ARRAY) { + load_address(0, local); + return materialize_t0(local_type[local] | TYPE_PTR_MASK); + } + return local; } const char* name = id_table + id_lut[id]; if (global_marker[id]) { if (global_marker[id] == MARKER_FUNCTION) { eprintf("function name must not appear outside function call: %s\n", name); exit(1); - } - int reg = next_reg(TYPE_VOID_PTR); - _asm_i("la", reg, name, " # id: ", id); - return materialize_address(reg, global_type[id], global_marker[id]); + } + printf(" la t0, %s # id: %d\n", name, id); + int reg = materialize_t0(global_type[id] | TYPE_PTR_MASK); + if (global_marker[id] == MARKER_SCALAR) { + reg = dereference(reg); + } + return reg; } eprintf("unresolved identifier: %s\n", name); exit(1); @@ -876,33 +619,65 @@ int asm_label(int label) { return label; } +int is_not_reusable(int rs1, int expected_type) { + return indirection[rs1] || local_marker[rs1] != MARKER_TEMP || local_type[rs1] != expected_type; +} + +int asm_r(const char* op, int rs1) { + load(0, rs1); + printf(" %s t0, t0\n", op); + int rd = rs1; + if (is_not_reusable(rs1, TYPE_INT)) { + rd = next_reg(TYPE_INT); + } + store_t0(rd); + return rd; +} + int asm_r_arith(const char* op, int rs1) { - if (reg_type[rs1] & TYPE_PTR_MASK) { + if (local_type[rs1] & TYPE_PTR_MASK) { eprintf("pointer cannot be arithmetically operated by %s\n", op); exit(1); } - return asm_r(TYPE_INT, op, rs1); + return asm_r(op, rs1); +} + +int asm_rr(const char* op, int rs1, int rs2) { + load(0, rs1); + load(1, rs2); + printf(" %s t0, t0, t1\n", op); + int rd = rs1; + if (is_not_reusable(rd, TYPE_INT)) { + rd = rs2; + if (is_not_reusable(rd, TYPE_INT)) { + rd = next_reg(TYPE_INT); + } + } + store_t0(rd); + return rd; } int asm_rr_arith(const char* op, int rs1, int rs2) { - if (reg_type[rs1] & TYPE_PTR_MASK || reg_type[rs2] & TYPE_PTR_MASK) { + if (local_type[rs1] & TYPE_PTR_MASK || local_type[rs2] & TYPE_PTR_MASK) { eprintf("pointer cannot be arithmetically operated by %s\n", op); exit(1); } - return asm_rr(TYPE_INT, op, rs1, rs2); + return asm_rr(op, rs1, rs2); } int asm_rr_cmp(const char* op, int rs1, int rs2) { - // since NULL is virtually 0, it is considered a valid example of a pointer comparing with an integer - return asm_rr(TYPE_INT, op, rs1, rs2); + // since NULL is virtually 0, it is considered valid example of a pointer comparing with an integer + return asm_rr(op, rs1, rs2); } void asm_beqz(int rs1, int label) { - _asm_branch("beqz", rs1, label); + load(0, rs1); + printf(" beqz t0, L%d\n", label); } void asm_bnez(int rs1, int label) { - _asm_branch("bnez", rs1, label); + load(0, rs1); + printf(" bnez t0, L%d\n", label); } void asm_j(int label) { @@ -939,9 +714,15 @@ int step_of(int type) { return 1; } +void asm_shift_t0(const char* op, int type) { + if (type == TYPE_INT_PTR) { + printf(" %s t0, t0, 2\n", op); + } +} + int asm_add(int lhs, int rhs) { - int type1 = reg_type[lhs] & TYPE_PTR_MASK; - int type2 = reg_type[rhs] & TYPE_PTR_MASK; + int type1 = local_type[lhs] & TYPE_PTR_MASK; + int type2 = local_type[rhs] & TYPE_PTR_MASK; if (type1 != type2) { int ptr; int idx; @@ -952,26 +733,27 @@ int asm_add(int lhs, int rhs) { ptr = rhs; idx = lhs; } - int ptr_type = reg_type[ptr]; + int ptr_type = local_type[ptr]; if (ptr_type == TYPE_VOID_PTR) { eprintf("void pointer cannot be arithmetically operated\n"); exit(1); } - int offset = next_reg(TYPE_INT); - int shift = 2 * (ptr_type == TYPE_INT_PTR); - _asm_ri("slli", offset, idx, shift); - return asm_rr(ptr_type, "add", ptr, offset); + load(0, idx); + load(1, ptr); + asm_shift_t0("slli", ptr_type); + printf(" add t0, t0, t1\n"); + return materialize_t0(ptr_type); } if (type1 && type2) { eprintf("operands of addition cannot be both pointers\n"); exit(1); } - return asm_rr(TYPE_INT, "add", lhs, rhs); + return asm_rr("add", lhs, rhs); } int asm_sub(int lhs, int rhs) { - int lhs_type = reg_type[lhs]; - int rhs_type = reg_type[rhs]; + int lhs_type = local_type[lhs]; + int rhs_type = local_type[rhs]; int type1 = lhs_type & TYPE_PTR_MASK; int type2 = rhs_type & TYPE_PTR_MASK; if (type1 && type2) { @@ -983,36 +765,17 @@ int asm_sub(int lhs, int rhs) { eprintf("void pointer cannot be arithmetically operated\n"); exit(1); } - int difference = asm_rr(TYPE_INT, "sub", lhs, rhs); - int shift = 2 * (lhs_type == TYPE_INT_PTR); - _asm_ri("slli", difference, difference, shift); - return difference; + load(0, lhs); + load(1, rhs); + printf(" sub t0, t0, t1\n"); + asm_shift_t0("srai", lhs_type); + return materialize_t0(TYPE_INT); } if (type1) { int neg = asm_r_arith("neg", rhs); return asm_add(lhs, neg); } - return asm_rr_arith("sub", lhs, rhs); -} - -int dereference(int reg) { - if (indirection[reg]) { - load(reg, reg); - } else { - indirection[reg] = 1; - } - reg_type[reg] = reg_type[reg] & ~TYPE_PTR_MASK; - return reg; -} - -int addressof(int reg) { - if (indirection[reg] && !(reg_type[reg] & TYPE_PTR_MASK)) { - reg_type[reg] = reg_type[reg] | TYPE_PTR_MASK; - indirection[reg] = 0; - } else { - printf("cannot take address of this expression"); - } - return reg; + return asm_rr("sub", lhs, rhs); } // parser @@ -1048,24 +811,14 @@ int parse_function_call(int id) { } } for (int i = 0; i < arg; ++i) { - asm_mv(i + REG_A0, args[i]); - } - for (int i = REG_T3; i <= REG_T6; ++i) { - if (i < max_reg_id) { - asm_sd(reg_name(i), (REG_S2 - i) * 8 - 24, "fp"); - } + load(0, args[i]); + printf(" mv a%d, t0\n", i); } printf(" call %s\n", name); - for (int i = REG_T3; i <= REG_T6; ++i) { - if (i < max_reg_id) { - asm_ld(reg_name(i), (REG_S2 - i) * 8 - 24, "fp"); - } - } int type = global_type[id]; if (type != TYPE_VOID) { - int rd = next_reg(type); - asm_mv(rd, REG_A0); - return rd; + printf(" mv t0, a0\n"); + return materialize_t0(type); } return -1; } @@ -1075,7 +828,8 @@ int parse_primary_expr() { if (token_type == TOKEN_EOF) { exit(1); } else if (token_type == TOKEN_NUMBER) { - return load_imm(token_data); + printf(" li t0, %d\n", token_data); + return materialize_t0(TYPE_INT); } else if (token_type == TOKEN_ID) { next_token(); if (token_type == TOKEN_PAREN_LEFT) { @@ -1084,9 +838,8 @@ int parse_primary_expr() { unget_token(); return lookup(token_data); } else if (token_type == TOKEN_STRING) { - int reg = next_reg(TYPE_CHAR_PTR); - _asm_i("la", reg, ".LC", "", token_data); - return reg; + printf(" la t0, .LC%d\n", token_data); + return materialize_t0(TYPE_CHAR_PTR); } else if (token_type == TOKEN_PAREN_LEFT) { int reg = parse_expr(); expect_token(TOKEN_PAREN_RIGHT); @@ -1102,16 +855,20 @@ int parse_postfix_expr() { while (1) { next_token(); if (token_type == TOKEN_INC) { - int type = reg_type[lhs]; + int type = local_type[lhs]; int reg = next_reg(type); - asm_mv(reg, lhs); - _asm_ri("addi", lhs, lhs, step_of(type)); + load(0, lhs); + store_t0(reg); + printf(" addi t0, t0, %d\n", step_of(type)); + store_t0(lhs); lhs = reg; } else if (token_type == TOKEN_DEC) { - int type = reg_type[lhs]; + int type = local_type[lhs]; int reg = next_reg(type); - asm_mv(reg, lhs); - _asm_ri("addi", lhs, lhs, -step_of(type)); + load(0, lhs); + store_t0(reg); + printf(" addi t0, t0, -%d\n", step_of(type)); + store_t0(lhs); lhs = reg; } else if (token_type == TOKEN_BRACKET_LEFT) { int rhs = parse_expr(); @@ -1129,15 +886,16 @@ int parse_prefix_expr() { next_token(); if (token_type == TOKEN_AND) { int reg = parse_postfix_expr(); - int type = reg_type[reg]; + int type = local_type[reg]; if (type & TYPE_PTR_MASK) { eprintf("cannot take address of a pointer\n"); exit(1); } - return addressof(reg); + load_address(0, reg); + return materialize_t0(type | TYPE_PTR_MASK); } else if (token_type == TOKEN_STAR) { int reg = parse_postfix_expr(); - int type = reg_type[reg]; + int type = local_type[reg]; if (!(type & TYPE_PTR_MASK)) { eprintf("cannot dereference a non-pointer\n"); exit(1); @@ -1146,7 +904,8 @@ int parse_prefix_expr() { eprintf("cannot dereference void pointer\n"); exit(1); } - return dereference(reg); + load(0, reg); + return dereference(materialize_t0(type)); } else if (token_type == TOKEN_MINUS) { int reg = parse_postfix_expr(); return asm_r_arith("neg", reg); @@ -1155,14 +914,18 @@ int parse_prefix_expr() { return asm_r_arith("not", reg); } else if (token_type == TOKEN_NOT) { int reg = parse_postfix_expr(); - return asm_r(TYPE_INT, "seqz", reg); + return asm_r("seqz", reg); } else if (token_type == TOKEN_INC) { int reg = parse_postfix_expr(); - _asm_ri("addi", reg, reg, step_of(reg_type[reg])); + load(0, reg); + printf(" addi t0, t0, %d\n", step_of(local_type[reg])); + store_t0(reg); return reg; } else if (token_type == TOKEN_DEC) { int reg = parse_postfix_expr(); - _asm_ri("addi", reg, reg, -step_of(reg_type[reg])); + load(0, reg); + printf(" addi t0, t0, -%d\n", step_of(local_type[reg])); + store_t0(reg); return reg; } else { unget_token(); @@ -1240,11 +1003,11 @@ int parse_cmp_expr() { } else if (token_type == TOKEN_LE) { int rhs = parse_shift_expr(); int sgt = asm_rr_cmp("sgt", lhs, rhs); - lhs = asm_r(TYPE_INT, "seqz", sgt); + lhs = asm_r("seqz", sgt); } else if (token_type == TOKEN_GE) { int rhs = parse_shift_expr(); int slt = asm_rr_cmp("slt", lhs, rhs); - lhs = asm_r(TYPE_INT, "seqz", slt); + lhs = asm_r("seqz", slt); } else { unget_token(); break; @@ -1260,11 +1023,11 @@ int parse_eq_expr() { if (token_type == TOKEN_EQ) { int rhs = parse_cmp_expr(); int xor0 = asm_rr_cmp("xor", lhs, rhs); - lhs = asm_r(TYPE_INT, "seqz", xor0); + lhs = asm_r("seqz", xor0); } else if (token_type == TOKEN_NE) { int rhs = parse_cmp_expr(); int xor0 = asm_rr_cmp("xor", lhs, rhs); - lhs = asm_r(TYPE_INT, "snez", xor0); + lhs = asm_r("snez", xor0); } else { unget_token(); break; @@ -1321,58 +1084,48 @@ int parse_bitwise_or_expr() { int parse_logical_and_expr() { int lhs = parse_bitwise_or_expr(); - int logical = 0; - int label; - int result; + int label = next_label(); + int label_used = 0; while (1) { next_token(); if (token_type == TOKEN_LAND) { - if (!logical) { - logical = 1; - label = next_label(); - result = next_reg(TYPE_INT); - _asm_r("snez", result, lhs); - } - asm_beqz(result, label); + lhs = asm_r("snez", lhs); + asm_beqz(lhs, label); int rhs = parse_bitwise_or_expr(); - _asm_r("snez", result, rhs); + rhs = asm_r("snez", rhs); + lhs = asm_rr("and", lhs, rhs); + label_used = 1; } else { unget_token(); break; } } - if (logical) { + if (label_used) { asm_label(label); - return result; } return lhs; } int parse_logical_or_expr() { int lhs = parse_logical_and_expr(); - int logical = 0; - int label; - int result; + int label = next_label(); + int label_used = 0; while (1) { next_token(); if (token_type == TOKEN_LOR) { - if (!logical) { - logical = 1; - label = next_label(); - result = next_reg(TYPE_INT); - _asm_r("snez", result, lhs); - } - asm_bnez(result, label); + lhs = asm_r("snez", lhs); + asm_bnez(lhs, label); int rhs = parse_logical_and_expr(); - _asm_r("snez", result, rhs); + rhs = asm_r("snez", rhs); + lhs = asm_rr("or", lhs, rhs); + label_used = 1; } else { unget_token(); break; } } - if (logical) { + if (label_used) { asm_label(label); - return result; } return lhs; } @@ -1382,7 +1135,8 @@ int parse_assign_expr() { next_token(); if (token_type == TOKEN_ASSIGN) { int rhs = parse_assign_expr(); - asm_mv(lhs, rhs); + load(0, rhs); + store_t0(lhs); return lhs; } else { unget_token(); @@ -1396,7 +1150,7 @@ int parse_expr() { void parse_local_variable(int type) { if (type == TYPE_VOID) { - eprintf("variable cannot be of void type\n"); + eprintf("local variable of void type is not supported\n"); exit(1); } expect_token(TOKEN_ID); @@ -1404,16 +1158,17 @@ void parse_local_variable(int type) { next_token(); if (token_type == TOKEN_BRACKET_LEFT) { if (type & TYPE_PTR_MASK) { - eprintf("array of pointers is not supported\n"); + eprintf("local variable of array of pointers is not supported\n"); exit(1); } expect_token(TOKEN_NUMBER); int size = token_data; expect_token(TOKEN_BRACKET_RIGHT); declare_local_array(id, type, size); - return; - } - int slot = declare_local(id, type); + next_token(); + } else { + declare_local(id, type); + } if (token_type == TOKEN_SEMICOLON) { unget_token(); return; @@ -1421,11 +1176,8 @@ void parse_local_variable(int type) { unget_token(); expect_token(TOKEN_ASSIGN); int reg = parse_expr(); - if (type != reg_type[reg]) { - eprintf("type mismatch in assignment\n"); - exit(1); - } - store_into_local(reg, slot); + load(0, reg); + store_t0(local_table[id]); } void parse_stmt(); @@ -1538,8 +1290,9 @@ void parse_stmt() { return; } unget_token(); - int rs1 = parse_expr(); - asm_mv(REG_A0, rs1); + int reg = parse_expr(); + load(0, reg); + printf(" mv a0, t0\n"); asm_j(epilog_label); } else if (token_type == TOKEN_BREAK) { int label = asm_get_break_label(); @@ -1582,7 +1335,7 @@ void parse_function(const char* name) { } int arg_type = parse_type(); if (arg_type < 0 || arg_type == TYPE_VOID) { - eprintf("unexpected a non-void argument type: %d\n", arg_type); + eprintf("unexpected a non-void argument type"); exit(1); } expect_token(TOKEN_ID); @@ -1592,15 +1345,11 @@ void parse_function(const char* name) { expect_token(TOKEN_BRACKET_RIGHT); next_token(); if (arg_type & TYPE_PTR_MASK) { - eprintf("array of pointers is not supported\n"); + eprintf("local variable of array of pointers is not supported\n"); exit(1); } arg_type = arg_type | TYPE_PTR_MASK; } - if (arg >= 8) { - eprintf("too many arguments\n"); - exit(1); - } args[arg++] = declare_local(token_data, arg_type); if (token_type == TOKEN_COMMA) { // continue; @@ -1634,10 +1383,7 @@ void parse_function(const char* name) { parse_stmt(); } asm_j(epilog_label); - int reg_used = max_reg_id - REG_S2; - if (reg_used > 14) reg_used = 14; - int frame_size = (max_local_id - 1 + reg_used + 2) * 8; - if (reg_used > 10) reg_used = 10; + int frame_size = max_local_id * 8; if (frame_size % 16 != 0) { frame_size = frame_size + 8; } @@ -1646,30 +1392,27 @@ void parse_function(const char* name) { asm_addi("sp", "sp", -frame_size); asm_sd("ra", frame_size - 8, "sp"); asm_sd("fp", frame_size - 16, "sp"); - for (int i = 0; i < reg_used; ++i) { - int reg = REG_S2 + i; - asm_sd(reg_name(reg), frame_size - 24 - i * 8, "sp"); - } asm_addi("fp", "sp", frame_size); for (int i = 0; i < arg; ++i) { - store_into_local(REG_A0 + i, args[i]); + printf(" mv t0, a%d\n", i); + store_t0(args[i]); } asm_j(label); // epilog asm_label(epilog_label); - asm_ld("ra", frame_size - 8, "sp"); asm_ld("fp", frame_size - 16, "sp"); - for (int i = 0; i < reg_used; ++i) { - int reg = REG_S2 + i; - asm_ld(reg_name(reg), frame_size - 24 - i * 8, "sp"); - } + asm_ld("ra", frame_size - 8, "sp"); asm_addi("sp", "sp", frame_size); printf(" ret\n"); } void parse_global_variable(int id, const char* name, int type) { if (type == TYPE_VOID) { - eprintf("variable cannot be of void type\n"); + eprintf("global variable of void type is not supported\n"); + exit(1); + } + if (type & TYPE_PTR_MASK) { + eprintf("global variable of pointer is not supported\n"); exit(1); } printf(".data\n"); @@ -1680,10 +1423,6 @@ void parse_global_variable(int id, const char* name, int type) { expect_token(TOKEN_NUMBER); printf(" .word %d\n", token_data); } else if (token_type == TOKEN_BRACKET_LEFT) { - if (type & TYPE_PTR_MASK) { - eprintf("array of pointers is not supported\n"); - exit(1); - } expect_token(TOKEN_NUMBER); int size = token_data; expect_token(TOKEN_BRACKET_RIGHT); @@ -1701,28 +1440,16 @@ void parse_global_variable(int id, const char* name, int type) { } void parse_global_declaration() { - int is_const_int = 1; - if (token_type != TOKEN_CONST) { - is_const_int = 0; - } int type = parse_type(); if (type < 0) { eprintf("expecting type for global declaration\n"); exit(1); } - if (type != TYPE_INT) { - is_const_int = 0; - } expect_token(TOKEN_ID); int id = token_data; char* name = id_table + id_lut[id]; next_token(); - if (is_const_int && token_type == TOKEN_ASSIGN) { - expect_token(TOKEN_NUMBER); - const_table[id] = token_data; - is_const[id] = 1; - expect_token(TOKEN_SEMICOLON); - } else if (token_type == TOKEN_PAREN_LEFT) { + if (token_type == TOKEN_PAREN_LEFT) { declare_global(id, MARKER_FUNCTION, type); parse_function(name); } else { diff --git a/boot.sh b/boot.sh index c4a4ecb..91909ec 100644 --- a/boot.sh +++ b/boot.sh @@ -5,7 +5,7 @@ gcc ../boot.c ../boot-lib.c -o gcc.out && riscv64-linux-gnu-gcc-12 -static boot1.s ../boot-lib.c -o boot1.out && qemu-riscv64 boot1.out < boot-all.c > boot2.s && riscv64-linux-gnu-gcc-12 -static boot2.s ../boot-lib.c -o boot2.out && -qemu-riscv64 boot2.out < boot-all.c > boot3.s +qemu-riscv64 boot2.out < boot-all.c > boot3.s && cmp --silent boot1.s boot2.s && echo "boot1.s == boot2.s" || echo "boot1.s != boot2.s" cmp --silent boot2.s boot3.s && echo "boot2.s == boot3.s" || echo "boot2.s != boot3.s" cmp --silent boot1.s boot3.s && echo "boot1.s == boot3.s" || echo "boot1.s != boot3.s" diff --git a/demo/add.c b/demo/add.c deleted file mode 100644 index 6c54b5f..0000000 --- a/demo/add.c +++ /dev/null @@ -1,17 +0,0 @@ -int printf(const char format[], ...); -int scanf(const char format[], ...); -int putchar(int ch); - -int* p; -int f1() { - int a = 1; - return *(a+(a+(a+(a+(a+(a+(a+(a+(a+(a+(p))))))))))); // a[10] -} - - -int main() { - int a[15]; - p = a; - for (int i = 0; i < 15; ++i) a[i] = i; - return f1(); -} \ No newline at end of file diff --git a/demo/lut.c b/demo/lut.c deleted file mode 100644 index f5b23ba..0000000 --- a/demo/lut.c +++ /dev/null @@ -1,47 +0,0 @@ -int printf(const char format[], ...); -int getchar(); - -char string_table[65536]; -int string_offset; -int string_lut[4096]; -int string_lut_size; - -int parse_string() { - int offset = string_offset; - int ch; - while ((ch = getchar()) != '"') { - if (ch == -1 || ch == '\n') { - printf("expecting '\"'\n"); - return 1; - } - string_table[string_offset++] = ch; - } - string_table[string_offset++] = 0; - string_lut[string_lut_size] = offset; - return string_lut_size++; -} - - -int streq(const char* s1, const char* s2) { - while (*s1 && *s2 && *s1 == *s2) { - s1++; - s2++; - } - return *s1 == *s2; -} - -void dump_string_table() { - printf(".data\n"); - for (int i = 0; i < string_lut_size; ++i) { - char* id = string_table + string_lut[i]; - printf(".LC%d: .string \"%s\", const: %d\n", - i, id, streq(id, "const")); - } -} - -int main() { - char ch; - while ((ch = getchar()) == '"') parse_string(); - dump_string_table(); - return 0; -} \ No newline at end of file diff --git a/demo/parse.c b/demo/parse.c deleted file mode 100644 index 98b7e9d..0000000 --- a/demo/parse.c +++ /dev/null @@ -1,19 +0,0 @@ -int getchar(); - - -int is_digit(int ch) { - return '0' <= ch && ch <= '9'; -} - -int parse_int(int ch) { - int num = ch - '0'; - while (is_digit(ch = getchar())) { - num = num * 10; - num = num + ch - '0'; - } - return num; -} - -int main() { - return parse_int(getchar()); -} \ No newline at end of file diff --git a/demo/strcmp.c b/demo/strcmp.c deleted file mode 100644 index 6d4793e..0000000 --- a/demo/strcmp.c +++ /dev/null @@ -1,17 +0,0 @@ -int printf(const char* format, ...); - -int strcmp(const char* s1, const char* s2) { - while (*s1 && *s2 && *s1 == *s2) { - s1++; - s2++; - } - return *s1 - *s2; -} - -int main() { - const char* s1 = "helloworld"; - const char* s2 = "world"; - printf("%d\n", strcmp(s1, s2)); - printf("%d\n", strcmp(s1 + 5, s2)); - return 0; -} \ No newline at end of file