diff --git a/README.md b/README.md index f2c2324..91db7ee 100644 --- a/README.md +++ b/README.md @@ -1,6 +1,6 @@ # RVBTCC -- 约 1500 行的轻量级自举编译器。 +- 约 1900 行的轻量级自举编译器。 - 编译器和自举编译器行为一致。 - 语法类似 C,输出 RISC-V 汇编。 - 依赖几个 libc 函数用于输入输出。 @@ -12,7 +12,6 @@ ### 真机运行 - 编译运行程序,src 为本语言源代码。可以编译 demo 文件夹下的实例。 ```sh @@ -25,19 +24,6 @@ $ sh run-native.sh $ sh boot-native.sh ``` -输出六个文件: - -| 源代码 | 编译器 | 汇编 | 可执行 | 命名 | -| ----------------- | --------- | ------- | --------- | ---------------------- | -| boot.c boot-lib.c | gcc | | gcc.out | 自制编译器 | -| boot.c boot-lib.h | gcc.out | boot1.s | boot1.out | 自举自制编译器 | -| boot.c boot-lib.h | boot1.out | boot2.s | boot2.out | 自举自举自制编译器 | -| boot.c boot-lib.h | boot2.out | boot3.s | | 验证自举自举自制编译器 | - -后三次编译时,boot-lib.h 的内容被手动导入 boot.c 开头进行编译,boot-lib.c 提供的库通过链接引入。 - -自举的目标为 boot1.s == boot2.s == boot3.s - ### 模拟运行 安装以下依赖 @@ -58,13 +44,31 @@ $ sh run.sh $ sh boot.sh ``` +### 自举过程 + + +自举会输出六个文件,三个汇编文件和三个可执行文件: + +| 源代码 | 编译器 | 汇编 | 可执行 | 代号 | 命名 | +| ----------------- | --------- | ------- | --------- | ---- | ---------------------- | +| boot.c boot-lib.c | gcc | | gcc.out | G | 自制编译器 | +| boot.c boot-lib.h | gcc.out | boot1.s | boot1.out | B1 | 自举自制编译器 | +| boot.c boot-lib.h | boot1.out | boot2.s | boot2.out | B2 | 自举自举自制编译器 | +| boot.c boot-lib.h | boot2.out | boot3.s | | B3 | 验证自举自举自制编译器 | + +后三次编译时,boot-lib.h 的内容被手动导入 boot.c 开头进行编译,boot-lib.c 提供的库通过链接引入。 + +整个自举及其验证的过程如下图所示: + +![](bootstrapping.png) + +自举的目标为 G、B1、B2 的可执行文件行为一致,也就是说 B1、B2、B3 的汇编代码一致。 + ## 语言文档 -### 关键字 +### 注释 -本语言包含的关键字即为支持的标量类型的关键字和流程控制的关键字,还有 `const`。 - -`const` 关键字可以在类型中使用,但会被直接忽略。支持它是为了更好兼容 C 程序。 +支持多行 `/* ... */` 和单行 `//` 两种注释 ### 支持六个基本类型 @@ -75,13 +79,11 @@ $ sh boot.sh | `int` | `int*` | - 注意指针类型不是复合得来的,而是被视作整体。因此也不存在二重指针。 - - 函数和数组不是类型系统的一部分。 - 可以认为数组的类型就是其元素对应的指针类型。 - 函数的参数类型和个数不会检查,返回值会参与类型检查。 - 函数名只能被用于调用,函数调用被视为初等表达式。 - 数组只支持一维数组,且数组的元素不能是指针类型。 -- 全局变量不能是指针类型。 - 整数和字符字面量的类型是 `int`,字符串字面量的类型是 `char*` ### 支持的流程控制 @@ -91,6 +93,25 @@ $ sh boot.sh - `break` `continue` - `return` +### 关键字 + +本语言包含的关键字即为支持的标量类型的关键字和流程控制的关键字,还有 `const`。 + +#### `const` 关键字 + +`const` 关键字可以在类型中使用,在大部分情况下会被直接忽略。支持它是为了更好兼容 C 程序。 + +但是当在出现 + +- 全局,标量(即不是数组) +- 类型为 `const int` 或 `const int const` +- 带有初始化 + +的声明时,将会被解析为整数常量。 + +整数常量在使用的时候会被直接替换为对应的右值,失去作为全局变量左值的性质。 + +使用 `int const` 或 `int` 可以避免这样的特殊处理。 ### 支持以下运算符 @@ -109,15 +130,15 @@ $ sh boot.sh | | | 按位或 | 从左到右 | | `&&` | 逻辑与 | 从左到右 | | || | 逻辑或 | 从左到右 | -| `=` | 赋值 | 从右到左 | +| `?:` | 条件 | 从右到左 | +| `=` `+=` `-=` `*=` `/=` `%=` `<<=` `>>=` `&=` `^=` |= | 赋值 | 从右到左 | +| `,` | 逗号 | 从左到右 | - 同级表达式的求值顺序与结合性一致。 - 加减号支持整数之间,指针与整数,指针之间的运算。 - 算术运算的结果总是被提升为 `int` 类型。布尔值用 `int` 类型表示。 - 由于空指针就是 `0`,因此指针和整数之间的比较运算没有禁止。 - 逻辑与和逻辑或支持短路求值。 -- 表达式没有左值和右值之分。可以认为右值总是存在一个临时的变量中。 -- 赋值不检查类型。强制类型转换可以用赋值给特定类型的变量实现。 ### 其它支持与不支持 @@ -125,7 +146,9 @@ $ sh boot.sh - 不支持局部变量之间的遮挡,重名的局部变量为同一变量。 - 支持函数声明,可以通过函数声明来调用 C 语言库。不支持变量声明。 - 函数只支持最多八个参数。函数声明中支持可变参数,仅用于兼容 C 语言库。 -- 类型检查可能有遗漏,若 C 编译器报错,而本语言编译通过,就可以认为是 UB。 +- 类型检查有遗漏,若 C 编译器报错,而本语言编译通过,就可以认为是 UB。 + - 例如函数调用的参数和 `return` 语句不会检查类型。 + ## 限制 diff --git a/boot.c b/boot.c index 5fbab63..f5a33e7 100644 --- a/boot.c +++ b/boot.c @@ -21,8 +21,8 @@ int token_data; const int TOKEN_EOF = 0; const int TOKEN_SEMICOLON = 1; const int TOKEN_ADD = 2; -const int TOKEN_MINUS = 3; -const int TOKEN_STAR = 4; +const int TOKEN_SUB = 3; +const int TOKEN_MUL = 4; const int TOKEN_DIV = 5; const int TOKEN_REM = 6; const int TOKEN_ASSIGN = 7; @@ -40,6 +40,18 @@ const int TOKEN_LOR = 18; const int TOKEN_ELLIPSIS = 19; const int TOKEN_INC = 20; const int TOKEN_DEC = 21; +const int TOKEN_ADD_ASSIGN = 22; +const int TOKEN_SUB_ASSIGN = 23; +const int TOKEN_MUL_ASSIGN = 24; +const int TOKEN_DIV_ASSIGN = 25; +const int TOKEN_REM_ASSIGN = 26; +const int TOKEN_AND_ASSIGN = 27; +const int TOKEN_OR_ASSIGN = 28; +const int TOKEN_XOR_ASSIGN = 29; +const int TOKEN_LSHIFT_ASSIGN = 30; +const int TOKEN_RSHIFT_ASSIGN = 31; +const int TOKEN_QUESTION = 32; +const int TOKEN_COLON = 33; const int TOKEN_EQ = 40; const int TOKEN_NE = 41; @@ -85,8 +97,8 @@ const int TYPE_TOKEN_MASK = 128; int parse_int(int ch) { int num = ch - '0'; while (is_digit(ch = getchar())) { - num = num * 10; - num = num + ch - '0'; + num *= 10; + num += ch - '0'; } ungetchar(ch); return num; @@ -145,6 +157,24 @@ int parse_string() { return string_lut_size++; } +void rewind_string(int new_data) { + string_offset = string_lut[token_data]; + token_data = new_data; + --string_lut_size; +} + +void dedup_string() { + int last_string = string_lut_size - 1; + char* latest = string_table + string_lut[last_string]; + for (int i = 0; i < last_string; i++) { + char* candidate = string_table + string_lut[i]; + if (streq(candidate, latest)) { + rewind_string(i); + return; + } + } +} + char id_table[65536]; int id_offset; int id_lut[4096]; @@ -246,6 +276,8 @@ void next_token() { int ch2 = getchar(); if (ch2 == '+') { token_type = TOKEN_INC; + } else if (ch2 == '=') { + token_type = TOKEN_ADD_ASSIGN; } else { ungetchar(ch2); token_type = TOKEN_ADD; @@ -254,15 +286,25 @@ void next_token() { int ch2 = getchar(); if (ch2 == '-') { token_type = TOKEN_DEC; + } else if (ch2 == '=') { + token_type = TOKEN_SUB_ASSIGN; } else { ungetchar(ch2); - token_type = TOKEN_MINUS; + token_type = TOKEN_SUB; } } else if (ch == '*') { - token_type = TOKEN_STAR; + int ch2 = getchar(); + if (ch2 == '=') { + token_type = TOKEN_MUL_ASSIGN; + } else { + ungetchar(ch2); + token_type = TOKEN_MUL; + } } else if (ch == '/') { int ch2 = getchar(); - if (ch2 == '/') { + if (ch2 == '=') { + token_type = TOKEN_DIV_ASSIGN; + } if (ch2 == '/') { do ch = getchar(); while (ch != -1 && ch != '\n'); next_token(); return; @@ -287,9 +329,19 @@ void next_token() { token_type = TOKEN_DIV; } } else if (ch == '%') { - token_type = TOKEN_REM; + int ch2 = getchar(); + if (ch2 == '=') { + token_type = TOKEN_REM_ASSIGN; + } else { + ungetchar(ch2); + token_type = TOKEN_REM; + } } else if (ch == ';') { token_type = TOKEN_SEMICOLON; + } else if (ch == '?') { + token_type = TOKEN_QUESTION; + } else if (ch == ':') { + token_type = TOKEN_COLON; } else if (ch == ',') { token_type = TOKEN_COMMA; } else if (ch == '<') { @@ -297,7 +349,13 @@ void next_token() { if (ch2 == '=') { token_type = TOKEN_LE; } else if (ch2 == '<') { - token_type = TOKEN_LSHIFT; + int ch3 = getchar(); + if (ch3 == '=') { + token_type = TOKEN_LSHIFT_ASSIGN; + } else { + ungetchar(ch3); + token_type = TOKEN_LSHIFT; + } } else { ungetchar(ch2); token_type = TOKEN_LT; @@ -307,7 +365,13 @@ void next_token() { if (ch2 == '=') { token_type = TOKEN_GE; } else if (ch2 == '>') { - token_type = TOKEN_RSHIFT; + int ch3 = getchar(); + if (ch3 == '=') { + token_type = TOKEN_RSHIFT_ASSIGN; + } else { + ungetchar(ch3); + token_type = TOKEN_RSHIFT; + } } else { ungetchar(ch2); token_type = TOKEN_GT; @@ -330,7 +394,9 @@ void next_token() { } } else if (ch == '&') { int ch2 = getchar(); - if (ch2 == '&') { + if (ch2 == '=') { + token_type = TOKEN_AND_ASSIGN; + } else if (ch2 == '&') { token_type = TOKEN_LAND; } else { ungetchar(ch2); @@ -338,14 +404,22 @@ void next_token() { } } else if (ch == '|') { int ch2 = getchar(); - if (ch2 == '|') { + if (ch2 == '=') { + token_type = TOKEN_OR_ASSIGN; + } else if (ch2 == '|') { token_type = TOKEN_LOR; } else { ungetchar(ch2); token_type = TOKEN_OR; } } else if (ch == '^') { - token_type = TOKEN_XOR; + int ch2 = getchar(); + if (ch2 == '=') { + token_type = TOKEN_XOR_ASSIGN; + } else { + ungetchar(ch2); + token_type = TOKEN_XOR; + } } else if (ch == '~') { token_type = TOKEN_COMPL; } else if (ch == '\'') { @@ -361,18 +435,16 @@ void next_token() { } else if (ch == '"') { token_type = TOKEN_STRING; token_data = parse_string(); + dedup_string(); } else if (ch == '.') { - int ch2 = getchar(); - if (ch2 == '.') { - int ch3 = getchar(); - if (ch3 == '.') { + token_type = 0; + if (getchar() == '.') { + if (getchar() == '.') { token_type = TOKEN_ELLIPSIS; - } else { - eprintf("unexpected character: %c\n", ch3); - exit(1); } - } else { - eprintf("unexpected character: %c\n", ch2); + } + if (token_type != TOKEN_ELLIPSIS) { + eprintf("expecting '...'\n"); exit(1); } } else if (is_digit(ch)) { @@ -413,24 +485,22 @@ int parse_type() { int type = token_type & ~TYPE_TOKEN_MASK; next_token(); ignore_const(); - if (token_type == TOKEN_STAR) { + if (token_type == TOKEN_MUL) { + next_token(); ignore_const(); - return type | TYPE_PTR_MASK; + type |= TYPE_PTR_MASK; } unget_token(); return type; - } else { - return -1; } + return -1; } // asm -int epilog_label; - int local_table[4096]; // id -> local id -int next_local_id = 2; -int max_local_id = 2; +int next_local_id = 1; +int max_local_id = 1; const int MARKER_TEMP = 0; const int MARKER_SCALAR = 1; @@ -441,47 +511,143 @@ int local_marker[4096]; int global_marker[4096]; int local_type[4096]; int global_type[4096]; + +int reg_type[4096]; +int next_reg_id = 18; +int max_reg_id = 18; int indirection[4096]; +int overflow[4096]; + +int const_table[4096]; // id -> value +int is_const[4096]; + +const int REG_ZERO = 0; +const int REG_RA = 1; +const int REG_SP = 2; +const int REG_GP = 3; +const int REG_TP = 4; +const int REG_T0 = 5; +const int REG_T1 = 6; +const int REG_T2 = 7; +const int REG_FP = 8; +const int REG_S1 = 9; +const int REG_A0 = 10; +const int REG_A1 = 11; +const int REG_A2 = 12; +const int REG_A3 = 13; +const int REG_A4 = 14; +const int REG_A5 = 15; +const int REG_A6 = 16; +const int REG_A7 = 17; +const int REG_S2 = 18; +const int REG_S3 = 19; +const int REG_S4 = 20; +const int REG_S5 = 21; +const int REG_S6 = 22; +const int REG_S7 = 23; +const int REG_S8 = 24; +const int REG_S9 = 25; +const int REG_S10 = 26; +const int REG_S11 = 27; +const int REG_T3 = 28; +const int REG_T4 = 29; +const int REG_T5 = 30; +const int REG_T6 = 31; + +void reset_reg() { + next_reg_id = REG_S2; + for (int i = 0; i < 4096; ++i) { + reg_type[i] = TYPE_VOID; + indirection[i] = 0; + overflow[i] = 0; + } + reg_type[REG_ZERO] = TYPE_INT; +} + +const char* reg_name(int reg) { + if (reg == 0) return "zero"; + if (reg == 1) return "ra"; + if (reg == 2) return "sp"; + if (reg == 3) return "gp"; + if (reg == 4) return "tp"; + if (reg == 5) return "t0"; + if (reg == 6) return "t1"; + if (reg == 7) return "t2"; + if (reg == 8) return "fp"; + // reserved begin + if (reg == 9) return "s1"; + if (reg == 10) return "a0"; + if (reg == 11) return "a1"; + if (reg == 12) return "a2"; + if (reg == 13) return "a3"; + if (reg == 14) return "a4"; + if (reg == 15) return "a5"; + if (reg == 16) return "a6"; + if (reg == 17) return "a7"; + // allocation begin + if (reg == 18) return "s2"; + if (reg == 19) return "s3"; + if (reg == 20) return "s4"; + if (reg == 21) return "s5"; + if (reg == 22) return "s6"; + if (reg == 23) return "s7"; + if (reg == 24) return "s8"; + if (reg == 25) return "s9"; + if (reg == 26) return "s10"; + if (reg == 27) return "s11"; + if (reg == 28) return "t3"; + if (reg == 29) return "t4"; + if (reg == 30) return "t5"; + if (reg == 31) return "t6"; + // overflow begin + return 0; +} + +int is_overflow(int reg) { + return reg > REG_T6; +} void reset_local() { - next_local_id = 2; - max_local_id = 2; + next_local_id = 1; + max_local_id = 1; + max_reg_id = REG_S2; for (int i = 0; i < 4096; ++i) { local_table[i] = 0; local_marker[i] = MARKER_TEMP; local_type[i] = TYPE_VOID; - indirection[i] = 0; } + reset_reg(); } void reset_temp() { - while (next_local_id > 2 && local_marker[next_local_id - 1] == MARKER_TEMP) { + while (next_local_id > 1 && local_marker[next_local_id - 1] == MARKER_TEMP) { --next_local_id; } + reset_reg(); } -int next_reg(int type) { - int reg = next_local_id++; - local_type[reg] = type; - indirection[reg] = 0; +int next_local_slot(int type) { + int slot = next_local_id++; + local_type[slot] = type; if (next_local_id > max_local_id) { max_local_id = next_local_id; } - return reg; + return slot; } int declare_local(int id, int type) { if (local_table[id] != 0) return local_table[id]; - int reg = next_reg(type); - local_marker[reg] = MARKER_SCALAR; - return local_table[id] = reg; + int slot = next_local_slot(type); + local_marker[slot] = MARKER_SCALAR; + return local_table[id] = slot; } int declare_local_array(int id, int type, int size) { if (local_table[id] != 0) return local_table[id]; - int reg; - for (int i = 0; i < size; ++i) local_marker[reg = next_reg(type)] = MARKER_ARRAY; - return local_table[id] = reg; + int slot = next_local_slot(type); + local_marker[slot] = MARKER_ARRAY; + for (int i = 1; i < size; ++i) local_marker[next_local_slot(type)] = MARKER_ARRAY; + return local_table[id] = slot; } void declare_global(int id, int marker, int type) { @@ -489,6 +655,23 @@ void declare_global(int id, int marker, int type) { global_type[id] = type; } +int next_reg(int type) { + int reg = next_reg_id++; + if (is_overflow(reg)) { + int slot = next_local_slot(type); + local_marker[slot] = MARKER_TEMP; + overflow[reg] = slot; + } + reg_type[reg] = type; + if (next_reg_id > max_reg_id) { + max_reg_id = next_reg_id; + } + return reg; +} + + +// prolog & epilog helpers + int check_itype_immediate(int value) { return value >= -2048 && value <= 2047; } @@ -522,74 +705,187 @@ void asm_addi(const char* rd, const char* rs, int imm) { } } -void load_address(int rd, int id) { - if (id == -1) { - eprintf("void cannot be arithmetically operated\n"); - exit(1); +// assembly helpers + +const char* load_op_of_type(int type) { + if (type & TYPE_PTR_MASK) { + return "ld"; + } else if (type == TYPE_CHAR) { + return "lb"; + } else { // int + return "lw"; } - int offset = -id * 8 - 8; - if (indirection[id]) { - if (check_itype_immediate(offset)) { - printf(" ld t%d, %d(fp) # indirection\n", rd, offset); - } else { - printf(" li t%d, %d\n", rd, offset); - printf(" add t%d, fp, t%d\n", rd, rd); - printf(" ld t%d, 0(t%d) # indirection\n", rd, rd); +} + +const char* store_op_of_type(int type) { + if (type & TYPE_PTR_MASK) { + return "sd"; + } else if (type == TYPE_CHAR) { + return "sb"; + } else { // int + return "sw"; + } +} + +// address loaders +// rd must be one of t0, t1, t2 +void load_local_address(int rd, int slot_id) { + asm_addi(reg_name(rd), "sp", slot_id * 8 - 8); +} + +// load a non-trivial register into trivial one +void load(int rd, int rs) { + const char* op = load_op_of_type(reg_type[rs]); + const char* rd_name = reg_name(rd); + if (is_overflow(rs)) { + load_local_address(rd, overflow[rs]); + if (indirection[rs]) { + printf(" ld %s, 0(%s)\n", rd_name, rd_name); } + rs = rd; + } + printf(" %s %s, 0(%s) # load non-trivial register\n", op, rd_name, reg_name(rs)); +} + +// store a trivial register into a non-trivial one +void store(const char* rs, int reg) { + const char* op = store_op_of_type(reg_type[reg]); + if (is_overflow(reg)) { + load_local_address(REG_T2, overflow[reg]); + if (indirection[reg]) { + printf(" ld t2, 0(t2)\n"); + } + reg = REG_T2; + } + printf(" %s %s, 0(%s) # store non-trivial register\n", op, rs, reg_name(reg)); +} + +int is_nontrivial(int reg) { + return is_overflow(reg) || indirection[reg]; +} + +const char* trivialize(int rs, int t) { + if (is_nontrivial(rs)) { + load(t, rs); + return reg_name(t); + } + return reg_name(rs); +} + +void _asm_r(const char* op, int rd, int rs1) { + const char* rd_name = reg_name(rd); + if (is_nontrivial(rd)) rd_name = "t0"; + const char* rs1_name = trivialize(rs1, REG_T0); + printf(" %s %s, %s\n", op, rd_name, rs1_name); + if (is_nontrivial(rd)) { + store("t0", rd); + } +} + +void asm_mv(int rd, int rs1) { + const char* rs1_name = trivialize(rs1, REG_T0); + if (is_nontrivial(rd)) { + store(rs1_name, rd); } else { - if (check_itype_immediate(offset)) { - printf(" addi t%d, fp, %d\n", rd, offset); - } else { - printf(" li t%d, %d\n", rd, offset); - printf(" add t%d, fp, t%d\n", rd, rd); - } + const char* rd_name = reg_name(rd); + if (!streq(rd_name, rs1_name)) + printf(" mv %s, %s\n", rd_name, rs1_name); } } -void load(int rd, int id) { - load_address(rd, id); - int type = local_type[id]; - const char* op = "lw"; // int - if (type == TYPE_CHAR) { - op = "lb"; - } else if (type & TYPE_PTR_MASK) { - op = "ld"; +void _asm_rr(const char* op, int rd, int rs1, int rs2) { + const char* rd_name = reg_name(rd); + const char* rs1_name = trivialize(rs1, REG_T0); + const char* rs2_name = trivialize(rs2, REG_T1); + if (is_nontrivial(rd)) rd_name = "t0"; + printf(" %s %s, %s, %s\n", op, rd_name, rs1_name, rs2_name); + if (is_nontrivial(rd)) { + store("t0", rd); } - printf(" %s t%d, 0(t%d) # id: type %d\n", op, rd, rd, type); } -void store_t0(int id) { - load_address(1, id); - int type = local_type[id]; - const char* op = "sw"; // int - if (type == TYPE_CHAR) { - op = "sb"; - } else if (type & TYPE_PTR_MASK) { - op = "sd"; +void _asm_ri(const char* op, int rd, int rs1, int imm) { + const char* rd_name = reg_name(rd); + if (is_nontrivial(rd)) rd_name = "t0"; + const char* rs1_name = trivialize(rs1, REG_T0); + printf(" %s %s, %s, %d\n", op, rd_name, rs1_name, imm); + if (is_nontrivial(rd)) { + store("t0", rd); } - printf(" %s t0, 0(t1) # id: type %d\n", op, type); } -int materialize_t0(int type) { - int reg = next_reg(type); - store_t0(reg); - return reg; +void asm_branch(const char* op, int rs1, int label) { + const char* rs1_name = trivialize(rs1, REG_T0); + printf(" %s %s, L%d\n", op, rs1_name, label); } -int dereference(int reg) { - local_type[reg] = local_type[reg] & ~TYPE_PTR_MASK; - indirection[reg] = 1; +void _asm_i(const char* op, int rd, const char* prefix1, const char* prefix2, int imm) { + const char* rd_name = reg_name(rd); + if (is_nontrivial(rd)) rd_name = "t0"; + printf(" %s %s, %s%s%d\n", op, rd_name, prefix1, prefix2, imm); + if (is_nontrivial(rd)) { + store("t0", rd); + } +} + +int is_not_reusable(int rs1, int expected_type) { + return indirection[rs1] || reg_type[rs1] != expected_type || rs1 == REG_ZERO; +} + +int asm_r(int type, const char* op, int rs1) { + int rd = rs1; + if (is_not_reusable(rs1, type)) rd = next_reg(type); + _asm_r(op, rd, rs1); + return rd; +} + +int asm_rr(int type, const char* op, int rs1, int rs2) { + int rd = rs1; + if (is_not_reusable(rs1, type)) rd = rs2; + if (is_not_reusable(rs2, type)) rd = next_reg(type); + _asm_rr(op, rd, rs1, rs2); + return rd; +} + +void store_into_local(int rs1, int slot) { + const char* rs1_name = trivialize(rs1, REG_T0); + load_local_address(REG_T2, slot); + printf(" %s %s, 0(t2)\n", store_op_of_type(local_type[slot]), rs1_name); +} + +int materialize_address(int rd, int type, int marker) { + if (marker == MARKER_ARRAY) { + type |= TYPE_PTR_MASK; + } + reg_type[rd] = type; + indirection[rd] = marker == MARKER_SCALAR; + return rd; +} + +int lookup_from_slot(int slot) { + int rd = next_reg(TYPE_VOID_PTR); + if (is_nontrivial(rd)) { + load_local_address(REG_T0, slot); + asm_mv(rd, REG_T0); + } else { + load_local_address(rd, slot); + } + return materialize_address(rd, local_type[slot], local_marker[slot]); +} + +int load_imm(int imm) { + if (imm == 0) return REG_ZERO; + int reg = next_reg(TYPE_INT); + _asm_i("li", reg, "", "", imm); return reg; } int lookup(int id) { - int local = local_table[id]; - if (local) { - if (local_marker[local] == MARKER_ARRAY) { - load_address(0, local); - return materialize_t0(local_type[local] | TYPE_PTR_MASK); - } - return local; + if (local_table[id]) { + return lookup_from_slot(local_table[id]); + } + if (is_const[id]) { + return load_imm(const_table[id]); } const char* name = id_table + id_lut[id]; if (global_marker[id]) { @@ -597,17 +893,46 @@ int lookup(int id) { eprintf("function name must not appear outside function call: %s\n", name); exit(1); } - printf(" la t0, %s # id: %d\n", name, id); - int reg = materialize_t0(global_type[id] | TYPE_PTR_MASK); - if (global_marker[id] == MARKER_SCALAR) { - reg = dereference(reg); - } - return reg; + int rd = next_reg(TYPE_VOID_PTR); + _asm_i("la", rd, name, " # id: ", id); + return materialize_address(rd, global_type[id], global_marker[id]); } eprintf("unresolved identifier: %s\n", name); exit(1); } +int asm_r_arith(const char* op, int rs1) { + if (reg_type[rs1] & TYPE_PTR_MASK) { + eprintf("pointer cannot be arithmetically operated by %s\n", op); + exit(1); + } + return asm_r(TYPE_INT, op, rs1); +} + +int asm_rr_arith(const char* op, int rs1, int rs2) { + if (reg_type[rs1] & TYPE_PTR_MASK || reg_type[rs2] & TYPE_PTR_MASK) { + eprintf("pointer cannot be arithmetically operated by %s\n", op); + exit(1); + } + return asm_rr(TYPE_INT, op, rs1, rs2); +} + +int asm_rr_cmp(const char* op, int rs1, int rs2) { + // since NULL is virtually 0, it is considered a valid example of a pointer comparing with an integer + return asm_rr(TYPE_INT, op, rs1, rs2); +} + +void asm_beqz(int rs1, int label) { + asm_branch("beqz", rs1, label); +} + +void asm_bnez(int rs1, int label) { + asm_branch("bnez", rs1, label); +} + +void asm_j(int label) { + printf(" j L%d\n", label); +} int next_label_id = 0; int next_label() { @@ -619,82 +944,25 @@ int asm_label(int label) { return label; } -int is_not_reusable(int rs1, int expected_type) { - return indirection[rs1] || local_marker[rs1] != MARKER_TEMP || local_type[rs1] != expected_type; -} - -int asm_r(const char* op, int rs1) { - load(0, rs1); - printf(" %s t0, t0\n", op); - int rd = rs1; - if (is_not_reusable(rs1, TYPE_INT)) { - rd = next_reg(TYPE_INT); - } - store_t0(rd); - return rd; -} - -int asm_r_arith(const char* op, int rs1) { - if (local_type[rs1] & TYPE_PTR_MASK) { - eprintf("pointer cannot be arithmetically operated by %s\n", op); - exit(1); - } - return asm_r(op, rs1); -} - -int asm_rr(const char* op, int rs1, int rs2) { - load(0, rs1); - load(1, rs2); - printf(" %s t0, t0, t1\n", op); - int rd = rs1; - if (is_not_reusable(rd, TYPE_INT)) { - rd = rs2; - if (is_not_reusable(rd, TYPE_INT)) { - rd = next_reg(TYPE_INT); - } - } - store_t0(rd); - return rd; -} - -int asm_rr_arith(const char* op, int rs1, int rs2) { - if (local_type[rs1] & TYPE_PTR_MASK || local_type[rs2] & TYPE_PTR_MASK) { - eprintf("pointer cannot be arithmetically operated by %s\n", op); - exit(1); - } - return asm_rr(op, rs1, rs2); -} - -int asm_rr_cmp(const char* op, int rs1, int rs2) { - // since NULL is virtually 0, it is considered valid example of a pointer comparing with an integer - return asm_rr(op, rs1, rs2); -} - -void asm_beqz(int rs1, int label) { - load(0, rs1); - printf(" beqz t0, L%d\n", label); -} - -void asm_bnez(int rs1, int label) { - load(0, rs1); - printf(" bnez t0, L%d\n", label); -} - -void asm_j(int label) { - printf(" j L%d\n", label); -} - int break_label_stack[4096]; int cont_label_stack[4096]; int break_label_stack_size; int cont_label_stack_size; -int asm_get_break_label() { - return break_label_stack[break_label_stack_size - 1]; +void asm_break() { + if (break_label_stack_size == 0) { + eprintf("break without loop\n"); + exit(1); + } + asm_j(break_label_stack[break_label_stack_size - 1]); } -int asm_get_cont_label() { - return cont_label_stack[cont_label_stack_size - 1]; +void asm_continue() { + if (cont_label_stack_size == 0) { + eprintf("continue without loop\n"); + exit(1); + } + asm_j(cont_label_stack[cont_label_stack_size - 1]); } void asm_push_label(int break_label, int cont_label) { @@ -707,22 +975,23 @@ void asm_pop_label() { --cont_label_stack_size; } -int step_of(int type) { - if (type == TYPE_INT_PTR) { - return 4; - } - return 1; +int epilog_label; + +void asm_return() { + asm_j(epilog_label); } -void asm_shift_t0(const char* op, int type) { - if (type == TYPE_INT_PTR) { - printf(" %s t0, t0, 2\n", op); - } +int log_step_of(int type) { + return type == TYPE_INT_PTR ? 2 : 0; +} + +int step_of(int type) { + return 1 << log_step_of(type); } int asm_add(int lhs, int rhs) { - int type1 = local_type[lhs] & TYPE_PTR_MASK; - int type2 = local_type[rhs] & TYPE_PTR_MASK; + int type1 = reg_type[lhs] & TYPE_PTR_MASK; + int type2 = reg_type[rhs] & TYPE_PTR_MASK; if (type1 != type2) { int ptr; int idx; @@ -733,27 +1002,25 @@ int asm_add(int lhs, int rhs) { ptr = rhs; idx = lhs; } - int ptr_type = local_type[ptr]; + int ptr_type = reg_type[ptr]; if (ptr_type == TYPE_VOID_PTR) { eprintf("void pointer cannot be arithmetically operated\n"); exit(1); } - load(0, idx); - load(1, ptr); - asm_shift_t0("slli", ptr_type); - printf(" add t0, t0, t1\n"); - return materialize_t0(ptr_type); + int offset = next_reg(TYPE_INT); + _asm_ri("slli", offset, idx, log_step_of(ptr_type)); + return asm_rr(ptr_type, "add", ptr, offset); } if (type1 && type2) { eprintf("operands of addition cannot be both pointers\n"); exit(1); } - return asm_rr("add", lhs, rhs); + return asm_rr(TYPE_INT, "add", lhs, rhs); } int asm_sub(int lhs, int rhs) { - int lhs_type = local_type[lhs]; - int rhs_type = local_type[rhs]; + int lhs_type = reg_type[lhs]; + int rhs_type = reg_type[rhs]; int type1 = lhs_type & TYPE_PTR_MASK; int type2 = rhs_type & TYPE_PTR_MASK; if (type1 && type2) { @@ -765,22 +1032,43 @@ int asm_sub(int lhs, int rhs) { eprintf("void pointer cannot be arithmetically operated\n"); exit(1); } - load(0, lhs); - load(1, rhs); - printf(" sub t0, t0, t1\n"); - asm_shift_t0("srai", lhs_type); - return materialize_t0(TYPE_INT); + int diff = asm_rr(TYPE_INT, "sub", lhs, rhs); + _asm_ri("slli", diff, diff, log_step_of(lhs_type)); + return diff; } if (type1) { int neg = asm_r_arith("neg", rhs); return asm_add(lhs, neg); } - return asm_rr("sub", lhs, rhs); + return asm_rr_arith("sub", lhs, rhs); +} + +int dereference(int reg) { + if (indirection[reg]) { + load(reg, reg); + } else { + indirection[reg] = 1; + } + reg_type[reg] = reg_type[reg] & ~TYPE_PTR_MASK; + return reg; +} + +int addressof(int reg) { + if (indirection[reg] && !(reg_type[reg] & TYPE_PTR_MASK)) { + reg_type[reg] = reg_type[reg] | TYPE_PTR_MASK; + indirection[reg] = 0; + } else { + printf("cannot take address of this expression"); + } + return reg; } // parser + int parse_expr(); +int parse_assign_expr(); + int parse_function_call(int id) { const char* name = id_table + id_lut[id]; if (global_marker[id] != MARKER_FUNCTION) { @@ -799,7 +1087,7 @@ int parse_function_call(int id) { eprintf("too many arguments\n"); exit(1); } - args[arg++] = parse_expr(); + args[arg++] = parse_assign_expr(); next_token(); if (token_type == TOKEN_COMMA) { // continue; @@ -811,16 +1099,26 @@ int parse_function_call(int id) { } } for (int i = 0; i < arg; ++i) { - load(0, args[i]); - printf(" mv a%d, t0\n", i); + asm_mv(i + REG_A0, args[i]); + } + for (int i = REG_T3; i <= REG_T6; ++i) { + if (i < max_reg_id) { + asm_sd(reg_name(i), (REG_S2 - i) * 8 - 24, "fp"); + } } printf(" call %s\n", name); + for (int i = REG_T3; i <= REG_T6; ++i) { + if (i < max_reg_id) { + asm_ld(reg_name(i), (REG_S2 - i) * 8 - 24, "fp"); + } + } int type = global_type[id]; if (type != TYPE_VOID) { - printf(" mv t0, a0\n"); - return materialize_t0(type); + int rd = next_reg(type); + asm_mv(rd, REG_A0); + return rd; } - return -1; + return REG_ZERO; } int parse_primary_expr() { @@ -828,8 +1126,7 @@ int parse_primary_expr() { if (token_type == TOKEN_EOF) { exit(1); } else if (token_type == TOKEN_NUMBER) { - printf(" li t0, %d\n", token_data); - return materialize_t0(TYPE_INT); + return load_imm(token_data); } else if (token_type == TOKEN_ID) { next_token(); if (token_type == TOKEN_PAREN_LEFT) { @@ -838,8 +1135,9 @@ int parse_primary_expr() { unget_token(); return lookup(token_data); } else if (token_type == TOKEN_STRING) { - printf(" la t0, .LC%d\n", token_data); - return materialize_t0(TYPE_CHAR_PTR); + int reg = next_reg(TYPE_CHAR_PTR); + _asm_i("la", reg, ".LC", "", token_data); + return reg; } else if (token_type == TOKEN_PAREN_LEFT) { int reg = parse_expr(); expect_token(TOKEN_PAREN_RIGHT); @@ -855,20 +1153,16 @@ int parse_postfix_expr() { while (1) { next_token(); if (token_type == TOKEN_INC) { - int type = local_type[lhs]; + int type = reg_type[lhs]; int reg = next_reg(type); - load(0, lhs); - store_t0(reg); - printf(" addi t0, t0, %d\n", step_of(type)); - store_t0(lhs); + asm_mv(reg, lhs); + _asm_ri("addi", lhs, lhs, step_of(type)); lhs = reg; } else if (token_type == TOKEN_DEC) { - int type = local_type[lhs]; + int type = reg_type[lhs]; int reg = next_reg(type); - load(0, lhs); - store_t0(reg); - printf(" addi t0, t0, -%d\n", step_of(type)); - store_t0(lhs); + asm_mv(reg, lhs); + _asm_ri("addi", lhs, lhs, -step_of(type)); lhs = reg; } else if (token_type == TOKEN_BRACKET_LEFT) { int rhs = parse_expr(); @@ -886,16 +1180,15 @@ int parse_prefix_expr() { next_token(); if (token_type == TOKEN_AND) { int reg = parse_postfix_expr(); - int type = local_type[reg]; + int type = reg_type[reg]; if (type & TYPE_PTR_MASK) { eprintf("cannot take address of a pointer\n"); exit(1); } - load_address(0, reg); - return materialize_t0(type | TYPE_PTR_MASK); - } else if (token_type == TOKEN_STAR) { + return addressof(reg); + } else if (token_type == TOKEN_MUL) { int reg = parse_postfix_expr(); - int type = local_type[reg]; + int type = reg_type[reg]; if (!(type & TYPE_PTR_MASK)) { eprintf("cannot dereference a non-pointer\n"); exit(1); @@ -904,9 +1197,8 @@ int parse_prefix_expr() { eprintf("cannot dereference void pointer\n"); exit(1); } - load(0, reg); - return dereference(materialize_t0(type)); - } else if (token_type == TOKEN_MINUS) { + return dereference(reg); + } else if (token_type == TOKEN_SUB) { int reg = parse_postfix_expr(); return asm_r_arith("neg", reg); } else if (token_type == TOKEN_COMPL) { @@ -914,18 +1206,14 @@ int parse_prefix_expr() { return asm_r_arith("not", reg); } else if (token_type == TOKEN_NOT) { int reg = parse_postfix_expr(); - return asm_r("seqz", reg); + return asm_r(TYPE_INT, "seqz", reg); } else if (token_type == TOKEN_INC) { int reg = parse_postfix_expr(); - load(0, reg); - printf(" addi t0, t0, %d\n", step_of(local_type[reg])); - store_t0(reg); + _asm_ri("addi", reg, reg, step_of(reg_type[reg])); return reg; } else if (token_type == TOKEN_DEC) { int reg = parse_postfix_expr(); - load(0, reg); - printf(" addi t0, t0, -%d\n", step_of(local_type[reg])); - store_t0(reg); + _asm_ri("addi", reg, reg, -step_of(reg_type[reg])); return reg; } else { unget_token(); @@ -937,7 +1225,7 @@ int parse_mul_expr() { int lhs = parse_prefix_expr(); while (1) { next_token(); - if (token_type == TOKEN_STAR) { + if (token_type == TOKEN_MUL) { int rhs = parse_prefix_expr(); lhs = asm_rr_arith("mul", lhs, rhs); } else if (token_type == TOKEN_DIV) { @@ -961,7 +1249,7 @@ int parse_add_expr() { if (token_type == TOKEN_ADD) { int rhs = parse_mul_expr(); lhs = asm_add(lhs, rhs); - } else if (token_type == TOKEN_MINUS) { + } else if (token_type == TOKEN_SUB) { int rhs = parse_mul_expr(); lhs = asm_sub(lhs, rhs); } else { @@ -1003,11 +1291,11 @@ int parse_cmp_expr() { } else if (token_type == TOKEN_LE) { int rhs = parse_shift_expr(); int sgt = asm_rr_cmp("sgt", lhs, rhs); - lhs = asm_r("seqz", sgt); + lhs = asm_r(TYPE_INT, "seqz", sgt); } else if (token_type == TOKEN_GE) { int rhs = parse_shift_expr(); int slt = asm_rr_cmp("slt", lhs, rhs); - lhs = asm_r("seqz", slt); + lhs = asm_r(TYPE_INT, "seqz", slt); } else { unget_token(); break; @@ -1022,12 +1310,12 @@ int parse_eq_expr() { next_token(); if (token_type == TOKEN_EQ) { int rhs = parse_cmp_expr(); - int xor0 = asm_rr_cmp("xor", lhs, rhs); - lhs = asm_r("seqz", xor0); + int xor = asm_rr_cmp("xor", lhs, rhs); + lhs = asm_r(TYPE_INT, "seqz", xor); } else if (token_type == TOKEN_NE) { int rhs = parse_cmp_expr(); - int xor0 = asm_rr_cmp("xor", lhs, rhs); - lhs = asm_r("snez", xor0); + int xor = asm_rr_cmp("xor", lhs, rhs); + lhs = asm_r(TYPE_INT, "snez", xor); } else { unget_token(); break; @@ -1084,59 +1372,145 @@ int parse_bitwise_or_expr() { int parse_logical_and_expr() { int lhs = parse_bitwise_or_expr(); - int label = next_label(); - int label_used = 0; + int logical = 0; + int label; + int result; while (1) { next_token(); if (token_type == TOKEN_LAND) { - lhs = asm_r("snez", lhs); - asm_beqz(lhs, label); + if (!logical) { + logical = 1; + label = next_label(); + result = next_reg(TYPE_INT); + _asm_r("snez", result, lhs); + } + asm_beqz(result, label); int rhs = parse_bitwise_or_expr(); - rhs = asm_r("snez", rhs); - lhs = asm_rr("and", lhs, rhs); - label_used = 1; + _asm_r("snez", result, rhs); } else { unget_token(); break; } } - if (label_used) { + if (logical) { asm_label(label); + return result; } return lhs; } int parse_logical_or_expr() { int lhs = parse_logical_and_expr(); - int label = next_label(); - int label_used = 0; + int logical = 0; + int label; + int result; while (1) { next_token(); if (token_type == TOKEN_LOR) { - lhs = asm_r("snez", lhs); - asm_bnez(lhs, label); + if (!logical) { + logical = 1; + label = next_label(); + result = next_reg(TYPE_INT); + _asm_r("snez", result, lhs); + } + asm_bnez(result, label); int rhs = parse_logical_and_expr(); - rhs = asm_r("snez", rhs); - lhs = asm_rr("or", lhs, rhs); - label_used = 1; + _asm_r("snez", result, rhs); } else { unget_token(); break; } } - if (label_used) { + if (logical) { asm_label(label); + return result; } return lhs; } +int parse_conditional_expr() { + int cond = parse_logical_or_expr(); + next_token(); + if (token_type == TOKEN_QUESTION) { + int label1 = next_label(); + int label2 = next_label(); + asm_beqz(cond, label1); + int lhs = parse_expr(); + int result = next_reg(reg_type[lhs]); + asm_mv(result, lhs); + asm_j(label2); + expect_token(TOKEN_COLON); + asm_label(label1); + int rhs = parse_conditional_expr(); + if (reg_type[lhs] != reg_type[rhs]) { + eprintf("type mismatch in conditional expression\n"); + exit(1); + } + asm_mv(result, rhs); + asm_label(label2); + return result; + } else { + unget_token(); + return cond; + } +} + int parse_assign_expr() { - int lhs = parse_logical_or_expr(); + int lhs = parse_conditional_expr(); next_token(); if (token_type == TOKEN_ASSIGN) { int rhs = parse_assign_expr(); - load(0, rhs); - store_t0(lhs); + asm_mv(lhs, rhs); + return lhs; + } else if (token_type == TOKEN_ADD_ASSIGN) { + int rhs = parse_assign_expr(); + int sum = asm_add(lhs, rhs); + asm_mv(lhs, sum); + return lhs; + } else if (token_type == TOKEN_SUB_ASSIGN) { + int rhs = parse_assign_expr(); + int diff = asm_sub(lhs, rhs); + asm_mv(lhs, diff); + return lhs; + } else if (token_type == TOKEN_MUL_ASSIGN) { + int rhs = parse_assign_expr(); + int prod = asm_rr_arith("mul", lhs, rhs); + asm_mv(lhs, prod); + return lhs; + } else if (token_type == TOKEN_DIV_ASSIGN) { + int rhs = parse_assign_expr(); + int quot = asm_rr_arith("div", lhs, rhs); + asm_mv(lhs, quot); + return lhs; + } else if (token_type == TOKEN_REM_ASSIGN) { + int rhs = parse_assign_expr(); + int rem = asm_rr_arith("rem", lhs, rhs); + asm_mv(lhs, rem); + return lhs; + } else if (token_type == TOKEN_LSHIFT_ASSIGN) { + int rhs = parse_assign_expr(); + int lshift = asm_rr_arith("sll", lhs, rhs); + asm_mv(lhs, lshift); + return lhs; + } else if (token_type == TOKEN_RSHIFT_ASSIGN) { + int rhs = parse_assign_expr(); + int rshift = asm_rr_arith("sra", lhs, rhs); + asm_mv(lhs, rshift); + return lhs; + } else if (token_type == TOKEN_AND_ASSIGN) { + int rhs = parse_assign_expr(); + int and = asm_rr_arith("and", lhs, rhs); + asm_mv(lhs, and); + return lhs; + } else if (token_type == TOKEN_XOR_ASSIGN) { + int rhs = parse_assign_expr(); + int xor = asm_rr_arith("xor", lhs, rhs); + asm_mv(lhs, xor); + return lhs; + } else if (token_type == TOKEN_OR_ASSIGN) { + int rhs = parse_assign_expr(); + int or = asm_rr_arith("or", lhs, rhs); + asm_mv(lhs, or); return lhs; } else { unget_token(); @@ -1145,12 +1519,23 @@ int parse_assign_expr() { } int parse_expr() { - return parse_assign_expr(); + int lhs = parse_assign_expr(); + while (1) { + next_token(); + if (token_type == TOKEN_COMMA) { + int rhs = parse_assign_expr(); + lhs = rhs; + } else { + unget_token(); + break; + } + } + return lhs; } void parse_local_variable(int type) { if (type == TYPE_VOID) { - eprintf("local variable of void type is not supported\n"); + eprintf("variable cannot be of void type\n"); exit(1); } expect_token(TOKEN_ID); @@ -1158,17 +1543,16 @@ void parse_local_variable(int type) { next_token(); if (token_type == TOKEN_BRACKET_LEFT) { if (type & TYPE_PTR_MASK) { - eprintf("local variable of array of pointers is not supported\n"); + eprintf("array of pointers is not supported\n"); exit(1); } expect_token(TOKEN_NUMBER); int size = token_data; expect_token(TOKEN_BRACKET_RIGHT); declare_local_array(id, type, size); - next_token(); - } else { - declare_local(id, type); + return; } + int slot = declare_local(id, type); if (token_type == TOKEN_SEMICOLON) { unget_token(); return; @@ -1176,8 +1560,11 @@ void parse_local_variable(int type) { unget_token(); expect_token(TOKEN_ASSIGN); int reg = parse_expr(); - load(0, reg); - store_t0(local_table[id]); + if (type != reg_type[reg]) { + eprintf("type mismatch in assignment\n"); + exit(1); + } + store_into_local(reg, slot); } void parse_stmt(); @@ -1286,20 +1673,17 @@ void parse_stmt() { } else if (token_type == TOKEN_RETURN) { next_token(); if (token_type == TOKEN_SEMICOLON) { - asm_j(epilog_label); + asm_return(); return; } unget_token(); - int reg = parse_expr(); - load(0, reg); - printf(" mv a0, t0\n"); - asm_j(epilog_label); + int rs1 = parse_expr(); + asm_mv(REG_A0, rs1); + asm_return(); } else if (token_type == TOKEN_BREAK) { - int label = asm_get_break_label(); - asm_j(label); + asm_break(); } else if (token_type == TOKEN_CONTINUE) { - int label = asm_get_cont_label(); - asm_j(label); + asm_continue(); } else if (token_type == TOKEN_SEMICOLON) { unget_token(); } else if ((decl_type = parse_type()) >= 0) { @@ -1335,7 +1719,7 @@ void parse_function(const char* name) { } int arg_type = parse_type(); if (arg_type < 0 || arg_type == TYPE_VOID) { - eprintf("unexpected a non-void argument type"); + eprintf("expecting a non-void argument type: %d\n", arg_type); exit(1); } expect_token(TOKEN_ID); @@ -1345,10 +1729,14 @@ void parse_function(const char* name) { expect_token(TOKEN_BRACKET_RIGHT); next_token(); if (arg_type & TYPE_PTR_MASK) { - eprintf("local variable of array of pointers is not supported\n"); + eprintf("array of pointers is not supported\n"); exit(1); } - arg_type = arg_type | TYPE_PTR_MASK; + arg_type |= TYPE_PTR_MASK; + } + if (arg >= 8) { + eprintf("too many arguments\n"); + exit(1); } args[arg++] = declare_local(token_data, arg_type); if (token_type == TOKEN_COMMA) { @@ -1382,37 +1770,46 @@ void parse_function(const char* name) { unget_token(); parse_stmt(); } - asm_j(epilog_label); - int frame_size = max_local_id * 8; + if (streq(name, "main")) { + asm_mv(REG_A0, REG_ZERO); + } + asm_return(); + int reg_used = max_reg_id - REG_S2; + if (reg_used > 14) reg_used = 14; + int frame_size = (max_local_id - 1 + reg_used + 2) * 8; + if (reg_used > 10) reg_used = 10; if (frame_size % 16 != 0) { - frame_size = frame_size + 8; + frame_size += 8; } // prolog asm_label(prolog_label); asm_addi("sp", "sp", -frame_size); asm_sd("ra", frame_size - 8, "sp"); asm_sd("fp", frame_size - 16, "sp"); + for (int i = 0; i < reg_used; ++i) { + int reg = REG_S2 + i; + asm_sd(reg_name(reg), frame_size - 24 - i * 8, "sp"); + } asm_addi("fp", "sp", frame_size); for (int i = 0; i < arg; ++i) { - printf(" mv t0, a%d\n", i); - store_t0(args[i]); + store_into_local(REG_A0 + i, args[i]); } asm_j(label); // epilog asm_label(epilog_label); - asm_ld("fp", frame_size - 16, "sp"); asm_ld("ra", frame_size - 8, "sp"); + asm_ld("fp", frame_size - 16, "sp"); + for (int i = 0; i < reg_used; ++i) { + int reg = REG_S2 + i; + asm_ld(reg_name(reg), frame_size - 24 - i * 8, "sp"); + } asm_addi("sp", "sp", frame_size); printf(" ret\n"); } void parse_global_variable(int id, const char* name, int type) { if (type == TYPE_VOID) { - eprintf("global variable of void type is not supported\n"); - exit(1); - } - if (type & TYPE_PTR_MASK) { - eprintf("global variable of pointer is not supported\n"); + eprintf("variable cannot be of void type\n"); exit(1); } printf(".data\n"); @@ -1423,6 +1820,10 @@ void parse_global_variable(int id, const char* name, int type) { expect_token(TOKEN_NUMBER); printf(" .word %d\n", token_data); } else if (token_type == TOKEN_BRACKET_LEFT) { + if (type & TYPE_PTR_MASK) { + eprintf("array of pointers is not supported\n"); + exit(1); + } expect_token(TOKEN_NUMBER); int size = token_data; expect_token(TOKEN_BRACKET_RIGHT); @@ -1440,20 +1841,32 @@ void parse_global_variable(int id, const char* name, int type) { } void parse_global_declaration() { + int is_const_int = 1; + if (token_type != TOKEN_CONST) { + is_const_int = 0; + } int type = parse_type(); if (type < 0) { eprintf("expecting type for global declaration\n"); exit(1); } + if (type != TYPE_INT) { + is_const_int = 0; + } expect_token(TOKEN_ID); int id = token_data; char* name = id_table + id_lut[id]; next_token(); - if (token_type == TOKEN_PAREN_LEFT) { - declare_global(id, MARKER_FUNCTION, type); + if (is_const_int && token_type == TOKEN_ASSIGN) { + expect_token(TOKEN_NUMBER); + const_table[id] = token_data; + is_const[id] = 1; + expect_token(TOKEN_SEMICOLON); + } else if (token_type == TOKEN_PAREN_LEFT) { + declare_global(id, MARKER_FUNCTION, type); parse_function(name); } else { - declare_global(id, MARKER_SCALAR, type); + declare_global(id, MARKER_SCALAR, type); parse_global_variable(id, name, type); } } @@ -1470,9 +1883,9 @@ void dump_string_table() { printf(".data\n"); for (int i = 0; i < string_lut_size; ++i) { printf(".LC%d: .string \"", i); - int offset = string_lut[i]; - int ch; - while ((ch = string_table[offset++]) != 0) { + char* p = string_table + string_lut[i]; + int ch; + while ((ch = *p++) != 0) { if (ch == '\n') { printf("\\n"); } else if (ch == '\t') { @@ -1498,5 +1911,4 @@ void dump_string_table() { int main() { parse_top_level(); dump_string_table(); - return 0; } diff --git a/boot.sh b/boot.sh index 91909ec..c4a4ecb 100644 --- a/boot.sh +++ b/boot.sh @@ -5,7 +5,7 @@ gcc ../boot.c ../boot-lib.c -o gcc.out && riscv64-linux-gnu-gcc-12 -static boot1.s ../boot-lib.c -o boot1.out && qemu-riscv64 boot1.out < boot-all.c > boot2.s && riscv64-linux-gnu-gcc-12 -static boot2.s ../boot-lib.c -o boot2.out && -qemu-riscv64 boot2.out < boot-all.c > boot3.s && +qemu-riscv64 boot2.out < boot-all.c > boot3.s cmp --silent boot1.s boot2.s && echo "boot1.s == boot2.s" || echo "boot1.s != boot2.s" cmp --silent boot2.s boot3.s && echo "boot2.s == boot3.s" || echo "boot2.s != boot3.s" cmp --silent boot1.s boot3.s && echo "boot1.s == boot3.s" || echo "boot1.s != boot3.s" diff --git a/bootstrapping.png b/bootstrapping.png new file mode 100644 index 0000000..221b03a Binary files /dev/null and b/bootstrapping.png differ diff --git a/demo/add.c b/demo/add.c new file mode 100644 index 0000000..f42e07e --- /dev/null +++ b/demo/add.c @@ -0,0 +1,17 @@ +int printf(const char format[], ...); +int scanf(const char format[], ...); +int putchar(int ch); + +int* p; +int f1() { + int a = 1; + return *(a+(a+(a+(a+(a+(a+(a+(a+(a+(a+(p))))))))))); // a[10] +} + + +int main() { + int a[15]; + p = a; + for (int i = 0; i < 15; a[i] = i, ++i); + return f1(); +} \ No newline at end of file diff --git a/demo/hello.c b/demo/hello.c index 4000513..c94a263 100644 --- a/demo/hello.c +++ b/demo/hello.c @@ -2,5 +2,4 @@ int printf(const char* format, ...); int main() { printf("hello world %d\n", 42); - return 0; } \ No newline at end of file diff --git a/demo/lut.c b/demo/lut.c new file mode 100644 index 0000000..b4ed157 --- /dev/null +++ b/demo/lut.c @@ -0,0 +1,46 @@ +int printf(const char format[], ...); +int getchar(); + +char string_table[65536]; +int string_offset; +int string_lut[4096]; +int string_lut_size; + +int parse_string() { + int offset = string_offset; + int ch; + while ((ch = getchar()) != '"') { + if (ch == -1 || ch == '\n') { + printf("expecting '\"'\n"); + return 1; + } + string_table[string_offset++] = ch; + } + string_table[string_offset++] = 0; + string_lut[string_lut_size] = offset; + return string_lut_size++; +} + + +int streq(const char* s1, const char* s2) { + while (*s1 && *s2 && *s1 == *s2) { + s1++; + s2++; + } + return *s1 == *s2; +} + +void dump_string_table() { + printf(".data\n"); + for (int i = 0; i < string_lut_size; ++i) { + char* id = string_table + string_lut[i]; + printf(".LC%d: .string \"%s\", const: %d\n", + i, id, streq(id, "const")); + } +} + +int main() { + char ch; + while ((ch = getchar()) == '"') parse_string(); + dump_string_table(); +} \ No newline at end of file diff --git a/demo/parse.c b/demo/parse.c new file mode 100644 index 0000000..98b7e9d --- /dev/null +++ b/demo/parse.c @@ -0,0 +1,19 @@ +int getchar(); + + +int is_digit(int ch) { + return '0' <= ch && ch <= '9'; +} + +int parse_int(int ch) { + int num = ch - '0'; + while (is_digit(ch = getchar())) { + num = num * 10; + num = num + ch - '0'; + } + return num; +} + +int main() { + return parse_int(getchar()); +} \ No newline at end of file diff --git a/demo/queen.c b/demo/queen.c index ac4a310..2789ed3 100644 --- a/demo/queen.c +++ b/demo/queen.c @@ -43,5 +43,4 @@ void queen(int x) { int main() { queen(1); printf("solutions: %d\n", a[0]); - return 0; } \ No newline at end of file diff --git a/demo/sort.c b/demo/sort.c index b1db39d..8248c54 100644 --- a/demo/sort.c +++ b/demo/sort.c @@ -27,5 +27,4 @@ int main() { printf("%d ", a[i]); } printf("\n"); - return 0; } \ No newline at end of file diff --git a/demo/strcmp.c b/demo/strcmp.c new file mode 100644 index 0000000..50fe8aa --- /dev/null +++ b/demo/strcmp.c @@ -0,0 +1,16 @@ +int printf(const char* format, ...); + +int strcmp(const char* s1, const char* s2) { + while (*s1 && *s2 && *s1 == *s2) { + s1++; + s2++; + } + return *s1 - *s2; +} + +int main() { + const char* s1 = "helloworld"; + const char* s2 = "world"; + printf("%d\n", strcmp(s1, s2)); + printf("%d\n", strcmp(s1 + 5, s2)); +} \ No newline at end of file