#include "boot-lib.h"

// A small single-pass compiler for a subset of C.
//
// Source text is read with getchar()/ungetchar() and assembly is written with
// printf(); diagnostics go through eprintf(). All four, plus exit(), come from
// boot-lib.h. The emitted mnemonics (ld/sd/addi/li/la/beqz, registers t0, t1,
// a0.., fp, sp, ra) look like 64-bit RISC-V assembly.
//
// NOTE(review): the code deliberately avoids compound assignment, ternaries,
// switch, struct and similar features. Presumably the file is meant to be
// compilable by itself, so edits should stay inside the accepted subset.
// Confirm before using richer C here.

// ---------------------------------------------------------------------------
// lexer
// ---------------------------------------------------------------------------

// Returns non-zero when ch is an ASCII decimal digit.
int is_digit(int ch) {
    return '0' <= ch && ch <= '9';
}

// Returns non-zero when ch may start an identifier (ASCII letter or '_').
int is_id_start(int ch) {
    return ('a' <= ch && ch <= 'z') || ('A' <= ch && ch <= 'Z') || ch == '_';
}

// Returns non-zero when ch may continue an identifier.
int is_id_cont(int ch) {
    return is_id_start(ch) || is_digit(ch);
}

// token_state: 1 when the current token has been pushed back by unget_token().
// token_type:  one of the TOKEN_* values below.
// token_data:  payload for TOKEN_NUMBER (value), TOKEN_STRING (string index)
//              and TOKEN_ID (identifier index).
int token_state;
int token_type;
int token_data;

const int TOKEN_EOF = 0;
const int TOKEN_SEMICOLON = 1;
const int TOKEN_ADD = 2;
const int TOKEN_MINUS = 3;
const int TOKEN_STAR = 4;
const int TOKEN_DIV = 5;
const int TOKEN_REM = 6;
const int TOKEN_ASSIGN = 7;
const int TOKEN_COMMA = 8;
const int TOKEN_DOT = 9;
const int TOKEN_LSHIFT = 10;
const int TOKEN_RSHIFT = 11;
const int TOKEN_AND = 12;
const int TOKEN_OR = 13;
const int TOKEN_XOR = 14;
const int TOKEN_COMPL = 15;
const int TOKEN_NOT = 16;
const int TOKEN_LAND = 17;
const int TOKEN_LOR = 18;
const int TOKEN_ELLIPSIS = 19;
const int TOKEN_INC = 20;
const int TOKEN_DEC = 21;

const int TOKEN_EQ = 40;
const int TOKEN_NE = 41;
const int TOKEN_LT = 42;
const int TOKEN_GT = 43;
const int TOKEN_LE = 44;
const int TOKEN_GE = 45;

const int TOKEN_PAREN_LEFT = 50;
const int TOKEN_PAREN_RIGHT = 51;
const int TOKEN_BRACKET_LEFT = 52;
const int TOKEN_BRACKET_RIGHT = 53;
const int TOKEN_BRACE_LEFT = 54;
const int TOKEN_BRACE_RIGHT = 55;

const int TOKEN_STRING = 99;
const int TOKEN_NUMBER = 100;
const int TOKEN_ID = 101;
const int TOKEN_IF = 102;
const int TOKEN_ELSE = 103;
const int TOKEN_WHILE = 104;
const int TOKEN_FOR = 105;
const int TOKEN_DO = 106;
const int TOKEN_BREAK = 107;
const int TOKEN_CONTINUE = 108;
const int TOKEN_RETURN = 109;

// Type keywords are numbered so that clearing TYPE_TOKEN_MASK turns
// TOKEN_VOID / TOKEN_INT / TOKEN_CHAR into TYPE_VOID / TYPE_INT / TYPE_CHAR.
const int TOKEN_CONST = 127;
const int TOKEN_VOID = 128;
const int TOKEN_INT = 129;
const int TOKEN_CHAR = 130;

const int TYPE_VOID = 0;
const int TYPE_INT = 1;
const int TYPE_CHAR = 2;
const int TYPE_VOID_PTR = 16;
const int TYPE_INT_PTR = 17;
const int TYPE_CHAR_PTR = 18;
const int TYPE_PTR_MASK = 16;
const int TYPE_TOKEN_MASK = 128;

// Capacities of the fixed-size tables below. The array declarations have to
// spell the sizes as literals, so keep these in sync with them.
const int TEXT_TABLE_CAPACITY = 65536; // bytes in string_table / id_table
const int TABLE_CAPACITY = 4096;       // entries in every 4096-element table
const int MAX_ARGS = 8;                // entries in the args[8] arrays

// Reads the remaining digits of a decimal literal whose first digit is ch and
// returns its value. The first non-digit character is pushed back.
int parse_int(int ch) {
    int num = ch - '0';
    while (is_digit(ch = getchar())) {
        num = num * 10;
        num = num + ch - '0';
    }
    ungetchar(ch);
    return num;
}

// Reads the character following a backslash and returns the character the
// escape sequence stands for. Unknown escapes are a fatal error.
int get_escaped_char() {
    int ch = getchar();
    if (ch == 'n') {
        ch = '\n';
    } else if (ch == 't') {
        ch = '\t';
    } else if (ch == 'r') {
        ch = '\r';
    } else if (ch == '0') {
        ch = '\0';
    } else if (ch == '\\') {
        ch = '\\';
    } else if (ch == '\'') {
        ch = '\'';
    } else if (ch == '\"') {
        ch = '\"';
    } else {
        eprintf("unexpected escaped character: %c\n", ch);
        exit(1);
    }
    return ch;
}

// Returns non-zero when the two NUL-terminated strings are equal.
int streq(const char* s1, const char* s2) {
    while (*s1 && *s2 && *s1 == *s2) {
        s1++;
        s2++;
    }
    return *s1 == *s2;
}

// String literals are stored back to back (NUL-terminated) in string_table;
// string_lut[i] is the offset of the i-th literal.
char string_table[65536];
int string_offset;
int string_lut[4096];
int string_lut_size;

// Reads a string literal body (the opening quote is already consumed), stores
// it in string_table and returns its index in string_lut.
int parse_string() {
    if (string_lut_size >= TABLE_CAPACITY) {
        eprintf("too many string literals\n");
        exit(1);
    }
    int offset = string_offset;
    int ch;
    while ((ch = getchar()) != '"') {
        if (ch == -1 || ch == '\n') {
            eprintf("expecting '\"'\n");
            exit(1);
        }
        if (ch == '\\') {
            ch = get_escaped_char();
        }
        // always keep one byte free for the terminating NUL
        if (string_offset >= TEXT_TABLE_CAPACITY - 1) {
            eprintf("string table overflow\n");
            exit(1);
        }
        string_table[string_offset++] = ch;
    }
    string_table[string_offset++] = 0;
    string_lut[string_lut_size] = offset;
    return string_lut_size++;
}

// Identifiers are stored the same way as strings: NUL-terminated names in
// id_table, with id_lut[i] giving the offset of identifier i.
char id_table[65536];
int id_offset;
int id_lut[4096];
int id_lut_size;

// Reads an identifier whose first character is ch, appends it to id_table as
// a new entry and returns the new entry's index. The first character that is
// not part of the identifier is pushed back.
int parse_id(int ch) {
    if (id_lut_size >= TABLE_CAPACITY) {
        eprintf("too many identifiers\n");
        exit(1);
    }
    int offset = id_offset;
    while (1) {
        // always keep one byte free for the terminating NUL
        if (id_offset >= TEXT_TABLE_CAPACITY - 1) {
            eprintf("identifier table overflow\n");
            exit(1);
        }
        id_table[id_offset++] = ch;
        ch = getchar();
        if (!is_id_cont(ch)) {
            break;
        }
    }
    ungetchar(ch);
    id_table[id_offset++] = 0;
    id_lut[id_lut_size] = offset;
    return id_lut_size++;
}

// Discards the identifier entry token_data refers to (which must be the most
// recently added one) and makes token_data refer to new_data instead.
void rewind_id(int new_data) {
    id_offset = id_lut[token_data];
    token_data = new_data;
    --id_lut_size;
}

// If the most recently added identifier duplicates an earlier entry, drops the
// new entry and points token_data at the earlier one.
void dedup_id() {
    int last_id = id_lut_size - 1;
    char* latest = id_table + id_lut[last_id];
    for (int i = 0; i < last_id; i++) {
        char* candidate = id_table + id_lut[i];
        if (streq(candidate, latest)) {
            rewind_id(i);
            return;
        }
    }
}

// Lexes an identifier or keyword starting with ch. Keywords set token_type to
// their TOKEN_* value and are removed from the identifier table again (with
// token_data reset to 0); real identifiers are de-duplicated.
void parse_id_like(int ch) {
    token_type = TOKEN_ID;
    token_data = parse_id(ch);
    char* id = id_table + id_lut[token_data];
    if (streq(id, "int")) {
        token_type = TOKEN_INT;
    } else if (streq(id, "if")) {
        token_type = TOKEN_IF;
    } else if (streq(id, "else")) {
        token_type = TOKEN_ELSE;
    } else if (streq(id, "while")) {
        token_type = TOKEN_WHILE;
    } else if (streq(id, "break")) {
        token_type = TOKEN_BREAK;
    } else if (streq(id, "continue")) {
        token_type = TOKEN_CONTINUE;
    } else if (streq(id, "return")) {
        token_type = TOKEN_RETURN;
    } else if (streq(id, "void")) {
        token_type = TOKEN_VOID;
    } else if (streq(id, "const")) {
        token_type = TOKEN_CONST;
    } else if (streq(id, "char")) {
        token_type = TOKEN_CHAR;
    } else if (streq(id, "for")) {
        token_type = TOKEN_FOR;
    } else if (streq(id, "do")) {
        token_type = TOKEN_DO;
    }
    if (token_type != TOKEN_ID) {
        rewind_id(0);
    } else {
        dedup_id();
    }
}

// Pushes the current token back: the next call to next_token() returns it
// again without reading input. Only one token of pushback is kept.
void unget_token() {
    token_state = 1;
}

// Skips the body of a block comment; the opening "/*" is already consumed.
// The previous character is tracked so that a run of stars directly before
// the closing slash (as in "**/") still terminates the comment.
void skip_block_comment() {
    int prev = 0;
    while (1) {
        int ch = getchar();
        if (ch == -1) {
            eprintf("expecting '*/'\n");
            exit(1);
        }
        if (prev == '*' && ch == '/') {
            return;
        }
        prev = ch;
    }
}

// Reads one character: returns matched_type if it equals follow, otherwise
// pushes the character back and returns plain_type. Used for operators that
// are either one or two characters long.
int select_token(int follow, int matched_type, int plain_type) {
    int ch = getchar();
    if (ch == follow) {
        return matched_type;
    }
    ungetchar(ch);
    return plain_type;
}

// Advances to the next token, storing it in token_type / token_data.
// Whitespace and both comment styles are skipped. Every freshly lexed token
// is also traced through eprintf.
void next_token() {
    if (token_state) {
        token_state = 0;
        return;
    }
    int ch = getchar();
    // Skip any mix of whitespace and comments iteratively, so that a long run
    // of comments does not cost one stack frame each.
    while (1) {
        while (ch == ' ' || ch == '\t' || ch == '\r' || ch == '\n') {
            ch = getchar();
        }
        if (ch != '/') {
            break;
        }
        int ch2 = getchar();
        if (ch2 == '/') {
            do {
                ch = getchar();
            } while (ch != -1 && ch != '\n');
        } else if (ch2 == '*') {
            skip_block_comment();
        } else {
            // a plain '/': leave it in ch for the operator chain below
            ungetchar(ch2);
            break;
        }
        ch = getchar();
    }
    if (ch == -1) {
        token_type = TOKEN_EOF;
    } else if (ch == '(') {
        token_type = TOKEN_PAREN_LEFT;
    } else if (ch == ')') {
        token_type = TOKEN_PAREN_RIGHT;
    } else if (ch == '[') {
        token_type = TOKEN_BRACKET_LEFT;
    } else if (ch == ']') {
        token_type = TOKEN_BRACKET_RIGHT;
    } else if (ch == '{') {
        token_type = TOKEN_BRACE_LEFT;
    } else if (ch == '}') {
        token_type = TOKEN_BRACE_RIGHT;
    } else if (ch == '+') {
        token_type = select_token('+', TOKEN_INC, TOKEN_ADD);
    } else if (ch == '-') {
        token_type = select_token('-', TOKEN_DEC, TOKEN_MINUS);
    } else if (ch == '*') {
        token_type = TOKEN_STAR;
    } else if (ch == '/') {
        token_type = TOKEN_DIV;
    } else if (ch == '%') {
        token_type = TOKEN_REM;
    } else if (ch == ';') {
        token_type = TOKEN_SEMICOLON;
    } else if (ch == ',') {
        token_type = TOKEN_COMMA;
    } else if (ch == '<') {
        int ch2 = getchar();
        if (ch2 == '=') {
            token_type = TOKEN_LE;
        } else if (ch2 == '<') {
            token_type = TOKEN_LSHIFT;
        } else {
            ungetchar(ch2);
            token_type = TOKEN_LT;
        }
    } else if (ch == '>') {
        int ch2 = getchar();
        if (ch2 == '=') {
            token_type = TOKEN_GE;
        } else if (ch2 == '>') {
            token_type = TOKEN_RSHIFT;
        } else {
            ungetchar(ch2);
            token_type = TOKEN_GT;
        }
    } else if (ch == '=') {
        token_type = select_token('=', TOKEN_EQ, TOKEN_ASSIGN);
    } else if (ch == '!') {
        token_type = select_token('=', TOKEN_NE, TOKEN_NOT);
    } else if (ch == '&') {
        token_type = select_token('&', TOKEN_LAND, TOKEN_AND);
    } else if (ch == '|') {
        token_type = select_token('|', TOKEN_LOR, TOKEN_OR);
    } else if (ch == '^') {
        token_type = TOKEN_XOR;
    } else if (ch == '~') {
        token_type = TOKEN_COMPL;
    } else if (ch == '\'') {
        // a character literal is lexed as a number
        token_type = TOKEN_NUMBER;
        token_data = getchar();
        if (token_data == '\\') {
            token_data = get_escaped_char();
        }
        if (getchar() != '\'') {
            eprintf("expecting '\n");
            exit(1);
        }
    } else if (ch == '"') {
        token_type = TOKEN_STRING;
        token_data = parse_string();
    } else if (ch == '.') {
        // the only token starting with '.' that is recognised is "..."
        int ch2 = getchar();
        if (ch2 == '.') {
            int ch3 = getchar();
            if (ch3 == '.') {
                token_type = TOKEN_ELLIPSIS;
            } else {
                eprintf("unexpected character: %c\n", ch3);
                exit(1);
            }
        } else {
            eprintf("unexpected character: %c\n", ch2);
            exit(1);
        }
    } else if (is_digit(ch)) {
        token_type = TOKEN_NUMBER;
        token_data = parse_int(ch);
    } else if (is_id_start(ch)) {
        parse_id_like(ch);
    } else {
        eprintf("unexpected character: %c(%d)\n", ch, ch);
        exit(1);
    }
    eprintf("token: %d\n", token_type);
    if (token_type == TOKEN_ID) {
        const char* name = id_table + id_lut[token_data];
        eprintf(" id: %s\n", name);
    } else if (token_type == TOKEN_NUMBER) {
        eprintf(" number: %d\n", token_data);
    }
}

// Advances to the next token and aborts unless it has the expected type.
void expect_token(int expected_type) {
    next_token();
    if (token_type != expected_type) {
        eprintf("unexpected token: %d, should be %d\n", token_type, expected_type);
        exit(1);
    }
}

// Skips the current token if it is `const`.
void ignore_const() {
    if (token_type == TOKEN_CONST) {
        next_token();
    }
}

// Parses a type starting at the CURRENT token: optional `const`, one of
// int / char / void, optional `const`, and at most one `*` which may itself
// be followed by `const`.
//
// Returns the TYPE_* value, or -1 when the current token does not start a
// type. On success the token stream is positioned so that the next call to
// next_token() yields the first token after the type.
int parse_type() {
    ignore_const();
    if (token_type == TOKEN_INT || token_type == TOKEN_CHAR || token_type == TOKEN_VOID) {
        int type = token_type & ~TYPE_TOKEN_MASK;
        next_token();
        ignore_const();
        if (token_type == TOKEN_STAR) {
            // Accept `T * const`: the qualifier can only be seen by looking
            // one token past the star.
            next_token();
            if (token_type != TOKEN_CONST) {
                unget_token();
            }
            return type | TYPE_PTR_MASK;
        }
        unget_token();
        return type;
    } else {
        return -1;
    }
}

// ---------------------------------------------------------------------------
// asm
// ---------------------------------------------------------------------------
//
// Every value lives in a numbered 8-byte stack slot ("reg"/"local id").
// Slot n is addressed as fp - 8*n - 8 (see load_address). Slots 0 and 1 are
// where the prolog in parse_function saves ra and the caller's fp, so usable
// slots start at 2.

int epilog_label;

int local_table[4096]; // id -> local id
int next_local_id = 2;
int max_local_id = 2;

const int MARKER_TEMP = 0;
const int MARKER_SCALAR = 1;
const int MARKER_ARRAY = 2;
const int MARKER_FUNCTION = 3;

int local_marker[4096];
int global_marker[4096];
int local_type[4096];
int global_type[4096];
// indirection[slot] != 0 means the slot holds the ADDRESS of the value rather
// than the value itself (see load_address / dereference).
int indirection[4096];

// Forgets all locals; called at the start of every function.
void reset_local() {
    next_local_id = 2;
    max_local_id = 2;
    for (int i = 0; i < TABLE_CAPACITY; ++i) {
        local_table[i] = 0;
        local_marker[i] = MARKER_TEMP;
        local_type[i] = TYPE_VOID;
        indirection[i] = 0;
    }
}

// Releases the temporaries at the top of the slot stack, stopping at the
// first slot that belongs to a declared variable.
void reset_temp() {
    while (next_local_id > 2 && local_marker[next_local_id - 1] == MARKER_TEMP) {
        --next_local_id;
    }
}

// Allocates a fresh slot of the given type and returns its number.
int next_reg(int type) {
    if (next_local_id >= TABLE_CAPACITY) {
        eprintf("too many local variables\n");
        exit(1);
    }
    int reg = next_local_id++;
    local_type[reg] = type;
    indirection[reg] = 0;
    if (next_local_id > max_local_id) {
        max_local_id = next_local_id;
    }
    return reg;
}

// Declares a scalar local for identifier id and returns its slot. If the
// identifier already has a slot in this function, that slot is returned.
int declare_local(int id, int type) {
    if (local_table[id] != 0) return local_table[id];
    int reg = next_reg(type);
    local_marker[reg] = MARKER_SCALAR;
    return local_table[id] = reg;
}

// Declares a local array of `size` elements, one slot per element, and
// returns the LAST slot allocated. That is the one with the lowest address,
// i.e. where element 0 lives.
int declare_local_array(int id, int type, int size) {
    if (local_table[id] != 0) return local_table[id];
    if (size <= 0) {
        // without at least one element there would be no slot to return
        eprintf("array size must be positive\n");
        exit(1);
    }
    int reg = 0;
    for (int i = 0; i < size; ++i) {
        reg = next_reg(type);
        local_marker[reg] = MARKER_ARRAY;
    }
    return local_table[id] = reg;
}

// Records the kind (MARKER_*) and type of a global identifier.
void declare_global(int id, int marker, int type) {
    global_marker[id] = marker;
    global_type[id] = type;
}

// Returns non-zero when value fits the signed 12-bit range [-2048, 2047] that
// the emitters below allow as an inline immediate.
int check_itype_immediate(int value) {
    return value >= -2048 && value <= 2047;
}

// Emits `ld rd, imm(rs)`; falls back to computing the address in t0 when the
// offset does not fit an immediate.
void asm_ld(const char* rd, int imm, const char* rs) {
    if (check_itype_immediate(imm)) {
        printf(" ld %s, %d(%s)\n", rd, imm, rs);
    } else {
        printf(" li t0, %d\n", imm);
        printf(" add t0, %s, t0\n", rs);
        printf(" ld %s, 0(t0)\n", rd);
    }
}

// Emits `sd rs1, imm(rs2)`; uses t0 as scratch for large offsets.
void asm_sd(const char* rs1, int imm, const char* rs2) {
    if (check_itype_immediate(imm)) {
        printf(" sd %s, %d(%s)\n", rs1, imm, rs2);
    } else {
        printf(" li t0, %d\n", imm);
        printf(" add t0, %s, t0\n", rs2);
        printf(" sd %s, 0(t0)\n", rs1);
    }
}

// Emits `addi rd, rs, imm`; uses t0 as scratch for large immediates.
void asm_addi(const char* rd, const char* rs, int imm) {
    if (check_itype_immediate(imm)) {
        printf(" addi %s, %s, %d\n", rd, rs, imm);
    } else {
        printf(" li t0, %d\n", imm);
        printf(" add %s, %s, t0\n", rd, rs);
    }
}

// Emits code leaving the address of slot `id`'s value in register t<rd>.
// For an indirect slot that is the pointer stored in the slot; otherwise it
// is the address of the slot itself. id == -1 is what a call to a void
// function evaluates to and is rejected.
void load_address(int rd, int id) {
    if (id == -1) {
        eprintf("void cannot be arithmetically operated\n");
        exit(1);
    }
    int offset = -id * 8 - 8;
    if (indirection[id]) {
        if (check_itype_immediate(offset)) {
            printf(" ld t%d, %d(fp) # indirection\n", rd, offset);
        } else {
            printf(" li t%d, %d\n", rd, offset);
            printf(" add t%d, fp, t%d\n", rd, rd);
            printf(" ld t%d, 0(t%d) # indirection\n", rd, rd);
        }
    } else {
        if (check_itype_immediate(offset)) {
            printf(" addi t%d, fp, %d\n", rd, offset);
        } else {
            printf(" li t%d, %d\n", rd, offset);
            printf(" add t%d, fp, t%d\n", rd, rd);
        }
    }
}

// Emits code loading the value of slot `id` into t<rd>, choosing the load
// width from the slot's type (lb for char, ld for pointers, lw otherwise).
void load(int rd, int id) {
    load_address(rd, id);
    int type = local_type[id];
    const char* op = "lw"; // int
    if (type == TYPE_CHAR) {
        op = "lb";
    } else if (type & TYPE_PTR_MASK) {
        op = "ld";
    }
    printf(" %s t%d, 0(t%d) # id: type %d\n", op, rd, rd, type);
}

// Emits code storing t0 into slot `id`; t1 is used for the address.
void store_t0(int id) {
    load_address(1, id);
    int type = local_type[id];
    const char* op = "sw"; // int
    if (type == TYPE_CHAR) {
        op = "sb";
    } else if (type & TYPE_PTR_MASK) {
        op = "sd";
    }
    printf(" %s t0, 0(t1) # id: type %d\n", op, type);
}

// Saves t0 into a fresh temporary of the given type and returns its slot.
int materialize_t0(int type) {
    int reg = next_reg(type);
    store_t0(reg);
    return reg;
}

// Turns a slot holding a pointer into an lvalue for the pointed-to object:
// the pointer bit is stripped from its type and the slot is marked indirect.
int dereference(int reg) {
    local_type[reg] = local_type[reg] & ~TYPE_PTR_MASK;
    indirection[reg] = 1;
    return reg;
}

// Resolves identifier `id` to a slot. Locals are returned directly (arrays
// decay to a temporary holding their address); globals are reached through a
// temporary holding their address. Unknown identifiers and bare function
// names are fatal errors.
int lookup(int id) {
    int local = local_table[id];
    if (local) {
        if (local_marker[local] == MARKER_ARRAY) {
            load_address(0, local);
            return materialize_t0(local_type[local] | TYPE_PTR_MASK);
        }
        return local;
    }
    const char* name = id_table + id_lut[id];
    if (global_marker[id]) {
        if (global_marker[id] == MARKER_FUNCTION) {
            eprintf("function name must not appear outside function call: %s\n", name);
            exit(1);
        }
        printf(" la t0, %s # id: %d\n", name, id);
        int reg = materialize_t0(global_type[id] | TYPE_PTR_MASK);
        if (global_marker[id] == MARKER_SCALAR) {
            reg = dereference(reg);
        }
        return reg;
    }
    eprintf("unresolved identifier: %s\n", name);
    exit(1);
}

int next_label_id = 0;

// Returns a fresh label number.
int next_label() {
    return next_label_id++;
}

// Emits the definition of label `label` and returns it.
int asm_label(int label) {
    printf("L%d:\n", label);
    return label;
}

// Returns non-zero when slot rs1 must not be overwritten with a result of
// expected_type: it is indirect, belongs to a variable, or has another type.
int is_not_reusable(int rs1, int expected_type) {
    return indirection[rs1] || local_marker[rs1] != MARKER_TEMP || local_type[rs1] != expected_type;
}

// Emits the unary operation `op t0, t0` on rs1 and returns the slot holding
// the int result (rs1 itself when it is a reusable temporary).
int asm_r(const char* op, int rs1) {
    load(0, rs1);
    printf(" %s t0, t0\n", op);
    int rd = rs1;
    if (is_not_reusable(rs1, TYPE_INT)) {
        rd = next_reg(TYPE_INT);
    }
    store_t0(rd);
    return rd;
}

// Like asm_r, but rejects pointer operands.
int asm_r_arith(const char* op, int rs1) {
    if (local_type[rs1] & TYPE_PTR_MASK) {
        eprintf("pointer cannot be arithmetically operated by %s\n", op);
        exit(1);
    }
    return asm_r(op, rs1);
}

// Emits the binary operation `op t0, t0, t1` on rs1 and rs2 and returns the
// slot holding the int result, reusing rs1 or rs2 when either is a reusable
// temporary.
int asm_rr(const char* op, int rs1, int rs2) {
    load(0, rs1);
    load(1, rs2);
    printf(" %s t0, t0, t1\n", op);
    int rd = rs1;
    if (is_not_reusable(rd, TYPE_INT)) {
        rd = rs2;
        if (is_not_reusable(rd, TYPE_INT)) {
            rd = next_reg(TYPE_INT);
        }
    }
    store_t0(rd);
    return rd;
}

// Like asm_rr, but rejects pointer operands.
int asm_rr_arith(const char* op, int rs1, int rs2) {
    if (local_type[rs1] & TYPE_PTR_MASK || local_type[rs2] & TYPE_PTR_MASK) {
        eprintf("pointer cannot be arithmetically operated by %s\n", op);
        exit(1);
    }
    return asm_rr(op, rs1, rs2);
}

// Binary operation used for comparisons; operand types are not checked.
int asm_rr_cmp(const char* op, int rs1, int rs2) {
    // since NULL is virtually 0, it is considered valid example of a pointer comparing with an integer
    return asm_rr(op, rs1, rs2);
}

// Emits a branch to `label` taken when slot rs1 is zero.
void asm_beqz(int rs1, int label) {
    load(0, rs1);
    printf(" beqz t0, L%d\n", label);
}

// Emits a branch to `label` taken when slot rs1 is non-zero.
void asm_bnez(int rs1, int label) {
    load(0, rs1);
    printf(" bnez t0, L%d\n", label);
}

// Emits an unconditional jump to `label`.
void asm_j(int label) {
    printf(" j L%d\n", label);
}

// Targets of `break` and `continue` for the enclosing loops, innermost last.
int break_label_stack[4096];
int cont_label_stack[4096];
int break_label_stack_size;
int cont_label_stack_size;

// Returns the label `break` jumps to; fatal error outside of any loop.
int asm_get_break_label() {
    if (break_label_stack_size <= 0) {
        eprintf("break statement not within a loop\n");
        exit(1);
    }
    return break_label_stack[break_label_stack_size - 1];
}

// Returns the label `continue` jumps to; fatal error outside of any loop.
int asm_get_cont_label() {
    if (cont_label_stack_size <= 0) {
        eprintf("continue statement not within a loop\n");
        exit(1);
    }
    return cont_label_stack[cont_label_stack_size - 1];
}

// Enters a loop whose break / continue targets are the given labels.
void asm_push_label(int break_label, int cont_label) {
    if (break_label_stack_size >= TABLE_CAPACITY || cont_label_stack_size >= TABLE_CAPACITY) {
        eprintf("loops nested too deeply\n");
        exit(1);
    }
    break_label_stack[break_label_stack_size++] = break_label;
    cont_label_stack[cont_label_stack_size++] = cont_label;
}

// Leaves the innermost loop.
void asm_pop_label() {
    --break_label_stack_size;
    --cont_label_stack_size;
}

// Returns the amount ++ / -- add to a value of the given type: 4 for int
// pointers, 1 for everything else.
int step_of(int type) {
    if (type == TYPE_INT_PTR) {
        return 4;
    }
    return 1;
}

// Scales t0 by the int element size using shift instruction `op` when `type`
// is an int pointer; emits nothing for other types.
void asm_shift_t0(const char* op, int type) {
    if (type == TYPE_INT_PTR) {
        printf(" %s t0, t0, 2\n", op);
    }
}

// Emits lhs + rhs. pointer + integer (in either order) scales the integer by
// the element size; pointer + pointer and arithmetic on void pointers are
// rejected. Returns the result slot.
int asm_add(int lhs, int rhs) {
    int type1 = local_type[lhs] & TYPE_PTR_MASK;
    int type2 = local_type[rhs] & TYPE_PTR_MASK;
    if (type1 != type2) {
        int ptr;
        int idx;
        if (type1) {
            ptr = lhs;
            idx = rhs;
        } else {
            ptr = rhs;
            idx = lhs;
        }
        int ptr_type = local_type[ptr];
        if (ptr_type == TYPE_VOID_PTR) {
            eprintf("void pointer cannot be arithmetically operated\n");
            exit(1);
        }
        load(0, idx);
        load(1, ptr);
        asm_shift_t0("slli", ptr_type);
        printf(" add t0, t0, t1\n");
        return materialize_t0(ptr_type);
    }
    if (type1 && type2) {
        eprintf("operands of addition cannot be both pointers\n");
        exit(1);
    }
    return asm_rr("add", lhs, rhs);
}

// Emits lhs - rhs. pointer - pointer yields an element count, pointer -
// integer is pointer + (-integer), integer - pointer is rejected. Returns the
// result slot.
int asm_sub(int lhs, int rhs) {
    int lhs_type = local_type[lhs];
    int rhs_type = local_type[rhs];
    int type1 = lhs_type & TYPE_PTR_MASK;
    int type2 = rhs_type & TYPE_PTR_MASK;
    if (type1 && type2) {
        if (lhs_type != rhs_type) {
            eprintf("pointer type mismatch\n");
            exit(1);
        }
        if (lhs_type == TYPE_VOID_PTR) {
            eprintf("void pointer cannot be arithmetically operated\n");
            exit(1);
        }
        load(0, lhs);
        load(1, rhs);
        printf(" sub t0, t0, t1\n");
        asm_shift_t0("srai", lhs_type);
        return materialize_t0(TYPE_INT);
    }
    if (type1) {
        int neg = asm_r_arith("neg", rhs);
        return asm_add(lhs, neg);
    }
    if (type2) {
        // integer - pointer has no meaning; do not emit a plain int subtraction
        eprintf("cannot subtract a pointer from an integer\n");
        exit(1);
    }
    return asm_rr("sub", lhs, rhs);
}

// ---------------------------------------------------------------------------
// parser
// ---------------------------------------------------------------------------
//
// Each parse_*_expr function emits code for the expression it parses and
// returns the slot holding the result (-1 for a call to a void function).

int parse_expr();

// Parses the argument list of a call to function `id` (the opening
// parenthesis is already consumed) and emits the call. All arguments are
// evaluated into slots first and only then moved into a0..a7, so nested calls
// cannot clobber argument registers.
int parse_function_call(int id) {
    const char* name = id_table + id_lut[id];
    if (global_marker[id] != MARKER_FUNCTION) {
        eprintf("not a function name: %s\n", name);
        exit(1);
    }
    int arg = 0;
    int args[8];
    while (1) {
        next_token();
        if (token_type == TOKEN_PAREN_RIGHT) {
            break;
        }
        unget_token();
        if (arg >= MAX_ARGS) {
            eprintf("too many arguments\n");
            exit(1);
        }
        args[arg++] = parse_expr();
        next_token();
        if (token_type == TOKEN_COMMA) {
            // continue;
        } else if (token_type == TOKEN_PAREN_RIGHT) {
            break;
        } else {
            eprintf("expecting ',' or ')'\n");
            exit(1);
        }
    }
    for (int i = 0; i < arg; ++i) {
        load(0, args[i]);
        printf(" mv a%d, t0\n", i);
    }
    printf(" call %s\n", name);
    int type = global_type[id];
    if (type != TYPE_VOID) {
        printf(" mv t0, a0\n");
        return materialize_t0(type);
    }
    return -1;
}

// primary: NUMBER | ID | ID '(' args ')' | STRING | '(' expr ')'
int parse_primary_expr() {
    next_token();
    if (token_type == TOKEN_EOF) {
        eprintf("unexpected end of file\n");
        exit(1);
    } else if (token_type == TOKEN_NUMBER) {
        printf(" li t0, %d\n", token_data);
        return materialize_t0(TYPE_INT);
    } else if (token_type == TOKEN_ID) {
        // remember the identifier: the lookahead below may overwrite token_data
        int id = token_data;
        next_token();
        if (token_type == TOKEN_PAREN_LEFT) {
            return parse_function_call(id);
        }
        unget_token();
        return lookup(id);
    } else if (token_type == TOKEN_STRING) {
        printf(" la t0, .LC%d\n", token_data);
        return materialize_t0(TYPE_CHAR_PTR);
    } else if (token_type == TOKEN_PAREN_LEFT) {
        int reg = parse_expr();
        expect_token(TOKEN_PAREN_RIGHT);
        return reg;
    } else {
        eprintf("unexpected token in primary expression: %d\n", token_type);
        exit(1);
    }
}

// postfix: primary { '++' | '--' | '[' expr ']' }
int parse_postfix_expr() {
    int lhs = parse_primary_expr();
    while (1) {
        next_token();
        if (token_type == TOKEN_INC) {
            // the old value is copied to a new slot, which becomes the result
            int type = local_type[lhs];
            int reg = next_reg(type);
            load(0, lhs);
            store_t0(reg);
            printf(" addi t0, t0, %d\n", step_of(type));
            store_t0(lhs);
            lhs = reg;
        } else if (token_type == TOKEN_DEC) {
            int type = local_type[lhs];
            int reg = next_reg(type);
            load(0, lhs);
            store_t0(reg);
            printf(" addi t0, t0, -%d\n", step_of(type));
            store_t0(lhs);
            lhs = reg;
        } else if (token_type == TOKEN_BRACKET_LEFT) {
            // a[i] is *(a + i)
            int rhs = parse_expr();
            expect_token(TOKEN_BRACKET_RIGHT);
            lhs = dereference(asm_add(lhs, rhs));
        } else {
            unget_token();
            break;
        }
    }
    return lhs;
}

// prefix: ( '&' | '*' | '-' | '~' | '!' | '++' | '--' ) prefix | postfix
//
// The operand of a unary operator is itself a prefix expression, so operators
// can be stacked (for example "!!x" or "-*p").
int parse_prefix_expr() {
    next_token();
    if (token_type == TOKEN_AND) {
        int reg = parse_prefix_expr();
        int type = local_type[reg];
        if (type & TYPE_PTR_MASK) {
            eprintf("cannot take address of a pointer\n");
            exit(1);
        }
        load_address(0, reg);
        return materialize_t0(type | TYPE_PTR_MASK);
    } else if (token_type == TOKEN_STAR) {
        int reg = parse_prefix_expr();
        int type = local_type[reg];
        if (!(type & TYPE_PTR_MASK)) {
            eprintf("cannot dereference a non-pointer\n");
            exit(1);
        }
        if (type == TYPE_VOID_PTR) {
            eprintf("cannot dereference void pointer\n");
            exit(1);
        }
        load(0, reg);
        return dereference(materialize_t0(type));
    } else if (token_type == TOKEN_MINUS) {
        int reg = parse_prefix_expr();
        return asm_r_arith("neg", reg);
    } else if (token_type == TOKEN_COMPL) {
        int reg = parse_prefix_expr();
        return asm_r_arith("not", reg);
    } else if (token_type == TOKEN_NOT) {
        int reg = parse_prefix_expr();
        return asm_r("seqz", reg);
    } else if (token_type == TOKEN_INC) {
        int reg = parse_prefix_expr();
        load(0, reg);
        printf(" addi t0, t0, %d\n", step_of(local_type[reg]));
        store_t0(reg);
        return reg;
    } else if (token_type == TOKEN_DEC) {
        int reg = parse_prefix_expr();
        load(0, reg);
        printf(" addi t0, t0, -%d\n", step_of(local_type[reg]));
        store_t0(reg);
        return reg;
    } else {
        unget_token();
        return parse_postfix_expr();
    }
}

// mul: prefix { ( '*' | '/' | '%' ) prefix }
int parse_mul_expr() {
    int lhs = parse_prefix_expr();
    while (1) {
        next_token();
        if (token_type == TOKEN_STAR) {
            int rhs = parse_prefix_expr();
            lhs = asm_rr_arith("mul", lhs, rhs);
        } else if (token_type == TOKEN_DIV) {
            int rhs = parse_prefix_expr();
            lhs = asm_rr_arith("div", lhs, rhs);
        } else if (token_type == TOKEN_REM) {
            int rhs = parse_prefix_expr();
            lhs = asm_rr_arith("rem", lhs, rhs);
        } else {
            unget_token();
            break;
        }
    }
    return lhs;
}

// add: mul { ( '+' | '-' ) mul }
int parse_add_expr() {
    int lhs = parse_mul_expr();
    while (1) {
        next_token();
        if (token_type == TOKEN_ADD) {
            int rhs = parse_mul_expr();
            lhs = asm_add(lhs, rhs);
        } else if (token_type == TOKEN_MINUS) {
            int rhs = parse_mul_expr();
            lhs = asm_sub(lhs, rhs);
        } else {
            unget_token();
            break;
        }
    }
    return lhs;
}

// shift: add { ( '<<' | '>>' ) add }
int parse_shift_expr() {
    int lhs = parse_add_expr();
    while (1) {
        next_token();
        if (token_type == TOKEN_LSHIFT) {
            int rhs = parse_add_expr();
            lhs = asm_rr_arith("sll", lhs, rhs);
        } else if (token_type == TOKEN_RSHIFT) {
            int rhs = parse_add_expr();
            lhs = asm_rr_arith("sra", lhs, rhs);
        } else {
            unget_token();
            break;
        }
    }
    return lhs;
}

// cmp: shift { ( '<' | '>' | '<=' | '>=' ) shift }
// <= and >= are emitted as the negation of > and <.
int parse_cmp_expr() {
    int lhs = parse_shift_expr();
    while (1) {
        next_token();
        if (token_type == TOKEN_LT) {
            int rhs = parse_shift_expr();
            lhs = asm_rr_cmp("slt", lhs, rhs);
        } else if (token_type == TOKEN_GT) {
            int rhs = parse_shift_expr();
            lhs = asm_rr_cmp("sgt", lhs, rhs);
        } else if (token_type == TOKEN_LE) {
            int rhs = parse_shift_expr();
            int sgt = asm_rr_cmp("sgt", lhs, rhs);
            lhs = asm_r("seqz", sgt);
        } else if (token_type == TOKEN_GE) {
            int rhs = parse_shift_expr();
            int slt = asm_rr_cmp("slt", lhs, rhs);
            lhs = asm_r("seqz", slt);
        } else {
            unget_token();
            break;
        }
    }
    return lhs;
}

// eq: cmp { ( '==' | '!=' ) cmp }
// Both are emitted as an xor followed by a test against zero.
int parse_eq_expr() {
    int lhs = parse_cmp_expr();
    while (1) {
        next_token();
        if (token_type == TOKEN_EQ) {
            int rhs = parse_cmp_expr();
            int xor0 = asm_rr_cmp("xor", lhs, rhs);
            lhs = asm_r("seqz", xor0);
        } else if (token_type == TOKEN_NE) {
            int rhs = parse_cmp_expr();
            int xor0 = asm_rr_cmp("xor", lhs, rhs);
            lhs = asm_r("snez", xor0);
        } else {
            unget_token();
            break;
        }
    }
    return lhs;
}

// bitwise-and: eq { '&' eq }
int parse_bitwise_and_expr() {
    int lhs = parse_eq_expr();
    while (1) {
        next_token();
        if (token_type == TOKEN_AND) {
            int rhs = parse_eq_expr();
            lhs = asm_rr_arith("and", lhs, rhs);
        } else {
            unget_token();
            break;
        }
    }
    return lhs;
}

// bitwise-xor: bitwise-and { '^' bitwise-and }
int parse_bitwise_xor_expr() {
    int lhs = parse_bitwise_and_expr();
    while (1) {
        next_token();
        if (token_type == TOKEN_XOR) {
            int rhs = parse_bitwise_and_expr();
            lhs = asm_rr_arith("xor", lhs, rhs);
        } else {
            unget_token();
            break;
        }
    }
    return lhs;
}

// bitwise-or: bitwise-xor { '|' bitwise-xor }
int parse_bitwise_or_expr() {
    int lhs = parse_bitwise_xor_expr();
    while (1) {
        next_token();
        if (token_type == TOKEN_OR) {
            int rhs = parse_bitwise_xor_expr();
            lhs = asm_rr_arith("or", lhs, rhs);
        } else {
            unget_token();
            break;
        }
    }
    return lhs;
}

// logical-and: bitwise-or { '&&' bitwise-or }
// The left operand is normalised to 0/1; when it is 0 the right operand is
// skipped by branching to the shared end label.
int parse_logical_and_expr() {
    int lhs = parse_bitwise_or_expr();
    int label = next_label();
    int label_used = 0;
    while (1) {
        next_token();
        if (token_type == TOKEN_LAND) {
            lhs = asm_r("snez", lhs);
            asm_beqz(lhs, label);
            int rhs = parse_bitwise_or_expr();
            rhs = asm_r("snez", rhs);
            lhs = asm_rr("and", lhs, rhs);
            label_used = 1;
        } else {
            unget_token();
            break;
        }
    }
    if (label_used) {
        asm_label(label);
    }
    return lhs;
}

// logical-or: logical-and { '||' logical-and }
// Mirror image of parse_logical_and_expr: a non-zero left operand skips the
// right operand.
int parse_logical_or_expr() {
    int lhs = parse_logical_and_expr();
    int label = next_label();
    int label_used = 0;
    while (1) {
        next_token();
        if (token_type == TOKEN_LOR) {
            lhs = asm_r("snez", lhs);
            asm_bnez(lhs, label);
            int rhs = parse_logical_and_expr();
            rhs = asm_r("snez", rhs);
            lhs = asm_rr("or", lhs, rhs);
            label_used = 1;
        } else {
            unget_token();
            break;
        }
    }
    if (label_used) {
        asm_label(label);
    }
    return lhs;
}

// assign: logical-or [ '=' assign ]   (right associative)
int parse_assign_expr() {
    int lhs = parse_logical_or_expr();
    next_token();
    if (token_type == TOKEN_ASSIGN) {
        int rhs = parse_assign_expr();
        load(0, rhs);
        store_t0(lhs);
        return lhs;
    } else {
        unget_token();
        return lhs;
    }
}

// expr: assign
int parse_expr() {
    return parse_assign_expr();
}

// Parses the declarator after a local type: ID, optional '[' NUMBER ']',
// optional '=' expr. The terminating ';' is left for the caller.
void parse_local_variable(int type) {
    if (type == TYPE_VOID) {
        eprintf("local variable of void type is not supported\n");
        exit(1);
    }
    expect_token(TOKEN_ID);
    int id = token_data;
    next_token();
    if (token_type == TOKEN_BRACKET_LEFT) {
        if (type & TYPE_PTR_MASK) {
            eprintf("local variable of array of pointers is not supported\n");
            exit(1);
        }
        expect_token(TOKEN_NUMBER);
        int size = token_data;
        expect_token(TOKEN_BRACKET_RIGHT);
        declare_local_array(id, type, size);
        next_token();
    } else {
        declare_local(id, type);
    }
    if (token_type == TOKEN_SEMICOLON) {
        unget_token();
        return;
    }
    unget_token();
    expect_token(TOKEN_ASSIGN);
    int reg = parse_expr();
    load(0, reg);
    store_t0(local_table[id]);
}

void parse_stmt();

// if '(' expr ')' stmt [ else stmt ]   (the `if` keyword is already consumed)
void parse_if() {
    expect_token(TOKEN_PAREN_LEFT);
    int cond = parse_expr();
    int label1 = next_label();
    int label2 = next_label();
    asm_beqz(cond, label1);
    reset_temp();
    expect_token(TOKEN_PAREN_RIGHT);
    parse_stmt();
    asm_j(label2);
    asm_label(label1);
    next_token();
    if (token_type == TOKEN_ELSE) {
        parse_stmt();
    } else {
        unget_token();
    }
    asm_label(label2);
}

// while '(' expr ')' stmt   (the `while` keyword is already consumed)
void parse_while() {
    expect_token(TOKEN_PAREN_LEFT);
    int break_label = next_label();
    int cont_label = next_label();
    asm_push_label(break_label, cont_label);
    asm_label(cont_label);
    int cond = parse_expr();
    asm_beqz(cond, break_label);
    reset_temp();
    expect_token(TOKEN_PAREN_RIGHT);
    parse_stmt();
    asm_j(cont_label);
    asm_label(break_label);
    asm_pop_label();
}

// for '(' init-stmt [cond] ';' [update] ')' stmt   (`for` already consumed)
//
// Code is emitted in source order (init, cond, update, body), so jumps are
// used to run the body before the update. An empty condition is treated as
// always true and an empty update emits nothing.
void parse_for() {
    expect_token(TOKEN_PAREN_LEFT);
    int cont_label = next_label();
    int break_label = next_label();
    int cond_label = next_label();
    int body_label = next_label();
    asm_push_label(break_label, cont_label);
    parse_stmt(); // init (consumes its own ';')
    asm_label(cond_label);
    next_token();
    if (token_type == TOKEN_SEMICOLON) {
        asm_j(body_label);
    } else {
        unget_token();
        int cond = parse_expr();
        asm_beqz(cond, break_label);
        asm_j(body_label);
        reset_temp();
        expect_token(TOKEN_SEMICOLON);
    }
    asm_label(cont_label);
    next_token();
    if (token_type != TOKEN_PAREN_RIGHT) {
        unget_token();
        parse_expr(); // update
        reset_temp();
        expect_token(TOKEN_PAREN_RIGHT);
    }
    asm_j(cond_label);
    asm_label(body_label);
    parse_stmt(); // body
    asm_j(cont_label);
    asm_label(break_label);
    asm_pop_label();
}

// do stmt while '(' expr ')'   (`do` already consumed; ';' left to the caller)
//
// `continue` targets the condition check, not the top of the body, so the
// loop condition is still evaluated after a `continue`.
void parse_do_while() {
    int body_label = next_label();
    int cont_label = next_label();
    int break_label = next_label();
    asm_push_label(break_label, cont_label);
    asm_label(body_label);
    parse_stmt(); // body
    expect_token(TOKEN_WHILE);
    expect_token(TOKEN_PAREN_LEFT);
    asm_label(cont_label);
    int cond = parse_expr();
    asm_bnez(cond, body_label);
    expect_token(TOKEN_PAREN_RIGHT);
    asm_label(break_label);
    asm_pop_label();
}

// Parses one statement. Compound statements and if / while / for return
// directly; every other form must be followed by ';', after which the
// temporaries it used are released.
void parse_stmt() {
    next_token();
    int decl_type;
    if (token_type == TOKEN_IF) {
        parse_if();
        return;
    } else if (token_type == TOKEN_WHILE) {
        parse_while();
        return;
    } else if (token_type == TOKEN_FOR) {
        parse_for();
        return;
    } else if (token_type == TOKEN_DO) {
        parse_do_while();
    } else if (token_type == TOKEN_BRACE_LEFT) {
        while (1) {
            next_token();
            if (token_type == TOKEN_BRACE_RIGHT) {
                break;
            }
            unget_token();
            parse_stmt();
        }
        return;
    } else if (token_type == TOKEN_RETURN) {
        next_token();
        if (token_type == TOKEN_SEMICOLON) {
            asm_j(epilog_label);
            return;
        }
        unget_token();
        int reg = parse_expr();
        load(0, reg);
        printf(" mv a0, t0\n");
        asm_j(epilog_label);
    } else if (token_type == TOKEN_BREAK) {
        int label = asm_get_break_label();
        asm_j(label);
    } else if (token_type == TOKEN_CONTINUE) {
        int label = asm_get_cont_label();
        asm_j(label);
    } else if (token_type == TOKEN_SEMICOLON) {
        // empty statement: hand the ';' back to the check below
        unget_token();
    } else if ((decl_type = parse_type()) >= 0) {
        parse_local_variable(decl_type);
    } else {
        unget_token();
        parse_expr();
    }
    expect_token(TOKEN_SEMICOLON);
    reset_temp();
}

// Parses a function prototype or definition; the return type, the name and
// the opening parenthesis are already consumed.
//
// The body is emitted BEFORE the prolog because the frame size is only known
// once the body has been parsed: the function entry jumps to the prolog,
// which sets up the frame, spills the argument registers and jumps back.
void parse_function(const char* name) {
    reset_local();
    int arg = 0;
    int args[8];
    while (1) {
        next_token();
        if (token_type == TOKEN_PAREN_RIGHT) {
            break;
        }
        if (token_type == TOKEN_ELLIPSIS) {
            expect_token(TOKEN_PAREN_RIGHT);
            break;
        }
        if (token_type == TOKEN_VOID) {
            if (arg != 0) {
                eprintf("void should be the only argument\n");
                exit(1);
            }
            expect_token(TOKEN_PAREN_RIGHT);
            break;
        }
        int arg_type = parse_type();
        if (arg_type < 0 || arg_type == TYPE_VOID) {
            eprintf("expecting a non-void argument type\n");
            exit(1);
        }
        expect_token(TOKEN_ID);
        int arg_name = token_data;
        next_token();
        if (token_type == TOKEN_BRACKET_LEFT) {
            // `T name[]` is a pointer parameter
            expect_token(TOKEN_BRACKET_RIGHT);
            next_token();
            if (arg_type & TYPE_PTR_MASK) {
                eprintf("local variable of array of pointers is not supported\n");
                exit(1);
            }
            arg_type = arg_type | TYPE_PTR_MASK;
        }
        if (arg >= MAX_ARGS) {
            // args[] has MAX_ARGS entries
            eprintf("too many parameters\n");
            exit(1);
        }
        args[arg++] = declare_local(arg_name, arg_type);
        if (token_type == TOKEN_COMMA) {
            // continue;
        } else if (token_type == TOKEN_PAREN_RIGHT) {
            break;
        } else {
            eprintf("expecting ',' or ')'\n");
            exit(1);
        }
    }
    next_token();
    if (token_type == TOKEN_SEMICOLON) {
        // prototype only: nothing to emit
        return;
    }
    unget_token();
    expect_token(TOKEN_BRACE_LEFT);
    printf(".text\n");
    printf(".global %s\n", name);
    printf("%s:\n", name);
    int label = next_label();
    int prolog_label = next_label();
    epilog_label = next_label();
    asm_j(prolog_label);
    asm_label(label);
    while (1) {
        next_token();
        if (token_type == TOKEN_BRACE_RIGHT) {
            break;
        }
        unget_token();
        parse_stmt();
    }
    asm_j(epilog_label);
    // one 8-byte slot per local id, rounded up to a multiple of 16
    int frame_size = max_local_id * 8;
    if (frame_size % 16 != 0) {
        frame_size = frame_size + 8;
    }
    // prolog
    asm_label(prolog_label);
    asm_addi("sp", "sp", -frame_size);
    asm_sd("ra", frame_size - 8, "sp");
    asm_sd("fp", frame_size - 16, "sp");
    asm_addi("fp", "sp", frame_size);
    for (int i = 0; i < arg; ++i) {
        printf(" mv t0, a%d\n", i);
        store_t0(args[i]);
    }
    asm_j(label);
    // epilog
    asm_label(epilog_label);
    asm_ld("fp", frame_size - 16, "sp");
    asm_ld("ra", frame_size - 8, "sp");
    asm_addi("sp", "sp", frame_size);
    printf(" ret\n");
}

// Emits the definition of a global variable. The current token is the one
// following the name: '=' NUMBER for an initialised scalar, '[' NUMBER ']'
// for a zero-filled array, anything else for a zero-filled scalar.
void parse_global_variable(int id, const char* name, int type) {
    if (type == TYPE_VOID) {
        eprintf("global variable of void type is not supported\n");
        exit(1);
    }
    if (type & TYPE_PTR_MASK) {
        eprintf("global variable of pointer is not supported\n");
        exit(1);
    }
    printf(".data\n");
    printf(".globl %s\n", name);
    printf(".align 5\n");
    printf("%s:\n", name);
    if (token_type == TOKEN_ASSIGN) {
        expect_token(TOKEN_NUMBER);
        printf(" .word %d\n", token_data);
    } else if (token_type == TOKEN_BRACKET_LEFT) {
        expect_token(TOKEN_NUMBER);
        int size = token_data;
        expect_token(TOKEN_BRACKET_RIGHT);
        // 4 bytes per element, except for char arrays
        int array_size = 4 * size;
        if (type == TYPE_CHAR) {
            array_size = size;
        }
        printf(" .zero %d\n", array_size);
        declare_global(id, MARKER_ARRAY, type);
    } else {
        printf(" .zero %d\n", 4);
        unget_token();
    }
    expect_token(TOKEN_SEMICOLON);
}

// Parses one top-level declaration (function or global variable); the first
// token of the declaration is the current token.
void parse_global_declaration() {
    int type = parse_type();
    if (type < 0) {
        eprintf("expecting type for global declaration\n");
        exit(1);
    }
    expect_token(TOKEN_ID);
    int id = token_data;
    char* name = id_table + id_lut[id];
    next_token();
    if (token_type == TOKEN_PAREN_LEFT) {
        declare_global(id, MARKER_FUNCTION, type);
        parse_function(name);
    } else {
        declare_global(id, MARKER_SCALAR, type);
        parse_global_variable(id, name, type);
    }
}

// Parses declarations until the end of input.
void parse_top_level() {
    next_token();
    while (token_type != TOKEN_EOF) {
        parse_global_declaration();
        next_token();
    }
}

// Emits every collected string literal as a .LC<n> entry in the data section,
// re-escaping the characters that were unescaped while lexing.
void dump_string_table() {
    printf(".data\n");
    for (int i = 0; i < string_lut_size; ++i) {
        printf(".LC%d: .string \"", i);
        int offset = string_lut[i];
        int ch;
        while ((ch = string_table[offset++]) != 0) {
            if (ch == '\n') {
                printf("\\n");
            } else if (ch == '\t') {
                printf("\\t");
            } else if (ch == '\r') {
                printf("\\r");
            } else if (ch == '\0') {
                printf("\\0");
            } else if (ch == '\\') {
                printf("\\\\");
            } else if (ch == '\'') {
                printf("\\'");
            } else if (ch == '\"') {
                printf("\\\"");
            } else {
                printf("%c", ch);
            }
        }
        printf("\"\n");
    }
}

// Compiles the program on standard input and writes assembly to standard
// output.
int main() {
    parse_top_level();
    dump_string_table();
    return 0;
}