#include "boot-lib.h"

// A single-pass compiler for a small C subset that prints RISC-V (RV64)
// assembly on standard output while it parses.  Input is read one character
// at a time through getchar()/ungetchar(); diagnostics go through eprintf().
//
// NOTE(review): this file only uses constructs that its own parser accepts
// (no "+=", no switch, no ternary, no structs), presumably so that it can
// compile itself -- confirm before introducing other C features here.

// lexer

// Returns non-zero when ch is an ASCII decimal digit.
int is_digit(int ch) {
  return '0' <= ch && ch <= '9';
}

// Returns non-zero when ch may start an identifier (ASCII letter or '_').
int is_id_start(int ch) {
  return 'a' <= ch && ch <= 'z' || 'A' <= ch && ch <= 'Z' || ch == '_';
}

// Returns non-zero when ch may continue an identifier.
int is_id_cont(int ch) {
  return is_id_start(ch) || is_digit(ch);
}

// token_state is 1 while a token has been pushed back by unget_token().
// token_type holds the kind of the current token (one of TOKEN_*).
// token_data holds its payload: the numeric value for TOKEN_NUMBER, the
// identifier index for TOKEN_ID, the string index for TOKEN_STRING.
int token_state;
int token_type;
int token_data;

const int TOKEN_EOF = 0;
const int TOKEN_SEMICOLON = 1;
const int TOKEN_ADD = 2;
const int TOKEN_MINUS = 3;
const int TOKEN_STAR = 4;
const int TOKEN_DIV = 5;
const int TOKEN_REM = 6;
const int TOKEN_ASSIGN = 7;
const int TOKEN_COMMA = 8;
const int TOKEN_DOT = 9;
const int TOKEN_LSHIFT = 10; // unused
const int TOKEN_RSHIFT = 11; // unused
const int TOKEN_AND = 12;
const int TOKEN_OR = 13;
const int TOKEN_XOR = 14;
const int TOKEN_COMPL = 15;
const int TOKEN_NOT = 16;
const int TOKEN_LAND = 17;
const int TOKEN_LOR = 18;
const int TOKEN_ELLIPSIS = 19;
const int TOKEN_INC = 20;
const int TOKEN_DEC = 21;

const int TOKEN_EQ = 40;
const int TOKEN_NE = 41;
const int TOKEN_LT = 42;
const int TOKEN_GT = 43;
const int TOKEN_LE = 44;
const int TOKEN_GE = 45;

const int TOKEN_PAREN_LEFT = 50;
const int TOKEN_PAREN_RIGHT = 51;
const int TOKEN_BRACKET_LEFT = 52;
const int TOKEN_BRACKET_RIGHT = 53;
const int TOKEN_BRACE_LEFT = 54;
const int TOKEN_BRACE_RIGHT = 55;

const int TOKEN_NUMBER = 100;
const int TOKEN_ID = 101;
const int TOKEN_INT = 102;
const int TOKEN_IF = 103;
const int TOKEN_ELSE = 104;
const int TOKEN_WHILE = 105;
const int TOKEN_BREAK = 106;
const int TOKEN_CONTINUE = 107;
const int TOKEN_RETURN = 108;
const int TOKEN_VOID = 109;
const int TOKEN_CONST = 110;
const int TOKEN_CHAR = 111;
const int TOKEN_FOR = 112;

const int TOKEN_STRING = 150;

// Reads the rest of a decimal literal whose first digit is ch and returns
// its value.  The first non-digit character is pushed back.
int parse_int(int ch) {
  int num = ch - '0';
  while (is_digit(ch = getchar())) {
    num = num * 10;
    num = num + ch - '0';
  }
  ungetchar(ch);
  return num;
}

// Reads the character that follows a backslash and returns the character
// it denotes.  Unknown escapes are fatal.
int get_escaped_char() {
  int ch = getchar();
  if (ch == 'n') {
    ch = '\n';
  } else if (ch == 't') {
    ch = '\t';
  } else if (ch == 'r') {
    ch = '\r';
  } else if (ch == '0') {
    ch = '\0';
  } else if (ch == '\\') {
    ch = '\\';
  } else if (ch == '\'') {
    ch = '\'';
  } else if (ch == '\"') {
    ch = '\"';
  } else {
    eprintf("unexpected escaped character: %c\n", ch);
    exit(1);
  }
  return ch;
}

// Returns 1 when the two NUL-terminated strings are equal, 0 otherwise.
int streq(const char* s1, const char* s2) {
  while (*s1 && *s2 && *s1 == *s2) {
    s1++;
    s2++;
  }
  // Equal only if both stopped on the terminator.
  return *s1 == *s2;
}

// String literals are stored back to back (NUL-terminated) in string_table;
// string_lut[i] is the start offset of literal number i.
char string_table[65536];
int string_offset;
int string_lut[4096];
int string_lut_size;

// Reads a string literal body (the opening quote is already consumed),
// stores it and returns its index in string_lut.  End of input or a raw
// newline inside the literal is fatal, as is running out of table space.
int parse_string() {
  int offset = string_offset;
  int ch;
  if (string_lut_size >= 4096) {
    eprintf("too many string literals\n");
    exit(1);
  }
  while ((ch = getchar()) != '"') {
    if (ch == -1 || ch == '\n') {
      eprintf("expecting '\"'\n");
      exit(1);
    }
    if (ch == '\\') {
      ch = get_escaped_char();
    }
    // Keep the last byte of the table free for the terminator.
    if (string_offset >= 65535) {
      eprintf("string table overflow\n");
      exit(1);
    }
    string_table[string_offset++] = ch;
  }
  string_table[string_offset++] = 0;
  string_lut[string_lut_size] = offset;
  return string_lut_size++;
}

// Identifiers are stored back to back (NUL-terminated) in id_table;
// id_lut[i] is the start offset of identifier number i.
char id_table[65536];
int id_offset;
int id_lut[4096];
int id_lut_size;

// Reads an identifier whose first character is ch, appends it to id_table
// and returns its (possibly temporary, see dedup_id) index in id_lut.
// The first non-identifier character is pushed back.
int parse_id(int ch) {
  int offset = id_offset;
  if (id_lut_size >= 4096) {
    eprintf("too many identifiers\n");
    exit(1);
  }
  while (1) {
    // Keep the last byte of the table free for the terminator.
    if (id_offset >= 65535) {
      eprintf("identifier table overflow\n");
      exit(1);
    }
    id_table[id_offset++] = ch;
    ch = getchar();
    if (!is_id_cont(ch)) {
      break;
    }
  }
  ungetchar(ch);
  id_table[id_offset++] = 0;
  id_lut[id_lut_size] = offset;
  return id_lut_size++;
}

// Discards the identifier most recently appended by parse_id (the one
// token_data refers to) and makes token_data refer to new_data instead.
void rewind_id(int new_data) {
  id_offset = id_lut[token_data];
  token_data = new_data;
  --id_lut_size;
}

// If the most recently added identifier already exists earlier in the
// table, drops the new copy and points token_data at the earlier entry.
void dedup_id() {
  char* latest = &id_table[id_lut[id_lut_size - 1]];
  for (int i = 0; i < id_lut_size - 1; i++) {
    char* candidate = &id_table[id_lut[i]];
    if (streq(candidate, latest)) {
      rewind_id(i);
      return;
    }
  }
}

// Lexes an identifier or keyword starting with ch.  Keywords set the
// matching TOKEN_* type and are removed from the identifier table again;
// plain identifiers are de-duplicated so equal names share one index.
void parse_id_like(int ch) {
  token_type = TOKEN_ID;
  token_data = parse_id(ch);
  char* id = &id_table[id_lut[token_data]];
  if (streq(id, "int")) {
    token_type = TOKEN_INT;
  } else if (streq(id, "if")) {
    token_type = TOKEN_IF;
  } else if (streq(id, "else")) {
    token_type = TOKEN_ELSE;
  } else if (streq(id, "while")) {
    token_type = TOKEN_WHILE;
  } else if (streq(id, "break")) {
    token_type = TOKEN_BREAK;
  } else if (streq(id, "continue")) {
    token_type = TOKEN_CONTINUE;
  } else if (streq(id, "return")) {
    token_type = TOKEN_RETURN;
  } else if (streq(id, "void")) {
    token_type = TOKEN_VOID;
  } else if (streq(id, "const")) {
    token_type = TOKEN_CONST;
  } else if (streq(id, "char")) {
    token_type = TOKEN_CHAR;
  } else if (streq(id, "for")) {
    token_type = TOKEN_FOR;
  }
  if (token_type != TOKEN_ID) {
    rewind_id(0);
  } else {
    dedup_id();
  }
}

// Pushes the current token back: the next next_token() call returns it
// again.  Only one token of push-back is kept.
void unget_token() {
  token_state = 1;
}

// Advances to the next token, filling token_type and token_data.
// Whitespace and both comment forms are skipped.  Every freshly lexed
// token is also traced through eprintf.
void next_token() {
  if (token_state) {
    token_state = 0;
    return;
  }
  int ch = getchar();
  while (ch == ' ' || ch == '\t' || ch == '\r' || ch == '\n') {
    ch = getchar();
  }
  if (ch == -1) {
    token_type = TOKEN_EOF;
  } else if (ch == '(') {
    token_type = TOKEN_PAREN_LEFT;
  } else if (ch == ')') {
    token_type = TOKEN_PAREN_RIGHT;
  } else if (ch == '[') {
    token_type = TOKEN_BRACKET_LEFT;
  } else if (ch == ']') {
    token_type = TOKEN_BRACKET_RIGHT;
  } else if (ch == '{') {
    token_type = TOKEN_BRACE_LEFT;
  } else if (ch == '}') {
    token_type = TOKEN_BRACE_RIGHT;
  } else if (ch == '+') {
    int ch2 = getchar();
    if (ch2 == '+') {
      token_type = TOKEN_INC;
    } else {
      ungetchar(ch2);
      token_type = TOKEN_ADD;
    }
  } else if (ch == '-') {
    int ch2 = getchar();
    if (ch2 == '-') {
      token_type = TOKEN_DEC;
    } else {
      ungetchar(ch2);
      token_type = TOKEN_MINUS;
    }
  } else if (ch == '*') {
    token_type = TOKEN_STAR;
  } else if (ch == '/') {
    int ch2 = getchar();
    if (ch2 == '/') {
      // Line comment: skip to the end of the line.  Also stop at end of
      // input so a comment on the last, unterminated line cannot hang.
      // NOTE(review): assumes getchar() keeps returning -1 once the input
      // is exhausted, so the recursive call below yields TOKEN_EOF.
      ch = getchar();
      while (ch != '\n' && ch != -1) {
        ch = getchar();
      }
      next_token();
      return;
    } else if (ch2 == '*') {
      // Block comment: ends at the first '*' directly followed by '/'.
      // Tracking the previous character handles runs of stars before the
      // slash; prev starts at 0 so the opening star cannot close it.
      int prev = 0;
      while (1) {
        ch = getchar();
        if (ch == -1) {
          eprintf("unterminated comment\n");
          exit(1);
        }
        if (prev == '*' && ch == '/') {
          break;
        }
        prev = ch;
      }
      next_token();
      return;
    } else {
      ungetchar(ch2);
      token_type = TOKEN_DIV;
    }
  } else if (ch == '%') {
    token_type = TOKEN_REM;
  } else if (ch == ';') {
    token_type = TOKEN_SEMICOLON;
  } else if (ch == ',') {
    token_type = TOKEN_COMMA;
  } else if (ch == '<') {
    int ch2 = getchar();
    if (ch2 == '=') {
      token_type = TOKEN_LE;
    } else if (ch2 == '<') {
      token_type = TOKEN_LSHIFT;
    } else {
      ungetchar(ch2);
      token_type = TOKEN_LT;
    }
  } else if (ch == '>') {
    int ch2 = getchar();
    if (ch2 == '=') {
      token_type = TOKEN_GE;
    } else if (ch2 == '>') {
      token_type = TOKEN_RSHIFT;
    } else {
      ungetchar(ch2);
      token_type = TOKEN_GT;
    }
  } else if (ch == '=') {
    int ch2 = getchar();
    if (ch2 == '=') {
      token_type = TOKEN_EQ;
    } else {
      ungetchar(ch2);
      token_type = TOKEN_ASSIGN;
    }
  } else if (ch == '!') {
    int ch2 = getchar();
    if (ch2 == '=') {
      token_type = TOKEN_NE;
    } else {
      ungetchar(ch2);
      token_type = TOKEN_NOT;
    }
  } else if (ch == '&') {
    int ch2 = getchar();
    if (ch2 == '&') {
      token_type = TOKEN_LAND;
    } else {
      ungetchar(ch2);
      token_type = TOKEN_AND;
    }
  } else if (ch == '|') {
    int ch2 = getchar();
    if (ch2 == '|') {
      token_type = TOKEN_LOR;
    } else {
      ungetchar(ch2);
      token_type = TOKEN_OR;
    }
  } else if (ch == '^') {
    token_type = TOKEN_XOR;
  } else if (ch == '~') {
    token_type = TOKEN_COMPL;
  } else if (ch == '\'') {
    // A character literal is lexed as a number.
    token_type = TOKEN_NUMBER;
    token_data = getchar();
    if (token_data == '\\') {
      token_data = get_escaped_char();
    }
    if (getchar() != '\'') {
      eprintf("expecting '\n");
      exit(1);
    }
  } else if (ch == '"') {
    token_type = TOKEN_STRING;
    token_data = parse_string();
  } else if (ch == '.') {
    // Only "..." is accepted; a lone '.' or ".." is an error.
    int ch2 = getchar();
    if (ch2 == '.') {
      int ch3 = getchar();
      if (ch3 == '.') {
        token_type = TOKEN_ELLIPSIS;
      } else {
        eprintf("unexpected character: %c\n", ch3);
        exit(1);
      }
    } else {
      eprintf("unexpected character: %c\n", ch2);
      exit(1);
    }
  } else if (is_digit(ch)) {
    token_type = TOKEN_NUMBER;
    token_data = parse_int(ch);
  } else if (is_id_start(ch)) {
    parse_id_like(ch);
  } else {
    eprintf("unexpected character: %c(%d)\n", ch, ch);
    exit(1);
  }
  eprintf("token: %d\n", token_type);
  if (token_type == TOKEN_ID) {
    const char* name = &id_table[id_lut[token_data]];
    eprintf(" id: %s\n", name);
  } else if (token_type == TOKEN_NUMBER) {
    eprintf(" number: %d\n", token_data);
  }
}

// Advances to the next token and aborts unless it has the expected type.
void expect_token(int expected_type) {
  next_token();
  if (token_type != expected_type) {
    eprintf("unexpected token: %d, should be %d\n", token_type, expected_type);
    exit(1);
  }
}

// Value types.  A pointer type is its base type with TYPE_PTR_MASK set.
const int TYPE_VOID = 0;
const int TYPE_INT = 1;
const int TYPE_CHAR = 2;
const int TYPE_VOID_PTR = 16;
const int TYPE_INT_PTR = 17;
const int TYPE_CHAR_PTR = 18;
const int TYPE_PTR_MASK = 16;

// Parses a type starting at the current token: an optional "const", then
// int/char/void, then an optional single '*'.  Returns the TYPE_* value,
// or -1 when the current token does not start a type.  On success the
// current token is the last token of the type; when no '*' follows, the
// token that was peeked is pushed back.
int parse_type() {
  if (token_type == TOKEN_CONST) {
    next_token();
  }
  if (token_type == TOKEN_INT) {
    next_token();
    if (token_type == TOKEN_STAR) {
      return TYPE_INT_PTR;
    }
    unget_token();
    return TYPE_INT;
  } else if (token_type == TOKEN_CHAR) {
    next_token();
    if (token_type == TOKEN_STAR) {
      return TYPE_CHAR_PTR;
    }
    unget_token();
    return TYPE_CHAR;
  } else if (token_type == TOKEN_VOID) {
    next_token();
    if (token_type == TOKEN_STAR) {
      return TYPE_VOID_PTR;
    }
    unget_token();
    return TYPE_VOID;
  } else {
    return -1;
  }
}

// asm

// Every value lives in an 8-byte stack slot addressed relative to fp:
// slot n is at fp - 8 * n - 8 (see load_address).  Slots 0 and 1 are where
// the function prolog saves ra and fp, so allocation starts at slot 2.

int epilog_label;

int local_table[4096]; // id -> local id
int next_local_id = 2;
int max_local_id = 2;

const int MARKER_TEMP = 0;
const int MARKER_SCALAR = 1;
const int MARKER_ARRAY = 2;

int local_marker[4096];
int global_marker[4096];
int local_type[4096];
int global_type[4096];

// Clears all per-function slot state before a new function is parsed.
void reset_local() {
  next_local_id = 2;
  max_local_id = 2;
  for (int i = 0; i < 4096; ++i) {
    local_table[i] = 0;
    local_marker[i] = MARKER_TEMP;
    local_type[i] = TYPE_VOID;
  }
}

// Releases the temporaries on top of the slot stack, stopping at the
// first slot that belongs to a declared variable or array.
void reset_temp() {
  while (next_local_id > 2 && local_marker[next_local_id - 1] == MARKER_TEMP) {
    --next_local_id;
  }
}

// Allocates the next stack slot, records its type and returns its number.
// max_local_id tracks the high-water mark used to size the frame.
int next_reg(int type) {
  if (next_local_id >= 4096) {
    eprintf("too many local slots\n");
    exit(1);
  }
  int reg = next_local_id++;
  local_type[reg] = type;
  if (next_local_id > max_local_id) {
    max_local_id = next_local_id;
  }
  return reg;
}

// Binds identifier id to a fresh scalar slot and returns the slot.  If
// the identifier is already bound in this function, the existing slot is
// returned unchanged.
int declare_local(int id, int type) {
  if (local_table[id] != 0) return local_table[id];
  int reg = next_reg(type);
  local_marker[reg] = MARKER_SCALAR;
  return local_table[id] = reg;
}

// Binds identifier id to `size` consecutive slots marked as array storage
// and returns the last slot allocated.  Higher slot numbers sit at lower
// addresses, so the last slot is the lowest-addressed one, i.e. the base.
// A size of zero is rejected; it would leave no slot to bind the name to.
int declare_local_array(int id, int type, int size) {
  if (local_table[id] != 0) return local_table[id];
  if (size <= 0) {
    eprintf("array size must be positive\n");
    exit(1);
  }
  int reg = 0;
  for (int i = 0; i < size; ++i) {
    reg = next_reg(type);
    local_marker[reg] = MARKER_ARRAY;
  }
  return local_table[id] = reg;
}

// Records the kind (scalar/array) and type of a global identifier.
void declare_global(int id, int marker, int type) {
  global_marker[id] = marker;
  global_type[id] = type;
}

// Returns non-zero when value fits the 12-bit signed immediate field.
int check_itype_immediate(int value) {
  return value >= -2048 && value <= 2047;
}

// Emits "ld rd, imm(rs)", going through t0 when imm is out of range.
void asm_ld(const char* rd, int imm, const char* rs) {
  if (check_itype_immediate(imm)) {
    printf(" ld %s, %d(%s)\n", rd, imm, rs);
  } else {
    printf(" li t0, %d\n", imm);
    printf(" add t0, %s, t0\n", rs);
    printf(" ld %s, 0(t0)\n", rd);
  }
}

// Emits "sd rs1, imm(rs2)", going through t0 when imm is out of range.
void asm_sd(const char* rs1, int imm, const char* rs2) {
  if (check_itype_immediate(imm)) {
    printf(" sd %s, %d(%s)\n", rs1, imm, rs2);
  } else {
    printf(" li t0, %d\n", imm);
    printf(" add t0, %s, t0\n", rs2);
    printf(" sd %s, 0(t0)\n", rs1);
  }
}

// Emits "addi rd, rs, imm", going through t0 when imm is out of range.
void asm_addi(const char* rd, const char* rs, int imm) {
  if (check_itype_immediate(imm)) {
    printf(" addi %s, %s, %d\n", rd, rs, imm);
  } else {
    printf(" li t0, %d\n", imm);
    printf(" add %s, %s, t0\n", rd, rs);
  }
}

// A slot number with the INDIRECTION bit set denotes "the object whose
// address is stored in that slot" rather than the slot itself.
const int INDIRECTION = 1048576; // 2**20

// Returns the recorded type of a slot, ignoring the INDIRECTION bit.
int local_type_of(int rs1) {
  return local_type[rs1 & ~INDIRECTION];
}

// Emits code that leaves the address of the object denoted by id in
// register t<rd>: the slot's own address for a plain slot, or the pointer
// stored in the slot when the INDIRECTION bit is set.
void load_address(int rd, int id) {
  if (id & INDIRECTION) {
    id = id & ~INDIRECTION;
    int offset = -id * 8 - 8;
    if (check_itype_immediate(offset)) {
      printf(" ld t%d, %d(fp) # indirection\n", rd, offset);
    } else {
      printf(" li t%d, %d\n", rd, offset);
      printf(" add t%d, fp, t%d\n", rd, rd);
      printf(" ld t%d, 0(t%d) # indirection\n", rd, rd);
    }
  } else {
    int offset = -id * 8 - 8;
    if (check_itype_immediate(offset)) {
      printf(" addi t%d, fp, %d\n", rd, offset);
    } else {
      printf(" li t%d, %d\n", rd, offset);
      printf(" add t%d, fp, t%d\n", rd, rd);
    }
  }
}

// Emits code that loads the value denoted by id into t<rd>.  A byte load
// is used only for char objects reached through a pointer; everything
// else is loaded as a full 8-byte word.
void load(int rd, int id) {
  load_address(rd, id);
  const char* op = "ld";
  if (local_type_of(id) == TYPE_CHAR && (id & INDIRECTION)) {
    op = "lb";
  }
  printf(" %s t%d, 0(t%d) # id: type %d\n", op, rd, rd, local_type_of(id));
}

// Emits code that stores t0 into the object denoted by id.  Uses t1 as
// the address register, so t1 is clobbered.
void store_t0(int id) {
  load_address(1, id);
  const char* op = "sd";
  if (local_type_of(id) == TYPE_CHAR && (id & INDIRECTION)) {
    op = "sb";
  }
  printf(" %s t0, 0(t1) # id: type %d\n", op, local_type_of(id));
}

// Spills t0 into a freshly allocated slot of the given type and returns
// that slot.
int materialize_t0(int type) {
  int reg = next_reg(type);
  store_t0(reg);
  return reg;
}

// Turns a slot holding a pointer into a reference to the pointee: the
// pointer bit is cleared from the slot's recorded type and the slot
// number is returned with the INDIRECTION bit set.
int indirection_of(int reg) {
  local_type[reg] = local_type[reg] & ~TYPE_PTR_MASK;
  return reg | INDIRECTION;
}

// Resolves identifier id to a slot reference, emitting code as needed.
// Local scalars resolve to their own slot.  Local arrays yield a new
// temporary holding the array's address.  Globals yield a temporary
// holding the symbol's address, which is returned as an indirection for
// scalars and as a plain pointer value for arrays.  Unknown names are
// fatal.
int lookup(int id) {
  int local = local_table[id];
  if (local) {
    if (local_marker[local] == MARKER_ARRAY) {
      load_address(0, local);
      return materialize_t0(local_type[local] | TYPE_PTR_MASK);
    }
    return local;
  }
  const char* name = &id_table[id_lut[id]];
  if (global_marker[id]) {
    printf(" la t0, %s # id: %d\n", name, id);
    int reg = materialize_t0(global_type[id] | TYPE_PTR_MASK);
    if (global_marker[id] != MARKER_ARRAY) {
      reg = indirection_of(reg);
    }
    return reg;
  }
  eprintf("unresolved identifier: %s\n", name);
  exit(1);
}

int next_label_id = 0;

// Returns a fresh label number.
int next_label() {
  return next_label_id++;
}

// Emits the definition of label L<label> and returns the label.
int asm_label(int label) {
  printf("L%d:\n", label);
  return label;
}

// Returns non-zero when rs1 must not be overwritten with a result: it is
// an indirection, or the slot belongs to a declared variable or array.
int is_not_reusable(int rs1) {
  return (rs1 & INDIRECTION) || local_marker[rs1] != MARKER_TEMP;
}

// Emits the unary instruction "op t0, t0" on the value of rs1 and returns
// the slot holding the result (rs1 itself when it is a reusable temp).
int asm_r(const char* op, int rs1) {
  load(0, rs1);
  printf(" %s t0, t0\n", op);
  int rd = rs1;
  if (is_not_reusable(rs1)) rd = next_reg(local_type_of(rs1));
  store_t0(rd);
  return rd;
}

// Emits the binary instruction "op t0, t0, t1" on the values of rs1 and
// rs2 and returns the slot holding the result.  rs1 is reused when both
// operands are temporaries, rs2 when only rs1 is not reusable; whenever
// rs2 is not reusable a new slot typed like rs1 is allocated.
int asm_rr(const char* op, int rs1, int rs2) {
  load(0, rs1);
  load(1, rs2);
  printf(" %s t0, t0, t1\n", op);
  int rd = rs1;
  if (is_not_reusable(rs1)) rd = rs2;
  if (is_not_reusable(rs2)) rd = next_reg(local_type_of(rs1));
  store_t0(rd);
  return rd;
}

// Emits a branch to L<label> taken when the value of rs1 is zero.
void asm_beqz(int rs1, int label) {
  load(0, rs1);
  printf(" beqz t0, L%d\n", label);
}

// Emits a branch to L<label> taken when the value of rs1 is non-zero.
void asm_bnez(int rs1, int label) {
  load(0, rs1);
  printf(" bnez t0, L%d\n", label);
}

// Emits an unconditional jump to L<label>.
void asm_j(int label) {
  printf(" j L%d\n", label);
}

// Parallel stacks holding the break/continue targets of enclosing loops.
int break_label_stack[4096];
int cont_label_stack[4096];
int break_label_stack_size;
int cont_label_stack_size;

// Returns the break target of the innermost loop; fatal outside a loop.
int asm_get_break_label() {
  if (break_label_stack_size <= 0) {
    eprintf("break outside of a loop\n");
    exit(1);
  }
  return break_label_stack[break_label_stack_size - 1];
}

// Returns the continue target of the innermost loop; fatal outside a loop.
int asm_get_cont_label() {
  if (cont_label_stack_size <= 0) {
    eprintf("continue outside of a loop\n");
    exit(1);
  }
  return cont_label_stack[cont_label_stack_size - 1];
}

// Enters a loop: records its break and continue targets.
void asm_push_label(int break_label, int cont_label) {
  break_label_stack[break_label_stack_size++] = break_label;
  cont_label_stack[cont_label_stack_size++] = cont_label;
}

// Leaves the innermost loop.
void asm_pop_label() {
  --break_label_stack_size;
  --cont_label_stack_size;
}

// Returns the amount ++/-- adds to a value of the given type: 8 for
// int pointers, 1 for everything else.
int step_of(int type) {
  if (type == TYPE_INT_PTR) {
    return 8;
  }
  return 1;
}

// Scales the index in t0 by 8 when indexing through an int pointer.
void asm_slli_t0(int type) {
  if (type == TYPE_INT_PTR) {
    printf(" slli t0, t0, 3\n");
  }
}

// parser

// Each parse_*_expr function parses one precedence level, emits the code
// for it and returns the slot reference that holds the result.

int parse_expr();

// primary: number | identifier | string literal | '(' expr ')'
int parse_primary_expr() {
  next_token();
  if (token_type == TOKEN_EOF) {
    exit(1);
  } else if (token_type == TOKEN_NUMBER) {
    printf(" li t0, %d\n", token_data);
    return materialize_t0(TYPE_INT);
  } else if (token_type == TOKEN_ID) {
    return lookup(token_data);
  } else if (token_type == TOKEN_STRING) {
    printf(" la t0, .LC%d\n", token_data);
    return materialize_t0(TYPE_CHAR_PTR);
  } else if (token_type == TOKEN_PAREN_LEFT) {
    int reg = parse_expr();
    expect_token(TOKEN_PAREN_RIGHT);
    return reg;
  } else {
    eprintf("unexpected token: %d\n", token_type);
    exit(1);
  }
}

// postfix: primary followed by x++, x--, x[i] or f(args).  Note that
// ++, --, indexing and calls each return immediately, so they end the
// postfix chain.
int parse_postfix_expr() {
  int lhs = parse_primary_expr();
  while (1) {
    next_token();
    if (token_type == TOKEN_INC) {
      // Save the old value in a new slot, then store old + step back.
      int type = local_type_of(lhs);
      int reg = next_reg(type);
      load(0, lhs);
      store_t0(reg);
      printf(" addi t0, t0, %d\n", step_of(type));
      store_t0(lhs);
      return reg;
    } else if (token_type == TOKEN_DEC) {
      int type = local_type_of(lhs);
      int reg = next_reg(type);
      load(0, lhs);
      store_t0(reg);
      printf(" addi t0, t0, -%d\n", step_of(type));
      store_t0(lhs);
      return reg;
    } else if (token_type == TOKEN_BRACKET_LEFT) {
      int rhs = parse_expr();
      expect_token(TOKEN_BRACKET_RIGHT);
      int type1 = local_type_of(lhs) & TYPE_PTR_MASK;
      int type2 = local_type_of(rhs) & TYPE_PTR_MASK;
      if (type1 == type2) {
        eprintf("there should be exact one pointer and one integer in array access\n");
        exit(1);
      }
      // Either operand may be the pointer (a[i] or i[a]).
      int ptr;
      int idx;
      if (type1) {
        ptr = lhs;
        idx = rhs;
      } else {
        ptr = rhs;
        idx = lhs;
      }
      int ptr_type = local_type_of(ptr);
      load(0, idx);
      load(1, ptr);
      asm_slli_t0(ptr_type);
      printf(" add t0, t0, t1\n");
      return indirection_of(materialize_t0(ptr_type));
    } else if (token_type == TOKEN_PAREN_LEFT) {
      // Call: evaluate up to 8 arguments, move them into a0..a7, jump.
      int arg = 0;
      int args[8];
      while (1) {
        next_token();
        if (token_type == TOKEN_PAREN_RIGHT) {
          break;
        }
        unget_token();
        if (arg >= 8) {
          eprintf("too many arguments\n");
          exit(1);
        }
        args[arg++] = parse_expr();
        next_token();
        if (token_type == TOKEN_COMMA) {
          // continue;
        } else if (token_type == TOKEN_PAREN_RIGHT) {
          break;
        } else {
          eprintf("expecting ',' or ')'\n");
          exit(1);
        }
      }
      for (int i = 0; i < arg; ++i) {
        load(0, args[i]);
        printf(" mv a%d, t0\n", i);
      }
      load_address(0, lhs);
      printf(" jalr t0\n");
      printf(" mv t0, a0\n");
      return materialize_t0(local_type_of(lhs));
    } else {
      unget_token();
      return lhs;
    }
  }
}

// prefix: & * - ~ ! ++ -- applied to a postfix expression.
int parse_prefix_expr() {
  next_token();
  if (token_type == TOKEN_AND) {
    int reg = parse_postfix_expr();
    load_address(0, reg);
    return materialize_t0(local_type_of(reg) | TYPE_PTR_MASK);
  } else if (token_type == TOKEN_STAR) {
    int reg = parse_postfix_expr();
    load(0, reg);
    return indirection_of(materialize_t0(local_type_of(reg)));
  } else if (token_type == TOKEN_MINUS) {
    int reg = parse_postfix_expr();
    return asm_r("neg", reg);
  } else if (token_type == TOKEN_COMPL) {
    int reg = parse_postfix_expr();
    return asm_r("not", reg);
  } else if (token_type == TOKEN_NOT) {
    int reg = parse_postfix_expr();
    return asm_r("seqz", reg);
  } else if (token_type == TOKEN_INC) {
    int reg = parse_postfix_expr();
    load(0, reg);
    printf(" addi t0, t0, %d\n", step_of(local_type_of(reg)));
    store_t0(reg);
    return reg;
  } else if (token_type == TOKEN_DEC) {
    int reg = parse_postfix_expr();
    load(0, reg);
    printf(" addi t0, t0, -%d\n", step_of(local_type_of(reg)));
    store_t0(reg);
    return reg;
  } else {
    unget_token();
    return parse_postfix_expr();
  }
}

// multiplicative: * / %
int parse_mul_expr() {
  int lhs = parse_prefix_expr();
  while (1) {
    next_token();
    if (token_type == TOKEN_STAR) {
      int rhs = parse_prefix_expr();
      lhs = asm_rr("mul", lhs, rhs);
    } else if (token_type == TOKEN_DIV) {
      int rhs = parse_prefix_expr();
      lhs = asm_rr("div", lhs, rhs);
    } else if (token_type == TOKEN_REM) {
      int rhs = parse_prefix_expr();
      lhs = asm_rr("rem", lhs, rhs);
    } else {
      unget_token();
      break;
    }
  }
  return lhs;
}

// additive: + -  (no pointer scaling is applied here)
int parse_add_expr() {
  int lhs = parse_mul_expr();
  while (1) {
    next_token();
    if (token_type == TOKEN_ADD) {
      int rhs = parse_mul_expr();
      lhs = asm_rr("add", lhs, rhs);
    } else if (token_type == TOKEN_MINUS) {
      int rhs = parse_mul_expr();
      lhs = asm_rr("sub", lhs, rhs);
    } else {
      unget_token();
      break;
    }
  }
  return lhs;
}

// relational: < > <= >=   (<= and >= are the negation of > and <)
int parse_cmp_expr() {
  int lhs = parse_add_expr();
  while (1) {
    next_token();
    if (token_type == TOKEN_LT) {
      int rhs = parse_add_expr();
      lhs = asm_rr("slt", lhs, rhs);
    } else if (token_type == TOKEN_GT) {
      int rhs = parse_add_expr();
      lhs = asm_rr("sgt", lhs, rhs);
    } else if (token_type == TOKEN_LE) {
      int rhs = parse_add_expr();
      int sgt = asm_rr("sgt", lhs, rhs);
      lhs = asm_r("seqz", sgt);
    } else if (token_type == TOKEN_GE) {
      int rhs = parse_add_expr();
      int slt = asm_rr("slt", lhs, rhs);
      lhs = asm_r("seqz", slt);
    } else {
      unget_token();
      break;
    }
  }
  return lhs;
}

// equality: == !=   (xor the operands, then test the result against zero)
int parse_eq_expr() {
  int lhs = parse_cmp_expr();
  while (1) {
    next_token();
    if (token_type == TOKEN_EQ) {
      int rhs = parse_cmp_expr();
      int xor0 = asm_rr("xor", lhs, rhs);
      lhs = asm_r("seqz", xor0);
    } else if (token_type == TOKEN_NE) {
      int rhs = parse_cmp_expr();
      int xor0 = asm_rr("xor", lhs, rhs);
      lhs = asm_r("snez", xor0);
    } else {
      unget_token();
      break;
    }
  }
  return lhs;
}

// bitwise and: &
int parse_bitwise_and_expr() {
  int lhs = parse_eq_expr();
  while (1) {
    next_token();
    if (token_type == TOKEN_AND) {
      int rhs = parse_eq_expr();
      lhs = asm_rr("and", lhs, rhs);
    } else {
      unget_token();
      break;
    }
  }
  return lhs;
}

// bitwise xor: ^
int parse_bitwise_xor_expr() {
  int lhs = parse_bitwise_and_expr();
  while (1) {
    next_token();
    if (token_type == TOKEN_XOR) {
      int rhs = parse_bitwise_and_expr();
      lhs = asm_rr("xor", lhs, rhs);
    } else {
      unget_token();
      break;
    }
  }
  return lhs;
}

// bitwise or: |
int parse_bitwise_or_expr() {
  int lhs = parse_bitwise_xor_expr();
  while (1) {
    next_token();
    if (token_type == TOKEN_OR) {
      int rhs = parse_bitwise_xor_expr();
      lhs = asm_rr("or", lhs, rhs);
    } else {
      unget_token();
      break;
    }
  }
  return lhs;
}

// logical and: &&  with short-circuit.  The left value is normalised to
// 0/1 and, when it is 0, the code jumps past the right operand to a label
// shared by the whole chain.  The label is emitted only if it was used.
int parse_logical_and_expr() {
  int lhs = parse_bitwise_or_expr();
  int label = next_label();
  int label_used = 0;
  while (1) {
    next_token();
    if (token_type == TOKEN_LAND) {
      lhs = asm_r("snez", lhs);
      asm_beqz(lhs, label);
      int rhs = parse_bitwise_or_expr();
      rhs = asm_r("snez", rhs);
      lhs = asm_rr("and", lhs, rhs);
      label_used = 1;
    } else {
      unget_token();
      break;
    }
  }
  if (label_used) {
    asm_label(label);
  }
  return lhs;
}

// logical or: ||  with short-circuit; mirrors parse_logical_and_expr but
// skips the right operand when the left value is non-zero.
int parse_logical_or_expr() {
  int lhs = parse_logical_and_expr();
  int label = next_label();
  int label_used = 0;
  while (1) {
    next_token();
    if (token_type == TOKEN_LOR) {
      lhs = asm_r("snez", lhs);
      asm_bnez(lhs, label);
      int rhs = parse_logical_and_expr();
      rhs = asm_r("snez", rhs);
      lhs = asm_rr("or", lhs, rhs);
      label_used = 1;
    } else {
      unget_token();
      break;
    }
  }
  if (label_used) {
    asm_label(label);
  }
  return lhs;
}

// assignment: lhs = rhs (right associative).  Returns the target.
int parse_assign_expr() {
  int lhs = parse_logical_or_expr();
  next_token();
  if (token_type == TOKEN_ASSIGN) {
    int rhs = parse_assign_expr();
    load(0, rhs);
    store_t0(lhs);
    return lhs;
  } else {
    unget_token();
    return lhs;
  }
}

// Parses a full expression and returns the slot reference of its value.
int parse_expr() {
  return parse_assign_expr();
}

// Parses the declarator of a local variable whose type has already been
// parsed: a name, an optional [size], and an optional "= expr"
// initializer.  The terminating ';' is left for the caller.
void parse_local_variable(int type) {
  expect_token(TOKEN_ID);
  int id = token_data;
  next_token();
  if (token_type == TOKEN_BRACKET_LEFT) {
    expect_token(TOKEN_NUMBER);
    int size = token_data;
    expect_token(TOKEN_BRACKET_RIGHT);
    declare_local_array(id, type, size);
    next_token();
  } else {
    declare_local(id, type);
  }
  if (token_type == TOKEN_SEMICOLON) {
    unget_token();
    return;
  }
  unget_token();
  expect_token(TOKEN_ASSIGN);
  int reg = parse_expr();
  load(0, reg);
  store_t0(local_table[id]);
}

void parse_stmt();

// if '(' expr ')' stmt [else stmt]   (the "if" is already consumed)
void parse_if() {
  expect_token(TOKEN_PAREN_LEFT);
  int cond = parse_expr();
  int label1 = next_label();
  int label2 = next_label();
  asm_beqz(cond, label1);
  reset_temp();
  expect_token(TOKEN_PAREN_RIGHT);
  parse_stmt();
  asm_j(label2);
  asm_label(label1);
  next_token();
  if (token_type == TOKEN_ELSE) {
    parse_stmt();
  } else {
    unget_token();
  }
  asm_label(label2);
}

// while '(' expr ')' stmt   (the "while" is already consumed)
void parse_while() {
  expect_token(TOKEN_PAREN_LEFT);
  int break_label = next_label();
  int cont_label = next_label();
  asm_push_label(break_label, cont_label);
  asm_label(cont_label);
  int cond = parse_expr();
  asm_beqz(cond, break_label);
  reset_temp();
  expect_token(TOKEN_PAREN_RIGHT);
  parse_stmt();
  asm_j(cont_label);
  asm_label(break_label);
  asm_pop_label();
}

// for '(' init-stmt cond ';' update ')' stmt   (the "for" is consumed).
// Code is emitted in source order (cond, update, body), so jumps stitch
// the pieces together: cond -> body -> update (the continue target) ->
// cond.  All three header parts are required.
void parse_for() {
  expect_token(TOKEN_PAREN_LEFT);
  int cont_label = next_label();
  int break_label = next_label();
  int cond_label = next_label();
  int body_label = next_label();
  asm_push_label(break_label, cont_label);
  parse_stmt(); // init
  asm_label(cond_label);
  int cond = parse_expr();
  asm_beqz(cond, break_label);
  asm_j(body_label);
  reset_temp();
  expect_token(TOKEN_SEMICOLON);
  asm_label(cont_label);
  parse_expr(); // update
  reset_temp();
  expect_token(TOKEN_PAREN_RIGHT);
  asm_j(cond_label);
  asm_label(body_label);
  parse_stmt(); // body
  asm_j(cont_label);
  asm_label(break_label);
  asm_pop_label();
}

// Parses one statement: if / while / for / block / return / break /
// continue / empty / local declaration / expression statement.
void parse_stmt() {
  next_token();
  int decl_type;
  if (token_type == TOKEN_IF) {
    parse_if();
    return;
  } else if (token_type == TOKEN_WHILE) {
    parse_while();
    return;
  } else if (token_type == TOKEN_FOR) {
    parse_for();
    return;
  } else if (token_type == TOKEN_BRACE_LEFT) {
    while (1) {
      next_token();
      if (token_type == TOKEN_BRACE_RIGHT) {
        break;
      }
      unget_token();
      parse_stmt();
    }
    return;
  } else if (token_type == TOKEN_RETURN) {
    next_token();
    if (token_type == TOKEN_SEMICOLON) {
      asm_j(epilog_label);
      return;
    }
    unget_token();
    int reg = parse_expr();
    load(0, reg);
    printf(" mv a0, t0\n");
    asm_j(epilog_label);
  } else if (token_type == TOKEN_BREAK) {
    int label = asm_get_break_label();
    asm_j(label);
  } else if (token_type == TOKEN_CONTINUE) {
    int label = asm_get_cont_label();
    asm_j(label);
  } else if (token_type == TOKEN_SEMICOLON) {
    // Empty statement: hand the ';' back to the check below.
    unget_token();
  } else if ((decl_type = parse_type()) >= 0) {
    parse_local_variable(decl_type);
  } else {
    unget_token();
    parse_expr();
  }
  expect_token(TOKEN_SEMICOLON);
  reset_temp();
}

// Parses a function declaration or definition; the return type, the name
// and the '(' are already consumed.  A declaration (ending in ';') emits
// nothing.  For a definition the body is emitted first and the prolog
// after it, because the frame size is only known once the whole body has
// been parsed; the entry point jumps to the prolog, which jumps back.
void parse_function(const char* name) {
  reset_local();
  int arg = 0;
  int args[8];
  while (1) {
    next_token();
    if (token_type == TOKEN_PAREN_RIGHT) {
      break;
    }
    if (token_type == TOKEN_ELLIPSIS) {
      expect_token(TOKEN_PAREN_RIGHT);
      break;
    }
    // NOTE(review): parse_type() may return -1 here for an unknown type
    // name; that value is stored as the parameter type unchecked.
    int decl_type = parse_type();
    expect_token(TOKEN_ID);
    // Only a0..a7 are copied in the prolog, and args has 8 entries.
    if (arg >= 8) {
      eprintf("too many arguments\n");
      exit(1);
    }
    args[arg++] = declare_local(token_data, decl_type);
    next_token();
    if (token_type == TOKEN_BRACKET_LEFT) {
      expect_token(TOKEN_BRACKET_RIGHT);
      next_token();
    }
    if (token_type == TOKEN_COMMA) {
      // continue;
    } else if (token_type == TOKEN_PAREN_RIGHT) {
      break;
    } else {
      eprintf("expecting ',' or ')'\n");
      exit(1);
    }
  }
  next_token();
  if (token_type == TOKEN_SEMICOLON) {
    return;
  }
  unget_token();
  expect_token(TOKEN_BRACE_LEFT);
  printf(".text\n");
  printf(".global %s\n", name);
  printf("%s:\n", name);
  int label = next_label();
  int prolog_label = next_label();
  epilog_label = next_label();
  asm_j(prolog_label);
  asm_label(label);
  while (1) {
    next_token();
    if (token_type == TOKEN_BRACE_RIGHT) {
      break;
    }
    unget_token();
    parse_stmt();
  }
  asm_j(epilog_label);
  // Frame size: one 8-byte slot per local, rounded up to 16 bytes.
  int shift = max_local_id * 8;
  if (shift % 16 != 0) {
    shift = shift + 8;
  }
  // prolog
  asm_label(prolog_label);
  asm_addi("sp", "sp", -shift);
  asm_sd("ra", shift - 8, "sp");
  asm_sd("fp", shift - 16, "sp");
  asm_addi("fp", "sp", shift);
  for (int i = 0; i < arg; ++i) {
    printf(" mv t0, a%d\n", i);
    store_t0(args[i]);
  }
  asm_j(label);
  // epilog
  asm_label(epilog_label);
  asm_ld("fp", shift - 16, "sp");
  asm_ld("ra", shift - 8, "sp");
  asm_addi("sp", "sp", shift);
  printf(" ret\n");
}

// Emits the storage for a global variable; the type and name are already
// consumed and the current token is the one after the name.  Supported
// forms: "= number", "[number]" and no initializer.  Every element takes
// 8 bytes because globals are read and written with ld/sd (see load and
// store_t0), so an initialized scalar is emitted as a 64-bit .dword.
void parse_global_variable(int id, const char* name, int type) {
  printf(".data\n");
  printf(".globl %s\n", name);
  printf(".align 5\n");
  printf("%s:\n", name);
  if (token_type == TOKEN_ASSIGN) {
    expect_token(TOKEN_NUMBER);
    printf(" .dword %d\n", token_data);
  } else if (token_type == TOKEN_BRACKET_LEFT) {
    expect_token(TOKEN_NUMBER);
    int size = token_data;
    expect_token(TOKEN_BRACKET_RIGHT);
    printf(" .zero %d\n", 8 * size);
    declare_global(id, MARKER_ARRAY, type);
  } else {
    printf(" .zero %d\n", 8);
    unget_token();
  }
  expect_token(TOKEN_SEMICOLON);
}

// Parses one top-level declaration after its type: registers the name as
// a global, then dispatches on whether a '(' follows (function) or not
// (variable).
void parse_decl(int type) {
  expect_token(TOKEN_ID);
  int id = token_data;
  declare_global(id, MARKER_SCALAR, type);
  char* name = &id_table[id_lut[id]];
  next_token();
  if (token_type == TOKEN_PAREN_LEFT) {
    parse_function(name);
  } else {
    parse_global_variable(id, name, type);
  }
}

// Parses top-level declarations until end of input.  Written as a loop
// so the call depth does not grow with the number of declarations.
void parse_top_level() {
  int decl_type;
  while (1) {
    next_token();
    if (token_type == TOKEN_EOF) {
      return;
    } else if ((decl_type = parse_type()) >= 0) {
      parse_decl(decl_type);
    } else {
      eprintf("unexpected token: %d\n", token_type);
      exit(1);
    }
  }
}

// Emits every collected string literal as ".LC<i>: .string "..."" in the
// data section, re-escaping characters that need it.  Literals are stored
// NUL-terminated, so output stops at the first NUL byte of each literal.
void dump_string_table() {
  printf(".data\n");
  for (int i = 0; i < string_lut_size; ++i) {
    printf(".LC%d: .string \"", i);
    int offset = 0;
    int ch;
    while ((ch = string_table[string_lut[i] + offset]) != 0) {
      if (ch == '\n') {
        printf("\\n");
      } else if (ch == '\t') {
        printf("\\t");
      } else if (ch == '\r') {
        printf("\\r");
      } else if (ch == '\\') {
        printf("\\\\");
      } else if (ch == '\'') {
        printf("\\'");
      } else if (ch == '\"') {
        printf("\\\"");
      } else {
        printf("%c", ch);
      }
      offset++;
    }
    printf("\"\n");
  }
}

// Compiles the program on standard input, then appends the string table.
int main() {
  parse_top_level();
  dump_string_table();
  return 0;
}