/* * RVBTCC By Yaossg * A lightweight bootstrapping compiler in 2000 lines. * * It aims to demonstrate how to write a bootstrapping compiler in no time. * Syntax is similar to C, output is RISC-V assembly. * Only dependent on some glibc functions for I/O. * Purely for educational purposes. Do not use in production. */ // glibc dependency extern void* stdin; extern void* stdout; extern void* stderr; int printf(char* format, ...); int getchar(); void exit(int status); int fprintf(void* file, char* format, ...); int ungetc(int ch, void* file); void ungetchar(int ch) { ungetc(ch, stdin); } // limitations enum { STRING_TABLE_SIZE = 65536, STRING_LUT_SIZE = 4096, ID_TABLE_SIZE = 65536, ID_LUT_SIZE = 4096, LOCAL_SIZE = 4096, REG_SIZE = 4096, }; // constants enum { TOKEN_EOF = -1, TOKEN_ADD, TOKEN_SUB, TOKEN_MUL, TOKEN_DIV, TOKEN_REM, TOKEN_AND, TOKEN_OR, TOKEN_XOR, TOKEN_LSHIFT, TOKEN_RSHIFT, TOKEN_ASSIGN, TOKEN_ADD_ASSIGN, TOKEN_SUB_ASSIGN, TOKEN_MUL_ASSIGN, TOKEN_DIV_ASSIGN, TOKEN_REM_ASSIGN, TOKEN_AND_ASSIGN, TOKEN_OR_ASSIGN, TOKEN_XOR_ASSIGN, TOKEN_LSHIFT_ASSIGN, TOKEN_RSHIFT_ASSIGN, TOKEN_INV, TOKEN_NOT, TOKEN_LAND, TOKEN_LOR, TOKEN_INC, TOKEN_DEC, TOKEN_QUESTION, TOKEN_COLON, TOKEN_SEMICOLON, TOKEN_COMMA, TOKEN_ELLIPSIS, TOKEN_EQ, TOKEN_NE, TOKEN_LT, TOKEN_GT, TOKEN_LE, TOKEN_GE, TOKEN_PAREN_LEFT = 50, TOKEN_PAREN_RIGHT, TOKEN_BRACKET_LEFT, TOKEN_BRACKET_RIGHT, TOKEN_BRACE_LEFT, TOKEN_BRACE_RIGHT, TOKEN_STRING = 99, TOKEN_NUMBER, TOKEN_ID, TOKEN_IF, TOKEN_ELSE, TOKEN_WHILE, TOKEN_FOR, TOKEN_DO, TOKEN_BREAK, TOKEN_CONTINUE, TOKEN_RETURN, TOKEN_ENUM, TOKEN_EXTERN, TOKEN_VOID = 128, TOKEN_INT, TOKEN_CHAR, }; enum { TYPE_VOID, TYPE_INT, TYPE_CHAR, TYPE_VOID_PTR = 16, TYPE_INT_PTR, TYPE_CHAR_PTR, TYPE_PTR_MASK = TYPE_VOID_PTR, TYPE_TOKEN_MASK = TOKEN_VOID, }; enum { KIND_TEMP, KIND_SCALAR, KIND_ARRAY, KIND_FUNCTION, }; enum { REG_ZERO, REG_RA, REG_SP, REG_GP, REG_TP, REG_T0, REG_T1, REG_T2, REG_FP, REG_S1, REG_A0, REG_A1, REG_A2, REG_A3, REG_A4, REG_A5, REG_A6, REG_A7, REG_S2, REG_S3, REG_S4, REG_S5, REG_S6, REG_S7, REG_S8, REG_S9, REG_S10, REG_S11, REG_T3, REG_T4, REG_T5, REG_T6, }; char* reg_name(int reg) { // special begin if (reg == REG_ZERO) return "zero"; if (reg == REG_RA) return "ra"; if (reg == REG_SP) return "sp"; if (reg == REG_GP) return "gp"; if (reg == REG_TP) return "tp"; if (reg == REG_T0) return "t0"; if (reg == REG_T1) return "t1"; if (reg == REG_T2) return "t2"; if (reg == REG_FP) return "fp"; if (reg == REG_S1) return "s1"; if (reg == REG_A0) return "a0"; if (reg == REG_A1) return "a1"; if (reg == REG_A2) return "a2"; if (reg == REG_A3) return "a3"; if (reg == REG_A4) return "a4"; if (reg == REG_A5) return "a5"; if (reg == REG_A6) return "a6"; if (reg == REG_A7) return "a7"; // allocation begin if (reg == REG_S2) return "s2"; if (reg == REG_S3) return "s3"; if (reg == REG_S4) return "s4"; if (reg == REG_S5) return "s5"; if (reg == REG_S6) return "s6"; if (reg == REG_S7) return "s7"; if (reg == REG_S8) return "s8"; if (reg == REG_S9) return "s9"; if (reg == REG_S10) return "s10"; if (reg == REG_S11) return "s11"; if (reg == REG_T3) return "t3"; if (reg == REG_T4) return "t4"; if (reg == REG_T5) return "t5"; if (reg == REG_T6) return "t6"; // overflow begin return 0; } // lexer int streq(char* s1, char* s2) { while (*s1 && *s2 && *s1 == *s2) { s1++; s2++; } return *s1 == *s2; } int is_digit(int ch) { return '0' <= ch && ch <= '9'; } int is_id_start(int ch) { return 'a' <= ch && ch <= 'z' || 'A' <= ch && ch <= 'Z' || ch == '_'; } int is_id_cont(int ch) { return is_id_start(ch) || is_digit(ch); } int parse_int(int ch) { int num = ch - '0'; while (is_digit(ch = getchar())) { num *= 10; num += ch - '0'; } ungetchar(ch); return num; } int get_escaped_char() { int ch = getchar(); if (ch == 'n') { ch = '\n'; } else if (ch == 't') { ch = '\t'; } else if (ch == 'r') { ch = '\r'; } else if (ch == '0') { ch = '\0'; } else if (ch == '\\') { ch = '\\'; } else if (ch == '\'') { ch = '\''; } else if (ch == '\"') { ch = '\"'; } else { fprintf(stderr, "unexpected escaped character: '\\%c'\n", ch); exit(1); } return ch; } int token_state; int token_type; int token_data; char string_table[STRING_TABLE_SIZE]; int string_offset; int string_lut[STRING_LUT_SIZE]; int string_lut_size; int parse_string() { int offset = string_offset; int ch; while ((ch = getchar()) != '"') { if (ch == -1 || ch == '\n') { fprintf(stderr, "expecting '\"'\n"); exit(1); } if (ch == '\\') { ch = get_escaped_char(); } string_table[string_offset++] = ch; } string_table[string_offset++] = 0; string_lut[string_lut_size] = offset; return string_lut_size++; } void rewind_string(int new_data) { string_offset = string_lut[token_data]; token_data = new_data; --string_lut_size; } void dedup_string() { int last_string = string_lut_size - 1; char* latest = string_table + string_lut[last_string]; for (int i = 0; i < last_string; i++) { char* candidate = string_table + string_lut[i]; if (streq(candidate, latest)) { rewind_string(i); return; } } } char id_table[ID_TABLE_SIZE]; int id_offset; int id_lut[ID_LUT_SIZE]; int id_lut_size; int parse_id(int ch) { int offset = id_offset; id_table[id_offset++] = ch; while (is_id_cont(ch = getchar())) { id_table[id_offset++] = ch; } ungetchar(ch); id_table[id_offset++] = 0; id_lut[id_lut_size] = offset; return id_lut_size++; } void rewind_id(int new_data) { id_offset = id_lut[token_data]; token_data = new_data; --id_lut_size; } void dedup_id() { int last_id = id_lut_size - 1; char* latest = id_table + id_lut[last_id]; for (int i = 0; i < last_id; i++) { char* candidate = id_table + id_lut[i]; if (streq(candidate, latest)) { rewind_id(i); return; } } } void parse_id_like(int ch) { token_type = TOKEN_ID; token_data = parse_id(ch); char* id = id_table + id_lut[token_data]; if (streq(id, "int")) { token_type = TOKEN_INT; } else if (streq(id, "if")) { token_type = TOKEN_IF; } else if (streq(id, "else")) { token_type = TOKEN_ELSE; } else if (streq(id, "while")) { token_type = TOKEN_WHILE; } else if (streq(id, "break")) { token_type = TOKEN_BREAK; } else if (streq(id, "continue")) { token_type = TOKEN_CONTINUE; } else if (streq(id, "return")) { token_type = TOKEN_RETURN; } else if (streq(id, "void")) { token_type = TOKEN_VOID; } else if (streq(id, "char")) { token_type = TOKEN_CHAR; } else if (streq(id, "for")) { token_type = TOKEN_FOR; } else if (streq(id, "do")) { token_type = TOKEN_DO; } else if (streq(id, "extern")) { token_type = TOKEN_EXTERN; } else if (streq(id, "enum")) { token_type = TOKEN_ENUM; } if (token_type != TOKEN_ID) { rewind_id(0); } else { dedup_id(); } } void unget_token() { token_state = 1; } void next_token() { if (token_state) { token_state = 0; return; } int ch = getchar(); while (ch == ' ' || ch == '\t' || ch == '\r' || ch == '\n') { ch = getchar(); } if (ch == -1) { token_type = TOKEN_EOF; } else if (ch == '(') { token_type = TOKEN_PAREN_LEFT; } else if (ch == ')') { token_type = TOKEN_PAREN_RIGHT; } else if (ch == '[') { token_type = TOKEN_BRACKET_LEFT; } else if (ch == ']') { token_type = TOKEN_BRACKET_RIGHT; } else if (ch == '{') { token_type = TOKEN_BRACE_LEFT; } else if (ch == '}') { token_type = TOKEN_BRACE_RIGHT; } else if (ch == '+') { int ch2 = getchar(); if (ch2 == '+') { token_type = TOKEN_INC; } else if (ch2 == '=') { token_type = TOKEN_ADD_ASSIGN; } else { ungetchar(ch2); token_type = TOKEN_ADD; } } else if (ch == '-') { int ch2 = getchar(); if (ch2 == '-') { token_type = TOKEN_DEC; } else if (ch2 == '=') { token_type = TOKEN_SUB_ASSIGN; } else { ungetchar(ch2); token_type = TOKEN_SUB; } } else if (ch == '*') { int ch2 = getchar(); if (ch2 == '=') { token_type = TOKEN_MUL_ASSIGN; } else { ungetchar(ch2); token_type = TOKEN_MUL; } } else if (ch == '/') { int ch2 = getchar(); if (ch2 == '=') { token_type = TOKEN_DIV_ASSIGN; } else if (ch2 == '/') { do ch = getchar(); while (ch != -1 && ch != '\n'); next_token(); return; } else if (ch2 == '*') { while (1) { ch = getchar(); if (ch == -1) { fprintf(stderr, "expecting '*/'\n"); exit(1); } if (ch == '*') { ch = getchar(); if (ch == '/') { break; } } } next_token(); return; } else { ungetchar(ch2); token_type = TOKEN_DIV; } } else if (ch == '%') { int ch2 = getchar(); if (ch2 == '=') { token_type = TOKEN_REM_ASSIGN; } else { ungetchar(ch2); token_type = TOKEN_REM; } } else if (ch == ';') { token_type = TOKEN_SEMICOLON; } else if (ch == '?') { token_type = TOKEN_QUESTION; } else if (ch == ':') { token_type = TOKEN_COLON; } else if (ch == ',') { token_type = TOKEN_COMMA; } else if (ch == '<') { int ch2 = getchar(); if (ch2 == '=') { token_type = TOKEN_LE; } else if (ch2 == '<') { int ch3 = getchar(); if (ch3 == '=') { token_type = TOKEN_LSHIFT_ASSIGN; } else { ungetchar(ch3); token_type = TOKEN_LSHIFT; } } else { ungetchar(ch2); token_type = TOKEN_LT; } } else if (ch == '>') { int ch2 = getchar(); if (ch2 == '=') { token_type = TOKEN_GE; } else if (ch2 == '>') { int ch3 = getchar(); if (ch3 == '=') { token_type = TOKEN_RSHIFT_ASSIGN; } else { ungetchar(ch3); token_type = TOKEN_RSHIFT; } } else { ungetchar(ch2); token_type = TOKEN_GT; } } else if (ch == '=') { int ch2 = getchar(); if (ch2 == '=') { token_type = TOKEN_EQ; } else { ungetchar(ch2); token_type = TOKEN_ASSIGN; } } else if (ch == '!') { int ch2 = getchar(); if (ch2 == '=') { token_type = TOKEN_NE; } else { ungetchar(ch2); token_type = TOKEN_NOT; } } else if (ch == '&') { int ch2 = getchar(); if (ch2 == '=') { token_type = TOKEN_AND_ASSIGN; } else if (ch2 == '&') { token_type = TOKEN_LAND; } else { ungetchar(ch2); token_type = TOKEN_AND; } } else if (ch == '|') { int ch2 = getchar(); if (ch2 == '=') { token_type = TOKEN_OR_ASSIGN; } else if (ch2 == '|') { token_type = TOKEN_LOR; } else { ungetchar(ch2); token_type = TOKEN_OR; } } else if (ch == '^') { int ch2 = getchar(); if (ch2 == '=') { token_type = TOKEN_XOR_ASSIGN; } else { ungetchar(ch2); token_type = TOKEN_XOR; } } else if (ch == '~') { token_type = TOKEN_INV; } else if (ch == '\'') { token_type = TOKEN_NUMBER; token_data = getchar(); if (token_data == '\\') { token_data = get_escaped_char(); } if (getchar() != '\'') { fprintf(stderr, "expecting '\n"); exit(1); } } else if (ch == '"') { token_type = TOKEN_STRING; token_data = parse_string(); dedup_string(); } else if (ch == '.') { token_type = 0; if (getchar() == '.') { if (getchar() == '.') { token_type = TOKEN_ELLIPSIS; } } if (token_type != TOKEN_ELLIPSIS) { fprintf(stderr, "expecting '...'\n"); exit(1); } } else if (is_digit(ch)) { token_type = TOKEN_NUMBER; token_data = parse_int(ch); } else if (is_id_start(ch)) { parse_id_like(ch); } else { fprintf(stderr, "unexpected character: %c(%d)\n", ch, ch); exit(1); } // debug info if (0) { fprintf(stderr, "token: %d\n", token_type); if (token_type == TOKEN_ID) { char* name = id_table + id_lut[token_data]; fprintf(stderr, " id: %s\n", name); } else if (token_type == TOKEN_NUMBER) { fprintf(stderr, " number: %d\n", token_data); } } } void expect_token(int expected_type) { next_token(); if (token_type != expected_type) { fprintf(stderr, "unexpected token: %d, should be %d\n", token_type, expected_type); exit(1); } } int parse_type() { if (token_type == TOKEN_INT || token_type == TOKEN_CHAR || token_type == TOKEN_VOID) { int type = token_type & ~TYPE_TOKEN_MASK; next_token(); if (token_type == TOKEN_MUL) { next_token(); type |= TYPE_PTR_MASK; } unget_token(); return type; } return -1; } // assembly context // use id as index int local_table[ID_LUT_SIZE]; // id -> local id // use local id as index int next_local_id = 1; int max_local_id = 1; int local_kind[LOCAL_SIZE]; int local_type[LOCAL_SIZE]; // use id as index int global_kind[ID_LUT_SIZE]; int global_type[ID_LUT_SIZE]; // use reg id as index int next_reg_id = REG_S2; int max_reg_id = REG_S2; int reg_type[REG_SIZE]; char indirection[REG_SIZE]; int overflow[REG_SIZE]; // reg -> local id // use id as index int const_table[ID_LUT_SIZE]; // id -> value char is_const[ID_LUT_SIZE]; int expect_const() { next_token(); int value = 1; if (token_type == TOKEN_ADD) { next_token(); } else if (token_type == TOKEN_SUB) { value = -1; next_token(); } if (token_type == TOKEN_NUMBER) { value *= token_data; } else if (token_type == TOKEN_ID && !local_table[token_data] && is_const[token_data]) { value *= const_table[token_data]; } else { fprintf(stderr, "expecting a constant\n"); exit(1); } return value; } int expect_array_size() { int size = expect_const(); if (size <= 0) { fprintf(stderr, "array size must be positive\n"); exit(1); } return size; } void reset_reg() { next_reg_id = REG_S2; for (int i = 0; i < REG_SIZE; ++i) { reg_type[i] = TYPE_VOID; indirection[i] = 0; overflow[i] = 0; } reg_type[REG_ZERO] = TYPE_INT; } void reset_local_table() { for (int i = 0; i < ID_LUT_SIZE; ++i) { local_table[i] = 0; } } void reset_local() { next_local_id = 1; max_local_id = 1; max_reg_id = REG_S2; for (int i = 0; i < LOCAL_SIZE; ++i) { local_kind[i] = KIND_TEMP; local_type[i] = TYPE_VOID; } reset_reg(); } void reset_temp() { while (next_local_id > 1 && local_kind[next_local_id - 1] == KIND_TEMP) { --next_local_id; } reset_reg(); } int next_local_slot(int type) { int slot = next_local_id++; local_type[slot] = type; if (next_local_id > max_local_id) { max_local_id = next_local_id; } return slot; } int declare_local(int id, int type) { if (local_table[id] != 0) return local_table[id]; int slot = next_local_slot(type); local_kind[slot] = KIND_SCALAR; return local_table[id] = slot; } int array_size_of(int type, int size) { return type == TYPE_CHAR ? size : 4 * size; } int declare_local_array(int id, int type, int size) { if (local_table[id] != 0) return local_table[id]; int slot = next_local_slot(type); int array_size = array_size_of(type, size); int slot_size = (array_size + 7) / 8; local_kind[slot] = KIND_ARRAY; for (int i = 1; i < slot_size; ++i) local_kind[next_local_slot(type)] = KIND_ARRAY; return local_table[id] = slot; } void declare_global(int id, int kind, int type) { global_kind[id] = kind; global_type[id] = type; } int is_overflow(int reg) { return reg > REG_T6; } int next_reg(int type) { int reg = next_reg_id++; if (is_overflow(reg)) { int slot = next_local_slot(type); local_kind[slot] = KIND_TEMP; overflow[reg] = slot; } reg_type[reg] = type; if (next_reg_id > max_reg_id) { max_reg_id = next_reg_id; } return reg; } // prolog & epilog helpers int check_itype_immediate(int value) { return value >= -2048 && value <= 2047; } void asm_ld(char* rd, int imm, char* rs) { if (check_itype_immediate(imm)) { printf(" ld %s, %d(%s)\n", rd, imm, rs); } else { printf(" li t0, %d\n", imm); printf(" add t0, %s, t0\n", rs); printf(" ld %s, 0(t0)\n", rd); } } void asm_sd(char* rs1, int imm, char* rs2) { if (check_itype_immediate(imm)) { printf(" sd %s, %d(%s)\n", rs1, imm, rs2); } else { printf(" li t0, %d\n", imm); printf(" add t0, %s, t0\n", rs2); printf(" sd %s, 0(t0)\n", rs1); } } void asm_addi(char* rd, char* rs, int imm) { if (check_itype_immediate(imm)) { printf(" addi %s, %s, %d\n", rd, rs, imm); } else { printf(" li t0, %d\n", imm); printf(" add %s, %s, t0\n", rd, rs); } } // assembly helpers char* load_op_of_type(int type) { if (type & TYPE_PTR_MASK) { return "ld"; } else if (type == TYPE_CHAR) { return "lb"; } else { // int return "lw"; } } char* store_op_of_type(int type) { if (type & TYPE_PTR_MASK) { return "sd"; } else if (type == TYPE_CHAR) { return "sb"; } else { // int return "sw"; } } // address loaders // rd must be one of t0, t1, t2 void load_local_address(int rd, int slot_id) { int offset = slot_id * 8 - 8; char* rd_name = reg_name(rd); if (check_itype_immediate(offset)) { printf(" addi %s, sp, %d\n", rd_name, offset); } else { printf(" li %s, %d\n", rd_name, offset); printf(" add %s, sp, %s\n", rd_name, rd_name); } } // load a non-trivial register into trivial one void load(int rd, int rs) { char* op = load_op_of_type(reg_type[rs]); char* rd_name = reg_name(rd); if (is_overflow(rs)) { load_local_address(rd, overflow[rs]); if (indirection[rs]) { printf(" ld %s, 0(%s)\n", rd_name, rd_name); } rs = rd; } printf(" %s %s, 0(%s) # load non-trivial register\n", op, rd_name, reg_name(rs)); } // store a trivial register into a non-trivial one void store(char* rs, int reg) { char* op = store_op_of_type(reg_type[reg]); if (is_overflow(reg)) { load_local_address(REG_T2, overflow[reg]); if (indirection[reg]) { printf(" ld t2, 0(t2)\n"); } reg = REG_T2; } printf(" %s %s, 0(%s) # store non-trivial register\n", op, rs, reg_name(reg)); } int is_nontrivial(int reg) { return is_overflow(reg) || indirection[reg]; } char* trivialize(int rs, int t) { if (is_nontrivial(rs)) { load(t, rs); return reg_name(t); } return reg_name(rs); } void _asm_r(char* op, int rd, int rs1) { char* rd_name = reg_name(rd); if (is_nontrivial(rd)) rd_name = "t0"; char* rs1_name = trivialize(rs1, REG_T0); printf(" %s %s, %s\n", op, rd_name, rs1_name); if (is_nontrivial(rd)) { store("t0", rd); } } void asm_mv(int rd, int rs1) { char* rs1_name = trivialize(rs1, REG_T0); if (is_nontrivial(rd)) { store(rs1_name, rd); } else { char* rd_name = reg_name(rd); if (!streq(rd_name, rs1_name)) printf(" mv %s, %s\n", rd_name, rs1_name); } } void _asm_rr(char* op, int rd, int rs1, int rs2) { char* rd_name = reg_name(rd); char* rs1_name = trivialize(rs1, REG_T0); char* rs2_name = trivialize(rs2, REG_T1); if (is_nontrivial(rd)) rd_name = "t0"; printf(" %s %s, %s, %s\n", op, rd_name, rs1_name, rs2_name); if (is_nontrivial(rd)) { store("t0", rd); } } void _asm_ri(char* op, int rd, int rs1, int imm) { char* rd_name = reg_name(rd); if (is_nontrivial(rd)) rd_name = "t0"; char* rs1_name = trivialize(rs1, REG_T0); printf(" %s %s, %s, %d\n", op, rd_name, rs1_name, imm); if (is_nontrivial(rd)) { store("t0", rd); } } void asm_branch(char* op, int rs1, int label) { char* rs1_name = trivialize(rs1, REG_T0); printf(" %s %s, L%d\n", op, rs1_name, label); } void _asm_i(char* op, int rd, char* prefix1, char* prefix2, int imm) { char* rd_name = reg_name(rd); if (is_nontrivial(rd)) rd_name = "t0"; printf(" %s %s, %s%s%d\n", op, rd_name, prefix1, prefix2, imm); if (is_nontrivial(rd)) { store("t0", rd); } } int is_not_reusable(int rs1, int expected_type) { return indirection[rs1] || reg_type[rs1] != expected_type || rs1 == REG_ZERO; } int asm_r(int type, char* op, int rs1) { int rd = rs1; if (is_not_reusable(rs1, type)) rd = next_reg(type); _asm_r(op, rd, rs1); return rd; } int asm_rr(int type, char* op, int rs1, int rs2) { int rd = rs1; if (is_not_reusable(rs1, type)) rd = rs2; if (is_not_reusable(rs2, type)) rd = next_reg(type); _asm_rr(op, rd, rs1, rs2); return rd; } void store_into_local(int rs1, int slot) { char* rs1_name = trivialize(rs1, REG_T0); load_local_address(REG_T2, slot); printf(" %s %s, 0(t2)\n", store_op_of_type(local_type[slot]), rs1_name); } int materialize_address(int rd, int type, int kind) { if (kind == KIND_ARRAY) { type |= TYPE_PTR_MASK; } reg_type[rd] = type; indirection[rd] = kind == KIND_SCALAR; return rd; } int lookup_from_slot(int slot) { int rd = next_reg(TYPE_VOID_PTR); if (is_nontrivial(rd)) { load_local_address(REG_T0, slot); asm_mv(rd, REG_T0); } else { load_local_address(rd, slot); } return materialize_address(rd, local_type[slot], local_kind[slot]); } int load_imm(int imm) { if (imm == 0) return REG_ZERO; int reg = next_reg(TYPE_INT); _asm_i("li", reg, "", "", imm); return reg; } int lookup(int id) { if (local_table[id]) { return lookup_from_slot(local_table[id]); } if (is_const[id]) { return load_imm(const_table[id]); } char* name = id_table + id_lut[id]; if (global_kind[id]) { if (global_kind[id] == KIND_FUNCTION) { fprintf(stderr, "function name must not appear outside function call: %s\n", name); exit(1); } int rd = next_reg(TYPE_VOID_PTR); _asm_i("la", rd, name, " # id: ", id); return materialize_address(rd, global_type[id], global_kind[id]); } fprintf(stderr, "unresolved identifier: %s\n", name); exit(1); } int asm_r_arith(char* op, int rs1) { if (reg_type[rs1] & TYPE_PTR_MASK) { fprintf(stderr, "pointer cannot be arithmetically operated by %s\n", op); exit(1); } return asm_r(TYPE_INT, op, rs1); } int asm_rr_arith(char* op, int rs1, int rs2) { if (reg_type[rs1] & TYPE_PTR_MASK || reg_type[rs2] & TYPE_PTR_MASK) { fprintf(stderr, "pointer cannot be arithmetically operated by %s\n", op); exit(1); } return asm_rr(TYPE_INT, op, rs1, rs2); } int asm_rr_cmp(char* op, int rs1, int rs2) { // since NULL is virtually 0, it is considered a valid example of a pointer comparing with an integer return asm_rr(TYPE_INT, op, rs1, rs2); } void asm_beqz(int rs1, int label) { asm_branch("beqz", rs1, label); } void asm_bnez(int rs1, int label) { asm_branch("bnez", rs1, label); } void asm_j(int label) { printf(" j L%d\n", label); } int next_label_id = 0; int next_label() { return next_label_id++; } int asm_label(int label) { printf("L%d:\n", label); return label; } int break_label_stack[4096]; int cont_label_stack[4096]; int break_label_stack_size; int cont_label_stack_size; void asm_break() { if (break_label_stack_size == 0) { fprintf(stderr, "break without loop\n"); exit(1); } asm_j(break_label_stack[break_label_stack_size - 1]); } void asm_continue() { if (cont_label_stack_size == 0) { fprintf(stderr, "continue without loop\n"); exit(1); } asm_j(cont_label_stack[cont_label_stack_size - 1]); } void asm_push_label(int break_label, int cont_label) { break_label_stack[break_label_stack_size++] = break_label; cont_label_stack[cont_label_stack_size++] = cont_label; } void asm_pop_label() { --break_label_stack_size; --cont_label_stack_size; } int epilog_label; void asm_return() { asm_j(epilog_label); } int log_step_of(int type) { return type == TYPE_INT_PTR ? 2 : 0; } int step_of(int type) { return 1 << log_step_of(type); } int asm_add(int lhs, int rhs) { int type1 = reg_type[lhs] & TYPE_PTR_MASK; int type2 = reg_type[rhs] & TYPE_PTR_MASK; if (type1 != type2) { int ptr; int idx; if (type1) { ptr = lhs; idx = rhs; } else { ptr = rhs; idx = lhs; } int ptr_type = reg_type[ptr]; int offset = next_reg(TYPE_INT); _asm_ri("slli", offset, idx, log_step_of(ptr_type)); return asm_rr(ptr_type, "add", ptr, offset); } if (type1 && type2) { fprintf(stderr, "operands of addition cannot be both pointers\n"); exit(1); } return asm_rr(TYPE_INT, "add", lhs, rhs); } int asm_sub(int lhs, int rhs) { int lhs_type = reg_type[lhs]; int rhs_type = reg_type[rhs]; int type1 = lhs_type & TYPE_PTR_MASK; int type2 = rhs_type & TYPE_PTR_MASK; if (type1 && type2) { if (lhs_type != rhs_type) { fprintf(stderr, "pointer type mismatch\n"); exit(1); } int diff = asm_rr(TYPE_INT, "sub", lhs, rhs); _asm_ri("srai", diff, diff, log_step_of(lhs_type)); return diff; } if (type1) { int neg = asm_r_arith("neg", rhs); return asm_add(lhs, neg); } return asm_rr_arith("sub", lhs, rhs); } int dereference(int reg) { if (indirection[reg]) { load(reg, reg); } else { indirection[reg] = 1; } reg_type[reg] = reg_type[reg] & ~TYPE_PTR_MASK; return reg; } int addressof(int reg) { if (indirection[reg] && !(reg_type[reg] & TYPE_PTR_MASK)) { reg_type[reg] = reg_type[reg] | TYPE_PTR_MASK; indirection[reg] = 0; } else { printf("cannot take address of this expression"); exit(1); } return reg; } // parser int parse_expr(); int parse_assign_expr(); int parse_function_call(int id) { char* name = id_table + id_lut[id]; if (global_kind[id] != KIND_FUNCTION) { fprintf(stderr, "not a function name: %s\n", name); exit(1); } int arg = 0; int args[8]; while (1) { next_token(); if (token_type == TOKEN_PAREN_RIGHT) { break; } unget_token(); if (arg >= 8) { fprintf(stderr, "too many arguments\n"); exit(1); } args[arg++] = parse_assign_expr(); next_token(); if (token_type == TOKEN_COMMA) { // continue; } else if (token_type == TOKEN_PAREN_RIGHT) { break; } else { fprintf(stderr, "expecting ',' or ')'\n"); exit(1); } } for (int i = 0; i < arg; ++i) { asm_mv(i + REG_A0, args[i]); } for (int i = REG_T3; i <= REG_T6; ++i) { if (i < max_reg_id) { asm_sd(reg_name(i), (REG_S2 - i) * 8 - 24, "fp"); } } printf(" call %s\n", name); for (int i = REG_T3; i <= REG_T6; ++i) { if (i < max_reg_id) { asm_ld(reg_name(i), (REG_S2 - i) * 8 - 24, "fp"); } } int type = global_type[id]; if (type != TYPE_VOID) { int rd = next_reg(type); asm_mv(rd, REG_A0); return rd; } return REG_ZERO; } int parse_primary_expr() { next_token(); if (token_type == TOKEN_NUMBER) { return load_imm(token_data); } else if (token_type == TOKEN_ID) { next_token(); if (token_type == TOKEN_PAREN_LEFT) { return parse_function_call(token_data); } unget_token(); return lookup(token_data); } else if (token_type == TOKEN_STRING) { int reg = next_reg(TYPE_CHAR_PTR); _asm_i("la", reg, ".LC", "", token_data); return reg; } else if (token_type == TOKEN_PAREN_LEFT) { int reg = parse_expr(); expect_token(TOKEN_PAREN_RIGHT); return reg; } else { fprintf(stderr, "unexpected token in primary expression: %d\n", token_type); exit(1); } } int parse_postfix_expr() { int lhs = parse_primary_expr(); while (1) { next_token(); if (token_type == TOKEN_INC) { int type = reg_type[lhs]; int reg = next_reg(type); asm_mv(reg, lhs); _asm_ri("addi", lhs, lhs, step_of(type)); lhs = reg; } else if (token_type == TOKEN_DEC) { int type = reg_type[lhs]; int reg = next_reg(type); asm_mv(reg, lhs); _asm_ri("addi", lhs, lhs, -step_of(type)); lhs = reg; } else if (token_type == TOKEN_BRACKET_LEFT) { int rhs = parse_expr(); expect_token(TOKEN_BRACKET_RIGHT); lhs = dereference(asm_add(lhs, rhs)); } else { unget_token(); break; } } return lhs; } int parse_prefix_expr() { next_token(); if (token_type == TOKEN_AND) { int reg = parse_prefix_expr(); int type = reg_type[reg]; if (type & TYPE_PTR_MASK) { fprintf(stderr, "cannot take address of a pointer\n"); exit(1); } return addressof(reg); } else if (token_type == TOKEN_MUL) { int reg = parse_prefix_expr(); int type = reg_type[reg]; if (!(type & TYPE_PTR_MASK)) { fprintf(stderr, "cannot dereference a non-pointer\n"); exit(1); } if (type == TYPE_VOID_PTR) { fprintf(stderr, "cannot dereference void pointer\n"); exit(1); } return dereference(reg); } else if (token_type == TOKEN_ADD) { return parse_prefix_expr(); } else if (token_type == TOKEN_SUB) { int reg = parse_prefix_expr(); return asm_r_arith("neg", reg); } else if (token_type == TOKEN_INV) { int reg = parse_prefix_expr(); return asm_r_arith("not", reg); } else if (token_type == TOKEN_NOT) { int reg = parse_prefix_expr(); return asm_r(TYPE_INT, "seqz", reg); } else if (token_type == TOKEN_INC) { int reg = parse_prefix_expr(); _asm_ri("addi", reg, reg, step_of(reg_type[reg])); return reg; } else if (token_type == TOKEN_DEC) { int reg = parse_prefix_expr(); _asm_ri("addi", reg, reg, -step_of(reg_type[reg])); return reg; } else { unget_token(); return parse_postfix_expr(); } } int parse_mul_expr() { int lhs = parse_prefix_expr(); while (1) { next_token(); if (token_type == TOKEN_MUL) { int rhs = parse_prefix_expr(); lhs = asm_rr_arith("mul", lhs, rhs); } else if (token_type == TOKEN_DIV) { int rhs = parse_prefix_expr(); lhs = asm_rr_arith("div", lhs, rhs); } else if (token_type == TOKEN_REM) { int rhs = parse_prefix_expr(); lhs = asm_rr_arith("rem", lhs, rhs); } else { unget_token(); break; } } return lhs; } int parse_add_expr() { int lhs = parse_mul_expr(); while (1) { next_token(); if (token_type == TOKEN_ADD) { int rhs = parse_mul_expr(); lhs = asm_add(lhs, rhs); } else if (token_type == TOKEN_SUB) { int rhs = parse_mul_expr(); lhs = asm_sub(lhs, rhs); } else { unget_token(); break; } } return lhs; } int parse_shift_expr() { int lhs = parse_add_expr(); while (1) { next_token(); if (token_type == TOKEN_LSHIFT) { int rhs = parse_add_expr(); lhs = asm_rr_arith("sll", lhs, rhs); } else if (token_type == TOKEN_RSHIFT) { int rhs = parse_add_expr(); lhs = asm_rr_arith("sra", lhs, rhs); } else { unget_token(); break; } } return lhs; } int parse_cmp_expr() { int lhs = parse_shift_expr(); while (1) { next_token(); if (token_type == TOKEN_LT) { int rhs = parse_shift_expr(); lhs = asm_rr_cmp("slt", lhs, rhs); } else if (token_type == TOKEN_GT) { int rhs = parse_shift_expr(); lhs = asm_rr_cmp("sgt", lhs, rhs); } else if (token_type == TOKEN_LE) { int rhs = parse_shift_expr(); int sgt = asm_rr_cmp("sgt", lhs, rhs); lhs = asm_r(TYPE_INT, "seqz", sgt); } else if (token_type == TOKEN_GE) { int rhs = parse_shift_expr(); int slt = asm_rr_cmp("slt", lhs, rhs); lhs = asm_r(TYPE_INT, "seqz", slt); } else { unget_token(); break; } } return lhs; } int parse_eq_expr() { int lhs = parse_cmp_expr(); while (1) { next_token(); if (token_type == TOKEN_EQ) { int rhs = parse_cmp_expr(); int xor = asm_rr_cmp("xor", lhs, rhs); lhs = asm_r(TYPE_INT, "seqz", xor); } else if (token_type == TOKEN_NE) { int rhs = parse_cmp_expr(); int xor = asm_rr_cmp("xor", lhs, rhs); lhs = asm_r(TYPE_INT, "snez", xor); } else { unget_token(); break; } } return lhs; } int parse_bitwise_and_expr() { int lhs = parse_eq_expr(); while (1) { next_token(); if (token_type == TOKEN_AND) { int rhs = parse_eq_expr(); lhs = asm_rr_arith("and", lhs, rhs); } else { unget_token(); break; } } return lhs; } int parse_bitwise_xor_expr() { int lhs = parse_bitwise_and_expr(); while (1) { next_token(); if (token_type == TOKEN_XOR) { int rhs = parse_bitwise_and_expr(); lhs = asm_rr_arith("xor", lhs, rhs); } else { unget_token(); break; } } return lhs; } int parse_bitwise_or_expr() { int lhs = parse_bitwise_xor_expr(); while (1) { next_token(); if (token_type == TOKEN_OR) { int rhs = parse_bitwise_xor_expr(); lhs = asm_rr_arith("or", lhs, rhs); } else { unget_token(); break; } } return lhs; } int parse_logical_and_expr() { int lhs = parse_bitwise_or_expr(); int logical = 0; int label; int result; while (1) { next_token(); if (token_type == TOKEN_LAND) { if (!logical) { logical = 1; label = next_label(); result = next_reg(TYPE_INT); _asm_r("snez", result, lhs); } asm_beqz(result, label); int rhs = parse_bitwise_or_expr(); _asm_r("snez", result, rhs); } else { unget_token(); break; } } if (logical) { asm_label(label); return result; } return lhs; } int parse_logical_or_expr() { int lhs = parse_logical_and_expr(); int logical = 0; int label; int result; while (1) { next_token(); if (token_type == TOKEN_LOR) { if (!logical) { logical = 1; label = next_label(); result = next_reg(TYPE_INT); _asm_r("snez", result, lhs); } asm_bnez(result, label); int rhs = parse_logical_and_expr(); _asm_r("snez", result, rhs); } else { unget_token(); break; } } if (logical) { asm_label(label); return result; } return lhs; } int parse_conditional_expr() { int cond = parse_logical_or_expr(); next_token(); if (token_type == TOKEN_QUESTION) { int label1 = next_label(); int label2 = next_label(); asm_beqz(cond, label1); int lhs = parse_expr(); int result = next_reg(reg_type[lhs]); asm_mv(result, lhs); asm_j(label2); expect_token(TOKEN_COLON); asm_label(label1); int rhs = parse_conditional_expr(); if (reg_type[lhs] != reg_type[rhs]) { fprintf(stderr, "type mismatch in conditional expression\n"); exit(1); } asm_mv(result, rhs); asm_label(label2); return result; } else { unget_token(); return cond; } } int parse_assign_expr() { int lhs = parse_conditional_expr(); next_token(); if (token_type == TOKEN_ASSIGN) { int rhs = parse_assign_expr(); asm_mv(lhs, rhs); return lhs; } else if (token_type == TOKEN_ADD_ASSIGN) { int rhs = parse_assign_expr(); int sum = asm_add(lhs, rhs); asm_mv(lhs, sum); return lhs; } else if (token_type == TOKEN_SUB_ASSIGN) { int rhs = parse_assign_expr(); int diff = asm_sub(lhs, rhs); asm_mv(lhs, diff); return lhs; } else if (token_type == TOKEN_MUL_ASSIGN) { int rhs = parse_assign_expr(); int prod = asm_rr_arith("mul", lhs, rhs); asm_mv(lhs, prod); return lhs; } else if (token_type == TOKEN_DIV_ASSIGN) { int rhs = parse_assign_expr(); int quot = asm_rr_arith("div", lhs, rhs); asm_mv(lhs, quot); return lhs; } else if (token_type == TOKEN_REM_ASSIGN) { int rhs = parse_assign_expr(); int rem = asm_rr_arith("rem", lhs, rhs); asm_mv(lhs, rem); return lhs; } else if (token_type == TOKEN_LSHIFT_ASSIGN) { int rhs = parse_assign_expr(); int lshift = asm_rr_arith("sll", lhs, rhs); asm_mv(lhs, lshift); return lhs; } else if (token_type == TOKEN_RSHIFT_ASSIGN) { int rhs = parse_assign_expr(); int rshift = asm_rr_arith("sra", lhs, rhs); asm_mv(lhs, rshift); return lhs; } else if (token_type == TOKEN_AND_ASSIGN) { int rhs = parse_assign_expr(); int and = asm_rr_arith("and", lhs, rhs); asm_mv(lhs, and); return lhs; } else if (token_type == TOKEN_XOR_ASSIGN) { int rhs = parse_assign_expr(); int xor = asm_rr_arith("xor", lhs, rhs); asm_mv(lhs, xor); return lhs; } else if (token_type == TOKEN_OR_ASSIGN) { int rhs = parse_assign_expr(); int or = asm_rr_arith("or", lhs, rhs); asm_mv(lhs, or); return lhs; } else { unget_token(); return lhs; } } int parse_expr() { int lhs = parse_assign_expr(); while (1) { next_token(); if (token_type == TOKEN_COMMA) { int rhs = parse_assign_expr(); lhs = rhs; } else { unget_token(); break; } } return lhs; } void parse_local_variable(int type) { if (type == TYPE_VOID) { fprintf(stderr, "variable cannot be of void type\n"); exit(1); } expect_token(TOKEN_ID); int id = token_data; next_token(); if (token_type == TOKEN_BRACKET_LEFT) { if (type & TYPE_PTR_MASK) { fprintf(stderr, "array of pointers is not supported\n"); exit(1); } int size = expect_array_size(); expect_token(TOKEN_BRACKET_RIGHT); declare_local_array(id, type, size); return; } int slot = declare_local(id, type); if (token_type == TOKEN_SEMICOLON) { unget_token(); return; } unget_token(); expect_token(TOKEN_ASSIGN); int reg = parse_expr(); store_into_local(reg, slot); } void parse_stmt(); void parse_if() { expect_token(TOKEN_PAREN_LEFT); int cond = parse_expr(); int label1 = next_label(); int label2 = next_label(); asm_beqz(cond, label1); reset_temp(); expect_token(TOKEN_PAREN_RIGHT); parse_stmt(); asm_j(label2); asm_label(label1); next_token(); if (token_type == TOKEN_ELSE) { parse_stmt(); } else { unget_token(); } asm_label(label2); } void parse_while() { expect_token(TOKEN_PAREN_LEFT); int break_label = next_label(); int cont_label = next_label(); asm_push_label(break_label, cont_label); asm_label(cont_label); int cond = parse_expr(); asm_beqz(cond, break_label); reset_temp(); expect_token(TOKEN_PAREN_RIGHT); parse_stmt(); asm_j(cont_label); asm_label(break_label); asm_pop_label(); } void parse_for() { expect_token(TOKEN_PAREN_LEFT); int cont_label = next_label(); int break_label = next_label(); int cond_label = next_label(); int body_label = next_label(); asm_push_label(break_label, cont_label); parse_stmt(); // init asm_label(cond_label); int cond = parse_expr(); asm_beqz(cond, break_label); asm_j(body_label); reset_temp(); expect_token(TOKEN_SEMICOLON); asm_label(cont_label); parse_expr(); // update reset_temp(); expect_token(TOKEN_PAREN_RIGHT); asm_j(cond_label); asm_label(body_label); parse_stmt(); // body asm_j(cont_label); asm_label(break_label); asm_pop_label(); } void parse_do_while() { int cont_label = next_label(); int break_label = next_label(); asm_push_label(break_label, cont_label); asm_label(cont_label); parse_stmt(); // body expect_token(TOKEN_WHILE); expect_token(TOKEN_PAREN_LEFT); int cond = parse_expr(); asm_bnez(cond, cont_label); expect_token(TOKEN_PAREN_RIGHT); asm_label(break_label); asm_pop_label(); } void parse_stmt() { next_token(); int decl_type; if (token_type == TOKEN_IF) { parse_if(); return; } else if (token_type == TOKEN_WHILE) { parse_while(); return; } else if (token_type == TOKEN_FOR) { parse_for(); return; } else if (token_type == TOKEN_DO) { parse_do_while(); } else if (token_type == TOKEN_BRACE_LEFT) { while (1) { next_token(); if (token_type == TOKEN_BRACE_RIGHT) { break; } unget_token(); parse_stmt(); } return; } else if (token_type == TOKEN_RETURN) { next_token(); if (token_type == TOKEN_SEMICOLON) { asm_return(); return; } unget_token(); int rs1 = parse_expr(); asm_mv(REG_A0, rs1); asm_return(); } else if (token_type == TOKEN_BREAK) { asm_break(); } else if (token_type == TOKEN_CONTINUE) { asm_continue(); } else if (token_type == TOKEN_SEMICOLON) { unget_token(); } else if ((decl_type = parse_type()) >= 0) { parse_local_variable(decl_type); } else { unget_token(); parse_expr(); } expect_token(TOKEN_SEMICOLON); reset_temp(); } void parse_function(char* name) { reset_local(); int arg = 0; int args[8]; while (1) { next_token(); if (token_type == TOKEN_PAREN_RIGHT) { break; } if (token_type == TOKEN_ELLIPSIS) { expect_token(TOKEN_PAREN_RIGHT); break; } int arg_type = parse_type(); if (arg_type < 0 || arg_type == TYPE_VOID) { fprintf(stderr, "expecting a non-void argument type: %d\n", arg_type); exit(1); } expect_token(TOKEN_ID); int arg_name = token_data; next_token(); if (token_type == TOKEN_BRACKET_LEFT) { expect_token(TOKEN_BRACKET_RIGHT); next_token(); if (arg_type & TYPE_PTR_MASK) { fprintf(stderr, "array of pointers is not supported\n"); exit(1); } arg_type |= TYPE_PTR_MASK; } if (arg >= 8) { fprintf(stderr, "too many arguments\n"); exit(1); } args[arg++] = declare_local(token_data, arg_type); if (token_type == TOKEN_COMMA) { // continue; } else if (token_type == TOKEN_PAREN_RIGHT) { break; } else { fprintf(stderr, "expecting ',' or ')'\n"); exit(1); } } next_token(); if (token_type == TOKEN_SEMICOLON) { reset_local_table(); return; } unget_token(); expect_token(TOKEN_BRACE_LEFT); printf(".text\n"); printf(".global %s\n", name); printf("%s:\n", name); int label = next_label(); int prolog_label = next_label(); epilog_label = next_label(); asm_j(prolog_label); asm_label(label); while (1) { next_token(); if (token_type == TOKEN_BRACE_RIGHT) { break; } unget_token(); parse_stmt(); } if (streq(name, "main")) { asm_mv(REG_A0, REG_ZERO); } asm_return(); int reg_used = max_reg_id - REG_S2; if (reg_used > 14) reg_used = 14; int frame_size = (max_local_id - 1 + reg_used + 2) * 8; if (reg_used > 10) reg_used = 10; if (frame_size % 16 != 0) { frame_size += 8; } // prolog asm_label(prolog_label); asm_addi("sp", "sp", -frame_size); asm_sd("ra", frame_size - 8, "sp"); asm_sd("fp", frame_size - 16, "sp"); for (int i = 0; i < reg_used; ++i) { int reg = REG_S2 + i; asm_sd(reg_name(reg), frame_size - 24 - i * 8, "sp"); } asm_addi("fp", "sp", frame_size); for (int i = 0; i < arg; ++i) { store_into_local(REG_A0 + i, args[i]); } asm_j(label); // epilog asm_label(epilog_label); asm_ld("ra", frame_size - 8, "sp"); asm_ld("fp", frame_size - 16, "sp"); for (int i = 0; i < reg_used; ++i) { int reg = REG_S2 + i; asm_ld(reg_name(reg), frame_size - 24 - i * 8, "sp"); } asm_addi("sp", "sp", frame_size); printf(" ret\n"); reset_local_table(); } void parse_global_variable(int id, char* name, int type) { printf(".data\n"); printf(".globl %s\n", name); printf(".align 5\n"); printf("%s:\n", name); if (token_type == TOKEN_ASSIGN) { printf(" .dword %d\n", expect_const()); } else if (token_type == TOKEN_BRACKET_LEFT) { if (type & TYPE_PTR_MASK) { fprintf(stderr, "array of pointers is not supported\n"); exit(1); } int size = expect_array_size(); expect_token(TOKEN_BRACKET_RIGHT); int array_size = array_size_of(type, size); printf(" .zero %d\n", array_size); declare_global(id, KIND_ARRAY, type); } else { printf(" .zero %d\n", 8); unget_token(); } } void parse_global_declaration() { int external = 0; if (token_type == TOKEN_EXTERN) { external = 1; next_token(); } int type = parse_type(); if (type < 0) { fprintf(stderr, "expecting type for global declaration\n"); exit(1); } expect_token(TOKEN_ID); int id = token_data; char* name = id_table + id_lut[id]; next_token(); if (token_type == TOKEN_PAREN_LEFT) { declare_global(id, KIND_FUNCTION, type); parse_function(name); } else { if (type == TYPE_VOID) { fprintf(stderr, "variable cannot be of void type\n"); exit(1); } declare_global(id, KIND_SCALAR, type); if (external) { unget_token(); } else { parse_global_variable(id, name, type); } expect_token(TOKEN_SEMICOLON); } } void parse_enum() { expect_token(TOKEN_BRACE_LEFT); int value = 0; while (1) { next_token(); if (token_type == TOKEN_BRACE_RIGHT) { break; } if (token_type != TOKEN_ID) { fprintf(stderr, "expecting identifier in enum\n"); exit(1); } int id = token_data; next_token(); if (token_type == TOKEN_ASSIGN) { value = expect_const(); } else { unget_token(); } const_table[id] = value++; is_const[id] = 1; next_token(); if (token_type == TOKEN_COMMA) { // continue; } else if (token_type == TOKEN_BRACE_RIGHT) { break; } else { fprintf(stderr, "expecting ',' or '}'\n"); exit(1); } } expect_token(TOKEN_SEMICOLON); } void parse_top_level() { next_token(); if (token_type == TOKEN_EOF) return; if (token_type == TOKEN_ENUM) { parse_enum(); } else { parse_global_declaration(); } parse_top_level(); } void dump_string_table() { printf(".data\n"); for (int i = 0; i < string_lut_size; ++i) { printf(".LC%d: .string \"", i); char* p = string_table + string_lut[i]; int ch; while ((ch = *p++) != 0) { if (ch == '\n') { printf("\\n"); } else if (ch == '\t') { printf("\\t"); } else if (ch == '\r') { printf("\\r"); } else if (ch == '\\') { printf("\\\\"); } else if (ch == '\'') { printf("\\'"); } else if (ch == '\"') { printf("\\\""); } else { printf("%c", ch); } } printf("\"\n"); } } int main() { parse_top_level(); dump_string_table(); }