diff --git a/README.md b/README.md index e124a13..5c3b900 100644 --- a/README.md +++ b/README.md @@ -1,6 +1,6 @@ # RVBTCC -2000 行的轻量级自举编译器。 +不到 2000 行的轻量级自举编译器。 - 旨在展示如何迅速编写一个自举编译器。 - 语法类似 C,输出 RISC-V 汇编。 @@ -96,7 +96,24 @@ $ sh boot.sh ### 关键字 -本语言包含的关键字即为支持的标量类型的关键字和流程控制的关键字,还有 `extern` 和 `enum`。 +本语言包含的关键字即为支持的标量类型的关键字和流程控制的关键字,还有 `const` 和 `extern`。 + +#### `const` 关键字 + +`const` 关键字可以在类型中使用,在大部分情况下会被直接忽略。支持它是为了更好兼容 C 程序。 + +但是当出现同时满足 + +- 全局,标量(即不是数组)。 +- 类型为 `const int` 或 `const int const`。 +- 带有初始化。 +- 不是 `extern` 的。 + +的声明时,将会被解析为整数常量。 + +整数常量在使用的时候会被直接替换为对应的右值,失去作为全局变量左值的性质。 + +使用 `int const` 或 `int` 形式或添加 `extern` 可以避免这样的特殊处理。 #### `extern` 关键字 @@ -111,12 +128,6 @@ $ sh boot.sh - 不可以初始化。 - 不可是数组。 -#### `enum` 关键字 - -用于定义整数常量。enum 的名字必须省略,因此不能用于定义枚举类型。 - -整数常量可以用于数组大小、全局变量初始化等需要常量的地方。 - ### 支持以下运算符 | 运算符 | 含义 | 结合性 | @@ -158,12 +169,13 @@ $ sh boot.sh 编译过程中涉及的以下参数: - 符号表总长度、字符串表总长度 -- 符号数、字符串数、局部变量数、(虚拟)寄存器数 +- 符号数、字符串数、全局变量数、局部变量数 -不能超过源代码中指定的常数。 +不能超过源代码中指定的常数。如果有必要这些常数可以适度加大。 -- 目前源代码中的常数能够保证自举成功。如果有必要可以将它们适度加大。 -- 该设计保证了没有任何的动态内存分配。如果愿意,可以将它们改为 `malloc` 和 `free` 动态管理,本语言是完全支持的。 +目前源代码中的常数能够保证自举。 + +如果愿意,完全可以把程序中的各类表改为 `malloc` 和 `free` 动态管理,本语言是完全支持的。 ## 依赖 diff --git a/boot.c b/boot.c index 0b88404..eb71c13 100644 --- a/boot.c +++ b/boot.c @@ -1,212 +1,32 @@ /* * RVBTCC By Yaossg - * A lightweight bootstrapping compiler in 2000 lines. + * A lightweight bootstrapping compiler in less than 2000 lines. * * It aims to demonstrate how to write a bootstrapping compiler in no time. * Syntax is similar to C, output is RISC-V assembly. * Only dependent on some glibc functions for I/O. * Purely for educational purposes. Do not use in production. 
+ * */ -// glibc dependency +// libc dependency extern void* stdin; extern void* stdout; extern void* stderr; -int printf(char* format, ...); +int printf(const char* format, ...); int getchar(); void exit(int status); -int fprintf(void* file, char* format, ...); +int fprintf(void* file, const char* format, ...); int ungetc(int ch, void* file); void ungetchar(int ch) { ungetc(ch, stdin); } -// limitations - -enum { - STRING_TABLE_SIZE = 65536, - STRING_LUT_SIZE = 4096, - ID_TABLE_SIZE = 65536, - ID_LUT_SIZE = 4096, - LOCAL_SIZE = 4096, - REG_SIZE = 4096, -}; - -// constants - -enum { - TOKEN_EOF, - TOKEN_SEMICOLON, - TOKEN_ADD, - TOKEN_SUB, - TOKEN_MUL, - TOKEN_DIV, - TOKEN_REM, - TOKEN_ASSIGN, - TOKEN_COMMA, - TOKEN_LSHIFT, - TOKEN_RSHIFT, - TOKEN_AND, - TOKEN_OR, - TOKEN_XOR, - TOKEN_COMPL, - TOKEN_NOT, - TOKEN_LAND, - TOKEN_LOR, - TOKEN_ELLIPSIS, - TOKEN_INC, - TOKEN_DEC, - TOKEN_ADD_ASSIGN, - TOKEN_SUB_ASSIGN, - TOKEN_MUL_ASSIGN, - TOKEN_DIV_ASSIGN, - TOKEN_REM_ASSIGN, - TOKEN_AND_ASSIGN, - TOKEN_OR_ASSIGN, - TOKEN_XOR_ASSIGN, - TOKEN_LSHIFT_ASSIGN, - TOKEN_RSHIFT_ASSIGN, - TOKEN_QUESTION, - TOKEN_COLON, - TOKEN_EQ, - TOKEN_NE, - TOKEN_LT, - TOKEN_GT, - TOKEN_LE, - TOKEN_GE, - - TOKEN_PAREN_LEFT = 50, - TOKEN_PAREN_RIGHT, - TOKEN_BRACKET_LEFT, - TOKEN_BRACKET_RIGHT, - TOKEN_BRACE_LEFT, - TOKEN_BRACE_RIGHT, - - TOKEN_STRING = 99, - TOKEN_NUMBER, - TOKEN_ID, - TOKEN_IF, - TOKEN_ELSE, - TOKEN_WHILE, - TOKEN_FOR, - TOKEN_DO, - TOKEN_BREAK, - TOKEN_CONTINUE, - TOKEN_RETURN, - TOKEN_ENUM, - TOKEN_EXTERN, - - TOKEN_VOID = 128, - TOKEN_INT, - TOKEN_CHAR, -}; - -enum { - TYPE_VOID, - TYPE_INT, - TYPE_CHAR, - TYPE_VOID_PTR = 16, - TYPE_INT_PTR, - TYPE_CHAR_PTR, - - TYPE_PTR_MASK = TYPE_VOID_PTR, - TYPE_TOKEN_MASK = TOKEN_VOID, -}; - -enum { - KIND_TEMP, - KIND_SCALAR, - KIND_ARRAY, - KIND_FUNCTION, -}; - -enum { - REG_ZERO, - REG_RA, - REG_SP, - REG_GP, - REG_TP, - REG_T0, - REG_T1, - REG_T2, - REG_FP, - REG_S1, - REG_A0, - REG_A1, - REG_A2, - REG_A3, - REG_A4, - REG_A5, 
- REG_A6, - REG_A7, - REG_S2, - REG_S3, - REG_S4, - REG_S5, - REG_S6, - REG_S7, - REG_S8, - REG_S9, - REG_S10, - REG_S11, - REG_T3, - REG_T4, - REG_T5, - REG_T6, -}; - -char* reg_name(int reg) { - // special begin - if (reg == REG_ZERO) return "zero"; - if (reg == REG_RA) return "ra"; - if (reg == REG_SP) return "sp"; - if (reg == REG_GP) return "gp"; - if (reg == REG_TP) return "tp"; - if (reg == REG_T0) return "t0"; - if (reg == REG_T1) return "t1"; - if (reg == REG_T2) return "t2"; - if (reg == REG_FP) return "fp"; - if (reg == REG_S1) return "s1"; - if (reg == REG_A0) return "a0"; - if (reg == REG_A1) return "a1"; - if (reg == REG_A2) return "a2"; - if (reg == REG_A3) return "a3"; - if (reg == REG_A4) return "a4"; - if (reg == REG_A5) return "a5"; - if (reg == REG_A6) return "a6"; - if (reg == REG_A7) return "a7"; - // allocation begin - if (reg == REG_S2) return "s2"; - if (reg == REG_S3) return "s3"; - if (reg == REG_S4) return "s4"; - if (reg == REG_S5) return "s5"; - if (reg == REG_S6) return "s6"; - if (reg == REG_S7) return "s7"; - if (reg == REG_S8) return "s8"; - if (reg == REG_S9) return "s9"; - if (reg == REG_S10) return "s10"; - if (reg == REG_S11) return "s11"; - if (reg == REG_T3) return "t3"; - if (reg == REG_T4) return "t4"; - if (reg == REG_T5) return "t5"; - if (reg == REG_T6) return "t6"; - // overflow begin - return 0; -} - // lexer -int streq(char* s1, char* s2) { - while (*s1 && *s2 && *s1 == *s2) { - s1++; - s2++; - } - return *s1 == *s2; -} - int is_digit(int ch) { return '0' <= ch && ch <= '9'; } @@ -219,6 +39,87 @@ int is_id_cont(int ch) { return is_id_start(ch) || is_digit(ch); } +int token_state; +int token_type; +int token_data; + +const int TOKEN_EOF = 0; +const int TOKEN_SEMICOLON = 1; +const int TOKEN_ADD = 2; +const int TOKEN_SUB = 3; +const int TOKEN_MUL = 4; +const int TOKEN_DIV = 5; +const int TOKEN_REM = 6; +const int TOKEN_ASSIGN = 7; +const int TOKEN_COMMA = 8; +const int TOKEN_DOT = 9; +const int TOKEN_LSHIFT = 10; +const 
int TOKEN_RSHIFT = 11; +const int TOKEN_AND = 12; +const int TOKEN_OR = 13; +const int TOKEN_XOR = 14; +const int TOKEN_COMPL = 15; +const int TOKEN_NOT = 16; +const int TOKEN_LAND = 17; +const int TOKEN_LOR = 18; +const int TOKEN_ELLIPSIS = 19; +const int TOKEN_INC = 20; +const int TOKEN_DEC = 21; +const int TOKEN_ADD_ASSIGN = 22; +const int TOKEN_SUB_ASSIGN = 23; +const int TOKEN_MUL_ASSIGN = 24; +const int TOKEN_DIV_ASSIGN = 25; +const int TOKEN_REM_ASSIGN = 26; +const int TOKEN_AND_ASSIGN = 27; +const int TOKEN_OR_ASSIGN = 28; +const int TOKEN_XOR_ASSIGN = 29; +const int TOKEN_LSHIFT_ASSIGN = 30; +const int TOKEN_RSHIFT_ASSIGN = 31; +const int TOKEN_QUESTION = 32; +const int TOKEN_COLON = 33; + +const int TOKEN_EQ = 40; +const int TOKEN_NE = 41; +const int TOKEN_LT = 42; +const int TOKEN_GT = 43; +const int TOKEN_LE = 44; +const int TOKEN_GE = 45; + +const int TOKEN_PAREN_LEFT = 50; +const int TOKEN_PAREN_RIGHT = 51; +const int TOKEN_BRACKET_LEFT = 52; +const int TOKEN_BRACKET_RIGHT = 53; +const int TOKEN_BRACE_LEFT = 54; +const int TOKEN_BRACE_RIGHT = 55; + +const int TOKEN_STRING = 99; +const int TOKEN_NUMBER = 100; +const int TOKEN_ID = 101; +const int TOKEN_IF = 102; +const int TOKEN_ELSE = 103; +const int TOKEN_WHILE = 104; +const int TOKEN_FOR = 105; +const int TOKEN_DO = 106; +const int TOKEN_BREAK = 107; +const int TOKEN_CONTINUE = 108; +const int TOKEN_RETURN = 109; + +const int TOKEN_EXTERN = 126; +const int TOKEN_CONST = 127; +const int TOKEN_VOID = 128; +const int TOKEN_INT = 129; +const int TOKEN_CHAR = 130; + +const int TYPE_VOID = 0; +const int TYPE_INT = 1; +const int TYPE_CHAR = 2; +const int TYPE_VOID_PTR = 16; +const int TYPE_INT_PTR = 17; +const int TYPE_CHAR_PTR = 18; + +const int TYPE_PTR_MASK = 16; +const int TYPE_TOKEN_MASK = 128; + int parse_int(int ch) { int num = ch - '0'; while (is_digit(ch = getchar())) { @@ -252,13 +153,17 @@ int get_escaped_char() { return ch; } -int token_state; -int token_type; -int token_data; +int streq(const 
char* s1, const char* s2) { + while (*s1 && *s2 && *s1 == *s2) { + s1++; + s2++; + } + return *s1 == *s2; +} -char string_table[STRING_TABLE_SIZE]; +char string_table[65536]; int string_offset; -int string_lut[STRING_LUT_SIZE]; +int string_lut[4096]; int string_lut_size; int parse_string() { int offset = string_offset; @@ -296,9 +201,9 @@ void dedup_string() { } } -char id_table[ID_TABLE_SIZE]; +char id_table[65536]; int id_offset; -int id_lut[ID_LUT_SIZE]; +int id_lut[4096]; int id_lut_size; int parse_id(int ch) { int offset = id_offset; @@ -350,6 +255,8 @@ void parse_id_like(int ch) { token_type = TOKEN_RETURN; } else if (streq(id, "void")) { token_type = TOKEN_VOID; + } else if (streq(id, "const")) { + token_type = TOKEN_CONST; } else if (streq(id, "char")) { token_type = TOKEN_CHAR; } else if (streq(id, "for")) { @@ -358,8 +265,6 @@ void parse_id_like(int ch) { token_type = TOKEN_DO; } else if (streq(id, "extern")) { token_type = TOKEN_EXTERN; - } else if (streq(id, "enum")) { - token_type = TOKEN_ENUM; } if (token_type != TOKEN_ID) { rewind_id(0); @@ -583,7 +488,7 @@ void next_token() { if (0) { fprintf(stderr, "token: %d\n", token_type); if (token_type == TOKEN_ID) { - char* name = id_table + id_lut[token_data]; + const char* name = id_table + id_lut[token_data]; fprintf(stderr, " id: %s\n", name); } else if (token_type == TOKEN_NUMBER) { fprintf(stderr, " number: %d\n", token_data); @@ -599,12 +504,21 @@ void expect_token(int expected_type) { } } +void ignore_const() { + if (token_type == TOKEN_CONST) { + next_token(); + } +} + int parse_type() { + ignore_const(); if (token_type == TOKEN_INT || token_type == TOKEN_CHAR || token_type == TOKEN_VOID) { int type = token_type & ~TYPE_TOKEN_MASK; next_token(); + ignore_const(); if (token_type == TOKEN_MUL) { next_token(); + ignore_const(); type |= TYPE_PTR_MASK; } unget_token(); @@ -613,47 +527,67 @@ int parse_type() { return -1; } -// assembly context +// asm -// use id as index -int local_table[ID_LUT_SIZE]; // 
id -> local id - -// use local id as index +int local_table[4096]; // id -> local id int next_local_id = 1; int max_local_id = 1; -int local_kind[LOCAL_SIZE]; -int local_type[LOCAL_SIZE]; -// use id as index -int global_kind[ID_LUT_SIZE]; -int global_type[ID_LUT_SIZE]; +const int MARKER_TEMP = 0; +const int MARKER_SCALAR = 1; +const int MARKER_ARRAY = 2; +const int MARKER_FUNCTION = 3; -// use reg id as index -int next_reg_id = REG_S2; -int max_reg_id = REG_S2; -int reg_type[REG_SIZE]; -char indirection[REG_SIZE]; -int overflow[REG_SIZE]; // reg -> local id +int local_marker[4096]; +int global_marker[4096]; +int local_type[4096]; +int global_type[4096]; -// use id as index -int const_table[ID_LUT_SIZE]; // id -> value -char is_const[ID_LUT_SIZE]; +int reg_type[4096]; +int next_reg_id = 18; +int max_reg_id = 18; +int indirection[4096]; +int overflow[4096]; -int expect_const() { - next_token(); - if (token_type == TOKEN_NUMBER) { - return token_data; - } - if (token_type == TOKEN_ID && !local_table[token_data] && is_const[token_data]) { - return const_table[token_data]; - } - fprintf(stderr, "expecting a constant\n"); - exit(1); -} +int const_table[4096]; // id -> value +int is_const[4096]; + +const int REG_ZERO = 0; +const int REG_RA = 1; +const int REG_SP = 2; +const int REG_GP = 3; +const int REG_TP = 4; +const int REG_T0 = 5; +const int REG_T1 = 6; +const int REG_T2 = 7; +const int REG_FP = 8; +const int REG_S1 = 9; +const int REG_A0 = 10; +const int REG_A1 = 11; +const int REG_A2 = 12; +const int REG_A3 = 13; +const int REG_A4 = 14; +const int REG_A5 = 15; +const int REG_A6 = 16; +const int REG_A7 = 17; +const int REG_S2 = 18; +const int REG_S3 = 19; +const int REG_S4 = 20; +const int REG_S5 = 21; +const int REG_S6 = 22; +const int REG_S7 = 23; +const int REG_S8 = 24; +const int REG_S9 = 25; +const int REG_S10 = 26; +const int REG_S11 = 27; +const int REG_T3 = 28; +const int REG_T4 = 29; +const int REG_T5 = 30; +const int REG_T6 = 31; void reset_reg() { 
next_reg_id = REG_S2; - for (int i = 0; i < REG_SIZE; ++i) { + for (int i = 0; i < 4096; ++i) { reg_type[i] = TYPE_VOID; indirection[i] = 0; overflow[i] = 0; @@ -661,25 +595,63 @@ void reset_reg() { reg_type[REG_ZERO] = TYPE_INT; } -void reset_local_table() { - for (int i = 0; i < ID_LUT_SIZE; ++i) { - local_table[i] = 0; - } +const char* reg_name(int reg) { + if (reg == 0) return "zero"; + if (reg == 1) return "ra"; + if (reg == 2) return "sp"; + if (reg == 3) return "gp"; + if (reg == 4) return "tp"; + if (reg == 5) return "t0"; + if (reg == 6) return "t1"; + if (reg == 7) return "t2"; + if (reg == 8) return "fp"; + // reserved begin + if (reg == 9) return "s1"; + if (reg == 10) return "a0"; + if (reg == 11) return "a1"; + if (reg == 12) return "a2"; + if (reg == 13) return "a3"; + if (reg == 14) return "a4"; + if (reg == 15) return "a5"; + if (reg == 16) return "a6"; + if (reg == 17) return "a7"; + // allocation begin + if (reg == 18) return "s2"; + if (reg == 19) return "s3"; + if (reg == 20) return "s4"; + if (reg == 21) return "s5"; + if (reg == 22) return "s6"; + if (reg == 23) return "s7"; + if (reg == 24) return "s8"; + if (reg == 25) return "s9"; + if (reg == 26) return "s10"; + if (reg == 27) return "s11"; + if (reg == 28) return "t3"; + if (reg == 29) return "t4"; + if (reg == 30) return "t5"; + if (reg == 31) return "t6"; + // overflow begin + return 0; +} + +int is_overflow(int reg) { + return reg > REG_T6; } void reset_local() { next_local_id = 1; max_local_id = 1; max_reg_id = REG_S2; - for (int i = 0; i < LOCAL_SIZE; ++i) { - local_kind[i] = KIND_TEMP; + for (int i = 0; i < 4096; ++i) { + local_table[i] = 0; + local_marker[i] = MARKER_TEMP; local_type[i] = TYPE_VOID; } reset_reg(); } void reset_temp() { - while (next_local_id > 1 && local_kind[next_local_id - 1] == KIND_TEMP) { + while (next_local_id > 1 && local_marker[next_local_id - 1] == MARKER_TEMP) { --next_local_id; } reset_reg(); @@ -697,7 +669,7 @@ int next_local_slot(int type) { int 
declare_local(int id, int type) { if (local_table[id] != 0) return local_table[id]; int slot = next_local_slot(type); - local_kind[slot] = KIND_SCALAR; + local_marker[slot] = MARKER_SCALAR; return local_table[id] = slot; } @@ -710,25 +682,21 @@ int declare_local_array(int id, int type, int size) { int slot = next_local_slot(type); int array_size = array_size_of(type, size); int slot_size = (array_size + 7) / 8; - local_kind[slot] = KIND_ARRAY; - for (int i = 1; i < slot_size; ++i) local_kind[next_local_slot(type)] = KIND_ARRAY; + local_marker[slot] = MARKER_ARRAY; + for (int i = 1; i < slot_size; ++i) local_marker[next_local_slot(type)] = MARKER_ARRAY; return local_table[id] = slot; } -void declare_global(int id, int kind, int type) { - global_kind[id] = kind; +void declare_global(int id, int marker, int type) { + global_marker[id] = marker; global_type[id] = type; } -int is_overflow(int reg) { - return reg > REG_T6; -} - int next_reg(int type) { int reg = next_reg_id++; if (is_overflow(reg)) { int slot = next_local_slot(type); - local_kind[slot] = KIND_TEMP; + local_marker[slot] = MARKER_TEMP; overflow[reg] = slot; } reg_type[reg] = type; @@ -745,7 +713,7 @@ int check_itype_immediate(int value) { return value >= -2048 && value <= 2047; } -void asm_ld(char* rd, int imm, char* rs) { +void asm_ld(const char* rd, int imm, const char* rs) { if (check_itype_immediate(imm)) { printf(" ld %s, %d(%s)\n", rd, imm, rs); } else { @@ -755,7 +723,7 @@ void asm_ld(char* rd, int imm, char* rs) { } } -void asm_sd(char* rs1, int imm, char* rs2) { +void asm_sd(const char* rs1, int imm, const char* rs2) { if (check_itype_immediate(imm)) { printf(" sd %s, %d(%s)\n", rs1, imm, rs2); } else { @@ -765,7 +733,7 @@ void asm_sd(char* rs1, int imm, char* rs2) { } } -void asm_addi(char* rd, char* rs, int imm) { +void asm_addi(const char* rd, const char* rs, int imm) { if (check_itype_immediate(imm)) { printf(" addi %s, %s, %d\n", rd, rs, imm); } else { @@ -776,7 +744,7 @@ void asm_addi(char* 
rd, char* rs, int imm) { // assembly helpers -char* load_op_of_type(int type) { +const char* load_op_of_type(int type) { if (type & TYPE_PTR_MASK) { return "ld"; } else if (type == TYPE_CHAR) { @@ -786,7 +754,7 @@ char* load_op_of_type(int type) { } } -char* store_op_of_type(int type) { +const char* store_op_of_type(int type) { if (type & TYPE_PTR_MASK) { return "sd"; } else if (type == TYPE_CHAR) { @@ -804,8 +772,8 @@ void load_local_address(int rd, int slot_id) { // load a non-trivial register into trivial one void load(int rd, int rs) { - char* op = load_op_of_type(reg_type[rs]); - char* rd_name = reg_name(rd); + const char* op = load_op_of_type(reg_type[rs]); + const char* rd_name = reg_name(rd); if (is_overflow(rs)) { load_local_address(rd, overflow[rs]); if (indirection[rs]) { @@ -817,8 +785,8 @@ void load(int rd, int rs) { } // store a trivial register into a non-trivial one -void store(char* rs, int reg) { - char* op = store_op_of_type(reg_type[reg]); +void store(const char* rs, int reg) { + const char* op = store_op_of_type(reg_type[reg]); if (is_overflow(reg)) { load_local_address(REG_T2, overflow[reg]); if (indirection[reg]) { @@ -833,7 +801,7 @@ int is_nontrivial(int reg) { return is_overflow(reg) || indirection[reg]; } -char* trivialize(int rs, int t) { +const char* trivialize(int rs, int t) { if (is_nontrivial(rs)) { load(t, rs); return reg_name(t); @@ -841,10 +809,10 @@ char* trivialize(int rs, int t) { return reg_name(rs); } -void _asm_r(char* op, int rd, int rs1) { - char* rd_name = reg_name(rd); +void _asm_r(const char* op, int rd, int rs1) { + const char* rd_name = reg_name(rd); if (is_nontrivial(rd)) rd_name = "t0"; - char* rs1_name = trivialize(rs1, REG_T0); + const char* rs1_name = trivialize(rs1, REG_T0); printf(" %s %s, %s\n", op, rd_name, rs1_name); if (is_nontrivial(rd)) { store("t0", rd); @@ -852,20 +820,20 @@ void _asm_r(char* op, int rd, int rs1) { } void asm_mv(int rd, int rs1) { - char* rs1_name = trivialize(rs1, REG_T0); + const 
char* rs1_name = trivialize(rs1, REG_T0); if (is_nontrivial(rd)) { store(rs1_name, rd); } else { - char* rd_name = reg_name(rd); + const char* rd_name = reg_name(rd); if (!streq(rd_name, rs1_name)) printf(" mv %s, %s\n", rd_name, rs1_name); } } -void _asm_rr(char* op, int rd, int rs1, int rs2) { - char* rd_name = reg_name(rd); - char* rs1_name = trivialize(rs1, REG_T0); - char* rs2_name = trivialize(rs2, REG_T1); +void _asm_rr(const char* op, int rd, int rs1, int rs2) { + const char* rd_name = reg_name(rd); + const char* rs1_name = trivialize(rs1, REG_T0); + const char* rs2_name = trivialize(rs2, REG_T1); if (is_nontrivial(rd)) rd_name = "t0"; printf(" %s %s, %s, %s\n", op, rd_name, rs1_name, rs2_name); if (is_nontrivial(rd)) { @@ -873,23 +841,23 @@ void _asm_rr(char* op, int rd, int rs1, int rs2) { } } -void _asm_ri(char* op, int rd, int rs1, int imm) { - char* rd_name = reg_name(rd); +void _asm_ri(const char* op, int rd, int rs1, int imm) { + const char* rd_name = reg_name(rd); if (is_nontrivial(rd)) rd_name = "t0"; - char* rs1_name = trivialize(rs1, REG_T0); + const char* rs1_name = trivialize(rs1, REG_T0); printf(" %s %s, %s, %d\n", op, rd_name, rs1_name, imm); if (is_nontrivial(rd)) { store("t0", rd); } } -void asm_branch(char* op, int rs1, int label) { - char* rs1_name = trivialize(rs1, REG_T0); +void asm_branch(const char* op, int rs1, int label) { + const char* rs1_name = trivialize(rs1, REG_T0); printf(" %s %s, L%d\n", op, rs1_name, label); } -void _asm_i(char* op, int rd, char* prefix1, char* prefix2, int imm) { - char* rd_name = reg_name(rd); +void _asm_i(const char* op, int rd, const char* prefix1, const char* prefix2, int imm) { + const char* rd_name = reg_name(rd); if (is_nontrivial(rd)) rd_name = "t0"; printf(" %s %s, %s%s%d\n", op, rd_name, prefix1, prefix2, imm); if (is_nontrivial(rd)) { @@ -901,14 +869,14 @@ int is_not_reusable(int rs1, int expected_type) { return indirection[rs1] || reg_type[rs1] != expected_type || rs1 == REG_ZERO; } -int 
asm_r(int type, char* op, int rs1) { +int asm_r(int type, const char* op, int rs1) { int rd = rs1; if (is_not_reusable(rs1, type)) rd = next_reg(type); _asm_r(op, rd, rs1); return rd; } -int asm_rr(int type, char* op, int rs1, int rs2) { +int asm_rr(int type, const char* op, int rs1, int rs2) { int rd = rs1; if (is_not_reusable(rs1, type)) rd = rs2; if (is_not_reusable(rs2, type)) rd = next_reg(type); @@ -917,17 +885,17 @@ int asm_rr(int type, char* op, int rs1, int rs2) { } void store_into_local(int rs1, int slot) { - char* rs1_name = trivialize(rs1, REG_T0); + const char* rs1_name = trivialize(rs1, REG_T0); load_local_address(REG_T2, slot); printf(" %s %s, 0(t2)\n", store_op_of_type(local_type[slot]), rs1_name); } -int materialize_address(int rd, int type, int kind) { - if (kind == KIND_ARRAY) { +int materialize_address(int rd, int type, int marker) { + if (marker == MARKER_ARRAY) { type |= TYPE_PTR_MASK; } reg_type[rd] = type; - indirection[rd] = kind == KIND_SCALAR; + indirection[rd] = marker == MARKER_SCALAR; return rd; } @@ -939,7 +907,7 @@ int lookup_from_slot(int slot) { } else { load_local_address(rd, slot); } - return materialize_address(rd, local_type[slot], local_kind[slot]); + return materialize_address(rd, local_type[slot], local_marker[slot]); } int load_imm(int imm) { @@ -956,21 +924,21 @@ int lookup(int id) { if (is_const[id]) { return load_imm(const_table[id]); } - char* name = id_table + id_lut[id]; - if (global_kind[id]) { - if (global_kind[id] == KIND_FUNCTION) { + const char* name = id_table + id_lut[id]; + if (global_marker[id]) { + if (global_marker[id] == MARKER_FUNCTION) { fprintf(stderr, "function name must not appear outside function call: %s\n", name); exit(1); } int rd = next_reg(TYPE_VOID_PTR); _asm_i("la", rd, name, " # id: ", id); - return materialize_address(rd, global_type[id], global_kind[id]); + return materialize_address(rd, global_type[id], global_marker[id]); } fprintf(stderr, "unresolved identifier: %s\n", name); exit(1); } 
-int asm_r_arith(char* op, int rs1) { +int asm_r_arith(const char* op, int rs1) { if (reg_type[rs1] & TYPE_PTR_MASK) { fprintf(stderr, "pointer cannot be arithmetically operated by %s\n", op); exit(1); @@ -978,7 +946,7 @@ int asm_r_arith(char* op, int rs1) { return asm_r(TYPE_INT, op, rs1); } -int asm_rr_arith(char* op, int rs1, int rs2) { +int asm_rr_arith(const char* op, int rs1, int rs2) { if (reg_type[rs1] & TYPE_PTR_MASK || reg_type[rs2] & TYPE_PTR_MASK) { fprintf(stderr, "pointer cannot be arithmetically operated by %s\n", op); exit(1); @@ -986,7 +954,7 @@ int asm_rr_arith(char* op, int rs1, int rs2) { return asm_rr(TYPE_INT, op, rs1, rs2); } -int asm_rr_cmp(char* op, int rs1, int rs2) { +int asm_rr_cmp(const char* op, int rs1, int rs2) { // since NULL is virtually 0, it is considered a valid example of a pointer comparing with an integer return asm_rr(TYPE_INT, op, rs1, rs2); } @@ -1139,8 +1107,8 @@ int parse_expr(); int parse_assign_expr(); int parse_function_call(int id) { - char* name = id_table + id_lut[id]; - if (global_kind[id] != KIND_FUNCTION) { + const char* name = id_table + id_lut[id]; + if (global_marker[id] != MARKER_FUNCTION) { fprintf(stderr, "not a function name: %s\n", name); exit(1); } @@ -1613,7 +1581,8 @@ void parse_local_variable(int type) { fprintf(stderr, "array of pointers is not supported\n"); exit(1); } - int size = expect_const(); + expect_token(TOKEN_NUMBER); + int size = token_data; expect_token(TOKEN_BRACKET_RIGHT); declare_local_array(id, type, size); return; @@ -1762,7 +1731,7 @@ void parse_stmt() { reset_temp(); } -void parse_function(char* name) { +void parse_function(const char* name) { reset_local(); int arg = 0; int args[8]; @@ -1808,7 +1777,6 @@ void parse_function(char* name) { } next_token(); if (token_type == TOKEN_SEMICOLON) { - reset_local_table(); return; } unget_token(); @@ -1864,26 +1832,27 @@ void parse_function(char* name) { } asm_addi("sp", "sp", frame_size); printf(" ret\n"); - reset_local_table(); } -void 
parse_global_variable(int id, char* name, int type) { +void parse_global_variable(int id, const char* name, int type) { printf(".data\n"); printf(".globl %s\n", name); printf(".align 5\n"); printf("%s:\n", name); if (token_type == TOKEN_ASSIGN) { - printf(" .dword %d\n", expect_const()); + expect_token(TOKEN_NUMBER); + printf(" .dword %d\n", token_data); } else if (token_type == TOKEN_BRACKET_LEFT) { if (type & TYPE_PTR_MASK) { fprintf(stderr, "array of pointers is not supported\n"); exit(1); } - int size = expect_const(); + expect_token(TOKEN_NUMBER); + int size = token_data; expect_token(TOKEN_BRACKET_RIGHT); int array_size = array_size_of(type, size); printf(" .zero %d\n", array_size); - declare_global(id, KIND_ARRAY, type); + declare_global(id, MARKER_ARRAY, type); } else { printf(" .zero %d\n", 8); unget_token(); @@ -1896,24 +1865,36 @@ void parse_global_declaration() { external = 1; next_token(); } + int is_const_int = 1; + if (token_type != TOKEN_CONST) { + is_const_int = 0; + } int type = parse_type(); if (type < 0) { fprintf(stderr, "expecting type for global declaration\n"); exit(1); } + if (type != TYPE_INT) { + is_const_int = 0; + } expect_token(TOKEN_ID); int id = token_data; char* name = id_table + id_lut[id]; next_token(); - if (token_type == TOKEN_PAREN_LEFT) { - declare_global(id, KIND_FUNCTION, type); + if (!external && is_const_int && token_type == TOKEN_ASSIGN) { + expect_token(TOKEN_NUMBER); + const_table[id] = token_data; + is_const[id] = 1; + expect_token(TOKEN_SEMICOLON); + } else if (token_type == TOKEN_PAREN_LEFT) { + declare_global(id, MARKER_FUNCTION, type); parse_function(name); } else { if (type == TYPE_VOID) { fprintf(stderr, "variable cannot be of void type\n"); exit(1); } - declare_global(id, KIND_SCALAR, type); + declare_global(id, MARKER_SCALAR, type); if (external) { unget_token(); } else { @@ -1923,49 +1904,11 @@ void parse_global_declaration() { } } -void parse_enum() { - expect_token(TOKEN_BRACE_LEFT); - int value = 0; - while 
(1) { - next_token(); - if (token_type == TOKEN_BRACE_RIGHT) { - break; - } - if (token_type != TOKEN_ID) { - fprintf(stderr, "expecting identifier in enum\n"); - exit(1); - } - int id = token_data; - next_token(); - if (token_type == TOKEN_ASSIGN) { - value = expect_const(); - } else { - unget_token(); - } - const_table[id] = value++; - is_const[id] = 1; - next_token(); - if (token_type == TOKEN_COMMA) { - // continue; - } else if (token_type == TOKEN_BRACE_RIGHT) { - break; - } else { - fprintf(stderr, "expecting ',' or '}'\n"); - exit(1); - } - } - expect_token(TOKEN_SEMICOLON); -} - void parse_top_level() { next_token(); if (token_type == TOKEN_EOF) return; - if (token_type == TOKEN_ENUM) { - parse_enum(); - } else { - parse_global_declaration(); - } + parse_global_declaration(); parse_top_level(); } diff --git a/demo/add.c b/demo/add.c index 45876ad..3623da5 100644 --- a/demo/add.c +++ b/demo/add.c @@ -1,4 +1,5 @@ -int printf(char* format, ...); +int printf(const char format[], ...); +int scanf(const char format[], ...); void should_be(int expected, int actual) { if (expected != actual) { diff --git a/demo/hello.c b/demo/hello.c index 65311ff..c94a263 100644 --- a/demo/hello.c +++ b/demo/hello.c @@ -1,4 +1,4 @@ -int printf(char* format, ...); +int printf(const char* format, ...); int main() { printf("hello world %d\n", 42); diff --git a/demo/lut.c b/demo/lut.c new file mode 100644 index 0000000..b4ed157 --- /dev/null +++ b/demo/lut.c @@ -0,0 +1,46 @@ +int printf(const char format[], ...); +int getchar(); + +char string_table[65536]; +int string_offset; +int string_lut[4096]; +int string_lut_size; + +int parse_string() { + int offset = string_offset; + int ch; + while ((ch = getchar()) != '"') { + if (ch == -1 || ch == '\n') { + printf("expecting '\"'\n"); + return 1; + } + string_table[string_offset++] = ch; + } + string_table[string_offset++] = 0; + string_lut[string_lut_size] = offset; + return string_lut_size++; +} + + +int streq(const char* s1, const char* 
s2) { + while (*s1 && *s2 && *s1 == *s2) { + s1++; + s2++; + } + return *s1 == *s2; +} + +void dump_string_table() { + printf(".data\n"); + for (int i = 0; i < string_lut_size; ++i) { + char* id = string_table + string_lut[i]; + printf(".LC%d: .string \"%s\", const: %d\n", + i, id, streq(id, "const")); + } +} + +int main() { + char ch; + while ((ch = getchar()) == '"') parse_string(); + dump_string_table(); +} \ No newline at end of file diff --git a/demo/queen.c b/demo/queen.c index 855793e..2789ed3 100644 --- a/demo/queen.c +++ b/demo/queen.c @@ -1,4 +1,4 @@ -int printf(char* format, ...); +int printf(const char format[], ...); int putchar(int ch); int a[9]; diff --git a/demo/sort.c b/demo/sort.c index c8e5c0f..8248c54 100644 --- a/demo/sort.c +++ b/demo/sort.c @@ -1,5 +1,5 @@ -int printf(char* format, ...); -int scanf(char* format, ...); +int printf(const char format[], ...); +int scanf(const char format[], ...); void sort(int a[], int n) { for (int i = 0; i < n; i++) { diff --git a/demo/strcmp.c b/demo/strcmp.c index c880e7d..50fe8aa 100644 --- a/demo/strcmp.c +++ b/demo/strcmp.c @@ -1,6 +1,6 @@ -int printf(char* format, ...); +int printf(const char* format, ...); -int strcmp(char* s1, char* s2) { +int strcmp(const char* s1, const char* s2) { while (*s1 && *s2 && *s1 == *s2) { s1++; s2++; @@ -9,8 +9,8 @@ int strcmp(char* s1, char* s2) { } int main() { - char* s1 = "helloworld"; - char* s2 = "world"; + const char* s1 = "helloworld"; + const char* s2 = "world"; printf("%d\n", strcmp(s1, s2)); printf("%d\n", strcmp(s1 + 5, s2)); } \ No newline at end of file