From 1b144230d4f24640cd118f1689d6bf90bd9dbbbd Mon Sep 17 00:00:00 2001 From: Yaossg Date: Sat, 16 Nov 2024 01:12:39 +0800 Subject: [PATCH] char[] support --- boot-lib.c | 13 --- boot-lib.h | 3 - boot.c | 257 +++++++++++++++++++++++++++++++---------------------- 3 files changed, 150 insertions(+), 123 deletions(-) diff --git a/boot-lib.c b/boot-lib.c index 6a53fe4..02192e8 100644 --- a/boot-lib.c +++ b/boot-lib.c @@ -17,16 +17,3 @@ void ungetchar(int ch) { int eof() { return feof(stdin); } - -int CA_get(char array[], int index) { - return array[index]; -} - -void CA_set(char array[], int index, int value) { - array[index] = value; -} - -// this may be unnecessary -char* CA_offset(char array[], int offset) { - return array + offset; -} diff --git a/boot-lib.h b/boot-lib.h index 3010a27..c3b7777 100644 --- a/boot-lib.h +++ b/boot-lib.h @@ -12,7 +12,4 @@ void exit(int status); // ext void ungetchar(int ch); int eof(); -int CA_get(char array[], int index); -void CA_set(char array[], int index, int value); -char* CA_offset(char array[], int offset); int eprintf(const char* format, ...); diff --git a/boot.c b/boot.c index 4099852..9bce0e8 100644 --- a/boot.c +++ b/boot.c @@ -110,14 +110,14 @@ int string_lut[4096]; int string_lut_size; int parse_string() { int offset = string_offset; - char ch; + int ch; while (!eof() && (ch = getchar()) != '"') { if (ch == '\\') { ch = get_escaped_char(); } - CA_set(string_table, string_offset++, ch); + string_table[string_offset++] = ch; } - CA_set(string_table, string_offset++, 0); + string_table[string_offset++] = 0; string_lut[string_lut_size] = offset; return string_lut_size++; } @@ -128,12 +128,12 @@ int id_lut[4096]; int id_lut_size; int parse_id(int ch) { int offset = id_offset; - CA_set(id_table, id_offset++, ch); + id_table[id_offset++] = ch; while (!eof() && is_id_cont(ch = getchar())) { - CA_set(id_table, id_offset++, ch); + id_table[id_offset++] = ch; } ungetchar(ch); - CA_set(id_table, id_offset++, 0); + id_table[id_offset++] = 0; id_lut[id_lut_size] = offset; return id_lut_size++; } @@ -145,9 +145,9 @@ void rewind_id(int new_data) { } void dedup_id() { - char* latest = CA_offset(id_table, id_lut[id_lut_size - 1]); + char* latest = &id_table[id_lut[id_lut_size - 1]]; for (int i = 0; i < id_lut_size - 1; i++) { - char* candidate = CA_offset(id_table, id_lut[i]); + char* candidate = &id_table[id_lut[i]]; if (!strcmp(candidate, latest)) { rewind_id(i); return; @@ -158,28 +158,28 @@ void dedup_id() { void parse_id_like(int ch) { token_type = TOKEN_ID; token_data = parse_id(ch); - char* term = CA_offset(id_table, id_lut[token_data]); - if (!strcmp(term, "int")) { + char* id = &id_table[id_lut[token_data]]; + if (!strcmp(id, "int")) { token_type = TOKEN_INT; - } else if (!strcmp(term, "if")) { + } else if (!strcmp(id, "if")) { token_type = TOKEN_IF; - } else if (!strcmp(term, "else")) { + } else if (!strcmp(id, "else")) { token_type = TOKEN_ELSE; - } else if (!strcmp(term, "while")) { + } else if (!strcmp(id, "while")) { token_type = TOKEN_WHILE; - } else if (!strcmp(term, "break")) { + } else if (!strcmp(id, "break")) { token_type = TOKEN_BREAK; - } else if (!strcmp(term, "continue")) { + } else if (!strcmp(id, "continue")) { token_type = TOKEN_CONTINUE; - } else if (!strcmp(term, "return")) { + } else if (!strcmp(id, "return")) { token_type = TOKEN_RETURN; - } else if (!strcmp(term, "void")) { + } else if (!strcmp(id, "void")) { token_type = TOKEN_VOID; - } else if (!strcmp(term, "const")) { + } else if (!strcmp(id, "const")) { token_type = TOKEN_CONST; - } else if (!strcmp(term, "char")) { + } else if (!strcmp(id, "char")) { token_type = TOKEN_CHAR; - } else if (!strcmp(term, "for")) { + } else if (!strcmp(id, "for")) { token_type = TOKEN_FOR; } if (token_type != TOKEN_ID) { @@ -356,7 +356,7 @@ void next_token() { } eprintf("token: %d\n", token_type); if (token_type == TOKEN_ID) { - const char* name = CA_offset(id_table, id_lut[token_data]); + const char* name = &id_table[id_lut[token_data]]; eprintf(" id: %s\n", name); } else if (token_type == TOKEN_NUMBER) { eprintf(" number: %d\n", token_data); @@ -377,6 +377,7 @@ const int TYPE_CHAR = 2; const int TYPE_VOID_PTR = 16; const int TYPE_INT_PTR = 17; const int TYPE_CHAR_PTR = 18; +const int TYPE_PTR_MASK = 16; int parse_type() { if (token_type == TOKEN_CONST) { @@ -417,53 +418,57 @@ int local_table[4096]; // id -> local id int next_local_id = 2; int max_local_id = 2; -const int NONE = 0; -const int SCALAR = 1; -const int ARRAY = 2; +const int MARKER_TEMP = 0; +const int MARKER_SCALAR = 1; +const int MARKER_ARRAY = 2; int local_marker[4096]; int global_marker[4096]; - +int local_type[4096]; +int global_type[4096]; void reset_local() { next_local_id = 2; max_local_id = 2; for (int i = 0; i < 4096; ++i) { local_table[i] = 0; - local_marker[i] = NONE; + local_marker[i] = MARKER_TEMP; + local_type[i] = TYPE_VOID; } } void reset_temp() { - while (next_local_id > 2 && !local_marker[next_local_id - 1]) { + while (next_local_id > 2 && local_marker[next_local_id - 1] == MARKER_TEMP) { --next_local_id; } } -int next_reg() { +int next_reg(int type) { int reg = next_local_id++; + local_type[reg] = type; if (next_local_id > max_local_id) { max_local_id = next_local_id; } return reg; } -int declare_local(int id) { +int declare_local(int id, int type) { if (local_table[id] != 0) return local_table[id]; - int reg = next_reg(); - local_marker[reg] = SCALAR; + int reg = next_reg(type); + local_marker[reg] = MARKER_SCALAR; return local_table[id] = reg; } -int declare_local_array(int id, int size) { +int declare_local_array(int id, int type, int size) { if (local_table[id] != 0) return local_table[id]; int reg; - for (int i = 0; i < size; ++i) local_marker[reg = next_reg()] = ARRAY; + for (int i = 0; i < size; ++i) local_marker[reg = next_reg(type)] = MARKER_ARRAY; return local_table[id] = reg; } -void declare_global(int id, int marker) { +void declare_global(int id, int marker, int type) { global_marker[id] = marker; + global_type[id] = type; } int check_itype_immediate(int value) { @@ -502,6 +507,10 @@ void asm_addi(const char* rd, const char* rs, int imm) { const int INDIRECTION = 1048576; // 2**20 +int local_type_of(int rs1) { + return local_type[rs1 & ~INDIRECTION]; +} + void load_address(int rd, int id) { if (id & INDIRECTION) { id = id & ~INDIRECTION; @@ -526,39 +535,51 @@ void load_address(int rd, int id) { void load(int rd, int id) { load_address(rd, id); - printf(" ld t%d, 0(t%d)\n", rd, rd); + const char* op = "ld"; + if (local_type_of(id) == TYPE_CHAR && (id & INDIRECTION)) { + op = "lb"; + } + printf(" %s t%d, 0(t%d) # id: type %d\n", op, rd, rd, local_type_of(id)); } -void store(int rs1, int rs2) { - printf(" sd t%d, 0(t%d)\n", rs1, rs2); +void store_t0(int id) { + load_address(1, id); + const char* op = "sd"; + if (local_type_of(id) == TYPE_CHAR && (id & INDIRECTION)) { + op = "sb"; + } + printf(" %s t0, 0(t1) # id: type %d\n", op, local_type_of(id)); } -int materialize_t0() { - int reg = next_reg(); - load_address(1, reg); - store(0, 1); +int materialize_t0(int type) { + int reg = next_reg(type); + store_t0(reg); return reg; } +int indirection_of(int reg) { + local_type[reg] = local_type[reg] & ~TYPE_PTR_MASK; + return reg | INDIRECTION; +} + int lookup(int id) { int local = local_table[id]; if (local) { - if (local_marker[local] == ARRAY) { + if (local_marker[local] == MARKER_ARRAY) { load_address(0, local); - return materialize_t0(); + return materialize_t0(local_type[local] | TYPE_PTR_MASK); } return local; } + const char* name = &id_table[id_lut[id]]; if (global_marker[id]) { - const char* name = CA_offset(id_table, id_lut[id]); printf(" la t0, %s # id: %d\n", name, id); - int reg = materialize_t0(); - if (global_marker[id] != ARRAY) { - reg = reg | INDIRECTION; + int reg = materialize_t0(global_type[id] | TYPE_PTR_MASK); + if (global_marker[id] != MARKER_ARRAY) { + reg = indirection_of(reg); } return reg; } - const char* name = CA_offset(id_table, id_lut[id]); eprintf("unresolved identifier: %s\n", name); exit(1); } @@ -575,16 +596,15 @@ int asm_label(int label) { } int is_not_reusable(int rs1) { - return (rs1 & INDIRECTION) || local_marker[rs1]; + return (rs1 & INDIRECTION) || local_marker[rs1] != MARKER_TEMP; } int asm_r(const char* op, int rs1) { load(0, rs1); printf(" %s t0, t0\n", op); int rd = rs1; - if (is_not_reusable(rs1)) rd = next_reg(); - load_address(1, rd); - store(0, 1); + if (is_not_reusable(rs1)) rd = next_reg(local_type_of(rs1)); + store_t0(rd); return rd; } @@ -594,9 +614,8 @@ int asm_rr(const char* op, int rs1, int rs2) { printf(" %s t0, t0, t1\n", op); int rd = rs1; if (is_not_reusable(rs1)) rd = rs2; - if (is_not_reusable(rs2)) rd = next_reg(); - load_address(1, rd); - store(0, 1); + if (is_not_reusable(rs2)) rd = next_reg(local_type_of(rs1)); + store_t0(rd); return rd; } @@ -637,6 +656,19 @@ void asm_pop_label() { --cont_label_stack_size; } +int step_of(int type) { + if (type == TYPE_INT_PTR) { + return 8; + } + return 1; +} + +void asm_slli_t0(int type) { + if (type == TYPE_INT_PTR) { + printf(" slli t0, t0, 3\n"); + } +} + // parser int parse_expr(); @@ -646,18 +678,18 @@ int parse_primary_expr() { exit(1); } else if (token_type == TOKEN_NUMBER) { printf(" li t0, %d\n", token_data); - return materialize_t0(); + return materialize_t0(TYPE_INT); } else if (token_type == TOKEN_ID) { return lookup(token_data); } else if (token_type == TOKEN_STRING) { printf(" la t0, .LC%d\n", token_data); - return materialize_t0(); + return materialize_t0(TYPE_CHAR_PTR); } else if (token_type == TOKEN_PAREN_LEFT) { int reg = parse_expr(); expect_token(TOKEN_PAREN_RIGHT); return reg; } else { - eprintf("unexpected primary token: %d\n", token_type); + eprintf("unexpected token: %d\n", token_type); exit(1); } } @@ -667,31 +699,45 @@ int parse_postfix_expr() { while (1) { next_token(); if (token_type == TOKEN_INC) { - int reg = next_reg(); + int type = local_type_of(lhs); + int reg = next_reg(type); load(0, lhs); - load_address(1, reg); - store(0, 1); - printf(" addi t0, t0, 1\n"); - load_address(1, lhs); - store(0, 1); + store_t0(reg); + printf(" addi t0, t0, %d\n", step_of(type)); + store_t0(lhs); return reg; } else if (token_type == TOKEN_DEC) { - int reg = next_reg(); + int type = local_type_of(lhs); + int reg = next_reg(type); load(0, lhs); - load_address(1, reg); - store(0, 1); - printf(" addi t0, t0, -1\n"); - load_address(1, lhs); - store(0, 1); + store_t0(reg); + printf(" addi t0, t0, -%d\n", step_of(type)); + store_t0(lhs); return reg; } else if (token_type == TOKEN_BRACKET_LEFT) { int rhs = parse_expr(); expect_token(TOKEN_BRACKET_RIGHT); - load(0, rhs); - load(1, lhs); - printf(" slli t0, t0, 3\n"); + int type1 = local_type_of(lhs) & TYPE_PTR_MASK; + int type2 = local_type_of(rhs) & TYPE_PTR_MASK; + if (type1 == type2) { + eprintf("there should be exact one pointer and one integer in array access\n"); + exit(1); + } + int ptr; + int idx; + if (type1) { + ptr = lhs; + idx = rhs; + } else { + ptr = rhs; + idx = lhs; + } + int ptr_type = local_type_of(ptr); + load(0, idx); + load(1, ptr); + asm_slli_t0(ptr_type); printf(" add t0, t0, t1\n"); - return materialize_t0() | INDIRECTION; + return indirection_of(materialize_t0(ptr_type)); } else if (token_type == TOKEN_PAREN_LEFT) { int arg = 0; int args[8]; @@ -723,7 +769,7 @@ int parse_postfix_expr() { load_address(0, lhs); printf(" jalr t0\n"); printf(" mv t0, a0\n"); - return materialize_t0(); + return materialize_t0(local_type_of(lhs)); } else { unget_token(); return lhs; @@ -734,12 +780,13 @@ int parse_postfix_expr() { int parse_prefix_expr() { next_token(); if (token_type == TOKEN_AND) { - int id = parse_postfix_expr(); - load_address(0, id); - return materialize_t0(); + int reg = parse_postfix_expr(); + load_address(0, reg); + return materialize_t0(local_type_of(reg) | TYPE_PTR_MASK); } else if (token_type == TOKEN_STAR) { int reg = parse_postfix_expr(); - return reg | INDIRECTION; + load(0, reg); + return indirection_of(materialize_t0(local_type_of(reg))); } else if (token_type == TOKEN_MINUS) { int reg = parse_postfix_expr(); return asm_r("neg", reg); @@ -752,16 +799,14 @@ int parse_prefix_expr() { } else if (token_type == TOKEN_INC) { int reg = parse_postfix_expr(); load(0, reg); - printf(" addi t0, t0, 1\n"); - load_address(1, reg); - store(0, 1); + printf(" addi t0, t0, %d\n", step_of(local_type_of(reg))); + store_t0(reg); return reg; } else if (token_type == TOKEN_DEC) { int reg = parse_postfix_expr(); load(0, reg); - printf(" addi t0, t0, -1\n"); - load_address(1, reg); - store(0, 1); + printf(" addi t0, t0, -%d\n", step_of(local_type_of(reg))); + store_t0(reg); return reg; } else { unget_token(); @@ -954,8 +999,7 @@ int parse_assign_expr() { if (token_type == TOKEN_ASSIGN) { int rhs = parse_assign_expr(); load(0, rhs); - load_address(1, lhs); - store(0, 1); + store_t0(lhs); return lhs; } else { unget_token(); @@ -967,7 +1011,7 @@ int parse_expr() { return parse_assign_expr(); } -void parse_local_variable() { +void parse_local_variable(int type) { expect_token(TOKEN_ID); int id = token_data; next_token(); @@ -975,10 +1019,10 @@ void parse_local_variable() { expect_token(TOKEN_NUMBER); int size = token_data; expect_token(TOKEN_BRACKET_RIGHT); - declare_local_array(id, size); + declare_local_array(id, type, size); next_token(); } else { - declare_local(id); + declare_local(id, type); } if (token_type == TOKEN_SEMICOLON) { unget_token(); @@ -988,8 +1032,7 @@ void parse_local_variable() { expect_token(TOKEN_ASSIGN); int reg = parse_expr(); load(0, reg); - load_address(1, local_table[id]); - store(0, 1); + store_t0(local_table[id]); } void parse_stmt(); @@ -1058,6 +1101,7 @@ void parse_for() { void parse_stmt() { next_token(); + int decl_type; if (token_type == TOKEN_IF) { parse_if(); return; @@ -1094,10 +1138,10 @@ void parse_stmt() { } else if (token_type == TOKEN_CONTINUE) { int label = asm_get_cont_label(); asm_j(label); - } else if (parse_type() >= 0) { - parse_local_variable(); } else if (token_type == TOKEN_SEMICOLON) { unget_token(); + } else if ((decl_type = parse_type()) >= 0) { + parse_local_variable(decl_type); } else { unget_token(); parse_expr(); @@ -1119,9 +1163,9 @@ void parse_function(const char* name) { expect_token(TOKEN_PAREN_RIGHT); break; } - parse_type(); + int decl_type = parse_type(); expect_token(TOKEN_ID); - args[arg++] = declare_local(token_data); + args[arg++] = declare_local(token_data, decl_type); next_token(); if (token_type == TOKEN_BRACKET_LEFT) { expect_token(TOKEN_BRACKET_RIGHT); @@ -1170,9 +1214,8 @@ void parse_function(const char* name) { asm_sd("fp", shift - 16, "sp"); asm_addi("fp", "sp", shift); for (int i = 0; i < arg; ++i) { - load_address(1, args[i]); printf(" mv t0, a%d\n", i); - store(0, 1); + store_t0(args[i]); } asm_j(label); // epilog @@ -1183,7 +1226,7 @@ void parse_function(const char* name) { printf(" ret\n"); } -void parse_global_variable(int id, const char* name) { +void parse_global_variable(int id, const char* name, int type) { printf(".data\n"); printf(".globl %s\n", name); printf(".align 5\n"); @@ -1196,7 +1239,7 @@ void parse_global_variable(int id, const char* name) { int size = token_data; expect_token(TOKEN_BRACKET_RIGHT); printf(" .zero %d\n", 8 * size); - declare_global(id, ARRAY); + declare_global(id, MARKER_ARRAY, type); } else { printf(" .zero %d\n", 8); unget_token(); @@ -1204,16 +1247,16 @@ void parse_global_variable(int id, const char* name) { expect_token(TOKEN_SEMICOLON); } -void parse_decl() { +void parse_decl(int type) { expect_token(TOKEN_ID); int id = token_data; - declare_global(id, SCALAR); - char* name = CA_offset(id_table, id_lut[id]); + declare_global(id, MARKER_SCALAR, type); + char* name = &id_table[id_lut[id]]; next_token(); if (token_type == TOKEN_PAREN_LEFT) { parse_function(name); } else { - parse_global_variable(id, name); + parse_global_variable(id, name, type); } } @@ -1222,8 +1265,8 @@ void parse_top_level() { int decl_type; if (token_type == TOKEN_EOF) { return; - } else if (parse_type() >= 0) { - parse_decl(); + } else if ((decl_type = parse_type()) >= 0) { + parse_decl(decl_type); } else { eprintf("unexpected token: %d\n", token_type); exit(1); @@ -1236,7 +1279,7 @@ void dump_string_table() { printf(".LC%d: .string \"", i); int offset = 0; int ch; - while ((ch = CA_get(string_table, string_lut[i] + offset)) != 0) { + while ((ch = string_table[string_lut[i] + offset]) != 0) { if (ch == '\n') { printf("\\n"); } else if (ch == '\t') {