char[] support

This commit is contained in:
Yaossg 2024-11-16 01:12:39 +08:00
parent d73caeb37b
commit 1b144230d4
3 changed files with 150 additions and 123 deletions

View File

@ -17,16 +17,3 @@ void ungetchar(int ch) {
// Reports whether standard input has reached end-of-file.
// Returns the result of feof(stdin): non-zero once the end-of-file
// indicator is set on stdin, 0 otherwise.
int eof() {
    int at_end = feof(stdin);
    return at_end;
}
// Reads one element of a char array.
// Returns array[index], converted to int. No bounds checking is done.
int CA_get(char array[], int index) {
    char* slot = array + index;
    return *slot;
}
// Writes one element of a char array.
// Stores value (converted to char) into array[index]. No bounds checking
// is done.
void CA_set(char array[], int index, int value) {
    *(array + index) = value;
}
// this may be unnecessary
// Returns a pointer to the element `offset` positions into array,
// i.e. array + offset. No bounds checking is done.
char* CA_offset(char array[], int offset) {
    return &array[offset];
}

View File

@ -12,7 +12,4 @@ void exit(int status);
// ext: helpers that are not part of the standard C library
// Pushes ch back onto the input.
// NOTE(review): presumably the next getchar() returns it again - the
// definition is not fully visible here, confirm.
void ungetchar(int ch);
// Returns feof(stdin): non-zero once stdin has reached end-of-file.
int eof();
// Returns array[index] as an int.
int CA_get(char array[], int index);
// Stores value into array[index].
void CA_set(char array[], int index, int value);
// Returns array + offset.
char* CA_offset(char array[], int offset);
// printf-style formatted output.
// NOTE(review): presumably writes to stderr - confirm against the definition.
int eprintf(const char* format, ...);

257
boot.c
View File

@ -110,14 +110,14 @@ int string_lut[4096];
int string_lut_size;
// Reads the body of a string literal from the input, up to (and consuming)
// the closing '"', and appends it NUL-terminated to string_table.
// NOTE(review): the opening quote is presumably consumed by the caller.
//
// A backslash is replaced by the result of get_escaped_char().
// The start offset of the new string is recorded in string_lut.
//
// Returns the index of the new entry in string_lut.
int parse_string() {
    int offset = string_offset;
    // int rather than char: getchar() returns an int.
    int ch;
    while (!eof() && (ch = getchar()) != '"') {
        if (ch == '\\') {
            ch = get_escaped_char();
        }
        string_table[string_offset++] = ch;
    }
    string_table[string_offset++] = 0;
    string_lut[string_lut_size] = offset;
    return string_lut_size++;
}
@ -128,12 +128,12 @@ int id_lut[4096];
int id_lut_size;
// Reads an identifier whose first character `ch` has already been consumed
// and appends it NUL-terminated to id_table.
//
// Characters are taken from the input while is_id_cont() accepts them; the
// last character read is handed back with ungetchar(). The start offset of
// the identifier is recorded in id_lut.
//
// Returns the index of the new entry in id_lut.
int parse_id(int ch) {
    int offset = id_offset;
    id_table[id_offset++] = ch;
    while (!eof() && is_id_cont(ch = getchar())) {
        id_table[id_offset++] = ch;
    }
    ungetchar(ch);
    id_table[id_offset++] = 0;
    id_lut[id_lut_size] = offset;
    return id_lut_size++;
}
@ -145,9 +145,9 @@ void rewind_id(int new_data) {
}
void dedup_id() {
char* latest = CA_offset(id_table, id_lut[id_lut_size - 1]);
char* latest = &id_table[id_lut[id_lut_size - 1]];
for (int i = 0; i < id_lut_size - 1; i++) {
char* candidate = CA_offset(id_table, id_lut[i]);
char* candidate = &id_table[id_lut[i]];
if (!strcmp(candidate, latest)) {
rewind_id(i);
return;
@ -158,28 +158,28 @@ void dedup_id() {
void parse_id_like(int ch) {
token_type = TOKEN_ID;
token_data = parse_id(ch);
char* term = CA_offset(id_table, id_lut[token_data]);
if (!strcmp(term, "int")) {
char* id = &id_table[id_lut[token_data]];
if (!strcmp(id, "int")) {
token_type = TOKEN_INT;
} else if (!strcmp(term, "if")) {
} else if (!strcmp(id, "if")) {
token_type = TOKEN_IF;
} else if (!strcmp(term, "else")) {
} else if (!strcmp(id, "else")) {
token_type = TOKEN_ELSE;
} else if (!strcmp(term, "while")) {
} else if (!strcmp(id, "while")) {
token_type = TOKEN_WHILE;
} else if (!strcmp(term, "break")) {
} else if (!strcmp(id, "break")) {
token_type = TOKEN_BREAK;
} else if (!strcmp(term, "continue")) {
} else if (!strcmp(id, "continue")) {
token_type = TOKEN_CONTINUE;
} else if (!strcmp(term, "return")) {
} else if (!strcmp(id, "return")) {
token_type = TOKEN_RETURN;
} else if (!strcmp(term, "void")) {
} else if (!strcmp(id, "void")) {
token_type = TOKEN_VOID;
} else if (!strcmp(term, "const")) {
} else if (!strcmp(id, "const")) {
token_type = TOKEN_CONST;
} else if (!strcmp(term, "char")) {
} else if (!strcmp(id, "char")) {
token_type = TOKEN_CHAR;
} else if (!strcmp(term, "for")) {
} else if (!strcmp(id, "for")) {
token_type = TOKEN_FOR;
}
if (token_type != TOKEN_ID) {
@ -356,7 +356,7 @@ void next_token() {
}
eprintf("token: %d\n", token_type);
if (token_type == TOKEN_ID) {
const char* name = CA_offset(id_table, id_lut[token_data]);
const char* name = &id_table[id_lut[token_data]];
eprintf(" id: %s\n", name);
} else if (token_type == TOKEN_NUMBER) {
eprintf(" number: %d\n", token_data);
@ -377,6 +377,7 @@ const int TYPE_CHAR = 2;
// Pointer type ids are the pointee's type id with the TYPE_PTR_MASK bit set
// (e.g. TYPE_CHAR_PTR == TYPE_CHAR | TYPE_PTR_MASK == 2 | 16).
const int TYPE_VOID_PTR = 16;
const int TYPE_INT_PTR = 17;
const int TYPE_CHAR_PTR = 18;
// Bit that marks a type id as a pointer: lookup() ORs it onto an array's
// element type, indirection_of() clears it to get back the pointee type.
const int TYPE_PTR_MASK = 16;
int parse_type() {
if (token_type == TOKEN_CONST) {
@ -417,53 +418,57 @@ int local_table[4096]; // id -> local id
// Next local slot that next_reg() will hand out; reset_local() sets it back to 2.
// NOTE(review): slots 0 and 1 are never handed out - presumably reserved, confirm.
int next_local_id = 2;
// High-water mark of next_local_id, maintained by next_reg().
int max_local_id = 2;
// Slot markers. NONE/SCALAR/ARRAY carry the same values as
// MARKER_TEMP/MARKER_SCALAR/MARKER_ARRAY below.
const int NONE = 0;
const int SCALAR = 1;
const int ARRAY = 2;
// MARKER_TEMP: unnamed temporary slot; reset_temp() releases trailing ones.
const int MARKER_TEMP = 0;
// MARKER_SCALAR: slot bound to a variable by declare_local().
const int MARKER_SCALAR = 1;
// MARKER_ARRAY: slot that is an element of an array from declare_local_array().
const int MARKER_ARRAY = 2;
// Marker of each local slot, indexed by slot number.
int local_marker[4096];
// Marker of each global, indexed by identifier id (0 means "not a global").
int global_marker[4096];
// Type id of each local slot, indexed by slot number.
int local_type[4096];
// Type id of each global, indexed by identifier id.
int global_type[4096];
// Clears all per-function local state: the slot counters go back to 2 and
// every entry of local_table, local_marker and local_type is reset
// (no binding, MARKER_TEMP, TYPE_VOID).
void reset_local() {
    next_local_id = 2;
    max_local_id = 2;
    for (int i = 0; i < 4096; ++i) {
        local_table[i] = 0;
        local_marker[i] = MARKER_TEMP;
        local_type[i] = TYPE_VOID;
    }
}
// Releases trailing temporaries: lowers next_local_id while the topmost
// allocated slot is marked MARKER_TEMP. Stops at the first slot bound to a
// variable or array, and never goes below slot 2.
void reset_temp() {
    while (next_local_id > 2 && local_marker[next_local_id - 1] == MARKER_TEMP) {
        --next_local_id;
    }
}
// Allocates the next local slot and records `type` as its type id.
// Also raises max_local_id when the allocation passes the previous
// high-water mark.
//
// Returns the number of the newly allocated slot.
int next_reg(int type) {
    int reg = next_local_id++;
    local_type[reg] = type;
    if (next_local_id > max_local_id) {
        max_local_id = next_local_id;
    }
    return reg;
}
// Binds identifier `id` to a fresh local slot of the given type.
// If the identifier is already bound, the existing slot is returned and
// nothing is allocated.
//
// Returns the slot bound to `id`.
int declare_local(int id, int type) {
    if (local_table[id] != 0) {
        return local_table[id];
    }
    int reg = next_reg(type);
    local_marker[reg] = MARKER_SCALAR;
    local_table[id] = reg;
    return reg;
}
// Binds identifier `id` to a local array of `size` elements of `type`.
// One slot is allocated per element and marked MARKER_ARRAY; the identifier
// is bound to the last slot allocated. If the identifier is already bound,
// the existing slot is returned and nothing is allocated.
//
// A size below 1 is rejected: no slot would be allocated, so there would be
// nothing valid to bind the identifier to.
//
// Returns the slot bound to `id`.
int declare_local_array(int id, int type, int size) {
    if (local_table[id] != 0) {
        return local_table[id];
    }
    if (size < 1) {
        eprintf("array size must be positive\n");
        exit(1);
    }
    int reg = 0;
    for (int i = 0; i < size; ++i) {
        reg = next_reg(type);
        local_marker[reg] = MARKER_ARRAY;
    }
    local_table[id] = reg;
    return reg;
}
// Records identifier `id` as a global with the given marker
// (MARKER_SCALAR or MARKER_ARRAY at the visible call sites) and type id.
void declare_global(int id, int marker, int type) {
    global_marker[id] = marker;
    global_type[id] = type;
}
int check_itype_immediate(int value) {
@ -502,6 +507,10 @@ void asm_addi(const char* rd, const char* rs, int imm) {
const int INDIRECTION = 1048576; // 2**20
// Returns the type id recorded for the slot named by rs1.
// The INDIRECTION flag is stripped first, so both a plain slot number and
// an indirected one map to the same local_type entry.
int local_type_of(int rs1) {
    int slot = rs1 & ~INDIRECTION;
    return local_type[slot];
}
void load_address(int rd, int id) {
if (id & INDIRECTION) {
id = id & ~INDIRECTION;
@ -526,39 +535,51 @@ void load_address(int rd, int id) {
// Emits code that loads the value designated by `id` into temporary
// register t<rd>.
//
// The address is first put into t<rd> by load_address(); the value is then
// loaded through it. A byte load (lb) is used only when `id` carries the
// INDIRECTION flag and its recorded type is TYPE_CHAR; everything else is
// loaded with ld.
void load(int rd, int id) {
    load_address(rd, id);
    const char* op = "ld";
    if (local_type_of(id) == TYPE_CHAR && (id & INDIRECTION)) {
        op = "lb";
    }
    printf(" %s t%d, 0(t%d) # id: type %d\n", op, rd, rd, local_type_of(id));
}
// Emits a doubleword store of t<rs1> to the address held in t<rs2>.
void store(int rs1, int rs2) {
    printf(" sd t%d, 0(t%d)\n", rs1, rs2);
}

// Emits code that stores t0 into the location designated by `id`.
//
// The address is put into t1 by load_address(), so t1 is overwritten.
// A byte store (sb) is used only when `id` carries the INDIRECTION flag and
// its recorded type is TYPE_CHAR; everything else is stored with sd.
void store_t0(int id) {
    load_address(1, id);
    const char* op = "sd";
    if (local_type_of(id) == TYPE_CHAR && (id & INDIRECTION)) {
        op = "sb";
    }
    printf(" %s t0, 0(t1) # id: type %d\n", op, local_type_of(id));
}
// Saves the value currently in t0 into a freshly allocated local slot of
// the given type (emits the store via store_t0()).
//
// Returns the number of the new slot.
int materialize_t0(int type) {
    int reg = next_reg(type);
    store_t0(reg);
    return reg;
}
// Turns slot `reg`, which holds a pointer, into a reference to the pointee.
// The slot's recorded type has its TYPE_PTR_MASK bit cleared (pointer type
// becomes pointee type), and the returned value is the slot number with the
// INDIRECTION flag set.
int indirection_of(int reg) {
    int pointee = local_type[reg] & ~TYPE_PTR_MASK;
    local_type[reg] = pointee;
    return INDIRECTION | reg;
}
// Resolves identifier `id` to a slot reference usable by load()/store_t0().
//
// - Local scalar: returns its slot directly.
// - Local array: emits code taking the array's address and returns a new
//   temporary holding it, typed as pointer-to-element.
// - Global: emits `la` of the symbol into a new pointer-typed temporary.
//   For a non-array global the result is wrapped with indirection_of(), so
//   it refers to the variable's value; for an array it is the pointer itself.
// - Otherwise: reports the unresolved identifier and exits with status 1.
int lookup(int id) {
    int local = local_table[id];
    if (local) {
        if (local_marker[local] == MARKER_ARRAY) {
            load_address(0, local);
            return materialize_t0(local_type[local] | TYPE_PTR_MASK);
        }
        return local;
    }
    const char* name = &id_table[id_lut[id]];
    if (global_marker[id]) {
        printf(" la t0, %s # id: %d\n", name, id);
        int reg = materialize_t0(global_type[id] | TYPE_PTR_MASK);
        if (global_marker[id] != MARKER_ARRAY) {
            reg = indirection_of(reg);
        }
        return reg;
    }
    eprintf("unresolved identifier: %s\n", name);
    exit(1);
}
@ -575,16 +596,15 @@ int asm_label(int label) {
}
// Tells whether the slot reference rs1 must NOT be overwritten with a result.
// Non-zero when rs1 carries the INDIRECTION flag, or when its slot is marked
// as anything other than a temporary (MARKER_TEMP).
// The INDIRECTION test comes first so that local_marker is only indexed
// with a plain slot number.
int is_not_reusable(int rs1) {
    return (rs1 & INDIRECTION) || local_marker[rs1] != MARKER_TEMP;
}
// Emits a one-operand instruction `op t0, t0` applied to the value of rs1.
//
// The result is stored back into rs1 when that slot is a reusable
// temporary; otherwise a new slot with the same recorded type is allocated.
//
// Returns the slot holding the result.
int asm_r(const char* op, int rs1) {
    load(0, rs1);
    printf(" %s t0, t0\n", op);
    int rd = rs1;
    if (is_not_reusable(rs1)) {
        rd = next_reg(local_type_of(rs1));
    }
    store_t0(rd);
    return rd;
}
@ -594,9 +614,8 @@ int asm_rr(const char* op, int rs1, int rs2) {
printf(" %s t0, t0, t1\n", op);
int rd = rs1;
if (is_not_reusable(rs1)) rd = rs2;
if (is_not_reusable(rs2)) rd = next_reg();
load_address(1, rd);
store(0, 1);
if (is_not_reusable(rs2)) rd = next_reg(local_type_of(rs1));
store_t0(rd);
return rd;
}
@ -637,6 +656,19 @@ void asm_pop_label() {
--cont_label_stack_size;
}
// Returns the amount that ++ / -- adds to a value of the given type:
// 8 for TYPE_INT_PTR, 1 for every other type.
int step_of(int type) {
    int step = 1;
    if (type == TYPE_INT_PTR) {
        step = 8;
    }
    return step;
}
// Scales the value in t0 for indexing through a pointer of the given type:
// emits `slli t0, t0, 3` (multiply by 8) for TYPE_INT_PTR and emits nothing
// for any other type.
void asm_slli_t0(int type) {
    if (type != TYPE_INT_PTR) {
        return;
    }
    printf(" slli t0, t0, 3\n");
}
// parser
int parse_expr();
@ -646,18 +678,18 @@ int parse_primary_expr() {
exit(1);
} else if (token_type == TOKEN_NUMBER) {
printf(" li t0, %d\n", token_data);
return materialize_t0();
return materialize_t0(TYPE_INT);
} else if (token_type == TOKEN_ID) {
return lookup(token_data);
} else if (token_type == TOKEN_STRING) {
printf(" la t0, .LC%d\n", token_data);
return materialize_t0();
return materialize_t0(TYPE_CHAR_PTR);
} else if (token_type == TOKEN_PAREN_LEFT) {
int reg = parse_expr();
expect_token(TOKEN_PAREN_RIGHT);
return reg;
} else {
eprintf("unexpected primary token: %d\n", token_type);
eprintf("unexpected token: %d\n", token_type);
exit(1);
}
}
@ -667,31 +699,45 @@ int parse_postfix_expr() {
while (1) {
next_token();
if (token_type == TOKEN_INC) {
int reg = next_reg();
int type = local_type_of(lhs);
int reg = next_reg(type);
load(0, lhs);
load_address(1, reg);
store(0, 1);
printf(" addi t0, t0, 1\n");
load_address(1, lhs);
store(0, 1);
store_t0(reg);
printf(" addi t0, t0, %d\n", step_of(type));
store_t0(lhs);
return reg;
} else if (token_type == TOKEN_DEC) {
int reg = next_reg();
int type = local_type_of(lhs);
int reg = next_reg(type);
load(0, lhs);
load_address(1, reg);
store(0, 1);
printf(" addi t0, t0, -1\n");
load_address(1, lhs);
store(0, 1);
store_t0(reg);
printf(" addi t0, t0, -%d\n", step_of(type));
store_t0(lhs);
return reg;
} else if (token_type == TOKEN_BRACKET_LEFT) {
int rhs = parse_expr();
expect_token(TOKEN_BRACKET_RIGHT);
load(0, rhs);
load(1, lhs);
printf(" slli t0, t0, 3\n");
int type1 = local_type_of(lhs) & TYPE_PTR_MASK;
int type2 = local_type_of(rhs) & TYPE_PTR_MASK;
if (type1 == type2) {
eprintf("there should be exact one pointer and one integer in array access\n");
exit(1);
}
int ptr;
int idx;
if (type1) {
ptr = lhs;
idx = rhs;
} else {
ptr = rhs;
idx = lhs;
}
int ptr_type = local_type_of(ptr);
load(0, idx);
load(1, ptr);
asm_slli_t0(ptr_type);
printf(" add t0, t0, t1\n");
return materialize_t0() | INDIRECTION;
return indirection_of(materialize_t0(ptr_type));
} else if (token_type == TOKEN_PAREN_LEFT) {
int arg = 0;
int args[8];
@ -723,7 +769,7 @@ int parse_postfix_expr() {
load_address(0, lhs);
printf(" jalr t0\n");
printf(" mv t0, a0\n");
return materialize_t0();
return materialize_t0(local_type_of(lhs));
} else {
unget_token();
return lhs;
@ -734,12 +780,13 @@ int parse_postfix_expr() {
int parse_prefix_expr() {
next_token();
if (token_type == TOKEN_AND) {
int id = parse_postfix_expr();
load_address(0, id);
return materialize_t0();
int reg = parse_postfix_expr();
load_address(0, reg);
return materialize_t0(local_type_of(reg) | TYPE_PTR_MASK);
} else if (token_type == TOKEN_STAR) {
int reg = parse_postfix_expr();
return reg | INDIRECTION;
load(0, reg);
return indirection_of(materialize_t0(local_type_of(reg)));
} else if (token_type == TOKEN_MINUS) {
int reg = parse_postfix_expr();
return asm_r("neg", reg);
@ -752,16 +799,14 @@ int parse_prefix_expr() {
} else if (token_type == TOKEN_INC) {
int reg = parse_postfix_expr();
load(0, reg);
printf(" addi t0, t0, 1\n");
load_address(1, reg);
store(0, 1);
printf(" addi t0, t0, %d\n", step_of(local_type_of(reg)));
store_t0(reg);
return reg;
} else if (token_type == TOKEN_DEC) {
int reg = parse_postfix_expr();
load(0, reg);
printf(" addi t0, t0, -1\n");
load_address(1, reg);
store(0, 1);
printf(" addi t0, t0, -%d\n", step_of(local_type_of(reg)));
store_t0(reg);
return reg;
} else {
unget_token();
@ -954,8 +999,7 @@ int parse_assign_expr() {
if (token_type == TOKEN_ASSIGN) {
int rhs = parse_assign_expr();
load(0, rhs);
load_address(1, lhs);
store(0, 1);
store_t0(lhs);
return lhs;
} else {
unget_token();
@ -967,7 +1011,7 @@ int parse_expr() {
return parse_assign_expr();
}
void parse_local_variable() {
void parse_local_variable(int type) {
expect_token(TOKEN_ID);
int id = token_data;
next_token();
@ -975,10 +1019,10 @@ void parse_local_variable() {
expect_token(TOKEN_NUMBER);
int size = token_data;
expect_token(TOKEN_BRACKET_RIGHT);
declare_local_array(id, size);
declare_local_array(id, type, size);
next_token();
} else {
declare_local(id);
declare_local(id, type);
}
if (token_type == TOKEN_SEMICOLON) {
unget_token();
@ -988,8 +1032,7 @@ void parse_local_variable() {
expect_token(TOKEN_ASSIGN);
int reg = parse_expr();
load(0, reg);
load_address(1, local_table[id]);
store(0, 1);
store_t0(local_table[id]);
}
void parse_stmt();
@ -1058,6 +1101,7 @@ void parse_for() {
void parse_stmt() {
next_token();
int decl_type;
if (token_type == TOKEN_IF) {
parse_if();
return;
@ -1094,10 +1138,10 @@ void parse_stmt() {
} else if (token_type == TOKEN_CONTINUE) {
int label = asm_get_cont_label();
asm_j(label);
} else if (parse_type() >= 0) {
parse_local_variable();
} else if (token_type == TOKEN_SEMICOLON) {
unget_token();
} else if ((decl_type = parse_type()) >= 0) {
parse_local_variable(decl_type);
} else {
unget_token();
parse_expr();
@ -1119,9 +1163,9 @@ void parse_function(const char* name) {
expect_token(TOKEN_PAREN_RIGHT);
break;
}
parse_type();
int decl_type = parse_type();
expect_token(TOKEN_ID);
args[arg++] = declare_local(token_data);
args[arg++] = declare_local(token_data, decl_type);
next_token();
if (token_type == TOKEN_BRACKET_LEFT) {
expect_token(TOKEN_BRACKET_RIGHT);
@ -1170,9 +1214,8 @@ void parse_function(const char* name) {
asm_sd("fp", shift - 16, "sp");
asm_addi("fp", "sp", shift);
for (int i = 0; i < arg; ++i) {
load_address(1, args[i]);
printf(" mv t0, a%d\n", i);
store(0, 1);
store_t0(args[i]);
}
asm_j(label);
// epilog
@ -1183,7 +1226,7 @@ void parse_function(const char* name) {
printf(" ret\n");
}
void parse_global_variable(int id, const char* name) {
void parse_global_variable(int id, const char* name, int type) {
printf(".data\n");
printf(".globl %s\n", name);
printf(".align 5\n");
@ -1196,7 +1239,7 @@ void parse_global_variable(int id, const char* name) {
int size = token_data;
expect_token(TOKEN_BRACKET_RIGHT);
printf(" .zero %d\n", 8 * size);
declare_global(id, ARRAY);
declare_global(id, MARKER_ARRAY, type);
} else {
printf(" .zero %d\n", 8);
unget_token();
@ -1204,16 +1247,16 @@ void parse_global_variable(int id, const char* name) {
expect_token(TOKEN_SEMICOLON);
}
// Parses one top-level declaration whose type has already been parsed.
//
// Expects an identifier next and registers it as a global scalar of `type`.
// If a '(' follows, the rest is parsed as a function definition; otherwise
// it is parsed as a global variable.
void parse_decl(int type) {
    expect_token(TOKEN_ID);
    int id = token_data;
    declare_global(id, MARKER_SCALAR, type);
    char* name = &id_table[id_lut[id]];
    next_token();
    if (token_type == TOKEN_PAREN_LEFT) {
        parse_function(name);
    } else {
        parse_global_variable(id, name, type);
    }
}
@ -1222,8 +1265,8 @@ void parse_top_level() {
int decl_type;
if (token_type == TOKEN_EOF) {
return;
} else if (parse_type() >= 0) {
parse_decl();
} else if ((decl_type = parse_type()) >= 0) {
parse_decl(decl_type);
} else {
eprintf("unexpected token: %d\n", token_type);
exit(1);
@ -1236,7 +1279,7 @@ void dump_string_table() {
printf(".LC%d: .string \"", i);
int offset = 0;
int ch;
while ((ch = CA_get(string_table, string_lut[i] + offset)) != 0) {
while ((ch = string_table[string_lut[i] + offset]) != 0) {
if (ch == '\n') {
printf("\\n");
} else if (ch == '\t') {