From f778cf0670a48d4a6fd58bc6f83f78407157475f Mon Sep 17 00:00:00 2001 From: Yaossg Date: Fri, 6 Dec 2024 15:06:55 +0800 Subject: [PATCH] extern --- README.md | 47 ++++++++++++------- boot-lib.c | 15 ------ boot-lib.h | 11 ----- boot-native.sh | 16 +++---- boot.c | 124 ++++++++++++++++++++++++++++--------------------- boot.sh | 14 +++--- 6 files changed, 113 insertions(+), 114 deletions(-) delete mode 100644 boot-lib.c delete mode 100644 boot-lib.h diff --git a/README.md b/README.md index 91db7ee..9f25aaa 100644 --- a/README.md +++ b/README.md @@ -49,14 +49,14 @@ $ sh boot.sh 自举会输出六个文件,三个汇编文件和三个可执行文件: -| 源代码 | 编译器 | 汇编 | 可执行 | 代号 | 命名 | -| ----------------- | --------- | ------- | --------- | ---- | ---------------------- | -| boot.c boot-lib.c | gcc | | gcc.out | G | 自制编译器 | -| boot.c boot-lib.h | gcc.out | boot1.s | boot1.out | B1 | 自举自制编译器 | -| boot.c boot-lib.h | boot1.out | boot2.s | boot2.out | B2 | 自举自举自制编译器 | -| boot.c boot-lib.h | boot2.out | boot3.s | | B3 | 验证自举自举自制编译器 | +| 源代码 | 编译器 | 汇编 | 可执行 | 代号 | 命名 | +| ------ | --------- | ------- | --------- | ---- | ---------------------- | +| boot.c | gcc | | gcc.out | G | 自制编译器 | +| boot.c | gcc.out | boot1.s | boot1.out | B1 | 自举自制编译器 | +| boot.c | boot1.out | boot2.s | boot2.out | B2 | 自举自举自制编译器 | +| boot.c | boot2.out | boot3.s | | B3 | 验证自举自举自制编译器 | -后三次编译时,boot-lib.h 的内容被手动导入 boot.c 开头进行编译,boot-lib.c 提供的库通过链接引入。 +除了第一次编译全程由 gcc 完成之外,另外三次编译从源码到汇编由本编译器完成,从汇编到可执行文件由 gcc 完成。从汇编到可执行文件时需要将 glibc 链接进去,这对于 gcc 来说是默认的行为。 整个自举及其验证的过程如下图所示: @@ -95,7 +95,7 @@ $ sh boot.sh ### 关键字 -本语言包含的关键字即为支持的标量类型的关键字和流程控制的关键字,还有 `const`。 +本语言包含的关键字即为支持的标量类型的关键字和流程控制的关键字,还有 `const` 和 `extern`。 #### `const` 关键字 @@ -103,15 +103,29 @@ $ sh boot.sh 但是当在出现 -- 全局,标量(即不是数组) -- 类型为 `const int` 或 `const int const` -- 带有初始化 +- 全局,标量(即不是数组)。 +- 类型为 `const int` 或 `const int const`。 +- 带有初始化。 +- 不是 `extern` 的。 的声明时,将会被解析为整数常量。 整数常量在使用的时候会被直接替换为对应的右值,失去作为全局变量左值的性质。 -使用 `int const` 或 `int` 可以避免这样的特殊处理。 +使用 `int const` 或 `int` 形式或添加 `extern` 
可以避免这样的特殊处理。 + +#### `extern` 关键字 + +`extern` 可以用在全局函数和全局变量声明的开头。 + +全局函数的声明和定义都会直接忽略这个关键字。全局函数的声明和定义由是否提供函数体决定,与该关键字无关。 + +全局变量如果使用了这个关键字,则有以下特性和限制: + +- 变量仅被声明,而没有被定义。 + - 如果需要使用这样的变量,需要稍后提供定义,或在外部已经定义。 +- 不可以初始化。 +- 不可以是数组。 ### 支持以下运算符 @@ -144,7 +158,6 @@ $ sh boot.sh - 支持全局变量和局部变量,局部变量遮挡全局变量。 - 不支持局部变量之间的遮挡,重名的局部变量为同一变量。 -- 支持函数声明,可以通过函数声明来调用 C 语言库。不支持变量声明。 - 函数只支持最多八个参数。函数声明中支持可变参数,仅用于兼容 C 语言库。 - 类型检查有遗漏,若 C 编译器报错,而本语言编译通过,就可以认为是 UB。 - 例如函数调用的参数和 `return` 语句不会检查类型。 @@ -165,14 +178,12 @@ $ sh boot.sh ## 依赖 -直接依赖下面这些 C 语言库函数,在本语言中提供声明后调用。 +直接依赖下面这些 C 语言库函数和变量,在本语言中提供声明后调用。 - `printf` - `getchar` - `exit` -间接依赖下面这些 C 语言库函数,在 C 语言中进行封装后调用。 - -- `ungetc`(理论上非必须,可以在本语言中手动模拟) -- `vfprintf` 和可变参数有关的宏(用于输出调试信息,非必须) +- `ungetc` 和 `stdin`(理论上非必须,可以在本语言中手动模拟) +- `fprintf` 和 `stderr`(理论上非必须,用于输出错误信息和调试信息) diff --git a/boot-lib.c b/boot-lib.c deleted file mode 100644 index 093dedd..0000000 --- a/boot-lib.c +++ /dev/null @@ -1,15 +0,0 @@ -#include -#include - - -int eprintf(const char format[], ...) { - va_list args; - va_start(args, format); - int ret = vfprintf(stderr, format, args); - va_end(args); - return ret; -} - -void ungetchar(int ch) { - ungetc(ch, stdin); -} \ No newline at end of file diff --git a/boot-lib.h b/boot-lib.h deleted file mode 100644 index c6d147a..0000000 --- a/boot-lib.h +++ /dev/null @@ -1,11 +0,0 @@ -#include -#include - -// std -int printf(const char* format, ...); -int getchar(); -void exit(int status); - -// ext -void ungetchar(int ch); -int eprintf(const char* format, ...); diff --git a/boot-native.sh b/boot-native.sh index 3fe3375..dc3a4c9 100644 --- a/boot-native.sh +++ b/boot-native.sh @@ -1,12 +1,10 @@ mkdir -p build && cd build && -cat ../boot-lib.h ../boot.c | sed '/^#/d' > boot-all.c && -gcc ../boot.c ../boot-lib.c -o gcc.out && -./gcc.out < boot-all.c > boot1.s && -gcc -static boot1.s ../boot-lib.c -o boot1.out && -./boot1.out < boot-all.c > boot2.s && -gcc -static boot2.s ../boot-lib.c -o boot2.out && -./boot2.out < boot-all.c > boot3.s && 
+gcc ../boot.c -o gcc.out && +./gcc.out < ../boot.c > boot1.s && +gcc -static boot1.s -o boot1.out && +./boot1.out < ../boot.c > boot2.s && +gcc -static boot2.s -o boot2.out && +./boot2.out < ../boot.c > boot3.s && cmp --silent boot1.s boot2.s && echo "boot1.s == boot2.s" || echo "boot1.s != boot2.s" cmp --silent boot2.s boot3.s && echo "boot2.s == boot3.s" || echo "boot2.s != boot3.s" -cmp --silent boot1.s boot3.s && echo "boot1.s == boot3.s" || echo "boot1.s != boot3.s" -rm boot-all.c \ No newline at end of file +cmp --silent boot1.s boot3.s && echo "boot1.s == boot3.s" || echo "boot1.s != boot3.s" \ No newline at end of file diff --git a/boot.c b/boot.c index 3476acc..b146149 100644 --- a/boot.c +++ b/boot.c @@ -1,4 +1,18 @@ -#include "boot-lib.h" +// libc dependency + +extern void* stdin; +extern void* stdout; +extern void* stderr; + +int printf(const char* format, ...); +int getchar(); +void exit(int status); +int fprintf(void* file, const char* format, ...); +int ungetc(int ch, void* file); + +void ungetchar(int ch) { + ungetc(ch, stdin); +} // lexer @@ -79,6 +93,7 @@ const int TOKEN_BREAK = 107; const int TOKEN_CONTINUE = 108; const int TOKEN_RETURN = 109; +const int TOKEN_EXTERN = 126; const int TOKEN_CONST = 127; const int TOKEN_VOID = 128; const int TOKEN_INT = 129; @@ -121,7 +136,7 @@ int get_escaped_char() { } else if (ch == '\"') { ch = '\"'; } else { - eprintf("unexpected escaped character: %c\n", ch); + fprintf(stderr, "unexpected escaped character: %c\n", ch); exit(1); } return ch; @@ -144,7 +159,7 @@ int parse_string() { int ch; while ((ch = getchar()) != '"') { if (ch == -1 || ch == '\n') { - eprintf("expecting '\"'\n"); + fprintf(stderr, "expecting '\"'\n"); exit(1); } if (ch == '\\') { @@ -237,6 +252,8 @@ void parse_id_like(int ch) { token_type = TOKEN_FOR; } else if (streq(id, "do")) { token_type = TOKEN_DO; + } else if (streq(id, "extern")) { + token_type = TOKEN_EXTERN; } if (token_type != TOKEN_ID) { rewind_id(0); @@ -312,7 +329,7 @@ void 
next_token() { while (1) { ch = getchar(); if (ch == -1) { - eprintf("expecting '*/'\n"); + fprintf(stderr, "expecting '*/'\n"); exit(1); } if (ch == '*') { @@ -429,7 +446,7 @@ void next_token() { token_data = get_escaped_char(); } if (getchar() != '\'') { - eprintf("expecting '\n"); + fprintf(stderr, "expecting '\n"); exit(1); } } else if (ch == '"') { @@ -444,7 +461,7 @@ void next_token() { } } if (token_type != TOKEN_ELLIPSIS) { - eprintf("expecting '...'\n"); + fprintf(stderr, "expecting '...'\n"); exit(1); } } else if (is_digit(ch)) { @@ -453,22 +470,22 @@ void next_token() { } else if (is_id_start(ch)) { parse_id_like(ch); } else { - eprintf("unexpected character: %c(%d)\n", ch, ch); + fprintf(stderr, "unexpected character: %c(%d)\n", ch, ch); exit(1); } - eprintf("token: %d\n", token_type); + fprintf(stderr, "token: %d\n", token_type); if (token_type == TOKEN_ID) { const char* name = id_table + id_lut[token_data]; - eprintf(" id: %s\n", name); + fprintf(stderr, " id: %s\n", name); } else if (token_type == TOKEN_NUMBER) { - eprintf(" number: %d\n", token_data); + fprintf(stderr, " number: %d\n", token_data); } } void expect_token(int expected_type) { next_token(); if (token_type != expected_type) { - eprintf("unexpected token: %d, should be %d\n", token_type, expected_type); + fprintf(stderr, "unexpected token: %d, should be %d\n", token_type, expected_type); exit(1); } } @@ -890,20 +907,20 @@ int lookup(int id) { const char* name = id_table + id_lut[id]; if (global_marker[id]) { if (global_marker[id] == MARKER_FUNCTION) { - eprintf("function name must not appear outside function call: %s\n", name); + fprintf(stderr, "function name must not appear outside function call: %s\n", name); exit(1); } int rd = next_reg(TYPE_VOID_PTR); _asm_i("la", rd, name, " # id: ", id); return materialize_address(rd, global_type[id], global_marker[id]); } - eprintf("unresolved identifier: %s\n", name); + fprintf(stderr, "unresolved identifier: %s\n", name); exit(1); } int 
asm_r_arith(const char* op, int rs1) { if (reg_type[rs1] & TYPE_PTR_MASK) { - eprintf("pointer cannot be arithmetically operated by %s\n", op); + fprintf(stderr, "pointer cannot be arithmetically operated by %s\n", op); exit(1); } return asm_r(TYPE_INT, op, rs1); @@ -911,7 +928,7 @@ int asm_r_arith(const char* op, int rs1) { int asm_rr_arith(const char* op, int rs1, int rs2) { if (reg_type[rs1] & TYPE_PTR_MASK || reg_type[rs2] & TYPE_PTR_MASK) { - eprintf("pointer cannot be arithmetically operated by %s\n", op); + fprintf(stderr, "pointer cannot be arithmetically operated by %s\n", op); exit(1); } return asm_rr(TYPE_INT, op, rs1, rs2); @@ -951,7 +968,7 @@ int cont_label_stack_size; void asm_break() { if (break_label_stack_size == 0) { - eprintf("break without loop\n"); + fprintf(stderr, "break without loop\n"); exit(1); } asm_j(break_label_stack[break_label_stack_size - 1]); @@ -959,7 +976,7 @@ void asm_break() { void asm_continue() { if (cont_label_stack_size == 0) { - eprintf("continue without loop\n"); + fprintf(stderr, "continue without loop\n"); exit(1); } asm_j(cont_label_stack[cont_label_stack_size - 1]); @@ -1004,7 +1021,7 @@ int asm_add(int lhs, int rhs) { } int ptr_type = reg_type[ptr]; if (ptr_type == TYPE_VOID_PTR) { - eprintf("void pointer cannot be arithmetically operated\n"); + fprintf(stderr, "void pointer cannot be arithmetically operated\n"); exit(1); } int offset = next_reg(TYPE_INT); @@ -1012,7 +1029,7 @@ int asm_add(int lhs, int rhs) { return asm_rr(ptr_type, "add", ptr, offset); } if (type1 && type2) { - eprintf("operands of addition cannot be both pointers\n"); + fprintf(stderr, "operands of addition cannot be both pointers\n"); exit(1); } return asm_rr(TYPE_INT, "add", lhs, rhs); @@ -1025,11 +1042,11 @@ int asm_sub(int lhs, int rhs) { int type2 = rhs_type & TYPE_PTR_MASK; if (type1 && type2) { if (lhs_type != rhs_type) { - eprintf("pointer type mismatch\n"); + fprintf(stderr, "pointer type mismatch\n"); exit(1); } if (lhs_type == 
TYPE_VOID_PTR) { - eprintf("void pointer cannot be arithmetically operated\n"); + fprintf(stderr, "void pointer cannot be arithmetically operated\n"); exit(1); } int diff = asm_rr(TYPE_INT, "sub", lhs, rhs); @@ -1072,7 +1089,7 @@ int parse_assign_expr(); int parse_function_call(int id) { const char* name = id_table + id_lut[id]; if (global_marker[id] != MARKER_FUNCTION) { - eprintf("not a function name: %s\n", name); + fprintf(stderr, "not a function name: %s\n", name); exit(1); } int arg = 0; @@ -1084,7 +1101,7 @@ int parse_function_call(int id) { } unget_token(); if (arg >= 8) { - eprintf("too many arguments\n"); + fprintf(stderr, "too many arguments\n"); exit(1); } args[arg++] = parse_assign_expr(); @@ -1094,7 +1111,7 @@ int parse_function_call(int id) { } else if (token_type == TOKEN_PAREN_RIGHT) { break; } else { - eprintf("expecting ',' or ')'\n"); + fprintf(stderr, "expecting ',' or ')'\n"); exit(1); } } @@ -1143,7 +1160,7 @@ int parse_primary_expr() { expect_token(TOKEN_PAREN_RIGHT); return reg; } else { - eprintf("unexpected token in primary expression: %d\n", token_type); + fprintf(stderr, "unexpected token in primary expression: %d\n", token_type); exit(1); } } @@ -1182,7 +1199,7 @@ int parse_prefix_expr() { int reg = parse_postfix_expr(); int type = reg_type[reg]; if (type & TYPE_PTR_MASK) { - eprintf("cannot take address of a pointer\n"); + fprintf(stderr, "cannot take address of a pointer\n"); exit(1); } return addressof(reg); @@ -1190,11 +1207,11 @@ int parse_prefix_expr() { int reg = parse_postfix_expr(); int type = reg_type[reg]; if (!(type & TYPE_PTR_MASK)) { - eprintf("cannot dereference a non-pointer\n"); + fprintf(stderr, "cannot dereference a non-pointer\n"); exit(1); } if (type == TYPE_VOID_PTR) { - eprintf("cannot dereference void pointer\n"); + fprintf(stderr, "cannot dereference void pointer\n"); exit(1); } return dereference(reg); @@ -1443,7 +1460,7 @@ int parse_conditional_expr() { asm_label(label1); int rhs = parse_conditional_expr(); 
if (reg_type[lhs] != reg_type[rhs]) { - eprintf("type mismatch in conditional expression\n"); + fprintf(stderr, "type mismatch in conditional expression\n"); exit(1); } asm_mv(result, rhs); @@ -1535,7 +1552,7 @@ int parse_expr() { void parse_local_variable(int type) { if (type == TYPE_VOID) { - eprintf("variable cannot be of void type\n"); + fprintf(stderr, "variable cannot be of void type\n"); exit(1); } expect_token(TOKEN_ID); @@ -1543,7 +1560,7 @@ void parse_local_variable(int type) { next_token(); if (token_type == TOKEN_BRACKET_LEFT) { if (type & TYPE_PTR_MASK) { - eprintf("array of pointers is not supported\n"); + fprintf(stderr, "array of pointers is not supported\n"); exit(1); } expect_token(TOKEN_NUMBER); @@ -1561,7 +1578,7 @@ void parse_local_variable(int type) { expect_token(TOKEN_ASSIGN); int reg = parse_expr(); if (type != reg_type[reg]) { - eprintf("type mismatch in assignment\n"); + fprintf(stderr, "type mismatch in assignment\n"); exit(1); } store_into_local(reg, slot); @@ -1709,17 +1726,9 @@ void parse_function(const char* name) { expect_token(TOKEN_PAREN_RIGHT); break; } - if (token_type == TOKEN_VOID) { - if (arg != 0) { - eprintf("void should be the only argument\n"); - exit(1); - } - expect_token(TOKEN_PAREN_RIGHT); - break; - } int arg_type = parse_type(); if (arg_type < 0 || arg_type == TYPE_VOID) { - eprintf("expecting a non-void argument type: %d\n", arg_type); + fprintf(stderr, "expecting a non-void argument type: %d\n", arg_type); exit(1); } expect_token(TOKEN_ID); @@ -1729,13 +1738,13 @@ void parse_function(const char* name) { expect_token(TOKEN_BRACKET_RIGHT); next_token(); if (arg_type & TYPE_PTR_MASK) { - eprintf("array of pointers is not supported\n"); + fprintf(stderr, "array of pointers is not supported\n"); exit(1); } arg_type |= TYPE_PTR_MASK; } if (arg >= 8) { - eprintf("too many arguments\n"); + fprintf(stderr, "too many arguments\n"); exit(1); } args[arg++] = declare_local(token_data, arg_type); @@ -1744,7 +1753,7 @@ void 
parse_function(const char* name) { } else if (token_type == TOKEN_PAREN_RIGHT) { break; } else { - eprintf("expecting ',' or ')'\n"); + fprintf(stderr, "expecting ',' or ')'\n"); exit(1); } } @@ -1808,10 +1817,6 @@ void parse_function(const char* name) { } void parse_global_variable(int id, const char* name, int type) { - if (type == TYPE_VOID) { - eprintf("variable cannot be of void type\n"); - exit(1); - } printf(".data\n"); printf(".globl %s\n", name); printf(".align 5\n"); @@ -1821,7 +1826,7 @@ void parse_global_variable(int id, const char* name, int type) { printf(" .dword %d\n", token_data); } else if (token_type == TOKEN_BRACKET_LEFT) { if (type & TYPE_PTR_MASK) { - eprintf("array of pointers is not supported\n"); + fprintf(stderr, "array of pointers is not supported\n"); exit(1); } expect_token(TOKEN_NUMBER); @@ -1834,17 +1839,21 @@ void parse_global_variable(int id, const char* name, int type) { printf(" .zero %d\n", 8); unget_token(); } - expect_token(TOKEN_SEMICOLON); } void parse_global_declaration() { + int external = 0; + if (token_type == TOKEN_EXTERN) { + external = 1; + next_token(); + } int is_const_int = 1; if (token_type != TOKEN_CONST) { is_const_int = 0; } int type = parse_type(); if (type < 0) { - eprintf("expecting type for global declaration\n"); + fprintf(stderr, "expecting type for global declaration\n"); exit(1); } if (type != TYPE_INT) { @@ -1854,7 +1863,7 @@ void parse_global_declaration() { int id = token_data; char* name = id_table + id_lut[id]; next_token(); - if (is_const_int && token_type == TOKEN_ASSIGN) { + if (!external && is_const_int && token_type == TOKEN_ASSIGN) { expect_token(TOKEN_NUMBER); const_table[id] = token_data; is_const[id] = 1; @@ -1863,8 +1872,17 @@ void parse_global_declaration() { declare_global(id, MARKER_FUNCTION, type); parse_function(name); } else { + if (type == TYPE_VOID) { + fprintf(stderr, "variable cannot be of void type\n"); + exit(1); + } declare_global(id, MARKER_SCALAR, type); - 
parse_global_variable(id, name, type); + if (external) { + unget_token(); + } else { + parse_global_variable(id, name, type); + } + expect_token(TOKEN_SEMICOLON); } } diff --git a/boot.sh b/boot.sh index c4a4ecb..0667db3 100644 --- a/boot.sh +++ b/boot.sh @@ -1,12 +1,10 @@ mkdir -p build && cd build && -cat ../boot-lib.h ../boot.c | sed '/^#/d' > boot-all.c && -gcc ../boot.c ../boot-lib.c -o gcc.out && -./gcc.out < boot-all.c > boot1.s && -riscv64-linux-gnu-gcc-12 -static boot1.s ../boot-lib.c -o boot1.out && -qemu-riscv64 boot1.out < boot-all.c > boot2.s && -riscv64-linux-gnu-gcc-12 -static boot2.s ../boot-lib.c -o boot2.out && -qemu-riscv64 boot2.out < boot-all.c > boot3.s +gcc ../boot.c -o gcc.out && +./gcc.out < ../boot.c > boot1.s && +riscv64-linux-gnu-gcc-12 -static boot1.s -o boot1.out && +qemu-riscv64 boot1.out < ../boot.c > boot2.s && +riscv64-linux-gnu-gcc-12 -static boot2.s -o boot2.out && +qemu-riscv64 boot2.out < ../boot.c > boot3.s cmp --silent boot1.s boot2.s && echo "boot1.s == boot2.s" || echo "boot1.s != boot2.s" cmp --silent boot2.s boot3.s && echo "boot2.s == boot3.s" || echo "boot2.s != boot3.s" cmp --silent boot1.s boot3.s && echo "boot1.s == boot3.s" || echo "boot1.s != boot3.s" -rm boot-all.c