From b8a52d3f39abd7cfedc3541379d22e3f215f2152 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Motiejus=20Jak=C5=A1tys?= Date: Mon, 30 Dec 2024 01:05:10 +0200 Subject: [PATCH] =?UTF-8?q?More=20parser=20=E2=80=94=20lint+tests=20pass?= =?UTF-8?q?=20again?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- README.md | 2 +- ast.h | 4 +- build.zig | 3 +- common.h | 4 +- main.c | 6 +- parser.c | 323 ++++++++++++++++++++++++++++++++++++++++++++------- parser.h | 4 +- test_all.zig | 2 +- tokenizer.h | 4 +- zig1.c | 6 +- 10 files changed, 299 insertions(+), 59 deletions(-) diff --git a/README.md b/README.md index 42ece43..60dc9c4 100644 --- a/README.md +++ b/README.md @@ -1 +1 @@ -zig1 aspires to be a zig 0.13.0-2448-gd48611ba67c7 interpreter. +zig0 aspires to be an interpreter of zig 0.13.0-2578-gec60156f187a C backend. diff --git a/ast.h b/ast.h index 2f032f4..6ad194b 100644 --- a/ast.h +++ b/ast.h @@ -1,5 +1,5 @@ -#ifndef _ZIG1_AST_H__ -#define _ZIG1_AST_H__ +#ifndef _ZIG0_AST_H__ +#define _ZIG0_AST_H__ #include #include diff --git a/build.zig b/build.zig index 6448bf5..edc63cf 100644 --- a/build.zig +++ b/build.zig @@ -9,7 +9,7 @@ const headers = &[_][]const u8{ const c_lib_files = &[_][]const u8{ "tokenizer.c", "ast.c", - "zig1.c", + "zig0.c", "parser.c", }; @@ -109,6 +109,7 @@ pub fn build(b: *std.Build) !void { "--suppress=checkersReport", "--suppress=unusedFunction", // TODO remove after plumbing is done "--suppress=unusedStructMember", // TODO remove after plumbing is done + "--suppress=knownConditionTrueFalse", // TODO remove after plumbing is done }); for (all_c_files) |cfile| cppcheck.addFileArg(b.path(cfile)); lint_step.dependOn(&cppcheck.step); diff --git a/common.h b/common.h index 4f410be..262cd80 100644 --- a/common.h +++ b/common.h @@ -1,6 +1,6 @@ // common.h -#ifndef _ZIG1_COMMON_H__ -#define _ZIG1_COMMON_H__ +#ifndef _ZIG0_COMMON_H__ +#define _ZIG0_COMMON_H__ #include #include diff --git a/main.c b/main.c index 
9f3ea68..e15443c 100644 --- a/main.c +++ b/main.c @@ -2,8 +2,8 @@ #include #include -int zig1Run(char* program, char** msg); -int zig1RunFile(char* fname, char** msg); +int zig0Run(char* program, char** msg); +int zig0RunFile(char* fname, char** msg); static void usage(const char* argv0) { fprintf(stderr, "Usage: %s program.zig\n", argv0); @@ -16,7 +16,7 @@ int main(int argc, char** argv) { } char* msg; - switch (zig1RunFile(argv[1], &msg)) { + switch (zig0RunFile(argv[1], &msg)) { case 0: return 0; break; diff --git a/parser.c b/parser.c index 401ae63..dcb2a9b 100644 --- a/parser.c +++ b/parser.c @@ -20,8 +20,14 @@ typedef struct { } FieldState; typedef struct { - AstNodeIndex zero_or_one; - AstSubRange multi; + enum { + SMALL_SPAN_ZERO_OR_ONE, + SMALL_SPAN_MULTI + } tag; + union { + AstNodeIndex zero_or_one; + AstSubRange multi; + } payload; } SmallSpan; void parseRoot(Parser* p) { @@ -50,6 +56,16 @@ static void eatDocComments(Parser* p) { while (eatToken(p, TOKENIZER_TAG_DOC_COMMENT, &ok), ok) { } } +static void expectSemicolon(Parser* p) { + bool ok; + eatToken(p, TOKENIZER_TAG_SEMICOLON, &ok); + if (ok) + return; + + fprintf(stderr, "expected semicolon\n"); + exit(1); +} + static AstNodeIndex setNode(Parser* p, uint32_t i, AstNodeItem item) { p->nodes.tags[i] = item.tag; p->nodes.main_tokens[i] = item.main_token; @@ -80,6 +96,13 @@ static AstNodeIndex addNode(AstNodeList* nodes, AstNodeItem item) { return nodes->len++; } +static AstNodeIndex addExtra(Parser* p, const AstNodeIndex* extra, uint32_t count) { + const AstNodeIndex result = p->extra_data.len; + SLICE_ENSURE_CAPACITY(AstNodeIndex, &p->extra_data, count); + memcpy(&p->extra_data.arr[result], extra, count * sizeof(AstNodeIndex)); p->extra_data.len += count; + return result; +} + static AstNodeIndex parseTypeExpr(Parser* p); static AstNodeIndex expectTypeExpr(Parser* p) { @@ -92,14 +115,43 @@ static AstNodeIndex parseByteAlign(Parser* p) { bool ok; eatToken(p, TOKENIZER_TAG_KEYWORD_ALIGN, 
&ok); - if (!ok) { + if (!ok) return null_node; - } fprintf(stderr, "parseByteAlign cannot parse alginment\n"); exit(1); return 0; // tcc } +static AstNodeIndex parseAddrSpace(Parser* p) { + bool ok; + eatToken(p, TOKENIZER_TAG_KEYWORD_ADDRSPACE, &ok); + if (!ok) + return null_node; + fprintf(stderr, "parseAddrSpace cannot parse addrspace\n"); + exit(1); + return 0; // tcc +} + +static AstNodeIndex parseLinkSection(Parser* p) { + bool ok; + eatToken(p, TOKENIZER_TAG_KEYWORD_LINKSECTION, &ok); + if (!ok) + return null_node; + fprintf(stderr, "parseLinkSection cannot parse linksection\n"); + exit(1); + return 0; // tcc +} + +static AstNodeIndex parseCallconv(Parser* p) { + bool ok; + eatToken(p, TOKENIZER_TAG_KEYWORD_CALLCONV, &ok); + if (!ok) + return null_node; + fprintf(stderr, "parseCallconv cannot parse callconv\n"); + exit(1); + return 0; // tcc +} + typedef struct { AstNodeIndex align_expr, value_expr; } NodeContainerField; @@ -112,13 +164,13 @@ static AstNodeIndex expectContainerField(Parser* p) { const AstNodeIndex type_expr = expectTypeExpr(p); const AstNodeIndex align_expr = parseByteAlign(p); - const AstNodeIndex value_expr = 0; bool ok; eatToken(p, TOKENIZER_TAG_EQUAL, &ok); if (ok) { fprintf(stderr, "expectContainerField does not support expr\n"); exit(1); } + const AstNodeIndex value_expr = 0; if (align_expr == 0) { return addNode( @@ -150,10 +202,8 @@ static AstNodeIndex expectContainerField(Parser* p) { .main_token = main_token, .data = { .lhs = type_expr, - .rhs = addExtra(p, (NodeContainerField) { - .align_expr = align_expr, - .value_expr = value_expr, - }) }, + .rhs = addExtra(p, (AstNodeIndex[]) { align_expr, value_expr }, 2), + }, }); } } @@ -307,7 +357,7 @@ static SmallSpan parseParamDeclList(Parser* p) { } return (SmallSpan) { - .zero_or_one = 0, + .tag = SMALL_SPAN_ZERO_OR_ONE, }; } @@ -328,21 +378,104 @@ static AstNodeIndex parseFnProto(Parser* p) { eatToken(p, TOKENIZER_TAG_IDENTIFIER, NULL); SmallSpan params = parseParamDeclList(p); - // 
const params = try p.parseParamDeclList(); - // const align_expr = try p.parseByteAlign(); - // const addrspace_expr = try p.parseAddrSpace(); - // const section_expr = try p.parseLinkSection(); - // const callconv_expr = try p.parseCallconv(); + const AstNodeIndex align_expr = parseByteAlign(p); + const AstNodeIndex addrspace_expr = parseAddrSpace(p); + const AstNodeIndex section_expr = parseLinkSection(p); + const AstNodeIndex callconv_expr = parseCallconv(p); eatToken(p, TOKENIZER_TAG_BANG, NULL); const AstNodeIndex return_type_expr = parseTypeExpr(p); - return 0; + + if (align_expr == 0 && section_expr == 0 && callconv_expr == 0 && addrspace_expr == 0) { + if (params.tag == SMALL_SPAN_ZERO_OR_ONE) + return setNode( + p, + fn_proto_index, + (AstNodeItem) { + .tag = AST_NODE_TAG_FN_PROTO_SIMPLE, + .main_token = fn_token, + .data = { + .lhs = params.payload.zero_or_one, + .rhs = return_type_expr, + }, + }); + } + + fprintf(stderr, "parseFnProto does not support complex function decls\n"); + exit(1); + return 0; // tcc } +static AstTokenIndex parseBlockLabel(Parser* p) { + if (p->token_tags[p->tok_i] == TOKENIZER_TAG_IDENTIFIER && p->token_tags[p->tok_i + 1] == TOKENIZER_TAG_COLON) { + const AstTokenIndex identifier = p->tok_i; + p->tok_i += 2; + return identifier; + } + return null_node; +} + +static AstNodeIndex parseForStatement(Parser* p) { + bool ok; + const AstNodeIndex for_token = eatToken(p, TOKENIZER_TAG_KEYWORD_FOR, &ok); + if (!ok) + return null_node; + + (void)for_token; + fprintf(stderr, "parseForStatement cannot parse for statements\n"); exit(1); + return 0; // tcc +} + +static AstNodeIndex parseWhileStatement(Parser* p) { + bool ok; + const AstNodeIndex while_token = eatToken(p, TOKENIZER_TAG_KEYWORD_WHILE, &ok); + if (!ok) + return null_node; + + (void)while_token; + fprintf(stderr, "parseWhileStatement cannot parse while statements\n"); exit(1); + return 0; // tcc +} + +static AstNodeIndex parseLoopStatement(Parser* p) { + bool ok_inline_token; + eatToken(p, 
TOKENIZER_TAG_KEYWORD_INLINE, &ok_inline_token); + + const AstNodeIndex for_statement = parseForStatement(p); + if (for_statement != 0) + return for_statement; + + const AstNodeIndex while_statement = parseWhileStatement(p); + if (while_statement != 0) + return while_statement; + + if (!ok_inline_token) + return null_node; + + fprintf(stderr, "If we've seen 'inline', there should have been a 'for' or 'while'\n"); + exit(1); + return 0; // tcc +} + +static AstNodeIndex parseAssignExpr(Parser* p) { + (void)p; + fprintf(stderr, "parseAssignExpr not implemented\n"); + exit(1); + return 0; // tcc +} + +static AstNodeIndex expectVarDeclExprStatement(Parser* p) { + (void)p; + fprintf(stderr, "expectVarDeclExprStatement not implemented\n"); + exit(1); + return 0; // tcc +} + +static AstNodeIndex parseLabeledStatement(Parser*); static AstNodeIndex expectStatement(Parser* p, bool allow_defer_var) { bool ok; if (eatToken(p, TOKENIZER_TAG_KEYWORD_COMPTIME, &ok), ok) { - fprintf(stderr, "expectStatement: comptime keyword not yet supported\n"); + fprintf(stderr, "expectStatement: comptime keyword not supported\n"); exit(1); } @@ -357,12 +490,20 @@ static AstNodeIndex expectStatement(Parser* p, bool allow_defer_var) { case TOKENIZER_TAG_KEYWORD_STRUCT: case TOKENIZER_TAG_KEYWORD_UNION:; const char* tok_str = tokenizerGetTagString(tok); - fprintf(stderr, "expectStatement does not yet support keyword %s\n", tok_str); + fprintf(stderr, "expectStatement does not support keyword %s\n", tok_str); exit(1); default:; } - // TODO continue - return 1; + + const AstNodeIndex labeled_statement = parseLabeledStatement(p); + if (labeled_statement != 0) + return labeled_statement; + + if (allow_defer_var) { + return expectVarDeclExprStatement(p); + } else { + return parseAssignExpr(p); + } } typedef struct { @@ -452,6 +593,24 @@ static AstNodeIndex parseBlock(Parser* p) { return 0; } +static AstNodeIndex parseLabeledStatement(Parser* p) { + const AstNodeIndex label_token = parseBlockLabel(p); + 
const AstNodeIndex block = parseBlock(p); + if (block != 0) + return block; + + const AstNodeIndex loop_stmt = parseLoopStatement(p); + if (loop_stmt != 0) + return loop_stmt; + + if (label_token != 0) { + fprintf(stderr, "parseLabeledStatement does not support labels\n"); + exit(1); + } + + return null_node; +} + static AstNodeIndex parseVarDeclProto(Parser* p) { bool ok; eatToken(p, TOKENIZER_TAG_KEYWORD_CONST, &ok); @@ -476,23 +635,15 @@ static AstNodeIndex parseGlobalVarDecl(Parser* p) { } static AstNodeIndex expectTopLevelDecl(Parser* p) { - AstTokenIndex extern_export_inline_token = p->tok_i++; - bool is_extern = false; - bool expect_fn = false; - bool expect_var_or_fn = false; + AstTokenIndex extern_export_inline_token = nextToken(p); switch (p->token_tags[extern_export_inline_token]) { case TOKENIZER_TAG_KEYWORD_EXTERN: eatToken(p, TOKENIZER_TAG_STRING_LITERAL, NULL); - is_extern = true; - expect_var_or_fn = true; break; case TOKENIZER_TAG_KEYWORD_EXPORT: - expect_var_or_fn = true; - break; case TOKENIZER_TAG_KEYWORD_INLINE: case TOKENIZER_TAG_KEYWORD_NOINLINE: - expect_fn = true; break; default: p->tok_i--; @@ -504,10 +655,7 @@ static AstNodeIndex expectTopLevelDecl(Parser* p) { case TOKENIZER_TAG_SEMICOLON: p->tok_i++; return fn_proto; - case TOKENIZER_TAG_L_BRACE: - if (is_extern) - exit(1); - + case TOKENIZER_TAG_L_BRACE:; AstNodeIndex fn_decl_index = reserveNode(p, AST_NODE_TAG_FN_DECL); AstNodeIndex body_block = parseBlock(p); return setNode( @@ -523,10 +671,7 @@ static AstNodeIndex expectTopLevelDecl(Parser* p) { } } - if (expect_fn) - exit(1); - - AstTokenIndex thread_local_token = eatToken(p, TOKENIZER_TAG_KEYWORD_THREADLOCAL, NULL); + eatToken(p, TOKENIZER_TAG_KEYWORD_THREADLOCAL, NULL); AstNodeIndex var_decl = parseGlobalVarDecl(p); if (var_decl != 0) { return var_decl; @@ -538,9 +683,72 @@ static AstNodeIndex expectTopLevelDecl(Parser* p) { return 0; // make tcc happy } +void findNextContainerMember(Parser* p) { + uint32_t level = 0; + + while 
(true) { + AstTokenIndex tok = nextToken(p); + + switch (p->token_tags[tok]) { + // Any of these can start a new top level declaration + case TOKENIZER_TAG_KEYWORD_TEST: + case TOKENIZER_TAG_KEYWORD_COMPTIME: + case TOKENIZER_TAG_KEYWORD_PUB: + case TOKENIZER_TAG_KEYWORD_EXPORT: + case TOKENIZER_TAG_KEYWORD_EXTERN: + case TOKENIZER_TAG_KEYWORD_INLINE: + case TOKENIZER_TAG_KEYWORD_NOINLINE: + case TOKENIZER_TAG_KEYWORD_USINGNAMESPACE: + case TOKENIZER_TAG_KEYWORD_THREADLOCAL: + case TOKENIZER_TAG_KEYWORD_CONST: + case TOKENIZER_TAG_KEYWORD_VAR: + case TOKENIZER_TAG_KEYWORD_FN: + if (level == 0) { + p->tok_i--; + return; + } + break; + case TOKENIZER_TAG_IDENTIFIER: + if (p->token_tags[tok + 1] == TOKENIZER_TAG_COMMA && level == 0) { + p->tok_i--; + return; + } + break; + case TOKENIZER_TAG_COMMA: + case TOKENIZER_TAG_SEMICOLON: + // This decl was likely meant to end here + if (level == 0) + return; + break; + case TOKENIZER_TAG_L_PAREN: + case TOKENIZER_TAG_L_BRACKET: + case TOKENIZER_TAG_L_BRACE: + level++; + break; + case TOKENIZER_TAG_R_PAREN: + case TOKENIZER_TAG_R_BRACKET: + if (level != 0) + level--; + break; + case TOKENIZER_TAG_R_BRACE: + if (level == 0) { + // end of container, exit + p->tok_i--; + return; + } + level--; + break; + case TOKENIZER_TAG_EOF: + p->tok_i--; + return; + default: + break; + } + } +} + static Members parseContainerMembers(Parser* p) { const uint32_t scratch_top = p->scratch.len; - Members res = (Members) {}; // ast_token_index last_field; bool ok; while (eatToken(p, TOKENIZER_TAG_CONTAINER_DOC_COMMENT, &ok) && ok) @@ -597,8 +805,6 @@ static Members parseContainerMembers(Parser* p) { goto break_loop; default:; // skip parseCStyleContainer - - const AstNodeIndex identifier = p->tok_i; const AstNodeIndex container_field = expectContainerField(p); switch (field_state.tag) { case FIELD_STATE_NONE: @@ -628,8 +834,41 @@ static Members parseContainerMembers(Parser* p) { continue; } } -break_loop: +break_loop:; + + const uint32_t 
scratch_len = p->scratch.len; p->scratch.len = scratch_top; - return res; + + const uint32_t n_items = scratch_len - scratch_top; + switch (n_items) { + case 0: + return (Members) { + .len = 0, + .lhs = 0, + .rhs = 0, + .trailing = trailing, + }; + case 1: + return (Members) { + .len = 1, + .lhs = p->scratch.arr[scratch_top], + .rhs = 0, + .trailing = trailing, + }; + case 2: + return (Members) { + .len = 2, + .lhs = p->scratch.arr[scratch_top], + .rhs = p->scratch.arr[scratch_top + 1], + .trailing = trailing, + }; + default: + return (Members) { + .len = n_items, + .lhs = p->scratch.arr[scratch_top], + .rhs = p->scratch.arr[scratch_len], + .trailing = trailing, + }; + } } diff --git a/parser.h b/parser.h index beae5f8..ca6990f 100644 --- a/parser.h +++ b/parser.h @@ -1,6 +1,6 @@ // parser.h -#ifndef _ZIG1_PARSE_H__ -#define _ZIG1_PARSE_H__ +#ifndef _ZIG0_PARSE_H__ +#define _ZIG0_PARSE_H__ #include "ast.h" #include "common.h" diff --git a/test_all.zig b/test_all.zig index 2ca72aa..7be8d27 100644 --- a/test_all.zig +++ b/test_all.zig @@ -1,3 +1,3 @@ -test "zig1 test suite" { +test "zig0 test suite" { _ = @import("tokenizer_test.zig"); } diff --git a/tokenizer.h b/tokenizer.h index 9d86667..9cafb91 100644 --- a/tokenizer.h +++ b/tokenizer.h @@ -1,5 +1,5 @@ -#ifndef _ZIG1_TOKENIZER_H__ -#define _ZIG1_TOKENIZER_H__ +#ifndef _ZIG0_TOKENIZER_H__ +#define _ZIG0_TOKENIZER_H__ #include #include diff --git a/zig1.c b/zig1.c index cdde141..3e765e1 100644 --- a/zig1.c +++ b/zig1.c @@ -6,7 +6,7 @@ // - code = 0: program successfully terminated. // - code = 1: panicked, panic message in msg. Caller should free msg. // - code = 2: interpreter error, error in msg. Caller should free msg. -int zig1Run(const char* program, char** msg) { +int zig0Run(const char* program, char** msg) { (void)program; (void)msg; return 0; @@ -14,7 +14,7 @@ int zig1Run(const char* program, char** msg) { // API: run and: // code = 3: abnormal error, expect something in stderr. 
-int zig1RunFile(const char* fname, char** msg) { +int zig0RunFile(const char* fname, char** msg) { FILE* f = fopen(fname, "r"); if (f == NULL) { perror("fopen"); @@ -51,7 +51,7 @@ int zig1RunFile(const char* fname, char** msg) { fclose(f); program[fsize] = 0; - int code = zig1Run(program, msg); + int code = zig0Run(program, msg); free(program); return code; }