From 6ae7d7320d87af37484af685de26e77230d299c3 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Motiejus=20Jak=C5=A1tys?=
Date: Wed, 25 Dec 2024 23:44:33 +0200
Subject: [PATCH] =?UTF-8?q?adding=20more=20parser=20=E2=80=94=20starts=20b?=
 =?UTF-8?q?reaking=20the=20build?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 ast.c       |   9 +-
 ast.h       |   3 +-
 build.zig   |   3 +-
 parser.c    | 283 +++++++++++++++++++++++++++++++++++++++++++++++++++-
 tokenizer.h |   4 +-
 5 files changed, 292 insertions(+), 10 deletions(-)

diff --git a/ast.c b/ast.c
index 719088a..17dfe30 100644
--- a/ast.c
+++ b/ast.c
@@ -25,14 +25,17 @@ void astNodeListEnsureCapacity(AstNodeList* list, uint32_t additional)
     list->cap = new_cap;
 }
 
-void astNodeListAppend(AstNodeList* list, AstNodeTag tag,
-    AstTokenIndex main_token, AstData data)
+AstNodeIndex astNodeListAppend(
+    AstNodeList* list,
+    AstNodeTag tag,
+    AstTokenIndex main_token,
+    AstData data)
 {
     astNodeListEnsureCapacity(list, 1);
     list->tags[list->len] = tag;
     list->main_tokens[list->len] = main_token;
     list->datas[list->len] = data;
-    list->len++;
+    return list->len++;
 }
 
 Ast astParse(const char* source, const uint32_t len)
diff --git a/ast.h b/ast.h
index 7a98e2f..fba2052 100644
--- a/ast.h
+++ b/ast.h
@@ -600,8 +600,7 @@ Ast astParse(const char* source, uint32_t len);
 void astNodeListEnsureCapacity(AstNodeList* list, uint32_t additional);
 void astTokenListEnsureCapacity(AstTokenList* list, uint32_t additional);
 
-void astNodeListAppend(AstNodeList* list, AstNodeTag tag,
-    AstTokenIndex main_token, AstData data);
+AstNodeIndex astNodeListAppend(AstNodeList* list, AstNodeTag tag, AstTokenIndex main_token, AstData data);
 void astTokenListAppend(AstTokenList* list, TokenizerTag tag, AstIndex start);
 
 #endif
diff --git a/build.zig b/build.zig
index 3c4d82b..ddbc0a8 100644
--- a/build.zig
+++ b/build.zig
@@ -27,7 +27,6 @@ const cflags = &[_][]const u8{
     "-Wformat=2",
     "-fno-common",
     "-Wconversion",
-    "-Wswitch-enum",
     "-Wuninitialized",
     "-Wdouble-promotion",
     "-fstack-protector-all",
@@ -80,7 +79,7 @@ pub fn build(b: *std.Build) !void {
     const lint_step = b.step("lint", "Run linters");
 
     const clang_format = b.addSystemCommand(&.{"clang-format"});
-    clang_format.addArgs(&.{ "--style=webkit", "-i" });
+    clang_format.addArgs(&.{ "--style=webkit", "--verbose", "-Werror", "-i" });
     for (all_c_files ++ headers) |f|
         clang_format.addFileArg(b.path(f));
     lint_step.dependOn(&clang_format.step);
diff --git a/parser.c b/parser.c
index 5649d16..7ba9bb2 100644
--- a/parser.c
+++ b/parser.c
@@ -1,8 +1,11 @@
 #include <stdio.h>
 #include <stdlib.h>
 
+#include "ast.h"
 #include "parser.h"
 
+const AstNodeIndex null_node = 0;
+
 typedef struct {
     enum {
         FIELD_STATE_NONE,
@@ -14,6 +17,11 @@ typedef struct {
     } payload;
 } FieldState;
 
+typedef struct {
+    AstNodeIndex zero_or_one;
+    AstSubRange multi;
+} SmallSpan;
+
 void parseRoot(Parser* p)
 {
     p->nodes.tags[p->nodes.len++] = AST_NODE_TAG_ROOT;
@@ -27,14 +35,285 @@ static AstTokenIndex nextToken(Parser* p) { return p->tok_i++; }
 static AstTokenIndex eatToken(Parser* p, TokenizerTag tag, bool* ok)
 {
     if (p->token_tags[p->tok_i] == tag) {
-        *ok = true;
+        if (ok != NULL)
+            *ok = true;
         return nextToken(p);
     } else {
-        *ok = false;
+        if (ok != NULL)
+            *ok = false;
         return (AstTokenIndex) {};
     }
 }
 
+// setNode fills in a node slot that was previously reserved with
+// reserveNode and returns its index.
+static AstNodeIndex setNode(Parser* p, uint32_t i, AstNodeTag tag, AstTokenIndex main_token, AstData data)
+{
+    p->nodes.tags[i] = tag;
+    p->nodes.main_tokens[i] = main_token;
+    p->nodes.datas[i] = data;
+    return i;
+}
+
+static AstNodeIndex parseTypeExpr(Parser* p);
+
+// parseGlobalVarDecl is called by expectTopLevelDecl below but is not
+// defined in this commit (the subject line warns that the build starts
+// to break); forward-declare it so the call site at least compiles.
+static AstNodeIndex parseGlobalVarDecl(Parser* p);
+
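+// expectTypeExpr is parseTypeExpr, except that a missing type expression
+// is a fatal error rather than a null node.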
+static AstNodeIndex expectTypeExpr(Parser* p)
+{
+    const AstNodeIndex node = parseTypeExpr(p);
+    if (node == 0)
+        exit(1);
+    return node;
+}
+
+static AstNodeIndex parsePrimaryTypeExpr(Parser* p)
+{
+    const TokenizerTag tok = p->token_tags[p->tok_i];
+    switch (tok) {
+    case TOKENIZER_TAG_CHAR_LITERAL:
+    case TOKENIZER_TAG_NUMBER_LITERAL:
+    case TOKENIZER_TAG_KEYWORD_UNREACHABLE:
+    case TOKENIZER_TAG_KEYWORD_ANYFRAME:
+    case TOKENIZER_TAG_STRING_LITERAL:
+    case TOKENIZER_TAG_BUILTIN:
+    case TOKENIZER_TAG_KEYWORD_FN:
+    case TOKENIZER_TAG_KEYWORD_IF:
+    case TOKENIZER_TAG_KEYWORD_SWITCH:
+    case TOKENIZER_TAG_KEYWORD_EXTERN:
+    case TOKENIZER_TAG_KEYWORD_PACKED:
+    case TOKENIZER_TAG_KEYWORD_STRUCT:
+    case TOKENIZER_TAG_KEYWORD_OPAQUE:
+    case TOKENIZER_TAG_KEYWORD_ENUM:
+    case TOKENIZER_TAG_KEYWORD_UNION:
+    case TOKENIZER_TAG_KEYWORD_COMPTIME:
+    case TOKENIZER_TAG_MULTILINE_STRING_LITERAL_LINE:
+        fprintf(stderr, "parsePrimaryTypeExpr does not support %s\n", tokenizerGetTagString(tok));
+        exit(1);
+    case TOKENIZER_TAG_IDENTIFIER:
+        if (p->token_tags[p->tok_i + 1] == TOKENIZER_TAG_COLON) {
+            fprintf(stderr, "parsePrimaryTypeExpr does not support identifier followed by colon\n");
+            exit(1);
+        }
+        return astNodeListAppend(
+            &p->nodes,
+            AST_NODE_TAG_IDENTIFIER,
+            nextToken(p),
+            (AstData) {});
+    case TOKENIZER_TAG_KEYWORD_INLINE:
+    case TOKENIZER_TAG_KEYWORD_FOR:
+    case TOKENIZER_TAG_KEYWORD_WHILE:
+    case TOKENIZER_TAG_PERIOD:
+    case TOKENIZER_TAG_KEYWORD_ERROR:
+    case TOKENIZER_TAG_L_PAREN:
+        fprintf(stderr, "parsePrimaryTypeExpr does not support %s\n", tokenizerGetTagString(tok));
+        exit(1);
+    default:
+        return null_node;
+    }
+}
+
+static AstNodeIndex parseSuffixOp(Parser* p)
+{
+    const TokenizerTag tok = p->token_tags[p->tok_i];
+    switch (tok) {
+    case TOKENIZER_TAG_L_BRACKET:
+    case TOKENIZER_TAG_PERIOD_ASTERISK:
+    case TOKENIZER_TAG_INVALID_PERIODASTERISKS:
+    case TOKENIZER_TAG_PERIOD:
+        fprintf(stderr, "parseSuffixOp does not support %s\n", tokenizerGetTagString(tok));
+        exit(1);
+    default:
+        return null_node;
+    }
+}
+
+static AstNodeIndex parseSuffixExpr(Parser* p)
+{
+    bool ok;
+    eatToken(p, TOKENIZER_TAG_KEYWORD_ASYNC, &ok);
+    if (ok) {
+        fprintf(stderr, "async not supported\n");
+        exit(1);
+    }
+
+    AstNodeIndex res = parsePrimaryTypeExpr(p);
+    if (res == 0)
+        return res;
+
+    while (true) {
+        const AstNodeIndex suffix_op = parseSuffixOp(p);
+        if (suffix_op != 0) {
+            res = suffix_op;
+            continue;
+        }
+        eatToken(p, TOKENIZER_TAG_L_PAREN, &ok);
+        if (ok) {
+            fprintf(stderr, "parseSuffixExpr does not support expr with parens\n");
+            exit(1);
+        }
+        return res;
+    }
+}
+
+static AstNodeIndex parseErrorUnionExpr(Parser* p)
+{
+    const AstNodeIndex suffix_expr = parseSuffixExpr(p);
+    if (suffix_expr == 0)
+        return null_node;
+    bool ok;
+    const AstTokenIndex bang = eatToken(p, TOKENIZER_TAG_BANG, &ok);
+    if (!ok)
+        return suffix_expr;
+    return astNodeListAppend(
+        &p->nodes,
+        AST_NODE_TAG_ERROR_UNION,
+        bang,
+        (AstData) {
+            .lhs = suffix_expr,
+            .rhs = expectTypeExpr(p),
+        });
+}
+
+static AstNodeIndex parseTypeExpr(Parser* p)
+{
+    const TokenizerTag tok = p->token_tags[p->tok_i];
+    switch (tok) {
+    case TOKENIZER_TAG_QUESTION_MARK:
+    case TOKENIZER_TAG_KEYWORD_ANYFRAME:
+    case TOKENIZER_TAG_ASTERISK:
+    case TOKENIZER_TAG_ASTERISK_ASTERISK:
+    case TOKENIZER_TAG_L_BRACKET:
+        fprintf(stderr, "parseTypeExpr not supported for %s\n", tokenizerGetTagString(tok));
+        exit(1);
+    default:
+        return parseErrorUnionExpr(p);
+    }
+}
+
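+// parseParamDeclList consumes "(" ")" and rejects anything in between:
+// parameter declarations are not parsed yet. SmallSpan mirrors Zig's
+// Parse.SmallSpan, holding either a single optional node index or a
+// sub-range of extra data.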
+static SmallSpan parseParamDeclList(Parser* p)
+{
+    // WIP: only an empty parameter list is supported.
+    bool ok;
+    eatToken(p, TOKENIZER_TAG_L_PAREN, &ok);
+    if (!ok) {
+        fprintf(stderr, "expected (, got %s\n",
+            tokenizerGetTagString(p->token_tags[p->tok_i]));
+        exit(1);
+    }
+
+    eatToken(p, TOKENIZER_TAG_R_PAREN, &ok);
+    if (!ok) {
+        fprintf(stderr, "expected ), got %s\n",
+            tokenizerGetTagString(p->token_tags[p->tok_i]));
+        exit(1);
+    }
+
+    return (SmallSpan) {
+        .zero_or_one = 0,
+    };
+}
+
+// reserveNode appends an uninitialized node and returns its index, so a
+// parent node can be allocated before its children and filled in later
+// via setNode.
+static uint32_t reserveNode(Parser* p, AstNodeTag tag)
+{
+    astNodeListEnsureCapacity(&p->nodes, 1);
+    p->nodes.tags[p->nodes.len] = tag;
+    return p->nodes.len++;
+}
+
+static AstNodeIndex parseFnProto(Parser* p)
+{
+    bool ok;
+    const AstTokenIndex fn_token = eatToken(p, TOKENIZER_TAG_KEYWORD_FN, &ok);
+    if (!ok)
+        return null_node;
+
+    AstNodeIndex fn_proto_index = reserveNode(p, AST_NODE_TAG_FN_PROTO);
+
+    eatToken(p, TOKENIZER_TAG_IDENTIFIER, NULL);
+
+    SmallSpan params = parseParamDeclList(p);
+    // const align_expr = try p.parseByteAlign();
+    // const addrspace_expr = try p.parseAddrSpace();
+    // const section_expr = try p.parseLinkSection();
+    // const callconv_expr = try p.parseCallconv();
+    eatToken(p, TOKENIZER_TAG_BANG, NULL);
+
+    const AstNodeIndex return_type_expr = parseTypeExpr(p);
+
+    // Editor's completion (assumption, modeled on the Zig parser this
+    // mirrors): the original commit fell off the end of the function here.
+    return setNode(
+        p,
+        fn_proto_index,
+        AST_NODE_TAG_FN_PROTO,
+        fn_token,
+        (AstData) { .lhs = params.zero_or_one, .rhs = return_type_expr });
+}
+
+static AstNodeIndex parseBlock(Parser* p)
+{
+    bool ok;
+    const AstTokenIndex lbrace = eatToken(p, TOKENIZER_TAG_L_BRACE, &ok);
+    if (!ok)
+        return null_node;
+
+    const uint32_t scratch_top = p->scratch.len;
+
+    // Editor's completion (assumption): statement parsing is not written
+    // yet, so only empty blocks are accepted. AST_NODE_TAG_BLOCK_TWO is
+    // assumed to exist in ast.h, mirroring Zig's Node.Tag.block_two.
+    eatToken(p, TOKENIZER_TAG_R_BRACE, &ok);
+    if (!ok) {
+        fprintf(stderr, "parseBlock only supports empty blocks\n");
+        exit(1);
+    }
+
+    p->scratch.len = scratch_top;
+    return astNodeListAppend(&p->nodes, AST_NODE_TAG_BLOCK_TWO, lbrace,
+        (AstData) { .lhs = 0, .rhs = 0 });
+}
+
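+// expectTopLevelDecl handles the extern/export/inline/noinline prefixes,
+// then tries a function prototype with an optional body, then falls back
+// to a global variable declaration; anything else is fatal.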
+ fprintf(stderr, "the next token should be usingnamespace, which is not supported\n"); + exit(1); +} + static Members parseContainerMembers(Parser* p) { const uint32_t scratch_top = p->scratch.len; diff --git a/tokenizer.h b/tokenizer.h index dea9665..9d86667 100644 --- a/tokenizer.h +++ b/tokenizer.h @@ -129,7 +129,9 @@ TAG(TOKENIZER_TAG_KEYWORD_WHILE) #define TOKENIZER_GENERATE_ENUM(ENUM) ENUM, -#define TOKENIZER_GENERATE_CASE(ENUM) case ENUM: return #ENUM; +#define TOKENIZER_GENERATE_CASE(ENUM) \ + case ENUM: \ + return #ENUM; // First define the enum typedef enum {