more parser - zig - fork of https://codeberg.org/ziglang/zig

commit 85dfbe9d094a4477c7bfb38f08d99f3f0d4f04fd (tree)
parent 3264d1747ed17872d36638e3f94729a9eb14b8ef
Author: Motiejus Jakštys <motiejus@jakstys.lt>
Date:   Tue, 31 Dec 2024 19:14:12 +0200

more parser

Diffstat:
M build.zig  | 27 +++++++++++++++++----------
M common.h  | 6 ++++--
M parser.c  | 227 ++++++++++++++++++++++++++++++++++++++++---------------------------------------
M tokenizer.h  | 2 +-

4 files changed, 136 insertions(+), 126 deletions(-)
diff --git a/build.zig b/build.zig
@@ -77,14 +77,15 @@ pub fn build(b: *std.Build) !void {
     test_exe.addIncludePath(b.path("."));
     test_step.dependOn(&b.addRunArtifact(test_exe).step);
 
-    const lint_step = b.step("lint", "Run linters");
-    const clang_format = b.addSystemCommand(&.{"clang-format"});
-    clang_format.addArgs(&.{ "-Werror", "-i" });
+    const fmt_step = b.step("fmt", "clang-format");
+    const clang_format = b.addSystemCommand(&.{ "clang-format", "-Werror", "-i" });
     for (all_c_files ++ headers) |f| clang_format.addFileArg(b.path(f));
-    lint_step.dependOn(&clang_format.step);
+    fmt_step.dependOn(&clang_format.step);
+
+    const lint_step = b.step("lint", "Run linters");
 
-    const clang_analyze = b.addSystemCommand(&.{"clang"});
-    clang_analyze.addArgs(&.{
+    const clang_analyze = b.addSystemCommand(&.{
+        "clang",
         "--analyze",
         "--analyzer-output",
         "text",
@@ -94,13 +95,19 @@ pub fn build(b: *std.Build) !void {
     for (all_c_files) |cfile| clang_analyze.addFileArg(b.path(cfile));
     lint_step.dependOn(&clang_analyze.step);
 
-    const gcc_analyze = b.addSystemCommand(&.{"gcc"});
-    gcc_analyze.addArgs(&.{ "--analyzer", "-Werror", "-o", "/dev/null" });
+    const gcc_analyze = b.addSystemCommand(&.{
+        "gcc",
+        "--analyzer",
+        "-Wno-analyzer-malloc-leak", // TODO remove when wiring is complete and everything's free()d
+        "-Werror",
+        "-o",
+        "/dev/null",
+    });
     for (all_c_files) |cfile| gcc_analyze.addFileArg(b.path(cfile));
     lint_step.dependOn(&gcc_analyze.step);
 
-    const cppcheck = b.addSystemCommand(&.{"cppcheck"});
-    cppcheck.addArgs(&.{
+    const cppcheck = b.addSystemCommand(&.{
+        "cppcheck",
         "--quiet",
         "--error-exitcode=1",
         "--check-level=exhaustive",
diff --git a/common.h b/common.h
@@ -27,10 +27,12 @@
     }
 
 #define SLICE_RESIZE(Type, slice, new_cap) ({                  \
-    uint32_t cap = (new_cap);                                  \
+    const uint32_t cap = (new_cap);                            \
     Type* new_arr = realloc((slice)->arr, cap * sizeof(Type)); \
-    if (!new_arr)                                              \
+    if (new_arr == NULL) {                                     \
+        free((slice)->arr);                                    \
         exit(1);                                               \
+    }                                                          \
     (slice)->arr = new_arr;                                    \
     (slice)->cap = cap;                                        \
 })
diff --git a/parser.c b/parser.c
@@ -7,12 +7,13 @@
 #include "parser.h"
 
 const AstNodeIndex null_node = 0;
+const AstTokenIndex null_token = ~(AstTokenIndex)(0);
 
 typedef struct {
     enum {
         FIELD_STATE_NONE,
         FIELD_STATE_SEEN,
-        FIELD_STATE_END // sets "end"
+        FIELD_STATE_END
     } tag;
     union {
         uint32_t end;
@@ -30,6 +31,22 @@ typedef struct {
     } payload;
 } SmallSpan;
 
+typedef struct {
+    AstNodeIndexSlice* scratch;
+    uint32_t old_len;
+} CleanupScratch;
+
+static CleanupScratch initCleanupScratch(Parser* p) {
+    return (CleanupScratch) {
+        .scratch = &p->scratch,
+        .old_len = p->scratch.len,
+    };
+}
+
+static void cleanupScratch(CleanupScratch* c) {
+    c->scratch->len = c->old_len;
+}
+
 static AstSubRange listToSpan(Parser* p, const AstNodeIndex* list, uint32_t count) {
     SLICE_ENSURE_CAPACITY(AstNodeIndex, &p->extra_data, count);
     memcpy(&p->extra_data.arr, list, count * sizeof(AstNodeIndex));
@@ -60,31 +77,16 @@ static AstTokenIndex nextToken(Parser* p) {
     return p->tok_i++;
 }
 
-static AstTokenIndex eatToken(Parser* p, TokenizerTag tag, bool* ok) {
+static AstTokenIndex eatToken(Parser* p, TokenizerTag tag) {
     if (p->token_tags[p->tok_i] == tag) {
-        if (ok != NULL)
-            *ok = true;
         return nextToken(p);
     } else {
-        if (ok != NULL)
-            *ok = false;
-        return 0;
+        return null_token;
     }
 }
 
 static void eatDocComments(Parser* p) {
-    bool ok;
-    while (eatToken(p, TOKEN_DOC_COMMENT, &ok), ok) { }
-}
-
-static void expectSemicolon(Parser* p) {
-    bool ok;
-    eatToken(p, TOKEN_SEMICOLON, &ok);
-    if (ok)
-        return;
-
-    fprintf(stderr, "expected semicolon\n");
-    exit(1);
+    while (eatToken(p, TOKEN_DOC_COMMENT) != null_token) { } // keep going while a doc comment was consumed
 }
 
 static AstNodeIndex setNode(Parser* p, uint32_t i, AstNodeItem item) {
@@ -125,9 +127,7 @@ static AstNodeIndex addExtra(Parser* p, const AstNodeIndex* extra, uint32_t coun
 }
 
 static AstNodeIndex parseByteAlign(Parser* p) {
-    bool ok;
-    eatToken(p, TOKEN_KEYWORD_ALIGN, &ok);
-    if (!ok)
+    if (eatToken(p, TOKEN_KEYWORD_ALIGN) == null_token) // no `align` keyword: nothing to parse
         return null_node;
     fprintf(stderr, "parseByteAlign cannot parse alginment\n");
     exit(1);
@@ -135,9 +135,7 @@ static AstNodeIndex parseByteAlign(Parser* p) {
 }
 
 static AstNodeIndex parseAddrSpace(Parser* p) {
-    bool ok;
-    eatToken(p, TOKEN_KEYWORD_ADDRSPACE, &ok);
-    if (!ok)
+    if (eatToken(p, TOKEN_KEYWORD_ADDRSPACE) == null_token) // no `addrspace` keyword: nothing to parse
         return null_node;
     fprintf(stderr, "parseAddrSpace cannot parse addrspace\n");
     exit(1);
@@ -145,9 +143,7 @@ static AstNodeIndex parseAddrSpace(Parser* p) {
 }
 
 static AstNodeIndex parseLinkSection(Parser* p) {
-    bool ok;
-    eatToken(p, TOKEN_KEYWORD_LINKSECTION, &ok);
-    if (!ok)
+    if (eatToken(p, TOKEN_KEYWORD_LINKSECTION) == null_token) // no `linksection` keyword: nothing to parse
         return null_node;
     fprintf(stderr, "parseLinkSection cannot parse linksection\n");
     exit(1);
@@ -155,9 +151,7 @@ static AstNodeIndex parseLinkSection(Parser* p) {
 }
 
 static AstNodeIndex parseCallconv(Parser* p) {
-    bool ok;
-    eatToken(p, TOKEN_KEYWORD_CALLCONV, &ok);
-    if (!ok)
+    if (eatToken(p, TOKEN_KEYWORD_CALLCONV) == null_token) // no `callconv` keyword: nothing to parse
         return null_node;
     fprintf(stderr, "parseCallconv cannot parse callconv\n");
     exit(1);
@@ -171,16 +165,14 @@ typedef struct {
 } NodeContainerField;
 
 static AstNodeIndex expectContainerField(Parser* p) {
-    eatToken(p, TOKEN_KEYWORD_COMPTIME, NULL);
+    eatToken(p, TOKEN_KEYWORD_COMPTIME);
     const AstTokenIndex main_token = p->tok_i;
     if (p->token_tags[p->tok_i] == TOKEN_IDENTIFIER && p->token_tags[p->tok_i + 1] == TOKEN_COLON)
         p->tok_i += 2;
 
     const AstNodeIndex type_expr = parseTypeExpr(p);
     const AstNodeIndex align_expr = parseByteAlign(p);
-    bool ok;
-    eatToken(p, TOKEN_EQUAL, &ok);
-    if (ok) {
+    if (eatToken(p, TOKEN_EQUAL) != null_token) {
         fprintf(stderr, "expectContainerField does not support expr\n");
         exit(1);
     }
@@ -268,7 +260,8 @@ static AstNodeIndex parsePrimaryTypeExpr(Parser* p) {
     }
 }
 
-static AstNodeIndex parseSuffixOp(Parser* p) {
+static AstNodeIndex parseSuffixOp(Parser* p, AstNodeIndex lhs) {
+    (void)lhs;
     const TokenizerTag tok = p->token_tags[p->tok_i];
     switch (tok) {
     case TOKEN_L_BRACKET:
@@ -283,9 +276,7 @@ static AstNodeIndex parseSuffixOp(Parser* p) {
 }
 
 static AstNodeIndex parseSuffixExpr(Parser* p) {
-    bool ok;
-    eatToken(p, TOKEN_KEYWORD_ASYNC, &ok);
-    if (ok) {
+    if (eatToken(p, TOKEN_KEYWORD_ASYNC) != null_token) {
         fprintf(stderr, "async not supported\n");
         exit(1);
     }
@@ -295,43 +286,85 @@ static AstNodeIndex parseSuffixExpr(Parser* p) {
         return res;
 
     while (true) {
-        const AstNodeIndex suffix_op = parseSuffixOp(p);
+        const AstNodeIndex suffix_op = parseSuffixOp(p, res);
         if (suffix_op != 0) {
             res = suffix_op;
             continue;
         }
-        eatToken(p, TOKEN_L_PAREN, &ok);
-        if (ok) {
-            fprintf(stderr, "parseSuffixExpr does not support expr with parens\n");
+        const AstTokenIndex lparen = eatToken(p, TOKEN_L_PAREN);
+        if (lparen == null_token)
+            return res;
+
+        CleanupScratch scratch_top __attribute__((__cleanup__(cleanupScratch))) = initCleanupScratch(p);
+        while (true) {
+            if (eatToken(p, TOKEN_R_PAREN) != null_token)
+                break;
+            fprintf(stderr, "parseSuffixExpr can only parse ()\n");
             exit(1);
         }
-        // TODO more work
-        // const bool comma = p->token_tags[p->tok_i - 2] == TOKEN_COMMA;
 
-        return res;
+        const bool comma = p->token_tags[p->tok_i - 2] == TOKEN_COMMA;
+        const uint32_t params_len = p->scratch.len - scratch_top.old_len;
+        switch (params_len) {
+        case 0:
+            return addNode(
+                &p->nodes,
+                (AstNodeItem) {
+                    .tag = comma ? AST_NODE_TAG_CALL_ONE_COMMA : AST_NODE_TAG_CALL_ONE,
+                    .main_token = lparen,
+                    .data = {
+                        .lhs = res,
+                        .rhs = 0,
+                    },
+                });
+        case 1:
+            return addNode(
+                &p->nodes,
+                (AstNodeItem) {
+                    .tag = comma ? AST_NODE_TAG_CALL_ONE_COMMA : AST_NODE_TAG_CALL_ONE,
+                    .main_token = lparen,
+                    .data = {
+                        .lhs = res,
+                        .rhs = p->scratch.arr[scratch_top.old_len],
+                    },
+                });
+        default:;
+            const AstSubRange span = listToSpan(p, &p->scratch.arr[scratch_top.old_len], params_len);
+            return addNode(
+                &p->nodes,
+                (AstNodeItem) {
+                    .tag = comma ? AST_NODE_TAG_CALL_COMMA : AST_NODE_TAG_CALL,
+                    .main_token = lparen,
+                    .data = {
+                        .lhs = res,
+                        .rhs = addExtra(p, (AstNodeIndex[]) { span.start, span.end }, 2),
+                    },
+                });
+        }
     }
 }
 
-static AstNodeIndex expectToken(Parser* p, TokenizerTag tag, bool* ok) {
+static AstTokenIndex expectToken(Parser* p, TokenizerTag tag) {
     if (p->token_tags[p->tok_i] == tag) {
-        if (ok != NULL)
-            *ok = true;
         return nextToken(p);
     } else {
-        if (ok != NULL)
-            *ok = false;
-        return 0;
+        fprintf(stderr, "expected token %s, got %s\n",
+            tokenizerGetTagString(tag),
+            tokenizerGetTagString(p->token_tags[p->tok_i]));
+        exit(1);
     }
+    return 0; // tcc
 }
 
 static AstNodeIndex parseErrorUnionExpr(Parser* p) {
     const AstNodeIndex suffix_expr = parseSuffixExpr(p);
     if (suffix_expr == 0)
         return null_node;
-    bool ok;
-    const AstNodeIndex bang = eatToken(p, TOKEN_BANG, &ok);
-    if (!ok)
+
+    const AstNodeIndex bang = eatToken(p, TOKEN_BANG);
+    if (bang == null_token)
         return suffix_expr;
+
     return addNode(
         &p->nodes,
         (AstNodeItem) {
@@ -361,18 +394,8 @@ static AstNodeIndex parseTypeExpr(Parser* p) {
 
 static SmallSpan parseParamDeclList(Parser* p) {
     // can only parse functions with no declarations
-    bool ok;
-    AstTokenIndex got_token = eatToken(p, TOKEN_L_PAREN, &ok);
-    if (!ok) {
-        fprintf(stderr, "expected (, got %s\n", tokenizerGetTagString(got_token));
-        exit(1);
-    }
-
-    got_token = eatToken(p, TOKEN_R_PAREN, &ok);
-    if (!ok) {
-        fprintf(stderr, "expected ), got %s\n", tokenizerGetTagString(got_token));
-        exit(1);
-    }
+    expectToken(p, TOKEN_L_PAREN);
+    expectToken(p, TOKEN_R_PAREN);
 
     return (SmallSpan) {
         .tag = SMALL_SPAN_ZERO_OR_ONE,
@@ -386,21 +409,20 @@ static uint32_t reserveNode(Parser* p, AstNodeTag tag) {
 }
 
 static AstNodeIndex parseFnProto(Parser* p) {
-    bool ok;
-    AstNodeIndex fn_token = eatToken(p, TOKEN_KEYWORD_FN, &ok);
-    if (!ok)
+    AstNodeIndex fn_token = eatToken(p, TOKEN_KEYWORD_FN);
+    if (fn_token == null_token) // eatToken reports "no match" as null_token, not null_node
         return null_node;
 
     AstNodeIndex fn_proto_index = reserveNode(p, AST_NODE_TAG_FN_PROTO);
 
-    eatToken(p, TOKEN_IDENTIFIER, NULL);
+    eatToken(p, TOKEN_IDENTIFIER);
 
     SmallSpan params = parseParamDeclList(p);
     const AstNodeIndex align_expr = parseByteAlign(p);
     const AstNodeIndex addrspace_expr = parseAddrSpace(p);
     const AstNodeIndex section_expr = parseLinkSection(p);
     const AstNodeIndex callconv_expr = parseCallconv(p);
-    eatToken(p, TOKEN_BANG, NULL);
+    eatToken(p, TOKEN_BANG);
 
     const AstNodeIndex return_type_expr = parseTypeExpr(p);
 
@@ -434,9 +456,8 @@ static AstTokenIndex parseBlockLabel(Parser* p) {
 }
 
 static AstNodeIndex parseForStatement(Parser* p) {
-    bool ok;
-    const AstNodeIndex for_token = eatToken(p, TOKEN_KEYWORD_FOR, &ok);
-    if (!ok)
+    const AstNodeIndex for_token = eatToken(p, TOKEN_KEYWORD_FOR);
+    if (for_token == null_token)
         return null_node;
 
     (void)for_token;
@@ -445,9 +466,8 @@ static AstNodeIndex parseForStatement(Parser* p) {
 }
 
 static AstNodeIndex parseWhileStatement(Parser* p) {
-    bool ok;
-    const AstNodeIndex while_token = eatToken(p, TOKEN_KEYWORD_WHILE, &ok);
-    if (!ok)
+    const AstNodeIndex while_token = eatToken(p, TOKEN_KEYWORD_WHILE);
+    if (while_token == null_token)
         return null_node;
 
     (void)while_token;
@@ -456,8 +476,7 @@ static AstNodeIndex parseWhileStatement(Parser* p) {
 }
 
 static AstNodeIndex parseLoopStatement(Parser* p) {
-    bool ok_inline_token;
-    eatToken(p, TOKEN_KEYWORD_INLINE, &ok_inline_token);
+    const AstTokenIndex inline_token = eatToken(p, TOKEN_KEYWORD_INLINE);
 
     const AstNodeIndex for_statement = parseForStatement(p);
     if (for_statement != 0)
@@ -467,10 +486,10 @@ static AstNodeIndex parseLoopStatement(Parser* p) {
     if (while_statement != 0)
         return while_statement;
 
-    if (!ok_inline_token)
+    if (inline_token == null_token)
         return null_node;
 
-    fprintf(stderr, "If we've seen 'inline', there should have been a 'for' or 'while'\n");
+    fprintf(stderr, "seen 'inline', there should have been a 'for' or 'while'\n");
     exit(1);
     return 0; // tcc
 }
@@ -491,8 +510,7 @@ static AstNodeIndex expectVarDeclExprStatement(Parser* p) {
 
 static AstNodeIndex parseLabeledStatement(Parser*);
 static AstNodeIndex expectStatement(Parser* p, bool allow_defer_var) {
-    bool ok;
-    if (eatToken(p, TOKEN_KEYWORD_COMPTIME, &ok), ok) {
+    if (eatToken(p, TOKEN_KEYWORD_COMPTIME) != null_token) {
         fprintf(stderr, "expectStatement: comptime keyword not supported\n");
         exit(1);
     }
@@ -524,22 +542,12 @@ static AstNodeIndex expectStatement(Parser* p, bool allow_defer_var) {
     }
 }
 
-typedef struct {
-    AstNodeIndexSlice* scratch;
-    uint32_t old_len;
-} CleanupScratch;
-static void cleanupScratch(CleanupScratch* c) { c->scratch->len = c->old_len; }
-
 static AstNodeIndex parseBlock(Parser* p) {
-    bool ok;
-    const AstNodeIndex lbrace = eatToken(p, TOKEN_L_BRACE, &ok);
-    if (!ok)
+    const AstNodeIndex lbrace = eatToken(p, TOKEN_L_BRACE);
+    if (lbrace == null_token)
         return null_node;
 
-    CleanupScratch scratch_top __attribute__((__cleanup__(cleanupScratch))) = {
-        .scratch = &p->scratch,
-        .old_len = p->scratch.len,
-    };
+    CleanupScratch scratch_top __attribute__((__cleanup__(cleanupScratch))) = initCleanupScratch(p);
 
     while (1) {
         if (p->token_tags[p->tok_i] == TOKEN_R_BRACE)
@@ -551,7 +559,7 @@ static AstNodeIndex parseBlock(Parser* p) {
             break;
         SLICE_APPEND(AstNodeIndex, &p->scratch, statement);
     }
-    expectToken(p, TOKEN_R_BRACE, NULL);
+    expectToken(p, TOKEN_R_BRACE);
     const bool semicolon = (p->token_tags[p->tok_i] - 2 == TOKEN_SEMICOLON);
 
     const uint32_t statements_len = p->scratch.len - scratch_top.old_len;
@@ -625,13 +633,9 @@ static AstNodeIndex parseLabeledStatement(Parser* p) {
 }
 
 static AstNodeIndex parseVarDeclProto(Parser* p) {
-    bool ok;
-    eatToken(p, TOKEN_KEYWORD_CONST, &ok);
-    if (!ok) {
-        eatToken(p, TOKEN_KEYWORD_VAR, &ok);
-        if (!ok)
-            return null_node;
-    }
+    if (eatToken(p, TOKEN_KEYWORD_CONST) == null_token && eatToken(p, TOKEN_KEYWORD_VAR) == null_token)
+        return null_node; // neither `const` nor `var`: not a variable declaration
+
     fprintf(stderr, "parseVarDeclProto: parsing vars is not supported\n");
     exit(1);
     return 0; // tcc
@@ -652,7 +656,7 @@ static AstNodeIndex expectTopLevelDecl(Parser* p) {
 
     switch (p->token_tags[extern_export_inline_token]) {
     case TOKEN_KEYWORD_EXTERN:
-        eatToken(p, TOKEN_STRING_LITERAL, NULL);
+        eatToken(p, TOKEN_STRING_LITERAL);
         break;
     case TOKEN_KEYWORD_EXPORT:
     case TOKEN_KEYWORD_INLINE:
@@ -684,7 +688,7 @@ static AstNodeIndex expectTopLevelDecl(Parser* p) {
         }
     }
 
-    eatToken(p, TOKEN_KEYWORD_THREADLOCAL, NULL);
+    eatToken(p, TOKEN_KEYWORD_THREADLOCAL);
     AstNodeIndex var_decl = parseGlobalVarDecl(p);
     if (var_decl != 0) {
         return var_decl;
@@ -761,12 +765,9 @@ void findNextContainerMember(Parser* p) {
 }
 
 static Members parseContainerMembers(Parser* p) {
-    CleanupScratch scratch_top __attribute__((__cleanup__(cleanupScratch))) = {
-        .scratch = &p->scratch,
-        .old_len = p->scratch.len,
-    };
-    bool ok;
-    while (eatToken(p, TOKEN_CONTAINER_DOC_COMMENT, &ok), ok)
+    CleanupScratch scratch_top __attribute__((__cleanup__(cleanupScratch))) = initCleanupScratch(p);
+
+    while (eatToken(p, TOKEN_CONTAINER_DOC_COMMENT) != null_token)
         ;
 
     FieldState field_state = { .tag = FIELD_STATE_NONE };
diff --git a/tokenizer.h b/tokenizer.h
@@ -4,7 +4,7 @@
 #include <stdbool.h>
 #include <stdint.h>
 
-#define TOKENIZER_FOREACH_TAG_ENUM(TAG)                            \
+#define TOKENIZER_FOREACH_TAG_ENUM(TAG)                    \
     TAG(TOKEN_INVALID)                                     \
     TAG(TOKEN_INVALID_PERIODASTERISKS)                     \
     TAG(TOKEN_IDENTIFIER)                                  \

	zig fork of https://codeberg.org/ziglang/zig
	Log | Files | Refs | README | LICENSE

M	build.zig	|	27	+++++++++++++++++----------
M	common.h	|	6	++++--
M	parser.c	|	227	++++++++++++++++++++++++++++++++++++++++---------------------------------------
M	tokenizer.h	|	2	+-