commit 85dfbe9d094a4477c7bfb38f08d99f3f0d4f04fd (tree)
parent 3264d1747ed17872d36638e3f94729a9eb14b8ef
Author: Motiejus Jakštys <motiejus@jakstys.lt>
Date: Tue, 31 Dec 2024 19:14:12 +0200
more parser
Diffstat:
4 files changed, 136 insertions(+), 126 deletions(-)
diff --git a/build.zig b/build.zig
@@ -77,14 +77,15 @@ pub fn build(b: *std.Build) !void {
test_exe.addIncludePath(b.path("."));
test_step.dependOn(&b.addRunArtifact(test_exe).step);
- const lint_step = b.step("lint", "Run linters");
- const clang_format = b.addSystemCommand(&.{"clang-format"});
- clang_format.addArgs(&.{ "-Werror", "-i" });
+ const fmt_step = b.step("fmt", "clang-format");
+ const clang_format = b.addSystemCommand(&.{ "clang-format", "-Werror", "-i" });
for (all_c_files ++ headers) |f| clang_format.addFileArg(b.path(f));
- lint_step.dependOn(&clang_format.step);
+ fmt_step.dependOn(&clang_format.step);
+
+ const lint_step = b.step("lint", "Run linters");
- const clang_analyze = b.addSystemCommand(&.{"clang"});
- clang_analyze.addArgs(&.{
+ const clang_analyze = b.addSystemCommand(&.{
+ "clang",
"--analyze",
"--analyzer-output",
"text",
@@ -94,13 +95,19 @@ pub fn build(b: *std.Build) !void {
for (all_c_files) |cfile| clang_analyze.addFileArg(b.path(cfile));
lint_step.dependOn(&clang_analyze.step);
- const gcc_analyze = b.addSystemCommand(&.{"gcc"});
- gcc_analyze.addArgs(&.{ "--analyzer", "-Werror", "-o", "/dev/null" });
+ const gcc_analyze = b.addSystemCommand(&.{
+ "gcc",
+ "--analyzer",
+ "-Wno-analyzer-malloc-leak", // TODO remove when wiring is complete and everything's free()d
+ "-Werror",
+ "-o",
+ "/dev/null",
+ });
for (all_c_files) |cfile| gcc_analyze.addFileArg(b.path(cfile));
lint_step.dependOn(&gcc_analyze.step);
- const cppcheck = b.addSystemCommand(&.{"cppcheck"});
- cppcheck.addArgs(&.{
+ const cppcheck = b.addSystemCommand(&.{
+ "cppcheck",
"--quiet",
"--error-exitcode=1",
"--check-level=exhaustive",
diff --git a/common.h b/common.h
@@ -27,10 +27,12 @@
}
#define SLICE_RESIZE(Type, slice, new_cap) ({ \
- uint32_t cap = (new_cap); \
+ const uint32_t cap = (new_cap); \
Type* new_arr = realloc((slice)->arr, cap * sizeof(Type)); \
- if (!new_arr) \
+ if (new_arr == NULL) { \
+ free((slice)->arr); \
exit(1); \
+ } \
(slice)->arr = new_arr; \
(slice)->cap = cap; \
})
diff --git a/parser.c b/parser.c
@@ -7,12 +7,13 @@
#include "parser.h"
const AstNodeIndex null_node = 0;
+const AstTokenIndex null_token = ~(AstTokenIndex)(0);
typedef struct {
enum {
FIELD_STATE_NONE,
FIELD_STATE_SEEN,
- FIELD_STATE_END // sets "end"
+ FIELD_STATE_END
} tag;
union {
uint32_t end;
@@ -30,6 +31,22 @@ typedef struct {
} payload;
} SmallSpan;
+typedef struct {
+ AstNodeIndexSlice* scratch;
+ uint32_t old_len;
+} CleanupScratch;
+
+static CleanupScratch initCleanupScratch(Parser* p) {
+ return (CleanupScratch) {
+ .scratch = &p->scratch,
+ .old_len = p->scratch.len,
+ };
+}
+
+static void cleanupScratch(CleanupScratch* c) {
+ c->scratch->len = c->old_len;
+}
+
static AstSubRange listToSpan(Parser* p, const AstNodeIndex* list, uint32_t count) {
SLICE_ENSURE_CAPACITY(AstNodeIndex, &p->extra_data, count);
memcpy(&p->extra_data.arr, list, count * sizeof(AstNodeIndex));
@@ -60,31 +77,16 @@ static AstTokenIndex nextToken(Parser* p) {
return p->tok_i++;
}
-static AstTokenIndex eatToken(Parser* p, TokenizerTag tag, bool* ok) {
+static AstTokenIndex eatToken(Parser* p, TokenizerTag tag) {
if (p->token_tags[p->tok_i] == tag) {
- if (ok != NULL)
- *ok = true;
return nextToken(p);
} else {
- if (ok != NULL)
- *ok = false;
- return 0;
+ return null_token;
}
}
static void eatDocComments(Parser* p) {
- bool ok;
- while (eatToken(p, TOKEN_DOC_COMMENT, &ok), ok) { }
-}
-
-static void expectSemicolon(Parser* p) {
- bool ok;
- eatToken(p, TOKEN_SEMICOLON, &ok);
- if (ok)
- return;
-
- fprintf(stderr, "expected semicolon\n");
- exit(1);
+ while (eatToken(p, TOKEN_DOC_COMMENT) != null_token) { } // keep eating while a doc comment was consumed
}
static AstNodeIndex setNode(Parser* p, uint32_t i, AstNodeItem item) {
@@ -125,9 +127,7 @@ static AstNodeIndex addExtra(Parser* p, const AstNodeIndex* extra, uint32_t coun
}
static AstNodeIndex parseByteAlign(Parser* p) {
- bool ok;
- eatToken(p, TOKEN_KEYWORD_ALIGN, &ok);
- if (!ok)
+ if (eatToken(p, TOKEN_KEYWORD_ALIGN) == null_token) // keyword absent: nothing to parse
return null_node;
fprintf(stderr, "parseByteAlign cannot parse alginment\n");
exit(1);
@@ -135,9 +135,7 @@ static AstNodeIndex parseByteAlign(Parser* p) {
}
static AstNodeIndex parseAddrSpace(Parser* p) {
- bool ok;
- eatToken(p, TOKEN_KEYWORD_ADDRSPACE, &ok);
- if (!ok)
+ if (eatToken(p, TOKEN_KEYWORD_ADDRSPACE) == null_token) // keyword absent: nothing to parse
return null_node;
fprintf(stderr, "parseAddrSpace cannot parse addrspace\n");
exit(1);
@@ -145,9 +143,7 @@ static AstNodeIndex parseAddrSpace(Parser* p) {
}
static AstNodeIndex parseLinkSection(Parser* p) {
- bool ok;
- eatToken(p, TOKEN_KEYWORD_LINKSECTION, &ok);
- if (!ok)
+ if (eatToken(p, TOKEN_KEYWORD_LINKSECTION) == null_token) // keyword absent: nothing to parse
return null_node;
fprintf(stderr, "parseLinkSection cannot parse linksection\n");
exit(1);
@@ -155,9 +151,7 @@ static AstNodeIndex parseLinkSection(Parser* p) {
}
static AstNodeIndex parseCallconv(Parser* p) {
- bool ok;
- eatToken(p, TOKEN_KEYWORD_CALLCONV, &ok);
- if (!ok)
+ if (eatToken(p, TOKEN_KEYWORD_CALLCONV) == null_token) // keyword absent: nothing to parse
return null_node;
fprintf(stderr, "parseCallconv cannot parse callconv\n");
exit(1);
@@ -171,16 +165,14 @@ typedef struct {
} NodeContainerField;
static AstNodeIndex expectContainerField(Parser* p) {
- eatToken(p, TOKEN_KEYWORD_COMPTIME, NULL);
+ eatToken(p, TOKEN_KEYWORD_COMPTIME);
const AstTokenIndex main_token = p->tok_i;
if (p->token_tags[p->tok_i] == TOKEN_IDENTIFIER && p->token_tags[p->tok_i + 1] == TOKEN_COLON)
p->tok_i += 2;
const AstNodeIndex type_expr = parseTypeExpr(p);
const AstNodeIndex align_expr = parseByteAlign(p);
- bool ok;
- eatToken(p, TOKEN_EQUAL, &ok);
- if (ok) {
+ if (eatToken(p, TOKEN_EQUAL) != null_token) {
fprintf(stderr, "expectContainerField does not support expr\n");
exit(1);
}
@@ -268,7 +260,8 @@ static AstNodeIndex parsePrimaryTypeExpr(Parser* p) {
}
}
-static AstNodeIndex parseSuffixOp(Parser* p) {
+static AstNodeIndex parseSuffixOp(Parser* p, AstNodeIndex lhs) {
+ (void)lhs;
const TokenizerTag tok = p->token_tags[p->tok_i];
switch (tok) {
case TOKEN_L_BRACKET:
@@ -283,9 +276,7 @@ static AstNodeIndex parseSuffixOp(Parser* p) {
}
static AstNodeIndex parseSuffixExpr(Parser* p) {
- bool ok;
- eatToken(p, TOKEN_KEYWORD_ASYNC, &ok);
- if (ok) {
+ if (eatToken(p, TOKEN_KEYWORD_ASYNC) != null_token) {
fprintf(stderr, "async not supported\n");
exit(1);
}
@@ -295,43 +286,85 @@ static AstNodeIndex parseSuffixExpr(Parser* p) {
return res;
while (true) {
- const AstNodeIndex suffix_op = parseSuffixOp(p);
+ const AstNodeIndex suffix_op = parseSuffixOp(p, res);
if (suffix_op != 0) {
res = suffix_op;
continue;
}
- eatToken(p, TOKEN_L_PAREN, &ok);
- if (ok) {
- fprintf(stderr, "parseSuffixExpr does not support expr with parens\n");
+ const AstTokenIndex lparen = eatToken(p, TOKEN_L_PAREN);
+ if (lparen == null_token)
+ return res;
+
+ CleanupScratch scratch_top __attribute__((__cleanup__(cleanupScratch))) = initCleanupScratch(p);
+ while (true) {
+ if (eatToken(p, TOKEN_R_PAREN) != null_token)
+ break;
+ fprintf(stderr, "parseSuffixExpr can only parse ()\n");
exit(1);
}
- // TODO more work
- // const bool comma = p->token_tags[p->tok_i - 2] == TOKEN_COMMA;
- return res;
+ const bool comma = p->token_tags[p->tok_i - 2] == TOKEN_COMMA;
+ const uint32_t params_len = p->scratch.len - scratch_top.old_len;
+ switch (params_len) {
+ case 0:
+ return addNode(
+ &p->nodes,
+ (AstNodeItem) {
+ .tag = comma ? AST_NODE_TAG_CALL_ONE_COMMA : AST_NODE_TAG_CALL_ONE,
+ .main_token = lparen,
+ .data = {
+ .lhs = res,
+ .rhs = 0,
+ },
+ });
+ case 1:
+ return addNode(
+ &p->nodes,
+ (AstNodeItem) {
+ .tag = comma ? AST_NODE_TAG_CALL_ONE_COMMA : AST_NODE_TAG_CALL_ONE,
+ .main_token = lparen,
+ .data = {
+ .lhs = res,
+ .rhs = p->scratch.arr[scratch_top.old_len],
+ },
+ });
+ default:;
+ const AstSubRange span = listToSpan(p, &p->scratch.arr[scratch_top.old_len], params_len);
+ return addNode(
+ &p->nodes,
+ (AstNodeItem) {
+ .tag = comma ? AST_NODE_TAG_CALL_COMMA : AST_NODE_TAG_CALL,
+ .main_token = lparen,
+ .data = {
+ .lhs = res,
+ .rhs = addExtra(p, (AstNodeIndex[]) { span.start, span.end }, 2),
+ },
+ });
+ }
}
}
-static AstNodeIndex expectToken(Parser* p, TokenizerTag tag, bool* ok) {
+static AstTokenIndex expectToken(Parser* p, TokenizerTag tag) {
if (p->token_tags[p->tok_i] == tag) {
- if (ok != NULL)
- *ok = true;
return nextToken(p);
} else {
- if (ok != NULL)
- *ok = false;
- return 0;
+ fprintf(stderr, "expected token %s, got %s\n",
+ tokenizerGetTagString(tag),
+ tokenizerGetTagString(p->token_tags[p->tok_i]));
+ exit(1);
}
+ return 0; // tcc
}
static AstNodeIndex parseErrorUnionExpr(Parser* p) {
const AstNodeIndex suffix_expr = parseSuffixExpr(p);
if (suffix_expr == 0)
return null_node;
- bool ok;
- const AstNodeIndex bang = eatToken(p, TOKEN_BANG, &ok);
- if (!ok)
+
+ const AstNodeIndex bang = eatToken(p, TOKEN_BANG);
+ if (bang == null_token)
return suffix_expr;
+
return addNode(
&p->nodes,
(AstNodeItem) {
@@ -361,18 +394,8 @@ static AstNodeIndex parseTypeExpr(Parser* p) {
static SmallSpan parseParamDeclList(Parser* p) {
// can only parse functions with no declarations
- bool ok;
- AstTokenIndex got_token = eatToken(p, TOKEN_L_PAREN, &ok);
- if (!ok) {
- fprintf(stderr, "expected (, got %s\n", tokenizerGetTagString(got_token));
- exit(1);
- }
-
- got_token = eatToken(p, TOKEN_R_PAREN, &ok);
- if (!ok) {
- fprintf(stderr, "expected ), got %s\n", tokenizerGetTagString(got_token));
- exit(1);
- }
+ expectToken(p, TOKEN_L_PAREN);
+ expectToken(p, TOKEN_R_PAREN);
return (SmallSpan) {
.tag = SMALL_SPAN_ZERO_OR_ONE,
@@ -386,21 +409,20 @@ static uint32_t reserveNode(Parser* p, AstNodeTag tag) {
}
static AstNodeIndex parseFnProto(Parser* p) {
- bool ok;
- AstNodeIndex fn_token = eatToken(p, TOKEN_KEYWORD_FN, &ok);
- if (!ok)
+ AstNodeIndex fn_token = eatToken(p, TOKEN_KEYWORD_FN);
+ if (fn_token == null_token) // eatToken reports "no match" as null_token, not null_node
return null_node;
AstNodeIndex fn_proto_index = reserveNode(p, AST_NODE_TAG_FN_PROTO);
- eatToken(p, TOKEN_IDENTIFIER, NULL);
+ eatToken(p, TOKEN_IDENTIFIER);
SmallSpan params = parseParamDeclList(p);
const AstNodeIndex align_expr = parseByteAlign(p);
const AstNodeIndex addrspace_expr = parseAddrSpace(p);
const AstNodeIndex section_expr = parseLinkSection(p);
const AstNodeIndex callconv_expr = parseCallconv(p);
- eatToken(p, TOKEN_BANG, NULL);
+ eatToken(p, TOKEN_BANG);
const AstNodeIndex return_type_expr = parseTypeExpr(p);
@@ -434,9 +456,8 @@ static AstTokenIndex parseBlockLabel(Parser* p) {
}
static AstNodeIndex parseForStatement(Parser* p) {
- bool ok;
- const AstNodeIndex for_token = eatToken(p, TOKEN_KEYWORD_FOR, &ok);
- if (!ok)
+ const AstNodeIndex for_token = eatToken(p, TOKEN_KEYWORD_FOR);
+ if (for_token == null_token)
return null_node;
(void)for_token;
@@ -445,9 +466,8 @@ static AstNodeIndex parseForStatement(Parser* p) {
}
static AstNodeIndex parseWhileStatement(Parser* p) {
- bool ok;
- const AstNodeIndex while_token = eatToken(p, TOKEN_KEYWORD_WHILE, &ok);
- if (!ok)
+ const AstNodeIndex while_token = eatToken(p, TOKEN_KEYWORD_WHILE);
+ if (while_token == null_token)
return null_node;
(void)while_token;
@@ -456,8 +476,7 @@ static AstNodeIndex parseWhileStatement(Parser* p) {
}
static AstNodeIndex parseLoopStatement(Parser* p) {
- bool ok_inline_token;
- eatToken(p, TOKEN_KEYWORD_INLINE, &ok_inline_token);
+ const AstTokenIndex inline_token = eatToken(p, TOKEN_KEYWORD_INLINE);
const AstNodeIndex for_statement = parseForStatement(p);
if (for_statement != 0)
@@ -467,10 +486,10 @@ static AstNodeIndex parseLoopStatement(Parser* p) {
if (while_statement != 0)
return while_statement;
- if (!ok_inline_token)
+ if (inline_token == null_token)
return null_node;
- fprintf(stderr, "If we've seen 'inline', there should have been a 'for' or 'while'\n");
+ fprintf(stderr, "seen 'inline', there should have been a 'for' or 'while'\n");
exit(1);
return 0; // tcc
}
@@ -491,8 +510,7 @@ static AstNodeIndex expectVarDeclExprStatement(Parser* p) {
static AstNodeIndex parseLabeledStatement(Parser*);
static AstNodeIndex expectStatement(Parser* p, bool allow_defer_var) {
- bool ok;
- if (eatToken(p, TOKEN_KEYWORD_COMPTIME, &ok), ok) {
+ if (eatToken(p, TOKEN_KEYWORD_COMPTIME) != null_token) {
fprintf(stderr, "expectStatement: comptime keyword not supported\n");
exit(1);
}
@@ -524,22 +542,12 @@ static AstNodeIndex expectStatement(Parser* p, bool allow_defer_var) {
}
}
-typedef struct {
- AstNodeIndexSlice* scratch;
- uint32_t old_len;
-} CleanupScratch;
-static void cleanupScratch(CleanupScratch* c) { c->scratch->len = c->old_len; }
-
static AstNodeIndex parseBlock(Parser* p) {
- bool ok;
- const AstNodeIndex lbrace = eatToken(p, TOKEN_L_BRACE, &ok);
- if (!ok)
+ const AstNodeIndex lbrace = eatToken(p, TOKEN_L_BRACE);
+ if (lbrace == null_token)
return null_node;
- CleanupScratch scratch_top __attribute__((__cleanup__(cleanupScratch))) = {
- .scratch = &p->scratch,
- .old_len = p->scratch.len,
- };
+ CleanupScratch scratch_top __attribute__((__cleanup__(cleanupScratch))) = initCleanupScratch(p);
while (1) {
if (p->token_tags[p->tok_i] == TOKEN_R_BRACE)
@@ -551,7 +559,7 @@ static AstNodeIndex parseBlock(Parser* p) {
break;
SLICE_APPEND(AstNodeIndex, &p->scratch, statement);
}
- expectToken(p, TOKEN_R_BRACE, NULL);
+ expectToken(p, TOKEN_R_BRACE);
- const bool semicolon = (p->token_tags[p->tok_i] - 2 == TOKEN_SEMICOLON);
+ const bool semicolon = (p->token_tags[p->tok_i - 2] == TOKEN_SEMICOLON); // inspect the token two positions back, not (current tag - 2)
const uint32_t statements_len = p->scratch.len - scratch_top.old_len;
@@ -625,13 +633,9 @@ static AstNodeIndex parseLabeledStatement(Parser* p) {
}
static AstNodeIndex parseVarDeclProto(Parser* p) {
- bool ok;
- eatToken(p, TOKEN_KEYWORD_CONST, &ok);
- if (!ok) {
- eatToken(p, TOKEN_KEYWORD_VAR, &ok);
- if (!ok)
- return null_node;
- }
+ if (eatToken(p, TOKEN_KEYWORD_CONST) == null_token && eatToken(p, TOKEN_KEYWORD_VAR) == null_token) // neither const nor var; var is only tried when const is absent
+ return null_node;
+
fprintf(stderr, "parseVarDeclProto: parsing vars is not supported\n");
exit(1);
return 0; // tcc
@@ -652,7 +656,7 @@ static AstNodeIndex expectTopLevelDecl(Parser* p) {
switch (p->token_tags[extern_export_inline_token]) {
case TOKEN_KEYWORD_EXTERN:
- eatToken(p, TOKEN_STRING_LITERAL, NULL);
+ eatToken(p, TOKEN_STRING_LITERAL);
break;
case TOKEN_KEYWORD_EXPORT:
case TOKEN_KEYWORD_INLINE:
@@ -684,7 +688,7 @@ static AstNodeIndex expectTopLevelDecl(Parser* p) {
}
}
- eatToken(p, TOKEN_KEYWORD_THREADLOCAL, NULL);
+ eatToken(p, TOKEN_KEYWORD_THREADLOCAL);
AstNodeIndex var_decl = parseGlobalVarDecl(p);
if (var_decl != 0) {
return var_decl;
@@ -761,12 +765,9 @@ void findNextContainerMember(Parser* p) {
}
static Members parseContainerMembers(Parser* p) {
- CleanupScratch scratch_top __attribute__((__cleanup__(cleanupScratch))) = {
- .scratch = &p->scratch,
- .old_len = p->scratch.len,
- };
- bool ok;
- while (eatToken(p, TOKEN_CONTAINER_DOC_COMMENT, &ok), ok)
+ CleanupScratch scratch_top __attribute__((__cleanup__(cleanupScratch))) = initCleanupScratch(p);
+
+ while (eatToken(p, TOKEN_CONTAINER_DOC_COMMENT) != null_token)
;
FieldState field_state = { .tag = FIELD_STATE_NONE };
diff --git a/tokenizer.h b/tokenizer.h
@@ -4,7 +4,7 @@
#include <stdbool.h>
#include <stdint.h>
-#define TOKENIZER_FOREACH_TAG_ENUM(TAG) \
+#define TOKENIZER_FOREACH_TAG_ENUM(TAG) \
TAG(TOKEN_INVALID) \
TAG(TOKEN_INVALID_PERIODASTERISKS) \
TAG(TOKEN_IDENTIFIER) \