From 2a56ea9be26bb3bbadc9850b8abfb45f2917153c Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Motiejus=20Jak=C5=A1tys?=
Date: Tue, 7 Jan 2025 22:22:05 +0100
Subject: [PATCH] No types, no fucking types. Just bugs

---
 ast.c              | 52 ++++++++++++++++++++++------------------
 ast.h              |  4 +---
 parser.c           | 28 +++++++++++++++----------
 parser_test.zig    | 18 +++++++++++-----
 t/hello.zig        |  3 ---
 tokenizer_test.zig | 12 ++++++++++++
 6 files changed, 68 insertions(+), 49 deletions(-)
 delete mode 100644 t/hello.zig

diff --git a/ast.c b/ast.c
index 53176ff..43115bf 100644
--- a/ast.c
+++ b/ast.c
@@ -9,10 +9,23 @@
 
 #define N 1024
 
+static void astTokenListEnsureCapacity(AstTokenList* list, uint32_t additional) {
+    const uint32_t new_len = list->len + additional;
+    if (new_len <= list->cap) {
+        return;
+    }
+
+    const uint32_t new_cap = new_len > list->cap * 2 ? new_len : list->cap * 2;
+    list->tags = realloc(list->tags, new_cap * sizeof(TokenizerTag));
+    list->starts = realloc(list->starts, new_cap * sizeof(AstIndex));
+    if (!list->tags || !list->starts)
+        exit(1);
+    list->cap = new_cap;
+}
+
 Ast astParse(const char* source, const uint32_t len) {
     uint32_t estimated_token_count = len / 8;
 
-    // Initialize token list
     AstTokenList tokens = {
         .len = 0,
         .cap = estimated_token_count,
@@ -20,31 +33,18 @@ Ast astParse(const char* source, const uint32_t len) {
         .starts = ARR_INIT(AstIndex, estimated_token_count)
     };
 
-    // Tokenize
     Tokenizer tok = tokenizerInit(source, len);
     while (true) {
-        if (tokens.len >= tokens.cap) {
-            fprintf(stderr, "too many tokens, bump estimated_token_count\n");
-            exit(1);
-        }
+        astTokenListEnsureCapacity(&tokens, 1);
         TokenizerToken token = tokenizerNext(&tok);
-        tokens.tags[++tokens.len] = token.tag;
-        tokens.starts[tokens.len] = token.loc.start;
+        tokens.tags[tokens.len] = token.tag;
+        tokens.starts[tokens.len++] = token.loc.start;
         if (token.tag == TOKEN_EOF)
            break;
     }
 
-    // Initialize node list
     uint32_t estimated_node_count = (tokens.len + 2) / 2;
-    AstNodeList nodes = {
-        .len = 0,
-        .cap = estimated_node_count,
-        .tags = ARR_INIT(AstNodeTag, estimated_node_count),
-        .main_tokens = ARR_INIT(AstTokenIndex, estimated_node_count),
-        .datas = ARR_INIT(AstData, estimated_node_count)
-    };
 
-    // Initialize parser
     Parser p = {
         .source = source,
         .source_len = len,
@@ -52,21 +52,19 @@ Ast astParse(const char* source, const uint32_t len) {
         .token_starts = tokens.starts,
         .tokens_len = tokens.len,
         .tok_i = 0,
-        .nodes = nodes,
+        .nodes = {
+            .len = 0,
+            .cap = estimated_node_count,
+            .tags = ARR_INIT(AstNodeTag, estimated_node_count),
+            .main_tokens = ARR_INIT(AstTokenIndex, estimated_node_count),
+            .datas = ARR_INIT(AstData, estimated_node_count),
+        },
         .extra_data = SLICE_INIT(AstNodeIndex, N),
-        .scratch = SLICE_INIT(AstNodeIndex, N)
+        .scratch = SLICE_INIT(AstNodeIndex, N),
     };
 
     parseRoot(&p);
 
-    p.nodes.cap = p.nodes.len = 0;
-    free(p.nodes.tags);
-    free(p.nodes.main_tokens);
-    free(p.nodes.datas);
-
-    p.extra_data.cap = p.extra_data.len = 0;
-    free(p.extra_data.arr);
-
     p.scratch.cap = p.scratch.len = 0;
     free(p.scratch.arr);
 
diff --git a/ast.h b/ast.h
index 7a28abc..cb2f8e8 100644
--- a/ast.h
+++ b/ast.h
@@ -598,8 +598,6 @@ typedef struct AstError {
 } AstError;
 
 Ast astParse(const char* source, uint32_t len);
-
-AstNodeIndex astNodeListAppend(AstNodeList*, AstNodeItem);
-void astTokenListAppend(AstTokenList* list, TokenizerTag tag, AstIndex start);
+void astDeinit(Ast*);
 
 #endif
diff --git a/parser.c b/parser.c
index d85a0ab..2eedf77 100644
--- a/parser.c
+++ b/parser.c
@@ -49,7 +49,7 @@ static void cleanupScratch(CleanupScratch* c) {
 
 static AstSubRange listToSpan(Parser* p, const AstNodeIndex* list, uint32_t count) {
     SLICE_ENSURE_CAPACITY(AstNodeIndex, &p->extra_data, count);
-    memcpy(&p->extra_data.arr, list, count * sizeof(AstNodeIndex));
+    memcpy(p->extra_data.arr + p->extra_data.len, list, count * sizeof(AstNodeIndex));
     p->extra_data.len += count;
     return (AstSubRange) {
         .start = p->extra_data.len - count,
@@ -86,7 +86,7 @@ static AstTokenIndex eatToken(Parser* p, TokenizerTag tag) {
 }
 
 static void eatDocComments(Parser* p) {
-    while (eatToken(p, TOKEN_DOC_COMMENT) == null_token) { }
+    while (eatToken(p, TOKEN_DOC_COMMENT) != null_token) { }
 }
 
 static AstNodeIndex setNode(Parser* p, uint32_t i, AstNodeItem item) {
@@ -122,20 +122,20 @@ static AstNodeIndex addNode(AstNodeList* nodes, AstNodeItem item) {
 static AstNodeIndex addExtra(Parser* p, const AstNodeIndex* extra, uint32_t count) {
     const AstNodeIndex result = p->extra_data.len;
     SLICE_ENSURE_CAPACITY(AstNodeIndex, &p->extra_data, count);
-    memcpy(&p->extra_data.arr, extra, count * sizeof(AstNodeIndex));
+    memcpy(p->extra_data.arr + p->extra_data.len, extra, count * sizeof(AstNodeIndex));
     return result;
 }
 
 static AstNodeIndex parseByteAlign(Parser* p) {
-    if (eatToken(p, TOKEN_KEYWORD_ALIGN) != null_token)
+    if (eatToken(p, TOKEN_KEYWORD_ALIGN) == null_token)
         return null_node;
-    fprintf(stderr, "parseByteAlign cannot parse alginment\n");
+    fprintf(stderr, "parseByteAlign cannot parse alignment\n");
     exit(1);
     return 0; // tcc
 }
 
 static AstNodeIndex parseAddrSpace(Parser* p) {
-    if (eatToken(p, TOKEN_KEYWORD_ADDRSPACE) != null_token)
+    if (eatToken(p, TOKEN_KEYWORD_ADDRSPACE) == null_token)
         return null_node;
     fprintf(stderr, "parseAddrSpace cannot parse addrspace\n");
     exit(1);
@@ -143,7 +143,7 @@ static AstNodeIndex parseAddrSpace(Parser* p) {
 }
 
 static AstNodeIndex parseLinkSection(Parser* p) {
-    if (eatToken(p, TOKEN_KEYWORD_LINKSECTION) != null_token)
+    if (eatToken(p, TOKEN_KEYWORD_LINKSECTION) == null_token)
         return null_node;
     fprintf(stderr, "parseLinkSection cannot parse linksection\n");
     exit(1);
@@ -151,7 +151,7 @@ static AstNodeIndex parseLinkSection(Parser* p) {
 }
 
 static AstNodeIndex parseCallconv(Parser* p) {
-    if (eatToken(p, TOKEN_KEYWORD_CALLCONV) != null_token)
+    if (eatToken(p, TOKEN_KEYWORD_CALLCONV) == null_token)
         return null_node;
     fprintf(stderr, "parseCallconv cannot parse callconv\n");
     exit(1);
@@ -403,7 +403,8 @@ static SmallSpan parseParamDeclList(Parser* p) {
 }
 
 static uint32_t reserveNode(Parser* p, AstNodeTag tag) {
-    astNodeListEnsureCapacity(&p->nodes, p->nodes.len + 1);
+    astNodeListEnsureCapacity(&p->nodes, 1);
+    p->nodes.len++;
     p->nodes.tags[p->nodes.len - 1] = tag;
     return p->nodes.len - 1;
 }
@@ -427,7 +428,8 @@ static AstNodeIndex parseFnProto(Parser* p) {
     const AstNodeIndex return_type_expr = parseTypeExpr(p);
 
     if (align_expr == 0 && section_expr == 0 && callconv_expr == 0 && addrspace_expr == 0) {
-        if (params.tag == SMALL_SPAN_ZERO_OR_ONE)
+        switch (params.tag) {
+        case SMALL_SPAN_ZERO_OR_ONE:
             return setNode(
                 p,
                 fn_proto_index,
@@ -439,6 +441,11 @@ static AstNodeIndex parseFnProto(Parser* p) {
                     .rhs = return_type_expr,
                 },
             });
+            break;
+        case SMALL_SPAN_MULTI:
+            fprintf(stderr, "parseFnProto does not support multi params\n");
+            exit(1);
+        }
     }
 
     fprintf(stderr, "parseFnProto does not support complex function decls\n");
@@ -766,7 +773,6 @@ void findNextContainerMember(Parser* p) {
 
 static Members parseContainerMembers(Parser* p) {
     CleanupScratch scratch_top __attribute__((__cleanup__(cleanupScratch))) = initCleanupScratch(p);
-
     while (eatToken(p, TOKEN_CONTAINER_DOC_COMMENT) != null_token)
         ;
 
diff --git a/parser_test.zig b/parser_test.zig
index 0b1ee79..cfc9558 100644
--- a/parser_test.zig
+++ b/parser_test.zig
@@ -189,7 +189,7 @@ fn zigNode(token: c_uint) Ast.Node.Tag {
 // zigAst converts a c.Ast to std.Zig.Ast. The resulting Ast should be freed with deinit().
 fn zigAst(gpa: Allocator, c_ast: c.Ast) !Ast {
     var tokens = Ast.TokenList{};
-    try tokens.ensureTotalCapacity(gpa, c_ast.tokens.len);
+    try tokens.resize(gpa, c_ast.tokens.len);
     errdefer tokens.deinit(gpa);
 
     for (0..c_ast.tokens.len) |i|
@@ -199,7 +199,7 @@ fn zigAst(gpa: Allocator, c_ast: c.Ast) !Ast {
         });
 
     var nodes = Ast.NodeList{};
-    try nodes.ensureTotalCapacity(gpa, c_ast.nodes.len);
+    try nodes.resize(gpa, c_ast.nodes.len);
     errdefer nodes.deinit(gpa);
 
     for (0..c_ast.nodes.len) |i|
@@ -250,7 +250,8 @@ fn testParse(source: [:0]const u8, allocator: mem.Allocator, anything_changed: *
     const stderr = io.getStdErr().writer();
 
     //var tree = try std.zig.Ast.parse(allocator, source, .zig);
-    const c_tree = c.astParse(source, @intCast(source.len));
+    var c_tree = c.astParse(source, @intCast(source.len));
+    defer c.astDeinit(&c_tree);
     var tree = try zigAst(allocator, c_tree);
     defer tree.deinit(allocator);
 
@@ -300,8 +301,6 @@ fn testCanonical(source: [:0]const u8) !void {
 }
 
 test "zig fmt: remove extra whitespace at start and end of file with comment between" {
-    if (true) return error.SkipZigTest;
-
     try testTransform(
         \\
         \\
@@ -313,3 +312,12 @@ test "zig fmt: remove extra whitespace at start and end of file with comment bet
         \\
     );
 }
+
+test "my function" {
+    try testCanonical(
+        \\pub fn main() void {
+        \\    @panic("hello");
+        \\}
+        \\
+    );
+}
diff --git a/t/hello.zig b/t/hello.zig
deleted file mode 100644
index c994c88..0000000
--- a/t/hello.zig
+++ /dev/null
@@ -1,3 +0,0 @@
-pub fn main() void {
-    @panic("hello");
-}
diff --git a/tokenizer_test.zig b/tokenizer_test.zig
index a2fd5a9..efe8f64 100644
--- a/tokenizer_test.zig
+++ b/tokenizer_test.zig
@@ -166,6 +166,18 @@ test "keywords" {
     try testTokenize("test const else", &.{ .keyword_test, .keyword_const, .keyword_else });
 }
 
+test "parser first test" {
+    try testTokenize(
+        \\
+        \\
+        \\// hello
+        \\
+        \\
+    ,
+        &.{},
+    );
+}
+
 test "line comment followed by top-level comptime" {
     try testTokenize(
         \\// line comment
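
Note on astDeinit: ast.h now declares "void astDeinit(Ast*);" and parser_test.zig
calls it via "defer c.astDeinit(&c_tree);", but none of the hunks above add a
definition to ast.c, so unless the definition lands in a separate change the tests
will not link. Below is a minimal sketch of what the definition could look like,
an assumption rather than part of this commit: it mirrors the frees that this
patch removes from astParse() and guesses that Ast carries the same tokens, nodes,
and extra_data members the parser builds.

    #include <stdlib.h>
    #include "ast.h"

    /* Hypothetical sketch, not from the patch above: releases the buffers that
     * astParse() no longer frees. The field names (tokens, nodes, extra_data)
     * are assumptions based on the lists shown in the hunks. */
    void astDeinit(Ast* ast) {
        free(ast->tokens.tags);
        free(ast->tokens.starts);
        ast->tokens.cap = ast->tokens.len = 0;

        free(ast->nodes.tags);
        free(ast->nodes.main_tokens);
        free(ast->nodes.datas);
        ast->nodes.cap = ast->nodes.len = 0;

        free(ast->extra_data.arr);
        ast->extra_data.cap = ast->extra_data.len = 0;
    }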