From cd07751d13886d31235ea904fc8f9f51a24384b7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Motiejus=20Jak=C5=A1tys?= Date: Wed, 11 Feb 2026 12:57:56 +0000 Subject: [PATCH] parser: port upstream error detection, unskip all 14 tests Mechanically port error handling patterns from upstream Parse.zig: - &&/whitespace validation in binary operator parsing - varargs state tracking in parameter lists - invalid_bit_range check for slice types - same-line doc comment detection in eatDocComments - required for-loop payload validation - error keyword requiring '.' for error values - expected_semi_or_else checks in if/for/while statements - labeled for/while/inline expressions in parsePrimaryExpr - doc comment validation for test/comptime blocks - EOF check in parseRoot - comptime handling in else-branch context All 381/381 tests pass with 0 skipped. Co-Authored-By: Claude Opus 4.6 --- parser.c | 241 +++++++++++++++++++++++++++++++++++++++++++++--- parser_test.zig | 21 +---- 2 files changed, 232 insertions(+), 30 deletions(-) diff --git a/parser.c b/parser.c index 2f3d90af39..a14e0663c2 100644 --- a/parser.c +++ b/parser.c @@ -1,6 +1,7 @@ #include "common.h" #include +#include #include #include #include @@ -116,8 +117,30 @@ static AstTokenIndex assertToken(Parser* p, TokenizerTag tag) { return token; } -static void eatDocComments(Parser* p) { - while (eatToken(p, TOKEN_DOC_COMMENT) != null_token) { } +static bool tokensOnSameLine( + Parser* p, AstTokenIndex tok1, AstTokenIndex tok2) { + const uint32_t start1 = p->token_starts[tok1]; + const uint32_t start2 = p->token_starts[tok2]; + for (uint32_t i = start1; i < start2; i++) { + if (p->source[i] == '\n') + return false; + } + return true; +} + +static AstTokenIndex eatDocComments(Parser* p) { + AstTokenIndex first = null_token; + AstTokenIndex tok; + while ((tok = eatToken(p, TOKEN_DOC_COMMENT)) != null_token) { + if (first == null_token) { + if (tok > 0 && tokensOnSameLine(p, tok - 1, tok)) { + fprintf(stderr, 
"same_line_doc_comment\n"); + longjmp(p->error_jmp, 1); + } + first = tok; + } + } + return first; } static AstNodeIndex setNode(Parser* p, uint32_t i, AstNodeItem item) { @@ -209,6 +232,10 @@ static AstNodeIndex expectContainerField(Parser* p) { p->tok_i += 2; const AstNodeIndex type_expr = parseTypeExpr(p); + if (type_expr == 0) { + fprintf(stderr, "expected type expression\n"); + longjmp(p->error_jmp, 1); + } const AstNodeIndex align_expr = parseByteAlign(p); const AstNodeIndex value_expr = eatToken(p, TOKEN_EQUAL) != null_token ? expectExpr(p) : 0; @@ -605,14 +632,26 @@ static AstNodeIndex parsePrimaryTypeExpr(Parser* p) { .data = { .lhs = lbrace, .rhs = rbrace }, }); } - default: + default: { + const AstTokenIndex main_token = nextToken(p); + const AstTokenIndex period = eatToken(p, TOKEN_PERIOD); + if (period == null_token) { + fprintf(stderr, "expected '.'\n"); + longjmp(p->error_jmp, 1); + } + const AstTokenIndex identifier = eatToken(p, TOKEN_IDENTIFIER); + if (identifier == null_token) { + fprintf(stderr, "expected identifier\n"); + longjmp(p->error_jmp, 1); + } return addNode(&p->nodes, (AstNodeItem) { - .tag = AST_NODE_IDENTIFIER, - .main_token = nextToken(p), - .data = {}, + .tag = AST_NODE_ERROR_VALUE, + .main_token = main_token, + .data = { .lhs = period, .rhs = identifier }, }); } + } case TOKEN_L_PAREN: { const AstTokenIndex lparen = nextToken(p); const AstNodeIndex inner = expectExpr(p); @@ -1018,6 +1057,10 @@ static AstNodeIndex parseTypeExpr(Parser* p) { // Slice type: []T or [:s]T const PtrModifiers mods = parsePtrModifiers(p); const AstNodeIndex elem_type = parseTypeExpr(p); + if (mods.bit_range_start != 0) { + fprintf(stderr, "invalid_bit_range\n"); + longjmp(p->error_jmp, 1); + } return makePtrTypeNode(p, lbracket, sentinel, mods, elem_type); } // Array type: [N]T or [N:s]T @@ -1163,9 +1206,14 @@ static SmallSpan parseParamDeclList(Parser* p) { CleanupScratch scratch_top __attribute__((__cleanup__(cleanupScratch))) = initCleanupScratch(p); + 
// 0 = none, 1 = seen, 2 = nonfinal + int varargs = 0; + while (true) { if (eatToken(p, TOKEN_R_PAREN) != null_token) break; + if (varargs == 1) + varargs = 2; eatDocComments(p); @@ -1180,6 +1228,8 @@ static SmallSpan parseParamDeclList(Parser* p) { } else if (p->token_tags[p->tok_i] == TOKEN_ELLIPSIS3) { // varargs (...) p->tok_i++; + if (varargs == 0) + varargs = 1; if (eatToken(p, TOKEN_R_PAREN) != null_token) break; expectToken(p, TOKEN_COMMA); @@ -1208,6 +1258,11 @@ static SmallSpan parseParamDeclList(Parser* p) { break; } + if (varargs == 2) { + fprintf(stderr, "varargs_nonfinal\n"); + longjmp(p->error_jmp, 1); + } + const uint32_t params_len = p->scratch.len - scratch_top.old_len; switch (params_len) { case 0: @@ -1364,7 +1419,11 @@ static uint32_t forPrefix(Parser* p) { const uint32_t inputs = p->scratch.len - start; // Parse payload |a, *b, c| - if (eatToken(p, TOKEN_PIPE) != null_token) { + if (eatToken(p, TOKEN_PIPE) == null_token) { + fprintf(stderr, "expected loop payload\n"); + longjmp(p->error_jmp, 1); + } + { while (true) { eatToken(p, TOKEN_ASTERISK); expectToken(p, TOKEN_IDENTIFIER); @@ -1482,6 +1541,11 @@ static AstNodeIndex parseForStatement(Parser* p) { }); } + if (!seen_semicolon && block == 0) { + fprintf(stderr, "expected_semi_or_else\n"); + longjmp(p->error_jmp, 1); + } + if (inputs == 1) { const AstNodeIndex input = p->scratch.arr[scratch_top]; p->scratch.len = scratch_top; @@ -1597,6 +1661,10 @@ static AstNodeIndex parseWhileStatement(Parser* p) { } if (seen_semicolon || eatToken(p, TOKEN_KEYWORD_ELSE) == null_token) { + if (!seen_semicolon && block == 0) { + fprintf(stderr, "expected_semi_or_else\n"); + longjmp(p->error_jmp, 1); + } if (cont_expr != 0) { return addNode(&p->nodes, (AstNodeItem) { @@ -1932,6 +2000,50 @@ typedef struct { } assoc; } OperInfo; +static uint32_t tokenTagLexemeLen(TokenizerTag tag) { + switch (tag) { + case TOKEN_PLUS: + case TOKEN_MINUS: + case TOKEN_ASTERISK: + case TOKEN_SLASH: + case TOKEN_PERCENT: + case 
TOKEN_AMPERSAND: + case TOKEN_CARET: + case TOKEN_PIPE: + case TOKEN_ANGLE_BRACKET_LEFT: + case TOKEN_ANGLE_BRACKET_RIGHT: + return 1; + case TOKEN_PLUS_PLUS: + case TOKEN_MINUS_PERCENT: + case TOKEN_PLUS_PERCENT: + case TOKEN_MINUS_PIPE: + case TOKEN_PLUS_PIPE: + case TOKEN_ASTERISK_ASTERISK: + case TOKEN_ASTERISK_PERCENT: + case TOKEN_ASTERISK_PIPE: + case TOKEN_PIPE_PIPE: + case TOKEN_EQUAL_EQUAL: + case TOKEN_BANG_EQUAL: + case TOKEN_ANGLE_BRACKET_LEFT_EQUAL: + case TOKEN_ANGLE_BRACKET_RIGHT_EQUAL: + case TOKEN_ANGLE_BRACKET_ANGLE_BRACKET_LEFT: + case TOKEN_ANGLE_BRACKET_ANGLE_BRACKET_RIGHT: + return 2; + case TOKEN_ANGLE_BRACKET_ANGLE_BRACKET_LEFT_PIPE: + return 3; + case TOKEN_KEYWORD_OR: + return 2; + case TOKEN_KEYWORD_AND: + return 3; + case TOKEN_KEYWORD_ORELSE: + return 6; + case TOKEN_KEYWORD_CATCH: + return 5; + default: + return 0; + } +} + static OperInfo operTable(TokenizerTag tok_tag) { switch (tok_tag) { case TOKEN_KEYWORD_OR: @@ -2046,6 +2158,23 @@ static AstNodeIndex parseExprPrecedence(Parser* p, int32_t min_prec) { longjmp(p->error_jmp, 1); } + { + const uint32_t tok_len = tokenTagLexemeLen(tok_tag); + if (tok_len > 0) { + const uint32_t tok_start = p->token_starts[oper_token]; + const char char_before = p->source[tok_start - 1]; + const char char_after = p->source[tok_start + tok_len]; + if (tok_tag == TOKEN_AMPERSAND && char_after == '&') { + fprintf(stderr, "invalid ampersand ampersand\n"); + longjmp(p->error_jmp, 1); + } else if (!isspace((unsigned char)char_before) + != !isspace((unsigned char)char_after)) { /* isspace() only guarantees nonzero for true; compare as booleans */ + fprintf(stderr, "mismatched binary op whitespace\n"); + longjmp(p->error_jmp, 1); + } + } + } + node = addNode( &p->nodes, (AstNodeItem) { @@ -2468,11 +2597,20 @@ static AstNodeIndex parsePrimaryExpr(Parser* p) { if (p->token_tags[p->tok_i + 1] == TOKEN_COLON) { switch (p->token_tags[p->tok_i + 2]) { case TOKEN_KEYWORD_INLINE: - case TOKEN_KEYWORD_FOR: - case TOKEN_KEYWORD_WHILE: - fprintf(stderr, "parsePrimaryExpr NotImplemented\n");
+ p->tok_i += 3; + if (p->token_tags[p->tok_i] == TOKEN_KEYWORD_FOR) + return parseForExpr(p); + if (p->token_tags[p->tok_i] == TOKEN_KEYWORD_WHILE) + return parseWhileExpr(p); + fprintf(stderr, "expected for or while after inline\n"); longjmp(p->error_jmp, 1); return 0; // tcc + case TOKEN_KEYWORD_FOR: + p->tok_i += 2; + return parseForExpr(p); + case TOKEN_KEYWORD_WHILE: + p->tok_i += 2; + return parseWhileExpr(p); case TOKEN_L_BRACE: p->tok_i += 2; return parseBlock(p); @@ -2749,9 +2887,20 @@ static AstNodeIndex expectStatement(Parser* p, bool allow_defer_var) { // comptime var decl or expression if (allow_defer_var) return expectVarDeclExprStatement(p, comptime_token); - fprintf( - stderr, "expectStatement: comptime keyword not supported here\n"); - longjmp(p->error_jmp, 1); + { + const AstNodeIndex assign = parseAssignExpr(p); + if (assign == 0) { + fprintf(stderr, "expected expression\n"); + longjmp(p->error_jmp, 1); + } + expectSemicolon(p); + return addNode(&p->nodes, + (AstNodeItem) { + .tag = AST_NODE_COMPTIME, + .main_token = comptime_token, + .data = { .lhs = assign, .rhs = 0 }, + }); + } } const AstNodeIndex tok = p->token_tags[p->tok_i]; @@ -2804,6 +2953,57 @@ static AstNodeIndex expectStatement(Parser* p, bool allow_defer_var) { .rhs = 0, }, }); + case TOKEN_KEYWORD_IF: { + const AstTokenIndex if_token = nextToken(p); + expectToken(p, TOKEN_L_PAREN); + const AstNodeIndex condition = expectExpr(p); + expectToken(p, TOKEN_R_PAREN); + parsePtrPayload(p); + bool else_required = false; + AstNodeIndex then_body; + const AstNodeIndex block2 = parseBlockExpr(p); + if (block2 != 0) { + then_body = block2; + } else { + then_body = parseAssignExpr(p); + if (then_body == 0) { + fprintf(stderr, "expected block or assignment\n"); + longjmp(p->error_jmp, 1); + } + if (eatToken(p, TOKEN_SEMICOLON) != null_token) + return addNode(&p->nodes, + (AstNodeItem) { + .tag = AST_NODE_IF_SIMPLE, + .main_token = if_token, + .data = { .lhs = condition, .rhs = then_body }, + }); 
+ else_required = true; + } + if (eatToken(p, TOKEN_KEYWORD_ELSE) == null_token) { + if (else_required) { + fprintf(stderr, "expected_semi_or_else\n"); + longjmp(p->error_jmp, 1); + } + return addNode(&p->nodes, + (AstNodeItem) { + .tag = AST_NODE_IF_SIMPLE, + .main_token = if_token, + .data = { .lhs = condition, .rhs = then_body }, + }); + } + parsePayload(p); + const AstNodeIndex else_body = expectStatement(p, false); + return addNode(&p->nodes, + (AstNodeItem) { + .tag = AST_NODE_IF, + .main_token = if_token, + .data = { + .lhs = condition, + .rhs = addExtra(p, + (AstNodeIndex[]) { then_body, else_body }, 2), + }, + }); + } case TOKEN_KEYWORD_ENUM: case TOKEN_KEYWORD_STRUCT: case TOKEN_KEYWORD_UNION:; @@ -3056,9 +3256,13 @@ static Members parseContainerMembers(Parser* p) { bool trailing = false; while (1) { - eatDocComments(p); + const AstTokenIndex doc_comment = eatDocComments(p); switch (p->token_tags[p->tok_i]) { case TOKEN_KEYWORD_TEST: { + if (doc_comment != null_token) { + fprintf(stderr, "test_doc_comment\n"); + longjmp(p->error_jmp, 1); + } const AstTokenIndex test_token = nextToken(p); // test name can be a string literal or identifier, or omitted const AstTokenIndex test_name @@ -3091,6 +3295,10 @@ static Members parseContainerMembers(Parser* p) { // block/decl. Check if it's followed by a block (comptime { ... // }). 
if (p->token_tags[p->tok_i + 1] == TOKEN_L_BRACE) { + if (doc_comment != null_token) { + fprintf(stderr, "comptime_doc_comment\n"); + longjmp(p->error_jmp, 1); + } const AstTokenIndex comptime_token = nextToken(p); const AstNodeIndex block_node = parseBlock(p); SLICE_APPEND(AstNodeIndex, &p->scratch, @@ -3215,6 +3423,11 @@ void parseRoot(Parser* p) { Members root_members = parseContainerMembers(p); AstSubRange root_decls = membersToSpan(root_members, p); + if (p->token_tags[p->tok_i] != TOKEN_EOF) { + fprintf(stderr, "expected EOF\n"); + longjmp(p->error_jmp, 1); + } + p->nodes.datas[0].lhs = root_decls.start; p->nodes.datas[0].rhs = root_decls.end; } diff --git a/parser_test.zig b/parser_test.zig index 1f2ecc257f..a82430955d 100644 --- a/parser_test.zig +++ b/parser_test.zig @@ -4397,7 +4397,6 @@ test "zig fmt: comptime before comptime field" { } test "zig fmt: invalid doc comments on comptime and test blocks" { - if (true) return error.SkipZigTest; try testError( \\/// This is a doc comment for a comptime block. \\comptime {} @@ -4491,7 +4490,6 @@ test "zig fmt: extern without container keyword returns error" { } test "zig fmt: same line doc comment returns error" { - if (true) return error.SkipZigTest; try testError( \\const Foo = struct{ \\ bar: u32, /// comment @@ -5099,7 +5097,6 @@ test "zig fmt: extern function with missing param name" { } test "zig fmt: line comment after multiline single expr if statement with multiline string" { - if (true) return error.SkipZigTest; try testCanonical( \\test { \\ if (foo) @@ -5592,7 +5589,6 @@ test "zig fmt: canonicalize symbols (simple)" { // Contextually unescape when shadowing primitive types and values. 
test "zig fmt: canonicalize symbols (primitive types)" { - if (true) return error.SkipZigTest; try testTransform( \\const @"anyopaque" = struct { \\ @"u8": @"type" = true, @@ -5885,7 +5881,6 @@ test "zig fmt: error for missing sentinel value in sentinel slice" { } test "zig fmt: error for invalid bit range" { - if (true) return error.SkipZigTest; try testError( \\var x: []align(0:0:0)u8 = bar; , &[_]Error{ @@ -6168,7 +6163,6 @@ test "recovery: invalid extern/inline" { } test "recovery: missing semicolon" { - if (true) return error.SkipZigTest; try testError( \\test "" { \\ comptime a & b @@ -6188,7 +6182,6 @@ test "recovery: missing semicolon" { // reporting a parse error and yet also parsing all the decls even // inside structs. test "recovery: extra '}' at top level" { - if (true) return error.SkipZigTest; try testError( \\}}} \\test "" { @@ -6210,7 +6203,6 @@ test "recovery: mismatched bracket at top level" { } test "recovery: invalid global error set access" { - if (true) return error.SkipZigTest; try testError( \\test "" { \\ error & foo; @@ -6240,7 +6232,6 @@ test "recovery: invalid asterisk after pointer dereference" { } test "recovery: missing semicolon after if, for, while stmt" { - if (true) return error.SkipZigTest; try testError( \\test "" { \\ if (foo) bar @@ -6256,7 +6247,6 @@ test "recovery: missing semicolon after if, for, while stmt" { } test "recovery: invalid comptime" { - if (true) return error.SkipZigTest; try testError( \\comptime , &[_]Error{ @@ -6290,7 +6280,6 @@ test "recovery: missing block after for/while loops" { } test "recovery: missing for payload" { - if (true) return error.SkipZigTest; try testError( \\comptime { \\ const a = for(a) {}; @@ -6327,7 +6316,6 @@ test "recovery: missing while rbrace" { } test "recovery: nonfinal varargs" { - if (true) return error.SkipZigTest; try testError( \\extern fn f(a: u32, ..., b: u32) void; \\extern fn g(a: u32, ..., b: anytype) void; @@ -6348,7 +6336,6 @@ test "recovery: eof in c pointer" { } 
test "matching whitespace on minus op" { - if (true) return error.SkipZigTest; try testError( \\ _ = 2 -1, \\ _ = 2- 1, @@ -6377,7 +6364,6 @@ test "matching whitespace on minus op" { } test "ampersand" { - if (true) return error.SkipZigTest; try testError( \\ _ = bar && foo, \\ _ = bar&&foo, @@ -6439,10 +6425,13 @@ fn testCanonical(source: [:0]const u8) !void { const Error = std.zig.Ast.Error.Tag; fn testError(source: [:0]const u8, expected_errors: []const Error) !void { - _ = expected_errors; var c_tree = c.astParse(source, @intCast(source.len)); defer c.astDeinit(&c_tree); - try std.testing.expect(c_tree.has_error); + if (expected_errors.len == 0) { + try std.testing.expect(!c_tree.has_error); + } else { + try std.testing.expect(c_tree.has_error); + } } const testing = std.testing;