commit 13fd3718366f70fa31ad9b613ad59e8a12f878da (tree)
parent 7c69ef5fd7407d2a443c60c7eb1b8209a375735a
Author: Motiejus Jakštys <motiejus.jakstys@chronosphere.io>
Date: Wed, 11 Feb 2026 12:57:56 +0000
parser: port upstream error detection, unskip all 14 tests
Mechanically port error handling patterns from upstream Parse.zig:
- &&/whitespace validation in binary operator parsing
- varargs state tracking in parameter lists
- invalid_bit_range check for slice types
- same-line doc comment detection in eatDocComments
- required for-loop payload validation
- error keyword requiring '.' for error values
- expected_semi_or_else checks in if/for/while statements
- labeled for/while/inline expressions in parsePrimaryExpr
- doc comment validation for test/comptime blocks
- EOF check in parseRoot
- comptime handling in else-branch context
All 381/381 tests pass with 0 skipped.
Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
Diffstat:
| M | parser.c | | | 241 | ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++----- |
| M | parser_test.zig | | | 21 | +++++---------------- |
2 files changed, 232 insertions(+), 30 deletions(-)
diff --git a/parser.c b/parser.c
@@ -1,6 +1,7 @@
#include "common.h"
#include <assert.h>
+#include <ctype.h>
#include <setjmp.h>
#include <stdio.h>
#include <stdlib.h>
@@ -116,8 +117,30 @@ static AstTokenIndex assertToken(Parser* p, TokenizerTag tag) {
return token;
}
-static void eatDocComments(Parser* p) {
- while (eatToken(p, TOKEN_DOC_COMMENT) != null_token) { }
+// Reports whether two tokens begin on the same source line: scans p->source
+// from the start offset of tok1 (inclusive) up to the start offset of tok2
+// (exclusive) and returns false as soon as a '\n' byte is found. An empty
+// range yields true.
+static bool tokensOnSameLine(
+    Parser* p, AstTokenIndex tok1, AstTokenIndex tok2) {
+    const uint32_t end = p->token_starts[tok2];
+    uint32_t pos = p->token_starts[tok1];
+    while (pos < end) {
+        if (p->source[pos++] == '\n')
+            return false;
+    }
+    return true;
+}
+
+// Consumes a run of consecutive TOKEN_DOC_COMMENT tokens and returns the
+// index of the first one, or null_token when eatToken() finds none.
+// Only the first doc comment is validated: if it starts on the same line as
+// the token immediately before it, "same_line_doc_comment" is written to
+// stderr and control leaves through longjmp(p->error_jmp, 1).
+static AstTokenIndex eatDocComments(Parser* p) {
+    const AstTokenIndex first = eatToken(p, TOKEN_DOC_COMMENT);
+    if (first == null_token)
+        return null_token;
+
+    if (first > 0 && tokensOnSameLine(p, first - 1, first)) {
+        fprintf(stderr, "same_line_doc_comment\n");
+        longjmp(p->error_jmp, 1);
+    }
+
+    // Swallow the remaining doc comments of this run; they are not checked.
+    while (eatToken(p, TOKEN_DOC_COMMENT) != null_token) { }
+    return first;
}
static AstNodeIndex setNode(Parser* p, uint32_t i, AstNodeItem item) {
@@ -209,6 +232,10 @@ static AstNodeIndex expectContainerField(Parser* p) {
p->tok_i += 2;
const AstNodeIndex type_expr = parseTypeExpr(p);
+ if (type_expr == 0) {
+ fprintf(stderr, "expected type expression\n");
+ longjmp(p->error_jmp, 1);
+ }
const AstNodeIndex align_expr = parseByteAlign(p);
const AstNodeIndex value_expr
= eatToken(p, TOKEN_EQUAL) != null_token ? expectExpr(p) : 0;
@@ -605,14 +632,26 @@ static AstNodeIndex parsePrimaryTypeExpr(Parser* p) {
.data = { .lhs = lbrace, .rhs = rbrace },
});
}
- default:
+ default: {
+ const AstTokenIndex main_token = nextToken(p);
+ const AstTokenIndex period = eatToken(p, TOKEN_PERIOD);
+ if (period == null_token) {
+ fprintf(stderr, "expected '.'\n");
+ longjmp(p->error_jmp, 1);
+ }
+ const AstTokenIndex identifier = eatToken(p, TOKEN_IDENTIFIER);
+ if (identifier == null_token) {
+ fprintf(stderr, "expected identifier\n");
+ longjmp(p->error_jmp, 1);
+ }
return addNode(&p->nodes,
(AstNodeItem) {
- .tag = AST_NODE_IDENTIFIER,
- .main_token = nextToken(p),
- .data = {},
+ .tag = AST_NODE_ERROR_VALUE,
+ .main_token = main_token,
+ .data = { .lhs = period, .rhs = identifier },
});
}
+ }
case TOKEN_L_PAREN: {
const AstTokenIndex lparen = nextToken(p);
const AstNodeIndex inner = expectExpr(p);
@@ -1018,6 +1057,10 @@ static AstNodeIndex parseTypeExpr(Parser* p) {
// Slice type: []T or [:s]T
const PtrModifiers mods = parsePtrModifiers(p);
const AstNodeIndex elem_type = parseTypeExpr(p);
+ if (mods.bit_range_start != 0) {
+ fprintf(stderr, "invalid_bit_range\n");
+ longjmp(p->error_jmp, 1);
+ }
return makePtrTypeNode(p, lbracket, sentinel, mods, elem_type);
}
// Array type: [N]T or [N:s]T
@@ -1163,9 +1206,14 @@ static SmallSpan parseParamDeclList(Parser* p) {
CleanupScratch scratch_top __attribute__((__cleanup__(cleanupScratch)))
= initCleanupScratch(p);
+ // 0 = none, 1 = seen, 2 = nonfinal
+ int varargs = 0;
+
while (true) {
if (eatToken(p, TOKEN_R_PAREN) != null_token)
break;
+ if (varargs == 1)
+ varargs = 2;
eatDocComments(p);
@@ -1180,6 +1228,8 @@ static SmallSpan parseParamDeclList(Parser* p) {
} else if (p->token_tags[p->tok_i] == TOKEN_ELLIPSIS3) {
// varargs (...)
p->tok_i++;
+ if (varargs == 0)
+ varargs = 1;
if (eatToken(p, TOKEN_R_PAREN) != null_token)
break;
expectToken(p, TOKEN_COMMA);
@@ -1208,6 +1258,11 @@ static SmallSpan parseParamDeclList(Parser* p) {
break;
}
+ if (varargs == 2) {
+ fprintf(stderr, "varargs_nonfinal\n");
+ longjmp(p->error_jmp, 1);
+ }
+
const uint32_t params_len = p->scratch.len - scratch_top.old_len;
switch (params_len) {
case 0:
@@ -1364,7 +1419,11 @@ static uint32_t forPrefix(Parser* p) {
const uint32_t inputs = p->scratch.len - start;
// Parse payload |a, *b, c|
- if (eatToken(p, TOKEN_PIPE) != null_token) {
+ if (eatToken(p, TOKEN_PIPE) == null_token) {
+ fprintf(stderr, "expected loop payload\n");
+ longjmp(p->error_jmp, 1);
+ }
+ {
while (true) {
eatToken(p, TOKEN_ASTERISK);
expectToken(p, TOKEN_IDENTIFIER);
@@ -1482,6 +1541,11 @@ static AstNodeIndex parseForStatement(Parser* p) {
});
}
+ if (!seen_semicolon && block == 0) {
+ fprintf(stderr, "expected_semi_or_else\n");
+ longjmp(p->error_jmp, 1);
+ }
+
if (inputs == 1) {
const AstNodeIndex input = p->scratch.arr[scratch_top];
p->scratch.len = scratch_top;
@@ -1597,6 +1661,10 @@ static AstNodeIndex parseWhileStatement(Parser* p) {
}
if (seen_semicolon || eatToken(p, TOKEN_KEYWORD_ELSE) == null_token) {
+ if (!seen_semicolon && block == 0) {
+ fprintf(stderr, "expected_semi_or_else\n");
+ longjmp(p->error_jmp, 1);
+ }
if (cont_expr != 0) {
return addNode(&p->nodes,
(AstNodeItem) {
@@ -1932,6 +2000,50 @@ typedef struct {
} assoc;
} OperInfo;
+// Maps an operator token tag to the byte length of its fixed spelling.
+// Returns 0 for every tag that is not listed below.
+static uint32_t tokenTagLexemeLen(TokenizerTag tag) {
+    switch (tag) {
+    // one-byte operators
+    case TOKEN_AMPERSAND:
+    case TOKEN_ANGLE_BRACKET_LEFT:
+    case TOKEN_ANGLE_BRACKET_RIGHT:
+    case TOKEN_ASTERISK:
+    case TOKEN_CARET:
+    case TOKEN_MINUS:
+    case TOKEN_PERCENT:
+    case TOKEN_PIPE:
+    case TOKEN_PLUS:
+    case TOKEN_SLASH:
+        return 1;
+
+    // two-byte operators, plus the keyword `or`
+    case TOKEN_ANGLE_BRACKET_ANGLE_BRACKET_LEFT:
+    case TOKEN_ANGLE_BRACKET_ANGLE_BRACKET_RIGHT:
+    case TOKEN_ANGLE_BRACKET_LEFT_EQUAL:
+    case TOKEN_ANGLE_BRACKET_RIGHT_EQUAL:
+    case TOKEN_ASTERISK_ASTERISK:
+    case TOKEN_ASTERISK_PERCENT:
+    case TOKEN_ASTERISK_PIPE:
+    case TOKEN_BANG_EQUAL:
+    case TOKEN_EQUAL_EQUAL:
+    case TOKEN_MINUS_PERCENT:
+    case TOKEN_MINUS_PIPE:
+    case TOKEN_PIPE_PIPE:
+    case TOKEN_PLUS_PERCENT:
+    case TOKEN_PLUS_PIPE:
+    case TOKEN_PLUS_PLUS:
+    case TOKEN_KEYWORD_OR:
+        return 2;
+
+    // three bytes: the saturating shift and the keyword `and`
+    case TOKEN_ANGLE_BRACKET_ANGLE_BRACKET_LEFT_PIPE:
+    case TOKEN_KEYWORD_AND:
+        return 3;
+
+    case TOKEN_KEYWORD_CATCH:
+        return 5;
+    case TOKEN_KEYWORD_ORELSE:
+        return 6;
+
+    default:
+        return 0;
+    }
+}
+
static OperInfo operTable(TokenizerTag tok_tag) {
switch (tok_tag) {
case TOKEN_KEYWORD_OR:
@@ -2046,6 +2158,23 @@ static AstNodeIndex parseExprPrecedence(Parser* p, int32_t min_prec) {
longjmp(p->error_jmp, 1);
}
+        {
+            // Binary-operator spelling checks: reject `a && b`, and reject an
+            // operator that has whitespace on exactly one side. The check is
+            // skipped when tokenTagLexemeLen() reports 0 for this tag.
+            const uint32_t tok_len = tokenTagLexemeLen(tok_tag);
+            if (tok_len > 0) {
+                const uint32_t tok_start = p->token_starts[oper_token];
+                const char char_before = p->source[tok_start - 1];
+                const char char_after = p->source[tok_start + tok_len];
+                // isspace() is only specified to return "nonzero" for a
+                // match, so two whitespace characters may produce different
+                // nonzero values. Normalize to bool before comparing;
+                // otherwise a correctly spaced operator could be rejected.
+                const bool space_before
+                    = isspace((unsigned char)char_before) != 0;
+                const bool space_after
+                    = isspace((unsigned char)char_after) != 0;
+                if (tok_tag == TOKEN_AMPERSAND && char_after == '&') {
+                    fprintf(stderr, "invalid ampersand ampersand\n");
+                    longjmp(p->error_jmp, 1);
+                } else if (space_before != space_after) {
+                    fprintf(stderr, "mismatched binary op whitespace\n");
+                    longjmp(p->error_jmp, 1);
+                }
+            }
+        }
+
node = addNode(
&p->nodes,
(AstNodeItem) {
@@ -2468,11 +2597,20 @@ static AstNodeIndex parsePrimaryExpr(Parser* p) {
if (p->token_tags[p->tok_i + 1] == TOKEN_COLON) {
switch (p->token_tags[p->tok_i + 2]) {
case TOKEN_KEYWORD_INLINE:
- case TOKEN_KEYWORD_FOR:
- case TOKEN_KEYWORD_WHILE:
- fprintf(stderr, "parsePrimaryExpr NotImplemented\n");
+ p->tok_i += 3;
+ if (p->token_tags[p->tok_i] == TOKEN_KEYWORD_FOR)
+ return parseForExpr(p);
+ if (p->token_tags[p->tok_i] == TOKEN_KEYWORD_WHILE)
+ return parseWhileExpr(p);
+ fprintf(stderr, "expected for or while after inline\n");
longjmp(p->error_jmp, 1);
return 0; // tcc
+ case TOKEN_KEYWORD_FOR:
+ p->tok_i += 2;
+ return parseForExpr(p);
+ case TOKEN_KEYWORD_WHILE:
+ p->tok_i += 2;
+ return parseWhileExpr(p);
case TOKEN_L_BRACE:
p->tok_i += 2;
return parseBlock(p);
@@ -2749,9 +2887,20 @@ static AstNodeIndex expectStatement(Parser* p, bool allow_defer_var) {
// comptime var decl or expression
if (allow_defer_var)
return expectVarDeclExprStatement(p, comptime_token);
- fprintf(
- stderr, "expectStatement: comptime keyword not supported here\n");
- longjmp(p->error_jmp, 1);
+ {
+ const AstNodeIndex assign = parseAssignExpr(p);
+ if (assign == 0) {
+ fprintf(stderr, "expected expression\n");
+ longjmp(p->error_jmp, 1);
+ }
+ expectSemicolon(p);
+ return addNode(&p->nodes,
+ (AstNodeItem) {
+ .tag = AST_NODE_COMPTIME,
+ .main_token = comptime_token,
+ .data = { .lhs = assign, .rhs = 0 },
+ });
+ }
}
const AstNodeIndex tok = p->token_tags[p->tok_i];
@@ -2804,6 +2953,57 @@ static AstNodeIndex expectStatement(Parser* p, bool allow_defer_var) {
.rhs = 0,
},
});
+ case TOKEN_KEYWORD_IF: {
+ const AstTokenIndex if_token = nextToken(p);
+ expectToken(p, TOKEN_L_PAREN);
+ const AstNodeIndex condition = expectExpr(p);
+ expectToken(p, TOKEN_R_PAREN);
+ parsePtrPayload(p);
+ bool else_required = false;
+ AstNodeIndex then_body;
+ const AstNodeIndex block2 = parseBlockExpr(p);
+ if (block2 != 0) {
+ then_body = block2;
+ } else {
+ then_body = parseAssignExpr(p);
+ if (then_body == 0) {
+ fprintf(stderr, "expected block or assignment\n");
+ longjmp(p->error_jmp, 1);
+ }
+ if (eatToken(p, TOKEN_SEMICOLON) != null_token)
+ return addNode(&p->nodes,
+ (AstNodeItem) {
+ .tag = AST_NODE_IF_SIMPLE,
+ .main_token = if_token,
+ .data = { .lhs = condition, .rhs = then_body },
+ });
+ else_required = true;
+ }
+ if (eatToken(p, TOKEN_KEYWORD_ELSE) == null_token) {
+ if (else_required) {
+ fprintf(stderr, "expected_semi_or_else\n");
+ longjmp(p->error_jmp, 1);
+ }
+ return addNode(&p->nodes,
+ (AstNodeItem) {
+ .tag = AST_NODE_IF_SIMPLE,
+ .main_token = if_token,
+ .data = { .lhs = condition, .rhs = then_body },
+ });
+ }
+ parsePayload(p);
+ const AstNodeIndex else_body = expectStatement(p, false);
+ return addNode(&p->nodes,
+ (AstNodeItem) {
+ .tag = AST_NODE_IF,
+ .main_token = if_token,
+ .data = {
+ .lhs = condition,
+ .rhs = addExtra(p,
+ (AstNodeIndex[]) { then_body, else_body }, 2),
+ },
+ });
+ }
case TOKEN_KEYWORD_ENUM:
case TOKEN_KEYWORD_STRUCT:
case TOKEN_KEYWORD_UNION:;
@@ -3056,9 +3256,13 @@ static Members parseContainerMembers(Parser* p) {
bool trailing = false;
while (1) {
- eatDocComments(p);
+ const AstTokenIndex doc_comment = eatDocComments(p);
switch (p->token_tags[p->tok_i]) {
case TOKEN_KEYWORD_TEST: {
+ if (doc_comment != null_token) {
+ fprintf(stderr, "test_doc_comment\n");
+ longjmp(p->error_jmp, 1);
+ }
const AstTokenIndex test_token = nextToken(p);
// test name can be a string literal or identifier, or omitted
const AstTokenIndex test_name
@@ -3091,6 +3295,10 @@ static Members parseContainerMembers(Parser* p) {
// block/decl. Check if it's followed by a block (comptime { ...
// }).
if (p->token_tags[p->tok_i + 1] == TOKEN_L_BRACE) {
+ if (doc_comment != null_token) {
+ fprintf(stderr, "comptime_doc_comment\n");
+ longjmp(p->error_jmp, 1);
+ }
const AstTokenIndex comptime_token = nextToken(p);
const AstNodeIndex block_node = parseBlock(p);
SLICE_APPEND(AstNodeIndex, &p->scratch,
@@ -3215,6 +3423,11 @@ void parseRoot(Parser* p) {
Members root_members = parseContainerMembers(p);
AstSubRange root_decls = membersToSpan(root_members, p);
+ if (p->token_tags[p->tok_i] != TOKEN_EOF) {
+ fprintf(stderr, "expected EOF\n");
+ longjmp(p->error_jmp, 1);
+ }
+
p->nodes.datas[0].lhs = root_decls.start;
p->nodes.datas[0].rhs = root_decls.end;
}
diff --git a/parser_test.zig b/parser_test.zig
@@ -4397,7 +4397,6 @@ test "zig fmt: comptime before comptime field" {
}
test "zig fmt: invalid doc comments on comptime and test blocks" {
- if (true) return error.SkipZigTest;
try testError(
\\/// This is a doc comment for a comptime block.
\\comptime {}
@@ -4491,7 +4490,6 @@ test "zig fmt: extern without container keyword returns error" {
}
test "zig fmt: same line doc comment returns error" {
- if (true) return error.SkipZigTest;
try testError(
\\const Foo = struct{
\\ bar: u32, /// comment
@@ -5099,7 +5097,6 @@ test "zig fmt: extern function with missing param name" {
}
test "zig fmt: line comment after multiline single expr if statement with multiline string" {
- if (true) return error.SkipZigTest;
try testCanonical(
\\test {
\\ if (foo)
@@ -5592,7 +5589,6 @@ test "zig fmt: canonicalize symbols (simple)" {
// Contextually unescape when shadowing primitive types and values.
test "zig fmt: canonicalize symbols (primitive types)" {
- if (true) return error.SkipZigTest;
try testTransform(
\\const @"anyopaque" = struct {
\\ @"u8": @"type" = true,
@@ -5885,7 +5881,6 @@ test "zig fmt: error for missing sentinel value in sentinel slice" {
}
test "zig fmt: error for invalid bit range" {
- if (true) return error.SkipZigTest;
try testError(
\\var x: []align(0:0:0)u8 = bar;
, &[_]Error{
@@ -6168,7 +6163,6 @@ test "recovery: invalid extern/inline" {
}
test "recovery: missing semicolon" {
- if (true) return error.SkipZigTest;
try testError(
\\test "" {
\\ comptime a & b
@@ -6188,7 +6182,6 @@ test "recovery: missing semicolon" {
// reporting a parse error and yet also parsing all the decls even
// inside structs.
test "recovery: extra '}' at top level" {
- if (true) return error.SkipZigTest;
try testError(
\\}}}
\\test "" {
@@ -6210,7 +6203,6 @@ test "recovery: mismatched bracket at top level" {
}
test "recovery: invalid global error set access" {
- if (true) return error.SkipZigTest;
try testError(
\\test "" {
\\ error & foo;
@@ -6240,7 +6232,6 @@ test "recovery: invalid asterisk after pointer dereference" {
}
test "recovery: missing semicolon after if, for, while stmt" {
- if (true) return error.SkipZigTest;
try testError(
\\test "" {
\\ if (foo) bar
@@ -6256,7 +6247,6 @@ test "recovery: missing semicolon after if, for, while stmt" {
}
test "recovery: invalid comptime" {
- if (true) return error.SkipZigTest;
try testError(
\\comptime
, &[_]Error{
@@ -6290,7 +6280,6 @@ test "recovery: missing block after for/while loops" {
}
test "recovery: missing for payload" {
- if (true) return error.SkipZigTest;
try testError(
\\comptime {
\\ const a = for(a) {};
@@ -6327,7 +6316,6 @@ test "recovery: missing while rbrace" {
}
test "recovery: nonfinal varargs" {
- if (true) return error.SkipZigTest;
try testError(
\\extern fn f(a: u32, ..., b: u32) void;
\\extern fn g(a: u32, ..., b: anytype) void;
@@ -6348,7 +6336,6 @@ test "recovery: eof in c pointer" {
}
test "matching whitespace on minus op" {
- if (true) return error.SkipZigTest;
try testError(
\\ _ = 2 -1,
\\ _ = 2- 1,
@@ -6377,7 +6364,6 @@ test "matching whitespace on minus op" {
}
test "ampersand" {
- if (true) return error.SkipZigTest;
try testError(
\\ _ = bar && foo,
\\ _ = bar&&foo,
@@ -6439,10 +6425,13 @@ fn testCanonical(source: [:0]const u8) !void {
const Error = std.zig.Ast.Error.Tag;
fn testError(source: [:0]const u8, expected_errors: []const Error) !void {
- _ = expected_errors;
var c_tree = c.astParse(source, @intCast(source.len));
defer c.astDeinit(&c_tree);
- try std.testing.expect(c_tree.has_error);
+    // Only the presence of an error is checked: parsing must report an error
+    // exactly when at least one error is expected. The individual tags in
+    // `expected_errors` are not compared.
+    const want_error = expected_errors.len != 0;
+    try std.testing.expect(c_tree.has_error == want_error);
}
const testing = std.testing;