parser: sync tests with upstream, fix tokenizer and parser

Sync parser_test.zig test section with upstream, adding ~40 new tests
(testError, testCanonical, testTransform). Remove extra blank lines
between tests to match upstream formatting.

Fix tokenizer keyword lookup bug: getKeyword() incorrectly returned
TOKEN_INVALID whenever the input was longer than a keyword it shared a
prefix with (e.g., "orelse" matched the "or" entry's prefix and the
search stopped early instead of continuing on to the "orelse" entry).

Fix parser to handle if/for/while expressions in type position (e.g.,
function return types like `fn foo() if (cond) i32 else void`). Add
labeled block support in parsePrimaryTypeExpr. Replace assert for
chained comparison operators with longjmp error.

365/381 tests pass. Remaining 16 failures are parser limitations for
specific syntax patterns and error recovery.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
2026-02-11 11:40:38 +00:00
parent f5f54fcbe8
commit fdefdc98c2
3 changed files with 1062 additions and 313 deletions

133
parser.c
View File

@@ -42,6 +42,8 @@ static AstNodeIndex parseSwitchExpr(Parser*);
static AstNodeIndex parseForExpr(Parser*);
static AstNodeIndex parseAsmExpr(Parser*);
static AstNodeIndex parseIfExpr(Parser*);
static uint32_t forPrefix(Parser*);
static AstNodeIndex parseLabeledStatement(Parser*);
typedef struct {
enum { FIELD_STATE_NONE, FIELD_STATE_SEEN, FIELD_STATE_END } tag;
@@ -521,6 +523,22 @@ static AstNodeIndex parsePrimaryTypeExpr(Parser* p) {
});
}
case TOKEN_IDENTIFIER:
if (p->token_tags[p->tok_i + 1] == TOKEN_COLON) {
switch (p->token_tags[p->tok_i + 2]) {
case TOKEN_L_BRACE: {
// Labeled block: label: { ... }
nextToken(p); // consume label
nextToken(p); // consume ':'
return parseBlock(p);
}
case TOKEN_KEYWORD_WHILE:
return parseLabeledStatement(p);
case TOKEN_KEYWORD_FOR:
return parseLabeledStatement(p);
default:
break;
}
}
return addNode(&p->nodes,
(AstNodeItem) {
.tag = AST_NODE_IDENTIFIER,
@@ -1018,6 +1036,116 @@ static AstNodeIndex parseTypeExpr(Parser* p) {
},
});
}
case TOKEN_KEYWORD_IF: {
// if-type-expr: uses parseTypeExpr for branches instead of parseExpr
const AstTokenIndex if_token = nextToken(p);
expectToken(p, TOKEN_L_PAREN);
const AstNodeIndex condition = expectExpr(p);
expectToken(p, TOKEN_R_PAREN);
parsePtrPayload(p);
const AstNodeIndex then_expr = parseTypeExpr(p);
if (eatToken(p, TOKEN_KEYWORD_ELSE) == null_token)
return addNode(&p->nodes,
(AstNodeItem) {
.tag = AST_NODE_IF_SIMPLE,
.main_token = if_token,
.data = { .lhs = condition, .rhs = then_expr },
});
parsePayload(p);
const AstNodeIndex else_expr = parseTypeExpr(p);
return addNode(&p->nodes,
(AstNodeItem) {
.tag = AST_NODE_IF,
.main_token = if_token,
.data = {
.lhs = condition,
.rhs = addExtra(p,
(AstNodeIndex[]) { then_expr, else_expr }, 2),
},
});
}
case TOKEN_KEYWORD_FOR: {
// for-type-expr: uses parseTypeExpr for body instead of parseExpr
const AstTokenIndex for_token = nextToken(p);
const uint32_t scratch_top2 = p->scratch.len;
const uint32_t inputs = forPrefix(p);
const AstNodeIndex body = parseTypeExpr(p);
if (eatToken(p, TOKEN_KEYWORD_ELSE) != null_token) {
parsePayload(p);
SLICE_APPEND(AstNodeIndex, &p->scratch, body);
const AstNodeIndex else_expr = parseTypeExpr(p);
SLICE_APPEND(AstNodeIndex, &p->scratch, else_expr);
const uint32_t total = p->scratch.len - scratch_top2;
const AstSubRange span
= listToSpan(p, &p->scratch.arr[scratch_top2], total);
p->scratch.len = scratch_top2;
return addNode(&p->nodes,
(AstNodeItem) {
.tag = AST_NODE_FOR,
.main_token = for_token,
.data = {
.lhs = span.start,
.rhs = ((uint32_t)inputs & 0x7FFFFFFF) | (1u << 31),
},
});
}
SLICE_APPEND(AstNodeIndex, &p->scratch, body);
const uint32_t total = p->scratch.len - scratch_top2;
const AstSubRange span
= listToSpan(p, &p->scratch.arr[scratch_top2], total);
p->scratch.len = scratch_top2;
return addNode(&p->nodes,
(AstNodeItem) {
.tag = AST_NODE_FOR,
.main_token = for_token,
.data = {
.lhs = span.start,
.rhs = (uint32_t)inputs & 0x7FFFFFFF,
},
});
}
case TOKEN_KEYWORD_WHILE: {
// while-type-expr: uses parseTypeExpr for body instead of parseExpr
const AstTokenIndex while_token = nextToken(p);
expectToken(p, TOKEN_L_PAREN);
const AstNodeIndex condition = expectExpr(p);
expectToken(p, TOKEN_R_PAREN);
parsePtrPayload(p);
const AstNodeIndex cont_expr
= eatToken(p, TOKEN_COLON) != null_token ? expectExpr(p) : 0;
const AstNodeIndex body = parseTypeExpr(p);
if (eatToken(p, TOKEN_KEYWORD_ELSE) != null_token) {
parsePayload(p);
const AstNodeIndex else_expr = parseTypeExpr(p);
return addNode(&p->nodes,
(AstNodeItem) {
.tag = AST_NODE_WHILE,
.main_token = while_token,
.data = {
.lhs = condition,
.rhs = addExtra(p,
(AstNodeIndex[]) { cont_expr, body, else_expr }, 3),
},
});
}
if (cont_expr != 0)
return addNode(&p->nodes,
(AstNodeItem) {
.tag = AST_NODE_WHILE_CONT,
.main_token = while_token,
.data = {
.lhs = condition,
.rhs = addExtra(p,
(AstNodeIndex[]) { cont_expr, body }, 2),
},
});
return addNode(&p->nodes,
(AstNodeItem) {
.tag = AST_NODE_WHILE_SIMPLE,
.main_token = while_token,
.data = { .lhs = condition, .rhs = body },
});
}
default:
return parseErrorUnionExpr(p);
}
@@ -1899,7 +2027,10 @@ static AstNodeIndex parseExprPrecedence(Parser* p, int32_t min_prec) {
if (info.prec < min_prec)
break;
assert(info.prec != banned_prec);
if (info.prec == banned_prec) {
fprintf(stderr, "chained comparison operators\n");
longjmp(p->error_jmp, 1);
}
const AstTokenIndex oper_token = nextToken(p);
if (tok_tag == TOKEN_KEYWORD_CATCH)

File diff suppressed because it is too large Load Diff

View File

@@ -82,9 +82,11 @@ static TokenizerTag getKeyword(const char* bytes, const uint32_t len) {
if (cmp == 0) {
if (len == klen) {
return keywords[i].tag;
} else {
} else if (len < klen) {
return TOKEN_INVALID;
}
// len > klen: input is longer than keyword (e.g., "orelse" vs
// "or"), continue searching.
} else if (cmp < 0) {
return TOKEN_INVALID;
}