parser: sync tests with upstream, fix tokenizer and parser
Sync the parser_test.zig test section with upstream, adding ~40 new tests (testError, testCanonical, testTransform), and remove extra blank lines between tests to match upstream formatting.

Fix a tokenizer keyword-lookup bug: getKeyword() returned TOKEN_INVALID when the input was longer than a keyword prefix (e.g., "orelse" matched the "or" prefix and then bailed out instead of continuing on to find "orelse").

Fix the parser to handle if/for/while expressions in type position (e.g., function return types like `fn foo() if (cond) i32 else void`). Add labeled-block support in parsePrimaryTypeExpr. Replace the assert on chained comparison operators with a longjmp error.

365/381 tests pass. The remaining 16 failures are parser limitations around specific syntax patterns and error recovery.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
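For reference, the kind of source the type-position fix targets. The first sample is taken verbatim from the message above; the second is an analogous sketch, not a case known to be in the test suite:

    /* Zig inputs with conditionals in type position (illustrative): */
    static const char* type_position_samples[] = {
        "fn foo() if (cond) i32 else void",   /* from the commit message */
        "fn bar() while (cond) i32",          /* analogous sketch */
    };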
parser.c (133 changes)
@@ -42,6 +42,8 @@ static AstNodeIndex parseSwitchExpr(Parser*);
 static AstNodeIndex parseForExpr(Parser*);
 static AstNodeIndex parseAsmExpr(Parser*);
 static AstNodeIndex parseIfExpr(Parser*);
+static uint32_t forPrefix(Parser*);
+static AstNodeIndex parseLabeledStatement(Parser*);
 
 typedef struct {
     enum { FIELD_STATE_NONE, FIELD_STATE_SEEN, FIELD_STATE_END } tag;
@@ -521,6 +523,22 @@ static AstNodeIndex parsePrimaryTypeExpr(Parser* p) {
         });
     }
     case TOKEN_IDENTIFIER:
+        if (p->token_tags[p->tok_i + 1] == TOKEN_COLON) {
+            switch (p->token_tags[p->tok_i + 2]) {
+            case TOKEN_L_BRACE: {
+                // Labeled block: label: { ... }
+                nextToken(p); // consume label
+                nextToken(p); // consume ':'
+                return parseBlock(p);
+            }
+            case TOKEN_KEYWORD_WHILE:
+                return parseLabeledStatement(p);
+            case TOKEN_KEYWORD_FOR:
+                return parseLabeledStatement(p);
+            default:
+                break;
+            }
+        }
         return addNode(&p->nodes,
             (AstNodeItem) {
                 .tag = AST_NODE_IDENTIFIER,
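The new identifier path commits to a labeled block or loop only after peeking two tokens past the identifier, so a bare identifier still falls through to the AST_NODE_IDENTIFIER node below. A few inputs the peek distinguishes (illustrative samples, not taken from the test suite):

    /* identifier ':' '{'     -> labeled block  */
    /* identifier ':' while   -> labeled loop   */
    /* bare identifier        -> identifier node */
    static const char* labeled_samples[] = {
        "blk: { break :blk 42; }",
        "outer: while (true) {}",
        "i32",
    };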
@@ -1018,6 +1036,116 @@ static AstNodeIndex parseTypeExpr(Parser* p) {
             },
         });
     }
+    case TOKEN_KEYWORD_IF: {
+        // if-type-expr: uses parseTypeExpr for branches instead of parseExpr
+        const AstTokenIndex if_token = nextToken(p);
+        expectToken(p, TOKEN_L_PAREN);
+        const AstNodeIndex condition = expectExpr(p);
+        expectToken(p, TOKEN_R_PAREN);
+        parsePtrPayload(p);
+        const AstNodeIndex then_expr = parseTypeExpr(p);
+        if (eatToken(p, TOKEN_KEYWORD_ELSE) == null_token)
+            return addNode(&p->nodes,
+                (AstNodeItem) {
+                    .tag = AST_NODE_IF_SIMPLE,
+                    .main_token = if_token,
+                    .data = { .lhs = condition, .rhs = then_expr },
+                });
+        parsePayload(p);
+        const AstNodeIndex else_expr = parseTypeExpr(p);
+        return addNode(&p->nodes,
+            (AstNodeItem) {
+                .tag = AST_NODE_IF,
+                .main_token = if_token,
+                .data = {
+                    .lhs = condition,
+                    .rhs = addExtra(p,
+                        (AstNodeIndex[]) { then_expr, else_expr }, 2),
+                },
+            });
+    }
+    case TOKEN_KEYWORD_FOR: {
+        // for-type-expr: uses parseTypeExpr for body instead of parseExpr
+        const AstTokenIndex for_token = nextToken(p);
+        const uint32_t scratch_top2 = p->scratch.len;
+        const uint32_t inputs = forPrefix(p);
+        const AstNodeIndex body = parseTypeExpr(p);
+        if (eatToken(p, TOKEN_KEYWORD_ELSE) != null_token) {
+            parsePayload(p);
+            SLICE_APPEND(AstNodeIndex, &p->scratch, body);
+            const AstNodeIndex else_expr = parseTypeExpr(p);
+            SLICE_APPEND(AstNodeIndex, &p->scratch, else_expr);
+            const uint32_t total = p->scratch.len - scratch_top2;
+            const AstSubRange span
+                = listToSpan(p, &p->scratch.arr[scratch_top2], total);
+            p->scratch.len = scratch_top2;
+            return addNode(&p->nodes,
+                (AstNodeItem) {
+                    .tag = AST_NODE_FOR,
+                    .main_token = for_token,
+                    .data = {
+                        .lhs = span.start,
+                        .rhs = ((uint32_t)inputs & 0x7FFFFFFF) | (1u << 31),
+                    },
+                });
+        }
+        SLICE_APPEND(AstNodeIndex, &p->scratch, body);
+        const uint32_t total = p->scratch.len - scratch_top2;
+        const AstSubRange span
+            = listToSpan(p, &p->scratch.arr[scratch_top2], total);
+        p->scratch.len = scratch_top2;
+        return addNode(&p->nodes,
+            (AstNodeItem) {
+                .tag = AST_NODE_FOR,
+                .main_token = for_token,
+                .data = {
+                    .lhs = span.start,
+                    .rhs = (uint32_t)inputs & 0x7FFFFFFF,
+                },
+            });
+    }
+    case TOKEN_KEYWORD_WHILE: {
+        // while-type-expr: uses parseTypeExpr for body instead of parseExpr
+        const AstTokenIndex while_token = nextToken(p);
+        expectToken(p, TOKEN_L_PAREN);
+        const AstNodeIndex condition = expectExpr(p);
+        expectToken(p, TOKEN_R_PAREN);
+        parsePtrPayload(p);
+        const AstNodeIndex cont_expr
+            = eatToken(p, TOKEN_COLON) != null_token ? expectExpr(p) : 0;
+        const AstNodeIndex body = parseTypeExpr(p);
+        if (eatToken(p, TOKEN_KEYWORD_ELSE) != null_token) {
+            parsePayload(p);
+            const AstNodeIndex else_expr = parseTypeExpr(p);
+            return addNode(&p->nodes,
+                (AstNodeItem) {
+                    .tag = AST_NODE_WHILE,
+                    .main_token = while_token,
+                    .data = {
+                        .lhs = condition,
+                        .rhs = addExtra(p,
+                            (AstNodeIndex[]) { cont_expr, body, else_expr }, 3),
+                    },
+                });
+        }
+        if (cont_expr != 0)
+            return addNode(&p->nodes,
+                (AstNodeItem) {
+                    .tag = AST_NODE_WHILE_CONT,
+                    .main_token = while_token,
+                    .data = {
+                        .lhs = condition,
+                        .rhs = addExtra(p,
+                            (AstNodeIndex[]) { cont_expr, body }, 2),
+                    },
+                });
+        return addNode(&p->nodes,
+            (AstNodeItem) {
+                .tag = AST_NODE_WHILE_SIMPLE,
+                .main_token = while_token,
+                .data = { .lhs = condition, .rhs = body },
+            });
+    }
     default:
         return parseErrorUnionExpr(p);
     }
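One detail of the for-type-expr case worth calling out: the FOR node's rhs is not a node index but a packed word, with the input count in the low 31 bits and the top bit flagging an else branch. A consumer would unpack it along these lines (a sketch inferred from the masks above; the helper names are hypothetical):

    #include <stdbool.h>
    #include <stdint.h>

    static inline uint32_t forInputCount(uint32_t rhs) {
        return rhs & 0x7FFFFFFF;   /* low 31 bits: number of for inputs */
    }

    static inline bool forHasElse(uint32_t rhs) {
        return (rhs >> 31) != 0;   /* top bit: else branch present */
    }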
@@ -1899,7 +2027,10 @@ static AstNodeIndex parseExprPrecedence(Parser* p, int32_t min_prec) {
         if (info.prec < min_prec)
             break;
 
-        assert(info.prec != banned_prec);
+        if (info.prec == banned_prec) {
+            fprintf(stderr, "chained comparison operators\n");
+            longjmp(p->error_jmp, 1);
+        }
 
         const AstTokenIndex oper_token = nextToken(p);
         if (tok_tag == TOKEN_KEYWORD_CATCH)
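The longjmp above needs a matching setjmp in whatever drives the parser; the diff does not show that side. A minimal sketch, assuming the driver owns the jmp_buf stored in p->error_jmp (the entry-point name and the null-node sentinel are hypothetical):

    #include <setjmp.h>

    static AstNodeIndex parseRootExpr(Parser* p) {
        if (setjmp(p->error_jmp) != 0) {
            /* longjmp target: an error such as a chained comparison
               ("a == b == c") has already been reported to stderr */
            return 0;   /* hypothetical null-node sentinel */
        }
        return parseExprPrecedence(p, 0);
    }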
parser_test.zig (1238 changes; file diff suppressed because it is too large)
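The suppressed test diff is where the ~40 new tests land. Their three shapes, as named in the commit message, follow upstream Zig's parser tests: testCanonical round-trips canonical source, testTransform renders non-canonical source to its canonical form, and testError checks the expected diagnostics. A rough illustration of the first two contracts, with stub signatures assumed rather than taken from the hidden diff:

    #include <assert.h>
    #include <string.h>

    /* Stand-in for parse + render; the real pipeline lives in parser.c. */
    static const char* parseAndRender(const char* source) { return source; }

    /* Canonical source must round-trip unchanged. */
    static void testCanonical(const char* source) {
        assert(strcmp(parseAndRender(source), source) == 0);
    }

    /* Non-canonical source must render to the expected canonical form. */
    static void testTransform(const char* source, const char* expected) {
        assert(strcmp(parseAndRender(source), expected) == 0);
    }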
@@ -82,9 +82,11 @@ static TokenizerTag getKeyword(const char* bytes, const uint32_t len) {
         if (cmp == 0) {
             if (len == klen) {
                 return keywords[i].tag;
-            } else {
+            } else if (len < klen) {
                 return TOKEN_INVALID;
             }
+            // len > klen: input is longer than keyword (e.g., "orelse" vs
+            // "or"), continue searching.
         } else if (cmp < 0) {
             return TOKEN_INVALID;
         }
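To see why the old early return was wrong, here is a standalone reduction of the fixed scan over a sorted keyword table. The table contents, tag values, and cmp computation are illustrative; only the len/klen logic comes from the diff:

    #include <stdio.h>
    #include <string.h>

    typedef struct { const char* name; int tag; } Keyword;

    /* A sorted slice of the table; -1 plays the role of TOKEN_INVALID. */
    static const Keyword keywords[] = {
        { "and", 1 }, { "or", 2 }, { "orelse", 3 }, { "while", 4 },
    };

    static int getKeywordTag(const char* bytes, size_t len) {
        for (size_t i = 0; i < sizeof keywords / sizeof keywords[0]; i++) {
            const size_t klen = strlen(keywords[i].name);
            const size_t n = len < klen ? len : klen;
            const int cmp = memcmp(bytes, keywords[i].name, n);
            if (cmp == 0) {
                if (len == klen)
                    return keywords[i].tag;
                if (len < klen)
                    return -1;   /* input sorts before this keyword */
                /* len > klen ("orelse" vs "or"): keep scanning */
            } else if (cmp < 0) {
                return -1;       /* passed the insertion point */
            }
        }
        return -1;
    }

    int main(void) {
        printf("%d\n", getKeywordTag("orelse", 6));  /* prints 3, not -1 */
        return 0;
    }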