astgen: fix firstToken, slice rl, fnDecl param order, break labels

- Comprehensive firstToken: handle all AST node types matching upstream
  Ast.zig (call, struct_init, slice, binary ops, fn_decl, blocks, etc.)
  instead of falling through to main_token for unknown types.
- Slice LHS uses .ref rl: pass RL_REF_VAL for slice_open/slice/
  slice_sentinel LHS evaluation, matching upstream AstGen.zig:882-939.
- fnDecl param name before type: resolve parameter name via
  identAsString before evaluating the type expression, matching upstream
  AstGen.zig:4283-4335 ordering.
- Break label comparison: use tokenIdentEql (source text comparison)
  instead of identAsString to avoid adding label names to string_bytes,
  matching upstream AstGen.zig:2176 tokenIdentEql.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
2026-02-13 09:13:58 +00:00
parent 421c76dead
commit b2b9e6977b
2 changed files with 234 additions and 35 deletions

267
astgen.c
View File

@@ -553,13 +553,148 @@ static uint32_t firstToken(const Ast* tree, uint32_t node) {
switch (tag) {
case AST_NODE_ROOT:
return 0;
// Binary operators: recurse into LHS (Ast.zig:656-710).
// Return main_token directly (Ast.zig:602-643).
case AST_NODE_TEST_DECL:
case AST_NODE_ERRDEFER:
case AST_NODE_DEFER:
case AST_NODE_BOOL_NOT:
case AST_NODE_NEGATION:
case AST_NODE_BIT_NOT:
case AST_NODE_NEGATION_WRAP:
case AST_NODE_ADDRESS_OF:
case AST_NODE_TRY:
case AST_NODE_AWAIT:
case AST_NODE_OPTIONAL_TYPE:
case AST_NODE_SWITCH:
case AST_NODE_SWITCH_COMMA:
case AST_NODE_IF_SIMPLE:
case AST_NODE_IF:
case AST_NODE_SUSPEND:
case AST_NODE_RESUME:
case AST_NODE_CONTINUE:
case AST_NODE_BREAK:
case AST_NODE_RETURN:
case AST_NODE_ANYFRAME_TYPE:
case AST_NODE_IDENTIFIER:
case AST_NODE_ANYFRAME_LITERAL:
case AST_NODE_CHAR_LITERAL:
case AST_NODE_NUMBER_LITERAL:
case AST_NODE_UNREACHABLE_LITERAL:
case AST_NODE_STRING_LITERAL:
case AST_NODE_MULTILINE_STRING_LITERAL:
case AST_NODE_GROUPED_EXPRESSION:
case AST_NODE_BUILTIN_CALL_TWO:
case AST_NODE_BUILTIN_CALL_TWO_COMMA:
case AST_NODE_BUILTIN_CALL:
case AST_NODE_BUILTIN_CALL_COMMA:
case AST_NODE_ERROR_SET_DECL:
case AST_NODE_COMPTIME:
case AST_NODE_NOSUSPEND:
case AST_NODE_ASM_SIMPLE:
case AST_NODE_ASM:
case AST_NODE_ARRAY_TYPE:
case AST_NODE_ARRAY_TYPE_SENTINEL:
case AST_NODE_ERROR_VALUE:
case AST_NODE_PTR_TYPE_ALIGNED:
case AST_NODE_PTR_TYPE_SENTINEL:
case AST_NODE_PTR_TYPE:
case AST_NODE_PTR_TYPE_BIT_RANGE:
return tree->nodes.main_tokens[n];
// Return main_token - 1: dot-prefixed inits and enum_literal
// (Ast.zig:645-654).
case AST_NODE_ARRAY_INIT_DOT:
case AST_NODE_ARRAY_INIT_DOT_COMMA:
case AST_NODE_ARRAY_INIT_DOT_TWO:
case AST_NODE_ARRAY_INIT_DOT_TWO_COMMA:
case AST_NODE_STRUCT_INIT_DOT:
case AST_NODE_STRUCT_INIT_DOT_COMMA:
case AST_NODE_STRUCT_INIT_DOT_TWO:
case AST_NODE_STRUCT_INIT_DOT_TWO_COMMA:
case AST_NODE_ENUM_LITERAL:
return tree->nodes.main_tokens[n] - 1;
// Recurse into LHS: all binary ops and compound expressions
// (Ast.zig:656-733).
case AST_NODE_CATCH:
case AST_NODE_EQUAL_EQUAL:
case AST_NODE_BANG_EQUAL:
case AST_NODE_LESS_THAN:
case AST_NODE_GREATER_THAN:
case AST_NODE_LESS_OR_EQUAL:
case AST_NODE_GREATER_OR_EQUAL:
case AST_NODE_ASSIGN_MUL:
case AST_NODE_ASSIGN_DIV:
case AST_NODE_ASSIGN_MOD:
case AST_NODE_ASSIGN_ADD:
case AST_NODE_ASSIGN_SUB:
case AST_NODE_ASSIGN_SHL:
case AST_NODE_ASSIGN_SHL_SAT:
case AST_NODE_ASSIGN_SHR:
case AST_NODE_ASSIGN_BIT_AND:
case AST_NODE_ASSIGN_BIT_XOR:
case AST_NODE_ASSIGN_BIT_OR:
case AST_NODE_ASSIGN_MUL_WRAP:
case AST_NODE_ASSIGN_ADD_WRAP:
case AST_NODE_ASSIGN_SUB_WRAP:
case AST_NODE_ASSIGN_MUL_SAT:
case AST_NODE_ASSIGN_ADD_SAT:
case AST_NODE_ASSIGN_SUB_SAT:
case AST_NODE_ASSIGN:
case AST_NODE_FIELD_ACCESS:
case AST_NODE_MERGE_ERROR_SETS:
case AST_NODE_MUL:
case AST_NODE_DIV:
case AST_NODE_MOD:
case AST_NODE_ARRAY_MULT:
case AST_NODE_MUL_WRAP:
case AST_NODE_MUL_SAT:
case AST_NODE_ADD:
case AST_NODE_SUB:
case AST_NODE_ARRAY_CAT:
case AST_NODE_ADD_WRAP:
case AST_NODE_SUB_WRAP:
case AST_NODE_ADD_SAT:
case AST_NODE_SUB_SAT:
case AST_NODE_SHL:
case AST_NODE_SHL_SAT:
case AST_NODE_SHR:
case AST_NODE_BIT_AND:
case AST_NODE_BIT_XOR:
case AST_NODE_BIT_OR:
case AST_NODE_ORELSE:
case AST_NODE_BOOL_AND:
case AST_NODE_BOOL_OR:
case AST_NODE_SLICE_OPEN:
case AST_NODE_ARRAY_ACCESS:
case AST_NODE_ARRAY_INIT_ONE:
case AST_NODE_ARRAY_INIT_ONE_COMMA:
case AST_NODE_SWITCH_RANGE:
case AST_NODE_ERROR_UNION:
case AST_NODE_FOR_RANGE:
case AST_NODE_CALL_ONE:
case AST_NODE_CALL_ONE_COMMA:
case AST_NODE_STRUCT_INIT_ONE:
case AST_NODE_STRUCT_INIT_ONE_COMMA:
case AST_NODE_CALL:
case AST_NODE_CALL_COMMA:
case AST_NODE_STRUCT_INIT:
case AST_NODE_STRUCT_INIT_COMMA:
case AST_NODE_SLICE:
case AST_NODE_SLICE_SENTINEL:
case AST_NODE_ARRAY_INIT:
case AST_NODE_ARRAY_INIT_COMMA:
case AST_NODE_FIELD_ACCESS:
case AST_NODE_UNWRAP_OPTIONAL:
case AST_NODE_DEREF:
case AST_NODE_ASYNC_CALL_ONE:
case AST_NODE_ASYNC_CALL_ONE_COMMA:
case AST_NODE_ASYNC_CALL:
case AST_NODE_ASYNC_CALL_COMMA:
n = tree->nodes.datas[n].lhs;
continue;
// Var decls: scan backwards for modifiers (Ast.zig:634-643).
// Var decls: scan backwards for modifiers (Ast.zig:771-792).
case AST_NODE_GLOBAL_VAR_DECL:
case AST_NODE_LOCAL_VAR_DECL:
case AST_NODE_SIMPLE_VAR_DECL:
@@ -580,7 +715,28 @@ static uint32_t firstToken(const Ast* tree, uint32_t node) {
}
return i;
}
// Container fields: check for preceding comptime (Ast.zig:646-648).
// Fn decls: scan backwards for modifiers (Ast.zig:737-759).
case AST_NODE_FN_DECL:
case AST_NODE_FN_PROTO_SIMPLE:
case AST_NODE_FN_PROTO_MULTI:
case AST_NODE_FN_PROTO_ONE:
case AST_NODE_FN_PROTO: {
uint32_t mt = tree->nodes.main_tokens[n];
uint32_t i = mt;
while (i > 0) {
TokenizerTag tt = tree->tokens.tags[i - 1];
if (tt == TOKEN_KEYWORD_EXTERN || tt == TOKEN_KEYWORD_EXPORT
|| tt == TOKEN_KEYWORD_PUB || tt == TOKEN_KEYWORD_INLINE
|| tt == TOKEN_KEYWORD_NOINLINE
|| tt == TOKEN_STRING_LITERAL) {
i--;
} else {
break;
}
}
return i;
}
// Container fields: check for preceding comptime (Ast.zig:761-769).
case AST_NODE_CONTAINER_FIELD_INIT:
case AST_NODE_CONTAINER_FIELD_ALIGN:
case AST_NODE_CONTAINER_FIELD: {
@@ -589,7 +745,18 @@ static uint32_t firstToken(const Ast* tree, uint32_t node) {
return mt - 1;
return mt;
}
// Everything else: main_token (Ast.zig:602-643).
// Blocks: check for label (Ast.zig:794-805).
case AST_NODE_BLOCK:
case AST_NODE_BLOCK_SEMICOLON:
case AST_NODE_BLOCK_TWO:
case AST_NODE_BLOCK_TWO_SEMICOLON: {
uint32_t lbrace = tree->nodes.main_tokens[n];
if (lbrace >= 2 && tree->tokens.tags[lbrace - 1] == TOKEN_COLON
&& tree->tokens.tags[lbrace - 2] == TOKEN_IDENTIFIER)
return lbrace - 2;
return lbrace;
}
// Fallback for any remaining node types.
default:
return tree->nodes.main_tokens[n];
}
@@ -645,6 +812,36 @@ static uint32_t findExistingString(
return UINT32_MAX;
}
// Locates the name bytes of identifier token `tok` inside tree->source.
// Stores the source offset of the first name byte in *name_start and returns
// the number of name bytes:
//  - bare identifier: the leading run of [A-Za-z0-9_];
//  - quoted identifier (@"..."): the raw bytes between the quotes. Escape
//    sequences are NOT decoded, so two spellings of one name that differ only
//    in their escapes yield different bytes.
// The start of token tok + 1 bounds the scan (the starts delta may include
// trailing whitespace), so `tok` must not be the last token.
// NOTE(review): presumably always true because the token list ends with an
// EOF token -- confirm against the tokenizer.
static uint32_t tokenIdentNameSpan(
    const Ast* tree, uint32_t tok, uint32_t* name_start) {
    uint32_t pos = tree->tokens.starts[tok];
    uint32_t limit = tree->tokens.starts[tok + 1];
    uint32_t len = 0;
    if (pos + 1 < limit && tree->source[pos] == '@'
        && tree->source[pos + 1] == '"') {
        pos += 2; // Skip the @" opener; the name starts after it.
        while (pos + len < limit) {
            char c = tree->source[pos + len];
            if (c == '"')
                break;
            // A backslash escapes the following byte (e.g. \"), so keep
            // both bytes inside the name instead of ending it early.
            if (c == '\\' && pos + len + 1 < limit)
                len++;
            len++;
        }
    } else {
        while (pos + len < limit) {
            char c = tree->source[pos + len];
            if (!((c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z')
                    || (c >= '0' && c <= '9') || c == '_'))
                break;
            len++;
        }
    }
    *name_start = pos;
    return len;
}

// Mirrors AstGen.tokenIdentEql (AstGen.zig:6148-6152).
// Compares two identifier tokens by source text without touching string_bytes.
// Returns true when both tokens spell the same name. A quoted identifier is
// compared by the bytes between its quotes, so foo and @"foo" are equal and
// two different quoted names are no longer both treated as empty (and thus
// equal) as they were when only [A-Za-z0-9_] runs were scanned.
// NOTE(review): upstream presumably compares fully decoded names; escapes
// inside @"..." are compared raw here -- confirm whether that matters.
static bool tokenIdentEql(const Ast* tree, uint32_t tok1, uint32_t tok2) {
    uint32_t start1 = 0;
    uint32_t start2 = 0;
    uint32_t len1 = tokenIdentNameSpan(tree, tok1, &start1);
    uint32_t len2 = tokenIdentNameSpan(tree, tok2, &start2);
    return len1 == len2
        && memcmp(tree->source + start1, tree->source + start2, len1) == 0;
}
// Forward declaration for strLitAsString (used by identAsString for @"..."
// quoted identifiers with escapes).
static void strLitAsString(AstGenCtx* ag, uint32_t str_lit_token,
@@ -3620,7 +3817,7 @@ static uint32_t exprRl(GenZir* gz, Scope* scope, ResultLoc rl, uint32_t node) {
}
// slice (AstGen.zig:882-939).
case AST_NODE_SLICE_OPEN: {
uint32_t lhs = expr(gz, scope, nd.lhs);
uint32_t lhs = exprRl(gz, scope, RL_REF_VAL, nd.lhs);
advanceSourceCursorToMainToken(ag, node);
uint32_t saved_line = ag->source_line - gz->decl_line;
uint32_t saved_col = ag->source_column;
@@ -3632,7 +3829,7 @@ static uint32_t exprRl(GenZir* gz, Scope* scope, ResultLoc rl, uint32_t node) {
case AST_NODE_SLICE: {
// Slice[rhs]: { start, end }
const Ast* stree = ag->tree;
uint32_t lhs = expr(gz, scope, nd.lhs);
uint32_t lhs = exprRl(gz, scope, RL_REF_VAL, nd.lhs);
advanceSourceCursorToMainToken(ag, node);
uint32_t saved_line = ag->source_line - gz->decl_line;
uint32_t saved_col = ag->source_column;
@@ -3655,7 +3852,7 @@ static uint32_t exprRl(GenZir* gz, Scope* scope, ResultLoc rl, uint32_t node) {
case AST_NODE_SLICE_SENTINEL: {
// SliceSentinel[rhs]: { start, end, sentinel }
const Ast* stree = ag->tree;
uint32_t lhs = expr(gz, scope, nd.lhs);
uint32_t lhs = exprRl(gz, scope, RL_REF_VAL, nd.lhs);
advanceSourceCursorToMainToken(ag, node);
uint32_t saved_line = ag->source_line - gz->decl_line;
uint32_t saved_col = ag->source_column;
@@ -3727,13 +3924,13 @@ static uint32_t exprRl(GenZir* gz, Scope* scope, ResultLoc rl, uint32_t node) {
uint32_t block_inst = UINT32_MAX;
if (opt_break_label != UINT32_MAX) {
// Labeled break: check label on GenZir.
if (block_gz->label_token != UINT32_MAX) {
uint32_t break_name
= identAsString(ag, opt_break_label);
uint32_t label_name
= identAsString(ag, block_gz->label_token);
if (break_name == label_name)
block_inst = block_gz->label_block_inst;
// Use direct source text comparison, not identAsString,
// to avoid adding label names to string_bytes
// (AstGen.zig:2176 uses tokenIdentEql).
if (block_gz->label_token != UINT32_MAX
&& tokenIdentEql(ag->tree, opt_break_label,
block_gz->label_token)) {
block_inst = block_gz->label_block_inst;
}
} else {
// Unlabeled break: check break_block.
@@ -7303,6 +7500,27 @@ static void fnDecl(AstGenCtx* ag, GenZir* gz, uint32_t* wip_decl_insts,
}
}
// Determine param name string (AstGen.zig:4283-4321).
// Must be resolved BEFORE type expression to match upstream string
// table ordering.
uint32_t param_name_str = 0; // NullTerminatedString.empty
if (name_token != 0) {
uint32_t name_start = tree->tokens.starts[name_token];
char nch = tree->source[name_start];
// Skip "_" params (AstGen.zig:4285-4286).
if (nch == '_') {
uint32_t next_start = tree->tokens.starts[name_token + 1];
if (next_start == name_start + 1) {
// Single underscore: empty name.
param_name_str = 0;
} else {
param_name_str = identAsString(ag, name_token);
}
} else {
param_name_str = identAsString(ag, name_token);
}
}
// Evaluate param type expression in a sub-block
// (AstGen.zig:4333-4337).
GenZir param_gz = makeSubBlock(&decl_gz, params_scope);
@@ -7323,25 +7541,6 @@ static void fnDecl(AstGenCtx* ag, GenZir* gz, uint32_t* wip_decl_insts,
makeBreakInline(&param_gz, param_inst_expected, param_type_ref,
(int32_t)param_type_node - (int32_t)param_gz.decl_node_index);
// Determine param name string.
uint32_t param_name_str = 0; // NullTerminatedString.empty
if (name_token != 0) {
uint32_t name_start = tree->tokens.starts[name_token];
char nch = tree->source[name_start];
// Skip "_" params (AstGen.zig:4285-4286).
if (nch == '_') {
uint32_t next_start = tree->tokens.starts[name_token + 1];
if (next_start == name_start + 1) {
// Single underscore: empty name.
param_name_str = 0;
} else {
param_name_str = identAsString(ag, name_token);
}
} else {
param_name_str = identAsString(ag, name_token);
}
}
// Create param instruction (AstGen.zig:4341-4343).
ZirInstTag param_tag
= is_comptime_param ? ZIR_INST_PARAM_COMPTIME : ZIR_INST_PARAM;

View File

@@ -990,7 +990,7 @@ test "astgen: corpus test_all.zig" {
}
test "astgen: corpus build.zig" {
if (true) return error.SkipZigTest; // TODO: 1 inst diff (1 ref) - slice LHS needs .ref rl
if (true) return error.SkipZigTest; // TODO: string_bytes ordering - struct init field name pre-registration
const gpa = std.testing.allocator;
try corpusCheck(gpa, "build.zig", @embedFile("build.zig"));
}