commit b2b9e6977bec865b69831d3430c808b7403ec416 (tree)
parent 421c76deade1576dbfb4d9088e2e656d52e4c271
Author: Motiejus Jakštys <motiejus@jakstys.lt>
Date: Fri, 13 Feb 2026 09:13:58 +0000
astgen: fix firstToken, slice rl, fnDecl param order, break labels
- Comprehensive firstToken: handle all AST node types matching upstream
Ast.zig (call, struct_init, slice, binary ops, fn_decl, blocks, etc.)
instead of falling through to main_token for unknown types.
- Slice LHS uses .ref rl: pass RL_REF_VAL for slice_open/slice/
slice_sentinel LHS evaluation, matching upstream AstGen.zig:882-939.
- fnDecl param name before type: resolve parameter name via
identAsString before evaluating the type expression, matching upstream
AstGen.zig:4283-4335 ordering.
- Break label comparison: use tokenIdentEql (source text comparison)
instead of identAsString to avoid adding label names to string_bytes,
matching upstream AstGen.zig:2176 tokenIdentEql.
Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
Diffstat:
| M | astgen.c | | | 267 | +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++---------- |
| M | astgen_test.zig | | | 2 | +- |
2 files changed, 234 insertions(+), 35 deletions(-)
diff --git a/astgen.c b/astgen.c
@@ -553,13 +553,148 @@ static uint32_t firstToken(const Ast* tree, uint32_t node) {
switch (tag) {
case AST_NODE_ROOT:
return 0;
- // Binary operators: recurse into LHS (Ast.zig:656-710).
+
+ // Return main_token directly (Ast.zig:602-643).
+ case AST_NODE_TEST_DECL:
+ case AST_NODE_ERRDEFER:
+ case AST_NODE_DEFER:
+ case AST_NODE_BOOL_NOT:
+ case AST_NODE_NEGATION:
+ case AST_NODE_BIT_NOT:
+ case AST_NODE_NEGATION_WRAP:
+ case AST_NODE_ADDRESS_OF:
+ case AST_NODE_TRY:
+ case AST_NODE_AWAIT:
+ case AST_NODE_OPTIONAL_TYPE:
+ case AST_NODE_SWITCH:
+ case AST_NODE_SWITCH_COMMA:
+ case AST_NODE_IF_SIMPLE:
+ case AST_NODE_IF:
+ case AST_NODE_SUSPEND:
+ case AST_NODE_RESUME:
+ case AST_NODE_CONTINUE:
+ case AST_NODE_BREAK:
+ case AST_NODE_RETURN:
+ case AST_NODE_ANYFRAME_TYPE:
+ case AST_NODE_IDENTIFIER:
+ case AST_NODE_ANYFRAME_LITERAL:
+ case AST_NODE_CHAR_LITERAL:
+ case AST_NODE_NUMBER_LITERAL:
+ case AST_NODE_UNREACHABLE_LITERAL:
+ case AST_NODE_STRING_LITERAL:
+ case AST_NODE_MULTILINE_STRING_LITERAL:
+ case AST_NODE_GROUPED_EXPRESSION:
+ case AST_NODE_BUILTIN_CALL_TWO:
+ case AST_NODE_BUILTIN_CALL_TWO_COMMA:
+ case AST_NODE_BUILTIN_CALL:
+ case AST_NODE_BUILTIN_CALL_COMMA:
+ case AST_NODE_ERROR_SET_DECL:
+ case AST_NODE_COMPTIME:
+ case AST_NODE_NOSUSPEND:
+ case AST_NODE_ASM_SIMPLE:
+ case AST_NODE_ASM:
+ case AST_NODE_ARRAY_TYPE:
+ case AST_NODE_ARRAY_TYPE_SENTINEL:
+ case AST_NODE_ERROR_VALUE:
+ case AST_NODE_PTR_TYPE_ALIGNED:
+ case AST_NODE_PTR_TYPE_SENTINEL:
+ case AST_NODE_PTR_TYPE:
+ case AST_NODE_PTR_TYPE_BIT_RANGE:
+ return tree->nodes.main_tokens[n];
+
+ // Return main_token - 1: dot-prefixed inits and enum_literal
+ // (Ast.zig:645-654).
+ case AST_NODE_ARRAY_INIT_DOT:
+ case AST_NODE_ARRAY_INIT_DOT_COMMA:
+ case AST_NODE_ARRAY_INIT_DOT_TWO:
+ case AST_NODE_ARRAY_INIT_DOT_TWO_COMMA:
+ case AST_NODE_STRUCT_INIT_DOT:
+ case AST_NODE_STRUCT_INIT_DOT_COMMA:
+ case AST_NODE_STRUCT_INIT_DOT_TWO:
+ case AST_NODE_STRUCT_INIT_DOT_TWO_COMMA:
+ case AST_NODE_ENUM_LITERAL:
+ return tree->nodes.main_tokens[n] - 1;
+
+ // Recurse into LHS: all binary ops and compound expressions
+ // (Ast.zig:656-733).
+ case AST_NODE_CATCH:
+ case AST_NODE_EQUAL_EQUAL:
+ case AST_NODE_BANG_EQUAL:
+ case AST_NODE_LESS_THAN:
+ case AST_NODE_GREATER_THAN:
+ case AST_NODE_LESS_OR_EQUAL:
+ case AST_NODE_GREATER_OR_EQUAL:
+ case AST_NODE_ASSIGN_MUL:
+ case AST_NODE_ASSIGN_DIV:
+ case AST_NODE_ASSIGN_MOD:
+ case AST_NODE_ASSIGN_ADD:
+ case AST_NODE_ASSIGN_SUB:
+ case AST_NODE_ASSIGN_SHL:
+ case AST_NODE_ASSIGN_SHL_SAT:
+ case AST_NODE_ASSIGN_SHR:
+ case AST_NODE_ASSIGN_BIT_AND:
+ case AST_NODE_ASSIGN_BIT_XOR:
+ case AST_NODE_ASSIGN_BIT_OR:
+ case AST_NODE_ASSIGN_MUL_WRAP:
+ case AST_NODE_ASSIGN_ADD_WRAP:
+ case AST_NODE_ASSIGN_SUB_WRAP:
+ case AST_NODE_ASSIGN_MUL_SAT:
+ case AST_NODE_ASSIGN_ADD_SAT:
+ case AST_NODE_ASSIGN_SUB_SAT:
case AST_NODE_ASSIGN:
- case AST_NODE_FIELD_ACCESS:
+ case AST_NODE_MERGE_ERROR_SETS:
+ case AST_NODE_MUL:
+ case AST_NODE_DIV:
+ case AST_NODE_MOD:
+ case AST_NODE_ARRAY_MULT:
+ case AST_NODE_MUL_WRAP:
+ case AST_NODE_MUL_SAT:
+ case AST_NODE_ADD:
+ case AST_NODE_SUB:
+ case AST_NODE_ARRAY_CAT:
+ case AST_NODE_ADD_WRAP:
+ case AST_NODE_SUB_WRAP:
+ case AST_NODE_ADD_SAT:
+ case AST_NODE_SUB_SAT:
+ case AST_NODE_SHL:
+ case AST_NODE_SHL_SAT:
+ case AST_NODE_SHR:
+ case AST_NODE_BIT_AND:
+ case AST_NODE_BIT_XOR:
+ case AST_NODE_BIT_OR:
+ case AST_NODE_ORELSE:
+ case AST_NODE_BOOL_AND:
+ case AST_NODE_BOOL_OR:
+ case AST_NODE_SLICE_OPEN:
case AST_NODE_ARRAY_ACCESS:
+ case AST_NODE_ARRAY_INIT_ONE:
+ case AST_NODE_ARRAY_INIT_ONE_COMMA:
+ case AST_NODE_SWITCH_RANGE:
+ case AST_NODE_ERROR_UNION:
+ case AST_NODE_FOR_RANGE:
+ case AST_NODE_CALL_ONE:
+ case AST_NODE_CALL_ONE_COMMA:
+ case AST_NODE_STRUCT_INIT_ONE:
+ case AST_NODE_STRUCT_INIT_ONE_COMMA:
+ case AST_NODE_CALL:
+ case AST_NODE_CALL_COMMA:
+ case AST_NODE_STRUCT_INIT:
+ case AST_NODE_STRUCT_INIT_COMMA:
+ case AST_NODE_SLICE:
+ case AST_NODE_SLICE_SENTINEL:
+ case AST_NODE_ARRAY_INIT:
+ case AST_NODE_ARRAY_INIT_COMMA:
+ case AST_NODE_FIELD_ACCESS:
+ case AST_NODE_UNWRAP_OPTIONAL:
+ case AST_NODE_DEREF:
+ case AST_NODE_ASYNC_CALL_ONE:
+ case AST_NODE_ASYNC_CALL_ONE_COMMA:
+ case AST_NODE_ASYNC_CALL:
+ case AST_NODE_ASYNC_CALL_COMMA:
n = tree->nodes.datas[n].lhs;
continue;
- // Var decls: scan backwards for modifiers (Ast.zig:634-643).
+
+ // Var decls: scan backwards for modifiers (Ast.zig:771-792).
case AST_NODE_GLOBAL_VAR_DECL:
case AST_NODE_LOCAL_VAR_DECL:
case AST_NODE_SIMPLE_VAR_DECL:
@@ -580,7 +715,28 @@ static uint32_t firstToken(const Ast* tree, uint32_t node) {
}
return i;
}
- // Container fields: check for preceding comptime (Ast.zig:646-648).
+ // Fn decls: scan backwards for modifiers (Ast.zig:737-759).
+ case AST_NODE_FN_DECL:
+ case AST_NODE_FN_PROTO_SIMPLE:
+ case AST_NODE_FN_PROTO_MULTI:
+ case AST_NODE_FN_PROTO_ONE:
+ case AST_NODE_FN_PROTO: {
+ uint32_t mt = tree->nodes.main_tokens[n];
+ uint32_t i = mt;
+ while (i > 0) {
+ TokenizerTag tt = tree->tokens.tags[i - 1];
+ if (tt == TOKEN_KEYWORD_EXTERN || tt == TOKEN_KEYWORD_EXPORT
+ || tt == TOKEN_KEYWORD_PUB || tt == TOKEN_KEYWORD_INLINE
+ || tt == TOKEN_KEYWORD_NOINLINE
+ || tt == TOKEN_STRING_LITERAL) {
+ i--;
+ } else {
+ break;
+ }
+ }
+ return i;
+ }
+ // Container fields: check for preceding comptime (Ast.zig:761-769).
case AST_NODE_CONTAINER_FIELD_INIT:
case AST_NODE_CONTAINER_FIELD_ALIGN:
case AST_NODE_CONTAINER_FIELD: {
@@ -589,7 +745,18 @@ static uint32_t firstToken(const Ast* tree, uint32_t node) {
return mt - 1;
return mt;
}
- // Everything else: main_token (Ast.zig:602-643).
+ // Blocks: check for label (Ast.zig:794-805).
+ case AST_NODE_BLOCK:
+ case AST_NODE_BLOCK_SEMICOLON:
+ case AST_NODE_BLOCK_TWO:
+ case AST_NODE_BLOCK_TWO_SEMICOLON: {
+ uint32_t lbrace = tree->nodes.main_tokens[n];
+ if (lbrace >= 2 && tree->tokens.tags[lbrace - 1] == TOKEN_COLON
+ && tree->tokens.tags[lbrace - 2] == TOKEN_IDENTIFIER)
+ return lbrace - 2;
+ return lbrace;
+ }
+ // Fallback for any remaining node types.
default:
return tree->nodes.main_tokens[n];
}
@@ -645,6 +812,36 @@ static uint32_t findExistingString(
return UINT32_MAX;
}
+// Mirrors AstGen.tokenIdentEql (AstGen.zig:6148-6152).
+// Compares two identifier tokens by source text without touching string_bytes.
+static bool tokenIdentEql(const Ast* tree, uint32_t tok1, uint32_t tok2) {
+ uint32_t s1 = tree->tokens.starts[tok1];
+ uint32_t s2 = tree->tokens.starts[tok2];
+ uint32_t e1 = tree->tokens.starts[tok1 + 1];
+ uint32_t e2 = tree->tokens.starts[tok2 + 1];
+ // Token length includes trailing whitespace in starts delta, but for
+ // identifiers the actual content is a contiguous alphanumeric/underscore
+ // run. Compute actual identifier lengths.
+ uint32_t len1 = 0;
+ while (s1 + len1 < e1) {
+ char c = tree->source[s1 + len1];
+ if (!((c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z')
+ || (c >= '0' && c <= '9') || c == '_'))
+ break;
+ len1++;
+ }
+ uint32_t len2 = 0;
+ while (s2 + len2 < e2) {
+ char c = tree->source[s2 + len2];
+ if (!((c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z')
+ || (c >= '0' && c <= '9') || c == '_'))
+ break;
+ len2++;
+ }
+ return len1 == len2
+ && memcmp(tree->source + s1, tree->source + s2, len1) == 0;
+}
+
// Forward declaration for strLitAsString (used by identAsString for @"..."
// quoted identifiers with escapes).
static void strLitAsString(AstGenCtx* ag, uint32_t str_lit_token,
@@ -3620,7 +3817,7 @@ static uint32_t exprRl(GenZir* gz, Scope* scope, ResultLoc rl, uint32_t node) {
}
// slice (AstGen.zig:882-939).
case AST_NODE_SLICE_OPEN: {
- uint32_t lhs = expr(gz, scope, nd.lhs);
+ uint32_t lhs = exprRl(gz, scope, RL_REF_VAL, nd.lhs);
advanceSourceCursorToMainToken(ag, node);
uint32_t saved_line = ag->source_line - gz->decl_line;
uint32_t saved_col = ag->source_column;
@@ -3632,7 +3829,7 @@ static uint32_t exprRl(GenZir* gz, Scope* scope, ResultLoc rl, uint32_t node) {
case AST_NODE_SLICE: {
// Slice[rhs]: { start, end }
const Ast* stree = ag->tree;
- uint32_t lhs = expr(gz, scope, nd.lhs);
+ uint32_t lhs = exprRl(gz, scope, RL_REF_VAL, nd.lhs);
advanceSourceCursorToMainToken(ag, node);
uint32_t saved_line = ag->source_line - gz->decl_line;
uint32_t saved_col = ag->source_column;
@@ -3655,7 +3852,7 @@ static uint32_t exprRl(GenZir* gz, Scope* scope, ResultLoc rl, uint32_t node) {
case AST_NODE_SLICE_SENTINEL: {
// SliceSentinel[rhs]: { start, end, sentinel }
const Ast* stree = ag->tree;
- uint32_t lhs = expr(gz, scope, nd.lhs);
+ uint32_t lhs = exprRl(gz, scope, RL_REF_VAL, nd.lhs);
advanceSourceCursorToMainToken(ag, node);
uint32_t saved_line = ag->source_line - gz->decl_line;
uint32_t saved_col = ag->source_column;
@@ -3727,13 +3924,13 @@ static uint32_t exprRl(GenZir* gz, Scope* scope, ResultLoc rl, uint32_t node) {
uint32_t block_inst = UINT32_MAX;
if (opt_break_label != UINT32_MAX) {
// Labeled break: check label on GenZir.
- if (block_gz->label_token != UINT32_MAX) {
- uint32_t break_name
- = identAsString(ag, opt_break_label);
- uint32_t label_name
- = identAsString(ag, block_gz->label_token);
- if (break_name == label_name)
- block_inst = block_gz->label_block_inst;
+ // Use direct source text comparison, not identAsString,
+ // to avoid adding label names to string_bytes
+ // (AstGen.zig:2176 uses tokenIdentEql).
+ if (block_gz->label_token != UINT32_MAX
+ && tokenIdentEql(ag->tree, opt_break_label,
+ block_gz->label_token)) {
+ block_inst = block_gz->label_block_inst;
}
} else {
// Unlabeled break: check break_block.
@@ -7303,6 +7500,27 @@ static void fnDecl(AstGenCtx* ag, GenZir* gz, uint32_t* wip_decl_insts,
}
}
+ // Determine param name string (AstGen.zig:4283-4321).
+ // Must be resolved BEFORE type expression to match upstream string
+ // table ordering.
+ uint32_t param_name_str = 0; // NullTerminatedString.empty
+ if (name_token != 0) {
+ uint32_t name_start = tree->tokens.starts[name_token];
+ char nch = tree->source[name_start];
+ // Skip "_" params (AstGen.zig:4285-4286).
+ if (nch == '_') {
+ uint32_t next_start = tree->tokens.starts[name_token + 1];
+ if (next_start == name_start + 1) {
+ // Single underscore: empty name.
+ param_name_str = 0;
+ } else {
+ param_name_str = identAsString(ag, name_token);
+ }
+ } else {
+ param_name_str = identAsString(ag, name_token);
+ }
+ }
+
// Evaluate param type expression in a sub-block
// (AstGen.zig:4333-4337).
GenZir param_gz = makeSubBlock(&decl_gz, params_scope);
@@ -7323,25 +7541,6 @@ static void fnDecl(AstGenCtx* ag, GenZir* gz, uint32_t* wip_decl_insts,
makeBreakInline(¶m_gz, param_inst_expected, param_type_ref,
(int32_t)param_type_node - (int32_t)param_gz.decl_node_index);
- // Determine param name string.
- uint32_t param_name_str = 0; // NullTerminatedString.empty
- if (name_token != 0) {
- uint32_t name_start = tree->tokens.starts[name_token];
- char nch = tree->source[name_start];
- // Skip "_" params (AstGen.zig:4285-4286).
- if (nch == '_') {
- uint32_t next_start = tree->tokens.starts[name_token + 1];
- if (next_start == name_start + 1) {
- // Single underscore: empty name.
- param_name_str = 0;
- } else {
- param_name_str = identAsString(ag, name_token);
- }
- } else {
- param_name_str = identAsString(ag, name_token);
- }
- }
-
// Create param instruction (AstGen.zig:4341-4343).
ZirInstTag param_tag
= is_comptime_param ? ZIR_INST_PARAM_COMPTIME : ZIR_INST_PARAM;
diff --git a/astgen_test.zig b/astgen_test.zig
@@ -990,7 +990,7 @@ test "astgen: corpus test_all.zig" {
}
test "astgen: corpus build.zig" {
- if (true) return error.SkipZigTest; // TODO: 1 inst diff (1 ref) - slice LHS needs .ref rl
+ if (true) return error.SkipZigTest; // TODO: string_bytes ordering - struct init field name pre-registration
const gpa = std.testing.allocator;
try corpusCheck(gpa, "build.zig", @embedFile("build.zig"));
}