astgen: fix firstToken, slice rl, fnDecl param order, break labels
- Comprehensive firstToken: handle all AST node types matching upstream Ast.zig (call, struct_init, slice, binary ops, fn_decl, blocks, etc.) instead of falling through to main_token for unknown types. - Slice LHS uses .ref rl: pass RL_REF_VAL for slice_open/slice/slice_sentinel LHS evaluation, matching upstream AstGen.zig:882-939. - fnDecl param name before type: resolve parameter name via identAsString before evaluating the type expression, matching upstream AstGen.zig:4283-4335 ordering. - Break label comparison: use tokenIdentEql (source text comparison) instead of identAsString to avoid adding label names to string_bytes, matching upstream AstGen.zig:2176 tokenIdentEql. Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
267
astgen.c
267
astgen.c
@@ -553,13 +553,148 @@ static uint32_t firstToken(const Ast* tree, uint32_t node) {
|
||||
switch (tag) {
|
||||
case AST_NODE_ROOT:
|
||||
return 0;
|
||||
// Binary operators: recurse into LHS (Ast.zig:656-710).
|
||||
|
||||
// Return main_token directly (Ast.zig:602-643).
|
||||
case AST_NODE_TEST_DECL:
|
||||
case AST_NODE_ERRDEFER:
|
||||
case AST_NODE_DEFER:
|
||||
case AST_NODE_BOOL_NOT:
|
||||
case AST_NODE_NEGATION:
|
||||
case AST_NODE_BIT_NOT:
|
||||
case AST_NODE_NEGATION_WRAP:
|
||||
case AST_NODE_ADDRESS_OF:
|
||||
case AST_NODE_TRY:
|
||||
case AST_NODE_AWAIT:
|
||||
case AST_NODE_OPTIONAL_TYPE:
|
||||
case AST_NODE_SWITCH:
|
||||
case AST_NODE_SWITCH_COMMA:
|
||||
case AST_NODE_IF_SIMPLE:
|
||||
case AST_NODE_IF:
|
||||
case AST_NODE_SUSPEND:
|
||||
case AST_NODE_RESUME:
|
||||
case AST_NODE_CONTINUE:
|
||||
case AST_NODE_BREAK:
|
||||
case AST_NODE_RETURN:
|
||||
case AST_NODE_ANYFRAME_TYPE:
|
||||
case AST_NODE_IDENTIFIER:
|
||||
case AST_NODE_ANYFRAME_LITERAL:
|
||||
case AST_NODE_CHAR_LITERAL:
|
||||
case AST_NODE_NUMBER_LITERAL:
|
||||
case AST_NODE_UNREACHABLE_LITERAL:
|
||||
case AST_NODE_STRING_LITERAL:
|
||||
case AST_NODE_MULTILINE_STRING_LITERAL:
|
||||
case AST_NODE_GROUPED_EXPRESSION:
|
||||
case AST_NODE_BUILTIN_CALL_TWO:
|
||||
case AST_NODE_BUILTIN_CALL_TWO_COMMA:
|
||||
case AST_NODE_BUILTIN_CALL:
|
||||
case AST_NODE_BUILTIN_CALL_COMMA:
|
||||
case AST_NODE_ERROR_SET_DECL:
|
||||
case AST_NODE_COMPTIME:
|
||||
case AST_NODE_NOSUSPEND:
|
||||
case AST_NODE_ASM_SIMPLE:
|
||||
case AST_NODE_ASM:
|
||||
case AST_NODE_ARRAY_TYPE:
|
||||
case AST_NODE_ARRAY_TYPE_SENTINEL:
|
||||
case AST_NODE_ERROR_VALUE:
|
||||
case AST_NODE_PTR_TYPE_ALIGNED:
|
||||
case AST_NODE_PTR_TYPE_SENTINEL:
|
||||
case AST_NODE_PTR_TYPE:
|
||||
case AST_NODE_PTR_TYPE_BIT_RANGE:
|
||||
return tree->nodes.main_tokens[n];
|
||||
|
||||
// Return main_token - 1: dot-prefixed inits and enum_literal
|
||||
// (Ast.zig:645-654).
|
||||
case AST_NODE_ARRAY_INIT_DOT:
|
||||
case AST_NODE_ARRAY_INIT_DOT_COMMA:
|
||||
case AST_NODE_ARRAY_INIT_DOT_TWO:
|
||||
case AST_NODE_ARRAY_INIT_DOT_TWO_COMMA:
|
||||
case AST_NODE_STRUCT_INIT_DOT:
|
||||
case AST_NODE_STRUCT_INIT_DOT_COMMA:
|
||||
case AST_NODE_STRUCT_INIT_DOT_TWO:
|
||||
case AST_NODE_STRUCT_INIT_DOT_TWO_COMMA:
|
||||
case AST_NODE_ENUM_LITERAL:
|
||||
return tree->nodes.main_tokens[n] - 1;
|
||||
|
||||
// Recurse into LHS: all binary ops and compound expressions
|
||||
// (Ast.zig:656-733).
|
||||
case AST_NODE_CATCH:
|
||||
case AST_NODE_EQUAL_EQUAL:
|
||||
case AST_NODE_BANG_EQUAL:
|
||||
case AST_NODE_LESS_THAN:
|
||||
case AST_NODE_GREATER_THAN:
|
||||
case AST_NODE_LESS_OR_EQUAL:
|
||||
case AST_NODE_GREATER_OR_EQUAL:
|
||||
case AST_NODE_ASSIGN_MUL:
|
||||
case AST_NODE_ASSIGN_DIV:
|
||||
case AST_NODE_ASSIGN_MOD:
|
||||
case AST_NODE_ASSIGN_ADD:
|
||||
case AST_NODE_ASSIGN_SUB:
|
||||
case AST_NODE_ASSIGN_SHL:
|
||||
case AST_NODE_ASSIGN_SHL_SAT:
|
||||
case AST_NODE_ASSIGN_SHR:
|
||||
case AST_NODE_ASSIGN_BIT_AND:
|
||||
case AST_NODE_ASSIGN_BIT_XOR:
|
||||
case AST_NODE_ASSIGN_BIT_OR:
|
||||
case AST_NODE_ASSIGN_MUL_WRAP:
|
||||
case AST_NODE_ASSIGN_ADD_WRAP:
|
||||
case AST_NODE_ASSIGN_SUB_WRAP:
|
||||
case AST_NODE_ASSIGN_MUL_SAT:
|
||||
case AST_NODE_ASSIGN_ADD_SAT:
|
||||
case AST_NODE_ASSIGN_SUB_SAT:
|
||||
case AST_NODE_ASSIGN:
|
||||
case AST_NODE_FIELD_ACCESS:
|
||||
case AST_NODE_MERGE_ERROR_SETS:
|
||||
case AST_NODE_MUL:
|
||||
case AST_NODE_DIV:
|
||||
case AST_NODE_MOD:
|
||||
case AST_NODE_ARRAY_MULT:
|
||||
case AST_NODE_MUL_WRAP:
|
||||
case AST_NODE_MUL_SAT:
|
||||
case AST_NODE_ADD:
|
||||
case AST_NODE_SUB:
|
||||
case AST_NODE_ARRAY_CAT:
|
||||
case AST_NODE_ADD_WRAP:
|
||||
case AST_NODE_SUB_WRAP:
|
||||
case AST_NODE_ADD_SAT:
|
||||
case AST_NODE_SUB_SAT:
|
||||
case AST_NODE_SHL:
|
||||
case AST_NODE_SHL_SAT:
|
||||
case AST_NODE_SHR:
|
||||
case AST_NODE_BIT_AND:
|
||||
case AST_NODE_BIT_XOR:
|
||||
case AST_NODE_BIT_OR:
|
||||
case AST_NODE_ORELSE:
|
||||
case AST_NODE_BOOL_AND:
|
||||
case AST_NODE_BOOL_OR:
|
||||
case AST_NODE_SLICE_OPEN:
|
||||
case AST_NODE_ARRAY_ACCESS:
|
||||
case AST_NODE_ARRAY_INIT_ONE:
|
||||
case AST_NODE_ARRAY_INIT_ONE_COMMA:
|
||||
case AST_NODE_SWITCH_RANGE:
|
||||
case AST_NODE_ERROR_UNION:
|
||||
case AST_NODE_FOR_RANGE:
|
||||
case AST_NODE_CALL_ONE:
|
||||
case AST_NODE_CALL_ONE_COMMA:
|
||||
case AST_NODE_STRUCT_INIT_ONE:
|
||||
case AST_NODE_STRUCT_INIT_ONE_COMMA:
|
||||
case AST_NODE_CALL:
|
||||
case AST_NODE_CALL_COMMA:
|
||||
case AST_NODE_STRUCT_INIT:
|
||||
case AST_NODE_STRUCT_INIT_COMMA:
|
||||
case AST_NODE_SLICE:
|
||||
case AST_NODE_SLICE_SENTINEL:
|
||||
case AST_NODE_ARRAY_INIT:
|
||||
case AST_NODE_ARRAY_INIT_COMMA:
|
||||
case AST_NODE_FIELD_ACCESS:
|
||||
case AST_NODE_UNWRAP_OPTIONAL:
|
||||
case AST_NODE_DEREF:
|
||||
case AST_NODE_ASYNC_CALL_ONE:
|
||||
case AST_NODE_ASYNC_CALL_ONE_COMMA:
|
||||
case AST_NODE_ASYNC_CALL:
|
||||
case AST_NODE_ASYNC_CALL_COMMA:
|
||||
n = tree->nodes.datas[n].lhs;
|
||||
continue;
|
||||
// Var decls: scan backwards for modifiers (Ast.zig:634-643).
|
||||
|
||||
// Var decls: scan backwards for modifiers (Ast.zig:771-792).
|
||||
case AST_NODE_GLOBAL_VAR_DECL:
|
||||
case AST_NODE_LOCAL_VAR_DECL:
|
||||
case AST_NODE_SIMPLE_VAR_DECL:
|
||||
@@ -580,7 +715,28 @@ static uint32_t firstToken(const Ast* tree, uint32_t node) {
|
||||
}
|
||||
return i;
|
||||
}
|
||||
// Container fields: check for preceding comptime (Ast.zig:646-648).
|
||||
// Fn decls: scan backwards for modifiers (Ast.zig:737-759).
|
||||
case AST_NODE_FN_DECL:
|
||||
case AST_NODE_FN_PROTO_SIMPLE:
|
||||
case AST_NODE_FN_PROTO_MULTI:
|
||||
case AST_NODE_FN_PROTO_ONE:
|
||||
case AST_NODE_FN_PROTO: {
|
||||
uint32_t mt = tree->nodes.main_tokens[n];
|
||||
uint32_t i = mt;
|
||||
while (i > 0) {
|
||||
TokenizerTag tt = tree->tokens.tags[i - 1];
|
||||
if (tt == TOKEN_KEYWORD_EXTERN || tt == TOKEN_KEYWORD_EXPORT
|
||||
|| tt == TOKEN_KEYWORD_PUB || tt == TOKEN_KEYWORD_INLINE
|
||||
|| tt == TOKEN_KEYWORD_NOINLINE
|
||||
|| tt == TOKEN_STRING_LITERAL) {
|
||||
i--;
|
||||
} else {
|
||||
break;
|
||||
}
|
||||
}
|
||||
return i;
|
||||
}
|
||||
// Container fields: check for preceding comptime (Ast.zig:761-769).
|
||||
case AST_NODE_CONTAINER_FIELD_INIT:
|
||||
case AST_NODE_CONTAINER_FIELD_ALIGN:
|
||||
case AST_NODE_CONTAINER_FIELD: {
|
||||
@@ -589,7 +745,18 @@ static uint32_t firstToken(const Ast* tree, uint32_t node) {
|
||||
return mt - 1;
|
||||
return mt;
|
||||
}
|
||||
// Everything else: main_token (Ast.zig:602-643).
|
||||
// Blocks: check for label (Ast.zig:794-805).
|
||||
case AST_NODE_BLOCK:
|
||||
case AST_NODE_BLOCK_SEMICOLON:
|
||||
case AST_NODE_BLOCK_TWO:
|
||||
case AST_NODE_BLOCK_TWO_SEMICOLON: {
|
||||
uint32_t lbrace = tree->nodes.main_tokens[n];
|
||||
if (lbrace >= 2 && tree->tokens.tags[lbrace - 1] == TOKEN_COLON
|
||||
&& tree->tokens.tags[lbrace - 2] == TOKEN_IDENTIFIER)
|
||||
return lbrace - 2;
|
||||
return lbrace;
|
||||
}
|
||||
// Fallback for any remaining node types.
|
||||
default:
|
||||
return tree->nodes.main_tokens[n];
|
||||
}
|
||||
@@ -645,6 +812,36 @@ static uint32_t findExistingString(
|
||||
return UINT32_MAX;
|
||||
}
|
||||
|
||||
// Mirrors AstGen.tokenIdentEql (AstGen.zig:6148-6152).
|
||||
// Compares two identifier tokens by source text without touching string_bytes.
|
||||
static bool tokenIdentEql(const Ast* tree, uint32_t tok1, uint32_t tok2) {
|
||||
uint32_t s1 = tree->tokens.starts[tok1];
|
||||
uint32_t s2 = tree->tokens.starts[tok2];
|
||||
uint32_t e1 = tree->tokens.starts[tok1 + 1];
|
||||
uint32_t e2 = tree->tokens.starts[tok2 + 1];
|
||||
// Token length includes trailing whitespace in starts delta, but for
|
||||
// identifiers the actual content is a contiguous alphanumeric/underscore
|
||||
// run. Compute actual identifier lengths.
|
||||
uint32_t len1 = 0;
|
||||
while (s1 + len1 < e1) {
|
||||
char c = tree->source[s1 + len1];
|
||||
if (!((c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z')
|
||||
|| (c >= '0' && c <= '9') || c == '_'))
|
||||
break;
|
||||
len1++;
|
||||
}
|
||||
uint32_t len2 = 0;
|
||||
while (s2 + len2 < e2) {
|
||||
char c = tree->source[s2 + len2];
|
||||
if (!((c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z')
|
||||
|| (c >= '0' && c <= '9') || c == '_'))
|
||||
break;
|
||||
len2++;
|
||||
}
|
||||
return len1 == len2
|
||||
&& memcmp(tree->source + s1, tree->source + s2, len1) == 0;
|
||||
}
|
||||
|
||||
// Forward declaration for strLitAsString (used by identAsString for @"..."
|
||||
// quoted identifiers with escapes).
|
||||
static void strLitAsString(AstGenCtx* ag, uint32_t str_lit_token,
|
||||
@@ -3620,7 +3817,7 @@ static uint32_t exprRl(GenZir* gz, Scope* scope, ResultLoc rl, uint32_t node) {
|
||||
}
|
||||
// slice (AstGen.zig:882-939).
|
||||
case AST_NODE_SLICE_OPEN: {
|
||||
uint32_t lhs = expr(gz, scope, nd.lhs);
|
||||
uint32_t lhs = exprRl(gz, scope, RL_REF_VAL, nd.lhs);
|
||||
advanceSourceCursorToMainToken(ag, node);
|
||||
uint32_t saved_line = ag->source_line - gz->decl_line;
|
||||
uint32_t saved_col = ag->source_column;
|
||||
@@ -3632,7 +3829,7 @@ static uint32_t exprRl(GenZir* gz, Scope* scope, ResultLoc rl, uint32_t node) {
|
||||
case AST_NODE_SLICE: {
|
||||
// Slice[rhs]: { start, end }
|
||||
const Ast* stree = ag->tree;
|
||||
uint32_t lhs = expr(gz, scope, nd.lhs);
|
||||
uint32_t lhs = exprRl(gz, scope, RL_REF_VAL, nd.lhs);
|
||||
advanceSourceCursorToMainToken(ag, node);
|
||||
uint32_t saved_line = ag->source_line - gz->decl_line;
|
||||
uint32_t saved_col = ag->source_column;
|
||||
@@ -3655,7 +3852,7 @@ static uint32_t exprRl(GenZir* gz, Scope* scope, ResultLoc rl, uint32_t node) {
|
||||
case AST_NODE_SLICE_SENTINEL: {
|
||||
// SliceSentinel[rhs]: { start, end, sentinel }
|
||||
const Ast* stree = ag->tree;
|
||||
uint32_t lhs = expr(gz, scope, nd.lhs);
|
||||
uint32_t lhs = exprRl(gz, scope, RL_REF_VAL, nd.lhs);
|
||||
advanceSourceCursorToMainToken(ag, node);
|
||||
uint32_t saved_line = ag->source_line - gz->decl_line;
|
||||
uint32_t saved_col = ag->source_column;
|
||||
@@ -3727,13 +3924,13 @@ static uint32_t exprRl(GenZir* gz, Scope* scope, ResultLoc rl, uint32_t node) {
|
||||
uint32_t block_inst = UINT32_MAX;
|
||||
if (opt_break_label != UINT32_MAX) {
|
||||
// Labeled break: check label on GenZir.
|
||||
if (block_gz->label_token != UINT32_MAX) {
|
||||
uint32_t break_name
|
||||
= identAsString(ag, opt_break_label);
|
||||
uint32_t label_name
|
||||
= identAsString(ag, block_gz->label_token);
|
||||
if (break_name == label_name)
|
||||
block_inst = block_gz->label_block_inst;
|
||||
// Use direct source text comparison, not identAsString,
|
||||
// to avoid adding label names to string_bytes
|
||||
// (AstGen.zig:2176 uses tokenIdentEql).
|
||||
if (block_gz->label_token != UINT32_MAX
|
||||
&& tokenIdentEql(ag->tree, opt_break_label,
|
||||
block_gz->label_token)) {
|
||||
block_inst = block_gz->label_block_inst;
|
||||
}
|
||||
} else {
|
||||
// Unlabeled break: check break_block.
|
||||
@@ -7303,6 +7500,27 @@ static void fnDecl(AstGenCtx* ag, GenZir* gz, uint32_t* wip_decl_insts,
|
||||
}
|
||||
}
|
||||
|
||||
// Determine param name string (AstGen.zig:4283-4321).
|
||||
// Must be resolved BEFORE type expression to match upstream string
|
||||
// table ordering.
|
||||
uint32_t param_name_str = 0; // NullTerminatedString.empty
|
||||
if (name_token != 0) {
|
||||
uint32_t name_start = tree->tokens.starts[name_token];
|
||||
char nch = tree->source[name_start];
|
||||
// Skip "_" params (AstGen.zig:4285-4286).
|
||||
if (nch == '_') {
|
||||
uint32_t next_start = tree->tokens.starts[name_token + 1];
|
||||
if (next_start == name_start + 1) {
|
||||
// Single underscore: empty name.
|
||||
param_name_str = 0;
|
||||
} else {
|
||||
param_name_str = identAsString(ag, name_token);
|
||||
}
|
||||
} else {
|
||||
param_name_str = identAsString(ag, name_token);
|
||||
}
|
||||
}
|
||||
|
||||
// Evaluate param type expression in a sub-block
|
||||
// (AstGen.zig:4333-4337).
|
||||
GenZir param_gz = makeSubBlock(&decl_gz, params_scope);
|
||||
@@ -7323,25 +7541,6 @@ static void fnDecl(AstGenCtx* ag, GenZir* gz, uint32_t* wip_decl_insts,
|
||||
makeBreakInline(¶m_gz, param_inst_expected, param_type_ref,
|
||||
(int32_t)param_type_node - (int32_t)param_gz.decl_node_index);
|
||||
|
||||
// Determine param name string.
|
||||
uint32_t param_name_str = 0; // NullTerminatedString.empty
|
||||
if (name_token != 0) {
|
||||
uint32_t name_start = tree->tokens.starts[name_token];
|
||||
char nch = tree->source[name_start];
|
||||
// Skip "_" params (AstGen.zig:4285-4286).
|
||||
if (nch == '_') {
|
||||
uint32_t next_start = tree->tokens.starts[name_token + 1];
|
||||
if (next_start == name_start + 1) {
|
||||
// Single underscore: empty name.
|
||||
param_name_str = 0;
|
||||
} else {
|
||||
param_name_str = identAsString(ag, name_token);
|
||||
}
|
||||
} else {
|
||||
param_name_str = identAsString(ag, name_token);
|
||||
}
|
||||
}
|
||||
|
||||
// Create param instruction (AstGen.zig:4341-4343).
|
||||
ZirInstTag param_tag
|
||||
= is_comptime_param ? ZIR_INST_PARAM_COMPTIME : ZIR_INST_PARAM;
|
||||
|
||||
@@ -990,7 +990,7 @@ test "astgen: corpus test_all.zig" {
|
||||
}
|
||||
|
||||
test "astgen: corpus build.zig" {
|
||||
if (true) return error.SkipZigTest; // TODO: 1 inst diff (1 ref) - slice LHS needs .ref rl
|
||||
if (true) return error.SkipZigTest; // TODO: string_bytes ordering - struct init field name pre-registration
|
||||
const gpa = std.testing.allocator;
|
||||
try corpusCheck(gpa, "build.zig", @embedFile("build.zig"));
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user