diff --git a/astgen.c b/astgen.c index 94d3a9edd4..7182efde8f 100644 --- a/astgen.c +++ b/astgen.c @@ -553,13 +553,148 @@ static uint32_t firstToken(const Ast* tree, uint32_t node) { switch (tag) { case AST_NODE_ROOT: return 0; - // Binary operators: recurse into LHS (Ast.zig:656-710). + + // Return main_token directly (Ast.zig:602-643). + case AST_NODE_TEST_DECL: + case AST_NODE_ERRDEFER: + case AST_NODE_DEFER: + case AST_NODE_BOOL_NOT: + case AST_NODE_NEGATION: + case AST_NODE_BIT_NOT: + case AST_NODE_NEGATION_WRAP: + case AST_NODE_ADDRESS_OF: + case AST_NODE_TRY: + case AST_NODE_AWAIT: + case AST_NODE_OPTIONAL_TYPE: + case AST_NODE_SWITCH: + case AST_NODE_SWITCH_COMMA: + case AST_NODE_IF_SIMPLE: + case AST_NODE_IF: + case AST_NODE_SUSPEND: + case AST_NODE_RESUME: + case AST_NODE_CONTINUE: + case AST_NODE_BREAK: + case AST_NODE_RETURN: + case AST_NODE_ANYFRAME_TYPE: + case AST_NODE_IDENTIFIER: + case AST_NODE_ANYFRAME_LITERAL: + case AST_NODE_CHAR_LITERAL: + case AST_NODE_NUMBER_LITERAL: + case AST_NODE_UNREACHABLE_LITERAL: + case AST_NODE_STRING_LITERAL: + case AST_NODE_MULTILINE_STRING_LITERAL: + case AST_NODE_GROUPED_EXPRESSION: + case AST_NODE_BUILTIN_CALL_TWO: + case AST_NODE_BUILTIN_CALL_TWO_COMMA: + case AST_NODE_BUILTIN_CALL: + case AST_NODE_BUILTIN_CALL_COMMA: + case AST_NODE_ERROR_SET_DECL: + case AST_NODE_COMPTIME: + case AST_NODE_NOSUSPEND: + case AST_NODE_ASM_SIMPLE: + case AST_NODE_ASM: + case AST_NODE_ARRAY_TYPE: + case AST_NODE_ARRAY_TYPE_SENTINEL: + case AST_NODE_ERROR_VALUE: + case AST_NODE_PTR_TYPE_ALIGNED: + case AST_NODE_PTR_TYPE_SENTINEL: + case AST_NODE_PTR_TYPE: + case AST_NODE_PTR_TYPE_BIT_RANGE: + return tree->nodes.main_tokens[n]; + + // Return main_token - 1: dot-prefixed inits and enum_literal + // (Ast.zig:645-654). 
+ case AST_NODE_ARRAY_INIT_DOT: + case AST_NODE_ARRAY_INIT_DOT_COMMA: + case AST_NODE_ARRAY_INIT_DOT_TWO: + case AST_NODE_ARRAY_INIT_DOT_TWO_COMMA: + case AST_NODE_STRUCT_INIT_DOT: + case AST_NODE_STRUCT_INIT_DOT_COMMA: + case AST_NODE_STRUCT_INIT_DOT_TWO: + case AST_NODE_STRUCT_INIT_DOT_TWO_COMMA: + case AST_NODE_ENUM_LITERAL: + return tree->nodes.main_tokens[n] - 1; + + // Recurse into LHS: all binary ops and compound expressions + // (Ast.zig:656-733). + case AST_NODE_CATCH: + case AST_NODE_EQUAL_EQUAL: + case AST_NODE_BANG_EQUAL: + case AST_NODE_LESS_THAN: + case AST_NODE_GREATER_THAN: + case AST_NODE_LESS_OR_EQUAL: + case AST_NODE_GREATER_OR_EQUAL: + case AST_NODE_ASSIGN_MUL: + case AST_NODE_ASSIGN_DIV: + case AST_NODE_ASSIGN_MOD: + case AST_NODE_ASSIGN_ADD: + case AST_NODE_ASSIGN_SUB: + case AST_NODE_ASSIGN_SHL: + case AST_NODE_ASSIGN_SHL_SAT: + case AST_NODE_ASSIGN_SHR: + case AST_NODE_ASSIGN_BIT_AND: + case AST_NODE_ASSIGN_BIT_XOR: + case AST_NODE_ASSIGN_BIT_OR: + case AST_NODE_ASSIGN_MUL_WRAP: + case AST_NODE_ASSIGN_ADD_WRAP: + case AST_NODE_ASSIGN_SUB_WRAP: + case AST_NODE_ASSIGN_MUL_SAT: + case AST_NODE_ASSIGN_ADD_SAT: + case AST_NODE_ASSIGN_SUB_SAT: case AST_NODE_ASSIGN: - case AST_NODE_FIELD_ACCESS: + case AST_NODE_MERGE_ERROR_SETS: + case AST_NODE_MUL: + case AST_NODE_DIV: + case AST_NODE_MOD: + case AST_NODE_ARRAY_MULT: + case AST_NODE_MUL_WRAP: + case AST_NODE_MUL_SAT: + case AST_NODE_ADD: + case AST_NODE_SUB: + case AST_NODE_ARRAY_CAT: + case AST_NODE_ADD_WRAP: + case AST_NODE_SUB_WRAP: + case AST_NODE_ADD_SAT: + case AST_NODE_SUB_SAT: + case AST_NODE_SHL: + case AST_NODE_SHL_SAT: + case AST_NODE_SHR: + case AST_NODE_BIT_AND: + case AST_NODE_BIT_XOR: + case AST_NODE_BIT_OR: + case AST_NODE_ORELSE: + case AST_NODE_BOOL_AND: + case AST_NODE_BOOL_OR: + case AST_NODE_SLICE_OPEN: case AST_NODE_ARRAY_ACCESS: + case AST_NODE_ARRAY_INIT_ONE: + case AST_NODE_ARRAY_INIT_ONE_COMMA: + case AST_NODE_SWITCH_RANGE: + case AST_NODE_ERROR_UNION: + case 
AST_NODE_FOR_RANGE: + case AST_NODE_CALL_ONE: + case AST_NODE_CALL_ONE_COMMA: + case AST_NODE_STRUCT_INIT_ONE: + case AST_NODE_STRUCT_INIT_ONE_COMMA: + case AST_NODE_CALL: + case AST_NODE_CALL_COMMA: + case AST_NODE_STRUCT_INIT: + case AST_NODE_STRUCT_INIT_COMMA: + case AST_NODE_SLICE: + case AST_NODE_SLICE_SENTINEL: + case AST_NODE_ARRAY_INIT: + case AST_NODE_ARRAY_INIT_COMMA: + case AST_NODE_FIELD_ACCESS: + case AST_NODE_UNWRAP_OPTIONAL: + case AST_NODE_DEREF: + case AST_NODE_ASYNC_CALL_ONE: + case AST_NODE_ASYNC_CALL_ONE_COMMA: + case AST_NODE_ASYNC_CALL: + case AST_NODE_ASYNC_CALL_COMMA: n = tree->nodes.datas[n].lhs; continue; - // Var decls: scan backwards for modifiers (Ast.zig:634-643). + + // Var decls: scan backwards for modifiers (Ast.zig:771-792). case AST_NODE_GLOBAL_VAR_DECL: case AST_NODE_LOCAL_VAR_DECL: case AST_NODE_SIMPLE_VAR_DECL: @@ -580,7 +715,28 @@ static uint32_t firstToken(const Ast* tree, uint32_t node) { } return i; } - // Container fields: check for preceding comptime (Ast.zig:646-648). + // Fn decls: scan backwards for modifiers (Ast.zig:737-759). + case AST_NODE_FN_DECL: + case AST_NODE_FN_PROTO_SIMPLE: + case AST_NODE_FN_PROTO_MULTI: + case AST_NODE_FN_PROTO_ONE: + case AST_NODE_FN_PROTO: { + uint32_t mt = tree->nodes.main_tokens[n]; + uint32_t i = mt; + while (i > 0) { + TokenizerTag tt = tree->tokens.tags[i - 1]; + if (tt == TOKEN_KEYWORD_EXTERN || tt == TOKEN_KEYWORD_EXPORT + || tt == TOKEN_KEYWORD_PUB || tt == TOKEN_KEYWORD_INLINE + || tt == TOKEN_KEYWORD_NOINLINE + || tt == TOKEN_STRING_LITERAL) { + i--; + } else { + break; + } + } + return i; + } + // Container fields: check for preceding comptime (Ast.zig:761-769). case AST_NODE_CONTAINER_FIELD_INIT: case AST_NODE_CONTAINER_FIELD_ALIGN: case AST_NODE_CONTAINER_FIELD: { @@ -589,7 +745,18 @@ static uint32_t firstToken(const Ast* tree, uint32_t node) { return mt - 1; return mt; } - // Everything else: main_token (Ast.zig:602-643). + // Blocks: check for label (Ast.zig:794-805). 
+        case AST_NODE_BLOCK:
+        case AST_NODE_BLOCK_SEMICOLON:
+        case AST_NODE_BLOCK_TWO:
+        case AST_NODE_BLOCK_TWO_SEMICOLON: {
+            uint32_t lbrace = tree->nodes.main_tokens[n];
+            if (lbrace >= 2 && tree->tokens.tags[lbrace - 1] == TOKEN_COLON
+                && tree->tokens.tags[lbrace - 2] == TOKEN_IDENTIFIER)
+                return lbrace - 2;
+            return lbrace;
+        }
+        // Fallback for any remaining node types.
         default:
             return tree->nodes.main_tokens[n];
         }
@@ -645,6 +812,39 @@ static uint32_t findExistingString(
     return UINT32_MAX;
 }
 
+// Mirrors AstGen.tokenIdentEql (AstGen.zig:6148-6152).
+// Compares two identifier tokens by source text without touching string_bytes.
+// Bare identifiers compare their [A-Za-z0-9_] run; @"..." identifiers compare
+// the bytes between the quotes, so @"a" equals a. Escape sequences inside the
+// quotes are compared textually, not decoded.
+static bool tokenIdentEql(const Ast* tree, uint32_t tok1, uint32_t tok2) {
+    const char* src = tree->source;
+    const uint32_t toks[2] = { tok1, tok2 };
+    uint32_t start[2];
+    uint32_t len[2];
+    for (int k = 0; k < 2; k++) {
+        uint32_t s = tree->tokens.starts[toks[k]];
+        // The next token's start bounds the scan; the delta may include
+        // trailing whitespace, so the name's real end is found below.
+        uint32_t e = tree->tokens.starts[toks[k] + 1];
+        uint32_t n = 0;
+        bool quoted = s + 1 < e && src[s] == '@' && src[s + 1] == '"';
+        if (quoted)
+            s += 2;
+        while (s + n < e) {
+            char c = src[s + n];
+            bool ident_char = (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z')
+                || (c >= '0' && c <= '9') || c == '_';
+            if (quoted ? c == '"' : !ident_char)
+                break;
+            // Keep an escaped byte (e.g. \") inside the quoted name.
+            n += (quoted && c == '\\' && s + n + 1 < e) ? 2 : 1;
+        }
+        start[k] = s;
+        len[k] = n;
+    }
+    return len[0] == len[1]
+        && memcmp(src + start[0], src + start[1], len[0]) == 0;
+}
+
 // Forward declaration for strLitAsString (used by identAsString for @"..."
 // quoted identifiers with escapes).
 static void strLitAsString(AstGenCtx* ag, uint32_t str_lit_token,
@@ -3620,7 +3820,7 @@ static uint32_t exprRl(GenZir* gz, Scope* scope, ResultLoc rl, uint32_t node) {
     }
     // slice (AstGen.zig:882-939).
case AST_NODE_SLICE_OPEN: { - uint32_t lhs = expr(gz, scope, nd.lhs); + uint32_t lhs = exprRl(gz, scope, RL_REF_VAL, nd.lhs); advanceSourceCursorToMainToken(ag, node); uint32_t saved_line = ag->source_line - gz->decl_line; uint32_t saved_col = ag->source_column; @@ -3632,7 +3829,7 @@ static uint32_t exprRl(GenZir* gz, Scope* scope, ResultLoc rl, uint32_t node) { case AST_NODE_SLICE: { // Slice[rhs]: { start, end } const Ast* stree = ag->tree; - uint32_t lhs = expr(gz, scope, nd.lhs); + uint32_t lhs = exprRl(gz, scope, RL_REF_VAL, nd.lhs); advanceSourceCursorToMainToken(ag, node); uint32_t saved_line = ag->source_line - gz->decl_line; uint32_t saved_col = ag->source_column; @@ -3655,7 +3852,7 @@ static uint32_t exprRl(GenZir* gz, Scope* scope, ResultLoc rl, uint32_t node) { case AST_NODE_SLICE_SENTINEL: { // SliceSentinel[rhs]: { start, end, sentinel } const Ast* stree = ag->tree; - uint32_t lhs = expr(gz, scope, nd.lhs); + uint32_t lhs = exprRl(gz, scope, RL_REF_VAL, nd.lhs); advanceSourceCursorToMainToken(ag, node); uint32_t saved_line = ag->source_line - gz->decl_line; uint32_t saved_col = ag->source_column; @@ -3727,13 +3924,13 @@ static uint32_t exprRl(GenZir* gz, Scope* scope, ResultLoc rl, uint32_t node) { uint32_t block_inst = UINT32_MAX; if (opt_break_label != UINT32_MAX) { // Labeled break: check label on GenZir. - if (block_gz->label_token != UINT32_MAX) { - uint32_t break_name - = identAsString(ag, opt_break_label); - uint32_t label_name - = identAsString(ag, block_gz->label_token); - if (break_name == label_name) - block_inst = block_gz->label_block_inst; + // Use direct source text comparison, not identAsString, + // to avoid adding label names to string_bytes + // (AstGen.zig:2176 uses tokenIdentEql). + if (block_gz->label_token != UINT32_MAX + && tokenIdentEql(ag->tree, opt_break_label, + block_gz->label_token)) { + block_inst = block_gz->label_block_inst; } } else { // Unlabeled break: check break_block. 
@@ -7303,6 +7500,27 @@ static void fnDecl(AstGenCtx* ag, GenZir* gz, uint32_t* wip_decl_insts, } } + // Determine param name string (AstGen.zig:4283-4321). + // Must be resolved BEFORE type expression to match upstream string + // table ordering. + uint32_t param_name_str = 0; // NullTerminatedString.empty + if (name_token != 0) { + uint32_t name_start = tree->tokens.starts[name_token]; + char nch = tree->source[name_start]; + // Skip "_" params (AstGen.zig:4285-4286). + if (nch == '_') { + uint32_t next_start = tree->tokens.starts[name_token + 1]; + if (next_start == name_start + 1) { + // Single underscore: empty name. + param_name_str = 0; + } else { + param_name_str = identAsString(ag, name_token); + } + } else { + param_name_str = identAsString(ag, name_token); + } + } + // Evaluate param type expression in a sub-block // (AstGen.zig:4333-4337). GenZir param_gz = makeSubBlock(&decl_gz, params_scope); @@ -7323,25 +7541,6 @@ static void fnDecl(AstGenCtx* ag, GenZir* gz, uint32_t* wip_decl_insts, makeBreakInline(¶m_gz, param_inst_expected, param_type_ref, (int32_t)param_type_node - (int32_t)param_gz.decl_node_index); - // Determine param name string. - uint32_t param_name_str = 0; // NullTerminatedString.empty - if (name_token != 0) { - uint32_t name_start = tree->tokens.starts[name_token]; - char nch = tree->source[name_start]; - // Skip "_" params (AstGen.zig:4285-4286). - if (nch == '_') { - uint32_t next_start = tree->tokens.starts[name_token + 1]; - if (next_start == name_start + 1) { - // Single underscore: empty name. - param_name_str = 0; - } else { - param_name_str = identAsString(ag, name_token); - } - } else { - param_name_str = identAsString(ag, name_token); - } - } - // Create param instruction (AstGen.zig:4341-4343). ZirInstTag param_tag = is_comptime_param ? 
ZIR_INST_PARAM_COMPTIME : ZIR_INST_PARAM; diff --git a/astgen_test.zig b/astgen_test.zig index 0146ff7525..825611d0e1 100644 --- a/astgen_test.zig +++ b/astgen_test.zig @@ -990,7 +990,7 @@ test "astgen: corpus test_all.zig" { } test "astgen: corpus build.zig" { - if (true) return error.SkipZigTest; // TODO: 1 inst diff (1 ref) - slice LHS needs .ref rl + if (true) return error.SkipZigTest; // TODO: string_bytes ordering - struct init field name pre-registration const gpa = std.testing.allocator; try corpusCheck(gpa, "build.zig", @embedFile("build.zig")); }