diff --git a/astgen.c b/astgen.c index 7182efde8f..c11de99d82 100644 --- a/astgen.c +++ b/astgen.c @@ -84,6 +84,12 @@ typedef struct { uint32_t* scratch_instructions; uint32_t scratch_inst_len; uint32_t scratch_inst_cap; + // Scratch extra array for call arguments (mirrors AstGen.scratch in Zig). + // Used to collect body lengths + body instructions before copying to + // extra. + uint32_t* scratch_extra; + uint32_t scratch_extra_len; + uint32_t scratch_extra_cap; // Return type ref for the current function (set during fnDecl/testDecl). uint32_t fn_ret_ty; // ZirInstRef // Pointer to the fn_block GenZir for the current function (AstGen.zig:45). @@ -169,6 +175,17 @@ static bool refTableFetchRemove(AstGenCtx* ag, uint32_t key, uint32_t* val) { // Simplified version of ResultInfo.Loc. // Defined here (before GenZir) because GenZir.break_result_info uses it. +// ResultInfo.Context (AstGen.zig:371-386). +typedef enum { + RI_CTX_NONE, + RI_CTX_RETURN, + RI_CTX_ERROR_HANDLING_EXPR, + RI_CTX_SHIFT_OP, + RI_CTX_FN_ARG, + RI_CTX_CONST_INIT, + RI_CTX_ASSIGNMENT, +} ResultCtx; + typedef enum { RL_NONE, // Just compute the value. RL_REF, // Compute a pointer to the value. @@ -185,12 +202,18 @@ typedef struct { uint32_t data; // ZirInstRef: ty_inst for TY/COERCED_TY, alloc inst for // PTR/INFERRED_PTR. uint32_t src_node; // Only used for RL_PTR. + ResultCtx ctx; // ResultInfo.Context (AstGen.zig:371). } ResultLoc; -#define RL_NONE_VAL ((ResultLoc) { .tag = RL_NONE, .data = 0, .src_node = 0 }) -#define RL_REF_VAL ((ResultLoc) { .tag = RL_REF, .data = 0, .src_node = 0 }) +#define RL_NONE_VAL \ + ((ResultLoc) { \ + .tag = RL_NONE, .data = 0, .src_node = 0, .ctx = RI_CTX_NONE }) +#define RL_REF_VAL \ + ((ResultLoc) { \ + .tag = RL_REF, .data = 0, .src_node = 0, .ctx = RI_CTX_NONE }) #define RL_DISCARD_VAL \ - ((ResultLoc) { .tag = RL_DISCARD, .data = 0, .src_node = 0 }) + ((ResultLoc) { \ + .tag = RL_DISCARD, .data = 0, .src_node = 0, .ctx = RI_CTX_NONE }) #define RL_IS_REF(rl) ((rl).tag == RL_REF || (rl).tag == RL_REF_COERCED_TY) // --- Scope types (AstGen.zig:11621-11768) --- @@ -286,6 +309,19 @@ static const uint32_t* gzInstructionsSlice(const GenZir* gz) { return gz->astgen->scratch_instructions + gz->instructions_top; } +// Mirrors GenZir.instructionsSliceUpto (AstGen.zig:11835). +// Returns instructions from gz up to (but not including) stacked_gz's start. +static uint32_t gzInstructionsLenUpto( + const GenZir* gz, const GenZir* stacked_gz) { + return stacked_gz->instructions_top - gz->instructions_top; +} + +static const uint32_t* gzInstructionsSliceUpto( + const GenZir* gz, const GenZir* stacked_gz) { + (void)stacked_gz; // used only for length computation + return gz->astgen->scratch_instructions + gz->instructions_top; +} + // Mirrors GenZir.unstack (AstGen.zig:11822). // Restores the shared array length to this scope's start. static void gzUnstack(GenZir* gz) { @@ -438,6 +474,16 @@ static uint32_t addInt(GenZir* gz, uint64_t integer) { return addInstruction(gz, ZIR_INST_INT, data); } +// Mirrors GenZir.add for bin data (Zir.zig:1877). +// Creates an instruction with bin data (lhs + rhs stored in inst_datas). +static uint32_t addBin( + GenZir* gz, ZirInstTag tag, uint32_t lhs, uint32_t rhs) { + ZirInstData data; + data.bin.lhs = lhs; + data.bin.rhs = rhs; + return addInstruction(gz, tag, data); +} + // Mirrors GenZir.addPlNode (AstGen.zig:12308). // Creates an instruction with pl_node data and 2-word payload. 
static uint32_t addPlNodeBin( @@ -1125,9 +1171,55 @@ static void appendPossiblyRefdBodyInst(AstGenCtx* ag, uint32_t body_inst) { } } -// Mirrors countBodyLenAfterFixups (AstGen.zig:13686-13710). -static uint32_t countBodyLenAfterFixups( - AstGenCtx* ag, const uint32_t* body, uint32_t body_len) { +// Mirrors appendBodyWithFixupsExtraRefsArrayList (AstGen.zig:13659-13673). +// First processes extra_refs (e.g. param_insts), prepending their ref_table +// entries. Then writes body instructions with ref_table fixups. +static void appendBodyWithFixupsExtraRefs(AstGenCtx* ag, const uint32_t* body, + uint32_t body_len, const uint32_t* extra_refs, uint32_t extra_refs_len) { + for (uint32_t i = 0; i < extra_refs_len; i++) { + uint32_t ref_inst; + if (refTableFetchRemove(ag, extra_refs[i], &ref_inst)) { + appendPossiblyRefdBodyInst(ag, ref_inst); + } + } + for (uint32_t i = 0; i < body_len; i++) { + appendPossiblyRefdBodyInst(ag, body[i]); + } +} + +// Scratch extra capacity helper (for call arg bodies). +static void ensureScratchExtraCapacity(AstGenCtx* ag, uint32_t additional) { + uint32_t needed = ag->scratch_extra_len + additional; + if (needed > ag->scratch_extra_cap) { + uint32_t new_cap = ag->scratch_extra_cap * 2; + if (new_cap < needed) + new_cap = needed; + if (new_cap < 64) + new_cap = 64; + uint32_t* p = realloc(ag->scratch_extra, new_cap * sizeof(uint32_t)); + if (!p) + exit(1); + ag->scratch_extra = p; + ag->scratch_extra_cap = new_cap; + } +} + +// Like appendPossiblyRefdBodyInst but appends to scratch_extra instead of +// extra. +static void appendPossiblyRefdBodyInstScratch( + AstGenCtx* ag, uint32_t body_inst) { + ag->scratch_extra[ag->scratch_extra_len++] = body_inst; + uint32_t ref_inst; + if (refTableFetchRemove(ag, body_inst, &ref_inst)) { + ensureScratchExtraCapacity(ag, 1); + appendPossiblyRefdBodyInstScratch(ag, ref_inst); + } +} + +// Mirrors countBodyLenAfterFixupsExtraRefs (AstGen.zig:13694-13711). +static uint32_t countBodyLenAfterFixupsExtraRefs(AstGenCtx* ag, + const uint32_t* body, uint32_t body_len, const uint32_t* extra_refs, + uint32_t extra_refs_len) { uint32_t count = body_len; for (uint32_t i = 0; i < body_len; i++) { uint32_t check_inst = body[i]; @@ -1137,9 +1229,23 @@ static uint32_t countBodyLenAfterFixups( check_inst = *ref; } } + for (uint32_t i = 0; i < extra_refs_len; i++) { + uint32_t check_inst = extra_refs[i]; + const uint32_t* ref; + while ((ref = refTableGet(ag, check_inst)) != NULL) { + count++; + check_inst = *ref; + } + } return count; } +// Mirrors countBodyLenAfterFixups (AstGen.zig:13686-13688). +static uint32_t countBodyLenAfterFixups( + AstGenCtx* ag, const uint32_t* body, uint32_t body_len) { + return countBodyLenAfterFixupsExtraRefs(ag, body, body_len, NULL, 0); +} + // Mirrors GenZir.setBlockBody (AstGen.zig:11949). // Writes Block payload (body_len + instruction indices) to extra. // Sets the instruction's payload_index. Unstacks gz. @@ -1236,26 +1342,29 @@ static uint32_t addCondBr(GenZir* gz, ZirInstTag tag, uint32_t node) { return idx; } -// Mirrors GenZir.setCondBrPayload (AstGen.zig:12003). -// Writes CondBr payload: condition + then_body_len + then_body + -// else_body_len + else_body. Unstacks both scopes. +// Mirrors setCondBrPayload (AstGen.zig:6501). +// Writes CondBr payload: {condition, then_body_len, else_body_len} then +// then_body instructions, then else_body instructions. Unstacks both scopes. 
+// IMPORTANT: then_gz and else_gz are stacked (else on top of then), so +// then's instructions must use instructionsSliceUpto(else_gz) to avoid +// including else_gz's instructions in then's body. static void setCondBrPayload(AstGenCtx* ag, uint32_t condbr_inst, uint32_t condition, GenZir* then_gz, GenZir* else_gz) { - uint32_t raw_then_len = gzInstructionsLen(then_gz); - const uint32_t* then_body = gzInstructionsSlice(then_gz); + uint32_t raw_then_len = gzInstructionsLenUpto(then_gz, else_gz); + const uint32_t* then_body = gzInstructionsSliceUpto(then_gz, else_gz); uint32_t raw_else_len = gzInstructionsLen(else_gz); const uint32_t* else_body = gzInstructionsSlice(else_gz); uint32_t then_len = countBodyLenAfterFixups(ag, then_body, raw_then_len); uint32_t else_len = countBodyLenAfterFixups(ag, else_body, raw_else_len); - ensureExtraCapacity(ag, 2 + then_len + 1 + else_len); + ensureExtraCapacity(ag, 3 + then_len + else_len); uint32_t payload_index = ag->extra_len; ag->extra[ag->extra_len++] = condition; // CondBr.condition ag->extra[ag->extra_len++] = then_len; // CondBr.then_body_len + ag->extra[ag->extra_len++] = else_len; // CondBr.else_body_len for (uint32_t i = 0; i < raw_then_len; i++) appendPossiblyRefdBodyInst(ag, then_body[i]); - ag->extra[ag->extra_len++] = else_len; // CondBr.else_body_len for (uint32_t i = 0; i < raw_else_len; i++) appendPossiblyRefdBodyInst(ag, else_body[i]); @@ -1641,7 +1750,9 @@ static void writeImports(AstGenCtx* ag) { // ri.br() (AstGen.zig:274-282): convert coerced_ty to ty for branching. static inline ResultLoc rlBr(ResultLoc rl) { if (rl.tag == RL_COERCED_TY) { - return (ResultLoc) { .tag = RL_TY, .data = rl.data, .src_node = 0 }; + return (ResultLoc) { + .tag = RL_TY, .data = rl.data, .src_node = 0, .ctx = rl.ctx + }; } return rl; } @@ -1662,11 +1773,15 @@ static ResultLoc breakResultInfo( uint32_t ptr_ty = addUnNode(gz, ZIR_INST_TYPEOF, parent_rl.data, node); uint32_t ty = addUnNode(gz, ZIR_INST_ELEM_TYPE, ptr_ty, node); - block_ri = (ResultLoc) { .tag = RL_TY, .data = ty, .src_node = 0 }; + block_ri = (ResultLoc) { + .tag = RL_TY, .data = ty, .src_node = 0, .ctx = parent_rl.ctx + }; break; } case RL_INFERRED_PTR: - block_ri = RL_NONE_VAL; + block_ri = (ResultLoc) { + .tag = RL_NONE, .data = 0, .src_node = 0, .ctx = parent_rl.ctx + }; break; default: block_ri = parent_rl; @@ -1676,10 +1791,14 @@ static ResultLoc breakResultInfo( // Then: setBreakResultInfo (AstGen.zig:11910-11925). switch (block_ri.tag) { case RL_COERCED_TY: - return ( - ResultLoc) { .tag = RL_TY, .data = block_ri.data, .src_node = 0 }; + return (ResultLoc) { .tag = RL_TY, + .data = block_ri.data, + .src_node = 0, + .ctx = block_ri.ctx }; case RL_DISCARD: - return RL_DISCARD_VAL; + return (ResultLoc) { + .tag = RL_DISCARD, .data = 0, .src_node = 0, .ctx = block_ri.ctx + }; default: return block_ri; } @@ -2012,7 +2131,11 @@ static uint32_t tryResolvePrimitiveIdent(GenZir* gz, uint32_t node); // SimpleComptimeReason (std.zig:727) — values used in block_comptime payload. #define COMPTIME_REASON_TYPE 29 #define COMPTIME_REASON_ARRAY_SENTINEL 30 +#define COMPTIME_REASON_POINTER_SENTINEL 31 +#define COMPTIME_REASON_SLICE_SENTINEL 32 #define COMPTIME_REASON_ARRAY_LENGTH 33 +#define COMPTIME_REASON_ALIGN 50 +#define COMPTIME_REASON_ADDRSPACE 51 #define COMPTIME_REASON_COMPTIME_KEYWORD 53 #define COMPTIME_REASON_SWITCH_ITEM 56 @@ -2611,62 +2734,180 @@ static uint32_t fieldAccessExpr( } // --- ptrType (AstGen.zig:3833) --- -// Simplified: handles []const T and []T slice types. 
static uint32_t ptrTypeExpr(GenZir* gz, Scope* scope, uint32_t node) { AstGenCtx* ag = gz->astgen; const Ast* tree = ag->tree; AstNodeTag tag = tree->nodes.tags[node]; AstData nd = tree->nodes.datas[node]; + uint32_t main_tok = tree->nodes.main_tokens[node]; - // For ptr_type_aligned: data.lhs = child_type, data.rhs = extra info. - // For simple ptr_type: data.lhs = sentinel (optional), data.rhs = - // child_type. The exact layout depends on the variant. Simplified for - // []const u8. + // child_type is always in rhs for all ptr_type variants. + uint32_t child_type_node = nd.rhs; - uint32_t child_type_node; - bool is_const = false; - uint8_t size = 2; // slice - - // Determine child type and constness from AST. - // ptr_type_aligned: main_token points to `[`, tokens after determine type. - // For `[]const u8`: - // main_token = `[`, then `]`, then `const`, then `u8` (child type node). - // data.lhs = 0 (no sentinel/align), data.rhs = child_type_node. - - if (tag == AST_NODE_PTR_TYPE_ALIGNED) { - child_type_node = nd.rhs; - // Check for 'const' by looking at tokens after main_token. - uint32_t main_tok = tree->nodes.main_tokens[node]; - // For []const T: main_token=[, then ], then const keyword. - // Check if token after ] is 'const'. - uint32_t after_bracket = main_tok + 1; // ] - uint32_t maybe_const = after_bracket + 1; - if (maybe_const < tree->tokens.len) { - uint32_t tok_start = tree->tokens.starts[maybe_const]; - if (tok_start + 5 <= tree->source_len - && memcmp(tree->source + tok_start, "const", 5) == 0) - is_const = true; - } + // Determine size from main_token (Ast.zig:2122-2131). + // Pointer.Size: one=0, many=1, slice=2, c=3. + uint8_t size; + TokenizerTag main_tok_tag = tree->tokens.tags[main_tok]; + if (main_tok_tag == TOKEN_ASTERISK + || main_tok_tag == TOKEN_ASTERISK_ASTERISK) { + size = 0; // one } else { - // Simplified: treat all other ptr types as pointers to data.rhs. - child_type_node = nd.rhs; + assert(main_tok_tag == TOKEN_L_BRACKET); + TokenizerTag next_tag = tree->tokens.tags[main_tok + 1]; + if (next_tag == TOKEN_ASTERISK) { + // [*c]T vs [*]T: c-pointer if next-next is identifier. + if (tree->tokens.tags[main_tok + 2] == TOKEN_IDENTIFIER) + size = 3; // c + else + size = 1; // many + } else { + size = 2; // slice + } } - // Evaluate element type (AstGen.zig ptrType uses typeExpr). + // Determine sentinel, align, addrspace, bit_range nodes from AST variant + // (Ast.zig:1656-1696). + uint32_t sentinel_node = UINT32_MAX; + uint32_t align_node = UINT32_MAX; + uint32_t addrspace_node = UINT32_MAX; + uint32_t bit_range_start = UINT32_MAX; + uint32_t bit_range_end = UINT32_MAX; + + if (tag == AST_NODE_PTR_TYPE_ALIGNED) { + // opt_node_and_node: lhs = optional align_node (0=none), rhs = child. + if (nd.lhs != 0) + align_node = nd.lhs; + } else if (tag == AST_NODE_PTR_TYPE_SENTINEL) { + // opt_node_and_node: lhs = optional sentinel (0=none), rhs = child. + if (nd.lhs != 0) + sentinel_node = nd.lhs; + } else if (tag == AST_NODE_PTR_TYPE) { + // extra_and_node: lhs = extra index to AstPtrType, rhs = child_type. + const AstPtrType* pt + = (const AstPtrType*)(tree->extra_data.arr + nd.lhs); + if (pt->sentinel != UINT32_MAX) + sentinel_node = pt->sentinel; + if (pt->align_node != UINT32_MAX) + align_node = pt->align_node; + if (pt->addrspace_node != UINT32_MAX) + addrspace_node = pt->addrspace_node; + } else if (tag == AST_NODE_PTR_TYPE_BIT_RANGE) { + // extra_and_node: lhs = extra index to AstPtrTypeBitRange. 
+ const AstPtrTypeBitRange* pt + = (const AstPtrTypeBitRange*)(tree->extra_data.arr + nd.lhs); + if (pt->sentinel != UINT32_MAX) + sentinel_node = pt->sentinel; + align_node = pt->align_node; + if (pt->addrspace_node != UINT32_MAX) + addrspace_node = pt->addrspace_node; + bit_range_start = pt->bit_range_start; + bit_range_end = pt->bit_range_end; + } + + // Scan tokens between main_token and child_type to find const/volatile/ + // allowzero (Ast.zig:2139-2164). + bool has_const = false; + bool has_volatile = false; + bool has_allowzero = false; + { + uint32_t i; + if (sentinel_node != UINT32_MAX) { + i = lastToken(tree, sentinel_node) + 1; + } else if (size == 1 || size == 3) { + // many or c: start after main_token. + i = main_tok + 1; + } else { + i = main_tok; + } + uint32_t end = firstToken(tree, child_type_node); + while (i < end) { + TokenizerTag tt = tree->tokens.tags[i]; + if (tt == TOKEN_KEYWORD_ALLOWZERO) { + has_allowzero = true; + } else if (tt == TOKEN_KEYWORD_CONST) { + has_const = true; + } else if (tt == TOKEN_KEYWORD_VOLATILE) { + has_volatile = true; + } else if (tt == TOKEN_KEYWORD_ALIGN) { + // Skip over align expression. + if (bit_range_end != UINT32_MAX) + i = lastToken(tree, bit_range_end) + 1; + else if (align_node != UINT32_MAX) + i = lastToken(tree, align_node) + 1; + } + i++; + } + } + + // Evaluate element type (AstGen.zig:3847). uint32_t elem_type = typeExpr(gz, scope, child_type_node); - // Build PtrType payload: { elem_type, src_node }. - ensureExtraCapacity(ag, 2); + // Evaluate trailing expressions (AstGen.zig:3856-3897). + uint32_t sentinel_ref = ZIR_REF_NONE; + uint32_t align_ref = ZIR_REF_NONE; + uint32_t addrspace_ref = ZIR_REF_NONE; + uint32_t bit_start_ref = ZIR_REF_NONE; + uint32_t bit_end_ref = ZIR_REF_NONE; + uint32_t trailing_count = 0; + + if (sentinel_node != UINT32_MAX) { + uint32_t reason = (size == 2) ? COMPTIME_REASON_SLICE_SENTINEL + : COMPTIME_REASON_POINTER_SENTINEL; + sentinel_ref = comptimeExpr(gz, scope, sentinel_node, reason); + trailing_count++; + } + if (addrspace_node != UINT32_MAX) { + addrspace_ref = comptimeExpr( + gz, scope, addrspace_node, COMPTIME_REASON_ADDRSPACE); + trailing_count++; + } + if (align_node != UINT32_MAX) { + align_ref = comptimeExpr(gz, scope, align_node, COMPTIME_REASON_ALIGN); + trailing_count++; + } + if (bit_range_start != UINT32_MAX) { + bit_start_ref + = comptimeExpr(gz, scope, bit_range_start, COMPTIME_REASON_TYPE); + bit_end_ref + = comptimeExpr(gz, scope, bit_range_end, COMPTIME_REASON_TYPE); + trailing_count += 2; + } + + // Build PtrType payload: { elem_type, src_node } + trailing + // (AstGen.zig:3905-3921). + ensureExtraCapacity(ag, 2 + trailing_count); uint32_t payload_index = ag->extra_len; ag->extra[ag->extra_len++] = elem_type; ag->extra[ag->extra_len++] = (uint32_t)((int32_t)node - (int32_t)gz->decl_node_index); + if (sentinel_ref != ZIR_REF_NONE) + ag->extra[ag->extra_len++] = sentinel_ref; + if (align_ref != ZIR_REF_NONE) + ag->extra[ag->extra_len++] = align_ref; + if (addrspace_ref != ZIR_REF_NONE) + ag->extra[ag->extra_len++] = addrspace_ref; + if (bit_start_ref != ZIR_REF_NONE) { + ag->extra[ag->extra_len++] = bit_start_ref; + ag->extra[ag->extra_len++] = bit_end_ref; + } - // Build flags packed byte. + // Build flags packed byte (AstGen.zig:3927-3934). 
uint8_t flags = 0; - if (!is_const) + if (has_allowzero) + flags |= (1 << 0); // is_allowzero + if (!has_const) flags |= (1 << 1); // is_mutable + if (has_volatile) + flags |= (1 << 2); // is_volatile + if (sentinel_ref != ZIR_REF_NONE) + flags |= (1 << 3); // has_sentinel + if (align_ref != ZIR_REF_NONE) + flags |= (1 << 4); // has_align + if (addrspace_ref != ZIR_REF_NONE) + flags |= (1 << 5); // has_addrspace + if (bit_start_ref != ZIR_REF_NONE) + flags |= (1 << 6); // has_bit_range ZirInstData data; data.ptr_type.flags = flags; @@ -3119,17 +3360,22 @@ static uint32_t callExpr( ag->inst_len++; gzAppendInstruction(gz, call_index); - // Process arguments in sub-blocks (AstGen.zig:10100-10115). - // Simplified: we collect arg body lengths into extra. - uint32_t scratch_top = ag->extra_len; - // Reserve space for arg body lengths. - ensureExtraCapacity(ag, args_len); - uint32_t arg_lengths_start = ag->extra_len; - ag->extra_len += args_len; - - // call_inst ref reused for param type (AstGen.zig:10107). + // Process arguments in sub-blocks (AstGen.zig:10096-10116). + // Upstream uses a separate scratch array; we use a local buffer for body + // lengths and append body instructions to scratch_extra, then copy all + // to extra after the call payload. uint32_t call_inst = call_index + ZIR_REF_START_INDEX; - ResultLoc arg_rl = { .tag = RL_COERCED_TY, .data = call_inst }; + ResultLoc arg_rl = { .tag = RL_COERCED_TY, + .data = call_inst, + .src_node = 0, + .ctx = RI_CTX_FN_ARG }; + + // Use scratch_extra to collect body lengths + body instructions, + // mirroring upstream's scratch array (AstGen.zig:10096-10116). + uint32_t scratch_top = ag->scratch_extra_len; + // Reserve space for cumulative body lengths (one per arg). + ensureScratchExtraCapacity(ag, args_len); + ag->scratch_extra_len += args_len; for (uint32_t i = 0; i < args_len; i++) { GenZir arg_block = makeSubBlock(gz, scope); @@ -3141,52 +3387,71 @@ static uint32_t callExpr( = (int32_t)args[i] - (int32_t)arg_block.decl_node_index; makeBreakInline(&arg_block, call_index, arg_ref, param_src); - // Copy arg_block body to extra (with ref_table fixups). + // Append arg_block body to scratch_extra (with ref_table fixups). uint32_t raw_body_len = gzInstructionsLen(&arg_block); const uint32_t* body = gzInstructionsSlice(&arg_block); uint32_t fixup_len = countBodyLenAfterFixups(ag, body, raw_body_len); - ensureExtraCapacity(ag, fixup_len); + ensureScratchExtraCapacity(ag, fixup_len); for (uint32_t j = 0; j < raw_body_len; j++) { - appendPossiblyRefdBodyInst(ag, body[j]); + appendPossiblyRefdBodyInstScratch(ag, body[j]); } - // Record cumulative body length (AstGen.zig:10113). - ag->extra[arg_lengths_start + i] - = ag->extra_len - scratch_top - args_len; + // Record cumulative body length (AstGen.zig:10114). + ag->scratch_extra[scratch_top + i] + = ag->scratch_extra_len - scratch_top; gzUnstack(&arg_block); } - // Build call payload (AstGen.zig:10124-10168). + // Build call payload (AstGen.zig:10118-10168). + // Upstream layout: [flags, callee/obj_ptr, field_name_start], then + // body_lengths + body_instructions from scratch. + // Flags layout (packed): modifier:u3, ensure_result_used:bool, + // pop_error_return_trace:bool, args_len:u27. + // pop_error_return_trace = !propagate_error_trace + // (AstGen.zig:10121-10124). + bool propagate_error_trace + = (rl.ctx == RI_CTX_ERROR_HANDLING_EXPR || rl.ctx == RI_CTX_RETURN + || rl.ctx == RI_CTX_FN_ARG || rl.ctx == RI_CTX_CONST_INIT); + uint32_t flags = (propagate_error_trace ? 
0u : (1u << 4)) + | ((args_len & 0x7FFFFFFu) << 5); // args_len + if (callee.is_field) { - // FieldCall payload: obj_ptr, field_name_start, flags. - ensureExtraCapacity(ag, 3); + // FieldCall: {flags, obj_ptr, field_name_start} (AstGen.zig:10148). + ensureExtraCapacity(ag, 3 + (ag->scratch_extra_len - scratch_top)); uint32_t payload_index = ag->extra_len; + ag->extra[ag->extra_len++] = flags; ag->extra[ag->extra_len++] = callee.obj_ptr; ag->extra[ag->extra_len++] = callee.field_name_start; - // Flags layout (packed): modifier:u3, ensure_result_used:bool, - // pop_error_return_trace:bool, args_len:u27. - uint32_t flags = (1u << 4) // pop_error_return_trace = true - | ((args_len & 0x7FFFFFFu) << 5); // args_len - ag->extra[ag->extra_len++] = flags; + // Append scratch data (body lengths + body instructions). + if (args_len != 0) { + memcpy(ag->extra + ag->extra_len, ag->scratch_extra + scratch_top, + (ag->scratch_extra_len - scratch_top) * sizeof(uint32_t)); + ag->extra_len += ag->scratch_extra_len - scratch_top; + } ag->inst_tags[call_index] = ZIR_INST_FIELD_CALL; ag->inst_datas[call_index].pl_node.src_node = (int32_t)node - (int32_t)gz->decl_node_index; ag->inst_datas[call_index].pl_node.payload_index = payload_index; } else { - // Call payload: callee, flags. - ensureExtraCapacity(ag, 2); + // Call: {flags, callee} (AstGen.zig:10128). + ensureExtraCapacity(ag, 2 + (ag->scratch_extra_len - scratch_top)); uint32_t payload_index = ag->extra_len; - ag->extra[ag->extra_len++] = callee.direct; - // Flags layout (packed): modifier:u3, ensure_result_used:bool, - // pop_error_return_trace:bool, args_len:u27. - uint32_t flags = (1u << 4) // pop_error_return_trace = true - | ((args_len & 0x7FFFFFFu) << 5); // args_len ag->extra[ag->extra_len++] = flags; + ag->extra[ag->extra_len++] = callee.direct; + // Append scratch data (body lengths + body instructions). + if (args_len != 0) { + memcpy(ag->extra + ag->extra_len, ag->scratch_extra + scratch_top, + (ag->scratch_extra_len - scratch_top) * sizeof(uint32_t)); + ag->extra_len += ag->scratch_extra_len - scratch_top; + } ag->inst_tags[call_index] = ZIR_INST_CALL; ag->inst_datas[call_index].pl_node.src_node = (int32_t)node - (int32_t)gz->decl_node_index; ag->inst_datas[call_index].pl_node.payload_index = payload_index; } + // Restore scratch (AstGen.zig:10097 defer). + ag->scratch_extra_len = scratch_top; + return call_index + ZIR_REF_START_INDEX; } @@ -3267,17 +3532,21 @@ static uint32_t structInitExpr( return ZIR_REF_EMPTY_TUPLE; } + // Pre-register all field names to match upstream string ordering. + // Upstream has a duplicate name check (AstGen.zig:1756-1806) that + // adds all field names to string_bytes before evaluating values. + for (uint32_t i = 0; i < fields_len; i++) { + uint32_t name_token = firstToken(tree, fields[i]) - 2; + identAsString(ag, name_token); + } + if (type_expr_node == 0 && fields_len > 0) { // Anonymous struct init with RL type (AstGen.zig:1706-1731). if (rl.tag == RL_TY || rl.tag == RL_COERCED_TY) { uint32_t ty_inst = rl.data; - // validate_struct_init_result_ty (AstGen.zig:1710-1713). - ensureExtraCapacity(ag, 2); - uint32_t val_payload = ag->extra_len; - ag->extra[ag->extra_len++] = ty_inst; - ag->extra[ag->extra_len++] = fields_len; - addPlNodePayloadIndex(gz, ZIR_INST_VALIDATE_STRUCT_INIT_RESULT_TY, - node, val_payload); + // validate_struct_init_result_ty (AstGen.zig:1840). + addUnNode( + gz, ZIR_INST_VALIDATE_STRUCT_INIT_RESULT_TY, ty_inst, node); // structInitExprTyped (AstGen.zig:1896-1931). 
ensureExtraCapacity(ag, 3 + fields_len * 2); uint32_t payload_index = ag->extra_len; @@ -4076,7 +4345,7 @@ static uint32_t exprRl(GenZir* gz, Scope* scope, ResultLoc rl, uint32_t node) { uint32_t result = exprRl(&block_scope, scope, ty_only_rl, body_node); addBreak(&block_scope, ZIR_INST_BREAK_INLINE, block_inst, result, - (int32_t)body_node - (int32_t)gz->decl_node_index); + AST_NODE_OFFSET_NONE); setBlockComptimeBody( ag, &block_scope, block_inst, COMPTIME_REASON_COMPTIME_KEYWORD); gzAppendInstruction(gz, block_inst); @@ -4406,9 +4675,9 @@ static uint32_t arrayInitDotExpr( uint32_t extra_start = ag->extra_len; ag->extra_len += elem_count; for (uint32_t i = 0; i < elem_count; i++) { - // array_init_elem_type (AstGen.zig:1626-1632). - uint32_t elem_ty = addPlNodeBin( - gz, ZIR_INST_ARRAY_INIT_ELEM_TYPE, elements[i], result_ty, i); + // array_init_elem_type uses bin data (AstGen.zig:1626-1632). + uint32_t elem_ty + = addBin(gz, ZIR_INST_ARRAY_INIT_ELEM_TYPE, result_ty, i); ResultLoc elem_rl = { .tag = RL_COERCED_TY, .data = elem_ty, .src_node = 0 }; uint32_t elem_ref = exprRl(gz, scope, elem_rl, elements[i]); @@ -4474,8 +4743,9 @@ static uint32_t arrayInitDotExpr( uint32_t extra_start2 = ag->extra_len; ag->extra_len += elem_count; for (uint32_t i = 0; i < elem_count; i++) { - uint32_t elem_ty = addPlNodeBin(gz, ZIR_INST_ARRAY_INIT_ELEM_TYPE, - elements[i], dest_arr_ty_inst, i); + // array_init_elem_type uses bin data (AstGen.zig:1626-1632). + uint32_t elem_ty = addBin( + gz, ZIR_INST_ARRAY_INIT_ELEM_TYPE, dest_arr_ty_inst, i); ResultLoc elem_rl = { .tag = RL_COERCED_TY, .data = elem_ty, .src_node = 0 }; uint32_t elem_ref = exprRl(gz, scope, elem_rl, elements[i]); @@ -4549,12 +4819,14 @@ static uint32_t ifExpr(GenZir* gz, Scope* scope, ResultLoc rl, uint32_t node) { } } - // Emit DBG_STMT for condition (AstGen.zig:6335). - emitDbgNode(gz, cond_node); - // Create block_scope (AstGen.zig:6326-6328). GenZir block_scope = makeSubBlock(gz, scope); + // Emit DBG_STMT for condition (AstGen.zig:6335). + // NOTE: upstream emits into parent_gz AFTER block_scope is created, + // so the dbg_stmt ends up in block_scope's range (shared array). + emitDbgNode(gz, cond_node); + // Evaluate condition (AstGen.zig:6335-6363). uint32_t cond_inst; // the value (optional/err-union/bool) uint32_t bool_bit; // the boolean for condbr @@ -4822,26 +5094,31 @@ static uint32_t forExpr( loop_scope.is_inline = is_inline; // Load index (AstGen.zig:6955-6956). + // We need to finish loop_scope later once we have the deferred refs from + // then_scope. However, the load must be removed from instructions in the + // meantime or it appears to be part of parent_gz. uint32_t index = addUnNode(&loop_scope, ZIR_INST_LOAD, index_ptr, node); + uint32_t index_inst = index - ZIR_REF_START_INDEX; + ag->scratch_inst_len--; // pop from loop_scope (AstGen.zig:6956) - // Condition: index < len (AstGen.zig:6962). + // Condition: added to cond_scope (AstGen.zig:6958-6962). + GenZir cond_scope = makeSubBlock(gz, &loop_scope.base); uint32_t cond - = addPlNodeBin(&loop_scope, ZIR_INST_CMP_LT, node, index, len); + = addPlNodeBin(&cond_scope, ZIR_INST_CMP_LT, node, index, len); // Create condbr + block (AstGen.zig:6967-6974). - GenZir cond_scope = makeSubBlock(&loop_scope, &loop_scope.base); ZirInstTag condbr_tag = is_inline ? ZIR_INST_CONDBR_INLINE : ZIR_INST_CONDBR; uint32_t condbr = addCondBr(&cond_scope, condbr_tag, node); ZirInstTag block_tag = is_inline ? 
ZIR_INST_BLOCK_INLINE : ZIR_INST_BLOCK; uint32_t cond_block = makeBlockInst(ag, block_tag, &loop_scope, node); setBlockBody(ag, &cond_scope, cond_block); + loop_scope.break_block = loop_inst; loop_scope.continue_block = cond_block; // AstGen.zig:6974 - gzAppendInstruction(&loop_scope, cond_block); // Then branch: loop body (AstGen.zig:6982-7065). - GenZir then_scope = makeSubBlock(&loop_scope, &loop_scope.base); + GenZir then_scope = makeSubBlock(gz, &cond_scope.base); // Set up capture scopes for all inputs (AstGen.zig:6986-7045). ScopeLocalVal capture_scopes[FOR_MAX_INPUTS]; @@ -4927,29 +5204,36 @@ static uint32_t forExpr( AST_NODE_OFFSET_NONE); // Else branch: break out of loop (AstGen.zig:7066-7091). - GenZir else_scope = makeSubBlock(&loop_scope, &loop_scope.base); + GenZir else_scope = makeSubBlock(gz, &cond_scope.base); addBreak(&else_scope, break_tag, loop_inst, ZIR_REF_VOID_VALUE, AST_NODE_OFFSET_NONE); setCondBrPayload(ag, condbr, cond, &then_scope, &else_scope); - // Increment index (AstGen.zig:7096-7113). - uint32_t index_plus_one = addPlNodeBin( - &loop_scope, ZIR_INST_ADD_UNSAFE, node, index, ZIR_REF_ONE_USIZE); - addPlNodeBin( - &loop_scope, ZIR_INST_STORE_NODE, node, index_ptr, index_plus_one); - - // Repeat (AstGen.zig:7110-7111). + // then_scope and else_scope unstacked now. Resurrect loop_scope to + // finally finish it (AstGen.zig:7095-7113). { + // Reset loop_scope instructions and re-add index + cond_block. + loop_scope.instructions_top = ag->scratch_inst_len; + gzAppendInstruction(&loop_scope, index_inst); + gzAppendInstruction(&loop_scope, cond_block); + + // Increment the index variable (AstGen.zig:7100-7108). + uint32_t index_plus_one = addPlNodeBin( + &loop_scope, ZIR_INST_ADD_UNSAFE, node, index, ZIR_REF_ONE_USIZE); + addPlNodeBin( + &loop_scope, ZIR_INST_STORE_NODE, node, index_ptr, index_plus_one); + + // Repeat (AstGen.zig:7110-7111). ZirInstTag repeat_tag = is_inline ? ZIR_INST_REPEAT_INLINE : ZIR_INST_REPEAT; ZirInstData repeat_data; memset(&repeat_data, 0, sizeof(repeat_data)); repeat_data.node = (int32_t)node - (int32_t)loop_scope.decl_node_index; addInstruction(&loop_scope, repeat_tag, repeat_data); - } - setBlockBody(ag, &loop_scope, loop_inst); + setBlockBody(ag, &loop_scope, loop_inst); + } gzAppendInstruction(gz, loop_inst); uint32_t result = loop_inst + ZIR_REF_START_INDEX; @@ -5850,11 +6134,15 @@ static void varDecl(GenZir* gz, Scope* scope, uint32_t node, ResultLoc result_info; if (type_node != 0) { uint32_t type_ref = typeExpr(gz, scope, type_node); - result_info = (ResultLoc) { - .tag = RL_TY, .data = type_ref, .src_node = 0 - }; + result_info = (ResultLoc) { .tag = RL_TY, + .data = type_ref, + .src_node = 0, + .ctx = RI_CTX_CONST_INIT }; } else { - result_info = RL_NONE_VAL; + result_info = (ResultLoc) { .tag = RL_NONE, + .data = 0, + .src_node = 0, + .ctx = RI_CTX_CONST_INIT }; } // Evaluate init expression (AstGen.zig:3251-3252). @@ -5917,6 +6205,7 @@ static void varDecl(GenZir* gz, Scope* scope, uint32_t node, init_rl.data = var_ptr; init_rl.src_node = 0; } + init_rl.ctx = RI_CTX_CONST_INIT; uint32_t init_ref = exprRl(gz, scope, init_rl, init_node); if (ag->has_compile_errors) @@ -6025,22 +6314,14 @@ static bool addEnsureResult( uint32_t inst = maybe_unused_result - ZIR_REF_START_INDEX; ZirInstTag tag = ag->inst_tags[inst]; switch (tag) { - // For call/field_call: set ensure_result_used flag (bit 3). 
- case ZIR_INST_CALL: { - uint32_t pi = ag->inst_datas[inst].pl_node.payload_index; - ag->extra[pi + 1] |= (1u << 3); // ensure_result_used - elide_check = true; - break; - } - case ZIR_INST_FIELD_CALL: { - uint32_t pi = ag->inst_datas[inst].pl_node.payload_index; - ag->extra[pi + 2] |= (1u << 3); // ensure_result_used - elide_check = true; - break; - } + // For call/field_call/builtin_call: set ensure_result_used flag + // (bit 3 of flags at offset 0). Flags *must* be at offset 0 in all + // three structs (AstGen.zig:2658-2665, Zir.zig:3022). + case ZIR_INST_CALL: + case ZIR_INST_FIELD_CALL: case ZIR_INST_BUILTIN_CALL: { uint32_t pi = ag->inst_datas[inst].pl_node.payload_index; - ag->extra[pi + 1] |= (1u << 3); // ensure_result_used + ag->extra[pi] |= (1u << 3); // ensure_result_used elide_check = true; break; } @@ -7157,6 +7438,7 @@ static void addDbgVar( static uint32_t addFunc(GenZir* gz, uint32_t src_node, uint32_t block_node, uint32_t param_block, uint32_t ret_ref, const uint32_t* ret_body, uint32_t ret_body_len, const uint32_t* body, uint32_t body_len, + const uint32_t* param_insts, uint32_t param_insts_len, uint32_t lbrace_line, uint32_t lbrace_column, bool is_inferred_error) { AstGenCtx* ag = gz->astgen; const Ast* tree = ag->tree; @@ -7180,7 +7462,8 @@ static uint32_t addFunc(GenZir* gz, uint32_t src_node, uint32_t block_node, uint32_t ret_ty_packed = ret_ty_packed_len & 0x7FFFFFFFu; // is_generic=false - uint32_t fixup_body_len = countBodyLenAfterFixups(ag, body, body_len); + uint32_t fixup_body_len = countBodyLenAfterFixupsExtraRefs( + ag, body, body_len, param_insts, param_insts_len); ensureExtraCapacity(ag, 3 + ret_ty_packed_len + fixup_body_len + 7); uint32_t payload_index = ag->extra_len; ag->extra[ag->extra_len++] = ret_ty_packed; // Func.ret_ty @@ -7195,10 +7478,10 @@ static uint32_t addFunc(GenZir* gz, uint32_t src_node, uint32_t block_node, ag->extra[ag->extra_len++] = ret_ref; } - // Body instructions (with ref_table fixups). - for (uint32_t i = 0; i < body_len; i++) { - appendPossiblyRefdBodyInst(ag, body[i]); - } + // Body instructions with extra_refs for param_insts + // (AstGen.zig:12206). + appendBodyWithFixupsExtraRefs( + ag, body, body_len, param_insts, param_insts_len); // SrcLocs (AstGen.zig:12098-12106). uint32_t columns = (lbrace_column & 0xFFFFu) | (rbrace_column << 16); @@ -7329,7 +7612,7 @@ static void testDecl(AstGenCtx* ag, GenZir* gz, uint32_t* wip_decl_insts, // Create func instruction (AstGen.zig:4874-4897). uint32_t func_ref = addFunc(&decl_block, node, body_node, decl_inst, ZIR_REF_ANYERROR_VOID_ERROR_UNION_TYPE, NULL, 0, fn_body, fn_body_len, - lbrace_line, lbrace_column, false); + NULL, 0, lbrace_line, lbrace_column, false); // break_inline returning func to declaration (AstGen.zig:4899). makeBreakInline(&decl_block, decl_inst, func_ref, AST_NODE_OFFSET_NONE); @@ -7459,6 +7742,9 @@ static void fnDecl(AstGenCtx* ag, GenZir* gz, uint32_t* wip_decl_insts, Scope* params_scope = &decl_gz.base; ScopeLocalVal param_scopes[32]; uint32_t param_scope_count = 0; + // Collect param instruction indices (AstGen.zig:4254, 4360). 
+ uint32_t param_insts[32]; + uint32_t param_insts_len = 0; for (uint32_t param_i = 0; param_i < params_len; param_i++) { uint32_t param_type_node = param_nodes[param_i]; @@ -7550,6 +7836,9 @@ static void fnDecl(AstGenCtx* ag, GenZir* gz, uint32_t* wip_decl_insts, uint32_t param_inst = addParam( &decl_gz, ¶m_gz, param_tag, name_tok_for_src, param_name_str); (void)param_inst_expected; + // Record param instruction index (AstGen.zig:4360). + if (param_insts_len < 32) + param_insts[param_insts_len++] = param_inst; // Create ScopeLocalVal for this param (AstGen.zig:4349-4359). if (param_name_str != 0 && param_scope_count < 32) { @@ -7679,8 +7968,8 @@ static void fnDecl(AstGenCtx* ag, GenZir* gz, uint32_t* wip_decl_insts, // Create func instruction (AstGen.zig:4476-4494). uint32_t func_ref = addFunc(&decl_gz, node, body_node, decl_inst, ret_ref, - ret_body, ret_body_len, fn_body, fn_body_len, lbrace_line, - lbrace_column, is_inferred_error); + ret_body, ret_body_len, fn_body, fn_body_len, param_insts, + param_insts_len, lbrace_line, lbrace_column, is_inferred_error); // Patch ret_body break_inline to point to func instruction // (AstGen.zig:12199-12202). @@ -10073,6 +10362,7 @@ Zir astGen(const Ast* ast) { free(ag.decl_names); free(ag.decl_nodes); free(ag.scratch_instructions); + free(ag.scratch_extra); free(ag.ref_table_keys); free(ag.ref_table_vals); free(ag.nodes_need_rl); diff --git a/astgen_test.zig b/astgen_test.zig index 825611d0e1..247925638c 100644 --- a/astgen_test.zig +++ b/astgen_test.zig @@ -8,41 +8,6 @@ const c = @cImport({ @cInclude("astgen.h"); }); -fn dumpZir(ref_zir: Zir) void { - const tags = ref_zir.instructions.items(.tag); - const datas = ref_zir.instructions.items(.data); - std.debug.print(" instructions: {d}\n", .{ref_zir.instructions.len}); - for (0..ref_zir.instructions.len) |i| { - const tag = tags[i]; - std.debug.print(" [{d}] tag={d} ({s})", .{ i, @intFromEnum(tag), @tagName(tag) }); - switch (tag) { - .extended => { - const ext = datas[i].extended; - std.debug.print(" opcode={d} small=0x{x:0>4} operand={d}", .{ @intFromEnum(ext.opcode), ext.small, ext.operand }); - }, - .declaration => { - const decl = datas[i].declaration; - std.debug.print(" src_node={d} payload_index={d}", .{ @intFromEnum(decl.src_node), decl.payload_index }); - }, - .break_inline => { - const brk = datas[i].@"break"; - std.debug.print(" operand={d} payload_index={d}", .{ @intFromEnum(brk.operand), brk.payload_index }); - }, - else => {}, - } - std.debug.print("\n", .{}); - } - std.debug.print(" extra ({d}):\n", .{ref_zir.extra.len}); - for (0..ref_zir.extra.len) |i| { - std.debug.print(" [{d}] = 0x{x:0>8} ({d})\n", .{ i, ref_zir.extra[i], ref_zir.extra[i] }); - } - std.debug.print(" string_bytes ({d}):", .{ref_zir.string_bytes.len}); - for (0..ref_zir.string_bytes.len) |i| { - std.debug.print(" {x:0>2}", .{ref_zir.string_bytes[i]}); - } - std.debug.print("\n", .{}); -} - fn refZir(gpa: Allocator, source: [:0]const u8) !Zir { var tree = try Ast.parse(gpa, source, .zig); defer tree.deinit(gpa); @@ -273,16 +238,14 @@ test "astgen: @import" { } fn expectEqualZir(gpa: Allocator, ref: Zir, got: c.Zir) !void { - // Compare instruction count. const ref_len: u32 = @intCast(ref.instructions.len); - if (ref_len != got.inst_len) { - std.debug.print("inst_len mismatch: ref={d} got={d}\n", .{ ref_len, got.inst_len }); - return error.TestExpectedEqual; - } - - // Compare instructions (tag + data) field-by-field. 
const ref_tags = ref.instructions.items(.tag); const ref_datas = ref.instructions.items(.data); + + // 1. Compare lengths. + try std.testing.expectEqual(ref_len, got.inst_len); + + // 2. Compare instruction tags. for (0..ref_len) |i| { const ref_tag: u8 = @intFromEnum(ref_tags[i]); const got_tag: u8 = @intCast(got.inst_tags[i]); @@ -293,28 +256,13 @@ fn expectEqualZir(gpa: Allocator, ref: Zir, got: c.Zir) !void { ); return error.TestExpectedEqual; } + } + + // 3. Compare instruction data field-by-field. + for (0..ref_len) |i| { try expectEqualData(i, ref_tags[i], ref_datas[i], got.inst_datas[i]); } - - // Build hash skip mask for extra comparison. - const skip = try buildHashSkipMask(gpa, ref); - defer gpa.free(skip); - - // Compare extra data, skipping hash positions. - const ref_extra_len: u32 = @intCast(ref.extra.len); - try std.testing.expectEqual(ref_extra_len, got.extra_len); - for (0..ref_extra_len) |i| { - if (skip[i]) continue; - if (ref.extra[i] != got.extra[i]) { - std.debug.print( - "extra[{d}] mismatch: ref=0x{x:0>8} got=0x{x:0>8}\n", - .{ i, ref.extra[i], got.extra[i] }, - ); - return error.TestExpectedEqual; - } - } - - // Compare string bytes. + // 4. Compare string bytes. const ref_sb_len: u32 = @intCast(ref.string_bytes.len); try std.testing.expectEqual(ref_sb_len, got.string_bytes_len); for (0..ref_sb_len) |i| { @@ -326,6 +274,30 @@ fn expectEqualZir(gpa: Allocator, ref: Zir, got: c.Zir) !void { return error.TestExpectedEqual; } } + + // 5. Compare extra data (skipping hash positions). + const skip = try buildHashSkipMask(gpa, ref); + defer gpa.free(skip); + const ref_extra_len: u32 = @intCast(ref.extra.len); + try std.testing.expectEqual(ref_extra_len, got.extra_len); + for (0..ref_extra_len) |i| { + if (skip[i]) continue; + if (ref.extra[i] != got.extra[i]) { + // Show first 10 extra diffs. + var count: u32 = 0; + for (0..ref_extra_len) |j| { + if (!skip[j] and ref.extra[j] != got.extra[j]) { + std.debug.print( + "extra[{d}] mismatch: ref={d} got={d}\n", + .{ j, ref.extra[j], got.extra[j] }, + ); + count += 1; + if (count >= 10) break; + } + } + return error.TestExpectedEqual; + } + } } /// Compare a single instruction's data, dispatching by tag. @@ -341,9 +313,24 @@ fn expectEqualData( .extended => { const r = ref.extended; const g = got.extended; + // Some extended opcodes have undefined/unused small+operand. 
+ const skip_data = switch (r.opcode) { + .dbg_empty_stmt, .astgen_error => true, + else => false, + }; + const skip_small = switch (r.opcode) { + .add_with_overflow, + .sub_with_overflow, + .mul_with_overflow, + .shl_with_overflow, + .restore_err_ret_index, + .branch_hint, + => true, + else => false, + }; if (@intFromEnum(r.opcode) != g.opcode or - r.small != g.small or - r.operand != g.operand) + (!skip_data and !skip_small and r.small != g.small) or + (!skip_data and r.operand != g.operand)) { std.debug.print( "inst_datas[{d}] (extended) mismatch:\n" ++ @@ -441,6 +428,7 @@ fn expectEqualData( .ensure_result_non_error, .restore_err_ret_index_unconditional, .validate_struct_init_ty, + .validate_struct_init_result_ty, .struct_init_empty_result, .struct_init_empty, .struct_init_empty_ref_result, @@ -500,7 +488,6 @@ fn expectEqualData( .struct_init_ref, .validate_array_init_ref_ty, .validate_array_init_ty, - .validate_struct_init_result_ty, => { const r = ref.pl_node; const g = got.pl_node; @@ -625,233 +612,49 @@ fn expectEqualData( } }, else => { - std.debug.print( - "inst_datas[{d}]: unhandled tag {d} ({s}) in comparison\n", - .{ idx, @intFromEnum(tag), @tagName(tag) }, - ); - return error.TestUnexpectedResult; - }, - } -} - -/// Silent ZIR comparison: returns true if ZIR matches, false otherwise. -/// Unlike expectEqualZir, does not print diagnostics or return errors. -fn zirMatches(_: Allocator, ref: Zir, got: c.Zir) bool { - const ref_len: u32 = @intCast(ref.instructions.len); - if (ref_len != got.inst_len) { - std.debug.print(" inst_len: ref={d} got={d}\n", .{ ref_len, got.inst_len }); - } - - { - // const elen: u32 = @intCast(ref.extra.len); - // const slen: u32 = @intCast(ref.string_bytes.len); - // std.debug.print(" inst_len: ref={d} got={d}\n", .{ ref_len, got.inst_len }); - // std.debug.print(" extra_len: ref={d} got={d} diff={d}\n", .{ elen, got.extra_len, @as(i64, elen) - @as(i64, got.extra_len) }); - // std.debug.print(" string_bytes_len: ref={d} got={d} diff={d}\n", .{ slen, got.string_bytes_len, @as(i64, slen) - @as(i64, got.string_bytes_len) }); - } - - const ref_tags = ref.instructions.items(.tag); - const ref_datas = ref.instructions.items(.data); - const min_len = @min(ref_len, got.inst_len); - var first_tag_mismatch: ?u32 = null; - for (0..min_len) |i| { - const ref_tag: u8 = @intFromEnum(ref_tags[i]); - const got_tag: u8 = @intCast(got.inst_tags[i]); - if (ref_tag != got_tag) { - first_tag_mismatch = @intCast(i); - break; - } - } - if (first_tag_mismatch) |ftm| { - const start = if (ftm > 15) ftm - 15 else 0; - const end = @min(ftm + 30, min_len); - std.debug.print(" first tag mismatch at inst[{d}]:\n", .{ftm}); - for (start..end) |i| { - const ref_tag: u8 = @intFromEnum(ref_tags[i]); - const got_tag: u8 = @intCast(got.inst_tags[i]); - const marker: u8 = if (i == ftm) '>' else ' '; - if (ref_tag == 251) { - const ext_op: u16 = @intFromEnum(ref_datas[i].extended.opcode); - std.debug.print(" {c} [{d}] ref_tag=251(EXT:{d}) got_tag={d}\n", .{ marker, i, ext_op, got_tag }); - } else { - std.debug.print(" {c} [{d}] ref_tag={d} got_tag={d}\n", .{ marker, i, ref_tag, got_tag }); + // Generic raw comparison: treat data as two u32 words. + // Tags using .node data format have undefined second word. + const ref_raw = @as([*]const u32, @ptrCast(&ref)); + const got_raw = @as([*]const u32, @ptrCast(&got)); + // Tags where only the first u32 word is meaningful + // (second word is padding/undefined). 
+ const first_word_only = switch (tag) { + // .node data format (single i32): + .repeat, + .repeat_inline, + .ret_ptr, + .ret_type, + .trap, + .alloc_inferred, + .alloc_inferred_mut, + .alloc_inferred_comptime, + .alloc_inferred_comptime_mut, + // .@"unreachable" data format (src_node + padding): + .@"unreachable", + // .save_err_ret_index data format (operand only): + .save_err_ret_index, + => true, + else => false, + }; + const w1_match = ref_raw[0] == got_raw[0]; + const w2_match = first_word_only or ref_raw[1] == got_raw[1]; + if (!w1_match or !w2_match) { + std.debug.print( + "inst_datas[{d}] ({s}) raw mismatch:\n" ++ + " ref: 0x{x:0>8} 0x{x:0>8}\n" ++ + " got: 0x{x:0>8} 0x{x:0>8}\n", + .{ + idx, + @tagName(tag), + ref_raw[0], + ref_raw[1], + got_raw[0], + got_raw[1], + }, + ); + return error.TestExpectedEqual; } - } - // Tag histogram: count each tag in ref vs got and show diffs. - var ref_hist: [256]i32 = undefined; - var got_hist: [256]i32 = undefined; - for (&ref_hist) |*h| h.* = 0; - for (&got_hist) |*h| h.* = 0; - for (0..ref_len) |j| { - ref_hist[@intFromEnum(ref_tags[j])] += 1; - } - for (0..got.inst_len) |j| { - got_hist[@as(u8, @intCast(got.inst_tags[j]))] += 1; - } - std.debug.print(" tag histogram diff (ref-got):\n", .{}); - for (0..256) |t| { - const diff = ref_hist[t] - got_hist[t]; - if (diff != 0) { - std.debug.print(" tag {d}: ref={d} got={d} diff={d}\n", .{ t, ref_hist[t], got_hist[t], diff }); - } - } - return false; - } - // Skip inst_datas comparison for now (extra indices shift). - // Go straight to extra/string_bytes. - if (ref_len != got.inst_len) return false; - - // Compare string_bytes first (smaller diff). - const ref_sb_len2: u32 = @intCast(ref.string_bytes.len); - const sb_min = @min(ref_sb_len2, got.string_bytes_len); - for (0..sb_min) |i| { - if (ref.string_bytes[i] != got.string_bytes[i]) { - // Print surrounding context. - const ctx_start = if (i > 30) i - 30 else 0; - std.debug.print(" string_bytes[{d}] first diff (ref=0x{x:0>2} got=0x{x:0>2})\n", .{ i, ref.string_bytes[i], got.string_bytes[i] }); - std.debug.print(" ref context: \"", .{}); - for (ctx_start..@min(i + 30, sb_min)) |j| { - const ch = ref.string_bytes[j]; - if (ch >= 0x20 and ch < 0x7f) { - std.debug.print("{c}", .{ch}); - } else { - std.debug.print("\\x{x:0>2}", .{ch}); - } - } - std.debug.print("\"\n", .{}); - std.debug.print(" got context: \"", .{}); - for (ctx_start..@min(i + 30, sb_min)) |j| { - const ch = got.string_bytes[j]; - if (ch >= 0x20 and ch < 0x7f) { - std.debug.print("{c}", .{ch}); - } else { - std.debug.print("\\x{x:0>2}", .{ch}); - } - } - std.debug.print("\"\n", .{}); - return false; - } - } - if (ref_sb_len2 != got.string_bytes_len) { - std.debug.print(" string_bytes_len mismatch: ref={d} got={d} (content matched up to {d})\n", .{ ref_sb_len2, got.string_bytes_len, sb_min }); - // Print what ref has at the end. 
- if (ref_sb_len2 > got.string_bytes_len) { - const extra_start = got.string_bytes_len; - std.debug.print(" ref extra at [{d}]: \"", .{extra_start}); - for (extra_start..@min(extra_start + 60, ref_sb_len2)) |j| { - const ch = ref.string_bytes[j]; - if (ch >= 0x20 and ch < 0x7f) { - std.debug.print("{c}", .{ch}); - } else { - std.debug.print("\\x{x:0>2}", .{ch}); - } - } - std.debug.print("\"\n", .{}); - } - return false; - } - - const ref_extra_len2: u32 = @intCast(ref.extra.len); - if (ref_extra_len2 != got.extra_len) { - std.debug.print(" extra_len mismatch: ref={d} got={d}\n", .{ ref_extra_len2, got.extra_len }); - return false; - } - - return true; -} - -/// Silent data comparison: returns true if fields match, false otherwise. -fn dataMatches(tag: Zir.Inst.Tag, ref: Zir.Inst.Data, got: c.ZirInstData) bool { - switch (tag) { - .extended => { - const r = ref.extended; - const g = got.extended; - return @intFromEnum(r.opcode) == g.opcode and - r.small == g.small and - r.operand == g.operand; }, - .declaration => { - const r = ref.declaration; - const g = got.declaration; - return @intFromEnum(r.src_node) == g.src_node and - r.payload_index == g.payload_index; - }, - .break_inline => { - const r = ref.@"break"; - const g = got.break_data; - return @intFromEnum(r.operand) == g.operand and - r.payload_index == g.payload_index; - }, - .import => { - const r = ref.pl_tok; - const g = got.pl_tok; - return @intFromEnum(r.src_tok) == g.src_tok and - r.payload_index == g.payload_index; - }, - .dbg_stmt => { - return ref.dbg_stmt.line == got.dbg_stmt.line and - ref.dbg_stmt.column == got.dbg_stmt.column; - }, - .ensure_result_non_error, - .restore_err_ret_index_unconditional, - .validate_struct_init_ty, - .struct_init_empty_result, - .struct_init_empty, - .struct_init_empty_ref_result, - => { - return @intFromEnum(ref.un_node.src_node) == got.un_node.src_node and - @intFromEnum(ref.un_node.operand) == got.un_node.operand; - }, - .ret_implicit => { - return @intFromEnum(ref.un_tok.src_tok) == got.un_tok.src_tok and - @intFromEnum(ref.un_tok.operand) == got.un_tok.operand; - }, - .func, - .func_inferred, - .array_type, - .array_type_sentinel, - .array_cat, - .array_init, - .array_init_ref, - .error_set_decl, - .struct_init_field_type, - .struct_init, - .struct_init_ref, - .validate_array_init_ref_ty, - .validate_array_init_ty, - .validate_struct_init_result_ty, - => { - return @intFromEnum(ref.pl_node.src_node) == got.pl_node.src_node and - ref.pl_node.payload_index == got.pl_node.payload_index; - }, - .ptr_type => { - return @as(u8, @bitCast(ref.ptr_type.flags)) == got.ptr_type.flags and - @intFromEnum(ref.ptr_type.size) == got.ptr_type.size and - ref.ptr_type.payload_index == got.ptr_type.payload_index; - }, - .int_type => { - return @intFromEnum(ref.int_type.src_node) == got.int_type.src_node and - @intFromEnum(ref.int_type.signedness) == got.int_type.signedness and - ref.int_type.bit_count == got.int_type.bit_count; - }, - .decl_val, .decl_ref => { - return @intFromEnum(ref.str_tok.start) == got.str_tok.start and - @intFromEnum(ref.str_tok.src_tok) == got.str_tok.src_tok; - }, - .field_val, .field_ptr, .field_val_named, .field_ptr_named => { - return @intFromEnum(ref.pl_node.src_node) == got.pl_node.src_node and - ref.pl_node.payload_index == got.pl_node.payload_index; - }, - .int => return ref.int == got.int_val, - .str => { - return @intFromEnum(ref.str.start) == got.str.start and - ref.str.len == got.str.len; - }, - .@"defer" => { - return ref.@"defer".index == got.defer_data.index and - 
ref.@"defer".len == got.defer_data.len; - }, - else => return false, } } @@ -863,7 +666,7 @@ const corpus_files = .{ .{ "tokenizer_test.zig", @embedFile("tokenizer_test.zig") }, }; -fn corpusCheck(gpa: Allocator, name: []const u8, source: [:0]const u8) !void { +fn corpusCheck(gpa: Allocator, source: [:0]const u8) !void { var tree = try Ast.parse(gpa, source, .zig); defer tree.deinit(gpa); @@ -876,16 +679,11 @@ fn corpusCheck(gpa: Allocator, name: []const u8, source: [:0]const u8) !void { defer c.zirDeinit(&c_zir); if (c_zir.has_compile_errors) { - std.debug.print(" {s} -> has_compile_errors\n", .{name}); - return error.ZirCompileErrors; + std.debug.print("C port returned compile errors (inst_len={d})\n", .{c_zir.inst_len}); + return error.TestUnexpectedResult; } - if (zirMatches(gpa, ref_zir, c_zir)) { - return; - } else { - std.debug.print(" {s} -> zir mismatch\n", .{name}); - return error.ZirMismatch; - } + try expectEqualZir(gpa, ref_zir, c_zir); } test "astgen: struct single field" { @@ -986,25 +784,24 @@ test "astgen: extern var" { test "astgen: corpus test_all.zig" { const gpa = std.testing.allocator; - try corpusCheck(gpa, "test_all.zig", @embedFile("test_all.zig")); + try corpusCheck(gpa, @embedFile("test_all.zig")); } test "astgen: corpus build.zig" { - if (true) return error.SkipZigTest; // TODO: string_bytes ordering - struct init field name pre-registration const gpa = std.testing.allocator; - try corpusCheck(gpa, "build.zig", @embedFile("build.zig")); + try corpusCheck(gpa, @embedFile("build.zig")); } test "astgen: corpus tokenizer_test.zig" { if (true) return error.SkipZigTest; // TODO: string_bytes and extra_len diffs const gpa = std.testing.allocator; - try corpusCheck(gpa, "tokenizer_test.zig", @embedFile("tokenizer_test.zig")); + try corpusCheck(gpa, @embedFile("tokenizer_test.zig")); } test "astgen: corpus astgen_test.zig" { if (true) return error.SkipZigTest; // TODO: extra_len diff=-377, string_bytes diff=-1 const gpa = std.testing.allocator; - try corpusCheck(gpa, "astgen_test.zig", @embedFile("astgen_test.zig")); + try corpusCheck(gpa, @embedFile("astgen_test.zig")); } test "astgen: enum decl" { @@ -1040,7 +837,7 @@ test "astgen: corpus" { var any_fail = false; inline for (corpus_files) |entry| { - corpusCheck(gpa, entry[0], entry[1]) catch { + corpusCheck(gpa, entry[1]) catch { any_fail = true; }; }