astgen: fix build.zig corpus - call payload, condbr, for loop, ResultCtx

Major fixes to match upstream AstGen.zig:
- Call/FieldCall: flags at offset 0, scratch_extra for arg bodies,
  pop_error_return_trace from ResultCtx instead of hardcoded true
  (flags word layout sketched below)
- CondBr: write {condition, then_body_len, else_body_len} then bodies
  (was interleaving lengths with bodies)
- For loop: use instructionsSliceUpto, resurrect loop_scope for
  increment/repeat after then/else unstacked
- validate_struct_init_result_ty: un_node encoding (no extra payload)
- addEnsureResult: flags always at pi+0 for all call types
- addFunc: param_insts extra refs for correct body attribution
- array_init_elem_type: addBin instead of addPlNodeBin
- Pre-register struct field names for correct string ordering
- comptime break_inline: AST_NODE_OFFSET_NONE
- varDecl: pass RI_CTX_CONST_INIT context
- Rewrite test infrastructure with field-by-field ZIR comparison

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
Date: 2026-02-13 12:51:43 +00:00
Parent: 7e414347b7
Commit: 68d0917ec3
2 changed files with 535 additions and 448 deletions
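For reference, the packed u32 flags word shared by the call, field_call, and builtin_call payloads is modifier:u3, ensure_result_used:bool (bit 3), pop_error_return_trace:bool (bit 4), args_len:u27 (bits 5..31). A minimal standalone sketch of that packing follows; the helper name is hypothetical and not part of this diff:

#include <assert.h>
#include <stdbool.h>
#include <stdint.h>

/* Hypothetical helper illustrating the Call/FieldCall flags packing:
   bits 0..2 modifier, bit 3 ensure_result_used,
   bit 4 pop_error_return_trace, bits 5..31 args_len. */
static uint32_t packCallFlags(uint32_t modifier, bool ensure_result_used,
    bool pop_error_return_trace, uint32_t args_len) {
    return (modifier & 0x7u) | ((uint32_t)ensure_result_used << 3)
        | ((uint32_t)pop_error_return_trace << 4)
        | ((args_len & 0x7FFFFFFu) << 5);
}

int main(void) {
    /* Two args, error trace propagated (pop = false): only args_len set. */
    assert(packCallFlags(0, false, false, 2) == (2u << 5));
    /* A discarded result later gets bit 3 ORed in at extra[pi + 0]. */
    assert((packCallFlags(0, false, true, 0) | (1u << 3)) == 0x18u);
    return 0;
}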

astgen.c

@@ -84,6 +84,12 @@ typedef struct {
uint32_t* scratch_instructions;
uint32_t scratch_inst_len;
uint32_t scratch_inst_cap;
// Scratch extra array for call arguments (mirrors AstGen.scratch in Zig).
// Used to collect body lengths + body instructions before copying to
// extra.
uint32_t* scratch_extra;
uint32_t scratch_extra_len;
uint32_t scratch_extra_cap;
// Return type ref for the current function (set during fnDecl/testDecl).
uint32_t fn_ret_ty; // ZirInstRef
// Pointer to the fn_block GenZir for the current function (AstGen.zig:45).
@@ -169,6 +175,17 @@ static bool refTableFetchRemove(AstGenCtx* ag, uint32_t key, uint32_t* val) {
// Simplified version of ResultInfo.Loc.
// Defined here (before GenZir) because GenZir.break_result_info uses it.
// ResultInfo.Context (AstGen.zig:371-386).
typedef enum {
RI_CTX_NONE,
RI_CTX_RETURN,
RI_CTX_ERROR_HANDLING_EXPR,
RI_CTX_SHIFT_OP,
RI_CTX_FN_ARG,
RI_CTX_CONST_INIT,
RI_CTX_ASSIGNMENT,
} ResultCtx;
typedef enum {
RL_NONE, // Just compute the value.
RL_REF, // Compute a pointer to the value.
@@ -185,12 +202,18 @@ typedef struct {
uint32_t data; // ZirInstRef: ty_inst for TY/COERCED_TY, alloc inst for
// PTR/INFERRED_PTR.
uint32_t src_node; // Only used for RL_PTR.
ResultCtx ctx; // ResultInfo.Context (AstGen.zig:371).
} ResultLoc;
#define RL_NONE_VAL \
((ResultLoc) { \
.tag = RL_NONE, .data = 0, .src_node = 0, .ctx = RI_CTX_NONE })
#define RL_REF_VAL \
((ResultLoc) { \
.tag = RL_REF, .data = 0, .src_node = 0, .ctx = RI_CTX_NONE })
#define RL_DISCARD_VAL \
((ResultLoc) { \
.tag = RL_DISCARD, .data = 0, .src_node = 0, .ctx = RI_CTX_NONE })
#define RL_IS_REF(rl) ((rl).tag == RL_REF || (rl).tag == RL_REF_COERCED_TY)
// --- Scope types (AstGen.zig:11621-11768) ---
@@ -286,6 +309,19 @@ static const uint32_t* gzInstructionsSlice(const GenZir* gz) {
return gz->astgen->scratch_instructions + gz->instructions_top;
}
// Mirrors GenZir.instructionsSliceUpto (AstGen.zig:11835).
// Returns instructions from gz up to (but not including) stacked_gz's start.
static uint32_t gzInstructionsLenUpto(
const GenZir* gz, const GenZir* stacked_gz) {
return stacked_gz->instructions_top - gz->instructions_top;
}
static const uint32_t* gzInstructionsSliceUpto(
const GenZir* gz, const GenZir* stacked_gz) {
(void)stacked_gz; // used only for length computation
return gz->astgen->scratch_instructions + gz->instructions_top;
}
// Mirrors GenZir.unstack (AstGen.zig:11822).
// Restores the shared array length to this scope's start.
static void gzUnstack(GenZir* gz) {
@@ -438,6 +474,16 @@ static uint32_t addInt(GenZir* gz, uint64_t integer) {
return addInstruction(gz, ZIR_INST_INT, data);
}
// Mirrors GenZir.add for bin data (Zir.zig:1877).
// Creates an instruction with bin data (lhs + rhs stored in inst_datas).
static uint32_t addBin(
GenZir* gz, ZirInstTag tag, uint32_t lhs, uint32_t rhs) {
ZirInstData data;
data.bin.lhs = lhs;
data.bin.rhs = rhs;
return addInstruction(gz, tag, data);
}
// Mirrors GenZir.addPlNode (AstGen.zig:12308).
// Creates an instruction with pl_node data and 2-word payload.
static uint32_t addPlNodeBin(
@@ -1125,9 +1171,55 @@ static void appendPossiblyRefdBodyInst(AstGenCtx* ag, uint32_t body_inst) {
}
}
// Mirrors appendBodyWithFixupsExtraRefsArrayList (AstGen.zig:13659-13673).
// First processes extra_refs (e.g. param_insts), prepending their ref_table
// entries. Then writes body instructions with ref_table fixups.
static void appendBodyWithFixupsExtraRefs(AstGenCtx* ag, const uint32_t* body,
uint32_t body_len, const uint32_t* extra_refs, uint32_t extra_refs_len) {
for (uint32_t i = 0; i < extra_refs_len; i++) {
uint32_t ref_inst;
if (refTableFetchRemove(ag, extra_refs[i], &ref_inst)) {
appendPossiblyRefdBodyInst(ag, ref_inst);
}
}
for (uint32_t i = 0; i < body_len; i++) {
appendPossiblyRefdBodyInst(ag, body[i]);
}
}
// Scratch extra capacity helper (for call arg bodies).
static void ensureScratchExtraCapacity(AstGenCtx* ag, uint32_t additional) {
uint32_t needed = ag->scratch_extra_len + additional;
if (needed > ag->scratch_extra_cap) {
uint32_t new_cap = ag->scratch_extra_cap * 2;
if (new_cap < needed)
new_cap = needed;
if (new_cap < 64)
new_cap = 64;
uint32_t* p = realloc(ag->scratch_extra, new_cap * sizeof(uint32_t));
if (!p)
exit(1);
ag->scratch_extra = p;
ag->scratch_extra_cap = new_cap;
}
}
// Like appendPossiblyRefdBodyInst but appends to scratch_extra instead of
// extra.
static void appendPossiblyRefdBodyInstScratch(
AstGenCtx* ag, uint32_t body_inst) {
ag->scratch_extra[ag->scratch_extra_len++] = body_inst;
uint32_t ref_inst;
if (refTableFetchRemove(ag, body_inst, &ref_inst)) {
ensureScratchExtraCapacity(ag, 1);
appendPossiblyRefdBodyInstScratch(ag, ref_inst);
}
}
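As a worked example of the fixup scheme these helpers share (indices hypothetical):

/* With body = [5] and a ref_table entry 5 -> 6 (a pending `ref` of
   instruction 5), the emitted body is [5, 6]: each instruction is written
   first, then its ref, recursing in case 6 has a ref_table entry of its
   own. countBodyLenAfterFixups* reports 2 for this body accordingly. */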
// Mirrors countBodyLenAfterFixupsExtraRefs (AstGen.zig:13694-13711).
static uint32_t countBodyLenAfterFixupsExtraRefs(AstGenCtx* ag,
const uint32_t* body, uint32_t body_len, const uint32_t* extra_refs,
uint32_t extra_refs_len) {
uint32_t count = body_len;
for (uint32_t i = 0; i < body_len; i++) {
uint32_t check_inst = body[i];
@@ -1137,9 +1229,23 @@ static uint32_t countBodyLenAfterFixups(
check_inst = *ref;
}
}
for (uint32_t i = 0; i < extra_refs_len; i++) {
uint32_t check_inst = extra_refs[i];
const uint32_t* ref;
while ((ref = refTableGet(ag, check_inst)) != NULL) {
count++;
check_inst = *ref;
}
}
return count;
}
// Mirrors countBodyLenAfterFixups (AstGen.zig:13686-13688).
static uint32_t countBodyLenAfterFixups(
AstGenCtx* ag, const uint32_t* body, uint32_t body_len) {
return countBodyLenAfterFixupsExtraRefs(ag, body, body_len, NULL, 0);
}
// Mirrors GenZir.setBlockBody (AstGen.zig:11949).
// Writes Block payload (body_len + instruction indices) to extra.
// Sets the instruction's payload_index. Unstacks gz.
@@ -1236,26 +1342,29 @@ static uint32_t addCondBr(GenZir* gz, ZirInstTag tag, uint32_t node) {
return idx;
}
// Mirrors setCondBrPayload (AstGen.zig:6501).
// Writes CondBr payload: {condition, then_body_len, else_body_len} then
// then_body instructions, then else_body instructions. Unstacks both scopes.
// IMPORTANT: then_gz and else_gz are stacked (else on top of then), so
// then's instructions must use instructionsSliceUpto(else_gz) to avoid
// including else_gz's instructions in then's body.
static void setCondBrPayload(AstGenCtx* ag, uint32_t condbr_inst,
uint32_t condition, GenZir* then_gz, GenZir* else_gz) {
uint32_t raw_then_len = gzInstructionsLenUpto(then_gz, else_gz);
const uint32_t* then_body = gzInstructionsSliceUpto(then_gz, else_gz);
uint32_t raw_else_len = gzInstructionsLen(else_gz);
const uint32_t* else_body = gzInstructionsSlice(else_gz);
uint32_t then_len = countBodyLenAfterFixups(ag, then_body, raw_then_len);
uint32_t else_len = countBodyLenAfterFixups(ag, else_body, raw_else_len);
ensureExtraCapacity(ag, 3 + then_len + else_len);
uint32_t payload_index = ag->extra_len;
ag->extra[ag->extra_len++] = condition; // CondBr.condition
ag->extra[ag->extra_len++] = then_len; // CondBr.then_body_len
ag->extra[ag->extra_len++] = else_len; // CondBr.else_body_len
for (uint32_t i = 0; i < raw_then_len; i++)
appendPossiblyRefdBodyInst(ag, then_body[i]);
for (uint32_t i = 0; i < raw_else_len; i++)
appendPossiblyRefdBodyInst(ag, else_body[i]);
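Concretely, for a condbr whose then body fixes up to two instructions and whose else body to one (all values hypothetical), the payload written above is:

/* Hypothetical condbr payload: then_len = 2, else_len = 1.
   Header words first, then both bodies back to back. */
static const uint32_t example_condbr_extra[] = {
    12, /* [0] CondBr.condition (ZirInstRef)  */
    2,  /* [1] CondBr.then_body_len           */
    1,  /* [2] CondBr.else_body_len           */
    7,  /* [3] then_body[0] (instruction idx) */
    8,  /* [4] then_body[1]                   */
    9,  /* [5] else_body[0]                   */
};
/* The previous layout wrote else_body_len between the two bodies, which
   no longer matches upstream Zir.Inst.CondBr. */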
@@ -1641,7 +1750,9 @@ static void writeImports(AstGenCtx* ag) {
// ri.br() (AstGen.zig:274-282): convert coerced_ty to ty for branching.
static inline ResultLoc rlBr(ResultLoc rl) {
if (rl.tag == RL_COERCED_TY) {
return (ResultLoc) {
.tag = RL_TY, .data = rl.data, .src_node = 0, .ctx = rl.ctx
};
}
return rl;
}
@@ -1662,11 +1773,15 @@ static ResultLoc breakResultInfo(
uint32_t ptr_ty
= addUnNode(gz, ZIR_INST_TYPEOF, parent_rl.data, node);
uint32_t ty = addUnNode(gz, ZIR_INST_ELEM_TYPE, ptr_ty, node);
block_ri = (ResultLoc) {
.tag = RL_TY, .data = ty, .src_node = 0, .ctx = parent_rl.ctx
};
break;
}
case RL_INFERRED_PTR:
block_ri = (ResultLoc) {
.tag = RL_NONE, .data = 0, .src_node = 0, .ctx = parent_rl.ctx
};
break;
default:
block_ri = parent_rl;
@@ -1676,10 +1791,14 @@ static ResultLoc breakResultInfo(
// Then: setBreakResultInfo (AstGen.zig:11910-11925).
switch (block_ri.tag) {
case RL_COERCED_TY:
return (ResultLoc) { .tag = RL_TY,
.data = block_ri.data,
.src_node = 0,
.ctx = block_ri.ctx };
case RL_DISCARD:
return (ResultLoc) {
.tag = RL_DISCARD, .data = 0, .src_node = 0, .ctx = block_ri.ctx
};
default:
return block_ri;
}
@@ -2012,7 +2131,11 @@ static uint32_t tryResolvePrimitiveIdent(GenZir* gz, uint32_t node);
// SimpleComptimeReason (std.zig:727) — values used in block_comptime payload.
#define COMPTIME_REASON_TYPE 29
#define COMPTIME_REASON_ARRAY_SENTINEL 30
#define COMPTIME_REASON_POINTER_SENTINEL 31
#define COMPTIME_REASON_SLICE_SENTINEL 32
#define COMPTIME_REASON_ARRAY_LENGTH 33
#define COMPTIME_REASON_ALIGN 50
#define COMPTIME_REASON_ADDRSPACE 51
#define COMPTIME_REASON_COMPTIME_KEYWORD 53
#define COMPTIME_REASON_SWITCH_ITEM 56
@@ -2611,62 +2734,180 @@ static uint32_t fieldAccessExpr(
}
// --- ptrType (AstGen.zig:3833) ---
// Simplified: handles []const T and []T slice types.
static uint32_t ptrTypeExpr(GenZir* gz, Scope* scope, uint32_t node) {
AstGenCtx* ag = gz->astgen;
const Ast* tree = ag->tree;
AstNodeTag tag = tree->nodes.tags[node];
AstData nd = tree->nodes.datas[node];
uint32_t main_tok = tree->nodes.main_tokens[node];
// child_type is always in rhs for all ptr_type variants.
uint32_t child_type_node = nd.rhs;
// Determine size from main_token (Ast.zig:2122-2131).
// Pointer.Size: one=0, many=1, slice=2, c=3.
uint8_t size;
TokenizerTag main_tok_tag = tree->tokens.tags[main_tok];
if (main_tok_tag == TOKEN_ASTERISK
|| main_tok_tag == TOKEN_ASTERISK_ASTERISK) {
size = 0; // one
} else {
assert(main_tok_tag == TOKEN_L_BRACKET);
TokenizerTag next_tag = tree->tokens.tags[main_tok + 1];
if (next_tag == TOKEN_ASTERISK) {
// [*c]T vs [*]T: c-pointer if next-next is identifier.
if (tree->tokens.tags[main_tok + 2] == TOKEN_IDENTIFIER)
size = 3; // c
else
size = 1; // many
} else {
size = 2; // slice
}
}
// Determine sentinel, align, addrspace, bit_range nodes from AST variant
// (Ast.zig:1656-1696).
uint32_t sentinel_node = UINT32_MAX;
uint32_t align_node = UINT32_MAX;
uint32_t addrspace_node = UINT32_MAX;
uint32_t bit_range_start = UINT32_MAX;
uint32_t bit_range_end = UINT32_MAX;
if (tag == AST_NODE_PTR_TYPE_ALIGNED) {
// opt_node_and_node: lhs = optional align_node (0=none), rhs = child.
if (nd.lhs != 0)
align_node = nd.lhs;
} else if (tag == AST_NODE_PTR_TYPE_SENTINEL) {
// opt_node_and_node: lhs = optional sentinel (0=none), rhs = child.
if (nd.lhs != 0)
sentinel_node = nd.lhs;
} else if (tag == AST_NODE_PTR_TYPE) {
// extra_and_node: lhs = extra index to AstPtrType, rhs = child_type.
const AstPtrType* pt
= (const AstPtrType*)(tree->extra_data.arr + nd.lhs);
if (pt->sentinel != UINT32_MAX)
sentinel_node = pt->sentinel;
if (pt->align_node != UINT32_MAX)
align_node = pt->align_node;
if (pt->addrspace_node != UINT32_MAX)
addrspace_node = pt->addrspace_node;
} else if (tag == AST_NODE_PTR_TYPE_BIT_RANGE) {
// extra_and_node: lhs = extra index to AstPtrTypeBitRange.
const AstPtrTypeBitRange* pt
= (const AstPtrTypeBitRange*)(tree->extra_data.arr + nd.lhs);
if (pt->sentinel != UINT32_MAX)
sentinel_node = pt->sentinel;
align_node = pt->align_node;
if (pt->addrspace_node != UINT32_MAX)
addrspace_node = pt->addrspace_node;
bit_range_start = pt->bit_range_start;
bit_range_end = pt->bit_range_end;
}
// Scan tokens between main_token and child_type to find const/volatile/
// allowzero (Ast.zig:2139-2164).
bool has_const = false;
bool has_volatile = false;
bool has_allowzero = false;
{
uint32_t i;
if (sentinel_node != UINT32_MAX) {
i = lastToken(tree, sentinel_node) + 1;
} else if (size == 1 || size == 3) {
// many or c: start after main_token.
i = main_tok + 1;
} else {
i = main_tok;
}
uint32_t end = firstToken(tree, child_type_node);
while (i < end) {
TokenizerTag tt = tree->tokens.tags[i];
if (tt == TOKEN_KEYWORD_ALLOWZERO) {
has_allowzero = true;
} else if (tt == TOKEN_KEYWORD_CONST) {
has_const = true;
} else if (tt == TOKEN_KEYWORD_VOLATILE) {
has_volatile = true;
} else if (tt == TOKEN_KEYWORD_ALIGN) {
// Skip over align expression.
if (bit_range_end != UINT32_MAX)
i = lastToken(tree, bit_range_end) + 1;
else if (align_node != UINT32_MAX)
i = lastToken(tree, align_node) + 1;
}
i++;
}
}
// Evaluate element type (AstGen.zig:3847).
uint32_t elem_type = typeExpr(gz, scope, child_type_node);
// Evaluate trailing expressions (AstGen.zig:3856-3897).
uint32_t sentinel_ref = ZIR_REF_NONE;
uint32_t align_ref = ZIR_REF_NONE;
uint32_t addrspace_ref = ZIR_REF_NONE;
uint32_t bit_start_ref = ZIR_REF_NONE;
uint32_t bit_end_ref = ZIR_REF_NONE;
uint32_t trailing_count = 0;
if (sentinel_node != UINT32_MAX) {
uint32_t reason = (size == 2) ? COMPTIME_REASON_SLICE_SENTINEL
: COMPTIME_REASON_POINTER_SENTINEL;
sentinel_ref = comptimeExpr(gz, scope, sentinel_node, reason);
trailing_count++;
}
if (addrspace_node != UINT32_MAX) {
addrspace_ref = comptimeExpr(
gz, scope, addrspace_node, COMPTIME_REASON_ADDRSPACE);
trailing_count++;
}
if (align_node != UINT32_MAX) {
align_ref = comptimeExpr(gz, scope, align_node, COMPTIME_REASON_ALIGN);
trailing_count++;
}
if (bit_range_start != UINT32_MAX) {
bit_start_ref
= comptimeExpr(gz, scope, bit_range_start, COMPTIME_REASON_TYPE);
bit_end_ref
= comptimeExpr(gz, scope, bit_range_end, COMPTIME_REASON_TYPE);
trailing_count += 2;
}
// Build PtrType payload: { elem_type, src_node } + trailing
// (AstGen.zig:3905-3921).
ensureExtraCapacity(ag, 2 + trailing_count);
uint32_t payload_index = ag->extra_len;
ag->extra[ag->extra_len++] = elem_type;
ag->extra[ag->extra_len++]
= (uint32_t)((int32_t)node - (int32_t)gz->decl_node_index);
if (sentinel_ref != ZIR_REF_NONE)
ag->extra[ag->extra_len++] = sentinel_ref;
if (align_ref != ZIR_REF_NONE)
ag->extra[ag->extra_len++] = align_ref;
if (addrspace_ref != ZIR_REF_NONE)
ag->extra[ag->extra_len++] = addrspace_ref;
if (bit_start_ref != ZIR_REF_NONE) {
ag->extra[ag->extra_len++] = bit_start_ref;
ag->extra[ag->extra_len++] = bit_end_ref;
}
// Build flags packed byte (AstGen.zig:3927-3934).
uint8_t flags = 0;
if (has_allowzero)
flags |= (1 << 0); // is_allowzero
if (!has_const)
flags |= (1 << 1); // is_mutable
if (has_volatile)
flags |= (1 << 2); // is_volatile
if (sentinel_ref != ZIR_REF_NONE)
flags |= (1 << 3); // has_sentinel
if (align_ref != ZIR_REF_NONE)
flags |= (1 << 4); // has_align
if (addrspace_ref != ZIR_REF_NONE)
flags |= (1 << 5); // has_addrspace
if (bit_start_ref != ZIR_REF_NONE)
flags |= (1 << 6); // has_bit_range
ZirInstData data;
data.ptr_type.flags = flags;
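Worked examples of the flags byte, derived from the bit assignments above (the element types are hypothetical):

/* []const u8 : flags = 0x00 (const slice, no trailing operands)
   []u8       : flags = 0x02 (is_mutable)
   [*:0]u8    : flags = 0x02 | 0x08 = 0x0a (is_mutable | has_sentinel),
                with the sentinel ref written as the first trailing word. */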
@@ -3119,17 +3360,22 @@ static uint32_t callExpr(
ag->inst_len++;
gzAppendInstruction(gz, call_index);
// call_inst ref reused for param type (AstGen.zig:10107).
uint32_t call_inst = call_index + ZIR_REF_START_INDEX;
ResultLoc arg_rl = { .tag = RL_COERCED_TY,
.data = call_inst,
.src_node = 0,
.ctx = RI_CTX_FN_ARG };
// Process arguments in sub-blocks (AstGen.zig:10096-10116). Use
// scratch_extra to collect cumulative body lengths + body instructions,
// mirroring upstream's scratch array, then copy all to extra after the
// call payload.
uint32_t scratch_top = ag->scratch_extra_len;
// Reserve space for cumulative body lengths (one per arg).
ensureScratchExtraCapacity(ag, args_len);
ag->scratch_extra_len += args_len;
for (uint32_t i = 0; i < args_len; i++) {
GenZir arg_block = makeSubBlock(gz, scope);
@@ -3141,52 +3387,71 @@ static uint32_t callExpr(
= (int32_t)args[i] - (int32_t)arg_block.decl_node_index;
makeBreakInline(&arg_block, call_index, arg_ref, param_src);
// Append arg_block body to scratch_extra (with ref_table fixups).
uint32_t raw_body_len = gzInstructionsLen(&arg_block);
const uint32_t* body = gzInstructionsSlice(&arg_block);
uint32_t fixup_len = countBodyLenAfterFixups(ag, body, raw_body_len);
ensureScratchExtraCapacity(ag, fixup_len);
for (uint32_t j = 0; j < raw_body_len; j++) {
appendPossiblyRefdBodyInstScratch(ag, body[j]);
}
// Record cumulative body length (AstGen.zig:10114).
ag->scratch_extra[scratch_top + i]
= ag->scratch_extra_len - scratch_top;
gzUnstack(&arg_block);
}
// Build call payload (AstGen.zig:10118-10168).
// Upstream layout: [flags, callee/obj_ptr, field_name_start], then
// body_lengths + body_instructions from scratch.
// Flags layout (packed): modifier:u3, ensure_result_used:bool,
// pop_error_return_trace:bool, args_len:u27.
// pop_error_return_trace = !propagate_error_trace
// (AstGen.zig:10121-10124).
bool propagate_error_trace
= (rl.ctx == RI_CTX_ERROR_HANDLING_EXPR || rl.ctx == RI_CTX_RETURN
|| rl.ctx == RI_CTX_FN_ARG || rl.ctx == RI_CTX_CONST_INIT);
uint32_t flags = (propagate_error_trace ? 0u : (1u << 4))
| ((args_len & 0x7FFFFFFu) << 5); // args_len
if (callee.is_field) {
// FieldCall: {flags, obj_ptr, field_name_start} (AstGen.zig:10148).
ensureExtraCapacity(ag, 3 + (ag->scratch_extra_len - scratch_top));
uint32_t payload_index = ag->extra_len;
ag->extra[ag->extra_len++] = flags;
ag->extra[ag->extra_len++] = callee.obj_ptr;
ag->extra[ag->extra_len++] = callee.field_name_start;
// Append scratch data (body lengths + body instructions).
if (args_len != 0) {
memcpy(ag->extra + ag->extra_len, ag->scratch_extra + scratch_top,
(ag->scratch_extra_len - scratch_top) * sizeof(uint32_t));
ag->extra_len += ag->scratch_extra_len - scratch_top;
}
ag->inst_tags[call_index] = ZIR_INST_FIELD_CALL;
ag->inst_datas[call_index].pl_node.src_node
= (int32_t)node - (int32_t)gz->decl_node_index;
ag->inst_datas[call_index].pl_node.payload_index = payload_index;
} else {
// Call: {flags, callee} (AstGen.zig:10128).
ensureExtraCapacity(ag, 2 + (ag->scratch_extra_len - scratch_top));
uint32_t payload_index = ag->extra_len;
ag->extra[ag->extra_len++] = flags;
ag->extra[ag->extra_len++] = callee.direct;
// Append scratch data (body lengths + body instructions).
if (args_len != 0) {
memcpy(ag->extra + ag->extra_len, ag->scratch_extra + scratch_top,
(ag->scratch_extra_len - scratch_top) * sizeof(uint32_t));
ag->extra_len += ag->scratch_extra_len - scratch_top;
}
ag->inst_tags[call_index] = ZIR_INST_CALL;
ag->inst_datas[call_index].pl_node.src_node
= (int32_t)node - (int32_t)gz->decl_node_index;
ag->inst_datas[call_index].pl_node.payload_index = payload_index;
}
// Restore scratch (AstGen.zig:10097 defer).
ag->scratch_extra_len = scratch_top;
return call_index + ZIR_REF_START_INDEX;
}
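To make the trailing layout concrete, a hypothetical `f(a, b)` where a's body fixes up to three instructions and b's to two yields the following Call payload (offsets are cumulative from the start of the lengths array, matching the scratch encoding above):

/* extra[pi + 0] flags   (args_len = 2 in bits 5..31)
   extra[pi + 1] callee
   extra[pi + 2] 5       end of arg 0 body: 2 length slots + 3 insts
   extra[pi + 3] 7       end of arg 1 body: 5 + 2 insts
   extra[pi + 4..6]      arg 0 body instructions
   extra[pi + 7..8]      arg 1 body instructions */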
@@ -3267,17 +3532,21 @@ static uint32_t structInitExpr(
return ZIR_REF_EMPTY_TUPLE;
}
// Pre-register all field names to match upstream string ordering.
// Upstream has a duplicate name check (AstGen.zig:1756-1806) that
// adds all field names to string_bytes before evaluating values.
for (uint32_t i = 0; i < fields_len; i++) {
uint32_t name_token = firstToken(tree, fields[i]) - 2;
identAsString(ag, name_token);
}
if (type_expr_node == 0 && fields_len > 0) {
// Anonymous struct init with RL type (AstGen.zig:1706-1731).
if (rl.tag == RL_TY || rl.tag == RL_COERCED_TY) {
uint32_t ty_inst = rl.data;
// validate_struct_init_result_ty (AstGen.zig:1840).
addUnNode(
gz, ZIR_INST_VALIDATE_STRUCT_INIT_RESULT_TY, ty_inst, node);
// structInitExprTyped (AstGen.zig:1896-1931).
ensureExtraCapacity(ag, 3 + fields_len * 2);
uint32_t payload_index = ag->extra_len;
@@ -4076,7 +4345,7 @@ static uint32_t exprRl(GenZir* gz, Scope* scope, ResultLoc rl, uint32_t node) {
uint32_t result = exprRl(&block_scope, scope, ty_only_rl, body_node);
addBreak(&block_scope, ZIR_INST_BREAK_INLINE, block_inst, result,
AST_NODE_OFFSET_NONE);
setBlockComptimeBody(
ag, &block_scope, block_inst, COMPTIME_REASON_COMPTIME_KEYWORD);
gzAppendInstruction(gz, block_inst);
@@ -4406,9 +4675,9 @@ static uint32_t arrayInitDotExpr(
uint32_t extra_start = ag->extra_len;
ag->extra_len += elem_count;
for (uint32_t i = 0; i < elem_count; i++) {
// array_init_elem_type uses bin data (AstGen.zig:1626-1632).
uint32_t elem_ty
= addBin(gz, ZIR_INST_ARRAY_INIT_ELEM_TYPE, result_ty, i);
ResultLoc elem_rl
= { .tag = RL_COERCED_TY, .data = elem_ty, .src_node = 0 };
uint32_t elem_ref = exprRl(gz, scope, elem_rl, elements[i]);
@@ -4474,8 +4743,9 @@ static uint32_t arrayInitDotExpr(
uint32_t extra_start2 = ag->extra_len;
ag->extra_len += elem_count;
for (uint32_t i = 0; i < elem_count; i++) {
// array_init_elem_type uses bin data (AstGen.zig:1626-1632).
uint32_t elem_ty = addBin(
gz, ZIR_INST_ARRAY_INIT_ELEM_TYPE, dest_arr_ty_inst, i);
ResultLoc elem_rl
= { .tag = RL_COERCED_TY, .data = elem_ty, .src_node = 0 };
uint32_t elem_ref = exprRl(gz, scope, elem_rl, elements[i]);
@@ -4549,12 +4819,14 @@ static uint32_t ifExpr(GenZir* gz, Scope* scope, ResultLoc rl, uint32_t node) {
}
}
// Create block_scope (AstGen.zig:6326-6328).
GenZir block_scope = makeSubBlock(gz, scope);
// Emit DBG_STMT for condition (AstGen.zig:6335).
// NOTE: upstream emits into parent_gz AFTER block_scope is created,
// so the dbg_stmt ends up in block_scope's range (shared array).
emitDbgNode(gz, cond_node);
// Evaluate condition (AstGen.zig:6335-6363).
uint32_t cond_inst; // the value (optional/err-union/bool)
uint32_t bool_bit; // the boolean for condbr
@@ -4822,26 +5094,31 @@ static uint32_t forExpr(
loop_scope.is_inline = is_inline;
// Load index (AstGen.zig:6955-6956).
// We need to finish loop_scope later once we have the deferred refs from
// then_scope. However, the load must be removed from instructions in the
// meantime or it appears to be part of parent_gz.
uint32_t index = addUnNode(&loop_scope, ZIR_INST_LOAD, index_ptr, node);
uint32_t index_inst = index - ZIR_REF_START_INDEX;
ag->scratch_inst_len--; // pop from loop_scope (AstGen.zig:6956)
// Condition: added to cond_scope (AstGen.zig:6958-6962).
GenZir cond_scope = makeSubBlock(gz, &loop_scope.base);
uint32_t cond
= addPlNodeBin(&cond_scope, ZIR_INST_CMP_LT, node, index, len);
// Create condbr + block (AstGen.zig:6967-6974).
ZirInstTag condbr_tag
= is_inline ? ZIR_INST_CONDBR_INLINE : ZIR_INST_CONDBR;
uint32_t condbr = addCondBr(&cond_scope, condbr_tag, node);
ZirInstTag block_tag = is_inline ? ZIR_INST_BLOCK_INLINE : ZIR_INST_BLOCK;
uint32_t cond_block = makeBlockInst(ag, block_tag, &loop_scope, node);
setBlockBody(ag, &cond_scope, cond_block);
loop_scope.break_block = loop_inst;
loop_scope.continue_block = cond_block; // AstGen.zig:6974
gzAppendInstruction(&loop_scope, cond_block);
// Then branch: loop body (AstGen.zig:6982-7065).
GenZir then_scope = makeSubBlock(gz, &cond_scope.base);
// Set up capture scopes for all inputs (AstGen.zig:6986-7045).
ScopeLocalVal capture_scopes[FOR_MAX_INPUTS];
@@ -4927,29 +5204,36 @@ static uint32_t forExpr(
AST_NODE_OFFSET_NONE);
// Else branch: break out of loop (AstGen.zig:7066-7091).
GenZir else_scope = makeSubBlock(gz, &cond_scope.base);
addBreak(&else_scope, break_tag, loop_inst, ZIR_REF_VOID_VALUE,
AST_NODE_OFFSET_NONE);
setCondBrPayload(ag, condbr, cond, &then_scope, &else_scope);
// then_scope and else_scope unstacked now. Resurrect loop_scope to
// finally finish it (AstGen.zig:7095-7113).
{
// Reset loop_scope instructions and re-add index + cond_block.
loop_scope.instructions_top = ag->scratch_inst_len;
gzAppendInstruction(&loop_scope, index_inst);
gzAppendInstruction(&loop_scope, cond_block);
// Increment the index variable (AstGen.zig:7100-7108).
uint32_t index_plus_one = addPlNodeBin(
&loop_scope, ZIR_INST_ADD_UNSAFE, node, index, ZIR_REF_ONE_USIZE);
addPlNodeBin(
&loop_scope, ZIR_INST_STORE_NODE, node, index_ptr, index_plus_one);
// Repeat (AstGen.zig:7110-7111).
ZirInstTag repeat_tag
= is_inline ? ZIR_INST_REPEAT_INLINE : ZIR_INST_REPEAT;
ZirInstData repeat_data;
memset(&repeat_data, 0, sizeof(repeat_data));
repeat_data.node = (int32_t)node - (int32_t)loop_scope.decl_node_index;
addInstruction(&loop_scope, repeat_tag, repeat_data);
}
setBlockBody(ag, &loop_scope, loop_inst);
}
gzAppendInstruction(gz, loop_inst);
uint32_t result = loop_inst + ZIR_REF_START_INDEX;
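A sketch of the ordering this produces: after the resurrection, the finished loop block body for a simple `for (xs) |x| {...}` comes out as upstream emits it (instruction names abbreviated):

/* loop_scope body after setBlockBody:
     load        (index)       re-added after resurrection
     block       (cond_block)  condbr + then/else bodies live in extra
     add_unsafe  (index + 1)
     store_node  (index_ptr)
     repeat                    jumps back to the loop block */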
@@ -5850,11 +6134,15 @@ static void varDecl(GenZir* gz, Scope* scope, uint32_t node,
ResultLoc result_info;
if (type_node != 0) {
uint32_t type_ref = typeExpr(gz, scope, type_node);
result_info = (ResultLoc) { .tag = RL_TY,
.data = type_ref,
.src_node = 0,
.ctx = RI_CTX_CONST_INIT };
} else {
result_info = (ResultLoc) { .tag = RL_NONE,
.data = 0,
.src_node = 0,
.ctx = RI_CTX_CONST_INIT };
}
// Evaluate init expression (AstGen.zig:3251-3252).
@@ -5917,6 +6205,7 @@ static void varDecl(GenZir* gz, Scope* scope, uint32_t node,
init_rl.data = var_ptr;
init_rl.src_node = 0;
}
init_rl.ctx = RI_CTX_CONST_INIT;
uint32_t init_ref = exprRl(gz, scope, init_rl, init_node);
if (ag->has_compile_errors)
@@ -6025,22 +6314,14 @@ static bool addEnsureResult(
uint32_t inst = maybe_unused_result - ZIR_REF_START_INDEX;
ZirInstTag tag = ag->inst_tags[inst];
switch (tag) {
// For call/field_call/builtin_call: set ensure_result_used flag
// (bit 3 of flags at offset 0). Flags *must* be at offset 0 in all
// three structs (AstGen.zig:2658-2665, Zir.zig:3022).
case ZIR_INST_CALL:
case ZIR_INST_FIELD_CALL:
case ZIR_INST_BUILTIN_CALL: {
uint32_t pi = ag->inst_datas[inst].pl_node.payload_index;
ag->extra[pi] |= (1u << 3); // ensure_result_used
elide_check = true;
break;
}
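Why the single extra[pi] write is valid: all three payloads now begin with the packed flags word. The BuiltinCall field order shown here is an assumption based on the cited upstream comment, not something this diff spells out:

/* Payload layouts, flags always first:
   Call        { flags, callee, arg body lens..., arg bodies... }
   FieldCall   { flags, obj_ptr, field_name_start, arg data... }
   BuiltinCall { flags, modifier, callee, args }  (assumed per Zir.zig:3022)
   So ensure_result_used is always extra[pi] |= (1u << 3). */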
@@ -7157,6 +7438,7 @@ static void addDbgVar(
static uint32_t addFunc(GenZir* gz, uint32_t src_node, uint32_t block_node,
uint32_t param_block, uint32_t ret_ref, const uint32_t* ret_body,
uint32_t ret_body_len, const uint32_t* body, uint32_t body_len,
const uint32_t* param_insts, uint32_t param_insts_len,
uint32_t lbrace_line, uint32_t lbrace_column, bool is_inferred_error) {
AstGenCtx* ag = gz->astgen;
const Ast* tree = ag->tree;
@@ -7180,7 +7462,8 @@ static uint32_t addFunc(GenZir* gz, uint32_t src_node, uint32_t block_node,
uint32_t ret_ty_packed
= ret_ty_packed_len & 0x7FFFFFFFu; // is_generic=false
uint32_t fixup_body_len = countBodyLenAfterFixupsExtraRefs(
ag, body, body_len, param_insts, param_insts_len);
ensureExtraCapacity(ag, 3 + ret_ty_packed_len + fixup_body_len + 7);
uint32_t payload_index = ag->extra_len;
ag->extra[ag->extra_len++] = ret_ty_packed; // Func.ret_ty
@@ -7195,10 +7478,10 @@ static uint32_t addFunc(GenZir* gz, uint32_t src_node, uint32_t block_node,
ag->extra[ag->extra_len++] = ret_ref;
}
// Body instructions with extra_refs for param_insts
// (AstGen.zig:12206).
appendBodyWithFixupsExtraRefs(
ag, body, body_len, param_insts, param_insts_len);
// SrcLocs (AstGen.zig:12098-12106).
uint32_t columns = (lbrace_column & 0xFFFFu) | (rbrace_column << 16);
@@ -7329,7 +7612,7 @@ static void testDecl(AstGenCtx* ag, GenZir* gz, uint32_t* wip_decl_insts,
// Create func instruction (AstGen.zig:4874-4897).
uint32_t func_ref = addFunc(&decl_block, node, body_node, decl_inst,
ZIR_REF_ANYERROR_VOID_ERROR_UNION_TYPE, NULL, 0, fn_body, fn_body_len,
NULL, 0, lbrace_line, lbrace_column, false);
// break_inline returning func to declaration (AstGen.zig:4899).
makeBreakInline(&decl_block, decl_inst, func_ref, AST_NODE_OFFSET_NONE);
@@ -7459,6 +7742,9 @@ static void fnDecl(AstGenCtx* ag, GenZir* gz, uint32_t* wip_decl_insts,
Scope* params_scope = &decl_gz.base;
ScopeLocalVal param_scopes[32];
uint32_t param_scope_count = 0;
// Collect param instruction indices (AstGen.zig:4254, 4360).
uint32_t param_insts[32];
uint32_t param_insts_len = 0;
for (uint32_t param_i = 0; param_i < params_len; param_i++) {
uint32_t param_type_node = param_nodes[param_i];
@@ -7550,6 +7836,9 @@ static void fnDecl(AstGenCtx* ag, GenZir* gz, uint32_t* wip_decl_insts,
uint32_t param_inst = addParam(
&decl_gz, &param_gz, param_tag, name_tok_for_src, param_name_str);
(void)param_inst_expected;
// Record param instruction index (AstGen.zig:4360).
if (param_insts_len < 32)
param_insts[param_insts_len++] = param_inst;
// Create ScopeLocalVal for this param (AstGen.zig:4349-4359).
if (param_name_str != 0 && param_scope_count < 32) {
@@ -7679,8 +7968,8 @@ static void fnDecl(AstGenCtx* ag, GenZir* gz, uint32_t* wip_decl_insts,
// Create func instruction (AstGen.zig:4476-4494).
uint32_t func_ref = addFunc(&decl_gz, node, body_node, decl_inst, ret_ref,
ret_body, ret_body_len, fn_body, fn_body_len, param_insts,
param_insts_len, lbrace_line, lbrace_column, is_inferred_error);
// Patch ret_body break_inline to point to func instruction
// (AstGen.zig:12199-12202).
@@ -10073,6 +10362,7 @@ Zir astGen(const Ast* ast) {
free(ag.decl_names);
free(ag.decl_nodes);
free(ag.scratch_instructions);
free(ag.scratch_extra);
free(ag.ref_table_keys);
free(ag.ref_table_vals);
free(ag.nodes_need_rl);


@@ -8,41 +8,6 @@ const c = @cImport({
@cInclude("astgen.h");
});
fn refZir(gpa: Allocator, source: [:0]const u8) !Zir {
var tree = try Ast.parse(gpa, source, .zig);
defer tree.deinit(gpa);
@@ -273,16 +238,14 @@ test "astgen: @import" {
}
fn expectEqualZir(gpa: Allocator, ref: Zir, got: c.Zir) !void {
const ref_len: u32 = @intCast(ref.instructions.len);
const ref_tags = ref.instructions.items(.tag);
const ref_datas = ref.instructions.items(.data);
// 1. Compare lengths.
try std.testing.expectEqual(ref_len, got.inst_len);
// 2. Compare instruction tags.
for (0..ref_len) |i| {
const ref_tag: u8 = @intFromEnum(ref_tags[i]);
const got_tag: u8 = @intCast(got.inst_tags[i]);
@@ -293,28 +256,13 @@ fn expectEqualZir(gpa: Allocator, ref: Zir, got: c.Zir) !void {
);
return error.TestExpectedEqual;
}
}
// 3. Compare instruction data field-by-field.
for (0..ref_len) |i| {
try expectEqualData(i, ref_tags[i], ref_datas[i], got.inst_datas[i]);
}
// 4. Compare string bytes.
const ref_sb_len: u32 = @intCast(ref.string_bytes.len);
try std.testing.expectEqual(ref_sb_len, got.string_bytes_len);
for (0..ref_sb_len) |i| {
@@ -326,6 +274,30 @@ fn expectEqualZir(gpa: Allocator, ref: Zir, got: c.Zir) !void {
return error.TestExpectedEqual;
}
}
// 5. Compare extra data (skipping hash positions).
const skip = try buildHashSkipMask(gpa, ref);
defer gpa.free(skip);
const ref_extra_len: u32 = @intCast(ref.extra.len);
try std.testing.expectEqual(ref_extra_len, got.extra_len);
for (0..ref_extra_len) |i| {
if (skip[i]) continue;
if (ref.extra[i] != got.extra[i]) {
// Show first 10 extra diffs.
var count: u32 = 0;
for (0..ref_extra_len) |j| {
if (!skip[j] and ref.extra[j] != got.extra[j]) {
std.debug.print(
"extra[{d}] mismatch: ref={d} got={d}\n",
.{ j, ref.extra[j], got.extra[j] },
);
count += 1;
if (count >= 10) break;
}
}
return error.TestExpectedEqual;
}
}
}
/// Compare a single instruction's data, dispatching by tag.
@@ -341,9 +313,24 @@ fn expectEqualData(
.extended => {
const r = ref.extended;
const g = got.extended;
// Some extended opcodes have undefined/unused small+operand.
const skip_data = switch (r.opcode) {
.dbg_empty_stmt, .astgen_error => true,
else => false,
};
const skip_small = switch (r.opcode) {
.add_with_overflow,
.sub_with_overflow,
.mul_with_overflow,
.shl_with_overflow,
.restore_err_ret_index,
.branch_hint,
=> true,
else => false,
};
if (@intFromEnum(r.opcode) != g.opcode or
(!skip_data and !skip_small and r.small != g.small) or
(!skip_data and r.operand != g.operand))
{
std.debug.print(
"inst_datas[{d}] (extended) mismatch:\n" ++
@@ -441,6 +428,7 @@ fn expectEqualData(
.ensure_result_non_error,
.restore_err_ret_index_unconditional,
.validate_struct_init_ty,
.validate_struct_init_result_ty,
.struct_init_empty_result,
.struct_init_empty,
.struct_init_empty_ref_result,
@@ -500,7 +488,6 @@ fn expectEqualData(
.struct_init_ref,
.validate_array_init_ref_ty,
.validate_array_init_ty,
=> {
const r = ref.pl_node;
const g = got.pl_node;
@@ -625,233 +612,49 @@ fn expectEqualData(
}
},
else => {
// Generic raw comparison: treat data as two u32 words.
// Tags using .node data format have undefined second word.
const ref_raw = @as([*]const u32, @ptrCast(&ref));
const got_raw = @as([*]const u32, @ptrCast(&got));
// Tags where only the first u32 word is meaningful
// (second word is padding/undefined).
const first_word_only = switch (tag) {
// .node data format (single i32):
.repeat,
.repeat_inline,
.ret_ptr,
.ret_type,
.trap,
.alloc_inferred,
.alloc_inferred_mut,
.alloc_inferred_comptime,
.alloc_inferred_comptime_mut,
// .@"unreachable" data format (src_node + padding):
.@"unreachable",
// .save_err_ret_index data format (operand only):
.save_err_ret_index,
=> true,
else => false,
};
const w1_match = ref_raw[0] == got_raw[0];
const w2_match = first_word_only or ref_raw[1] == got_raw[1];
if (!w1_match or !w2_match) {
std.debug.print(
"inst_datas[{d}] ({s}) raw mismatch:\n" ++
" ref: 0x{x:0>8} 0x{x:0>8}\n" ++
" got: 0x{x:0>8} 0x{x:0>8}\n",
.{
idx,
@tagName(tag),
ref_raw[0],
ref_raw[1],
got_raw[0],
got_raw[1],
},
);
return error.TestExpectedEqual;
}
},
}
}
@@ -863,7 +666,7 @@ const corpus_files = .{
.{ "tokenizer_test.zig", @embedFile("tokenizer_test.zig") },
};
fn corpusCheck(gpa: Allocator, source: [:0]const u8) !void {
var tree = try Ast.parse(gpa, source, .zig);
defer tree.deinit(gpa);
@@ -876,16 +679,11 @@ fn corpusCheck(gpa: Allocator, name: []const u8, source: [:0]const u8) !void {
defer c.zirDeinit(&c_zir);
if (c_zir.has_compile_errors) {
std.debug.print(" {s} -> has_compile_errors\n", .{name});
return error.ZirCompileErrors;
std.debug.print("C port returned compile errors (inst_len={d})\n", .{c_zir.inst_len});
return error.TestUnexpectedResult;
}
try expectEqualZir(gpa, ref_zir, c_zir);
}
test "astgen: struct single field" {
@@ -986,25 +784,24 @@ test "astgen: extern var" {
test "astgen: corpus test_all.zig" {
const gpa = std.testing.allocator;
try corpusCheck(gpa, "test_all.zig", @embedFile("test_all.zig"));
try corpusCheck(gpa, @embedFile("test_all.zig"));
}
test "astgen: corpus build.zig" {
const gpa = std.testing.allocator;
try corpusCheck(gpa, @embedFile("build.zig"));
}
test "astgen: corpus tokenizer_test.zig" {
if (true) return error.SkipZigTest; // TODO: string_bytes and extra_len diffs
const gpa = std.testing.allocator;
try corpusCheck(gpa, "tokenizer_test.zig", @embedFile("tokenizer_test.zig"));
try corpusCheck(gpa, @embedFile("tokenizer_test.zig"));
}
test "astgen: corpus astgen_test.zig" {
if (true) return error.SkipZigTest; // TODO: extra_len diff=-377, string_bytes diff=-1
const gpa = std.testing.allocator;
try corpusCheck(gpa, "astgen_test.zig", @embedFile("astgen_test.zig"));
try corpusCheck(gpa, @embedFile("astgen_test.zig"));
}
test "astgen: enum decl" {
@@ -1040,7 +837,7 @@ test "astgen: corpus" {
var any_fail = false;
inline for (corpus_files) |entry| {
corpusCheck(gpa, entry[1]) catch {
any_fail = true;
};
}