zig

fork of https://codeberg.org/ziglang/zig
Log | Files | Refs | README | LICENSE

commit a6e2b3fef2cfa181ae08af69290a09269858311d (tree)
parent 98162fef92f16cebe50e43b16b267193e3697d74
Author: Motiejus Jakštys <motiejus@jakstys.lt>
Date:   Thu, 26 Feb 2026 00:25:25 +0000

stage0: fix IP entry ordering for @export and add func_decl/memoized_call support

- Add IP_KEY_FUNC and IP_KEY_MEMOIZED_CALL support to InternPool
  (hash, equality, typeOf cases)
- Replace InternPoolKey.func (a simple index) with func_decl, a
  struct{owner_nav, ty}, and change memoized_call from a simple index
  to struct{func, result}
- Restructure zirStructDecl to use a multi-pass approach:
  1. Record ALL declaration names first (so comptime blocks can find
     forward-referenced declarations via DECL_REF)
  2. Pre-create func_type + func_decl + ptr_type + ptr_nav +
     memoized_call IP entries for @export targets before comptime
     block body analysis
  3. Process bodies (comptime blocks and functions)
- Extract parseDeclValueBody helper for declaration parsing
- This fixes the IP entry ordering: func_type/func_decl entries now
  appear before enum_literal entries, matching the Zig compiler's
  demand-driven resolution order

Closes 3 entries of the IP gap for neghf2.zig (853 remaining).

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>

Diffstat:
M stage0/intern_pool.c | 20 ++++++++++++++++++++
M stage0/intern_pool.h | 10 ++++++++--
M stage0/sema.c | 357 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++------------------
M stage0/verbose_intern_pool.c | 7 ++++++-
4 files changed, 309 insertions(+), 85 deletions(-)

diff --git a/stage0/intern_pool.c b/stage0/intern_pool.c @@ -83,6 +83,14 @@ static uint32_t ipHashKey(const InternPoolKey* key) { h = ipHashCombine(h, key->data.ptr_nav.ty); h = ipHashCombine(h, key->data.ptr_nav.nav); break; + case IP_KEY_FUNC: + h = ipHashCombine(h, key->data.func_decl.owner_nav); + h = ipHashCombine(h, key->data.func_decl.ty); + break; + case IP_KEY_MEMOIZED_CALL: + h = ipHashCombine(h, key->data.memoized_call.func); + h = ipHashCombine(h, key->data.memoized_call.result); + break; default: /* For other tag types, just use the tag hash. */ break; @@ -143,6 +151,12 @@ static bool ipKeysEqual(const InternPoolKey* a, const InternPoolKey* b) { case IP_KEY_PTR_NAV: return a->data.ptr_nav.ty == b->data.ptr_nav.ty && a->data.ptr_nav.nav == b->data.ptr_nav.nav; + case IP_KEY_FUNC: + return a->data.func_decl.owner_nav == b->data.func_decl.owner_nav + && a->data.func_decl.ty == b->data.func_decl.ty; + case IP_KEY_MEMOIZED_CALL: + return a->data.memoized_call.func == b->data.memoized_call.func + && a->data.memoized_call.result == b->data.memoized_call.result; default: /* Fallback: memcmp the entire data union. 
*/ return memcmp(&a->data, &b->data, sizeof(a->data)) == 0; @@ -792,6 +806,12 @@ InternPoolIndex ipTypeOf(const InternPool* ip, InternPoolIndex index) { case IP_KEY_PTR_NAV: return key.data.ptr_nav.ty; + case IP_KEY_FUNC: + return key.data.func_decl.ty; + + case IP_KEY_MEMOIZED_CALL: + return IP_INDEX_VOID_TYPE; + default: break; } diff --git a/stage0/intern_pool.h b/stage0/intern_pool.h @@ -333,7 +333,10 @@ typedef struct { SimpleValue simple_value; InternPoolIndex variable; InternPoolIndex extern_val; - InternPoolIndex func; + struct { + uint32_t owner_nav; // Nav index + InternPoolIndex ty; // function type IP index + } func_decl; TypedInt int_val; InternPoolIndex err; InternPoolIndex error_union; @@ -349,7 +352,10 @@ typedef struct { InternPoolIndex opt; InternPoolIndex aggregate; InternPoolIndex union_value; - InternPoolIndex memoized_call; + struct { + InternPoolIndex func; // func_decl IP index + InternPoolIndex result; // result value IP index + } memoized_call; struct { InternPoolIndex ty; // pointer type uint32_t nav; // Nav index (NOT an IP index) diff --git a/stage0/sema.c b/stage0/sema.c @@ -4048,7 +4048,8 @@ static AirInstRef zirCall( InternPoolKey fvk; memset(&fvk, 0, sizeof(fvk)); fvk.tag = IP_KEY_FUNC; - fvk.data.func = fti + 0xF80F80; + fvk.data.func_decl.owner_nav = 0xF80F80; + fvk.data.func_decl.ty = fti; InternPoolIndex fvi = ipIntern(sema->ip, fvk); // Emit CALL extra: {args_len=1, loaded}. @@ -5036,7 +5037,8 @@ static AirInstRef zirCall( InternPoolKey fv_key; memset(&fv_key, 0, sizeof(fv_key)); fv_key.tag = IP_KEY_FUNC; - fv_key.data.func = func_type_ip + func_inst; + fv_key.data.func_decl.owner_nav = func_inst; + fv_key.data.func_decl.ty = func_type_ip; InternPoolIndex func_val_ip = ipIntern(sema->ip, fv_key); // Emit CALL extra: {args_len, arg_refs[0..args_len]}. 
@@ -5196,7 +5198,8 @@ static AirInstRef zirCall( InternPoolKey func_key; memset(&func_key, 0, sizeof(func_key)); func_key.tag = IP_KEY_FUNC; - func_key.data.func = func_inst; // use ZIR inst as unique id + func_key.data.func_decl.owner_nav = func_inst; + func_key.data.func_decl.ty = 0; // use ZIR inst as unique id InternPoolIndex func_ip = ipIntern(sema->ip, func_key); SemaBlockInlining inlining; @@ -6052,10 +6055,220 @@ static void zirFunc(Sema* sema, SemaBlock* block, uint32_t inst) { sema, block, inst, sema->cur_decl_name, NULL, 0, NULL, IP_INDEX_NONE); } +// parseDeclValueBody: parse a declaration instruction to find its value body. +// Returns the pointer to the value body instructions and sets *out_len. +// Returns NULL if the declaration has no value body. +static const uint32_t* parseDeclValueBody( + const Sema* sema, uint32_t decl_inst, uint32_t* out_len) { + uint32_t payload + = sema->code.inst_datas[decl_inst].declaration.payload_index; + uint32_t flags_1 = sema->code.extra[payload + 5]; + uint32_t id = (flags_1 >> 27) & 0x1F; + uint32_t di = payload + 6; + if (declIdHasName(id)) + di++; + if (declIdHasLibName(id)) + di++; + uint32_t type_body_len = 0; + if (declIdHasTypeBody(id)) { + type_body_len = sema->code.extra[di]; + di++; + } + uint32_t align_body_len = 0; + uint32_t linksection_body_len = 0; + uint32_t addrspace_body_len = 0; + if (declIdHasSpecialBodies(id)) { + align_body_len = sema->code.extra[di]; + linksection_body_len = sema->code.extra[di + 1]; + addrspace_body_len = sema->code.extra[di + 2]; + di += 3; + } + uint32_t value_body_len = 0; + if (declIdHasValueBody(id)) { + value_body_len = sema->code.extra[di]; + di++; + } + di += type_body_len + align_body_len + linksection_body_len + + addrspace_body_len; + *out_len = value_body_len; + if (value_body_len == 0) + return NULL; + return &sema->code.extra[di]; +} + +// preCreateExportedFuncEntries: scan comptime blocks for @export targets +// and pre-create func_type + func_decl + ptr_type + 
ptr_nav + memoized_call +// entries for each exported function. This matches the Zig compiler's +// demand-driven resolution where DECL_REF inside a comptime block triggers +// function resolution via analyzeNavRefInner → ensureNavValUpToDate BEFORE +// any cross-module field evaluation creates enum_literal entries. +static void preCreateExportedFuncEntries( + Sema* sema, const uint32_t* decl_list, uint32_t decls_len) { + for (uint32_t d = 0; d < decls_len; d++) { + uint32_t decl_inst = decl_list[d]; + uint32_t payload + = sema->code.inst_datas[decl_inst].declaration.payload_index; + uint32_t flags_1 = sema->code.extra[payload + 5]; + uint32_t id = (flags_1 >> 27) & 0x1F; + + // Only scan comptime blocks (id == 3). + if (id != 3) + continue; + + uint32_t vb_len = 0; + const uint32_t* vb = parseDeclValueBody(sema, decl_inst, &vb_len); + if (!vb) + continue; + + // Scan for EXPORT instructions in the comptime body. + for (uint32_t vi = 0; vi < vb_len; vi++) { + uint32_t vinst = vb[vi]; + if (sema->code.inst_tags[vinst] != ZIR_INST_EXPORT) + continue; + + uint32_t export_pl + = sema->code.inst_datas[vinst].pl_node.payload_index; + uint32_t exported_ref = sema->code.extra[export_pl]; + if (exported_ref < ZIR_REF_START_INDEX) + continue; + + uint32_t exported_inst = exported_ref - ZIR_REF_START_INDEX; + ZirInstTag etag = sema->code.inst_tags[exported_inst]; + if (etag != ZIR_INST_DECL_REF && etag != ZIR_INST_DECL_VAL) + continue; + + uint32_t name_idx + = sema->code.inst_datas[exported_inst].str_tok.start; + + // Record exported name for zirFunc. + if (sema->num_exported_decl_names < MAX_EXPORTED_DECL_NAMES) + s_exported_decl_names[sema->num_exported_decl_names++] + = name_idx; + + // Find the function declaration by name. + for (uint32_t fd = 0; fd < sema->num_decls; fd++) { + if (sema->decl_names[fd] != name_idx) + continue; + + uint32_t fd_inst = sema->decl_insts[fd]; + + // Parse the declaration's value body. 
+ uint32_t fd_vb_len = 0; + const uint32_t* fd_vb + = parseDeclValueBody(sema, fd_inst, &fd_vb_len); + if (!fd_vb) + break; + + // Find ZIR_INST_FUNC / FUNC_FANCY in value body. + uint32_t func_zinst = 0; + for (uint32_t fvi = 0; fvi < fd_vb_len; fvi++) { + ZirInstTag ftag = sema->code.inst_tags[fd_vb[fvi]]; + if (ftag == ZIR_INST_FUNC || ftag == ZIR_INST_FUNC_FANCY) { + func_zinst = fd_vb[fvi]; + break; + } + } + if (func_zinst == 0) + break; + + // Parse function type from ZIR. + FuncZirInfo fi = parseFuncZir(sema, func_zinst); + + InternPoolIndex ret_ty = IP_INDEX_VOID_TYPE; + if (fi.ret_ty_body_len == 1) { + uint32_t ret_ref = sema->code.extra[fi.ret_ty_ref_pos]; + if (ret_ref < ZIR_REF_START_INDEX) + ret_ty = ret_ref; + } + + uint32_t param_count = 0; + const uint32_t* fbody = &sema->code.extra[fi.extra_index]; + for (uint32_t bi = 0; bi < fi.body_len; bi++) { + ZirInstTag btag = sema->code.inst_tags[fbody[bi]]; + if (btag == ZIR_INST_PARAM + || btag == ZIR_INST_PARAM_COMPTIME + || btag == ZIR_INST_PARAM_ANYTYPE + || btag == ZIR_INST_PARAM_ANYTYPE_COMPTIME) { + param_count++; + } else { + break; + } + } + + uint8_t cc = 0; + if (fi.is_fancy && fi.has_cc_body) + cc = 1; + + // Create func_type IP entry. + InternPoolKey ftype_key; + memset(&ftype_key, 0, sizeof(ftype_key)); + ftype_key.tag = IP_KEY_FUNC_TYPE; + ftype_key.data.func_type.return_type = ret_ty; + ftype_key.data.func_type.param_count = param_count; + ftype_key.data.func_type.cc = cc; + InternPoolIndex ftype_ip = ipIntern(sema->ip, ftype_key); + + // Find Nav for this function declaration. + uint32_t func_nav = UINT32_MAX; + uint32_t total_navs = ipNavCount(); + for (uint32_t n = 0; n < total_navs; n++) { + const Nav* nav = ipGetNav(n); + if (nav->zir_index == fd_inst) { + func_nav = n; + break; + } + } + if (func_nav == UINT32_MAX) + break; + + // Create func_decl IP entry. 
+ InternPoolKey fdk; + memset(&fdk, 0, sizeof(fdk)); + fdk.tag = IP_KEY_FUNC; + fdk.data.func_decl.owner_nav = func_nav; + fdk.data.func_decl.ty = ftype_ip; + InternPoolIndex fdecl_ip = ipIntern(sema->ip, fdk); + + // Create ptr_type (*const func_type). + InternPoolKey ptk; + memset(&ptk, 0, sizeof(ptk)); + ptk.tag = IP_KEY_PTR_TYPE; + ptk.data.ptr_type.child = ftype_ip; + ptk.data.ptr_type.sentinel = IP_INDEX_NONE; + ptk.data.ptr_type.flags + = PTR_FLAGS_SIZE_ONE | PTR_FLAGS_IS_CONST; + ptk.data.ptr_type.packed_offset = 0; + InternPoolIndex ptype_ip = ipIntern(sema->ip, ptk); + + // Create ptr_nav. + (void)internNavPtr(ptype_ip, func_nav); + + // Create memoized_call (comptime @export result). + InternPoolKey mck; + memset(&mck, 0, sizeof(mck)); + mck.tag = IP_KEY_MEMOIZED_CALL; + mck.data.memoized_call.func = fdecl_ip; + mck.data.memoized_call.result = IP_INDEX_VOID_VALUE; + (void)ipIntern(sema->ip, mck); + + break; + } + } + } +} + // zirStructDecl: process struct_decl extended instruction. // Iterates over declarations and analyzes their value bodies. // Ported from src/Sema.zig zirStructDecl (subset) and // lib/std/zig/Zir.zig declIterator / getDeclaration. +// +// Uses a multi-pass approach to match the Zig compiler's entry ordering: +// Pass 1: Record ALL declaration names (so comptime blocks can find +// forward-referenced declarations via DECL_REF). +// Pass 1.5: Pre-create func_type + func_decl + ptr_type + ptr_nav + +// memoized_call IP entries for @export targets (before any +// comptime block body analysis creates enum_literal entries). +// Pass 2: Process comptime and function bodies. static void zirStructDecl(Sema* sema, SemaBlock* block, uint32_t inst) { uint16_t small = sema->code.inst_datas[inst].extended.small; uint32_t operand = sema->code.inst_datas[inst].extended.operand; @@ -6097,111 +6310,91 @@ static void zirStructDecl(Sema* sema, SemaBlock* block, uint32_t inst) { } // extra_index now points to the declaration instruction list. 
+ const uint32_t* decl_list = &sema->code.extra[extra_index]; + + // === Pass 1: Record ALL declaration names === + // Must complete before any body analysis so that DECL_REF inside + // comptime blocks can find forward-referenced declarations. for (uint32_t d = 0; d < decls_len; d++) { - uint32_t decl_inst = sema->code.extra[extra_index + d]; + uint32_t decl_inst = decl_list[d]; assert(sema->code.inst_tags[decl_inst] == ZIR_INST_DECLARATION); uint32_t payload = sema->code.inst_datas[decl_inst].declaration.payload_index; - - // Declaration has 6 u32 fields (src_hash×4, flags_0, flags_1). - // The Id is in bits 59-63 of the packed u64 flags, i.e. - // bits 27-31 of flags_1. uint32_t flags_1 = sema->code.extra[payload + 5]; uint32_t id = (flags_1 >> 27) & 0x1F; - uint32_t di = payload + 6; - - // Extract declaration name for use by zirFunc. uint32_t decl_name = 0; - if (declIdHasName(id)) { - decl_name = sema->code.extra[di]; - di++; - } - if (declIdHasLibName(id)) - di++; - - uint32_t type_body_len = 0; - if (declIdHasTypeBody(id)) { - type_body_len = sema->code.extra[di]; - di++; - } - - uint32_t align_body_len = 0; - uint32_t linksection_body_len = 0; - uint32_t addrspace_body_len = 0; - if (declIdHasSpecialBodies(id)) { - align_body_len = sema->code.extra[di]; - linksection_body_len = sema->code.extra[di + 1]; - addrspace_body_len = sema->code.extra[di + 2]; - di += 3; - } - - uint32_t value_body_len = 0; - if (declIdHasValueBody(id)) { - value_body_len = sema->code.extra[di]; - di++; - } - - // Skip type, align, linksection, addrspace bodies. - di += type_body_len; - di += align_body_len; - di += linksection_body_len; - di += addrspace_body_len; + if (declIdHasName(id)) + decl_name = sema->code.extra[payload + 6]; - // Record declaration name→inst mapping for decl_val/decl_ref. 
if (decl_name != 0 && sema->num_decls < 64) { sema->decl_names[sema->num_decls] = decl_name; sema->decl_insts[sema->num_decls] = decl_inst; sema->num_decls++; } + } + + // === Pass 1.5: Pre-create exported function IP entries === + preCreateExportedFuncEntries(sema, decl_list, decls_len); + + // === Pass 2: Process comptime and function bodies === + for (uint32_t d = 0; d < decls_len; d++) { + uint32_t decl_inst = decl_list[d]; + uint32_t payload + = sema->code.inst_datas[decl_inst].declaration.payload_index; + uint32_t flags_1 = sema->code.extra[payload + 5]; + uint32_t id = (flags_1 >> 27) & 0x1F; + + uint32_t decl_name = 0; + if (declIdHasName(id)) + decl_name = sema->code.extra[payload + 6]; + + uint32_t vb_len = 0; + const uint32_t* vb = parseDeclValueBody(sema, decl_inst, &vb_len); + if (!vb) + continue; - // Analyze value body if present. // The upstream Zig sema is lazy: it only evaluates const // declarations when first accessed. We must only eagerly // evaluate comptime declarations (id == 3) and function // declarations (detected by ZIR_INST_FUNC / FUNC_FANCY). // All other const/var declarations are skipped here. bool is_comptime_decl = (id == 3); - if (value_body_len > 0) { - const uint32_t* value_body = &sema->code.extra[di]; - bool is_func = false; - for (uint32_t vi = 0; vi < value_body_len; vi++) { - ZirInstTag vtag = sema->code.inst_tags[value_body[vi]]; - if (vtag == ZIR_INST_FUNC || vtag == ZIR_INST_FUNC_FANCY) { - is_func = true; - break; - } + bool is_func = false; + for (uint32_t vi = 0; vi < vb_len; vi++) { + ZirInstTag vtag = sema->code.inst_tags[vb[vi]]; + if (vtag == ZIR_INST_FUNC || vtag == ZIR_INST_FUNC_FANCY) { + is_func = true; + break; } + } - if (is_func || is_comptime_decl) { - // Set declaration context so zirFunc can read - // name/linkage. 
- uint32_t old_decl_name = sema->cur_decl_name; - bool old_decl_is_export = sema->cur_decl_is_export; - sema->cur_decl_name = decl_name; - sema->cur_decl_is_export = declIdIsExport(id); - - // Reset compile errors before each declaration so a - // failure in one comptime decl doesn't cascade. - sema->has_compile_errors = false; - - if (is_func) { - (void)analyzeBodyInner( - sema, block, value_body, value_body_len); - } else { - SemaBlock decl_block; - semaBlockInit(&decl_block, sema, block); - decl_block.is_comptime = true; - (void)analyzeBodyInner( - sema, &decl_block, value_body, value_body_len); - semaBlockDeinit(&decl_block); - } + if (!is_func && !is_comptime_decl) + continue; - sema->cur_decl_name = old_decl_name; - sema->cur_decl_is_export = old_decl_is_export; - } + // Set declaration context so zirFunc can read name/linkage. + uint32_t old_decl_name = sema->cur_decl_name; + bool old_decl_is_export = sema->cur_decl_is_export; + sema->cur_decl_name = decl_name; + sema->cur_decl_is_export = declIdIsExport(id); + + // Reset compile errors before each declaration so a failure + // in one comptime decl doesn't cascade. 
+ sema->has_compile_errors = false; + + if (is_func) { + (void)analyzeBodyInner(sema, block, vb, vb_len); + } else { + SemaBlock decl_block; + semaBlockInit(&decl_block, sema, block); + decl_block.is_comptime = true; + (void)analyzeBodyInner(sema, &decl_block, vb, vb_len); + semaBlockDeinit(&decl_block); } + + sema->cur_decl_name = old_decl_name; + sema->cur_decl_is_export = old_decl_is_export; } } diff --git a/stage0/verbose_intern_pool.c b/stage0/verbose_intern_pool.c @@ -111,7 +111,8 @@ void verboseIpPrint(FILE* out, const InternPool* ip) { fprintf(out, " neg"); break; case IP_KEY_FUNC: - fprintf(out, " idx=%u", key.data.func); + fprintf(out, " nav=%u ty=%u", key.data.func_decl.owner_nav, + key.data.func_decl.ty); break; case IP_KEY_ENUM_LITERAL: fprintf(out, " str_idx=%u", key.data.enum_literal); @@ -120,6 +121,10 @@ void verboseIpPrint(FILE* out, const InternPool* ip) { fprintf(out, " ty=%u val=%g", key.data.float_val.ty, key.data.float_val.val); break; + case IP_KEY_MEMOIZED_CALL: + fprintf(out, " func=%u result=%u", key.data.memoized_call.func, + key.data.memoized_call.result); + break; case IP_KEY_PTR_NAV: fprintf(out, " ty=%u nav=%u", key.data.ptr_nav.ty, key.data.ptr_nav.nav);