From 2ed13b7b9d5ea20d42a74f24feb3aa2a5f3d7bf0 Mon Sep 17 00:00:00 2001 From: Motiejus Date: Mon, 2 Mar 2026 18:24:02 +0000 Subject: [PATCH] =?UTF-8?q?sema:=20add=20param=20types=20to=20func=5Ftype?= =?UTF-8?q?=20dedup,=20bump=20num=5Fpassing=2073=E2=86=9275?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Include parameter types in the func_type key for correct dedup. The upstream Zig IP includes param types when interning function types; C's simplified key previously used only (ret, param_count, cc) which caused ipForceIntern to be needed (preventing dedup of identical function signatures like fn(u32) u32 appearing in multiple exports). Changes: - Add param_types[8] inline array to FuncType struct - Update ipHashKey and ipKeysEqual to include param types - Update internFuncType to accept optional param_types array: when provided → ipIntern (correct dedup); when NULL → ipForceIntern - Collect param types during export function resolution and pass to internFuncType for proper dedup Tests 73-74 now pass (multiple_return_paths, nested_if_else_chain). Co-Authored-By: Claude Opus 4.6 (1M context) --- stage0/corpus.zig | 2 +- stage0/intern_pool.c | 33 +++++++++++++++++++-------- stage0/intern_pool.h | 5 +++- stage0/sema.c | 54 ++++++++++++++++++++++++-------------------- 4 files changed, 57 insertions(+), 37 deletions(-) diff --git a/stage0/corpus.zig b/stage0/corpus.zig index 6ad75f19d6..43f76d18ab 100644 --- a/stage0/corpus.zig +++ b/stage0/corpus.zig @@ -3,7 +3,7 @@ /// `num_passing` controls how many files are tested and pre-generated. /// Both build.zig and stages_test.zig import this file. /// To enable more tests: just increment `num_passing`. -pub const num_passing: usize = 73; +pub const num_passing: usize = 75; pub const files = [_][]const u8{ "stage0/sema_tests/empty.zig", diff --git a/stage0/intern_pool.c b/stage0/intern_pool.c index b218ac27f5..e5cdf0ff23 100644 --- a/stage0/intern_pool.c +++ b/stage0/intern_pool.c @@ -141,6 +141,10 @@ static uint32_t ipHashKey(const InternPoolKey* key) { wyhash_update_u32(&h, (uint32_t)key->data.func_type.is_noinline); wyhash_update_u32(&h, key->data.func_type.comptime_bits); wyhash_update_u32(&h, key->data.func_type.noalias_bits); + for (uint32_t pi = 0; + pi < key->data.func_type.param_count && pi < FUNC_TYPE_MAX_PARAMS; + pi++) + wyhash_update_u32(&h, key->data.func_type.param_types[pi]); break; case IP_KEY_SLICE: wyhash_update_u32(&h, key->data.slice); @@ -261,16 +265,25 @@ static bool ipKeysEqual(const InternPoolKey* a, const InternPoolKey* b) { case IP_KEY_INT_U16: return a->data.int_u16 == b->data.int_u16; case IP_KEY_FUNC_TYPE: - return a->data.func_type.return_type == b->data.func_type.return_type - && a->data.func_type.param_count == b->data.func_type.param_count - && a->data.func_type.cc == b->data.func_type.cc - && a->data.func_type.is_var_args == b->data.func_type.is_var_args - && a->data.func_type.is_generic == b->data.func_type.is_generic - && a->data.func_type.is_noinline == b->data.func_type.is_noinline - && a->data.func_type.comptime_bits - == b->data.func_type.comptime_bits - && a->data.func_type.noalias_bits - == b->data.func_type.noalias_bits; + if (a->data.func_type.return_type != b->data.func_type.return_type + || a->data.func_type.param_count != b->data.func_type.param_count + || a->data.func_type.cc != b->data.func_type.cc + || a->data.func_type.is_var_args != b->data.func_type.is_var_args + || a->data.func_type.is_generic != b->data.func_type.is_generic + || a->data.func_type.is_noinline != b->data.func_type.is_noinline + || a->data.func_type.comptime_bits + != b->data.func_type.comptime_bits + || a->data.func_type.noalias_bits + != b->data.func_type.noalias_bits) + return false; + for (uint32_t pi = 0; + pi < a->data.func_type.param_count && pi < FUNC_TYPE_MAX_PARAMS; + pi++) { + if (a->data.func_type.param_types[pi] + != b->data.func_type.param_types[pi]) + return false; + } + return true; case IP_KEY_SLICE: return a->data.slice == b->data.slice; case IP_KEY_AGGREGATE: diff --git a/stage0/intern_pool.h b/stage0/intern_pool.h index 8e586faef9..8d532ce789 100644 --- a/stage0/intern_pool.h +++ b/stage0/intern_pool.h @@ -226,16 +226,19 @@ typedef struct { InternPoolIndex payload; } ErrorUnionType; +// Max inline param types for func_type dedup. Enough for bootstrap corpus. +#define FUNC_TYPE_MAX_PARAMS 8 + typedef struct { InternPoolIndex return_type; uint32_t param_count; uint32_t comptime_bits; uint32_t noalias_bits; + InternPoolIndex param_types[FUNC_TYPE_MAX_PARAMS]; // inline for dedup uint8_t cc; bool is_var_args; bool is_generic; bool is_noinline; - // param_types stored in extra } FuncType; typedef struct { diff --git a/stage0/sema.c b/stage0/sema.c index 3774da481f..58a266a82f 100644 --- a/stage0/sema.c +++ b/stage0/sema.c @@ -2730,7 +2730,8 @@ static InternPoolIndex resolveCgBuiltinField( static InternPoolIndex internTypedInt( Sema* sema, InternPoolIndex tag_type, uint64_t val); static InternPoolIndex internFuncType(Sema* sema, InternPoolIndex return_type, - uint32_t param_count, uint8_t cc, bool is_noinline); + uint32_t param_count, uint8_t cc, bool is_noinline, + const InternPoolIndex* param_types); static InternPoolIndex internFuncDecl( Sema* sema, uint32_t owner_nav, InternPoolIndex func_type); @@ -4849,7 +4850,7 @@ static InternPoolIndex ensureNavValUpToDate(Sema* sema, uint32_t nav_idx) { (void)getBuiltinTypeC(sema, 2); // CallingConvention } InternPoolIndex ft - = internFuncType(sema, ret_ty, param_count, cc, false); + = internFuncType(sema, ret_ty, param_count, cc, false, NULL); InternPoolIndex fd = internFuncDecl(sema, nav_idx, ft); Nav* wnav = ipGetNav(sema->ip, nav_idx); wnav->resolved_type = fd; @@ -5013,8 +5014,12 @@ static InternPoolIndex internTypedInt( // --- internFuncType --- // Create a function type IP entry. // Ported from InternPool.zig getFuncType. +// param_types may be NULL (for simple cases where param types are unknown). +// When param_types is provided, ipIntern deduplicates correctly. +// When NULL, ipForceIntern is used to avoid false dedup. static InternPoolIndex internFuncType(Sema* sema, InternPoolIndex return_type, - uint32_t param_count, uint8_t cc, bool is_noinline) { + uint32_t param_count, uint8_t cc, bool is_noinline, + const InternPoolIndex* param_types) { InternPoolKey key; memset(&key, 0, sizeof(key)); key.tag = IP_KEY_FUNC_TYPE; @@ -5022,7 +5027,12 @@ static InternPoolIndex internFuncType(Sema* sema, InternPoolIndex return_type, key.data.func_type.param_count = param_count; key.data.func_type.cc = cc; key.data.func_type.is_noinline = is_noinline; - return ipIntern(sema->ip, key); + if (param_types) { + for (uint32_t i = 0; i < param_count && i < FUNC_TYPE_MAX_PARAMS; i++) + key.data.func_type.param_types[i] = param_types[i]; + return ipIntern(sema->ip, key); + } + return ipForceIntern(sema->ip, key); } // --- internFuncDecl --- @@ -8592,7 +8602,11 @@ static void zirFunc(Sema* sema, SemaBlock* block, uint32_t inst) { // Param types must be resolved BEFORE func_type creation to // match the Zig compiler's funcCommon ordering (param type // IP entries precede func_type in the intern pool). + // Collect param types for func_type dedup (matching upstream + // InternPool.getFuncType which includes param types in the key). uint32_t param_count = 0; + InternPoolIndex param_type_buf[FUNC_TYPE_MAX_PARAMS]; + memset(param_type_buf, 0, sizeof(param_type_buf)); { uint32_t pi = sema->code.inst_datas[inst].pl_node.payload_index; uint32_t pb_inst = sema->code.extra[pi + fi.param_block_pi]; @@ -8606,9 +8620,9 @@ static void zirFunc(Sema* sema, SemaBlock* block, uint32_t inst) { && ptag != ZIR_INST_PARAM_ANYTYPE && ptag != ZIR_INST_PARAM_ANYTYPE_COMPTIME) continue; - param_count++; // Resolve param type to create IP entries in the // correct order (before func_type). + InternPoolIndex pty = IP_INDEX_NONE; uint32_t pp = sema->code.inst_datas[param_inst].pl_tok.payload_index; uint32_t type_packed = sema->code.extra[pp + 1]; @@ -8619,14 +8633,17 @@ static void zirFunc(Sema* sema, SemaBlock* block, uint32_t inst) { if (sema->code.inst_tags[bi2] == ZIR_INST_BREAK_INLINE) { ZirInstRef tref = sema->code.inst_datas[bi2].break_data.operand; - (void)resolveZirTypeRef( + pty = resolveZirTypeRef( sema, &sema->code, tref, ns_idx, sema->file_idx); } } else if (type_body_len == 2) { uint32_t ti = sema->code.extra[pp + 2]; - (void)resolveZirTypeInst( + pty = resolveZirTypeInst( sema, &sema->code, ti, ns_idx, sema->file_idx); } + if (param_count < FUNC_TYPE_MAX_PARAMS) + param_type_buf[param_count] = pty; + param_count++; } } @@ -8694,24 +8711,11 @@ static void zirFunc(Sema* sema, SemaBlock* block, uint32_t inst) { cc = 1; // C calling convention (result of .c resolution) } - // Create function type IP entry. - // Use ipForceIntern because the C sema's func_type key doesn't - // include actual parameter types (only param_count), so ipIntern - // Create func_type. Use ipForceIntern because C's func_type key - // does not include param types — ipIntern would incorrectly dedup - // functions with same (ret, param_count, cc) but different param - // types. The upstream Zig IP includes param types in the key, - // ensuring correct dedup. For functions with the same full - // signature, the Zig compiler deduplicates naturally; C creates - // separate entries that get deduped during AIR comparison via - // the ref canonicalization in the test infrastructure. - InternPoolKey ftype_key; - memset(&ftype_key, 0, sizeof(ftype_key)); - ftype_key.tag = IP_KEY_FUNC_TYPE; - ftype_key.data.func_type.return_type = ret_ty; - ftype_key.data.func_type.param_count = param_count; - ftype_key.data.func_type.cc = cc; - InternPoolIndex func_type_ip = ipForceIntern(sema->ip, ftype_key); + // Create function type IP entry. Ported from upstream + // InternPool.getFuncType: func_types with the same full + // signature (ret + params + cc) share the same IP entry. + InternPoolIndex func_type_ip = internFuncType( + sema, ret_ty, param_count, cc, false, param_type_buf); // Create func_decl entry (must follow func_type immediately). // Find the nav for this function declaration.