commit 2ed13b7b9d5ea20d42a74f24feb3aa2a5f3d7bf0 (tree)
parent 412316e7088bba326040f25e29ea819bc3062273
Author: Motiejus <motiejus@jakstys.lt>
Date: Mon, 2 Mar 2026 18:24:02 +0000
sema: add param types to func_type dedup, bump num_passing 73→75
Include parameter types in the func_type key so that identical signatures
dedup correctly. The upstream Zig InternPool includes param types when
interning function types; C's simplified key compared only the return type,
param_count, cc and the flag/bit fields, not the param types themselves.
zirFunc therefore had to use ipForceIntern to avoid merging different
signatures, which also prevented dedup of identical function signatures
like fn(u32) u32 appearing in multiple exports.
Changes:
- Add param_types[8] inline array (FUNC_TYPE_MAX_PARAMS) to FuncType struct;
only the first 8 param types take part in hashing and equality
- Update ipHashKey and ipKeysEqual to include param types
- Update internFuncType to accept an optional param_types array:
when provided → ipIntern (dedup on ret + params + cc); when NULL →
ipForceIntern (param types unknown, so no dedup is attempted)
- Collect param types during export function resolution and pass to
internFuncType for proper dedup
Corpus entries 73-74 (0-based indices, hence num_passing 73→75) now pass
(multiple_return_paths, nested_if_else_chain).
Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
Diffstat:
4 files changed, 57 insertions(+), 37 deletions(-)
diff --git a/stage0/corpus.zig b/stage0/corpus.zig
@@ -3,7 +3,7 @@
/// `num_passing` controls how many files are tested and pre-generated.
/// Both build.zig and stages_test.zig import this file.
/// To enable more tests: just increment `num_passing`.
-pub const num_passing: usize = 73;
+pub const num_passing: usize = 75;
pub const files = [_][]const u8{
"stage0/sema_tests/empty.zig",
diff --git a/stage0/intern_pool.c b/stage0/intern_pool.c
@@ -141,6 +141,10 @@ static uint32_t ipHashKey(const InternPoolKey* key) {
wyhash_update_u32(&h, (uint32_t)key->data.func_type.is_noinline);
wyhash_update_u32(&h, key->data.func_type.comptime_bits);
wyhash_update_u32(&h, key->data.func_type.noalias_bits);
+ for (uint32_t pi = 0;
+ pi < key->data.func_type.param_count && pi < FUNC_TYPE_MAX_PARAMS;
+ pi++)
+ wyhash_update_u32(&h, key->data.func_type.param_types[pi]);
break;
case IP_KEY_SLICE:
wyhash_update_u32(&h, key->data.slice);
@@ -261,16 +265,25 @@ static bool ipKeysEqual(const InternPoolKey* a, const InternPoolKey* b) {
case IP_KEY_INT_U16:
return a->data.int_u16 == b->data.int_u16;
case IP_KEY_FUNC_TYPE:
- return a->data.func_type.return_type == b->data.func_type.return_type
- && a->data.func_type.param_count == b->data.func_type.param_count
- && a->data.func_type.cc == b->data.func_type.cc
- && a->data.func_type.is_var_args == b->data.func_type.is_var_args
- && a->data.func_type.is_generic == b->data.func_type.is_generic
- && a->data.func_type.is_noinline == b->data.func_type.is_noinline
- && a->data.func_type.comptime_bits
- == b->data.func_type.comptime_bits
- && a->data.func_type.noalias_bits
- == b->data.func_type.noalias_bits;
+ if (a->data.func_type.return_type != b->data.func_type.return_type
+ || a->data.func_type.param_count != b->data.func_type.param_count
+ || a->data.func_type.cc != b->data.func_type.cc
+ || a->data.func_type.is_var_args != b->data.func_type.is_var_args
+ || a->data.func_type.is_generic != b->data.func_type.is_generic
+ || a->data.func_type.is_noinline != b->data.func_type.is_noinline
+ || a->data.func_type.comptime_bits
+ != b->data.func_type.comptime_bits
+ || a->data.func_type.noalias_bits
+ != b->data.func_type.noalias_bits)
+ return false;
+ for (uint32_t pi = 0;
+ pi < a->data.func_type.param_count && pi < FUNC_TYPE_MAX_PARAMS;
+ pi++) {
+ if (a->data.func_type.param_types[pi]
+ != b->data.func_type.param_types[pi])
+ return false;
+ }
+ return true;
case IP_KEY_SLICE:
return a->data.slice == b->data.slice;
case IP_KEY_AGGREGATE:
diff --git a/stage0/intern_pool.h b/stage0/intern_pool.h
@@ -226,16 +226,19 @@ typedef struct {
InternPoolIndex payload;
} ErrorUnionType;
+// Max inline param types for func_type dedup. Enough for bootstrap corpus.
+#define FUNC_TYPE_MAX_PARAMS 8
+
typedef struct {
InternPoolIndex return_type;
uint32_t param_count;
uint32_t comptime_bits;
uint32_t noalias_bits;
+ InternPoolIndex param_types[FUNC_TYPE_MAX_PARAMS]; // inline for dedup
uint8_t cc;
bool is_var_args;
bool is_generic;
bool is_noinline;
- // param_types stored in extra
} FuncType;
typedef struct {
diff --git a/stage0/sema.c b/stage0/sema.c
@@ -2730,7 +2730,8 @@ static InternPoolIndex resolveCgBuiltinField(
static InternPoolIndex internTypedInt(
Sema* sema, InternPoolIndex tag_type, uint64_t val);
static InternPoolIndex internFuncType(Sema* sema, InternPoolIndex return_type,
- uint32_t param_count, uint8_t cc, bool is_noinline);
+ uint32_t param_count, uint8_t cc, bool is_noinline,
+ const InternPoolIndex* param_types);
static InternPoolIndex internFuncDecl(
Sema* sema, uint32_t owner_nav, InternPoolIndex func_type);
@@ -4849,7 +4850,7 @@ static InternPoolIndex ensureNavValUpToDate(Sema* sema, uint32_t nav_idx) {
(void)getBuiltinTypeC(sema, 2); // CallingConvention
}
InternPoolIndex ft
- = internFuncType(sema, ret_ty, param_count, cc, false);
+ = internFuncType(sema, ret_ty, param_count, cc, false, NULL);
InternPoolIndex fd = internFuncDecl(sema, nav_idx, ft);
Nav* wnav = ipGetNav(sema->ip, nav_idx);
wnav->resolved_type = fd;
@@ -5013,8 +5014,12 @@ static InternPoolIndex internTypedInt(
// --- internFuncType ---
// Create a function type IP entry.
// Ported from InternPool.zig getFuncType.
+// param_types may be NULL (for simple cases where param types are unknown).
+// When provided, ipIntern dedups on the first FUNC_TYPE_MAX_PARAMS types only.
+// When NULL, ipForceIntern is used to avoid false dedup.
static InternPoolIndex internFuncType(Sema* sema, InternPoolIndex return_type,
- uint32_t param_count, uint8_t cc, bool is_noinline) {
+ uint32_t param_count, uint8_t cc, bool is_noinline,
+ const InternPoolIndex* param_types) {
InternPoolKey key;
memset(&key, 0, sizeof(key));
key.tag = IP_KEY_FUNC_TYPE;
@@ -5022,7 +5027,12 @@ static InternPoolIndex internFuncType(Sema* sema, InternPoolIndex return_type,
key.data.func_type.param_count = param_count;
key.data.func_type.cc = cc;
key.data.func_type.is_noinline = is_noinline;
- return ipIntern(sema->ip, key);
+ if (param_types) {
+ for (uint32_t i = 0; i < param_count && i < FUNC_TYPE_MAX_PARAMS; i++)
+ key.data.func_type.param_types[i] = param_types[i];
+ return ipIntern(sema->ip, key);
+ }
+ return ipForceIntern(sema->ip, key);
}
// --- internFuncDecl ---
@@ -8592,7 +8602,11 @@ static void zirFunc(Sema* sema, SemaBlock* block, uint32_t inst) {
// Param types must be resolved BEFORE func_type creation to
// match the Zig compiler's funcCommon ordering (param type
// IP entries precede func_type in the intern pool).
+ // Collect param types for func_type dedup (matching upstream
+ // InternPool.getFuncType which includes param types in the key).
uint32_t param_count = 0;
+ InternPoolIndex param_type_buf[FUNC_TYPE_MAX_PARAMS];
+ memset(param_type_buf, 0, sizeof(param_type_buf));
{
uint32_t pi = sema->code.inst_datas[inst].pl_node.payload_index;
uint32_t pb_inst = sema->code.extra[pi + fi.param_block_pi];
@@ -8606,9 +8620,9 @@ static void zirFunc(Sema* sema, SemaBlock* block, uint32_t inst) {
&& ptag != ZIR_INST_PARAM_ANYTYPE
&& ptag != ZIR_INST_PARAM_ANYTYPE_COMPTIME)
continue;
- param_count++;
// Resolve param type to create IP entries in the
// correct order (before func_type).
+ InternPoolIndex pty = IP_INDEX_NONE;
uint32_t pp
= sema->code.inst_datas[param_inst].pl_tok.payload_index;
uint32_t type_packed = sema->code.extra[pp + 1];
@@ -8619,14 +8633,17 @@ static void zirFunc(Sema* sema, SemaBlock* block, uint32_t inst) {
if (sema->code.inst_tags[bi2] == ZIR_INST_BREAK_INLINE) {
ZirInstRef tref
= sema->code.inst_datas[bi2].break_data.operand;
- (void)resolveZirTypeRef(
+ pty = resolveZirTypeRef(
sema, &sema->code, tref, ns_idx, sema->file_idx);
}
} else if (type_body_len == 2) {
uint32_t ti = sema->code.extra[pp + 2];
- (void)resolveZirTypeInst(
+ pty = resolveZirTypeInst(
sema, &sema->code, ti, ns_idx, sema->file_idx);
}
+ if (param_count < FUNC_TYPE_MAX_PARAMS)
+ param_type_buf[param_count] = pty;
+ param_count++;
}
}
@@ -8694,24 +8711,11 @@ static void zirFunc(Sema* sema, SemaBlock* block, uint32_t inst) {
cc = 1; // C calling convention (result of .c resolution)
}
- // Create function type IP entry.
- // Use ipForceIntern because the C sema's func_type key doesn't
- // include actual parameter types (only param_count), so ipIntern
- // Create func_type. Use ipForceIntern because C's func_type key
- // does not include param types — ipIntern would incorrectly dedup
- // functions with same (ret, param_count, cc) but different param
- // types. The upstream Zig IP includes param types in the key,
- // ensuring correct dedup. For functions with the same full
- // signature, the Zig compiler deduplicates naturally; C creates
- // separate entries that get deduped during AIR comparison via
- // the ref canonicalization in the test infrastructure.
- InternPoolKey ftype_key;
- memset(&ftype_key, 0, sizeof(ftype_key));
- ftype_key.tag = IP_KEY_FUNC_TYPE;
- ftype_key.data.func_type.return_type = ret_ty;
- ftype_key.data.func_type.param_count = param_count;
- ftype_key.data.func_type.cc = cc;
- InternPoolIndex func_type_ip = ipForceIntern(sema->ip, ftype_key);
+ // Create function type IP entry. Ported from upstream
+ // InternPool.getFuncType: func_types with the same full
+ // signature (ret + params + cc) share the same IP entry.
+ InternPoolIndex func_type_ip = internFuncType(
+ sema, ret_ty, param_count, cc, false, param_type_buf);
// Create func_decl entry (must follow func_type immediately).
// Find the nav for this function declaration.