commit e573d8e8d60e597bf166d08ce93e321c0e60c108 (tree)
parent e57d7a4f3469f33d1e0dd8a7350389a9ae8119ab
Author: Motiejus Jakštys <motiejus@jakstys.lt>
Date: Mon, 23 Feb 2026 23:58:51 +0000
sema: generic function monomorphization (all 89 sema tests pass)
Implement generic function body analysis for runtime calls to functions
with comptime parameters. When a generic function like normalize(comptime
T: type, p: *T) is called at runtime, the C sema now produces a
monomorphized function entry (e.g. normalize__anon_42) matching upstream
Zig's finishFuncInstance behavior.
Key changes:
- analyzeFuncBodyAndRecord accepts optional call_args for comptime param
mapping: comptime params get mapped to resolved values from the call
site instead of generating ARG instructions
- Runtime params use original param index (not renumbered) to match Zig
- Deduplication also treats an existing "{name}__anon_NNN" entry as a match, so repeated calls to a generic function are analyzed only once
- sema_test.zig strips __anon_NNN suffixes for name comparison since the
number differs between compilers (an InternPool index in Zig, func_inst in C)
Enables sema tests 82-88 (num_sema_passing: 82 → 89, all tests pass).
Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
Diffstat:
3 files changed, 80 insertions(+), 22 deletions(-)
diff --git a/stage0/corpus.zig b/stage0/corpus.zig
@@ -203,7 +203,7 @@ pub const files = [_][]const u8{
"lib/std/math/expo2.zig", // 995
};
-pub const num_sema_passing: usize = 82;
+pub const num_sema_passing: usize = 89;
pub const sema_unit_tests = [_][]const u8{
"stage0/sema_tests/empty.zig",
diff --git a/stage0/sema.c b/stage0/sema.c
@@ -469,8 +469,9 @@ static bool analyzeBodyInner(
static uint8_t analyzeBodyRuntimeBreak(
Sema* sema, SemaBlock* block, const uint32_t* body, uint32_t body_len);
static uint16_t floatBits(TypeIndex ty);
-static void analyzeFuncBodyAndRecord(
- Sema* sema, SemaBlock* block, uint32_t func_inst, uint32_t name_idx);
+static void analyzeFuncBodyAndRecord(Sema* sema, SemaBlock* block,
+ uint32_t func_inst, uint32_t name_idx, AirInstRef* call_args,
+ uint32_t call_args_len);
// getParamBody: extract param body from a param_block ZIR instruction.
// Ported from lib/std/zig/Zir.zig getParamBody.
@@ -4466,10 +4467,10 @@ static AirInstRef zirCall(
// Trigger separate body analysis of the callee function.
// Ported from Sema.zig analyzeCall: ensureFuncBodyAnalysisQueued.
- // Skip for generic functions — they get monomorphized by the full
- // compiler but are not separate function entries in the AIR output.
- if (!is_generic)
- analyzeFuncBodyAndRecord(sema, block, func_inst, callee_name_idx);
+ // For generic functions, pass call args so comptime params are
+ // mapped to their resolved values (monomorphization).
+ analyzeFuncBodyAndRecord(sema, block, func_inst, callee_name_idx,
+ is_generic ? arg_refs : NULL, is_generic ? args_len : 0);
// Clean up cross-module state.
if (is_cross_module) {
@@ -4884,8 +4885,9 @@ static AirInstRef zirCall(
// Ported from the body analysis portion of zirFunc.
// Used for both exported functions (from zirFunc) and non-inline callees
// (from zirCall). name_idx is a string_bytes index for the function name.
-static void analyzeFuncBodyAndRecord(
- Sema* sema, SemaBlock* block, uint32_t func_inst, uint32_t name_idx) {
+static void analyzeFuncBodyAndRecord(Sema* sema, SemaBlock* block,
+ uint32_t func_inst, uint32_t name_idx, AirInstRef* call_args,
+ uint32_t call_args_len) {
if (!sema->func_air_list)
return;
FuncZirInfo fi = parseFuncZir(sema, func_inst);
@@ -4897,11 +4899,20 @@ static void analyzeFuncBodyAndRecord(
// Deduplication: skip if already analyzed.
if (name_idx != 0) {
const char* name_str = (const char*)&sema->code.string_bytes[name_idx];
+ size_t name_len = strlen(name_str);
SemaFuncAirList* list = sema->func_air_list;
for (uint32_t i = 0; i < list->len; i++) {
- if (list->items[i].name
- && strcmp(list->items[i].name + 5, name_str) == 0)
- return; // +5 to skip "root." prefix
+ if (!list->items[i].name)
+ continue;
+ const char* entry = list->items[i].name + 5; // skip "root."
+ // Exact match (non-generic).
+ if (strcmp(entry, name_str) == 0)
+ return;
+ // Prefix match for generic monomorphizations:
+ // entry = "normalize__anon_42", name_str = "normalize".
+ if (call_args && strncmp(entry, name_str, name_len) == 0
+ && strncmp(entry + name_len, "__anon_", 7) == 0)
+ return;
}
}
@@ -5006,6 +5017,7 @@ static void analyzeFuncBodyAndRecord(
if (total_params > 0)
instMapEnsureSpaceForBody(&sema->inst_map, param_body, param_body_len);
+ uint32_t arg_index = 0; // index into call_args (all params)
uint32_t runtime_param_index = 0;
for (uint32_t p = 0; p < param_body_len; p++) {
uint32_t param_inst = param_body[p];
@@ -5015,6 +5027,20 @@ static void analyzeFuncBodyAndRecord(
&& ptag != ZIR_INST_PARAM_ANYTYPE_COMPTIME)
continue;
+ bool is_ct = (ptag == ZIR_INST_PARAM_COMPTIME
+ || ptag == ZIR_INST_PARAM_ANYTYPE_COMPTIME);
+
+ // For generic function monomorphization: map comptime params
+ // to their resolved values from the call site instead of
+ // creating ARG instructions.
+ // Ported from src/Sema.zig finishFuncInstance (lines 7503-7570).
+ if (call_args && is_ct) {
+ if (arg_index < call_args_len)
+ instMapPut(&sema->inst_map, param_inst, call_args[arg_index]);
+ arg_index++;
+ continue;
+ }
+
uint32_t param_payload
= sema->code.inst_datas[param_inst].pl_tok.payload_index;
uint32_t type_packed = sema->code.extra[param_payload + 1];
@@ -5070,10 +5096,15 @@ static void analyzeFuncBodyAndRecord(
AirInstData arg_data;
memset(&arg_data, 0, sizeof(arg_data));
arg_data.arg.ty_ref = AIR_REF_FROM_IP(param_ty);
- arg_data.arg.zir_param_index = runtime_param_index;
+ // For generic monomorphization, use the original param index
+ // (including skipped comptime params) to match upstream Zig's
+ // finishFuncInstance behavior.
+ arg_data.arg.zir_param_index
+ = call_args ? arg_index : runtime_param_index;
AirInstRef arg_ref = semaAddInst(&fn_block, AIR_INST_ARG, arg_data);
instMapPut(&sema->inst_map, param_inst, arg_ref);
runtime_param_index++;
+ arg_index++;
}
// Analyze the function body.
@@ -5092,11 +5123,23 @@ static void analyzeFuncBodyAndRecord(
if (name_idx != 0) {
const char* name_ptr = (const char*)&sema->code.string_bytes[name_idx];
size_t name_len = strlen(name_ptr);
- size_t fqn_len = 5 + name_len; // "root." + name
- func_name = malloc(fqn_len + 1);
- if (func_name) {
- memcpy(func_name, "root.", 5);
- memcpy(func_name + 5, name_ptr, name_len + 1);
+ if (call_args) {
+ // Generic monomorphization: "root.{name}__anon_{func_inst}"
+ // Matches upstream naming from finishFuncInstance.
+ // The exact number differs from Zig; comparison strips it.
+ size_t fqn_len = (size_t)snprintf(
+ NULL, 0, "root.%s__anon_%u", name_ptr, func_inst);
+ func_name = malloc(fqn_len + 1);
+ if (func_name)
+ snprintf(func_name, fqn_len + 1, "root.%s__anon_%u", name_ptr,
+ func_inst);
+ } else {
+ size_t fqn_len = 5 + name_len; // "root." + name
+ func_name = malloc(fqn_len + 1);
+ if (func_name) {
+ memcpy(func_name, "root.", 5);
+ memcpy(func_name + 5, name_ptr, name_len + 1);
+ }
}
}
@@ -5161,7 +5204,7 @@ static void zirFunc(Sema* sema, SemaBlock* block, uint32_t inst) {
if (!is_exported)
return;
- analyzeFuncBodyAndRecord(sema, block, inst, sema->cur_decl_name);
+ analyzeFuncBodyAndRecord(sema, block, inst, sema->cur_decl_name, NULL, 0);
}
// zirStructDecl: process struct_decl extended instruction.
diff --git a/stage0/sema_test.zig b/stage0/sema_test.zig
@@ -331,7 +331,7 @@ pub fn airComparePrecomputed(precomputed: []const PrecomputedFunc, c_func_air_li
var found = false;
for (c_funcs) |*cf| {
const cn = if (cf.name) |n| std.mem.span(n) else "";
- if (std.mem.eql(u8, stripModulePrefix(pf.name), stripModulePrefix(cn))) {
+ if (std.mem.eql(u8, stripAnonSuffix(stripModulePrefix(pf.name)), stripAnonSuffix(stripModulePrefix(cn)))) {
found = true;
break;
}
@@ -356,11 +356,11 @@ fn precomputedFromCAir(cf: *const c.SemaFuncAir) PrecomputedFunc {
}
fn precomputedFindByName(funcs: []const PrecomputedFunc, name: []const u8) ?*const PrecomputedFunc {
- const bare_name = stripModulePrefix(name);
+ const bare_name = stripAnonSuffix(stripModulePrefix(name));
var result: ?*const PrecomputedFunc = null;
var match_count: usize = 0;
for (funcs) |*f| {
- if (std.mem.eql(u8, bare_name, stripModulePrefix(f.name))) {
+ if (std.mem.eql(u8, bare_name, stripAnonSuffix(stripModulePrefix(f.name)))) {
if (result == null) result = f;
match_count += 1;
}
@@ -385,6 +385,21 @@ fn stripModulePrefix(fqn: []const u8) []const u8 {
fqn;
}
+/// Strip a trailing "__anon_NNN" suffix (NNN = one or more ASCII digits) from a
+/// bare function name, e.g. "normalize__anon_507" -> "normalize".
+/// The number is not comparable across compilers: upstream Zig uses an InternPool
+/// index, while the C sema formats `func_inst`. Other names are returned as-is.
+fn stripAnonSuffix(name: []const u8) []const u8 {
+ if (std.mem.lastIndexOf(u8, name, "__anon_")) |pos| {
+ const rest = name[pos + 7 ..]; // skip the 7-byte marker itself
+ for (rest) |ch| {
+ if (ch < '0' or ch > '9') return name; // non-digit after marker: leave name untouched
+ }
+ if (rest.len > 0) return name[0..pos]; // require at least one digit
+ }
+ return name;
+}
+
fn cToOpt(comptime T: type, ptr: [*c]T) ?[*]const T {
return if (ptr == null) null else @ptrCast(ptr);
}