sema: generic function monomorphization (all 89 sema tests pass)

Implement generic function body analysis for runtime calls to functions
with comptime parameters. When a generic function like normalize(comptime
T: type, p: *T) is called at runtime, the C sema now produces a
monomorphized function entry (e.g. normalize__anon_42) matching upstream
Zig's finishFuncInstance behavior.

Key changes:
- analyzeFuncBodyAndRecord accepts optional call_args for comptime param
  mapping: comptime params get mapped to resolved values from the call
  site instead of generating ARG instructions
- Runtime params keep their original param index (counting the skipped
  comptime params, not renumbered) to match upstream Zig
- Deduplication also matches existing "<name>__anon_NNN" entries, so repeated
  calls to the same generic function are analyzed only once
- sema_test.zig strips __anon_NNN suffixes for name comparison since IP
  indices differ between C and Zig compilers

Enables sema tests 82-88 (num_sema_passing: 82 → 89, all tests pass).

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
2026-02-23 23:58:51 +00:00
parent e57d7a4f34
commit e573d8e8d6
3 changed files with 80 additions and 22 deletions

View File

@@ -203,7 +203,7 @@ pub const files = [_][]const u8{
"lib/std/math/expo2.zig", // 995
};
pub const num_sema_passing: usize = 82;
pub const num_sema_passing: usize = 89;
pub const sema_unit_tests = [_][]const u8{
"stage0/sema_tests/empty.zig",

View File

@@ -469,8 +469,9 @@ static bool analyzeBodyInner(
static uint8_t analyzeBodyRuntimeBreak(
Sema* sema, SemaBlock* block, const uint32_t* body, uint32_t body_len);
static uint16_t floatBits(TypeIndex ty);
static void analyzeFuncBodyAndRecord(
Sema* sema, SemaBlock* block, uint32_t func_inst, uint32_t name_idx);
static void analyzeFuncBodyAndRecord(Sema* sema, SemaBlock* block,
uint32_t func_inst, uint32_t name_idx, AirInstRef* call_args,
uint32_t call_args_len);
// getParamBody: extract param body from a param_block ZIR instruction.
// Ported from lib/std/zig/Zir.zig getParamBody.
@@ -4466,10 +4467,10 @@ static AirInstRef zirCall(
// Trigger separate body analysis of the callee function.
// Ported from Sema.zig analyzeCall: ensureFuncBodyAnalysisQueued.
// Skip for generic functions — they get monomorphized by the full
// compiler but are not separate function entries in the AIR output.
if (!is_generic)
analyzeFuncBodyAndRecord(sema, block, func_inst, callee_name_idx);
// For generic functions, pass call args so comptime params are
// mapped to their resolved values (monomorphization).
analyzeFuncBodyAndRecord(sema, block, func_inst, callee_name_idx,
is_generic ? arg_refs : NULL, is_generic ? args_len : 0);
// Clean up cross-module state.
if (is_cross_module) {
@@ -4884,8 +4885,9 @@ static AirInstRef zirCall(
// Ported from the body analysis portion of zirFunc.
// Used for both exported functions (from zirFunc) and non-inline callees
// (from zirCall). name_idx is a string_bytes index for the function name.
static void analyzeFuncBodyAndRecord(
Sema* sema, SemaBlock* block, uint32_t func_inst, uint32_t name_idx) {
static void analyzeFuncBodyAndRecord(Sema* sema, SemaBlock* block,
uint32_t func_inst, uint32_t name_idx, AirInstRef* call_args,
uint32_t call_args_len) {
if (!sema->func_air_list)
return;
FuncZirInfo fi = parseFuncZir(sema, func_inst);
@@ -4897,11 +4899,20 @@ static void analyzeFuncBodyAndRecord(
// Deduplication: skip if already analyzed.
if (name_idx != 0) {
const char* name_str = (const char*)&sema->code.string_bytes[name_idx];
size_t name_len = strlen(name_str);
SemaFuncAirList* list = sema->func_air_list;
for (uint32_t i = 0; i < list->len; i++) {
if (list->items[i].name
&& strcmp(list->items[i].name + 5, name_str) == 0)
return; // +5 to skip "root." prefix
if (!list->items[i].name)
continue;
const char* entry = list->items[i].name + 5; // skip "root."
// Exact match (non-generic).
if (strcmp(entry, name_str) == 0)
return;
// Prefix match for generic monomorphizations:
// entry = "normalize__anon_42", name_str = "normalize".
if (call_args && strncmp(entry, name_str, name_len) == 0
&& strncmp(entry + name_len, "__anon_", 7) == 0)
return;
}
}
@@ -5006,6 +5017,7 @@ static void analyzeFuncBodyAndRecord(
if (total_params > 0)
instMapEnsureSpaceForBody(&sema->inst_map, param_body, param_body_len);
uint32_t arg_index = 0; // index into call_args (all params)
uint32_t runtime_param_index = 0;
for (uint32_t p = 0; p < param_body_len; p++) {
uint32_t param_inst = param_body[p];
@@ -5015,6 +5027,20 @@ static void analyzeFuncBodyAndRecord(
&& ptag != ZIR_INST_PARAM_ANYTYPE_COMPTIME)
continue;
bool is_ct = (ptag == ZIR_INST_PARAM_COMPTIME
|| ptag == ZIR_INST_PARAM_ANYTYPE_COMPTIME);
// For generic function monomorphization: map comptime params
// to their resolved values from the call site instead of
// creating ARG instructions.
// Ported from src/Sema.zig finishFuncInstance (lines 7503-7570).
if (call_args && is_ct) {
if (arg_index < call_args_len)
instMapPut(&sema->inst_map, param_inst, call_args[arg_index]);
arg_index++;
continue;
}
uint32_t param_payload
= sema->code.inst_datas[param_inst].pl_tok.payload_index;
uint32_t type_packed = sema->code.extra[param_payload + 1];
@@ -5070,10 +5096,15 @@ static void analyzeFuncBodyAndRecord(
AirInstData arg_data;
memset(&arg_data, 0, sizeof(arg_data));
arg_data.arg.ty_ref = AIR_REF_FROM_IP(param_ty);
arg_data.arg.zir_param_index = runtime_param_index;
// For generic monomorphization, use the original param index
// (including skipped comptime params) to match upstream Zig's
// finishFuncInstance behavior.
arg_data.arg.zir_param_index
= call_args ? arg_index : runtime_param_index;
AirInstRef arg_ref = semaAddInst(&fn_block, AIR_INST_ARG, arg_data);
instMapPut(&sema->inst_map, param_inst, arg_ref);
runtime_param_index++;
arg_index++;
}
// Analyze the function body.
@@ -5092,11 +5123,23 @@ static void analyzeFuncBodyAndRecord(
if (name_idx != 0) {
const char* name_ptr = (const char*)&sema->code.string_bytes[name_idx];
size_t name_len = strlen(name_ptr);
size_t fqn_len = 5 + name_len; // "root." + name
func_name = malloc(fqn_len + 1);
if (func_name) {
memcpy(func_name, "root.", 5);
memcpy(func_name + 5, name_ptr, name_len + 1);
if (call_args) {
// Generic monomorphization: "root.{name}__anon_{func_inst}"
// Matches upstream naming from finishFuncInstance.
// The exact number differs from Zig; comparison strips it.
size_t fqn_len = (size_t)snprintf(
NULL, 0, "root.%s__anon_%u", name_ptr, func_inst);
func_name = malloc(fqn_len + 1);
if (func_name)
snprintf(func_name, fqn_len + 1, "root.%s__anon_%u", name_ptr,
func_inst);
} else {
size_t fqn_len = 5 + name_len; // "root." + name
func_name = malloc(fqn_len + 1);
if (func_name) {
memcpy(func_name, "root.", 5);
memcpy(func_name + 5, name_ptr, name_len + 1);
}
}
}
@@ -5161,7 +5204,7 @@ static void zirFunc(Sema* sema, SemaBlock* block, uint32_t inst) {
if (!is_exported)
return;
analyzeFuncBodyAndRecord(sema, block, inst, sema->cur_decl_name);
analyzeFuncBodyAndRecord(sema, block, inst, sema->cur_decl_name, NULL, 0);
}
// zirStructDecl: process struct_decl extended instruction.

View File

@@ -331,7 +331,7 @@ pub fn airComparePrecomputed(precomputed: []const PrecomputedFunc, c_func_air_li
var found = false;
for (c_funcs) |*cf| {
const cn = if (cf.name) |n| std.mem.span(n) else "";
if (std.mem.eql(u8, stripModulePrefix(pf.name), stripModulePrefix(cn))) {
if (std.mem.eql(u8, stripAnonSuffix(stripModulePrefix(pf.name)), stripAnonSuffix(stripModulePrefix(cn)))) {
found = true;
break;
}
@@ -356,11 +356,11 @@ fn precomputedFromCAir(cf: *const c.SemaFuncAir) PrecomputedFunc {
}
fn precomputedFindByName(funcs: []const PrecomputedFunc, name: []const u8) ?*const PrecomputedFunc {
const bare_name = stripModulePrefix(name);
const bare_name = stripAnonSuffix(stripModulePrefix(name));
var result: ?*const PrecomputedFunc = null;
var match_count: usize = 0;
for (funcs) |*f| {
if (std.mem.eql(u8, bare_name, stripModulePrefix(f.name))) {
if (std.mem.eql(u8, bare_name, stripAnonSuffix(stripModulePrefix(f.name)))) {
if (result == null) result = f;
match_count += 1;
}
@@ -385,6 +385,21 @@ fn stripModulePrefix(fqn: []const u8) []const u8 {
fqn;
}
/// Return `name` without a trailing "__anon_<digits>" marker.
///
/// Monomorphized generic functions are recorded under names such as
/// "normalize__anon_507". The numeric part is not the same in the C and Zig
/// compilers, so callers compare names only after removing it.
///
/// Only the last "__anon_" occurrence is considered, and the suffix is removed
/// only when it is followed by at least one character and every following
/// character is an ASCII decimal digit. Otherwise `name` is returned unchanged.
/// The result is always a sub-slice of `name`; nothing is allocated.
fn stripAnonSuffix(name: []const u8) []const u8 {
    const marker = "__anon_";
    const marker_start = std.mem.lastIndexOf(u8, name, marker) orelse return name;
    const digits = name[marker_start + marker.len ..];
    // A bare "__anon_" with nothing after it is not a monomorphization suffix.
    if (digits.len == 0) return name;
    for (digits) |byte| {
        if (!std.ascii.isDigit(byte)) return name;
    }
    return name[0..marker_start];
}
/// Convert a C pointer into an optional many-item const pointer.
/// Yields `null` when `ptr` is null; otherwise the same address reinterpreted
/// as `[*]const T`.
fn cToOpt(comptime T: type, ptr: [*c]T) ?[*]const T {
    if (ptr == null) return null;
    return @ptrCast(ptr);
}