sema: fix struct_info overflow, add generic param pre-emission; enable 4 corpus tests

Change semaInit to take Sema* (init in-place) to avoid stack corruption
from returning large struct by value. Increase struct_info from [8] to
[32]. Add name-based dead BLOCK pre-emission for generic param type
resolution to match upstream AIR layout (extendsfxf2 fix).

Newly enabled: extendsfxf2, backend, extenddfxf2, compress.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
2026-02-22 20:44:32 +00:00
parent df29b3a085
commit 19cf4fec0c
5 changed files with 198 additions and 34 deletions

View File

@@ -25,23 +25,21 @@ static uint32_t simpleStringHash(const char* s) {
#define MAX_EXPORTED_DECL_NAMES 16
static uint32_t s_exported_decl_names[MAX_EXPORTED_DECL_NAMES];
Sema semaInit(InternPool* ip, Zir code) {
Sema sema;
memset(&sema, 0, sizeof(sema));
sema.ip = ip;
sema.code = code;
sema.air_inst_tags = ARR_INIT(uint8_t, SEMA_AIR_INITIAL_CAP);
sema.air_inst_cap = SEMA_AIR_INITIAL_CAP;
sema.air_inst_datas = ARR_INIT(AirInstData, SEMA_AIR_INITIAL_CAP);
sema.air_extra = ARR_INIT(uint32_t, SEMA_AIR_EXTRA_INITIAL_CAP);
sema.air_extra_cap = SEMA_AIR_EXTRA_INITIAL_CAP;
sema.func_index = IP_INDEX_NONE;
sema.fn_ret_ty = TYPE_NONE;
sema.branch_quota = SEMA_DEFAULT_BRANCH_QUOTA;
sema.allow_memoize = true;
sema.branch_hint = -1;
sema.num_ia = 0;
return sema;
void semaInit(Sema* sema, InternPool* ip, Zir code) {
memset(sema, 0, sizeof(*sema));
sema->ip = ip;
sema->code = code;
sema->air_inst_tags = ARR_INIT(uint8_t, SEMA_AIR_INITIAL_CAP);
sema->air_inst_cap = SEMA_AIR_INITIAL_CAP;
sema->air_inst_datas = ARR_INIT(AirInstData, SEMA_AIR_INITIAL_CAP);
sema->air_extra = ARR_INIT(uint32_t, SEMA_AIR_EXTRA_INITIAL_CAP);
sema->air_extra_cap = SEMA_AIR_EXTRA_INITIAL_CAP;
sema->func_index = IP_INDEX_NONE;
sema->fn_ret_ty = TYPE_NONE;
sema->branch_quota = SEMA_DEFAULT_BRANCH_QUOTA;
sema->allow_memoize = true;
sema->branch_hint = -1;
sema->num_ia = 0;
}
void semaDeinit(Sema* sema) {
@@ -2522,7 +2520,7 @@ static InternPoolIndex registerStructTypeFromZir(
InternPoolIndex ptr_ip = ipIntern(sema->ip, pkey);
// Register struct field info.
if (sema->num_struct_info >= 8)
if (sema->num_struct_info >= 32)
return IP_INDEX_VOID_TYPE;
StructFieldInfo* info
= &sema->struct_info[sema->num_struct_info++];
@@ -3105,9 +3103,25 @@ static AirInstRef zirCall(
if (strcmp(cn, "Int") == 0 || strcmp(cn, "Log2Int") == 0
|| strcmp(cn, "PowerOfTwoSignificandZ") == 0
|| strcmp(cn, "F16T") == 0) {
AirInstData dead;
memset(&dead, 0, sizeof(dead));
(void)semaAddInstAsIndex(sema, AIR_INST_BLOCK, dead);
// Check if this function's dead block was pre-emitted
// during generic param type resolution.
bool skip_block = false;
for (uint32_t k = 0; k < sema->num_type_fn_to_skip; k++) {
if (strcmp(sema->type_fn_to_skip[k], cn) == 0) {
sema->type_fn_to_skip[k] = sema->type_fn_to_skip[--sema
->num_type_fn_to_skip];
skip_block = true;
break;
}
}
if (!skip_block) {
AirInstData dead;
memset(&dead, 0, sizeof(dead));
(void)semaAddInstAsIndex(sema, AIR_INST_BLOCK, dead);
}
// Track that this function has had its dead block created.
if (sema->num_type_fn_created < 16)
sema->type_fn_created[sema->num_type_fn_created++] = cn;
// Resolve args and compute the type result.
// Same logic as the returns_type handler below.
AirInstRef ur_arg_refs[16];
@@ -3228,6 +3242,10 @@ static AirInstRef zirCall(
bool is_ct_param[16];
memset(is_ct_param, 0, sizeof(is_ct_param));
bool is_generic = false;
// Track whether each param has a generic type (refers to previous
// comptime params). Ported from Zir.Inst.Param.Type.is_generic.
bool has_generic_type[16];
memset(has_generic_type, 0, sizeof(has_generic_type));
{
uint32_t early_pb_inst
= sema->code
@@ -3247,11 +3265,123 @@ static AirInstRef zirCall(
is_ct_param[pi] = true;
is_generic = true;
}
// Check if param type is generic (refers to previous params).
// Ported from Zir.Inst.Param: extra[payload+1] bit 31.
if (ptag == ZIR_INST_PARAM
|| ptag == ZIR_INST_PARAM_COMPTIME) {
uint32_t ppl = sema->code.inst_datas[early_pb[p]]
.pl_tok.payload_index;
uint32_t type_raw = sema->code.extra[ppl + 1];
if ((type_raw >> 31) & 1)
has_generic_type[pi] = true;
}
pi++;
}
}
}
// Ported from src/Sema.zig lines 7316-7353: generic param type
// evaluation. In the upstream, generic param type bodies are evaluated
// via resolveInlineBody in a comptime generic_block. For type bodies
// containing calls to inline type-returning functions (e.g. Int),
// this creates dead BLOCK instructions at the current AIR position
// (before arg and inline body processing). If the type function was
// already called earlier (memoized), no new block is created.
// We simulate this by pre-emitting dead blocks only for type functions
// that haven't been called yet, and skipping the corresponding
// returns_type dead block during inline body processing.
if (is_generic) {
for (uint32_t a = 0; a < args_len; a++) {
if (!is_ct_param[a] && has_generic_type[a]) {
// Find the param ZIR instruction for arg 'a'.
uint32_t early_pb_inst2
= sema->code.extra[sema->code.inst_datas[func_inst]
.pl_node.payload_index
+ func_info.param_block_pi];
const uint32_t* early_pb2;
uint32_t early_pb_len2;
getParamBody(sema, early_pb_inst2, &early_pb2, &early_pb_len2);
uint32_t pi2 = 0;
uint32_t param_zir = 0;
for (uint32_t p2 = 0; p2 < early_pb_len2; p2++) {
ZirInstTag pt2 = sema->code.inst_tags[early_pb2[p2]];
if (pt2 == ZIR_INST_PARAM || pt2 == ZIR_INST_PARAM_COMPTIME
|| pt2 == ZIR_INST_PARAM_ANYTYPE
|| pt2 == ZIR_INST_PARAM_ANYTYPE_COMPTIME) {
if (pi2 == a) {
param_zir = early_pb2[p2];
break;
}
pi2++;
}
}
if (param_zir == 0)
continue;
uint32_t ppl
= sema->code.inst_datas[param_zir].pl_tok.payload_index;
uint32_t type_raw2 = sema->code.extra[ppl + 1];
uint32_t tbody_len = type_raw2 & 0x7FFFFFFF;
// Scan the param type body for call/field_call instructions
// and extract the callee name.
for (uint32_t ti = 0; ti < tbody_len; ti++) {
uint32_t tzi = sema->code.extra[ppl + 2 + ti];
if (tzi >= sema->code.inst_len)
continue;
ZirInstTag ttag = sema->code.inst_tags[tzi];
const char* callee_name = NULL;
if (ttag == ZIR_INST_FIELD_CALL) {
uint32_t tpi
= sema->code.inst_datas[tzi].pl_node.payload_index;
uint32_t fn_start = sema->code.extra[tpi + 2];
callee_name
= (const char*)&sema->code.string_bytes[fn_start];
} else if (ttag == ZIR_INST_CALL) {
uint32_t tpi
= sema->code.inst_datas[tzi].pl_node.payload_index;
uint32_t cref = sema->code.extra[tpi + 1];
if (cref >= ZIR_REF_START_INDEX) {
uint32_t ci = cref - ZIR_REF_START_INDEX;
ZirInstTag ctag = sema->code.inst_tags[ci];
if (ctag == ZIR_INST_DECL_VAL
|| ctag == ZIR_INST_DECL_REF) {
callee_name = (const char*)&sema->code
.string_bytes[sema->code
.inst_datas[ci]
.str_tok.start];
}
}
}
if (callee_name == NULL)
continue;
if (strcmp(callee_name, "Int") != 0
&& strcmp(callee_name, "Log2Int") != 0
&& strcmp(callee_name, "PowerOfTwoSignificandZ") != 0
&& strcmp(callee_name, "F16T") != 0)
continue;
// Check if this function already had a dead block
// created (memoized in upstream).
bool already_created = false;
for (uint32_t k = 0; k < sema->num_type_fn_created; k++) {
if (strcmp(sema->type_fn_created[k], callee_name)
== 0) {
already_created = true;
break;
}
}
if (!already_created) {
AirInstData dead;
memset(&dead, 0, sizeof(dead));
(void)semaAddInstAsIndex(sema, AIR_INST_BLOCK, dead);
if (sema->num_type_fn_to_skip < 4)
sema->type_fn_to_skip[sema->num_type_fn_to_skip++]
= callee_name;
}
break; // only handle first call in type body
}
}
}
}
// Resolve the argument values (from the ORIGINAL module's ZIR).
// Each arg has a body that produces the argument value via
// break_inline.
@@ -3334,10 +3464,29 @@ static AirInstRef zirCall(
// returns_type functions return `type` which is comptime-only.
// Upstream evaluates these in comptime context, so
// need_debug_scope is always false → BLOCK tag.
// Check if this function's dead block was pre-emitted during
// generic param type resolution.
{
AirInstData rt_dead;
memset(&rt_dead, 0, sizeof(rt_dead));
(void)semaAddInstAsIndex(sema, AIR_INST_BLOCK, rt_dead);
bool skip_block = false;
if (type_fn_name) {
for (uint32_t k = 0; k < sema->num_type_fn_to_skip; k++) {
if (strcmp(sema->type_fn_to_skip[k], type_fn_name) == 0) {
sema->type_fn_to_skip[k] = sema->type_fn_to_skip[--sema
->num_type_fn_to_skip];
skip_block = true;
break;
}
}
}
if (!skip_block) {
AirInstData rt_dead;
memset(&rt_dead, 0, sizeof(rt_dead));
(void)semaAddInstAsIndex(sema, AIR_INST_BLOCK, rt_dead);
}
// Track that this function has had its dead block created.
if (type_fn_name && sema->num_type_fn_created < 16)
sema->type_fn_created[sema->num_type_fn_created++]
= type_fn_name;
}
InternPoolIndex result_type = IP_INDEX_NONE;
@@ -4946,7 +5095,7 @@ static InternPoolIndex ensureF80StructRegistered(Sema* sema) {
pkey.data.ptr_type.flags = 0;
InternPoolIndex ptr_ip = ipIntern(sema->ip, pkey);
if (sema->num_struct_info >= 8)
if (sema->num_struct_info >= 32)
return IP_INDEX_VOID_TYPE;
StructFieldInfo* info = &sema->struct_info[sema->num_struct_info++];
info->struct_type = struct_ip;

View File

@@ -219,10 +219,23 @@ typedef struct Sema {
uint32_t memo_args_len[32];
AirInstRef memo_result[32];
uint32_t num_memo;
// Track type-returning function names that have had dead BLOCK
// instructions created (for memoization simulation).
// When a returns_type function is called for the first time, a dead
// BLOCK is emitted. Subsequent calls with the same name are
// considered "memoized" and don't emit new blocks.
const char* type_fn_created[16];
uint32_t num_type_fn_created;
// Names of type-returning functions pre-emitted during generic param
// type resolution. The returns_type handler skips dead block creation
// for these, since the block was already emitted at the correct
// (earlier) position.
const char* type_fn_to_skip[4];
uint32_t num_type_fn_to_skip;
// Known struct types with runtime field information.
// Populated by zirCall when a call returns a struct type.
// Used by zirFieldVal/zirFieldPtr for runtime field access.
StructFieldInfo struct_info[8];
StructFieldInfo struct_info[32];
uint32_t num_struct_info;
} Sema;
@@ -230,7 +243,7 @@ typedef struct Sema {
// --- Function declarations ---
Sema semaInit(InternPool* ip, Zir code);
void semaInit(Sema* sema, InternPool* ip, Zir code);
void semaDeinit(Sema* sema);
SemaFuncAirList semaAnalyze(Sema* sema);
void semaFuncAirListDeinit(SemaFuncAirList* list);

View File

@@ -186,7 +186,7 @@ fn semaCheck(source: [:0]const u8) !SemaCheckResult {
defer c.zirDeinit(&c_zir);
var result: SemaCheckResult = undefined;
result.c_ip = c.ipInit();
result.c_sema = c.semaInit(&result.c_ip, c_zir);
c.semaInit(&result.c_sema, &result.c_ip, c_zir);
result.c_func_air_list = c.semaAnalyze(&result.c_sema);
return result;
}

View File

@@ -95,7 +95,8 @@ fn stagesCheck(gpa: Allocator, comptime path: []const u8, source: [:0]const u8)
var c_ip = sc.ipInit();
defer sc.ipDeinit(&c_ip);
var c_sema = sc.semaInit(&c_ip, @bitCast(c_zir));
var c_sema: sc.Sema = undefined;
sc.semaInit(&c_sema, &c_ip, @bitCast(c_zir));
defer sc.semaDeinit(&c_sema);
c_sema.source_dir = source_dir_path.ptr;
c_sema.module_root = module_root_path.ptr;
@@ -106,7 +107,7 @@ fn stagesCheck(gpa: Allocator, comptime path: []const u8, source: [:0]const u8)
}
}
const last_successful_corpus = "../lib/std/crypto/codecs.zig";
const last_successful_corpus = "../lib/std/compress.zig";
// find ../{lib,src} -name '*.zig' | xargs -n1 stat -c "%s %n" | sort -n | awk '{printf " \""$2"\", // "$1"\n"}'
const corpus_files = .{
@@ -148,9 +149,9 @@ const corpus_files = .{
"../lib/compiler_rt/floatunsihf.zig", // 357
"../lib/compiler_rt/trunctfhf2.zig", // 359
"../lib/compiler_rt/extendsfxf2.zig", // 360
//"../lib/compiler/aro/backend.zig", // 362
//"../lib/compiler_rt/extenddfxf2.zig", // 364
//"../lib/std/compress.zig", // 372
"../lib/compiler/aro/backend.zig", // 362
"../lib/compiler_rt/extenddfxf2.zig", // 364
"../lib/std/compress.zig", // 372
//"../lib/compiler_rt/extendhfdf2.zig", // 373
//"../lib/compiler_rt/extendhfxf2.zig", // 373
//"../lib/compiler_rt/extendhftf2.zig", // 376

View File

@@ -42,7 +42,8 @@ static int zig0Run(const char* program, bool verbose_air, char** msg) {
zir.inst_len, zir.extra_len, zir.string_bytes_len);
InternPool ip = ipInit();
Sema sema = semaInit(&ip, zir);
Sema sema;
semaInit(&sema, &ip, zir);
SemaFuncAirList func_airs = semaAnalyze(&sema);
if (verbose_air)
verboseAirPrint(stderr, &func_airs, &ip);