commit 19cf4fec0cdab7ae322d3372ea98ff45d4ad42d5 (tree)
parent df29b3a085769f3ca959d8ca55d9856222f9e186
Author: Motiejus Jakštys <motiejus@jakstys.lt>
Date: Sun, 22 Feb 2026 20:44:32 +0000
sema: fix struct_info overflow, add generic param pre-emission; enable 4 corpus tests
Change semaInit to take Sema* (init in-place) to avoid stack corruption
from returning large struct by value. Increase struct_info from [8] to
[32]. Add name-based dead BLOCK pre-emission for generic param type
resolution to match upstream AIR layout (extendsfxf2 fix).
Newly enabled: extendsfxf2, backend, extenddfxf2, compress.
Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
Diffstat:
5 files changed, 198 insertions(+), 34 deletions(-)
diff --git a/stage0/sema.c b/stage0/sema.c
@@ -25,23 +25,21 @@ static uint32_t simpleStringHash(const char* s) {
#define MAX_EXPORTED_DECL_NAMES 16
static uint32_t s_exported_decl_names[MAX_EXPORTED_DECL_NAMES];
-Sema semaInit(InternPool* ip, Zir code) {
- Sema sema;
- memset(&sema, 0, sizeof(sema));
- sema.ip = ip;
- sema.code = code;
- sema.air_inst_tags = ARR_INIT(uint8_t, SEMA_AIR_INITIAL_CAP);
- sema.air_inst_cap = SEMA_AIR_INITIAL_CAP;
- sema.air_inst_datas = ARR_INIT(AirInstData, SEMA_AIR_INITIAL_CAP);
- sema.air_extra = ARR_INIT(uint32_t, SEMA_AIR_EXTRA_INITIAL_CAP);
- sema.air_extra_cap = SEMA_AIR_EXTRA_INITIAL_CAP;
- sema.func_index = IP_INDEX_NONE;
- sema.fn_ret_ty = TYPE_NONE;
- sema.branch_quota = SEMA_DEFAULT_BRANCH_QUOTA;
- sema.allow_memoize = true;
- sema.branch_hint = -1;
- sema.num_ia = 0;
- return sema;
+void semaInit(Sema* sema, InternPool* ip, Zir code) {
+ memset(sema, 0, sizeof(*sema));
+ sema->ip = ip;
+ sema->code = code;
+ sema->air_inst_tags = ARR_INIT(uint8_t, SEMA_AIR_INITIAL_CAP);
+ sema->air_inst_cap = SEMA_AIR_INITIAL_CAP;
+ sema->air_inst_datas = ARR_INIT(AirInstData, SEMA_AIR_INITIAL_CAP);
+ sema->air_extra = ARR_INIT(uint32_t, SEMA_AIR_EXTRA_INITIAL_CAP);
+ sema->air_extra_cap = SEMA_AIR_EXTRA_INITIAL_CAP;
+ sema->func_index = IP_INDEX_NONE;
+ sema->fn_ret_ty = TYPE_NONE;
+ sema->branch_quota = SEMA_DEFAULT_BRANCH_QUOTA;
+ sema->allow_memoize = true;
+ sema->branch_hint = -1;
+ sema->num_ia = 0;
}
void semaDeinit(Sema* sema) {
@@ -2522,7 +2520,7 @@ static InternPoolIndex registerStructTypeFromZir(
InternPoolIndex ptr_ip = ipIntern(sema->ip, pkey);
// Register struct field info.
- if (sema->num_struct_info >= 8)
+ if (sema->num_struct_info >= 32)
return IP_INDEX_VOID_TYPE;
StructFieldInfo* info
= &sema->struct_info[sema->num_struct_info++];
@@ -3105,9 +3103,25 @@ static AirInstRef zirCall(
if (strcmp(cn, "Int") == 0 || strcmp(cn, "Log2Int") == 0
|| strcmp(cn, "PowerOfTwoSignificandZ") == 0
|| strcmp(cn, "F16T") == 0) {
- AirInstData dead;
- memset(&dead, 0, sizeof(dead));
- (void)semaAddInstAsIndex(sema, AIR_INST_BLOCK, dead);
+ // Check if this function's dead block was pre-emitted
+ // during generic param type resolution.
+ bool skip_block = false;
+ for (uint32_t k = 0; k < sema->num_type_fn_to_skip; k++) {
+ if (strcmp(sema->type_fn_to_skip[k], cn) == 0) {
+ sema->type_fn_to_skip[k] = sema->type_fn_to_skip[--sema
+ ->num_type_fn_to_skip];
+ skip_block = true;
+ break;
+ }
+ }
+ if (!skip_block) {
+ AirInstData dead;
+ memset(&dead, 0, sizeof(dead));
+ (void)semaAddInstAsIndex(sema, AIR_INST_BLOCK, dead);
+ }
+ // Track that this function has had its dead block created.
+ if (sema->num_type_fn_created < 16)
+ sema->type_fn_created[sema->num_type_fn_created++] = cn;
// Resolve args and compute the type result.
// Same logic as the returns_type handler below.
AirInstRef ur_arg_refs[16];
@@ -3228,6 +3242,10 @@ static AirInstRef zirCall(
bool is_ct_param[16];
memset(is_ct_param, 0, sizeof(is_ct_param));
bool is_generic = false;
+ // Track whether each param has a generic type (refers to previous
+ // comptime params). Ported from Zir.Inst.Param.Type.is_generic.
+ bool has_generic_type[16];
+ memset(has_generic_type, 0, sizeof(has_generic_type));
{
uint32_t early_pb_inst
= sema->code
@@ -3247,11 +3265,123 @@ static AirInstRef zirCall(
is_ct_param[pi] = true;
is_generic = true;
}
+ // Check if param type is generic (refers to previous params).
+ // Ported from Zir.Inst.Param: extra[payload+1] bit 31.
+ if (ptag == ZIR_INST_PARAM
+ || ptag == ZIR_INST_PARAM_COMPTIME) {
+ uint32_t ppl = sema->code.inst_datas[early_pb[p]]
+ .pl_tok.payload_index;
+ uint32_t type_raw = sema->code.extra[ppl + 1];
+ if ((type_raw >> 31) & 1)
+ has_generic_type[pi] = true;
+ }
pi++;
}
}
}
+ // Ported from src/Sema.zig lines 7316-7353: generic param type
+ // evaluation. In the upstream, generic param type bodies are evaluated
+ // via resolveInlineBody in a comptime generic_block. For type bodies
+ // containing calls to inline type-returning functions (e.g. Int),
+ // this creates dead BLOCK instructions at the current AIR position
+ // (before arg and inline body processing). If the type function was
+ // already called earlier (memoized), no new block is created.
+ // We simulate this by pre-emitting dead blocks only for type functions
+ // that haven't been called yet, and skipping the corresponding
+ // returns_type dead block during inline body processing.
+ if (is_generic) {
+ for (uint32_t a = 0; a < args_len; a++) {
+ if (!is_ct_param[a] && has_generic_type[a]) {
+ // Find the param ZIR instruction for arg 'a'.
+ uint32_t early_pb_inst2
+ = sema->code.extra[sema->code.inst_datas[func_inst]
+ .pl_node.payload_index
+ + func_info.param_block_pi];
+ const uint32_t* early_pb2;
+ uint32_t early_pb_len2;
+ getParamBody(sema, early_pb_inst2, &early_pb2, &early_pb_len2);
+ uint32_t pi2 = 0;
+ uint32_t param_zir = 0;
+ for (uint32_t p2 = 0; p2 < early_pb_len2; p2++) {
+ ZirInstTag pt2 = sema->code.inst_tags[early_pb2[p2]];
+ if (pt2 == ZIR_INST_PARAM || pt2 == ZIR_INST_PARAM_COMPTIME
+ || pt2 == ZIR_INST_PARAM_ANYTYPE
+ || pt2 == ZIR_INST_PARAM_ANYTYPE_COMPTIME) {
+ if (pi2 == a) {
+ param_zir = early_pb2[p2];
+ break;
+ }
+ pi2++;
+ }
+ }
+ if (param_zir == 0)
+ continue;
+ uint32_t ppl
+ = sema->code.inst_datas[param_zir].pl_tok.payload_index;
+ uint32_t type_raw2 = sema->code.extra[ppl + 1];
+ uint32_t tbody_len = type_raw2 & 0x7FFFFFFF;
+ // Scan the param type body for call/field_call instructions
+ // and extract the callee name.
+ for (uint32_t ti = 0; ti < tbody_len; ti++) {
+ uint32_t tzi = sema->code.extra[ppl + 2 + ti];
+ if (tzi >= sema->code.inst_len)
+ continue;
+ ZirInstTag ttag = sema->code.inst_tags[tzi];
+ const char* callee_name = NULL;
+ if (ttag == ZIR_INST_FIELD_CALL) {
+ uint32_t tpi
+ = sema->code.inst_datas[tzi].pl_node.payload_index;
+ uint32_t fn_start = sema->code.extra[tpi + 2];
+ callee_name
+ = (const char*)&sema->code.string_bytes[fn_start];
+ } else if (ttag == ZIR_INST_CALL) {
+ uint32_t tpi
+ = sema->code.inst_datas[tzi].pl_node.payload_index;
+ uint32_t cref = sema->code.extra[tpi + 1];
+ if (cref >= ZIR_REF_START_INDEX) {
+ uint32_t ci = cref - ZIR_REF_START_INDEX;
+ ZirInstTag ctag = sema->code.inst_tags[ci];
+ if (ctag == ZIR_INST_DECL_VAL
+ || ctag == ZIR_INST_DECL_REF) {
+ callee_name = (const char*)&sema->code
+ .string_bytes[sema->code
+ .inst_datas[ci]
+ .str_tok.start];
+ }
+ }
+ }
+ if (callee_name == NULL)
+ continue;
+ if (strcmp(callee_name, "Int") != 0
+ && strcmp(callee_name, "Log2Int") != 0
+ && strcmp(callee_name, "PowerOfTwoSignificandZ") != 0
+ && strcmp(callee_name, "F16T") != 0)
+ continue;
+ // Check if this function already had a dead block
+ // created (memoized in upstream).
+ bool already_created = false;
+ for (uint32_t k = 0; k < sema->num_type_fn_created; k++) {
+ if (strcmp(sema->type_fn_created[k], callee_name)
+ == 0) {
+ already_created = true;
+ break;
+ }
+ }
+ if (!already_created) {
+ AirInstData dead;
+ memset(&dead, 0, sizeof(dead));
+ (void)semaAddInstAsIndex(sema, AIR_INST_BLOCK, dead);
+ if (sema->num_type_fn_to_skip < 4)
+ sema->type_fn_to_skip[sema->num_type_fn_to_skip++]
+ = callee_name;
+ }
+ break; // only handle the first matching type-function call in the type body
+ }
+ }
+ }
+ }
+
// Resolve the argument values (from the ORIGINAL module's ZIR).
// Each arg has a body that produces the argument value via
// break_inline.
@@ -3334,10 +3464,29 @@ static AirInstRef zirCall(
// returns_type functions return `type` which is comptime-only.
// Upstream evaluates these in comptime context, so
// need_debug_scope is always false → BLOCK tag.
+ // Check if this function's dead block was pre-emitted during
+ // generic param type resolution.
{
- AirInstData rt_dead;
- memset(&rt_dead, 0, sizeof(rt_dead));
- (void)semaAddInstAsIndex(sema, AIR_INST_BLOCK, rt_dead);
+ bool skip_block = false;
+ if (type_fn_name) {
+ for (uint32_t k = 0; k < sema->num_type_fn_to_skip; k++) {
+ if (strcmp(sema->type_fn_to_skip[k], type_fn_name) == 0) {
+ sema->type_fn_to_skip[k] = sema->type_fn_to_skip[--sema
+ ->num_type_fn_to_skip];
+ skip_block = true;
+ break;
+ }
+ }
+ }
+ if (!skip_block) {
+ AirInstData rt_dead;
+ memset(&rt_dead, 0, sizeof(rt_dead));
+ (void)semaAddInstAsIndex(sema, AIR_INST_BLOCK, rt_dead);
+ }
+ // Track that this function has had its dead block created.
+ if (type_fn_name && sema->num_type_fn_created < 16)
+ sema->type_fn_created[sema->num_type_fn_created++]
+ = type_fn_name;
}
InternPoolIndex result_type = IP_INDEX_NONE;
@@ -4946,7 +5095,7 @@ static InternPoolIndex ensureF80StructRegistered(Sema* sema) {
pkey.data.ptr_type.flags = 0;
InternPoolIndex ptr_ip = ipIntern(sema->ip, pkey);
- if (sema->num_struct_info >= 8)
+ if (sema->num_struct_info >= 32)
return IP_INDEX_VOID_TYPE;
StructFieldInfo* info = &sema->struct_info[sema->num_struct_info++];
info->struct_type = struct_ip;
diff --git a/stage0/sema.h b/stage0/sema.h
@@ -219,10 +219,23 @@ typedef struct Sema {
uint32_t memo_args_len[32];
AirInstRef memo_result[32];
uint32_t num_memo;
+ // Track type-returning function names that have had dead BLOCK
+ // instructions created (for memoization simulation).
+ // Each handled returns_type call records its name here. The generic
+ // param pre-emission treats a recorded name as "memoized" and does
+ // not pre-emit another dead BLOCK for it.
+ const char* type_fn_created[16];
+ uint32_t num_type_fn_created;
+ // Names of type-returning functions pre-emitted during generic param
+ // type resolution. The returns_type handler skips dead block creation
+ // for these, since the block was already emitted at the correct
+ // (earlier) position.
+ const char* type_fn_to_skip[4];
+ uint32_t num_type_fn_to_skip;
// Known struct types with runtime field information.
// Populated by zirCall when a call returns a struct type.
// Used by zirFieldVal/zirFieldPtr for runtime field access.
- StructFieldInfo struct_info[8];
+ StructFieldInfo struct_info[32];
uint32_t num_struct_info;
} Sema;
@@ -230,7 +243,7 @@ typedef struct Sema {
// --- Function declarations ---
-Sema semaInit(InternPool* ip, Zir code);
+void semaInit(Sema* sema, InternPool* ip, Zir code);
void semaDeinit(Sema* sema);
SemaFuncAirList semaAnalyze(Sema* sema);
void semaFuncAirListDeinit(SemaFuncAirList* list);
diff --git a/stage0/sema_test.zig b/stage0/sema_test.zig
@@ -186,7 +186,7 @@ fn semaCheck(source: [:0]const u8) !SemaCheckResult {
defer c.zirDeinit(&c_zir);
var result: SemaCheckResult = undefined;
result.c_ip = c.ipInit();
- result.c_sema = c.semaInit(&result.c_ip, c_zir);
+ c.semaInit(&result.c_sema, &result.c_ip, c_zir);
result.c_func_air_list = c.semaAnalyze(&result.c_sema);
return result;
}
diff --git a/stage0/stages_test.zig b/stage0/stages_test.zig
@@ -95,7 +95,8 @@ fn stagesCheck(gpa: Allocator, comptime path: []const u8, source: [:0]const u8)
var c_ip = sc.ipInit();
defer sc.ipDeinit(&c_ip);
- var c_sema = sc.semaInit(&c_ip, @bitCast(c_zir));
+ var c_sema: sc.Sema = undefined;
+ sc.semaInit(&c_sema, &c_ip, @bitCast(c_zir));
defer sc.semaDeinit(&c_sema);
c_sema.source_dir = source_dir_path.ptr;
c_sema.module_root = module_root_path.ptr;
@@ -106,7 +107,7 @@ fn stagesCheck(gpa: Allocator, comptime path: []const u8, source: [:0]const u8)
}
}
-const last_successful_corpus = "../lib/std/crypto/codecs.zig";
+const last_successful_corpus = "../lib/std/compress.zig";
// find ../{lib,src} -name '*.zig' | xargs -n1 stat -c "%s %n" | sort -n | awk '{printf " \""$2"\", // "$1"\n"}'
const corpus_files = .{
@@ -148,9 +149,9 @@ const corpus_files = .{
"../lib/compiler_rt/floatunsihf.zig", // 357
"../lib/compiler_rt/trunctfhf2.zig", // 359
"../lib/compiler_rt/extendsfxf2.zig", // 360
- //"../lib/compiler/aro/backend.zig", // 362
- //"../lib/compiler_rt/extenddfxf2.zig", // 364
- //"../lib/std/compress.zig", // 372
+ "../lib/compiler/aro/backend.zig", // 362
+ "../lib/compiler_rt/extenddfxf2.zig", // 364
+ "../lib/std/compress.zig", // 372
//"../lib/compiler_rt/extendhfdf2.zig", // 373
//"../lib/compiler_rt/extendhfxf2.zig", // 373
//"../lib/compiler_rt/extendhftf2.zig", // 376
diff --git a/stage0/zig0.c b/stage0/zig0.c
@@ -42,7 +42,8 @@ static int zig0Run(const char* program, bool verbose_air, char** msg) {
zir.inst_len, zir.extra_len, zir.string_bytes_len);
InternPool ip = ipInit();
- Sema sema = semaInit(&ip, zir);
+ Sema sema;
+ semaInit(&sema, &ip, zir);
SemaFuncAirList func_airs = semaAnalyze(&sema);
if (verbose_air)
verboseAirPrint(stderr, &func_airs, &ip);