sema: fix struct_info overflow, add generic param pre-emission; enable 4 corpus tests
Change semaInit to take Sema* (init in-place) to avoid stack corruption from returning a large struct by value. Increase struct_info from [8] to [32]. Add name-based dead BLOCK pre-emission for generic param type resolution to match upstream AIR layout (extendsfxf2 fix). Newly enabled: extendsfxf2, backend, extenddfxf2, compress. Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
199
stage0/sema.c
199
stage0/sema.c
@@ -25,23 +25,21 @@ static uint32_t simpleStringHash(const char* s) {
|
||||
#define MAX_EXPORTED_DECL_NAMES 16
|
||||
static uint32_t s_exported_decl_names[MAX_EXPORTED_DECL_NAMES];
|
||||
|
||||
Sema semaInit(InternPool* ip, Zir code) {
|
||||
Sema sema;
|
||||
memset(&sema, 0, sizeof(sema));
|
||||
sema.ip = ip;
|
||||
sema.code = code;
|
||||
sema.air_inst_tags = ARR_INIT(uint8_t, SEMA_AIR_INITIAL_CAP);
|
||||
sema.air_inst_cap = SEMA_AIR_INITIAL_CAP;
|
||||
sema.air_inst_datas = ARR_INIT(AirInstData, SEMA_AIR_INITIAL_CAP);
|
||||
sema.air_extra = ARR_INIT(uint32_t, SEMA_AIR_EXTRA_INITIAL_CAP);
|
||||
sema.air_extra_cap = SEMA_AIR_EXTRA_INITIAL_CAP;
|
||||
sema.func_index = IP_INDEX_NONE;
|
||||
sema.fn_ret_ty = TYPE_NONE;
|
||||
sema.branch_quota = SEMA_DEFAULT_BRANCH_QUOTA;
|
||||
sema.allow_memoize = true;
|
||||
sema.branch_hint = -1;
|
||||
sema.num_ia = 0;
|
||||
return sema;
|
||||
// Initializes the caller-provided *sema in place; nothing is returned.
// Per the commit message, this replaces the earlier by-value `Sema semaInit(...)`
// to avoid returning the large Sema struct by value.
//
// sema: storage to initialize; its previous contents are overwritten.
// ip:   intern pool pointer stored in sema->ip.
// code: ZIR stored (by value) in sema->code.
void semaInit(Sema* sema, InternPool* ip, Zir code) {
|
||||
// Zero every field first so only non-zero defaults need setting below.
memset(sema, 0, sizeof(*sema));
|
||||
sema->ip = ip;
|
||||
sema->code = code;
|
||||
// AIR instruction tags and datas are both sized with SEMA_AIR_INITIAL_CAP;
// only one capacity field (air_inst_cap) is set for the pair here.
sema->air_inst_tags = ARR_INIT(uint8_t, SEMA_AIR_INITIAL_CAP);
|
||||
sema->air_inst_cap = SEMA_AIR_INITIAL_CAP;
|
||||
sema->air_inst_datas = ARR_INIT(AirInstData, SEMA_AIR_INITIAL_CAP);
|
||||
// The AIR extra array has its own initial capacity.
sema->air_extra = ARR_INIT(uint32_t, SEMA_AIR_EXTRA_INITIAL_CAP);
|
||||
sema->air_extra_cap = SEMA_AIR_EXTRA_INITIAL_CAP;
|
||||
// Defaults that are set explicitly rather than left at the memset zero.
sema->func_index = IP_INDEX_NONE;
|
||||
sema->fn_ret_ty = TYPE_NONE;
|
||||
sema->branch_quota = SEMA_DEFAULT_BRANCH_QUOTA;
|
||||
sema->allow_memoize = true;
|
||||
// NOTE(review): -1 presumably means "no branch hint set" — confirm against
// the Sema struct definition.
sema->branch_hint = -1;
|
||||
// Already zero after the memset above; kept as an explicit assignment.
sema->num_ia = 0;
|
||||
}
|
||||
|
||||
void semaDeinit(Sema* sema) {
|
||||
@@ -2522,7 +2520,7 @@ static InternPoolIndex registerStructTypeFromZir(
|
||||
InternPoolIndex ptr_ip = ipIntern(sema->ip, pkey);
|
||||
|
||||
// Register struct field info.
|
||||
if (sema->num_struct_info >= 8)
|
||||
if (sema->num_struct_info >= 32)
|
||||
return IP_INDEX_VOID_TYPE;
|
||||
StructFieldInfo* info
|
||||
= &sema->struct_info[sema->num_struct_info++];
|
||||
@@ -3105,9 +3103,25 @@ static AirInstRef zirCall(
|
||||
if (strcmp(cn, "Int") == 0 || strcmp(cn, "Log2Int") == 0
|
||||
|| strcmp(cn, "PowerOfTwoSignificandZ") == 0
|
||||
|| strcmp(cn, "F16T") == 0) {
|
||||
AirInstData dead;
|
||||
memset(&dead, 0, sizeof(dead));
|
||||
(void)semaAddInstAsIndex(sema, AIR_INST_BLOCK, dead);
|
||||
// Check if this function's dead block was pre-emitted
|
||||
// during generic param type resolution.
|
||||
bool skip_block = false;
|
||||
for (uint32_t k = 0; k < sema->num_type_fn_to_skip; k++) {
|
||||
if (strcmp(sema->type_fn_to_skip[k], cn) == 0) {
|
||||
sema->type_fn_to_skip[k] = sema->type_fn_to_skip[--sema
|
||||
->num_type_fn_to_skip];
|
||||
skip_block = true;
|
||||
break;
|
||||
}
|
||||
}
|
||||
if (!skip_block) {
|
||||
AirInstData dead;
|
||||
memset(&dead, 0, sizeof(dead));
|
||||
(void)semaAddInstAsIndex(sema, AIR_INST_BLOCK, dead);
|
||||
}
|
||||
// Track that this function has had its dead block created.
|
||||
if (sema->num_type_fn_created < 16)
|
||||
sema->type_fn_created[sema->num_type_fn_created++] = cn;
|
||||
// Resolve args and compute the type result.
|
||||
// Same logic as the returns_type handler below.
|
||||
AirInstRef ur_arg_refs[16];
|
||||
@@ -3228,6 +3242,10 @@ static AirInstRef zirCall(
|
||||
bool is_ct_param[16];
|
||||
memset(is_ct_param, 0, sizeof(is_ct_param));
|
||||
bool is_generic = false;
|
||||
// Track whether each param has a generic type (refers to previous
|
||||
// comptime params). Ported from Zir.Inst.Param.Type.is_generic.
|
||||
bool has_generic_type[16];
|
||||
memset(has_generic_type, 0, sizeof(has_generic_type));
|
||||
{
|
||||
uint32_t early_pb_inst
|
||||
= sema->code
|
||||
@@ -3247,11 +3265,123 @@ static AirInstRef zirCall(
|
||||
is_ct_param[pi] = true;
|
||||
is_generic = true;
|
||||
}
|
||||
// Check if param type is generic (refers to previous params).
|
||||
// Ported from Zir.Inst.Param: extra[payload+1] bit 31.
|
||||
if (ptag == ZIR_INST_PARAM
|
||||
|| ptag == ZIR_INST_PARAM_COMPTIME) {
|
||||
uint32_t ppl = sema->code.inst_datas[early_pb[p]]
|
||||
.pl_tok.payload_index;
|
||||
uint32_t type_raw = sema->code.extra[ppl + 1];
|
||||
if ((type_raw >> 31) & 1)
|
||||
has_generic_type[pi] = true;
|
||||
}
|
||||
pi++;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Ported from src/Sema.zig lines 7316-7353: generic param type
|
||||
// evaluation. In the upstream, generic param type bodies are evaluated
|
||||
// via resolveInlineBody in a comptime generic_block. For type bodies
|
||||
// containing calls to inline type-returning functions (e.g. Int),
|
||||
// this creates dead BLOCK instructions at the current AIR position
|
||||
// (before arg and inline body processing). If the type function was
|
||||
// already called earlier (memoized), no new block is created.
|
||||
// We simulate this by pre-emitting dead blocks only for type functions
|
||||
// that haven't been called yet, and skipping the corresponding
|
||||
// returns_type dead block during inline body processing.
|
||||
if (is_generic) {
|
||||
for (uint32_t a = 0; a < args_len; a++) {
|
||||
if (!is_ct_param[a] && has_generic_type[a]) {
|
||||
// Find the param ZIR instruction for arg 'a'.
|
||||
uint32_t early_pb_inst2
|
||||
= sema->code.extra[sema->code.inst_datas[func_inst]
|
||||
.pl_node.payload_index
|
||||
+ func_info.param_block_pi];
|
||||
const uint32_t* early_pb2;
|
||||
uint32_t early_pb_len2;
|
||||
getParamBody(sema, early_pb_inst2, &early_pb2, &early_pb_len2);
|
||||
uint32_t pi2 = 0;
|
||||
uint32_t param_zir = 0;
|
||||
for (uint32_t p2 = 0; p2 < early_pb_len2; p2++) {
|
||||
ZirInstTag pt2 = sema->code.inst_tags[early_pb2[p2]];
|
||||
if (pt2 == ZIR_INST_PARAM || pt2 == ZIR_INST_PARAM_COMPTIME
|
||||
|| pt2 == ZIR_INST_PARAM_ANYTYPE
|
||||
|| pt2 == ZIR_INST_PARAM_ANYTYPE_COMPTIME) {
|
||||
if (pi2 == a) {
|
||||
param_zir = early_pb2[p2];
|
||||
break;
|
||||
}
|
||||
pi2++;
|
||||
}
|
||||
}
|
||||
if (param_zir == 0)
|
||||
continue;
|
||||
uint32_t ppl
|
||||
= sema->code.inst_datas[param_zir].pl_tok.payload_index;
|
||||
uint32_t type_raw2 = sema->code.extra[ppl + 1];
|
||||
uint32_t tbody_len = type_raw2 & 0x7FFFFFFF;
|
||||
// Scan the param type body for call/field_call instructions
|
||||
// and extract the callee name.
|
||||
for (uint32_t ti = 0; ti < tbody_len; ti++) {
|
||||
uint32_t tzi = sema->code.extra[ppl + 2 + ti];
|
||||
if (tzi >= sema->code.inst_len)
|
||||
continue;
|
||||
ZirInstTag ttag = sema->code.inst_tags[tzi];
|
||||
const char* callee_name = NULL;
|
||||
if (ttag == ZIR_INST_FIELD_CALL) {
|
||||
uint32_t tpi
|
||||
= sema->code.inst_datas[tzi].pl_node.payload_index;
|
||||
uint32_t fn_start = sema->code.extra[tpi + 2];
|
||||
callee_name
|
||||
= (const char*)&sema->code.string_bytes[fn_start];
|
||||
} else if (ttag == ZIR_INST_CALL) {
|
||||
uint32_t tpi
|
||||
= sema->code.inst_datas[tzi].pl_node.payload_index;
|
||||
uint32_t cref = sema->code.extra[tpi + 1];
|
||||
if (cref >= ZIR_REF_START_INDEX) {
|
||||
uint32_t ci = cref - ZIR_REF_START_INDEX;
|
||||
ZirInstTag ctag = sema->code.inst_tags[ci];
|
||||
if (ctag == ZIR_INST_DECL_VAL
|
||||
|| ctag == ZIR_INST_DECL_REF) {
|
||||
callee_name = (const char*)&sema->code
|
||||
.string_bytes[sema->code
|
||||
.inst_datas[ci]
|
||||
.str_tok.start];
|
||||
}
|
||||
}
|
||||
}
|
||||
if (callee_name == NULL)
|
||||
continue;
|
||||
if (strcmp(callee_name, "Int") != 0
|
||||
&& strcmp(callee_name, "Log2Int") != 0
|
||||
&& strcmp(callee_name, "PowerOfTwoSignificandZ") != 0
|
||||
&& strcmp(callee_name, "F16T") != 0)
|
||||
continue;
|
||||
// Check if this function already had a dead block
|
||||
// created (memoized in upstream).
|
||||
bool already_created = false;
|
||||
for (uint32_t k = 0; k < sema->num_type_fn_created; k++) {
|
||||
if (strcmp(sema->type_fn_created[k], callee_name)
|
||||
== 0) {
|
||||
already_created = true;
|
||||
break;
|
||||
}
|
||||
}
|
||||
if (!already_created) {
|
||||
AirInstData dead;
|
||||
memset(&dead, 0, sizeof(dead));
|
||||
(void)semaAddInstAsIndex(sema, AIR_INST_BLOCK, dead);
|
||||
if (sema->num_type_fn_to_skip < 4)
|
||||
sema->type_fn_to_skip[sema->num_type_fn_to_skip++]
|
||||
= callee_name;
|
||||
}
|
||||
break; // only handle first call in type body
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Resolve the argument values (from the ORIGINAL module's ZIR).
|
||||
// Each arg has a body that produces the argument value via
|
||||
// break_inline.
|
||||
@@ -3334,10 +3464,29 @@ static AirInstRef zirCall(
|
||||
// returns_type functions return `type` which is comptime-only.
|
||||
// Upstream evaluates these in comptime context, so
|
||||
// need_debug_scope is always false → BLOCK tag.
|
||||
// Check if this function's dead block was pre-emitted during
|
||||
// generic param type resolution.
|
||||
{
|
||||
AirInstData rt_dead;
|
||||
memset(&rt_dead, 0, sizeof(rt_dead));
|
||||
(void)semaAddInstAsIndex(sema, AIR_INST_BLOCK, rt_dead);
|
||||
bool skip_block = false;
|
||||
if (type_fn_name) {
|
||||
for (uint32_t k = 0; k < sema->num_type_fn_to_skip; k++) {
|
||||
if (strcmp(sema->type_fn_to_skip[k], type_fn_name) == 0) {
|
||||
sema->type_fn_to_skip[k] = sema->type_fn_to_skip[--sema
|
||||
->num_type_fn_to_skip];
|
||||
skip_block = true;
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
if (!skip_block) {
|
||||
AirInstData rt_dead;
|
||||
memset(&rt_dead, 0, sizeof(rt_dead));
|
||||
(void)semaAddInstAsIndex(sema, AIR_INST_BLOCK, rt_dead);
|
||||
}
|
||||
// Track that this function has had its dead block created.
|
||||
if (type_fn_name && sema->num_type_fn_created < 16)
|
||||
sema->type_fn_created[sema->num_type_fn_created++]
|
||||
= type_fn_name;
|
||||
}
|
||||
InternPoolIndex result_type = IP_INDEX_NONE;
|
||||
|
||||
@@ -4946,7 +5095,7 @@ static InternPoolIndex ensureF80StructRegistered(Sema* sema) {
|
||||
pkey.data.ptr_type.flags = 0;
|
||||
InternPoolIndex ptr_ip = ipIntern(sema->ip, pkey);
|
||||
|
||||
if (sema->num_struct_info >= 8)
|
||||
if (sema->num_struct_info >= 32)
|
||||
return IP_INDEX_VOID_TYPE;
|
||||
StructFieldInfo* info = &sema->struct_info[sema->num_struct_info++];
|
||||
info->struct_type = struct_ip;
|
||||
|
||||
@@ -219,10 +219,23 @@ typedef struct Sema {
|
||||
uint32_t memo_args_len[32];
|
||||
AirInstRef memo_result[32];
|
||||
uint32_t num_memo;
|
||||
// Track type-returning function names that have had dead BLOCK
|
||||
// instructions created (for memoization simulation).
|
||||
// When a returns_type function is called for the first time, a dead
|
||||
// BLOCK is emitted. Subsequent calls with the same name are
|
||||
// considered "memoized" and don't emit new blocks.
|
||||
const char* type_fn_created[16];
|
||||
uint32_t num_type_fn_created;
|
||||
// Names of type-returning functions pre-emitted during generic param
|
||||
// type resolution. The returns_type handler skips dead block creation
|
||||
// for these, since the block was already emitted at the correct
|
||||
// (earlier) position.
|
||||
const char* type_fn_to_skip[4];
|
||||
uint32_t num_type_fn_to_skip;
|
||||
// Known struct types with runtime field information.
|
||||
// Populated by zirCall when a call returns a struct type.
|
||||
// Used by zirFieldVal/zirFieldPtr for runtime field access.
|
||||
StructFieldInfo struct_info[8];
|
||||
StructFieldInfo struct_info[32];
|
||||
uint32_t num_struct_info;
|
||||
} Sema;
|
||||
|
||||
@@ -230,7 +243,7 @@ typedef struct Sema {
|
||||
|
||||
// --- Function declarations ---
|
||||
|
||||
Sema semaInit(InternPool* ip, Zir code);
|
||||
void semaInit(Sema* sema, InternPool* ip, Zir code);
|
||||
void semaDeinit(Sema* sema);
|
||||
SemaFuncAirList semaAnalyze(Sema* sema);
|
||||
void semaFuncAirListDeinit(SemaFuncAirList* list);
|
||||
|
||||
@@ -186,7 +186,7 @@ fn semaCheck(source: [:0]const u8) !SemaCheckResult {
|
||||
defer c.zirDeinit(&c_zir);
|
||||
var result: SemaCheckResult = undefined;
|
||||
result.c_ip = c.ipInit();
|
||||
result.c_sema = c.semaInit(&result.c_ip, c_zir);
|
||||
c.semaInit(&result.c_sema, &result.c_ip, c_zir);
|
||||
result.c_func_air_list = c.semaAnalyze(&result.c_sema);
|
||||
return result;
|
||||
}
|
||||
|
||||
@@ -95,7 +95,8 @@ fn stagesCheck(gpa: Allocator, comptime path: []const u8, source: [:0]const u8)
|
||||
|
||||
var c_ip = sc.ipInit();
|
||||
defer sc.ipDeinit(&c_ip);
|
||||
var c_sema = sc.semaInit(&c_ip, @bitCast(c_zir));
|
||||
var c_sema: sc.Sema = undefined;
|
||||
sc.semaInit(&c_sema, &c_ip, @bitCast(c_zir));
|
||||
defer sc.semaDeinit(&c_sema);
|
||||
c_sema.source_dir = source_dir_path.ptr;
|
||||
c_sema.module_root = module_root_path.ptr;
|
||||
@@ -106,7 +107,7 @@ fn stagesCheck(gpa: Allocator, comptime path: []const u8, source: [:0]const u8)
|
||||
}
|
||||
}
|
||||
|
||||
const last_successful_corpus = "../lib/std/crypto/codecs.zig";
|
||||
const last_successful_corpus = "../lib/std/compress.zig";
|
||||
|
||||
// find ../{lib,src} -name '*.zig' | xargs -n1 stat -c "%s %n" | sort -n | awk '{printf " \""$2"\", // "$1"\n"}'
|
||||
const corpus_files = .{
|
||||
@@ -148,9 +149,9 @@ const corpus_files = .{
|
||||
"../lib/compiler_rt/floatunsihf.zig", // 357
|
||||
"../lib/compiler_rt/trunctfhf2.zig", // 359
|
||||
"../lib/compiler_rt/extendsfxf2.zig", // 360
|
||||
//"../lib/compiler/aro/backend.zig", // 362
|
||||
//"../lib/compiler_rt/extenddfxf2.zig", // 364
|
||||
//"../lib/std/compress.zig", // 372
|
||||
"../lib/compiler/aro/backend.zig", // 362
|
||||
"../lib/compiler_rt/extenddfxf2.zig", // 364
|
||||
"../lib/std/compress.zig", // 372
|
||||
//"../lib/compiler_rt/extendhfdf2.zig", // 373
|
||||
//"../lib/compiler_rt/extendhfxf2.zig", // 373
|
||||
//"../lib/compiler_rt/extendhftf2.zig", // 376
|
||||
|
||||
@@ -42,7 +42,8 @@ static int zig0Run(const char* program, bool verbose_air, char** msg) {
|
||||
zir.inst_len, zir.extra_len, zir.string_bytes_len);
|
||||
|
||||
InternPool ip = ipInit();
|
||||
Sema sema = semaInit(&ip, zir);
|
||||
Sema sema;
|
||||
semaInit(&sema, &ip, zir);
|
||||
SemaFuncAirList func_airs = semaAnalyze(&sema);
|
||||
if (verbose_air)
|
||||
verboseAirPrint(stderr, &func_airs, &ip);
|
||||
|
||||
Reference in New Issue
Block a user