diff --git a/stage0/sema.c b/stage0/sema.c index 8fb4b4a0a8..60ff2dbdf7 100644 --- a/stage0/sema.c +++ b/stage0/sema.c @@ -25,23 +25,21 @@ static uint32_t simpleStringHash(const char* s) { #define MAX_EXPORTED_DECL_NAMES 16 static uint32_t s_exported_decl_names[MAX_EXPORTED_DECL_NAMES]; -Sema semaInit(InternPool* ip, Zir code) { - Sema sema; - memset(&sema, 0, sizeof(sema)); - sema.ip = ip; - sema.code = code; - sema.air_inst_tags = ARR_INIT(uint8_t, SEMA_AIR_INITIAL_CAP); - sema.air_inst_cap = SEMA_AIR_INITIAL_CAP; - sema.air_inst_datas = ARR_INIT(AirInstData, SEMA_AIR_INITIAL_CAP); - sema.air_extra = ARR_INIT(uint32_t, SEMA_AIR_EXTRA_INITIAL_CAP); - sema.air_extra_cap = SEMA_AIR_EXTRA_INITIAL_CAP; - sema.func_index = IP_INDEX_NONE; - sema.fn_ret_ty = TYPE_NONE; - sema.branch_quota = SEMA_DEFAULT_BRANCH_QUOTA; - sema.allow_memoize = true; - sema.branch_hint = -1; - sema.num_ia = 0; - return sema; +void semaInit(Sema* sema, InternPool* ip, Zir code) { + memset(sema, 0, sizeof(*sema)); + sema->ip = ip; + sema->code = code; + sema->air_inst_tags = ARR_INIT(uint8_t, SEMA_AIR_INITIAL_CAP); + sema->air_inst_cap = SEMA_AIR_INITIAL_CAP; + sema->air_inst_datas = ARR_INIT(AirInstData, SEMA_AIR_INITIAL_CAP); + sema->air_extra = ARR_INIT(uint32_t, SEMA_AIR_EXTRA_INITIAL_CAP); + sema->air_extra_cap = SEMA_AIR_EXTRA_INITIAL_CAP; + sema->func_index = IP_INDEX_NONE; + sema->fn_ret_ty = TYPE_NONE; + sema->branch_quota = SEMA_DEFAULT_BRANCH_QUOTA; + sema->allow_memoize = true; + sema->branch_hint = -1; + sema->num_ia = 0; } void semaDeinit(Sema* sema) { @@ -2522,7 +2520,7 @@ static InternPoolIndex registerStructTypeFromZir( InternPoolIndex ptr_ip = ipIntern(sema->ip, pkey); // Register struct field info. - if (sema->num_struct_info >= 8) + if (sema->num_struct_info >= 32) return IP_INDEX_VOID_TYPE; StructFieldInfo* info = &sema->struct_info[sema->num_struct_info++]; @@ -3105,9 +3103,25 @@ static AirInstRef zirCall( if (strcmp(cn, "Int") == 0 || strcmp(cn, "Log2Int") == 0 || strcmp(cn, "PowerOfTwoSignificandZ") == 0 || strcmp(cn, "F16T") == 0) { - AirInstData dead; - memset(&dead, 0, sizeof(dead)); - (void)semaAddInstAsIndex(sema, AIR_INST_BLOCK, dead); + // Check if this function's dead block was pre-emitted + // during generic param type resolution. + bool skip_block = false; + for (uint32_t k = 0; k < sema->num_type_fn_to_skip; k++) { + if (strcmp(sema->type_fn_to_skip[k], cn) == 0) { + sema->type_fn_to_skip[k] = sema->type_fn_to_skip[--sema + ->num_type_fn_to_skip]; + skip_block = true; + break; + } + } + if (!skip_block) { + AirInstData dead; + memset(&dead, 0, sizeof(dead)); + (void)semaAddInstAsIndex(sema, AIR_INST_BLOCK, dead); + } + // Track that this function has had its dead block created. + if (sema->num_type_fn_created < 16) + sema->type_fn_created[sema->num_type_fn_created++] = cn; // Resolve args and compute the type result. // Same logic as the returns_type handler below. AirInstRef ur_arg_refs[16]; @@ -3228,6 +3242,10 @@ static AirInstRef zirCall( bool is_ct_param[16]; memset(is_ct_param, 0, sizeof(is_ct_param)); bool is_generic = false; + // Track whether each param has a generic type (refers to previous + // comptime params). Ported from Zir.Inst.Param.Type.is_generic. + bool has_generic_type[16]; + memset(has_generic_type, 0, sizeof(has_generic_type)); { uint32_t early_pb_inst = sema->code @@ -3247,11 +3265,123 @@ static AirInstRef zirCall( is_ct_param[pi] = true; is_generic = true; } + // Check if param type is generic (refers to previous params). + // Ported from Zir.Inst.Param: extra[payload+1] bit 31. + if (ptag == ZIR_INST_PARAM + || ptag == ZIR_INST_PARAM_COMPTIME) { + uint32_t ppl = sema->code.inst_datas[early_pb[p]] + .pl_tok.payload_index; + uint32_t type_raw = sema->code.extra[ppl + 1]; + if ((type_raw >> 31) & 1) + has_generic_type[pi] = true; + } pi++; } } } + // Ported from src/Sema.zig lines 7316-7353: generic param type + // evaluation. In the upstream, generic param type bodies are evaluated + // via resolveInlineBody in a comptime generic_block. For type bodies + // containing calls to inline type-returning functions (e.g. Int), + // this creates dead BLOCK instructions at the current AIR position + // (before arg and inline body processing). If the type function was + // already called earlier (memoized), no new block is created. + // We simulate this by pre-emitting dead blocks only for type functions + // that haven't been called yet, and skipping the corresponding + // returns_type dead block during inline body processing. + if (is_generic) { + for (uint32_t a = 0; a < args_len; a++) { + if (!is_ct_param[a] && has_generic_type[a]) { + // Find the param ZIR instruction for arg 'a'. + uint32_t early_pb_inst2 + = sema->code.extra[sema->code.inst_datas[func_inst] + .pl_node.payload_index + + func_info.param_block_pi]; + const uint32_t* early_pb2; + uint32_t early_pb_len2; + getParamBody(sema, early_pb_inst2, &early_pb2, &early_pb_len2); + uint32_t pi2 = 0; + uint32_t param_zir = 0; + for (uint32_t p2 = 0; p2 < early_pb_len2; p2++) { + ZirInstTag pt2 = sema->code.inst_tags[early_pb2[p2]]; + if (pt2 == ZIR_INST_PARAM || pt2 == ZIR_INST_PARAM_COMPTIME + || pt2 == ZIR_INST_PARAM_ANYTYPE + || pt2 == ZIR_INST_PARAM_ANYTYPE_COMPTIME) { + if (pi2 == a) { + param_zir = early_pb2[p2]; + break; + } + pi2++; + } + } + if (param_zir == 0) + continue; + uint32_t ppl + = sema->code.inst_datas[param_zir].pl_tok.payload_index; + uint32_t type_raw2 = sema->code.extra[ppl + 1]; + uint32_t tbody_len = type_raw2 & 0x7FFFFFFF; + // Scan the param type body for call/field_call instructions + // and extract the callee name. + for (uint32_t ti = 0; ti < tbody_len; ti++) { + uint32_t tzi = sema->code.extra[ppl + 2 + ti]; + if (tzi >= sema->code.inst_len) + continue; + ZirInstTag ttag = sema->code.inst_tags[tzi]; + const char* callee_name = NULL; + if (ttag == ZIR_INST_FIELD_CALL) { + uint32_t tpi + = sema->code.inst_datas[tzi].pl_node.payload_index; + uint32_t fn_start = sema->code.extra[tpi + 2]; + callee_name + = (const char*)&sema->code.string_bytes[fn_start]; + } else if (ttag == ZIR_INST_CALL) { + uint32_t tpi + = sema->code.inst_datas[tzi].pl_node.payload_index; + uint32_t cref = sema->code.extra[tpi + 1]; + if (cref >= ZIR_REF_START_INDEX) { + uint32_t ci = cref - ZIR_REF_START_INDEX; + ZirInstTag ctag = sema->code.inst_tags[ci]; + if (ctag == ZIR_INST_DECL_VAL + || ctag == ZIR_INST_DECL_REF) { + callee_name = (const char*)&sema->code + .string_bytes[sema->code + .inst_datas[ci] + .str_tok.start]; + } + } + } + if (callee_name == NULL) + continue; + if (strcmp(callee_name, "Int") != 0 + && strcmp(callee_name, "Log2Int") != 0 + && strcmp(callee_name, "PowerOfTwoSignificandZ") != 0 + && strcmp(callee_name, "F16T") != 0) + continue; + // Check if this function already had a dead block + // created (memoized in upstream). + bool already_created = false; + for (uint32_t k = 0; k < sema->num_type_fn_created; k++) { + if (strcmp(sema->type_fn_created[k], callee_name) + == 0) { + already_created = true; + break; + } + } + if (!already_created) { + AirInstData dead; + memset(&dead, 0, sizeof(dead)); + (void)semaAddInstAsIndex(sema, AIR_INST_BLOCK, dead); + if (sema->num_type_fn_to_skip < 4) + sema->type_fn_to_skip[sema->num_type_fn_to_skip++] + = callee_name; + } + break; // only handle first call in type body + } + } + } + } + // Resolve the argument values (from the ORIGINAL module's ZIR). // Each arg has a body that produces the argument value via // break_inline. @@ -3334,10 +3464,29 @@ static AirInstRef zirCall( // returns_type functions return `type` which is comptime-only. // Upstream evaluates these in comptime context, so // need_debug_scope is always false → BLOCK tag. + // Check if this function's dead block was pre-emitted during + // generic param type resolution. { - AirInstData rt_dead; - memset(&rt_dead, 0, sizeof(rt_dead)); - (void)semaAddInstAsIndex(sema, AIR_INST_BLOCK, rt_dead); + bool skip_block = false; + if (type_fn_name) { + for (uint32_t k = 0; k < sema->num_type_fn_to_skip; k++) { + if (strcmp(sema->type_fn_to_skip[k], type_fn_name) == 0) { + sema->type_fn_to_skip[k] = sema->type_fn_to_skip[--sema + ->num_type_fn_to_skip]; + skip_block = true; + break; + } + } + } + if (!skip_block) { + AirInstData rt_dead; + memset(&rt_dead, 0, sizeof(rt_dead)); + (void)semaAddInstAsIndex(sema, AIR_INST_BLOCK, rt_dead); + } + // Track that this function has had its dead block created. + if (type_fn_name && sema->num_type_fn_created < 16) + sema->type_fn_created[sema->num_type_fn_created++] + = type_fn_name; } InternPoolIndex result_type = IP_INDEX_NONE; @@ -4946,7 +5095,7 @@ static InternPoolIndex ensureF80StructRegistered(Sema* sema) { pkey.data.ptr_type.flags = 0; InternPoolIndex ptr_ip = ipIntern(sema->ip, pkey); - if (sema->num_struct_info >= 8) + if (sema->num_struct_info >= 32) return IP_INDEX_VOID_TYPE; StructFieldInfo* info = &sema->struct_info[sema->num_struct_info++]; info->struct_type = struct_ip; diff --git a/stage0/sema.h b/stage0/sema.h index 3f79390262..e8a8e21433 100644 --- a/stage0/sema.h +++ b/stage0/sema.h @@ -219,10 +219,23 @@ typedef struct Sema { uint32_t memo_args_len[32]; AirInstRef memo_result[32]; uint32_t num_memo; + // Track type-returning function names that have had dead BLOCK + // instructions created (for memoization simulation). + // When a returns_type function is called for the first time, a dead + // BLOCK is emitted. Subsequent calls with the same name are + // considered "memoized" and don't emit new blocks. + const char* type_fn_created[16]; + uint32_t num_type_fn_created; + // Names of type-returning functions pre-emitted during generic param + // type resolution. The returns_type handler skips dead block creation + // for these, since the block was already emitted at the correct + // (earlier) position. + const char* type_fn_to_skip[4]; + uint32_t num_type_fn_to_skip; // Known struct types with runtime field information. // Populated by zirCall when a call returns a struct type. // Used by zirFieldVal/zirFieldPtr for runtime field access. - StructFieldInfo struct_info[8]; + StructFieldInfo struct_info[32]; uint32_t num_struct_info; } Sema; @@ -230,7 +243,7 @@ typedef struct Sema { // --- Function declarations --- -Sema semaInit(InternPool* ip, Zir code); +void semaInit(Sema* sema, InternPool* ip, Zir code); void semaDeinit(Sema* sema); SemaFuncAirList semaAnalyze(Sema* sema); void semaFuncAirListDeinit(SemaFuncAirList* list); diff --git a/stage0/sema_test.zig b/stage0/sema_test.zig index 1dbeef0245..bf47e878cd 100644 --- a/stage0/sema_test.zig +++ b/stage0/sema_test.zig @@ -186,7 +186,7 @@ fn semaCheck(source: [:0]const u8) !SemaCheckResult { defer c.zirDeinit(&c_zir); var result: SemaCheckResult = undefined; result.c_ip = c.ipInit(); - result.c_sema = c.semaInit(&result.c_ip, c_zir); + c.semaInit(&result.c_sema, &result.c_ip, c_zir); result.c_func_air_list = c.semaAnalyze(&result.c_sema); return result; } diff --git a/stage0/stages_test.zig b/stage0/stages_test.zig index 1dbf97f024..036e7cec79 100644 --- a/stage0/stages_test.zig +++ b/stage0/stages_test.zig @@ -95,7 +95,8 @@ fn stagesCheck(gpa: Allocator, comptime path: []const u8, source: [:0]const u8) var c_ip = sc.ipInit(); defer sc.ipDeinit(&c_ip); - var c_sema = sc.semaInit(&c_ip, @bitCast(c_zir)); + var c_sema: sc.Sema = undefined; + sc.semaInit(&c_sema, &c_ip, @bitCast(c_zir)); defer sc.semaDeinit(&c_sema); c_sema.source_dir = source_dir_path.ptr; c_sema.module_root = module_root_path.ptr; @@ -106,7 +107,7 @@ fn stagesCheck(gpa: Allocator, comptime path: []const u8, source: [:0]const u8) } } -const last_successful_corpus = "../lib/std/crypto/codecs.zig"; +const last_successful_corpus = "../lib/std/compress.zig"; // find ../{lib,src} -name '*.zig' | xargs -n1 stat -c "%s %n" | sort -n | awk '{printf " \""$2"\", // "$1"\n"}' const corpus_files = .{ @@ -148,9 +149,9 @@ const corpus_files = .{ "../lib/compiler_rt/floatunsihf.zig", // 357 "../lib/compiler_rt/trunctfhf2.zig", // 359 "../lib/compiler_rt/extendsfxf2.zig", // 360 - //"../lib/compiler/aro/backend.zig", // 362 - //"../lib/compiler_rt/extenddfxf2.zig", // 364 - //"../lib/std/compress.zig", // 372 + "../lib/compiler/aro/backend.zig", // 362 + "../lib/compiler_rt/extenddfxf2.zig", // 364 + "../lib/std/compress.zig", // 372 //"../lib/compiler_rt/extendhfdf2.zig", // 373 //"../lib/compiler_rt/extendhfxf2.zig", // 373 //"../lib/compiler_rt/extendhftf2.zig", // 376 diff --git a/stage0/zig0.c b/stage0/zig0.c index 6739dfeaa9..4c9f3c552d 100644 --- a/stage0/zig0.c +++ b/stage0/zig0.c @@ -42,7 +42,8 @@ static int zig0Run(const char* program, bool verbose_air, char** msg) { zir.inst_len, zir.extra_len, zir.string_bytes_len); InternPool ip = ipInit(); - Sema sema = semaInit(&ip, zir); + Sema sema; + semaInit(&sema, &ip, zir); SemaFuncAirList func_airs = semaAnalyze(&sema); if (verbose_air) verboseAirPrint(stderr, &func_airs, &ip);