From dfd0b9c736133e466f01f2b1dbaf248804f6ac2f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Motiejus=20Jak=C5=A1tys?= Date: Fri, 20 Feb 2026 11:20:16 +0000 Subject: [PATCH] sema: handle @export builtin and ZIR_INST_FUNC_FANCY Add three pieces of support needed for compiler_rt corpus files that use @export to make functions visible: - ZIR_INST_EXPORT handler in analyzeBodyInner: when processing a comptime block containing @export, extract the target declaration name from the ZIR_INST_DECL_REF/DECL_VAL instruction and record it for later use by zirFunc. - ZIR_INST_FUNC_FANCY payload parsing in zirFunc: functions with explicit calling conventions (callconv(.c)) use func_fancy instead of func/func_inferred. Parse the different extra layout {param_block, body_len, bits} with optional trailing cc, ret_ty, and noalias fields. - Export-aware function filtering: zirFunc now checks both the declaration-level export flag and the @export-collected names list when deciding whether to analyze a function body. The exported compiler_rt functions are now found and analyzed, but their bodies produce fewer AIR instructions than Zig because the C sema does not yet implement imports, field access, or function calls. Corpus test comments updated to reflect new status. Co-Authored-By: Claude Opus 4.6 --- stage0/sema.c | 131 +++++++++++++++++++++++++++++++++++------ stage0/sema.h | 5 ++ stage0/stages_test.zig | 10 ++-- 3 files changed, 123 insertions(+), 23 deletions(-) diff --git a/stage0/sema.c b/stage0/sema.c index 6f7d15ee20..164f6b8e82 100644 --- a/stage0/sema.c +++ b/stage0/sema.c @@ -8,6 +8,11 @@ #define SEMA_BLOCK_INITIAL_CAP 64 #define INST_MAP_INITIAL_CAP 32 +// Exported declaration names collected by ZIR_INST_EXPORT handler. +// Module-level state shared between analyzeBodyInner and zirFunc. 
+#define MAX_EXPORTED_DECL_NAMES 16 +static uint32_t s_exported_decl_names[MAX_EXPORTED_DECL_NAMES]; + Sema semaInit(InternPool* ip, Zir code) { Sema sema; memset(&sema, 0, sizeof(sema)); @@ -409,32 +414,90 @@ static void zirInt(Sema* sema, uint32_t inst) { } // zirFunc: analyze a function declaration. -// Ported from src/Sema.zig zirFunc / analyzeFnBodyInner (PerThread.zig). +// Ported from src/Sema.zig zirFunc / zirFuncFancy / analyzeFnBodyInner. +// Handles ZIR_INST_FUNC, ZIR_INST_FUNC_INFERRED, and ZIR_INST_FUNC_FANCY. // For the bootstrap, we only fully analyze exported functions with bodies. // The function body is analyzed in a fresh AIR context; the resulting // per-function Air is appended to sema->func_air_list. static void zirFunc(Sema* sema, SemaBlock* block, uint32_t inst) { (void)block; uint32_t payload_index = sema->code.inst_datas[inst].pl_node.payload_index; + ZirInstTag tag = sema->code.inst_tags[inst]; + bool is_fancy = (tag == ZIR_INST_FUNC_FANCY); - // Func extra: {ret_ty(u32), param_block(u32), body_len(u32)} - uint32_t ret_ty_raw = sema->code.extra[payload_index]; - uint32_t body_len = sema->code.extra[payload_index + 2]; - uint32_t ret_ty_body_len = ret_ty_raw & 0x7FFFFFFF; // low 31 bits + // Parse payload depending on variant. + // Func/FuncInferred: {ret_ty(u32), param_block(u32), body_len(u32)} + // FuncFancy: {param_block(u32), body_len(u32), bits(u32)} + uint32_t body_len; + uint32_t extra_index; // will point to body instructions + uint32_t ret_ty_body_len = 0; + uint32_t ret_ty_ref_pos = 0; // extra[] index of ret_ty ref (if len==1) + uint32_t param_block_pi; // payload offset of param_block field - bool has_body = (body_len != 0); + if (is_fancy) { + // FuncFancy extra: {param_block, body_len, bits} + body_len = sema->code.extra[payload_index + 1]; + uint32_t bits = sema->code.extra[payload_index + 2]; + param_block_pi = 0; // param_block at payload_index + 0 - // Skip trailing data to find the body instructions. 
- uint32_t extra_index = payload_index + 3; // past Func struct - if (ret_ty_body_len == 1) { - extra_index += 1; // single ref - } else if (ret_ty_body_len > 1) { - extra_index += ret_ty_body_len; // body + // Skip trailing optional data after the 3-word header. + extra_index = payload_index + 3; + bool has_cc_body = (bits & (1u << 4)) != 0; + bool has_cc_ref = (bits & (1u << 3)) != 0; + bool has_ret_ty_body = (bits & (1u << 6)) != 0; + bool has_ret_ty_ref = (bits & (1u << 5)) != 0; + bool has_any_noalias = (bits & (1u << 7)) != 0; + if (has_cc_body) { + uint32_t cc_body_len = sema->code.extra[extra_index]; + extra_index += 1 + cc_body_len; + } else if (has_cc_ref) { + extra_index += 1; // single ref + } + if (has_ret_ty_body) { + uint32_t rtb_len = sema->code.extra[extra_index]; + extra_index += 1 + rtb_len; + } else if (has_ret_ty_ref) { + ret_ty_body_len = 1; // single ref + ret_ty_ref_pos = extra_index; + extra_index += 1; + } + if (has_any_noalias) { + extra_index += 1; + } + } else { + // Func/FuncInferred extra: {ret_ty(u32), param_block(u32), + // body_len(u32)} + uint32_t ret_ty_raw = sema->code.extra[payload_index]; + body_len = sema->code.extra[payload_index + 2]; + ret_ty_body_len = ret_ty_raw & 0x7FFFFFFF; // low 31 bits + param_block_pi = 1; // param_block at payload_index + 1 + + extra_index = payload_index + 3; + if (ret_ty_body_len == 1) { + ret_ty_ref_pos = extra_index; + extra_index += 1; + } else if (ret_ty_body_len > 1) { + extra_index += ret_ty_body_len; + } } // extra_index now points to the body instructions. + bool has_body = (body_len != 0); + // Only analyze exported functions with bodies. - if (!has_body || !sema->cur_decl_is_export || !sema->func_air_list) + // A function is considered exported if either: + // 1. Its declaration has export linkage (cur_decl_is_export), or + // 2. It was referenced by an @export builtin (exported_decl_names). 
+ bool is_exported = sema->cur_decl_is_export; + if (!is_exported) { + for (uint32_t e = 0; e < sema->num_exported_decl_names; e++) { + if (s_exported_decl_names[e] == sema->cur_decl_name) { + is_exported = true; + break; + } + } + } + if (!has_body || !is_exported || !sema->func_air_list) return; // --- Save the current AIR state --- @@ -466,8 +529,7 @@ static void zirFunc(Sema* sema, SemaBlock* block, uint32_t inst) { if (ret_ty_body_len == 0) { sema->fn_ret_ty = IP_INDEX_VOID_TYPE; } else if (ret_ty_body_len == 1) { - // Single ref at payload_index + 3 (already read past by extra_index). - ZirInstRef ret_ty_ref = sema->code.extra[payload_index + 3]; + ZirInstRef ret_ty_ref = sema->code.extra[ret_ty_ref_pos]; // For pre-interned refs, the ZIR ref == IP index. assert(ret_ty_ref < ZIR_REF_START_INDEX); sema->fn_ret_ty = ret_ty_ref; @@ -487,7 +549,8 @@ static void zirFunc(Sema* sema, SemaBlock* block, uint32_t inst) { // Ported from src/Zcu/PerThread.zig analyzeFnBodyInner (lines 2884-2940). // Get param body from param_block, emit AIR arg instructions, and map // param ZIR instructions to their corresponding AIR arg refs. - uint32_t param_block_inst = sema->code.extra[payload_index + 1]; + uint32_t param_block_inst + = sema->code.extra[payload_index + param_block_pi]; const uint32_t* param_body; uint32_t param_body_len; getParamBody(sema, param_block_inst, &param_body, &param_body_len); @@ -787,10 +850,11 @@ static bool analyzeBodyInner( return false; } - // func: function declaration. - // Ported from src/Sema.zig zirFunc. + // func/func_inferred/func_fancy: function declaration. + // Ported from src/Sema.zig zirFunc / zirFuncFancy. case ZIR_INST_FUNC: case ZIR_INST_FUNC_INFERRED: + case ZIR_INST_FUNC_FANCY: zirFunc(sema, block, inst); instMapPut( &sema->inst_map, inst, AIR_REF_FROM_IP(IP_INDEX_VOID_TYPE)); @@ -872,6 +936,37 @@ static bool analyzeBodyInner( i++; continue; + // export: @export builtin. 
Record the exported declaration name + // so that zirFunc can later analyze the target function's body. + // Ported from src/Sema.zig zirExport (subset: only extracts + // the declaration name from simple decl_ref/decl_val targets). + case ZIR_INST_EXPORT: { + uint32_t payload_index + = sema->code.inst_datas[inst].pl_node.payload_index; + uint32_t exported_ref = sema->code.extra[payload_index]; + if (exported_ref >= ZIR_REF_START_INDEX) { + uint32_t exported_inst + = exported_ref - ZIR_REF_START_INDEX; + ZirInstTag etag = sema->code.inst_tags[exported_inst]; + if (etag == ZIR_INST_DECL_REF + || etag == ZIR_INST_DECL_VAL) { + uint32_t name_idx + = sema->code.inst_datas[exported_inst] + .str_tok.start; + if (sema->num_exported_decl_names + < MAX_EXPORTED_DECL_NAMES) { + s_exported_decl_names + [sema->num_exported_decl_names++] = name_idx; + } + } + } + instMapPut( + &sema->inst_map, inst, + AIR_REF_FROM_IP(IP_INDEX_VOID_VALUE)); + i++; + continue; + } + // For all other instructions, produce a void mapping and skip. // As handlers are implemented, they will replace this default. default: { diff --git a/stage0/sema.h b/stage0/sema.h index de338829e7..b80571fd7f 100644 --- a/stage0/sema.h +++ b/stage0/sema.h @@ -150,6 +150,11 @@ typedef struct Sema { // set by zirStructDecl while iterating declarations. uint32_t cur_decl_name; // index into code.string_bytes; 0 = none bool cur_decl_is_export; + // Declaration names exported via @export builtin (ZIR_INST_EXPORT). + // Populated by analyzeBodyInner when processing comptime blocks + // that contain @export. Used by zirFunc to decide whether to analyze + // non-declaration-level-exported functions. 
+ uint32_t num_exported_decl_names; } Sema; #define SEMA_DEFAULT_BRANCH_QUOTA 1000 diff --git a/stage0/stages_test.zig b/stage0/stages_test.zig index a92a0bc45e..da175c9040 100644 --- a/stage0/stages_test.zig +++ b/stage0/stages_test.zig @@ -97,11 +97,11 @@ const corpus_files = .{ "../lib/std/crypto/codecs.zig", // 165 "../lib/std/os/uefi/tables/table_header.zig", // 214 "../lib/std/zig/llvm.zig", // 247 - //"../lib/compiler_rt/neghf2.zig", // 265 -- disabled: @export produces Zig AIR, C sema not yet ported - //"../lib/compiler_rt/negxf2.zig", // 265 -- disabled: @export produces Zig AIR, C sema not yet ported - //"../lib/compiler_rt/absvdi2.zig", // 311 -- disabled: @export produces Zig AIR, C sema not yet ported - //"../lib/compiler_rt/absvsi2.zig", // 311 -- disabled: @export produces Zig AIR, C sema not yet ported - //"../lib/compiler_rt/absvti2.zig", // 314 -- disabled: @export produces Zig AIR, C sema not yet ported + //"../lib/compiler_rt/neghf2.zig", // 265 -- @export+func_fancy handled; body analysis incomplete + //"../lib/compiler_rt/negxf2.zig", // 265 -- @export+func_fancy handled; body analysis incomplete + //"../lib/compiler_rt/absvdi2.zig", // 311 -- @export+func_fancy handled; body analysis incomplete + //"../lib/compiler_rt/absvsi2.zig", // 311 -- @export+func_fancy handled; body analysis incomplete + //"../lib/compiler_rt/absvti2.zig", // 314 -- @export+func_fancy handled; body analysis incomplete //"../lib/compiler_rt/addhf3.zig", // 319 //"../lib/compiler_rt/addxf3.zig", // 323 //"../lib/compiler_rt/mulhf3.zig", // 323