sema: handle @export builtin and ZIR_INST_FUNC_FANCY
Add three pieces of support needed for compiler_rt corpus files that
use @export to make functions visible:
- ZIR_INST_EXPORT handler in analyzeBodyInner: when processing a
comptime block containing @export, extract the target declaration
name from the ZIR_INST_DECL_REF/DECL_VAL instruction and record it
for later use by zirFunc.
- ZIR_INST_FUNC_FANCY payload parsing in zirFunc: functions with
explicit calling conventions (callconv(.c)) use func_fancy instead
of func/func_inferred. Parse its distinct extra layout: a
{param_block, body_len, bits} header followed by optional trailing
cc, ret_ty, and noalias fields, each gated by a flag in bits.
- Export-aware function filtering: zirFunc now checks both the
declaration-level export flag and the @export-collected names list
when deciding whether to analyze a function body.
The exported compiler_rt functions are now found and analyzed, but
their bodies produce fewer AIR instructions than the Zig compiler's
sema because the C sema does not yet implement imports, field access,
or function calls.
The corpus test comments are updated to reflect the new status; the
affected compiler_rt files remain disabled.
Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
131
stage0/sema.c
131
stage0/sema.c
@@ -8,6 +8,11 @@
|
||||
#define SEMA_BLOCK_INITIAL_CAP 64
|
||||
#define INST_MAP_INITIAL_CAP 32
|
||||
|
||||
// Exported declaration names collected by ZIR_INST_EXPORT handler.
|
||||
// Module-level state shared between analyzeBodyInner and zirFunc; the
// number of valid entries is kept in Sema.num_exported_decl_names.
|
||||
#define MAX_EXPORTED_DECL_NAMES 16
|
||||
static uint32_t s_exported_decl_names[MAX_EXPORTED_DECL_NAMES];
|
||||
|
||||
Sema semaInit(InternPool* ip, Zir code) {
|
||||
Sema sema;
|
||||
memset(&sema, 0, sizeof(sema));
|
||||
@@ -409,32 +414,90 @@ static void zirInt(Sema* sema, uint32_t inst) {
|
||||
}
|
||||
|
||||
// zirFunc: analyze a function declaration.
|
||||
// Ported from src/Sema.zig zirFunc / analyzeFnBodyInner (PerThread.zig).
|
||||
// Ported from src/Sema.zig zirFunc / zirFuncFancy / analyzeFnBodyInner.
|
||||
// Handles ZIR_INST_FUNC, ZIR_INST_FUNC_INFERRED, and ZIR_INST_FUNC_FANCY.
|
||||
// For the bootstrap, we only fully analyze exported functions with bodies.
|
||||
// The function body is analyzed in a fresh AIR context; the resulting
|
||||
// per-function Air is appended to sema->func_air_list.
|
||||
static void zirFunc(Sema* sema, SemaBlock* block, uint32_t inst) {
|
||||
(void)block;
|
||||
uint32_t payload_index = sema->code.inst_datas[inst].pl_node.payload_index;
|
||||
ZirInstTag tag = sema->code.inst_tags[inst];
|
||||
bool is_fancy = (tag == ZIR_INST_FUNC_FANCY);
|
||||
|
||||
// Func extra: {ret_ty(u32), param_block(u32), body_len(u32)}
|
||||
uint32_t ret_ty_raw = sema->code.extra[payload_index];
|
||||
uint32_t body_len = sema->code.extra[payload_index + 2];
|
||||
uint32_t ret_ty_body_len = ret_ty_raw & 0x7FFFFFFF; // low 31 bits
|
||||
// Parse payload depending on variant.
|
||||
// Func/FuncInferred: {ret_ty(u32), param_block(u32), body_len(u32)}
|
||||
// FuncFancy: {param_block(u32), body_len(u32), bits(u32)}
|
||||
uint32_t body_len;
|
||||
uint32_t extra_index; // will point to body instructions
|
||||
uint32_t ret_ty_body_len = 0;
|
||||
uint32_t ret_ty_ref_pos = 0; // extra[] index of ret_ty ref (if len==1)
|
||||
uint32_t param_block_pi; // payload offset of param_block field
|
||||
|
||||
bool has_body = (body_len != 0);
|
||||
if (is_fancy) {
|
||||
// FuncFancy extra: {param_block, body_len, bits}
|
||||
body_len = sema->code.extra[payload_index + 1];
|
||||
uint32_t bits = sema->code.extra[payload_index + 2];
|
||||
param_block_pi = 0; // param_block at payload_index + 0
|
||||
|
||||
// Skip trailing data to find the body instructions.
|
||||
uint32_t extra_index = payload_index + 3; // past Func struct
|
||||
if (ret_ty_body_len == 1) {
|
||||
extra_index += 1; // single ref
|
||||
} else if (ret_ty_body_len > 1) {
|
||||
extra_index += ret_ty_body_len; // body
|
||||
// Skip trailing optional data after the 3-word header.
|
||||
extra_index = payload_index + 3;
|
||||
bool has_cc_body = (bits & (1u << 4)) != 0;
|
||||
bool has_cc_ref = (bits & (1u << 3)) != 0;
|
||||
bool has_ret_ty_body = (bits & (1u << 6)) != 0;
|
||||
bool has_ret_ty_ref = (bits & (1u << 5)) != 0;
|
||||
bool has_any_noalias = (bits & (1u << 7)) != 0;
|
||||
if (has_cc_body) {
|
||||
uint32_t cc_body_len = sema->code.extra[extra_index];
|
||||
extra_index += 1 + cc_body_len;
|
||||
} else if (has_cc_ref) {
|
||||
extra_index += 1; // single ref
|
||||
}
|
||||
if (has_ret_ty_body) {
|
||||
uint32_t rtb_len = sema->code.extra[extra_index];
|
||||
extra_index += 1 + rtb_len;
|
||||
} else if (has_ret_ty_ref) {
|
||||
ret_ty_body_len = 1; // single ref
|
||||
ret_ty_ref_pos = extra_index;
|
||||
extra_index += 1;
|
||||
}
|
||||
if (has_any_noalias) {
|
||||
extra_index += 1;
|
||||
}
|
||||
} else {
|
||||
// Func/FuncInferred extra: {ret_ty(u32), param_block(u32),
|
||||
// body_len(u32)}
|
||||
uint32_t ret_ty_raw = sema->code.extra[payload_index];
|
||||
body_len = sema->code.extra[payload_index + 2];
|
||||
ret_ty_body_len = ret_ty_raw & 0x7FFFFFFF; // low 31 bits
|
||||
param_block_pi = 1; // param_block at payload_index + 1
|
||||
|
||||
extra_index = payload_index + 3;
|
||||
if (ret_ty_body_len == 1) {
|
||||
ret_ty_ref_pos = extra_index;
|
||||
extra_index += 1;
|
||||
} else if (ret_ty_body_len > 1) {
|
||||
extra_index += ret_ty_body_len;
|
||||
}
|
||||
}
|
||||
// extra_index now points to the body instructions.
|
||||
|
||||
bool has_body = (body_len != 0);
|
||||
|
||||
// Only analyze exported functions with bodies.
|
||||
if (!has_body || !sema->cur_decl_is_export || !sema->func_air_list)
|
||||
// A function is considered exported if either:
|
||||
// 1. Its declaration has export linkage (cur_decl_is_export), or
|
||||
// 2. It was referenced by an @export builtin (exported_decl_names).
|
||||
bool is_exported = sema->cur_decl_is_export;
|
||||
if (!is_exported) {
|
||||
for (uint32_t e = 0; e < sema->num_exported_decl_names; e++) {
|
||||
if (s_exported_decl_names[e] == sema->cur_decl_name) {
|
||||
is_exported = true;
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
if (!has_body || !is_exported || !sema->func_air_list)
|
||||
return;
|
||||
|
||||
// --- Save the current AIR state ---
|
||||
@@ -466,8 +529,7 @@ static void zirFunc(Sema* sema, SemaBlock* block, uint32_t inst) {
|
||||
if (ret_ty_body_len == 0) {
|
||||
sema->fn_ret_ty = IP_INDEX_VOID_TYPE;
|
||||
} else if (ret_ty_body_len == 1) {
|
||||
// Single ref at payload_index + 3 (already read past by extra_index).
|
||||
ZirInstRef ret_ty_ref = sema->code.extra[payload_index + 3];
|
||||
ZirInstRef ret_ty_ref = sema->code.extra[ret_ty_ref_pos];
|
||||
// For pre-interned refs, the ZIR ref == IP index.
|
||||
assert(ret_ty_ref < ZIR_REF_START_INDEX);
|
||||
sema->fn_ret_ty = ret_ty_ref;
|
||||
@@ -487,7 +549,8 @@ static void zirFunc(Sema* sema, SemaBlock* block, uint32_t inst) {
|
||||
// Ported from src/Zcu/PerThread.zig analyzeFnBodyInner (lines 2884-2940).
|
||||
// Get param body from param_block, emit AIR arg instructions, and map
|
||||
// param ZIR instructions to their corresponding AIR arg refs.
|
||||
uint32_t param_block_inst = sema->code.extra[payload_index + 1];
|
||||
uint32_t param_block_inst
|
||||
= sema->code.extra[payload_index + param_block_pi];
|
||||
const uint32_t* param_body;
|
||||
uint32_t param_body_len;
|
||||
getParamBody(sema, param_block_inst, &param_body, &param_body_len);
|
||||
@@ -787,10 +850,11 @@ static bool analyzeBodyInner(
|
||||
return false;
|
||||
}
|
||||
|
||||
// func: function declaration.
|
||||
// Ported from src/Sema.zig zirFunc.
|
||||
// func/func_inferred/func_fancy: function declaration.
|
||||
// Ported from src/Sema.zig zirFunc / zirFuncFancy.
|
||||
case ZIR_INST_FUNC:
|
||||
case ZIR_INST_FUNC_INFERRED:
|
||||
case ZIR_INST_FUNC_FANCY:
|
||||
zirFunc(sema, block, inst);
|
||||
instMapPut(
|
||||
&sema->inst_map, inst, AIR_REF_FROM_IP(IP_INDEX_VOID_TYPE));
|
||||
@@ -872,6 +936,37 @@ static bool analyzeBodyInner(
|
||||
i++;
|
||||
continue;
|
||||
|
||||
// export: @export builtin. Record the exported declaration name
|
||||
// so that zirFunc can later analyze the target function's body.
|
||||
// Ported from src/Sema.zig zirExport (subset: only extracts
|
||||
// the declaration name from simple decl_ref/decl_val targets).
|
||||
case ZIR_INST_EXPORT: {
|
||||
uint32_t payload_index
|
||||
= sema->code.inst_datas[inst].pl_node.payload_index;
|
||||
uint32_t exported_ref = sema->code.extra[payload_index];
|
||||
if (exported_ref >= ZIR_REF_START_INDEX) {
|
||||
uint32_t exported_inst
|
||||
= exported_ref - ZIR_REF_START_INDEX;
|
||||
ZirInstTag etag = sema->code.inst_tags[exported_inst];
|
||||
if (etag == ZIR_INST_DECL_REF
|
||||
|| etag == ZIR_INST_DECL_VAL) {
|
||||
uint32_t name_idx
|
||||
= sema->code.inst_datas[exported_inst]
|
||||
.str_tok.start;
|
||||
if (sema->num_exported_decl_names
|
||||
< MAX_EXPORTED_DECL_NAMES) {
|
||||
s_exported_decl_names
|
||||
[sema->num_exported_decl_names++] = name_idx;
|
||||
}
|
||||
}
|
||||
}
|
||||
instMapPut(
|
||||
&sema->inst_map, inst,
|
||||
AIR_REF_FROM_IP(IP_INDEX_VOID_VALUE));
|
||||
i++;
|
||||
continue;
|
||||
}
|
||||
|
||||
// For all other instructions, produce a void mapping and skip.
|
||||
// As handlers are implemented, they will replace this default.
|
||||
default: {
|
||||
|
||||
@@ -150,6 +150,11 @@ typedef struct Sema {
|
||||
// set by zirStructDecl while iterating declarations.
|
||||
uint32_t cur_decl_name; // index into code.string_bytes; 0 = none
|
||||
bool cur_decl_is_export;
|
||||
// Declaration names exported via @export builtin (ZIR_INST_EXPORT).
|
||||
// Populated by analyzeBodyInner when processing comptime blocks
|
||||
// that contain @export. Used by zirFunc to decide whether to analyze
|
||||
// non-declaration-level-exported functions.
|
||||
uint32_t num_exported_decl_names;
|
||||
} Sema;
|
||||
|
||||
#define SEMA_DEFAULT_BRANCH_QUOTA 1000
|
||||
|
||||
@@ -97,11 +97,11 @@ const corpus_files = .{
|
||||
"../lib/std/crypto/codecs.zig", // 165
|
||||
"../lib/std/os/uefi/tables/table_header.zig", // 214
|
||||
"../lib/std/zig/llvm.zig", // 247
|
||||
//"../lib/compiler_rt/neghf2.zig", // 265 -- disabled: @export produces Zig AIR, C sema not yet ported
|
||||
//"../lib/compiler_rt/negxf2.zig", // 265 -- disabled: @export produces Zig AIR, C sema not yet ported
|
||||
//"../lib/compiler_rt/absvdi2.zig", // 311 -- disabled: @export produces Zig AIR, C sema not yet ported
|
||||
//"../lib/compiler_rt/absvsi2.zig", // 311 -- disabled: @export produces Zig AIR, C sema not yet ported
|
||||
//"../lib/compiler_rt/absvti2.zig", // 314 -- disabled: @export produces Zig AIR, C sema not yet ported
|
||||
//"../lib/compiler_rt/neghf2.zig", // 265 -- @export+func_fancy handled; body analysis incomplete
|
||||
//"../lib/compiler_rt/negxf2.zig", // 265 -- @export+func_fancy handled; body analysis incomplete
|
||||
//"../lib/compiler_rt/absvdi2.zig", // 311 -- @export+func_fancy handled; body analysis incomplete
|
||||
//"../lib/compiler_rt/absvsi2.zig", // 311 -- @export+func_fancy handled; body analysis incomplete
|
||||
//"../lib/compiler_rt/absvti2.zig", // 314 -- @export+func_fancy handled; body analysis incomplete
|
||||
//"../lib/compiler_rt/addhf3.zig", // 319
|
||||
//"../lib/compiler_rt/addxf3.zig", // 323
|
||||
//"../lib/compiler_rt/mulhf3.zig", // 323
|
||||
|
||||
Reference in New Issue
Block a user