sema: add inline function call support and declaration table

Port same-file inline function call infrastructure from upstream:
- parseFuncZir to extract ZIR body/ret-type info for func/func_fancy
- zirCall for same-file inline function calls (decl_val → func lookup →
  inline body analysis with dbg_inline_block, dbg_arg_inline, br)
- Declaration table (decl_names/decl_insts) built by zirStructDecl
- decl_val/decl_ref dispatch, field_call dispatch
- restore_err_ret_index as no-op
- DBG_INLINE_BLOCK/BLOCK in semaTypeOf
- DBG_INLINE_BLOCK extra canonicalization in test comparison

Add unit tests: same-file inline call, inline call with bitcast+xor,
inline call with two args.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
2026-02-20 14:57:39 +00:00
parent 93c97b42d1
commit a3dd84ede0
4 changed files with 459 additions and 9 deletions

View File

@@ -480,7 +480,7 @@ static TypeIndex semaTypeOf(Sema* sema, AirInstRef ref) {
case AIR_INST_CMP_GT:
case AIR_INST_CMP_NEQ:
return IP_INDEX_BOOL_TYPE;
// ty_op: type from ty_ref field.
// ty_op / ty_pl: type from ty_ref field.
case AIR_INST_BITCAST:
case AIR_INST_INTCAST:
case AIR_INST_TRUNC:
@@ -494,6 +494,8 @@ static TypeIndex semaTypeOf(Sema* sema, AirInstRef ref) {
case AIR_INST_CTZ:
case AIR_INST_POPCOUNT:
case AIR_INST_BYTE_SWAP:
case AIR_INST_DBG_INLINE_BLOCK:
case AIR_INST_BLOCK:
return AIR_REF_TO_IP(sema->air_inst_datas[inst_idx].ty_op.ty_ref);
default:
assert(0 && "semaTypeOf: unhandled AIR tag");
@@ -918,6 +920,337 @@ static void zirInt(Sema* sema, uint32_t inst) {
instMapPut(&sema->inst_map, inst, AIR_REF_FROM_IP(ip_index));
}
// FuncZirInfo: parsed ZIR info for a func/func_fancy instruction.
// Filled in by parseFuncZir. The positional fields are indices into the
// ZIR extra array (code.extra), not pointers, so they stay meaningful only
// for the ZIR they were parsed from.
typedef struct {
uint32_t body_len; // number of entries in the function body
uint32_t extra_index; // points to body instructions in code.extra
uint32_t ret_ty_body_len; // 0 = no return type recorded, 1 = single entry, >1 = body
uint32_t ret_ty_ref_pos; // index in code.extra of the first return-type entry
uint32_t param_block_pi; // offset from payload_index for param_block
bool is_fancy; // true when the instruction tag is func_fancy
bool has_cc_body; // for func_fancy: whether cc body is present
} FuncZirInfo;
// parseFuncZir: decode the payload of a func / func_fancy ZIR instruction.
// Ported from the parsing portion of zirFunc.
//
// Walks the instruction's words in code.extra and records where the return
// type entry and the function body start. Any tag other than func_fancy is
// decoded with the plain func layout.
//
// Returns a FuncZirInfo whose unused fields are zero.
static FuncZirInfo parseFuncZir(Sema* sema, uint32_t inst) {
    // Flag bits of the func_fancy `bits` word that affect the layout.
    const uint32_t bit_cc_ref = 1u << 3;
    const uint32_t bit_cc_body = 1u << 4;
    const uint32_t bit_ret_ty_ref = 1u << 5;
    const uint32_t bit_ret_ty_body = 1u << 6;
    const uint32_t bit_any_noalias = 1u << 7;

    const uint32_t* extra = sema->code.extra;
    const uint32_t base = sema->code.inst_datas[inst].pl_node.payload_index;

    FuncZirInfo out;
    memset(&out, 0, sizeof(out));
    out.is_fancy = (sema->code.inst_tags[inst] == ZIR_INST_FUNC_FANCY);

    if (!out.is_fancy) {
        // Plain layout: {ret_ty, param_block, body_len}, then the return
        // type entries, then the body.
        out.ret_ty_body_len = extra[base] & 0x7FFFFFFF; // top bit masked off
        out.param_block_pi = 1;
        out.body_len = extra[base + 2];

        uint32_t cursor = base + 3;
        if (out.ret_ty_body_len >= 1) {
            out.ret_ty_ref_pos = cursor;
            cursor += out.ret_ty_body_len;
        }
        out.extra_index = cursor;
        return out;
    }

    // Fancy layout: {param_block, body_len, bits}, then optional trailing
    // sections selected by `bits`, then the body.
    out.param_block_pi = 0;
    out.body_len = extra[base + 1];
    const uint32_t bits = extra[base + 2];
    uint32_t cursor = base + 3;

    // Calling convention: either a length-prefixed body or a single ref.
    out.has_cc_body = (bits & bit_cc_body) != 0;
    if (out.has_cc_body) {
        cursor += 1 + extra[cursor];
    } else if ((bits & bit_cc_ref) != 0) {
        cursor += 1;
    }

    // Return type: either a length-prefixed body or a single ref.
    if ((bits & bit_ret_ty_body) != 0) {
        const uint32_t len = extra[cursor];
        out.ret_ty_body_len = len;
        out.ret_ty_ref_pos = cursor + 1;
        cursor += 1 + len;
    } else if ((bits & bit_ret_ty_ref) != 0) {
        out.ret_ty_body_len = 1;
        out.ret_ty_ref_pos = cursor;
        cursor += 1;
    }

    // One extra word is present when any parameter is noalias.
    if ((bits & bit_any_noalias) != 0) {
        cursor += 1;
    }

    out.extra_index = cursor;
    return out;
}
// findDeclFuncInst: map a declaration name to its function instruction.
//
// name_idx is a string_bytes index. The first declaration-table entry with
// that name is taken, its value body is fetched with getParamBody, and the
// first func / func_fancy / func_inferred instruction in that body is
// returned.
//
// Returns the ZIR instruction index, or UINT32_MAX when the name is not in
// the table or the body contains no function instruction.
static uint32_t findDeclFuncInst(Sema* sema, uint32_t name_idx) {
    // Linear search of the declaration table for the first matching name.
    uint32_t slot = 0;
    while (slot < sema->num_decls && sema->decl_names[slot] != name_idx) {
        slot++;
    }
    if (slot == sema->num_decls) {
        return UINT32_MAX;
    }

    const uint32_t decl_inst = sema->decl_insts[slot];
    if (decl_inst == UINT32_MAX) {
        return UINT32_MAX;
    }

    // Fetch the declaration's value body; an empty body yields no match.
    const uint32_t* body;
    uint32_t body_len;
    getParamBody(sema, decl_inst, &body, &body_len);

    for (uint32_t k = 0; k < body_len; k++) {
        const uint32_t candidate = body[k];
        switch (sema->code.inst_tags[candidate]) {
        case ZIR_INST_FUNC:
        case ZIR_INST_FUNC_FANCY:
        case ZIR_INST_FUNC_INFERRED:
            return candidate;
        default:
            break;
        }
    }
    return UINT32_MAX;
}
// resolveFuncRetType: compute the return type described by a FuncZirInfo.
// Ported from the return type resolution portion of zirFunc.
//
// Only a single-entry return type is resolved: the entry is read from
// code.extra and must be a ref below ZIR_REF_START_INDEX (asserted).
// A missing return type yields void. A multi-instruction return type body
// is not yet supported for inline calls and also yields void.
static TypeIndex resolveFuncRetType(Sema* sema, const FuncZirInfo* info) {
    if (info->ret_ty_body_len != 1) {
        // 0 entries: no return type recorded. >1 entries: unsupported body.
        return IP_INDEX_VOID_TYPE;
    }

    const ZirInstRef ty_ref = sema->code.extra[info->ret_ty_ref_pos];
    assert(ty_ref < ZIR_REF_START_INDEX);
    return ty_ref;
}
// zirCall: handle a call / field_call ZIR instruction by inlining the callee.
// Ported from src/Sema.zig zirCall / analyzeCall (inline-only subset).
//
// The callee is looked up by name in the declaration table (through
// findDeclFuncInst). Its body is analyzed in a child block, and the resulting
// instructions are wrapped in a dbg_inline_block appended to `block`.
//
// Parameters:
//   sema          - analysis state (ZIR code, AIR buffers, inst_map).
//   block         - block that receives the dbg_inline_block instruction.
//   inst          - ZIR index of the call / field_call instruction.
//   is_field_call - true for field_call (callee named by its field name),
//                   false for call (callee named by a decl_val/decl_ref).
//
// Returns a ref to the dbg_inline_block instruction, or the void value when
// the callee is not in the declaration table or has an empty body.
//
// Limits: at most 16 call arguments. Exceeding that, or inlining a callee
// that declares more parameters than the call supplies, stops the process
// (assert in debug builds, exit(1) otherwise) instead of touching memory
// outside the argument array.
static AirInstRef zirCall(Sema* sema, SemaBlock* block, uint32_t inst,
    bool is_field_call) {
    uint32_t payload_index
        = sema->code.inst_datas[inst].pl_node.payload_index;

    // Parse Call/FieldCall extra data.
    uint32_t flags = sema->code.extra[payload_index];
    uint32_t args_len = flags >> 5; // bits 5..31 = args_len
    uint32_t callee_ref;
    uint32_t arg_data_start;
    uint32_t callee_name_idx = 0; // string_bytes index for decl name
    if (is_field_call) {
        // FieldCall: {flags, obj_ptr, field_name_start}
        // For field_call, we need the field_name to find the function.
        // obj_ptr should be a decl_ref/decl_val pointing to a module.
        // We don't resolve cross-module calls; just find by field_name.
        callee_ref = sema->code.extra[payload_index + 1]; // obj_ptr
        uint32_t field_name_start
            = sema->code.extra[payload_index + 2];
        callee_name_idx = field_name_start;
        arg_data_start = payload_index + 3;
    } else {
        // Call: {flags, callee}
        callee_ref = sema->code.extra[payload_index + 1];
        arg_data_start = payload_index + 2;
    }

    // For non-field calls, resolve the callee name from decl_val/decl_ref.
    if (!is_field_call && callee_ref >= ZIR_REF_START_INDEX) {
        uint32_t callee_inst = callee_ref - ZIR_REF_START_INDEX;
        ZirInstTag callee_tag = sema->code.inst_tags[callee_inst];
        if (callee_tag == ZIR_INST_DECL_VAL
            || callee_tag == ZIR_INST_DECL_REF) {
            callee_name_idx
                = sema->code.inst_datas[callee_inst].str_tok.start;
        }
    }

    // Find the inline function's ZIR instruction.
    uint32_t func_inst = findDeclFuncInst(sema, callee_name_idx);
    if (func_inst == UINT32_MAX) {
        // Can't resolve callee; return void (fallback).
        return AIR_REF_FROM_IP(IP_INDEX_VOID_VALUE);
    }
    FuncZirInfo func_info = parseFuncZir(sema, func_inst);
    if (func_info.body_len == 0) {
        return AIR_REF_FROM_IP(IP_INDEX_VOID_VALUE);
    }

    // Resolve the argument values. Each arg has a body that produces
    // the argument value via break_inline.
    // Layout at arg_data_start (= extra.end for Call/FieldCall struct):
    //   [0..args_len]                       = end-offset for each arg
    //                                         (absolute from args_body)
    //   [args_len..end_offsets[args_len-1]] = arg body instructions
    // Arg 0 body: args_body[args_len .. end_offsets[0]]
    // Arg N body: args_body[end_offsets[N-1] .. end_offsets[N]]
    AirInstRef arg_refs[16];
    const uint32_t max_args = sizeof(arg_refs) / sizeof(arg_refs[0]);
    if (args_len > max_args) {
        // Hard check: an assert alone would vanish under NDEBUG and let the
        // loop below write past the end of arg_refs.
        assert(0 && "zirCall: too many arguments for inline call");
        exit(1);
    }
    {
        uint32_t prev_end = args_len; // arg 0 starts after end-offset table
        for (uint32_t a = 0; a < args_len; a++) {
            uint32_t arg_end_off
                = sema->code.extra[arg_data_start + a];
            uint32_t arg_body_start = arg_data_start + prev_end;
            uint32_t arg_body_len = arg_end_off - prev_end;
            // Each arg body should end with a break_inline whose
            // operand is the argument ref.
            assert(arg_body_len >= 1);
            uint32_t last_inst_idx
                = sema->code.extra[arg_body_start + arg_body_len - 1];
            ZirInstTag last_tag = sema->code.inst_tags[last_inst_idx];
            assert(last_tag == ZIR_INST_BREAK_INLINE);
            ZirInstRef arg_operand
                = sema->code.inst_datas[last_inst_idx].break_data.operand;
            arg_refs[a] = resolveInst(sema, arg_operand);
            prev_end = arg_end_off;
        }
    }

    // Resolve inline function return type.
    TypeIndex ret_ty = resolveFuncRetType(sema, &func_info);

    // Parse the inline function's parameter body.
    uint32_t param_block_inst = sema->code.extra
        [sema->code.inst_datas[func_inst].pl_node.payload_index
            + func_info.param_block_pi];
    const uint32_t* param_body;
    uint32_t param_body_len;
    getParamBody(sema, param_block_inst, &param_body, &param_body_len);

    // Reserve the dbg_inline_block instruction (data filled later).
    uint32_t block_inst_idx = addAirInst(sema,
        AIR_INST_DBG_INLINE_BLOCK,
        (AirInstData){ .ty_pl = { .ty_ref = 0, .payload = 0 } });

    // Set up child block for inlining.
    SemaBlockMerges merges;
    memset(&merges, 0, sizeof(merges));
    merges.block_inst = block_inst_idx;

    // Intern a func value for the dbg_inline_block's func field.
    // The exact value doesn't matter for correctness; it just needs to be
    // a unique IP index that the comparison can canonicalize.
    InternPoolKey func_key;
    memset(&func_key, 0, sizeof(func_key));
    func_key.tag = IP_KEY_FUNC;
    func_key.data.func = func_inst; // use ZIR inst as unique id
    InternPoolIndex func_ip = ipIntern(sema->ip, func_key);

    SemaBlockInlining inlining;
    memset(&inlining, 0, sizeof(inlining));
    inlining.call_block = block;
    inlining.func = func_ip;
    inlining.merges = merges;

    SemaBlock child_block;
    semaBlockInit(&child_block, sema, block);
    child_block.is_comptime = false;
    child_block.want_safety = false;
    child_block.want_safety_set = true;
    child_block.inlining = &inlining;

    // Map param ZIR instructions to the argument values.
    instMapEnsureSpaceForBody(
        &sema->inst_map, param_body, param_body_len);
    uint32_t param_idx = 0;
    for (uint32_t p = 0; p < param_body_len; p++) {
        ZirInstTag ptag = sema->code.inst_tags[param_body[p]];
        if (ptag == ZIR_INST_PARAM || ptag == ZIR_INST_PARAM_COMPTIME
            || ptag == ZIR_INST_PARAM_ANYTYPE
            || ptag == ZIR_INST_PARAM_ANYTYPE_COMPTIME) {
            if (param_idx >= args_len) {
                // Hard check: reading arg_refs[param_idx] here would use
                // an unset (or out-of-bounds) slot.
                assert(0 && "zirCall: callee has more params than call args");
                exit(1);
            }
            instMapPut(
                &sema->inst_map, param_body[p], arg_refs[param_idx]);
            // Emit dbg_arg_inline for each param.
            if (!child_block.is_comptime) {
                uint32_t param_payload
                    = sema->code.inst_datas[param_body[p]]
                          .pl_tok.payload_index;
                uint32_t param_name_idx
                    = sema->code.extra[param_payload];
                const char* param_name
                    = (const char*)&sema->code
                          .string_bytes[param_name_idx];
                uint32_t name_nts
                    = semaAppendAirString(sema, param_name);
                AirInstData data;
                memset(&data, 0, sizeof(data));
                data.pl_op.operand = arg_refs[param_idx];
                data.pl_op.payload = name_nts;
                (void)blockAddInst(
                    &child_block, AIR_INST_DBG_ARG_INLINE, data);
            }
            param_idx++;
        }
    }

    // Save and set the return type for the inline function.
    TypeIndex saved_fn_ret_ty = sema->fn_ret_ty;
    sema->fn_ret_ty = ret_ty;

    // Analyze the inline function body.
    const uint32_t* func_body
        = &sema->code.extra[func_info.extra_index];
    (void)analyzeBodyInner(
        sema, &child_block, func_body, func_info.body_len);
    sema->fn_ret_ty = saved_fn_ret_ty;

    // Write dbg_inline_block extra data.
    // Layout: {func(InternPoolIndex), body_len, body[0..body_len]}
    uint32_t extra_start = addAirExtra(sema, inlining.func);
    addAirExtra(sema, child_block.instructions_len);
    for (uint32_t i = 0; i < child_block.instructions_len; i++) {
        addAirExtra(sema, child_block.instructions[i]);
    }

    // Patch the dbg_inline_block instruction data.
    // If merges have results, the type is the result type.
    // Otherwise (noreturn), use noreturn_type.
    AirInstRef result_ref;
    if (inlining.merges.results_len > 0) {
        result_ref = inlining.merges.results[0];
        TypeIndex result_ty = semaTypeOf(sema, result_ref);
        sema->air_inst_datas[block_inst_idx].ty_pl.ty_ref
            = AIR_REF_FROM_IP(result_ty);
    } else {
        sema->air_inst_datas[block_inst_idx].ty_pl.ty_ref
            = AIR_REF_FROM_IP(IP_INDEX_NORETURN_TYPE);
    }
    sema->air_inst_datas[block_inst_idx].ty_pl.payload = extra_start;

    // Add the block instruction to the parent block.
    if (block->instructions_len >= block->instructions_cap) {
        // Doubling a zero capacity would stay at zero, so start from a
        // small non-zero capacity in that case.
        uint32_t new_cap = (block->instructions_cap == 0)
            ? 4
            : block->instructions_cap * 2;
        uint32_t* new_insts
            = realloc(block->instructions, new_cap * sizeof(uint32_t));
        if (!new_insts)
            exit(1);
        block->instructions = new_insts;
        block->instructions_cap = new_cap;
    }
    block->instructions[block->instructions_len++] = block_inst_idx;

    // Clean up.
    free(inlining.merges.results);
    free(inlining.merges.br_list);
    semaBlockDeinit(&child_block);
    return AIR_REF_FROM_INST(block_inst_idx);
}
// zirFunc: analyze a function declaration.
// Ported from src/Sema.zig zirFunc / zirFuncFancy / analyzeFnBodyInner.
// Handles ZIR_INST_FUNC, ZIR_INST_FUNC_INFERRED, and ZIR_INST_FUNC_FANCY.
@@ -1331,6 +1664,13 @@ static void zirStructDecl(Sema* sema, SemaBlock* block, uint32_t inst) {
di += linksection_body_len;
di += addrspace_body_len;
// Record declaration name→inst mapping for decl_val/decl_ref.
if (decl_name != 0 && sema->num_decls < 64) {
sema->decl_names[sema->num_decls] = decl_name;
sema->decl_insts[sema->num_decls] = decl_inst;
sema->num_decls++;
}
// Analyze value body if present.
if (value_body_len > 0) {
// Set declaration context so zirFunc can read name/linkage.
@@ -1399,6 +1739,40 @@ static bool analyzeBodyInner(
AirInstRef operand = resolveInst(sema, operand_ref);
// Coerce the operand to the function return type.
operand = semaCoerce(sema, block, sema->fn_ret_ty, operand);
if (block->inlining) {
// Inlining: rewrite ret as br to the inline block.
// Ported from src/Sema.zig analyzeRet (inlining path).
SemaBlockInlining* inl = block->inlining;
AirInstData br_data;
memset(&br_data, 0, sizeof(br_data));
br_data.br.block_inst = inl->merges.block_inst;
br_data.br.operand = operand;
AirInstRef br_ref
= blockAddInst(block, AIR_INST_BR, br_data);
// Record merge result and br instruction.
if (inl->merges.results_len >= inl->merges.results_cap) {
uint32_t new_cap
= (inl->merges.results_cap == 0)
? 4
: inl->merges.results_cap * 2;
inl->merges.results = realloc(
inl->merges.results,
new_cap * sizeof(AirInstRef));
inl->merges.br_list = realloc(
inl->merges.br_list,
new_cap * sizeof(uint32_t));
if (!inl->merges.results || !inl->merges.br_list)
exit(1);
inl->merges.results_cap = new_cap;
inl->merges.br_list_cap = new_cap;
}
inl->merges.results[inl->merges.results_len++] = operand;
inl->merges.br_list[inl->merges.br_list_len++]
= AIR_REF_TO_INST(br_ref);
return false;
}
AirInstData ret_data;
memset(&ret_data, 0, sizeof(ret_data));
ret_data.un_op.operand = operand;
@@ -1440,10 +1814,34 @@ static bool analyzeBodyInner(
i++;
continue;
// restore_err_ret_index_fn_entry: error return trace restore.
// restore_err_ret_index: error return trace restore.
// In ReleaseFast (no safety), this is a no-op.
// Ported from src/Sema.zig zirRestoreErrRetIndex.
case ZIR_INST_RESTORE_ERR_RET_INDEX_FN_ENTRY:
case ZIR_INST_RESTORE_ERR_RET_INDEX_UNCONDITIONAL:
i++;
continue;
// decl_val / decl_ref: reference to a module-level declaration.
// Maps to void for now; the actual resolution happens in zirCall
// when the callee is a decl_val/decl_ref.
case ZIR_INST_DECL_VAL:
case ZIR_INST_DECL_REF:
instMapPut(&sema->inst_map, inst,
AIR_REF_FROM_IP(IP_INDEX_VOID_VALUE));
i++;
continue;
// call / field_call: function call.
// Handles inline function calls from the same module.
case ZIR_INST_CALL:
instMapPut(&sema->inst_map, inst,
zirCall(sema, block, inst, false));
i++;
continue;
case ZIR_INST_FIELD_CALL:
instMapPut(&sema->inst_map, inst,
zirCall(sema, block, inst, true));
i++;
continue;

View File

@@ -155,6 +155,12 @@ typedef struct Sema {
// that contain @export. Used by zirFunc to decide whether to analyze
// non-declaration-level-exported functions.
uint32_t num_exported_decl_names;
// Declaration table: maps string_bytes index (name) to the ZIR
// declaration instruction index. Built by zirStructDecl.
// Used by decl_val/decl_ref to find declarations by name.
uint32_t decl_names[64]; // string_bytes index
uint32_t decl_insts[64]; // ZIR instruction index
uint32_t num_decls;
} Sema;
#define SEMA_DEFAULT_BRANCH_QUOTA 1000

View File

@@ -527,6 +527,14 @@ fn airCompareOne(name: []const u8, zig_air: *const c.Air, c_air: *const c.Air) !
}
const inst_len = zig_air.inst_len;
// Canonical ref maps shared between datas and extra comparisons.
var zig_ref_map = std.AutoHashMap(u32, u32).init(std.testing.allocator);
defer zig_ref_map.deinit();
var c_ref_map = std.AutoHashMap(u32, u32).init(std.testing.allocator);
defer c_ref_map.deinit();
var next_zig_id: u32 = 0;
var next_c_id: u32 = 0;
// Tags
if (inst_len > 0) {
const zig_tags: [*]const u8 = cToOpt(u8, zig_air.inst_tags) orelse {
@@ -565,12 +573,6 @@ fn airCompareOne(name: []const u8, zig_air: *const c.Air, c_air: *const c.Air) !
std.debug.print("'{s}': C inst_datas is null but inst_len={d}\n", .{ name, inst_len });
return error.AirMismatch;
});
var zig_ref_map = std.AutoHashMap(u32, u32).init(std.testing.allocator);
defer zig_ref_map.deinit();
var c_ref_map = std.AutoHashMap(u32, u32).init(std.testing.allocator);
defer c_ref_map.deinit();
var next_zig_id: u32 = 0;
var next_c_id: u32 = 0;
for (0..inst_len) |j| {
const off = j * 8;
@@ -641,6 +643,17 @@ fn airCompareOne(name: []const u8, zig_air: *const c.Air, c_air: *const c.Air) !
normalizeNtsPadding(zig_extra_copy, zig_nts);
normalizeNtsPadding(c_extra_copy, c_nts);
}
if (tags[j] == c.AIR_INST_DBG_INLINE_BLOCK) {
// ty_pl: slot 1 = payload (extra index).
// Extra layout: {func(IP ref), body_len, body...}
// Canonicalize the func IP ref.
const zig_payload = std.mem.readInt(u32, zig_datas_raw[j * 8 + 4 ..][0..4], .little);
const c_payload = std.mem.readInt(u32, c_datas_raw[j * 8 + 4 ..][0..4], .little);
if (zig_payload < extra_len and c_payload < extra_len) {
zig_extra_copy[zig_payload] = canonicalizeRef(zig_extra_copy[zig_payload], &zig_ref_map, &next_zig_id);
c_extra_copy[c_payload] = canonicalizeRef(c_extra_copy[c_payload], &c_ref_map, &next_c_id);
}
}
}
}
if (!std.mem.eql(u32, zig_extra_copy, c_extra_copy)) {
@@ -958,3 +971,36 @@ test "sema air: byteswap and xor" {
\\}
);
}
// Single-argument case: an exported function returns the result of an
// `inline fn` defined in the same source, whose body is a bitwise NOT.
// NOTE(review): semaAirRawCheck is presumably the AIR comparison between the
// C sema and the reference compiler — confirm against its definition.
test "sema air: same-file inline function call" {
try semaAirRawCheck(
\\inline fn negate(x: u16) u16 {
\\ return ~x;
\\}
\\export fn f(a: u16) u16 {
\\ return negate(a);
\\}
);
}
// Inline callee whose body XORs its u16 argument with the constant 0x8000.
// NOTE(review): the test name mentions a bitcast, but the Zig source below
// contains only the xor — confirm what the name refers to.
test "sema air: same-file inline call with bitcast and xor" {
try semaAirRawCheck(
\\inline fn flip_sign(x: u16) u16 {
\\ return x ^ 0x8000;
\\}
\\export fn f(a: u16) u16 {
\\ return flip_sign(a);
\\}
);
}
// Two-argument case: the exported function forwards both of its u32
// parameters to a same-file `inline fn` that adds them.
test "sema air: same-file inline call with two args" {
try semaAirRawCheck(
\\inline fn my_add(x: u32, y: u32) u32 {
\\ return x + y;
\\}
\\export fn f(a: u32, b: u32) u32 {
\\ return my_add(a, b);
\\}
);
}

View File

@@ -97,7 +97,7 @@ const corpus_files = .{
"../lib/std/crypto/codecs.zig", // 165
"../lib/std/os/uefi/tables/table_header.zig", // 214
"../lib/std/zig/llvm.zig", // 247
//"../lib/compiler_rt/neghf2.zig", // 265 -- needs decl_ref + field_call (inline fn from import)
//"../lib/compiler_rt/neghf2.zig", // 265 -- needs cross-module inline call (field_call)
//"../lib/compiler_rt/negxf2.zig", // 265 -- @export+func_fancy handled; body analysis incomplete
//"../lib/compiler_rt/absvdi2.zig", // 311 -- @export+func_fancy handled; body analysis incomplete
//"../lib/compiler_rt/absvsi2.zig", // 311 -- @export+func_fancy handled; body analysis incomplete