sema: handle struct_decl declarations and ZIR_INST_INT

Add enough C sema handling so that `const x = 42;` interns the integer
value 42 in the InternPool, matching the Zig reference implementation.

- Add Declaration.Flags.Id helper functions (hasName, hasLibName,
  hasTypeBody, hasValueBody, hasSpecialBodies) ported from Zir.zig
- Add zirInt handler to intern comptime integer values
- Add zirStructDecl handler to parse struct_decl extra payload,
  iterate declarations, and analyze their value bodies
- Add cross-check test comparing C and Zig InternPool entries

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
2026-02-19 06:56:42 +00:00
parent 9504050c84
commit db33d34d6d
2 changed files with 218 additions and 2 deletions

View File

@@ -235,6 +235,173 @@ static void zirDbgStmt(Sema* sema, SemaBlock* block, uint32_t inst) {
(void)blockAddInst(block, AIR_INST_DBG_STMT, data);
}
// --- Declaration.Flags.Id helpers ---
// Ported from lib/std/zig/Zir.zig Declaration.Flags.Id methods.
// The Id is a 5-bit enum packed in the upper bits of Declaration.flags_1.
//
// Enum values (matching Zir.zig order):
// 0=unnamed_test, 1=test, 2=decltest, 3=comptime,
// 4=const_simple, 5=const_typed, 6=const,
// 7=pub_const_simple, 8=pub_const_typed, 9=pub_const,
// 10=extern_const_simple, 11=extern_const,
// 12=pub_extern_const_simple, 13=pub_extern_const,
// 14=export_const, 15=pub_export_const,
// 16=var_simple, 17=var, 18=var_threadlocal,
// 19=pub_var_simple, 20=pub_var, 21=pub_var_threadlocal,
// 22=extern_var, 23=extern_var_threadlocal,
// 24=pub_extern_var, 25=pub_extern_var_threadlocal,
// 26=export_var, 27=export_var_threadlocal,
// 28=pub_export_var, 29=pub_export_var_threadlocal
// Reports whether a declaration with the given Id stores a name word in
// its trailing data. Only unnamed_test (0) and comptime (3) are nameless;
// every other value, including ones outside the enum, reports true.
static bool declIdHasName(uint32_t id) {
    switch (id) {
    case 0: // unnamed_test
    case 3: // comptime
        return false;
    default:
        return true;
    }
}
// Reports whether a declaration with the given Id stores a lib_name word
// in its trailing data. True only for the non-"simple" extern forms:
// extern_const (11), pub_extern_const (13) and the four extern_var
// variants (22-25).
static bool declIdHasLibName(uint32_t id) {
    switch (id) {
    case 11: // extern_const
    case 13: // pub_extern_const
    case 22: // extern_var
    case 23: // extern_var_threadlocal
    case 24: // pub_extern_var
    case 25: // pub_extern_var_threadlocal
        return true;
    default:
        return false;
    }
}
// Reports whether a declaration with the given Id stores a type_body_len
// word in its trailing data. False for the untyped constructs (ids 0-3)
// and for the "simple" const/var forms (4, 7, 16, 19); true otherwise.
// NOTE(review): confirm against the targeted Zir.zig that var_simple (16)
// and pub_var_simple (19) really omit the type body.
static bool declIdHasTypeBody(uint32_t id) {
    // unnamed_test, test, decltest, comptime
    if (id <= 3)
        return false;
    switch (id) {
    case 4: // const_simple
    case 7: // pub_const_simple
    case 16: // var_simple
    case 19: // pub_var_simple
        return false;
    default:
        return true;
    }
}
// Reports whether a declaration with the given Id stores a value_body_len
// word in its trailing data. False exactly for the extern forms, which
// occupy two contiguous id ranges: 10-13 (extern consts) and 22-25
// (extern vars).
static bool declIdHasValueBody(uint32_t id) {
    const bool is_extern_const = id >= 10 && id <= 13;
    const bool is_extern_var = id >= 22 && id <= 25;
    return !is_extern_const && !is_extern_var;
}
// Reports whether a declaration with the given Id stores the three
// "special" body lengths (align, linksection, addrspace) in its trailing
// data. False for the untyped constructs (0-3), the simple/typed const
// forms (4, 5, 7, 8), the simple extern consts (10, 12) and the simple
// vars (16, 19); true otherwise.
static bool declIdHasSpecialBodies(uint32_t id) {
    switch (id) {
    case 0: // unnamed_test
    case 1: // test
    case 2: // decltest
    case 3: // comptime
    case 4: // const_simple
    case 5: // const_typed
    case 7: // pub_const_simple
    case 8: // pub_const_typed
    case 10: // extern_const_simple
    case 12: // pub_extern_const_simple
    case 16: // var_simple
    case 19: // pub_var_simple
        return false;
    default:
        return true;
    }
}
// Forward declaration: zirStructDecl calls analyzeBodyInner on each
// declaration's value body, and analyzeBodyInner (defined further down)
// dispatches struct_decl back to zirStructDecl, so the two are mutually
// recursive. `body` points at `body_len` ZIR instruction indices.
static bool analyzeBodyInner(
Sema* sema, SemaBlock* block, const uint32_t* body, uint32_t body_len);
// zirInt: handle the ZIR `int` instruction.
// Builds an IP_KEY_INT key of type comptime_int holding the instruction's
// 64-bit literal (always non-negative), interns it, and records the
// resulting InternPool index as the AIR ref for `inst` in the inst map.
// Ported from src/Sema.zig zirInt.
static void zirInt(Sema* sema, uint32_t inst) {
    const uint64_t literal = sema->code.inst_datas[inst].int_val;

    // Zero the whole key first so unused union bytes are zero too.
    InternPoolKey int_key;
    memset(&int_key, 0, sizeof(int_key));
    int_key.tag = IP_KEY_INT;
    int_key.data.int_val.ty = IP_INDEX_COMPTIME_INT_TYPE;
    int_key.data.int_val.is_negative = false;
    int_key.data.int_val.value = literal;

    const uint32_t interned = ipIntern(sema->ip, int_key);
    instMapPut(&sema->inst_map, inst, AIR_REF_FROM_IP(interned));
}
// declValueBodySpan: locate the value body of one `declaration`.
// `payload` is the index in `extra` where the Declaration payload starts.
// Stores the index of the first value-body word in *body_start and returns
// the value body length in words (0 when the declaration has none).
// Follows the trailing-data order used by Zir.zig getDeclaration:
// name, lib_name, type_body_len, align/linksection/addrspace lens,
// value_body_len, then the bodies themselves in that same order.
static uint32_t declValueBodySpan(
    const uint32_t* extra, uint32_t payload, uint32_t* body_start) {
    // Declaration header is 6 words: src_hash×4, flags_0, flags_1.
    // The Id sits in bits 59-63 of the packed u64 flags, i.e. the top
    // five bits of flags_1.
    const uint32_t id = (extra[payload + 5] >> 27) & 0x1F;

    uint32_t pos = payload + 6;
    pos += declIdHasName(id) ? 1u : 0u;
    pos += declIdHasLibName(id) ? 1u : 0u;

    // Total words taken by the bodies that precede the value body.
    uint32_t preceding_words = 0;
    if (declIdHasTypeBody(id)) {
        preceding_words += extra[pos];
        pos += 1;
    }
    if (declIdHasSpecialBodies(id)) {
        preceding_words += extra[pos]; // align
        preceding_words += extra[pos + 1]; // linksection
        preceding_words += extra[pos + 2]; // addrspace
        pos += 3;
    }

    uint32_t value_len = 0;
    if (declIdHasValueBody(id)) {
        value_len = extra[pos];
        pos += 1;
    }

    *body_start = pos + preceding_words;
    return value_len;
}

// zirStructDecl: handle the struct_decl extended instruction.
// Walks the StructDecl trailing data to find the list of declaration
// instructions, then runs analyzeBodyInner over the value body of every
// declaration that has a non-empty one. Captures, fields_len and the
// backing integer are stepped over without being analyzed.
// Ported from src/Sema.zig zirStructDecl (subset) and
// lib/std/zig/Zir.zig declIterator / getDeclaration.
static void zirStructDecl(Sema* sema, SemaBlock* block, uint32_t inst) {
    const uint16_t small = sema->code.inst_datas[inst].extended.small;

    // Step over the fixed StructDecl header: fields_hash×4, src_line,
    // src_node.
    uint32_t cursor = sema->code.inst_datas[inst].extended.operand + 6;

    // Optional length words; each is present only when its bit in the
    // packed Small flags is set (bit 0 captures, bit 1 fields, bit 2
    // decls, bit 3 backing int).
    uint32_t captures_len = 0;
    if ((small & 0x1) != 0) {
        captures_len = sema->code.extra[cursor];
        cursor += 1;
    }
    if ((small & 0x2) != 0) {
        cursor += 1; // fields_len is not needed here
    }
    uint32_t decls_len = 0;
    if ((small & 0x4) != 0) {
        decls_len = sema->code.extra[cursor];
        cursor += 1;
    }

    // Each capture occupies two words.
    cursor += captures_len * 2;

    if ((small & 0x8) != 0) {
        const uint32_t backing_len = sema->code.extra[cursor];
        cursor += 1;
        // A zero length means a single backing_int_ref word follows;
        // otherwise the backing-int body of that many words follows.
        cursor += (backing_len == 0) ? 1u : backing_len;
    }

    // cursor now addresses the first entry of the declaration list.
    for (uint32_t d = 0; d < decls_len; d++) {
        const uint32_t decl_inst = sema->code.extra[cursor + d];
        assert(sema->code.inst_tags[decl_inst] == ZIR_INST_DECLARATION);

        uint32_t body_start = 0;
        const uint32_t body_len = declValueBodySpan(sema->code.extra,
            sema->code.inst_datas[decl_inst].declaration.payload_index,
            &body_start);
        if (body_len == 0)
            continue; // nothing to analyze for this declaration

        (void)analyzeBodyInner(
            sema, block, &sema->code.extra[body_start], body_len);
    }
}
// --- analyzeBodyInner ---
// Ported from src/Sema.zig analyzeBodyInner.
// Main dispatch loop: iterates over ZIR instructions in a body and
@@ -271,10 +438,20 @@ static bool analyzeBodyInner(
case ZIR_INST_RET_IMPLICIT:
return false;
// int: intern a comptime integer literal.
case ZIR_INST_INT:
zirInt(sema, inst);
i++;
continue;
// extended: handle extended opcodes.
case ZIR_INST_EXTENDED: {
// For now, skip all extended opcodes.
// struct_decl, enum_decl, etc. need full type machinery.
uint16_t opcode = sema->code.inst_datas[inst].extended.opcode;
if (opcode == ZIR_EXT_STRUCT_DECL) {
zirStructDecl(sema, block, inst);
}
// Map the extended instruction to void; full type
// machinery is not yet implemented.
AirInstRef air_ref = AIR_REF_FROM_IP(IP_INDEX_VOID_TYPE);
instMapPut(&sema->inst_map, inst, air_ref);
i++;

View File

@@ -1317,3 +1317,42 @@ fn expectKeysEqual(c_key: sc.InternPoolKey, zig_key: ZigIP.Key, index: u32) !voi
},
}
}
test "sema: const x = 42 intern pool comparison" {
    // Index at which the first entry after the 124 pre-interned ones lands.
    const first_new_index: u32 = 124;
    const allocator = std.testing.allocator;
    const src: [:0]const u8 = "const x = 42;";

    // C side: parse → astgen → sema, each stage torn down on scope exit.
    var ast = c.astParse(src.ptr, @intCast(src.len));
    defer c.astDeinit(&ast);
    var zir = c.astGen(&ast);
    defer c.zirDeinit(&zir);
    var c_pool = sc.ipInit();
    defer sc.ipDeinit(&c_pool);
    var c_sema = sc.semaInit(&c_pool, @bitCast(zir));
    defer sc.semaDeinit(&c_sema);
    var c_air = sc.semaAnalyze(&c_sema);
    defer sc.airDeinit(&c_air);

    // Sema must have interned at least one entry past the pre-interned set.
    try std.testing.expect(c_pool.items_len > first_new_index);

    // Zig side: fresh reference pool with the same value interned by hand.
    var ref_pool: ZigIP = ZigIP.empty;
    try ref_pool.init(allocator, 1);
    defer ref_pool.deinit(allocator);
    const ref_index = try ref_pool.get(allocator, .main, .{ .int = .{
        .ty = .comptime_int_type,
        .storage = .{ .u64 = 42 },
    } });

    // The reference pool must place the new entry at that same index.
    try std.testing.expectEqual(first_new_index, @intFromEnum(ref_index));

    // Finally the two keys stored at that index must agree.
    const c_key = sc.ipIndexToKey(&c_pool, first_new_index);
    const ref_key = ref_pool.indexToKey(ref_index);
    try expectKeysEqual(c_key, ref_key, first_new_index);
}