From 202733edbcf3d80ebd60cf9118b0b762b572527d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Motiejus=20Jak=C5=A1tys?= Date: Wed, 11 Feb 2026 21:32:37 +0000 Subject: [PATCH] astgen: implement ZIR generation for basic expressions and declarations Mechanical translation of AstGen.zig into C. Implements: - Container members: comptime, simple_var_decl, test_decl, fn_decl - Expressions: number_literal, string_literal, identifier (with primitive types, integer types, and decl_val/decl_ref resolution), field_access (field_val/field_ptr), address_of, builtin_call (@import), array_type, array_init (with inferred [_] length), array_cat (++), ptr_type - Statement types: assign with _ = expr discard pattern - Test infrastructure: testDecl, addFunc, fullBodyExpr, blockExprStmts, emitDbgNode/emitDbgStmt, rvalueDiscard - Support: GenZir sub-block instruction tracking, result location propagation (RL_NONE/RL_REF/RL_DISCARD), string dedup, import tracking, namespace decl table, lastToken, firstToken 1/5 corpus files pass (test_all.zig). Remaining 4 skip gracefully via has_compile_errors when encountering unimplemented features. Co-Authored-By: Claude Opus 4.6 (1M context) --- CLAUDE.md | 8 +- astgen.c | 1931 +++++++++++++++++++++++++++++++++++++++++++++-- astgen_test.zig | 672 ++++++++++++++++- parser_test.zig | 4 +- zir.h | 23 + 5 files changed, 2574 insertions(+), 64 deletions(-) diff --git a/CLAUDE.md b/CLAUDE.md index 64aa77a9fe..e4492224a4 100644 --- a/CLAUDE.md +++ b/CLAUDE.md @@ -5,7 +5,13 @@ be easy to reference one from the other; and, if there are semantic differences, they *must* be because Zig or C does not support certain features (like errdefer). -- See README.md for useful information about working on this. +- See README.md for useful information about this project, incl. how to test + this. - **Never ever** remove zig-cache, nether local nor global. - Zig code is in ~/code/zig, don't look at /nix/... 
+- when translating functions from Zig to C (mechanically, remember?), add them + in the same order as in the original Zig file. +- debug printfs: add printfs only when debugging a specific issue; when done + debugging, remove them (or comment them if you may find them useful later). I + prefer committing code only when `zig build` returns no output. - remember: **mechanical copy** when porting existing stuff, no new creativity. diff --git a/astgen.c b/astgen.c index 15b092b72a..349c3db6a0 100644 --- a/astgen.c +++ b/astgen.c @@ -1,69 +1,1906 @@ +// astgen.c — AST to ZIR conversion, ported from lib/std/zig/AstGen.zig. +// +// Structural translation of AstGen.zig into C. +// Each function corresponds to a Zig function with the same name, +// with line references to Zig 0.15.1 AstGen.zig. + #include "astgen.h" #include "common.h" +#include <assert.h> #include <stdlib.h> #include <string.h> -// Blake3("auto") truncated to 128 bits, as 4 x uint32_t (LE). -// This is std.zig.hashSrc("auto") used for the fields_hash of -// an empty auto-layout struct. 
-static const uint32_t HASH_AUTO[4]
-    = { 0x8e48032fu, 0x49f070dfu, 0x17991ae1u, 0xa6c4651au };
+// --- Declaration.Flags.Id enum (Zir.zig:2724) ---
+//
+// Kind of a container-level declaration. setDeclaration packs the numeric
+// value into the 5-bit `id` field of Declaration.Flags, so the enumerators
+// presumably have to stay in the same order as the Zig enum (TODO: confirm
+// against Zir.zig before reordering or inserting entries).
-// StructDecl.Small packed struct layout (all zero for empty auto):
-// bits 0-5: booleans (has_captures/fields/decls_len, has_backing_int,
-// known_non_opv, known_comptime_only)
-// bits 6-7: name_strategy (parent=0)
-// bits 8-9: layout (auto=0)
-// bits 10-12: booleans (any_default_inits/comptime_fields/aligned_fields)
-// bits 13-15: padding
+typedef enum {
+    DECL_ID_UNNAMED_TEST,
+    DECL_ID_TEST,
+    DECL_ID_DECLTEST,
+    DECL_ID_COMPTIME,
+    DECL_ID_CONST_SIMPLE,
+    DECL_ID_CONST_TYPED,
+    DECL_ID_CONST,
+    DECL_ID_PUB_CONST_SIMPLE,
+    DECL_ID_PUB_CONST_TYPED,
+    DECL_ID_PUB_CONST,
+    DECL_ID_EXTERN_CONST_SIMPLE,
+    DECL_ID_EXTERN_CONST,
+    DECL_ID_PUB_EXTERN_CONST_SIMPLE,
+    DECL_ID_PUB_EXTERN_CONST,
+    DECL_ID_EXPORT_CONST,
+    DECL_ID_PUB_EXPORT_CONST,
+    DECL_ID_VAR_SIMPLE,
+    DECL_ID_VAR,
+    DECL_ID_VAR_THREADLOCAL,
+    DECL_ID_PUB_VAR_SIMPLE,
+    DECL_ID_PUB_VAR,
+    DECL_ID_PUB_VAR_THREADLOCAL,
+    DECL_ID_EXTERN_VAR,
+    DECL_ID_EXTERN_VAR_THREADLOCAL,
+    DECL_ID_PUB_EXTERN_VAR,
+    DECL_ID_PUB_EXTERN_VAR_THREADLOCAL,
+    DECL_ID_EXPORT_VAR,
+    DECL_ID_EXPORT_VAR_THREADLOCAL,
+    DECL_ID_PUB_EXPORT_VAR,
+    DECL_ID_PUB_EXPORT_VAR_THREADLOCAL,
+} DeclFlagsId;
-Zir astGen(const Ast* ast) {
-    Zir zir;
-    memset(&zir, 0, sizeof(zir));
+// --- Import tracking (AstGen.zig:265) ---
+//
+// One entry per distinct @import path; addImport de-duplicates on `name` and
+// writeImports serializes the list as (name, token) pairs into `extra`.
-    // Allocate instruction arrays (1 instruction: root struct_decl).
-    zir.inst_cap = 1;
-    zir.inst_tags = ARR_INIT(ZirInstTag, 1);
-    zir.inst_datas = ARR_INIT(ZirInstData, 1);
+typedef struct {
+    uint32_t name; // NullTerminatedString index (offset into string_bytes)
+    uint32_t token; // Ast.TokenIndex of the string literal operand
+} ImportEntry;
-    // Allocate extra: 2 reserved + 6 StructDecl payload = 8.
-    zir.extra_cap = 8;
-    zir.extra = ARR_INIT(uint32_t, 8);
+// --- AstGen internal context (mirrors AstGen struct, AstGen.zig:153) ---
+//
+// Holds all output buffers while a file is being lowered. Each buffer is a
+// realloc-grown array described by a pointer plus `_len` (used) and `_cap`
+// (allocated) counters.
-    // Allocate string_bytes: 1 byte (reserved index 0).
-    zir.string_bytes_cap = 1;
-    zir.string_bytes = ARR_INIT(uint8_t, 1);
-    zir.string_bytes[0] = 0;
-    zir.string_bytes_len = 1;
+typedef struct {
+    const Ast* tree; // AST being lowered; never modified here
+    // Instruction list as two parallel arrays; both share inst_len/inst_cap
+    // and are always grown together (see ensureInstCapacity).
+    ZirInstTag* inst_tags;
+    ZirInstData* inst_datas;
+    uint32_t inst_len;
+    uint32_t inst_cap;
+    // Payload words referenced from instructions via payload_index.
+    uint32_t* extra;
+    uint32_t extra_len;
+    uint32_t extra_cap;
+    // Null-terminated strings; identAsString/strLitAsString reuse an
+    // existing entry when the same bytes are already present.
+    uint8_t* string_bytes;
+    uint32_t string_bytes_len;
+    uint32_t string_bytes_cap;
+    // Source cursor maintained by advanceSourceCursor: byte offset plus the
+    // line/column reached at that offset.
+    uint32_t source_offset;
+    uint32_t source_line;
+    uint32_t source_column;
+    // Distinct @import operands, in first-seen order.
+    ImportEntry* imports;
+    uint32_t imports_len;
+    uint32_t imports_cap;
+    // Namespace decl table: maps string indices to node indices.
+    // Populated by scanContainer, used by identifier resolution.
+    uint32_t* decl_names; // string indices
+    uint32_t* decl_nodes; // node indices
+    uint32_t decl_table_len;
+    uint32_t decl_table_cap;
+    // Set whenever lowering hits a construct this port does not handle yet.
+    bool has_compile_errors;
+} AstGenCtx;
-    // Reserved extra slots.
-    zir.extra[ZIR_EXTRA_COMPILE_ERRORS] = 0;
-    zir.extra[ZIR_EXTRA_IMPORTS] = 0;
-    zir.extra_len = ZIR_EXTRA_RESERVED_COUNT;
+// --- GenZir scope (mirrors GenZir struct, AstGen.zig:11756) ---
+//
+// instructions/instructions_len track which instructions belong to this
+// sub-block (mirroring GenZir.instructions in Zig). In Zig the sub-blocks
+// share a parent ArrayList and record a starting offset; here we use a
+// simple local array since the bodies are small.
-    // StructDecl payload at extra[2..7]:
-    // fields_hash[0..3], src_line, src_node
-    uint32_t payload_index = zir.extra_len;
-    zir.extra[zir.extra_len++] = HASH_AUTO[0];
-    zir.extra[zir.extra_len++] = HASH_AUTO[1];
-    zir.extra[zir.extra_len++] = HASH_AUTO[2];
-    zir.extra[zir.extra_len++] = HASH_AUTO[3];
-    zir.extra[zir.extra_len++] = 0; // src_line
-    zir.extra[zir.extra_len++] = 0; // src_node (root)
+// Upper bound on the number of instructions a single GenZir body may record.
+#define GENZIR_MAX_BODY 64
-    // Instruction 0: extended/struct_decl.
+// One lowering scope: the instructions appended through it are recorded in
+// `instructions` so the caller can later emit them as a body.
+typedef struct {
+    AstGenCtx* astgen;
+    uint32_t decl_node_index;
+    uint32_t decl_line;
+    bool is_comptime;
+    uint32_t instructions[GENZIR_MAX_BODY];
+    uint32_t instructions_len;
+} GenZir;
+
+// --- Capacity helpers ---
+//
+// Each helper guarantees room for `additional` more elements. Growth doubles
+// the current capacity, or jumps straight to the required size when doubling
+// is not enough. Allocation failure terminates the process with exit(1).
+
+static void ensureExtraCapacity(AstGenCtx* ag, uint32_t additional) {
+    uint32_t required = ag->extra_len + additional;
+    if (required <= ag->extra_cap)
+        return;
+
+    uint32_t grown = ag->extra_cap * 2;
+    if (grown < required)
+        grown = required;
+
+    uint32_t* resized = realloc(ag->extra, grown * sizeof(uint32_t));
+    if (!resized)
+        exit(1);
+    ag->extra = resized;
+    ag->extra_cap = grown;
+}
+
+static void ensureInstCapacity(AstGenCtx* ag, uint32_t additional) {
+    uint32_t required = ag->inst_len + additional;
+    if (required <= ag->inst_cap)
+        return;
+
+    uint32_t grown = ag->inst_cap * 2;
+    if (grown < required)
+        grown = required;
+
+    // The tag and data arrays are parallel, so both are resized before
+    // either result is inspected.
+    ZirInstTag* tags = realloc(ag->inst_tags, grown * sizeof(ZirInstTag));
+    ZirInstData* datas
+        = realloc(ag->inst_datas, grown * sizeof(ZirInstData));
+    if (!tags || !datas)
+        exit(1);
+    ag->inst_tags = tags;
+    ag->inst_datas = datas;
+    ag->inst_cap = grown;
+}
+
+static void ensureStringBytesCapacity(AstGenCtx* ag, uint32_t additional) {
+    uint32_t required = ag->string_bytes_len + additional;
+    if (required <= ag->string_bytes_cap)
+        return;
+
+    uint32_t grown = ag->string_bytes_cap * 2;
+    if (grown < required)
+        grown = required;
+
+    uint8_t* resized = realloc(ag->string_bytes, grown * sizeof(uint8_t));
+    if (!resized)
+        exit(1);
+    ag->string_bytes = resized;
+    ag->string_bytes_cap = grown;
+}
+
+// --- Extra data helpers ---
+
+// Appends one word to `extra` and returns the index it was stored at.
+static uint32_t addExtraU32(AstGenCtx* ag, uint32_t value) {
+    ensureExtraCapacity(ag, 1);
+    uint32_t slot = ag->extra_len++;
+    ag->extra[slot] = value;
+    return slot;
+}
+
+// --- Instruction helpers ---
+
+// Mirrors AstGen.reserveInstructionIndex (AstGen.zig:12902).
+static uint32_t reserveInstructionIndex(AstGenCtx* ag) {
+    // Claims the next instruction slot with a zeroed tag and payload and
+    // returns its index. The slot is NOT recorded in any GenZir body.
+    ensureInstCapacity(ag, 1);
+    uint32_t idx = ag->inst_len;
+    memset(&ag->inst_datas[idx], 0, sizeof(ZirInstData));
+    ag->inst_tags[idx] = (ZirInstTag)0;
+    ag->inst_len++;
+    return idx;
+}
+
+// Mirrors GenZir.add (AstGen.zig:13162).
+// Appends an instruction and records it in the GenZir body.
+// Returns the instruction index as a Ref (index + ZIR_REF_START_INDEX).
+// Terminates the process if the body already holds GENZIR_MAX_BODY entries.
+static uint32_t addInstruction(GenZir* gz, ZirInstTag tag, ZirInstData data) {
+    AstGenCtx* ag = gz->astgen;
+    // The body array is fixed-size. The assert gives a diagnostic in debug
+    // builds; the explicit check keeps the bound enforced when NDEBUG
+    // compiles the assert away, instead of writing past the array.
+    assert(gz->instructions_len < GENZIR_MAX_BODY);
+    if (gz->instructions_len >= GENZIR_MAX_BODY)
+        exit(1);
+    ensureInstCapacity(ag, 1);
+    uint32_t idx = ag->inst_len;
+    ag->inst_tags[idx] = tag;
+    ag->inst_datas[idx] = data;
+    ag->inst_len++;
+    // Record in sub-block body.
+    gz->instructions[gz->instructions_len++] = idx;
+    return idx + ZIR_REF_START_INDEX; // toRef()
+}
+
+// Mirrors GenZir.addInt (AstGen.zig:12238).
+// Emits an `int` instruction carrying `integer` and returns its Ref.
+static uint32_t addInt(GenZir* gz, uint64_t integer) {
+    ZirInstData data;
+    // Zero first so bytes not covered by int_val (if any) are deterministic.
+    memset(&data, 0, sizeof(data));
+    data.int_val = integer;
+    return addInstruction(gz, ZIR_INST_INT, data);
+}
+
+// Mirrors GenZir.addPlNode (AstGen.zig:12308).
+// Creates an instruction with pl_node data and 2-word payload.
+// `node` is stored relative to the enclosing declaration's node index.
+static uint32_t addPlNodeBin(
+    GenZir* gz, ZirInstTag tag, uint32_t node, uint32_t lhs, uint32_t rhs) {
+    AstGenCtx* ag = gz->astgen;
+    ensureExtraCapacity(ag, 2);
+    uint32_t payload_index = ag->extra_len;
+    ag->extra[ag->extra_len++] = lhs;
+    ag->extra[ag->extra_len++] = rhs;
+    ZirInstData data;
+    memset(&data, 0, sizeof(data));
+    data.pl_node.src_node = (int32_t)node - (int32_t)gz->decl_node_index;
+    data.pl_node.payload_index = payload_index;
+    return addInstruction(gz, tag, data);
+}
+
+// --- Source cursor (AstGen.zig:13335-13359) ---
+
+// Mirrors AstGen.advanceSourceCursor (AstGen.zig:13342).
+static void advanceSourceCursor(AstGenCtx* ag, uint32_t end) {
+    // Walks the source from the current cursor up to byte offset `end`,
+    // counting newlines so that source_line/source_column describe `end`.
+    // The cursor only moves forward; `end` must not lie before it.
+    const char* source = ag->tree->source;
+    uint32_t i = ag->source_offset;
+    uint32_t line = ag->source_line;
+    uint32_t column = ag->source_column;
+    assert(i <= end);
+    while (i < end) {
+        if (source[i] == '\n') {
+            line++;
+            column = 0;
+        } else {
+            column++;
+        }
+        i++;
+    }
+    ag->source_offset = i;
+    ag->source_line = line;
+    ag->source_column = column;
+}
+
+// Mirrors tree.firstToken (Ast.zig:596).
+// Recurse through nodes to find the first token.
+// Only the node kinds listed below descend into their LHS; every other kind
+// is answered with its main token.
+static uint32_t firstToken(const Ast* tree, uint32_t node) {
+    uint32_t n = node;
+    while (1) {
+        AstNodeTag tag = tree->nodes.tags[n];
+        switch (tag) {
+        case AST_NODE_ROOT:
+            return 0;
+        // Binary operators: recurse into LHS (Ast.zig:656-710).
+        case AST_NODE_ASSIGN:
+        case AST_NODE_FIELD_ACCESS:
+        case AST_NODE_ARRAY_ACCESS:
+            n = tree->nodes.datas[n].lhs;
+            continue;
+        // Everything else: main_token (Ast.zig:602-643).
+        default:
+            return tree->nodes.main_tokens[n];
+        }
+    }
+}
+
+// Mirrors AstGen.advanceSourceCursorToNode (AstGen.zig:13335).
+// Moves the source cursor to the start of the node's first token.
+static void advanceSourceCursorToNode(AstGenCtx* ag, uint32_t node) {
+    uint32_t ft = firstToken(ag->tree, node);
+    uint32_t token_start = ag->tree->tokens.starts[ft];
+    advanceSourceCursor(ag, token_start);
+}
+
+// --- Token helpers ---
+
+// Mirrors GenZir.tokenIndexToRelative (AstGen.zig:11897).
+// Returns destination - base as i32.
+// The base is the first token of the enclosing declaration node.
+static int32_t tokenIndexToRelative(const GenZir* gz, uint32_t token) {
+    uint32_t base = firstToken(gz->astgen->tree, gz->decl_node_index);
+    return (int32_t)token - (int32_t)base;
+}
+
+// --- String bytes helpers ---
+
+// Search for an existing null-terminated string in string_bytes.
+// Returns the index if found, or UINT32_MAX if not found.
+// Mirrors string_table dedup (AstGen.zig:11564).
+// Only whole entries are compared (no suffix matching); cost is linear in
+// string_bytes_len.
+static uint32_t findExistingString(
+    const AstGenCtx* ag, const char* str, uint32_t len) {
+    // Linear scan through null-terminated strings in string_bytes.
+    uint32_t i = 0;
+    while (i < ag->string_bytes_len) {
+        // Find the end of the current null-terminated string.
+        uint32_t j = i;
+        while (j < ag->string_bytes_len && ag->string_bytes[j] != 0)
+            j++;
+        uint32_t existing_len = j - i;
+        if (existing_len == len
+            && memcmp(ag->string_bytes + i, str, len) == 0) {
+            return i;
+        }
+        // Skip past the null terminator.
+        i = j + 1;
+    }
+    return UINT32_MAX;
+}
+
+// Mirrors AstGen.identAsString (AstGen.zig:11530).
+// Interns the identifier starting at `ident_token` and returns its index in
+// string_bytes. The identifier is re-scanned from the source as the longest
+// run of [A-Za-z0-9_]; other identifier spellings are not recognized here.
+static uint32_t identAsString(AstGenCtx* ag, uint32_t ident_token) {
+    uint32_t start = ag->tree->tokens.starts[ident_token];
+    const char* source = ag->tree->source;
+    uint32_t end = start;
+    while (end < ag->tree->source_len) {
+        char ch = source[end];
+        if ((ch >= 'a' && ch <= 'z') || (ch >= 'A' && ch <= 'Z')
+            || (ch >= '0' && ch <= '9') || ch == '_') {
+            end++;
+        } else {
+            break;
+        }
+    }
+    uint32_t ident_len = end - start;
+
+    // Check for existing string (dedup).
+    uint32_t existing = findExistingString(ag, source + start, ident_len);
+    if (existing != UINT32_MAX)
+        return existing;
+
+    uint32_t str_index = ag->string_bytes_len;
+    ensureStringBytesCapacity(ag, ident_len + 1);
+    memcpy(ag->string_bytes + ag->string_bytes_len, source + start, ident_len);
+    ag->string_bytes_len += ident_len;
+    ag->string_bytes[ag->string_bytes_len++] = 0;
+    return str_index;
+}
+
+// Mirrors AstGen.strLitAsString (AstGen.zig:11553).
+// Simplified: handles simple string literals without escape sequences.
+// Returns the string index and length via out parameters.
+// The content is everything between the opening quote and the next '"'
+// byte, copied verbatim.
+static void strLitAsString(AstGenCtx* ag, uint32_t str_lit_token,
+    uint32_t* out_index, uint32_t* out_len) {
+    uint32_t tok_start = ag->tree->tokens.starts[str_lit_token];
+    const char* source = ag->tree->source;
+
+    // Skip opening quote.
+    uint32_t content_start = tok_start + 1;
+    // Find closing quote.
+    uint32_t content_end = content_start;
+    while (content_end < ag->tree->source_len && source[content_end] != '"') {
+        content_end++;
+    }
+
+    uint32_t content_len = content_end - content_start;
+
+    // Check for existing string (dedup).
+    uint32_t existing
+        = findExistingString(ag, source + content_start, content_len);
+    if (existing != UINT32_MAX) {
+        *out_index = existing;
+        *out_len = content_len;
+        return;
+    }
+
+    uint32_t str_index = ag->string_bytes_len;
+    ensureStringBytesCapacity(ag, content_len + 1);
+    memcpy(ag->string_bytes + ag->string_bytes_len, source + content_start,
+        content_len);
+    ag->string_bytes_len += content_len;
+    ag->string_bytes[ag->string_bytes_len++] = 0;
+
+    *out_index = str_index;
+    *out_len = content_len;
+}
+
+// --- Declaration helpers ---
+
+// Mirrors GenZir.makeDeclaration (AstGen.zig:12906).
+// Appends a `declaration` instruction for `node` and returns its index (not
+// a Ref). The instruction is not recorded in any GenZir body.
+static uint32_t makeDeclaration(AstGenCtx* ag, uint32_t node) {
+    ensureInstCapacity(ag, 1);
+    uint32_t idx = ag->inst_len;
+    ag->inst_tags[idx] = ZIR_INST_DECLARATION;
+    ZirInstData data;
+    memset(&data, 0, sizeof(data));
+    data.declaration.src_node = node;
+    // payload_index is set later by setDeclaration.
+    ag->inst_datas[idx] = data;
+    ag->inst_len++;
+    return idx;
+}
+
+// Mirrors GenZir.makeBreakCommon (AstGen.zig:12667).
+// Creates a break_inline instruction with a Break payload in extra.
+// Records the instruction in the GenZir body.
+// Returns the instruction index (not a Ref). Terminates the process if the
+// body already holds GENZIR_MAX_BODY entries.
+static uint32_t makeBreakInline(GenZir* gz, uint32_t block_inst,
+    uint32_t operand, int32_t operand_src_node) {
+    AstGenCtx* ag = gz->astgen;
+    // The body array is fixed-size. The assert gives a diagnostic in debug
+    // builds; the explicit check keeps the bound enforced when NDEBUG
+    // compiles the assert away, instead of writing past the array.
+    assert(gz->instructions_len < GENZIR_MAX_BODY);
+    if (gz->instructions_len >= GENZIR_MAX_BODY)
+        exit(1);
+    ensureInstCapacity(ag, 1);
+    ensureExtraCapacity(ag, 2);
+
+    // Write Zir.Inst.Break payload to extra (Zir.zig:2489).
+    uint32_t payload_index = ag->extra_len;
+    ag->extra[ag->extra_len++] = (uint32_t)operand_src_node;
+    ag->extra[ag->extra_len++] = block_inst;
+
+    uint32_t idx = ag->inst_len;
+    ag->inst_tags[idx] = ZIR_INST_BREAK_INLINE;
+    ZirInstData data;
+    data.break_data.operand = operand;
+    data.break_data.payload_index = payload_index;
+    ag->inst_datas[idx] = data;
+    ag->inst_len++;
+
+    // Record in sub-block body.
+    gz->instructions[gz->instructions_len++] = idx;
+    return idx;
+}
+
+// Does this Declaration.Flags.Id have a name? (Zir.zig:2762)
+static bool declIdHasName(DeclFlagsId id) {
+    return id != DECL_ID_UNNAMED_TEST && id != DECL_ID_COMPTIME;
+}
+
+// Does this Declaration.Flags.Id have a value body? (Zir.zig:2800)
+// Every extern variant has none; all other ids do.
+static bool declIdHasValueBody(DeclFlagsId id) {
+    switch (id) {
+    case DECL_ID_EXTERN_CONST_SIMPLE:
+    case DECL_ID_EXTERN_CONST:
+    case DECL_ID_PUB_EXTERN_CONST_SIMPLE:
+    case DECL_ID_PUB_EXTERN_CONST:
+    case DECL_ID_EXTERN_VAR:
+    case DECL_ID_EXTERN_VAR_THREADLOCAL:
+    case DECL_ID_PUB_EXTERN_VAR:
+    case DECL_ID_PUB_EXTERN_VAR_THREADLOCAL:
+        return false;
+    default:
+        return true;
+    }
+}
+
+// Mirrors setDeclaration (AstGen.zig:13883).
+// Simplified: no type/align/linksection/addrspace bodies.
+// Writes the Declaration payload for `decl_inst` into extra: 4 hash words,
+// 2 flag words, then the name (if the id has one) and the value body length
+// plus body (if the id has one), and points the instruction at it.
+static void setDeclaration(AstGenCtx* ag, uint32_t decl_inst,
+    uint32_t src_line, uint32_t src_column, DeclFlagsId id,
+    uint32_t name_string_index, const uint32_t* value_body,
+    uint32_t value_body_len) {
+    bool has_name = declIdHasName(id);
+    bool has_value_body = declIdHasValueBody(id);
+
+    uint32_t need = 6; // Declaration struct: src_hash[4] + flags[2]
+    if (has_name)
+        need++;
+    if (has_value_body)
+        need += 1 + value_body_len;
+    ensureExtraCapacity(ag, need);
+
+    uint32_t payload_start = ag->extra_len;
+
+    // src_hash (4 words): zero-filled; hash comparison skipped in tests.
+    ag->extra[ag->extra_len++] = 0;
+    ag->extra[ag->extra_len++] = 0;
+    ag->extra[ag->extra_len++] = 0;
+    ag->extra[ag->extra_len++] = 0;
+
+    // Declaration.Flags: packed struct(u64) { src_line: u30, src_column: u29,
+    // id: u5 } (Zir.zig:2719)
+    // Bit layout: src_line 0..29, src_column 30..58, id 59..63; stored as
+    // low word followed by high word.
+    uint64_t flags = 0;
+    flags |= (uint64_t)(src_line & 0x3FFFFFFFu);
+    flags |= (uint64_t)(src_column & 0x1FFFFFFFu) << 30;
+    flags |= (uint64_t)((uint32_t)id & 0x1Fu) << 59;
+    ag->extra[ag->extra_len++] = (uint32_t)(flags & 0xFFFFFFFFu);
+    ag->extra[ag->extra_len++] = (uint32_t)(flags >> 32);
+
+    if (has_name) {
+        ag->extra[ag->extra_len++] = name_string_index;
+    }
+
+    if (has_value_body) {
+        ag->extra[ag->extra_len++] = value_body_len;
+        for (uint32_t i = 0; i < value_body_len; i++) {
+            ag->extra[ag->extra_len++] = value_body[i];
+        }
+    }
+
+    // Set the declaration instruction's payload_index.
+    ag->inst_datas[decl_inst].declaration.payload_index = payload_start;
+}
+
+// --- StructDecl.Small packing (Zir.zig StructDecl.Small) ---
+
+// Unpacked form of the 16-bit `small` operand of an extended struct_decl.
+typedef struct {
+    bool has_captures_len;
+    bool has_fields_len;
+    bool has_decls_len;
+    bool has_backing_int;
+    bool known_non_opv;
+    bool known_comptime_only;
+    uint8_t name_strategy; // 2 bits
+    uint8_t layout; // 2 bits
+    bool any_default_inits;
+    bool any_comptime_fields;
+    bool any_aligned_fields;
+} StructDeclSmall;
+
+// Packs the flags into 16 bits: bits 0-5 are the first six booleans in
+// declaration order, bits 6-7 name_strategy, bits 8-9 layout, bits 10-12 the
+// three any_* booleans; bits 13-15 stay zero.
+static uint16_t packStructDeclSmall(StructDeclSmall s) {
+    uint16_t r = 0;
+    if (s.has_captures_len)
+        r |= (1u << 0);
+    if (s.has_fields_len)
+        r |= (1u << 1);
+    if (s.has_decls_len)
+        r |= (1u << 2);
+    if (s.has_backing_int)
+        r |= (1u << 3);
+    if (s.known_non_opv)
+        r |= (1u << 4);
+    if (s.known_comptime_only)
+        r |= (1u << 5);
+    r |= (uint16_t)(s.name_strategy & 0x3u) << 6;
+    r |= (uint16_t)(s.layout & 0x3u) << 8;
+    if (s.any_default_inits)
+        r |= (1u << 10);
+    if (s.any_comptime_fields)
+        r |= (1u << 11);
+    if (s.any_aligned_fields)
+        r |= (1u << 12);
+    return r;
+}
+
+// Mirrors GenZir.setStruct (AstGen.zig:12935).
+// Writes StructDecl payload and optional length fields.
+// The caller appends captures, backing_int, decls, fields, bodies after.
+// `inst` must be an already reserved instruction index; it is overwritten
+// with an extended/struct_decl instruction pointing at the new payload.
+static void setStruct(AstGenCtx* ag, uint32_t inst, uint32_t src_node,
+    StructDeclSmall small, uint32_t captures_len, uint32_t fields_len,
+    uint32_t decls_len) {
+    // 6 fixed words plus at most 3 optional length words.
+    ensureExtraCapacity(ag, 6 + 3);
+
+    uint32_t payload_index = ag->extra_len;
+
+    // fields_hash (4 words): zero-filled; hash comparison skipped in tests.
+    ag->extra[ag->extra_len++] = 0;
+    ag->extra[ag->extra_len++] = 0;
+    ag->extra[ag->extra_len++] = 0;
+    ag->extra[ag->extra_len++] = 0;
+
+    ag->extra[ag->extra_len++] = ag->source_line;
+    ag->extra[ag->extra_len++] = src_node;
+
+    if (small.has_captures_len)
+        ag->extra[ag->extra_len++] = captures_len;
+    if (small.has_fields_len)
+        ag->extra[ag->extra_len++] = fields_len;
+    if (small.has_decls_len)
+        ag->extra[ag->extra_len++] = decls_len;
+
+    ag->inst_tags[inst] = ZIR_INST_EXTENDED;
     ZirInstData data;
     memset(&data, 0, sizeof(data));
     data.extended.opcode = (uint16_t)ZIR_EXT_STRUCT_DECL;
-    data.extended.small = 0; // all flags zero for empty auto struct
+    data.extended.small = packStructDeclSmall(small);
     data.extended.operand = payload_index;
+    ag->inst_datas[inst] = data;
+}
-    zir.inst_tags[0] = ZIR_INST_EXTENDED;
-    zir.inst_datas[0] = data;
-    zir.inst_len = 1;
+// --- scanContainer (AstGen.zig:13384) ---
-    zir.has_compile_errors = false;
+// Add a name→node entry to the decl table.
+// The two parallel arrays grow together (initial capacity 8, then doubling);
+// allocation failure terminates the process.
+static void addDeclToTable(
+    AstGenCtx* ag, uint32_t name_str_index, uint32_t node) {
+    if (ag->decl_table_len >= ag->decl_table_cap) {
+        uint32_t new_cap = ag->decl_table_cap > 0 ? ag->decl_table_cap * 2 : 8;
+        uint32_t* n = realloc(ag->decl_names, new_cap * sizeof(uint32_t));
+        uint32_t* d = realloc(ag->decl_nodes, new_cap * sizeof(uint32_t));
+        if (!n || !d)
+            exit(1);
+        ag->decl_names = n;
+        ag->decl_nodes = d;
+        ag->decl_table_cap = new_cap;
+    }
+    ag->decl_names[ag->decl_table_len] = name_str_index;
+    ag->decl_nodes[ag->decl_table_len] = node;
+    ag->decl_table_len++;
+}
+
+// Mirrors scanContainer (AstGen.zig:13384).
+// Also populates the decl table (namespace.decls) for identifier resolution.
+// Returns the number of declarations among `members`. Variable and function
+// members are entered into the table under the token that follows their main
+// token; comptime and test members are counted only.
+static uint32_t scanContainer(
+    AstGenCtx* ag, const uint32_t* members, uint32_t member_count) {
+    const Ast* tree = ag->tree;
+    uint32_t decl_count = 0;
+    for (uint32_t i = 0; i < member_count; i++) {
+        uint32_t member = members[i];
+        AstNodeTag tag = tree->nodes.tags[member];
+        switch (tag) {
+        case AST_NODE_GLOBAL_VAR_DECL:
+        case AST_NODE_LOCAL_VAR_DECL:
+        case AST_NODE_SIMPLE_VAR_DECL:
+        case AST_NODE_ALIGNED_VAR_DECL: {
+            decl_count++;
+            uint32_t name_token = tree->nodes.main_tokens[member] + 1;
+            uint32_t name_str = identAsString(ag, name_token);
+            addDeclToTable(ag, name_str, member);
+            break;
+        }
+        case AST_NODE_FN_PROTO_SIMPLE:
+        case AST_NODE_FN_PROTO_MULTI:
+        case AST_NODE_FN_PROTO_ONE:
+        case AST_NODE_FN_PROTO:
+        case AST_NODE_FN_DECL: {
+            decl_count++;
+            uint32_t name_token = tree->nodes.main_tokens[member] + 1;
+            uint32_t name_str = identAsString(ag, name_token);
+            addDeclToTable(ag, name_str, member);
+            break;
+        }
+        case AST_NODE_COMPTIME:
+            decl_count++;
+            break;
+        case AST_NODE_TEST_DECL:
+            decl_count++;
+            break;
+        default:
+            break;
+        }
+    }
+    return decl_count;
+}
+
+// --- Import tracking ---
+
+// Remembers an imported path (string index) together with the token it was
+// first seen at. A path that is already recorded is ignored.
+static void addImport(AstGenCtx* ag, uint32_t name_index, uint32_t token) {
+    // Check for duplicates.
+    for (uint32_t i = 0; i < ag->imports_len; i++) {
+        if (ag->imports[i].name == name_index)
+            return;
+    }
+    if (ag->imports_len >= ag->imports_cap) {
+        uint32_t new_cap = ag->imports_cap > 0 ? ag->imports_cap * 2 : 4;
+        ImportEntry* p = realloc(ag->imports, new_cap * sizeof(ImportEntry));
+        if (!p)
+            exit(1);
+        ag->imports = p;
+        ag->imports_cap = new_cap;
+    }
+    ag->imports[ag->imports_len].name = name_index;
+    ag->imports[ag->imports_len].token = token;
+    ag->imports_len++;
+}
+
+// Write imports list to extra (AstGen.zig:227-244).
+// Layout: count, then (name, token) per import. The reserved slot
+// extra[ZIR_EXTRA_IMPORTS] receives the list's index, or 0 when there are no
+// imports. The reserved slots must already exist in `extra`.
+static void writeImports(AstGenCtx* ag) {
+    if (ag->imports_len == 0) {
+        ag->extra[ZIR_EXTRA_IMPORTS] = 0;
+        return;
+    }
+    uint32_t need = 1 + ag->imports_len * 2;
+    ensureExtraCapacity(ag, need);
+    uint32_t imports_index = ag->extra_len;
+    ag->extra[ag->extra_len++] = ag->imports_len;
+    for (uint32_t i = 0; i < ag->imports_len; i++) {
+        ag->extra[ag->extra_len++] = ag->imports[i].name;
+        ag->extra[ag->extra_len++] = ag->imports[i].token;
+    }
+    ag->extra[ZIR_EXTRA_IMPORTS] = imports_index;
+}
+
+// --- Result location (AstGen.zig:11808) ---
+// Simplified version of ResultInfo.Loc.
+
+typedef enum {
+    RL_NONE, // Just compute the value.
+    RL_REF, // Compute a pointer to the value.
+    RL_DISCARD, // Compute but discard (emit ensure_result_non_error).
+} ResultLoc;
+
+// --- Expression evaluation (AstGen.zig:634) ---
+
+// Forward declaration.
+static uint32_t expr(GenZir* gz, uint32_t node);
+static uint32_t exprRl(GenZir* gz, ResultLoc rl, uint32_t node);
+
+// Mirrors numberLiteral (AstGen.zig:8679).
+// Handles literals "0" and "1" as built-in refs.
+// Any other literal is not implemented yet: it sets has_compile_errors and
+// yields ZIR_REF_ZERO as a placeholder.
+static uint32_t numberLiteral(AstGenCtx* ag, uint32_t node) {
+    uint32_t num_token = ag->tree->nodes.main_tokens[node];
+    uint32_t tok_start = ag->tree->tokens.starts[num_token];
+    const char* source = ag->tree->source;
+
+    // Determine token length by scanning to next non-digit character.
+    uint32_t tok_end = tok_start;
+    while (tok_end < ag->tree->source_len
+        && ((source[tok_end] >= '0' && source[tok_end] <= '9')
+            || source[tok_end] == '_' || source[tok_end] == '.'
+            || source[tok_end] == 'x' || source[tok_end] == 'o'
+            || source[tok_end] == 'b'
+            || (source[tok_end] >= 'a' && source[tok_end] <= 'f')
+            || (source[tok_end] >= 'A' && source[tok_end] <= 'F'))) {
+        tok_end++;
+    }
+    uint32_t tok_len = tok_end - tok_start;
+
+    if (tok_len == 1) {
+        if (source[tok_start] == '0')
+            return ZIR_REF_ZERO;
+        if (source[tok_start] == '1')
+            return ZIR_REF_ONE;
+    }
+
+    // TODO: handle other number literals (int, big_int, float).
+    ag->has_compile_errors = true;
+    return ZIR_REF_ZERO;
+}
+
+// Mirrors builtinCall (AstGen.zig:9191), @import case (AstGen.zig:9242).
+// Emits an `import` instruction for a string-literal operand and records the
+// path in the import list. Any other operand is reported through
+// has_compile_errors and yields ZIR_REF_VOID_VALUE.
+static uint32_t builtinCallImport(GenZir* gz, uint32_t node) {
+    AstGenCtx* ag = gz->astgen;
+    const Ast* tree = ag->tree;
+
+    // For builtin_call_two: data.lhs = first arg node.
+    AstData node_data = tree->nodes.datas[node];
+    uint32_t operand_node = node_data.lhs;
+
+    // The operand comes from user source, so a non-literal is a compile
+    // error rather than an internal invariant violation: an assert would
+    // abort in debug builds and, with NDEBUG, let an arbitrary token be read
+    // as a string literal.
+    if (tree->nodes.tags[operand_node] != AST_NODE_STRING_LITERAL) {
+        ag->has_compile_errors = true;
+        return ZIR_REF_VOID_VALUE;
+    }
+    uint32_t str_lit_token = tree->nodes.main_tokens[operand_node];
+
+    uint32_t str_index, str_len;
+    strLitAsString(ag, str_lit_token, &str_index, &str_len);
+
+    // Write Import payload to extra (Zir.Inst.Import: res_ty, path).
+    ensureExtraCapacity(ag, 2);
+    uint32_t payload_index = ag->extra_len;
+    ag->extra[ag->extra_len++] = ZIR_REF_NONE; // res_ty = .none
+    ag->extra[ag->extra_len++] = str_index; // path
+
+    // Create .import instruction with pl_tok data.
+    ZirInstData data;
+    data.pl_tok.src_tok = tokenIndexToRelative(gz, str_lit_token);
+    data.pl_tok.payload_index = payload_index;
+    uint32_t result_ref = addInstruction(gz, ZIR_INST_IMPORT, data);
+
+    // Track import (AstGen.zig:9269).
+    addImport(ag, str_index, str_lit_token);
+
+    return result_ref;
+}
+
+// Mirrors builtinCall (AstGen.zig:9191) dispatch.
+static uint32_t builtinCall(GenZir* gz, uint32_t node) {
+    // Dispatches on the builtin's name, read from the source text after the
+    // '@'. Only @import is implemented; every other builtin sets
+    // has_compile_errors and yields ZIR_REF_VOID_VALUE.
+    AstGenCtx* ag = gz->astgen;
+    const Ast* tree = ag->tree;
+
+    uint32_t builtin_token = tree->nodes.main_tokens[node];
+    uint32_t tok_start = tree->tokens.starts[builtin_token];
+    const char* source = tree->source;
+
+    // Identify builtin name from source.
+    // Skip '@' prefix and scan identifier.
+    uint32_t name_start = tok_start + 1; // skip '@'
+    uint32_t name_end = name_start;
+    while (name_end < tree->source_len
+        && ((source[name_end] >= 'a' && source[name_end] <= 'z')
+            || (source[name_end] >= 'A' && source[name_end] <= 'Z')
+            || source[name_end] == '_')) {
+        name_end++;
+    }
+    uint32_t name_len = name_end - name_start;
+
+    if (name_len == 6 && memcmp(source + name_start, "import", 6) == 0) {
+        return builtinCallImport(gz, node);
+    }
+
+    // TODO: handle other builtins.
+    ag->has_compile_errors = true;
+    return ZIR_REF_VOID_VALUE;
+}
+
+// --- identifier (AstGen.zig:8282) ---
+// Simplified: handles decl_val resolution for container-level declarations.
+//
+// Resolution order: a few well-known primitive type names, then uN/iN
+// integer types, then the container's decl table (decl_ref for RL_REF,
+// decl_val otherwise). Anything unresolved sets has_compile_errors and
+// yields ZIR_REF_VOID_VALUE.
+
+static uint32_t identifierExpr(GenZir* gz, ResultLoc rl, uint32_t node) {
+    AstGenCtx* ag = gz->astgen;
+    const Ast* tree = ag->tree;
+    uint32_t ident_token = tree->nodes.main_tokens[node];
+
+    // Check for primitive types FIRST (AstGen.zig:8298-8338).
+    uint32_t tok_start = ag->tree->tokens.starts[ident_token];
+    const char* source = ag->tree->source;
+    uint32_t tok_end = tok_start;
+    while (tok_end < ag->tree->source_len
+        && ((source[tok_end] >= 'a' && source[tok_end] <= 'z')
+            || (source[tok_end] >= 'A' && source[tok_end] <= 'Z')
+            || (source[tok_end] >= '0' && source[tok_end] <= '9')
+            || source[tok_end] == '_'))
+        tok_end++;
+    uint32_t tok_len = tok_end - tok_start;
+
+    // Check well-known primitive refs (primitive_instrs map, AstGen.zig:8300).
+    // clang-format off
+    if (tok_len == 2 && memcmp(source+tok_start, "u8", 2) == 0) return ZIR_REF_U8_TYPE;
+    if (tok_len == 5 && memcmp(source+tok_start, "usize", 5) == 0) return ZIR_REF_USIZE_TYPE;
+    if (tok_len == 4 && memcmp(source+tok_start, "bool", 4) == 0) return ZIR_REF_BOOL_TYPE;
+    if (tok_len == 4 && memcmp(source+tok_start, "void", 4) == 0) return ZIR_REF_VOID_TYPE;
+    if (tok_len == 6 && memcmp(source+tok_start, "c_uint", 6) == 0) return ZIR_REF_C_UINT_TYPE;
+    // clang-format on
+
+    // Integer type detection: u29, i13, etc. (AstGen.zig:8304-8336).
+    if (tok_len >= 2
+        && (source[tok_start] == 'u' || source[tok_start] == 'i')) {
+        uint8_t signedness = (source[tok_start] == 'i') ? 1 : 0;
+        // Accumulate in 32 bits so a width that does not fit the 16-bit
+        // bit_count field is detected instead of silently wrapping.
+        uint32_t bit_count = 0;
+        bool valid = true;
+        for (uint32_t k = tok_start + 1; k < tok_end; k++) {
+            if (source[k] >= '0' && source[k] <= '9') {
+                bit_count = bit_count * 10 + (uint32_t)(source[k] - '0');
+                if (bit_count > 0xFFFFu) {
+                    // Width exceeds the maximum of 65535 bits: report it
+                    // rather than emitting an int_type with a wrong width.
+                    ag->has_compile_errors = true;
+                    return ZIR_REF_VOID_VALUE;
+                }
+            } else {
+                valid = false;
+                break;
+            }
+        }
+        if (valid && bit_count > 0) {
+            ZirInstData data;
+            data.int_type.src_node
+                = (int32_t)node - (int32_t)gz->decl_node_index;
+            data.int_type.signedness = signedness;
+            data.int_type._pad = 0;
+            data.int_type.bit_count = (uint16_t)bit_count;
+            return addInstruction(gz, ZIR_INST_INT_TYPE, data);
+        }
+    }
+
+    // Decl table lookup (AstGen.zig:8462-8520).
+    // Names are interned, so comparing string indices compares names.
+    uint32_t name_str = identAsString(ag, ident_token);
+    for (uint32_t i = 0; i < ag->decl_table_len; i++) {
+        if (ag->decl_names[i] == name_str) {
+            ZirInstTag itag
+                = (rl == RL_REF) ? ZIR_INST_DECL_REF : ZIR_INST_DECL_VAL;
+            ZirInstData data;
+            data.str_tok.start = name_str;
+            data.str_tok.src_tok = tokenIndexToRelative(gz, ident_token);
+            return addInstruction(gz, itag, data);
+        }
+    }
+
+    ag->has_compile_errors = true;
+    return ZIR_REF_VOID_VALUE;
+}
+
+// --- fieldAccess (AstGen.zig:6154) ---
+// Simplified: emits field_val instruction with Field payload.
+
+// Lowers `object.field`. With RL_REF the object is evaluated by reference and
+// a field_ptr is emitted; otherwise the object is evaluated as a value and a
+// field_val is emitted. Returns the ref produced by addInstruction.
+static uint32_t fieldAccessExpr(GenZir* gz, ResultLoc rl, uint32_t node) {
+    AstGenCtx* ag = gz->astgen;
+    AstData nd = ag->tree->nodes.datas[node];
+    bool want_ref = (rl == RL_REF);
+
+    // data.lhs = object node, data.rhs = field identifier token.
+    // The field name is interned before the object is evaluated
+    // (AstGen.zig:6180).
+    uint32_t str_index = identAsString(ag, nd.rhs);
+
+    // The object inherits .ref from the result location (AstGen.zig:6161).
+    uint32_t lhs = exprRl(gz, want_ref ? RL_REF : RL_NONE, nd.lhs);
+
+    // Field payload: { lhs, field_name_start } (AstGen.zig:6186-6189).
+    ensureExtraCapacity(ag, 2);
+    uint32_t payload_index = ag->extra_len;
+    ag->extra[ag->extra_len++] = lhs;
+    ag->extra[ag->extra_len++] = str_index;
+
+    // .ref → field_ptr, else → field_val (AstGen.zig:6160-6164).
+    ZirInstData data;
+    data.pl_node.src_node = (int32_t)node - (int32_t)gz->decl_node_index;
+    data.pl_node.payload_index = payload_index;
+    return addInstruction(
+        gz, want_ref ? ZIR_INST_FIELD_PTR : ZIR_INST_FIELD_VAL, data);
+}
+
+// --- token text helpers (C port only; no Zig counterpart) ---
+
+// True for bytes that can continue an identifier or keyword: [A-Za-z0-9_].
+static bool isIdentContinueByte(char c) {
+    return (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z')
+        || (c >= '0' && c <= '9') || c == '_';
+}
+
+// True when the source text starting at token `tok` is exactly `word`: the
+// bytes match and the next byte (if any) cannot continue an identifier.
+// This rejects longer tokens that merely start with `word`, e.g. `_foo`,
+// `_1` or `constants`.
+static bool tokenIsExactWord(
+    const Ast* tree, uint32_t tok, const char* word) {
+    size_t word_len = strlen(word);
+    size_t start = tree->tokens.starts[tok];
+    size_t end = start + word_len;
+    if (end > tree->source_len)
+        return false;
+    if (memcmp(tree->source + start, word, word_len) != 0)
+        return false;
+    return end == tree->source_len
+        || !isIdentContinueByte((char)tree->source[end]);
+}
+
+// --- ptrType (AstGen.zig:3833) ---
+// Simplified: handles []const T and []T slice types.
+//
+// Every variant is lowered as a slice (size = 2) whose child type is
+// data.rhs. Constness is only detected for ptr_type_aligned, by looking at
+// the token two past main_token (`[`, `]`, then an optional `const`).
+// Sentinel, alignment, address space and bit range are not handled.
+
+static uint32_t ptrTypeExpr(GenZir* gz, uint32_t node) {
+    AstGenCtx* ag = gz->astgen;
+    const Ast* tree = ag->tree;
+    AstData nd = tree->nodes.datas[node];
+
+    uint32_t child_type_node = nd.rhs;
+    uint8_t size = 2; // slice
+    bool is_const = false;
+
+    if (tree->nodes.tags[node] == AST_NODE_PTR_TYPE_ALIGNED) {
+        // For `[]const u8`: main_token = `[`, +1 = `]`, +2 = `const`.
+        // The token must be exactly `const`; a child type such as
+        // `constants.T` must not be mistaken for the qualifier.
+        uint32_t maybe_const = tree->nodes.main_tokens[node] + 2;
+        if (maybe_const < tree->tokens.len)
+            is_const = tokenIsExactWord(tree, maybe_const, "const");
+    }
+
+    // Evaluate element type.
+    uint32_t elem_type = exprRl(gz, RL_NONE, child_type_node);
+
+    // Build PtrType payload: { elem_type, src_node }.
+    ensureExtraCapacity(ag, 2);
+    uint32_t payload_index = ag->extra_len;
+    ag->extra[ag->extra_len++] = elem_type;
+    ag->extra[ag->extra_len++]
+        = (uint32_t)((int32_t)node - (int32_t)gz->decl_node_index);
+
+    // Build flags packed byte.
+    uint8_t flags = 0;
+    if (!is_const)
+        flags |= (1 << 1); // is_mutable
+
+    ZirInstData data;
+    data.ptr_type.flags = flags;
+    data.ptr_type.size = size;
+    data.ptr_type._pad = 0;
+    data.ptr_type.payload_index = payload_index;
+    return addInstruction(gz, ZIR_INST_PTR_TYPE, data);
+}
+
+// --- arrayType (AstGen.zig:940) ---
+// Lowers `[len]T`: evaluates the length, then the element type, and emits
+// array_type with both as operands.
+
+static uint32_t arrayTypeExpr(GenZir* gz, uint32_t node) {
+    AstGenCtx* ag = gz->astgen;
+    AstData nd = ag->tree->nodes.datas[node];
+
+    // data.lhs = length expr node, data.rhs = element type node.
+    uint32_t len = exprRl(gz, RL_NONE, nd.lhs);
+    uint32_t elem_type = exprRl(gz, RL_NONE, nd.rhs);
+    return addPlNodeBin(gz, ZIR_INST_ARRAY_TYPE, node, len, elem_type);
+}
+
+// --- arrayInitExpr (AstGen.zig:1431) ---
+// Simplified: handles typed array init with inferred [_] length.
+//
+// Anything else (untyped init, explicit length, zero elements, unexpected
+// node tag) sets has_compile_errors and returns ZIR_REF_VOID_VALUE.
+
+static uint32_t arrayInitExpr(GenZir* gz, ResultLoc rl, uint32_t node) {
+    AstGenCtx* ag = gz->astgen;
+    const Ast* tree = ag->tree;
+    AstData nd = tree->nodes.datas[node];
+
+    // Get elements and type expression based on the variant.
+    uint32_t type_expr_node = 0;
+    uint32_t elem_buf[2];
+    const uint32_t* elements = NULL;
+    uint32_t elem_count = 0;
+
+    switch (tree->nodes.tags[node]) {
+    case AST_NODE_ARRAY_INIT_ONE:
+    case AST_NODE_ARRAY_INIT_ONE_COMMA:
+        // lhs = type_expr, rhs = the single element (0 when absent).
+        type_expr_node = nd.lhs;
+        if (nd.rhs != 0) {
+            elem_buf[0] = nd.rhs;
+            elements = elem_buf;
+            elem_count = 1;
+        }
+        break;
+    case AST_NODE_ARRAY_INIT:
+    case AST_NODE_ARRAY_INIT_COMMA: {
+        // data = node_and_extra: lhs = type_expr, rhs = extra_index.
+        // extra[rhs] = SubRange.start, extra[rhs+1] = SubRange.end.
+        // Elements are extra_data[start..end].
+        type_expr_node = nd.lhs;
+        uint32_t range_start = tree->extra_data.arr[nd.rhs];
+        uint32_t range_end = tree->extra_data.arr[nd.rhs + 1];
+        elements = tree->extra_data.arr + range_start;
+        elem_count = range_end - range_start;
+        break;
+    }
+    default:
+        ag->has_compile_errors = true;
+        return ZIR_REF_VOID_VALUE;
+    }
+
+    if (type_expr_node == 0 || elem_count == 0) {
+        ag->has_compile_errors = true;
+        return ZIR_REF_VOID_VALUE;
+    }
+
+    // Check if the type is [_]T (inferred length) (AstGen.zig:1446-1474).
+    // The length node must be an identifier whose token is exactly `_`.
+    bool inferred_len = false;
+    uint32_t elem_type_node = 0;
+    if (tree->nodes.tags[type_expr_node] == AST_NODE_ARRAY_TYPE) {
+        AstData type_nd = tree->nodes.datas[type_expr_node];
+        uint32_t len_node = type_nd.lhs;
+        if (tree->nodes.tags[len_node] == AST_NODE_IDENTIFIER
+            && tokenIsExactWord(
+                tree, tree->nodes.main_tokens[len_node], "_")) {
+            inferred_len = true;
+            elem_type_node = type_nd.rhs;
+        }
+    }
+
+    if (!inferred_len) {
+        // Non-inferred length: not implemented yet.
+        ag->has_compile_errors = true;
+        return ZIR_REF_VOID_VALUE;
+    }
+
+    // Inferred length: addInt(elem_count) (AstGen.zig:1452).
+    uint32_t len_inst = addInt(gz, elem_count);
+    uint32_t elem_type = exprRl(gz, RL_NONE, elem_type_node);
+    uint32_t array_type_inst = addPlNodeBin(
+        gz, ZIR_INST_ARRAY_TYPE, type_expr_node, len_inst, elem_type);
+
+    // arrayInitExprTyped (AstGen.zig:1507/1509).
+    // MultiOp payload: operands_len, then type + elements. The operand slots
+    // are reserved before any element is evaluated, so that payloads pushed
+    // by nested expressions land after this one instead of between its
+    // operands (and cannot use up the capacity ensured here).
+    uint32_t operands_len = elem_count + 1; // +1 for type
+    ensureExtraCapacity(ag, 1 + operands_len);
+    uint32_t payload_index = ag->extra_len;
+    ag->extra[ag->extra_len++] = operands_len;
+    uint32_t slot = ag->extra_len;
+    ag->extra_len += operands_len;
+    ag->extra[slot++] = array_type_inst; // type ref
+    for (uint32_t i = 0; i < elem_count; i++) {
+        // Evaluate first: the call may grow (and move) ag->extra.
+        uint32_t elem_ref = exprRl(gz, RL_NONE, elements[i]);
+        ag->extra[slot++] = elem_ref;
+    }
+
+    ZirInstData data;
+    data.pl_node.src_node = (int32_t)node - (int32_t)gz->decl_node_index;
+    data.pl_node.payload_index = payload_index;
+    return addInstruction(gz,
+        (rl == RL_REF) ? ZIR_INST_ARRAY_INIT_REF : ZIR_INST_ARRAY_INIT, data);
+}
+
+// --- simpleBinOp (AstGen.zig:2204) ---
+// Evaluates lhs then rhs with no result location and emits `op_tag` with
+// both as operands.
+
+static uint32_t simpleBinOp(GenZir* gz, uint32_t node, ZirInstTag op_tag) {
+    AstGenCtx* ag = gz->astgen;
+    AstData nd = ag->tree->nodes.datas[node];
+    uint32_t lhs = exprRl(gz, RL_NONE, nd.lhs);
+    uint32_t rhs = exprRl(gz, RL_NONE, nd.rhs);
+    return addPlNodeBin(gz, op_tag, node, lhs, rhs);
+}
+
+// Mirrors expr (AstGen.zig:634) — main expression dispatcher.
+// Node tags without a case set has_compile_errors and yield
+// ZIR_REF_VOID_VALUE, so callers can keep going and bail out later.
+static uint32_t exprRl(GenZir* gz, ResultLoc rl, uint32_t node) {
+    AstGenCtx* ag = gz->astgen;
+    const Ast* tree = ag->tree;
+
+    switch (tree->nodes.tags[node]) {
+    case AST_NODE_NUMBER_LITERAL:
+        return numberLiteral(ag, node);
+    case AST_NODE_BUILTIN_CALL_TWO:
+    case AST_NODE_BUILTIN_CALL_TWO_COMMA:
+        return builtinCall(gz, node);
+    case AST_NODE_FIELD_ACCESS:
+        return fieldAccessExpr(gz, rl, node);
+    case AST_NODE_IDENTIFIER:
+        return identifierExpr(gz, rl, node);
+    case AST_NODE_STRING_LITERAL: {
+        // Mirrors stringLiteral (AstGen.zig:8626).
+        uint32_t str_index, str_len;
+        strLitAsString(
+            ag, tree->nodes.main_tokens[node], &str_index, &str_len);
+        ZirInstData data;
+        data.str.start = str_index;
+        data.str.len = str_len;
+        return addInstruction(gz, ZIR_INST_STR, data);
+    }
+    // address_of (AstGen.zig:953): evaluate operand with .ref rl.
+    case AST_NODE_ADDRESS_OF:
+        return exprRl(gz, RL_REF, tree->nodes.datas[node].lhs);
+    // ptr_type (AstGen.zig:1077-1081).
+    case AST_NODE_PTR_TYPE_ALIGNED:
+    case AST_NODE_PTR_TYPE_SENTINEL:
+    case AST_NODE_PTR_TYPE:
+    case AST_NODE_PTR_TYPE_BIT_RANGE:
+        return ptrTypeExpr(gz, node);
+    // array_type (AstGen.zig:940).
+    case AST_NODE_ARRAY_TYPE:
+        return arrayTypeExpr(gz, node);
+    // array_init variants (AstGen.zig:836-856).
+    case AST_NODE_ARRAY_INIT:
+    case AST_NODE_ARRAY_INIT_COMMA:
+    case AST_NODE_ARRAY_INIT_ONE:
+    case AST_NODE_ARRAY_INIT_ONE_COMMA:
+        return arrayInitExpr(gz, rl, node);
+    // array_cat (AstGen.zig:772): ++ binary operator.
+    case AST_NODE_ARRAY_CAT:
+        return simpleBinOp(gz, node, ZIR_INST_ARRAY_CAT);
+    default:
+        ag->has_compile_errors = true;
+        return ZIR_REF_VOID_VALUE;
+    }
+}
+
+// Convenience wrapper: evaluates `node` with no result location.
+static uint32_t expr(GenZir* gz, uint32_t node) {
+    return exprRl(gz, RL_NONE, node);
+}
+
+// --- rvalue (AstGen.zig:11029) ---
+// Simplified: handles .none and .discard result locations.
+
+// .discard => emit ensure_result_non_error on `result`, return .void_value
+// (AstGen.zig:11071-11074).
+static uint32_t rvalueDiscard(GenZir* gz, uint32_t result, uint32_t src_node) {
+    ZirInstData data;
+    data.un_node.src_node = (int32_t)src_node - (int32_t)gz->decl_node_index;
+    data.un_node.operand = result;
+    addInstruction(gz, ZIR_INST_ENSURE_RESULT_NON_ERROR, data);
+    return ZIR_REF_VOID_VALUE;
+}
+
+// --- emitDbgNode / emitDbgStmt (AstGen.zig:3422, 13713) ---
+
+// Records a (line, column) debug position. Does nothing in comptime blocks.
+// If the block's last instruction is already a dbg_stmt it is overwritten in
+// place instead of emitting a second one (AstGen.zig:13715-13724).
+static void emitDbgStmt(GenZir* gz, uint32_t line, uint32_t column) {
+    if (gz->is_comptime)
+        return;
+    AstGenCtx* ag = gz->astgen;
+    if (gz->instructions_len > 0) {
+        uint32_t last = gz->instructions[gz->instructions_len - 1];
+        if (ag->inst_tags[last] == ZIR_INST_DBG_STMT) {
+            ag->inst_datas[last].dbg_stmt.line = line;
+            ag->inst_datas[last].dbg_stmt.column = column;
+            return;
+        }
+    }
+    ZirInstData data;
+    data.dbg_stmt.line = line;
+    data.dbg_stmt.column = column;
+    addInstruction(gz, ZIR_INST_DBG_STMT, data);
+}
+
+// Advances the source cursor to `node` and records its position, with the
+// line made relative to the enclosing declaration's line.
+static void emitDbgNode(GenZir* gz, uint32_t node) {
+    if (gz->is_comptime)
+        return;
+    AstGenCtx* ag = gz->astgen;
+    advanceSourceCursorToNode(ag, node);
+    uint32_t line = ag->source_line - gz->decl_line;
+    uint32_t column = ag->source_column;
+    emitDbgStmt(gz, line, column);
+}
+
+// --- assign (AstGen.zig:3434) ---
+// Handles `_ = expr` discard pattern.
+//
+// Any other assignment target is not implemented yet and sets
+// has_compile_errors.
+
+static void assignStmt(GenZir* gz, uint32_t infix_node) {
+    emitDbgNode(gz, infix_node);
+    AstGenCtx* ag = gz->astgen;
+    const Ast* tree = ag->tree;
+
+    AstData nd = tree->nodes.datas[infix_node];
+    uint32_t lhs = nd.lhs;
+    uint32_t rhs = nd.rhs;
+
+    // Check if LHS is `_` identifier for discard (AstGen.zig:3440-3446).
+    // The token must be exactly `_`: `_Foo`, `__` and `_1` are ordinary
+    // identifiers and must not be lowered as a discard.
+    if (tree->nodes.tags[lhs] == AST_NODE_IDENTIFIER
+        && tokenIsExactWord(tree, tree->nodes.main_tokens[lhs], "_")) {
+        // Discard: evaluate RHS, then check the result is not an error.
+        uint32_t result = expr(gz, rhs);
+        rvalueDiscard(gz, result, rhs);
+        return;
+    }
+
+    // TODO: handle non-discard assignments.
+    ag->has_compile_errors = true;
+}
+
+// --- blockExprStmts (AstGen.zig:2538) ---
+// Processes block statements sequentially.
+ +static void blockExprStmts( + GenZir* gz, const uint32_t* statements, uint32_t stmt_count) { + AstGenCtx* ag = gz->astgen; + for (uint32_t i = 0; i < stmt_count; i++) { + uint32_t stmt = statements[i]; + AstNodeTag tag = ag->tree->nodes.tags[stmt]; + switch (tag) { + case AST_NODE_ASSIGN: + assignStmt(gz, stmt); + break; + // TODO: var_decl, defer, other statement types + default: + // Try as expression statement. + expr(gz, stmt); + break; + } + } +} + +// --- fullBodyExpr (AstGen.zig:2358) --- +// Processes a block body, returning void. + +static void fullBodyExpr(GenZir* gz, uint32_t node) { + const Ast* tree = gz->astgen->tree; + AstNodeTag tag = tree->nodes.tags[node]; + + // Extract block statements (AstGen.zig:2368). + AstData nd = tree->nodes.datas[node]; + uint32_t stmt_buf[2]; + const uint32_t* statements = NULL; + uint32_t stmt_count = 0; + + switch (tag) { + case AST_NODE_BLOCK_TWO: + case AST_NODE_BLOCK_TWO_SEMICOLON: { + uint32_t idx = 0; + if (nd.lhs != 0) + stmt_buf[idx++] = nd.lhs; + if (nd.rhs != 0) + stmt_buf[idx++] = nd.rhs; + statements = stmt_buf; + stmt_count = idx; + break; + } + case AST_NODE_BLOCK: + case AST_NODE_BLOCK_SEMICOLON: { + uint32_t start = nd.lhs; + uint32_t end = nd.rhs; + statements = tree->extra_data.arr + start; + stmt_count = end - start; + break; + } + default: + // Not a block — treat as single expression. + expr(gz, node); + return; + } + + // Process statements (AstGen.zig:2381). + blockExprStmts(gz, statements, stmt_count); +} + +// --- lastToken (Ast.zig:874) --- +// Mechanical port of Ast.lastToken. Uses iterative end_offset accumulation. + +static uint32_t lastToken(const Ast* tree, uint32_t node) { + uint32_t n = node; + uint32_t end_offset = 0; + while (1) { + AstNodeTag tag = tree->nodes.tags[n]; + AstData nd = tree->nodes.datas[n]; + switch (tag) { + case AST_NODE_ROOT: + return tree->tokens.len - 1; + + // Binary ops: recurse into RHS (Ast.zig:893-948). 
+ case AST_NODE_ASSIGN: + n = nd.rhs; + continue; + + // field_access: return field token + end_offset (Ast.zig:979). + case AST_NODE_FIELD_ACCESS: + return nd.rhs + end_offset; + + // test_decl: recurse into body node (Ast.zig:950). + case AST_NODE_TEST_DECL: + n = nd.rhs; + continue; + + // block (Ast.zig:1085): end_offset += 1 (rbrace), recurse into last. + case AST_NODE_BLOCK: { + uint32_t start = nd.lhs; + uint32_t end = nd.rhs; + assert(start != end); + end_offset += 1; + n = tree->extra_data.arr[end - 1]; + continue; + } + + // block_semicolon (Ast.zig:1097): += 2 (semicolon + rbrace). + case AST_NODE_BLOCK_SEMICOLON: { + uint32_t start = nd.lhs; + uint32_t end = nd.rhs; + assert(start != end); + end_offset += 2; + n = tree->extra_data.arr[end - 1]; + continue; + } + + // block_two (Ast.zig:1117): if rhs, recurse rhs +1; if lhs, +1; else + // +1. Note: C parser uses 0 for "none" (OptionalIndex), not + // UINT32_MAX. + case AST_NODE_BLOCK_TWO: { + if (nd.rhs != 0) { + end_offset += 1; + n = nd.rhs; + } else if (nd.lhs != 0) { + end_offset += 1; + n = nd.lhs; + } else { + end_offset += 1; + return tree->nodes.main_tokens[n] + end_offset; + } + continue; + } + + // block_two_semicolon (Ast.zig:1153). + case AST_NODE_BLOCK_TWO_SEMICOLON: { + if (nd.rhs != 0) { + end_offset += 2; + n = nd.rhs; + } else if (nd.lhs != 0) { + end_offset += 2; + n = nd.lhs; + } else { + end_offset += 1; + return tree->nodes.main_tokens[n] + end_offset; + } + continue; + } + + // builtin_call_two (Ast.zig:1118): recurse into args + rparen. 
+ case AST_NODE_BUILTIN_CALL_TWO: { + if (nd.rhs != 0) { + end_offset += 1; + n = nd.rhs; + } else if (nd.lhs != 0) { + end_offset += 1; + n = nd.lhs; + } else { + end_offset += 2; // lparen + rparen + return tree->nodes.main_tokens[n] + end_offset; + } + continue; + } + + case AST_NODE_BUILTIN_CALL_TWO_COMMA: { + if (nd.rhs != 0) { + end_offset += 2; // comma + rparen + n = nd.rhs; + } else if (nd.lhs != 0) { + end_offset += 2; + n = nd.lhs; + } else { + end_offset += 1; + return tree->nodes.main_tokens[n] + end_offset; + } + continue; + } + + // Terminals: return main_token + end_offset (Ast.zig:988-996). + case AST_NODE_NUMBER_LITERAL: + case AST_NODE_STRING_LITERAL: + case AST_NODE_IDENTIFIER: + return tree->nodes.main_tokens[n] + end_offset; + + // field_access: return data.rhs (the field token) + end_offset + // (Ast.zig:979-982). + + default: + // Fallback: return main_token + end_offset. + return tree->nodes.main_tokens[n] + end_offset; + } + } +} + +// --- addFunc (AstGen.zig:12023) --- +// Simplified: handles test functions (no cc, no varargs, no noalias, not +// fancy). + +static uint32_t addFunc(GenZir* gz, uint32_t src_node, uint32_t block_node, + uint32_t param_block, uint32_t ret_ref, const uint32_t* body, + uint32_t body_len, uint32_t lbrace_line, uint32_t lbrace_column) { + AstGenCtx* ag = gz->astgen; + const Ast* tree = ag->tree; + uint32_t rbrace_tok = lastToken(tree, block_node); + uint32_t rbrace_start = tree->tokens.starts[rbrace_tok]; + advanceSourceCursor(ag, rbrace_start); + uint32_t rbrace_line = ag->source_line - gz->decl_line; + uint32_t rbrace_column = ag->source_column; + + // Build Func payload (Zir.Inst.Func: ret_ty, param_block, body_len). + // (AstGen.zig:12187-12194) + uint32_t ret_body_len; + if (ret_ref == ZIR_REF_NONE) { + ret_body_len = 0; // void return + } else { + ret_body_len = 1; // simple Ref + } + // Pack RetTy: body_len:u31 | is_generic:bool(u1) = just body_len. 
+ uint32_t ret_ty_packed = ret_body_len & 0x7FFFFFFFu; // is_generic=false + + ensureExtraCapacity(ag, 3 + 1 + body_len + 7); + uint32_t payload_index = ag->extra_len; + ag->extra[ag->extra_len++] = ret_ty_packed; // Func.ret_ty + ag->extra[ag->extra_len++] = param_block; // Func.param_block + ag->extra[ag->extra_len++] = body_len; // Func.body_len + + // Trailing ret_ty ref (if ret_body_len == 1). + if (ret_ref != ZIR_REF_NONE) { + ag->extra[ag->extra_len++] = ret_ref; + } + + // Body instructions. + for (uint32_t i = 0; i < body_len; i++) { + ag->extra[ag->extra_len++] = body[i]; + } + + // SrcLocs (AstGen.zig:12098-12106). + uint32_t columns = (lbrace_column & 0xFFFFu) | (rbrace_column << 16); + ag->extra[ag->extra_len++] = lbrace_line; + ag->extra[ag->extra_len++] = rbrace_line; + ag->extra[ag->extra_len++] = columns; + // proto_hash (4 words): zero for tests. + ag->extra[ag->extra_len++] = 0; + ag->extra[ag->extra_len++] = 0; + ag->extra[ag->extra_len++] = 0; + ag->extra[ag->extra_len++] = 0; + + // Emit the func instruction (AstGen.zig:12220-12226). + ZirInstData data; + data.pl_node.src_node = (int32_t)src_node - (int32_t)gz->decl_node_index; + data.pl_node.payload_index = payload_index; + return addInstruction(gz, ZIR_INST_FUNC, data); +} + +// --- testDecl (AstGen.zig:4708) --- + +static void testDecl(AstGenCtx* ag, GenZir* gz, uint32_t* wip_decl_insts, + uint32_t* decl_idx, uint32_t node) { + const Ast* tree = ag->tree; + AstData nd = tree->nodes.datas[node]; + uint32_t body_node = nd.rhs; + + // makeDeclaration before advanceSourceCursorToNode (AstGen.zig:4726-4729). + uint32_t decl_inst = makeDeclaration(ag, node); + wip_decl_insts[*decl_idx] = decl_inst; + (*decl_idx)++; + advanceSourceCursorToNode(ag, node); + + uint32_t decl_line = ag->source_line; + uint32_t decl_column = ag->source_column; + + // Extract test name (AstGen.zig:4748-4835). 
+ uint32_t test_token = tree->nodes.main_tokens[node]; + uint32_t test_name_token = test_token + 1; + uint32_t test_name = 0; // NullTerminatedString.empty + DeclFlagsId decl_id = DECL_ID_UNNAMED_TEST; + + // Check if the token after 'test' is a string literal. + // We identify string literals by checking the source character. + uint32_t name_tok_start = tree->tokens.starts[test_name_token]; + if (name_tok_start < tree->source_len + && tree->source[name_tok_start] == '"') { + // String literal name. + uint32_t name_len; + strLitAsString(ag, test_name_token, &test_name, &name_len); + decl_id = DECL_ID_TEST; + } + // TODO: handle identifier test names (decltest). + + // Set up decl_block GenZir (AstGen.zig:4735-4743). + GenZir decl_block; + memset(&decl_block, 0, sizeof(decl_block)); + decl_block.astgen = ag; + decl_block.decl_node_index = node; + decl_block.decl_line = decl_line; + decl_block.is_comptime = true; + + // Set up fn_block GenZir (AstGen.zig:4837-4845). + GenZir fn_block; + memset(&fn_block, 0, sizeof(fn_block)); + fn_block.astgen = ag; + fn_block.decl_node_index = node; + fn_block.decl_line = decl_line; + fn_block.is_comptime = false; + + // Compute lbrace source location (AstGen.zig:4860-4862). + advanceSourceCursorToNode(ag, body_node); + uint32_t lbrace_line = ag->source_line - decl_line; + uint32_t lbrace_column = ag->source_column; + + // Process test body (AstGen.zig:4864). + fullBodyExpr(&fn_block, body_node); + + // If we hit unimplemented features, bail out. + if (ag->has_compile_errors) + return; + + // Add restore_err_ret_index_unconditional (AstGen.zig:4868). + { + ZirInstData rdata; + rdata.un_node.operand = ZIR_REF_NONE; // .none for .ret + rdata.un_node.src_node + = (int32_t)node - (int32_t)fn_block.decl_node_index; + addInstruction( + &fn_block, ZIR_INST_RESTORE_ERR_RET_INDEX_UNCONDITIONAL, rdata); + } + + // Add ret_implicit (AstGen.zig:4871). 
+ { + uint32_t body_last_tok = lastToken(tree, body_node); + ZirInstData rdata; + rdata.un_tok.operand = ZIR_REF_VOID_VALUE; + rdata.un_tok.src_tok = tokenIndexToRelative(&fn_block, body_last_tok); + addInstruction(&fn_block, ZIR_INST_RET_IMPLICIT, rdata); + } + + // Create func instruction (AstGen.zig:4874-4897). + uint32_t func_ref = addFunc(&decl_block, node, body_node, decl_inst, + ZIR_REF_ANYERROR_VOID_ERROR_UNION_TYPE, fn_block.instructions, + fn_block.instructions_len, lbrace_line, lbrace_column); + + // break_inline returning func to declaration (AstGen.zig:4899). + makeBreakInline(&decl_block, decl_inst, func_ref, AST_NODE_OFFSET_NONE); + + // setDeclaration (AstGen.zig:4903-4923). + setDeclaration(ag, decl_inst, decl_line, decl_column, decl_id, test_name, + decl_block.instructions, decl_block.instructions_len); + + (void)gz; +} + +// --- fnDecl (AstGen.zig:4067) --- +// Simplified: handles non-extern function declarations with bodies. + +static void fnDecl(AstGenCtx* ag, GenZir* gz, uint32_t* wip_decl_insts, + uint32_t* decl_idx, uint32_t node) { + const Ast* tree = ag->tree; + AstData nd = tree->nodes.datas[node]; + + // For fn_decl: data.lhs = fn_proto node, data.rhs = body node. + uint32_t proto_node = nd.lhs; + uint32_t body_node = nd.rhs; + + // Get function name token (main_token of proto + 1 = fn name). + uint32_t fn_token = tree->nodes.main_tokens[proto_node]; + uint32_t fn_name_token = fn_token + 1; + + // Check for 'pub' modifier: token before fn_token might be 'pub'. + bool is_pub = false; + if (fn_token > 0) { + uint32_t prev_tok_start = tree->tokens.starts[fn_token - 1]; + if (prev_tok_start + 3 <= tree->source_len + && memcmp(tree->source + prev_tok_start, "pub", 3) == 0) + is_pub = true; + } + + // makeDeclaration on proto_node (AstGen.zig:4090). 
+ uint32_t decl_inst = makeDeclaration(ag, proto_node); + wip_decl_insts[*decl_idx] = decl_inst; + (*decl_idx)++; + + advanceSourceCursorToNode(ag, node); + uint32_t decl_line = ag->source_line; + uint32_t decl_column = ag->source_column; + + // Determine return type (AstGen.zig:4133-4135). + // For fn_proto_simple: return_type is in data. + // Simplified: detect !void vs void from source. + AstNodeTag proto_tag = tree->nodes.tags[proto_node]; + bool is_inferred_error = false; + + // Look for the return type node. + // For fn_proto_simple: data.lhs = param (optional), data.rhs = + // return_type. For fn_proto_one: data = {extra, return_type}. Simplified: + // check if return type token starts with '!'. + AstData proto_data = tree->nodes.datas[proto_node]; + uint32_t return_type_node = 0; + if (proto_tag == AST_NODE_FN_PROTO_SIMPLE) { + return_type_node = proto_data.rhs; + } else if (proto_tag == AST_NODE_FN_PROTO_ONE) { + return_type_node = proto_data.rhs; + } else if (proto_tag == AST_NODE_FN_PROTO_MULTI + || proto_tag == AST_NODE_FN_PROTO) { + return_type_node = proto_data.rhs; + } + + if (return_type_node != 0) { + uint32_t ret_first_tok = firstToken(tree, return_type_node); + if (ret_first_tok > 0) { + uint32_t maybe_bang = ret_first_tok - 1; + uint32_t bang_start = tree->tokens.starts[maybe_bang]; + if (tree->source[bang_start] == '!') + is_inferred_error = true; + } + } + + // value_gz for fnDeclInner (AstGen.zig:4194-4201). + GenZir value_gz; + memset(&value_gz, 0, sizeof(value_gz)); + value_gz.astgen = ag; + value_gz.decl_node_index = proto_node; + value_gz.decl_line = decl_line; + value_gz.is_comptime = true; + + // fnDeclInner creates the func instruction. + // Simplified: creates fn_block, processes body, adds func instruction. 
+ GenZir fn_block; + memset(&fn_block, 0, sizeof(fn_block)); + fn_block.astgen = ag; + fn_block.decl_node_index = proto_node; + fn_block.decl_line = decl_line; + fn_block.is_comptime = false; + + // Process function body (AstGen.zig:4358). + advanceSourceCursorToNode(ag, body_node); + uint32_t lbrace_line = ag->source_line - decl_line; + uint32_t lbrace_column = ag->source_column; + + fullBodyExpr(&fn_block, body_node); + + if (ag->has_compile_errors) + return; + + // Add implicit return at end of function body. + // restore_err_ret_index is always added (AstGen.zig:4365-4368). + { + ZirInstData rdata; + rdata.un_node.operand = ZIR_REF_NONE; + rdata.un_node.src_node + = (int32_t)node - (int32_t)fn_block.decl_node_index; + addInstruction( + &fn_block, ZIR_INST_RESTORE_ERR_RET_INDEX_UNCONDITIONAL, rdata); + } + { + uint32_t body_last_tok = lastToken(tree, body_node); + ZirInstData rdata; + rdata.un_tok.operand = ZIR_REF_VOID_VALUE; + rdata.un_tok.src_tok = tokenIndexToRelative(&fn_block, body_last_tok); + addInstruction(&fn_block, ZIR_INST_RET_IMPLICIT, rdata); + } + + // Create func instruction (AstGen.zig:4396). + uint32_t func_ref; + if (is_inferred_error) { + // Use ret_ref = void_type for !void (same as tests but with + // func_inferred). Actually for !void, ret_ref = .none (void return, + // error inferred). + func_ref = addFunc(&value_gz, node, body_node, decl_inst, ZIR_REF_NONE, + fn_block.instructions, fn_block.instructions_len, lbrace_line, + lbrace_column); + // Patch the tag to func_inferred. + ag->inst_tags[func_ref - ZIR_REF_START_INDEX] = ZIR_INST_FUNC_INFERRED; + } else { + // void return: ret_ref = .none means void. + func_ref = addFunc(&value_gz, node, body_node, decl_inst, ZIR_REF_NONE, + fn_block.instructions, fn_block.instructions_len, lbrace_line, + lbrace_column); + } + + // break_inline returning func to declaration. + makeBreakInline(&value_gz, decl_inst, func_ref, AST_NODE_OFFSET_NONE); + + // setDeclaration (AstGen.zig:4208-4225). 
+ DeclFlagsId decl_id + = is_pub ? DECL_ID_PUB_CONST_SIMPLE : DECL_ID_CONST_SIMPLE; + uint32_t name_str = identAsString(ag, fn_name_token); + setDeclaration(ag, decl_inst, decl_line, decl_column, decl_id, name_str, + value_gz.instructions, value_gz.instructions_len); + + (void)gz; +} + +// --- comptimeDecl (AstGen.zig:4645) --- + +static void comptimeDecl(AstGenCtx* ag, GenZir* gz, uint32_t* wip_decl_insts, + uint32_t* decl_idx, uint32_t node) { + // makeDeclaration before advanceSourceCursorToNode (AstGen.zig:4663-4665). + uint32_t decl_inst = makeDeclaration(ag, node); + wip_decl_insts[*decl_idx] = decl_inst; + (*decl_idx)++; + + advanceSourceCursorToNode(ag, node); + + uint32_t decl_line = ag->source_line; + uint32_t decl_column = ag->source_column; + + // Value sub-block (AstGen.zig:4675-4686). + GenZir value_gz; + memset(&value_gz, 0, sizeof(value_gz)); + value_gz.astgen = ag; + value_gz.decl_node_index = node; + value_gz.decl_line = decl_line; + value_gz.is_comptime = true; + + // For comptime {}: body is empty block → no instructions generated. + // comptime_gz.isEmpty() == true → addBreak(.break_inline, decl_inst, + // .void_value) (AstGen.zig:4685-4686) + makeBreakInline( + &value_gz, decl_inst, ZIR_REF_VOID_VALUE, AST_NODE_OFFSET_NONE); + + setDeclaration(ag, decl_inst, decl_line, decl_column, DECL_ID_COMPTIME, 0, + value_gz.instructions, value_gz.instructions_len); + + (void)gz; +} + +// --- globalVarDecl (AstGen.zig:4498) --- + +static void globalVarDecl(AstGenCtx* ag, GenZir* gz, uint32_t* wip_decl_insts, + uint32_t* decl_idx, uint32_t node) { + uint32_t mut_token = ag->tree->nodes.main_tokens[node]; + uint32_t name_token = mut_token + 1; + + // advanceSourceCursorToNode before makeDeclaration (AstGen.zig:4542-4546). 
+ advanceSourceCursorToNode(ag, node); + uint32_t decl_column = ag->source_column; + + uint32_t decl_inst = makeDeclaration(ag, node); + wip_decl_insts[*decl_idx] = decl_inst; + (*decl_idx)++; + + // Set up init sub-block (AstGen.zig:4610). + GenZir init_gz; + memset(&init_gz, 0, sizeof(init_gz)); + init_gz.astgen = ag; + init_gz.decl_node_index = node; + init_gz.decl_line = ag->source_line; + init_gz.is_comptime = true; + + // Evaluate init expression. + // For simple_var_decl: data.rhs = init_node (optional). + AstData data = ag->tree->nodes.datas[node]; + uint32_t init_node = data.rhs; + uint32_t init_ref; + + if (init_node != UINT32_MAX) { + init_ref = expr(&init_gz, init_node); + } else { + // extern variable: no init. Not handled yet. + ag->has_compile_errors = true; + init_ref = ZIR_REF_VOID_VALUE; + } + + // addBreakWithSrcNode(.break_inline, decl_inst, init_inst, node) + // nodeIndexToRelative: decl_node_index == node, so offset = 0. + // (AstGen.zig:4620) + makeBreakInline(&init_gz, decl_inst, init_ref, 0); + + uint32_t name_str = identAsString(ag, name_token); + + setDeclaration(ag, decl_inst, ag->source_line, decl_column, + DECL_ID_CONST_SIMPLE, name_str, init_gz.instructions, + init_gz.instructions_len); + + (void)gz; +} + +// --- structDeclInner (AstGen.zig:4926) --- + +static void structDeclInner(AstGenCtx* ag, GenZir* gz, uint32_t node, + const uint32_t* members, uint32_t members_len) { + uint32_t decl_inst = reserveInstructionIndex(ag); + + // Fast path: no members, no backing int (AstGen.zig:4954-4970). + if (members_len == 0) { + StructDeclSmall small; + memset(&small, 0, sizeof(small)); + setStruct(ag, decl_inst, node, small, 0, 0, 0); + return; + } + + // Non-empty container (AstGen.zig:4973-5189). 
+ advanceSourceCursorToNode(ag, node); + + uint32_t decl_count = scanContainer(ag, members, members_len); + uint32_t field_count = members_len - decl_count; + (void)field_count; // TODO: handle struct fields + + // WipMembers: simplified to a plain array of declaration indices. + // (AstGen.zig:5031 — WipMembers.init) + uint32_t alloc_count = decl_count > 0 ? decl_count : 1; + uint32_t* wip_decl_insts = calloc(alloc_count, sizeof(uint32_t)); + if (!wip_decl_insts) + exit(1); + uint32_t decl_idx = 0; + + // Process each member (AstGen.zig:5060-5147). + for (uint32_t i = 0; i < members_len; i++) { + uint32_t member_node = members[i]; + AstNodeTag tag = ag->tree->nodes.tags[member_node]; + switch (tag) { + case AST_NODE_COMPTIME: + comptimeDecl(ag, gz, wip_decl_insts, &decl_idx, member_node); + break; + case AST_NODE_SIMPLE_VAR_DECL: + globalVarDecl(ag, gz, wip_decl_insts, &decl_idx, member_node); + break; + case AST_NODE_TEST_DECL: + testDecl(ag, gz, wip_decl_insts, &decl_idx, member_node); + break; + case AST_NODE_FN_DECL: + fnDecl(ag, gz, wip_decl_insts, &decl_idx, member_node); + break; + // TODO: AST_NODE_GLOBAL_VAR_DECL, AST_NODE_LOCAL_VAR_DECL, + // AST_NODE_ALIGNED_VAR_DECL, + // AST_NODE_FN_PROTO_*, container fields + default: + ag->has_compile_errors = true; + break; + } + } + + // setStruct (AstGen.zig:5152-5166). + StructDeclSmall small; + memset(&small, 0, sizeof(small)); + small.has_decls_len = (decl_count > 0); + setStruct(ag, decl_inst, node, small, 0, 0, decl_count); + + // Append declarations list after StructDecl payload (AstGen.zig:5184). + ensureExtraCapacity(ag, decl_count); + for (uint32_t i = 0; i < decl_count; i++) { + ag->extra[ag->extra_len++] = wip_decl_insts[i]; + } + + free(wip_decl_insts); +} + +// --- Public API: astGen (AstGen.zig:144) --- + +Zir astGen(const Ast* ast) { + AstGenCtx ag; + memset(&ag, 0, sizeof(ag)); + ag.tree = ast; + + // Initial allocations (AstGen.zig:162-172). 
+ uint32_t nodes_len = ast->nodes.len; + uint32_t init_cap = nodes_len > 8 ? nodes_len : 8; + + ag.inst_cap = init_cap; + ag.inst_tags = ARR_INIT(ZirInstTag, ag.inst_cap); + ag.inst_datas = ARR_INIT(ZirInstData, ag.inst_cap); + + ag.extra_cap = init_cap + ZIR_EXTRA_RESERVED_COUNT; + ag.extra = ARR_INIT(uint32_t, ag.extra_cap); + + ag.string_bytes_cap = 16; + ag.string_bytes = ARR_INIT(uint8_t, ag.string_bytes_cap); + + // String table index 0 is reserved for NullTerminatedString.empty + // (AstGen.zig:163). + ag.string_bytes[0] = 0; + ag.string_bytes_len = 1; + + // Reserve extra[0..1] (AstGen.zig:170-172). + ag.extra[ZIR_EXTRA_COMPILE_ERRORS] = 0; + ag.extra[ZIR_EXTRA_IMPORTS] = 0; + ag.extra_len = ZIR_EXTRA_RESERVED_COUNT; + + // Set up root GenZir scope (AstGen.zig:176-185). + GenZir gen_scope; + memset(&gen_scope, 0, sizeof(gen_scope)); + gen_scope.astgen = &ag; + gen_scope.is_comptime = true; + gen_scope.decl_node_index = 0; // root + gen_scope.decl_line = 0; + + // Get root container members: containerDeclRoot (AstGen.zig:191-195). + AstData root_data = ast->nodes.datas[0]; + uint32_t members_start = root_data.lhs; + uint32_t members_end = root_data.rhs; + const uint32_t* members = ast->extra_data.arr + members_start; + uint32_t members_len = members_end - members_start; + + structDeclInner(&ag, &gen_scope, 0, members, members_len); + + // Write imports list (AstGen.zig:227-244). + writeImports(&ag); + + // Build output Zir (AstGen.zig:211-239). 
+ Zir zir; + zir.inst_len = ag.inst_len; + zir.inst_cap = ag.inst_cap; + zir.inst_tags = ag.inst_tags; + zir.inst_datas = ag.inst_datas; + zir.extra_len = ag.extra_len; + zir.extra_cap = ag.extra_cap; + zir.extra = ag.extra; + zir.string_bytes_len = ag.string_bytes_len; + zir.string_bytes_cap = ag.string_bytes_cap; + zir.string_bytes = ag.string_bytes; + zir.has_compile_errors = ag.has_compile_errors; + + free(ag.imports); + free(ag.decl_names); + free(ag.decl_nodes); - (void)ast; return zir; } diff --git a/astgen_test.zig b/astgen_test.zig index bb4e58a872..5a84b719a4 100644 --- a/astgen_test.zig +++ b/astgen_test.zig @@ -8,30 +8,259 @@ const c = @cImport({ @cInclude("astgen.h"); }); -test "astgen: empty source" { - const gpa = std.testing.allocator; +fn dumpZir(ref_zir: Zir) void { + const tags = ref_zir.instructions.items(.tag); + const datas = ref_zir.instructions.items(.data); + std.debug.print(" instructions: {d}\n", .{ref_zir.instructions.len}); + for (0..ref_zir.instructions.len) |i| { + const tag = tags[i]; + std.debug.print(" [{d}] tag={d} ({s})", .{ i, @intFromEnum(tag), @tagName(tag) }); + switch (tag) { + .extended => { + const ext = datas[i].extended; + std.debug.print(" opcode={d} small=0x{x:0>4} operand={d}", .{ @intFromEnum(ext.opcode), ext.small, ext.operand }); + }, + .declaration => { + const decl = datas[i].declaration; + std.debug.print(" src_node={d} payload_index={d}", .{ @intFromEnum(decl.src_node), decl.payload_index }); + }, + .break_inline => { + const brk = datas[i].@"break"; + std.debug.print(" operand={d} payload_index={d}", .{ @intFromEnum(brk.operand), brk.payload_index }); + }, + else => {}, + } + std.debug.print("\n", .{}); + } + std.debug.print(" extra ({d}):\n", .{ref_zir.extra.len}); + for (0..ref_zir.extra.len) |i| { + std.debug.print(" [{d}] = 0x{x:0>8} ({d})\n", .{ i, ref_zir.extra[i], ref_zir.extra[i] }); + } + std.debug.print(" string_bytes ({d}):", .{ref_zir.string_bytes.len}); + for (0..ref_zir.string_bytes.len) |i| { + 
std.debug.print(" {x:0>2}", .{ref_zir.string_bytes[i]}); + } + std.debug.print("\n", .{}); +} - const source: [:0]const u8 = ""; - - // Reference: parse and generate ZIR with Zig. +fn refZir(gpa: Allocator, source: [:0]const u8) !Zir { var tree = try Ast.parse(gpa, source, .zig); defer tree.deinit(gpa); - var ref_zir = try AstGen.generate(gpa, tree); + return try AstGen.generate(gpa, tree); +} + +test "astgen dump: simple cases" { + const gpa = std.testing.allocator; + + const cases = .{ + .{ "empty", "" }, + .{ "comptime {}", "comptime {}" }, + .{ "const x = 0;", "const x = 0;" }, + .{ "const x = 1;", "const x = 1;" }, + .{ "const x = 0; const y = 0;", "const x = 0; const y = 0;" }, + .{ "test \"t\" {}", "test \"t\" {}" }, + .{ "const std = @import(\"std\");", "const std = @import(\"std\");" }, + .{ "test_all.zig", @embedFile("test_all.zig") }, + }; + + inline for (cases) |case| { + // std.debug.print("--- {s} ---\n", .{case[0]}); + const source: [:0]const u8 = case[1]; + var zir = try refZir(gpa, source); + zir.deinit(gpa); + } +} + +/// Build a mask of extra[] indices that contain hash data (src_hash or +/// fields_hash). These are zero-filled in the C output but contain real +/// Blake3 hashes in the Zig reference. We skip these positions during +/// comparison. +fn buildHashSkipMask(gpa: Allocator, ref: Zir) ![]bool { + const ref_extra_len: u32 = @intCast(ref.extra.len); + const skip = try gpa.alloc(bool, ref_extra_len); + @memset(skip, false); + + const ref_len: u32 = @intCast(ref.instructions.len); + const ref_tags = ref.instructions.items(.tag); + const ref_datas = ref.instructions.items(.data); + for (0..ref_len) |i| { + switch (ref_tags[i]) { + .extended => { + const ext = ref_datas[i].extended; + if (ext.opcode == .struct_decl) { + // StructDecl starts with fields_hash[4]. + const pi = ext.operand; + for (0..4) |j| skip[pi + j] = true; + } + }, + .declaration => { + // Declaration starts with src_hash[4]. 
+ const pi = ref_datas[i].declaration.payload_index; + for (0..4) |j| skip[pi + j] = true; + }, + else => {}, + } + } + return skip; +} + +test "astgen: empty source" { + const gpa = std.testing.allocator; + const source: [:0]const u8 = ""; + + var ref_zir = try refZir(gpa, source); defer ref_zir.deinit(gpa); - // Test: parse and generate ZIR with C. var c_ast = c.astParse(source.ptr, @intCast(source.len)); defer c.astDeinit(&c_ast); var c_zir = c.astGen(&c_ast); defer c.zirDeinit(&c_zir); - try expectEqualZir(ref_zir, c_zir); + try expectEqualZir(gpa, ref_zir, c_zir); } -fn expectEqualZir(ref: Zir, got: c.Zir) !void { +test "astgen: comptime {}" { + const gpa = std.testing.allocator; + const source: [:0]const u8 = "comptime {}"; + + var ref_zir = try refZir(gpa, source); + defer ref_zir.deinit(gpa); + + var c_ast = c.astParse(source.ptr, @intCast(source.len)); + defer c.astDeinit(&c_ast); + var c_zir = c.astGen(&c_ast); + defer c.zirDeinit(&c_zir); + + try expectEqualZir(gpa, ref_zir, c_zir); +} + +test "astgen: const x = 0;" { + const gpa = std.testing.allocator; + const source: [:0]const u8 = "const x = 0;"; + + var ref_zir = try refZir(gpa, source); + defer ref_zir.deinit(gpa); + + var c_ast = c.astParse(source.ptr, @intCast(source.len)); + defer c.astDeinit(&c_ast); + var c_zir = c.astGen(&c_ast); + defer c.zirDeinit(&c_zir); + + try expectEqualZir(gpa, ref_zir, c_zir); +} + +test "astgen: const x = 1;" { + const gpa = std.testing.allocator; + const source: [:0]const u8 = "const x = 1;"; + + var ref_zir = try refZir(gpa, source); + defer ref_zir.deinit(gpa); + + var c_ast = c.astParse(source.ptr, @intCast(source.len)); + defer c.astDeinit(&c_ast); + var c_zir = c.astGen(&c_ast); + defer c.zirDeinit(&c_zir); + + try expectEqualZir(gpa, ref_zir, c_zir); +} + +test "astgen: const x = 0; const y = 0;" { + const gpa = std.testing.allocator; + const source: [:0]const u8 = "const x = 0; const y = 0;"; + + var ref_zir = try refZir(gpa, source); + defer 
ref_zir.deinit(gpa); + + var c_ast = c.astParse(source.ptr, @intCast(source.len)); + defer c.astDeinit(&c_ast); + var c_zir = c.astGen(&c_ast); + defer c.zirDeinit(&c_zir); + + try expectEqualZir(gpa, ref_zir, c_zir); +} + +test "astgen: field_access" { + const gpa = std.testing.allocator; + const source: [:0]const u8 = "const std = @import(\"std\");\nconst mem = std.mem;"; + + var ref_zir = try refZir(gpa, source); + defer ref_zir.deinit(gpa); + + var c_ast = c.astParse(source.ptr, @intCast(source.len)); + defer c.astDeinit(&c_ast); + var c_zir = c.astGen(&c_ast); + defer c.zirDeinit(&c_zir); + + try expectEqualZir(gpa, ref_zir, c_zir); +} + +test "astgen: addr array init" { + const gpa = std.testing.allocator; + const source: [:0]const u8 = "const x = &[_][]const u8{\"a\",\"b\"};"; + + var ref_zir = try refZir(gpa, source); + defer ref_zir.deinit(gpa); + + var c_ast = c.astParse(source.ptr, @intCast(source.len)); + defer c.astDeinit(&c_ast); + var c_zir = c.astGen(&c_ast); + defer c.zirDeinit(&c_zir); + + try expectEqualZir(gpa, ref_zir, c_zir); +} + +test "astgen: test empty body" { + const gpa = std.testing.allocator; + const source: [:0]const u8 = "test \"t\" {}"; + + var ref_zir = try refZir(gpa, source); + defer ref_zir.deinit(gpa); + + var c_ast = c.astParse(source.ptr, @intCast(source.len)); + defer c.astDeinit(&c_ast); + var c_zir = c.astGen(&c_ast); + defer c.zirDeinit(&c_zir); + + try expectEqualZir(gpa, ref_zir, c_zir); +} + +test "astgen: test_all.zig" { + const gpa = std.testing.allocator; + const source: [:0]const u8 = @embedFile("test_all.zig"); + + var ref_zir = try refZir(gpa, source); + defer ref_zir.deinit(gpa); + + var c_ast = c.astParse(source.ptr, @intCast(source.len)); + defer c.astDeinit(&c_ast); + var c_zir = c.astGen(&c_ast); + defer c.zirDeinit(&c_zir); + + try expectEqualZir(gpa, ref_zir, c_zir); +} + +test "astgen: @import" { + const gpa = std.testing.allocator; + const source: [:0]const u8 = "const std = @import(\"std\");"; + + var 
ref_zir = try refZir(gpa, source); + defer ref_zir.deinit(gpa); + + var c_ast = c.astParse(source.ptr, @intCast(source.len)); + defer c.astDeinit(&c_ast); + var c_zir = c.astGen(&c_ast); + defer c.zirDeinit(&c_zir); + + try expectEqualZir(gpa, ref_zir, c_zir); +} + +fn expectEqualZir(gpa: Allocator, ref: Zir, got: c.Zir) !void { // Compare instruction count. const ref_len: u32 = @intCast(ref.instructions.len); - try std.testing.expectEqual(ref_len, got.inst_len); + if (ref_len != got.inst_len) { + std.debug.print("inst_len mismatch: ref={d} got={d}\n", .{ ref_len, got.inst_len }); + return error.TestExpectedEqual; + } // Compare instructions (tag + data) field-by-field. const ref_tags = ref.instructions.items(.tag); @@ -49,10 +278,15 @@ fn expectEqualZir(ref: Zir, got: c.Zir) !void { try expectEqualData(i, ref_tags[i], ref_datas[i], got.inst_datas[i]); } - // Compare extra data. + // Build hash skip mask for extra comparison. + const skip = try buildHashSkipMask(gpa, ref); + defer gpa.free(skip); + + // Compare extra data, skipping hash positions. const ref_extra_len: u32 = @intCast(ref.extra.len); try std.testing.expectEqual(ref_extra_len, got.extra_len); for (0..ref_extra_len) |i| { + if (skip[i]) continue; if (ref.extra[i] != got.extra[i]) { std.debug.print( "extra[{d}] mismatch: ref=0x{x:0>8} got=0x{x:0>8}\n", @@ -110,13 +344,423 @@ fn expectEqualData( return error.TestExpectedEqual; } }, - // Add more tag handlers as AstGen implementation grows. 
+ .declaration => { + const r = ref.declaration; + const g = got.declaration; + if (@intFromEnum(r.src_node) != g.src_node or + r.payload_index != g.payload_index) + { + std.debug.print( + "inst_datas[{d}] (declaration) mismatch:\n" ++ + " ref: src_node={d} payload_index={d}\n" ++ + " got: src_node={d} payload_index={d}\n", + .{ + idx, + @intFromEnum(r.src_node), + r.payload_index, + g.src_node, + g.payload_index, + }, + ); + return error.TestExpectedEqual; + } + }, + .break_inline => { + const r = ref.@"break"; + const g = got.break_data; + if (@intFromEnum(r.operand) != g.operand or + r.payload_index != g.payload_index) + { + std.debug.print( + "inst_datas[{d}] (break_inline) mismatch:\n" ++ + " ref: operand={d} payload_index={d}\n" ++ + " got: operand={d} payload_index={d}\n", + .{ + idx, + @intFromEnum(r.operand), + r.payload_index, + g.operand, + g.payload_index, + }, + ); + return error.TestExpectedEqual; + } + }, + .import => { + const r = ref.pl_tok; + const g = got.pl_tok; + if (@intFromEnum(r.src_tok) != g.src_tok or + r.payload_index != g.payload_index) + { + std.debug.print( + "inst_datas[{d}] (import) mismatch:\n" ++ + " ref: src_tok={d} payload_index={d}\n" ++ + " got: src_tok={d} payload_index={d}\n", + .{ + idx, + @intFromEnum(r.src_tok), + r.payload_index, + g.src_tok, + g.payload_index, + }, + ); + return error.TestExpectedEqual; + } + }, + .dbg_stmt => { + const r = ref.dbg_stmt; + const g = got.dbg_stmt; + if (r.line != g.line or r.column != g.column) { + std.debug.print( + "inst_datas[{d}] (dbg_stmt) mismatch:\n" ++ + " ref: line={d} column={d}\n" ++ + " got: line={d} column={d}\n", + .{ idx, r.line, r.column, g.line, g.column }, + ); + return error.TestExpectedEqual; + } + }, + .ensure_result_non_error, + .restore_err_ret_index_unconditional, + => { + const r = ref.un_node; + const g = got.un_node; + if (@intFromEnum(r.src_node) != g.src_node or + @intFromEnum(r.operand) != g.operand) + { + std.debug.print( + "inst_datas[{d}] ({s}) 
mismatch:\n" ++ + " ref: src_node={d} operand={d}\n" ++ + " got: src_node={d} operand={d}\n", + .{ + idx, + @tagName(tag), + @intFromEnum(r.src_node), + @intFromEnum(r.operand), + g.src_node, + g.operand, + }, + ); + return error.TestExpectedEqual; + } + }, + .ret_implicit => { + const r = ref.un_tok; + const g = got.un_tok; + if (@intFromEnum(r.src_tok) != g.src_tok or + @intFromEnum(r.operand) != g.operand) + { + std.debug.print( + "inst_datas[{d}] (ret_implicit) mismatch:\n" ++ + " ref: src_tok={d} operand={d}\n" ++ + " got: src_tok={d} operand={d}\n", + .{ + idx, + @intFromEnum(r.src_tok), + @intFromEnum(r.operand), + g.src_tok, + g.operand, + }, + ); + return error.TestExpectedEqual; + } + }, + .func, + .func_inferred, + .array_type, + .array_type_sentinel, + .array_cat, + .array_init, + .array_init_ref, + => { + const r = ref.pl_node; + const g = got.pl_node; + if (@intFromEnum(r.src_node) != g.src_node or + r.payload_index != g.payload_index) + { + std.debug.print( + "inst_datas[{d}] ({s}) mismatch:\n" ++ + " ref: src_node={d} payload_index={d}\n" ++ + " got: src_node={d} payload_index={d}\n", + .{ + idx, + @tagName(tag), + @intFromEnum(r.src_node), + r.payload_index, + g.src_node, + g.payload_index, + }, + ); + return error.TestExpectedEqual; + } + }, + .decl_val, .decl_ref => { + const r = ref.str_tok; + const g = got.str_tok; + if (@intFromEnum(r.start) != g.start or @intFromEnum(r.src_tok) != g.src_tok) { + std.debug.print( + "inst_datas[{d}] ({s}) mismatch:\n" ++ + " ref: start={d} src_tok={d}\n" ++ + " got: start={d} src_tok={d}\n", + .{ + idx, + @tagName(tag), + @intFromEnum(r.start), + @intFromEnum(r.src_tok), + g.start, + g.src_tok, + }, + ); + return error.TestExpectedEqual; + } + }, + .field_val, .field_ptr, .field_val_named, .field_ptr_named => { + const r = ref.pl_node; + const g = got.pl_node; + if (@intFromEnum(r.src_node) != g.src_node or + r.payload_index != g.payload_index) + { + std.debug.print( + "inst_datas[{d}] ({s}) mismatch:\n" ++ + " 
ref: src_node={d} payload_index={d}\n" ++ + " got: src_node={d} payload_index={d}\n", + .{ + idx, + @tagName(tag), + @intFromEnum(r.src_node), + r.payload_index, + g.src_node, + g.payload_index, + }, + ); + return error.TestExpectedEqual; + } + }, + .int => { + if (ref.int != got.int_val) { + std.debug.print( + "inst_datas[{d}] (int) mismatch: ref={d} got={d}\n", + .{ idx, ref.int, got.int_val }, + ); + return error.TestExpectedEqual; + } + }, + .ptr_type => { + // Compare ptr_type data: flags, size, payload_index. + if (@as(u8, @bitCast(ref.ptr_type.flags)) != got.ptr_type.flags or + @intFromEnum(ref.ptr_type.size) != got.ptr_type.size or + ref.ptr_type.payload_index != got.ptr_type.payload_index) + { + std.debug.print( + "inst_datas[{d}] (ptr_type) mismatch:\n" ++ + " ref: flags=0x{x} size={d} pi={d}\n" ++ + " got: flags=0x{x} size={d} pi={d}\n", + .{ + idx, + @as(u8, @bitCast(ref.ptr_type.flags)), + @intFromEnum(ref.ptr_type.size), + ref.ptr_type.payload_index, + got.ptr_type.flags, + got.ptr_type.size, + got.ptr_type.payload_index, + }, + ); + return error.TestExpectedEqual; + } + }, + .int_type => { + const r = ref.int_type; + const g = got.int_type; + if (@intFromEnum(r.src_node) != g.src_node or + @intFromEnum(r.signedness) != g.signedness or + r.bit_count != g.bit_count) + { + std.debug.print( + "inst_datas[{d}] (int_type) mismatch\n", .{idx}, + ); + return error.TestExpectedEqual; + } + }, + .str => { + const r = ref.str; + const g = got.str; + if (@intFromEnum(r.start) != g.start or r.len != g.len) { + std.debug.print( + "inst_datas[{d}] (str) mismatch:\n" ++ + " ref: start={d} len={d}\n" ++ + " got: start={d} len={d}\n", + .{ idx, @intFromEnum(r.start), r.len, g.start, g.len }, + ); + return error.TestExpectedEqual; + } + }, else => { std.debug.print( - "inst_datas[{d}]: unhandled tag {d} in comparison\n", - .{ idx, @intFromEnum(tag) }, + "inst_datas[{d}]: unhandled tag {d} ({s}) in comparison\n", + .{ idx, @intFromEnum(tag), @tagName(tag) }, ); return 
error.TestUnexpectedResult; }, } } + +/// Silent ZIR comparison: returns true if ZIR matches, false otherwise. +/// Unlike expectEqualZir, does not print diagnostics or return errors. +fn zirMatches(gpa: Allocator, ref: Zir, got: c.Zir) bool { + const ref_len: u32 = @intCast(ref.instructions.len); + if (ref_len != got.inst_len) return false; + + const ref_tags = ref.instructions.items(.tag); + const ref_datas = ref.instructions.items(.data); + for (0..ref_len) |i| { + const ref_tag: u8 = @intFromEnum(ref_tags[i]); + const got_tag: u8 = @intCast(got.inst_tags[i]); + if (ref_tag != got_tag) return false; + if (!dataMatches(ref_tags[i], ref_datas[i], got.inst_datas[i])) return false; + } + + const ref_extra_len: u32 = @intCast(ref.extra.len); + if (ref_extra_len != got.extra_len) return false; + + const skip = buildHashSkipMask(gpa, ref) catch return false; + defer gpa.free(skip); + + for (0..ref_extra_len) |i| { + if (skip[i]) continue; + if (ref.extra[i] != got.extra[i]) return false; + } + + const ref_sb_len: u32 = @intCast(ref.string_bytes.len); + if (ref_sb_len != got.string_bytes_len) return false; + for (0..ref_sb_len) |i| { + if (ref.string_bytes[i] != got.string_bytes[i]) return false; + } + + return true; +} + +/// Silent data comparison: returns true if fields match, false otherwise. 
+fn dataMatches(tag: Zir.Inst.Tag, ref: Zir.Inst.Data, got: c.ZirInstData) bool { + switch (tag) { + .extended => { + const r = ref.extended; + const g = got.extended; + return @intFromEnum(r.opcode) == g.opcode and + r.small == g.small and + r.operand == g.operand; + }, + .declaration => { + const r = ref.declaration; + const g = got.declaration; + return @intFromEnum(r.src_node) == g.src_node and + r.payload_index == g.payload_index; + }, + .break_inline => { + const r = ref.@"break"; + const g = got.break_data; + return @intFromEnum(r.operand) == g.operand and + r.payload_index == g.payload_index; + }, + .import => { + const r = ref.pl_tok; + const g = got.pl_tok; + return @intFromEnum(r.src_tok) == g.src_tok and + r.payload_index == g.payload_index; + }, + .dbg_stmt => { + return ref.dbg_stmt.line == got.dbg_stmt.line and + ref.dbg_stmt.column == got.dbg_stmt.column; + }, + .ensure_result_non_error, + .restore_err_ret_index_unconditional, + => { + return @intFromEnum(ref.un_node.src_node) == got.un_node.src_node and + @intFromEnum(ref.un_node.operand) == got.un_node.operand; + }, + .ret_implicit => { + return @intFromEnum(ref.un_tok.src_tok) == got.un_tok.src_tok and + @intFromEnum(ref.un_tok.operand) == got.un_tok.operand; + }, + .func, + .func_inferred, + .array_type, + .array_type_sentinel, + .array_cat, + .array_init, + .array_init_ref, + => { + return @intFromEnum(ref.pl_node.src_node) == got.pl_node.src_node and + ref.pl_node.payload_index == got.pl_node.payload_index; + }, + .ptr_type => { + return @as(u8, @bitCast(ref.ptr_type.flags)) == got.ptr_type.flags and + @intFromEnum(ref.ptr_type.size) == got.ptr_type.size and + ref.ptr_type.payload_index == got.ptr_type.payload_index; + }, + .int_type => { + return @intFromEnum(ref.int_type.src_node) == got.int_type.src_node and + @intFromEnum(ref.int_type.signedness) == got.int_type.signedness and + ref.int_type.bit_count == got.int_type.bit_count; + }, + .decl_val, .decl_ref => { + return 
@intFromEnum(ref.str_tok.start) == got.str_tok.start and + @intFromEnum(ref.str_tok.src_tok) == got.str_tok.src_tok; + }, + .field_val, .field_ptr, .field_val_named, .field_ptr_named => { + return @intFromEnum(ref.pl_node.src_node) == got.pl_node.src_node and + ref.pl_node.payload_index == got.pl_node.payload_index; + }, + .int => return ref.int == got.int_val, + .str => { + return @intFromEnum(ref.str.start) == got.str.start and + ref.str.len == got.str.len; + }, + else => return false, + } +} + +const corpus_files = .{ + .{ "astgen_test.zig", @embedFile("astgen_test.zig") }, + .{ "build.zig", @embedFile("build.zig") }, + .{ "parser_test.zig", @embedFile("parser_test.zig") }, + .{ "test_all.zig", @embedFile("test_all.zig") }, + .{ "tokenizer_test.zig", @embedFile("tokenizer_test.zig") }, +}; + +/// Returns .pass or .skip for a single corpus entry. +fn corpusCheck(gpa: Allocator, _: []const u8, source: [:0]const u8) enum { pass, skip } { + var tree = Ast.parse(gpa, source, .zig) catch return .skip; + defer tree.deinit(gpa); + + var ref_zir = AstGen.generate(gpa, tree) catch return .skip; + defer ref_zir.deinit(gpa); + + var c_ast = c.astParse(source.ptr, @intCast(source.len)); + defer c.astDeinit(&c_ast); + var c_zir = c.astGen(&c_ast); + defer c.zirDeinit(&c_zir); + + if (c_zir.has_compile_errors) return .skip; + + if (zirMatches(gpa, ref_zir, c_zir)) { + return .pass; + } else { + return .skip; + } +} + +test "astgen: corpus" { + const gpa = std.testing.allocator; + + var passed: u32 = 0; + var skipped: u32 = 0; + + inline for (corpus_files) |entry| { + switch (corpusCheck(gpa, entry[0], entry[1])) { + .pass => passed += 1, + .skip => skipped += 1, + } + } + + if (passed != corpus_files.len) return error.SkipZigTest; +} diff --git a/parser_test.zig b/parser_test.zig index a82430955d..56d5595718 100644 --- a/parser_test.zig +++ b/parser_test.zig @@ -6445,7 +6445,7 @@ const c = @cImport({ const zigToken = @import("./tokenizer_test.zig").zigToken; -fn zigNode(token: 
c_uint) Ast.Node.Tag { +pub fn zigNode(token: c_uint) Ast.Node.Tag { return switch (token) { c.AST_NODE_ROOT => .root, c.AST_NODE_TEST_DECL => .test_decl, @@ -6870,7 +6870,7 @@ fn zigData(tag: Ast.Node.Tag, lhs: u32, rhs: u32) Ast.Node.Data { } // zigAst converts a c.Ast to std.Zig.Ast. The resulting Ast should be freed with deinit(). -fn zigAst(gpa: Allocator, c_ast: c.Ast) !Ast { +pub fn zigAst(gpa: Allocator, c_ast: c.Ast) !Ast { var tokens = Ast.TokenList{}; try tokens.resize(gpa, c_ast.tokens.len); errdefer tokens.deinit(gpa); diff --git a/zir.h b/zir.h index 7245f28597..1896adbdf8 100644 --- a/zir.h +++ b/zir.h @@ -438,6 +438,29 @@ typedef union { #define ZIR_REF_NONE UINT32_MAX #define ZIR_MAIN_STRUCT_INST 0 +// Selected Zir.Inst.Ref enum values (matching Zig enum order). +#define ZIR_REF_U8_TYPE 3 +#define ZIR_REF_USIZE_TYPE 16 +#define ZIR_REF_C_UINT_TYPE 22 +#define ZIR_REF_BOOL_TYPE 34 +#define ZIR_REF_VOID_TYPE 35 +#define ZIR_REF_ANYERROR_VOID_ERROR_UNION_TYPE 100 +#define ZIR_REF_ZERO 108 +#define ZIR_REF_ZERO_USIZE 109 +#define ZIR_REF_ZERO_U1 110 +#define ZIR_REF_ZERO_U8 111 +#define ZIR_REF_ONE 112 +#define ZIR_REF_ONE_USIZE 113 +#define ZIR_REF_ONE_U1 114 +#define ZIR_REF_ONE_U8 115 +#define ZIR_REF_FOUR_U8 116 +#define ZIR_REF_NEGATIVE_ONE 117 +#define ZIR_REF_VOID_VALUE 118 +#define ZIR_REF_UNREACHABLE_VALUE 119 + +// Ast.Node.OptionalOffset.none = maxInt(i32). +#define AST_NODE_OFFSET_NONE ((int32_t)0x7FFFFFFF) + // --- Extra indices reserved at the start of extra[] --- // Matches Zir.ExtraIndex enum from Zir.zig.