diff --git a/stage0/.clang-format b/stage0/.clang-format new file mode 100644 index 0000000000..860458e26e --- /dev/null +++ b/stage0/.clang-format @@ -0,0 +1,3 @@ +BasedOnStyle: WebKit +BreakBeforeBraces: Attach +ColumnLimit: 79 diff --git a/stage0/.claude/skills/port-astgen/SKILL.md b/stage0/.claude/skills/port-astgen/SKILL.md new file mode 100644 index 0000000000..6ebf62da2f --- /dev/null +++ b/stage0/.claude/skills/port-astgen/SKILL.md @@ -0,0 +1,122 @@ +--- +name: port-astgen +description: Iteratively port AstGen.zig to astgen.c by enabling skipped corpus tests, finding divergences, and mechanically copying upstream code. +allowed-tools: Read, Write, Edit, Bash, Grep, Glob, Task +disable-model-invocation: true +--- + +# Port AstGen — Iterative Corpus Test Loop + +You are porting `AstGen.zig` to `astgen.c`. This is a **mechanical +translation** — no creativity, no invention. When the C code differs +from Zig, copy the Zig structure into C. + +## Key files + +- `astgen.c` — C implementation (modify this) +- `astgen_test.zig` — corpus tests (enable/skip tests here) +- `~/code/zig/lib/std/zig/AstGen.zig` — upstream reference (~14k lines) +- `~/code/zig/lib/std/zig/Ast.zig` — AST node accessors +- `~/code/zig/lib/std/zig/Zir.zig` — ZIR instruction definitions + +## Loop + +Repeat the following steps until all corpus tests pass or you've made +3 consecutive iterations with zero progress. + +### Step 1: Find the first skipped corpus test + +Search `astgen_test.zig` for lines matching: +``` +if (true) return error.SkipZigTest +``` +Pick the first one. If none found, all corpus tests pass — stop. + +### Step 2: Enable it + +Remove or comment out the `if (true) return error.SkipZigTest` line. + +### Step 3: Run tests + +```sh +zig build test 2>&1 +``` + +Record the output. If tests pass, go to Step 7. + +### Step 4: Analyze the failure + +From the test output, determine the failure type: + +- **`has_compile_errors`**: Temporarily add `#include ` and + `fprintf(stderr, ...)` to `setCompileError()` in `astgen.c` to find + which `SET_ERROR` fires. Run the test again and note the function and + line. +- **`zir mismatch`**: Note `inst_len`, `extra_len`, `string_bytes_len` + diffs and the first tag mismatch position. +- **`unhandled tag N`**: Add the missing ZIR tag to the `expectEqualData` + and `dataMatches` switch statements in `astgen_test.zig`. + +### Step 5: Compare implementations + +Find the upstream Zig function that corresponds to the failing code +path. Use the Task tool with `subagent_type=general-purpose` to read +both implementations and enumerate **every difference**. + +Focus on differences that affect output: +- Extra data written (field order, conditional fields, body lengths) +- Instruction tags emitted +- String table entries +- Break payload values (operand_src_node) + +Do NOT guess. Read both implementations completely and compare +mechanically. + +### Step 6: Port the fix + +Apply the minimal mechanical change to `astgen.c` to match the upstream. +Run `zig build test` after each change to check for progress. + +**Progress** means any of: +- `inst_len` diff decreased +- `extra_len` diff decreased +- `string_bytes_len` diff decreased +- First tag mismatch position moved later + +If after porting a fix the test still fails but progress was made, +continue to Step 7 (commit progress, re-skip). + +### Step 7: Clean up and commit + +1. If the corpus test still fails: re-add the `SkipZigTest` line with + a TODO comment describing the remaining diff. +2. Remove ALL `fprintf`/`printf` debug statements from `astgen.c`. +3. Remove `#include ` if it was added for debugging. +4. Verify: `zig build fmt && zig build all` must exit 0 with no unexpected output. +5. Commit: + ```sh + git add astgen.c astgen_test.zig + git commit -m " + + Co-Authored-By: " + ``` + +### Step 8: Repeat + +Go back to Step 1. + +## Rules + +- **Mechanical copy only.** Do not invent new approaches. If the upstream does + X, do X in C. +- **Never remove zig-cache.** +- **Never print to stdout/stderr in committed code.** Debug prints are + temporary only. +- **Functions must appear in the same order as in the upstream Zig file.** +- **Commit after every iteration**, even partial positive progress. +- **Prefer finding systematic differences for catching bugs** instead of + debugging and hunting for them. Zig code is bug-free for the purposes of + porting. When test cases fail, it means the C implementation differs from the + Zig one, which is the source of the bug. So standard "bug hunting" methods no + longer apply -- making implementations consistent is a much better approach + in all ways. diff --git a/stage0/.gitignore b/stage0/.gitignore new file mode 100644 index 0000000000..56de7d8925 --- /dev/null +++ b/stage0/.gitignore @@ -0,0 +1,3 @@ +/.zig-cache/ +/zig-out/ +*.o diff --git a/stage0/CLAUDE.md b/stage0/CLAUDE.md new file mode 100644 index 0000000000..d7844864c8 --- /dev/null +++ b/stage0/CLAUDE.md @@ -0,0 +1,25 @@ +- when porting features from upstream Zig, it should be a mechanical copy. + Don't invent. Most of what you are doing is invented, but needs to be re-done + in C. Keep the structure in place, name functions and types the same way (or + within reason equivalently if there are namespacing constraints). It should + be easy to reference one from the other; and, if there are semantic + differences, they *must* be because Zig or C does not support certain + features (like errdefer). +- See README.md for useful information about this project, incl. how to test + this. +- **Never ever** remove zig-cache, nether local nor global. +- Zig code is in ~/code/zig, don't look at /nix/... +- when translating functions from Zig to C (mechanically, remember?), add them + in the same order as in the original Zig file. +- debug printfs: add printfs only when debugging a specific issue; when done + debugging, remove them (or comment them if you may find them useful later). I + prefer committing code only when `zig build` returns no output. +- Always complete all tasks before stopping. Do not stop to ask for + confirmation mid-task. If you have remaining work, continue without waiting + for input. +- no `cppcheck` suppressions. They are here for a reason. If it is complaining + about automatic variables, make it non-automatic. I.e. find a way to satisfy + the linter, do not suppress it. +- if you are in the middle of porting AstGen, load up the skill + .claude/skills/port-astgen/SKILL.md and proceed with it. +- remember: **mechanical copy** when porting existing stuff, no new creativity. diff --git a/stage0/LICENSE b/stage0/LICENSE new file mode 100644 index 0000000000..f0da9830a5 --- /dev/null +++ b/stage0/LICENSE @@ -0,0 +1,34 @@ +NOTICE TO PROSPECTIVE UPSTREAM CONTRIBUTORS + +This software is licensed under the MIT License below. However, the +author politely but firmly requests that you do not submit this work, or +any derivative thereof, to the Zig project upstream unless you have +obtained explicit written permission from a Zig core team member +authorizing the submission. + +This notice is not a license restriction. The MIT License governs all +use of this software. This is a social contract: please honor it. + +--- + +The MIT License (Expat) + +Copyright (c) Motiejus Jakštys + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. diff --git a/stage0/README.md b/stage0/README.md new file mode 100644 index 0000000000..bfe23dd788 --- /dev/null +++ b/stage0/README.md @@ -0,0 +1,45 @@ +# About + +zig0 aspires to be an interpreter of zig 0.15.1 written in C. + +This is written with help from LLM: + +- Lexer: + - Datastructures 100% human. + - Helper functions 100% human. + - Lexing functions 50/50 human/bot. +- Parser: + - Datastructures 100% human. + - Helper functions 50/50. + - Parser functions 5/95 human/bot. +- AstGen: TBD. + +# Testing + +Quick test: + + zig build fmt test + +Full test and static analysis with all supported compilers and valgrind (run +before commit, takes a while): + + zig build -Dvalgrind + +# Debugging tips + +Test runs infinitely? Build the test program executable: + + $ zig build test -Dno-exec + +And then run it, capturing the stack trace: + +``` +gdb -batch \ + -ex "python import threading; threading.Timer(1.0, lambda: gdb.post_event(lambda: gdb.execute('interrupt'))).start()" \ + -ex run \ + -ex "bt full" \ + -ex quit \ + zig-out/bin/test +``` + +You are welcome to replace `-ex "bt full"` with anything other of interest. diff --git a/stage0/ast.c b/stage0/ast.c new file mode 100644 index 0000000000..e9a1d04eb3 --- /dev/null +++ b/stage0/ast.c @@ -0,0 +1,122 @@ +#include "common.h" + +#include +#include +#include +#include + +#include "ast.h" +#include "parser.h" + +#define N 1024 + +static void astTokenListEnsureCapacity( + AstTokenList* list, uint32_t additional) { + const uint32_t new_len = list->len + additional; + if (new_len <= list->cap) { + return; + } + + const uint32_t new_cap = new_len > list->cap * 2 ? new_len : list->cap * 2; + list->tags = realloc(list->tags, new_cap * sizeof(TokenizerTag)); + list->starts = realloc(list->starts, new_cap * sizeof(AstIndex)); + if (!list->tags || !list->starts) + exit(1); + list->cap = new_cap; +} + +Ast astParse(const char* source, const uint32_t len) { + uint32_t estimated_token_count = len / 8; + + AstTokenList tokens = { + .len = 0, + .cap = estimated_token_count, + .tags = ARR_INIT(TokenizerTag, estimated_token_count), + .starts = ARR_INIT(AstIndex, estimated_token_count), + }; + + Tokenizer tok = tokenizerInit(source, len); + while (true) { + astTokenListEnsureCapacity(&tokens, 1); + TokenizerToken token = tokenizerNext(&tok); + tokens.tags[tokens.len] = token.tag; + tokens.starts[tokens.len++] = token.loc.start; + if (token.tag == TOKEN_EOF) + break; + } + + uint32_t estimated_node_count = (tokens.len + 2) / 2; + + char err_buf[PARSE_ERR_BUF_SIZE]; + err_buf[0] = '\0'; + + Parser p = { + .source = source, + .source_len = len, + .token_tags = tokens.tags, + .token_starts = tokens.starts, + .tokens_len = tokens.len, + .tok_i = 0, + .nodes = { + .len = 0, + .cap = estimated_node_count, + .tags = ARR_INIT(AstNodeTag, estimated_node_count), + .main_tokens = ARR_INIT(AstTokenIndex, estimated_node_count), + .datas = ARR_INIT(AstData, estimated_node_count), + }, + .extra_data = SLICE_INIT(AstNodeIndex, N), + .scratch = SLICE_INIT(AstNodeIndex, N), + .err_buf = err_buf, + }; + + bool has_error = false; + if (setjmp(p.error_jmp) != 0) { + has_error = true; + } + if (!has_error) + parseRoot(&p); + + p.scratch.cap = p.scratch.len = 0; + free(p.scratch.arr); + + char* err_msg = NULL; + if (has_error && err_buf[0] != '\0') { + const size_t len2 = strlen(err_buf); + err_msg = malloc(len2 + 1); + if (!err_msg) + exit(1); + memcpy(err_msg, err_buf, len2 + 1); + } + + return (Ast) { + .source = source, + .source_len = len, + .tokens = tokens, + .nodes = p.nodes, + .extra_data = { + .len = p.extra_data.len, + .cap = p.extra_data.cap, + .arr = p.extra_data.arr, + }, + .has_error = has_error, + .err_msg = err_msg, + }; +} + +void astDeinit(Ast* tree) { + free(tree->err_msg); + + tree->tokens.cap = tree->tokens.len = 0; + free(tree->tokens.tags); + free(tree->tokens.starts); + + tree->nodes.cap = 0; + tree->nodes.len = 0; + free(tree->nodes.tags); + free(tree->nodes.main_tokens); + free(tree->nodes.datas); + + tree->extra_data.cap = 0; + tree->extra_data.len = 0; + free(tree->extra_data.arr); +} diff --git a/stage0/ast.h b/stage0/ast.h new file mode 100644 index 0000000000..aa444c01c4 --- /dev/null +++ b/stage0/ast.h @@ -0,0 +1,625 @@ +#ifndef _ZIG0_AST_H__ +#define _ZIG0_AST_H__ + +#include +#include + +#include "common.h" +#include "tokenizer.h" + +typedef enum { + /// sub_list[lhs...rhs] + AST_NODE_ROOT, + /// `usingnamespace lhs;`. rhs unused. main_token is `usingnamespace`. + AST_NODE_USINGNAMESPACE, + /// lhs is test name token (must be string literal or identifier), if any. + /// rhs is the body node. + AST_NODE_TEST_DECL, + /// lhs is the index into extra_data. + /// rhs is the initialization expression, if any. + /// main_token is `var` or `const`. + AST_NODE_GLOBAL_VAR_DECL, + /// `var a: x align(y) = rhs` + /// lhs is the index into extra_data. + /// main_token is `var` or `const`. + AST_NODE_LOCAL_VAR_DECL, + /// `var a: lhs = rhs`. lhs and rhs may be unused. + /// Can be local or global. + /// main_token is `var` or `const`. + AST_NODE_SIMPLE_VAR_DECL, + /// `var a align(lhs) = rhs`. lhs and rhs may be unused. + /// Can be local or global. + /// main_token is `var` or `const`. + AST_NODE_ALIGNED_VAR_DECL, + /// lhs is the identifier token payload if any, + /// rhs is the deferred expression. + AST_NODE_ERRDEFER, + /// lhs is unused. + /// rhs is the deferred expression. + AST_NODE_DEFER, + /// lhs catch rhs + /// lhs catch |err| rhs + /// main_token is the `catch` keyword. + /// payload is determined by looking at the next token after the `catch` + /// keyword. + AST_NODE_CATCH, + /// `lhs.a`. main_token is the dot. rhs is the identifier token index. + AST_NODE_FIELD_ACCESS, + /// `lhs.?`. main_token is the dot. rhs is the `?` token index. + AST_NODE_UNWRAP_OPTIONAL, + /// `lhs == rhs`. main_token is op. + AST_NODE_EQUAL_EQUAL, + /// `lhs != rhs`. main_token is op. + AST_NODE_BANG_EQUAL, + /// `lhs < rhs`. main_token is op. + AST_NODE_LESS_THAN, + /// `lhs > rhs`. main_token is op. + AST_NODE_GREATER_THAN, + /// `lhs <= rhs`. main_token is op. + AST_NODE_LESS_OR_EQUAL, + /// `lhs >= rhs`. main_token is op. + AST_NODE_GREATER_OR_EQUAL, + /// `lhs *= rhs`. main_token is op. + AST_NODE_ASSIGN_MUL, + /// `lhs /= rhs`. main_token is op. + AST_NODE_ASSIGN_DIV, + /// `lhs %= rhs`. main_token is op. + AST_NODE_ASSIGN_MOD, + /// `lhs += rhs`. main_token is op. + AST_NODE_ASSIGN_ADD, + /// `lhs -= rhs`. main_token is op. + AST_NODE_ASSIGN_SUB, + /// `lhs <<= rhs`. main_token is op. + AST_NODE_ASSIGN_SHL, + /// `lhs <<|= rhs`. main_token is op. + AST_NODE_ASSIGN_SHL_SAT, + /// `lhs >>= rhs`. main_token is op. + AST_NODE_ASSIGN_SHR, + /// `lhs &= rhs`. main_token is op. + AST_NODE_ASSIGN_BIT_AND, + /// `lhs ^= rhs`. main_token is op. + AST_NODE_ASSIGN_BIT_XOR, + /// `lhs |= rhs`. main_token is op. + AST_NODE_ASSIGN_BIT_OR, + /// `lhs *%= rhs`. main_token is op. + AST_NODE_ASSIGN_MUL_WRAP, + /// `lhs +%= rhs`. main_token is op. + AST_NODE_ASSIGN_ADD_WRAP, + /// `lhs -%= rhs`. main_token is op. + AST_NODE_ASSIGN_SUB_WRAP, + /// `lhs *|= rhs`. main_token is op. + AST_NODE_ASSIGN_MUL_SAT, + /// `lhs +|= rhs`. main_token is op. + AST_NODE_ASSIGN_ADD_SAT, + /// `lhs -|= rhs`. main_token is op. + AST_NODE_ASSIGN_SUB_SAT, + /// `lhs = rhs`. main_token is op. + AST_NODE_ASSIGN, + /// `a, b, ... = rhs`. main_token is op. lhs is index into `extra_data` + /// of an lhs elem count followed by an array of that many `Node.Index`, + /// with each node having one of the following types: + /// * `global_var_decl` + /// * `local_var_decl` + /// * `simple_var_decl` + /// * `aligned_var_decl` + /// * Any expression node + /// The first 3 types correspond to a `var` or `const` lhs node (note + /// that their `rhs` is always 0). An expression node corresponds to a + /// standard assignment LHS (which must be evaluated as an lvalue). + /// There may be a preceding `comptime` token, which does not create a + /// corresponding `comptime` node so must be manually detected. + AST_NODE_ASSIGN_DESTRUCTURE, + /// `lhs || rhs`. main_token is the `||`. + AST_NODE_MERGE_ERROR_SETS, + /// `lhs * rhs`. main_token is the `*`. + AST_NODE_MUL, + /// `lhs / rhs`. main_token is the `/`. + AST_NODE_DIV, + /// `lhs % rhs`. main_token is the `%`. + AST_NODE_MOD, + /// `lhs ** rhs`. main_token is the `**`. + AST_NODE_ARRAY_MULT, + /// `lhs *% rhs`. main_token is the `*%`. + AST_NODE_MUL_WRAP, + /// `lhs *| rhs`. main_token is the `*|`. + AST_NODE_MUL_SAT, + /// `lhs + rhs`. main_token is the `+`. + AST_NODE_ADD, + /// `lhs - rhs`. main_token is the `-`. + AST_NODE_SUB, + /// `lhs ++ rhs`. main_token is the `++`. + AST_NODE_ARRAY_CAT, + /// `lhs +% rhs`. main_token is the `+%`. + AST_NODE_ADD_WRAP, + /// `lhs -% rhs`. main_token is the `-%`. + AST_NODE_SUB_WRAP, + /// `lhs +| rhs`. main_token is the `+|`. + AST_NODE_ADD_SAT, + /// `lhs -| rhs`. main_token is the `-|`. + AST_NODE_SUB_SAT, + /// `lhs << rhs`. main_token is the `<<`. + AST_NODE_SHL, + /// `lhs <<| rhs`. main_token is the `<<|`. + AST_NODE_SHL_SAT, + /// `lhs >> rhs`. main_token is the `>>`. + AST_NODE_SHR, + /// `lhs & rhs`. main_token is the `&`. + AST_NODE_BIT_AND, + /// `lhs ^ rhs`. main_token is the `^`. + AST_NODE_BIT_XOR, + /// `lhs | rhs`. main_token is the `|`. + AST_NODE_BIT_OR, + /// `lhs orelse rhs`. main_token is the `orelse`. + AST_NODE_ORELSE, + /// `lhs and rhs`. main_token is the `and`. + AST_NODE_BOOL_AND, + /// `lhs or rhs`. main_token is the `or`. + AST_NODE_BOOL_OR, + /// `op lhs`. rhs unused. main_token is op. + AST_NODE_BOOL_NOT, + /// `op lhs`. rhs unused. main_token is op. + AST_NODE_NEGATION, + /// `op lhs`. rhs unused. main_token is op. + AST_NODE_BIT_NOT, + /// `op lhs`. rhs unused. main_token is op. + AST_NODE_NEGATION_WRAP, + /// `op lhs`. rhs unused. main_token is op. + AST_NODE_ADDRESS_OF, + /// `op lhs`. rhs unused. main_token is op. + AST_NODE_TRY, + /// `op lhs`. rhs unused. main_token is op. + AST_NODE_AWAIT, + /// `?lhs`. rhs unused. main_token is the `?`. + AST_NODE_OPTIONAL_TYPE, + /// `[lhs]rhs`. + AST_NODE_ARRAY_TYPE, + /// `[lhs:a]b`. `ArrayTypeSentinel[rhs]`. + AST_NODE_ARRAY_TYPE_SENTINEL, + /// `[*]align(lhs) rhs`. lhs can be omitted. + /// `*align(lhs) rhs`. lhs can be omitted. + /// `[]rhs`. + /// main_token is the asterisk if a single item pointer or the lbracket + /// if a slice, many-item pointer, or C-pointer + /// main_token might be a ** token, which is shared with a parent/child + /// pointer type and may require special handling. + AST_NODE_PTR_TYPE_ALIGNED, + /// `[*:lhs]rhs`. lhs can be omitted. + /// `*rhs`. + /// `[:lhs]rhs`. + /// main_token is the asterisk if a single item pointer or the lbracket + /// if a slice, many-item pointer, or C-pointer + /// main_token might be a ** token, which is shared with a parent/child + /// pointer type and may require special handling. + AST_NODE_PTR_TYPE_SENTINEL, + /// lhs is index into ptr_type. rhs is the element type expression. + /// main_token is the asterisk if a single item pointer or the lbracket + /// if a slice, many-item pointer, or C-pointer + /// main_token might be a ** token, which is shared with a parent/child + /// pointer type and may require special handling. + AST_NODE_PTR_TYPE, + /// lhs is index into ptr_type_bit_range. rhs is the element type + /// expression. + /// main_token is the asterisk if a single item pointer or the lbracket + /// if a slice, many-item pointer, or C-pointer + /// main_token might be a ** token, which is shared with a parent/child + /// pointer type and may require special handling. + AST_NODE_PTR_TYPE_BIT_RANGE, + /// `lhs[rhs..]` + /// main_token is the lbracket. + AST_NODE_SLICE_OPEN, + /// `lhs[b..c]`. rhs is index into Slice + /// main_token is the lbracket. + AST_NODE_SLICE, + /// `lhs[b..c :d]`. rhs is index into SliceSentinel. Slice end c can be + /// omitted. + /// main_token is the lbracket. + AST_NODE_SLICE_SENTINEL, + /// `lhs.*`. rhs is unused. + AST_NODE_DEREF, + /// `lhs[rhs]`. + AST_NODE_ARRAY_ACCESS, + /// `lhs{rhs}`. rhs can be omitted. + AST_NODE_ARRAY_INIT_ONE, + /// `lhs{rhs,}`. rhs can *not* be omitted + AST_NODE_ARRAY_INIT_ONE_COMMA, + /// `.{lhs, rhs}`. lhs and rhs can be omitted. + AST_NODE_ARRAY_INIT_DOT_TWO, + /// Same as `array_init_dot_two` except there is known to be a trailing + /// comma + /// before the final rbrace. + AST_NODE_ARRAY_INIT_DOT_TWO_COMMA, + /// `.{a, b}`. `sub_list[lhs..rhs]`. + AST_NODE_ARRAY_INIT_DOT, + /// Same as `array_init_dot` except there is known to be a trailing comma + /// before the final rbrace. + AST_NODE_ARRAY_INIT_DOT_COMMA, + /// `lhs{a, b}`. `sub_range_list[rhs]`. lhs can be omitted which means + /// `.{a, b}`. + AST_NODE_ARRAY_INIT, + /// Same as `array_init` except there is known to be a trailing comma + /// before the final rbrace. + AST_NODE_ARRAY_INIT_COMMA, + /// `lhs{.a = rhs}`. rhs can be omitted making it empty. + /// main_token is the lbrace. + AST_NODE_STRUCT_INIT_ONE, + /// `lhs{.a = rhs,}`. rhs can *not* be omitted. + /// main_token is the lbrace. + AST_NODE_STRUCT_INIT_ONE_COMMA, + /// `.{.a = lhs, .b = rhs}`. lhs and rhs can be omitted. + /// main_token is the lbrace. + /// No trailing comma before the rbrace. + AST_NODE_STRUCT_INIT_DOT_TWO, + /// Same as `struct_init_dot_two` except there is known to be a trailing + /// comma + /// before the final rbrace. + AST_NODE_STRUCT_INIT_DOT_TWO_COMMA, + /// `.{.a = b, .c = d}`. `sub_list[lhs..rhs]`. + /// main_token is the lbrace. + AST_NODE_STRUCT_INIT_DOT, + /// Same as `struct_init_dot` except there is known to be a trailing comma + /// before the final rbrace. + AST_NODE_STRUCT_INIT_DOT_COMMA, + /// `lhs{.a = b, .c = d}`. `sub_range_list[rhs]`. + /// lhs can be omitted which means `.{.a = b, .c = d}`. + /// main_token is the lbrace. + AST_NODE_STRUCT_INIT, + /// Same as `struct_init` except there is known to be a trailing comma + /// before the final rbrace. + AST_NODE_STRUCT_INIT_COMMA, + /// `lhs(rhs)`. rhs can be omitted. + /// main_token is the lparen. + AST_NODE_CALL_ONE, + /// `lhs(rhs,)`. rhs can be omitted. + /// main_token is the lparen. + AST_NODE_CALL_ONE_COMMA, + /// `async lhs(rhs)`. rhs can be omitted. + AST_NODE_ASYNC_CALL_ONE, + /// `async lhs(rhs,)`. + AST_NODE_ASYNC_CALL_ONE_COMMA, + /// `lhs(a, b, c)`. `SubRange[rhs]`. + /// main_token is the `(`. + AST_NODE_CALL, + /// `lhs(a, b, c,)`. `SubRange[rhs]`. + /// main_token is the `(`. + AST_NODE_CALL_COMMA, + /// `async lhs(a, b, c)`. `SubRange[rhs]`. + /// main_token is the `(`. + AST_NODE_ASYNC_CALL, + /// `async lhs(a, b, c,)`. `SubRange[rhs]`. + /// main_token is the `(`. + AST_NODE_ASYNC_CALL_COMMA, + /// `switch(lhs) {}`. `SubRange[rhs]`. + /// `main_token` is the identifier of a preceding label, if any; otherwise + /// `switch`. + AST_NODE_SWITCH, + /// Same as switch except there is known to be a trailing comma + /// before the final rbrace + AST_NODE_SWITCH_COMMA, + /// `lhs => rhs`. If lhs is omitted it means `else`. + /// main_token is the `=>` + AST_NODE_SWITCH_CASE_ONE, + /// Same ast `switch_case_one` but the case is inline + AST_NODE_SWITCH_CASE_INLINE_ONE, + /// `a, b, c => rhs`. `SubRange[lhs]`. + /// main_token is the `=>` + AST_NODE_SWITCH_CASE, + /// Same ast `switch_case` but the case is inline + AST_NODE_SWITCH_CASE_INLINE, + /// `lhs...rhs`. + AST_NODE_SWITCH_RANGE, + /// `while (lhs) rhs`. + /// `while (lhs) |x| rhs`. + AST_NODE_WHILE_SIMPLE, + /// `while (lhs) : (a) b`. `WhileCont[rhs]`. + /// `while (lhs) : (a) b`. `WhileCont[rhs]`. + AST_NODE_WHILE_CONT, + /// `while (lhs) : (a) b else c`. `While[rhs]`. + /// `while (lhs) |x| : (a) b else c`. `While[rhs]`. + /// `while (lhs) |x| : (a) b else |y| c`. `While[rhs]`. + /// The cont expression part `: (a)` may be omitted. + AST_NODE_WHILE, + /// `for (lhs) rhs`. + AST_NODE_FOR_SIMPLE, + /// `for (lhs[0..inputs]) lhs[inputs + 1] else lhs[inputs + 2]`. + /// `For[rhs]`. + AST_NODE_FOR, + /// `lhs..rhs`. rhs can be omitted. + AST_NODE_FOR_RANGE, + /// `if (lhs) rhs`. + /// `if (lhs) |a| rhs`. + AST_NODE_IF_SIMPLE, + /// `if (lhs) a else b`. `If[rhs]`. + /// `if (lhs) |x| a else b`. `If[rhs]`. + /// `if (lhs) |x| a else |y| b`. `If[rhs]`. + AST_NODE_IF, + /// `suspend lhs`. lhs can be omitted. rhs is unused. + AST_NODE_SUSPEND, + /// `resume lhs`. rhs is unused. + AST_NODE_RESUME, + /// `continue :lhs rhs` + /// both lhs and rhs may be omitted. + AST_NODE_CONTINUE, + /// `break :lhs rhs` + /// both lhs and rhs may be omitted. + AST_NODE_BREAK, + /// `return lhs`. lhs can be omitted. rhs is unused. + AST_NODE_RETURN, + /// `fn (a: lhs) rhs`. lhs can be omitted. + /// anytype and ... parameters are omitted from the AST tree. + /// main_token is the `fn` keyword. + /// extern function declarations use this tag. + AST_NODE_FN_PROTO_SIMPLE, + /// `fn (a: b, c: d) rhs`. `sub_range_list[lhs]`. + /// anytype and ... parameters are omitted from the AST tree. + /// main_token is the `fn` keyword. + /// extern function declarations use this tag. + AST_NODE_FN_PROTO_MULTI, + /// `fn (a: b) addrspace(e) linksection(f) callconv(g) rhs`. + /// `FnProtoOne[lhs]`. + /// zero or one parameters. + /// anytype and ... parameters are omitted from the AST tree. + /// main_token is the `fn` keyword. + /// extern function declarations use this tag. + AST_NODE_FN_PROTO_ONE, + /// `fn (a: b, c: d) addrspace(e) linksection(f) callconv(g) rhs`. + /// `FnProto[lhs]`. + /// anytype and ... parameters are omitted from the AST tree. + /// main_token is the `fn` keyword. + /// extern function declarations use this tag. + AST_NODE_FN_PROTO, + /// lhs is the fn_proto. + /// rhs is the function body block. + /// Note that extern function declarations use the fn_proto tags rather + /// than this one. + AST_NODE_FN_DECL, + /// `anyframe->rhs`. main_token is `anyframe`. `lhs` is arrow token index. + AST_NODE_ANYFRAME_TYPE, + /// Both lhs and rhs unused. + AST_NODE_ANYFRAME_LITERAL, + /// Both lhs and rhs unused. + AST_NODE_CHAR_LITERAL, + /// Both lhs and rhs unused. + AST_NODE_NUMBER_LITERAL, + /// Both lhs and rhs unused. + AST_NODE_UNREACHABLE_LITERAL, + /// Both lhs and rhs unused. + /// Most identifiers will not have explicit AST nodes, however for + /// expressions + /// which could be one of many different kinds of AST nodes, there will be + /// an + /// identifier AST node for it. + AST_NODE_IDENTIFIER, + /// lhs is the dot token index, rhs unused, main_token is the identifier. + AST_NODE_ENUM_LITERAL, + /// main_token is the string literal token + /// Both lhs and rhs unused. + AST_NODE_STRING_LITERAL, + /// main_token is the first token index (redundant with lhs) + /// lhs is the first token index; rhs is the last token index. + /// Could be a series of multiline_string_literal_line tokens, or a single + /// string_literal token. + AST_NODE_MULTILINE_STRING_LITERAL, + /// `(lhs)`. main_token is the `(`; rhs is the token index of the `)`. + AST_NODE_GROUPED_EXPRESSION, + /// `@a(lhs, rhs)`. lhs and rhs may be omitted. + /// main_token is the builtin token. + AST_NODE_BUILTIN_CALL_TWO, + /// Same as builtin_call_two but there is known to be a trailing comma + /// before the rparen. + AST_NODE_BUILTIN_CALL_TWO_COMMA, + /// `@a(b, c)`. `sub_list[lhs..rhs]`. + /// main_token is the builtin token. + AST_NODE_BUILTIN_CALL, + /// Same as builtin_call but there is known to be a trailing comma before + /// the rparen. + AST_NODE_BUILTIN_CALL_COMMA, + /// `error{a, b}`. + /// rhs is the rbrace, lhs is unused. + AST_NODE_ERROR_SET_DECL, + /// `struct {}`, `union {}`, `opaque {}`, `enum {}`. + /// `extra_data[lhs..rhs]`. + /// main_token is `struct`, `union`, `opaque`, `enum` keyword. + AST_NODE_CONTAINER_DECL, + /// Same as ContainerDecl but there is known to be a trailing comma + /// or semicolon before the rbrace. + AST_NODE_CONTAINER_DECL_TRAILING, + /// `struct {lhs, rhs}`, `union {lhs, rhs}`, `opaque {lhs, rhs}`, `enum + /// {lhs, rhs}`. + /// lhs or rhs can be omitted. + /// main_token is `struct`, `union`, `opaque`, `enum` keyword. + AST_NODE_CONTAINER_DECL_TWO, + /// Same as ContainerDeclTwo except there is known to be a trailing comma + /// or semicolon before the rbrace. + AST_NODE_CONTAINER_DECL_TWO_TRAILING, + /// `struct(lhs)` / `union(lhs)` / `enum(lhs)`. `SubRange[rhs]`. + AST_NODE_CONTAINER_DECL_ARG, + /// Same as container_decl_arg but there is known to be a trailing + /// comma or semicolon before the rbrace. + AST_NODE_CONTAINER_DECL_ARG_TRAILING, + /// `union(enum) {}`. `sub_list[lhs..rhs]`. + /// Note that tagged unions with explicitly provided enums are represented + /// by `container_decl_arg`. + AST_NODE_TAGGED_UNION, + /// Same as tagged_union but there is known to be a trailing comma + /// or semicolon before the rbrace. + AST_NODE_TAGGED_UNION_TRAILING, + /// `union(enum) {lhs, rhs}`. lhs or rhs may be omitted. + /// Note that tagged unions with explicitly provided enums are represented + /// by `container_decl_arg`. + AST_NODE_TAGGED_UNION_TWO, + /// Same as tagged_union_two but there is known to be a trailing comma + /// or semicolon before the rbrace. + AST_NODE_TAGGED_UNION_TWO_TRAILING, + /// `union(enum(lhs)) {}`. `SubRange[rhs]`. + AST_NODE_TAGGED_UNION_ENUM_TAG, + /// Same as tagged_union_enum_tag but there is known to be a trailing comma + /// or semicolon before the rbrace. + AST_NODE_TAGGED_UNION_ENUM_TAG_TRAILING, + /// `a: lhs = rhs,`. lhs and rhs can be omitted. + /// main_token is the field name identifier. + /// lastToken() does not include the possible trailing comma. + AST_NODE_CONTAINER_FIELD_INIT, + /// `a: lhs align(rhs),`. rhs can be omitted. + /// main_token is the field name identifier. + /// lastToken() does not include the possible trailing comma. + AST_NODE_CONTAINER_FIELD_ALIGN, + /// `a: lhs align(c) = d,`. `container_field_list[rhs]`. + /// main_token is the field name identifier. + /// lastToken() does not include the possible trailing comma. + AST_NODE_CONTAINER_FIELD, + /// `comptime lhs`. rhs unused. + AST_NODE_COMPTIME, + /// `nosuspend lhs`. rhs unused. + AST_NODE_NOSUSPEND, + /// `{lhs rhs}`. rhs or lhs can be omitted. + /// main_token points at the lbrace. + AST_NODE_BLOCK_TWO, + /// Same as block_two but there is known to be a semicolon before the + /// rbrace. + AST_NODE_BLOCK_TWO_SEMICOLON, + /// `{}`. `sub_list[lhs..rhs]`. + /// main_token points at the lbrace. + AST_NODE_BLOCK, + /// Same as block but there is known to be a semicolon before the rbrace. + AST_NODE_BLOCK_SEMICOLON, + /// `asm(lhs)`. rhs is the token index of the rparen. + AST_NODE_ASM_SIMPLE, + /// Legacy asm with string clobbers. `asm(lhs, a)`. + /// `AsmLegacy[rhs]`. + AST_NODE_ASM_LEGACY, + /// `asm(lhs, a)`. `Asm[rhs]`. + AST_NODE_ASM, + /// `[a] "b" (c)`. lhs is 0, rhs is token index of the rparen. + /// `[a] "b" (-> lhs)`. rhs is token index of the rparen. + /// main_token is `a`. + AST_NODE_ASM_OUTPUT, + /// `[a] "b" (lhs)`. rhs is token index of the rparen. + /// main_token is `a`. + AST_NODE_ASM_INPUT, + /// `error.a`. lhs is token index of `.`. rhs is token index of `a`. + AST_NODE_ERROR_VALUE, + /// `lhs!rhs`. main_token is the `!`. + AST_NODE_ERROR_UNION, +} AstNodeTag; + +typedef uint32_t AstTokenIndex; +typedef uint32_t AstNodeIndex; +typedef uint32_t AstIndex; + +typedef struct { + AstIndex lhs; + AstIndex rhs; +} AstData; + +typedef struct { + uint32_t len; + uint32_t cap; + AstNodeTag* tags; + AstTokenIndex* main_tokens; + AstData* datas; +} AstNodeList; + +typedef struct { + AstNodeTag tag; + AstTokenIndex main_token; + AstData data; +} AstNodeItem; + +typedef struct { + uint32_t len; + uint32_t cap; + TokenizerTag* tags; + AstIndex* starts; +} AstTokenList; + +typedef SLICE(AstNodeIndex) AstNodeIndexSlice; + +typedef struct { + const char* source; + uint32_t source_len; + AstTokenList tokens; + AstNodeList nodes; + AstNodeIndexSlice extra_data; + bool has_error; + char* err_msg; +} Ast; + +typedef struct AstPtrType { + AstNodeIndex sentinel; + AstNodeIndex align_node; + AstNodeIndex addrspace_node; +} AstPtrType; + +typedef struct AstPtrTypeBitRange { + AstNodeIndex sentinel; + AstNodeIndex align_node; + AstNodeIndex addrspace_node; + AstNodeIndex bit_range_start; + AstNodeIndex bit_range_end; +} AstPtrTypeBitRange; + +typedef struct AstFnProtoOne { + AstNodeIndex param; + AstNodeIndex align_expr; + AstNodeIndex addrspace_expr; + AstNodeIndex section_expr; + AstNodeIndex callconv_expr; +} AstFnProtoOne; + +typedef struct AstFnProto { + AstNodeIndex params_start; + AstNodeIndex params_end; + AstNodeIndex align_expr; + AstNodeIndex addrspace_expr; + AstNodeIndex section_expr; + AstNodeIndex callconv_expr; +} AstFnProto; + +typedef struct AstSubRange { + AstNodeIndex start; + AstNodeIndex end; +} AstSubRange; + +typedef struct AstSliceSentinel { + AstNodeIndex start; + AstNodeIndex end; + AstNodeIndex sentinel; +} AstSliceSentinel; + +typedef struct AstWhileCont { + AstNodeIndex cont_expr; + AstNodeIndex then_expr; +} AstWhileCont; + +typedef struct AstWhile { + AstNodeIndex cont_expr; + AstNodeIndex then_expr; + AstNodeIndex else_expr; +} AstWhile; + +typedef struct AstFor { + unsigned int inputs : 31; + unsigned int has_else : 1; +} AstFor; + +typedef struct AstIf { + AstNodeIndex then_expr; + AstNodeIndex else_expr; +} AstIf; + +typedef struct AstError { + bool is_note; + AstTokenIndex token; + union { + struct { + TokenizerTag expected_tag; + } expected; + struct { + } none; + } extra; +} AstError; + +Ast astParse(const char* source, uint32_t len); +void astDeinit(Ast*); + +#endif diff --git a/stage0/astgen.c b/stage0/astgen.c new file mode 100644 index 0000000000..288b2ac01c --- /dev/null +++ b/stage0/astgen.c @@ -0,0 +1,10639 @@ +// astgen.c — AST to ZIR conversion, ported from lib/std/zig/AstGen.zig. +// +// Structural translation of AstGen.zig into C. +// Each function corresponds to a Zig function with the same name, +// with line references to Zig 0.15.1 AstGen.zig. + +#include "astgen.h" +#include "common.h" +#include +#include +#include + +// --- Declaration.Flags.Id enum (Zir.zig:2724) --- + +typedef enum { + DECL_ID_UNNAMED_TEST, + DECL_ID_TEST, + DECL_ID_DECLTEST, + DECL_ID_COMPTIME, + DECL_ID_CONST_SIMPLE, + DECL_ID_CONST_TYPED, + DECL_ID_CONST, + DECL_ID_PUB_CONST_SIMPLE, + DECL_ID_PUB_CONST_TYPED, + DECL_ID_PUB_CONST, + DECL_ID_EXTERN_CONST_SIMPLE, + DECL_ID_EXTERN_CONST, + DECL_ID_PUB_EXTERN_CONST_SIMPLE, + DECL_ID_PUB_EXTERN_CONST, + DECL_ID_EXPORT_CONST, + DECL_ID_PUB_EXPORT_CONST, + DECL_ID_VAR_SIMPLE, + DECL_ID_VAR, + DECL_ID_VAR_THREADLOCAL, + DECL_ID_PUB_VAR_SIMPLE, + DECL_ID_PUB_VAR, + DECL_ID_PUB_VAR_THREADLOCAL, + DECL_ID_EXTERN_VAR, + DECL_ID_EXTERN_VAR_THREADLOCAL, + DECL_ID_PUB_EXTERN_VAR, + DECL_ID_PUB_EXTERN_VAR_THREADLOCAL, + DECL_ID_EXPORT_VAR, + DECL_ID_EXPORT_VAR_THREADLOCAL, + DECL_ID_PUB_EXPORT_VAR, + DECL_ID_PUB_EXPORT_VAR_THREADLOCAL, +} DeclFlagsId; + +// --- Import tracking (AstGen.zig:265) --- + +typedef struct { + uint32_t name; // NullTerminatedString index + uint32_t token; // Ast.TokenIndex +} ImportEntry; + +// --- AstGen internal context (mirrors AstGen struct, AstGen.zig:153) --- + +typedef struct { + const Ast* tree; + ZirInstTag* inst_tags; + ZirInstData* inst_datas; + uint32_t inst_len; + uint32_t inst_cap; + uint32_t* extra; + uint32_t extra_len; + uint32_t extra_cap; + uint8_t* string_bytes; + uint32_t string_bytes_len; + uint32_t string_bytes_cap; + // String dedup table: stores positions in string_bytes that are + // registered for deduplication (mirrors AstGen.string_table). + // Only strings added via identAsString/strLitAsString (non-embedded-null) + // are registered. Multiline strings are NOT registered. + uint32_t* string_table; + uint32_t string_table_len; + uint32_t string_table_cap; + uint32_t source_offset; + uint32_t source_line; + uint32_t source_column; + ImportEntry* imports; + uint32_t imports_len; + uint32_t imports_cap; + // Namespace decl table: maps string indices to node indices. + // Populated by scanContainer, used by identifier resolution. + uint32_t* decl_names; // string indices + uint32_t* decl_nodes; // node indices + uint32_t decl_table_len; + uint32_t decl_table_cap; + // Shared dynamic array for GenZir instructions (AstGen.zig:11796). + // Sub-blocks share this array and track their slice via + // instructions_top. + uint32_t* scratch_instructions; + uint32_t scratch_inst_len; + uint32_t scratch_inst_cap; + // Scratch extra array for call arguments (mirrors AstGen.scratch in Zig). + // Used to collect body lengths + body instructions before copying to + // extra. + uint32_t* scratch_extra; + uint32_t scratch_extra_len; + uint32_t scratch_extra_cap; + // Return type ref for the current function (set during fnDecl/testDecl). + uint32_t fn_ret_ty; // ZirInstRef + // Pointer to the fn_block GenZir for the current function (AstGen.zig:45). + void* fn_block; // GenZir* + // ref_table: deferred REF instructions (AstGen.zig:58-68). + // Key = operand inst index, Value = ref inst index. + uint32_t* ref_table_keys; + uint32_t* ref_table_vals; + uint32_t ref_table_len; + uint32_t ref_table_cap; + // nodes_need_rl: set of AST node indices that need result locations. + // Populated by astRlAnnotate() pre-pass (AstRlAnnotate.zig). + uint32_t* nodes_need_rl; + uint32_t nodes_need_rl_len; + uint32_t nodes_need_rl_cap; + bool has_compile_errors; +} AstGenCtx; + +static void setCompileError(AstGenCtx* ag, const char* where, int line) { + (void)where; + (void)line; + ag->has_compile_errors = true; +} +#define SET_ERROR(ag) setCompileError(ag, __func__, __LINE__) + +// Set fn_block pointer on AstGenCtx. The caller is responsible for saving +// and restoring the previous value before the pointed-to GenZir goes out +// of scope (AstGen.zig:45). +static void setFnBlock(AstGenCtx* ag, void* block) { ag->fn_block = block; } + +// --- ref_table operations (AstGen.zig:58-68) --- +// Simple linear-scan hash table for deferred REF instructions. + +// Returns pointer to existing value if key found, NULL if not found. +static uint32_t* refTableGet(AstGenCtx* ag, uint32_t key) { + for (uint32_t i = 0; i < ag->ref_table_len; i++) { + if (ag->ref_table_keys[i] == key) + return &ag->ref_table_vals[i]; + } + return NULL; +} + +// getOrPut: returns pointer to value slot; sets *found to true if existed. +static uint32_t* refTableGetOrPut(AstGenCtx* ag, uint32_t key, bool* found) { + for (uint32_t i = 0; i < ag->ref_table_len; i++) { + if (ag->ref_table_keys[i] == key) { + *found = true; + return &ag->ref_table_vals[i]; + } + } + *found = false; + if (ag->ref_table_len >= ag->ref_table_cap) { + uint32_t new_cap = ag->ref_table_cap == 0 ? 16 : ag->ref_table_cap * 2; + ag->ref_table_keys + = realloc(ag->ref_table_keys, new_cap * sizeof(uint32_t)); + ag->ref_table_vals + = realloc(ag->ref_table_vals, new_cap * sizeof(uint32_t)); + ag->ref_table_cap = new_cap; + } + uint32_t idx = ag->ref_table_len++; + ag->ref_table_keys[idx] = key; + return &ag->ref_table_vals[idx]; +} + +// fetchRemove: if key exists, remove it and return true with *val set. +static bool refTableFetchRemove(AstGenCtx* ag, uint32_t key, uint32_t* val) { + for (uint32_t i = 0; i < ag->ref_table_len; i++) { + if (ag->ref_table_keys[i] == key) { + *val = ag->ref_table_vals[i]; + // Swap with last element. + ag->ref_table_len--; + if (i < ag->ref_table_len) { + ag->ref_table_keys[i] = ag->ref_table_keys[ag->ref_table_len]; + ag->ref_table_vals[i] = ag->ref_table_vals[ag->ref_table_len]; + } + return true; + } + } + return false; +} + +// --- Result location (AstGen.zig:11808) --- +// Simplified version of ResultInfo.Loc. +// Defined here (before GenZir) because GenZir.break_result_info uses it. + +// ResultInfo.Context (AstGen.zig:371-386). +typedef enum { + RI_CTX_NONE, + RI_CTX_RETURN, + RI_CTX_ERROR_HANDLING_EXPR, + RI_CTX_SHIFT_OP, + RI_CTX_FN_ARG, + RI_CTX_CONST_INIT, + RI_CTX_ASSIGNMENT, +} ResultCtx; + +typedef enum { + RL_NONE, // Just compute the value. + RL_REF, // Compute a pointer to the value. + RL_DISCARD, // Compute but discard (emit ensure_result_non_error). + RL_TY, // Coerce to specific type. + RL_COERCED_TY, // Coerce to specific type, result is the coercion. + RL_PTR, // Store result to typed pointer. data=alloc inst, src_node=node. + RL_INFERRED_PTR, // Store result to inferred pointer. data=alloc inst. + RL_REF_COERCED_TY, // Ref with pointer type. data=ptr_ty_inst. +} ResultLocTag; + +typedef struct { + ResultLocTag tag; + uint32_t data; // ZirInstRef: ty_inst for TY/COERCED_TY, alloc inst for + // PTR/INFERRED_PTR. + uint32_t src_node; // Only used for RL_PTR. + ResultCtx ctx; // ResultInfo.Context (AstGen.zig:371). +} ResultLoc; + +#define RL_NONE_VAL \ + ((ResultLoc) { \ + .tag = RL_NONE, .data = 0, .src_node = 0, .ctx = RI_CTX_NONE }) +#define RL_REF_VAL \ + ((ResultLoc) { \ + .tag = RL_REF, .data = 0, .src_node = 0, .ctx = RI_CTX_NONE }) +#define RL_DISCARD_VAL \ + ((ResultLoc) { \ + .tag = RL_DISCARD, .data = 0, .src_node = 0, .ctx = RI_CTX_NONE }) +#define RL_IS_REF(rl) ((rl).tag == RL_REF || (rl).tag == RL_REF_COERCED_TY) + +// --- Scope types (AstGen.zig:11621-11768) --- + +typedef enum { + SCOPE_GEN_ZIR, + SCOPE_LOCAL_VAL, + SCOPE_LOCAL_PTR, + SCOPE_DEFER_NORMAL, + SCOPE_DEFER_ERROR, + SCOPE_NAMESPACE, + SCOPE_TOP, + SCOPE_LABEL, +} ScopeTag; + +typedef struct Scope { + ScopeTag tag; +} Scope; + +// --- GenZir scope (mirrors GenZir struct, AstGen.zig:11772) --- +// +// Sub-blocks share the parent AstGenCtx's scratch_instructions array and +// record their starting offset (instructions_top). This mirrors the upstream +// GenZir.instructions / instructions_top design (AstGen.zig:11796-11850). + +typedef struct { + Scope base; // tag = SCOPE_GEN_ZIR + Scope* parent; + AstGenCtx* astgen; + uint32_t decl_node_index; + uint32_t decl_line; + bool is_comptime; + bool is_inline; // true for inline for/while, labeled blocks in comptime + bool c_import; // true inside @cImport block + uint32_t instructions_top; // start index in shared array + uint32_t break_block; // UINT32_MAX = none (AstGen.zig:11780) + uint32_t continue_block; // UINT32_MAX = none (AstGen.zig:11784) + // Label for labeled blocks (AstGen.zig:11800, 11869-11874). + uint32_t label_token; // UINT32_MAX = no label + uint32_t label_block_inst; // the BLOCK instruction index + ResultLoc break_result_info; // RL for break values +} GenZir; + +// Scope.LocalVal (AstGen.zig:11682). +// This is always a `const` local and the `inst` is a value type, not a +// pointer. +typedef struct { + Scope base; // tag = SCOPE_LOCAL_VAL + Scope* parent; + GenZir* gen_zir; + uint32_t inst; // ZirInstRef + uint32_t token_src; // Ast.TokenIndex + uint32_t name; // NullTerminatedString (string table index) +} ScopeLocalVal; + +// Scope.LocalPtr (AstGen.zig:11704). +// This could be a `const` or `var` local. It has a pointer instead of a value. +typedef struct { + Scope base; // tag = SCOPE_LOCAL_PTR + Scope* parent; + GenZir* gen_zir; + uint32_t ptr; // ZirInstRef + uint32_t token_src; // Ast.TokenIndex + uint32_t name; // NullTerminatedString (string table index) + bool maybe_comptime; +} ScopeLocalPtr; + +// Scope.Defer (AstGen.zig:11741). +typedef struct { + Scope base; // tag = SCOPE_DEFER_NORMAL or SCOPE_DEFER_ERROR + Scope* parent; + uint32_t index; + uint32_t len; +} ScopeDefer; + +// Scope.Label — for labeled blocks and loops. +typedef struct { + Scope base; // tag = SCOPE_LABEL + Scope* parent; + uint32_t label_name; // NullTerminatedString + uint32_t block_inst; // instruction index (not ref) +} ScopeLabel; + +// --- GenZir instruction helpers (AstGen.zig:11830-11850) --- + +// Returns the number of instructions in this scope. +static uint32_t gzInstructionsLen(const GenZir* gz) { + return gz->astgen->scratch_inst_len - gz->instructions_top; +} + +// Returns pointer to start of this scope's instructions in the shared array. +static const uint32_t* gzInstructionsSlice(const GenZir* gz) { + return gz->astgen->scratch_instructions + gz->instructions_top; +} + +// Mirrors GenZir.instructionsSliceUpto (AstGen.zig:11835). +// Returns instructions from gz up to (but not including) stacked_gz's start. +static uint32_t gzInstructionsLenUpto( + const GenZir* gz, const GenZir* stacked_gz) { + return stacked_gz->instructions_top - gz->instructions_top; +} + +static const uint32_t* gzInstructionsSliceUpto( + const GenZir* gz, const GenZir* stacked_gz) { + (void)stacked_gz; // used only for length computation + return gz->astgen->scratch_instructions + gz->instructions_top; +} + +// Mirrors GenZir.unstack (AstGen.zig:11822). +// Restores the shared array length to this scope's start. +static void gzUnstack(GenZir* gz) { + gz->astgen->scratch_inst_len = gz->instructions_top; +} + +// Append an instruction index to this scope's portion of the shared array. +static void gzAppendInstruction(GenZir* gz, uint32_t inst_idx) { + AstGenCtx* ag = gz->astgen; + if (ag->scratch_inst_len >= ag->scratch_inst_cap) { + uint32_t new_cap + = ag->scratch_inst_cap > 0 ? ag->scratch_inst_cap * 2 : 64; + uint32_t* p + = realloc(ag->scratch_instructions, new_cap * sizeof(uint32_t)); + if (!p) + exit(1); + ag->scratch_instructions = p; + ag->scratch_inst_cap = new_cap; + } + ag->scratch_instructions[ag->scratch_inst_len++] = inst_idx; +} + +// Mirrors GenZir.makeSubBlock (AstGen.zig:11852). +static GenZir makeSubBlock(GenZir* parent, Scope* scope) { + GenZir sub; + memset(&sub, 0, sizeof(sub)); + sub.base.tag = SCOPE_GEN_ZIR; + sub.parent = scope; + sub.astgen = parent->astgen; + sub.decl_node_index = parent->decl_node_index; + sub.decl_line = parent->decl_line; + sub.is_comptime = parent->is_comptime; + sub.c_import = parent->c_import; + sub.instructions_top = parent->astgen->scratch_inst_len; + sub.break_block = UINT32_MAX; + sub.continue_block = UINT32_MAX; + sub.label_token = UINT32_MAX; + return sub; +} + +// --- Capacity helpers --- + +static void ensureExtraCapacity(AstGenCtx* ag, uint32_t additional) { + uint32_t needed = ag->extra_len + additional; + if (needed > ag->extra_cap) { + uint32_t new_cap = ag->extra_cap * 2; + if (new_cap < needed) + new_cap = needed; + uint32_t* p = realloc(ag->extra, new_cap * sizeof(uint32_t)); + if (!p) + exit(1); + ag->extra = p; + ag->extra_cap = new_cap; + } +} + +static void ensureInstCapacity(AstGenCtx* ag, uint32_t additional) { + uint32_t needed = ag->inst_len + additional; + if (needed > ag->inst_cap) { + uint32_t new_cap = ag->inst_cap * 2; + if (new_cap < needed) + new_cap = needed; + ZirInstTag* t = realloc(ag->inst_tags, new_cap * sizeof(ZirInstTag)); + ZirInstData* d + = realloc(ag->inst_datas, new_cap * sizeof(ZirInstData)); + if (!t || !d) + exit(1); + ag->inst_tags = t; + ag->inst_datas = d; + ag->inst_cap = new_cap; + } +} + +static void ensureStringBytesCapacity(AstGenCtx* ag, uint32_t additional) { + uint32_t needed = ag->string_bytes_len + additional; + if (needed > ag->string_bytes_cap) { + uint32_t new_cap = ag->string_bytes_cap * 2; + if (new_cap < needed) + new_cap = needed; + uint8_t* p = realloc(ag->string_bytes, new_cap * sizeof(uint8_t)); + if (!p) + exit(1); + ag->string_bytes = p; + ag->string_bytes_cap = new_cap; + } +} + +// --- Extra data helpers --- + +static uint32_t addExtraU32(AstGenCtx* ag, uint32_t value) { + ensureExtraCapacity(ag, 1); + uint32_t idx = ag->extra_len; + ag->extra[ag->extra_len++] = value; + return idx; +} + +// --- Instruction helpers --- + +// Mirrors AstGen.reserveInstructionIndex (AstGen.zig:12902). +static uint32_t reserveInstructionIndex(AstGenCtx* ag) { + ensureInstCapacity(ag, 1); + uint32_t idx = ag->inst_len; + memset(&ag->inst_datas[idx], 0, sizeof(ZirInstData)); + ag->inst_tags[idx] = (ZirInstTag)0; + ag->inst_len++; + return idx; +} + +// Forward declarations. +static int32_t tokenIndexToRelative(const GenZir* gz, uint32_t token); +static uint32_t firstToken(const Ast* tree, uint32_t node); +static bool nodesNeedRlContains(const AstGenCtx* ag, uint32_t node); + +// Mirrors GenZir.makeUnTok (AstGen.zig:12520). +// Allocates an instruction but does NOT add to GenZir body. +// Returns the raw instruction INDEX (not a Ref). +static uint32_t makeUnTok( + GenZir* gz, ZirInstTag tag, uint32_t operand, uint32_t abs_tok_index) { + AstGenCtx* ag = gz->astgen; + ensureInstCapacity(ag, 1); + uint32_t idx = ag->inst_len; + ZirInstData data; + data.un_tok.src_tok = tokenIndexToRelative(gz, abs_tok_index); + data.un_tok.operand = operand; + ag->inst_tags[idx] = tag; + ag->inst_datas[idx] = data; + ag->inst_len++; + return idx; // Raw index, NOT a Ref. +} + +// Mirrors GenZir.add (AstGen.zig:13162). +// Appends an instruction and records it in the GenZir body. +// Returns the instruction index as a Ref (index + ZIR_INST_REF_START_INDEX). +static uint32_t addInstruction(GenZir* gz, ZirInstTag tag, ZirInstData data) { + AstGenCtx* ag = gz->astgen; + ensureInstCapacity(ag, 1); + uint32_t idx = ag->inst_len; + ag->inst_tags[idx] = tag; + ag->inst_datas[idx] = data; + ag->inst_len++; + // Record in sub-block body. + gzAppendInstruction(gz, idx); + return idx + ZIR_REF_START_INDEX; // toRef() +} + +// Mirrors GenZir.addInt (AstGen.zig:12238). +static uint32_t addInt(GenZir* gz, uint64_t integer) { + ZirInstData data; + data.int_val = integer; + return addInstruction(gz, ZIR_INST_INT, data); +} + +// Mirrors GenZir.add for bin data (Zir.zig:1877). +// Creates an instruction with bin data (lhs + rhs stored in inst_datas). +static uint32_t addBin( + GenZir* gz, ZirInstTag tag, uint32_t lhs, uint32_t rhs) { + ZirInstData data; + data.bin.lhs = lhs; + data.bin.rhs = rhs; + return addInstruction(gz, tag, data); +} + +// Mirrors GenZir.addPlNode (AstGen.zig:12308). +// Creates an instruction with pl_node data and 2-word payload. +static uint32_t addPlNodeBin( + GenZir* gz, ZirInstTag tag, uint32_t node, uint32_t lhs, uint32_t rhs) { + AstGenCtx* ag = gz->astgen; + ensureExtraCapacity(ag, 2); + uint32_t payload_index = ag->extra_len; + ag->extra[ag->extra_len++] = lhs; + ag->extra[ag->extra_len++] = rhs; + ZirInstData data; + data.pl_node.src_node = (int32_t)node - (int32_t)gz->decl_node_index; + data.pl_node.payload_index = payload_index; + return addInstruction(gz, tag, data); +} + +// Mirrors addPlNode for 3-operand payloads (e.g. ArrayTypeSentinel). +static uint32_t addPlNodeTriple(GenZir* gz, ZirInstTag tag, uint32_t node, + uint32_t a, uint32_t b, uint32_t c) { + AstGenCtx* ag = gz->astgen; + ensureExtraCapacity(ag, 3); + uint32_t payload_index = ag->extra_len; + ag->extra[ag->extra_len++] = a; + ag->extra[ag->extra_len++] = b; + ag->extra[ag->extra_len++] = c; + ZirInstData data; + data.pl_node.src_node = (int32_t)node - (int32_t)gz->decl_node_index; + data.pl_node.payload_index = payload_index; + return addInstruction(gz, tag, data); +} + +// Checks if an AST identifier node is the single underscore `_`. +// Used for inferred array length detection in [_]T patterns. +// Intentionally does NOT support @"_" syntax (matches upstream). +static bool isUnderscoreIdent(const Ast* tree, uint32_t ident_node) { + uint32_t id_tok = tree->nodes.main_tokens[ident_node]; + uint32_t id_start = tree->tokens.starts[id_tok]; + if (tree->source[id_start] != '_') + return false; + if (id_start + 1 >= tree->source_len) + return true; + char next = tree->source[id_start + 1]; + return !((next >= 'a' && next <= 'z') || (next >= 'A' && next <= 'Z') + || next == '_' || (next >= '0' && next <= '9')); +} + +// Mirrors GenZir.addUnNode (AstGen.zig:12406). +static uint32_t addUnNode( + GenZir* gz, ZirInstTag tag, uint32_t operand, uint32_t node) { + ZirInstData data; + data.un_node.src_node = (int32_t)node - (int32_t)gz->decl_node_index; + data.un_node.operand = operand; + return addInstruction(gz, tag, data); +} + +// Mirrors GenZir.addUnTok (AstGen.zig:12497). +static uint32_t addUnTok( + GenZir* gz, ZirInstTag tag, uint32_t operand, uint32_t abs_tok_index) { + ZirInstData data; + data.un_tok.src_tok = tokenIndexToRelative(gz, abs_tok_index); + data.un_tok.operand = operand; + return addInstruction(gz, tag, data); +} + +// Mirrors GenZir.addStrTok (AstGen.zig:12349). +static uint32_t addStrTok( + GenZir* gz, ZirInstTag tag, uint32_t str_index, uint32_t token) { + ZirInstData data; + data.str_tok.start = str_index; + data.str_tok.src_tok = tokenIndexToRelative(gz, token); + return addInstruction(gz, tag, data); +} + +// Mirrors GenZir.addPlNodePayloadIndex (AstGen.zig:12332). +static uint32_t addPlNodePayloadIndex( + GenZir* gz, ZirInstTag tag, uint32_t node, uint32_t payload_index) { + ZirInstData data; + data.pl_node.src_node = (int32_t)node - (int32_t)gz->decl_node_index; + data.pl_node.payload_index = payload_index; + return addInstruction(gz, tag, data); +} + +// --- Source cursor (AstGen.zig:13335-13359) --- + +// Mirrors AstGen.advanceSourceCursor (AstGen.zig:13342). +static void advanceSourceCursor(AstGenCtx* ag, uint32_t end) { + const char* source = ag->tree->source; + uint32_t i = ag->source_offset; + uint32_t line = ag->source_line; + uint32_t column = ag->source_column; + assert(i <= end); + while (i < end) { + if (source[i] == '\n') { + line++; + column = 0; + } else { + column++; + } + i++; + } + ag->source_offset = i; + ag->source_line = line; + ag->source_column = column; +} + +// Mirrors tree.firstToken (Ast.zig:596). +// Recurse through nodes to find the first token. +static uint32_t firstToken(const Ast* tree, uint32_t node) { + uint32_t n = node; + while (1) { + AstNodeTag tag = tree->nodes.tags[n]; + switch (tag) { + case AST_NODE_ROOT: + return 0; + + // Return main_token directly (Ast.zig:602-643). + case AST_NODE_TEST_DECL: + case AST_NODE_ERRDEFER: + case AST_NODE_DEFER: + case AST_NODE_BOOL_NOT: + case AST_NODE_NEGATION: + case AST_NODE_BIT_NOT: + case AST_NODE_NEGATION_WRAP: + case AST_NODE_ADDRESS_OF: + case AST_NODE_TRY: + case AST_NODE_AWAIT: + case AST_NODE_OPTIONAL_TYPE: + case AST_NODE_SWITCH: + case AST_NODE_SWITCH_COMMA: + case AST_NODE_IF_SIMPLE: + case AST_NODE_IF: + case AST_NODE_SUSPEND: + case AST_NODE_RESUME: + case AST_NODE_CONTINUE: + case AST_NODE_BREAK: + case AST_NODE_RETURN: + case AST_NODE_ANYFRAME_TYPE: + case AST_NODE_IDENTIFIER: + case AST_NODE_ANYFRAME_LITERAL: + case AST_NODE_CHAR_LITERAL: + case AST_NODE_NUMBER_LITERAL: + case AST_NODE_UNREACHABLE_LITERAL: + case AST_NODE_STRING_LITERAL: + case AST_NODE_MULTILINE_STRING_LITERAL: + case AST_NODE_GROUPED_EXPRESSION: + case AST_NODE_BUILTIN_CALL_TWO: + case AST_NODE_BUILTIN_CALL_TWO_COMMA: + case AST_NODE_BUILTIN_CALL: + case AST_NODE_BUILTIN_CALL_COMMA: + case AST_NODE_ERROR_SET_DECL: + case AST_NODE_COMPTIME: + case AST_NODE_NOSUSPEND: + case AST_NODE_ASM_SIMPLE: + case AST_NODE_ASM: + case AST_NODE_ARRAY_TYPE: + case AST_NODE_ARRAY_TYPE_SENTINEL: + case AST_NODE_ERROR_VALUE: + case AST_NODE_PTR_TYPE_ALIGNED: + case AST_NODE_PTR_TYPE_SENTINEL: + case AST_NODE_PTR_TYPE: + case AST_NODE_PTR_TYPE_BIT_RANGE: + return tree->nodes.main_tokens[n]; + + // Return main_token - 1: dot-prefixed inits and enum_literal + // (Ast.zig:645-654). + case AST_NODE_ARRAY_INIT_DOT: + case AST_NODE_ARRAY_INIT_DOT_COMMA: + case AST_NODE_ARRAY_INIT_DOT_TWO: + case AST_NODE_ARRAY_INIT_DOT_TWO_COMMA: + case AST_NODE_STRUCT_INIT_DOT: + case AST_NODE_STRUCT_INIT_DOT_COMMA: + case AST_NODE_STRUCT_INIT_DOT_TWO: + case AST_NODE_STRUCT_INIT_DOT_TWO_COMMA: + case AST_NODE_ENUM_LITERAL: + return tree->nodes.main_tokens[n] - 1; + + // Recurse into LHS: all binary ops and compound expressions + // (Ast.zig:656-733). + case AST_NODE_CATCH: + case AST_NODE_EQUAL_EQUAL: + case AST_NODE_BANG_EQUAL: + case AST_NODE_LESS_THAN: + case AST_NODE_GREATER_THAN: + case AST_NODE_LESS_OR_EQUAL: + case AST_NODE_GREATER_OR_EQUAL: + case AST_NODE_ASSIGN_MUL: + case AST_NODE_ASSIGN_DIV: + case AST_NODE_ASSIGN_MOD: + case AST_NODE_ASSIGN_ADD: + case AST_NODE_ASSIGN_SUB: + case AST_NODE_ASSIGN_SHL: + case AST_NODE_ASSIGN_SHL_SAT: + case AST_NODE_ASSIGN_SHR: + case AST_NODE_ASSIGN_BIT_AND: + case AST_NODE_ASSIGN_BIT_XOR: + case AST_NODE_ASSIGN_BIT_OR: + case AST_NODE_ASSIGN_MUL_WRAP: + case AST_NODE_ASSIGN_ADD_WRAP: + case AST_NODE_ASSIGN_SUB_WRAP: + case AST_NODE_ASSIGN_MUL_SAT: + case AST_NODE_ASSIGN_ADD_SAT: + case AST_NODE_ASSIGN_SUB_SAT: + case AST_NODE_ASSIGN: + case AST_NODE_MERGE_ERROR_SETS: + case AST_NODE_MUL: + case AST_NODE_DIV: + case AST_NODE_MOD: + case AST_NODE_ARRAY_MULT: + case AST_NODE_MUL_WRAP: + case AST_NODE_MUL_SAT: + case AST_NODE_ADD: + case AST_NODE_SUB: + case AST_NODE_ARRAY_CAT: + case AST_NODE_ADD_WRAP: + case AST_NODE_SUB_WRAP: + case AST_NODE_ADD_SAT: + case AST_NODE_SUB_SAT: + case AST_NODE_SHL: + case AST_NODE_SHL_SAT: + case AST_NODE_SHR: + case AST_NODE_BIT_AND: + case AST_NODE_BIT_XOR: + case AST_NODE_BIT_OR: + case AST_NODE_ORELSE: + case AST_NODE_BOOL_AND: + case AST_NODE_BOOL_OR: + case AST_NODE_SLICE_OPEN: + case AST_NODE_ARRAY_ACCESS: + case AST_NODE_ARRAY_INIT_ONE: + case AST_NODE_ARRAY_INIT_ONE_COMMA: + case AST_NODE_SWITCH_RANGE: + case AST_NODE_ERROR_UNION: + case AST_NODE_FOR_RANGE: + case AST_NODE_CALL_ONE: + case AST_NODE_CALL_ONE_COMMA: + case AST_NODE_STRUCT_INIT_ONE: + case AST_NODE_STRUCT_INIT_ONE_COMMA: + case AST_NODE_CALL: + case AST_NODE_CALL_COMMA: + case AST_NODE_STRUCT_INIT: + case AST_NODE_STRUCT_INIT_COMMA: + case AST_NODE_SLICE: + case AST_NODE_SLICE_SENTINEL: + case AST_NODE_ARRAY_INIT: + case AST_NODE_ARRAY_INIT_COMMA: + case AST_NODE_FIELD_ACCESS: + case AST_NODE_UNWRAP_OPTIONAL: + case AST_NODE_DEREF: + case AST_NODE_ASYNC_CALL_ONE: + case AST_NODE_ASYNC_CALL_ONE_COMMA: + case AST_NODE_ASYNC_CALL: + case AST_NODE_ASYNC_CALL_COMMA: + n = tree->nodes.datas[n].lhs; + continue; + + // Var decls: scan backwards for modifiers (Ast.zig:771-792). + case AST_NODE_GLOBAL_VAR_DECL: + case AST_NODE_LOCAL_VAR_DECL: + case AST_NODE_SIMPLE_VAR_DECL: + case AST_NODE_ALIGNED_VAR_DECL: { + uint32_t mt = tree->nodes.main_tokens[n]; + uint32_t i = mt; + while (i > 0) { + TokenizerTag tt = tree->tokens.tags[i - 1]; + if (tt == TOKEN_KEYWORD_EXTERN || tt == TOKEN_KEYWORD_EXPORT + || tt == TOKEN_KEYWORD_PUB + || tt == TOKEN_KEYWORD_THREADLOCAL + || tt == TOKEN_KEYWORD_COMPTIME + || tt == TOKEN_STRING_LITERAL) { + i--; + } else { + break; + } + } + return i; + } + // Fn decls: scan backwards for modifiers (Ast.zig:737-759). + case AST_NODE_FN_DECL: + case AST_NODE_FN_PROTO_SIMPLE: + case AST_NODE_FN_PROTO_MULTI: + case AST_NODE_FN_PROTO_ONE: + case AST_NODE_FN_PROTO: { + uint32_t mt = tree->nodes.main_tokens[n]; + uint32_t i = mt; + while (i > 0) { + TokenizerTag tt = tree->tokens.tags[i - 1]; + if (tt == TOKEN_KEYWORD_EXTERN || tt == TOKEN_KEYWORD_EXPORT + || tt == TOKEN_KEYWORD_PUB || tt == TOKEN_KEYWORD_INLINE + || tt == TOKEN_KEYWORD_NOINLINE + || tt == TOKEN_STRING_LITERAL) { + i--; + } else { + break; + } + } + return i; + } + // Container fields: check for preceding comptime (Ast.zig:761-769). + case AST_NODE_CONTAINER_FIELD_INIT: + case AST_NODE_CONTAINER_FIELD_ALIGN: + case AST_NODE_CONTAINER_FIELD: { + uint32_t mt = tree->nodes.main_tokens[n]; + if (mt > 0 && tree->tokens.tags[mt - 1] == TOKEN_KEYWORD_COMPTIME) + return mt - 1; + return mt; + } + // Blocks: check for label (Ast.zig:794-805). + case AST_NODE_BLOCK: + case AST_NODE_BLOCK_SEMICOLON: + case AST_NODE_BLOCK_TWO: + case AST_NODE_BLOCK_TWO_SEMICOLON: { + uint32_t lbrace = tree->nodes.main_tokens[n]; + if (lbrace >= 2 && tree->tokens.tags[lbrace - 1] == TOKEN_COLON + && tree->tokens.tags[lbrace - 2] == TOKEN_IDENTIFIER) + return lbrace - 2; + return lbrace; + } + // Fallback for any remaining node types. + default: + return tree->nodes.main_tokens[n]; + } + } +} + +// Mirrors AstGen.advanceSourceCursorToNode (AstGen.zig:13335). +static void advanceSourceCursorToNode(AstGenCtx* ag, uint32_t node) { + uint32_t ft = firstToken(ag->tree, node); + uint32_t token_start = ag->tree->tokens.starts[ft]; + (void)0; // cursor backward check disabled temporarily + advanceSourceCursor(ag, token_start); +} + +// Mirrors maybeAdvanceSourceCursorToMainToken (AstGen.zig:13324). +// Skips advancing when in comptime scope (matching upstream behavior). +static void advanceSourceCursorToMainToken( + AstGenCtx* ag, const GenZir* gz, uint32_t node) { + if (gz->is_comptime) + return; + uint32_t main_tok = ag->tree->nodes.main_tokens[node]; + uint32_t token_start = ag->tree->tokens.starts[main_tok]; + advanceSourceCursor(ag, token_start); +} + +// --- Token helpers --- + +// Mirrors GenZir.tokenIndexToRelative (AstGen.zig:11897). +// Returns destination - base as i32. +static int32_t tokenIndexToRelative(const GenZir* gz, uint32_t token) { + uint32_t base = firstToken(gz->astgen->tree, gz->decl_node_index); + return (int32_t)token - (int32_t)base; +} + +// --- String bytes helpers --- + +// Search for an existing null-terminated string in string_bytes. +// Returns the index if found, or UINT32_MAX if not found. +// Mirrors string_table dedup (AstGen.zig:11564). +// Find a string in string_table (registered strings only). +// Mirrors AstGen.string_table hash table lookup. +static uint32_t findExistingString( + const AstGenCtx* ag, const char* str, uint32_t len) { + for (uint32_t k = 0; k < ag->string_table_len; k++) { + uint32_t pos = ag->string_table[k]; + // Compare: string at pos is null-terminated in string_bytes. + const char* existing = (const char*)ag->string_bytes + pos; + uint32_t existing_len = (uint32_t)strlen(existing); + if (existing_len == len && memcmp(existing, str, len) == 0) { + return pos; + } + } + return UINT32_MAX; +} + +// Register a string position in the string table for deduplication. +static void registerString(AstGenCtx* ag, uint32_t pos) { + if (ag->string_table_len >= ag->string_table_cap) { + uint32_t new_cap = ag->string_table_cap * 2; + if (new_cap < 64) + new_cap = 64; + uint32_t* p = realloc(ag->string_table, new_cap * sizeof(uint32_t)); + if (!p) + exit(1); + ag->string_table = p; + ag->string_table_cap = new_cap; + } + ag->string_table[ag->string_table_len++] = pos; +} + +// Mirrors AstGen.tokenIdentEql (AstGen.zig:6148-6152). +// Compares two identifier tokens by source text without touching string_bytes. +static bool tokenIdentEql(const Ast* tree, uint32_t tok1, uint32_t tok2) { + uint32_t s1 = tree->tokens.starts[tok1]; + uint32_t s2 = tree->tokens.starts[tok2]; + uint32_t e1 = tree->tokens.starts[tok1 + 1]; + uint32_t e2 = tree->tokens.starts[tok2 + 1]; + // Token length includes trailing whitespace in starts delta, but for + // identifiers the actual content is a contiguous alphanumeric/underscore + // run. Compute actual identifier lengths. + uint32_t len1 = 0; + while (s1 + len1 < e1) { + char c = tree->source[s1 + len1]; + if (!((c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z') + || (c >= '0' && c <= '9') || c == '_')) + break; + len1++; + } + uint32_t len2 = 0; + while (s2 + len2 < e2) { + char c = tree->source[s2 + len2]; + if (!((c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z') + || (c >= '0' && c <= '9') || c == '_')) + break; + len2++; + } + return len1 == len2 + && memcmp(tree->source + s1, tree->source + s2, len1) == 0; +} + +// Forward declaration for strLitAsString (used by identAsString for @"..." +// quoted identifiers with escapes). +static void strLitAsString(AstGenCtx* ag, uint32_t str_lit_token, + uint32_t* out_index, uint32_t* out_len); + +// Mirrors AstGen.identAsString (AstGen.zig:11530). +// Handles both bare identifiers and @"..." quoted identifiers. +static uint32_t identAsString(AstGenCtx* ag, uint32_t ident_token) { + uint32_t start = ag->tree->tokens.starts[ident_token]; + const char* source = ag->tree->source; + + if (source[start] == '@' && start + 1 < ag->tree->source_len + && source[start + 1] == '"') { + // Quoted identifier: @"name" (AstGen.zig:11297-11308). + // Extract content between quotes, handling escapes. + uint32_t si, sl; + // str_lit_token refers to the same token, content starts after @" + // We reuse strLitAsString but offset by 1 to skip '@'. + // Actually, strLitAsString expects a token whose source starts + // with '"'. The @"..." token starts with '@'. We need to handle + // the offset manually. + uint32_t content_start = start + 2; // skip @" + uint32_t content_end = content_start; + while ( + content_end < ag->tree->source_len && source[content_end] != '"') + content_end++; + // Check for escapes. + bool has_escapes = false; + for (uint32_t j = content_start; j < content_end; j++) { + if (source[j] == '\\') { + has_escapes = true; + break; + } + } + + if (!has_escapes) { + uint32_t content_len = content_end - content_start; + uint32_t existing + = findExistingString(ag, source + content_start, content_len); + if (existing != UINT32_MAX) + return existing; + uint32_t str_index = ag->string_bytes_len; + ensureStringBytesCapacity(ag, content_len + 1); + memcpy(ag->string_bytes + ag->string_bytes_len, + source + content_start, content_len); + ag->string_bytes_len += content_len; + ag->string_bytes[ag->string_bytes_len++] = 0; + registerString(ag, str_index); + return str_index; + } + + // With escapes: use strLitAsString-like decoding. + strLitAsString(ag, ident_token, &si, &sl); + return si; + } + + // Bare identifier: scan alphanumeric + underscore. + uint32_t end = start; + while (end < ag->tree->source_len) { + char ch = source[end]; + if ((ch >= 'a' && ch <= 'z') || (ch >= 'A' && ch <= 'Z') + || (ch >= '0' && ch <= '9') || ch == '_') { + end++; + } else { + break; + } + } + uint32_t ident_len = end - start; + + // Check for existing string (dedup). + uint32_t existing = findExistingString(ag, source + start, ident_len); + if (existing != UINT32_MAX) + return existing; + + uint32_t str_index = ag->string_bytes_len; + ensureStringBytesCapacity(ag, ident_len + 1); + memcpy(ag->string_bytes + ag->string_bytes_len, source + start, ident_len); + ag->string_bytes_len += ident_len; + ag->string_bytes[ag->string_bytes_len++] = 0; + registerString(ag, str_index); + return str_index; +} + +// Mirrors AstGen.strLitAsString (AstGen.zig:11553). +// Decodes string literal, checks for embedded nulls. +// If embedded null found: store raw bytes without trailing null, no dedup. +// Otherwise: dedup via string_table, add trailing null. +static void strLitAsString(AstGenCtx* ag, uint32_t str_lit_token, + uint32_t* out_index, uint32_t* out_len) { + uint32_t tok_start = ag->tree->tokens.starts[str_lit_token]; + const char* source = ag->tree->source; + + // Skip opening quote. + uint32_t i = tok_start + 1; + // Find closing quote, skipping escaped characters. + uint32_t raw_end = i; + while (raw_end < ag->tree->source_len) { + if (source[raw_end] == '\\') { + raw_end += 2; // skip escape + escaped char + } else if (source[raw_end] == '"') { + break; + } else { + raw_end++; + } + } + + // Check if there are any escape sequences. + bool has_escapes = false; + for (uint32_t j = i; j < raw_end; j++) { + if (source[j] == '\\') { + has_escapes = true; + break; + } + } + + if (!has_escapes) { + // Fast path: no escapes, no embedded nulls possible. + uint32_t content_len = raw_end - i; + uint32_t existing = findExistingString(ag, source + i, content_len); + if (existing != UINT32_MAX) { + *out_index = existing; + *out_len = content_len; + return; + } + uint32_t str_index = ag->string_bytes_len; + ensureStringBytesCapacity(ag, content_len + 1); + memcpy( + ag->string_bytes + ag->string_bytes_len, source + i, content_len); + ag->string_bytes_len += content_len; + ag->string_bytes[ag->string_bytes_len++] = 0; + registerString(ag, str_index); + *out_index = str_index; + *out_len = content_len; + return; + } + + // Slow path: process escape sequences (AstGen.zig:11558). + // Decode directly into string_bytes (like upstream). + uint32_t str_index = ag->string_bytes_len; + uint32_t max_len = raw_end - i; + ensureStringBytesCapacity(ag, max_len + 1); + while (i < raw_end) { + if (source[i] == '\\') { + i++; + if (i >= raw_end) + break; + switch (source[i]) { + case 'n': + ag->string_bytes[ag->string_bytes_len++] = '\n'; + break; + case 'r': + ag->string_bytes[ag->string_bytes_len++] = '\r'; + break; + case 't': + ag->string_bytes[ag->string_bytes_len++] = '\t'; + break; + case '\\': + ag->string_bytes[ag->string_bytes_len++] = '\\'; + break; + case '\'': + ag->string_bytes[ag->string_bytes_len++] = '\''; + break; + case '"': + ag->string_bytes[ag->string_bytes_len++] = '"'; + break; + case 'x': { + // \xNN hex escape. + uint8_t val = 0; + for (int k = 0; k < 2 && i + 1 < raw_end; k++) { + i++; + char c = source[i]; + if (c >= '0' && c <= '9') + val = (uint8_t)(val * 16 + (uint8_t)(c - '0')); + else if (c >= 'a' && c <= 'f') + val = (uint8_t)(val * 16 + 10 + (uint8_t)(c - 'a')); + else if (c >= 'A' && c <= 'F') + val = (uint8_t)(val * 16 + 10 + (uint8_t)(c - 'A')); + } + ag->string_bytes[ag->string_bytes_len++] = val; + break; + } + default: + ag->string_bytes[ag->string_bytes_len++] = (uint8_t)source[i]; + break; + } + } else { + ag->string_bytes[ag->string_bytes_len++] = (uint8_t)source[i]; + } + i++; + } + uint32_t decoded_len = ag->string_bytes_len - str_index; + uint8_t* key = ag->string_bytes + str_index; + + // Check for embedded null bytes (AstGen.zig:11560). + // If found, skip dedup and don't add trailing null. + bool has_embedded_null = false; + for (uint32_t j = 0; j < decoded_len; j++) { + if (key[j] == 0) { + has_embedded_null = true; + break; + } + } + if (has_embedded_null) { + *out_index = str_index; + *out_len = decoded_len; + return; + } + + // Dedup against string_table (AstGen.zig:11564-11585). + uint32_t existing = findExistingString(ag, (const char*)key, decoded_len); + if (existing != UINT32_MAX) { + // Shrink back (AstGen.zig:11570). + ag->string_bytes_len = str_index; + *out_index = existing; + *out_len = decoded_len; + return; + } + + // New entry: add trailing null and register. + ensureStringBytesCapacity(ag, 1); + ag->string_bytes[ag->string_bytes_len++] = 0; + registerString(ag, str_index); + *out_index = str_index; + *out_len = decoded_len; +} + +// --- Declaration helpers --- + +// Mirrors GenZir.makeDeclaration (AstGen.zig:12906). +static uint32_t makeDeclaration(AstGenCtx* ag, uint32_t node) { + ensureInstCapacity(ag, 1); + uint32_t idx = ag->inst_len; + ag->inst_tags[idx] = ZIR_INST_DECLARATION; + ZirInstData data; + memset(&data, 0, sizeof(data)); + data.declaration.src_node = node; + // payload_index is set later by setDeclaration. + ag->inst_datas[idx] = data; + ag->inst_len++; + return idx; +} + +// Mirrors GenZir.makeBreakCommon (AstGen.zig:12667). +// Creates a break_inline instruction with a Break payload in extra. +// Records the instruction in the GenZir body. +static uint32_t makeBreakInline(GenZir* gz, uint32_t block_inst, + uint32_t operand, int32_t operand_src_node) { + AstGenCtx* ag = gz->astgen; + ensureInstCapacity(ag, 1); + ensureExtraCapacity(ag, 2); + + // Write Zir.Inst.Break payload to extra (Zir.zig:2489). + uint32_t payload_index = ag->extra_len; + ag->extra[ag->extra_len++] = (uint32_t)operand_src_node; + ag->extra[ag->extra_len++] = block_inst; + + uint32_t idx = ag->inst_len; + ag->inst_tags[idx] = ZIR_INST_BREAK_INLINE; + ZirInstData data; + data.break_data.operand = operand; + data.break_data.payload_index = payload_index; + ag->inst_datas[idx] = data; + ag->inst_len++; + + // Record in sub-block body. + gzAppendInstruction(gz, idx); + return idx; +} + +// Mirrors GenZir.makeBlockInst (AstGen.zig:12890). +// Creates a pl_node instruction with payload_index left as 0 (set later). +// Does NOT append to gz's instruction list. +// Returns instruction index (not a ref). +static uint32_t makeBlockInst( + AstGenCtx* ag, ZirInstTag tag, const GenZir* gz, uint32_t node) { + ensureInstCapacity(ag, 1); + uint32_t idx = ag->inst_len; + ag->inst_tags[idx] = tag; + ZirInstData data; + memset(&data, 0, sizeof(data)); + data.pl_node.src_node = (int32_t)node - (int32_t)gz->decl_node_index; + data.pl_node.payload_index = 0; // set later + ag->inst_datas[idx] = data; + ag->inst_len++; + return idx; +} + +// Mirrors appendPossiblyRefdBodyInst (AstGen.zig:13675-13683). +// Appends body_inst first, then recursively appends ref_table entry. +static void appendPossiblyRefdBodyInst(AstGenCtx* ag, uint32_t body_inst) { + ag->extra[ag->extra_len++] = body_inst; + uint32_t ref_inst; + if (refTableFetchRemove(ag, body_inst, &ref_inst)) { + appendPossiblyRefdBodyInst(ag, ref_inst); + } +} + +// Mirrors appendBodyWithFixupsExtraRefsArrayList (AstGen.zig:13659-13673). +// First processes extra_refs (e.g. param_insts), prepending their ref_table +// entries. Then writes body instructions with ref_table fixups. +static void appendBodyWithFixupsExtraRefs(AstGenCtx* ag, const uint32_t* body, + uint32_t body_len, const uint32_t* extra_refs, uint32_t extra_refs_len) { + for (uint32_t i = 0; i < extra_refs_len; i++) { + uint32_t ref_inst; + if (refTableFetchRemove(ag, extra_refs[i], &ref_inst)) { + appendPossiblyRefdBodyInst(ag, ref_inst); + } + } + for (uint32_t i = 0; i < body_len; i++) { + appendPossiblyRefdBodyInst(ag, body[i]); + } +} + +// Scratch extra capacity helper (for call arg bodies). +static void ensureScratchExtraCapacity(AstGenCtx* ag, uint32_t additional) { + uint32_t needed = ag->scratch_extra_len + additional; + if (needed > ag->scratch_extra_cap) { + uint32_t new_cap = ag->scratch_extra_cap * 2; + if (new_cap < needed) + new_cap = needed; + if (new_cap < 64) + new_cap = 64; + uint32_t* p = realloc(ag->scratch_extra, new_cap * sizeof(uint32_t)); + if (!p) + exit(1); + ag->scratch_extra = p; + ag->scratch_extra_cap = new_cap; + } +} + +// Like appendPossiblyRefdBodyInst but appends to scratch_extra instead of +// extra. +static void appendPossiblyRefdBodyInstScratch( + AstGenCtx* ag, uint32_t body_inst) { + ag->scratch_extra[ag->scratch_extra_len++] = body_inst; + uint32_t ref_inst; + if (refTableFetchRemove(ag, body_inst, &ref_inst)) { + ensureScratchExtraCapacity(ag, 1); + appendPossiblyRefdBodyInstScratch(ag, ref_inst); + } +} + +// Mirrors countBodyLenAfterFixupsExtraRefs (AstGen.zig:13694-13711). +static uint32_t countBodyLenAfterFixupsExtraRefs(AstGenCtx* ag, + const uint32_t* body, uint32_t body_len, const uint32_t* extra_refs, + uint32_t extra_refs_len) { + uint32_t count = body_len; + for (uint32_t i = 0; i < body_len; i++) { + uint32_t check_inst = body[i]; + const uint32_t* ref; + while ((ref = refTableGet(ag, check_inst)) != NULL) { + count++; + check_inst = *ref; + } + } + for (uint32_t i = 0; i < extra_refs_len; i++) { + uint32_t check_inst = extra_refs[i]; + const uint32_t* ref; + while ((ref = refTableGet(ag, check_inst)) != NULL) { + count++; + check_inst = *ref; + } + } + return count; +} + +// Mirrors countBodyLenAfterFixups (AstGen.zig:13686-13688). +static uint32_t countBodyLenAfterFixups( + AstGenCtx* ag, const uint32_t* body, uint32_t body_len) { + return countBodyLenAfterFixupsExtraRefs(ag, body, body_len, NULL, 0); +} + +// Mirrors GenZir.setBlockBody (AstGen.zig:11949). +// Writes Block payload (body_len + instruction indices) to extra. +// Sets the instruction's payload_index. Unstacks gz. +static void setBlockBody(AstGenCtx* ag, GenZir* gz, uint32_t inst) { + uint32_t raw_body_len = gzInstructionsLen(gz); + const uint32_t* body = gzInstructionsSlice(gz); + uint32_t body_len = countBodyLenAfterFixups(ag, body, raw_body_len); + ensureExtraCapacity(ag, 1 + body_len); + uint32_t payload_index = ag->extra_len; + ag->extra[ag->extra_len++] = body_len; + for (uint32_t i = 0; i < raw_body_len; i++) { + appendPossiblyRefdBodyInst(ag, body[i]); + } + ag->inst_datas[inst].pl_node.payload_index = payload_index; + gzUnstack(gz); +} + +// Mirrors GenZir.setTryBody (AstGen.zig:11997). +// Writes Try payload (operand + body_len + instruction indices) to extra. +// Sets the instruction's payload_index. Unstacks gz. +static void setTryBody( + AstGenCtx* ag, GenZir* gz, uint32_t inst, uint32_t operand) { + uint32_t raw_body_len = gzInstructionsLen(gz); + const uint32_t* body = gzInstructionsSlice(gz); + uint32_t body_len = countBodyLenAfterFixups(ag, body, raw_body_len); + ensureExtraCapacity(ag, 2 + body_len); + uint32_t payload_index = ag->extra_len; + ag->extra[ag->extra_len++] = operand; // Try.operand + ag->extra[ag->extra_len++] = body_len; // Try.body_len + for (uint32_t i = 0; i < raw_body_len; i++) { + appendPossiblyRefdBodyInst(ag, body[i]); + } + ag->inst_datas[inst].pl_node.payload_index = payload_index; + gzUnstack(gz); +} + +// Mirrors GenZir.setBlockComptimeBody (AstGen.zig:11972). +// Like setBlockBody but prepends comptime_reason before body_len. +// Asserts inst is a BLOCK_COMPTIME. +static void setBlockComptimeBody( + AstGenCtx* ag, GenZir* gz, uint32_t inst, uint32_t comptime_reason) { + uint32_t raw_body_len = gzInstructionsLen(gz); + const uint32_t* body = gzInstructionsSlice(gz); + uint32_t body_len = countBodyLenAfterFixups(ag, body, raw_body_len); + ensureExtraCapacity(ag, 2 + body_len); + uint32_t payload_index = ag->extra_len; + ag->extra[ag->extra_len++] = comptime_reason; + ag->extra[ag->extra_len++] = body_len; + for (uint32_t i = 0; i < raw_body_len; i++) { + appendPossiblyRefdBodyInst(ag, body[i]); + } + ag->inst_datas[inst].pl_node.payload_index = payload_index; + gzUnstack(gz); +} + +// Mirrors GenZir.addBreak (AstGen.zig:12623). +// Creates a ZIR_INST_BREAK instruction. +static uint32_t addBreak(GenZir* gz, ZirInstTag tag, uint32_t block_inst, + uint32_t operand, int32_t operand_src_node) { + AstGenCtx* ag = gz->astgen; + ensureInstCapacity(ag, 1); + ensureExtraCapacity(ag, 2); + + uint32_t payload_index = ag->extra_len; + ag->extra[ag->extra_len++] = (uint32_t)operand_src_node; + ag->extra[ag->extra_len++] = block_inst; + + uint32_t idx = ag->inst_len; + ag->inst_tags[idx] = tag; + ZirInstData data; + data.break_data.operand = operand; + data.break_data.payload_index = payload_index; + ag->inst_datas[idx] = data; + ag->inst_len++; + gzAppendInstruction(gz, idx); + return idx; +} + +// Mirrors GenZir.addCondBr (AstGen.zig:12834). +// Creates condbr instruction placeholder with src_node set. +// Payload is filled later by setCondBrPayload. +static uint32_t addCondBr(GenZir* gz, ZirInstTag tag, uint32_t node) { + AstGenCtx* ag = gz->astgen; + ensureInstCapacity(ag, 1); + uint32_t idx = ag->inst_len; + ag->inst_tags[idx] = tag; + ZirInstData data; + memset(&data, 0, sizeof(data)); + data.pl_node.src_node = (int32_t)node - (int32_t)gz->decl_node_index; + data.pl_node.payload_index = 0; // set later + ag->inst_datas[idx] = data; + ag->inst_len++; + gzAppendInstruction(gz, idx); + return idx; +} + +// Mirrors setCondBrPayload (AstGen.zig:6501). +// Writes CondBr payload: {condition, then_body_len, else_body_len} then +// then_body instructions, then else_body instructions. Unstacks both scopes. +// IMPORTANT: then_gz and else_gz are stacked (else on top of then), so +// then's instructions must use instructionsSliceUpto(else_gz) to avoid +// including else_gz's instructions in then's body. +static void setCondBrPayload(AstGenCtx* ag, uint32_t condbr_inst, + uint32_t condition, GenZir* then_gz, GenZir* else_gz) { + uint32_t raw_then_len = gzInstructionsLenUpto(then_gz, else_gz); + const uint32_t* then_body = gzInstructionsSliceUpto(then_gz, else_gz); + uint32_t raw_else_len = gzInstructionsLen(else_gz); + const uint32_t* else_body = gzInstructionsSlice(else_gz); + + uint32_t then_len = countBodyLenAfterFixups(ag, then_body, raw_then_len); + uint32_t else_len = countBodyLenAfterFixups(ag, else_body, raw_else_len); + + ensureExtraCapacity(ag, 3 + then_len + else_len); + uint32_t payload_index = ag->extra_len; + ag->extra[ag->extra_len++] = condition; // CondBr.condition + ag->extra[ag->extra_len++] = then_len; // CondBr.then_body_len + ag->extra[ag->extra_len++] = else_len; // CondBr.else_body_len + for (uint32_t i = 0; i < raw_then_len; i++) + appendPossiblyRefdBodyInst(ag, then_body[i]); + for (uint32_t i = 0; i < raw_else_len; i++) + appendPossiblyRefdBodyInst(ag, else_body[i]); + + ag->inst_datas[condbr_inst].pl_node.payload_index = payload_index; + gzUnstack(else_gz); + gzUnstack(then_gz); +} + +// Does this Declaration.Flags.Id have a name? (Zir.zig:2762) +static bool declIdHasName(DeclFlagsId id) { + return id != DECL_ID_UNNAMED_TEST && id != DECL_ID_COMPTIME; +} + +// Does this Declaration.Flags.Id have a lib name? (Zir.zig:2771) +static bool declIdHasLibName(DeclFlagsId id) { + switch (id) { + case DECL_ID_EXTERN_CONST: + case DECL_ID_PUB_EXTERN_CONST: + case DECL_ID_EXTERN_VAR: + case DECL_ID_EXTERN_VAR_THREADLOCAL: + case DECL_ID_PUB_EXTERN_VAR: + case DECL_ID_PUB_EXTERN_VAR_THREADLOCAL: + return true; + default: + return false; + } +} + +// Does this Declaration.Flags.Id have a type body? (Zir.zig:2783) +static bool declIdHasTypeBody(DeclFlagsId id) { + switch (id) { + case DECL_ID_UNNAMED_TEST: + case DECL_ID_TEST: + case DECL_ID_DECLTEST: + case DECL_ID_COMPTIME: + case DECL_ID_CONST_SIMPLE: + case DECL_ID_PUB_CONST_SIMPLE: + case DECL_ID_VAR_SIMPLE: + case DECL_ID_PUB_VAR_SIMPLE: + return false; + default: + return true; + } +} + +// Does this Declaration.Flags.Id have a value body? (Zir.zig:2800) +static bool declIdHasValueBody(DeclFlagsId id) { + switch (id) { + case DECL_ID_EXTERN_CONST_SIMPLE: + case DECL_ID_EXTERN_CONST: + case DECL_ID_PUB_EXTERN_CONST_SIMPLE: + case DECL_ID_PUB_EXTERN_CONST: + case DECL_ID_EXTERN_VAR: + case DECL_ID_EXTERN_VAR_THREADLOCAL: + case DECL_ID_PUB_EXTERN_VAR: + case DECL_ID_PUB_EXTERN_VAR_THREADLOCAL: + return false; + default: + return true; + } +} + +// Does this Declaration.Flags.Id have special bodies? (Zir.zig:2815) +static bool declIdHasSpecialBodies(DeclFlagsId id) { + switch (id) { + case DECL_ID_UNNAMED_TEST: + case DECL_ID_TEST: + case DECL_ID_DECLTEST: + case DECL_ID_COMPTIME: + case DECL_ID_CONST_SIMPLE: + case DECL_ID_CONST_TYPED: + case DECL_ID_PUB_CONST_SIMPLE: + case DECL_ID_PUB_CONST_TYPED: + case DECL_ID_EXTERN_CONST_SIMPLE: + case DECL_ID_PUB_EXTERN_CONST_SIMPLE: + case DECL_ID_VAR_SIMPLE: + case DECL_ID_PUB_VAR_SIMPLE: + return false; + default: + return true; + } +} + +// Mirrors setDeclaration (AstGen.zig:13883). +// Full version with type/align/linksection/addrspace/value bodies. +typedef struct { + uint32_t src_line; + uint32_t src_column; + DeclFlagsId id; + uint32_t name; // NullTerminatedString index + uint32_t lib_name; // NullTerminatedString index (UINT32_MAX=none) + const uint32_t* type_body; + uint32_t type_body_len; + const uint32_t* align_body; + uint32_t align_body_len; + const uint32_t* linksection_body; + uint32_t linksection_body_len; + const uint32_t* addrspace_body; + uint32_t addrspace_body_len; + const uint32_t* value_body; + uint32_t value_body_len; +} SetDeclArgs; + +static void setDeclaration( + AstGenCtx* ag, uint32_t decl_inst, SetDeclArgs args) { + DeclFlagsId id = args.id; + bool has_name = declIdHasName(id); + bool has_lib_name = declIdHasLibName(id); + bool has_type_body_field = declIdHasTypeBody(id); + bool has_special_bodies = declIdHasSpecialBodies(id); + bool has_value_body_field = declIdHasValueBody(id); + + uint32_t type_len + = countBodyLenAfterFixups(ag, args.type_body, args.type_body_len); + uint32_t align_len + = countBodyLenAfterFixups(ag, args.align_body, args.align_body_len); + uint32_t linksection_len = countBodyLenAfterFixups( + ag, args.linksection_body, args.linksection_body_len); + uint32_t addrspace_len = countBodyLenAfterFixups( + ag, args.addrspace_body, args.addrspace_body_len); + uint32_t value_len + = countBodyLenAfterFixups(ag, args.value_body, args.value_body_len); + + uint32_t need = 6; // src_hash[4] + flags[2] + if (has_name) + need++; + if (has_lib_name) + need++; + if (has_type_body_field) + need++; + if (has_special_bodies) + need += 3; + if (has_value_body_field) + need++; + need += type_len + align_len + linksection_len + addrspace_len + value_len; + ensureExtraCapacity(ag, need); + + uint32_t payload_start = ag->extra_len; + + // src_hash (4 words): zero-filled; hash comparison skipped in tests. + ag->extra[ag->extra_len++] = 0; + ag->extra[ag->extra_len++] = 0; + ag->extra[ag->extra_len++] = 0; + ag->extra[ag->extra_len++] = 0; + + // Declaration.Flags: packed struct(u64) { src_line: u30, src_column: u29, + // id: u5 } (Zir.zig:2719) + uint64_t flags = 0; + flags |= (uint64_t)(args.src_line & 0x3FFFFFFFu); + flags |= (uint64_t)(args.src_column & 0x1FFFFFFFu) << 30; + flags |= (uint64_t)((uint32_t)id & 0x1Fu) << 59; + ag->extra[ag->extra_len++] = (uint32_t)(flags & 0xFFFFFFFFu); + ag->extra[ag->extra_len++] = (uint32_t)(flags >> 32); + + if (has_name) + ag->extra[ag->extra_len++] = args.name; + if (has_lib_name) { + ag->extra[ag->extra_len++] + = (args.lib_name != UINT32_MAX) ? args.lib_name : 0; + } + if (has_type_body_field) + ag->extra[ag->extra_len++] = type_len; + if (has_special_bodies) { + ag->extra[ag->extra_len++] = align_len; + ag->extra[ag->extra_len++] = linksection_len; + ag->extra[ag->extra_len++] = addrspace_len; + } + if (has_value_body_field) + ag->extra[ag->extra_len++] = value_len; + + for (uint32_t i = 0; i < args.type_body_len; i++) + appendPossiblyRefdBodyInst(ag, args.type_body[i]); + for (uint32_t i = 0; i < args.align_body_len; i++) + appendPossiblyRefdBodyInst(ag, args.align_body[i]); + for (uint32_t i = 0; i < args.linksection_body_len; i++) + appendPossiblyRefdBodyInst(ag, args.linksection_body[i]); + for (uint32_t i = 0; i < args.addrspace_body_len; i++) + appendPossiblyRefdBodyInst(ag, args.addrspace_body[i]); + for (uint32_t i = 0; i < args.value_body_len; i++) + appendPossiblyRefdBodyInst(ag, args.value_body[i]); + + ag->inst_datas[decl_inst].declaration.payload_index = payload_start; +} + +// --- StructDecl.Small packing (Zir.zig StructDecl.Small) --- + +typedef struct { + bool has_captures_len; + bool has_fields_len; + bool has_decls_len; + bool has_backing_int; + bool known_non_opv; + bool known_comptime_only; + uint8_t name_strategy; // 2 bits + uint8_t layout; // 2 bits + bool any_default_inits; + bool any_comptime_fields; + bool any_aligned_fields; +} StructDeclSmall; + +static uint16_t packStructDeclSmall(StructDeclSmall s) { + uint16_t r = 0; + if (s.has_captures_len) + r |= (1u << 0); + if (s.has_fields_len) + r |= (1u << 1); + if (s.has_decls_len) + r |= (1u << 2); + if (s.has_backing_int) + r |= (1u << 3); + if (s.known_non_opv) + r |= (1u << 4); + if (s.known_comptime_only) + r |= (1u << 5); + r |= (uint16_t)(s.name_strategy & 0x3u) << 6; + r |= (uint16_t)(s.layout & 0x3u) << 8; + if (s.any_default_inits) + r |= (1u << 10); + if (s.any_comptime_fields) + r |= (1u << 11); + if (s.any_aligned_fields) + r |= (1u << 12); + return r; +} + +// Mirrors GenZir.setStruct (AstGen.zig:12935). +// Writes StructDecl payload and optional length fields. +// The caller appends captures, backing_int, decls, fields, bodies after. +static void setStruct(AstGenCtx* ag, uint32_t inst, uint32_t src_node, + StructDeclSmall small, uint32_t captures_len, uint32_t fields_len, + uint32_t decls_len) { + ensureExtraCapacity(ag, 6 + 3); + + uint32_t payload_index = ag->extra_len; + + // fields_hash (4 words): zero-filled; hash comparison skipped in tests. + ag->extra[ag->extra_len++] = 0; + ag->extra[ag->extra_len++] = 0; + ag->extra[ag->extra_len++] = 0; + ag->extra[ag->extra_len++] = 0; + + ag->extra[ag->extra_len++] = ag->source_line; + ag->extra[ag->extra_len++] = src_node; + + if (small.has_captures_len) + ag->extra[ag->extra_len++] = captures_len; + if (small.has_fields_len) + ag->extra[ag->extra_len++] = fields_len; + if (small.has_decls_len) + ag->extra[ag->extra_len++] = decls_len; + + ag->inst_tags[inst] = ZIR_INST_EXTENDED; + ZirInstData data; + memset(&data, 0, sizeof(data)); + data.extended.opcode = (uint16_t)ZIR_EXT_STRUCT_DECL; + data.extended.small = packStructDeclSmall(small); + data.extended.operand = payload_index; + ag->inst_datas[inst] = data; +} + +// --- scanContainer (AstGen.zig:13384) --- + +// Add a name→node entry to the decl table. +static void addDeclToTable( + AstGenCtx* ag, uint32_t name_str_index, uint32_t node) { + if (ag->decl_table_len >= ag->decl_table_cap) { + uint32_t new_cap = ag->decl_table_cap > 0 ? ag->decl_table_cap * 2 : 8; + uint32_t* n = realloc(ag->decl_names, new_cap * sizeof(uint32_t)); + uint32_t* d = realloc(ag->decl_nodes, new_cap * sizeof(uint32_t)); + if (!n || !d) + exit(1); + ag->decl_names = n; + ag->decl_nodes = d; + ag->decl_table_cap = new_cap; + } + ag->decl_names[ag->decl_table_len] = name_str_index; + ag->decl_nodes[ag->decl_table_len] = node; + ag->decl_table_len++; +} + +// Mirrors scanContainer (AstGen.zig:13384). +// Also populates the decl table (namespace.decls) for identifier resolution. +static uint32_t scanContainer( + AstGenCtx* ag, const uint32_t* members, uint32_t member_count) { + const Ast* tree = ag->tree; + uint32_t decl_count = 0; + for (uint32_t i = 0; i < member_count; i++) { + uint32_t member = members[i]; + AstNodeTag tag = tree->nodes.tags[member]; + switch (tag) { + case AST_NODE_GLOBAL_VAR_DECL: + case AST_NODE_LOCAL_VAR_DECL: + case AST_NODE_SIMPLE_VAR_DECL: + case AST_NODE_ALIGNED_VAR_DECL: { + decl_count++; + uint32_t name_token = tree->nodes.main_tokens[member] + 1; + uint32_t name_str = identAsString(ag, name_token); + addDeclToTable(ag, name_str, member); + break; + } + case AST_NODE_FN_PROTO_SIMPLE: + case AST_NODE_FN_PROTO_MULTI: + case AST_NODE_FN_PROTO_ONE: + case AST_NODE_FN_PROTO: + case AST_NODE_FN_DECL: { + decl_count++; + uint32_t name_token = tree->nodes.main_tokens[member] + 1; + uint32_t name_str = identAsString(ag, name_token); + addDeclToTable(ag, name_str, member); + break; + } + // Container fields: add field name to string table for ordering + // (AstGen.zig:13509). + case AST_NODE_CONTAINER_FIELD_INIT: + case AST_NODE_CONTAINER_FIELD_ALIGN: + case AST_NODE_CONTAINER_FIELD: { + uint32_t main_token = tree->nodes.main_tokens[member]; + identAsString(ag, main_token); + break; + } + case AST_NODE_COMPTIME: + decl_count++; + break; + case AST_NODE_TEST_DECL: { + decl_count++; + // Process test name string to match upstream string table + // ordering (AstGen.zig:13465-13500). + uint32_t test_name_token = tree->nodes.main_tokens[member] + 1; + TokenizerTag tt = tree->tokens.tags[test_name_token]; + if (tt == TOKEN_STRING_LITERAL) { + uint32_t si, sl; + strLitAsString(ag, test_name_token, &si, &sl); + } else if (tt == TOKEN_IDENTIFIER) { + identAsString(ag, test_name_token); + } + break; + } + default: + break; + } + } + return decl_count; +} + +// --- Import tracking --- + +static void addImport(AstGenCtx* ag, uint32_t name_index, uint32_t token) { + // Check for duplicates. + for (uint32_t i = 0; i < ag->imports_len; i++) { + if (ag->imports[i].name == name_index) + return; + } + if (ag->imports_len >= ag->imports_cap) { + uint32_t new_cap = ag->imports_cap > 0 ? ag->imports_cap * 2 : 4; + ImportEntry* p = realloc(ag->imports, new_cap * sizeof(ImportEntry)); + if (!p) + exit(1); + ag->imports = p; + ag->imports_cap = new_cap; + } + ag->imports[ag->imports_len].name = name_index; + ag->imports[ag->imports_len].token = token; + ag->imports_len++; +} + +// Write imports list to extra (AstGen.zig:227-244). +static void writeImports(AstGenCtx* ag) { + if (ag->imports_len == 0) { + ag->extra[ZIR_EXTRA_IMPORTS] = 0; + return; + } + uint32_t need = 1 + ag->imports_len * 2; + ensureExtraCapacity(ag, need); + uint32_t imports_index = ag->extra_len; + ag->extra[ag->extra_len++] = ag->imports_len; + for (uint32_t i = 0; i < ag->imports_len; i++) { + ag->extra[ag->extra_len++] = ag->imports[i].name; + ag->extra[ag->extra_len++] = ag->imports[i].token; + } + ag->extra[ZIR_EXTRA_IMPORTS] = imports_index; +} + +// ri.br() (AstGen.zig:274-282): convert coerced_ty to ty for branching. +static inline ResultLoc rlBr(ResultLoc rl) { + if (rl.tag == RL_COERCED_TY) { + return (ResultLoc) { + .tag = RL_TY, .data = rl.data, .src_node = 0, .ctx = rl.ctx + }; + } + return rl; +} + +// setBreakResultInfo (AstGen.zig:11905-11926): compute break result info +// from parent RL. Converts coerced_ty → ty, discard → discard, else passes +// through. For ptr/inferred_ptr, converts to ty/none respectively. +static ResultLoc breakResultInfo( + GenZir* gz, ResultLoc parent_rl, uint32_t node, bool need_rl) { + // First: compute block_ri (AstGen.zig:7639-7646). + // When need_rl is true, forward the rl as-is (don't convert ptr→ty). + ResultLoc block_ri; + if (need_rl) { + block_ri = parent_rl; + } else { + switch (parent_rl.tag) { + case RL_PTR: { + uint32_t ptr_ty + = addUnNode(gz, ZIR_INST_TYPEOF, parent_rl.data, node); + uint32_t ty = addUnNode(gz, ZIR_INST_ELEM_TYPE, ptr_ty, node); + block_ri = (ResultLoc) { + .tag = RL_TY, .data = ty, .src_node = 0, .ctx = parent_rl.ctx + }; + break; + } + case RL_INFERRED_PTR: + block_ri = (ResultLoc) { + .tag = RL_NONE, .data = 0, .src_node = 0, .ctx = parent_rl.ctx + }; + break; + default: + block_ri = parent_rl; + break; + } + } + // Then: setBreakResultInfo (AstGen.zig:11910-11925). + switch (block_ri.tag) { + case RL_COERCED_TY: + return (ResultLoc) { .tag = RL_TY, + .data = block_ri.data, + .src_node = 0, + .ctx = block_ri.ctx }; + case RL_DISCARD: + // Don't forward ctx (AstGen.zig:11916-11920). + return RL_DISCARD_VAL; + default: + return block_ri; + } +} + +// resultType (AstGen.zig:341-351): extract result type from RL. +// Returns 0 if no result type available. +static uint32_t rlResultType(GenZir* gz, ResultLoc rl, uint32_t node) { + switch (rl.tag) { + case RL_TY: + case RL_COERCED_TY: + return rl.data; + case RL_REF_COERCED_TY: + // AstGen.zig:345: .ref_coerced_ty => |ptr_ty| gz.addUnNode(.elem_type, + // ptr_ty, node) + return addUnNode(gz, ZIR_INST_ELEM_TYPE, rl.data, node); + case RL_PTR: { + // typeof(ptr) -> elem_type (AstGen.zig:346-349). + uint32_t ptr_ty = addUnNode(gz, ZIR_INST_TYPEOF, rl.data, node); + return addUnNode(gz, ZIR_INST_ELEM_TYPE, ptr_ty, node); + } + default: + return 0; + } +} + +// rvalue (AstGen.zig:11051-11224): apply result location wrapping. +static uint32_t rvalue( + GenZir* gz, ResultLoc rl, uint32_t result, uint32_t node) { + switch (rl.tag) { + case RL_NONE: + case RL_COERCED_TY: + return result; + case RL_DISCARD: + // ensure_result_non_error (AstGen.zig:11071-11074). + addUnNode(gz, ZIR_INST_ENSURE_RESULT_NON_ERROR, result, node); + return ZIR_REF_VOID_VALUE; + case RL_REF: + case RL_REF_COERCED_TY: { + // coerce_ptr_elem_ty for ref_coerced_ty (AstGen.zig:11077-11083). + uint32_t coerced_result = result; + if (rl.tag == RL_REF_COERCED_TY) { + coerced_result = addPlNodeBin( + gz, ZIR_INST_COERCE_PTR_ELEM_TY, node, rl.data, result); + } + AstGenCtx* ag = gz->astgen; + uint32_t src_token = firstToken(ag->tree, node); + // If result is not an instruction index (e.g. a well-known ref), + // emit ref directly (AstGen.zig:11091-11092). + if (coerced_result < ZIR_REF_START_INDEX) { + return addUnTok(gz, ZIR_INST_REF, coerced_result, src_token); + } + // Deduplication via ref_table (AstGen.zig:11093-11097). + uint32_t result_index = coerced_result - ZIR_REF_START_INDEX; + bool found; + uint32_t* val_ptr = refTableGetOrPut(ag, result_index, &found); + if (!found) { + *val_ptr = makeUnTok(gz, ZIR_INST_REF, coerced_result, src_token); + } + return *val_ptr + ZIR_REF_START_INDEX; + } + case RL_TY: { + // Quick elimination of common, unnecessary type coercions + // (AstGen.zig:11099-11209). +#define RC(t, v) (((uint64_t)(t) << 32) | (uint64_t)(v)) + uint64_t combined = RC(rl.data, result); + switch (combined) { + // Identity: type of result is already correct + // (AstGen.zig:11109-11176). + case RC(ZIR_REF_TYPE_TYPE, ZIR_REF_U1_TYPE): + case RC(ZIR_REF_TYPE_TYPE, ZIR_REF_U8_TYPE): + case RC(ZIR_REF_TYPE_TYPE, ZIR_REF_I8_TYPE): + case RC(ZIR_REF_TYPE_TYPE, ZIR_REF_U16_TYPE): + case RC(ZIR_REF_TYPE_TYPE, ZIR_REF_U29_TYPE): + case RC(ZIR_REF_TYPE_TYPE, ZIR_REF_I16_TYPE): + case RC(ZIR_REF_TYPE_TYPE, ZIR_REF_U32_TYPE): + case RC(ZIR_REF_TYPE_TYPE, ZIR_REF_I32_TYPE): + case RC(ZIR_REF_TYPE_TYPE, ZIR_REF_U64_TYPE): + case RC(ZIR_REF_TYPE_TYPE, ZIR_REF_I64_TYPE): + case RC(ZIR_REF_TYPE_TYPE, ZIR_REF_U128_TYPE): + case RC(ZIR_REF_TYPE_TYPE, ZIR_REF_I128_TYPE): + case RC(ZIR_REF_TYPE_TYPE, ZIR_REF_USIZE_TYPE): + case RC(ZIR_REF_TYPE_TYPE, ZIR_REF_ISIZE_TYPE): + case RC(ZIR_REF_TYPE_TYPE, ZIR_REF_C_CHAR_TYPE): + case RC(ZIR_REF_TYPE_TYPE, ZIR_REF_C_SHORT_TYPE): + case RC(ZIR_REF_TYPE_TYPE, ZIR_REF_C_USHORT_TYPE): + case RC(ZIR_REF_TYPE_TYPE, ZIR_REF_C_INT_TYPE): + case RC(ZIR_REF_TYPE_TYPE, ZIR_REF_C_UINT_TYPE): + case RC(ZIR_REF_TYPE_TYPE, ZIR_REF_C_LONG_TYPE): + case RC(ZIR_REF_TYPE_TYPE, ZIR_REF_C_ULONG_TYPE): + case RC(ZIR_REF_TYPE_TYPE, ZIR_REF_C_LONGLONG_TYPE): + case RC(ZIR_REF_TYPE_TYPE, ZIR_REF_C_ULONGLONG_TYPE): + case RC(ZIR_REF_TYPE_TYPE, ZIR_REF_C_LONGDOUBLE_TYPE): + case RC(ZIR_REF_TYPE_TYPE, ZIR_REF_F16_TYPE): + case RC(ZIR_REF_TYPE_TYPE, ZIR_REF_F32_TYPE): + case RC(ZIR_REF_TYPE_TYPE, ZIR_REF_F64_TYPE): + case RC(ZIR_REF_TYPE_TYPE, ZIR_REF_F80_TYPE): + case RC(ZIR_REF_TYPE_TYPE, ZIR_REF_F128_TYPE): + case RC(ZIR_REF_TYPE_TYPE, ZIR_REF_ANYOPAQUE_TYPE): + case RC(ZIR_REF_TYPE_TYPE, ZIR_REF_BOOL_TYPE): + case RC(ZIR_REF_TYPE_TYPE, ZIR_REF_VOID_TYPE): + case RC(ZIR_REF_TYPE_TYPE, ZIR_REF_TYPE_TYPE): + case RC(ZIR_REF_TYPE_TYPE, ZIR_REF_ANYERROR_TYPE): + case RC(ZIR_REF_TYPE_TYPE, ZIR_REF_COMPTIME_INT_TYPE): + case RC(ZIR_REF_TYPE_TYPE, ZIR_REF_COMPTIME_FLOAT_TYPE): + case RC(ZIR_REF_TYPE_TYPE, ZIR_REF_NORETURN_TYPE): + case RC(ZIR_REF_TYPE_TYPE, ZIR_REF_ANYFRAME_TYPE): + case RC(ZIR_REF_TYPE_TYPE, ZIR_REF_NULL_TYPE): + case RC(ZIR_REF_TYPE_TYPE, ZIR_REF_UNDEFINED_TYPE): + case RC(ZIR_REF_TYPE_TYPE, ZIR_REF_ENUM_LITERAL_TYPE): + case RC(ZIR_REF_TYPE_TYPE, ZIR_REF_PTR_USIZE_TYPE): + case RC(ZIR_REF_TYPE_TYPE, ZIR_REF_PTR_CONST_COMPTIME_INT_TYPE): + case RC(ZIR_REF_TYPE_TYPE, ZIR_REF_MANYPTR_U8_TYPE): + case RC(ZIR_REF_TYPE_TYPE, ZIR_REF_MANYPTR_CONST_U8_TYPE): + case RC(ZIR_REF_TYPE_TYPE, ZIR_REF_MANYPTR_CONST_U8_SENTINEL_0_TYPE): + case RC(ZIR_REF_TYPE_TYPE, ZIR_REF_SLICE_CONST_U8_TYPE): + case RC(ZIR_REF_TYPE_TYPE, ZIR_REF_SLICE_CONST_U8_SENTINEL_0_TYPE): + case RC(ZIR_REF_TYPE_TYPE, ZIR_REF_ANYERROR_VOID_ERROR_UNION_TYPE): + case RC(ZIR_REF_TYPE_TYPE, ZIR_REF_GENERIC_POISON_TYPE): + case RC(ZIR_REF_TYPE_TYPE, ZIR_REF_EMPTY_TUPLE_TYPE): + case RC(ZIR_REF_COMPTIME_INT_TYPE, ZIR_REF_ZERO): + case RC(ZIR_REF_COMPTIME_INT_TYPE, ZIR_REF_ONE): + case RC(ZIR_REF_COMPTIME_INT_TYPE, ZIR_REF_NEGATIVE_ONE): + case RC(ZIR_REF_USIZE_TYPE, ZIR_REF_UNDEF_USIZE): + case RC(ZIR_REF_USIZE_TYPE, ZIR_REF_ZERO_USIZE): + case RC(ZIR_REF_USIZE_TYPE, ZIR_REF_ONE_USIZE): + case RC(ZIR_REF_U1_TYPE, ZIR_REF_UNDEF_U1): + case RC(ZIR_REF_U1_TYPE, ZIR_REF_ZERO_U1): + case RC(ZIR_REF_U1_TYPE, ZIR_REF_ONE_U1): + case RC(ZIR_REF_U8_TYPE, ZIR_REF_ZERO_U8): + case RC(ZIR_REF_U8_TYPE, ZIR_REF_ONE_U8): + case RC(ZIR_REF_U8_TYPE, ZIR_REF_FOUR_U8): + case RC(ZIR_REF_BOOL_TYPE, ZIR_REF_UNDEF_BOOL): + case RC(ZIR_REF_BOOL_TYPE, ZIR_REF_BOOL_TRUE): + case RC(ZIR_REF_BOOL_TYPE, ZIR_REF_BOOL_FALSE): + case RC(ZIR_REF_VOID_TYPE, ZIR_REF_VOID_VALUE): + return result; + // Conversions (AstGen.zig:11178-11202). + case RC(ZIR_REF_BOOL_TYPE, ZIR_REF_UNDEF): + return ZIR_REF_UNDEF_BOOL; + case RC(ZIR_REF_USIZE_TYPE, ZIR_REF_UNDEF): + return ZIR_REF_UNDEF_USIZE; + case RC(ZIR_REF_USIZE_TYPE, ZIR_REF_UNDEF_U1): + return ZIR_REF_UNDEF_USIZE; + case RC(ZIR_REF_U1_TYPE, ZIR_REF_UNDEF): + return ZIR_REF_UNDEF_U1; + case RC(ZIR_REF_USIZE_TYPE, ZIR_REF_ZERO): + return ZIR_REF_ZERO_USIZE; + case RC(ZIR_REF_U1_TYPE, ZIR_REF_ZERO): + return ZIR_REF_ZERO_U1; + case RC(ZIR_REF_U8_TYPE, ZIR_REF_ZERO): + return ZIR_REF_ZERO_U8; + case RC(ZIR_REF_USIZE_TYPE, ZIR_REF_ONE): + return ZIR_REF_ONE_USIZE; + case RC(ZIR_REF_U1_TYPE, ZIR_REF_ONE): + return ZIR_REF_ONE_U1; + case RC(ZIR_REF_U8_TYPE, ZIR_REF_ONE): + return ZIR_REF_ONE_U8; + case RC(ZIR_REF_COMPTIME_INT_TYPE, ZIR_REF_ZERO_USIZE): + return ZIR_REF_ZERO; + case RC(ZIR_REF_U1_TYPE, ZIR_REF_ZERO_USIZE): + return ZIR_REF_ZERO_U1; + case RC(ZIR_REF_U8_TYPE, ZIR_REF_ZERO_USIZE): + return ZIR_REF_ZERO_U8; + case RC(ZIR_REF_COMPTIME_INT_TYPE, ZIR_REF_ONE_USIZE): + return ZIR_REF_ONE; + case RC(ZIR_REF_U1_TYPE, ZIR_REF_ONE_USIZE): + return ZIR_REF_ONE_U1; + case RC(ZIR_REF_U8_TYPE, ZIR_REF_ONE_USIZE): + return ZIR_REF_ONE_U8; + case RC(ZIR_REF_COMPTIME_INT_TYPE, ZIR_REF_ZERO_U1): + return ZIR_REF_ZERO; + case RC(ZIR_REF_COMPTIME_INT_TYPE, ZIR_REF_ZERO_U8): + return ZIR_REF_ZERO; + case RC(ZIR_REF_USIZE_TYPE, ZIR_REF_ZERO_U1): + return ZIR_REF_ZERO_USIZE; + case RC(ZIR_REF_USIZE_TYPE, ZIR_REF_ZERO_U8): + return ZIR_REF_ZERO_USIZE; + case RC(ZIR_REF_COMPTIME_INT_TYPE, ZIR_REF_ONE_U1): + return ZIR_REF_ONE; + case RC(ZIR_REF_COMPTIME_INT_TYPE, ZIR_REF_ONE_U8): + return ZIR_REF_ONE; + case RC(ZIR_REF_USIZE_TYPE, ZIR_REF_ONE_U1): + return ZIR_REF_ONE_USIZE; + case RC(ZIR_REF_USIZE_TYPE, ZIR_REF_ONE_U8): + return ZIR_REF_ONE_USIZE; + default: { + ZirInstTag as_tag = (rl.ctx == RI_CTX_SHIFT_OP) + ? ZIR_INST_AS_SHIFT_OPERAND + : ZIR_INST_AS_NODE; + return addPlNodeBin(gz, as_tag, node, rl.data, result); + } + } +#undef RC + } + case RL_PTR: + // store_node (AstGen.zig:11211-11216). + addPlNodeBin(gz, ZIR_INST_STORE_NODE, + rl.src_node != 0 ? rl.src_node : node, rl.data, result); + return ZIR_REF_VOID_VALUE; + case RL_INFERRED_PTR: + // store_to_inferred_ptr (AstGen.zig:11218-11223). + addPlNodeBin( + gz, ZIR_INST_STORE_TO_INFERRED_PTR, node, rl.data, result); + return ZIR_REF_VOID_VALUE; + } + return result; +} + +// rvalueNoCoercePreRef (AstGen.zig:11042-11049): like rvalue but does NOT +// emit coerce_ptr_elem_ty for RL_REF_COERCED_TY. Used for local var refs. +static uint32_t rvalueNoCoercePreRef( + GenZir* gz, ResultLoc rl, uint32_t result, uint32_t node) { + if (rl.tag == RL_REF_COERCED_TY) { + ResultLoc ref_rl = rl; + ref_rl.tag = RL_REF; + return rvalue(gz, ref_rl, result, node); + } + return rvalue(gz, rl, result, node); +} + +// --- Expression evaluation (AstGen.zig:634) --- + +// Forward declarations. +static uint32_t expr(GenZir* gz, Scope* scope, uint32_t node); +// --- DefersToEmit (AstGen.zig:3008) --- +#define DEFER_NORMAL_ONLY 0 +#define DEFER_BOTH_SANS_ERR 1 + +// --- DeferCounts (AstGen.zig:2966) --- +typedef struct { + bool have_any; + bool have_normal; + bool have_err; + bool need_err_code; +} DeferCounts; +static DeferCounts countDefers(const Scope* outer_scope, Scope* inner_scope); + +static uint32_t exprRl(GenZir* gz, Scope* scope, ResultLoc rl, uint32_t node); +static void assignStmt(GenZir* gz, Scope* scope, uint32_t infix_node); +static void assignOp( + GenZir* gz, Scope* scope, uint32_t infix_node, ZirInstTag op_tag); +static uint32_t shiftOp( + GenZir* gz, Scope* scope, uint32_t node, ZirInstTag tag); +static void emitDbgStmt(GenZir* gz, uint32_t line, uint32_t column); +static void genDefers( + GenZir* gz, const Scope* outer_scope, Scope* inner_scope, int which); +static void emitDbgStmtForceCurrentIndex( + GenZir* gz, uint32_t line, uint32_t column); +static void emitDbgNode(GenZir* gz, uint32_t node); +static void addDbgVar( + GenZir* gz, ZirInstTag tag, uint32_t name, uint32_t inst); +static bool addEnsureResult( + GenZir* gz, uint32_t maybe_unused_result, uint32_t statement); +static void blockExprStmts( + GenZir* gz, Scope* scope, const uint32_t* statements, uint32_t stmt_count); +static uint32_t fullBodyExpr( + GenZir* gz, Scope* scope, ResultLoc rl, uint32_t node); +static uint32_t containerDecl(GenZir* gz, Scope* scope, uint32_t node); +static uint32_t structDeclInner(AstGenCtx* ag, GenZir* gz, uint32_t node, + const uint32_t* members, uint32_t members_len); +static uint32_t enumDeclInner(AstGenCtx* ag, GenZir* gz, uint32_t node, + const uint32_t* members, uint32_t members_len); +static uint32_t blockExprExpr( + GenZir* gz, Scope* scope, ResultLoc rl, uint32_t node); +static uint32_t ifExpr(GenZir* gz, Scope* scope, ResultLoc rl, uint32_t node); +static uint32_t forExpr( + GenZir* gz, Scope* scope, uint32_t node, bool is_statement); +static uint32_t orelseCatchExpr( + GenZir* gz, Scope* scope, ResultLoc rl, uint32_t node, bool is_catch); +static uint32_t arrayInitDotExpr( + GenZir* gz, Scope* scope, ResultLoc rl, uint32_t node); +static uint32_t switchExpr( + GenZir* gz, Scope* scope, ResultLoc rl, uint32_t node); +static uint32_t whileExpr( + GenZir* gz, Scope* scope, uint32_t node, bool is_statement); +#define EVAL_TO_ERROR_NEVER 0 +#define EVAL_TO_ERROR_ALWAYS 1 +#define EVAL_TO_ERROR_MAYBE 2 +static int nodeMayEvalToError(const Ast* tree, uint32_t node); +static bool nodeMayAppendToErrorTrace(const Ast* tree, uint32_t node); +static void addSaveErrRetIndex(GenZir* gz, uint32_t operand); +static void addRestoreErrRetIndexBlock( + GenZir* gz, uint32_t block_inst, uint32_t operand, uint32_t node); +static void restoreErrRetIndex(GenZir* gz, uint32_t block_inst, ResultLoc rl, + uint32_t node, uint32_t result); +static uint32_t identAsString(AstGenCtx* ag, uint32_t token); +static uint32_t lastToken(const Ast* tree, uint32_t node); +static uint32_t simpleBinOp( + GenZir* gz, Scope* scope, uint32_t node, ZirInstTag tag); + +// Mirrors GenZir.endsWithNoReturn (AstGen.zig:11770). +static bool endsWithNoReturn(GenZir* gz) { + uint32_t len = gzInstructionsLen(gz); + if (len == 0) + return false; + uint32_t last = gzInstructionsSlice(gz)[len - 1]; + ZirInstTag tag = gz->astgen->inst_tags[last]; + switch (tag) { + case ZIR_INST_BREAK: + case ZIR_INST_BREAK_INLINE: + case ZIR_INST_CONDBR: + case ZIR_INST_CONDBR_INLINE: + case ZIR_INST_COMPILE_ERROR: + case ZIR_INST_RET_NODE: + case ZIR_INST_RET_LOAD: + case ZIR_INST_RET_IMPLICIT: + case ZIR_INST_RET_ERR_VALUE: + case ZIR_INST_UNREACHABLE: + case ZIR_INST_REPEAT: + case ZIR_INST_REPEAT_INLINE: + case ZIR_INST_PANIC: + case ZIR_INST_TRAP: + case ZIR_INST_CHECK_COMPTIME_CONTROL_FLOW: + case ZIR_INST_SWITCH_CONTINUE: + return true; + default: + return false; + } +} + +// Mirrors GenZir.refIsNoReturn (AstGen.zig:11885). +static bool refIsNoReturn(GenZir* gz, uint32_t inst_ref) { + if (inst_ref == ZIR_REF_UNREACHABLE_VALUE) + return true; + if (inst_ref >= ZIR_REF_START_INDEX) { + uint32_t inst_index = inst_ref - ZIR_REF_START_INDEX; + ZirInstTag tag = gz->astgen->inst_tags[inst_index]; + switch (tag) { + case ZIR_INST_BREAK: + case ZIR_INST_BREAK_INLINE: + case ZIR_INST_CONDBR: + case ZIR_INST_CONDBR_INLINE: + case ZIR_INST_COMPILE_ERROR: + case ZIR_INST_RET_NODE: + case ZIR_INST_RET_LOAD: + case ZIR_INST_RET_IMPLICIT: + case ZIR_INST_RET_ERR_VALUE: + case ZIR_INST_UNREACHABLE: + case ZIR_INST_REPEAT: + case ZIR_INST_REPEAT_INLINE: + case ZIR_INST_PANIC: + case ZIR_INST_TRAP: + case ZIR_INST_CHECK_COMPTIME_CONTROL_FLOW: + case ZIR_INST_SWITCH_CONTINUE: + return true; + default: + return false; + } + } + return false; +} + +static uint32_t tryResolvePrimitiveIdent(GenZir* gz, uint32_t node); + +// SimpleComptimeReason (std.zig:727) — values used in block_comptime payload. +#define COMPTIME_REASON_TYPE 29 +#define COMPTIME_REASON_ARRAY_SENTINEL 30 +#define COMPTIME_REASON_POINTER_SENTINEL 31 +#define COMPTIME_REASON_SLICE_SENTINEL 32 +#define COMPTIME_REASON_ARRAY_LENGTH 33 +#define COMPTIME_REASON_ALIGN 50 +#define COMPTIME_REASON_ADDRSPACE 51 +#define COMPTIME_REASON_COMPTIME_KEYWORD 53 +#define COMPTIME_REASON_SWITCH_ITEM 56 + +// Mirrors comptimeExpr2 (AstGen.zig:1982). +// Evaluates a node in a comptime block_comptime scope. +static uint32_t comptimeExpr( + GenZir* gz, Scope* scope, ResultLoc rl, uint32_t node, uint32_t reason) { + // Skip wrapping when already in comptime context (AstGen.zig:1990). + if (gz->is_comptime) + return exprRl(gz, scope, rl, node); + // Optimization: certain node types are trivially comptime and don't need + // a block_comptime wrapper (AstGen.zig:1997-2046). + AstGenCtx* ag = gz->astgen; + AstNodeTag tag = ag->tree->nodes.tags[node]; + switch (tag) { + // Identifier handling (AstGen.zig:2000-2003): + // Upstream calls identifier() with force_comptime which resolves + // primitives/int types directly and only wraps others in block_comptime. + // We mirror this by resolving primitives here and falling through for + // non-primitives. + case AST_NODE_IDENTIFIER: { + uint32_t prim = tryResolvePrimitiveIdent(gz, node); + if (prim != ZIR_REF_NONE) + return prim; + break; // non-primitive: fall through to block_comptime wrapping + } + case AST_NODE_NUMBER_LITERAL: + case AST_NODE_CHAR_LITERAL: + case AST_NODE_STRING_LITERAL: + case AST_NODE_MULTILINE_STRING_LITERAL: + case AST_NODE_ENUM_LITERAL: + case AST_NODE_ERROR_VALUE: + // Type expressions that force comptime eval of sub-expressions + // (AstGen.zig:2017-2042). + case AST_NODE_ERROR_UNION: + case AST_NODE_MERGE_ERROR_SETS: + case AST_NODE_OPTIONAL_TYPE: + case AST_NODE_PTR_TYPE_ALIGNED: + case AST_NODE_PTR_TYPE_SENTINEL: + case AST_NODE_PTR_TYPE: + case AST_NODE_PTR_TYPE_BIT_RANGE: + case AST_NODE_ARRAY_TYPE: + case AST_NODE_ARRAY_TYPE_SENTINEL: + case AST_NODE_FN_PROTO_SIMPLE: + case AST_NODE_FN_PROTO_MULTI: + case AST_NODE_FN_PROTO_ONE: + case AST_NODE_FN_PROTO: + case AST_NODE_CONTAINER_DECL: + case AST_NODE_CONTAINER_DECL_TRAILING: + case AST_NODE_CONTAINER_DECL_ARG: + case AST_NODE_CONTAINER_DECL_ARG_TRAILING: + case AST_NODE_CONTAINER_DECL_TWO: + case AST_NODE_CONTAINER_DECL_TWO_TRAILING: + case AST_NODE_TAGGED_UNION: + case AST_NODE_TAGGED_UNION_TRAILING: + case AST_NODE_TAGGED_UNION_ENUM_TAG: + case AST_NODE_TAGGED_UNION_ENUM_TAG_TRAILING: + case AST_NODE_TAGGED_UNION_TWO: + case AST_NODE_TAGGED_UNION_TWO_TRAILING: + return exprRl(gz, scope, rl, node); + default: + break; + } + // General case: wrap in block_comptime (AstGen.zig:2078-2096). + uint32_t block_inst = makeBlockInst(ag, ZIR_INST_BLOCK_COMPTIME, gz, node); + GenZir block_scope = makeSubBlock(gz, scope); + block_scope.is_comptime = true; + // Transform RL to type-only (AstGen.zig:2084-2090). + ResultLoc ty_only_rl; + uint32_t res_ty = rlResultType(gz, rl, node); + if (res_ty != 0) + ty_only_rl = (ResultLoc) { + .tag = RL_COERCED_TY, .data = res_ty, .src_node = 0, .ctx = rl.ctx + }; + else + ty_only_rl = (ResultLoc) { + .tag = RL_NONE, .data = 0, .src_node = 0, .ctx = rl.ctx + }; + uint32_t result = exprRl(&block_scope, scope, ty_only_rl, node); + addBreak(&block_scope, ZIR_INST_BREAK_INLINE, block_inst, result, + AST_NODE_OFFSET_NONE); + setBlockComptimeBody(ag, &block_scope, block_inst, reason); + gzAppendInstruction(gz, block_inst); + return rvalue(gz, rl, block_inst + ZIR_REF_START_INDEX, node); +} + +// Mirrors typeExpr (AstGen.zig:394). +static uint32_t typeExpr(GenZir* gz, Scope* scope, uint32_t node) { + ResultLoc rl = { .tag = RL_COERCED_TY, + .data = ZIR_REF_TYPE_TYPE, + .src_node = 0, + .ctx = RI_CTX_NONE }; + return comptimeExpr(gz, scope, rl, node, COMPTIME_REASON_TYPE); +} + +// Mirrors numberLiteral (AstGen.zig:8544). +// Parses integer and float literals, returns appropriate ZIR ref. +static uint32_t numberLiteral(GenZir* gz, uint32_t node) { + AstGenCtx* ag = gz->astgen; + uint32_t num_token = ag->tree->nodes.main_tokens[node]; + uint32_t tok_start = ag->tree->tokens.starts[num_token]; + const char* source = ag->tree->source; + + // Determine token length by scanning to next non-number character. + uint32_t tok_end = tok_start; + while (tok_end < ag->tree->source_len + && ((source[tok_end] >= '0' && source[tok_end] <= '9') + || source[tok_end] == '_' || source[tok_end] == '.' + || source[tok_end] == 'x' || source[tok_end] == 'o' + || source[tok_end] == 'b' + || (source[tok_end] >= 'a' && source[tok_end] <= 'f') + || (source[tok_end] >= 'A' && source[tok_end] <= 'F'))) { + tok_end++; + } + + // Parse the integer value (simplified: decimal and hex). + uint64_t value = 0; + bool is_hex = false; + uint32_t pos = tok_start; + if (tok_end - tok_start >= 2 && source[tok_start] == '0' + && source[tok_start + 1] == 'x') { + is_hex = true; + pos = tok_start + 2; + } + + if (is_hex) { + for (; pos < tok_end; pos++) { + if (source[pos] == '_') + continue; + if (source[pos] >= '0' && source[pos] <= '9') + value = value * 16 + (uint64_t)(source[pos] - '0'); + else if (source[pos] >= 'a' && source[pos] <= 'f') + value = value * 16 + 10 + (uint64_t)(source[pos] - 'a'); + else if (source[pos] >= 'A' && source[pos] <= 'F') + value = value * 16 + 10 + (uint64_t)(source[pos] - 'A'); + } + } else { + for (; pos < tok_end; pos++) { + if (source[pos] == '_') + continue; + if (source[pos] == '.') + break; // float — not handled yet + if (source[pos] >= '0' && source[pos] <= '9') + value = value * 10 + (uint64_t)(source[pos] - '0'); + } + } + + // Special cases for 0 and 1 (AstGen.zig:8687-8703). + if (value == 0) + return ZIR_REF_ZERO; + if (value == 1) + return ZIR_REF_ONE; + + return addInt(gz, value); +} + +// Mirrors builtinCall (AstGen.zig:9191), @import case (AstGen.zig:9242). +static uint32_t builtinCallImport(GenZir* gz, Scope* scope, uint32_t node) { + (void)scope; + AstGenCtx* ag = gz->astgen; + const Ast* tree = ag->tree; + + // For builtin_call_two: data.lhs = first arg node. + AstData node_data = tree->nodes.datas[node]; + uint32_t operand_node = node_data.lhs; + + assert(tree->nodes.tags[operand_node] == AST_NODE_STRING_LITERAL); + uint32_t str_lit_token = tree->nodes.main_tokens[operand_node]; + + uint32_t str_index, str_len; + strLitAsString(ag, str_lit_token, &str_index, &str_len); + + // Write Import payload to extra (Zir.Inst.Import: res_ty, path). + ensureExtraCapacity(ag, 2); + uint32_t payload_index = ag->extra_len; + ag->extra[ag->extra_len++] = ZIR_REF_NONE; // res_ty = .none + ag->extra[ag->extra_len++] = str_index; // path + + // Create .import instruction with pl_tok data. + ZirInstData data; + data.pl_tok.src_tok = tokenIndexToRelative(gz, str_lit_token); + data.pl_tok.payload_index = payload_index; + uint32_t result_ref = addInstruction(gz, ZIR_INST_IMPORT, data); + + // Track import (AstGen.zig:9269). + addImport(ag, str_index, str_lit_token); + + return result_ref; +} + +// Mirrors cImport (AstGen.zig:10011). +static uint32_t cImportExpr(GenZir* gz, Scope* scope, uint32_t node) { + AstGenCtx* ag = gz->astgen; + AstData nd = ag->tree->nodes.datas[node]; + uint32_t body_node = nd.lhs; // first arg = body + + uint32_t block_inst = makeBlockInst(ag, ZIR_INST_C_IMPORT, gz, node); + + GenZir block_scope = makeSubBlock(gz, scope); + block_scope.is_comptime = true; + block_scope.c_import = true; + + // Use fullBodyExpr to inline unlabeled block body (AstGen.zig:10028). + fullBodyExpr(&block_scope, &block_scope.base, RL_NONE_VAL, body_node); + + // ensure_result_used on gz (parent), not block_scope (AstGen.zig:10029). + addUnNode(gz, ZIR_INST_ENSURE_RESULT_USED, ZIR_REF_VOID_VALUE, node); + + // break_inline (AstGen.zig:10030-10032). + makeBreakInline( + &block_scope, block_inst, ZIR_REF_VOID_VALUE, AST_NODE_OFFSET_NONE); + + setBlockBody(ag, &block_scope, block_inst); + // block_scope unstacked now, can add to gz. + gzAppendInstruction(gz, block_inst); + + return block_inst + ZIR_REF_START_INDEX; // toRef() +} + +// Mirrors simpleCBuiltin (AstGen.zig:9938). +static uint32_t simpleCBuiltin(GenZir* gz, Scope* scope, uint32_t node, + uint32_t operand_node, uint16_t ext_tag) { + AstGenCtx* ag = gz->astgen; + + // Evaluate operand as comptime string. + uint32_t operand = expr(gz, scope, operand_node); + + // Emit extended instruction with UnNode payload (AstGen.zig:9954). + ensureExtraCapacity(ag, 2); + uint32_t payload_index = ag->extra_len; + ag->extra[ag->extra_len++] + = (uint32_t)((int32_t)node - (int32_t)gz->decl_node_index); + ag->extra[ag->extra_len++] = operand; + + ZirInstData data; + data.extended.opcode = ext_tag; + data.extended.small = 0xAAAAu; // undefined (addExtendedPayload passes + // undefined for small) + data.extended.operand = payload_index; + addInstruction(gz, ZIR_INST_EXTENDED, data); + + return ZIR_REF_VOID_VALUE; +} + +// Mirrors builtinCall (AstGen.zig:9191) dispatch. +static uint32_t builtinCall( + GenZir* gz, Scope* scope, ResultLoc rl, uint32_t node) { + AstGenCtx* ag = gz->astgen; + const Ast* tree = ag->tree; + + uint32_t builtin_token = tree->nodes.main_tokens[node]; + uint32_t tok_start = tree->tokens.starts[builtin_token]; + const char* source = tree->source; + + // Identify builtin name from source. + // Skip '@' prefix and scan identifier. + uint32_t name_start = tok_start + 1; // skip '@' + uint32_t name_end = name_start; + while (name_end < tree->source_len + && ((source[name_end] >= 'a' && source[name_end] <= 'z') + || (source[name_end] >= 'A' && source[name_end] <= 'Z') + || source[name_end] == '_')) { + name_end++; + } + uint32_t name_len = name_end - name_start; + + // clang-format off + if (name_len == 6 && memcmp(source + name_start, "import", 6) == 0) + return builtinCallImport(gz, scope, node); + if (name_len == 7 && memcmp(source + name_start, "cImport", 7) == 0) + return cImportExpr(gz, scope, node); + if (name_len == 8 && memcmp(source + name_start, "cInclude", 8) == 0) { + AstData nd = tree->nodes.datas[node]; + return simpleCBuiltin(gz, scope, node, nd.lhs, (uint16_t)ZIR_EXT_C_INCLUDE); + } + // @intCast — typeCast pattern (AstGen.zig:9416, 9807-9826). + if (name_len == 7 && memcmp(source + name_start, "intCast", 7) == 0) { + advanceSourceCursorToMainToken(ag, gz, node); + uint32_t saved_line = ag->source_line - gz->decl_line; + uint32_t saved_col = ag->source_column; + uint32_t result_type = rlResultType(gz, rl, node); + AstData nd = tree->nodes.datas[node]; + uint32_t operand = expr(gz, scope, nd.lhs); + emitDbgStmt(gz, saved_line, saved_col); + return addPlNodeBin(gz, ZIR_INST_INT_CAST, node, + result_type, operand); + } + // @embedFile (AstGen.zig:9626). + if (name_len == 9 && memcmp(source + name_start, "embedFile", 9) == 0) { + AstData nd = tree->nodes.datas[node]; + uint32_t operand = expr(gz, scope, nd.lhs); + return addUnNode(gz, ZIR_INST_EMBED_FILE, operand, node); + } + // @intFromEnum (AstGen.zig:9478). + if (name_len == 11 && memcmp(source + name_start, "intFromEnum", 11) == 0) { + AstData nd = tree->nodes.datas[node]; + uint32_t operand = expr(gz, scope, nd.lhs); + return addUnNode(gz, ZIR_INST_INT_FROM_ENUM, operand, node); + } + // @tagName (AstGen.zig:9407) — simpleUnOp with dbg_stmt. + if (name_len == 7 && memcmp(source + name_start, "tagName", 7) == 0) { + advanceSourceCursorToMainToken(ag, gz, node); + uint32_t saved_line = ag->source_line - gz->decl_line; + uint32_t saved_col = ag->source_column; + AstData nd = tree->nodes.datas[node]; + uint32_t operand = expr(gz, scope, nd.lhs); + emitDbgStmt(gz, saved_line, saved_col); + return addUnNode(gz, ZIR_INST_TAG_NAME, operand, node); + } + // @as (AstGen.zig:8909-8920). + if (name_len == 2 && memcmp(source + name_start, "as", 2) == 0) { + AstData nd = tree->nodes.datas[node]; + uint32_t dest_type = typeExpr(gz, scope, nd.lhs); + ResultLoc as_rl = { .tag = RL_TY, .data = dest_type, .src_node = 0, + .ctx = rl.ctx }; + uint32_t operand = exprRl(gz, scope, as_rl, nd.rhs); + return rvalue(gz, rl, operand, node); + } + // @truncate — typeCast pattern (AstGen.zig:9417, 9807-9826). + if (name_len == 8 && memcmp(source + name_start, "truncate", 8) == 0) { + advanceSourceCursorToMainToken(ag, gz, node); + uint32_t saved_line = ag->source_line - gz->decl_line; + uint32_t saved_col = ag->source_column; + uint32_t result_type = rlResultType(gz, rl, node); + AstData nd = tree->nodes.datas[node]; + uint32_t operand = expr(gz, scope, nd.lhs); + emitDbgStmt(gz, saved_line, saved_col); + return addPlNodeBin(gz, ZIR_INST_TRUNCATE, node, + result_type, operand); + } + // @ptrCast — typeCast pattern (AstGen.zig:9056, 9807-9826). + if (name_len == 7 && memcmp(source + name_start, "ptrCast", 7) == 0) { + advanceSourceCursorToMainToken(ag, gz, node); + uint32_t saved_line = ag->source_line - gz->decl_line; + uint32_t saved_col = ag->source_column; + uint32_t result_type = rlResultType(gz, rl, node); + AstData nd = tree->nodes.datas[node]; + uint32_t operand = expr(gz, scope, nd.lhs); + emitDbgStmt(gz, saved_line, saved_col); + return addPlNodeBin(gz, ZIR_INST_PTR_CAST, node, + result_type, operand); + } + // @enumFromInt — typeCast pattern (AstGen.zig:9414, 9807-9826). + if (name_len == 11 && memcmp(source + name_start, "enumFromInt", 11) == 0) { + advanceSourceCursorToMainToken(ag, gz, node); + uint32_t saved_line = ag->source_line - gz->decl_line; + uint32_t saved_col = ag->source_column; + uint32_t result_type = rlResultType(gz, rl, node); + AstData nd = tree->nodes.datas[node]; + uint32_t operand = expr(gz, scope, nd.lhs); + emitDbgStmt(gz, saved_line, saved_col); + return addPlNodeBin(gz, ZIR_INST_ENUM_FROM_INT, node, + result_type, operand); + } + // @bitCast (AstGen.zig:8944-8958, dispatched at 9313). + if (name_len == 7 && memcmp(source + name_start, "bitCast", 7) == 0) { + uint32_t result_type = rlResultType(gz, rl, node); + AstData nd = tree->nodes.datas[node]; + uint32_t operand = expr(gz, scope, nd.lhs); + return addPlNodeBin(gz, ZIR_INST_BITCAST, node, + result_type, operand); + } + // @memcpy (AstGen.zig:9631-9637). + if (name_len == 6 && memcmp(source + name_start, "memcpy", 6) == 0) { + AstData nd = tree->nodes.datas[node]; + uint32_t dst = expr(gz, scope, nd.lhs); + uint32_t src = expr(gz, scope, nd.rhs); + addPlNodeBin(gz, ZIR_INST_MEMCPY, node, dst, src); + return ZIR_REF_VOID_VALUE; + } + // @memset (AstGen.zig:9638-9647). + if (name_len == 6 && memcmp(source + name_start, "memset", 6) == 0) { + AstData nd = tree->nodes.datas[node]; + uint32_t lhs = expr(gz, scope, nd.lhs); + uint32_t lhs_ty = addUnNode(gz, ZIR_INST_TYPEOF, lhs, nd.lhs); + uint32_t elem_ty = + addUnNode(gz, ZIR_INST_INDEXABLE_PTR_ELEM_TYPE, lhs_ty, nd.lhs); + ResultLoc val_rl = { + .tag = RL_COERCED_TY, .data = elem_ty, .src_node = 0}; + uint32_t val = exprRl(gz, scope, val_rl, nd.rhs); + addPlNodeBin(gz, ZIR_INST_MEMSET, node, lhs, val); + return ZIR_REF_VOID_VALUE; + } + // @min (AstGen.zig:9155). + if (name_len == 3 && memcmp(source + name_start, "min", 3) == 0) { + AstData nd = tree->nodes.datas[node]; + uint32_t a = expr(gz, scope, nd.lhs); + uint32_t b = expr(gz, scope, nd.rhs); + return addPlNodeBin(gz, ZIR_INST_MIN, node, a, b); + } + // @max (AstGen.zig:9155). + if (name_len == 3 && memcmp(source + name_start, "max", 3) == 0) { + AstData nd = tree->nodes.datas[node]; + uint32_t a = expr(gz, scope, nd.lhs); + uint32_t b = expr(gz, scope, nd.rhs); + return addPlNodeBin(gz, ZIR_INST_MAX, node, a, b); + } + // clang-format on + + // TODO: handle other builtins. + SET_ERROR(ag); + return ZIR_REF_VOID_VALUE; +} + +// --- identifier (AstGen.zig:8282) --- +// Simplified: handles decl_val resolution for container-level declarations. + +// Tries to resolve an identifier as a primitive type or integer type. +// Returns the ZIR ref if it's a primitive/int type, or ZIR_REF_NONE. +// Mirrors primitive_instrs + integer type checks in identifier() +// (AstGen.zig:8298-8337). +static uint32_t tryResolvePrimitiveIdent(GenZir* gz, uint32_t node) { + AstGenCtx* ag = gz->astgen; + uint32_t ident_token = ag->tree->nodes.main_tokens[node]; + uint32_t tok_start = ag->tree->tokens.starts[ident_token]; + const char* source = ag->tree->source; + uint32_t tok_end = tok_start; + while (tok_end < ag->tree->source_len + && ((source[tok_end] >= 'a' && source[tok_end] <= 'z') + || (source[tok_end] >= 'A' && source[tok_end] <= 'Z') + || (source[tok_end] >= '0' && source[tok_end] <= '9') + || source[tok_end] == '_')) + tok_end++; + uint32_t tok_len = tok_end - tok_start; + + // Check well-known primitive refs (primitive_instrs map, + // AstGen.zig:10236-10281). + // clang-format off + if (tok_len == 2 && memcmp(source+tok_start, "u1", 2) == 0) return ZIR_REF_U1_TYPE; + if (tok_len == 2 && memcmp(source+tok_start, "u8", 2) == 0) return ZIR_REF_U8_TYPE; + if (tok_len == 2 && memcmp(source+tok_start, "i8", 2) == 0) return ZIR_REF_I8_TYPE; + if (tok_len == 3 && memcmp(source+tok_start, "u16", 3) == 0) return ZIR_REF_U16_TYPE; + if (tok_len == 3 && memcmp(source+tok_start, "i16", 3) == 0) return ZIR_REF_I16_TYPE; + if (tok_len == 3 && memcmp(source+tok_start, "u29", 3) == 0) return ZIR_REF_U29_TYPE; + if (tok_len == 3 && memcmp(source+tok_start, "u32", 3) == 0) return ZIR_REF_U32_TYPE; + if (tok_len == 3 && memcmp(source+tok_start, "i32", 3) == 0) return ZIR_REF_I32_TYPE; + if (tok_len == 3 && memcmp(source+tok_start, "u64", 3) == 0) return ZIR_REF_U64_TYPE; + if (tok_len == 3 && memcmp(source+tok_start, "i64", 3) == 0) return ZIR_REF_I64_TYPE; + if (tok_len == 4 && memcmp(source+tok_start, "u128", 4) == 0) return ZIR_REF_U128_TYPE; + if (tok_len == 4 && memcmp(source+tok_start, "i128", 4) == 0) return ZIR_REF_I128_TYPE; + if (tok_len == 5 && memcmp(source+tok_start, "usize", 5) == 0) return ZIR_REF_USIZE_TYPE; + if (tok_len == 5 && memcmp(source+tok_start, "isize", 5) == 0) return ZIR_REF_ISIZE_TYPE; + if (tok_len == 6 && memcmp(source+tok_start, "c_char", 6) == 0) return ZIR_REF_C_CHAR_TYPE; + if (tok_len == 7 && memcmp(source+tok_start, "c_short", 7) == 0) return ZIR_REF_C_SHORT_TYPE; + if (tok_len == 8 && memcmp(source+tok_start, "c_ushort", 8) == 0) return ZIR_REF_C_USHORT_TYPE; + if (tok_len == 5 && memcmp(source+tok_start, "c_int", 5) == 0) return ZIR_REF_C_INT_TYPE; + if (tok_len == 6 && memcmp(source+tok_start, "c_uint", 6) == 0) return ZIR_REF_C_UINT_TYPE; + if (tok_len == 6 && memcmp(source+tok_start, "c_long", 6) == 0) return ZIR_REF_C_LONG_TYPE; + if (tok_len == 7 && memcmp(source+tok_start, "c_ulong", 7) == 0) return ZIR_REF_C_ULONG_TYPE; + if (tok_len == 10 && memcmp(source+tok_start, "c_longlong", 10) == 0) return ZIR_REF_C_LONGLONG_TYPE; + if (tok_len == 11 && memcmp(source+tok_start, "c_ulonglong", 11) == 0) return ZIR_REF_C_ULONGLONG_TYPE; + if (tok_len == 14 && memcmp(source+tok_start, "comptime_float", 14) == 0) return ZIR_REF_COMPTIME_FLOAT_TYPE; + if (tok_len == 12 && memcmp(source+tok_start, "comptime_int", 12) == 0) return ZIR_REF_COMPTIME_INT_TYPE; + if (tok_len == 3 && memcmp(source+tok_start, "f16", 3) == 0) return ZIR_REF_F16_TYPE; + if (tok_len == 3 && memcmp(source+tok_start, "f32", 3) == 0) return ZIR_REF_F32_TYPE; + if (tok_len == 3 && memcmp(source+tok_start, "f64", 3) == 0) return ZIR_REF_F64_TYPE; + if (tok_len == 3 && memcmp(source+tok_start, "f80", 3) == 0) return ZIR_REF_F80_TYPE; + if (tok_len == 4 && memcmp(source+tok_start, "f128", 4) == 0) return ZIR_REF_F128_TYPE; + if (tok_len == 9 && memcmp(source+tok_start, "anyopaque", 9) == 0) return ZIR_REF_ANYOPAQUE_TYPE; + if (tok_len == 4 && memcmp(source+tok_start, "bool", 4) == 0) return ZIR_REF_BOOL_TYPE; + if (tok_len == 4 && memcmp(source+tok_start, "void", 4) == 0) return ZIR_REF_VOID_TYPE; + if (tok_len == 4 && memcmp(source+tok_start, "type", 4) == 0) return ZIR_REF_TYPE_TYPE; + if (tok_len == 8 && memcmp(source+tok_start, "anyerror", 8) == 0) return ZIR_REF_ANYERROR_TYPE; + if (tok_len == 8 && memcmp(source+tok_start, "noreturn", 8) == 0) return ZIR_REF_NORETURN_TYPE; + if (tok_len == 4 && memcmp(source+tok_start, "true", 4) == 0) return ZIR_REF_BOOL_TRUE; + if (tok_len == 5 && memcmp(source+tok_start, "false", 5) == 0) return ZIR_REF_BOOL_FALSE; + if (tok_len == 4 && memcmp(source+tok_start, "null", 4) == 0) return ZIR_REF_NULL_VALUE; + if (tok_len == 9 && memcmp(source+tok_start, "undefined", 9) == 0) return ZIR_REF_UNDEF; + // clang-format on + + // Integer type detection: u29, i13, etc. (AstGen.zig:8304-8336). + if (tok_len >= 2 + && (source[tok_start] == 'u' || source[tok_start] == 'i')) { + // Zig Signedness enum: unsigned=1, signed=0 + uint8_t signedness = (source[tok_start] == 'u') ? 1 : 0; + uint16_t bit_count = 0; + bool valid = true; + for (uint32_t k = tok_start + 1; k < tok_end; k++) { + if (source[k] >= '0' && source[k] <= '9') { + bit_count + = (uint16_t)(bit_count * 10 + (uint16_t)(source[k] - '0')); + } else { + valid = false; + break; + } + } + if (valid && bit_count > 0) { + ZirInstData data; + data.int_type.src_node + = (int32_t)node - (int32_t)gz->decl_node_index; + data.int_type.signedness = signedness; + data.int_type._pad = 0; + data.int_type.bit_count = bit_count; + return addInstruction(gz, ZIR_INST_INT_TYPE, data); + } + } + return ZIR_REF_NONE; +} + +static uint32_t identifierExpr( + GenZir* gz, Scope* scope, ResultLoc rl, uint32_t node) { + AstGenCtx* ag = gz->astgen; + uint32_t ident_token = ag->tree->nodes.main_tokens[node]; + + // Check for primitive types FIRST (AstGen.zig:8298-8338). + uint32_t prim = tryResolvePrimitiveIdent(gz, node); + if (prim != ZIR_REF_NONE) + return rvalue(gz, rl, prim, node); + + // Scope chain walk (AstGen.zig:8340-8461). + uint32_t name_str = identAsString(ag, ident_token); + for (Scope* s = scope; s != NULL;) { + switch (s->tag) { + case SCOPE_LOCAL_VAL: { + ScopeLocalVal* lv = (ScopeLocalVal*)s; + if (lv->name == name_str) + return rvalueNoCoercePreRef(gz, rl, lv->inst, node); + s = lv->parent; + continue; + } + case SCOPE_LOCAL_PTR: { + ScopeLocalPtr* lp = (ScopeLocalPtr*)s; + if (lp->name == name_str) { + if (RL_IS_REF(rl)) + return lp->ptr; + return addUnNode(gz, ZIR_INST_LOAD, lp->ptr, node); + } + s = lp->parent; + continue; + } + case SCOPE_GEN_ZIR: { + GenZir* gzs = (GenZir*)s; + s = gzs->parent; + continue; + } + case SCOPE_DEFER_NORMAL: + case SCOPE_DEFER_ERROR: { + ScopeDefer* sd = (ScopeDefer*)s; + s = sd->parent; + continue; + } + case SCOPE_LABEL: { + ScopeLabel* sl = (ScopeLabel*)s; + s = sl->parent; + continue; + } + case SCOPE_NAMESPACE: + case SCOPE_TOP: + goto decl_table; + } + } +decl_table: + + // Decl table lookup (AstGen.zig:8462-8520). + for (uint32_t i = 0; i < ag->decl_table_len; i++) { + if (ag->decl_names[i] == name_str) { + ZirInstTag itag + = (RL_IS_REF(rl)) ? ZIR_INST_DECL_REF : ZIR_INST_DECL_VAL; + ZirInstData data; + data.str_tok.start = name_str; + data.str_tok.src_tok = tokenIndexToRelative(gz, ident_token); + return addInstruction(gz, itag, data); + } + } + + SET_ERROR(ag); + return ZIR_REF_VOID_VALUE; +} + +// --- fieldAccess (AstGen.zig:6154) --- +// Simplified: emits field_val instruction with Field payload. + +static uint32_t fieldAccessExpr( + GenZir* gz, Scope* scope, ResultLoc rl, uint32_t node) { + AstGenCtx* ag = gz->astgen; + const Ast* tree = ag->tree; + AstData nd = tree->nodes.datas[node]; + + // data.lhs = object node, data.rhs = field identifier token. + uint32_t object_node = nd.lhs; + uint32_t field_ident = nd.rhs; + + // Get field name as string (AstGen.zig:6180). + uint32_t str_index = identAsString(ag, field_ident); + + // Evaluate the LHS object expression (AstGen.zig:6181). + // For .ref rl, LHS is also evaluated with .ref (AstGen.zig:6161). + ResultLoc lhs_rl = (RL_IS_REF(rl)) ? RL_REF_VAL : RL_NONE_VAL; + uint32_t lhs = exprRl(gz, scope, lhs_rl, object_node); + + // Emit dbg_stmt for the dot token (AstGen.zig:6183-6184). + advanceSourceCursorToMainToken(ag, gz, node); + { + uint32_t line = ag->source_line - gz->decl_line; + uint32_t column = ag->source_column; + emitDbgStmt(gz, line, column); + } + + // Emit field_val instruction with Field payload (AstGen.zig:6186-6189). + ensureExtraCapacity(ag, 2); + uint32_t payload_index = ag->extra_len; + ag->extra[ag->extra_len++] = lhs; // Field.lhs + ag->extra[ag->extra_len++] = str_index; // Field.field_name_start + + // .ref → field_ptr, else → field_val (AstGen.zig:6160-6164). + ZirInstTag ftag + = (RL_IS_REF(rl)) ? ZIR_INST_FIELD_PTR : ZIR_INST_FIELD_VAL; + ZirInstData data; + data.pl_node.src_node = (int32_t)node - (int32_t)gz->decl_node_index; + data.pl_node.payload_index = payload_index; + uint32_t access = addInstruction(gz, ftag, data); + // For ref, return directly; otherwise apply rvalue (AstGen.zig:6161-6164). + if (RL_IS_REF(rl)) + return access; + return rvalue(gz, rl, access, node); +} + +// --- ptrType (AstGen.zig:3833) --- + +static uint32_t ptrTypeExpr(GenZir* gz, Scope* scope, uint32_t node) { + AstGenCtx* ag = gz->astgen; + const Ast* tree = ag->tree; + AstNodeTag tag = tree->nodes.tags[node]; + AstData nd = tree->nodes.datas[node]; + uint32_t main_tok = tree->nodes.main_tokens[node]; + + // child_type is always in rhs for all ptr_type variants. + uint32_t child_type_node = nd.rhs; + + // Determine size from main_token (Ast.zig:2122-2131). + // Pointer.Size: one=0, many=1, slice=2, c=3. + uint8_t size; + TokenizerTag main_tok_tag = tree->tokens.tags[main_tok]; + if (main_tok_tag == TOKEN_ASTERISK + || main_tok_tag == TOKEN_ASTERISK_ASTERISK) { + size = 0; // one + } else { + assert(main_tok_tag == TOKEN_L_BRACKET); + TokenizerTag next_tag = tree->tokens.tags[main_tok + 1]; + if (next_tag == TOKEN_ASTERISK) { + // [*c]T vs [*]T: c-pointer if next-next is identifier. + if (tree->tokens.tags[main_tok + 2] == TOKEN_IDENTIFIER) + size = 3; // c + else + size = 1; // many + } else { + size = 2; // slice + } + } + + // Determine sentinel, align, addrspace, bit_range nodes from AST variant + // (Ast.zig:1656-1696). + uint32_t sentinel_node = UINT32_MAX; + uint32_t align_node = UINT32_MAX; + uint32_t addrspace_node = UINT32_MAX; + uint32_t bit_range_start = UINT32_MAX; + uint32_t bit_range_end = UINT32_MAX; + + if (tag == AST_NODE_PTR_TYPE_ALIGNED) { + // opt_node_and_node: lhs = optional align_node (0=none), rhs = child. + if (nd.lhs != 0) + align_node = nd.lhs; + } else if (tag == AST_NODE_PTR_TYPE_SENTINEL) { + // opt_node_and_node: lhs = optional sentinel (0=none), rhs = child. + if (nd.lhs != 0) + sentinel_node = nd.lhs; + } else if (tag == AST_NODE_PTR_TYPE) { + // extra_and_node: lhs = extra index to AstPtrType, rhs = child_type. + const AstPtrType* pt + = (const AstPtrType*)(tree->extra_data.arr + nd.lhs); + if (pt->sentinel != UINT32_MAX) + sentinel_node = pt->sentinel; + if (pt->align_node != UINT32_MAX) + align_node = pt->align_node; + if (pt->addrspace_node != UINT32_MAX) + addrspace_node = pt->addrspace_node; + } else if (tag == AST_NODE_PTR_TYPE_BIT_RANGE) { + // extra_and_node: lhs = extra index to AstPtrTypeBitRange. + const AstPtrTypeBitRange* pt + = (const AstPtrTypeBitRange*)(tree->extra_data.arr + nd.lhs); + if (pt->sentinel != UINT32_MAX) + sentinel_node = pt->sentinel; + align_node = pt->align_node; + if (pt->addrspace_node != UINT32_MAX) + addrspace_node = pt->addrspace_node; + bit_range_start = pt->bit_range_start; + bit_range_end = pt->bit_range_end; + } + + // Scan tokens between main_token and child_type to find const/volatile/ + // allowzero (Ast.zig:2139-2164). + bool has_const = false; + bool has_volatile = false; + bool has_allowzero = false; + { + uint32_t i; + if (sentinel_node != UINT32_MAX) { + i = lastToken(tree, sentinel_node) + 1; + } else if (size == 1 || size == 3) { + // many or c: start after main_token. + i = main_tok + 1; + } else { + i = main_tok; + } + uint32_t end = firstToken(tree, child_type_node); + while (i < end) { + TokenizerTag tt = tree->tokens.tags[i]; + if (tt == TOKEN_KEYWORD_ALLOWZERO) { + has_allowzero = true; + } else if (tt == TOKEN_KEYWORD_CONST) { + has_const = true; + } else if (tt == TOKEN_KEYWORD_VOLATILE) { + has_volatile = true; + } else if (tt == TOKEN_KEYWORD_ALIGN) { + // Skip over align expression. + if (bit_range_end != UINT32_MAX) + i = lastToken(tree, bit_range_end) + 1; + else if (align_node != UINT32_MAX) + i = lastToken(tree, align_node) + 1; + } + i++; + } + } + + // Evaluate element type (AstGen.zig:3847). + uint32_t elem_type = typeExpr(gz, scope, child_type_node); + + // Evaluate trailing expressions (AstGen.zig:3856-3897). + uint32_t sentinel_ref = ZIR_REF_NONE; + uint32_t align_ref = ZIR_REF_NONE; + uint32_t addrspace_ref = ZIR_REF_NONE; + uint32_t bit_start_ref = ZIR_REF_NONE; + uint32_t bit_end_ref = ZIR_REF_NONE; + uint32_t trailing_count = 0; + + if (sentinel_node != UINT32_MAX) { + uint32_t reason = (size == 2) ? COMPTIME_REASON_SLICE_SENTINEL + : COMPTIME_REASON_POINTER_SENTINEL; + ResultLoc srl = { + .tag = RL_TY, .data = elem_type, .src_node = 0, .ctx = RI_CTX_NONE + }; + sentinel_ref = comptimeExpr(gz, scope, srl, sentinel_node, reason); + trailing_count++; + } + if (addrspace_node != UINT32_MAX) { + // Upstream creates addrspace_ty via addBuiltinValue, we don't have + // that yet, so pass RL_NONE (matching previous behavior). + addrspace_ref = comptimeExpr( + gz, scope, RL_NONE_VAL, addrspace_node, COMPTIME_REASON_ADDRSPACE); + trailing_count++; + } + if (align_node != UINT32_MAX) { + ResultLoc arl = { .tag = RL_COERCED_TY, + .data = ZIR_REF_U29_TYPE, + .src_node = 0, + .ctx = RI_CTX_NONE }; + align_ref + = comptimeExpr(gz, scope, arl, align_node, COMPTIME_REASON_ALIGN); + trailing_count++; + } + if (bit_range_start != UINT32_MAX) { + ResultLoc brl = { .tag = RL_COERCED_TY, + .data = ZIR_REF_U16_TYPE, + .src_node = 0, + .ctx = RI_CTX_NONE }; + bit_start_ref = comptimeExpr( + gz, scope, brl, bit_range_start, COMPTIME_REASON_TYPE); + bit_end_ref = comptimeExpr( + gz, scope, brl, bit_range_end, COMPTIME_REASON_TYPE); + trailing_count += 2; + } + + // Build PtrType payload: { elem_type, src_node } + trailing + // (AstGen.zig:3905-3921). + ensureExtraCapacity(ag, 2 + trailing_count); + uint32_t payload_index = ag->extra_len; + ag->extra[ag->extra_len++] = elem_type; + ag->extra[ag->extra_len++] + = (uint32_t)((int32_t)node - (int32_t)gz->decl_node_index); + if (sentinel_ref != ZIR_REF_NONE) + ag->extra[ag->extra_len++] = sentinel_ref; + if (align_ref != ZIR_REF_NONE) + ag->extra[ag->extra_len++] = align_ref; + if (addrspace_ref != ZIR_REF_NONE) + ag->extra[ag->extra_len++] = addrspace_ref; + if (bit_start_ref != ZIR_REF_NONE) { + ag->extra[ag->extra_len++] = bit_start_ref; + ag->extra[ag->extra_len++] = bit_end_ref; + } + + // Build flags packed byte (AstGen.zig:3927-3934). + uint8_t flags = 0; + if (has_allowzero) + flags |= (1 << 0); // is_allowzero + if (!has_const) + flags |= (1 << 1); // is_mutable + if (has_volatile) + flags |= (1 << 2); // is_volatile + if (sentinel_ref != ZIR_REF_NONE) + flags |= (1 << 3); // has_sentinel + if (align_ref != ZIR_REF_NONE) + flags |= (1 << 4); // has_align + if (addrspace_ref != ZIR_REF_NONE) + flags |= (1 << 5); // has_addrspace + if (bit_start_ref != ZIR_REF_NONE) + flags |= (1 << 6); // has_bit_range + + ZirInstData data; + data.ptr_type.flags = flags; + data.ptr_type.size = size; + data.ptr_type._pad = 0; + data.ptr_type.payload_index = payload_index; + return addInstruction(gz, ZIR_INST_PTR_TYPE, data); +} + +// --- arrayType (AstGen.zig:940) --- + +static uint32_t arrayTypeExpr(GenZir* gz, Scope* scope, uint32_t node) { + AstGenCtx* ag = gz->astgen; + const Ast* tree = ag->tree; + AstData nd = tree->nodes.datas[node]; + + // data.lhs = length expr node, data.rhs = element type node. + // Check for `_` identifier → compile error (AstGen.zig:3950-3953). + if (tree->nodes.tags[nd.lhs] == AST_NODE_IDENTIFIER + && isUnderscoreIdent(tree, nd.lhs)) { + SET_ERROR(ag); + return ZIR_REF_VOID_VALUE; + } + ResultLoc len_rl = { .tag = RL_COERCED_TY, + .data = ZIR_REF_USIZE_TYPE, + .src_node = 0, + .ctx = RI_CTX_NONE }; + uint32_t len + = comptimeExpr(gz, scope, len_rl, nd.lhs, COMPTIME_REASON_TYPE); + uint32_t elem_type = typeExpr(gz, scope, nd.rhs); + return addPlNodeBin(gz, ZIR_INST_ARRAY_TYPE, node, len, elem_type); +} + +// --- arrayInitExpr (AstGen.zig:1431) --- +// Simplified: handles typed array init with inferred [_] length. + +static uint32_t arrayInitExpr( + GenZir* gz, Scope* scope, ResultLoc rl, uint32_t node) { + AstGenCtx* ag = gz->astgen; + const Ast* tree = ag->tree; + AstNodeTag tag = tree->nodes.tags[node]; + AstData nd = tree->nodes.datas[node]; + + // Get elements and type expression based on the variant. + uint32_t type_expr_node = 0; + uint32_t elem_buf[2]; + const uint32_t* elements = NULL; + uint32_t elem_count = 0; + + switch (tag) { + case AST_NODE_ARRAY_INIT_ONE: + case AST_NODE_ARRAY_INIT_ONE_COMMA: { + type_expr_node = nd.lhs; + if (nd.rhs != 0) { + elem_buf[0] = nd.rhs; + elements = elem_buf; + elem_count = 1; + } + break; + } + case AST_NODE_ARRAY_INIT: + case AST_NODE_ARRAY_INIT_COMMA: { + // data = node_and_extra: lhs = type_expr, rhs = extra_index. + // extra[rhs] = SubRange.start, extra[rhs+1] = SubRange.end. + // Elements are extra_data[start..end]. + type_expr_node = nd.lhs; + uint32_t extra_idx = nd.rhs; + uint32_t range_start = tree->extra_data.arr[extra_idx]; + uint32_t range_end = tree->extra_data.arr[extra_idx + 1]; + elements = tree->extra_data.arr + range_start; + elem_count = range_end - range_start; + break; + } + default: + SET_ERROR(ag); + return ZIR_REF_VOID_VALUE; + } + + if (type_expr_node == 0 || elem_count == 0) { + SET_ERROR(ag); + return ZIR_REF_VOID_VALUE; + } + + // Check if the type is [_]T (inferred length) (AstGen.zig:1446-1474). + if (tree->nodes.tags[type_expr_node] == AST_NODE_ARRAY_TYPE) { + AstData type_nd = tree->nodes.datas[type_expr_node]; + uint32_t elem_count_node = type_nd.lhs; + uint32_t elem_type_node = type_nd.rhs; + + // Check if elem_count is `_` identifier. + if (tree->nodes.tags[elem_count_node] == AST_NODE_IDENTIFIER + && isUnderscoreIdent(tree, elem_count_node)) { + // Inferred length: addInt(elem_count) (AstGen.zig:1452). + uint32_t len_inst = addInt(gz, elem_count); + uint32_t elem_type = typeExpr(gz, scope, elem_type_node); + uint32_t array_type_inst = addPlNodeBin( + gz, ZIR_INST_ARRAY_TYPE, type_expr_node, len_inst, elem_type); + + // arrayInitExprTyped (AstGen.zig:1484-1513, 1598-1642). + // Only RL_REF produces array_init_ref; all other RLs use + // array_init + rvalue (AstGen.zig:1507-1511). + bool is_ref = (rl.tag == RL_REF); + uint32_t operands_len = elem_count + 1; + ensureExtraCapacity(ag, 1 + operands_len); + uint32_t payload_index = ag->extra_len; + ag->extra[ag->extra_len++] = operands_len; + ag->extra[ag->extra_len++] = array_type_inst; + uint32_t extra_start = ag->extra_len; + ag->extra_len += elem_count; + for (uint32_t i = 0; i < elem_count; i++) { + // Use elem_type as coercion target for each element. + ResultLoc elem_rl = { + .tag = RL_COERCED_TY, .data = elem_type, .src_node = 0 + }; + uint32_t elem_ref = exprRl(gz, scope, elem_rl, elements[i]); + ag->extra[extra_start + i] = elem_ref; + } + ZirInstTag init_tag + = is_ref ? ZIR_INST_ARRAY_INIT_REF : ZIR_INST_ARRAY_INIT; + ZirInstData idata; + idata.pl_node.src_node + = (int32_t)node - (int32_t)gz->decl_node_index; + idata.pl_node.payload_index = payload_index; + uint32_t result = addInstruction(gz, init_tag, idata); + if (is_ref) + return result; + return rvalue(gz, rl, result, node); + } + } + + // Non-inferred length: evaluate type normally. + SET_ERROR(ag); + return ZIR_REF_VOID_VALUE; +} + +// --- simpleBinOp (AstGen.zig:2204) --- + +static uint32_t simpleBinOp( + GenZir* gz, Scope* scope, uint32_t node, ZirInstTag op_tag) { + AstGenCtx* ag = gz->astgen; + AstData nd = ag->tree->nodes.datas[node]; + uint32_t lhs = exprRl(gz, scope, RL_NONE_VAL, nd.lhs); + // For arithmetic ops, advance cursor before RHS (AstGen.zig:6245-6256). + uint32_t saved_line = 0, saved_col = 0; + bool need_dbg = false; + if (op_tag == ZIR_INST_ADD || op_tag == ZIR_INST_SUB + || op_tag == ZIR_INST_MUL || op_tag == ZIR_INST_DIV + || op_tag == ZIR_INST_MOD_REM) { + if (!gz->is_comptime) { + advanceSourceCursorToMainToken(ag, gz, node); + } + saved_line = ag->source_line - gz->decl_line; + saved_col = ag->source_column; + need_dbg = true; + } + uint32_t rhs = exprRl(gz, scope, RL_NONE_VAL, nd.rhs); + if (need_dbg) { + emitDbgStmt(gz, saved_line, saved_col); + } + return addPlNodeBin(gz, op_tag, node, lhs, rhs); +} + +// --- shiftOp (AstGen.zig:9978) --- + +static uint32_t shiftOp( + GenZir* gz, Scope* scope, uint32_t node, ZirInstTag tag) { + AstGenCtx* ag = gz->astgen; + AstData nd = ag->tree->nodes.datas[node]; + uint32_t lhs = exprRl(gz, scope, RL_NONE_VAL, nd.lhs); + + advanceSourceCursorToMainToken(ag, gz, node); + uint32_t saved_line = ag->source_line - gz->decl_line; + uint32_t saved_col = ag->source_column; + + uint32_t log2_int_type + = addUnNode(gz, ZIR_INST_TYPEOF_LOG2_INT_TYPE, lhs, nd.lhs); + ResultLoc rhs_rl = { .tag = RL_TY, + .data = log2_int_type, + .src_node = 0, + .ctx = RI_CTX_SHIFT_OP }; + uint32_t rhs = exprRl(gz, scope, rhs_rl, nd.rhs); + + emitDbgStmt(gz, saved_line, saved_col); + + return addPlNodeBin(gz, tag, node, lhs, rhs); +} + +// --- multilineStringLiteral (AstGen.zig:8645) --- +// Port of strLitNodeAsString for multiline strings. +static uint32_t multilineStringLiteral( + GenZir* gz, Scope* scope, uint32_t node) { + (void)scope; + AstGenCtx* ag = gz->astgen; + const Ast* tree = ag->tree; + AstData nd = tree->nodes.datas[node]; + uint32_t start_tok = nd.lhs; + uint32_t end_tok = nd.rhs; + + uint32_t str_index = ag->string_bytes_len; + + // First line: no preceding newline. + for (uint32_t tok_i = start_tok; tok_i <= end_tok; tok_i++) { + uint32_t tok_start = tree->tokens.starts[tok_i]; + const char* source = tree->source; + // Skip leading `\\` (2 chars). + uint32_t content_start = tok_start + 2; + // Find end of line. + uint32_t content_end = content_start; + while (content_end < tree->source_len && source[content_end] != '\n') + content_end++; + uint32_t line_len = content_end - content_start; + + if (tok_i > start_tok) { + // Prepend newline for lines after the first. + ensureStringBytesCapacity(ag, line_len + 1); + ag->string_bytes[ag->string_bytes_len++] = '\n'; + } else { + ensureStringBytesCapacity(ag, line_len); + } + memcpy(ag->string_bytes + ag->string_bytes_len, source + content_start, + line_len); + ag->string_bytes_len += line_len; + } + + uint32_t len = ag->string_bytes_len - str_index; + ensureStringBytesCapacity(ag, 1); + ag->string_bytes[ag->string_bytes_len++] = 0; // null terminator + + ZirInstData data; + data.str.start = str_index; + data.str.len = len; + return addInstruction(gz, ZIR_INST_STR, data); +} + +// --- ret (AstGen.zig:8119) --- +static uint32_t retExpr(GenZir* gz, Scope* scope, uint32_t node) { + AstGenCtx* ag = gz->astgen; + const Ast* tree = ag->tree; + + // Ensure debug line/column information is emitted for this return + // expression (AstGen.zig:8141-8144). + if (!gz->is_comptime) { + emitDbgNode(gz, node); + } + uint32_t ret_lc_line = ag->source_line - gz->decl_line; + uint32_t ret_lc_column = ag->source_column; + + // AstGen.zig:8123: return outside function is an error. + if (ag->fn_block == NULL) { + SET_ERROR(ag); + return ZIR_REF_UNREACHABLE_VALUE; + } + const Scope* defer_outer = &((GenZir*)ag->fn_block)->base; + + AstData nd = tree->nodes.datas[node]; + uint32_t operand_node = nd.lhs; // optional + + if (operand_node == 0) { + // Void return (AstGen.zig:8148-8156). + genDefers(gz, defer_outer, scope, DEFER_NORMAL_ONLY); + // Restore error trace unconditionally (AstGen.zig:8153). + ZirInstData rdata; + rdata.un_node.operand = ZIR_REF_NONE; + rdata.un_node.src_node = (int32_t)node - (int32_t)gz->decl_node_index; + addInstruction( + gz, ZIR_INST_RESTORE_ERR_RET_INDEX_UNCONDITIONAL, rdata); + addUnNode(gz, ZIR_INST_RET_NODE, ZIR_REF_VOID_VALUE, node); + return ZIR_REF_UNREACHABLE_VALUE; + } + + // Fast path: return error.Foo (AstGen.zig:8159-8175). + if (tree->nodes.tags[operand_node] == AST_NODE_ERROR_VALUE) { + uint32_t error_token = tree->nodes.main_tokens[operand_node] + 2; + uint32_t err_name_str = identAsString(ag, error_token); + DeferCounts dc = countDefers(defer_outer, scope); + if (!dc.need_err_code) { + genDefers(gz, defer_outer, scope, DEFER_BOTH_SANS_ERR); + emitDbgStmt(gz, ret_lc_line, ret_lc_column); + addStrTok(gz, ZIR_INST_RET_ERR_VALUE, err_name_str, error_token); + return ZIR_REF_UNREACHABLE_VALUE; + } + // need_err_code path: not implemented yet, fall through to general. + } + + // Evaluate operand with result location (AstGen.zig:8178-8186). + // If nodes_need_rl contains this return node, use ptr-based RL; + // otherwise use coerced_ty. + ResultLoc ret_rl = RL_NONE_VAL; + bool use_ptr = nodesNeedRlContains(ag, node); + uint32_t ret_ptr_inst = 0; + if (use_ptr) { + // Create ret_ptr instruction (AstGen.zig:8179). + ZirInstData rpdata; + rpdata.node = (int32_t)node - (int32_t)gz->decl_node_index; + ret_ptr_inst = addInstruction(gz, ZIR_INST_RET_PTR, rpdata); + ret_rl.tag = RL_PTR; + ret_rl.data = ret_ptr_inst; + } else if (ag->fn_ret_ty != 0) { + ret_rl.tag = RL_COERCED_TY; + ret_rl.data = ag->fn_ret_ty; + } + ret_rl.ctx = RI_CTX_RETURN; + uint32_t operand = exprRl(gz, scope, ret_rl, operand_node); + + // Emit RESTORE_ERR_RET_INDEX based on nodeMayEvalToError + // (AstGen.zig:8188-8253). + int eval_to_err = nodeMayEvalToError(tree, operand_node); + if (eval_to_err == EVAL_TO_ERROR_NEVER) { + // Returning non-error: pop error trace unconditionally + // (AstGen.zig:8190-8198). + genDefers(gz, defer_outer, scope, DEFER_NORMAL_ONLY); + ZirInstData rdata; + rdata.un_node.operand = ZIR_REF_NONE; + rdata.un_node.src_node = (int32_t)node - (int32_t)gz->decl_node_index; + addInstruction( + gz, ZIR_INST_RESTORE_ERR_RET_INDEX_UNCONDITIONAL, rdata); + emitDbgStmt(gz, ret_lc_line, ret_lc_column); + // addRet (AstGen.zig:13188-13194). + if (use_ptr) { + addUnNode(gz, ZIR_INST_RET_LOAD, ret_ptr_inst, node); + } else { + addUnNode(gz, ZIR_INST_RET_NODE, operand, node); + } + return ZIR_REF_UNREACHABLE_VALUE; + } else if (eval_to_err == EVAL_TO_ERROR_ALWAYS) { + // .always: emit both error defers and regular defers + // (AstGen.zig:8200-8206). + uint32_t err_code = use_ptr + ? addUnNode(gz, ZIR_INST_LOAD, ret_ptr_inst, node) + : operand; + (void)err_code; + // TODO: genDefers with .both = err_code when errdefer is implemented. + genDefers(gz, defer_outer, scope, DEFER_NORMAL_ONLY); + emitDbgStmt(gz, ret_lc_line, ret_lc_column); + if (use_ptr) { + addUnNode(gz, ZIR_INST_RET_LOAD, ret_ptr_inst, node); + } else { + addUnNode(gz, ZIR_INST_RET_NODE, operand, node); + } + return ZIR_REF_UNREACHABLE_VALUE; + } else { + // .maybe (AstGen.zig:8208-8252). + DeferCounts dc = countDefers(defer_outer, scope); + if (!dc.have_err) { + // Only regular defers; no branch needed (AstGen.zig:8210-8220). + genDefers(gz, defer_outer, scope, DEFER_NORMAL_ONLY); + emitDbgStmt(gz, ret_lc_line, ret_lc_column); + uint32_t result = use_ptr + ? addUnNode(gz, ZIR_INST_LOAD, ret_ptr_inst, node) + : operand; + ZirInstData rdata; + rdata.un_node.operand = result; + rdata.un_node.src_node + = (int32_t)node - (int32_t)gz->decl_node_index; + addInstruction(gz, ZIR_INST_RESTORE_ERR_RET_INDEX_FN_ENTRY, rdata); + if (use_ptr) { + addUnNode(gz, ZIR_INST_RET_LOAD, ret_ptr_inst, node); + } else { + addUnNode(gz, ZIR_INST_RET_NODE, operand, node); + } + return ZIR_REF_UNREACHABLE_VALUE; + } + // have_err path: emit conditional branch (not yet implemented). + // Fall through to simplified path. + genDefers(gz, defer_outer, scope, DEFER_NORMAL_ONLY); + emitDbgStmt(gz, ret_lc_line, ret_lc_column); + if (use_ptr) { + addUnNode(gz, ZIR_INST_RET_LOAD, ret_ptr_inst, node); + } else { + addUnNode(gz, ZIR_INST_RET_NODE, operand, node); + } + return ZIR_REF_UNREACHABLE_VALUE; + } +} + +// --- calleeExpr (AstGen.zig:10183) --- +// Returns: 0 = direct call, 1 = field call. + +typedef struct { + bool is_field; + uint32_t obj_ptr; // for field calls: ref to object + uint32_t field_name_start; // for field calls: string index + uint32_t direct; // for direct calls: ref to callee +} Callee; + +static Callee calleeExpr( + GenZir* gz, Scope* scope, ResultLoc rl, uint32_t fn_expr_node) { + AstGenCtx* ag = gz->astgen; + const Ast* tree = ag->tree; + AstNodeTag tag = tree->nodes.tags[fn_expr_node]; + + if (tag == AST_NODE_FIELD_ACCESS) { + AstData nd = tree->nodes.datas[fn_expr_node]; + uint32_t object_node = nd.lhs; + uint32_t field_ident = nd.rhs; + uint32_t str_index = identAsString(ag, field_ident); + // Evaluate object with .ref rl (AstGen.zig:10207). + uint32_t lhs = exprRl(gz, scope, RL_REF_VAL, object_node); + + // Advance to main token (the `.` dot) — not first token + // (AstGen.zig:10209). + advanceSourceCursorToMainToken(ag, gz, fn_expr_node); + { + uint32_t line = ag->source_line - gz->decl_line; + uint32_t column = ag->source_column; + emitDbgStmt(gz, line, column); + } + + Callee c; + c.is_field = true; + c.obj_ptr = lhs; + c.field_name_start = str_index; + c.direct = 0; + return c; + } + + // enum_literal callee: decl literal call syntax (AstGen.zig:10217-10233). + if (tag == AST_NODE_ENUM_LITERAL) { + uint32_t res_ty = rlResultType(gz, rl, fn_expr_node); + if (res_ty != 0) { + uint32_t str_index + = identAsString(ag, tree->nodes.main_tokens[fn_expr_node]); + uint32_t callee = addPlNodeBin(gz, ZIR_INST_DECL_LITERAL_NO_COERCE, + fn_expr_node, res_ty, str_index); + Callee c; + c.is_field = false; + c.direct = callee; + c.obj_ptr = 0; + c.field_name_start = 0; + return c; + } + // No result type: fall through to expr with rl=none. + } + + // Default: direct call (AstGen.zig:10235). + Callee c; + c.is_field = false; + c.direct = expr(gz, scope, fn_expr_node); + c.obj_ptr = 0; + c.field_name_start = 0; + return c; +} + +// --- callExpr (AstGen.zig:10058) --- +static uint32_t callExpr( + GenZir* gz, Scope* scope, ResultLoc rl, uint32_t node) { + AstGenCtx* ag = gz->astgen; + const Ast* tree = ag->tree; + AstNodeTag tag = tree->nodes.tags[node]; + AstData nd = tree->nodes.datas[node]; + + // Extract callee and args from AST. + uint32_t fn_expr_node; + uint32_t arg_buf[2]; + const uint32_t* args = NULL; + uint32_t args_len = 0; + uint32_t lparen_tok; + + switch (tag) { + case AST_NODE_CALL_ONE: + case AST_NODE_CALL_ONE_COMMA: { + fn_expr_node = nd.lhs; + lparen_tok = tree->nodes.main_tokens[node]; + if (nd.rhs != 0) { + arg_buf[0] = nd.rhs; + args = arg_buf; + args_len = 1; + } + break; + } + case AST_NODE_CALL: + case AST_NODE_CALL_COMMA: { + fn_expr_node = nd.lhs; + lparen_tok = tree->nodes.main_tokens[node]; + uint32_t extra_idx = nd.rhs; + uint32_t range_start = tree->extra_data.arr[extra_idx]; + uint32_t range_end = tree->extra_data.arr[extra_idx + 1]; + args = tree->extra_data.arr + range_start; + args_len = range_end - range_start; + break; + } + default: + SET_ERROR(ag); + return ZIR_REF_VOID_VALUE; + } + + Callee callee = calleeExpr(gz, scope, rl, fn_expr_node); + + // dbg_stmt before call (AstGen.zig:10078-10083). + { + advanceSourceCursor(ag, tree->tokens.starts[lparen_tok]); + uint32_t line = ag->source_line - gz->decl_line; + uint32_t column = ag->source_column; + emitDbgStmtForceCurrentIndex(gz, line, column); + } + + // Reserve instruction slot for call (AstGen.zig:10093). + uint32_t call_index = ag->inst_len; + ensureInstCapacity(ag, 1); + memset(&ag->inst_datas[call_index], 0, sizeof(ZirInstData)); + ag->inst_tags[call_index] = (ZirInstTag)0; + ag->inst_len++; + gzAppendInstruction(gz, call_index); + + // Process arguments in sub-blocks (AstGen.zig:10096-10116). + // Upstream uses a separate scratch array; we use a local buffer for body + // lengths and append body instructions to scratch_extra, then copy all + // to extra after the call payload. + uint32_t call_inst = call_index + ZIR_REF_START_INDEX; + ResultLoc arg_rl = { .tag = RL_COERCED_TY, + .data = call_inst, + .src_node = 0, + .ctx = RI_CTX_FN_ARG }; + + // Use scratch_extra to collect body lengths + body instructions, + // mirroring upstream's scratch array (AstGen.zig:10096-10116). + uint32_t scratch_top = ag->scratch_extra_len; + // Reserve space for cumulative body lengths (one per arg). + ensureScratchExtraCapacity(ag, args_len); + ag->scratch_extra_len += args_len; + + for (uint32_t i = 0; i < args_len; i++) { + GenZir arg_block = makeSubBlock(gz, scope); + uint32_t arg_ref + = exprRl(&arg_block, &arg_block.base, arg_rl, args[i]); + + // break_inline with param_node src (AstGen.zig:10108). + int32_t param_src + = (int32_t)args[i] - (int32_t)arg_block.decl_node_index; + makeBreakInline(&arg_block, call_index, arg_ref, param_src); + + // Append arg_block body to scratch_extra (with ref_table fixups). + uint32_t raw_body_len = gzInstructionsLen(&arg_block); + const uint32_t* body = gzInstructionsSlice(&arg_block); + uint32_t fixup_len = countBodyLenAfterFixups(ag, body, raw_body_len); + ensureScratchExtraCapacity(ag, fixup_len); + for (uint32_t j = 0; j < raw_body_len; j++) { + appendPossiblyRefdBodyInstScratch(ag, body[j]); + } + // Record cumulative body length (AstGen.zig:10114). + ag->scratch_extra[scratch_top + i] + = ag->scratch_extra_len - scratch_top; + gzUnstack(&arg_block); + } + + // Build call payload (AstGen.zig:10118-10168). + // Upstream layout: [flags, callee/obj_ptr, field_name_start], then + // body_lengths + body_instructions from scratch. + // Flags layout (packed): modifier:u3, ensure_result_used:bool, + // pop_error_return_trace:bool, args_len:u27. + // pop_error_return_trace = !propagate_error_trace + // (AstGen.zig:10121-10124). + bool propagate_error_trace + = (rl.ctx == RI_CTX_ERROR_HANDLING_EXPR || rl.ctx == RI_CTX_RETURN + || rl.ctx == RI_CTX_FN_ARG || rl.ctx == RI_CTX_CONST_INIT); + uint32_t flags = (propagate_error_trace ? 0u : (1u << 4)) + | ((args_len & 0x7FFFFFFu) << 5); // args_len + + if (callee.is_field) { + // FieldCall: {flags, obj_ptr, field_name_start} (AstGen.zig:10148). + ensureExtraCapacity(ag, 3 + (ag->scratch_extra_len - scratch_top)); + uint32_t payload_index = ag->extra_len; + ag->extra[ag->extra_len++] = flags; + ag->extra[ag->extra_len++] = callee.obj_ptr; + ag->extra[ag->extra_len++] = callee.field_name_start; + // Append scratch data (body lengths + body instructions). + if (args_len != 0) { + memcpy(ag->extra + ag->extra_len, ag->scratch_extra + scratch_top, + (ag->scratch_extra_len - scratch_top) * sizeof(uint32_t)); + ag->extra_len += ag->scratch_extra_len - scratch_top; + } + ag->inst_tags[call_index] = ZIR_INST_FIELD_CALL; + ag->inst_datas[call_index].pl_node.src_node + = (int32_t)node - (int32_t)gz->decl_node_index; + ag->inst_datas[call_index].pl_node.payload_index = payload_index; + } else { + // Call: {flags, callee} (AstGen.zig:10128). + ensureExtraCapacity(ag, 2 + (ag->scratch_extra_len - scratch_top)); + uint32_t payload_index = ag->extra_len; + ag->extra[ag->extra_len++] = flags; + ag->extra[ag->extra_len++] = callee.direct; + // Append scratch data (body lengths + body instructions). + if (args_len != 0) { + memcpy(ag->extra + ag->extra_len, ag->scratch_extra + scratch_top, + (ag->scratch_extra_len - scratch_top) * sizeof(uint32_t)); + ag->extra_len += ag->scratch_extra_len - scratch_top; + } + ag->inst_tags[call_index] = ZIR_INST_CALL; + ag->inst_datas[call_index].pl_node.src_node + = (int32_t)node - (int32_t)gz->decl_node_index; + ag->inst_datas[call_index].pl_node.payload_index = payload_index; + } + + // Restore scratch (AstGen.zig:10097 defer). + ag->scratch_extra_len = scratch_top; + + return call_index + ZIR_REF_START_INDEX; +} + +// --- structInitExpr (AstGen.zig:1674) --- +// Simplified: handles .{} (empty tuple), .{.a = b} (anon init). +static uint32_t structInitExpr( + GenZir* gz, Scope* scope, ResultLoc rl, uint32_t node) { + AstGenCtx* ag = gz->astgen; + const Ast* tree = ag->tree; + AstNodeTag tag = tree->nodes.tags[node]; + AstData nd = tree->nodes.datas[node]; + + // Extract type_expr and fields. + uint32_t type_expr_node = 0; // 0 = anonymous (.{...}) + uint32_t field_buf[2]; + const uint32_t* fields = NULL; + uint32_t fields_len = 0; + + switch (tag) { + case AST_NODE_STRUCT_INIT_DOT_TWO: + case AST_NODE_STRUCT_INIT_DOT_TWO_COMMA: { + // .{.a = lhs, .b = rhs} + uint32_t idx = 0; + if (nd.lhs != 0) + field_buf[idx++] = nd.lhs; + if (nd.rhs != 0) + field_buf[idx++] = nd.rhs; + fields = field_buf; + fields_len = idx; + break; + } + case AST_NODE_STRUCT_INIT_DOT: + case AST_NODE_STRUCT_INIT_DOT_COMMA: { + uint32_t start = nd.lhs; + uint32_t end = nd.rhs; + fields = tree->extra_data.arr + start; + fields_len = end - start; + break; + } + case AST_NODE_STRUCT_INIT_ONE: + case AST_NODE_STRUCT_INIT_ONE_COMMA: { + type_expr_node = nd.lhs; + if (nd.rhs != 0) { + field_buf[0] = nd.rhs; + fields = field_buf; + fields_len = 1; + } + break; + } + case AST_NODE_STRUCT_INIT: + case AST_NODE_STRUCT_INIT_COMMA: { + type_expr_node = nd.lhs; + uint32_t extra_idx = nd.rhs; + uint32_t range_start = tree->extra_data.arr[extra_idx]; + uint32_t range_end = tree->extra_data.arr[extra_idx + 1]; + fields = tree->extra_data.arr + range_start; + fields_len = range_end - range_start; + break; + } + default: + SET_ERROR(ag); + return ZIR_REF_VOID_VALUE; + } + + if (type_expr_node == 0 && fields_len == 0) { + // .{} — depends on result location (AstGen.zig:1687-1698). + if (rl.tag == RL_REF_COERCED_TY) { + return addUnNode( + gz, ZIR_INST_STRUCT_INIT_EMPTY_REF_RESULT, rl.data, node); + } + if (rl.tag == RL_TY || rl.tag == RL_COERCED_TY) { + return addUnNode( + gz, ZIR_INST_STRUCT_INIT_EMPTY_RESULT, rl.data, node); + } + if (rl.tag == RL_DISCARD) { + return ZIR_REF_VOID_VALUE; + } + return ZIR_REF_EMPTY_TUPLE; + } + + // Pre-register all field names to match upstream string ordering. + // Upstream has a duplicate name check (AstGen.zig:1756-1806) that + // adds all field names to string_bytes before evaluating values. + for (uint32_t i = 0; i < fields_len; i++) { + uint32_t name_token = firstToken(tree, fields[i]) - 2; + identAsString(ag, name_token); + } + + if (type_expr_node == 0 && fields_len > 0) { + // structInitExprPtr for RL_PTR (AstGen.zig:1843-1846, 1934-1964). + if (rl.tag == RL_PTR) { + uint32_t struct_ptr_inst + = addUnNode(gz, ZIR_INST_OPT_EU_BASE_PTR_INIT, rl.data, node); + // Block payload: body_len = fields_len. + ensureExtraCapacity(ag, 1 + fields_len); + uint32_t payload_index = ag->extra_len; + ag->extra[ag->extra_len++] = fields_len; + uint32_t items_start = ag->extra_len; + ag->extra_len += fields_len; + + for (uint32_t i = 0; i < fields_len; i++) { + uint32_t field_init = fields[i]; + uint32_t name_token = firstToken(tree, field_init) - 2; + uint32_t str_index = identAsString(ag, name_token); + // struct_init_field_ptr (AstGen.zig:1954-1957). + uint32_t field_ptr + = addPlNodeBin(gz, ZIR_INST_STRUCT_INIT_FIELD_PTR, + field_init, struct_ptr_inst, str_index); + ag->extra[items_start + i] + = field_ptr - ZIR_REF_START_INDEX; // .toIndex() + // Evaluate init with ptr RL (AstGen.zig:1960). + ResultLoc ptr_rl = { .tag = RL_PTR, + .data = field_ptr, + .src_node = 0, + .ctx = rl.ctx }; + exprRl(gz, scope, ptr_rl, field_init); + } + addPlNodePayloadIndex( + gz, ZIR_INST_VALIDATE_PTR_STRUCT_INIT, node, payload_index); + return ZIR_REF_VOID_VALUE; + } + // Anonymous struct init with RL type (AstGen.zig:1706-1731). + if (rl.tag == RL_TY || rl.tag == RL_COERCED_TY) { + uint32_t ty_inst = rl.data; + // validate_struct_init_result_ty (AstGen.zig:1840). + addUnNode( + gz, ZIR_INST_VALIDATE_STRUCT_INIT_RESULT_TY, ty_inst, node); + // structInitExprTyped (AstGen.zig:1896-1931). + ensureExtraCapacity(ag, 3 + fields_len * 2); + uint32_t payload_index = ag->extra_len; + ag->extra[ag->extra_len++] = node; + ag->extra[ag->extra_len++] = ag->source_line; + ag->extra[ag->extra_len++] = fields_len; + uint32_t items_start = ag->extra_len; + ag->extra_len += fields_len * 2; + for (uint32_t i = 0; i < fields_len; i++) { + uint32_t field_init = fields[i]; + uint32_t name_token = firstToken(tree, field_init) - 2; + uint32_t str_index = identAsString(ag, name_token); + uint32_t field_ty_inst + = addPlNodeBin(gz, ZIR_INST_STRUCT_INIT_FIELD_TYPE, + field_init, ty_inst, str_index); + ResultLoc elem_rl = { + .tag = RL_COERCED_TY, .data = field_ty_inst, .src_node = 0 + }; + uint32_t init_ref = exprRl(gz, scope, elem_rl, field_init); + ag->extra[items_start + i * 2] + = field_ty_inst - ZIR_REF_START_INDEX; + ag->extra[items_start + i * 2 + 1] = init_ref; + } + return addPlNodePayloadIndex( + gz, ZIR_INST_STRUCT_INIT, node, payload_index); + } + // Anonymous struct init without RL type (AstGen.zig:1864). + // StructInitAnon payload: abs_node, abs_line, fields_len. + ensureExtraCapacity(ag, 3 + fields_len * 2); + uint32_t payload_index = ag->extra_len; + ag->extra[ag->extra_len++] = node; // abs_node + ag->extra[ag->extra_len++] = ag->source_line; // abs_line + ag->extra[ag->extra_len++] = fields_len; + // Reserve space for field entries. + uint32_t items_start = ag->extra_len; + ag->extra_len += fields_len * 2; + + for (uint32_t i = 0; i < fields_len; i++) { + uint32_t field_init = fields[i]; + // field name is 2 tokens before the field init's first token. + uint32_t name_token = firstToken(tree, field_init) - 2; + uint32_t str_index = identAsString(ag, name_token); + uint32_t init_ref = expr(gz, scope, field_init); + ag->extra[items_start + i * 2] = str_index; + ag->extra[items_start + i * 2 + 1] = init_ref; + } + + return addPlNodePayloadIndex( + gz, ZIR_INST_STRUCT_INIT_ANON, node, payload_index); + } + + // Typed init: evaluate type, emit struct_init_empty or struct_init. + if (type_expr_node != 0 && fields_len == 0) { + // Check for [_]T{} pattern (AstGen.zig:1707-1753). + AstNodeTag type_tag = tree->nodes.tags[type_expr_node]; + if (type_tag == AST_NODE_ARRAY_TYPE + || type_tag == AST_NODE_ARRAY_TYPE_SENTINEL) { + AstData type_nd = tree->nodes.datas[type_expr_node]; + uint32_t elem_count_node = type_nd.lhs; + if (tree->nodes.tags[elem_count_node] == AST_NODE_IDENTIFIER + && isUnderscoreIdent(tree, elem_count_node)) { + // Inferred length with 0 fields → length 0. + if (type_tag == AST_NODE_ARRAY_TYPE) { + uint32_t elem_type = typeExpr(gz, scope, type_nd.rhs); + uint32_t array_type_inst + = addPlNodeBin(gz, ZIR_INST_ARRAY_TYPE, type_expr_node, + ZIR_REF_ZERO_USIZE, elem_type); + return rvalue(gz, rl, + addUnNode(gz, ZIR_INST_STRUCT_INIT_EMPTY, + array_type_inst, node), + node); + } + // ARRAY_TYPE_SENTINEL: extra[rhs] = sentinel, extra[rhs+1] + // = elem_type + uint32_t sentinel_node = tree->extra_data.arr[type_nd.rhs]; + uint32_t elem_type_node + = tree->extra_data.arr[type_nd.rhs + 1]; + uint32_t elem_type = typeExpr(gz, scope, elem_type_node); + ResultLoc sent_rl = { .tag = RL_COERCED_TY, + .data = elem_type, + .src_node = 0, + .ctx = RI_CTX_NONE }; + uint32_t sentinel = comptimeExpr(gz, scope, sent_rl, + sentinel_node, COMPTIME_REASON_ARRAY_SENTINEL); + uint32_t array_type_inst = addPlNodeTriple(gz, + ZIR_INST_ARRAY_TYPE_SENTINEL, type_expr_node, + ZIR_REF_ZERO_USIZE, elem_type, sentinel); + return rvalue(gz, rl, + addUnNode( + gz, ZIR_INST_STRUCT_INIT_EMPTY, array_type_inst, node), + node); + } + } + uint32_t ty_inst = typeExpr(gz, scope, type_expr_node); + return rvalue(gz, rl, + addUnNode(gz, ZIR_INST_STRUCT_INIT_EMPTY, ty_inst, node), node); + } + + // Typed struct init with fields (AstGen.zig:1808-1818). + if (type_expr_node != 0 && fields_len > 0) { + uint32_t ty_inst = typeExpr(gz, scope, type_expr_node); + addUnNode(gz, ZIR_INST_VALIDATE_STRUCT_INIT_TY, ty_inst, node); + + // structInitExprTyped (AstGen.zig:1896-1931). + // StructInit payload: abs_node, abs_line, fields_len. + ensureExtraCapacity(ag, 3 + fields_len * 2); + uint32_t payload_index = ag->extra_len; + ag->extra[ag->extra_len++] = node; // abs_node + ag->extra[ag->extra_len++] = ag->source_line; // abs_line + ag->extra[ag->extra_len++] = fields_len; + // Reserve space for field items (field_type + init each). + uint32_t items_start = ag->extra_len; + ag->extra_len += fields_len * 2; + + for (uint32_t i = 0; i < fields_len; i++) { + uint32_t field_init = fields[i]; + uint32_t name_token = firstToken(tree, field_init) - 2; + uint32_t str_index = identAsString(ag, name_token); + // struct_init_field_type (AstGen.zig:1918-1921). + uint32_t field_ty_inst + = addPlNodeBin(gz, ZIR_INST_STRUCT_INIT_FIELD_TYPE, field_init, + ty_inst, str_index); + // Evaluate init with coerced_ty (AstGen.zig:1924). + ResultLoc elem_rl = { .tag = RL_COERCED_TY, + .data = field_ty_inst, + .src_node = 0, + .ctx = rl.ctx }; + uint32_t init_ref = exprRl(gz, scope, elem_rl, field_init); + ag->extra[items_start + i * 2] + = field_ty_inst - ZIR_REF_START_INDEX; // .toIndex() + ag->extra[items_start + i * 2 + 1] = init_ref; + } + + bool is_ref = (RL_IS_REF(rl)); + ZirInstTag init_tag + = is_ref ? ZIR_INST_STRUCT_INIT_REF : ZIR_INST_STRUCT_INIT; + return addPlNodePayloadIndex(gz, init_tag, node, payload_index); + } + + SET_ERROR(ag); + return ZIR_REF_VOID_VALUE; +} + +// --- tryExpr (AstGen.zig:5957) --- +static uint32_t tryExpr(GenZir* gz, Scope* scope, uint32_t node) { + AstGenCtx* ag = gz->astgen; + AstData nd = ag->tree->nodes.datas[node]; + uint32_t operand_node = nd.lhs; + + if (!gz->is_comptime) { + emitDbgNode(gz, node); + } + uint32_t try_lc_line = ag->source_line - gz->decl_line; + uint32_t try_lc_column = ag->source_column; + + // Evaluate operand (AstGen.zig:5993-6001). + ResultLoc operand_rl = RL_NONE_VAL; + operand_rl.ctx = RI_CTX_ERROR_HANDLING_EXPR; + uint32_t operand = exprRl(gz, scope, operand_rl, operand_node); + + // Create try block instruction (AstGen.zig:6007). + uint32_t try_inst = makeBlockInst(ag, ZIR_INST_TRY, gz, node); + gzAppendInstruction(gz, try_inst); + + // Else scope: extract error code, return it (AstGen.zig:6012-6025). + GenZir else_scope = makeSubBlock(gz, scope); + + uint32_t err_code + = addUnNode(&else_scope, ZIR_INST_ERR_UNION_CODE, operand, node); + + // Emit defers for error path (AstGen.zig:6019). + if (ag->fn_block != NULL) { + const Scope* fn_block_scope = &((GenZir*)ag->fn_block)->base; + genDefers(&else_scope, fn_block_scope, scope, DEFER_BOTH_SANS_ERR); + } + + // Emit dbg_stmt at try keyword for error return tracing (AstGen.zig:6020). + emitDbgStmt(&else_scope, try_lc_line, try_lc_column); + + // ret_node with error code (AstGen.zig:6021). + addUnNode(&else_scope, ZIR_INST_RET_NODE, err_code, node); + + setTryBody(ag, &else_scope, try_inst, operand); + // else_scope unstacked by setTryBody. + + return try_inst + ZIR_REF_START_INDEX; // toRef() +} + +// --- boolBinOp (AstGen.zig:6274) --- +// Short-circuiting boolean and/or. + +static uint32_t boolBinOp( + GenZir* gz, Scope* scope, uint32_t node, ZirInstTag zir_tag) { + AstGenCtx* ag = gz->astgen; + AstData nd = ag->tree->nodes.datas[node]; + uint32_t lhs_node = nd.lhs; + uint32_t rhs_node = nd.rhs; + + // Evaluate LHS (AstGen.zig:6285). + uint32_t lhs = expr(gz, scope, lhs_node); + + // Reserve the bool_br instruction (payload set later) + // (AstGen.zig:6286). + uint32_t bool_br = reserveInstructionIndex(ag); + gzAppendInstruction(gz, bool_br); + + // Evaluate RHS in sub-block (AstGen.zig:6288-6293). + GenZir rhs_scope = makeSubBlock(gz, scope); + uint32_t rhs = expr(&rhs_scope, &rhs_scope.base, rhs_node); + + if (!ag->has_compile_errors) { + // break_inline from rhs to bool_br (AstGen.zig:6292). + makeBreakInline(&rhs_scope, bool_br, rhs, + (int32_t)rhs_node - (int32_t)rhs_scope.decl_node_index); + } + + // setBoolBrBody (AstGen.zig:6294, 11929-11944). + uint32_t raw_body_len = gzInstructionsLen(&rhs_scope); + const uint32_t* body = gzInstructionsSlice(&rhs_scope); + uint32_t body_len = countBodyLenAfterFixups(ag, body, raw_body_len); + ensureExtraCapacity(ag, 2 + body_len); + uint32_t payload_index = ag->extra_len; + ag->extra[ag->extra_len++] = lhs; // BoolBr.lhs + ag->extra[ag->extra_len++] = body_len; // BoolBr.body_len + for (uint32_t i = 0; i < raw_body_len; i++) + appendPossiblyRefdBodyInst(ag, body[i]); + gzUnstack(&rhs_scope); + + // Fill in the bool_br instruction. + ag->inst_tags[bool_br] = zir_tag; + ag->inst_datas[bool_br].pl_node.src_node + = (int32_t)node - (int32_t)gz->decl_node_index; + ag->inst_datas[bool_br].pl_node.payload_index = payload_index; + + return bool_br + ZIR_REF_START_INDEX; +} + +// Mirrors expr (AstGen.zig:634) — main expression dispatcher. +static uint32_t exprRl(GenZir* gz, Scope* scope, ResultLoc rl, uint32_t node) { + AstGenCtx* ag = gz->astgen; + if (node == 0) { + SET_ERROR(ag); + return ZIR_REF_VOID_VALUE; + } + AstNodeTag tag = ag->tree->nodes.tags[node]; + AstData nd = ag->tree->nodes.datas[node]; + + switch (tag) { + case AST_NODE_NUMBER_LITERAL: + return rvalue(gz, rl, numberLiteral(gz, node), node); + case AST_NODE_BUILTIN_CALL_TWO: + case AST_NODE_BUILTIN_CALL_TWO_COMMA: + return rvalue(gz, rl, builtinCall(gz, scope, rl, node), node); + case AST_NODE_FIELD_ACCESS: + return fieldAccessExpr(gz, scope, rl, node); + case AST_NODE_IDENTIFIER: + return identifierExpr(gz, scope, rl, node); + case AST_NODE_STRING_LITERAL: { + // Mirrors stringLiteral (AstGen.zig:8626). + uint32_t str_lit_token = ag->tree->nodes.main_tokens[node]; + uint32_t str_index, str_len; + strLitAsString(ag, str_lit_token, &str_index, &str_len); + ZirInstData data; + data.str.start = str_index; + data.str.len = str_len; + uint32_t str_result = addInstruction(gz, ZIR_INST_STR, data); + return rvalue(gz, rl, str_result, node); + } + // address_of (AstGen.zig:953-960): evaluate operand with .ref rl. + case AST_NODE_ADDRESS_OF: { + uint32_t operand_node = ag->tree->nodes.datas[node].lhs; + // Check for result type to emit validate_ref_ty (AstGen.zig:954-956). + uint32_t res_ty = rlResultType(gz, rl, node); + ResultLoc operand_rl; + if (res_ty != 0) { + addUnTok(gz, ZIR_INST_VALIDATE_REF_TY, res_ty, + firstToken(ag->tree, node)); + // Pass ref_coerced_ty so init expressions can use the type + // (AstGen.zig:958). + operand_rl = (ResultLoc) { + .tag = RL_REF_COERCED_TY, .data = res_ty, .src_node = 0 + }; + } else { + operand_rl = RL_REF_VAL; + } + uint32_t result = exprRl(gz, scope, operand_rl, operand_node); + return rvalue(gz, rl, result, node); + } + // ptr_type (AstGen.zig:1077-1081). + case AST_NODE_PTR_TYPE_ALIGNED: + case AST_NODE_PTR_TYPE_SENTINEL: + case AST_NODE_PTR_TYPE: + case AST_NODE_PTR_TYPE_BIT_RANGE: + return rvalue(gz, rl, ptrTypeExpr(gz, scope, node), node); + // array_type (AstGen.zig:940). + case AST_NODE_ARRAY_TYPE: + return rvalue(gz, rl, arrayTypeExpr(gz, scope, node), node); + // array_init variants (AstGen.zig:836-856). + case AST_NODE_ARRAY_INIT: + case AST_NODE_ARRAY_INIT_COMMA: + case AST_NODE_ARRAY_INIT_ONE: + case AST_NODE_ARRAY_INIT_ONE_COMMA: + return arrayInitExpr(gz, scope, rl, node); + // array_cat (AstGen.zig:772): ++ binary operator. + case AST_NODE_ARRAY_CAT: + return rvalue( + gz, rl, simpleBinOp(gz, scope, node, ZIR_INST_ARRAY_CAT), node); + // grouped_expression (AstGen.zig:1100): passthrough. + case AST_NODE_GROUPED_EXPRESSION: + return exprRl(gz, scope, rl, ag->tree->nodes.datas[node].lhs); + // unreachable_literal (AstGen.zig:846-854). + case AST_NODE_UNREACHABLE_LITERAL: { + emitDbgNode(gz, node); + ZirInstData udata; + memset(&udata, 0, sizeof(udata)); + udata.unreachable_data.src_node + = (int32_t)node - (int32_t)gz->decl_node_index; + addInstruction(gz, ZIR_INST_UNREACHABLE, udata); + return ZIR_REF_UNREACHABLE_VALUE; + } + // enum_literal (AstGen.zig:993). + case AST_NODE_ENUM_LITERAL: { + uint32_t ident_token = ag->tree->nodes.main_tokens[node]; + uint32_t str_index = identAsString(ag, ident_token); + // If result type available, emit decl_literal (AstGen.zig:993-1003). + uint32_t res_ty = rlResultType(gz, rl, node); + if (res_ty != 0) { + uint32_t res = addPlNodeBin( + gz, ZIR_INST_DECL_LITERAL, node, res_ty, str_index); + // decl_literal does the coercion for us (AstGen.zig:1001). + // Only need rvalue for ptr/inferred_ptr/ref_coerced_ty. + if (rl.tag == RL_TY || rl.tag == RL_COERCED_TY) + return res; + return rvalue(gz, rl, res, node); + } + return rvalue(gz, rl, + addStrTok(gz, ZIR_INST_ENUM_LITERAL, str_index, ident_token), + node); + } + // multiline_string_literal (AstGen.zig:8645). + case AST_NODE_MULTILINE_STRING_LITERAL: + return rvalue(gz, rl, multilineStringLiteral(gz, scope, node), node); + // return (AstGen.zig:856). + case AST_NODE_RETURN: + return retExpr(gz, scope, node); + // call (AstGen.zig:783-790). + case AST_NODE_CALL_ONE: + case AST_NODE_CALL_ONE_COMMA: + case AST_NODE_CALL: + case AST_NODE_CALL_COMMA: + return rvalue(gz, rl, callExpr(gz, scope, rl, node), node); + // struct_init (AstGen.zig:836-839). + case AST_NODE_STRUCT_INIT_DOT_TWO: + case AST_NODE_STRUCT_INIT_DOT_TWO_COMMA: + case AST_NODE_STRUCT_INIT_DOT: + case AST_NODE_STRUCT_INIT_DOT_COMMA: + case AST_NODE_STRUCT_INIT_ONE: + case AST_NODE_STRUCT_INIT_ONE_COMMA: + case AST_NODE_STRUCT_INIT: + case AST_NODE_STRUCT_INIT_COMMA: + return structInitExpr(gz, scope, rl, node); + // container_decl (AstGen.zig:1083-1098). + case AST_NODE_CONTAINER_DECL: + case AST_NODE_CONTAINER_DECL_TRAILING: + case AST_NODE_CONTAINER_DECL_TWO: + case AST_NODE_CONTAINER_DECL_TWO_TRAILING: + case AST_NODE_CONTAINER_DECL_ARG: + case AST_NODE_CONTAINER_DECL_ARG_TRAILING: + case AST_NODE_TAGGED_UNION: + case AST_NODE_TAGGED_UNION_TRAILING: + case AST_NODE_TAGGED_UNION_TWO: + case AST_NODE_TAGGED_UNION_TWO_TRAILING: + case AST_NODE_TAGGED_UNION_ENUM_TAG: + case AST_NODE_TAGGED_UNION_ENUM_TAG_TRAILING: + return rvalue(gz, rl, containerDecl(gz, scope, node), node); + // try (AstGen.zig:831). + case AST_NODE_TRY: + return rvalue(gz, rl, tryExpr(gz, scope, node), node); + // Comparison operators (AstGen.zig:714-726). + case AST_NODE_EQUAL_EQUAL: + return rvalue( + gz, rl, simpleBinOp(gz, scope, node, ZIR_INST_CMP_EQ), node); + case AST_NODE_BANG_EQUAL: + return rvalue( + gz, rl, simpleBinOp(gz, scope, node, ZIR_INST_CMP_NEQ), node); + case AST_NODE_LESS_THAN: + return rvalue( + gz, rl, simpleBinOp(gz, scope, node, ZIR_INST_CMP_LT), node); + case AST_NODE_GREATER_THAN: + return rvalue( + gz, rl, simpleBinOp(gz, scope, node, ZIR_INST_CMP_GT), node); + case AST_NODE_LESS_OR_EQUAL: + return rvalue( + gz, rl, simpleBinOp(gz, scope, node, ZIR_INST_CMP_LTE), node); + case AST_NODE_GREATER_OR_EQUAL: + return rvalue( + gz, rl, simpleBinOp(gz, scope, node, ZIR_INST_CMP_GTE), node); + // Arithmetic (AstGen.zig:656-698). + case AST_NODE_ADD: + return rvalue( + gz, rl, simpleBinOp(gz, scope, node, ZIR_INST_ADD), node); + case AST_NODE_SUB: + return rvalue( + gz, rl, simpleBinOp(gz, scope, node, ZIR_INST_SUB), node); + case AST_NODE_MUL: + return rvalue( + gz, rl, simpleBinOp(gz, scope, node, ZIR_INST_MUL), node); + case AST_NODE_DIV: + return rvalue( + gz, rl, simpleBinOp(gz, scope, node, ZIR_INST_DIV), node); + case AST_NODE_MOD: + return rvalue( + gz, rl, simpleBinOp(gz, scope, node, ZIR_INST_MOD), node); + // Bitwise (AstGen.zig:700-712). + case AST_NODE_BIT_AND: + return rvalue( + gz, rl, simpleBinOp(gz, scope, node, ZIR_INST_BIT_AND), node); + case AST_NODE_BIT_OR: + return rvalue( + gz, rl, simpleBinOp(gz, scope, node, ZIR_INST_BIT_OR), node); + case AST_NODE_BIT_XOR: + return rvalue( + gz, rl, simpleBinOp(gz, scope, node, ZIR_INST_XOR), node); + case AST_NODE_SHL: + return rvalue(gz, rl, shiftOp(gz, scope, node, ZIR_INST_SHL), node); + case AST_NODE_SHR: + return rvalue(gz, rl, shiftOp(gz, scope, node, ZIR_INST_SHR), node); + // Boolean operators (AstGen.zig:728-731) — special: boolBinOp. + case AST_NODE_BOOL_AND: + return rvalue( + gz, rl, boolBinOp(gz, scope, node, ZIR_INST_BOOL_BR_AND), node); + case AST_NODE_BOOL_OR: + return rvalue( + gz, rl, boolBinOp(gz, scope, node, ZIR_INST_BOOL_BR_OR), node); + // Unary operators (AstGen.zig:919-938). + case AST_NODE_BOOL_NOT: + return rvalue(gz, rl, + addUnNode(gz, ZIR_INST_BOOL_NOT, expr(gz, scope, nd.lhs), node), + node); + case AST_NODE_BIT_NOT: + return rvalue(gz, rl, + addUnNode(gz, ZIR_INST_BIT_NOT, expr(gz, scope, nd.lhs), node), + node); + case AST_NODE_NEGATION: + return rvalue(gz, rl, + addUnNode(gz, ZIR_INST_NEGATE, expr(gz, scope, nd.lhs), node), + node); + case AST_NODE_NEGATION_WRAP: + return rvalue(gz, rl, + addUnNode(gz, ZIR_INST_NEGATE_WRAP, expr(gz, scope, nd.lhs), node), + node); + // deref (AstGen.zig:942-951). + case AST_NODE_DEREF: { + uint32_t lhs = expr(gz, scope, nd.lhs); + addUnNode(gz, ZIR_INST_VALIDATE_DEREF, lhs, node); + if (RL_IS_REF(rl)) + return lhs; + return rvalue(gz, rl, addUnNode(gz, ZIR_INST_LOAD, lhs, node), node); + } + // optional_type (AstGen.zig:961-964). + case AST_NODE_OPTIONAL_TYPE: + return rvalue(gz, rl, + addUnNode( + gz, ZIR_INST_OPTIONAL_TYPE, typeExpr(gz, scope, nd.lhs), node), + node); + // unwrap_optional (AstGen.zig:966-985). + case AST_NODE_UNWRAP_OPTIONAL: { + uint32_t lhs = expr(gz, scope, nd.lhs); + advanceSourceCursorToMainToken(ag, gz, node); + uint32_t saved_line = ag->source_line - gz->decl_line; + uint32_t saved_col = ag->source_column; + emitDbgStmt(gz, saved_line, saved_col); + return rvalue(gz, rl, + addUnNode(gz, ZIR_INST_OPTIONAL_PAYLOAD_SAFE, lhs, node), node); + } + // error_union type (AstGen.zig:788-797). + case AST_NODE_ERROR_UNION: { + uint32_t lhs = typeExpr(gz, scope, nd.lhs); + uint32_t rhs = typeExpr(gz, scope, nd.rhs); + return rvalue(gz, rl, + addPlNodeBin(gz, ZIR_INST_ERROR_UNION_TYPE, node, lhs, rhs), node); + } + // char_literal (AstGen.zig:8662-8675). + case AST_NODE_CHAR_LITERAL: { + uint32_t main_tok = ag->tree->nodes.main_tokens[node]; + uint32_t tok_start = ag->tree->tokens.starts[main_tok]; + const char* src = ag->tree->source; + uint32_t ci = tok_start + 1; // skip opening quote + uint64_t char_val; + if (src[ci] == '\\') { + // Escape sequence (AstGen.zig:8668-8675). + ci++; + switch (src[ci]) { + case 'n': + char_val = '\n'; + break; + case 'r': + char_val = '\r'; + break; + case 't': + char_val = '\t'; + break; + case '\\': + char_val = '\\'; + break; + case '\'': + char_val = '\''; + break; + case '"': + char_val = '"'; + break; + case 'x': { + // \xNN hex escape. + uint8_t val = 0; + for (int k = 0; k < 2; k++) { + ci++; + char c = src[ci]; + if (c >= '0' && c <= '9') + val = (uint8_t)(val * 16 + (uint8_t)(c - '0')); + else if (c >= 'a' && c <= 'f') + val = (uint8_t)(val * 16 + 10 + (uint8_t)(c - 'a')); + else if (c >= 'A' && c <= 'F') + val = (uint8_t)(val * 16 + 10 + (uint8_t)(c - 'A')); + } + char_val = val; + break; + } + default: + char_val = (uint8_t)src[ci]; + break; + } + } else { + char_val = (uint64_t)(uint8_t)src[ci]; + } + return rvalue(gz, rl, addInt(gz, char_val), node); + } + // arrayAccess (AstGen.zig:6192-6221). + case AST_NODE_ARRAY_ACCESS: { + if (RL_IS_REF(rl)) { + uint32_t lhs = exprRl(gz, scope, RL_REF_VAL, nd.lhs); + advanceSourceCursorToMainToken(ag, gz, node); + uint32_t saved_line = ag->source_line - gz->decl_line; + uint32_t saved_col = ag->source_column; + uint32_t rhs = expr(gz, scope, nd.rhs); + emitDbgStmt(gz, saved_line, saved_col); + return addPlNodeBin(gz, ZIR_INST_ELEM_PTR_NODE, node, lhs, rhs); + } + uint32_t lhs = expr(gz, scope, nd.lhs); + advanceSourceCursorToMainToken(ag, gz, node); + uint32_t saved_line = ag->source_line - gz->decl_line; + uint32_t saved_col = ag->source_column; + uint32_t rhs = expr(gz, scope, nd.rhs); + emitDbgStmt(gz, saved_line, saved_col); + return rvalue(gz, rl, + addPlNodeBin(gz, ZIR_INST_ELEM_VAL_NODE, node, lhs, rhs), node); + } + // slice (AstGen.zig:882-939). + case AST_NODE_SLICE_OPEN: { + // (AstGen.zig:908-937). + uint32_t lhs = exprRl(gz, scope, RL_REF_VAL, nd.lhs); + advanceSourceCursorToMainToken(ag, gz, node); + uint32_t saved_line = ag->source_line - gz->decl_line; + uint32_t saved_col = ag->source_column; + ResultLoc usize_rl = { .tag = RL_COERCED_TY, + .data = ZIR_REF_USIZE_TYPE, + .src_node = 0, + .ctx = RI_CTX_NONE }; + uint32_t start = exprRl(gz, scope, usize_rl, nd.rhs); + emitDbgStmt(gz, saved_line, saved_col); + return rvalue(gz, rl, + addPlNodeBin(gz, ZIR_INST_SLICE_START, node, lhs, start), node); + } + case AST_NODE_SLICE: { + // Slice[rhs]: { start, end } (AstGen.zig:908-937). + const Ast* stree = ag->tree; + uint32_t lhs = exprRl(gz, scope, RL_REF_VAL, nd.lhs); + advanceSourceCursorToMainToken(ag, gz, node); + uint32_t saved_line = ag->source_line - gz->decl_line; + uint32_t saved_col = ag->source_column; + uint32_t start_node = stree->extra_data.arr[nd.rhs]; + uint32_t end_node = stree->extra_data.arr[nd.rhs + 1]; + ResultLoc usize_rl = { .tag = RL_COERCED_TY, + .data = ZIR_REF_USIZE_TYPE, + .src_node = 0, + .ctx = RI_CTX_NONE }; + uint32_t start_ref = exprRl(gz, scope, usize_rl, start_node); + uint32_t end_ref = exprRl(gz, scope, usize_rl, end_node); + emitDbgStmt(gz, saved_line, saved_col); + ensureExtraCapacity(ag, 3); + uint32_t payload_index = ag->extra_len; + ag->extra[ag->extra_len++] = lhs; + ag->extra[ag->extra_len++] = start_ref; + ag->extra[ag->extra_len++] = end_ref; + ZirInstData data; + data.pl_node.src_node = (int32_t)node - (int32_t)gz->decl_node_index; + data.pl_node.payload_index = payload_index; + return rvalue( + gz, rl, addInstruction(gz, ZIR_INST_SLICE_END, data), node); + } + case AST_NODE_SLICE_SENTINEL: { + // SliceSentinel[rhs]: { start, end, sentinel } + // (AstGen.zig:908-925). + const Ast* stree = ag->tree; + uint32_t lhs = exprRl(gz, scope, RL_REF_VAL, nd.lhs); + advanceSourceCursorToMainToken(ag, gz, node); + uint32_t saved_line = ag->source_line - gz->decl_line; + uint32_t saved_col = ag->source_column; + uint32_t start_node = stree->extra_data.arr[nd.rhs]; + uint32_t end_node = stree->extra_data.arr[nd.rhs + 1]; + uint32_t sentinel_node = stree->extra_data.arr[nd.rhs + 2]; + // start/end coerced to usize (AstGen.zig:911-912). + ResultLoc usize_rl = { .tag = RL_COERCED_TY, + .data = ZIR_REF_USIZE_TYPE, + .src_node = 0, + .ctx = RI_CTX_NONE }; + uint32_t start_ref = exprRl(gz, scope, usize_rl, start_node); + uint32_t end_ref = (end_node != 0) + ? exprRl(gz, scope, usize_rl, end_node) + : ZIR_REF_NONE; + // sentinel: create slice_sentinel_ty and coerce (AstGen.zig:913-916). + uint32_t sentinel_ty + = addUnNode(gz, ZIR_INST_SLICE_SENTINEL_TY, lhs, node); + ResultLoc sent_rl = { .tag = RL_COERCED_TY, + .data = sentinel_ty, + .src_node = 0, + .ctx = RI_CTX_NONE }; + uint32_t sentinel_ref = exprRl(gz, scope, sent_rl, sentinel_node); + emitDbgStmt(gz, saved_line, saved_col); + ensureExtraCapacity(ag, 4); + uint32_t payload_index = ag->extra_len; + ag->extra[ag->extra_len++] = lhs; + ag->extra[ag->extra_len++] = start_ref; + ag->extra[ag->extra_len++] = end_ref; + ag->extra[ag->extra_len++] = sentinel_ref; + ZirInstData data; + data.pl_node.src_node = (int32_t)node - (int32_t)gz->decl_node_index; + data.pl_node.payload_index = payload_index; + return rvalue( + gz, rl, addInstruction(gz, ZIR_INST_SLICE_SENTINEL, data), node); + } + // orelse (AstGen.zig:6031-6142). + case AST_NODE_ORELSE: + return orelseCatchExpr(gz, scope, rl, node, false); + // catch (AstGen.zig:6031-6142). + case AST_NODE_CATCH: + return orelseCatchExpr(gz, scope, rl, node, true); + // Block expressions (AstGen.zig:984-992). + case AST_NODE_BLOCK_TWO: + case AST_NODE_BLOCK_TWO_SEMICOLON: + case AST_NODE_BLOCK: + case AST_NODE_BLOCK_SEMICOLON: + return rvalue(gz, rl, blockExprExpr(gz, scope, rl, node), node); + // Anonymous array init (AstGen.zig:1119-1127). + case AST_NODE_ARRAY_INIT_DOT_TWO: + case AST_NODE_ARRAY_INIT_DOT_TWO_COMMA: + case AST_NODE_ARRAY_INIT_DOT: + case AST_NODE_ARRAY_INIT_DOT_COMMA: + return arrayInitDotExpr(gz, scope, rl, node); + // if (AstGen.zig:1013-1024). + case AST_NODE_IF_SIMPLE: + case AST_NODE_IF: + return ifExpr(gz, scope, rlBr(rl), node); + // for (AstGen.zig:1043-1060). + case AST_NODE_FOR_SIMPLE: + case AST_NODE_FOR: + return rvalue(gz, rl, forExpr(gz, scope, node, false), node); + // Merge error sets (AstGen.zig:788-797). + case AST_NODE_MERGE_ERROR_SETS: { + uint32_t lhs = typeExpr(gz, scope, nd.lhs); + uint32_t rhs = typeExpr(gz, scope, nd.rhs); + return rvalue(gz, rl, + addPlNodeBin(gz, ZIR_INST_MERGE_ERROR_SETS, node, lhs, rhs), node); + } + // Wrapping arithmetic. + case AST_NODE_ADD_WRAP: + return rvalue( + gz, rl, simpleBinOp(gz, scope, node, ZIR_INST_ADDWRAP), node); + case AST_NODE_SUB_WRAP: + return rvalue( + gz, rl, simpleBinOp(gz, scope, node, ZIR_INST_SUBWRAP), node); + // break (AstGen.zig:2150-2237). + case AST_NODE_BREAK: { + uint32_t opt_break_label = nd.lhs; // UINT32_MAX = none + uint32_t opt_rhs = nd.rhs; // 0 = none + + // Walk scope chain to find target block (AstGen.zig:2157-2187). + for (Scope* s = scope; s != NULL;) { + if (s->tag == SCOPE_GEN_ZIR) { + GenZir* block_gz = (GenZir*)s; + uint32_t block_inst = UINT32_MAX; + if (opt_break_label != UINT32_MAX) { + // Labeled break: check label on GenZir. + // Use direct source text comparison, not identAsString, + // to avoid adding label names to string_bytes + // (AstGen.zig:2176 uses tokenIdentEql). + if (block_gz->label_token != UINT32_MAX + && tokenIdentEql(ag->tree, opt_break_label, + block_gz->label_token)) { + block_inst = block_gz->label_block_inst; + } + } else { + // Unlabeled break: check break_block. + if (block_gz->break_block != UINT32_MAX) + block_inst = block_gz->break_block; + } + if (block_inst != UINT32_MAX) { + // Found target (AstGen.zig:2188-2228). + ZirInstTag break_tag = block_gz->is_inline + ? ZIR_INST_BREAK_INLINE + : ZIR_INST_BREAK; + if (opt_rhs == 0) { + // Void break (AstGen.zig:2195-2206). + rvalue(gz, block_gz->break_result_info, + ZIR_REF_VOID_VALUE, node); + genDefers(gz, s, scope, DEFER_NORMAL_ONLY); + if (!block_gz->is_comptime) { + ZirInstData rdata; + rdata.un_node.operand + = block_inst + ZIR_REF_START_INDEX; + rdata.un_node.src_node + = (int32_t)node - (int32_t)gz->decl_node_index; + addInstruction(gz, + ZIR_INST_RESTORE_ERR_RET_INDEX_UNCONDITIONAL, + rdata); + } + addBreak(gz, break_tag, block_inst, ZIR_REF_VOID_VALUE, + AST_NODE_OFFSET_NONE); + } else { + // Value break (AstGen.zig:2208-2228). + uint32_t operand = exprRl( + gz, scope, block_gz->break_result_info, opt_rhs); + genDefers(gz, s, scope, DEFER_NORMAL_ONLY); + if (!block_gz->is_comptime) + restoreErrRetIndex(gz, block_inst, + block_gz->break_result_info, opt_rhs, operand); + switch (block_gz->break_result_info.tag) { + case RL_PTR: + case RL_DISCARD: + addBreak(gz, break_tag, block_inst, + ZIR_REF_VOID_VALUE, AST_NODE_OFFSET_NONE); + break; + default: + addBreak(gz, break_tag, block_inst, operand, + (int32_t)opt_rhs + - (int32_t)gz->decl_node_index); + break; + } + } + return ZIR_REF_UNREACHABLE_VALUE; + } + s = block_gz->parent; + } else if (s->tag == SCOPE_LOCAL_VAL) { + s = ((ScopeLocalVal*)s)->parent; + } else if (s->tag == SCOPE_LOCAL_PTR) { + s = ((ScopeLocalPtr*)s)->parent; + } else if (s->tag == SCOPE_DEFER_NORMAL + || s->tag == SCOPE_DEFER_ERROR) { + s = ((ScopeDefer*)s)->parent; + } else if (s->tag == SCOPE_LABEL) { + s = ((ScopeLabel*)s)->parent; + } else { + break; + } + } + SET_ERROR(ag); + return ZIR_REF_UNREACHABLE_VALUE; + } + // continue (AstGen.zig:2246-2340). + case AST_NODE_CONTINUE: { + // Walk scope chain to find GenZir with continue_block. + for (Scope* s = scope; s != NULL;) { + if (s->tag == SCOPE_GEN_ZIR) { + GenZir* gz2 = (GenZir*)s; + if (gz2->continue_block != UINT32_MAX) { + genDefers(gz, s, scope, DEFER_NORMAL_ONLY); + ZirInstTag break_tag = gz2->is_inline + ? ZIR_INST_BREAK_INLINE + : ZIR_INST_BREAK; + if (break_tag == ZIR_INST_BREAK_INLINE) { + // AstGen.zig:2328-2330. + addUnNode(gz, ZIR_INST_CHECK_COMPTIME_CONTROL_FLOW, + gz2->continue_block + ZIR_REF_START_INDEX, node); + } + // Restore error return index (AstGen.zig:2333-2334). + if (!gz2->is_comptime) { + ZirInstData rdata; + rdata.un_node.operand + = gz2->continue_block + ZIR_REF_START_INDEX; + rdata.un_node.src_node + = (int32_t)node - (int32_t)gz->decl_node_index; + addInstruction(gz, + ZIR_INST_RESTORE_ERR_RET_INDEX_UNCONDITIONAL, + rdata); + } + addBreak(gz, break_tag, gz2->continue_block, + ZIR_REF_VOID_VALUE, AST_NODE_OFFSET_NONE); + return ZIR_REF_UNREACHABLE_VALUE; + } + s = gz2->parent; + } else if (s->tag == SCOPE_LOCAL_VAL) { + s = ((ScopeLocalVal*)s)->parent; + } else if (s->tag == SCOPE_LOCAL_PTR) { + s = ((ScopeLocalPtr*)s)->parent; + } else if (s->tag == SCOPE_DEFER_NORMAL + || s->tag == SCOPE_DEFER_ERROR) { + s = ((ScopeDefer*)s)->parent; + } else if (s->tag == SCOPE_LABEL) { + s = ((ScopeLabel*)s)->parent; + } else { + break; + } + } + SET_ERROR(ag); + return ZIR_REF_UNREACHABLE_VALUE; + } + // comptime (AstGen.zig:1104-1105). + case AST_NODE_COMPTIME: { + // comptimeExprAst / comptimeExpr2 (AstGen.zig:2104, 1982). + uint32_t body_node = nd.lhs; + + // If already comptime, just pass through (AstGen.zig:1990-1992). + if (gz->is_comptime) + return exprRl(gz, scope, rl, body_node); + + // Create comptime block (AstGen.zig:2078-2098). + uint32_t block_inst + = makeBlockInst(ag, ZIR_INST_BLOCK_COMPTIME, gz, node); + GenZir block_scope = makeSubBlock(gz, scope); + block_scope.is_comptime = true; + + // Transform RL to type-only (AstGen.zig:2084-2090). + // Runtime-to-comptime boundary: can't pass runtime pointers. + ResultLoc ty_only_rl; + uint32_t res_ty = rlResultType(gz, rl, node); + if (res_ty != 0) + ty_only_rl = (ResultLoc) { .tag = RL_COERCED_TY, + .data = res_ty, + .src_node = 0, + .ctx = rl.ctx }; + else + ty_only_rl = (ResultLoc) { + .tag = RL_NONE, .data = 0, .src_node = 0, .ctx = rl.ctx + }; + + uint32_t result = exprRl(&block_scope, scope, ty_only_rl, body_node); + addBreak(&block_scope, ZIR_INST_BREAK_INLINE, block_inst, result, + AST_NODE_OFFSET_NONE); + setBlockComptimeBody( + ag, &block_scope, block_inst, COMPTIME_REASON_COMPTIME_KEYWORD); + gzAppendInstruction(gz, block_inst); + + // Apply rvalue to handle RL_PTR etc (AstGen.zig:2098). + return rvalue(gz, rl, block_inst + ZIR_REF_START_INDEX, node); + } + // switch (AstGen.zig:1072-1078). + case AST_NODE_SWITCH: + case AST_NODE_SWITCH_COMMA: + return switchExpr(gz, scope, rlBr(rl), node); + // while (AstGen.zig:1037-1042). + case AST_NODE_WHILE_SIMPLE: + case AST_NODE_WHILE_CONT: + case AST_NODE_WHILE: + return rvalue(gz, rl, whileExpr(gz, scope, node, false), node); + // error_value (AstGen.zig:1005-1010). + case AST_NODE_ERROR_VALUE: { + uint32_t error_token = nd.rhs; + uint32_t str = identAsString(ag, error_token); + return rvalue(gz, rl, + addStrTok(gz, ZIR_INST_ERROR_VALUE, str, error_token), node); + } + // error_set_decl (AstGen.zig:5905-5955). + case AST_NODE_ERROR_SET_DECL: { + AstData esd = ag->tree->nodes.datas[node]; + uint32_t lbrace = esd.lhs; + uint32_t rbrace = esd.rhs; + // Reserve 1 extra word for ErrorSetDecl.fields_len. + ensureExtraCapacity(ag, 1 + (rbrace - lbrace)); + uint32_t payload_index = ag->extra_len; + ag->extra_len++; // placeholder for fields_len + uint32_t fields_len = 0; + for (uint32_t tok = lbrace + 1; tok < rbrace; tok++) { + TokenizerTag ttag = ag->tree->tokens.tags[tok]; + if (ttag == TOKEN_DOC_COMMENT || ttag == TOKEN_COMMA) + continue; + if (ttag == TOKEN_IDENTIFIER) { + uint32_t str_index = identAsString(ag, tok); + ensureExtraCapacity(ag, 1); + ag->extra[ag->extra_len++] = str_index; + fields_len++; + } + } + ag->extra[payload_index] = fields_len; + return rvalue(gz, rl, + addPlNodePayloadIndex( + gz, ZIR_INST_ERROR_SET_DECL, node, payload_index), + node); + } + // assign in expr context (AstGen.zig:1011-1014). + case AST_NODE_ASSIGN: + assignStmt(gz, scope, node); + return rvalue(gz, rl, ZIR_REF_VOID_VALUE, node); + // Compound assignment operators (AstGen.zig:685-744). + case AST_NODE_ASSIGN_ADD: + assignOp(gz, scope, node, ZIR_INST_ADD); + return ZIR_REF_VOID_VALUE; + case AST_NODE_ASSIGN_SUB: + assignOp(gz, scope, node, ZIR_INST_SUB); + return ZIR_REF_VOID_VALUE; + case AST_NODE_ASSIGN_MUL: + assignOp(gz, scope, node, ZIR_INST_MUL); + return ZIR_REF_VOID_VALUE; + case AST_NODE_ASSIGN_DIV: + assignOp(gz, scope, node, ZIR_INST_DIV); + return ZIR_REF_VOID_VALUE; + case AST_NODE_ASSIGN_MOD: + assignOp(gz, scope, node, ZIR_INST_MOD_REM); + return ZIR_REF_VOID_VALUE; + case AST_NODE_ASSIGN_BIT_AND: + assignOp(gz, scope, node, ZIR_INST_BIT_AND); + return ZIR_REF_VOID_VALUE; + case AST_NODE_ASSIGN_BIT_OR: + assignOp(gz, scope, node, ZIR_INST_BIT_OR); + return ZIR_REF_VOID_VALUE; + case AST_NODE_ASSIGN_BIT_XOR: + assignOp(gz, scope, node, ZIR_INST_XOR); + return ZIR_REF_VOID_VALUE; + case AST_NODE_ASSIGN_ADD_WRAP: + assignOp(gz, scope, node, ZIR_INST_ADDWRAP); + return ZIR_REF_VOID_VALUE; + case AST_NODE_ASSIGN_SUB_WRAP: + assignOp(gz, scope, node, ZIR_INST_SUBWRAP); + return ZIR_REF_VOID_VALUE; + case AST_NODE_ASSIGN_MUL_WRAP: + assignOp(gz, scope, node, ZIR_INST_MULWRAP); + return ZIR_REF_VOID_VALUE; + case AST_NODE_ASSIGN_ADD_SAT: + assignOp(gz, scope, node, ZIR_INST_ADD_SAT); + return ZIR_REF_VOID_VALUE; + case AST_NODE_ASSIGN_SUB_SAT: + assignOp(gz, scope, node, ZIR_INST_SUB_SAT); + return ZIR_REF_VOID_VALUE; + case AST_NODE_ASSIGN_MUL_SAT: + assignOp(gz, scope, node, ZIR_INST_MUL_SAT); + return ZIR_REF_VOID_VALUE; + default: + SET_ERROR(ag); + return ZIR_REF_VOID_VALUE; + } +} + +static uint32_t expr(GenZir* gz, Scope* scope, uint32_t node) { + return exprRl(gz, scope, RL_NONE_VAL, node); +} + +// --- blockExprExpr (AstGen.zig:2388-2536) --- +// Handles block expressions (labeled and unlabeled). +// Unlabeled blocks just execute statements and return void. +// Labeled blocks (blk: { ... break :blk val; }) need a block instruction. + +static uint32_t blockExprExpr( + GenZir* gz, Scope* scope, ResultLoc rl, uint32_t node) { + (void)rl; + AstGenCtx* ag = gz->astgen; + const Ast* tree = ag->tree; + AstNodeTag tag = tree->nodes.tags[node]; + AstData nd = tree->nodes.datas[node]; + + // Extract statements. + uint32_t stmt_buf[2]; + const uint32_t* statements = NULL; + uint32_t stmt_count = 0; + + switch (tag) { + case AST_NODE_BLOCK_TWO: + case AST_NODE_BLOCK_TWO_SEMICOLON: { + uint32_t idx = 0; + if (nd.lhs != 0) + stmt_buf[idx++] = nd.lhs; + if (nd.rhs != 0) + stmt_buf[idx++] = nd.rhs; + statements = stmt_buf; + stmt_count = idx; + break; + } + case AST_NODE_BLOCK: + case AST_NODE_BLOCK_SEMICOLON: { + uint32_t start = nd.lhs; + uint32_t end = nd.rhs; + statements = tree->extra_data.arr + start; + stmt_count = end - start; + break; + } + default: + SET_ERROR(ag); + return ZIR_REF_VOID_VALUE; + } + + // Check if labeled (AstGen.zig:2397-2402). + // A labeled block has: identifier colon before the lbrace. + uint32_t lbrace = tree->nodes.main_tokens[node]; + bool is_labeled + = (lbrace >= 2 && tree->tokens.tags[lbrace - 1] == TOKEN_COLON + && tree->tokens.tags[lbrace - 2] == TOKEN_IDENTIFIER); + + if (!is_labeled) { + if (!gz->is_comptime) { + // Non-comptime unlabeled block (AstGen.zig:2404-2425). + // Create block_inst FIRST, add to gz, then process body. + uint32_t block_inst = makeBlockInst(ag, ZIR_INST_BLOCK, gz, node); + gzAppendInstruction(gz, block_inst); + + GenZir block_scope = makeSubBlock(gz, scope); + blockExprStmts( + &block_scope, &block_scope.base, statements, stmt_count); + + if (!endsWithNoReturn(&block_scope)) { + // restore_err_ret_index on gz (AstGen.zig:2420). + ZirInstData rdata; + rdata.un_node.operand = block_inst + ZIR_REF_START_INDEX; + rdata.un_node.src_node + = (int32_t)node - (int32_t)gz->decl_node_index; + addInstruction( + gz, ZIR_INST_RESTORE_ERR_RET_INDEX_UNCONDITIONAL, rdata); + // break on block_scope (AstGen.zig:2422). + addBreak(&block_scope, ZIR_INST_BREAK, block_inst, + ZIR_REF_VOID_VALUE, AST_NODE_OFFSET_NONE); + } + setBlockBody(ag, &block_scope, block_inst); + } else { + // Comptime unlabeled block: inline statements + // (AstGen.zig:2426-2429). + GenZir sub_gz = makeSubBlock(gz, scope); + blockExprStmts(&sub_gz, &sub_gz.base, statements, stmt_count); + } + return ZIR_REF_VOID_VALUE; + } + + // Labeled block (AstGen.zig:2466-2536). + bool force_comptime = gz->is_comptime; + uint32_t label_token = lbrace - 2; + + // Compute break result info (AstGen.zig:2484-2492). + bool need_rl = nodesNeedRlContains(ag, node); + ResultLoc break_ri = breakResultInfo(gz, rl, node, need_rl); + bool need_result_rvalue = (break_ri.tag != rl.tag); + + // Reserve the block instruction (AstGen.zig:2500-2501). + ZirInstTag block_tag + = force_comptime ? ZIR_INST_BLOCK_COMPTIME : ZIR_INST_BLOCK; + uint32_t block_inst = makeBlockInst(ag, block_tag, gz, node); + gzAppendInstruction(gz, block_inst); + + GenZir block_scope = makeSubBlock(gz, scope); + block_scope.is_inline = force_comptime; // AstGen.zig:2503 + if (force_comptime) + block_scope.is_comptime = true; + // Set label on block_scope (AstGen.zig:2504-2508). + block_scope.label_token = label_token; + block_scope.label_block_inst = block_inst; + block_scope.break_result_info = break_ri; + + // Process statements (AstGen.zig:2512). + blockExprStmts(&block_scope, &block_scope.base, statements, stmt_count); + + if (!endsWithNoReturn(&block_scope)) { + // Emit restore_err_ret_index (AstGen.zig:2515). + if (!force_comptime) { + ZirInstData rdata; + rdata.un_node.operand = block_inst + ZIR_REF_START_INDEX; + rdata.un_node.src_node + = (int32_t)node - (int32_t)gz->decl_node_index; + addInstruction( + gz, ZIR_INST_RESTORE_ERR_RET_INDEX_UNCONDITIONAL, rdata); + } + // rvalue + break (AstGen.zig:2516-2518). + uint32_t result = rvalue( + gz, block_scope.break_result_info, ZIR_REF_VOID_VALUE, node); + ZirInstTag break_tag + = force_comptime ? ZIR_INST_BREAK_INLINE : ZIR_INST_BREAK; + addBreak( + &block_scope, break_tag, block_inst, result, AST_NODE_OFFSET_NONE); + } + + if (force_comptime) { + setBlockComptimeBody( + ag, &block_scope, block_inst, COMPTIME_REASON_COMPTIME_KEYWORD); + } else { + setBlockBody(ag, &block_scope, block_inst); + } + + // AstGen.zig:2531-2534. + if (need_result_rvalue) + return rvalue(gz, rl, block_inst + ZIR_REF_START_INDEX, node); + return block_inst + ZIR_REF_START_INDEX; +} + +// --- arrayInitDotExpr (AstGen.zig:1576-1595) --- +// Handles anonymous array init: `.{a, b, c}`. +// Emits array_init_anon instruction with MultiOp payload. + +static uint32_t arrayInitDotExpr( + GenZir* gz, Scope* scope, ResultLoc rl, uint32_t node) { + AstGenCtx* ag = gz->astgen; + const Ast* tree = ag->tree; + AstNodeTag tag = tree->nodes.tags[node]; + AstData nd = tree->nodes.datas[node]; + + // Extract elements. + uint32_t elem_buf[2]; + const uint32_t* elements = NULL; + uint32_t elem_count = 0; + + switch (tag) { + case AST_NODE_ARRAY_INIT_DOT_TWO: + case AST_NODE_ARRAY_INIT_DOT_TWO_COMMA: { + uint32_t idx = 0; + if (nd.lhs != 0) + elem_buf[idx++] = nd.lhs; + if (nd.rhs != 0) + elem_buf[idx++] = nd.rhs; + elements = elem_buf; + elem_count = idx; + break; + } + case AST_NODE_ARRAY_INIT_DOT: + case AST_NODE_ARRAY_INIT_DOT_COMMA: { + uint32_t start = nd.lhs; + uint32_t end = nd.rhs; + elements = tree->extra_data.arr + start; + elem_count = end - start; + break; + } + default: + SET_ERROR(ag); + return ZIR_REF_VOID_VALUE; + } + + // Dispatch based on RL (AstGen.zig:1515-1572). + switch (rl.tag) { + case RL_NONE: { + // arrayInitExprAnon (AstGen.zig:1576-1595). + ensureExtraCapacity(ag, 1 + elem_count); + uint32_t payload_index = ag->extra_len; + ag->extra[ag->extra_len++] = elem_count; + uint32_t extra_start = ag->extra_len; + ag->extra_len += elem_count; + for (uint32_t i = 0; i < elem_count; i++) { + uint32_t elem_ref = expr(gz, scope, elements[i]); + ag->extra[extra_start + i] = elem_ref; + } + return addPlNodePayloadIndex( + gz, ZIR_INST_ARRAY_INIT_ANON, node, payload_index); + } + case RL_TY: + case RL_COERCED_TY: { + // validate_array_init_result_ty + arrayInitExprTyped + // (AstGen.zig:1534-1539). + uint32_t result_ty = rl.data; + // Emit ArrayInit { ty, init_count } payload for + // validate_array_init_result_ty. + ensureExtraCapacity(ag, 2); + uint32_t val_payload = ag->extra_len; + ag->extra[ag->extra_len++] = result_ty; + ag->extra[ag->extra_len++] = elem_count; + addPlNodePayloadIndex( + gz, ZIR_INST_VALIDATE_ARRAY_INIT_RESULT_TY, node, val_payload); + + // arrayInitExprTyped (AstGen.zig:1598-1642) with elem_ty=none. + uint32_t operands_len = elem_count + 1; // +1 for type + ensureExtraCapacity(ag, 1 + operands_len); + uint32_t payload_index = ag->extra_len; + ag->extra[ag->extra_len++] = operands_len; + ag->extra[ag->extra_len++] = result_ty; + uint32_t extra_start = ag->extra_len; + ag->extra_len += elem_count; + for (uint32_t i = 0; i < elem_count; i++) { + // array_init_elem_type uses bin data (AstGen.zig:1626-1632). + uint32_t elem_ty + = addBin(gz, ZIR_INST_ARRAY_INIT_ELEM_TYPE, result_ty, i); + ResultLoc elem_rl + = { .tag = RL_COERCED_TY, .data = elem_ty, .src_node = 0 }; + uint32_t elem_ref = exprRl(gz, scope, elem_rl, elements[i]); + ag->extra[extra_start + i] = elem_ref; + } + return addPlNodePayloadIndex( + gz, ZIR_INST_ARRAY_INIT, node, payload_index); + } + case RL_INFERRED_PTR: { + // arrayInitExprAnon + rvalue (AstGen.zig:1545-1551). + ensureExtraCapacity(ag, 1 + elem_count); + uint32_t payload_index = ag->extra_len; + ag->extra[ag->extra_len++] = elem_count; + uint32_t extra_start = ag->extra_len; + ag->extra_len += elem_count; + for (uint32_t i = 0; i < elem_count; i++) { + uint32_t elem_ref = expr(gz, scope, elements[i]); + ag->extra[extra_start + i] = elem_ref; + } + uint32_t result = addPlNodePayloadIndex( + gz, ZIR_INST_ARRAY_INIT_ANON, node, payload_index); + return rvalue(gz, rl, result, node); + } + case RL_DISCARD: { + // Evaluate and discard each element (AstGen.zig:1517-1522). + for (uint32_t i = 0; i < elem_count; i++) { + exprRl(gz, scope, RL_DISCARD_VAL, elements[i]); + } + return ZIR_REF_VOID_VALUE; + } + case RL_REF: { + // arrayInitExprAnon + ref (AstGen.zig:1523-1526). + ensureExtraCapacity(ag, 1 + elem_count); + uint32_t payload_index = ag->extra_len; + ag->extra[ag->extra_len++] = elem_count; + uint32_t extra_start = ag->extra_len; + ag->extra_len += elem_count; + for (uint32_t i = 0; i < elem_count; i++) { + uint32_t elem_ref = expr(gz, scope, elements[i]); + ag->extra[extra_start + i] = elem_ref; + } + uint32_t result = addPlNodePayloadIndex( + gz, ZIR_INST_ARRAY_INIT_ANON, node, payload_index); + return rvalue(gz, rl, result, node); + } + case RL_REF_COERCED_TY: { + // validate_array_init_ref_ty + arrayInitExprTyped + // (AstGen.zig:1527-1532). + uint32_t ptr_ty_inst = rl.data; + ensureExtraCapacity(ag, 2); + uint32_t val_payload = ag->extra_len; + ag->extra[ag->extra_len++] = ptr_ty_inst; + ag->extra[ag->extra_len++] = elem_count; + uint32_t dest_arr_ty_inst = addPlNodePayloadIndex( + gz, ZIR_INST_VALIDATE_ARRAY_INIT_REF_TY, node, val_payload); + + // arrayInitExprTyped with elem_ty=none, is_ref=true. + uint32_t operands_len = elem_count + 1; + ensureExtraCapacity(ag, 1 + operands_len); + uint32_t ai_payload = ag->extra_len; + ag->extra[ag->extra_len++] = operands_len; + ag->extra[ag->extra_len++] = dest_arr_ty_inst; + uint32_t extra_start2 = ag->extra_len; + ag->extra_len += elem_count; + for (uint32_t i = 0; i < elem_count; i++) { + // array_init_elem_type uses bin data (AstGen.zig:1626-1632). + uint32_t elem_ty = addBin( + gz, ZIR_INST_ARRAY_INIT_ELEM_TYPE, dest_arr_ty_inst, i); + ResultLoc elem_rl + = { .tag = RL_COERCED_TY, .data = elem_ty, .src_node = 0 }; + uint32_t elem_ref = exprRl(gz, scope, elem_rl, elements[i]); + ag->extra[extra_start2 + i] = elem_ref; + } + return addPlNodePayloadIndex( + gz, ZIR_INST_ARRAY_INIT_REF, node, ai_payload); + } + case RL_PTR: { + // arrayInitExprPtr (AstGen.zig:1541-1543, 1645-1672). + uint32_t array_ptr_inst + = addUnNode(gz, ZIR_INST_OPT_EU_BASE_PTR_INIT, rl.data, node); + // Block payload: body_len = elem_count. + ensureExtraCapacity(ag, 1 + elem_count); + uint32_t payload_index = ag->extra_len; + ag->extra[ag->extra_len++] = elem_count; + uint32_t items_start = ag->extra_len; + ag->extra_len += elem_count; + + for (uint32_t i = 0; i < elem_count; i++) { + // array_init_elem_ptr: ElemPtrImm{ptr, index}. + uint32_t elem_ptr_inst = addPlNodeBin(gz, + ZIR_INST_ARRAY_INIT_ELEM_PTR, elements[i], array_ptr_inst, i); + ag->extra[items_start + i] + = elem_ptr_inst - ZIR_REF_START_INDEX; // .toIndex() + // Evaluate element with ptr RL (AstGen.zig:1668). + ResultLoc ptr_rl = { .tag = RL_PTR, + .data = elem_ptr_inst, + .src_node = 0, + .ctx = rl.ctx }; + exprRl(gz, scope, ptr_rl, elements[i]); + } + addPlNodePayloadIndex( + gz, ZIR_INST_VALIDATE_PTR_ARRAY_INIT, node, payload_index); + return ZIR_REF_VOID_VALUE; + } + } + + // Fallback: anon init + rvalue. + ensureExtraCapacity(ag, 1 + elem_count); + uint32_t payload_index = ag->extra_len; + ag->extra[ag->extra_len++] = elem_count; + uint32_t extra_start = ag->extra_len; + ag->extra_len += elem_count; + for (uint32_t i = 0; i < elem_count; i++) { + uint32_t elem_ref = expr(gz, scope, elements[i]); + ag->extra[extra_start + i] = elem_ref; + } + uint32_t result = addPlNodePayloadIndex( + gz, ZIR_INST_ARRAY_INIT_ANON, node, payload_index); + return rvalue(gz, rl, result, node); +} + +// --- ifExpr (AstGen.zig:6300-6528) --- +// Handles if and if_simple expressions. +// Pattern: block_scope with condbr → then/else branches → setCondBrPayload. + +static uint32_t ifExpr(GenZir* gz, Scope* scope, ResultLoc rl, uint32_t node) { + AstGenCtx* ag = gz->astgen; + const Ast* tree = ag->tree; + bool need_rl = nodesNeedRlContains(ag, node); + ResultLoc break_rl = breakResultInfo(gz, rl, node, need_rl); + AstNodeTag tag = tree->nodes.tags[node]; + AstData nd = tree->nodes.datas[node]; + + uint32_t cond_node = nd.lhs; + uint32_t then_node, else_node; + + if (tag == AST_NODE_IF_SIMPLE) { + then_node = nd.rhs; + else_node = 0; + } else { + // AST_NODE_IF: rhs is index into extra → If{then_expr, else_expr} + then_node = tree->extra_data.arr[nd.rhs]; + else_node = tree->extra_data.arr[nd.rhs + 1]; + } + + // Detect payload capture: if (cond) |x| (AstGen.zig Ast.fullIf). + // payload_pipe = lastToken(cond_expr) + 2; if pipe → payload_token + 1. + uint32_t payload_token = 0; // 0 = no payload + uint32_t last_cond_tok = lastToken(tree, cond_node); + uint32_t pipe_tok = last_cond_tok + 2; + if (pipe_tok < tree->tokens.len + && tree->tokens.tags[pipe_tok] == TOKEN_PIPE) { + payload_token = pipe_tok + 1; // identifier token + } + + // Detect error token: then_expr else |e| (AstGen.zig Ast.fullIf). + uint32_t error_token = 0; + if (else_node != 0) { + uint32_t else_tok = lastToken(tree, then_node) + 1; // "else" keyword + if (else_tok + 1 < tree->tokens.len + && tree->tokens.tags[else_tok + 1] == TOKEN_PIPE) { + error_token = else_tok + 2; + } + } + + // Create block_scope (AstGen.zig:6326-6328). + GenZir block_scope = makeSubBlock(gz, scope); + + // Emit DBG_STMT for condition (AstGen.zig:6335). + // NOTE: upstream emits into parent_gz AFTER block_scope is created, + // so the dbg_stmt ends up in block_scope's range (shared array). + emitDbgNode(gz, cond_node); + + // Evaluate condition (AstGen.zig:6335-6363). + uint32_t cond_inst; // the value (optional/err-union/bool) + uint32_t bool_bit; // the boolean for condbr + if (error_token != 0) { + // Error union condition: if (err_union) |val| else |err|. + // (AstGen.zig:6341). + ResultLoc cond_rl = RL_NONE_VAL; + cond_rl.ctx = RI_CTX_ERROR_HANDLING_EXPR; + cond_inst + = exprRl(&block_scope, &block_scope.base, cond_rl, cond_node); + bool_bit = addUnNode( + &block_scope, ZIR_INST_IS_NON_ERR, cond_inst, cond_node); + } else if (payload_token != 0) { + // Optional condition: if (optional) |val|. + cond_inst = expr(&block_scope, &block_scope.base, cond_node); + bool_bit = addUnNode( + &block_scope, ZIR_INST_IS_NON_NULL, cond_inst, cond_node); + } else { + // Bool condition (AstGen.zig:6356-6362). + cond_inst = expr(&block_scope, &block_scope.base, cond_node); + bool_bit = cond_inst; + } + + uint32_t condbr = addCondBr(&block_scope, ZIR_INST_CONDBR, node); + uint32_t block_inst = makeBlockInst(ag, ZIR_INST_BLOCK, gz, node); + setBlockBody(ag, &block_scope, block_inst); + gzAppendInstruction(gz, block_inst); + + // Then branch (AstGen.zig:6372-6441). + GenZir then_scope = makeSubBlock(gz, scope); + Scope* then_sub_scope = &then_scope.base; + ScopeLocalVal payload_val_scope; + memset(&payload_val_scope, 0, sizeof(payload_val_scope)); + + if (error_token != 0 && payload_token != 0) { + // Error union with payload: unwrap payload (AstGen.zig:6379-6407). + uint32_t payload_inst = addUnNode(&then_scope, + ZIR_INST_ERR_UNION_PAYLOAD_UNSAFE, cond_inst, then_node); + uint32_t ident_name = identAsString(ag, payload_token); + payload_val_scope = (ScopeLocalVal) { + .base = { .tag = SCOPE_LOCAL_VAL }, + .parent = &then_scope.base, + .gen_zir = &then_scope, + .inst = payload_inst, + .token_src = payload_token, + .name = ident_name, + }; + addDbgVar(&then_scope, ZIR_INST_DBG_VAR_VAL, ident_name, payload_inst); + then_sub_scope = &payload_val_scope.base; + } else if (payload_token != 0) { + // Optional with payload: unwrap optional (AstGen.zig:6408-6431). + uint32_t payload_inst = addUnNode(&then_scope, + ZIR_INST_OPTIONAL_PAYLOAD_UNSAFE, cond_inst, then_node); + uint32_t ident_name = identAsString(ag, payload_token); + payload_val_scope = (ScopeLocalVal) { + .base = { .tag = SCOPE_LOCAL_VAL }, + .parent = &then_scope.base, + .gen_zir = &then_scope, + .inst = payload_inst, + .token_src = payload_token, + .name = ident_name, + }; + addDbgVar(&then_scope, ZIR_INST_DBG_VAR_VAL, ident_name, payload_inst); + then_sub_scope = &payload_val_scope.base; + } + + // Use fullBodyExpr for then body (AstGen.zig:6437). + uint32_t then_result + = fullBodyExpr(&then_scope, then_sub_scope, break_rl, then_node); + if (!endsWithNoReturn(&then_scope)) { + addBreak(&then_scope, ZIR_INST_BREAK, block_inst, then_result, + (int32_t)then_node - (int32_t)gz->decl_node_index); + } + + // Else branch (AstGen.zig:6443-6489). + GenZir else_scope = makeSubBlock(gz, scope); + + // save_err_ret_index (AstGen.zig:6448-6449). + bool do_err_trace = ag->fn_ret_ty != 0 && error_token != 0; + if (do_err_trace && nodeMayAppendToErrorTrace(tree, cond_node)) + addSaveErrRetIndex(&else_scope, ZIR_REF_NONE); + + if (else_node != 0) { + Scope* else_sub_scope = &else_scope.base; + ScopeLocalVal error_val_scope; + memset(&error_val_scope, 0, sizeof(error_val_scope)); + + if (error_token != 0) { + // Error capture: else |err| (AstGen.zig:6452-6475). + uint32_t err_inst = addUnNode( + &else_scope, ZIR_INST_ERR_UNION_CODE, cond_inst, cond_node); + uint32_t err_name = identAsString(ag, error_token); + error_val_scope = (ScopeLocalVal) { + .base = { .tag = SCOPE_LOCAL_VAL }, + .parent = &else_scope.base, + .gen_zir = &else_scope, + .inst = err_inst, + .token_src = error_token, + .name = err_name, + }; + addDbgVar(&else_scope, ZIR_INST_DBG_VAR_VAL, err_name, err_inst); + else_sub_scope = &error_val_scope.base; + } + + // Use fullBodyExpr for else body (AstGen.zig:6478). + uint32_t else_result + = fullBodyExpr(&else_scope, else_sub_scope, break_rl, else_node); + if (!endsWithNoReturn(&else_scope)) { + // Restore error return index (AstGen.zig:6480-6482). + if (do_err_trace) + restoreErrRetIndex( + &else_scope, block_inst, break_rl, else_node, else_result); + addBreak(&else_scope, ZIR_INST_BREAK, block_inst, else_result, + (int32_t)else_node - (int32_t)gz->decl_node_index); + } + } else { + addBreak(&else_scope, ZIR_INST_BREAK, block_inst, ZIR_REF_VOID_VALUE, + AST_NODE_OFFSET_NONE); + } + + // Wire up condbr (AstGen.zig:6491). + setCondBrPayload(ag, condbr, bool_bit, &then_scope, &else_scope); + + // AstGen.zig:6493-6497. + bool need_result_rvalue = (break_rl.tag != rl.tag); + if (need_result_rvalue) + return rvalue(gz, rl, block_inst + ZIR_REF_START_INDEX, node); + return block_inst + ZIR_REF_START_INDEX; +} + +// --- forExpr (AstGen.zig:6819-7125) --- +// Handles for_simple and for (multi-input). +// Supports both indexable and for_range inputs. + +#define FOR_MAX_INPUTS 16 + +static uint32_t forExpr( + GenZir* gz, Scope* scope, uint32_t node, bool is_statement) { + AstGenCtx* ag = gz->astgen; + const Ast* tree = ag->tree; + AstData nd = tree->nodes.datas[node]; + AstNodeTag node_tag = tree->nodes.tags[node]; + + // Detect inline keyword (AstGen.zig:6847). + uint32_t main_token = tree->nodes.main_tokens[node]; + bool is_inline = (main_token > 0 + && tree->tokens.tags[main_token - 1] == TOKEN_KEYWORD_INLINE); + + // Extract input nodes and body/else nodes. + // FOR_SIMPLE: lhs = input node, rhs = body (Ast.zig:1960-1968). + // FOR: lhs = extra_data index, rhs = packed AstFor (Ast.zig:1970-1981). + uint32_t input_nodes[FOR_MAX_INPUTS]; + uint32_t num_inputs; + uint32_t body_node; + if (node_tag == AST_NODE_FOR_SIMPLE) { + input_nodes[0] = nd.lhs; + num_inputs = 1; + body_node = nd.rhs; + } else { + uint32_t extra_idx = nd.lhs; + AstFor for_data; + memcpy(&for_data, &nd.rhs, sizeof(AstFor)); + num_inputs = for_data.inputs; + if (num_inputs == 0 || num_inputs > FOR_MAX_INPUTS) { + SET_ERROR(ag); + return ZIR_REF_VOID_VALUE; + } + for (uint32_t i = 0; i < num_inputs; i++) + input_nodes[i] = tree->extra_data.arr[extra_idx + i]; + body_node = tree->extra_data.arr[extra_idx + num_inputs]; + } + + // Per-input arrays (AstGen.zig:6858-6862). + uint32_t indexables[FOR_MAX_INPUTS]; + uint32_t lens[FOR_MAX_INPUTS][2]; // [ref0, ref1] per input + + // Allocate index counter (AstGen.zig:6865-6874). + ZirInstTag alloc_tag + = is_inline ? ZIR_INST_ALLOC_COMPTIME_MUT : ZIR_INST_ALLOC; + uint32_t index_ptr = addUnNode(gz, alloc_tag, ZIR_REF_USIZE_TYPE, node); + addPlNodeBin(gz, ZIR_INST_STORE_NODE, node, index_ptr, ZIR_REF_ZERO_USIZE); + + // Compute payload_token (AstGen.zig fullForComponents:2349-2350). + // payload_token = lastToken(inputs[last]) + 3 + has_comma + uint32_t last_cond_tok = lastToken(tree, input_nodes[num_inputs - 1]); + bool has_comma = (last_cond_tok + 1 < tree->tokens.len + && tree->tokens.tags[last_cond_tok + 1] == TOKEN_COMMA); + uint32_t payload_token = last_cond_tok + 3 + (has_comma ? 1 : 0); + + // Process each input (AstGen.zig:6878-6925). + uint32_t capture_token = payload_token; + for (uint32_t i = 0; i < num_inputs; i++) { + uint32_t input = input_nodes[i]; + // Advance capture_token past this capture's ident (+comma). + bool capture_is_ref + = (tree->tokens.tags[capture_token] == TOKEN_ASTERISK); + uint32_t ident_tok = capture_token + (capture_is_ref ? 1u : 0u); + capture_token = ident_tok + 2; // skip ident + comma/pipe + + emitDbgNode(gz, input); + + if (tree->nodes.tags[input] == AST_NODE_FOR_RANGE) { + // Range input (AstGen.zig:6892-6916). + AstData range_nd = tree->nodes.datas[input]; + uint32_t start_node = range_nd.lhs; + uint32_t end_node = range_nd.rhs; + + // AstGen.zig:6897-6902: expr with .rl = .{ .ty = .usize_type } + ResultLoc usize_rl + = { .tag = RL_TY, .data = ZIR_REF_USIZE_TYPE, .src_node = 0 }; + uint32_t start_val = exprRl(gz, scope, usize_rl, start_node); + + uint32_t end_val = ZIR_REF_NONE; + if (end_node != 0) { + end_val = exprRl(gz, scope, usize_rl, end_node); + } + + if (end_val == ZIR_REF_NONE) { + lens[i][0] = ZIR_REF_NONE; + lens[i][1] = ZIR_REF_NONE; + } else { + lens[i][0] = start_val; + lens[i][1] = end_val; + } + + // Check if start is trivially zero. + bool start_is_zero = false; + if (tree->nodes.tags[start_node] == AST_NODE_NUMBER_LITERAL) { + uint32_t tok = tree->nodes.main_tokens[start_node]; + uint32_t ts = tree->tokens.starts[tok]; + if (tree->source[ts] == '0' + && (ts + 1 >= tree->source_len + || tree->source[ts + 1] < '0' + || tree->source[ts + 1] > '9')) + start_is_zero = true; + } + indexables[i] = start_is_zero ? ZIR_REF_NONE : start_val; + } else { + // Regular indexable (AstGen.zig:6918-6923). + uint32_t indexable = expr(gz, scope, input); + indexables[i] = indexable; + lens[i][0] = indexable; + lens[i][1] = ZIR_REF_NONE; + } + } + + // Emit for_len as MultiOp (AstGen.zig:6933-6942). + uint32_t len; + { + uint32_t operands_len = num_inputs * 2; + ensureExtraCapacity(ag, 1 + operands_len); + uint32_t payload_index = ag->extra_len; + ag->extra[ag->extra_len++] = operands_len; + for (uint32_t i = 0; i < num_inputs; i++) { + ag->extra[ag->extra_len++] = lens[i][0]; + ag->extra[ag->extra_len++] = lens[i][1]; + } + ZirInstData data; + data.pl_node.src_node = (int32_t)node - (int32_t)gz->decl_node_index; + data.pl_node.payload_index = payload_index; + len = addInstruction(gz, ZIR_INST_FOR_LEN, data); + } + + // Create loop (AstGen.zig:6944-6956). + ZirInstTag loop_tag = is_inline ? ZIR_INST_BLOCK_INLINE : ZIR_INST_LOOP; + uint32_t loop_inst = makeBlockInst(ag, loop_tag, gz, node); + + GenZir loop_scope = makeSubBlock(gz, scope); + loop_scope.is_inline = is_inline; + + // Load index (AstGen.zig:6955-6956). + // We need to finish loop_scope later once we have the deferred refs from + // then_scope. However, the load must be removed from instructions in the + // meantime or it appears to be part of parent_gz. + uint32_t index = addUnNode(&loop_scope, ZIR_INST_LOAD, index_ptr, node); + ag->scratch_inst_len--; // pop from loop_scope (AstGen.zig:6956) + + // Condition: added to cond_scope (AstGen.zig:6958-6962). + GenZir cond_scope = makeSubBlock(gz, &loop_scope.base); + uint32_t cond + = addPlNodeBin(&cond_scope, ZIR_INST_CMP_LT, node, index, len); + + // Create condbr + block (AstGen.zig:6967-6974). + ZirInstTag condbr_tag + = is_inline ? ZIR_INST_CONDBR_INLINE : ZIR_INST_CONDBR; + uint32_t condbr = addCondBr(&cond_scope, condbr_tag, node); + ZirInstTag block_tag = is_inline ? ZIR_INST_BLOCK_INLINE : ZIR_INST_BLOCK; + uint32_t cond_block = makeBlockInst(ag, block_tag, &loop_scope, node); + setBlockBody(ag, &cond_scope, cond_block); + + loop_scope.break_block = loop_inst; + loop_scope.continue_block = cond_block; // AstGen.zig:6974 + + // Then branch: loop body (AstGen.zig:6982-7065). + GenZir then_scope = makeSubBlock(gz, &cond_scope.base); + + // Set up capture scopes for all inputs (AstGen.zig:6986-7045). + ScopeLocalVal capture_scopes[FOR_MAX_INPUTS]; + Scope* body_scope_parent = &then_scope.base; + { + capture_token = payload_token; + for (uint32_t i = 0; i < num_inputs; i++) { + uint32_t input = input_nodes[i]; + bool capture_is_ref + = (tree->tokens.tags[capture_token] == TOKEN_ASTERISK); + uint32_t ident_tok = capture_token + (capture_is_ref ? 1u : 0u); + capture_token = ident_tok + 2; + + // Check if discard (AstGen.zig:6999). + uint32_t ts = tree->tokens.starts[ident_tok]; + bool is_discard = (tree->source[ts] == '_' + && (ts + 1 >= tree->source_len + || !((tree->source[ts + 1] >= 'a' + && tree->source[ts + 1] <= 'z') + || (tree->source[ts + 1] >= 'A' + && tree->source[ts + 1] <= 'Z') + || tree->source[ts + 1] == '_' + || (tree->source[ts + 1] >= '0' + && tree->source[ts + 1] <= '9')))); + if (is_discard) + continue; + + // Compute capture inst (AstGen.zig:7004-7028). + uint32_t capture_inst; + bool is_counter = (tree->nodes.tags[input] == AST_NODE_FOR_RANGE); + + if (indexables[i] == ZIR_REF_NONE) { + // Start=0 counter: use index directly. + capture_inst = index; + } else if (is_counter) { + // Counter with nonzero start: add. + capture_inst = addPlNodeBin( + &then_scope, ZIR_INST_ADD, input, indexables[i], index); + } else if (capture_is_ref) { + // Indexable by ref: elem_ptr. + capture_inst = addPlNodeBin(&then_scope, ZIR_INST_ELEM_PTR, + input, indexables[i], index); + } else { + // Indexable by val: elem_val. + capture_inst = addPlNodeBin(&then_scope, ZIR_INST_ELEM_VAL, + input, indexables[i], index); + } + + uint32_t name_str = identAsString(ag, ident_tok); + capture_scopes[i] = (ScopeLocalVal) { + .base = { .tag = SCOPE_LOCAL_VAL }, + .parent = body_scope_parent, + .gen_zir = &then_scope, + .inst = capture_inst, + .token_src = ident_tok, + .name = name_str, + }; + // AstGen.zig:7040. + addDbgVar( + &then_scope, ZIR_INST_DBG_VAR_VAL, name_str, capture_inst); + body_scope_parent = &capture_scopes[i].base; + } + } + + // Execute body (AstGen.zig:7047-7048). + uint32_t then_result + = fullBodyExpr(&then_scope, body_scope_parent, RL_NONE_VAL, body_node); + addEnsureResult(&then_scope, then_result, body_node); + + // dbg_stmt + dbg_empty_stmt (AstGen.zig:7052-7061). + advanceSourceCursor(ag, tree->tokens.starts[lastToken(tree, body_node)]); + emitDbgStmt(gz, ag->source_line - gz->decl_line, ag->source_column); + { + ZirInstData ext_data; + ext_data.extended.opcode = (uint16_t)ZIR_EXT_DBG_EMPTY_STMT; + ext_data.extended.small = 0; + ext_data.extended.operand = 0; + addInstruction(gz, ZIR_INST_EXTENDED, ext_data); + } + + ZirInstTag break_tag = is_inline ? ZIR_INST_BREAK_INLINE : ZIR_INST_BREAK; + addBreak(&then_scope, break_tag, cond_block, ZIR_REF_VOID_VALUE, + AST_NODE_OFFSET_NONE); + + // Else branch: break out of loop (AstGen.zig:7066-7091). + GenZir else_scope = makeSubBlock(gz, &cond_scope.base); + addBreak(&else_scope, break_tag, loop_inst, ZIR_REF_VOID_VALUE, + AST_NODE_OFFSET_NONE); + + setCondBrPayload(ag, condbr, cond, &then_scope, &else_scope); + + // then_scope and else_scope unstacked now. Resurrect loop_scope to + // finally finish it (AstGen.zig:7095-7113). + { + // Reset loop_scope instructions and re-add index + cond_block. + loop_scope.instructions_top = ag->scratch_inst_len; + gzAppendInstruction(&loop_scope, index - ZIR_REF_START_INDEX); + gzAppendInstruction(&loop_scope, cond_block); + + // Increment the index variable (AstGen.zig:7100-7108). + uint32_t index_plus_one = addPlNodeBin( + &loop_scope, ZIR_INST_ADD_UNSAFE, node, index, ZIR_REF_ONE_USIZE); + addPlNodeBin( + &loop_scope, ZIR_INST_STORE_NODE, node, index_ptr, index_plus_one); + + // Repeat (AstGen.zig:7110-7111). + ZirInstTag repeat_tag + = is_inline ? ZIR_INST_REPEAT_INLINE : ZIR_INST_REPEAT; + ZirInstData repeat_data; + memset(&repeat_data, 0, sizeof(repeat_data)); + repeat_data.node = (int32_t)node - (int32_t)loop_scope.decl_node_index; + addInstruction(&loop_scope, repeat_tag, repeat_data); + + setBlockBody(ag, &loop_scope, loop_inst); + } + gzAppendInstruction(gz, loop_inst); + + uint32_t result = loop_inst + ZIR_REF_START_INDEX; + + // Emit ensure_result_used when used as statement (AstGen.zig:7121-7123). + if (is_statement) { + addUnNode(gz, ZIR_INST_ENSURE_RESULT_USED, result, node); + } + + return result; +} + +// --- orelseCatchExpr (AstGen.zig:6031-6142) --- +// Handles `lhs orelse rhs` and `lhs catch rhs`. + +static uint32_t orelseCatchExpr( + GenZir* gz, Scope* scope, ResultLoc rl, uint32_t node, bool is_catch) { + AstGenCtx* ag = gz->astgen; + const Ast* tree = ag->tree; + AstData nd = tree->nodes.datas[node]; + + bool do_err_trace = is_catch && ag->fn_ret_ty != 0; + + // breakResultInfo (AstGen.zig:6046-6058). + bool need_rl = nodesNeedRlContains(ag, node); + ResultLoc break_rl = breakResultInfo(gz, rl, node, need_rl); + bool need_result_rvalue = (break_rl.tag != rl.tag); + + // Create block_scope (AstGen.zig:6062-6063). + GenZir block_scope = makeSubBlock(gz, scope); + + // Evaluate operand in block_scope (AstGen.zig:6066-6074). + ResultLoc operand_rl = RL_NONE_VAL; + if (do_err_trace) { + operand_rl.ctx = RI_CTX_ERROR_HANDLING_EXPR; + } + uint32_t operand + = exprRl(&block_scope, &block_scope.base, operand_rl, nd.lhs); + + // Check condition in block_scope (AstGen.zig:6075). + ZirInstTag test_tag + = is_catch ? ZIR_INST_IS_NON_ERR : ZIR_INST_IS_NON_NULL; + uint32_t condition = addUnNode(&block_scope, test_tag, operand, node); + + // condbr in block_scope (AstGen.zig:6076). + uint32_t condbr = addCondBr(&block_scope, ZIR_INST_CONDBR, node); + + // Create block in parent gz (AstGen.zig:6078-6081). + uint32_t block_inst = makeBlockInst(ag, ZIR_INST_BLOCK, gz, node); + setBlockBody(ag, &block_scope, block_inst); + // block_scope unstacked now. + gzAppendInstruction(gz, block_inst); + + // Then branch: unwrap payload (AstGen.zig:6083-6092). + GenZir then_scope = makeSubBlock(&block_scope, scope); + ZirInstTag unwrap_tag = is_catch ? ZIR_INST_ERR_UNION_PAYLOAD_UNSAFE + : ZIR_INST_OPTIONAL_PAYLOAD_UNSAFE; + uint32_t unwrapped = addUnNode(&then_scope, unwrap_tag, operand, node); + // Apply rvalue coercion unless rl is ref/ref_coerced_ty + // (AstGen.zig:6088-6091). + uint32_t then_result = (rl.tag == RL_REF || rl.tag == RL_REF_COERCED_TY) + ? unwrapped + : rvalue(&then_scope, break_rl, unwrapped, node); + addBreak(&then_scope, ZIR_INST_BREAK, block_inst, then_result, + (int32_t)node - (int32_t)gz->decl_node_index); + + // Else branch: evaluate RHS (AstGen.zig:6094-6131). + GenZir else_scope = makeSubBlock(&block_scope, scope); + + // save_err_ret_index (AstGen.zig:6099-6100). + if (do_err_trace && nodeMayAppendToErrorTrace(tree, nd.lhs)) + addSaveErrRetIndex(&else_scope, ZIR_REF_NONE); + + // Use fullBodyExpr (not expr) to inline unlabeled blocks + // (AstGen.zig:6125). + uint32_t else_result + = fullBodyExpr(&else_scope, &else_scope.base, break_rl, nd.rhs); + if (!endsWithNoReturn(&else_scope)) { + // restoreErrRetIndex (AstGen.zig:6128-6129). + if (do_err_trace) + restoreErrRetIndex( + &else_scope, block_inst, break_rl, nd.rhs, else_result); + addBreak(&else_scope, ZIR_INST_BREAK, block_inst, else_result, + (int32_t)nd.rhs - (int32_t)gz->decl_node_index); + } + + setCondBrPayload(ag, condbr, condition, &then_scope, &else_scope); + + // AstGen.zig:6137-6141. + if (need_result_rvalue) + return rvalue(gz, rl, block_inst + ZIR_REF_START_INDEX, node); + return block_inst + ZIR_REF_START_INDEX; +} + +// --- whileExpr (AstGen.zig:6529-6805) --- +// Handles while_simple. +// Structure: loop { cond_block { cond, condbr }, repeat } +// condbr → then { continue_block { body, break continue }, break cond } +// → else { break loop } + +static uint32_t whileExpr( + GenZir* gz, Scope* scope, uint32_t node, bool is_statement) { + AstGenCtx* ag = gz->astgen; + const Ast* tree = ag->tree; + AstData nd = tree->nodes.datas[node]; + + // Detect inline keyword (AstGen.zig:6558). + uint32_t main_token = tree->nodes.main_tokens[node]; + bool is_inline = (main_token > 0 + && tree->tokens.tags[main_token - 1] == TOKEN_KEYWORD_INLINE); + + // WHILE_SIMPLE: lhs = cond_expr, rhs = body. + uint32_t cond_node = nd.lhs; + uint32_t body_node = nd.rhs; + + // Create loop instruction (AstGen.zig:6562-6564). + ZirInstTag loop_tag = is_inline ? ZIR_INST_BLOCK_INLINE : ZIR_INST_LOOP; + uint32_t loop_inst = makeBlockInst(ag, loop_tag, gz, node); + gzAppendInstruction(gz, loop_inst); + + GenZir loop_scope = makeSubBlock(gz, scope); + loop_scope.is_inline = is_inline; + + // Evaluate condition in cond_scope (AstGen.zig:6571-6607). + GenZir cond_scope = makeSubBlock(&loop_scope, &loop_scope.base); + // Emit debug node for the condition expression (AstGen.zig:6579). + emitDbgNode(&cond_scope, cond_node); + uint32_t cond = expr(&cond_scope, &cond_scope.base, cond_node); + + // Create condbr + cond_block (AstGen.zig:6609-6615). + ZirInstTag condbr_tag + = is_inline ? ZIR_INST_CONDBR_INLINE : ZIR_INST_CONDBR; + uint32_t condbr = addCondBr(&cond_scope, condbr_tag, node); + ZirInstTag block_tag = is_inline ? ZIR_INST_BLOCK_INLINE : ZIR_INST_BLOCK; + uint32_t cond_block = makeBlockInst(ag, block_tag, &loop_scope, node); + setBlockBody(ag, &cond_scope, cond_block); // unstacks cond_scope + gzAppendInstruction(&loop_scope, cond_block); + + // Create continue_block (AstGen.zig:6694). + uint32_t continue_block = makeBlockInst(ag, block_tag, &loop_scope, node); + + // Add repeat to loop_scope (AstGen.zig:6696-6697). + { + ZirInstTag repeat_tag + = is_inline ? ZIR_INST_REPEAT_INLINE : ZIR_INST_REPEAT; + ZirInstData repeat_data; + memset(&repeat_data, 0, sizeof(repeat_data)); + repeat_data.node = (int32_t)node - (int32_t)loop_scope.decl_node_index; + addInstruction(&loop_scope, repeat_tag, repeat_data); + } + + // Set loop body and configure break/continue (AstGen.zig:6699-6701). + setBlockBody(ag, &loop_scope, loop_inst); // unstacks loop_scope + loop_scope.break_block = loop_inst; + loop_scope.continue_block = continue_block; + + // Stack then_scope (AstGen.zig:6708-6709). + GenZir then_scope = makeSubBlock(gz, &cond_scope.base); + + // Add continue_block to then_scope (AstGen.zig:6716). + gzAppendInstruction(&then_scope, continue_block); + + // Create continue_scope inside then_scope (AstGen.zig:6725). + GenZir continue_scope = makeSubBlock(&then_scope, &then_scope.base); + + // Execute body (AstGen.zig:6727-6730). + emitDbgNode(&continue_scope, body_node); + fullBodyExpr( + &continue_scope, &continue_scope.base, RL_NONE_VAL, body_node); + + // Break continue_block if not noreturn (AstGen.zig:6735-6747). + if (!endsWithNoReturn(&continue_scope)) { + // dbg_stmt + dbg_empty_stmt (AstGen.zig:6737-6745). + advanceSourceCursor( + ag, tree->tokens.starts[lastToken(tree, body_node)]); + emitDbgStmt(gz, ag->source_line - gz->decl_line, ag->source_column); + { + ZirInstData ext_data; + ext_data.extended.opcode = (uint16_t)ZIR_EXT_DBG_EMPTY_STMT; + ext_data.extended.small = 0; + ext_data.extended.operand = 0; + addInstruction(gz, ZIR_INST_EXTENDED, ext_data); + } + ZirInstTag break_tag + = is_inline ? ZIR_INST_BREAK_INLINE : ZIR_INST_BREAK; + addBreak(&continue_scope, break_tag, continue_block, + ZIR_REF_VOID_VALUE, AST_NODE_OFFSET_NONE); + } + setBlockBody(ag, &continue_scope, continue_block); + + // Break cond_block from then_scope (AstGen.zig:7064). + { + ZirInstTag break_tag + = is_inline ? ZIR_INST_BREAK_INLINE : ZIR_INST_BREAK; + addBreak(&then_scope, break_tag, cond_block, ZIR_REF_VOID_VALUE, + AST_NODE_OFFSET_NONE); + } + + // Else scope: break loop with void (AstGen.zig:6785-6788). + GenZir else_scope = makeSubBlock(gz, &cond_scope.base); + { + ZirInstTag break_tag + = is_inline ? ZIR_INST_BREAK_INLINE : ZIR_INST_BREAK; + addBreak(&else_scope, break_tag, loop_inst, ZIR_REF_VOID_VALUE, + AST_NODE_OFFSET_NONE); + } + + // Wire up condbr (AstGen.zig:6795). + setCondBrPayload(ag, condbr, cond, &then_scope, &else_scope); + + uint32_t result = loop_inst + ZIR_REF_START_INDEX; + + // Emit ensure_result_used when used as statement (AstGen.zig:6812-6813). + if (is_statement) { + addUnNode(gz, ZIR_INST_ENSURE_RESULT_USED, result, node); + } + + return result; +} + +// --- switchExpr (AstGen.zig:7625-8117) --- +// Handles switch and switch_comma expressions. +// Encoding: switch_block pl_node with SwitchBlock extra payload. + +static uint32_t switchExpr( + GenZir* gz, Scope* scope, ResultLoc rl, uint32_t node) { + AstGenCtx* ag = gz->astgen; + const Ast* tree = ag->tree; + bool need_rl = nodesNeedRlContains(ag, node); + ResultLoc break_rl = breakResultInfo(gz, rl, node, need_rl); + AstData nd = tree->nodes.datas[node]; + + // AST_NODE_SWITCH: lhs = condition node, rhs = extra index for SubRange. + // SubRange[rhs] = { cases_start, cases_end }. + // Case nodes are at extra_data[cases_start..cases_end]. + uint32_t cond_node = nd.lhs; + uint32_t extra_idx = nd.rhs; + uint32_t cases_start = tree->extra_data.arr[extra_idx]; + uint32_t cases_end = tree->extra_data.arr[extra_idx + 1]; + const uint32_t* case_nodes_arr = tree->extra_data.arr + cases_start; + uint32_t case_count = cases_end - cases_start; + + // Save operand source location before evaluating (AstGen.zig:7774-7775). + advanceSourceCursorToNode(ag, cond_node); + uint32_t operand_lc_line = ag->source_line - gz->decl_line; + uint32_t operand_lc_col = ag->source_column; + + // Evaluate switch operand (AstGen.zig:7777). + uint32_t cond_ref = expr(gz, scope, cond_node); + + // --- First pass: categorize cases (AstGen.zig:7671-7762) --- + uint32_t scalar_cases_len = 0; + uint32_t multi_cases_len = 0; + bool has_else = false; + + for (uint32_t ci = 0; ci < case_count; ci++) { + uint32_t cn = case_nodes_arr[ci]; + AstNodeTag ct = tree->nodes.tags[cn]; + AstData cd = tree->nodes.datas[cn]; + + switch (ct) { + case AST_NODE_SWITCH_CASE_ONE: + case AST_NODE_SWITCH_CASE_INLINE_ONE: + if (cd.lhs == 0) + has_else = true; + else if (tree->nodes.tags[cd.lhs] == AST_NODE_SWITCH_RANGE) + multi_cases_len++; + else + scalar_cases_len++; + break; + case AST_NODE_SWITCH_CASE: + case AST_NODE_SWITCH_CASE_INLINE: + multi_cases_len++; + break; + default: + break; + } + } + + // Sema expects a dbg_stmt immediately before switch_block + // (AstGen.zig:7806). + emitDbgStmtForceCurrentIndex(gz, operand_lc_line, operand_lc_col); + // --- Create switch_block instruction (AstGen.zig:7809) --- + uint32_t switch_inst = makeBlockInst(ag, ZIR_INST_SWITCH_BLOCK, gz, node); + + // --- Single-pass evaluation in source order (AstGen.zig:7849-8027) --- + // Case table + payload buffer pattern (like upstream scratch). + // Table layout: [else?] [scalar_0..N] [multi_0..N] + // Each entry points to the start of that case's data in the buffer. + uint32_t table_size + = (has_else ? 1 : 0) + scalar_cases_len + multi_cases_len; + uint32_t else_tbl = 0; + uint32_t scalar_tbl = (has_else ? 1 : 0); + uint32_t multi_tbl = scalar_tbl + scalar_cases_len; + + uint32_t pay_cap = table_size + case_count * 16; + uint32_t* pay = malloc(pay_cap * sizeof(uint32_t)); + uint32_t pay_len = table_size; + + uint32_t scalar_ci = 0; + uint32_t multi_ci = 0; + + for (uint32_t ci = 0; ci < case_count; ci++) { + uint32_t cn = case_nodes_arr[ci]; + AstNodeTag ct = tree->nodes.tags[cn]; + AstData cd = tree->nodes.datas[cn]; + uint32_t hdr = pay_len; + uint32_t prong_info_slot = 0; + + // Ensure capacity for items (generous estimate). + if (pay_len + 32 > pay_cap) { + pay_cap *= 2; + uint32_t* p = realloc(pay, pay_cap * sizeof(uint32_t)); + if (!p) + abort(); + pay = p; + } + + switch (ct) { + case AST_NODE_SWITCH_CASE_ONE: + case AST_NODE_SWITCH_CASE_INLINE_ONE: + if (cd.lhs == 0) { + // Else: [prong_info, body...] + pay[else_tbl] = hdr; + prong_info_slot = pay_len++; + } else if (tree->nodes.tags[cd.lhs] == AST_NODE_SWITCH_RANGE) { + // Single range → multi case: + // [items_len=0, ranges_len=1, prong_info, first, last] + pay[multi_tbl + multi_ci++] = hdr; + pay[pay_len++] = 0; + pay[pay_len++] = 1; + prong_info_slot = pay_len++; + AstData rng = tree->nodes.datas[cd.lhs]; + pay[pay_len++] = comptimeExpr(gz, scope, RL_NONE_VAL, rng.lhs, + COMPTIME_REASON_SWITCH_ITEM); + pay[pay_len++] = comptimeExpr(gz, scope, RL_NONE_VAL, rng.rhs, + COMPTIME_REASON_SWITCH_ITEM); + } else { + // Scalar: [item_ref, prong_info, body...] + pay[scalar_tbl + scalar_ci++] = hdr; + pay[pay_len++] = comptimeExpr(gz, scope, RL_NONE_VAL, cd.lhs, + COMPTIME_REASON_SWITCH_ITEM); + prong_info_slot = pay_len++; + } + break; + case AST_NODE_SWITCH_CASE: + case AST_NODE_SWITCH_CASE_INLINE: { + // Multi-item: SubRange[lhs] of items, rhs = body. + pay[multi_tbl + multi_ci++] = hdr; + uint32_t ist = tree->extra_data.arr[cd.lhs]; + uint32_t ien = tree->extra_data.arr[cd.lhs + 1]; + uint32_t nitems = 0, nranges = 0; + for (uint32_t j = ist; j < ien; j++) { + if (tree->nodes.tags[tree->extra_data.arr[j]] + == AST_NODE_SWITCH_RANGE) + nranges++; + else + nitems++; + } + pay[pay_len++] = nitems; + pay[pay_len++] = nranges; + prong_info_slot = pay_len++; + // Non-range items. + for (uint32_t j = ist; j < ien; j++) { + uint32_t item = tree->extra_data.arr[j]; + if (tree->nodes.tags[item] != AST_NODE_SWITCH_RANGE) { + if (pay_len + 2 > pay_cap) { + pay_cap *= 2; + uint32_t* p = realloc(pay, pay_cap * sizeof(uint32_t)); + if (!p) + abort(); + pay = p; + } + pay[pay_len++] = comptimeExpr(gz, scope, RL_NONE_VAL, item, + COMPTIME_REASON_SWITCH_ITEM); + } + } + // Range pairs. + for (uint32_t j = ist; j < ien; j++) { + uint32_t item = tree->extra_data.arr[j]; + if (tree->nodes.tags[item] == AST_NODE_SWITCH_RANGE) { + AstData rng = tree->nodes.datas[item]; + if (pay_len + 2 > pay_cap) { + pay_cap *= 2; + uint32_t* p = realloc(pay, pay_cap * sizeof(uint32_t)); + if (!p) + abort(); + pay = p; + } + pay[pay_len++] = comptimeExpr(gz, scope, RL_NONE_VAL, + rng.lhs, COMPTIME_REASON_SWITCH_ITEM); + pay[pay_len++] = comptimeExpr(gz, scope, RL_NONE_VAL, + rng.rhs, COMPTIME_REASON_SWITCH_ITEM); + } + } + break; + } + default: + continue; + } + + // Evaluate body (AstGen.zig:7997-8026). + uint32_t body_node = cd.rhs; + GenZir case_scope = makeSubBlock(gz, scope); + + // Note: upstream regular switchExpr (AstGen.zig:7625) does NOT emit + // save_err_ret_index. Only switchExprErrUnion (AstGen.zig:7524) does. + + // Use fullBodyExpr to process body inline (AstGen.zig:8009). + uint32_t result + = fullBodyExpr(&case_scope, &case_scope.base, break_rl, body_node); + if (!refIsNoReturn(gz, result)) { + addBreak(&case_scope, ZIR_INST_BREAK, switch_inst, result, + (int32_t)body_node - (int32_t)gz->decl_node_index); + } + uint32_t body_len = gzInstructionsLen(&case_scope); + const uint32_t* body = gzInstructionsSlice(&case_scope); + + pay[prong_info_slot] = body_len & 0x0FFFFFFFu; + + if (pay_len + body_len > pay_cap) { + while (pay_len + body_len > pay_cap) + pay_cap *= 2; + uint32_t* p = realloc(pay, pay_cap * sizeof(uint32_t)); + if (!p) + abort(); + pay = p; + } + for (uint32_t i = 0; i < body_len; i++) + pay[pay_len++] = body[i]; + gzUnstack(&case_scope); + } + + // --- Serialize to extra in payload order (AstGen.zig:8036-8110) --- + ensureExtraCapacity(ag, + 2 + (uint32_t)(multi_cases_len > 0 ? 1 : 0) + pay_len - table_size); + uint32_t payload_index = ag->extra_len; + + ag->extra[ag->extra_len++] = cond_ref; + + uint32_t bits = 0; + if (multi_cases_len > 0) + bits |= 1u; + if (has_else) + bits |= (1u << 1); + bits |= (scalar_cases_len & 0x1FFFFFFu) << 7; + ag->extra[ag->extra_len++] = bits; + + if (multi_cases_len > 0) + ag->extra[ag->extra_len++] = multi_cases_len; + + // Else prong. + if (has_else) { + uint32_t si = pay[else_tbl]; + uint32_t bl = pay[si] & 0x0FFFFFFFu; + for (uint32_t i = 0; i < 1 + bl; i++) + ag->extra[ag->extra_len++] = pay[si + i]; + } + // Scalar cases. + for (uint32_t i = 0; i < scalar_cases_len; i++) { + uint32_t si = pay[scalar_tbl + i]; + uint32_t bl = pay[si + 1] & 0x0FFFFFFFu; + for (uint32_t j = 0; j < 2 + bl; j++) + ag->extra[ag->extra_len++] = pay[si + j]; + } + // Multi cases. + for (uint32_t i = 0; i < multi_cases_len; i++) { + uint32_t si = pay[multi_tbl + i]; + uint32_t ni = pay[si]; + uint32_t nr = pay[si + 1]; + uint32_t bl = pay[si + 2] & 0x0FFFFFFFu; + uint32_t total = 3 + ni + nr * 2 + bl; + for (uint32_t j = 0; j < total; j++) + ag->extra[ag->extra_len++] = pay[si + j]; + } + + free(pay); + + ag->inst_datas[switch_inst].pl_node.payload_index = payload_index; + gzAppendInstruction(gz, switch_inst); + + // AstGen.zig:8112-8115. + bool need_result_rvalue = (break_rl.tag != rl.tag); + if (need_result_rvalue) + return rvalue(gz, rl, switch_inst + ZIR_REF_START_INDEX, node); + return switch_inst + ZIR_REF_START_INDEX; +} + +// --- rvalue (AstGen.zig:11029) --- +// Simplified: handles .none and .discard result locations. + +static uint32_t rvalueDiscard(GenZir* gz, uint32_t result, uint32_t src_node) { + // .discard => emit ensure_result_non_error, return .void_value + // (AstGen.zig:11071-11074) + ZirInstData data; + data.un_node.src_node = (int32_t)src_node - (int32_t)gz->decl_node_index; + data.un_node.operand = result; + addInstruction(gz, ZIR_INST_ENSURE_RESULT_NON_ERROR, data); + return ZIR_REF_VOID_VALUE; +} + +// --- emitDbgNode / emitDbgStmt (AstGen.zig:3422, 13713) --- + +static void emitDbgStmt(GenZir* gz, uint32_t line, uint32_t column) { + if (gz->is_comptime) + return; + // Check if last instruction is already dbg_stmt; if so, update it. + // (AstGen.zig:13715-13724) + AstGenCtx* ag = gz->astgen; + uint32_t gz_len = gzInstructionsLen(gz); + if (gz_len > 0) { + uint32_t last = gzInstructionsSlice(gz)[gz_len - 1]; + if (ag->inst_tags[last] == ZIR_INST_DBG_STMT) { + ag->inst_datas[last].dbg_stmt.line = line; + ag->inst_datas[last].dbg_stmt.column = column; + return; + } + } + ZirInstData data; + data.dbg_stmt.line = line; + data.dbg_stmt.column = column; + addInstruction(gz, ZIR_INST_DBG_STMT, data); +} + +// Mirrors emitDbgStmtForceCurrentIndex (AstGen.zig:13739-13760). +static void emitDbgStmtForceCurrentIndex( + GenZir* gz, uint32_t line, uint32_t column) { + AstGenCtx* ag = gz->astgen; + uint32_t gz_len = gzInstructionsLen(gz); + if (gz_len > 0 + && gzInstructionsSlice(gz)[gz_len - 1] == ag->inst_len - 1) { + uint32_t last = ag->inst_len - 1; + if (ag->inst_tags[last] == ZIR_INST_DBG_STMT) { + ag->inst_datas[last].dbg_stmt.line = line; + ag->inst_datas[last].dbg_stmt.column = column; + return; + } + } + ZirInstData data; + data.dbg_stmt.line = line; + data.dbg_stmt.column = column; + addInstruction(gz, ZIR_INST_DBG_STMT, data); +} + +static void emitDbgNode(GenZir* gz, uint32_t node) { + if (gz->is_comptime) + return; + AstGenCtx* ag = gz->astgen; + advanceSourceCursorToNode(ag, node); + uint32_t line = ag->source_line - gz->decl_line; + uint32_t column = ag->source_column; + emitDbgStmt(gz, line, column); +} + +// --- assign (AstGen.zig:3434) --- +// Handles `_ = expr` discard pattern. + +static void assignStmt(GenZir* gz, Scope* scope, uint32_t infix_node) { + emitDbgNode(gz, infix_node); + const AstGenCtx* ag = gz->astgen; + const Ast* tree = ag->tree; + + AstData nd = tree->nodes.datas[infix_node]; + uint32_t lhs = nd.lhs; + uint32_t rhs = nd.rhs; + + // Check if LHS is `_` identifier for discard (AstGen.zig:3440-3446). + if (tree->nodes.tags[lhs] == AST_NODE_IDENTIFIER) { + uint32_t ident_tok = tree->nodes.main_tokens[lhs]; + uint32_t tok_start = tree->tokens.starts[ident_tok]; + if (tree->source[tok_start] == '_' + && (tok_start + 1 >= tree->source_len + || !((tree->source[tok_start + 1] >= 'a' + && tree->source[tok_start + 1] <= 'z') + || (tree->source[tok_start + 1] >= 'A' + && tree->source[tok_start + 1] <= 'Z') + || tree->source[tok_start + 1] == '_' + || (tree->source[tok_start + 1] >= '0' + && tree->source[tok_start + 1] <= '9')))) { + // Discard: evaluate RHS with .discard result location. + uint32_t result = expr(gz, scope, rhs); + rvalueDiscard(gz, result, rhs); + return; + } + } + + // Non-discard assignment: evaluate LHS as lvalue, pass ptr rl to RHS. + // (AstGen.zig:3448-3452). + { + uint32_t lhs_ptr = exprRl(gz, scope, RL_REF_VAL, lhs); + ResultLoc ptr_rl + = { .tag = RL_PTR, .data = lhs_ptr, .src_node = infix_node }; + (void)exprRl(gz, scope, ptr_rl, rhs); + } +} + +// --- assignOp (AstGen.zig:3731) --- +// Handles compound assignment operators (+=, -=, *=, etc.). + +static void assignOp( + GenZir* gz, Scope* scope, uint32_t infix_node, ZirInstTag op_tag) { + emitDbgNode(gz, infix_node); + AstGenCtx* ag = gz->astgen; + const Ast* tree = ag->tree; + + AstData nd = tree->nodes.datas[infix_node]; + uint32_t lhs_node = nd.lhs; + uint32_t rhs_node = nd.rhs; + + // Evaluate LHS as lvalue pointer (AstGen.zig:3742). + uint32_t lhs_ptr = exprRl(gz, scope, RL_REF_VAL, lhs_node); + + // Advance cursor for add/sub/mul/div/mod_rem (AstGen.zig:3744-3747). + uint32_t cursor_line = 0, cursor_col = 0; + bool need_dbg = false; + if (op_tag == ZIR_INST_ADD || op_tag == ZIR_INST_SUB + || op_tag == ZIR_INST_MUL || op_tag == ZIR_INST_DIV + || op_tag == ZIR_INST_MOD_REM) { + if (!gz->is_comptime) { + advanceSourceCursorToMainToken(ag, gz, infix_node); + } + cursor_line = ag->source_line - gz->decl_line; + cursor_col = ag->source_column; + need_dbg = true; + } + + // Load current value (AstGen.zig:3748). + uint32_t lhs = addUnNode(gz, ZIR_INST_LOAD, lhs_ptr, infix_node); + + // Determine RHS result type (AstGen.zig:3750-3766). + uint32_t rhs_res_ty; + if (op_tag == ZIR_INST_ADD || op_tag == ZIR_INST_SUB) { + // Emit inplace_arith_result_ty extended instruction. + uint16_t inplace_op + = (op_tag == ZIR_INST_ADD) ? 0 : 1; // add_eq=0, sub_eq=1 + ZirInstData ext_data; + memset(&ext_data, 0, sizeof(ext_data)); + ext_data.extended.opcode = (uint16_t)ZIR_EXT_INPLACE_ARITH_RESULT_TY; + ext_data.extended.small = inplace_op; + ext_data.extended.operand = lhs; + rhs_res_ty = addInstruction(gz, ZIR_INST_EXTENDED, ext_data); + } else { + rhs_res_ty = addUnNode(gz, ZIR_INST_TYPEOF, lhs, infix_node); + } + + // Evaluate RHS with type coercion (AstGen.zig:3768). + uint32_t rhs_raw = expr(gz, scope, rhs_node); + uint32_t rhs + = addPlNodeBin(gz, ZIR_INST_AS_NODE, rhs_node, rhs_res_ty, rhs_raw); + + // Emit debug statement for arithmetic ops (AstGen.zig:3770-3775). + if (need_dbg) { + emitDbgStmt(gz, cursor_line, cursor_col); + } + + // Emit the operation (AstGen.zig:3776-3779). + uint32_t result = addPlNodeBin(gz, op_tag, infix_node, lhs, rhs); + + // Store result back (AstGen.zig:3780-3783). + addPlNodeBin(gz, ZIR_INST_STORE_NODE, infix_node, lhs_ptr, result); +} + +// --- builtinEvalToError (BuiltinFn.zig) --- +// Returns per-builtin eval_to_error. Default is .never; only a few are +// .maybe or .always. Mirrors BuiltinFn.list lookup in AstGen.zig:10539. +static int builtinEvalToError(const Ast* tree, uint32_t node) { + uint32_t main_tok = tree->nodes.main_tokens[node]; + uint32_t tok_start = tree->tokens.starts[main_tok]; + const char* source = tree->source; + uint32_t name_start = tok_start + 1; // skip '@' + uint32_t name_end = name_start; + while (name_end < tree->source_len + && ((source[name_end] >= 'a' && source[name_end] <= 'z') + || (source[name_end] >= 'A' && source[name_end] <= 'Z') + || source[name_end] == '_')) { + name_end++; + } + uint32_t name_len = name_end - name_start; + const char* name = source + name_start; + // clang-format off + // .always: + if (name_len == 12 && memcmp(name, "errorFromInt", 12) == 0) + return 1; // EVAL_TO_ERROR_ALWAYS + // .maybe: + if (name_len == 2 && memcmp(name, "as", 2) == 0) return 2; + if (name_len == 4 && memcmp(name, "call", 4) == 0) return 2; + if (name_len == 5 && memcmp(name, "field", 5) == 0) return 2; + if (name_len == 9 && memcmp(name, "errorCast", 9) == 0) return 2; + // clang-format on + // Default: .never + return 0; +} + +// --- nodeMayEvalToError (AstGen.zig:10340) --- +// Three-way result: 0=never, 1=always, 2=maybe. +#define EVAL_TO_ERROR_NEVER 0 +#define EVAL_TO_ERROR_ALWAYS 1 +#define EVAL_TO_ERROR_MAYBE 2 + +static int nodeMayEvalToError(const Ast* tree, uint32_t node) { + uint32_t n = node; + while (true) { + AstNodeTag tag = tree->nodes.tags[n]; + switch (tag) { + case AST_NODE_ERROR_VALUE: + return EVAL_TO_ERROR_ALWAYS; + // These may evaluate to errors. + case AST_NODE_IDENTIFIER: + case AST_NODE_FIELD_ACCESS: + case AST_NODE_DEREF: + case AST_NODE_ARRAY_ACCESS: + case AST_NODE_WHILE_SIMPLE: + case AST_NODE_WHILE_CONT: + case AST_NODE_WHILE: + case AST_NODE_FOR_SIMPLE: + case AST_NODE_FOR: + case AST_NODE_IF_SIMPLE: + case AST_NODE_IF: + case AST_NODE_SWITCH: + case AST_NODE_SWITCH_COMMA: + case AST_NODE_CALL_ONE: + case AST_NODE_CALL_ONE_COMMA: + case AST_NODE_CALL: + case AST_NODE_CALL_COMMA: + case AST_NODE_ASM_SIMPLE: + case AST_NODE_ASM_LEGACY: + case AST_NODE_ASM: + case AST_NODE_CATCH: + case AST_NODE_ORELSE: + return EVAL_TO_ERROR_MAYBE; + // Forward to sub-expression. + case AST_NODE_TRY: + case AST_NODE_COMPTIME: + case AST_NODE_NOSUSPEND: + n = tree->nodes.datas[n].lhs; + continue; + case AST_NODE_GROUPED_EXPRESSION: + case AST_NODE_UNWRAP_OPTIONAL: + n = tree->nodes.datas[n].lhs; + continue; + // Labeled blocks may need a memory location. + case AST_NODE_BLOCK_TWO: + case AST_NODE_BLOCK_TWO_SEMICOLON: + case AST_NODE_BLOCK: + case AST_NODE_BLOCK_SEMICOLON: { + uint32_t lbrace = tree->nodes.main_tokens[n]; + if (lbrace > 0 && tree->tokens.tags[lbrace - 1] == TOKEN_COLON) + return EVAL_TO_ERROR_MAYBE; + return EVAL_TO_ERROR_NEVER; + } + // Builtins: look up per-builtin eval_to_error + // (AstGen.zig:10530-10541). + case AST_NODE_BUILTIN_CALL: + case AST_NODE_BUILTIN_CALL_COMMA: + case AST_NODE_BUILTIN_CALL_TWO: + case AST_NODE_BUILTIN_CALL_TWO_COMMA: + return builtinEvalToError(tree, n); + // Everything else: .never + default: + return EVAL_TO_ERROR_NEVER; + } + } +} + +// --- nodeMayAppendToErrorTrace (AstGen.zig:10315) --- +// Returns true if the expression may append to the error return trace. +static bool nodeMayAppendToErrorTrace(const Ast* tree, uint32_t node) { + uint32_t n = node; + while (true) { + AstNodeTag tag = tree->nodes.tags[n]; + switch (tag) { + // These don't call runtime functions. + case AST_NODE_ERROR_VALUE: + case AST_NODE_IDENTIFIER: + case AST_NODE_COMPTIME: + return false; + // Forward to sub-expression. + case AST_NODE_TRY: + case AST_NODE_NOSUSPEND: + n = tree->nodes.datas[n].lhs; + continue; + case AST_NODE_GROUPED_EXPRESSION: + case AST_NODE_UNWRAP_OPTIONAL: + n = tree->nodes.datas[n].lhs; + continue; + // Anything else: check if it may eval to error. + default: + return nodeMayEvalToError(tree, n) != EVAL_TO_ERROR_NEVER; + } + } +} + +// --- addSaveErrRetIndex (AstGen.zig:12556) --- +// Emits SAVE_ERR_RET_INDEX instruction. +// operand is the init inst ref (or ZIR_REF_NONE for .always). +static void addSaveErrRetIndex(GenZir* gz, uint32_t operand) { + ZirInstData data; + data.save_err_ret_index.operand = operand; + data.save_err_ret_index._pad = 0; + addInstruction(gz, ZIR_INST_SAVE_ERR_RET_INDEX, data); +} + +// --- addRestoreErrRetIndexBlock (AstGen.zig:12607-12614) --- +// Emits extended RESTORE_ERR_RET_INDEX with block target (if_non_error +// condition). Payload: src_node, block_ref, operand. +static void addRestoreErrRetIndexBlock( + GenZir* gz, uint32_t block_inst, uint32_t operand, uint32_t node) { + AstGenCtx* ag = gz->astgen; + ensureExtraCapacity(ag, 3); + uint32_t payload_index = ag->extra_len; + ag->extra[ag->extra_len++] + = (uint32_t)((int32_t)node - (int32_t)gz->decl_node_index); + ag->extra[ag->extra_len++] = block_inst + ZIR_REF_START_INDEX; + ag->extra[ag->extra_len++] = operand; + + ZirInstData ext_data; + ext_data.extended.opcode = (uint16_t)ZIR_EXT_RESTORE_ERR_RET_INDEX; + ext_data.extended.small = 0; + ext_data.extended.operand = payload_index; + addInstruction(gz, ZIR_INST_EXTENDED, ext_data); +} + +// --- restoreErrRetIndex (AstGen.zig:2121-2148) --- +// Emits restore_err_ret_index for block target based on nodeMayEvalToError. +static void restoreErrRetIndex(GenZir* gz, uint32_t block_inst, ResultLoc rl, + uint32_t node, uint32_t result) { + const Ast* tree = gz->astgen->tree; + int eval = nodeMayEvalToError(tree, node); + if (eval == EVAL_TO_ERROR_ALWAYS) + return; // never restore/pop + uint32_t op; + if (eval == EVAL_TO_ERROR_NEVER) { + op = ZIR_REF_NONE; // always restore/pop + } else { + // EVAL_TO_ERROR_MAYBE + // Simplified: without ri.ctx, treat non-ptr RL as result + // (AstGen.zig:2131-2144). + if (rl.tag == RL_PTR) { + op = addUnNode(gz, ZIR_INST_LOAD, rl.data, node); + } else if (rl.tag == RL_INFERRED_PTR) { + op = ZIR_REF_NONE; + } else { + op = result; + } + } + addRestoreErrRetIndexBlock(gz, block_inst, op, node); +} + +// --- varDecl (AstGen.zig:3189) --- +// Handles local const/var declarations. Returns new scope with the variable. +// scope_out: set to new scope if variable is added; unchanged otherwise. + +static void varDecl(GenZir* gz, Scope* scope, uint32_t node, + ScopeLocalVal* val_out, ScopeLocalPtr* ptr_out, Scope** scope_out) { + AstGenCtx* ag = gz->astgen; + emitDbgNode(gz, node); // AstGen.zig:3196 + const Ast* tree = ag->tree; + AstData nd = tree->nodes.datas[node]; + AstNodeTag tag = tree->nodes.tags[node]; + + uint32_t mut_token = tree->nodes.main_tokens[node]; + uint32_t name_token = mut_token + 1; + bool is_const = (tree->source[tree->tokens.starts[mut_token]] == 'c'); + + uint32_t ident_name = identAsString(ag, name_token); + + // Extract type_node and init_node based on variant. + uint32_t type_node = 0; + uint32_t init_node = 0; + + if (tag == AST_NODE_SIMPLE_VAR_DECL) { + // lhs = type (optional), rhs = init (optional). + type_node = nd.lhs; + init_node = nd.rhs; + } else if (tag == AST_NODE_LOCAL_VAR_DECL) { + // lhs = extra_data index, rhs = init. + // extra: {type_node, align_node, addrspace_node, section_node} + // Simplified: just extract type_node. + uint32_t extra_idx = nd.lhs; + type_node = tree->extra_data.arr[extra_idx]; // type_node + init_node = nd.rhs; + } else if (tag == AST_NODE_ALIGNED_VAR_DECL) { + // lhs = align expr, rhs = init. + // No type node in this variant. + init_node = nd.rhs; + } else { + // global_var_decl or unknown — bail. + SET_ERROR(ag); + return; + } + + if (init_node == 0) { + // Variables must be initialized (AstGen.zig:3228). + SET_ERROR(ag); + return; + } + + if (is_const) { + // --- CONST path (AstGen.zig:3232-3340) --- + if (!nodesNeedRlContains(ag, node)) { + // Rvalue path (AstGen.zig:3246-3271). + // Evaluate type annotation and build result_info + // (AstGen.zig:3247-3250). + ResultLoc result_info; + if (type_node != 0) { + uint32_t type_ref = typeExpr(gz, scope, type_node); + result_info = (ResultLoc) { .tag = RL_TY, + .data = type_ref, + .src_node = 0, + .ctx = RI_CTX_CONST_INIT }; + } else { + result_info = (ResultLoc) { .tag = RL_NONE, + .data = 0, + .src_node = 0, + .ctx = RI_CTX_CONST_INIT }; + } + + // Evaluate init expression (AstGen.zig:3251-3252). + uint32_t init_ref = exprRl(gz, scope, result_info, init_node); + + if (ag->has_compile_errors) + return; + + // validate_const (AstGen.zig:3266). + addUnNode(gz, ZIR_INST_VALIDATE_CONST, init_ref, init_node); + + // dbg_var_val (AstGen.zig:3269). + addDbgVar(gz, ZIR_INST_DBG_VAR_VAL, ident_name, init_ref); + + // save_err_ret_index (AstGen.zig:3259-3260). + if (nodeMayAppendToErrorTrace(tree, init_node)) + addSaveErrRetIndex(gz, init_ref); + + // Create ScopeLocalVal (AstGen.zig:3276-3284). + val_out->base.tag = SCOPE_LOCAL_VAL; + val_out->parent = *scope_out; + val_out->gen_zir = gz; + val_out->inst = init_ref; + val_out->token_src = name_token; + val_out->name = ident_name; + *scope_out = &val_out->base; + } else { + // Alloc path (AstGen.zig:3277-3340). + // The init expression needs a result pointer (nodes_need_rl). + bool is_comptime_init = gz->is_comptime + || tree->nodes.tags[init_node] == AST_NODE_COMPTIME; + + uint32_t var_ptr; + bool resolve_inferred; + + if (type_node != 0) { + // Typed const: alloc (AstGen.zig:3280). + uint32_t type_ref = typeExpr(gz, scope, type_node); + var_ptr = addUnNode(gz, ZIR_INST_ALLOC, type_ref, node); + resolve_inferred = false; + } else { + // Inferred type: alloc_inferred (AstGen.zig:3291-3296). + ZirInstTag alloc_tag = is_comptime_init + ? ZIR_INST_ALLOC_INFERRED_COMPTIME + : ZIR_INST_ALLOC_INFERRED; + ZirInstData adata; + adata.node = (int32_t)node - (int32_t)gz->decl_node_index; + var_ptr = addInstruction(gz, alloc_tag, adata); + resolve_inferred = true; + } + + // Evaluate init with RL pointing to alloc (AstGen.zig:3313-3316). + ResultLoc init_rl; + if (type_node != 0) { + init_rl.tag = RL_PTR; + init_rl.data = var_ptr; + init_rl.src_node = 0; // upstream: .none (PtrResultLoc.src_node + // defaults to null) + } else { + init_rl.tag = RL_INFERRED_PTR; + init_rl.data = var_ptr; + init_rl.src_node = 0; + } + init_rl.ctx = RI_CTX_CONST_INIT; + uint32_t init_ref = exprRl(gz, scope, init_rl, init_node); + + if (ag->has_compile_errors) + return; + + // save_err_ret_index (AstGen.zig:3320-3321). + if (nodeMayAppendToErrorTrace(tree, init_node)) + addSaveErrRetIndex(gz, init_ref); + + // resolve_inferred_alloc or make_ptr_const (AstGen.zig:3323-3326). + uint32_t const_ptr; + if (resolve_inferred) + const_ptr = addUnNode( + gz, ZIR_INST_RESOLVE_INFERRED_ALLOC, var_ptr, node); + else + const_ptr + = addUnNode(gz, ZIR_INST_MAKE_PTR_CONST, var_ptr, node); + + // dbg_var_ptr (AstGen.zig:3328). + addDbgVar(gz, ZIR_INST_DBG_VAR_PTR, ident_name, const_ptr); + + // Create ScopeLocalPtr (AstGen.zig:3330-3340). + ptr_out->base.tag = SCOPE_LOCAL_PTR; + ptr_out->parent = *scope_out; + ptr_out->gen_zir = gz; + ptr_out->ptr = const_ptr; + ptr_out->token_src = name_token; + ptr_out->name = ident_name; + ptr_out->maybe_comptime = true; + *scope_out = &ptr_out->base; + } + } else { + // --- VAR path (AstGen.zig:3342-3416) --- + + uint32_t alloc_ref; + bool resolve_inferred = false; + + if (type_node != 0) { + // Typed var: alloc_mut (AstGen.zig:3361-3375). + uint32_t type_ref = typeExpr(gz, scope, type_node); + ZirInstTag alloc_tag = gz->is_comptime + ? ZIR_INST_ALLOC_COMPTIME_MUT + : ZIR_INST_ALLOC_MUT; + alloc_ref = addUnNode(gz, alloc_tag, type_ref, node); + } else { + // Inferred type var: alloc_inferred_mut + // (AstGen.zig:3384-3392). + ZirInstTag alloc_tag = gz->is_comptime + ? ZIR_INST_ALLOC_INFERRED_COMPTIME_MUT + : ZIR_INST_ALLOC_INFERRED_MUT; + ZirInstData adata; + adata.node = (int32_t)node - (int32_t)gz->decl_node_index; + alloc_ref = addInstruction(gz, alloc_tag, adata); + resolve_inferred = true; + } + + // Evaluate init with RL pointing to alloc (AstGen.zig:3395-3402). + ResultLoc var_init_rl; + if (type_node != 0) { + var_init_rl.tag = RL_PTR; + var_init_rl.data = alloc_ref; + var_init_rl.src_node = 0; // upstream: .none (PtrResultLoc.src_node + // defaults to null) + } else { + var_init_rl.tag = RL_INFERRED_PTR; + var_init_rl.data = alloc_ref; + var_init_rl.src_node = 0; + } + var_init_rl.ctx = RI_CTX_NONE; + uint32_t init_ref = exprRl(gz, scope, var_init_rl, init_node); + (void)init_ref; + + if (ag->has_compile_errors) + return; + + // resolve_inferred_alloc if type was inferred + // (AstGen.zig:3407-3408). + uint32_t final_ptr = alloc_ref; + if (resolve_inferred) + final_ptr = addUnNode( + gz, ZIR_INST_RESOLVE_INFERRED_ALLOC, alloc_ref, node); + + // dbg_var_ptr (AstGen.zig:3411). + addDbgVar(gz, ZIR_INST_DBG_VAR_PTR, ident_name, final_ptr); + + // Create ScopeLocalPtr (AstGen.zig:3413-3422). + ptr_out->base.tag = SCOPE_LOCAL_PTR; + ptr_out->parent = *scope_out; + ptr_out->gen_zir = gz; + ptr_out->ptr = final_ptr; + ptr_out->token_src = name_token; + ptr_out->name = ident_name; + ptr_out->maybe_comptime = gz->is_comptime; + *scope_out = &ptr_out->base; + } +} + +// --- addEnsureResult (AstGen.zig:2649) --- +// After evaluating an expression as a statement, optionally emits +// ensure_result_used. For call/field_call, sets flag in extra data instead. +// Returns true if the result is noreturn (AstGen.zig:2909). +static bool addEnsureResult( + GenZir* gz, uint32_t maybe_unused_result, uint32_t statement) { + AstGenCtx* ag = gz->astgen; + bool elide_check; + bool is_noreturn = false; + if (maybe_unused_result >= ZIR_REF_START_INDEX) { + uint32_t inst = maybe_unused_result - ZIR_REF_START_INDEX; + ZirInstTag tag = ag->inst_tags[inst]; + switch (tag) { + // For call/field_call/builtin_call: set ensure_result_used flag + // (bit 3 of flags at offset 0). Flags *must* be at offset 0 in all + // three structs (AstGen.zig:2658-2665, Zir.zig:3022). + case ZIR_INST_CALL: + case ZIR_INST_FIELD_CALL: + case ZIR_INST_BUILTIN_CALL: { + uint32_t pi = ag->inst_datas[inst].pl_node.payload_index; + ag->extra[pi] |= (1u << 3); // ensure_result_used + elide_check = true; + break; + } + // Always noreturn → elide (AstGen.zig:2909). + case ZIR_INST_BREAK: + case ZIR_INST_BREAK_INLINE: + case ZIR_INST_CONDBR: + case ZIR_INST_CONDBR_INLINE: + case ZIR_INST_RET_NODE: + case ZIR_INST_RET_LOAD: + case ZIR_INST_RET_IMPLICIT: + case ZIR_INST_RET_ERR_VALUE: + case ZIR_INST_UNREACHABLE: + case ZIR_INST_REPEAT: + case ZIR_INST_REPEAT_INLINE: + case ZIR_INST_PANIC: + case ZIR_INST_TRAP: + case ZIR_INST_CHECK_COMPTIME_CONTROL_FLOW: + case ZIR_INST_SWITCH_CONTINUE: + is_noreturn = true; + elide_check = true; + break; + // Always void → elide. + case ZIR_INST_DBG_STMT: + case ZIR_INST_DBG_VAR_PTR: + case ZIR_INST_DBG_VAR_VAL: + case ZIR_INST_ENSURE_RESULT_USED: + case ZIR_INST_ENSURE_RESULT_NON_ERROR: + case ZIR_INST_ENSURE_ERR_UNION_PAYLOAD_VOID: + case ZIR_INST_EXPORT: + case ZIR_INST_SET_EVAL_BRANCH_QUOTA: + case ZIR_INST_ATOMIC_STORE: + case ZIR_INST_STORE_NODE: + case ZIR_INST_STORE_TO_INFERRED_PTR: + case ZIR_INST_RESOLVE_INFERRED_ALLOC: + case ZIR_INST_SET_RUNTIME_SAFETY: + case ZIR_INST_MEMCPY: + case ZIR_INST_MEMSET: + case ZIR_INST_MEMMOVE: + case ZIR_INST_VALIDATE_DEREF: + case ZIR_INST_VALIDATE_DESTRUCTURE: + case ZIR_INST_SAVE_ERR_RET_INDEX: + case ZIR_INST_RESTORE_ERR_RET_INDEX_UNCONDITIONAL: + case ZIR_INST_RESTORE_ERR_RET_INDEX_FN_ENTRY: + case ZIR_INST_VALIDATE_STRUCT_INIT_TY: + case ZIR_INST_VALIDATE_STRUCT_INIT_RESULT_TY: + case ZIR_INST_VALIDATE_PTR_STRUCT_INIT: + case ZIR_INST_VALIDATE_ARRAY_INIT_TY: + case ZIR_INST_VALIDATE_ARRAY_INIT_RESULT_TY: + case ZIR_INST_VALIDATE_PTR_ARRAY_INIT: + case ZIR_INST_VALIDATE_REF_TY: + case ZIR_INST_VALIDATE_CONST: + elide_check = true; + break; + // Extended: check opcode. + case ZIR_INST_EXTENDED: { + uint32_t opcode = ag->inst_datas[inst].extended.opcode; + elide_check = (opcode == ZIR_EXT_BREAKPOINT + || opcode == ZIR_EXT_BRANCH_HINT + || opcode == ZIR_EXT_SET_FLOAT_MODE + || opcode == ZIR_EXT_DISABLE_INSTRUMENTATION + || opcode == ZIR_EXT_DISABLE_INTRINSICS); + break; + } + // Everything else: might produce non-void result → emit check. + default: + elide_check = false; + break; + } + } else { + // Named ref constant. + is_noreturn = (maybe_unused_result == ZIR_REF_UNREACHABLE_VALUE); + elide_check + = (is_noreturn || maybe_unused_result == ZIR_REF_VOID_VALUE); + } + if (!elide_check) { + addUnNode( + gz, ZIR_INST_ENSURE_RESULT_USED, maybe_unused_result, statement); + } + return is_noreturn; +} + +// --- countDefers (AstGen.zig:2966) --- +// Walk scope chain and count defer types. + +static DeferCounts countDefers(const Scope* outer_scope, Scope* inner_scope) { + DeferCounts c = { false, false, false, false }; + Scope* s = inner_scope; + while (s != outer_scope) { + switch (s->tag) { + case SCOPE_GEN_ZIR: + s = ((GenZir*)s)->parent; + break; + case SCOPE_LOCAL_VAL: + s = ((ScopeLocalVal*)s)->parent; + break; + case SCOPE_LOCAL_PTR: + s = ((ScopeLocalPtr*)s)->parent; + break; + case SCOPE_DEFER_NORMAL: { + ScopeDefer* d = (ScopeDefer*)s; + s = d->parent; + c.have_normal = true; + break; + } + case SCOPE_DEFER_ERROR: { + ScopeDefer* d = (ScopeDefer*)s; + s = d->parent; + c.have_err = true; + // need_err_code if remapped_err_code exists (we don't + // implement err capture yet, so always false). + break; + } + default: + return c; + } + } + c.have_any = c.have_normal || c.have_err; + return c; +} + +// --- genDefers (AstGen.zig:3014) --- +// Walk scope chain from inner to outer, emitting .defer instructions. +// which: DEFER_NORMAL_ONLY or DEFER_BOTH_SANS_ERR. + +static void genDefers( + GenZir* gz, const Scope* outer_scope, Scope* inner_scope, int which) { + Scope* s = inner_scope; + while (s != outer_scope) { + switch (s->tag) { + case SCOPE_GEN_ZIR: { + GenZir* g = (GenZir*)s; + s = g->parent; + break; + } + case SCOPE_LOCAL_VAL: { + ScopeLocalVal* lv = (ScopeLocalVal*)s; + s = lv->parent; + break; + } + case SCOPE_LOCAL_PTR: { + ScopeLocalPtr* lp = (ScopeLocalPtr*)s; + s = lp->parent; + break; + } + case SCOPE_DEFER_NORMAL: { + ScopeDefer* d = (ScopeDefer*)s; + s = d->parent; + // Emit ZIR_INST_DEFER (AstGen.zig:3031). + ZirInstData data; + data.defer_data.index = d->index; + data.defer_data.len = d->len; + addInstruction(gz, ZIR_INST_DEFER, data); + break; + } + case SCOPE_DEFER_ERROR: { + ScopeDefer* d = (ScopeDefer*)s; + s = d->parent; + if (which == DEFER_BOTH_SANS_ERR) { + // Emit regular DEFER for error defers too (AstGen.zig:3038). + ZirInstData data; + data.defer_data.index = d->index; + data.defer_data.len = d->len; + addInstruction(gz, ZIR_INST_DEFER, data); + } + // DEFER_NORMAL_ONLY: skip error defers (AstGen.zig:3063). + break; + } + case SCOPE_LABEL: { + // Labels store parent in the GenZir they're attached to. + // Just skip by going to the parent scope stored in parent. + // Actually labels don't have a separate parent pointer in our + // representation; they're part of GenZir. This case shouldn't + // appear when walking from blockExprStmts scope. + return; + } + case SCOPE_NAMESPACE: + case SCOPE_TOP: + default: + return; + } + } +} + +// --- blockExprStmts (AstGen.zig:2538) --- +// Processes block statements sequentially, threading scope. + +static void blockExprStmts(GenZir* gz, Scope* scope, + const uint32_t* statements, uint32_t stmt_count) { + AstGenCtx* ag = gz->astgen; + // Stack-allocated scope storage for local variables and defers. + // Max 64 local variable declarations and 64 defers per block. + ScopeLocalVal val_scopes[64]; + ScopeLocalPtr ptr_scopes[64]; + ScopeDefer defer_scopes[64]; + uint32_t val_idx = 0; + uint32_t ptr_idx = 0; + uint32_t defer_idx = 0; + Scope* cur_scope = scope; + bool noreturn_stmt = false; + + for (uint32_t i = 0; i < stmt_count; i++) { + if (ag->has_compile_errors) + return; + uint32_t stmt = statements[i]; + AstNodeTag tag = ag->tree->nodes.tags[stmt]; + switch (tag) { + case AST_NODE_ASSIGN: + assignStmt(gz, cur_scope, stmt); + break; + // Compound assignment operators (AstGen.zig:2588-2607). + case AST_NODE_ASSIGN_ADD: + assignOp(gz, cur_scope, stmt, ZIR_INST_ADD); + break; + case AST_NODE_ASSIGN_SUB: + assignOp(gz, cur_scope, stmt, ZIR_INST_SUB); + break; + case AST_NODE_ASSIGN_MUL: + assignOp(gz, cur_scope, stmt, ZIR_INST_MUL); + break; + case AST_NODE_ASSIGN_DIV: + assignOp(gz, cur_scope, stmt, ZIR_INST_DIV); + break; + case AST_NODE_ASSIGN_MOD: + assignOp(gz, cur_scope, stmt, ZIR_INST_MOD_REM); + break; + case AST_NODE_ASSIGN_BIT_AND: + assignOp(gz, cur_scope, stmt, ZIR_INST_BIT_AND); + break; + case AST_NODE_ASSIGN_BIT_OR: + assignOp(gz, cur_scope, stmt, ZIR_INST_BIT_OR); + break; + case AST_NODE_ASSIGN_BIT_XOR: + assignOp(gz, cur_scope, stmt, ZIR_INST_XOR); + break; + case AST_NODE_ASSIGN_ADD_WRAP: + assignOp(gz, cur_scope, stmt, ZIR_INST_ADDWRAP); + break; + case AST_NODE_ASSIGN_SUB_WRAP: + assignOp(gz, cur_scope, stmt, ZIR_INST_SUBWRAP); + break; + case AST_NODE_ASSIGN_MUL_WRAP: + assignOp(gz, cur_scope, stmt, ZIR_INST_MULWRAP); + break; + case AST_NODE_ASSIGN_ADD_SAT: + assignOp(gz, cur_scope, stmt, ZIR_INST_ADD_SAT); + break; + case AST_NODE_ASSIGN_SUB_SAT: + assignOp(gz, cur_scope, stmt, ZIR_INST_SUB_SAT); + break; + case AST_NODE_ASSIGN_MUL_SAT: + assignOp(gz, cur_scope, stmt, ZIR_INST_MUL_SAT); + break; + case AST_NODE_SIMPLE_VAR_DECL: + case AST_NODE_LOCAL_VAR_DECL: + case AST_NODE_ALIGNED_VAR_DECL: + if (val_idx < 64 && ptr_idx < 64) { + varDecl(gz, cur_scope, stmt, &val_scopes[val_idx], + &ptr_scopes[ptr_idx], &cur_scope); + // Check which one was used: if scope now points to + // val_scopes[val_idx], advance val_idx; same for ptr. + if (cur_scope == &val_scopes[val_idx].base) + val_idx++; + else if (cur_scope == &ptr_scopes[ptr_idx].base) + ptr_idx++; + } else { + SET_ERROR(ag); + } + break; + // defer/errdefer (AstGen.zig:2580-2581). + case AST_NODE_DEFER: + case AST_NODE_ERRDEFER: { + if (defer_idx >= 64) { + SET_ERROR(ag); + break; + } + ScopeTag scope_tag = (tag == AST_NODE_DEFER) ? SCOPE_DEFER_NORMAL + : SCOPE_DEFER_ERROR; + // Create sub-block for defer body (AstGen.zig:3123-3126). + GenZir defer_gen = makeSubBlock(gz, cur_scope); + + // Evaluate deferred expression (AstGen.zig:3165). + // DEFER: lhs is the deferred expression, rhs = 0. + // ERRDEFER: lhs is optional error capture token, rhs is expr. + AstData dnd = ag->tree->nodes.datas[stmt]; + uint32_t expr_node; + if (tag == AST_NODE_DEFER) { + expr_node = dnd.lhs; + } else { + expr_node = dnd.rhs; + } + // unusedResultExpr pattern (AstGen.zig:3165, 2641-2646). + emitDbgNode(&defer_gen, expr_node); + uint32_t defer_result + = expr(&defer_gen, &defer_gen.base, expr_node); + addEnsureResult(&defer_gen, defer_result, expr_node); + + // Add break_inline at end (AstGen.zig:3167). + addBreak(&defer_gen, ZIR_INST_BREAK_INLINE, 0, ZIR_REF_VOID_VALUE, + AST_NODE_OFFSET_NONE); + + // Write body to extra (AstGen.zig:3173-3175). + uint32_t raw_body_len = gzInstructionsLen(&defer_gen); + const uint32_t* body = gzInstructionsSlice(&defer_gen); + uint32_t extra_index = ag->extra_len; + uint32_t fixup_len + = countBodyLenAfterFixups(ag, body, raw_body_len); + ensureExtraCapacity(ag, fixup_len); + for (uint32_t b = 0; b < raw_body_len; b++) + appendPossiblyRefdBodyInst(ag, body[b]); + gzUnstack(&defer_gen); + + // Create scope (AstGen.zig:3179-3185). + defer_scopes[defer_idx] = (ScopeDefer) { + .base = { .tag = scope_tag }, + .parent = cur_scope, + .index = extra_index, + .len = fixup_len, + }; + cur_scope = &defer_scopes[defer_idx].base; + defer_idx++; + break; + } + // while/for as statements (AstGen.zig:2605-2610). + // These do NOT get emitDbgNode; they emit their own dbg_stmt. + case AST_NODE_WHILE_SIMPLE: + case AST_NODE_WHILE_CONT: + case AST_NODE_WHILE: + (void)whileExpr(gz, cur_scope, stmt, true); + break; + case AST_NODE_FOR_SIMPLE: + case AST_NODE_FOR: + (void)forExpr(gz, cur_scope, stmt, true); + break; + default: { + // Expression statement (AstGen.zig:2627 unusedResultExpr). + emitDbgNode(gz, stmt); + uint32_t result = expr(gz, cur_scope, stmt); + noreturn_stmt = addEnsureResult(gz, result, stmt); + break; + } + } + } + // Emit normal defers at block exit (AstGen.zig:2633-2634). + if (!noreturn_stmt) { + genDefers(gz, scope, cur_scope, DEFER_NORMAL_ONLY); + } +} + +// --- fullBodyExpr (AstGen.zig:2358) --- +// Processes a body expression. If it's an unlabeled block, processes +// statements inline without creating a BLOCK instruction (unlike blockExprExpr +// which wraps in BLOCK). Returns the result ref. + +static uint32_t fullBodyExpr( + GenZir* gz, Scope* scope, ResultLoc rl, uint32_t node) { + const Ast* tree = gz->astgen->tree; + AstNodeTag tag = tree->nodes.tags[node]; + + // Extract block statements (AstGen.zig:2368). + AstData nd = tree->nodes.datas[node]; + uint32_t stmt_buf[2]; + const uint32_t* statements = NULL; + uint32_t stmt_count = 0; + + switch (tag) { + case AST_NODE_BLOCK_TWO: + case AST_NODE_BLOCK_TWO_SEMICOLON: { + uint32_t idx = 0; + if (nd.lhs != 0) + stmt_buf[idx++] = nd.lhs; + if (nd.rhs != 0) + stmt_buf[idx++] = nd.rhs; + statements = stmt_buf; + stmt_count = idx; + break; + } + case AST_NODE_BLOCK: + case AST_NODE_BLOCK_SEMICOLON: { + uint32_t start = nd.lhs; + uint32_t end = nd.rhs; + statements = tree->extra_data.arr + start; + stmt_count = end - start; + break; + } + default: + // Not a block — treat as single expression (AstGen.zig:2369). + return exprRl(gz, scope, rl, node); + } + + // Check if labeled (AstGen.zig:2373-2377). + uint32_t lbrace = tree->nodes.main_tokens[node]; + bool is_labeled + = (lbrace >= 2 && tree->tokens.tags[lbrace - 1] == TOKEN_COLON + && tree->tokens.tags[lbrace - 2] == TOKEN_IDENTIFIER); + if (is_labeled) { + // Labeled blocks need a proper block instruction. + return blockExprExpr(gz, scope, rl, node); + } + + // Unlabeled block: process statements inline (AstGen.zig:2380-2383). + GenZir sub_gz = makeSubBlock(gz, scope); + blockExprStmts(&sub_gz, &sub_gz.base, statements, stmt_count); + return rvalue(gz, rl, ZIR_REF_VOID_VALUE, node); +} + +// --- lastToken (Ast.zig:874) --- +// Mechanical port of Ast.lastToken. Uses iterative end_offset accumulation. + +static uint32_t lastToken(const Ast* tree, uint32_t node) { + uint32_t n = node; + uint32_t end_offset = 0; + while (1) { + AstNodeTag tag = tree->nodes.tags[n]; + AstData nd = tree->nodes.datas[n]; + switch (tag) { + case AST_NODE_ROOT: + return tree->tokens.len - 1; + + // Binary ops: recurse into RHS (Ast.zig:893-948). + case AST_NODE_ASSIGN: + case AST_NODE_ADD: + case AST_NODE_SUB: + case AST_NODE_MUL: + case AST_NODE_DIV: + case AST_NODE_MOD: + case AST_NODE_BIT_AND: + case AST_NODE_BIT_OR: + case AST_NODE_BIT_XOR: + case AST_NODE_SHL: + case AST_NODE_SHR: + case AST_NODE_ARRAY_CAT: + case AST_NODE_ARRAY_MULT: + case AST_NODE_ADD_WRAP: + case AST_NODE_SUB_WRAP: + case AST_NODE_ADD_SAT: + case AST_NODE_SUB_SAT: + case AST_NODE_MUL_WRAP: + case AST_NODE_MUL_SAT: + case AST_NODE_MERGE_ERROR_SETS: + case AST_NODE_EQUAL_EQUAL: + case AST_NODE_BANG_EQUAL: + case AST_NODE_LESS_THAN: + case AST_NODE_GREATER_THAN: + case AST_NODE_LESS_OR_EQUAL: + case AST_NODE_GREATER_OR_EQUAL: + case AST_NODE_BOOL_AND: + case AST_NODE_BOOL_OR: + case AST_NODE_ORELSE: + case AST_NODE_CATCH: + case AST_NODE_ERROR_UNION: + case AST_NODE_SHL_SAT: + n = nd.rhs; + continue; + + // field_access: return field token + end_offset (Ast.zig:979). + case AST_NODE_FIELD_ACCESS: + return nd.rhs + end_offset; + + // test_decl: recurse into body node (Ast.zig:950). + case AST_NODE_TEST_DECL: + n = nd.rhs; + continue; + + // defer: recurse into body (lhs) (Ast.zig:951). + case AST_NODE_DEFER: + n = nd.lhs; + continue; + + // errdefer: recurse into body (rhs) (Ast.zig:950). + case AST_NODE_ERRDEFER: + n = nd.rhs; + continue; + + // block (Ast.zig:1085): end_offset += 1 (rbrace), recurse into last. + case AST_NODE_BLOCK: { + uint32_t start = nd.lhs; + uint32_t end = nd.rhs; + assert(start != end); + end_offset += 1; + n = tree->extra_data.arr[end - 1]; + continue; + } + + // block_semicolon (Ast.zig:1097): += 2 (semicolon + rbrace). + case AST_NODE_BLOCK_SEMICOLON: { + uint32_t start = nd.lhs; + uint32_t end = nd.rhs; + assert(start != end); + end_offset += 2; + n = tree->extra_data.arr[end - 1]; + continue; + } + + // block_two (Ast.zig:1117): if rhs, recurse rhs +1; if lhs, +1; else + // +1. Note: C parser uses 0 for "none" (OptionalIndex), not + // UINT32_MAX. + case AST_NODE_BLOCK_TWO: { + if (nd.rhs != 0) { + end_offset += 1; + n = nd.rhs; + } else if (nd.lhs != 0) { + end_offset += 1; + n = nd.lhs; + } else { + end_offset += 1; + return tree->nodes.main_tokens[n] + end_offset; + } + continue; + } + + // block_two_semicolon (Ast.zig:1153). + case AST_NODE_BLOCK_TWO_SEMICOLON: { + if (nd.rhs != 0) { + end_offset += 2; + n = nd.rhs; + } else if (nd.lhs != 0) { + end_offset += 2; + n = nd.lhs; + } else { + end_offset += 1; + return tree->nodes.main_tokens[n] + end_offset; + } + continue; + } + + // builtin_call_two (Ast.zig:1118): recurse into args + rparen. + case AST_NODE_BUILTIN_CALL_TWO: { + if (nd.rhs != 0) { + end_offset += 1; + n = nd.rhs; + } else if (nd.lhs != 0) { + end_offset += 1; + n = nd.lhs; + } else { + end_offset += 2; // lparen + rparen + return tree->nodes.main_tokens[n] + end_offset; + } + continue; + } + + case AST_NODE_BUILTIN_CALL_TWO_COMMA: { + if (nd.rhs != 0) { + end_offset += 2; // comma + rparen + n = nd.rhs; + } else if (nd.lhs != 0) { + end_offset += 2; + n = nd.lhs; + } else { + end_offset += 1; + return tree->nodes.main_tokens[n] + end_offset; + } + continue; + } + + // Unary ops: recurse into lhs (Ast.zig:895-910). + case AST_NODE_BOOL_NOT: + case AST_NODE_BIT_NOT: + case AST_NODE_NEGATION: + case AST_NODE_NEGATION_WRAP: + case AST_NODE_ADDRESS_OF: + case AST_NODE_TRY: + case AST_NODE_AWAIT: + case AST_NODE_OPTIONAL_TYPE: + case AST_NODE_COMPTIME: + case AST_NODE_NOSUSPEND: + case AST_NODE_RESUME: + n = nd.lhs; + continue; + + // return: optional operand (Ast.zig:998-1002). + case AST_NODE_RETURN: + if (nd.lhs != 0) { + n = nd.lhs; + continue; + } + return tree->nodes.main_tokens[n] + end_offset; + + // deref: main_token is the dot, +1 for '*' (Ast.zig:974). + case AST_NODE_DEREF: + return tree->nodes.main_tokens[n] + 1 + end_offset; + + // unwrap_optional: +1 for '?' (Ast.zig:971). + case AST_NODE_UNWRAP_OPTIONAL: + return tree->nodes.main_tokens[n] + 1 + end_offset; + + // for_range: recurse into rhs if present, else lhs. + case AST_NODE_FOR_RANGE: + if (nd.rhs != 0) { + n = nd.rhs; + } else { + // Unbounded range: last token is the '..' operator. + // main_token + 1 (the second dot of ..) + return tree->nodes.main_tokens[n] + 1 + end_offset; + } + continue; + + // error_value: main_token is `error`, last token is name (+2) + // (Ast.zig:986). + case AST_NODE_ERROR_VALUE: + return tree->nodes.main_tokens[n] + 2 + end_offset; + + // Terminals: return main_token + end_offset (Ast.zig:988-996). + case AST_NODE_NUMBER_LITERAL: + case AST_NODE_STRING_LITERAL: + case AST_NODE_IDENTIFIER: + case AST_NODE_ENUM_LITERAL: + case AST_NODE_CHAR_LITERAL: + case AST_NODE_UNREACHABLE_LITERAL: + case AST_NODE_ANYFRAME_LITERAL: + return tree->nodes.main_tokens[n] + end_offset; + + // call_one: recurse into lhs, +1 for ')'. + case AST_NODE_CALL_ONE: + end_offset += 1; // rparen + if (nd.rhs != 0) { + n = nd.rhs; + } else { + n = nd.lhs; + } + continue; + case AST_NODE_CALL_ONE_COMMA: + end_offset += 2; // comma + rparen + if (nd.rhs != 0) { + n = nd.rhs; + } else { + n = nd.lhs; + } + continue; + + // array_access: end_offset += 1 (rbracket), recurse rhs. + case AST_NODE_ARRAY_ACCESS: + end_offset += 1; + n = nd.rhs; + continue; + + // simple_var_decl: recurse into init/type (Ast.zig:1169-1178). + case AST_NODE_SIMPLE_VAR_DECL: + if (nd.rhs != 0) { + n = nd.rhs; // init expr + } else if (nd.lhs != 0) { + n = nd.lhs; // type expr + } else { + end_offset += 1; // from mut token to name + return tree->nodes.main_tokens[n] + end_offset; + } + continue; + + // aligned_var_decl: recurse into init/align (Ast.zig:1180-1187). + case AST_NODE_ALIGNED_VAR_DECL: + if (nd.rhs != 0) { + n = nd.rhs; // init expr + } else { + end_offset += 1; // rparen + n = nd.lhs; // align expr + } + continue; + + // local_var_decl (Ast.zig:1209-1217). + case AST_NODE_LOCAL_VAR_DECL: + if (nd.rhs != 0) { + n = nd.rhs; // init expr + } else { + // extra[lhs] has align_node + end_offset += 1; // rparen + n = tree->extra_data.arr[nd.lhs]; // align_node + } + continue; + + // global_var_decl (Ast.zig:1189-1207). + case AST_NODE_GLOBAL_VAR_DECL: + if (nd.rhs != 0) { + n = nd.rhs; // init expr + } else { + // extra[lhs] = {type_node, align_node, ...} + // complex; approximate by using main_token + end_offset += 1; + return tree->nodes.main_tokens[n] + end_offset; + } + continue; + + // slice_open: end_offset += 2 (ellipsis2 + rbracket), recurse rhs + // (Ast.zig:1245-1248). + case AST_NODE_SLICE_OPEN: + end_offset += 2; + n = nd.rhs; + continue; + + // grouped_expression: end_offset += 1 (rparen), recurse lhs. + case AST_NODE_GROUPED_EXPRESSION: + end_offset += 1; + n = nd.lhs; + continue; + + // if_simple: recurse into body (rhs) (Ast.zig:942). + case AST_NODE_IF_SIMPLE: + case AST_NODE_WHILE_SIMPLE: + case AST_NODE_FOR_SIMPLE: + case AST_NODE_FN_DECL: + case AST_NODE_ARRAY_TYPE: + n = nd.rhs; + continue; + + // if: recurse into else_expr (Ast.zig:1295). + case AST_NODE_IF: { + // If[rhs]: { then_expr, else_expr } + n = tree->extra_data.arr[nd.rhs + 1]; // else_expr + continue; + } + + // while: recurse into else_expr (Ast.zig:1290). + case AST_NODE_WHILE: { + // While[rhs]: { cont_expr, then_expr, else_expr } + n = tree->extra_data.arr[nd.rhs + 2]; // else_expr + continue; + } + + // while_cont: recurse into then_expr (Ast.zig:943-like). + case AST_NODE_WHILE_CONT: { + // WhileCont[rhs]: { cont_expr, then_expr } + n = tree->extra_data.arr[nd.rhs + 1]; // then_expr + continue; + } + + // switch: recurse into last case (Ast.zig:1031-1041). + case AST_NODE_SWITCH: { + uint32_t ei = nd.rhs; + uint32_t cs = tree->extra_data.arr[ei]; + uint32_t ce = tree->extra_data.arr[ei + 1]; + if (cs == ce) { + end_offset += 3; // rparen, lbrace, rbrace + n = nd.lhs; + } else { + end_offset += 1; // rbrace + n = tree->extra_data.arr[ce - 1]; + } + continue; + } + case AST_NODE_SWITCH_COMMA: { + uint32_t ei = nd.rhs; + uint32_t cs = tree->extra_data.arr[ei]; + uint32_t ce = tree->extra_data.arr[ei + 1]; + assert(cs != ce); + end_offset += 2; // comma + rbrace + n = tree->extra_data.arr[ce - 1]; + continue; + } + + // switch_case_one: recurse into rhs (body) (Ast.zig:942). + case AST_NODE_SWITCH_CASE_ONE: + case AST_NODE_SWITCH_CASE_INLINE_ONE: + case AST_NODE_SWITCH_CASE: + case AST_NODE_SWITCH_CASE_INLINE: + n = nd.rhs; + continue; + + // switch_range: recurse into rhs (Ast.zig: binary op pattern). + case AST_NODE_SWITCH_RANGE: + n = nd.rhs; + continue; + + // struct_init_one: recurse into field if present, +1. + case AST_NODE_STRUCT_INIT_ONE: + end_offset += 1; // rbrace + if (nd.rhs != 0) { + n = nd.rhs; + } else { + return tree->nodes.main_tokens[n] + end_offset; + } + continue; + case AST_NODE_STRUCT_INIT_ONE_COMMA: + end_offset += 2; // comma + rbrace + n = nd.rhs; + continue; + + // struct_init_dot_two: similar to block_two. + case AST_NODE_STRUCT_INIT_DOT_TWO: + if (nd.rhs != 0) { + end_offset += 1; + n = nd.rhs; + } else if (nd.lhs != 0) { + end_offset += 1; + n = nd.lhs; + } else { + end_offset += 1; // rbrace + return tree->nodes.main_tokens[n] + end_offset; + } + continue; + case AST_NODE_STRUCT_INIT_DOT_TWO_COMMA: + end_offset += 2; + if (nd.rhs != 0) { + n = nd.rhs; + } else { + n = nd.lhs; + } + continue; + + // struct_init_dot: SubRange pattern. + case AST_NODE_STRUCT_INIT_DOT: + assert(nd.lhs != nd.rhs); + end_offset += 1; + n = tree->extra_data.arr[nd.rhs - 1]; + continue; + + // struct_init: node_and_extra SubRange pattern. + case AST_NODE_STRUCT_INIT: { + uint32_t si = tree->extra_data.arr[nd.rhs]; + uint32_t se = tree->extra_data.arr[nd.rhs + 1]; + assert(si != se); + end_offset += 1; + n = tree->extra_data.arr[se - 1]; + continue; + } + + // call: SubRange pattern. + case AST_NODE_CALL: { + uint32_t si = tree->extra_data.arr[nd.rhs]; + uint32_t se = tree->extra_data.arr[nd.rhs + 1]; + assert(si != se); + end_offset += 1; + n = tree->extra_data.arr[se - 1]; + continue; + } + case AST_NODE_CALL_COMMA: { + uint32_t si = tree->extra_data.arr[nd.rhs]; + uint32_t se = tree->extra_data.arr[nd.rhs + 1]; + assert(si != se); + end_offset += 2; + n = tree->extra_data.arr[se - 1]; + continue; + } + + // fn_proto_simple: recurse into rhs (return type). + case AST_NODE_FN_PROTO_SIMPLE: + case AST_NODE_FN_PROTO_ONE: + case AST_NODE_FN_PROTO_MULTI: + case AST_NODE_FN_PROTO: + n = nd.rhs; + continue; + + // error_set_decl: rhs is the closing rbrace token. + case AST_NODE_ERROR_SET_DECL: + return nd.rhs + end_offset; + + // ptr_type variants: recurse into rhs (child type). + case AST_NODE_PTR_TYPE_ALIGNED: + case AST_NODE_PTR_TYPE_SENTINEL: + case AST_NODE_PTR_TYPE: + case AST_NODE_PTR_TYPE_BIT_RANGE: + n = nd.rhs; + continue; + + // container_decl: extra_range pattern. + case AST_NODE_CONTAINER_DECL: + case AST_NODE_TAGGED_UNION: + assert(nd.lhs != nd.rhs); + end_offset += 1; + n = tree->extra_data.arr[nd.rhs - 1]; + continue; + case AST_NODE_CONTAINER_DECL_TRAILING: + case AST_NODE_TAGGED_UNION_TRAILING: + assert(nd.lhs != nd.rhs); + end_offset += 2; + n = tree->extra_data.arr[nd.rhs - 1]; + continue; + + // container_decl_two: like block_two. + case AST_NODE_CONTAINER_DECL_TWO: + case AST_NODE_TAGGED_UNION_TWO: + if (nd.rhs != 0) { + end_offset += 1; + n = nd.rhs; + } else if (nd.lhs != 0) { + end_offset += 1; + n = nd.lhs; + } else { + end_offset += 2; // lbrace + rbrace + return tree->nodes.main_tokens[n] + end_offset; + } + continue; + case AST_NODE_CONTAINER_DECL_TWO_TRAILING: + case AST_NODE_TAGGED_UNION_TWO_TRAILING: + end_offset += 2; + if (nd.rhs != 0) { + n = nd.rhs; + } else { + n = nd.lhs; + } + continue; + + // container_decl_arg: node_and_extra SubRange. + case AST_NODE_CONTAINER_DECL_ARG: { + uint32_t si = tree->extra_data.arr[nd.rhs]; + uint32_t se = tree->extra_data.arr[nd.rhs + 1]; + if (si == se) { + end_offset += 3; // rparen + lbrace + rbrace + n = nd.lhs; + } else { + end_offset += 1; + n = tree->extra_data.arr[se - 1]; + } + continue; + } + case AST_NODE_CONTAINER_DECL_ARG_TRAILING: { + uint32_t si = tree->extra_data.arr[nd.rhs]; + uint32_t se = tree->extra_data.arr[nd.rhs + 1]; + assert(si != se); + end_offset += 2; + n = tree->extra_data.arr[se - 1]; + continue; + } + + // slice: extra data pattern. + case AST_NODE_SLICE: { + // Slice[rhs]: { start, end } + end_offset += 1; + n = tree->extra_data.arr[nd.rhs + 1]; // end + continue; + } + case AST_NODE_SLICE_SENTINEL: { + // SliceSentinel[rhs]: { start, end, sentinel } + end_offset += 1; + n = tree->extra_data.arr[nd.rhs + 2]; // sentinel + continue; + } + + // array_type_sentinel: extra data. + case AST_NODE_ARRAY_TYPE_SENTINEL: { + // ArrayTypeSentinel[rhs]: { sentinel, elem_type } + n = tree->extra_data.arr[nd.rhs + 1]; // elem_type + continue; + } + + // multiline_string_literal: main_token + end_offset. + case AST_NODE_MULTILINE_STRING_LITERAL: + return nd.rhs + end_offset; + + // break/continue (Ast.zig:1275-1283). + // lhs is opt_token (null_token = UINT32_MAX), rhs is opt_node (0 = + // none). + case AST_NODE_BREAK: + case AST_NODE_CONTINUE: + if (nd.rhs != 0) { + n = nd.rhs; // optional rhs expression + } else if (nd.lhs != UINT32_MAX) { + return nd.lhs + end_offset; // label token + } else { + return tree->nodes.main_tokens[n] + end_offset; + } + continue; + + // array_init_one: end_offset += 1 (rbrace), recurse rhs + // (Ast.zig:1224-1230). + case AST_NODE_ARRAY_INIT_ONE: + end_offset += 1; + n = nd.rhs; + continue; + + case AST_NODE_ARRAY_INIT_ONE_COMMA: + end_offset += 2; // comma + rbrace + n = nd.rhs; + continue; + + // struct_init_dot_comma: SubRange pattern. + case AST_NODE_STRUCT_INIT_DOT_COMMA: + assert(nd.lhs != nd.rhs); + end_offset += 2; // comma + rbrace + n = tree->extra_data.arr[nd.rhs - 1]; + continue; + + // struct_init_comma: node_and_extra SubRange. + case AST_NODE_STRUCT_INIT_COMMA: { + uint32_t si = tree->extra_data.arr[nd.rhs]; + uint32_t se = tree->extra_data.arr[nd.rhs + 1]; + assert(si != se); + end_offset += 2; + n = tree->extra_data.arr[se - 1]; + continue; + } + + // array_init variants. + case AST_NODE_ARRAY_INIT: { + uint32_t si = tree->extra_data.arr[nd.rhs]; + uint32_t se = tree->extra_data.arr[nd.rhs + 1]; + assert(si != se); + end_offset += 1; + n = tree->extra_data.arr[se - 1]; + continue; + } + case AST_NODE_ARRAY_INIT_COMMA: { + uint32_t si = tree->extra_data.arr[nd.rhs]; + uint32_t se = tree->extra_data.arr[nd.rhs + 1]; + assert(si != se); + end_offset += 2; + n = tree->extra_data.arr[se - 1]; + continue; + } + + // array_init_dot variants. + case AST_NODE_ARRAY_INIT_DOT_TWO: + if (nd.rhs != 0) { + end_offset += 1; + n = nd.rhs; + } else if (nd.lhs != 0) { + end_offset += 1; + n = nd.lhs; + } else { + end_offset += 1; + return tree->nodes.main_tokens[n] + end_offset; + } + continue; + case AST_NODE_ARRAY_INIT_DOT_TWO_COMMA: + end_offset += 2; + if (nd.rhs != 0) { + n = nd.rhs; + } else { + n = nd.lhs; + } + continue; + case AST_NODE_ARRAY_INIT_DOT: + assert(nd.lhs != nd.rhs); + end_offset += 1; + n = tree->extra_data.arr[nd.rhs - 1]; + continue; + case AST_NODE_ARRAY_INIT_DOT_COMMA: + assert(nd.lhs != nd.rhs); + end_offset += 2; + n = tree->extra_data.arr[nd.rhs - 1]; + continue; + + // builtin_call (Ast.zig:1083-1105). + case AST_NODE_BUILTIN_CALL: { + uint32_t si = tree->extra_data.arr[nd.rhs]; + uint32_t se = tree->extra_data.arr[nd.rhs + 1]; + assert(si != se); + end_offset += 1; + n = tree->extra_data.arr[se - 1]; + continue; + } + case AST_NODE_BUILTIN_CALL_COMMA: { + uint32_t si = tree->extra_data.arr[nd.rhs]; + uint32_t se = tree->extra_data.arr[nd.rhs + 1]; + assert(si != se); + end_offset += 2; + n = tree->extra_data.arr[se - 1]; + continue; + } + + // for (Ast.zig:1300-1303): complex extra data. + case AST_NODE_FOR: { + // lhs = span.start (extra_data index), + // rhs = packed(inputs:u31, has_else:u1 at bit 31). + // extra[lhs..] = input nodes, then_body, [else_body]. + uint32_t span_start = nd.lhs; + uint32_t for_packed = nd.rhs; + uint32_t inputs = for_packed & 0x7FFFFFFFu; + bool has_else = (for_packed >> 31) != 0; + uint32_t last_idx = span_start + inputs + (has_else ? 1 : 0); + n = tree->extra_data.arr[last_idx]; + continue; + } + + default: + // Fallback: return main_token + end_offset. + return tree->nodes.main_tokens[n] + end_offset; + } + } +} + +// --- addParam (AstGen.zig:12390) --- +// Creates a param instruction with pl_tok data and type body in extra. + +static uint32_t addParam(GenZir* gz, GenZir* param_gz, ZirInstTag tag, + uint32_t abs_tok_index, uint32_t name) { + AstGenCtx* ag = gz->astgen; + + uint32_t body_len = gzInstructionsLen(param_gz); + const uint32_t* param_body = gzInstructionsSlice(param_gz); + + // Param payload: name, type{body_len:u31|is_generic:u1} + ensureExtraCapacity(ag, 2 + body_len); + uint32_t payload_index = ag->extra_len; + ag->extra[ag->extra_len++] = name; + ag->extra[ag->extra_len++] = body_len & 0x7FFFFFFFu; // is_generic = false + for (uint32_t i = 0; i < body_len; i++) { + ag->extra[ag->extra_len++] = param_body[i]; + } + gzUnstack(param_gz); + + // Emit the param instruction. + ensureInstCapacity(ag, 1); + uint32_t idx = ag->inst_len; + ag->inst_tags[idx] = tag; + ZirInstData data; + data.pl_tok.src_tok = tokenIndexToRelative(gz, abs_tok_index); + data.pl_tok.payload_index = payload_index; + ag->inst_datas[idx] = data; + ag->inst_len++; + gzAppendInstruction(gz, idx); + return idx; +} + +// --- addDbgVar (AstGen.zig:13196) --- + +static void addDbgVar( + GenZir* gz, ZirInstTag tag, uint32_t name, uint32_t inst) { + if (gz->is_comptime) + return; + ZirInstData data; + data.str_op.str = name; + data.str_op.operand = inst; + addInstruction(gz, tag, data); +} + +// --- addFunc (AstGen.zig:12023) --- +// Handles non-fancy func/func_inferred instructions. +// ret_body/ret_body_len: instructions for the return type sub-block (may be +// 0). ret_ref: if ret_body_len==0, the return type as a simple Ref. + +static uint32_t addFunc(GenZir* gz, uint32_t src_node, uint32_t block_node, + uint32_t param_block, uint32_t ret_ref, const uint32_t* ret_body, + uint32_t ret_body_len, const uint32_t* body, uint32_t body_len, + const uint32_t* param_insts, uint32_t param_insts_len, + uint32_t lbrace_line, uint32_t lbrace_column, bool is_inferred_error) { + AstGenCtx* ag = gz->astgen; + const Ast* tree = ag->tree; + uint32_t rbrace_tok = lastToken(tree, block_node); + uint32_t rbrace_start = tree->tokens.starts[rbrace_tok]; + advanceSourceCursor(ag, rbrace_start); + uint32_t rbrace_line = ag->source_line - gz->decl_line; + uint32_t rbrace_column = ag->source_column; + + // Build Func payload (Zir.Inst.Func: ret_ty, param_block, body_len). + // (AstGen.zig:12187-12194) + uint32_t ret_ty_packed_len; + if (ret_body_len > 0) { + ret_ty_packed_len = ret_body_len; // body-based return type + } else if (ret_ref != ZIR_REF_NONE) { + ret_ty_packed_len = 1; // simple Ref + } else { + ret_ty_packed_len = 0; // void return + } + // Pack RetTy: body_len:u31 | is_generic:bool(u1) = just body_len. + uint32_t ret_ty_packed + = ret_ty_packed_len & 0x7FFFFFFFu; // is_generic=false + + uint32_t fixup_body_len = countBodyLenAfterFixupsExtraRefs( + ag, body, body_len, param_insts, param_insts_len); + ensureExtraCapacity(ag, 3 + ret_ty_packed_len + fixup_body_len + 7); + uint32_t payload_index = ag->extra_len; + ag->extra[ag->extra_len++] = ret_ty_packed; // Func.ret_ty + ag->extra[ag->extra_len++] = param_block; // Func.param_block + ag->extra[ag->extra_len++] = fixup_body_len; // Func.body_len + + // Trailing ret_ty: either body instructions or a single ref. + if (ret_body_len > 0) { + for (uint32_t i = 0; i < ret_body_len; i++) + ag->extra[ag->extra_len++] = ret_body[i]; + } else if (ret_ref != ZIR_REF_NONE) { + ag->extra[ag->extra_len++] = ret_ref; + } + + // Body instructions with extra_refs for param_insts + // (AstGen.zig:12206). + appendBodyWithFixupsExtraRefs( + ag, body, body_len, param_insts, param_insts_len); + + // SrcLocs (AstGen.zig:12098-12106). + uint32_t columns = (lbrace_column & 0xFFFFu) | (rbrace_column << 16); + ag->extra[ag->extra_len++] = lbrace_line; + ag->extra[ag->extra_len++] = rbrace_line; + ag->extra[ag->extra_len++] = columns; + // proto_hash (4 words): zero for now. + ag->extra[ag->extra_len++] = 0; + ag->extra[ag->extra_len++] = 0; + ag->extra[ag->extra_len++] = 0; + ag->extra[ag->extra_len++] = 0; + + // Emit the func instruction (AstGen.zig:12220-12226). + ZirInstTag tag + = is_inferred_error ? ZIR_INST_FUNC_INFERRED : ZIR_INST_FUNC; + ZirInstData data; + data.pl_node.src_node = (int32_t)src_node - (int32_t)gz->decl_node_index; + data.pl_node.payload_index = payload_index; + return addInstruction(gz, tag, data); +} + +// --- testDecl (AstGen.zig:4708) --- + +static void testDecl(AstGenCtx* ag, GenZir* gz, uint32_t* wip_decl_insts, + uint32_t* decl_idx, uint32_t node) { + const Ast* tree = ag->tree; + AstData nd = tree->nodes.datas[node]; + uint32_t body_node = nd.rhs; + + // makeDeclaration before advanceSourceCursorToNode (AstGen.zig:4726-4729). + uint32_t decl_inst = makeDeclaration(ag, node); + wip_decl_insts[*decl_idx] = decl_inst; + (*decl_idx)++; + advanceSourceCursorToNode(ag, node); + + uint32_t decl_line = ag->source_line; + uint32_t decl_column = ag->source_column; + + // Extract test name (AstGen.zig:4748-4835). + uint32_t test_token = tree->nodes.main_tokens[node]; + uint32_t test_name_token = test_token + 1; + uint32_t test_name = 0; // NullTerminatedString.empty + DeclFlagsId decl_id = DECL_ID_UNNAMED_TEST; + + // Check if the token after 'test' is a string literal. + // We identify string literals by checking the source character. + uint32_t name_tok_start = tree->tokens.starts[test_name_token]; + if (name_tok_start < tree->source_len + && tree->source[name_tok_start] == '"') { + // String literal name. + uint32_t name_len; + strLitAsString(ag, test_name_token, &test_name, &name_len); + decl_id = DECL_ID_TEST; + } + // TODO: handle identifier test names (decltest). + + // Set up decl_block GenZir (AstGen.zig:4735-4743). + GenZir decl_block; + memset(&decl_block, 0, sizeof(decl_block)); + decl_block.base.tag = SCOPE_GEN_ZIR; + decl_block.parent = NULL; + decl_block.astgen = ag; + decl_block.decl_node_index = node; + decl_block.decl_line = decl_line; + decl_block.is_comptime = true; + decl_block.instructions_top = ag->scratch_inst_len; + decl_block.break_block = UINT32_MAX; + + // Set up fn_block GenZir (AstGen.zig:4837-4845). + GenZir fn_block; + memset(&fn_block, 0, sizeof(fn_block)); + fn_block.base.tag = SCOPE_GEN_ZIR; + fn_block.parent = &decl_block.base; + fn_block.astgen = ag; + fn_block.decl_node_index = node; + fn_block.decl_line = decl_line; + fn_block.is_comptime = false; + fn_block.instructions_top = ag->scratch_inst_len; + fn_block.break_block = UINT32_MAX; + + // Set fn_block and fn_ret_ty for the body (AstGen.zig:4849-4853). + void* prev_fn_block = ag->fn_block; + uint32_t prev_fn_ret_ty = ag->fn_ret_ty; + setFnBlock(ag, &fn_block); + ag->fn_ret_ty = ZIR_REF_ANYERROR_VOID_ERROR_UNION_TYPE; + + // Compute lbrace source location (AstGen.zig:4860-4862). + advanceSourceCursorToNode(ag, body_node); + uint32_t lbrace_line = ag->source_line - decl_line; + uint32_t lbrace_column = ag->source_column; + + // Process test body (AstGen.zig:4864). + uint32_t block_result + = fullBodyExpr(&fn_block, &fn_block.base, RL_NONE_VAL, body_node); + + ag->fn_block = prev_fn_block; + ag->fn_ret_ty = prev_fn_ret_ty; + + // If we hit unimplemented features, bail out. + if (ag->has_compile_errors) + return; + + // Add restore_err_ret_index + ret_implicit (AstGen.zig:4865-4871). + if (gzInstructionsLen(&fn_block) == 0 + || !refIsNoReturn(&fn_block, block_result)) { + ZirInstData rdata; + rdata.un_node.operand = ZIR_REF_NONE; // .none for .ret + rdata.un_node.src_node + = (int32_t)node - (int32_t)fn_block.decl_node_index; + addInstruction( + &fn_block, ZIR_INST_RESTORE_ERR_RET_INDEX_UNCONDITIONAL, rdata); + + uint32_t body_last_tok = lastToken(tree, body_node); + ZirInstData rdata2; + rdata2.un_tok.operand = ZIR_REF_VOID_VALUE; + rdata2.un_tok.src_tok = tokenIndexToRelative(&fn_block, body_last_tok); + addInstruction(&fn_block, ZIR_INST_RET_IMPLICIT, rdata2); + } + + // Read fn_block body before unstacking (AstGen.zig:4874). + // Upstream unstacks fn_block inside addFunc before appending the func + // instruction to decl_block. We must unstack fn_block first so that + // addFunc's addInstruction goes into decl_block's range. + const uint32_t* fn_body = gzInstructionsSlice(&fn_block); + uint32_t fn_body_len = gzInstructionsLen(&fn_block); + gzUnstack(&fn_block); + + // Create func instruction (AstGen.zig:4874-4897). + uint32_t func_ref = addFunc(&decl_block, node, body_node, decl_inst, + ZIR_REF_ANYERROR_VOID_ERROR_UNION_TYPE, NULL, 0, fn_body, fn_body_len, + NULL, 0, lbrace_line, lbrace_column, false); + + // break_inline returning func to declaration (AstGen.zig:4899). + makeBreakInline(&decl_block, decl_inst, func_ref, AST_NODE_OFFSET_NONE); + + // setDeclaration (AstGen.zig:4903-4923). + setDeclaration(ag, decl_inst, + (SetDeclArgs) { .src_line = decl_line, + .src_column = decl_column, + .id = decl_id, + .name = test_name, + .lib_name = UINT32_MAX, + .value_body = gzInstructionsSlice(&decl_block), + .value_body_len = gzInstructionsLen(&decl_block) }); + gzUnstack(&decl_block); + + (void)gz; +} + +// --- fnDecl (AstGen.zig:4067) / fnDeclInner (AstGen.zig:4228) --- +// Handles non-extern function declarations with bodies, including params. + +static void fnDecl(AstGenCtx* ag, GenZir* gz, uint32_t* wip_decl_insts, + uint32_t* decl_idx, uint32_t node) { + const Ast* tree = ag->tree; + AstData nd = tree->nodes.datas[node]; + + // For fn_decl: data.lhs = fn_proto node, data.rhs = body node. + uint32_t proto_node = nd.lhs; + uint32_t body_node = nd.rhs; + + // Get function name token (main_token of proto + 1 = fn name). + uint32_t fn_token = tree->nodes.main_tokens[proto_node]; + uint32_t fn_name_token = fn_token + 1; + + // Check for 'pub' modifier (Ast.zig:2003-2025). + bool is_pub = (fn_token > 0 + && tree->tokens.tags[fn_token - 1] == TOKEN_KEYWORD_PUB); + + // makeDeclaration on fn_proto node (AstGen.zig:4090). + uint32_t decl_inst = makeDeclaration(ag, proto_node); + wip_decl_insts[*decl_idx] = decl_inst; + (*decl_idx)++; + + advanceSourceCursorToNode(ag, node); + uint32_t decl_line = ag->source_line; + uint32_t decl_column = ag->source_column; + + // Save source cursor for restoring after ret_gz (AstGen.zig:4387-4388). + uint32_t saved_source_offset = ag->source_offset; + uint32_t saved_source_line = ag->source_line; + uint32_t saved_source_column = ag->source_column; + + AstNodeTag proto_tag = tree->nodes.tags[proto_node]; + AstData proto_data = tree->nodes.datas[proto_node]; + + // Extract return type node (rhs for all fn_proto variants). + uint32_t return_type_node = proto_data.rhs; + + // Detect inferred error set: token before return type is '!' + // (AstGen.zig:4249-4251). + bool is_inferred_error = false; + if (return_type_node != 0) { + uint32_t ret_first_tok = firstToken(tree, return_type_node); + if (ret_first_tok > 0) { + uint32_t maybe_bang = ret_first_tok - 1; + uint32_t bang_start = tree->tokens.starts[maybe_bang]; + if (tree->source[bang_start] == '!') + is_inferred_error = true; + } + } + + // Extract param type nodes from proto variant (AstGen.zig:4253-4254). + uint32_t param_nodes_buf[1]; // buffer for fn_proto_simple/fn_proto_one + const uint32_t* param_nodes = NULL; + uint32_t params_len = 0; + + if (proto_tag == AST_NODE_FN_PROTO_SIMPLE) { + // data.lhs = optional param node, data.rhs = return type. + if (proto_data.lhs != 0) { + param_nodes_buf[0] = proto_data.lhs; + param_nodes = param_nodes_buf; + params_len = 1; + } + } else if (proto_tag == AST_NODE_FN_PROTO_ONE) { + // data.lhs = extra_data index → AstFnProtoOne. + uint32_t extra_idx = proto_data.lhs; + uint32_t param + = tree->extra_data.arr[extra_idx]; // AstFnProtoOne.param + if (param != 0) { + param_nodes_buf[0] = param; + param_nodes = param_nodes_buf; + params_len = 1; + } + } else if (proto_tag == AST_NODE_FN_PROTO_MULTI) { + // data.lhs = extra_data index → SubRange{start, end}. + uint32_t extra_idx = proto_data.lhs; + uint32_t range_start = tree->extra_data.arr[extra_idx]; + uint32_t range_end = tree->extra_data.arr[extra_idx + 1]; + param_nodes = tree->extra_data.arr + range_start; + params_len = range_end - range_start; + } else if (proto_tag == AST_NODE_FN_PROTO) { + // data.lhs = extra_data index → AstFnProto{params_start, params_end, + // ...}. + uint32_t extra_idx = proto_data.lhs; + uint32_t pstart = tree->extra_data.arr[extra_idx]; // params_start + uint32_t pend = tree->extra_data.arr[extra_idx + 1]; // params_end + param_nodes = tree->extra_data.arr + pstart; + params_len = pend - pstart; + } + + // decl_gz (called value_gz in caller, decl_gz in fnDeclInner) + // (AstGen.zig:4194-4201). + GenZir decl_gz; + memset(&decl_gz, 0, sizeof(decl_gz)); + decl_gz.base.tag = SCOPE_GEN_ZIR; + decl_gz.parent = NULL; + decl_gz.astgen = ag; + decl_gz.decl_node_index = proto_node; + decl_gz.decl_line = decl_line; + decl_gz.is_comptime = true; + decl_gz.instructions_top = ag->scratch_inst_len; + decl_gz.break_block = UINT32_MAX; + + // --- Parameter iteration (AstGen.zig:4260-4363) --- + // Walk params, creating param instructions and ScopeLocalVal entries. + // We keep param scopes on the C stack (max 32 params like upstream). + Scope* params_scope = &decl_gz.base; + ScopeLocalVal param_scopes[32]; + uint32_t param_scope_count = 0; + // Collect param instruction indices (AstGen.zig:4254, 4360). + uint32_t param_insts[32]; + uint32_t param_insts_len = 0; + + for (uint32_t param_i = 0; param_i < params_len; param_i++) { + uint32_t param_type_node = param_nodes[param_i]; + + // Find param name token by scanning backwards from firstToken of + // type expression (mirrors FnProto.Iterator.next, Ast.zig:2687). + // Layout: [comptime] [name] [:] type_expr + // So: type_first_tok - 1 is ':', type_first_tok - 2 is name. + uint32_t type_first_tok = firstToken(tree, param_type_node); + uint32_t name_token = 0; // 0 = no name found + bool is_comptime_param = false; + if (type_first_tok >= 2 + && tree->tokens.tags[type_first_tok - 1] == TOKEN_COLON) { + // Named parameter: name is at type_first_tok - 2. + uint32_t maybe_name = type_first_tok - 2; + uint32_t name_start = tree->tokens.starts[maybe_name]; + char ch = tree->source[name_start]; + if ((ch >= 'a' && ch <= 'z') || (ch >= 'A' && ch <= 'Z') + || ch == '_' || ch == '@') { + // Could be name or comptime/noalias keyword. + if (name_start + 8 <= tree->source_len + && memcmp(tree->source + name_start, "comptime", 8) == 0) { + is_comptime_param = true; + } else if (name_start + 7 <= tree->source_len + && memcmp(tree->source + name_start, "noalias", 7) == 0) { + // noalias keyword, not a name. + } else { + name_token = maybe_name; + // Check for preceding comptime keyword. + if (maybe_name > 0) { + uint32_t prev = maybe_name - 1; + uint32_t prev_start = tree->tokens.starts[prev]; + if (prev_start + 8 <= tree->source_len + && memcmp(tree->source + prev_start, "comptime", 8) + == 0) + is_comptime_param = true; + } + } + } + } + + // Determine param name string (AstGen.zig:4283-4321). + // Must be resolved BEFORE type expression to match upstream string + // table ordering. + uint32_t param_name_str = 0; // NullTerminatedString.empty + if (name_token != 0) { + uint32_t name_start = tree->tokens.starts[name_token]; + char nch = tree->source[name_start]; + // Skip "_" params (AstGen.zig:4285-4286). + if (nch == '_') { + uint32_t next_start = tree->tokens.starts[name_token + 1]; + if (next_start == name_start + 1) { + // Single underscore: empty name. + param_name_str = 0; + } else { + param_name_str = identAsString(ag, name_token); + } + } else { + param_name_str = identAsString(ag, name_token); + } + } + + // Evaluate param type expression in a sub-block + // (AstGen.zig:4333-4337). + GenZir param_gz = makeSubBlock(&decl_gz, params_scope); + uint32_t param_type_ref + = expr(¶m_gz, params_scope, param_type_node); + + if (ag->has_compile_errors) + return; + + // The break_inline target is the param instruction we're about to + // create (AstGen.zig:4336-4337). + uint32_t param_inst_expected = ag->inst_len + 1; + // +1 because: the break_inline is emitted first (uses inst_len), + // then addParam emits the param instruction at inst_len. + // Actually, addParam emits the param after break_inline. The + // break_inline's block_inst field should point to the param inst. + // We know it will be at ag->inst_len after the break_inline. + makeBreakInline(¶m_gz, param_inst_expected, param_type_ref, + (int32_t)param_type_node - (int32_t)param_gz.decl_node_index); + + // Create param instruction (AstGen.zig:4341-4343). + ZirInstTag param_tag + = is_comptime_param ? ZIR_INST_PARAM_COMPTIME : ZIR_INST_PARAM; + uint32_t name_tok_for_src = name_token != 0 + ? name_token + : tree->nodes.main_tokens[param_type_node]; + uint32_t param_inst = addParam( + &decl_gz, ¶m_gz, param_tag, name_tok_for_src, param_name_str); + (void)param_inst_expected; + // Record param instruction index (AstGen.zig:4360). + if (param_insts_len < 32) + param_insts[param_insts_len++] = param_inst; + + // Create ScopeLocalVal for this param (AstGen.zig:4349-4359). + if (param_name_str != 0 && param_scope_count < 32) { + ScopeLocalVal* lv = ¶m_scopes[param_scope_count++]; + lv->base.tag = SCOPE_LOCAL_VAL; + lv->parent = params_scope; + lv->gen_zir = &decl_gz; + lv->inst = param_inst + ZIR_REF_START_INDEX; // toRef() + lv->token_src = name_token; + lv->name = param_name_str; + params_scope = &lv->base; + } + } + + // --- Return type (AstGen.zig:4369-4383) --- + GenZir ret_gz = makeSubBlock(&decl_gz, params_scope); + uint32_t ret_ref = ZIR_REF_NONE; + if (return_type_node != 0) { + ret_ref = expr(&ret_gz, params_scope, return_type_node); + if (ag->has_compile_errors) + return; + // If ret_gz produced instructions, add break_inline + // (AstGen.zig:4377-4381). + if (gzInstructionsLen(&ret_gz) > 0) { + // break_inline targets the func instruction (which doesn't + // exist yet). We use 0 as placeholder and patch later. + makeBreakInline(&ret_gz, 0, ret_ref, AST_NODE_OFFSET_NONE); + } + } + // Map void_type → .none (AstGen.zig:12054). + if (ret_ref == ZIR_REF_VOID_TYPE) + ret_ref = ZIR_REF_NONE; + + uint32_t ret_body_len = gzInstructionsLen(&ret_gz); + // Copy ret_body before unstacking: body_gz reuses the same scratch area. + uint32_t* ret_body = NULL; + if (ret_body_len > 0) { + ret_body = malloc(ret_body_len * sizeof(uint32_t)); + if (!ret_body) + abort(); + memcpy(ret_body, gzInstructionsSlice(&ret_gz), + ret_body_len * sizeof(uint32_t)); + } + gzUnstack(&ret_gz); + + // Restore source cursor (AstGen.zig:4387-4388). + ag->source_offset = saved_source_offset; + ag->source_line = saved_source_line; + ag->source_column = saved_source_column; + + // --- Body (AstGen.zig:4415-4424) --- + GenZir body_gz; + memset(&body_gz, 0, sizeof(body_gz)); + body_gz.base.tag = SCOPE_GEN_ZIR; + body_gz.parent = params_scope; + body_gz.astgen = ag; + body_gz.decl_node_index = proto_node; + body_gz.decl_line = decl_line; + body_gz.is_comptime = false; + body_gz.instructions_top = ag->scratch_inst_len; + + // Set fn_block and fn_ret_ty for the body (AstGen.zig:4442-4455). + void* prev_fn_block = ag->fn_block; + setFnBlock(ag, &body_gz); + uint32_t prev_fn_ret_ty = ag->fn_ret_ty; + if (is_inferred_error || ret_ref == ZIR_REF_NONE) { + // Non-void non-trivial return type: emit ret_type instruction. + if (ret_body_len > 0 || is_inferred_error) { + ZirInstData rtdata; + memset(&rtdata, 0, sizeof(rtdata)); + rtdata.node = (int32_t)node - (int32_t)body_gz.decl_node_index; + ag->fn_ret_ty + = addInstruction(&body_gz, ZIR_INST_RET_TYPE, rtdata); + } else { + ag->fn_ret_ty = ret_ref; // void + } + } else { + // ret_ref is a simple ref (not void, not inferred error). + // Still need ret_type instruction if it resolved to an inst. + if (ret_ref >= ZIR_REF_START_INDEX) { + ZirInstData rtdata; + memset(&rtdata, 0, sizeof(rtdata)); + rtdata.node = (int32_t)node - (int32_t)body_gz.decl_node_index; + ag->fn_ret_ty + = addInstruction(&body_gz, ZIR_INST_RET_TYPE, rtdata); + } else { + ag->fn_ret_ty = ret_ref; + } + } + + // Process function body (AstGen.zig:4461-4465). + advanceSourceCursorToNode(ag, body_node); + uint32_t lbrace_line = ag->source_line - decl_line; + uint32_t lbrace_column = ag->source_column; + + fullBodyExpr(&body_gz, &body_gz.base, RL_NONE_VAL, body_node); + + ag->fn_block = prev_fn_block; + ag->fn_ret_ty = prev_fn_ret_ty; + + if (ag->has_compile_errors) { + free(ret_body); + return; + } + + // Add implicit return at end of function body + // (AstGen.zig:4465-4871). + if (!endsWithNoReturn(&body_gz)) { + ZirInstData rdata; + rdata.un_node.operand = ZIR_REF_NONE; + rdata.un_node.src_node + = (int32_t)node - (int32_t)body_gz.decl_node_index; + addInstruction( + &body_gz, ZIR_INST_RESTORE_ERR_RET_INDEX_UNCONDITIONAL, rdata); + + uint32_t body_last_tok = lastToken(tree, body_node); + ZirInstData rdata2; + rdata2.un_tok.operand = ZIR_REF_VOID_VALUE; + rdata2.un_tok.src_tok = tokenIndexToRelative(&body_gz, body_last_tok); + addInstruction(&body_gz, ZIR_INST_RET_IMPLICIT, rdata2); + } + + // Read body before unstacking (AstGen.zig:12215-12218). + const uint32_t* fn_body = gzInstructionsSlice(&body_gz); + uint32_t fn_body_len = gzInstructionsLen(&body_gz); + gzUnstack(&body_gz); + + // Create func instruction (AstGen.zig:4476-4494). + uint32_t func_ref = addFunc(&decl_gz, node, body_node, decl_inst, ret_ref, + ret_body, ret_body_len, fn_body, fn_body_len, param_insts, + param_insts_len, lbrace_line, lbrace_column, is_inferred_error); + + // Patch ret_body break_inline to point to func instruction + // (AstGen.zig:12199-12202). + if (ret_body_len > 0) { + uint32_t break_inst = ret_body[ret_body_len - 1]; + // The break_inline payload is at payload_index; block_inst is at + // offset 1 in the Break struct. + uint32_t break_payload + = ag->inst_datas[break_inst].break_data.payload_index; + ag->extra[break_payload + 1] = func_ref - ZIR_REF_START_INDEX; + } + free(ret_body); + + // break_inline returning func to declaration (AstGen.zig:4495). + // nodeIndexToRelative(decl_node) = node - decl_gz.decl_node_index. + makeBreakInline( + &decl_gz, decl_inst, func_ref, (int32_t)node - (int32_t)proto_node); + + // setDeclaration (AstGen.zig:4208-4225). + DeclFlagsId decl_id + = is_pub ? DECL_ID_PUB_CONST_SIMPLE : DECL_ID_CONST_SIMPLE; + uint32_t name_str = identAsString(ag, fn_name_token); + setDeclaration(ag, decl_inst, + (SetDeclArgs) { .src_line = decl_line, + .src_column = decl_column, + .id = decl_id, + .name = name_str, + .lib_name = UINT32_MAX, + .value_body = gzInstructionsSlice(&decl_gz), + .value_body_len = gzInstructionsLen(&decl_gz) }); + gzUnstack(&decl_gz); + + (void)gz; +} + +// --- comptimeDecl (AstGen.zig:4645) --- + +static void comptimeDecl(AstGenCtx* ag, GenZir* gz, uint32_t* wip_decl_insts, + uint32_t* decl_idx, uint32_t node) { + // makeDeclaration before advanceSourceCursorToNode (AstGen.zig:4663-4665). + uint32_t decl_inst = makeDeclaration(ag, node); + wip_decl_insts[*decl_idx] = decl_inst; + (*decl_idx)++; + + advanceSourceCursorToNode(ag, node); + + uint32_t decl_line = ag->source_line; + uint32_t decl_column = ag->source_column; + + // Value sub-block (AstGen.zig:4675-4686). + GenZir value_gz; + memset(&value_gz, 0, sizeof(value_gz)); + value_gz.base.tag = SCOPE_GEN_ZIR; + value_gz.parent = NULL; + value_gz.astgen = ag; + value_gz.decl_node_index = node; + value_gz.decl_line = decl_line; + value_gz.is_comptime = true; + value_gz.instructions_top = ag->scratch_inst_len; + + // For comptime {}: body is empty block → no instructions generated. + // comptime_gz.isEmpty() == true → addBreak(.break_inline, decl_inst, + // .void_value) (AstGen.zig:4685-4686) + makeBreakInline( + &value_gz, decl_inst, ZIR_REF_VOID_VALUE, AST_NODE_OFFSET_NONE); + + setDeclaration(ag, decl_inst, + (SetDeclArgs) { .src_line = decl_line, + .src_column = decl_column, + .id = DECL_ID_COMPTIME, + .name = 0, + .lib_name = UINT32_MAX, + .value_body = gzInstructionsSlice(&value_gz), + .value_body_len = gzInstructionsLen(&value_gz) }); + gzUnstack(&value_gz); + + (void)gz; +} + +// --- globalVarDecl (AstGen.zig:4498) --- + +// Extract VarDecl fields from an AST node (Ast.zig:1326-1380). +typedef struct { + uint32_t mut_token; + uint32_t type_node; // 0 = none + uint32_t align_node; // 0 = none + uint32_t addrspace_node; // 0 = none + uint32_t section_node; // 0 = none + uint32_t init_node; // UINT32_MAX = none + bool is_pub; + bool is_extern; + bool is_export; + bool is_mutable; + bool is_threadlocal; + uint32_t lib_name_token; // UINT32_MAX = none +} VarDeclInfo; + +static VarDeclInfo extractVarDecl(const Ast* tree, uint32_t node) { + AstNodeTag tag = tree->nodes.tags[node]; + AstData nd = tree->nodes.datas[node]; + uint32_t mut_token = tree->nodes.main_tokens[node]; + VarDeclInfo info; + memset(&info, 0, sizeof(info)); + info.mut_token = mut_token; + info.init_node = UINT32_MAX; + info.lib_name_token = UINT32_MAX; + + switch (tag) { + case AST_NODE_SIMPLE_VAR_DECL: + // lhs = type_node (optional), rhs = init_node (optional) + info.type_node = nd.lhs; + info.init_node = nd.rhs; + break; + case AST_NODE_ALIGNED_VAR_DECL: + // lhs = align_node, rhs = init_node (optional) + info.align_node = nd.lhs; + info.init_node = nd.rhs; + break; + case AST_NODE_GLOBAL_VAR_DECL: { + // lhs = extra_data index, rhs = init_node (optional) + uint32_t ei = nd.lhs; + info.type_node = tree->extra_data.arr[ei + 0]; + info.align_node = tree->extra_data.arr[ei + 1]; + info.addrspace_node = tree->extra_data.arr[ei + 2]; + info.section_node = tree->extra_data.arr[ei + 3]; + info.init_node = nd.rhs; + break; + } + case AST_NODE_LOCAL_VAR_DECL: { + // lhs = extra_data index, rhs = init_node (optional) + uint32_t ei = nd.lhs; + info.type_node = tree->extra_data.arr[ei + 0]; + info.align_node = tree->extra_data.arr[ei + 1]; + info.init_node = nd.rhs; + break; + } + default: + break; + } + + // Scan backwards from mut_token to find modifiers (Ast.zig:2003-2025). + info.is_mutable = (tree->tokens.tags[mut_token] == TOKEN_KEYWORD_VAR); + for (uint32_t i = mut_token; i > 0;) { + i--; + TokenizerTag ttag = tree->tokens.tags[i]; + if (ttag == TOKEN_KEYWORD_EXTERN) + info.is_extern = true; + else if (ttag == TOKEN_KEYWORD_EXPORT) + info.is_export = true; + else if (ttag == TOKEN_KEYWORD_PUB) + info.is_pub = true; + else if (ttag == TOKEN_KEYWORD_THREADLOCAL) + info.is_threadlocal = true; + else if (ttag == TOKEN_STRING_LITERAL) + info.lib_name_token = i; + else + break; + } + return info; +} + +// Compute DeclFlagsId from VarDecl properties (AstGen.zig:13916-13972). +static DeclFlagsId computeVarDeclId(bool is_mutable, bool is_pub, + bool is_extern, bool is_export, bool is_threadlocal, bool has_type_body, + bool has_special_body, bool has_lib_name) { + if (!is_mutable) { + // const + if (is_extern) { + if (is_pub) { + if (has_lib_name || has_special_body) + return DECL_ID_PUB_EXTERN_CONST; + return DECL_ID_PUB_EXTERN_CONST_SIMPLE; + } + if (has_lib_name || has_special_body) + return DECL_ID_EXTERN_CONST; + return DECL_ID_EXTERN_CONST_SIMPLE; + } + if (is_export) + return is_pub ? DECL_ID_PUB_EXPORT_CONST : DECL_ID_EXPORT_CONST; + if (is_pub) { + if (has_special_body) + return DECL_ID_PUB_CONST; + if (has_type_body) + return DECL_ID_PUB_CONST_TYPED; + return DECL_ID_PUB_CONST_SIMPLE; + } + if (has_special_body) + return DECL_ID_CONST; + if (has_type_body) + return DECL_ID_CONST_TYPED; + return DECL_ID_CONST_SIMPLE; + } + // var + if (is_extern) { + if (is_pub) { + if (is_threadlocal) + return DECL_ID_PUB_EXTERN_VAR_THREADLOCAL; + return DECL_ID_PUB_EXTERN_VAR; + } + if (is_threadlocal) + return DECL_ID_EXTERN_VAR_THREADLOCAL; + return DECL_ID_EXTERN_VAR; + } + if (is_export) { + if (is_pub) { + if (is_threadlocal) + return DECL_ID_PUB_EXPORT_VAR_THREADLOCAL; + return DECL_ID_PUB_EXPORT_VAR; + } + if (is_threadlocal) + return DECL_ID_EXPORT_VAR_THREADLOCAL; + return DECL_ID_EXPORT_VAR; + } + if (is_pub) { + if (is_threadlocal) + return DECL_ID_PUB_VAR_THREADLOCAL; + if (has_special_body || has_type_body) + return DECL_ID_PUB_VAR; + return DECL_ID_PUB_VAR_SIMPLE; + } + if (is_threadlocal) + return DECL_ID_VAR_THREADLOCAL; + if (has_special_body || has_type_body) + return DECL_ID_VAR; + return DECL_ID_VAR_SIMPLE; +} + +static void globalVarDecl(AstGenCtx* ag, GenZir* gz, uint32_t* wip_decl_insts, + uint32_t* decl_idx, uint32_t node) { + const Ast* tree = ag->tree; + VarDeclInfo vd = extractVarDecl(tree, node); + uint32_t name_token = vd.mut_token + 1; + + // advanceSourceCursorToNode before makeDeclaration (AstGen.zig:4542-4546). + advanceSourceCursorToNode(ag, node); + uint32_t decl_column = ag->source_column; + + uint32_t decl_inst = makeDeclaration(ag, node); + wip_decl_insts[*decl_idx] = decl_inst; + (*decl_idx)++; + + // Set up type sub-block (AstGen.zig:4574-4582). + GenZir type_gz; + memset(&type_gz, 0, sizeof(type_gz)); + type_gz.base.tag = SCOPE_GEN_ZIR; + type_gz.astgen = ag; + type_gz.decl_node_index = node; + type_gz.instructions_top = ag->scratch_inst_len; + type_gz.decl_line = ag->source_line; + type_gz.is_comptime = true; + + if (vd.type_node != 0) { + uint32_t type_inst = typeExpr(&type_gz, &type_gz.base, vd.type_node); + makeBreakInline(&type_gz, decl_inst, type_inst, 0); + } + + // Record type_gz boundary for slicing. + uint32_t type_top = ag->scratch_inst_len; + + // Align sub-block (AstGen.zig:4592-4596). + GenZir align_gz; + memset(&align_gz, 0, sizeof(align_gz)); + align_gz.base.tag = SCOPE_GEN_ZIR; + align_gz.astgen = ag; + align_gz.decl_node_index = node; + align_gz.instructions_top = type_top; + align_gz.decl_line = ag->source_line; + align_gz.is_comptime = true; + + if (vd.align_node != 0) { + uint32_t align_inst = expr(&align_gz, &align_gz.base, vd.align_node); + makeBreakInline(&align_gz, decl_inst, align_inst, 0); + } + + uint32_t align_top = ag->scratch_inst_len; + + // Linksection sub-block (AstGen.zig:4598-4602). + GenZir linksection_gz; + memset(&linksection_gz, 0, sizeof(linksection_gz)); + linksection_gz.base.tag = SCOPE_GEN_ZIR; + linksection_gz.astgen = ag; + linksection_gz.decl_node_index = node; + linksection_gz.instructions_top = align_top; + linksection_gz.decl_line = ag->source_line; + linksection_gz.is_comptime = true; + + if (vd.section_node != 0) { + uint32_t ls_inst + = expr(&linksection_gz, &linksection_gz.base, vd.section_node); + makeBreakInline(&linksection_gz, decl_inst, ls_inst, 0); + } + + uint32_t linksection_top = ag->scratch_inst_len; + + // Addrspace sub-block (AstGen.zig:4604-4608). + GenZir addrspace_gz; + memset(&addrspace_gz, 0, sizeof(addrspace_gz)); + addrspace_gz.base.tag = SCOPE_GEN_ZIR; + addrspace_gz.astgen = ag; + addrspace_gz.decl_node_index = node; + addrspace_gz.instructions_top = linksection_top; + addrspace_gz.decl_line = ag->source_line; + addrspace_gz.is_comptime = true; + + if (vd.addrspace_node != 0) { + uint32_t as_inst + = expr(&addrspace_gz, &addrspace_gz.base, vd.addrspace_node); + makeBreakInline(&addrspace_gz, decl_inst, as_inst, 0); + } + + uint32_t addrspace_top = ag->scratch_inst_len; + + // Value sub-block (AstGen.zig:4610-4620). + GenZir value_gz; + memset(&value_gz, 0, sizeof(value_gz)); + value_gz.base.tag = SCOPE_GEN_ZIR; + value_gz.astgen = ag; + value_gz.decl_node_index = node; + value_gz.instructions_top = addrspace_top; + value_gz.decl_line = ag->source_line; + value_gz.is_comptime = true; + + if (vd.init_node != UINT32_MAX && vd.init_node != 0) { + uint32_t init_ref = expr(&value_gz, &value_gz.base, vd.init_node); + makeBreakInline(&value_gz, decl_inst, init_ref, 0); + } + + // Compute body slices (instructionsSliceUpto). + const uint32_t* type_body + = ag->scratch_instructions + type_gz.instructions_top; + uint32_t type_body_len = type_top - type_gz.instructions_top; + const uint32_t* align_body + = ag->scratch_instructions + align_gz.instructions_top; + uint32_t align_body_len = align_top - align_gz.instructions_top; + const uint32_t* ls_body + = ag->scratch_instructions + linksection_gz.instructions_top; + uint32_t ls_body_len = linksection_top - linksection_gz.instructions_top; + const uint32_t* as_body + = ag->scratch_instructions + addrspace_gz.instructions_top; + uint32_t as_body_len = addrspace_top - addrspace_gz.instructions_top; + const uint32_t* val_body = gzInstructionsSlice(&value_gz); + uint32_t val_body_len = gzInstructionsLen(&value_gz); + + bool has_type_body = (type_body_len > 0); + bool has_special_body + = (align_body_len > 0 || ls_body_len > 0 || as_body_len > 0); + bool has_lib_name = (vd.lib_name_token != UINT32_MAX); + + uint32_t name_str = identAsString(ag, name_token); + + DeclFlagsId decl_id = computeVarDeclId(vd.is_mutable, vd.is_pub, + vd.is_extern, vd.is_export, vd.is_threadlocal, has_type_body, + has_special_body, has_lib_name); + + // Compute lib_name string index. + uint32_t lib_name = UINT32_MAX; + if (has_lib_name) { + uint32_t li, ll; + strLitAsString(ag, vd.lib_name_token, &li, &ll); + lib_name = li; + } + + setDeclaration(ag, decl_inst, + (SetDeclArgs) { .src_line = ag->source_line, + .src_column = decl_column, + .id = decl_id, + .name = name_str, + .lib_name = lib_name, + .type_body = type_body, + .type_body_len = type_body_len, + .align_body = align_body, + .align_body_len = align_body_len, + .linksection_body = ls_body, + .linksection_body_len = ls_body_len, + .addrspace_body = as_body, + .addrspace_body_len = as_body_len, + .value_body = val_body, + .value_body_len = val_body_len }); + + gzUnstack(&value_gz); + + (void)gz; +} + +// --- nodeImpliesMoreThanOnePossibleValue (AstGen.zig:10548) --- +// Check if an identifier is a primitive type with more than one value. +static bool identImpliesMoreThanOnePossibleValue( + const Ast* tree, uint32_t main_token) { + uint32_t start = tree->tokens.starts[main_token]; + const char* src = tree->source + start; + // Match known primitive types that have more than one possible value. + // (AstGen.zig:10729-10766) + if (src[0] == 'u' || src[0] == 'i') { + // u8, u16, u32, u64, u128, u1, u29, usize, i8, i16, i32, i64, i128, + // isize + char c1 = src[1]; + if (c1 >= '0' && c1 <= '9') + return true; + if (c1 == 's') // usize, isize + return (src[2] == 'i' && src[3] == 'z' && src[4] == 'e'); + } + if (src[0] == 'f') { + // f16, f32, f64, f80, f128 + char c1 = src[1]; + if (c1 >= '0' && c1 <= '9') + return true; + } + if (src[0] == 'b' && src[1] == 'o' && src[2] == 'o' && src[3] == 'l' + && !(src[4] >= 'a' && src[4] <= 'z') + && !(src[4] >= 'A' && src[4] <= 'Z') + && !(src[4] >= '0' && src[4] <= '9') && src[4] != '_') + return true; + if (src[0] == 'c' && src[1] == '_') + return true; // c_int, c_long, etc. + if (src[0] == 'a' && src[1] == 'n' && src[2] == 'y') { + // anyerror, anyframe, anyopaque + return true; + } + if (src[0] == 'c' && src[1] == 'o' && src[2] == 'm' && src[3] == 'p' + && src[4] == 't' && src[5] == 'i' && src[6] == 'm' && src[7] == 'e') + return true; // comptime_float, comptime_int + if (src[0] == 't' && src[1] == 'y' && src[2] == 'p' && src[3] == 'e' + && !(src[4] >= 'a' && src[4] <= 'z') + && !(src[4] >= 'A' && src[4] <= 'Z') + && !(src[4] >= '0' && src[4] <= '9') && src[4] != '_') + return true; + return false; +} + +static bool nodeImpliesMoreThanOnePossibleValue( + const Ast* tree, uint32_t node) { + uint32_t cur = node; + while (1) { + AstNodeTag tag = tree->nodes.tags[cur]; + switch (tag) { + // Pointer/optional/array/anyframe types → true + // (AstGen.zig:10718-10725) + case AST_NODE_PTR_TYPE_ALIGNED: + case AST_NODE_PTR_TYPE_SENTINEL: + case AST_NODE_PTR_TYPE: + case AST_NODE_PTR_TYPE_BIT_RANGE: + case AST_NODE_OPTIONAL_TYPE: + case AST_NODE_ANYFRAME_TYPE: + case AST_NODE_ARRAY_TYPE_SENTINEL: + return true; + // Forward to LHS: try, comptime, nosuspend + // (AstGen.zig:10710-10713) + case AST_NODE_TRY: + case AST_NODE_COMPTIME: + case AST_NODE_NOSUSPEND: + cur = tree->nodes.datas[cur].lhs; + continue; + // Forward to LHS: grouped_expression, unwrap_optional + // (AstGen.zig:10714-10716) + case AST_NODE_GROUPED_EXPRESSION: + case AST_NODE_UNWRAP_OPTIONAL: + cur = tree->nodes.datas[cur].lhs; + continue; + // Identifier: check primitives (AstGen.zig:10727-10780) + case AST_NODE_IDENTIFIER: + return identImpliesMoreThanOnePossibleValue( + tree, tree->nodes.main_tokens[cur]); + default: + return false; + } + } +} + +// --- nodeImpliesComptimeOnly (AstGen.zig:10787) --- + +static bool identImpliesComptimeOnly(const Ast* tree, uint32_t main_token) { + uint32_t start = tree->tokens.starts[main_token]; + const char* src = tree->source + start; + // Only comptime_float, comptime_int, type → true + // (AstGen.zig:11010-11013) + if (src[0] == 'c' && src[1] == 'o' && src[2] == 'm' && src[3] == 'p' + && src[4] == 't' && src[5] == 'i' && src[6] == 'm' && src[7] == 'e') + return true; // comptime_float, comptime_int + if (src[0] == 't' && src[1] == 'y' && src[2] == 'p' && src[3] == 'e' + && !(src[4] >= 'a' && src[4] <= 'z') + && !(src[4] >= 'A' && src[4] <= 'Z') + && !(src[4] >= '0' && src[4] <= '9') && src[4] != '_') + return true; + return false; +} + +static bool nodeImpliesComptimeOnly(const Ast* tree, uint32_t node) { + uint32_t cur = node; + while (1) { + AstNodeTag tag = tree->nodes.tags[cur]; + switch (tag) { + // Function prototypes → true (AstGen.zig:10950-10955) + case AST_NODE_FN_PROTO_SIMPLE: + case AST_NODE_FN_PROTO_MULTI: + case AST_NODE_FN_PROTO_ONE: + case AST_NODE_FN_PROTO: + return true; + // Forward to LHS: try, comptime, nosuspend + case AST_NODE_TRY: + case AST_NODE_COMPTIME: + case AST_NODE_NOSUSPEND: + cur = tree->nodes.datas[cur].lhs; + continue; + case AST_NODE_GROUPED_EXPRESSION: + case AST_NODE_UNWRAP_OPTIONAL: + cur = tree->nodes.datas[cur].lhs; + continue; + // Identifier: check primitives + case AST_NODE_IDENTIFIER: + return identImpliesComptimeOnly( + tree, tree->nodes.main_tokens[cur]); + default: + return false; + } + } +} + +// --- WipMembers (AstGen.zig:3989) --- +// Tracks decl indices, field bit-flags, and per-field data during container +// processing. All data lives in a single malloc'd array laid out as: +// [decls (decl_count)] [field_bits (ceil)] [fields (up to field_count*max)] +// Bodies are tracked separately in a dynamic array. + +typedef struct { + uint32_t* payload; // malloc'd array + uint32_t payload_top; // always 0 (start of decls region) + uint32_t field_bits_start; + uint32_t fields_start; + uint32_t fields_end; + uint32_t decl_index; + uint32_t field_index; + // Bodies scratch: dynamically grown array for field type/align/init + // bodies. + uint32_t* bodies; + uint32_t bodies_len; + uint32_t bodies_cap; +} WipMembers; + +static WipMembers wipMembersInit(uint32_t decl_count, uint32_t field_count) { + // bits_per_field = 4, max_field_size = 5 + uint32_t fields_per_u32 = 8; // 32 / 4 + uint32_t field_bits_start = decl_count; + uint32_t bit_words = field_count > 0 + ? (field_count + fields_per_u32 - 1) / fields_per_u32 + : 0; + uint32_t fields_start = field_bits_start + bit_words; + uint32_t payload_end = fields_start + field_count * 5; + uint32_t alloc_size = payload_end > 0 ? payload_end : 1; + uint32_t* payload = calloc(alloc_size, sizeof(uint32_t)); + if (!payload) + exit(1); + WipMembers wm; + memset(&wm, 0, sizeof(wm)); + wm.payload = payload; + wm.payload_top = 0; + wm.field_bits_start = field_bits_start; + wm.fields_start = fields_start; + wm.fields_end = fields_start; + wm.decl_index = 0; + wm.field_index = 0; + wm.bodies = NULL; + wm.bodies_len = 0; + wm.bodies_cap = 0; + return wm; +} + +static void wipMembersDeinit(WipMembers* wm) { + free(wm->payload); + free(wm->bodies); +} + +static void wipMembersNextDecl(WipMembers* wm, uint32_t decl_inst) { + wm->payload[wm->payload_top + wm->decl_index] = decl_inst; + wm->decl_index++; +} + +// bits_per_field = 4: bits[0]=have_align, bits[1]=have_value, +// bits[2]=is_comptime, bits[3]=have_type_body +static void wipMembersNextField(WipMembers* wm, bool bits[4]) { + uint32_t fields_per_u32 = 8; // 32 / 4 + uint32_t index = wm->field_bits_start + wm->field_index / fields_per_u32; + uint32_t bit_bag + = (wm->field_index % fields_per_u32 == 0) ? 0 : wm->payload[index]; + bit_bag >>= 4; + for (int i = 0; i < 4; i++) { + bit_bag |= ((uint32_t)(bits[i] ? 1 : 0)) << (32 - 4 + i); + } + wm->payload[index] = bit_bag; + wm->field_index++; +} + +static void wipMembersAppendToField(WipMembers* wm, uint32_t data) { + wm->payload[wm->fields_end] = data; + wm->fields_end++; +} + +static void wipMembersFinishBits(WipMembers* wm) { + uint32_t fields_per_u32 = 8; // 32 / 4 + uint32_t empty_field_slots + = fields_per_u32 - (wm->field_index % fields_per_u32); + if (wm->field_index > 0 && empty_field_slots < fields_per_u32) { + uint32_t index + = wm->field_bits_start + wm->field_index / fields_per_u32; + wm->payload[index] >>= (empty_field_slots * 4); + } +} + +// Returns pointer to decls region and its length. +static const uint32_t* wipMembersDeclsSlice( + const WipMembers* wm, uint32_t* out_len) { + *out_len = wm->decl_index; + return wm->payload + wm->payload_top; +} + +// Returns pointer to fields region (field_bits + field_data) and its length. +static const uint32_t* wipMembersFieldsSlice( + const WipMembers* wm, uint32_t* out_len) { + *out_len = wm->fields_end - wm->field_bits_start; + return wm->payload + wm->field_bits_start; +} + +// Append body instructions to the WipMembers bodies scratch. +static void wipMembersBodiesAppend( + WipMembers* wm, const uint32_t* data, uint32_t len) { + if (wm->bodies_len + len > wm->bodies_cap) { + uint32_t new_cap = wm->bodies_cap == 0 ? 64 : wm->bodies_cap * 2; + while (new_cap < wm->bodies_len + len) + new_cap *= 2; + wm->bodies = realloc(wm->bodies, new_cap * sizeof(uint32_t)); + if (!wm->bodies) + exit(1); + wm->bodies_cap = new_cap; + } + memcpy(wm->bodies + wm->bodies_len, data, len * sizeof(uint32_t)); + wm->bodies_len += len; +} + +// Append body instructions with ref_table fixups to wm->bodies. +static void wipMembersBodiesAppendWithFixups( + WipMembers* wm, AstGenCtx* ag, const uint32_t* body, uint32_t body_len) { + for (uint32_t i = 0; i < body_len; i++) { + uint32_t inst = body[i]; + // Grow if needed. + if (wm->bodies_len + 1 > wm->bodies_cap) { + uint32_t new_cap = wm->bodies_cap == 0 ? 64 : wm->bodies_cap * 2; + wm->bodies = realloc(wm->bodies, new_cap * sizeof(uint32_t)); + if (!wm->bodies) + exit(1); + wm->bodies_cap = new_cap; + } + wm->bodies[wm->bodies_len++] = inst; + // Check for ref fixup. + uint32_t ref_inst; + while (refTableFetchRemove(ag, inst, &ref_inst)) { + if (wm->bodies_len + 1 > wm->bodies_cap) { + uint32_t new_cap = wm->bodies_cap * 2; + wm->bodies = realloc(wm->bodies, new_cap * sizeof(uint32_t)); + if (!wm->bodies) + exit(1); + wm->bodies_cap = new_cap; + } + wm->bodies[wm->bodies_len++] = ref_inst; + inst = ref_inst; + } + } +} + +// --- containerDecl (AstGen.zig:5468) --- +// Handles container declarations as expressions (struct{}, enum{}, etc.). + +static uint32_t containerDecl(GenZir* gz, Scope* scope, uint32_t node) { + AstGenCtx* ag = gz->astgen; + const Ast* tree = ag->tree; + AstNodeTag tag = tree->nodes.tags[node]; + AstData nd = tree->nodes.datas[node]; + + // Extract members based on node type (Ast.zig:2459-2470). + uint32_t members_buf[2]; + const uint32_t* members; + uint32_t members_len; + + switch (tag) { + case AST_NODE_CONTAINER_DECL_TWO: + case AST_NODE_CONTAINER_DECL_TWO_TRAILING: + case AST_NODE_TAGGED_UNION_TWO: + case AST_NODE_TAGGED_UNION_TWO_TRAILING: { + // lhs and rhs are optional member nodes (0 = none). + members_len = 0; + if (nd.lhs != 0) + members_buf[members_len++] = nd.lhs; + if (nd.rhs != 0) + members_buf[members_len++] = nd.rhs; + members = members_buf; + break; + } + case AST_NODE_CONTAINER_DECL: + case AST_NODE_CONTAINER_DECL_TRAILING: + case AST_NODE_TAGGED_UNION: + case AST_NODE_TAGGED_UNION_TRAILING: { + // extra_data[lhs..rhs] contains members. + members = tree->extra_data.arr + nd.lhs; + members_len = nd.rhs - nd.lhs; + break; + } + case AST_NODE_CONTAINER_DECL_ARG: + case AST_NODE_CONTAINER_DECL_ARG_TRAILING: + case AST_NODE_TAGGED_UNION_ENUM_TAG: + case AST_NODE_TAGGED_UNION_ENUM_TAG_TRAILING: { + // lhs is arg node, rhs is extra index → SubRange(start, end). + uint32_t start = tree->extra_data.arr[nd.rhs]; + uint32_t end = tree->extra_data.arr[nd.rhs + 1]; + members = tree->extra_data.arr + start; + members_len = end - start; + break; + } + default: + SET_ERROR(ag); + return ZIR_REF_VOID_VALUE; + } + + // Save/clear fn_block for nested containers (AstGen.zig:5480-5482). + void* prev_fn_block = ag->fn_block; + ag->fn_block = NULL; + + // Dispatch based on container keyword (AstGen.zig:5485-5536). + uint32_t main_token = tree->nodes.main_tokens[node]; + TokenizerTag kw_tag = tree->tokens.tags[main_token]; + uint32_t decl_inst; + switch (kw_tag) { + case TOKEN_KEYWORD_STRUCT: + decl_inst = structDeclInner(ag, gz, node, members, members_len); + break; + case TOKEN_KEYWORD_ENUM: + decl_inst = enumDeclInner(ag, gz, node, members, members_len); + break; + default: + // union/opaque: fall back to struct for now. + decl_inst = structDeclInner(ag, gz, node, members, members_len); + break; + } + (void)scope; + + ag->fn_block = prev_fn_block; + return decl_inst + ZIR_REF_START_INDEX; +} + +// --- EnumDecl.Small packing (Zir.zig EnumDecl.Small) --- + +typedef struct { + bool has_tag_type; + bool has_captures_len; + bool has_body_len; + bool has_fields_len; + bool has_decls_len; + uint8_t name_strategy; // 2 bits + bool nonexhaustive; +} EnumDeclSmall; + +static uint16_t packEnumDeclSmall(EnumDeclSmall s) { + uint16_t r = 0; + if (s.has_tag_type) + r |= (1u << 0); + if (s.has_captures_len) + r |= (1u << 1); + if (s.has_body_len) + r |= (1u << 2); + if (s.has_fields_len) + r |= (1u << 3); + if (s.has_decls_len) + r |= (1u << 4); + r |= (uint16_t)(s.name_strategy & 0x3u) << 5; + if (s.nonexhaustive) + r |= (1u << 7); + return r; +} + +// Mirrors GenZir.setEnum (AstGen.zig:13080). +static void setEnum(AstGenCtx* ag, uint32_t inst, uint32_t src_node, + EnumDeclSmall small, uint32_t fields_len, uint32_t decls_len) { + ensureExtraCapacity(ag, 6 + 3); + + uint32_t payload_index = ag->extra_len; + + // fields_hash (4 words): zero-filled. + ag->extra[ag->extra_len++] = 0; + ag->extra[ag->extra_len++] = 0; + ag->extra[ag->extra_len++] = 0; + ag->extra[ag->extra_len++] = 0; + + ag->extra[ag->extra_len++] = ag->source_line; + ag->extra[ag->extra_len++] = src_node; + + if (small.has_fields_len) + ag->extra[ag->extra_len++] = fields_len; + if (small.has_decls_len) + ag->extra[ag->extra_len++] = decls_len; + + ag->inst_tags[inst] = ZIR_INST_EXTENDED; + ZirInstData data; + memset(&data, 0, sizeof(data)); + data.extended.opcode = (uint16_t)ZIR_EXT_ENUM_DECL; + data.extended.small = packEnumDeclSmall(small); + data.extended.operand = payload_index; + ag->inst_datas[inst] = data; +} + +// --- enumDeclInner (AstGen.zig:5508) --- + +static uint32_t enumDeclInner(AstGenCtx* ag, GenZir* gz, uint32_t node, + const uint32_t* members, uint32_t members_len) { + const Ast* tree = ag->tree; + uint32_t decl_inst = reserveInstructionIndex(ag); + gzAppendInstruction(gz, decl_inst); + + if (members_len == 0) { + EnumDeclSmall small; + memset(&small, 0, sizeof(small)); + setEnum(ag, decl_inst, node, small, 0, 0); + return decl_inst; + } + + advanceSourceCursorToNode(ag, node); + + uint32_t decl_count = scanContainer(ag, members, members_len); + uint32_t field_count = members_len - decl_count; + + // Use WipMembers for decls and field data. + // Enum fields: 1 bit per field (has_value), max 2 words per field + // (name + value). + WipMembers wm = wipMembersInit(decl_count, field_count); + + // Enum fields use 1 bit per field: has_value. + // We use the same WipMembers but with 1-bit fields. + // Actually, upstream uses bits_per_field=1, max_field_size=2. + // Re-init with correct params would be better but let's reuse. + // For simplicity: track field data manually. + uint32_t* field_names = NULL; + uint32_t field_names_len = 0; + uint32_t field_names_cap = 0; + + for (uint32_t i = 0; i < members_len; i++) { + uint32_t member_node = members[i]; + AstNodeTag mtag = tree->nodes.tags[member_node]; + switch (mtag) { + case AST_NODE_COMPTIME: + comptimeDecl(ag, gz, wm.payload, &wm.decl_index, member_node); + break; + case AST_NODE_SIMPLE_VAR_DECL: + case AST_NODE_GLOBAL_VAR_DECL: + case AST_NODE_LOCAL_VAR_DECL: + case AST_NODE_ALIGNED_VAR_DECL: + globalVarDecl(ag, gz, wm.payload, &wm.decl_index, member_node); + break; + case AST_NODE_FN_DECL: + fnDecl(ag, gz, wm.payload, &wm.decl_index, member_node); + break; + case AST_NODE_TEST_DECL: + testDecl(ag, gz, wm.payload, &wm.decl_index, member_node); + break; + case AST_NODE_CONTAINER_FIELD_INIT: + case AST_NODE_CONTAINER_FIELD_ALIGN: + case AST_NODE_CONTAINER_FIELD: { + // Enum field: just a name (AstGen.zig:5617-5670). + uint32_t main_token = tree->nodes.main_tokens[member_node]; + uint32_t field_name = identAsString(ag, main_token); + // Grow field_names array. + if (field_names_len >= field_names_cap) { + uint32_t new_cap + = field_names_cap == 0 ? 8 : field_names_cap * 2; + field_names = realloc(field_names, new_cap * sizeof(uint32_t)); + if (!field_names) + exit(1); + field_names_cap = new_cap; + } + field_names[field_names_len++] = field_name; + break; + } + default: + SET_ERROR(ag); + break; + } + } + + EnumDeclSmall small; + memset(&small, 0, sizeof(small)); + small.has_fields_len = (field_count > 0); + small.has_decls_len = (decl_count > 0); + setEnum(ag, decl_inst, node, small, field_count, decl_count); + + // Append: decls, field_bits, field_names (AstGen.zig:5724-5729). + uint32_t decls_len_out; + const uint32_t* decls_slice = wipMembersDeclsSlice(&wm, &decls_len_out); + + // Field bits: 1 bit per field (has_value = false for simple enums). + uint32_t fields_per_u32 = 32; + uint32_t bit_words = field_count > 0 + ? (field_count + fields_per_u32 - 1) / fields_per_u32 + : 0; + + ensureExtraCapacity(ag, decls_len_out + bit_words + field_names_len); + for (uint32_t i = 0; i < decls_len_out; i++) + ag->extra[ag->extra_len++] = decls_slice[i]; + // Field bits: all zero (no values). + for (uint32_t i = 0; i < bit_words; i++) + ag->extra[ag->extra_len++] = 0; + // Field names. + for (uint32_t i = 0; i < field_names_len; i++) + ag->extra[ag->extra_len++] = field_names[i]; + + free(field_names); + wipMembersDeinit(&wm); + return decl_inst; +} + +// --- structDeclInner (AstGen.zig:4926) --- + +static uint32_t structDeclInner(AstGenCtx* ag, GenZir* gz, uint32_t node, + const uint32_t* members, uint32_t members_len) { + const Ast* tree = ag->tree; + uint32_t decl_inst = reserveInstructionIndex(ag); + gzAppendInstruction(gz, decl_inst); + + // Fast path: no members, no backing int (AstGen.zig:4954-4970). + if (members_len == 0) { + StructDeclSmall small; + memset(&small, 0, sizeof(small)); + setStruct(ag, decl_inst, node, small, 0, 0, 0); + return decl_inst; + } + + // Non-empty container (AstGen.zig:4973-5189). + advanceSourceCursorToNode(ag, node); + + uint32_t decl_count = scanContainer(ag, members, members_len); + uint32_t field_count = members_len - decl_count; + + WipMembers wm = wipMembersInit(decl_count, field_count); + + // Set up block_scope for field type/align/init expressions. + // (AstGen.zig:4983-4992) + GenZir block_scope; + memset(&block_scope, 0, sizeof(block_scope)); + block_scope.base.tag = SCOPE_GEN_ZIR; + block_scope.parent = NULL; + block_scope.astgen = ag; + block_scope.decl_node_index = node; + block_scope.decl_line = ag->source_line; + block_scope.is_comptime = true; + block_scope.instructions_top = ag->scratch_inst_len; + + bool known_non_opv = false; + bool known_comptime_only = false; + bool any_comptime_fields = false; + bool any_aligned_fields = false; + bool any_default_inits = false; + + // Process each member (AstGen.zig:5060-5147). + for (uint32_t i = 0; i < members_len; i++) { + uint32_t member_node = members[i]; + AstNodeTag mtag = tree->nodes.tags[member_node]; + switch (mtag) { + case AST_NODE_COMPTIME: + comptimeDecl(ag, gz, wm.payload, &wm.decl_index, member_node); + break; + case AST_NODE_SIMPLE_VAR_DECL: + globalVarDecl(ag, gz, wm.payload, &wm.decl_index, member_node); + break; + case AST_NODE_TEST_DECL: + testDecl(ag, gz, wm.payload, &wm.decl_index, member_node); + break; + case AST_NODE_FN_DECL: + fnDecl(ag, gz, wm.payload, &wm.decl_index, member_node); + break; + case AST_NODE_USINGNAMESPACE: + case AST_NODE_GLOBAL_VAR_DECL: + case AST_NODE_LOCAL_VAR_DECL: + case AST_NODE_ALIGNED_VAR_DECL: + globalVarDecl(ag, gz, wm.payload, &wm.decl_index, member_node); + break; + case AST_NODE_CONTAINER_FIELD_INIT: + case AST_NODE_CONTAINER_FIELD_ALIGN: + case AST_NODE_CONTAINER_FIELD: { + // Extract field info from AST node (Ast.zig:1413-1454). + uint32_t main_token = tree->nodes.main_tokens[member_node]; + AstData nd = tree->nodes.datas[member_node]; + uint32_t type_node = nd.lhs; + uint32_t align_node = 0; + uint32_t value_node = 0; + bool has_comptime_token = false; + + switch (mtag) { + case AST_NODE_CONTAINER_FIELD_INIT: + // lhs = type_expr, rhs = value_expr (optional, 0=none) + value_node = nd.rhs; + break; + case AST_NODE_CONTAINER_FIELD_ALIGN: + // lhs = type_expr, rhs = align_expr + align_node = nd.rhs; + break; + case AST_NODE_CONTAINER_FIELD: + // lhs = type_expr, rhs = extra index to {align, value} + if (nd.rhs != 0) { + align_node = tree->extra_data.arr[nd.rhs]; + value_node = tree->extra_data.arr[nd.rhs + 1]; + } + break; + default: + break; + } + + // Check for comptime token preceding main_token + // (Ast.zig:2071-2082). + if (main_token > 0 + && tree->tokens.tags[main_token - 1] + == TOKEN_KEYWORD_COMPTIME) { + has_comptime_token = true; + } + + // Field name (AstGen.zig:5080). + uint32_t field_name = identAsString(ag, main_token); + wipMembersAppendToField(&wm, field_name); + + // Type expression (AstGen.zig:5089-5109). + bool have_type_body = false; + uint32_t field_type = 0; + if (type_node != 0) { + field_type + = typeExpr(&block_scope, &block_scope.base, type_node); + have_type_body = (gzInstructionsLen(&block_scope) > 0); + } + + bool have_align = (align_node != 0); + bool have_value = (value_node != 0); + bool is_comptime = has_comptime_token; + + if (is_comptime) { + any_comptime_fields = true; + } else { + // (AstGen.zig:5106-5109) + if (type_node != 0) { + known_non_opv = known_non_opv + || nodeImpliesMoreThanOnePossibleValue( + tree, type_node); + known_comptime_only = known_comptime_only + || nodeImpliesComptimeOnly(tree, type_node); + } + } + + bool field_bits[4] + = { have_align, have_value, is_comptime, have_type_body }; + wipMembersNextField(&wm, field_bits); + + if (have_type_body) { + // Emit break_inline to carry the type value + // (AstGen.zig:5097-5099). + if (!endsWithNoReturn(&block_scope)) { + makeBreakInline(&block_scope, decl_inst, field_type, + AST_NODE_OFFSET_NONE); + } + uint32_t raw_len = gzInstructionsLen(&block_scope); + const uint32_t* body = gzInstructionsSlice(&block_scope); + uint32_t body_len = countBodyLenAfterFixups(ag, body, raw_len); + uint32_t bodies_before = wm.bodies_len; + wipMembersBodiesAppendWithFixups(&wm, ag, body, raw_len); + (void)bodies_before; + wipMembersAppendToField(&wm, body_len); + // Reset block_scope. + ag->scratch_inst_len = block_scope.instructions_top; + } else { + wipMembersAppendToField(&wm, field_type); + } + + if (have_align) { + any_aligned_fields = true; + uint32_t align_ref + = expr(&block_scope, &block_scope.base, align_node); + if (!endsWithNoReturn(&block_scope)) { + makeBreakInline(&block_scope, decl_inst, align_ref, + AST_NODE_OFFSET_NONE); + } + uint32_t raw_len = gzInstructionsLen(&block_scope); + const uint32_t* body = gzInstructionsSlice(&block_scope); + uint32_t body_len = countBodyLenAfterFixups(ag, body, raw_len); + wipMembersBodiesAppendWithFixups(&wm, ag, body, raw_len); + wipMembersAppendToField(&wm, body_len); + ag->scratch_inst_len = block_scope.instructions_top; + } + + if (have_value) { + any_default_inits = true; + uint32_t default_ref + = expr(&block_scope, &block_scope.base, value_node); + if (!endsWithNoReturn(&block_scope)) { + makeBreakInline(&block_scope, decl_inst, default_ref, + AST_NODE_OFFSET_NONE); + } + uint32_t raw_len = gzInstructionsLen(&block_scope); + const uint32_t* body = gzInstructionsSlice(&block_scope); + uint32_t body_len = countBodyLenAfterFixups(ag, body, raw_len); + wipMembersBodiesAppendWithFixups(&wm, ag, body, raw_len); + wipMembersAppendToField(&wm, body_len); + ag->scratch_inst_len = block_scope.instructions_top; + } + break; + } + default: + SET_ERROR(ag); + break; + } + } + + wipMembersFinishBits(&wm); + + // setStruct (AstGen.zig:5152-5166). + StructDeclSmall small; + memset(&small, 0, sizeof(small)); + small.has_decls_len = (decl_count > 0); + small.has_fields_len = (field_count > 0); + small.known_non_opv = known_non_opv; + small.known_comptime_only = known_comptime_only; + small.any_comptime_fields = any_comptime_fields; + small.any_default_inits = any_default_inits; + small.any_aligned_fields = any_aligned_fields; + setStruct(ag, decl_inst, node, small, 0, field_count, decl_count); + + // Append: captures (none), backing_int (none), decls, fields, bodies + // (AstGen.zig:5176-5189). + uint32_t decls_len; + const uint32_t* decls_slice = wipMembersDeclsSlice(&wm, &decls_len); + uint32_t fields_len; + const uint32_t* fields_slice = wipMembersFieldsSlice(&wm, &fields_len); + + ensureExtraCapacity(ag, decls_len + fields_len + wm.bodies_len); + for (uint32_t i = 0; i < decls_len; i++) + ag->extra[ag->extra_len++] = decls_slice[i]; + for (uint32_t i = 0; i < fields_len; i++) + ag->extra[ag->extra_len++] = fields_slice[i]; + for (uint32_t i = 0; i < wm.bodies_len; i++) + ag->extra[ag->extra_len++] = wm.bodies[i]; + + gzUnstack(&block_scope); + wipMembersDeinit(&wm); + return decl_inst; +} + +// --- AstRlAnnotate (AstRlAnnotate.zig) --- +// Pre-pass to determine which AST nodes need result locations. + +typedef struct { + bool have_type; + bool have_ptr; +} RlResultInfo; + +#define RL_RI_NONE ((RlResultInfo) { false, false }) +#define RL_RI_TYPED_PTR ((RlResultInfo) { true, true }) +#define RL_RI_INFERRED_PTR ((RlResultInfo) { false, true }) +#define RL_RI_TYPE_ONLY ((RlResultInfo) { true, false }) + +// Block for label tracking (AstRlAnnotate.zig:56-62). +typedef struct RlBlock { + struct RlBlock* parent; + uint32_t label_token; // UINT32_MAX = no label + bool is_loop; + RlResultInfo ri; + bool consumes_res_ptr; +} RlBlock; + +static void nodesNeedRlAdd(AstGenCtx* ag, uint32_t node) { + if (ag->nodes_need_rl_len >= ag->nodes_need_rl_cap) { + uint32_t new_cap + = ag->nodes_need_rl_cap == 0 ? 16 : ag->nodes_need_rl_cap * 2; + ag->nodes_need_rl + = realloc(ag->nodes_need_rl, new_cap * sizeof(uint32_t)); + ag->nodes_need_rl_cap = new_cap; + } + ag->nodes_need_rl[ag->nodes_need_rl_len++] = node; +} + +static bool nodesNeedRlContains(const AstGenCtx* ag, uint32_t node) { + for (uint32_t i = 0; i < ag->nodes_need_rl_len; i++) { + if (ag->nodes_need_rl[i] == node) + return true; + } + return false; +} + +// Compare two identifier tokens by their source text. +static bool rlTokenIdentEqual( + const Ast* tree, uint32_t tok_a, uint32_t tok_b) { + const char* src = tree->source; + uint32_t a_start = tree->tokens.starts[tok_a]; + uint32_t b_start = tree->tokens.starts[tok_b]; + for (uint32_t i = 0;; i++) { + char ca = src[a_start + i]; + char cb = src[b_start + i]; + bool a_id = (ca >= 'a' && ca <= 'z') || (ca >= 'A' && ca <= 'Z') + || (ca >= '0' && ca <= '9') || ca == '_'; + bool b_id = (cb >= 'a' && cb <= 'z') || (cb >= 'A' && cb <= 'Z') + || (cb >= '0' && cb <= '9') || cb == '_'; + if (!a_id && !b_id) + return true; + if (!a_id || !b_id) + return false; + if (ca != cb) + return false; + } +} + +// Forward declarations. +static bool rlExpr( + AstGenCtx* ag, uint32_t node, RlBlock* block, RlResultInfo ri); +static void rlContainerDecl(AstGenCtx* ag, RlBlock* block, uint32_t node); +static bool rlBlockExpr(AstGenCtx* ag, RlBlock* parent_block, RlResultInfo ri, + uint32_t node, const uint32_t* stmts, uint32_t count); +static bool rlBuiltinCall(AstGenCtx* ag, RlBlock* block, uint32_t node, + const uint32_t* args, uint32_t nargs); + +// containerDecl (AstRlAnnotate.zig:89-127). +static void rlContainerDecl(AstGenCtx* ag, RlBlock* block, uint32_t node) { + const Ast* tree = ag->tree; + AstNodeTag tag = tree->nodes.tags[node]; + AstData nd = tree->nodes.datas[node]; + + // Extract arg and members depending on variant. + // All container decls: recurse arg with type_only, members with none. + // (The keyword type — struct/union/enum/opaque — doesn't matter for RL.) + uint32_t member_buf[2]; + const uint32_t* members = NULL; + uint32_t members_len = 0; + uint32_t arg_node = 0; // 0 = no arg + + switch (tag) { + case AST_NODE_CONTAINER_DECL_TWO: + case AST_NODE_CONTAINER_DECL_TWO_TRAILING: + case AST_NODE_TAGGED_UNION_TWO: + case AST_NODE_TAGGED_UNION_TWO_TRAILING: { + uint32_t idx = 0; + if (nd.lhs != 0) + member_buf[idx++] = nd.lhs; + if (nd.rhs != 0) + member_buf[idx++] = nd.rhs; + members = member_buf; + members_len = idx; + break; + } + case AST_NODE_CONTAINER_DECL: + case AST_NODE_CONTAINER_DECL_TRAILING: + case AST_NODE_TAGGED_UNION: + case AST_NODE_TAGGED_UNION_TRAILING: + members = tree->extra_data.arr + nd.lhs; + members_len = nd.rhs - nd.lhs; + break; + case AST_NODE_CONTAINER_DECL_ARG: + case AST_NODE_CONTAINER_DECL_ARG_TRAILING: + case AST_NODE_TAGGED_UNION_ENUM_TAG: + case AST_NODE_TAGGED_UNION_ENUM_TAG_TRAILING: { + arg_node = nd.lhs; + uint32_t extra_idx = nd.rhs; + uint32_t start = tree->extra_data.arr[extra_idx]; + uint32_t end = tree->extra_data.arr[extra_idx + 1]; + members = tree->extra_data.arr + start; + members_len = end - start; + break; + } + default: + return; + } + + if (arg_node != 0) + (void)rlExpr(ag, arg_node, block, RL_RI_TYPE_ONLY); + for (uint32_t i = 0; i < members_len; i++) + (void)rlExpr(ag, members[i], block, RL_RI_NONE); +} + +// blockExpr (AstRlAnnotate.zig:787-814). +static bool rlBlockExpr(AstGenCtx* ag, RlBlock* parent_block, RlResultInfo ri, + uint32_t node, const uint32_t* stmts, uint32_t count) { + const Ast* tree = ag->tree; + uint32_t lbrace = tree->nodes.main_tokens[node]; + bool is_labeled + = (lbrace >= 2 && tree->tokens.tags[lbrace - 1] == TOKEN_COLON + && tree->tokens.tags[lbrace - 2] == TOKEN_IDENTIFIER); + + if (is_labeled) { + RlBlock new_block; + new_block.parent = parent_block; + new_block.label_token = lbrace - 2; + new_block.is_loop = false; + new_block.ri = ri; + new_block.consumes_res_ptr = false; + for (uint32_t i = 0; i < count; i++) + (void)rlExpr(ag, stmts[i], &new_block, RL_RI_NONE); + if (new_block.consumes_res_ptr) + nodesNeedRlAdd(ag, node); + return new_block.consumes_res_ptr; + } else { + for (uint32_t i = 0; i < count; i++) + (void)rlExpr(ag, stmts[i], parent_block, RL_RI_NONE); + return false; + } +} + +// builtinCall (AstRlAnnotate.zig:816-1100). +// Simplified: no builtin currently consumes its result location, +// so we just recurse into all args with RL_RI_NONE. +static bool rlBuiltinCall(AstGenCtx* ag, RlBlock* block, uint32_t node, + const uint32_t* args, uint32_t nargs) { + (void)node; + for (uint32_t i = 0; i < nargs; i++) + (void)rlExpr(ag, args[i], block, RL_RI_NONE); + return false; +} + +// expr (AstRlAnnotate.zig:130-771). +static bool rlExpr( + AstGenCtx* ag, uint32_t node, RlBlock* block, RlResultInfo ri) { + const Ast* tree = ag->tree; + AstNodeTag tag = tree->nodes.tags[node]; + AstData nd = tree->nodes.datas[node]; + + switch (tag) { + // Unreachable nodes (AstRlAnnotate.zig:133-142). + case AST_NODE_ROOT: + case AST_NODE_SWITCH_CASE_ONE: + case AST_NODE_SWITCH_CASE_INLINE_ONE: + case AST_NODE_SWITCH_CASE: + case AST_NODE_SWITCH_CASE_INLINE: + case AST_NODE_SWITCH_RANGE: + case AST_NODE_FOR_RANGE: + case AST_NODE_ASM_OUTPUT: + case AST_NODE_ASM_INPUT: + return false; // unreachable in upstream + + // errdefer (AstRlAnnotate.zig:144-147). + case AST_NODE_ERRDEFER: + (void)rlExpr(ag, nd.rhs, block, RL_RI_NONE); + return false; + + // defer (AstRlAnnotate.zig:148-151). + case AST_NODE_DEFER: + (void)rlExpr(ag, nd.rhs, block, RL_RI_NONE); + return false; + + // container_field (AstRlAnnotate.zig:153-167). + case AST_NODE_CONTAINER_FIELD_INIT: { + // lhs = type_expr, rhs = value_expr + if (nd.lhs != 0) + (void)rlExpr(ag, nd.lhs, block, RL_RI_TYPE_ONLY); + if (nd.rhs != 0) + (void)rlExpr(ag, nd.rhs, block, RL_RI_TYPE_ONLY); + return false; + } + case AST_NODE_CONTAINER_FIELD_ALIGN: { + // lhs = type_expr, rhs = align_expr + if (nd.lhs != 0) + (void)rlExpr(ag, nd.lhs, block, RL_RI_TYPE_ONLY); + if (nd.rhs != 0) + (void)rlExpr(ag, nd.rhs, block, RL_RI_TYPE_ONLY); + return false; + } + case AST_NODE_CONTAINER_FIELD: { + // lhs = type_expr, rhs = extra index to {align_expr, value_expr} + if (nd.lhs != 0) + (void)rlExpr(ag, nd.lhs, block, RL_RI_TYPE_ONLY); + if (nd.rhs != 0) { + uint32_t align_node = tree->extra_data.arr[nd.rhs]; + uint32_t value_node = tree->extra_data.arr[nd.rhs + 1]; + if (align_node != 0) + (void)rlExpr(ag, align_node, block, RL_RI_TYPE_ONLY); + if (value_node != 0) + (void)rlExpr(ag, value_node, block, RL_RI_TYPE_ONLY); + } + return false; + } + + // test_decl (AstRlAnnotate.zig:168-171). + case AST_NODE_TEST_DECL: + (void)rlExpr(ag, nd.rhs, block, RL_RI_NONE); + return false; + + // var_decl (AstRlAnnotate.zig:172-202). + case AST_NODE_GLOBAL_VAR_DECL: + case AST_NODE_LOCAL_VAR_DECL: + case AST_NODE_SIMPLE_VAR_DECL: + case AST_NODE_ALIGNED_VAR_DECL: { + uint32_t type_node = 0; + uint32_t init_node = 0; + uint32_t mut_token = tree->nodes.main_tokens[node]; + if (tag == AST_NODE_SIMPLE_VAR_DECL) { + type_node = nd.lhs; + init_node = nd.rhs; + } else if (tag == AST_NODE_LOCAL_VAR_DECL + || tag == AST_NODE_GLOBAL_VAR_DECL) { + type_node = tree->extra_data.arr[nd.lhs]; + init_node = nd.rhs; + } else { // ALIGNED_VAR_DECL + init_node = nd.rhs; + } + RlResultInfo init_ri; + if (type_node != 0) { + (void)rlExpr(ag, type_node, block, RL_RI_TYPE_ONLY); + init_ri = RL_RI_TYPED_PTR; + } else { + init_ri = RL_RI_INFERRED_PTR; + } + if (init_node == 0) + return false; + bool is_const = (tree->source[tree->tokens.starts[mut_token]] == 'c'); + if (is_const) { + bool init_consumes_rl = rlExpr(ag, init_node, block, init_ri); + if (init_consumes_rl) + nodesNeedRlAdd(ag, node); + return false; + } else { + (void)rlExpr(ag, init_node, block, init_ri); + return false; + } + } + + // assign (AstRlAnnotate.zig:212-217). + case AST_NODE_ASSIGN: + (void)rlExpr(ag, nd.lhs, block, RL_RI_NONE); + (void)rlExpr(ag, nd.rhs, block, RL_RI_TYPED_PTR); + return false; + + // compound assign (AstRlAnnotate.zig:218-240). + case AST_NODE_ASSIGN_SHL: + case AST_NODE_ASSIGN_SHL_SAT: + case AST_NODE_ASSIGN_SHR: + case AST_NODE_ASSIGN_BIT_AND: + case AST_NODE_ASSIGN_BIT_OR: + case AST_NODE_ASSIGN_BIT_XOR: + case AST_NODE_ASSIGN_DIV: + case AST_NODE_ASSIGN_SUB: + case AST_NODE_ASSIGN_SUB_WRAP: + case AST_NODE_ASSIGN_SUB_SAT: + case AST_NODE_ASSIGN_MOD: + case AST_NODE_ASSIGN_ADD: + case AST_NODE_ASSIGN_ADD_WRAP: + case AST_NODE_ASSIGN_ADD_SAT: + case AST_NODE_ASSIGN_MUL: + case AST_NODE_ASSIGN_MUL_WRAP: + case AST_NODE_ASSIGN_MUL_SAT: + (void)rlExpr(ag, nd.lhs, block, RL_RI_NONE); + (void)rlExpr(ag, nd.rhs, block, RL_RI_NONE); + return false; + + // shl/shr (AstRlAnnotate.zig:241-246). + case AST_NODE_SHL: + case AST_NODE_SHR: + (void)rlExpr(ag, nd.lhs, block, RL_RI_NONE); + (void)rlExpr(ag, nd.rhs, block, RL_RI_TYPE_ONLY); + return false; + + // binary arithmetic/comparison (AstRlAnnotate.zig:247-274). + case AST_NODE_ADD: + case AST_NODE_ADD_WRAP: + case AST_NODE_ADD_SAT: + case AST_NODE_SUB: + case AST_NODE_SUB_WRAP: + case AST_NODE_SUB_SAT: + case AST_NODE_MUL: + case AST_NODE_MUL_WRAP: + case AST_NODE_MUL_SAT: + case AST_NODE_DIV: + case AST_NODE_MOD: + case AST_NODE_SHL_SAT: + case AST_NODE_BIT_AND: + case AST_NODE_BIT_OR: + case AST_NODE_BIT_XOR: + case AST_NODE_BANG_EQUAL: + case AST_NODE_EQUAL_EQUAL: + case AST_NODE_GREATER_THAN: + case AST_NODE_GREATER_OR_EQUAL: + case AST_NODE_LESS_THAN: + case AST_NODE_LESS_OR_EQUAL: + case AST_NODE_ARRAY_CAT: + (void)rlExpr(ag, nd.lhs, block, RL_RI_NONE); + (void)rlExpr(ag, nd.rhs, block, RL_RI_NONE); + return false; + + // array_mult (AstRlAnnotate.zig:276-281). + case AST_NODE_ARRAY_MULT: + (void)rlExpr(ag, nd.lhs, block, RL_RI_NONE); + (void)rlExpr(ag, nd.rhs, block, RL_RI_TYPE_ONLY); + return false; + + // error_union, merge_error_sets (AstRlAnnotate.zig:282-287). + case AST_NODE_ERROR_UNION: + case AST_NODE_MERGE_ERROR_SETS: + (void)rlExpr(ag, nd.lhs, block, RL_RI_NONE); + (void)rlExpr(ag, nd.rhs, block, RL_RI_NONE); + return false; + + // bool_and, bool_or (AstRlAnnotate.zig:288-295). + case AST_NODE_BOOL_AND: + case AST_NODE_BOOL_OR: + (void)rlExpr(ag, nd.lhs, block, RL_RI_TYPE_ONLY); + (void)rlExpr(ag, nd.rhs, block, RL_RI_TYPE_ONLY); + return false; + + // bool_not (AstRlAnnotate.zig:296-299). + case AST_NODE_BOOL_NOT: + (void)rlExpr(ag, nd.lhs, block, RL_RI_TYPE_ONLY); + return false; + + // bit_not, negation, negation_wrap (AstRlAnnotate.zig:300-303). + case AST_NODE_BIT_NOT: + case AST_NODE_NEGATION: + case AST_NODE_NEGATION_WRAP: + (void)rlExpr(ag, nd.lhs, block, RL_RI_NONE); + return false; + + // Leaves (AstRlAnnotate.zig:305-320). + case AST_NODE_IDENTIFIER: + case AST_NODE_STRING_LITERAL: + case AST_NODE_MULTILINE_STRING_LITERAL: + case AST_NODE_NUMBER_LITERAL: + case AST_NODE_UNREACHABLE_LITERAL: + case AST_NODE_ASM_SIMPLE: + case AST_NODE_ASM: + case AST_NODE_ASM_LEGACY: + case AST_NODE_ENUM_LITERAL: + case AST_NODE_ERROR_VALUE: + case AST_NODE_ANYFRAME_LITERAL: + case AST_NODE_CONTINUE: + case AST_NODE_CHAR_LITERAL: + case AST_NODE_ERROR_SET_DECL: + return false; + + // builtin_call (AstRlAnnotate.zig:322-330). + case AST_NODE_BUILTIN_CALL_TWO: + case AST_NODE_BUILTIN_CALL_TWO_COMMA: { + uint32_t args[2]; + uint32_t nargs = 0; + if (nd.lhs != 0) + args[nargs++] = nd.lhs; + if (nd.rhs != 0) + args[nargs++] = nd.rhs; + return rlBuiltinCall(ag, block, node, args, nargs); + } + case AST_NODE_BUILTIN_CALL: + case AST_NODE_BUILTIN_CALL_COMMA: { + uint32_t start = nd.lhs; + uint32_t end = nd.rhs; + return rlBuiltinCall( + ag, block, node, tree->extra_data.arr + start, end - start); + } + + // call (AstRlAnnotate.zig:332-351). + case AST_NODE_CALL_ONE: + case AST_NODE_CALL_ONE_COMMA: { + (void)rlExpr(ag, nd.lhs, block, RL_RI_NONE); + if (nd.rhs != 0) + (void)rlExpr(ag, nd.rhs, block, RL_RI_TYPE_ONLY); + return false; + } + case AST_NODE_CALL: + case AST_NODE_CALL_COMMA: { + (void)rlExpr(ag, nd.lhs, block, RL_RI_NONE); + uint32_t start = tree->extra_data.arr[nd.rhs]; + uint32_t end = tree->extra_data.arr[nd.rhs + 1]; + for (uint32_t i = start; i < end; i++) + (void)rlExpr(ag, tree->extra_data.arr[i], block, RL_RI_TYPE_ONLY); + return false; + } + + // return (AstRlAnnotate.zig:353-361). + case AST_NODE_RETURN: + if (nd.lhs != 0) { + bool ret_consumes_rl = rlExpr(ag, nd.lhs, block, RL_RI_TYPED_PTR); + if (ret_consumes_rl) + nodesNeedRlAdd(ag, node); + } + return false; + + // field_access (AstRlAnnotate.zig:363-367). + case AST_NODE_FIELD_ACCESS: + (void)rlExpr(ag, nd.lhs, block, RL_RI_NONE); + return false; + + // if_simple, if (AstRlAnnotate.zig:369-387). + case AST_NODE_IF_SIMPLE: + case AST_NODE_IF: { + uint32_t cond_node = nd.lhs; + uint32_t then_node, else_node = 0; + if (tag == AST_NODE_IF_SIMPLE) { + then_node = nd.rhs; + } else { + then_node = tree->extra_data.arr[nd.rhs]; + else_node = tree->extra_data.arr[nd.rhs + 1]; + } + // Detect payload/error token. + uint32_t last_cond_tok = lastToken(tree, cond_node); + uint32_t pipe_tok = last_cond_tok + 2; + bool has_payload = (pipe_tok < tree->tokens.len + && tree->tokens.tags[pipe_tok] == TOKEN_PIPE); + bool has_error = false; + if (else_node != 0) { + uint32_t else_tok = lastToken(tree, then_node) + 1; + has_error = (else_tok + 1 < tree->tokens.len + && tree->tokens.tags[else_tok + 1] == TOKEN_PIPE); + } + if (has_error || has_payload) + (void)rlExpr(ag, cond_node, block, RL_RI_NONE); + else + (void)rlExpr(ag, cond_node, block, RL_RI_TYPE_ONLY); + + if (else_node != 0) { + bool then_uses = rlExpr(ag, then_node, block, ri); + bool else_uses = rlExpr(ag, else_node, block, ri); + bool uses_rl = then_uses || else_uses; + if (uses_rl) + nodesNeedRlAdd(ag, node); + return uses_rl; + } else { + (void)rlExpr(ag, then_node, block, RL_RI_NONE); + return false; + } + } + + // while (AstRlAnnotate.zig:389-419). + case AST_NODE_WHILE_SIMPLE: + case AST_NODE_WHILE_CONT: + case AST_NODE_WHILE: { + uint32_t cond_node = nd.lhs; + uint32_t body_node, cont_node = 0, else_node = 0; + if (tag == AST_NODE_WHILE_SIMPLE) { + body_node = nd.rhs; + } else if (tag == AST_NODE_WHILE_CONT) { + cont_node = tree->extra_data.arr[nd.rhs]; + body_node = tree->extra_data.arr[nd.rhs + 1]; + } else { + cont_node = tree->extra_data.arr[nd.rhs]; + body_node = tree->extra_data.arr[nd.rhs + 1]; + else_node = tree->extra_data.arr[nd.rhs + 2]; + } + uint32_t main_tok = tree->nodes.main_tokens[node]; + bool is_labeled + = (main_tok >= 2 && tree->tokens.tags[main_tok - 1] == TOKEN_COLON + && tree->tokens.tags[main_tok - 2] == TOKEN_IDENTIFIER); + uint32_t label_token = is_labeled ? main_tok - 2 : UINT32_MAX; + + // Detect payload/error. + uint32_t last_cond_tok = lastToken(tree, cond_node); + uint32_t pipe_tok = last_cond_tok + 2; + bool has_payload = (pipe_tok < tree->tokens.len + && tree->tokens.tags[pipe_tok] == TOKEN_PIPE); + // Error token detection for while: check for else |err|. + bool has_error = false; + if (else_node != 0) { + uint32_t else_tok = lastToken(tree, body_node) + 1; + has_error = (else_tok + 1 < tree->tokens.len + && tree->tokens.tags[else_tok + 1] == TOKEN_PIPE); + } + if (has_error || has_payload) + (void)rlExpr(ag, cond_node, block, RL_RI_NONE); + else + (void)rlExpr(ag, cond_node, block, RL_RI_TYPE_ONLY); + + RlBlock new_block; + new_block.parent = block; + new_block.label_token = label_token; + new_block.is_loop = true; + new_block.ri = ri; + new_block.consumes_res_ptr = false; + + if (cont_node != 0) + (void)rlExpr(ag, cont_node, &new_block, RL_RI_NONE); + (void)rlExpr(ag, body_node, &new_block, RL_RI_NONE); + bool else_consumes = false; + if (else_node != 0) + else_consumes = rlExpr(ag, else_node, block, ri); + if (new_block.consumes_res_ptr || else_consumes) { + nodesNeedRlAdd(ag, node); + return true; + } + return false; + } + + // for (AstRlAnnotate.zig:421-454). + case AST_NODE_FOR_SIMPLE: + case AST_NODE_FOR: { + uint32_t input_buf[16]; + const uint32_t* inputs = NULL; + uint32_t num_inputs = 0; + uint32_t body_node = 0; + uint32_t else_node = 0; + + if (tag == AST_NODE_FOR_SIMPLE) { + input_buf[0] = nd.lhs; + inputs = input_buf; + num_inputs = 1; + body_node = nd.rhs; + } else { + AstFor for_data; + memcpy(&for_data, &nd.rhs, sizeof(AstFor)); + num_inputs = for_data.inputs; + if (num_inputs > 16) + num_inputs = 16; + for (uint32_t i = 0; i < num_inputs; i++) + input_buf[i] = tree->extra_data.arr[nd.lhs + i]; + inputs = input_buf; + body_node = tree->extra_data.arr[nd.lhs + num_inputs]; + if (for_data.has_else) + else_node = tree->extra_data.arr[nd.lhs + num_inputs + 1]; + } + + uint32_t main_tok = tree->nodes.main_tokens[node]; + bool is_labeled + = (main_tok >= 2 && tree->tokens.tags[main_tok - 1] == TOKEN_COLON + && tree->tokens.tags[main_tok - 2] == TOKEN_IDENTIFIER); + uint32_t label_token = is_labeled ? main_tok - 2 : UINT32_MAX; + + for (uint32_t i = 0; i < num_inputs; i++) { + uint32_t input = inputs[i]; + if (tree->nodes.tags[input] == AST_NODE_FOR_RANGE) { + AstData range_nd = tree->nodes.datas[input]; + (void)rlExpr(ag, range_nd.lhs, block, RL_RI_TYPE_ONLY); + if (range_nd.rhs != 0) + (void)rlExpr(ag, range_nd.rhs, block, RL_RI_TYPE_ONLY); + } else { + (void)rlExpr(ag, input, block, RL_RI_NONE); + } + } + + RlBlock new_block; + new_block.parent = block; + new_block.label_token = label_token; + new_block.is_loop = true; + new_block.ri = ri; + new_block.consumes_res_ptr = false; + + (void)rlExpr(ag, body_node, &new_block, RL_RI_NONE); + bool else_consumes = false; + if (else_node != 0) + else_consumes = rlExpr(ag, else_node, block, ri); + if (new_block.consumes_res_ptr || else_consumes) { + nodesNeedRlAdd(ag, node); + return true; + } + return false; + } + + // slice (AstRlAnnotate.zig:456-480). + case AST_NODE_SLICE_OPEN: + (void)rlExpr(ag, nd.lhs, block, RL_RI_NONE); + (void)rlExpr(ag, nd.rhs, block, RL_RI_TYPE_ONLY); + return false; + case AST_NODE_SLICE: { + (void)rlExpr(ag, nd.lhs, block, RL_RI_NONE); + uint32_t start = tree->extra_data.arr[nd.rhs]; + uint32_t end = tree->extra_data.arr[nd.rhs + 1]; + (void)rlExpr(ag, start, block, RL_RI_TYPE_ONLY); + (void)rlExpr(ag, end, block, RL_RI_TYPE_ONLY); + return false; + } + case AST_NODE_SLICE_SENTINEL: { + (void)rlExpr(ag, nd.lhs, block, RL_RI_NONE); + AstSliceSentinel ss; + ss.start = tree->extra_data.arr[nd.rhs]; + ss.end = tree->extra_data.arr[nd.rhs + 1]; + ss.sentinel = tree->extra_data.arr[nd.rhs + 2]; + (void)rlExpr(ag, ss.start, block, RL_RI_TYPE_ONLY); + if (ss.end != 0) + (void)rlExpr(ag, ss.end, block, RL_RI_TYPE_ONLY); + (void)rlExpr(ag, ss.sentinel, block, RL_RI_NONE); + return false; + } + + // deref (AstRlAnnotate.zig:481-484). + case AST_NODE_DEREF: + (void)rlExpr(ag, nd.lhs, block, RL_RI_NONE); + return false; + + // address_of (AstRlAnnotate.zig:485-488). + case AST_NODE_ADDRESS_OF: + (void)rlExpr(ag, nd.lhs, block, RL_RI_NONE); + return false; + + // optional_type (AstRlAnnotate.zig:489-492). + case AST_NODE_OPTIONAL_TYPE: + (void)rlExpr(ag, nd.lhs, block, RL_RI_TYPE_ONLY); + return false; + + // try, nosuspend (AstRlAnnotate.zig:493-495). + case AST_NODE_TRY: + case AST_NODE_NOSUSPEND: + return rlExpr(ag, nd.lhs, block, ri); + + // grouped_expression, unwrap_optional (AstRlAnnotate.zig:496-498). + case AST_NODE_GROUPED_EXPRESSION: + case AST_NODE_UNWRAP_OPTIONAL: + return rlExpr(ag, nd.lhs, block, ri); + + // block (AstRlAnnotate.zig:500-508). + case AST_NODE_BLOCK_TWO: + case AST_NODE_BLOCK_TWO_SEMICOLON: { + uint32_t stmts[2]; + uint32_t count = 0; + if (nd.lhs != 0) + stmts[count++] = nd.lhs; + if (nd.rhs != 0) + stmts[count++] = nd.rhs; + return rlBlockExpr(ag, block, ri, node, stmts, count); + } + case AST_NODE_BLOCK: + case AST_NODE_BLOCK_SEMICOLON: + return rlBlockExpr(ag, block, ri, node, tree->extra_data.arr + nd.lhs, + nd.rhs - nd.lhs); + + // anyframe_type (AstRlAnnotate.zig:509-513). + case AST_NODE_ANYFRAME_TYPE: + (void)rlExpr(ag, nd.rhs, block, RL_RI_TYPE_ONLY); + return false; + + // catch/orelse (AstRlAnnotate.zig:514-522). + case AST_NODE_CATCH: + case AST_NODE_ORELSE: { + (void)rlExpr(ag, nd.lhs, block, RL_RI_NONE); + bool rhs_consumes = rlExpr(ag, nd.rhs, block, ri); + if (rhs_consumes) + nodesNeedRlAdd(ag, node); + return rhs_consumes; + } + + // ptr_type (AstRlAnnotate.zig:524-546). + case AST_NODE_PTR_TYPE_ALIGNED: + if (nd.lhs != 0) + (void)rlExpr(ag, nd.lhs, block, RL_RI_TYPE_ONLY); + (void)rlExpr(ag, nd.rhs, block, RL_RI_TYPE_ONLY); + return false; + case AST_NODE_PTR_TYPE_SENTINEL: + if (nd.lhs != 0) + (void)rlExpr(ag, nd.lhs, block, RL_RI_TYPE_ONLY); + (void)rlExpr(ag, nd.rhs, block, RL_RI_TYPE_ONLY); + return false; + case AST_NODE_PTR_TYPE: { + AstPtrType pt; + pt.sentinel = tree->extra_data.arr[nd.lhs]; + pt.align_node = tree->extra_data.arr[nd.lhs + 1]; + pt.addrspace_node = tree->extra_data.arr[nd.lhs + 2]; + (void)rlExpr(ag, nd.rhs, block, RL_RI_TYPE_ONLY); + if (pt.sentinel != 0) + (void)rlExpr(ag, pt.sentinel, block, RL_RI_TYPE_ONLY); + if (pt.align_node != 0) + (void)rlExpr(ag, pt.align_node, block, RL_RI_TYPE_ONLY); + if (pt.addrspace_node != 0) + (void)rlExpr(ag, pt.addrspace_node, block, RL_RI_TYPE_ONLY); + return false; + } + case AST_NODE_PTR_TYPE_BIT_RANGE: { + AstPtrTypeBitRange pt; + pt.sentinel = tree->extra_data.arr[nd.lhs]; + pt.align_node = tree->extra_data.arr[nd.lhs + 1]; + pt.addrspace_node = tree->extra_data.arr[nd.lhs + 2]; + pt.bit_range_start = tree->extra_data.arr[nd.lhs + 3]; + pt.bit_range_end = tree->extra_data.arr[nd.lhs + 4]; + (void)rlExpr(ag, nd.rhs, block, RL_RI_TYPE_ONLY); + if (pt.sentinel != 0) + (void)rlExpr(ag, pt.sentinel, block, RL_RI_TYPE_ONLY); + if (pt.align_node != 0) + (void)rlExpr(ag, pt.align_node, block, RL_RI_TYPE_ONLY); + if (pt.addrspace_node != 0) + (void)rlExpr(ag, pt.addrspace_node, block, RL_RI_TYPE_ONLY); + if (pt.bit_range_start != 0) { + (void)rlExpr(ag, pt.bit_range_start, block, RL_RI_TYPE_ONLY); + (void)rlExpr(ag, pt.bit_range_end, block, RL_RI_TYPE_ONLY); + } + return false; + } + + // container_decl (AstRlAnnotate.zig:548-564). + case AST_NODE_CONTAINER_DECL: + case AST_NODE_CONTAINER_DECL_TRAILING: + case AST_NODE_CONTAINER_DECL_ARG: + case AST_NODE_CONTAINER_DECL_ARG_TRAILING: + case AST_NODE_CONTAINER_DECL_TWO: + case AST_NODE_CONTAINER_DECL_TWO_TRAILING: + case AST_NODE_TAGGED_UNION: + case AST_NODE_TAGGED_UNION_TRAILING: + case AST_NODE_TAGGED_UNION_ENUM_TAG: + case AST_NODE_TAGGED_UNION_ENUM_TAG_TRAILING: + case AST_NODE_TAGGED_UNION_TWO: + case AST_NODE_TAGGED_UNION_TWO_TRAILING: + rlContainerDecl(ag, block, node); + return false; + + // break (AstRlAnnotate.zig:566-596). + case AST_NODE_BREAK: { + uint32_t opt_label_tok = nd.lhs; // 0 = no label + uint32_t rhs_node = nd.rhs; // 0 = void break + if (rhs_node == 0) + return false; + + RlBlock* opt_cur_block = block; + if (opt_label_tok != 0) { + // Labeled break: find matching block. + while (opt_cur_block != NULL) { + if (opt_cur_block->label_token != UINT32_MAX + && rlTokenIdentEqual( + tree, opt_cur_block->label_token, opt_label_tok)) + break; + opt_cur_block = opt_cur_block->parent; + } + } else { + // No label: breaking from innermost loop. + while (opt_cur_block != NULL) { + if (opt_cur_block->is_loop) + break; + opt_cur_block = opt_cur_block->parent; + } + } + + if (opt_cur_block != NULL) { + bool consumes = rlExpr(ag, rhs_node, block, opt_cur_block->ri); + if (consumes) + opt_cur_block->consumes_res_ptr = true; + } else { + (void)rlExpr(ag, rhs_node, block, RL_RI_NONE); + } + return false; + } + + // array_type (AstRlAnnotate.zig:598-611). + case AST_NODE_ARRAY_TYPE: + (void)rlExpr(ag, nd.lhs, block, RL_RI_TYPE_ONLY); + (void)rlExpr(ag, nd.rhs, block, RL_RI_TYPE_ONLY); + return false; + case AST_NODE_ARRAY_TYPE_SENTINEL: { + (void)rlExpr(ag, nd.lhs, block, RL_RI_TYPE_ONLY); + uint32_t elem_type = tree->extra_data.arr[nd.rhs + 1]; + uint32_t sentinel = tree->extra_data.arr[nd.rhs]; + (void)rlExpr(ag, elem_type, block, RL_RI_TYPE_ONLY); + (void)rlExpr(ag, sentinel, block, RL_RI_TYPE_ONLY); + return false; + } + + // array_access (AstRlAnnotate.zig:612-617). + case AST_NODE_ARRAY_ACCESS: + (void)rlExpr(ag, nd.lhs, block, RL_RI_NONE); + (void)rlExpr(ag, nd.rhs, block, RL_RI_TYPE_ONLY); + return false; + + // comptime (AstRlAnnotate.zig:618-623). + case AST_NODE_COMPTIME: + (void)rlExpr(ag, nd.lhs, block, RL_RI_NONE); + return false; + + // switch (AstRlAnnotate.zig:624-650). + case AST_NODE_SWITCH: + case AST_NODE_SWITCH_COMMA: { + uint32_t cond_node = nd.lhs; + uint32_t extra_idx = nd.rhs; + uint32_t cases_start = tree->extra_data.arr[extra_idx]; + uint32_t cases_end = tree->extra_data.arr[extra_idx + 1]; + + (void)rlExpr(ag, cond_node, block, RL_RI_NONE); + + bool any_consumed = false; + for (uint32_t ci = cases_start; ci < cases_end; ci++) { + uint32_t case_node = tree->extra_data.arr[ci]; + AstNodeTag ct = tree->nodes.tags[case_node]; + AstData cd = tree->nodes.datas[case_node]; + + // Process case values. + if (ct == AST_NODE_SWITCH_CASE_ONE + || ct == AST_NODE_SWITCH_CASE_INLINE_ONE) { + if (cd.lhs != 0) { + if (tree->nodes.tags[cd.lhs] == AST_NODE_SWITCH_RANGE) { + AstData rd = tree->nodes.datas[cd.lhs]; + (void)rlExpr(ag, rd.lhs, block, RL_RI_NONE); + (void)rlExpr(ag, rd.rhs, block, RL_RI_NONE); + } else { + (void)rlExpr(ag, cd.lhs, block, RL_RI_NONE); + } + } + } else { + // SWITCH_CASE / SWITCH_CASE_INLINE: SubRange[lhs] + uint32_t items_start = tree->extra_data.arr[cd.lhs]; + uint32_t items_end = tree->extra_data.arr[cd.lhs + 1]; + for (uint32_t ii = items_start; ii < items_end; ii++) { + uint32_t item = tree->extra_data.arr[ii]; + if (tree->nodes.tags[item] == AST_NODE_SWITCH_RANGE) { + AstData rd = tree->nodes.datas[item]; + (void)rlExpr(ag, rd.lhs, block, RL_RI_NONE); + (void)rlExpr(ag, rd.rhs, block, RL_RI_NONE); + } else { + (void)rlExpr(ag, item, block, RL_RI_NONE); + } + } + } + // Process case target expr. + if (rlExpr(ag, cd.rhs, block, ri)) + any_consumed = true; + } + if (any_consumed) + nodesNeedRlAdd(ag, node); + return any_consumed; + } + + // suspend (AstRlAnnotate.zig:651-654). + case AST_NODE_SUSPEND: + if (nd.lhs != 0) + (void)rlExpr(ag, nd.lhs, block, RL_RI_NONE); + return false; + + // resume (AstRlAnnotate.zig:655-658). + case AST_NODE_RESUME: + (void)rlExpr(ag, nd.lhs, block, RL_RI_NONE); + return false; + + // array_init (AstRlAnnotate.zig:660-695). + case AST_NODE_ARRAY_INIT_ONE: + case AST_NODE_ARRAY_INIT_ONE_COMMA: + case AST_NODE_ARRAY_INIT_DOT_TWO: + case AST_NODE_ARRAY_INIT_DOT_TWO_COMMA: + case AST_NODE_ARRAY_INIT_DOT: + case AST_NODE_ARRAY_INIT_DOT_COMMA: + case AST_NODE_ARRAY_INIT: + case AST_NODE_ARRAY_INIT_COMMA: { + // Extract type_expr and elements. + uint32_t type_expr = 0; + uint32_t elem_buf[2]; + const uint32_t* elems = NULL; + uint32_t nelem = 0; + switch (tag) { + case AST_NODE_ARRAY_INIT_ONE: + case AST_NODE_ARRAY_INIT_ONE_COMMA: + type_expr = nd.lhs; + if (nd.rhs != 0) { + elem_buf[0] = nd.rhs; + elems = elem_buf; + nelem = 1; + } + break; + case AST_NODE_ARRAY_INIT_DOT_TWO: + case AST_NODE_ARRAY_INIT_DOT_TWO_COMMA: { + uint32_t idx = 0; + if (nd.lhs != 0) + elem_buf[idx++] = nd.lhs; + if (nd.rhs != 0) + elem_buf[idx++] = nd.rhs; + elems = elem_buf; + nelem = idx; + break; + } + case AST_NODE_ARRAY_INIT_DOT: + case AST_NODE_ARRAY_INIT_DOT_COMMA: + elems = tree->extra_data.arr + nd.lhs; + nelem = nd.rhs - nd.lhs; + break; + case AST_NODE_ARRAY_INIT: + case AST_NODE_ARRAY_INIT_COMMA: { + type_expr = nd.lhs; + uint32_t start = tree->extra_data.arr[nd.rhs]; + uint32_t end = tree->extra_data.arr[nd.rhs + 1]; + elems = tree->extra_data.arr + start; + nelem = end - start; + break; + } + default: + break; + } + if (type_expr != 0) { + (void)rlExpr(ag, type_expr, block, RL_RI_NONE); + for (uint32_t i = 0; i < nelem; i++) + (void)rlExpr(ag, elems[i], block, RL_RI_TYPE_ONLY); + return false; + } + if (ri.have_type) { + for (uint32_t i = 0; i < nelem; i++) + (void)rlExpr(ag, elems[i], block, ri); + return ri.have_ptr; + } else { + for (uint32_t i = 0; i < nelem; i++) + (void)rlExpr(ag, elems[i], block, RL_RI_NONE); + return false; + } + } + + // struct_init (AstRlAnnotate.zig:697-732). + case AST_NODE_STRUCT_INIT_ONE: + case AST_NODE_STRUCT_INIT_ONE_COMMA: + case AST_NODE_STRUCT_INIT_DOT_TWO: + case AST_NODE_STRUCT_INIT_DOT_TWO_COMMA: + case AST_NODE_STRUCT_INIT_DOT: + case AST_NODE_STRUCT_INIT_DOT_COMMA: + case AST_NODE_STRUCT_INIT: + case AST_NODE_STRUCT_INIT_COMMA: { + uint32_t type_expr = 0; + uint32_t field_buf[2]; + const uint32_t* fields = NULL; + uint32_t nfields = 0; + switch (tag) { + case AST_NODE_STRUCT_INIT_ONE: + case AST_NODE_STRUCT_INIT_ONE_COMMA: + type_expr = nd.lhs; + if (nd.rhs != 0) { + field_buf[0] = nd.rhs; + fields = field_buf; + nfields = 1; + } + break; + case AST_NODE_STRUCT_INIT_DOT_TWO: + case AST_NODE_STRUCT_INIT_DOT_TWO_COMMA: { + uint32_t idx = 0; + if (nd.lhs != 0) + field_buf[idx++] = nd.lhs; + if (nd.rhs != 0) + field_buf[idx++] = nd.rhs; + fields = field_buf; + nfields = idx; + break; + } + case AST_NODE_STRUCT_INIT_DOT: + case AST_NODE_STRUCT_INIT_DOT_COMMA: + fields = tree->extra_data.arr + nd.lhs; + nfields = nd.rhs - nd.lhs; + break; + case AST_NODE_STRUCT_INIT: + case AST_NODE_STRUCT_INIT_COMMA: { + type_expr = nd.lhs; + uint32_t start = tree->extra_data.arr[nd.rhs]; + uint32_t end = tree->extra_data.arr[nd.rhs + 1]; + fields = tree->extra_data.arr + start; + nfields = end - start; + break; + } + default: + break; + } + if (type_expr != 0) { + (void)rlExpr(ag, type_expr, block, RL_RI_NONE); + for (uint32_t i = 0; i < nfields; i++) + (void)rlExpr(ag, fields[i], block, RL_RI_TYPE_ONLY); + return false; + } + if (ri.have_type) { + for (uint32_t i = 0; i < nfields; i++) + (void)rlExpr(ag, fields[i], block, ri); + return ri.have_ptr; + } else { + for (uint32_t i = 0; i < nfields; i++) + (void)rlExpr(ag, fields[i], block, RL_RI_NONE); + return false; + } + } + + // fn_proto, fn_decl (AstRlAnnotate.zig:734-770). + case AST_NODE_FN_PROTO_SIMPLE: + case AST_NODE_FN_PROTO_MULTI: + case AST_NODE_FN_PROTO_ONE: + case AST_NODE_FN_PROTO: + case AST_NODE_FN_DECL: { + // Extract return type and body. + uint32_t return_type = 0; + uint32_t body_node = 0; + + if (tag == AST_NODE_FN_DECL) { + body_node = nd.rhs; + // fn_proto is nd.lhs + uint32_t proto = nd.lhs; + AstNodeTag ptag = tree->nodes.tags[proto]; + AstData pnd = tree->nodes.datas[proto]; + if (ptag == AST_NODE_FN_PROTO_SIMPLE) { + return_type = pnd.rhs; + if (pnd.lhs != 0) + (void)rlExpr(ag, pnd.lhs, block, RL_RI_TYPE_ONLY); + } else if (ptag == AST_NODE_FN_PROTO_MULTI) { + return_type = pnd.rhs; + uint32_t ps = tree->extra_data.arr[pnd.lhs]; + uint32_t pe = tree->extra_data.arr[pnd.lhs + 1]; + for (uint32_t i = ps; i < pe; i++) + (void)rlExpr( + ag, tree->extra_data.arr[i], block, RL_RI_TYPE_ONLY); + } else if (ptag == AST_NODE_FN_PROTO_ONE) { + return_type = pnd.rhs; + AstFnProtoOne fp; + fp.param = tree->extra_data.arr[pnd.lhs]; + fp.align_expr = tree->extra_data.arr[pnd.lhs + 1]; + fp.addrspace_expr = tree->extra_data.arr[pnd.lhs + 2]; + fp.section_expr = tree->extra_data.arr[pnd.lhs + 3]; + fp.callconv_expr = tree->extra_data.arr[pnd.lhs + 4]; + if (fp.param != 0) + (void)rlExpr(ag, fp.param, block, RL_RI_TYPE_ONLY); + if (fp.align_expr != 0) + (void)rlExpr(ag, fp.align_expr, block, RL_RI_TYPE_ONLY); + if (fp.addrspace_expr != 0) + (void)rlExpr( + ag, fp.addrspace_expr, block, RL_RI_TYPE_ONLY); + if (fp.section_expr != 0) + (void)rlExpr(ag, fp.section_expr, block, RL_RI_TYPE_ONLY); + if (fp.callconv_expr != 0) + (void)rlExpr(ag, fp.callconv_expr, block, RL_RI_TYPE_ONLY); + } else if (ptag == AST_NODE_FN_PROTO) { + return_type = pnd.rhs; + AstFnProto fp; + fp.params_start = tree->extra_data.arr[pnd.lhs]; + fp.params_end = tree->extra_data.arr[pnd.lhs + 1]; + fp.align_expr = tree->extra_data.arr[pnd.lhs + 2]; + fp.addrspace_expr = tree->extra_data.arr[pnd.lhs + 3]; + fp.section_expr = tree->extra_data.arr[pnd.lhs + 4]; + fp.callconv_expr = tree->extra_data.arr[pnd.lhs + 5]; + for (uint32_t i = fp.params_start; i < fp.params_end; i++) + (void)rlExpr( + ag, tree->extra_data.arr[i], block, RL_RI_TYPE_ONLY); + if (fp.align_expr != 0) + (void)rlExpr(ag, fp.align_expr, block, RL_RI_TYPE_ONLY); + if (fp.addrspace_expr != 0) + (void)rlExpr( + ag, fp.addrspace_expr, block, RL_RI_TYPE_ONLY); + if (fp.section_expr != 0) + (void)rlExpr(ag, fp.section_expr, block, RL_RI_TYPE_ONLY); + if (fp.callconv_expr != 0) + (void)rlExpr(ag, fp.callconv_expr, block, RL_RI_TYPE_ONLY); + } + } else { + // Standalone fn_proto (no body). + if (tag == AST_NODE_FN_PROTO_SIMPLE) { + return_type = nd.rhs; + if (nd.lhs != 0) + (void)rlExpr(ag, nd.lhs, block, RL_RI_TYPE_ONLY); + } else if (tag == AST_NODE_FN_PROTO_MULTI) { + return_type = nd.rhs; + uint32_t ps = tree->extra_data.arr[nd.lhs]; + uint32_t pe = tree->extra_data.arr[nd.lhs + 1]; + for (uint32_t i = ps; i < pe; i++) + (void)rlExpr( + ag, tree->extra_data.arr[i], block, RL_RI_TYPE_ONLY); + } else if (tag == AST_NODE_FN_PROTO_ONE) { + return_type = nd.rhs; + AstFnProtoOne fp; + fp.param = tree->extra_data.arr[nd.lhs]; + fp.align_expr = tree->extra_data.arr[nd.lhs + 1]; + fp.addrspace_expr = tree->extra_data.arr[nd.lhs + 2]; + fp.section_expr = tree->extra_data.arr[nd.lhs + 3]; + fp.callconv_expr = tree->extra_data.arr[nd.lhs + 4]; + if (fp.param != 0) + (void)rlExpr(ag, fp.param, block, RL_RI_TYPE_ONLY); + if (fp.align_expr != 0) + (void)rlExpr(ag, fp.align_expr, block, RL_RI_TYPE_ONLY); + if (fp.addrspace_expr != 0) + (void)rlExpr( + ag, fp.addrspace_expr, block, RL_RI_TYPE_ONLY); + if (fp.section_expr != 0) + (void)rlExpr(ag, fp.section_expr, block, RL_RI_TYPE_ONLY); + if (fp.callconv_expr != 0) + (void)rlExpr(ag, fp.callconv_expr, block, RL_RI_TYPE_ONLY); + } else if (tag == AST_NODE_FN_PROTO) { + return_type = nd.rhs; + AstFnProto fp; + fp.params_start = tree->extra_data.arr[nd.lhs]; + fp.params_end = tree->extra_data.arr[nd.lhs + 1]; + fp.align_expr = tree->extra_data.arr[nd.lhs + 2]; + fp.addrspace_expr = tree->extra_data.arr[nd.lhs + 3]; + fp.section_expr = tree->extra_data.arr[nd.lhs + 4]; + fp.callconv_expr = tree->extra_data.arr[nd.lhs + 5]; + for (uint32_t i = fp.params_start; i < fp.params_end; i++) + (void)rlExpr( + ag, tree->extra_data.arr[i], block, RL_RI_TYPE_ONLY); + if (fp.align_expr != 0) + (void)rlExpr(ag, fp.align_expr, block, RL_RI_TYPE_ONLY); + if (fp.addrspace_expr != 0) + (void)rlExpr( + ag, fp.addrspace_expr, block, RL_RI_TYPE_ONLY); + if (fp.section_expr != 0) + (void)rlExpr(ag, fp.section_expr, block, RL_RI_TYPE_ONLY); + if (fp.callconv_expr != 0) + (void)rlExpr(ag, fp.callconv_expr, block, RL_RI_TYPE_ONLY); + } + } + + if (return_type != 0) + (void)rlExpr(ag, return_type, block, RL_RI_TYPE_ONLY); + if (body_node != 0) + (void)rlExpr(ag, body_node, block, RL_RI_NONE); + return false; + } + + // Remaining: usingnamespace, await, assign_destructure, async calls. + case AST_NODE_USINGNAMESPACE: + return false; + case AST_NODE_AWAIT: + (void)rlExpr(ag, nd.lhs, block, RL_RI_NONE); + return false; + case AST_NODE_ASSIGN_DESTRUCTURE: + return false; // TODO if needed + case AST_NODE_ASYNC_CALL_ONE: + case AST_NODE_ASYNC_CALL_ONE_COMMA: + case AST_NODE_ASYNC_CALL: + case AST_NODE_ASYNC_CALL_COMMA: + return false; // async not relevant + + default: + return false; + } +} + +// astRlAnnotate (AstRlAnnotate.zig:64-83). +// Entry point: run the RL annotation pre-pass. +static void astRlAnnotate(AstGenCtx* ag) { + const Ast* tree = ag->tree; + if (tree->has_error) + return; + + // Get root container members (same as in astGen). + AstData root_data = tree->nodes.datas[0]; + uint32_t members_start = root_data.lhs; + uint32_t members_end = root_data.rhs; + const uint32_t* members = tree->extra_data.arr + members_start; + uint32_t members_len = members_end - members_start; + + for (uint32_t i = 0; i < members_len; i++) + (void)rlExpr(ag, members[i], NULL, RL_RI_NONE); +} + +// --- Public API: astGen (AstGen.zig:144) --- + +Zir astGen(const Ast* ast) { + AstGenCtx ag; + memset(&ag, 0, sizeof(ag)); + ag.tree = ast; + + // Initial allocations (AstGen.zig:162-172). + uint32_t nodes_len = ast->nodes.len; + uint32_t init_cap = nodes_len > 8 ? nodes_len : 8; + + ag.inst_cap = init_cap; + ag.inst_tags = ARR_INIT(ZirInstTag, ag.inst_cap); + ag.inst_datas = ARR_INIT(ZirInstData, ag.inst_cap); + + ag.extra_cap = init_cap + ZIR_EXTRA_RESERVED_COUNT; + ag.extra = ARR_INIT(uint32_t, ag.extra_cap); + + ag.string_bytes_cap = 16; + ag.string_bytes = ARR_INIT(uint8_t, ag.string_bytes_cap); + + // String table index 0 is reserved for NullTerminatedString.empty + // (AstGen.zig:163). + ag.string_bytes[0] = 0; + ag.string_bytes_len = 1; + + // Reserve extra[0..1] (AstGen.zig:170-172). + ag.extra[ZIR_EXTRA_COMPILE_ERRORS] = 0; + ag.extra[ZIR_EXTRA_IMPORTS] = 0; + ag.extra_len = ZIR_EXTRA_RESERVED_COUNT; + + // Run AstRlAnnotate pre-pass (AstGen.zig:150-151). + astRlAnnotate(&ag); + + // Set up root GenZir scope (AstGen.zig:176-185). + GenZir gen_scope; + memset(&gen_scope, 0, sizeof(gen_scope)); + gen_scope.base.tag = SCOPE_GEN_ZIR; + gen_scope.parent = NULL; + gen_scope.astgen = &ag; + gen_scope.is_comptime = true; + gen_scope.decl_node_index = 0; // root + gen_scope.decl_line = 0; + gen_scope.break_block = UINT32_MAX; + + // Get root container members: containerDeclRoot (AstGen.zig:191-195). + AstData root_data = ast->nodes.datas[0]; + uint32_t members_start = root_data.lhs; + uint32_t members_end = root_data.rhs; + const uint32_t* members = ast->extra_data.arr + members_start; + uint32_t members_len = members_end - members_start; + + structDeclInner(&ag, &gen_scope, 0, members, members_len); + + // Write imports list (AstGen.zig:227-244). + writeImports(&ag); + + // Build output Zir (AstGen.zig:211-239). + Zir zir; + zir.inst_len = ag.inst_len; + zir.inst_cap = ag.inst_cap; + zir.inst_tags = ag.inst_tags; + zir.inst_datas = ag.inst_datas; + zir.extra_len = ag.extra_len; + zir.extra_cap = ag.extra_cap; + zir.extra = ag.extra; + zir.string_bytes_len = ag.string_bytes_len; + zir.string_bytes_cap = ag.string_bytes_cap; + zir.string_bytes = ag.string_bytes; + zir.has_compile_errors = ag.has_compile_errors; + + free(ag.imports); + free(ag.decl_names); + free(ag.decl_nodes); + free(ag.scratch_instructions); + free(ag.scratch_extra); + free(ag.ref_table_keys); + free(ag.ref_table_vals); + free(ag.nodes_need_rl); + free(ag.string_table); + + return zir; +} diff --git a/stage0/astgen.h b/stage0/astgen.h new file mode 100644 index 0000000000..0f0e1eefce --- /dev/null +++ b/stage0/astgen.h @@ -0,0 +1,11 @@ +// astgen.h — AST to ZIR conversion, ported from lib/std/zig/AstGen.zig. +#ifndef _ZIG0_ASTGEN_H__ +#define _ZIG0_ASTGEN_H__ + +#include "ast.h" +#include "zir.h" + +// Convert AST to ZIR. +Zir astGen(const Ast* ast); + +#endif diff --git a/stage0/astgen_test.zig b/stage0/astgen_test.zig new file mode 100644 index 0000000000..5592d1fc32 --- /dev/null +++ b/stage0/astgen_test.zig @@ -0,0 +1,851 @@ +const std = @import("std"); +const Ast = std.zig.Ast; +const Zir = std.zig.Zir; +const AstGen = std.zig.AstGen; +const Allocator = std.mem.Allocator; + +const c = @cImport({ + @cInclude("astgen.h"); +}); + +fn refZir(gpa: Allocator, source: [:0]const u8) !Zir { + var tree = try Ast.parse(gpa, source, .zig); + defer tree.deinit(gpa); + return try AstGen.generate(gpa, tree); +} + +test "astgen dump: simple cases" { + const gpa = std.testing.allocator; + + const cases = .{ + .{ "empty", "" }, + .{ "comptime {}", "comptime {}" }, + .{ "const x = 0;", "const x = 0;" }, + .{ "const x = 1;", "const x = 1;" }, + .{ "const x = 0; const y = 0;", "const x = 0; const y = 0;" }, + .{ "test \"t\" {}", "test \"t\" {}" }, + .{ "const std = @import(\"std\");", "const std = @import(\"std\");" }, + .{ "test_all.zig", @embedFile("test_all.zig") }, + }; + + inline for (cases) |case| { + // std.debug.print("--- {s} ---\n", .{case[0]}); + const source: [:0]const u8 = case[1]; + var zir = try refZir(gpa, source); + zir.deinit(gpa); + } +} + +/// Build a mask of extra[] indices that contain hash data (src_hash or +/// fields_hash). These are zero-filled in the C output but contain real +/// Blake3 hashes in the Zig reference. We skip these positions during +/// comparison. +fn buildHashSkipMask(gpa: Allocator, ref: Zir) ![]bool { + const ref_extra_len: u32 = @intCast(ref.extra.len); + const skip = try gpa.alloc(bool, ref_extra_len); + @memset(skip, false); + + const ref_len: u32 = @intCast(ref.instructions.len); + const ref_tags = ref.instructions.items(.tag); + const ref_datas = ref.instructions.items(.data); + for (0..ref_len) |i| { + switch (ref_tags[i]) { + .extended => { + const ext = ref_datas[i].extended; + if (ext.opcode == .struct_decl or ext.opcode == .enum_decl) { + // StructDecl/EnumDecl starts with fields_hash[4]. + const pi = ext.operand; + for (0..4) |j| skip[pi + j] = true; + } + }, + .declaration => { + // Declaration starts with src_hash[4]. + const pi = ref_datas[i].declaration.payload_index; + for (0..4) |j| skip[pi + j] = true; + }, + .func, .func_inferred => { + // Func payload: ret_ty(1) + param_block(1) + body_len(1) + // + trailing ret_ty + body + SrcLocs(3) + proto_hash(4). + const pi = ref_datas[i].pl_node.payload_index; + const ret_ty_raw: u32 = ref.extra[pi]; + const ret_body_len: u32 = ret_ty_raw & 0x7FFFFFFF; + const body_len: u32 = ref.extra[pi + 2]; + // ret_ty trailing: if body_len > 1, it's a body; if == 1, it's a ref; if 0, void. + const ret_trailing: u32 = if (ret_body_len > 1) ret_body_len else if (ret_body_len == 1) 1 else 0; + // proto_hash is at: pi + 3 + ret_trailing + body_len + 3 + if (body_len > 0) { + const hash_start = pi + 3 + ret_trailing + body_len + 3; + for (0..4) |j| { + if (hash_start + j < ref_extra_len) + skip[hash_start + j] = true; + } + } + }, + else => {}, + } + } + return skip; +} + +test "astgen: empty source" { + const gpa = std.testing.allocator; + const source: [:0]const u8 = ""; + + var ref_zir = try refZir(gpa, source); + defer ref_zir.deinit(gpa); + + var c_ast = c.astParse(source.ptr, @intCast(source.len)); + defer c.astDeinit(&c_ast); + var c_zir = c.astGen(&c_ast); + defer c.zirDeinit(&c_zir); + + try expectEqualZir(gpa, ref_zir, c_zir); +} + +test "astgen: comptime {}" { + const gpa = std.testing.allocator; + const source: [:0]const u8 = "comptime {}"; + + var ref_zir = try refZir(gpa, source); + defer ref_zir.deinit(gpa); + + var c_ast = c.astParse(source.ptr, @intCast(source.len)); + defer c.astDeinit(&c_ast); + var c_zir = c.astGen(&c_ast); + defer c.zirDeinit(&c_zir); + + try expectEqualZir(gpa, ref_zir, c_zir); +} + +test "astgen: const x = 0;" { + const gpa = std.testing.allocator; + const source: [:0]const u8 = "const x = 0;"; + + var ref_zir = try refZir(gpa, source); + defer ref_zir.deinit(gpa); + + var c_ast = c.astParse(source.ptr, @intCast(source.len)); + defer c.astDeinit(&c_ast); + var c_zir = c.astGen(&c_ast); + defer c.zirDeinit(&c_zir); + + try expectEqualZir(gpa, ref_zir, c_zir); +} + +test "astgen: const x = 1;" { + const gpa = std.testing.allocator; + const source: [:0]const u8 = "const x = 1;"; + + var ref_zir = try refZir(gpa, source); + defer ref_zir.deinit(gpa); + + var c_ast = c.astParse(source.ptr, @intCast(source.len)); + defer c.astDeinit(&c_ast); + var c_zir = c.astGen(&c_ast); + defer c.zirDeinit(&c_zir); + + try expectEqualZir(gpa, ref_zir, c_zir); +} + +test "astgen: const x = 0; const y = 0;" { + const gpa = std.testing.allocator; + const source: [:0]const u8 = "const x = 0; const y = 0;"; + + var ref_zir = try refZir(gpa, source); + defer ref_zir.deinit(gpa); + + var c_ast = c.astParse(source.ptr, @intCast(source.len)); + defer c.astDeinit(&c_ast); + var c_zir = c.astGen(&c_ast); + defer c.zirDeinit(&c_zir); + + try expectEqualZir(gpa, ref_zir, c_zir); +} + +test "astgen: field_access" { + const gpa = std.testing.allocator; + const source: [:0]const u8 = "const std = @import(\"std\");\nconst mem = std.mem;"; + + var ref_zir = try refZir(gpa, source); + defer ref_zir.deinit(gpa); + + var c_ast = c.astParse(source.ptr, @intCast(source.len)); + defer c.astDeinit(&c_ast); + var c_zir = c.astGen(&c_ast); + defer c.zirDeinit(&c_zir); + + try expectEqualZir(gpa, ref_zir, c_zir); +} + +test "astgen: addr array init" { + const gpa = std.testing.allocator; + const source: [:0]const u8 = "const x = &[_][]const u8{\"a\",\"b\"};"; + + var ref_zir = try refZir(gpa, source); + defer ref_zir.deinit(gpa); + + var c_ast = c.astParse(source.ptr, @intCast(source.len)); + defer c.astDeinit(&c_ast); + var c_zir = c.astGen(&c_ast); + defer c.zirDeinit(&c_zir); + + try expectEqualZir(gpa, ref_zir, c_zir); +} + +test "astgen: test empty body" { + const gpa = std.testing.allocator; + const source: [:0]const u8 = "test \"t\" {}"; + + var ref_zir = try refZir(gpa, source); + defer ref_zir.deinit(gpa); + + var c_ast = c.astParse(source.ptr, @intCast(source.len)); + defer c.astDeinit(&c_ast); + var c_zir = c.astGen(&c_ast); + defer c.zirDeinit(&c_zir); + + try expectEqualZir(gpa, ref_zir, c_zir); +} + +test "astgen: test_all.zig" { + const gpa = std.testing.allocator; + const source: [:0]const u8 = @embedFile("test_all.zig"); + + var ref_zir = try refZir(gpa, source); + defer ref_zir.deinit(gpa); + + var c_ast = c.astParse(source.ptr, @intCast(source.len)); + defer c.astDeinit(&c_ast); + var c_zir = c.astGen(&c_ast); + defer c.zirDeinit(&c_zir); + + try expectEqualZir(gpa, ref_zir, c_zir); +} + +test "astgen: @import" { + const gpa = std.testing.allocator; + const source: [:0]const u8 = "const std = @import(\"std\");"; + + var ref_zir = try refZir(gpa, source); + defer ref_zir.deinit(gpa); + + var c_ast = c.astParse(source.ptr, @intCast(source.len)); + defer c.astDeinit(&c_ast); + var c_zir = c.astGen(&c_ast); + defer c.zirDeinit(&c_zir); + + try expectEqualZir(gpa, ref_zir, c_zir); +} + +fn expectEqualZir(gpa: Allocator, ref: Zir, got: c.Zir) !void { + const ref_len: u32 = @intCast(ref.instructions.len); + const ref_tags = ref.instructions.items(.tag); + const ref_datas = ref.instructions.items(.data); + + // 1. Compare lengths. + try std.testing.expectEqual(ref_len, got.inst_len); + + // 2. Compare instruction tags. + for (0..ref_len) |i| { + const ref_tag: u8 = @intFromEnum(ref_tags[i]); + const got_tag: u8 = @intCast(got.inst_tags[i]); + if (ref_tag != got_tag) { + std.debug.print( + "inst_tags[{d}] mismatch: ref={d} got={d}\n", + .{ i, ref_tag, got_tag }, + ); + return error.TestExpectedEqual; + } + } + + // 3. Compare instruction data field-by-field. + for (0..ref_len) |i| { + try expectEqualData(i, ref_tags[i], ref_datas[i], got.inst_datas[i]); + } + // 4. Compare string bytes. + const ref_sb_len: u32 = @intCast(ref.string_bytes.len); + try std.testing.expectEqual(ref_sb_len, got.string_bytes_len); + for (0..ref_sb_len) |i| { + if (ref.string_bytes[i] != got.string_bytes[i]) { + std.debug.print( + "string_bytes[{d}] mismatch: ref=0x{x:0>2} got=0x{x:0>2}\n", + .{ i, ref.string_bytes[i], got.string_bytes[i] }, + ); + return error.TestExpectedEqual; + } + } + + // 5. Compare extra data (skipping hash positions). + const skip = try buildHashSkipMask(gpa, ref); + defer gpa.free(skip); + const ref_extra_len: u32 = @intCast(ref.extra.len); + try std.testing.expectEqual(ref_extra_len, got.extra_len); + for (0..ref_extra_len) |i| { + if (skip[i]) continue; + if (ref.extra[i] != got.extra[i]) { + // Show first 10 extra diffs. + var count: u32 = 0; + for (0..ref_extra_len) |j| { + if (!skip[j] and ref.extra[j] != got.extra[j]) { + std.debug.print( + "extra[{d}] mismatch: ref={d} got={d}\n", + .{ j, ref.extra[j], got.extra[j] }, + ); + count += 1; + if (count >= 10) break; + } + } + return error.TestExpectedEqual; + } + } +} + +/// Compare a single instruction's data, dispatching by tag. +/// Zig's Data union has no guaranteed in-memory layout, so we +/// compare each variant's fields individually. +fn expectEqualData( + idx: usize, + tag: Zir.Inst.Tag, + ref: Zir.Inst.Data, + got: c.ZirInstData, +) !void { + switch (tag) { + .extended => { + const r = ref.extended; + const g = got.extended; + // Some extended opcodes have undefined/unused small+operand. + const skip_data = switch (r.opcode) { + .dbg_empty_stmt, .astgen_error => true, + else => false, + }; + const skip_small = switch (r.opcode) { + .add_with_overflow, + .sub_with_overflow, + .mul_with_overflow, + .shl_with_overflow, + .restore_err_ret_index, + .branch_hint, + => true, + else => false, + }; + if (@intFromEnum(r.opcode) != g.opcode or + (!skip_data and !skip_small and r.small != g.small) or + (!skip_data and r.operand != g.operand)) + { + std.debug.print( + "inst_datas[{d}] (extended) mismatch:\n" ++ + " ref: opcode={d} small=0x{x:0>4} operand={d}\n" ++ + " got: opcode={d} small=0x{x:0>4} operand={d}\n", + .{ + idx, + @intFromEnum(r.opcode), + r.small, + r.operand, + g.opcode, + g.small, + g.operand, + }, + ); + return error.TestExpectedEqual; + } + }, + .declaration => { + const r = ref.declaration; + const g = got.declaration; + if (@intFromEnum(r.src_node) != g.src_node or + r.payload_index != g.payload_index) + { + std.debug.print( + "inst_datas[{d}] (declaration) mismatch:\n" ++ + " ref: src_node={d} payload_index={d}\n" ++ + " got: src_node={d} payload_index={d}\n", + .{ + idx, + @intFromEnum(r.src_node), + r.payload_index, + g.src_node, + g.payload_index, + }, + ); + return error.TestExpectedEqual; + } + }, + .break_inline => { + const r = ref.@"break"; + const g = got.break_data; + if (@intFromEnum(r.operand) != g.operand or + r.payload_index != g.payload_index) + { + std.debug.print( + "inst_datas[{d}] (break_inline) mismatch:\n" ++ + " ref: operand={d} payload_index={d}\n" ++ + " got: operand={d} payload_index={d}\n", + .{ + idx, + @intFromEnum(r.operand), + r.payload_index, + g.operand, + g.payload_index, + }, + ); + return error.TestExpectedEqual; + } + }, + .import => { + const r = ref.pl_tok; + const g = got.pl_tok; + if (@intFromEnum(r.src_tok) != g.src_tok or + r.payload_index != g.payload_index) + { + std.debug.print( + "inst_datas[{d}] (import) mismatch:\n" ++ + " ref: src_tok={d} payload_index={d}\n" ++ + " got: src_tok={d} payload_index={d}\n", + .{ + idx, + @intFromEnum(r.src_tok), + r.payload_index, + g.src_tok, + g.payload_index, + }, + ); + return error.TestExpectedEqual; + } + }, + .dbg_stmt => { + const r = ref.dbg_stmt; + const g = got.dbg_stmt; + if (r.line != g.line or r.column != g.column) { + std.debug.print( + "inst_datas[{d}] (dbg_stmt) mismatch:\n" ++ + " ref: line={d} column={d}\n" ++ + " got: line={d} column={d}\n", + .{ idx, r.line, r.column, g.line, g.column }, + ); + return error.TestExpectedEqual; + } + }, + .ensure_result_non_error, + .restore_err_ret_index_unconditional, + .validate_struct_init_ty, + .validate_struct_init_result_ty, + .struct_init_empty_result, + .struct_init_empty, + .struct_init_empty_ref_result, + => { + const r = ref.un_node; + const g = got.un_node; + if (@intFromEnum(r.src_node) != g.src_node or + @intFromEnum(r.operand) != g.operand) + { + std.debug.print( + "inst_datas[{d}] ({s}) mismatch:\n" ++ + " ref: src_node={d} operand={d}\n" ++ + " got: src_node={d} operand={d}\n", + .{ + idx, + @tagName(tag), + @intFromEnum(r.src_node), + @intFromEnum(r.operand), + g.src_node, + g.operand, + }, + ); + return error.TestExpectedEqual; + } + }, + .ret_implicit => { + const r = ref.un_tok; + const g = got.un_tok; + if (@intFromEnum(r.src_tok) != g.src_tok or + @intFromEnum(r.operand) != g.operand) + { + std.debug.print( + "inst_datas[{d}] (ret_implicit) mismatch:\n" ++ + " ref: src_tok={d} operand={d}\n" ++ + " got: src_tok={d} operand={d}\n", + .{ + idx, + @intFromEnum(r.src_tok), + @intFromEnum(r.operand), + g.src_tok, + g.operand, + }, + ); + return error.TestExpectedEqual; + } + }, + .func, + .func_inferred, + .array_type, + .array_type_sentinel, + .array_cat, + .array_init, + .array_init_ref, + .error_set_decl, + .struct_init_field_type, + .struct_init, + .struct_init_ref, + .validate_array_init_ref_ty, + .validate_array_init_ty, + => { + const r = ref.pl_node; + const g = got.pl_node; + if (@intFromEnum(r.src_node) != g.src_node or + r.payload_index != g.payload_index) + { + std.debug.print( + "inst_datas[{d}] ({s}) mismatch:\n" ++ + " ref: src_node={d} payload_index={d}\n" ++ + " got: src_node={d} payload_index={d}\n", + .{ + idx, + @tagName(tag), + @intFromEnum(r.src_node), + r.payload_index, + g.src_node, + g.payload_index, + }, + ); + return error.TestExpectedEqual; + } + }, + .decl_val, .decl_ref => { + const r = ref.str_tok; + const g = got.str_tok; + if (@intFromEnum(r.start) != g.start or @intFromEnum(r.src_tok) != g.src_tok) { + std.debug.print( + "inst_datas[{d}] ({s}) mismatch:\n" ++ + " ref: start={d} src_tok={d}\n" ++ + " got: start={d} src_tok={d}\n", + .{ + idx, + @tagName(tag), + @intFromEnum(r.start), + @intFromEnum(r.src_tok), + g.start, + g.src_tok, + }, + ); + return error.TestExpectedEqual; + } + }, + .field_val, .field_ptr, .field_val_named, .field_ptr_named => { + const r = ref.pl_node; + const g = got.pl_node; + if (@intFromEnum(r.src_node) != g.src_node or + r.payload_index != g.payload_index) + { + std.debug.print( + "inst_datas[{d}] ({s}) mismatch:\n" ++ + " ref: src_node={d} payload_index={d}\n" ++ + " got: src_node={d} payload_index={d}\n", + .{ + idx, + @tagName(tag), + @intFromEnum(r.src_node), + r.payload_index, + g.src_node, + g.payload_index, + }, + ); + return error.TestExpectedEqual; + } + }, + .int => { + if (ref.int != got.int_val) { + std.debug.print( + "inst_datas[{d}] (int) mismatch: ref={d} got={d}\n", + .{ idx, ref.int, got.int_val }, + ); + return error.TestExpectedEqual; + } + }, + .ptr_type => { + // Compare ptr_type data: flags, size, payload_index. + if (@as(u8, @bitCast(ref.ptr_type.flags)) != got.ptr_type.flags or + @intFromEnum(ref.ptr_type.size) != got.ptr_type.size or + ref.ptr_type.payload_index != got.ptr_type.payload_index) + { + std.debug.print( + "inst_datas[{d}] (ptr_type) mismatch:\n" ++ + " ref: flags=0x{x} size={d} pi={d}\n" ++ + " got: flags=0x{x} size={d} pi={d}\n", + .{ + idx, + @as(u8, @bitCast(ref.ptr_type.flags)), + @intFromEnum(ref.ptr_type.size), + ref.ptr_type.payload_index, + got.ptr_type.flags, + got.ptr_type.size, + got.ptr_type.payload_index, + }, + ); + return error.TestExpectedEqual; + } + }, + .int_type => { + const r = ref.int_type; + const g = got.int_type; + if (@intFromEnum(r.src_node) != g.src_node or + @intFromEnum(r.signedness) != g.signedness or + r.bit_count != g.bit_count) + { + std.debug.print( + "inst_datas[{d}] (int_type) mismatch\n", + .{idx}, + ); + return error.TestExpectedEqual; + } + }, + .str => { + const r = ref.str; + const g = got.str; + if (@intFromEnum(r.start) != g.start or r.len != g.len) { + std.debug.print( + "inst_datas[{d}] (str) mismatch:\n" ++ + " ref: start={d} len={d}\n" ++ + " got: start={d} len={d}\n", + .{ idx, @intFromEnum(r.start), r.len, g.start, g.len }, + ); + return error.TestExpectedEqual; + } + }, + else => { + // Generic raw comparison: treat data as two u32 words. + // Tags using .node data format have undefined second word. + const ref_raw = @as([*]const u32, @ptrCast(&ref)); + const got_raw = @as([*]const u32, @ptrCast(&got)); + // Tags where only the first u32 word is meaningful + // (second word is padding/undefined). + const first_word_only = switch (tag) { + // .node data format (single i32): + .repeat, + .repeat_inline, + .ret_ptr, + .ret_type, + .trap, + .alloc_inferred, + .alloc_inferred_mut, + .alloc_inferred_comptime, + .alloc_inferred_comptime_mut, + // .@"unreachable" data format (src_node + padding): + .@"unreachable", + // .save_err_ret_index data format (operand only): + .save_err_ret_index, + => true, + else => false, + }; + const w1_match = ref_raw[0] == got_raw[0]; + const w2_match = first_word_only or ref_raw[1] == got_raw[1]; + if (!w1_match or !w2_match) { + std.debug.print( + "inst_datas[{d}] ({s}) raw mismatch:\n" ++ + " ref: 0x{x:0>8} 0x{x:0>8}\n" ++ + " got: 0x{x:0>8} 0x{x:0>8}\n", + .{ + idx, + @tagName(tag), + ref_raw[0], + ref_raw[1], + got_raw[0], + got_raw[1], + }, + ); + return error.TestExpectedEqual; + } + }, + } +} + +const corpus_files = .{ + .{ "astgen_test.zig", @embedFile("astgen_test.zig") }, + .{ "build.zig", @embedFile("build.zig") }, + .{ "parser_test.zig", @embedFile("parser_test.zig") }, + .{ "test_all.zig", @embedFile("test_all.zig") }, + .{ "tokenizer_test.zig", @embedFile("tokenizer_test.zig") }, +}; + +fn corpusCheck(gpa: Allocator, source: [:0]const u8) !void { + var tree = try Ast.parse(gpa, source, .zig); + defer tree.deinit(gpa); + + var ref_zir = try AstGen.generate(gpa, tree); + defer ref_zir.deinit(gpa); + + var c_ast = c.astParse(source.ptr, @intCast(source.len)); + defer c.astDeinit(&c_ast); + var c_zir = c.astGen(&c_ast); + defer c.zirDeinit(&c_zir); + + if (c_zir.has_compile_errors) { + std.debug.print("C port returned compile errors (inst_len={d})\n", .{c_zir.inst_len}); + return error.TestUnexpectedResult; + } + + try expectEqualZir(gpa, ref_zir, c_zir); +} + +test "astgen: struct single field" { + const gpa = std.testing.allocator; + const source: [:0]const u8 = "const T = struct { x: u32 };"; + var ref_zir = try refZir(gpa, source); + defer ref_zir.deinit(gpa); + var c_ast = c.astParse(source.ptr, @intCast(source.len)); + defer c.astDeinit(&c_ast); + var c_zir = c.astGen(&c_ast); + defer c.zirDeinit(&c_zir); + try expectEqualZir(gpa, ref_zir, c_zir); +} + +test "astgen: struct multiple fields" { + const gpa = std.testing.allocator; + const source: [:0]const u8 = "const T = struct { x: u32, y: bool };"; + var ref_zir = try refZir(gpa, source); + defer ref_zir.deinit(gpa); + var c_ast = c.astParse(source.ptr, @intCast(source.len)); + defer c.astDeinit(&c_ast); + var c_zir = c.astGen(&c_ast); + defer c.zirDeinit(&c_zir); + try expectEqualZir(gpa, ref_zir, c_zir); +} + +test "astgen: struct field with default" { + const gpa = std.testing.allocator; + const source: [:0]const u8 = "const T = struct { x: u32 = 0 };"; + var ref_zir = try refZir(gpa, source); + defer ref_zir.deinit(gpa); + var c_ast = c.astParse(source.ptr, @intCast(source.len)); + defer c.astDeinit(&c_ast); + var c_zir = c.astGen(&c_ast); + defer c.zirDeinit(&c_zir); + try expectEqualZir(gpa, ref_zir, c_zir); +} + +test "astgen: struct field with align" { + const gpa = std.testing.allocator; + const source: [:0]const u8 = "const T = struct { x: u32 align(4) };"; + var ref_zir = try refZir(gpa, source); + defer ref_zir.deinit(gpa); + var c_ast = c.astParse(source.ptr, @intCast(source.len)); + defer c.astDeinit(&c_ast); + var c_zir = c.astGen(&c_ast); + defer c.zirDeinit(&c_zir); + try expectEqualZir(gpa, ref_zir, c_zir); +} + +test "astgen: struct comptime field" { + const gpa = std.testing.allocator; + const source: [:0]const u8 = "const T = struct { comptime x: u32 = 0 };"; + var ref_zir = try refZir(gpa, source); + defer ref_zir.deinit(gpa); + var c_ast = c.astParse(source.ptr, @intCast(source.len)); + defer c.astDeinit(&c_ast); + var c_zir = c.astGen(&c_ast); + defer c.zirDeinit(&c_zir); + try expectEqualZir(gpa, ref_zir, c_zir); +} + +test "astgen: empty error set" { + const gpa = std.testing.allocator; + const source: [:0]const u8 = "const E = error{};"; + var ref_zir = try refZir(gpa, source); + defer ref_zir.deinit(gpa); + var c_ast = c.astParse(source.ptr, @intCast(source.len)); + defer c.astDeinit(&c_ast); + var c_zir = c.astGen(&c_ast); + defer c.zirDeinit(&c_zir); + try expectEqualZir(gpa, ref_zir, c_zir); +} + +test "astgen: error set with members" { + const gpa = std.testing.allocator; + const source: [:0]const u8 = "const E = error{ OutOfMemory, OutOfTime };"; + var ref_zir = try refZir(gpa, source); + defer ref_zir.deinit(gpa); + var c_ast = c.astParse(source.ptr, @intCast(source.len)); + defer c.astDeinit(&c_ast); + var c_zir = c.astGen(&c_ast); + defer c.zirDeinit(&c_zir); + try expectEqualZir(gpa, ref_zir, c_zir); +} + +test "astgen: extern var" { + const gpa = std.testing.allocator; + const source: [:0]const u8 = "extern var x: u32;"; + var ref_zir = try refZir(gpa, source); + defer ref_zir.deinit(gpa); + var c_ast = c.astParse(source.ptr, @intCast(source.len)); + defer c.astDeinit(&c_ast); + var c_zir = c.astGen(&c_ast); + defer c.zirDeinit(&c_zir); + try expectEqualZir(gpa, ref_zir, c_zir); +} + +test "astgen: corpus test_all.zig" { + const gpa = std.testing.allocator; + try corpusCheck(gpa, @embedFile("test_all.zig")); +} + +test "astgen: corpus build.zig" { + const gpa = std.testing.allocator; + try corpusCheck(gpa, @embedFile("build.zig")); +} + +test "astgen: corpus tokenizer_test.zig" { + const gpa = std.testing.allocator; + try corpusCheck(gpa, @embedFile("tokenizer_test.zig")); +} + +test "astgen: corpus parser_test.zig" { + // TODO: 10+ extra data mismatches (ref=48 got=32, bit 4 = propagate_error_trace) + // in call instruction flags — ctx propagation differs from upstream. + if (true) return error.SkipZigTest; + const gpa = std.testing.allocator; + try corpusCheck(gpa, @embedFile("parser_test.zig")); +} + +test "astgen: corpus astgen_test.zig" { + const gpa = std.testing.allocator; + try corpusCheck(gpa, @embedFile("astgen_test.zig")); +} + +test "astgen: enum decl" { + const gpa = std.testing.allocator; + const source: [:0]const u8 = "const E = enum { a, b, c };"; + var ref_zir = try refZir(gpa, source); + defer ref_zir.deinit(gpa); + var c_ast = c.astParse(source.ptr, @intCast(source.len)); + defer c.astDeinit(&c_ast); + var c_zir = c.astGen(&c_ast); + defer c.zirDeinit(&c_zir); + try expectEqualZir(gpa, ref_zir, c_zir); +} + +test "astgen: struct init typed" { + const gpa = std.testing.allocator; + const source: [:0]const u8 = + \\const T = struct { x: u32 }; + \\const v = T{ .x = 1 }; + ; + var ref_zir = try refZir(gpa, source); + defer ref_zir.deinit(gpa); + var c_ast = c.astParse(source.ptr, @intCast(source.len)); + defer c.astDeinit(&c_ast); + var c_zir = c.astGen(&c_ast); + defer c.zirDeinit(&c_zir); + try expectEqualZir(gpa, ref_zir, c_zir); +} + +test "astgen: corpus" { + if (true) return error.SkipZigTest; // TODO: parser_test.zig fails + const gpa = std.testing.allocator; + + var any_fail = false; + inline for (corpus_files) |entry| { + corpusCheck(gpa, entry[1]) catch { + any_fail = true; + }; + } + if (any_fail) return error.ZirMismatch; +} diff --git a/stage0/build.zig b/stage0/build.zig new file mode 100644 index 0000000000..505ea90aee --- /dev/null +++ b/stage0/build.zig @@ -0,0 +1,248 @@ +const std = @import("std"); +const builtin = @import("builtin"); + +const headers = &[_][]const u8{ + "common.h", + "ast.h", + "parser.h", + "zir.h", + "astgen.h", +}; + +const c_lib_files = &[_][]const u8{ + "tokenizer.c", + "ast.c", + "zig0.c", + "parser.c", + "zir.c", + "astgen.c", +}; + +const all_c_files = c_lib_files ++ &[_][]const u8{"main.c"}; + +const cflags = &[_][]const u8{ + "-std=c11", + "-Wall", + "-Wvla", + "-Wextra", + "-Werror", + "-Wshadow", + "-Wswitch", + "-Walloca", + "-Wformat=2", + "-fno-common", + "-Wconversion", + "-Wuninitialized", + "-Wdouble-promotion", + "-fstack-protector-all", + "-Wimplicit-fallthrough", + "-Wno-unused-function", // TODO remove once refactoring is done + //"-D_FORTIFY_SOURCE=2", // consider when optimization flags are enabled +}; + +const compilers = &[_][]const u8{ "zig", "clang", "gcc", "tcc" }; + +pub fn build(b: *std.Build) !void { + const optimize = b.standardOptimizeOption(.{}); + + const cc = b.option([]const u8, "cc", "C compiler") orelse "zig"; + const no_exec = b.option(bool, "no-exec", "Compile test binary without running it") orelse false; + const valgrind = b.option(bool, "valgrind", "Run tests under valgrind") orelse false; + const test_timeout = b.option([]const u8, "test-timeout", "Test execution timeout (default: 10s, none with valgrind)"); + + const target = blk: { + var query = b.standardTargetOptionsQueryOnly(.{}); + if (valgrind) { + const arch = query.cpu_arch orelse builtin.cpu.arch; + if (arch == .x86_64) { + query.cpu_features_sub.addFeature(@intFromEnum(std.Target.x86.Feature.avx512f)); + } + } + break :blk b.resolveTargetQuery(query); + }; + + const test_step = b.step("test", "Run unit tests"); + addTestStep(b, test_step, target, optimize, cc, no_exec, valgrind, test_timeout); + + const fmt_step = b.step("fmt", "clang-format"); + const clang_format = b.addSystemCommand(&.{ "clang-format", "-i" }); + for (all_c_files ++ headers) |f| clang_format.addFileArg(b.path(f)); + fmt_step.dependOn(&clang_format.step); + + const lint_step = b.step("lint", "Run linters"); + + for (all_c_files) |cfile| { + const clang_analyze = b.addSystemCommand(&.{ + "clang", + "--analyze", + "--analyzer-output", + "text", + "-Wno-unused-command-line-argument", + "-Werror", + // false positive in astgen.c comptimeDecl: analyzer cannot track + // scratch_instructions ownership through pointer parameters. + "-Xclang", + "-analyzer-disable-checker", + "-Xclang", + "unix.Malloc", + }); + clang_analyze.addFileArg(b.path(cfile)); + clang_analyze.expectExitCode(0); + lint_step.dependOn(&clang_analyze.step); + + // TODO(motiejus) re-enable once project + // nears completion. Takes too long for comfort. + //const gcc_analyze = b.addSystemCommand(&.{ + // "gcc", + // "-c", + // "--analyzer", + // "-Werror", + // "-o", + // "/dev/null", + //}); + //gcc_analyze.addFileArg(b.path(cfile)); + //gcc_analyze.expectExitCode(0); + //lint_step.dependOn(&gcc_analyze.step); + + const cppcheck = b.addSystemCommand(&.{ + "cppcheck", + "--quiet", + "--error-exitcode=1", + "--check-level=exhaustive", + "--enable=all", + "--inline-suppr", + "--suppress=missingIncludeSystem", + "--suppress=checkersReport", + "--suppress=unusedFunction", // TODO remove after plumbing is done + "--suppress=unusedStructMember", // TODO remove after plumbing is done + "--suppress=unmatchedSuppression", + }); + cppcheck.addFileArg(b.path(cfile)); + cppcheck.expectExitCode(0); + lint_step.dependOn(&cppcheck.step); + } + + const fmt_check = b.addSystemCommand(&.{ "clang-format", "--dry-run", "-Werror" }); + for (all_c_files ++ headers) |f| fmt_check.addFileArg(b.path(f)); + fmt_check.expectExitCode(0); + b.default_step.dependOn(&fmt_check.step); + + for (compilers) |compiler| { + addTestStep(b, b.default_step, target, optimize, compiler, false, valgrind, test_timeout); + } + + const all_step = b.step("all", "Run fmt check, lint, and tests with all compilers"); + all_step.dependOn(b.default_step); + all_step.dependOn(lint_step); +} + +fn addTestStep( + b: *std.Build, + step: *std.Build.Step, + target: std.Build.ResolvedTarget, + optimize: std.builtin.OptimizeMode, + cc: []const u8, + no_exec: bool, + valgrind: bool, + test_timeout: ?[]const u8, +) void { + const test_mod = b.createModule(.{ + .root_source_file = b.path("test_all.zig"), + .optimize = optimize, + .target = target, + }); + test_mod.addIncludePath(b.path(".")); + + // TODO(zig 0.16+): remove this if block entirely; keep only the addLibrary branch. + // Also delete addCObjectsDirectly. + // Zig 0.15's ELF archive parser fails on archives containing odd-sized objects + // (off-by-one after 2-byte alignment). This is fixed on zig master/0.16. + if (comptime builtin.zig_version.order(.{ .major = 0, .minor = 16, .patch = 0 }) == .lt) { + addCObjectsDirectly(b, test_mod, cc, optimize); + } else { + const lib_mod = b.createModule(.{ + .optimize = optimize, + .target = target, + .link_libc = true, + }); + const lib = b.addLibrary(.{ + .name = b.fmt("zig0-{s}", .{cc}), + .root_module = lib_mod, + }); + addCSources(b, lib.root_module, cc, optimize); + test_mod.linkLibrary(lib); + } + + const test_exe = b.addTest(.{ + .root_module = test_mod, + .use_llvm = false, + .use_lld = false, + }); + const timeout: ?[]const u8 = test_timeout orelse if (valgrind) null else "10"; + if (valgrind) { + if (timeout) |t| + test_exe.setExecCmd(&.{ + "timeout", + t, + "valgrind", + "--error-exitcode=2", + "--leak-check=full", + "--show-leak-kinds=all", + "--errors-for-leak-kinds=all", + "--track-fds=yes", + null, + }) + else + test_exe.setExecCmd(&.{ + "valgrind", + "--error-exitcode=2", + "--leak-check=full", + "--show-leak-kinds=all", + "--errors-for-leak-kinds=all", + "--track-fds=yes", + null, + }); + } else { + test_exe.setExecCmd(&.{ "timeout", timeout orelse "10", null }); + } + if (no_exec) { + const install = b.addInstallArtifact(test_exe, .{}); + step.dependOn(&install.step); + } else { + step.dependOn(&b.addRunArtifact(test_exe).step); + } +} + +fn addCSources( + b: *std.Build, + mod: *std.Build.Module, + cc: []const u8, + optimize: std.builtin.OptimizeMode, +) void { + if (std.mem.eql(u8, cc, "zig")) { + mod.addCSourceFiles(.{ .files = c_lib_files, .flags = cflags }); + } else for (c_lib_files) |cfile| { + const cc1 = b.addSystemCommand(&.{cc}); + cc1.addArgs(cflags ++ .{"-g"}); + cc1.addArg(switch (optimize) { + .Debug => "-O0", + .ReleaseFast, .ReleaseSafe => "-O3", + .ReleaseSmall => "-Os", + }); + cc1.addArg("-c"); + cc1.addFileArg(b.path(cfile)); + cc1.addArg("-o"); + mod.addObjectFile(cc1.addOutputFileArg(b.fmt("{s}.o", .{cfile[0 .. cfile.len - 2]}))); + } +} + +// TODO(zig 0.16+): delete this function. +fn addCObjectsDirectly( + b: *std.Build, + mod: *std.Build.Module, + cc: []const u8, + optimize: std.builtin.OptimizeMode, +) void { + addCSources(b, mod, cc, optimize); + mod.linkSystemLibrary("c", .{}); +} diff --git a/stage0/check_test_order.py b/stage0/check_test_order.py new file mode 100644 index 0000000000..79bfbba552 --- /dev/null +++ b/stage0/check_test_order.py @@ -0,0 +1,143 @@ +#!/usr/bin/env python3 +"""Check and optionally fix test order in parser_test.zig to match upstream.""" + +import re +import sys + +OURS = "parser_test.zig" +UPSTREAM = "../zig/lib/std/zig/parser_test.zig" + + +def extract_test_names(path): + with open(path) as f: + return re.findall(r'^test "(.+?)" \{', f.read(), re.M) + + +def extract_test_blocks(path): + """Split file into: header, list of (name, content) test blocks, footer.""" + with open(path) as f: + lines = f.readlines() + + header = [] + footer = [] + blocks = [] + current_name = None + current_lines = [] + brace_depth = 0 + in_test = False + found_first_test = False + + for line in lines: + m = re.match(r'^test "(.+?)" \{', line) + if m and not in_test: + found_first_test = True + if current_name is not None: + blocks.append((current_name, "".join(current_lines))) + current_name = m.group(1) + current_lines = [line] + brace_depth = 1 + in_test = True + continue + + if in_test: + current_lines.append(line) + brace_depth += line.count("{") - line.count("}") + if brace_depth == 0: + in_test = False + elif not found_first_test: + header.append(line) + else: + # Non-test content after tests started — could be blank lines + # between tests or footer content + if current_name is not None: + # Append to previous test block as trailing content + current_lines.append(line) + else: + footer.append(line) + + if current_name is not None: + blocks.append((current_name, "".join(current_lines))) + + # Anything after the last test block is footer + # Split last block's trailing non-test content into footer + if blocks: + last_name, last_content = blocks[-1] + last_lines = last_content.split('\n') + # Find where the test block ends (} at column 0) + test_end = len(last_lines) + for i, line in enumerate(last_lines): + if line == '}' and i > 0: + test_end = i + 1 + if test_end < len(last_lines): + blocks[-1] = (last_name, '\n'.join(last_lines[:test_end]) + '\n') + footer = ['\n'.join(last_lines[test_end:]) + '\n'] + footer + + return "".join(header), blocks, "".join(footer) + + +def main(): + fix = "--fix" in sys.argv + + upstream_order = extract_test_names(UPSTREAM) + our_names = extract_test_names(OURS) + + # Build position map for upstream + upstream_pos = {name: i for i, name in enumerate(upstream_order)} + + # Check order + our_in_upstream = [n for n in our_names if n in upstream_pos] + positions = [upstream_pos[n] for n in our_in_upstream] + is_sorted = positions == sorted(positions) + + if is_sorted: + print(f"OK: {len(our_names)} tests in correct order") + return 0 + + # Find out-of-order tests + out_of_order = [] + prev_pos = -1 + for name in our_in_upstream: + pos = upstream_pos[name] + if pos < prev_pos: + out_of_order.append(name) + prev_pos = max(prev_pos, pos) + + print(f"WARN: {len(out_of_order)} tests out of order:") + for name in out_of_order[:10]: + print(f" - {name}") + if len(out_of_order) > 10: + print(f" ... and {len(out_of_order) - 10} more") + + if not fix: + print("\nRun with --fix to reorder") + return 1 + + # Fix: reorder + header, blocks, footer = extract_test_blocks(OURS) + block_map = {name: content for name, content in blocks} + + # Reorder: upstream-ordered first, then extras + ordered = [] + seen = set() + for name in upstream_order: + if name in block_map and name not in seen: + ordered.append((name, block_map[name])) + seen.add(name) + for name, content in blocks: + if name not in seen: + ordered.append((name, content)) + seen.add(name) + + with open(OURS, "w") as f: + f.write(header) + for _, content in ordered: + f.write("\n") + f.write(content) + f.write(footer) + + print(f"Fixed: {len(ordered)} tests reordered") + return 0 + + +if __name__ == "__main__": + sys.exit(main()) diff --git a/stage0/common.h b/stage0/common.h new file mode 100644 index 0000000000..da415d7afe --- /dev/null +++ b/stage0/common.h @@ -0,0 +1,54 @@ +// common.h — must be included before any system headers. +#ifndef _ZIG0_COMMON_H__ +#define _ZIG0_COMMON_H__ + +#include +#include + +#define SLICE(Type) \ + struct Type##Slice { \ + uint32_t len; \ + uint32_t cap; \ + Type* arr; \ + } + +#define ARR_INIT(Type, initial_cap) \ + ({ \ + Type* arr = calloc(initial_cap, sizeof(Type)); \ + if (!arr) \ + exit(1); \ + arr; \ + }) + +#define SLICE_INIT(Type, initial_cap) \ + { .len = 0, .cap = (initial_cap), .arr = ARR_INIT(Type, initial_cap) } + +#define SLICE_RESIZE(Type, slice, new_cap) \ + ({ \ + const uint32_t cap = (new_cap); \ + Type* new_arr = realloc((slice)->arr, cap * sizeof(Type)); \ + if (new_arr == NULL) { \ + free((slice)->arr); \ + exit(1); \ + } \ + (slice)->arr = new_arr; \ + (slice)->cap = cap; \ + }) + +#define SLICE_ENSURE_CAPACITY(Type, slice, additional) \ + ({ \ + if ((slice)->len + (additional) > (slice)->cap) { \ + SLICE_RESIZE(Type, slice, \ + ((slice)->cap * 2 > (slice)->len + (additional)) \ + ? (slice)->cap * 2 \ + : (slice)->len + (additional)); \ + } \ + }) + +#define SLICE_APPEND(Type, slice, item) \ + ({ \ + SLICE_ENSURE_CAPACITY(Type, slice, 1); \ + (slice)->arr[(slice)->len++] = (item); \ + }) + +#endif diff --git a/stage0/main.c b/stage0/main.c new file mode 100644 index 0000000000..16909955eb --- /dev/null +++ b/stage0/main.c @@ -0,0 +1,39 @@ +#include "common.h" + +#include +#include +#include + +int zig0Run(char* program, char** msg); +int zig0RunFile(char* fname, char** msg); + +static void usage(const char* argv0) { + fprintf(stderr, "Usage: %s program.zig\n", argv0); +} + +int main(int argc, char** argv) { + if (argc != 2) { + usage(argv[0]); + return 1; + } + + char* msg; + switch (zig0RunFile(argv[1], &msg)) { + case 0: + return 0; + break; + case 1: + fprintf(stderr, "panic: %s\n", msg); + free(msg); + return 0; + break; + case 2: + fprintf(stderr, "interpreter error: %s\n", msg); + free(msg); + return 1; + break; + case 3: + return 1; + break; + } +} diff --git a/stage0/parser.c b/stage0/parser.c new file mode 100644 index 0000000000..1aeca2ade5 --- /dev/null +++ b/stage0/parser.c @@ -0,0 +1,3458 @@ + +#include "common.h" + +#include +#include +#include +#include + +#include "ast.h" +#include "parser.h" + +const AstNodeIndex null_node = 0; +const AstTokenIndex null_token = ~(AstTokenIndex)(0); + +// OPT encodes a node index as OptionalIndex: 0 → ~0 (none) +#define OPT(x) ((x) == 0 ? ~(AstNodeIndex)0 : (x)) + +typedef struct { + uint32_t len; + AstNodeIndex lhs; + AstNodeIndex rhs; + bool trailing; +} Members; + +typedef struct { + enum { FIELD_STATE_NONE, FIELD_STATE_SEEN, FIELD_STATE_END } tag; + union { + uint32_t end; + } payload; +} FieldState; + +typedef struct { + enum { SMALL_SPAN_ZERO_OR_ONE, SMALL_SPAN_MULTI } tag; + union { + AstNodeIndex zero_or_one; + AstSubRange multi; + } payload; +} SmallSpan; + +typedef struct { + AstNodeIndex align_node; + AstNodeIndex addrspace_node; + AstNodeIndex bit_range_start; + AstNodeIndex bit_range_end; +} PtrModifiers; + +typedef struct { + int8_t prec; + AstNodeTag tag; + enum { + ASSOC_LEFT, + ASSOC_NONE, + } assoc; +} OperInfo; + +typedef struct { + AstNodeIndex align_expr, value_expr; +} NodeContainerField; + +static AstNodeIndex addExtra(Parser*, const AstNodeIndex*, uint32_t); +static AstNodeIndex addNode(AstNodeList*, AstNodeItem); +static AstNodeTag assignOpNode(TokenizerTag); +static AstTokenIndex assertToken(Parser*, TokenizerTag); +static void astNodeListEnsureCapacity(AstNodeList*, uint32_t); +static AstTokenIndex eatDocComments(Parser*); +static AstTokenIndex eatToken(Parser*, TokenizerTag); +static AstNodeIndex expectBlockExprStatement(Parser*); +static AstNodeIndex expectContainerField(Parser*); +static AstNodeIndex expectExpr(Parser*); +static AstNodeIndex expectIfStatement(Parser*); +static AstNodeIndex expectParamDecl(Parser*); +static AstNodeIndex expectSemicolon(Parser*); +static AstNodeIndex expectStatement(Parser*, bool); +static AstNodeIndex expectTestDecl(Parser*); +static AstTokenIndex expectToken(Parser*, TokenizerTag); +static AstNodeIndex expectTopLevelDecl(Parser*); +static AstNodeIndex expectVarDeclExprStatement(Parser*, AstTokenIndex); +static void findNextContainerMember(Parser*); +static AstNodeIndex finishAssignExpr(Parser*, AstNodeIndex); +static uint32_t forPrefix(Parser*); +static AstSubRange listToSpan(Parser*, const AstNodeIndex*, uint32_t); +static AstNodeIndex makePtrTypeNode( + Parser*, AstTokenIndex, AstNodeIndex, PtrModifiers, AstNodeIndex); +static AstSubRange membersToSpan(const Members, Parser*); +static AstTokenIndex nextToken(Parser*); +static OperInfo operTable(TokenizerTag); +static AstNodeIndex parseAddrSpace(Parser*); +static AstNodeIndex parseAsmExpr(Parser*); +static AstNodeIndex parseAsmInputItem(Parser*); +static AstNodeIndex parseAsmOutputItem(Parser*); +static AstNodeIndex parseAssignExpr(Parser*); +static AstNodeIndex parseBlock(Parser*); +static AstNodeIndex parseBlockExpr(Parser*); +static AstTokenIndex parseBlockLabel(Parser*); +static AstTokenIndex parseBreakLabel(Parser*); +static AstNodeIndex parseBuiltinCall(Parser*); +static AstNodeIndex parseByteAlign(Parser*); +static AstNodeIndex parseCallconv(Parser*); +static AstNodeIndex parseContainerDeclAuto(Parser*); +static Members parseContainerMembers(Parser*); +static AstNodeIndex parseCurlySuffixExpr(Parser*); +static AstNodeIndex parseErrorUnionExpr(Parser*); +static AstNodeIndex parseExpr(Parser*); +static AstNodeIndex parseExprPrecedence(Parser*, int32_t); +static AstNodeIndex parseFieldInit(Parser*); +static AstNodeIndex parseFnProto(Parser*); +static AstNodeIndex parseForExpr(Parser*); +static AstNodeIndex parseForStatement(Parser*); +static AstNodeIndex parseGlobalVarDecl(Parser*); +static AstNodeIndex parseIfExpr(Parser*); +static AstNodeIndex parseInitList(Parser*, AstNodeIndex, AstTokenIndex); +static AstNodeIndex parseLabeledStatement(Parser*); +static AstNodeIndex parseLinkSection(Parser*); +static AstNodeIndex parseLoopStatement(Parser*); +static SmallSpan parseParamDeclList(Parser*); +static void parsePayload(Parser*); +static AstNodeIndex parsePrefixExpr(Parser*); +static AstNodeIndex parsePrimaryExpr(Parser*); +static AstNodeIndex parsePrimaryTypeExpr(Parser*); +static PtrModifiers parsePtrModifiers(Parser*); +static void parsePtrPayload(Parser*); +static AstNodeIndex parseSingleAssignExpr(Parser*); +static AstNodeIndex parseSuffixExpr(Parser*); +static AstNodeIndex parseSuffixOp(Parser*, AstNodeIndex); +static AstNodeIndex parseSwitchExpr(Parser*); +static AstNodeIndex parseSwitchItem(Parser*); +static AstNodeIndex parseSwitchProng(Parser*); +static AstSubRange parseSwitchProngList(Parser*); +static AstNodeIndex parseTypeExpr(Parser*); +static AstNodeIndex parseVarDeclProto(Parser*); +static AstNodeIndex parseWhileContinueExpr(Parser*); +static AstNodeIndex parseWhileExpr(Parser*); +static AstNodeIndex parseWhileStatement(Parser*); +static uint32_t reserveNode(Parser*, AstNodeTag); +static AstNodeIndex setNode(Parser*, uint32_t, AstNodeItem); +static uint32_t tokenTagLexemeLen(TokenizerTag); +static bool tokensOnSameLine(Parser*, AstTokenIndex, AstTokenIndex); + +static AstSubRange membersToSpan(const Members self, Parser* p) { + if (self.len <= 2) { + const AstNodeIndex nodes[] = { self.lhs, self.rhs }; + return listToSpan(p, nodes, self.len); + } else { + return (AstSubRange) { .start = self.lhs, .end = self.rhs }; + } +} + +static AstSubRange listToSpan( + Parser* p, const AstNodeIndex* list, uint32_t count) { + SLICE_ENSURE_CAPACITY(AstNodeIndex, &p->extra_data, count); + memcpy(p->extra_data.arr + p->extra_data.len, list, + count * sizeof(AstNodeIndex)); + p->extra_data.len += count; + return (AstSubRange) { + .start = p->extra_data.len - count, + .end = p->extra_data.len, + }; +} + +static AstNodeIndex addNode(AstNodeList* nodes, AstNodeItem item) { + astNodeListEnsureCapacity(nodes, 1); + nodes->tags[nodes->len] = item.tag; + nodes->main_tokens[nodes->len] = item.main_token; + nodes->datas[nodes->len] = item.data; + return nodes->len++; +} + +static AstNodeIndex setNode(Parser* p, uint32_t i, AstNodeItem item) { + p->nodes.tags[i] = item.tag; + p->nodes.main_tokens[i] = item.main_token; + p->nodes.datas[i] = item.data; + return i; +} + +static uint32_t reserveNode(Parser* p, AstNodeTag tag) { + astNodeListEnsureCapacity(&p->nodes, 1); + p->nodes.len++; + p->nodes.tags[p->nodes.len - 1] = tag; + return p->nodes.len - 1; +} + +static AstNodeIndex addExtra( + Parser* p, const AstNodeIndex* extra, uint32_t count) { + const AstNodeIndex result = p->extra_data.len; + SLICE_ENSURE_CAPACITY(AstNodeIndex, &p->extra_data, count); + memcpy(p->extra_data.arr + p->extra_data.len, extra, + count * sizeof(AstNodeIndex)); + p->extra_data.len += count; + return result; +} + +static void astNodeListEnsureCapacity(AstNodeList* list, uint32_t additional) { + const uint32_t new_len = list->len + additional; + if (new_len <= list->cap) { + return; + } + + const uint32_t new_cap = new_len > list->cap * 2 ? new_len : list->cap * 2; + list->tags = realloc(list->tags, new_cap * sizeof(AstNodeTag)); + list->main_tokens + = realloc(list->main_tokens, new_cap * sizeof(AstTokenIndex)); + list->datas = realloc(list->datas, new_cap * sizeof(AstData)); + if (!list->tags || !list->main_tokens || !list->datas) + exit(1); + list->cap = new_cap; +} + +void parseRoot(Parser* p) { + addNode( + &p->nodes, (AstNodeItem) { .tag = AST_NODE_ROOT, .main_token = 0 }); + + Members root_members = parseContainerMembers(p); + AstSubRange root_decls = membersToSpan(root_members, p); + + if (p->token_tags[p->tok_i] != TOKEN_EOF) { + fail(p, "expected EOF"); + } + + p->nodes.datas[0].lhs = root_decls.start; + p->nodes.datas[0].rhs = root_decls.end; +} + +static Members parseContainerMembers(Parser* p) { + const uint32_t scratch_top = p->scratch.len; + while (eatToken(p, TOKEN_CONTAINER_DOC_COMMENT) != null_token) + ; + + FieldState field_state = { .tag = FIELD_STATE_NONE }; + + bool trailing = false; + while (1) { + const AstTokenIndex doc_comment = eatDocComments(p); + switch (p->token_tags[p->tok_i]) { + case TOKEN_KEYWORD_TEST: { + if (doc_comment != null_token) + fail(p, "test_doc_comment"); + const AstNodeIndex test_decl = expectTestDecl(p); + if (field_state.tag == FIELD_STATE_SEEN) { + field_state.tag = FIELD_STATE_END; + field_state.payload.end = test_decl; + } + SLICE_APPEND(AstNodeIndex, &p->scratch, test_decl); + trailing = false; + break; + } + case TOKEN_KEYWORD_COMPTIME: + // comptime can be a container field modifier or a comptime + // block/decl. Check if it's followed by a block (comptime { ... + // }). + if (p->token_tags[p->tok_i + 1] == TOKEN_L_BRACE) { + if (doc_comment != null_token) { + fail(p, "comptime_doc_comment"); + } + const AstTokenIndex comptime_token = nextToken(p); + const AstNodeIndex block_node = parseBlock(p); + SLICE_APPEND(AstNodeIndex, &p->scratch, + addNode(&p->nodes, + (AstNodeItem) { + .tag = AST_NODE_COMPTIME, + .main_token = comptime_token, + .data = { .lhs = block_node, .rhs = 0 }, + })); + trailing = false; + break; + } + // Otherwise it's a container field with comptime modifier + goto container_field; + case TOKEN_KEYWORD_PUB: { + p->tok_i++; + AstNodeIndex top_level_decl = expectTopLevelDecl(p); + if (top_level_decl != 0) { + if (field_state.tag == FIELD_STATE_SEEN) { + field_state.tag = FIELD_STATE_END; + field_state.payload.end = top_level_decl; + } + SLICE_APPEND(AstNodeIndex, &p->scratch, top_level_decl); + } + trailing = p->token_tags[p->tok_i - 1] == TOKEN_SEMICOLON; + break; + } + case TOKEN_KEYWORD_CONST: + case TOKEN_KEYWORD_VAR: + case TOKEN_KEYWORD_THREADLOCAL: + case TOKEN_KEYWORD_EXPORT: + case TOKEN_KEYWORD_EXTERN: + case TOKEN_KEYWORD_INLINE: + case TOKEN_KEYWORD_NOINLINE: + case TOKEN_KEYWORD_FN: { + const AstNodeIndex top_level_decl = expectTopLevelDecl(p); + if (top_level_decl != 0) { + if (field_state.tag == FIELD_STATE_SEEN) { + field_state.tag = FIELD_STATE_END; + field_state.payload.end = top_level_decl; + } + SLICE_APPEND(AstNodeIndex, &p->scratch, top_level_decl); + } + trailing = (p->token_tags[p->tok_i - 1] == TOKEN_SEMICOLON); + break; + } + case TOKEN_EOF: + case TOKEN_R_BRACE: + goto break_loop; + container_field: + default:; + // skip parseCStyleContainer + const AstNodeIndex field_node = expectContainerField(p); + switch (field_state.tag) { + case FIELD_STATE_NONE: + field_state.tag = FIELD_STATE_SEEN; + break; + case FIELD_STATE_SEEN: + break; + case FIELD_STATE_END: + fail(p, "parseContainerMembers error condition"); + } + SLICE_APPEND(AstNodeIndex, &p->scratch, field_node); + switch (p->token_tags[p->tok_i]) { + case TOKEN_COMMA: + p->tok_i++; + trailing = true; + continue; + case TOKEN_R_BRACE: + case TOKEN_EOF: + trailing = false; + goto break_loop; + default: + fail(p, "expected comma after field"); + } + } + } + +break_loop:; + + const uint32_t items_len = p->scratch.len - scratch_top; + p->scratch.len = scratch_top; + switch (items_len) { + case 0: + return (Members) { + .len = 0, + .lhs = 0, + .rhs = 0, + .trailing = trailing, + }; + case 1: + return (Members) { + .len = 1, + .lhs = p->scratch.arr[scratch_top], + .rhs = 0, + .trailing = trailing, + }; + case 2: + return (Members) { + .len = 2, + .lhs = p->scratch.arr[scratch_top], + .rhs = p->scratch.arr[scratch_top + 1], + .trailing = trailing, + }; + default:; + const AstSubRange span + = listToSpan(p, &p->scratch.arr[scratch_top], items_len); + return (Members) { + .len = items_len, + .lhs = span.start, + .rhs = span.end, + .trailing = trailing, + }; + } +} + +static void findNextContainerMember(Parser* p) { + uint32_t level = 0; + + while (true) { + AstTokenIndex tok = nextToken(p); + + switch (p->token_tags[tok]) { + // Any of these can start a new top level declaration + case TOKEN_KEYWORD_TEST: + case TOKEN_KEYWORD_COMPTIME: + case TOKEN_KEYWORD_PUB: + case TOKEN_KEYWORD_EXPORT: + case TOKEN_KEYWORD_EXTERN: + case TOKEN_KEYWORD_INLINE: + case TOKEN_KEYWORD_NOINLINE: + case TOKEN_KEYWORD_THREADLOCAL: + case TOKEN_KEYWORD_CONST: + case TOKEN_KEYWORD_VAR: + case TOKEN_KEYWORD_FN: + if (level == 0) { + p->tok_i--; + return; + } + break; + case TOKEN_IDENTIFIER: + if (p->token_tags[tok + 1] == TOKEN_COMMA && level == 0) { + p->tok_i--; + return; + } + break; + case TOKEN_COMMA: + case TOKEN_SEMICOLON: + // This decl was likely meant to end here + if (level == 0) + return; + break; + case TOKEN_L_PAREN: + case TOKEN_L_BRACKET: + case TOKEN_L_BRACE: + level++; + break; + case TOKEN_R_PAREN: + case TOKEN_R_BRACKET: + if (level != 0) + level--; + break; + case TOKEN_R_BRACE: + if (level == 0) { + // end of container, exit + p->tok_i--; + return; + } + level--; + break; + case TOKEN_EOF: + p->tok_i--; + return; + default: + break; + } + } +} + +static AstNodeIndex expectTestDecl(Parser* p) { + const AstTokenIndex test_token = assertToken(p, TOKEN_KEYWORD_TEST); + const AstTokenIndex test_name + = (p->token_tags[p->tok_i] == TOKEN_STRING_LITERAL + || p->token_tags[p->tok_i] == TOKEN_IDENTIFIER) + ? nextToken(p) + : null_token; + const AstNodeIndex body = parseBlock(p); + if (body == 0) + fail(p, "expected block after test"); + return addNode(&p->nodes, + (AstNodeItem) { + .tag = AST_NODE_TEST_DECL, + .main_token = test_token, + .data = { .lhs = test_name, .rhs = body }, + }); +} + +static AstNodeIndex expectTopLevelDecl(Parser* p) { + AstTokenIndex extern_export_inline_token = nextToken(p); + bool is_extern = false; + + switch (p->token_tags[extern_export_inline_token]) { + case TOKEN_KEYWORD_EXTERN: + eatToken(p, TOKEN_STRING_LITERAL); + is_extern = true; + break; + case TOKEN_KEYWORD_EXPORT: + case TOKEN_KEYWORD_INLINE: + case TOKEN_KEYWORD_NOINLINE: + break; + default: + p->tok_i--; + } + + AstNodeIndex fn_proto = parseFnProto(p); + if (fn_proto != 0) { + switch (p->token_tags[p->tok_i]) { + case TOKEN_SEMICOLON: + p->tok_i++; + return fn_proto; + case TOKEN_L_BRACE:; + if (is_extern) { + fail(p, "extern_fn_body"); + } + AstNodeIndex fn_decl_index = reserveNode(p, AST_NODE_FN_DECL); + AstNodeIndex body_block = parseBlock(p); + return setNode(p, fn_decl_index, + (AstNodeItem) { + .tag = AST_NODE_FN_DECL, + .main_token = p->nodes.main_tokens[fn_proto], + .data = { .lhs = fn_proto, .rhs = body_block }, + }); + default: + fail(p, "expected semicolon or lbrace"); + } + } + + eatToken(p, TOKEN_KEYWORD_THREADLOCAL); + AstNodeIndex var_decl = parseGlobalVarDecl(p); + if (var_decl != 0) { + return var_decl; + } + + // assuming the program is correct... + fail(p, "the next token should be usingnamespace, which is not supported"); + return 0; // make tcc happy +} + +static AstNodeIndex parseFnProto(Parser* p) { + AstTokenIndex fn_token = eatToken(p, TOKEN_KEYWORD_FN); + if (fn_token == null_token) + return null_node; + + AstNodeIndex fn_proto_index = reserveNode(p, AST_NODE_FN_PROTO); + + eatToken(p, TOKEN_IDENTIFIER); + + SmallSpan params = parseParamDeclList(p); + const AstNodeIndex align_expr = parseByteAlign(p); + const AstNodeIndex addrspace_expr = parseAddrSpace(p); + const AstNodeIndex section_expr = parseLinkSection(p); + const AstNodeIndex callconv_expr = parseCallconv(p); + eatToken(p, TOKEN_BANG); + + const AstNodeIndex return_type_expr = parseTypeExpr(p); + if (return_type_expr == 0) { + fail(p, "expected_return_type"); + } + + if (align_expr == 0 && section_expr == 0 && callconv_expr == 0 + && addrspace_expr == 0) { + switch (params.tag) { + case SMALL_SPAN_ZERO_OR_ONE: + return setNode(p, fn_proto_index, + (AstNodeItem) { + .tag = AST_NODE_FN_PROTO_SIMPLE, + .main_token = fn_token, + .data = { + .lhs = params.payload.zero_or_one, + .rhs = return_type_expr, + }, + }); + case SMALL_SPAN_MULTI: + return setNode(p, fn_proto_index, + (AstNodeItem) { + .tag = AST_NODE_FN_PROTO_MULTI, + .main_token = fn_token, + .data = { + .lhs = addExtra(p, + (AstNodeIndex[]) { + params.payload.multi.start, + params.payload.multi.end }, + 2), + .rhs = return_type_expr, + }, + }); + } + } + + // Complex fn proto with align/section/callconv/addrspace + switch (params.tag) { + case SMALL_SPAN_ZERO_OR_ONE: + return setNode(p, fn_proto_index, + (AstNodeItem) { + .tag = AST_NODE_FN_PROTO_ONE, + .main_token = fn_token, + .data = { + .lhs = addExtra(p, + (AstNodeIndex[]) { + OPT(params.payload.zero_or_one), + OPT(align_expr), OPT(addrspace_expr), + OPT(section_expr), OPT(callconv_expr) }, + 5), + .rhs = return_type_expr, + }, + }); + case SMALL_SPAN_MULTI: + return setNode(p, fn_proto_index, + (AstNodeItem) { + .tag = AST_NODE_FN_PROTO, + .main_token = fn_token, + .data = { + .lhs = addExtra(p, + (AstNodeIndex[]) { + params.payload.multi.start, + params.payload.multi.end, + OPT(align_expr), OPT(addrspace_expr), + OPT(section_expr), OPT(callconv_expr) }, + 6), + .rhs = return_type_expr, + }, + }); + } + return 0; // tcc +} + +static AstNodeIndex parseVarDeclProto(Parser* p) { + AstTokenIndex mut_token; + if ((mut_token = eatToken(p, TOKEN_KEYWORD_CONST)) == null_token) + if ((mut_token = eatToken(p, TOKEN_KEYWORD_VAR)) == null_token) + return null_node; + + expectToken(p, TOKEN_IDENTIFIER); + const AstNodeIndex type_node + = eatToken(p, TOKEN_COLON) == null_token ? 0 : parseTypeExpr(p); + const AstNodeIndex align_node = parseByteAlign(p); + const AstNodeIndex addrspace_node = parseAddrSpace(p); + const AstNodeIndex section_node = parseLinkSection(p); + + if (section_node == 0 && addrspace_node == 0) { + if (align_node == 0) { + return addNode(&p->nodes, + (AstNodeItem) { + .tag = AST_NODE_SIMPLE_VAR_DECL, + .main_token = mut_token, + .data = { .lhs = type_node, .rhs = 0 }, + }); + } + if (type_node == 0) { + return addNode(&p->nodes, + (AstNodeItem) { + .tag = AST_NODE_ALIGNED_VAR_DECL, + .main_token = mut_token, + .data = { .lhs = align_node, .rhs = 0 }, + }); + } + return addNode(&p->nodes, + (AstNodeItem) { + .tag = AST_NODE_LOCAL_VAR_DECL, + .main_token = mut_token, + .data = { + .lhs = addExtra(p, + (AstNodeIndex[]) { type_node, align_node }, 2), + .rhs = 0, + }, + }); + } + return addNode(&p->nodes, + (AstNodeItem) { + .tag = AST_NODE_GLOBAL_VAR_DECL, + .main_token = mut_token, + .data = { + .lhs = addExtra(p, + (AstNodeIndex[]) { OPT(type_node), OPT(align_node), + OPT(addrspace_node), OPT(section_node) }, + 4), + .rhs = 0, + }, + }); +} + +static AstNodeIndex parseGlobalVarDecl(Parser* p) { + const AstNodeIndex var_decl = parseVarDeclProto(p); + if (var_decl == 0) { + return null_node; + } + + if (eatToken(p, TOKEN_EQUAL) != null_token) { + const AstNodeIndex init_expr = expectExpr(p); + p->nodes.datas[var_decl].rhs = init_expr; + } + expectToken(p, TOKEN_SEMICOLON); + return var_decl; +} + +static AstNodeIndex expectContainerField(Parser* p) { + eatToken(p, TOKEN_KEYWORD_COMPTIME); + const AstTokenIndex main_token = p->tok_i; + if (p->token_tags[p->tok_i] == TOKEN_IDENTIFIER + && p->token_tags[p->tok_i + 1] == TOKEN_COLON) + p->tok_i += 2; + + const AstNodeIndex type_expr = parseTypeExpr(p); + if (type_expr == 0) { + fail(p, "expected type expression"); + } + const AstNodeIndex align_expr = parseByteAlign(p); + const AstNodeIndex value_expr + = eatToken(p, TOKEN_EQUAL) != null_token ? expectExpr(p) : 0; + + if (align_expr == 0) { + return addNode( + &p->nodes, + (AstNodeItem) { + .tag = AST_NODE_CONTAINER_FIELD_INIT, + .main_token = main_token, + .data = { + .lhs = type_expr, + .rhs = value_expr, + }, + }); + } else if (value_expr == 0) { + return addNode( + &p->nodes, + (AstNodeItem) { + .tag = AST_NODE_CONTAINER_FIELD_ALIGN, + .main_token = main_token, + .data = { + .lhs = type_expr, + .rhs = align_expr, + }, + }); + } else { + return addNode( + &p->nodes, + (AstNodeItem) { + .tag = AST_NODE_CONTAINER_FIELD, + .main_token = main_token, + .data = { + .lhs = type_expr, + .rhs = addExtra(p, (AstNodeIndex[]) { align_expr, value_expr }, 2), + }, + }); + } +} + +static AstNodeIndex expectStatement(Parser* p, bool allow_defer_var) { + const AstTokenIndex comptime_token = eatToken(p, TOKEN_KEYWORD_COMPTIME); + if (comptime_token != null_token) { + // comptime followed by block => comptime block statement + const AstNodeIndex block = parseBlock(p); + if (block != 0) { + return addNode(&p->nodes, + (AstNodeItem) { + .tag = AST_NODE_COMPTIME, + .main_token = comptime_token, + .data = { .lhs = block, .rhs = 0 }, + }); + } + // comptime var decl or expression + if (allow_defer_var) + return expectVarDeclExprStatement(p, comptime_token); + { + const AstNodeIndex assign = parseAssignExpr(p); + if (assign == 0) { + fail(p, "expected expression"); + } + expectSemicolon(p); + return addNode(&p->nodes, + (AstNodeItem) { + .tag = AST_NODE_COMPTIME, + .main_token = comptime_token, + .data = { .lhs = assign, .rhs = 0 }, + }); + } + } + + const AstNodeIndex tok = p->token_tags[p->tok_i]; + switch (tok) { + case TOKEN_KEYWORD_DEFER: + if (allow_defer_var) + return addNode(&p->nodes, + (AstNodeItem) { + .tag = AST_NODE_DEFER, + .main_token = nextToken(p), + .data = { + .lhs = expectBlockExprStatement(p), + .rhs = 0, + }, + }); + break; + case TOKEN_KEYWORD_ERRDEFER: + if (allow_defer_var) { + const AstTokenIndex errdefer_token = nextToken(p); + AstTokenIndex payload = null_token; + if (p->token_tags[p->tok_i] == TOKEN_PIPE) { + p->tok_i++; + payload = expectToken(p, TOKEN_IDENTIFIER); + expectToken(p, TOKEN_PIPE); + } + return addNode(&p->nodes, + (AstNodeItem) { + .tag = AST_NODE_ERRDEFER, + .main_token = errdefer_token, + .data = { + .lhs = payload, + .rhs = expectBlockExprStatement(p), + }, + }); + } + break; + case TOKEN_KEYWORD_NOSUSPEND: + return addNode(&p->nodes, + (AstNodeItem) { + .tag = AST_NODE_NOSUSPEND, + .main_token = nextToken(p), + .data = { + .lhs = expectBlockExprStatement(p), + .rhs = 0, + }, + }); + case TOKEN_KEYWORD_SUSPEND: + return addNode(&p->nodes, + (AstNodeItem) { + .tag = AST_NODE_SUSPEND, + .main_token = nextToken(p), + .data = { + .lhs = expectBlockExprStatement(p), + .rhs = 0, + }, + }); + case TOKEN_KEYWORD_IF: + return expectIfStatement(p); + case TOKEN_KEYWORD_ENUM: + case TOKEN_KEYWORD_STRUCT: + case TOKEN_KEYWORD_UNION:; + fail(p, "unsupported statement keyword"); + default:; + } + + const AstNodeIndex labeled_statement = parseLabeledStatement(p); + if (labeled_statement != 0) + return labeled_statement; + + if (allow_defer_var) { + return expectVarDeclExprStatement(p, null_token); + } else { + const AstNodeIndex assign_expr = parseAssignExpr(p); + expectSemicolon(p); + return assign_expr; + } +} + +static AstNodeIndex expectVarDeclExprStatement( + Parser* p, AstTokenIndex comptime_token) { + const uint32_t scratch_top = p->scratch.len; + + while (true) { + const AstNodeIndex var_decl_proto = parseVarDeclProto(p); + if (var_decl_proto != 0) { + SLICE_APPEND(AstNodeIndex, &p->scratch, var_decl_proto); + } else { + const AstNodeIndex expr = parseExpr(p); + SLICE_APPEND(AstNodeIndex, &p->scratch, expr); + } + if (eatToken(p, TOKEN_COMMA) == null_token) + break; + } + + const uint32_t lhs_count = p->scratch.len - scratch_top; + assert(lhs_count > 0); + + // Try to eat '=' for assignment/initialization + // (matches upstream: `const equal_token = p.eatToken(.equal) orelse eql:`) + AstTokenIndex equal_token = eatToken(p, TOKEN_EQUAL); + if (equal_token == null_token) { + if (lhs_count > 1) { + // Destructure requires '=' + fail(p, "expected '='"); + } + const AstNodeIndex lhs = p->scratch.arr[scratch_top]; + p->scratch.len = scratch_top; + const AstNodeTag lhs_tag = p->nodes.tags[lhs]; + if (lhs_tag == AST_NODE_SIMPLE_VAR_DECL + || lhs_tag == AST_NODE_ALIGNED_VAR_DECL + || lhs_tag == AST_NODE_LOCAL_VAR_DECL + || lhs_tag == AST_NODE_GLOBAL_VAR_DECL) { + // var decl without init requires '=' + fail(p, "expected '='"); + } + // Expression statement: finish with assignment operators or semicolon + const AstNodeIndex expr = finishAssignExpr(p, lhs); + // Semicolon is optional for block-terminated expressions + eatToken(p, TOKEN_SEMICOLON); + if (comptime_token != null_token) { + return addNode(&p->nodes, + (AstNodeItem) { + .tag = AST_NODE_COMPTIME, + .main_token = comptime_token, + .data = { .lhs = expr, .rhs = 0 }, + }); + } + return expr; + } + + // Have '=', parse RHS and semicolon + const AstNodeIndex rhs = expectExpr(p); + expectSemicolon(p); + + if (lhs_count == 1) { + const AstNodeIndex lhs = p->scratch.arr[scratch_top]; + p->scratch.len = scratch_top; + const AstNodeTag lhs_tag = p->nodes.tags[lhs]; + if (lhs_tag == AST_NODE_SIMPLE_VAR_DECL + || lhs_tag == AST_NODE_ALIGNED_VAR_DECL + || lhs_tag == AST_NODE_LOCAL_VAR_DECL + || lhs_tag == AST_NODE_GLOBAL_VAR_DECL) { + // var decl initialization: const x = val; + p->nodes.datas[lhs].rhs = rhs; + return lhs; + } + // Simple assignment: x = val; + const AstNodeIndex assign = addNode(&p->nodes, + (AstNodeItem) { + .tag = AST_NODE_ASSIGN, + .main_token = equal_token, + .data = { .lhs = lhs, .rhs = rhs }, + }); + if (comptime_token != null_token) { + return addNode(&p->nodes, + (AstNodeItem) { + .tag = AST_NODE_COMPTIME, + .main_token = comptime_token, + .data = { .lhs = assign, .rhs = 0 }, + }); + } + return assign; + } + + // Destructure: a, b, c = rhs + // rhs and semicolon already parsed above + + // Store count + lhs nodes in extra_data + const AstNodeIndex extra_start = p->extra_data.len; + SLICE_ENSURE_CAPACITY(AstNodeIndex, &p->extra_data, lhs_count + 1); + p->extra_data.arr[p->extra_data.len++] = lhs_count; + memcpy(p->extra_data.arr + p->extra_data.len, &p->scratch.arr[scratch_top], + lhs_count * sizeof(AstNodeIndex)); + p->extra_data.len += lhs_count; + p->scratch.len = scratch_top; + + return addNode(&p->nodes, + (AstNodeItem) { + .tag = AST_NODE_ASSIGN_DESTRUCTURE, + .main_token = equal_token, + .data = { .lhs = extra_start, .rhs = rhs }, + }); +} + +static AstNodeIndex expectIfStatement(Parser* p) { + const AstTokenIndex if_token = assertToken(p, TOKEN_KEYWORD_IF); + expectToken(p, TOKEN_L_PAREN); + const AstNodeIndex condition = expectExpr(p); + expectToken(p, TOKEN_R_PAREN); + parsePtrPayload(p); + bool else_required = false; + AstNodeIndex then_body; + const AstNodeIndex block2 = parseBlockExpr(p); + if (block2 != 0) { + then_body = block2; + } else { + then_body = parseAssignExpr(p); + if (then_body == 0) + fail(p, "expected block or assignment"); + if (eatToken(p, TOKEN_SEMICOLON) != null_token) + return addNode(&p->nodes, + (AstNodeItem) { + .tag = AST_NODE_IF_SIMPLE, + .main_token = if_token, + .data = { .lhs = condition, .rhs = then_body }, + }); + else_required = true; + } + if (eatToken(p, TOKEN_KEYWORD_ELSE) == null_token) { + if (else_required) + fail(p, "expected_semi_or_else"); + return addNode(&p->nodes, + (AstNodeItem) { + .tag = AST_NODE_IF_SIMPLE, + .main_token = if_token, + .data = { .lhs = condition, .rhs = then_body }, + }); + } + parsePayload(p); + const AstNodeIndex else_body = expectStatement(p, false); + return addNode(&p->nodes, + (AstNodeItem) { + .tag = AST_NODE_IF, + .main_token = if_token, + .data = { + .lhs = condition, + .rhs = addExtra(p, + (AstNodeIndex[]) { then_body, else_body }, 2), + }, + }); +} + +static AstNodeIndex parseLabeledStatement(Parser* p) { + const AstNodeIndex label_token = parseBlockLabel(p); + const AstNodeIndex block = parseBlock(p); + if (block != 0) + return block; + + const AstNodeIndex loop_stmt = parseLoopStatement(p); + if (loop_stmt != 0) + return loop_stmt; + + const AstNodeIndex switch_expr = parseSwitchExpr(p); + if (switch_expr != 0) + return switch_expr; + + if (label_token != 0) { + fail(p, "expected_labelable"); + } + + return null_node; +} + +static AstNodeIndex parseLoopStatement(Parser* p) { + const AstTokenIndex inline_token = eatToken(p, TOKEN_KEYWORD_INLINE); + + const AstNodeIndex for_statement = parseForStatement(p); + if (for_statement != 0) + return for_statement; + + const AstNodeIndex while_statement = parseWhileStatement(p); + if (while_statement != 0) + return while_statement; + + if (inline_token == null_token) + return null_node; + + fail(p, "seen 'inline', there should have been a 'for' or 'while'"); + return 0; // tcc +} + +static AstNodeIndex parseForStatement(Parser* p) { + const AstTokenIndex for_token = eatToken(p, TOKEN_KEYWORD_FOR); + if (for_token == null_token) + return null_node; + + const uint32_t scratch_top = p->scratch.len; + const uint32_t inputs = forPrefix(p); + + // Statement body: block or assign expr + bool else_required = false; + bool seen_semicolon = false; + AstNodeIndex then_body; + const AstNodeIndex block = parseBlock(p); + if (block != 0) { + then_body = block; + } else { + then_body = parseAssignExpr(p); + if (then_body == 0) { + fail(p, "expected_block_or_assignment"); + } + if (eatToken(p, TOKEN_SEMICOLON) != null_token) { + seen_semicolon = true; + } else { + else_required = true; + } + } + + bool has_else = false; + if (!seen_semicolon && eatToken(p, TOKEN_KEYWORD_ELSE) != null_token) { + parsePayload(p); + SLICE_APPEND(AstNodeIndex, &p->scratch, then_body); + const AstNodeIndex else_body = expectStatement(p, false); + SLICE_APPEND(AstNodeIndex, &p->scratch, else_body); + has_else = true; + } else if (inputs == 1) { + if (else_required) + fail(p, "expected_semi_or_else"); + p->scratch.len = scratch_top; + return addNode(&p->nodes, + (AstNodeItem) { + .tag = AST_NODE_FOR_SIMPLE, + .main_token = for_token, + .data = { + .lhs = p->scratch.arr[scratch_top], + .rhs = then_body, + }, + }); + } else { + if (else_required) + fail(p, "expected_semi_or_else"); + SLICE_APPEND(AstNodeIndex, &p->scratch, then_body); + } + + const uint32_t total = p->scratch.len - scratch_top; + const AstSubRange span + = listToSpan(p, &p->scratch.arr[scratch_top], total); + p->scratch.len = scratch_top; + return addNode(&p->nodes, + (AstNodeItem) { + .tag = AST_NODE_FOR, + .main_token = for_token, + .data = { + .lhs = span.start, + .rhs = ((uint32_t)inputs & 0x7FFFFFFF) + | (has_else ? (1u << 31) : 0), + }, + }); +} + +static AstNodeIndex parseForExpr(Parser* p) { + const AstTokenIndex for_token = eatToken(p, TOKEN_KEYWORD_FOR); + if (for_token == null_token) + return null_node; + + const uint32_t scratch_top = p->scratch.len; + const uint32_t inputs = forPrefix(p); + + const AstNodeIndex then_expr = expectExpr(p); + + if (eatToken(p, TOKEN_KEYWORD_ELSE) != null_token) { + parsePayload(p); + SLICE_APPEND(AstNodeIndex, &p->scratch, then_expr); + const AstNodeIndex else_expr = expectExpr(p); + SLICE_APPEND(AstNodeIndex, &p->scratch, else_expr); + const uint32_t total = p->scratch.len - scratch_top; + const AstSubRange span + = listToSpan(p, &p->scratch.arr[scratch_top], total); + p->scratch.len = scratch_top; + return addNode(&p->nodes, + (AstNodeItem) { + .tag = AST_NODE_FOR, + .main_token = for_token, + .data = { + .lhs = span.start, + .rhs = ((uint32_t)inputs & 0x7FFFFFFF) | (1u << 31), + }, + }); + } + + if (inputs == 1) { + const AstNodeIndex input = p->scratch.arr[scratch_top]; + p->scratch.len = scratch_top; + return addNode(&p->nodes, + (AstNodeItem) { + .tag = AST_NODE_FOR_SIMPLE, + .main_token = for_token, + .data = { .lhs = input, .rhs = then_expr }, + }); + } + + SLICE_APPEND(AstNodeIndex, &p->scratch, then_expr); + const uint32_t total = p->scratch.len - scratch_top; + const AstSubRange span + = listToSpan(p, &p->scratch.arr[scratch_top], total); + p->scratch.len = scratch_top; + return addNode(&p->nodes, + (AstNodeItem) { + .tag = AST_NODE_FOR, + .main_token = for_token, + .data = { + .lhs = span.start, + .rhs = (uint32_t)inputs & 0x7FFFFFFF, + }, + }); +} + +static AstNodeIndex parseWhileStatement(Parser* p) { + const AstTokenIndex while_token = eatToken(p, TOKEN_KEYWORD_WHILE); + if (while_token == null_token) + return null_node; + + expectToken(p, TOKEN_L_PAREN); + const AstNodeIndex condition = expectExpr(p); + expectToken(p, TOKEN_R_PAREN); + parsePtrPayload(p); + + const AstNodeIndex cont_expr = parseWhileContinueExpr(p); + + // Statement body: block, or assign expr + bool else_required = false; + AstNodeIndex body; + const AstNodeIndex block = parseBlock(p); + if (block != 0) { + body = block; + } else { + body = parseAssignExpr(p); + if (body == 0) { + fail(p, "expected_block_or_assignment"); + } + if (eatToken(p, TOKEN_SEMICOLON) != null_token) { + if (cont_expr != 0) { + return addNode(&p->nodes, + (AstNodeItem) { + .tag = AST_NODE_WHILE_CONT, + .main_token = while_token, + .data = { + .lhs = condition, + .rhs = addExtra(p, + (AstNodeIndex[]) { cont_expr, body }, 2), + }, + }); + } + return addNode(&p->nodes, + (AstNodeItem) { + .tag = AST_NODE_WHILE_SIMPLE, + .main_token = while_token, + .data = { .lhs = condition, .rhs = body }, + }); + } + else_required = true; + } + + if (eatToken(p, TOKEN_KEYWORD_ELSE) == null_token) { + if (else_required) + fail(p, "expected_semi_or_else"); + if (cont_expr != 0) { + return addNode(&p->nodes, + (AstNodeItem) { + .tag = AST_NODE_WHILE_CONT, + .main_token = while_token, + .data = { + .lhs = condition, + .rhs = addExtra(p, + (AstNodeIndex[]) { cont_expr, body }, 2), + }, + }); + } + return addNode(&p->nodes, + (AstNodeItem) { + .tag = AST_NODE_WHILE_SIMPLE, + .main_token = while_token, + .data = { .lhs = condition, .rhs = body }, + }); + } + + parsePayload(p); + const AstNodeIndex else_body = expectStatement(p, false); + return addNode(&p->nodes, + (AstNodeItem) { + .tag = AST_NODE_WHILE, + .main_token = while_token, + .data = { + .lhs = condition, + .rhs = addExtra(p, + (AstNodeIndex[]) { OPT(cont_expr), body, else_body }, + 3), + }, + }); +} + +static AstNodeIndex expectBlockExprStatement(Parser* p) { + const AstNodeIndex block_expr = parseBlockExpr(p); + if (block_expr != 0) + return block_expr; + // Assign expr + semicolon + const AstNodeIndex expr = parseAssignExpr(p); + if (expr != 0) { + expectSemicolon(p); + return expr; + } + fail(p, "expectBlockExprStatement: expected block or expr"); + return 0; // tcc +} + +static AstNodeIndex parseBlockExpr(Parser* p) { + if (p->token_tags[p->tok_i] == TOKEN_L_BRACE) + return parseBlock(p); + if (p->token_tags[p->tok_i] == TOKEN_IDENTIFIER + && p->token_tags[p->tok_i + 1] == TOKEN_COLON + && p->token_tags[p->tok_i + 2] == TOKEN_L_BRACE) { + p->tok_i += 2; + return parseBlock(p); + } + return null_node; +} + +static AstNodeIndex parseAssignExpr(Parser* p) { + const AstNodeIndex expr = parseExpr(p); + if (expr == 0) + return null_node; + return finishAssignExpr(p, expr); +} + +static AstNodeIndex parseSingleAssignExpr(Parser* p) { + const AstNodeIndex expr = parseExpr(p); + if (expr == 0) + return null_node; + const AstNodeTag tag = assignOpNode(p->token_tags[p->tok_i]); + if (tag == AST_NODE_ROOT) + return expr; + const AstTokenIndex op_token = nextToken(p); + const AstNodeIndex rhs = expectExpr(p); + return addNode(&p->nodes, + (AstNodeItem) { + .tag = tag, + .main_token = op_token, + .data = { .lhs = expr, .rhs = rhs }, + }); +} + +static AstNodeIndex finishAssignExpr(Parser* p, AstNodeIndex lhs) { + const AstNodeTag assign_tag = assignOpNode(p->token_tags[p->tok_i]); + if (assign_tag == AST_NODE_ROOT) + return lhs; + + const AstTokenIndex op_token = nextToken(p); + const AstNodeIndex rhs = expectExpr(p); + return addNode(&p->nodes, + (AstNodeItem) { + .tag = assign_tag, + .main_token = op_token, + .data = { .lhs = lhs, .rhs = rhs }, + }); +} + +static AstNodeTag assignOpNode(TokenizerTag tok) { + switch (tok) { + case TOKEN_EQUAL: + return AST_NODE_ASSIGN; + case TOKEN_PLUS_EQUAL: + return AST_NODE_ASSIGN_ADD; + case TOKEN_MINUS_EQUAL: + return AST_NODE_ASSIGN_SUB; + case TOKEN_ASTERISK_EQUAL: + return AST_NODE_ASSIGN_MUL; + case TOKEN_SLASH_EQUAL: + return AST_NODE_ASSIGN_DIV; + case TOKEN_PERCENT_EQUAL: + return AST_NODE_ASSIGN_MOD; + case TOKEN_AMPERSAND_EQUAL: + return AST_NODE_ASSIGN_BIT_AND; + case TOKEN_PIPE_EQUAL: + return AST_NODE_ASSIGN_BIT_OR; + case TOKEN_CARET_EQUAL: + return AST_NODE_ASSIGN_BIT_XOR; + case TOKEN_ANGLE_BRACKET_ANGLE_BRACKET_LEFT_EQUAL: + return AST_NODE_ASSIGN_SHL; + case TOKEN_ANGLE_BRACKET_ANGLE_BRACKET_RIGHT_EQUAL: + return AST_NODE_ASSIGN_SHR; + case TOKEN_PLUS_PERCENT_EQUAL: + return AST_NODE_ASSIGN_ADD_WRAP; + case TOKEN_MINUS_PERCENT_EQUAL: + return AST_NODE_ASSIGN_SUB_WRAP; + case TOKEN_ASTERISK_PERCENT_EQUAL: + return AST_NODE_ASSIGN_MUL_WRAP; + case TOKEN_PLUS_PIPE_EQUAL: + return AST_NODE_ASSIGN_ADD_SAT; + case TOKEN_MINUS_PIPE_EQUAL: + return AST_NODE_ASSIGN_SUB_SAT; + case TOKEN_ASTERISK_PIPE_EQUAL: + return AST_NODE_ASSIGN_MUL_SAT; + case TOKEN_ANGLE_BRACKET_ANGLE_BRACKET_LEFT_PIPE_EQUAL: + return AST_NODE_ASSIGN_SHL_SAT; + default: + return AST_NODE_ROOT; // not an assignment op + } +} + +static AstNodeIndex parseExpr(Parser* p) { return parseExprPrecedence(p, 0); } + +static AstNodeIndex expectExpr(Parser* p) { + const AstNodeIndex node = parseExpr(p); + if (node == 0) { + fail(p, "expected expression"); + } + return node; +} + +static AstNodeIndex parseExprPrecedence(Parser* p, int32_t min_prec) { + assert(min_prec >= 0); + + AstNodeIndex node = parsePrefixExpr(p); + if (node == 0) + return null_node; + + int8_t banned_prec = -1; + + while (true) { + const TokenizerTag tok_tag = p->token_tags[p->tok_i]; + const OperInfo info = operTable(tok_tag); + if (info.prec < min_prec) + break; + + if (info.prec == banned_prec) { + fail(p, "chained comparison operators"); + } + + const AstTokenIndex oper_token = nextToken(p); + if (tok_tag == TOKEN_KEYWORD_CATCH) + parsePayload(p); + const AstNodeIndex rhs = parseExprPrecedence(p, info.prec + 1); + if (rhs == 0) { + fail(p, "expected expression"); + } + + { + const uint32_t tok_len = tokenTagLexemeLen(tok_tag); + if (tok_len > 0) { + const uint32_t tok_start = p->token_starts[oper_token]; + const char char_before = p->source[tok_start - 1]; + const char char_after = p->source[tok_start + tok_len]; + if (tok_tag == TOKEN_AMPERSAND && char_after == '&') { + fail(p, "invalid ampersand ampersand"); + } else if (isspace((unsigned char)char_before) + != isspace((unsigned char)char_after)) { + fail(p, "mismatched binary op whitespace"); + } + } + } + + node = addNode( + &p->nodes, + (AstNodeItem) { + .tag = info.tag, + .main_token = oper_token, + .data = { + .lhs = node, + .rhs = rhs, + }, + }); + + if (info.assoc == ASSOC_NONE) + banned_prec = info.prec; + } + + return node; +} + +static uint32_t tokenTagLexemeLen(TokenizerTag tag) { + switch (tag) { + case TOKEN_PLUS: + case TOKEN_MINUS: + case TOKEN_ASTERISK: + case TOKEN_SLASH: + case TOKEN_PERCENT: + case TOKEN_AMPERSAND: + case TOKEN_CARET: + case TOKEN_PIPE: + case TOKEN_ANGLE_BRACKET_LEFT: + case TOKEN_ANGLE_BRACKET_RIGHT: + return 1; + case TOKEN_PLUS_PLUS: + case TOKEN_MINUS_PERCENT: + case TOKEN_PLUS_PERCENT: + case TOKEN_MINUS_PIPE: + case TOKEN_PLUS_PIPE: + case TOKEN_ASTERISK_ASTERISK: + case TOKEN_ASTERISK_PERCENT: + case TOKEN_ASTERISK_PIPE: + case TOKEN_PIPE_PIPE: + case TOKEN_EQUAL_EQUAL: + case TOKEN_BANG_EQUAL: + case TOKEN_ANGLE_BRACKET_LEFT_EQUAL: + case TOKEN_ANGLE_BRACKET_RIGHT_EQUAL: + case TOKEN_ANGLE_BRACKET_ANGLE_BRACKET_LEFT: + case TOKEN_ANGLE_BRACKET_ANGLE_BRACKET_RIGHT: + return 2; + case TOKEN_ANGLE_BRACKET_ANGLE_BRACKET_LEFT_PIPE: + return 3; + case TOKEN_KEYWORD_OR: + return 2; + case TOKEN_KEYWORD_AND: + return 3; + case TOKEN_KEYWORD_ORELSE: + return 6; + case TOKEN_KEYWORD_CATCH: + return 5; + default: + return 0; + } +} + +static OperInfo operTable(TokenizerTag tok_tag) { + switch (tok_tag) { + case TOKEN_KEYWORD_OR: + return (OperInfo) { .prec = 10, .tag = AST_NODE_BOOL_OR }; + case TOKEN_KEYWORD_AND: + return (OperInfo) { .prec = 20, .tag = AST_NODE_BOOL_AND }; + + case TOKEN_EQUAL_EQUAL: + return (OperInfo) { + .prec = 30, .tag = AST_NODE_EQUAL_EQUAL, .assoc = ASSOC_NONE + }; + case TOKEN_BANG_EQUAL: + return (OperInfo) { + .prec = 30, .tag = AST_NODE_BANG_EQUAL, .assoc = ASSOC_NONE + }; + case TOKEN_ANGLE_BRACKET_LEFT: + return (OperInfo) { + .prec = 30, .tag = AST_NODE_LESS_THAN, .assoc = ASSOC_NONE + }; + case TOKEN_ANGLE_BRACKET_RIGHT: + return (OperInfo) { + .prec = 30, .tag = AST_NODE_GREATER_THAN, .assoc = ASSOC_NONE + }; + case TOKEN_ANGLE_BRACKET_LEFT_EQUAL: + return (OperInfo) { + .prec = 30, .tag = AST_NODE_LESS_OR_EQUAL, .assoc = ASSOC_NONE + }; + case TOKEN_ANGLE_BRACKET_RIGHT_EQUAL: + return (OperInfo) { + .prec = 30, .tag = AST_NODE_GREATER_OR_EQUAL, .assoc = ASSOC_NONE + }; + + case TOKEN_AMPERSAND: + return (OperInfo) { .prec = 40, .tag = AST_NODE_BIT_AND }; + case TOKEN_CARET: + return (OperInfo) { .prec = 40, .tag = AST_NODE_BIT_XOR }; + case TOKEN_PIPE: + return (OperInfo) { .prec = 40, .tag = AST_NODE_BIT_OR }; + case TOKEN_KEYWORD_ORELSE: + return (OperInfo) { .prec = 40, .tag = AST_NODE_ORELSE }; + case TOKEN_KEYWORD_CATCH: + return (OperInfo) { .prec = 40, .tag = AST_NODE_CATCH }; + + case TOKEN_ANGLE_BRACKET_ANGLE_BRACKET_LEFT: + return (OperInfo) { .prec = 50, .tag = AST_NODE_SHL }; + case TOKEN_ANGLE_BRACKET_ANGLE_BRACKET_LEFT_PIPE: + return (OperInfo) { .prec = 50, .tag = AST_NODE_SHL_SAT }; + case TOKEN_ANGLE_BRACKET_ANGLE_BRACKET_RIGHT: + return (OperInfo) { .prec = 50, .tag = AST_NODE_SHR }; + + case TOKEN_PLUS: + return (OperInfo) { .prec = 60, .tag = AST_NODE_ADD }; + case TOKEN_MINUS: + return (OperInfo) { .prec = 60, .tag = AST_NODE_SUB }; + case TOKEN_PLUS_PLUS: + return (OperInfo) { .prec = 60, .tag = AST_NODE_ARRAY_CAT }; + case TOKEN_PLUS_PERCENT: + return (OperInfo) { .prec = 60, .tag = AST_NODE_ADD_WRAP }; + case TOKEN_MINUS_PERCENT: + return (OperInfo) { .prec = 60, .tag = AST_NODE_SUB_WRAP }; + case TOKEN_PLUS_PIPE: + return (OperInfo) { .prec = 60, .tag = AST_NODE_ADD_SAT }; + case TOKEN_MINUS_PIPE: + return (OperInfo) { .prec = 60, .tag = AST_NODE_SUB_SAT }; + + case TOKEN_PIPE_PIPE: + return (OperInfo) { .prec = 70, .tag = AST_NODE_MERGE_ERROR_SETS }; + case TOKEN_ASTERISK: + return (OperInfo) { .prec = 70, .tag = AST_NODE_MUL }; + case TOKEN_SLASH: + return (OperInfo) { .prec = 70, .tag = AST_NODE_DIV }; + case TOKEN_PERCENT: + return (OperInfo) { .prec = 70, .tag = AST_NODE_MOD }; + case TOKEN_ASTERISK_ASTERISK: + return (OperInfo) { .prec = 70, .tag = AST_NODE_ARRAY_MULT }; + case TOKEN_ASTERISK_PERCENT: + return (OperInfo) { .prec = 70, .tag = AST_NODE_MUL_WRAP }; + case TOKEN_ASTERISK_PIPE: + return (OperInfo) { .prec = 70, .tag = AST_NODE_MUL_SAT }; + + default: + return (OperInfo) { .prec = -1, .tag = AST_NODE_ROOT }; + } +} + +static AstNodeIndex parsePrefixExpr(Parser* p) { + AstNodeTag tag; + switch (p->token_tags[p->tok_i]) { + case TOKEN_BANG: + tag = AST_NODE_BOOL_NOT; + break; + case TOKEN_MINUS: + tag = AST_NODE_NEGATION; + break; + case TOKEN_TILDE: + tag = AST_NODE_BIT_NOT; + break; + case TOKEN_MINUS_PERCENT: + tag = AST_NODE_NEGATION_WRAP; + break; + case TOKEN_AMPERSAND: + tag = AST_NODE_ADDRESS_OF; + break; + case TOKEN_KEYWORD_TRY: + tag = AST_NODE_TRY; + break; + default: + return parsePrimaryExpr(p); + } + return addNode( + &p->nodes, + (AstNodeItem) { + .tag = tag, + .main_token = nextToken(p), + .data = { + .lhs = parsePrefixExpr(p), + .rhs = 0, + }, + }); +} + +static AstNodeIndex parseTypeExpr(Parser* p) { + const TokenizerTag tok = p->token_tags[p->tok_i]; + switch (tok) { + case TOKEN_QUESTION_MARK: + return addNode(&p->nodes, + (AstNodeItem) { + .tag = AST_NODE_OPTIONAL_TYPE, + .main_token = nextToken(p), + .data = { .lhs = parseTypeExpr(p), .rhs = 0 }, + }); + case TOKEN_KEYWORD_ANYFRAME: + fail(p, "unsupported type expression"); + case TOKEN_ASTERISK: { + const AstTokenIndex asterisk = nextToken(p); + const PtrModifiers mods = parsePtrModifiers(p); + const AstNodeIndex elem_type = parseTypeExpr(p); + return makePtrTypeNode(p, asterisk, 0, mods, elem_type); + } + case TOKEN_ASTERISK_ASTERISK: { + const AstTokenIndex asterisk = nextToken(p); + const PtrModifiers mods = parsePtrModifiers(p); + const AstNodeIndex elem_type = parseTypeExpr(p); + if (elem_type == 0) { + fail(p, "expected type expression"); + } + const AstNodeIndex inner + = makePtrTypeNode(p, asterisk, 0, mods, elem_type); + return addNode(&p->nodes, + (AstNodeItem) { + .tag = AST_NODE_PTR_TYPE_ALIGNED, + .main_token = asterisk, + .data = { .lhs = 0, .rhs = inner }, + }); + } + case TOKEN_L_BRACKET: { + const AstTokenIndex lbracket = nextToken(p); + if (p->token_tags[p->tok_i] == TOKEN_ASTERISK) { + // [*] many-item pointer, [*c] C pointer, [*:s] sentinel + p->tok_i++; // consume * + AstNodeIndex sentinel = 0; + if (p->token_tags[p->tok_i] == TOKEN_IDENTIFIER) { + // Check for 'c' modifier: [*c] + const char c = p->source[p->token_starts[p->tok_i]]; + if (c == 'c' + && p->token_starts[p->tok_i + 1] + - p->token_starts[p->tok_i] + <= 2) { + p->tok_i++; // consume 'c' + } + } else if (eatToken(p, TOKEN_COLON) != null_token) { + sentinel = expectExpr(p); + } + expectToken(p, TOKEN_R_BRACKET); + const PtrModifiers mods = parsePtrModifiers(p); + const AstNodeIndex elem_type = parseTypeExpr(p); + return makePtrTypeNode(p, lbracket, sentinel, mods, elem_type); + } + const AstNodeIndex len_expr = parseExpr(p); + const AstNodeIndex sentinel + = eatToken(p, TOKEN_COLON) != null_token ? expectExpr(p) : 0; + expectToken(p, TOKEN_R_BRACKET); + if (len_expr == 0) { + // Slice type: []T or [:s]T + const PtrModifiers mods = parsePtrModifiers(p); + const AstNodeIndex elem_type = parseTypeExpr(p); + if (mods.bit_range_start != 0) { + fail(p, "invalid_bit_range"); + } + return makePtrTypeNode(p, lbracket, sentinel, mods, elem_type); + } + // Array type: [N]T or [N:s]T + const AstNodeIndex elem_type = parseTypeExpr(p); + if (sentinel == 0) { + return addNode(&p->nodes, + (AstNodeItem) { + .tag = AST_NODE_ARRAY_TYPE, + .main_token = lbracket, + .data = { .lhs = len_expr, .rhs = elem_type }, + }); + } + return addNode(&p->nodes, + (AstNodeItem) { + .tag = AST_NODE_ARRAY_TYPE_SENTINEL, + .main_token = lbracket, + .data = { + .lhs = len_expr, + .rhs = addExtra(p, + (AstNodeIndex[]) { sentinel, elem_type }, 2), + }, + }); + } + case TOKEN_KEYWORD_IF: { + // if-type-expr: uses parseTypeExpr for branches instead of parseExpr + const AstTokenIndex if_token = nextToken(p); + expectToken(p, TOKEN_L_PAREN); + const AstNodeIndex condition = expectExpr(p); + expectToken(p, TOKEN_R_PAREN); + parsePtrPayload(p); + const AstNodeIndex then_expr = parseTypeExpr(p); + if (eatToken(p, TOKEN_KEYWORD_ELSE) == null_token) + return addNode(&p->nodes, + (AstNodeItem) { + .tag = AST_NODE_IF_SIMPLE, + .main_token = if_token, + .data = { .lhs = condition, .rhs = then_expr }, + }); + parsePayload(p); + const AstNodeIndex else_expr = parseTypeExpr(p); + return addNode(&p->nodes, + (AstNodeItem) { + .tag = AST_NODE_IF, + .main_token = if_token, + .data = { + .lhs = condition, + .rhs = addExtra(p, + (AstNodeIndex[]) { then_expr, else_expr }, 2), + }, + }); + } + case TOKEN_KEYWORD_FOR: { + // for-type-expr: uses parseTypeExpr for body instead of parseExpr + const AstTokenIndex for_token = nextToken(p); + const uint32_t scratch_top2 = p->scratch.len; + const uint32_t inputs = forPrefix(p); + const AstNodeIndex body = parseTypeExpr(p); + bool has_else = false; + if (eatToken(p, TOKEN_KEYWORD_ELSE) != null_token) { + parsePayload(p); + SLICE_APPEND(AstNodeIndex, &p->scratch, body); + const AstNodeIndex else_expr = parseTypeExpr(p); + SLICE_APPEND(AstNodeIndex, &p->scratch, else_expr); + has_else = true; + } else if (inputs == 1) { + p->scratch.len = scratch_top2; + return addNode(&p->nodes, + (AstNodeItem) { + .tag = AST_NODE_FOR_SIMPLE, + .main_token = for_token, + .data = { + .lhs = p->scratch.arr[scratch_top2], + .rhs = body, + }, + }); + } else { + SLICE_APPEND(AstNodeIndex, &p->scratch, body); + } + const uint32_t total = p->scratch.len - scratch_top2; + const AstSubRange span + = listToSpan(p, &p->scratch.arr[scratch_top2], total); + p->scratch.len = scratch_top2; + return addNode(&p->nodes, + (AstNodeItem) { + .tag = AST_NODE_FOR, + .main_token = for_token, + .data = { + .lhs = span.start, + .rhs = ((uint32_t)inputs & 0x7FFFFFFF) + | (has_else ? (1u << 31) : 0), + }, + }); + } + case TOKEN_KEYWORD_WHILE: { + // while-type-expr: uses parseTypeExpr for body instead of parseExpr + const AstTokenIndex while_token = nextToken(p); + expectToken(p, TOKEN_L_PAREN); + const AstNodeIndex condition = expectExpr(p); + expectToken(p, TOKEN_R_PAREN); + parsePtrPayload(p); + const AstNodeIndex cont_expr + = eatToken(p, TOKEN_COLON) != null_token ? expectExpr(p) : 0; + const AstNodeIndex body = parseTypeExpr(p); + if (eatToken(p, TOKEN_KEYWORD_ELSE) != null_token) { + parsePayload(p); + const AstNodeIndex else_expr = parseTypeExpr(p); + return addNode(&p->nodes, + (AstNodeItem) { + .tag = AST_NODE_WHILE, + .main_token = while_token, + .data = { + .lhs = condition, + .rhs = addExtra(p, + (AstNodeIndex[]) { cont_expr, body, else_expr }, 3), + }, + }); + } + if (cont_expr != 0) + return addNode(&p->nodes, + (AstNodeItem) { + .tag = AST_NODE_WHILE_CONT, + .main_token = while_token, + .data = { + .lhs = condition, + .rhs = addExtra(p, + (AstNodeIndex[]) { cont_expr, body }, 2), + }, + }); + return addNode(&p->nodes, + (AstNodeItem) { + .tag = AST_NODE_WHILE_SIMPLE, + .main_token = while_token, + .data = { .lhs = condition, .rhs = body }, + }); + } + default: + return parseErrorUnionExpr(p); + } + return 0; // tcc +} + +static AstNodeIndex makePtrTypeNode(Parser* p, AstTokenIndex main_token, + AstNodeIndex sentinel, PtrModifiers mods, AstNodeIndex elem_type) { + if (mods.bit_range_start != 0) { + return addNode(&p->nodes, + (AstNodeItem) { + .tag = AST_NODE_PTR_TYPE_BIT_RANGE, + .main_token = main_token, + .data = { + .lhs = addExtra(p, + (AstNodeIndex[]) { OPT(sentinel), mods.align_node, + OPT(mods.addrspace_node), mods.bit_range_start, + mods.bit_range_end }, + 5), + .rhs = elem_type, + }, + }); + } + if (mods.addrspace_node != 0 || (sentinel != 0 && mods.align_node != 0)) { + return addNode(&p->nodes, + (AstNodeItem) { + .tag = AST_NODE_PTR_TYPE, + .main_token = main_token, + .data = { + .lhs = addExtra(p, + (AstNodeIndex[]) { OPT(sentinel), + OPT(mods.align_node), + OPT(mods.addrspace_node) }, + 3), + .rhs = elem_type, + }, + }); + } + if (sentinel != 0) { + return addNode(&p->nodes, + (AstNodeItem) { + .tag = AST_NODE_PTR_TYPE_SENTINEL, + .main_token = main_token, + .data = { .lhs = sentinel, .rhs = elem_type }, + }); + } + return addNode(&p->nodes, + (AstNodeItem) { + .tag = AST_NODE_PTR_TYPE_ALIGNED, + .main_token = main_token, + .data = { .lhs = mods.align_node, .rhs = elem_type }, + }); +} + +static AstNodeIndex parsePrimaryExpr(Parser* p) { + switch (p->token_tags[p->tok_i]) { + case TOKEN_KEYWORD_ASM: + return parseAsmExpr(p); + case TOKEN_KEYWORD_IF: + return parseIfExpr(p); + case TOKEN_KEYWORD_BREAK: + return addNode( + &p->nodes, + (AstNodeItem) { + .tag = AST_NODE_BREAK, + .main_token = nextToken(p), + .data = { + .lhs = parseBreakLabel(p), + .rhs = parseExpr(p), + }, + }); + case TOKEN_KEYWORD_CONTINUE: + return addNode( + &p->nodes, + (AstNodeItem) { + .tag = AST_NODE_CONTINUE, + .main_token = nextToken(p), + .data = { + .lhs = parseBreakLabel(p), + .rhs = parseExpr(p), + }, + }); + case TOKEN_KEYWORD_COMPTIME: + return addNode(&p->nodes, + (AstNodeItem) { + .tag = AST_NODE_COMPTIME, + .main_token = nextToken(p), + .data = { .lhs = expectExpr(p), .rhs = 0 }, + }); + case TOKEN_KEYWORD_NOSUSPEND: + return addNode(&p->nodes, + (AstNodeItem) { + .tag = AST_NODE_NOSUSPEND, + .main_token = nextToken(p), + .data = { .lhs = expectExpr(p), .rhs = 0 }, + }); + case TOKEN_KEYWORD_RESUME: + return addNode(&p->nodes, + (AstNodeItem) { + .tag = AST_NODE_RESUME, + .main_token = nextToken(p), + .data = { .lhs = expectExpr(p), .rhs = 0 }, + }); + case TOKEN_KEYWORD_RETURN: + return addNode(&p->nodes, + (AstNodeItem) { + .tag = AST_NODE_RETURN, + .main_token = nextToken(p), + .data = { .lhs = parseExpr(p), .rhs = 0 }, + }); + case TOKEN_IDENTIFIER: + if (p->token_tags[p->tok_i + 1] == TOKEN_COLON) { + switch (p->token_tags[p->tok_i + 2]) { + case TOKEN_KEYWORD_INLINE: + p->tok_i += 3; + if (p->token_tags[p->tok_i] == TOKEN_KEYWORD_FOR) + return parseForExpr(p); + if (p->token_tags[p->tok_i] == TOKEN_KEYWORD_WHILE) + return parseWhileExpr(p); + fail(p, "expected for or while after inline"); + return 0; // tcc + case TOKEN_KEYWORD_FOR: + p->tok_i += 2; + return parseForExpr(p); + case TOKEN_KEYWORD_WHILE: + p->tok_i += 2; + return parseWhileExpr(p); + case TOKEN_L_BRACE: + p->tok_i += 2; + return parseBlock(p); + default: + return parseCurlySuffixExpr(p); + } + } else { + return parseCurlySuffixExpr(p); + } + case TOKEN_KEYWORD_WHILE: + return parseWhileExpr(p); + case TOKEN_KEYWORD_FOR: + return parseForExpr(p); + case TOKEN_KEYWORD_INLINE: + p->tok_i++; + if (p->token_tags[p->tok_i] == TOKEN_KEYWORD_FOR) + return parseForExpr(p); + if (p->token_tags[p->tok_i] == TOKEN_KEYWORD_WHILE) + return parseWhileExpr(p); + fail(p, "parsePrimaryExpr: inline without for/while"); + return 0; // tcc + case TOKEN_L_BRACE: + return parseBlock(p); + default: + return parseCurlySuffixExpr(p); + } + + return 0; // tcc +} + +static AstNodeIndex parseIfExpr(Parser* p) { + const AstTokenIndex if_token = eatToken(p, TOKEN_KEYWORD_IF); + if (if_token == null_token) + return null_node; + + expectToken(p, TOKEN_L_PAREN); + const AstNodeIndex condition = expectExpr(p); + expectToken(p, TOKEN_R_PAREN); + parsePtrPayload(p); + + const AstNodeIndex then_expr = expectExpr(p); + + if (eatToken(p, TOKEN_KEYWORD_ELSE) == null_token) { + return addNode(&p->nodes, + (AstNodeItem) { + .tag = AST_NODE_IF_SIMPLE, + .main_token = if_token, + .data = { .lhs = condition, .rhs = then_expr }, + }); + } + + parsePayload(p); + const AstNodeIndex else_expr = expectExpr(p); + return addNode(&p->nodes, + (AstNodeItem) { + .tag = AST_NODE_IF, + .main_token = if_token, + .data = { + .lhs = condition, + .rhs = addExtra(p, + (AstNodeIndex[]) { then_expr, else_expr }, 2), + }, + }); +} + +static AstNodeIndex parseBlock(Parser* p) { + const AstNodeIndex lbrace = eatToken(p, TOKEN_L_BRACE); + if (lbrace == null_token) + return null_node; + + const uint32_t scratch_top = p->scratch.len; + + while (1) { + if (p->token_tags[p->tok_i] == TOKEN_R_BRACE) + break; + + // "const AstNodeIndex statement" once tinycc supports typeof_unqual + // (C23) + AstNodeIndex statement = expectStatement(p, true); + if (statement == 0) + break; + SLICE_APPEND(AstNodeIndex, &p->scratch, statement); + } + expectToken(p, TOKEN_R_BRACE); + const uint32_t statements_len = p->scratch.len - scratch_top; + p->scratch.len = scratch_top; + const bool semicolon = statements_len != 0 + && (p->token_tags[p->tok_i - 2] == TOKEN_SEMICOLON); + switch (statements_len) { + case 0: + return addNode( + &p->nodes, + (AstNodeItem) { + .tag = AST_NODE_BLOCK_TWO, + .main_token = lbrace, + .data = { + .lhs = 0, + .rhs = 0, + }, + }); + case 1: + return addNode( + &p->nodes, + (AstNodeItem) { + .tag = semicolon ? AST_NODE_BLOCK_TWO_SEMICOLON : AST_NODE_BLOCK_TWO, + .main_token = lbrace, + .data = { + .lhs = p->scratch.arr[scratch_top], + .rhs = 0, + }, + }); + case 2: + return addNode( + &p->nodes, + (AstNodeItem) { + .tag = semicolon ? AST_NODE_BLOCK_TWO_SEMICOLON : AST_NODE_BLOCK_TWO, + .main_token = lbrace, + .data = { + .lhs = p->scratch.arr[scratch_top], + .rhs = p->scratch.arr[scratch_top + 1], + }, + }); + default:; + const AstSubRange span + = listToSpan(p, &p->scratch.arr[scratch_top], statements_len); + return addNode( + &p->nodes, + (AstNodeItem) { + .tag = semicolon ? AST_NODE_BLOCK_SEMICOLON : AST_NODE_BLOCK, + .main_token = lbrace, + .data = { + .lhs = span.start, + .rhs = span.end, + }, + }); + } + + return 0; +} + +// forPrefix parses the for prefix: (expr, expr, ...) |captures|. +// Returns the number of input expressions. The inputs are appended +// to the scratch buffer. +static uint32_t forPrefix(Parser* p) { + const uint32_t start = p->scratch.len; + expectToken(p, TOKEN_L_PAREN); + + while (true) { + AstNodeIndex input = expectExpr(p); + if (eatToken(p, TOKEN_ELLIPSIS2) != null_token) { + const AstTokenIndex ellipsis = p->tok_i - 1; + const AstNodeIndex end = parseExpr(p); + input = addNode(&p->nodes, + (AstNodeItem) { + .tag = AST_NODE_FOR_RANGE, + .main_token = ellipsis, + .data = { .lhs = input, .rhs = end }, + }); + } + SLICE_APPEND(AstNodeIndex, &p->scratch, input); + if (p->token_tags[p->tok_i] == TOKEN_COMMA) { + p->tok_i++; + if (eatToken(p, TOKEN_R_PAREN) != null_token) + break; + continue; + } + expectToken(p, TOKEN_R_PAREN); + break; + } + const uint32_t inputs = p->scratch.len - start; + + // Parse payload |a, *b, c| + if (eatToken(p, TOKEN_PIPE) == null_token) { + fail(p, "expected loop payload"); + } + { + while (true) { + eatToken(p, TOKEN_ASTERISK); + expectToken(p, TOKEN_IDENTIFIER); + if (p->token_tags[p->tok_i] == TOKEN_COMMA) { + p->tok_i++; + if (eatToken(p, TOKEN_PIPE) != null_token) + break; + continue; + } + expectToken(p, TOKEN_PIPE); + break; + } + } + return inputs; +} + +static AstNodeIndex parseWhileExpr(Parser* p) { + const AstTokenIndex while_token = eatToken(p, TOKEN_KEYWORD_WHILE); + if (while_token == null_token) + return null_node; + + expectToken(p, TOKEN_L_PAREN); + const AstNodeIndex condition = expectExpr(p); + expectToken(p, TOKEN_R_PAREN); + parsePtrPayload(p); + + const AstNodeIndex cont_expr = parseWhileContinueExpr(p); + + const AstNodeIndex body = expectExpr(p); + + if (eatToken(p, TOKEN_KEYWORD_ELSE) == null_token) { + if (cont_expr != 0) { + return addNode(&p->nodes, + (AstNodeItem) { + .tag = AST_NODE_WHILE_CONT, + .main_token = while_token, + .data = { + .lhs = condition, + .rhs = addExtra(p, + (AstNodeIndex[]) { cont_expr, body }, 2), + }, + }); + } + return addNode(&p->nodes, + (AstNodeItem) { + .tag = AST_NODE_WHILE_SIMPLE, + .main_token = while_token, + .data = { .lhs = condition, .rhs = body }, + }); + } + + parsePayload(p); + const AstNodeIndex else_expr = expectExpr(p); + return addNode(&p->nodes, + (AstNodeItem) { + .tag = AST_NODE_WHILE, + .main_token = while_token, + .data = { + .lhs = condition, + .rhs = addExtra(p, + (AstNodeIndex[]) { OPT(cont_expr), body, else_expr }, + 3), + }, + }); +} + +static AstNodeIndex parseWhileContinueExpr(Parser* p) { + if (eatToken(p, TOKEN_COLON) == null_token) + return null_node; + expectToken(p, TOKEN_L_PAREN); + const AstNodeIndex expr = parseAssignExpr(p); + expectToken(p, TOKEN_R_PAREN); + return expr; +} + +static AstNodeIndex parseCurlySuffixExpr(Parser* p) { + const AstNodeIndex lhs = parseTypeExpr(p); + if (lhs == 0) + return null_node; + + const AstTokenIndex lbrace = eatToken(p, TOKEN_L_BRACE); + if (lbrace == null_token) + return lhs; + + return parseInitList(p, lhs, lbrace); +} + +// parseInitList parses the contents of { ... } for struct/array init. +// lhs is the type expression (0 for anonymous .{...}). +// lbrace is the lbrace token index. +static AstNodeIndex parseInitList( + Parser* p, AstNodeIndex lhs, AstTokenIndex lbrace) { + const uint32_t scratch_top = p->scratch.len; + + const AstNodeIndex field_init = parseFieldInit(p); + if (field_init != 0) { + // Struct init + SLICE_APPEND(AstNodeIndex, &p->scratch, field_init); + while (true) { + if (p->token_tags[p->tok_i] == TOKEN_COMMA) + p->tok_i++; + else if (p->token_tags[p->tok_i] == TOKEN_R_BRACE) { + p->tok_i++; + break; + } else { + fail(p, "parseInitList: expected , or } in struct init"); + } + if (eatToken(p, TOKEN_R_BRACE) != null_token) + break; + const AstNodeIndex next = parseFieldInit(p); + assert(next != 0); + SLICE_APPEND(AstNodeIndex, &p->scratch, next); + } + const bool comma = p->token_tags[p->tok_i - 2] == TOKEN_COMMA; + const uint32_t inits_len = p->scratch.len - scratch_top; + p->scratch.len = scratch_top; + if (lhs == 0) { + // Anonymous struct init: .{...} + switch (inits_len) { + case 0: + case 1: + case 2: + return addNode(&p->nodes, + (AstNodeItem) { + .tag = comma + ? AST_NODE_STRUCT_INIT_DOT_TWO_COMMA + : AST_NODE_STRUCT_INIT_DOT_TWO, + .main_token = lbrace, + .data = { + .lhs = inits_len >= 1 + ? p->scratch.arr[scratch_top] + : 0, + .rhs = inits_len >= 2 + ? p->scratch.arr[scratch_top + 1] + : 0, + }, + }); + default:; + const AstSubRange span + = listToSpan(p, &p->scratch.arr[scratch_top], inits_len); + return addNode(&p->nodes, + (AstNodeItem) { + .tag = comma ? AST_NODE_STRUCT_INIT_DOT_COMMA + : AST_NODE_STRUCT_INIT_DOT, + .main_token = lbrace, + .data = { .lhs = span.start, .rhs = span.end }, + }); + } + } + // Named struct init: X{...} + switch (inits_len) { + case 0: + case 1: + return addNode(&p->nodes, + (AstNodeItem) { + .tag = comma ? AST_NODE_STRUCT_INIT_ONE_COMMA + : AST_NODE_STRUCT_INIT_ONE, + .main_token = lbrace, + .data = { + .lhs = lhs, + .rhs = inits_len >= 1 + ? p->scratch.arr[scratch_top] + : 0, + }, + }); + default:; + const AstSubRange span + = listToSpan(p, &p->scratch.arr[scratch_top], inits_len); + return addNode(&p->nodes, + (AstNodeItem) { + .tag = comma ? AST_NODE_STRUCT_INIT_COMMA + : AST_NODE_STRUCT_INIT, + .main_token = lbrace, + .data = { + .lhs = lhs, + .rhs = addExtra(p, + (AstNodeIndex[]) { span.start, span.end }, 2), + }, + }); + } + } + + // Array init or empty init + while (true) { + if (eatToken(p, TOKEN_R_BRACE) != null_token) + break; + const AstNodeIndex elem = expectExpr(p); + SLICE_APPEND(AstNodeIndex, &p->scratch, elem); + if (p->token_tags[p->tok_i] == TOKEN_COMMA) + p->tok_i++; + else if (p->token_tags[p->tok_i] == TOKEN_R_BRACE) { + p->tok_i++; + break; + } else { + fail(p, "parseInitList: expected , or } in array init"); + } + } + + const bool comma = p->token_tags[p->tok_i - 2] == TOKEN_COMMA; + const uint32_t elems_len = p->scratch.len - scratch_top; + p->scratch.len = scratch_top; + if (lhs == 0) { + // Anonymous array init: .{a, b, ...} + switch (elems_len) { + case 0: + case 1: + case 2: + return addNode(&p->nodes, + (AstNodeItem) { + .tag = (elems_len == 0) + ? AST_NODE_STRUCT_INIT_DOT_TWO + : (comma ? AST_NODE_ARRAY_INIT_DOT_TWO_COMMA + : AST_NODE_ARRAY_INIT_DOT_TWO), + .main_token = lbrace, + .data = { + .lhs = elems_len >= 1 + ? p->scratch.arr[scratch_top] + : 0, + .rhs = elems_len >= 2 + ? p->scratch.arr[scratch_top + 1] + : 0, + }, + }); + default:; + const AstSubRange span + = listToSpan(p, &p->scratch.arr[scratch_top], elems_len); + return addNode(&p->nodes, + (AstNodeItem) { + .tag = comma ? AST_NODE_ARRAY_INIT_DOT_COMMA + : AST_NODE_ARRAY_INIT_DOT, + .main_token = lbrace, + .data = { .lhs = span.start, .rhs = span.end }, + }); + } + } + // Named init: X{a, b, ...} + switch (elems_len) { + case 0: + // Empty init X{} — treat as struct init + return addNode(&p->nodes, + (AstNodeItem) { + .tag = AST_NODE_STRUCT_INIT_ONE, + .main_token = lbrace, + .data = { .lhs = lhs, .rhs = 0 }, + }); + case 1: + return addNode(&p->nodes, + (AstNodeItem) { + .tag = comma ? AST_NODE_ARRAY_INIT_ONE_COMMA + : AST_NODE_ARRAY_INIT_ONE, + .main_token = lbrace, + .data = { + .lhs = lhs, + .rhs = p->scratch.arr[scratch_top], + }, + }); + default:; + const AstSubRange span + = listToSpan(p, &p->scratch.arr[scratch_top], elems_len); + return addNode(&p->nodes, + (AstNodeItem) { + .tag = comma ? AST_NODE_ARRAY_INIT_COMMA + : AST_NODE_ARRAY_INIT, + .main_token = lbrace, + .data = { + .lhs = lhs, + .rhs = addExtra(p, + (AstNodeIndex[]) { span.start, span.end }, 2), + }, + }); + } +} + +static AstNodeIndex parseErrorUnionExpr(Parser* p) { + const AstNodeIndex suffix_expr = parseSuffixExpr(p); + if (suffix_expr == 0) + return null_node; + + const AstNodeIndex bang = eatToken(p, TOKEN_BANG); + if (bang == null_token) + return suffix_expr; + + return addNode( + &p->nodes, + (AstNodeItem) { + .tag = AST_NODE_ERROR_UNION, + .main_token = bang, + .data = { + .lhs = suffix_expr, + .rhs = parseTypeExpr(p), + }, + }); +} + +static AstNodeIndex parseSuffixExpr(Parser* p) { + AstNodeIndex res = parsePrimaryTypeExpr(p); + if (res == 0) + return res; + + while (true) { + const AstNodeIndex suffix_op = parseSuffixOp(p, res); + if (suffix_op != 0) { + res = suffix_op; + continue; + } + const AstTokenIndex lparen = eatToken(p, TOKEN_L_PAREN); + if (lparen == null_token) + return res; + + const uint32_t scratch_top = p->scratch.len; + while (true) { + if (eatToken(p, TOKEN_R_PAREN) != null_token) + break; + const AstNodeIndex arg = expectExpr(p); + SLICE_APPEND(AstNodeIndex, &p->scratch, arg); + if (p->token_tags[p->tok_i] == TOKEN_COMMA) { + p->tok_i++; + continue; + } + expectToken(p, TOKEN_R_PAREN); + break; + } + + const bool comma = p->token_tags[p->tok_i - 2] == TOKEN_COMMA; + const uint32_t params_len = p->scratch.len - scratch_top; + p->scratch.len = scratch_top; + switch (params_len) { + case 0: + res = addNode( + &p->nodes, + (AstNodeItem) { + .tag = comma ? AST_NODE_CALL_ONE_COMMA : AST_NODE_CALL_ONE, + .main_token = lparen, + .data = { + .lhs = res, + .rhs = 0, + }, + }); + break; + case 1: + res = addNode( + &p->nodes, + (AstNodeItem) { + .tag = comma ? AST_NODE_CALL_ONE_COMMA : AST_NODE_CALL_ONE, + .main_token = lparen, + .data = { + .lhs = res, + .rhs = p->scratch.arr[scratch_top], + }, + }); + break; + default:; + const AstSubRange span + = listToSpan(p, &p->scratch.arr[scratch_top], params_len); + res = addNode( + &p->nodes, + (AstNodeItem) { + .tag = comma ? AST_NODE_CALL_COMMA : AST_NODE_CALL, + .main_token = lparen, + .data = { + .lhs = res, + .rhs = addExtra(p, (AstNodeIndex[]) { + span.start, + span.end, + }, 2), + }, + }); + break; + } + } +} + +static AstNodeIndex parsePrimaryTypeExpr(Parser* p) { + const TokenizerTag tok = p->token_tags[p->tok_i]; + switch (tok) { + case TOKEN_CHAR_LITERAL: + return addNode(&p->nodes, + (AstNodeItem) { + .tag = AST_NODE_CHAR_LITERAL, + .main_token = nextToken(p), + .data = {}, + }); + case TOKEN_NUMBER_LITERAL: + return addNode(&p->nodes, + (AstNodeItem) { + .tag = AST_NODE_NUMBER_LITERAL, + .main_token = nextToken(p), + .data = {}, + }); + case TOKEN_KEYWORD_UNREACHABLE: + return addNode(&p->nodes, + (AstNodeItem) { + .tag = AST_NODE_UNREACHABLE_LITERAL, + .main_token = nextToken(p), + .data = {}, + }); + case TOKEN_KEYWORD_ANYFRAME: + fail(p, "unsupported primary type expression"); + case TOKEN_STRING_LITERAL: + return addNode(&p->nodes, + (AstNodeItem) { + .tag = AST_NODE_STRING_LITERAL, + .main_token = nextToken(p), + .data = {}, + }); + case TOKEN_BUILTIN: + return parseBuiltinCall(p); + case TOKEN_KEYWORD_FN: + return parseFnProto(p); + case TOKEN_KEYWORD_IF: + return parseIfExpr(p); + case TOKEN_KEYWORD_SWITCH: + return parseSwitchExpr(p); + case TOKEN_KEYWORD_EXTERN: + case TOKEN_KEYWORD_PACKED: + // extern/packed can precede struct/union/enum + switch (p->token_tags[p->tok_i + 1]) { + case TOKEN_KEYWORD_STRUCT: + case TOKEN_KEYWORD_UNION: + case TOKEN_KEYWORD_ENUM: + p->tok_i++; // consume extern/packed + return parseContainerDeclAuto(p); + default: + fail(p, "unsupported primary type expression"); + } + case TOKEN_KEYWORD_STRUCT: + case TOKEN_KEYWORD_OPAQUE: + case TOKEN_KEYWORD_ENUM: + case TOKEN_KEYWORD_UNION: + return parseContainerDeclAuto(p); + case TOKEN_KEYWORD_COMPTIME: + return addNode(&p->nodes, + (AstNodeItem) { + .tag = AST_NODE_COMPTIME, + .main_token = nextToken(p), + .data = { .lhs = parseTypeExpr(p), .rhs = 0 }, + }); + case TOKEN_MULTILINE_STRING_LITERAL_LINE: { + const AstTokenIndex first = nextToken(p); + AstTokenIndex last = first; + while (p->token_tags[p->tok_i] == TOKEN_MULTILINE_STRING_LITERAL_LINE) + last = nextToken(p); + return addNode(&p->nodes, + (AstNodeItem) { + .tag = AST_NODE_MULTILINE_STRING_LITERAL, + .main_token = first, + .data = { .lhs = first, .rhs = last }, + }); + } + case TOKEN_IDENTIFIER: + if (p->token_tags[p->tok_i + 1] == TOKEN_COLON) { + switch (p->token_tags[p->tok_i + 2]) { + case TOKEN_L_BRACE: { + // Labeled block: label: { ... } + nextToken(p); // consume label + nextToken(p); // consume ':' + return parseBlock(p); + } + case TOKEN_KEYWORD_WHILE: + return parseLabeledStatement(p); + case TOKEN_KEYWORD_FOR: + return parseLabeledStatement(p); + default: + break; + } + } + return addNode(&p->nodes, + (AstNodeItem) { + .tag = AST_NODE_IDENTIFIER, + .main_token = nextToken(p), + .data = {}, + }); + case TOKEN_KEYWORD_FOR: + return parseForExpr(p); + case TOKEN_KEYWORD_WHILE: + return parseWhileExpr(p); + case TOKEN_KEYWORD_INLINE: + case TOKEN_PERIOD: + switch (p->token_tags[p->tok_i + 1]) { + case TOKEN_IDENTIFIER: { + const AstTokenIndex dot = nextToken(p); + return addNode(&p->nodes, + (AstNodeItem) { + .tag = AST_NODE_ENUM_LITERAL, + .main_token = nextToken(p), + .data = { .lhs = dot, .rhs = 0 }, + }); + } + case TOKEN_L_BRACE: { + // Anonymous struct/array init: .{ ... } + const AstTokenIndex lbrace = p->tok_i + 1; + p->tok_i = lbrace + 1; + return parseInitList(p, null_node, lbrace); + } + default: + fail(p, "unsupported period suffix"); + } + return 0; // tcc + case TOKEN_KEYWORD_ERROR: + switch (p->token_tags[p->tok_i + 1]) { + case TOKEN_PERIOD: { + const AstTokenIndex error_token = nextToken(p); + const AstTokenIndex dot = nextToken(p); + const AstTokenIndex value = expectToken(p, TOKEN_IDENTIFIER); + return addNode(&p->nodes, + (AstNodeItem) { + .tag = AST_NODE_ERROR_VALUE, + .main_token = error_token, + .data = { .lhs = dot, .rhs = value }, + }); + } + case TOKEN_L_BRACE: { + const AstTokenIndex error_token = nextToken(p); + const AstTokenIndex lbrace = nextToken(p); + while (p->token_tags[p->tok_i] != TOKEN_R_BRACE) + p->tok_i++; + const AstTokenIndex rbrace = nextToken(p); + return addNode(&p->nodes, + (AstNodeItem) { + .tag = AST_NODE_ERROR_SET_DECL, + .main_token = error_token, + .data = { .lhs = lbrace, .rhs = rbrace }, + }); + } + default: { + const AstTokenIndex main_token = nextToken(p); + const AstTokenIndex period = eatToken(p, TOKEN_PERIOD); + if (period == null_token) { + fail(p, "expected '.'"); + } + const AstTokenIndex identifier = eatToken(p, TOKEN_IDENTIFIER); + if (identifier == null_token) { + fail(p, "expected identifier"); + } + return addNode(&p->nodes, + (AstNodeItem) { + .tag = AST_NODE_ERROR_VALUE, + .main_token = main_token, + .data = { .lhs = period, .rhs = identifier }, + }); + } + } + case TOKEN_L_PAREN: { + const AstTokenIndex lparen = nextToken(p); + const AstNodeIndex inner = expectExpr(p); + const AstTokenIndex rparen = expectToken(p, TOKEN_R_PAREN); + return addNode(&p->nodes, + (AstNodeItem) { + .tag = AST_NODE_GROUPED_EXPRESSION, + .main_token = lparen, + .data = { .lhs = inner, .rhs = rparen }, + }); + } + default: + return null_node; + } +} + +static AstNodeIndex parseSwitchExpr(Parser* p) { + const AstTokenIndex switch_token = eatToken(p, TOKEN_KEYWORD_SWITCH); + if (switch_token == null_token) + return null_node; + + expectToken(p, TOKEN_L_PAREN); + const AstNodeIndex operand = expectExpr(p); + expectToken(p, TOKEN_R_PAREN); + expectToken(p, TOKEN_L_BRACE); + + const AstSubRange span = parseSwitchProngList(p); + const bool comma = p->token_tags[p->tok_i - 2] == TOKEN_COMMA; + return addNode(&p->nodes, + (AstNodeItem) { + .tag = comma ? AST_NODE_SWITCH_COMMA : AST_NODE_SWITCH, + .main_token = switch_token, + .data = { + .lhs = operand, + .rhs = addExtra(p, + (AstNodeIndex[]) { span.start, span.end }, 2), + }, + }); +} + +static AstNodeIndex parseAsmExpr(Parser* p) { + const AstTokenIndex asm_token = nextToken(p); + assert(p->token_tags[asm_token] == TOKEN_KEYWORD_ASM); + eatToken(p, TOKEN_KEYWORD_VOLATILE); + expectToken(p, TOKEN_L_PAREN); + const AstNodeIndex template = expectExpr(p); + + // Simple asm: asm("...") + if (eatToken(p, TOKEN_R_PAREN) != null_token) { + return addNode(&p->nodes, + (AstNodeItem) { + .tag = AST_NODE_ASM_SIMPLE, + .main_token = asm_token, + .data = { .lhs = template, .rhs = p->tok_i - 1 }, + }); + } + + // Complex asm with outputs, inputs, clobbers + expectToken(p, TOKEN_COLON); + + const uint32_t scratch_top = p->scratch.len; + + // Parse outputs + while (true) { + const AstNodeIndex output = parseAsmOutputItem(p); + if (output == 0) + break; + SLICE_APPEND(AstNodeIndex, &p->scratch, output); + if (eatToken(p, TOKEN_COMMA) == null_token) + break; + } + + // Parse inputs (after second colon) + if (eatToken(p, TOKEN_COLON) != null_token) { + while (true) { + const AstNodeIndex input = parseAsmInputItem(p); + if (input == 0) + break; + SLICE_APPEND(AstNodeIndex, &p->scratch, input); + if (eatToken(p, TOKEN_COMMA) == null_token) + break; + } + } + + // Parse clobbers (after third colon) + if (eatToken(p, TOKEN_COLON) != null_token) { + if (p->token_tags[p->tok_i] == TOKEN_STRING_LITERAL) { + // Legacy clobber format: "str1", "str2", ... + // Produces asm_legacy node + while (p->token_tags[p->tok_i] == TOKEN_STRING_LITERAL) { + p->tok_i++; + if (eatToken(p, TOKEN_COMMA) == null_token) + break; + } + const AstTokenIndex rparen = expectToken(p, TOKEN_R_PAREN); + const uint32_t items_len = p->scratch.len - scratch_top; + const AstSubRange items_span + = listToSpan(p, &p->scratch.arr[scratch_top], items_len); + p->scratch.len = scratch_top; + return addNode(&p->nodes, + (AstNodeItem) { + .tag = AST_NODE_ASM_LEGACY, + .main_token = asm_token, + .data = { + .lhs = template, + .rhs = addExtra(p, + (AstNodeIndex[]) { items_span.start, + items_span.end, rparen }, + 3), + }, + }); + } + // New clobber format: expression (e.g. .{ .clobber = true }) + AstNodeIndex clobbers = 0; + if (p->token_tags[p->tok_i] != TOKEN_R_PAREN) + clobbers = expectExpr(p); + + const AstTokenIndex rparen = expectToken(p, TOKEN_R_PAREN); + const uint32_t items_len = p->scratch.len - scratch_top; + const AstSubRange items_span + = listToSpan(p, &p->scratch.arr[scratch_top], items_len); + p->scratch.len = scratch_top; + return addNode(&p->nodes, + (AstNodeItem) { + .tag = AST_NODE_ASM, + .main_token = asm_token, + .data = { + .lhs = template, + .rhs = addExtra(p, + (AstNodeIndex[]) { items_span.start, + items_span.end, OPT(clobbers), rparen }, + 4), + }, + }); + } + + // No clobbers + const AstTokenIndex rparen = expectToken(p, TOKEN_R_PAREN); + const uint32_t items_len = p->scratch.len - scratch_top; + const AstSubRange items_span + = listToSpan(p, &p->scratch.arr[scratch_top], items_len); + p->scratch.len = scratch_top; + return addNode(&p->nodes, + (AstNodeItem) { + .tag = AST_NODE_ASM, + .main_token = asm_token, + .data = { + .lhs = template, + .rhs = addExtra(p, + (AstNodeIndex[]) { items_span.start, items_span.end, + OPT((AstNodeIndex)0), rparen }, + 4), + }, + }); +} + +static AstNodeIndex parseAsmOutputItem(Parser* p) { + if (p->token_tags[p->tok_i] == TOKEN_L_BRACKET) { + p->tok_i++; // [ + const AstTokenIndex ident = expectToken(p, TOKEN_IDENTIFIER); + expectToken(p, TOKEN_R_BRACKET); + expectToken(p, TOKEN_STRING_LITERAL); + expectToken(p, TOKEN_L_PAREN); + AstNodeIndex type_expr = 0; + if (eatToken(p, TOKEN_ARROW) != null_token) { + type_expr = parseTypeExpr(p); + } else { + expectToken(p, TOKEN_IDENTIFIER); + } + const AstTokenIndex rparen = expectToken(p, TOKEN_R_PAREN); + return addNode(&p->nodes, + (AstNodeItem) { + .tag = AST_NODE_ASM_OUTPUT, + .main_token = ident, + .data = { .lhs = type_expr, .rhs = rparen }, + }); + } + return null_node; +} + +static AstNodeIndex parseAsmInputItem(Parser* p) { + if (p->token_tags[p->tok_i] == TOKEN_L_BRACKET) { + p->tok_i++; // [ + const AstTokenIndex ident = expectToken(p, TOKEN_IDENTIFIER); + expectToken(p, TOKEN_R_BRACKET); + expectToken(p, TOKEN_STRING_LITERAL); + expectToken(p, TOKEN_L_PAREN); + const AstNodeIndex operand = expectExpr(p); + const AstTokenIndex rparen = expectToken(p, TOKEN_R_PAREN); + return addNode(&p->nodes, + (AstNodeItem) { + .tag = AST_NODE_ASM_INPUT, + .main_token = ident, + .data = { .lhs = operand, .rhs = rparen }, + }); + } + return null_node; +} + +static AstTokenIndex parseBreakLabel(Parser* p) { + if (eatToken(p, TOKEN_COLON) == null_token) + return null_token; + return expectToken(p, TOKEN_IDENTIFIER); +} + +static AstTokenIndex parseBlockLabel(Parser* p) { + if (p->token_tags[p->tok_i] == TOKEN_IDENTIFIER + && p->token_tags[p->tok_i + 1] == TOKEN_COLON) { + const AstTokenIndex identifier = p->tok_i; + p->tok_i += 2; + return identifier; + } + return null_node; +} + +// parseFieldInit tries to parse .field_name = expr; returns 0 if not a +// field init +static AstNodeIndex parseFieldInit(Parser* p) { + if (p->token_tags[p->tok_i] == TOKEN_PERIOD + && p->token_tags[p->tok_i + 1] == TOKEN_IDENTIFIER + && p->token_tags[p->tok_i + 2] == TOKEN_EQUAL) { + p->tok_i += 3; + return expectExpr(p); + } + return null_node; +} + +static AstNodeIndex parseLinkSection(Parser* p) { + if (eatToken(p, TOKEN_KEYWORD_LINKSECTION) == null_token) + return null_node; + expectToken(p, TOKEN_L_PAREN); + const AstNodeIndex expr = expectExpr(p); + expectToken(p, TOKEN_R_PAREN); + return expr; +} + +static AstNodeIndex parseCallconv(Parser* p) { + if (eatToken(p, TOKEN_KEYWORD_CALLCONV) == null_token) + return null_node; + expectToken(p, TOKEN_L_PAREN); + const AstNodeIndex expr = expectExpr(p); + expectToken(p, TOKEN_R_PAREN); + return expr; +} + +static AstNodeIndex parseAddrSpace(Parser* p) { + if (eatToken(p, TOKEN_KEYWORD_ADDRSPACE) == null_token) + return null_node; + expectToken(p, TOKEN_L_PAREN); + const AstNodeIndex expr = expectExpr(p); + expectToken(p, TOKEN_R_PAREN); + return expr; +} + +static AstNodeIndex expectParamDecl(Parser* p) { + eatDocComments(p); + eatToken(p, TOKEN_KEYWORD_COMPTIME); + eatToken(p, TOKEN_KEYWORD_NOALIAS); + if (p->token_tags[p->tok_i] == TOKEN_IDENTIFIER + && p->token_tags[p->tok_i + 1] == TOKEN_COLON) + p->tok_i += 2; + if (eatToken(p, TOKEN_KEYWORD_ANYTYPE) != null_token) + return 0; + return parseTypeExpr(p); +} + +static void parsePayload(Parser* p) { + if (eatToken(p, TOKEN_PIPE) == null_token) + return; + expectToken(p, TOKEN_IDENTIFIER); + expectToken(p, TOKEN_PIPE); +} + +static void parsePtrPayload(Parser* p) { + if (eatToken(p, TOKEN_PIPE) == null_token) + return; + while (true) { + eatToken(p, TOKEN_ASTERISK); + expectToken(p, TOKEN_IDENTIFIER); + if (p->token_tags[p->tok_i] == TOKEN_COMMA) { + p->tok_i++; + continue; + } + break; + } + expectToken(p, TOKEN_PIPE); +} + +static AstNodeIndex parseSwitchProng(Parser* p) { + const uint32_t items_old_len = p->scratch.len; + + if (eatToken(p, TOKEN_KEYWORD_ELSE) == null_token) { + while (true) { + const AstNodeIndex item = parseSwitchItem(p); + if (item == 0) + break; + SLICE_APPEND(AstNodeIndex, &p->scratch, item); + if (eatToken(p, TOKEN_COMMA) == null_token) + break; + } + if (p->scratch.len == items_old_len) + return null_node; + } + + const AstTokenIndex arrow + = expectToken(p, TOKEN_EQUAL_ANGLE_BRACKET_RIGHT); + parsePtrPayload(p); + const AstNodeIndex case_body = parseAssignExpr(p); + if (case_body == 0) { + fail(p, "expected expression"); + } + + const uint32_t items_len = p->scratch.len - items_old_len; + AstNodeIndex case_node; + switch (items_len) { + case 0: + case 1: + case_node = addNode(&p->nodes, + (AstNodeItem) { + .tag = AST_NODE_SWITCH_CASE_ONE, + .main_token = arrow, + .data = { + .lhs + = items_len >= 1 ? p->scratch.arr[items_old_len] : 0, + .rhs = case_body, + }, + }); + break; + default: { + const AstSubRange span + = listToSpan(p, &p->scratch.arr[items_old_len], items_len); + case_node = addNode(&p->nodes, + (AstNodeItem) { + .tag = AST_NODE_SWITCH_CASE, + .main_token = arrow, + .data = { + .lhs = addExtra(p, + (AstNodeIndex[]) { span.start, span.end }, 2), + .rhs = case_body, + }, + }); + } break; + } + + p->scratch.len = items_old_len; + return case_node; +} + +static AstNodeIndex parseSwitchItem(Parser* p) { + const AstNodeIndex expr = parseExpr(p); + if (expr == 0) + return null_node; + if (p->token_tags[p->tok_i] == TOKEN_ELLIPSIS3) { + const AstTokenIndex range_tok = nextToken(p); + const AstNodeIndex range_end = expectExpr(p); + return addNode(&p->nodes, + (AstNodeItem) { + .tag = AST_NODE_SWITCH_RANGE, + .main_token = range_tok, + .data = { .lhs = expr, .rhs = range_end }, + }); + } + return expr; +} + +static PtrModifiers parsePtrModifiers(Parser* p) { + PtrModifiers mods = {}; + + while (true) { + switch (p->token_tags[p->tok_i]) { + case TOKEN_KEYWORD_CONST: + case TOKEN_KEYWORD_VOLATILE: + case TOKEN_KEYWORD_ALLOWZERO: + p->tok_i++; + continue; + case TOKEN_KEYWORD_ALIGN: + p->tok_i++; + expectToken(p, TOKEN_L_PAREN); + mods.align_node = expectExpr(p); + if (eatToken(p, TOKEN_COLON) != null_token) { + mods.bit_range_start = expectExpr(p); + expectToken(p, TOKEN_COLON); + mods.bit_range_end = expectExpr(p); + } + expectToken(p, TOKEN_R_PAREN); + continue; + case TOKEN_KEYWORD_ADDRSPACE: + p->tok_i++; + expectToken(p, TOKEN_L_PAREN); + mods.addrspace_node = expectExpr(p); + expectToken(p, TOKEN_R_PAREN); + continue; + default: + return mods; + } + } +} + +static AstNodeIndex parseSuffixOp(Parser* p, AstNodeIndex lhs) { + const TokenizerTag tok = p->token_tags[p->tok_i]; + switch (tok) { + case TOKEN_L_BRACKET: { + const AstTokenIndex lbracket = nextToken(p); + const AstNodeIndex index_expr = expectExpr(p); + switch (p->token_tags[p->tok_i]) { + case TOKEN_R_BRACKET: + p->tok_i++; + return addNode(&p->nodes, + (AstNodeItem) { + .tag = AST_NODE_ARRAY_ACCESS, + .main_token = lbracket, + .data = { .lhs = lhs, .rhs = index_expr }, + }); + case TOKEN_ELLIPSIS2: { + p->tok_i++; // consume .. + const AstNodeIndex end_expr = parseExpr(p); + if (eatToken(p, TOKEN_COLON) != null_token) { + const AstNodeIndex sentinel = expectExpr(p); + expectToken(p, TOKEN_R_BRACKET); + // end_expr 0 means "no end" — encode as ~0 for + // OptionalIndex.none + const AstNodeIndex opt_end + = end_expr == 0 ? ~(AstNodeIndex)0 : end_expr; + return addNode(&p->nodes, + (AstNodeItem) { + .tag = AST_NODE_SLICE_SENTINEL, + .main_token = lbracket, + .data = { + .lhs = lhs, + .rhs = addExtra(p, + (AstNodeIndex[]) { + index_expr, opt_end, sentinel }, + 3), + }, + }); + } + expectToken(p, TOKEN_R_BRACKET); + if (end_expr == 0) { + return addNode(&p->nodes, + (AstNodeItem) { + .tag = AST_NODE_SLICE_OPEN, + .main_token = lbracket, + .data = { .lhs = lhs, .rhs = index_expr }, + }); + } + return addNode(&p->nodes, + (AstNodeItem) { + .tag = AST_NODE_SLICE, + .main_token = lbracket, + .data = { + .lhs = lhs, + .rhs = addExtra(p, + (AstNodeIndex[]) { index_expr, end_expr }, 2), + }, + }); + } + default: + fail(p, "parseSuffixOp: expected ] or .. after index expr"); + } + return 0; // tcc + } + case TOKEN_PERIOD_ASTERISK: + return addNode(&p->nodes, + (AstNodeItem) { + .tag = AST_NODE_DEREF, + .main_token = nextToken(p), + .data = { .lhs = lhs, .rhs = 0 }, + }); + case TOKEN_INVALID_PERIODASTERISKS: + fail(p, "unsupported suffix op"); + case TOKEN_PERIOD: + if (p->token_tags[p->tok_i + 1] == TOKEN_IDENTIFIER) { + const AstTokenIndex dot = nextToken(p); + return addNode(&p->nodes, + (AstNodeItem) { + .tag = AST_NODE_FIELD_ACCESS, + .main_token = dot, + .data = { .lhs = lhs, .rhs = nextToken(p) }, + }); + } + if (p->token_tags[p->tok_i + 1] == TOKEN_ASTERISK) { + const AstTokenIndex dot = nextToken(p); + nextToken(p); // consume the * + return addNode(&p->nodes, + (AstNodeItem) { + .tag = AST_NODE_DEREF, + .main_token = dot, + .data = { .lhs = lhs, .rhs = 0 }, + }); + } + if (p->token_tags[p->tok_i + 1] == TOKEN_QUESTION_MARK) { + const AstTokenIndex dot = nextToken(p); + return addNode(&p->nodes, + (AstNodeItem) { + .tag = AST_NODE_UNWRAP_OPTIONAL, + .main_token = dot, + .data = { .lhs = lhs, .rhs = nextToken(p) }, + }); + } + fail(p, "parseSuffixOp: unsupported period suffix"); + return 0; // tcc + default: + return null_node; + } +} + +static AstNodeIndex parseContainerDeclAuto(Parser* p) { + const AstTokenIndex main_token = nextToken(p); + AstNodeIndex arg_expr = null_node; + switch (p->token_tags[main_token]) { + case TOKEN_KEYWORD_OPAQUE: + break; + case TOKEN_KEYWORD_STRUCT: + case TOKEN_KEYWORD_ENUM: + if (eatToken(p, TOKEN_L_PAREN) != null_token) { + arg_expr = expectExpr(p); + expectToken(p, TOKEN_R_PAREN); + } + break; + case TOKEN_KEYWORD_UNION: + if (eatToken(p, TOKEN_L_PAREN) != null_token) { + if (eatToken(p, TOKEN_KEYWORD_ENUM) != null_token) { + if (eatToken(p, TOKEN_L_PAREN) != null_token) { + const AstNodeIndex enum_tag_expr = expectExpr(p); + expectToken(p, TOKEN_R_PAREN); + expectToken(p, TOKEN_R_PAREN); + expectToken(p, TOKEN_L_BRACE); + const Members members = parseContainerMembers(p); + const AstSubRange members_span = membersToSpan(members, p); + expectToken(p, TOKEN_R_BRACE); + return addNode( + &p->nodes, + (AstNodeItem) { + .tag = members.trailing + ? AST_NODE_TAGGED_UNION_ENUM_TAG_TRAILING + : AST_NODE_TAGGED_UNION_ENUM_TAG, + .main_token = main_token, + .data = { + .lhs = enum_tag_expr, + .rhs = addExtra(p, + (AstNodeIndex[]) { + members_span.start, + members_span.end }, + 2), + }, + }); + } + expectToken(p, TOKEN_R_PAREN); + expectToken(p, TOKEN_L_BRACE); + const Members members = parseContainerMembers(p); + expectToken(p, TOKEN_R_BRACE); + if (members.len <= 2) { + return addNode(&p->nodes, + (AstNodeItem) { + .tag = members.trailing + ? AST_NODE_TAGGED_UNION_TWO_TRAILING + : AST_NODE_TAGGED_UNION_TWO, + .main_token = main_token, + .data = { .lhs = members.lhs, .rhs = members.rhs }, + }); + } + const AstSubRange span = membersToSpan(members, p); + return addNode(&p->nodes, + (AstNodeItem) { + .tag = members.trailing + ? AST_NODE_TAGGED_UNION_TRAILING + : AST_NODE_TAGGED_UNION, + .main_token = main_token, + .data = { .lhs = span.start, .rhs = span.end }, + }); + } + arg_expr = expectExpr(p); + expectToken(p, TOKEN_R_PAREN); + } + break; + default: + fail(p, "parseContainerDeclAuto: unexpected token"); + } + + expectToken(p, TOKEN_L_BRACE); + const Members members = parseContainerMembers(p); + expectToken(p, TOKEN_R_BRACE); + + if (arg_expr == null_node) { + if (members.len <= 2) { + return addNode(&p->nodes, + (AstNodeItem) { + .tag = members.trailing + ? AST_NODE_CONTAINER_DECL_TWO_TRAILING + : AST_NODE_CONTAINER_DECL_TWO, + .main_token = main_token, + .data = { .lhs = members.lhs, .rhs = members.rhs }, + }); + } + const AstSubRange span = membersToSpan(members, p); + return addNode(&p->nodes, + (AstNodeItem) { + .tag = members.trailing ? AST_NODE_CONTAINER_DECL_TRAILING + : AST_NODE_CONTAINER_DECL, + .main_token = main_token, + .data = { .lhs = span.start, .rhs = span.end }, + }); + } + + const AstSubRange span = membersToSpan(members, p); + return addNode( + &p->nodes, + (AstNodeItem) { + .tag = members.trailing + ? AST_NODE_CONTAINER_DECL_ARG_TRAILING + : AST_NODE_CONTAINER_DECL_ARG, + .main_token = main_token, + .data = { + .lhs = arg_expr, + .rhs = addExtra(p, + (AstNodeIndex[]) { span.start, span.end }, 2), + }, + }); +} + +static AstNodeIndex parseByteAlign(Parser* p) { + if (eatToken(p, TOKEN_KEYWORD_ALIGN) == null_token) + return null_node; + expectToken(p, TOKEN_L_PAREN); + const AstNodeIndex expr = expectExpr(p); + expectToken(p, TOKEN_R_PAREN); + return expr; +} + +static AstSubRange parseSwitchProngList(Parser* p) { + const uint32_t scratch_top = p->scratch.len; + while (true) { + if (eatToken(p, TOKEN_R_BRACE) != null_token) + break; + eatDocComments(p); + const AstNodeIndex case_node = parseSwitchProng(p); + if (case_node == 0) + break; + SLICE_APPEND(AstNodeIndex, &p->scratch, case_node); + if (p->token_tags[p->tok_i] == TOKEN_COMMA) + p->tok_i++; + } + const uint32_t cases_len = p->scratch.len - scratch_top; + const AstSubRange span + = listToSpan(p, &p->scratch.arr[scratch_top], cases_len); + p->scratch.len = scratch_top; + return span; +} + +static SmallSpan parseParamDeclList(Parser* p) { + expectToken(p, TOKEN_L_PAREN); + + const uint32_t scratch_top = p->scratch.len; + + // 0 = none, 1 = seen, 2 = nonfinal + int varargs = 0; + + while (true) { + if (eatToken(p, TOKEN_R_PAREN) != null_token) + break; + if (varargs == 1) + varargs = 2; + + if (p->token_tags[p->tok_i] == TOKEN_ELLIPSIS3) { + p->tok_i++; + if (varargs == 0) + varargs = 1; + if (eatToken(p, TOKEN_R_PAREN) != null_token) + break; + expectToken(p, TOKEN_COMMA); + continue; + } + + const AstNodeIndex type_expr = expectParamDecl(p); + if (type_expr != 0) + SLICE_APPEND(AstNodeIndex, &p->scratch, type_expr); + + if (p->token_tags[p->tok_i] == TOKEN_COMMA) { + p->tok_i++; + continue; + } + expectToken(p, TOKEN_R_PAREN); + break; + } + + if (varargs == 2) { + fail(p, "varargs_nonfinal"); + } + + const uint32_t params_len = p->scratch.len - scratch_top; + p->scratch.len = scratch_top; + switch (params_len) { + case 0: + return (SmallSpan) { + .tag = SMALL_SPAN_ZERO_OR_ONE, + .payload = { .zero_or_one = 0 }, + }; + case 1: + return (SmallSpan) { + .tag = SMALL_SPAN_ZERO_OR_ONE, + .payload = { .zero_or_one = p->scratch.arr[scratch_top] }, + }; + default:; + const AstSubRange span + = listToSpan(p, &p->scratch.arr[scratch_top], params_len); + return (SmallSpan) { + .tag = SMALL_SPAN_MULTI, + .payload = { .multi = span }, + }; + } +} + +static AstNodeIndex parseBuiltinCall(Parser* p) { + const AstTokenIndex builtin_token = assertToken(p, TOKEN_BUILTIN); + assertToken(p, TOKEN_L_PAREN); + + const uint32_t scratch_top = p->scratch.len; + + while (true) { + if (eatToken(p, TOKEN_R_PAREN) != null_token) + break; + + const AstNodeIndex param = expectExpr(p); + SLICE_APPEND(AstNodeIndex, &p->scratch, param); + switch (p->token_tags[p->tok_i]) { + case TOKEN_COMMA: + p->tok_i++; + break; + case TOKEN_R_PAREN: + p->tok_i++; + goto end_loop; + default: + fail(p, "expected comma after arg"); + } + } +end_loop:; + + const bool comma = (p->token_tags[p->tok_i - 2] == TOKEN_COMMA); + const uint32_t params_len = p->scratch.len - scratch_top; + p->scratch.len = scratch_top; + switch (params_len) { + case 0: + return addNode(&p->nodes, + (AstNodeItem) { + .tag = AST_NODE_BUILTIN_CALL_TWO, + .main_token = builtin_token, + .data = { + .lhs = 0, + .rhs = 0, + }, + }); + case 1: + return addNode(&p->nodes, + (AstNodeItem) { + .tag = comma ? + AST_NODE_BUILTIN_CALL_TWO_COMMA : + AST_NODE_BUILTIN_CALL_TWO, + .main_token = builtin_token, + .data = { + .lhs = p->scratch.arr[scratch_top], + .rhs = 0, + }, + }); + case 2: + return addNode(&p->nodes, + (AstNodeItem) { + .tag = comma ? + AST_NODE_BUILTIN_CALL_TWO_COMMA : + AST_NODE_BUILTIN_CALL_TWO, + .main_token = builtin_token, + .data = { + .lhs = p->scratch.arr[scratch_top], + .rhs = p->scratch.arr[scratch_top+1], + }, + }); + default:; + const AstSubRange span + = listToSpan(p, &p->scratch.arr[scratch_top], params_len); + return addNode( + &p->nodes, + (AstNodeItem) { + .tag = comma ? + AST_NODE_BUILTIN_CALL_COMMA : + AST_NODE_BUILTIN_CALL, + .main_token = builtin_token, + .data = { + .lhs = span.start, + .rhs = span.end, + }, + }); + } +} + +static AstTokenIndex eatDocComments(Parser* p) { + AstTokenIndex first = null_token; + AstTokenIndex tok; + while ((tok = eatToken(p, TOKEN_DOC_COMMENT)) != null_token) { + if (first == null_token) { + if (tok > 0 && tokensOnSameLine(p, tok - 1, tok)) { + fail(p, "same_line_doc_comment"); + } + first = tok; + } + } + return first; +} + +static bool tokensOnSameLine( + Parser* p, AstTokenIndex tok1, AstTokenIndex tok2) { + const uint32_t start1 = p->token_starts[tok1]; + const uint32_t start2 = p->token_starts[tok2]; + for (uint32_t i = start1; i < start2; i++) { + if (p->source[i] == '\n') + return false; + } + return true; +} + +static AstTokenIndex eatToken(Parser* p, TokenizerTag tag) { + if (p->token_tags[p->tok_i] == tag) { + return nextToken(p); + } else { + return null_token; + } +} + +static AstTokenIndex assertToken(Parser* p, TokenizerTag tag) { + const AstTokenIndex token = nextToken(p); + if (p->token_tags[token] != tag) { + fail(p, "unexpected token"); + } + return token; +} + +static AstTokenIndex expectToken(Parser* p, TokenizerTag tag) { + if (p->token_tags[p->tok_i] == tag) { + return nextToken(p); + } else { + fail(p, "unexpected token"); + } + return 0; // tcc +} + +static AstNodeIndex expectSemicolon(Parser* p) { + return expectToken(p, TOKEN_SEMICOLON); +} + +static AstTokenIndex nextToken(Parser* p) { return p->tok_i++; } diff --git a/stage0/parser.h b/stage0/parser.h new file mode 100644 index 0000000000..448194d8c9 --- /dev/null +++ b/stage0/parser.h @@ -0,0 +1,44 @@ +// parser.h +#ifndef _ZIG0_PARSE_H__ +#define _ZIG0_PARSE_H__ + +#include "ast.h" +#include "common.h" +#include +#include +#include +#include + +typedef struct { + const char* source; + uint32_t source_len; + + TokenizerTag* token_tags; + AstIndex* token_starts; + uint32_t tokens_len; + + AstTokenIndex tok_i; + + AstNodeList nodes; + AstNodeIndexSlice extra_data; + AstNodeIndexSlice scratch; + jmp_buf error_jmp; + char* err_buf; +} Parser; + +#define PARSE_ERR_BUF_SIZE 200 + +_Noreturn static inline void fail(Parser* p, const char* msg) { + size_t len = strlen(msg); + if (len >= PARSE_ERR_BUF_SIZE) + len = PARSE_ERR_BUF_SIZE - 1; + memcpy(p->err_buf, msg, len); + p->err_buf[len] = '\0'; + longjmp(p->error_jmp, 1); +} + +Parser* parserInit(const char* source, uint32_t len); +void parserDeinit(Parser* parser); +void parseRoot(Parser* parser); + +#endif diff --git a/stage0/parser_test.zig b/stage0/parser_test.zig new file mode 100644 index 0000000000..134d65aae4 --- /dev/null +++ b/stage0/parser_test.zig @@ -0,0 +1,7021 @@ +const std = @import("std"); +const mem = std.mem; +const print = std.debug.print; +const io = std.io; +const maxInt = std.math.maxInt; + +test "zig fmt: remove extra whitespace at start and end of file with comment between" { + try testTransform( + \\ + \\ + \\// hello + \\ + \\ + , + \\// hello + \\ + ); +} + +test "zig fmt: tuple struct" { + try testCanonical( + \\const T = struct { + \\ /// doc comment on tuple field + \\ comptime comptime u32, + \\ /// another doc comment on tuple field + \\ *u32 = 1, + \\ // needs to be wrapped in parentheses to not be parsed as a function decl + \\ (fn () void) align(1), + \\}; + \\ + ); +} + +test "zig fmt: preserves clobbers in inline asm with stray comma" { + try testTransform( + \\fn foo() void { + \\ asm volatile ("" + \\ : [_] "" (-> type), + \\ : + \\ : "clobber" + \\ ); + \\ asm volatile ("" + \\ : + \\ : [_] "" (type), + \\ : "clobber" + \\ ); + \\} + \\ + , + \\fn foo() void { + \\ asm volatile ("" + \\ : [_] "" (-> type), + \\ : + \\ : .{ .clobber = true } + \\ ); + \\ asm volatile ("" + \\ : + \\ : [_] "" (type), + \\ : .{ .clobber = true } + \\ ); + \\} + \\ + ); +} + +test "zig fmt: remove trailing comma at the end of assembly clobber" { + try testTransform( + \\fn foo() void { + \\ asm volatile ("" + \\ : [_] "" (-> type), + \\ : + \\ : "clobber1", "clobber2", + \\ ); + \\} + \\ + , + \\fn foo() void { + \\ asm volatile ("" + \\ : [_] "" (-> type), + \\ : + \\ : .{ .clobber1 = true, .clobber2 = true } + \\ ); + \\} + \\ + ); +} + +test "zig fmt: respect line breaks in struct field value declaration" { + try testCanonical( + \\const Foo = struct { + \\ bar: u32 = + \\ 42, + \\ bar: u32 = + \\ // a comment + \\ 42, + \\ bar: u32 = + \\ 42, + \\ // a comment + \\ bar: []const u8 = + \\ \\ foo + \\ \\ bar + \\ \\ baz + \\ , + \\ bar: u32 = + \\ blk: { + \\ break :blk 42; + \\ }, + \\}; + \\ + ); +} + +test "zig fmt: respect line breaks before functions" { + try testCanonical( + \\const std = @import("std"); + \\ + \\inline fn foo() void {} + \\ + \\noinline fn foo() void {} + \\ + \\export fn foo() void {} + \\ + \\extern fn foo() void; + \\ + \\extern "foo" fn foo() void; + \\ + ); +} + +test "zig fmt: rewrite callconv(.@\"inline\") to the inline keyword" { + try testTransform( + \\fn foo() callconv(.@"inline") void {} + \\const bar: @import("std").builtin.CallingConvention = .@"inline"; + \\fn foo() callconv(bar) void {} + \\ + , + \\inline fn foo() void {} + \\const bar: @import("std").builtin.CallingConvention = .@"inline"; + \\fn foo() callconv(bar) void {} + \\ + ); +} + +test "zig fmt: simple top level comptime block" { + try testCanonical( + \\// line comment + \\comptime {} + \\ + ); +} + +test "zig fmt: two spaced line comments before decl" { + try testCanonical( + \\// line comment + \\ + \\// another + \\comptime {} + \\ + ); +} + +test "zig fmt: respect line breaks after var declarations" { + try testCanonical( + \\const crc = + \\ lookup_tables[0][p[7]] ^ + \\ lookup_tables[1][p[6]] ^ + \\ lookup_tables[2][p[5]] ^ + \\ lookup_tables[3][p[4]] ^ + \\ lookup_tables[4][@as(u8, self.crc >> 24)] ^ + \\ lookup_tables[5][@as(u8, self.crc >> 16)] ^ + \\ lookup_tables[6][@as(u8, self.crc >> 8)] ^ + \\ lookup_tables[7][@as(u8, self.crc >> 0)]; + \\ + ); +} + +test "zig fmt: multiline string mixed with comments" { + try testCanonical( + \\const s1 = + \\ //\\one + \\ \\two) + \\ \\three + \\; + \\const s2 = + \\ \\one + \\ \\two) + \\ //\\three + \\; + \\const s3 = + \\ \\one + \\ //\\two) + \\ \\three + \\; + \\const s4 = + \\ \\one + \\ //\\two + \\ \\three + \\ //\\four + \\ \\five + \\; + \\const a = + \\ 1; + \\ + ); +} + +test "zig fmt: empty file" { + try testCanonical( + \\ + ); +} + +test "zig fmt: file ends in comment" { + try testTransform( + \\ //foobar + , + \\//foobar + \\ + ); +} + +test "zig fmt: file ends in multi line comment" { + try testTransform( + \\ \\foobar + , + \\\\foobar + \\ + ); +} + +test "zig fmt: file ends in comment after var decl" { + try testTransform( + \\const x = 42; + \\ //foobar + , + \\const x = 42; + \\//foobar + \\ + ); +} + +test "zig fmt: if statement" { + try testCanonical( + \\test "" { + \\ if (optional()) |some| + \\ bar = some.foo(); + \\} + \\ + ); +} + +test "zig fmt: top-level fields" { + try testCanonical( + \\a: did_you_know, + \\b: all_files_are, + \\structs: ?x, + \\ + ); +} + +test "zig fmt: top-level tuple function call type" { + try testCanonical( + \\foo() + \\ + ); +} + +test "zig fmt: top-level enum missing 'const name ='" { + try testError( + \\enum(u32) + \\ + , &[_]Error{.expected_token}); +} + +test "zig fmt: top-level for/while loop" { + try testCanonical( + \\for (foo) |_| foo + \\ + ); + try testCanonical( + \\while (foo) |_| foo + \\ + ); +} + +test "zig fmt: top-level bare asterisk+identifier" { + try testCanonical( + \\*x + \\ + ); +} + +test "zig fmt: top-level bare asterisk+asterisk+identifier" { + try testCanonical( + \\**x + \\ + ); +} + +test "zig fmt: C style containers" { + try testError( + \\struct Foo { + \\ a: u32, + \\}; + , &[_]Error{ + .c_style_container, + .zig_style_container, + }); + try testError( + \\test { + \\ struct Foo { + \\ a: u32, + \\ }; + \\} + , &[_]Error{ + .c_style_container, + .zig_style_container, + }); +} + +test "zig fmt: decl between fields" { + try testError( + \\const S = struct { + \\ const foo = 2; + \\ const bar = 2; + \\ const baz = 2; + \\ a: usize, + \\ const foo1 = 2; + \\ const bar1 = 2; + \\ const baz1 = 2; + \\ b: usize, + \\}; + , &[_]Error{ + .decl_between_fields, + .previous_field, + .next_field, + }); +} + +test "zig fmt: errdefer with payload" { + try testCanonical( + \\pub fn main() anyerror!void { + \\ errdefer |a| x += 1; + \\ errdefer |a| {} + \\ errdefer |a| { + \\ x += 1; + \\ } + \\} + \\ + ); +} + +test "zig fmt: nosuspend block" { + try testCanonical( + \\pub fn main() anyerror!void { + \\ nosuspend { + \\ var foo: Foo = .{ .bar = 42 }; + \\ } + \\} + \\ + ); +} + +test "zig fmt: container declaration, single line" { + try testCanonical( + \\const X = struct { foo: i32 }; + \\const X = struct { foo: i32, bar: i32 }; + \\const X = struct { foo: i32 = 1, bar: i32 = 2 }; + \\const X = struct { foo: i32 align(4), bar: i32 align(4) }; + \\const X = struct { foo: i32 align(4) = 1, bar: i32 align(4) = 2 }; + \\ + ); +} + +test "zig fmt: container declaration, one item, multi line trailing comma" { + try testCanonical( + \\test "" { + \\ comptime { + \\ const X = struct { + \\ x: i32, + \\ }; + \\ } + \\} + \\ + ); +} + +test "zig fmt: container declaration, no trailing comma on separate line" { + try testTransform( + \\test "" { + \\ comptime { + \\ const X = struct { + \\ x: i32 + \\ }; + \\ } + \\} + \\ + , + \\test "" { + \\ comptime { + \\ const X = struct { x: i32 }; + \\ } + \\} + \\ + ); +} + +test "zig fmt: container declaration, line break, no trailing comma" { + try testTransform( + \\const X = struct { + \\ foo: i32, bar: i8 }; + , + \\const X = struct { foo: i32, bar: i8 }; + \\ + ); +} + +test "zig fmt: container declaration, transform trailing comma" { + try testTransform( + \\const X = struct { + \\ foo: i32, bar: i8, }; + , + \\const X = struct { + \\ foo: i32, + \\ bar: i8, + \\}; + \\ + ); +} + +test "zig fmt: container declaration, comment, add trailing comma" { + try testTransform( + \\const X = struct { + \\ foo: i32, // foo + \\ bar: i8 + \\}; + , + \\const X = struct { + \\ foo: i32, // foo + \\ bar: i8, + \\}; + \\ + ); + try testTransform( + \\const X = struct { + \\ foo: i32 // foo + \\}; + , + \\const X = struct { + \\ foo: i32, // foo + \\}; + \\ + ); +} + +test "zig fmt: container declaration, multiline string, add trailing comma" { + try testTransform( + \\const X = struct { + \\ foo: []const u8 = + \\ \\ foo + \\ , + \\ bar: i8 + \\}; + , + \\const X = struct { + \\ foo: []const u8 = + \\ \\ foo + \\ , + \\ bar: i8, + \\}; + \\ + ); +} + +test "zig fmt: container declaration, doc comment on member, add trailing comma" { + try testTransform( + \\pub const Pos = struct { + \\ /// X-axis. + \\ x: u32, + \\ /// Y-axis. + \\ y: u32 + \\}; + , + \\pub const Pos = struct { + \\ /// X-axis. + \\ x: u32, + \\ /// Y-axis. + \\ y: u32, + \\}; + \\ + ); +} + +test "zig fmt: remove empty lines at start/end of container decl" { + try testTransform( + \\const X = struct { + \\ + \\ foo: i32, + \\ + \\ bar: i8, + \\ + \\}; + \\ + , + \\const X = struct { + \\ foo: i32, + \\ + \\ bar: i8, + \\}; + \\ + ); +} + +test "zig fmt: remove empty lines at start/end of block" { + try testTransform( + \\test { + \\ + \\ if (foo) { + \\ foo(); + \\ } + \\ + \\} + \\ + , + \\test { + \\ if (foo) { + \\ foo(); + \\ } + \\} + \\ + ); +} + +test "zig fmt: allow empty line before comment at start of block" { + try testCanonical( + \\test { + \\ + \\ // foo + \\ const x = 42; + \\} + \\ + ); +} + +test "zig fmt: trailing comma in fn parameter list" { + try testCanonical( + \\pub fn f( + \\ a: i32, + \\ b: i32, + \\) i32 {} + \\pub fn f( + \\ a: i32, + \\ b: i32, + \\) align(8) i32 {} + \\pub fn f( + \\ a: i32, + \\ b: i32, + \\) addrspace(.generic) i32 {} + \\pub fn f( + \\ a: i32, + \\ b: i32, + \\) linksection(".text") i32 {} + \\pub fn f( + \\ a: i32, + \\ b: i32, + \\) callconv(.c) i32 {} + \\pub fn f( + \\ a: i32, + \\ b: i32, + \\) align(8) linksection(".text") i32 {} + \\pub fn f( + \\ a: i32, + \\ b: i32, + \\) align(8) callconv(.c) i32 {} + \\pub fn f( + \\ a: i32, + \\ b: i32, + \\) align(8) linksection(".text") callconv(.c) i32 {} + \\pub fn f( + \\ a: i32, + \\ b: i32, + \\) linksection(".text") callconv(.c) i32 {} + \\ + ); +} + +test "zig fmt: comptime struct field" { + try testCanonical( + \\const Foo = struct { + \\ a: i32, + \\ comptime b: i32 = 1234, + \\}; + \\ + ); +} + +test "zig fmt: break from block" { + try testCanonical( + \\const a = blk: { + \\ break :blk 42; + \\}; + \\const b = blk: { + \\ break :blk; + \\}; + \\const c = { + \\ break 42; + \\}; + \\const d = { + \\ break; + \\}; + \\ + ); +} + +test "zig fmt: grouped expressions (parentheses)" { + try testCanonical( + \\const r = (x + y) * (a + b); + \\ + ); +} + +test "zig fmt: c pointer type" { + try testCanonical( + \\pub extern fn repro() [*c]const u8; + \\ + ); +} + +test "zig fmt: builtin call with trailing comma" { + try testCanonical( + \\pub fn main() void { + \\ @breakpoint(); + \\ _ = @intFromBool(a); + \\ _ = @call( + \\ a, + \\ b, + \\ c, + \\ ); + \\} + \\ + ); +} + +test "zig fmt: asm expression with comptime content" { + try testTransform( + \\comptime { + \\ asm ("foo" ++ "bar"); + \\} + \\pub fn main() void { + \\ asm volatile ("foo" ++ "bar"); + \\ asm volatile ("foo" ++ "bar" + \\ : [_] "" (x), + \\ ); + \\ asm volatile ("foo" ++ "bar" + \\ : [_] "" (x), + \\ : [_] "" (y), + \\ ); + \\ asm volatile ("foo" ++ "bar" + \\ : [_] "" (x), + \\ : [_] "" (y), + \\ : "h", "e", "l", "l", "o" + \\ ); + \\} + \\ + , + \\comptime { + \\ asm ("foo" ++ "bar"); + \\} + \\pub fn main() void { + \\ asm volatile ("foo" ++ "bar"); + \\ asm volatile ("foo" ++ "bar" + \\ : [_] "" (x), + \\ ); + \\ asm volatile ("foo" ++ "bar" + \\ : [_] "" (x), + \\ : [_] "" (y), + \\ ); + \\ asm volatile ("foo" ++ "bar" + \\ : [_] "" (x), + \\ : [_] "" (y), + \\ : .{ .h = true, .e = true, .l = true, .l = true, .o = true } + \\ ); + \\} + \\ + ); +} + +test "zig fmt: array types last token" { + try testCanonical( + \\test { + \\ const x = [40]u32; + \\} + \\ + \\test { + \\ const x = [40:0]u32; + \\} + \\ + ); +} + +test "zig fmt: sentinel-terminated array type" { + try testCanonical( + \\pub fn cStrToPrefixedFileW(s: [*:0]const u8) ![PATH_MAX_WIDE:0]u16 { + \\ return sliceToPrefixedFileW(mem.toSliceConst(u8, s)); + \\} + \\ + ); +} + +test "zig fmt: sentinel-terminated slice type" { + try testCanonical( + \\pub fn toSlice(self: Buffer) [:0]u8 { + \\ return self.list.toSlice()[0..self.len()]; + \\} + \\ + ); +} + +test "zig fmt: pointer-to-one with modifiers" { + try testCanonical( + \\const x: *u32 = undefined; + \\const y: *allowzero align(8) addrspace(.generic) const volatile u32 = undefined; + \\const z: *allowzero align(8:4:2) addrspace(.generic) const volatile u32 = undefined; + \\ + ); +} + +test "zig fmt: pointer-to-many with modifiers" { + try testCanonical( + \\const x: [*]u32 = undefined; + \\const y: [*]allowzero align(8) addrspace(.generic) const volatile u32 = undefined; + \\const z: [*]allowzero align(8:4:2) addrspace(.generic) const volatile u32 = undefined; + \\ + ); +} + +test "zig fmt: sentinel pointer with modifiers" { + try testCanonical( + \\const x: [*:42]u32 = undefined; + \\const y: [*:42]allowzero align(8) addrspace(.generic) const volatile u32 = undefined; + \\const y: [*:42]allowzero align(8:4:2) addrspace(.generic) const volatile u32 = undefined; + \\ + ); +} + +test "zig fmt: c pointer with modifiers" { + try testCanonical( + \\const x: [*c]u32 = undefined; + \\const y: [*c]allowzero align(8) addrspace(.generic) const volatile u32 = undefined; + \\const z: [*c]allowzero align(8:4:2) addrspace(.generic) const volatile u32 = undefined; + \\ + ); +} + +test "zig fmt: slice with modifiers" { + try testCanonical( + \\const x: []u32 = undefined; + \\const y: []allowzero align(8) addrspace(.generic) const volatile u32 = undefined; + \\ + ); +} + +test "zig fmt: sentinel slice with modifiers" { + try testCanonical( + \\const x: [:42]u32 = undefined; + \\const y: [:42]allowzero align(8) addrspace(.generic) const volatile u32 = undefined; + \\ + ); +} + +test "zig fmt: anon literal in array" { + try testCanonical( + \\var arr: [2]Foo = .{ + \\ .{ .a = 2 }, + \\ .{ .b = 3 }, + \\}; + \\ + ); +} + +test "zig fmt: alignment in anonymous literal" { + try testTransform( + \\const a = .{ + \\ "U", "L", "F", + \\ "U'", + \\ "L'", + \\ "F'", + \\}; + \\ + , + \\const a = .{ + \\ "U", "L", "F", + \\ "U'", "L'", "F'", + \\}; + \\ + ); +} + +test "zig fmt: anon struct literal 0 element" { + try testCanonical( + \\test { + \\ const x = .{}; + \\} + \\ + ); +} + +test "zig fmt: anon struct literal 1 element" { + try testCanonical( + \\test { + \\ const x = .{ .a = b }; + \\} + \\ + ); +} + +test "zig fmt: anon struct literal 1 element comma" { + try testCanonical( + \\test { + \\ const x = .{ + \\ .a = b, + \\ }; + \\} + \\ + ); +} + +test "zig fmt: anon struct literal 2 element" { + try testCanonical( + \\test { + \\ const x = .{ .a = b, .c = d }; + \\} + \\ + ); +} + +test "zig fmt: anon struct literal 2 element comma" { + try testCanonical( + \\test { + \\ const x = .{ + \\ .a = b, + \\ .c = d, + \\ }; + \\} + \\ + ); +} + +test "zig fmt: anon struct literal 3 element" { + try testCanonical( + \\test { + \\ const x = .{ .a = b, .c = d, .e = f }; + \\} + \\ + ); +} + +test "zig fmt: anon struct literal 3 element comma" { + try testCanonical( + \\test { + \\ const x = .{ + \\ .a = b, + \\ .c = d, + \\ .e = f, + \\ }; + \\} + \\ + ); +} + +test "zig fmt: struct literal 0 element" { + try testCanonical( + \\test { + \\ const x = X{}; + \\} + \\ + ); +} + +test "zig fmt: struct literal 1 element" { + try testCanonical( + \\test { + \\ const x = X{ .a = b }; + \\} + \\ + ); +} + +test "zig fmt: Unicode code point literal larger than u8" { + try testCanonical( + \\test { + \\ const x = X{ + \\ .a = b, + \\ }; + \\} + \\ + ); +} + +test "zig fmt: struct literal 2 element" { + try testCanonical( + \\test { + \\ const x = X{ .a = b, .c = d }; + \\} + \\ + ); +} + +test "zig fmt: struct literal 2 element comma" { + try testCanonical( + \\test { + \\ const x = X{ + \\ .a = b, + \\ .c = d, + \\ }; + \\} + \\ + ); +} + +test "zig fmt: struct literal 3 element" { + try testCanonical( + \\test { + \\ const x = X{ .a = b, .c = d, .e = f }; + \\} + \\ + ); +} + +test "zig fmt: struct literal 3 element comma" { + try testCanonical( + \\test { + \\ const x = X{ + \\ .a = b, + \\ .c = d, + \\ .e = f, + \\ }; + \\} + \\ + ); +} + +test "zig fmt: anon list literal 1 element" { + try testCanonical( + \\test { + \\ const x = .{a}; + \\} + \\ + ); +} + +test "zig fmt: anon list literal 1 element comma" { + try testCanonical( + \\test { + \\ const x = .{ + \\ a, + \\ }; + \\} + \\ + ); +} + +test "zig fmt: anon list literal 2 element" { + try testCanonical( + \\test { + \\ const x = .{ a, b }; + \\} + \\ + ); +} + +test "zig fmt: anon list literal 2 element comma" { + try testCanonical( + \\test { + \\ const x = .{ + \\ a, + \\ b, + \\ }; + \\} + \\ + ); +} + +test "zig fmt: anon list literal 3 element" { + try testCanonical( + \\test { + \\ const x = .{ a, b, c }; + \\} + \\ + ); +} + +test "zig fmt: anon list literal 3 element comma" { + try testCanonical( + \\test { + \\ const x = .{ + \\ a, + \\ // foo + \\ b, + \\ + \\ c, + \\ }; + \\} + \\ + ); +} + +test "zig fmt: array literal 0 element" { + try testCanonical( + \\test { + \\ const x = [_]u32{}; + \\} + \\ + ); +} + +test "zig fmt: array literal 1 element" { + try testCanonical( + \\test { + \\ const x = [_]u32{a}; + \\} + \\ + ); +} + +test "zig fmt: array literal 1 element comma" { + try testCanonical( + \\test { + \\ const x = [1]u32{ + \\ a, + \\ }; + \\} + \\ + ); +} + +test "zig fmt: array literal 2 element" { + try testCanonical( + \\test { + \\ const x = [_]u32{ a, b }; + \\} + \\ + ); +} + +test "zig fmt: array literal 2 element comma" { + try testCanonical( + \\test { + \\ const x = [2]u32{ + \\ a, + \\ b, + \\ }; + \\} + \\ + ); +} + +test "zig fmt: array literal 3 element" { + try testCanonical( + \\test { + \\ const x = [_]u32{ a, b, c }; + \\} + \\ + ); +} + +test "zig fmt: array literal 3 element comma" { + try testCanonical( + \\test { + \\ const x = [3]u32{ + \\ a, + \\ b, + \\ c, + \\ }; + \\} + \\ + ); +} + +test "zig fmt: sentinel array literal 1 element" { + try testCanonical( + \\test { + \\ const x = [_:9000]u32{a}; + \\} + \\ + ); +} + +test "zig fmt: slices" { + try testCanonical( + \\const a = b[0..]; + \\const c = d[0..1]; + \\const d = f[0.. :0]; + \\const e = f[0..1 :0]; + \\ + ); +} + +test "zig fmt: slices with spaces in bounds" { + try testCanonical( + \\const a = b[0 + 0 ..]; + \\const c = d[0 + 0 .. 1]; + \\const c = d[0 + 0 .. :0]; + \\const e = f[0 .. 1 + 1 :0]; + \\ + ); +} + +test "zig fmt: block in slice expression" { + try testCanonical( + \\const a = b[{ + \\ _ = x; + \\}..]; + \\const c = d[0..{ + \\ _ = x; + \\ _ = y; + \\}]; + \\const e = f[0..1 :{ + \\ _ = x; + \\ _ = y; + \\ _ = z; + \\}]; + \\ + ); +} + +test "zig fmt: whitespace fixes" { + try testTransform("test \"\" {\r\n\tconst hi = x;\r\n}\n// zig fmt: off\ntest \"\"{\r\n\tconst a = b;}\r\n", + \\test "" { + \\ const hi = x; + \\} + \\// zig fmt: off + \\test ""{ + \\ const a = b;} + \\ + ); +} + +test "zig fmt: while else err prong with no block" { + try testCanonical( + \\test "" { + \\ const result = while (returnError()) |value| { + \\ break value; + \\ } else |err| @as(i32, 2); + \\ try expect(result == 2); + \\} + \\ + ); +} + +test "zig fmt: tagged union with enum values" { + try testCanonical( + \\const MultipleChoice2 = union(enum(u32)) { + \\ Unspecified1: i32, + \\ A: f32 = 20, + \\ Unspecified2: void, + \\ B: bool = 40, + \\ Unspecified3: i32, + \\ C: i8 = 60, + \\ Unspecified4: void, + \\ D: void = 1000, + \\ Unspecified5: i32, + \\}; + \\ + ); +} + +test "zig fmt: tagged union enum tag last token" { + try testCanonical( + \\test { + \\ const U = union(enum(u32)) {}; + \\} + \\ + \\test { + \\ const U = union(enum(u32)) { foo }; + \\} + \\ + \\test { + \\ const U = union(enum(u32)) { + \\ foo, + \\ }; + \\} + \\ + ); +} + +test "zig fmt: allowzero pointer" { + try testCanonical( + \\const T = [*]allowzero const u8; + \\ + ); +} + +test "zig fmt: empty enum decls" { + try testCanonical( + \\const A = enum {}; + \\const B = enum(u32) {}; + \\const C = extern enum(c_int) {}; + \\const D = packed enum(u8) {}; + \\ + ); +} + +test "zig fmt: empty union decls" { + try testCanonical( + \\const A = union {}; + \\const B = union(enum) {}; + \\const C = union(Foo) {}; + \\const D = extern union {}; + \\const E = packed union {}; + \\ + ); +} + +test "zig fmt: enum literal" { + try testCanonical( + \\const x = .hi; + \\ + ); +} + +test "zig fmt: enum literal inside array literal" { + try testCanonical( + \\test "enums in arrays" { + \\ var colors = []Color{.Green}; + \\ colors = []Colors{ .Green, .Cyan }; + \\ colors = []Colors{ + \\ .Grey, + \\ .Green, + \\ .Cyan, + \\ }; + \\} + \\ + ); +} + +test "zig fmt: character literal larger than u8" { + try testCanonical( + \\const x = '\u{01f4a9}'; + \\ + ); +} + +test "zig fmt: infix operator and then multiline string literal" { + try testCanonical( + \\const x = "" ++ + \\ \\ hi + \\; + \\ + ); +} + +test "zig fmt: infix operator and then multiline string literal over multiple lines" { + try testCanonical( + \\const x = "" ++ + \\ \\ hi0 + \\ \\ hi1 + \\ \\ hi2 + \\; + \\ + ); +} + +test "zig fmt: C pointers" { + try testCanonical( + \\const Ptr = [*c]i32; + \\ + ); +} + +test "zig fmt: threadlocal" { + try testCanonical( + \\threadlocal var x: i32 = 1234; + \\ + ); +} + +test "zig fmt: linksection" { + try testCanonical( + \\export var aoeu: u64 linksection(".text.derp") = 1234; + \\export fn _start() linksection(".text.boot") callconv(.naked) noreturn {} + \\ + ); +} + +test "zig fmt: addrspace" { + try testCanonical( + \\export var python_length: u64 align(1) addrspace(.generic); + \\export var python_color: Color addrspace(.generic) = .green; + \\export var python_legs: u0 align(8) addrspace(.generic) linksection(".python") = 0; + \\export fn python_hiss() align(8) addrspace(.generic) linksection(".python") void; + \\ + ); +} + +test "zig fmt: correctly space struct fields with doc comments" { + try testTransform( + \\pub const S = struct { + \\ /// A + \\ a: u8, + \\ /// B + \\ /// B (cont) + \\ b: u8, + \\ + \\ + \\ /// C + \\ c: u8, + \\}; + \\ + , + \\pub const S = struct { + \\ /// A + \\ a: u8, + \\ /// B + \\ /// B (cont) + \\ b: u8, + \\ + \\ /// C + \\ c: u8, + \\}; + \\ + ); +} + +test "zig fmt: doc comments on param decl" { + try testCanonical( + \\pub const Allocator = struct { + \\ shrinkFn: fn ( + \\ self: Allocator, + \\ /// Guaranteed to be the same as what was returned from most recent call to + \\ /// `allocFn`, `reallocFn`, or `shrinkFn`. + \\ old_mem: []u8, + \\ /// Guaranteed to be the same as what was returned from most recent call to + \\ /// `allocFn`, `reallocFn`, or `shrinkFn`. + \\ old_alignment: u29, + \\ /// Guaranteed to be less than or equal to `old_mem.len`. + \\ new_byte_count: usize, + \\ /// Guaranteed to be less than or equal to `old_alignment`. + \\ new_alignment: u29, + \\ ) []u8, + \\}; + \\ + ); +} + +test "zig fmt: aligned struct field" { + try testCanonical( + \\pub const S = struct { + \\ f: i32 align(32), + \\}; + \\ + ); + try testCanonical( + \\pub const S = struct { + \\ f: i32 align(32) = 1, + \\}; + \\ + ); +} + +test "zig fmt: comment to disable/enable zig fmt first" { + try testCanonical( + \\// Test trailing comma syntax + \\// zig fmt: off + \\ + \\const struct_trailing_comma = struct { x: i32, y: i32, }; + ); +} + +test "zig fmt: 'zig fmt: (off|on)' can be surrounded by arbitrary whitespace" { + try testTransform( + \\// Test trailing comma syntax + \\// zig fmt: off + \\ + \\const struct_trailing_comma = struct { x: i32, y: i32, }; + \\ + \\// zig fmt: on + , + \\// Test trailing comma syntax + \\// zig fmt: off + \\ + \\const struct_trailing_comma = struct { x: i32, y: i32, }; + \\ + \\// zig fmt: on + \\ + ); +} + +test "zig fmt: comment to disable/enable zig fmt" { + try testTransform( + \\const a = b; + \\// zig fmt: off + \\const c = d; + \\// zig fmt: on + \\const e = f; + , + \\const a = b; + \\// zig fmt: off + \\const c = d; + \\// zig fmt: on + \\const e = f; + \\ + ); +} + +test "zig fmt: line comment following 'zig fmt: off'" { + try testCanonical( + \\// zig fmt: off + \\// Test + \\const e = f; + ); +} + +test "zig fmt: doc comment following 'zig fmt: off'" { + try testCanonical( + \\// zig fmt: off + \\/// test + \\const e = f; + ); +} + +test "zig fmt: line and doc comment following 'zig fmt: off'" { + try testCanonical( + \\// zig fmt: off + \\// test 1 + \\/// test 2 + \\const e = f; + ); +} + +test "zig fmt: doc and line comment following 'zig fmt: off'" { + try testCanonical( + \\// zig fmt: off + \\/// test 1 + \\// test 2 + \\const e = f; + ); +} + +test "zig fmt: alternating 'zig fmt: off' and 'zig fmt: on'" { + try testCanonical( + \\// zig fmt: off + \\// zig fmt: on + \\// zig fmt: off + \\const e = f; + \\// zig fmt: off + \\// zig fmt: on + \\// zig fmt: off + \\const a = b; + \\// zig fmt: on + \\const c = d; + \\// zig fmt: on + \\ + ); +} + +test "zig fmt: line comment following 'zig fmt: on'" { + try testCanonical( + \\// zig fmt: off + \\const e = f; + \\// zig fmt: on + \\// test + \\const e = f; + \\ + ); +} + +test "zig fmt: doc comment following 'zig fmt: on'" { + try testCanonical( + \\// zig fmt: off + \\const e = f; + \\// zig fmt: on + \\/// test + \\const e = f; + \\ + ); +} + +test "zig fmt: line and doc comment following 'zig fmt: on'" { + try testCanonical( + \\// zig fmt: off + \\const e = f; + \\// zig fmt: on + \\// test1 + \\/// test2 + \\const e = f; + \\ + ); +} + +test "zig fmt: doc and line comment following 'zig fmt: on'" { + try testCanonical( + \\// zig fmt: off + \\const e = f; + \\// zig fmt: on + \\/// test1 + \\// test2 + \\const e = f; + \\ + ); +} + +test "zig fmt: 'zig fmt: (off|on)' works in the middle of code" { + try testTransform( + \\test "" { + \\ const x = 42; + \\ + \\ if (foobar) |y| { + \\ // zig fmt: off + \\ }// zig fmt: on + \\ + \\ const z = 420; + \\} + \\ + , + \\test "" { + \\ const x = 42; + \\ + \\ if (foobar) |y| { + \\ // zig fmt: off + \\ }// zig fmt: on + \\ + \\ const z = 420; + \\} + \\ + ); +} + +test "zig fmt: 'zig fmt: on' indentation is unchanged" { + try testCanonical( + \\fn initOptionsAndLayouts(output: *Output, context: *Context) !void { + \\ // zig fmt: off + \\ try output.main_amount.init(output, "main_amount"); errdefer optput.main_amount.deinit(); + \\ try output.main_factor.init(output, "main_factor"); errdefer optput.main_factor.deinit(); + \\ try output.view_padding.init(output, "view_padding"); errdefer optput.view_padding.deinit(); + \\ try output.outer_padding.init(output, "outer_padding"); errdefer optput.outer_padding.deinit(); + \\ // zig fmt: on + \\ + \\ // zig fmt: off + \\ try output.top.init(output, .top); errdefer optput.top.deinit(); + \\ try output.right.init(output, .right); errdefer optput.right.deinit(); + \\ try output.bottom.init(output, .bottom); errdefer optput.bottom.deinit(); + \\ try output.left.init(output, .left); errdefer optput.left.deinit(); + \\ // zig fmt: on + \\} + \\ + ); +} + +test "zig fmt: pointer of unknown length" { + try testCanonical( + \\fn foo(ptr: [*]u8) void {} + \\ + ); +} + +test "zig fmt: spaces around slice operator" { + try testCanonical( + \\var a = b[c..d]; + \\var a = b[c..d :0]; + \\var a = b[c + 1 .. d]; + \\var a = b[c + 1 ..]; + \\var a = b[c .. d + 1]; + \\var a = b[c .. d + 1 :0]; + \\var a = b[c.a..d.e]; + \\var a = b[c.a..d.e :0]; + \\ + ); +} + +test "zig fmt: 2nd arg multiline string" { + try testCanonical( + \\comptime { + \\ cases.addAsm("hello world linux x86_64", + \\ \\.text + \\ , "Hello, world!\n"); + \\} + \\ + ); + try testTransform( + \\comptime { + \\ cases.addAsm("hello world linux x86_64", + \\ \\.text + \\ , "Hello, world!\n",); + \\} + , + \\comptime { + \\ cases.addAsm( + \\ "hello world linux x86_64", + \\ \\.text + \\ , + \\ "Hello, world!\n", + \\ ); + \\} + \\ + ); +} + +test "zig fmt: 2nd arg multiline string many args" { + try testCanonical( + \\comptime { + \\ cases.addAsm("hello world linux x86_64", + \\ \\.text + \\ , "Hello, world!\n", "Hello, world!\n"); + \\} + \\ + ); +} + +test "zig fmt: final arg multiline string" { + try testCanonical( + \\comptime { + \\ cases.addAsm("hello world linux x86_64", "Hello, world!\n", + \\ \\.text + \\ ); + \\} + \\ + ); +} + +test "zig fmt: if condition wraps" { + try testTransform( + \\comptime { + \\ if (cond and + \\ cond) { + \\ return x; + \\ } + \\ while (cond and + \\ cond) { + \\ return x; + \\ } + \\ if (a == b and + \\ c) { + \\ a = b; + \\ } + \\ while (a == b and + \\ c) { + \\ a = b; + \\ } + \\ if ((cond and + \\ cond)) { + \\ return x; + \\ } + \\ while ((cond and + \\ cond)) { + \\ return x; + \\ } + \\ var a = if (a) |*f| x: { + \\ break :x &a.b; + \\ } else |err| err; + \\ var a = if (cond and + \\ cond) |*f| + \\ x: { + \\ break :x &a.b; + \\ } else |err| err; + \\} + , + \\comptime { + \\ if (cond and + \\ cond) + \\ { + \\ return x; + \\ } + \\ while (cond and + \\ cond) + \\ { + \\ return x; + \\ } + \\ if (a == b and + \\ c) + \\ { + \\ a = b; + \\ } + \\ while (a == b and + \\ c) + \\ { + \\ a = b; + \\ } + \\ if ((cond and + \\ cond)) + \\ { + \\ return x; + \\ } + \\ while ((cond and + \\ cond)) + \\ { + \\ return x; + \\ } + \\ var a = if (a) |*f| x: { + \\ break :x &a.b; + \\ } else |err| err; + \\ var a = if (cond and + \\ cond) |*f| + \\ x: { + \\ break :x &a.b; + \\ } else |err| err; + \\} + \\ + ); +} + +test "zig fmt: if condition has line break but must not wrap" { + try testCanonical( + \\comptime { + \\ if (self.user_input_options.put( + \\ name, + \\ UserInputOption{ + \\ .name = name, + \\ .used = false, + \\ }, + \\ ) catch unreachable) |*prev_value| { + \\ foo(); + \\ bar(); + \\ } + \\ if (put( + \\ a, + \\ b, + \\ )) { + \\ foo(); + \\ } + \\} + \\ + ); +} + +test "zig fmt: if condition has line break but must not wrap (no fn call comma)" { + try testCanonical( + \\comptime { + \\ if (self.user_input_options.put(name, UserInputOption{ + \\ .name = name, + \\ .used = false, + \\ }) catch unreachable) |*prev_value| { + \\ foo(); + \\ bar(); + \\ } + \\ if (put( + \\ a, + \\ b, + \\ )) { + \\ foo(); + \\ } + \\} + \\ + ); +} + +test "zig fmt: function call with multiline argument" { + try testCanonical( + \\comptime { + \\ self.user_input_options.put(name, UserInputOption{ + \\ .name = name, + \\ .used = false, + \\ }); + \\} + \\ + ); +} + +test "zig fmt: if-else with comment before else" { + try testCanonical( + \\comptime { + \\ // cexp(finite|nan +- i inf|nan) = nan + i nan + \\ if ((hx & 0x7fffffff) != 0x7f800000) { + \\ return Complex(f32).init(y - y, y - y); + \\ } // cexp(-inf +- i inf|nan) = 0 + i0 + \\ else if (hx & 0x80000000 != 0) { + \\ return Complex(f32).init(0, 0); + \\ } // cexp(+inf +- i inf|nan) = inf + i nan + \\ else { + \\ return Complex(f32).init(x, y - y); + \\ } + \\} + \\ + ); +} + +test "zig fmt: if nested" { + try testCanonical( + \\pub fn foo() void { + \\ return if ((aInt & bInt) >= 0) + \\ if (aInt < bInt) + \\ GE_LESS + \\ else if (aInt == bInt) + \\ GE_EQUAL + \\ else + \\ GE_GREATER + \\ // comment + \\ else if (aInt > bInt) + \\ GE_LESS + \\ else if (aInt == bInt) + \\ GE_EQUAL + \\ else + \\ GE_GREATER; + \\ // comment + \\} + \\ + ); +} + +test "zig fmt: respect line breaks in if-else" { + try testCanonical( + \\comptime { + \\ return if (cond) a else b; + \\ return if (cond) + \\ a + \\ else + \\ b; + \\ return if (cond) + \\ a + \\ else if (cond) + \\ b + \\ else + \\ c; + \\} + \\ + ); +} + +test "zig fmt: respect line breaks after infix operators" { + try testCanonical( + \\comptime { + \\ self.crc = + \\ lookup_tables[0][p[7]] ^ + \\ lookup_tables[1][p[6]] ^ + \\ lookup_tables[2][p[5]] ^ + \\ lookup_tables[3][p[4]] ^ + \\ lookup_tables[4][@as(u8, self.crc >> 24)] ^ + \\ lookup_tables[5][@as(u8, self.crc >> 16)] ^ + \\ lookup_tables[6][@as(u8, self.crc >> 8)] ^ + \\ lookup_tables[7][@as(u8, self.crc >> 0)]; + \\} + \\ + ); +} + +test "zig fmt: fn decl with trailing comma" { + try testTransform( + \\fn foo(a: i32, b: i32,) void {} + , + \\fn foo( + \\ a: i32, + \\ b: i32, + \\) void {} + \\ + ); +} + +test "zig fmt: enum decl with no trailing comma" { + try testTransform( + \\const StrLitKind = enum {Normal, C}; + , + \\const StrLitKind = enum { Normal, C }; + \\ + ); +} + +test "zig fmt: switch comment before prong" { + try testCanonical( + \\comptime { + \\ switch (a) { + \\ // hi + \\ 0 => {}, + \\ } + \\} + \\ + ); +} + +test "zig fmt: switch comment after prong" { + try testCanonical( + \\comptime { + \\ switch (a) { + \\ 0, + \\ // hi + \\ => {}, + \\ } + \\} + \\ + ); +} + +test "zig fmt: struct literal no trailing comma" { + try testTransform( + \\const a = foo{ .x = 1, .y = 2 }; + \\const a = foo{ .x = 1, + \\ .y = 2 }; + \\const a = foo{ .x = 1, + \\ .y = 2, }; + , + \\const a = foo{ .x = 1, .y = 2 }; + \\const a = foo{ .x = 1, .y = 2 }; + \\const a = foo{ + \\ .x = 1, + \\ .y = 2, + \\}; + \\ + ); +} + +test "zig fmt: struct literal containing a multiline expression" { + try testTransform( + \\const a = A{ .x = if (f1()) 10 else 20 }; + \\const a = A{ .x = if (f1()) 10 else 20, }; + \\const a = A{ .x = if (f1()) + \\ 10 else 20 }; + \\const a = A{ .x = if (f1()) + \\ 10 else 20,}; + \\const a = A{ .x = if (f1()) 10 else 20, .y = f2() + 100 }; + \\const a = A{ .x = if (f1()) 10 else 20, .y = f2() + 100, }; + \\const a = A{ .x = if (f1()) + \\ 10 else 20}; + \\const a = A{ .x = if (f1()) + \\ 10 else 20,}; + \\const a = A{ .x = switch(g) {0 => "ok", else => "no"} }; + \\const a = A{ .x = switch(g) {0 => "ok", else => "no"}, }; + \\ + , + \\const a = A{ .x = if (f1()) 10 else 20 }; + \\const a = A{ + \\ .x = if (f1()) 10 else 20, + \\}; + \\const a = A{ .x = if (f1()) + \\ 10 + \\else + \\ 20 }; + \\const a = A{ + \\ .x = if (f1()) + \\ 10 + \\ else + \\ 20, + \\}; + \\const a = A{ .x = if (f1()) 10 else 20, .y = f2() + 100 }; + \\const a = A{ + \\ .x = if (f1()) 10 else 20, + \\ .y = f2() + 100, + \\}; + \\const a = A{ .x = if (f1()) + \\ 10 + \\else + \\ 20 }; + \\const a = A{ + \\ .x = if (f1()) + \\ 10 + \\ else + \\ 20, + \\}; + \\const a = A{ .x = switch (g) { + \\ 0 => "ok", + \\ else => "no", + \\} }; + \\const a = A{ + \\ .x = switch (g) { + \\ 0 => "ok", + \\ else => "no", + \\ }, + \\}; + \\ + ); +} + +test "zig fmt: array literal with hint" { + try testTransform( + \\const a = []u8{ + \\ 1, 2, // + \\ 3, + \\ 4, + \\ 5, + \\ 6, + \\ 7 }; + \\const a = []u8{ + \\ 1, 2, // + \\ 3, + \\ 4, + \\ 5, + \\ 6, + \\ 7, 8 }; + \\const a = []u8{ + \\ 1, 2, // + \\ 3, + \\ 4, + \\ 5, + \\ 6, // blah + \\ 7, 8 }; + \\const a = []u8{ + \\ 1, 2, // + \\ 3, // + \\ 4, + \\ 5, + \\ 6, + \\ 7 }; + \\const a = []u8{ + \\ 1, + \\ 2, + \\ 3, 4, // + \\ 5, 6, // + \\ 7, 8, // + \\}; + , + \\const a = []u8{ + \\ 1, 2, // + \\ 3, 4, + \\ 5, 6, + \\ 7, + \\}; + \\const a = []u8{ + \\ 1, 2, // + \\ 3, 4, + \\ 5, 6, + \\ 7, 8, + \\}; + \\const a = []u8{ + \\ 1, 2, // + \\ 3, 4, + \\ 5, + \\ 6, // blah + \\ 7, + \\ 8, + \\}; + \\const a = []u8{ + \\ 1, 2, // + \\ 3, // + \\ 4, + \\ 5, + \\ 6, + \\ 7, + \\}; + \\const a = []u8{ + \\ 1, + \\ 2, + \\ 3, 4, // + \\ 5, 6, // + \\ 7, 8, // + \\}; + \\ + ); +} + +test "zig fmt: array literal vertical column alignment" { + try testTransform( + \\const a = []u8{ + \\ 1000, 200, + \\ 30, 4, + \\ 50000, 60, + \\}; + \\const a = []u8{0, 1, 2, 3, 40, + \\ 4,5,600,7, + \\ 80, + \\ 9, 10, 11, 0, 13, 14, 15,}; + \\const a = [12]u8{ + \\ 31, 28, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31 }; + \\const a = [12]u8{ + \\ 31, 28, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31, }; + \\ + , + \\const a = []u8{ + \\ 1000, 200, + \\ 30, 4, + \\ 50000, 60, + \\}; + \\const a = []u8{ + \\ 0, 1, 2, 3, 40, + \\ 4, 5, 600, 7, 80, + \\ 9, 10, 11, 0, 13, + \\ 14, 15, + \\}; + \\const a = [12]u8{ 31, 28, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31 }; + \\const a = [12]u8{ + \\ 31, + \\ 28, + \\ 31, + \\ 30, + \\ 31, + \\ 30, + \\ 31, + \\ 31, + \\ 30, + \\ 31, + \\ 30, + \\ 31, + \\}; + \\ + ); +} + +test "zig fmt: multiline string with backslash at end of line" { + try testCanonical( + \\comptime { + \\ err( + \\ \\\ + \\ ); + \\} + \\ + ); +} + +test "zig fmt: multiline string parameter in fn call with trailing comma" { + try testCanonical( + \\fn foo() void { + \\ try stdout.print( + \\ \\ZIG_CMAKE_BINARY_DIR {s} + \\ \\ZIG_C_HEADER_FILES {s} + \\ \\ZIG_DIA_GUIDS_LIB {s} + \\ \\ + \\ , + \\ std.mem.sliceTo(c.ZIG_CMAKE_BINARY_DIR, 0), + \\ std.mem.sliceTo(c.ZIG_CXX_COMPILER, 0), + \\ std.mem.sliceTo(c.ZIG_DIA_GUIDS_LIB, 0), + \\ ); + \\} + \\ + ); +} + +test "zig fmt: trailing comma on fn call" { + try testCanonical( + \\comptime { + \\ var module = try Module.create( + \\ allocator, + \\ zig_lib_dir, + \\ full_cache_dir, + \\ ); + \\} + \\ + ); +} + +test "zig fmt: multi line arguments without last comma" { + try testTransform( + \\pub fn foo( + \\ a: usize, + \\ b: usize, + \\ c: usize, + \\ d: usize + \\) usize { + \\ return a + b + c + d; + \\} + \\ + , + \\pub fn foo(a: usize, b: usize, c: usize, d: usize) usize { + \\ return a + b + c + d; + \\} + \\ + ); +} + +test "zig fmt: empty block with only comment" { + try testCanonical( + \\comptime { + \\ { + \\ // comment + \\ } + \\} + \\ + ); +} + +test "zig fmt: trailing commas on struct decl" { + try testTransform( + \\const RoundParam = struct { + \\ k: usize, s: u32, t: u32 + \\}; + \\const RoundParam = struct { + \\ k: usize, s: u32, t: u32, + \\}; + , + \\const RoundParam = struct { k: usize, s: u32, t: u32 }; + \\const RoundParam = struct { + \\ k: usize, + \\ s: u32, + \\ t: u32, + \\}; + \\ + ); +} + +test "zig fmt: extra newlines at the end" { + try testTransform( + \\const a = b; + \\ + \\ + \\ + , + \\const a = b; + \\ + ); +} + +test "zig fmt: simple asm" { + try testTransform( + \\comptime { + \\ asm volatile ( + \\ \\.globl aoeu; + \\ \\.type aoeu, @function; + \\ \\.set aoeu, derp; + \\ ); + \\ + \\ asm ("not real assembly" + \\ :[a] "x" (x),); + \\ asm ("not real assembly" + \\ :[a] "x" (->i32),:[a] "x" (1),); + \\ asm ("still not real assembly" + \\ :::"a","b",); + \\} + , + \\comptime { + \\ asm volatile ( + \\ \\.globl aoeu; + \\ \\.type aoeu, @function; + \\ \\.set aoeu, derp; + \\ ); + \\ + \\ asm ("not real assembly" + \\ : [a] "x" (x), + \\ ); + \\ asm ("not real assembly" + \\ : [a] "x" (-> i32), + \\ : [a] "x" (1), + \\ ); + \\ asm ("still not real assembly" ::: .{ .a = true, .b = true }); + \\} + \\ + ); +} + +test "zig fmt: nested struct literal with one item" { + try testCanonical( + \\const a = foo{ + \\ .item = bar{ .a = b }, + \\}; + \\ + ); +} + +test "zig fmt: switch cases trailing comma" { + try testTransform( + \\test "switch cases trailing comma"{ + \\ switch (x) { + \\ 1,2,3 => {}, + \\ 4,5, => {}, + \\ 6... 8, => {}, + \\ 9 ... + \\ 10 => {}, + \\ 11 => {}, + \\ 12, => {}, + \\ else => {}, + \\ } + \\} + , + \\test "switch cases trailing comma" { + \\ switch (x) { + \\ 1, 2, 3 => {}, + \\ 4, + \\ 5, + \\ => {}, + \\ 6...8, + \\ => {}, + \\ 9...10 => {}, + \\ 11 => {}, + \\ 12, + \\ => {}, + \\ else => {}, + \\ } + \\} + \\ + ); +} + +test "zig fmt: slice align" { + try testCanonical( + \\const A = struct { + \\ items: []align(A) T, + \\}; + \\ + ); +} + +test "zig fmt: add trailing comma to array literal" { + try testTransform( + \\comptime { + \\ return []u16{'m', 's', 'y', 's', '-' // hi + \\ }; + \\ return []u16{'m', 's', 'y', 's', + \\ '-'}; + \\ return []u16{'m', 's', 'y', 's', '-'}; + \\} + , + \\comptime { + \\ return []u16{ + \\ 'm', 's', 'y', 's', '-', // hi + \\ }; + \\ return []u16{ 'm', 's', 'y', 's', '-' }; + \\ return []u16{ 'm', 's', 'y', 's', '-' }; + \\} + \\ + ); +} + +test "zig fmt: first thing in file is line comment" { + try testCanonical( + \\// Introspection and determination of system libraries needed by zig. + \\ + \\// Introspection and determination of system libraries needed by zig. + \\ + \\const std = @import("std"); + \\ + ); +} + +test "zig fmt: line comment after doc comment" { + try testCanonical( + \\/// doc comment + \\// line comment + \\fn foo() void {} + \\ + ); +} + +test "zig fmt: bit field alignment" { + try testCanonical( + \\test { + \\ assert(@TypeOf(&blah.b) == *align(1:3:6) const u3); + \\} + \\ + ); +} + +test "zig fmt: nested switch" { + try testCanonical( + \\test { + \\ switch (state) { + \\ TermState.Start => switch (c) { + \\ '\x1b' => state = TermState.Escape, + \\ else => try out.writeByte(c), + \\ }, + \\ } + \\} + \\ + ); +} + +test "zig fmt: float literal with exponent" { + try testCanonical( + \\pub const f64_true_min = 4.94065645841246544177e-324; + \\const threshold = 0x1.a827999fcef32p+1022; + \\ + ); +} + +test "zig fmt: if-else end of comptime" { + try testCanonical( + \\comptime { + \\ if (a) { + \\ b(); + \\ } else { + \\ b(); + \\ } + \\} + \\ + ); +} + +test "zig fmt: nested blocks" { + try testCanonical( + \\comptime { + \\ { + \\ { + \\ { + \\ a(); + \\ } + \\ } + \\ } + \\} + \\ + ); +} + +test "zig fmt: block with same line comment after end brace" { + try testCanonical( + \\comptime { + \\ { + \\ b(); + \\ } // comment + \\} + \\ + ); +} + +test "zig fmt: statements with comment between" { + try testCanonical( + \\comptime { + \\ a = b; + \\ // comment + \\ a = b; + \\} + \\ + ); +} + +test "zig fmt: statements with empty line between" { + try testCanonical( + \\comptime { + \\ a = b; + \\ + \\ a = b; + \\} + \\ + ); +} + +test "zig fmt: ptr deref operator and unwrap optional operator" { + try testCanonical( + \\const a = b.*; + \\const a = b.?; + \\ + ); +} + +test "zig fmt: comment after if before another if" { + try testCanonical( + \\test "aoeu" { + \\ // comment + \\ if (x) { + \\ bar(); + \\ } + \\} + \\ + \\test "aoeu" { + \\ if (x) { + \\ foo(); + \\ } + \\ // comment + \\ if (x) { + \\ bar(); + \\ } + \\} + \\ + ); +} + +test "zig fmt: line comment between if block and else keyword" { + try testCanonical( + \\test "aoeu" { + \\ // cexp(finite|nan +- i inf|nan) = nan + i nan + \\ if ((hx & 0x7fffffff) != 0x7f800000) { + \\ return Complex(f32).init(y - y, y - y); + \\ } + \\ // cexp(-inf +- i inf|nan) = 0 + i0 + \\ else if (hx & 0x80000000 != 0) { + \\ return Complex(f32).init(0, 0); + \\ } + \\ // cexp(+inf +- i inf|nan) = inf + i nan + \\ // another comment + \\ else { + \\ return Complex(f32).init(x, y - y); + \\ } + \\} + \\ + ); +} + +test "zig fmt: same line comments in expression" { + try testCanonical( + \\test "aoeu" { + \\ const x = ( // a + \\ 0 // b + \\ ); // c + \\} + \\ + ); +} + +test "zig fmt: add comma on last switch prong" { + try testTransform( + \\test "aoeu" { + \\switch (self.init_arg_expr) { + \\ InitArg.Type => |t| { }, + \\ InitArg.None, + \\ InitArg.Enum => { } + \\} + \\ switch (self.init_arg_expr) { + \\ InitArg.Type => |t| { }, + \\ InitArg.None, + \\ InitArg.Enum => { }//line comment + \\ } + \\} + , + \\test "aoeu" { + \\ switch (self.init_arg_expr) { + \\ InitArg.Type => |t| {}, + \\ InitArg.None, InitArg.Enum => {}, + \\ } + \\ switch (self.init_arg_expr) { + \\ InitArg.Type => |t| {}, + \\ InitArg.None, InitArg.Enum => {}, //line comment + \\ } + \\} + \\ + ); +} + +test "zig fmt: same-line comment after a statement" { + try testCanonical( + \\test "" { + \\ a = b; + \\ debug.assert(H.digest_size <= H.block_size); // HMAC makes this assumption + \\ a = b; + \\} + \\ + ); +} + +test "zig fmt: same-line comment after var decl in struct" { + try testCanonical( + \\pub const vfs_cap_data = extern struct { + \\ const Data = struct {}; // when on disk. + \\}; + \\ + ); +} + +test "zig fmt: same-line comment after field decl" { + try testCanonical( + \\pub const dirent = extern struct { + \\ d_name: u8, + \\ d_name: u8, // comment 1 + \\ d_name: u8, + \\ d_name: u8, // comment 2 + \\ d_name: u8, + \\}; + \\ + ); +} + +test "zig fmt: same-line comment after switch prong" { + try testCanonical( + \\test "" { + \\ switch (err) { + \\ error.PathAlreadyExists => {}, // comment 2 + \\ else => return err, // comment 1 + \\ } + \\} + \\ + ); +} + +test "zig fmt: same-line comment after non-block if expression" { + try testCanonical( + \\comptime { + \\ if (sr > n_uword_bits - 1) // d > r + \\ return 0; + \\} + \\ + ); +} + +test "zig fmt: same-line comment on comptime expression" { + try testCanonical( + \\test "" { + \\ comptime assert(@typeInfo(T) == .int); // must pass an integer to absInt + \\} + \\ + ); +} + +test "zig fmt: switch with empty body" { + try testCanonical( + \\test "" { + \\ foo() catch |err| switch (err) {}; + \\} + \\ + ); +} + +test "zig fmt: line comments in struct initializer" { + try testCanonical( + \\fn foo() void { + \\ return Self{ + \\ .a = b, + \\ + \\ // Initialize these two fields to buffer_size so that + \\ // in `readFn` we treat the state as being able to read + \\ .start_index = buffer_size, + \\ .end_index = buffer_size, + \\ + \\ // middle + \\ + \\ .a = b, + \\ + \\ // end + \\ }; + \\} + \\ + ); +} + +test "zig fmt: first line comment in struct initializer" { + try testCanonical( + \\pub fn acquire(self: *Self) HeldLock { + \\ return HeldLock{ + \\ // guaranteed allocation elision + \\ .held = self.lock.acquire(), + \\ .value = &self.private_data, + \\ }; + \\} + \\ + ); +} + +test "zig fmt: doc comments before struct field" { + try testCanonical( + \\pub const Allocator = struct { + \\ /// Allocate byte_count bytes and return them in a slice, with the + \\ /// slice's pointer aligned at least to alignment bytes. + \\ allocFn: fn () void, + \\}; + \\ + ); +} + +test "zig fmt: error set declaration" { + try testCanonical( + \\const E = error{ + \\ A, + \\ B, + \\ + \\ C, + \\}; + \\ + \\const Error = error{ + \\ /// no more memory + \\ OutOfMemory, + \\}; + \\ + \\const Error = error{ + \\ /// no more memory + \\ OutOfMemory, + \\ + \\ /// another + \\ Another, + \\ + \\ // end + \\}; + \\ + \\const Error = error{OutOfMemory}; + \\const Error = error{}; + \\ + \\const Error = error{ OutOfMemory, OutOfTime }; + \\ + ); +} + +test "zig fmt: union(enum(u32)) with assigned enum values" { + try testCanonical( + \\const MultipleChoice = union(enum(u32)) { + \\ A = 20, + \\ B = 40, + \\ C = 60, + \\ D = 1000, + \\}; + \\ + ); +} + +test "zig fmt: resume from suspend block" { + try testCanonical( + \\fn foo() void { + \\ suspend { + \\ resume @frame(); + \\ } + \\} + \\ + ); +} + +test "zig fmt: comments before error set decl" { + try testCanonical( + \\const UnexpectedError = error{ + \\ /// The Operating System returned an undocumented error code. + \\ Unexpected, + \\ // another + \\ Another, + \\ + \\ // in between + \\ + \\ // at end + \\}; + \\ + ); +} + +test "zig fmt: comments before switch prong" { + try testCanonical( + \\test "" { + \\ switch (err) { + \\ error.PathAlreadyExists => continue, + \\ + \\ // comment 1 + \\ + \\ // comment 2 + \\ else => return err, + \\ // at end + \\ } + \\} + \\ + ); +} + +test "zig fmt: comments before var decl in struct" { + try testCanonical( + \\pub const vfs_cap_data = extern struct { + \\ // All of these are mandated as little endian + \\ // when on disk. + \\ const Data = struct { + \\ permitted: u32, + \\ inheritable: u32, + \\ }; + \\ + \\ // in between + \\ + \\ /// All of these are mandated as little endian + \\ /// when on disk. + \\ const Data = struct { + \\ permitted: u32, + \\ inheritable: u32, + \\ }; + \\ + \\ // at end + \\}; + \\ + ); +} + +test "zig fmt: array literal with 1 item on 1 line" { + try testCanonical( + \\var s = []const u64{0} ** 25; + \\ + ); +} + +test "zig fmt: comments before global variables" { + try testCanonical( + \\/// Foo copies keys and values before they go into the map, and + \\/// frees them when they get removed. + \\pub const Foo = struct {}; + \\ + ); +} + +test "zig fmt: comments in statements" { + try testCanonical( + \\test "std" { + \\ // statement comment + \\ _ = @import("foo/bar.zig"); + \\ + \\ // middle + \\ // middle2 + \\ + \\ // end + \\} + \\ + ); +} + +test "zig fmt: comments before test decl" { + try testCanonical( + \\// top level normal comment + \\test "hi" {} + \\ + \\// middle + \\ + \\// end + \\ + ); +} + +test "zig fmt: preserve spacing" { + try testCanonical( + \\const std = @import("std"); + \\ + \\pub fn main() !void { + \\ var stdout_file = std.lol.abcd; + \\ var stdout_file = std.lol.abcd; + \\ + \\ var stdout_file = std.lol.abcd; + \\ var stdout_file = std.lol.abcd; + \\} + \\ + ); +} + +test "zig fmt: return types" { + try testCanonical( + \\pub fn main() !void {} + \\pub fn main() FooBar {} + \\pub fn main() i32 {} + \\ + ); +} + +test "zig fmt: imports" { + try testCanonical( + \\const std = @import("std"); + \\const std = @import(); + \\ + ); +} + +test "zig fmt: global declarations" { + try testCanonical( + \\const a = b; + \\pub const a = b; + \\var a = b; + \\pub var a = b; + \\const a: i32 = b; + \\pub const a: i32 = b; + \\var a: i32 = b; + \\pub var a: i32 = b; + \\extern const a: i32 = b; + \\pub extern const a: i32 = b; + \\extern var a: i32 = b; + \\pub extern var a: i32 = b; + \\extern "a" const a: i32 = b; + \\pub extern "a" const a: i32 = b; + \\extern "a" var a: i32 = b; + \\pub extern "a" var a: i32 = b; + \\ + ); +} + +test "zig fmt: extern declaration" { + try testCanonical( + \\extern var foo: c_int; + \\ + ); +} + +test "zig fmt: alignment" { + try testCanonical( + \\var foo: c_int align(1); + \\ + ); +} + +test "zig fmt: C main" { + try testCanonical( + \\fn main(argc: c_int, argv: **u8) c_int { + \\ const a = b; + \\} + \\ + ); +} + +test "zig fmt: return" { + try testCanonical( + \\fn foo(argc: c_int, argv: **u8) c_int { + \\ return 0; + \\} + \\ + \\fn bar() void { + \\ return; + \\} + \\ + ); +} + +test "zig fmt: function attributes" { + try testCanonical( + \\export fn foo() void {} + \\pub export fn foo() void {} + \\extern fn foo() void; + \\pub extern fn foo() void; + \\extern "c" fn foo() void; + \\pub extern "c" fn foo() void; + \\noinline fn foo() void {} + \\pub noinline fn foo() void {} + \\ + ); +} + +test "zig fmt: nested pointers with ** tokens" { + try testCanonical( + \\const x: *u32 = undefined; + \\const x: **u32 = undefined; + \\const x: ***u32 = undefined; + \\const x: ****u32 = undefined; + \\const x: *****u32 = undefined; + \\const x: ******u32 = undefined; + \\const x: *******u32 = undefined; + \\ + ); +} + +test "zig fmt: pointer attributes" { + try testCanonical( + \\extern fn f1(s: *align(*u8) u8) c_int; + \\extern fn f2(s: **align(1) *const *volatile u8) c_int; + \\extern fn f3(s: *align(1) const *align(1) volatile *const volatile u8) c_int; + \\extern fn f4(s: *align(1) const volatile u8) c_int; + \\extern fn f5(s: [*:0]align(1) const volatile u8) c_int; + \\ + ); +} + +test "zig fmt: slice attributes" { + try testCanonical( + \\extern fn f1(s: []align(*u8) u8) c_int; + \\extern fn f2(s: []align(1) []const []volatile u8) c_int; + \\extern fn f3(s: []align(1) const [:0]align(1) volatile []const volatile u8) c_int; + \\extern fn f4(s: []align(1) const volatile u8) c_int; + \\extern fn f5(s: [:0]align(1) const volatile u8) c_int; + \\ + ); +} + +test "zig fmt: test declaration" { + try testCanonical( + \\test "test name" { + \\ const a = 1; + \\ var b = 1; + \\} + \\ + ); +} + +test "zig fmt: destructure" { + try testCanonical( + \\comptime { + \\ var w: u8, var x: u8 = .{ 1, 2 }; + \\ w, var y: u8 = .{ 3, 4 }; + \\ var z: u8, x = .{ 5, 6 }; + \\ y, z = .{ 7, 8 }; + \\} + \\ + \\comptime { + \\ comptime var w, var x = .{ 1, 2 }; + \\ comptime w, var y = .{ 3, 4 }; + \\ comptime var z, x = .{ 5, 6 }; + \\ comptime y, z = .{ 7, 8 }; + \\} + \\ + ); +} + +test "zig fmt: infix operators" { + try testCanonical( + \\test { + \\ var i = undefined; + \\ i = 2; + \\ i *= 2; + \\ i |= 2; + \\ i ^= 2; + \\ i <<= 2; + \\ i >>= 2; + \\ i &= 2; + \\ i *= 2; + \\ i *%= 2; + \\ i -= 2; + \\ i -%= 2; + \\ i += 2; + \\ i +%= 2; + \\ i /= 2; + \\ i %= 2; + \\ _ = i == i; + \\ _ = i != i; + \\ _ = i != i; + \\ _ = i.i; + \\ _ = i || i; + \\ _ = i!i; + \\ _ = i ** i; + \\ _ = i ++ i; + \\ _ = i orelse i; + \\ _ = i % i; + \\ _ = i / i; + \\ _ = i *% i; + \\ _ = i * i; + \\ _ = i -% i; + \\ _ = i - i; + \\ _ = i +% i; + \\ _ = i + i; + \\ _ = i << i; + \\ _ = i >> i; + \\ _ = i & i; + \\ _ = i ^ i; + \\ _ = i | i; + \\ _ = i >= i; + \\ _ = i <= i; + \\ _ = i > i; + \\ _ = i < i; + \\ _ = i and i; + \\ _ = i or i; + \\} + \\ + ); +} + +test "zig fmt: precedence" { + try testCanonical( + \\test "precedence" { + \\ a!b(); + \\ (a!b)(); + \\ !a!b; + \\ !(a!b); + \\ !a{}; + \\ !(a{}); + \\ a + b{}; + \\ (a + b){}; + \\ a << b + c; + \\ (a << b) + c; + \\ a & b << c; + \\ (a & b) << c; + \\ a ^ b & c; + \\ (a ^ b) & c; + \\ a | b ^ c; + \\ (a | b) ^ c; + \\ a == b | c; + \\ (a == b) | c; + \\ a and b == c; + \\ (a and b) == c; + \\ a or b and c; + \\ (a or b) and c; + \\ (a or b) and c; + \\ a == b and c == d; + \\} + \\ + ); +} + +test "zig fmt: prefix operators" { + try testCanonical( + \\test "prefix operators" { + \\ try return --%~!&0; + \\} + \\ + ); +} + +test "zig fmt: call expression" { + try testCanonical( + \\test "test calls" { + \\ a(); + \\ a(1); + \\ a(1, 2); + \\ a(1, 2) + a(1, 2); + \\} + \\ + ); +} + +test "zig fmt: anytype type" { + try testCanonical( + \\fn print(args: anytype) @This() {} + \\ + ); +} + +test "zig fmt: functions" { + try testCanonical( + \\extern fn puts(s: *const u8) c_int; + \\extern "c" fn puts(s: *const u8) c_int; + \\export fn puts(s: *const u8) c_int; + \\inline fn puts(s: *const u8) c_int; + \\noinline fn puts(s: *const u8) c_int; + \\pub extern fn puts(s: *const u8) c_int; + \\pub extern "c" fn puts(s: *const u8) c_int; + \\pub export fn puts(s: *const u8) c_int; + \\pub inline fn puts(s: *const u8) c_int; + \\pub noinline fn puts(s: *const u8) c_int; + \\pub extern fn puts(s: *const u8) align(2 + 2) c_int; + \\pub extern "c" fn puts(s: *const u8) align(2 + 2) c_int; + \\pub export fn puts(s: *const u8) align(2 + 2) c_int; + \\pub inline fn puts(s: *const u8) align(2 + 2) c_int; + \\pub noinline fn puts(s: *const u8) align(2 + 2) c_int; + \\pub fn callInlineFn(func: fn () callconv(.@"inline") void) void { + \\ func(); + \\} + \\ + ); +} + +test "zig fmt: multiline string" { + try testCanonical( + \\test "" { + \\ const s1 = + \\ \\one + \\ \\two) + \\ \\three + \\ ; + \\ const s3 = // hi + \\ \\one + \\ \\two) + \\ \\three + \\ ; + \\} + \\ + ); +} + +test "zig fmt: multiline string with CRLF line endings" { + try testTransform("" ++ + "const s =\r\n" ++ + " \\\\one\r\n" ++ + " \\\\two)\r\n" ++ + " \\\\three\r\n" ++ + ";\r\n", + \\const s = + \\ \\one + \\ \\two) + \\ \\three + \\; + \\ + ); +} + +test "zig fmt: values" { + try testCanonical( + \\test "values" { + \\ 1; + \\ 1.0; + \\ "string"; + \\ 'c'; + \\ true; + \\ false; + \\ null; + \\ undefined; + \\ anyerror; + \\ this; + \\ unreachable; + \\} + \\ + ); +} + +test "zig fmt: indexing" { + try testCanonical( + \\test "test index" { + \\ a[0]; + \\ a[0 + 5]; + \\ a[0..]; + \\ a[0..5]; + \\ a[a[0]]; + \\ a[a[0..]]; + \\ a[a[0..5]]; + \\ a[a[0]..]; + \\ a[a[0..5]..]; + \\ a[a[0]..a[0]]; + \\ a[a[0..5]..a[0]]; + \\ a[a[0..5]..a[0..5]]; + \\} + \\ + ); +} + +test "zig fmt: struct declaration" { + try testCanonical( + \\const S = struct { + \\ const Self = @This(); + \\ f1: u8, + \\ f3: u8, + \\ + \\ f2: u8, + \\ + \\ fn method(self: *Self) Self { + \\ return self.*; + \\ } + \\}; + \\ + \\const Ps = packed struct { + \\ a: u8, + \\ b: u8, + \\ + \\ c: u8, + \\}; + \\ + \\const Ps = packed struct(u32) { + \\ a: u1, + \\ b: u2, + \\ + \\ c: u29, + \\}; + \\ + \\const Es = extern struct { + \\ a: u8, + \\ b: u8, + \\ + \\ c: u8, + \\}; + \\ + ); +} + +test "zig fmt: enum declaration" { + try testCanonical( + \\const E = enum { + \\ Ok, + \\ SomethingElse = 0, + \\}; + \\ + \\const E2 = enum(u8) { + \\ Ok, + \\ SomethingElse = 255, + \\ SomethingThird, + \\}; + \\ + \\const Ee = extern enum { + \\ Ok, + \\ SomethingElse, + \\ SomethingThird, + \\}; + \\ + \\const Ep = packed enum { + \\ Ok, + \\ SomethingElse, + \\ SomethingThird, + \\}; + \\ + ); +} + +test "zig fmt: union declaration" { + try testCanonical( + \\const U = union { + \\ Int: u8, + \\ Float: f32, + \\ None, + \\ Bool: bool, + \\}; + \\ + \\const Ue = union(enum) { + \\ Int: u8, + \\ Float: f32, + \\ None, + \\ Bool: bool, + \\}; + \\ + \\const E = enum { + \\ Int, + \\ Float, + \\ None, + \\ Bool, + \\}; + \\ + \\const Ue2 = union(E) { + \\ Int: u8, + \\ Float: f32, + \\ None, + \\ Bool: bool, + \\}; + \\ + \\const Eu = extern union { + \\ Int: u8, + \\ Float: f32, + \\ None, + \\ Bool: bool, + \\}; + \\ + ); +} + +test "zig fmt: arrays" { + try testCanonical( + \\test "test array" { + \\ const a: [2]u8 = [2]u8{ + \\ 1, + \\ 2, + \\ }; + \\ const a: [2]u8 = []u8{ + \\ 1, + \\ 2, + \\ }; + \\ const a: [0]u8 = []u8{}; + \\ const x: [4:0]u8 = undefined; + \\} + \\ + ); +} + +test "zig fmt: container initializers" { + try testCanonical( + \\const a0 = []u8{}; + \\const a1 = []u8{1}; + \\const a2 = []u8{ + \\ 1, + \\ 2, + \\ 3, + \\ 4, + \\}; + \\const s0 = S{}; + \\const s1 = S{ .a = 1 }; + \\const s2 = S{ + \\ .a = 1, + \\ .b = 2, + \\}; + \\ + ); +} + +test "zig fmt: catch" { + try testCanonical( + \\test "catch" { + \\ const a: anyerror!u8 = 0; + \\ _ = a catch return; + \\ _ = a catch + \\ return; + \\ _ = a catch |err| return; + \\ _ = a catch |err| + \\ return; + \\} + \\ + ); +} + +test "zig fmt: blocks" { + try testCanonical( + \\test "blocks" { + \\ { + \\ const a = 0; + \\ const b = 0; + \\ } + \\ + \\ blk: { + \\ const a = 0; + \\ const b = 0; + \\ } + \\ + \\ const r = blk: { + \\ const a = 0; + \\ const b = 0; + \\ }; + \\} + \\ + ); +} + +test "zig fmt: switch" { + try testCanonical( + \\test "switch" { + \\ switch (0) { + \\ 0 => {}, + \\ 1 => unreachable, + \\ 2, 3 => {}, + \\ 4...7 => {}, + \\ 1 + 4 * 3 + 22 => {}, + \\ else => { + \\ const a = 1; + \\ const b = a; + \\ }, + \\ } + \\ + \\ const res = switch (0) { + \\ 0 => 0, + \\ 1 => 2, + \\ 1 => a = 4, + \\ else => 4, + \\ }; + \\ + \\ const Union = union(enum) { + \\ Int: i64, + \\ Float: f64, + \\ }; + \\ + \\ switch (u) { + \\ Union.Int => |int| {}, + \\ Union.Float => |*float| unreachable, + \\ 1 => |a, b| unreachable, + \\ 2 => |*a, b| unreachable, + \\ } + \\} + \\ + ); + + try testTransform( + \\test { + \\ switch (x) { + \\ foo => + \\ "bar", + \\ } + \\} + \\ + , + \\test { + \\ switch (x) { + \\ foo => "bar", + \\ } + \\} + \\ + ); +} + +test "zig fmt: switch multiline string" { + try testCanonical( + \\test "switch multiline string" { + \\ const x: u32 = 0; + \\ const str = switch (x) { + \\ 1 => "one", + \\ 2 => + \\ \\ Comma after the multiline string + \\ \\ is needed + \\ , + \\ 3 => "three", + \\ else => "else", + \\ }; + \\ + \\ const Union = union(enum) { + \\ Int: i64, + \\ Float: f64, + \\ }; + \\ + \\ const str = switch (u) { + \\ Union.Int => |int| + \\ \\ Comma after the multiline string + \\ \\ is needed + \\ , + \\ Union.Float => |*float| unreachable, + \\ }; + \\} + \\ + ); +} + +test "zig fmt: while" { + try testCanonical( + \\test "while" { + \\ while (10 < 1) unreachable; + \\ + \\ while (10 < 1) unreachable else unreachable; + \\ + \\ while (10 < 1) { + \\ unreachable; + \\ } + \\ + \\ while (10 < 1) + \\ unreachable; + \\ + \\ var i: usize = 0; + \\ while (i < 10) : (i += 1) { + \\ continue; + \\ } + \\ + \\ i = 0; + \\ while (i < 10) : (i += 1) + \\ continue; + \\ + \\ i = 0; + \\ var j: usize = 0; + \\ while (i < 10) : ({ + \\ i += 1; + \\ j += 1; + \\ }) continue; + \\ + \\ while (i < 10) : ({ + \\ i += 1; + \\ j += 1; + \\ }) { + \\ continue; + \\ } + \\ + \\ var a: ?u8 = 2; + \\ while (a) |v| : (a = null) { + \\ continue; + \\ } + \\ + \\ while (a) |v| : (a = null) + \\ unreachable; + \\ + \\ label: while (10 < 0) { + \\ unreachable; + \\ } + \\ + \\ const res = while (0 < 10) { + \\ break 7; + \\ } else { + \\ unreachable; + \\ }; + \\ + \\ const res = while (0 < 10) + \\ break 7 + \\ else + \\ unreachable; + \\ + \\ var a: anyerror!u8 = 0; + \\ while (a) |v| { + \\ a = error.Err; + \\ } else |err| { + \\ i = 1; + \\ } + \\ + \\ comptime var k: usize = 0; + \\ inline while (i < 10) : (i += 1) + \\ j += 2; + \\} + \\ + ); +} + +test "zig fmt: for" { + try testCanonical( + \\test "for" { + \\ for (a) |v| { + \\ continue; + \\ } + \\ + \\ for (a) |v| continue; + \\ + \\ for (a) |v| continue else return; + \\ + \\ for (a) |v| { + \\ continue; + \\ } else return; + \\ + \\ for (a) |v| continue else { + \\ return; + \\ } + \\ + \\ for (a) |v| + \\ continue + \\ else + \\ return; + \\ + \\ for (a) |v| + \\ continue; + \\ + \\ for (a) |*v| + \\ continue; + \\ + \\ for (a, 0..) |v, i| { + \\ continue; + \\ } + \\ + \\ for (a, 0..) |v, i| + \\ continue; + \\ + \\ for (a) |b| switch (b) { + \\ c => {}, + \\ d => {}, + \\ }; + \\ + \\ const res = for (a, 0..) |v, i| { + \\ break v; + \\ } else { + \\ unreachable; + \\ }; + \\ + \\ var num: usize = 0; + \\ inline for (a, 0..1) |v, i| { + \\ num += v; + \\ num += i; + \\ } + \\ + \\ for (a, b) | + \\ long_name, + \\ another_long_name, + \\ | { + \\ continue; + \\ } + \\} + \\ + ); + + try testTransform( + \\test "fix for" { + \\ for (a) |x| + \\ f(x) else continue; + \\} + \\ + , + \\test "fix for" { + \\ for (a) |x| + \\ f(x) + \\ else + \\ continue; + \\} + \\ + ); + + try testTransform( + \\test "fix for" { + \\ for (a, b, c,) |long, another, third,| {} + \\} + \\ + , + \\test "fix for" { + \\ for ( + \\ a, + \\ b, + \\ c, + \\ ) | + \\ long, + \\ another, + \\ third, + \\ | {} + \\} + \\ + ); +} + +test "zig fmt: for if" { + try testCanonical( + \\test { + \\ for (a) |x| if (x) f(x); + \\ + \\ for (a) |x| if (x) + \\ f(x); + \\ + \\ for (a) |x| if (x) { + \\ f(x); + \\ }; + \\ + \\ for (a) |x| + \\ if (x) + \\ f(x); + \\ + \\ for (a) |x| + \\ if (x) { + \\ f(x); + \\ }; + \\} + \\ + ); +} + +test "zig fmt: if for" { + try testCanonical( + \\test { + \\ if (a) for (x) |x| f(x); + \\ + \\ if (a) for (x) |x| + \\ f(x); + \\ + \\ if (a) for (x) |x| { + \\ f(x); + \\ }; + \\ + \\ if (a) + \\ for (x) |x| + \\ f(x); + \\ + \\ if (a) + \\ for (x) |x| { + \\ f(x); + \\ }; + \\} + \\ + ); +} + +test "zig fmt: while if" { + try testCanonical( + \\test { + \\ while (a) if (x) f(x); + \\ + \\ while (a) if (x) + \\ f(x); + \\ + \\ while (a) if (x) { + \\ f(x); + \\ }; + \\ + \\ while (a) + \\ if (x) + \\ f(x); + \\ + \\ while (a) + \\ if (x) { + \\ f(x); + \\ }; + \\} + \\ + ); +} + +test "zig fmt: if while" { + try testCanonical( + \\test { + \\ if (a) while (x) : (cont) f(x); + \\ + \\ if (a) while (x) : (cont) + \\ f(x); + \\ + \\ if (a) while (x) : (cont) { + \\ f(x); + \\ }; + \\ + \\ if (a) + \\ while (x) : (cont) + \\ f(x); + \\ + \\ if (a) + \\ while (x) : (cont) { + \\ f(x); + \\ }; + \\} + \\ + ); +} + +test "zig fmt: while for" { + try testCanonical( + \\test { + \\ while (a) for (x) |x| f(x); + \\ + \\ while (a) for (x) |x| + \\ f(x); + \\ + \\ while (a) for (x) |x| { + \\ f(x); + \\ }; + \\ + \\ while (a) + \\ for (x) |x| + \\ f(x); + \\ + \\ while (a) + \\ for (x) |x| { + \\ f(x); + \\ }; + \\} + \\ + ); +} + +test "zig fmt: for while" { + try testCanonical( + \\test { + \\ for (a) |a| while (x) |x| f(x); + \\ + \\ for (a) |a| while (x) |x| + \\ f(x); + \\ + \\ for (a) |a| while (x) |x| { + \\ f(x); + \\ }; + \\ + \\ for (a) |a| + \\ while (x) |x| + \\ f(x); + \\ + \\ for (a) |a| + \\ while (x) |x| { + \\ f(x); + \\ }; + \\} + \\ + ); +} + +test "zig fmt: if" { + try testCanonical( + \\test "if" { + \\ if (10 < 0) { + \\ unreachable; + \\ } + \\ + \\ if (10 < 0) unreachable; + \\ + \\ if (10 < 0) { + \\ unreachable; + \\ } else { + \\ const a = 20; + \\ } + \\ + \\ if (10 < 0) { + \\ unreachable; + \\ } else if (5 < 0) { + \\ unreachable; + \\ } else { + \\ const a = 20; + \\ } + \\ + \\ const is_world_broken = if (10 < 0) true else false; + \\ const some_number = 1 + if (10 < 0) 2 else 3; + \\ + \\ const a: ?u8 = 10; + \\ const b: ?u8 = null; + \\ if (a) |v| { + \\ const some = v; + \\ } else if (b) |*v| { + \\ unreachable; + \\ } else { + \\ const some = 10; + \\ } + \\ + \\ const non_null_a = if (a) |v| v else 0; + \\ + \\ const a_err: anyerror!u8 = 0; + \\ if (a_err) |v| { + \\ const p = v; + \\ } else |err| { + \\ unreachable; + \\ } + \\} + \\ + ); +} + +test "zig fmt: fix single statement if/for/while line breaks" { + try testTransform( + \\test { + \\ if (cond) a + \\ else b; + \\ + \\ if (cond) + \\ a + \\ else b; + \\ + \\ for (xs) |x| foo() + \\ else bar(); + \\ + \\ for (xs) |x| + \\ foo() + \\ else bar(); + \\ + \\ while (a) : (b) foo() + \\ else bar(); + \\ + \\ while (a) : (b) + \\ foo() + \\ else bar(); + \\} + \\ + , + \\test { + \\ if (cond) a else b; + \\ + \\ if (cond) + \\ a + \\ else + \\ b; + \\ + \\ for (xs) |x| foo() else bar(); + \\ + \\ for (xs) |x| + \\ foo() + \\ else + \\ bar(); + \\ + \\ while (a) : (b) foo() else bar(); + \\ + \\ while (a) : (b) + \\ foo() + \\ else + \\ bar(); + \\} + \\ + ); +} + +test "zig fmt: anon struct/array literal in if" { + try testCanonical( + \\test { + \\ const a = if (cond) .{ + \\ 1, 2, + \\ 3, 4, + \\ } else .{ + \\ 1, + \\ 2, + \\ 3, + \\ }; + \\ + \\ const rl_and_tag: struct { rl: ResultLoc, tag: zir.Inst.Tag } = if (any_payload_is_ref) .{ + \\ .rl = .ref, + \\ .tag = .switchbr_ref, + \\ } else .{ + \\ .rl = .none, + \\ .tag = .switchbr, + \\ }; + \\} + \\ + ); +} + +test "zig fmt: defer" { + try testCanonical( + \\test "defer" { + \\ var i: usize = 0; + \\ defer i = 1; + \\ defer { + \\ i += 2; + \\ i *= i; + \\ } + \\ + \\ errdefer i += 3; + \\ errdefer { + \\ i += 2; + \\ i /= i; + \\ } + \\} + \\ + ); +} + +test "zig fmt: comptime" { + try testCanonical( + \\fn a() u8 { + \\ return 5; + \\} + \\ + \\fn b(comptime i: u8) u8 { + \\ return i; + \\} + \\ + \\const av = comptime a(); + \\const av2 = comptime blk: { + \\ var res = a(); + \\ res *= b(2); + \\ break :blk res; + \\}; + \\ + \\comptime { + \\ _ = a(); + \\} + \\ + \\test "comptime" { + \\ const av3 = comptime a(); + \\ const av4 = comptime blk: { + \\ var res = a(); + \\ res *= a(); + \\ break :blk res; + \\ }; + \\ + \\ comptime var i = 0; + \\ comptime { + \\ i = a(); + \\ i += b(i); + \\ } + \\} + \\ + ); +} + +test "zig fmt: fn type" { + try testCanonical( + \\fn a(i: u8) u8 { + \\ return i + 1; + \\} + \\ + \\const a: fn (u8) u8 = undefined; + \\const b: fn (u8) callconv(.naked) u8 = undefined; + \\const ap: fn (u8) u8 = a; + \\ + ); +} + +test "zig fmt: inline asm" { + try testTransform( + \\pub fn syscall1(number: usize, arg1: usize) usize { + \\ return asm volatile ("syscall" + \\ : [ret] "={rax}" (-> usize), + \\ : [number] "{rax}" (number), + \\ [arg1] "{rdi}" (arg1), + \\ : "rcx", "r11" + \\ ); + \\} + \\ + , + \\pub fn syscall1(number: usize, arg1: usize) usize { + \\ return asm volatile ("syscall" + \\ : [ret] "={rax}" (-> usize), + \\ : [number] "{rax}" (number), + \\ [arg1] "{rdi}" (arg1), + \\ : .{ .rcx = true, .r11 = true } + \\ ); + \\} + \\ + ); +} + +test "zig fmt: nosuspend" { + try testCanonical( + \\const a = nosuspend foo(); + \\ + ); +} + +test "zig fmt: Block after if" { + try testCanonical( + \\test { + \\ if (true) { + \\ const a = 0; + \\ } + \\ + \\ { + \\ const a = 0; + \\ } + \\} + \\ + ); +} + +test "zig fmt: string identifier" { + try testCanonical( + \\const @"a b" = @"c d".@"e f"; + \\fn @"g h"() void {} + \\ + ); +} + +test "zig fmt: error return" { + try testCanonical( + \\fn err() anyerror { + \\ call(); + \\ return error.InvalidArgs; + \\} + \\ + ); +} + +test "zig fmt: comptime block in container" { + try testCanonical( + \\pub fn container() type { + \\ return struct { + \\ comptime { + \\ if (false) { + \\ unreachable; + \\ } + \\ } + \\ }; + \\} + \\ + ); +} + +test "zig fmt: inline asm parameter alignment" { + try testCanonical( + \\pub fn main() void { + \\ asm volatile ( + \\ \\ foo + \\ \\ bar + \\ ); + \\ asm volatile ( + \\ \\ foo + \\ \\ bar + \\ : [_] "" (-> usize), + \\ [_] "" (-> usize), + \\ ); + \\ asm volatile ( + \\ \\ foo + \\ \\ bar + \\ : + \\ : [_] "" (0), + \\ [_] "" (0), + \\ ); + \\ asm volatile ( + \\ \\ foo + \\ \\ bar + \\ ::: .{ .a = true, .b = true }); + \\ asm volatile ( + \\ \\ foo + \\ \\ bar + \\ : [_] "" (-> usize), + \\ [_] "" (-> usize), + \\ : [_] "" (0), + \\ [_] "" (0), + \\ : .{}); + \\} + \\ + ); +} + +test "zig fmt: multiline string in array" { + try testCanonical( + \\const Foo = [][]const u8{ + \\ \\aaa + \\ , + \\ \\bbb + \\}; + \\ + \\fn bar() void { + \\ const Foo = [][]const u8{ + \\ \\aaa + \\ , + \\ \\bbb + \\ }; + \\ const Bar = [][]const u8{ // comment here + \\ \\aaa + \\ \\ + \\ , // and another comment can go here + \\ \\bbb + \\ }; + \\} + \\ + ); +} + +test "zig fmt: if type expr" { + try testCanonical( + \\const mycond = true; + \\pub fn foo() if (mycond) i32 else void { + \\ if (mycond) { + \\ return 42; + \\ } + \\} + \\ + ); +} +test "zig fmt: file ends with struct field" { + try testCanonical( + \\a: bool + \\ + ); +} + +test "zig fmt: comment after empty comment" { + try testCanonical( + \\const x = true; // + \\// + \\// + \\//a + \\ + ); +} + +test "zig fmt: line comment in array" { + try testTransform( + \\test "a" { + \\ var arr = [_]u32{ + \\ 0 + \\ // 1, + \\ // 2, + \\ }; + \\} + \\ + , + \\test "a" { + \\ var arr = [_]u32{ + \\ 0, + \\ // 1, + \\ // 2, + \\ }; + \\} + \\ + ); + try testCanonical( + \\test "a" { + \\ var arr = [_]u32{ + \\ 0, + \\ // 1, + \\ // 2, + \\ }; + \\} + \\ + ); +} + +test "zig fmt: comment after params" { + try testTransform( + \\fn a( + \\ b: u32 + \\ // c: u32, + \\ // d: u32, + \\) void {} + \\ + , + \\fn a( + \\ b: u32, + \\ // c: u32, + \\ // d: u32, + \\) void {} + \\ + ); + try testCanonical( + \\fn a( + \\ b: u32, + \\ // c: u32, + \\ // d: u32, + \\) void {} + \\ + ); +} + +test "zig fmt: comment in array initializer/access" { + try testCanonical( + \\test "a" { + \\ var a = x{ //aa + \\ //bb + \\ }; + \\ var a = []x{ //aa + \\ //bb + \\ }; + \\ var b = [ //aa + \\ _ + \\ ]x{ //aa + \\ //bb + \\ 9, + \\ }; + \\ var c = b[ //aa + \\ 0 + \\ ]; + \\ var d = [ + \\ _ + \\ //aa + \\ : + \\ 0 + \\ ]x{ //aa + \\ //bb + \\ 9, + \\ }; + \\ var e = d[ + \\ 0 + \\ //aa + \\ ]; + \\} + \\ + ); +} + +test "zig fmt: comments at several places in struct init" { + try testTransform( + \\var bar = Bar{ + \\ .x = 10, // test + \\ .y = "test" + \\ // test + \\}; + \\ + , + \\var bar = Bar{ + \\ .x = 10, // test + \\ .y = "test", + \\ // test + \\}; + \\ + ); + + try testCanonical( + \\var bar = Bar{ // test + \\ .x = 10, // test + \\ .y = "test", + \\ // test + \\}; + \\ + ); +} + +test "zig fmt: container doc comments" { + try testCanonical( + \\//! tld 1 + \\//! tld 2 + \\//! tld 3 + \\ + \\// comment + \\ + \\/// A doc + \\const A = struct { + \\ //! A tld 1 + \\ //! A tld 2 + \\ //! A tld 3 + \\}; + \\ + \\/// B doc + \\const B = struct { + \\ //! B tld 1 + \\ //! B tld 2 + \\ //! B tld 3 + \\ + \\ /// B doc + \\ b: u32, + \\}; + \\ + \\/// C doc + \\const C = union(enum) { // comment + \\ //! C tld 1 + \\ //! C tld 2 + \\ //! C tld 3 + \\}; + \\ + \\/// D doc + \\const D = union(Foo) { + \\ //! D tld 1 + \\ //! D tld 2 + \\ //! D tld 3 + \\ + \\ /// D doc + \\ b: u32, + \\}; + \\ + ); + try testCanonical( + \\//! Top-level documentation. + \\ + \\/// This is A + \\pub const A = usize; + \\ + ); + try testCanonical( + \\//! Nothing here + \\ + ); +} + +test "zig fmt: remove newlines surrounding doc comment" { + try testTransform( + \\ + \\ + \\ + \\/// doc comment + \\ + \\fn foo() void {} + \\ + , + \\/// doc comment + \\fn foo() void {} + \\ + ); +} + +test "zig fmt: remove newlines surrounding doc comment between members" { + try testTransform( + \\f1: i32, + \\ + \\ + \\/// doc comment + \\ + \\f2: i32, + \\ + , + \\f1: i32, + \\ + \\/// doc comment + \\f2: i32, + \\ + ); +} + +test "zig fmt: remove newlines surrounding doc comment between members within container decl (1)" { + try testTransform( + \\const Foo = struct { + \\ fn foo() void {} + \\ + \\ + \\ /// doc comment + \\ + \\ + \\ fn bar() void {} + \\}; + \\ + , + \\const Foo = struct { + \\ fn foo() void {} + \\ + \\ /// doc comment + \\ fn bar() void {} + \\}; + \\ + ); +} + +test "zig fmt: remove newlines surrounding doc comment between members within container decl (2)" { + try testTransform( + \\const Foo = struct { + \\ fn foo() void {} + \\ /// doc comment 1 + \\ + \\ /// doc comment 2 + \\ + \\ fn bar() void {} + \\}; + \\ + , + \\const Foo = struct { + \\ fn foo() void {} + \\ /// doc comment 1 + \\ /// doc comment 2 + \\ fn bar() void {} + \\}; + \\ + ); +} + +test "zig fmt: remove newlines surrounding doc comment within container decl" { + try testTransform( + \\const Foo = struct { + \\ + \\ + \\ /// doc comment + \\ + \\ fn foo() void {} + \\}; + \\ + , + \\const Foo = struct { + \\ /// doc comment + \\ fn foo() void {} + \\}; + \\ + ); +} + +test "zig fmt: comptime before comptime field" { + try testError( + \\const Foo = struct { + \\ a: i32, + \\ comptime comptime b: i32 = 1234, + \\}; + \\ + , &[_]Error{ + .expected_comma_after_field, + }); +} + +test "zig fmt: invalid doc comments on comptime and test blocks" { + try testError( + \\/// This is a doc comment for a comptime block. + \\comptime {} + \\/// This is a doc comment for a test + \\test "This is my test" {} + , &[_]Error{ + .comptime_doc_comment, + .test_doc_comment, + }); +} + +test "zig fmt: comments with CRLF line endings" { + try testTransform("" ++ + "//! Top-level doc comment\r\n" ++ + "//! Continuing to another line\r\n" ++ + "\r\n" ++ + "/// Regular doc comment\r\n" ++ + "const S = struct {\r\n" ++ + " // Regular comment\r\n" ++ + " // More content\r\n" ++ + "};\r\n", + \\//! Top-level doc comment + \\//! Continuing to another line + \\ + \\/// Regular doc comment + \\const S = struct { + \\ // Regular comment + \\ // More content + \\}; + \\ + ); +} + +test "zig fmt: else comptime expr" { + try testCanonical( + \\comptime { + \\ if (true) {} else comptime foo(); + \\} + \\comptime { + \\ while (true) {} else comptime foo(); + \\} + \\comptime { + \\ for ("") |_| {} else comptime foo(); + \\} + \\ + ); +} + +test "zig fmt: invalid else branch statement" { + try testError( + \\comptime { + \\ if (true) {} else var a = 0; + \\ if (true) {} else defer {} + \\} + \\comptime { + \\ while (true) {} else var a = 0; + \\ while (true) {} else defer {} + \\} + \\comptime { + \\ for ("") |_| {} else var a = 0; + \\ for ("") |_| {} else defer {} + \\} + , &[_]Error{ + .expected_expr_or_assignment, + .expected_expr_or_assignment, + .expected_expr_or_assignment, + .expected_expr_or_assignment, + .expected_expr_or_assignment, + .expected_expr_or_assignment, + }); +} + +test "zig fmt: anytype struct field" { + try testError( + \\pub const Pointer = struct { + \\ sentinel: anytype, + \\}; + \\ + , &[_]Error{ + .expected_type_expr, + }); +} + +test "zig fmt: extern without container keyword returns error" { + try testError( + \\const container = extern {}; + \\ + , &[_]Error{ + .expected_container, + }); +} + +test "zig fmt: same line doc comment returns error" { + try testError( + \\const Foo = struct{ + \\ bar: u32, /// comment + \\ foo: u32, /// comment + \\ /// comment + \\}; + \\ + \\const a = 42; /// comment + \\ + \\extern fn foo() void; /// comment + \\ + \\/// comment + \\ + , &[_]Error{ + .same_line_doc_comment, + .same_line_doc_comment, + .unattached_doc_comment, + .same_line_doc_comment, + .same_line_doc_comment, + .unattached_doc_comment, + }); +} + +test "zig fmt: integer literals with underscore separators" { + try testTransform( + \\const + \\ x = + \\ 1_234_567 + \\ + (0b0_1-0o7_0+0xff_FF ) + 1_0; + , + \\const x = + \\ 1_234_567 + (0b0_1 - 0o7_0 + 0xff_FF) + 1_0; + \\ + ); +} + +test "zig fmt: hex literals with underscore separators" { + try testTransform( + \\pub fn orMask(a: [ 1_000 ]u64, b: [ 1_000] u64) [1_000]u64 { + \\ var c: [1_000]u64 = [1]u64{ 0xFFFF_FFFF_FFFF_FFFF}**1_000; + \\ for (c [ 1_0 .. ], 0..) |_, i| { + \\ c[i] = (a[i] | b[i]) & 0xCCAA_CCAA_CCAA_CCAA; + \\ } + \\ return c; + \\} + \\ + \\ + , + \\pub fn orMask(a: [1_000]u64, b: [1_000]u64) [1_000]u64 { + \\ var c: [1_000]u64 = [1]u64{0xFFFF_FFFF_FFFF_FFFF} ** 1_000; + \\ for (c[1_0..], 0..) |_, i| { + \\ c[i] = (a[i] | b[i]) & 0xCCAA_CCAA_CCAA_CCAA; + \\ } + \\ return c; + \\} + \\ + ); +} + +test "zig fmt: decimal float literals with underscore separators" { + try testTransform( + \\pub fn main() void { + \\ const a:f64=(10.0e-0+(10.0e+0))+10_00.00_00e-2+20_00.00_10e+4; + \\ const b:f64=1_0.0--10_10.0+1_0_0.0_0+1e2; + \\ std.debug.warn("a: {}, b: {} -> a+b: {}\n", .{ a, b, a + b }); + \\} + , + \\pub fn main() void { + \\ const a: f64 = (10.0e-0 + (10.0e+0)) + 10_00.00_00e-2 + 20_00.00_10e+4; + \\ const b: f64 = 1_0.0 - -10_10.0 + 1_0_0.0_0 + 1e2; + \\ std.debug.warn("a: {}, b: {} -> a+b: {}\n", .{ a, b, a + b }); + \\} + \\ + ); +} + +test "zig fmt: hexadecimal float literals with underscore separators" { + try testTransform( + \\pub fn main() void { + \\ const a: f64 = (0x10.0p-0+(0x10.0p+0))+0x10_00.00_00p-8+0x00_00.00_10p+16; + \\ const b: f64 = 0x0010.0--0x00_10.0+0x10.00+0x1p4; + \\ std.debug.warn("a: {}, b: {} -> a+b: {}\n", .{ a, b, a + b }); + \\} + , + \\pub fn main() void { + \\ const a: f64 = (0x10.0p-0 + (0x10.0p+0)) + 0x10_00.00_00p-8 + 0x00_00.00_10p+16; + \\ const b: f64 = 0x0010.0 - -0x00_10.0 + 0x10.00 + 0x1p4; + \\ std.debug.warn("a: {}, b: {} -> a+b: {}\n", .{ a, b, a + b }); + \\} + \\ + ); +} + +test "zig fmt: C var args" { + try testCanonical( + \\pub extern "c" fn printf(format: [*:0]const u8, ...) c_int; + \\ + ); +} + +test "zig fmt: Only indent multiline string literals in function calls" { + try testCanonical( + \\test "zig fmt:" { + \\ try testTransform( + \\ \\const X = struct { + \\ \\ foo: i32, bar: i8 }; + \\ , + \\ \\const X = struct { + \\ \\ foo: i32, bar: i8 + \\ \\}; + \\ \\ + \\ ); + \\} + \\ + ); +} + +test "zig fmt: Don't add extra newline after if" { + try testCanonical( + \\pub fn atomicSymLink(allocator: Allocator, existing_path: []const u8, new_path: []const u8) !void { + \\ if (cwd().symLink(existing_path, new_path, .{})) { + \\ return; + \\ } + \\} + \\ + ); +} + +test "zig fmt: comments in ternary ifs" { + try testCanonical( + \\const x = if (true) { + \\ 1; + \\} else if (false) + \\ // Comment + \\ 0; + \\const y = if (true) + \\ // Comment + \\ 1 + \\else + \\ // Comment + \\ 0; + \\ + \\pub extern "c" fn printf(format: [*:0]const u8, ...) c_int; + \\ + ); +} + +test "zig fmt: while statement in blockless if" { + try testCanonical( + \\pub fn main() void { + \\ const zoom_node = if (focused_node == layout_first) + \\ while (it.next()) |node| { + \\ if (!node.view.pending.float and !node.view.pending.fullscreen) break node; + \\ } else null + \\ else + \\ focused_node; + \\} + \\ + ); +} + +test "zig fmt: test comments in field access chain" { + try testCanonical( + \\pub const str = struct { + \\ pub const Thing = more.more // + \\ .more() // + \\ .more().more() // + \\ .more() // + \\ // .more() // + \\ .more() // + \\ .more(); + \\ data: Data, + \\}; + \\ + \\pub const str = struct { + \\ pub const Thing = more.more // + \\ .more() // + \\ // .more() // + \\ // .more() // + \\ // .more() // + \\ .more() // + \\ .more(); + \\ data: Data, + \\}; + \\ + \\pub const str = struct { + \\ pub const Thing = more // + \\ .more // + \\ .more() // + \\ .more(); + \\ data: Data, + \\}; + \\ + ); +} + +test "zig fmt: allow line break before field access" { + try testCanonical( + \\test { + \\ const w = foo.bar().zippy(zag).iguessthisisok(); + \\ + \\ const x = foo + \\ .bar() + \\ . // comment + \\ // comment + \\ swooop().zippy(zag) + \\ .iguessthisisok(); + \\ + \\ const y = view.output.root.server.input_manager.default_seat.wlr_seat.name; + \\ + \\ const z = view.output.root.server + \\ .input_manager // + \\ .default_seat + \\ . // comment + \\ // another comment + \\ wlr_seat.name; + \\} + \\ + ); + try testTransform( + \\test { + \\ const x = foo. + \\ bar() + \\ .zippy(zag).iguessthisisok(); + \\ + \\ const z = view.output.root.server. + \\ input_manager. + \\ default_seat.wlr_seat.name; + \\} + \\ + , + \\test { + \\ const x = foo + \\ .bar() + \\ .zippy(zag).iguessthisisok(); + \\ + \\ const z = view.output.root.server + \\ .input_manager + \\ .default_seat.wlr_seat.name; + \\} + \\ + ); +} + +test "zig fmt: Indent comma correctly after multiline string literals in arg list (trailing comma)" { + try testCanonical( + \\fn foo() void { + \\ z.display_message_dialog( + \\ *const [323:0]u8, + \\ \\Message Text + \\ \\------------ + \\ \\xxxxxxxxxxxx + \\ \\xxxxxxxxxxxx + \\ , + \\ g.GtkMessageType.GTK_MESSAGE_WARNING, + \\ null, + \\ ); + \\ + \\ z.display_message_dialog(*const [323:0]u8, + \\ \\Message Text + \\ \\------------ + \\ \\xxxxxxxxxxxx + \\ \\xxxxxxxxxxxx + \\ , g.GtkMessageType.GTK_MESSAGE_WARNING, null); + \\} + \\ + ); +} + +test "zig fmt: Control flow statement as body of blockless if" { + try testCanonical( + \\pub fn main() void { + \\ const zoom_node = if (focused_node == layout_first) + \\ if (it.next()) { + \\ if (!node.view.pending.float and !node.view.pending.fullscreen) break node; + \\ } else null + \\ else + \\ focused_node; + \\ + \\ const zoom_node = if (focused_node == layout_first) while (it.next()) |node| { + \\ if (!node.view.pending.float and !node.view.pending.fullscreen) break node; + \\ } else null else focused_node; + \\ + \\ const zoom_node = if (focused_node == layout_first) + \\ if (it.next()) { + \\ if (!node.view.pending.float and !node.view.pending.fullscreen) break node; + \\ } else null; + \\ + \\ const zoom_node = if (focused_node == layout_first) while (it.next()) |node| { + \\ if (!node.view.pending.float and !node.view.pending.fullscreen) break node; + \\ }; + \\ + \\ const zoom_node = if (focused_node == layout_first) for (nodes) |node| { + \\ break node; + \\ }; + \\ + \\ const zoom_node = if (focused_node == layout_first) switch (nodes) { + \\ 0 => 0, + \\ } else focused_node; + \\} + \\ + ); +} + +test "zig fmt: regression test for #5722" { + try testCanonical( + \\pub fn sendViewTags(self: Self) void { + \\ var it = ViewStack(View).iterator(self.output.views.first, std.math.maxInt(u32)); + \\ while (it.next()) |node| + \\ view_tags.append(node.view.current_tags) catch { + \\ c.wl_resource_post_no_memory(self.wl_resource); + \\ log.err(.river_status, "out of memory", .{}); + \\ return; + \\ }; + \\} + \\ + ); +} + +test "zig fmt: regression test for #8974" { + try testCanonical( + \\pub const VARIABLE; + \\ + ); +} + +test "zig fmt: allow trailing line comments to do manual array formatting" { + try testCanonical( + \\fn foo() void { + \\ self.code.appendSliceAssumeCapacity(&[_]u8{ + \\ 0x55, // push rbp + \\ 0x48, 0x89, 0xe5, // mov rbp, rsp + \\ 0x48, 0x81, 0xec, // sub rsp, imm32 (with reloc) + \\ }); + \\ + \\ di_buf.appendAssumeCapacity(&[_]u8{ + \\ 1, DW.TAG_compile_unit, DW.CHILDREN_no, // header + \\ DW.AT_stmt_list, DW_FORM_data4, // form value pairs + \\ DW.AT_low_pc, DW_FORM_addr, + \\ DW.AT_high_pc, DW_FORM_addr, + \\ DW.AT_name, DW_FORM_strp, + \\ DW.AT_comp_dir, DW_FORM_strp, + \\ DW.AT_producer, DW_FORM_strp, + \\ DW.AT_language, DW_FORM_data2, + \\ 0, 0, // sentinel + \\ }); + \\ + \\ self.code.appendSliceAssumeCapacity(&[_]u8{ + \\ 0x55, // push rbp + \\ 0x48, 0x89, 0xe5, // mov rbp, rsp + \\ // How do we handle this? + \\ //0x48, 0x81, 0xec, // sub rsp, imm32 (with reloc) + \\ // Here's a blank line, should that be allowed? + \\ + \\ 0x48, 0x89, 0xe5, + \\ 0x33, 0x45, + \\ // Now the comment breaks a single line -- how do we handle this? + \\ 0x88, + \\ }); + \\} + \\ + ); +} + +test "zig fmt: multiline string literals should play nice with array initializers" { + try testCanonical( + \\fn main() void { + \\ var a = .{.{.{.{.{.{.{.{ + \\ 0, + \\ }}}}}}}}; + \\ myFunc(.{ + \\ "aaaaaaa", "bbbbbb", "ccccc", + \\ "dddd", ("eee"), ("fff"), + \\ ("gggg"), + \\ // Line comment + \\ \\Multiline String Literals can be quite long + \\ , + \\ \\Multiline String Literals can be quite long + \\ \\Multiline String Literals can be quite long + \\ , + \\ \\Multiline String Literals can be quite long + \\ \\Multiline String Literals can be quite long + \\ \\Multiline String Literals can be quite long + \\ \\Multiline String Literals can be quite long + \\ , + \\ ( + \\ \\Multiline String Literals can be quite long + \\ ), + \\ .{ + \\ \\xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx + \\ \\xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx + \\ \\xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx + \\ }, + \\ .{( + \\ \\xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx + \\ )}, + \\ .{ + \\ "xxxxxxx", "xxx", + \\ ( + \\ \\ xxx + \\ ), + \\ "xxx", + \\ "xxx", + \\ }, + \\ .{ "xxxxxxx", "xxx", "xxx", "xxx" }, + \\ .{ "xxxxxxx", "xxx", "xxx", "xxx" }, + \\ "aaaaaaa", "bbbbbb", "ccccc", // - + \\ "dddd", ("eee"), ("fff"), + \\ .{ + \\ "xxx", "xxx", + \\ ( + \\ \\ xxx + \\ ), + \\ "xxxxxxxxxxxxxx", + \\ "xxx", + \\ }, + \\ .{ + \\ ( + \\ \\xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx + \\ ), + \\ \\xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx + \\ }, + \\ \\xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx + \\ \\xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx + \\ }); + \\} + \\ + ); +} + +test "zig fmt: use of comments and multiline string literals may force the parameters over multiple lines" { + try testCanonical( + \\pub fn makeMemUndefined(qzz: []u8) i1 { + \\ cases.add( // fixed bug foo + \\ "compile diagnostic string for top level decl type", + \\ \\export fn entry() void { + \\ \\ var foo: u32 = @This(){}; + \\ \\} + \\ , &[_][]const u8{ + \\ "tmp.zig:2:27: error: type 'u32' does not support array initialization", + \\ }); + \\ @compileError( + \\ \\ unknown-length pointers and C pointers cannot be hashed deeply. + \\ \\ Consider providing your own hash function. + \\ \\ unknown-length pointers and C pointers cannot be hashed deeply. + \\ \\ Consider providing your own hash function. + \\ ); + \\ return @intCast(doMemCheckClientRequestExpr(0, // default return + \\ .MakeMemUndefined, @intFromPtr(qzz.ptr), qzz.len, 0, 0, 0)); + \\} + \\ + \\// This looks like garbage don't do this + \\const rparen = tree.prevToken( + \\ // the first token for the annotation expressions is the left + \\ // parenthesis, hence the need for two prevToken + \\ if (fn_proto.getAlignExpr()) |align_expr| + \\ tree.prevToken(tree.prevToken(align_expr.firstToken())) + \\ else if (fn_proto.getSectionExpr()) |section_expr| + \\ tree.prevToken(tree.prevToken(section_expr.firstToken())) + \\ else if (fn_proto.getCallconvExpr()) |callconv_expr| + \\ tree.prevToken(tree.prevToken(callconv_expr.firstToken())) + \\ else switch (fn_proto.return_type) { + \\ .Explicit => |node| node.firstToken(), + \\ .InferErrorSet => |node| tree.prevToken(node.firstToken()), + \\ .Invalid => unreachable, + \\ }); + \\ + ); +} + +test "zig fmt: single argument trailing commas in @builtins()" { + try testCanonical( + \\pub fn foo(qzz: []u8) i1 { + \\ @panic( + \\ foo, + \\ ); + \\ panic( + \\ foo, + \\ ); + \\ @panic( + \\ foo, + \\ bar, + \\ ); + \\} + \\ + ); +} + +test "zig fmt: trailing comma should force multiline 1 column" { + try testTransform( + \\pub const UUID_NULL: uuid_t = [16]u8{0,0,0,0,}; + \\ + , + \\pub const UUID_NULL: uuid_t = [16]u8{ + \\ 0, + \\ 0, + \\ 0, + \\ 0, + \\}; + \\ + ); +} + +test "zig fmt: function params should align nicely" { + try testCanonical( + \\pub fn foo() void { + \\ cases.addRuntimeSafety("slicing operator with sentinel", + \\ \\const std = @import("std"); + \\ ++ check_panic_msg ++ + \\ \\pub fn main() void { + \\ \\ var buf = [4]u8{'a','b','c',0}; + \\ \\ const slice = buf[0..:0]; + \\ \\} + \\ ); + \\} + \\ + ); +} + +test "zig fmt: fn proto end with anytype and comma" { + try testCanonical( + \\pub fn format( + \\ out_stream: anytype, + \\) !void {} + \\ + ); +} + +test "zig fmt: space after top level doc comment" { + try testCanonical( + \\//! top level doc comment + \\ + \\field: i32, + \\ + ); +} + +test "zig fmt: remove trailing whitespace after container doc comment" { + try testTransform( + \\//! top level doc comment + \\ + , + \\//! top level doc comment + \\ + ); +} + +test "zig fmt: remove trailing whitespace after doc comment" { + try testTransform( + \\/// doc comment + \\a = 0, + \\ + , + \\/// doc comment + \\a = 0, + \\ + ); +} + +test "zig fmt: for loop with ptr payload and index" { + try testCanonical( + \\test { + \\ for (self.entries.items, 0..) |*item, i| {} + \\ for (self.entries.items, 0..) |*item, i| + \\ a = b; + \\ for (self.entries.items, 0..) |*item, i| a = b; + \\} + \\ + ); +} + +test "zig fmt: proper indent line comment after multi-line single expr while loop" { + try testCanonical( + \\test { + \\ while (a) : (b) + \\ foo(); + \\ + \\ // bar + \\ baz(); + \\} + \\ + ); +} + +test "zig fmt: function with labeled block as return type" { + try testCanonical( + \\fn foo() t: { + \\ break :t bar; + \\} { + \\ baz(); + \\} + \\ + ); +} + +test "zig fmt: extern function with missing param name" { + try testCanonical( + \\extern fn a( + \\ *b, + \\ c: *d, + \\) e; + \\extern fn f(*g, h: *i) j; + \\ + ); +} + +test "zig fmt: line comment after multiline single expr if statement with multiline string" { + try testCanonical( + \\test { + \\ if (foo) + \\ x = + \\ \\hello + \\ \\hello + \\ \\ + \\ ; + \\ + \\ // bar + \\ baz(); + \\ + \\ if (foo) + \\ x = + \\ \\hello + \\ \\hello + \\ \\ + \\ else + \\ y = + \\ \\hello + \\ \\hello + \\ \\ + \\ ; + \\ + \\ // bar + \\ baz(); + \\} + \\ + ); +} + +test "zig fmt: respect extra newline between switch items" { + try testCanonical( + \\const a = switch (b) { + \\ .c => {}, + \\ + \\ .d, + \\ .e, + \\ => f, + \\}; + \\ + ); +} + +test "zig fmt: assignment with inline for and inline while" { + try testCanonical( + \\const tmp = inline for (items) |item| {}; + \\ + ); + + try testCanonical( + \\const tmp2 = inline while (true) {}; + \\ + ); +} + +test "zig fmt: saturating arithmetic" { + try testCanonical( + \\test { + \\ const actual = switch (op) { + \\ .add => a +| b, + \\ .sub => a -| b, + \\ .mul => a *| b, + \\ .shl => a <<| b, + \\ }; + \\ switch (op) { + \\ .add => actual +|= b, + \\ .sub => actual -|= b, + \\ .mul => actual *|= b, + \\ .shl => actual <<|= b, + \\ } + \\} + \\ + ); +} + +test "zig fmt: insert trailing comma if there are comments between switch values" { + try testTransform( + \\const a = switch (b) { + \\ .c => {}, + \\ + \\ .d, // foobar + \\ .e + \\ => f, + \\ + \\ .g, .h + \\ // comment + \\ => i, + \\}; + \\ + , + \\const a = switch (b) { + \\ .c => {}, + \\ + \\ .d, // foobar + \\ .e, + \\ => f, + \\ + \\ .g, + \\ .h, + \\ // comment + \\ => i, + \\}; + \\ + ); +} + +test "zig fmt: insert trailing comma if comments in array init" { + try testTransform( + \\var a = .{ + \\ "foo", // + \\ "bar" + \\}; + \\var a = .{ + \\ "foo", + \\ "bar" // + \\}; + \\var a = .{ + \\ "foo", + \\ "//" + \\}; + \\var a = .{ + \\ "foo", + \\ "//" // + \\}; + \\ + , + \\var a = .{ + \\ "foo", // + \\ "bar", + \\}; + \\var a = .{ + \\ "foo", + \\ "bar", // + \\}; + \\var a = .{ "foo", "//" }; + \\var a = .{ + \\ "foo", + \\ "//", // + \\}; + \\ + ); +} + +test "zig fmt: make single-line if no trailing comma" { + try testTransform( + \\test "function call no trailing comma" { + \\ foo( + \\ 1, + \\ 2 + \\ ); + \\} + \\ + , + \\test "function call no trailing comma" { + \\ foo(1, 2); + \\} + \\ + ); + + try testTransform( + \\test "struct no trailing comma" { + \\ const a = .{ + \\ .foo = 1, + \\ .bar = 2 + \\ }; + \\} + \\ + , + \\test "struct no trailing comma" { + \\ const a = .{ .foo = 1, .bar = 2 }; + \\} + \\ + ); + + try testTransform( + \\test "array no trailing comma" { + \\ var stream = multiOutStream(.{ + \\ fbs1.outStream(), + \\ fbs2.outStream() + \\ }); + \\} + \\ + , + \\test "array no trailing comma" { + \\ var stream = multiOutStream(.{ fbs1.outStream(), fbs2.outStream() }); + \\} + \\ + ); +} + +test "zig fmt: preserve container doc comment in container without trailing comma" { + try testTransform( + \\const A = enum(u32) { + \\//! comment + \\_ }; + \\ + , + \\const A = enum(u32) { + \\ //! comment + \\ _, + \\}; + \\ + ); +} + +test "zig fmt: make single-line if no trailing comma, fmt: off" { + try testCanonical( + \\// Test trailing comma syntax + \\// zig fmt: off + \\ + \\extern var a: c_int; + \\extern "c" var b: c_int; + \\export var c: c_int = 0; + \\threadlocal var d: c_int = 0; + \\extern threadlocal var e: c_int; + \\extern "c" threadlocal var f: c_int; + \\export threadlocal var g: c_int = 0; + \\ + \\const struct_trailing_comma = struct { x: i32, y: i32, }; + \\const struct_no_comma = struct { x: i32, y: i32 }; + \\const struct_fn_no_comma = struct { fn m() void {} y: i32 }; + \\ + \\const enum_no_comma = enum { A, B }; + \\ + \\fn container_init() void { + \\ const S = struct { x: i32, y: i32 }; + \\ _ = S { .x = 1, .y = 2 }; + \\ _ = S { .x = 1, .y = 2, }; + \\} + \\ + \\fn type_expr_return1() if (true) A {} + \\fn type_expr_return2() for (true) |_| A {} + \\fn type_expr_return3() while (true) A {} + \\ + \\fn switch_cases(x: i32) void { + \\ switch (x) { + \\ 1,2,3 => {}, + \\ 4,5, => {}, + \\ 6...8, => {}, + \\ else => {}, + \\ } + \\} + \\ + \\fn switch_prongs(x: i32) void { + \\ switch (x) { + \\ 0 => {}, + \\ else => {}, + \\ } + \\ switch (x) { + \\ 0 => {}, + \\ else => {} + \\ } + \\} + \\ + \\const fn_no_comma = fn (i32, i32) void; + \\const fn_trailing_comma = fn (i32, i32,) void; + \\ + \\fn fn_calls() void { + \\ fn add(x: i32, y: i32,) i32 { x + y }; + \\ _ = add(1, 2); + \\ _ = add(1, 2,); + \\} + \\ + \\fn asm_lists() void { + \\ if (false) { // Build AST but don't analyze + \\ asm ("not real assembly" + \\ :[a] "x" (x),); + \\ asm ("not real assembly" + \\ :[a] "x" (->i32),:[a] "x" (1),); + \\ asm volatile ("still not real assembly" + \\ :::.{.a = true,.b = true,}); + \\ } + \\} + ); +} + +test "zig fmt: variable initialized with ==" { + try testError( + \\comptime { + \\ var z: u32 == 12 + 1; + \\} + , &.{.wrong_equal_var_decl}); +} + +test "zig fmt: missing const/var before local variable in comptime block" { + try testError( + \\comptime { + \\ z: u32; + \\} + \\comptime { + \\ z: u32 align(1); + \\} + \\comptime { + \\ z: u32 addrspace(.generic); + \\} + \\comptime { + \\ z: u32 linksection("foo"); + \\} + \\comptime { + \\ z: u32 = 1; + \\} + , &.{ + .expected_labelable, + .expected_var_const, + .expected_var_const, + .expected_var_const, + .expected_var_const, + }); +} + +test "zig fmt: missing const/var before local variable" { + try testError( + \\std = foo, + \\std = foo; + \\*u32 = foo; + , &.{ + .expected_comma_after_field, + .var_const_decl, + .expected_comma_after_field, + }); +} + +test "zig fmt: while continue expr" { + try testCanonical( + \\test { + \\ while (i > 0) + \\ (i * 2); + \\} + \\ + ); + try testError( + \\test { + \\ while (i > 0) (i -= 1) { + \\ print("test123", .{}); + \\ } + \\} + , &[_]Error{ + .expected_continue_expr, + }); +} + +test "zig fmt: canonicalize symbols (simple)" { + try testTransform( + \\const val_normal: Normal = .{}; + \\const @"val_unesc_me": @"UnescMe" = .{}; + \\const @"val_esc!": @"Esc!" = .{}; + \\ + \\fn fnNormal() void {} + \\fn @"fnUnescMe"() void {} + \\fn @"fnEsc!"() void {} + \\ + \\extern fn protoNormal() void; + \\extern fn @"protoUnescMe"() void; + \\extern fn @"protoEsc!"() void; + \\ + \\fn fnWithArgs(normal: Normal, @"unesc_me": @"UnescMe", @"esc!": @"Esc!") void { + \\ _ = normal; + \\ _ = @"unesc_me"; + \\ _ = @"esc!"; + \\} + \\ + \\const Normal = struct {}; + \\const @"UnescMe" = struct { + \\ @"x": @"X", + \\ const X = union(@"EnumUnesc") { + \\ normal, + \\ @"unesc_me", + \\ @"esc!", + \\ }; + \\ const @"EnumUnesc" = enum { + \\ normal, + \\ @"unesc_me", + \\ @"esc!", + \\ }; + \\}; + \\const @"Esc!" = struct { + \\ normal: bool = false, + \\ @"unesc_me": bool = false, + \\ @"esc!": bool = false, + \\}; + \\ + \\pub fn main() void { + \\ _ = val_normal; + \\ _ = @"val_normal"; + \\ _ = val_unesc_me; + \\ _ = @"val_unesc_me"; + \\ _ = @"val_esc!"; + \\ + \\ fnNormal(); + \\ @"fnNormal"(); + \\ fnUnescMe(); + \\ @"fnUnescMe"(); + \\ @"fnEsc!"(); + \\ + \\ fnWithArgs(1, Normal{}, UnescMe{}, @"Esc!"{}); + \\ fnWithArgs(1, @"Normal"{}, @"UnescMe"{}, @"Esc!"{}); + \\ fnWithArgs(1, @"Normal"{}, @"Normal"{}, @"Esc!"{}); + \\ + \\ const local_val1: @"Normal" = .{}; + \\ const @"local_val2": UnescMe = .{ + \\ .@"x" = .@"unesc_me", + \\ }; + \\ fnWithArgs(@"local_val1", @"local_val2", .{ .@"normal" = true, .@"unesc_me" = true, .@"esc!" = true }); + \\ fnWithArgs(local_val1, local_val2, .{ .normal = true, .unesc_me = true, .@"esc!" = true }); + \\ + \\ var x: u8 = 'x'; + \\ switch (@"x") { + \\ @"x" => {}, + \\ } + \\ + \\ _ = @import("std"); // Don't mess with @builtins + \\ // @"comment" + \\} + \\ + , + \\const val_normal: Normal = .{}; + \\const val_unesc_me: UnescMe = .{}; + \\const @"val_esc!": @"Esc!" = .{}; + \\ + \\fn fnNormal() void {} + \\fn fnUnescMe() void {} + \\fn @"fnEsc!"() void {} + \\ + \\extern fn protoNormal() void; + \\extern fn protoUnescMe() void; + \\extern fn @"protoEsc!"() void; + \\ + \\fn fnWithArgs(normal: Normal, unesc_me: UnescMe, @"esc!": @"Esc!") void { + \\ _ = normal; + \\ _ = unesc_me; + \\ _ = @"esc!"; + \\} + \\ + \\const Normal = struct {}; + \\const UnescMe = struct { + \\ x: X, + \\ const X = union(EnumUnesc) { + \\ normal, + \\ unesc_me, + \\ @"esc!", + \\ }; + \\ const EnumUnesc = enum { + \\ normal, + \\ unesc_me, + \\ @"esc!", + \\ }; + \\}; + \\const @"Esc!" = struct { + \\ normal: bool = false, + \\ unesc_me: bool = false, + \\ @"esc!": bool = false, + \\}; + \\ + \\pub fn main() void { + \\ _ = val_normal; + \\ _ = val_normal; + \\ _ = val_unesc_me; + \\ _ = val_unesc_me; + \\ _ = @"val_esc!"; + \\ + \\ fnNormal(); + \\ fnNormal(); + \\ fnUnescMe(); + \\ fnUnescMe(); + \\ @"fnEsc!"(); + \\ + \\ fnWithArgs(1, Normal{}, UnescMe{}, @"Esc!"{}); + \\ fnWithArgs(1, Normal{}, UnescMe{}, @"Esc!"{}); + \\ fnWithArgs(1, Normal{}, Normal{}, @"Esc!"{}); + \\ + \\ const local_val1: Normal = .{}; + \\ const local_val2: UnescMe = .{ + \\ .x = .unesc_me, + \\ }; + \\ fnWithArgs(local_val1, local_val2, .{ .normal = true, .unesc_me = true, .@"esc!" = true }); + \\ fnWithArgs(local_val1, local_val2, .{ .normal = true, .unesc_me = true, .@"esc!" = true }); + \\ + \\ var x: u8 = 'x'; + \\ switch (x) { + \\ x => {}, + \\ } + \\ + \\ _ = @import("std"); // Don't mess with @builtins + \\ // @"comment" + \\} + \\ + ); +} + +// Contextually unescape when shadowing primitive types and values. +test "zig fmt: canonicalize symbols (primitive types)" { + try testTransform( + \\const @"anyopaque" = struct { + \\ @"u8": @"type" = true, + \\ @"_": @"false" = @"true", + \\ const @"type" = bool; + \\ const @"false" = bool; + \\ const @"true" = false; + \\}; + \\ + \\const U = union(@"null") { + \\ @"type", + \\ const @"null" = enum { + \\ @"type", + \\ }; + \\}; + \\ + \\test { + \\ const E = enum { @"anyopaque" }; + \\ _ = U{ .@"type" = {} }; + \\ _ = U.@"type"; + \\ _ = E.@"anyopaque"; + \\} + \\ + \\fn @"i10"(@"void": @"anyopaque", @"type": @"anyopaque".@"type") error{@"null"}!void { + \\ var @"f32" = @"void"; + \\ @"f32".@"u8" = false; + \\ _ = @"type"; + \\ _ = type; + \\ if (@"f32".@"u8") { + \\ return @"i10"(.{ .@"u8" = true, .@"_" = false }, false); + \\ } else { + \\ return error.@"null"; + \\ } + \\} + \\ + \\test @"i10" { + \\ try @"i10"(.{}, true); + \\ _ = @"void": while (null) |@"u3"| { + \\ break :@"void" @"u3"; + \\ }; + \\ _ = @"void": { + \\ break :@"void"; + \\ }; + \\ for ("hi", 0..) |@"u3", @"i4"| { + \\ _ = @"u3"; + \\ _ = @"i4"; + \\ } + \\ if (false) {} else |@"bool"| { + \\ _ = @"bool"; + \\ } + \\} + \\ + , + \\const @"anyopaque" = struct { + \\ u8: @"type" = true, + \\ _: @"false" = @"true", + \\ const @"type" = bool; + \\ const @"false" = bool; + \\ const @"true" = false; + \\}; + \\ + \\const U = union(@"null") { + \\ type, + \\ const @"null" = enum { + \\ type, + \\ }; + \\}; + \\ + \\test { + \\ const E = enum { anyopaque }; + \\ _ = U{ .type = {} }; + \\ _ = U.type; + \\ _ = E.anyopaque; + \\} + \\ + \\fn @"i10"(@"void": @"anyopaque", @"type": @"anyopaque".type) error{null}!void { + \\ var @"f32" = @"void"; + \\ @"f32".u8 = false; + \\ _ = @"type"; + \\ _ = type; + \\ if (@"f32".u8) { + \\ return @"i10"(.{ .u8 = true, ._ = false }, false); + \\ } else { + \\ return error.null; + \\ } + \\} + \\ + \\test @"i10" { + \\ try @"i10"(.{}, true); + \\ _ = void: while (null) |@"u3"| { + \\ break :void @"u3"; + \\ }; + \\ _ = void: { + \\ break :void; + \\ }; + \\ for ("hi", 0..) |@"u3", @"i4"| { + \\ _ = @"u3"; + \\ _ = @"i4"; + \\ } + \\ if (false) {} else |@"bool"| { + \\ _ = @"bool"; + \\ } + \\} + \\ + ); +} + +test "zig fmt: no space before newline before multiline string" { + try testCanonical( + \\const S = struct { + \\ text: []const u8, + \\ comment: []const u8, + \\}; + \\ + \\test { + \\ const s1 = .{ + \\ .text = + \\ \\hello + \\ \\world + \\ , + \\ .comment = "test", + \\ }; + \\ _ = s1; + \\ const s2 = .{ + \\ .comment = "test", + \\ .text = + \\ \\hello + \\ \\world + \\ , + \\ }; + \\ _ = s2; + \\} + \\ + ); +} + +// Normalize \xNN and \u{NN} escapes and unicode inside @"" escapes. +test "zig fmt: canonicalize symbols (character escapes)" { + try testTransform( + \\const @"\x46\x6f\x6f\x64" = struct { + \\ @"\x62\x61\x72\x6E": @"\x43\x72\x61\x62" = false, + \\ @"\u{67}\u{6C}o\u{70}\xFF": @"Cra\x62" = false, + \\ @"\x65\x72\x72\x6F\x72": Crab = true, + \\ @"\x74\x72\x79": Crab = true, + \\ @"\u{74}\u{79}\u{70}\u{65}": @"any\u{6F}\u{70}\u{61}\u{71}\u{75}\u{65}", + \\ + \\ const @"\x43\x72\x61\x62" = bool; + \\ const @"\x61\x6E\x79\x6F\x70\x61que" = void; + \\}; + \\ + \\test "unicode" { + \\ const @"cąbbäge ⚡" = 2; + \\ _ = @"cąbbäge ⚡"; + \\ const @"\u{01f422} friend\u{f6}" = 4; + \\ _ = @"🐢 friendö"; + \\} + \\ + , + \\const Food = struct { + \\ barn: Crab = false, + \\ @"glop\xFF": Crab = false, + \\ @"error": Crab = true, + \\ @"try": Crab = true, + \\ type: @"anyopaque", + \\ + \\ const Crab = bool; + \\ const @"anyopaque" = void; + \\}; + \\ + \\test "unicode" { + \\ const @"cąbbäge ⚡" = 2; + \\ _ = @"cąbbäge ⚡"; + \\ const @"\u{01f422} friend\u{f6}" = 4; + \\ _ = @"🐢 friendö"; + \\} + \\ + ); +} + +test "zig fmt: canonicalize symbols (asm)" { + try testTransform( + \\test "asm" { + \\ const @"null" = usize; + \\ const @"try": usize = 808; + \\ const arg: usize = 2; + \\ _ = asm volatile ("syscall" + \\ : [@"void"] "={rax}" (-> @"null"), + \\ : [@"error"] "{rax}" (@"try"), + \\ [@"arg1"] "{rdi}" (arg), + \\ [arg2] "{rsi}" (arg), + \\ [arg3] "{rdx}" (arg), + \\ : "rcx", "fn" + \\ ); + \\ + \\ const @"false": usize = 10; + \\ const @"true" = "explode"; + \\ _ = asm volatile (@"true" + \\ : [one] "={rax}" (@"false"), + \\ : [two] "{rax}" (@"false"), + \\ ); + \\} + \\ + , + \\test "asm" { + \\ const @"null" = usize; + \\ const @"try": usize = 808; + \\ const arg: usize = 2; + \\ _ = asm volatile ("syscall" + \\ : [void] "={rax}" (-> @"null"), + \\ : [@"error"] "{rax}" (@"try"), + \\ [arg1] "{rdi}" (arg), + \\ [arg2] "{rsi}" (arg), + \\ [arg3] "{rdx}" (arg), + \\ : .{ .rcx = true, .@"fn" = true } + \\ ); + \\ + \\ const @"false": usize = 10; + \\ const @"true" = "explode"; + \\ _ = asm volatile (@"true" + \\ : [one] "={rax}" (false), + \\ : [two] "{rax}" (@"false"), + \\ ); + \\} + \\ + ); +} + +test "zig fmt: don't canonicalize _ in enums" { + try testTransform( + \\const A = enum { + \\ first, + \\ second, + \\ third, + \\ _, + \\}; + \\const B = enum { + \\ @"_", + \\ @"__", + \\ @"___", + \\ @"____", + \\}; + \\const C = struct { + \\ @"_": u8, + \\ @"__": u8, + \\ @"___": u8, + \\ @"____": u8, + \\}; + \\const D = union { + \\ @"_": u8, + \\ @"__": u8, + \\ @"___": u8, + \\ @"____": u8, + \\}; + \\ + , + \\const A = enum { + \\ first, + \\ second, + \\ third, + \\ _, + \\}; + \\const B = enum { + \\ @"_", + \\ __, + \\ ___, + \\ ____, + \\}; + \\const C = struct { + \\ _: u8, + \\ __: u8, + \\ ___: u8, + \\ ____: u8, + \\}; + \\const D = union { + \\ _: u8, + \\ __: u8, + \\ ___: u8, + \\ ____: u8, + \\}; + \\ + ); +} + +test "zig fmt: error for missing sentinel value in sentinel slice" { + try testError( + \\const foo = foo[0..:]; + , &[_]Error{ + .expected_expr, + }); +} + +test "zig fmt: error for invalid bit range" { + try testError( + \\var x: []align(0:0:0)u8 = bar; + , &[_]Error{ + .invalid_bit_range, + }); +} + +test "zig fmt: error for ptr mod on array child type" { + try testError( + \\var a: [10]align(10) u8 = e; + \\var b: [10]const u8 = f; + \\var c: [10]volatile u8 = g; + \\var d: [10]allowzero u8 = h; + , &[_]Error{ + .ptr_mod_on_array_child_type, + .ptr_mod_on_array_child_type, + .ptr_mod_on_array_child_type, + .ptr_mod_on_array_child_type, + }); +} + +test "zig fmt: pointer type syntax to index" { + try testCanonical( + \\test { + \\ _ = .{}[*0]; + \\} + \\ + ); +} + +test "zig fmt: binop indentation in if statement" { + try testCanonical( + \\test { + \\ if (first_param_type.isGenericPoison() or + \\ (first_param_type.zigTypeTag(zcu) == .pointer and + \\ (first_param_type.ptrSize(zcu) == .One or + \\ first_param_type.ptrSize(zcu) == .C) and + \\ first_param_type.childType(zcu).eql(concrete_ty, zcu))) + \\ { + \\ f(x); + \\ } + \\} + \\ + ); +} + +test "zig fmt: test indentation after equals sign" { + try testCanonical( + \\test { + \\ const foo = + \\ if (1 == 2) + \\ 1 + \\ else if (3 > 4) + \\ 2 + \\ else + \\ 0; + \\ + \\ const foo, const bar = + \\ if (1 == 2) + \\ .{ 0, 0 } + \\ else if (3 > 4) + \\ .{ 1, 1 } + \\ else + \\ .{ 2, 2 }; + \\ + \\ while (foo) if (bar) + \\ f(x); + \\ + \\ foobar = + \\ if (true) + \\ 1 + \\ else + \\ 0; + \\ + \\ const foo = if (1 == 2) + \\ 1 + \\ else if (3 > 4) + \\ 2 + \\ else + \\ 0; + \\ + \\ const foo, const bar = if (1 == 2) + \\ .{ 0, 0 } + \\ else if (3 > 4) + \\ .{ 1, 1 } + \\ else + \\ .{ 2, 2 }; + \\ + \\ foobar = if (true) + \\ 1 + \\ else + \\ 0; + \\ + \\ const is_alphanum = + \\ (ch >= 'a' and ch <= 'z') or + \\ (ch >= 'A' and ch <= 'Z') or + \\ (ch >= '0' and ch <= '9'); + \\ + \\ const bar = 100 + calculate( + \\ 200, + \\ 300, + \\ ); + \\ + \\ const gcc_pragma = std.meta.stringToEnum(Directive, pp.expandedSlice(directive_tok)) orelse + \\ return pp.comp.addDiagnostic(.{ + \\ .tag = .unknown_gcc_pragma, + \\ .loc = directive_tok.loc, + \\ }, pp.expansionSlice(start_idx + 1)); + \\ + \\ const vec4s = + \\ [_][4]i32{ + \\ [_]i32{ 0, 1, 0, 0 }, + \\ [_]i32{ 0, -1, 0, 0 }, + \\ [_]i32{ 2, 1, 2, 0 }, + \\ }; + \\} + \\ + ); +} + +test "zig fmt: test indentation of if expressions" { + try testCanonical( + \\test { + \\ const foo = 1 + + \\ if (1 == 2) + \\ 2 + \\ else + \\ 0; + \\ + \\ const foo = 1 + if (1 == 2) + \\ 2 + \\ else + \\ 0; + \\ + \\ errval catch |e| + \\ if (e == error.Meow) + \\ return 0x1F408 + \\ else + \\ unreachable; + \\ + \\ errval catch |e| if (e == error.Meow) + \\ return 0x1F408 + \\ else + \\ unreachable; + \\ + \\ return if (1 == 2) + \\ 1 + \\ else if (3 > 4) + \\ 2 + \\ else + \\ 0; + \\} + \\ + ); +} + +test "zig fmt: indentation of comments within catch, else, orelse" { + try testCanonical( + \\comptime { + \\ _ = foo() catch + \\ // + \\ bar(); + \\ + \\ _ = if (foo) bar() else + \\ // + \\ qux(); + \\ + \\ _ = foo() orelse + \\ // + \\ qux(); + \\} + \\ + ); +} + +test "zig fmt: canonicalize cast builtins" { + try testTransform( + \\const foo = @alignCast(@ptrCast(bar)); + \\const baz = @constCast(@ptrCast(@addrSpaceCast(@volatileCast(@alignCast(bar))))); + \\ + , + \\const foo = @ptrCast(@alignCast(bar)); + \\const baz = @ptrCast(@alignCast(@addrSpaceCast(@constCast(@volatileCast(bar))))); + \\ + ); +} + +test "zig fmt: do not canonicalize invalid cast builtins" { + try testCanonical( + \\const foo = @alignCast(@volatileCast(@ptrCast(@alignCast(bar)))); + \\ + ); +} + +test "recovery: top level" { + try testError( + \\test "" {inline} + \\test "" {inline} + , &[_]Error{ + .expected_inlinable, + .expected_inlinable, + }); +} + +test "recovery: block statements" { + try testError( + \\test "" { + \\ foo + +; + \\ inline; + \\} + , &[_]Error{ + .expected_expr, + .expected_semi_after_stmt, + .expected_statement, + .expected_inlinable, + }); +} + +test "recovery: missing comma" { + try testError( + \\test "" { + \\ switch (foo) { + \\ 2 => {} + \\ 3 => {} + \\ else => { + \\ foo & bar +; + \\ } + \\ } + \\} + , &[_]Error{ + .expected_comma_after_switch_prong, + .expected_comma_after_switch_prong, + .expected_expr, + }); +} + +test "recovery: non-associative operators" { + try testError( + \\const x = a == b == c; + \\const x = a == b != c; + , &[_]Error{ + .chained_comparison_operators, + .chained_comparison_operators, + }); +} + +test "recovery: extra qualifier" { + try testError( + \\const a: *const const u8; + \\test "" + , &[_]Error{ + .extra_const_qualifier, + .expected_block, + }); +} + +test "recovery: missing return type" { + try testError( + \\fn foo() { + \\ a & b; + \\} + \\test "" + , &[_]Error{ + .expected_return_type, + .expected_block, + }); +} + +test "recovery: invalid extern/inline" { + try testError( + \\inline test "" { a & b; } + , &[_]Error{ + .expected_fn, + }); + try testError( + \\extern "" test "" { a & b; } + , &[_]Error{ + .expected_var_decl_or_fn, + }); +} + +test "recovery: missing semicolon" { + try testError( + \\test "" { + \\ comptime a & b + \\ c & d + \\ @foo + \\} + , &[_]Error{ + .expected_semi_after_stmt, + .expected_semi_after_stmt, + .expected_param_list, + .expected_semi_after_stmt, + }); +} + +// TODO after https://github.com/ziglang/zig/issues/35 is implemented, +// we should be able to recover from this *at any indentation level*, +// reporting a parse error and yet also parsing all the decls even +// inside structs. +test "recovery: extra '}' at top level" { + try testError( + \\}}} + \\test "" { + \\ a & b; + \\} + , &[_]Error{ + .expected_token, + }); +} + +test "recovery: mismatched bracket at top level" { + try testError( + \\const S = struct { + \\ arr: 128]?G + \\}; + , &[_]Error{ + .expected_comma_after_field, + }); +} + +test "recovery: invalid global error set access" { + try testError( + \\test "" { + \\ error & foo; + \\} + , &[_]Error{ + .expected_token, + }); +} + +test "recovery: invalid asterisk after pointer dereference" { + try testError( + \\test "" { + \\ var sequence = "repeat".*** 10; + \\} + , &[_]Error{ + .asterisk_after_ptr_deref, + .mismatched_binary_op_whitespace, + }); + try testError( + \\test "" { + \\ var sequence = "repeat".** 10&a; + \\} + , &[_]Error{ + .asterisk_after_ptr_deref, + .mismatched_binary_op_whitespace, + }); +} + +test "recovery: missing semicolon after if, for, while stmt" { + try testError( + \\test "" { + \\ if (foo) bar + \\ for (foo) |a| bar + \\ while (foo) bar + \\ a & b; + \\} + , &[_]Error{ + .expected_semi_or_else, + .expected_semi_or_else, + .expected_semi_or_else, + }); +} + +test "recovery: invalid comptime" { + try testError( + \\comptime + , &[_]Error{ + .expected_type_expr, + }); +} + +test "recovery: missing block after suspend" { + try testError( + \\fn foo() void { + \\ suspend; + \\ nosuspend; + \\} + , &[_]Error{ + .expected_block_or_expr, + .expected_block_or_expr, + }); +} + +test "recovery: missing block after for/while loops" { + try testError( + \\test "" { while (foo) } + , &[_]Error{ + .expected_block_or_assignment, + }); + try testError( + \\test "" { for (foo) |bar| } + , &[_]Error{ + .expected_block_or_assignment, + }); +} + +test "recovery: missing for payload" { + try testError( + \\comptime { + \\ const a = for(a) {}; + \\ const a: for(a) blk: {} = {}; + \\ for(a) {} + \\} + , &[_]Error{ + .expected_loop_payload, + .expected_loop_payload, + .expected_loop_payload, + }); +} + +test "recovery: missing comma in params" { + try testError( + \\fn foo(comptime bool what what) void { } + \\fn bar(a: i32, b: i32 c) void { } + \\ + , &[_]Error{ + .expected_comma_after_param, + .expected_comma_after_param, + .expected_comma_after_param, + }); +} + +test "recovery: missing while rbrace" { + try testError( + \\fn a() b { + \\ while (d) { + \\} + , &[_]Error{ + .expected_statement, + }); +} + +test "recovery: nonfinal varargs" { + try testError( + \\extern fn f(a: u32, ..., b: u32) void; + \\extern fn g(a: u32, ..., b: anytype) void; + \\extern fn h(a: u32, ..., ...) void; + , &[_]Error{ + .varargs_nonfinal, + .varargs_nonfinal, + .varargs_nonfinal, + }); +} + +test "recovery: eof in c pointer" { + try testError( + \\const Ptr = [*c + , &[_]Error{ + .expected_token, + }); +} + +test "matching whitespace on minus op" { + try testError( + \\ _ = 2 -1, + \\ _ = 2- 1, + \\ _ = 2- + \\ 2, + \\ _ = 2 + \\ -2, + , &[_]Error{ + .mismatched_binary_op_whitespace, + .mismatched_binary_op_whitespace, + .mismatched_binary_op_whitespace, + .mismatched_binary_op_whitespace, + }); + + try testError( + \\ _ = - 1, + \\ _ = -1, + \\ _ = 2 - -1, + \\ _ = 2 - 1, + \\ _ = 2-1, + \\ _ = 2 - + \\1, + \\ _ = 2 + \\ - 1, + , &[_]Error{}); +} + +test "ampersand" { + try testError( + \\ _ = bar && foo, + \\ _ = bar&&foo, + \\ _ = bar& & foo, + \\ _ = bar& &foo, + , &.{ + .invalid_ampersand_ampersand, + .invalid_ampersand_ampersand, + .mismatched_binary_op_whitespace, + .mismatched_binary_op_whitespace, + }); + + try testError( + \\ _ = bar & &foo, + \\ _ = bar & &&foo, + \\ _ = &&foo, + , &.{}); +} + +var fixed_buffer_mem: [100 * 1024]u8 = undefined; + +fn testParse(source: [:0]const u8, allocator: mem.Allocator, anything_changed: *bool) ![]u8 { + var c_tree = c.astParse(source, @intCast(source.len)); + defer c.astDeinit(&c_tree); + var tree = try zigAst(allocator, c_tree); + defer tree.deinit(allocator); + + // Skip consistency check under valgrind: Zig's tokenizer uses SIMD + // instructions (AVX-512) that valgrind does not support. + if (!@import("std").debug.inValgrind()) { + var zig_tree = try Ast.parse(allocator, source, .zig); + defer zig_tree.deinit(allocator); + try expectAstConsistent(tree, zig_tree, source); + } + + if (tree.errors.len != 0) { + return error.ParseError; + } + + const formatted = try tree.renderAlloc(allocator); + anything_changed.* = !mem.eql(u8, formatted, source); + return formatted; +} +fn testTransformImpl(allocator: mem.Allocator, fba: *std.heap.FixedBufferAllocator, source: [:0]const u8, expected_source: []const u8) !void { + // reset the fixed buffer allocator each run so that it can be re-used for each + // iteration of the failing index + fba.reset(); + var anything_changed: bool = undefined; + const result_source = try testParse(source, allocator, &anything_changed); + try std.testing.expectEqualStrings(expected_source, result_source); + const changes_expected = source.ptr != expected_source.ptr; + if (anything_changed != changes_expected) { + print("std.zig.render returned {} instead of {}\n", .{ anything_changed, changes_expected }); + return error.TestFailed; + } + try std.testing.expect(anything_changed == changes_expected); + allocator.free(result_source); +} +fn testTransform(source: [:0]const u8, expected_source: []const u8) !void { + var fixed_allocator = std.heap.FixedBufferAllocator.init(fixed_buffer_mem[0..]); + return std.testing.checkAllAllocationFailures(fixed_allocator.allocator(), testTransformImpl, .{ &fixed_allocator, source, expected_source }); +} +fn testCanonical(source: [:0]const u8) !void { + return testTransform(source, source); +} + +const Error = std.zig.Ast.Error.Tag; + +fn testError(source: [:0]const u8, expected_errors: []const Error) !void { + var c_tree = c.astParse(source, @intCast(source.len)); + defer c.astDeinit(&c_tree); + if (expected_errors.len == 0) { + try std.testing.expect(!c_tree.has_error); + } else { + try std.testing.expect(c_tree.has_error); + } +} + +// Returns the number of meaningful u32 fields in Node.Data for a given tag. +// 0 = data is undefined/unused, 1 = only first u32 is meaningful, 2 = both meaningful. +fn dataFieldCount(tag: Ast.Node.Tag) u2 { + return switch (tag) { + // data unused (undefined in Zig parser) + .identifier, + .string_literal, + .char_literal, + .number_literal, + .unreachable_literal, + .anyframe_literal, + .enum_literal, + .error_value, + => 0, + + // .node or .opt_node — only first u32 + .@"defer", + .@"comptime", + .@"nosuspend", + .@"suspend", + .@"resume", + .bool_not, + .negation, + .bit_not, + .negation_wrap, + .address_of, + .@"try", + .deref, + .optional_type, + .@"return", + => 1, + + // everything else — both u32 fields + else => 2, + }; +} + +fn expectAstConsistent(c_tree: Ast, zig_tree: Ast, source: [:0]const u8) !void { + _ = source; + + if (c_tree.tokens.len != zig_tree.tokens.len) { + print("token count mismatch: c={d} zig={d}\n", .{ c_tree.tokens.len, zig_tree.tokens.len }); + return error.TestExpectedEqual; + } + for (0..c_tree.tokens.len) |i| { + if (c_tree.tokens.items(.start)[i] != zig_tree.tokens.items(.start)[i]) { + print("token[{d}] start mismatch: c={d} zig={d}\n", .{ i, c_tree.tokens.items(.start)[i], zig_tree.tokens.items(.start)[i] }); + return error.TestExpectedEqual; + } + if (c_tree.tokens.items(.tag)[i] != zig_tree.tokens.items(.tag)[i]) { + print("token[{d}] tag mismatch: c={s} zig={s}\n", .{ i, @tagName(c_tree.tokens.items(.tag)[i]), @tagName(zig_tree.tokens.items(.tag)[i]) }); + return error.TestExpectedEqual; + } + } + + if (c_tree.nodes.len != zig_tree.nodes.len) { + print("node count mismatch: c={d} zig={d}\n", .{ c_tree.nodes.len, zig_tree.nodes.len }); + return error.TestExpectedEqual; + } + for (0..c_tree.nodes.len) |i| { + const c_tag = c_tree.nodes.items(.tag)[i]; + const z_tag = zig_tree.nodes.items(.tag)[i]; + if (c_tag != z_tag) { + print("node[{d}] tag mismatch: c={s} zig={s}\n", .{ i, @tagName(c_tag), @tagName(z_tag) }); + return error.TestExpectedEqual; + } + if (c_tree.nodes.items(.main_token)[i] != zig_tree.nodes.items(.main_token)[i]) { + print("node[{d}] main_token mismatch: c={d} zig={d}\n", .{ i, c_tree.nodes.items(.main_token)[i], zig_tree.nodes.items(.main_token)[i] }); + return error.TestExpectedEqual; + } + const field_count = dataFieldCount(c_tag); + if (field_count >= 1) { + const c_data: *const [2]u32 = @ptrCast(&c_tree.nodes.items(.data)[i]); + const z_data: *const [2]u32 = @ptrCast(&zig_tree.nodes.items(.data)[i]); + if (c_data[0] != z_data[0]) { + print("node[{d}] data[0] mismatch: c={d} zig={d}\n", .{ i, c_data[0], z_data[0] }); + return error.TestExpectedEqual; + } + if (field_count >= 2 and c_data[1] != z_data[1]) { + print("node[{d}] data[1] mismatch: c={d} zig={d}\n", .{ i, c_data[1], z_data[1] }); + return error.TestExpectedEqual; + } + } + } + + if (c_tree.extra_data.len != zig_tree.extra_data.len) { + print("extra_data length mismatch: c={d} zig={d}\n", .{ c_tree.extra_data.len, zig_tree.extra_data.len }); + return error.TestExpectedEqual; + } + for (0..c_tree.extra_data.len) |i| { + if (c_tree.extra_data[i] != zig_tree.extra_data[i]) { + print("extra_data[{d}] mismatch: c={d} zig={d}\n", .{ i, c_tree.extra_data[i], zig_tree.extra_data[i] }); + return error.TestExpectedEqual; + } + } +} + +const testing = std.testing; + +const Ast = std.zig.Ast; +const Allocator = std.mem.Allocator; + +const c = @cImport({ + @cInclude("ast.h"); +}); + +const zigToken = @import("./tokenizer_test.zig").zigToken; + +pub fn zigNode(token: c_uint) Ast.Node.Tag { + return switch (token) { + c.AST_NODE_ROOT => .root, + c.AST_NODE_TEST_DECL => .test_decl, + c.AST_NODE_GLOBAL_VAR_DECL => .global_var_decl, + c.AST_NODE_LOCAL_VAR_DECL => .local_var_decl, + c.AST_NODE_SIMPLE_VAR_DECL => .simple_var_decl, + c.AST_NODE_ALIGNED_VAR_DECL => .aligned_var_decl, + c.AST_NODE_ERRDEFER => .@"errdefer", + c.AST_NODE_DEFER => .@"defer", + c.AST_NODE_CATCH => .@"catch", + c.AST_NODE_FIELD_ACCESS => .field_access, + c.AST_NODE_UNWRAP_OPTIONAL => .unwrap_optional, + c.AST_NODE_EQUAL_EQUAL => .equal_equal, + c.AST_NODE_BANG_EQUAL => .bang_equal, + c.AST_NODE_LESS_THAN => .less_than, + c.AST_NODE_GREATER_THAN => .greater_than, + c.AST_NODE_LESS_OR_EQUAL => .less_or_equal, + c.AST_NODE_GREATER_OR_EQUAL => .greater_or_equal, + c.AST_NODE_ASSIGN_MUL => .assign_mul, + c.AST_NODE_ASSIGN_DIV => .assign_div, + c.AST_NODE_ASSIGN_MOD => .assign_mod, + c.AST_NODE_ASSIGN_ADD => .assign_add, + c.AST_NODE_ASSIGN_SUB => .assign_sub, + c.AST_NODE_ASSIGN_SHL => .assign_shl, + c.AST_NODE_ASSIGN_SHL_SAT => .assign_shl_sat, + c.AST_NODE_ASSIGN_SHR => .assign_shr, + c.AST_NODE_ASSIGN_BIT_AND => .assign_bit_and, + c.AST_NODE_ASSIGN_BIT_XOR => .assign_bit_xor, + c.AST_NODE_ASSIGN_BIT_OR => .assign_bit_or, + c.AST_NODE_ASSIGN_MUL_WRAP => .assign_mul_wrap, + c.AST_NODE_ASSIGN_ADD_WRAP => .assign_add_wrap, + c.AST_NODE_ASSIGN_SUB_WRAP => .assign_sub_wrap, + c.AST_NODE_ASSIGN_MUL_SAT => .assign_mul_sat, + c.AST_NODE_ASSIGN_ADD_SAT => .assign_add_sat, + c.AST_NODE_ASSIGN_SUB_SAT => .assign_sub_sat, + c.AST_NODE_ASSIGN => .assign, + c.AST_NODE_ASSIGN_DESTRUCTURE => .assign_destructure, + c.AST_NODE_MERGE_ERROR_SETS => .merge_error_sets, + c.AST_NODE_MUL => .mul, + c.AST_NODE_DIV => .div, + c.AST_NODE_MOD => .mod, + c.AST_NODE_ARRAY_MULT => .array_mult, + c.AST_NODE_MUL_WRAP => .mul_wrap, + c.AST_NODE_MUL_SAT => .mul_sat, + c.AST_NODE_ADD => .add, + c.AST_NODE_SUB => .sub, + c.AST_NODE_ARRAY_CAT => .array_cat, + c.AST_NODE_ADD_WRAP => .add_wrap, + c.AST_NODE_SUB_WRAP => .sub_wrap, + c.AST_NODE_ADD_SAT => .add_sat, + c.AST_NODE_SUB_SAT => .sub_sat, + c.AST_NODE_SHL => .shl, + c.AST_NODE_SHL_SAT => .shl_sat, + c.AST_NODE_SHR => .shr, + c.AST_NODE_BIT_AND => .bit_and, + c.AST_NODE_BIT_XOR => .bit_xor, + c.AST_NODE_BIT_OR => .bit_or, + c.AST_NODE_ORELSE => .@"orelse", + c.AST_NODE_BOOL_AND => .bool_and, + c.AST_NODE_BOOL_OR => .bool_or, + c.AST_NODE_BOOL_NOT => .bool_not, + c.AST_NODE_NEGATION => .negation, + c.AST_NODE_BIT_NOT => .bit_not, + c.AST_NODE_NEGATION_WRAP => .negation_wrap, + c.AST_NODE_ADDRESS_OF => .address_of, + c.AST_NODE_TRY => .@"try", + c.AST_NODE_OPTIONAL_TYPE => .optional_type, + c.AST_NODE_ARRAY_TYPE => .array_type, + c.AST_NODE_ARRAY_TYPE_SENTINEL => .array_type_sentinel, + c.AST_NODE_PTR_TYPE_ALIGNED => .ptr_type_aligned, + c.AST_NODE_PTR_TYPE_SENTINEL => .ptr_type_sentinel, + c.AST_NODE_PTR_TYPE => .ptr_type, + c.AST_NODE_PTR_TYPE_BIT_RANGE => .ptr_type_bit_range, + c.AST_NODE_SLICE_OPEN => .slice_open, + c.AST_NODE_SLICE => .slice, + c.AST_NODE_SLICE_SENTINEL => .slice_sentinel, + c.AST_NODE_DEREF => .deref, + c.AST_NODE_ARRAY_ACCESS => .array_access, + c.AST_NODE_ARRAY_INIT_ONE => .array_init_one, + c.AST_NODE_ARRAY_INIT_ONE_COMMA => .array_init_one_comma, + c.AST_NODE_ARRAY_INIT_DOT_TWO => .array_init_dot_two, + c.AST_NODE_ARRAY_INIT_DOT_TWO_COMMA => .array_init_dot_two_comma, + c.AST_NODE_ARRAY_INIT_DOT => .array_init_dot, + c.AST_NODE_ARRAY_INIT_DOT_COMMA => .array_init_dot_comma, + c.AST_NODE_ARRAY_INIT => .array_init, + c.AST_NODE_ARRAY_INIT_COMMA => .array_init_comma, + c.AST_NODE_STRUCT_INIT_ONE => .struct_init_one, + c.AST_NODE_STRUCT_INIT_ONE_COMMA => .struct_init_one_comma, + c.AST_NODE_STRUCT_INIT_DOT_TWO => .struct_init_dot_two, + c.AST_NODE_STRUCT_INIT_DOT_TWO_COMMA => .struct_init_dot_two_comma, + c.AST_NODE_STRUCT_INIT_DOT => .struct_init_dot, + c.AST_NODE_STRUCT_INIT_DOT_COMMA => .struct_init_dot_comma, + c.AST_NODE_STRUCT_INIT => .struct_init, + c.AST_NODE_STRUCT_INIT_COMMA => .struct_init_comma, + c.AST_NODE_CALL_ONE => .call_one, + c.AST_NODE_CALL_ONE_COMMA => .call_one_comma, + c.AST_NODE_CALL => .call, + c.AST_NODE_CALL_COMMA => .call_comma, + c.AST_NODE_SWITCH => .@"switch", + c.AST_NODE_SWITCH_COMMA => .switch_comma, + c.AST_NODE_SWITCH_CASE_ONE => .switch_case_one, + c.AST_NODE_SWITCH_CASE_INLINE_ONE => .switch_case_inline_one, + c.AST_NODE_SWITCH_CASE => .switch_case, + c.AST_NODE_SWITCH_CASE_INLINE => .switch_case_inline, + c.AST_NODE_SWITCH_RANGE => .switch_range, + c.AST_NODE_WHILE_SIMPLE => .while_simple, + c.AST_NODE_WHILE_CONT => .while_cont, + c.AST_NODE_WHILE => .@"while", + c.AST_NODE_FOR_SIMPLE => .for_simple, + c.AST_NODE_FOR => .@"for", + c.AST_NODE_FOR_RANGE => .for_range, + c.AST_NODE_IF_SIMPLE => .if_simple, + c.AST_NODE_IF => .@"if", + c.AST_NODE_SUSPEND => .@"suspend", + c.AST_NODE_RESUME => .@"resume", + c.AST_NODE_CONTINUE => .@"continue", + c.AST_NODE_BREAK => .@"break", + c.AST_NODE_RETURN => .@"return", + c.AST_NODE_FN_PROTO_SIMPLE => .fn_proto_simple, + c.AST_NODE_FN_PROTO_MULTI => .fn_proto_multi, + c.AST_NODE_FN_PROTO_ONE => .fn_proto_one, + c.AST_NODE_FN_PROTO => .fn_proto, + c.AST_NODE_FN_DECL => .fn_decl, + c.AST_NODE_ANYFRAME_TYPE => .anyframe_type, + c.AST_NODE_ANYFRAME_LITERAL => .anyframe_literal, + c.AST_NODE_CHAR_LITERAL => .char_literal, + c.AST_NODE_NUMBER_LITERAL => .number_literal, + c.AST_NODE_UNREACHABLE_LITERAL => .unreachable_literal, + c.AST_NODE_IDENTIFIER => .identifier, + c.AST_NODE_ENUM_LITERAL => .enum_literal, + c.AST_NODE_STRING_LITERAL => .string_literal, + c.AST_NODE_MULTILINE_STRING_LITERAL => .multiline_string_literal, + c.AST_NODE_GROUPED_EXPRESSION => .grouped_expression, + c.AST_NODE_BUILTIN_CALL_TWO => .builtin_call_two, + c.AST_NODE_BUILTIN_CALL_TWO_COMMA => .builtin_call_two_comma, + c.AST_NODE_BUILTIN_CALL => .builtin_call, + c.AST_NODE_BUILTIN_CALL_COMMA => .builtin_call_comma, + c.AST_NODE_ERROR_SET_DECL => .error_set_decl, + c.AST_NODE_CONTAINER_DECL => .container_decl, + c.AST_NODE_CONTAINER_DECL_TRAILING => .container_decl_trailing, + c.AST_NODE_CONTAINER_DECL_TWO => .container_decl_two, + c.AST_NODE_CONTAINER_DECL_TWO_TRAILING => .container_decl_two_trailing, + c.AST_NODE_CONTAINER_DECL_ARG => .container_decl_arg, + c.AST_NODE_CONTAINER_DECL_ARG_TRAILING => .container_decl_arg_trailing, + c.AST_NODE_TAGGED_UNION => .tagged_union, + c.AST_NODE_TAGGED_UNION_TRAILING => .tagged_union_trailing, + c.AST_NODE_TAGGED_UNION_TWO => .tagged_union_two, + c.AST_NODE_TAGGED_UNION_TWO_TRAILING => .tagged_union_two_trailing, + c.AST_NODE_TAGGED_UNION_ENUM_TAG => .tagged_union_enum_tag, + c.AST_NODE_TAGGED_UNION_ENUM_TAG_TRAILING => .tagged_union_enum_tag_trailing, + c.AST_NODE_CONTAINER_FIELD_INIT => .container_field_init, + c.AST_NODE_CONTAINER_FIELD_ALIGN => .container_field_align, + c.AST_NODE_CONTAINER_FIELD => .container_field, + c.AST_NODE_COMPTIME => .@"comptime", + c.AST_NODE_NOSUSPEND => .@"nosuspend", + c.AST_NODE_BLOCK_TWO => .block_two, + c.AST_NODE_BLOCK_TWO_SEMICOLON => .block_two_semicolon, + c.AST_NODE_BLOCK => .block, + c.AST_NODE_BLOCK_SEMICOLON => .block_semicolon, + c.AST_NODE_ASM_SIMPLE => .asm_simple, + c.AST_NODE_ASM_LEGACY => .asm_legacy, + c.AST_NODE_ASM => .@"asm", + c.AST_NODE_ASM_OUTPUT => .asm_output, + c.AST_NODE_ASM_INPUT => .asm_input, + c.AST_NODE_ERROR_VALUE => .error_value, + c.AST_NODE_ERROR_UNION => .error_union, + else => undefined, + }; +} + +fn toIndex(v: u32) Ast.Node.Index { + return @enumFromInt(v); +} + +fn toOptIndex(v: u32) Ast.Node.OptionalIndex { + return if (v == 0) .none else @enumFromInt(v); +} + +fn toExtraIndex(v: u32) Ast.ExtraIndex { + return @enumFromInt(v); +} + +fn toOptTokenIndex(v: u32) Ast.OptionalTokenIndex { + return @enumFromInt(v); +} + +fn zigData(tag: Ast.Node.Tag, lhs: u32, rhs: u32) Ast.Node.Data { + return switch (tag) { + // data unused + .identifier, + .string_literal, + .char_literal, + .number_literal, + .unreachable_literal, + .anyframe_literal, + .enum_literal, + .error_value, + => .{ .opt_node_and_opt_node = .{ toOptIndex(lhs), toOptIndex(rhs) } }, + + // .node (single node index) + .@"defer", + .@"comptime", + .@"nosuspend", + .@"suspend", + .@"resume", + .bool_not, + .negation, + .bit_not, + .negation_wrap, + .address_of, + .@"try", + .deref, + .optional_type, + => .{ .node = toIndex(lhs) }, + + // .opt_node (single optional node) + .@"return", + => .{ .opt_node = toOptIndex(lhs) }, + + // .node_and_node + .fn_decl, + .container_field_align, + .error_union, + .@"catch", + .equal_equal, + .bang_equal, + .less_than, + .greater_than, + .less_or_equal, + .greater_or_equal, + .assign_mul, + .assign_div, + .assign_mod, + .assign_add, + .assign_sub, + .assign_shl, + .assign_shl_sat, + .assign_shr, + .assign_bit_and, + .assign_bit_xor, + .assign_bit_or, + .assign_mul_wrap, + .assign_add_wrap, + .assign_sub_wrap, + .assign_mul_sat, + .assign_add_sat, + .assign_sub_sat, + .assign, + .merge_error_sets, + .mul, + .div, + .mod, + .array_mult, + .mul_wrap, + .mul_sat, + .add, + .sub, + .array_cat, + .add_wrap, + .sub_wrap, + .add_sat, + .sub_sat, + .shl, + .shl_sat, + .shr, + .bit_and, + .bit_xor, + .bit_or, + .@"orelse", + .bool_and, + .bool_or, + .array_type, + .array_access, + .switch_range, + => .{ .node_and_node = .{ toIndex(lhs), toIndex(rhs) } }, + + // .opt_node_and_opt_node + .fn_proto_simple, + .simple_var_decl, + .block_two, + .block_two_semicolon, + .builtin_call_two, + .builtin_call_two_comma, + .container_decl_two, + .container_decl_two_trailing, + .tagged_union_two, + .tagged_union_two_trailing, + .struct_init_dot_two, + .struct_init_dot_two_comma, + .array_init_dot_two, + .array_init_dot_two_comma, + => .{ .opt_node_and_opt_node = .{ toOptIndex(lhs), toOptIndex(rhs) } }, + + // .node_and_opt_node + .call_one, + .call_one_comma, + .struct_init_one, + .struct_init_one_comma, + .container_field_init, + .aligned_var_decl, + => .{ .node_and_opt_node = .{ toIndex(lhs), toOptIndex(rhs) } }, + + // .node_and_node (array_init_one uses node_and_node, not + // node_and_opt_node) + .array_init_one, + .array_init_one_comma, + => .{ .node_and_node = .{ toIndex(lhs), toIndex(rhs) } }, + + // .opt_node_and_node + .ptr_type_aligned, + .ptr_type_sentinel, + .switch_case_one, + .switch_case_inline_one, + => .{ .opt_node_and_node = .{ toOptIndex(lhs), toIndex(rhs) } }, + + // .node_and_extra + .call, + .call_comma, + .container_field, + .array_type_sentinel, + .slice, + .slice_sentinel, + .array_init, + .array_init_comma, + .struct_init, + .struct_init_comma, + .@"switch", + .switch_comma, + .container_decl_arg, + .container_decl_arg_trailing, + .tagged_union_enum_tag, + .tagged_union_enum_tag_trailing, + .@"asm", + => .{ .node_and_extra = .{ toIndex(lhs), toExtraIndex(rhs) } }, + + // .extra_and_node + .assign_destructure, + .switch_case, + .switch_case_inline, + .ptr_type, + .ptr_type_bit_range, + => .{ .extra_and_node = .{ toExtraIndex(lhs), toIndex(rhs) } }, + + // .extra_and_opt_node + .global_var_decl, + .local_var_decl, + .fn_proto_multi, + .fn_proto_one, + .fn_proto, + => .{ .extra_and_opt_node = .{ toExtraIndex(lhs), toOptIndex(rhs) } }, + + // .extra_range (SubRange) + .root, + .block, + .block_semicolon, + .builtin_call, + .builtin_call_comma, + .container_decl, + .container_decl_trailing, + .tagged_union, + .tagged_union_trailing, + .array_init_dot, + .array_init_dot_comma, + .struct_init_dot, + .struct_init_dot_comma, + => .{ .extra_range = .{ .start = toExtraIndex(lhs), .end = toExtraIndex(rhs) } }, + + // .node_and_token + .grouped_expression, + .asm_input, + .asm_simple, + .field_access, + .unwrap_optional, + => .{ .node_and_token = .{ toIndex(lhs), rhs } }, + + // .opt_node_and_token + .asm_output, + => .{ .opt_node_and_token = .{ toOptIndex(lhs), rhs } }, + + // .opt_token_and_node + .test_decl, + .@"errdefer", + => .{ .opt_token_and_node = .{ toOptTokenIndex(lhs), toIndex(rhs) } }, + + // .opt_token_and_opt_node + .@"break", + .@"continue", + => .{ .opt_token_and_opt_node = .{ toOptTokenIndex(lhs), toOptIndex(rhs) } }, + + // .token_and_token + .error_set_decl, + .multiline_string_literal, + => .{ .token_and_token = .{ lhs, rhs } }, + + // .token_and_node + .anyframe_type, + => .{ .token_and_node = .{ lhs, toIndex(rhs) } }, + + // .node_and_node for slice_open (lhs[rhs..]) + .slice_open, + => .{ .node_and_node = .{ toIndex(lhs), toIndex(rhs) } }, + + .while_simple, + .for_simple, + .if_simple, + => .{ .node_and_node = .{ toIndex(lhs), toIndex(rhs) } }, + + .while_cont, + .@"while", + .@"if", + => .{ .node_and_extra = .{ toIndex(lhs), toExtraIndex(rhs) } }, + + .for_range, + => .{ .node_and_opt_node = .{ toIndex(lhs), toOptIndex(rhs) } }, + + .@"for", + => .{ .@"for" = .{ toExtraIndex(lhs), @bitCast(rhs) } }, + + .asm_legacy, + => .{ .node_and_extra = .{ toIndex(lhs), toExtraIndex(rhs) } }, + }; +} + +// zigAst converts a c.Ast to std.Zig.Ast. The resulting Ast should be freed with deinit(). +pub fn zigAst(gpa: Allocator, c_ast: c.Ast) !Ast { + var tokens = Ast.TokenList{}; + try tokens.resize(gpa, c_ast.tokens.len); + errdefer tokens.deinit(gpa); + + for (0..c_ast.tokens.len) |i| + tokens.set(i, .{ + .tag = zigToken(c_ast.tokens.tags[i]), + .start = c_ast.tokens.starts[i], + }); + + var nodes = Ast.NodeList{}; + try nodes.resize(gpa, c_ast.nodes.len); + errdefer nodes.deinit(gpa); + + for (0..c_ast.nodes.len) |i| { + const tag = zigNode(c_ast.nodes.tags[i]); + nodes.set(i, .{ + .tag = tag, + .main_token = c_ast.nodes.main_tokens[i], + .data = zigData(tag, c_ast.nodes.datas[i].lhs, c_ast.nodes.datas[i].rhs), + }); + } + + const extra_data = try gpa.alloc(u32, c_ast.extra_data.len); + errdefer gpa.free(extra_data); + @memcpy(extra_data, c_ast.extra_data.arr[0..c_ast.extra_data.len]); + + const errors = if (c_ast.has_error) blk: { + const errs = try gpa.alloc(Ast.Error, 1); + errs[0] = .{ .tag = .expected_token, .token = 0, .extra = .{ .none = {} } }; + break :blk errs; + } else try gpa.alloc(Ast.Error, 0); + errdefer gpa.free(errors); + + return Ast{ + .source = c_ast.source[0..c_ast.source_len :0], + .mode = .zig, + .tokens = tokens.slice(), + .nodes = nodes.slice(), + .extra_data = extra_data, + .errors = errors, + }; +} diff --git a/stage0/test_all.zig b/stage0/test_all.zig new file mode 100644 index 0000000000..560d7d77d4 --- /dev/null +++ b/stage0/test_all.zig @@ -0,0 +1,5 @@ +test "zig0 test suite" { + _ = @import("tokenizer_test.zig"); + _ = @import("parser_test.zig"); + _ = @import("astgen_test.zig"); +} diff --git a/stage0/tokenizer.c b/stage0/tokenizer.c new file mode 100644 index 0000000000..c103160b36 --- /dev/null +++ b/stage0/tokenizer.c @@ -0,0 +1,1096 @@ +#include "common.h" + +#include +#include +#include + +#include "tokenizer.h" + +typedef struct { + const char* keyword; + TokenizerTag tag; +} KeywordMap; + +const char* tokenizerGetTagString(TokenizerTag tag) { + switch (tag) { + TOKENIZER_FOREACH_TAG_ENUM(TOKENIZER_GENERATE_CASE) + default: + return "UNKNOWN"; + } +} + +// clang-format off +const KeywordMap keywords[] = { + { "addrspace", TOKEN_KEYWORD_ADDRSPACE }, + { "align", TOKEN_KEYWORD_ALIGN }, + { "allowzero", TOKEN_KEYWORD_ALLOWZERO }, + { "and", TOKEN_KEYWORD_AND }, + { "anyframe", TOKEN_KEYWORD_ANYFRAME }, + { "anytype", TOKEN_KEYWORD_ANYTYPE }, + { "asm", TOKEN_KEYWORD_ASM }, + { "break", TOKEN_KEYWORD_BREAK }, + { "callconv", TOKEN_KEYWORD_CALLCONV }, + { "catch", TOKEN_KEYWORD_CATCH }, + { "comptime", TOKEN_KEYWORD_COMPTIME }, + { "const", TOKEN_KEYWORD_CONST }, + { "continue", TOKEN_KEYWORD_CONTINUE }, + { "defer", TOKEN_KEYWORD_DEFER }, + { "else", TOKEN_KEYWORD_ELSE }, + { "enum", TOKEN_KEYWORD_ENUM }, + { "errdefer", TOKEN_KEYWORD_ERRDEFER }, + { "error", TOKEN_KEYWORD_ERROR }, + { "export", TOKEN_KEYWORD_EXPORT }, + { "extern", TOKEN_KEYWORD_EXTERN }, + { "fn", TOKEN_KEYWORD_FN }, + { "for", TOKEN_KEYWORD_FOR }, + { "if", TOKEN_KEYWORD_IF }, + { "inline", TOKEN_KEYWORD_INLINE }, + { "linksection", TOKEN_KEYWORD_LINKSECTION }, + { "noalias", TOKEN_KEYWORD_NOALIAS }, + { "noinline", TOKEN_KEYWORD_NOINLINE }, + { "nosuspend", TOKEN_KEYWORD_NOSUSPEND }, + { "opaque", TOKEN_KEYWORD_OPAQUE }, + { "or", TOKEN_KEYWORD_OR }, + { "orelse", TOKEN_KEYWORD_ORELSE }, + { "packed", TOKEN_KEYWORD_PACKED }, + { "pub", TOKEN_KEYWORD_PUB }, + { "resume", TOKEN_KEYWORD_RESUME }, + { "return", TOKEN_KEYWORD_RETURN }, + { "struct", TOKEN_KEYWORD_STRUCT }, + { "suspend", TOKEN_KEYWORD_SUSPEND }, + { "switch", TOKEN_KEYWORD_SWITCH }, + { "test", TOKEN_KEYWORD_TEST }, + { "threadlocal", TOKEN_KEYWORD_THREADLOCAL }, + { "try", TOKEN_KEYWORD_TRY }, + { "union", TOKEN_KEYWORD_UNION }, + { "unreachable", TOKEN_KEYWORD_UNREACHABLE }, + { "var", TOKEN_KEYWORD_VAR }, + { "volatile", TOKEN_KEYWORD_VOLATILE }, + { "while", TOKEN_KEYWORD_WHILE } +}; +// clang-format on + +// TODO binary search +static TokenizerTag getKeyword(const char* bytes, const uint32_t len) { + for (unsigned long i = 0; i < sizeof(keywords) / sizeof(KeywordMap); i++) { + size_t klen = strlen(keywords[i].keyword); + size_t minlen = klen < len ? klen : len; + int cmp = strncmp(bytes, keywords[i].keyword, minlen); + if (cmp == 0) { + if (len == klen) { + return keywords[i].tag; + } else if (len < klen) { + return TOKEN_INVALID; + } + // len > klen: input is longer than keyword (e.g., "orelse" vs + // "or"), continue searching. + } else if (cmp < 0) { + return TOKEN_INVALID; + } + } + return TOKEN_INVALID; +} + +Tokenizer tokenizerInit(const char* buffer, const uint32_t len) { + return (Tokenizer) { + .buffer = buffer, + .buffer_len = len, + .index = (len >= 3 && memcmp(buffer, "\xEF\xBB\xBF", 3) == 0) ? 3 : 0, + }; +} + +TokenizerToken tokenizerNext(Tokenizer* self) { + TokenizerToken result = (TokenizerToken) { + .tag = TOKEN_INVALID, + .loc = { + .start = self->index, + }, + }; + + TokenizerState state = TOKENIZER_STATE_START; + +state: + switch (state) { + case TOKENIZER_STATE_START: + switch (self->buffer[self->index]) { + case 0: + if (self->index == self->buffer_len) { + return (TokenizerToken) { .tag = TOKEN_EOF, + .loc = { + .start = self->index, + .end = self->index, + } }; + } else { + state = TOKENIZER_STATE_INVALID; + goto state; + } + case ' ': + case '\n': + case '\t': + case '\r': + self->index++; + result.loc.start = self->index; + goto state; + case '"': + result.tag = TOKEN_STRING_LITERAL; + state = TOKENIZER_STATE_STRING_LITERAL; + goto state; + case '\'': + result.tag = TOKEN_CHAR_LITERAL; + state = TOKENIZER_STATE_CHAR_LITERAL; + goto state; + case 'a' ... 'z': + case 'A' ... 'Z': + case '_': + result.tag = TOKEN_IDENTIFIER; + state = TOKENIZER_STATE_IDENTIFIER; + goto state; + case '@': + state = TOKENIZER_STATE_SAW_AT_SIGN; + goto state; + case '=': + state = TOKENIZER_STATE_EQUAL; + goto state; + case '!': + state = TOKENIZER_STATE_BANG; + goto state; + case '|': + state = TOKENIZER_STATE_PIPE; + goto state; + case '(': + result.tag = TOKEN_L_PAREN; + self->index++; + break; + case ')': + result.tag = TOKEN_R_PAREN; + self->index++; + break; + case '[': + result.tag = TOKEN_L_BRACKET; + self->index++; + break; + case ']': + result.tag = TOKEN_R_BRACKET; + self->index++; + break; + case ';': + result.tag = TOKEN_SEMICOLON; + self->index++; + break; + case ',': + result.tag = TOKEN_COMMA; + self->index++; + break; + case '?': + result.tag = TOKEN_QUESTION_MARK; + self->index++; + break; + case ':': + result.tag = TOKEN_COLON; + self->index++; + break; + case '%': + state = TOKENIZER_STATE_PERCENT; + goto state; + case '*': + state = TOKENIZER_STATE_ASTERISK; + goto state; + case '+': + state = TOKENIZER_STATE_PLUS; + goto state; + case '<': + state = TOKENIZER_STATE_ANGLE_BRACKET_LEFT; + goto state; + case '>': + state = TOKENIZER_STATE_ANGLE_BRACKET_RIGHT; + goto state; + case '^': + state = TOKENIZER_STATE_CARET; + goto state; + case '\\': + result.tag = TOKEN_MULTILINE_STRING_LITERAL_LINE; + state = TOKENIZER_STATE_BACKSLASH; + goto state; + case '{': + result.tag = TOKEN_L_BRACE; + self->index++; + break; + case '}': + result.tag = TOKEN_R_BRACE; + self->index++; + break; + case '~': + result.tag = TOKEN_TILDE; + self->index++; + break; + case '.': + state = TOKENIZER_STATE_PERIOD; + goto state; + case '-': + state = TOKENIZER_STATE_MINUS; + goto state; + case '/': + state = TOKENIZER_STATE_SLASH; + goto state; + case '&': + state = TOKENIZER_STATE_AMPERSAND; + goto state; + case '0' ... '9': + result.tag = TOKEN_NUMBER_LITERAL; + self->index++; + state = TOKENIZER_STATE_INT; + goto state; + default: + state = TOKENIZER_STATE_INVALID; + goto state; + }; + break; + + case TOKENIZER_STATE_EXPECT_NEWLINE: + self->index++; + switch (self->buffer[self->index]) { + case 0: + if (self->index == self->buffer_len) { + result.tag = TOKEN_INVALID; + } else { + state = TOKENIZER_STATE_INVALID; + goto state; + } + break; + case '\n': + self->index++; + result.loc.start = self->index; + state = TOKENIZER_STATE_START; + goto state; + default: + state = TOKENIZER_STATE_INVALID; + goto state; + } + break; + + case TOKENIZER_STATE_INVALID: + self->index++; + switch (self->buffer[self->index]) { + case 0: + if (self->index == self->buffer_len) { + result.tag = TOKEN_INVALID; + } else { + state = TOKENIZER_STATE_INVALID; + goto state; + } + break; + case '\n': + result.tag = TOKEN_INVALID; + break; + default: + state = TOKENIZER_STATE_INVALID; + goto state; + } + break; + + case TOKENIZER_STATE_SAW_AT_SIGN: + self->index++; + switch (self->buffer[self->index]) { + case 0: + case '\n': + result.tag = TOKEN_INVALID; + break; + case '"': + result.tag = TOKEN_IDENTIFIER; + state = TOKENIZER_STATE_STRING_LITERAL; + goto state; + case 'a' ... 'z': + case 'A' ... 'Z': + case '_': + result.tag = TOKEN_BUILTIN; + state = TOKENIZER_STATE_BUILTIN; + goto state; + default: + state = TOKENIZER_STATE_INVALID; + goto state; + } + break; + + case TOKENIZER_STATE_AMPERSAND: + self->index++; + switch (self->buffer[self->index]) { + case '=': + result.tag = TOKEN_AMPERSAND_EQUAL; + self->index++; + break; + default: + result.tag = TOKEN_AMPERSAND; + break; + } + break; + + case TOKENIZER_STATE_ASTERISK: + self->index++; + switch (self->buffer[self->index]) { + case '=': + result.tag = TOKEN_ASTERISK_EQUAL; + self->index++; + break; + case '*': + result.tag = TOKEN_ASTERISK_ASTERISK; + self->index++; + break; + case '%': + state = TOKENIZER_STATE_ASTERISK_PERCENT; + goto state; + case '|': + state = TOKENIZER_STATE_ASTERISK_PIPE; + goto state; + default: + result.tag = TOKEN_ASTERISK; + break; + } + break; + + case TOKENIZER_STATE_ASTERISK_PERCENT: + self->index++; + switch (self->buffer[self->index]) { + case '=': + result.tag = TOKEN_ASTERISK_PERCENT_EQUAL; + self->index++; + break; + default: + result.tag = TOKEN_ASTERISK_PERCENT; + break; + } + break; + + case TOKENIZER_STATE_ASTERISK_PIPE: + self->index++; + switch (self->buffer[self->index]) { + case '=': + result.tag = TOKEN_ASTERISK_PIPE_EQUAL; + self->index++; + break; + default: + result.tag = TOKEN_ASTERISK_PIPE; + break; + } + break; + + case TOKENIZER_STATE_PERCENT: + self->index++; + switch (self->buffer[self->index]) { + case '=': + result.tag = TOKEN_PERCENT_EQUAL; + self->index++; + break; + default: + result.tag = TOKEN_PERCENT; + break; + } + break; + + case TOKENIZER_STATE_PLUS: + self->index++; + switch (self->buffer[self->index]) { + case '=': + result.tag = TOKEN_PLUS_EQUAL; + self->index++; + break; + case '+': + result.tag = TOKEN_PLUS_PLUS; + self->index++; + break; + case '%': + state = TOKENIZER_STATE_PLUS_PERCENT; + goto state; + case '|': + state = TOKENIZER_STATE_PLUS_PIPE; + goto state; + default: + result.tag = TOKEN_PLUS; + break; + } + break; + + case TOKENIZER_STATE_PLUS_PERCENT: + self->index++; + switch (self->buffer[self->index]) { + case '=': + result.tag = TOKEN_PLUS_PERCENT_EQUAL; + self->index++; + break; + default: + result.tag = TOKEN_PLUS_PERCENT; + break; + } + break; + + case TOKENIZER_STATE_PLUS_PIPE: + self->index++; + switch (self->buffer[self->index]) { + case '=': + result.tag = TOKEN_PLUS_PIPE_EQUAL; + self->index++; + break; + default: + result.tag = TOKEN_PLUS_PIPE; + break; + } + break; + + case TOKENIZER_STATE_CARET: + self->index++; + switch (self->buffer[self->index]) { + case '=': + result.tag = TOKEN_CARET_EQUAL; + self->index++; + break; + default: + result.tag = TOKEN_CARET; + break; + } + break; + + case TOKENIZER_STATE_IDENTIFIER: + self->index++; + switch (self->buffer[self->index]) { + case 'a' ... 'z': + case 'A' ... 'Z': + case '_': + case '0' ... '9': + state = TOKENIZER_STATE_IDENTIFIER; + goto state; + default:; // Once we're at C23, this semicolon can be removed. + const char* start = self->buffer + result.loc.start; + uint32_t len = self->index - result.loc.start; + TokenizerTag tag = getKeyword(start, len); + if (tag != TOKEN_INVALID) + result.tag = tag; + } + break; + + case TOKENIZER_STATE_BUILTIN: + self->index++; + switch (self->buffer[self->index]) { + case 'a' ... 'z': + case 'A' ... 'Z': + case '_': + case '0' ... '9': + state = TOKENIZER_STATE_BUILTIN; + goto state; + break; + } + break; + + case TOKENIZER_STATE_BACKSLASH: + self->index++; + switch (self->buffer[self->index]) { + case 0: + result.tag = TOKEN_INVALID; + break; + case '\\': + state = TOKENIZER_STATE_MULTILINE_STRING_LITERAL_LINE; + goto state; + case '\n': + result.tag = TOKEN_INVALID; + break; + default: + state = TOKENIZER_STATE_INVALID; + goto state; + } + break; + + case TOKENIZER_STATE_STRING_LITERAL: + self->index++; + switch (self->buffer[self->index]) { + case 0: + if (self->index != self->buffer_len) { + state = TOKENIZER_STATE_INVALID; + goto state; + } else { + result.tag = TOKEN_INVALID; + } + break; + case '\n': + result.tag = TOKEN_INVALID; + break; + case '\\': + state = TOKENIZER_STATE_STRING_LITERAL_BACKSLASH; + goto state; + case '"': + self->index++; + break; + case 0x01 ... 0x09: + case 0x0b ... 0x1f: + case 0x7f: + state = TOKENIZER_STATE_INVALID; + goto state; + default: + state = TOKENIZER_STATE_STRING_LITERAL; + goto state; + } + break; + + case TOKENIZER_STATE_STRING_LITERAL_BACKSLASH: + self->index++; + switch (self->buffer[self->index]) { + case 0: + case '\n': + result.tag = TOKEN_INVALID; + break; + default: + state = TOKENIZER_STATE_STRING_LITERAL; + goto state; + } + break; + + case TOKENIZER_STATE_CHAR_LITERAL: + self->index++; + switch (self->buffer[self->index]) { + case 0: + if (self->index != self->buffer_len) { + state = TOKENIZER_STATE_INVALID; + goto state; + } else { + result.tag = TOKEN_INVALID; + } + break; + case '\n': + result.tag = TOKEN_INVALID; + break; + case '\\': + state = TOKENIZER_STATE_CHAR_LITERAL_BACKSLASH; + goto state; + case '\'': + self->index++; + break; + case 0x01 ... 0x09: + case 0x0b ... 0x1f: + case 0x7f: + state = TOKENIZER_STATE_INVALID; + goto state; + default: + state = TOKENIZER_STATE_CHAR_LITERAL; + goto state; + } + break; + + case TOKENIZER_STATE_CHAR_LITERAL_BACKSLASH: + self->index++; + switch (self->buffer[self->index]) { + case 0: + if (self->index != self->buffer_len) { + state = TOKENIZER_STATE_INVALID; + goto state; + } else { + result.tag = TOKEN_INVALID; + } + break; + case '\n': + result.tag = TOKEN_INVALID; + break; + case 0x01 ... 0x09: + case 0x0b ... 0x1f: + case 0x7f: + state = TOKENIZER_STATE_INVALID; + goto state; + default: + state = TOKENIZER_STATE_CHAR_LITERAL; + goto state; + } + break; + + case TOKENIZER_STATE_MULTILINE_STRING_LITERAL_LINE: + self->index++; + switch (self->buffer[self->index]) { + case 0: + if (self->index != self->buffer_len) { + state = TOKENIZER_STATE_INVALID; + goto state; + } + break; + case '\n': + break; + case '\r': + if (self->buffer[self->index + 1] != '\n') { + state = TOKENIZER_STATE_INVALID; + goto state; + } + break; + case 0x01 ... 0x09: + case 0x0b ... 0x0c: + case 0x0e ... 0x1f: + case 0x7f: + state = TOKENIZER_STATE_INVALID; + goto state; + default: + state = TOKENIZER_STATE_MULTILINE_STRING_LITERAL_LINE; + goto state; + } + break; + + case TOKENIZER_STATE_BANG: + self->index++; + switch (self->buffer[self->index]) { + case '=': + result.tag = TOKEN_BANG_EQUAL; + self->index++; + break; + default: + result.tag = TOKEN_BANG; + break; + } + break; + + case TOKENIZER_STATE_PIPE: + self->index++; + switch (self->buffer[self->index]) { + case '=': + result.tag = TOKEN_PIPE_EQUAL; + self->index++; + break; + case '|': + result.tag = TOKEN_PIPE_PIPE; + self->index++; + break; + default: + result.tag = TOKEN_PIPE; + break; + } + break; + + case TOKENIZER_STATE_EQUAL: + self->index++; + switch (self->buffer[self->index]) { + case '=': + result.tag = TOKEN_EQUAL_EQUAL; + self->index++; + break; + case '>': + result.tag = TOKEN_EQUAL_ANGLE_BRACKET_RIGHT; + self->index++; + break; + default: + result.tag = TOKEN_EQUAL; + break; + } + break; + + case TOKENIZER_STATE_MINUS: + self->index++; + switch (self->buffer[self->index]) { + case '>': + result.tag = TOKEN_ARROW; + self->index++; + break; + case '=': + result.tag = TOKEN_MINUS_EQUAL; + self->index++; + break; + case '%': + state = TOKENIZER_STATE_MINUS_PERCENT; + goto state; + case '|': + state = TOKENIZER_STATE_MINUS_PIPE; + goto state; + default: + result.tag = TOKEN_MINUS; + break; + } + break; + + case TOKENIZER_STATE_MINUS_PERCENT: + self->index++; + switch (self->buffer[self->index]) { + case '=': + result.tag = TOKEN_MINUS_PERCENT_EQUAL; + self->index++; + break; + default: + result.tag = TOKEN_MINUS_PERCENT; + break; + } + break; + + case TOKENIZER_STATE_MINUS_PIPE: + self->index++; + switch (self->buffer[self->index]) { + case '=': + result.tag = TOKEN_MINUS_PIPE_EQUAL; + self->index++; + break; + default: + result.tag = TOKEN_MINUS_PIPE; + break; + } + break; + + case TOKENIZER_STATE_ANGLE_BRACKET_LEFT: + self->index++; + switch (self->buffer[self->index]) { + case '<': + state = TOKENIZER_STATE_ANGLE_BRACKET_ANGLE_BRACKET_LEFT; + goto state; + case '=': + result.tag = TOKEN_ANGLE_BRACKET_LEFT_EQUAL; + self->index++; + break; + default: + result.tag = TOKEN_ANGLE_BRACKET_LEFT; + break; + } + break; + + case TOKENIZER_STATE_ANGLE_BRACKET_ANGLE_BRACKET_LEFT: + self->index++; + switch (self->buffer[self->index]) { + case '=': + result.tag = TOKEN_ANGLE_BRACKET_ANGLE_BRACKET_LEFT_EQUAL; + self->index++; + break; + case '|': + state = TOKENIZER_STATE_ANGLE_BRACKET_ANGLE_BRACKET_LEFT_PIPE; + goto state; + default: + result.tag = TOKEN_ANGLE_BRACKET_ANGLE_BRACKET_LEFT; + break; + } + break; + + case TOKENIZER_STATE_ANGLE_BRACKET_ANGLE_BRACKET_LEFT_PIPE: + self->index++; + switch (self->buffer[self->index]) { + case '=': + result.tag = TOKEN_ANGLE_BRACKET_ANGLE_BRACKET_LEFT_PIPE_EQUAL; + self->index++; + break; + default: + result.tag = TOKEN_ANGLE_BRACKET_ANGLE_BRACKET_LEFT_PIPE; + break; + } + break; + + case TOKENIZER_STATE_ANGLE_BRACKET_RIGHT: + self->index++; + switch (self->buffer[self->index]) { + case '>': + state = TOKENIZER_STATE_ANGLE_BRACKET_ANGLE_BRACKET_RIGHT; + goto state; + case '=': + result.tag = TOKEN_ANGLE_BRACKET_RIGHT_EQUAL; + self->index++; + break; + default: + result.tag = TOKEN_ANGLE_BRACKET_RIGHT; + break; + } + break; + + case TOKENIZER_STATE_ANGLE_BRACKET_ANGLE_BRACKET_RIGHT: + self->index++; + switch (self->buffer[self->index]) { + case '=': + result.tag = TOKEN_ANGLE_BRACKET_ANGLE_BRACKET_RIGHT_EQUAL; + self->index++; + break; + default: + result.tag = TOKEN_ANGLE_BRACKET_ANGLE_BRACKET_RIGHT; + break; + } + break; + + case TOKENIZER_STATE_PERIOD: + self->index++; + switch (self->buffer[self->index]) { + case '.': + state = TOKENIZER_STATE_PERIOD_2; + goto state; + case '*': + state = TOKENIZER_STATE_PERIOD_ASTERISK; + goto state; + default: + result.tag = TOKEN_PERIOD; + break; + } + break; + + case TOKENIZER_STATE_PERIOD_2: + self->index++; + switch (self->buffer[self->index]) { + case '.': + result.tag = TOKEN_ELLIPSIS3; + self->index++; + break; + default: + result.tag = TOKEN_ELLIPSIS2; + break; + } + break; + + case TOKENIZER_STATE_PERIOD_ASTERISK: + self->index++; + switch (self->buffer[self->index]) { + case '*': + result.tag = TOKEN_INVALID_PERIODASTERISKS; + break; + default: + result.tag = TOKEN_PERIOD_ASTERISK; + break; + } + break; + + case TOKENIZER_STATE_SLASH: + self->index++; + switch (self->buffer[self->index]) { + case '/': + state = TOKENIZER_STATE_LINE_COMMENT_START; + goto state; + case '=': + result.tag = TOKEN_SLASH_EQUAL; + self->index++; + break; + default: + result.tag = TOKEN_SLASH; + break; + } + break; + + case TOKENIZER_STATE_LINE_COMMENT_START: + self->index++; + switch (self->buffer[self->index]) { + case 0: + if (self->index != self->buffer_len) { + state = TOKENIZER_STATE_INVALID; + goto state; + } else { + return (TokenizerToken) { .tag = TOKEN_EOF, + .loc = { + .start = self->index, + .end = self->index, + } }; + } + break; + case '!': + result.tag = TOKEN_CONTAINER_DOC_COMMENT; + state = TOKENIZER_STATE_DOC_COMMENT; + goto state; + case '\n': + self->index++; + result.loc.start = self->index; + state = TOKENIZER_STATE_START; + goto state; + case '/': + state = TOKENIZER_STATE_DOC_COMMENT_START; + goto state; + case '\r': + state = TOKENIZER_STATE_EXPECT_NEWLINE; + goto state; + case 0x01 ... 0x09: + case 0x0b ... 0x0c: + case 0x0e ... 0x1f: + case 0x7f: + state = TOKENIZER_STATE_INVALID; + goto state; + default: + state = TOKENIZER_STATE_LINE_COMMENT; + goto state; + } + break; + + case TOKENIZER_STATE_DOC_COMMENT_START: + self->index++; + switch (self->buffer[self->index]) { + case 0: + case '\n': + result.tag = TOKEN_DOC_COMMENT; + break; + case '\r': + if (self->buffer[self->index + 1] == '\n') { + result.tag = TOKEN_DOC_COMMENT; + } else { + state = TOKENIZER_STATE_INVALID; + goto state; + } + break; + case '/': + state = TOKENIZER_STATE_LINE_COMMENT; + goto state; + case 0x01 ... 0x09: + case 0x0b ... 0x0c: + case 0x0e ... 0x1f: + case 0x7f: + state = TOKENIZER_STATE_INVALID; + goto state; + default: + result.tag = TOKEN_DOC_COMMENT; + state = TOKENIZER_STATE_DOC_COMMENT; + goto state; + } + break; + + case TOKENIZER_STATE_LINE_COMMENT: + self->index++; + switch (self->buffer[self->index]) { + case 0: + if (self->index != self->buffer_len) { + state = TOKENIZER_STATE_INVALID; + goto state; + } else { + return (TokenizerToken) { .tag = TOKEN_EOF, + .loc = { + .start = self->index, + .end = self->index, + } }; + } + break; + case '\n': + self->index++; + result.loc.start = self->index; + state = TOKENIZER_STATE_START; + goto state; + case '\r': + state = TOKENIZER_STATE_EXPECT_NEWLINE; + goto state; + case 0x01 ... 0x09: + case 0x0b ... 0x0c: + case 0x0e ... 0x1f: + case 0x7f: + state = TOKENIZER_STATE_INVALID; + goto state; + default: + state = TOKENIZER_STATE_LINE_COMMENT; + goto state; + } + break; + + case TOKENIZER_STATE_DOC_COMMENT: + self->index++; + switch (self->buffer[self->index]) { + case 0: + case '\n': + break; + case '\r': + if (self->buffer[self->index + 1] != '\n') { + state = TOKENIZER_STATE_INVALID; + goto state; + } + break; + case 0x01 ... 0x09: + case 0x0b ... 0x0c: + case 0x0e ... 0x1f: + case 0x7f: + state = TOKENIZER_STATE_INVALID; + goto state; + default: + state = TOKENIZER_STATE_DOC_COMMENT; + goto state; + } + break; + + case TOKENIZER_STATE_INT: + switch (self->buffer[self->index]) { + case '.': + state = TOKENIZER_STATE_INT_PERIOD; + goto state; + case '_': + case 'a' ... 'd': + case 'f' ... 'o': + case 'q' ... 'z': + case 'A' ... 'D': + case 'F' ... 'O': + case 'Q' ... 'Z': + case '0' ... '9': + self->index++; + state = TOKENIZER_STATE_INT; + goto state; + case 'e': + case 'E': + case 'p': + case 'P': + state = TOKENIZER_STATE_INT_EXPONENT; + goto state; + default: + break; + } + break; + + case TOKENIZER_STATE_INT_EXPONENT: + self->index++; + switch (self->buffer[self->index]) { + case '-': + case '+': + self->index++; + state = TOKENIZER_STATE_FLOAT; + goto state; + default: + state = TOKENIZER_STATE_INT; + goto state; + } + break; + + case TOKENIZER_STATE_INT_PERIOD: + self->index++; + switch (self->buffer[self->index]) { + case '_': + case 'a' ... 'd': + case 'f' ... 'o': + case 'q' ... 'z': + case 'A' ... 'D': + case 'F' ... 'O': + case 'Q' ... 'Z': + case '0' ... '9': + self->index++; + state = TOKENIZER_STATE_FLOAT; + goto state; + case 'e': + case 'E': + case 'p': + case 'P': + state = TOKENIZER_STATE_FLOAT_EXPONENT; + goto state; + default: + self->index--; + break; + } + break; + + case TOKENIZER_STATE_FLOAT: + switch (self->buffer[self->index]) { + case '_': + case 'a' ... 'd': + case 'f' ... 'o': + case 'q' ... 'z': + case 'A' ... 'D': + case 'F' ... 'O': + case 'Q' ... 'Z': + case '0' ... '9': + self->index++; + state = TOKENIZER_STATE_FLOAT; + goto state; + case 'e': + case 'E': + case 'p': + case 'P': + state = TOKENIZER_STATE_FLOAT_EXPONENT; + goto state; + default: + break; + } + break; + + case TOKENIZER_STATE_FLOAT_EXPONENT: + self->index++; + switch (self->buffer[self->index]) { + case '-': + case '+': + self->index++; + state = TOKENIZER_STATE_FLOAT; + goto state; + default: + state = TOKENIZER_STATE_FLOAT; + goto state; + } + break; + } + + result.loc.end = self->index; + + return result; +} diff --git a/stage0/tokenizer.h b/stage0/tokenizer.h new file mode 100644 index 0000000000..4d9c67606f --- /dev/null +++ b/stage0/tokenizer.h @@ -0,0 +1,204 @@ +#ifndef _ZIG0_TOKENIZER_H__ +#define _ZIG0_TOKENIZER_H__ + +#include +#include + +#define TOKENIZER_FOREACH_TAG_ENUM(TAG) \ + TAG(TOKEN_INVALID) \ + TAG(TOKEN_INVALID_PERIODASTERISKS) \ + TAG(TOKEN_IDENTIFIER) \ + TAG(TOKEN_STRING_LITERAL) \ + TAG(TOKEN_MULTILINE_STRING_LITERAL_LINE) \ + TAG(TOKEN_CHAR_LITERAL) \ + TAG(TOKEN_EOF) \ + TAG(TOKEN_BUILTIN) \ + TAG(TOKEN_BANG) \ + TAG(TOKEN_PIPE) \ + TAG(TOKEN_PIPE_PIPE) \ + TAG(TOKEN_PIPE_EQUAL) \ + TAG(TOKEN_EQUAL) \ + TAG(TOKEN_EQUAL_EQUAL) \ + TAG(TOKEN_EQUAL_ANGLE_BRACKET_RIGHT) \ + TAG(TOKEN_BANG_EQUAL) \ + TAG(TOKEN_L_PAREN) \ + TAG(TOKEN_R_PAREN) \ + TAG(TOKEN_SEMICOLON) \ + TAG(TOKEN_PERCENT) \ + TAG(TOKEN_PERCENT_EQUAL) \ + TAG(TOKEN_L_BRACE) \ + TAG(TOKEN_R_BRACE) \ + TAG(TOKEN_L_BRACKET) \ + TAG(TOKEN_R_BRACKET) \ + TAG(TOKEN_PERIOD) \ + TAG(TOKEN_PERIOD_ASTERISK) \ + TAG(TOKEN_ELLIPSIS2) \ + TAG(TOKEN_ELLIPSIS3) \ + TAG(TOKEN_CARET) \ + TAG(TOKEN_CARET_EQUAL) \ + TAG(TOKEN_PLUS) \ + TAG(TOKEN_PLUS_PLUS) \ + TAG(TOKEN_PLUS_EQUAL) \ + TAG(TOKEN_PLUS_PERCENT) \ + TAG(TOKEN_PLUS_PERCENT_EQUAL) \ + TAG(TOKEN_PLUS_PIPE) \ + TAG(TOKEN_PLUS_PIPE_EQUAL) \ + TAG(TOKEN_MINUS) \ + TAG(TOKEN_MINUS_EQUAL) \ + TAG(TOKEN_MINUS_PERCENT) \ + TAG(TOKEN_MINUS_PERCENT_EQUAL) \ + TAG(TOKEN_MINUS_PIPE) \ + TAG(TOKEN_MINUS_PIPE_EQUAL) \ + TAG(TOKEN_ASTERISK) \ + TAG(TOKEN_ASTERISK_EQUAL) \ + TAG(TOKEN_ASTERISK_ASTERISK) \ + TAG(TOKEN_ASTERISK_PERCENT) \ + TAG(TOKEN_ASTERISK_PERCENT_EQUAL) \ + TAG(TOKEN_ASTERISK_PIPE) \ + TAG(TOKEN_ASTERISK_PIPE_EQUAL) \ + TAG(TOKEN_ARROW) \ + TAG(TOKEN_COLON) \ + TAG(TOKEN_SLASH) \ + TAG(TOKEN_SLASH_EQUAL) \ + TAG(TOKEN_COMMA) \ + TAG(TOKEN_AMPERSAND) \ + TAG(TOKEN_AMPERSAND_EQUAL) \ + TAG(TOKEN_QUESTION_MARK) \ + TAG(TOKEN_ANGLE_BRACKET_LEFT) \ + TAG(TOKEN_ANGLE_BRACKET_LEFT_EQUAL) \ + TAG(TOKEN_ANGLE_BRACKET_ANGLE_BRACKET_LEFT) \ + TAG(TOKEN_ANGLE_BRACKET_ANGLE_BRACKET_LEFT_EQUAL) \ + TAG(TOKEN_ANGLE_BRACKET_ANGLE_BRACKET_LEFT_PIPE) \ + TAG(TOKEN_ANGLE_BRACKET_ANGLE_BRACKET_LEFT_PIPE_EQUAL) \ + TAG(TOKEN_ANGLE_BRACKET_RIGHT) \ + TAG(TOKEN_ANGLE_BRACKET_RIGHT_EQUAL) \ + TAG(TOKEN_ANGLE_BRACKET_ANGLE_BRACKET_RIGHT) \ + TAG(TOKEN_ANGLE_BRACKET_ANGLE_BRACKET_RIGHT_EQUAL) \ + TAG(TOKEN_TILDE) \ + TAG(TOKEN_NUMBER_LITERAL) \ + TAG(TOKEN_DOC_COMMENT) \ + TAG(TOKEN_CONTAINER_DOC_COMMENT) \ + TAG(TOKEN_KEYWORD_ADDRSPACE) \ + TAG(TOKEN_KEYWORD_ALIGN) \ + TAG(TOKEN_KEYWORD_ALLOWZERO) \ + TAG(TOKEN_KEYWORD_AND) \ + TAG(TOKEN_KEYWORD_ANYFRAME) \ + TAG(TOKEN_KEYWORD_ANYTYPE) \ + TAG(TOKEN_KEYWORD_ASM) \ + TAG(TOKEN_KEYWORD_BREAK) \ + TAG(TOKEN_KEYWORD_CALLCONV) \ + TAG(TOKEN_KEYWORD_CATCH) \ + TAG(TOKEN_KEYWORD_COMPTIME) \ + TAG(TOKEN_KEYWORD_CONST) \ + TAG(TOKEN_KEYWORD_CONTINUE) \ + TAG(TOKEN_KEYWORD_DEFER) \ + TAG(TOKEN_KEYWORD_ELSE) \ + TAG(TOKEN_KEYWORD_ENUM) \ + TAG(TOKEN_KEYWORD_ERRDEFER) \ + TAG(TOKEN_KEYWORD_ERROR) \ + TAG(TOKEN_KEYWORD_EXPORT) \ + TAG(TOKEN_KEYWORD_EXTERN) \ + TAG(TOKEN_KEYWORD_FN) \ + TAG(TOKEN_KEYWORD_FOR) \ + TAG(TOKEN_KEYWORD_IF) \ + TAG(TOKEN_KEYWORD_INLINE) \ + TAG(TOKEN_KEYWORD_NOALIAS) \ + TAG(TOKEN_KEYWORD_NOINLINE) \ + TAG(TOKEN_KEYWORD_NOSUSPEND) \ + TAG(TOKEN_KEYWORD_OPAQUE) \ + TAG(TOKEN_KEYWORD_OR) \ + TAG(TOKEN_KEYWORD_ORELSE) \ + TAG(TOKEN_KEYWORD_PACKED) \ + TAG(TOKEN_KEYWORD_PUB) \ + TAG(TOKEN_KEYWORD_RESUME) \ + TAG(TOKEN_KEYWORD_RETURN) \ + TAG(TOKEN_KEYWORD_LINKSECTION) \ + TAG(TOKEN_KEYWORD_STRUCT) \ + TAG(TOKEN_KEYWORD_SUSPEND) \ + TAG(TOKEN_KEYWORD_SWITCH) \ + TAG(TOKEN_KEYWORD_TEST) \ + TAG(TOKEN_KEYWORD_THREADLOCAL) \ + TAG(TOKEN_KEYWORD_TRY) \ + TAG(TOKEN_KEYWORD_UNION) \ + TAG(TOKEN_KEYWORD_UNREACHABLE) \ + TAG(TOKEN_KEYWORD_VAR) \ + TAG(TOKEN_KEYWORD_VOLATILE) \ + TAG(TOKEN_KEYWORD_WHILE) + +#define TOKENIZER_GENERATE_ENUM(ENUM) ENUM, +#define TOKENIZER_GENERATE_CASE(ENUM) \ + case ENUM: \ + return #ENUM; + +// First define the enum +typedef enum { + TOKENIZER_FOREACH_TAG_ENUM(TOKENIZER_GENERATE_ENUM) +} TokenizerTag; + +const char* tokenizerGetTagString(TokenizerTag tag); + +typedef enum { + TOKENIZER_STATE_START, + TOKENIZER_STATE_EXPECT_NEWLINE, + TOKENIZER_STATE_IDENTIFIER, + TOKENIZER_STATE_BUILTIN, + TOKENIZER_STATE_STRING_LITERAL, + TOKENIZER_STATE_STRING_LITERAL_BACKSLASH, + TOKENIZER_STATE_MULTILINE_STRING_LITERAL_LINE, + TOKENIZER_STATE_CHAR_LITERAL, + TOKENIZER_STATE_CHAR_LITERAL_BACKSLASH, + TOKENIZER_STATE_BACKSLASH, + TOKENIZER_STATE_EQUAL, + TOKENIZER_STATE_BANG, + TOKENIZER_STATE_PIPE, + TOKENIZER_STATE_MINUS, + TOKENIZER_STATE_MINUS_PERCENT, + TOKENIZER_STATE_MINUS_PIPE, + TOKENIZER_STATE_ASTERISK, + TOKENIZER_STATE_ASTERISK_PERCENT, + TOKENIZER_STATE_ASTERISK_PIPE, + TOKENIZER_STATE_SLASH, + TOKENIZER_STATE_LINE_COMMENT_START, + TOKENIZER_STATE_LINE_COMMENT, + TOKENIZER_STATE_DOC_COMMENT_START, + TOKENIZER_STATE_DOC_COMMENT, + TOKENIZER_STATE_INT, + TOKENIZER_STATE_INT_EXPONENT, + TOKENIZER_STATE_INT_PERIOD, + TOKENIZER_STATE_FLOAT, + TOKENIZER_STATE_FLOAT_EXPONENT, + TOKENIZER_STATE_AMPERSAND, + TOKENIZER_STATE_CARET, + TOKENIZER_STATE_PERCENT, + TOKENIZER_STATE_PLUS, + TOKENIZER_STATE_PLUS_PERCENT, + TOKENIZER_STATE_PLUS_PIPE, + TOKENIZER_STATE_ANGLE_BRACKET_LEFT, + TOKENIZER_STATE_ANGLE_BRACKET_ANGLE_BRACKET_LEFT, + TOKENIZER_STATE_ANGLE_BRACKET_ANGLE_BRACKET_LEFT_PIPE, + TOKENIZER_STATE_ANGLE_BRACKET_RIGHT, + TOKENIZER_STATE_ANGLE_BRACKET_ANGLE_BRACKET_RIGHT, + TOKENIZER_STATE_PERIOD, + TOKENIZER_STATE_PERIOD_2, + TOKENIZER_STATE_PERIOD_ASTERISK, + TOKENIZER_STATE_SAW_AT_SIGN, + TOKENIZER_STATE_INVALID, +} TokenizerState; + +typedef struct { + TokenizerTag tag; + struct { + uint32_t start, end; + } loc; +} TokenizerToken; + +typedef struct { + const char* buffer; + const uint32_t buffer_len; + uint32_t index; +} Tokenizer; + +Tokenizer tokenizerInit(const char* buffer, uint32_t len); +TokenizerToken tokenizerNext(Tokenizer* self); + +#endif diff --git a/stage0/tokenizer_test.zig b/stage0/tokenizer_test.zig new file mode 100644 index 0000000000..e57d1d0713 --- /dev/null +++ b/stage0/tokenizer_test.zig @@ -0,0 +1,767 @@ +const std = @import("std"); +const testing = std.testing; + +const Token = std.zig.Token; +const Tokenizer = std.zig.Tokenizer; + +const c = @cImport({ + @cInclude("tokenizer.h"); +}); + +pub fn zigToken(token: c_uint) Token.Tag { + return switch (token) { + c.TOKEN_INVALID => .invalid, + c.TOKEN_INVALID_PERIODASTERISKS => .invalid_periodasterisks, + c.TOKEN_IDENTIFIER => .identifier, + c.TOKEN_STRING_LITERAL => .string_literal, + c.TOKEN_MULTILINE_STRING_LITERAL_LINE => .multiline_string_literal_line, + c.TOKEN_CHAR_LITERAL => .char_literal, + c.TOKEN_EOF => .eof, + c.TOKEN_BUILTIN => .builtin, + c.TOKEN_BANG => .bang, + c.TOKEN_PIPE => .pipe, + c.TOKEN_PIPE_PIPE => .pipe_pipe, + c.TOKEN_PIPE_EQUAL => .pipe_equal, + c.TOKEN_EQUAL => .equal, + c.TOKEN_EQUAL_EQUAL => .equal_equal, + c.TOKEN_EQUAL_ANGLE_BRACKET_RIGHT => .equal_angle_bracket_right, + c.TOKEN_BANG_EQUAL => .bang_equal, + c.TOKEN_L_PAREN => .l_paren, + c.TOKEN_R_PAREN => .r_paren, + c.TOKEN_SEMICOLON => .semicolon, + c.TOKEN_PERCENT => .percent, + c.TOKEN_PERCENT_EQUAL => .percent_equal, + c.TOKEN_L_BRACE => .l_brace, + c.TOKEN_R_BRACE => .r_brace, + c.TOKEN_L_BRACKET => .l_bracket, + c.TOKEN_R_BRACKET => .r_bracket, + c.TOKEN_PERIOD => .period, + c.TOKEN_PERIOD_ASTERISK => .period_asterisk, + c.TOKEN_ELLIPSIS2 => .ellipsis2, + c.TOKEN_ELLIPSIS3 => .ellipsis3, + c.TOKEN_CARET => .caret, + c.TOKEN_CARET_EQUAL => .caret_equal, + c.TOKEN_PLUS => .plus, + c.TOKEN_PLUS_PLUS => .plus_plus, + c.TOKEN_PLUS_EQUAL => .plus_equal, + c.TOKEN_PLUS_PERCENT => .plus_percent, + c.TOKEN_PLUS_PERCENT_EQUAL => .plus_percent_equal, + c.TOKEN_PLUS_PIPE => .plus_pipe, + c.TOKEN_PLUS_PIPE_EQUAL => .plus_pipe_equal, + c.TOKEN_MINUS => .minus, + c.TOKEN_MINUS_EQUAL => .minus_equal, + c.TOKEN_MINUS_PERCENT => .minus_percent, + c.TOKEN_MINUS_PERCENT_EQUAL => .minus_percent_equal, + c.TOKEN_MINUS_PIPE => .minus_pipe, + c.TOKEN_MINUS_PIPE_EQUAL => .minus_pipe_equal, + c.TOKEN_ASTERISK => .asterisk, + c.TOKEN_ASTERISK_EQUAL => .asterisk_equal, + c.TOKEN_ASTERISK_ASTERISK => .asterisk_asterisk, + c.TOKEN_ASTERISK_PERCENT => .asterisk_percent, + c.TOKEN_ASTERISK_PERCENT_EQUAL => .asterisk_percent_equal, + c.TOKEN_ASTERISK_PIPE => .asterisk_pipe, + c.TOKEN_ASTERISK_PIPE_EQUAL => .asterisk_pipe_equal, + c.TOKEN_ARROW => .arrow, + c.TOKEN_COLON => .colon, + c.TOKEN_SLASH => .slash, + c.TOKEN_SLASH_EQUAL => .slash_equal, + c.TOKEN_COMMA => .comma, + c.TOKEN_AMPERSAND => .ampersand, + c.TOKEN_AMPERSAND_EQUAL => .ampersand_equal, + c.TOKEN_QUESTION_MARK => .question_mark, + c.TOKEN_ANGLE_BRACKET_LEFT => .angle_bracket_left, + c.TOKEN_ANGLE_BRACKET_LEFT_EQUAL => .angle_bracket_left_equal, + c.TOKEN_ANGLE_BRACKET_ANGLE_BRACKET_LEFT => .angle_bracket_angle_bracket_left, + c.TOKEN_ANGLE_BRACKET_ANGLE_BRACKET_LEFT_EQUAL => .angle_bracket_angle_bracket_left_equal, + c.TOKEN_ANGLE_BRACKET_ANGLE_BRACKET_LEFT_PIPE => .angle_bracket_angle_bracket_left_pipe, + c.TOKEN_ANGLE_BRACKET_ANGLE_BRACKET_LEFT_PIPE_EQUAL => .angle_bracket_angle_bracket_left_pipe_equal, + c.TOKEN_ANGLE_BRACKET_RIGHT => .angle_bracket_right, + c.TOKEN_ANGLE_BRACKET_RIGHT_EQUAL => .angle_bracket_right_equal, + c.TOKEN_ANGLE_BRACKET_ANGLE_BRACKET_RIGHT => .angle_bracket_angle_bracket_right, + c.TOKEN_ANGLE_BRACKET_ANGLE_BRACKET_RIGHT_EQUAL => .angle_bracket_angle_bracket_right_equal, + c.TOKEN_TILDE => .tilde, + c.TOKEN_NUMBER_LITERAL => .number_literal, + c.TOKEN_DOC_COMMENT => .doc_comment, + c.TOKEN_CONTAINER_DOC_COMMENT => .container_doc_comment, + c.TOKEN_KEYWORD_ADDRSPACE => .keyword_addrspace, + c.TOKEN_KEYWORD_ALIGN => .keyword_align, + c.TOKEN_KEYWORD_ALLOWZERO => .keyword_allowzero, + c.TOKEN_KEYWORD_AND => .keyword_and, + c.TOKEN_KEYWORD_ANYFRAME => .keyword_anyframe, + c.TOKEN_KEYWORD_ANYTYPE => .keyword_anytype, + c.TOKEN_KEYWORD_ASM => .keyword_asm, + c.TOKEN_KEYWORD_BREAK => .keyword_break, + c.TOKEN_KEYWORD_CALLCONV => .keyword_callconv, + c.TOKEN_KEYWORD_CATCH => .keyword_catch, + c.TOKEN_KEYWORD_COMPTIME => .keyword_comptime, + c.TOKEN_KEYWORD_CONST => .keyword_const, + c.TOKEN_KEYWORD_CONTINUE => .keyword_continue, + c.TOKEN_KEYWORD_DEFER => .keyword_defer, + c.TOKEN_KEYWORD_ELSE => .keyword_else, + c.TOKEN_KEYWORD_ENUM => .keyword_enum, + c.TOKEN_KEYWORD_ERRDEFER => .keyword_errdefer, + c.TOKEN_KEYWORD_ERROR => .keyword_error, + c.TOKEN_KEYWORD_EXPORT => .keyword_export, + c.TOKEN_KEYWORD_EXTERN => .keyword_extern, + c.TOKEN_KEYWORD_FN => .keyword_fn, + c.TOKEN_KEYWORD_FOR => .keyword_for, + c.TOKEN_KEYWORD_IF => .keyword_if, + c.TOKEN_KEYWORD_INLINE => .keyword_inline, + c.TOKEN_KEYWORD_NOALIAS => .keyword_noalias, + c.TOKEN_KEYWORD_NOINLINE => .keyword_noinline, + c.TOKEN_KEYWORD_NOSUSPEND => .keyword_nosuspend, + c.TOKEN_KEYWORD_OPAQUE => .keyword_opaque, + c.TOKEN_KEYWORD_OR => .keyword_or, + c.TOKEN_KEYWORD_ORELSE => .keyword_orelse, + c.TOKEN_KEYWORD_PACKED => .keyword_packed, + c.TOKEN_KEYWORD_PUB => .keyword_pub, + c.TOKEN_KEYWORD_RESUME => .keyword_resume, + c.TOKEN_KEYWORD_RETURN => .keyword_return, + c.TOKEN_KEYWORD_LINKSECTION => .keyword_linksection, + c.TOKEN_KEYWORD_STRUCT => .keyword_struct, + c.TOKEN_KEYWORD_SUSPEND => .keyword_suspend, + c.TOKEN_KEYWORD_SWITCH => .keyword_switch, + c.TOKEN_KEYWORD_TEST => .keyword_test, + c.TOKEN_KEYWORD_THREADLOCAL => .keyword_threadlocal, + c.TOKEN_KEYWORD_TRY => .keyword_try, + c.TOKEN_KEYWORD_UNION => .keyword_union, + c.TOKEN_KEYWORD_UNREACHABLE => .keyword_unreachable, + c.TOKEN_KEYWORD_VAR => .keyword_var, + c.TOKEN_KEYWORD_VOLATILE => .keyword_volatile, + c.TOKEN_KEYWORD_WHILE => .keyword_while, + else => undefined, + }; +} + +// Copy-pasted from lib/std/zig/tokenizer.zig +fn testTokenize(source: [:0]const u8, expected_token_tags: []const Token.Tag) !void { + // Do the C thing + { + var ctokenizer = c.tokenizerInit(source.ptr, @intCast(source.len)); + for (expected_token_tags) |expected_token_tag| { + const token = c.tokenizerNext(&ctokenizer); + try std.testing.expectEqual(expected_token_tag, zigToken(token.tag)); + } + const last_token = c.tokenizerNext(&ctokenizer); + try std.testing.expectEqual(Token.Tag.eof, zigToken(last_token.tag)); + } + + { + var tokenizer = Tokenizer.init(source); + for (expected_token_tags) |expected_token_tag| { + const token = tokenizer.next(); + try std.testing.expectEqual(expected_token_tag, token.tag); + } + // Last token should always be eof, even when the last token was invalid, + // in which case the tokenizer is in an invalid state, which can only be + // recovered by opinionated means outside the scope of this implementation. + const last_token = tokenizer.next(); + try std.testing.expectEqual(Token.Tag.eof, last_token.tag); + try std.testing.expectEqual(source.len, last_token.loc.start); + try std.testing.expectEqual(source.len, last_token.loc.end); + } +} + +test "keywords" { + try testTokenize("test const else", &.{ .keyword_test, .keyword_const, .keyword_else }); +} + +test "line comment followed by top-level comptime" { + try testTokenize( + \\// line comment + \\comptime {} + \\ + , &.{ + .keyword_comptime, + .l_brace, + .r_brace, + }); +} + +test "unknown length pointer and then c pointer" { + try testTokenize( + \\[*]u8 + \\[*c]u8 + , &.{ + .l_bracket, + .asterisk, + .r_bracket, + .identifier, + .l_bracket, + .asterisk, + .identifier, + .r_bracket, + .identifier, + }); +} + +test "code point literal with hex escape" { + try testTokenize( + \\'\x1b' + , &.{.char_literal}); + try testTokenize( + \\'\x1' + , &.{.char_literal}); +} + +test "newline in char literal" { + try testTokenize( + \\' + \\' + , &.{ .invalid, .invalid }); +} + +test "newline in string literal" { + try testTokenize( + \\" + \\" + , &.{ .invalid, .invalid }); +} + +test "code point literal with unicode escapes" { + // Valid unicode escapes + try testTokenize( + \\'\u{3}' + , &.{.char_literal}); + try testTokenize( + \\'\u{01}' + , &.{.char_literal}); + try testTokenize( + \\'\u{2a}' + , &.{.char_literal}); + try testTokenize( + \\'\u{3f9}' + , &.{.char_literal}); + try testTokenize( + \\'\u{6E09aBc1523}' + , &.{.char_literal}); + try testTokenize( + \\"\u{440}" + , &.{.string_literal}); + + // Invalid unicode escapes + try testTokenize( + \\'\u' + , &.{.char_literal}); + try testTokenize( + \\'\u{{' + , &.{.char_literal}); + try testTokenize( + \\'\u{}' + , &.{.char_literal}); + try testTokenize( + \\'\u{s}' + , &.{.char_literal}); + try testTokenize( + \\'\u{2z}' + , &.{.char_literal}); + try testTokenize( + \\'\u{4a' + , &.{.char_literal}); + + // Test old-style unicode literals + try testTokenize( + \\'\u0333' + , &.{.char_literal}); + try testTokenize( + \\'\U0333' + , &.{.char_literal}); +} + +test "code point literal with unicode code point" { + try testTokenize( + \\'💩' + , &.{.char_literal}); +} + +test "float literal e exponent" { + try testTokenize("a = 4.94065645841246544177e-324;\n", &.{ + .identifier, + .equal, + .number_literal, + .semicolon, + }); +} + +test "float literal p exponent" { + try testTokenize("a = 0x1.a827999fcef32p+1022;\n", &.{ + .identifier, + .equal, + .number_literal, + .semicolon, + }); +} + +test "chars" { + try testTokenize("'c'", &.{.char_literal}); +} + +test "invalid token characters" { + try testTokenize("#", &.{.invalid}); + try testTokenize("`", &.{.invalid}); + try testTokenize("'c", &.{.invalid}); + try testTokenize("'", &.{.invalid}); + try testTokenize("''", &.{.char_literal}); + try testTokenize("'\n'", &.{ .invalid, .invalid }); +} + +test "invalid literal/comment characters" { + try testTokenize("\"\x00\"", &.{.invalid}); + try testTokenize("`\x00`", &.{.invalid}); + try testTokenize("//\x00", &.{.invalid}); + try testTokenize("//\x1f", &.{.invalid}); + try testTokenize("//\x7f", &.{.invalid}); +} + +test "utf8" { + try testTokenize("//\xc2\x80", &.{}); + try testTokenize("//\xf4\x8f\xbf\xbf", &.{}); +} + +test "invalid utf8" { + try testTokenize("//\x80", &.{}); + try testTokenize("//\xbf", &.{}); + try testTokenize("//\xf8", &.{}); + try testTokenize("//\xff", &.{}); + try testTokenize("//\xc2\xc0", &.{}); + try testTokenize("//\xe0", &.{}); + try testTokenize("//\xf0", &.{}); + try testTokenize("//\xf0\x90\x80\xc0", &.{}); +} + +test "illegal unicode codepoints" { + // unicode newline characters.U+0085, U+2028, U+2029 + try testTokenize("//\xc2\x84", &.{}); + try testTokenize("//\xc2\x85", &.{}); + try testTokenize("//\xc2\x86", &.{}); + try testTokenize("//\xe2\x80\xa7", &.{}); + try testTokenize("//\xe2\x80\xa8", &.{}); + try testTokenize("//\xe2\x80\xa9", &.{}); + try testTokenize("//\xe2\x80\xaa", &.{}); +} + +test "string identifier and builtin fns" { + try testTokenize( + \\const @"if" = @import("std"); + , &.{ + .keyword_const, + .identifier, + .equal, + .builtin, + .l_paren, + .string_literal, + .r_paren, + .semicolon, + }); +} + +test "pipe and then invalid" { + try testTokenize("||=", &.{ + .pipe_pipe, + .equal, + }); +} + +test "line comment and doc comment" { + try testTokenize("//", &.{}); + try testTokenize("// a / b", &.{}); + try testTokenize("// /", &.{}); + try testTokenize("/// a", &.{.doc_comment}); + try testTokenize("///", &.{.doc_comment}); + try testTokenize("////", &.{}); + try testTokenize("//!", &.{.container_doc_comment}); + try testTokenize("//!!", &.{.container_doc_comment}); +} + +test "line comment followed by identifier" { + try testTokenize( + \\ Unexpected, + \\ // another + \\ Another, + , &.{ + .identifier, + .comma, + .identifier, + .comma, + }); +} + +test "UTF-8 BOM is recognized and skipped" { + try testTokenize("\xEF\xBB\xBFa;\n", &.{ + .identifier, + .semicolon, + }); +} + +test "correctly parse pointer assignment" { + try testTokenize("b.*=3;\n", &.{ + .identifier, + .period_asterisk, + .equal, + .number_literal, + .semicolon, + }); +} + +test "correctly parse pointer dereference followed by asterisk" { + try testTokenize("\"b\".* ** 10", &.{ + .string_literal, + .period_asterisk, + .asterisk_asterisk, + .number_literal, + }); + + try testTokenize("(\"b\".*)** 10", &.{ + .l_paren, + .string_literal, + .period_asterisk, + .r_paren, + .asterisk_asterisk, + .number_literal, + }); + + try testTokenize("\"b\".*** 10", &.{ + .string_literal, + .invalid_periodasterisks, + .asterisk_asterisk, + .number_literal, + }); +} + +test "range literals" { + try testTokenize("0...9", &.{ .number_literal, .ellipsis3, .number_literal }); + try testTokenize("'0'...'9'", &.{ .char_literal, .ellipsis3, .char_literal }); + try testTokenize("0x00...0x09", &.{ .number_literal, .ellipsis3, .number_literal }); + try testTokenize("0b00...0b11", &.{ .number_literal, .ellipsis3, .number_literal }); + try testTokenize("0o00...0o11", &.{ .number_literal, .ellipsis3, .number_literal }); +} + +test "number literals decimal" { + try testTokenize("0", &.{.number_literal}); + try testTokenize("1", &.{.number_literal}); + try testTokenize("2", &.{.number_literal}); + try testTokenize("3", &.{.number_literal}); + try testTokenize("4", &.{.number_literal}); + try testTokenize("5", &.{.number_literal}); + try testTokenize("6", &.{.number_literal}); + try testTokenize("7", &.{.number_literal}); + try testTokenize("8", &.{.number_literal}); + try testTokenize("9", &.{.number_literal}); + try testTokenize("1..", &.{ .number_literal, .ellipsis2 }); + try testTokenize("0a", &.{.number_literal}); + try testTokenize("9b", &.{.number_literal}); + try testTokenize("1z", &.{.number_literal}); + try testTokenize("1z_1", &.{.number_literal}); + try testTokenize("9z3", &.{.number_literal}); + + try testTokenize("0_0", &.{.number_literal}); + try testTokenize("0001", &.{.number_literal}); + try testTokenize("01234567890", &.{.number_literal}); + try testTokenize("012_345_6789_0", &.{.number_literal}); + try testTokenize("0_1_2_3_4_5_6_7_8_9_0", &.{.number_literal}); + + try testTokenize("00_", &.{.number_literal}); + try testTokenize("0_0_", &.{.number_literal}); + try testTokenize("0__0", &.{.number_literal}); + try testTokenize("0_0f", &.{.number_literal}); + try testTokenize("0_0_f", &.{.number_literal}); + try testTokenize("0_0_f_00", &.{.number_literal}); + try testTokenize("1_,", &.{ .number_literal, .comma }); + + try testTokenize("0.0", &.{.number_literal}); + try testTokenize("1.0", &.{.number_literal}); + try testTokenize("10.0", &.{.number_literal}); + try testTokenize("0e0", &.{.number_literal}); + try testTokenize("1e0", &.{.number_literal}); + try testTokenize("1e100", &.{.number_literal}); + try testTokenize("1.0e100", &.{.number_literal}); + try testTokenize("1.0e+100", &.{.number_literal}); + try testTokenize("1.0e-100", &.{.number_literal}); + try testTokenize("1_0_0_0.0_0_0_0_0_1e1_0_0_0", &.{.number_literal}); + + try testTokenize("1.", &.{ .number_literal, .period }); + try testTokenize("1e", &.{.number_literal}); + try testTokenize("1.e100", &.{.number_literal}); + try testTokenize("1.0e1f0", &.{.number_literal}); + try testTokenize("1.0p100", &.{.number_literal}); + try testTokenize("1.0p-100", &.{.number_literal}); + try testTokenize("1.0p1f0", &.{.number_literal}); + try testTokenize("1.0_,", &.{ .number_literal, .comma }); + try testTokenize("1_.0", &.{.number_literal}); + try testTokenize("1._", &.{.number_literal}); + try testTokenize("1.a", &.{.number_literal}); + try testTokenize("1.z", &.{.number_literal}); + try testTokenize("1._0", &.{.number_literal}); + try testTokenize("1.+", &.{ .number_literal, .period, .plus }); + try testTokenize("1._+", &.{ .number_literal, .plus }); + try testTokenize("1._e", &.{.number_literal}); + try testTokenize("1.0e", &.{.number_literal}); + try testTokenize("1.0e,", &.{ .number_literal, .comma }); + try testTokenize("1.0e_", &.{.number_literal}); + try testTokenize("1.0e+_", &.{.number_literal}); + try testTokenize("1.0e-_", &.{.number_literal}); + try testTokenize("1.0e0_+", &.{ .number_literal, .plus }); +} + +test "number literals binary" { + try testTokenize("0b0", &.{.number_literal}); + try testTokenize("0b1", &.{.number_literal}); + try testTokenize("0b2", &.{.number_literal}); + try testTokenize("0b3", &.{.number_literal}); + try testTokenize("0b4", &.{.number_literal}); + try testTokenize("0b5", &.{.number_literal}); + try testTokenize("0b6", &.{.number_literal}); + try testTokenize("0b7", &.{.number_literal}); + try testTokenize("0b8", &.{.number_literal}); + try testTokenize("0b9", &.{.number_literal}); + try testTokenize("0ba", &.{.number_literal}); + try testTokenize("0bb", &.{.number_literal}); + try testTokenize("0bc", &.{.number_literal}); + try testTokenize("0bd", &.{.number_literal}); + try testTokenize("0be", &.{.number_literal}); + try testTokenize("0bf", &.{.number_literal}); + try testTokenize("0bz", &.{.number_literal}); + + try testTokenize("0b0000_0000", &.{.number_literal}); + try testTokenize("0b1111_1111", &.{.number_literal}); + try testTokenize("0b10_10_10_10", &.{.number_literal}); + try testTokenize("0b0_1_0_1_0_1_0_1", &.{.number_literal}); + try testTokenize("0b1.", &.{ .number_literal, .period }); + try testTokenize("0b1.0", &.{.number_literal}); + + try testTokenize("0B0", &.{.number_literal}); + try testTokenize("0b_", &.{.number_literal}); + try testTokenize("0b_0", &.{.number_literal}); + try testTokenize("0b1_", &.{.number_literal}); + try testTokenize("0b0__1", &.{.number_literal}); + try testTokenize("0b0_1_", &.{.number_literal}); + try testTokenize("0b1e", &.{.number_literal}); + try testTokenize("0b1p", &.{.number_literal}); + try testTokenize("0b1e0", &.{.number_literal}); + try testTokenize("0b1p0", &.{.number_literal}); + try testTokenize("0b1_,", &.{ .number_literal, .comma }); +} + +test "number literals octal" { + try testTokenize("0o0", &.{.number_literal}); + try testTokenize("0o1", &.{.number_literal}); + try testTokenize("0o2", &.{.number_literal}); + try testTokenize("0o3", &.{.number_literal}); + try testTokenize("0o4", &.{.number_literal}); + try testTokenize("0o5", &.{.number_literal}); + try testTokenize("0o6", &.{.number_literal}); + try testTokenize("0o7", &.{.number_literal}); + try testTokenize("0o8", &.{.number_literal}); + try testTokenize("0o9", &.{.number_literal}); + try testTokenize("0oa", &.{.number_literal}); + try testTokenize("0ob", &.{.number_literal}); + try testTokenize("0oc", &.{.number_literal}); + try testTokenize("0od", &.{.number_literal}); + try testTokenize("0oe", &.{.number_literal}); + try testTokenize("0of", &.{.number_literal}); + try testTokenize("0oz", &.{.number_literal}); + + try testTokenize("0o01234567", &.{.number_literal}); + try testTokenize("0o0123_4567", &.{.number_literal}); + try testTokenize("0o01_23_45_67", &.{.number_literal}); + try testTokenize("0o0_1_2_3_4_5_6_7", &.{.number_literal}); + try testTokenize("0o7.", &.{ .number_literal, .period }); + try testTokenize("0o7.0", &.{.number_literal}); + + try testTokenize("0O0", &.{.number_literal}); + try testTokenize("0o_", &.{.number_literal}); + try testTokenize("0o_0", &.{.number_literal}); + try testTokenize("0o1_", &.{.number_literal}); + try testTokenize("0o0__1", &.{.number_literal}); + try testTokenize("0o0_1_", &.{.number_literal}); + try testTokenize("0o1e", &.{.number_literal}); + try testTokenize("0o1p", &.{.number_literal}); + try testTokenize("0o1e0", &.{.number_literal}); + try testTokenize("0o1p0", &.{.number_literal}); + try testTokenize("0o_,", &.{ .number_literal, .comma }); +} + +test "number literals hexadecimal" { + try testTokenize("0x0", &.{.number_literal}); + try testTokenize("0x1", &.{.number_literal}); + try testTokenize("0x2", &.{.number_literal}); + try testTokenize("0x3", &.{.number_literal}); + try testTokenize("0x4", &.{.number_literal}); + try testTokenize("0x5", &.{.number_literal}); + try testTokenize("0x6", &.{.number_literal}); + try testTokenize("0x7", &.{.number_literal}); + try testTokenize("0x8", &.{.number_literal}); + try testTokenize("0x9", &.{.number_literal}); + try testTokenize("0xa", &.{.number_literal}); + try testTokenize("0xb", &.{.number_literal}); + try testTokenize("0xc", &.{.number_literal}); + try testTokenize("0xd", &.{.number_literal}); + try testTokenize("0xe", &.{.number_literal}); + try testTokenize("0xf", &.{.number_literal}); + try testTokenize("0xA", &.{.number_literal}); + try testTokenize("0xB", &.{.number_literal}); + try testTokenize("0xC", &.{.number_literal}); + try testTokenize("0xD", &.{.number_literal}); + try testTokenize("0xE", &.{.number_literal}); + try testTokenize("0xF", &.{.number_literal}); + try testTokenize("0x0z", &.{.number_literal}); + try testTokenize("0xz", &.{.number_literal}); + + try testTokenize("0x0123456789ABCDEF", &.{.number_literal}); + try testTokenize("0x0123_4567_89AB_CDEF", &.{.number_literal}); + try testTokenize("0x01_23_45_67_89AB_CDE_F", &.{.number_literal}); + try testTokenize("0x0_1_2_3_4_5_6_7_8_9_A_B_C_D_E_F", &.{.number_literal}); + + try testTokenize("0X0", &.{.number_literal}); + try testTokenize("0x_", &.{.number_literal}); + try testTokenize("0x_1", &.{.number_literal}); + try testTokenize("0x1_", &.{.number_literal}); + try testTokenize("0x0__1", &.{.number_literal}); + try testTokenize("0x0_1_", &.{.number_literal}); + try testTokenize("0x_,", &.{ .number_literal, .comma }); + + try testTokenize("0x1.0", &.{.number_literal}); + try testTokenize("0xF.0", &.{.number_literal}); + try testTokenize("0xF.F", &.{.number_literal}); + try testTokenize("0xF.Fp0", &.{.number_literal}); + try testTokenize("0xF.FP0", &.{.number_literal}); + try testTokenize("0x1p0", &.{.number_literal}); + try testTokenize("0xfp0", &.{.number_literal}); + try testTokenize("0x1.0+0xF.0", &.{ .number_literal, .plus, .number_literal }); + + try testTokenize("0x1.", &.{ .number_literal, .period }); + try testTokenize("0xF.", &.{ .number_literal, .period }); + try testTokenize("0x1.+0xF.", &.{ .number_literal, .period, .plus, .number_literal, .period }); + try testTokenize("0xff.p10", &.{.number_literal}); + + try testTokenize("0x0123456.789ABCDEF", &.{.number_literal}); + try testTokenize("0x0_123_456.789_ABC_DEF", &.{.number_literal}); + try testTokenize("0x0_1_2_3_4_5_6.7_8_9_A_B_C_D_E_F", &.{.number_literal}); + try testTokenize("0x0p0", &.{.number_literal}); + try testTokenize("0x0.0p0", &.{.number_literal}); + try testTokenize("0xff.ffp10", &.{.number_literal}); + try testTokenize("0xff.ffP10", &.{.number_literal}); + try testTokenize("0xffp10", &.{.number_literal}); + try testTokenize("0xff_ff.ff_ffp1_0_0_0", &.{.number_literal}); + try testTokenize("0xf_f_f_f.f_f_f_fp+1_000", &.{.number_literal}); + try testTokenize("0xf_f_f_f.f_f_f_fp-1_00_0", &.{.number_literal}); + + try testTokenize("0x1e", &.{.number_literal}); + try testTokenize("0x1e0", &.{.number_literal}); + try testTokenize("0x1p", &.{.number_literal}); + try testTokenize("0xfp0z1", &.{.number_literal}); + try testTokenize("0xff.ffpff", &.{.number_literal}); + try testTokenize("0x0.p", &.{.number_literal}); + try testTokenize("0x0.z", &.{.number_literal}); + try testTokenize("0x0._", &.{.number_literal}); + try testTokenize("0x0_.0", &.{.number_literal}); + try testTokenize("0x0_.0.0", &.{ .number_literal, .period, .number_literal }); + try testTokenize("0x0._0", &.{.number_literal}); + try testTokenize("0x0.0_", &.{.number_literal}); + try testTokenize("0x0_p0", &.{.number_literal}); + try testTokenize("0x0_.p0", &.{.number_literal}); + try testTokenize("0x0._p0", &.{.number_literal}); + try testTokenize("0x0.0_p0", &.{.number_literal}); + try testTokenize("0x0._0p0", &.{.number_literal}); + try testTokenize("0x0.0p_0", &.{.number_literal}); + try testTokenize("0x0.0p+_0", &.{.number_literal}); + try testTokenize("0x0.0p-_0", &.{.number_literal}); + try testTokenize("0x0.0p0_", &.{.number_literal}); +} + +test "multi line string literal with only 1 backslash" { + try testTokenize("x \\\n;", &.{ .identifier, .invalid, .semicolon }); +} + +test "invalid builtin identifiers" { + try testTokenize("@()", &.{.invalid}); + try testTokenize("@0()", &.{.invalid}); +} + +test "invalid token with unfinished escape right before eof" { + try testTokenize("\"\\", &.{.invalid}); + try testTokenize("'\\", &.{.invalid}); + try testTokenize("'\\u", &.{.invalid}); +} + +test "saturating operators" { + try testTokenize("<<", &.{.angle_bracket_angle_bracket_left}); + try testTokenize("<<|", &.{.angle_bracket_angle_bracket_left_pipe}); + try testTokenize("<<|=", &.{.angle_bracket_angle_bracket_left_pipe_equal}); + + try testTokenize("*", &.{.asterisk}); + try testTokenize("*|", &.{.asterisk_pipe}); + try testTokenize("*|=", &.{.asterisk_pipe_equal}); + + try testTokenize("+", &.{.plus}); + try testTokenize("+|", &.{.plus_pipe}); + try testTokenize("+|=", &.{.plus_pipe_equal}); + + try testTokenize("-", &.{.minus}); + try testTokenize("-|", &.{.minus_pipe}); + try testTokenize("-|=", &.{.minus_pipe_equal}); +} + +test "null byte before eof" { + try testTokenize("123 \x00 456", &.{ .number_literal, .invalid }); + try testTokenize("//\x00", &.{.invalid}); + try testTokenize("\\\\\x00", &.{.invalid}); + try testTokenize("\x00", &.{.invalid}); + try testTokenize("// NUL\x00\n", &.{.invalid}); + try testTokenize("///\x00\n", &.{ .doc_comment, .invalid }); + try testTokenize("/// NUL\x00\n", &.{ .doc_comment, .invalid }); +} + +test "invalid tabs and carriage returns" { + // "Inside Line Comments and Documentation Comments, Any TAB is rejected by + // the grammar since it is ambiguous how it should be rendered." + // https://github.com/ziglang/zig-spec/issues/38 + try testTokenize("//\t", &.{.invalid}); + try testTokenize("// \t", &.{.invalid}); + try testTokenize("///\t", &.{.invalid}); + try testTokenize("/// \t", &.{.invalid}); + try testTokenize("//!\t", &.{.invalid}); + try testTokenize("//! \t", &.{.invalid}); + + // "Inside Line Comments and Documentation Comments, CR directly preceding + // NL is unambiguously part of the newline sequence. It is accepted by the + // grammar and removed by zig fmt, leaving only NL. CR anywhere else is + // rejected by the grammar." + // https://github.com/ziglang/zig-spec/issues/38 + try testTokenize("//\r", &.{.invalid}); + try testTokenize("// \r", &.{.invalid}); + try testTokenize("///\r", &.{.invalid}); + try testTokenize("/// \r", &.{.invalid}); + try testTokenize("//\r ", &.{.invalid}); + try testTokenize("// \r ", &.{.invalid}); + try testTokenize("///\r ", &.{.invalid}); + try testTokenize("/// \r ", &.{.invalid}); + try testTokenize("//\r\n", &.{}); + try testTokenize("// \r\n", &.{}); + try testTokenize("///\r\n", &.{.doc_comment}); + try testTokenize("/// \r\n", &.{.doc_comment}); + try testTokenize("//!\r", &.{.invalid}); + try testTokenize("//! \r", &.{.invalid}); + try testTokenize("//!\r ", &.{.invalid}); + try testTokenize("//! \r ", &.{.invalid}); + try testTokenize("//!\r\n", &.{.container_doc_comment}); + try testTokenize("//! \r\n", &.{.container_doc_comment}); + + // The control characters TAB and CR are rejected by the grammar inside multi-line string literals, + // except if CR is directly before NL. + // https://github.com/ziglang/zig-spec/issues/38 + try testTokenize("\\\\\r", &.{.invalid}); + try testTokenize("\\\\\r ", &.{.invalid}); + try testTokenize("\\\\ \r", &.{.invalid}); + try testTokenize("\\\\\t", &.{.invalid}); + try testTokenize("\\\\\t ", &.{.invalid}); + try testTokenize("\\\\ \t", &.{.invalid}); + try testTokenize("\\\\\r\n", &.{.multiline_string_literal_line}); + + // "TAB used as whitespace is...accepted by the grammar. CR used as + // whitespace, whether directly preceding NL or stray, is...accepted by the + // grammar." + // https://github.com/ziglang/zig-spec/issues/38 + try testTokenize("\tpub\tswitch\t", &.{ .keyword_pub, .keyword_switch }); + try testTokenize("\rpub\rswitch\r", &.{ .keyword_pub, .keyword_switch }); +} diff --git a/stage0/zig-interp.txt b/stage0/zig-interp.txt new file mode 100644 index 0000000000..2e41715c81 --- /dev/null +++ b/stage0/zig-interp.txt @@ -0,0 +1,5 @@ +1. implement @panic, write a test that does it. +2. local variables. +3. control flow. +4. functions. +5. imports until one can import stdlib. diff --git a/stage0/zig0.c b/stage0/zig0.c new file mode 100644 index 0000000000..528305a9bb --- /dev/null +++ b/stage0/zig0.c @@ -0,0 +1,59 @@ +#include "common.h" + +#include +#include +#include + +// API: +// - code = 0: program successfully terminated. +// - code = 1: panicked, panic message in msg. Caller should free msg. +// - code = 2: interpreter error, error in msg. Caller should free msg. +static int zig0Run(const char* program, char** msg) { + (void)program; + (void)msg; + return 0; +} + +// API: run and: +// code = 3: abnormal error, expect something in stderr. +int zig0RunFile(const char* fname, char** msg) { + FILE* f = fopen(fname, "r"); + if (f == NULL) { + perror("fopen"); + return 3; + } + fseek(f, 0, SEEK_END); + long fsizel = ftell(f); + if (fsizel == -1) { + perror("ftell"); + fclose(f); + return 3; + } + unsigned long fsize = (unsigned long)fsizel; + fseek(f, 0, SEEK_SET); + + char* program = malloc(fsize + 1); + if (program == NULL) { + perror("malloc"); + fclose(f); + return 3; + } + + size_t bytes_read = fread(program, 1, fsize, f); + if (bytes_read < fsize) { + if (ferror(f)) { + perror("fread"); + } else { + fprintf(stderr, "Unexpected end of file\n"); + } + free(program); + fclose(f); + return 3; + } + fclose(f); + program[fsize] = 0; + + int code = zig0Run(program, msg); + free(program); + return code; +} diff --git a/stage0/zir.c b/stage0/zir.c new file mode 100644 index 0000000000..8e6b406aaf --- /dev/null +++ b/stage0/zir.c @@ -0,0 +1,19 @@ +#include "zir.h" +#include + +void zirDeinit(Zir* zir) { + free(zir->inst_tags); + free(zir->inst_datas); + free(zir->extra); + free(zir->string_bytes); + zir->inst_tags = NULL; + zir->inst_datas = NULL; + zir->extra = NULL; + zir->string_bytes = NULL; + zir->inst_len = 0; + zir->inst_cap = 0; + zir->extra_len = 0; + zir->extra_cap = 0; + zir->string_bytes_len = 0; + zir->string_bytes_cap = 0; +} diff --git a/stage0/zir.h b/stage0/zir.h new file mode 100644 index 0000000000..10950e1249 --- /dev/null +++ b/stage0/zir.h @@ -0,0 +1,544 @@ +// zir.h — ZIR data structures, ported from lib/std/zig/Zir.zig. +#ifndef _ZIG0_ZIR_H__ +#define _ZIG0_ZIR_H__ + +#include "common.h" +#include +#include + +// --- ZIR instruction tags (uint8_t) --- +// Matches Zir.Inst.Tag enum order from Zir.zig. + +#define ZIR_INST_FOREACH_TAG(TAG) \ + TAG(ZIR_INST_ADD) \ + TAG(ZIR_INST_ADDWRAP) \ + TAG(ZIR_INST_ADD_SAT) \ + TAG(ZIR_INST_ADD_UNSAFE) \ + TAG(ZIR_INST_SUB) \ + TAG(ZIR_INST_SUBWRAP) \ + TAG(ZIR_INST_SUB_SAT) \ + TAG(ZIR_INST_MUL) \ + TAG(ZIR_INST_MULWRAP) \ + TAG(ZIR_INST_MUL_SAT) \ + TAG(ZIR_INST_DIV_EXACT) \ + TAG(ZIR_INST_DIV_FLOOR) \ + TAG(ZIR_INST_DIV_TRUNC) \ + TAG(ZIR_INST_MOD) \ + TAG(ZIR_INST_REM) \ + TAG(ZIR_INST_MOD_REM) \ + TAG(ZIR_INST_SHL) \ + TAG(ZIR_INST_SHL_EXACT) \ + TAG(ZIR_INST_SHL_SAT) \ + TAG(ZIR_INST_SHR) \ + TAG(ZIR_INST_SHR_EXACT) \ + TAG(ZIR_INST_PARAM) \ + TAG(ZIR_INST_PARAM_COMPTIME) \ + TAG(ZIR_INST_PARAM_ANYTYPE) \ + TAG(ZIR_INST_PARAM_ANYTYPE_COMPTIME) \ + TAG(ZIR_INST_ARRAY_CAT) \ + TAG(ZIR_INST_ARRAY_MUL) \ + TAG(ZIR_INST_ARRAY_TYPE) \ + TAG(ZIR_INST_ARRAY_TYPE_SENTINEL) \ + TAG(ZIR_INST_VECTOR_TYPE) \ + TAG(ZIR_INST_ELEM_TYPE) \ + TAG(ZIR_INST_INDEXABLE_PTR_ELEM_TYPE) \ + TAG(ZIR_INST_SPLAT_OP_RESULT_TY) \ + TAG(ZIR_INST_INDEXABLE_PTR_LEN) \ + TAG(ZIR_INST_ANYFRAME_TYPE) \ + TAG(ZIR_INST_AS_NODE) \ + TAG(ZIR_INST_AS_SHIFT_OPERAND) \ + TAG(ZIR_INST_BIT_AND) \ + TAG(ZIR_INST_BITCAST) \ + TAG(ZIR_INST_BIT_NOT) \ + TAG(ZIR_INST_BIT_OR) \ + TAG(ZIR_INST_BLOCK) \ + TAG(ZIR_INST_BLOCK_COMPTIME) \ + TAG(ZIR_INST_BLOCK_INLINE) \ + TAG(ZIR_INST_DECLARATION) \ + TAG(ZIR_INST_SUSPEND_BLOCK) \ + TAG(ZIR_INST_BOOL_NOT) \ + TAG(ZIR_INST_BOOL_BR_AND) \ + TAG(ZIR_INST_BOOL_BR_OR) \ + TAG(ZIR_INST_BREAK) \ + TAG(ZIR_INST_BREAK_INLINE) \ + TAG(ZIR_INST_SWITCH_CONTINUE) \ + TAG(ZIR_INST_CHECK_COMPTIME_CONTROL_FLOW) \ + TAG(ZIR_INST_CALL) \ + TAG(ZIR_INST_FIELD_CALL) \ + TAG(ZIR_INST_BUILTIN_CALL) \ + TAG(ZIR_INST_CMP_LT) \ + TAG(ZIR_INST_CMP_LTE) \ + TAG(ZIR_INST_CMP_EQ) \ + TAG(ZIR_INST_CMP_GTE) \ + TAG(ZIR_INST_CMP_GT) \ + TAG(ZIR_INST_CMP_NEQ) \ + TAG(ZIR_INST_CONDBR) \ + TAG(ZIR_INST_CONDBR_INLINE) \ + TAG(ZIR_INST_TRY) \ + TAG(ZIR_INST_TRY_PTR) \ + TAG(ZIR_INST_ERROR_SET_DECL) \ + TAG(ZIR_INST_DBG_STMT) \ + TAG(ZIR_INST_DBG_VAR_PTR) \ + TAG(ZIR_INST_DBG_VAR_VAL) \ + TAG(ZIR_INST_DECL_REF) \ + TAG(ZIR_INST_DECL_VAL) \ + TAG(ZIR_INST_LOAD) \ + TAG(ZIR_INST_DIV) \ + TAG(ZIR_INST_ELEM_PTR_NODE) \ + TAG(ZIR_INST_ELEM_PTR) \ + TAG(ZIR_INST_ELEM_VAL_NODE) \ + TAG(ZIR_INST_ELEM_VAL) \ + TAG(ZIR_INST_ELEM_VAL_IMM) \ + TAG(ZIR_INST_ENSURE_RESULT_USED) \ + TAG(ZIR_INST_ENSURE_RESULT_NON_ERROR) \ + TAG(ZIR_INST_ENSURE_ERR_UNION_PAYLOAD_VOID) \ + TAG(ZIR_INST_ERROR_UNION_TYPE) \ + TAG(ZIR_INST_ERROR_VALUE) \ + TAG(ZIR_INST_EXPORT) \ + TAG(ZIR_INST_FIELD_PTR) \ + TAG(ZIR_INST_FIELD_VAL) \ + TAG(ZIR_INST_FIELD_PTR_NAMED) \ + TAG(ZIR_INST_FIELD_VAL_NAMED) \ + TAG(ZIR_INST_FUNC) \ + TAG(ZIR_INST_FUNC_INFERRED) \ + TAG(ZIR_INST_FUNC_FANCY) \ + TAG(ZIR_INST_IMPORT) \ + TAG(ZIR_INST_INT) \ + TAG(ZIR_INST_INT_BIG) \ + TAG(ZIR_INST_FLOAT) \ + TAG(ZIR_INST_FLOAT128) \ + TAG(ZIR_INST_INT_TYPE) \ + TAG(ZIR_INST_IS_NON_NULL) \ + TAG(ZIR_INST_IS_NON_NULL_PTR) \ + TAG(ZIR_INST_IS_NON_ERR) \ + TAG(ZIR_INST_IS_NON_ERR_PTR) \ + TAG(ZIR_INST_RET_IS_NON_ERR) \ + TAG(ZIR_INST_LOOP) \ + TAG(ZIR_INST_REPEAT) \ + TAG(ZIR_INST_REPEAT_INLINE) \ + TAG(ZIR_INST_FOR_LEN) \ + TAG(ZIR_INST_MERGE_ERROR_SETS) \ + TAG(ZIR_INST_REF) \ + TAG(ZIR_INST_RET_NODE) \ + TAG(ZIR_INST_RET_LOAD) \ + TAG(ZIR_INST_RET_IMPLICIT) \ + TAG(ZIR_INST_RET_ERR_VALUE) \ + TAG(ZIR_INST_RET_ERR_VALUE_CODE) \ + TAG(ZIR_INST_RET_PTR) \ + TAG(ZIR_INST_RET_TYPE) \ + TAG(ZIR_INST_PTR_TYPE) \ + TAG(ZIR_INST_SLICE_START) \ + TAG(ZIR_INST_SLICE_END) \ + TAG(ZIR_INST_SLICE_SENTINEL) \ + TAG(ZIR_INST_SLICE_LENGTH) \ + TAG(ZIR_INST_SLICE_SENTINEL_TY) \ + TAG(ZIR_INST_STORE_NODE) \ + TAG(ZIR_INST_STORE_TO_INFERRED_PTR) \ + TAG(ZIR_INST_STR) \ + TAG(ZIR_INST_NEGATE) \ + TAG(ZIR_INST_NEGATE_WRAP) \ + TAG(ZIR_INST_TYPEOF) \ + TAG(ZIR_INST_TYPEOF_BUILTIN) \ + TAG(ZIR_INST_TYPEOF_LOG2_INT_TYPE) \ + TAG(ZIR_INST_UNREACHABLE) \ + TAG(ZIR_INST_XOR) \ + TAG(ZIR_INST_OPTIONAL_TYPE) \ + TAG(ZIR_INST_OPTIONAL_PAYLOAD_SAFE) \ + TAG(ZIR_INST_OPTIONAL_PAYLOAD_UNSAFE) \ + TAG(ZIR_INST_OPTIONAL_PAYLOAD_SAFE_PTR) \ + TAG(ZIR_INST_OPTIONAL_PAYLOAD_UNSAFE_PTR) \ + TAG(ZIR_INST_ERR_UNION_PAYLOAD_UNSAFE) \ + TAG(ZIR_INST_ERR_UNION_PAYLOAD_UNSAFE_PTR) \ + TAG(ZIR_INST_ERR_UNION_CODE) \ + TAG(ZIR_INST_ERR_UNION_CODE_PTR) \ + TAG(ZIR_INST_ENUM_LITERAL) \ + TAG(ZIR_INST_DECL_LITERAL) \ + TAG(ZIR_INST_DECL_LITERAL_NO_COERCE) \ + TAG(ZIR_INST_SWITCH_BLOCK) \ + TAG(ZIR_INST_SWITCH_BLOCK_REF) \ + TAG(ZIR_INST_SWITCH_BLOCK_ERR_UNION) \ + TAG(ZIR_INST_VALIDATE_DEREF) \ + TAG(ZIR_INST_VALIDATE_DESTRUCTURE) \ + TAG(ZIR_INST_FIELD_TYPE_REF) \ + TAG(ZIR_INST_OPT_EU_BASE_PTR_INIT) \ + TAG(ZIR_INST_COERCE_PTR_ELEM_TY) \ + TAG(ZIR_INST_VALIDATE_REF_TY) \ + TAG(ZIR_INST_VALIDATE_CONST) \ + TAG(ZIR_INST_STRUCT_INIT_EMPTY) \ + TAG(ZIR_INST_STRUCT_INIT_EMPTY_RESULT) \ + TAG(ZIR_INST_STRUCT_INIT_EMPTY_REF_RESULT) \ + TAG(ZIR_INST_STRUCT_INIT_ANON) \ + TAG(ZIR_INST_STRUCT_INIT) \ + TAG(ZIR_INST_STRUCT_INIT_REF) \ + TAG(ZIR_INST_VALIDATE_STRUCT_INIT_TY) \ + TAG(ZIR_INST_VALIDATE_STRUCT_INIT_RESULT_TY) \ + TAG(ZIR_INST_VALIDATE_PTR_STRUCT_INIT) \ + TAG(ZIR_INST_STRUCT_INIT_FIELD_TYPE) \ + TAG(ZIR_INST_STRUCT_INIT_FIELD_PTR) \ + TAG(ZIR_INST_ARRAY_INIT_ANON) \ + TAG(ZIR_INST_ARRAY_INIT) \ + TAG(ZIR_INST_ARRAY_INIT_REF) \ + TAG(ZIR_INST_VALIDATE_ARRAY_INIT_TY) \ + TAG(ZIR_INST_VALIDATE_ARRAY_INIT_RESULT_TY) \ + TAG(ZIR_INST_VALIDATE_ARRAY_INIT_REF_TY) \ + TAG(ZIR_INST_VALIDATE_PTR_ARRAY_INIT) \ + TAG(ZIR_INST_ARRAY_INIT_ELEM_TYPE) \ + TAG(ZIR_INST_ARRAY_INIT_ELEM_PTR) \ + TAG(ZIR_INST_UNION_INIT) \ + TAG(ZIR_INST_TYPE_INFO) \ + TAG(ZIR_INST_SIZE_OF) \ + TAG(ZIR_INST_BIT_SIZE_OF) \ + TAG(ZIR_INST_INT_FROM_PTR) \ + TAG(ZIR_INST_COMPILE_ERROR) \ + TAG(ZIR_INST_SET_EVAL_BRANCH_QUOTA) \ + TAG(ZIR_INST_INT_FROM_ENUM) \ + TAG(ZIR_INST_ALIGN_OF) \ + TAG(ZIR_INST_INT_FROM_BOOL) \ + TAG(ZIR_INST_EMBED_FILE) \ + TAG(ZIR_INST_ERROR_NAME) \ + TAG(ZIR_INST_PANIC) \ + TAG(ZIR_INST_TRAP) \ + TAG(ZIR_INST_SET_RUNTIME_SAFETY) \ + TAG(ZIR_INST_SQRT) \ + TAG(ZIR_INST_SIN) \ + TAG(ZIR_INST_COS) \ + TAG(ZIR_INST_TAN) \ + TAG(ZIR_INST_EXP) \ + TAG(ZIR_INST_EXP2) \ + TAG(ZIR_INST_LOG) \ + TAG(ZIR_INST_LOG2) \ + TAG(ZIR_INST_LOG10) \ + TAG(ZIR_INST_ABS) \ + TAG(ZIR_INST_FLOOR) \ + TAG(ZIR_INST_CEIL) \ + TAG(ZIR_INST_TRUNC) \ + TAG(ZIR_INST_ROUND) \ + TAG(ZIR_INST_TAG_NAME) \ + TAG(ZIR_INST_TYPE_NAME) \ + TAG(ZIR_INST_FRAME_TYPE) \ + TAG(ZIR_INST_INT_FROM_FLOAT) \ + TAG(ZIR_INST_FLOAT_FROM_INT) \ + TAG(ZIR_INST_PTR_FROM_INT) \ + TAG(ZIR_INST_ENUM_FROM_INT) \ + TAG(ZIR_INST_FLOAT_CAST) \ + TAG(ZIR_INST_INT_CAST) \ + TAG(ZIR_INST_PTR_CAST) \ + TAG(ZIR_INST_TRUNCATE) \ + TAG(ZIR_INST_HAS_DECL) \ + TAG(ZIR_INST_HAS_FIELD) \ + TAG(ZIR_INST_CLZ) \ + TAG(ZIR_INST_CTZ) \ + TAG(ZIR_INST_POP_COUNT) \ + TAG(ZIR_INST_BYTE_SWAP) \ + TAG(ZIR_INST_BIT_REVERSE) \ + TAG(ZIR_INST_BIT_OFFSET_OF) \ + TAG(ZIR_INST_OFFSET_OF) \ + TAG(ZIR_INST_SPLAT) \ + TAG(ZIR_INST_REDUCE) \ + TAG(ZIR_INST_SHUFFLE) \ + TAG(ZIR_INST_ATOMIC_LOAD) \ + TAG(ZIR_INST_ATOMIC_RMW) \ + TAG(ZIR_INST_ATOMIC_STORE) \ + TAG(ZIR_INST_MUL_ADD) \ + TAG(ZIR_INST_MEMCPY) \ + TAG(ZIR_INST_MEMMOVE) \ + TAG(ZIR_INST_MEMSET) \ + TAG(ZIR_INST_MIN) \ + TAG(ZIR_INST_MAX) \ + TAG(ZIR_INST_C_IMPORT) \ + TAG(ZIR_INST_ALLOC) \ + TAG(ZIR_INST_ALLOC_MUT) \ + TAG(ZIR_INST_ALLOC_COMPTIME_MUT) \ + TAG(ZIR_INST_ALLOC_INFERRED) \ + TAG(ZIR_INST_ALLOC_INFERRED_MUT) \ + TAG(ZIR_INST_ALLOC_INFERRED_COMPTIME) \ + TAG(ZIR_INST_ALLOC_INFERRED_COMPTIME_MUT) \ + TAG(ZIR_INST_RESOLVE_INFERRED_ALLOC) \ + TAG(ZIR_INST_MAKE_PTR_CONST) \ + TAG(ZIR_INST_RESUME) \ + TAG(ZIR_INST_DEFER) \ + TAG(ZIR_INST_DEFER_ERR_CODE) \ + TAG(ZIR_INST_SAVE_ERR_RET_INDEX) \ + TAG(ZIR_INST_RESTORE_ERR_RET_INDEX_UNCONDITIONAL) \ + TAG(ZIR_INST_RESTORE_ERR_RET_INDEX_FN_ENTRY) \ + TAG(ZIR_INST_EXTENDED) + +#define ZIR_GENERATE_ENUM(e) e, +typedef enum { ZIR_INST_FOREACH_TAG(ZIR_GENERATE_ENUM) } ZirInstTag; + +// --- ZIR extended opcodes (uint16_t) --- +// Matches Zir.Inst.Extended enum order from Zir.zig. + +#define ZIR_EXT_FOREACH_TAG(TAG) \ + TAG(ZIR_EXT_STRUCT_DECL) \ + TAG(ZIR_EXT_ENUM_DECL) \ + TAG(ZIR_EXT_UNION_DECL) \ + TAG(ZIR_EXT_OPAQUE_DECL) \ + TAG(ZIR_EXT_TUPLE_DECL) \ + TAG(ZIR_EXT_THIS) \ + TAG(ZIR_EXT_RET_ADDR) \ + TAG(ZIR_EXT_BUILTIN_SRC) \ + TAG(ZIR_EXT_ERROR_RETURN_TRACE) \ + TAG(ZIR_EXT_FRAME) \ + TAG(ZIR_EXT_FRAME_ADDRESS) \ + TAG(ZIR_EXT_ALLOC) \ + TAG(ZIR_EXT_BUILTIN_EXTERN) \ + TAG(ZIR_EXT_ASM) \ + TAG(ZIR_EXT_ASM_EXPR) \ + TAG(ZIR_EXT_COMPILE_LOG) \ + TAG(ZIR_EXT_TYPEOF_PEER) \ + TAG(ZIR_EXT_MIN_MULTI) \ + TAG(ZIR_EXT_MAX_MULTI) \ + TAG(ZIR_EXT_ADD_WITH_OVERFLOW) \ + TAG(ZIR_EXT_SUB_WITH_OVERFLOW) \ + TAG(ZIR_EXT_MUL_WITH_OVERFLOW) \ + TAG(ZIR_EXT_SHL_WITH_OVERFLOW) \ + TAG(ZIR_EXT_C_UNDEF) \ + TAG(ZIR_EXT_C_INCLUDE) \ + TAG(ZIR_EXT_C_DEFINE) \ + TAG(ZIR_EXT_WASM_MEMORY_SIZE) \ + TAG(ZIR_EXT_WASM_MEMORY_GROW) \ + TAG(ZIR_EXT_PREFETCH) \ + TAG(ZIR_EXT_SET_FLOAT_MODE) \ + TAG(ZIR_EXT_ERROR_CAST) \ + TAG(ZIR_EXT_BREAKPOINT) \ + TAG(ZIR_EXT_DISABLE_INSTRUMENTATION) \ + TAG(ZIR_EXT_DISABLE_INTRINSICS) \ + TAG(ZIR_EXT_SELECT) \ + TAG(ZIR_EXT_INT_FROM_ERROR) \ + TAG(ZIR_EXT_ERROR_FROM_INT) \ + TAG(ZIR_EXT_REIFY) \ + TAG(ZIR_EXT_CMPXCHG) \ + TAG(ZIR_EXT_C_VA_ARG) \ + TAG(ZIR_EXT_C_VA_COPY) \ + TAG(ZIR_EXT_C_VA_END) \ + TAG(ZIR_EXT_C_VA_START) \ + TAG(ZIR_EXT_PTR_CAST_FULL) \ + TAG(ZIR_EXT_PTR_CAST_NO_DEST) \ + TAG(ZIR_EXT_WORK_ITEM_ID) \ + TAG(ZIR_EXT_WORK_GROUP_SIZE) \ + TAG(ZIR_EXT_WORK_GROUP_ID) \ + TAG(ZIR_EXT_IN_COMPTIME) \ + TAG(ZIR_EXT_RESTORE_ERR_RET_INDEX) \ + TAG(ZIR_EXT_CLOSURE_GET) \ + TAG(ZIR_EXT_VALUE_PLACEHOLDER) \ + TAG(ZIR_EXT_FIELD_PARENT_PTR) \ + TAG(ZIR_EXT_BUILTIN_VALUE) \ + TAG(ZIR_EXT_BRANCH_HINT) \ + TAG(ZIR_EXT_INPLACE_ARITH_RESULT_TY) \ + TAG(ZIR_EXT_DBG_EMPTY_STMT) \ + TAG(ZIR_EXT_ASTGEN_ERROR) + +#define ZIR_EXT_GENERATE_ENUM(e) e, +typedef enum { ZIR_EXT_FOREACH_TAG(ZIR_EXT_GENERATE_ENUM) } ZirInstExtended; + +// --- ZIR instruction data (8-byte union) --- +// Matches Zir.Inst.Data union from Zir.zig. + +typedef uint32_t ZirInstIndex; +typedef uint32_t ZirInstRef; + +typedef union { + struct { + uint16_t opcode; + uint16_t small; + uint32_t operand; + } extended; + struct { + int32_t src_node; + ZirInstRef operand; + } un_node; + struct { + int32_t src_tok; + ZirInstRef operand; + } un_tok; + struct { + int32_t src_node; + uint32_t payload_index; + } pl_node; + struct { + int32_t src_tok; + uint32_t payload_index; + } pl_tok; + struct { + ZirInstRef lhs; + ZirInstRef rhs; + } bin; + struct { + uint32_t start; + uint32_t len; + } str; + struct { + uint32_t start; + int32_t src_tok; + } str_tok; + int32_t tok; + int32_t node; + uint64_t int_val; + double float_val; + struct { + uint8_t flags; + uint8_t size; + uint16_t _pad; + uint32_t payload_index; + } ptr_type; + struct { + int32_t src_node; + uint16_t bit_count; + uint8_t signedness; + uint8_t _pad; + } int_type; + struct { + int32_t src_node; + uint32_t _pad; + } unreachable_data; + struct { + ZirInstRef operand; + uint32_t payload_index; + } break_data; + struct { + uint32_t line; + uint32_t column; + } dbg_stmt; + struct { + int32_t src_node; + ZirInstIndex inst; + } inst_node; + struct { + uint32_t str; + ZirInstRef operand; + } str_op; + struct { + uint32_t index; + uint32_t len; + } defer_data; + struct { + ZirInstRef err_code; + uint32_t payload_index; + } defer_err_code; + struct { + ZirInstRef operand; + uint32_t _pad; + } save_err_ret_index; + struct { + ZirInstRef operand; + uint32_t idx; + } elem_val_imm; + struct { + uint32_t src_node; + uint32_t payload_index; + } declaration; +} ZirInstData; + +// --- ZIR built-in refs --- +// Matches Zir.Inst.Ref enum from Zir.zig. +// Values below REF_START_INDEX are InternPool indices. + +#define ZIR_REF_START_INDEX 124 +#define ZIR_REF_NONE UINT32_MAX +#define ZIR_MAIN_STRUCT_INST 0 + +// Zir.Inst.Ref enum values (matching Zig enum order in Zir.zig). +// Types (0-103). +#define ZIR_REF_U1_TYPE 2 +#define ZIR_REF_U8_TYPE 3 +#define ZIR_REF_I8_TYPE 4 +#define ZIR_REF_U16_TYPE 5 +#define ZIR_REF_I16_TYPE 6 +#define ZIR_REF_U29_TYPE 7 +#define ZIR_REF_U32_TYPE 8 +#define ZIR_REF_I32_TYPE 9 +#define ZIR_REF_U64_TYPE 10 +#define ZIR_REF_I64_TYPE 11 +#define ZIR_REF_U128_TYPE 13 +#define ZIR_REF_I128_TYPE 14 +#define ZIR_REF_USIZE_TYPE 16 +#define ZIR_REF_ISIZE_TYPE 17 +#define ZIR_REF_C_CHAR_TYPE 18 +#define ZIR_REF_C_SHORT_TYPE 19 +#define ZIR_REF_C_USHORT_TYPE 20 +#define ZIR_REF_C_INT_TYPE 21 +#define ZIR_REF_C_UINT_TYPE 22 +#define ZIR_REF_C_LONG_TYPE 23 +#define ZIR_REF_C_ULONG_TYPE 24 +#define ZIR_REF_C_LONGLONG_TYPE 25 +#define ZIR_REF_C_ULONGLONG_TYPE 26 +#define ZIR_REF_C_LONGDOUBLE_TYPE 27 +#define ZIR_REF_F16_TYPE 28 +#define ZIR_REF_F32_TYPE 29 +#define ZIR_REF_F64_TYPE 30 +#define ZIR_REF_F80_TYPE 31 +#define ZIR_REF_F128_TYPE 32 +#define ZIR_REF_ANYOPAQUE_TYPE 33 +#define ZIR_REF_BOOL_TYPE 34 +#define ZIR_REF_VOID_TYPE 35 +#define ZIR_REF_TYPE_TYPE 36 +#define ZIR_REF_ANYERROR_TYPE 37 +#define ZIR_REF_COMPTIME_INT_TYPE 38 +#define ZIR_REF_COMPTIME_FLOAT_TYPE 39 +#define ZIR_REF_NORETURN_TYPE 40 +#define ZIR_REF_ANYFRAME_TYPE 41 +#define ZIR_REF_NULL_TYPE 42 +#define ZIR_REF_UNDEFINED_TYPE 43 +#define ZIR_REF_ENUM_LITERAL_TYPE 44 +#define ZIR_REF_PTR_USIZE_TYPE 45 +#define ZIR_REF_PTR_CONST_COMPTIME_INT_TYPE 46 +#define ZIR_REF_MANYPTR_U8_TYPE 47 +#define ZIR_REF_MANYPTR_CONST_U8_TYPE 48 +#define ZIR_REF_MANYPTR_CONST_U8_SENTINEL_0_TYPE 49 +#define ZIR_REF_SLICE_CONST_U8_TYPE 50 +#define ZIR_REF_SLICE_CONST_U8_SENTINEL_0_TYPE 51 +#define ZIR_REF_ANYERROR_VOID_ERROR_UNION_TYPE 100 +#define ZIR_REF_GENERIC_POISON_TYPE 102 +#define ZIR_REF_EMPTY_TUPLE_TYPE 103 +// Values (104-123). +#define ZIR_REF_UNDEF 104 +#define ZIR_REF_UNDEF_BOOL 105 +#define ZIR_REF_UNDEF_USIZE 106 +#define ZIR_REF_UNDEF_U1 107 +#define ZIR_REF_ZERO 108 +#define ZIR_REF_ZERO_USIZE 109 +#define ZIR_REF_ZERO_U1 110 +#define ZIR_REF_ZERO_U8 111 +#define ZIR_REF_ONE 112 +#define ZIR_REF_ONE_USIZE 113 +#define ZIR_REF_ONE_U1 114 +#define ZIR_REF_ONE_U8 115 +#define ZIR_REF_FOUR_U8 116 +#define ZIR_REF_NEGATIVE_ONE 117 +#define ZIR_REF_VOID_VALUE 118 +#define ZIR_REF_UNREACHABLE_VALUE 119 +#define ZIR_REF_NULL_VALUE 120 +#define ZIR_REF_BOOL_TRUE 121 +#define ZIR_REF_BOOL_FALSE 122 +#define ZIR_REF_EMPTY_TUPLE 123 + +// Ast.Node.OptionalOffset.none = maxInt(i32). +#define AST_NODE_OFFSET_NONE ((int32_t)0x7FFFFFFF) + +// --- Extra indices reserved at the start of extra[] --- +// Matches Zir.ExtraIndex enum from Zir.zig. + +#define ZIR_EXTRA_COMPILE_ERRORS 0 +#define ZIR_EXTRA_IMPORTS 1 +#define ZIR_EXTRA_RESERVED_COUNT 2 + +// --- Zir output structure --- + +typedef struct { + uint32_t inst_len; + uint32_t inst_cap; + ZirInstTag* inst_tags; + ZirInstData* inst_datas; + uint32_t extra_len; + uint32_t extra_cap; + uint32_t* extra; + uint32_t string_bytes_len; + uint32_t string_bytes_cap; + uint8_t* string_bytes; + bool has_compile_errors; +} Zir; + +void zirDeinit(Zir* zir); + +#endif