From befbe18ebc89b719e1f6ec9a6c48d2b588de8d02 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Motiejus=20Jak=C5=A1tys?= Date: Fri, 13 Feb 2026 21:22:53 +0000 Subject: [PATCH 1/4] parser: fix while-type-expr continue parsing and comptime labeled blocks Fix two parser bugs found by auditing against upstream Parse.zig: 1. In parseTypeExpr's while case, the continue expression was parsed inline as `eatToken(COLON) ? expectExpr : 0` which missed the required parentheses. Use parseWhileContinueExpr(p) instead, matching what parseWhileExpr already does. 2. In expectStatement, comptime blocks used parseBlock() which only matches `{ ... }`. Use parseBlockExpr() to also recognize labeled blocks like `comptime label: { ... }`. Co-Authored-By: Claude Opus 4.6 --- parser.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/parser.c b/parser.c index 1aeca2ade5..19c5f63d00 100644 --- a/parser.c +++ b/parser.c @@ -711,7 +711,7 @@ static AstNodeIndex expectStatement(Parser* p, bool allow_defer_var) { const AstTokenIndex comptime_token = eatToken(p, TOKEN_KEYWORD_COMPTIME); if (comptime_token != null_token) { // comptime followed by block => comptime block statement - const AstNodeIndex block = parseBlock(p); + const AstNodeIndex block = parseBlockExpr(p); if (block != 0) { return addNode(&p->nodes, (AstNodeItem) { @@ -1728,8 +1728,7 @@ static AstNodeIndex parseTypeExpr(Parser* p) { const AstNodeIndex condition = expectExpr(p); expectToken(p, TOKEN_R_PAREN); parsePtrPayload(p); - const AstNodeIndex cont_expr - = eatToken(p, TOKEN_COLON) != null_token ? expectExpr(p) : 0; + const AstNodeIndex cont_expr = parseWhileContinueExpr(p); const AstNodeIndex body = parseTypeExpr(p); if (eatToken(p, TOKEN_KEYWORD_ELSE) != null_token) { parsePayload(p); From 52bfd87de71976773135e1204124c0180ed3a7c8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Motiejus=20Jak=C5=A1tys?= Date: Fri, 13 Feb 2026 21:48:16 +0000 Subject: [PATCH 2/4] astgen: fix defer RL annotation, compile_error noreturn, block force_comptime Three bugs found by auditing against upstream AstGen.zig/AstRlAnnotate.zig: 1. rlExpr: defer was recursing into nd.rhs (always 0) instead of nd.lhs (the actual deferred expression), so the RL annotation pass never visited defer bodies. 2. addEnsureResult: compile_error was missing from the noreturn instruction list, causing spurious ensure_result_used instructions to be emitted after @compileError calls. 3. blockExprExpr: force_comptime was derived from gz->is_comptime, but upstream blockExpr always passes force_comptime=false to labeledBlockExpr. This caused labeled blocks in comptime contexts to incorrectly emit BLOCK_COMPTIME + BREAK_INLINE instead of BLOCK + BREAK. Co-Authored-By: Claude Opus 4.6 --- astgen.c | 38 ++++++++++++-------------------------- 1 file changed, 12 insertions(+), 26 deletions(-) diff --git a/astgen.c b/astgen.c index 288b2ac01c..8f000d2e28 100644 --- a/astgen.c +++ b/astgen.c @@ -4762,7 +4762,7 @@ static uint32_t blockExprExpr( } // Labeled block (AstGen.zig:2466-2536). - bool force_comptime = gz->is_comptime; + // Note: upstream blockExpr always passes force_comptime=false. uint32_t label_token = lbrace - 2; // Compute break result info (AstGen.zig:2484-2492). @@ -4771,15 +4771,10 @@ static uint32_t blockExprExpr( bool need_result_rvalue = (break_ri.tag != rl.tag); // Reserve the block instruction (AstGen.zig:2500-2501). - ZirInstTag block_tag - = force_comptime ? ZIR_INST_BLOCK_COMPTIME : ZIR_INST_BLOCK; - uint32_t block_inst = makeBlockInst(ag, block_tag, gz, node); + uint32_t block_inst = makeBlockInst(ag, ZIR_INST_BLOCK, gz, node); gzAppendInstruction(gz, block_inst); GenZir block_scope = makeSubBlock(gz, scope); - block_scope.is_inline = force_comptime; // AstGen.zig:2503 - if (force_comptime) - block_scope.is_comptime = true; // Set label on block_scope (AstGen.zig:2504-2508). block_scope.label_token = label_token; block_scope.label_block_inst = block_inst; @@ -4790,29 +4785,19 @@ static uint32_t blockExprExpr( if (!endsWithNoReturn(&block_scope)) { // Emit restore_err_ret_index (AstGen.zig:2515). - if (!force_comptime) { - ZirInstData rdata; - rdata.un_node.operand = block_inst + ZIR_REF_START_INDEX; - rdata.un_node.src_node - = (int32_t)node - (int32_t)gz->decl_node_index; - addInstruction( - gz, ZIR_INST_RESTORE_ERR_RET_INDEX_UNCONDITIONAL, rdata); - } + ZirInstData rdata; + rdata.un_node.operand = block_inst + ZIR_REF_START_INDEX; + rdata.un_node.src_node = (int32_t)node - (int32_t)gz->decl_node_index; + addInstruction( + gz, ZIR_INST_RESTORE_ERR_RET_INDEX_UNCONDITIONAL, rdata); // rvalue + break (AstGen.zig:2516-2518). uint32_t result = rvalue( gz, block_scope.break_result_info, ZIR_REF_VOID_VALUE, node); - ZirInstTag break_tag - = force_comptime ? ZIR_INST_BREAK_INLINE : ZIR_INST_BREAK; - addBreak( - &block_scope, break_tag, block_inst, result, AST_NODE_OFFSET_NONE); + addBreak(&block_scope, ZIR_INST_BREAK, block_inst, result, + AST_NODE_OFFSET_NONE); } - if (force_comptime) { - setBlockComptimeBody( - ag, &block_scope, block_inst, COMPTIME_REASON_COMPTIME_KEYWORD); - } else { - setBlockBody(ag, &block_scope, block_inst); - } + setBlockBody(ag, &block_scope, block_inst); // AstGen.zig:2531-2534. if (need_result_rvalue) @@ -6602,6 +6587,7 @@ static bool addEnsureResult( case ZIR_INST_TRAP: case ZIR_INST_CHECK_COMPTIME_CONTROL_FLOW: case ZIR_INST_SWITCH_CONTINUE: + case ZIR_INST_COMPILE_ERROR: is_noreturn = true; elide_check = true; break; @@ -9577,7 +9563,7 @@ static bool rlExpr( // defer (AstRlAnnotate.zig:148-151). case AST_NODE_DEFER: - (void)rlExpr(ag, nd.rhs, block, RL_RI_NONE); + (void)rlExpr(ag, nd.lhs, block, RL_RI_NONE); return false; // container_field (AstRlAnnotate.zig:153-167). From 5672cd73bc50948cd1cdd2ee898b4e6333ebbfb1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Motiejus=20Jak=C5=A1tys?= Date: Fri, 13 Feb 2026 22:05:01 +0000 Subject: [PATCH 3/4] astgen: fix builtin_call ensure_result_used bit position BuiltinCall.Flags has ensure_result_used at bit 1, not bit 3 like Call/FieldCall. Separate the case to use the correct bit. Co-Authored-By: Claude Opus 4.6 --- astgen.c | 16 +++++++++++----- 1 file changed, 11 insertions(+), 5 deletions(-) diff --git a/astgen.c b/astgen.c index 8f000d2e28..5fa717f394 100644 --- a/astgen.c +++ b/astgen.c @@ -6560,17 +6560,23 @@ static bool addEnsureResult( uint32_t inst = maybe_unused_result - ZIR_REF_START_INDEX; ZirInstTag tag = ag->inst_tags[inst]; switch (tag) { - // For call/field_call/builtin_call: set ensure_result_used flag - // (bit 3 of flags at offset 0). Flags *must* be at offset 0 in all - // three structs (AstGen.zig:2658-2665, Zir.zig:3022). + // For call/field_call: set ensure_result_used flag + // (bit 3 of flags at offset 0). Flags *must* be at offset 0 + // (AstGen.zig:2658-2665, Zir.zig:3022). case ZIR_INST_CALL: - case ZIR_INST_FIELD_CALL: - case ZIR_INST_BUILTIN_CALL: { + case ZIR_INST_FIELD_CALL: { uint32_t pi = ag->inst_datas[inst].pl_node.payload_index; ag->extra[pi] |= (1u << 3); // ensure_result_used elide_check = true; break; } + // For builtin_call: ensure_result_used is at bit 1, not bit 3. + case ZIR_INST_BUILTIN_CALL: { + uint32_t pi = ag->inst_datas[inst].pl_node.payload_index; + ag->extra[pi] |= (1u << 1); // ensure_result_used + elide_check = true; + break; + } // Always noreturn → elide (AstGen.zig:2909). case ZIR_INST_BREAK: case ZIR_INST_BREAK_INLINE: From 0b12e027a3628e26a765126d9937a2366b638ff3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Motiejus=20Jak=C5=A1tys?= Date: Fri, 13 Feb 2026 22:07:23 +0000 Subject: [PATCH 4/4] astgen: add \u{...} unicode escape sequence handling Port the \u{NNNNNN} unicode escape parsing from upstream Zig's string_literal.zig:parseEscapeSequence into both strLitAsString (string literal decoding with UTF-8 encoding) and char_literal (codepoint value extraction). Without this, \u escapes fell through to the default branch which wrote a literal 'u' character, producing incorrect ZIR string bytes. Co-Authored-By: Claude Opus 4.6 --- astgen.c | 68 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 68 insertions(+) diff --git a/astgen.c b/astgen.c index 5fa717f394..4342bd7a38 100644 --- a/astgen.c +++ b/astgen.c @@ -1089,6 +1089,54 @@ static void strLitAsString(AstGenCtx* ag, uint32_t str_lit_token, ag->string_bytes[ag->string_bytes_len++] = val; break; } + case 'u': { + // \u{NNNNNN} unicode escape (string_literal.zig:194-231). + // Skip past '{'. + i++; + // Parse hex digits until '}'. + uint32_t codepoint = 0; + while (i + 1 < raw_end) { + i++; + char c = source[i]; + if (c >= '0' && c <= '9') { + codepoint = codepoint * 16 + (uint32_t)(c - '0'); + } else if (c >= 'a' && c <= 'f') { + codepoint = codepoint * 16 + 10 + (uint32_t)(c - 'a'); + } else if (c >= 'A' && c <= 'F') { + codepoint = codepoint * 16 + 10 + (uint32_t)(c - 'A'); + } else { + // Must be '}', done. + break; + } + } + // Encode codepoint as UTF-8 (unicode.zig:53-82). + if (codepoint <= 0x7F) { + ag->string_bytes[ag->string_bytes_len++] + = (uint8_t)codepoint; + } else if (codepoint <= 0x7FF) { + ag->string_bytes[ag->string_bytes_len++] + = (uint8_t)(0xC0 | (codepoint >> 6)); + ag->string_bytes[ag->string_bytes_len++] + = (uint8_t)(0x80 | (codepoint & 0x3F)); + } else if (codepoint <= 0xFFFF) { + ag->string_bytes[ag->string_bytes_len++] + = (uint8_t)(0xE0 | (codepoint >> 12)); + ag->string_bytes[ag->string_bytes_len++] + = (uint8_t)(0x80 | ((codepoint >> 6) & 0x3F)); + ag->string_bytes[ag->string_bytes_len++] + = (uint8_t)(0x80 | (codepoint & 0x3F)); + } else { + ag->string_bytes[ag->string_bytes_len++] + = (uint8_t)(0xF0 | (codepoint >> 18)); + ag->string_bytes[ag->string_bytes_len++] + = (uint8_t)(0x80 | ((codepoint >> 12) & 0x3F)); + ag->string_bytes[ag->string_bytes_len++] + = (uint8_t)(0x80 | ((codepoint >> 6) & 0x3F)); + ag->string_bytes[ag->string_bytes_len++] + = (uint8_t)(0x80 | (codepoint & 0x3F)); + } + break; + } default: ag->string_bytes[ag->string_bytes_len++] = (uint8_t)source[i]; break; @@ -4251,6 +4299,26 @@ static uint32_t exprRl(GenZir* gz, Scope* scope, ResultLoc rl, uint32_t node) { char_val = val; break; } + case 'u': { + // \u{NNNNNN} unicode escape (string_literal.zig:194-231). + // Skip past '{'. + ci++; + uint32_t codepoint = 0; + while (true) { + ci++; + char c = src[ci]; + if (c >= '0' && c <= '9') + codepoint = codepoint * 16 + (uint32_t)(c - '0'); + else if (c >= 'a' && c <= 'f') + codepoint = codepoint * 16 + 10 + (uint32_t)(c - 'a'); + else if (c >= 'A' && c <= 'F') + codepoint = codepoint * 16 + 10 + (uint32_t)(c - 'A'); + else + break; // Must be '}'. + } + char_val = codepoint; + break; + } default: char_val = (uint8_t)src[ci]; break;