zig

fork of https://codeberg.org/ziglang/zig
Log | Files | Refs | README | LICENSE

commit cb09cd5dfa5234a170490d824b89fd34d3adb0a1 (tree)
parent ed1520b659dca8a3d53da5b11bde47557733f8a7
Author: Motiejus Jakštys <motiejus@jakstys.lt>
Date:   Sat, 14 Feb 2026 19:55:52 +0000

astgen: add inline switch prong parsing and float literal support

- parser.c: Handle TOKEN_KEYWORD_INLINE in parseSwitchProng, producing
  AST_NODE_SWITCH_CASE_INLINE_ONE / AST_NODE_SWITCH_CASE_INLINE nodes.
  This fixes parsing of multi_array_list.zig which uses `inline else`.
- astgen.c: Implement float literal parsing in numberLiteral using
  strtold with f64 round-trip check. Add addFloat helper and addPlNodeQuad
  for float128 emission. Extend token scanning to include exponent markers.
- multi_array_list.zig still skipped: remaining diffs in bool_not,
  bool_br_and, ret_is_non_err.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>

Diffstat:
M stage0/astgen.c | 119 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++--------
M stage0/astgen_test.zig | 2 +-
M stage0/parser.c | 15 ++++++++++++---
3 files changed, 121 insertions(+), 15 deletions(-)

diff --git a/stage0/astgen.c b/stage0/astgen.c @@ -625,6 +625,13 @@ static uint32_t addInt(GenZir* gz, uint64_t integer) { return addInstruction(gz, ZIR_INST_INT, data); } +// Mirrors GenZir.addFloat (AstGen.zig:12265). +static uint32_t addFloat(GenZir* gz, double number) { + ZirInstData data; + data.float_val = number; + return addInstruction(gz, ZIR_INST_FLOAT, data); +} + // Mirrors GenZir.add for bin data (Zir.zig:1877). // Creates an instruction with bin data (lhs + rhs stored in inst_datas). static uint32_t addBin( @@ -665,6 +672,22 @@ static uint32_t addPlNodeTriple(GenZir* gz, ZirInstTag tag, uint32_t node, return addInstruction(gz, tag, data); } +// Mirrors addPlNode for 4-operand payloads (e.g. Float128). +static uint32_t addPlNodeQuad(GenZir* gz, ZirInstTag tag, uint32_t node, + uint32_t a, uint32_t b, uint32_t c, uint32_t d) { + AstGenCtx* ag = gz->astgen; + ensureExtraCapacity(ag, 4); + uint32_t payload_index = ag->extra_len; + ag->extra[ag->extra_len++] = a; + ag->extra[ag->extra_len++] = b; + ag->extra[ag->extra_len++] = c; + ag->extra[ag->extra_len++] = d; + ZirInstData data; + data.pl_node.src_node = (int32_t)node - (int32_t)gz->decl_node_index; + data.pl_node.payload_index = payload_index; + return addInstruction(gz, tag, data); +} + // Checks if an AST identifier node is the single underscore `_`. // Used for inferred array length detection in [_]T patterns. // Intentionally does NOT support @"_" syntax (matches upstream). @@ -2978,16 +3001,92 @@ static uint32_t numberLiteral( uint32_t tok_start = ag->tree->tokens.starts[num_token]; const char* source = ag->tree->source; - // Determine token length by scanning to next non-number character. + // Determine token end by scanning forward, bounded by next token's start. + uint32_t max_end = (num_token + 1 < ag->tree->tokens.len) + ? 
ag->tree->tokens.starts[num_token + 1] + : ag->tree->source_len; uint32_t tok_end = tok_start; - while (tok_end < ag->tree->source_len - && ((source[tok_end] >= '0' && source[tok_end] <= '9') - || source[tok_end] == '_' || source[tok_end] == '.' - || source[tok_end] == 'x' || source[tok_end] == 'o' - || source[tok_end] == 'b' - || (source[tok_end] >= 'a' && source[tok_end] <= 'f') - || (source[tok_end] >= 'A' && source[tok_end] <= 'F'))) { - tok_end++; + while (tok_end < max_end) { + char ch = source[tok_end]; + if ((ch >= '0' && ch <= '9') || ch == '_' || ch == '.' || ch == 'x' + || ch == 'o' || ch == 'b' || ch == 'p' || ch == 'P' + || (ch >= 'a' && ch <= 'f') || (ch >= 'A' && ch <= 'F')) { + tok_end++; + } else if ((ch == '+' || ch == '-') && tok_end > tok_start + && (source[tok_end - 1] == 'e' || source[tok_end - 1] == 'E' + || source[tok_end - 1] == 'p' || source[tok_end - 1] == 'P')) { + tok_end++; + } else { + break; + } + } + + // Check if this is a float literal (has '.' or exponent). + bool is_float = false; + for (uint32_t i = tok_start; i < tok_end; i++) { + if (source[i] == '.' || source[i] == 'p' || source[i] == 'P') { + is_float = true; + break; + } + // 'e'/'E' is exponent only for decimal floats, not hex digits + if ((source[i] == 'e' || source[i] == 'E') + && !(tok_end - tok_start >= 2 && source[tok_start] == '0' + && source[tok_start + 1] == 'x')) { + is_float = true; + break; + } + } + + if (is_float) { + // Copy to temporary buffer, stripping underscores (AstGen.zig:8721). + char buf[256]; + uint32_t buf_len = 0; + for (uint32_t i = tok_start; i < tok_end && buf_len < sizeof(buf) - 1; + i++) { + if (source[i] != '_') + buf[buf_len++] = source[i]; + } + buf[buf_len] = '\0'; + + // Parse as long double for maximum precision, then check if it + // round-trips through f64 (mirrors AstGen.zig:8730-8746). 
+ long double ld_val = strtold(buf, NULL); + if (sign == NUM_SIGN_NEGATIVE) + ld_val = -ld_val; + + double d_val = (double)ld_val; + long double round_trip = (long double)d_val; + if (round_trip == ld_val) { + // Fits in f64 — emit ZIR_INST_FLOAT. + return addFloat(gz, d_val); + } + + // Needs f128 — break into 4 u32 pieces (AstGen.zig:8738-8746). + // Convert x86 80-bit extended double to IEEE 754 binary128. + // Extended: sign(1) | exponent(15) | integer(1) | fraction(63) + // Binary128: sign(1) | exponent(15) | fraction(112) + // Same exponent bias (16383); drop the explicit integer bit. + uint8_t ld_bytes[16]; + memset(ld_bytes, 0, sizeof(ld_bytes)); + memcpy(ld_bytes, &ld_val, sizeof(ld_val)); + uint64_t ld_mantissa; + memcpy(&ld_mantissa, ld_bytes, 8); + uint16_t ld_sign_exp; + memcpy(&ld_sign_exp, ld_bytes + 8, 2); + uint32_t ld_sign = (uint32_t)((ld_sign_exp >> 15) & 1); + uint32_t ld_exp = (uint32_t)(ld_sign_exp & 0x7FFF); + // Drop explicit integer bit, get 63-bit fraction. + uint64_t frac63 = ld_mantissa & 0x7FFFFFFFFFFFFFFFULL; + // Shift 63-bit fraction left by 49 to fill 112-bit binary128 fraction. + uint64_t frac_lo = frac63 << 49; + uint64_t frac_hi = frac63 >> 15; + frac_hi |= ((uint64_t)ld_exp << 48) | ((uint64_t)ld_sign << 63); + uint32_t piece0 = (uint32_t)(frac_lo & 0xFFFFFFFFU); + uint32_t piece1 = (uint32_t)(frac_lo >> 32); + uint32_t piece2 = (uint32_t)(frac_hi & 0xFFFFFFFFU); + uint32_t piece3 = (uint32_t)(frac_hi >> 32); + return addPlNodeQuad( + gz, ZIR_INST_FLOAT128, node, piece0, piece1, piece2, piece3); } // Parse the integer value (simplified: decimal and hex). 
@@ -3015,8 +3114,6 @@ static uint32_t numberLiteral( for (; pos < tok_end; pos++) { if (source[pos] == '_') continue; - if (source[pos] == '.') - break; // float — not handled yet if (source[pos] >= '0' && source[pos] <= '9') value = value * 10 + (uint64_t)(source[pos] - '0'); } diff --git a/stage0/astgen_test.zig b/stage0/astgen_test.zig @@ -953,7 +953,7 @@ test "astgen: corpus array_list.zig" { } test "astgen: corpus multi_array_list.zig" { - if (true) return error.SkipZigTest; // TODO: parser bug - C parser produces nodes_len=1 + if (true) return error.SkipZigTest; // TODO: remaining diffs: bool_not, bool_br_and, ret_is_non_err, plus small instruction count differences const gpa = std.testing.allocator; try corpusCheck(gpa, @embedFile("../lib/std/multi_array_list.zig")); } diff --git a/stage0/parser.c b/stage0/parser.c @@ -2898,6 +2898,8 @@ static void parsePtrPayload(Parser* p) { static AstNodeIndex parseSwitchProng(Parser* p) { const uint32_t items_old_len = p->scratch.len; + const bool is_inline = eatToken(p, TOKEN_KEYWORD_INLINE) != null_token; + if (eatToken(p, TOKEN_KEYWORD_ELSE) == null_token) { while (true) { const AstNodeIndex item = parseSwitchItem(p); @@ -2907,8 +2909,11 @@ static AstNodeIndex parseSwitchProng(Parser* p) { if (eatToken(p, TOKEN_COMMA) == null_token) break; } - if (p->scratch.len == items_old_len) + if (p->scratch.len == items_old_len) { + if (is_inline) + p->tok_i -= 1; return null_node; + } } const AstTokenIndex arrow @@ -2926,7 +2931,9 @@ static AstNodeIndex parseSwitchProng(Parser* p) { case 1: case_node = addNode(&p->nodes, (AstNodeItem) { - .tag = AST_NODE_SWITCH_CASE_ONE, + .tag = is_inline + ? AST_NODE_SWITCH_CASE_INLINE_ONE + : AST_NODE_SWITCH_CASE_ONE, .main_token = arrow, .data = { .lhs @@ -2940,7 +2947,9 @@ static AstNodeIndex parseSwitchProng(Parser* p) { = listToSpan(p, &p->scratch.arr[items_old_len], items_len); case_node = addNode(&p->nodes, (AstNodeItem) { - .tag = AST_NODE_SWITCH_CASE, + .tag = is_inline + ? 
AST_NODE_SWITCH_CASE_INLINE + : AST_NODE_SWITCH_CASE, .main_token = arrow, .data = { .lhs = addExtra(p,