astgen: add inline switch prong parsing and float literal support - zig

commit cb09cd5dfa5234a170490d824b89fd34d3adb0a1 (tree)
parent ed1520b659dca8a3d53da5b11bde47557733f8a7
Author: Motiejus Jakštys <motiejus@jakstys.lt>
Date:   Sat, 14 Feb 2026 19:55:52 +0000

astgen: add inline switch prong parsing and float literal support

- parser.c: Handle TOKEN_KEYWORD_INLINE in parseSwitchProng, producing
  AST_NODE_SWITCH_CASE_INLINE_ONE / AST_NODE_SWITCH_CASE_INLINE nodes.
  This fixes parsing of multi_array_list.zig which uses `inline else`.
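- astgen.c: Implement float literal parsing in numberLiteral using
  strtold with an f64 round-trip check. Add the addFloat helper and
  addPlNodeQuad for float128 emission; the float128 path assumes that
  long double is the x86 80-bit extended format. Extend token scanning
  to include exponent markers and the sign that may follow them.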
- multi_array_list.zig still skipped: remaining diffs in bool_not,
  bool_br_and, ret_is_non_err, plus small instruction count differences.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>

Diffstat:
M stage0/astgen.c  | 119 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++--------
M stage0/astgen_test.zig  | 2 +-
M stage0/parser.c  | 15 ++++++++++++---

3 files changed, 121 insertions(+), 15 deletions(-)
diff --git a/stage0/astgen.c b/stage0/astgen.c
@@ -625,6 +625,13 @@ static uint32_t addInt(GenZir* gz, uint64_t integer) {
     return addInstruction(gz, ZIR_INST_INT, data);
 }
 
+// Mirrors GenZir.addFloat (AstGen.zig:12265).
+static uint32_t addFloat(GenZir* gz, double number) {
+    ZirInstData data;
+    data.float_val = number;
+    return addInstruction(gz, ZIR_INST_FLOAT, data);
+}
+
 // Mirrors GenZir.add for bin data (Zir.zig:1877).
 // Creates an instruction with bin data (lhs + rhs stored in inst_datas).
 static uint32_t addBin(
@@ -665,6 +672,22 @@ static uint32_t addPlNodeTriple(GenZir* gz, ZirInstTag tag, uint32_t node,
     return addInstruction(gz, tag, data);
 }
 
+// Mirrors addPlNode for 4-operand payloads (e.g. Float128).
+static uint32_t addPlNodeQuad(GenZir* gz, ZirInstTag tag, uint32_t node,
+    uint32_t a, uint32_t b, uint32_t c, uint32_t d) {
+    AstGenCtx* ag = gz->astgen;
+    ensureExtraCapacity(ag, 4);
+    uint32_t payload_index = ag->extra_len;
+    ag->extra[ag->extra_len++] = a;
+    ag->extra[ag->extra_len++] = b;
+    ag->extra[ag->extra_len++] = c;
+    ag->extra[ag->extra_len++] = d;
+    ZirInstData data;
+    data.pl_node.src_node = (int32_t)node - (int32_t)gz->decl_node_index;
+    data.pl_node.payload_index = payload_index;
+    return addInstruction(gz, tag, data);
+}
+
 // Checks if an AST identifier node is the single underscore `_`.
 // Used for inferred array length detection in [_]T patterns.
 // Intentionally does NOT support @"_" syntax (matches upstream).
@@ -2978,16 +3001,92 @@ static uint32_t numberLiteral(
     uint32_t tok_start = ag->tree->tokens.starts[num_token];
     const char* source = ag->tree->source;
 
-    // Determine token length by scanning to next non-number character.
+    // Determine token end by scanning forward, bounded by next token's start.
+    uint32_t max_end = (num_token + 1 < ag->tree->tokens.len)
+        ? ag->tree->tokens.starts[num_token + 1]
+        : ag->tree->source_len;
     uint32_t tok_end = tok_start;
-    while (tok_end < ag->tree->source_len
-        && ((source[tok_end] >= '0' && source[tok_end] <= '9')
-            || source[tok_end] == '_' || source[tok_end] == '.'
-            || source[tok_end] == 'x' || source[tok_end] == 'o'
-            || source[tok_end] == 'b'
-            || (source[tok_end] >= 'a' && source[tok_end] <= 'f')
-            || (source[tok_end] >= 'A' && source[tok_end] <= 'F'))) {
-        tok_end++;
+    while (tok_end < max_end) {
+        char ch = source[tok_end];
+        if ((ch >= '0' && ch <= '9') || ch == '_' || ch == '.' || ch == 'x'
+            || ch == 'o' || ch == 'b' || ch == 'p' || ch == 'P'
+            || (ch >= 'a' && ch <= 'f') || (ch >= 'A' && ch <= 'F')) {
+            tok_end++;
+        } else if ((ch == '+' || ch == '-') && tok_end > tok_start
+            && (source[tok_end - 1] == 'e' || source[tok_end - 1] == 'E'
+                || source[tok_end - 1] == 'p' || source[tok_end - 1] == 'P')) {
+            tok_end++;
+        } else {
+            break;
+        }
+    }
+
+    // Check if this is a float literal (has '.' or exponent).
+    bool is_float = false;
+    for (uint32_t i = tok_start; i < tok_end; i++) {
+        if (source[i] == '.' || source[i] == 'p' || source[i] == 'P') {
+            is_float = true;
+            break;
+        }
+        // 'e'/'E' is exponent only for decimal floats, not hex digits
+        if ((source[i] == 'e' || source[i] == 'E')
+            && !(tok_end - tok_start >= 2 && source[tok_start] == '0'
+                && source[tok_start + 1] == 'x')) {
+            is_float = true;
+            break;
+        }
+    }
+
+    if (is_float) {
+        // Copy to temporary buffer, stripping underscores (AstGen.zig:8721).
+        char buf[256];
+        uint32_t buf_len = 0;
+        for (uint32_t i = tok_start; i < tok_end && buf_len < sizeof(buf) - 1;
+            i++) {
+            if (source[i] != '_')
+                buf[buf_len++] = source[i];
+        }
+        buf[buf_len] = '\0';
+
+        // Parse as long double for maximum precision, then check if it
+        // round-trips through f64 (mirrors AstGen.zig:8730-8746).
+        long double ld_val = strtold(buf, NULL);
+        if (sign == NUM_SIGN_NEGATIVE)
+            ld_val = -ld_val;
+
+        double d_val = (double)ld_val;
+        long double round_trip = (long double)d_val;
+        if (round_trip == ld_val) {
+            // Fits in f64 — emit ZIR_INST_FLOAT.
+            return addFloat(gz, d_val);
+        }
+
+        // Needs f128 — break into 4 u32 pieces (AstGen.zig:8738-8746).
+        // Convert x86 80-bit extended double to IEEE 754 binary128.
+        // Extended: sign(1) | exponent(15) | integer(1) | fraction(63)
+        // Binary128: sign(1) | exponent(15) | fraction(112)
+        // Same exponent bias (16383); drop the explicit integer bit.
+        uint8_t ld_bytes[16];
+        memset(ld_bytes, 0, sizeof(ld_bytes));
+        memcpy(ld_bytes, &ld_val, sizeof(ld_val));
+        uint64_t ld_mantissa;
+        memcpy(&ld_mantissa, ld_bytes, 8);
+        uint16_t ld_sign_exp;
+        memcpy(&ld_sign_exp, ld_bytes + 8, 2);
+        uint32_t ld_sign = (uint32_t)((ld_sign_exp >> 15) & 1);
+        uint32_t ld_exp = (uint32_t)(ld_sign_exp & 0x7FFF);
+        // Drop explicit integer bit, get 63-bit fraction.
+        uint64_t frac63 = ld_mantissa & 0x7FFFFFFFFFFFFFFFULL;
+        // Shift 63-bit fraction left by 49 to fill 112-bit binary128 fraction.
+        uint64_t frac_lo = frac63 << 49;
+        uint64_t frac_hi = frac63 >> 15;
+        frac_hi |= ((uint64_t)ld_exp << 48) | ((uint64_t)ld_sign << 63);
+        uint32_t piece0 = (uint32_t)(frac_lo & 0xFFFFFFFFU);
+        uint32_t piece1 = (uint32_t)(frac_lo >> 32);
+        uint32_t piece2 = (uint32_t)(frac_hi & 0xFFFFFFFFU);
+        uint32_t piece3 = (uint32_t)(frac_hi >> 32);
+        return addPlNodeQuad(
+            gz, ZIR_INST_FLOAT128, node, piece0, piece1, piece2, piece3);
     }
 
     // Parse the integer value (simplified: decimal and hex).
@@ -3015,8 +3114,6 @@ static uint32_t numberLiteral(
         for (; pos < tok_end; pos++) {
             if (source[pos] == '_')
                 continue;
-            if (source[pos] == '.')
-                break; // float — not handled yet
             if (source[pos] >= '0' && source[pos] <= '9')
                 value = value * 10 + (uint64_t)(source[pos] - '0');
         }
diff --git a/stage0/astgen_test.zig b/stage0/astgen_test.zig
@@ -953,7 +953,7 @@ test "astgen: corpus array_list.zig" {
 }
 
 test "astgen: corpus multi_array_list.zig" {
-    if (true) return error.SkipZigTest; // TODO: parser bug - C parser produces nodes_len=1
+    if (true) return error.SkipZigTest; // TODO: remaining diffs: bool_not, bool_br_and, ret_is_non_err, plus small instruction count differences
     const gpa = std.testing.allocator;
     try corpusCheck(gpa, @embedFile("../lib/std/multi_array_list.zig"));
 }
diff --git a/stage0/parser.c b/stage0/parser.c
@@ -2898,6 +2898,8 @@ static void parsePtrPayload(Parser* p) {
 static AstNodeIndex parseSwitchProng(Parser* p) {
     const uint32_t items_old_len = p->scratch.len;
 
+    const bool is_inline = eatToken(p, TOKEN_KEYWORD_INLINE) != null_token;
+
     if (eatToken(p, TOKEN_KEYWORD_ELSE) == null_token) {
         while (true) {
             const AstNodeIndex item = parseSwitchItem(p);
@@ -2907,8 +2909,11 @@ static AstNodeIndex parseSwitchProng(Parser* p) {
             if (eatToken(p, TOKEN_COMMA) == null_token)
                 break;
         }
-        if (p->scratch.len == items_old_len)
+        if (p->scratch.len == items_old_len) {
+            if (is_inline)
+                p->tok_i -= 1;
             return null_node;
+        }
     }
 
     const AstTokenIndex arrow
@@ -2926,7 +2931,9 @@ static AstNodeIndex parseSwitchProng(Parser* p) {
     case 1:
         case_node = addNode(&p->nodes,
             (AstNodeItem) {
-                .tag = AST_NODE_SWITCH_CASE_ONE,
+                .tag = is_inline
+                    ? AST_NODE_SWITCH_CASE_INLINE_ONE
+                    : AST_NODE_SWITCH_CASE_ONE,
                 .main_token = arrow,
                 .data = {
                     .lhs
@@ -2940,7 +2947,9 @@ static AstNodeIndex parseSwitchProng(Parser* p) {
             = listToSpan(p, &p->scratch.arr[items_old_len], items_len);
         case_node = addNode(&p->nodes,
             (AstNodeItem) {
-                .tag = AST_NODE_SWITCH_CASE,
+                .tag = is_inline
+                    ? AST_NODE_SWITCH_CASE_INLINE
+                    : AST_NODE_SWITCH_CASE,
                 .main_token = arrow,
                 .data = {
                     .lhs = addExtra(p,

	zig fork of https://codeberg.org/ziglang/zig
	Log | Files | Refs | README | LICENSE

M	stage0/astgen.c	|	119	+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++--------
M	stage0/astgen_test.zig	|	2	+-
M	stage0/parser.c	|	15	++++++++++++---