commit cb09cd5dfa5234a170490d824b89fd34d3adb0a1 (tree)
parent ed1520b659dca8a3d53da5b11bde47557733f8a7
Author: Motiejus Jakštys <motiejus@jakstys.lt>
Date: Sat, 14 Feb 2026 19:55:52 +0000
astgen: add inline switch prong parsing and float literal support
- parser.c: Handle TOKEN_KEYWORD_INLINE in parseSwitchProng, producing
AST_NODE_SWITCH_CASE_INLINE_ONE / AST_NODE_SWITCH_CASE_INLINE nodes.
This fixes parsing of multi_array_list.zig which uses `inline else`.
- astgen.c: Implement float literal parsing in numberLiteral using
strtold with f64 round-trip check. Add addFloat helper and addPlNodeQuad
for float128 emission. Extend token scanning to include exponent markers.
- multi_array_list.zig still skipped: remaining diffs in bool_not,
bool_br_and, ret_is_non_err.
Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
Diffstat:
3 files changed, 121 insertions(+), 15 deletions(-)
diff --git a/stage0/astgen.c b/stage0/astgen.c
@@ -625,6 +625,13 @@ static uint32_t addInt(GenZir* gz, uint64_t integer) {
return addInstruction(gz, ZIR_INST_INT, data);
}
+// Mirrors GenZir.addFloat (AstGen.zig:12265).
+static uint32_t addFloat(GenZir* gz, double number) {
+ ZirInstData data;
+ data.float_val = number;
+ return addInstruction(gz, ZIR_INST_FLOAT, data);
+}
+
// Mirrors GenZir.add for bin data (Zir.zig:1877).
// Creates an instruction with bin data (lhs + rhs stored in inst_datas).
static uint32_t addBin(
@@ -665,6 +672,22 @@ static uint32_t addPlNodeTriple(GenZir* gz, ZirInstTag tag, uint32_t node,
return addInstruction(gz, tag, data);
}
+// Mirrors addPlNode for 4-operand payloads (e.g. Float128).
+static uint32_t addPlNodeQuad(GenZir* gz, ZirInstTag tag, uint32_t node,
+ uint32_t a, uint32_t b, uint32_t c, uint32_t d) {
+ AstGenCtx* ag = gz->astgen;
+ ensureExtraCapacity(ag, 4);
+ uint32_t payload_index = ag->extra_len;
+ ag->extra[ag->extra_len++] = a;
+ ag->extra[ag->extra_len++] = b;
+ ag->extra[ag->extra_len++] = c;
+ ag->extra[ag->extra_len++] = d;
+ ZirInstData data;
+ data.pl_node.src_node = (int32_t)node - (int32_t)gz->decl_node_index;
+ data.pl_node.payload_index = payload_index;
+ return addInstruction(gz, tag, data);
+}
+
// Checks if an AST identifier node is the single underscore `_`.
// Used for inferred array length detection in [_]T patterns.
// Intentionally does NOT support @"_" syntax (matches upstream).
@@ -2978,16 +3001,92 @@ static uint32_t numberLiteral(
uint32_t tok_start = ag->tree->tokens.starts[num_token];
const char* source = ag->tree->source;
- // Determine token length by scanning to next non-number character.
+ // Determine token end by scanning forward, bounded by next token's start.
+ uint32_t max_end = (num_token + 1 < ag->tree->tokens.len)
+ ? ag->tree->tokens.starts[num_token + 1]
+ : ag->tree->source_len;
uint32_t tok_end = tok_start;
- while (tok_end < ag->tree->source_len
- && ((source[tok_end] >= '0' && source[tok_end] <= '9')
- || source[tok_end] == '_' || source[tok_end] == '.'
- || source[tok_end] == 'x' || source[tok_end] == 'o'
- || source[tok_end] == 'b'
- || (source[tok_end] >= 'a' && source[tok_end] <= 'f')
- || (source[tok_end] >= 'A' && source[tok_end] <= 'F'))) {
- tok_end++;
+ while (tok_end < max_end) {
+ char ch = source[tok_end];
+ if ((ch >= '0' && ch <= '9') || ch == '_' || ch == '.' || ch == 'x'
+ || ch == 'o' || ch == 'b' || ch == 'p' || ch == 'P'
+ || (ch >= 'a' && ch <= 'f') || (ch >= 'A' && ch <= 'F')) {
+ tok_end++;
+ } else if ((ch == '+' || ch == '-') && tok_end > tok_start
+ && (source[tok_end - 1] == 'e' || source[tok_end - 1] == 'E'
+ || source[tok_end - 1] == 'p' || source[tok_end - 1] == 'P')) {
+ tok_end++;
+ } else {
+ break;
+ }
+ }
+
+ // Check if this is a float literal (has '.' or exponent).
+ bool is_float = false;
+ for (uint32_t i = tok_start; i < tok_end; i++) {
+ if (source[i] == '.' || source[i] == 'p' || source[i] == 'P') {
+ is_float = true;
+ break;
+ }
+ // 'e'/'E' is exponent only for decimal floats, not hex digits
+ if ((source[i] == 'e' || source[i] == 'E')
+ && !(tok_end - tok_start >= 2 && source[tok_start] == '0'
+ && source[tok_start + 1] == 'x')) {
+ is_float = true;
+ break;
+ }
+ }
+
+ if (is_float) {
+ // Copy to temporary buffer, stripping underscores (AstGen.zig:8721).
+ char buf[256];
+ uint32_t buf_len = 0;
+ for (uint32_t i = tok_start; i < tok_end && buf_len < sizeof(buf) - 1;
+ i++) {
+ if (source[i] != '_')
+ buf[buf_len++] = source[i];
+ }
+ buf[buf_len] = '\0';
+
+ // Parse as long double for maximum precision, then check if it
+ // round-trips through f64 (mirrors AstGen.zig:8730-8746).
+ long double ld_val = strtold(buf, NULL);
+ if (sign == NUM_SIGN_NEGATIVE)
+ ld_val = -ld_val;
+
+ double d_val = (double)ld_val;
+ long double round_trip = (long double)d_val;
+ if (round_trip == ld_val) {
+ // Fits in f64 — emit ZIR_INST_FLOAT.
+ return addFloat(gz, d_val);
+ }
+
+ // Needs f128 — break into 4 u32 pieces (AstGen.zig:8738-8746).
+ // Convert x86 80-bit extended double to IEEE 754 binary128.
+ // Extended: sign(1) | exponent(15) | integer(1) | fraction(63)
+ // Binary128: sign(1) | exponent(15) | fraction(112)
+ // Same exponent bias (16383); drop the explicit integer bit.
+ uint8_t ld_bytes[16];
+ memset(ld_bytes, 0, sizeof(ld_bytes));
+ memcpy(ld_bytes, &ld_val, sizeof(ld_val));
+ uint64_t ld_mantissa;
+ memcpy(&ld_mantissa, ld_bytes, 8);
+ uint16_t ld_sign_exp;
+ memcpy(&ld_sign_exp, ld_bytes + 8, 2);
+ uint32_t ld_sign = (uint32_t)((ld_sign_exp >> 15) & 1);
+ uint32_t ld_exp = (uint32_t)(ld_sign_exp & 0x7FFF);
+ // Drop explicit integer bit, get 63-bit fraction.
+ uint64_t frac63 = ld_mantissa & 0x7FFFFFFFFFFFFFFFULL;
+ // Shift 63-bit fraction left by 49 to fill 112-bit binary128 fraction.
+ uint64_t frac_lo = frac63 << 49;
+ uint64_t frac_hi = frac63 >> 15;
+ frac_hi |= ((uint64_t)ld_exp << 48) | ((uint64_t)ld_sign << 63);
+ uint32_t piece0 = (uint32_t)(frac_lo & 0xFFFFFFFFU);
+ uint32_t piece1 = (uint32_t)(frac_lo >> 32);
+ uint32_t piece2 = (uint32_t)(frac_hi & 0xFFFFFFFFU);
+ uint32_t piece3 = (uint32_t)(frac_hi >> 32);
+ return addPlNodeQuad(
+ gz, ZIR_INST_FLOAT128, node, piece0, piece1, piece2, piece3);
}
// Parse the integer value (simplified: decimal and hex).
@@ -3015,8 +3114,6 @@ static uint32_t numberLiteral(
for (; pos < tok_end; pos++) {
if (source[pos] == '_')
continue;
- if (source[pos] == '.')
- break; // float — not handled yet
if (source[pos] >= '0' && source[pos] <= '9')
value = value * 10 + (uint64_t)(source[pos] - '0');
}
diff --git a/stage0/astgen_test.zig b/stage0/astgen_test.zig
@@ -953,7 +953,7 @@ test "astgen: corpus array_list.zig" {
}
test "astgen: corpus multi_array_list.zig" {
- if (true) return error.SkipZigTest; // TODO: parser bug - C parser produces nodes_len=1
+ if (true) return error.SkipZigTest; // TODO: remaining diffs: bool_not, bool_br_and, ret_is_non_err, plus small instruction count differences
const gpa = std.testing.allocator;
try corpusCheck(gpa, @embedFile("../lib/std/multi_array_list.zig"));
}
diff --git a/stage0/parser.c b/stage0/parser.c
@@ -2898,6 +2898,8 @@ static void parsePtrPayload(Parser* p) {
static AstNodeIndex parseSwitchProng(Parser* p) {
const uint32_t items_old_len = p->scratch.len;
+ const bool is_inline = eatToken(p, TOKEN_KEYWORD_INLINE) != null_token;
+
if (eatToken(p, TOKEN_KEYWORD_ELSE) == null_token) {
while (true) {
const AstNodeIndex item = parseSwitchItem(p);
@@ -2907,8 +2909,11 @@ static AstNodeIndex parseSwitchProng(Parser* p) {
if (eatToken(p, TOKEN_COMMA) == null_token)
break;
}
- if (p->scratch.len == items_old_len)
+ if (p->scratch.len == items_old_len) {
+ if (is_inline)
+ p->tok_i -= 1;
return null_node;
+ }
}
const AstTokenIndex arrow
@@ -2926,7 +2931,9 @@ static AstNodeIndex parseSwitchProng(Parser* p) {
case 1:
case_node = addNode(&p->nodes,
(AstNodeItem) {
- .tag = AST_NODE_SWITCH_CASE_ONE,
+ .tag = is_inline
+ ? AST_NODE_SWITCH_CASE_INLINE_ONE
+ : AST_NODE_SWITCH_CASE_ONE,
.main_token = arrow,
.data = {
.lhs
@@ -2940,7 +2947,9 @@ static AstNodeIndex parseSwitchProng(Parser* p) {
= listToSpan(p, &p->scratch.arr[items_old_len], items_len);
case_node = addNode(&p->nodes,
(AstNodeItem) {
- .tag = AST_NODE_SWITCH_CASE,
+ .tag = is_inline
+ ? AST_NODE_SWITCH_CASE_INLINE
+ : AST_NODE_SWITCH_CASE,
.main_token = arrow,
.data = {
.lhs = addExtra(p,