commit 500bc6a791dce7a554d1b45d9363003df6f8e709 (tree)
parent 760b03fd2902d0ccfaae3669be69b26755f652f9
Author: Motiejus Jakštys <motiejus@jakstys.lt>
Date: Sun, 15 Feb 2026 19:36:44 +0000
astgen: replace __float128/__uint128_t with portable C11 float handling
Replace GCC/glibc extensions (__float128, strtof128, __uint128_t) with
portable C11 code so astgen.c compiles with TCC and other C11 compilers.
The f64 vs f128 decision uses an exact algebraic round-trip test: a decimal
value m*10^e round-trips through f64 iff the odd part of m*5^e fits in 53
binary bits. This requires only integer arithmetic, no floating-point.
For f128 encoding, strtold parses the value at 80-bit extended precision,
then bit manipulation converts to IEEE 754 binary128 layout (both formats
share 15-bit exponent; bottom 49 of 112 fraction bits are zero-padded).
The big integer multiply-add path uses a portable mul64() helper instead
of __uint128_t.
Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
Diffstat:
3 files changed, 232 insertions(+), 32 deletions(-)
diff --git a/stage0/README.md b/stage0/README.md
@@ -43,3 +43,23 @@ gdb -batch \
```
You are welcome to replace `-ex "bt full"` with anything other of interest.
+
+# Float handling
+
+Float literals are parsed with `strtold()` (C11 standard, portable). On
+x86-64 Linux, `long double` is 80-bit extended precision (63 fraction bits).
+
+When a float doesn't round-trip through `f64`, it's emitted as `f128` (ZIR
+`float128` instruction). The 80-bit extended value is converted to IEEE 754
+binary128 encoding by bit manipulation — both formats share the same 15-bit
+exponent with bias 16383. The top 63 of binary128's 112 fraction bits come
+from the 80-bit value; the bottom 49 are zero-padded.
+
+This means float128 literals lose ~49 bits of precision compared to the
+upstream Zig implementation (which uses native `f128`). This is acceptable
+because stage0 is a bootstrap tool — the real Zig compiler re-parses all
+source with full f128 precision in later stages. The test comparison mask
+in `astgen_test.zig` skips float128 payloads to account for this.
+
+Previous approach used `__float128`/`strtof128` (GCC/glibc extensions) for
+full precision, but these are not portable to TCC and other C11 compilers.
diff --git a/stage0/astgen.c b/stage0/astgen.c
@@ -7,12 +7,10 @@
#include "astgen.h"
#include "common.h"
#include <assert.h>
+#include <math.h>
#include <stdlib.h>
#include <string.h>
-// Forward-declare strtof128 from glibc. We cannot use _GNU_SOURCE because
-// the build uses -std=c11 which suppresses GNU extensions.
-extern __float128 strtof128(const char* restrict nptr, char** restrict endptr);
// --- Declaration.Flags.Id enum (Zir.zig:2724) ---
typedef enum {
@@ -3096,6 +3094,154 @@ static uint32_t typeExpr(GenZir* gz, Scope* scope, uint32_t node) {
// Sign parameter for numberLiteral (AstGen.zig:8674).
enum NumSign { NUM_SIGN_POSITIVE, NUM_SIGN_NEGATIVE };
+// --- Portable 128-bit multiply and f64 round-trip test ---
+// See stage0/README.md "Float handling" for rationale.
+
+// Table of 5^0 through 5^27 (all fit in uint64_t).
+static const uint64_t pow5_table[28] = { 1ULL, 5ULL, 25ULL, 125ULL, 625ULL,
+ 3125ULL, 15625ULL, 78125ULL, 390625ULL, 1953125ULL, 9765625ULL,
+ 48828125ULL, 244140625ULL, 1220703125ULL, 6103515625ULL, 30517578125ULL,
+ 152587890625ULL, 762939453125ULL, 3814697265625ULL, 19073486328125ULL,
+ 95367431640625ULL, 476837158203125ULL, 2384185791015625ULL,
+ 11920928955078125ULL, 59604644775390625ULL, 298023223876953125ULL,
+ 1490116119384765625ULL, 7450580596923828125ULL };
+
+// 64x64 -> 128-bit multiply (portable, no __uint128_t).
+static void mul64(uint64_t a, uint64_t b, uint64_t* hi, uint64_t* lo) {
+ uint64_t a_lo = a & 0xFFFFFFFFU, a_hi = a >> 32;
+ uint64_t b_lo = b & 0xFFFFFFFFU, b_hi = b >> 32;
+ uint64_t p0 = a_lo * b_lo;
+ uint64_t p1 = a_lo * b_hi;
+ uint64_t p2 = a_hi * b_lo;
+ uint64_t p3 = a_hi * b_hi;
+ uint64_t mid = p1 + (p0 >> 32);
+ mid += p2;
+ if (mid < p2)
+ p3 += (uint64_t)1 << 32;
+ *lo = (mid << 32) | (p0 & 0xFFFFFFFFU);
+ *hi = p3 + (mid >> 32);
+}
+
+// Returns true if the decimal float string round-trips through f64.
+// A decimal value m * 10^e round-trips iff m * 5^e fits in 53 binary bits
+// (for e >= 0) or m is divisible by 5^|e| and the quotient fits in 53 bits
+// (for e < 0). Hex floats are handled separately by the caller.
+static bool decimal_float_fits_f64(const char* buf) {
+ uint64_t m = 0;
+ int e = 0;
+ bool seen_dot = false;
+ bool m_overflow = false;
+ const char* p = buf;
+
+ // Skip sign.
+ if (*p == '-' || *p == '+')
+ p++;
+
+ // Parse mantissa digits, tracking decimal point position.
+ for (; *p != '\0' && *p != 'e' && *p != 'E'; p++) {
+ if (*p == '.') {
+ seen_dot = true;
+ continue;
+ }
+ if (!m_overflow) {
+ uint64_t digit = (uint64_t)(*p - '0');
+ uint64_t next = m * 10 + digit;
+ if (next / 10 != m) {
+ m_overflow = true;
+ } else {
+ m = next;
+ }
+ }
+ if (seen_dot)
+ e--;
+ }
+
+ // Parse optional exponent.
+ if (*p == 'e' || *p == 'E') {
+ p++;
+ int exp_sign = 1;
+ if (*p == '-') {
+ exp_sign = -1;
+ p++;
+ } else if (*p == '+') {
+ p++;
+ }
+ int exp_val = 0;
+ for (; *p != '\0'; p++) {
+ exp_val = exp_val * 10 + (*p - '0');
+ if (exp_val > 1000)
+ break; // exponent too large
+ }
+ e += exp_sign * exp_val;
+ }
+
+ // Strip trailing zeros from mantissa.
+ while (m > 0 && m % 10 == 0) {
+ m /= 10;
+ e++;
+ }
+
+ if (m == 0)
+ return true;
+ if (m_overflow) {
+ // Mantissa overflows uint64_t (> 19 digits). We can't use the
+ // exact algebraic test, so check the f64 value's significant bits.
+ // Values with few significant bits (e.g. 2^113) are very likely
+ // exact matches, confirmed by strtold. Values with many significant
+ // bits (e.g. 5.999...e-01 with 21 digits) conservatively get f128.
+ double d = strtod(buf, NULL);
+ if (d == 0.0)
+ return true;
+ int exp;
+ double frac = frexp(fabs(d), &exp);
+ uint64_t sig = (uint64_t)ldexp(frac, 53);
+ // Count trailing zero bits in the f64 significand.
+ uint32_t trailing = 0;
+ uint64_t s = sig;
+ while (s > 0 && (s & 1) == 0) {
+ s >>= 1;
+ trailing++;
+ }
+ // If the f64 value has ≤ 43 significant bits (≥ 10 trailing zeros),
+ // the decimal string almost certainly represents this exact value.
+ // Confirm with strtold comparison.
+ if (trailing >= 10) {
+ long double ld = strtold(buf, NULL);
+ return ((long double)d == ld);
+ }
+ return false;
+ }
+
+ // Factor out powers of 2 from m. Since 5^e is always odd, only the
+ // odd part of m contributes to the significand bit count. Powers of 2
+ // just shift the binary exponent and don't affect whether the value
+ // fits in f64's 53-bit significand.
+ while (m > 0 && (m & 1) == 0)
+ m >>= 1;
+
+ if (e >= 0) {
+ if (e > 23)
+ return false;
+ uint64_t hi, lo;
+ mul64(m, pow5_table[e], &hi, &lo);
+ if (hi != 0)
+ return false;
+ return lo < ((uint64_t)1 << 53);
+ } else {
+ int ae = -e;
+ if (ae > 27)
+ return false;
+ uint64_t div = pow5_table[ae];
+ if (m % div != 0)
+ return false;
+ uint64_t quotient = m / div;
+ // Factor out any remaining powers of 2 from the quotient.
+ while (quotient > 0 && (quotient & 1) == 0)
+ quotient >>= 1;
+ return quotient < ((uint64_t)1 << 53);
+ }
+}
+
// Mirrors numberLiteral (AstGen.zig:8679).
// Parses integer and float literals, returns appropriate ZIR ref.
// source_node is the node used for rvalue/error reporting (may differ from
@@ -3154,27 +3300,58 @@ static uint32_t numberLiteral(
}
buf[buf_len] = '\0';
- // Parse as __float128 for full precision, then check if it
- // round-trips through f64 (mirrors AstGen.zig:8730-8746).
- __float128 f128_val = strtof128(buf, NULL);
- if (sign == NUM_SIGN_NEGATIVE)
- f128_val = -f128_val;
+ // Decide f64 vs f128 — see stage0/README.md "Float handling".
+ bool is_hex = (buf_len >= 2 && buf[0] == '0'
+ && (buf[1] == 'x' || buf[1] == 'X'));
+ bool fits;
+ if (is_hex) {
+ // Hex floats are dyadic rationals — strtold comparison is exact.
+ long double ld_check = strtold(buf, NULL);
+ double d_check = strtod(buf, NULL);
+ fits = ((long double)d_check == ld_check);
+ } else {
+ // Decimal floats: exact algebraic round-trip test.
+ fits = decimal_float_fits_f64(buf);
+ }
- double d_val = (double)f128_val;
- __float128 round_trip = (__float128)d_val;
- if (round_trip == f128_val) {
- // Fits in f64 -- emit ZIR_INST_FLOAT.
+ if (fits) {
+ double d_val = strtod(buf, NULL);
+ if (sign == NUM_SIGN_NEGATIVE)
+ d_val = -d_val;
return addFloat(gz, d_val);
}
- // Needs f128 -- break into 4 u32 pieces (AstGen.zig:8738-8746).
- // __float128 is IEEE 754 binary128, so we can bitcast directly.
- uint64_t int_bits[2];
- memcpy(int_bits, &f128_val, 16);
- uint32_t piece0 = (uint32_t)(int_bits[0] & 0xFFFFFFFFU);
- uint32_t piece1 = (uint32_t)(int_bits[0] >> 32);
- uint32_t piece2 = (uint32_t)(int_bits[1] & 0xFFFFFFFFU);
- uint32_t piece3 = (uint32_t)(int_bits[1] >> 32);
+ // Needs f128 — parse with strtold and convert 80-bit extended to
+ // IEEE 754 binary128 encoding. Both formats share 15-bit exponent
+ // with bias 16383. Bottom 49 of 112 fraction bits are zero-padded
+ // (precision beyond 80-bit extended).
+ long double ld_val = strtold(buf, NULL);
+ if (sign == NUM_SIGN_NEGATIVE)
+ ld_val = -ld_val;
+ uint8_t ld_bytes[sizeof(long double)];
+ memset(ld_bytes, 0, sizeof(ld_bytes));
+ memcpy(ld_bytes, &ld_val, sizeof(long double));
+
+ uint32_t sign_bit = (ld_bytes[9] >> 7) & 1;
+ uint32_t exponent
+ = (uint32_t)((ld_bytes[9] & 0x7F) << 8) | ld_bytes[8];
+ uint64_t significand = 0;
+ for (int i = 7; i >= 0; i--)
+ significand = (significand << 8) | ld_bytes[i];
+ // Drop the explicit integer bit (bit 63), keep 63 fraction bits
+ uint64_t fraction63 = significand & 0x7FFFFFFFFFFFFFFFUL;
+
+ // Pack into binary128: sign(1) + exp(15) + fraction(112)
+ // fraction63 occupies the top 63 of 112 fraction bits
+ uint64_t hi64 = ((uint64_t)sign_bit << 63) | ((uint64_t)exponent << 48)
+ | (fraction63 >> 15);
+ uint64_t lo64 = fraction63 << 49;
+
+ // ZIR stores as 4 little-endian u32 pieces
+ uint32_t piece0 = (uint32_t)(lo64 & 0xFFFFFFFFU);
+ uint32_t piece1 = (uint32_t)(lo64 >> 32);
+ uint32_t piece2 = (uint32_t)(hi64 & 0xFFFFFFFFU);
+ uint32_t piece3 = (uint32_t)(hi64 >> 32);
return addPlNodeQuad(
gz, ZIR_INST_FLOAT128, node, piece0, piece1, piece2, piece3);
}
@@ -3248,10 +3425,13 @@ static uint32_t numberLiteral(
// Multi-limb multiply-add: limbs = limbs * base + digit.
uint64_t carry = digit;
for (uint32_t li = 0; li < nlimbs; li++) {
- __uint128_t prod
- = (__uint128_t)limbs[li] * (uint64_t)base + carry;
- limbs[li] = (uint64_t)prod;
- carry = (uint64_t)(prod >> 64);
+ uint64_t prod_hi, prod_lo;
+ mul64(limbs[li], (uint64_t)base, &prod_hi, &prod_lo);
+ prod_lo += carry;
+ if (prod_lo < carry)
+ prod_hi++;
+ limbs[li] = prod_lo;
+ carry = prod_hi;
}
if (carry != 0 && nlimbs < 16) {
limbs[nlimbs++] = carry;
diff --git a/stage0/astgen_test.zig b/stage0/astgen_test.zig
@@ -124,8 +124,9 @@ fn buildHashSkipMask(gpa: Allocator, ref: Zir) ![]u32 {
},
.float128 => {
// Float128 payload: 4 u32 pieces encoding the binary128 value.
- // The C implementation uses strtold which only has 80-bit precision,
- // so the lower bits may differ. Skip all 4 pieces.
+ // The C implementation parses floats with strtold (80-bit extended
+ // precision on x86-64), then converts to binary128 layout. The bottom
+ // 49 of 112 fraction bits are zero-padded. Skip all 4 pieces.
const pi = ref_datas[i].pl_node.payload_index;
for (0..4) |j| {
if (pi + j < ref_extra_len)
@@ -394,9 +395,8 @@ fn expectEqualZir(gpa: Allocator, ref: Zir, got: c.Zir) !void {
const diff: i32 = @as(i32, @intCast(got_span)) - @as(i32, @intCast(ref_span));
if (diff != 0) {
std.debug.print(" DIFF: decl ref[{d}] got[{d}] src_node={d}: ref_span={d} got_span={d} (diff={d})\n", .{
- ri, gi, ref_datas[ri].declaration.src_node,
- ref_span, got_span,
- diff,
+ ri, gi, ref_datas[ri].declaration.src_node,
+ ref_span, got_span, diff,
});
}
}
@@ -576,7 +576,7 @@ fn expectEqualZir(gpa: Allocator, ref: Zir, got: c.Zir) !void {
if (r_pi != g_pi and !found_first) {
found_first = true;
std.debug.print(" first break payload divergence at inst[{d}]: ref_pi={d} got_pi={d} (diff={d})\n", .{
- k, r_pi, g_pi,
+ k, r_pi, g_pi,
@as(i64, g_pi) - @as(i64, r_pi),
});
// Find nearest decl
@@ -602,7 +602,7 @@ fn expectEqualZir(gpa: Allocator, ref: Zir, got: c.Zir) !void {
const g_pi = got.inst_datas[k].pl_node.payload_index;
if (r_pi != g_pi) {
std.debug.print(" func payload divergence at inst[{d}] tag={d}: ref_pi={d} got_pi={d} (diff={d})\n", .{
- k, @intFromEnum(ref_tags[k]), r_pi, g_pi,
+ k, @intFromEnum(ref_tags[k]), r_pi, g_pi,
@as(i64, g_pi) - @as(i64, r_pi),
});
// Find nearest decl
@@ -1667,7 +1667,7 @@ test "astgen: corpus" {
var any_fail = false;
inline for (corpus_files) |entry| {
- std.debug.print("--- {s} ---\n", .{entry[0]});
+ // std.debug.print("--- {s} ---\n", .{entry[0]});
corpusCheck(gpa, entry[1]) catch {
std.debug.print("FAIL: {s}\n", .{entry[0]});
any_fail = true;