astgen: replace __float128/__uint128_t with portable C11 float handling

Replace GCC/glibc extensions (__float128, strtof128, __uint128_t) with
portable C11 code so astgen.c compiles with TCC and other C11 compilers.

The f64 vs f128 decision uses an exact algebraic round-trip test: a decimal
value m*10^e round-trips through f64 iff the odd part of m*5^e fits in 53
binary bits. This requires only integer arithmetic, no floating-point.

For f128 encoding, strtold parses the value at 80-bit extended precision,
then bit manipulation converts to IEEE 754 binary128 layout (both formats
share 15-bit exponent; bottom 49 of 112 fraction bits are zero-padded).

The big integer multiply-add path uses a portable mul64() helper instead
of __uint128_t.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
2026-02-15 19:36:44 +00:00
parent 760b03fd29
commit 500bc6a791
3 changed files with 232 additions and 32 deletions

View File

@@ -43,3 +43,23 @@ gdb -batch \
```
You are welcome to replace `-ex "bt full"` with anything other of interest.
# Float handling
Float literals are parsed with `strtold()` (C11 standard, portable). On
x86-64 Linux, `long double` is 80-bit extended precision (63 fraction bits).
When a float doesn't round-trip through `f64`, it's emitted as `f128` (ZIR
`float128` instruction). The 80-bit extended value is converted to IEEE 754
binary128 encoding by bit manipulation — both formats share the same 15-bit
exponent with bias 16383. The top 63 of binary128's 112 fraction bits come
from the 80-bit value; the bottom 49 are zero-padded.
This means float128 literals lose ~49 bits of precision compared to the
upstream Zig implementation (which uses native `f128`). This is acceptable
because stage0 is a bootstrap tool — the real Zig compiler re-parses all
source with full f128 precision in later stages. The test comparison mask
in `astgen_test.zig` skips float128 payloads to account for this.
Previous approach used `__float128`/`strtof128` (GCC/glibc extensions) for
full precision, but these are not portable to TCC and other C11 compilers.

View File

@@ -7,12 +7,10 @@
#include "astgen.h"
#include "common.h"
#include <assert.h>
#include <math.h>
#include <stdlib.h>
#include <string.h>
// Forward-declare strtof128 from glibc. We cannot use _GNU_SOURCE because
// the build uses -std=c11 which suppresses GNU extensions.
extern __float128 strtof128(const char* restrict nptr, char** restrict endptr);
// --- Declaration.Flags.Id enum (Zir.zig:2724) ---
typedef enum {
@@ -3096,6 +3094,154 @@ static uint32_t typeExpr(GenZir* gz, Scope* scope, uint32_t node) {
// Sign parameter for numberLiteral (AstGen.zig:8674).
enum NumSign { NUM_SIGN_POSITIVE, NUM_SIGN_NEGATIVE };
// --- Portable 128-bit multiply and f64 round-trip test ---
// See stage0/README.md "Float handling" for rationale.
// Table of 5^0 through 5^27 (all fit in uint64_t).
static const uint64_t pow5_table[28] = { 1ULL, 5ULL, 25ULL, 125ULL, 625ULL,
3125ULL, 15625ULL, 78125ULL, 390625ULL, 1953125ULL, 9765625ULL,
48828125ULL, 244140625ULL, 1220703125ULL, 6103515625ULL, 30517578125ULL,
152587890625ULL, 762939453125ULL, 3814697265625ULL, 19073486328125ULL,
95367431640625ULL, 476837158203125ULL, 2384185791015625ULL,
11920928955078125ULL, 59604644775390625ULL, 298023223876953125ULL,
1490116119384765625ULL, 7450580596923828125ULL };
// 64x64 -> 128-bit multiply (portable, no __uint128_t).
static void mul64(uint64_t a, uint64_t b, uint64_t* hi, uint64_t* lo) {
uint64_t a_lo = a & 0xFFFFFFFFU, a_hi = a >> 32;
uint64_t b_lo = b & 0xFFFFFFFFU, b_hi = b >> 32;
uint64_t p0 = a_lo * b_lo;
uint64_t p1 = a_lo * b_hi;
uint64_t p2 = a_hi * b_lo;
uint64_t p3 = a_hi * b_hi;
uint64_t mid = p1 + (p0 >> 32);
mid += p2;
if (mid < p2)
p3 += (uint64_t)1 << 32;
*lo = (mid << 32) | (p0 & 0xFFFFFFFFU);
*hi = p3 + (mid >> 32);
}
// Returns true if the decimal float string round-trips through f64.
// A decimal value m * 10^e round-trips iff m * 5^e fits in 53 binary bits
// (for e >= 0) or m is divisible by 5^|e| and the quotient fits in 53 bits
// (for e < 0). Hex floats are handled separately by the caller.
static bool decimal_float_fits_f64(const char* buf) {
uint64_t m = 0;
int e = 0;
bool seen_dot = false;
bool m_overflow = false;
const char* p = buf;
// Skip sign.
if (*p == '-' || *p == '+')
p++;
// Parse mantissa digits, tracking decimal point position.
for (; *p != '\0' && *p != 'e' && *p != 'E'; p++) {
if (*p == '.') {
seen_dot = true;
continue;
}
if (!m_overflow) {
uint64_t digit = (uint64_t)(*p - '0');
uint64_t next = m * 10 + digit;
if (next / 10 != m) {
m_overflow = true;
} else {
m = next;
}
}
if (seen_dot)
e--;
}
// Parse optional exponent.
if (*p == 'e' || *p == 'E') {
p++;
int exp_sign = 1;
if (*p == '-') {
exp_sign = -1;
p++;
} else if (*p == '+') {
p++;
}
int exp_val = 0;
for (; *p != '\0'; p++) {
exp_val = exp_val * 10 + (*p - '0');
if (exp_val > 1000)
break; // exponent too large
}
e += exp_sign * exp_val;
}
// Strip trailing zeros from mantissa.
while (m > 0 && m % 10 == 0) {
m /= 10;
e++;
}
if (m == 0)
return true;
if (m_overflow) {
// Mantissa overflows uint64_t (> 19 digits). We can't use the
// exact algebraic test, so check the f64 value's significant bits.
// Values with few significant bits (e.g. 2^113) are very likely
// exact matches, confirmed by strtold. Values with many significant
// bits (e.g. 5.999...e-01 with 21 digits) conservatively get f128.
double d = strtod(buf, NULL);
if (d == 0.0)
return true;
int exp;
double frac = frexp(fabs(d), &exp);
uint64_t sig = (uint64_t)ldexp(frac, 53);
// Count trailing zero bits in the f64 significand.
uint32_t trailing = 0;
uint64_t s = sig;
while (s > 0 && (s & 1) == 0) {
s >>= 1;
trailing++;
}
// If the f64 value has ≤ 43 significant bits (≥ 10 trailing zeros),
// the decimal string almost certainly represents this exact value.
// Confirm with strtold comparison.
if (trailing >= 10) {
long double ld = strtold(buf, NULL);
return ((long double)d == ld);
}
return false;
}
// Factor out powers of 2 from m. Since 5^e is always odd, only the
// odd part of m contributes to the significand bit count. Powers of 2
// just shift the binary exponent and don't affect whether the value
// fits in f64's 53-bit significand.
while (m > 0 && (m & 1) == 0)
m >>= 1;
if (e >= 0) {
if (e > 23)
return false;
uint64_t hi, lo;
mul64(m, pow5_table[e], &hi, &lo);
if (hi != 0)
return false;
return lo < ((uint64_t)1 << 53);
} else {
int ae = -e;
if (ae > 27)
return false;
uint64_t div = pow5_table[ae];
if (m % div != 0)
return false;
uint64_t quotient = m / div;
// Factor out any remaining powers of 2 from the quotient.
while (quotient > 0 && (quotient & 1) == 0)
quotient >>= 1;
return quotient < ((uint64_t)1 << 53);
}
}
// Mirrors numberLiteral (AstGen.zig:8679).
// Parses integer and float literals, returns appropriate ZIR ref.
// source_node is the node used for rvalue/error reporting (may differ from
@@ -3154,27 +3300,58 @@ static uint32_t numberLiteral(
}
buf[buf_len] = '\0';
// Parse as __float128 for full precision, then check if it
// round-trips through f64 (mirrors AstGen.zig:8730-8746).
__float128 f128_val = strtof128(buf, NULL);
if (sign == NUM_SIGN_NEGATIVE)
f128_val = -f128_val;
// Decide f64 vs f128 — see stage0/README.md "Float handling".
bool is_hex = (buf_len >= 2 && buf[0] == '0'
&& (buf[1] == 'x' || buf[1] == 'X'));
bool fits;
if (is_hex) {
// Hex floats are dyadic rationals — strtold comparison is exact.
long double ld_check = strtold(buf, NULL);
double d_check = strtod(buf, NULL);
fits = ((long double)d_check == ld_check);
} else {
// Decimal floats: exact algebraic round-trip test.
fits = decimal_float_fits_f64(buf);
}
double d_val = (double)f128_val;
__float128 round_trip = (__float128)d_val;
if (round_trip == f128_val) {
// Fits in f64 -- emit ZIR_INST_FLOAT.
if (fits) {
double d_val = strtod(buf, NULL);
if (sign == NUM_SIGN_NEGATIVE)
d_val = -d_val;
return addFloat(gz, d_val);
}
// Needs f128 -- break into 4 u32 pieces (AstGen.zig:8738-8746).
// __float128 is IEEE 754 binary128, so we can bitcast directly.
uint64_t int_bits[2];
memcpy(int_bits, &f128_val, 16);
uint32_t piece0 = (uint32_t)(int_bits[0] & 0xFFFFFFFFU);
uint32_t piece1 = (uint32_t)(int_bits[0] >> 32);
uint32_t piece2 = (uint32_t)(int_bits[1] & 0xFFFFFFFFU);
uint32_t piece3 = (uint32_t)(int_bits[1] >> 32);
// Needs f128 — parse with strtold and convert 80-bit extended to
// IEEE 754 binary128 encoding. Both formats share 15-bit exponent
// with bias 16383. Bottom 49 of 112 fraction bits are zero-padded
// (precision beyond 80-bit extended).
long double ld_val = strtold(buf, NULL);
if (sign == NUM_SIGN_NEGATIVE)
ld_val = -ld_val;
uint8_t ld_bytes[sizeof(long double)];
memset(ld_bytes, 0, sizeof(ld_bytes));
memcpy(ld_bytes, &ld_val, sizeof(long double));
uint32_t sign_bit = (ld_bytes[9] >> 7) & 1;
uint32_t exponent
= (uint32_t)((ld_bytes[9] & 0x7F) << 8) | ld_bytes[8];
uint64_t significand = 0;
for (int i = 7; i >= 0; i--)
significand = (significand << 8) | ld_bytes[i];
// Drop the explicit integer bit (bit 63), keep 63 fraction bits
uint64_t fraction63 = significand & 0x7FFFFFFFFFFFFFFFUL;
// Pack into binary128: sign(1) + exp(15) + fraction(112)
// fraction63 occupies the top 63 of 112 fraction bits
uint64_t hi64 = ((uint64_t)sign_bit << 63) | ((uint64_t)exponent << 48)
| (fraction63 >> 15);
uint64_t lo64 = fraction63 << 49;
// ZIR stores as 4 little-endian u32 pieces
uint32_t piece0 = (uint32_t)(lo64 & 0xFFFFFFFFU);
uint32_t piece1 = (uint32_t)(lo64 >> 32);
uint32_t piece2 = (uint32_t)(hi64 & 0xFFFFFFFFU);
uint32_t piece3 = (uint32_t)(hi64 >> 32);
return addPlNodeQuad(
gz, ZIR_INST_FLOAT128, node, piece0, piece1, piece2, piece3);
}
@@ -3248,10 +3425,13 @@ static uint32_t numberLiteral(
// Multi-limb multiply-add: limbs = limbs * base + digit.
uint64_t carry = digit;
for (uint32_t li = 0; li < nlimbs; li++) {
__uint128_t prod
= (__uint128_t)limbs[li] * (uint64_t)base + carry;
limbs[li] = (uint64_t)prod;
carry = (uint64_t)(prod >> 64);
uint64_t prod_hi, prod_lo;
mul64(limbs[li], (uint64_t)base, &prod_hi, &prod_lo);
prod_lo += carry;
if (prod_lo < carry)
prod_hi++;
limbs[li] = prod_lo;
carry = prod_hi;
}
if (carry != 0 && nlimbs < 16) {
limbs[nlimbs++] = carry;

View File

@@ -124,8 +124,9 @@ fn buildHashSkipMask(gpa: Allocator, ref: Zir) ![]u32 {
},
.float128 => {
// Float128 payload: 4 u32 pieces encoding the binary128 value.
// The C implementation uses strtold which only has 80-bit precision,
// so the lower bits may differ. Skip all 4 pieces.
// The C implementation parses floats with strtold (80-bit extended
// precision on x86-64), then converts to binary128 layout. The bottom
// 49 of 112 fraction bits are zero-padded. Skip all 4 pieces.
const pi = ref_datas[i].pl_node.payload_index;
for (0..4) |j| {
if (pi + j < ref_extra_len)
@@ -394,9 +395,8 @@ fn expectEqualZir(gpa: Allocator, ref: Zir, got: c.Zir) !void {
const diff: i32 = @as(i32, @intCast(got_span)) - @as(i32, @intCast(ref_span));
if (diff != 0) {
std.debug.print(" DIFF: decl ref[{d}] got[{d}] src_node={d}: ref_span={d} got_span={d} (diff={d})\n", .{
ri, gi, ref_datas[ri].declaration.src_node,
ref_span, got_span,
diff,
ri, gi, ref_datas[ri].declaration.src_node,
ref_span, got_span, diff,
});
}
}
@@ -576,7 +576,7 @@ fn expectEqualZir(gpa: Allocator, ref: Zir, got: c.Zir) !void {
if (r_pi != g_pi and !found_first) {
found_first = true;
std.debug.print(" first break payload divergence at inst[{d}]: ref_pi={d} got_pi={d} (diff={d})\n", .{
k, r_pi, g_pi,
k, r_pi, g_pi,
@as(i64, g_pi) - @as(i64, r_pi),
});
// Find nearest decl
@@ -602,7 +602,7 @@ fn expectEqualZir(gpa: Allocator, ref: Zir, got: c.Zir) !void {
const g_pi = got.inst_datas[k].pl_node.payload_index;
if (r_pi != g_pi) {
std.debug.print(" func payload divergence at inst[{d}] tag={d}: ref_pi={d} got_pi={d} (diff={d})\n", .{
k, @intFromEnum(ref_tags[k]), r_pi, g_pi,
k, @intFromEnum(ref_tags[k]), r_pi, g_pi,
@as(i64, g_pi) - @as(i64, r_pi),
});
// Find nearest decl
@@ -1667,7 +1667,7 @@ test "astgen: corpus" {
var any_fail = false;
inline for (corpus_files) |entry| {
std.debug.print("--- {s} ---\n", .{entry[0]});
// std.debug.print("--- {s} ---\n", .{entry[0]});
corpusCheck(gpa, entry[1]) catch {
std.debug.print("FAIL: {s}\n", .{entry[0]});
any_fail = true;