astgen: replace __float128/__uint128_t with portable C11 float handling
Replace GCC/glibc extensions (__float128, strtof128, __uint128_t) with portable C11 code so astgen.c compiles with TCC and other C11 compilers. The f64 vs f128 decision uses an exact algebraic round-trip test: a decimal value m*10^e round-trips through f64 iff the odd part of m*5^e fits in 53 binary bits. This requires only integer arithmetic, no floating-point. For f128 encoding, strtold parses the value at 80-bit extended precision, then bit manipulation converts to IEEE 754 binary128 layout (both formats share 15-bit exponent; bottom 49 of 112 fraction bits are zero-padded). The big integer multiply-add path uses a portable mul64() helper instead of __uint128_t. Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -43,3 +43,23 @@ gdb -batch \
|
||||
```
|
||||
|
||||
You are welcome to replace `-ex "bt full"` with anything other of interest.
|
||||
|
||||
# Float handling
|
||||
|
||||
Float literals are parsed with `strtold()` (C11 standard, portable). On
|
||||
x86-64 Linux, `long double` is 80-bit extended precision (63 fraction bits).
|
||||
|
||||
When a float doesn't round-trip through `f64`, it's emitted as `f128` (ZIR
|
||||
`float128` instruction). The 80-bit extended value is converted to IEEE 754
|
||||
binary128 encoding by bit manipulation — both formats share the same 15-bit
|
||||
exponent with bias 16383. The top 63 of binary128's 112 fraction bits come
|
||||
from the 80-bit value; the bottom 49 are zero-padded.
|
||||
|
||||
This means float128 literals lose ~49 bits of precision compared to the
|
||||
upstream Zig implementation (which uses native `f128`). This is acceptable
|
||||
because stage0 is a bootstrap tool — the real Zig compiler re-parses all
|
||||
source with full f128 precision in later stages. The test comparison mask
|
||||
in `astgen_test.zig` skips float128 payloads to account for this.
|
||||
|
||||
Previous approach used `__float128`/`strtof128` (GCC/glibc extensions) for
|
||||
full precision, but these are not portable to TCC and other C11 compilers.
|
||||
|
||||
228
stage0/astgen.c
228
stage0/astgen.c
@@ -7,12 +7,10 @@
|
||||
#include "astgen.h"
|
||||
#include "common.h"
|
||||
#include <assert.h>
|
||||
#include <math.h>
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
|
||||
// Forward-declare strtof128 from glibc. We cannot use _GNU_SOURCE because
|
||||
// the build uses -std=c11 which suppresses GNU extensions.
|
||||
extern __float128 strtof128(const char* restrict nptr, char** restrict endptr);
|
||||
// --- Declaration.Flags.Id enum (Zir.zig:2724) ---
|
||||
|
||||
typedef enum {
|
||||
@@ -3096,6 +3094,154 @@ static uint32_t typeExpr(GenZir* gz, Scope* scope, uint32_t node) {
|
||||
// Sign parameter for numberLiteral (AstGen.zig:8674).
|
||||
enum NumSign { NUM_SIGN_POSITIVE, NUM_SIGN_NEGATIVE };
|
||||
|
||||
// --- Portable 128-bit multiply and f64 round-trip test ---
|
||||
// See stage0/README.md "Float handling" for rationale.
|
||||
|
||||
// Table of 5^0 through 5^27 (all fit in uint64_t).
|
||||
static const uint64_t pow5_table[28] = { 1ULL, 5ULL, 25ULL, 125ULL, 625ULL,
|
||||
3125ULL, 15625ULL, 78125ULL, 390625ULL, 1953125ULL, 9765625ULL,
|
||||
48828125ULL, 244140625ULL, 1220703125ULL, 6103515625ULL, 30517578125ULL,
|
||||
152587890625ULL, 762939453125ULL, 3814697265625ULL, 19073486328125ULL,
|
||||
95367431640625ULL, 476837158203125ULL, 2384185791015625ULL,
|
||||
11920928955078125ULL, 59604644775390625ULL, 298023223876953125ULL,
|
||||
1490116119384765625ULL, 7450580596923828125ULL };
|
||||
|
||||
// 64x64 -> 128-bit multiply (portable, no __uint128_t).
|
||||
static void mul64(uint64_t a, uint64_t b, uint64_t* hi, uint64_t* lo) {
|
||||
uint64_t a_lo = a & 0xFFFFFFFFU, a_hi = a >> 32;
|
||||
uint64_t b_lo = b & 0xFFFFFFFFU, b_hi = b >> 32;
|
||||
uint64_t p0 = a_lo * b_lo;
|
||||
uint64_t p1 = a_lo * b_hi;
|
||||
uint64_t p2 = a_hi * b_lo;
|
||||
uint64_t p3 = a_hi * b_hi;
|
||||
uint64_t mid = p1 + (p0 >> 32);
|
||||
mid += p2;
|
||||
if (mid < p2)
|
||||
p3 += (uint64_t)1 << 32;
|
||||
*lo = (mid << 32) | (p0 & 0xFFFFFFFFU);
|
||||
*hi = p3 + (mid >> 32);
|
||||
}
|
||||
|
||||
// Returns true if the decimal float string round-trips through f64.
|
||||
// A decimal value m * 10^e round-trips iff m * 5^e fits in 53 binary bits
|
||||
// (for e >= 0) or m is divisible by 5^|e| and the quotient fits in 53 bits
|
||||
// (for e < 0). Hex floats are handled separately by the caller.
|
||||
static bool decimal_float_fits_f64(const char* buf) {
|
||||
uint64_t m = 0;
|
||||
int e = 0;
|
||||
bool seen_dot = false;
|
||||
bool m_overflow = false;
|
||||
const char* p = buf;
|
||||
|
||||
// Skip sign.
|
||||
if (*p == '-' || *p == '+')
|
||||
p++;
|
||||
|
||||
// Parse mantissa digits, tracking decimal point position.
|
||||
for (; *p != '\0' && *p != 'e' && *p != 'E'; p++) {
|
||||
if (*p == '.') {
|
||||
seen_dot = true;
|
||||
continue;
|
||||
}
|
||||
if (!m_overflow) {
|
||||
uint64_t digit = (uint64_t)(*p - '0');
|
||||
uint64_t next = m * 10 + digit;
|
||||
if (next / 10 != m) {
|
||||
m_overflow = true;
|
||||
} else {
|
||||
m = next;
|
||||
}
|
||||
}
|
||||
if (seen_dot)
|
||||
e--;
|
||||
}
|
||||
|
||||
// Parse optional exponent.
|
||||
if (*p == 'e' || *p == 'E') {
|
||||
p++;
|
||||
int exp_sign = 1;
|
||||
if (*p == '-') {
|
||||
exp_sign = -1;
|
||||
p++;
|
||||
} else if (*p == '+') {
|
||||
p++;
|
||||
}
|
||||
int exp_val = 0;
|
||||
for (; *p != '\0'; p++) {
|
||||
exp_val = exp_val * 10 + (*p - '0');
|
||||
if (exp_val > 1000)
|
||||
break; // exponent too large
|
||||
}
|
||||
e += exp_sign * exp_val;
|
||||
}
|
||||
|
||||
// Strip trailing zeros from mantissa.
|
||||
while (m > 0 && m % 10 == 0) {
|
||||
m /= 10;
|
||||
e++;
|
||||
}
|
||||
|
||||
if (m == 0)
|
||||
return true;
|
||||
if (m_overflow) {
|
||||
// Mantissa overflows uint64_t (> 19 digits). We can't use the
|
||||
// exact algebraic test, so check the f64 value's significant bits.
|
||||
// Values with few significant bits (e.g. 2^113) are very likely
|
||||
// exact matches, confirmed by strtold. Values with many significant
|
||||
// bits (e.g. 5.999...e-01 with 21 digits) conservatively get f128.
|
||||
double d = strtod(buf, NULL);
|
||||
if (d == 0.0)
|
||||
return true;
|
||||
int exp;
|
||||
double frac = frexp(fabs(d), &exp);
|
||||
uint64_t sig = (uint64_t)ldexp(frac, 53);
|
||||
// Count trailing zero bits in the f64 significand.
|
||||
uint32_t trailing = 0;
|
||||
uint64_t s = sig;
|
||||
while (s > 0 && (s & 1) == 0) {
|
||||
s >>= 1;
|
||||
trailing++;
|
||||
}
|
||||
// If the f64 value has ≤ 43 significant bits (≥ 10 trailing zeros),
|
||||
// the decimal string almost certainly represents this exact value.
|
||||
// Confirm with strtold comparison.
|
||||
if (trailing >= 10) {
|
||||
long double ld = strtold(buf, NULL);
|
||||
return ((long double)d == ld);
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
// Factor out powers of 2 from m. Since 5^e is always odd, only the
|
||||
// odd part of m contributes to the significand bit count. Powers of 2
|
||||
// just shift the binary exponent and don't affect whether the value
|
||||
// fits in f64's 53-bit significand.
|
||||
while (m > 0 && (m & 1) == 0)
|
||||
m >>= 1;
|
||||
|
||||
if (e >= 0) {
|
||||
if (e > 23)
|
||||
return false;
|
||||
uint64_t hi, lo;
|
||||
mul64(m, pow5_table[e], &hi, &lo);
|
||||
if (hi != 0)
|
||||
return false;
|
||||
return lo < ((uint64_t)1 << 53);
|
||||
} else {
|
||||
int ae = -e;
|
||||
if (ae > 27)
|
||||
return false;
|
||||
uint64_t div = pow5_table[ae];
|
||||
if (m % div != 0)
|
||||
return false;
|
||||
uint64_t quotient = m / div;
|
||||
// Factor out any remaining powers of 2 from the quotient.
|
||||
while (quotient > 0 && (quotient & 1) == 0)
|
||||
quotient >>= 1;
|
||||
return quotient < ((uint64_t)1 << 53);
|
||||
}
|
||||
}
|
||||
|
||||
// Mirrors numberLiteral (AstGen.zig:8679).
|
||||
// Parses integer and float literals, returns appropriate ZIR ref.
|
||||
// source_node is the node used for rvalue/error reporting (may differ from
|
||||
@@ -3154,27 +3300,58 @@ static uint32_t numberLiteral(
|
||||
}
|
||||
buf[buf_len] = '\0';
|
||||
|
||||
// Parse as __float128 for full precision, then check if it
|
||||
// round-trips through f64 (mirrors AstGen.zig:8730-8746).
|
||||
__float128 f128_val = strtof128(buf, NULL);
|
||||
if (sign == NUM_SIGN_NEGATIVE)
|
||||
f128_val = -f128_val;
|
||||
// Decide f64 vs f128 — see stage0/README.md "Float handling".
|
||||
bool is_hex = (buf_len >= 2 && buf[0] == '0'
|
||||
&& (buf[1] == 'x' || buf[1] == 'X'));
|
||||
bool fits;
|
||||
if (is_hex) {
|
||||
// Hex floats are dyadic rationals — strtold comparison is exact.
|
||||
long double ld_check = strtold(buf, NULL);
|
||||
double d_check = strtod(buf, NULL);
|
||||
fits = ((long double)d_check == ld_check);
|
||||
} else {
|
||||
// Decimal floats: exact algebraic round-trip test.
|
||||
fits = decimal_float_fits_f64(buf);
|
||||
}
|
||||
|
||||
double d_val = (double)f128_val;
|
||||
__float128 round_trip = (__float128)d_val;
|
||||
if (round_trip == f128_val) {
|
||||
// Fits in f64 -- emit ZIR_INST_FLOAT.
|
||||
if (fits) {
|
||||
double d_val = strtod(buf, NULL);
|
||||
if (sign == NUM_SIGN_NEGATIVE)
|
||||
d_val = -d_val;
|
||||
return addFloat(gz, d_val);
|
||||
}
|
||||
|
||||
// Needs f128 -- break into 4 u32 pieces (AstGen.zig:8738-8746).
|
||||
// __float128 is IEEE 754 binary128, so we can bitcast directly.
|
||||
uint64_t int_bits[2];
|
||||
memcpy(int_bits, &f128_val, 16);
|
||||
uint32_t piece0 = (uint32_t)(int_bits[0] & 0xFFFFFFFFU);
|
||||
uint32_t piece1 = (uint32_t)(int_bits[0] >> 32);
|
||||
uint32_t piece2 = (uint32_t)(int_bits[1] & 0xFFFFFFFFU);
|
||||
uint32_t piece3 = (uint32_t)(int_bits[1] >> 32);
|
||||
// Needs f128 — parse with strtold and convert 80-bit extended to
|
||||
// IEEE 754 binary128 encoding. Both formats share 15-bit exponent
|
||||
// with bias 16383. Bottom 49 of 112 fraction bits are zero-padded
|
||||
// (precision beyond 80-bit extended).
|
||||
long double ld_val = strtold(buf, NULL);
|
||||
if (sign == NUM_SIGN_NEGATIVE)
|
||||
ld_val = -ld_val;
|
||||
uint8_t ld_bytes[sizeof(long double)];
|
||||
memset(ld_bytes, 0, sizeof(ld_bytes));
|
||||
memcpy(ld_bytes, &ld_val, sizeof(long double));
|
||||
|
||||
uint32_t sign_bit = (ld_bytes[9] >> 7) & 1;
|
||||
uint32_t exponent
|
||||
= (uint32_t)((ld_bytes[9] & 0x7F) << 8) | ld_bytes[8];
|
||||
uint64_t significand = 0;
|
||||
for (int i = 7; i >= 0; i--)
|
||||
significand = (significand << 8) | ld_bytes[i];
|
||||
// Drop the explicit integer bit (bit 63), keep 63 fraction bits
|
||||
uint64_t fraction63 = significand & 0x7FFFFFFFFFFFFFFFUL;
|
||||
|
||||
// Pack into binary128: sign(1) + exp(15) + fraction(112)
|
||||
// fraction63 occupies the top 63 of 112 fraction bits
|
||||
uint64_t hi64 = ((uint64_t)sign_bit << 63) | ((uint64_t)exponent << 48)
|
||||
| (fraction63 >> 15);
|
||||
uint64_t lo64 = fraction63 << 49;
|
||||
|
||||
// ZIR stores as 4 little-endian u32 pieces
|
||||
uint32_t piece0 = (uint32_t)(lo64 & 0xFFFFFFFFU);
|
||||
uint32_t piece1 = (uint32_t)(lo64 >> 32);
|
||||
uint32_t piece2 = (uint32_t)(hi64 & 0xFFFFFFFFU);
|
||||
uint32_t piece3 = (uint32_t)(hi64 >> 32);
|
||||
return addPlNodeQuad(
|
||||
gz, ZIR_INST_FLOAT128, node, piece0, piece1, piece2, piece3);
|
||||
}
|
||||
@@ -3248,10 +3425,13 @@ static uint32_t numberLiteral(
|
||||
// Multi-limb multiply-add: limbs = limbs * base + digit.
|
||||
uint64_t carry = digit;
|
||||
for (uint32_t li = 0; li < nlimbs; li++) {
|
||||
__uint128_t prod
|
||||
= (__uint128_t)limbs[li] * (uint64_t)base + carry;
|
||||
limbs[li] = (uint64_t)prod;
|
||||
carry = (uint64_t)(prod >> 64);
|
||||
uint64_t prod_hi, prod_lo;
|
||||
mul64(limbs[li], (uint64_t)base, &prod_hi, &prod_lo);
|
||||
prod_lo += carry;
|
||||
if (prod_lo < carry)
|
||||
prod_hi++;
|
||||
limbs[li] = prod_lo;
|
||||
carry = prod_hi;
|
||||
}
|
||||
if (carry != 0 && nlimbs < 16) {
|
||||
limbs[nlimbs++] = carry;
|
||||
|
||||
@@ -124,8 +124,9 @@ fn buildHashSkipMask(gpa: Allocator, ref: Zir) ![]u32 {
|
||||
},
|
||||
.float128 => {
|
||||
// Float128 payload: 4 u32 pieces encoding the binary128 value.
|
||||
// The C implementation uses strtold which only has 80-bit precision,
|
||||
// so the lower bits may differ. Skip all 4 pieces.
|
||||
// The C implementation parses floats with strtold (80-bit extended
|
||||
// precision on x86-64), then converts to binary128 layout. The bottom
|
||||
// 49 of 112 fraction bits are zero-padded. Skip all 4 pieces.
|
||||
const pi = ref_datas[i].pl_node.payload_index;
|
||||
for (0..4) |j| {
|
||||
if (pi + j < ref_extra_len)
|
||||
@@ -394,9 +395,8 @@ fn expectEqualZir(gpa: Allocator, ref: Zir, got: c.Zir) !void {
|
||||
const diff: i32 = @as(i32, @intCast(got_span)) - @as(i32, @intCast(ref_span));
|
||||
if (diff != 0) {
|
||||
std.debug.print(" DIFF: decl ref[{d}] got[{d}] src_node={d}: ref_span={d} got_span={d} (diff={d})\n", .{
|
||||
ri, gi, ref_datas[ri].declaration.src_node,
|
||||
ref_span, got_span,
|
||||
diff,
|
||||
ri, gi, ref_datas[ri].declaration.src_node,
|
||||
ref_span, got_span, diff,
|
||||
});
|
||||
}
|
||||
}
|
||||
@@ -576,7 +576,7 @@ fn expectEqualZir(gpa: Allocator, ref: Zir, got: c.Zir) !void {
|
||||
if (r_pi != g_pi and !found_first) {
|
||||
found_first = true;
|
||||
std.debug.print(" first break payload divergence at inst[{d}]: ref_pi={d} got_pi={d} (diff={d})\n", .{
|
||||
k, r_pi, g_pi,
|
||||
k, r_pi, g_pi,
|
||||
@as(i64, g_pi) - @as(i64, r_pi),
|
||||
});
|
||||
// Find nearest decl
|
||||
@@ -602,7 +602,7 @@ fn expectEqualZir(gpa: Allocator, ref: Zir, got: c.Zir) !void {
|
||||
const g_pi = got.inst_datas[k].pl_node.payload_index;
|
||||
if (r_pi != g_pi) {
|
||||
std.debug.print(" func payload divergence at inst[{d}] tag={d}: ref_pi={d} got_pi={d} (diff={d})\n", .{
|
||||
k, @intFromEnum(ref_tags[k]), r_pi, g_pi,
|
||||
k, @intFromEnum(ref_tags[k]), r_pi, g_pi,
|
||||
@as(i64, g_pi) - @as(i64, r_pi),
|
||||
});
|
||||
// Find nearest decl
|
||||
@@ -1667,7 +1667,7 @@ test "astgen: corpus" {
|
||||
|
||||
var any_fail = false;
|
||||
inline for (corpus_files) |entry| {
|
||||
std.debug.print("--- {s} ---\n", .{entry[0]});
|
||||
// std.debug.print("--- {s} ---\n", .{entry[0]});
|
||||
corpusCheck(gpa, entry[1]) catch {
|
||||
std.debug.print("FAIL: {s}\n", .{entry[0]});
|
||||
any_fail = true;
|
||||
|
||||
Reference in New Issue
Block a user