improvements to zig's implementation of libc and WebAssembly - zig

commit 14cdb01f35b35972b06e95a3a438f9f7910b97f8 (tree)
parent 3aa43dc31c468b2f80bf065f850618a981c5fca2
Author: Andrew Kelley <andrew@ziglang.org>
Date:   Wed, 15 May 2019 16:20:16 -0400

improvements to zig's implementation of libc and WebAssembly

 * rename std/special/builtin.zig to std/special/c.zig
   not to be confused with @import("builtin") which is entirely
   different, this is zig's multi-target libc implementation.
 * WebAssembly: build-exe is for executables which have a main().
   build-lib is for building libraries of functions to use from,
   for example, a web browser environment.
   - for now pass --export-all for libraries when there are any
     C objects because we have no way to detect the list of exports
     when compiling C code.
   - stop passing --no-entry for executables. if you want --no-entry
     then use build-lib.
 * make the "musl" ABI the default ABI for wasm32-freestanding.
 * zig provides libc for wasm32-freestanding-musl.

Diffstat:
M CMakeLists.txt  | 2 +-
M src/link.cpp  | 45 ++++++++++++++++++++++++---------------------
M src/target.cpp  | 7 +++++--
D std/special/builtin.zig  | 490 -------------------------------------------------------------------------------
A std/special/c.zig  | 506 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++

5 files changed, 536 insertions(+), 514 deletions(-)
diff --git a/CMakeLists.txt b/CMakeLists.txt
@@ -643,7 +643,7 @@ set(ZIG_STD_FILES
     "special/bootstrap_lib.zig"
     "special/bootstrap_windows_tls.zig"
     "special/build_runner.zig"
-    "special/builtin.zig"
+    "special/c.zig"
     "special/compiler_rt.zig"
     "special/compiler_rt/stack_probe.zig"
     "special/compiler_rt/arm/aeabi_fcmp.zig"
diff --git a/src/link.cpp b/src/link.cpp
@@ -776,8 +776,8 @@ static const char *get_libc_crt_file(CodeGen *parent, const char *file) {
 
 static Buf *build_a_raw(CodeGen *parent_gen, const char *aname, Buf *full_path, OutType child_out_type) {
     // The Mach-O LLD code is not well maintained, and trips an assertion
-    // when we link compiler_rt and builtin as libraries rather than objects.
-    // Here we workaround this by having compiler_rt and builtin be objects.
+    // when we link compiler_rt and libc.zig as libraries rather than objects.
+    // Here we workaround this by having compiler_rt and libc.zig be objects.
     // TODO write our own linker. https://github.com/ziglang/zig/issues/1535
     if (parent_gen->zig_target->os == OsMacOSX) {
         child_out_type = OutTypeObj;
@@ -787,7 +787,7 @@ static Buf *build_a_raw(CodeGen *parent_gen, const char *aname, Buf *full_path, 
             parent_gen->libc);
     codegen_set_out_name(child_gen, buf_create_from_str(aname));
 
-    // This is so that compiler_rt and builtin libraries know whether they
+    // This is so that compiler_rt and libc.zig libraries know whether they
     // will eventually be linked with libc. They make different decisions
     // about what to export depending on whether libc is linked.
     if (parent_gen->libc_link_lib != nullptr) {
@@ -1002,8 +1002,8 @@ static void construct_linker_job_elf(LinkJob *lj) {
 
     if (!g->is_dummy_so && (g->out_type == OutTypeExe || is_dyn_lib)) {
         if (g->libc_link_lib == nullptr) {
-            Buf *builtin_a_path = build_a(g, "builtin");
-            lj->args.append(buf_ptr(builtin_a_path));
+            Buf *libc_a_path = build_a(g, "c");
+            lj->args.append(buf_ptr(libc_a_path));
         }
 
         Buf *compiler_rt_o_path = build_compiler_rt(g, OutTypeLib);
@@ -1092,30 +1092,33 @@ static void construct_linker_job_wasm(LinkJob *lj) {
 
     lj->args.append("-error-limit=0");
 
-    if (g->zig_target->os != OsWASI) {
-	    lj->args.append("--no-entry");  // So lld doesn't look for _start.
+    if (g->out_type != OutTypeExe) {
+	    lj->args.append("--no-entry"); // So lld doesn't look for _start.
+
+        // If there are any C source files we cannot rely on individual exports.
+        if (g->c_source_files.length != 0) {
+            lj->args.append("--export-all");
+        } else {
+            auto export_it = g->exported_symbol_names.entry_iterator();
+            decltype(g->exported_symbol_names)::Entry *curr_entry = nullptr;
+            while ((curr_entry = export_it.next()) != nullptr) {
+                Buf *arg = buf_sprintf("--export=%s", buf_ptr(curr_entry->key));
+                lj->args.append(buf_ptr(arg));
+            }
+        }
     }
     lj->args.append("--allow-undefined");
     lj->args.append("-o");
     lj->args.append(buf_ptr(&g->output_file_path));
 
-    auto export_it = g->exported_symbol_names.entry_iterator();
-    decltype(g->exported_symbol_names)::Entry *curr_entry = nullptr;
-    while ((curr_entry = export_it.next()) != nullptr) {
-        Buf *arg = buf_sprintf("--export=%s", buf_ptr(curr_entry->key));
-        lj->args.append(buf_ptr(arg));
-    }
-
     // .o files
     for (size_t i = 0; i < g->link_objects.length; i += 1) {
         lj->args.append((const char *)buf_ptr(g->link_objects.at(i)));
     }
 
-    if (g->out_type == OutTypeExe) {
-        if (g->libc_link_lib == nullptr) {
-            Buf *builtin_a_path = build_a(g, "builtin");
-            lj->args.append(buf_ptr(builtin_a_path));
-        }
+    if (g->out_type != OutTypeObj) {
+        Buf *libc_a_path = build_a(g, "c");
+        lj->args.append(buf_ptr(libc_a_path));
 
         Buf *compiler_rt_o_path = build_compiler_rt(g, OutTypeLib);
         lj->args.append(buf_ptr(compiler_rt_o_path));
@@ -1356,8 +1359,8 @@ static void construct_linker_job_coff(LinkJob *lj) {
 
     if (g->out_type == OutTypeExe || (g->out_type == OutTypeLib && g->is_dynamic)) {
         if (g->libc_link_lib == nullptr && !g->is_dummy_so) {
-            Buf *builtin_a_path = build_a(g, "builtin");
-            lj->args.append(buf_ptr(builtin_a_path));
+            Buf *libc_a_path = build_a(g, "c");
+            lj->args.append(buf_ptr(libc_a_path));
         }
 
         // msvc compiler_rt is missing some stuff, so we still build it and rely on weak linkage
diff --git a/src/target.cpp b/src/target.cpp
@@ -1376,6 +1376,9 @@ bool target_is_single_threaded(const ZigTarget *target) {
 }
 
 ZigLLVM_EnvironmentType target_default_abi(ZigLLVM_ArchType arch, Os os) {
+    if (arch == ZigLLVM_wasm32 || arch == ZigLLVM_wasm64) {
+        return ZigLLVM_Musl;
+    }
     switch (os) {
         case OsFreestanding:
         case OsAnanas:
@@ -1490,6 +1493,7 @@ static const AvailableLibC libcs_available[] = {
     {ZigLLVM_systemz, OsLinux, ZigLLVM_Musl},
     {ZigLLVM_sparc, OsLinux, ZigLLVM_GNU},
     {ZigLLVM_sparcv9, OsLinux, ZigLLVM_GNU},
+    {ZigLLVM_wasm32, OsFreestanding, ZigLLVM_Musl},
     {ZigLLVM_x86_64, OsLinux, ZigLLVM_GNU},
     {ZigLLVM_x86_64, OsLinux, ZigLLVM_GNUX32},
     {ZigLLVM_x86_64, OsLinux, ZigLLVM_Musl},
@@ -1508,7 +1512,6 @@ bool target_can_build_libc(const ZigTarget *target) {
 }
 
 const char *target_libc_generic_name(const ZigTarget *target) {
-    assert(target->os == OsLinux);
     switch (target->abi) {
         case ZigLLVM_GNU:
         case ZigLLVM_GNUABIN32:
@@ -1520,6 +1523,7 @@ const char *target_libc_generic_name(const ZigTarget *target) {
         case ZigLLVM_Musl:
         case ZigLLVM_MuslEABI:
         case ZigLLVM_MuslEABIHF:
+        case ZigLLVM_UnknownEnvironment:
             return "musl";
         case ZigLLVM_CODE16:
         case ZigLLVM_EABI:
@@ -1530,7 +1534,6 @@ const char *target_libc_generic_name(const ZigTarget *target) {
         case ZigLLVM_Cygnus:
         case ZigLLVM_CoreCLR:
         case ZigLLVM_Simulator:
-        case ZigLLVM_UnknownEnvironment:
             zig_unreachable();
     }
     zig_unreachable();
diff --git a/std/special/builtin.zig b/std/special/builtin.zig
@@ -1,490 +0,0 @@
-// These functions are provided when not linking against libc because LLVM
-// sometimes generates code that calls them.
-
-const std = @import("std");
-const builtin = @import("builtin");
-const maxInt = std.math.maxInt;
-
-// Avoid dragging in the runtime safety mechanisms into this .o file,
-// unless we're trying to test this file.
-pub fn panic(msg: []const u8, error_return_trace: ?*builtin.StackTrace) noreturn {
-    if (builtin.is_test) {
-        @setCold(true);
-        std.debug.panic("{}", msg);
-    } else {
-        unreachable;
-    }
-}
-
-export fn memset(dest: ?[*]u8, c: u8, n: usize) ?[*]u8 {
-    @setRuntimeSafety(false);
-
-    var index: usize = 0;
-    while (index != n) : (index += 1)
-        dest.?[index] = c;
-
-    return dest;
-}
-
-export fn memcpy(noalias dest: ?[*]u8, noalias src: ?[*]const u8, n: usize) ?[*]u8 {
-    @setRuntimeSafety(false);
-
-    var index: usize = 0;
-    while (index != n) : (index += 1)
-        dest.?[index] = src.?[index];
-
-    return dest;
-}
-
-export fn memmove(dest: ?[*]u8, src: ?[*]const u8, n: usize) ?[*]u8 {
-    @setRuntimeSafety(false);
-
-    if (@ptrToInt(dest) < @ptrToInt(src)) {
-        var index: usize = 0;
-        while (index != n) : (index += 1) {
-            dest.?[index] = src.?[index];
-        }
-    } else {
-        var index = n;
-        while (index != 0) {
-            index -= 1;
-            dest.?[index] = src.?[index];
-        }
-    }
-
-    return dest;
-}
-
-export fn memcmp(vl: ?[*]const u8, vr: ?[*]const u8, n: usize) isize {
-    @setRuntimeSafety(false);
-
-    var index: usize = 0;
-    while (index != n) : (index += 1) {
-        const compare_val = @bitCast(i8, vl.?[index] -% vr.?[index]);
-        if (compare_val != 0) {
-            return compare_val;
-        }
-    }
-
-    return 0;
-}
-
-test "test_memcmp" {
-    const base_arr = []u8{ 1, 1, 1 };
-    const arr1 = []u8{ 1, 1, 1 };
-    const arr2 = []u8{ 1, 0, 1 };
-    const arr3 = []u8{ 1, 2, 1 };
-
-    std.testing.expect(memcmp(base_arr[0..].ptr, arr1[0..].ptr, base_arr.len) == 0);
-    std.testing.expect(memcmp(base_arr[0..].ptr, arr2[0..].ptr, base_arr.len) == 1);
-    std.testing.expect(memcmp(base_arr[0..].ptr, arr3[0..].ptr, base_arr.len) == -1);
-}
-
-comptime {
-    if (builtin.mode != builtin.Mode.ReleaseFast and
-        builtin.mode != builtin.Mode.ReleaseSmall and
-        builtin.os != builtin.Os.windows)
-    {
-        @export("__stack_chk_fail", __stack_chk_fail, builtin.GlobalLinkage.Strong);
-    }
-    if (builtin.os == builtin.Os.linux) {
-        @export("clone", clone, builtin.GlobalLinkage.Strong);
-    }
-}
-extern fn __stack_chk_fail() noreturn {
-    @panic("stack smashing detected");
-}
-
-// TODO we should be able to put this directly in std/linux/x86_64.zig but
-// it causes a segfault in release mode. this is a workaround of calling it
-// across .o file boundaries. fix comptime @ptrCast of nakedcc functions.
-nakedcc fn clone() void {
-    if (builtin.arch == builtin.Arch.x86_64) {
-        asm volatile (
-            \\      xor %%eax,%%eax
-            \\      mov $56,%%al // SYS_clone
-            \\      mov %%rdi,%%r11
-            \\      mov %%rdx,%%rdi
-            \\      mov %%r8,%%rdx
-            \\      mov %%r9,%%r8
-            \\      mov 8(%%rsp),%%r10
-            \\      mov %%r11,%%r9
-            \\      and $-16,%%rsi
-            \\      sub $8,%%rsi
-            \\      mov %%rcx,(%%rsi)
-            \\      syscall
-            \\      test %%eax,%%eax
-            \\      jnz 1f
-            \\      xor %%ebp,%%ebp
-            \\      pop %%rdi
-            \\      call *%%r9
-            \\      mov %%eax,%%edi
-            \\      xor %%eax,%%eax
-            \\      mov $60,%%al // SYS_exit
-            \\      syscall
-            \\      hlt
-            \\1:    ret
-            \\
-        );
-    } else if (builtin.arch == builtin.Arch.aarch64) {
-        // __clone(func, stack, flags, arg, ptid, tls, ctid)
-        //         x0,   x1,    w2,    x3,  x4,   x5,  x6
-
-        // syscall(SYS_clone, flags, stack, ptid, tls, ctid)
-        //         x8,        x0,    x1,    x2,   x3,  x4
-        asm volatile (
-            \\      // align stack and save func,arg
-            \\      and x1,x1,#-16
-            \\      stp x0,x3,[x1,#-16]!
-            \\
-            \\      // syscall
-            \\      uxtw x0,w2
-            \\      mov x2,x4
-            \\      mov x3,x5
-            \\      mov x4,x6
-            \\      mov x8,#220 // SYS_clone
-            \\      svc #0
-            \\
-            \\      cbz x0,1f
-            \\      // parent
-            \\      ret
-            \\      // child
-            \\1:    ldp x1,x0,[sp],#16
-            \\      blr x1
-            \\      mov x8,#93 // SYS_exit
-            \\      svc #0
-        );
-    } else {
-        @compileError("Implement clone() for this arch.");
-    }
-}
-
-const math = std.math;
-
-export fn fmodf(x: f32, y: f32) f32 {
-    return generic_fmod(f32, x, y);
-}
-export fn fmod(x: f64, y: f64) f64 {
-    return generic_fmod(f64, x, y);
-}
-
-// TODO add intrinsics for these (and probably the double version too)
-// and have the math stuff use the intrinsic. same as @mod and @rem
-export fn floorf(x: f32) f32 {
-    return math.floor(x);
-}
-export fn ceilf(x: f32) f32 {
-    return math.ceil(x);
-}
-export fn floor(x: f64) f64 {
-    return math.floor(x);
-}
-export fn ceil(x: f64) f64 {
-    return math.ceil(x);
-}
-
-fn generic_fmod(comptime T: type, x: T, y: T) T {
-    @setRuntimeSafety(false);
-
-    const uint = @IntType(false, T.bit_count);
-    const log2uint = math.Log2Int(uint);
-    const digits = if (T == f32) 23 else 52;
-    const exp_bits = if (T == f32) 9 else 12;
-    const bits_minus_1 = T.bit_count - 1;
-    const mask = if (T == f32) 0xff else 0x7ff;
-    var ux = @bitCast(uint, x);
-    var uy = @bitCast(uint, y);
-    var ex = @intCast(i32, (ux >> digits) & mask);
-    var ey = @intCast(i32, (uy >> digits) & mask);
-    const sx = if (T == f32) @intCast(u32, ux & 0x80000000) else @intCast(i32, ux >> bits_minus_1);
-    var i: uint = undefined;
-
-    if (uy << 1 == 0 or isNan(uint, uy) or ex == mask)
-        return (x * y) / (x * y);
-
-    if (ux << 1 <= uy << 1) {
-        if (ux << 1 == uy << 1)
-            return 0 * x;
-        return x;
-    }
-
-    // normalize x and y
-    if (ex == 0) {
-        i = ux << exp_bits;
-        while (i >> bits_minus_1 == 0) : (b: {
-            ex -= 1;
-            i <<= 1;
-        }) {}
-        ux <<= @intCast(log2uint, @bitCast(u32, -ex + 1));
-    } else {
-        ux &= maxInt(uint) >> exp_bits;
-        ux |= 1 << digits;
-    }
-    if (ey == 0) {
-        i = uy << exp_bits;
-        while (i >> bits_minus_1 == 0) : (b: {
-            ey -= 1;
-            i <<= 1;
-        }) {}
-        uy <<= @intCast(log2uint, @bitCast(u32, -ey + 1));
-    } else {
-        uy &= maxInt(uint) >> exp_bits;
-        uy |= 1 << digits;
-    }
-
-    // x mod y
-    while (ex > ey) : (ex -= 1) {
-        i = ux -% uy;
-        if (i >> bits_minus_1 == 0) {
-            if (i == 0)
-                return 0 * x;
-            ux = i;
-        }
-        ux <<= 1;
-    }
-    i = ux -% uy;
-    if (i >> bits_minus_1 == 0) {
-        if (i == 0)
-            return 0 * x;
-        ux = i;
-    }
-    while (ux >> digits == 0) : (b: {
-        ux <<= 1;
-        ex -= 1;
-    }) {}
-
-    // scale result up
-    if (ex > 0) {
-        ux -%= 1 << digits;
-        ux |= uint(@bitCast(u32, ex)) << digits;
-    } else {
-        ux >>= @intCast(log2uint, @bitCast(u32, -ex + 1));
-    }
-    if (T == f32) {
-        ux |= sx;
-    } else {
-        ux |= @intCast(uint, sx) << bits_minus_1;
-    }
-    return @bitCast(T, ux);
-}
-
-fn isNan(comptime T: type, bits: T) bool {
-    if (T == u16) {
-        return (bits & 0x7fff) > 0x7c00;
-    } else if (T == u32) {
-        return (bits & 0x7fffffff) > 0x7f800000;
-    } else if (T == u64) {
-        return (bits & (maxInt(u64) >> 1)) > (u64(0x7ff) << 52);
-    } else {
-        unreachable;
-    }
-}
-
-// NOTE: The original code is full of implicit signed -> unsigned assumptions and u32 wraparound
-// behaviour. Most intermediate i32 values are changed to u32 where appropriate but there are
-// potentially some edge cases remaining that are not handled in the same way.
-export fn sqrt(x: f64) f64 {
-    const tiny: f64 = 1.0e-300;
-    const sign: u32 = 0x80000000;
-    const u = @bitCast(u64, x);
-
-    var ix0 = @intCast(u32, u >> 32);
-    var ix1 = @intCast(u32, u & 0xFFFFFFFF);
-
-    // sqrt(nan) = nan, sqrt(+inf) = +inf, sqrt(-inf) = nan
-    if (ix0 & 0x7FF00000 == 0x7FF00000) {
-        return x * x + x;
-    }
-
-    // sqrt(+-0) = +-0
-    if (x == 0.0) {
-        return x;
-    }
-    // sqrt(-ve) = snan
-    if (ix0 & sign != 0) {
-        return math.snan(f64);
-    }
-
-    // normalize x
-    var m = @intCast(i32, ix0 >> 20);
-    if (m == 0) {
-        // subnormal
-        while (ix0 == 0) {
-            m -= 21;
-            ix0 |= ix1 >> 11;
-            ix1 <<= 21;
-        }
-
-        // subnormal
-        var i: u32 = 0;
-        while (ix0 & 0x00100000 == 0) : (i += 1) {
-            ix0 <<= 1;
-        }
-        m -= @intCast(i32, i) - 1;
-        ix0 |= ix1 >> @intCast(u5, 32 - i);
-        ix1 <<= @intCast(u5, i);
-    }
-
-    // unbias exponent
-    m -= 1023;
-    ix0 = (ix0 & 0x000FFFFF) | 0x00100000;
-    if (m & 1 != 0) {
-        ix0 += ix0 + (ix1 >> 31);
-        ix1 = ix1 +% ix1;
-    }
-    m >>= 1;
-
-    // sqrt(x) bit by bit
-    ix0 += ix0 + (ix1 >> 31);
-    ix1 = ix1 +% ix1;
-
-    var q: u32 = 0;
-    var q1: u32 = 0;
-    var s0: u32 = 0;
-    var s1: u32 = 0;
-    var r: u32 = 0x00200000;
-    var t: u32 = undefined;
-    var t1: u32 = undefined;
-
-    while (r != 0) {
-        t = s0 +% r;
-        if (t <= ix0) {
-            s0 = t + r;
-            ix0 -= t;
-            q += r;
-        }
-        ix0 = ix0 +% ix0 +% (ix1 >> 31);
-        ix1 = ix1 +% ix1;
-        r >>= 1;
-    }
-
-    r = sign;
-    while (r != 0) {
-        t = s1 +% r;
-        t = s0;
-        if (t < ix0 or (t == ix0 and t1 <= ix1)) {
-            s1 = t1 +% r;
-            if (t1 & sign == sign and s1 & sign == 0) {
-                s0 += 1;
-            }
-            ix0 -= t;
-            if (ix1 < t1) {
-                ix0 -= 1;
-            }
-            ix1 = ix1 -% t1;
-            q1 += r;
-        }
-        ix0 = ix0 +% ix0 +% (ix1 >> 31);
-        ix1 = ix1 +% ix1;
-        r >>= 1;
-    }
-
-    // rounding direction
-    if (ix0 | ix1 != 0) {
-        var z = 1.0 - tiny; // raise inexact
-        if (z >= 1.0) {
-            z = 1.0 + tiny;
-            if (q1 == 0xFFFFFFFF) {
-                q1 = 0;
-                q += 1;
-            } else if (z > 1.0) {
-                if (q1 == 0xFFFFFFFE) {
-                    q += 1;
-                }
-                q1 += 2;
-            } else {
-                q1 += q1 & 1;
-            }
-        }
-    }
-
-    ix0 = (q >> 1) + 0x3FE00000;
-    ix1 = q1 >> 1;
-    if (q & 1 != 0) {
-        ix1 |= 0x80000000;
-    }
-
-    // NOTE: musl here appears to rely on signed twos-complement wraparound. +% has the same
-    // behaviour at least.
-    var iix0 = @intCast(i32, ix0);
-    iix0 = iix0 +% (m << 20);
-
-    const uz = (@intCast(u64, iix0) << 32) | ix1;
-    return @bitCast(f64, uz);
-}
-
-export fn sqrtf(x: f32) f32 {
-    const tiny: f32 = 1.0e-30;
-    const sign: i32 = @bitCast(i32, u32(0x80000000));
-    var ix: i32 = @bitCast(i32, x);
-
-    if ((ix & 0x7F800000) == 0x7F800000) {
-        return x * x + x; // sqrt(nan) = nan, sqrt(+inf) = +inf, sqrt(-inf) = snan
-    }
-
-    // zero
-    if (ix <= 0) {
-        if (ix & ~sign == 0) {
-            return x; // sqrt (+-0) = +-0
-        }
-        if (ix < 0) {
-            return math.snan(f32);
-        }
-    }
-
-    // normalize
-    var m = ix >> 23;
-    if (m == 0) {
-        // subnormal
-        var i: i32 = 0;
-        while (ix & 0x00800000 == 0) : (i += 1) {
-            ix <<= 1;
-        }
-        m -= i - 1;
-    }
-
-    m -= 127; // unbias exponent
-    ix = (ix & 0x007FFFFF) | 0x00800000;
-
-    if (m & 1 != 0) { // odd m, double x to even
-        ix += ix;
-    }
-
-    m >>= 1; // m = [m / 2]
-
-    // sqrt(x) bit by bit
-    ix += ix;
-    var q: i32 = 0; // q = sqrt(x)
-    var s: i32 = 0;
-    var r: i32 = 0x01000000; // r = moving bit right -> left
-
-    while (r != 0) {
-        const t = s + r;
-        if (t <= ix) {
-            s = t + r;
-            ix -= t;
-            q += r;
-        }
-        ix += ix;
-        r >>= 1;
-    }
-
-    // floating add to find rounding direction
-    if (ix != 0) {
-        var z = 1.0 - tiny; // inexact
-        if (z >= 1.0) {
-            z = 1.0 + tiny;
-            if (z > 1.0) {
-                q += 2;
-            } else {
-                if (q & 1 != 0) {
-                    q += 1;
-                }
-            }
-        }
-    }
-
-    ix = (q >> 1) + 0x3f000000;
-    ix += m << 23;
-    return @bitCast(f32, ix);
-}
diff --git a/std/special/c.zig b/std/special/c.zig
@@ -0,0 +1,506 @@
+// This is Zig's multi-target implementation of libc.
+// When builtin.link_libc is true, we need to export all the functions and
+// provide an entire C API.
+// Otherwise, only the functions which LLVM generates calls to need to be generated,
+// such as memcpy, memset, and some math functions.
+
+const std = @import("std");
+const builtin = @import("builtin");
+const maxInt = std.math.maxInt;
+
+const is_wasm = switch (builtin.arch) { .wasm32, .wasm64 => true, else => false};
+const is_freestanding = switch (builtin.os) { .freestanding => true, else => false };
+comptime {
+    if (is_freestanding and is_wasm) {
+        @export("_start", wasm_start, .Strong);
+    }
+}
+
+extern fn main(argc: c_int, argv: [*][*]u8) c_int;
+extern fn wasm_start() c_int {
+    return main(0, undefined);
+}
+
+// Avoid dragging in the runtime safety mechanisms into this .o file,
+// unless we're trying to test this file.
+pub fn panic(msg: []const u8, error_return_trace: ?*builtin.StackTrace) noreturn {
+    if (builtin.is_test) {
+        @setCold(true);
+        std.debug.panic("{}", msg);
+    } else {
+        unreachable;
+    }
+}
+
+export fn memset(dest: ?[*]u8, c: u8, n: usize) ?[*]u8 {
+    @setRuntimeSafety(false);
+
+    var index: usize = 0;
+    while (index != n) : (index += 1)
+        dest.?[index] = c;
+
+    return dest;
+}
+
+export fn memcpy(noalias dest: ?[*]u8, noalias src: ?[*]const u8, n: usize) ?[*]u8 {
+    @setRuntimeSafety(false);
+
+    var index: usize = 0;
+    while (index != n) : (index += 1)
+        dest.?[index] = src.?[index];
+
+    return dest;
+}
+
+export fn memmove(dest: ?[*]u8, src: ?[*]const u8, n: usize) ?[*]u8 {
+    @setRuntimeSafety(false);
+
+    if (@ptrToInt(dest) < @ptrToInt(src)) {
+        var index: usize = 0;
+        while (index != n) : (index += 1) {
+            dest.?[index] = src.?[index];
+        }
+    } else {
+        var index = n;
+        while (index != 0) {
+            index -= 1;
+            dest.?[index] = src.?[index];
+        }
+    }
+
+    return dest;
+}
+
+export fn memcmp(vl: ?[*]const u8, vr: ?[*]const u8, n: usize) isize {
+    @setRuntimeSafety(false);
+    // Returns <0, 0 or >0 for the first differing byte pair, comparing bytes as unsigned values.
+    var index: usize = 0;
+    while (index != n) : (index += 1) {
+        const compare_val = isize(vl.?[index]) - isize(vr.?[index]); // widen first so the sign is right for differences >= 128
+        if (compare_val != 0) {
+            return compare_val;
+        }
+    }
+
+    return 0;
+}
+
+test "test_memcmp" {
+    const base_arr = []u8{ 1, 1, 1 };
+    const arr1 = []u8{ 1, 1, 1 };
+    const arr2 = []u8{ 1, 0, 1 };
+    const arr3 = []u8{ 1, 2, 1 };
+
+    std.testing.expect(memcmp(base_arr[0..].ptr, arr1[0..].ptr, base_arr.len) == 0);
+    std.testing.expect(memcmp(base_arr[0..].ptr, arr2[0..].ptr, base_arr.len) == 1);
+    std.testing.expect(memcmp(base_arr[0..].ptr, arr3[0..].ptr, base_arr.len) == -1);
+}
+
+comptime {
+    if (builtin.mode != builtin.Mode.ReleaseFast and
+        builtin.mode != builtin.Mode.ReleaseSmall and
+        builtin.os != builtin.Os.windows)
+    {
+        @export("__stack_chk_fail", __stack_chk_fail, builtin.GlobalLinkage.Strong);
+    }
+    if (builtin.os == builtin.Os.linux) {
+        @export("clone", clone, builtin.GlobalLinkage.Strong);
+    }
+}
+extern fn __stack_chk_fail() noreturn {
+    @panic("stack smashing detected");
+}
+
+// TODO we should be able to put this directly in std/linux/x86_64.zig but
+// it causes a segfault in release mode. this is a workaround of calling it
+// across .o file boundaries. fix comptime @ptrCast of nakedcc functions.
+nakedcc fn clone() void {
+    if (builtin.arch == builtin.Arch.x86_64) {
+        asm volatile (
+            \\      xor %%eax,%%eax
+            \\      mov $56,%%al // SYS_clone
+            \\      mov %%rdi,%%r11
+            \\      mov %%rdx,%%rdi
+            \\      mov %%r8,%%rdx
+            \\      mov %%r9,%%r8
+            \\      mov 8(%%rsp),%%r10
+            \\      mov %%r11,%%r9
+            \\      and $-16,%%rsi
+            \\      sub $8,%%rsi
+            \\      mov %%rcx,(%%rsi)
+            \\      syscall
+            \\      test %%eax,%%eax
+            \\      jnz 1f
+            \\      xor %%ebp,%%ebp
+            \\      pop %%rdi
+            \\      call *%%r9
+            \\      mov %%eax,%%edi
+            \\      xor %%eax,%%eax
+            \\      mov $60,%%al // SYS_exit
+            \\      syscall
+            \\      hlt
+            \\1:    ret
+            \\
+        );
+    } else if (builtin.arch == builtin.Arch.aarch64) {
+        // __clone(func, stack, flags, arg, ptid, tls, ctid)
+        //         x0,   x1,    w2,    x3,  x4,   x5,  x6
+
+        // syscall(SYS_clone, flags, stack, ptid, tls, ctid)
+        //         x8,        x0,    x1,    x2,   x3,  x4
+        asm volatile (
+            \\      // align stack and save func,arg
+            \\      and x1,x1,#-16
+            \\      stp x0,x3,[x1,#-16]!
+            \\
+            \\      // syscall
+            \\      uxtw x0,w2
+            \\      mov x2,x4
+            \\      mov x3,x5
+            \\      mov x4,x6
+            \\      mov x8,#220 // SYS_clone
+            \\      svc #0
+            \\
+            \\      cbz x0,1f
+            \\      // parent
+            \\      ret
+            \\      // child
+            \\1:    ldp x1,x0,[sp],#16
+            \\      blr x1
+            \\      mov x8,#93 // SYS_exit
+            \\      svc #0
+        );
+    } else {
+        @compileError("Implement clone() for this arch.");
+    }
+}
+
+const math = std.math;
+
+export fn fmodf(x: f32, y: f32) f32 {
+    return generic_fmod(f32, x, y);
+}
+export fn fmod(x: f64, y: f64) f64 {
+    return generic_fmod(f64, x, y);
+}
+
+// TODO add intrinsics for these (and probably the double version too)
+// and have the math stuff use the intrinsic. same as @mod and @rem
+export fn floorf(x: f32) f32 {
+    return math.floor(x);
+}
+export fn ceilf(x: f32) f32 {
+    return math.ceil(x);
+}
+export fn floor(x: f64) f64 {
+    return math.floor(x);
+}
+export fn ceil(x: f64) f64 {
+    return math.ceil(x);
+}
+
+fn generic_fmod(comptime T: type, x: T, y: T) T {
+    @setRuntimeSafety(false);
+
+    const uint = @IntType(false, T.bit_count);
+    const log2uint = math.Log2Int(uint);
+    const digits = if (T == f32) 23 else 52;
+    const exp_bits = if (T == f32) 9 else 12;
+    const bits_minus_1 = T.bit_count - 1;
+    const mask = if (T == f32) 0xff else 0x7ff;
+    var ux = @bitCast(uint, x);
+    var uy = @bitCast(uint, y);
+    var ex = @intCast(i32, (ux >> digits) & mask);
+    var ey = @intCast(i32, (uy >> digits) & mask);
+    const sx = if (T == f32) @intCast(u32, ux & 0x80000000) else @intCast(i32, ux >> bits_minus_1);
+    var i: uint = undefined;
+
+    if (uy << 1 == 0 or isNan(uint, uy) or ex == mask)
+        return (x * y) / (x * y);
+
+    if (ux << 1 <= uy << 1) {
+        if (ux << 1 == uy << 1)
+            return 0 * x;
+        return x;
+    }
+
+    // normalize x and y
+    if (ex == 0) {
+        i = ux << exp_bits;
+        while (i >> bits_minus_1 == 0) : (b: {
+            ex -= 1;
+            i <<= 1;
+        }) {}
+        ux <<= @intCast(log2uint, @bitCast(u32, -ex + 1));
+    } else {
+        ux &= maxInt(uint) >> exp_bits;
+        ux |= 1 << digits;
+    }
+    if (ey == 0) {
+        i = uy << exp_bits;
+        while (i >> bits_minus_1 == 0) : (b: {
+            ey -= 1;
+            i <<= 1;
+        }) {}
+        uy <<= @intCast(log2uint, @bitCast(u32, -ey + 1));
+    } else {
+        uy &= maxInt(uint) >> exp_bits;
+        uy |= 1 << digits;
+    }
+
+    // x mod y
+    while (ex > ey) : (ex -= 1) {
+        i = ux -% uy;
+        if (i >> bits_minus_1 == 0) {
+            if (i == 0)
+                return 0 * x;
+            ux = i;
+        }
+        ux <<= 1;
+    }
+    i = ux -% uy;
+    if (i >> bits_minus_1 == 0) {
+        if (i == 0)
+            return 0 * x;
+        ux = i;
+    }
+    while (ux >> digits == 0) : (b: {
+        ux <<= 1;
+        ex -= 1;
+    }) {}
+
+    // scale result up
+    if (ex > 0) {
+        ux -%= 1 << digits;
+        ux |= uint(@bitCast(u32, ex)) << digits;
+    } else {
+        ux >>= @intCast(log2uint, @bitCast(u32, -ex + 1));
+    }
+    if (T == f32) {
+        ux |= sx;
+    } else {
+        ux |= @intCast(uint, sx) << bits_minus_1;
+    }
+    return @bitCast(T, ux);
+}
+
+fn isNan(comptime T: type, bits: T) bool {
+    if (T == u16) {
+        return (bits & 0x7fff) > 0x7c00;
+    } else if (T == u32) {
+        return (bits & 0x7fffffff) > 0x7f800000;
+    } else if (T == u64) {
+        return (bits & (maxInt(u64) >> 1)) > (u64(0x7ff) << 52);
+    } else {
+        unreachable;
+    }
+}
+
+// sqrt(x) for f64, ported from musl: computes the root bit by bit on the two 32-bit halves of x.
+// NOTE: The original code is full of implicit signed -> unsigned assumptions and u32 wraparound
+// behaviour. Most intermediate i32 values are u32 here, so some edge cases may still differ.
+export fn sqrt(x: f64) f64 {
+    const tiny: f64 = 1.0e-300;
+    const sign: u32 = 0x80000000;
+    const u = @bitCast(u64, x);
+
+    var ix0 = @intCast(u32, u >> 32);
+    var ix1 = @intCast(u32, u & 0xFFFFFFFF);
+
+    // sqrt(nan) = nan, sqrt(+inf) = +inf, sqrt(-inf) = nan
+    if (ix0 & 0x7FF00000 == 0x7FF00000) {
+        return x * x + x;
+    }
+
+    // sqrt(+-0) = +-0
+    if (x == 0.0) {
+        return x;
+    }
+    // sqrt(-ve) = snan
+    if (ix0 & sign != 0) {
+        return math.snan(f64);
+    }
+
+    // normalize x
+    var m = @intCast(i32, ix0 >> 20);
+    if (m == 0) {
+        // subnormal
+        while (ix0 == 0) {
+            m -= 21;
+            ix0 |= ix1 >> 11;
+            ix1 <<= 21;
+        }
+
+        // subnormal
+        var i: u32 = 0;
+        while (ix0 & 0x00100000 == 0) : (i += 1) {
+            ix0 <<= 1;
+        }
+        m -= @intCast(i32, i) - 1;
+        ix0 |= ix1 >> @intCast(u5, 32 - i);
+        ix1 <<= @intCast(u5, i);
+    }
+
+    // unbias exponent
+    m -= 1023;
+    ix0 = (ix0 & 0x000FFFFF) | 0x00100000;
+    if (m & 1 != 0) {
+        ix0 += ix0 + (ix1 >> 31);
+        ix1 = ix1 +% ix1;
+    }
+    m >>= 1;
+
+    // sqrt(x) bit by bit
+    ix0 += ix0 + (ix1 >> 31);
+    ix1 = ix1 +% ix1;
+
+    var q: u32 = 0;
+    var q1: u32 = 0;
+    var s0: u32 = 0;
+    var s1: u32 = 0;
+    var r: u32 = 0x00200000;
+    var t: u32 = undefined;
+    var t1: u32 = undefined;
+
+    while (r != 0) {
+        t = s0 +% r;
+        if (t <= ix0) {
+            s0 = t + r;
+            ix0 -= t;
+            q += r;
+        }
+        ix0 = ix0 +% ix0 +% (ix1 >> 31);
+        ix1 = ix1 +% ix1;
+        r >>= 1;
+    }
+
+    r = sign;
+    while (r != 0) {
+        t1 = s1 +% r; // low word of the trial value; (t, t1) is compared against (ix0, ix1)
+        t = s0;
+        if (t < ix0 or (t == ix0 and t1 <= ix1)) {
+            s1 = t1 +% r;
+            if (t1 & sign == sign and s1 & sign == 0) {
+                s0 += 1;
+            }
+            ix0 -= t;
+            if (ix1 < t1) {
+                ix0 -= 1;
+            }
+            ix1 = ix1 -% t1;
+            q1 += r;
+        }
+        ix0 = ix0 +% ix0 +% (ix1 >> 31);
+        ix1 = ix1 +% ix1;
+        r >>= 1;
+    }
+
+    // rounding direction
+    if (ix0 | ix1 != 0) {
+        var z = 1.0 - tiny; // raise inexact
+        if (z >= 1.0) {
+            z = 1.0 + tiny;
+            if (q1 == 0xFFFFFFFF) {
+                q1 = 0;
+                q += 1;
+            } else if (z > 1.0) {
+                if (q1 == 0xFFFFFFFE) {
+                    q += 1;
+                }
+                q1 += 2;
+            } else {
+                q1 += q1 & 1;
+            }
+        }
+    }
+
+    ix0 = (q >> 1) + 0x3FE00000;
+    ix1 = q1 >> 1;
+    if (q & 1 != 0) {
+        ix1 |= 0x80000000;
+    }
+
+    // NOTE: musl here appears to rely on signed twos-complement wraparound. +% has the same
+    // behaviour at least.
+    var iix0 = @intCast(i32, ix0);
+    iix0 = iix0 +% (m << 20);
+
+    const uz = (@intCast(u64, iix0) << 32) | ix1;
+    return @bitCast(f64, uz);
+}
+
+export fn sqrtf(x: f32) f32 {
+    const tiny: f32 = 1.0e-30;
+    const sign: i32 = @bitCast(i32, u32(0x80000000));
+    var ix: i32 = @bitCast(i32, x);
+
+    if ((ix & 0x7F800000) == 0x7F800000) {
+        return x * x + x; // sqrt(nan) = nan, sqrt(+inf) = +inf, sqrt(-inf) = snan
+    }
+
+    // zero
+    if (ix <= 0) {
+        if (ix & ~sign == 0) {
+            return x; // sqrt (+-0) = +-0
+        }
+        if (ix < 0) {
+            return math.snan(f32);
+        }
+    }
+
+    // normalize
+    var m = ix >> 23;
+    if (m == 0) {
+        // subnormal
+        var i: i32 = 0;
+        while (ix & 0x00800000 == 0) : (i += 1) {
+            ix <<= 1;
+        }
+        m -= i - 1;
+    }
+
+    m -= 127; // unbias exponent
+    ix = (ix & 0x007FFFFF) | 0x00800000;
+
+    if (m & 1 != 0) { // odd m, double x to even
+        ix += ix;
+    }
+
+    m >>= 1; // m = [m / 2]
+
+    // sqrt(x) bit by bit
+    ix += ix;
+    var q: i32 = 0; // q = sqrt(x)
+    var s: i32 = 0;
+    var r: i32 = 0x01000000; // r = moving bit right -> left
+
+    while (r != 0) {
+        const t = s + r;
+        if (t <= ix) {
+            s = t + r;
+            ix -= t;
+            q += r;
+        }
+        ix += ix;
+        r >>= 1;
+    }
+
+    // floating add to find rounding direction
+    if (ix != 0) {
+        var z = 1.0 - tiny; // inexact
+        if (z >= 1.0) {
+            z = 1.0 + tiny;
+            if (z > 1.0) {
+                q += 2;
+            } else {
+                if (q & 1 != 0) {
+                    q += 1;
+                }
+            }
+        }
+    }
+
+    ix = (q >> 1) + 0x3f000000;
+    ix += m << 23;
+    return @bitCast(f32, ix);
+}

	zig fork of https://codeberg.org/ziglang/zig
	Log \| Files \| Refs \| README \| LICENSE

M	CMakeLists.txt	\|	2	+-
M	src/link.cpp	\|	45	++++++++++++++++++++++++---------------------
M	src/target.cpp	\|	7	+++++--
D	std/special/builtin.zig	\|	490	-------------------------------------------------------------------------------
A	std/special/c.zig	\|	506	+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++