zig

fork of https://codeberg.org/ziglang/zig
Log | Files | Refs | README | LICENSE

commit a8f0f37adb3bae8ad3d1f344fdaf1f1051551d21 (tree)
parent f9bd049c89e4d2b4d3f51a937ec2114c3cac9176
Author: Andrew Kelley <andrew@ziglang.org>
Date:   Tue, 25 Aug 2020 14:12:48 -0700

Merge remote-tracking branch 'origin/master' into llvm11

Diffstat:
Mlib/std/builtin.zig | 2--
Mlib/std/c/darwin.zig | 1+
Mlib/std/cache_hash.zig | 70++++++++++++++++++++++++++++++++++++++--------------------------------
Mlib/std/crypto.zig | 2++
Mlib/std/crypto/benchmark.zig | 4++++
Alib/std/crypto/siphash.zig | 431+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Mlib/std/elf.zig | 3+++
Mlib/std/fs.zig | 31+++++++++++++++++++------------
Mlib/std/fs/file.zig | 4++++
Mlib/std/hash.zig | 3+--
Mlib/std/hash/benchmark.zig | 12------------
Dlib/std/hash/siphash.zig | 393-------------------------------------------------------------------------------
Mlib/std/heap/general_purpose_allocator.zig | 6++++--
Mlib/std/linked_list.zig | 12------------
Mlib/std/macho.zig | 79++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++-
Mlib/std/os/bits/linux.zig | 1-
Dlib/std/os/bits/linux/bpf.zig | 975-------------------------------------------------------------------------------
Mlib/std/os/linux.zig | 1+
Alib/std/os/linux/bpf.zig | 973+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Mlib/std/special/init-exe/build.zig | 5-----
Mlib/std/special/init-exe/src/main.zig | 5-----
Mlib/std/special/init-lib/build.zig | 5-----
Mlib/std/special/init-lib/src/main.zig | 5-----
Mlib/std/target.zig | 26++++++++++++++++++--------
Mlib/std/zig/system.zig | 2+-
Msrc-self-hosted/Module.zig | 42++++++++++++++++++++++++++++++++++++++++++
Msrc-self-hosted/astgen.zig | 433+++++++++++++++++++++++++++++++++++++++++++------------------------------------
Msrc-self-hosted/codegen.zig | 425+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++------------
Asrc-self-hosted/codegen/arm.zig | 607+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Asrc-self-hosted/codegen/spu-mk2.zig | 170+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Asrc-self-hosted/codegen/spu-mk2/interpreter.zig | 166+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Msrc-self-hosted/link.zig | 4++++
Msrc-self-hosted/link/Elf.zig | 87++++++++++++++++++++++++++++++++++++++++++++++++++++++-------------------------
Msrc-self-hosted/link/MachO.zig | 215+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++--------------
Msrc-self-hosted/test.zig | 117+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++--
Msrc-self-hosted/type.zig | 258++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++---
Msrc-self-hosted/value.zig | 91++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++---
Msrc-self-hosted/zir.zig | 57++++++++++++++++++++++++++++++++++++++++++++++++++++++++-
Msrc-self-hosted/zir_sema.zig | 130++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++---
Msrc/analyze.cpp | 225+++++++++++++++++++++++++++++++++++++++++++------------------------------------
Msrc/ir.cpp | 138++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++-------
Mtest/compile_errors.zig | 58+++++++++++++++++++++++++++++++++++++++++++++-------------
Mtest/stage1/behavior/type.zig | 34++++++++++++++++++++++++++++++++++
Mtest/stage1/behavior/type_info.zig | 1-
Atest/stage2/spu-ii.zig | 23+++++++++++++++++++++++
Mtest/stage2/test.zig | 94+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Mtools/process_headers.zig | 5+++--
Mtools/update_glibc.zig | 40++++++++++++++++++++--------------------
48 files changed, 4504 insertions(+), 1967 deletions(-)

diff --git a/lib/std/builtin.zig b/lib/std/builtin.zig @@ -289,8 +289,6 @@ pub const TypeInfo = union(enum) { /// therefore must be kept in sync with the compiler implementation. pub const Error = struct { name: []const u8, - /// This field is ignored when using @Type(). - value: comptime_int, }; /// This data structure is used by the Zig language code generation and diff --git a/lib/std/c/darwin.zig b/lib/std/c/darwin.zig @@ -11,6 +11,7 @@ const macho = std.macho; usingnamespace @import("../os/bits.zig"); extern "c" fn __error() *c_int; +pub extern "c" fn NSVersionOfRunTimeLibrary(library_name: [*:0]const u8) u32; pub extern "c" fn _NSGetExecutablePath(buf: [*]u8, bufsize: *u32) c_int; pub extern "c" fn _dyld_image_count() u32; pub extern "c" fn _dyld_get_image_header(image_index: u32) ?*mach_header; diff --git a/lib/std/cache_hash.zig b/lib/std/cache_hash.zig @@ -4,7 +4,8 @@ // The MIT license requires this copyright notice to be included in all copies // and substantial portions of the software. const std = @import("std.zig"); -const Blake3 = std.crypto.hash.Blake3; +const crypto = std.crypto; +const Hasher = crypto.auth.siphash.SipHash128(1, 3); // provides enough collision resistance for the CacheHash use cases, while being one of our fastest options right now const fs = std.fs; const base64 = std.base64; const ArrayList = std.ArrayList; @@ -16,9 +17,8 @@ const Allocator = std.mem.Allocator; const base64_encoder = fs.base64_encoder; const base64_decoder = fs.base64_decoder; -/// This is 70 more bits than UUIDs. 
For an analysis of probability of collisions, see: -/// https://en.wikipedia.org/wiki/Universally_unique_identifier#Collisions -const BIN_DIGEST_LEN = 24; +/// This is 128 bits - Even with 2^54 cache entries, the probably of a collision would be under 10^-6 +const BIN_DIGEST_LEN = 16; const BASE64_DIGEST_LEN = base64.Base64Encoder.calcSize(BIN_DIGEST_LEN); const MANIFEST_FILE_SIZE_MAX = 50 * 1024 * 1024; @@ -43,9 +43,13 @@ pub const File = struct { } }; +/// CacheHash manages project-local `zig-cache` directories. +/// This is not a general-purpose cache. +/// It was designed to be fast and simple, not to withstand attacks using specially-crafted input. pub const CacheHash = struct { allocator: *Allocator, - blake3: Blake3, + hasher_init: Hasher, // initial state, that can be copied + hasher: Hasher, // current state for incremental hashing manifest_dir: fs.Dir, manifest_file: ?fs.File, manifest_dirty: bool, @@ -54,9 +58,11 @@ pub const CacheHash = struct { /// Be sure to call release after successful initialization. 
pub fn init(allocator: *Allocator, dir: fs.Dir, manifest_dir_path: []const u8) !CacheHash { + const hasher_init = Hasher.init(&[_]u8{0} ** Hasher.minimum_key_length); return CacheHash{ .allocator = allocator, - .blake3 = Blake3.init(.{}), + .hasher_init = hasher_init, + .hasher = hasher_init, .manifest_dir = try dir.makeOpenPath(manifest_dir_path, .{}), .manifest_file = null, .manifest_dirty = false, @@ -69,8 +75,8 @@ pub const CacheHash = struct { pub fn addSlice(self: *CacheHash, val: []const u8) void { assert(self.manifest_file == null); - self.blake3.update(val); - self.blake3.update(&[_]u8{0}); + self.hasher.update(val); + self.hasher.update(&[_]u8{0}); } /// Convert the input value into bytes and record it as a dependency of the @@ -133,12 +139,12 @@ pub const CacheHash = struct { assert(self.manifest_file == null); var bin_digest: [BIN_DIGEST_LEN]u8 = undefined; - self.blake3.final(&bin_digest); + self.hasher.final(&bin_digest); base64_encoder.encode(self.b64_digest[0..], &bin_digest); - self.blake3 = Blake3.init(.{}); - self.blake3.update(&bin_digest); + self.hasher = self.hasher_init; + self.hasher.update(&bin_digest); const manifest_file_path = try fmt.allocPrint(self.allocator, "{}.txt", .{self.b64_digest}); defer self.allocator.free(manifest_file_path); @@ -238,7 +244,7 @@ pub const CacheHash = struct { } var actual_digest: [BIN_DIGEST_LEN]u8 = undefined; - try hashFile(this_file, &actual_digest); + try hashFile(this_file, &actual_digest, self.hasher_init); if (!mem.eql(u8, &cache_hash_file.bin_digest, &actual_digest)) { cache_hash_file.bin_digest = actual_digest; @@ -248,7 +254,7 @@ pub const CacheHash = struct { } if (!any_file_changed) { - self.blake3.update(&cache_hash_file.bin_digest); + self.hasher.update(&cache_hash_file.bin_digest); } } @@ -256,8 +262,8 @@ pub const CacheHash = struct { // cache miss // keep the manifest file open // reset the hash - self.blake3 = Blake3.init(.{}); - self.blake3.update(&bin_digest); + self.hasher = 
self.hasher_init; + self.hasher.update(&bin_digest); // Remove files not in the initial hash for (self.files.items[input_file_count..]) |*file| { @@ -266,7 +272,7 @@ pub const CacheHash = struct { self.files.shrink(input_file_count); for (self.files.items) |file| { - self.blake3.update(&file.bin_digest); + self.hasher.update(&file.bin_digest); } return null; } @@ -304,23 +310,23 @@ pub const CacheHash = struct { // Hash while reading from disk, to keep the contents in the cpu cache while // doing hashing. - var blake3 = Blake3.init(.{}); + var hasher = self.hasher_init; var off: usize = 0; while (true) { // give me everything you've got, captain const bytes_read = try file.read(contents[off..]); if (bytes_read == 0) break; - blake3.update(contents[off..][0..bytes_read]); + hasher.update(contents[off..][0..bytes_read]); off += bytes_read; } - blake3.final(&ch_file.bin_digest); + hasher.final(&ch_file.bin_digest); ch_file.contents = contents; } else { - try hashFile(file, &ch_file.bin_digest); + try hashFile(file, &ch_file.bin_digest, self.hasher_init); } - self.blake3.update(&ch_file.bin_digest); + self.hasher.update(&ch_file.bin_digest); } /// Add a file as a dependency of process being cached, after the initial hash has been @@ -382,7 +388,7 @@ pub const CacheHash = struct { // the artifacts to cache. 
var bin_digest: [BIN_DIGEST_LEN]u8 = undefined; - self.blake3.final(&bin_digest); + self.hasher.final(&bin_digest); var out_digest: [BASE64_DIGEST_LEN]u8 = undefined; base64_encoder.encode(&out_digest, &bin_digest); @@ -433,17 +439,17 @@ pub const CacheHash = struct { } }; -fn hashFile(file: fs.File, bin_digest: []u8) !void { - var blake3 = Blake3.init(.{}); +fn hashFile(file: fs.File, bin_digest: []u8, hasher_init: anytype) !void { var buf: [1024]u8 = undefined; + var hasher = hasher_init; while (true) { const bytes_read = try file.read(&buf); if (bytes_read == 0) break; - blake3.update(buf[0..bytes_read]); + hasher.update(buf[0..bytes_read]); } - blake3.final(bin_digest); + hasher.final(bin_digest); } /// If the wall clock time, rounded to the same precision as the @@ -507,7 +513,7 @@ test "cache file and then recall it" { _ = try ch.addFile(temp_file, null); // There should be nothing in the cache - testing.expectEqual(@as(?[32]u8, null), try ch.hit()); + testing.expectEqual(@as(?[BASE64_DIGEST_LEN]u8, null), try ch.hit()); digest1 = ch.final(); } @@ -575,7 +581,7 @@ test "check that changing a file makes cache fail" { const temp_file_idx = try ch.addFile(temp_file, 100); // There should be nothing in the cache - testing.expectEqual(@as(?[32]u8, null), try ch.hit()); + testing.expectEqual(@as(?[BASE64_DIGEST_LEN]u8, null), try ch.hit()); testing.expect(mem.eql(u8, original_temp_file_contents, ch.files.items[temp_file_idx].contents.?)); @@ -592,7 +598,7 @@ test "check that changing a file makes cache fail" { const temp_file_idx = try ch.addFile(temp_file, 100); // A file that we depend on has been updated, so the cache should not contain an entry for it - testing.expectEqual(@as(?[32]u8, null), try ch.hit()); + testing.expectEqual(@as(?[BASE64_DIGEST_LEN]u8, null), try ch.hit()); // The cache system does not keep the contents of re-hashed input files. 
testing.expect(ch.files.items[temp_file_idx].contents == null); @@ -625,7 +631,7 @@ test "no file inputs" { ch.add("1234"); // There should be nothing in the cache - testing.expectEqual(@as(?[32]u8, null), try ch.hit()); + testing.expectEqual(@as(?[BASE64_DIGEST_LEN]u8, null), try ch.hit()); digest1 = ch.final(); } @@ -672,7 +678,7 @@ test "CacheHashes with files added after initial hash work" { _ = try ch.addFile(temp_file1, null); // There should be nothing in the cache - testing.expectEqual(@as(?[32]u8, null), try ch.hit()); + testing.expectEqual(@as(?[BASE64_DIGEST_LEN]u8, null), try ch.hit()); _ = try ch.addFilePost(temp_file2); @@ -705,7 +711,7 @@ test "CacheHashes with files added after initial hash work" { _ = try ch.addFile(temp_file1, null); // A file that we depend on has been updated, so the cache should not contain an entry for it - testing.expectEqual(@as(?[32]u8, null), try ch.hit()); + testing.expectEqual(@as(?[BASE64_DIGEST_LEN]u8, null), try ch.hit()); _ = try ch.addFilePost(temp_file2); diff --git a/lib/std/crypto.zig b/lib/std/crypto.zig @@ -18,6 +18,7 @@ pub const hash = struct { /// Authentication (MAC) functions. 
pub const auth = struct { pub const hmac = @import("crypto/hmac.zig"); + pub const siphash = @import("crypto/siphash.zig"); }; /// Authenticated Encryption with Associated Data @@ -80,6 +81,7 @@ test "crypto" { _ = @import("crypto/sha1.zig"); _ = @import("crypto/sha2.zig"); _ = @import("crypto/sha3.zig"); + _ = @import("crypto/siphash.zig"); _ = @import("crypto/25519/curve25519.zig"); _ = @import("crypto/25519/ed25519.zig"); _ = @import("crypto/25519/edwards25519.zig"); diff --git a/lib/std/crypto/benchmark.zig b/lib/std/crypto/benchmark.zig @@ -60,6 +60,10 @@ const macs = [_]Crypto{ Crypto{ .ty = crypto.auth.hmac.HmacSha1, .name = "hmac-sha1" }, Crypto{ .ty = crypto.auth.hmac.sha2.HmacSha256, .name = "hmac-sha256" }, Crypto{ .ty = crypto.auth.hmac.sha2.HmacSha512, .name = "hmac-sha512" }, + Crypto{ .ty = crypto.auth.siphash.SipHash64(2, 4), .name = "siphash-2-4" }, + Crypto{ .ty = crypto.auth.siphash.SipHash64(1, 3), .name = "siphash-1-3" }, + Crypto{ .ty = crypto.auth.siphash.SipHash128(2, 4), .name = "siphash128-2-4" }, + Crypto{ .ty = crypto.auth.siphash.SipHash128(1, 3), .name = "siphash128-1-3" }, }; pub fn benchmarkMac(comptime Mac: anytype, comptime bytes: comptime_int) !u64 { diff --git a/lib/std/crypto/siphash.zig b/lib/std/crypto/siphash.zig @@ -0,0 +1,431 @@ +// SPDX-License-Identifier: MIT +// Copyright (c) 2015-2020 Zig Contributors +// This file is part of [zig](https://ziglang.org/), which is MIT licensed. +// The MIT license requires this copyright notice to be included in all copies +// and substantial portions of the software. +// +// SipHash is a moderately fast pseudorandom function, returning a 64-bit or 128-bit tag for an arbitrary long input. 
+// +// Typical use cases include: +// - protection against against DoS attacks for hash tables and bloom filters +// - authentication of short-lived messages in online protocols +// +// https://131002.net/siphash/ +const std = @import("../std.zig"); +const assert = std.debug.assert; +const testing = std.testing; +const math = std.math; +const mem = std.mem; + +/// SipHash function with 64-bit output. +/// +/// Recommended parameters are: +/// - (c_rounds=4, d_rounds=8) for conservative security; regular hash functions such as BLAKE2 or BLAKE3 are usually a better alternative. +/// - (c_rounds=2, d_rounds=4) standard parameters. +/// - (c_rounds=1, d_rounds=3) reduced-round function. Faster, no known implications on its practical security level. +/// - (c_rounds=1, d_rounds=2) fastest option, but the output may be distinguishable from random data with related keys or non-uniform input - not suitable as a PRF. +/// +/// SipHash is not a traditional hash function. If the input includes untrusted content, a secret key is absolutely necessary. +/// And due to its small output size, collisions in SipHash64 can be found with an exhaustive search. +pub fn SipHash64(comptime c_rounds: usize, comptime d_rounds: usize) type { + return SipHash(u64, c_rounds, d_rounds); +} + +/// SipHash function with 128-bit output. +/// +/// Recommended parameters are: +/// - (c_rounds=4, d_rounds=8) for conservative security; regular hash functions such as BLAKE2 or BLAKE3 are usually a better alternative. +/// - (c_rounds=2, d_rounds=4) standard parameters. +/// - (c_rounds=1, d_rounds=4) reduced-round function. Recommended to hash very short, similar strings, when a 128-bit PRF output is still required. +/// - (c_rounds=1, d_rounds=3) reduced-round function. Faster, no known implications on its practical security level. +/// - (c_rounds=1, d_rounds=2) fastest option, but the output may be distinguishable from random data with related keys or non-uniform input - not suitable as a PRF. 
+/// +/// SipHash is not a traditional hash function. If the input includes untrusted content, a secret key is absolutely necessary. +pub fn SipHash128(comptime c_rounds: usize, comptime d_rounds: usize) type { + return SipHash(u128, c_rounds, d_rounds); +} + +fn SipHashStateless(comptime T: type, comptime c_rounds: usize, comptime d_rounds: usize) type { + assert(T == u64 or T == u128); + assert(c_rounds > 0 and d_rounds > 0); + + return struct { + const Self = @This(); + const digest_size = 64; + const block_size = 64; + + v0: u64, + v1: u64, + v2: u64, + v3: u64, + msg_len: u8, + + pub fn init(key: []const u8) Self { + assert(key.len >= 16); + + const k0 = mem.readIntLittle(u64, key[0..8]); + const k1 = mem.readIntLittle(u64, key[8..16]); + + var d = Self{ + .v0 = k0 ^ 0x736f6d6570736575, + .v1 = k1 ^ 0x646f72616e646f6d, + .v2 = k0 ^ 0x6c7967656e657261, + .v3 = k1 ^ 0x7465646279746573, + .msg_len = 0, + }; + + if (T == u128) { + d.v1 ^= 0xee; + } + + return d; + } + + pub fn update(self: *Self, b: []const u8) void { + std.debug.assert(b.len % 8 == 0); + + var off: usize = 0; + while (off < b.len) : (off += 8) { + @call(.{ .modifier = .always_inline }, self.round, .{b[off .. 
off + 8]}); + } + + self.msg_len +%= @truncate(u8, b.len); + } + + pub fn final(self: *Self, b: []const u8) T { + std.debug.assert(b.len < 8); + + self.msg_len +%= @truncate(u8, b.len); + + var buf = [_]u8{0} ** 8; + mem.copy(u8, buf[0..], b[0..]); + buf[7] = self.msg_len; + self.round(buf[0..]); + + if (T == u128) { + self.v2 ^= 0xee; + } else { + self.v2 ^= 0xff; + } + + // TODO this is a workaround, should be able to supply the value without a separate variable + const inl = std.builtin.CallOptions{ .modifier = .always_inline }; + + comptime var i: usize = 0; + inline while (i < d_rounds) : (i += 1) { + @call(inl, sipRound, .{self}); + } + + const b1 = self.v0 ^ self.v1 ^ self.v2 ^ self.v3; + if (T == u64) { + return b1; + } + + self.v1 ^= 0xdd; + + comptime var j: usize = 0; + inline while (j < d_rounds) : (j += 1) { + @call(inl, sipRound, .{self}); + } + + const b2 = self.v0 ^ self.v1 ^ self.v2 ^ self.v3; + return (@as(u128, b2) << 64) | b1; + } + + fn round(self: *Self, b: []const u8) void { + assert(b.len == 8); + + const m = mem.readIntLittle(u64, b[0..8]); + self.v3 ^= m; + + // TODO this is a workaround, should be able to supply the value without a separate variable + const inl = std.builtin.CallOptions{ .modifier = .always_inline }; + comptime var i: usize = 0; + inline while (i < c_rounds) : (i += 1) { + @call(inl, sipRound, .{self}); + } + + self.v0 ^= m; + } + + fn sipRound(d: *Self) void { + d.v0 +%= d.v1; + d.v1 = math.rotl(u64, d.v1, @as(u64, 13)); + d.v1 ^= d.v0; + d.v0 = math.rotl(u64, d.v0, @as(u64, 32)); + d.v2 +%= d.v3; + d.v3 = math.rotl(u64, d.v3, @as(u64, 16)); + d.v3 ^= d.v2; + d.v0 +%= d.v3; + d.v3 = math.rotl(u64, d.v3, @as(u64, 21)); + d.v3 ^= d.v0; + d.v2 +%= d.v1; + d.v1 = math.rotl(u64, d.v1, @as(u64, 17)); + d.v1 ^= d.v2; + d.v2 = math.rotl(u64, d.v2, @as(u64, 32)); + } + + pub fn hash(msg: []const u8, key: []const u8) T { + const aligned_len = msg.len - (msg.len % 8); + var c = Self.init(key); + @call(.{ .modifier = .always_inline 
}, c.update, .{msg[0..aligned_len]}); + return @call(.{ .modifier = .always_inline }, c.final, .{msg[aligned_len..]}); + } + }; +} + +fn SipHash(comptime T: type, comptime c_rounds: usize, comptime d_rounds: usize) type { + assert(T == u64 or T == u128); + assert(c_rounds > 0 and d_rounds > 0); + + return struct { + const State = SipHashStateless(T, c_rounds, d_rounds); + const Self = @This(); + pub const minimum_key_length = 16; + pub const mac_length = @sizeOf(T); + pub const block_length = 8; + + state: State, + buf: [8]u8, + buf_len: usize, + + /// Initialize a state for a SipHash function + pub fn init(key: []const u8) Self { + return Self{ + .state = State.init(key), + .buf = undefined, + .buf_len = 0, + }; + } + + /// Add data to the state + pub fn update(self: *Self, b: []const u8) void { + var off: usize = 0; + + if (self.buf_len != 0 and self.buf_len + b.len >= 8) { + off += 8 - self.buf_len; + mem.copy(u8, self.buf[self.buf_len..], b[0..off]); + self.state.update(self.buf[0..]); + self.buf_len = 0; + } + + const remain_len = b.len - off; + const aligned_len = remain_len - (remain_len % 8); + self.state.update(b[off .. 
off + aligned_len]); + + mem.copy(u8, self.buf[self.buf_len..], b[off + aligned_len ..]); + self.buf_len += @intCast(u8, b[off + aligned_len ..].len); + } + + /// Return an authentication tag for the current state + pub fn final(self: *Self, out: []u8) void { + std.debug.assert(out.len >= mac_length); + mem.writeIntLittle(T, out[0..mac_length], self.state.final(self.buf[0..self.buf_len])); + } + + /// Return an authentication tag for a message and a key + pub fn create(out: []u8, msg: []const u8, key: []const u8) void { + var ctx = Self.init(key); + ctx.update(msg); + ctx.final(out[0..]); + } + + /// Return an authentication tag for the current state, as an integer + pub fn finalInt(self: *Self) T { + return self.state.final(self.buf[0..self.buf_len]); + } + + /// Return an authentication tag for a message and a key, as an integer + pub fn toInt(msg: []const u8, key: []const u8) T { + return State.hash(msg, key); + } + }; +} + +// Test vectors from reference implementation. +// https://github.com/veorq/SipHash/blob/master/vectors.h +const test_key = "\x00\x01\x02\x03\x04\x05\x06\x07\x08\x09\x0a\x0b\x0c\x0d\x0e\x0f"; + +test "siphash64-2-4 sanity" { + const vectors = [_][8]u8{ + "\x31\x0e\x0e\xdd\x47\xdb\x6f\x72".*, // "" + "\xfd\x67\xdc\x93\xc5\x39\xf8\x74".*, // "\x00" + "\x5a\x4f\xa9\xd9\x09\x80\x6c\x0d".*, // "\x00\x01" ... 
etc + "\x2d\x7e\xfb\xd7\x96\x66\x67\x85".*, + "\xb7\x87\x71\x27\xe0\x94\x27\xcf".*, + "\x8d\xa6\x99\xcd\x64\x55\x76\x18".*, + "\xce\xe3\xfe\x58\x6e\x46\xc9\xcb".*, + "\x37\xd1\x01\x8b\xf5\x00\x02\xab".*, + "\x62\x24\x93\x9a\x79\xf5\xf5\x93".*, + "\xb0\xe4\xa9\x0b\xdf\x82\x00\x9e".*, + "\xf3\xb9\xdd\x94\xc5\xbb\x5d\x7a".*, + "\xa7\xad\x6b\x22\x46\x2f\xb3\xf4".*, + "\xfb\xe5\x0e\x86\xbc\x8f\x1e\x75".*, + "\x90\x3d\x84\xc0\x27\x56\xea\x14".*, + "\xee\xf2\x7a\x8e\x90\xca\x23\xf7".*, + "\xe5\x45\xbe\x49\x61\xca\x29\xa1".*, + "\xdb\x9b\xc2\x57\x7f\xcc\x2a\x3f".*, + "\x94\x47\xbe\x2c\xf5\xe9\x9a\x69".*, + "\x9c\xd3\x8d\x96\xf0\xb3\xc1\x4b".*, + "\xbd\x61\x79\xa7\x1d\xc9\x6d\xbb".*, + "\x98\xee\xa2\x1a\xf2\x5c\xd6\xbe".*, + "\xc7\x67\x3b\x2e\xb0\xcb\xf2\xd0".*, + "\x88\x3e\xa3\xe3\x95\x67\x53\x93".*, + "\xc8\xce\x5c\xcd\x8c\x03\x0c\xa8".*, + "\x94\xaf\x49\xf6\xc6\x50\xad\xb8".*, + "\xea\xb8\x85\x8a\xde\x92\xe1\xbc".*, + "\xf3\x15\xbb\x5b\xb8\x35\xd8\x17".*, + "\xad\xcf\x6b\x07\x63\x61\x2e\x2f".*, + "\xa5\xc9\x1d\xa7\xac\xaa\x4d\xde".*, + "\x71\x65\x95\x87\x66\x50\xa2\xa6".*, + "\x28\xef\x49\x5c\x53\xa3\x87\xad".*, + "\x42\xc3\x41\xd8\xfa\x92\xd8\x32".*, + "\xce\x7c\xf2\x72\x2f\x51\x27\x71".*, + "\xe3\x78\x59\xf9\x46\x23\xf3\xa7".*, + "\x38\x12\x05\xbb\x1a\xb0\xe0\x12".*, + "\xae\x97\xa1\x0f\xd4\x34\xe0\x15".*, + "\xb4\xa3\x15\x08\xbe\xff\x4d\x31".*, + "\x81\x39\x62\x29\xf0\x90\x79\x02".*, + "\x4d\x0c\xf4\x9e\xe5\xd4\xdc\xca".*, + "\x5c\x73\x33\x6a\x76\xd8\xbf\x9a".*, + "\xd0\xa7\x04\x53\x6b\xa9\x3e\x0e".*, + "\x92\x59\x58\xfc\xd6\x42\x0c\xad".*, + "\xa9\x15\xc2\x9b\xc8\x06\x73\x18".*, + "\x95\x2b\x79\xf3\xbc\x0a\xa6\xd4".*, + "\xf2\x1d\xf2\xe4\x1d\x45\x35\xf9".*, + "\x87\x57\x75\x19\x04\x8f\x53\xa9".*, + "\x10\xa5\x6c\xf5\xdf\xcd\x9a\xdb".*, + "\xeb\x75\x09\x5c\xcd\x98\x6c\xd0".*, + "\x51\xa9\xcb\x9e\xcb\xa3\x12\xe6".*, + "\x96\xaf\xad\xfc\x2c\xe6\x66\xc7".*, + "\x72\xfe\x52\x97\x5a\x43\x64\xee".*, + "\x5a\x16\x45\xb2\x76\xd5\x92\xa1".*, + 
"\xb2\x74\xcb\x8e\xbf\x87\x87\x0a".*, + "\x6f\x9b\xb4\x20\x3d\xe7\xb3\x81".*, + "\xea\xec\xb2\xa3\x0b\x22\xa8\x7f".*, + "\x99\x24\xa4\x3c\xc1\x31\x57\x24".*, + "\xbd\x83\x8d\x3a\xaf\xbf\x8d\xb7".*, + "\x0b\x1a\x2a\x32\x65\xd5\x1a\xea".*, + "\x13\x50\x79\xa3\x23\x1c\xe6\x60".*, + "\x93\x2b\x28\x46\xe4\xd7\x06\x66".*, + "\xe1\x91\x5f\x5c\xb1\xec\xa4\x6c".*, + "\xf3\x25\x96\x5c\xa1\x6d\x62\x9f".*, + "\x57\x5f\xf2\x8e\x60\x38\x1b\xe5".*, + "\x72\x45\x06\xeb\x4c\x32\x8a\x95".*, + }; + + const siphash = SipHash64(2, 4); + + var buffer: [64]u8 = undefined; + for (vectors) |vector, i| { + buffer[i] = @intCast(u8, i); + + var out: [siphash.mac_length]u8 = undefined; + siphash.create(&out, buffer[0..i], test_key); + testing.expectEqual(out, vector); + } +} + +test "siphash128-2-4 sanity" { + const vectors = [_][16]u8{ + "\xa3\x81\x7f\x04\xba\x25\xa8\xe6\x6d\xf6\x72\x14\xc7\x55\x02\x93".*, + "\xda\x87\xc1\xd8\x6b\x99\xaf\x44\x34\x76\x59\x11\x9b\x22\xfc\x45".*, + "\x81\x77\x22\x8d\xa4\xa4\x5d\xc7\xfc\xa3\x8b\xde\xf6\x0a\xff\xe4".*, + "\x9c\x70\xb6\x0c\x52\x67\xa9\x4e\x5f\x33\xb6\xb0\x29\x85\xed\x51".*, + "\xf8\x81\x64\xc1\x2d\x9c\x8f\xaf\x7d\x0f\x6e\x7c\x7b\xcd\x55\x79".*, + "\x13\x68\x87\x59\x80\x77\x6f\x88\x54\x52\x7a\x07\x69\x0e\x96\x27".*, + "\x14\xee\xca\x33\x8b\x20\x86\x13\x48\x5e\xa0\x30\x8f\xd7\xa1\x5e".*, + "\xa1\xf1\xeb\xbe\xd8\xdb\xc1\x53\xc0\xb8\x4a\xa6\x1f\xf0\x82\x39".*, + "\x3b\x62\xa9\xba\x62\x58\xf5\x61\x0f\x83\xe2\x64\xf3\x14\x97\xb4".*, + "\x26\x44\x99\x06\x0a\xd9\xba\xab\xc4\x7f\x8b\x02\xbb\x6d\x71\xed".*, + "\x00\x11\x0d\xc3\x78\x14\x69\x56\xc9\x54\x47\xd3\xf3\xd0\xfb\xba".*, + "\x01\x51\xc5\x68\x38\x6b\x66\x77\xa2\xb4\xdc\x6f\x81\xe5\xdc\x18".*, + "\xd6\x26\xb2\x66\x90\x5e\xf3\x58\x82\x63\x4d\xf6\x85\x32\xc1\x25".*, + "\x98\x69\xe2\x47\xe9\xc0\x8b\x10\xd0\x29\x93\x4f\xc4\xb9\x52\xf7".*, + "\x31\xfc\xef\xac\x66\xd7\xde\x9c\x7e\xc7\x48\x5f\xe4\x49\x49\x02".*, + "\x54\x93\xe9\x99\x33\xb0\xa8\x11\x7e\x08\xec\x0f\x97\xcf\xc3\xd9".*, + 
"\x6e\xe2\xa4\xca\x67\xb0\x54\xbb\xfd\x33\x15\xbf\x85\x23\x05\x77".*, + "\x47\x3d\x06\xe8\x73\x8d\xb8\x98\x54\xc0\x66\xc4\x7a\xe4\x77\x40".*, + "\xa4\x26\xe5\xe4\x23\xbf\x48\x85\x29\x4d\xa4\x81\xfe\xae\xf7\x23".*, + "\x78\x01\x77\x31\xcf\x65\xfa\xb0\x74\xd5\x20\x89\x52\x51\x2e\xb1".*, + "\x9e\x25\xfc\x83\x3f\x22\x90\x73\x3e\x93\x44\xa5\xe8\x38\x39\xeb".*, + "\x56\x8e\x49\x5a\xbe\x52\x5a\x21\x8a\x22\x14\xcd\x3e\x07\x1d\x12".*, + "\x4a\x29\xb5\x45\x52\xd1\x6b\x9a\x46\x9c\x10\x52\x8e\xff\x0a\xae".*, + "\xc9\xd1\x84\xdd\xd5\xa9\xf5\xe0\xcf\x8c\xe2\x9a\x9a\xbf\x69\x1c".*, + "\x2d\xb4\x79\xae\x78\xbd\x50\xd8\x88\x2a\x8a\x17\x8a\x61\x32\xad".*, + "\x8e\xce\x5f\x04\x2d\x5e\x44\x7b\x50\x51\xb9\xea\xcb\x8d\x8f\x6f".*, + "\x9c\x0b\x53\xb4\xb3\xc3\x07\xe8\x7e\xae\xe0\x86\x78\x14\x1f\x66".*, + "\xab\xf2\x48\xaf\x69\xa6\xea\xe4\xbf\xd3\xeb\x2f\x12\x9e\xeb\x94".*, + "\x06\x64\xda\x16\x68\x57\x4b\x88\xb9\x35\xf3\x02\x73\x58\xae\xf4".*, + "\xaa\x4b\x9d\xc4\xbf\x33\x7d\xe9\x0c\xd4\xfd\x3c\x46\x7c\x6a\xb7".*, + "\xea\x5c\x7f\x47\x1f\xaf\x6b\xde\x2b\x1a\xd7\xd4\x68\x6d\x22\x87".*, + "\x29\x39\xb0\x18\x32\x23\xfa\xfc\x17\x23\xde\x4f\x52\xc4\x3d\x35".*, + "\x7c\x39\x56\xca\x5e\xea\xfc\x3e\x36\x3e\x9d\x55\x65\x46\xeb\x68".*, + "\x77\xc6\x07\x71\x46\xf0\x1c\x32\xb6\xb6\x9d\x5f\x4e\xa9\xff\xcf".*, + "\x37\xa6\x98\x6c\xb8\x84\x7e\xdf\x09\x25\xf0\xf1\x30\x9b\x54\xde".*, + "\xa7\x05\xf0\xe6\x9d\xa9\xa8\xf9\x07\x24\x1a\x2e\x92\x3c\x8c\xc8".*, + "\x3d\xc4\x7d\x1f\x29\xc4\x48\x46\x1e\x9e\x76\xed\x90\x4f\x67\x11".*, + "\x0d\x62\xbf\x01\xe6\xfc\x0e\x1a\x0d\x3c\x47\x51\xc5\xd3\x69\x2b".*, + "\x8c\x03\x46\x8b\xca\x7c\x66\x9e\xe4\xfd\x5e\x08\x4b\xbe\xe7\xb5".*, + "\x52\x8a\x5b\xb9\x3b\xaf\x2c\x9c\x44\x73\xcc\xe5\xd0\xd2\x2b\xd9".*, + "\xdf\x6a\x30\x1e\x95\xc9\x5d\xad\x97\xae\x0c\xc8\xc6\x91\x3b\xd8".*, + "\x80\x11\x89\x90\x2c\x85\x7f\x39\xe7\x35\x91\x28\x5e\x70\xb6\xdb".*, + "\xe6\x17\x34\x6a\xc9\xc2\x31\xbb\x36\x50\xae\x34\xcc\xca\x0c\x5b".*, + 
"\x27\xd9\x34\x37\xef\xb7\x21\xaa\x40\x18\x21\xdc\xec\x5a\xdf\x89".*, + "\x89\x23\x7d\x9d\xed\x9c\x5e\x78\xd8\xb1\xc9\xb1\x66\xcc\x73\x42".*, + "\x4a\x6d\x80\x91\xbf\x5e\x7d\x65\x11\x89\xfa\x94\xa2\x50\xb1\x4c".*, + "\x0e\x33\xf9\x60\x55\xe7\xae\x89\x3f\xfc\x0e\x3d\xcf\x49\x29\x02".*, + "\xe6\x1c\x43\x2b\x72\x0b\x19\xd1\x8e\xc8\xd8\x4b\xdc\x63\x15\x1b".*, + "\xf7\xe5\xae\xf5\x49\xf7\x82\xcf\x37\x90\x55\xa6\x08\x26\x9b\x16".*, + "\x43\x8d\x03\x0f\xd0\xb7\xa5\x4f\xa8\x37\xf2\xad\x20\x1a\x64\x03".*, + "\xa5\x90\xd3\xee\x4f\xbf\x04\xe3\x24\x7e\x0d\x27\xf2\x86\x42\x3f".*, + "\x5f\xe2\xc1\xa1\x72\xfe\x93\xc4\xb1\x5c\xd3\x7c\xae\xf9\xf5\x38".*, + "\x2c\x97\x32\x5c\xbd\x06\xb3\x6e\xb2\x13\x3d\xd0\x8b\x3a\x01\x7c".*, + "\x92\xc8\x14\x22\x7a\x6b\xca\x94\x9f\xf0\x65\x9f\x00\x2a\xd3\x9e".*, + "\xdc\xe8\x50\x11\x0b\xd8\x32\x8c\xfb\xd5\x08\x41\xd6\x91\x1d\x87".*, + "\x67\xf1\x49\x84\xc7\xda\x79\x12\x48\xe3\x2b\xb5\x92\x25\x83\xda".*, + "\x19\x38\xf2\xcf\x72\xd5\x4e\xe9\x7e\x94\x16\x6f\xa9\x1d\x2a\x36".*, + "\x74\x48\x1e\x96\x46\xed\x49\xfe\x0f\x62\x24\x30\x16\x04\x69\x8e".*, + "\x57\xfc\xa5\xde\x98\xa9\xd6\xd8\x00\x64\x38\xd0\x58\x3d\x8a\x1d".*, + "\x9f\xec\xde\x1c\xef\xdc\x1c\xbe\xd4\x76\x36\x74\xd9\x57\x53\x59".*, + "\xe3\x04\x0c\x00\xeb\x28\xf1\x53\x66\xca\x73\xcb\xd8\x72\xe7\x40".*, + "\x76\x97\x00\x9a\x6a\x83\x1d\xfe\xcc\xa9\x1c\x59\x93\x67\x0f\x7a".*, + "\x58\x53\x54\x23\x21\xf5\x67\xa0\x05\xd5\x47\xa4\xf0\x47\x59\xbd".*, + "\x51\x50\xd1\x77\x2f\x50\x83\x4a\x50\x3e\x06\x9a\x97\x3f\xbd\x7c".*, + }; + + const siphash = SipHash128(2, 4); + + var buffer: [64]u8 = undefined; + for (vectors) |vector, i| { + buffer[i] = @intCast(u8, i); + + var out: [siphash.mac_length]u8 = undefined; + siphash.create(&out, buffer[0..i], test_key[0..]); + testing.expectEqual(out, vector); + } +} + +test "iterative non-divisible update" { + var buf: [1024]u8 = undefined; + for (buf) |*e, i| { + e.* = @truncate(u8, i); + } + + const key = "0x128dad08f12307"; + const Siphash = SipHash64(2, 4); + + 
var end: usize = 9; + while (end < buf.len) : (end += 9) { + const non_iterative_hash = Siphash.toInt(buf[0..end], key[0..]); + + var siphash = Siphash.init(key); + var i: usize = 0; + while (i < end) : (i += 7) { + siphash.update(buf[i..std.math.min(i + 7, end)]); + } + const iterative_hash = siphash.finalInt(); + + std.testing.expectEqual(iterative_hash, non_iterative_hash); + } +} diff --git a/lib/std/elf.zig b/lib/std/elf.zig @@ -976,6 +976,9 @@ pub const EM = extern enum(u16) { /// MIPS RS3000 Little-endian _MIPS_RS3_LE = 10, + /// SPU Mark II + _SPU_2 = 13, + /// Hewlett-Packard PA-RISC _PARISC = 15, diff --git a/lib/std/fs.zig b/lib/std/fs.zig @@ -686,21 +686,28 @@ pub const Dir = struct { return self.openFileW(path_w.span(), flags); } + var os_flags: u32 = os.O_CLOEXEC; // Use the O_ locking flags if the os supports them // (Or if it's darwin, as darwin's `open` doesn't support the O_SYNC flag) const has_flock_open_flags = @hasDecl(os, "O_EXLOCK") and !is_darwin; - const nonblocking_lock_flag = if (has_flock_open_flags and flags.lock_nonblocking) - os.O_NONBLOCK | os.O_SYNC - else - @as(u32, 0); - const lock_flag: u32 = if (has_flock_open_flags) switch (flags.lock) { - .None => @as(u32, 0), - .Shared => os.O_SHLOCK | nonblocking_lock_flag, - .Exclusive => os.O_EXLOCK | nonblocking_lock_flag, - } else 0; - - const O_LARGEFILE = if (@hasDecl(os, "O_LARGEFILE")) os.O_LARGEFILE else 0; - const os_flags = lock_flag | O_LARGEFILE | os.O_CLOEXEC | if (flags.write and flags.read) + if (has_flock_open_flags) { + const nonblocking_lock_flag = if (flags.lock_nonblocking) + os.O_NONBLOCK | os.O_SYNC + else + @as(u32, 0); + os_flags |= switch (flags.lock) { + .None => @as(u32, 0), + .Shared => os.O_SHLOCK | nonblocking_lock_flag, + .Exclusive => os.O_EXLOCK | nonblocking_lock_flag, + }; + } + if (@hasDecl(os, "O_LARGEFILE")) { + os_flags |= os.O_LARGEFILE; + } + if (!flags.allow_ctty) { + os_flags |= os.O_NOCTTY; + } + os_flags |= if (flags.write and flags.read) 
@as(u32, os.O_RDWR) else if (flags.write) @as(u32, os.O_WRONLY) diff --git a/lib/std/fs/file.zig b/lib/std/fs/file.zig @@ -101,6 +101,10 @@ pub const File = struct { /// if `std.io.is_async`. It allows the use of `nosuspend` when calling functions /// related to opening the file, reading, writing, and locking. intended_io_mode: io.ModeOverride = io.default_mode, + + /// Set this to allow the opened file to automatically become the + /// controlling TTY for the current process. + allow_ctty: bool = false, }; /// TODO https://github.com/ziglang/zig/issues/3802 diff --git a/lib/std/hash.zig b/lib/std/hash.zig @@ -20,7 +20,7 @@ pub const Fnv1a_32 = fnv.Fnv1a_32; pub const Fnv1a_64 = fnv.Fnv1a_64; pub const Fnv1a_128 = fnv.Fnv1a_128; -const siphash = @import("hash/siphash.zig"); +const siphash = @import("crypto/siphash.zig"); pub const SipHash64 = siphash.SipHash64; pub const SipHash128 = siphash.SipHash128; @@ -42,7 +42,6 @@ test "hash" { _ = @import("hash/auto_hash.zig"); _ = @import("hash/crc.zig"); _ = @import("hash/fnv.zig"); - _ = @import("hash/siphash.zig"); _ = @import("hash/murmur.zig"); _ = @import("hash/cityhash.zig"); _ = @import("hash/wyhash.zig"); diff --git a/lib/std/hash/benchmark.zig b/lib/std/hash/benchmark.zig @@ -25,8 +25,6 @@ const Hash = struct { init_u64: ?u64 = null, }; -const siphash_key = "0123456789abcdef"; - const hashes = [_]Hash{ Hash{ .ty = hash.Wyhash, @@ -34,16 +32,6 @@ const hashes = [_]Hash{ .init_u64 = 0, }, Hash{ - .ty = hash.SipHash64(1, 3), - .name = "siphash(1,3)", - .init_u8s = siphash_key, - }, - Hash{ - .ty = hash.SipHash64(2, 4), - .name = "siphash(2,4)", - .init_u8s = siphash_key, - }, - Hash{ .ty = hash.Fnv1a_64, .name = "fnv1a", }, diff --git a/lib/std/hash/siphash.zig b/lib/std/hash/siphash.zig @@ -1,393 +0,0 @@ -// SPDX-License-Identifier: MIT -// Copyright (c) 2015-2020 Zig Contributors -// This file is part of [zig](https://ziglang.org/), which is MIT licensed. 
-// The MIT license requires this copyright notice to be included in all copies -// and substantial portions of the software. -// Siphash -// -// SipHash is a moderately fast, non-cryptographic keyed hash function designed for resistance -// against hash flooding DoS attacks. -// -// https://131002.net/siphash/ - -const std = @import("../std.zig"); -const assert = std.debug.assert; -const testing = std.testing; -const math = std.math; -const mem = std.mem; - -const Endian = std.builtin.Endian; - -pub fn SipHash64(comptime c_rounds: usize, comptime d_rounds: usize) type { - return SipHash(u64, c_rounds, d_rounds); -} - -pub fn SipHash128(comptime c_rounds: usize, comptime d_rounds: usize) type { - return SipHash(u128, c_rounds, d_rounds); -} - -fn SipHashStateless(comptime T: type, comptime c_rounds: usize, comptime d_rounds: usize) type { - assert(T == u64 or T == u128); - assert(c_rounds > 0 and d_rounds > 0); - - return struct { - const Self = @This(); - const digest_size = 64; - const block_size = 64; - - v0: u64, - v1: u64, - v2: u64, - v3: u64, - msg_len: u8, - - pub fn init(key: []const u8) Self { - assert(key.len >= 16); - - const k0 = mem.readIntLittle(u64, key[0..8]); - const k1 = mem.readIntLittle(u64, key[8..16]); - - var d = Self{ - .v0 = k0 ^ 0x736f6d6570736575, - .v1 = k1 ^ 0x646f72616e646f6d, - .v2 = k0 ^ 0x6c7967656e657261, - .v3 = k1 ^ 0x7465646279746573, - .msg_len = 0, - }; - - if (T == u128) { - d.v1 ^= 0xee; - } - - return d; - } - - pub fn update(self: *Self, b: []const u8) void { - std.debug.assert(b.len % 8 == 0); - - var off: usize = 0; - while (off < b.len) : (off += 8) { - @call(.{ .modifier = .always_inline }, self.round, .{b[off .. 
off + 8]}); - } - - self.msg_len +%= @truncate(u8, b.len); - } - - pub fn final(self: *Self, b: []const u8) T { - std.debug.assert(b.len < 8); - - self.msg_len +%= @truncate(u8, b.len); - - var buf = [_]u8{0} ** 8; - mem.copy(u8, buf[0..], b[0..]); - buf[7] = self.msg_len; - self.round(buf[0..]); - - if (T == u128) { - self.v2 ^= 0xee; - } else { - self.v2 ^= 0xff; - } - - // TODO this is a workaround, should be able to supply the value without a separate variable - const inl = std.builtin.CallOptions{ .modifier = .always_inline }; - - comptime var i: usize = 0; - inline while (i < d_rounds) : (i += 1) { - @call(inl, sipRound, .{self}); - } - - const b1 = self.v0 ^ self.v1 ^ self.v2 ^ self.v3; - if (T == u64) { - return b1; - } - - self.v1 ^= 0xdd; - - comptime var j: usize = 0; - inline while (j < d_rounds) : (j += 1) { - @call(inl, sipRound, .{self}); - } - - const b2 = self.v0 ^ self.v1 ^ self.v2 ^ self.v3; - return (@as(u128, b2) << 64) | b1; - } - - fn round(self: *Self, b: []const u8) void { - assert(b.len == 8); - - const m = mem.readIntLittle(u64, b[0..8]); - self.v3 ^= m; - - // TODO this is a workaround, should be able to supply the value without a separate variable - const inl = std.builtin.CallOptions{ .modifier = .always_inline }; - comptime var i: usize = 0; - inline while (i < c_rounds) : (i += 1) { - @call(inl, sipRound, .{self}); - } - - self.v0 ^= m; - } - - fn sipRound(d: *Self) void { - d.v0 +%= d.v1; - d.v1 = math.rotl(u64, d.v1, @as(u64, 13)); - d.v1 ^= d.v0; - d.v0 = math.rotl(u64, d.v0, @as(u64, 32)); - d.v2 +%= d.v3; - d.v3 = math.rotl(u64, d.v3, @as(u64, 16)); - d.v3 ^= d.v2; - d.v0 +%= d.v3; - d.v3 = math.rotl(u64, d.v3, @as(u64, 21)); - d.v3 ^= d.v0; - d.v2 +%= d.v1; - d.v1 = math.rotl(u64, d.v1, @as(u64, 17)); - d.v1 ^= d.v2; - d.v2 = math.rotl(u64, d.v2, @as(u64, 32)); - } - - pub fn hash(key: []const u8, input: []const u8) T { - const aligned_len = input.len - (input.len % 8); - - var c = Self.init(key); - @call(.{ .modifier = 
.always_inline }, c.update, .{input[0..aligned_len]}); - return @call(.{ .modifier = .always_inline }, c.final, .{input[aligned_len..]}); - } - }; -} - -pub fn SipHash(comptime T: type, comptime c_rounds: usize, comptime d_rounds: usize) type { - assert(T == u64 or T == u128); - assert(c_rounds > 0 and d_rounds > 0); - - return struct { - const State = SipHashStateless(T, c_rounds, d_rounds); - const Self = @This(); - const digest_size = 64; - const block_size = 64; - - state: State, - buf: [8]u8, - buf_len: usize, - - pub fn init(key: []const u8) Self { - return Self{ - .state = State.init(key), - .buf = undefined, - .buf_len = 0, - }; - } - - pub fn update(self: *Self, b: []const u8) void { - var off: usize = 0; - - if (self.buf_len != 0 and self.buf_len + b.len >= 8) { - off += 8 - self.buf_len; - mem.copy(u8, self.buf[self.buf_len..], b[0..off]); - self.state.update(self.buf[0..]); - self.buf_len = 0; - } - - const remain_len = b.len - off; - const aligned_len = remain_len - (remain_len % 8); - self.state.update(b[off .. off + aligned_len]); - - mem.copy(u8, self.buf[self.buf_len..], b[off + aligned_len ..]); - self.buf_len += @intCast(u8, b[off + aligned_len ..].len); - } - - pub fn final(self: *Self) T { - return self.state.final(self.buf[0..self.buf_len]); - } - - pub fn hash(key: []const u8, input: []const u8) T { - return State.hash(key, input); - } - }; -} - -// Test vectors from reference implementation. -// https://github.com/veorq/SipHash/blob/master/vectors.h -const test_key = "\x00\x01\x02\x03\x04\x05\x06\x07\x08\x09\x0a\x0b\x0c\x0d\x0e\x0f"; - -test "siphash64-2-4 sanity" { - const vectors = [_][8]u8{ - "\x31\x0e\x0e\xdd\x47\xdb\x6f\x72".*, // "" - "\xfd\x67\xdc\x93\xc5\x39\xf8\x74".*, // "\x00" - "\x5a\x4f\xa9\xd9\x09\x80\x6c\x0d".*, // "\x00\x01" ... 
etc - "\x2d\x7e\xfb\xd7\x96\x66\x67\x85".*, - "\xb7\x87\x71\x27\xe0\x94\x27\xcf".*, - "\x8d\xa6\x99\xcd\x64\x55\x76\x18".*, - "\xce\xe3\xfe\x58\x6e\x46\xc9\xcb".*, - "\x37\xd1\x01\x8b\xf5\x00\x02\xab".*, - "\x62\x24\x93\x9a\x79\xf5\xf5\x93".*, - "\xb0\xe4\xa9\x0b\xdf\x82\x00\x9e".*, - "\xf3\xb9\xdd\x94\xc5\xbb\x5d\x7a".*, - "\xa7\xad\x6b\x22\x46\x2f\xb3\xf4".*, - "\xfb\xe5\x0e\x86\xbc\x8f\x1e\x75".*, - "\x90\x3d\x84\xc0\x27\x56\xea\x14".*, - "\xee\xf2\x7a\x8e\x90\xca\x23\xf7".*, - "\xe5\x45\xbe\x49\x61\xca\x29\xa1".*, - "\xdb\x9b\xc2\x57\x7f\xcc\x2a\x3f".*, - "\x94\x47\xbe\x2c\xf5\xe9\x9a\x69".*, - "\x9c\xd3\x8d\x96\xf0\xb3\xc1\x4b".*, - "\xbd\x61\x79\xa7\x1d\xc9\x6d\xbb".*, - "\x98\xee\xa2\x1a\xf2\x5c\xd6\xbe".*, - "\xc7\x67\x3b\x2e\xb0\xcb\xf2\xd0".*, - "\x88\x3e\xa3\xe3\x95\x67\x53\x93".*, - "\xc8\xce\x5c\xcd\x8c\x03\x0c\xa8".*, - "\x94\xaf\x49\xf6\xc6\x50\xad\xb8".*, - "\xea\xb8\x85\x8a\xde\x92\xe1\xbc".*, - "\xf3\x15\xbb\x5b\xb8\x35\xd8\x17".*, - "\xad\xcf\x6b\x07\x63\x61\x2e\x2f".*, - "\xa5\xc9\x1d\xa7\xac\xaa\x4d\xde".*, - "\x71\x65\x95\x87\x66\x50\xa2\xa6".*, - "\x28\xef\x49\x5c\x53\xa3\x87\xad".*, - "\x42\xc3\x41\xd8\xfa\x92\xd8\x32".*, - "\xce\x7c\xf2\x72\x2f\x51\x27\x71".*, - "\xe3\x78\x59\xf9\x46\x23\xf3\xa7".*, - "\x38\x12\x05\xbb\x1a\xb0\xe0\x12".*, - "\xae\x97\xa1\x0f\xd4\x34\xe0\x15".*, - "\xb4\xa3\x15\x08\xbe\xff\x4d\x31".*, - "\x81\x39\x62\x29\xf0\x90\x79\x02".*, - "\x4d\x0c\xf4\x9e\xe5\xd4\xdc\xca".*, - "\x5c\x73\x33\x6a\x76\xd8\xbf\x9a".*, - "\xd0\xa7\x04\x53\x6b\xa9\x3e\x0e".*, - "\x92\x59\x58\xfc\xd6\x42\x0c\xad".*, - "\xa9\x15\xc2\x9b\xc8\x06\x73\x18".*, - "\x95\x2b\x79\xf3\xbc\x0a\xa6\xd4".*, - "\xf2\x1d\xf2\xe4\x1d\x45\x35\xf9".*, - "\x87\x57\x75\x19\x04\x8f\x53\xa9".*, - "\x10\xa5\x6c\xf5\xdf\xcd\x9a\xdb".*, - "\xeb\x75\x09\x5c\xcd\x98\x6c\xd0".*, - "\x51\xa9\xcb\x9e\xcb\xa3\x12\xe6".*, - "\x96\xaf\xad\xfc\x2c\xe6\x66\xc7".*, - "\x72\xfe\x52\x97\x5a\x43\x64\xee".*, - "\x5a\x16\x45\xb2\x76\xd5\x92\xa1".*, - 
"\xb2\x74\xcb\x8e\xbf\x87\x87\x0a".*, - "\x6f\x9b\xb4\x20\x3d\xe7\xb3\x81".*, - "\xea\xec\xb2\xa3\x0b\x22\xa8\x7f".*, - "\x99\x24\xa4\x3c\xc1\x31\x57\x24".*, - "\xbd\x83\x8d\x3a\xaf\xbf\x8d\xb7".*, - "\x0b\x1a\x2a\x32\x65\xd5\x1a\xea".*, - "\x13\x50\x79\xa3\x23\x1c\xe6\x60".*, - "\x93\x2b\x28\x46\xe4\xd7\x06\x66".*, - "\xe1\x91\x5f\x5c\xb1\xec\xa4\x6c".*, - "\xf3\x25\x96\x5c\xa1\x6d\x62\x9f".*, - "\x57\x5f\xf2\x8e\x60\x38\x1b\xe5".*, - "\x72\x45\x06\xeb\x4c\x32\x8a\x95".*, - }; - - const siphash = SipHash64(2, 4); - - var buffer: [64]u8 = undefined; - for (vectors) |vector, i| { - buffer[i] = @intCast(u8, i); - - const expected = mem.readIntLittle(u64, &vector); - testing.expectEqual(siphash.hash(test_key, buffer[0..i]), expected); - } -} - -test "siphash128-2-4 sanity" { - const vectors = [_][16]u8{ - "\xa3\x81\x7f\x04\xba\x25\xa8\xe6\x6d\xf6\x72\x14\xc7\x55\x02\x93".*, - "\xda\x87\xc1\xd8\x6b\x99\xaf\x44\x34\x76\x59\x11\x9b\x22\xfc\x45".*, - "\x81\x77\x22\x8d\xa4\xa4\x5d\xc7\xfc\xa3\x8b\xde\xf6\x0a\xff\xe4".*, - "\x9c\x70\xb6\x0c\x52\x67\xa9\x4e\x5f\x33\xb6\xb0\x29\x85\xed\x51".*, - "\xf8\x81\x64\xc1\x2d\x9c\x8f\xaf\x7d\x0f\x6e\x7c\x7b\xcd\x55\x79".*, - "\x13\x68\x87\x59\x80\x77\x6f\x88\x54\x52\x7a\x07\x69\x0e\x96\x27".*, - "\x14\xee\xca\x33\x8b\x20\x86\x13\x48\x5e\xa0\x30\x8f\xd7\xa1\x5e".*, - "\xa1\xf1\xeb\xbe\xd8\xdb\xc1\x53\xc0\xb8\x4a\xa6\x1f\xf0\x82\x39".*, - "\x3b\x62\xa9\xba\x62\x58\xf5\x61\x0f\x83\xe2\x64\xf3\x14\x97\xb4".*, - "\x26\x44\x99\x06\x0a\xd9\xba\xab\xc4\x7f\x8b\x02\xbb\x6d\x71\xed".*, - "\x00\x11\x0d\xc3\x78\x14\x69\x56\xc9\x54\x47\xd3\xf3\xd0\xfb\xba".*, - "\x01\x51\xc5\x68\x38\x6b\x66\x77\xa2\xb4\xdc\x6f\x81\xe5\xdc\x18".*, - "\xd6\x26\xb2\x66\x90\x5e\xf3\x58\x82\x63\x4d\xf6\x85\x32\xc1\x25".*, - "\x98\x69\xe2\x47\xe9\xc0\x8b\x10\xd0\x29\x93\x4f\xc4\xb9\x52\xf7".*, - "\x31\xfc\xef\xac\x66\xd7\xde\x9c\x7e\xc7\x48\x5f\xe4\x49\x49\x02".*, - "\x54\x93\xe9\x99\x33\xb0\xa8\x11\x7e\x08\xec\x0f\x97\xcf\xc3\xd9".*, - 
"\x6e\xe2\xa4\xca\x67\xb0\x54\xbb\xfd\x33\x15\xbf\x85\x23\x05\x77".*, - "\x47\x3d\x06\xe8\x73\x8d\xb8\x98\x54\xc0\x66\xc4\x7a\xe4\x77\x40".*, - "\xa4\x26\xe5\xe4\x23\xbf\x48\x85\x29\x4d\xa4\x81\xfe\xae\xf7\x23".*, - "\x78\x01\x77\x31\xcf\x65\xfa\xb0\x74\xd5\x20\x89\x52\x51\x2e\xb1".*, - "\x9e\x25\xfc\x83\x3f\x22\x90\x73\x3e\x93\x44\xa5\xe8\x38\x39\xeb".*, - "\x56\x8e\x49\x5a\xbe\x52\x5a\x21\x8a\x22\x14\xcd\x3e\x07\x1d\x12".*, - "\x4a\x29\xb5\x45\x52\xd1\x6b\x9a\x46\x9c\x10\x52\x8e\xff\x0a\xae".*, - "\xc9\xd1\x84\xdd\xd5\xa9\xf5\xe0\xcf\x8c\xe2\x9a\x9a\xbf\x69\x1c".*, - "\x2d\xb4\x79\xae\x78\xbd\x50\xd8\x88\x2a\x8a\x17\x8a\x61\x32\xad".*, - "\x8e\xce\x5f\x04\x2d\x5e\x44\x7b\x50\x51\xb9\xea\xcb\x8d\x8f\x6f".*, - "\x9c\x0b\x53\xb4\xb3\xc3\x07\xe8\x7e\xae\xe0\x86\x78\x14\x1f\x66".*, - "\xab\xf2\x48\xaf\x69\xa6\xea\xe4\xbf\xd3\xeb\x2f\x12\x9e\xeb\x94".*, - "\x06\x64\xda\x16\x68\x57\x4b\x88\xb9\x35\xf3\x02\x73\x58\xae\xf4".*, - "\xaa\x4b\x9d\xc4\xbf\x33\x7d\xe9\x0c\xd4\xfd\x3c\x46\x7c\x6a\xb7".*, - "\xea\x5c\x7f\x47\x1f\xaf\x6b\xde\x2b\x1a\xd7\xd4\x68\x6d\x22\x87".*, - "\x29\x39\xb0\x18\x32\x23\xfa\xfc\x17\x23\xde\x4f\x52\xc4\x3d\x35".*, - "\x7c\x39\x56\xca\x5e\xea\xfc\x3e\x36\x3e\x9d\x55\x65\x46\xeb\x68".*, - "\x77\xc6\x07\x71\x46\xf0\x1c\x32\xb6\xb6\x9d\x5f\x4e\xa9\xff\xcf".*, - "\x37\xa6\x98\x6c\xb8\x84\x7e\xdf\x09\x25\xf0\xf1\x30\x9b\x54\xde".*, - "\xa7\x05\xf0\xe6\x9d\xa9\xa8\xf9\x07\x24\x1a\x2e\x92\x3c\x8c\xc8".*, - "\x3d\xc4\x7d\x1f\x29\xc4\x48\x46\x1e\x9e\x76\xed\x90\x4f\x67\x11".*, - "\x0d\x62\xbf\x01\xe6\xfc\x0e\x1a\x0d\x3c\x47\x51\xc5\xd3\x69\x2b".*, - "\x8c\x03\x46\x8b\xca\x7c\x66\x9e\xe4\xfd\x5e\x08\x4b\xbe\xe7\xb5".*, - "\x52\x8a\x5b\xb9\x3b\xaf\x2c\x9c\x44\x73\xcc\xe5\xd0\xd2\x2b\xd9".*, - "\xdf\x6a\x30\x1e\x95\xc9\x5d\xad\x97\xae\x0c\xc8\xc6\x91\x3b\xd8".*, - "\x80\x11\x89\x90\x2c\x85\x7f\x39\xe7\x35\x91\x28\x5e\x70\xb6\xdb".*, - "\xe6\x17\x34\x6a\xc9\xc2\x31\xbb\x36\x50\xae\x34\xcc\xca\x0c\x5b".*, - 
"\x27\xd9\x34\x37\xef\xb7\x21\xaa\x40\x18\x21\xdc\xec\x5a\xdf\x89".*, - "\x89\x23\x7d\x9d\xed\x9c\x5e\x78\xd8\xb1\xc9\xb1\x66\xcc\x73\x42".*, - "\x4a\x6d\x80\x91\xbf\x5e\x7d\x65\x11\x89\xfa\x94\xa2\x50\xb1\x4c".*, - "\x0e\x33\xf9\x60\x55\xe7\xae\x89\x3f\xfc\x0e\x3d\xcf\x49\x29\x02".*, - "\xe6\x1c\x43\x2b\x72\x0b\x19\xd1\x8e\xc8\xd8\x4b\xdc\x63\x15\x1b".*, - "\xf7\xe5\xae\xf5\x49\xf7\x82\xcf\x37\x90\x55\xa6\x08\x26\x9b\x16".*, - "\x43\x8d\x03\x0f\xd0\xb7\xa5\x4f\xa8\x37\xf2\xad\x20\x1a\x64\x03".*, - "\xa5\x90\xd3\xee\x4f\xbf\x04\xe3\x24\x7e\x0d\x27\xf2\x86\x42\x3f".*, - "\x5f\xe2\xc1\xa1\x72\xfe\x93\xc4\xb1\x5c\xd3\x7c\xae\xf9\xf5\x38".*, - "\x2c\x97\x32\x5c\xbd\x06\xb3\x6e\xb2\x13\x3d\xd0\x8b\x3a\x01\x7c".*, - "\x92\xc8\x14\x22\x7a\x6b\xca\x94\x9f\xf0\x65\x9f\x00\x2a\xd3\x9e".*, - "\xdc\xe8\x50\x11\x0b\xd8\x32\x8c\xfb\xd5\x08\x41\xd6\x91\x1d\x87".*, - "\x67\xf1\x49\x84\xc7\xda\x79\x12\x48\xe3\x2b\xb5\x92\x25\x83\xda".*, - "\x19\x38\xf2\xcf\x72\xd5\x4e\xe9\x7e\x94\x16\x6f\xa9\x1d\x2a\x36".*, - "\x74\x48\x1e\x96\x46\xed\x49\xfe\x0f\x62\x24\x30\x16\x04\x69\x8e".*, - "\x57\xfc\xa5\xde\x98\xa9\xd6\xd8\x00\x64\x38\xd0\x58\x3d\x8a\x1d".*, - "\x9f\xec\xde\x1c\xef\xdc\x1c\xbe\xd4\x76\x36\x74\xd9\x57\x53\x59".*, - "\xe3\x04\x0c\x00\xeb\x28\xf1\x53\x66\xca\x73\xcb\xd8\x72\xe7\x40".*, - "\x76\x97\x00\x9a\x6a\x83\x1d\xfe\xcc\xa9\x1c\x59\x93\x67\x0f\x7a".*, - "\x58\x53\x54\x23\x21\xf5\x67\xa0\x05\xd5\x47\xa4\xf0\x47\x59\xbd".*, - "\x51\x50\xd1\x77\x2f\x50\x83\x4a\x50\x3e\x06\x9a\x97\x3f\xbd\x7c".*, - }; - - const siphash = SipHash128(2, 4); - - var buffer: [64]u8 = undefined; - for (vectors) |vector, i| { - buffer[i] = @intCast(u8, i); - - const expected = mem.readIntLittle(u128, &vector); - testing.expectEqual(siphash.hash(test_key, buffer[0..i]), expected); - } -} - -test "iterative non-divisible update" { - var buf: [1024]u8 = undefined; - for (buf) |*e, i| { - e.* = @truncate(u8, i); - } - - const key = "0x128dad08f12307"; - const Siphash = SipHash64(2, 4); - - var end: 
usize = 9; - while (end < buf.len) : (end += 9) { - const non_iterative_hash = Siphash.hash(key, buf[0..end]); - - var wy = Siphash.init(key); - var i: usize = 0; - while (i < end) : (i += 7) { - wy.update(buf[i..std.math.min(i + 7, end)]); - } - const iterative_hash = wy.final(); - - std.testing.expectEqual(iterative_hash, non_iterative_hash); - } -} diff --git a/lib/std/heap/general_purpose_allocator.zig b/lib/std/heap/general_purpose_allocator.zig @@ -433,8 +433,7 @@ pub fn GeneralPurposeAllocator(comptime config: Config) type { const bucket_slice = @ptrCast([*]align(@alignOf(BucketHeader)) u8, bucket)[0..bucket_size]; self.backing_allocator.free(bucket_slice); } else { - // TODO Set the slot data to undefined. - // Related: https://github.com/ziglang/zig/issues/4298 + @memset(bucket.page + slot_index * size_class, undefined, size_class); } } @@ -567,6 +566,9 @@ pub fn GeneralPurposeAllocator(comptime config: Config) type { const new_aligned_size = math.max(new_size, old_align); const new_size_class = math.ceilPowerOfTwoAssert(usize, new_aligned_size); if (new_size_class <= size_class) { + if (old_mem.len > new_size) { + @memset(old_mem.ptr + new_size, undefined, old_mem.len - new_size); + } return new_size; } return error.OutOfMemory; diff --git a/lib/std/linked_list.zig b/lib/std/linked_list.zig @@ -28,12 +28,6 @@ pub fn SinglyLinkedList(comptime T: type) type { pub const Data = T; - pub fn init(data: T) Node { - return Node{ - .data = data, - }; - } - /// Insert a new node after the current one. /// /// Arguments: @@ -175,12 +169,6 @@ pub fn TailQueue(comptime T: type) type { prev: ?*Node = null, next: ?*Node = null, data: T, - - pub fn init(data: T) Node { - return Node{ - .data = data, - }; - } }; first: ?*Node = null, diff --git a/lib/std/macho.zig b/lib/std/macho.zig @@ -40,6 +40,24 @@ pub const uuid_command = extern struct { uuid: [16]u8, }; +/// The entry_point_command is a replacement for thread_command. 
+/// It is used for main executables to specify the location (file offset) +/// of main(). If -stack_size was used at link time, the stacksize +/// field will contain the stack size needed for the main thread. +pub const entry_point_command = struct { + /// LC_MAIN only used in MH_EXECUTE filetypes + cmd: u32, + + /// sizeof(struct entry_point_command) + cmdsize: u32, + + /// file (__TEXT) offset of main() + entryoff: u64, + + /// if not zero, initial stack size + stacksize: u64, +}; + /// The symtab_command contains the offsets and sizes of the link-edit 4.3BSD /// "stab" style symbol table information as described in the header files /// <nlist.h> and <stab.h>. @@ -65,7 +83,7 @@ pub const symtab_command = extern struct { /// The linkedit_data_command contains the offsets and sizes of a blob /// of data in the __LINKEDIT segment. -const linkedit_data_command = extern struct { +pub const linkedit_data_command = extern struct { /// LC_CODE_SIGNATURE, LC_SEGMENT_SPLIT_INFO, LC_FUNCTION_STARTS, LC_DATA_IN_CODE, LC_DYLIB_CODE_SIGN_DRS or LC_LINKER_OPTIMIZATION_HINT. cmd: u32, @@ -79,6 +97,65 @@ const linkedit_data_command = extern struct { datasize: u32, }; +/// A program that uses a dynamic linker contains a dylinker_command to identify +/// the name of the dynamic linker (LC_LOAD_DYLINKER). And a dynamic linker +/// contains a dylinker_command to identify the dynamic linker (LC_ID_DYLINKER). +/// A file can have at most one of these. +/// This struct is also used for the LC_DYLD_ENVIRONMENT load command and contains +/// string for dyld to treat like an environment variable. +pub const dylinker_command = extern struct { + /// LC_ID_DYLINKER, LC_LOAD_DYLINKER, or LC_DYLD_ENVIRONMENT + cmd: u32, + + /// includes pathname string + cmdsize: u32, + + /// A variable length string in a load command is represented by an lc_str + /// union. The strings are stored just after the load command structure and + /// the offset is from the start of the load command structure. 
The size + /// of the string is reflected in the cmdsize field of the load command. + /// Once again any padded bytes to bring the cmdsize field to a multiple + /// of 4 bytes must be zero. + name: u32, +}; + +/// A dynamically linked shared library (filetype == MH_DYLIB in the mach header) +/// contains a dylib_command (cmd == LC_ID_DYLIB) to identify the library. +/// An object that uses a dynamically linked shared library also contains a +/// dylib_command (cmd == LC_LOAD_DYLIB, LC_LOAD_WEAK_DYLIB, or +/// LC_REEXPORT_DYLIB) for each library it uses. +pub const dylib_command = extern struct { + /// LC_ID_DYLIB, LC_LOAD_WEAK_DYLIB, LC_LOAD_DYLIB, LC_REEXPORT_DYLIB + cmd: u32, + + /// includes pathname string + cmdsize: u32, + + /// the library identification + dylib: dylib, +}; + +/// Dynamicaly linked shared libraries are identified by two things. The +/// pathname (the name of the library as found for execution), and the +/// compatibility version number. The pathname must match and the compatibility +/// number in the user of the library must be greater than or equal to the +/// library being used. The time stamp is used to record the time a library was +/// built and copied into user so it can be use to determined if the library used +/// at runtime is exactly the same as used to built the program. +pub const dylib = extern struct { + /// library's pathname (offset pointing at the end of dylib_command) + name: u32, + + /// library's build timestamp + timestamp: u32, + + /// library's current version number + current_version: u32, + + /// library's compatibility version number + compatibility_version: u32, +}; + /// The segment load command indicates that a part of this file is to be /// mapped into the task's address space. 
The size of this segment in memory, /// vmsize, maybe equal to or larger than the amount to map from this file, diff --git a/lib/std/os/bits/linux.zig b/lib/std/os/bits/linux.zig @@ -24,7 +24,6 @@ pub usingnamespace switch (builtin.arch) { }; pub usingnamespace @import("linux/netlink.zig"); -pub const BPF = @import("linux/bpf.zig"); const is_mips = builtin.arch.isMIPS(); diff --git a/lib/std/os/bits/linux/bpf.zig b/lib/std/os/bits/linux/bpf.zig @@ -1,975 +0,0 @@ -// SPDX-License-Identifier: MIT -// Copyright (c) 2015-2020 Zig Contributors -// This file is part of [zig](https://ziglang.org/), which is MIT licensed. -// The MIT license requires this copyright notice to be included in all copies -// and substantial portions of the software. -usingnamespace std.os; -const std = @import("../../../std.zig"); -const expectEqual = std.testing.expectEqual; -const fd_t = std.os.fd_t; -const pid_t = std.os.pid_t; - -// instruction classes -pub const LD = 0x00; -pub const LDX = 0x01; -pub const ST = 0x02; -pub const STX = 0x03; -pub const ALU = 0x04; -pub const JMP = 0x05; -pub const RET = 0x06; -pub const MISC = 0x07; - -/// 32-bit -pub const W = 0x00; -/// 16-bit -pub const H = 0x08; -/// 8-bit -pub const B = 0x10; -/// 64-bit -pub const DW = 0x18; - -pub const IMM = 0x00; -pub const ABS = 0x20; -pub const IND = 0x40; -pub const MEM = 0x60; -pub const LEN = 0x80; -pub const MSH = 0xa0; - -// alu fields -pub const ADD = 0x00; -pub const SUB = 0x10; -pub const MUL = 0x20; -pub const DIV = 0x30; -pub const OR = 0x40; -pub const AND = 0x50; -pub const LSH = 0x60; -pub const RSH = 0x70; -pub const NEG = 0x80; -pub const MOD = 0x90; -pub const XOR = 0xa0; - -// jmp fields -pub const JA = 0x00; -pub const JEQ = 0x10; -pub const JGT = 0x20; -pub const JGE = 0x30; -pub const JSET = 0x40; - -//#define BPF_SRC(code) ((code) & 0x08) -pub const K = 0x00; -pub const X = 0x08; - -pub const MAXINSNS = 4096; - -// instruction classes -/// jmp mode in word width -pub const JMP32 = 0x06; -/// 
alu mode in double word width -pub const ALU64 = 0x07; - -// ld/ldx fields -/// exclusive add -pub const XADD = 0xc0; - -// alu/jmp fields -/// mov reg to reg -pub const MOV = 0xb0; -/// sign extending arithmetic shift right */ -pub const ARSH = 0xc0; - -// change endianness of a register -/// flags for endianness conversion: -pub const END = 0xd0; -/// convert to little-endian */ -pub const TO_LE = 0x00; -/// convert to big-endian -pub const TO_BE = 0x08; -pub const FROM_LE = TO_LE; -pub const FROM_BE = TO_BE; - -// jmp encodings -/// jump != * -pub const JNE = 0x50; -/// LT is unsigned, '<' -pub const JLT = 0xa0; -/// LE is unsigned, '<=' * -pub const JLE = 0xb0; -/// SGT is signed '>', GT in x86 -pub const JSGT = 0x60; -/// SGE is signed '>=', GE in x86 -pub const JSGE = 0x70; -/// SLT is signed, '<' -pub const JSLT = 0xc0; -/// SLE is signed, '<=' -pub const JSLE = 0xd0; -/// function call -pub const CALL = 0x80; -/// function return -pub const EXIT = 0x90; - -/// Flag for prog_attach command. If a sub-cgroup installs some bpf program, the -/// program in this cgroup yields to sub-cgroup program. -pub const F_ALLOW_OVERRIDE = 0x1; -/// Flag for prog_attach command. If a sub-cgroup installs some bpf program, -/// that cgroup program gets run in addition to the program in this cgroup. -pub const F_ALLOW_MULTI = 0x2; -/// Flag for prog_attach command. -pub const F_REPLACE = 0x4; - -/// If BPF_F_STRICT_ALIGNMENT is used in BPF_PROG_LOAD command, the verifier -/// will perform strict alignment checking as if the kernel has been built with -/// CONFIG_EFFICIENT_UNALIGNED_ACCESS not set, and NET_IP_ALIGN defined to 2. -pub const F_STRICT_ALIGNMENT = 0x1; - -/// If BPF_F_ANY_ALIGNMENT is used in BPF_PROF_LOAD command, the verifier will -/// allow any alignment whatsoever. On platforms with strict alignment -/// requirements for loads ands stores (such as sparc and mips) the verifier -/// validates that all loads and stores provably follow this requirement. 
This -/// flag turns that checking and enforcement off. -/// -/// It is mostly used for testing when we want to validate the context and -/// memory access aspects of the verifier, but because of an unaligned access -/// the alignment check would trigger before the one we are interested in. -pub const F_ANY_ALIGNMENT = 0x2; - -/// BPF_F_TEST_RND_HI32 is used in BPF_PROG_LOAD command for testing purpose. -/// Verifier does sub-register def/use analysis and identifies instructions -/// whose def only matters for low 32-bit, high 32-bit is never referenced later -/// through implicit zero extension. Therefore verifier notifies JIT back-ends -/// that it is safe to ignore clearing high 32-bit for these instructions. This -/// saves some back-ends a lot of code-gen. However such optimization is not -/// necessary on some arches, for example x86_64, arm64 etc, whose JIT back-ends -/// hence hasn't used verifier's analysis result. But, we really want to have a -/// way to be able to verify the correctness of the described optimization on -/// x86_64 on which testsuites are frequently exercised. -/// -/// So, this flag is introduced. Once it is set, verifier will randomize high -/// 32-bit for those instructions who has been identified as safe to ignore -/// them. Then, if verifier is not doing correct analysis, such randomization -/// will regress tests to expose bugs. 
-pub const F_TEST_RND_HI32 = 0x4; - -/// When BPF ldimm64's insn[0].src_reg != 0 then this can have two extensions: -/// insn[0].src_reg: BPF_PSEUDO_MAP_FD BPF_PSEUDO_MAP_VALUE -/// insn[0].imm: map fd map fd -/// insn[1].imm: 0 offset into value -/// insn[0].off: 0 0 -/// insn[1].off: 0 0 -/// ldimm64 rewrite: address of map address of map[0]+offset -/// verifier type: CONST_PTR_TO_MAP PTR_TO_MAP_VALUE -pub const PSEUDO_MAP_FD = 1; -pub const PSEUDO_MAP_VALUE = 2; - -/// when bpf_call->src_reg == BPF_PSEUDO_CALL, bpf_call->imm == pc-relative -/// offset to another bpf function -pub const PSEUDO_CALL = 1; - -/// flag for BPF_MAP_UPDATE_ELEM command. create new element or update existing -pub const ANY = 0; -/// flag for BPF_MAP_UPDATE_ELEM command. create new element if it didn't exist -pub const NOEXIST = 1; -/// flag for BPF_MAP_UPDATE_ELEM command. update existing element -pub const EXIST = 2; -/// flag for BPF_MAP_UPDATE_ELEM command. spin_lock-ed map_lookup/map_update -pub const F_LOCK = 4; - -/// flag for BPF_MAP_CREATE command */ -pub const BPF_F_NO_PREALLOC = 0x1; -/// flag for BPF_MAP_CREATE command. Instead of having one common LRU list in -/// the BPF_MAP_TYPE_LRU_[PERCPU_]HASH map, use a percpu LRU list which can -/// scale and perform better. Note, the LRU nodes (including free nodes) cannot -/// be moved across different LRU lists. -pub const BPF_F_NO_COMMON_LRU = 0x2; -/// flag for BPF_MAP_CREATE command. Specify numa node during map creation -pub const BPF_F_NUMA_NODE = 0x4; -/// flag for BPF_MAP_CREATE command. Flags for BPF object read access from -/// syscall side -pub const BPF_F_RDONLY = 0x8; -/// flag for BPF_MAP_CREATE command. Flags for BPF object write access from -/// syscall side -pub const BPF_F_WRONLY = 0x10; -/// flag for BPF_MAP_CREATE command. Flag for stack_map, store build_id+offset -/// instead of pointer -pub const BPF_F_STACK_BUILD_ID = 0x20; -/// flag for BPF_MAP_CREATE command. Zero-initialize hash function seed. 
This -/// should only be used for testing. -pub const BPF_F_ZERO_SEED = 0x40; -/// flag for BPF_MAP_CREATE command Flags for accessing BPF object from program -/// side. -pub const BPF_F_RDONLY_PROG = 0x80; -/// flag for BPF_MAP_CREATE command. Flags for accessing BPF object from program -/// side. -pub const BPF_F_WRONLY_PROG = 0x100; -/// flag for BPF_MAP_CREATE command. Clone map from listener for newly accepted -/// socket -pub const BPF_F_CLONE = 0x200; -/// flag for BPF_MAP_CREATE command. Enable memory-mapping BPF map -pub const BPF_F_MMAPABLE = 0x400; - -/// These values correspond to "syscalls" within the BPF program's environment -pub const Helper = enum(i32) { - unspec, - map_lookup_elem, - map_update_elem, - map_delete_elem, - probe_read, - ktime_get_ns, - trace_printk, - get_prandom_u32, - get_smp_processor_id, - skb_store_bytes, - l3_csum_replace, - l4_csum_replace, - tail_call, - clone_redirect, - get_current_pid_tgid, - get_current_uid_gid, - get_current_comm, - get_cgroup_classid, - skb_vlan_push, - skb_vlan_pop, - skb_get_tunnel_key, - skb_set_tunnel_key, - perf_event_read, - redirect, - get_route_realm, - perf_event_output, - skb_load_bytes, - get_stackid, - csum_diff, - skb_get_tunnel_opt, - skb_set_tunnel_opt, - skb_change_proto, - skb_change_type, - skb_under_cgroup, - get_hash_recalc, - get_current_task, - probe_write_user, - current_task_under_cgroup, - skb_change_tail, - skb_pull_data, - csum_update, - set_hash_invalid, - get_numa_node_id, - skb_change_head, - xdp_adjust_head, - probe_read_str, - get_socket_cookie, - get_socket_uid, - set_hash, - setsockopt, - skb_adjust_room, - redirect_map, - sk_redirect_map, - sock_map_update, - xdp_adjust_meta, - perf_event_read_value, - perf_prog_read_value, - getsockopt, - override_return, - sock_ops_cb_flags_set, - msg_redirect_map, - msg_apply_bytes, - msg_cork_bytes, - msg_pull_data, - bind, - xdp_adjust_tail, - skb_get_xfrm_state, - get_stack, - skb_load_bytes_relative, - fib_lookup, - 
sock_hash_update, - msg_redirect_hash, - sk_redirect_hash, - lwt_push_encap, - lwt_seg6_store_bytes, - lwt_seg6_adjust_srh, - lwt_seg6_action, - rc_repeat, - rc_keydown, - skb_cgroup_id, - get_current_cgroup_id, - get_local_storage, - sk_select_reuseport, - skb_ancestor_cgroup_id, - sk_lookup_tcp, - sk_lookup_udp, - sk_release, - map_push_elem, - map_pop_elem, - map_peek_elem, - msg_push_data, - msg_pop_data, - rc_pointer_rel, - spin_lock, - spin_unlock, - sk_fullsock, - tcp_sock, - skb_ecn_set_ce, - get_listener_sock, - skc_lookup_tcp, - tcp_check_syncookie, - sysctl_get_name, - sysctl_get_current_value, - sysctl_get_new_value, - sysctl_set_new_value, - strtol, - strtoul, - sk_storage_get, - sk_storage_delete, - send_signal, - tcp_gen_syncookie, - skb_output, - probe_read_user, - probe_read_kernel, - probe_read_user_str, - probe_read_kernel_str, - tcp_send_ack, - send_signal_thread, - jiffies64, - _, -}; - -/// a single BPF instruction -pub const Insn = packed struct { - code: u8, - dst: u4, - src: u4, - off: i16, - imm: i32, - - /// r0 - r9 are general purpose 64-bit registers, r10 points to the stack - /// frame - pub const Reg = packed enum(u4) { r0, r1, r2, r3, r4, r5, r6, r7, r8, r9, r10 }; - const Source = packed enum(u1) { reg, imm }; - const AluOp = packed enum(u8) { - add = ADD, - sub = SUB, - mul = MUL, - div = DIV, - op_or = OR, - op_and = AND, - lsh = LSH, - rsh = RSH, - neg = NEG, - mod = MOD, - xor = XOR, - mov = MOV, - }; - - pub const Size = packed enum(u8) { - byte = B, - half_word = H, - word = W, - double_word = DW, - }; - - const JmpOp = packed enum(u8) { - ja = JA, - jeq = JEQ, - jgt = JGT, - jge = JGE, - jset = JSET, - }; - - const ImmOrReg = union(Source) { - imm: i32, - reg: Reg, - }; - - fn imm_reg(code: u8, dst: Reg, src: anytype, off: i16) Insn { - const imm_or_reg = if (@typeInfo(@TypeOf(src)) == .EnumLiteral) - ImmOrReg{ .reg = @as(Reg, src) } - else - ImmOrReg{ .imm = src }; - - const src_type = switch (imm_or_reg) { - .imm => K, - 
.reg => X, - }; - - return Insn{ - .code = code | src_type, - .dst = @enumToInt(dst), - .src = switch (imm_or_reg) { - .imm => 0, - .reg => |r| @enumToInt(r), - }, - .off = off, - .imm = switch (imm_or_reg) { - .imm => |i| i, - .reg => 0, - }, - }; - } - - fn alu(comptime width: comptime_int, op: AluOp, dst: Reg, src: anytype) Insn { - const width_bitfield = switch (width) { - 32 => ALU, - 64 => ALU64, - else => @compileError("width must be 32 or 64"), - }; - - return imm_reg(width_bitfield | @enumToInt(op), dst, src, 0); - } - - pub fn mov(dst: Reg, src: anytype) Insn { - return alu(64, .mov, dst, src); - } - - pub fn add(dst: Reg, src: anytype) Insn { - return alu(64, .add, dst, src); - } - - fn jmp(op: JmpOp, dst: Reg, src: anytype, off: i16) Insn { - return imm_reg(JMP | @enumToInt(op), dst, src, off); - } - - pub fn jeq(dst: Reg, src: anytype, off: i16) Insn { - return jmp(.jeq, dst, src, off); - } - - pub fn stx_mem(size: Size, dst: Reg, src: Reg, off: i16) Insn { - return Insn{ - .code = STX | @enumToInt(size) | MEM, - .dst = @enumToInt(dst), - .src = @enumToInt(src), - .off = off, - .imm = 0, - }; - } - - pub fn xadd(dst: Reg, src: Reg) Insn { - return Insn{ - .code = STX | XADD | DW, - .dst = @enumToInt(dst), - .src = @enumToInt(src), - .off = 0, - .imm = 0, - }; - } - - /// direct packet access, R0 = *(uint *)(skb->data + imm32) - pub fn ld_abs(size: Size, imm: i32) Insn { - return Insn{ - .code = LD | @enumToInt(size) | ABS, - .dst = 0, - .src = 0, - .off = 0, - .imm = imm, - }; - } - - fn ld_imm_impl1(dst: Reg, src: Reg, imm: u64) Insn { - return Insn{ - .code = LD | DW | IMM, - .dst = @enumToInt(dst), - .src = @enumToInt(src), - .off = 0, - .imm = @intCast(i32, @truncate(u32, imm)), - }; - } - - fn ld_imm_impl2(imm: u64) Insn { - return Insn{ - .code = 0, - .dst = 0, - .src = 0, - .off = 0, - .imm = @intCast(i32, @truncate(u32, imm >> 32)), - }; - } - - pub fn ld_map_fd1(dst: Reg, map_fd: fd_t) Insn { - return ld_imm_impl1(dst, @intToEnum(Reg, 
PSEUDO_MAP_FD), @intCast(u64, map_fd)); - } - - pub fn ld_map_fd2(map_fd: fd_t) Insn { - return ld_imm_impl2(@intCast(u64, map_fd)); - } - - pub fn call(helper: Helper) Insn { - return Insn{ - .code = JMP | CALL, - .dst = 0, - .src = 0, - .off = 0, - .imm = @enumToInt(helper), - }; - } - - /// exit BPF program - pub fn exit() Insn { - return Insn{ - .code = JMP | EXIT, - .dst = 0, - .src = 0, - .off = 0, - .imm = 0, - }; - } -}; - -fn expect_insn(insn: Insn, val: u64) void { - expectEqual(@bitCast(u64, insn), val); -} - -test "insn bitsize" { - expectEqual(@bitSizeOf(Insn), 64); -} - -// mov instructions -test "mov imm" { - expect_insn(Insn.mov(.r1, 1), 0x00000001000001b7); -} - -test "mov reg" { - expect_insn(Insn.mov(.r6, .r1), 0x00000000000016bf); -} - -// alu instructions -test "add imm" { - expect_insn(Insn.add(.r2, -4), 0xfffffffc00000207); -} - -// ld instructions -test "ld_abs" { - expect_insn(Insn.ld_abs(.byte, 42), 0x0000002a00000030); -} - -test "ld_map_fd" { - expect_insn(Insn.ld_map_fd1(.r1, 42), 0x0000002a00001118); - expect_insn(Insn.ld_map_fd2(42), 0x0000000000000000); -} - -// st instructions -test "stx_mem" { - expect_insn(Insn.stx_mem(.word, .r10, .r0, -4), 0x00000000fffc0a63); -} - -test "xadd" { - expect_insn(Insn.xadd(.r0, .r1), 0x00000000000010db); -} - -// jmp instructions -test "jeq imm" { - expect_insn(Insn.jeq(.r0, 0, 2), 0x0000000000020015); -} - -// other instructions -test "call" { - expect_insn(Insn.call(.map_lookup_elem), 0x0000000100000085); -} - -test "exit" { - expect_insn(Insn.exit(), 0x0000000000000095); -} - -pub const Cmd = extern enum(usize) { - map_create, - map_lookup_elem, - map_update_elem, - map_delete_elem, - map_get_next_key, - prog_load, - obj_pin, - obj_get, - prog_attach, - prog_detach, - prog_test_run, - prog_get_next_id, - map_get_next_id, - prog_get_fd_by_id, - map_get_fd_by_id, - obj_get_info_by_fd, - prog_query, - raw_tracepoint_open, - btf_load, - btf_get_fd_by_id, - task_fd_query, - 
map_lookup_and_delete_elem, - map_freeze, - btf_get_next_id, - map_lookup_batch, - map_lookup_and_delete_batch, - map_update_batch, - map_delete_batch, - link_create, - link_update, - link_get_fd_by_id, - link_get_next_id, - enable_stats, - iter_create, - link_detach, - _, -}; - -pub const MapType = extern enum(u32) { - unspec, - hash, - array, - prog_array, - perf_event_array, - percpu_hash, - percpu_array, - stack_trace, - cgroup_array, - lru_hash, - lru_percpu_hash, - lpm_trie, - array_of_maps, - hash_of_maps, - devmap, - sockmap, - cpumap, - xskmap, - sockhash, - cgroup_storage, - reuseport_sockarray, - percpu_cgroup_storage, - queue, - stack, - sk_storage, - devmap_hash, - struct_ops, - ringbuf, - _, -}; - -pub const ProgType = extern enum(u32) { - unspec, - socket_filter, - kprobe, - sched_cls, - sched_act, - tracepoint, - xdp, - perf_event, - cgroup_skb, - cgroup_sock, - lwt_in, - lwt_out, - lwt_xmit, - sock_ops, - sk_skb, - cgroup_device, - sk_msg, - raw_tracepoint, - cgroup_sock_addr, - lwt_seg6local, - lirc_mode2, - sk_reuseport, - flow_dissector, - cgroup_sysctl, - raw_tracepoint_writable, - cgroup_sockopt, - tracing, - struct_ops, - ext, - lsm, - sk_lookup, -}; - -pub const AttachType = extern enum(u32) { - cgroup_inet_ingress, - cgroup_inet_egress, - cgroup_inet_sock_create, - cgroup_sock_ops, - sk_skb_stream_parser, - sk_skb_stream_verdict, - cgroup_device, - sk_msg_verdict, - cgroup_inet4_bind, - cgroup_inet6_bind, - cgroup_inet4_connect, - cgroup_inet6_connect, - cgroup_inet4_post_bind, - cgroup_inet6_post_bind, - cgroup_udp4_sendmsg, - cgroup_udp6_sendmsg, - lirc_mode2, - flow_dissector, - cgroup_sysctl, - cgroup_udp4_recvmsg, - cgroup_udp6_recvmsg, - cgroup_getsockopt, - cgroup_setsockopt, - trace_raw_tp, - trace_fentry, - trace_fexit, - modify_return, - lsm_mac, - trace_iter, - cgroup_inet4_getpeername, - cgroup_inet6_getpeername, - cgroup_inet4_getsockname, - cgroup_inet6_getsockname, - xdp_devmap, - cgroup_inet_sock_release, - xdp_cpumap, - 
sk_lookup, - xdp, - _, -}; - -const obj_name_len = 16; -/// struct used by Cmd.map_create command -pub const MapCreateAttr = extern struct { - /// one of MapType - map_type: u32, - /// size of key in bytes - key_size: u32, - /// size of value in bytes - value_size: u32, - /// max number of entries in a map - max_entries: u32, - /// .map_create related flags - map_flags: u32, - /// fd pointing to the inner map - inner_map_fd: fd_t, - /// numa node (effective only if MapCreateFlags.numa_node is set) - numa_node: u32, - map_name: [obj_name_len]u8, - /// ifindex of netdev to create on - map_ifindex: u32, - /// fd pointing to a BTF type data - btf_fd: fd_t, - /// BTF type_id of the key - btf_key_type_id: u32, - /// BTF type_id of the value - bpf_value_type_id: u32, - /// BTF type_id of a kernel struct stored as the map value - btf_vmlinux_value_type_id: u32, -}; - -/// struct used by Cmd.map_*_elem commands -pub const MapElemAttr = extern struct { - map_fd: fd_t, - key: u64, - result: extern union { - value: u64, - next_key: u64, - }, - flags: u64, -}; - -/// struct used by Cmd.map_*_batch commands -pub const MapBatchAttr = extern struct { - /// start batch, NULL to start from beginning - in_batch: u64, - /// output: next start batch - out_batch: u64, - keys: u64, - values: u64, - /// input/output: - /// input: # of key/value elements - /// output: # of filled elements - count: u32, - map_fd: fd_t, - elem_flags: u64, - flags: u64, -}; - -/// struct used by Cmd.prog_load command -pub const ProgLoadAttr = extern struct { - /// one of ProgType - prog_type: u32, - insn_cnt: u32, - insns: u64, - license: u64, - /// verbosity level of verifier - log_level: u32, - /// size of user buffer - log_size: u32, - /// user supplied buffer - log_buf: u64, - /// not used - kern_version: u32, - prog_flags: u32, - prog_name: [obj_name_len]u8, - /// ifindex of netdev to prep for. 
For some prog types expected attach - /// type must be known at load time to verify attach type specific parts - /// of prog (context accesses, allowed helpers, etc). - prog_ifindex: u32, - expected_attach_type: u32, - /// fd pointing to BTF type data - prog_btf_fd: fd_t, - /// userspace bpf_func_info size - func_info_rec_size: u32, - func_info: u64, - /// number of bpf_func_info records - func_info_cnt: u32, - /// userspace bpf_line_info size - line_info_rec_size: u32, - line_info: u64, - /// number of bpf_line_info records - line_info_cnt: u32, - /// in-kernel BTF type id to attach to - attact_btf_id: u32, - /// 0 to attach to vmlinux - attach_prog_id: u32, -}; - -/// struct used by Cmd.obj_* commands -pub const ObjAttr = extern struct { - pathname: u64, - bpf_fd: fd_t, - file_flags: u32, -}; - -/// struct used by Cmd.prog_attach/detach commands -pub const ProgAttachAttr = extern struct { - /// container object to attach to - target_fd: fd_t, - /// eBPF program to attach - attach_bpf_fd: fd_t, - attach_type: u32, - attach_flags: u32, - // TODO: BPF_F_REPLACE flags - /// previously attached eBPF program to replace if .replace is used - replace_bpf_fd: fd_t, -}; - -/// struct used by Cmd.prog_test_run command -pub const TestAttr = extern struct { - prog_fd: fd_t, - retval: u32, - /// input: len of data_in - data_size_in: u32, - /// input/output: len of data_out. returns ENOSPC if data_out is too small. - data_size_out: u32, - data_in: u64, - data_out: u64, - repeat: u32, - duration: u32, - /// input: len of ctx_in - ctx_size_in: u32, - /// input/output: len of ctx_out. returns ENOSPC if ctx_out is too small. 
- ctx_size_out: u32, - ctx_in: u64, - ctx_out: u64, -}; - -/// struct used by Cmd.*_get_*_id commands -pub const GetIdAttr = extern struct { - id: extern union { - start_id: u32, - prog_id: u32, - map_id: u32, - btf_id: u32, - link_id: u32, - }, - next_id: u32, - open_flags: u32, -}; - -/// struct used by Cmd.obj_get_info_by_fd command -pub const InfoAttr = extern struct { - bpf_fd: fd_t, - info_len: u32, - info: u64, -}; - -/// struct used by Cmd.prog_query command -pub const QueryAttr = extern struct { - /// container object to query - target_fd: fd_t, - attach_type: u32, - query_flags: u32, - attach_flags: u32, - prog_ids: u64, - prog_cnt: u32, -}; - -/// struct used by Cmd.raw_tracepoint_open command -pub const RawTracepointAttr = extern struct { - name: u64, - prog_fd: fd_t, -}; - -/// struct used by Cmd.btf_load command -pub const BtfLoadAttr = extern struct { - btf: u64, - btf_log_buf: u64, - btf_size: u32, - btf_log_size: u32, - btf_log_level: u32, -}; - -pub const TaskFdQueryAttr = extern struct { - /// input: pid - pid: pid_t, - /// input: fd - fd: fd_t, - /// input: flags - flags: u32, - /// input/output: buf len - buf_len: u32, - /// input/output: - /// tp_name for tracepoint - /// symbol for kprobe - /// filename for uprobe - buf: u64, - /// output: prod_id - prog_id: u32, - /// output: BPF_FD_TYPE - fd_type: u32, - /// output: probe_offset - probe_offset: u64, - /// output: probe_addr - probe_addr: u64, -}; - -/// struct used by Cmd.link_create command -pub const LinkCreateAttr = extern struct { - /// eBPF program to attach - prog_fd: fd_t, - /// object to attach to - target_fd: fd_t, - attach_type: u32, - /// extra flags - flags: u32, -}; - -/// struct used by Cmd.link_update command -pub const LinkUpdateAttr = extern struct { - link_fd: fd_t, - /// new program to update link with - new_prog_fd: fd_t, - /// extra flags - flags: u32, - /// expected link's program fd, it is specified only if BPF_F_REPLACE is - /// set in flags - old_prog_fd: fd_t, -}; 
- -/// struct used by Cmd.enable_stats command -pub const EnableStatsAttr = extern struct { - type: u32, -}; - -/// struct used by Cmd.iter_create command -pub const IterCreateAttr = extern struct { - link_fd: fd_t, - flags: u32, -}; - -pub const Attr = extern union { - map_create: MapCreateAttr, - map_elem: MapElemAttr, - map_batch: MapBatchAttr, - prog_load: ProgLoadAttr, - obj: ObjAttr, - prog_attach: ProgAttachAttr, - test_run: TestRunAttr, - get_id: GetIdAttr, - info: InfoAttr, - query: QueryAttr, - raw_tracepoint: RawTracepointAttr, - btf_load: BtfLoadAttr, - task_fd_query: TaskFdQueryAttr, - link_create: LinkCreateAttr, - link_update: LinkUpdateAttr, - enable_stats: EnableStatsAttr, - iter_create: IterCreateAttr, -}; diff --git a/lib/std/os/linux.zig b/lib/std/os/linux.zig @@ -29,6 +29,7 @@ pub usingnamespace switch (builtin.arch) { }; pub usingnamespace @import("bits.zig"); pub const tls = @import("linux/tls.zig"); +pub const BPF = @import("linux/bpf.zig"); /// Set by startup code, used by `getauxval`. pub var elf_aux_maybe: ?[*]std.elf.Auxv = null; diff --git a/lib/std/os/linux/bpf.zig b/lib/std/os/linux/bpf.zig @@ -0,0 +1,973 @@ +// SPDX-License-Identifier: MIT +// Copyright (c) 2015-2020 Zig Contributors +// This file is part of [zig](https://ziglang.org/), which is MIT licensed. +// The MIT license requires this copyright notice to be included in all copies +// and substantial portions of the software. 
+usingnamespace std.os; +const std = @import("../../std.zig"); +const expectEqual = std.testing.expectEqual; + +// instruction classes +pub const LD = 0x00; +pub const LDX = 0x01; +pub const ST = 0x02; +pub const STX = 0x03; +pub const ALU = 0x04; +pub const JMP = 0x05; +pub const RET = 0x06; +pub const MISC = 0x07; + +/// 32-bit +pub const W = 0x00; +/// 16-bit +pub const H = 0x08; +/// 8-bit +pub const B = 0x10; +/// 64-bit +pub const DW = 0x18; + +pub const IMM = 0x00; +pub const ABS = 0x20; +pub const IND = 0x40; +pub const MEM = 0x60; +pub const LEN = 0x80; +pub const MSH = 0xa0; + +// alu fields +pub const ADD = 0x00; +pub const SUB = 0x10; +pub const MUL = 0x20; +pub const DIV = 0x30; +pub const OR = 0x40; +pub const AND = 0x50; +pub const LSH = 0x60; +pub const RSH = 0x70; +pub const NEG = 0x80; +pub const MOD = 0x90; +pub const XOR = 0xa0; + +// jmp fields +pub const JA = 0x00; +pub const JEQ = 0x10; +pub const JGT = 0x20; +pub const JGE = 0x30; +pub const JSET = 0x40; + +//#define BPF_SRC(code) ((code) & 0x08) +pub const K = 0x00; +pub const X = 0x08; + +pub const MAXINSNS = 4096; + +// instruction classes +/// jmp mode in word width +pub const JMP32 = 0x06; +/// alu mode in double word width +pub const ALU64 = 0x07; + +// ld/ldx fields +/// exclusive add +pub const XADD = 0xc0; + +// alu/jmp fields +/// mov reg to reg +pub const MOV = 0xb0; +/// sign extending arithmetic shift right */ +pub const ARSH = 0xc0; + +// change endianness of a register +/// flags for endianness conversion: +pub const END = 0xd0; +/// convert to little-endian */ +pub const TO_LE = 0x00; +/// convert to big-endian +pub const TO_BE = 0x08; +pub const FROM_LE = TO_LE; +pub const FROM_BE = TO_BE; + +// jmp encodings +/// jump != * +pub const JNE = 0x50; +/// LT is unsigned, '<' +pub const JLT = 0xa0; +/// LE is unsigned, '<=' * +pub const JLE = 0xb0; +/// SGT is signed '>', GT in x86 +pub const JSGT = 0x60; +/// SGE is signed '>=', GE in x86 +pub const JSGE = 0x70; +/// SLT is 
signed, '<' +pub const JSLT = 0xc0; +/// SLE is signed, '<=' +pub const JSLE = 0xd0; +/// function call +pub const CALL = 0x80; +/// function return +pub const EXIT = 0x90; + +/// Flag for prog_attach command. If a sub-cgroup installs some bpf program, the +/// program in this cgroup yields to sub-cgroup program. +pub const F_ALLOW_OVERRIDE = 0x1; +/// Flag for prog_attach command. If a sub-cgroup installs some bpf program, +/// that cgroup program gets run in addition to the program in this cgroup. +pub const F_ALLOW_MULTI = 0x2; +/// Flag for prog_attach command. +pub const F_REPLACE = 0x4; + +/// If BPF_F_STRICT_ALIGNMENT is used in BPF_PROG_LOAD command, the verifier +/// will perform strict alignment checking as if the kernel has been built with +/// CONFIG_EFFICIENT_UNALIGNED_ACCESS not set, and NET_IP_ALIGN defined to 2. +pub const F_STRICT_ALIGNMENT = 0x1; + +/// If BPF_F_ANY_ALIGNMENT is used in BPF_PROF_LOAD command, the verifier will +/// allow any alignment whatsoever. On platforms with strict alignment +/// requirements for loads ands stores (such as sparc and mips) the verifier +/// validates that all loads and stores provably follow this requirement. This +/// flag turns that checking and enforcement off. +/// +/// It is mostly used for testing when we want to validate the context and +/// memory access aspects of the verifier, but because of an unaligned access +/// the alignment check would trigger before the one we are interested in. +pub const F_ANY_ALIGNMENT = 0x2; + +/// BPF_F_TEST_RND_HI32 is used in BPF_PROG_LOAD command for testing purpose. +/// Verifier does sub-register def/use analysis and identifies instructions +/// whose def only matters for low 32-bit, high 32-bit is never referenced later +/// through implicit zero extension. Therefore verifier notifies JIT back-ends +/// that it is safe to ignore clearing high 32-bit for these instructions. This +/// saves some back-ends a lot of code-gen. 
However such optimization is not +/// necessary on some arches, for example x86_64, arm64 etc, whose JIT back-ends +/// hence hasn't used verifier's analysis result. But, we really want to have a +/// way to be able to verify the correctness of the described optimization on +/// x86_64 on which testsuites are frequently exercised. +/// +/// So, this flag is introduced. Once it is set, verifier will randomize high +/// 32-bit for those instructions who has been identified as safe to ignore +/// them. Then, if verifier is not doing correct analysis, such randomization +/// will regress tests to expose bugs. +pub const F_TEST_RND_HI32 = 0x4; + +/// When BPF ldimm64's insn[0].src_reg != 0 then this can have two extensions: +/// insn[0].src_reg: BPF_PSEUDO_MAP_FD BPF_PSEUDO_MAP_VALUE +/// insn[0].imm: map fd map fd +/// insn[1].imm: 0 offset into value +/// insn[0].off: 0 0 +/// insn[1].off: 0 0 +/// ldimm64 rewrite: address of map address of map[0]+offset +/// verifier type: CONST_PTR_TO_MAP PTR_TO_MAP_VALUE +pub const PSEUDO_MAP_FD = 1; +pub const PSEUDO_MAP_VALUE = 2; + +/// when bpf_call->src_reg == BPF_PSEUDO_CALL, bpf_call->imm == pc-relative +/// offset to another bpf function +pub const PSEUDO_CALL = 1; + +/// flag for BPF_MAP_UPDATE_ELEM command. create new element or update existing +pub const ANY = 0; +/// flag for BPF_MAP_UPDATE_ELEM command. create new element if it didn't exist +pub const NOEXIST = 1; +/// flag for BPF_MAP_UPDATE_ELEM command. update existing element +pub const EXIST = 2; +/// flag for BPF_MAP_UPDATE_ELEM command. spin_lock-ed map_lookup/map_update +pub const F_LOCK = 4; + +/// flag for BPF_MAP_CREATE command */ +pub const BPF_F_NO_PREALLOC = 0x1; +/// flag for BPF_MAP_CREATE command. Instead of having one common LRU list in +/// the BPF_MAP_TYPE_LRU_[PERCPU_]HASH map, use a percpu LRU list which can +/// scale and perform better. Note, the LRU nodes (including free nodes) cannot +/// be moved across different LRU lists. 
+pub const BPF_F_NO_COMMON_LRU = 0x2; +/// flag for BPF_MAP_CREATE command. Specify numa node during map creation +pub const BPF_F_NUMA_NODE = 0x4; +/// flag for BPF_MAP_CREATE command. Flags for BPF object read access from +/// syscall side +pub const BPF_F_RDONLY = 0x8; +/// flag for BPF_MAP_CREATE command. Flags for BPF object write access from +/// syscall side +pub const BPF_F_WRONLY = 0x10; +/// flag for BPF_MAP_CREATE command. Flag for stack_map, store build_id+offset +/// instead of pointer +pub const BPF_F_STACK_BUILD_ID = 0x20; +/// flag for BPF_MAP_CREATE command. Zero-initialize hash function seed. This +/// should only be used for testing. +pub const BPF_F_ZERO_SEED = 0x40; +/// flag for BPF_MAP_CREATE command Flags for accessing BPF object from program +/// side. +pub const BPF_F_RDONLY_PROG = 0x80; +/// flag for BPF_MAP_CREATE command. Flags for accessing BPF object from program +/// side. +pub const BPF_F_WRONLY_PROG = 0x100; +/// flag for BPF_MAP_CREATE command. Clone map from listener for newly accepted +/// socket +pub const BPF_F_CLONE = 0x200; +/// flag for BPF_MAP_CREATE command. 
Enable memory-mapping BPF map +pub const BPF_F_MMAPABLE = 0x400; + +/// These values correspond to "syscalls" within the BPF program's environment +pub const Helper = enum(i32) { + unspec, + map_lookup_elem, + map_update_elem, + map_delete_elem, + probe_read, + ktime_get_ns, + trace_printk, + get_prandom_u32, + get_smp_processor_id, + skb_store_bytes, + l3_csum_replace, + l4_csum_replace, + tail_call, + clone_redirect, + get_current_pid_tgid, + get_current_uid_gid, + get_current_comm, + get_cgroup_classid, + skb_vlan_push, + skb_vlan_pop, + skb_get_tunnel_key, + skb_set_tunnel_key, + perf_event_read, + redirect, + get_route_realm, + perf_event_output, + skb_load_bytes, + get_stackid, + csum_diff, + skb_get_tunnel_opt, + skb_set_tunnel_opt, + skb_change_proto, + skb_change_type, + skb_under_cgroup, + get_hash_recalc, + get_current_task, + probe_write_user, + current_task_under_cgroup, + skb_change_tail, + skb_pull_data, + csum_update, + set_hash_invalid, + get_numa_node_id, + skb_change_head, + xdp_adjust_head, + probe_read_str, + get_socket_cookie, + get_socket_uid, + set_hash, + setsockopt, + skb_adjust_room, + redirect_map, + sk_redirect_map, + sock_map_update, + xdp_adjust_meta, + perf_event_read_value, + perf_prog_read_value, + getsockopt, + override_return, + sock_ops_cb_flags_set, + msg_redirect_map, + msg_apply_bytes, + msg_cork_bytes, + msg_pull_data, + bind, + xdp_adjust_tail, + skb_get_xfrm_state, + get_stack, + skb_load_bytes_relative, + fib_lookup, + sock_hash_update, + msg_redirect_hash, + sk_redirect_hash, + lwt_push_encap, + lwt_seg6_store_bytes, + lwt_seg6_adjust_srh, + lwt_seg6_action, + rc_repeat, + rc_keydown, + skb_cgroup_id, + get_current_cgroup_id, + get_local_storage, + sk_select_reuseport, + skb_ancestor_cgroup_id, + sk_lookup_tcp, + sk_lookup_udp, + sk_release, + map_push_elem, + map_pop_elem, + map_peek_elem, + msg_push_data, + msg_pop_data, + rc_pointer_rel, + spin_lock, + spin_unlock, + sk_fullsock, + tcp_sock, + skb_ecn_set_ce, + 
get_listener_sock, + skc_lookup_tcp, + tcp_check_syncookie, + sysctl_get_name, + sysctl_get_current_value, + sysctl_get_new_value, + sysctl_set_new_value, + strtol, + strtoul, + sk_storage_get, + sk_storage_delete, + send_signal, + tcp_gen_syncookie, + skb_output, + probe_read_user, + probe_read_kernel, + probe_read_user_str, + probe_read_kernel_str, + tcp_send_ack, + send_signal_thread, + jiffies64, + _, +}; + +/// a single BPF instruction +pub const Insn = packed struct { + code: u8, + dst: u4, + src: u4, + off: i16, + imm: i32, + + /// r0 - r9 are general purpose 64-bit registers, r10 points to the stack + /// frame + pub const Reg = packed enum(u4) { r0, r1, r2, r3, r4, r5, r6, r7, r8, r9, r10 }; + const Source = packed enum(u1) { reg, imm }; + const AluOp = packed enum(u8) { + add = ADD, + sub = SUB, + mul = MUL, + div = DIV, + op_or = OR, + op_and = AND, + lsh = LSH, + rsh = RSH, + neg = NEG, + mod = MOD, + xor = XOR, + mov = MOV, + }; + + pub const Size = packed enum(u8) { + byte = B, + half_word = H, + word = W, + double_word = DW, + }; + + const JmpOp = packed enum(u8) { + ja = JA, + jeq = JEQ, + jgt = JGT, + jge = JGE, + jset = JSET, + }; + + const ImmOrReg = union(Source) { + imm: i32, + reg: Reg, + }; + + fn imm_reg(code: u8, dst: Reg, src: anytype, off: i16) Insn { + const imm_or_reg = if (@typeInfo(@TypeOf(src)) == .EnumLiteral) + ImmOrReg{ .reg = @as(Reg, src) } + else + ImmOrReg{ .imm = src }; + + const src_type = switch (imm_or_reg) { + .imm => K, + .reg => X, + }; + + return Insn{ + .code = code | src_type, + .dst = @enumToInt(dst), + .src = switch (imm_or_reg) { + .imm => 0, + .reg => |r| @enumToInt(r), + }, + .off = off, + .imm = switch (imm_or_reg) { + .imm => |i| i, + .reg => 0, + }, + }; + } + + fn alu(comptime width: comptime_int, op: AluOp, dst: Reg, src: anytype) Insn { + const width_bitfield = switch (width) { + 32 => ALU, + 64 => ALU64, + else => @compileError("width must be 32 or 64"), + }; + + return imm_reg(width_bitfield | 
@enumToInt(op), dst, src, 0); + } + + pub fn mov(dst: Reg, src: anytype) Insn { + return alu(64, .mov, dst, src); + } + + pub fn add(dst: Reg, src: anytype) Insn { + return alu(64, .add, dst, src); + } + + fn jmp(op: JmpOp, dst: Reg, src: anytype, off: i16) Insn { + return imm_reg(JMP | @enumToInt(op), dst, src, off); + } + + pub fn jeq(dst: Reg, src: anytype, off: i16) Insn { + return jmp(.jeq, dst, src, off); + } + + pub fn stx_mem(size: Size, dst: Reg, src: Reg, off: i16) Insn { + return Insn{ + .code = STX | @enumToInt(size) | MEM, + .dst = @enumToInt(dst), + .src = @enumToInt(src), + .off = off, + .imm = 0, + }; + } + + pub fn xadd(dst: Reg, src: Reg) Insn { + return Insn{ + .code = STX | XADD | DW, + .dst = @enumToInt(dst), + .src = @enumToInt(src), + .off = 0, + .imm = 0, + }; + } + + /// direct packet access, R0 = *(uint *)(skb->data + imm32) + pub fn ld_abs(size: Size, imm: i32) Insn { + return Insn{ + .code = LD | @enumToInt(size) | ABS, + .dst = 0, + .src = 0, + .off = 0, + .imm = imm, + }; + } + + fn ld_imm_impl1(dst: Reg, src: Reg, imm: u64) Insn { + return Insn{ + .code = LD | DW | IMM, + .dst = @enumToInt(dst), + .src = @enumToInt(src), + .off = 0, + .imm = @intCast(i32, @truncate(u32, imm)), + }; + } + + fn ld_imm_impl2(imm: u64) Insn { + return Insn{ + .code = 0, + .dst = 0, + .src = 0, + .off = 0, + .imm = @intCast(i32, @truncate(u32, imm >> 32)), + }; + } + + pub fn ld_map_fd1(dst: Reg, map_fd: fd_t) Insn { + return ld_imm_impl1(dst, @intToEnum(Reg, PSEUDO_MAP_FD), @intCast(u64, map_fd)); + } + + pub fn ld_map_fd2(map_fd: fd_t) Insn { + return ld_imm_impl2(@intCast(u64, map_fd)); + } + + pub fn call(helper: Helper) Insn { + return Insn{ + .code = JMP | CALL, + .dst = 0, + .src = 0, + .off = 0, + .imm = @enumToInt(helper), + }; + } + + /// exit BPF program + pub fn exit() Insn { + return Insn{ + .code = JMP | EXIT, + .dst = 0, + .src = 0, + .off = 0, + .imm = 0, + }; + } +}; + +fn expect_insn(insn: Insn, val: u64) void { + 
expectEqual(@bitCast(u64, insn), val); +} + +test "insn bitsize" { + expectEqual(@bitSizeOf(Insn), 64); +} + +// mov instructions +test "mov imm" { + expect_insn(Insn.mov(.r1, 1), 0x00000001000001b7); +} + +test "mov reg" { + expect_insn(Insn.mov(.r6, .r1), 0x00000000000016bf); +} + +// alu instructions +test "add imm" { + expect_insn(Insn.add(.r2, -4), 0xfffffffc00000207); +} + +// ld instructions +test "ld_abs" { + expect_insn(Insn.ld_abs(.byte, 42), 0x0000002a00000030); +} + +test "ld_map_fd" { + expect_insn(Insn.ld_map_fd1(.r1, 42), 0x0000002a00001118); + expect_insn(Insn.ld_map_fd2(42), 0x0000000000000000); +} + +// st instructions +test "stx_mem" { + expect_insn(Insn.stx_mem(.word, .r10, .r0, -4), 0x00000000fffc0a63); +} + +test "xadd" { + expect_insn(Insn.xadd(.r0, .r1), 0x00000000000010db); +} + +// jmp instructions +test "jeq imm" { + expect_insn(Insn.jeq(.r0, 0, 2), 0x0000000000020015); +} + +// other instructions +test "call" { + expect_insn(Insn.call(.map_lookup_elem), 0x0000000100000085); +} + +test "exit" { + expect_insn(Insn.exit(), 0x0000000000000095); +} + +pub const Cmd = extern enum(usize) { + map_create, + map_lookup_elem, + map_update_elem, + map_delete_elem, + map_get_next_key, + prog_load, + obj_pin, + obj_get, + prog_attach, + prog_detach, + prog_test_run, + prog_get_next_id, + map_get_next_id, + prog_get_fd_by_id, + map_get_fd_by_id, + obj_get_info_by_fd, + prog_query, + raw_tracepoint_open, + btf_load, + btf_get_fd_by_id, + task_fd_query, + map_lookup_and_delete_elem, + map_freeze, + btf_get_next_id, + map_lookup_batch, + map_lookup_and_delete_batch, + map_update_batch, + map_delete_batch, + link_create, + link_update, + link_get_fd_by_id, + link_get_next_id, + enable_stats, + iter_create, + link_detach, + _, +}; + +pub const MapType = extern enum(u32) { + unspec, + hash, + array, + prog_array, + perf_event_array, + percpu_hash, + percpu_array, + stack_trace, + cgroup_array, + lru_hash, + lru_percpu_hash, + lpm_trie, + array_of_maps, + 
hash_of_maps, + devmap, + sockmap, + cpumap, + xskmap, + sockhash, + cgroup_storage, + reuseport_sockarray, + percpu_cgroup_storage, + queue, + stack, + sk_storage, + devmap_hash, + struct_ops, + ringbuf, + _, +}; + +pub const ProgType = extern enum(u32) { + unspec, + socket_filter, + kprobe, + sched_cls, + sched_act, + tracepoint, + xdp, + perf_event, + cgroup_skb, + cgroup_sock, + lwt_in, + lwt_out, + lwt_xmit, + sock_ops, + sk_skb, + cgroup_device, + sk_msg, + raw_tracepoint, + cgroup_sock_addr, + lwt_seg6local, + lirc_mode2, + sk_reuseport, + flow_dissector, + cgroup_sysctl, + raw_tracepoint_writable, + cgroup_sockopt, + tracing, + struct_ops, + ext, + lsm, + sk_lookup, +}; + +pub const AttachType = extern enum(u32) { + cgroup_inet_ingress, + cgroup_inet_egress, + cgroup_inet_sock_create, + cgroup_sock_ops, + sk_skb_stream_parser, + sk_skb_stream_verdict, + cgroup_device, + sk_msg_verdict, + cgroup_inet4_bind, + cgroup_inet6_bind, + cgroup_inet4_connect, + cgroup_inet6_connect, + cgroup_inet4_post_bind, + cgroup_inet6_post_bind, + cgroup_udp4_sendmsg, + cgroup_udp6_sendmsg, + lirc_mode2, + flow_dissector, + cgroup_sysctl, + cgroup_udp4_recvmsg, + cgroup_udp6_recvmsg, + cgroup_getsockopt, + cgroup_setsockopt, + trace_raw_tp, + trace_fentry, + trace_fexit, + modify_return, + lsm_mac, + trace_iter, + cgroup_inet4_getpeername, + cgroup_inet6_getpeername, + cgroup_inet4_getsockname, + cgroup_inet6_getsockname, + xdp_devmap, + cgroup_inet_sock_release, + xdp_cpumap, + sk_lookup, + xdp, + _, +}; + +const obj_name_len = 16; +/// struct used by Cmd.map_create command +pub const MapCreateAttr = extern struct { + /// one of MapType + map_type: u32, + /// size of key in bytes + key_size: u32, + /// size of value in bytes + value_size: u32, + /// max number of entries in a map + max_entries: u32, + /// .map_create related flags + map_flags: u32, + /// fd pointing to the inner map + inner_map_fd: fd_t, + /// numa node (effective only if MapCreateFlags.numa_node is set) + 
numa_node: u32, + map_name: [obj_name_len]u8, + /// ifindex of netdev to create on + map_ifindex: u32, + /// fd pointing to a BTF type data + btf_fd: fd_t, + /// BTF type_id of the key + btf_key_type_id: u32, + /// BTF type_id of the value + bpf_value_type_id: u32, + /// BTF type_id of a kernel struct stored as the map value + btf_vmlinux_value_type_id: u32, +}; + +/// struct used by Cmd.map_*_elem commands +pub const MapElemAttr = extern struct { + map_fd: fd_t, + key: u64, + result: extern union { + value: u64, + next_key: u64, + }, + flags: u64, +}; + +/// struct used by Cmd.map_*_batch commands +pub const MapBatchAttr = extern struct { + /// start batch, NULL to start from beginning + in_batch: u64, + /// output: next start batch + out_batch: u64, + keys: u64, + values: u64, + /// input/output: + /// input: # of key/value elements + /// output: # of filled elements + count: u32, + map_fd: fd_t, + elem_flags: u64, + flags: u64, +}; + +/// struct used by Cmd.prog_load command +pub const ProgLoadAttr = extern struct { + /// one of ProgType + prog_type: u32, + insn_cnt: u32, + insns: u64, + license: u64, + /// verbosity level of verifier + log_level: u32, + /// size of user buffer + log_size: u32, + /// user supplied buffer + log_buf: u64, + /// not used + kern_version: u32, + prog_flags: u32, + prog_name: [obj_name_len]u8, + /// ifindex of netdev to prep for. For some prog types expected attach + /// type must be known at load time to verify attach type specific parts + /// of prog (context accesses, allowed helpers, etc). 
+ prog_ifindex: u32, + expected_attach_type: u32, + /// fd pointing to BTF type data + prog_btf_fd: fd_t, + /// userspace bpf_func_info size + func_info_rec_size: u32, + func_info: u64, + /// number of bpf_func_info records + func_info_cnt: u32, + /// userspace bpf_line_info size + line_info_rec_size: u32, + line_info: u64, + /// number of bpf_line_info records + line_info_cnt: u32, + /// in-kernel BTF type id to attach to + attact_btf_id: u32, + /// 0 to attach to vmlinux + attach_prog_id: u32, +}; + +/// struct used by Cmd.obj_* commands +pub const ObjAttr = extern struct { + pathname: u64, + bpf_fd: fd_t, + file_flags: u32, +}; + +/// struct used by Cmd.prog_attach/detach commands +pub const ProgAttachAttr = extern struct { + /// container object to attach to + target_fd: fd_t, + /// eBPF program to attach + attach_bpf_fd: fd_t, + attach_type: u32, + attach_flags: u32, + // TODO: BPF_F_REPLACE flags + /// previously attached eBPF program to replace if .replace is used + replace_bpf_fd: fd_t, +}; + +/// struct used by Cmd.prog_test_run command +pub const TestAttr = extern struct { + prog_fd: fd_t, + retval: u32, + /// input: len of data_in + data_size_in: u32, + /// input/output: len of data_out. returns ENOSPC if data_out is too small. + data_size_out: u32, + data_in: u64, + data_out: u64, + repeat: u32, + duration: u32, + /// input: len of ctx_in + ctx_size_in: u32, + /// input/output: len of ctx_out. returns ENOSPC if ctx_out is too small. 
+ ctx_size_out: u32, + ctx_in: u64, + ctx_out: u64, +}; + +/// struct used by Cmd.*_get_*_id commands +pub const GetIdAttr = extern struct { + id: extern union { + start_id: u32, + prog_id: u32, + map_id: u32, + btf_id: u32, + link_id: u32, + }, + next_id: u32, + open_flags: u32, +}; + +/// struct used by Cmd.obj_get_info_by_fd command +pub const InfoAttr = extern struct { + bpf_fd: fd_t, + info_len: u32, + info: u64, +}; + +/// struct used by Cmd.prog_query command +pub const QueryAttr = extern struct { + /// container object to query + target_fd: fd_t, + attach_type: u32, + query_flags: u32, + attach_flags: u32, + prog_ids: u64, + prog_cnt: u32, +}; + +/// struct used by Cmd.raw_tracepoint_open command +pub const RawTracepointAttr = extern struct { + name: u64, + prog_fd: fd_t, +}; + +/// struct used by Cmd.btf_load command +pub const BtfLoadAttr = extern struct { + btf: u64, + btf_log_buf: u64, + btf_size: u32, + btf_log_size: u32, + btf_log_level: u32, +}; + +pub const TaskFdQueryAttr = extern struct { + /// input: pid + pid: pid_t, + /// input: fd + fd: fd_t, + /// input: flags + flags: u32, + /// input/output: buf len + buf_len: u32, + /// input/output: + /// tp_name for tracepoint + /// symbol for kprobe + /// filename for uprobe + buf: u64, + /// output: prog_id + prog_id: u32, + /// output: BPF_FD_TYPE + fd_type: u32, + /// output: probe_offset + probe_offset: u64, + /// output: probe_addr + probe_addr: u64, +}; + +/// struct used by Cmd.link_create command +pub const LinkCreateAttr = extern struct { + /// eBPF program to attach + prog_fd: fd_t, + /// object to attach to + target_fd: fd_t, + attach_type: u32, + /// extra flags + flags: u32, +}; + +/// struct used by Cmd.link_update command +pub const LinkUpdateAttr = extern struct { + link_fd: fd_t, + /// new program to update link with + new_prog_fd: fd_t, + /// extra flags + flags: u32, + /// expected link's program fd, it is specified only if BPF_F_REPLACE is + /// set in flags + old_prog_fd: fd_t, +}; 
+ +/// struct used by Cmd.enable_stats command +pub const EnableStatsAttr = extern struct { + type: u32, +}; + +/// struct used by Cmd.iter_create command +pub const IterCreateAttr = extern struct { + link_fd: fd_t, + flags: u32, +}; + +pub const Attr = extern union { + map_create: MapCreateAttr, + map_elem: MapElemAttr, + map_batch: MapBatchAttr, + prog_load: ProgLoadAttr, + obj: ObjAttr, + prog_attach: ProgAttachAttr, + test_run: TestRunAttr, + get_id: GetIdAttr, + info: InfoAttr, + query: QueryAttr, + raw_tracepoint: RawTracepointAttr, + btf_load: BtfLoadAttr, + task_fd_query: TaskFdQueryAttr, + link_create: LinkCreateAttr, + link_update: LinkUpdateAttr, + enable_stats: EnableStatsAttr, + iter_create: IterCreateAttr, +}; diff --git a/lib/std/special/init-exe/build.zig b/lib/std/special/init-exe/build.zig @@ -1,8 +1,3 @@ -// SPDX-License-Identifier: MIT -// Copyright (c) 2015-2020 Zig Contributors -// This file is part of [zig](https://ziglang.org/), which is MIT licensed. -// The MIT license requires this copyright notice to be included in all copies -// and substantial portions of the software. const Builder = @import("std").build.Builder; pub fn build(b: *Builder) void { diff --git a/lib/std/special/init-exe/src/main.zig b/lib/std/special/init-exe/src/main.zig @@ -1,8 +1,3 @@ -// SPDX-License-Identifier: MIT -// Copyright (c) 2015-2020 Zig Contributors -// This file is part of [zig](https://ziglang.org/), which is MIT licensed. -// The MIT license requires this copyright notice to be included in all copies -// and substantial portions of the software. const std = @import("std"); pub fn main() anyerror!void { diff --git a/lib/std/special/init-lib/build.zig b/lib/std/special/init-lib/build.zig @@ -1,8 +1,3 @@ -// SPDX-License-Identifier: MIT -// Copyright (c) 2015-2020 Zig Contributors -// This file is part of [zig](https://ziglang.org/), which is MIT licensed. 
-// The MIT license requires this copyright notice to be included in all copies -// and substantial portions of the software. const Builder = @import("std").build.Builder; pub fn build(b: *Builder) void { diff --git a/lib/std/special/init-lib/src/main.zig b/lib/std/special/init-lib/src/main.zig @@ -1,8 +1,3 @@ -// SPDX-License-Identifier: MIT -// Copyright (c) 2015-2020 Zig Contributors -// This file is part of [zig](https://ziglang.org/), which is MIT licensed. -// The MIT license requires this copyright notice to be included in all copies -// and substantial portions of the software. const std = @import("std"); const testing = std.testing; diff --git a/lib/std/target.zig b/lib/std/target.zig @@ -96,8 +96,12 @@ pub const Target = struct { win10_rs4 = 0x0A000005, win10_rs5 = 0x0A000006, win10_19h1 = 0x0A000007, + win10_20h1 = 0x0A000008, _, + /// Latest Windows version that the Zig Standard Library is aware of + pub const latest = WindowsVersion.win10_20h1; + pub const Range = struct { min: WindowsVersion, max: WindowsVersion, @@ -124,18 +128,17 @@ pub const Target = struct { out_stream: anytype, ) !void { if (fmt.len > 0 and fmt[0] == 's') { - if (@enumToInt(self) >= @enumToInt(WindowsVersion.nt4) and @enumToInt(self) <= @enumToInt(WindowsVersion.win10_19h1)) { + if (@enumToInt(self) >= @enumToInt(WindowsVersion.nt4) and @enumToInt(self) <= @enumToInt(WindowsVersion.latest)) { try std.fmt.format(out_stream, ".{}", .{@tagName(self)}); } else { - try std.fmt.format(out_stream, "@intToEnum(Target.Os.WindowsVersion, {})", .{@enumToInt(self)}); + // TODO this code path breaks zig triples, but it is used in `builtin` + try std.fmt.format(out_stream, "@intToEnum(Target.Os.WindowsVersion, 0x{X:0>8})", .{@enumToInt(self)}); } } else { - if (@enumToInt(self) >= @enumToInt(WindowsVersion.nt4) and @enumToInt(self) <= @enumToInt(WindowsVersion.win10_19h1)) { + if (@enumToInt(self) >= @enumToInt(WindowsVersion.nt4) and @enumToInt(self) <= @enumToInt(WindowsVersion.latest)) { 
try std.fmt.format(out_stream, "WindowsVersion.{}", .{@tagName(self)}); } else { - try std.fmt.format(out_stream, "WindowsVersion(", .{@typeName(@This())}); - try std.fmt.format(out_stream, "{}", .{@enumToInt(self)}); - try out_stream.writeAll(")"); + try std.fmt.format(out_stream, "WindowsVersion(0x{X:0>8})", .{@enumToInt(self)}); } } } @@ -280,7 +283,7 @@ pub const Target = struct { .windows => return .{ .windows = .{ .min = .win8_1, - .max = .win10_19h1, + .max = WindowsVersion.latest, }, }, } @@ -663,6 +666,9 @@ pub const Target = struct { renderscript32, renderscript64, ve, + // Stage1 currently assumes that architectures above this comment + // map one-to-one with the ZigLLVM_ArchType enum. + spu_2, pub fn isARM(arch: Arch) bool { return switch (arch) { @@ -761,6 +767,7 @@ pub const Target = struct { .sparcv9 => ._SPARCV9, .s390x => ._S390, .ve => ._NONE, + .spu_2 => ._SPU_2, }; } @@ -803,6 +810,7 @@ pub const Target = struct { .renderscript64, .shave, .ve, + .spu_2, => .Little, .arc, @@ -827,6 +835,7 @@ pub const Target = struct { switch (arch) { .avr, .msp430, + .spu_2, => return 16, .arc, @@ -1317,12 +1326,13 @@ pub const Target = struct { .bpfeb, .nvptx, .nvptx64, + .spu_2, + .avr, => return result, // TODO go over each item in this list and either move it to the above list, or // implement the standard dynamic linker path code for it. .arc, - .avr, .hexagon, .msp430, .r600, diff --git a/lib/std/zig/system.zig b/lib/std/zig/system.zig @@ -249,7 +249,7 @@ pub const NativeTargetInfo = struct { // values const known_build_numbers = [_]u32{ 10240, 10586, 14393, 15063, 16299, 17134, 17763, - 18362, 18363, + 18362, 19041, }; var last_idx: usize = 0; for (known_build_numbers) |build, i| { diff --git a/src-self-hosted/Module.zig b/src-self-hosted/Module.zig @@ -80,6 +80,9 @@ deletion_set: std.ArrayListUnmanaged(*Decl) = .{}, root_name: []u8, keep_source_files_loaded: bool, +/// Error tags and their values, tag names are duped with mod.gpa. 
+global_error_set: std.StringHashMapUnmanaged(u16) = .{}, + pub const InnerError = error{ OutOfMemory, AnalysisFail }; const WorkItem = union(enum) { @@ -928,6 +931,11 @@ pub fn deinit(self: *Module) void { self.symbol_exports.deinit(gpa); self.root_scope.destroy(gpa); + + for (self.global_error_set.items()) |entry| { + gpa.free(entry.key); + } + self.global_error_set.deinit(gpa); self.* = undefined; } @@ -2072,6 +2080,18 @@ fn createNewDecl( return new_decl; } +/// Get error value for error tag `name`. +pub fn getErrorValue(self: *Module, name: []const u8) !std.StringHashMapUnmanaged(u16).Entry { + const gop = try self.global_error_set.getOrPut(self.gpa, name); + if (gop.found_existing) + return gop.entry.*; + errdefer self.global_error_set.removeAssertDiscard(name); + + gop.entry.key = try self.gpa.dupe(u8, name); + gop.entry.value = @intCast(u16, self.global_error_set.items().len - 1); + return gop.entry.*; +} + /// TODO split this into `requireRuntimeBlock` and `requireFunctionBlock` and audit callsites. 
pub fn requireRuntimeBlock(self: *Module, scope: *Scope, src: usize) !*Scope.Block { return scope.cast(Scope.Block) orelse @@ -3309,6 +3329,28 @@ pub fn arrayType(self: *Module, scope: *Scope, len: u64, sentinel: ?Value, elem_ return Type.initPayload(&payload.base); } +pub fn errorUnionType(self: *Module, scope: *Scope, error_set: Type, payload: Type) Allocator.Error!Type { + assert(error_set.zigTypeTag() == .ErrorSet); + if (error_set.eql(Type.initTag(.anyerror)) and payload.eql(Type.initTag(.void))) { + return Type.initTag(.anyerror_void_error_union); + } + + const result = try scope.arena().create(Type.Payload.ErrorUnion); + result.* = .{ + .error_set = error_set, + .payload = payload, + }; + return Type.initPayload(&result.base); +} + +pub fn anyframeType(self: *Module, scope: *Scope, return_type: Type) Allocator.Error!Type { + const result = try scope.arena().create(Type.Payload.AnyFrame); + result.* = .{ + .return_type = return_type, + }; + return Type.initPayload(&result.base); +} + pub fn dumpInst(self: *Module, scope: *Scope, inst: *Inst) void { const zir_module = scope.namespace(); const source = zir_module.getSource(self) catch @panic("dumpInst failed to get source"); diff --git a/src-self-hosted/astgen.zig b/src-self-hosted/astgen.zig @@ -18,9 +18,7 @@ pub const ResultLoc = union(enum) { /// The expression has an inferred type, and it will be evaluated as an rvalue. none, /// The expression must generate a pointer rather than a value. For example, the left hand side - /// of an assignment uses an "LValue" result location. - lvalue, - /// The expression must generate a pointer + /// of an assignment uses this kind of result location. ref, /// The expression will be type coerced into this type, but it will be evaluated as an rvalue. ty: *zir.Inst, @@ -46,134 +44,136 @@ pub fn typeExpr(mod: *Module, scope: *Scope, type_node: *ast.Node) InnerError!*z return expr(mod, scope, type_rl, type_node); } -/// Turn Zig AST into untyped ZIR istructions. 
-pub fn expr(mod: *Module, scope: *Scope, rl: ResultLoc, node: *ast.Node) InnerError!*zir.Inst { - if (rl == .lvalue) { - switch (node.tag) { - .Root => unreachable, - .Use => unreachable, - .TestDecl => unreachable, - .DocComment => unreachable, - .VarDecl => unreachable, - .SwitchCase => unreachable, - .SwitchElse => unreachable, - .Else => unreachable, - .Payload => unreachable, - .PointerPayload => unreachable, - .PointerIndexPayload => unreachable, - .ErrorTag => unreachable, - .FieldInitializer => unreachable, - .ContainerField => unreachable, - - .Assign, - .AssignBitAnd, - .AssignBitOr, - .AssignBitShiftLeft, - .AssignBitShiftRight, - .AssignBitXor, - .AssignDiv, - .AssignSub, - .AssignSubWrap, - .AssignMod, - .AssignAdd, - .AssignAddWrap, - .AssignMul, - .AssignMulWrap, - .Add, - .AddWrap, - .Sub, - .SubWrap, - .Mul, - .MulWrap, - .Div, - .Mod, - .BitAnd, - .BitOr, - .BitShiftLeft, - .BitShiftRight, - .BitXor, - .BangEqual, - .EqualEqual, - .GreaterThan, - .GreaterOrEqual, - .LessThan, - .LessOrEqual, - .ArrayCat, - .ArrayMult, - .BoolAnd, - .BoolOr, - .Asm, - .StringLiteral, - .IntegerLiteral, - .Call, - .Unreachable, - .Return, - .If, - .While, - .BoolNot, - .AddressOf, - .FloatLiteral, - .UndefinedLiteral, - .BoolLiteral, - .NullLiteral, - .OptionalType, - .Block, - .LabeledBlock, - .Break, - .PtrType, - .GroupedExpression, - .ArrayType, - .ArrayTypeSentinel, - .EnumLiteral, - .MultilineStringLiteral, - .CharLiteral, - .Defer, - .Catch, - .ErrorUnion, - .MergeErrorSets, - .Range, - .OrElse, - .Await, - .BitNot, - .Negation, - .NegationWrap, - .Resume, - .Try, - .SliceType, - .Slice, - .ArrayInitializer, - .ArrayInitializerDot, - .StructInitializer, - .StructInitializerDot, - .Switch, - .For, - .Suspend, - .Continue, - .AnyType, - .ErrorType, - .FnProto, - .AnyFrameType, - .ErrorSetDecl, - .ContainerDecl, - .Comptime, - .Nosuspend, - => return mod.failNode(scope, node, "invalid left-hand side to assignment", .{}), - - // @field can be assigned to - 
.BuiltinCall => { - const call = node.castTag(.BuiltinCall).?; - const tree = scope.tree(); - const builtin_name = tree.tokenSlice(call.builtin_token); - - if (!mem.eql(u8, builtin_name, "@field")) { - return mod.failNode(scope, node, "invalid left-hand side to assignment", .{}); - } - }, +fn lvalExpr(mod: *Module, scope: *Scope, node: *ast.Node) InnerError!*zir.Inst { + switch (node.tag) { + .Root => unreachable, + .Use => unreachable, + .TestDecl => unreachable, + .DocComment => unreachable, + .VarDecl => unreachable, + .SwitchCase => unreachable, + .SwitchElse => unreachable, + .Else => unreachable, + .Payload => unreachable, + .PointerPayload => unreachable, + .PointerIndexPayload => unreachable, + .ErrorTag => unreachable, + .FieldInitializer => unreachable, + .ContainerField => unreachable, + + .Assign, + .AssignBitAnd, + .AssignBitOr, + .AssignBitShiftLeft, + .AssignBitShiftRight, + .AssignBitXor, + .AssignDiv, + .AssignSub, + .AssignSubWrap, + .AssignMod, + .AssignAdd, + .AssignAddWrap, + .AssignMul, + .AssignMulWrap, + .Add, + .AddWrap, + .Sub, + .SubWrap, + .Mul, + .MulWrap, + .Div, + .Mod, + .BitAnd, + .BitOr, + .BitShiftLeft, + .BitShiftRight, + .BitXor, + .BangEqual, + .EqualEqual, + .GreaterThan, + .GreaterOrEqual, + .LessThan, + .LessOrEqual, + .ArrayCat, + .ArrayMult, + .BoolAnd, + .BoolOr, + .Asm, + .StringLiteral, + .IntegerLiteral, + .Call, + .Unreachable, + .Return, + .If, + .While, + .BoolNot, + .AddressOf, + .FloatLiteral, + .UndefinedLiteral, + .BoolLiteral, + .NullLiteral, + .OptionalType, + .Block, + .LabeledBlock, + .Break, + .PtrType, + .GroupedExpression, + .ArrayType, + .ArrayTypeSentinel, + .EnumLiteral, + .MultilineStringLiteral, + .CharLiteral, + .Defer, + .Catch, + .ErrorUnion, + .MergeErrorSets, + .Range, + .OrElse, + .Await, + .BitNot, + .Negation, + .NegationWrap, + .Resume, + .Try, + .SliceType, + .Slice, + .ArrayInitializer, + .ArrayInitializerDot, + .StructInitializer, + .StructInitializerDot, + .Switch, + .For, + .Suspend, + 
.Continue, + .AnyType, + .ErrorType, + .FnProto, + .AnyFrameType, + .ErrorSetDecl, + .ContainerDecl, + .Comptime, + .Nosuspend, + => return mod.failNode(scope, node, "invalid left-hand side to assignment", .{}), + + // @field can be assigned to + .BuiltinCall => { + const call = node.castTag(.BuiltinCall).?; + const tree = scope.tree(); + const builtin_name = tree.tokenSlice(call.builtin_token); + + if (!mem.eql(u8, builtin_name, "@field")) { + return mod.failNode(scope, node, "invalid left-hand side to assignment", .{}); + } + }, - // can be assigned to - .UnwrapOptional, .Deref, .Period, .ArrayAccess, .Identifier => {}, - } + // can be assigned to + .UnwrapOptional, .Deref, .Period, .ArrayAccess, .Identifier => {}, } + return expr(mod, scope, .ref, node); +} + +/// Turn Zig AST into untyped ZIR instructions. +pub fn expr(mod: *Module, scope: *Scope, rl: ResultLoc, node: *ast.Node) InnerError!*zir.Inst { switch (node.tag) { .Root => unreachable, // Top-level declaration. .Use => unreachable, // Top-level declaration. 
@@ -232,6 +232,11 @@ pub fn expr(mod: *Module, scope: *Scope, rl: ResultLoc, node: *ast.Node) InnerEr .BoolAnd => return boolBinOp(mod, scope, rl, node.castTag(.BoolAnd).?), .BoolOr => return boolBinOp(mod, scope, rl, node.castTag(.BoolOr).?), + .BoolNot => return rlWrap(mod, scope, rl, try boolNot(mod, scope, node.castTag(.BoolNot).?)), + .BitNot => return rlWrap(mod, scope, rl, try bitNot(mod, scope, node.castTag(.BitNot).?)), + .Negation => return rlWrap(mod, scope, rl, try negation(mod, scope, node.castTag(.Negation).?, .sub)), + .NegationWrap => return rlWrap(mod, scope, rl, try negation(mod, scope, node.castTag(.NegationWrap).?, .subwrap)), + .Identifier => return try identifier(mod, scope, rl, node.castTag(.Identifier).?), .Asm => return rlWrap(mod, scope, rl, try assembly(mod, scope, node.castTag(.Asm).?)), .StringLiteral => return rlWrap(mod, scope, rl, try stringLiteral(mod, scope, node.castTag(.StringLiteral).?)), @@ -242,9 +247,8 @@ pub fn expr(mod: *Module, scope: *Scope, rl: ResultLoc, node: *ast.Node) InnerEr .Return => return ret(mod, scope, node.castTag(.Return).?), .If => return ifExpr(mod, scope, rl, node.castTag(.If).?), .While => return whileExpr(mod, scope, rl, node.castTag(.While).?), - .Period => return rlWrap(mod, scope, rl, try field(mod, scope, node.castTag(.Period).?)), + .Period => return field(mod, scope, rl, node.castTag(.Period).?), .Deref => return rlWrap(mod, scope, rl, try deref(mod, scope, node.castTag(.Deref).?)), - .BoolNot => return rlWrap(mod, scope, rl, try boolNot(mod, scope, node.castTag(.BoolNot).?)), .AddressOf => return rlWrap(mod, scope, rl, try addressOf(mod, scope, node.castTag(.AddressOf).?)), .FloatLiteral => return rlWrap(mod, scope, rl, try floatLiteral(mod, scope, node.castTag(.FloatLiteral).?)), .UndefinedLiteral => return rlWrap(mod, scope, rl, try undefLiteral(mod, scope, node.castTag(.UndefinedLiteral).?)), @@ -263,17 +267,17 @@ pub fn expr(mod: *Module, scope: *Scope, rl: ResultLoc, node: *ast.Node) InnerEr 
.MultilineStringLiteral => return rlWrap(mod, scope, rl, try multilineStrLiteral(mod, scope, node.castTag(.MultilineStringLiteral).?)), .CharLiteral => return rlWrap(mod, scope, rl, try charLiteral(mod, scope, node.castTag(.CharLiteral).?)), .SliceType => return rlWrap(mod, scope, rl, try sliceType(mod, scope, node.castTag(.SliceType).?)), + .ErrorUnion => return rlWrap(mod, scope, rl, try typeInixOp(mod, scope, node.castTag(.ErrorUnion).?, .error_union_type)), + .MergeErrorSets => return rlWrap(mod, scope, rl, try typeInixOp(mod, scope, node.castTag(.MergeErrorSets).?, .merge_error_sets)), + .AnyFrameType => return rlWrap(mod, scope, rl, try anyFrameType(mod, scope, node.castTag(.AnyFrameType).?)), + .ErrorSetDecl => return errorSetDecl(mod, scope, rl, node.castTag(.ErrorSetDecl).?), + .ErrorType => return rlWrap(mod, scope, rl, try errorType(mod, scope, node.castTag(.ErrorType).?)), .Defer => return mod.failNode(scope, node, "TODO implement astgen.expr for .Defer", .{}), .Catch => return mod.failNode(scope, node, "TODO implement astgen.expr for .Catch", .{}), - .ErrorUnion => return mod.failNode(scope, node, "TODO implement astgen.expr for .ErrorUnion", .{}), - .MergeErrorSets => return mod.failNode(scope, node, "TODO implement astgen.expr for .MergeErrorSets", .{}), .Range => return mod.failNode(scope, node, "TODO implement astgen.expr for .Range", .{}), .OrElse => return mod.failNode(scope, node, "TODO implement astgen.expr for .OrElse", .{}), .Await => return mod.failNode(scope, node, "TODO implement astgen.expr for .Await", .{}), - .BitNot => return mod.failNode(scope, node, "TODO implement astgen.expr for .BitNot", .{}), - .Negation => return mod.failNode(scope, node, "TODO implement astgen.expr for .Negation", .{}), - .NegationWrap => return mod.failNode(scope, node, "TODO implement astgen.expr for .NegationWrap", .{}), .Resume => return mod.failNode(scope, node, "TODO implement astgen.expr for .Resume", .{}), .Try => return mod.failNode(scope, node, "TODO 
implement astgen.expr for .Try", .{}), .Slice => return mod.failNode(scope, node, "TODO implement astgen.expr for .Slice", .{}), @@ -287,10 +291,7 @@ pub fn expr(mod: *Module, scope: *Scope, rl: ResultLoc, node: *ast.Node) InnerEr .Suspend => return mod.failNode(scope, node, "TODO implement astgen.expr for .Suspend", .{}), .Continue => return mod.failNode(scope, node, "TODO implement astgen.expr for .Continue", .{}), .AnyType => return mod.failNode(scope, node, "TODO implement astgen.expr for .AnyType", .{}), - .ErrorType => return mod.failNode(scope, node, "TODO implement astgen.expr for .ErrorType", .{}), .FnProto => return mod.failNode(scope, node, "TODO implement astgen.expr for .FnProto", .{}), - .AnyFrameType => return mod.failNode(scope, node, "TODO implement astgen.expr for .AnyFrameType", .{}), - .ErrorSetDecl => return mod.failNode(scope, node, "TODO implement astgen.expr for .ErrorSetDecl", .{}), .ContainerDecl => return mod.failNode(scope, node, "TODO implement astgen.expr for .ContainerDecl", .{}), .Comptime => return mod.failNode(scope, node, "TODO implement astgen.expr for .Comptime", .{}), .Nosuspend => return mod.failNode(scope, node, "TODO implement astgen.expr for .Nosuspend", .{}), @@ -316,7 +317,7 @@ fn breakExpr(mod: *Module, parent_scope: *Scope, node: *ast.Node.ControlFlowExpr // proper type inference requires peer type resolution on the block's // break operand expressions. 
const branch_rl: ResultLoc = switch (label.result_loc) { - .discard, .none, .ty, .ptr, .lvalue, .ref => label.result_loc, + .discard, .none, .ty, .ptr, .ref => label.result_loc, .inferred_ptr, .bitcasted_ptr, .block_ptr => .{ .block_ptr = label.block_inst }, }; const operand = try expr(mod, parent_scope, branch_rl, rhs); @@ -458,7 +459,9 @@ fn varDecl( const tree = scope.tree(); const name_src = tree.token_locs[node.name_token].start; const ident_name = try identifierTokenString(mod, scope, node.name_token); - const init_node = node.getTrailer("init_node").?; + const init_node = node.getTrailer("init_node") orelse + return mod.fail(scope, name_src, "variables must be initialized", .{}); + switch (tree.token_ids[node.mut_token]) { .Keyword_const => { // Depending on the type of AST the initialization expression is, we may need an lvalue @@ -521,7 +524,7 @@ fn assign(mod: *Module, scope: *Scope, infix_node: *ast.Node.SimpleInfixOp) Inne return; } } - const lvalue = try expr(mod, scope, .lvalue, infix_node.lhs); + const lvalue = try lvalExpr(mod, scope, infix_node.lhs); _ = try expr(mod, scope, .{ .ptr = lvalue }, infix_node.rhs); } @@ -531,7 +534,7 @@ fn assignOp( infix_node: *ast.Node.SimpleInfixOp, op_inst_tag: zir.Inst.Tag, ) InnerError!void { - const lhs_ptr = try expr(mod, scope, .lvalue, infix_node.lhs); + const lhs_ptr = try lvalExpr(mod, scope, infix_node.lhs); const lhs = try addZIRUnOp(mod, scope, lhs_ptr.src, .deref, lhs_ptr); const lhs_type = try addZIRUnOp(mod, scope, lhs_ptr.src, .typeof, lhs); const rhs = try expr(mod, scope, .{ .ty = lhs_type }, infix_node.rhs); @@ -554,6 +557,26 @@ fn boolNot(mod: *Module, scope: *Scope, node: *ast.Node.SimplePrefixOp) InnerErr return addZIRUnOp(mod, scope, src, .boolnot, operand); } +fn bitNot(mod: *Module, scope: *Scope, node: *ast.Node.SimplePrefixOp) InnerError!*zir.Inst { + const tree = scope.tree(); + const src = tree.token_locs[node.op_token].start; + const operand = try expr(mod, scope, .none, node.rhs); + 
return addZIRUnOp(mod, scope, src, .bitnot, operand); +} + +fn negation(mod: *Module, scope: *Scope, node: *ast.Node.SimplePrefixOp, op_inst_tag: zir.Inst.Tag) InnerError!*zir.Inst { + const tree = scope.tree(); + const src = tree.token_locs[node.op_token].start; + + const lhs = try addZIRInstConst(mod, scope, src, .{ + .ty = Type.initTag(.comptime_int), + .val = Value.initTag(.zero), + }); + const rhs = try expr(mod, scope, .none, node.rhs); + + return addZIRBinOp(mod, scope, src, op_inst_tag, lhs, rhs); +} + fn addressOf(mod: *Module, scope: *Scope, node: *ast.Node.SimplePrefixOp) InnerError!*zir.Inst { return expr(mod, scope, .ref, node.rhs); } @@ -561,11 +584,7 @@ fn addressOf(mod: *Module, scope: *Scope, node: *ast.Node.SimplePrefixOp) InnerE fn optionalType(mod: *Module, scope: *Scope, node: *ast.Node.SimplePrefixOp) InnerError!*zir.Inst { const tree = scope.tree(); const src = tree.token_locs[node.op_token].start; - const meta_type = try addZIRInstConst(mod, scope, src, .{ - .ty = Type.initTag(.type), - .val = Value.initTag(.type_type), - }); - const operand = try expr(mod, scope, .{ .ty = meta_type }, node.rhs); + const operand = try typeExpr(mod, scope, node.rhs); return addZIRUnOp(mod, scope, src, .optional_type, operand); } @@ -590,18 +609,13 @@ fn ptrType(mod: *Module, scope: *Scope, node: *ast.Node.PtrType) InnerError!*zir } fn ptrSliceType(mod: *Module, scope: *Scope, src: usize, ptr_info: *ast.PtrInfo, rhs: *ast.Node, size: std.builtin.TypeInfo.Pointer.Size) InnerError!*zir.Inst { - const meta_type = try addZIRInstConst(mod, scope, src, .{ - .ty = Type.initTag(.type), - .val = Value.initTag(.type_type), - }); - const simple = ptr_info.allowzero_token == null and ptr_info.align_info == null and ptr_info.volatile_token == null and ptr_info.sentinel == null; if (simple) { - const child_type = try expr(mod, scope, .{ .ty = meta_type }, rhs); + const child_type = try typeExpr(mod, scope, rhs); const mutable = ptr_info.const_token == null; // TODO stage1 
type inference bug const T = zir.Inst.Tag; @@ -629,7 +643,7 @@ fn ptrSliceType(mod: *Module, scope: *Scope, src: usize, ptr_info: *ast.PtrInfo, kw_args.sentinel = try expr(mod, scope, .none, some); } - const child_type = try expr(mod, scope, .{ .ty = meta_type }, rhs); + const child_type = try typeExpr(mod, scope, rhs); if (kw_args.sentinel) |some| { kw_args.sentinel = try addZIRBinOp(mod, scope, some.src, .as, child_type, some); } @@ -640,10 +654,6 @@ fn ptrSliceType(mod: *Module, scope: *Scope, src: usize, ptr_info: *ast.PtrInfo, fn arrayType(mod: *Module, scope: *Scope, node: *ast.Node.ArrayType) !*zir.Inst { const tree = scope.tree(); const src = tree.token_locs[node.op_token].start; - const meta_type = try addZIRInstConst(mod, scope, src, .{ - .ty = Type.initTag(.type), - .val = Value.initTag(.type_type), - }); const usize_type = try addZIRInstConst(mod, scope, src, .{ .ty = Type.initTag(.type), .val = Value.initTag(.usize_type), @@ -651,18 +661,14 @@ fn arrayType(mod: *Module, scope: *Scope, node: *ast.Node.ArrayType) !*zir.Inst // TODO check for [_]T const len = try expr(mod, scope, .{ .ty = usize_type }, node.len_expr); - const child_type = try expr(mod, scope, .{ .ty = meta_type }, node.rhs); + const elem_type = try typeExpr(mod, scope, node.rhs); - return addZIRBinOp(mod, scope, src, .array_type, len, child_type); + return addZIRBinOp(mod, scope, src, .array_type, len, elem_type); } fn arrayTypeSentinel(mod: *Module, scope: *Scope, node: *ast.Node.ArrayTypeSentinel) !*zir.Inst { const tree = scope.tree(); const src = tree.token_locs[node.op_token].start; - const meta_type = try addZIRInstConst(mod, scope, src, .{ - .ty = Type.initTag(.type), - .val = Value.initTag(.type_type), - }); const usize_type = try addZIRInstConst(mod, scope, src, .{ .ty = Type.initTag(.type), .val = Value.initTag(.usize_type), @@ -671,7 +677,7 @@ fn arrayTypeSentinel(mod: *Module, scope: *Scope, node: *ast.Node.ArrayTypeSenti // TODO check for [_]T const len = try expr(mod, scope, 
.{ .ty = usize_type }, node.len_expr); const sentinel_uncasted = try expr(mod, scope, .none, node.sentinel); - const elem_type = try expr(mod, scope, .{ .ty = meta_type }, node.rhs); + const elem_type = try typeExpr(mod, scope, node.rhs); const sentinel = try addZIRBinOp(mod, scope, src, .as, elem_type, sentinel_uncasted); return addZIRInst(mod, scope, src, zir.Inst.ArrayTypeSentinel, .{ @@ -681,6 +687,28 @@ fn arrayTypeSentinel(mod: *Module, scope: *Scope, node: *ast.Node.ArrayTypeSenti }, .{}); } +fn anyFrameType(mod: *Module, scope: *Scope, node: *ast.Node.AnyFrameType) InnerError!*zir.Inst { + const tree = scope.tree(); + const src = tree.token_locs[node.anyframe_token].start; + if (node.result) |some| { + const return_type = try typeExpr(mod, scope, some.return_type); + return addZIRUnOp(mod, scope, src, .anyframe_type, return_type); + } else { + return addZIRInstConst(mod, scope, src, .{ + .ty = Type.initTag(.type), + .val = Value.initTag(.anyframe_type), + }); + } +} + +fn typeInixOp(mod: *Module, scope: *Scope, node: *ast.Node.SimpleInfixOp, op_inst_tag: zir.Inst.Tag) InnerError!*zir.Inst { + const tree = scope.tree(); + const src = tree.token_locs[node.op_token].start; + const error_set = try typeExpr(mod, scope, node.lhs); + const payload = try typeExpr(mod, scope, node.rhs); + return addZIRBinOp(mod, scope, src, op_inst_tag, error_set, payload); +} + fn enumLiteral(mod: *Module, scope: *Scope, node: *ast.Node.EnumLiteral) !*zir.Inst { const tree = scope.tree(); const src = tree.token_locs[node.name].start; @@ -694,10 +722,31 @@ fn unwrapOptional(mod: *Module, scope: *Scope, rl: ResultLoc, node: *ast.Node.Si const src = tree.token_locs[node.rtoken].start; const operand = try expr(mod, scope, .ref, node.lhs); - const unwrapped_ptr = try addZIRUnOp(mod, scope, src, .unwrap_optional_safe, operand); - if (rl == .lvalue or rl == .ref) return unwrapped_ptr; + return rlWrapPtr(mod, scope, rl, try addZIRUnOp(mod, scope, src, .unwrap_optional_safe, operand)); +} + 
+fn errorSetDecl(mod: *Module, scope: *Scope, rl: ResultLoc, node: *ast.Node.ErrorSetDecl) InnerError!*zir.Inst { + const tree = scope.tree(); + const src = tree.token_locs[node.error_token].start; + const decls = node.decls(); + const fields = try scope.arena().alloc([]const u8, decls.len); + + for (decls) |decl, i| { + const tag = decl.castTag(.ErrorTag).?; + fields[i] = try identifierTokenString(mod, scope, tag.name_token); + } - return rlWrap(mod, scope, rl, try addZIRUnOp(mod, scope, src, .deref, unwrapped_ptr)); + // analyzing the error set results in a decl ref, so we might need to dereference it + return rlWrapPtr(mod, scope, rl, try addZIRInst(mod, scope, src, zir.Inst.ErrorSet, .{ .fields = fields }, .{})); +} + +fn errorType(mod: *Module, scope: *Scope, node: *ast.Node.OneToken) InnerError!*zir.Inst { + const tree = scope.tree(); + const src = tree.token_locs[node.token].start; + return addZIRInstConst(mod, scope, src, .{ + .ty = Type.initTag(.type), + .val = Value.initTag(.anyerror_type), + }); } /// Return whether the identifier names of two tokens are equal. Resolves @"" tokens without allocating. 
@@ -737,16 +786,16 @@ pub fn identifierStringInst(mod: *Module, scope: *Scope, node: *ast.Node.OneToke return addZIRInst(mod, scope, src, zir.Inst.Str, .{ .bytes = ident_name }, .{}); } -fn field(mod: *Module, scope: *Scope, node: *ast.Node.SimpleInfixOp) InnerError!*zir.Inst { - // TODO introduce lvalues +fn field(mod: *Module, scope: *Scope, rl: ResultLoc, node: *ast.Node.SimpleInfixOp) InnerError!*zir.Inst { const tree = scope.tree(); const src = tree.token_locs[node.op_token].start; - const lhs = try expr(mod, scope, .none, node.lhs); + const lhs = try expr(mod, scope, .ref, node.lhs); const field_name = try identifierStringInst(mod, scope, node.rhs.castTag(.Identifier).?); const pointer = try addZIRInst(mod, scope, src, zir.Inst.FieldPtr, .{ .object_ptr = lhs, .field_name = field_name }, .{}); - return addZIRUnOp(mod, scope, src, .deref, pointer); + if (rl == .ref) return pointer; + return rlWrap(mod, scope, rl, try addZIRUnOp(mod, scope, src, .deref, pointer)); } fn deref(mod: *Module, scope: *Scope, node: *ast.Node.SimpleSuffixOp) InnerError!*zir.Inst { @@ -971,7 +1020,7 @@ fn ifExpr(mod: *Module, scope: *Scope, rl: ResultLoc, if_node: *ast.Node.If) Inn // proper type inference requires peer type resolution on the if's // branches. const branch_rl: ResultLoc = switch (rl) { - .discard, .none, .ty, .ptr, .lvalue, .ref => rl, + .discard, .none, .ty, .ptr, .ref => rl, .inferred_ptr, .bitcasted_ptr, .block_ptr => .{ .block_ptr = block }, }; @@ -1101,7 +1150,7 @@ fn whileExpr(mod: *Module, scope: *Scope, rl: ResultLoc, while_node: *ast.Node.W // proper type inference requires peer type resolution on the while's // branches. 
const branch_rl: ResultLoc = switch (rl) { - .discard, .none, .ty, .ptr, .lvalue, .ref => rl, + .discard, .none, .ty, .ptr, .ref => rl, .inferred_ptr, .bitcasted_ptr, .block_ptr => .{ .block_ptr = while_block }, }; @@ -1232,12 +1281,7 @@ fn identifier(mod: *Module, scope: *Scope, rl: ResultLoc, ident: *ast.Node.OneTo .local_ptr => { const local_ptr = s.cast(Scope.LocalPtr).?; if (mem.eql(u8, local_ptr.name, ident_name)) { - if (rl == .lvalue or rl == .ref) { - return local_ptr.ptr; - } else { - const result = try addZIRUnOp(mod, scope, src, .deref, local_ptr.ptr); - return rlWrap(mod, scope, rl, result); - } + return rlWrapPtr(mod, scope, rl, local_ptr.ptr); } s = local_ptr.parent; }, @@ -1247,10 +1291,7 @@ fn identifier(mod: *Module, scope: *Scope, rl: ResultLoc, ident: *ast.Node.OneTo } if (mod.lookupDeclName(scope, ident_name)) |decl| { - const result = try addZIRInst(mod, scope, src, zir.Inst.DeclValInModule, .{ .decl = decl }, .{}); - if (rl == .lvalue or rl == .ref) - return result; - return rlWrap(mod, scope, rl, try addZIRUnOp(mod, scope, src, .deref, result)); + return rlWrapPtr(mod, scope, rl, try addZIRInst(mod, scope, src, zir.Inst.DeclValInModule, .{ .decl = decl }, .{})); } return mod.failNode(scope, &ident.base, "use of undeclared identifier '{}'", .{ident_name}); @@ -1466,12 +1507,8 @@ fn simpleCast( try ensureBuiltinParamCount(mod, scope, call, 2); const tree = scope.tree(); const src = tree.token_locs[call.builtin_token].start; - const type_type = try addZIRInstConst(mod, scope, src, .{ - .ty = Type.initTag(.type), - .val = Value.initTag(.type_type), - }); const params = call.params(); - const dest_type = try expr(mod, scope, .{ .ty = type_type }, params[0]); + const dest_type = try typeExpr(mod, scope, params[0]); const rhs = try expr(mod, scope, .none, params[1]); const result = try addZIRBinOp(mod, scope, src, inst_tag, dest_type, rhs); return rlWrap(mod, scope, rl, result); @@ -1498,7 +1535,6 @@ fn as(mod: *Module, scope: *Scope, rl: 
ResultLoc, call: *ast.Node.BuiltinCall) I _ = try addZIRUnOp(mod, scope, result.src, .ensure_result_non_error, result); return result; }, - .lvalue => unreachable, .ref => { const result = try expr(mod, scope, .{ .ty = dest_type }, params[1]); return addZIRUnOp(mod, scope, result.src, .ref, result); @@ -1533,12 +1569,8 @@ fn bitCast(mod: *Module, scope: *Scope, rl: ResultLoc, call: *ast.Node.BuiltinCa try ensureBuiltinParamCount(mod, scope, call, 2); const tree = scope.tree(); const src = tree.token_locs[call.builtin_token].start; - const type_type = try addZIRInstConst(mod, scope, src, .{ - .ty = Type.initTag(.type), - .val = Value.initTag(.type_type), - }); const params = call.params(); - const dest_type = try expr(mod, scope, .{ .ty = type_type }, params[0]); + const dest_type = try typeExpr(mod, scope, params[0]); switch (rl) { .none => { const operand = try expr(mod, scope, .none, params[1]); @@ -1550,7 +1582,6 @@ fn bitCast(mod: *Module, scope: *Scope, rl: ResultLoc, call: *ast.Node.BuiltinCa _ = try addZIRUnOp(mod, scope, result.src, .ensure_result_non_error, result); return result; }, - .lvalue => unreachable, .ref => { const operand = try expr(mod, scope, .ref, params[1]); const result = try addZIRBinOp(mod, scope, src, .bitcast_ref, dest_type, operand); @@ -1818,7 +1849,7 @@ fn rlWrap(mod: *Module, scope: *Scope, rl: ResultLoc, result: *zir.Inst) InnerEr _ = try addZIRUnOp(mod, scope, result.src, .ensure_result_non_error, result); return result; }, - .lvalue, .ref => { + .ref => { // We need a pointer but we have a value. 
return addZIRUnOp(mod, scope, result.src, .ref, result); }, @@ -1852,6 +1883,12 @@ fn rlWrapVoid(mod: *Module, scope: *Scope, rl: ResultLoc, node: *ast.Node, resul return rlWrap(mod, scope, rl, void_inst); } +fn rlWrapPtr(mod: *Module, scope: *Scope, rl: ResultLoc, ptr: *zir.Inst) InnerError!*zir.Inst { + if (rl == .ref) return ptr; + + return rlWrap(mod, scope, rl, try addZIRUnOp(mod, scope, ptr.src, .deref, ptr)); +} + pub fn addZIRInstSpecial( mod: *Module, scope: *Scope, diff --git a/src-self-hosted/codegen.zig b/src-self-hosted/codegen.zig @@ -14,6 +14,7 @@ const Allocator = mem.Allocator; const trace = @import("tracy.zig").trace; const DW = std.dwarf; const leb128 = std.debug.leb; +const log = std.log.scoped(.codegen); // TODO Turn back on zig fmt when https://github.com/ziglang/zig/issues/5948 is implemented. // zig fmt: off @@ -75,8 +76,8 @@ pub fn generateSymbol( switch (bin_file.options.target.cpu.arch) { .wasm32 => unreachable, // has its own code path .wasm64 => unreachable, // has its own code path - //.arm => return Function(.arm).generateSymbol(bin_file, src, typed_value, code, dbg_line, dbg_info, dbg_info_type_relocs), - //.armeb => return Function(.armeb).generateSymbol(bin_file, src, typed_value, code, dbg_line, dbg_info, dbg_info_type_relocs), + .arm => return Function(.arm).generateSymbol(bin_file, src, typed_value, code, dbg_line, dbg_info, dbg_info_type_relocs), + .armeb => return Function(.armeb).generateSymbol(bin_file, src, typed_value, code, dbg_line, dbg_info, dbg_info_type_relocs), //.aarch64 => return Function(.aarch64).generateSymbol(bin_file, src, typed_value, code, dbg_line, dbg_info, dbg_info_type_relocs), //.aarch64_be => return Function(.aarch64_be).generateSymbol(bin_file, src, typed_value, code, dbg_line, dbg_info, dbg_info_type_relocs), //.aarch64_32 => return Function(.aarch64_32).generateSymbol(bin_file, src, typed_value, code, dbg_line, dbg_info, dbg_info_type_relocs), @@ -101,6 +102,7 @@ pub fn generateSymbol( //.sparcv9 => 
return Function(.sparcv9).generateSymbol(bin_file, src, typed_value, code, dbg_line, dbg_info, dbg_info_type_relocs), //.sparcel => return Function(.sparcel).generateSymbol(bin_file, src, typed_value, code, dbg_line, dbg_info, dbg_info_type_relocs), //.s390x => return Function(.s390x).generateSymbol(bin_file, src, typed_value, code, dbg_line, dbg_info, dbg_info_type_relocs), + .spu_2 => return Function(.spu_2).generateSymbol(bin_file, src, typed_value, code, dbg_line, dbg_info, dbg_info_type_relocs), //.tce => return Function(.tce).generateSymbol(bin_file, src, typed_value, code, dbg_line, dbg_info, dbg_info_type_relocs), //.tcele => return Function(.tcele).generateSymbol(bin_file, src, typed_value, code, dbg_line, dbg_info, dbg_info_type_relocs), //.thumb => return Function(.thumb).generateSymbol(bin_file, src, typed_value, code, dbg_line, dbg_info, dbg_info_type_relocs), @@ -344,6 +346,7 @@ fn Function(comptime arch: std.Target.Cpu.Arch) type { const Branch = struct { inst_table: std.AutoHashMapUnmanaged(*ir.Inst, MCValue) = .{}, + /// The key must be canonical register. registers: std.AutoHashMapUnmanaged(Register, RegisterAllocation) = .{}, free_registers: FreeRegInt = math.maxInt(FreeRegInt), @@ -381,9 +384,19 @@ fn Function(comptime arch: std.Target.Cpu.Arch) type { self.free_registers &= ~(@as(FreeRegInt, 1) << free_index); const reg = callee_preserved_regs[free_index]; self.registers.putAssumeCapacityNoClobber(reg, .{ .inst = inst }); + log.debug("alloc {} => {*}", .{reg, inst}); return reg; } + /// Does not track the register. 
+ fn findUnusedReg(self: *Branch) ?Register { + const free_index = @ctz(FreeRegInt, self.free_registers); + if (free_index >= callee_preserved_regs.len) { + return null; + } + return callee_preserved_regs[free_index]; + } + fn deinit(self: *Branch, gpa: *Allocator) void { self.inst_table.deinit(gpa); self.registers.deinit(gpa); @@ -570,8 +583,10 @@ fn Function(comptime arch: std.Target.Cpu.Arch) type { const branch = &self.branch_stack.items[self.branch_stack.items.len - 1]; const inst_table = &branch.inst_table; for (body.instructions) |inst| { - const new_inst = try self.genFuncInst(inst); - try inst_table.putNoClobber(self.gpa, inst, new_inst); + const mcv = try self.genFuncInst(inst); + log.debug("{*} => {}", .{inst, mcv}); + // TODO don't put void or dead things in here + try inst_table.putNoClobber(self.gpa, inst, mcv); var i: ir.Inst.DeathsBitIndex = 0; while (inst.getOperand(i)) |operand| : (i += 1) { @@ -714,7 +729,7 @@ fn Function(comptime arch: std.Target.Cpu.Arch) type { return self.allocMem(inst, abi_size, abi_align); } - fn allocRegOrMem(self: *Self, inst: *ir.Inst) !MCValue { + fn allocRegOrMem(self: *Self, inst: *ir.Inst, reg_ok: bool) !MCValue { const elem_ty = inst.ty; const abi_size = math.cast(u32, elem_ty.abiSize(self.target.*)) catch { return self.fail(inst.src, "type '{}' too big to fit into stack frame", .{elem_ty}); @@ -724,30 +739,73 @@ fn Function(comptime arch: std.Target.Cpu.Arch) type { self.stack_align = abi_align; const branch = &self.branch_stack.items[self.branch_stack.items.len - 1]; - // Make sure the type can fit in a register before we try to allocate one. 
- const ptr_bits = arch.ptrBitWidth(); - const ptr_bytes: u64 = @divExact(ptr_bits, 8); - if (abi_size <= ptr_bytes) { - try branch.registers.ensureCapacity(self.gpa, branch.registers.items().len + 1); - if (branch.allocReg(inst)) |reg| { - return MCValue{ .register = registerAlias(reg, abi_size) }; + if (reg_ok) { + // Make sure the type can fit in a register before we try to allocate one. + const ptr_bits = arch.ptrBitWidth(); + const ptr_bytes: u64 = @divExact(ptr_bits, 8); + if (abi_size <= ptr_bytes) { + try branch.registers.ensureCapacity(self.gpa, branch.registers.items().len + 1); + if (branch.allocReg(inst)) |reg| { + return MCValue{ .register = registerAlias(reg, abi_size) }; + } } } const stack_offset = try self.allocMem(inst, abi_size, abi_align); return MCValue{ .stack_offset = stack_offset }; } - /// Does not "move" the instruction. - fn copyToNewRegister(self: *Self, inst: *ir.Inst) !MCValue { + /// Copies a value to a register without tracking the register. The register is not considered + /// allocated. A second call to `copyToTmpRegister` may return the same register. + /// This can have a side effect of spilling instructions to the stack to free up a register. + fn copyToTmpRegister(self: *Self, src: usize, mcv: MCValue) !Register { + const branch = &self.branch_stack.items[self.branch_stack.items.len - 1]; + + const reg = branch.findUnusedReg() orelse b: { + // We'll take over the first register. Move the instruction that was previously + // there to a stack allocation. 
+ const reg = callee_preserved_regs[0]; + const regs_entry = branch.registers.remove(reg).?; + const spilled_inst = regs_entry.value.inst; + + const stack_mcv = try self.allocRegOrMem(spilled_inst, false); + const inst_entry = branch.inst_table.getEntry(spilled_inst).?; + const reg_mcv = inst_entry.value; + assert(reg == toCanonicalReg(reg_mcv.register)); + inst_entry.value = stack_mcv; + try self.genSetStack(src, spilled_inst.ty, stack_mcv.stack_offset, reg_mcv); + + break :b reg; + }; + try self.genSetReg(src, reg, mcv); + return reg; + } + + /// Allocates a new register and copies `mcv` into it. + /// `reg_owner` is the instruction that gets associated with the register in the register table. + /// This can have a side effect of spilling instructions to the stack to free up a register. + fn copyToNewRegister(self: *Self, reg_owner: *ir.Inst, mcv: MCValue) !MCValue { const branch = &self.branch_stack.items[self.branch_stack.items.len - 1]; try branch.registers.ensureCapacity(self.gpa, branch.registers.items().len + 1); - const reg = branch.allocReg(inst) orelse - return self.fail(inst.src, "TODO implement spilling register to stack", .{}); - const old_mcv = branch.inst_table.get(inst).?; - const new_mcv: MCValue = .{ .register = reg }; - try self.genSetReg(inst.src, reg, old_mcv); - return new_mcv; + const reg = branch.allocReg(reg_owner) orelse b: { + // We'll take over the first register. Move the instruction that was previously + // there to a stack allocation. 
+ const reg = callee_preserved_regs[0]; + const regs_entry = branch.registers.getEntry(reg).?; + const spilled_inst = regs_entry.value.inst; + regs_entry.value = .{ .inst = reg_owner }; + + const stack_mcv = try self.allocRegOrMem(spilled_inst, false); + const inst_entry = branch.inst_table.getEntry(spilled_inst).?; + const reg_mcv = inst_entry.value; + assert(reg == toCanonicalReg(reg_mcv.register)); + inst_entry.value = stack_mcv; + try self.genSetStack(reg_owner.src, spilled_inst.ty, stack_mcv.stack_offset, reg_mcv); + + break :b reg; + }; + try self.genSetReg(reg_owner.src, reg, mcv); + return MCValue{ .register = reg }; } fn genAlloc(self: *Self, inst: *ir.Inst.NoOp) !MCValue { @@ -868,13 +926,30 @@ fn Function(comptime arch: std.Target.Cpu.Arch) type { } } - fn reuseOperand(inst: *ir.Inst, op_index: ir.Inst.DeathsBitIndex, mcv: MCValue) bool { - if (!inst.operandDies(op_index) or !mcv.isMutable()) + fn reuseOperand(self: *Self, inst: *ir.Inst, op_index: ir.Inst.DeathsBitIndex, mcv: MCValue) bool { + if (!inst.operandDies(op_index)) return false; - // OK we're going to do it, but we need to clear the operand death bit so that - // it stays allocated. + switch (mcv) { + .register => |reg| { + // If it's in the registers table, need to associate the register with the + // new instruction. + const branch = &self.branch_stack.items[self.branch_stack.items.len - 1]; + if (branch.registers.getEntry(toCanonicalReg(reg))) |entry| { + entry.value = .{ .inst = inst }; + } + log.debug("reusing {} => {*}", .{reg, inst}); + }, + .stack_offset => |off| { + log.debug("reusing stack offset {} => {*}", .{off, inst}); + return true; + }, + else => return false, + } + + // Prevent the operand deaths processing code from deallocating it. 
inst.clearOperandDeath(op_index); + return true; } @@ -887,11 +962,11 @@ fn Function(comptime arch: std.Target.Cpu.Arch) type { if (inst.base.isUnused() and !is_volatile) return MCValue.dead; const dst_mcv: MCValue = blk: { - if (reuseOperand(&inst.base, 0, ptr)) { + if (self.reuseOperand(&inst.base, 0, ptr)) { // The MCValue that holds the pointer can be re-used as the value. break :blk ptr; } else { - break :blk try self.allocRegOrMem(&inst.base); + break :blk try self.allocRegOrMem(&inst.base, true); } }; switch (ptr) { @@ -985,23 +1060,23 @@ fn Function(comptime arch: std.Target.Cpu.Arch) type { var dst_mcv: MCValue = undefined; var src_mcv: MCValue = undefined; var src_inst: *ir.Inst = undefined; - if (reuseOperand(inst, 0, lhs)) { + if (self.reuseOperand(inst, 0, lhs)) { // LHS dies; use it as the destination. // Both operands cannot be memory. src_inst = op_rhs; if (lhs.isMemory() and rhs.isMemory()) { - dst_mcv = try self.copyToNewRegister(op_lhs); + dst_mcv = try self.copyToNewRegister(inst, lhs); src_mcv = rhs; } else { dst_mcv = lhs; src_mcv = rhs; } - } else if (reuseOperand(inst, 1, rhs)) { + } else if (self.reuseOperand(inst, 1, rhs)) { // RHS dies; use it as the destination. // Both operands cannot be memory. 
src_inst = op_lhs; if (lhs.isMemory() and rhs.isMemory()) { - dst_mcv = try self.copyToNewRegister(op_rhs); + dst_mcv = try self.copyToNewRegister(inst, rhs); src_mcv = lhs; } else { dst_mcv = rhs; @@ -1009,11 +1084,11 @@ fn Function(comptime arch: std.Target.Cpu.Arch) type { } } else { if (lhs.isMemory()) { - dst_mcv = try self.copyToNewRegister(op_lhs); + dst_mcv = try self.copyToNewRegister(inst, lhs); src_mcv = rhs; src_inst = op_rhs; } else { - dst_mcv = try self.copyToNewRegister(op_rhs); + dst_mcv = try self.copyToNewRegister(inst, rhs); src_mcv = lhs; src_inst = op_lhs; } @@ -1026,18 +1101,26 @@ fn Function(comptime arch: std.Target.Cpu.Arch) type { switch (src_mcv) { .immediate => |imm| { if (imm > math.maxInt(u31)) { - src_mcv = try self.copyToNewRegister(src_inst); + src_mcv = MCValue{ .register = try self.copyToTmpRegister(src_inst.src, src_mcv) }; } }, else => {}, } - try self.genX8664BinMathCode(inst.src, dst_mcv, src_mcv, opx, mr); + try self.genX8664BinMathCode(inst.src, inst.ty, dst_mcv, src_mcv, opx, mr); return dst_mcv; } - fn genX8664BinMathCode(self: *Self, src: usize, dst_mcv: MCValue, src_mcv: MCValue, opx: u8, mr: u8) !void { + fn genX8664BinMathCode( + self: *Self, + src: usize, + dst_ty: Type, + dst_mcv: MCValue, + src_mcv: MCValue, + opx: u8, + mr: u8, + ) !void { switch (dst_mcv) { .none => unreachable, .undef => unreachable, @@ -1087,12 +1170,60 @@ fn Function(comptime arch: std.Target.Cpu.Arch) type { }, } }, - .embedded_in_code, .memory, .stack_offset => { + .stack_offset => |off| { + switch (src_mcv) { + .none => unreachable, + .undef => return self.genSetStack(src, dst_ty, off, .undef), + .dead, .unreach => unreachable, + .ptr_stack_offset => unreachable, + .ptr_embedded_in_code => unreachable, + .register => |src_reg| { + try self.genX8664ModRMRegToStack(src, dst_ty, off, src_reg, mr + 0x1); + }, + .immediate => |imm| { + return self.fail(src, "TODO implement x86 ADD/SUB/CMP source immediate", .{}); + }, + .embedded_in_code, 
.memory, .stack_offset => { + return self.fail(src, "TODO implement x86 ADD/SUB/CMP source memory", .{}); + }, + .compare_flags_unsigned => { + return self.fail(src, "TODO implement x86 ADD/SUB/CMP source compare flag (unsigned)", .{}); + }, + .compare_flags_signed => { + return self.fail(src, "TODO implement x86 ADD/SUB/CMP source compare flag (signed)", .{}); + }, + } + }, + .embedded_in_code, .memory => { return self.fail(src, "TODO implement x86 ADD/SUB/CMP destination memory", .{}); }, } } + fn genX8664ModRMRegToStack(self: *Self, src: usize, ty: Type, off: u32, reg: Register, opcode: u8) !void { + const abi_size = ty.abiSize(self.target.*); + const adj_off = off + abi_size; + try self.code.ensureCapacity(self.code.items.len + 7); + self.rex(.{ .w = reg.size() == 64, .r = reg.isExtended() }); + const reg_id: u8 = @truncate(u3, reg.id()); + if (adj_off <= 128) { + // example: 48 89 55 7f mov QWORD PTR [rbp+0x7f],rdx + const RM = @as(u8, 0b01_000_101) | (reg_id << 3); + const negative_offset = @intCast(i8, -@intCast(i32, adj_off)); + const twos_comp = @bitCast(u8, negative_offset); + self.code.appendSliceAssumeCapacity(&[_]u8{ opcode, RM, twos_comp }); + } else if (adj_off <= 2147483648) { + // example: 48 89 95 80 00 00 00 mov QWORD PTR [rbp+0x80],rdx + const RM = @as(u8, 0b10_000_101) | (reg_id << 3); + const negative_offset = @intCast(i32, -@intCast(i33, adj_off)); + const twos_comp = @bitCast(u32, negative_offset); + self.code.appendSliceAssumeCapacity(&[_]u8{ opcode, RM }); + mem.writeIntLittle(u32, self.code.addManyAsArrayAssumeCapacity(4), twos_comp); + } else { + return self.fail(src, "stack offset too large", .{}); + } + } + fn genArg(self: *Self, inst: *ir.Inst.Arg) !MCValue { if (FreeRegInt == u0) { return self.fail(inst.base.src, "TODO implement Register enum for {}", .{self.target.cpu.arch}); @@ -1109,7 +1240,7 @@ fn Function(comptime arch: std.Target.Cpu.Arch) type { const name_with_null = inst.name[0..mem.lenZ(inst.name) + 1]; switch (result) { 
.register => |reg| { - branch.registers.putAssumeCapacityNoClobber(reg, .{ .inst = &inst.base }); + branch.registers.putAssumeCapacityNoClobber(toCanonicalReg(reg), .{ .inst = &inst.base }); branch.markRegUsed(reg); try self.dbg_info.ensureCapacity(self.dbg_info.items.len + 8 + name_with_null.len); @@ -1134,6 +1265,17 @@ fn Function(comptime arch: std.Target.Cpu.Arch) type { .riscv64 => { mem.writeIntLittle(u32, try self.code.addManyAsArray(4), Instruction.ebreak.toU32()); }, + .spu_2 => { + try self.code.resize(self.code.items.len + 2); + var instr = Instruction{ .condition = .always, .input0 = .zero, .input1 = .zero, .modify_flags = false, .output = .discard, .command = .undefined1 }; + mem.writeIntLittle(u16, self.code.items[self.code.items.len - 2 ..][0..2], @bitCast(u16, instr)); + }, + .arm => { + mem.writeIntLittle(u32, try self.code.addManyAsArray(4), Instruction.bkpt(0).toU32()); + }, + .armeb => { + mem.writeIntBig(u32, try self.code.addManyAsArray(4), Instruction.bkpt(0).toU32()); + }, else => return self.fail(src, "TODO implement @breakpoint() for {}", .{self.target.cpu.arch}), } return .none; @@ -1219,10 +1361,77 @@ fn Function(comptime arch: std.Target.Cpu.Arch) type { return self.fail(inst.base.src, "TODO implement calling runtime known function pointer", .{}); } }, + .spu_2 => { + if (inst.func.cast(ir.Inst.Constant)) |func_inst| { + if (info.args.len != 0) { + return self.fail(inst.base.src, "TODO implement call with more than 0 parameters", .{}); + } + if (func_inst.val.cast(Value.Payload.Function)) |func_val| { + const func = func_val.func; + const got = &elf_file.program_headers.items[elf_file.phdr_got_index.?]; + const got_addr = @intCast(u16, got.p_vaddr + func.owner_decl.link.elf.offset_table_index * 2); + const return_type = func.owner_decl.typed_value.most_recent.typed_value.ty.fnReturnType(); + // First, push the return address, then jump; if noreturn, don't bother with the first step + // TODO: implement packed struct -> u16 at comptime 
and move the bitcast here + var instr = Instruction{ .condition = .always, .input0 = .immediate, .input1 = .zero, .modify_flags = false, .output = .jump, .command = .load16 }; + if (return_type.zigTypeTag() == .NoReturn) { + try self.code.resize(self.code.items.len + 4); + mem.writeIntLittle(u16, self.code.items[self.code.items.len - 4 ..][0..2], @bitCast(u16, instr)); + mem.writeIntLittle(u16, self.code.items[self.code.items.len - 2 ..][0..2], got_addr); + return MCValue.unreach; + } else { + try self.code.resize(self.code.items.len + 8); + var push = Instruction{ .condition = .always, .input0 = .immediate, .input1 = .zero, .modify_flags = false, .output = .push, .command = .ipget }; + mem.writeIntLittle(u16, self.code.items[self.code.items.len - 8 ..][0..2], @bitCast(u16, push)); + mem.writeIntLittle(u16, self.code.items[self.code.items.len - 6 ..][0..2], @as(u16, 4)); + mem.writeIntLittle(u16, self.code.items[self.code.items.len - 4 ..][0..2], @bitCast(u16, instr)); + mem.writeIntLittle(u16, self.code.items[self.code.items.len - 2 ..][0..2], got_addr); + switch (return_type.zigTypeTag()) { + .Void => return MCValue{ .none = {} }, + .NoReturn => unreachable, + else => return self.fail(inst.base.src, "TODO implement fn call with non-void return value", .{}), + } + } + } else { + return self.fail(inst.base.src, "TODO implement calling bitcasted functions", .{}); + } + } else { + return self.fail(inst.base.src, "TODO implement calling runtime known function pointer", .{}); + } + }, + .arm => { + if (info.args.len > 0) return self.fail(inst.base.src, "TODO implement fn args for {}", .{self.target.cpu.arch}); + + if (inst.func.cast(ir.Inst.Constant)) |func_inst| { + if (func_inst.val.cast(Value.Payload.Function)) |func_val| { + const func = func_val.func; + const got = &elf_file.program_headers.items[elf_file.phdr_got_index.?]; + const ptr_bits = self.target.cpu.arch.ptrBitWidth(); + const ptr_bytes: u64 = @divExact(ptr_bits, 8); + const got_addr = @intCast(u32, 
got.p_vaddr + func.owner_decl.link.elf.offset_table_index * ptr_bytes); + + // TODO only works with leaf functions + // at the moment, which works fine for + // Hello World, but not for real code + // of course. Add pushing lr to stack + // and popping after call + try self.genSetReg(inst.base.src, .lr, .{ .memory = got_addr }); + mem.writeIntLittle(u32, try self.code.addManyAsArray(4), Instruction.blx(.al, .lr).toU32()); + } else { + return self.fail(inst.base.src, "TODO implement calling bitcasted functions", .{}); + } + } else { + return self.fail(inst.base.src, "TODO implement calling runtime known function pointer", .{}); + } + }, else => return self.fail(inst.base.src, "TODO implement call for {}", .{self.target.cpu.arch}), } } else if (self.bin_file.cast(link.File.MachO)) |macho_file| { - return self.fail(inst.base.src, "TODO implement codegen for call when linking with MachO", .{}); + switch (arch) { + .x86_64 => return self.fail(inst.base.src, "TODO implement codegen for call when linking with MachO for x86_64 arch", .{}), + .aarch64 => return self.fail(inst.base.src, "TODO implement codegen for call when linking with MachO for aarch64 arch", .{}), + else => unreachable, + } } else { unreachable; } @@ -1275,6 +1484,9 @@ fn Function(comptime arch: std.Target.Cpu.Arch) type { .riscv64 => { mem.writeIntLittle(u32, try self.code.addManyAsArray(4), Instruction.jalr(.zero, 0, .ra).toU32()); }, + .arm => { + mem.writeIntLittle(u32, try self.code.addManyAsArray(4), Instruction.bx(.al, .lr).toU32()); + }, else => return self.fail(src, "TODO implement return for {}", .{self.target.cpu.arch}), } return .unreach; @@ -1304,13 +1516,13 @@ fn Function(comptime arch: std.Target.Cpu.Arch) type { // Either one, but not both, can be a memory operand. // Source operand can be an immediate, 8 bits or 32 bits. 
const dst_mcv = if (lhs.isImmediate() or (lhs.isMemory() and rhs.isMemory())) - try self.copyToNewRegister(inst.lhs) + try self.copyToNewRegister(&inst.base, lhs) else lhs; // This instruction supports only signed 32-bit immediates at most. const src_mcv = try self.limitImmediateType(inst.rhs, i32); - try self.genX8664BinMathCode(inst.base.src, dst_mcv, src_mcv, 7, 0x38); + try self.genX8664BinMathCode(inst.base.src, inst.base.ty, dst_mcv, src_mcv, 7, 0x38); const info = inst.lhs.ty.intInfo(self.target.*); if (info.signed) { return MCValue{ .compare_flags_signed = op }; @@ -1512,6 +1724,49 @@ fn Function(comptime arch: std.Target.Cpu.Arch) type { if (!inst.is_volatile and inst.base.isUnused()) return MCValue.dead; switch (arch) { + .spu_2 => { + if (inst.inputs.len > 0 or inst.output != null) { + return self.fail(inst.base.src, "TODO implement inline asm inputs / outputs for SPU Mark II", .{}); + } + if (mem.eql(u8, inst.asm_source, "undefined0")) { + try self.code.resize(self.code.items.len + 2); + var instr = Instruction{ .condition = .always, .input0 = .zero, .input1 = .zero, .modify_flags = false, .output = .discard, .command = .undefined0 }; + mem.writeIntLittle(u16, self.code.items[self.code.items.len - 2 ..][0..2], @bitCast(u16, instr)); + return MCValue.none; + } else { + return self.fail(inst.base.src, "TODO implement support for more SPU II assembly instructions", .{}); + } + }, + .arm => { + for (inst.inputs) |input, i| { + if (input.len < 3 or input[0] != '{' or input[input.len - 1] != '}') { + return self.fail(inst.base.src, "unrecognized asm input constraint: '{}'", .{input}); + } + const reg_name = input[1 .. 
input.len - 1]; + const reg = parseRegName(reg_name) orelse + return self.fail(inst.base.src, "unrecognized register: '{}'", .{reg_name}); + const arg = try self.resolveInst(inst.args[i]); + try self.genSetReg(inst.base.src, reg, arg); + } + + if (mem.eql(u8, inst.asm_source, "svc #0")) { + mem.writeIntLittle(u32, try self.code.addManyAsArray(4), Instruction.svc(.al, 0).toU32()); + } else { + return self.fail(inst.base.src, "TODO implement support for more arm assembly instructions", .{}); + } + + if (inst.output) |output| { + if (output.len < 4 or output[0] != '=' or output[1] != '{' or output[output.len - 1] != '}') { + return self.fail(inst.base.src, "unrecognized asm output constraint: '{}'", .{output}); + } + const reg_name = output[2 .. output.len - 1]; + const reg = parseRegName(reg_name) orelse + return self.fail(inst.base.src, "unrecognized register: '{}'", .{reg_name}); + return MCValue{ .register = reg }; + } else { + return MCValue.none; + } + }, .riscv64 => { for (inst.inputs) |input, i| { if (input.len < 3 or input[0] != '{' or input[input.len - 1] != '}') { @@ -1584,7 +1839,12 @@ fn Function(comptime arch: std.Target.Cpu.Arch) type { /// resulting REX is meaningful, but will remain the same if it is not. /// * Deliberately inserting a "meaningless REX" requires explicit usage of /// 0x40, and cannot be done via this function. + /// W => 64 bit mode + /// R => extension to the MODRM.reg field + /// X => extension to the SIB.index field + /// B => extension to the MODRM.rm field or the SIB.base field fn rex(self: *Self, arg: struct { b: bool = false, w: bool = false, x: bool = false, r: bool = false }) void { + comptime assert(arch == .x86_64); // From section 2.2.1.2 of the manual, REX is encoded as b0100WRXB. 
var value: u8 = 0x40; if (arg.b) { @@ -1681,27 +1941,7 @@ fn Function(comptime arch: std.Target.Cpu.Arch) type { return self.fail(src, "TODO implement set stack variable from embedded_in_code", .{}); }, .register => |reg| { - const abi_size = ty.abiSize(self.target.*); - const adj_off = stack_offset + abi_size; - try self.code.ensureCapacity(self.code.items.len + 7); - self.rex(.{ .w = reg.size() == 64, .b = reg.isExtended() }); - const reg_id: u8 = @truncate(u3, reg.id()); - if (adj_off <= 128) { - // example: 48 89 55 7f mov QWORD PTR [rbp+0x7f],rdx - const RM = @as(u8, 0b01_000_101) | (reg_id << 3); - const negative_offset = @intCast(i8, -@intCast(i32, adj_off)); - const twos_comp = @bitCast(u8, negative_offset); - self.code.appendSliceAssumeCapacity(&[_]u8{ 0x89, RM, twos_comp }); - } else if (adj_off <= 2147483648) { - // example: 48 89 95 80 00 00 00 mov QWORD PTR [rbp+0x80],rdx - const RM = @as(u8, 0b10_000_101) | (reg_id << 3); - const negative_offset = @intCast(i32, -@intCast(i33, adj_off)); - const twos_comp = @bitCast(u32, negative_offset); - self.code.appendSliceAssumeCapacity(&[_]u8{ 0x89, RM }); - mem.writeIntLittle(u32, self.code.addManyAsArrayAssumeCapacity(4), twos_comp); - } else { - return self.fail(src, "stack offset too large", .{}); - } + try self.genX8664ModRMRegToStack(src, ty, stack_offset, reg, 0x89); }, .memory => |vaddr| { return self.fail(src, "TODO implement set stack variable from memory vaddr", .{}); @@ -1709,7 +1949,9 @@ fn Function(comptime arch: std.Target.Cpu.Arch) type { .stack_offset => |off| { if (stack_offset == off) return; // Copy stack variable to itself; nothing to do. 
- return self.fail(src, "TODO implement copy stack variable to stack variable", .{}); + + const reg = try self.copyToTmpRegister(src, mcv); + return self.genSetStack(src, ty, stack_offset, MCValue{ .register = reg }); }, }, else => return self.fail(src, "TODO implement getSetStack for {}", .{self.target.cpu.arch}), @@ -1718,6 +1960,58 @@ fn Function(comptime arch: std.Target.Cpu.Arch) type { fn genSetReg(self: *Self, src: usize, reg: Register, mcv: MCValue) InnerError!void { switch (arch) { + .arm => switch (mcv) { + .dead => unreachable, + .ptr_stack_offset => unreachable, + .ptr_embedded_in_code => unreachable, + .unreach, .none => return, // Nothing to do. + .undef => { + if (!self.wantSafety()) + return; // The already existing value will do just fine. + // Write the debug undefined value. + return self.genSetReg(src, reg, .{ .immediate = 0xaaaaaaaa }); + }, + .immediate => |x| { + // TODO better analysis of x to determine the + // least amount of necessary instructions (use + // more intelligent rotating) + if (x <= math.maxInt(u8)) { + mem.writeIntLittle(u32, try self.code.addManyAsArray(4), Instruction.mov(.al, 0, reg, Instruction.Operand.imm(@truncate(u8, x), 0)).toU32()); + return; + } else if (x <= math.maxInt(u16)) { + // TODO Use movw Note: Not supported on + // all ARM targets! + + mem.writeIntLittle(u32, try self.code.addManyAsArray(4), Instruction.mov(.al, 0, reg, Instruction.Operand.imm(@truncate(u8, x), 0)).toU32()); + mem.writeIntLittle(u32, try self.code.addManyAsArray(4), Instruction.orr(.al, 0, reg, reg, Instruction.Operand.imm(@truncate(u8, x >> 8), 12)).toU32()); + } else if (x <= math.maxInt(u32)) { + // TODO Use movw and movt Note: Not + // supported on all ARM targets! 
Also TODO + // write constant to code and load + // relative to pc + + // immediate: 0xaabbccdd + // mov reg, #0xaa + // orr reg, reg, #0xbb, 24 + // orr reg, reg, #0xcc, 16 + // orr reg, reg, #0xdd, 8 + mem.writeIntLittle(u32, try self.code.addManyAsArray(4), Instruction.mov(.al, 0, reg, Instruction.Operand.imm(@truncate(u8, x), 0)).toU32()); + mem.writeIntLittle(u32, try self.code.addManyAsArray(4), Instruction.orr(.al, 0, reg, reg, Instruction.Operand.imm(@truncate(u8, x >> 8), 12)).toU32()); + mem.writeIntLittle(u32, try self.code.addManyAsArray(4), Instruction.orr(.al, 0, reg, reg, Instruction.Operand.imm(@truncate(u8, x >> 16), 8)).toU32()); + mem.writeIntLittle(u32, try self.code.addManyAsArray(4), Instruction.orr(.al, 0, reg, reg, Instruction.Operand.imm(@truncate(u8, x >> 24), 4)).toU32()); + return; + } else { + return self.fail(src, "ARM registers are 32-bit wide", .{}); + } + }, + .memory => |addr| { + // The value is in memory at a hard-coded address. + // If the type is a pointer, it means the pointer address is at this memory location. 
+ try self.genSetReg(src, reg, .{ .immediate = addr }); + mem.writeIntLittle(u32, try self.code.addManyAsArray(4), Instruction.ldr(.al, reg, reg, Instruction.Offset.none).toU32()); + }, + else => return self.fail(src, "TODO implement getSetReg for arm {}", .{mcv}), + }, .riscv64 => switch (mcv) { .dead => unreachable, .ptr_stack_offset => unreachable, @@ -2027,7 +2321,7 @@ fn Function(comptime arch: std.Target.Cpu.Arch) type { }, }); if (imm >= math.maxInt(U)) { - return self.copyToNewRegister(inst); + return MCValue{ .register = try self.copyToTmpRegister(inst.src, mcv) }; } }, else => {}, @@ -2150,7 +2444,7 @@ fn Function(comptime arch: std.Target.Cpu.Arch) type { result.stack_byte_count = next_stack_offset; result.stack_align = 16; }, - else => return self.fail(src, "TODO implement function parameters for {}", .{cc}), + else => return self.fail(src, "TODO implement function parameters for {} on x86_64", .{cc}), } }, else => if (param_types.len != 0) @@ -2197,6 +2491,9 @@ fn Function(comptime arch: std.Target.Cpu.Arch) type { .i386 => @import("codegen/x86.zig"), .x86_64 => @import("codegen/x86_64.zig"), .riscv64 => @import("codegen/riscv64.zig"), + .spu_2 => @import("codegen/spu-mk2.zig"), + .arm => @import("codegen/arm.zig"), + .armeb => @import("codegen/arm.zig"), else => struct { pub const Register = enum { dummy, diff --git a/src-self-hosted/codegen/arm.zig b/src-self-hosted/codegen/arm.zig @@ -0,0 +1,607 @@ +const std = @import("std"); +const DW = std.dwarf; +const testing = std.testing; + +/// The condition field specifies the flags neccessary for an +/// Instruction to be executed +pub const Condition = enum(u4) { + /// equal + eq, + /// not equal + ne, + /// unsigned higher or same + cs, + /// unsigned lower + cc, + /// negative + mi, + /// positive or zero + pl, + /// overflow + vs, + /// no overflow + vc, + /// unsigned higer + hi, + /// unsigned lower or same + ls, + /// greater or equal + ge, + /// less than + lt, + /// greater than + gt, + /// less 
than or equal + le, + /// always + al, +}; + +/// Represents a register in the ARM instruction set architecture +pub const Register = enum(u5) { + r0, + r1, + r2, + r3, + r4, + r5, + r6, + r7, + r8, + r9, + r10, + r11, + r12, + r13, + r14, + r15, + + /// Argument / result / scratch register 1 + a1, + /// Argument / result / scratch register 2 + a2, + /// Argument / scratch register 3 + a3, + /// Argument / scratch register 4 + a4, + /// Variable-register 1 + v1, + /// Variable-register 2 + v2, + /// Variable-register 3 + v3, + /// Variable-register 4 + v4, + /// Variable-register 5 + v5, + /// Platform register + v6, + /// Variable-register 7 + v7, + /// Frame pointer or Variable-register 8 + fp, + /// Intra-Procedure-call scratch register + ip, + /// Stack pointer + sp, + /// Link register + lr, + /// Program counter + pc, + + /// Returns the unique 4-bit ID of this register which is used in + /// the machine code + pub fn id(self: Register) u4 { + return @truncate(u4, @enumToInt(self)); + } + + /// Returns the index into `callee_preserved_regs`. 
+ pub fn allocIndex(self: Register) ?u4 { + inline for (callee_preserved_regs) |cpreg, i| { + if (self.id() == cpreg.id()) return i; + } + return null; + } + + pub fn dwarfLocOp(self: Register) u8 { + return @as(u8, self.id()) + DW.OP_reg0; + } +}; + +test "Register.id" { + testing.expectEqual(@as(u4, 15), Register.r15.id()); + testing.expectEqual(@as(u4, 15), Register.pc.id()); +} + +pub const callee_preserved_regs = [_]Register{ .r0, .r1, .r2, .r3, .r4, .r5, .r6, .r7, .r8, .r10 }; +pub const c_abi_int_param_regs = [_]Register{ .r0, .r1, .r2, .r3 }; +pub const c_abi_int_return_regs = [_]Register{ .r0, .r1 }; + +/// Represents an instruction in the ARM instruction set architecture +pub const Instruction = union(enum) { + DataProcessing: packed struct { + // Note to self: The order of the fields top-to-bottom is + // right-to-left in the actual 32-bit int representation + op2: u12, + rd: u4, + rn: u4, + s: u1, + opcode: u4, + i: u1, + fixed: u2 = 0b00, + cond: u4, + }, + SingleDataTransfer: packed struct { + offset: u12, + rd: u4, + rn: u4, + l: u1, + w: u1, + b: u1, + u: u1, + p: u1, + i: u1, + fixed: u2 = 0b01, + cond: u4, + }, + Branch: packed struct { + offset: u24, + link: u1, + fixed: u3 = 0b101, + cond: u4, + }, + BranchExchange: packed struct { + rn: u4, + fixed_1: u1 = 0b1, + link: u1, + fixed_2: u22 = 0b0001_0010_1111_1111_1111_00, + cond: u4, + }, + SupervisorCall: packed struct { + comment: u24, + fixed: u4 = 0b1111, + cond: u4, + }, + Breakpoint: packed struct { + imm4: u4, + fixed_1: u4 = 0b0111, + imm12: u12, + fixed_2_and_cond: u12 = 0b1110_0001_0010, + }, + + /// Represents the possible operations which can be performed by a + /// DataProcessing instruction + const Opcode = enum(u4) { + // Rd := Op1 AND Op2 + @"and", + // Rd := Op1 EOR Op2 + eor, + // Rd := Op1 - Op2 + sub, + // Rd := Op2 - Op1 + rsb, + // Rd := Op1 + Op2 + add, + // Rd := Op1 + Op2 + C + adc, + // Rd := Op1 - Op2 + C - 1 + sbc, + // Rd := Op2 - Op1 + C - 1 + rsc, + // set condition 
codes on Op1 AND Op2 + tst, + // set condition codes on Op1 EOR Op2 + teq, + // set condition codes on Op1 - Op2 + cmp, + // set condition codes on Op1 + Op2 + cmn, + // Rd := Op1 OR Op2 + orr, + // Rd := Op2 + mov, + // Rd := Op1 AND NOT Op2 + bic, + // Rd := NOT Op2 + mvn, + }; + + /// Represents the second operand to a data processing instruction + /// which can either be content from a register or an immediate + /// value + pub const Operand = union(enum) { + Register: packed struct { + rm: u4, + shift: u8, + }, + Immediate: packed struct { + imm: u8, + rotate: u4, + }, + + /// Represents multiple ways a register can be shifted. A + /// register can be shifted by a specific immediate value or + /// by the contents of another register + pub const Shift = union(enum) { + Immediate: packed struct { + fixed: u1 = 0b0, + typ: u2, + amount: u5, + }, + Register: packed struct { + fixed_1: u1 = 0b1, + typ: u2, + fixed_2: u1 = 0b0, + rs: u4, + }, + + const Type = enum(u2) { + LogicalLeft, + LogicalRight, + ArithmeticRight, + RotateRight, + }; + + const none = Shift{ + .Immediate = .{ + .amount = 0, + .typ = 0, + }, + }; + + pub fn toU8(self: Shift) u8 { + return switch (self) { + .Register => |v| @bitCast(u8, v), + .Immediate => |v| @bitCast(u8, v), + }; + } + + pub fn reg(rs: Register, typ: Type) Shift { + return Shift{ + .Register = .{ + .rs = rs.id(), + .typ = @enumToInt(typ), + }, + }; + } + + pub fn imm(amount: u5, typ: Type) Shift { + return Shift{ + .Immediate = .{ + .amount = amount, + .typ = @enumToInt(typ), + }, + }; + } + }; + + pub fn toU12(self: Operand) u12 { + return switch (self) { + .Register => |v| @bitCast(u12, v), + .Immediate => |v| @bitCast(u12, v), + }; + } + + pub fn reg(rm: Register, shift: Shift) Operand { + return Operand{ + .Register = .{ + .rm = rm.id(), + .shift = shift.toU8(), + }, + }; + } + + pub fn imm(immediate: u8, rotate: u4) Operand { + return Operand{ + .Immediate = .{ + .imm = immediate, + .rotate = rotate, + }, + }; + } + }; + + 
/// Represents the offset operand of a load or store + /// instruction. Data can be loaded from memory with either an + /// immediate offset or an offset that is stored in some register. + pub const Offset = union(enum) { + Immediate: u12, + Register: packed struct { + rm: u4, + shift: u8, + }, + + pub const none = Offset{ + .Immediate = 0, + }; + + pub fn toU12(self: Offset) u12 { + return switch (self) { + .Register => |v| @bitCast(u12, v), + .Immediate => |v| v, + }; + } + + pub fn reg(rm: Register, shift: u8) Offset { + return Offset{ + .Register = .{ + .rm = rm.id(), + .shift = shift, + }, + }; + } + + pub fn imm(immediate: u8) Offset { + return Offset{ + .Immediate = immediate, + }; + } + }; + + pub fn toU32(self: Instruction) u32 { + return switch (self) { + .DataProcessing => |v| @bitCast(u32, v), + .SingleDataTransfer => |v| @bitCast(u32, v), + .Branch => |v| @bitCast(u32, v), + .BranchExchange => |v| @bitCast(u32, v), + .SupervisorCall => |v| @bitCast(u32, v), + .Breakpoint => |v| @intCast(u32, v.imm4) | (@intCast(u32, v.fixed_1) << 4) | (@intCast(u32, v.imm12) << 8) | (@intCast(u32, v.fixed_2_and_cond) << 20), + }; + } + + // Helper functions for the "real" functions below + + fn dataProcessing( + cond: Condition, + opcode: Opcode, + s: u1, + rd: Register, + rn: Register, + op2: Operand, + ) Instruction { + return Instruction{ + .DataProcessing = .{ + .cond = @enumToInt(cond), + .i = if (op2 == .Immediate) 1 else 0, + .opcode = @enumToInt(opcode), + .s = s, + .rn = rn.id(), + .rd = rd.id(), + .op2 = op2.toU12(), + }, + }; + } + + fn singleDataTransfer( + cond: Condition, + rd: Register, + rn: Register, + offset: Offset, + pre_post: u1, + up_down: u1, + byte_word: u1, + writeback: u1, + load_store: u1, + ) Instruction { + return Instruction{ + .SingleDataTransfer = .{ + .cond = @enumToInt(cond), + .rn = rn.id(), + .rd = rd.id(), + .offset = offset.toU12(), + .l = load_store, + .w = writeback, + .b = byte_word, + .u = up_down, + .p = pre_post, + .i = if 
(offset == .Immediate) 0 else 1, + }, + }; + } + + fn branch(cond: Condition, offset: i24, link: u1) Instruction { + return Instruction{ + .Branch = .{ + .cond = @enumToInt(cond), + .link = link, + .offset = @bitCast(u24, offset), + }, + }; + } + + fn branchExchange(cond: Condition, rn: Register, link: u1) Instruction { + return Instruction{ + .BranchExchange = .{ + .cond = @enumToInt(cond), + .link = link, + .rn = rn.id(), + }, + }; + } + + fn supervisorCall(cond: Condition, comment: u24) Instruction { + return Instruction{ + .SupervisorCall = .{ + .cond = @enumToInt(cond), + .comment = comment, + }, + }; + } + + fn breakpoint(imm: u16) Instruction { + return Instruction{ + .Breakpoint = .{ + .imm12 = @truncate(u12, imm >> 4), + .imm4 = @truncate(u4, imm), + }, + }; + } + + // Public functions replicating assembler syntax as closely as + // possible + + // Data processing + + pub fn @"and"(cond: Condition, s: u1, rd: Register, rn: Register, op2: Operand) Instruction { + return dataProcessing(cond, .@"and", s, rd, rn, op2); + } + + pub fn eor(cond: Condition, s: u1, rd: Register, rn: Register, op2: Operand) Instruction { + return dataProcessing(cond, .eor, s, rd, rn, op2); + } + + pub fn sub(cond: Condition, s: u1, rd: Register, rn: Register, op2: Operand) Instruction { + return dataProcessing(cond, .sub, s, rd, rn, op2); + } + + pub fn rsb(cond: Condition, s: u1, rd: Register, rn: Register, op2: Operand) Instruction { + return dataProcessing(cond, .rsb, s, rd, rn, op2); + } + + pub fn add(cond: Condition, s: u1, rd: Register, rn: Register, op2: Operand) Instruction { + return dataProcessing(cond, .add, s, rd, rn, op2); + } + + pub fn adc(cond: Condition, s: u1, rd: Register, rn: Register, op2: Operand) Instruction { + return dataProcessing(cond, .adc, s, rd, rn, op2); + } + + pub fn sbc(cond: Condition, s: u1, rd: Register, rn: Register, op2: Operand) Instruction { + return dataProcessing(cond, .sbc, s, rd, rn, op2); + } + + pub fn rsc(cond: Condition, s: u1, rd: 
Register, rn: Register, op2: Operand) Instruction { + return dataProcessing(cond, .rsc, s, rd, rn, op2); + } + + pub fn tst(cond: Condition, rn: Register, op2: Operand) Instruction { + return dataProcessing(cond, .tst, 1, .r0, rn, op2); + } + + pub fn teq(cond: Condition, rn: Register, op2: Operand) Instruction { + return dataProcessing(cond, .teq, 1, .r0, rn, op2); + } + + pub fn cmp(cond: Condition, rn: Register, op2: Operand) Instruction { + return dataProcessing(cond, .cmp, 1, .r0, rn, op2); + } + + pub fn cmn(cond: Condition, rn: Register, op2: Operand) Instruction { + return dataProcessing(cond, .cmn, 1, .r0, rn, op2); + } + + pub fn orr(cond: Condition, s: u1, rd: Register, rn: Register, op2: Operand) Instruction { + return dataProcessing(cond, .orr, s, rd, rn, op2); + } + + pub fn mov(cond: Condition, s: u1, rd: Register, op2: Operand) Instruction { + return dataProcessing(cond, .mov, s, rd, .r0, op2); + } + + pub fn bic(cond: Condition, s: u1, rd: Register, op2: Operand) Instruction { + return dataProcessing(cond, .bic, s, rd, rn, op2); + } + + pub fn mvn(cond: Condition, s: u1, rd: Register, op2: Operand) Instruction { + return dataProcessing(cond, .mvn, s, rd, .r0, op2); + } + + // Single data transfer + + pub fn ldr(cond: Condition, rd: Register, rn: Register, offset: Offset) Instruction { + return singleDataTransfer(cond, rd, rn, offset, 1, 1, 0, 0, 1); + } + + pub fn str(cond: Condition, rd: Register, rn: Register, offset: Offset) Instruction { + return singleDataTransfer(cond, rd, rn, offset, 1, 1, 0, 0, 0); + } + + // Branch + + pub fn b(cond: Condition, offset: i24) Instruction { + return branch(cond, offset, 0); + } + + pub fn bl(cond: Condition, offset: i24) Instruction { + return branch(cond, offset, 1); + } + + // Branch and exchange + + pub fn bx(cond: Condition, rn: Register) Instruction { + return branchExchange(cond, rn, 0); + } + + pub fn blx(cond: Condition, rn: Register) Instruction { + return branchExchange(cond, rn, 1); + } + + // 
Supervisor Call + + pub const swi = svc; + + pub fn svc(cond: Condition, comment: u24) Instruction { + return supervisorCall(cond, comment); + } + + // Breakpoint + + pub fn bkpt(imm: u16) Instruction { + return breakpoint(imm); + } +}; + +test "serialize instructions" { + const Testcase = struct { + inst: Instruction, + expected: u32, + }; + + const testcases = [_]Testcase{ + .{ // add r0, r0, r0 + .inst = Instruction.add(.al, 0, .r0, .r0, Instruction.Operand.reg(.r0, Instruction.Operand.Shift.none)), + .expected = 0b1110_00_0_0100_0_0000_0000_00000000_0000, + }, + .{ // mov r4, r2 + .inst = Instruction.mov(.al, 0, .r4, Instruction.Operand.reg(.r2, Instruction.Operand.Shift.none)), + .expected = 0b1110_00_0_1101_0_0000_0100_00000000_0010, + }, + .{ // mov r0, #42 + .inst = Instruction.mov(.al, 0, .r0, Instruction.Operand.imm(42, 0)), + .expected = 0b1110_00_1_1101_0_0000_0000_0000_00101010, + }, + .{ // ldr r0, [r2, #42] + .inst = Instruction.ldr(.al, .r0, .r2, Instruction.Offset.imm(42)), + .expected = 0b1110_01_0_1_1_0_0_1_0010_0000_000000101010, + }, + .{ // str r0, [r3] + .inst = Instruction.str(.al, .r0, .r3, Instruction.Offset.none), + .expected = 0b1110_01_0_1_1_0_0_0_0011_0000_000000000000, + }, + .{ // b #12 + .inst = Instruction.b(.al, 12), + .expected = 0b1110_101_0_0000_0000_0000_0000_0000_1100, + }, + .{ // bl #-4 + .inst = Instruction.bl(.al, -4), + .expected = 0b1110_101_1_1111_1111_1111_1111_1111_1100, + }, + .{ // bx lr + .inst = Instruction.bx(.al, .lr), + .expected = 0b1110_0001_0010_1111_1111_1111_0001_1110, + }, + .{ // svc #0 + .inst = Instruction.svc(.al, 0), + .expected = 0b1110_1111_0000_0000_0000_0000_0000_0000, + }, + .{ // bkpt #42 + .inst = Instruction.bkpt(42), + .expected = 0b1110_0001_0010_000000000010_0111_1010, + }, + }; + + for (testcases) |case| { + const actual = case.inst.toU32(); + testing.expectEqual(case.expected, actual); + } +} diff --git a/src-self-hosted/codegen/spu-mk2.zig b/src-self-hosted/codegen/spu-mk2.zig @@ -0,0 
+1,170 @@ +const std = @import("std"); + +pub const Interpreter = @import("spu-mk2/interpreter.zig").Interpreter; + +pub const ExecutionCondition = enum(u3) { + always = 0, + when_zero = 1, + not_zero = 2, + greater_zero = 3, + less_than_zero = 4, + greater_or_equal_zero = 5, + less_or_equal_zero = 6, + overflow = 7, +}; + +pub const InputBehaviour = enum(u2) { + zero = 0, + immediate = 1, + peek = 2, + pop = 3, +}; + +pub const OutputBehaviour = enum(u2) { + discard = 0, + push = 1, + jump = 2, + jump_relative = 3, +}; + +pub const Command = enum(u5) { + copy = 0, + ipget = 1, + get = 2, + set = 3, + store8 = 4, + store16 = 5, + load8 = 6, + load16 = 7, + undefined0 = 8, + undefined1 = 9, + frget = 10, + frset = 11, + bpget = 12, + bpset = 13, + spget = 14, + spset = 15, + add = 16, + sub = 17, + mul = 18, + div = 19, + mod = 20, + @"and" = 21, + @"or" = 22, + xor = 23, + not = 24, + signext = 25, + rol = 26, + ror = 27, + bswap = 28, + asr = 29, + lsl = 30, + lsr = 31, +}; + +pub const Instruction = packed struct { + condition: ExecutionCondition, + input0: InputBehaviour, + input1: InputBehaviour, + modify_flags: bool, + output: OutputBehaviour, + command: Command, + reserved: u1 = 0, + + pub fn format(instr: Instruction, comptime fmt: []const u8, options: std.fmt.FormatOptions, out: anytype) !void { + try std.fmt.format(out, "0x{x:0<4} ", .{@bitCast(u16, instr)}); + try out.writeAll(switch (instr.condition) { + .always => " ", + .when_zero => "== 0", + .not_zero => "!= 0", + .greater_zero => " > 0", + .less_than_zero => " < 0", + .greater_or_equal_zero => ">= 0", + .less_or_equal_zero => "<= 0", + .overflow => "ovfl", + }); + try out.writeAll(" "); + try out.writeAll(switch (instr.input0) { + .zero => "zero", + .immediate => "imm ", + .peek => "peek", + .pop => "pop ", + }); + try out.writeAll(" "); + try out.writeAll(switch (instr.input1) { + .zero => "zero", + .immediate => "imm ", + .peek => "peek", + .pop => "pop ", + }); + try out.writeAll(" "); + try 
out.writeAll(switch (instr.command) { + .copy => "copy ", + .ipget => "ipget ", + .get => "get ", + .set => "set ", + .store8 => "store8 ", + .store16 => "store16 ", + .load8 => "load8 ", + .load16 => "load16 ", + .undefined0 => "undefined", + .undefined1 => "undefined", + .frget => "frget ", + .frset => "frset ", + .bpget => "bpget ", + .bpset => "bpset ", + .spget => "spget ", + .spset => "spset ", + .add => "add ", + .sub => "sub ", + .mul => "mul ", + .div => "div ", + .mod => "mod ", + .@"and" => "and ", + .@"or" => "or ", + .xor => "xor ", + .not => "not ", + .signext => "signext ", + .rol => "rol ", + .ror => "ror ", + .bswap => "bswap ", + .asr => "asr ", + .lsl => "lsl ", + .lsr => "lsr ", + }); + try out.writeAll(" "); + try out.writeAll(switch (instr.output) { + .discard => "discard", + .push => "push ", + .jump => "jmp ", + .jump_relative => "rjmp ", + }); + try out.writeAll(" "); + try out.writeAll(if (instr.modify_flags) + "+ flags" + else + " "); + } +}; + +pub const FlagRegister = packed struct { + zero: bool, + negative: bool, + carry: bool, + carry_enabled: bool, + interrupt0_enabled: bool, + interrupt1_enabled: bool, + interrupt2_enabled: bool, + interrupt3_enabled: bool, + reserved: u8 = 0, +}; + +pub const Register = enum { + dummy, + + pub fn allocIndex(self: Register) ?u4 { + return null; + } +}; + +pub const callee_preserved_regs = [_]Register{}; diff --git a/src-self-hosted/codegen/spu-mk2/interpreter.zig b/src-self-hosted/codegen/spu-mk2/interpreter.zig @@ -0,0 +1,166 @@ +const std = @import("std"); +const log = std.log.scoped(.SPU_2_Interpreter); +const spu = @import("../spu-mk2.zig"); +const FlagRegister = spu.FlagRegister; +const Instruction = spu.Instruction; +const ExecutionCondition = spu.ExecutionCondition; + +pub fn Interpreter(comptime Bus: type) type { + return struct { + ip: u16 = 0, + sp: u16 = undefined, + bp: u16 = undefined, + fr: FlagRegister = @bitCast(FlagRegister, @as(u16, 0)), + /// This is set to true when we hit an 
undefined0 instruction, allowing it to + /// be used as a trap for testing purposes + undefined0: bool = false, + /// This is set to true when we hit an undefined1 instruction, allowing it to + /// be used as a trap for testing purposes. undefined1 is used as a breakpoint. + undefined1: bool = false, + bus: Bus, + + pub fn ExecuteBlock(self: *@This(), comptime size: ?u32) !void { + var count: usize = 0; + while (size == null or count < size.?) { + count += 1; + var instruction = @bitCast(Instruction, self.bus.read16(self.ip)); + + log.debug("Executing {}\n", .{instruction}); + + self.ip +%= 2; + + const execute = switch (instruction.condition) { + .always => true, + .not_zero => !self.fr.zero, + .when_zero => self.fr.zero, + .overflow => self.fr.carry, + ExecutionCondition.greater_or_equal_zero => !self.fr.negative, + else => return error.Unimplemented, + }; + + if (execute) { + const val0 = switch (instruction.input0) { + .zero => @as(u16, 0), + .immediate => i: { + const val = self.bus.read16(@intCast(u16, self.ip)); + self.ip +%= 2; + break :i val; + }, + else => |e| e: { + // peek or pop; show value at current SP, and if pop, increment sp + const val = self.bus.read16(self.sp); + if (e == .pop) { + self.sp +%= 2; + } + break :e val; + }, + }; + const val1 = switch (instruction.input1) { + .zero => @as(u16, 0), + .immediate => i: { + const val = self.bus.read16(@intCast(u16, self.ip)); + self.ip +%= 2; + break :i val; + }, + else => |e| e: { + // peek or pop; show value at current SP, and if pop, increment sp + const val = self.bus.read16(self.sp); + if (e == .pop) { + self.sp +%= 2; + } + break :e val; + }, + }; + + const output: u16 = switch (instruction.command) { + .get => self.bus.read16(self.bp +% (2 *% val0)), + .set => a: { + self.bus.write16(self.bp +% 2 *% val0, val1); + break :a val1; + }, + .load8 => self.bus.read8(val0), + .load16 => self.bus.read16(val0), + .store8 => a: { + const val = @truncate(u8, val1); + self.bus.write8(val0, val); + break :a 
val; + }, + .store16 => a: { + self.bus.write16(val0, val1); + break :a val1; + }, + .copy => val0, + .add => a: { + var val: u16 = undefined; + self.fr.carry = @addWithOverflow(u16, val0, val1, &val); + break :a val; + }, + .sub => a: { + var val: u16 = undefined; + self.fr.carry = @subWithOverflow(u16, val0, val1, &val); + break :a val; + }, + .spset => a: { + self.sp = val0; + break :a val0; + }, + .bpset => a: { + self.bp = val0; + break :a val0; + }, + .frset => a: { + const val = (@bitCast(u16, self.fr) & val1) | (val0 & ~val1); + self.fr = @bitCast(FlagRegister, val); + break :a val; + }, + .bswap => (val0 >> 8) | (val0 << 8), + .bpget => self.bp, + .spget => self.sp, + .ipget => self.ip +% (2 *% val0), + .lsl => val0 << 1, + .lsr => val0 >> 1, + .@"and" => val0 & val1, + .@"or" => val0 | val1, + .xor => val0 ^ val1, + .not => ~val0, + .undefined0 => { + self.undefined0 = true; + // Break out of the loop, and let the caller decide what to do + return; + }, + .undefined1 => { + self.undefined1 = true; + // Break out of the loop, and let the caller decide what to do + return; + }, + .signext => if ((val0 & 0x80) != 0) + (val0 & 0xFF) | 0xFF00 + else + (val0 & 0xFF), + else => return error.Unimplemented, + }; + + switch (instruction.output) { + .discard => {}, + .push => { + self.sp -%= 2; + self.bus.write16(self.sp, output); + }, + .jump => { + self.ip = output; + }, + else => return error.Unimplemented, + } + if (instruction.modify_flags) { + self.fr.negative = (output & 0x8000) != 0; + self.fr.zero = (output == 0x0000); + } + } else { + if (instruction.input0 == .immediate) self.ip +%= 2; + if (instruction.input1 == .immediate) self.ip +%= 2; + break; + } + } + } + }; +} diff --git a/src-self-hosted/link.zig b/src-self-hosted/link.zig @@ -5,6 +5,9 @@ const fs = std.fs; const trace = @import("tracy.zig").trace; const Package = @import("Package.zig"); const Type = @import("type.zig").Type; +const build_options = @import("build_options"); + +pub const 
producer_string = if (std.builtin.is_test) "zig test" else "zig " ++ build_options.version; pub const Options = struct { target: std.Target, @@ -20,6 +23,7 @@ pub const Options = struct { /// Used for calculating how much space to reserve for executable program code in case /// the binary file deos not already have such a section. program_code_size_hint: u64 = 256 * 1024, + entry_addr: ?u64 = null, }; pub const File = struct { diff --git a/src-self-hosted/link/Elf.zig b/src-self-hosted/link/Elf.zig @@ -14,12 +14,10 @@ const leb128 = std.debug.leb; const Package = @import("../Package.zig"); const Value = @import("../value.zig").Value; const Type = @import("../type.zig").Type; -const build_options = @import("build_options"); const link = @import("../link.zig"); const File = link.File; const Elf = @This(); -const producer_string = if (std.builtin.is_test) "zig test" else "zig " ++ build_options.version; const default_entry_addr = 0x8000000; // TODO Turn back on zig fmt when https://github.com/ziglang/zig/issues/5948 is implemented. @@ -249,8 +247,8 @@ fn openFile(allocator: *Allocator, file: fs.File, options: link.Options) !Elf { .allocator = allocator, }, .ptr_width = switch (options.target.cpu.arch.ptrBitWidth()) { - 32 => .p32, - 64 => .p64, + 0 ... 32 => .p32, + 33 ... 64 => .p64, else => return error.UnsupportedELFArchitecture, }, }; @@ -278,8 +276,8 @@ fn createFile(allocator: *Allocator, file: fs.File, options: link.Options) !Elf .file = file, }, .ptr_width = switch (options.target.cpu.arch.ptrBitWidth()) { - 32 => .p32, - 64 => .p64, + 0 ... 32 => .p32, + 33 ... 64 => .p64, else => return error.UnsupportedELFArchitecture, }, .shdr_table_dirty = true, @@ -346,7 +344,7 @@ fn getDebugLineProgramEnd(self: Elf) u32 { /// Returns end pos of collision, if any. 
fn detectAllocCollision(self: *Elf, start: u64, size: u64) ?u64 { - const small_ptr = self.base.options.target.cpu.arch.ptrBitWidth() == 32; + const small_ptr = self.ptr_width == .p32; const ehdr_size: u64 = if (small_ptr) @sizeOf(elf.Elf32_Ehdr) else @sizeOf(elf.Elf64_Ehdr); if (start < ehdr_size) return ehdr_size; @@ -462,12 +460,13 @@ pub fn populateMissingMetadata(self: *Elf) !void { const p_align = 0x1000; const off = self.findFreeSpace(file_size, p_align); log.debug("found PT_LOAD free space 0x{x} to 0x{x}\n", .{ off, off + file_size }); + const entry_addr: u64 = self.entry_addr orelse if (self.base.options.target.cpu.arch == .spu_2) @as(u64, 0) else default_entry_addr; try self.program_headers.append(self.base.allocator, .{ .p_type = elf.PT_LOAD, .p_offset = off, .p_filesz = file_size, - .p_vaddr = default_entry_addr, - .p_paddr = default_entry_addr, + .p_vaddr = entry_addr, + .p_paddr = entry_addr, .p_memsz = file_size, .p_align = p_align, .p_flags = elf.PF_X | elf.PF_R, @@ -486,13 +485,13 @@ pub fn populateMissingMetadata(self: *Elf) !void { // TODO instead of hard coding the vaddr, make a function to find a vaddr to put things at. // we'll need to re-use that function anyway, in case the GOT grows and overlaps something // else in virtual memory. - const default_got_addr = if (ptr_size == 2) @as(u32, 0x8000) else 0x4000000; + const got_addr: u32 = if (self.base.options.target.cpu.arch.ptrBitWidth() >= 32) 0x4000000 else 0x8000; try self.program_headers.append(self.base.allocator, .{ .p_type = elf.PT_LOAD, .p_offset = off, .p_filesz = file_size, - .p_vaddr = default_got_addr, - .p_paddr = default_got_addr, + .p_vaddr = got_addr, + .p_paddr = got_addr, .p_memsz = file_size, .p_align = p_align, .p_flags = elf.PF_R, @@ -863,7 +862,7 @@ pub fn flush(self: *Elf, module: *Module) !void { // Write the form for the compile unit, which must match the abbrev table above. 
const name_strp = try self.makeDebugString(self.base.options.root_pkg.root_src_path); const comp_dir_strp = try self.makeDebugString(self.base.options.root_pkg.root_src_dir_path); - const producer_strp = try self.makeDebugString(producer_string); + const producer_strp = try self.makeDebugString(link.producer_string); // Currently only one compilation unit is supported, so the address range is simply // identical to the main program header virtual address and memory size. const text_phdr = &self.program_headers.items[self.phdr_load_re_index.?]; @@ -1349,6 +1348,7 @@ fn freeTextBlock(self: *Elf, text_block: *TextBlock) void { var already_have_free_list_node = false; { var i: usize = 0; + // TODO turn text_block_free_list into a hash map while (i < self.text_block_free_list.items.len) { if (self.text_block_free_list.items[i] == text_block) { _ = self.text_block_free_list.swapRemove(i); @@ -1360,11 +1360,19 @@ fn freeTextBlock(self: *Elf, text_block: *TextBlock) void { i += 1; } } + // TODO process free list for dbg info just like we do above for vaddrs if (self.last_text_block == text_block) { // TODO shrink the .text section size here self.last_text_block = text_block.prev; } + if (self.dbg_info_decl_first == text_block) { + self.dbg_info_decl_first = text_block.dbg_info_next; + } + if (self.dbg_info_decl_last == text_block) { + // TODO shrink the .debug_info section size here + self.dbg_info_decl_last = text_block.dbg_info_prev; + } if (text_block.prev) |prev| { prev.next = text_block.next; @@ -1383,6 +1391,20 @@ fn freeTextBlock(self: *Elf, text_block: *TextBlock) void { } else { text_block.next = null; } + + if (text_block.dbg_info_prev) |prev| { + prev.dbg_info_next = text_block.dbg_info_next; + + // TODO the free list logic like we do for text blocks above + } else { + text_block.dbg_info_prev = null; + } + + if (text_block.dbg_info_next) |next| { + next.dbg_info_prev = text_block.dbg_info_prev; + } else { + text_block.dbg_info_next = null; + } } fn 
shrinkTextBlock(self: *Elf, text_block: *TextBlock, new_block_size: u64) void { @@ -1584,10 +1606,10 @@ pub fn freeDecl(self: *Elf, decl: *Module.Decl) void { next.prev = null; } if (self.dbg_line_fn_first == &decl.fn_link.elf) { - self.dbg_line_fn_first = null; + self.dbg_line_fn_first = decl.fn_link.elf.next; } if (self.dbg_line_fn_last == &decl.fn_link.elf) { - self.dbg_line_fn_last = null; + self.dbg_line_fn_last = decl.fn_link.elf.prev; } } @@ -2151,29 +2173,28 @@ pub fn deleteExport(self: *Elf, exp: Export) void { fn writeProgHeader(self: *Elf, index: usize) !void { const foreign_endian = self.base.options.target.cpu.arch.endian() != std.Target.current.cpu.arch.endian(); const offset = self.program_headers.items[index].p_offset; - switch (self.base.options.target.cpu.arch.ptrBitWidth()) { - 32 => { + switch (self.ptr_width) { + .p32 => { var phdr = [1]elf.Elf32_Phdr{progHeaderTo32(self.program_headers.items[index])}; if (foreign_endian) { bswapAllFields(elf.Elf32_Phdr, &phdr[0]); } return self.base.file.?.pwriteAll(mem.sliceAsBytes(&phdr), offset); }, - 64 => { + .p64 => { var phdr = [1]elf.Elf64_Phdr{self.program_headers.items[index]}; if (foreign_endian) { bswapAllFields(elf.Elf64_Phdr, &phdr[0]); } return self.base.file.?.pwriteAll(mem.sliceAsBytes(&phdr), offset); }, - else => return error.UnsupportedArchitecture, } } fn writeSectHeader(self: *Elf, index: usize) !void { const foreign_endian = self.base.options.target.cpu.arch.endian() != std.Target.current.cpu.arch.endian(); - switch (self.base.options.target.cpu.arch.ptrBitWidth()) { - 32 => { + switch (self.ptr_width) { + .p32 => { var shdr: [1]elf.Elf32_Shdr = undefined; shdr[0] = sectHeaderTo32(self.sections.items[index]); if (foreign_endian) { @@ -2182,7 +2203,7 @@ fn writeSectHeader(self: *Elf, index: usize) !void { const offset = self.shdr_table_offset.? 
+ index * @sizeOf(elf.Elf32_Shdr); return self.base.file.?.pwriteAll(mem.sliceAsBytes(&shdr), offset); }, - 64 => { + .p64 => { var shdr = [1]elf.Elf64_Shdr{self.sections.items[index]}; if (foreign_endian) { bswapAllFields(elf.Elf64_Shdr, &shdr[0]); @@ -2190,14 +2211,13 @@ fn writeSectHeader(self: *Elf, index: usize) !void { const offset = self.shdr_table_offset.? + index * @sizeOf(elf.Elf64_Shdr); return self.base.file.?.pwriteAll(mem.sliceAsBytes(&shdr), offset); }, - else => return error.UnsupportedArchitecture, } } fn writeOffsetTableEntry(self: *Elf, index: usize) !void { const shdr = &self.sections.items[self.got_section_index.?]; const phdr = &self.program_headers.items[self.phdr_got_index.?]; - const entry_size: u16 = self.ptrWidthBytes(); + const entry_size: u16 = self.archPtrWidthBytes(); if (self.offset_table_count_dirty) { // TODO Also detect virtual address collisions. const allocated_size = self.allocatedSize(shdr.sh_offset); @@ -2221,17 +2241,23 @@ fn writeOffsetTableEntry(self: *Elf, index: usize) !void { } const endian = self.base.options.target.cpu.arch.endian(); const off = shdr.sh_offset + @as(u64, entry_size) * index; - switch (self.ptr_width) { - .p32 => { + switch (entry_size) { + 2 => { + var buf: [2]u8 = undefined; + mem.writeInt(u16, &buf, @intCast(u16, self.offset_table.items[index]), endian); + try self.base.file.?.pwriteAll(&buf, off); + }, + 4 => { var buf: [4]u8 = undefined; mem.writeInt(u32, &buf, @intCast(u32, self.offset_table.items[index]), endian); try self.base.file.?.pwriteAll(&buf, off); }, - .p64 => { + 8 => { var buf: [8]u8 = undefined; mem.writeInt(u64, &buf, self.offset_table.items[index], endian); try self.base.file.?.pwriteAll(&buf, off); }, + else => unreachable, } } @@ -2344,6 +2370,7 @@ fn writeAllGlobalSymbols(self: *Elf) !void { } } +/// Always 4 or 8 depending on whether this is 32-bit ELF or 64-bit ELF. 
fn ptrWidthBytes(self: Elf) u8 { return switch (self.ptr_width) { .p32 => 4, @@ -2351,6 +2378,12 @@ fn ptrWidthBytes(self: Elf) u8 { }; } +/// Does not necessarily match `ptrWidthBytes` for example can be 2 bytes +/// in a 32-bit ELF file. +fn archPtrWidthBytes(self: Elf) u8 { + return @intCast(u8, self.base.options.target.cpu.arch.ptrBitWidth() / 8); +} + /// The reloc offset for the virtual address of a function in its Line Number Program. /// Size is a virtual address integer. const dbg_line_vaddr_reloc_index = 3; diff --git a/src-self-hosted/link/MachO.zig b/src-self-hosted/link/MachO.zig @@ -6,29 +6,66 @@ const assert = std.debug.assert; const fs = std.fs; const log = std.log.scoped(.link); const macho = std.macho; +const codegen = @import("../codegen.zig"); const math = std.math; const mem = std.mem; +const trace = @import("../tracy.zig").trace; +const Type = @import("../type.zig").Type; const Module = @import("../Module.zig"); const link = @import("../link.zig"); const File = link.File; +const is_darwin = std.Target.current.os.tag.isDarwin(); + pub const base_tag: File.Tag = File.Tag.macho; base: File, -/// Stored in native-endian format, depending on target endianness needs to be bswapped on read/write. -/// Same order as in the file. -segment_cmds: std.ArrayListUnmanaged(macho.segment_command_64) = std.ArrayListUnmanaged(macho.segment_command_64){}, +/// List of all load command headers that are in the file. +/// We use it to track number and size of all commands needed by the header. +commands: std.ArrayListUnmanaged(macho.load_command) = std.ArrayListUnmanaged(macho.load_command){}, +command_file_offset: ?u64 = null, /// Stored in native-endian format, depending on target endianness needs to be bswapped on read/write. /// Same order as in the file. 
+segments: std.ArrayListUnmanaged(macho.segment_command_64) = std.ArrayListUnmanaged(macho.segment_command_64){}, sections: std.ArrayListUnmanaged(macho.section_64) = std.ArrayListUnmanaged(macho.section_64){}, +segment_table_offset: ?u64 = null, +/// Entry point load command +entry_point_cmd: ?macho.entry_point_command = null, entry_addr: ?u64 = null, +/// Default VM start address set at 4GB +vm_start_address: u64 = 0x100000000, + +seg_table_dirty: bool = false, + error_flags: File.ErrorFlags = File.ErrorFlags{}, +/// TODO ultimately this will be propagated down from main() and set (in this form or another) +/// when user links against system lib. +link_against_system: bool = false, + +/// `alloc_num / alloc_den` is the factor of padding when allocating. +const alloc_num = 4; +const alloc_den = 3; + +/// Default path to dyld +/// TODO instead of hardcoding it, we should probably look through some env vars and search paths +/// instead but this will do for now. +const DEFAULT_DYLD_PATH: [*:0]const u8 = "/usr/lib/dyld"; + +/// Default lib search path +/// TODO instead of hardcoding it, we should probably look through some env vars and search paths +/// instead but this will do for now. +const DEFAULT_LIB_SEARCH_PATH: []const u8 = "/usr/lib"; + +const LIB_SYSTEM_NAME: [*:0]const u8 = "System"; +/// TODO we should search for libSystem and fail if it doesn't exist, instead of hardcoding it +const LIB_SYSTEM_PATH: [*:0]const u8 = DEFAULT_LIB_SEARCH_PATH ++ "/libSystem.B.dylib"; + pub const TextBlock = struct { pub const empty = TextBlock{}; }; @@ -80,12 +117,6 @@ fn openFile(allocator: *Allocator, file: fs.File, options: link.Options) !MachO /// Truncates the existing file contents and overwrites the contents. /// Returns an error if `file` is not already open with +read +write +seek abilities. 
fn createFile(allocator: *Allocator, file: fs.File, options: link.Options) !MachO { - switch (options.output_mode) { - .Exe => {}, - .Obj => {}, - .Lib => return error.TODOImplementWritingLibFiles, - } - var self: MachO = .{ .base = .{ .file = file, @@ -96,31 +127,35 @@ fn createFile(allocator: *Allocator, file: fs.File, options: link.Options) !Mach }; errdefer self.deinit(); - if (options.output_mode == .Exe) { - // The first segment command for executables is always a __PAGEZERO segment. - try self.segment_cmds.append(allocator, .{ - .cmd = macho.LC_SEGMENT_64, - .cmdsize = @sizeOf(macho.segment_command_64), - .segname = self.makeString("__PAGEZERO"), - .vmaddr = 0, - .vmsize = 0, - .fileoff = 0, - .filesize = 0, - .maxprot = 0, - .initprot = 0, - .nsects = 0, - .flags = 0, - }); + switch (options.output_mode) { + .Exe => { + // The first segment command for executables is always a __PAGEZERO segment. + const pagezero = .{ + .cmd = macho.LC_SEGMENT_64, + .cmdsize = commandSize(@sizeOf(macho.segment_command_64)), + .segname = makeString("__PAGEZERO"), + .vmaddr = 0, + .vmsize = self.vm_start_address, + .fileoff = 0, + .filesize = 0, + .maxprot = 0, + .initprot = 0, + .nsects = 0, + .flags = 0, + }; + try self.commands.append(allocator, .{ + .cmd = pagezero.cmd, + .cmdsize = pagezero.cmdsize, + }); + try self.segments.append(allocator, pagezero); + }, + .Obj => return error.TODOImplementWritingObjFiles, + .Lib => return error.TODOImplementWritingLibFiles, } - return self; -} + try self.populateMissingMetadata(); -fn makeString(self: *MachO, comptime bytes: []const u8) [16]u8 { - var buf: [16]u8 = undefined; - if (bytes.len > buf.len) @compileError("MachO segment/section name too long"); - mem.copy(u8, buf[0..], bytes); - return buf; + return self; } fn writeMachOHeader(self: *MachO) !void { @@ -156,10 +191,14 @@ fn writeMachOHeader(self: *MachO) !void { }; hdr.filetype = filetype; - // TODO consider other commands - const ncmds = try math.cast(u32, 
self.segment_cmds.items.len); + const ncmds = try math.cast(u32, self.commands.items.len); hdr.ncmds = ncmds; - hdr.sizeofcmds = ncmds * @sizeOf(macho.segment_command_64); + + var sizeof_cmds: u32 = 0; + for (self.commands.items) |cmd| { + sizeof_cmds += cmd.cmdsize; + } + hdr.sizeofcmds = sizeof_cmds; // TODO should these be set to something else? hdr.flags = 0; @@ -169,18 +208,90 @@ fn writeMachOHeader(self: *MachO) !void { } pub fn flush(self: *MachO, module: *Module) !void { - // TODO implement flush + // Save segments first { - const buf = try self.base.allocator.alloc(macho.segment_command_64, self.segment_cmds.items.len); + const buf = try self.base.allocator.alloc(macho.segment_command_64, self.segments.items.len); defer self.base.allocator.free(buf); + self.command_file_offset = @sizeOf(macho.mach_header_64); + for (buf) |*seg, i| { - seg.* = self.segment_cmds.items[i]; + seg.* = self.segments.items[i]; + self.command_file_offset.? += self.segments.items[i].cmdsize; } try self.base.file.?.pwriteAll(mem.sliceAsBytes(buf), @sizeOf(macho.mach_header_64)); } + switch (self.base.options.output_mode) { + .Exe => { + if (self.link_against_system) { + if (is_darwin) { + { + // Specify path to dynamic linker dyld + const cmdsize = commandSize(@intCast(u32, @sizeOf(macho.dylinker_command) + mem.lenZ(DEFAULT_DYLD_PATH))); + const load_dylinker = [1]macho.dylinker_command{ + .{ + .cmd = macho.LC_LOAD_DYLINKER, + .cmdsize = cmdsize, + .name = @sizeOf(macho.dylinker_command), + }, + }; + try self.commands.append(self.base.allocator, .{ + .cmd = macho.LC_LOAD_DYLINKER, + .cmdsize = cmdsize, + }); + + try self.base.file.?.pwriteAll(mem.sliceAsBytes(load_dylinker[0..1]), self.command_file_offset.?); + + const file_offset = self.command_file_offset.? + @sizeOf(macho.dylinker_command); + try self.addPadding(cmdsize - @sizeOf(macho.dylinker_command), file_offset); + + try self.base.file.?.pwriteAll(mem.spanZ(DEFAULT_DYLD_PATH), file_offset); + self.command_file_offset.? 
+= cmdsize; + } + + { + // Link against libSystem + const cmdsize = commandSize(@intCast(u32, @sizeOf(macho.dylib_command) + mem.lenZ(LIB_SYSTEM_PATH))); + // According to Apple's manual, we should obtain current libSystem version using libc call + // NSVersionOfRunTimeLibrary. + const version = std.c.NSVersionOfRunTimeLibrary(LIB_SYSTEM_NAME); + const dylib = .{ + .name = @sizeOf(macho.dylib_command), + .timestamp = 2, // not sure why not simply 0; this is reverse engineered from Mach-O files + .current_version = version, + .compatibility_version = 0x10000, // not sure why this either; value from reverse engineering + }; + const load_dylib = [1]macho.dylib_command{ + .{ + .cmd = macho.LC_LOAD_DYLIB, + .cmdsize = cmdsize, + .dylib = dylib, + }, + }; + try self.commands.append(self.base.allocator, .{ + .cmd = macho.LC_LOAD_DYLIB, + .cmdsize = cmdsize, + }); + + try self.base.file.?.pwriteAll(mem.sliceAsBytes(load_dylib[0..1]), self.command_file_offset.?); + + const file_offset = self.command_file_offset.? + @sizeOf(macho.dylib_command); + try self.addPadding(cmdsize - @sizeOf(macho.dylib_command), file_offset); + + try self.base.file.?.pwriteAll(mem.spanZ(LIB_SYSTEM_PATH), file_offset); + self.command_file_offset.? += cmdsize; + } + } else { + @panic("linking against libSystem on non-native target is unsupported"); + } + } + }, + .Obj => return error.TODOImplementWritingObjFiles, + .Lib => return error.TODOImplementWritingLibFiles, + } + if (self.entry_addr == null and self.base.options.output_mode == .Exe) { log.debug("flushing. 
no_entry_point_found = true\n", .{}); self.error_flags.no_entry_point_found = true; @@ -192,7 +303,8 @@ pub fn flush(self: *MachO, module: *Module) !void { } pub fn deinit(self: *MachO) void { - self.segment_cmds.deinit(self.base.allocator); + self.commands.deinit(self.base.allocator); + self.segments.deinit(self.base.allocator); self.sections.deinit(self.base.allocator); } @@ -214,3 +326,30 @@ pub fn freeDecl(self: *MachO, decl: *Module.Decl) void {} pub fn getDeclVAddr(self: *MachO, decl: *const Module.Decl) u64 { @panic("TODO implement getDeclVAddr for MachO"); } + +pub fn populateMissingMetadata(self: *MachO) !void {} + +fn makeString(comptime bytes: []const u8) [16]u8 { + var buf: [16]u8 = undefined; + if (bytes.len > buf.len) @compileError("MachO segment/section name too long"); + mem.copy(u8, buf[0..], bytes); + return buf; +} + +fn commandSize(min_size: u32) u32 { + if (min_size % @sizeOf(u64) == 0) return min_size; + + const div = min_size / @sizeOf(u64); + return (div + 1) * @sizeOf(u64); +} + +fn addPadding(self: *MachO, size: u32, file_offset: u64) !void { + if (size == 0) return; + + const buf = try self.base.allocator.alloc(u8, size); + defer self.base.allocator.free(buf); + + mem.set(u8, buf[0..], 0); + + try self.base.file.?.pwriteAll(buf, file_offset); +} diff --git a/src-self-hosted/test.zig b/src-self-hosted/test.zig @@ -583,7 +583,10 @@ pub const TestContext = struct { switch (case.target.getExternalExecutor()) { .native => try argv.append(exe_path), - .unavailable => return, // No executor available; pass test. + .unavailable => { + try self.runInterpreterIfAvailable(allocator, &exec_node, case, tmp.dir, bin_name); + return; // Pass test. + }, .qemu => |qemu_bin_name| if (enable_qemu) { // TODO Ability for test cases to specify whether to link libc. 
@@ -635,7 +638,6 @@ pub const TestContext = struct { var test_node = update_node.start("test", null); test_node.activate(); defer test_node.end(); - defer allocator.free(exec_result.stdout); defer allocator.free(exec_result.stderr); switch (exec_result.term) { @@ -657,4 +659,115 @@ pub const TestContext = struct { } } } + + fn runInterpreterIfAvailable( + self: *TestContext, + gpa: *Allocator, + node: *std.Progress.Node, + case: Case, + tmp_dir: std.fs.Dir, + bin_name: []const u8, + ) !void { + const arch = case.target.cpu_arch orelse return; + switch (arch) { + .spu_2 => return self.runSpu2Interpreter(gpa, node, case, tmp_dir, bin_name), + else => return, + } + } + + fn runSpu2Interpreter( + self: *TestContext, + gpa: *Allocator, + update_node: *std.Progress.Node, + case: Case, + tmp_dir: std.fs.Dir, + bin_name: []const u8, + ) !void { + const spu = @import("codegen/spu-mk2.zig"); + if (case.target.os_tag) |os| { + if (os != .freestanding) { + std.debug.panic("Only freestanding makes sense for SPU-II tests!", .{}); + } + } else { + std.debug.panic("SPU_2 has no native OS, check the test!", .{}); + } + + var interpreter = spu.Interpreter(struct { + RAM: [0x10000]u8 = undefined, + + pub fn read8(bus: @This(), addr: u16) u8 { + return bus.RAM[addr]; + } + pub fn read16(bus: @This(), addr: u16) u16 { + return std.mem.readIntLittle(u16, bus.RAM[addr..][0..2]); + } + + pub fn write8(bus: *@This(), addr: u16, val: u8) void { + bus.RAM[addr] = val; + } + + pub fn write16(bus: *@This(), addr: u16, val: u16) void { + std.mem.writeIntLittle(u16, bus.RAM[addr..][0..2], val); + } + }){ + .bus = .{}, + }; + + { + var load_node = update_node.start("load", null); + load_node.activate(); + defer load_node.end(); + + var file = try tmp_dir.openFile(bin_name, .{ .read = true }); + defer file.close(); + + const header = try std.elf.readHeader(file); + var iterator = header.program_header_iterator(file); + + var none_loaded = true; + + while (try iterator.next()) |phdr| { + if 
(phdr.p_type != std.elf.PT_LOAD) { + std.debug.print("Encountered unexpected ELF program header: type {}\n", .{phdr.p_type}); + std.process.exit(1); + } + if (phdr.p_paddr != phdr.p_vaddr) { + std.debug.print("Physical address does not match virtual address in ELF header!\n", .{}); + std.process.exit(1); + } + if (phdr.p_filesz != phdr.p_memsz) { + std.debug.print("Physical size does not match virtual size in ELF header!\n", .{}); + std.process.exit(1); + } + if ((try file.pread(interpreter.bus.RAM[phdr.p_paddr .. phdr.p_paddr + phdr.p_filesz], phdr.p_offset)) != phdr.p_filesz) { + std.debug.print("Read less than expected from ELF file!", .{}); + std.process.exit(1); + } + std.log.scoped(.spu2_test).debug("Loaded 0x{x} bytes to 0x{x:0<4}\n", .{ phdr.p_filesz, phdr.p_paddr }); + none_loaded = false; + } + if (none_loaded) { + std.debug.print("No data found in ELF file!\n", .{}); + std.process.exit(1); + } + } + + var exec_node = update_node.start("execute", null); + exec_node.activate(); + defer exec_node.end(); + + var blocks: u16 = 1000; + const block_size = 1000; + while (!interpreter.undefined0) { + const pre_ip = interpreter.ip; + if (blocks > 0) { + blocks -= 1; + try interpreter.ExecuteBlock(block_size); + if (pre_ip == interpreter.ip) { + std.debug.print("Infinite loop detected in SPU II test!\n", .{}); + std.process.exit(1); + } + } + } + } }; diff --git a/src-self-hosted/type.zig b/src-self-hosted/type.zig @@ -3,6 +3,7 @@ const Value = @import("value.zig").Value; const assert = std.debug.assert; const Allocator = std.mem.Allocator; const Target = std.Target; +const Module = @import("Module.zig"); /// This is the raw data, with no bookkeeping, no memory awareness, no de-duplication. /// It's important for this type to be small. 
@@ -52,7 +53,7 @@ pub const Type = extern union { .bool => return .Bool, .void => return .Void, .type => return .Type, - .anyerror => return .ErrorSet, + .error_set, .error_set_single, .anyerror => return .ErrorSet, .comptime_int => return .ComptimeInt, .comptime_float => return .ComptimeFloat, .noreturn => return .NoReturn, @@ -84,6 +85,10 @@ pub const Type = extern union { .optional_single_mut_pointer, => return .Optional, .enum_literal => return .EnumLiteral, + + .anyerror_void_error_union, .error_union => return .ErrorUnion, + + .anyframe_T, .@"anyframe" => return .AnyFrame, } } @@ -151,6 +156,9 @@ pub const Type = extern union { .ComptimeInt => return true, .Undefined => return true, .Null => return true, + .AnyFrame => { + return a.elemType().eql(b.elemType()); + }, .Pointer => { // Hot path for common case: if (a.castPointer()) |a_payload| { @@ -225,7 +233,6 @@ pub const Type = extern union { .BoundFn, .Opaque, .Frame, - .AnyFrame, .Vector, => std.debug.panic("TODO implement Type equality comparison of {} and {}", .{ a, b }), } @@ -343,6 +350,8 @@ pub const Type = extern union { .single_const_pointer_to_comptime_int, .const_slice_u8, .enum_literal, + .anyerror_void_error_union, + .@"anyframe", => unreachable, .array_u8_sentinel_0 => return self.copyPayloadShallow(allocator, Payload.Array_u8_Sentinel0), @@ -397,6 +406,7 @@ pub const Type = extern union { .optional_single_mut_pointer, .optional_single_const_pointer, => return self.copyPayloadSingleField(allocator, Payload.PointerSimple, "pointee_type"), + .anyframe_T => return self.copyPayloadSingleField(allocator, Payload.AnyFrame, "return_type"), .pointer => { const payload = @fieldParentPtr(Payload.Pointer, "base", self.ptr_otherwise); @@ -416,6 +426,19 @@ pub const Type = extern union { }; return Type{ .ptr_otherwise = &new_payload.base }; }, + .error_union => { + const payload = @fieldParentPtr(Payload.ErrorUnion, "base", self.ptr_otherwise); + const new_payload = try allocator.create(Payload.ErrorUnion); 
+ new_payload.* = .{ + .base = payload.base, + + .error_set = try payload.error_set.copy(allocator), + .payload = try payload.payload.copy(allocator), + }; + return Type{ .ptr_otherwise = &new_payload.base }; + }, + .error_set => return self.copyPayloadShallow(allocator, Payload.ErrorSet), + .error_set_single => return self.copyPayloadShallow(allocator, Payload.ErrorSetSingle), } } @@ -482,6 +505,8 @@ pub const Type = extern union { .@"null" => return out_stream.writeAll("@TypeOf(null)"), .@"undefined" => return out_stream.writeAll("@TypeOf(undefined)"), + .@"anyframe" => return out_stream.writeAll("anyframe"), + .anyerror_void_error_union => return out_stream.writeAll("anyerror!void"), .const_slice_u8 => return out_stream.writeAll("[]const u8"), .fn_noreturn_no_args => return out_stream.writeAll("fn() noreturn"), .fn_void_no_args => return out_stream.writeAll("fn() void"), @@ -500,6 +525,12 @@ pub const Type = extern union { continue; }, + .anyframe_T => { + const payload = @fieldParentPtr(Payload.AnyFrame, "base", ty.ptr_otherwise); + try out_stream.print("anyframe->", .{}); + ty = payload.return_type; + continue; + }, .array_u8 => { const payload = @fieldParentPtr(Payload.Array_u8, "base", ty.ptr_otherwise); return out_stream.print("[{}]u8", .{payload.len}); @@ -622,6 +653,21 @@ pub const Type = extern union { ty = payload.pointee_type; continue; }, + .error_union => { + const payload = @fieldParentPtr(Payload.ErrorUnion, "base", ty.ptr_otherwise); + try payload.error_set.format("", .{}, out_stream); + try out_stream.writeAll("!"); + ty = payload.payload; + continue; + }, + .error_set => { + const payload = @fieldParentPtr(Payload.ErrorSet, "base", ty.ptr_otherwise); + return out_stream.writeAll(std.mem.spanZ(payload.decl.name)); + }, + .error_set_single => { + const payload = @fieldParentPtr(Payload.ErrorSetSingle, "base", ty.ptr_otherwise); + return out_stream.print("error{{{}}}", .{payload.name}); + }, } unreachable; } @@ -715,6 +761,11 @@ pub const Type = 
extern union { .optional, .optional_single_mut_pointer, .optional_single_const_pointer, + .@"anyframe", + .anyframe_T, + .anyerror_void_error_union, + .error_set, + .error_set_single, => true, // TODO lazy types .array => self.elemType().hasCodeGenBits() and self.arrayLen() != 0, @@ -723,6 +774,11 @@ pub const Type = extern union { .int_signed => self.cast(Payload.IntSigned).?.bits == 0, .int_unsigned => self.cast(Payload.IntUnsigned).?.bits == 0, + .error_union => { + const payload = self.cast(Payload.ErrorUnion).?; + return payload.error_set.hasCodeGenBits() or payload.payload.hasCodeGenBits(); + }, + .c_void, .void, .type, @@ -756,6 +812,7 @@ pub const Type = extern union { .fn_ccc_void_no_args, // represents machine code; not a pointer .function, // represents machine code; not a pointer => return switch (target.cpu.arch) { + .arm => 4, .riscv64 => 2, else => 1, }, @@ -778,6 +835,8 @@ pub const Type = extern union { .mut_slice, .optional_single_const_pointer, .optional_single_mut_pointer, + .@"anyframe", + .anyframe_T, => return @divExact(target.cpu.arch.ptrBitWidth(), 8), .pointer => { @@ -802,7 +861,11 @@ pub const Type = extern union { .f128 => return 16, .c_longdouble => return 16, - .anyerror => return 2, // TODO revisit this when we have the concept of the error tag type + .error_set, + .error_set_single, + .anyerror_void_error_union, + .anyerror, + => return 2, // TODO revisit this when we have the concept of the error tag type .array, .array_sentinel => return self.elemType().abiAlignment(target), @@ -828,6 +891,16 @@ pub const Type = extern union { return child_type.abiAlignment(target); }, + .error_union => { + const payload = self.cast(Payload.ErrorUnion).?; + if (!payload.error_set.hasCodeGenBits()) { + return payload.payload.abiAlignment(target); + } else if (!payload.payload.hasCodeGenBits()) { + return payload.error_set.abiAlignment(target); + } + @panic("TODO abiAlignment error union"); + }, + .c_void, .void, .type, @@ -881,12 +954,15 @@ pub 
const Type = extern union { .i32, .u32 => return 4, .i64, .u64 => return 8, - .isize, .usize => return @divExact(target.cpu.arch.ptrBitWidth(), 8), + .@"anyframe", .anyframe_T, .isize, .usize => return @divExact(target.cpu.arch.ptrBitWidth(), 8), .const_slice, .mut_slice, - .const_slice_u8, - => return @divExact(target.cpu.arch.ptrBitWidth(), 8) * 2, + => { + if (self.elemType().hasCodeGenBits()) return @divExact(target.cpu.arch.ptrBitWidth(), 8) * 2; + return @divExact(target.cpu.arch.ptrBitWidth(), 8); + }, + .const_slice_u8 => return @divExact(target.cpu.arch.ptrBitWidth(), 8) * 2, .optional_single_const_pointer, .optional_single_mut_pointer, @@ -922,7 +998,11 @@ pub const Type = extern union { .f128 => return 16, .c_longdouble => return 16, - .anyerror => return 2, // TODO revisit this when we have the concept of the error tag type + .error_set, + .error_set_single, + .anyerror_void_error_union, + .anyerror, + => return 2, // TODO revisit this when we have the concept of the error tag type .int_signed, .int_unsigned => { const bits: u16 = if (self.cast(Payload.IntSigned)) |pl| @@ -949,6 +1029,18 @@ pub const Type = extern union { // to the child type's ABI alignment. 
return child_type.abiAlignment(target) + child_type.abiSize(target); }, + + .error_union => { + const payload = self.cast(Payload.ErrorUnion).?; + if (!payload.error_set.hasCodeGenBits() and !payload.payload.hasCodeGenBits()) { + return 0; + } else if (!payload.error_set.hasCodeGenBits()) { + return payload.payload.abiSize(target); + } else if (!payload.payload.hasCodeGenBits()) { + return payload.error_set.abiSize(target); + } + @panic("TODO abiSize error union"); + }, }; } @@ -1009,6 +1101,12 @@ pub const Type = extern union { .c_mut_pointer, .const_slice, .mut_slice, + .error_union, + .@"anyframe", + .anyframe_T, + .anyerror_void_error_union, + .error_set, + .error_set_single, => false, .single_const_pointer, @@ -1077,6 +1175,12 @@ pub const Type = extern union { .optional_single_mut_pointer, .optional_single_const_pointer, .enum_literal, + .error_union, + .@"anyframe", + .anyframe_T, + .anyerror_void_error_union, + .error_set, + .error_set_single, => false, .const_slice, @@ -1142,6 +1246,12 @@ pub const Type = extern union { .optional_single_const_pointer, .enum_literal, .mut_slice, + .error_union, + .@"anyframe", + .anyframe_T, + .anyerror_void_error_union, + .error_set, + .error_set_single, => false, .single_const_pointer, @@ -1216,6 +1326,12 @@ pub const Type = extern union { .optional_single_mut_pointer, .optional_single_const_pointer, .enum_literal, + .error_union, + .@"anyframe", + .anyframe_T, + .anyerror_void_error_union, + .error_set, + .error_set_single, => false, .pointer => { @@ -1327,6 +1443,12 @@ pub const Type = extern union { .optional_single_const_pointer, .optional_single_mut_pointer, .enum_literal, + .error_union, + .@"anyframe", + .anyframe_T, + .anyerror_void_error_union, + .error_set, + .error_set_single, => unreachable, .array => self.cast(Payload.Array).?.elem_type, @@ -1448,6 +1570,12 @@ pub const Type = extern union { .optional_single_mut_pointer, .optional_single_const_pointer, .enum_literal, + .error_union, + .@"anyframe", + 
.anyframe_T, + .anyerror_void_error_union, + .error_set, + .error_set_single, => unreachable, .array => self.cast(Payload.Array).?.len, @@ -1515,6 +1643,12 @@ pub const Type = extern union { .optional_single_mut_pointer, .optional_single_const_pointer, .enum_literal, + .error_union, + .@"anyframe", + .anyframe_T, + .anyerror_void_error_union, + .error_set, + .error_set_single, => unreachable, .array, .array_u8 => return null, @@ -1580,6 +1714,12 @@ pub const Type = extern union { .optional_single_mut_pointer, .optional_single_const_pointer, .enum_literal, + .error_union, + .@"anyframe", + .anyframe_T, + .anyerror_void_error_union, + .error_set, + .error_set_single, => false, .int_signed, @@ -1648,6 +1788,12 @@ pub const Type = extern union { .optional_single_mut_pointer, .optional_single_const_pointer, .enum_literal, + .error_union, + .@"anyframe", + .anyframe_T, + .anyerror_void_error_union, + .error_set, + .error_set_single, => false, .int_unsigned, @@ -1706,6 +1852,12 @@ pub const Type = extern union { .optional_single_mut_pointer, .optional_single_const_pointer, .enum_literal, + .error_union, + .@"anyframe", + .anyframe_T, + .anyerror_void_error_union, + .error_set, + .error_set_single, => unreachable, .int_unsigned => .{ .signed = false, .bits = self.cast(Payload.IntUnsigned).?.bits }, @@ -1782,6 +1934,12 @@ pub const Type = extern union { .optional_single_mut_pointer, .optional_single_const_pointer, .enum_literal, + .error_union, + .@"anyframe", + .anyframe_T, + .anyerror_void_error_union, + .error_set, + .error_set_single, => false, .usize, @@ -1887,6 +2045,12 @@ pub const Type = extern union { .optional_single_mut_pointer, .optional_single_const_pointer, .enum_literal, + .error_union, + .@"anyframe", + .anyframe_T, + .anyerror_void_error_union, + .error_set, + .error_set_single, => unreachable, }; } @@ -1958,6 +2122,12 @@ pub const Type = extern union { .optional_single_mut_pointer, .optional_single_const_pointer, .enum_literal, + .error_union, + 
.@"anyframe", + .anyframe_T, + .anyerror_void_error_union, + .error_set, + .error_set_single, => unreachable, } } @@ -2028,6 +2198,12 @@ pub const Type = extern union { .optional_single_mut_pointer, .optional_single_const_pointer, .enum_literal, + .error_union, + .@"anyframe", + .anyframe_T, + .anyerror_void_error_union, + .error_set, + .error_set_single, => unreachable, } } @@ -2098,6 +2274,12 @@ pub const Type = extern union { .optional_single_mut_pointer, .optional_single_const_pointer, .enum_literal, + .error_union, + .@"anyframe", + .anyframe_T, + .anyerror_void_error_union, + .error_set, + .error_set_single, => unreachable, }; } @@ -2165,6 +2347,12 @@ pub const Type = extern union { .optional_single_mut_pointer, .optional_single_const_pointer, .enum_literal, + .error_union, + .@"anyframe", + .anyframe_T, + .anyerror_void_error_union, + .error_set, + .error_set_single, => unreachable, }; } @@ -2232,6 +2420,12 @@ pub const Type = extern union { .optional_single_mut_pointer, .optional_single_const_pointer, .enum_literal, + .error_union, + .@"anyframe", + .anyframe_T, + .anyerror_void_error_union, + .error_set, + .error_set_single, => unreachable, }; } @@ -2299,6 +2493,12 @@ pub const Type = extern union { .optional_single_mut_pointer, .optional_single_const_pointer, .enum_literal, + .error_union, + .@"anyframe", + .anyframe_T, + .anyerror_void_error_union, + .error_set, + .error_set_single, => false, }; } @@ -2350,6 +2550,12 @@ pub const Type = extern union { .optional_single_mut_pointer, .optional_single_const_pointer, .enum_literal, + .anyerror_void_error_union, + .anyframe_T, + .@"anyframe", + .error_union, + .error_set, + .error_set_single, => return null, .void => return Value.initTag(.void_value), @@ -2453,6 +2659,12 @@ pub const Type = extern union { .optional_single_mut_pointer, .optional_single_const_pointer, .enum_literal, + .error_union, + .@"anyframe", + .anyframe_T, + .anyerror_void_error_union, + .error_set, + .error_set_single, => return false, 
.c_const_pointer, @@ -2510,6 +2722,8 @@ pub const Type = extern union { fn_naked_noreturn_no_args, fn_ccc_void_no_args, single_const_pointer_to_comptime_int, + anyerror_void_error_union, + @"anyframe", const_slice_u8, // See last_no_payload_tag below. // After this, the tag requires a payload. @@ -2532,6 +2746,10 @@ pub const Type = extern union { optional, optional_single_mut_pointer, optional_single_const_pointer, + error_union, + anyframe_T, + error_set, + error_set_single, pub const last_no_payload_tag = Tag.const_slice_u8; pub const no_payload_count = @enumToInt(last_no_payload_tag) + 1; @@ -2613,6 +2831,32 @@ pub const Type = extern union { @"volatile": bool, size: std.builtin.TypeInfo.Pointer.Size, }; + + pub const ErrorUnion = struct { + base: Payload = .{ .tag = .error_union }, + + error_set: Type, + payload: Type, + }; + + pub const AnyFrame = struct { + base: Payload = .{ .tag = .anyframe_T }, + + return_type: Type, + }; + + pub const ErrorSet = struct { + base: Payload = .{ .tag = .error_set }, + + decl: *Module.Decl, + }; + + pub const ErrorSetSingle = struct { + base: Payload = .{ .tag = .error_set_single }, + + /// memory is owned by `Module` + name: []const u8, + }; }; }; diff --git a/src-self-hosted/value.zig b/src-self-hosted/value.zig @@ -61,6 +61,7 @@ pub const Value = extern union { single_const_pointer_to_comptime_int_type, const_slice_u8_type, enum_literal_type, + anyframe_type, undef, zero, @@ -90,6 +91,8 @@ pub const Value = extern union { float_64, float_128, enum_literal, + error_set, + @"error", pub const last_no_payload_tag = Tag.bool_false; pub const no_payload_count = @enumToInt(last_no_payload_tag) + 1; @@ -168,6 +171,7 @@ pub const Value = extern union { .single_const_pointer_to_comptime_int_type, .const_slice_u8_type, .enum_literal_type, + .anyframe_type, .undef, .zero, .void_value, @@ -241,6 +245,10 @@ pub const Value = extern union { }; return Value{ .ptr_otherwise = &new_payload.base }; }, + .@"error" => return 
self.copyPayloadShallow(allocator, Payload.Error), + + // memory is managed by the declaration + .error_set => return self.copyPayloadShallow(allocator, Payload.ErrorSet), } } @@ -300,6 +308,7 @@ pub const Value = extern union { .single_const_pointer_to_comptime_int_type => return out_stream.writeAll("*const comptime_int"), .const_slice_u8_type => return out_stream.writeAll("[]const u8"), .enum_literal_type => return out_stream.writeAll("@TypeOf(.EnumLiteral)"), + .anyframe_type => return out_stream.writeAll("anyframe"), .null_value => return out_stream.writeAll("null"), .undef => return out_stream.writeAll("undefined"), @@ -343,6 +352,15 @@ pub const Value = extern union { .float_32 => return out_stream.print("{}", .{val.cast(Payload.Float_32).?.val}), .float_64 => return out_stream.print("{}", .{val.cast(Payload.Float_64).?.val}), .float_128 => return out_stream.print("{}", .{val.cast(Payload.Float_128).?.val}), + .error_set => { + const error_set = val.cast(Payload.ErrorSet).?; + try out_stream.writeAll("error{"); + for (error_set.fields.items()) |entry| { + try out_stream.print("{},", .{entry.value}); + } + return out_stream.writeAll("}"); + }, + .@"error" => return out_stream.print("error.{}", .{val.cast(Payload.Error).?.name}), }; } @@ -363,11 +381,9 @@ pub const Value = extern union { } /// Asserts that the value is representable as a type. 
- pub fn toType(self: Value) Type { + pub fn toType(self: Value, allocator: *Allocator) !Type { return switch (self.tag()) { .ty => self.cast(Payload.Ty).?.ty, - .int_type => @panic("TODO int type to type"), - .u8_type => Type.initTag(.u8), .i8_type => Type.initTag(.i8), .u16_type => Type.initTag(.u16), @@ -408,6 +424,26 @@ pub const Value = extern union { .single_const_pointer_to_comptime_int_type => Type.initTag(.single_const_pointer_to_comptime_int), .const_slice_u8_type => Type.initTag(.const_slice_u8), .enum_literal_type => Type.initTag(.enum_literal), + .anyframe_type => Type.initTag(.@"anyframe"), + + .int_type => { + const payload = self.cast(Payload.IntType).?; + if (payload.signed) { + const new = try allocator.create(Type.Payload.IntSigned); + new.* = .{ .bits = payload.bits }; + return Type.initPayload(&new.base); + } else { + const new = try allocator.create(Type.Payload.IntUnsigned); + new.* = .{ .bits = payload.bits }; + return Type.initPayload(&new.base); + } + }, + .error_set => { + const payload = self.cast(Payload.ErrorSet).?; + const new = try allocator.create(Type.Payload.ErrorSet); + new.* = .{ .decl = payload.decl }; + return Type.initPayload(&new.base); + }, .undef, .zero, @@ -433,6 +469,7 @@ pub const Value = extern union { .float_64, .float_128, .enum_literal, + .@"error", => unreachable, }; } @@ -482,6 +519,7 @@ pub const Value = extern union { .single_const_pointer_to_comptime_int_type, .const_slice_u8_type, .enum_literal_type, + .anyframe_type, .null_value, .function, .variable, @@ -498,6 +536,8 @@ pub const Value = extern union { .unreachable_value, .empty_array, .enum_literal, + .error_set, + .@"error", => unreachable, .undef => unreachable, @@ -560,6 +600,7 @@ pub const Value = extern union { .single_const_pointer_to_comptime_int_type, .const_slice_u8_type, .enum_literal_type, + .anyframe_type, .null_value, .function, .variable, @@ -576,6 +617,8 @@ pub const Value = extern union { .unreachable_value, .empty_array, .enum_literal, + 
.error_set, + .@"error", => unreachable, .undef => unreachable, @@ -638,6 +681,7 @@ pub const Value = extern union { .single_const_pointer_to_comptime_int_type, .const_slice_u8_type, .enum_literal_type, + .anyframe_type, .null_value, .function, .variable, @@ -654,6 +698,8 @@ pub const Value = extern union { .unreachable_value, .empty_array, .enum_literal, + .error_set, + .@"error", => unreachable, .undef => unreachable, @@ -742,6 +788,7 @@ pub const Value = extern union { .single_const_pointer_to_comptime_int_type, .const_slice_u8_type, .enum_literal_type, + .anyframe_type, .null_value, .function, .variable, @@ -759,6 +806,8 @@ pub const Value = extern union { .unreachable_value, .empty_array, .enum_literal, + .error_set, + .@"error", => unreachable, .zero, @@ -825,6 +874,7 @@ pub const Value = extern union { .single_const_pointer_to_comptime_int_type, .const_slice_u8_type, .enum_literal_type, + .anyframe_type, .null_value, .function, .variable, @@ -841,6 +891,8 @@ pub const Value = extern union { .unreachable_value, .empty_array, .enum_literal, + .error_set, + .@"error", => unreachable, .zero, @@ -988,6 +1040,7 @@ pub const Value = extern union { .single_const_pointer_to_comptime_int_type, .const_slice_u8_type, .enum_literal_type, + .anyframe_type, .bool_true, .bool_false, .null_value, @@ -1007,6 +1060,8 @@ pub const Value = extern union { .void_value, .unreachable_value, .enum_literal, + .error_set, + .@"error", => unreachable, .zero => false, @@ -1063,6 +1118,7 @@ pub const Value = extern union { .single_const_pointer_to_comptime_int_type, .const_slice_u8_type, .enum_literal_type, + .anyframe_type, .null_value, .function, .variable, @@ -1076,6 +1132,8 @@ pub const Value = extern union { .unreachable_value, .empty_array, .enum_literal, + .error_set, + .@"error", => unreachable, .zero, @@ -1197,6 +1255,7 @@ pub const Value = extern union { .single_const_pointer_to_comptime_int_type, .const_slice_u8_type, .enum_literal_type, + .anyframe_type, .zero, .bool_true, 
.bool_false, @@ -1218,6 +1277,8 @@ pub const Value = extern union { .unreachable_value, .empty_array, .enum_literal, + .error_set, + .@"error", => unreachable, .ref_val => self.cast(Payload.RefVal).?.val, @@ -1276,6 +1337,7 @@ pub const Value = extern union { .single_const_pointer_to_comptime_int_type, .const_slice_u8_type, .enum_literal_type, + .anyframe_type, .zero, .bool_true, .bool_false, @@ -1297,6 +1359,8 @@ pub const Value = extern union { .void_value, .unreachable_value, .enum_literal, + .error_set, + .@"error", => unreachable, .empty_array => unreachable, // out of bounds array index @@ -1372,6 +1436,7 @@ pub const Value = extern union { .single_const_pointer_to_comptime_int_type, .const_slice_u8_type, .enum_literal_type, + .anyframe_type, .zero, .empty_array, .bool_true, @@ -1393,6 +1458,8 @@ pub const Value = extern union { .float_128, .void_value, .enum_literal, + .error_set, + .@"error", => false, .undef => unreachable, @@ -1522,6 +1589,24 @@ pub const Value = extern union { base: Payload = .{ .tag = .float_128 }, val: f128, }; + + pub const ErrorSet = struct { + base: Payload = .{ .tag = .error_set }, + + // TODO revisit this when we have the concept of the error tag type + fields: std.StringHashMapUnmanaged(u16), + decl: *Module.Decl, + }; + + pub const Error = struct { + base: Payload = .{ .tag = .@"error" }, + + // TODO revisit this when we have the concept of the error tag type + /// `name` is owned by `Module` and will be valid for the entire + /// duration of the compilation. + name: []const u8, + value: u16, + }; }; /// Big enough to fit any non-BigInt value diff --git a/src-self-hosted/zir.zig b/src-self-hosted/zir.zig @@ -43,6 +43,8 @@ pub const Inst = struct { alloc, /// Same as `alloc` except the type is inferred. alloc_inferred, + /// Create an `anyframe->T`. + anyframe_type, /// Array concatenation. 
`a ++ b` array_cat, /// Array multiplication `a ** b` @@ -70,6 +72,8 @@ pub const Inst = struct { /// A typed result location pointer is bitcasted to a new result location pointer. /// The new result location pointer has an inferred type. bitcast_result_ptr, + /// Bitwise NOT. `~` + bitnot, /// Bitwise OR. `|` bitor, /// A labeled block of code, which can return a value. @@ -133,6 +137,10 @@ pub const Inst = struct { ensure_result_used, /// Emits a compile error if an error is ignored. ensure_result_non_error, + /// Create a `E!T` type. + error_union_type, + /// Create an error set. + error_set, /// Export the provided Decl as the provided name in the compilation's output object file. @"export", /// Given a pointer to a struct or object that contains virtual fields, returns a pointer @@ -160,6 +168,8 @@ pub const Inst = struct { /// A labeled block of code that loops forever. At the end of the body it is implied /// to repeat; no explicit "repeat" instruction terminates loop bodies. loop, + /// Merge two error sets into one, `E1 || E2`. + merge_error_sets, /// Ambiguously remainder division or modulus. If the computation would possibly have /// a different value depending on whether the operation is remainder division or modulus, /// a compile error is emitted. Otherwise the computation is performed. 
@@ -286,6 +296,8 @@ pub const Inst = struct { .unwrap_err_safe, .unwrap_err_unsafe, .ensure_err_payload_void, + .anyframe_type, + .bitnot, => UnOp, .add, @@ -316,6 +328,8 @@ pub const Inst = struct { .bitcast, .coerce_result_ptr, .xor, + .error_union_type, + .merge_error_sets, => BinOp, .arg => Arg, @@ -347,6 +361,7 @@ pub const Inst = struct { .condbr => CondBr, .ptr_type => PtrType, .enum_literal => EnumLiteral, + .error_set => ErrorSet, }; } @@ -438,6 +453,11 @@ pub const Inst = struct { .ptr_type, .ensure_err_payload_void, .enum_literal, + .merge_error_sets, + .anyframe_type, + .error_union_type, + .bitnot, + .error_set, => false, .@"break", @@ -908,6 +928,16 @@ pub const Inst = struct { }, kw_args: struct {}, }; + + pub const ErrorSet = struct { + pub const base_tag = Tag.error_set; + base: Inst, + + positionals: struct { + fields: [][]const u8, + }, + kw_args: struct {}, + }; }; pub const ErrorMsg = struct { @@ -1142,6 +1172,16 @@ const Writer = struct { const name = self.loop_table.get(param).?; return std.zig.renderStringLiteral(name, stream); }, + [][]const u8 => { + try stream.writeByte('['); + for (param) |str, i| { + if (i != 0) { + try stream.writeAll(", "); + } + try std.zig.renderStringLiteral(str, stream); + } + try stream.writeByte(']'); + }, else => |T| @compileError("unimplemented: rendering parameter of type " ++ @typeName(T)), } } @@ -1539,6 +1579,21 @@ const Parser = struct { const name = try self.parseStringLiteral(); return self.loop_table.get(name).?; }, + [][]const u8 => { + try requireEatBytes(self, "["); + skipSpace(self); + if (eatByte(self, ']')) return &[0][]const u8{}; + + var strings = std.ArrayList([]const u8).init(&self.arena.allocator); + while (true) { + skipSpace(self); + try strings.append(try self.parseStringLiteral()); + skipSpace(self); + if (!eatByte(self, ',')) break; + } + try requireEatBytes(self, "]"); + return strings.toOwnedSlice(); + }, else => @compileError("Unimplemented: ir parseParameterGeneric for type " ++ 
@typeName(T)), } return self.fail("TODO parse parameter {}", .{@typeName(T)}); @@ -1961,7 +2016,7 @@ const EmitZIR = struct { return self.emitUnnamedDecl(&as_inst.base); }, .Type => { - const ty = typed_value.val.toType(); + const ty = try typed_value.val.toType(&self.arena.allocator); return self.emitType(src, ty); }, .Fn => { diff --git a/src-self-hosted/zir_sema.zig b/src-self-hosted/zir_sema.zig @@ -97,6 +97,7 @@ pub fn analyzeInst(mod: *Module, scope: *Scope, old_inst: *zir.Inst) InnerError! .array_cat => return analyzeInstArrayCat(mod, scope, old_inst.castTag(.array_cat).?), .array_mul => return analyzeInstArrayMul(mod, scope, old_inst.castTag(.array_mul).?), .bitand => return analyzeInstBitwise(mod, scope, old_inst.castTag(.bitand).?), + .bitnot => return analyzeInstBitNot(mod, scope, old_inst.castTag(.bitnot).?), .bitor => return analyzeInstBitwise(mod, scope, old_inst.castTag(.bitor).?), .xor => return analyzeInstBitwise(mod, scope, old_inst.castTag(.xor).?), .shl => return analyzeInstShl(mod, scope, old_inst.castTag(.shl).?), @@ -122,6 +123,10 @@ pub fn analyzeInst(mod: *Module, scope: *Scope, old_inst: *zir.Inst) InnerError! 
.array_type => return analyzeInstArrayType(mod, scope, old_inst.castTag(.array_type).?), .array_type_sentinel => return analyzeInstArrayTypeSentinel(mod, scope, old_inst.castTag(.array_type_sentinel).?), .enum_literal => return analyzeInstEnumLiteral(mod, scope, old_inst.castTag(.enum_literal).?), + .merge_error_sets => return analyzeInstMergeErrorSets(mod, scope, old_inst.castTag(.merge_error_sets).?), + .error_union_type => return analyzeInstErrorUnionType(mod, scope, old_inst.castTag(.error_union_type).?), + .anyframe_type => return analyzeInstAnyframeType(mod, scope, old_inst.castTag(.anyframe_type).?), + .error_set => return analyzeInstErrorSet(mod, scope, old_inst.castTag(.error_set).?), } } @@ -145,7 +150,7 @@ pub fn analyzeBodyValueAsType(mod: *Module, block_scope: *Scope.Block, body: zir for (block_scope.instructions.items) |inst| { if (inst.castTag(.ret)) |ret| { const val = try mod.resolveConstValue(&block_scope.base, ret.operand); - return val.toType(); + return val.toType(block_scope.base.arena()); } else { return mod.fail(&block_scope.base, inst.src, "unable to resolve comptime value", .{}); } @@ -270,7 +275,7 @@ fn resolveType(mod: *Module, scope: *Scope, old_inst: *zir.Inst) !Type { const wanted_type = Type.initTag(.@"type"); const coerced_inst = try mod.coerce(scope, wanted_type, new_inst); const val = try mod.resolveConstValue(scope, coerced_inst); - return val.toType(); + return val.toType(scope.arena()); } fn resolveInt(mod: *Module, scope: *Scope, old_inst: *zir.Inst, dest_type: Type) !u64 { @@ -431,6 +436,7 @@ fn analyzeInstStr(mod: *Module, scope: *Scope, str_inst: *zir.Inst.Str) InnerErr // The bytes references memory inside the ZIR module, which can get deallocated // after semantic analysis is complete. We need the memory to be in the new anonymous Decl's arena. 
var new_decl_arena = std.heap.ArenaAllocator.init(mod.gpa); + errdefer new_decl_arena.deinit(); const arena_bytes = try new_decl_arena.allocator.dupe(u8, str_inst.positionals.bytes); const ty_payload = try scope.arena().create(Type.Payload.Array_u8_Sentinel0); @@ -716,6 +722,54 @@ fn analyzeInstArrayTypeSentinel(mod: *Module, scope: *Scope, array: *zir.Inst.Ar return mod.constType(scope, array.base.src, try mod.arrayType(scope, len.val.toUnsignedInt(), sentinel.val, elem_type)); } +fn analyzeInstErrorUnionType(mod: *Module, scope: *Scope, inst: *zir.Inst.BinOp) InnerError!*Inst { + const error_union = try resolveType(mod, scope, inst.positionals.lhs); + const payload = try resolveType(mod, scope, inst.positionals.rhs); + + if (error_union.zigTypeTag() != .ErrorSet) { + return mod.fail(scope, inst.base.src, "expected error set type, found {}", .{error_union.elemType()}); + } + + return mod.constType(scope, inst.base.src, try mod.errorUnionType(scope, error_union, payload)); +} + +fn analyzeInstAnyframeType(mod: *Module, scope: *Scope, inst: *zir.Inst.UnOp) InnerError!*Inst { + const return_type = try resolveType(mod, scope, inst.positionals.operand); + + return mod.constType(scope, inst.base.src, try mod.anyframeType(scope, return_type)); +} + +fn analyzeInstErrorSet(mod: *Module, scope: *Scope, inst: *zir.Inst.ErrorSet) InnerError!*Inst { + // The declarations arena will store the hashmap. 
+ var new_decl_arena = std.heap.ArenaAllocator.init(mod.gpa); + errdefer new_decl_arena.deinit(); + + const payload = try scope.arena().create(Value.Payload.ErrorSet); + payload.* = .{ + .fields = .{}, + .decl = undefined, // populated below + }; + try payload.fields.ensureCapacity(&new_decl_arena.allocator, inst.positionals.fields.len); + + for (inst.positionals.fields) |field_name| { + const entry = try mod.getErrorValue(field_name); + if (payload.fields.fetchPutAssumeCapacity(entry.key, entry.value)) |prev| { + return mod.fail(scope, inst.base.src, "duplicate error: '{}'", .{field_name}); + } + } + // TODO create name in format "error:line:column" + const new_decl = try mod.createAnonymousDecl(scope, &new_decl_arena, .{ + .ty = Type.initTag(.type), + .val = Value.initPayload(&payload.base), + }); + payload.decl = new_decl; + return mod.analyzeDeclRef(scope, inst.base.src, new_decl); +} + +fn analyzeInstMergeErrorSets(mod: *Module, scope: *Scope, inst: *zir.Inst.BinOp) InnerError!*Inst { + return mod.fail(scope, inst.base.src, "TODO implement merge_error_sets", .{}); +} + fn analyzeInstEnumLiteral(mod: *Module, scope: *Scope, inst: *zir.Inst.EnumLiteral) InnerError!*Inst { const payload = try scope.arena().create(Value.Payload.Bytes); payload.* = .{ @@ -858,8 +912,72 @@ fn analyzeInstFieldPtr(mod: *Module, scope: *Scope, fieldptr: *zir.Inst.FieldPtr ); } }, - else => return mod.fail(scope, fieldptr.base.src, "type '{}' does not support field access", .{elem_ty}), + .Pointer => { + const ptr_child = elem_ty.elemType(); + switch (ptr_child.zigTypeTag()) { + .Array => { + if (mem.eql(u8, field_name, "len")) { + const len_payload = try scope.arena().create(Value.Payload.Int_u64); + len_payload.* = .{ .int = ptr_child.arrayLen() }; + + const ref_payload = try scope.arena().create(Value.Payload.RefVal); + ref_payload.* = .{ .val = Value.initPayload(&len_payload.base) }; + + return mod.constInst(scope, fieldptr.base.src, .{ + .ty = 
Type.initTag(.single_const_pointer_to_comptime_int), + .val = Value.initPayload(&ref_payload.base), + }); + } else { + return mod.fail( + scope, + fieldptr.positionals.field_name.src, + "no member named '{}' in '{}'", + .{ field_name, elem_ty }, + ); + } + }, + else => {}, + } + }, + .Type => { + _ = try mod.resolveConstValue(scope, object_ptr); + const result = try mod.analyzeDeref(scope, fieldptr.base.src, object_ptr, object_ptr.src); + const val = result.value().?; + const child_type = try val.toType(scope.arena()); + switch (child_type.zigTypeTag()) { + .ErrorSet => { + // TODO resolve inferred error sets + const entry = if (val.cast(Value.Payload.ErrorSet)) |payload| + (payload.fields.getEntry(field_name) orelse + return mod.fail(scope, fieldptr.base.src, "no error named '{}' in '{}'", .{ field_name, child_type })).* + else try mod.getErrorValue(field_name); + + const error_payload = try scope.arena().create(Value.Payload.Error); + error_payload.* = .{ + .name = entry.key, + .value = entry.value, + }; + + const ref_payload = try scope.arena().create(Value.Payload.RefVal); + ref_payload.* = .{ .val = Value.initPayload(&error_payload.base) }; + + const result_type = if (child_type.tag() == .anyerror) blk: { + const result_payload = try scope.arena().create(Type.Payload.ErrorSetSingle); + result_payload.* = .{ .name = entry.key }; + break :blk Type.initPayload(&result_payload.base); + } else child_type; + + return mod.constInst(scope, fieldptr.base.src, .{ + .ty = try mod.simplePtrType(scope, fieldptr.base.src, result_type, false, .One), + .val = Value.initPayload(&ref_payload.base), + }); + }, + else => return mod.fail(scope, fieldptr.base.src, "type '{}' does not support field access", .{child_type}), + } + }, + else => {}, } + return mod.fail(scope, fieldptr.base.src, "type '{}' does not support field access", .{elem_ty}); } fn analyzeInstIntCast(mod: *Module, scope: *Scope, inst: *zir.Inst.BinOp) InnerError!*Inst { @@ -983,6 +1101,10 @@ fn 
analyzeInstBitwise(mod: *Module, scope: *Scope, inst: *zir.Inst.BinOp) InnerE return mod.fail(scope, inst.base.src, "TODO implement analyzeInstBitwise", .{}); } +fn analyzeInstBitNot(mod: *Module, scope: *Scope, inst: *zir.Inst.UnOp) InnerError!*Inst { + return mod.fail(scope, inst.base.src, "TODO implement analyzeInstBitNot", .{}); +} + fn analyzeInstArrayCat(mod: *Module, scope: *Scope, inst: *zir.Inst.BinOp) InnerError!*Inst { return mod.fail(scope, inst.base.src, "TODO implement analyzeInstArrayCat", .{}); } @@ -1348,7 +1470,7 @@ fn analyzeInstPtrType(mod: *Module, scope: *Scope, inst: *zir.Inst.PtrType) Inne if (host_size != 0 and bit_offset >= host_size * 8) return mod.fail(scope, inst.base.src, "bit offset starts after end of host integer", .{}); - + const sentinel = if (inst.kw_args.sentinel) |some| (try resolveInstConst(mod, scope, some)).val else diff --git a/src/analyze.cpp b/src/analyze.cpp @@ -2586,7 +2586,6 @@ static Error resolve_enum_zero_bits(CodeGen *g, ZigType *enum_type) { return ErrorNone; AstNode *decl_node = enum_type->data.enumeration.decl_node; - assert(decl_node->type == NodeTypeContainerDecl); if (enum_type->data.enumeration.resolve_loop_flag) { if (enum_type->data.enumeration.resolve_status != ResolveStatusInvalid) { @@ -2600,15 +2599,20 @@ static Error resolve_enum_zero_bits(CodeGen *g, ZigType *enum_type) { enum_type->data.enumeration.resolve_loop_flag = true; - assert(!enum_type->data.enumeration.fields); - uint32_t field_count = (uint32_t)decl_node->data.container_decl.fields.length; - if (field_count == 0) { - add_node_error(g, decl_node, buf_sprintf("enums must have 1 or more fields")); + uint32_t field_count; + if (decl_node->type == NodeTypeContainerDecl) { + assert(!enum_type->data.enumeration.fields); + field_count = (uint32_t)decl_node->data.container_decl.fields.length; + if (field_count == 0) { + add_node_error(g, decl_node, buf_sprintf("enums must have 1 or more fields")); - enum_type->data.enumeration.src_field_count = 
field_count; - enum_type->data.enumeration.fields = nullptr; - enum_type->data.enumeration.resolve_status = ResolveStatusInvalid; - return ErrorSemanticAnalyzeFail; + enum_type->data.enumeration.src_field_count = field_count; + enum_type->data.enumeration.fields = nullptr; + enum_type->data.enumeration.resolve_status = ResolveStatusInvalid; + return ErrorSemanticAnalyzeFail; + } + } else { + field_count = enum_type->data.enumeration.src_field_count; } Scope *scope = &enum_type->data.enumeration.decls_scope->base; @@ -2624,8 +2628,16 @@ static Error resolve_enum_zero_bits(CodeGen *g, ZigType *enum_type) { enum_type->abi_size = tag_int_type->abi_size; enum_type->abi_align = tag_int_type->abi_align; - if (decl_node->data.container_decl.init_arg_expr != nullptr) { - ZigType *wanted_tag_int_type = analyze_type_expr(g, scope, decl_node->data.container_decl.init_arg_expr); + ZigType *wanted_tag_int_type = nullptr; + if (decl_node->type == NodeTypeContainerDecl) { + if (decl_node->data.container_decl.init_arg_expr != nullptr) { + wanted_tag_int_type = analyze_type_expr(g, scope, decl_node->data.container_decl.init_arg_expr); + } + } else { + wanted_tag_int_type = enum_type->data.enumeration.tag_int_type; + } + + if (wanted_tag_int_type != nullptr) { if (type_is_invalid(wanted_tag_int_type)) { enum_type->data.enumeration.resolve_status = ResolveStatusInvalid; } else if (wanted_tag_int_type->id != ZigTypeIdInt && @@ -2654,7 +2666,6 @@ static Error resolve_enum_zero_bits(CodeGen *g, ZigType *enum_type) { } } - enum_type->data.enumeration.non_exhaustive = false; enum_type->data.enumeration.tag_int_type = tag_int_type; enum_type->size_in_bits = tag_int_type->size_in_bits; enum_type->abi_size = tag_int_type->abi_size; @@ -2663,121 +2674,131 @@ static Error resolve_enum_zero_bits(CodeGen *g, ZigType *enum_type) { BigInt bi_one; bigint_init_unsigned(&bi_one, 1); - AstNode *last_field_node = decl_node->data.container_decl.fields.at(field_count - 1); - if 
(buf_eql_str(last_field_node->data.struct_field.name, "_")) { + if (decl_node->type == NodeTypeContainerDecl) { + AstNode *last_field_node = decl_node->data.container_decl.fields.at(field_count - 1); + if (buf_eql_str(last_field_node->data.struct_field.name, "_")) { + if (last_field_node->data.struct_field.value != nullptr) { + add_node_error(g, last_field_node, buf_sprintf("value assigned to '_' field of non-exhaustive enum")); + enum_type->data.enumeration.resolve_status = ResolveStatusInvalid; + } + if (decl_node->data.container_decl.init_arg_expr == nullptr) { + add_node_error(g, decl_node, buf_sprintf("non-exhaustive enum must specify size")); + enum_type->data.enumeration.resolve_status = ResolveStatusInvalid; + } + enum_type->data.enumeration.non_exhaustive = true; + } else { + enum_type->data.enumeration.non_exhaustive = false; + } + } + + if (enum_type->data.enumeration.non_exhaustive) { field_count -= 1; if (field_count > 1 && log2_u64(field_count) == enum_type->size_in_bits) { - add_node_error(g, last_field_node, buf_sprintf("non-exhaustive enum specifies every value")); + add_node_error(g, decl_node, buf_sprintf("non-exhaustive enum specifies every value")); enum_type->data.enumeration.resolve_status = ResolveStatusInvalid; } - if (decl_node->data.container_decl.init_arg_expr == nullptr) { - add_node_error(g, last_field_node, buf_sprintf("non-exhaustive enum must specify size")); - enum_type->data.enumeration.resolve_status = ResolveStatusInvalid; - } - if (last_field_node->data.struct_field.value != nullptr) { - add_node_error(g, last_field_node, buf_sprintf("value assigned to '_' field of non-exhaustive enum")); - enum_type->data.enumeration.resolve_status = ResolveStatusInvalid; - } - enum_type->data.enumeration.non_exhaustive = true; } - enum_type->data.enumeration.src_field_count = field_count; - enum_type->data.enumeration.fields = heap::c_allocator.allocate<TypeEnumField>(field_count); - 
enum_type->data.enumeration.fields_by_name.init(field_count); - - HashMap<BigInt, AstNode *, bigint_hash, bigint_eql> occupied_tag_values = {}; - occupied_tag_values.init(field_count); - - TypeEnumField *last_enum_field = nullptr; - - for (uint32_t field_i = 0; field_i < field_count; field_i += 1) { - AstNode *field_node = decl_node->data.container_decl.fields.at(field_i); - TypeEnumField *type_enum_field = &enum_type->data.enumeration.fields[field_i]; - type_enum_field->name = field_node->data.struct_field.name; - type_enum_field->decl_index = field_i; - type_enum_field->decl_node = field_node; + if (decl_node->type == NodeTypeContainerDecl) { + enum_type->data.enumeration.src_field_count = field_count; + enum_type->data.enumeration.fields = heap::c_allocator.allocate<TypeEnumField>(field_count); + enum_type->data.enumeration.fields_by_name.init(field_count); - if (field_node->data.struct_field.type != nullptr) { - ErrorMsg *msg = add_node_error(g, field_node->data.struct_field.type, - buf_sprintf("structs and unions, not enums, support field types")); - add_error_note(g, msg, decl_node, - buf_sprintf("consider 'union(enum)' here")); - } else if (field_node->data.struct_field.align_expr != nullptr) { - ErrorMsg *msg = add_node_error(g, field_node->data.struct_field.align_expr, - buf_sprintf("structs and unions, not enums, support field alignment")); - add_error_note(g, msg, decl_node, - buf_sprintf("consider 'union(enum)' here")); - } + HashMap<BigInt, AstNode *, bigint_hash, bigint_eql> occupied_tag_values = {}; + occupied_tag_values.init(field_count); - if (buf_eql_str(type_enum_field->name, "_")) { - add_node_error(g, field_node, buf_sprintf("'_' field of non-exhaustive enum must be last")); - enum_type->data.enumeration.resolve_status = ResolveStatusInvalid; - } + TypeEnumField *last_enum_field = nullptr; - auto field_entry = enum_type->data.enumeration.fields_by_name.put_unique(type_enum_field->name, type_enum_field); - if (field_entry != nullptr) { - 
ErrorMsg *msg = add_node_error(g, field_node, - buf_sprintf("duplicate enum field: '%s'", buf_ptr(type_enum_field->name))); - add_error_note(g, msg, field_entry->value->decl_node, buf_sprintf("other field here")); - enum_type->data.enumeration.resolve_status = ResolveStatusInvalid; - continue; - } - - AstNode *tag_value = field_node->data.struct_field.value; + for (uint32_t field_i = 0; field_i < field_count; field_i += 1) { + AstNode *field_node = decl_node->data.container_decl.fields.at(field_i); + TypeEnumField *type_enum_field = &enum_type->data.enumeration.fields[field_i]; + type_enum_field->name = field_node->data.struct_field.name; + type_enum_field->decl_index = field_i; + type_enum_field->decl_node = field_node; + + if (field_node->data.struct_field.type != nullptr) { + ErrorMsg *msg = add_node_error(g, field_node->data.struct_field.type, + buf_sprintf("structs and unions, not enums, support field types")); + add_error_note(g, msg, decl_node, + buf_sprintf("consider 'union(enum)' here")); + } else if (field_node->data.struct_field.align_expr != nullptr) { + ErrorMsg *msg = add_node_error(g, field_node->data.struct_field.align_expr, + buf_sprintf("structs and unions, not enums, support field alignment")); + add_error_note(g, msg, decl_node, + buf_sprintf("consider 'union(enum)' here")); + } + + if (buf_eql_str(type_enum_field->name, "_")) { + add_node_error(g, field_node, buf_sprintf("'_' field of non-exhaustive enum must be last")); + enum_type->data.enumeration.resolve_status = ResolveStatusInvalid; + } - if (tag_value != nullptr) { - // A user-specified value is available - ZigValue *result = analyze_const_value(g, scope, tag_value, tag_int_type, - nullptr, UndefBad); - if (type_is_invalid(result->type)) { + auto field_entry = enum_type->data.enumeration.fields_by_name.put_unique(type_enum_field->name, type_enum_field); + if (field_entry != nullptr) { + ErrorMsg *msg = add_node_error(g, field_node, + buf_sprintf("duplicate enum field: '%s'", 
buf_ptr(type_enum_field->name))); + add_error_note(g, msg, field_entry->value->decl_node, buf_sprintf("other field here")); enum_type->data.enumeration.resolve_status = ResolveStatusInvalid; continue; } - assert(result->special != ConstValSpecialRuntime); - assert(result->type->id == ZigTypeIdInt || result->type->id == ZigTypeIdComptimeInt); + AstNode *tag_value = field_node->data.struct_field.value; - bigint_init_bigint(&type_enum_field->value, &result->data.x_bigint); - } else { - // No value was explicitly specified: allocate the last value + 1 - // or, if this is the first element, zero - if (last_enum_field != nullptr) { - bigint_add(&type_enum_field->value, &last_enum_field->value, &bi_one); + if (tag_value != nullptr) { + // A user-specified value is available + ZigValue *result = analyze_const_value(g, scope, tag_value, tag_int_type, + nullptr, UndefBad); + if (type_is_invalid(result->type)) { + enum_type->data.enumeration.resolve_status = ResolveStatusInvalid; + continue; + } + + assert(result->special != ConstValSpecialRuntime); + assert(result->type->id == ZigTypeIdInt || result->type->id == ZigTypeIdComptimeInt); + + bigint_init_bigint(&type_enum_field->value, &result->data.x_bigint); } else { - bigint_init_unsigned(&type_enum_field->value, 0); + // No value was explicitly specified: allocate the last value + 1 + // or, if this is the first element, zero + if (last_enum_field != nullptr) { + bigint_add(&type_enum_field->value, &last_enum_field->value, &bi_one); + } else { + bigint_init_unsigned(&type_enum_field->value, 0); + } + + // Make sure we can represent this number with tag_int_type + if (!bigint_fits_in_bits(&type_enum_field->value, + tag_int_type->size_in_bits, + tag_int_type->data.integral.is_signed)) { + enum_type->data.enumeration.resolve_status = ResolveStatusInvalid; + + Buf *val_buf = buf_alloc(); + bigint_append_buf(val_buf, &type_enum_field->value, 10); + add_node_error(g, field_node, + buf_sprintf("enumeration value %s too large for 
type '%s'", + buf_ptr(val_buf), buf_ptr(&tag_int_type->name))); + + break; + } } - // Make sure we can represent this number with tag_int_type - if (!bigint_fits_in_bits(&type_enum_field->value, - tag_int_type->size_in_bits, - tag_int_type->data.integral.is_signed)) { + // Make sure the value is unique + auto entry = occupied_tag_values.put_unique(type_enum_field->value, field_node); + if (entry != nullptr && enum_type->data.enumeration.layout != ContainerLayoutExtern) { enum_type->data.enumeration.resolve_status = ResolveStatusInvalid; Buf *val_buf = buf_alloc(); bigint_append_buf(val_buf, &type_enum_field->value, 10); - add_node_error(g, field_node, - buf_sprintf("enumeration value %s too large for type '%s'", - buf_ptr(val_buf), buf_ptr(&tag_int_type->name))); - break; + ErrorMsg *msg = add_node_error(g, field_node, + buf_sprintf("enum tag value %s already taken", buf_ptr(val_buf))); + add_error_note(g, msg, entry->value, + buf_sprintf("other occurrence here")); } - } - - // Make sure the value is unique - auto entry = occupied_tag_values.put_unique(type_enum_field->value, field_node); - if (entry != nullptr && enum_type->data.enumeration.layout != ContainerLayoutExtern) { - enum_type->data.enumeration.resolve_status = ResolveStatusInvalid; - - Buf *val_buf = buf_alloc(); - bigint_append_buf(val_buf, &type_enum_field->value, 10); - ErrorMsg *msg = add_node_error(g, field_node, - buf_sprintf("enum tag value %s already taken", buf_ptr(val_buf))); - add_error_note(g, msg, entry->value, - buf_sprintf("other occurrence here")); + last_enum_field = type_enum_field; } - - last_enum_field = type_enum_field; + occupied_tag_values.deinit(); } if (enum_type->data.enumeration.resolve_status == ResolveStatusInvalid) @@ -2786,8 +2807,6 @@ static Error resolve_enum_zero_bits(CodeGen *g, ZigType *enum_type) { enum_type->data.enumeration.resolve_loop_flag = false; enum_type->data.enumeration.resolve_status = ResolveStatusSizeKnown; - occupied_tag_values.deinit(); - return 
ErrorNone; } diff --git a/src/ir.cpp b/src/ir.cpp @@ -2147,6 +2147,7 @@ static IrInstSrc *ir_build_const_undefined(IrBuilderSrc *irb, Scope *scope, AstN IrInstSrcConst *const_instruction = ir_create_instruction<IrInstSrcConst>(irb, scope, source_node); ir_instruction_append(irb->current_basic_block, &const_instruction->base); const_instruction->value = irb->codegen->intern.for_undefined(); + const_instruction->value->special = ConstValSpecialUndef; return &const_instruction->base; } @@ -14917,6 +14918,9 @@ static IrInstGen *ir_analyze_struct_literal_to_struct(IrAnalyze *ira, IrInst* so field_val->parent.data.p_struct.struct_val = const_result->value; field_val->parent.data.p_struct.field_index = dst_field->src_index; field_values[dst_field->src_index] = field_val; + if (field_val->type->id == ZigTypeIdUndefined && dst_field->type_entry->id != ZigTypeIdUndefined) { + field_values[dst_field->src_index]->special = ConstValSpecialUndef; + } } else { is_comptime = false; } @@ -15649,7 +15653,7 @@ static IrInstGen *ir_analyze_cast(IrAnalyze *ira, IrInst *source_instr, wanted_type->data.array.len == field_count) { return ir_analyze_struct_literal_to_array(ira, source_instr, value, wanted_type); - } else if (wanted_type->id == ZigTypeIdStruct && + } else if (wanted_type->id == ZigTypeIdStruct && !is_slice(wanted_type) && (!is_array_init || field_count == 0)) { return ir_analyze_struct_literal_to_struct(ira, source_instr, value, wanted_type); @@ -20692,8 +20696,13 @@ static IrInstGen *ir_analyze_fn_call(IrAnalyze *ira, IrInst* source_instr, if ((return_type->id == ZigTypeIdErrorUnion || return_type->id == ZigTypeIdErrorSet) && expected_return_type->id != ZigTypeIdErrorUnion && expected_return_type->id != ZigTypeIdErrorSet) { - add_error_note(ira->codegen, ira->new_irb.exec->first_err_trace_msg, - ira->explicit_return_type_source_node, buf_create_from_str("function cannot return an error")); + if (call_result_loc->id == ResultLocIdReturn) { + add_error_note(ira->codegen, 
ira->new_irb.exec->first_err_trace_msg, + ira->explicit_return_type_source_node, buf_sprintf("function cannot return an error")); + } else { + add_error_note(ira->codegen, ira->new_irb.exec->first_err_trace_msg, result_loc->base.source_node, + buf_sprintf("cannot store an error in type '%s'", buf_ptr(&expected_return_type->name))); + } } return ira->codegen->invalid_inst_gen; } @@ -22302,6 +22311,7 @@ static IrInstGen *ir_analyze_container_member_access_inner(IrAnalyze *ira, static void memoize_field_init_val(CodeGen *codegen, ZigType *container_type, TypeStructField *field) { if (field->init_val != nullptr) return; + if (field->decl_node == nullptr) return; if (field->decl_node->type != NodeTypeStructField) return; AstNode *init_node = field->decl_node->data.struct_field.value; if (init_node == nullptr) return; @@ -25495,9 +25505,7 @@ static Error ir_make_type_info_value(IrAnalyze *ira, IrInst* source_instr, ZigTy error_val->special = ConstValSpecialStatic; error_val->type = type_info_error_type; - ZigValue **inner_fields = alloc_const_vals_ptrs(ira->codegen, 2); - inner_fields[1]->special = ConstValSpecialStatic; - inner_fields[1]->type = ira->codegen->builtin_types.entry_num_lit_int; + ZigValue **inner_fields = alloc_const_vals_ptrs(ira->codegen, 1); ZigValue *name = nullptr; if (error->cached_error_name_val != nullptr) @@ -25505,7 +25513,6 @@ static Error ir_make_type_info_value(IrAnalyze *ira, IrInst* source_instr, ZigTy if (name == nullptr) name = create_const_str_lit(ira->codegen, &error->name)->data.x_ptr.data.ref.pointee; init_const_slice(ira->codegen, inner_fields[0], name, 0, buf_len(&error->name), true); - bigint_init_unsigned(&inner_fields[1]->data.x_bigint, error->value); error_val->data.x_struct.fields = inner_fields; error_val->parent.id = ConstParentIdArray; @@ -26020,6 +26027,9 @@ static ZigType *type_info_to_type(IrAnalyze *ira, IrInst *source_instr, ZigTypeI assert(payload->special == ConstValSpecialStatic); assert(payload->type == 
type_info_pointer_type); ZigValue *size_value = get_const_field(ira, source_instr->source_node, payload, "size", 0); + if (size_value == nullptr) + return ira->codegen->invalid_inst_gen->value->type; + assert(size_value->type == ir_type_info_get_type(ira, "Size", type_info_pointer_type)); BuiltinPtrSize size_enum_index = (BuiltinPtrSize)bigint_as_u32(&size_value->data.x_enum_tag); PtrLen ptr_len = size_enum_index_to_ptr_len(size_enum_index); @@ -26103,13 +26113,21 @@ static ZigType *type_info_to_type(IrAnalyze *ira, IrInst *source_instr, ZigTypeI assert(payload->special == ConstValSpecialStatic); assert(payload->type == ir_type_info_get_type(ira, "Optional", nullptr)); ZigType *child_type = get_const_field_meta_type(ira, source_instr->source_node, payload, "child", 0); + if (type_is_invalid(child_type)) + return ira->codegen->invalid_inst_gen->value->type; return get_optional_type(ira->codegen, child_type); } case ZigTypeIdErrorUnion: { assert(payload->special == ConstValSpecialStatic); assert(payload->type == ir_type_info_get_type(ira, "ErrorUnion", nullptr)); ZigType *err_set_type = get_const_field_meta_type(ira, source_instr->source_node, payload, "error_set", 0); + if (type_is_invalid(err_set_type)) + return ira->codegen->invalid_inst_gen->value->type; + ZigType *payload_type = get_const_field_meta_type(ira, source_instr->source_node, payload, "payload", 1); + if (type_is_invalid(payload_type)) + return ira->codegen->invalid_inst_gen->value->type; + return get_error_union_type(ira->codegen, err_set_type, payload_type); } case ZigTypeIdOpaque: { @@ -26123,8 +26141,10 @@ static ZigType *type_info_to_type(IrAnalyze *ira, IrInst *source_instr, ZigTypeI assert(payload->special == ConstValSpecialStatic); assert(payload->type == ir_type_info_get_type(ira, "Vector", nullptr)); BigInt *len = get_const_field_lit_int(ira, source_instr->source_node, payload, "len", 0); + if (len == nullptr) + return ira->codegen->invalid_inst_gen->value->type; + ZigType *child_type = 
get_const_field_meta_type(ira, source_instr->source_node, payload, "child", 1); - Error err; if ((err = ir_validate_vector_elem_type(ira, source_instr->source_node, child_type))) { return ira->codegen->invalid_inst_gen->value->type; } @@ -26134,6 +26154,9 @@ static ZigType *type_info_to_type(IrAnalyze *ira, IrInst *source_instr, ZigTypeI assert(payload->special == ConstValSpecialStatic); assert(payload->type == ir_type_info_get_type(ira, "AnyFrame", nullptr)); ZigType *child_type = get_const_field_meta_type_optional(ira, source_instr->source_node, payload, "child", 0); + if (child_type != nullptr && type_is_invalid(child_type)) + return ira->codegen->invalid_inst_gen->value->type; + return get_any_frame_type(ira->codegen, child_type); } case ZigTypeIdEnumLiteral: @@ -26142,6 +26165,9 @@ static ZigType *type_info_to_type(IrAnalyze *ira, IrInst *source_instr, ZigTypeI assert(payload->special == ConstValSpecialStatic); assert(payload->type == ir_type_info_get_type(ira, "Frame", nullptr)); ZigValue *function = get_const_field(ira, source_instr->source_node, payload, "function", 0); + if (function == nullptr) + return ira->codegen->invalid_inst_gen->value->type; + assert(function->type->id == ZigTypeIdFn); ZigFn *fn = function->data.x_ptr.data.fn.fn_entry; return get_fn_frame_type(ira->codegen, fn); @@ -26176,7 +26202,6 @@ static ZigType *type_info_to_type(IrAnalyze *ira, IrInst *source_instr, ZigTypeI assert(error->type == ir_type_info_get_type(ira, "Error", nullptr)); ErrorTableEntry *err_entry = heap::c_allocator.create<ErrorTableEntry>(); err_entry->decl_node = source_instr->source_node; - Error err; if ((err = get_const_field_buf(ira, source_instr->source_node, error, "name", 0, &err_entry->name))) return ira->codegen->invalid_inst_gen->value->type; auto existing_entry = ira->codegen->error_table.put_unique(&err_entry->name, err_entry); @@ -26203,11 +26228,15 @@ static ZigType *type_info_to_type(IrAnalyze *ira, IrInst *source_instr, ZigTypeI assert(payload->type == 
ir_type_info_get_type(ira, "Struct", nullptr)); ZigValue *layout_value = get_const_field(ira, source_instr->source_node, payload, "layout", 0); + if (layout_value == nullptr) + return ira->codegen->invalid_inst_gen->value->type; assert(layout_value->special == ConstValSpecialStatic); assert(layout_value->type == ir_type_info_get_type(ira, "ContainerLayout", nullptr)); ContainerLayout layout = (ContainerLayout)bigint_as_u32(&layout_value->data.x_enum_tag); ZigValue *fields_value = get_const_field(ira, source_instr->source_node, payload, "fields", 1); + if (fields_value == nullptr) + return ira->codegen->invalid_inst_gen->value->type; assert(fields_value->special == ConstValSpecialStatic); assert(is_slice(fields_value->type)); ZigValue *fields_ptr = fields_value->data.x_struct.fields[slice_ptr_index]; @@ -26215,6 +26244,8 @@ static ZigType *type_info_to_type(IrAnalyze *ira, IrInst *source_instr, ZigTypeI size_t fields_len = bigint_as_usize(&fields_len_value->data.x_bigint); ZigValue *decls_value = get_const_field(ira, source_instr->source_node, payload, "decls", 2); + if (decls_value == nullptr) + return ira->codegen->invalid_inst_gen->value->type; assert(decls_value->special == ConstValSpecialStatic); assert(is_slice(decls_value->type)); ZigValue *decls_len_value = decls_value->data.x_struct.fields[slice_len_index]; @@ -26225,7 +26256,8 @@ static ZigType *type_info_to_type(IrAnalyze *ira, IrInst *source_instr, ZigTypeI } bool is_tuple; - get_const_field_bool(ira, source_instr->source_node, payload, "is_tuple", 3, &is_tuple); + if ((err = get_const_field_bool(ira, source_instr->source_node, payload, "is_tuple", 3, &is_tuple))) + return ira->codegen->invalid_inst_gen->value->type; ZigType *entry = new_type_table_entry(ZigTypeIdStruct); buf_init_from_buf(&entry->name, @@ -26253,6 +26285,8 @@ static ZigType *type_info_to_type(IrAnalyze *ira, IrInst *source_instr, ZigTypeI return ira->codegen->invalid_inst_gen->value->type; field->decl_node = source_instr->source_node; 
ZigValue *type_value = get_const_field(ira, source_instr->source_node, field_value, "field_type", 1); + if (type_value == nullptr) + return ira->codegen->invalid_inst_gen->value->type; field->type_val = type_value; field->type_entry = type_value->data.x_type; if (entry->data.structure.fields_by_name.put_unique(field->name, field) != nullptr) { @@ -26260,6 +26294,8 @@ static ZigType *type_info_to_type(IrAnalyze *ira, IrInst *source_instr, ZigTypeI return ira->codegen->invalid_inst_gen->value->type; } ZigValue *default_value = get_const_field(ira, source_instr->source_node, field_value, "default_value", 2); + if (default_value == nullptr) + return ira->codegen->invalid_inst_gen->value->type; if (default_value->type->id == ZigTypeIdNull) { field->init_val = nullptr; } else if (default_value->type->id == ZigTypeIdOptional && default_value->type->data.maybe.child_type == field->type_entry) { @@ -26277,7 +26313,87 @@ static ZigType *type_info_to_type(IrAnalyze *ira, IrInst *source_instr, ZigTypeI return entry; } - case ZigTypeIdEnum: + case ZigTypeIdEnum: { + assert(payload->special == ConstValSpecialStatic); + assert(payload->type == ir_type_info_get_type(ira, "Enum", nullptr)); + + ZigValue *layout_value = get_const_field(ira, source_instr->source_node, payload, "layout", 0); + if (layout_value == nullptr) + return ira->codegen->invalid_inst_gen->value->type; + + assert(layout_value->special == ConstValSpecialStatic); + assert(layout_value->type == ir_type_info_get_type(ira, "ContainerLayout", nullptr)); + ContainerLayout layout = (ContainerLayout)bigint_as_u32(&layout_value->data.x_enum_tag); + + ZigType *tag_type = get_const_field_meta_type(ira, source_instr->source_node, payload, "tag_type", 1); + + ZigValue *fields_value = get_const_field(ira, source_instr->source_node, payload, "fields", 2); + if (fields_value == nullptr) + return ira->codegen->invalid_inst_gen->value->type; + + assert(fields_value->special == ConstValSpecialStatic); + 
assert(is_slice(fields_value->type)); + ZigValue *fields_ptr = fields_value->data.x_struct.fields[slice_ptr_index]; + ZigValue *fields_len_value = fields_value->data.x_struct.fields[slice_len_index]; + size_t fields_len = bigint_as_usize(&fields_len_value->data.x_bigint); + + ZigValue *decls_value = get_const_field(ira, source_instr->source_node, payload, "decls", 3); + if (decls_value == nullptr) + return ira->codegen->invalid_inst_gen->value->type; + + assert(decls_value->special == ConstValSpecialStatic); + assert(is_slice(decls_value->type)); + ZigValue *decls_len_value = decls_value->data.x_struct.fields[slice_len_index]; + size_t decls_len = bigint_as_usize(&decls_len_value->data.x_bigint); + if (decls_len != 0) { + ir_add_error(ira, source_instr, buf_create_from_str("TypeInfo.Enum.decls must be empty for @Type")); + return ira->codegen->invalid_inst_gen->value->type; + } + + Error err; + bool is_exhaustive; + if ((err = get_const_field_bool(ira, source_instr->source_node, payload, "is_exhaustive", 4, &is_exhaustive))) + return ira->codegen->invalid_inst_gen->value->type; + + ZigType *entry = new_type_table_entry(ZigTypeIdEnum); + buf_init_from_buf(&entry->name, + get_anon_type_name(ira->codegen, ira->old_irb.exec, "enum", source_instr->scope, source_instr->source_node, &entry->name)); + entry->data.enumeration.decl_node = source_instr->source_node; + entry->data.enumeration.tag_int_type = tag_type; + entry->data.enumeration.decls_scope = create_decls_scope( + ira->codegen, source_instr->source_node, source_instr->scope, entry, get_scope_import(source_instr->scope), &entry->name); + entry->data.enumeration.fields = heap::c_allocator.allocate<TypeEnumField>(fields_len); + entry->data.enumeration.fields_by_name.init(fields_len); + entry->data.enumeration.src_field_count = fields_len; + entry->data.enumeration.layout = layout; + entry->data.enumeration.non_exhaustive = !is_exhaustive; + + assert(fields_ptr->data.x_ptr.special == ConstPtrSpecialBaseArray); + 
assert(fields_ptr->data.x_ptr.data.base_array.elem_index == 0); + ZigValue *fields_arr = fields_ptr->data.x_ptr.data.base_array.array_val; + assert(fields_arr->special == ConstValSpecialStatic); + assert(fields_arr->data.x_array.special == ConstArraySpecialNone); + for (size_t i = 0; i < fields_len; i++) { + ZigValue *field_value = &fields_arr->data.x_array.data.s_none.elements[i]; + assert(field_value->type == ir_type_info_get_type(ira, "EnumField", nullptr)); + TypeEnumField *field = &entry->data.enumeration.fields[i]; + field->name = buf_alloc(); + if ((err = get_const_field_buf(ira, source_instr->source_node, field_value, "name", 0, field->name))) + return ira->codegen->invalid_inst_gen->value->type; + field->decl_index = i; + field->decl_node = source_instr->source_node; + if (entry->data.enumeration.fields_by_name.put_unique(field->name, field) != nullptr) { + ir_add_error(ira, source_instr, buf_sprintf("duplicate enum field '%s'", buf_ptr(field->name))); + return ira->codegen->invalid_inst_gen->value->type; + } + BigInt *field_int_value = get_const_field_lit_int(ira, source_instr->source_node, field_value, "value", 1); + if (field_int_value == nullptr) + return ira->codegen->invalid_inst_gen->value->type; + field->value = *field_int_value; + } + + return entry; + } case ZigTypeIdUnion: ir_add_error(ira, source_instr, buf_sprintf( "TODO implement @Type for 'TypeInfo.%s': see https://github.com/ziglang/zig/issues/2907", type_id_name(tagTypeId))); diff --git a/test/compile_errors.zig b/test/compile_errors.zig @@ -2,6 +2,41 @@ const tests = @import("tests.zig"); const std = @import("std"); pub fn addCases(cases: *tests.CompileErrorContext) void { + cases.add("@Type with undefined", + \\comptime { + \\ _ = @Type(.{ .Array = .{ .len = 0, .child = u8, .sentinel = undefined } }); + \\} + \\comptime { + \\ _ = @Type(.{ + \\ .Struct = .{ + \\ .fields = undefined, + \\ .decls = undefined, + \\ .is_tuple = false, + \\ .layout = .Auto, + \\ }, + \\ }); + \\} + , 
&[_][]const u8{ + "tmp.zig:2:16: error: use of undefined value here causes undefined behavior", + "tmp.zig:5:16: error: use of undefined value here causes undefined behavior", + }); + + cases.add("struct with declarations unavailable for @Type", + \\export fn entry() void { + \\ _ = @Type(@typeInfo(struct { const foo = 1; })); + \\} + , &[_][]const u8{ + "tmp.zig:2:15: error: TypeInfo.Struct.decls must be empty for @Type", + }); + + cases.add("enum with declarations unavailable for @Type", + \\export fn entry() void { + \\ _ = @Type(@typeInfo(enum { foo, const bar = 1; })); + \\} + , &[_][]const u8{ + "tmp.zig:2:15: error: TypeInfo.Enum.decls must be empty for @Type", + }); + cases.addTest("reject extern variables with initializers", \\extern var foo: int = 2; , &[_][]const u8{ @@ -123,16 +158,22 @@ pub fn addCases(cases: *tests.CompileErrorContext) void { \\export fn baz() void { \\ try bar(); \\} - \\export fn quux() u32 { + \\export fn qux() u32 { \\ return bar(); \\} + \\export fn quux() u32 { + \\ var buf: u32 = 0; + \\ buf = bar(); + \\} , &[_][]const u8{ "tmp.zig:2:17: error: expected type 'u32', found 'error{Ohno}'", "tmp.zig:1:17: note: function cannot return an error", "tmp.zig:8:5: error: expected type 'void', found '@TypeOf(bar).ReturnType.ErrorSet'", "tmp.zig:7:17: note: function cannot return an error", "tmp.zig:11:15: error: expected type 'u32', found '@TypeOf(bar).ReturnType.ErrorSet!u32'", - "tmp.zig:10:18: note: function cannot return an error", + "tmp.zig:10:17: note: function cannot return an error", + "tmp.zig:15:14: error: expected type 'u32', found '@TypeOf(bar).ReturnType.ErrorSet!u32'", + "tmp.zig:14:5: note: cannot store an error in type 'u32'", }); cases.addTest("int/float conversion to comptime_int/float", @@ -598,8 +639,8 @@ pub fn addCases(cases: *tests.CompileErrorContext) void { \\ _ = C; \\} , &[_][]const u8{ - "tmp.zig:4:5: error: non-exhaustive enum must specify size", - "error: value assigned to '_' field of non-exhaustive enum", 
+ "tmp.zig:4:5: error: value assigned to '_' field of non-exhaustive enum", + "error: non-exhaustive enum must specify size", "error: non-exhaustive enum specifies every value", "error: '_' field of non-exhaustive enum must be last", }); @@ -1400,15 +1441,6 @@ pub fn addCases(cases: *tests.CompileErrorContext) void { , &[_][]const u8{ "tmp.zig:3:36: error: expected type 'std.builtin.TypeInfo', found 'std.builtin.Int'", }); - - cases.add("struct with declarations unavailable for @Type", - \\export fn entry() void { - \\ _ = @Type(@typeInfo(struct { const foo = 1; })); - \\} - , &[_][]const u8{ - "tmp.zig:2:15: error: TypeInfo.Struct.decls must be empty for @Type", - }); - cases.add("wrong type for argument tuple to @asyncCall", \\export fn entry1() void { \\ var frame: @Frame(foo) = undefined; diff --git a/test/stage1/behavior/type.zig b/test/stage1/behavior/type.zig @@ -280,3 +280,37 @@ test "Type.Struct" { testing.expectEqual(@as(usize, 0), infoC.decls.len); testing.expectEqual(@as(bool, false), infoC.is_tuple); } + +test "Type.Enum" { + const Foo = @Type(.{ + .Enum = .{ + .layout = .Auto, + .tag_type = u8, + .fields = &[_]TypeInfo.EnumField{ + .{ .name = "a", .value = 1 }, + .{ .name = "b", .value = 5 }, + }, + .decls = &[_]TypeInfo.Declaration{}, + .is_exhaustive = true, + }, + }); + testing.expectEqual(true, @typeInfo(Foo).Enum.is_exhaustive); + testing.expectEqual(@as(u8, 1), @enumToInt(Foo.a)); + testing.expectEqual(@as(u8, 5), @enumToInt(Foo.b)); + const Bar = @Type(.{ + .Enum = .{ + .layout = .Extern, + .tag_type = u32, + .fields = &[_]TypeInfo.EnumField{ + .{ .name = "a", .value = 1 }, + .{ .name = "b", .value = 5 }, + }, + .decls = &[_]TypeInfo.Declaration{}, + .is_exhaustive = false, + }, + }); + testing.expectEqual(false, @typeInfo(Bar).Enum.is_exhaustive); + testing.expectEqual(@as(u32, 1), @enumToInt(Bar.a)); + testing.expectEqual(@as(u32, 5), @enumToInt(Bar.b)); + testing.expectEqual(@as(u32, 6), @enumToInt(@intToEnum(Bar, 6))); +} diff --git 
a/test/stage1/behavior/type_info.zig b/test/stage1/behavior/type_info.zig @@ -153,7 +153,6 @@ fn testErrorSet() void { expect(error_set_info == .ErrorSet); expect(error_set_info.ErrorSet.?.len == 3); expect(mem.eql(u8, error_set_info.ErrorSet.?[0].name, "First")); - expect(error_set_info.ErrorSet.?[2].value == @errorToInt(TestErrorSet.Third)); const error_union_info = @typeInfo(TestErrorSet!usize); expect(error_union_info == .ErrorUnion); diff --git a/test/stage2/spu-ii.zig b/test/stage2/spu-ii.zig @@ -0,0 +1,23 @@ +const std = @import("std"); +const TestContext = @import("../../src-self-hosted/test.zig").TestContext; + +const spu = std.zig.CrossTarget{ + .cpu_arch = .spu_2, + .os_tag = .freestanding, +}; + +pub fn addCases(ctx: *TestContext) !void { + { + var case = ctx.exe("SPU-II Basic Test", spu); + case.addCompareOutput( + \\fn killEmulator() noreturn { + \\ asm volatile ("undefined0"); + \\ unreachable; + \\} + \\ + \\export fn _start() noreturn { + \\ killEmulator(); + \\} + , ""); + } +} diff --git a/test/stage2/test.zig b/test/stage2/test.zig @@ -18,6 +18,11 @@ const linux_riscv64 = std.zig.CrossTarget{ .os_tag = .linux, }; +const linux_arm = std.zig.CrossTarget{ + .cpu_arch = .arm, + .os_tag = .linux, +}; + const wasi = std.zig.CrossTarget{ .cpu_arch = .wasm32, .os_tag = .wasi, @@ -26,6 +31,8 @@ const wasi = std.zig.CrossTarget{ pub fn addCases(ctx: *TestContext) !void { try @import("zir.zig").addCases(ctx); try @import("cbe.zig").addCases(ctx); + try @import("spu-ii.zig").addCases(ctx); + { var case = ctx.exe("hello world with updates", linux_x64); @@ -180,6 +187,41 @@ pub fn addCases(ctx: *TestContext) !void { } { + var case = ctx.exe("hello world", linux_arm); + // Regular old hello world + case.addCompareOutput( + \\export fn _start() noreturn { + \\ print(); + \\ exit(); + \\} + \\ + \\fn print() void { + \\ asm volatile ("svc #0" + \\ : + \\ : [number] "{r7}" (4), + \\ [arg1] "{r0}" (1), + \\ [arg2] "{r1}" (@ptrToInt("Hello, World!\n")), + \\ [arg3] 
"{r2}" (14) + \\ : "memory" + \\ ); + \\ return; + \\} + \\ + \\fn exit() noreturn { + \\ asm volatile ("svc #0" + \\ : + \\ : [number] "{r7}" (1), + \\ [arg1] "{r0}" (0) + \\ : "memory" + \\ ); + \\ unreachable; + \\} + , + "Hello, World!\n", + ); + } + + { var case = ctx.exe("adding numbers at comptime", linux_x64); case.addCompareOutput( \\export fn _start() noreturn { @@ -600,6 +642,58 @@ pub fn addCases(ctx: *TestContext) !void { "", ); + // Spilling registers to the stack. + case.addCompareOutput( + \\export fn _start() noreturn { + \\ assert(add(3, 4) == 791); + \\ + \\ exit(); + \\} + \\ + \\fn add(a: u32, b: u32) u32 { + \\ const x: u32 = blk: { + \\ const c = a + b; // 7 + \\ const d = a + c; // 10 + \\ const e = d + b; // 14 + \\ const f = d + e; // 24 + \\ const g = e + f; // 38 + \\ const h = f + g; // 62 + \\ const i = g + h; // 100 + \\ const j = i + d; // 110 + \\ const k = i + j; // 210 + \\ const l = k + c; // 217 + \\ const m = l + d; // 227 + \\ const n = m + e; // 241 + \\ const o = n + f; // 265 + \\ const p = o + g; // 303 + \\ const q = p + h; // 365 + \\ const r = q + i; // 465 + \\ const s = r + j; // 575 + \\ const t = s + k; // 785 + \\ break :blk t; + \\ }; + \\ const y = x + a; // 788 + \\ const z = y + a; // 791 + \\ return z; + \\} + \\ + \\pub fn assert(ok: bool) void { + \\ if (!ok) unreachable; // assertion failure + \\} + \\ + \\fn exit() noreturn { + \\ asm volatile ("syscall" + \\ : + \\ : [number] "{rax}" (231), + \\ [arg1] "{rdi}" (0) + \\ : "rcx", "r11", "memory" + \\ ); + \\ unreachable; + \\} + , + "", + ); + // Character literals and multiline strings. 
case.addCompareOutput( \\export fn _start() noreturn { diff --git a/tools/process_headers.zig b/tools/process_headers.zig @@ -15,6 +15,7 @@ const Arch = std.Target.Cpu.Arch; const Abi = std.Target.Abi; const OsTag = std.Target.Os.Tag; const assert = std.debug.assert; +const Sha256 = std.crypto.hash.sha2.Sha256; const LibCTarget = struct { name: []const u8, @@ -313,7 +314,7 @@ pub fn main() !void { var max_bytes_saved: usize = 0; var total_bytes: usize = 0; - var hasher = std.crypto.hash.sha2.Sha256.init(.{}); + var hasher = Sha256.init(.{}); for (libc_targets) |libc_target| { const dest_target = DestTarget{ @@ -359,7 +360,7 @@ pub fn main() !void { const trimmed = std.mem.trim(u8, raw_bytes, " \r\n\t"); total_bytes += raw_bytes.len; const hash = try allocator.alloc(u8, 32); - hasher.reset(); + hasher = Sha256.init(.{}); hasher.update(rel_path); hasher.update(trimmed); hasher.final(hash); diff --git a/tools/update_glibc.zig b/tools/update_glibc.zig @@ -148,12 +148,12 @@ pub fn main() !void { for (abi_lists) |*abi_list| { const target_funcs_gop = try target_functions.getOrPut(@ptrToInt(abi_list)); if (!target_funcs_gop.found_existing) { - target_funcs_gop.kv.value = FunctionSet{ + target_funcs_gop.entry.value = FunctionSet{ .list = std.ArrayList(VersionedFn).init(allocator), .fn_vers_list = FnVersionList.init(allocator), }; } - const fn_set = &target_funcs_gop.kv.value.list; + const fn_set = &target_funcs_gop.entry.value.list; for (lib_names) |lib_name, lib_name_index| { const lib_prefix = if (std.mem.eql(u8, lib_name, "ld")) "" else "lib"; @@ -203,11 +203,11 @@ pub fn main() !void { _ = try global_ver_set.put(ver, undefined); const gop = try global_fn_set.getOrPut(name); if (gop.found_existing) { - if (!std.mem.eql(u8, gop.kv.value.lib, "c")) { - gop.kv.value.lib = lib_name; + if (!std.mem.eql(u8, gop.entry.value.lib, "c")) { + gop.entry.value.lib = lib_name; } } else { - gop.kv.value = Function{ + gop.entry.value = Function{ .name = name, .lib = lib_name, .index = 
undefined, @@ -224,14 +224,14 @@ pub fn main() !void { const global_fn_list = blk: { var list = std.ArrayList([]const u8).init(allocator); var it = global_fn_set.iterator(); - while (it.next()) |kv| try list.append(kv.key); + while (it.next()) |entry| try list.append(entry.key); std.sort.sort([]const u8, list.span(), {}, strCmpLessThan); break :blk list.span(); }; const global_ver_list = blk: { var list = std.ArrayList([]const u8).init(allocator); var it = global_ver_set.iterator(); - while (it.next()) |kv| try list.append(kv.key); + while (it.next()) |entry| try list.append(entry.key); std.sort.sort([]const u8, list.span(), {}, versionLessThan); break :blk list.span(); }; @@ -254,9 +254,9 @@ pub fn main() !void { var buffered = std.io.bufferedOutStream(fns_txt_file.outStream()); const fns_txt = buffered.outStream(); for (global_fn_list) |name, i| { - const kv = global_fn_set.get(name).?; - kv.value.index = i; - try fns_txt.print("{} {}\n", .{ name, kv.value.lib }); + const entry = global_fn_set.getEntry(name).?; + entry.value.index = i; + try fns_txt.print("{} {}\n", .{ name, entry.value.lib }); } try buffered.flush(); } @@ -264,16 +264,16 @@ pub fn main() !void { // Now the mapping of version and function to integer index is complete. // Here we create a mapping of function name to list of versions. 
for (abi_lists) |*abi_list, abi_index| { - const kv = target_functions.get(@ptrToInt(abi_list)).?; - const fn_vers_list = &kv.value.fn_vers_list; - for (kv.value.list.span()) |*ver_fn| { + const entry = target_functions.getEntry(@ptrToInt(abi_list)).?; + const fn_vers_list = &entry.value.fn_vers_list; + for (entry.value.list.span()) |*ver_fn| { const gop = try fn_vers_list.getOrPut(ver_fn.name); if (!gop.found_existing) { - gop.kv.value = std.ArrayList(usize).init(allocator); + gop.entry.value = std.ArrayList(usize).init(allocator); } - const ver_index = global_ver_set.get(ver_fn.ver).?.value; - if (std.mem.indexOfScalar(usize, gop.kv.value.span(), ver_index) == null) { - try gop.kv.value.append(ver_index); + const ver_index = global_ver_set.getEntry(ver_fn.ver).?.value; + if (std.mem.indexOfScalar(usize, gop.entry.value.span(), ver_index) == null) { + try gop.entry.value.append(ver_index); } } } @@ -287,7 +287,7 @@ pub fn main() !void { // first iterate over the abi lists for (abi_lists) |*abi_list, abi_index| { - const fn_vers_list = &target_functions.get(@ptrToInt(abi_list)).?.value.fn_vers_list; + const fn_vers_list = &target_functions.getEntry(@ptrToInt(abi_list)).?.value.fn_vers_list; for (abi_list.targets) |target, it_i| { if (it_i != 0) try abilist_txt.writeByte(' '); try abilist_txt.print("{}-linux-{}", .{ @tagName(target.arch), @tagName(target.abi) }); @@ -295,11 +295,11 @@ pub fn main() !void { try abilist_txt.writeByte('\n'); // next, each line implicitly corresponds to a function for (global_fn_list) |name| { - const kv = fn_vers_list.get(name) orelse { + const entry = fn_vers_list.getEntry(name) orelse { try abilist_txt.writeByte('\n'); continue; }; - for (kv.value.span()) |ver_index, it_i| { + for (entry.value.span()) |ver_index, it_i| { if (it_i != 0) try abilist_txt.writeByte(' '); try abilist_txt.print("{d}", .{ver_index}); }