commit e0173c2ce01890ea71ab7660f2ecdeda29956f77 (tree)
parent bb304796f466b4fd15a4adac7ffbb81ace95d2a4
Author: Andrew Kelley <andrew@ziglang.org>
Date: Wed, 25 Feb 2026 20:23:36 +0100
Merge pull request 'rework fuzz testing to be smith based' (#31205) from gooncreeper/zig:integrated-smith into master
Reviewed-on: https://codeberg.org/ziglang/zig/pulls/31205
Reviewed-by: Andrew Kelley <andrew@ziglang.org>
Diffstat:
29 files changed, 3404 insertions(+), 1544 deletions(-)
diff --git a/build.zig b/build.zig
@@ -85,6 +85,7 @@ pub fn build(b: *std.Build) !void {
docs_step.dependOn(std_docs_step);
const no_matrix = b.option(bool, "no-matrix", "Limit test matrix to exactly one target configuration") orelse false;
+ const fuzz_only = b.option(bool, "fuzz-only", "Limit test matrix to one target suitable for fuzzing") orelse false;
const skip_debug = b.option(bool, "skip-debug", "Main test suite skips debug builds") orelse false;
const skip_release = b.option(bool, "skip-release", "Main test suite skips release builds") orelse no_matrix;
const skip_release_small = b.option(bool, "skip-release-small", "Main test suite skips release-small builds") orelse skip_release;
@@ -417,6 +418,13 @@ pub fn build(b: *std.Build) !void {
}
const optimization_modes = chosen_opt_modes_buf[0..chosen_mode_index];
+ const test_only: ?tests.ModuleTestOptions.TestOnly = if (no_matrix)
+ .default
+ else if (fuzz_only)
+ .{ .fuzz = optimize }
+ else
+ null;
+
const fmt_include_paths = &.{ "lib", "src", "test", "tools", "build.zig", "build.zig.zon" };
const fmt_exclude_paths = &.{ "test/cases", "test/behavior/zon" };
const do_fmt = b.addFmt(.{
@@ -472,7 +480,7 @@ pub fn build(b: *std.Build) !void {
.include_paths = &.{},
.skip_single_threaded = skip_single_threaded,
.skip_non_native = skip_non_native,
- .test_default_only = no_matrix,
+ .test_only = test_only,
.skip_spirv = skip_spirv,
.skip_wasm = skip_wasm,
.skip_freebsd = skip_freebsd,
@@ -497,7 +505,7 @@ pub fn build(b: *std.Build) !void {
.include_paths = &.{},
.skip_single_threaded = true,
.skip_non_native = skip_non_native,
- .test_default_only = no_matrix,
+ .test_only = test_only,
.skip_spirv = skip_spirv,
.skip_wasm = skip_wasm,
.skip_freebsd = skip_freebsd,
@@ -523,7 +531,7 @@ pub fn build(b: *std.Build) !void {
.include_paths = &.{},
.skip_single_threaded = true,
.skip_non_native = skip_non_native,
- .test_default_only = no_matrix,
+ .test_only = test_only,
.skip_spirv = skip_spirv,
.skip_wasm = skip_wasm,
.skip_freebsd = skip_freebsd,
@@ -549,7 +557,7 @@ pub fn build(b: *std.Build) !void {
.include_paths = &.{},
.skip_single_threaded = skip_single_threaded,
.skip_non_native = skip_non_native,
- .test_default_only = no_matrix,
+ .test_only = test_only,
.skip_spirv = skip_spirv,
.skip_wasm = skip_wasm,
.skip_freebsd = skip_freebsd,
diff --git a/lib/build-web/fuzz.zig b/lib/build-web/fuzz.zig
@@ -1,5 +1,6 @@
// Server timestamp.
var start_fuzzing_timestamp: i64 = undefined;
+var start_fuzzing_n_runs: u64 = undefined;
const js = struct {
extern "fuzz" fn requestSources() void;
@@ -36,6 +37,7 @@ pub fn sourceIndexMessage(msg_bytes: []u8) error{OutOfMemory}!void {
const source_locations: []const Coverage.SourceLocation = @alignCast(std.mem.bytesAsSlice(Coverage.SourceLocation, msg_bytes[source_locations_start..source_locations_end]));
start_fuzzing_timestamp = header.start_timestamp;
+ start_fuzzing_n_runs = header.start_n_runs;
try updateCoverageSources(directories, files, source_locations, string_bytes);
js.ready();
}
@@ -271,7 +273,7 @@ fn updateStats() error{OutOfMemory}!void {
const avg_speed: f64 = speed: {
const ns_elapsed: f64 = @floatFromInt(nsSince(start_fuzzing_timestamp));
- const n_runs: f64 = @floatFromInt(hdr.n_runs);
+ const n_runs: f64 = @floatFromInt(hdr.n_runs -% start_fuzzing_n_runs);
break :speed n_runs / (ns_elapsed / std.time.ns_per_s);
};
diff --git a/lib/compiler/build_runner.zig b/lib/compiler/build_runner.zig
@@ -306,7 +306,11 @@ pub fn main(init: process.Init.Minimal) !void {
} else if (mem.eql(u8, arg, "--debug-pkg-config")) {
builder.debug_pkg_config = true;
} else if (mem.eql(u8, arg, "--debug-rt")) {
- graph.debug_compiler_runtime_libs = true;
+ graph.debug_compiler_runtime_libs = .Debug;
+ } else if (mem.cutPrefix(u8, arg, "--debug-rt=")) |rest| {
+ graph.debug_compiler_runtime_libs =
+ std.meta.stringToEnum(std.builtin.OptimizeMode, rest) orelse
+ fatal("unrecognized optimization mode: '{s}'", .{rest});
} else if (mem.eql(u8, arg, "--debug-compile-errors")) {
builder.debug_compile_errors = true;
} else if (mem.eql(u8, arg, "--debug-incremental")) {
diff --git a/lib/compiler/test_runner.zig b/lib/compiler/test_runner.zig
@@ -379,7 +379,7 @@ var fuzz_amount_or_instance: u64 = undefined;
pub fn fuzz(
context: anytype,
- comptime testOne: fn (context: @TypeOf(context), []const u8) anyerror!void,
+ comptime testOne: fn (context: @TypeOf(context), *std.testing.Smith) anyerror!void,
options: testing.FuzzInputOptions,
) anyerror!void {
// Prevent this function from confusing the fuzzer by omitting its own code
@@ -406,12 +406,12 @@ pub fn fuzz(
const global = struct {
var ctx: @TypeOf(context) = undefined;
- fn test_one(input: fuzz_abi.Slice) callconv(.c) void {
+ fn test_one() callconv(.c) void {
@disableInstrumentation();
testing.allocator_instance = .{};
defer if (testing.allocator_instance.deinit() == .leak) std.process.exit(1);
log_err_count = 0;
- testOne(ctx, input.toSlice()) catch |err| switch (err) {
+ testOne(ctx, @constCast(&testing.Smith{ .in = null })) catch |err| switch (err) {
error.SkipZigTest => return,
else => {
const stderr = std.debug.lockStderr(&.{}).terminal();
@@ -435,13 +435,11 @@ pub fn fuzz(
const prev_allocator_state = testing.allocator_instance;
testing.allocator_instance = .{};
defer testing.allocator_instance = prev_allocator_state;
-
global.ctx = context;
- fuzz_abi.fuzzer_init_test(&global.test_one, .fromSlice(builtin.test_functions[fuzz_test_index].name));
+ fuzz_abi.fuzzer_set_test(&global.test_one, .fromSlice(builtin.test_functions[fuzz_test_index].name));
for (options.corpus) |elem|
fuzz_abi.fuzzer_new_input(.fromSlice(elem));
-
fuzz_abi.fuzzer_main(fuzz_mode, fuzz_amount_or_instance);
return;
}
@@ -449,10 +447,12 @@ pub fn fuzz(
// When the unit test executable is not built in fuzz mode, only run the
// provided corpus.
for (options.corpus) |input| {
- try testOne(context, input);
+ var smith: testing.Smith = .{ .in = input };
+ try testOne(context, &smith);
}
// In case there is no provided corpus, also use an empty
// string as a smoke test.
- try testOne(context, "");
+ var smith: testing.Smith = .{ .in = "" };
+ try testOne(context, &smith);
}
diff --git a/lib/fuzzer.zig b/lib/fuzzer.zig
@@ -1,15 +1,13 @@
const builtin = @import("builtin");
-const native_endian = builtin.cpu.arch.endian();
const std = @import("std");
const Io = std.Io;
-const fatal = std.process.fatal;
const mem = std.mem;
const math = std.math;
-const Allocator = std.mem.Allocator;
const assert = std.debug.assert;
const panic = std.debug.panic;
const abi = std.Build.abi.fuzz;
+const Uid = abi.Uid;
pub const std_options = std.Options{
.logFn = logOverride,
@@ -23,8 +21,7 @@ fn logOverride(
comptime format: []const u8,
args: anytype,
) void {
- const f = log_f orelse
- panic("attempt to use log before initialization, message:\n" ++ format, args);
+ const f = log_f orelse panic("log before initialization, message:\n" ++ format, args);
f.lock(io, .exclusive) catch |e| panic("failed to lock logging file: {t}", .{e});
defer f.unlock(io);
@@ -48,10 +45,9 @@ const gpa = switch (builtin.mode) {
.ReleaseFast, .ReleaseSmall, .ReleaseSafe => std.heap.smp_allocator,
};
-/// Part of `exec`, however seperate to allow it to be set before `exec` is.
+// Separate from `exec` so that it can be initialized before `exec` is.
var log_f: ?Io.File = null;
-var exec: Executable = .preinit;
-var inst: Instrumentation = .preinit;
+var exec: Executable = undefined;
var fuzzer: Fuzzer = undefined;
var current_test_name: ?[]const u8 = null;
@@ -60,36 +56,28 @@ fn bitsetUsizes(elems: usize) usize {
}
const Executable = struct {
- /// Tracks the hit count for each pc as updated by the process's instrumentation.
+ /// Tracks the hit count for each pc as updated by the test's instrumentation.
pc_counters: []u8,
cache_f: Io.Dir,
/// Shared copy of all pcs that have been hit stored in a memory-mapped file that can viewed
/// while the fuzzer is running.
- shared_seen_pcs: MemoryMappedList,
+ shared_seen_pcs: []align(std.heap.page_size_min) volatile u8,
/// Hash of pcs used to uniquely identify the shared coverage file
pc_digest: u64,
- /// A minimal state for this struct which instrumentation can function on.
- /// Used before this structure is initialized to avoid illegal behavior
- /// from instrumentation functions being called and using undefined values.
- pub const preinit: Executable = .{
- .pc_counters = undefined, // instrumentation works off the __sancov_cntrs section
- .cache_f = undefined,
- .shared_seen_pcs = undefined,
- .pc_digest = undefined,
- };
-
- fn getCoverageFile(cache_dir: Io.Dir, pcs: []const usize, pc_digest: u64) MemoryMappedList {
- const pc_bitset_usizes = bitsetUsizes(pcs.len);
- const coverage_file_name = std.fmt.hex(pc_digest);
- comptime assert(abi.SeenPcsHeader.trailing[0] == .pc_bits_usize);
- comptime assert(abi.SeenPcsHeader.trailing[1] == .pc_addr);
+ fn getCoverageMap(
+ cache_dir: Io.Dir,
+ pcs: []const usize,
+ pc_digest: u64,
+ ) []align(std.heap.page_size_min) volatile u8 {
+ const file_name = std.fmt.hex(pc_digest);
var v = cache_dir.createDirPathOpen(io, "v", .{}) catch |e|
panic("failed to create directory 'v': {t}", .{e});
defer v.close(io);
- const coverage_file, const populate = if (v.createFile(io, &coverage_file_name, .{
+
+ const coverage_file, const populate = if (v.createFile(io, &file_name, .{
.read = true,
// If we create the file, we want to block other processes while we populate it
.lock = .exclusive,
@@ -97,71 +85,76 @@ const Executable = struct {
})) |f|
.{ f, true }
else |e| switch (e) {
- error.PathAlreadyExists => .{ v.openFile(io, &coverage_file_name, .{
+ error.PathAlreadyExists => .{ v.openFile(io, &file_name, .{
.mode = .read_write,
.lock = .shared,
}) catch |e2| panic(
"failed to open existing coverage file '{s}': {t}",
- .{ &coverage_file_name, e2 },
+ .{ &file_name, e2 },
), false },
- else => panic("failed to create coverage file '{s}': {t}", .{ &coverage_file_name, e }),
+ else => panic("failed to create coverage file '{s}': {t}", .{ &file_name, e }),
};
+ comptime assert(abi.SeenPcsHeader.trailing[0] == .pc_bits_usize);
+ comptime assert(abi.SeenPcsHeader.trailing[1] == .pc_addr);
+ const pc_bitset_usizes = bitsetUsizes(pcs.len);
const coverage_file_len = @sizeOf(abi.SeenPcsHeader) +
pc_bitset_usizes * @sizeOf(usize) +
pcs.len * @sizeOf(usize);
if (populate) {
- defer coverage_file.lock(io, .shared) catch |e| panic(
- "failed to demote lock for coverage file '{s}': {t}",
- .{ &coverage_file_name, e },
- );
- var map = MemoryMappedList.create(coverage_file, 0, coverage_file_len) catch |e| panic(
- "failed to init memory map for coverage file '{s}': {t}",
- .{ &coverage_file_name, e },
- );
- map.appendSliceAssumeCapacity(@ptrCast(&abi.SeenPcsHeader{
- .n_runs = 0,
- .unique_runs = 0,
- .pcs_len = pcs.len,
- }));
- map.appendNTimesAssumeCapacity(0, pc_bitset_usizes * @sizeOf(usize));
- // Relocations have been applied to `pcs` so it contains runtime addresses (with slide
- // applied). We need to translate these to the virtual addresses as on disk.
- for (pcs) |pc| {
- const pc_vaddr = fuzzer_unslide_address(pc);
- map.appendSliceAssumeCapacity(@ptrCast(&pc_vaddr));
- }
- return map;
+ coverage_file.setLength(io, coverage_file_len) catch |e|
+ panic("failed to resize new coverage file '{s}': {t}", .{ &file_name, e });
} else {
const size = coverage_file.length(io) catch |e|
- panic("failed to stat coverage file '{s}': {t}", .{ &coverage_file_name, e });
+ panic("failed to stat coverage file '{s}': {t}", .{ &file_name, e });
if (size != coverage_file_len) panic(
"incompatible existing coverage file '{s}' (differing lengths: {} != {})",
- .{ &coverage_file_name, size, coverage_file_len },
+ .{ &file_name, size, coverage_file_len },
);
+ }
- const map = MemoryMappedList.init(
- coverage_file,
- coverage_file_len,
- coverage_file_len,
- ) catch |e| panic(
- "failed to init memory map for coverage file '{s}': {t}",
- .{ &coverage_file_name, e },
- );
+ var io_map = coverage_file.createMemoryMap(io, .{ .len = coverage_file_len }) catch |e|
+ panic("failed to memmap coverage file '{s}': {t}", .{ &file_name, e });
+ const map = io_map.memory;
- const seen_pcs_header: *const abi.SeenPcsHeader = @ptrCast(@volatileCast(map.items));
- if (seen_pcs_header.pcs_len != pcs.len) panic(
- "incompatible existing coverage file '{s}' (differing pcs length: {} != {})",
- .{ &coverage_file_name, seen_pcs_header.pcs_len, pcs.len },
+ const header: *abi.SeenPcsHeader = @ptrCast(map[0..@sizeOf(abi.SeenPcsHeader)]);
+ const trailing = map[@sizeOf(abi.SeenPcsHeader)..];
+ const trailing_bitset_end = pc_bitset_usizes * @sizeOf(usize);
+ const trailing_bitset: []usize = @ptrCast(@alignCast(trailing[0..trailing_bitset_end]));
+ const trailing_addresses: []usize = @ptrCast(@alignCast(trailing[trailing_bitset_end..]));
+
+ if (populate) {
+ header.* = .{
+ .n_runs = 0,
+ .unique_runs = 0,
+ .pcs_len = pcs.len,
+ };
+ @memset(trailing_bitset, 0);
+ for (trailing_addresses, pcs) |*cov_pc, slided_pc| {
+ cov_pc.* = fuzzer_unslide_address(slided_pc);
+ }
+ io_map.write(io) catch |e|
+ panic("failed to write memory map of '{s}': {t}", .{ &file_name, e });
+
+ coverage_file.lock(io, .shared) catch |e| panic(
+ "failed to demote lock for coverage file '{s}': {t}",
+ .{ &file_name, e },
);
- if (mem.indexOfDiff(usize, seen_pcs_header.pcAddrs(), pcs)) |i| panic(
- "incompatible existing coverage file '{s}' (differing pc at index {d}: {x} != {x})",
- .{ &coverage_file_name, i, seen_pcs_header.pcAddrs()[i], pcs[i] },
+ } else { // Check expected contents
+ if (header.pcs_len != pcs.len) panic(
+ "incompatible existing coverage file '{s}' (differing pcs length: {} != {})",
+ .{ &file_name, header.pcs_len, pcs.len },
);
-
- return map;
+ for (0.., header.pcAddrs(), pcs) |i, cov_pc, slided_pc| {
+ const pc = fuzzer_unslide_address(slided_pc);
+ if (cov_pc != pc) panic(
+ "incompatible existing coverage file '{s}' (differing pc at index {d}: {x} != {x})",
+ .{ &file_name, i, cov_pc, pc },
+ );
+ }
}
+ return map;
}
pub fn init(cache_dir_path: []const u8) Executable {
@@ -230,7 +223,7 @@ const Executable = struct {
}
break :digest h.final();
};
- self.shared_seen_pcs = getCoverageFile(cache_dir, pcs, self.pc_digest);
+ self.shared_seen_pcs = getCoverageMap(cache_dir, pcs, self.pc_digest);
return self;
}
@@ -244,14 +237,14 @@ const Executable = struct {
index: usize = 0,
pc_counters: []u8,
- pub fn next(self: *PcBitsetIterator) usize {
- const rest = self.pc_counters[self.index..];
+ pub fn next(i: *PcBitsetIterator) usize {
+ const rest = i.pc_counters[i.index..];
if (rest.len >= @bitSizeOf(usize)) {
- defer self.index += @bitSizeOf(usize);
+ defer i.index += @bitSizeOf(usize);
const V = @Vector(@bitSizeOf(usize), u8);
return @as(usize, @bitCast(@as(V, @splat(0)) != rest[0..@bitSizeOf(usize)].*));
} else if (rest.len != 0) {
- defer self.index += rest.len;
+ defer i.index += rest.len;
var res: usize = 0;
for (0.., rest) |bit_index, byte| {
res |= @shlExact(@as(usize, @intFromBool(byte != 0)), @intCast(bit_index));
@@ -260,155 +253,414 @@ const Executable = struct {
} else unreachable;
}
};
+
+ pub fn seenPcsHeader(e: Executable) *align(std.heap.page_size_min) volatile abi.SeenPcsHeader {
+ return mem.bytesAsValue(
+ abi.SeenPcsHeader,
+ e.shared_seen_pcs[0..@sizeOf(abi.SeenPcsHeader)],
+ );
+ }
};
-/// Data gathered from instrumentation functions.
-/// Seperate from Executable since its state is resetable and changes.
-/// Seperate from Fuzzer since it may be needed before fuzzing starts.
-const Instrumentation = struct {
- /// Bitset of seen pcs across all runs excluding fresh pcs.
- /// This is seperate then shared_seen_pcs because multiple fuzzing processes are likely using
- /// it which causes contention and unrelated pcs to our campaign being set.
- seen_pcs: []usize,
+const Fuzzer = struct {
+ // The default PRNG is not used here because going through `Random` can be very expensive:
+ // LLVM often fails to devirtualize and inline `fill`. Additionally, optimization
+ // is simpler since integers are not serialized then deserialized in the random stream.
+ //
+ // This accounts for a 30% performance improvement with LLVM 21.
+ xoshiro: std.Random.Xoshiro256,
+ test_one: abi.TestOne,
- /// Stores a fresh input's new pcs
- fresh_pcs: []usize,
-
- /// Pcs which __sanitizer_cov_trace_switch and __sanitizer_cov_trace_const_cmpx
- /// have been called from and have had their already been added to const_x_vals
- const_pcs: std.AutoArrayHashMapUnmanaged(usize, void) = .empty,
- /// Values that have been constant operands in comparisons and switch cases.
- /// There may be duplicates in this array if they came from different addresses, which is
- /// fine as they are likely more important and hence more likely to be selected.
- const_vals2: std.ArrayList(u16) = .empty,
- const_vals4: std.ArrayList(u32) = .empty,
- const_vals8: std.ArrayList(u64) = .empty,
- const_vals16: std.ArrayList(u128) = .empty,
-
- /// A minimal state for this struct which instrumentation can function on.
- /// Used before this structure is initialized to avoid illegal behavior
- /// from instrumentation functions being called and using undefined values.
- pub const preinit: Instrumentation = .{
- .seen_pcs = undefined, // currently only updated by `Fuzzer`
- .fresh_pcs = undefined,
+ seen_pcs: []usize,
+ bests: struct {
+ len: u32,
+ quality_buf: []Input.Best,
+ input_buf: []Input.Best.Map,
+ },
+ seen_uids: std.ArrayHashMapUnmanaged(Uid, struct {
+ slices: union {
+ ints: std.ArrayList([]u64),
+ bytes: std.ArrayList(Input.Data.Bytes),
+ },
+ }, Uid.hashmap_ctx, false),
+
+ /// Past inputs leading to new pc or uid hits.
+ /// These are randomly mutated in round-robin fashion.
+ corpus: std.MultiArrayList(Input),
+ corpus_pos: Input.Index,
+
+ bytes_input: std.testing.Smith,
+ input_builder: Input.Builder,
+ /// Number of data calls the current run has made.
+ req_values: u32,
+ /// Number of bytes provided to the current run.
+ req_bytes: u32,
+ /// Index into the uid slices the current run is at.
+ /// `uid_data_i[i]` corresponds to `corpus[corpus_pos].data.uid_slices.values()[i]`.
+ uid_data_i: std.ArrayList(u32),
+ mut_data: struct {
+ /// Untyped indexes of `corpus[corpus_pos].data` that should be mutated.
+ ///
+ /// If an index appears multiple times, the first should be prioritized.
+ i: [4]u32,
+ /// For mutations which are a sequential mutation, the state is stored here.
+ seq: [4]struct {
+ kind: packed struct {
+ class: enum(u1) { replace, insert },
+ copy: bool,
+ /// If set then `.copy = true` and `.class = .replace`
+ ordered_mutate: bool,
+ /// If set then all other bits are undefined
+ none: bool,
+ },
+ len: u32,
+ copy: SeqCopy,
+ },
+ },
+
+ /// As values are provided to the Smith, they are appended to this. If the test
+ /// crashes, this can be recovered and used to obtain the crashing values.
+ mmap_input: MemoryMappedInput,
+ /// Filesystem directory containing found inputs for future runs
+ corpus_dir: Io.Dir,
+ /// The values in `corpus` past this point directly correspond to what is found
+ /// in `corpus_dir`.
+ start_corpus_dir: u32,
+
+ const SeqCopy = union {
+ order_i: u32,
+ ints: []u64,
+ bytes: Input.Data.Bytes,
};
- pub fn depreinit(self: *Instrumentation) void {
- self.const_vals2.deinit(gpa);
- self.const_vals4.deinit(gpa);
- self.const_vals8.deinit(gpa);
- self.const_vals16.deinit(gpa);
- self.* = undefined;
- }
+ const Input = struct {
+ /// Untyped indexes into this are formed as follows: If the index is less than `ints.len`
+ /// it indexes into `ints`, otherwise the index minus `ints.len` indexes into `bytes`.
+ /// `math.maxInt(u32)` is reserved and impossible normally.
+ data: Data,
+ /// Corresponds with `data.uid_slices`.
+ /// Values are the indexes of `seen_uids` with the same uid.
+ seen_uid_i: []u32,
+ /// Used to select a random uid to mutate from.
+ ///
+ /// The number of times a uid is present in this array is logarithmic
+ /// in its data length, so that uids with long data are not the only ones
+ /// selected while some bias towards longer ones is still kept.
+ weighted_uid_slice_i: []u32,
+
+ ref: struct {
+ /// Values are indexes of `Fuzzer.bests`.
+ best_i_buf: []u32,
+ best_i_len: u32,
+ },
+
+ pub const Data = struct {
+ uid_slices: Data.UidSlices,
+ ints: []u64,
+ bytes: Bytes,
+ /// Contains untyped indexes in the order they were requested.
+ order: []u32,
+
+ pub const Bytes = struct {
+ entries: []Entry,
+ table: []u8,
+
+ pub const Entry = struct {
+ off: u32,
+ len: u32,
+ };
- pub fn init() Instrumentation {
- const pc_bitset_usizes = bitsetUsizes(exec.pc_counters.len);
- const alloc_usizes = pc_bitset_usizes * 2;
- const buf = gpa.alloc(u8, alloc_usizes * @sizeOf(usize)) catch @panic("OOM");
- var fba_ctx: std.heap.FixedBufferAllocator = .init(buf);
- const fba = fba_ctx.allocator();
+ pub fn deinit(b: Bytes) void {
+ gpa.free(b.entries);
+ gpa.free(b.table);
+ }
+ };
- var self: Instrumentation = .{
- .seen_pcs = fba.alloc(usize, pc_bitset_usizes) catch unreachable,
- .fresh_pcs = fba.alloc(usize, pc_bitset_usizes) catch unreachable,
+ pub const UidSlices = std.ArrayHashMapUnmanaged(Uid, struct {
+ base: u32,
+ len: u32,
+ }, Uid.hashmap_ctx, false);
};
- self.reset();
- return self;
- }
- pub fn reset(self: *Instrumentation) void {
- @memset(self.seen_pcs, 0);
- @memset(self.fresh_pcs, 0);
- self.const_pcs.clearRetainingCapacity();
- self.const_vals2.clearRetainingCapacity();
- self.const_vals4.clearRetainingCapacity();
- self.const_vals8.clearRetainingCapacity();
- self.const_vals16.clearRetainingCapacity();
- }
+ pub fn deinit(i: *Input) void {
+ i.data.uid_slices.deinit(gpa);
+ gpa.free(i.data.ints);
+ i.data.bytes.deinit();
+ gpa.free(i.data.order);
+ gpa.free(i.seen_uid_i);
+ gpa.free(i.weighted_uid_slice_i);
+ gpa.free(i.ref.best_i_buf);
+ i.* = undefined;
+ }
- /// If false is returned, then the pc is marked as seen
- pub fn constPcSeen(self: *Instrumentation, pc: usize) bool {
- return (self.const_pcs.getOrPut(gpa, pc) catch @panic("OOM")).found_existing;
- }
+ pub const none: Input = .{
+ .data = .{
+ .uid_slices = .empty,
+ .ints = &.{},
+ .bytes = .{
+ .entries = &.{},
+ .table = undefined,
+ },
+ .order = &.{},
+ },
+ .seen_uid_i = &.{},
+ .weighted_uid_slice_i = &.{},
- pub fn isFresh(self: *Instrumentation) bool {
- var hit_pcs = exec.pcBitsetIterator();
- for (self.seen_pcs) |seen_pcs| {
- if (hit_pcs.next() & ~seen_pcs != 0) return true;
- }
+ // Empty input is not referenced by `Fuzzer`
+ .ref = undefined,
+ };
- return false;
- }
+ pub const Index = enum(u32) {
+ pub const reserved_start: Index = .bytes_dry;
+ /// Only touches `Fuzzer.bytes_input`.
+ bytes_dry = math.maxInt(u32) - 1,
+ /// Only touches `Fuzzer.bytes_input` and `Fuzzer.input_builder`.
+ bytes_fresh = math.maxInt(u32),
+ _,
+ };
- /// Updates `fresh_pcs`
- pub fn setFresh(self: *Instrumentation) void {
- var hit_pcs = exec.pcBitsetIterator();
- for (self.seen_pcs, self.fresh_pcs) |seen_pcs, *fresh_pcs| {
- fresh_pcs.* = hit_pcs.next() & ~seen_pcs;
- }
- }
+ pub const Best = struct {
+ pc: u32,
+ min: Quality,
+ max: Quality,
+
+ /// Order of significance:
+ /// * n_pcs
+ /// * req.values
+ /// * req.bytes
+ pub const Quality = struct {
+ n_pcs: u32,
+ req: packed struct(u64) {
+ bytes: u32,
+ values: u32,
+
+ pub fn int(r: @This()) u64 {
+ return @bitCast(r);
+ }
+ },
- /// Returns if `exec.pc_counters` is a superset of `fresh_pcs`.
- pub fn atleastFresh(self: *Instrumentation) bool {
- var hit_pcs = exec.pcBitsetIterator();
- for (self.fresh_pcs) |fresh_pcs| {
- if (fresh_pcs & hit_pcs.next() != fresh_pcs) return false;
- }
- return true;
- }
+ pub fn betterLess(a: Quality, b: Quality) bool {
+ return (a.n_pcs < b.n_pcs) | ((a.n_pcs == b.n_pcs) & (a.req.int() < b.req.int()));
+ }
- /// Updates based off `fresh_pcs`
- fn updateSeen(self: *Instrumentation) void {
- comptime assert(abi.SeenPcsHeader.trailing[0] == .pc_bits_usize);
- const shared_seen_pcs: [*]volatile usize = @ptrCast(
- exec.shared_seen_pcs.items[@sizeOf(abi.SeenPcsHeader)..].ptr,
- );
+ pub fn betterMore(a: Quality, b: Quality) bool {
+ return (a.n_pcs > b.n_pcs) | ((a.n_pcs == b.n_pcs) & (a.req.int() < b.req.int()));
+ }
+ };
- for (self.seen_pcs, shared_seen_pcs, self.fresh_pcs) |*seen, *shared_seen, fresh| {
- seen.* |= fresh;
- if (fresh != 0)
- _ = @atomicRmw(usize, shared_seen, .Or, fresh, .monotonic);
- }
- }
-};
+ pub const Map = struct {
+ min: Input.Index,
+ max: Input.Index,
+ };
+ };
-const Fuzzer = struct {
- arena_ctx: std.heap.ArenaAllocator = .init(gpa),
- rng: std.Random.DefaultPrng = .init(0),
- test_one: abi.TestOne,
- /// The next input that will be given to the testOne function. When the
- /// current process crashes, this memory-mapped file is used to recover the
- /// input.
- input: MemoryMappedList,
-
- /// Minimized past inputs leading to new pc hits.
- /// These are randomly mutated in round-robin fashion
- /// Element zero is always an empty input. It is gauraunteed no other elements are empty.
- corpus: std.ArrayList([]const u8),
- corpus_pos: usize,
- /// List of past mutations that have led to new inputs. This way, the mutations that are the
- /// most effective are the most likely to be selected again. Starts with one of each mutation.
- mutations: std.ArrayList(Mutation) = .empty,
+ pub const Builder = struct {
+ uid_slices: std.ArrayHashMapUnmanaged(Uid, union {
+ ints: std.MultiArrayList(struct {
+ value: u64,
+ order_i: u32,
+ }),
+ bytes: std.MultiArrayList(struct {
+ value: Data.Bytes.Entry,
+ order_i: u32,
+ }),
+ }, Uid.hashmap_ctx, false),
+ bytes_table: std.ArrayList(u8),
+ // These will not overflow due to the 32-bit constraint on `MemoryMappedInput`
+ total_ints: u32,
+ total_bytes: u32,
+ weighted_len: u32,
+ /// Used to ensure that the 32-bit constraint in
+ /// `MemoryMappedInput` applies to this run.
+ smithed_len: u32,
+
+ pub const init: Builder = .{
+ .uid_slices = .empty,
+ .bytes_table = .empty,
+ .total_ints = 0,
+ .total_bytes = 0,
+ .weighted_len = 0,
+ .smithed_len = 4,
+ };
- /// Filesystem directory containing found inputs for future runs
- corpus_dir: Io.Dir,
- corpus_dir_idx: usize = 0,
+ pub fn addInt(b: *Builder, uid: Uid, int: u64) void {
+ const u = &b.uid_slices;
+ const gop = u.getOrPutValue(gpa, uid, .{ .ints = .empty }) catch @panic("OOM");
+ gop.value_ptr.ints.append(gpa, .{
+ .value = int,
+ .order_i = b.total_ints + b.total_bytes,
+ }) catch @panic("OOM");
+ b.total_ints += 1;
+ b.weighted_len += @intFromBool(math.isPowerOfTwo(gop.value_ptr.ints.len));
+ }
+
+ pub fn addBytes(b: *Builder, uid: Uid, bytes: []const u8) void {
+ const u = &b.uid_slices;
+ const gop = u.getOrPutValue(gpa, uid, .{ .bytes = .empty }) catch @panic("OOM");
+ gop.value_ptr.bytes.append(gpa, .{
+ .value = .{
+ .off = @intCast(b.bytes_table.items.len),
+ .len = @intCast(bytes.len),
+ },
+ .order_i = b.total_ints + b.total_bytes,
+ }) catch @panic("OOM");
+ b.bytes_table.appendSlice(gpa, bytes) catch @panic("OOM");
+ b.total_bytes += 1;
+ b.weighted_len += @intFromBool(math.isPowerOfTwo(gop.value_ptr.bytes.len));
+ }
+
+ pub fn checkSmithedLen(b: *Builder, n: usize) void {
+ const n32 = @min(n, math.maxInt(u32)); // second will overflow
+ b.smithed_len, const ov = @addWithOverflow(b.smithed_len, n32);
+ if (ov == 1) @panic("too much smith data requested (non-deterministic)");
+ }
+
+ /// Additionally resets the state of this structure.
+ ///
+ /// The caller must populate
+ /// * `.seen_uid_i`
+ /// * `.ref`
+ pub fn build(b: *Builder) Input {
+ const uid_slices = b.uid_slices.entries.slice();
+ var input: Input = .{
+ .data = .{
+ .uid_slices = Data.UidSlices.init(gpa, uid_slices.items(.key), &.{}) catch
+ @panic("OOM"),
+ .ints = gpa.alloc(u64, b.total_ints) catch @panic("OOM"),
+ .bytes = .{
+ .entries = gpa.alloc(Data.Bytes.Entry, b.total_bytes) catch @panic("OOM"),
+ .table = b.bytes_table.toOwnedSlice(gpa) catch @panic("OOM"),
+ },
+ .order = gpa.alloc(u32, b.total_ints + b.total_bytes) catch @panic("OOM"),
+ },
+ .seen_uid_i = gpa.alloc(u32, uid_slices.len) catch @panic("OOM"),
+ .weighted_uid_slice_i = gpa.alloc(u32, b.weighted_len) catch @panic("OOM"),
+ .ref = undefined,
+ };
+ var ints_pos: u32 = 0;
+ var bytes_pos: u32 = 0;
+ var weighted_pos: u32 = 0;
+
+ assert(mem.eql(Uid, uid_slices.items(.key), input.data.uid_slices.keys()));
+ for (
+ 0..,
+ uid_slices.items(.key),
+ uid_slices.items(.value),
+ input.data.uid_slices.values(),
+ ) |uid_i, uid, *uid_data, *slice| {
+ const weighted_len = 1 + math.log2_int(u32, len: switch (uid.kind) {
+ .int => {
+ const ints = uid_data.ints.slice();
+ @memcpy(input.data.ints[ints_pos..][0..ints.len], ints.items(.value));
+ for (ints.items(.order_i), ints_pos..) |order_i, data_i| {
+ input.data.order[order_i] = @intCast(data_i);
+ }
+ uid_data.ints.deinit(gpa);
+ slice.* = .{ .base = ints_pos, .len = @intCast(ints.len) };
+ ints_pos += @intCast(ints.len);
+ break :len @intCast(ints.len);
+ },
+ .bytes => {
+ const bytes = uid_data.bytes.slice();
+ @memcpy(
+ input.data.bytes.entries[bytes_pos..][0..bytes.len],
+ bytes.items(.value),
+ );
+ for (
+ bytes.items(.order_i),
+ b.total_ints + bytes_pos..,
+ ) |order_i, data_i| {
+ input.data.order[order_i] = @intCast(data_i);
+ }
+ uid_data.bytes.deinit(gpa);
+ slice.* = .{ .base = bytes_pos, .len = @intCast(bytes.len) };
+ bytes_pos += @intCast(bytes.len);
+ break :len @intCast(bytes.len);
+ },
+ });
+ const weighted = input.weighted_uid_slice_i[weighted_pos..][0..weighted_len];
+ @memset(weighted, @intCast(uid_i));
+ weighted_pos += weighted_len;
+ }
+
+ assert(ints_pos == b.total_ints);
+ assert(bytes_pos == b.total_bytes);
+ assert(weighted_pos == b.weighted_len);
+
+ b.uid_slices.clearRetainingCapacity();
+ b.total_ints = 0;
+ b.total_bytes = 0;
+ b.weighted_len = 0;
+ b.smithed_len = 4;
+ return input;
+ }
+ };
+ };
+
+ pub fn init() Fuzzer {
+ if (exec.pc_counters.len > math.maxInt(u32)) @panic("too many pcs");
+ const f: Fuzzer = .{
+ .xoshiro = .init(0),
+ .test_one = undefined,
+
+ .seen_pcs = gpa.alloc(usize, bitsetUsizes(exec.pc_counters.len)) catch @panic("OOM"),
+ .bests = .{
+ .len = 0,
+ .quality_buf = gpa.alloc(Input.Best, exec.pc_counters.len) catch @panic("OOM"),
+ .input_buf = gpa.alloc(Input.Best.Map, exec.pc_counters.len) catch @panic("OOM"),
+ },
+ .seen_uids = .empty,
- pub fn init(test_one: abi.TestOne, unit_test_name: []const u8) Fuzzer {
- var self: Fuzzer = .{
- .test_one = test_one,
- .input = undefined,
.corpus = .empty,
- .corpus_pos = 0,
- .mutations = .empty,
+ .corpus_pos = undefined,
+
+ .bytes_input = undefined,
+ .input_builder = .init,
+ .req_values = undefined,
+ .req_bytes = undefined,
+ .uid_data_i = .empty,
+ .mut_data = undefined,
+
+ .mmap_input = undefined,
.corpus_dir = undefined,
+ .start_corpus_dir = undefined,
};
- const arena = self.arena_ctx.allocator();
+ @memset(f.seen_pcs, 0);
+ return f;
+ }
- self.corpus_dir = exec.cache_f.createDirPathOpen(io, unit_test_name, .{}) catch |e|
+ /// May only be called after `f.setTest` has been called
+ pub fn reset(f: *Fuzzer) void {
+ f.test_one = undefined;
+
+ @memset(f.seen_pcs, 0);
+ f.bests.len = 0;
+ @memset(f.bests.quality_buf, undefined);
+ @memset(f.bests.input_buf, undefined);
+ for (f.seen_uids.keys(), f.seen_uids.values()) |uid, *u| {
+ switch (uid.kind) {
+ .int => u.slices.ints.deinit(gpa),
+ .bytes => u.slices.bytes.deinit(gpa),
+ }
+ }
+ f.seen_uids.clearRetainingCapacity();
+
+ f.corpus.clearRetainingCapacity();
+ f.corpus_pos = undefined;
+
+ f.uid_data_i.clearRetainingCapacity();
+
+ f.mmap_input.deinit();
+ f.corpus_dir.close(io);
+ f.start_corpus_dir = undefined;
+ }
+
+ pub fn setTest(f: *Fuzzer, test_one: abi.TestOne, unit_test_name: []const u8) void {
+ f.test_one = test_one;
+ f.corpus_dir = exec.cache_f.createDirPathOpen(io, unit_test_name, .{}) catch |e|
panic("failed to open directory '{s}': {t}", .{ unit_test_name, e });
- self.input = in: {
- const f = self.corpus_dir.createFile(io, "in", .{
+ f.mmap_input = map: {
+ const input = f.corpus_dir.createFile(io, "in", .{
.read = true,
.truncate = false,
// In case any other fuzz tests are running under the same test name,
@@ -419,181 +671,979 @@ const Fuzzer = struct {
error.WouldBlock => @panic("input file 'in' is in use by another fuzzing process"),
else => panic("failed to create input file 'in': {t}", .{e}),
};
- const size = f.length(io) catch |e| panic("failed to stat input file 'in': {t}", .{e});
- const map = (if (size < std.heap.page_size_max)
- MemoryMappedList.create(f, 8, std.heap.page_size_max)
- else
- MemoryMappedList.init(f, size, size)) catch |e|
- panic("failed to memory map input file 'in': {t}", .{e});
-
- // Perform a dry-run of the stored input if there was one in case it might reproduce a
- // crash.
- const old_in_len = mem.littleToNative(usize, mem.bytesAsValue(usize, map.items[0..8]).*);
- if (size >= 8 and old_in_len != 0 and map.items.len - 8 < old_in_len) {
- test_one(.fromSlice(@volatileCast(map.items[8..][0..old_in_len])));
+
+ var size = input.length(io) catch |e| panic("failed to stat input file 'in': {t}", .{e});
+ if (size < std.heap.page_size_max) {
+ size = std.heap.page_size_max;
+ input.setLength(io, size) catch |e| panic("failed to resize input file 'in': {t}", .{e});
}
- break :in map;
+ break :map MemoryMappedInput.init(input, size) catch |e|
+ panic("failed to memmap input file 'in': {t}", .{e});
};
- inst.reset();
- self.mutations.appendSlice(gpa, std.meta.tags(Mutation)) catch @panic("OOM");
- // Ensure there is never an empty corpus. Additionally, an empty input usually leads to
- // new inputs.
- self.addInput(&.{});
+ // Perform a dry-run of the stored input in case it might reproduce a crash.
+ const len = mem.readInt(u32, f.mmap_input.mmap.memory[0..4], .little);
+ if (len < f.mmap_input.mmap.memory[4..].len) {
+ f.mmap_input.len = len;
+ f.runBytes(f.mmap_input.inputSlice(), .bytes_dry);
+ f.mmap_input.clearRetainingCapacity();
+ }
+ }
+ pub fn loadCorpus(f: *Fuzzer) void {
+ f.corpus_pos = @enumFromInt(f.corpus.len);
+ f.corpus.append(gpa, .none) catch @panic("OOM"); // Also ensures the corpus is not empty
+ f.start_corpus_dir = @intCast(f.corpus.len);
while (true) {
- var name_buf: [@sizeOf(usize) * 2]u8 = undefined;
- const bytes = self.corpus_dir.readFileAlloc(
- io,
- std.fmt.bufPrint(&name_buf, "{x}", .{self.corpus_dir_idx}) catch unreachable,
- arena,
- .unlimited,
- ) catch |e| switch (e) {
+ var name_buf: [8]u8 = undefined;
+ const name = f.corpusFileName(&name_buf, @enumFromInt(f.corpus.len));
+ const bytes = f.corpus_dir.readFileAlloc(io, name, gpa, .unlimited) catch |e| switch (e) {
error.FileNotFound => break,
- else => panic("failed to read corpus file '{x}': {t}", .{ self.corpus_dir_idx, e }),
+ else => panic("failed to read corpus file '{s}': {t}", .{ name, e }),
};
- // No corpus file of length zero will ever be created
- if (bytes.len == 0)
- panic("corrupt corpus file '{x}' (len of zero)", .{self.corpus_dir_idx});
- self.addInput(bytes);
- self.corpus_dir_idx += 1;
+ defer gpa.free(bytes);
+ f.newInput(bytes, false);
}
+ f.corpus_pos = @enumFromInt(0);
+ }
- return self;
+ fn corpusFileName(f: *Fuzzer, buf: *[8]u8, i: Input.Index) []u8 {
+ const dir_i = @intFromEnum(i) - f.start_corpus_dir;
+ return std.fmt.bufPrint(buf, "{x}", .{dir_i}) catch unreachable;
}
- pub fn deinit(self: *Fuzzer) void {
- self.input.deinit();
- self.corpus.deinit(gpa);
- self.mutations.deinit(gpa);
- self.corpus_dir.close(io);
- self.arena_ctx.deinit();
- self.* = undefined;
+ fn rngInt(f: *Fuzzer, T: type) T {
+ comptime assert(@bitSizeOf(T) <= 64);
+ const Unsigned = @Int(.unsigned, @bitSizeOf(T));
+ return @bitCast(@as(Unsigned, @truncate(f.xoshiro.next())));
}
- pub fn addInput(self: *Fuzzer, bytes: []const u8) void {
- self.corpus.append(gpa, bytes) catch @panic("OOM");
- self.input.clearRetainingCapacity();
- self.input.ensureTotalCapacity(8 + bytes.len) catch |e|
- panic("could not resize shared input file: {t}", .{e});
- self.input.items.len = 8;
- self.input.appendSliceAssumeCapacity(bytes);
- self.run();
- inst.setFresh();
- inst.updateSeen();
+ fn rngLessThan(f: *Fuzzer, T: type, limit: T) T {
+ return std.Random.limitRangeBiased(T, f.rngInt(T), limit);
+ }
+
+ /// Used for generating small values rather than making many calls into the prng.
+ const SmallEntronopy = struct {
+ bits: u64,
+
+ pub fn take(e: *SmallEntronopy, T: type) T {
+ defer e.bits >>= @bitSizeOf(T);
+ return @truncate(e.bits);
+ }
+ };
+
+ fn isFresh(f: *Fuzzer) bool {
+ // Store as a bool instead of returning immediately to aid optimizations
+ // by reducing branching since a fresh input is the unlikely case.
+ var fresh: bool = false;
+
+ var n_pcs: u32 = 0;
+ var hit_pcs = exec.pcBitsetIterator();
+ for (f.seen_pcs) |seen| {
+ const hits = hit_pcs.next();
+ fresh |= hits & ~seen != 0;
+ n_pcs += @popCount(hits);
+ }
+
+ const quality: Input.Best.Quality = .{
+ .n_pcs = n_pcs,
+ .req = .{
+ .values = f.req_values,
+ .bytes = f.req_bytes,
+ },
+ };
+ for (f.bests.quality_buf[0..f.bests.len]) |best| {
+ if (exec.pc_counters[best.pc] == 0) continue;
+ fresh |= quality.betterLess(best.min) | quality.betterMore(best.max);
+ }
+
+ return fresh;
+ }
+
+ fn runBytes(f: *Fuzzer, bytes: []const u8, mode: Input.Index) void {
+ assert(mode == .bytes_dry or mode == .bytes_fresh);
+
+ f.bytes_input = .{ .in = bytes };
+ f.corpus_pos = mode;
+ f.run(0); // 0 since `f.uid_data_i` is unused
+ }
+
+ fn updateSeenPcs(f: *Fuzzer) void {
+ comptime assert(abi.SeenPcsHeader.trailing[0] == .pc_bits_usize);
+ const shared_seen_pcs: [*]volatile usize = @ptrCast(
+ exec.shared_seen_pcs[@sizeOf(abi.SeenPcsHeader)..].ptr,
+ );
+
+ var hit_pcs = exec.pcBitsetIterator();
+ for (f.seen_pcs, shared_seen_pcs) |*seen, *shared_seen| {
+ const new = hit_pcs.next() & ~seen.*;
+ if (new != 0) {
+ seen.* |= new;
+ _ = @atomicRmw(usize, shared_seen, .Or, new, .monotonic);
+ }
+ }
+ }
+
+ fn removeBest(f: *Fuzzer, i: Input.Index, best_i: u32, modify_fs_corpus: bool) void {
+ const ref = &f.corpus.items(.ref)[@intFromEnum(i)];
+ const list_i = mem.indexOfScalar(u32, ref.best_i_buf[0..ref.best_i_len], best_i).?;
+ ref.best_i_len -= 1;
+ ref.best_i_buf[list_i] = ref.best_i_buf[ref.best_i_len];
+
+ if (ref.best_i_len == 0 and @intFromEnum(i) >= f.start_corpus_dir and modify_fs_corpus) {
+ // The input is no longer valuable, so remove it.
+ var removed_input = f.corpus.get(@intFromEnum(i));
+ for (
+ removed_input.data.uid_slices.keys(),
+ removed_input.data.uid_slices.values(),
+ removed_input.seen_uid_i,
+ ) |uid, slice, seen_uid_i| {
+ switch (uid.kind) {
+ .int => {
+ const seen_ints = &f.seen_uids.values()[seen_uid_i].slices.ints;
+ const removed_ints = removed_input.data.ints[slice.base..][0..slice.len];
+ _ = seen_ints.swapRemove(for (0.., seen_ints.items) |idx, ints| {
+ if (removed_ints.ptr == ints.ptr) {
+ assert(removed_ints.len == ints.len);
+ break idx;
+ }
+ } else unreachable);
+ },
+ .bytes => {
+ const seen_bytes = &f.seen_uids.values()[seen_uid_i].slices.bytes;
+ const removed_bytes: Input.Data.Bytes = .{
+ .entries = removed_input.data.bytes.entries[slice.base..][0..slice.len],
+ .table = removed_input.data.bytes.table,
+ };
+ _ = seen_bytes.swapRemove(for (0.., seen_bytes.items) |idx, bytes| {
+ if (removed_bytes.entries.ptr == bytes.entries.ptr) {
+ assert(removed_bytes.entries.len == bytes.entries.len);
+ assert(removed_bytes.table.ptr == bytes.table.ptr);
+ assert(removed_bytes.table.len == bytes.table.len);
+ break idx;
+ }
+ } else unreachable);
+ },
+ }
+ }
+ removed_input.deinit();
+ f.corpus.swapRemove(@intFromEnum(i));
+
+ var removed_name_buf: [8]u8 = undefined;
+ const removed_name = f.corpusFileName(&removed_name_buf, i);
+
+ if (@intFromEnum(i) == f.corpus.len) {
+ f.corpus_dir.deleteFile(io, removed_name) catch |e| panic(
+ "failed to remove corpus file '{s}': {t}",
+ .{ removed_name, e },
+ );
+ return; // No item moved so no refs to update
+ }
+
+ var swapped_name_buf: [8]u8 = undefined;
+ const swapped_name = f.corpusFileName(&swapped_name_buf, @enumFromInt(f.corpus.len));
+
+ f.corpus_dir.rename(swapped_name, f.corpus_dir, removed_name, io) catch |e| panic(
+ "failed to rename corpus file '{s}' to '{s}': {t}",
+ .{ swapped_name, removed_name, e },
+ );
+
+ // Update references. `ref` can be reused since it was a swap remove
+ for (ref.best_i_buf[0..ref.best_i_len]) |update_pc_i| {
+ const best = &f.bests.input_buf[update_pc_i];
+ assert(@intFromEnum(best.min) == f.corpus.len or
+ @intFromEnum(best.max) == f.corpus.len);
+
+ if (@intFromEnum(best.min) == f.corpus.len) best.min = i;
+ if (@intFromEnum(best.max) == f.corpus.len) best.max = i;
+ }
+ }
}
- /// Assumes `fresh_pcs` correspond to the input
- fn minimizeInput(self: *Fuzzer) void {
- // The minimization technique is kept relatively simple, we sequentially try to remove each
- // byte and check that the new pcs and memory loads are still hit.
- var i = self.input.items.len;
- while (i != 8) {
- i -= 1;
- const old = self.input.orderedRemove(i);
+ pub fn newInput(f: *Fuzzer, bytes: []const u8, modify_fs_corpus: bool) void {
+ f.runBytes(bytes, .bytes_fresh);
+ f.req_values = f.input_builder.total_ints + f.input_builder.total_bytes;
+ f.req_bytes = @intCast(f.input_builder.bytes_table.items.len);
+ var input = f.input_builder.build();
+
+ f.uid_data_i.ensureTotalCapacity(gpa, input.data.uid_slices.entries.len) catch @panic("OOM");
+ for (
+ input.seen_uid_i,
+ input.data.uid_slices.keys(),
+ input.data.uid_slices.values(),
+ ) |*i, uid, slice| {
+ const gop = f.seen_uids.getOrPutValue(gpa, uid, switch (uid.kind) {
+ .int => .{ .slices = .{ .ints = .empty } },
+ .bytes => .{ .slices = .{ .bytes = .empty } },
+ }) catch @panic("OOM");
+ switch (uid.kind) {
+ .int => f.seen_uids.values()[gop.index].slices.ints.append(
+ gpa,
+ input.data.ints[slice.base..][0..slice.len],
+ ) catch @panic("OOM"),
+ .bytes => f.seen_uids.values()[gop.index].slices.bytes.append(gpa, .{
+ .entries = input.data.bytes.entries[slice.base..][0..slice.len],
+ .table = input.data.bytes.table,
+ }) catch @panic("OOM"),
+ }
+ i.* = @intCast(gop.index);
+ }
+
+ const quality: Input.Best.Quality = .{
+ .n_pcs = n_pcs: {
+ @setRuntimeSafety(builtin.mode == .Debug); // Necessary for vectorization
+ var n: u32 = 0;
+ for (exec.pc_counters) |c| {
+ n += @intFromBool(c != 0);
+ }
+ break :n_pcs n;
+ },
+ .req = .{
+ .values = f.req_values,
+ .bytes = f.req_bytes,
+ },
+ };
+
+ var best_i_list: std.ArrayList(u32) = .empty;
+ for (0.., f.bests.quality_buf[0..f.bests.len]) |best_i, best| {
+ if (exec.pc_counters[best.pc] == 0) continue;
- @memset(exec.pc_counters, 0);
- self.run();
+ const better_min = quality.betterLess(best.min);
+ const better_max = quality.betterMore(best.max);
+ if (!better_min and !better_max) {
+ @branchHint(.likely);
+ continue;
+ }
+ best_i_list.append(gpa, @intCast(best_i)) catch @panic("OOM");
- if (!inst.atleastFresh()) {
- self.input.insertAssumeCapacity(i, old);
+ const map = &f.bests.input_buf[best_i];
+ if (map.min != map.max) {
+ if (better_min) {
+ f.removeBest(map.min, @intCast(best_i), modify_fs_corpus);
+ }
+ if (better_max) {
+ f.removeBest(map.max, @intCast(best_i), modify_fs_corpus);
+ }
} else {
- // This removal may have led to new pcs or memory loads being hit, so we need to
- // update them to avoid duplicates.
- inst.setFresh();
+ if (better_min and better_max) {
+ f.removeBest(map.min, @intCast(best_i), modify_fs_corpus);
+ }
}
}
+
+ // Must come after the above since some inputs may be removed
+ const input_i: Input.Index = @enumFromInt(f.corpus.len);
+ if (input_i == Input.Index.reserved_start) {
+ @panic("corpus size limit exceeded");
+ }
+
+ for (best_i_list.items) |i| {
+ const best_qual = &f.bests.quality_buf[i];
+ const best_map = &f.bests.input_buf[i];
+
+ if (quality.betterLess(best_qual.min)) {
+ best_qual.min = quality;
+ best_map.min = input_i;
+ }
+ if (quality.betterMore(best_qual.max)) {
+ best_qual.max = quality;
+ best_map.max = input_i;
+ }
+ }
+
+ for (0.., exec.pc_counters) |i, hits| {
+ if (hits == 0) {
+ @branchHint(.likely);
+ continue;
+ }
+
+ if ((f.seen_pcs[i / @bitSizeOf(usize)] >> @intCast(i % @bitSizeOf(usize))) & 1 == 0) {
+ @branchHint(.unlikely);
+ best_i_list.append(gpa, f.bests.len) catch @panic("OOM");
+ f.bests.quality_buf[f.bests.len] = .{
+ .pc = @intCast(i),
+ .min = quality,
+ .max = quality,
+ };
+ f.bests.input_buf[f.bests.len] = .{ .min = input_i, .max = input_i };
+ f.bests.len += 1;
+ }
+ }
+
+ if (best_i_list.items.len == 0 and
+ modify_fs_corpus // Found by freshness; otherwise, it does not need to be better
+ ) {
+ @branchHint(.cold); // Nondeterministic test
+ std.log.warn("nondeterministic rerun", .{});
+ return;
+ }
+
+ input.ref.best_i_buf = best_i_list.toOwnedSlice(gpa) catch @panic("OOM");
+ input.ref.best_i_len = @intCast(input.ref.best_i_buf.len);
+ f.corpus.append(gpa, input) catch @panic("OOM");
+ f.corpus_pos = input_i;
+
+ // Must come after the above since `seen_pcs` is used
+ f.updateSeenPcs();
+
+ if (!modify_fs_corpus) return;
+
+ // Write new input to cache
+ var name_buf: [8]u8 = undefined;
+ const name = f.corpusFileName(&name_buf, input_i);
+ f.corpus_dir.writeFile(io, .{ .sub_path = name, .data = bytes }) catch |e|
+ panic("failed to write corpus file '{s}': {t}", .{ name, e });
}
- fn run(self: *Fuzzer) void {
- // `pc_counters` is not cleared since only new hits are relevant.
+ fn run(f: *Fuzzer, input_uids: usize) void {
+ @memset(exec.pc_counters, 0);
+ f.uid_data_i.items.len = input_uids;
+ @memset(f.uid_data_i.items, 0);
+ f.req_values = 0;
+ f.req_bytes = 0;
- mem.bytesAsValue(usize, self.input.items[0..8]).* =
- mem.nativeToLittle(usize, self.input.items.len - 8);
- self.test_one(.fromSlice(@volatileCast(self.input.items[8..])));
+ f.test_one();
+ _ = @atomicRmw(usize, &exec.seenPcsHeader().n_runs, .Add, 1, .monotonic);
+ }
- const header = mem.bytesAsValue(
- abi.SeenPcsHeader,
- exec.shared_seen_pcs.items[0..@sizeOf(abi.SeenPcsHeader)],
- );
- _ = @atomicRmw(usize, &header.n_runs, .Add, 1, .monotonic);
+ /// Returns a number of mutations to perform from 1-4
+ /// with smaller values exponentially more likely.
+ pub fn mutCount(rng: u16) u8 {
+ // The below provides the following distribution
+ // @clz(@clz( range mapped percentage ratio
+ // 0 -> 0 -> 4 1 = 93.750% (15 / 16 )
+ // 1 -> 1 - 255 -> 3 2 = 5.859% (15 / 256 )
+ // 2 -> 256 - 4095 -> 2 3 = .391% (<1 / 256 )
+ // 3 -> 4096 - 16383 -> 1 4 = .002% ( 1 / 65536)
+ // 4 -> 16384 - 32767 -> 1
+ // 5 -> 32768 - 65535 -> 1
+ return @as(u8, 4) - @min(@clz(@clz(rng)), 3);
}
- pub fn cycle(self: *Fuzzer) void {
- const input = self.corpus.items[self.corpus_pos];
- self.corpus_pos += 1;
- if (self.corpus_pos == self.corpus.items.len)
- self.corpus_pos = 0;
-
- const rng = self.rng.random();
- const m = while (true) {
- const m = self.mutations.items[rng.uintLessThanBiased(usize, self.mutations.items.len)];
- if (!m.mutate(
- rng,
- input,
- &self.input,
- self.corpus.items,
- inst.const_vals2.items,
- inst.const_vals4.items,
- inst.const_vals8.items,
- inst.const_vals16.items,
- )) continue;
- break m;
+ pub fn cycle(f: *Fuzzer) void {
+ assert(f.mmap_input.len == 0);
+ const corpus = f.corpus.slice();
+ const corpus_i = @intFromEnum(f.corpus_pos);
+
+ var small_entronopy: SmallEntronopy = .{ .bits = f.rngInt(u64) };
+ var n_mutate = mutCount(small_entronopy.take(u16));
+ const data = &corpus.items(.data)[corpus_i];
+ const weighted_uid_slice_i = corpus.items(.weighted_uid_slice_i)[corpus_i];
+ n_mutate *= @intFromBool(weighted_uid_slice_i.len != 0); // No static mutations on empty
+
+ f.mut_data = .{
+ .i = @splat(math.maxInt(u32)),
+ .seq = @splat(.{
+ .kind = .{
+ .class = undefined,
+ .copy = undefined,
+ .ordered_mutate = undefined,
+ .none = true,
+ },
+ .len = undefined,
+ .copy = undefined,
+ }),
};
- self.run();
+ const uid_slices = data.uid_slices.entries.slice();
+ for (
+ f.mut_data.i[0..n_mutate],
+ f.mut_data.seq[0..n_mutate],
+ ) |*i, *s| if ((data.order.len < 2) | (small_entronopy.take(u3) != 0)) {
+ // Mutation on uid
+ const uid_slice_wi = f.rngLessThan(u32, @intCast(weighted_uid_slice_i.len));
+ const uid_slice_i = weighted_uid_slice_i[uid_slice_wi];
+
+ const is_bytes = uid_slices.items(.key)[uid_slice_i].kind == .bytes;
+ const data_slice = uid_slices.items(.value)[uid_slice_i];
+ i.* = @as(u32, @intCast(data.ints.len)) * @intFromBool(is_bytes) +
+ data_slice.base + f.rngLessThan(u32, data_slice.len);
+ } else {
+ // Sequence mutation on order
+ const order_len: u32 = @intCast(data.order.len);
+ const order_i = f.rngLessThan(u32, order_len - 1);
+ s.* = .{
+ .kind = .{
+ .class = .replace,
+ .copy = true,
+ .ordered_mutate = true,
+ .none = false,
+ },
+ .len = @min(@clz(f.rngInt(u16)) + 1, order_len - order_i),
+ .copy = .{ .order_i = order_i },
+ };
+ i.* = data.order[order_i];
+ };
- if (inst.isFresh()) {
+ f.run(data.uid_slices.entries.len);
+ if (f.isFresh()) {
@branchHint(.unlikely);
- const header = mem.bytesAsValue(
- abi.SeenPcsHeader,
- exec.shared_seen_pcs.items[0..@sizeOf(abi.SeenPcsHeader)],
- );
- _ = @atomicRmw(usize, &header.unique_runs, .Add, 1, .monotonic);
-
- inst.setFresh();
- self.minimizeInput();
- inst.updateSeen();
-
- // An empty-input has always been tried, so if an empty input is fresh then the
- // test has to be non-deterministic. This has to be checked as duplicate empty
- // entries are not allowed.
- if (self.input.items.len - 8 == 0) {
- std.log.warn("non-deterministic test (empty input produces different hits)", .{});
- _ = @atomicRmw(usize, &header.unique_runs, .Sub, 1, .monotonic);
- return;
+ _ = @atomicRmw(usize, &exec.seenPcsHeader().unique_runs, .Add, 1, .monotonic);
+ f.newInput(f.mmap_input.inputSlice(), true);
+ }
+ f.mmap_input.clearRetainingCapacity();
+
+ assert(@intFromEnum(f.corpus_pos) < f.corpus.len);
+ f.corpus_pos = @enumFromInt((@intFromEnum(f.corpus_pos) + 1) % f.corpus.len);
+ }
+
+ fn weightsContain(int: u64, weights: []const abi.Weight) bool {
+ var contains: bool = false;
+ for (weights) |w| {
+ contains |= w.min <= int and int <= w.max;
+ }
+ return contains;
+ }
+
+ fn weightsContainBytes(bytes: []const u8, weights: []const abi.Weight) bool {
+ if (weights[0].min == 0 and weights[0].max == 0xff) {
+ // Fast path: all bytes are valid
+ return true;
+ }
+
+ var contains: bool = true;
+ for (bytes) |b| {
+ contains &= weightsContain(b, weights);
+ }
+ return contains;
+ }
+
+ fn sumWeightsInclusive(weights: []const abi.Weight) u64 {
+ var sum: u64 = math.maxInt(u64);
+ for (weights) |w| {
+ sum +%= (w.max - w.min +% 1) *% w.weight;
+ }
+ return sum;
+ }
+
+ fn weightedValue(f: *Fuzzer, weights: []const abi.Weight, incl_sum: u64) u64 {
+ var incl_n: u64 = f.rngInt(u64);
+ const limit = incl_sum +% 1;
+ if (limit != 0) incl_n = std.Random.limitRangeBiased(u64, incl_n, limit);
+
+ for (weights) |w| {
+ // (w.max - w.min + 1) * w.weight - 1
+ const incl_vals = (w.max - w.min) * w.weight + (w.weight - 1);
+ if (incl_n > incl_vals) {
+ incl_n -= incl_vals + 1;
+ } else {
+ const val = w.min + incl_n / w.weight;
+ assert(val <= w.max);
+ return val;
+ }
+ } else unreachable;
+ }
+
+ const Untyped = union {
+ int: u64,
+ bytes: []u8,
+ };
+
+ fn nextUntyped(f: *Fuzzer, uid: Uid, weights: []const abi.Weight) union(enum) {
+ copy: Untyped,
+ mutate: Untyped,
+ fresh: void,
+ } {
+ const corpus = f.corpus.slice();
+ const corpus_i = @intFromEnum(f.corpus_pos);
+ const data = &corpus.items(.data)[corpus_i];
+ var small_entronopy: SmallEntronopy = .{ .bits = f.rngInt(u64) };
+
+ const uid_i = data.uid_slices.getIndex(uid) orelse {
+ @branchHint(.unlikely);
+ return .fresh;
+ };
+ const data_slice = data.uid_slices.values()[uid_i];
+ var slice_i = f.uid_data_i.items[uid_i];
+ var data_i = data_slice.base + slice_i;
+
+ new_data: while (true) {
+ assert(slice_i == f.uid_data_i.items[uid_i] and data_i == data_slice.base + slice_i);
+ if (slice_i == data_slice.len) break :new_data;
+ assert(slice_i < data_slice.len);
+
+ f.uid_data_i.items[uid_i] += 1;
+ const mut_i = std.simd.firstIndexOfValue(
+ @as(@Vector(4, u32), f.mut_data.i),
+ data_i + @as(u32, @intCast(data.ints.len)) * @intFromEnum(uid.kind),
+ ) orelse {
+ @branchHint(.likely);
+ switch (uid.kind) {
+ .int => {
+ const int = data.ints[data_i];
+ if (weightsContain(int, weights)) {
+ @branchHint(.likely);
+ return .{ .copy = .{ .int = int } };
+ }
+ },
+ .bytes => {
+ const entry = data.bytes.entries[data_i];
+ const bytes = data.bytes.table[entry.off..][0..entry.len];
+ if (weightsContainBytes(bytes, weights)) {
+ @branchHint(.likely);
+ return .{ .copy = .{ .bytes = bytes } };
+ }
+ },
+ }
+ break :new_data;
+ };
+
+ const seq = &f.mut_data.seq[mut_i];
+ new_seq: {
+ if (!seq.kind.none) break :new_seq;
+
+ var opts: packed struct(u6) {
+ // Matches the layout of `mut_data.seq.kind`
+ insert: bool,
+ copy: bool,
+
+ seq: u2,
+ delete: bool,
+ splice: bool,
+ } = @bitCast(small_entronopy.take(u6));
+ if (opts.seq != 0) break :new_data;
+
+ const max_consume = data_slice.len - slice_i; // inclusive
+ if (opts.delete) {
+ f.uid_data_i.items[uid_i] += f.rngLessThan(u32, max_consume);
+ slice_i = f.uid_data_i.items[uid_i];
+ data_i = data_slice.base + slice_i;
+ continue;
+ }
+ opts.insert |= max_consume == 0;
+ seq.kind = .{
+ .class = if (opts.insert) .replace else .insert,
+ .copy = opts.copy,
+ .ordered_mutate = false,
+ .none = false,
+ };
+
+ if (!seq.kind.copy) {
+ seq.len = switch (seq.kind.class) {
+ .replace => f.rngLessThan(u32, max_consume) + 1,
+ .insert => @clz(f.rngInt(u16)) + 1,
+ };
+ seq.copy = undefined;
+ } else {
+ const src: SeqCopy, const src_len: u32 = if (!opts.splice) .{
+ switch (uid.kind) {
+ .int => .{ .ints = data.ints[data_slice.base..][0..data_slice.len] },
+ .bytes => .{ .bytes = .{
+ .entries = data.bytes.entries[data_slice.base..][0..data_slice.len],
+ .table = data.bytes.table,
+ } },
+ },
+ data_slice.len,
+ } else src: {
+ const seen_uid_i = corpus.items(.seen_uid_i)[corpus_i][uid_i];
+ const untyped_slices = f.seen_uids.values()[seen_uid_i].slices;
+ switch (uid.kind) {
+ .int => {
+ const slices = untyped_slices.ints.items;
+ const i = f.rngLessThan(u32, @intCast(slices.len));
+ break :src .{
+ .{ .ints = slices[i] },
+ @intCast(slices[i].len),
+ };
+ },
+ .bytes => {
+ const slices = untyped_slices.bytes.items;
+ const i = f.rngLessThan(u32, @intCast(slices.len));
+ break :src .{
+ .{ .bytes = slices[i] },
+ @intCast(slices[i].entries.len),
+ };
+ },
+ }
+ };
+
+ const off = f.rngLessThan(u32, src_len);
+ seq.len = f.rngLessThan(u32, src_len - off) + 1;
+ if (seq.kind.class == .replace) seq.len = @min(seq.len, max_consume);
+ seq.copy = switch (uid.kind) {
+ .int => .{ .ints = src.ints[off..][0..seq.len] },
+ .bytes => .{ .bytes = .{
+ .entries = src.bytes.entries[off..][0..seq.len],
+ .table = src.bytes.table,
+ } },
+ };
+ }
+ }
+
+ assert(!seq.kind.none);
+ f.uid_data_i.items[uid_i] -= @intFromBool(seq.kind.class == .insert);
+ seq.len -= 1;
+ seq.kind.none |= seq.len == 0;
+ f.mut_data.i[mut_i] += @intFromBool(seq.kind.class == .replace and seq.len != 0);
+
+ if (!seq.kind.copy) {
+ assert(!seq.kind.ordered_mutate);
+ break :new_data;
+ }
+ if (seq.kind.ordered_mutate) {
+ assert(seq.kind.class == .replace);
+ seq.copy.order_i += @intFromBool(seq.len != 0);
+ f.mut_data.i[mut_i] = data.order[seq.copy.order_i];
+ break :new_data;
+ }
+ switch (uid.kind) {
+ .int => {
+ const int = seq.copy.ints[0];
+ seq.copy.ints = seq.copy.ints[1..];
+ if (weightsContain(int, weights)) {
+ @branchHint(.likely);
+ return .{ .copy = .{ .int = int } };
+ }
+ },
+ .bytes => {
+ const entry = seq.copy.bytes.entries[0];
+ const bytes = seq.copy.bytes.table[entry.off..][0..entry.len];
+ seq.copy.bytes.entries = seq.copy.bytes.entries[1..];
+ if (weightsContainBytes(bytes, weights)) {
+ @branchHint(.likely);
+ return .{ .copy = .{ .bytes = bytes } };
+ }
+ },
+ }
+ break;
+ }
+
+ const opts: packed struct(u10) {
+ copy: u2,
+ fresh: u2,
+ splice: bool,
+ local_far: bool,
+ local_off: i4,
+ } = @bitCast(small_entronopy.take(u10));
+
+ if (opts.copy != 0) {
+ if (opts.fresh == 0 or slice_i == data_slice.len) return .fresh;
+ return .{ .mutate = switch (uid.kind) {
+ .int => .{ .int = data.ints[data_i] },
+ .bytes => .{ .bytes = b: {
+ const entry = data.bytes.entries[data_i];
+ break :b data.bytes.table[entry.off..][0..entry.len];
+ } },
+ } };
+ }
+
+ if (!opts.splice) {
+ const src_data_i = data_slice.base + if (!opts.local_far) i: {
+ const off = opts.local_off;
+ break :i if (off >= 0) @min(
+ f.uid_data_i.items[uid_i] +| @as(u4, @intCast(off)),
+ data_slice.len - 1,
+ ) else f.uid_data_i.items[uid_i] -| @abs(off);
+ } else f.rngLessThan(u32, data_slice.len);
+ switch (uid.kind) {
+ .int => {
+ const int = data.ints[src_data_i];
+ if (weightsContain(int, weights)) {
+ @branchHint(.likely);
+ return .{ .copy = .{ .int = int } };
+ }
+ },
+ .bytes => {
+ const entry = data.bytes.entries[src_data_i];
+ const bytes = data.bytes.table[entry.off..][0..entry.len];
+ if (weightsContainBytes(bytes, weights)) {
+ @branchHint(.likely);
+ return .{ .copy = .{ .bytes = bytes } };
+ }
+ },
+ }
+ } else {
+ const seen_uid_i = corpus.items(.seen_uid_i)[corpus_i][uid_i];
+ const untyped_slices = f.seen_uids.values()[seen_uid_i].slices;
+ switch (uid.kind) {
+ .int => {
+ const slices = untyped_slices.ints.items;
+ const from = slices[f.rngLessThan(u32, @intCast(slices.len))];
+ const int = from[f.rngLessThan(u32, @intCast(from.len))];
+ if (weightsContain(int, weights)) {
+ @branchHint(.likely);
+ return .{ .copy = .{ .int = int } };
+ }
+ },
+ .bytes => {
+ const slices = untyped_slices.bytes.items;
+ const from = slices[f.rngLessThan(u32, @intCast(slices.len))];
+ const entry_i = f.rngLessThan(u32, @intCast(from.entries.len));
+ const entry = from.entries[entry_i];
+ const bytes = from.table[entry.off..][0..entry.len];
+ if (weightsContainBytes(bytes, weights)) {
+ @branchHint(.likely);
+ return .{ .copy = .{ .bytes = bytes } };
+ }
+ },
+ }
+ }
+ return .fresh;
+ }
+
+ pub fn nextInt(f: *Fuzzer, uid: Uid, weights: []const abi.Weight) u64 {
+ f.req_values += 1;
+ if (@intFromEnum(f.corpus_pos) >= @intFromEnum(Input.Index.reserved_start)) {
+ @branchHint(.unlikely);
+ const int = f.bytes_input.valueWeightedWithHash(u64, weights, undefined);
+ if (f.corpus_pos == .bytes_fresh) {
+ f.input_builder.checkSmithedLen(8);
+ f.input_builder.addInt(uid, int);
+ }
+ return int;
+ }
+ const int = f.nextIntInner(uid, weights);
+ f.mmap_input.appendLittleInt(u64, int);
+ return int;
+ }
+
+ fn nextIntInner(f: *Fuzzer, uid: Uid, weights: []const abi.Weight) u64 {
+ return switch (f.nextUntyped(uid, weights)) {
+ .copy => |u| u.int,
+ .mutate, .fresh => f.weightedValue(weights, sumWeightsInclusive(weights)),
+ };
+ }
+
+ pub fn nextEos(f: *Fuzzer, uid: Uid, weights: []const abi.Weight) bool {
+ f.req_values += 1;
+ if (@intFromEnum(f.corpus_pos) >= @intFromEnum(Input.Index.reserved_start)) {
+ @branchHint(.unlikely);
+ const eos = f.bytes_input.eosWeightedWithHash(weights, undefined);
+ if (f.corpus_pos == .bytes_fresh) {
+ f.input_builder.checkSmithedLen(1);
+ f.input_builder.addInt(uid, @intFromBool(eos));
+ }
+ return eos;
+ }
+ // `nextIntInner` is already guaranteed to eventually return `1`
+ const eos = @as(u1, @intCast(f.nextIntInner(uid, weights))) != 0;
+ f.mmap_input.appendLittleInt(u8, @intFromBool(eos));
+ return eos;
+ }
+
+ fn mutateBytes(f: *Fuzzer, in: []u8, out: []u8, weights: []const abi.Weight) void {
+ assert(in.len != 0);
+ const weights_incl_sum = sumWeightsInclusive(weights);
+
+ var small_entronopy: SmallEntronopy = .{ .bits = f.rngInt(u64) };
+ var muts = mutCount(small_entronopy.take(u16));
+ var rem_out = out;
+ var rem_copy = in;
+ while (rem_out.len != 0 and muts != 0) {
+ muts -= 1;
+ const opts: packed struct(u4) {
+ kind: enum(u2) {
+ random,
+ stream_copy,
+ stream_discard,
+ absolute_copy,
+ },
+ small: u2,
+
+ pub fn limitSmall(o: @This(), n: usize) u32 {
+ return @min(
+ @as(u32, @intCast(n)),
+ @as(u32, if (o.small != 0) 8 else math.maxInt(u32)),
+ );
+ }
+ } = @bitCast(small_entronopy.take(u4));
+ s: switch (opts.kind) {
+ .random => {
+ const n = f.rngLessThan(u32, opts.limitSmall(rem_out.len)) + 1;
+ for (rem_out[0..n]) |*o| {
+ o.* = @intCast(f.weightedValue(weights, weights_incl_sum));
+ }
+ rem_out = rem_out[n..];
+ },
+ .stream_copy => {
+ if (rem_copy.len == 0) continue :s .random;
+ const n = @min(
+ f.rngLessThan(u32, opts.limitSmall(rem_copy.len)) + 1,
+ rem_out.len,
+ );
+ @memcpy(rem_out[0..n], rem_copy[0..n]);
+ rem_out = rem_out[n..];
+ rem_copy = rem_copy[n..];
+ },
+ .stream_discard => {
+ if (rem_copy.len == 0) continue :s .random;
+ const n = f.rngLessThan(u32, opts.limitSmall(rem_copy.len)) + 1;
+ rem_copy = rem_copy[n..];
+ },
+ .absolute_copy => {
+ const in_len: u32 = @intCast(in.len);
+ const off = f.rngLessThan(u32, in_len);
+ const len = @min(
+ f.rngLessThan(u32, in_len - off) + 1,
+ opts.limitSmall(rem_out.len),
+ );
+ @memcpy(rem_out[0..len], in[off..][0..len]);
+ rem_out = rem_out[len..];
+ },
}
+ }
- const arena = self.arena_ctx.allocator();
- const bytes = arena.dupe(u8, @volatileCast(self.input.items[8..])) catch @panic("OOM");
+ const copy = @min(rem_out.len, rem_copy.len);
+ @memcpy(rem_out[0..copy], rem_copy[0..copy]);
+ for (rem_out[copy..]) |*o| {
+ o.* = @intCast(f.weightedValue(weights, weights_incl_sum));
+ }
+ }
- self.corpus.append(gpa, bytes) catch @panic("OOM");
- self.mutations.appendNTimes(gpa, m, 6) catch @panic("OOM");
+ fn nextBytesInner(f: *Fuzzer, uid: Uid, out: []u8, weights: []const abi.Weight) void {
+ so: switch (f.nextUntyped(uid, weights)) {
+ .copy => |u| {
+ if (u.bytes.len >= out.len) {
+ @branchHint(.likely);
+ @memcpy(out, u.bytes[0..out.len]);
+ return;
+ }
- // Write new corpus to cache
- var name_buf: [@sizeOf(usize) * 2]u8 = undefined;
- self.corpus_dir.writeFile(io, .{
- .sub_path = std.fmt.bufPrint(&name_buf, "{x}", .{self.corpus_dir_idx}) catch unreachable,
- .data = bytes,
- }) catch |e| panic("failed to write corpus file '{x}': {t}", .{ self.corpus_dir_idx, e });
- self.corpus_dir_idx += 1;
+ @memcpy(out[0..u.bytes.len], u.bytes);
+ const weights_incl_sum = sumWeightsInclusive(weights);
+ for (out[u.bytes.len..]) |*o| {
+ o.* = @intCast(f.weightedValue(weights, weights_incl_sum));
+ }
+ },
+ .mutate => |u| {
+ if (u.bytes.len == 0) continue :so .fresh;
+ f.mutateBytes(u.bytes, out, weights);
+ },
+ .fresh => {
+ const weights_incl_sum = sumWeightsInclusive(weights);
+ for (out) |*o| {
+ o.* = @intCast(f.weightedValue(weights, weights_incl_sum));
+ }
+ },
}
}
+
+ pub fn nextBytes(f: *Fuzzer, uid: Uid, out: []u8, weights: []const abi.Weight) void {
+ f.req_values += 1;
+ f.req_bytes +%= @truncate(out.len); // If this wraps, the 32-bit data limit has been
+ // exceeded and this function should panic, so wrapping is fine.
+ if (@intFromEnum(f.corpus_pos) >= @intFromEnum(Input.Index.reserved_start)) {
+ @branchHint(.unlikely);
+ f.bytes_input.bytesWeightedWithHash(out, weights, undefined);
+ if (f.corpus_pos == .bytes_fresh) {
+ f.input_builder.checkSmithedLen(out.len);
+ f.input_builder.addBytes(uid, out);
+ }
+ return;
+ }
+
+ f.nextBytesInner(uid, out, weights);
+ f.mmap_input.appendSlice(out);
+ }
+
+ fn nextSliceInner(
+ f: *Fuzzer,
+ uid: Uid,
+ buf: []u8,
+ len_weights: []const abi.Weight,
+ byte_weights: []const abi.Weight,
+ ) u32 {
+ so: switch (f.nextUntyped(uid, byte_weights)) {
+ .copy => |u| {
+ var len: u32 = @intCast(u.bytes.len);
+ if (!weightsContain(len, len_weights)) {
+ @branchHint(.unlikely);
+ len = @intCast(f.weightedValue(len_weights, sumWeightsInclusive(len_weights)));
+ }
+
+ if (u.bytes.len >= len) {
+ @branchHint(.likely);
+ @memcpy(buf[0..len], u.bytes[0..len]);
+ return len;
+ }
+
+ @memcpy(buf[0..u.bytes.len], u.bytes);
+ const weights_incl_sum = sumWeightsInclusive(byte_weights);
+ for (buf[u.bytes.len..len]) |*o| {
+ o.* = @intCast(f.weightedValue(byte_weights, weights_incl_sum));
+ }
+ return len;
+ },
+ .mutate => |u| {
+ if (u.bytes.len == 0) continue :so .fresh;
+ const len: u32 = len: {
+ const offseted: packed struct {
+ is: u3,
+ sub: bool,
+ by: u3,
+ } = @bitCast(f.rngInt(u7));
+ if (offseted.is != 0) {
+ const len = if (offseted.sub)
+ @as(u32, @intCast(u.bytes.len)) -| offseted.by
+ else
+ @min(u.bytes.len + offseted.by, @as(u32, @intCast(buf.len)));
+ if (weightsContain(len, len_weights)) {
+ break :len len;
+ }
+ }
+ break :len @intCast(f.weightedValue(
+ len_weights,
+ sumWeightsInclusive(len_weights),
+ ));
+ };
+ f.mutateBytes(u.bytes, buf[0..len], byte_weights);
+ return len;
+ },
+ .fresh => {
+ const len: u32 = @intCast(f.weightedValue(
+ len_weights,
+ sumWeightsInclusive(len_weights),
+ ));
+ const weights_incl_sum = sumWeightsInclusive(byte_weights);
+ for (buf[0..len]) |*o| {
+ o.* = @intCast(f.weightedValue(byte_weights, weights_incl_sum));
+ }
+ return len;
+ },
+ }
+ }
+
+ pub fn nextSlice(
+ f: *Fuzzer,
+ uid: Uid,
+ buf: []u8,
+ len_weights: []const abi.Weight,
+ byte_weights: []const abi.Weight,
+ ) u32 {
+ f.req_values += 1;
+ if (@intFromEnum(f.corpus_pos) >= @intFromEnum(Input.Index.reserved_start)) {
+ @branchHint(.unlikely);
+ const n = f.bytes_input.sliceWeightedWithHash(
+ buf,
+ len_weights,
+ byte_weights,
+ undefined,
+ );
+ if (f.corpus_pos == .bytes_fresh) {
+ f.input_builder.checkSmithedLen(@as(usize, 4) + n);
+ f.input_builder.addBytes(uid, buf[0..n]);
+ }
+ return n;
+ }
+
+ const n = f.nextSliceInner(uid, buf, len_weights, byte_weights);
+ f.mmap_input.appendLittleInt(u32, n);
+ f.mmap_input.appendSlice(buf[0..n]);
+ f.req_bytes += n;
+ return n;
+ }
};
-/// Instrumentation must not be triggered before this function is called
export fn fuzzer_init(cache_dir_path: abi.Slice) void {
- inst.depreinit();
exec = .init(cache_dir_path.toSlice());
- inst = .init();
+ fuzzer = .init();
}
-/// Invalid until `fuzzer_init` is called.
export fn fuzzer_coverage() abi.Coverage {
const coverage_id = exec.pc_digest;
- const header: *const abi.SeenPcsHeader = @ptrCast(@volatileCast(exec.shared_seen_pcs.items.ptr));
+ const header = @volatileCast(exec.seenPcsHeader());
var seen_count: usize = 0;
for (header.seenBits()) |chunk| {
@@ -608,107 +1658,63 @@ export fn fuzzer_coverage() abi.Coverage {
};
}
-/// fuzzer_init must be called beforehand
-export fn fuzzer_init_test(test_one: abi.TestOne, unit_test_name: abi.Slice) void {
+export fn fuzzer_set_test(test_one: abi.TestOne, unit_test_name: abi.Slice) void {
current_test_name = unit_test_name.toSlice();
- fuzzer = .init(test_one, unit_test_name.toSlice());
+ fuzzer.setTest(test_one, unit_test_name.toSlice());
}
-/// fuzzer_init_test must be called beforehand
-/// The callee owns the memory of bytes and must not free it until the fuzzer is finished.
export fn fuzzer_new_input(bytes: abi.Slice) void {
- // An entry of length zero is always added and duplicates of it are not allowed.
- if (bytes.len != 0)
- fuzzer.addInput(bytes.toSlice());
+ if (bytes.len == 0) return; // An entry of length zero is always present
+ fuzzer.newInput(bytes.toSlice(), false);
}
-/// fuzzer_init_test must be called first
export fn fuzzer_main(limit_kind: abi.LimitKind, amount: u64) void {
+ fuzzer.loadCorpus();
switch (limit_kind) {
.forever => while (true) fuzzer.cycle(),
.iterations => for (0..amount) |_| fuzzer.cycle(),
}
+ fuzzer.reset();
}
-export fn fuzzer_unslide_address(addr: usize) usize {
- const si = std.debug.getSelfDebugInfo() catch @compileError("unsupported");
- const slide = si.getModuleSlide(io, addr) catch |err| {
- std.debug.panic("failed to find virtual address slide: {t}", .{err});
- };
- return addr - slide;
-}
-
-/// Helps determine run uniqueness in the face of recursion.
-/// Currently not used by the fuzzer.
-export threadlocal var __sancov_lowest_stack: usize = 0;
-
-/// Inline since the return address of the callee is required
-inline fn genericConstCmp(T: anytype, val: T, comptime const_vals_field: []const u8) void {
- if (!inst.constPcSeen(@returnAddress())) {
- @branchHint(.unlikely);
- @field(inst, const_vals_field).append(gpa, val) catch @panic("OOM");
- }
-}
-
-export fn __sanitizer_cov_trace_const_cmp1(const_arg: u8, arg: u8) void {
- _ = const_arg;
- _ = arg;
+export fn fuzzer_int(uid: Uid, weights: abi.Weights) u64 {
+ assert(uid.kind == .int);
+ return fuzzer.nextInt(uid, weights.toSlice());
}
-export fn __sanitizer_cov_trace_const_cmp2(const_arg: u16, arg: u16) void {
- _ = arg;
- genericConstCmp(u16, const_arg, "const_vals2");
+export fn fuzzer_eos(uid: Uid, weights: abi.Weights) bool {
+ assert(uid.kind == .int);
+ return fuzzer.nextEos(uid, weights.toSlice());
}
-export fn __sanitizer_cov_trace_const_cmp4(const_arg: u32, arg: u32) void {
- _ = arg;
- genericConstCmp(u32, const_arg, "const_vals4");
+export fn fuzzer_bytes(uid: Uid, out: abi.MutSlice, weights: abi.Weights) void {
+ assert(uid.kind == .bytes);
+ return fuzzer.nextBytes(uid, out.toSlice(), weights.toSlice());
}
-export fn __sanitizer_cov_trace_const_cmp8(const_arg: u64, arg: u64) void {
- _ = arg;
- genericConstCmp(u64, const_arg, "const_vals8");
+export fn fuzzer_slice(
+ uid: Uid,
+ buf: abi.MutSlice,
+ len_weights: abi.Weights,
+ byte_weights: abi.Weights,
+) u32 {
+ assert(uid.kind == .bytes);
+ return fuzzer.nextSlice(uid, buf.toSlice(), len_weights.toSlice(), byte_weights.toSlice());
}
-export fn __sanitizer_cov_trace_switch(val: u64, cases: [*]const u64) void {
- _ = val;
- if (!inst.constPcSeen(@returnAddress())) {
- @branchHint(.unlikely);
- const case_bits = cases[1];
- const cases_slice = cases[2..][0..cases[0]];
- switch (case_bits) {
- // 8-bit cases are ignored because they are likely to be randomly generated
- 0...8 => {},
- 9...16 => for (cases_slice) |c|
- inst.const_vals2.append(gpa, @truncate(c)) catch @panic("OOM"),
- 17...32 => for (cases_slice) |c|
- inst.const_vals4.append(gpa, @truncate(c)) catch @panic("OOM"),
- 33...64 => for (cases_slice) |c|
- inst.const_vals8.append(gpa, @truncate(c)) catch @panic("OOM"),
- else => {}, // Should be impossible
- }
- }
-}
-
-export fn __sanitizer_cov_trace_cmp1(arg1: u8, arg2: u8) void {
- _ = arg1;
- _ = arg2;
-}
-
-export fn __sanitizer_cov_trace_cmp2(arg1: u16, arg2: u16) void {
- _ = arg1;
- _ = arg2;
-}
-
-export fn __sanitizer_cov_trace_cmp4(arg1: u32, arg2: u32) void {
- _ = arg1;
- _ = arg2;
+export fn fuzzer_unslide_address(addr: usize) usize {
+ const si = std.debug.getSelfDebugInfo() catch @compileError("unsupported");
+ const slide = si.getModuleSlide(io, addr) catch |err| {
+ // The LLVM backend seems to insert placeholder values of `1` in __sancov_pcs1
+ if (addr == 1) return 1;
+ panic("failed to find virtual address slide for address 0x{x}: {t}", .{ addr, err });
+ };
+ return addr - slide;
}
-export fn __sanitizer_cov_trace_cmp8(arg1: u64, arg2: u64) void {
- _ = arg1;
- _ = arg2;
-}
+/// Helps determine run uniqueness in the face of recursion.
+/// Currently not used by the fuzzer.
+export threadlocal var __sancov_lowest_stack: usize = 0;
export fn __sanitizer_cov_trace_pc_indir(callee: usize) void {
// Not valuable because we already have pc tracing via 8bit counters.
@@ -729,723 +1735,117 @@ export fn __sanitizer_cov_pcs_init(start: usize, end: usize) void {
_ = end;
}
-/// Copy all of source into dest at position 0.
-/// If the slices overlap, dest.ptr must be <= src.ptr.
-fn volatileCopyForwards(comptime T: type, dest: []volatile T, source: []const volatile T) void {
- for (dest, source) |*d, s| d.* = s;
-}
-
-/// Copy all of source into dest at position 0.
-/// If the slices overlap, dest.ptr must be >= src.ptr.
-fn volatileCopyBackwards(comptime T: type, dest: []volatile T, source: []const volatile T) void {
- var i = source.len;
- while (i > 0) {
- i -= 1;
- dest[i] = source[i];
- }
-}
-
-const Mutation = enum {
- /// Applies .insert_*_span, .push_*_span
- /// For wtf-8, this limits code units, not code points
- const max_insert_len = 12;
- /// Applies to .insert_large_*_span and .push_large_*_span
- /// 4096 is used as it is a common sector size
- const max_large_insert_len = 4096;
- /// Applies to .delete_span and .pop_span
- const max_delete_len = 16;
- /// Applies to .set_*span, .move_span, .set_existing_span
- const max_set_len = 12;
- const max_replicate_len = 64;
- const AddValue = i6;
- const SmallValue = i10;
-
- delete_byte,
- delete_span,
- /// Removes the last byte from the input
- pop_byte,
- pop_span,
- /// Inserts a group of bytes which is already in the input and removes the original copy.
- move_span,
- /// Replaces a group of bytes in the input with another group of bytes in the input
- set_existing_span,
- insert_existing_span,
- push_existing_span,
- set_rng_byte,
- set_rng_span,
- insert_rng_byte,
- insert_rng_span,
- /// Adds a byte to the end of the input
- push_rng_byte,
- push_rng_span,
- set_zero_byte,
- set_zero_span,
- insert_zero_byte,
- insert_zero_span,
- push_zero_byte,
- push_zero_span,
- /// Inserts a lot of zeros to the end of the input
- /// This is intended to work with fuzz tests that require data in (large) blocks
- push_large_zero_span,
- /// Inserts a group of ascii printable character
- insert_print_span,
- /// Inserts a group of character from a...z, A...Z, 0...9, _, and ' '
- insert_common_span,
- /// Inserts a group of ascii digits possibly preceded by a `-`
- insert_integer,
- /// Code units are evenly distributed between one to four
- insert_wtf8_char,
- insert_wtf8_span,
- /// Inserts a group of bytes from another input
- insert_splice_span,
- // utf16 is not yet included since insertion of random bytes should adaquetly check
- // BMP character, surrogate handling, and occasionally chacters outside of the BMP.
- set_print_span,
- set_common_span,
- set_splice_span,
- /// Similar to set_splice_span, but the bytes are copied to the same index instead of a random
- replicate_splice_span,
- push_print_span,
- push_common_span,
- push_integer,
- push_wtf8_char,
- push_wtf8_span,
- push_splice_span,
- /// Clears a random amount of high bits of a byte
- truncate_8,
- truncate_16le,
- truncate_16be,
- truncate_32le,
- truncate_32be,
- truncate_64le,
- truncate_64be,
- /// Flips a random bit
- xor_1,
- /// Swaps up to three bits of a byte biased to less bits
- xor_few_8,
- /// Swaps up to six bits of a 16-bit value biased to less bits
- xor_few_16,
- /// Swaps up to nine bits of a 32-bit value biased to less bits
- xor_few_32,
- /// Swaps up to twelve bits of 64-bit value biased to less bits
- xor_few_64,
- /// Adds to a byte a value of type AddValue
- add_8,
- add_16le,
- add_16be,
- add_32le,
- add_32be,
- add_64le,
- add_64be,
- /// Sets a 16-bit little-endian value to a value of type SmallValue
- set_small_16le,
- set_small_16be,
- set_small_32le,
- set_small_32be,
- set_small_64le,
- set_small_64be,
- insert_small_16le,
- insert_small_16be,
- insert_small_32le,
- insert_small_32be,
- insert_small_64le,
- insert_small_64be,
- push_small_16le,
- push_small_16be,
- push_small_32le,
- push_small_32be,
- push_small_64le,
- push_small_64be,
- set_const_16,
- set_const_32,
- set_const_64,
- set_const_128,
- insert_const_16,
- insert_const_32,
- insert_const_64,
- insert_const_128,
- push_const_16,
- push_const_32,
- push_const_64,
- push_const_128,
- /// Sets a byte with up to three bits set biased to less bits
- set_few_8,
- /// Sets a 16-bit value with up to six bits set biased to less bits
- set_few_16,
- /// Sets a 32-bit value with up to nine bits set biased to less bits
- set_few_32,
- /// Sets a 64-bit value with up to twelve bits set biased to less bits
- set_few_64,
- insert_few_8,
- insert_few_16,
- insert_few_32,
- insert_few_64,
- push_few_8,
- push_few_16,
- push_few_32,
- push_few_64,
- /// Randomizes a random contigous group of bits in a byte
- packed_set_rng_8,
- packed_set_rng_16le,
- packed_set_rng_16be,
- packed_set_rng_32le,
- packed_set_rng_32be,
- packed_set_rng_64le,
- packed_set_rng_64be,
-
- fn fewValue(rng: std.Random, T: type, comptime bits: u16) T {
- var result: T = 0;
- var remaining_bits = rng.intRangeAtMostBiased(u16, 1, bits);
- while (remaining_bits > 0) {
- result |= @shlExact(@as(T, 1), rng.int(math.Log2Int(T)));
- remaining_bits -= 1;
- }
- return result;
- }
-
- /// Returns if the mutation was applicable to the input
- pub fn mutate(
- mutation: Mutation,
- rng: std.Random,
- in: []const u8,
- out: *MemoryMappedList,
- corpus: []const []const u8,
- const_vals2: []const u16,
- const_vals4: []const u32,
- const_vals8: []const u64,
- const_vals16: []const u128,
- ) bool {
- out.clearRetainingCapacity();
- const new_capacity = 8 + in.len + @max(
- 16, // builtin 128 value
- Mutation.max_insert_len,
- Mutation.max_large_insert_len,
- );
- out.ensureTotalCapacity(new_capacity) catch |e|
- panic("could not resize shared input file: {t}", .{e});
- out.items.len = 8; // Length field
-
- const applied = switch (mutation) {
- inline else => |m| m.comptimeMutate(
- rng,
- in,
- out,
- corpus,
- const_vals2,
- const_vals4,
- const_vals8,
- const_vals16,
- ),
- };
- if (!applied)
- assert(out.items.len == 8)
- else
- assert(out.items.len <= new_capacity);
- return applied;
- }
-
- /// Assumes out has already been cleared
- fn comptimeMutate(
- comptime mutation: Mutation,
- rng: std.Random,
- in: []const u8,
- out: *MemoryMappedList,
- corpus: []const []const u8,
- const_vals2: []const u16,
- const_vals4: []const u32,
- const_vals8: []const u64,
- const_vals16: []const u128,
- ) bool {
- const Class = enum { new, remove, rmw, move_span, replicate_splice_span };
- const class: Class, const class_ctx = switch (mutation) {
- // zig fmt: off
- .move_span => .{ .move_span, null },
- .replicate_splice_span => .{ .replicate_splice_span, null },
-
- .delete_byte => .{ .remove, .{ .delete, 1 } },
- .delete_span => .{ .remove, .{ .delete, max_delete_len } },
-
- .pop_byte => .{ .remove, .{ .pop, 1 } },
- .pop_span => .{ .remove, .{ .pop, max_delete_len } },
-
- .set_rng_byte => .{ .new, .{ .set , 1, .rng , .one } },
- .set_zero_byte => .{ .new, .{ .set , 1, .zero , .one } },
- .set_rng_span => .{ .new, .{ .set , 1, .rng , .many } },
- .set_zero_span => .{ .new, .{ .set , 1, .zero , .many } },
- .set_common_span => .{ .new, .{ .set , 1, .common , .many } },
- .set_print_span => .{ .new, .{ .set , 1, .print , .many } },
- .set_existing_span => .{ .new, .{ .set , 2, .existing, .many } },
- .set_splice_span => .{ .new, .{ .set , 1, .splice , .many } },
- .set_const_16 => .{ .new, .{ .set , 2, .@"const", const_vals2 } },
- .set_const_32 => .{ .new, .{ .set , 4, .@"const", const_vals4 } },
- .set_const_64 => .{ .new, .{ .set , 8, .@"const", const_vals8 } },
- .set_const_128 => .{ .new, .{ .set , 16, .@"const", const_vals16 } },
- .set_small_16le => .{ .new, .{ .set , 2, .small , .{ i16, .little } } },
- .set_small_32le => .{ .new, .{ .set , 4, .small , .{ i32, .little } } },
- .set_small_64le => .{ .new, .{ .set , 8, .small , .{ i64, .little } } },
- .set_small_16be => .{ .new, .{ .set , 2, .small , .{ i16, .big } } },
- .set_small_32be => .{ .new, .{ .set , 4, .small , .{ i32, .big } } },
- .set_small_64be => .{ .new, .{ .set , 8, .small , .{ i64, .big } } },
- .set_few_8 => .{ .new, .{ .set , 1, .few , .{ u8 , 3 } } },
- .set_few_16 => .{ .new, .{ .set , 2, .few , .{ u16, 6 } } },
- .set_few_32 => .{ .new, .{ .set , 4, .few , .{ u32, 9 } } },
- .set_few_64 => .{ .new, .{ .set , 8, .few , .{ u64, 12 } } },
-
- .insert_rng_byte => .{ .new, .{ .insert, 0, .rng , .one } },
- .insert_zero_byte => .{ .new, .{ .insert, 0, .zero , .one } },
- .insert_rng_span => .{ .new, .{ .insert, 0, .rng , .many } },
- .insert_zero_span => .{ .new, .{ .insert, 0, .zero , .many } },
- .insert_print_span => .{ .new, .{ .insert, 0, .print , .many } },
- .insert_common_span => .{ .new, .{ .insert, 0, .common , .many } },
- .insert_integer => .{ .new, .{ .insert, 0, .integer , .many } },
- .insert_wtf8_char => .{ .new, .{ .insert, 0, .wtf8 , .one } },
- .insert_wtf8_span => .{ .new, .{ .insert, 0, .wtf8 , .many } },
- .insert_existing_span => .{ .new, .{ .insert, 1, .existing, .many } },
- .insert_splice_span => .{ .new, .{ .insert, 0, .splice , .many } },
- .insert_const_16 => .{ .new, .{ .insert, 0, .@"const", const_vals2 } },
- .insert_const_32 => .{ .new, .{ .insert, 0, .@"const", const_vals4 } },
- .insert_const_64 => .{ .new, .{ .insert, 0, .@"const", const_vals8 } },
- .insert_const_128 => .{ .new, .{ .insert, 0, .@"const", const_vals16 } },
- .insert_small_16le => .{ .new, .{ .insert, 0, .small , .{ i16, .little } } },
- .insert_small_32le => .{ .new, .{ .insert, 0, .small , .{ i32, .little } } },
- .insert_small_64le => .{ .new, .{ .insert, 0, .small , .{ i64, .little } } },
- .insert_small_16be => .{ .new, .{ .insert, 0, .small , .{ i16, .big } } },
- .insert_small_32be => .{ .new, .{ .insert, 0, .small , .{ i32, .big } } },
- .insert_small_64be => .{ .new, .{ .insert, 0, .small , .{ i64, .big } } },
- .insert_few_8 => .{ .new, .{ .insert, 0, .few , .{ u8 , 3 } } },
- .insert_few_16 => .{ .new, .{ .insert, 0, .few , .{ u16, 6 } } },
- .insert_few_32 => .{ .new, .{ .insert, 0, .few , .{ u32, 9 } } },
- .insert_few_64 => .{ .new, .{ .insert, 0, .few , .{ u64, 12 } } },
-
- .push_rng_byte => .{ .new, .{ .push , 0, .rng , .one } },
- .push_zero_byte => .{ .new, .{ .push , 0, .zero , .one } },
- .push_rng_span => .{ .new, .{ .push , 0, .rng , .many } },
- .push_zero_span => .{ .new, .{ .push , 0, .zero , .many } },
- .push_print_span => .{ .new, .{ .push , 0, .print , .many } },
- .push_common_span => .{ .new, .{ .push , 0, .common , .many } },
- .push_integer => .{ .new, .{ .push , 0, .integer , .many } },
- .push_large_zero_span => .{ .new, .{ .push , 0, .zero , .large } },
- .push_wtf8_char => .{ .new, .{ .push , 0, .wtf8 , .one } },
- .push_wtf8_span => .{ .new, .{ .push , 0, .wtf8 , .many } },
- .push_existing_span => .{ .new, .{ .push , 1, .existing, .many } },
- .push_splice_span => .{ .new, .{ .push , 0, .splice , .many } },
- .push_const_16 => .{ .new, .{ .push , 0, .@"const", const_vals2 } },
- .push_const_32 => .{ .new, .{ .push , 0, .@"const", const_vals4 } },
- .push_const_64 => .{ .new, .{ .push , 0, .@"const", const_vals8 } },
- .push_const_128 => .{ .new, .{ .push , 0, .@"const", const_vals16 } },
- .push_small_16le => .{ .new, .{ .push , 0, .small , .{ i16, .little } } },
- .push_small_32le => .{ .new, .{ .push , 0, .small , .{ i32, .little } } },
- .push_small_64le => .{ .new, .{ .push , 0, .small , .{ i64, .little } } },
- .push_small_16be => .{ .new, .{ .push , 0, .small , .{ i16, .big } } },
- .push_small_32be => .{ .new, .{ .push , 0, .small , .{ i32, .big } } },
- .push_small_64be => .{ .new, .{ .push , 0, .small , .{ i64, .big } } },
- .push_few_8 => .{ .new, .{ .push , 0, .few , .{ u8 , 3 } } },
- .push_few_16 => .{ .new, .{ .push , 0, .few , .{ u16, 6 } } },
- .push_few_32 => .{ .new, .{ .push , 0, .few , .{ u32, 9 } } },
- .push_few_64 => .{ .new, .{ .push , 0, .few , .{ u64, 12 } } },
-
- .xor_1 => .{ .rmw, .{ .xor , u8 , native_endian, 1 } },
- .xor_few_8 => .{ .rmw, .{ .xor , u8 , native_endian, 3 } },
- .xor_few_16 => .{ .rmw, .{ .xor , u16, native_endian, 6 } },
- .xor_few_32 => .{ .rmw, .{ .xor , u32, native_endian, 9 } },
- .xor_few_64 => .{ .rmw, .{ .xor , u64, native_endian, 12 } },
-
- .truncate_8 => .{ .rmw, .{ .truncate , u8 , native_endian, {} } },
- .truncate_16le => .{ .rmw, .{ .truncate , u16, .little , {} } },
- .truncate_32le => .{ .rmw, .{ .truncate , u32, .little , {} } },
- .truncate_64le => .{ .rmw, .{ .truncate , u64, .little , {} } },
- .truncate_16be => .{ .rmw, .{ .truncate , u16, .big , {} } },
- .truncate_32be => .{ .rmw, .{ .truncate , u32, .big , {} } },
- .truncate_64be => .{ .rmw, .{ .truncate , u64, .big , {} } },
-
- .add_8 => .{ .rmw, .{ .add , i8 , native_endian, {} } },
- .add_16le => .{ .rmw, .{ .add , i16, .little , {} } },
- .add_32le => .{ .rmw, .{ .add , i32, .little , {} } },
- .add_64le => .{ .rmw, .{ .add , i64, .little , {} } },
- .add_16be => .{ .rmw, .{ .add , i16, .big , {} } },
- .add_32be => .{ .rmw, .{ .add , i32, .big , {} } },
- .add_64be => .{ .rmw, .{ .add , i64, .big , {} } },
-
- .packed_set_rng_8 => .{ .rmw, .{ .packed_rng, u8 , native_endian, {} } },
- .packed_set_rng_16le => .{ .rmw, .{ .packed_rng, u16, .little , {} } },
- .packed_set_rng_32le => .{ .rmw, .{ .packed_rng, u32, .little , {} } },
- .packed_set_rng_64le => .{ .rmw, .{ .packed_rng, u64, .little , {} } },
- .packed_set_rng_16be => .{ .rmw, .{ .packed_rng, u16, .big , {} } },
- .packed_set_rng_32be => .{ .rmw, .{ .packed_rng, u32, .big , {} } },
- .packed_set_rng_64be => .{ .rmw, .{ .packed_rng, u64, .big , {} } },
- // zig fmt: on
- };
-
- switch (class) {
- .new => {
- const op: enum {
- set,
- insert,
- push,
-
- pub fn maxLen(comptime op: @This(), in_len: usize) usize {
- return switch (op) {
- .set => @min(in_len, max_set_len),
- .insert, .push => max_insert_len,
- };
- }
- }, const min_in_len, const data: enum {
- rng,
- zero,
- common,
- print,
- integer,
- wtf8,
- existing,
- splice,
- @"const",
- small,
- few,
- }, const data_ctx = class_ctx;
- const Size = enum { one, many, large };
- if (in.len < min_in_len) return false;
- if (data == .@"const" and data_ctx.len == 0) return false;
-
- const splice_i = if (data == .splice) blk: {
- // Element zero always holds an empty input, so we do not select it
- if (corpus.len == 1) return false;
- break :blk rng.intRangeLessThanBiased(usize, 1, corpus.len);
- } else undefined;
-
- // Only needs to be followed for set
- const len = switch (data) {
- else => switch (@as(Size, data_ctx)) {
- .one => 1,
- .many => rng.intRangeAtMostBiased(usize, 1, op.maxLen(in.len)),
- .large => rng.intRangeAtMostBiased(usize, 1, max_large_insert_len),
- },
- .wtf8 => undefined, // varies by size of each code unit
- .splice => rng.intRangeAtMostBiased(usize, 1, @min(
- corpus[splice_i].len,
- op.maxLen(in.len),
- )),
- .existing => rng.intRangeAtMostBiased(usize, 1, @min(
- in.len,
- op.maxLen(in.len),
- )),
- .@"const" => @sizeOf(@typeInfo(@TypeOf(data_ctx)).pointer.child),
- .small, .few => @sizeOf(data_ctx[0]),
- };
-
- const i = switch (op) {
- .set => rng.uintAtMostBiased(usize, in.len - len),
- .insert => rng.uintAtMostBiased(usize, in.len),
- .push => in.len,
- };
-
- out.appendSliceAssumeCapacity(in[0..i]);
- switch (data) {
- .rng => {
- var bytes: [@max(max_insert_len, max_set_len)]u8 = undefined;
- rng.bytes(bytes[0..len]);
- out.appendSliceAssumeCapacity(bytes[0..len]);
- },
- .zero => out.appendNTimesAssumeCapacity(0, len),
- .common => for (out.addManyAsSliceAssumeCapacity(len)) |*c| {
- c.* = switch (rng.int(u6)) {
- 0 => ' ',
- 1...10 => |x| '0' + (@as(u8, x) - 1),
- 11...36 => |x| 'A' + (@as(u8, x) - 11),
- 37 => '_',
- 38...63 => |x| 'a' + (@as(u8, x) - 38),
- };
- },
- .print => for (out.addManyAsSliceAssumeCapacity(len)) |*c| {
- c.* = rng.intRangeAtMostBiased(u8, 0x20, 0x7E);
- },
- .integer => {
- const negative = len != 0 and rng.boolean();
- if (negative) {
- out.appendAssumeCapacity('-');
- }
-
- for (out.addManyAsSliceAssumeCapacity(len - @intFromBool(negative))) |*c| {
- c.* = rng.intRangeAtMostBiased(u8, '0', '9');
- }
- },
- .wtf8 => {
- comptime assert(op != .set);
- var codepoints: usize = if (data_ctx == .one)
- 1
- else
- rng.intRangeAtMostBiased(usize, 1, Mutation.max_insert_len / 4);
-
- while (true) {
- const units1 = rng.int(u2);
- const value = switch (units1) {
- 0 => rng.int(u7),
- 1 => rng.intRangeAtMostBiased(u11, 0x000080, 0x0007FF),
- 2 => rng.intRangeAtMostBiased(u16, 0x000800, 0x00FFFF),
- 3 => rng.intRangeAtMostBiased(u21, 0x010000, 0x10FFFF),
- };
- const units = @as(u3, units1) + 1;
-
- var buf: [4]u8 = undefined;
- assert(std.unicode.wtf8Encode(value, &buf) catch unreachable == units);
- out.appendSliceAssumeCapacity(buf[0..units]);
-
- codepoints -= 1;
- if (codepoints == 0) break;
- }
- },
- .existing => {
- const j = rng.uintAtMostBiased(usize, in.len - len);
- out.appendSliceAssumeCapacity(in[j..][0..len]);
- },
- .splice => {
- const j = rng.uintAtMostBiased(usize, corpus[splice_i].len - len);
- out.appendSliceAssumeCapacity(corpus[splice_i][j..][0..len]);
- },
- .@"const" => out.appendSliceAssumeCapacity(@ptrCast(
- &data_ctx[rng.uintLessThanBiased(usize, data_ctx.len)],
- )),
- .small => out.appendSliceAssumeCapacity(@ptrCast(
- &mem.nativeTo(data_ctx[0], rng.int(SmallValue), data_ctx[1]),
- )),
- .few => out.appendSliceAssumeCapacity(@ptrCast(
- &fewValue(rng, data_ctx[0], data_ctx[1]),
- )),
- }
- switch (op) {
- .set => out.appendSliceAssumeCapacity(in[i + len ..]),
- .insert => out.appendSliceAssumeCapacity(in[i..]),
- .push => {},
- }
- },
- .remove => {
- if (in.len == 0) return false;
- const Op = enum { delete, pop };
- const op: Op, const max_len = class_ctx;
- // LessThan is used so we don't delete the entire span (which is unproductive since
- // an empty input has always been tried)
- const len = if (max_len == 1) 1 else rng.uintLessThanBiased(
- usize,
- @min(max_len + 1, in.len),
- );
- switch (op) {
- .delete => {
- const i = rng.uintAtMostBiased(usize, in.len - len);
- out.appendSliceAssumeCapacity(in[0..i]);
- out.appendSliceAssumeCapacity(in[i + len ..]);
- },
- .pop => out.appendSliceAssumeCapacity(in[0 .. in.len - len]),
- }
- },
- .rmw => {
- const Op = enum { xor, truncate, add, packed_rng };
- const op: Op, const T, const endian, const xor_bits = class_ctx;
- if (in.len < @sizeOf(T)) return false;
- const Log2T = math.Log2Int(T);
-
- const idx = rng.uintAtMostBiased(usize, in.len - @sizeOf(T));
- const old = mem.readInt(T, in[idx..][0..@sizeOf(T)], endian);
- const new = switch (op) {
- .xor => old ^ fewValue(rng, T, xor_bits),
- .truncate => old & (@as(T, math.maxInt(T)) >> rng.int(Log2T)),
- .add => old +% addend: {
- const val = rng.int(Mutation.AddValue);
- break :addend if (val == 0) 1 else val;
- },
- .packed_rng => blk: {
- const bits = rng.int(math.Log2Int(T)) +| 1;
- break :blk old ^ (rng.int(T) >> bits << rng.uintAtMostBiased(Log2T, bits));
- },
- };
- out.appendSliceAssumeCapacity(in);
- mem.bytesAsValue(T, out.items[8..][idx..][0..@sizeOf(T)]).* =
- mem.nativeTo(T, new, endian);
- },
- .move_span => {
- if (in.len < 2) return false;
- // One less since moving whole output will never change anything
- const len = rng.intRangeAtMostBiased(usize, 1, @min(
- in.len - 1,
- Mutation.max_set_len,
- ));
-
- const src = rng.uintAtMostBiased(usize, in.len - len);
- // This indexes into the final input
- const dst = blk: {
- const res = rng.uintAtMostBiased(usize, in.len - len - 1);
- break :blk res + @intFromBool(res >= src);
- };
-
- if (src < dst) {
- out.appendSliceAssumeCapacity(in[0..src]);
- out.appendSliceAssumeCapacity(in[src + len .. dst + len]);
- out.appendSliceAssumeCapacity(in[src..][0..len]);
- out.appendSliceAssumeCapacity(in[dst + len ..]);
- } else {
- out.appendSliceAssumeCapacity(in[0..dst]);
- out.appendSliceAssumeCapacity(in[src..][0..len]);
- out.appendSliceAssumeCapacity(in[dst..src]);
- out.appendSliceAssumeCapacity(in[src + len ..]);
- }
- },
- .replicate_splice_span => {
- if (in.len == 0) return false;
- if (corpus.len == 1) return false;
- const from = corpus[rng.intRangeLessThanBiased(usize, 1, corpus.len)];
- const len = rng.uintLessThanBiased(usize, @min(in.len, from.len, max_replicate_len));
- const i = rng.uintAtMostBiased(usize, @min(in.len, from.len) - len);
- out.appendSliceAssumeCapacity(in[0..i]);
- out.appendSliceAssumeCapacity(from[i..][0..len]);
- out.appendSliceAssumeCapacity(in[i + len ..]);
- },
- }
- return true;
- }
-};
-
-/// Like `std.ArrayList(u8)` but backed by memory mapping.
-pub const MemoryMappedList = struct {
- /// Contents of the list.
+/// Reusable and recoverable input.
+///
+/// Has a 32-bit limit on the input length. This has the nice side effect that `u32`
+/// can be used in most places in `fuzzer` with the last four values reserved.
+const MemoryMappedInput = struct {
+ len: u32,
+ /// Directly accessing `memory` is unsafe, use either `inputSlice` or `writeSlice`.
///
- /// Pointers to elements in this slice are invalidated by various functions
- /// of this ArrayList in accordance with the respective documentation. In
- /// all cases, "invalidated" means that the memory has been passed to this
- /// allocator's resize or free function.
- items: []align(std.heap.page_size_min) volatile u8,
- /// How many bytes this list can hold without allocating additional memory.
- capacity: usize,
- /// The file is kept open so that it can be resized.
- file: Io.File,
-
- pub fn init(file: Io.File, length: usize, capacity: usize) !MemoryMappedList {
- const ptr = try std.posix.mmap(
- null,
- capacity,
- .{ .READ = true, .WRITE = true },
- .{ .TYPE = .SHARED },
- file.handle,
- 0,
- );
+ /// `memory` starts with the length of the input as a little-endian 32-bit integer.
+ mmap: Io.File.MemoryMap,
+
+ /// `file` becomes owned by the returned `MemoryMappedInput`
+ pub fn init(file: Io.File, size: usize) !MemoryMappedInput {
+ assert(size >= 4);
return .{
- .file = file,
- .items = ptr[0..length],
- .capacity = capacity,
+ .len = 0,
+ .mmap = try file.createMemoryMap(io, .{ .len = size }),
};
}
- pub fn create(file: Io.File, length: usize, capacity: usize) !MemoryMappedList {
- try file.setLength(io, capacity);
- return init(file, length, capacity);
- }
-
- pub fn deinit(l: *MemoryMappedList) void {
- l.file.close(io);
- std.posix.munmap(@volatileCast(l.items.ptr[0..l.capacity]));
+ pub fn deinit(l: *MemoryMappedInput) void {
+ const f = l.mmap.file;
+ l.mmap.write(io) catch |e| panic("failed to write memory map of 'in': {t}", .{e});
+ l.mmap.destroy(io);
+ f.close(io);
l.* = undefined;
}
/// Modify the array so that it can hold at least `additional_count` **more** items.
+ ///
/// Invalidates element pointers if additional memory is needed.
- pub fn ensureUnusedCapacity(l: *MemoryMappedList, additional_count: usize) !void {
- return l.ensureTotalCapacity(l.items.len + additional_count);
+ pub fn ensureUnusedCapacity(l: *MemoryMappedInput, additional_count: usize) void {
+ return l.ensureTotalCapacity(4 + l.len + additional_count);
}
- /// If the current capacity is less than `new_capacity`, this function will
- /// modify the array so that it can hold at least `new_capacity` items.
+ /// If the current capacity is less than `min_capacity`, this function will
+ /// modify the array so that it can hold at least `min_capacity` items.
+ ///
/// Invalidates element pointers if additional memory is needed.
- pub fn ensureTotalCapacity(l: *MemoryMappedList, new_capacity: usize) !void {
- if (l.capacity >= new_capacity) return;
-
- const better_capacity = growCapacity(l.capacity, new_capacity);
- return l.ensureTotalCapacityPrecise(better_capacity);
- }
-
- pub fn ensureTotalCapacityPrecise(l: *MemoryMappedList, new_capacity: usize) !void {
- if (l.capacity >= new_capacity) return;
+ pub fn ensureTotalCapacity(l: *MemoryMappedInput, min_capacity: usize) void {
+ if (l.mmap.memory.len < min_capacity) {
+ @branchHint(.unlikely);
- std.posix.munmap(@volatileCast(l.items.ptr[0..l.capacity]));
- try l.file.setLength(io, new_capacity);
- l.* = try init(l.file, l.items.len, new_capacity);
+ const max_capacity = 1 << 32; // The size of the length header is not added
+ // in order to keep the capacity page aligned and to allow those values to
+            // be reserved for other places.
+ if (min_capacity > max_capacity) @panic("too much smith data requested");
+
+ const new_capacity = @min(growCapacity(min_capacity), max_capacity);
+ l.mmap.file.setLength(io, new_capacity) catch |e|
+ panic("failed to resize 'in': {t}", .{e});
+ l.mmap.setLength(io, new_capacity) catch |se| switch (se) {
+ error.OperationUnsupported => {
+ const f = l.mmap.file;
+ l.mmap.destroy(io);
+ l.mmap = f.createMemoryMap(io, .{ .len = new_capacity }) catch |e|
+ panic("failed to memory map 'in': {t}", .{e});
+ },
+ else => panic("failed to resize memory map of 'in': {t}", .{se}),
+ };
+ }
}
- /// Invalidates all element pointers.
- pub fn clearRetainingCapacity(l: *MemoryMappedList) void {
- l.items.len = 0;
+ // Only writing has side effects, so volatile is not needed
+ pub fn inputSlice(l: *MemoryMappedInput) []const u8 {
+ return l.mmap.memory[4..][0..l.len];
}
- /// Append the slice of items to the list.
- /// Asserts that the list can hold the additional items.
- pub fn appendSliceAssumeCapacity(l: *MemoryMappedList, items: []const u8) void {
- const old_len = l.items.len;
- const new_len = old_len + items.len;
- assert(new_len <= l.capacity);
- l.items.len = new_len;
- @memcpy(l.items[old_len..][0..items.len], items);
+    // Writing has side effects, so volatile is necessary
+ pub fn writeSlice(l: *MemoryMappedInput) []volatile u8 {
+ return l.mmap.memory;
}
- /// Extends the list by 1 element.
- /// Never invalidates element pointers.
- /// Asserts that the list can hold one additional item.
- pub fn appendAssumeCapacity(l: *MemoryMappedList, item: u8) void {
- const new_item_ptr = l.addOneAssumeCapacity();
- new_item_ptr.* = item;
+ fn writeLen(l: *MemoryMappedInput) void {
+ l.writeSlice()[0..4].* = @bitCast(mem.nativeToLittle(u32, l.len));
}
- /// Increase length by 1, returning pointer to the new item.
- /// The returned pointer becomes invalid when the list is resized.
- /// Never invalidates element pointers.
- /// Asserts that the list can hold one additional item.
- pub fn addOneAssumeCapacity(l: *MemoryMappedList) *volatile u8 {
- assert(l.items.len < l.capacity);
- l.items.len += 1;
- return &l.items[l.items.len - 1];
+ /// Invalidates all element pointers.
+ pub fn clearRetainingCapacity(l: *MemoryMappedInput) void {
+ l.len = 0;
+ l.writeLen();
}
- /// Append a value to the list `n` times.
- /// Never invalidates element pointers.
- /// The function is inline so that a comptime-known `value` parameter will
- /// have better memset codegen in case it has a repeated byte pattern.
- /// Asserts that the list can hold the additional items.
- pub inline fn appendNTimesAssumeCapacity(l: *MemoryMappedList, value: u8, n: usize) void {
- const new_len = l.items.len + n;
- assert(new_len <= l.capacity);
- @memset(l.items.ptr[l.items.len..new_len], value);
- l.items.len = new_len;
+ /// Append the slice of items to the list.
+ ///
+ /// Invalidates item pointers if more space is required.
+ pub fn appendSlice(l: *MemoryMappedInput, items: []const u8) void {
+ l.ensureUnusedCapacity(items.len);
+ @memcpy(l.writeSlice()[4 + l.len ..][0..items.len], items);
+ l.len += @as(u32, @intCast(items.len));
+ l.writeLen();
}
- /// Resize the array, adding `n` new elements, which have `undefined` values.
- /// The return value is a slice pointing to the newly allocated elements.
- /// Never invalidates element pointers.
- /// The returned pointer becomes invalid when the list is resized.
- /// Asserts that the list can hold the additional items.
- pub fn addManyAsSliceAssumeCapacity(l: *MemoryMappedList, n: usize) []volatile u8 {
- assert(l.items.len + n <= l.capacity);
- const prev_len = l.items.len;
- l.items.len += n;
- return l.items[prev_len..][0..n];
+ /// Append the little-endian integer to the list.
+ ///
+ /// Invalidates item pointers if more space is required.
+ pub fn appendLittleInt(l: *MemoryMappedInput, T: type, x: T) void {
+ l.ensureUnusedCapacity(@sizeOf(T));
+ //std.log.debug("{} {} {}", .{ l.writeSlice().len, l.len, @sizeOf(T) });
+ l.writeSlice()[4 + l.len ..][0..@sizeOf(T)].* = @bitCast(mem.nativeToLittle(T, x));
+ l.len += @sizeOf(T);
+ l.writeLen();
}
/// Called when memory growth is necessary. Returns a capacity larger than
/// minimum that grows super-linearly.
- fn growCapacity(current: usize, minimum: usize) usize {
- var new = current;
- while (true) {
- new = mem.alignForward(usize, new + new / 2, std.heap.page_size_max);
- if (new >= minimum) return new;
- }
- }
-
- pub fn insertAssumeCapacity(l: *MemoryMappedList, i: usize, item: u8) void {
- assert(l.items.len + 1 <= l.capacity);
- l.items.len += 1;
- volatileCopyBackwards(u8, l.items[i + 1 ..], l.items[i .. l.items.len - 1]);
- l.items[i] = item;
- }
-
- pub fn orderedRemove(l: *MemoryMappedList, i: usize) u8 {
- assert(l.items.len + 1 <= l.capacity);
- const old = l.items[i];
- volatileCopyForwards(u8, l.items[i .. l.items.len - 1], l.items[i + 1 ..]);
- l.items.len -= 1;
- return old;
+ fn growCapacity(minimum: usize) usize {
+ return mem.alignForward(
+ usize,
+ minimum +| (minimum / 2 + std.heap.page_size_max),
+ std.heap.page_size_max,
+ );
}
};
diff --git a/lib/init/src/main.zig b/lib/init/src/main.zig
@@ -40,12 +40,32 @@ test "simple test" {
}
test "fuzz example" {
- const Context = struct {
- fn testOne(context: @This(), input: []const u8) anyerror!void {
- _ = context;
- // Try passing `--fuzz` to `zig build test` and see if it manages to fail this test case!
- try std.testing.expect(!std.mem.eql(u8, "canyoufindme", input));
- }
+ try std.testing.fuzz({}, testOne, .{});
+}
+
+fn testOne(context: void, smith: *std.testing.Smith) !void {
+ _ = context;
+ // Try passing `--fuzz` to `zig build test` and see if it manages to fail this test case!
+
+ const gpa = std.testing.allocator;
+ var list: std.ArrayList(u8) = .empty;
+ defer list.deinit(gpa);
+ while (!smith.eos()) switch (smith.value(enum { add_data, dup_data })) {
+ .add_data => {
+ const slice = try list.addManyAsSlice(gpa, smith.value(u4));
+ smith.bytes(slice);
+ },
+ .dup_data => {
+ if (list.items.len == 0) continue;
+ if (list.items.len > std.math.maxInt(u32)) return error.SkipZigTest;
+ const len = smith.valueRangeAtMost(u32, 1, @min(32, list.items.len));
+ const off = smith.valueRangeAtMost(u32, 0, @intCast(list.items.len - len));
+ try list.appendSlice(gpa, list.items[off..][0..len]);
+ try std.testing.expectEqualSlices(
+ u8,
+ list.items[off..][0..len],
+ list.items[list.items.len - len ..],
+ );
+ },
};
- try std.testing.fuzz(Context{}, Context.testOne, .{});
}
diff --git a/lib/std/Build.zig b/lib/std/Build.zig
@@ -115,7 +115,7 @@ pub const Graph = struct {
arena: Allocator,
system_library_options: std.StringArrayHashMapUnmanaged(SystemLibraryMode) = .empty,
system_package_mode: bool = false,
- debug_compiler_runtime_libs: bool = false,
+ debug_compiler_runtime_libs: ?std.builtin.OptimizeMode = null,
cache: Cache,
zig_exe: [:0]const u8,
environ_map: process.Environ.Map,
diff --git a/lib/std/Build/Fuzz.zig b/lib/std/Build/Fuzz.zig
@@ -64,6 +64,7 @@ const CoverageMap = struct {
/// Elements are indexes into `source_locations` pointing to the unit tests that are being fuzz tested.
entry_points: std.ArrayList(u32),
start_timestamp: i64,
+ start_n_runs: u64,
fn deinit(cm: *CoverageMap, gpa: Allocator) void {
std.posix.munmap(cm.mapped_memory);
@@ -136,6 +137,14 @@ pub fn start(fuzz: *Fuzz) void {
}
for (fuzz.run_steps) |run| {
+ if (run.fuzz_tests.items.len > 1) {
+ // Multiple fuzzWorkerRuns currently cause race-conditions
+ // since they use the same Run step. See #30969
+ fatal("--fuzz not yet implemented for multiple tests", .{});
+ }
+ }
+
+ for (fuzz.run_steps) |run| {
for (run.fuzz_tests.items) |unit_test_index| {
assert(run.rebuilt_executable != null);
fuzz.group.async(io, fuzzWorkerRun, .{ fuzz, run, unit_test_index });
@@ -291,6 +300,7 @@ pub fn sendUpdate(
.source_locations_len = @intCast(coverage_map.source_locations.len),
.string_bytes_len = @intCast(coverage_map.coverage.string_bytes.items.len),
.start_timestamp = coverage_map.start_timestamp,
+ .start_n_runs = coverage_map.start_n_runs,
};
var iovecs: [5][]const u8 = .{
@ptrCast(&header),
@@ -382,6 +392,7 @@ fn prepareTables(fuzz: *Fuzz, run_step: *Step.Run, coverage_id: u64) error{ OutO
.source_locations = undefined, // populated below
.entry_points = .{},
.start_timestamp = ws.now(),
+ .start_n_runs = undefined, // populated below
};
errdefer gop.value_ptr.coverage.deinit(gpa);
@@ -459,6 +470,7 @@ fn prepareTables(fuzz: *Fuzz, run_step: *Step.Run, coverage_id: u64) error{ OutO
for (sorted_pcs.items(.index), sorted_pcs.items(.sl)) |i, sl| source_locations[i] = sl;
gop.value_ptr.source_locations = source_locations;
+ gop.value_ptr.start_n_runs = header.n_runs;
ws.notifyUpdate();
}
diff --git a/lib/std/Build/Step/CheckObject.zig b/lib/std/Build/Step/CheckObject.zig
@@ -1702,6 +1702,10 @@ const ElfDumper = struct {
return error.InvalidArchiveMagicNumber;
}
+ if (!mem.isAligned(bytes.len, 2)) {
+ return error.InvalidArchivePadding;
+ }
+
var ctx = ArchiveContext{
.gpa = gpa,
.data = bytes,
@@ -1715,8 +1719,8 @@ const ElfDumper = struct {
}
while (true) {
- if (reader.seek >= ctx.data.len) break;
if (!mem.isAligned(reader.seek, 2)) reader.seek += 1;
+ if (reader.seek >= ctx.data.len) break;
const hdr = try reader.takeStruct(elf.ar_hdr, .little);
diff --git a/lib/std/Build/Step/Compile.zig b/lib/std/Build/Step/Compile.zig
@@ -1462,7 +1462,8 @@ fn getZigArgs(compile: *Compile, fuzz: bool) ![][]const u8 {
try zig_args.append("--global-cache-dir");
try zig_args.append(b.graph.global_cache_root.path orelse ".");
- if (b.graph.debug_compiler_runtime_libs) try zig_args.append("--debug-rt");
+ if (b.graph.debug_compiler_runtime_libs) |mode|
+ try zig_args.append(b.fmt("--debug-rt={t}", .{mode}));
try zig_args.append("--name");
try zig_args.append(compile.name);
diff --git a/lib/std/Build/WebServer.zig b/lib/std/Build/WebServer.zig
@@ -608,7 +608,10 @@ fn buildClientWasm(ws: *WebServer, arena: Allocator, optimize: std.builtin.Optim
defer body_buffer.deinit(gpa);
while (true) {
- const header = try stdout.takeStruct(Header, .little);
+ const header = stdout.takeStruct(Header, .little) catch |e| switch (e) {
+ error.ReadFailed => return error.ReadFailed,
+ error.EndOfStream => break,
+ };
body_buffer.clearRetainingCapacity();
try stdout.appendExact(gpa, &body_buffer, header.bytes_len);
const body = body_buffer.items;
diff --git a/lib/std/Build/abi.zig b/lib/std/Build/abi.zig
@@ -6,6 +6,7 @@
//! All of these components interface to some degree via an ABI:
//! * The build runner communicates with the web interface over a WebSocket connection
//! * The build runner communicates with `libfuzzer` over a shared memory-mapped file
+const std = @import("std");
// Check that no WebSocket message type has implicit padding bits. This ensures we never send any
// undefined bits over the wire, and also helps validate that the layout doesn't differ between, for
@@ -13,7 +14,6 @@
comptime {
const check = struct {
fn check(comptime T: type) void {
- const std = @import("std");
std.debug.assert(@typeInfo(T) == .@"struct");
std.debug.assert(@typeInfo(T).@"struct".layout == .@"extern");
std.debug.assert(std.meta.hasUniqueRepresentation(T));
@@ -139,14 +139,48 @@ pub const Rebuild = extern struct {
/// ABI bits specifically relating to the fuzzer interface.
pub const fuzz = struct {
- pub const TestOne = *const fn (Slice) callconv(.c) void;
+ pub const TestOne = *const fn () callconv(.c) void;
+
+ /// A unique value to identify the related requests across runs
+ pub const Uid = packed struct(u32) {
+ kind: enum(u1) { int, bytes },
+ hash: u31,
+
+ pub const hashmap_ctx = struct {
+ pub fn hash(_: @This(), u: Uid) u32 {
+ // We can ignore `kind` since `hash` should be unique regardless
+ return u.hash;
+ }
+
+ pub fn eql(_: @This(), a: Uid, b: Uid, _: usize) bool {
+ return a == b;
+ }
+ };
+ };
+
pub extern fn fuzzer_init(cache_dir_path: Slice) void;
+ /// `fuzzer_init` must be called first.
pub extern fn fuzzer_coverage() Coverage;
- pub extern fn fuzzer_init_test(test_one: TestOne, unit_test_name: Slice) void;
+ /// `fuzzer_init` must be called first.
+ pub extern fn fuzzer_set_test(test_one: TestOne, unit_test_name: Slice) void;
+ /// `fuzzer_set_test` must be called first.
+ /// The caller owns the memory of bytes and must not free it until `fuzzer_main` returns
pub extern fn fuzzer_new_input(bytes: Slice) void;
+ /// `fuzzer_set_test` must be called first.
+ /// Resets the fuzzer's state to that of `fuzzer_init`.
pub extern fn fuzzer_main(limit_kind: LimitKind, amount: u64) void;
pub extern fn fuzzer_unslide_address(addr: usize) usize;
+ pub extern fn fuzzer_int(uid: Uid, weights: Weights) u64;
+ pub extern fn fuzzer_eos(uid: Uid, weights: Weights) bool;
+ pub extern fn fuzzer_bytes(uid: Uid, out: MutSlice, weights: Weights) void;
+ pub extern fn fuzzer_slice(
+ uid: Uid,
+ buf: MutSlice,
+ len_weights: Weights,
+ byte_weights: Weights,
+ ) u32;
+
pub const Slice = extern struct {
ptr: [*]const u8,
len: usize,
@@ -160,6 +194,100 @@ pub const fuzz = struct {
}
};
+ pub const MutSlice = extern struct {
+ ptr: [*]u8,
+ len: usize,
+
+ pub fn toSlice(s: MutSlice) []u8 {
+ return s.ptr[0..s.len];
+ }
+
+ pub fn fromSlice(s: []u8) MutSlice {
+ return .{ .ptr = s.ptr, .len = s.len };
+ }
+ };
+
+ pub const Weights = extern struct {
+ ptr: [*]const Weight,
+ len: usize,
+
+ pub fn toSlice(s: Weights) []const Weight {
+ return s.ptr[0..s.len];
+ }
+
+ pub fn fromSlice(s: []const Weight) Weights {
+ return .{ .ptr = s.ptr, .len = s.len };
+ }
+ };
+
+ /// Increases the probability of values being selected by the fuzzer.
+ ///
+ /// `weight` applies to each value in the range (i.e. not evenly across
+ /// the range) and must be nonzero.
+ ///
+ /// In a set of weights, the total weight must be nonzero and must not
+ /// exceed 2^64.
+ pub const Weight = extern struct {
+ /// Inclusive
+ min: u64,
+ /// Inclusive
+ max: u64,
+ weight: u64,
+
+ fn intFromValue(x: anytype) u64 {
+ const T = @TypeOf(x);
+ return switch (@typeInfo(T)) {
+ .comptime_int => x,
+ .bool => @intFromBool(x),
+ .@"enum" => @intFromEnum(x),
+ else => @as(std.meta.Int(.unsigned, @bitSizeOf(T)), @bitCast(x)),
+
+ .int => |i| x: {
+ comptime {
+ if (i.signedness == .signed) {
+ @compileError("type does not have a continous range: " ++ @typeName(T));
+ }
+ // Reject types that don't have a fixed bitsize (esp. usize)
+ // since they are not guaranteed to fit in a u64 across targets.
+ if (std.mem.indexOfScalar(type, &.{
+ usize, c_char, c_ushort, c_uint, c_ulong, c_ulonglong,
+ }, T) != null) {
+ @compileError("type does not have a fixed bitsize: " ++ @typeName(T));
+ }
+ }
+ break :x x;
+ },
+
+ .comptime_float,
+ .float,
+ => @compileError("type does not have a continous range: " ++ @typeName(T)),
+ .pointer => @compileError("type does not have a fixed bitsize: " ++ @typeName(T)),
+ };
+ }
+
+ pub fn value(T: type, x: T, weight: u64) Weight {
+ return .{ .min = intFromValue(x), .max = intFromValue(x), .weight = weight };
+ }
+
+ pub fn rangeAtMost(T: type, at_least: T, at_most: T, weight: u64) Weight {
+ std.debug.assert(intFromValue(at_least) <= intFromValue(at_most));
+ return .{
+ .min = intFromValue(at_least),
+ .max = intFromValue(at_most),
+ .weight = weight,
+ };
+ }
+
+ pub fn rangeLessThan(T: type, at_least: T, less_than: T, weight: u64) Weight {
+ std.debug.assert(intFromValue(at_least) < intFromValue(less_than));
+ return .{
+ .min = intFromValue(at_least),
+ .max = intFromValue(less_than) - 1,
+ .weight = weight,
+ };
+ }
+ };
+
pub const LimitKind = enum(u8) { forever, iterations };
/// libfuzzer uses this and its usize is the one that counts. To match the ABI,
@@ -219,6 +347,7 @@ pub const fuzz = struct {
string_bytes_len: u32,
/// When, according to the server, fuzzing started.
start_timestamp: i64 align(4),
+ start_n_runs: u64 align(4),
};
/// WebSocket server->client.
diff --git a/lib/std/compress/flate/Compress.zig b/lib/std/compress/flate/Compress.zig
@@ -279,7 +279,7 @@ pub fn init(
assert(buffer.len >= flate.max_window_len);
// note that disallowing some of these simplifies matching logic
- assert(opts.chain != 0); // use `Huffman`, disallowing this simplies matching
+ assert(opts.chain != 0); // use `Huffman`; disallowing this simplifies matching
assert(opts.good >= 3 and opts.nice >= 3); // a match will (usually) not be found
assert(opts.good <= 258 and opts.nice <= 258); // a longer match will not be found
assert(opts.lazy <= opts.nice); // a longer match will (usually) not be found
@@ -558,45 +558,35 @@ test betterMatchLen {
try std.testing.fuzz({}, testFuzzedMatchLen, .{});
}
-fn testFuzzedMatchLen(_: void, input: []const u8) !void {
+fn testFuzzedMatchLen(_: void, smith: *std.testing.Smith) !void {
@disableInstrumentation();
- var r: Io.Reader = .fixed(input);
var buf: [1024]u8 = undefined;
var w: Writer = .fixed(&buf);
- var old = r.takeLeb128(u9) catch 0;
- var bytes_off = @max(1, r.takeLeb128(u10) catch 258);
- const prev_back = @max(1, r.takeLeb128(u10) catch 258);
- while (r.takeByte()) |byte| {
- const op: packed struct(u8) {
- kind: enum(u2) { splat, copy, insert_imm, insert },
- imm: u6,
-
- pub fn immOrByte(op_s: @This(), r_s: *Io.Reader) usize {
- return if (op_s.imm == 0) op_s.imm else @as(usize, r_s.takeByte() catch 0) + 64;
- }
- } = @bitCast(byte);
- (switch (op.kind) {
- .splat => w.splatByteAll(r.takeByte() catch 0, op.immOrByte(&r)),
+ while (w.unusedCapacityLen() != 0 and !smith.eosWeightedSimple(7, 1)) {
+ switch (smith.value(enum(u2) { splat, copy, insert })) {
+ .splat => w.splatByteAll(
+ smith.value(u8),
+ smith.valueRangeAtMost(u9, 1, @min(511, w.unusedCapacityLen())),
+ ) catch unreachable,
.copy => write: {
- const start = w.buffered().len -| op.immOrByte(&r);
- const len = @min(w.buffered().len - start, r.takeByte() catch 3);
- break :write w.writeAll(w.buffered()[start..][0..len]);
+ if (w.buffered().len == 0) continue;
+ const start = smith.valueRangeAtMost(u10, 0, @intCast(w.buffered().len - 1));
+ const max_len = @min(w.unusedCapacityLen(), w.buffered().len - start);
+ const len = smith.valueRangeAtMost(u10, 1, @intCast(max_len));
+ break :write w.writeAll(w.buffered()[start..][0..len]) catch unreachable;
},
- .insert_imm => w.writeByte(op.imm),
- .insert => w.writeAll(r.take(
- @min(r.bufferedLen(), @as(usize, op.imm) + 1),
- ) catch unreachable),
- }) catch break;
- } else |_| {}
-
- w.splatByteAll(0, (1 + 3) -| w.buffered().len) catch unreachable;
- bytes_off = @min(bytes_off, @as(u10, @intCast(w.buffered().len - 3)));
- const prev_off = bytes_off -| prev_back;
- assert(prev_off < bytes_off);
+ .insert => w.advance(smith.slice(w.unusedCapacitySlice())),
+ }
+ }
+ w.splatByteAll(0, (1 + token.min_length) -| w.buffered().len) catch unreachable;
+
+ const max_start = w.buffered().len - token.min_length;
+ const bytes_off = smith.valueRangeAtMost(u10, 1, @intCast(max_start));
+ const prev_off = smith.valueRangeAtMost(u10, 0, bytes_off - 1);
const prev = w.buffered()[prev_off..];
const bytes = w.buffered()[bytes_off..];
- old = @min(old, bytes.len - 1, token.max_length - 1);
+ const old = smith.valueRangeLessThan(u10, 0, @min(bytes.len, token.max_length));
const diff_index = mem.findDiff(u8, prev, bytes).?; // unwrap since lengths are not same
const expected_len = @min(diff_index, 258);
@@ -1036,7 +1026,7 @@ const huffman = struct {
max_bits: u4,
incomplete_allowed: bool,
) struct { u32, u16 } {
- assert(out_codes.len - 1 >= @intFromBool(incomplete_allowed));
+ assert(out_codes.len - 1 >= @intFromBool(!incomplete_allowed));
// freqs and out_codes are in the loop to assert they are all the same length
for (freqs, out_codes, out_bits) |_, _, n| assert(n == 0);
assert(out_codes.len <= @as(u16, 1) << max_bits);
@@ -1255,40 +1245,35 @@ const huffman = struct {
try std.testing.fuzz({}, checkFuzzedBuildFreqs, .{});
}
- fn checkFuzzedBuildFreqs(_: void, freqs: []const u8) !void {
+ fn checkFuzzedBuildFreqs(_: void, smith: *std.testing.Smith) !void {
@disableInstrumentation();
- var r: Io.Reader = .fixed(freqs);
var freqs_limit: u16 = 65535;
var freqs_buf: [max_leafs]u16 = undefined;
var nfreqs: u15 = 0;
- const params: packed struct(u8) {
- max_bits: u4,
- _: u3,
- incomplete_allowed: bool,
- } = @bitCast(r.takeByte() catch 255);
- while (nfreqs != freqs_buf.len) {
- const leb = r.takeLeb128(u16);
- const f = if (leb) |f| @min(f, freqs_limit) else |e| switch (e) {
- error.ReadFailed => unreachable,
- error.EndOfStream => 0,
- error.Overflow => freqs_limit,
- };
+ const incomplete_allowed = smith.value(bool);
+ while (nfreqs < @as(u8, @intFromBool(!incomplete_allowed)) + 1 or
+ nfreqs != freqs_buf.len and freqs_limit != 0 and
+ smith.eosWeightedSimple(15, 1))
+ {
+ const f = smith.valueWeighted(u16, &.{
+ .rangeAtMost(u16, 0, @min(31, freqs_limit), @max(freqs_limit, 1)),
+ .rangeAtMost(u16, 0, freqs_limit, 1),
+ });
freqs_buf[nfreqs] = f;
- nfreqs += 1;
freqs_limit -= f;
- if (leb == error.EndOfStream and nfreqs - 1 > @intFromBool(params.incomplete_allowed))
- break;
+ nfreqs += 1;
}
var codes_buf: [max_leafs]u16 = undefined;
var bits_buf: [max_leafs]u4 = @splat(0);
+ const max_bits = smith.valueRangeAtMost(u4, math.log2_int_ceil(u15, nfreqs), 15);
const total_bits, const last_nonzero = build(
freqs_buf[0..nfreqs],
codes_buf[0..nfreqs],
bits_buf[0..nfreqs],
- @max(math.log2_int_ceil(u15, nfreqs), params.max_bits),
- params.incomplete_allowed,
+ max_bits,
+ incomplete_allowed,
);
var has_bitlen_one: bool = false;
@@ -1303,21 +1288,21 @@ const huffman = struct {
}
errdefer std.log.err(
- \\ params: {}
+ \\ incomplete_allowed: {}
+ \\ max_bits: {}
\\ freqs: {any}
\\ bits: {any}
\\ # freqs: {}
- \\ max bits: {}
\\ weighted sum: {}
\\ has_bitlen_one: {}
\\ expected/actual total bits: {}/{}
\\ expected/actual last nonzero: {?}/{}
++ "\n", .{
- params,
+ incomplete_allowed,
+ max_bits,
freqs_buf[0..nfreqs],
bits_buf[0..nfreqs],
nfreqs,
- @max(math.log2_int_ceil(u15, nfreqs), params.max_bits),
weighted_sum,
has_bitlen_one,
expected_total_bits,
@@ -1331,7 +1316,7 @@ const huffman = struct {
if (weighted_sum > 1 << 15)
return error.OversubscribedHuffmanTree;
if (weighted_sum < 1 << 15 and
- !(params.incomplete_allowed and has_bitlen_one and weighted_sum == 1 << 14))
+ !(incomplete_allowed and has_bitlen_one and weighted_sum == 1 << 14))
return error.IncompleteHuffmanTree;
}
};
@@ -1353,6 +1338,7 @@ fn testingFreqBufs() !*[2][65536]u8 {
}
return fbufs;
}
+const FreqBufIndex = enum(u1) { gradient, random };
fn testingCheckDecompressedMatches(
flate_bytes: []const u8,
@@ -1426,34 +1412,31 @@ test Compress {
try std.testing.fuzz(fbufs, testFuzzedCompressInput, .{});
}
-fn testFuzzedCompressInput(fbufs: *const [2][65536]u8, input: []const u8) !void {
- var in: Io.Reader = .fixed(input);
- var opts: packed struct(u51) {
- container: PackedContainer,
- buf_size: u16,
- good: u8,
- nice: u8,
- lazy: u8,
- /// Not a `u16` to limit it for performance
- chain: u9,
- } = @bitCast(in.takeLeb128(u51) catch 0);
- var expected_hash: flate.Container.Hasher = .init(opts.container.val());
+fn testFuzzedCompressInput(fbufs: *const [2][65536]u8, smith: *std.testing.Smith) !void {
+ @disableInstrumentation();
+ const container = smith.value(flate.Container);
+ const good = smith.valueRangeAtMost(u16, 3, 258);
+ const nice = smith.valueRangeAtMost(u16, 3, 258);
+ const lazy = smith.valueRangeAtMost(u16, 3, nice);
+ const chain = smith.valueWeighted(u16, &.{
+ .rangeAtMost(u16, if (good <= lazy) 4 else 1, 255, 65536),
+ // The following weights are greatly reduced since they take increasingly more time to run
+ .rangeAtMost(u16, 256, 4095, 256),
+ .rangeAtMost(u16, 4096, 32767 + 256, 1),
+ });
+ var expected_hash: flate.Container.Hasher = .init(container);
var expected_size: u32 = 0;
var flate_buf: [128 * 1024]u8 = undefined;
var flate_w: Writer = .fixed(&flate_buf);
var deflate_buf: [flate.max_window_len * 2]u8 = undefined;
- var deflate_w = try Compress.init(
- &flate_w,
- deflate_buf[0 .. flate.max_window_len + @as(usize, opts.buf_size)],
- opts.container.val(),
- .{
- .good = @as(u16, opts.good) + 3,
- .nice = @as(u16, opts.nice) + 3,
- .lazy = @as(u16, @min(opts.lazy, opts.nice)) + 3,
- .chain = @max(1, opts.chain, @as(u8, 4) * @intFromBool(opts.good <= opts.lazy)),
- },
- );
+ const bufsize = smith.valueRangeAtMost(u32, flate.max_window_len, @intCast(deflate_buf.len));
+ var deflate_w = try Compress.init(&flate_w, deflate_buf[0..bufsize], container, .{
+ .good = good,
+ .nice = nice,
+ .lazy = lazy,
+ .chain = chain,
+ });
// It is ensured that more bytes are not written then this to ensure this run
// does not take too long and that `flate_buf` does not run out of space.
@@ -1465,79 +1448,57 @@ fn testFuzzedCompressInput(fbufs: *const [2][65536]u8, input: []const u8) !void
// extra 32 bytes is reserved on top of that for container headers and footers.
const max_size = flate_buf.len - (flate_buf_blocks * 64 + 32);
- while (true) {
- const data: packed struct(u36) {
- is_rebase: bool,
- is_bytes: bool,
- params: packed union {
- copy: packed struct(u34) {
- len_lo: u5,
- dist: u15,
- len_hi: u4,
- _: u10,
- },
- bytes: packed struct(u34) {
- kind: enum(u1) { gradient, random },
- off_hi: u4,
- len_lo: u10,
- off_mi: u4,
- len_hi: u5,
- off_lo: u8,
- _: u2,
- },
- rebase: packed struct(u34) {
- preserve: u17,
- capacity: u17,
- },
- },
- } = @bitCast(in.takeLeb128(u36) catch |e| switch (e) {
- error.ReadFailed => unreachable,
- error.Overflow => 0,
- error.EndOfStream => break,
- });
+ while (!smith.eosWeightedSimple(7, 1)) {
+ const max_bytes = max_size -| expected_size;
+ if (max_bytes == 0) break;
const buffered = deflate_w.writer.buffered();
// Required for repeating patterns and since writing from `buffered` is illegal
var copy_buf: [512]u8 = undefined;
- if (data.is_rebase) {
- const usable_capacity = deflate_w.writer.buffer.len - rebase_reserved_capacity;
- const preserve = @min(data.params.rebase.preserve, usable_capacity);
- const capacity = @min(data.params.rebase.capacity, usable_capacity -
- @max(rebase_min_preserve, preserve));
- try deflate_w.writer.rebase(preserve, capacity);
- continue;
- }
-
- const max_bytes = max_size -| expected_size;
- const bytes = if (!data.is_bytes and buffered.len != 0) bytes: {
- const dist = @min(buffered.len, @as(u32, data.params.copy.dist) + 1);
- const len = @min(
- @max(@shlExact(@as(u9, data.params.copy.len_hi), 5) | data.params.copy.len_lo, 1),
- max_bytes,
- );
- // Reuse the implementation's history. Otherwise our own would need maintained.
- const bytes_start = buffered[buffered.len - dist ..];
- const history_bytes = bytes_start[0..@min(bytes_start.len, len)];
-
- @memcpy(copy_buf[0..history_bytes.len], history_bytes);
- const new_history = len - history_bytes.len;
- if (history_bytes.len != len) for ( // check needed for `- dist`
- copy_buf[history_bytes.len..][0..new_history],
- copy_buf[history_bytes.len - dist ..][0..new_history],
- ) |*next, prev| {
- next.* = prev;
- };
- break :bytes copy_buf[0..len];
- } else bytes: {
- const off = @shlExact(@as(u16, data.params.bytes.off_hi), 12) |
- @shlExact(@as(u16, data.params.bytes.off_mi), 8) |
- data.params.bytes.off_lo;
- const len = @shlExact(@as(u16, data.params.bytes.len_hi), 10) |
- data.params.bytes.len_lo;
- const fbuf = &fbufs[@intFromEnum(data.params.bytes.kind)];
- break :bytes fbuf[off..][0..@min(len, fbuf.len - off, max_bytes)];
+ const bytes = bytes: switch (smith.valueRangeAtMost(
+ u2,
+ @intFromBool(buffered.len == 0),
+ 2,
+ )) {
+ 0 => { // Copy
+ const start = smith.valueRangeLessThan(u32, 0, @intCast(buffered.len));
+ // Reuse the implementation's history; otherwise, our own would need to be maintained.
+ const from = buffered[start..];
+ const len = smith.valueRangeAtMost(u16, 1, @min(copy_buf.len, max_bytes));
+
+ const history_bytes = from[0..@min(from.len, len)];
+ @memcpy(copy_buf[0..history_bytes.len], history_bytes);
+ const repeat_len = len - history_bytes.len;
+ for (
+ copy_buf[history_bytes.len..][0..repeat_len],
+ copy_buf[0..repeat_len],
+ ) |*next, prev| {
+ next.* = prev;
+ }
+ break :bytes copy_buf[0..len];
+ },
+ 1 => { // Bytes
+ const fbuf = &fbufs[
+ smith.valueWeighted(u1, &.{
+ .value(FreqBufIndex, .gradient, 3),
+ .value(FreqBufIndex, .random, 1),
+ })
+ ];
+ const len = smith.valueRangeAtMost(u32, 1, @min(fbuf.len, max_bytes));
+ const off = smith.valueRangeAtMost(u32, 0, @intCast(fbuf.len - len));
+ break :bytes fbuf[off..][0..len];
+ },
+ 2 => { // Rebase
+ const rebaseable = bufsize - rebase_reserved_capacity;
+ const capacity = smith.valueRangeAtMost(u32, 1, rebaseable - rebase_min_preserve);
+ const preserve = smith.valueRangeAtMost(u32, 0, rebaseable - capacity);
+ try deflate_w.writer.rebase(preserve, capacity);
+ continue;
+ },
+ else => unreachable,
};
+
assert(bytes.len <= max_bytes);
try deflate_w.writer.writeAll(bytes);
expected_hash.update(bytes);
@@ -1780,7 +1741,8 @@ fn countVec(data: []const []const u8) usize {
return bytes;
}
-fn testFuzzedRawInput(data_buf: *const [4 * 65536]u8, input: []const u8) !void {
+fn testFuzzedRawInput(data_buf: *const [4 * 65536]u8, smith: *std.testing.Smith) !void {
+ @disableInstrumentation();
const HashedStoreWriter = struct {
writer: Writer,
state: enum {
@@ -1819,8 +1781,8 @@ fn testFuzzedRawInput(data_buf: *const [4 * 65536]u8, input: []const u8) !void {
/// Note that this implementation is somewhat dependent on the implementation of
/// `Raw` by expecting headers / footers to be continous in data elements. It
- /// also expects the header to be the same as `flate.Container.header` and not
- /// for multiple streams to be concatenated.
+ /// also expects the header to be the same as `flate.Container.header` and for
+ /// multiple streams to not be concatenated.
fn drain(w: *Writer, data: []const []const u8, splat: usize) Writer.Error!usize {
errdefer w.* = .failing;
var h: *@This() = @fieldParentPtr("writer", w);
@@ -1909,102 +1871,110 @@ fn testFuzzedRawInput(data_buf: *const [4 * 65536]u8, input: []const u8) !void {
}
fn flush(w: *Writer) Writer.Error!void {
- defer w.* = .failing; // Clears buffer even if state hasn't reached `end`
+ defer w.* = .failing; // Empties buffer even if state hasn't reached `end`
_ = try @This().drain(w, &.{""}, 0);
}
};
- var in: Io.Reader = .fixed(input);
- const opts: packed struct(u19) {
- container: PackedContainer,
- buf_len: u17,
- } = @bitCast(in.takeLeb128(u19) catch 0);
- var output: HashedStoreWriter = .init(&.{}, opts.container.val());
- var r_buf: [2 * 65536]u8 = undefined;
- var r: Raw = try .init(
- &output.writer,
- r_buf[0 .. opts.buf_len +% flate.max_window_len],
- opts.container.val(),
- );
-
- var data_base: u18 = 0;
- var expected_hash: flate.Container.Hasher = .init(opts.container.val());
+ const container = smith.value(flate.Container);
+ var output: HashedStoreWriter = .init(&.{}, container);
+ var expected_hash: flate.Container.Hasher = .init(container);
var expected_size: u32 = 0;
+ // 10 maximum blocks is the chosen limit since it is two more
+ // than the maximum the implementation can output in one drain.
+ const max_size = 10 * @as(u32, Raw.max_block_size);
+
+ var raw_buf: [2 * @as(usize, Raw.max_block_size)]u8 = undefined;
+ const raw_buf_len = smith.valueWeighted(u32, &.{
+ .value(u32, 0, @intCast(raw_buf.len)), // unbuffered
+ .rangeAtMost(u32, 0, @intCast(raw_buf.len), 1),
+ });
+ var raw: Raw = try .init(&output.writer, raw_buf[0..raw_buf_len], container);
+
+ const data_buf_len: u32 = @intCast(data_buf.len);
var vecs: [32][]const u8 = undefined;
var vecs_n: usize = 0;
- while (in.seek != in.end) {
- const VecInfo = packed struct(u58) {
- output: bool,
- /// If set, `data_len` and `splat` are reinterpreted as `capacity`
- /// and `preserve_len` respectively and `output` is treated as set.
- rebase: bool,
- block_aligning_len: bool,
- block_aligning_splat: bool,
- data_len: u18,
- splat: u18,
- data_off: u18,
+ while (true) {
+ const Op = packed struct {
+ drain: bool = false,
+ add_vec: bool = false,
+ rebase: bool = false,
+
+ pub const drain_only: @This() = .{ .drain = true };
+ pub const add_vec_only: @This() = .{ .add_vec = true };
+ pub const add_vec_and_drain: @This() = .{ .add_vec = true, .drain = true };
+ pub const drain_and_rebase: @This() = .{ .drain = true, .rebase = true };
};
- var vec_info: VecInfo = @bitCast(in.takeLeb128(u58) catch |e| switch (e) {
- error.ReadFailed => unreachable,
- error.Overflow, error.EndOfStream => 0,
- });
-
- {
- const buffered = r.writer.buffered().len + countVec(vecs[0..vecs_n]);
- const to_align = mem.alignForwardAnyAlign(usize, buffered, Raw.max_block_size) - buffered;
- assert((buffered + to_align) % Raw.max_block_size == 0);
-
- if (vec_info.block_aligning_len) {
- vec_info.data_len = @intCast(to_align);
- } else if (vec_info.block_aligning_splat and vec_info.data_len != 0 and
- to_align % vec_info.data_len == 0)
- {
- vec_info.splat = @divExact(@as(u18, @intCast(to_align)), vec_info.data_len) -% 1;
- }
- }
-
- var splat = if (vec_info.output and !vec_info.rebase) vec_info.splat +% 1 else 1;
- add_vec: {
- if (vec_info.rebase) break :add_vec;
- if (expected_size +| math.mulWide(u18, vec_info.data_len, splat) >
- 10 * (1 << 16))
- {
- // Skip this vector to avoid this test taking too long.
- // 10 maximum sized blocks is choosen as the limit since it is two more
- // than the maximum the implementation can output in one drain.
- splat = 1;
- break :add_vec;
- }
-
- vecs[vecs_n] = data_buf[@min(
- data_base +% vec_info.data_off,
- data_buf.len - vec_info.data_len,
- )..][0..vec_info.data_len];
-
- data_base +%= vec_info.data_len +% 3; // extra 3 to help catch aliasing bugs
- for (0..splat) |_| expected_hash.update(vecs[vecs_n]);
- expected_size += @as(u32, @intCast(vecs[vecs_n].len)) * splat;
+ const is_eos = expected_size == max_size or smith.eosWeightedSimple(7, 1);
+ var op: Op = if (!is_eos) smith.valueWeighted(Op, &.{
+ .value(Op, .add_vec_only, 6),
+ .value(Op, .add_vec_and_drain, 1),
+ .value(Op, .drain_and_rebase, 1),
+ }) else .drain_only;
+
+ if (op.add_vec) {
+ const max_write = max_size - expected_size;
+ const buffered: u32 = @intCast(raw.writer.buffered().len + countVec(vecs[0..vecs_n]));
+ const to_align = Raw.max_block_size - buffered % Raw.max_block_size;
+ assert(to_align != 0); // otherwise, not helpful.
+
+ const max_data = @min(data_buf_len, max_write);
+ const len = smith.valueWeighted(u32, &.{
+ .rangeAtMost(u32, 0, max_data, 1),
+ .rangeAtMost(u32, 0, @min(Raw.max_block_size, max_data), 4),
+ .value(u32, @min(to_align, max_data), max_data), // @min 2nd arg is an edge-case
+ });
+ const off = smith.valueRangeAtMost(u32, 0, data_buf_len - len);
+
+ expected_size += len;
+ vecs[vecs_n] = data_buf[off..][0..len];
vecs_n += 1;
+ op.drain |= vecs_n == vecs.len;
}
- const want_drain = vecs_n == vecs.len or vec_info.output or vec_info.rebase or
- in.seek == in.end;
- if (want_drain and vecs_n != 0) {
- try r.writer.writeSplatAll(vecs[0..vecs_n], splat);
+ op.drain |= is_eos;
+ op.drain &= vecs_n != 0;
+ if (op.drain) {
+ const pattern_len: u32 = @intCast(vecs[vecs_n - 1].len);
+ const pattern_len_z = @max(pattern_len, 1);
+
+ const max_write = max_size - (expected_size - pattern_len);
+ const buffered: u32 = @intCast(raw.writer.buffered().len + countVec(vecs[0 .. vecs_n - 1]));
+ const to_align = Raw.max_block_size - buffered % Raw.max_block_size;
+ assert(to_align != 0); // otherwise, not helpful.
+
+ const max_splat = max_write / pattern_len_z;
+ const weights: [3]std.testing.Smith.Weight = .{
+ .rangeAtMost(u32, 0, max_splat, 1),
+ .rangeAtMost(u32, 0, @min(
+ Raw.max_block_size + pattern_len_z,
+ max_write,
+ ) / pattern_len_z, 4),
+ .value(u32, to_align / pattern_len_z, max_splat * 4),
+ };
+ const align_weight = to_align % pattern_len_z == 0 and to_align <= max_write;
+ const n_weights = @as(u8, 2) + @intFromBool(align_weight);
+ const splat = smith.valueWeighted(u32, weights[0..n_weights]);
+
+ expected_size = expected_size - pattern_len + pattern_len * splat; // splat may be zero
+ for (vecs[0 .. vecs_n - 1]) |v| expected_hash.update(v);
+ for (0..splat) |_| expected_hash.update(vecs[vecs_n - 1]);
+ try raw.writer.writeSplatAll(vecs[0..vecs_n], splat);
vecs_n = 0;
- } else assert(splat == 1);
+ }
- if (vec_info.rebase) {
- try r.writer.rebase(vec_info.data_len, @min(
- r.writer.buffer.len -| vec_info.data_len,
- vec_info.splat,
- ));
+ if (op.rebase) {
+ const capacity = smith.valueRangeAtMost(u32, 0, raw_buf_len);
+ const preserve = smith.valueRangeAtMost(u32, 0, raw_buf_len - capacity);
+ try raw.writer.rebase(preserve, capacity);
}
+
+ if (is_eos) break;
}
- try r.writer.flush();
+ try raw.writer.flush();
try output.writer.flush();
try std.testing.expectEqual(.end, output.state);
@@ -2432,120 +2402,146 @@ test Huffman {
try std.testing.fuzz(fbufs, testFuzzedHuffmanInput, .{});
}
+fn fuzzedHuffmanDrainSpaceLimit(max_drain: usize, written: usize, eos: bool) usize {
+ var block_lim = math.divCeil(usize, max_drain, Huffman.max_tokens) catch unreachable;
+ block_lim = @max(block_lim, @intFromBool(eos));
+ const footer_overhead = @as(u8, 8) * @intFromBool(eos);
+ // 6 for a raw block header (the block header may span two bytes)
+ return written + 6 * block_lim + max_drain + footer_overhead;
+}
+
/// This function is derived from `testFuzzedRawInput` with a few changes for fuzzing `Huffman`.
-fn testFuzzedHuffmanInput(fbufs: *const [2][65536]u8, input: []const u8) !void {
- var in: Io.Reader = .fixed(input);
- const opts: packed struct(u19) {
- container: PackedContainer,
- buf_len: u17,
- } = @bitCast(in.takeLeb128(u19) catch 0);
+fn testFuzzedHuffmanInput(fbufs: *const [2][65536]u8, smith: *std.testing.Smith) !void {
+ @disableInstrumentation();
+ const container = smith.value(flate.Container);
var flate_buf: [2 * 65536]u8 = undefined;
var flate_w: Writer = .fixed(&flate_buf);
- var h_buf: [2 * 65536]u8 = undefined;
- var h: Huffman = try .init(
- &flate_w,
- h_buf[0 .. opts.buf_len +% flate.max_window_len],
- opts.container.val(),
- );
-
- var expected_hash: flate.Container.Hasher = .init(opts.container.val());
+ var expected_hash: flate.Container.Hasher = .init(container);
var expected_size: u32 = 0;
+ const max_size = 4 * @as(u32, Huffman.max_tokens);
+
+ var h_buf: [2 * @as(usize, Huffman.max_tokens)]u8 = undefined;
+ const h_buf_len = smith.valueWeighted(u32, &.{
+ .value(u32, 0, @intCast(h_buf.len)), // unbuffered
+ .rangeAtMost(u32, 0, @intCast(h_buf.len), 1),
+ });
+ var h: Huffman = try .init(&flate_w, h_buf[0..h_buf_len], container);
+
var vecs: [32][]const u8 = undefined;
var vecs_n: usize = 0;
- while (in.seek != in.end) {
- const VecInfo = packed struct(u55) {
- output: bool,
- /// If set, `data_len` and `splat` are reinterpreted as `capacity`
- /// and `preserve_len` respectively and `output` is treated as set.
- rebase: bool,
- block_aligning_len: bool,
- block_aligning_splat: bool,
- data_off_hi: u8,
- random_data: u1,
- data_len: u16,
- splat: u18,
- /// This is less useful as each value is part of the same gradient 'step'
- data_off_lo: u8,
+ while (true) {
+ const Op = packed struct {
+ drain: bool = false,
+ add_vec: bool = false,
+ rebase: bool = false,
+
+ pub const drain_only: @This() = .{ .drain = true };
+ pub const add_vec_only: @This() = .{ .add_vec = true };
+ pub const add_vec_and_drain: @This() = .{ .add_vec = true, .drain = true };
+ pub const drain_and_rebase: @This() = .{ .drain = true, .rebase = true };
};
- var vec_info: VecInfo = @bitCast(in.takeLeb128(u55) catch |e| switch (e) {
- error.ReadFailed => unreachable,
- error.Overflow, error.EndOfStream => 0,
- });
- {
- const buffered = h.writer.buffered().len + countVec(vecs[0..vecs_n]);
- const to_align = mem.alignForwardAnyAlign(usize, buffered, Huffman.max_tokens) - buffered;
- assert((buffered + to_align) % Huffman.max_tokens == 0);
-
- if (vec_info.block_aligning_len) {
- vec_info.data_len = @intCast(to_align);
- } else if (vec_info.block_aligning_splat and vec_info.data_len != 0 and
- to_align % vec_info.data_len == 0)
- {
- vec_info.splat = @divExact(@as(u18, @intCast(to_align)), vec_info.data_len) -% 1;
- }
+ const is_eos = expected_size == max_size or smith.eosWeightedSimple(7, 1);
+ var op: Op = if (!is_eos) smith.valueWeighted(Op, &.{
+ .value(Op, .add_vec_only, 6),
+ .value(Op, .add_vec_and_drain, 1),
+ .value(Op, .drain_and_rebase, 1),
+ }) else .drain_only;
+
+ if (op.add_vec) {
+ const max_write = max_size - expected_size;
+ const buffered: u32 = @intCast(h.writer.buffered().len + countVec(vecs[0..vecs_n]));
+ const to_align = Huffman.max_tokens - buffered % Huffman.max_tokens;
+ assert(to_align != 0); // otherwise, not helpful.
+
+ const data_buf = &fbufs[
+ smith.valueWeighted(u1, &.{
+ .value(FreqBufIndex, .gradient, 3),
+ .value(FreqBufIndex, .random, 1),
+ })
+ ];
+ const data_buf_len: u32 = @intCast(data_buf.len);
+
+ const max_data = @min(data_buf_len, max_write);
+ const len = smith.valueWeighted(u32, &.{
+ .rangeAtMost(u32, 0, max_data, 1),
+ .rangeAtMost(u32, 0, @min(Huffman.max_tokens, max_data), 4),
+ .value(u32, @min(to_align, max_data), max_data), // @min 2nd arg is an edge-case
+ });
+ const off = smith.valueRangeAtMost(u32, 0, data_buf_len - len);
+
+ expected_size += len;
+ vecs[vecs_n] = data_buf[off..][0..len];
+ vecs_n += 1;
+ op.drain |= vecs_n == vecs.len;
}
- var splat = if (vec_info.output and !vec_info.rebase) vec_info.splat +% 1 else 1;
- add_vec: {
- if (vec_info.rebase) break :add_vec;
- if (expected_size +| math.mulWide(u18, vec_info.data_len, splat) > 4 * (1 << 16)) {
- // Skip this vector to avoid this test taking too long.
- splat = 1;
- break :add_vec;
- }
-
- const data_buf = &fbufs[vec_info.random_data];
- vecs[vecs_n] = data_buf[@min(
- (@as(u16, vec_info.data_off_hi) << 8) | vec_info.data_off_lo,
- data_buf.len - vec_info.data_len,
- )..][0..vec_info.data_len];
+ op.drain |= is_eos;
+ op.drain &= vecs_n != 0;
+ if (op.drain) {
+ const pattern_len: u32 = @intCast(vecs[vecs_n - 1].len);
+ const pattern_len_z = @max(pattern_len, 1);
+
+ const max_write = max_size - (expected_size - pattern_len);
+ const buffered: u32 = @intCast(h.writer.buffered().len + countVec(vecs[0 .. vecs_n - 1]));
+ const to_align = Huffman.max_tokens - buffered % Huffman.max_tokens;
+ assert(to_align != 0); // otherwise, not helpful.
+
+ const max_splat = max_write / pattern_len_z;
+ const weights: [3]std.testing.Smith.Weight = .{
+ .rangeAtMost(u32, 0, max_splat, 1),
+ .rangeAtMost(u32, 0, @min(
+ Huffman.max_tokens + pattern_len_z,
+ max_write,
+ ) / pattern_len_z, 4),
+ .value(u32, to_align / pattern_len_z, max_splat * 4),
+ };
+ const align_weight = to_align % pattern_len_z == 0 and to_align <= max_write;
+ const n_weights = @as(u8, 2) + @intFromBool(align_weight);
+ const splat = smith.valueWeighted(u32, weights[0..n_weights]);
+
+ expected_size = expected_size - pattern_len + pattern_len * splat; // splat may be zero
+ for (vecs[0 .. vecs_n - 1]) |v| expected_hash.update(v);
+ for (0..splat) |_| expected_hash.update(vecs[vecs_n - 1]);
+
+ const max_space = fuzzedHuffmanDrainSpaceLimit(
+ buffered + pattern_len * splat,
+ flate_w.buffered().len,
+ false,
+ );
+ h.writer.writeSplatAll(vecs[0..vecs_n], splat) catch
+ return if (max_space <= flate_w.buffer.len) error.OverheadTooLarge else {};
+ if (flate_w.buffered().len > max_space) return error.OverheadTooLarge;
- for (0..splat) |_| expected_hash.update(vecs[vecs_n]);
- expected_size += @as(u32, @intCast(vecs[vecs_n].len)) * splat;
- vecs_n += 1;
+ vecs_n = 0;
}
- const want_drain = vecs_n == vecs.len or vec_info.output or vec_info.rebase or
- in.seek == in.end;
- if (want_drain and vecs_n != 0) {
- var n = h.writer.buffered().len + Writer.countSplat(vecs[0..vecs_n], splat);
- const oos = h.writer.writeSplatAll(vecs[0..vecs_n], splat) == error.WriteFailed;
- n -= h.writer.buffered().len;
- const block_lim = math.divCeil(usize, n, Huffman.max_tokens) catch unreachable;
- const lim = flate_w.end + 6 * block_lim + n; // 6 since block header may span two bytes
- if (flate_w.end > lim) return error.OverheadTooLarge;
- if (oos) return;
+ if (op.rebase) {
+ const capacity = smith.valueRangeAtMost(u32, 0, h_buf_len);
+ const preserve = smith.valueRangeAtMost(u32, 0, h_buf_len - capacity);
- vecs_n = 0;
- } else assert(splat == 1);
-
- if (vec_info.rebase) {
- const old_end = flate_w.end;
- var n = h.writer.buffered().len;
- const oos = h.writer.rebase(vec_info.data_len, @min(
- h.writer.buffer.len -| vec_info.data_len,
- vec_info.splat,
- )) == error.WriteFailed;
- n -= h.writer.buffered().len;
- const block_lim = math.divCeil(usize, n, Huffman.max_tokens) catch unreachable;
- const lim = old_end + 6 * block_lim + n; // 6 since block header may span two bytes
- if (flate_w.end > lim) return error.OverheadTooLarge;
- if (oos) return;
+ const max_space = fuzzedHuffmanDrainSpaceLimit(
+ h.writer.buffered().len,
+ flate_w.buffered().len,
+ false,
+ );
+ h.writer.rebase(preserve, capacity) catch
+ return if (max_space <= flate_w.buffer.len) error.OverheadTooLarge else {};
+ if (flate_w.buffered().len > max_space) return error.OverheadTooLarge;
}
- }
- {
- const old_end = flate_w.end;
- const n = h.writer.buffered().len;
- const oos = h.writer.flush() == error.WriteFailed;
- assert(h.writer.buffered().len == 0);
- const block_lim = @max(1, math.divCeil(usize, n, Huffman.max_tokens) catch unreachable);
- const lim = old_end + 6 * block_lim + n + opts.container.val().footerSize();
- if (flate_w.end > lim) return error.OverheadTooLarge;
- if (oos) return;
+ if (is_eos) break;
}
+ const max_space = fuzzedHuffmanDrainSpaceLimit(
+ h.writer.buffered().len,
+ flate_w.buffered().len,
+ true,
+ );
+ h.writer.flush() catch
+ return if (max_space <= flate_w.buffer.len) error.OverheadTooLarge else {};
+ if (flate_w.buffered().len > max_space) return error.OverheadTooLarge;
+
try testingCheckDecompressedMatches(flate_w.buffered(), expected_size, expected_hash);
}
diff --git a/lib/std/debug.zig b/lib/std/debug.zig
@@ -417,6 +417,7 @@ pub const CpuContextPtr = if (cpu_context.Native == noreturn) noreturn else *con
/// ReleaseFast and ReleaseSmall mode. Outside of a test block, this assert
/// function is the correct function to use.
pub fn assert(ok: bool) void {
+ @disableInstrumentation();
if (!ok) unreachable; // assertion failure
}
diff --git a/lib/std/deque.zig b/lib/std/deque.zig
@@ -518,55 +518,139 @@ test "fuzz against ArrayList oracle" {
try std.testing.fuzz({}, fuzzAgainstArrayList, .{});
}
-test "dumb fuzz against ArrayList oracle" {
- const testing = std.testing;
- const gpa = testing.allocator;
+const FuzzAllocator = struct {
+ smith: *std.testing.Smith,
+ bufs: [2][256 * 4]u8 align(4),
+ used_bitmap: u2,
+ used_len: [2]usize,
+
+ pub fn init(smith: *std.testing.Smith) FuzzAllocator {
+ return .{
+ .smith = smith,
+ .bufs = undefined,
+ .used_len = undefined,
+ .used_bitmap = 0,
+ };
+ }
+
+ pub fn allocator(f: *FuzzAllocator) std.mem.Allocator {
+ return .{
+ .ptr = f,
+ .vtable = &.{
+ .alloc = alloc,
+ .resize = resize,
+ .remap = remap,
+ .free = free,
+ },
+ };
+ }
- const input = try gpa.alloc(u8, 1024);
- defer gpa.free(input);
+ pub fn allocCount(f: *FuzzAllocator) u2 {
+ return @popCount(f.used_bitmap);
+ }
- var prng = std.Random.DefaultPrng.init(testing.random_seed);
- prng.random().bytes(input);
+ fn alloc(ctx: *anyopaque, len: usize, a: std.mem.Alignment, _: usize) ?[*]u8 {
+ const f: *FuzzAllocator = @ptrCast(@alignCast(ctx));
+ assert(a == .@"4");
+ assert(len % 4 == 0);
+
+ const slot: u1 = @intCast(@ctz(~f.used_bitmap));
+ const buf: []u8 = &f.bufs[slot];
+ if (len > buf.len) return null;
+ f.used_bitmap |= @as(u2, 1) << slot;
+ f.used_len[slot] = len;
+ return buf.ptr;
+ }
- try fuzzAgainstArrayList({}, input);
-}
+ fn memSlot(f: *FuzzAllocator, mem: []u8) u1 {
+ const slot: u1 = if (&mem[0] == &f.bufs[0][0])
+ 0
+ else if (&mem[0] == &f.bufs[1][0])
+ 1
+ else
+ unreachable;
+ assert((f.used_bitmap >> slot) & 1 == 1);
+ assert(mem.len == f.used_len[slot]);
+ return slot;
+ }
+
+ fn resize(ctx: *anyopaque, mem: []u8, a: std.mem.Alignment, new_len: usize, _: usize) bool {
+ const f: *FuzzAllocator = @ptrCast(@alignCast(ctx));
+ assert(a == .@"4");
+ assert(f.allocCount() == 1);
+
+ const slot = f.memSlot(mem);
+ if (new_len > f.bufs[slot].len or f.smith.value(bool)) return false;
+ f.used_len[slot] = new_len;
+ return true;
+ }
+
+ fn remap(ctx: *anyopaque, mem: []u8, a: std.mem.Alignment, new_len: usize, _: usize) ?[*]u8 {
+ const f: *FuzzAllocator = @ptrCast(@alignCast(ctx));
+ assert(a == .@"4");
+ assert(f.allocCount() == 1);
-fn fuzzAgainstArrayList(_: void, input: []const u8) anyerror!void {
+ const slot = f.memSlot(mem);
+ if (new_len > f.bufs[slot].len or f.smith.value(bool)) return null;
+
+ if (f.smith.value(bool)) {
+ f.used_len[slot] = new_len;
+ // remap in place
+ return mem.ptr;
+ } else {
+ // moving remap
+ const new_slot = ~slot;
+ f.used_bitmap = ~f.used_bitmap;
+ f.used_len[new_slot] = new_len;
+
+ const new_buf = &f.bufs[new_slot];
+ @memcpy(new_buf[0..mem.len], mem);
+ return new_buf.ptr;
+ }
+ }
+
+ fn free(ctx: *anyopaque, mem: []u8, a: std.mem.Alignment, _: usize) void {
+ const f: *FuzzAllocator = @ptrCast(@alignCast(ctx));
+ assert(a == .@"4");
+ f.used_bitmap ^= @as(u2, 1) << f.memSlot(mem);
+ }
+};
+
+fn fuzzAgainstArrayList(_: void, smith: *std.testing.Smith) anyerror!void {
const testing = std.testing;
- const gpa = testing.allocator;
+
+ var q_gpa_inst: FuzzAllocator = .init(smith);
+ var l_gpa_buf: [q_gpa_inst.bufs[0].len]u8 align(4) = undefined;
+ var l_gpa_inst: std.heap.FixedBufferAllocator = .init(&l_gpa_buf);
+ const q_gpa = q_gpa_inst.allocator();
+ const l_gpa = l_gpa_inst.allocator();
var q: Deque(u32) = .empty;
- defer q.deinit(gpa);
var l: std.ArrayList(u32) = .empty;
- defer l.deinit(gpa);
-
- if (input.len < 2) return;
-
- var prng = std.Random.DefaultPrng.init(input[0]);
- const random = prng.random();
- const Action = enum {
+ const Action = enum(u8) {
+ grow,
push_back,
push_front,
push_back_slice,
push_front_slice,
pop_back,
pop_front,
- grow,
- /// Sentinel to avoid hardcoding the cast below
- max,
};
- for (input[1..]) |byte| {
- switch (@as(Action, @enumFromInt(byte % (@intFromEnum(Action.max))))) {
+
+ while (!smith.eosWeightedSimple(15, 1)) {
+ const baseline = testing.Smith.baselineWeights(Action);
+ const grow_weight: testing.Smith.Weight = .value(Action, .grow, 3);
+ switch (smith.valueWeighted(Action, baseline ++ .{grow_weight})) {
.push_back => {
- const item = random.int(u8);
+ const item = smith.value(u32);
try testing.expectEqual(
l.appendBounded(item),
q.pushBackBounded(item),
);
},
.push_front => {
- const item = random.int(u8);
+ const item = smith.value(u32);
try testing.expectEqual(
l.insertBounded(0, item),
q.pushFrontBounded(item),
@@ -574,9 +658,9 @@ fn fuzzAgainstArrayList(_: void, input: []const u8) anyerror!void {
},
.push_back_slice => {
var buffer: [std.math.maxInt(u3)]u32 = undefined;
- const items = buffer[0..random.int(u3)];
+ const items = buffer[0..smith.value(u3)];
for (items) |*item| {
- item.* = random.int(u8);
+ item.* = smith.value(u32);
}
try testing.expectEqual(
l.appendSliceBounded(items),
@@ -585,9 +669,9 @@ fn fuzzAgainstArrayList(_: void, input: []const u8) anyerror!void {
},
.push_front_slice => {
var buffer: [std.math.maxInt(u3)]u32 = undefined;
- const items = buffer[0..random.int(u3)];
+ const items = buffer[0..smith.value(u3)];
for (items) |*item| {
- item.* = random.int(u8);
+ item.* = smith.value(u32);
}
try testing.expectEqual(
l.insertSliceBounded(0, items),
@@ -607,11 +691,10 @@ fn fuzzAgainstArrayList(_: void, input: []const u8) anyerror!void {
// ensureTotalCapacityPrecise(), which is the most complex part
// of the Deque implementation.
.grow => {
- const growth = random.int(u3);
- try l.ensureTotalCapacityPrecise(gpa, l.items.len + growth);
- try q.ensureTotalCapacityPrecise(gpa, q.len + growth);
+ const growth = smith.value(u3);
+ try l.ensureTotalCapacityPrecise(l_gpa, l.items.len + growth);
+ try q.ensureTotalCapacityPrecise(q_gpa, q.len + growth);
},
- .max => unreachable,
}
try testing.expectEqual(l.getLastOrNull(), q.back());
try testing.expectEqual(
@@ -627,5 +710,8 @@ fn fuzzAgainstArrayList(_: void, input: []const u8) anyerror!void {
}
try testing.expectEqual(null, it.next());
}
+ try testing.expectEqual(@intFromBool(q.buffer.len != 0), q_gpa_inst.allocCount());
}
+ q.deinit(q_gpa);
+ try testing.expectEqual(0, q_gpa_inst.allocCount());
}
diff --git a/lib/std/json/scanner_test.zig b/lib/std/json/scanner_test.zig
@@ -490,20 +490,3 @@ test isNumberFormattedLikeAnInteger {
try std.testing.expect(!isNumberFormattedLikeAnInteger("1e10"));
try std.testing.expect(!isNumberFormattedLikeAnInteger("1E10"));
}
-
-test "fuzz" {
- try std.testing.fuzz({}, fuzzTestOne, .{});
-}
-
-fn fuzzTestOne(_: void, input: []const u8) !void {
- var buf: [16384]u8 = undefined;
- var fba: std.heap.FixedBufferAllocator = .init(&buf);
-
- var scanner = Scanner.initCompleteInput(fba.allocator(), input);
- // Property: There are at most input.len tokens
- var tokens: usize = 0;
- while ((scanner.next() catch return) != .end_of_document) {
- tokens += 1;
- if (tokens > input.len) return error.Overflow;
- }
-}
diff --git a/lib/std/testing.zig b/lib/std/testing.zig
@@ -1203,6 +1203,8 @@ pub fn refAllDecls(comptime T: type) void {
}
}
+pub const Smith = @import("testing/Smith.zig");
+
pub const FuzzInputOptions = struct {
corpus: []const []const u8 = &.{},
};
@@ -1210,7 +1212,7 @@ pub const FuzzInputOptions = struct {
/// Inline to avoid coverage instrumentation.
pub inline fn fuzz(
context: anytype,
- comptime testOne: fn (context: @TypeOf(context), input: []const u8) anyerror!void,
+ comptime testOne: fn (context: @TypeOf(context), smith: *Smith) anyerror!void,
options: FuzzInputOptions,
) anyerror!void {
return @import("root").fuzz(context, testOne, options);
@@ -1317,3 +1319,7 @@ pub const ReaderIndirect = struct {
};
}
};
+
+test {
+ _ = &Smith;
+}
diff --git a/lib/std/testing/Smith.zig b/lib/std/testing/Smith.zig
@@ -0,0 +1,895 @@
+//! Used in conjunction with `std.testing.fuzz` to generate values
+
+const builtin = @import("builtin");
+const std = @import("../std.zig");
+const assert = std.debug.assert;
+const fuzz_abi = std.Build.abi.fuzz;
+const Smith = @This();
+
+/// Null if the fuzzer is being used, in which case this struct will not be mutated.
+///
+/// Intended to be initialized directly.
+in: ?[]const u8,
+
+pub const Weight = fuzz_abi.Weight;
+
+fn intUid(hash: u32) fuzz_abi.Uid {
+ @disableInstrumentation();
+ return @bitCast(hash << 1);
+}
+
+fn bytesUid(hash: u32) fuzz_abi.Uid {
+ @disableInstrumentation();
+ return @bitCast(hash | 1);
+}
+
+fn Backing(T: type) type {
+ return @Int(.unsigned, @bitSizeOf(T));
+}
+
+fn toExcessK(T: type, x: T) Backing(T) {
+ return @bitCast(x -% std.math.minInt(T));
+}
+
+fn fromExcessK(T: type, x: Backing(T)) T {
+ return @as(T, @bitCast(x)) +% std.math.minInt(T);
+}
+
+fn enumFieldLessThan(_: void, a: std.builtin.Type.EnumField, b: std.builtin.Type.EnumField) bool {
+ return a.value < b.value;
+}
+
+/// Returns an array of weights containing each possible value of `T`.
+//
+// `inline` to propagate the `comptime`ness of the result
+pub inline fn baselineWeights(T: type) []const Weight {
+ return comptime switch (@typeInfo(T)) {
+ .bool, .int, .float => i: {
+ // Reject types that don't have a fixed bitsize (esp. usize)
+            // since they are not guaranteed to fit in a u64 across targets.
+ if (std.mem.indexOfScalar(type, &.{
+ isize, usize,
+ c_char, c_longdouble,
+ c_short, c_ushort,
+ c_int, c_uint,
+ c_long, c_ulong,
+ c_longlong, c_ulonglong,
+ }, T) != null) {
+ @compileError("type does not have a fixed bitsize: " ++ @typeName(T));
+ }
+ break :i &.{.rangeAtMost(Backing(T), 0, (1 << @bitSizeOf(T)) - 1, 1)};
+ },
+ .@"struct" => |s| if (s.backing_integer) |B|
+ baselineWeights(B)
+ else
+ @compileError("non-packed structs cannot be weighted"),
+ .@"union" => |u| if (u.layout == .@"packed")
+ baselineWeights(Backing(T))
+ else
+ @compileError("non-packed unions cannot be weighted"),
+ .@"enum" => |e| if (!e.is_exhaustive)
+ baselineWeights(e.tag_type)
+ else if (e.fields.len == 0)
+ // Cannot be included in below branch due to `log2_int_ceil`
+ @compileError("exhaustive zero-field enums cannot be weighted")
+ else e: {
+ @setEvalBranchQuota(@intCast(4 * e.fields.len *
+ std.math.log2_int_ceil(usize, e.fields.len)));
+
+ var sorted_fields = e.fields[0..e.fields.len].*;
+ std.mem.sortUnstable(std.builtin.Type.EnumField, &sorted_fields, {}, enumFieldLessThan);
+
+ var weights: []const Weight = &.{};
+ var seq_first: u64 = sorted_fields[0].value;
+ for (sorted_fields[0 .. sorted_fields.len - 1], sorted_fields[1..]) |prev, field| {
+ if (field.value != prev.value + 1) {
+ weights = weights ++ .{Weight.rangeAtMost(u64, seq_first, prev.value, 1)};
+ seq_first = field.value;
+ }
+ }
+ weights = weights ++ .{Weight.rangeAtMost(
+ u64,
+ seq_first,
+ sorted_fields[sorted_fields.len - 1].value,
+ 1,
+ )};
+
+ break :e weights;
+ },
+ else => @compileError("unexpected type: " ++ @typeName(T)),
+ };
+}
+
+test baselineWeights {
+ try std.testing.expectEqualSlices(
+ Weight,
+ &.{.rangeAtMost(bool, false, true, 1)},
+ baselineWeights(bool),
+ );
+ try std.testing.expectEqualSlices(
+ Weight,
+ &.{.rangeAtMost(u4, 0, 15, 1)},
+ baselineWeights(u4),
+ );
+ try std.testing.expectEqualSlices(
+ Weight,
+ &.{.rangeAtMost(u4, 0, 15, 1)},
+ baselineWeights(i4),
+ );
+ try std.testing.expectEqualSlices(
+ Weight,
+ &.{.rangeAtMost(u16, 0, 0xffff, 1)},
+ baselineWeights(f16),
+ );
+ try std.testing.expectEqualSlices(
+ Weight,
+ &.{.rangeAtMost(u4, 0, 15, 1)},
+ baselineWeights(packed struct(u4) { _: u4 }),
+ );
+ try std.testing.expectEqualSlices(
+ Weight,
+ &.{.rangeAtMost(u4, 0, 15, 1)},
+ baselineWeights(packed union { _: u4 }),
+ );
+ try std.testing.expectEqualSlices(
+ Weight,
+ &.{.rangeAtMost(u4, 0, 15, 1)},
+ baselineWeights(enum(u4) { _ }),
+ );
+ try std.testing.expectEqualSlices(Weight, &.{
+ .rangeAtMost(u4, 0, 1, 1),
+ .value(u4, 3, 1),
+ .value(u4, 5, 1),
+ .rangeAtMost(u4, 8, 10, 1),
+ }, baselineWeights(enum(u4) {
+ a = 1,
+ b = 5,
+ c = 8,
+ d = 3,
+ e = 0,
+ f = 9,
+ g = 10,
+ }));
+}
+
+fn valueFromInt(T: anytype, int: Backing(T)) T {
+ @disableInstrumentation();
+ return switch (@typeInfo(T)) {
+ .@"enum" => @enumFromInt(int),
+ else => @bitCast(int),
+ };
+}
+
+fn checkWeights(weights: []const Weight, max_incl: u64) void {
+ @disableInstrumentation();
+ const w0 = weights[0]; // Sum of weights is zero
+ assert(w0.weight != 0);
+ assert(w0.max <= max_incl);
+
+ var incl_sum: u64 = (w0.max - w0.min) * w0.weight + (w0.weight - 1); // Sum of weights greater than 2^64
+ for (weights[1..]) |w| {
+ assert(w.weight != 0);
+ assert(w.max <= max_incl);
+ // This addition will not overflow except with an illegal combination of weights since
+ // the exclusive sum must be at least one so a span of all values is impossible.
+ incl_sum += (w.max - w.min + 1) * w.weight; // Sum of weights greater than 2^64
+ }
+}
+
+// `inline` to propagate callee's unique return address
+inline fn firstHash() u32 {
+ return @truncate(std.hash.int(@returnAddress()));
+}
+
+// `noinline` to capture a unique return address
+pub noinline fn value(s: *Smith, T: type) T {
+ @disableInstrumentation();
+ return s.valueWithHash(T, firstHash());
+}
+
+// `noinline` to capture a unique return address
+pub noinline fn valueWeighted(s: *Smith, T: type, weights: []const Weight) T {
+ @disableInstrumentation();
+ return s.valueWeightedWithHash(T, weights, firstHash());
+}
+
+// `noinline` to capture a unique return address
+pub noinline fn valueRangeAtMost(s: *Smith, T: type, at_least: T, at_most: T) T {
+ @disableInstrumentation();
+ return s.valueRangeAtMostWithHash(T, at_least, at_most, firstHash());
+}
+
+// `noinline` to capture a unique return address
+pub noinline fn valueRangeLessThan(s: *Smith, T: type, at_least: T, less_than: T) T {
+ @disableInstrumentation();
+ return s.valueRangeLessThanWithHash(T, at_least, less_than, firstHash());
+}
+
+/// This is similar to `value(bool)` however it is guaranteed to eventually
+/// return `true` and provides the fuzzer with an extra hint about the data.
+//
+// `noinline` to capture a unique return address
+pub noinline fn eos(s: *Smith) bool {
+ @disableInstrumentation();
+ return s.eosWithHash(firstHash());
+}
+
+/// This is similar to `value(bool)` however it is guaranteed to eventually
+/// return `true` and provides the fuzzer with an extra hint about the data.
+///
+/// It is asserted that the weight of `true` is non-zero.
+//
+// `noinline` to capture a unique return address
+pub noinline fn eosWeighted(s: *Smith, weights: []const Weight) bool {
+ @disableInstrumentation();
+ return s.eosWeightedWithHash(weights, firstHash());
+}
+
+/// This is similar to `value(bool)` however it is guaranteed to eventually
+/// return `true` and provides the fuzzer with an extra hint about the data.
+///
+/// It is asserted that the weight of `true` is non-zero.
+//
+// `noinline` to capture a unique return address
+pub noinline fn eosWeightedSimple(s: *Smith, false_weight: u64, true_weight: u64) bool {
+ @disableInstrumentation();
+ return s.eosWeightedSimpleWithHash(false_weight, true_weight, firstHash());
+}
+
+// `noinline` to capture a unique return address
+pub noinline fn bytes(s: *Smith, out: []u8) void {
+ @disableInstrumentation();
+ return s.bytesWithHash(out, firstHash());
+}
+
+// `noinline` to capture a unique return address
+pub noinline fn bytesWeighted(s: *Smith, out: []u8, weights: []const Weight) void {
+ @disableInstrumentation();
+ return s.bytesWeightedWithHash(out, weights, firstHash());
+}
+
+/// Returns the length of the filled slice
+///
+/// It is asserted that `buf.len` fits within a u32
+// `noinline` to capture a unique return address
+pub noinline fn slice(s: *Smith, buf: []u8) u32 {
+ @disableInstrumentation();
+ return s.sliceWithHash(buf, firstHash());
+}
+
+/// Returns the length of the filled slice
+///
+/// It is asserted that `buf.len` fits within a u32
+//
+// `noinline` to capture a unique return address
+pub noinline fn sliceWeightedBytes(s: *Smith, buf: []u8, byte_weights: []const Weight) u32 {
+ @disableInstrumentation();
+ return s.sliceWeightedBytesWithHash(buf, byte_weights, firstHash());
+}
+
+/// Returns the length of the filled slice
+///
+/// It is asserted that `buf.len` fits within a u32
+//
+// `noinline` to capture a unique return address
+pub noinline fn sliceWeighted(
+ s: *Smith,
+ buf: []u8,
+ len_weights: []const Weight,
+ byte_weights: []const Weight,
+) u32 {
+ @disableInstrumentation();
+ return s.sliceWeightedWithHash(buf, len_weights, byte_weights, firstHash());
+}
+
+fn weightsContain(int: u64, weights: []const Weight) bool {
+ @disableInstrumentation();
+ var contains: bool = false;
+ for (weights) |w| {
+ contains |= w.min <= int and int <= w.max;
+ }
+ return contains;
+}
+
+/// Asserts `T` can be a member of a packed type
+//
+// `inline` to propagate the `comptime`ness of the result
+inline fn allBitPatternsValid(T: type) bool {
+ return comptime switch (@typeInfo(T)) {
+ .void, .bool, .int, .float => true,
+ inline .@"struct", .@"union" => |c| c.layout == .@"packed" and for (c.fields) |f| {
+ if (!allBitPatternsValid(f.type)) break false;
+ } else true,
+ .@"enum" => |e| !e.is_exhaustive,
+ else => unreachable,
+ };
+}
+
+test allBitPatternsValid {
+ try std.testing.expect(allBitPatternsValid(packed struct {
+ a: void,
+ b: u8,
+ c: f16,
+ d: packed union {
+ a: u16,
+ b: i16,
+ c: f16,
+ },
+ e: enum(u4) { _ },
+ }));
+ try std.testing.expect(!allBitPatternsValid(packed union {
+ a: i4,
+ b: enum(u4) { a },
+ }));
+}
+
+fn UnionTagWithoutUninitializable(T: type) type {
+ const u = @typeInfo(T).@"union";
+ const Tag = u.tag_type orelse @compileError("union must have tag");
+ const e = @typeInfo(Tag).@"enum";
+ var field_names: [e.fields.len][]const u8 = undefined;
+ var field_values: [e.fields.len]e.tag_type = undefined;
+ var n_fields = 0;
+ for (u.fields) |f| {
+ switch (f.type) {
+ noreturn => continue,
+ else => {},
+ }
+ field_names[n_fields] = f.name;
+ field_values[n_fields] = @intFromEnum(@field(Tag, f.name));
+ n_fields += 1;
+ }
+ return @Enum(e.tag_type, .exhaustive, field_names[0..n_fields], field_values[0..n_fields]);
+}
+
+pub fn valueWithHash(s: *Smith, T: type, hash: u32) T {
+ @disableInstrumentation();
+ return switch (@typeInfo(T)) {
+ .void => {},
+ .bool, .int, .float => full: {
+ var int: Backing(T) = 0;
+ comptime var biti = 0;
+ var rhash = hash; // 'running' hash
+ inline while (biti < @bitSizeOf(T)) {
+ const n = @min(@bitSizeOf(T) - biti, 64);
+ const P = @Int(.unsigned, n);
+ int |= @as(
+ @TypeOf(int),
+ s.valueWeightedWithHash(P, baselineWeights(P), rhash),
+ ) << biti;
+ biti += n;
+ rhash = std.hash.int(rhash);
+ }
+ break :full @bitCast(int);
+ },
+ .@"enum" => |e| if (e.is_exhaustive) v: {
+ if (@bitSizeOf(e.tag_type) <= 64) {
+ break :v s.valueWeightedWithHash(T, baselineWeights(T), hash);
+ }
+ break :v std.enums.fromInt(T, s.valueWithHash(e.tag_type, hash)) orelse
+ @enumFromInt(e.fields[0].value);
+ } else @enumFromInt(s.valueWithHash(e.tag_type, hash)),
+ .optional => |o| if (s.valueWithHash(bool, hash))
+ null
+ else
+ s.valueWithHash(o.child, std.hash.int(hash)),
+ inline .array, .vector => |a| arr: {
+ var arr: [a.len]a.child = undefined; // `T` cannot be used due to the vector case
+ if (a.child != u8) {
+ for (&arr) |*v| {
+ v.* = s.valueWithHash(a.child, hash);
+ }
+ } else {
+ s.bytesWithHash(&arr, hash);
+ }
+ break :arr arr;
+ },
+ .@"struct" => |st| if (!allBitPatternsValid(T)) v: {
+ var v: T = undefined;
+ var rhash = hash;
+ inline for (st.fields) |f| {
+ // rhash is incremented in the call so our rhash state is not reused (e.g. with
+ // two nested structs. note that xor cannot work for this case as the bit would
+ // be flipped back here)
+ @field(v, f.name) = s.valueWithHash(f.type, rhash +% 1);
+ rhash = std.hash.int(rhash);
+ }
+ break :v v;
+ } else @bitCast(s.valueWithHash(st.backing_integer.?, hash)),
+ .@"union" => if (!allBitPatternsValid(T))
+ switch (s.valueWithHash(
+ UnionTagWithoutUninitializable(T),
+ // hash is incremented in the call so our hash state is not reused for below
+ std.hash.int(hash +% 1),
+ )) {
+ inline else => |t| @unionInit(
+ T,
+ @tagName(t),
+ s.valueWithHash(@FieldType(T, @tagName(t)), hash),
+ ),
+ }
+ else
+ @bitCast(s.valueWithHash(Backing(T), hash)),
+ else => @compileError("unexpected type '" ++ @typeName(T) ++ "'"),
+ };
+}
+
+pub fn valueWeightedWithHash(s: *Smith, T: type, weights: []const Weight, hash: u32) T {
+ @disableInstrumentation();
+ checkWeights(weights, (1 << @bitSizeOf(T)) - 1);
+ return valueFromInt(T, @intCast(s.valueWeightedWithHashInner(weights, hash)));
+}
+
+fn valueWeightedWithHashInner(s: *Smith, weights: []const Weight, hash: u32) u64 {
+ @disableInstrumentation();
+ return if (s.in) |*in| int: {
+ if (in.len < 8) {
+ @branchHint(.unlikely);
+ in.* = &.{};
+ break :int weights[0].min;
+ }
+ const int = std.mem.readInt(u64, in.*[0..8], .little);
+ in.* = in.*[8..];
+ break :int if (weightsContain(int, weights)) int else weights[0].min;
+ } else if (builtin.fuzz) int: {
+ @branchHint(.likely);
+ break :int fuzz_abi.fuzzer_int(intUid(hash), .fromSlice(weights));
+ } else unreachable;
+}
+
+pub fn valueRangeAtMostWithHash(s: *Smith, T: type, at_least: T, at_most: T, hash: u32) T {
+ @disableInstrumentation();
+ if (@typeInfo(T) == .int and @typeInfo(T).int.signedness == .signed) {
+ return fromExcessK(T, s.valueRangeAtMostWithHash(
+ Backing(T),
+ toExcessK(T, at_least),
+ toExcessK(T, at_most),
+ hash,
+ ));
+ }
+ return s.valueWeightedWithHash(T, &.{.rangeAtMost(T, at_least, at_most, 1)}, hash);
+}
+
+pub fn valueRangeLessThanWithHash(s: *Smith, T: type, at_least: T, less_than: T, hash: u32) T {
+ @disableInstrumentation();
+ if (@typeInfo(T) == .int and @typeInfo(T).int.signedness == .signed) {
+ return fromExcessK(T, s.valueRangeLessThanWithHash(
+ Backing(T),
+ toExcessK(T, at_least),
+ toExcessK(T, less_than),
+ hash,
+ ));
+ }
+ return s.valueWeightedWithHash(T, &.{.rangeLessThan(T, at_least, less_than, 1)}, hash);
+}
+
+/// This is similar to `value(bool)` however it is guaranteed to eventually
+/// return `true` and provides the fuzzer with an extra hint about the data.
+pub fn eosWithHash(s: *Smith, hash: u32) bool {
+ @disableInstrumentation();
+ return s.eosWeightedWithHash(baselineWeights(bool), hash);
+}
+
+/// This is similar to `value(bool)` however it is guaranteed to eventually
+/// return `true` and provides the fuzzer with an extra hint about the data.
+///
+/// It is asserted that the weight of `true` is non-zero.
+pub fn eosWeightedWithHash(s: *Smith, weights: []const Weight, hash: u32) bool {
+ @disableInstrumentation();
+ checkWeights(weights, 1);
+ for (weights) |w| (if (w.max == 1) break) else unreachable; // `true` must have non-zero weight
+
+ if (s.in) |*in| {
+ if (in.len == 0) {
+ @branchHint(.unlikely);
+ return true;
+ }
+ const eos_val = in.*[0] != 0;
+ in.* = in.*[1..];
+ return eos_val or b: {
+ var only_true: bool = true;
+ for (weights) |w| {
+ only_true &= @as(u1, @intCast(w.min)) == 1;
+ }
+ break :b only_true;
+ };
+ } else if (builtin.fuzz) {
+ @branchHint(.likely);
+ return fuzz_abi.fuzzer_eos(intUid(hash), .fromSlice(weights));
+ } else unreachable;
+}
+
+/// This is similar to `value(bool)`, however it is guaranteed to eventually
+/// return `true` and provides the fuzzer with an extra hint about the data.
+///
+/// `false_weight` and `true_weight` are the weights given to `false` and
+/// `true` respectively.
+///
+/// It is asserted that the weight of `false` is non-zero.
+/// It is asserted that the weight of `true` is non-zero.
+//
+// NOTE(review): the remark below mentions `noinline`, but this function is not
+// declared `noinline` and receives `hash` explicitly; presumably it was carried
+// over from a non-`WithHash` wrapper. Confirm and drop it if stale.
+// `noinline` to capture a unique return address
+pub fn eosWeightedSimpleWithHash(s: *Smith, false_weight: u64, true_weight: u64, hash: u32) bool {
+ @disableInstrumentation();
+ return s.eosWeightedWithHash(&.{
+ .value(bool, false, false_weight),
+ .value(bool, true, true_weight),
+ }, hash);
+}
+
+/// Fills all of `out` by calling `bytesWeightedWithHash` with
+/// `baselineWeights(u8)`.
+pub fn bytesWithHash(s: *Smith, out: []u8, hash: u32) void {
+ @disableInstrumentation();
+ return s.bytesWeightedWithHash(out, baselineWeights(u8), hash);
+}
+
+/// Fills all of `out` with bytes permitted by `weights`.
+///
+/// With a fixed input, up to `out.len` bytes are consumed from it. A byte that
+/// no weight covers is replaced with `weights[0].min`, which is also used to
+/// pad `out` when the input runs short. Without a fixed input the request is
+/// forwarded to the fuzzer, which requires `builtin.fuzz`.
+pub fn bytesWeightedWithHash(s: *Smith, out: []u8, weights: []const Weight, hash: u32) void {
+    @disableInstrumentation();
+    checkWeights(weights, 255);
+
+    if (s.in) |*in| {
+        // Lookup table: `allowed[b]` is set when some weight's range contains `b`.
+        var allowed: [256]bool = @splat(false);
+        for (weights) |w| {
+            const first: usize = @intCast(w.min);
+            const past_last: usize = @intCast(w.max + 1);
+            @memset(allowed[first..past_last], true);
+        }
+        const fallback: u8 = @intCast(weights[0].min);
+
+        const taken = @min(out.len, in.len);
+        for (out[0..taken], in.*[0..taken]) |*dst, src_byte| {
+            dst.* = if (allowed[src_byte]) src_byte else fallback;
+        }
+        @memset(out[taken..], fallback);
+        in.* = in.*[taken..];
+    } else if (builtin.fuzz) {
+        @branchHint(.likely);
+        fuzz_abi.fuzzer_bytes(bytesUid(hash), .fromSlice(out), .fromSlice(weights));
+    } else unreachable;
+}
+
+/// Returns the length of the filled slice
+///
+/// The bytes use `baselineWeights(u8)`; this forwards to
+/// `sliceWeightedBytesWithHash`.
+///
+/// It is asserted that `buf.len` fits within a u32
+pub fn sliceWithHash(s: *Smith, buf: []u8, hash: u32) u32 {
+ @disableInstrumentation();
+ return s.sliceWeightedBytesWithHash(buf, baselineWeights(u8), hash);
+}
+
+/// Returns the length of the filled slice
+///
+/// The length is described by a single weight covering `0` through `buf.len`;
+/// the bytes are described by `byte_weights`.
+///
+/// It is asserted that `buf.len` fits within a u32
+pub fn sliceWeightedBytesWithHash(
+    s: *Smith,
+    buf: []u8,
+    byte_weights: []const Weight,
+    hash: u32,
+) u32 {
+    @disableInstrumentation();
+    const max_len: u32 = @intCast(buf.len);
+    const len_weights = [_]Weight{.rangeAtMost(u32, 0, max_len, 1)};
+    return s.sliceWeightedWithHash(buf, &len_weights, byte_weights, hash);
+}
+
+/// Returns the length of the filled slice
+///
+/// `len_weights` describes the permitted lengths and `byte_weights` the
+/// permitted bytes.
+///
+/// With a fixed input the encoding is a 4-byte little-endian length followed by
+/// that many payload bytes. Without one the request is forwarded to the fuzzer,
+/// which requires `builtin.fuzz`.
+///
+/// It is asserted that `buf.len` fits within a u32
+pub fn sliceWeightedWithHash(
+ s: *Smith,
+ buf: []u8,
+ len_weights: []const Weight,
+ byte_weights: []const Weight,
+ hash: u32,
+) u32 {
+ @disableInstrumentation();
+ checkWeights(byte_weights, 255);
+ checkWeights(len_weights, @as(u32, @intCast(buf.len)));
+
+ if (s.in) |*in| {
+ // Length prefix, clamped to the number of bytes that actually follow it.
+ const in_len = len: {
+ if (in.len < 4) {
+ @branchHint(.unlikely);
+ // Not enough bytes for a length prefix: discard what is left.
+ in.* = &.{};
+ break :len 0;
+ }
+ const len = std.mem.readInt(u32, in.*[0..4], .little);
+ in.* = in.*[4..];
+ break :len @min(len, in.len);
+ };
+ // A length the weights do not accept is replaced by the first weight's minimum.
+ const out_len: u32 = if (weightsContain(in_len, len_weights))
+ in_len
+ else
+ @intCast(len_weights[0].min);
+
+ // Lookup table of the byte values covered by some weight.
+ var present_weights: [256]bool = @splat(false);
+ for (byte_weights) |w| {
+ @memset(present_weights[@intCast(w.min)..@intCast(w.max + 1)], true);
+ }
+ const default: u8 = @intCast(byte_weights[0].min);
+
+ const copy_len = @min(out_len, in_len);
+ for (in.*[0..copy_len], buf[0..copy_len]) |i, *o| {
+ o.* = if (present_weights[i]) i else default;
+ }
+ // The whole payload is consumed even when fewer than `in_len` bytes were copied.
+ in.* = in.*[in_len..];
+ // Everything after the copied bytes, up to the end of `buf`, gets the default byte.
+ @memset(buf[copy_len..], default);
+ return out_len;
+ } else if (builtin.fuzz) {
+ @branchHint(.likely);
+ return fuzz_abi.fuzzer_slice(
+ bytesUid(hash),
+ .fromSlice(buf),
+ .fromSlice(len_weights),
+ .fromSlice(byte_weights),
+ );
+ } else unreachable;
+}
+
+/// Builds, at comptime, the byte string used as a fixed `Smith` input in the
+/// tests below.
+///
+/// Each entry of `values` is encoded in order:
+/// * `eos`: one byte, 0 or 1
+/// * `int`: eight bytes, little-endian
+/// * `bytes`: the bytes as-is
+/// * `slice`: a four-byte little-endian length followed by the bytes
+fn constructInput(comptime values: []const union(enum) {
+ eos: bool,
+ int: u64,
+ bytes: []const u8,
+ slice: []const u8,
+}) []const u8 {
+ const result = comptime result: {
+ // The buffer is sized exactly by summing the encoded size of every entry.
+ var result: [
+ len: {
+ var len = 0;
+ for (values) |v| len += switch (v) {
+ .eos => 1,
+ .int => 8,
+ .bytes => |b| b.len,
+ .slice => |s| 4 + s.len,
+ };
+ break :len len;
+ }
+ ]u8 = undefined;
+ var w: std.Io.Writer = .fixed(&result);
+
+ // Writes cannot fail: the fixed buffer has exactly the required size.
+ for (values) |v| switch (v) {
+ .eos => |e| w.writeByte(@intFromBool(e)) catch unreachable,
+ .int => |i| w.writeInt(u64, i, .little) catch unreachable,
+ .bytes => |b| w.writeAll(b) catch unreachable,
+ .slice => |s| {
+ w.writeInt(u32, @intCast(s.len), .little) catch unreachable;
+ w.writeAll(s) catch unreachable;
+ },
+ };
+
+ break :result result;
+ };
+ return &result;
+}
+
+// Round-trip check for `value`: the input below encodes the default value of
+// every field of `S`, in declaration order, and `smith.value(S)` is expected to
+// reproduce `S{}`.
+//
+// As laid out below: `void` and empty structs have no input entry, 128-bit and
+// 80-bit values are given as two ints (low 64 bits first), and `u8` arrays and
+// vectors are given as raw bytes. The trailing `invalid` entries are values the
+// field types cannot hold; the expected results are the declared defaults.
+test value {
+ if (@import("builtin").zig_backend == .stage2_c) return error.SkipZigTest; // TODO
+
+ const S = struct {
+ v: void = {},
+ b: bool = true,
+ ih: u16 = 123,
+ iq: u64 = 55555,
+ io: u128 = (1 << 80) | (1 << 23),
+ fd: f64 = std.math.pi,
+ ft: f80 = std.math.e,
+ eh: enum(u16) { a, _ } = @enumFromInt(999),
+ eo: enum(u128) { a, b, _ } = .b,
+ aw: [3]u32 = .{ 1 << 30, 1 << 20, 1 << 10 },
+ vw: @Vector(3, u32) = .{ 1 << 10, 1 << 20, 1 << 30 },
+ ab: [3]u8 = .{ 55, 33, 88 },
+ vb: @Vector(3, u8) = .{ 22, 44, 99 },
+ s: struct { q: u64 } = .{ .q = 1 },
+ sz: struct {} = .{},
+ sp: packed struct(u8) { a: u5, b: u3 } = .{ .a = 31, .b = 3 },
+ si: packed struct(u8) { a: u5, b: enum(u3) { a, b } } = .{ .a = 15, .b = .b },
+ u: union(enum(u2)) {
+ a: u64,
+ b: u64,
+ c: noreturn,
+ } = .{ .b = 777777 },
+ up: packed union {
+ a: u16,
+ b: f16,
+ } = .{ .b = std.math.phi },
+
+ invalid: struct {
+ ib: u8 = 0,
+ eb: enum(u8) { a, b } = .a,
+ eo: enum(u128) { a, b } = .a,
+ u: union(enum(u1)) { a: noreturn, b: void } = .{ .b = {} },
+ } = .{},
+ };
+ const s: S = .{};
+ const ft_bits: u80 = @bitCast(s.ft);
+ const eo_bits = @intFromEnum(s.eo);
+
+ var smith: Smith = .{
+ .in = constructInput(&.{
+ // v
+ .{ .int = @intFromBool(s.b) }, // b
+ .{ .int = s.ih }, // ih
+ .{ .int = s.iq }, // iq
+ .{ .int = @truncate(s.io) }, .{ .int = @intCast(s.io >> 64) }, // io
+ .{ .int = @bitCast(s.fd) }, // fd
+ .{ .int = @truncate(ft_bits) }, .{ .int = @intCast(ft_bits >> 64) }, // ft
+ .{ .int = @intFromEnum(s.eh) }, // eh
+ .{ .int = @truncate(eo_bits) }, .{ .int = @intCast(eo_bits >> 64) }, // eo
+ .{ .int = s.aw[0] }, .{ .int = s.aw[1] }, .{ .int = s.aw[2] }, // aw
+ .{ .int = s.vw[0] }, .{ .int = s.vw[1] }, .{ .int = s.vw[2] }, // vw
+ .{ .bytes = &s.ab }, // ab
+ .{ .bytes = &@as([3]u8, s.vb) }, // vb
+ .{ .int = s.s.q }, // s.q
+ //sz
+ .{ .int = @as(u8, @bitCast(s.sp)) }, // sp
+ .{ .int = s.si.a }, .{ .int = @intFromEnum(s.si.b) }, // si
+ .{ .int = @intFromEnum(s.u) }, .{ .int = s.u.b }, // u
+ .{ .int = @as(u16, @bitCast(s.up)) }, // up
+ // invalid values
+ .{ .int = 555 }, // invalid.ib
+ .{ .int = 123 }, // invalid.eb
+ .{ .int = 0 }, .{ .int = 1 }, // invalid.eo
+ .{ .int = 0 }, // invalid.u
+ }),
+ };
+
+ try std.testing.expectEqual(s, smith.value(S));
+}
+
+// Each call consumes one int from the input; values the weights do not contain
+// are expected to come back as the minimum of the first weight.
+test valueWeighted {
+ var smith: Smith = .{
+ .in = constructInput(&.{
+ .{ .int = 200 },
+ .{ .int = 200 },
+ .{ .int = 300 },
+ .{ .int = 400 },
+ }),
+ };
+
+ // 200 is inside 50...200.
+ try std.testing.expectEqual(200, smith.valueWeighted(u8, &.{.rangeAtMost(u8, 50, 200, 1)}));
+ // 200 is outside 50..200 (exclusive end), so the minimum 50 is expected.
+ try std.testing.expectEqual(50, smith.valueWeighted(u8, &.{.rangeLessThan(u8, 50, 200, 1)}));
+ const E = enum(u64) { a = 100, b = 200, c = 300 };
+ // 300 is the value of `E.c`.
+ try std.testing.expectEqual(E.c, smith.valueWeighted(E, baselineWeights(E)));
+ // 400 is not a value of `E`; `E.a` is expected instead.
+ try std.testing.expectEqual(E.a, smith.valueWeighted(E, baselineWeights(E)));
+ // The input is exhausted here; the only permitted value is expected.
+ try std.testing.expectEqual(12345, smith.valueWeighted(u64, &.{.value(u64, 12345, 1)}));
+}
+
+// Each call consumes one int. For `i8` the expectations are written as
+// `input - 128`, and an out-of-range input is expected to yield the lower bound.
+test valueRangeAtMost {
+ var smith: Smith = .{
+ .in = constructInput(&.{
+ .{ .int = 100 },
+ .{ .int = 100 },
+ .{ .int = 200 },
+ .{ .int = 100 },
+ .{ .int = 200 },
+ .{ .int = 0 },
+ }),
+ };
+ try std.testing.expectEqual(100, smith.valueRangeAtMost(u8, 0, 250));
+ // A single-value range.
+ try std.testing.expectEqual(100, smith.valueRangeAtMost(u8, 100, 100));
+ // 200 is above the range, so the lower bound is expected.
+ try std.testing.expectEqual(0, smith.valueRangeAtMost(u8, 0, 100));
+ try std.testing.expectEqual(100 - 128, smith.valueRangeAtMost(i8, -100, 100));
+ try std.testing.expectEqual(200 - 128, smith.valueRangeAtMost(i8, -100, 100));
+ // 0 corresponds to -128, which is below the range, so the lower bound is expected.
+ try std.testing.expectEqual(-100, smith.valueRangeAtMost(i8, -100, 100));
+}
+
+// Same as the `valueRangeAtMost` test but with an exclusive upper bound: an
+// input equal to the bound is expected to yield the lower bound.
+test valueRangeLessThan {
+ var smith: Smith = .{
+ .in = constructInput(&.{
+ .{ .int = 100 },
+ .{ .int = 100 },
+ .{ .int = 100 },
+ .{ .int = 100 + 128 },
+ }),
+ };
+ try std.testing.expectEqual(100, smith.valueRangeLessThan(u8, 0, 250));
+ // 100 is not less than 100.
+ try std.testing.expectEqual(0, smith.valueRangeLessThan(u8, 0, 100));
+ try std.testing.expectEqual(100 - 128, smith.valueRangeLessThan(i8, -100, 100));
+ // 100 + 128 corresponds to 100, which is not less than 100.
+ try std.testing.expectEqual(-100, smith.valueRangeLessThan(i8, -100, 100));
+}
+
+test eos {
+    var smith: Smith = .{
+        .in = constructInput(&.{
+            .{ .eos = false },
+            .{ .eos = true },
+        }),
+    };
+    // Explicit `false`, explicit `true`, then an exhausted input, which is
+    // expected to report `true`.
+    const answers = [_]bool{ false, true, true };
+    for (answers) |want| {
+        try std.testing.expect(smith.eos() == want);
+    }
+}
+
+// The input asks for `false`, but the weights only permit `true`, so `true` is
+// expected.
+test eosWeighted {
+ var smith: Smith = .{ .in = constructInput(&.{.{ .eos = false }}) };
+ try std.testing.expect(smith.eosWeighted(&.{.value(bool, true, std.math.maxInt(u64))}));
+}
+
+// Ten input bytes in total: eight fill the first request, a zero-length request
+// follows, and the final three-byte request only has two bytes left; the
+// missing byte is expected to be 0.
+test bytes {
+ var smith: Smith = .{ .in = constructInput(&.{
+ .{ .bytes = "testing!" },
+ .{ .bytes = "ab" },
+ }) };
+ var buf: [8]u8 = undefined;
+
+ smith.bytes(&buf);
+ try std.testing.expectEqualSlices(u8, "testing!", &buf);
+ smith.bytes(buf[0..0]);
+ smith.bytes(buf[0..3]);
+ try std.testing.expectEqualSlices(u8, "ab\x00", buf[0..3]);
+}
+
+// Same input as the `bytes` test, restricted to 'a'...'z': the '!' and the
+// missing final byte are both expected to become 'a', the minimum of the weight.
+test bytesWeighted {
+ var smith: Smith = .{ .in = constructInput(&.{
+ .{ .bytes = "testing!" },
+ .{ .bytes = "ab" },
+ }) };
+ const weights: []const Weight = &.{.rangeAtMost(u8, 'a', 'z', 1)};
+ var buf: [8]u8 = undefined;
+
+ smith.bytesWeighted(&buf, weights);
+ try std.testing.expectEqualSlices(u8, "testinga", &buf);
+ smith.bytesWeighted(buf[0..0], weights);
+ smith.bytesWeighted(buf[0..3], weights);
+ try std.testing.expectEqualSlices(u8, "aba", buf[0..3]);
+}
+
+// Three well-formed length-prefixed slices followed by a bare length prefix of
+// 4 with no payload behind it; the last call is expected to yield an empty
+// slice.
+test slice {
+ var smith: Smith = .{
+ .in = constructInput(&.{
+ .{ .slice = "testing!" },
+ .{ .slice = "" },
+ .{ .slice = "ab" },
+ .{ .bytes = std.mem.asBytes(&std.mem.nativeToLittle(u32, 4)) }, // length past end
+ }),
+ };
+ var buf: [8]u8 = undefined;
+
+ try std.testing.expectEqualSlices(u8, "testing!", buf[0..smith.slice(&buf)]);
+ try std.testing.expectEqualSlices(u8, "", buf[0..smith.slice(&buf)]);
+ try std.testing.expectEqualSlices(u8, "ab", buf[0..smith.slice(&buf)]);
+ try std.testing.expectEqualSlices(u8, "", buf[0..smith.slice(&buf)]);
+}
+
+test sliceWeightedBytes {
+    const weights: []const Weight = &.{.rangeAtMost(u8, 'a', 'z', 1)};
+    var smith: Smith = .{ .in = constructInput(&.{
+        .{ .slice = "testing!" },
+    }) };
+    var buf: [8]u8 = undefined;
+
+    // '!' lies outside 'a'...'z' and is expected to come back as 'a'.
+    const first_len = smith.sliceWeightedBytes(&buf, weights);
+    try std.testing.expectEqualSlices(u8, "testinga", buf[0..first_len]);
+
+    // The input is exhausted, so the second slice is expected to be empty.
+    const second_len = smith.sliceWeightedBytes(&buf, weights);
+    try std.testing.expectEqualSlices(u8, "", buf[0..second_len]);
+}
+
+test sliceWeighted {
+    const len_weights: []const Weight = &.{.rangeAtMost(u8, 3, 6, 1)};
+    const weights: []const Weight = &.{.rangeAtMost(u8, 'a', 'z', 1)};
+    var smith: Smith = .{ .in = constructInput(&.{
+        .{ .slice = "testing!" },
+        .{ .slice = "ing!" },
+        .{ .slice = "ab" },
+    }) };
+    var buf: [8]u8 = undefined;
+
+    // One expected result per call, in order:
+    // * length 8 is not permitted, so the minimum length 3 is used
+    // * length 4 is permitted; '!' is replaced by 'a'
+    // * length 2 is not permitted; the missing third byte is 'a'
+    // * the input is exhausted; all three bytes are 'a'
+    const cases = [_][]const u8{ "tes", "inga", "aba", "aaa" };
+    for (cases) |want| {
+        const got_len = smith.sliceWeighted(&buf, len_weights, weights);
+        try std.testing.expectEqualSlices(u8, want, buf[0..got_len]);
+    }
+}
diff --git a/lib/std/zig.zig b/lib/std/zig.zig
@@ -14,6 +14,7 @@ pub const Server = @import("zig/Server.zig");
pub const Client = @import("zig/Client.zig");
pub const Token = tokenizer.Token;
pub const Tokenizer = tokenizer.Tokenizer;
+pub const TokenSmith = @import("zig/TokenSmith.zig");
pub const string_literal = @import("zig/string_literal.zig");
pub const number_literal = @import("zig/number_literal.zig");
pub const primitives = @import("zig/primitives.zig");
@@ -991,6 +992,7 @@ test {
_ = LibCDirs;
_ = LibCInstallation;
_ = Server;
+ _ = TokenSmith;
_ = WindowsSdk;
_ = number_literal;
_ = primitives;
diff --git a/lib/std/zig/Ast.zig b/lib/std/zig/Ast.zig
@@ -160,10 +160,21 @@ pub fn parse(gpa: Allocator, source: [:0]const u8, mode: Mode) Allocator.Error!A
if (token.tag == .eof) break;
}
+ var tokens_slice = tokens.toOwnedSlice();
+ errdefer tokens_slice.deinit(gpa);
+ return parseTokens(gpa, source, tokens_slice, mode);
+}
+
+pub fn parseTokens(
+ gpa: Allocator,
+ source: [:0]const u8,
+ tokens: Ast.TokenList.Slice,
+ mode: Mode,
+) Allocator.Error!Ast {
var parser: Parse = .{
.source = source,
.gpa = gpa,
- .tokens = tokens.slice(),
+ .tokens = tokens,
.errors = .{},
.nodes = .{},
.extra_data = .{},
@@ -194,7 +205,7 @@ pub fn parse(gpa: Allocator, source: [:0]const u8, mode: Mode) Allocator.Error!A
return Ast{
.source = source,
.mode = mode,
- .tokens = tokens.toOwnedSlice(),
+ .tokens = tokens,
.nodes = parser.nodes.toOwnedSlice(),
.extra_data = extra_data,
.errors = errors,
diff --git a/lib/std/zig/TokenSmith.zig b/lib/std/zig/TokenSmith.zig
@@ -0,0 +1,277 @@
+//! Generates a list of tokens and a valid corresponding source.
+//! Smithed intertoken content is a non-goal of this.
+
+const std = @import("../std.zig");
+const Smith = std.testing.Smith;
+const Token = std.zig.Token;
+const TokenList = std.zig.Ast.TokenList;
+const TokenSmith = @This();
+
+source_buf: [4096]u8,
+source_len: u32,
+tag_buf: [512]Token.Tag,
+start_buf: [512]std.zig.Ast.ByteOffset,
+tags_len: u16,
+
+/// Builds the length weights for the variable-length part of a token.
+///
+/// `space` is what is left of `source_buf` after excluding its final byte, the
+/// `source_len` bytes already written and the `reserve` bytes the caller still
+/// appends after the slice. Lengths `min...space` get weight 1 and lengths
+/// `min...15` get weight `space`, presumably to favor short symbols.
+///
+/// It is asserted that `space` is at least 15.
+fn symbolLenWeights(t: *TokenSmith, min: u32, reserve: u32) [2]Smith.Weight {
+    @disableInstrumentation();
+    const usable: u32 = t.source_buf.len - 1;
+    const space = usable - t.source_len - reserve;
+    std.debug.assert(space >= 15);
+
+    const any_len: Smith.Weight = .rangeAtMost(u32, min, space, 1);
+    const short_len: Smith.Weight = .rangeAtMost(u32, min, 15, space);
+    return .{ any_len, short_len };
+}
+
+/// Generates a list of token tags and start offsets together with source text
+/// that is meant to tokenize to exactly that list.
+///
+/// Tokens are added until the tag buffer or the source buffer is nearly full,
+/// or until `smith.eosWeightedSimple(7, 1)` returns `true`. A drawn `.eof` tag
+/// is skipped. Tokens with a fixed lexeme are written verbatim; the remaining
+/// tags get smithed text that is then patched so it still forms a single token
+/// of that tag. Every token except `.invalid` is followed by a newline.
+///
+/// The result always ends with an `.eof` tag, and `source_buf[source_len]` is 0.
+pub fn gen(smith: *Smith) TokenSmith {
+    @disableInstrumentation();
+    var t: TokenSmith = .{
+        .source_buf = undefined,
+        .source_len = 0,
+        .tag_buf = undefined,
+        .start_buf = undefined,
+        .tags_len = 0,
+    };
+
+    const max_lexeme_len = comptime max: {
+        var max: usize = 0;
+        for (std.meta.tags(Token.Tag)) |tag| {
+            max = @max(max, if (tag.lexeme()) |s| s.len else 0);
+        }
+        break :max max;
+    } + 1; // + space
+    const symbol_reserved = 15 + 4; // 4 = doc comment: "///\n"
+    const max_output_bytes = @max(symbol_reserved, max_lexeme_len);
+
+    // Keep room for up to two tags per iteration plus the final `.eof`, and for
+    // the largest fixed output plus the terminating 0 byte.
+    while (t.tags_len + 2 < t.tag_buf.len - 1 and
+        t.source_len + max_output_bytes < t.source_buf.len - 1 and
+        !smith.eosWeightedSimple(7, 1))
+    {
+        const tag = smith.value(Token.Tag);
+        if (tag == .eof) continue;
+        t.tag_buf[t.tags_len] = tag;
+        t.start_buf[t.tags_len] = t.source_len;
+        t.tags_len += 1;
+
+        if (tag.lexeme()) |lexeme| {
+            @memcpy(t.source_buf[t.source_len..][0..lexeme.len], lexeme);
+            t.source_len += @intCast(lexeme.len);
+
+            if (tag == .invalid_periodasterisks) {
+                // An extra `.asterisk` token is recorded, starting at the last
+                // byte of the lexeme.
+                t.tag_buf[t.tags_len] = .asterisk;
+                t.start_buf[t.tags_len] = t.source_len - 1;
+                t.tags_len += 1;
+            }
+
+            t.source_buf[t.source_len] = '\n';
+            t.source_len += 1;
+        } else sw: switch (tag) {
+            .invalid => {
+                // While there are multiple ways invalid may be hit,
+                // it is unlikely the source will be inspected.
+                t.source_buf[t.source_len] = 0;
+                t.source_len += 1;
+            },
+            .identifier => {
+                const start = smith.valueWeighted(u8, &.{
+                    .rangeAtMost(u8, 'a', 'z', 1),
+                    .rangeAtMost(u8, '@', 'Z', 1), // @, A...Z
+                    .value(u8, '_', 1),
+                });
+                t.source_buf[t.source_len] = start;
+                t.source_len += 1;
+                // `@` starts a quoted identifier; the rest is produced by the
+                // string literal case.
+                if (start == '@') continue :sw .string_literal;
+
+                const len_weights = t.symbolLenWeights(0, 1);
+                const len = smith.sliceWeighted(
+                    t.source_buf[t.source_len..],
+                    &len_weights,
+                    &.{
+                        .rangeAtMost(u8, 'a', 'z', 1),
+                        .rangeAtMost(u8, 'A', 'Z', 1),
+                        .rangeAtMost(u8, '0', '9', 1),
+                        .value(u8, '_', 1),
+                    },
+                );
+                // A generated keyword gets its first byte replaced so it stays an identifier.
+                if (Token.getKeyword(t.source_buf[t.source_len - 1 ..][0 .. len + 1]) != null) {
+                    t.source_buf[t.source_len - 1] = '_';
+                }
+                t.source_len += len;
+
+                t.source_buf[t.source_len] = '\n';
+                t.source_len += 1;
+            },
+            .char_literal, .string_literal => |kind| {
+                const end: u8 = switch (kind) {
+                    .char_literal => '\'',
+                    .string_literal => '"',
+                    else => unreachable,
+                };
+
+                t.source_buf[t.source_len] = end;
+                t.source_len += 1;
+
+                const len_weights = t.symbolLenWeights(0, 2);
+                const len = smith.sliceWeighted(
+                    t.source_buf[t.source_len..],
+                    &len_weights,
+                    &.{
+                        .rangeAtMost(u8, 0x20, 0x7e, 1),
+                        .value(u8, '\\', 15),
+                    },
+                );
+                // Replace unescaped delimiters, and a trailing lone backslash,
+                // with spaces so the literal ends exactly at the closing
+                // delimiter written below.
+                var start_escape = false;
+                for (t.source_buf[t.source_len..][0..len]) |*c| {
+                    if (!start_escape and c.* == end) c.* = ' ';
+                    start_escape = !start_escape and c.* == '\\';
+                }
+                if (start_escape) t.source_buf[t.source_len..][len - 1] = ' ';
+                t.source_len += len;
+
+                t.source_buf[t.source_len] = end;
+                t.source_buf[t.source_len + 1] = '\n';
+                t.source_len += 2;
+            },
+            .multiline_string_literal_line => {
+                t.source_buf[t.source_len..][0..2].* = @splat('\\');
+                t.source_len += 2;
+
+                const len_weights = t.symbolLenWeights(0, 1);
+                t.source_len += smith.sliceWeighted(
+                    t.source_buf[t.source_len..],
+                    &len_weights,
+                    &.{.rangeAtMost(u8, 0x20, 0x7e, 1)},
+                );
+
+                t.source_buf[t.source_len] = '\n';
+                t.source_len += 1;
+            },
+            .number_literal => {
+                t.source_buf[t.source_len] = smith.valueRangeAtMost(u8, '0', '9');
+                t.source_len += 1;
+
+                const len_weights = t.symbolLenWeights(0, 1);
+                const len = smith.sliceWeighted(
+                    t.source_buf[t.source_len..],
+                    &len_weights,
+                    &.{
+                        .rangeAtMost(u8, '0', '9', 8),
+                        .rangeAtMost(u8, 'a', 'z', 1),
+                        .rangeAtMost(u8, 'A', 'Z', 1),
+                        .value(u8, '+', 1),
+                        .rangeAtMost(u8, '-', '.', 1), // -, .
+                    },
+                );
+
+                var no_period = false;
+                var not_exponent = true;
+                for (t.source_buf[t.source_len..][0..len], 0..) |*c, i| {
+                    // A period is invalid once periods are no longer allowed, or
+                    // as the final character. Only periods are affected: `and`
+                    // binds tighter than `or`, so the unparenthesized form
+                    // overwrote the final character whatever it was.
+                    const invalid_period = c.* == '.' and (no_period or i + 1 == len);
+                    const is_exponent = c.* == '-' or c.* == '+';
+                    const invalid_exponent = not_exponent and is_exponent;
+                    const valid_exponent = !not_exponent and is_exponent;
+                    if (invalid_period or invalid_exponent) c.* = '0';
+                    no_period |= c.* == '.' or valid_exponent;
+                    // A sign is only valid directly after an exponent marker.
+                    not_exponent = switch (c.*) {
+                        'e', 'E', 'p', 'P' => false,
+                        else => true,
+                    };
+                }
+
+                t.source_len += len;
+                t.source_buf[t.source_len] = '\n';
+                t.source_len += 1;
+            },
+            .builtin => {
+                t.source_buf[t.source_len] = '@';
+                t.source_len += 1;
+
+                const len_weights = t.symbolLenWeights(1, 1);
+                const len = smith.sliceWeighted(
+                    t.source_buf[t.source_len..],
+                    &len_weights,
+                    &.{
+                        .rangeAtMost(u8, 'a', 'z', 1),
+                        .rangeAtMost(u8, 'A', 'Z', 1),
+                        .rangeAtMost(u8, '0', '9', 1),
+                        .value(u8, '_', 1),
+                    },
+                );
+                // The name must not start with a digit.
+                if (t.source_buf[t.source_len] >= '0' and t.source_buf[t.source_len] <= '9') {
+                    t.source_buf[t.source_len] = '_';
+                }
+                t.source_len += len;
+
+                t.source_buf[t.source_len] = '\n';
+                t.source_len += 1;
+            },
+            .doc_comment, .container_doc_comment => |kind| {
+                t.source_buf[t.source_len..][0..2].* = "//".*;
+                t.source_buf[t.source_len..][2] = switch (kind) {
+                    .doc_comment => '/',
+                    .container_doc_comment => '!',
+                    else => unreachable,
+                };
+                t.source_len += 3;
+
+                const len_weights = t.symbolLenWeights(0, 1);
+                const len = smith.sliceWeighted(
+                    t.source_buf[t.source_len..],
+                    &len_weights,
+                    &.{
+                        .rangeAtMost(u8, 0x20, 0x7e, 1),
+                        .rangeAtMost(u8, 0x80, 0xff, 1),
+                    },
+                );
+                // Avoid starting a doc comment's text with a fourth '/'.
+                if (kind == .doc_comment and len != 0 and t.source_buf[t.source_len] == '/') {
+                    t.source_buf[t.source_len] = ' ';
+                }
+                t.source_len += len;
+
+                t.source_buf[t.source_len] = '\n';
+                t.source_len += 1;
+            },
+            else => unreachable,
+        }
+    }
+
+    t.tag_buf[t.tags_len] = .eof;
+    t.start_buf[t.tags_len] = t.source_len;
+    t.tags_len += 1;
+    t.source_buf[t.source_len] = 0;
+    return t;
+}
+
+/// Returns the first `source_len` bytes of `source_buf` as a 0-terminated slice.
+/// The slice points into `t`, so it is only valid while `t` is.
+pub fn source(t: *TokenSmith) [:0]u8 {
+ return t.source_buf[0..t.source_len :0];
+}
+
+/// Returns a `TokenList.Slice` view of the `tags_len` generated tokens, with
+/// the `tag` and `start` columns pointing at `tag_buf` and `start_buf`.
+///
+/// The Slice is not backed by a MultiArrayList, so calling deinit or toMultiArrayList is illegal.
+pub fn list(t: *TokenSmith) TokenList.Slice {
+ var slice: TokenList.Slice = .{
+ .ptrs = undefined,
+ .len = t.tags_len,
+ .capacity = t.tags_len,
+ };
+ // Both column pointers are assigned below; this check fails to compile if
+ // the number of columns ever differs from two.
+ comptime std.debug.assert(slice.ptrs.len == 2);
+ slice.ptrs[@intFromEnum(TokenList.Field.tag)] = @ptrCast(&t.tag_buf);
+ slice.ptrs[@intFromEnum(TokenList.Field.start)] = @ptrCast(&t.start_buf);
+ return slice;
+}
+
+// Fuzz test: runs `checkSource` through `std.testing.fuzz`.
+test TokenSmith {
+ try std.testing.fuzz({}, checkSource, .{});
+}
+
+/// Fuzz body: generates tokens with `gen` and checks that `std.zig.Tokenizer`,
+/// run over the generated source, yields the same tag and start offset for
+/// every generated token. The comparison stops after the first `.invalid` tag.
+fn checkSource(_: void, smith: *Smith) !void {
+    var t: TokenSmith = .gen(smith);
+    const count = t.tags_len;
+    try std.testing.expectEqual(Token.Tag.eof, t.tag_buf[count - 1]);
+
+    var tokenizer: std.zig.Tokenizer = .init(t.source());
+    for (0..count) |idx| {
+        const tok = tokenizer.next();
+        try std.testing.expectEqual(tok.tag, t.tag_buf[idx]);
+        try std.testing.expectEqual(tok.loc.start, t.start_buf[idx]);
+        if (t.tag_buf[idx] == .invalid) break;
+    }
+}
diff --git a/lib/std/zig/parser_test.zig b/lib/std/zig/parser_test.zig
@@ -6420,14 +6420,9 @@ test "fuzz ast parse" {
try std.testing.fuzz({}, fuzzTestOneParse, .{});
}
-fn fuzzTestOneParse(_: void, input: []const u8) !void {
- // The first byte holds if zig / zon
- if (input.len == 0) return;
- const mode: std.zig.Ast.Mode = if (input[0] & 1 == 0) .zig else .zon;
- const bytes = input[1..];
-
+fn fuzzTestOneParse(_: void, smith: *std.testing.Smith) !void {
+ const mode = smith.value(std.zig.Ast.Mode);
+ var tokens: std.zig.TokenSmith = .gen(smith);
var fba: std.heap.FixedBufferAllocator = .init(&fixed_buffer_mem);
- const allocator = fba.allocator();
- const source = allocator.dupeZ(u8, bytes) catch return;
- _ = std.zig.Ast.parse(allocator, source, mode) catch return;
+ _ = std.zig.Ast.parseTokens(fba.allocator(), tokens.source(), tokens.list(), mode) catch return;
}
diff --git a/lib/std/zig/tokenizer.zig b/lib/std/zig/tokenizer.zig
@@ -713,6 +713,9 @@ pub const Tokenizer = struct {
self.index += 1;
switch (self.buffer[self.index]) {
0, '\n' => result.tag = .invalid,
+ 0x01...0x09, 0x0b...0x1f, 0x7f => {
+ continue :state .invalid;
+ },
else => continue :state .string_literal,
}
},
@@ -1721,15 +1724,22 @@ fn testTokenize(source: [:0]const u8, expected_token_tags: []const Token.Tag) !v
try std.testing.expectEqual(source.len, last_token.loc.end);
}
-fn testPropertiesUpheld(_: void, source: []const u8) !void {
- var source0_buf: [512]u8 = undefined;
- if (source.len + 1 > source0_buf.len)
- return;
- @memcpy(source0_buf[0..source.len], source);
- source0_buf[source.len] = 0;
- const source0 = source0_buf[0..source.len :0];
+fn testPropertiesUpheld(_: void, smith: *std.testing.Smith) !void {
+ @disableInstrumentation();
+ var source_buf: [512]u8 = undefined;
+ const len = smith.sliceWeightedBytes(source_buf[0 .. source_buf.len - 1], &.{
+ .rangeAtMost(u8, 0x00, 0xff, 1),
+ .rangeAtMost(u8, 0x20, 0x7e, 4),
+ .rangeAtMost(u8, 0x00, 0x1f, 1),
+ .value(u8, 0, 6),
+ .value(u8, ' ', 6),
+ .rangeAtMost(u8, '\t', '\n', 6), // \t, \n
+ .value(u8, '\r', 3),
+ });
+ source_buf[len] = 0;
+ const source = source_buf[0..len :0];
- var tokenizer = Tokenizer.init(source0);
+ var tokenizer = Tokenizer.init(source);
var tokenization_failed = false;
while (true) {
const token = tokenizer.next();
@@ -1742,12 +1752,12 @@ fn testPropertiesUpheld(_: void, source: []const u8) !void {
tokenization_failed = true;
// Property: invalid token always ends at newline or eof
- try std.testing.expect(source0[token.loc.end] == '\n' or source0[token.loc.end] == 0);
+ try std.testing.expect(source[token.loc.end] == '\n' or source[token.loc.end] == 0);
},
.eof => {
// Property: EOF token is always 0-length at end of source.
- try std.testing.expectEqual(source0.len, token.loc.start);
- try std.testing.expectEqual(source0.len, token.loc.end);
+ try std.testing.expectEqual(source.len, token.loc.start);
+ try std.testing.expectEqual(source.len, token.loc.end);
break;
},
else => continue,
@@ -1755,7 +1765,7 @@ fn testPropertiesUpheld(_: void, source: []const u8) !void {
}
if (tokenization_failed) return;
- for (source0) |cur| {
+ for (source) |cur| {
// Property: No null byte allowed except at end.
if (cur == 0) {
return error.TestUnexpectedResult;
diff --git a/src/Compilation.zig b/src/Compilation.zig
@@ -186,7 +186,7 @@ verbose_link: bool,
link_depfile: ?[]const u8,
disable_c_depfile: bool,
stack_report: bool,
-debug_compiler_runtime_libs: bool,
+debug_compiler_runtime_libs: ?std.builtin.OptimizeMode,
debug_compile_errors: bool,
/// Do not check this field directly. Instead, use the `debugIncremental` wrapper function.
debug_incremental: bool,
@@ -1749,7 +1749,7 @@ pub const CreateOptions = struct {
link_depfile: ?[]const u8 = null,
verbose_cimport: bool = false,
verbose_llvm_cpu_features: bool = false,
- debug_compiler_runtime_libs: bool = false,
+ debug_compiler_runtime_libs: ?std.builtin.OptimizeMode = null,
debug_compile_errors: bool = false,
debug_incremental: bool = false,
/// Normally when you create a `Compilation`, Zig will automatically build
@@ -2201,7 +2201,8 @@ pub fn create(gpa: Allocator, arena: Allocator, io: Io, diag: *CreateDiagnostic,
cache.hash.addBytes(options.root_name);
cache.hash.add(options.config.wasi_exec_model);
cache.hash.add(options.config.san_cov_trace_pc_guard);
- cache.hash.add(options.debug_compiler_runtime_libs);
+ cache.hash.add(options.debug_compiler_runtime_libs != null);
+ if (options.debug_compiler_runtime_libs) |mode| cache.hash.add(mode);
// The actual emit paths don't matter. They're only user-specified if we aren't using the
// cache! However, it does matter whether the files are emitted at all.
cache.hash.add(options.emit_bin != .no);
@@ -8373,8 +8374,8 @@ pub fn addLinkLib(comp: *Compilation, lib_name: []const u8) !void {
/// This decides the optimization mode for all zig-provided libraries, including
/// compiler-rt, libcxx, libc, libunwind, etc.
pub fn compilerRtOptMode(comp: Compilation) std.builtin.OptimizeMode {
- if (comp.debug_compiler_runtime_libs) {
- return .Debug;
+ if (comp.debug_compiler_runtime_libs) |mode| {
+ return mode;
}
const target = &comp.root_mod.resolved_target.result;
switch (comp.root_mod.optimize_mode) {
diff --git a/src/codegen/llvm.zig b/src/codegen/llvm.zig
@@ -1116,7 +1116,7 @@ pub const Object = struct {
// needs to for better fuzzing logic.
.IndirectCalls = false,
.TraceBB = false,
- .TraceCmp = options.fuzz,
+ .TraceCmp = false,
.TraceDiv = false,
.TraceGep = false,
.Use8bitCounters = false,
diff --git a/src/link/Elf/relocatable.zig b/src/link/Elf/relocatable.zig
@@ -95,10 +95,11 @@ pub fn flushStaticLib(elf_file: *Elf, comp: *Compilation) !void {
const total_size: usize = blk: {
var pos: usize = elf.ARMAG.len;
pos += @sizeOf(elf.ar_hdr) + ar_symtab.size(.p64);
+ pos = mem.alignForward(usize, pos, 2);
if (ar_strtab.size() > 0) {
- pos = mem.alignForward(usize, pos, 2);
pos += @sizeOf(elf.ar_hdr) + ar_strtab.size();
+ pos = mem.alignForward(usize, pos, 2);
}
for (files.items) |index| {
@@ -108,9 +109,9 @@ pub fn flushStaticLib(elf_file: *Elf, comp: *Compilation) !void {
.object => |x| &x.output_ar_state,
else => unreachable,
};
- pos = mem.alignForward(usize, pos, 2);
state.file_off = pos;
pos += @sizeOf(elf.ar_hdr) + (math.cast(usize, state.size) orelse return error.Overflow);
+ pos = mem.alignForward(usize, pos, 2);
}
break :blk pos;
@@ -131,17 +132,18 @@ pub fn flushStaticLib(elf_file: *Elf, comp: *Compilation) !void {
// Write symtab
try ar_symtab.write(.p64, elf_file, &writer);
+ if (!mem.isAligned(writer.end, 2)) try writer.writeByte(0);
// Write strtab
if (ar_strtab.size() > 0) {
- if (!mem.isAligned(writer.end, 2)) try writer.writeByte(0);
try ar_strtab.write(&writer);
+ if (!mem.isAligned(writer.end, 2)) try writer.writeByte(0);
}
// Write object files
for (files.items) |index| {
- if (!mem.isAligned(writer.end, 2)) try writer.writeByte(0);
try elf_file.file(index).?.writeAr(elf_file, &writer);
+ if (!mem.isAligned(writer.end, 2)) try writer.writeByte(0);
}
assert(writer.buffered().len == total_size);
diff --git a/src/main.zig b/src/main.zig
@@ -708,7 +708,8 @@ const usage_build_generic =
\\ --debug-log [scope] Enable printing debug/info log messages for scope
\\ --debug-compile-errors Crash with helpful diagnostics at the first compile error
\\ --debug-link-snapshot Enable dumping of the linker's state in JSON format
- \\ --debug-rt Debug compiler runtime libraries
+ \\ --debug-rt[=mode] Build compiler runtime libraries with [mode] optimization
+ \\ (Debug if [=mode] is omitted)
\\ --debug-incremental Enable incremental compilation debug features
\\
;
@@ -928,7 +929,7 @@ fn buildOutputType(
var minor_subsystem_version: ?u16 = null;
var mingw_unicode_entry_point: bool = false;
var enable_link_snapshots: bool = false;
- var debug_compiler_runtime_libs = false;
+ var debug_compiler_runtime_libs: ?std.builtin.OptimizeMode = null;
var install_name: ?[]const u8 = null;
var hash_style: link.File.Lld.Elf.HashStyle = .both;
var entitlements: ?[]const u8 = null;
@@ -1382,7 +1383,9 @@ fn buildOutputType(
enable_link_snapshots = true;
}
} else if (mem.eql(u8, arg, "--debug-rt")) {
- debug_compiler_runtime_libs = true;
+ debug_compiler_runtime_libs = .Debug;
+ } else if (mem.cutPrefix(u8, arg, "--debug-rt=")) |rest| {
+ debug_compiler_runtime_libs = parseOptimizeMode(rest);
} else if (mem.eql(u8, arg, "--debug-incremental")) {
if (build_options.enable_debug_extensions) {
debug_incremental = true;
diff --git a/test/standalone/libfuzzer/main.zig b/test/standalone/libfuzzer/main.zig
@@ -2,9 +2,7 @@ const std = @import("std");
const abi = std.Build.abi.fuzz;
const native_endian = @import("builtin").cpu.arch.endian();
-fn testOne(in: abi.Slice) callconv(.c) void {
- std.debug.assertReadable(in.toSlice());
-}
+fn testOne() callconv(.c) void {}
pub fn main(init: std.process.Init) !void {
const gpa = init.gpa;
@@ -19,7 +17,7 @@ pub fn main(init: std.process.Init) !void {
defer cache_dir.close(io);
abi.fuzzer_init(.fromSlice(cache_dir_path));
- abi.fuzzer_init_test(testOne, .fromSlice("test"));
+ abi.fuzzer_set_test(testOne, .fromSlice("test"));
abi.fuzzer_new_input(.fromSlice(""));
abi.fuzzer_new_input(.fromSlice("hello"));
diff --git a/test/tests.zig b/test/tests.zig
@@ -2310,7 +2310,7 @@ pub fn addCliTests(b: *std.Build) *Step {
return step;
}
-const ModuleTestOptions = struct {
+pub const ModuleTestOptions = struct {
test_filters: []const []const u8,
test_target_filters: []const []const u8,
test_extra_targets: bool,
@@ -2319,7 +2319,7 @@ const ModuleTestOptions = struct {
desc: []const u8,
optimize_modes: []const OptimizeMode,
include_paths: []const []const u8,
- test_default_only: bool,
+ test_only: ?TestOnly,
skip_single_threaded: bool,
skip_non_native: bool,
skip_spirv: bool,
@@ -2335,20 +2335,31 @@ const ModuleTestOptions = struct {
max_rss: usize = 0,
no_builtin: bool = false,
build_options: ?*Step.Options = null,
+
+ pub const TestOnly = union(enum) {
+ default: void,
+ fuzz: OptimizeMode,
+ };
};
pub fn addModuleTests(b: *std.Build, options: ModuleTestOptions) *Step {
const step = b.step(b.fmt("test-{s}", .{options.name}), options.desc);
- if (options.test_default_only) {
- const test_target = &test_targets[0];
+ if (options.test_only) |test_only| {
+ const test_target: TestTarget = switch (test_only) {
+ .default => test_targets[0],
+ .fuzz => |optimize| .{
+ .optimize_mode = optimize,
+ .use_llvm = true,
+ },
+ };
const resolved_target = b.resolveTargetQuery(test_target.target);
const triple_txt = resolved_target.query.zigTriple(b.allocator) catch @panic("OOM");
addOneModuleTest(b, step, test_target, &resolved_target, triple_txt, options);
return step;
}
- for_targets: for (&test_targets) |*test_target| {
+ for_targets: for (test_targets) |test_target| {
if (test_target.skip_modules.len > 0) {
for (test_target.skip_modules) |skip_mod| {
if (std.mem.eql(u8, options.name, skip_mod)) continue :for_targets;
@@ -2425,7 +2436,7 @@ pub fn addModuleTests(b: *std.Build, options: ModuleTestOptions) *Step {
fn addOneModuleTest(
b: *std.Build,
step: *Step,
- test_target: *const TestTarget,
+ test_target: TestTarget,
resolved_target: *const std.Build.ResolvedTarget,
triple_txt: []const u8,
options: ModuleTestOptions,