commit 476c3a1f379eadbd200d568340a2f479d5b6d260 (tree)
parent 0978566db8b7ed2b730cf01a7d359e9b52ec66ec
Author: Kendall Condon <goon.pri.low@gmail.com>
Date: Mon, 28 Jul 2025 13:24:31 -0400
zig fmt: add a fuzz test
This fuzz test checks several properties of zig fmt are upheld, namely
idempotency, textual equivalence, and no trailing whitespace.
All functions in the fuzz test have @disableInstrumentation for
performance and since their branches are not interesting to the fuzzer.
Diffstat:
1 file changed, 261 insertions(+), 0 deletions(-)
diff --git a/lib/std/zig/parser_test.zig b/lib/std/zig/parser_test.zig
@@ -1,6 +1,7 @@
const std = @import("std");
const Io = std.Io;
const Allocator = std.mem.Allocator;
+const Token = std.zig.Token;
test "zig fmt: remove extra whitespace at start and end of file with comment between" {
try testTransform(
@@ -6426,3 +6427,263 @@ fn fuzzTestOneParse(_: void, smith: *std.testing.Smith) !void {
var fba: std.heap.FixedBufferAllocator = .init(&fixed_buffer_mem);
_ = std.zig.Ast.parseTokens(fba.allocator(), tokens.source(), tokens.list(), mode) catch return;
}
+
+// Registers `fuzzRender` as the fuzz target. The `{}` argument is the `void`
+// context that `fuzzRender` takes as its first parameter.
+test "zig fmt: fuzz" {
+ try std.testing.fuzz({}, fuzzRender, .{});
+}
+
+/// Tokenizes `source` into a list usable by `std.zig.Ast.parseTokens` and,
+/// while doing so, classifies the input for the fuzz checks.
+///
+/// Result fields:
+/// - `toks`: every token of `source`, including the final `.eof`.
+/// - `maybe_rewritable`: the source contains something rendering is allowed
+///   to change textually: a byte-order mark, `asm`, a qualifier keyword, a
+///   pointer-cast builtin, an `@`-prefixed identifier, or a labeled
+///   construct affected by #24507. The flag is only ever set, never cleared.
+/// - `skip_idempotency`: the input hits #24507, so the idempotency check
+///   should be skipped.
+///
+/// Errors:
+/// - `error.SkipZigTest` when the source contains an invalid token or a
+///   container doc comment directly after `{` (#23754).
+/// - `error.OutOfMemory` when `fba` cannot hold the token list.
+fn parseTokens(
+    fba: Allocator,
+    source: [:0]const u8,
+) error{ SkipZigTest, OutOfMemory }!struct {
+    toks: std.zig.Ast.TokenList,
+    maybe_rewritable: bool,
+    skip_idempotency: bool,
+} {
+    @disableInstrumentation();
+    // Byte-order marker is stripped
+    var maybe_rewritable = std.mem.startsWith(u8, source, "\xEF\xBB\xBF");
+    var skip_idempotency = false; // This should be able to be removed once all the bugs are fixed
+
+    var tokens: std.zig.Ast.TokenList = .{};
+    try tokens.ensureTotalCapacity(fba, source.len / 2);
+    var tokenizer: std.zig.Tokenizer = .init(source);
+    while (true) {
+        const tok = tokenizer.next();
+        switch (tok.tag) {
+            .invalid,
+            .invalid_periodasterisks,
+            => return error.SkipZigTest,
+            // Extra colons can be removed
+            .keyword_asm,
+            // Qualifiers can be reordered
+            // keyword_const is intentionally excluded since it is used in other contexts and
+            // having only one qualifier will never lead to reordering.
+            .keyword_addrspace,
+            .keyword_align,
+            .keyword_allowzero,
+            .keyword_callconv,
+            .keyword_linksection,
+            .keyword_volatile,
+            => maybe_rewritable = true,
+            // Pointer casts can be reordered
+            .builtin => {
+                // Compare the name without its leading `@`.
+                const name = source[tok.loc.start + 1 .. tok.loc.end];
+                for ([_][]const u8{
+                    "ptrCast",
+                    "alignCast",
+                    "addrSpaceCast",
+                    "constCast",
+                    "volatileCast",
+                }) |id| {
+                    if (std.mem.eql(u8, name, id)) {
+                        maybe_rewritable = true;
+                        break;
+                    }
+                }
+            },
+            // Quoted identifiers can be unquoted
+            .identifier => maybe_rewritable = maybe_rewritable or source[tok.loc.start] == '@',
+            // #23754
+            .container_doc_comment,
+            => if (std.mem.endsWith(Token.Tag, tokens.items(.tag), &.{.l_brace})) {
+                return error.SkipZigTest;
+            },
+            // #24507
+            .keyword_inline,
+            .keyword_for,
+            .keyword_while,
+            .l_brace,
+            => if (std.mem.endsWith(Token.Tag, tokens.items(.tag), &.{ .identifier, .colon })) {
+                maybe_rewritable = true;
+                skip_idempotency = true;
+            },
+            else => {},
+        }
+        try tokens.append(fba, .{
+            .tag = tok.tag,
+            .start = @intCast(tok.loc.start),
+        });
+        if (tok.tag == .eof)
+            break;
+    }
+    return .{
+        .toks = tokens,
+        .maybe_rewritable = maybe_rewritable,
+        .skip_idempotency = skip_idempotency,
+    };
+}
+
+/// Reports whether `rendered` differs from `source` in anything other than
+/// whitespace and added commas.
+///
+/// Whitespace in `source` (space, `\r`, `\t`, `\n`) and in `rendered` (space,
+/// `\n`) is skipped. Every comma in `source` must also be present in
+/// `rendered`; extra commas in `rendered` are ignored. The terminating
+/// sentinels take part in the comparison, so leftover non-whitespace
+/// characters in either input count as a difference.
+///
+/// Returns `true` on the first mismatch and `false` when the two inputs are
+/// equivalent.
+fn isRewritten(source: [:0]const u8, rendered: [:0]const u8) bool {
+    @disableInstrumentation();
+    var i: usize = 0;
+    // The slice is widened by one so that the sentinel is compared as well.
+    for (source[0 .. source.len + 1]) |c| switch (c) {
+        ' ', '\r', '\t', '\n' => {},
+        else => while (true) {
+            defer i += 1;
+            switch (rendered[i]) {
+                ' ', '\n' => {},
+                // A comma only has to match when `source` has one here.
+                ',' => if (c == ',') break,
+                else => |r| if (c != r) return true else break,
+            }
+        },
+    };
+    std.debug.assert(i >= rendered.len);
+    return false;
+}
+
+/// Validates the text found between two tokens, one line at a time.
+///
+/// While formatting is enabled, a line must not end in a space, tab or
+/// carriage return (`error.TrailingLineWhitespace`), and there must not be
+/// three newlines in a row (`error.DoubleEmptyLine`).
+///
+/// `fmt_on` is updated when a line's comment reads `zig fmt: off` or
+/// `zig fmt: on`. A line is checked if formatting was on when the line
+/// started or if the line itself turns it back on.
+fn checkBetweenTokens(src: []const u8, fmt_on: *bool) error{
+    TrailingLineWhitespace,
+    DoubleEmptyLine,
+}!void {
+    @disableInstrumentation();
+    var line_start: usize = 0;
+    while (true) {
+        const newline = std.mem.indexOfScalarPos(u8, src, line_start, '\n');
+        const line = src[line_start .. newline orelse src.len];
+
+        // State at the start of the line decides whether it is checked,
+        // unless the line itself re-enables formatting.
+        const was_on = fmt_on.*;
+        var turned_on = false;
+        if (std.mem.indexOfScalar(u8, line, '/')) |slash| {
+            // Drop the two-character comment introducer before trimming.
+            const text = std.mem.trim(u8, line[slash + 2 ..], &std.ascii.whitespace);
+            if (std.mem.eql(u8, text, "zig fmt: off")) {
+                fmt_on.* = false;
+            } else if (std.mem.eql(u8, text, "zig fmt: on")) {
+                fmt_on.* = true;
+                turned_on = true;
+            }
+        }
+
+        // The final segment has no newline, so there is no line end to check.
+        const nl = newline orelse return;
+        if ((was_on or turned_on) and nl != 0) {
+            const prev = src[nl - 1];
+            if (prev == ' ' or prev == '\t' or prev == '\r')
+                return error.TrailingLineWhitespace;
+            if (prev == '\n' and nl != 1 and src[nl - 2] == '\n')
+                return error.DoubleEmptyLine;
+        }
+        line_start = nl + 1;
+    }
+}
+
+/// Tokenizes `rendered` and compares its token tags with `expected_tags`
+/// (the tags of the original source, terminated by `.eof`).
+///
+/// Extra `.comma` tokens in `rendered` are ignored while formatting is on.
+/// Any other tag mismatch sets `rewritten` in the result. The text between
+/// tokens is passed to `checkBetweenTokens`. While formatting is on, every
+/// `.multiline_string_literal_line` token except the very first token must be
+/// preceded by a newline.
+///
+/// Returns the token list of `rendered`, ending in a single `.eof`, together
+/// with the `rewritten` flag.
+fn reparseTokens(
+    fba: Allocator,
+    rendered: [:0]const u8,
+    expected_tags: [:.eof]const Token.Tag,
+) error{
+    OutOfMemory,
+    SameLineMultilineStringLiteral,
+    TrailingLineWhitespace,
+    DoubleEmptyLine,
+}!struct {
+    toks: std.zig.Ast.TokenList,
+    rewritten: bool,
+} {
+    @disableInstrumentation();
+    var rewritten = false;
+    var tokens: std.zig.Ast.TokenList = .{};
+    var last_token_end: usize = 0;
+    var fmt_on = true;
+
+    try tokens.ensureTotalCapacity(fba, expected_tags.len + 2); // 1 for EOF and 1 for maybe a comma
+    var tokenizer: std.zig.Tokenizer = .init(rendered);
+    var i: usize = 0;
+    while (true) {
+        const tok = tokenizer.next();
+        try tokens.append(fba, .{
+            .tag = tok.tag,
+            .start = @intCast(tok.loc.start),
+        });
+
+        const between = rendered[last_token_end..tok.loc.start];
+        last_token_end = tok.loc.end;
+        try checkBetweenTokens(between, &fmt_on);
+        if (tok.tag == .multiline_string_literal_line and fmt_on) blk: {
+            if (tokens.len == 1)
+                break :blk; // first token
+            if (std.mem.indexOfScalar(u8, between, '\n') == null)
+                return error.SameLineMultilineStringLiteral;
+        }
+        if (tok.tag == expected_tags[i]) {
+            if (tok.tag == .eof)
+                break;
+            i += 1;
+        } else {
+            if (tok.tag != .comma or !fmt_on)
+                rewritten = true;
+            // `rendered` is exhausted but `expected_tags` is not. No later
+            // token can match, so stop here instead of appending tokens
+            // until `fba` runs out of memory.
+            if (tok.tag == .eof)
+                break;
+        }
+    }
+    // Every expected tag was consumed unless a mismatch was recorded.
+    std.debug.assert(rewritten or i == expected_tags.len);
+    try checkBetweenTokens(rendered[last_token_end..], &fmt_on);
+
+    return .{ .toks = tokens, .rewritten = rewritten };
+}
+
+/// Fuzz target: fills a local buffer with bytes drawn from `smith`,
+/// NUL-terminates them and runs `fuzzRenderInner` on the result using a
+/// fixed buffer allocator over `fixed_buffer_mem`.
+///
+/// `error.OutOfMemory` from the inner run is treated as a pass; every other
+/// error is returned to the caller.
+fn fuzzRender(_: void, smith: *std.testing.Smith) !void {
+    @disableInstrumentation();
+
+    var bytes: [512]u8 = undefined;
+    // First table: candidate lengths. Second table: candidate byte values.
+    const len = smith.sliceWeighted(&bytes, &.{
+        .rangeLessThan(u32, 0, 32, 256),
+        .rangeLessThan(u32, 32, 64, 64),
+        .rangeLessThan(u32, 64, bytes.len, 1),
+    }, &.{
+        .rangeAtMost(u8, 0x20, 0x7e, 8),
+        .value(u8, '\n', 32),
+        .value(u8, '\t', 8),
+        .value(u8, '\r', 4),
+        .rangeAtMost(u8, 0x7f, 0xff, 1),
+    });
+    bytes[len] = 0;
+    const source = bytes[0..len :0];
+
+    var fixed: std.heap.FixedBufferAllocator = .init(&fixed_buffer_mem);
+    fuzzRenderInner(source, fixed.allocator()) catch |err| switch (err) {
+        // Running out of the fixed buffer says nothing about zig fmt.
+        error.OutOfMemory => return,
+        else => return err,
+    };
+}
+
+/// Runs every `zig fmt` check on one `source`:
+/// 1. Tokenize and parse; sources with parse errors are ignored.
+/// 2. Render, then require that text and token tags are preserved unless
+///    the source was flagged as possibly rewritable.
+/// 3. Parse the rendered output and require that it has no errors.
+/// 4. Unless flagged otherwise, render again and require identical output.
+///
+/// All allocations are made from `fba`.
+fn fuzzRenderInner(source: [:0]const u8, fba: Allocator) !void {
+    @disableInstrumentation();
+
+    const original = try parseTokens(fba, source);
+    const tree = try std.zig.Ast.parseTokens(fba, source, original.toks.slice(), .zig);
+    if (tree.errors.len != 0) return;
+
+    // #24507: `switch(x) { inline for (a) |a| a => {} }` is rendered as
+    // `switch(x) { { inline for (a) |a| a => {} }` because the AST takes the
+    // token before the case expression's first token as the `inline` token.
+    for (tree.nodes.items(.tag)) |tag| {
+        if (tag == .switch_case_inline or tag == .switch_case_inline_one)
+            return error.SkipZigTest;
+    }
+
+    var first_pass: std.Io.Writer.Allocating = .init(fba);
+    try first_pass.ensureUnusedCapacity(source.len + source.len / 2);
+    try tree.render(fba, &first_pass.writer, .{});
+    // The sentinel is written by hand: `toOwnedSliceSentinel` would
+    // reallocate the whole list to save space, which is useless for fixed
+    // buffer allocators.
+    try first_pass.writer.writeByte(0);
+    const with_sentinel = first_pass.written();
+    const rendered = with_sentinel[0 .. with_sentinel.len - 1 :0];
+
+    const must_match = !original.maybe_rewritable;
+
+    // Non-whitespace characters must match, so identifier names, numbers,
+    // comments, et cetera are preserved.
+    if (must_match and isRewritten(source, rendered))
+        return error.Rewritten;
+
+    // Token tags must match too, since removing whitespace can change how
+    // the text tokenizes.
+    const tags = original.toks.items(.tag);
+    const reparsed = try reparseTokens(fba, rendered, tags[0 .. tags.len - 1 :.eof]);
+    if (must_match and reparsed.rewritten)
+        return error.Rewritten;
+
+    // New commas and whitespace changes must not introduce an AST error.
+    const rendered_tree = try std.zig.Ast.parseTokens(fba, rendered, reparsed.toks.slice(), .zig);
+    if (rendered_tree.errors.len != 0)
+        return error.Rewritten;
+
+    if (original.skip_idempotency) return;
+
+    // Rendering the rendered output again must not change it.
+    var second_pass: std.Io.Writer.Allocating = .init(fba);
+    try second_pass.ensureUnusedCapacity(source.len);
+    try rendered_tree.render(fba, &second_pass.writer, .{});
+    try std.testing.expectEqualStrings(rendered, second_pass.written());
+}