MinGW: remove dependency on a C preprocessor for MinGW .def.in files (#35679) - zig

commit 0b22111bc94f6fc53f0565c386487ea772ae996c (tree)
parent 4f72106c859e71cc47bf53241387271ec3203a43
Author: Arthur Teixeira <arthurcarvalhot@yahoo.com.br>
Date:   Thu, 18 Jun 2026 00:01:17 +0200

MinGW: remove dependency on a C preprocessor for MinGW .def.in files (#35679)

closes #31955

Reviewed-on: https://codeberg.org/ziglang/zig/pulls/35679
Reviewed-by: Ryan Liptak <squeek502@noreply.codeberg.org>

Diffstat:
M build.zig  | 20 ++++++++++++++++++++
M src/libs/mingw.zig  | 66 +++++++++++++++++++-----------------------------------------------
A src/libs/mingw/Preprocessor.zig  | 975 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
A src/libs/mingw/Tokenizer.zig  | 365 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
M test/standalone/build.zig  | 9 +++++++++
A tools/check_mingw.zig  | 122 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++

6 files changed, 1510 insertions(+), 47 deletions(-)
diff --git a/build.zig b/build.zig
@@ -649,6 +649,26 @@ pub fn build(b: *std.Build) !void {
         update_mingw_step.dependOn(&b.addFail("The -Dmingw-src=... option is required for this step").step);
     }
 
+    const check_mingw_step = b.step("check-mingw", "Checks for mingw preprocessor regressions");
+    const mingw_preprocessor_mod = b.createModule(.{
+        .root_source_file = b.path("src/libs/mingw/Preprocessor.zig"),
+        .target = target,
+    });
+
+    const check_mingw_exe = b.addExecutable(.{
+        .name = "check_mingw",
+        .root_module = b.createModule(.{
+            .target = b.graph.host,
+            .root_source_file = b.path("tools/check_mingw.zig"),
+            .imports = &.{
+                .{ .name = "preprocessor", .module = mingw_preprocessor_mod },
+            },
+        }),
+    });
+    const check_mingw_run = b.addRunArtifact(check_mingw_exe);
+    check_mingw_run.addDirectoryArg(b.path("lib/libc/mingw"));
+    check_mingw_step.dependOn(&check_mingw_run.step);
+
     const test_incremental_step = b.step("test-incremental", "Run the incremental compilation test cases");
     try tests.addIncrementalTests(b, test_incremental_step, test_filters);
     if (!skip_test_incremental) test_step.dependOn(test_incremental_step);
diff --git a/src/libs/mingw.zig b/src/libs/mingw.zig
@@ -14,9 +14,12 @@ const dev = @import("../dev.zig");
 const def = @import("mingw/def.zig");
 const implib = @import("mingw/implib.zig");
 
+const Preprocessor = @import("mingw/Preprocessor.zig");
+
 test {
     _ = def;
     _ = implib;
+    _ = Preprocessor;
 }
 
 pub const CrtFile = enum {
@@ -273,22 +276,6 @@ pub fn buildImportLib(comp: *Compilation, lib_name: []const u8) !void {
     var o_dir = try comp.dirs.global_cache.handle.createDirPathOpen(io, o_sub_path, .{});
     defer o_dir.close(io);
 
-    const aro = @import("aro");
-    var diagnostics: aro.Diagnostics = .{
-        .output = .{ .to_list = .{ .arena = .init(gpa) } },
-    };
-    defer diagnostics.deinit();
-    var aro_comp = try aro.Compilation.init(.{
-        .gpa = gpa,
-        .arena = arena,
-        .io = io,
-        .diagnostics = &diagnostics,
-        .environ_map = null,
-    });
-    defer aro_comp.deinit();
-
-    aro_comp.target = .fromZigTarget(target.*);
-
     const include_dir = try comp.dirs.zig_lib.join(arena, &.{ "libc", "mingw", "def-include" });
 
     if (comp.verbose_cc) {
@@ -304,39 +291,24 @@ pub fn buildImportLib(comp: *Compilation, lib_name: []const u8) !void {
         };
     }
 
-    try aro_comp.search_path.append(gpa, .{ .path = include_dir, .kind = .normal });
-
-    const builtin_macros = try aro_comp.generateBuiltinMacros(.include_system_defines);
-    const def_file_source = try aro_comp.addSourceFromPath(def_file_path);
-
-    var pp = try aro.Preprocessor.init(&aro_comp, .{ .base_file = .unused });
-    defer pp.deinit();
-    pp.linemarkers = .none;
-    pp.preserve_whitespace = true;
-
-    try pp.preprocessSources(.{ .main = def_file_source, .builtin = builtin_macros });
-
-    if (aro_comp.diagnostics.output.to_list.messages.items.len != 0) {
-        var buffer: [64]u8 = undefined;
-        const stderr = try io.lockStderr(&buffer, null);
-        defer io.unlockStderr();
-        for (aro_comp.diagnostics.output.to_list.messages.items) |msg| {
-            if (msg.kind == .@"fatal error" or msg.kind == .@"error") {
-                msg.write(stderr.terminal(), true) catch |err| switch (err) {
-                    error.WriteFailed => return stderr.file_writer.err.?,
-                    error.Canceled, error.Unexpected => |e| return e,
-                };
-                return error.AroPreprocessorFailed;
-            }
-        }
-    }
-
     const members = members: {
-        var aw: Io.Writer.Allocating = .init(gpa);
-        errdefer aw.deinit();
-        try pp.prettyPrintTokens(&aw.writer, .result_only);
+        const input = pp: {
+            var aw: Io.Writer.Allocating = .init(gpa);
+            errdefer aw.deinit();
+
+            var pp_arena = std.heap.ArenaAllocator.init(gpa);
+            defer pp_arena.deinit();
+            var pp: Preprocessor = .{
+                .io = io,
+                .arena = pp_arena.allocator(),
+                .include_dir = include_dir,
+                .target = target,
+            };
+            try pp.preprocess(def_file_path);
+            try pp.prettyPrintTokens(&aw.writer);
 
-        const input = try aw.toOwnedSliceSentinel(0);
+            break :pp try aw.toOwnedSliceSentinel(0);
+        };
         defer gpa.free(input);
 
         const machine_type = target.toCoffMachine();
diff --git a/src/libs/mingw/Preprocessor.zig b/src/libs/mingw/Preprocessor.zig
@@ -0,0 +1,975 @@
+const std = @import("std");
+const Tokenizer = @import("./Tokenizer.zig");
+const Allocator = std.mem.Allocator;
+const Token = Tokenizer.Token;
+const mem = std.mem;
+const assert = std.debug.assert;
+
+test {
+    _ = Tokenizer;
+}
+
+/// Struct-of-arrays list type of the `tokens` field (the preprocessed output stream).
+const TokenList = std.MultiArrayList(Token);
+/// Plain growable token list; type of the `token_buf` scratch field.
+const RawTokenList = std.ArrayList(Token);
+
+/// Growable token buffer that macro expansion rewrites in place.
+const ExpandBuf = std.ArrayList(Token);
+
+const Preprocessor = @This();
+/// Macro table keyed by macro name.
+const DefineMap = std.StringArrayHashMapUnmanaged(Macro);
+
+/// Byte storage for token text that does not come from a source file
+/// (builtin macro values and the results of `##` pasting).
+const GeneratedTokens = std.ArrayList(u8);
+
+/// The tokens collected between the parentheses of a function-like macro invocation.
+const MacroArgument = []const Token;
+
+/// A loaded input file. `id` is the position of the entry in `sources`, which is how
+/// tokens refer back to the text they were cut from.
+pub const Source = struct {
+    pub const Id = usize;
+    /// Sentinel id used by tokens whose text lives in `generated_tokens` rather than
+    /// in a file buffer.
+    pub const generated: Id = std.math.maxInt(usize);
+
+    id: Id = generated,
+    path: []const u8,
+    buf: []const u8,
+};
+
+/// Loaded files keyed by path; a `Source.id` is the index of its entry in this map.
+sources: std.StringArrayHashMapUnmanaged(Source) = .empty,
+
+/// Allocator used for every allocation made by the preprocessor.
+arena: Allocator,
+/// I/O implementation used to read source files.
+io: std.Io,
+/// Fallback directory searched by `findInclude` after the includer's own directory.
+include_dir: []const u8,
+
+/// Scratch buffer reused by `expandMacro` and `expr` for the tokens being expanded.
+top_expansion_buf: ExpandBuf = .empty,
+/// Number of newlines `nextBufToken` pulled from the tokenizer while collecting a macro
+/// argument; `addTokensFromExpandBuf` emits that many newline tokens and resets it.
+add_expansion_nl: usize = 0,
+/// Scratch buffer used by `defineFn` while collecting a macro body.
+token_buf: RawTokenList = .empty,
+/// Backing text for tokens whose `source` is `Source.generated`.
+generated_tokens: GeneratedTokens = .empty,
+/// Incremented by `makeGeneratedToken` for every token it creates.
+generated_line: u32 = 1,
+/// Currently defined macros, keyed by name.
+defines: DefineMap = .empty,
+/// The preprocessed output token stream.
+tokens: TokenList = .empty,
+/// Compilation target; selects the builtin macros defined by `defineBuiltins`.
+target: *const std.Target,
+
+/// One entry of the macro table.
+const Macro = struct {
+    /// Replacement list; empty for macros defined without a body.
+    tokens: []const Token,
+    /// Name of the single parameter of a function-like macro; "" otherwise.
+    param: []const u8,
+    /// True for macros registered through `defineFn`.
+    is_func: bool,
+};
+
+/// Tracks the state of nested conditional directives. Each nesting level owns one
+/// 2-bit slot inside `kind`; `level` selects the slot that `get`/`set` operate on.
+const IfContext = struct {
+    const Backing = u2;
+    const Nesting = enum(Backing) {
+        until_else,
+        until_endif,
+        until_endif_seen_else,
+    };
+
+    const buf_size_bits = @bitSizeOf(Backing) * 256;
+    const default: IfContext = .{ .kind = @splat(0xFF), .level = 0 };
+
+    kind: [buf_size_bits / std.mem.byte_size_in_bits]u8,
+    level: u8,
+
+    /// Bit position of the slot belonging to the current level.
+    fn slotOffset(self: *const IfContext) usize {
+        return @as(usize, self.level) * @bitSizeOf(Backing);
+    }
+
+    /// Reads the state stored for the current level.
+    fn get(self: *const IfContext) Nesting {
+        const raw = std.mem.readPackedInt(Backing, &self.kind, self.slotOffset(), .native);
+        return @enumFromInt(raw);
+    }
+
+    /// Stores `context` as the state of the current level.
+    fn set(self: *IfContext, context: Nesting) void {
+        const raw: Backing = @intFromEnum(context);
+        std.mem.writePackedInt(Backing, &self.kind, self.slotOffset(), raw, .native);
+    }
+
+    /// Enters one more level of nesting.
+    fn increment(self: *IfContext) void {
+        self.level += 1;
+    }
+
+    /// Leaves the current level of nesting.
+    fn decrement(self: *IfContext) void {
+        self.level -= 1;
+    }
+};
+
+/// Appends `tok` to the output stream, growing it when necessary.
+fn addToken(pp: *Preprocessor, tok: Token) !void {
+    return pp.tokens.append(pp.arena, tok);
+}
+
+/// Appends `tok` to the output stream; the caller must have reserved room beforehand.
+fn addTokenAssumeCapacity(pp: *Preprocessor, tok: Token) void {
+    return pp.tokens.appendAssumeCapacity(tok);
+}
+
+/// Registers the predefined macros that depend on `pp.target`:
+/// `__SIZEOF_LONG_DOUBLE__`, `__SIZEOF_DOUBLE__`, `__cdecl` (GNU ABIs only) and one
+/// architecture marker macro.
+///
+/// Returns `error.ArchitectureNotSupported` for CPU architectures without a marker.
+fn defineBuiltins(pp: *Preprocessor) !void {
+    // `__SIZEOF_*__` macros are expressed in bytes (the value of `sizeof`), so the
+    // byte size of the C type is used here, not its bit size.
+    // `defineBuiltinValue` copies `val` into `generated_tokens`, so `buf` can be reused.
+    var buf: [5]u8 = undefined;
+    var val = std.fmt.bufPrint(&buf, "{d}", .{pp.target.cTypeByteSize(.longdouble)}) catch unreachable;
+    try pp.defineBuiltinValue("__SIZEOF_LONG_DOUBLE__", val, .pp_num);
+    val = std.fmt.bufPrint(&buf, "{d}", .{pp.target.cTypeByteSize(.double)}) catch unreachable;
+    try pp.defineBuiltinValue("__SIZEOF_DOUBLE__", val, .pp_num);
+
+    if (pp.target.abi.isGnu()) {
+        // The whole attribute is stored as a single identifier token.
+        try pp.defineBuiltinValue("__cdecl", "__attribute__((__cdecl__))", .identifier);
+    }
+
+    const arch = switch (pp.target.cpu.arch) {
+        .aarch64 => "__aarch64__",
+        .x86 => "__i386__",
+        .x86_64 => "__x86_64__",
+        .arm, .thumb => "__arm__",
+        else => return error.ArchitectureNotSupported,
+    };
+    try pp.defineBuiltin(arch);
+}
+
+/// Defines the object-like macro `name` whose replacement list is a single token of
+/// kind `id` with the text `value`. The text is copied into `generated_tokens`.
+/// `name` must not already be defined.
+fn defineBuiltinValue(pp: *Preprocessor, name: []const u8, value: []const u8, id: Token.Id) !void {
+    const text_start = pp.generated_tokens.items.len;
+    try pp.generated_tokens.appendSlice(pp.arena, value);
+    const text_end = pp.generated_tokens.items.len;
+
+    const body = try pp.arena.alloc(Token, 1);
+    body[0] = .{
+        .id = id,
+        .source = Source.generated,
+        .start = @intCast(text_start),
+        .end = @intCast(text_end),
+    };
+
+    const macro: Macro = .{ .tokens = body, .param = "", .is_func = false };
+    try pp.defines.putNoClobber(pp.arena, name, macro);
+}
+
+/// Defines `name` as an object-like macro with an empty replacement list.
+/// `name` must not already be defined.
+fn defineBuiltin(pp: *Preprocessor, name: []const u8) !void {
+    const empty_macro: Macro = .{ .is_func = false, .param = "", .tokens = &.{} };
+    return pp.defines.putNoClobber(pp.arena, name, empty_macro);
+}
+
+/// Loads the file at `file_path` and preprocesses it into `pp.tokens`.
+pub fn preprocess(pp: *Preprocessor, file_path: []const u8) !void {
+    return pp.preprocessFile(try pp.addSourceFromPath(file_path));
+}
+
+/// Preprocesses the top-level source: registers the builtin macros, processes `src`,
+/// then appends the end-of-file token to the output.
+fn preprocessFile(pp: *Preprocessor, src: Source) !void {
+    try pp.defineBuiltins();
+    try pp.addToken(try pp.preprocessFileExtra(src));
+}
+
+/// Tokenizes `src` and processes it: directives are executed, every other token is
+/// macro-expanded and appended to `pp.tokens`. Returns the end-of-file token of `src`
+/// without appending it, so included files do not terminate the output stream.
+///
+/// Malformed input (unknown directive, unbalanced conditionals, `#else` after `#else`)
+/// is handled with `assert`/`unreachable` rather than with error returns.
+fn preprocessFileExtra(pp: *Preprocessor, src: Source) !Token {
+    var tokenizer: Tokenizer = .init(src.buf, src.id);
+    var if_context: IfContext = .default;
+
+    while (true) {
+        var tok = tokenizer.next();
+        switch (tok.id) {
+            .hash => {
+                const directive = tokenizer.nextNoWS();
+                switch (directive.id) {
+                    .keyword_define => try pp.define(&tokenizer),
+                    .keyword_if => {
+                        if_context.increment();
+                        if (try pp.expr(&tokenizer)) {
+                            // Branch taken: any later #elif/#else skips to #endif.
+                            if_context.set(.until_endif);
+                        } else {
+                            if_context.set(.until_else);
+                            try pp.skip(&tokenizer, .until_else);
+                        }
+                    },
+                    .keyword_ifdef => {
+                        if_context.increment();
+                        const macro_name = pp.expectMacroName(&tokenizer);
+                        skipToNl(&tokenizer);
+                        if (pp.defines.get(macro_name) != null) {
+                            if_context.set(.until_endif);
+                        } else {
+                            if_context.set(.until_else);
+                            try pp.skip(&tokenizer, .until_else);
+                        }
+                    },
+                    .keyword_ifndef => {
+                        if_context.increment();
+                        const macro_name = pp.expectMacroName(&tokenizer);
+                        skipToNl(&tokenizer);
+                        if (pp.defines.get(macro_name) == null) {
+                            if_context.set(.until_endif);
+                        } else {
+                            if_context.set(.until_else);
+                            try pp.skip(&tokenizer, .until_else);
+                        }
+                    },
+                    .keyword_elif => {
+                        assert(if_context.level > 0);
+                        switch (if_context.get()) {
+                            // No branch taken so far: evaluate this condition.
+                            .until_else => if (try pp.expr(&tokenizer)) {
+                                if_context.set(.until_endif);
+                            } else {
+                                try pp.skip(&tokenizer, .until_else);
+                            },
+                            // A previous branch was taken: skip the rest of the chain.
+                            .until_endif => try pp.skip(&tokenizer, .until_endif),
+                            .until_endif_seen_else => unreachable, // elif after else
+                        }
+                    },
+                    .keyword_else => {
+                        skipToNl(&tokenizer);
+                        assert(if_context.level > 0);
+                        switch (if_context.get()) {
+                            // No branch taken so far: the #else body is processed.
+                            .until_else => if_context.set(.until_endif_seen_else),
+                            .until_endif => try pp.skip(&tokenizer, .until_endif),
+                            .until_endif_seen_else => unreachable, // else after else
+                        }
+                    },
+                    .keyword_endif => {
+                        skipToNl(&tokenizer);
+                        assert(if_context.level > 0);
+                        if_context.decrement();
+                    },
+                    .keyword_undef => {
+                        const macro_name = tokenizer.nextNoWS();
+                        assert(macro_name.id == .identifier);
+                        pp.undefineMacro(macro_name);
+                        skipToNl(&tokenizer);
+                    },
+                    .keyword_include => {
+                        // `include` emits its own trailing newline, so skip the one below.
+                        try pp.include(&tokenizer);
+                        continue;
+                    },
+                    // These directives are accepted but nothing is done for them here.
+                    .keyword_defined, .keyword_error => {},
+                    else => unreachable,
+                }
+                // The directive line is replaced by a newline token in the output.
+                tok.id = .nl;
+                try pp.addToken(tok);
+            },
+            .whitespace, .nl => try pp.addToken(tok),
+            .eof => {
+                // Every conditional opened in this file must have been closed.
+                assert(if_context.level == 0);
+                return tok;
+            },
+            else => try pp.expandMacro(&tokenizer, tok),
+        }
+    }
+}
+
+/// Handles an `#include` directive: resolves the file named by the next token,
+/// preprocesses it in place and makes sure the output ends with a newline afterwards.
+///
+/// The return type is spelled `anyerror!void` because this function is part of a
+/// recursive call cycle with `preprocessFileExtra`.
+fn include(pp: *Preprocessor, tokenizer: *Tokenizer) anyerror!void {
+    const first = tokenizer.nextNoWS();
+    const src = try findIncludeSource(pp, tokenizer, first);
+
+    // The included file's own end-of-file token is dropped on purpose.
+    _ = try pp.preprocessFileExtra(src);
+
+    // Guard against an empty output stream: an `#include` at the very top of the input
+    // whose file produces no tokens would otherwise index at `len - 1` and underflow.
+    const ids = pp.tokens.items(.id);
+    if (ids.len == 0 or ids[ids.len - 1] != .nl) {
+        try pp.addToken(.{ .id = .nl, .source = Source.generated });
+    }
+}
+
+/// Resolves the file named by `first`, the token following `#include`. The first and
+/// last byte of the token text (the delimiters) are stripped to obtain the file name.
+/// Consumes the rest of the directive line. Panics when the file cannot be found.
+fn findIncludeSource(
+    pp: *Preprocessor,
+    tokenizer: *Tokenizer,
+    first: Token,
+) !Source {
+    skipToNl(tokenizer);
+
+    const delimited = pp.expandToken(first);
+    assert(delimited.len >= 3);
+    const filename = delimited[1 .. delimited.len - 1];
+
+    if (try pp.findInclude(filename, first)) |src| return src;
+    @panic("include not found");
+}
+
+/// Reads the next non-whitespace token, asserts that it can name a macro and returns
+/// its text.
+fn expectMacroName(pp: *const Preprocessor, tokenizer: *Tokenizer) []const u8 {
+    const name_tok = tokenizer.nextNoWS();
+    assert(name_tok.id.isMacroIdentifier());
+    return pp.expandToken(name_tok);
+}
+
+/// Consumes tokens up to and including the next newline, or up to end of file.
+fn skipToNl(tokenizer: *Tokenizer) void {
+    var id = tokenizer.next().id;
+    while (id != .nl and id != .eof) {
+        id = tokenizer.next().id;
+    }
+}
+
+/// Handles a `#define` directive. Three shapes are recognized:
+///   * `#define NAME`            - object-like macro with an empty replacement list
+///   * `#define NAME(param) ...` - function-like macro, delegated to `defineFn`
+///   * `#define NAME tokens...`  - object-like macro with a replacement list
+///
+/// Consumes the directive line up to and including its newline. Redefining an
+/// existing macro is not supported (`defineMacro` uses `putNoClobber`).
+fn define(pp: *Preprocessor, tokenizer: *Tokenizer) !void {
+    const macro_name = tokenizer.nextNoWS();
+    assert(macro_name.id == .identifier);
+
+    const first = tokenizer.nextNoWS();
+    switch (first.id) {
+        .nl, .eof => return pp.defineMacro(macro_name, .{
+            .is_func = false,
+            .tokens = &.{},
+            .param = "",
+        }),
+        .l_paren => return pp.defineFn(tokenizer, macro_name),
+        else => {},
+    }
+
+    // Object-like macro with a body. Previously this case fell through without
+    // registering anything, leaving the macro undefined and its body in the output.
+    // The body is collected the same way `defineFn` does it: runs of whitespace between
+    // tokens collapse into one generated whitespace token, trailing whitespace is dropped.
+    // NOTE(review): `expandMacroExhaustive` rescans after every expansion, so a body
+    // that mentions its own macro name would presumably never terminate - confirm.
+    pp.token_buf.items.len = 0;
+    try pp.token_buf.append(pp.arena, first);
+    var need_ws = false;
+    while (true) {
+        const tok = tokenizer.next();
+        switch (tok.id) {
+            .nl, .eof => break,
+            .whitespace => need_ws = true,
+            else => {
+                if (need_ws) {
+                    need_ws = false;
+                    try pp.token_buf.append(pp.arena, .{ .id = .whitespace, .source = Source.generated });
+                }
+                try pp.token_buf.append(pp.arena, tok);
+            },
+        }
+    }
+
+    // `token_buf` is scratch space; the macro keeps its own copy.
+    const token_list = try pp.arena.dupe(Token, pp.token_buf.items);
+    return pp.defineMacro(macro_name, .{
+        .is_func = false,
+        .tokens = token_list,
+        .param = "",
+    });
+}
+
+/// Registers `macro` under the text of `tok`. The name must not already be defined.
+fn defineMacro(pp: *Preprocessor, tok: Token, macro: Macro) !void {
+    return pp.defines.putNoClobber(pp.arena, pp.expandToken(tok), macro);
+}
+
+/// Removes the macro named by the text of `tok`; does nothing when it is not defined.
+fn undefineMacro(pp: *Preprocessor, tok: Token) void {
+    const name = pp.expandToken(tok);
+    const was_defined = pp.defines.orderedRemove(name);
+    _ = was_defined;
+}
+
+/// Parses the rest of a function-like macro definition (the tokenizer is positioned
+/// just after the opening parenthesis) and registers it under `macro_name`.
+///
+/// Exactly one parameter is supported: the parameter list must be a single identifier
+/// followed by `)`. The body runs to the end of the line.
+fn defineFn(
+    pp: *Preprocessor,
+    tokenizer: *Tokenizer,
+    macro_name: Token,
+) !void {
+    var tok = tokenizer.nextNoWS();
+    assert(tok.id == .identifier);
+    const param = pp.expandToken(tok);
+    tok = tokenizer.nextNoWS();
+    assert(tok.id == .r_paren);
+
+    // Reuse the scratch buffer for the body tokens.
+    pp.token_buf.items.len = 0;
+    var need_ws = false;
+    while (true) {
+        tok = tokenizer.next();
+        switch (tok.id) {
+            .nl, .eof => break,
+            // Leading whitespace is dropped; later runs are remembered and emitted as
+            // one generated whitespace token in front of the next ordinary token.
+            .whitespace => need_ws = pp.token_buf.items.len != 0,
+            // A lone `#` inside a macro body is not supported.
+            .hash => unreachable,
+            .hash_hash => {
+                // Whitespace in front of `##` is discarded.
+                need_ws = false;
+                try pp.token_buf.append(pp.arena, tok);
+            },
+            else => {
+                if (need_ws) {
+                    need_ws = false;
+                    try pp.token_buf.append(pp.arena, .{ .id = .whitespace, .source = Source.generated });
+                }
+
+                if (tok.id.isMacroIdentifier()) {
+                    tok.id = .identifier;
+                    const s = pp.expandToken(tok);
+                    if (mem.eql(u8, param, s)) {
+                        // Mark uses of the parameter; `expandFuncMacro` substitutes them.
+                        tok.id = .macro_param;
+                        tok.end = 0;
+                    }
+                }
+                try pp.token_buf.append(pp.arena, tok);
+            },
+        }
+    }
+
+    // `token_buf` is scratch space; the macro keeps its own copy.
+    const token_list = try pp.arena.dupe(Token, pp.token_buf.items);
+    try pp.defineMacro(macro_name, .{
+        .tokens = token_list,
+        .is_func = true,
+        .param = param,
+    });
+}
+
+/// Returns the text of `tok`: the `start..end` range of either `generated_tokens`
+/// (for `Source.generated`) or the buffer of the source file the token came from.
+fn expandToken(pp: *const Preprocessor, tok: Token) []const u8 {
+    const backing: []const u8 = if (tok.source == Source.generated)
+        pp.generated_tokens.items
+    else
+        pp.sources.values()[tok.source].buf;
+    return backing[@intCast(tok.start)..@intCast(tok.end)];
+}
+
+/// Skips the body of a conditional branch that is not taken. Scanning stops just in
+/// front of the `#else`, `#elif` or `#endif` that belongs to the current conditional:
+/// the tokenizer is rewound to the start of that line so the caller's main loop
+/// processes the directive itself. Nested conditionals are skipped as a whole.
+///
+/// `cont` is the state of the enclosing conditional; with `.until_endif` an `#elif`
+/// does not end the skip. A generated newline token is emitted for each newline byte
+/// passed over in the character-scanning branch below.
+fn skip(
+    pp: *Preprocessor,
+    tokenizer: *Tokenizer,
+    cont: IfContext.Nesting,
+) !void {
+    // Number of nested #if/#ifdef/#ifndef currently open inside the skipped region.
+    var ifs_seen: u32 = 0;
+    var line_start = true;
+    while (tokenizer.index < tokenizer.buf.len) {
+        if (line_start) {
+            // Saved so the tokenizer can be rewound to the start of this line.
+            const tokenizer_bkp = tokenizer.*;
+            const hash = tokenizer.nextNoWS();
+            // Empty line: the next iteration is again at a line start.
+            if (hash.id == .nl) continue;
+            line_start = false;
+            if (hash.id != .hash) continue;
+            const directive = tokenizer.nextNoWS();
+            switch (directive.id) {
+                .keyword_else => {
+                    if (ifs_seen != 0) continue;
+                    assert(cont != .until_endif_seen_else); // else after else;
+                    tokenizer.* = tokenizer_bkp;
+                    return;
+                },
+                .keyword_elif => {
+                    if (ifs_seen != 0 or cont == .until_endif) continue;
+                    assert(cont != .until_endif_seen_else); // elif after else;
+                    tokenizer.* = tokenizer_bkp;
+                    return;
+                },
+                .keyword_endif => {
+                    if (ifs_seen == 0) {
+                        tokenizer.* = tokenizer_bkp;
+                        return;
+                    }
+                    ifs_seen -= 1;
+                },
+                .keyword_if, .keyword_ifdef, .keyword_ifndef => ifs_seen += 1,
+                else => {},
+            }
+        } else if (tokenizer.buf[tokenizer.index] == '\n') {
+            line_start = true;
+            tokenizer.index += 1;
+            try pp.addToken(.{ .id = .nl, .source = Source.generated });
+        } else {
+            // Rest of a line: advance byte by byte without tokenizing.
+            line_start = false;
+            tokenizer.index += 1;
+        }
+    }
+}
+
+/// Reserves room for `capacity` more tokens in the output stream.
+fn ensureUnusedTokenCapacity(pp: *Preprocessor, capacity: usize) !void {
+    return pp.tokens.ensureUnusedCapacity(pp.arena, capacity);
+}
+
+/// Evaluates the controlling expression of an `#if`/`#elif`: reads the rest of the
+/// line, macro-expands it, resolves `defined(...)` and hands the tokens to
+/// `evalExpression`. Returns whether the condition holds.
+///
+/// The expression tokens are staged at the end of `pp.tokens`; the list length is
+/// restored on exit, so nothing is left behind in the output.
+fn expr(pp: *Preprocessor, tokenizer: *Tokenizer) !bool {
+    const token_state = pp.tokens.len;
+    defer pp.tokens.len = token_state;
+
+    pp.top_expansion_buf.items.len = 0;
+    while (true) {
+        const tok = tokenizer.next();
+        switch (tok.id) {
+            .nl, .eof => break,
+            // Leading whitespace is dropped; whitespace between tokens is kept.
+            .whitespace => if (pp.top_expansion_buf.items.len == 0) continue,
+            else => {},
+        }
+        try pp.top_expansion_buf.append(pp.arena, tok);
+    } else unreachable;
+    if (pp.top_expansion_buf.items.len != 0) {
+        // `extend_buf` is false: expansion must not read past the directive line.
+        try pp.expandMacroExhaustive(tokenizer, &pp.top_expansion_buf, 0, pp.top_expansion_buf.items.len, false, .expr);
+    }
+    // At most one output token is staged per expanded token.
+    try pp.ensureUnusedTokenCapacity(pp.top_expansion_buf.items.len);
+    var i: usize = 0;
+    const items = pp.top_expansion_buf.items;
+    while (i < items.len) : (i += 1) {
+        var tok = items[i];
+        switch (tok.id) {
+            .string_literal,
+            .semicolon,
+            .hash_hash,
+            => unreachable,
+            .whitespace => continue,
+            else => if (tok.id == .keyword_defined) {
+                // `tok` becomes `.one` or `.zero`; skip the tokens of the operand.
+                i += try pp.handleKeywordDefined(&tok, items[i + 1 ..]);
+            },
+        }
+        pp.addTokenAssumeCapacity(tok);
+    }
+
+    // Terminate the staged expression for the evaluator.
+    try pp.addToken(.{ .id = .eof, .source = Source.generated });
+    return pp.evalExpression(token_state);
+}
+
+/// Resolves a `defined` operator. `macro_tok` is the `defined` token and `tokens` is
+/// everything that follows it. Both spellings are accepted: `defined(NAME)` and
+/// `defined NAME` (`expandMacroExhaustive` already leaves both forms unexpanded).
+///
+/// Rewrites `macro_tok.id` to `.one` or `.zero` depending on whether the macro is
+/// defined, and returns how many entries of `tokens` belong to the operand so the
+/// caller can skip them. Panics on a malformed operand.
+fn handleKeywordDefined(
+    pp: *Preprocessor,
+    macro_tok: *Token,
+    tokens: []const Token,
+) !usize {
+    assert(macro_tok.id == .keyword_defined);
+    var it = TokenIterator.init(tokens);
+
+    const first = it.nextNoWS() orelse
+        std.debug.panic("expected token {any} but got null\n", .{Token.Id.l_paren});
+    const name = switch (first.id) {
+        .l_paren => name: {
+            const inner = it.expectNoWS(.identifier);
+            _ = it.expectNoWS(.r_paren);
+            break :name inner;
+        },
+        // Parenthesis-free form: the identifier itself is the operand.
+        .identifier => first,
+        else => std.debug.panic("expected token {any} but got {any}\n", .{ Token.Id.l_paren, first.id }),
+    };
+
+    macro_tok.id = if (pp.defines.contains(pp.expandToken(name))) .one else .zero;
+
+    return it.i;
+}
+
+/// Cursor over a token slice. `i` is the index of the next token to be read.
+const TokenIterator = struct {
+    toks: []const Token,
+    i: usize,
+
+    fn init(toks: []const Token) TokenIterator {
+        return .{ .i = 0, .toks = toks };
+    }
+
+    /// Returns the next token that is not whitespace and advances past it, or null
+    /// when only whitespace (or nothing) is left.
+    fn nextNoWS(self: *TokenIterator) ?Token {
+        while (self.i < self.toks.len) {
+            const candidate = self.toks[self.i];
+            self.i += 1;
+            if (candidate.id != .whitespace) return candidate;
+        }
+        return null;
+    }
+
+    /// Returns the next token, whitespace included. Asserts that one is available.
+    fn expectNext(self: *TokenIterator) Token {
+        assert(self.i < self.toks.len);
+        defer self.i += 1;
+        return self.toks[self.i];
+    }
+
+    /// Returns the next non-whitespace token; panics unless its id is `expected`.
+    fn expectNoWS(self: *TokenIterator, expected: Token.Id) Token {
+        const tok = self.nextNoWS() orelse
+            std.debug.panic("expected token {any} but got null\n", .{expected});
+        if (tok.id != expected) {
+            std.debug.panic("expected token {any} but got {any}\n", .{ expected, tok.id });
+        }
+        return tok;
+    }
+};
+
+/// Emits `tok` to the output. Tokens that can name a macro are fully expanded first;
+/// expansion may pull further tokens from `tokenizer` (a macro argument list).
+fn expandMacro(pp: *Preprocessor, tokenizer: *Tokenizer, tok: Token) !void {
+    if (tok.id.isMacroIdentifier()) {
+        const scratch = &pp.top_expansion_buf;
+        scratch.items.len = 0;
+        try scratch.append(pp.arena, tok);
+        try pp.expandMacroExhaustive(tokenizer, scratch, 0, 1, true, .non_expr);
+
+        const newline: Token = .{ .id = .nl, .source = Source.generated };
+        return pp.addTokensFromExpandBuf(scratch.items, newline);
+    }
+    return pp.addToken(tok);
+}
+
+/// Appends `tokens` to the output, followed by one copy of `tokenizer_nl` for every
+/// newline counted in `add_expansion_nl`; the counter is zero afterwards.
+fn addTokensFromExpandBuf(pp: *Preprocessor, tokens: []Token, tokenizer_nl: Token) !void {
+    try pp.ensureUnusedTokenCapacity(tokens.len);
+    for (tokens) |tok| pp.addTokenAssumeCapacity(tok);
+
+    const pending_newlines = pp.add_expansion_nl;
+    try pp.ensureUnusedTokenCapacity(pending_newlines);
+    for (0..pending_newlines) |_| pp.addTokenAssumeCapacity(tokenizer_nl);
+    pp.add_expansion_nl = 0;
+}
+
+/// Tells macro expansion whether it runs inside an `#if`/`#elif` expression, where the
+/// operand of `defined` must be left unexpanded.
+const EvalContext = enum { expr, non_expr };
+
+/// Expands every macro invocation in `buf.items[start_idx..end_idx]` in place and
+/// repeats the scan until a pass performs no expansion. On return `buf.items.len` is
+/// truncated to the end of the expanded range.
+///
+/// `extend_buf` allows pulling further tokens from `tokenizer` while collecting the
+/// argument of a function-like macro. With `eval_ctx == .expr` the operand of a
+/// `defined` operator is skipped instead of expanded.
+fn expandMacroExhaustive(
+    pp: *Preprocessor,
+    tokenizer: *Tokenizer,
+    buf: *ExpandBuf,
+    start_idx: usize,
+    end_idx: usize,
+    extend_buf: bool,
+    eval_ctx: EvalContext,
+) !void {
+    // End of the active range; moves as expansions grow or shrink the buffer.
+    var moving_end_idx = end_idx;
+    var advance_index: usize = 0;
+    var do_rescan = true;
+    while (do_rescan) {
+        do_rescan = false;
+        var idx: usize = start_idx + advance_index;
+        while (idx < moving_end_idx) {
+            const macro_tok = buf.items[idx];
+            if (macro_tok.id == .keyword_defined and eval_ctx == .expr) {
+                // Step over `defined NAME` / `defined(NAME)` without expanding NAME.
+                idx += 1;
+                var it = TokenIterator.init(buf.items[idx..moving_end_idx]);
+                if (it.nextNoWS()) |tok| {
+                    switch (tok.id) {
+                        .l_paren => {
+                            _ = it.nextNoWS();
+                            _ = it.nextNoWS();
+                        },
+                        else => {},
+                    }
+                }
+                idx += it.i;
+                continue;
+            }
+            if (!macro_tok.id.isMacroIdentifier()) {
+                idx += 1;
+                continue;
+            }
+            const expanded = pp.expandToken(macro_tok);
+            const macro = pp.defines.getPtr(expanded) orelse {
+                idx += 1;
+                continue;
+            };
+
+            if (macro.is_func) {
+                // After the call `macro_scan_idx` is the index of the closing `)`.
+                var macro_scan_idx = idx;
+                const arg = try pp.collectMacroArgument(
+                    tokenizer,
+                    buf,
+                    &macro_scan_idx,
+                    &moving_end_idx,
+                    extend_buf,
+                );
+                // The argument is expanded in a buffer of its own; the unexpanded form
+                // is passed along as well because `##` pastes the unexpanded tokens.
+                const expanded_arg = arg: {
+                    var expand_buf: ExpandBuf = .empty;
+                    errdefer expand_buf.deinit(pp.arena);
+                    try expand_buf.appendSlice(pp.arena, arg);
+                    try pp.expandMacroExhaustive(tokenizer, &expand_buf, 0, expand_buf.items.len, false, eval_ctx);
+                    break :arg try expand_buf.toOwnedSlice(pp.arena);
+                };
+
+                const res = try pp.expandFuncMacro(macro, arg, expanded_arg);
+                const tokens_added = res.items.len;
+                // Macro name through closing parenthesis, inclusive.
+                const tokens_removed = macro_scan_idx - idx + 1;
+                try buf.replaceRange(pp.arena, idx, tokens_removed, res.items);
+
+                moving_end_idx += tokens_added;
+                moving_end_idx -|= tokens_removed;
+                idx += tokens_added;
+                do_rescan = true;
+            } else {
+                var res = try pp.expandObjMacro(macro);
+                defer res.deinit(pp.arena);
+                // NOTE(review): `increment_idx_by` is computed but never used after
+                // this loop - looks like leftover logic.
+                var increment_idx_by = res.items.len;
+
+                for (res.items, 0..) |*tok, i| {
+                    if (i < increment_idx_by and pp.defines.contains(pp.expandToken(tok.*))) {
+                        increment_idx_by = i;
+                    }
+                }
+                try buf.replaceRange(pp.arena, idx, 1, res.items);
+                idx += res.items.len;
+                moving_end_idx = moving_end_idx + res.items.len - 1;
+                do_rescan = true;
+            }
+            // NOTE(review): both branches above set `do_rescan = true`, so this
+            // condition cannot hold here and `advance_index` stays 0 - confirm intent.
+            if (idx - start_idx == advance_index + 1 and !do_rescan) {
+                advance_index += 1;
+            }
+        }
+    }
+    buf.items.len = moving_end_idx;
+}
+
+/// Collects the argument of a function-like macro invocation. On entry `start_idx`
+/// points at the macro name in `buf`; on return it points at the closing `)`.
+/// Returns the tokens between the outermost parentheses; the caller owns the slice
+/// (allocated from `pp.arena`).
+///
+/// Newlines and whitespace inside the argument are replaced by generated whitespace
+/// tokens. Nested parentheses are kept as part of the argument. Anything other than
+/// whitespace between the macro name and `(`, or reaching end of input inside the
+/// argument, hits `unreachable`.
+fn collectMacroArgument(
+    pp: *Preprocessor,
+    tokenizer: *Tokenizer,
+    buf: *ExpandBuf,
+    start_idx: *usize,
+    end_idx: *usize,
+    extend_buf: bool,
+) !MacroArgument {
+    // Depth of parentheses opened inside the argument itself.
+    var parens: u32 = 0;
+    var argument: std.ArrayList(Token) = .empty;
+    defer argument.deinit(pp.arena);
+
+    // Find the opening parenthesis.
+    while (true) {
+        const tok = try nextBufToken(pp, tokenizer, buf, start_idx, end_idx, extend_buf);
+        switch (tok.id) {
+            .nl, .whitespace => {},
+            .l_paren => break,
+            else => unreachable,
+        }
+    }
+
+    while (true) {
+        const tok = try nextBufToken(pp, tokenizer, buf, start_idx, end_idx, extend_buf);
+        switch (tok.id) {
+            .l_paren => {
+                try argument.append(pp.arena, tok);
+                parens += 1;
+            },
+            .r_paren => {
+                if (parens == 0) {
+                    // The matching parenthesis ends the argument and is not part of it.
+                    return try argument.toOwnedSlice(pp.arena);
+                } else {
+                    try argument.append(pp.arena, tok);
+                    parens -= 1;
+                }
+            },
+            .nl, .whitespace => try argument.append(pp.arena, .{ .id = .whitespace, .source = Source.generated }),
+            .eof => unreachable,
+            else => try argument.append(pp.arena, tok),
+        }
+    }
+}
+
+/// Returns a fresh buffer holding a copy of the replacement list of `simple_macro`.
+/// The caller owns the buffer.
+fn expandObjMacro(pp: *Preprocessor, simple_macro: *const Macro) !ExpandBuf {
+    var replacement: ExpandBuf = .empty;
+    errdefer replacement.deinit(pp.arena);
+
+    const body = simple_macro.tokens;
+    try replacement.appendSlice(pp.arena, body);
+    return replacement;
+}
+
+/// Builds the expansion of the function-like macro `func_macro`.
+///
+/// Parameter tokens are replaced by `expanded_arg`, except where the parameter is the
+/// right-hand operand of `##`: there the unexpanded `arg` is pasted onto the token
+/// produced so far. All other body tokens are copied verbatim. The caller owns the
+/// returned buffer.
+fn expandFuncMacro(
+    pp: *Preprocessor,
+    func_macro: *const Macro,
+    arg: MacroArgument,
+    expanded_arg: MacroArgument,
+) !ExpandBuf {
+    var buf: ExpandBuf = .empty;
+    errdefer buf.deinit(pp.arena);
+    try buf.ensureTotalCapacity(pp.arena, func_macro.tokens.len);
+
+    var tok_i: usize = 0;
+    while (tok_i < func_macro.tokens.len) : (tok_i += 1) {
+        const tok = func_macro.tokens[tok_i];
+        switch (tok.id) {
+            // Look ahead for the right-hand operand of the paste, skipping whitespace
+            // and repeated `##`.
+            .hash_hash => while (tok_i + 1 < func_macro.tokens.len) {
+                tok_i += 1;
+                const tok_next = func_macro.tokens[tok_i];
+                const next = switch (tok_next.id) {
+                    .whitespace => continue,
+                    .hash_hash => continue,
+                    .macro_param => arg,
+                    else => &[1]Token{tok_next},
+                };
+                try pp.pasteTokens(&buf, next);
+                // An empty argument pastes nothing; keep looking for an operand.
+                if (next.len != 0) break;
+            },
+            .macro_param => {
+                try buf.appendSlice(pp.arena, expanded_arg);
+            },
+            else => try buf.append(pp.arena, tok),
+        }
+    }
+
+    return buf;
+}
+
+/// Implements the `##` operator: joins the last non-whitespace token of `lhs_toks`
+/// with the first non-whitespace token of `rhs_toks` into one generated token, then
+/// appends the remainder of `rhs_toks`.
+///
+/// If `lhs_toks` holds no non-whitespace token, `rhs_toks` is appended as is (the
+/// whitespace popped while searching is not restored). If `rhs_toks` holds none, the
+/// left-hand token is put back unchanged.
+fn pasteTokens(
+    pp: *Preprocessor,
+    lhs_toks: *ExpandBuf,
+    rhs_toks: []const Token,
+) !void {
+    // Pop trailing whitespace until the left operand is found.
+    const lhs = while (lhs_toks.pop()) |lhs| {
+        if (lhs.id != .whitespace) break lhs;
+    } else {
+        return lhs_toks.appendSlice(pp.arena, rhs_toks);
+    };
+
+    // Ends up as the index of the first `rhs_toks` entry after the right operand.
+    var rhs_rest: u32 = 1;
+    const rhs = for (rhs_toks) |rhs| {
+        if (rhs.id != .whitespace) break rhs;
+        rhs_rest += 1;
+    } else {
+        // `lhs` was just popped, so capacity for it is guaranteed.
+        return lhs_toks.appendAssumeCapacity(lhs);
+    };
+
+    const start = pp.generated_tokens.items.len;
+    const end = start + pp.expandToken(lhs).len + pp.expandToken(rhs).len;
+    // Reserve before copying: the operands may themselves live in `generated_tokens`,
+    // so their text is looked up again only after the buffer can no longer move.
+    try pp.generated_tokens.ensureTotalCapacity(pp.arena, end + 1);
+    pp.generated_tokens.appendSliceAssumeCapacity(pp.expandToken(lhs));
+    pp.generated_tokens.appendSliceAssumeCapacity(pp.expandToken(rhs));
+    // Terminates the pasted text for the temporary tokenizer below.
+    pp.generated_tokens.appendAssumeCapacity('\n');
+
+    // Re-tokenize the joined text to learn the kind of the resulting token.
+    var tmp_tokenizer: Tokenizer = .{
+        .index = @intCast(start),
+        .buf = pp.generated_tokens.items,
+        .source = Source.generated,
+    };
+    const pasted_token = tmp_tokenizer.nextNoWS();
+    const next = tmp_tokenizer.nextNoWS();
+
+    try lhs_toks.append(pp.arena, pp.makeGeneratedToken(start, end, pasted_token.id));
+    // The joined text must form exactly one token.
+    assert(next.id == .nl or next.id == .eof);
+
+    return lhs_toks.appendSlice(pp.arena, rhs_toks[rhs_rest..]);
+}
+
+/// Advances `start_idx` by one and returns the token at that position of `buf`.
+///
+/// When the new position is past the end of the buffer and `extend_buf` is set, a
+/// token is read from `tokenizer`, appended to `buf` (growing `end_idx` with it) and
+/// returned; newlines read this way are counted in `add_expansion_nl`. Without
+/// `extend_buf` a generated end-of-file token is returned instead.
+fn nextBufToken(
+    pp: *Preprocessor,
+    tokenizer: *Tokenizer,
+    buf: *ExpandBuf,
+    start_idx: *usize,
+    end_idx: *usize,
+    extend_buf: bool,
+) !Token {
+    start_idx.* += 1;
+    if (start_idx.* == buf.items.len and start_idx.* >= end_idx.*) {
+        if (extend_buf) {
+            const tok = tokenizer.next();
+            if (tok.id == .nl) pp.add_expansion_nl += 1;
+
+            end_idx.* += 1;
+            try buf.append(pp.arena, tok);
+            return tok;
+        }
+        return .{ .id = .eof, .source = Source.generated };
+    }
+
+    return buf.items[start_idx.*];
+}
+
+/// Builds a token with the given `id` that refers to the byte range
+/// `start..end` of the generated source, and bumps `pp.generated_line`.
+fn makeGeneratedToken(
+    pp: *Preprocessor,
+    start: usize,
+    end: usize,
+    id: Token.Id,
+) Token {
+    pp.generated_line += 1;
+    return .{
+        .id = id,
+        .source = Source.generated,
+        .start = @intCast(start),
+        .end = @intCast(end),
+    };
+}
+
+/// Resolves `filename` for an include directive.
+///
+/// The directory of the file that contains `includer_token` is searched first
+/// ("." when that path has no directory component), then `pp.include_dir`.
+/// Returns null when neither location yields a source.
+fn findInclude(
+    pp: *Preprocessor,
+    filename: []const u8,
+    includer_token: Token,
+) !?Source {
+    const includer_path = pp.sources.values()[includer_token.source].path;
+    const includer_dir = std.fs.path.dirname(includer_path) orelse ".";
+
+    return (try pp.checkIncludeDir(filename, includer_dir)) orelse
+        try pp.checkIncludeDir(filename, pp.include_dir);
+}
+
+/// Tries to load `include_path` relative to `include_dir`.
+///
+/// Returns the loaded source, or null when loading fails for any reason other
+/// than running out of memory; `error.OutOfMemory` is propagated.
+fn checkIncludeDir(
+    pp: *Preprocessor,
+    include_path: []const u8,
+    include_dir: []const u8,
+) !?Source {
+    // NOTE(review): presumably the joined path is served from this stack
+    // buffer first and only falls back to the arena when it does not fit —
+    // confirm against BufferFirstAllocator.
+    var stack_storage: [1024]u8 = undefined;
+    var path_alloc_state: std.heap.BufferFirstAllocator = .init(&stack_storage, pp.arena);
+    const path_alloc = path_alloc_state.allocator();
+
+    const candidate_path = try std.fmt.allocPrint(
+        path_alloc,
+        "{s}{c}{s}",
+        .{ include_dir, std.fs.path.sep, include_path },
+    );
+    defer path_alloc.free(candidate_path);
+
+    if (pp.addSourceFromPath(candidate_path)) |loaded| {
+        return loaded;
+    } else |err| switch (err) {
+        error.OutOfMemory => return error.OutOfMemory,
+        else => return null,
+    }
+}
+
+/// Returns the source registered under `path`, reading the file and
+/// registering it first if it is not known yet.
+///
+/// The file contents and a copy of `path` are allocated from `pp.arena`. A new
+/// source gets the number of already-registered sources as its id.
+pub fn addSourceFromPath(pp: *Preprocessor, path: []const u8) !Source {
+    if (pp.sources.get(path)) |already_loaded| return already_loaded;
+
+    // Reserve the map slot up front so the insertion at the end cannot fail.
+    try pp.sources.ensureUnusedCapacity(pp.arena, 1);
+
+    const file_bytes = try std.Io.Dir.cwd().readFileAlloc(
+        pp.io,
+        path,
+        pp.arena,
+        .limited(std.math.maxInt(u32)),
+    );
+    const owned_path = try pp.arena.dupe(u8, path);
+
+    const new_source: Source = .{
+        .id = pp.sources.count(),
+        .path = owned_path,
+        .buf = file_bytes,
+    };
+    pp.sources.putAssumeCapacityNoClobber(owned_path, new_source);
+    return new_source;
+}
+
+/// Evaluates the conditional expression made of every token in `pp.tokens`
+/// from index `start` to the end.
+///
+/// The tokens are copied into a temporary slice and handed to
+/// `evaluateExpressionTokens`.
+fn evalExpression(
+    pp: *Preprocessor,
+    start: usize,
+) !bool {
+    const all_toks = pp.tokens.slice();
+    const expr_len = all_toks.len - start;
+    const expr_view = all_toks.subslice(start, expr_len);
+
+    const expr_toks = try pp.arena.alloc(Token, expr_len);
+    defer pp.arena.free(expr_toks);
+
+    for (
+        expr_toks,
+        expr_view.items(.id),
+        expr_view.items(.source),
+        expr_view.items(.start),
+        expr_view.items(.end),
+    ) |*dst, tok_id, tok_source, tok_start, tok_end| {
+        dst.* = .{
+            .id = tok_id,
+            .source = tok_source,
+            .start = tok_start,
+            .end = tok_end,
+        };
+    }
+
+    return pp.evaluateExpressionTokens(expr_toks);
+}
+
+/// Evaluates `toks` as either a single operand or `operand infix operand`.
+///
+/// A lone operand is true when its id is `.one`. Any other shape is asserted
+/// against rather than reported.
+fn evaluateExpressionTokens(
+    pp: *const Preprocessor,
+    toks: []const Token,
+) bool {
+    var iter = TokenIterator.init(toks);
+
+    const lhs = evalToken(&iter);
+    assert(!lhs.id.isInfix());
+
+    const operator = evalToken(&iter);
+    if (operator.id != .eof) {
+        assert(operator.id.isInfix());
+        const rhs = evalToken(&iter);
+        return pp.evalInfix(lhs, operator, rhs);
+    }
+
+    return lhs.id == .one;
+}
+
+/// Reads the next token from `it`, applying a leading `!` if there is one.
+///
+/// For `!x` the operand token is returned with its id flipped between `.one`
+/// and `.zero`; any other operand after `!` is treated as unreachable.
+fn evalToken(it: *TokenIterator) Token {
+    const first = it.expectNext();
+    if (first.id != .bang) return first;
+
+    var negated = it.expectNext();
+    negated.id = switch (negated.id) {
+        .one => .zero,
+        .zero => .one,
+        else => unreachable,
+    };
+    return negated;
+}
+
+/// Applies the infix operator `op` to `left` and `right`.
+///
+/// `||` is true when either side is `.one`. `==` compares ids for `.one` /
+/// `.zero` operands and compares the token text for `.pp_num` operands. Every
+/// other operator or operand kind is treated as unreachable.
+fn evalInfix(
+    pp: *const Preprocessor,
+    left: Token,
+    op: Token,
+    right: Token,
+) bool {
+    return switch (op.id) {
+        .pipe_pipe => (left.id == .one) or (right.id == .one),
+        .equal_equal => switch (left.id) {
+            .one, .zero => same_id: {
+                assert(right.id == .one or right.id == .zero);
+                break :same_id left.id == right.id;
+            },
+            .pp_num => same_text: {
+                assert(right.id == .pp_num);
+                const left_text = pp.expandToken(left);
+                const right_text = pp.expandToken(right);
+                break :same_text std.mem.eql(u8, left_text, right_text);
+            },
+            else => unreachable,
+        },
+        else => unreachable,
+    };
+}
+
+/// Writes the text of every token in `pp.tokens` to `w`, up to the first
+/// `.eof` token, then flushes `w`.
+///
+/// Whitespace tokens are written as a single space. A run of two or more
+/// newline tokens (ignoring whitespace tokens in between) that is followed by
+/// another token is collapsed so that no blank lines are emitted. The output
+/// always ends with a newline.
+pub fn prettyPrintTokens(pp: *Preprocessor, w: *std.Io.Writer) !void {
+    const tok_ids = pp.tokens.items(.id);
+    var i: usize = 0;
+    // True while the most recently written output was a line break (or nothing
+    // has been written yet).
+    var last_nl = true;
+    outer: while (true) : (i += 1) {
+        const cur: Token = pp.tokens.get(i);
+        switch (cur.id) {
+            .eof => {
+                if (!last_nl) try w.writeByte('\n');
+                try w.flush();
+                return;
+            },
+            .nl => {
+                // Look ahead to count how many newline tokens follow before
+                // the next token that is neither a newline nor whitespace.
+                var newlines: u32 = 0;
+                for (tok_ids[i..], i..) |id, j| {
+                    if (id == .nl) {
+                        newlines += 1;
+                    } else if (id == .eof) {
+                        if (!last_nl) try w.writeByte('\n');
+                        try w.flush();
+                        return;
+                    } else if (id != .whitespace) {
+                        // A single newline is printed by the code after the loop.
+                        if (newlines < 2) break;
+
+                        // Rewind so that, after the loop's `i += 1`, printing
+                        // resumes at token `j`, or at the whitespace token
+                        // directly before it when there is one.
+                        i = @intCast((j - 1) - @intFromBool(tok_ids[j - 1] == .whitespace));
+                        if (!last_nl) try w.writeAll("\n");
+                        continue :outer;
+                    }
+                }
+                last_nl = true;
+                try w.writeAll("\n");
+            },
+            .whitespace => {
+                try w.writeByte(' ');
+                last_nl = false;
+            },
+            else => {
+                const slice = pp.expandToken(cur);
+                try w.writeAll(slice);
+                last_nl = false;
+            },
+        }
+    }
+}
diff --git a/src/libs/mingw/Tokenizer.zig b/src/libs/mingw/Tokenizer.zig
@@ -0,0 +1,365 @@
+const std = @import("std");
+const assert = std.debug.assert;
+const Source = @import("Preprocessor.zig").Source;
+
+const Tokenizer = @This();
+
+/// Creates a tokenizer that scans `buf` from its first byte and stamps
+/// `source` onto every token it produces.
+pub fn init(buf: []const u8, source: Source.Id) Tokenizer {
+    const tokenizer: Tokenizer = .{
+        .source = source,
+        .buf = buf,
+    };
+    return tokenizer;
+}
+
+/// Bytes being tokenized.
+buf: []const u8,
+/// Offset into `buf` at which the next call to `next` starts scanning.
+index: u32 = 0,
+/// Source id copied into every token returned by `next`.
+source: Source.Id,
+
+/// A token: its kind plus the byte range `start..end` it occupies in the
+/// source identified by `source`.
+pub const Token = struct {
+    /// Kind of a token. `macro_param`, `one` and `zero` are never produced by
+    /// `next` in this file.
+    pub const Id = enum {
+        bang,
+        eof,
+        equal_equal,
+        hash,
+        hash_hash,
+        macro_param,
+        identifier,
+        keyword_if,
+        keyword_ifndef,
+        keyword_ifdef,
+        keyword_define,
+        keyword_endif,
+        keyword_defined,
+        keyword_include,
+        keyword_elif,
+        keyword_else,
+        keyword_undef,
+        keyword_error,
+        l_paren,
+        nl,
+        pp_num,
+        pipe_pipe,
+        r_paren,
+        semicolon,
+        string_literal,
+        whitespace,
+        one,
+        zero,
+
+        /// True for the two binary operators, `||` and `==`.
+        pub fn isInfix(id: Id) bool {
+            return id == .pipe_pipe or id == .equal_equal;
+        }
+
+        /// True for plain identifiers and for every keyword id.
+        pub fn isMacroIdentifier(id: Id) bool {
+            return switch (id) {
+                .identifier,
+                .keyword_if,
+                .keyword_ifndef,
+                .keyword_ifdef,
+                .keyword_define,
+                .keyword_endif,
+                .keyword_defined,
+                .keyword_include,
+                .keyword_elif,
+                .keyword_else,
+                .keyword_undef,
+                .keyword_error,
+                => true,
+                else => false,
+            };
+        }
+    };
+
+    /// Maps keyword spellings to their token ids.
+    const all_kws = std.StaticStringMap(Id).initComptime(.{
+        .{ "define", .keyword_define },
+        .{ "defined", .keyword_defined },
+        .{ "else", .keyword_else },
+        .{ "endif", .keyword_endif },
+        .{ "if", .keyword_if },
+        .{ "elif", .keyword_elif },
+        .{ "ifdef", .keyword_ifdef },
+        .{ "ifndef", .keyword_ifndef },
+        .{ "include", .keyword_include },
+        .{ "undef", .keyword_undef },
+        .{ "error", .keyword_error },
+    });
+
+    id: Id,
+    source: Source.Id,
+    start: u32 = 0,
+    end: u32 = 0,
+
+    /// Returns the keyword id for `str`, or `.identifier` when `str` is not a
+    /// keyword.
+    fn getTokenId(str: []const u8) Id {
+        if (all_kws.get(str)) |keyword_id| return keyword_id;
+        return .identifier;
+    }
+};
+
+/// Scans one token starting at `self.index` and advances `self.index` past it.
+///
+/// Returns an `.eof` token once the buffer is exhausted. Notable behavior:
+/// - Only the space character is whitespace; each space is its own token.
+/// - "\n", "\r" and "\r\n" each produce a single `.nl` token.
+/// - A `//` comment is consumed up to its terminating "\n" and folded into the
+///   `.nl` token that ends the line; the token's range includes the comment.
+/// - A `/` not followed by another `/` is returned as an `.identifier` token,
+///   including when it is the last byte of the buffer.
+/// - Any byte without a dedicated case starts an identifier.
+///
+/// A lone `|` or `=` and an unterminated string literal are treated as
+/// unreachable rather than reported as errors.
+pub fn next(self: *Tokenizer) Token {
+    var state: enum {
+        start,
+        cr,
+        string_literal,
+        identifier,
+        equal,
+        slash,
+        line_comment,
+        hash,
+        pipe,
+        pp_num,
+    } = .start;
+
+    const start = self.index;
+    var id: Token.Id = .eof;
+
+    while (self.index < self.buf.len) : (self.index += 1) {
+        const c = self.buf[self.index];
+        switch (state) {
+            .start => switch (c) {
+                '\r' => {
+                    id = .nl;
+                    state = .cr;
+                },
+                '\n' => {
+                    id = .nl;
+                    self.index += 1;
+                    break;
+                },
+                '!' => {
+                    id = .bang;
+                    self.index += 1;
+                    break;
+                },
+                '"' => {
+                    id = .string_literal;
+                    state = .string_literal;
+                },
+                '|' => state = .pipe,
+                '=' => state = .equal,
+                '(' => {
+                    id = .l_paren;
+                    self.index += 1;
+                    break;
+                },
+                ')' => {
+                    id = .r_paren;
+                    self.index += 1;
+                    break;
+                },
+                ';' => {
+                    id = .semicolon;
+                    self.index += 1;
+                    break;
+                },
+                '/' => state = .slash,
+                '#' => state = .hash,
+                '0'...'9' => state = .pp_num,
+                ' ' => {
+                    id = .whitespace;
+                    self.index += 1;
+                    break;
+                },
+                else => state = .identifier,
+            },
+            .cr => switch (c) {
+                '\n' => {
+                    self.index += 1;
+                    break;
+                },
+                else => break,
+            },
+            .pipe => switch (c) {
+                '|' => {
+                    id = .pipe_pipe;
+                    self.index += 1;
+                    break;
+                },
+                else => unreachable,
+            },
+            .hash => switch (c) {
+                '#' => {
+                    id = .hash_hash;
+                    self.index += 1;
+                    break;
+                },
+                else => {
+                    id = .hash;
+                    break;
+                },
+            },
+            .string_literal => switch (c) {
+                '"' => {
+                    self.index += 1;
+                    break;
+                },
+                else => {},
+            },
+            .identifier => switch (c) {
+                'a'...'z', 'A'...'Z', '_', '0'...'9' => {},
+                else => {
+                    id = Token.getTokenId(self.buf[start..self.index]);
+                    break;
+                },
+            },
+            .equal => switch (c) {
+                '=' => {
+                    id = .equal_equal;
+                    self.index += 1;
+                    break;
+                },
+                else => unreachable,
+            },
+            .slash => switch (c) {
+                '/' => state = .line_comment,
+                else => {
+                    id = .identifier;
+                    break;
+                },
+            },
+            .line_comment => switch (c) {
+                '\n' => {
+                    // Step back so the loop's increment lands on the "\n"
+                    // again and the start state turns it into the `.nl` token.
+                    self.index -= 1;
+                    state = .start;
+                },
+                else => {},
+            },
+            .pp_num => switch (c) {
+                '0'...'9' => {},
+                else => {
+                    id = .pp_num;
+                    break;
+                },
+            },
+        }
+    } else if (self.index == self.buf.len) {
+        // The buffer ended in the middle of a token; finish whatever was being
+        // scanned.
+        switch (state) {
+            .start, .line_comment, .cr => {},
+            .identifier => id = Token.getTokenId(self.buf[start..self.index]),
+            .hash => id = .hash,
+            .pp_num => id = .pp_num,
+            // Same result as a `/` followed by any non-`/` byte above.
+            .slash => id = .identifier,
+            else => unreachable,
+        }
+    }
+
+    return .{
+        .id = id,
+        .start = start,
+        .end = self.index,
+        .source = self.source,
+    };
+}
+
+/// Returns the next token that is not `.whitespace`.
+pub fn nextNoWS(self: *Tokenizer) Token {
+    while (true) {
+        const tok = self.next();
+        if (tok.id != .whitespace) return tok;
+    }
+}
+
+/// Returns the next token that is not `.whitespace`.
+///
+/// Behaves exactly like `nextNoWS`: `next` never returns a separate comment
+/// token, so there is nothing further to skip here.
+pub fn nextNoWSComments(self: *Tokenizer) Token {
+    return self.nextNoWS();
+}
+
+/// Test helper: fails unless `actual` has the id `expected`.
+fn expectToken(expected: Token.Id, actual: Token) !void {
+    return std.testing.expectEqual(expected, actual.id);
+}
+
+/// Test helper: checks that `buf` tokenizes to exactly one token with id
+/// `expected`, followed by `.eof`.
+fn testToken(buf: []const u8, expected: Token.Id) !void {
+    var tokenizer: Tokenizer = .init(buf, Source.generated);
+
+    const first = tokenizer.next();
+    try expectToken(expected, first);
+
+    const trailing = tokenizer.next();
+    try expectToken(.eof, trailing);
+}
+
+// Each case is an input that must tokenize to exactly one token of the given id.
+test "tokens" {
+    const Case = struct { text: []const u8, id: Token.Id };
+    const cases = [_]Case{
+        .{ .text = "TEST", .id = .identifier },
+        .{ .text = "__x86_64__", .id = .identifier },
+        .{ .text = "122", .id = .pp_num },
+        .{ .text = "==", .id = .equal_equal },
+        .{ .text = "#", .id = .hash },
+        .{ .text = "##", .id = .hash_hash },
+        .{ .text = "undef", .id = .keyword_undef },
+        .{ .text = "||", .id = .pipe_pipe },
+        .{ .text = "!", .id = .bang },
+        .{ .text = "else", .id = .keyword_else },
+        .{ .text = "endif", .id = .keyword_endif },
+        .{ .text = "include", .id = .keyword_include },
+        .{ .text = "define", .id = .keyword_define },
+        .{ .text = "defined", .id = .keyword_defined },
+        .{ .text = "if", .id = .keyword_if },
+        .{ .text = "ifdef", .id = .keyword_ifdef },
+        .{ .text = "ifndef", .id = .keyword_ifndef },
+        .{ .text = "(", .id = .l_paren },
+        .{ .text = "\n", .id = .nl },
+        .{ .text = "\r", .id = .nl },
+        .{ .text = "\r\n", .id = .nl },
+        .{ .text = "5", .id = .pp_num },
+        .{ .text = ")", .id = .r_paren },
+        .{ .text = "\"str\"", .id = .string_literal },
+        .{ .text = " ", .id = .whitespace },
+    };
+
+    for (cases) |case| try testToken(case.text, case.id);
+}
+
+/// Test helper: checks that `contents`, with whitespace tokens ignored,
+/// tokenizes to exactly the ids in `expected_tokens` and that the very next
+/// token after them is `.eof`.
+fn expectTokens(contents: []const u8, expected_tokens: []const Token.Id) !void {
+    var tokenizer: Tokenizer = .init(contents, Source.generated);
+
+    for (expected_tokens) |want| {
+        const got = tokenizer.nextNoWS();
+        if (got.id != want) {
+            std.debug.print("expected {s}, found {s}\n", .{ @tagName(want), @tagName(got.id) });
+            return error.TokensDoNotEqual;
+        }
+    }
+
+    const after_last = tokenizer.next();
+    try std.testing.expect(after_last.id == .eof);
+}
+
+// Every directive keyword is recognized after `#`, and `defined` is recognized
+// on its own.
+test "preprocessor keywords" {
+    const input =
+        \\#if
+        \\#ifndef
+        \\#ifdef
+        \\#define
+        \\#endif
+        \\defined
+        \\#include
+        \\#elif
+        \\#else
+        \\#undef
+        \\#error
+    ;
+    const expected = [_]Token.Id{
+        .hash, .keyword_if,      .nl,
+        .hash, .keyword_ifndef,  .nl,
+        .hash, .keyword_ifdef,   .nl,
+        .hash, .keyword_define,  .nl,
+        .hash, .keyword_endif,   .nl,
+        .keyword_defined, .nl,
+        .hash, .keyword_include, .nl,
+        .hash, .keyword_elif,    .nl,
+        .hash, .keyword_else,    .nl,
+        .hash, .keyword_undef,   .nl,
+        .hash, .keyword_error,
+    };
+
+    try expectTokens(input, &expected);
+}
diff --git a/test/standalone/build.zig b/test/standalone/build.zig
@@ -31,6 +31,7 @@ pub fn build(b: *std.Build) void {
     const tools_target = b.resolveTargetQuery(.{});
     for ([_][]const u8{
         // Alphabetically sorted. No need to build `tools/spirv/grammar.zig`.
+        "../../tools/check_mingw.zig",
         "../../tools/dump-cov.zig",
         "../../tools/fetch_them_macos_headers.zig",
         "../../tools/gen_macos_headers_c.zig",
@@ -61,6 +62,14 @@ pub fn build(b: *std.Build) void {
                 .target = tools_target,
             }),
         });
+        if (std.mem.endsWith(u8, tool_src_path, "check_mingw.zig")) {
+            const mingw_preprocessor_mod = b.createModule(.{
+                .root_source_file = b.path("../../src/libs/mingw/Preprocessor.zig"),
+                .target = tools_target,
+            });
+            tool.root_module.addImport("preprocessor", mingw_preprocessor_mod);
+        }
+
         tools_tests_step.dependOn(&tool.step);
     }
     for ([_][]const u8{
diff --git a/tools/check_mingw.zig b/tools/check_mingw.zig
@@ -0,0 +1,122 @@
+const std = @import("std");
+const Io = std.Io;
+const Dir = Io.Dir;
+const Preprocessor = @import("preprocessor");
+
+/// Compares the output of the built-in MinGW preprocessor against `arocc -E`
+/// for every file under `<mingw dir>/lib-common`, once per entry in `targets`.
+///
+/// Expects the path to the MinGW library directory as the first command-line
+/// argument. Exits with status 1 when that argument is missing, when a
+/// directory cannot be opened, when arocc cannot be run or does not exit
+/// cleanly, or when the built-in preprocessor fails on a file. A mismatch
+/// between the two outputs is returned as an error from
+/// `expectEqualStrings`.
+pub fn main(init: std.process.Init) !void {
+    const arena = init.arena.allocator();
+    const io = init.io;
+    const args = try init.minimal.args.toSlice(arena);
+
+    // Report a missing argument instead of indexing past the end of `args`.
+    if (args.len < 2) {
+        std.log.err("usage: check_mingw <path to lib/libc/mingw>", .{});
+        std.process.exit(1);
+    }
+    const zig_src_mingw_lib_path = args[1];
+
+    const mingw_include_path = try Dir.path.join(arena, &.{
+        zig_src_mingw_lib_path, "def-include",
+    });
+    const mingw_libcommon_path = try Dir.path.join(arena, &.{
+        zig_src_mingw_lib_path, "lib-common",
+    });
+
+    var mingw_libcommon_dir = Dir.cwd().openDir(io, mingw_libcommon_path, .{ .iterate = true }) catch |err| {
+        std.log.err("unable to open directory {s}: {t}", .{ mingw_libcommon_path, err });
+        std.process.exit(1);
+    };
+    defer mingw_libcommon_dir.close(io);
+
+    var walker = try mingw_libcommon_dir.walk(arena);
+    defer walker.deinit();
+
+    while (try walker.next(io)) |entry| {
+        if (entry.kind != .file) continue;
+
+        var fail = false;
+        for (&targets) |*target| {
+            // Everything allocated for one (file, target) pair is released
+            // together at the end of the iteration.
+            var target_arena: std.heap.ArenaAllocator = .init(init.gpa);
+            defer target_arena.deinit();
+
+            const pp_arena = target_arena.allocator();
+            const file_path = try Dir.path.join(pp_arena, &.{ mingw_libcommon_path, entry.path });
+
+            // Reference output produced by arocc.
+            const aro = pp: {
+                const target_triple = try target.zigTriple(pp_arena);
+                const target_arg = try std.fmt.allocPrint(pp_arena, "--target={s}", .{target_triple});
+                const result = std.process.run(pp_arena, io, .{
+                    .argv = &.{
+                        "arocc",
+                        "-E",
+                        target_arg,
+                        "--no-line-commands",
+                        "-nostdinc",
+                        "-I",
+                        mingw_include_path,
+                        file_path,
+                    },
+                }) catch |err| {
+                    std.log.err("unable to execute arocc: {t}", .{err});
+                    std.process.exit(1);
+                };
+                // Switch on the termination kind: reading `.exited` directly
+                // is invalid when arocc was stopped by a signal.
+                switch (result.term) {
+                    .exited => |code| if (code != 0) {
+                        std.log.err("error executing arocc: {s}", .{result.stderr});
+                        std.process.exit(code);
+                    },
+                    else => {
+                        std.log.err("arocc terminated abnormally ({s}): {s}", .{ @tagName(result.term), result.stderr });
+                        std.process.exit(1);
+                    },
+                }
+                break :pp result.stdout;
+            };
+
+            // Output produced by the built-in preprocessor for the same input.
+            const native = pp: {
+                var aw: Io.Writer.Allocating = .init(pp_arena);
+                errdefer aw.deinit();
+
+                var pp: Preprocessor = .{
+                    .io = io,
+                    .arena = pp_arena,
+                    .include_dir = mingw_include_path,
+                    .target = target,
+                };
+
+                // A failure is recorded and the remaining targets are still
+                // checked; the process exits after the loop over targets.
+                pp.preprocess(file_path) catch |err| {
+                    std.log.err("error preprocessing file {s} for target {t}: {t}", .{ entry.path, target.cpu.arch, err });
+                    fail = true;
+                    continue;
+                };
+                pp.prettyPrintTokens(&aw.writer) catch |err| {
+                    std.log.err("error printing tokens for file {s} for target {t}: {t}", .{ entry.path, target.cpu.arch, err });
+                    fail = true;
+                    continue;
+                };
+
+                break :pp try aw.toOwnedSliceSentinel(0);
+            };
+
+            try std.testing.expectEqualStrings(aro, native);
+        }
+
+        if (fail) std.process.exit(1);
+    }
+}
+
+/// Targets every file is checked against: Windows with the GNU ABI and COFF
+/// object format, for each listed CPU architecture.
+const targets = [_]std.Target{
+    windowsGnuTarget(.thumb),
+    windowsGnuTarget(.aarch64),
+    windowsGnuTarget(.x86),
+    windowsGnuTarget(.x86_64),
+};
+
+/// Builds the Windows/GNU/COFF target description for `arch`, using the
+/// generic CPU model with an empty feature set and the default OS version
+/// range.
+fn windowsGnuTarget(arch: std.Target.Cpu.Arch) std.Target {
+    return .{
+        .ofmt = .coff,
+        .abi = .gnu,
+        .os = .{ .tag = .windows, .version_range = .default(arch, .windows, .gnu) },
+        .cpu = .{ .arch = arch, .model = .generic(arch), .features = .empty },
+    };
+}

	zig fork of https://codeberg.org/ziglang/zig
	Log \| Files \| Refs \| README \| LICENSE

M	build.zig	\|	20	++++++++++++++++++++
M	src/libs/mingw.zig	\|	66	+++++++++++++++++++-----------------------------------------------
A	src/libs/mingw/Preprocessor.zig	\|	975	+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
A	src/libs/mingw/Tokenizer.zig	\|	365	+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
M	test/standalone/build.zig	\|	9	+++++++++
A	tools/check_mingw.zig	\|	122	+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++