zig

fork of https://codeberg.org/ziglang/zig
Log | Files | Refs | README | LICENSE

commit 0b22111bc94f6fc53f0565c386487ea772ae996c (tree)
parent 4f72106c859e71cc47bf53241387271ec3203a43
Author: Arthur Teixeira <arthurcarvalhot@yahoo.com.br>
Date:   Thu, 18 Jun 2026 00:01:17 +0200

MinGW: remove dependency on a C preprocessor for MinGW .def.in files (#35679)

closes #31955

Reviewed-on: https://codeberg.org/ziglang/zig/pulls/35679
Reviewed-by: Ryan Liptak <squeek502@noreply.codeberg.org>

Diffstat:
M build.zig | 20 ++++++++++++++++++++
M src/libs/mingw.zig | 66 +++++++++++++++++++-----------------------------------------------
A src/libs/mingw/Preprocessor.zig | 975 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
A src/libs/mingw/Tokenizer.zig | 365 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
M test/standalone/build.zig | 9 +++++++++
A tools/check_mingw.zig | 122 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
6 files changed, 1510 insertions(+), 47 deletions(-)

diff --git a/build.zig b/build.zig @@ -649,6 +649,26 @@ pub fn build(b: *std.Build) !void { update_mingw_step.dependOn(&b.addFail("The -Dmingw-src=... option is required for this step").step); } + const check_mingw_step = b.step("check-mingw", "Checks for mingw preprocessor regressions"); + const mingw_preprocessor_mod = b.createModule(.{ + .root_source_file = b.path("src/libs/mingw/Preprocessor.zig"), + .target = target, + }); + + const check_mingw_exe = b.addExecutable(.{ + .name = "check_mingw", + .root_module = b.createModule(.{ + .target = b.graph.host, + .root_source_file = b.path("tools/check_mingw.zig"), + .imports = &.{ + .{ .name = "preprocessor", .module = mingw_preprocessor_mod }, + }, + }), + }); + const check_mingw_run = b.addRunArtifact(check_mingw_exe); + check_mingw_run.addDirectoryArg(b.path("lib/libc/mingw")); + check_mingw_step.dependOn(&check_mingw_run.step); + const test_incremental_step = b.step("test-incremental", "Run the incremental compilation test cases"); try tests.addIncrementalTests(b, test_incremental_step, test_filters); if (!skip_test_incremental) test_step.dependOn(test_incremental_step); diff --git a/src/libs/mingw.zig b/src/libs/mingw.zig @@ -14,9 +14,12 @@ const dev = @import("../dev.zig"); const def = @import("mingw/def.zig"); const implib = @import("mingw/implib.zig"); +const Preprocessor = @import("mingw/Preprocessor.zig"); + test { _ = def; _ = implib; + _ = Preprocessor; } pub const CrtFile = enum { @@ -273,22 +276,6 @@ pub fn buildImportLib(comp: *Compilation, lib_name: []const u8) !void { var o_dir = try comp.dirs.global_cache.handle.createDirPathOpen(io, o_sub_path, .{}); defer o_dir.close(io); - const aro = @import("aro"); - var diagnostics: aro.Diagnostics = .{ - .output = .{ .to_list = .{ .arena = .init(gpa) } }, - }; - defer diagnostics.deinit(); - var aro_comp = try aro.Compilation.init(.{ - .gpa = gpa, - .arena = arena, - .io = io, - .diagnostics = &diagnostics, - .environ_map = null, - }); - defer 
aro_comp.deinit(); - - aro_comp.target = .fromZigTarget(target.*); - const include_dir = try comp.dirs.zig_lib.join(arena, &.{ "libc", "mingw", "def-include" }); if (comp.verbose_cc) { @@ -304,39 +291,24 @@ pub fn buildImportLib(comp: *Compilation, lib_name: []const u8) !void { }; } - try aro_comp.search_path.append(gpa, .{ .path = include_dir, .kind = .normal }); - - const builtin_macros = try aro_comp.generateBuiltinMacros(.include_system_defines); - const def_file_source = try aro_comp.addSourceFromPath(def_file_path); - - var pp = try aro.Preprocessor.init(&aro_comp, .{ .base_file = .unused }); - defer pp.deinit(); - pp.linemarkers = .none; - pp.preserve_whitespace = true; - - try pp.preprocessSources(.{ .main = def_file_source, .builtin = builtin_macros }); - - if (aro_comp.diagnostics.output.to_list.messages.items.len != 0) { - var buffer: [64]u8 = undefined; - const stderr = try io.lockStderr(&buffer, null); - defer io.unlockStderr(); - for (aro_comp.diagnostics.output.to_list.messages.items) |msg| { - if (msg.kind == .@"fatal error" or msg.kind == .@"error") { - msg.write(stderr.terminal(), true) catch |err| switch (err) { - error.WriteFailed => return stderr.file_writer.err.?, - error.Canceled, error.Unexpected => |e| return e, - }; - return error.AroPreprocessorFailed; - } - } - } - const members = members: { - var aw: Io.Writer.Allocating = .init(gpa); - errdefer aw.deinit(); - try pp.prettyPrintTokens(&aw.writer, .result_only); + const input = pp: { + var aw: Io.Writer.Allocating = .init(gpa); + errdefer aw.deinit(); + + var pp_arena = std.heap.ArenaAllocator.init(gpa); + defer pp_arena.deinit(); + var pp: Preprocessor = .{ + .io = io, + .arena = pp_arena.allocator(), + .include_dir = include_dir, + .target = target, + }; + try pp.preprocess(def_file_path); + try pp.prettyPrintTokens(&aw.writer); - const input = try aw.toOwnedSliceSentinel(0); + break :pp try aw.toOwnedSliceSentinel(0); + }; defer gpa.free(input); const machine_type = 
target.toCoffMachine(); diff --git a/src/libs/mingw/Preprocessor.zig b/src/libs/mingw/Preprocessor.zig @@ -0,0 +1,975 @@ +const std = @import("std"); +const Tokenizer = @import("./Tokenizer.zig"); +const Allocator = std.mem.Allocator; +const Token = Tokenizer.Token; +const mem = std.mem; +const assert = std.debug.assert; + +test { + _ = Tokenizer; +} + +const TokenList = std.MultiArrayList(Token); +const RawTokenList = std.ArrayList(Token); + +const ExpandBuf = std.ArrayList(Token); + +const Preprocessor = @This(); +const DefineMap = std.StringArrayHashMapUnmanaged(Macro); + +const GeneratedTokens = std.ArrayList(u8); + +const MacroArgument = []const Token; + +pub const Source = struct { + pub const generated: Source.Id = std.math.maxInt(usize); + pub const Id = usize; + id: Id = generated, + path: []const u8, + buf: []const u8, +}; + +sources: std.StringArrayHashMapUnmanaged(Source) = .empty, + +arena: Allocator, +io: std.Io, +include_dir: []const u8, + +top_expansion_buf: ExpandBuf = .empty, +add_expansion_nl: usize = 0, +token_buf: RawTokenList = .empty, +generated_tokens: GeneratedTokens = .empty, +generated_line: u32 = 1, +defines: DefineMap = .empty, +tokens: TokenList = .empty, +target: *const std.Target, + +const Macro = struct { + param: []const u8, + tokens: []const Token, + is_func: bool, +}; + +const IfContext = struct { + const Backing = u2; + const Nesting = enum(Backing) { + until_else, + until_endif, + until_endif_seen_else, + }; + + const buf_size_bits = @bitSizeOf(Backing) * 256; + kind: [buf_size_bits / std.mem.byte_size_in_bits]u8, + level: u8, + + fn get(self: *const IfContext) Nesting { + return @enumFromInt(std.mem.readPackedInt(Backing, &self.kind, @as(usize, self.level) * 2, .native)); + } + + fn set(self: *IfContext, context: Nesting) void { + std.mem.writePackedInt(Backing, &self.kind, @as(usize, self.level) * 2, @intFromEnum(context), .native); + } + + fn increment(self: *IfContext) void { + self.level += 1; + } + + fn decrement(self: 
*IfContext) void { + self.level -= 1; + } + + const default: IfContext = .{ .kind = @splat(0xFF), .level = 0 }; +}; + +fn addToken(pp: *Preprocessor, tok: Token) !void { + try pp.tokens.append(pp.arena, tok); +} + +fn addTokenAssumeCapacity(pp: *Preprocessor, tok: Token) void { + pp.tokens.appendAssumeCapacity(tok); +} + +fn defineBuiltins(pp: *Preprocessor) !void { + var buf: [5]u8 = undefined; + var val = std.fmt.bufPrint(&buf, "{d}", .{pp.target.cTypeBitSize(.longdouble)}) catch unreachable; + try pp.defineBuiltinValue("__SIZEOF_LONG_DOUBLE__", val, .pp_num); + val = std.fmt.bufPrint(&buf, "{d}", .{pp.target.cTypeBitSize(.double)}) catch unreachable; + try pp.defineBuiltinValue("__SIZEOF_DOUBLE__", val, .pp_num); + + if (pp.target.abi.isGnu()) { + try pp.defineBuiltinValue("__cdecl", "__attribute__((__cdecl__))", .identifier); + } + + const arch = switch (pp.target.cpu.arch) { + .aarch64 => "__aarch64__", + .x86 => "__i386__", + .x86_64 => "__x86_64__", + .arm, .thumb => "__arm__", + else => return error.ArchitectureNotSupported, + }; + try pp.defineBuiltin(arch); +} + +fn defineBuiltinValue(pp: *Preprocessor, name: []const u8, value: []const u8, id: Token.Id) !void { + const start = pp.generated_tokens.items.len; + try pp.generated_tokens.appendSlice(pp.arena, value); + const end = pp.generated_tokens.items.len; + + const token_list = try pp.arena.alloc(Token, 1); + token_list[0] = .{ .source = Source.generated, .id = id, .start = @intCast(start), .end = @intCast(end) }; + try pp.defines.putNoClobber(pp.arena, name, .{ + .is_func = false, + .param = "", + .tokens = token_list, + }); +} + +fn defineBuiltin(pp: *Preprocessor, name: []const u8) !void { + return pp.defines.putNoClobber(pp.arena, name, .{ + .tokens = &.{}, + .param = "", + .is_func = false, + }); +} + +pub fn preprocess(pp: *Preprocessor, file_path: []const u8) !void { + const source = try pp.addSourceFromPath(file_path); + try pp.preprocessFile(source); +} + +fn preprocessFile(pp: *Preprocessor, 
src: Source) !void { + try pp.defineBuiltins(); + const eof = try pp.preprocessFileExtra(src); + try pp.addToken(eof); +} + +fn preprocessFileExtra(pp: *Preprocessor, src: Source) !Token { + var tokenizer: Tokenizer = .init(src.buf, src.id); + var if_context: IfContext = .default; + + while (true) { + var tok = tokenizer.next(); + switch (tok.id) { + .hash => { + const directive = tokenizer.nextNoWS(); + switch (directive.id) { + .keyword_define => try pp.define(&tokenizer), + .keyword_if => { + if_context.increment(); + if (try pp.expr(&tokenizer)) { + if_context.set(.until_endif); + } else { + if_context.set(.until_else); + try pp.skip(&tokenizer, .until_else); + } + }, + .keyword_ifdef => { + if_context.increment(); + const macro_name = pp.expectMacroName(&tokenizer); + skipToNl(&tokenizer); + if (pp.defines.get(macro_name) != null) { + if_context.set(.until_endif); + } else { + if_context.set(.until_else); + try pp.skip(&tokenizer, .until_else); + } + }, + .keyword_ifndef => { + if_context.increment(); + const macro_name = pp.expectMacroName(&tokenizer); + skipToNl(&tokenizer); + if (pp.defines.get(macro_name) == null) { + if_context.set(.until_endif); + } else { + if_context.set(.until_else); + try pp.skip(&tokenizer, .until_else); + } + }, + .keyword_elif => { + assert(if_context.level > 0); + switch (if_context.get()) { + .until_else => if (try pp.expr(&tokenizer)) { + if_context.set(.until_endif); + } else { + try pp.skip(&tokenizer, .until_else); + }, + .until_endif => try pp.skip(&tokenizer, .until_endif), + .until_endif_seen_else => unreachable, //elif after endif + } + }, + .keyword_else => { + skipToNl(&tokenizer); + assert(if_context.level > 0); + switch (if_context.get()) { + .until_else => if_context.set(.until_endif_seen_else), + .until_endif => try pp.skip(&tokenizer, .until_endif), + .until_endif_seen_else => unreachable, // else after else + } + }, + .keyword_endif => { + skipToNl(&tokenizer); + assert(if_context.level > 0); + 
if_context.decrement(); + }, + .keyword_undef => { + const macro_name = tokenizer.nextNoWS(); + assert(macro_name.id == .identifier); + pp.undefineMacro(macro_name); + skipToNl(&tokenizer); + }, + .keyword_include => { + try pp.include(&tokenizer); + continue; + }, + .keyword_defined, .keyword_error => {}, + else => unreachable, + } + tok.id = .nl; + try pp.addToken(tok); + }, + .whitespace, .nl => try pp.addToken(tok), + .eof => { + assert(if_context.level == 0); + return tok; + }, + else => try pp.expandMacro(&tokenizer, tok), + } + } +} + +fn include(pp: *Preprocessor, tokenizer: *Tokenizer) anyerror!void { + const first = tokenizer.nextNoWS(); + const src = try findIncludeSource(pp, tokenizer, first); + + _ = try pp.preprocessFileExtra(src); + if (pp.tokens.items(.id)[pp.tokens.len - 1] != .nl) { + try pp.addToken(.{ .id = .nl, .source = Source.generated }); + } +} + +fn findIncludeSource( + pp: *Preprocessor, + tokenizer: *Tokenizer, + first: Token, +) !Source { + const filename_tok = first; + skipToNl(tokenizer); + const tok_slice = pp.expandToken(filename_tok); + assert(tok_slice.len >= 3); + const filename = tok_slice[1 .. 
tok_slice.len - 1]; + return (try pp.findInclude(filename, first)) orelse @panic("include not found"); +} + +fn expectMacroName(pp: *const Preprocessor, tokenizer: *Tokenizer) []const u8 { + const macro_name = tokenizer.nextNoWS(); + assert(macro_name.id.isMacroIdentifier()); + return pp.expandToken(macro_name); +} + +fn skipToNl(tokenizer: *Tokenizer) void { + while (true) { + const tok = tokenizer.next(); + if (tok.id == .nl or tok.id == .eof) return; + if (tok.id == .whitespace) continue; + } +} + +fn define(pp: *Preprocessor, tokenizer: *Tokenizer) !void { + const macro_name = tokenizer.nextNoWS(); + assert(macro_name.id == .identifier); + + const first = tokenizer.nextNoWS(); + switch (first.id) { + .nl, .eof => return pp.defineMacro(macro_name, .{ + .is_func = false, + .tokens = &.{}, + .param = "", + }), + .l_paren => return pp.defineFn(tokenizer, macro_name), + else => {}, + } +} + +fn defineMacro(pp: *Preprocessor, tok: Token, macro: Macro) !void { + const token_value = pp.expandToken(tok); + try pp.defines.putNoClobber(pp.arena, token_value, macro); +} + +fn undefineMacro(pp: *Preprocessor, tok: Token) void { + const token_value = pp.expandToken(tok); + _ = pp.defines.orderedRemove(token_value); +} + +fn defineFn( + pp: *Preprocessor, + tokenizer: *Tokenizer, + macro_name: Token, +) !void { + var tok = tokenizer.nextNoWS(); + assert(tok.id == .identifier); + const param = pp.expandToken(tok); + tok = tokenizer.nextNoWS(); + assert(tok.id == .r_paren); + + pp.token_buf.items.len = 0; + var need_ws = false; + while (true) { + tok = tokenizer.next(); + switch (tok.id) { + .nl, .eof => break, + .whitespace => need_ws = pp.token_buf.items.len != 0, + .hash => unreachable, + .hash_hash => { + need_ws = false; + try pp.token_buf.append(pp.arena, tok); + }, + else => { + if (need_ws) { + need_ws = false; + try pp.token_buf.append(pp.arena, .{ .id = .whitespace, .source = Source.generated }); + } + + if (tok.id.isMacroIdentifier()) { + tok.id = .identifier; + 
const s = pp.expandToken(tok); + if (mem.eql(u8, param, s)) { + tok.id = .macro_param; + tok.end = 0; + } + } + try pp.token_buf.append(pp.arena, tok); + }, + } + } + + const token_list = try pp.arena.dupe(Token, pp.token_buf.items); + try pp.defineMacro(macro_name, .{ + .tokens = token_list, + .is_func = true, + .param = param, + }); +} + +fn expandToken(pp: *const Preprocessor, tok: Token) []const u8 { + return switch (tok.source) { + Source.generated => pp.generated_tokens.items, + else => blk: { + const src = pp.sources.values()[tok.source]; + break :blk src.buf; + }, + }[@intCast(tok.start)..@intCast(tok.end)]; +} + +fn skip( + pp: *Preprocessor, + tokenizer: *Tokenizer, + cont: IfContext.Nesting, +) !void { + var ifs_seen: u32 = 0; + var line_start = true; + while (tokenizer.index < tokenizer.buf.len) { + if (line_start) { + const tokenizer_bkp = tokenizer.*; + const hash = tokenizer.nextNoWS(); + if (hash.id == .nl) continue; + line_start = false; + if (hash.id != .hash) continue; + const directive = tokenizer.nextNoWS(); + switch (directive.id) { + .keyword_else => { + if (ifs_seen != 0) continue; + assert(cont != .until_endif_seen_else); // else after else; + tokenizer.* = tokenizer_bkp; + return; + }, + .keyword_elif => { + if (ifs_seen != 0 or cont == .until_endif) continue; + assert(cont != .until_endif_seen_else); // elif after else; + tokenizer.* = tokenizer_bkp; + return; + }, + .keyword_endif => { + if (ifs_seen == 0) { + tokenizer.* = tokenizer_bkp; + return; + } + ifs_seen -= 1; + }, + .keyword_if, .keyword_ifdef, .keyword_ifndef => ifs_seen += 1, + else => {}, + } + } else if (tokenizer.buf[tokenizer.index] == '\n') { + line_start = true; + tokenizer.index += 1; + try pp.addToken(.{ .id = .nl, .source = Source.generated }); + } else { + line_start = false; + tokenizer.index += 1; + } + } +} + +fn ensureUnusedTokenCapacity(pp: *Preprocessor, capacity: usize) !void { + try pp.tokens.ensureUnusedCapacity(pp.arena, capacity); +} + +fn expr(pp: 
*Preprocessor, tokenizer: *Tokenizer) !bool { + const token_state = pp.tokens.len; + defer pp.tokens.len = token_state; + + pp.top_expansion_buf.items.len = 0; + while (true) { + const tok = tokenizer.next(); + switch (tok.id) { + .nl, .eof => break, + .whitespace => if (pp.top_expansion_buf.items.len == 0) continue, + else => {}, + } + try pp.top_expansion_buf.append(pp.arena, tok); + } else unreachable; + if (pp.top_expansion_buf.items.len != 0) { + try pp.expandMacroExhaustive(tokenizer, &pp.top_expansion_buf, 0, pp.top_expansion_buf.items.len, false, .expr); + } + try pp.ensureUnusedTokenCapacity(pp.top_expansion_buf.items.len); + var i: usize = 0; + const items = pp.top_expansion_buf.items; + while (i < items.len) : (i += 1) { + var tok = items[i]; + switch (tok.id) { + .string_literal, + .semicolon, + .hash_hash, + => unreachable, + .whitespace => continue, + else => if (tok.id == .keyword_defined) { + i += try pp.handleKeywordDefined(&tok, items[i + 1 ..]); + }, + } + pp.addTokenAssumeCapacity(tok); + } + + try pp.addToken(.{ .id = .eof, .source = Source.generated }); + return pp.evalExpression(token_state); +} + +fn handleKeywordDefined( + pp: *Preprocessor, + macro_tok: *Token, + tokens: []const Token, +) !usize { + assert(macro_tok.id == .keyword_defined); + var it = TokenIterator.init(tokens); + + _ = it.expectNoWS(.l_paren); + const second = it.expectNoWS(.identifier); + _ = it.expectNoWS(.r_paren); + + macro_tok.id = if (pp.defines.contains(pp.expandToken(second))) .one else .zero; + + return it.i; +} + +const TokenIterator = struct { + toks: []const Token, + i: usize, + + fn init(toks: []const Token) TokenIterator { + return .{ .toks = toks, .i = 0 }; + } + + fn nextNoWS(self: *TokenIterator) ?Token { + while (self.i < self.toks.len) : (self.i += 1) { + const tok = self.toks[self.i]; + if (tok.id == .whitespace) continue; + + self.i += 1; + return tok; + } + return null; + } + + fn expectNext(self: *TokenIterator) Token { + assert(self.i < 
self.toks.len); + const t = self.toks[self.i]; + self.i += 1; + return t; + } + + fn expectNoWS(self: *TokenIterator, expected: Token.Id) Token { + if (self.nextNoWS()) |tok| { + if (tok.id != expected) { + std.debug.panic("expected token {any} but got {any}\n", .{ expected, tok.id }); + } + return tok; + } + std.debug.panic("expected token {any} but got null\n", .{expected}); + } +}; + +fn expandMacro(pp: *Preprocessor, tokenizer: *Tokenizer, tok: Token) !void { + if (!tok.id.isMacroIdentifier()) { + return pp.addToken(tok); + } + pp.top_expansion_buf.items.len = 0; + try pp.top_expansion_buf.append(pp.arena, tok); + try pp.expandMacroExhaustive(tokenizer, &pp.top_expansion_buf, 0, 1, true, .non_expr); + try pp.addTokensFromExpandBuf(pp.top_expansion_buf.items, .{ .id = .nl, .source = Source.generated }); +} + +fn addTokensFromExpandBuf(pp: *Preprocessor, tokens: []Token, tokenizer_nl: Token) !void { + try pp.ensureUnusedTokenCapacity(tokens.len); + for (tokens) |tok| { + pp.addTokenAssumeCapacity(tok); + } + try pp.ensureUnusedTokenCapacity(pp.add_expansion_nl); + while (pp.add_expansion_nl > 0) : (pp.add_expansion_nl -= 1) { + pp.addTokenAssumeCapacity(tokenizer_nl); + } +} + +const EvalContext = enum { + expr, + non_expr, +}; + +fn expandMacroExhaustive( + pp: *Preprocessor, + tokenizer: *Tokenizer, + buf: *ExpandBuf, + start_idx: usize, + end_idx: usize, + extend_buf: bool, + eval_ctx: EvalContext, +) !void { + var moving_end_idx = end_idx; + var advance_index: usize = 0; + var do_rescan = true; + while (do_rescan) { + do_rescan = false; + var idx: usize = start_idx + advance_index; + while (idx < moving_end_idx) { + const macro_tok = buf.items[idx]; + if (macro_tok.id == .keyword_defined and eval_ctx == .expr) { + idx += 1; + var it = TokenIterator.init(buf.items[idx..moving_end_idx]); + if (it.nextNoWS()) |tok| { + switch (tok.id) { + .l_paren => { + _ = it.nextNoWS(); + _ = it.nextNoWS(); + }, + else => {}, + } + } + idx += it.i; + continue; + } + if 
(!macro_tok.id.isMacroIdentifier()) { + idx += 1; + continue; + } + const expanded = pp.expandToken(macro_tok); + const macro = pp.defines.getPtr(expanded) orelse { + idx += 1; + continue; + }; + + if (macro.is_func) { + var macro_scan_idx = idx; + const arg = try pp.collectMacroArgument( + tokenizer, + buf, + &macro_scan_idx, + &moving_end_idx, + extend_buf, + ); + const expanded_arg = arg: { + var expand_buf: ExpandBuf = .empty; + errdefer expand_buf.deinit(pp.arena); + try expand_buf.appendSlice(pp.arena, arg); + try pp.expandMacroExhaustive(tokenizer, &expand_buf, 0, expand_buf.items.len, false, eval_ctx); + break :arg try expand_buf.toOwnedSlice(pp.arena); + }; + + const res = try pp.expandFuncMacro(macro, arg, expanded_arg); + const tokens_added = res.items.len; + const tokens_removed = macro_scan_idx - idx + 1; + try buf.replaceRange(pp.arena, idx, tokens_removed, res.items); + + moving_end_idx += tokens_added; + moving_end_idx -|= tokens_removed; + idx += tokens_added; + do_rescan = true; + } else { + var res = try pp.expandObjMacro(macro); + defer res.deinit(pp.arena); + var increment_idx_by = res.items.len; + + for (res.items, 0..) 
|*tok, i| {
+                    if (i < increment_idx_by and pp.defines.contains(pp.expandToken(tok.*))) {
+                        increment_idx_by = i;
+                    }
+                }
+                try buf.replaceRange(pp.arena, idx, 1, res.items);
+                idx += res.items.len;
+                moving_end_idx = moving_end_idx + res.items.len - 1;
+                do_rescan = true;
+            }
+            if (idx - start_idx == advance_index + 1 and !do_rescan) {
+                advance_index += 1;
+            }
+        }
+    }
+    buf.items.len = moving_end_idx;
+}
+
+/// Gathers the raw tokens of the single argument of a function-like macro
+/// invocation.
+///
+/// Tokens are pulled with `nextBufToken`, so `start_idx.*` advances as they are
+/// consumed and is left on the closing `)`. Newlines and whitespace in front of
+/// the opening `(` are skipped; any other token there is `unreachable`. Inside
+/// the argument, nested parentheses are kept and every newline/whitespace token
+/// is stored as a generated `.whitespace` token. Reaching `.eof` before the
+/// closing `)` is `unreachable`.
+///
+/// The returned slice is allocated from `pp.arena`.
+fn collectMacroArgument(
+    pp: *Preprocessor,
+    tokenizer: *Tokenizer,
+    buf: *ExpandBuf,
+    start_idx: *usize,
+    end_idx: *usize,
+    extend_buf: bool,
+) !MacroArgument {
+    var paren_depth: u32 = 0;
+    var arg_toks: std.ArrayList(Token) = .empty;
+    defer arg_toks.deinit(pp.arena);
+
+    // Advance to the `(` that opens the argument.
+    var opener = try pp.nextBufToken(tokenizer, buf, start_idx, end_idx, extend_buf);
+    while (opener.id != .l_paren) {
+        switch (opener.id) {
+            .nl, .whitespace => {},
+            else => unreachable,
+        }
+        opener = try pp.nextBufToken(tokenizer, buf, start_idx, end_idx, extend_buf);
+    }
+
+    while (true) {
+        const tok = try pp.nextBufToken(tokenizer, buf, start_idx, end_idx, extend_buf);
+        const kept_tok: Token = switch (tok.id) {
+            .eof => unreachable,
+            // Line structure inside an argument is not preserved.
+            .nl, .whitespace => .{ .id = .whitespace, .source = Source.generated },
+            .l_paren => nested: {
+                paren_depth += 1;
+                break :nested tok;
+            },
+            .r_paren => closing: {
+                // An unnested `)` terminates the argument and is not part of it.
+                if (paren_depth == 0) return try arg_toks.toOwnedSlice(pp.arena);
+                paren_depth -= 1;
+                break :closing tok;
+            },
+            else => tok,
+        };
+        try arg_toks.append(pp.arena, kept_tok);
+    }
+}
+
+/// Returns a fresh buffer holding a copy of the replacement tokens of an
+/// object-like macro. The caller owns the buffer (allocated from `pp.arena`).
+fn expandObjMacro(pp: *Preprocessor, simple_macro: *const Macro) !ExpandBuf {
+    var expansion: ExpandBuf = .empty;
+    errdefer expansion.deinit(pp.arena);
+    try expansion.appendSlice(pp.arena, simple_macro.tokens);
+    return expansion;
+}
+
+/// Builds the replacement list of a function-like macro.
+///
+/// Every `.macro_param` token in the macro body is replaced by `expanded_arg`,
+/// except when it is the right-hand operand of `##`, where the unexpanded `arg`
+/// is pasted onto the output produced so far. All other tokens are copied
+/// as-is. The caller owns the returned buffer (allocated from `pp.arena`).
+fn expandFuncMacro(
+    pp: *Preprocessor,
+    func_macro: *const Macro,
+    arg: MacroArgument,
+    expanded_arg: MacroArgument,
+) !ExpandBuf {
+    const macro_body = func_macro.tokens;
+
+    var expansion: ExpandBuf = .empty;
+    errdefer expansion.deinit(pp.arena);
+    try expansion.ensureTotalCapacity(pp.arena, macro_body.len);
+
+    var body_i: usize = 0;
+    while (body_i < macro_body.len) : (body_i += 1) {
+        const tok = macro_body[body_i];
+        if (tok.id == .macro_param) {
+            try expansion.appendSlice(pp.arena, expanded_arg);
+            continue;
+        }
+        if (tok.id != .hash_hash) {
+            try expansion.append(pp.arena, tok);
+            continue;
+        }
+
+        // `##`: look ahead for the right-hand operand, skipping whitespace and
+        // repeated `##` tokens.
+        while (body_i + 1 < macro_body.len) {
+            body_i += 1;
+            const operand_tok = macro_body[body_i];
+            const operand: []const Token = switch (operand_tok.id) {
+                .whitespace, .hash_hash => continue,
+                .macro_param => arg,
+                else => &[1]Token{operand_tok},
+            };
+            try pp.pasteTokens(&expansion, operand);
+            // NOTE(review): an empty argument does not end the search, so the
+            // next body token is pasted instead — confirm this is intended.
+            if (operand.len != 0) break;
+        }
+    }
+
+    return expansion;
+}
+
+/// Implements the `##` operator: glues the last non-whitespace token of
+/// `lhs_toks` to the first non-whitespace token of `rhs_toks`.
+///
+/// Trailing whitespace is popped off `lhs_toks` first. If `lhs_toks` holds no
+/// other token, `rhs_toks` is appended unchanged; if `rhs_toks` holds nothing
+/// but whitespace, the popped left token is put back. Otherwise the spelling of
+/// both tokens plus a newline is appended to `pp.generated_tokens`, re-tokenized
+/// to find the id of the pasted token, and the resulting generated token is
+/// appended followed by the rest of `rhs_toks`.
+fn pasteTokens(
+    pp: *Preprocessor,
+    lhs_toks: *ExpandBuf,
+    rhs_toks: []const Token,
+) !void {
+    const lhs_tok: Token = while (lhs_toks.pop()) |popped| {
+        if (popped.id != .whitespace) break popped;
+    } else {
+        return lhs_toks.appendSlice(pp.arena, rhs_toks);
+    };
+
+    const rhs_i: usize = for (rhs_toks, 0..) |rhs_candidate, pos| {
+        if (rhs_candidate.id != .whitespace) break pos;
+    } else {
+        // `lhs_tok` was just popped, so its slot is still available.
+        return lhs_toks.appendAssumeCapacity(lhs_tok);
+    };
+    const rhs_tok = rhs_toks[rhs_i];
+
+    const start = pp.generated_tokens.items.len;
+    const end = start + pp.expandToken(lhs_tok).len + pp.expandToken(rhs_tok).len;
+    try pp.generated_tokens.ensureTotalCapacity(pp.arena, end + 1);
+    // The spellings are looked up again only after the capacity is reserved.
+    pp.generated_tokens.appendSliceAssumeCapacity(pp.expandToken(lhs_tok));
+    pp.generated_tokens.appendSliceAssumeCapacity(pp.expandToken(rhs_tok));
+    pp.generated_tokens.appendAssumeCapacity('\n');
+
+    var paste_tokenizer: Tokenizer = .{
+        .index = @intCast(start),
+        .buf = pp.generated_tokens.items,
+        .source = Source.generated,
+    };
+    const pasted_tok = paste_tokenizer.nextNoWS();
+    const after_pasted = paste_tokenizer.nextNoWS();
+
+    try lhs_toks.append(pp.arena, pp.makeGeneratedToken(start, end, pasted_tok.id));
+    // The two spellings must have formed exactly one token.
+    assert(after_pasted.id == .nl or after_pasted.id == .eof);
+
+    return lhs_toks.appendSlice(pp.arena, rhs_toks[rhs_i + 1 ..]);
+}
+
+fn nextBufToken(
+    pp: *Preprocessor,
+    tokenizer: *Tokenizer,
+    buf: *ExpandBuf,
+    start_idx: *usize,
+    end_idx: *usize,
+    extend_buf: bool,
+) !Token {
+    start_idx.* += 1;
+    if (start_idx.* == buf.items.len and
start_idx.* >= end_idx.*) {
+        if (extend_buf) {
+            const tok = tokenizer.next();
+            if (tok.id == .nl) pp.add_expansion_nl += 1;
+
+            end_idx.* += 1;
+            try buf.append(pp.arena, tok);
+            return tok;
+        }
+        return .{ .id = .eof, .source = Source.generated };
+    }
+
+    return buf.items[start_idx.*];
+}
+
+/// Creates a token with the given `id` in the generated source, spanning the
+/// offsets `start`..`end`, and increments `pp.generated_line`.
+fn makeGeneratedToken(
+    pp: *Preprocessor,
+    start: usize,
+    end: usize,
+    id: Token.Id,
+) Token {
+    pp.generated_line += 1;
+    return .{
+        .id = id,
+        .source = Source.generated,
+        .start = @intCast(start),
+        .end = @intCast(end),
+    };
+}
+
+/// Resolves an included `filename`.
+///
+/// The directory of the file that `includer_token` comes from is tried first
+/// ("." when that path has no directory part), then `pp.include_dir`. Returns
+/// `null` when neither location yields a source.
+fn findInclude(
+    pp: *Preprocessor,
+    filename: []const u8,
+    includer_token: Token,
+) !?Source {
+    const includer_path = pp.sources.values()[includer_token.source].path;
+    const includer_dir = std.fs.path.dirname(includer_path) orelse ".";
+
+    const next_to_includer = try pp.checkIncludeDir(filename, includer_dir);
+    if (next_to_includer != null) return next_to_includer;
+
+    return pp.checkIncludeDir(filename, pp.include_dir);
+}
+
+/// Tries to load `include_path` relative to `include_dir`.
+///
+/// Returns `null` for any failure other than `error.OutOfMemory`, which is
+/// propagated. The joined path is only a temporary: it is formatted into a
+/// 1 KiB stack buffer backed by `pp.arena` and released before returning.
+fn checkIncludeDir(
+    pp: *Preprocessor,
+    include_path: []const u8,
+    include_dir: []const u8,
+) !?Source {
+    var path_buf: [1024]u8 = undefined;
+    var path_alloc_state: std.heap.BufferFirstAllocator = .init(&path_buf, pp.arena);
+    const path_alloc = path_alloc_state.allocator();
+
+    const header_path = try std.fmt.allocPrint(path_alloc, "{s}{c}{s}", .{
+        include_dir,
+        std.fs.path.sep,
+        include_path,
+    });
+    defer path_alloc.free(header_path);
+
+    return pp.addSourceFromPath(header_path) catch |err| {
+        if (err == error.OutOfMemory) return error.OutOfMemory;
+        // Anything else is treated as "not found in this directory".
+        return null;
+    };
+}
+
+/// Returns the source registered under `path`, reading the file and
+/// registering it first if it is not known yet.
+///
+/// File contents (limited to `maxInt(u32)` bytes) and a copy of `path` are
+/// allocated from `pp.arena`. The new source's id is the number of sources
+/// registered before it.
+pub fn addSourceFromPath(pp: *Preprocessor, path: []const u8) !Source {
+    if (pp.sources.get(path)) |cached_src| return cached_src;
+
+    // Reserve the map slot up front so the insertion below cannot fail.
+    try pp.sources.ensureUnusedCapacity(pp.arena, 1);
+
+    const file_bytes = try std.Io.Dir.cwd().readFileAlloc(pp.io, path, pp.arena, .limited(std.math.maxInt(u32)));
+    const owned_path = try pp.arena.dupe(u8, path);
+
+    const new_src: Source = .{
+        .id = pp.sources.count(),
+        .path = owned_path,
+        .buf = file_bytes,
+    };
+    pp.sources.putAssumeCapacityNoClobber(owned_path, new_src);
+    return new_src;
+}
+
+/// Evaluates the expression formed by the tokens of `pp.tokens` from index
+/// `start` to the end of the list.
+fn evalExpression(
+    pp: *Preprocessor,
+    start: usize,
+) !bool {
+    const tok_slice = pp.tokens.slice();
+    const tok_count = tok_slice.len - start;
+    const expr_slice = tok_slice.subslice(start, tok_count);
+
+    // Rebuild plain `Token` values from the individual field columns.
+    const expr_toks = try pp.arena.alloc(Token, tok_count);
+    defer pp.arena.free(expr_toks);
+
+    for (
+        expr_toks,
+        expr_slice.items(.id),
+        expr_slice.items(.source),
+        expr_slice.items(.start),
+        expr_slice.items(.end),
+    ) |*dst, tok_id, tok_source, tok_start, tok_end| {
+        dst.* = .{
+            .id = tok_id,
+            .source = tok_source,
+            .start = tok_start,
+            .end = tok_end,
+        };
+    }
+
+    return pp.evaluateExpressionTokens(expr_toks);
+}
+
+/// Evaluates `operand` or `operand <infix> operand`, where each operand may be
+/// prefixed with `!` (see `evalToken`).
+///
+/// A lone operand is true only if it is `.one`. Operands and operator are
+/// checked with `assert`; longer expressions are not handled here.
+fn evaluateExpressionTokens(
+    pp: *const Preprocessor,
+    toks: []const Token,
+) bool {
+    var tok_it = TokenIterator.init(toks);
+
+    const lhs_operand = evalToken(&tok_it);
+    assert(!lhs_operand.id.isInfix());
+
+    const infix_op = evalToken(&tok_it);
+    if (infix_op.id == .eof) return lhs_operand.id == .one;
+    assert(infix_op.id.isInfix());
+
+    const rhs_operand = evalToken(&tok_it);
+    return pp.evalInfix(lhs_operand, infix_op, rhs_operand);
+}
+
+/// Reads the next token from `it`. A `!` is folded into the token that follows
+/// it by swapping `.one` and `.zero`; `!` in front of anything else is
+/// `unreachable`.
+fn evalToken(it: *TokenIterator) Token {
+    const first_tok = it.expectNext();
+    if (first_tok.id == .bang) {
+        var negated_tok = it.expectNext();
+        negated_tok.id = switch (negated_tok.id) {
+            .one => .zero,
+            .zero => .one,
+            else => unreachable,
+        };
+        return negated_tok;
+    }
+    return first_tok;
+}
+
+/// Applies the infix operator `op` to `left` and `right`.
+///
+/// `||` is true when either side is `.one`. `==` compares the ids of
+/// `.one`/`.zero` operands, or the spellings of `.pp_num` operands. Every other
+/// operator or operand kind is `unreachable`.
+fn evalInfix(
+    pp: *const Preprocessor,
+    left: Token,
+    op: Token,
+    right: Token,
+) bool {
+    return switch (op.id) {
+        .pipe_pipe => (left.id == .one) or (right.id == .one),
+        .equal_equal => switch (left.id) {
+            .one, .zero => truth: {
+                assert(right.id == .one or right.id == .zero);
+                break :truth left.id == right.id;
+            },
+            .pp_num => number: {
+                assert(right.id == .pp_num);
+                break :number std.mem.eql(u8, pp.expandToken(left), pp.expandToken(right));
+            },
+            else => unreachable,
+        },
+        else => unreachable,
+    };
+}
+
+pub fn prettyPrintTokens(pp: *Preprocessor, w: *std.Io.Writer) !void {
+    const tok_ids = pp.tokens.items(.id);
+    var i: usize
= 0;
+    var last_nl = true;
+    outer: while (true) : (i += 1) {
+        const cur: Token = pp.tokens.get(i);
+        switch (cur.id) {
+            .eof => {
+                if (!last_nl) try w.writeByte('\n');
+                try w.flush();
+                return;
+            },
+            .nl => {
+                var newlines: u32 = 0;
+                for (tok_ids[i..], i..) |id, j| {
+                    if (id == .nl) {
+                        newlines += 1;
+                    } else if (id == .eof) {
+                        if (!last_nl) try w.writeByte('\n');
+                        try w.flush();
+                        return;
+                    } else if (id != .whitespace) {
+                        if (newlines < 2) break;
+
+                        i = @intCast((j - 1) - @intFromBool(tok_ids[j - 1] == .whitespace));
+                        if (!last_nl) try w.writeAll("\n");
+                        continue :outer;
+                    }
+                }
+                last_nl = true;
+                try w.writeAll("\n");
+            },
+            .whitespace => {
+                try w.writeByte(' ');
+                last_nl = false;
+            },
+            else => {
+                const slice = pp.expandToken(cur);
+                try w.writeAll(slice);
+                last_nl = false;
+            },
+        }
+    }
+}
diff --git a/src/libs/mingw/Tokenizer.zig b/src/libs/mingw/Tokenizer.zig
@@ -0,0 +1,365 @@
+//! Tokenizer for the small preprocessor subset handled by `Preprocessor.zig`.
+//! It splits a byte buffer into `Token`s that refer back to the buffer by
+//! offset; `//` line comments are skipped while tokenizing.
+
+const std = @import("std");
+const assert = std.debug.assert;
+const Source = @import("Preprocessor.zig").Source;
+
+const Tokenizer = @This();
+
+/// Creates a tokenizer positioned at the start of `buf`. Every token it
+/// produces is tagged with `source`.
+pub fn init(buf: []const u8, source: Source.Id) Tokenizer {
+    return .{ .buf = buf, .source = source };
+}
+
+/// The bytes being tokenized.
+buf: []const u8,
+/// Offset into `buf` of the next byte to look at.
+index: u32 = 0,
+/// Id stored in the `source` field of every produced token.
+source: Source.Id,
+
+pub const Token = struct {
+    pub const Id = enum {
+        bang,
+        eof,
+        equal_equal,
+        hash,
+        hash_hash,
+        macro_param,
+        identifier,
+        keyword_if,
+        keyword_ifndef,
+        keyword_ifdef,
+        keyword_define,
+        keyword_endif,
+        keyword_defined,
+        keyword_include,
+        keyword_elif,
+        keyword_else,
+        keyword_undef,
+        keyword_error,
+        l_paren,
+        nl,
+        pp_num,
+        pipe_pipe,
+        r_paren,
+        semicolon,
+        string_literal,
+        whitespace,
+        one,
+        zero,
+
+        /// Whether `id` is one of the binary operators `||` or `==`.
+        pub fn isInfix(id: Id) bool {
+            switch (id) {
+                .pipe_pipe, .equal_equal => return true,
+                else => return false,
+            }
+        }
+
+        /// Whether a token with this id can name a macro: plain identifiers
+        /// and every keyword qualify.
+        pub fn isMacroIdentifier(id: Id) bool {
+            switch (id) {
+                .keyword_if,
+                .keyword_ifndef,
+                .keyword_ifdef,
+                .keyword_define,
+                .keyword_endif,
+                .keyword_defined,
+                .keyword_include,
+                .keyword_elif,
+                .keyword_else,
+                .keyword_undef,
+                .keyword_error,
+                .identifier,
+                => return true,
+                else => return false,
+            }
+        }
+    };
+
+    /// Spellings that are tokenized as keywords instead of `.identifier`.
+    const all_kws = std.StaticStringMap(Id).initComptime(.{
+        .{ "define", .keyword_define },
+        .{ "defined", .keyword_defined },
+        .{ "else", .keyword_else },
+        .{ "endif", .keyword_endif },
+        .{ "if", .keyword_if },
+        .{ "elif", .keyword_elif },
+        .{ "ifdef", .keyword_ifdef },
+        .{ "ifndef", .keyword_ifndef },
+        .{ "include", .keyword_include },
+        .{ "undef", .keyword_undef },
+        .{ "error", .keyword_error },
+    });
+
+    id: Id,
+    source: Source.Id,
+    /// Offset of the first byte of the token.
+    start: u32 = 0,
+    /// Offset one past the last byte of the token.
+    end: u32 = 0,
+
+    /// Maps an identifier spelling to its keyword id, or `.identifier`.
+    fn getTokenId(str: []const u8) Id {
+        return all_kws.get(str) orelse .identifier;
+    }
+};
+
+/// Returns the next token and advances past it; `.eof` once the buffer is
+/// exhausted.
+///
+/// Notes on the less obvious cases:
+/// * `\n`, `\r` and `\r\n` each produce one `.nl` token.
+/// * Only a single space produces `.whitespace`; any byte without a dedicated
+///   case starts an `.identifier`, which then runs while bytes are
+///   alphanumeric or `_`.
+/// * `//` starts a comment that is skipped up to the end of the line. The
+///   newline that ends it is returned as a `.nl` token that starts where the
+///   comment started.
+/// * A `/` that is not followed by another `/` is an `.identifier`, also when
+///   it is the last byte of the buffer.
+/// * A string literal that is still open at the end of the buffer is returned
+///   as a `.string_literal` reaching to the end of the buffer.
+/// * A `|` or `=` that is not doubled is `unreachable`.
+pub fn next(self: *Tokenizer) Token {
+    var state: enum {
+        start,
+        cr,
+        string_literal,
+        identifier,
+        equal,
+        slash,
+        line_comment,
+        hash,
+        pipe,
+        pp_num,
+    } = .start;
+
+    const start = self.index;
+    var id: Token.Id = .eof;
+
+    while (self.index < self.buf.len) : (self.index += 1) {
+        const c = self.buf[self.index];
+        switch (state) {
+            .start => switch (c) {
+                '\r' => {
+                    id = .nl;
+                    state = .cr;
+                },
+                '\n' => {
+                    id = .nl;
+                    self.index += 1;
+                    break;
+                },
+                '!' => {
+                    id = .bang;
+                    self.index += 1;
+                    break;
+                },
+                '"' => {
+                    id = .string_literal;
+                    state = .string_literal;
+                },
+                '|' => state = .pipe,
+                '=' => state = .equal,
+                '(' => {
+                    id = .l_paren;
+                    self.index += 1;
+                    break;
+                },
+                ')' => {
+                    id = .r_paren;
+                    self.index += 1;
+                    break;
+                },
+                ';' => {
+                    id = .semicolon;
+                    self.index += 1;
+                    break;
+                },
+                '/' => state = .slash,
+                '#' => state = .hash,
+                '0'...'9' => state = .pp_num,
+                ' ' => {
+                    id = .whitespace;
+                    self.index += 1;
+                    break;
+                },
+                else => state = .identifier,
+            },
+            .cr => switch (c) {
+                '\n' => {
+                    self.index += 1;
+                    break;
+                },
+                else => break,
+            },
+            .pipe => switch (c) {
+                '|' => {
+                    id = .pipe_pipe;
+                    self.index += 1;
+                    break;
+                },
+                else => unreachable,
+            },
+            .hash => switch (c) {
+                '#' => {
+                    id = .hash_hash;
+                    self.index += 1;
+                    break;
+                },
+                else => {
+                    id = .hash;
+                    break;
+                },
+            },
+            .string_literal => switch (c) {
+                '"' => {
+                    self.index += 1;
+                    break;
+                },
+                else => {},
+            },
+            .identifier => switch (c) {
+                'a'...'z', 'A'...'Z', '_', '0'...'9' => {},
+                else => {
+                    id = Token.getTokenId(self.buf[start..self.index]);
+                    break;
+                },
+            },
+            .equal => switch (c) {
+                '=' => {
+                    id = .equal_equal;
+                    self.index += 1;
+                    break;
+                },
+                else => unreachable,
+            },
+            .slash => switch (c) {
+                '/' => state = .line_comment,
+                else => {
+                    id = .identifier;
+                    break;
+                },
+            },
+            .line_comment => switch (c) {
+                '\n' => {
+                    // Step back so the newline is looked at again in the
+                    // `.start` state and becomes the returned `.nl` token.
+                    self.index -= 1;
+                    state = .start;
+                },
+                else => {},
+            },
+            .pp_num => switch (c) {
+                '0'...'9' => {},
+                else => {
+                    id = .pp_num;
+                    break;
+                },
+            },
+        }
+    } else if (self.index == self.buf.len) {
+        // The buffer ended in the middle of a token: finish it the same way a
+        // following delimiter would have.
+        switch (state) {
+            // `id` already holds the right value for these states.
+            .start, .line_comment, .cr, .string_literal => {},
+            .identifier => id = Token.getTokenId(self.buf[start..self.index]),
+            .slash => id = .identifier,
+            .hash => id = .hash,
+            .pp_num => id = .pp_num,
+            // There is no token for a single `=` or `|`.
+            .equal, .pipe => unreachable,
+        }
+    }
+
+    return .{
+        .id = id,
+        .start = start,
+        .end = self.index,
+        .source = self.source,
+    };
+}
+
+/// Returns the next token that is not `.whitespace`. Newlines are returned.
+pub fn nextNoWS(self: *Tokenizer) Token {
+    var tok = self.next();
+    while (tok.id == .whitespace) tok = self.next();
+    return tok;
+}
+
+/// Same as `nextNoWS`: comments never show up as tokens because `next`
+/// already skips them.
+pub fn nextNoWSComments(self: *Tokenizer) Token {
+    return self.nextNoWS();
+}
+
+fn expectToken(expected: Token.Id, actual: Token) !void {
+    try std.testing.expectEqual(expected, actual.id);
+}
+
+/// Checks that `buf` tokenizes to exactly one token with id `expected`,
+/// followed by `.eof`.
+fn testToken(buf: []const u8, expected: Token.Id) !void {
+    var tokenizer = Tokenizer.init(buf, Source.generated);
+    const t = tokenizer.next();
+    try expectToken(expected, t);
+    try expectToken(.eof, tokenizer.next());
+}
+
+test "tokens" {
+    try testToken("TEST", .identifier);
+    try testToken("__x86_64__", .identifier);
+    try testToken("122", .pp_num);
+    try testToken("==", .equal_equal);
+    try testToken("#", .hash);
+    try testToken("##", .hash_hash);
+    try testToken("undef", .keyword_undef);
+    try testToken("||", .pipe_pipe);
+    try testToken("!", .bang);
+    try testToken("else", .keyword_else);
+    try testToken("endif", .keyword_endif);
+    try testToken("include", .keyword_include);
+    try testToken("define", .keyword_define);
+    try testToken("defined", .keyword_defined);
+    try testToken("if", .keyword_if);
+    try testToken("ifdef", .keyword_ifdef);
+    try testToken("ifndef", .keyword_ifndef);
+    try testToken("(", .l_paren);
+    try testToken("\n", .nl);
+    try testToken("\r", .nl);
+    try testToken("\r\n", .nl);
+    try testToken("5", .pp_num);
+    try testToken(")", .r_paren);
+    try testToken("\"str\"", .string_literal);
+    try testToken(" ", .whitespace);
+    // Tokens cut short by the end of the buffer.
+    try testToken("/", .identifier);
+    try testToken("\"str", .string_literal);
+}
+
+fn expectTokens(contents: []const u8, expected_tokens: []const Token.Id) !void {
+    var tokenizer: Tokenizer = .init(contents, Source.generated);
+    var i: usize = 0;
+    while (i < expected_tokens.len) {
+        const token = tokenizer.next();
+        if (token.id == .whitespace) continue;
+        const expected_token_id = expected_tokens[i];
+        i += 1;
+        if (!std.meta.eql(token.id, expected_token_id)) {
+            std.debug.print("expected {s}, found {s}\n", .{ @tagName(expected_token_id), @tagName(token.id) });
+            return
error.TokensDoNotEqual;
+        }
+    }
+    const last_token = tokenizer.next();
+    try std.testing.expect(last_token.id == .eof);
+}
+
+test "preprocessor keywords" {
+    try expectTokens(
+        \\#if
+        \\#ifndef
+        \\#ifdef
+        \\#define
+        \\#endif
+        \\defined
+        \\#include
+        \\#elif
+        \\#else
+        \\#undef
+        \\#error
+    , &.{
+        .hash,
+        .keyword_if,
+        .nl,
+        .hash,
+        .keyword_ifndef,
+        .nl,
+        .hash,
+        .keyword_ifdef,
+        .nl,
+        .hash,
+        .keyword_define,
+        .nl,
+        .hash,
+        .keyword_endif,
+        .nl,
+        .keyword_defined,
+        .nl,
+        .hash,
+        .keyword_include,
+        .nl,
+        .hash,
+        .keyword_elif,
+        .nl,
+        .hash,
+        .keyword_else,
+        .nl,
+        .hash,
+        .keyword_undef,
+        .nl,
+        .hash,
+        .keyword_error,
+    });
+}
diff --git a/test/standalone/build.zig b/test/standalone/build.zig
@@ -31,6 +31,7 @@ pub fn build(b: *std.Build) void {
     const tools_target = b.resolveTargetQuery(.{});
     for ([_][]const u8{
         // Alphabetically sorted. No need to build `tools/spirv/grammar.zig`.
+        "../../tools/check_mingw.zig",
         "../../tools/dump-cov.zig",
         "../../tools/fetch_them_macos_headers.zig",
         "../../tools/gen_macos_headers_c.zig",
@@ -61,6 +62,14 @@ pub fn build(b: *std.Build) void {
                 .target = tools_target,
             }),
         });
+        if (std.mem.endsWith(u8, tool_src_path, "check_mingw.zig")) {
+            const mingw_preprocessor_mod = b.createModule(.{
+                .root_source_file = b.path("../../src/libs/mingw/Preprocessor.zig"),
+                .target = tools_target,
+            });
+            tool.root_module.addImport("preprocessor", mingw_preprocessor_mod);
+        }
+
         tools_tests_step.dependOn(&tool.step);
     }
     for ([_][]const u8{
diff --git a/tools/check_mingw.zig b/tools/check_mingw.zig
@@ -0,0 +1,122 @@
+const std = @import("std");
+const Io = std.Io;
+const Dir = Io.Dir;
+const Preprocessor = @import("preprocessor");
+
+/// Regression check for the native MinGW `.def.in` preprocessor.
+///
+/// Expects one argument: the path to the MinGW library directory (the
+/// `check-mingw` build step passes `lib/libc/mingw`). Every file below its
+/// `lib-common` subdirectory is preprocessed for each entry of `targets`, once
+/// by running `arocc -E` and once with `Preprocessor`, using `def-include` as
+/// the include directory. The two outputs must be equal.
+///
+/// Exits with status 1 if the argument is missing, the directory cannot be
+/// opened, `arocc` cannot be run or does not exit normally, or the native
+/// preprocessor fails on a file. A failing `arocc` run exits with arocc's own
+/// exit status. An output mismatch is returned as an error from
+/// `std.testing.expectEqualStrings`.
+pub fn main(init: std.process.Init) !void {
+    const arena = init.arena.allocator();
+    const io = init.io;
+    const args = try init.minimal.args.toSlice(arena);
+
+    // Without this check a missing argument is an out-of-bounds access.
+    if (args.len < 2) {
+        std.log.err("expected the path to the mingw library directory (lib/libc/mingw) as the first argument", .{});
+        std.process.exit(1);
+    }
+    const zig_src_mingw_lib_path = args[1];
+
+    const mingw_include_path = try Dir.path.join(arena, &.{
+        zig_src_mingw_lib_path, "def-include",
+    });
+    const mingw_libcommon_path = try Dir.path.join(arena, &.{
+        zig_src_mingw_lib_path, "lib-common",
+    });
+
+    var mingw_libcommon_dir = Dir.cwd().openDir(io, mingw_libcommon_path, .{ .iterate = true }) catch |err| {
+        std.log.err("unable to open directory {s}: {t}", .{ mingw_libcommon_path, err });
+        std.process.exit(1);
+    };
+    defer mingw_libcommon_dir.close(io);
+
+    var walker = try mingw_libcommon_dir.walk(arena);
+    defer walker.deinit();
+
+    while (try walker.next(io)) |entry| {
+        if (entry.kind != .file) continue;
+
+        var fail = false;
+        for (&targets) |*target| {
+            // Everything allocated for one (file, target) pair is released
+            // together at the end of the iteration.
+            var target_arena: std.heap.ArenaAllocator = .init(init.gpa);
+            defer target_arena.deinit();
+
+            const pp_arena = target_arena.allocator();
+            const file_path = try Dir.path.join(pp_arena, &.{ mingw_libcommon_path, entry.path });
+
+            // Reference output, produced by arocc.
+            const aro = pp: {
+                const target_triple = try target.zigTriple(pp_arena);
+                const target_arg = try std.fmt.allocPrint(pp_arena, "--target={s}", .{target_triple});
+                const result = std.process.run(pp_arena, io, .{
+                    .argv = &.{
+                        "arocc",
+                        "-E",
+                        target_arg,
+                        "--no-line-commands",
+                        "-nostdinc",
+                        "-I",
+                        mingw_include_path,
+                        file_path,
+                    },
+                }) catch |err| {
+                    std.log.err("unable to execute arocc: {t}", .{err});
+                    std.process.exit(1);
+                };
+                // Only read the exit code when the process actually exited;
+                // any other way of terminating is reported as a failure too.
+                switch (result.term) {
+                    .exited => |code| if (code != 0) {
+                        std.log.err("error executing arocc: {s}", .{result.stderr});
+                        std.process.exit(code);
+                    },
+                    else => {
+                        std.log.err("arocc terminated abnormally: {s}", .{result.stderr});
+                        std.process.exit(1);
+                    },
+                }
+                break :pp result.stdout;
+            };
+
+            // Output of the preprocessor under test.
+            const native = pp: {
+                var aw: Io.Writer.Allocating = .init(pp_arena);
+                errdefer aw.deinit();
+
+                var pp: Preprocessor = .{
+                    .io = io,
+                    .arena = pp_arena,
+                    .include_dir = mingw_include_path,
+                    .target = target,
+                };
+
+                // On failure, keep checking the remaining targets of this file
+                // and exit only afterwards.
+                pp.preprocess(file_path) catch |err| {
+                    std.log.err("error preprocessing file {s} for target {t}: {t}", .{ entry.path, target.cpu.arch, err });
+                    fail = true;
+                    continue;
+                };
+                pp.prettyPrintTokens(&aw.writer) catch |err| {
+                    std.log.err("error printing tokens for file {s} for target {t}: {t}", .{ entry.path, target.cpu.arch, err });
+                    fail = true;
+                    continue;
+                };
+
+                break :pp try aw.toOwnedSliceSentinel(0);
+            };
+
+            try std.testing.expectEqualStrings(aro, native);
+        }
+
+        if (fail) std.process.exit(1);
+    }
+}
+
+/// Targets every file is checked for: Windows/GNU on thumb, aarch64, x86 and
+/// x86_64.
+const targets = [_]std.Target{
+    .{
+        .ofmt = .coff,
+        .abi = .gnu,
+        .os = .{ .tag = .windows, .version_range = .default(.thumb, .windows, .gnu) },
+        .cpu = .{ .arch = .thumb, .model = .generic(.thumb), .features = .empty },
+    },
+    .{
+        .ofmt = .coff,
+        .abi = .gnu,
+        .os = .{ .tag = .windows, .version_range = .default(.aarch64, .windows, .gnu) },
+        .cpu = .{ .arch = .aarch64, .model = .generic(.aarch64), .features = .empty },
+    },
+    .{
+        .ofmt = .coff,
+        .abi = .gnu,
+        .os = .{ .tag = .windows, .version_range = .default(.x86, .windows, .gnu) },
+        .cpu = .{ .arch = .x86, .model = .generic(.x86), .features = .empty },
+    },
+    .{
+        .ofmt = .coff,
+        .abi = .gnu,
+        .os = .{ .tag = .windows, .version_range = .default(.x86_64, .windows, .gnu) },
+        .cpu = .{ .arch = .x86_64, .model = .generic(.x86_64), .features = .empty },
+    },
+};