zig

fork of https://codeberg.org/ziglang/zig
Log | Files | Refs | README | LICENSE

commit 0c61466771ff205a955f3e5002d2a7f2449ccc78 (tree)
parent f60c24c73cc5c5894fbfb7060a70bc683c4a4ba5
Author: Andrew Kelley <andrew@ziglang.org>
Date:   Mon, 11 Mar 2024 17:18:09 -0700

Merge pull request #19174 from squeek502/lazy-resinator

Lazily compile the `zig rc` subcommand and use it during `zig build-exe`, etc
Diffstat:
Rsrc/resinator/ani.zig -> lib/compiler/resinator/ani.zig | 0
Rsrc/resinator/ast.zig -> lib/compiler/resinator/ast.zig | 0
Rsrc/resinator/bmp.zig -> lib/compiler/resinator/bmp.zig | 0
Alib/compiler/resinator/cli.zig | 1507+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Alib/compiler/resinator/code_pages.zig | 500+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Alib/compiler/resinator/comments.zig | 358+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Alib/compiler/resinator/compile.zig | 3427+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Alib/compiler/resinator/errors.zig | 1076+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Rsrc/resinator/ico.zig -> lib/compiler/resinator/ico.zig | 0
Alib/compiler/resinator/lang.zig | 877+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Alib/compiler/resinator/lex.zig | 1106+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Alib/compiler/resinator/literals.zig | 910+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Alib/compiler/resinator/main.zig | 719+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Alib/compiler/resinator/parse.zig | 1897+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Alib/compiler/resinator/preprocess.zig | 140+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Rsrc/resinator/rc.zig -> lib/compiler/resinator/rc.zig | 0
Alib/compiler/resinator/res.zig | 1107+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Alib/compiler/resinator/source_mapping.zig | 831+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Alib/compiler/resinator/utils.zig | 124+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Rsrc/resinator/windows1252.zig -> lib/compiler/resinator/windows1252.zig | 0
Mlib/std/zig/ErrorBundle.zig | 126+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++--
Msrc/Compilation.zig | 634+++++++++++++++++++++++--------------------------------------------------------
Msrc/main.zig | 331++++++++++++++-----------------------------------------------------------------
Dsrc/resinator.zig | 25-------------------------
Dsrc/resinator/cli.zig | 1439-------------------------------------------------------------------------------
Dsrc/resinator/code_pages.zig | 487-------------------------------------------------------------------------------
Dsrc/resinator/comments.zig | 340-------------------------------------------------------------------------------
Dsrc/resinator/compile.zig | 3378-------------------------------------------------------------------------------
Dsrc/resinator/errors.zig | 1060-------------------------------------------------------------------------------
Dsrc/resinator/lang.zig | 877-------------------------------------------------------------------------------
Dsrc/resinator/lex.zig | 1098-------------------------------------------------------------------------------
Dsrc/resinator/literals.zig | 911-------------------------------------------------------------------------------
Dsrc/resinator/parse.zig | 1883-------------------------------------------------------------------------------
Dsrc/resinator/preprocess.zig | 100-------------------------------------------------------------------------------
Dsrc/resinator/res.zig | 1107-------------------------------------------------------------------------------
Dsrc/resinator/source_mapping.zig | 687-------------------------------------------------------------------------------
Dsrc/resinator/utils.zig | 112-------------------------------------------------------------------------------
37 files changed, 14940 insertions(+), 14234 deletions(-)

diff --git a/src/resinator/ani.zig b/lib/compiler/resinator/ani.zig diff --git a/src/resinator/ast.zig b/lib/compiler/resinator/ast.zig diff --git a/src/resinator/bmp.zig b/lib/compiler/resinator/bmp.zig diff --git a/lib/compiler/resinator/cli.zig b/lib/compiler/resinator/cli.zig @@ -0,0 +1,1507 @@ +const std = @import("std"); +const CodePage = @import("code_pages.zig").CodePage; +const lang = @import("lang.zig"); +const res = @import("res.zig"); +const Allocator = std.mem.Allocator; +const lex = @import("lex.zig"); + +/// This is what /SL 100 will set the maximum string literal length to +pub const max_string_literal_length_100_percent = 8192; + +pub const usage_string_after_command_name = + \\ [options] [--] <INPUT> [<OUTPUT>] + \\ + \\The sequence -- can be used to signify when to stop parsing options. + \\This is necessary when the input path begins with a forward slash. + \\ + \\Supported Win32 RC Options: + \\ /?, /h Print this help and exit. + \\ /v Verbose (print progress messages). + \\ /d <name>[=<value>] Define a symbol (during preprocessing). + \\ /u <name> Undefine a symbol (during preprocessing). + \\ /fo <value> Specify output file path. + \\ /l <value> Set default language using hexadecimal id (ex: 409). + \\ /ln <value> Set default language using language name (ex: en-us). + \\ /i <value> Add an include path. + \\ /x Ignore INCLUDE environment variable. + \\ /c <value> Set default code page (ex: 65001). + \\ /w Warn on invalid code page in .rc (instead of error). + \\ /y Suppress warnings for duplicate control IDs. + \\ /n Null-terminate all strings in string tables. + \\ /sl <value> Specify string literal length limit in percentage (1-100) + \\ where 100 corresponds to a limit of 8192. If the /sl + \\ option is not specified, the default limit is 4097. + \\ /p Only run the preprocessor and output a .rcpp file. + \\ + \\No-op Win32 RC Options: + \\ /nologo, /a, /r Options that are recognized but do nothing. 
+ \\ + \\Unsupported Win32 RC Options: + \\ /fm, /q, /g, /gn, /g1, /g2 Unsupported MUI-related options. + \\ /?c, /hc, /t, /tp:<prefix>, Unsupported LCX/LCE-related options. + \\ /tn, /tm, /tc, /tw, /te, + \\ /ti, /ta + \\ /z Unsupported font-substitution-related option. + \\ /s Unsupported HWB-related option. + \\ + \\Custom Options (resinator-specific): + \\ /:no-preprocess Do not run the preprocessor. + \\ /:debug Output the preprocessed .rc file and the parsed AST. + \\ /:auto-includes <value> Set the automatic include path detection behavior. + \\ any (default) Use MSVC if available, fall back to MinGW + \\ msvc Use MSVC include paths (must be present on the system) + \\ gnu Use MinGW include paths + \\ none Do not use any autodetected include paths + \\ /:depfile <path> Output a file containing a list of all the files that + \\ the .rc includes or otherwise depends on. + \\ /:depfile-fmt <value> Output format of the depfile, if /:depfile is set. + \\ json (default) A top-level JSON array of paths + \\ /:mingw-includes <path> Path to a directory containing MinGW include files. If + \\ not specified, bundled MinGW include files will be used. 
+ \\ + \\Note: For compatibility reasons, all custom options start with : + \\ +; +
+/// Writes the usage text to `writer`: the literal "Usage: ", then `command_name`,
+/// then `usage_string_after_command_name`. Any error from `writer` is returned.
+pub fn writeUsage(writer: anytype, command_name: []const u8) !void { + try writer.writeAll("Usage: "); + try writer.writeAll(command_name); + try writer.writeAll(usage_string_after_command_name); +} +
+/// Accumulates the errors, warnings and notes produced while handling CLI args.
+/// `allocator` backs the list itself and is used to free each entry's `msg` in `deinit`.
+pub const Diagnostics = struct { + errors: std.ArrayListUnmanaged(ErrorDetails) = .{}, + allocator: Allocator, +
+ /// One diagnostic entry. `msg` is a byte buffer that `Diagnostics.deinit` frees;
+ /// `type` defaults to `.err` and `print_args` defaults to true.
+ /// NOTE(review): `arg_index`/`arg_span` are presumably interpreted by `renderErrorMessage`,
+ /// which is not visible here - confirm there how the offsets are used.
+ pub const ErrorDetails = struct { + arg_index: usize, + arg_span: ArgSpan = .{}, + msg: std.ArrayListUnmanaged(u8) = .{}, + type: Type = .err, + print_args: bool = true, + + pub const Type = enum { err, warning, note }; + pub const ArgSpan = struct { + point_at_next_arg: bool = false, + name_offset: usize = 0, + prefix_len: usize = 0, + value_offset: usize = 0, + name_len: usize = 0, + }; + }; +
+ /// Returns an empty diagnostics list that will use `allocator`.
+ pub fn init(allocator: Allocator) Diagnostics { + return .{ + .allocator = allocator, + }; + } +
+ /// Frees every entry's `msg` buffer, then the list itself.
+ pub fn deinit(self: *Diagnostics) void { + for (self.errors.items) |*details| { + details.msg.deinit(self.allocator); + } + self.errors.deinit(self.allocator); + } +
+ /// Appends `error_details` to the list; from then on its `msg` is freed by `deinit`.
+ /// Returns the allocation error if the list cannot grow.
+ pub fn append(self: *Diagnostics, error_details: ErrorDetails) !void { + try self.errors.append(self.allocator, error_details); + } +
+ /// Renders all entries to stderr while holding the stderr mutex.
+ /// A failed write stops rendering and is not reported to the caller (`catch return`).
+ pub fn renderToStdErr(self: *Diagnostics, args: []const []const u8, config: std.io.tty.Config) void { + std.debug.getStderrMutex().lock(); + defer std.debug.getStderrMutex().unlock(); + const stderr = std.io.getStdErr().writer(); + self.renderToWriter(args, stderr, config) catch return; + } +
+ /// Renders every entry, in insertion order, by calling `renderErrorMessage`.
+ /// The first error from that call is returned and rendering stops.
+ pub fn renderToWriter(self: *Diagnostics, args: []const []const u8, writer: anytype, config: std.io.tty.Config) !void { + for (self.errors.items) |err_details| { + try renderErrorMessage(writer, config, err_details, args); + } + } +
+ /// Returns true if at least one entry has type `.err`; warnings and notes do not count.
+ pub fn hasError(self: *const Diagnostics) bool { + for (self.errors.items) |err| { + if (err.type == .err) return true; + } + return false; + } +}; +
+/// Settings gathered from the command line. The filename/path strings, the include path
+/// list and the `symbols` keys and values are freed with `allocator` in `deinit`.
+pub const Options = struct { + allocator: Allocator, + input_filename: []const u8 = &[_]u8{}, + 
output_filename: []const u8 = &[_]u8{}, + extra_include_paths: std.ArrayListUnmanaged([]const u8) = .{}, + ignore_include_env_var: bool = false, + preprocess: Preprocess = .yes, + default_language_id: ?u16 = null, + default_code_page: ?CodePage = null, + verbose: bool = false, + symbols: std.StringArrayHashMapUnmanaged(SymbolValue) = .{}, + null_terminate_string_table_strings: bool = false, + max_string_literal_codepoints: u15 = lex.default_max_string_literal_codepoints, + silent_duplicate_control_ids: bool = false, + warn_instead_of_error_on_invalid_code_page: bool = false, + debug: bool = false, + print_help_and_exit: bool = false, + auto_includes: AutoIncludes = .any, + depfile_path: ?[]const u8 = null, + depfile_fmt: DepfileFormat = .json, + mingw_includes_dir: ?[]const u8 = null, + + pub const AutoIncludes = enum { any, msvc, gnu, none }; + pub const DepfileFormat = enum { json }; + pub const Preprocess = enum { no, yes, only }; + pub const SymbolAction = enum { define, undefine };
+ /// What is recorded for one symbol name: either a definition carrying its value
+ /// string, or an undefine marker that carries no data.
+ pub const SymbolValue = union(SymbolAction) { + define: []const u8, + undefine: void, +
+ /// Frees the value string of a `.define` with `allocator`; `.undefine` has nothing to free.
+ pub fn deinit(self: SymbolValue, allocator: Allocator) void { + switch (self) { + .define => |value| allocator.free(value), + .undefine => {}, + } + } + }; +
+ /// Records `identifier` as defined to `value`. Both strings are copied with `self.allocator`,
+ /// so the caller keeps ownership of its arguments. An existing definition has its value
+ /// replaced; an identifier already marked as undefined is left untouched.
+ /// Does not check that identifier contains only valid characters + pub fn define(self: *Options, identifier: []const u8, value: []const u8) !void { + if (self.symbols.getPtr(identifier)) |val_ptr| { + // If the symbol is undefined, then that always takes precedence so + // we shouldn't change anything. + if (val_ptr.* == .undefine) return; + // Otherwise, the new value takes precedence. 
+ const duped_value = try self.allocator.dupe(u8, value); + errdefer self.allocator.free(duped_value); + val_ptr.deinit(self.allocator); + val_ptr.* = .{ .define = duped_value }; + return; + }
+ // Identifier not in the map yet: insert fresh copies of both the key and the value.
+ const duped_key = try self.allocator.dupe(u8, identifier); + errdefer self.allocator.free(duped_key); + const duped_value = try self.allocator.dupe(u8, value); + errdefer self.allocator.free(duped_value); + try self.symbols.put(self.allocator, duped_key, .{ .define = duped_value }); + } +
+ /// Records `identifier` as undefined. An existing entry is overwritten in place (its old
+ /// value is freed first); otherwise the identifier is copied and inserted as a new key.
+ /// Does not check that identifier contains only valid characters + pub fn undefine(self: *Options, identifier: []const u8) !void { + if (self.symbols.getPtr(identifier)) |action| { + action.deinit(self.allocator); + action.* = .{ .undefine = {} }; + return; + } + const duped_key = try self.allocator.dupe(u8, identifier); + errdefer self.allocator.free(duped_key); + try self.symbols.put(self.allocator, duped_key, .{ .undefine = {} }); + } + + /// If the current input filename both: + /// - does not have an extension, and + /// - does not exist in the cwd + /// then this function will append `.rc` to the input filename + /// + /// Note: This behavior is different from the Win32 compiler. + /// It always appends .RC if the filename does not have + /// a `.` in it and it does not even try the verbatim name + /// in that scenario. + /// + /// The approach taken here is meant to give us a 'best of both + /// worlds' situation where we'll be compatible with most use-cases + /// of the .rc extension being omitted from the CLI args, but still + /// work fine if the file itself does not have an extension. 
+ pub fn maybeAppendRC(options: *Options, cwd: std.fs.Dir) !void { + if (std.fs.path.extension(options.input_filename).len == 0) { + cwd.access(options.input_filename, .{}) catch |err| switch (err) {
+ // Only a missing file triggers the rename: build "<input_filename>.rc" in a new
+ // buffer, free the previous name with options.allocator, then store the new one.
+ error.FileNotFound => { + var filename_bytes = try options.allocator.alloc(u8, options.input_filename.len + 3); + @memcpy(filename_bytes[0..options.input_filename.len], options.input_filename); + @memcpy(filename_bytes[filename_bytes.len - 3 ..], ".rc"); + options.allocator.free(options.input_filename); + options.input_filename = filename_bytes; + },
+ // Any other access error is ignored and the filename is left unchanged.
+ else => {}, + }; + } + } +
+ /// Frees everything the options hold through `allocator`: each extra include path and
+ /// the list holding them, the input and output filenames, every symbol key and value and
+ /// the symbol map, and the depfile path / MinGW include dir when they are set.
+ pub fn deinit(self: *Options) void { + for (self.extra_include_paths.items) |extra_include_path| { + self.allocator.free(extra_include_path); + } + self.extra_include_paths.deinit(self.allocator); + self.allocator.free(self.input_filename); + self.allocator.free(self.output_filename); + var symbol_it = self.symbols.iterator(); + while (symbol_it.next()) |entry| { + self.allocator.free(entry.key_ptr.*); + entry.value_ptr.deinit(self.allocator); + } + self.symbols.deinit(self.allocator); + if (self.depfile_path) |depfile_path| { + self.allocator.free(depfile_path); + } + if (self.mingw_includes_dir) |mingw_includes_dir| { + self.allocator.free(mingw_includes_dir); + } + } +
+ /// Writes a human-readable summary of the options to `writer`. The input and output
+ /// filenames are always printed; the indented lines that follow are printed only when the
+ /// corresponding setting is active. Any error from `writer` is returned.
+ pub fn dumpVerbose(self: *const Options, writer: anytype) !void { + try writer.print("Input filename: {s}\n", .{self.input_filename}); + try writer.print("Output filename: {s}\n", .{self.output_filename}); + if (self.extra_include_paths.items.len > 0) { + try writer.writeAll(" Extra include paths:\n"); + for (self.extra_include_paths.items) |extra_include_path| { + try writer.print(" \"{s}\"\n", .{extra_include_path}); + } + } + if (self.ignore_include_env_var) { + try writer.writeAll(" The INCLUDE environment variable will be ignored\n"); + } + if (self.preprocess == .no) { + try writer.writeAll(" The preprocessor will not be invoked\n"); + } else if (self.preprocess == .only) { + try writer.writeAll(" Only the 
preprocessor will be invoked\n"); + } +
+ // List every recorded symbol as the #define / #undef line it stands for;
+ // only definitions carry a value to print.
+ if (self.symbols.count() > 0) { + try writer.writeAll(" Symbols:\n"); + var it = self.symbols.iterator(); + while (it.next()) |symbol| { + try writer.print(" {s} {s}", .{ switch (symbol.value_ptr.*) { + .define => "#define", + .undefine => "#undef", + }, symbol.key_ptr.* }); + if (symbol.value_ptr.* == .define) { + try writer.print(" {s}", .{symbol.value_ptr.define}); + } + try writer.writeAll("\n"); + } + } + if (self.null_terminate_string_table_strings) { + try writer.writeAll(" Strings in string tables will be null-terminated\n"); + } + if (self.max_string_literal_codepoints != lex.default_max_string_literal_codepoints) { + try writer.print(" Max string literal length: {}\n", .{self.max_string_literal_codepoints}); + } + if (self.silent_duplicate_control_ids) { + try writer.writeAll(" Duplicate control IDs will not emit warnings\n"); + }
+ // The invalid-code-page line is controlled by its own setting, not by the
+ // duplicate-control-ID setting tested just above.
+ if (self.warn_instead_of_error_on_invalid_code_page) { + try writer.writeAll(" Invalid code page in .rc will produce a warning (instead of an error)\n"); + } +
+ // Pick a printable name for the effective language id: the enum tag when the id is a
+ // known LanguageId, else the LOCALE_CUSTOM_UNSPECIFIED name, else "<UNKNOWN>".
+ const language_id = self.default_language_id orelse res.Language.default; + const language_name = language_name: { + if (std.meta.intToEnum(lang.LanguageId, language_id)) |lang_enum_val| { + break :language_name @tagName(lang_enum_val); + } else |_| {} + if (language_id == lang.LOCALE_CUSTOM_UNSPECIFIED) { + break :language_name "LOCALE_CUSTOM_UNSPECIFIED"; + } + break :language_name "<UNKNOWN>"; + }; + try writer.print("Default language: {s} (id=0x{x})\n", .{ language_name, language_id }); + + const code_page = self.default_code_page orelse .windows1252; + try writer.print("Default codepage: {s} (id={})\n", .{ @tagName(code_page), @intFromEnum(code_page) }); + } +}; +
+/// One command-line argument that starts with an option prefix. `full` is the whole
+/// argument and `name_offset` is the index in `full` where the option name begins.
+pub const Arg = struct { + prefix: enum { long, short, slash }, + name_offset: usize, + full: []const u8, +
+ /// Classifies `str` by its prefix: `--` (long), `-` (short) or `/` (slash), with
+ /// `name_offset` set just past the prefix. `--` is tested first so it is not taken
+ /// for `-`. Returns null when `str` starts with none of them.
+ pub fn fromString(str: []const u8) ?@This() { + if (std.mem.startsWith(u8, str, "--")) { + return .{ .prefix = .long, .name_offset = 2, .full = str }; + } else if 
(std.mem.startsWith(u8, str, "-")) { + return .{ .prefix = .short, .name_offset = 1, .full = str }; + } else if (std.mem.startsWith(u8, str, "/")) { + return .{ .prefix = .slash, .name_offset = 1, .full = str }; + } + return null; + } +
+ /// Returns the prefix characters at the start of `full`: two bytes for `.long`,
+ /// one byte otherwise. Does not depend on `name_offset`.
+ pub fn prefixSlice(self: Arg) []const u8 { + return self.full[0..(if (self.prefix == .long) 2 else 1)]; + } +
+ /// Returns everything in `full` from `name_offset` to the end.
+ pub fn name(self: Arg) []const u8 { + return self.full[self.name_offset..]; + } +
+ /// Returns the first `option_len` bytes of `name()`.
+ /// `option_len` must not exceed `name().len`.
+ pub fn optionWithoutPrefix(self: Arg, option_len: usize) []const u8 { + return self.name()[0..option_len]; + } +
+ /// Builds the span used when an option's value is missing: `point_at_next_arg` is set
+ /// and `value_offset` is 0.
+ pub fn missingSpan(self: Arg) Diagnostics.ErrorDetails.ArgSpan { + return .{ + .point_at_next_arg = true, + .value_offset = 0, + .name_offset = self.name_offset, + .prefix_len = self.prefixSlice().len, + }; + } +
+ /// Same as `optionSpan(0)`.
+ /// NOTE(review): presumably a zero `name_len` makes the renderer mark the option and
+ /// everything after it - the renderer is not visible here, confirm.
+ pub fn optionAndAfterSpan(self: Arg) Diagnostics.ErrorDetails.ArgSpan { + return self.optionSpan(0); + } +
+ /// Builds a span carrying this arg's `name_offset` and prefix length, with
+ /// `name_len` set to `option_len`.
+ pub fn optionSpan(self: Arg, option_len: usize) Diagnostics.ErrorDetails.ArgSpan { + return .{ + .name_offset = self.name_offset, + .prefix_len = self.prefixSlice().len, + .name_len = option_len, + }; + } +
+ /// The value found for an option. `index_increment` is 1 when the value is the rest of
+ /// the same arg and 2 when it was taken from the following arg (see `Arg.value`).
+ pub const Value = struct { + slice: []const u8, + index_increment: u2 = 1, +
+ /// Builds the diagnostic span for this value. For a same-arg value the offset is the
+ /// pointer distance from the start of `arg.full` to the start of `slice`, so `slice`
+ /// must point into `arg.full`. For a next-arg value `point_at_next_arg` is set instead.
+ /// Any `index_increment` other than 1 or 2 is treated as unreachable.
+ pub fn argSpan(self: Value, arg: Arg) Diagnostics.ErrorDetails.ArgSpan { + const prefix_len = arg.prefixSlice().len; + switch (self.index_increment) { + 1 => return .{ + .value_offset = @intFromPtr(self.slice.ptr) - @intFromPtr(arg.full.ptr), + .prefix_len = prefix_len, + .name_offset = arg.name_offset, + }, + 2 => return .{ + .point_at_next_arg = true, + .prefix_len = prefix_len, + .name_offset = arg.name_offset, + }, + else => unreachable, + } + } +
+ /// Returns the index of the arg that holds the value: `arg_index + 1` when
+ /// `index_increment` is 2, otherwise `arg_index`.
+ pub fn index(self: Value, arg_index: usize) usize { + if (self.index_increment == 2) return arg_index + 1; + return arg_index; + } + }; +
+ /// Finds the value of an option whose name is `option_len` bytes long starting at
+ /// `name_offset`. If anything follows the option name in this arg, that remainder is the
+ /// value; otherwise the following arg (`args[index + 1]`) is used, and
+ /// `error.MissingValue` is returned when there is no following arg.
+ pub fn value(self: Arg, option_len: usize, index: usize, args: []const []const u8) error{MissingValue}!Value { + const rest = self.full[self.name_offset + option_len ..]; + if (rest.len > 0) return .{ .slice = rest }; + if (index + 1 >= 
args.len) return error.MissingValue; + return .{ .slice = args[index + 1], .index_increment = 2 }; + } + + pub const Context = struct { + index: usize, + arg: Arg, + value: Value, + }; +}; + +pub const ParseError = error{ParseError} || Allocator.Error; + +/// Note: Does not run `Options.maybeAppendRC` automatically. If that behavior is desired, +/// it must be called separately. +pub fn parse(allocator: Allocator, args: []const []const u8, diagnostics: *Diagnostics) ParseError!Options { + var options = Options{ .allocator = allocator }; + errdefer options.deinit(); + + var output_filename: ?[]const u8 = null; + var output_filename_context: Arg.Context = undefined; + + var arg_i: usize = 0; + next_arg: while (arg_i < args.len) { + var arg = Arg.fromString(args[arg_i]) orelse break; + if (arg.name().len == 0) { + switch (arg.prefix) { + // -- on its own ends arg parsing + .long => { + arg_i += 1; + break; + }, + // - or / on its own is an error + else => { + var err_details = Diagnostics.ErrorDetails{ .arg_index = arg_i, .arg_span = arg.optionAndAfterSpan() }; + var msg_writer = err_details.msg.writer(allocator); + try msg_writer.print("invalid option: {s}", .{arg.prefixSlice()}); + try diagnostics.append(err_details); + arg_i += 1; + continue :next_arg; + }, + } + } + + while (arg.name().len > 0) { + const arg_name = arg.name(); + // Note: These cases should be in order from longest to shortest, since + // shorter options that are a substring of a longer one could make + // the longer option's branch unreachable. 
+ if (std.ascii.startsWithIgnoreCase(arg_name, ":no-preprocess")) { + options.preprocess = .no; + arg.name_offset += ":no-preprocess".len; + } else if (std.ascii.startsWithIgnoreCase(arg_name, ":mingw-includes")) { + const value = arg.value(":mingw-includes".len, arg_i, args) catch { + var err_details = Diagnostics.ErrorDetails{ .arg_index = arg_i, .arg_span = arg.missingSpan() }; + var msg_writer = err_details.msg.writer(allocator); + try msg_writer.print("missing value after {s}{s} option", .{ arg.prefixSlice(), arg.optionWithoutPrefix(":mingw-includes".len) }); + try diagnostics.append(err_details); + arg_i += 1; + break :next_arg; + }; + if (options.mingw_includes_dir) |overwritten_path| { + allocator.free(overwritten_path); + options.mingw_includes_dir = null; + } + const path = try allocator.dupe(u8, value.slice); + errdefer allocator.free(path); + options.mingw_includes_dir = path; + arg_i += value.index_increment; + continue :next_arg; + } else if (std.ascii.startsWithIgnoreCase(arg_name, ":auto-includes")) { + const value = arg.value(":auto-includes".len, arg_i, args) catch { + var err_details = Diagnostics.ErrorDetails{ .arg_index = arg_i, .arg_span = arg.missingSpan() }; + var msg_writer = err_details.msg.writer(allocator); + try msg_writer.print("missing value after {s}{s} option", .{ arg.prefixSlice(), arg.optionWithoutPrefix(":auto-includes".len) }); + try diagnostics.append(err_details); + arg_i += 1; + break :next_arg; + }; + options.auto_includes = std.meta.stringToEnum(Options.AutoIncludes, value.slice) orelse blk: { + var err_details = Diagnostics.ErrorDetails{ .arg_index = arg_i, .arg_span = value.argSpan(arg) }; + var msg_writer = err_details.msg.writer(allocator); + try msg_writer.print("invalid auto includes setting: {s} ", .{value.slice}); + try diagnostics.append(err_details); + break :blk options.auto_includes; + }; + arg_i += value.index_increment; + continue :next_arg; + } else if (std.ascii.startsWithIgnoreCase(arg_name, 
":depfile-fmt")) { + const value = arg.value(":depfile-fmt".len, arg_i, args) catch { + var err_details = Diagnostics.ErrorDetails{ .arg_index = arg_i, .arg_span = arg.missingSpan() }; + var msg_writer = err_details.msg.writer(allocator); + try msg_writer.print("missing value after {s}{s} option", .{ arg.prefixSlice(), arg.optionWithoutPrefix(":depfile-fmt".len) }); + try diagnostics.append(err_details); + arg_i += 1; + break :next_arg; + }; + options.depfile_fmt = std.meta.stringToEnum(Options.DepfileFormat, value.slice) orelse blk: { + var err_details = Diagnostics.ErrorDetails{ .arg_index = arg_i, .arg_span = value.argSpan(arg) }; + var msg_writer = err_details.msg.writer(allocator); + try msg_writer.print("invalid depfile format setting: {s} ", .{value.slice}); + try diagnostics.append(err_details); + break :blk options.depfile_fmt; + }; + arg_i += value.index_increment; + continue :next_arg; + } else if (std.ascii.startsWithIgnoreCase(arg_name, ":depfile")) { + const value = arg.value(":depfile".len, arg_i, args) catch { + var err_details = Diagnostics.ErrorDetails{ .arg_index = arg_i, .arg_span = arg.missingSpan() }; + var msg_writer = err_details.msg.writer(allocator); + try msg_writer.print("missing value after {s}{s} option", .{ arg.prefixSlice(), arg.optionWithoutPrefix(":depfile".len) }); + try diagnostics.append(err_details); + arg_i += 1; + break :next_arg; + }; + if (options.depfile_path) |overwritten_path| { + allocator.free(overwritten_path); + options.depfile_path = null; + } + const path = try allocator.dupe(u8, value.slice); + errdefer allocator.free(path); + options.depfile_path = path; + arg_i += value.index_increment; + continue :next_arg; + } else if (std.ascii.startsWithIgnoreCase(arg_name, "nologo")) { + // No-op, we don't display any 'logo' to suppress + arg.name_offset += "nologo".len; + } else if (std.ascii.startsWithIgnoreCase(arg_name, ":debug")) { + options.debug = true; + arg.name_offset += ":debug".len; + } + // Unsupported LCX/LCE 
options that need a value (within the same arg only) + else if (std.ascii.startsWithIgnoreCase(arg_name, "tp:")) { + const rest = arg.full[arg.name_offset + 3 ..]; + if (rest.len == 0) { + var err_details = Diagnostics.ErrorDetails{ .arg_index = arg_i, .arg_span = .{ + .name_offset = arg.name_offset, + .prefix_len = arg.prefixSlice().len, + .value_offset = arg.name_offset + 3, + } }; + var msg_writer = err_details.msg.writer(allocator); + try msg_writer.print("missing value for {s}{s} option", .{ arg.prefixSlice(), arg.optionWithoutPrefix(3) }); + try diagnostics.append(err_details); + } + var err_details = Diagnostics.ErrorDetails{ .type = .err, .arg_index = arg_i, .arg_span = arg.optionAndAfterSpan() }; + var msg_writer = err_details.msg.writer(allocator); + try msg_writer.print("the {s}{s} option is unsupported", .{ arg.prefixSlice(), arg.optionWithoutPrefix(3) }); + try diagnostics.append(err_details); + arg_i += 1; + continue :next_arg; + } + // Unsupported LCX/LCE options that need a value + else if (std.ascii.startsWithIgnoreCase(arg_name, "tn")) { + const value = arg.value(2, arg_i, args) catch no_value: { + var err_details = Diagnostics.ErrorDetails{ .arg_index = arg_i, .arg_span = arg.missingSpan() }; + var msg_writer = err_details.msg.writer(allocator); + try msg_writer.print("missing value after {s}{s} option", .{ arg.prefixSlice(), arg.optionWithoutPrefix(2) }); + try diagnostics.append(err_details); + // dummy zero-length slice starting where the value would have been + const value_start = arg.name_offset + 2; + break :no_value Arg.Value{ .slice = arg.full[value_start..value_start] }; + }; + var err_details = Diagnostics.ErrorDetails{ .type = .err, .arg_index = arg_i, .arg_span = arg.optionAndAfterSpan() }; + var msg_writer = err_details.msg.writer(allocator); + try msg_writer.print("the {s}{s} option is unsupported", .{ arg.prefixSlice(), arg.optionWithoutPrefix(2) }); + try diagnostics.append(err_details); + arg_i += value.index_increment; + 
continue :next_arg; + } + // Unsupported MUI options that need a value + else if (std.ascii.startsWithIgnoreCase(arg_name, "fm") or + std.ascii.startsWithIgnoreCase(arg_name, "gn") or + std.ascii.startsWithIgnoreCase(arg_name, "g2")) + { + const value = arg.value(2, arg_i, args) catch no_value: { + var err_details = Diagnostics.ErrorDetails{ .arg_index = arg_i, .arg_span = arg.missingSpan() }; + var msg_writer = err_details.msg.writer(allocator); + try msg_writer.print("missing value after {s}{s} option", .{ arg.prefixSlice(), arg.optionWithoutPrefix(2) }); + try diagnostics.append(err_details); + // dummy zero-length slice starting where the value would have been + const value_start = arg.name_offset + 2; + break :no_value Arg.Value{ .slice = arg.full[value_start..value_start] }; + }; + var err_details = Diagnostics.ErrorDetails{ .type = .err, .arg_index = arg_i, .arg_span = arg.optionAndAfterSpan() }; + var msg_writer = err_details.msg.writer(allocator); + try msg_writer.print("the {s}{s} option is unsupported", .{ arg.prefixSlice(), arg.optionWithoutPrefix(2) }); + try diagnostics.append(err_details); + arg_i += value.index_increment; + continue :next_arg; + } + // Unsupported MUI options that do not need a value + else if (std.ascii.startsWithIgnoreCase(arg_name, "g1")) { + var err_details = Diagnostics.ErrorDetails{ .type = .err, .arg_index = arg_i, .arg_span = arg.optionSpan(2) }; + var msg_writer = err_details.msg.writer(allocator); + try msg_writer.print("the {s}{s} option is unsupported", .{ arg.prefixSlice(), arg.optionWithoutPrefix(2) }); + try diagnostics.append(err_details); + arg.name_offset += 2; + } + // Unsupported LCX/LCE options that do not need a value + else if (std.ascii.startsWithIgnoreCase(arg_name, "tm") or + std.ascii.startsWithIgnoreCase(arg_name, "tc") or + std.ascii.startsWithIgnoreCase(arg_name, "tw") or + std.ascii.startsWithIgnoreCase(arg_name, "te") or + std.ascii.startsWithIgnoreCase(arg_name, "ti") or + 
std.ascii.startsWithIgnoreCase(arg_name, "ta")) + { + var err_details = Diagnostics.ErrorDetails{ .type = .err, .arg_index = arg_i, .arg_span = arg.optionSpan(2) }; + var msg_writer = err_details.msg.writer(allocator); + try msg_writer.print("the {s}{s} option is unsupported", .{ arg.prefixSlice(), arg.optionWithoutPrefix(2) }); + try diagnostics.append(err_details); + arg.name_offset += 2; + } else if (std.ascii.startsWithIgnoreCase(arg_name, "fo")) { + const value = arg.value(2, arg_i, args) catch { + var err_details = Diagnostics.ErrorDetails{ .arg_index = arg_i, .arg_span = arg.missingSpan() }; + var msg_writer = err_details.msg.writer(allocator); + try msg_writer.print("missing output path after {s}{s} option", .{ arg.prefixSlice(), arg.optionWithoutPrefix(2) }); + try diagnostics.append(err_details); + arg_i += 1; + break :next_arg; + }; + output_filename_context = .{ .index = arg_i, .arg = arg, .value = value }; + output_filename = value.slice; + arg_i += value.index_increment; + continue :next_arg; + } else if (std.ascii.startsWithIgnoreCase(arg_name, "sl")) { + const value = arg.value(2, arg_i, args) catch { + var err_details = Diagnostics.ErrorDetails{ .arg_index = arg_i, .arg_span = arg.missingSpan() }; + var msg_writer = err_details.msg.writer(allocator); + try msg_writer.print("missing language tag after {s}{s} option", .{ arg.prefixSlice(), arg.optionWithoutPrefix(2) }); + try diagnostics.append(err_details); + arg_i += 1; + break :next_arg; + }; + const percent_str = value.slice; + const percent: u32 = parsePercent(percent_str) catch { + var err_details = Diagnostics.ErrorDetails{ .arg_index = arg_i, .arg_span = value.argSpan(arg) }; + var msg_writer = err_details.msg.writer(allocator); + try msg_writer.print("invalid percent format '{s}'", .{percent_str}); + try diagnostics.append(err_details); + var note_details = Diagnostics.ErrorDetails{ .type = .note, .print_args = false, .arg_index = arg_i }; + var note_writer = 
note_details.msg.writer(allocator); + try note_writer.writeAll("string length percent must be an integer between 1 and 100 (inclusive)"); + try diagnostics.append(note_details); + arg_i += value.index_increment; + continue :next_arg; + }; + if (percent == 0 or percent > 100) { + var err_details = Diagnostics.ErrorDetails{ .arg_index = arg_i, .arg_span = value.argSpan(arg) }; + var msg_writer = err_details.msg.writer(allocator); + try msg_writer.print("percent out of range: {} (parsed from '{s}')", .{ percent, percent_str }); + try diagnostics.append(err_details); + var note_details = Diagnostics.ErrorDetails{ .type = .note, .print_args = false, .arg_index = arg_i }; + var note_writer = note_details.msg.writer(allocator); + try note_writer.writeAll("string length percent must be an integer between 1 and 100 (inclusive)"); + try diagnostics.append(note_details); + arg_i += value.index_increment; + continue :next_arg; + } + const percent_float = @as(f32, @floatFromInt(percent)) / 100; + options.max_string_literal_codepoints = @intFromFloat(percent_float * max_string_literal_length_100_percent); + arg_i += value.index_increment; + continue :next_arg; + } else if (std.ascii.startsWithIgnoreCase(arg_name, "ln")) { + const value = arg.value(2, arg_i, args) catch { + var err_details = Diagnostics.ErrorDetails{ .arg_index = arg_i, .arg_span = arg.missingSpan() }; + var msg_writer = err_details.msg.writer(allocator); + try msg_writer.print("missing language tag after {s}{s} option", .{ arg.prefixSlice(), arg.optionWithoutPrefix(2) }); + try diagnostics.append(err_details); + arg_i += 1; + break :next_arg; + }; + const tag = value.slice; + options.default_language_id = lang.tagToInt(tag) catch { + var err_details = Diagnostics.ErrorDetails{ .arg_index = arg_i, .arg_span = value.argSpan(arg) }; + var msg_writer = err_details.msg.writer(allocator); + try msg_writer.print("invalid language tag: {s}", .{tag}); + try diagnostics.append(err_details); + arg_i += 
value.index_increment; + continue :next_arg; + }; + if (options.default_language_id.? == lang.LOCALE_CUSTOM_UNSPECIFIED) { + var err_details = Diagnostics.ErrorDetails{ .type = .warning, .arg_index = arg_i, .arg_span = value.argSpan(arg) }; + var msg_writer = err_details.msg.writer(allocator); + try msg_writer.print("language tag '{s}' does not have an assigned ID so it will be resolved to LOCALE_CUSTOM_UNSPECIFIED (id=0x{x})", .{ tag, lang.LOCALE_CUSTOM_UNSPECIFIED }); + try diagnostics.append(err_details); + } + arg_i += value.index_increment; + continue :next_arg; + } else if (std.ascii.startsWithIgnoreCase(arg_name, "l")) { + const value = arg.value(1, arg_i, args) catch { + var err_details = Diagnostics.ErrorDetails{ .arg_index = arg_i, .arg_span = arg.missingSpan() }; + var msg_writer = err_details.msg.writer(allocator); + try msg_writer.print("missing language ID after {s}{s} option", .{ arg.prefixSlice(), arg.optionWithoutPrefix(1) }); + try diagnostics.append(err_details); + arg_i += 1; + break :next_arg; + }; + const num_str = value.slice; + options.default_language_id = lang.parseInt(num_str) catch { + var err_details = Diagnostics.ErrorDetails{ .arg_index = arg_i, .arg_span = value.argSpan(arg) }; + var msg_writer = err_details.msg.writer(allocator); + try msg_writer.print("invalid language ID: {s}", .{num_str}); + try diagnostics.append(err_details); + arg_i += value.index_increment; + continue :next_arg; + }; + arg_i += value.index_increment; + continue :next_arg; + } else if (std.ascii.startsWithIgnoreCase(arg_name, "h") or std.mem.startsWith(u8, arg_name, "?")) { + options.print_help_and_exit = true; + // If there's been an error to this point, then we still want to fail + if (diagnostics.hasError()) return error.ParseError; + return options; + } + // 1 char unsupported MUI options that need a value + else if (std.ascii.startsWithIgnoreCase(arg_name, "q") or + std.ascii.startsWithIgnoreCase(arg_name, "g")) + { + const value = arg.value(1, arg_i, 
args) catch no_value: { + var err_details = Diagnostics.ErrorDetails{ .arg_index = arg_i, .arg_span = arg.missingSpan() }; + var msg_writer = err_details.msg.writer(allocator); + try msg_writer.print("missing value after {s}{s} option", .{ arg.prefixSlice(), arg.optionWithoutPrefix(1) }); + try diagnostics.append(err_details); + // dummy zero-length slice starting where the value would have been + const value_start = arg.name_offset + 1; + break :no_value Arg.Value{ .slice = arg.full[value_start..value_start] }; + }; + var err_details = Diagnostics.ErrorDetails{ .type = .err, .arg_index = arg_i, .arg_span = arg.optionAndAfterSpan() }; + var msg_writer = err_details.msg.writer(allocator); + try msg_writer.print("the {s}{s} option is unsupported", .{ arg.prefixSlice(), arg.optionWithoutPrefix(1) }); + try diagnostics.append(err_details); + arg_i += value.index_increment; + continue :next_arg; + } + // Undocumented (and unsupported) options that need a value + // /z has to do something with font substitution + // /s has something to do with HWB resources being inserted into the .res + else if (std.ascii.startsWithIgnoreCase(arg_name, "z") or + std.ascii.startsWithIgnoreCase(arg_name, "s")) + { + const value = arg.value(1, arg_i, args) catch no_value: { + var err_details = Diagnostics.ErrorDetails{ .arg_index = arg_i, .arg_span = arg.missingSpan() }; + var msg_writer = err_details.msg.writer(allocator); + try msg_writer.print("missing value after {s}{s} option", .{ arg.prefixSlice(), arg.optionWithoutPrefix(1) }); + try diagnostics.append(err_details); + // dummy zero-length slice starting where the value would have been + const value_start = arg.name_offset + 1; + break :no_value Arg.Value{ .slice = arg.full[value_start..value_start] }; + }; + var err_details = Diagnostics.ErrorDetails{ .type = .err, .arg_index = arg_i, .arg_span = arg.optionAndAfterSpan() }; + var msg_writer = err_details.msg.writer(allocator); + try msg_writer.print("the {s}{s} option is 
unsupported", .{ arg.prefixSlice(), arg.optionWithoutPrefix(1) }); + try diagnostics.append(err_details); + arg_i += value.index_increment; + continue :next_arg; + } + // 1 char unsupported LCX/LCE options that do not need a value + else if (std.ascii.startsWithIgnoreCase(arg_name, "t")) { + var err_details = Diagnostics.ErrorDetails{ .type = .err, .arg_index = arg_i, .arg_span = arg.optionSpan(1) }; + var msg_writer = err_details.msg.writer(allocator); + try msg_writer.print("the {s}{s} option is unsupported", .{ arg.prefixSlice(), arg.optionWithoutPrefix(1) }); + try diagnostics.append(err_details); + arg.name_offset += 1; + } else if (std.ascii.startsWithIgnoreCase(arg_name, "c")) { + const value = arg.value(1, arg_i, args) catch { + var err_details = Diagnostics.ErrorDetails{ .arg_index = arg_i, .arg_span = arg.missingSpan() }; + var msg_writer = err_details.msg.writer(allocator); + try msg_writer.print("missing code page ID after {s}{s} option", .{ arg.prefixSlice(), arg.optionWithoutPrefix(1) }); + try diagnostics.append(err_details); + arg_i += 1; + break :next_arg; + }; + const num_str = value.slice; + const code_page_id = std.fmt.parseUnsigned(u16, num_str, 10) catch { + var err_details = Diagnostics.ErrorDetails{ .arg_index = arg_i, .arg_span = value.argSpan(arg) }; + var msg_writer = err_details.msg.writer(allocator); + try msg_writer.print("invalid code page ID: {s}", .{num_str}); + try diagnostics.append(err_details); + arg_i += value.index_increment; + continue :next_arg; + }; + options.default_code_page = CodePage.getByIdentifierEnsureSupported(code_page_id) catch |err| switch (err) { + error.InvalidCodePage => { + var err_details = Diagnostics.ErrorDetails{ .arg_index = arg_i, .arg_span = value.argSpan(arg) }; + var msg_writer = err_details.msg.writer(allocator); + try msg_writer.print("invalid or unknown code page ID: {}", .{code_page_id}); + try diagnostics.append(err_details); + arg_i += value.index_increment; + continue :next_arg; + }, + 
error.UnsupportedCodePage => { + var err_details = Diagnostics.ErrorDetails{ .arg_index = arg_i, .arg_span = value.argSpan(arg) }; + var msg_writer = err_details.msg.writer(allocator); + try msg_writer.print("unsupported code page: {s} (id={})", .{ + @tagName(CodePage.getByIdentifier(code_page_id) catch unreachable), + code_page_id, + }); + try diagnostics.append(err_details); + arg_i += value.index_increment; + continue :next_arg; + }, + }; + arg_i += value.index_increment; + continue :next_arg; + } else if (std.ascii.startsWithIgnoreCase(arg_name, "v")) { + options.verbose = true; + arg.name_offset += 1; + } else if (std.ascii.startsWithIgnoreCase(arg_name, "x")) { + options.ignore_include_env_var = true; + arg.name_offset += 1; + } else if (std.ascii.startsWithIgnoreCase(arg_name, "p")) { + options.preprocess = .only; + arg.name_offset += 1; + } else if (std.ascii.startsWithIgnoreCase(arg_name, "i")) { + const value = arg.value(1, arg_i, args) catch { + var err_details = Diagnostics.ErrorDetails{ .arg_index = arg_i, .arg_span = arg.missingSpan() }; + var msg_writer = err_details.msg.writer(allocator); + try msg_writer.print("missing include path after {s}{s} option", .{ arg.prefixSlice(), arg.optionWithoutPrefix(1) }); + try diagnostics.append(err_details); + arg_i += 1; + break :next_arg; + }; + const path = value.slice; + const duped = try allocator.dupe(u8, path); + errdefer allocator.free(duped); + try options.extra_include_paths.append(options.allocator, duped); + arg_i += value.index_increment; + continue :next_arg; + } else if (std.ascii.startsWithIgnoreCase(arg_name, "r")) { + // From https://learn.microsoft.com/en-us/windows/win32/menurc/using-rc-the-rc-command-line- + // "Ignored. Provided for compatibility with existing makefiles." 
+ arg.name_offset += 1; + } else if (std.ascii.startsWithIgnoreCase(arg_name, "n")) { + options.null_terminate_string_table_strings = true; + arg.name_offset += 1; + } else if (std.ascii.startsWithIgnoreCase(arg_name, "y")) { + options.silent_duplicate_control_ids = true; + arg.name_offset += 1; + } else if (std.ascii.startsWithIgnoreCase(arg_name, "w")) { + options.warn_instead_of_error_on_invalid_code_page = true; + arg.name_offset += 1; + } else if (std.ascii.startsWithIgnoreCase(arg_name, "a")) { + // Undocumented option with unknown function + // TODO: More investigation to figure out what it does (if anything) + var err_details = Diagnostics.ErrorDetails{ .type = .warning, .arg_index = arg_i, .arg_span = arg.optionSpan(1) }; + var msg_writer = err_details.msg.writer(allocator); + try msg_writer.print("option {s}{s} has no effect (it is undocumented and its function is unknown in the Win32 RC compiler)", .{ arg.prefixSlice(), arg.optionWithoutPrefix(1) }); + try diagnostics.append(err_details); + arg.name_offset += 1; + } else if (std.ascii.startsWithIgnoreCase(arg_name, "d")) { + const value = arg.value(1, arg_i, args) catch { + var err_details = Diagnostics.ErrorDetails{ .arg_index = arg_i, .arg_span = arg.missingSpan() }; + var msg_writer = err_details.msg.writer(allocator); + try msg_writer.print("missing symbol to define after {s}{s} option", .{ arg.prefixSlice(), arg.optionWithoutPrefix(1) }); + try diagnostics.append(err_details); + arg_i += 1; + break :next_arg; + }; + var tokenizer = std.mem.tokenize(u8, value.slice, "="); + // guaranteed to exist since an empty value.slice would invoke + // the 'missing symbol to define' branch above + const symbol = tokenizer.next().?; + const symbol_value = tokenizer.next() orelse "1"; + + if (isValidIdentifier(symbol)) { + try options.define(symbol, symbol_value); + } else { + var err_details = Diagnostics.ErrorDetails{ .type = .warning, .arg_index = arg_i, .arg_span = value.argSpan(arg) }; + var msg_writer = 
err_details.msg.writer(allocator); + try msg_writer.print("symbol \"{s}\" is not a valid identifier and therefore cannot be defined", .{symbol}); + try diagnostics.append(err_details); + } + arg_i += value.index_increment; + continue :next_arg; + } else if (std.ascii.startsWithIgnoreCase(arg_name, "u")) { + const value = arg.value(1, arg_i, args) catch { + var err_details = Diagnostics.ErrorDetails{ .arg_index = arg_i, .arg_span = arg.missingSpan() }; + var msg_writer = err_details.msg.writer(allocator); + try msg_writer.print("missing symbol to undefine after {s}{s} option", .{ arg.prefixSlice(), arg.optionWithoutPrefix(1) }); + try diagnostics.append(err_details); + arg_i += 1; + break :next_arg; + }; + const symbol = value.slice; + if (isValidIdentifier(symbol)) { + try options.undefine(symbol); + } else { + var err_details = Diagnostics.ErrorDetails{ .type = .warning, .arg_index = arg_i, .arg_span = value.argSpan(arg) }; + var msg_writer = err_details.msg.writer(allocator); + try msg_writer.print("symbol \"{s}\" is not a valid identifier and therefore cannot be undefined", .{symbol}); + try diagnostics.append(err_details); + } + arg_i += value.index_increment; + continue :next_arg; + } else { + var err_details = Diagnostics.ErrorDetails{ .arg_index = arg_i, .arg_span = arg.optionAndAfterSpan() }; + var msg_writer = err_details.msg.writer(allocator); + try msg_writer.print("invalid option: {s}{s}", .{ arg.prefixSlice(), arg.name() }); + try diagnostics.append(err_details); + arg_i += 1; + continue :next_arg; + } + } else { + // The while loop exited via its conditional, meaning we are done with + // the current arg and can move on the the next + arg_i += 1; + continue; + } + } + + const positionals = args[arg_i..]; + + if (positionals.len < 1) { + var err_details = Diagnostics.ErrorDetails{ .print_args = false, .arg_index = arg_i }; + var msg_writer = err_details.msg.writer(allocator); + try msg_writer.writeAll("missing input filename"); + try 
diagnostics.append(err_details); + + const last_arg = args[args.len - 1]; + if (arg_i > 0 and last_arg.len > 0 and last_arg[0] == '/' and std.ascii.endsWithIgnoreCase(last_arg, ".rc")) { + var note_details = Diagnostics.ErrorDetails{ .type = .note, .print_args = true, .arg_index = arg_i - 1 }; + var note_writer = note_details.msg.writer(allocator); + try note_writer.writeAll("if this argument was intended to be the input filename, then -- should be specified in front of it to exclude it from option parsing"); + try diagnostics.append(note_details); + } + + // This is a fatal enough problem to justify an early return, since + // things after this rely on the value of the input filename. + return error.ParseError; + } + options.input_filename = try allocator.dupe(u8, positionals[0]); + + if (positionals.len > 1) { + if (output_filename != null) { + var err_details = Diagnostics.ErrorDetails{ .arg_index = arg_i + 1 }; + var msg_writer = err_details.msg.writer(allocator); + try msg_writer.writeAll("output filename already specified"); + try diagnostics.append(err_details); + var note_details = Diagnostics.ErrorDetails{ + .type = .note, + .arg_index = output_filename_context.value.index(output_filename_context.index), + .arg_span = output_filename_context.value.argSpan(output_filename_context.arg), + }; + var note_writer = note_details.msg.writer(allocator); + try note_writer.writeAll("output filename previously specified here"); + try diagnostics.append(note_details); + } else { + output_filename = positionals[1]; + } + } + if (output_filename == null) { + var buf = std.ArrayList(u8).init(allocator); + errdefer buf.deinit(); + + if (std.fs.path.dirname(options.input_filename)) |dirname| { + var end_pos = dirname.len; + // We want to ensure that we write a path separator at the end, so if the dirname + // doesn't end with a path sep then include the char after the dirname + // which must be a path sep. 
+ if (!std.fs.path.isSep(dirname[dirname.len - 1])) end_pos += 1; + try buf.appendSlice(options.input_filename[0..end_pos]); + } + try buf.appendSlice(std.fs.path.stem(options.input_filename)); + if (options.preprocess == .only) { + try buf.appendSlice(".rcpp"); + } else { + try buf.appendSlice(".res"); + } + + options.output_filename = try buf.toOwnedSlice(); + } else { + options.output_filename = try allocator.dupe(u8, output_filename.?); + } + + if (diagnostics.hasError()) { + return error.ParseError; + } + + return options; +} +
/// Returns true if `str` is a valid C identifier for use in a #define/#undef macro:
/// one or more of `[A-Za-z0-9_]`, where the first character is not a digit.
/// The empty string is not an identifier and yields false.
pub fn isValidIdentifier(str: []const u8) bool {
    // Without this guard the loop below never runs and "" would be accepted.
    if (str.len == 0) return false;
    for (str, 0..) |c, i| switch (c) {
        // Digits are allowed anywhere except the leading position.
        '0'...'9' => if (i == 0) return false,
        'a'...'z', 'A'...'Z', '_' => {},
        else => return false,
    };
    return true;
}

test isValidIdentifier {
    try std.testing.expect(isValidIdentifier("foo"));
    try std.testing.expect(isValidIdentifier("_foo1"));
    try std.testing.expect(!isValidIdentifier("0leadingdigit"));
    try std.testing.expect(!isValidIdentifier("foo bar"));
    try std.testing.expect(!isValidIdentifier(""));
}

/// This function is specific to how the Win32 RC command line interprets
/// max string literal length percent.
/// - Wraps on overflow of u32
/// - Stops parsing at the first char that is not a decimal digit
/// - Errors if the first char (after an optional `-`) is not a decimal digit
/// - `-` (negative) prefix is allowed; the parsed value is negated with wrapping
/// - A lone `-` parses as 0
/// - The empty string is rejected with `error.InvalidFormat`
pub fn parsePercent(str: []const u8) error{InvalidFormat}!u32 {
    // Must be checked before looking at str[0]; an empty slice has no first char.
    if (str.len == 0) return error.InvalidFormat;

    const radix: u8 = 10;
    const negative = str[0] == '-';
    const digits = if (negative) str[1..] else str;

    var result: u32 = 0;
    for (digits, 0..) |c, i| {
        const digit: u32 = switch (c) {
            '0'...'9' => c - '0',
            else => {
                // First digit must be valid
                if (i == 0) return error.InvalidFormat;
                // On a later invalid digit, just stop parsing but don't fail
                break;
            },
        };
        // Wrapping arithmetic on purpose: overflow wraps instead of erroring.
        result = result *% radix +% digit;
    }

    return if (negative) 0 -% result else result;
}

test parsePercent {
    try std.testing.expectEqual(@as(u32, 16), try parsePercent("16"));
    try std.testing.expectEqual(@as(u32, 0), try parsePercent("0x1A"));
    try std.testing.expectEqual(@as(u32, 0x1), try parsePercent("1zzzz"));
    try std.testing.expectEqual(@as(u32, 0xffffffff), try parsePercent("-1"));
    try std.testing.expectEqual(@as(u32, 0xfffffff0), try parsePercent("-16"));
    try std.testing.expectEqual(@as(u32, 1), try parsePercent("4294967297"));
    try std.testing.expectEqual(@as(u32, 0), try parsePercent("-"));
    try std.testing.expectError(error.InvalidFormat, parsePercent(""));
    try std.testing.expectError(error.InvalidFormat, parsePercent("--1"));
    try std.testing.expectError(error.InvalidFormat, parsePercent("ha"));
    try std.testing.expectError(error.InvalidFormat, parsePercent("¹"));
    try std.testing.expectError(error.InvalidFormat, parsePercent("~1"));
}

+pub fn renderErrorMessage(writer: anytype, config: std.io.tty.Config, err_details: Diagnostics.ErrorDetails, args: []const []const u8) !void { + try config.setColor(writer, .dim); + try writer.writeAll("<cli>"); + try config.setColor(writer, .reset); + try config.setColor(writer, .bold); + try writer.writeAll(": "); + switch (err_details.type) { + .err => { + try config.setColor(writer, .red); + try writer.writeAll("error: "); + }, + .warning => { + try config.setColor(writer, .yellow); + try writer.writeAll("warning: "); + }, + .note => { + try config.setColor(writer, .cyan); + try writer.writeAll("note: "); + }, + } + try config.setColor(writer, .reset); + try config.setColor(writer, .bold); + try writer.writeAll(err_details.msg.items); + try writer.writeByte('\n'); + try
config.setColor(writer, .reset); + + if (!err_details.print_args) { + try writer.writeByte('\n'); + return; + } + + try config.setColor(writer, .dim); + const prefix = " ... "; + try writer.writeAll(prefix); + try config.setColor(writer, .reset); + + const arg_with_name = args[err_details.arg_index]; + const prefix_slice = arg_with_name[0..err_details.arg_span.prefix_len]; + const before_name_slice = arg_with_name[err_details.arg_span.prefix_len..err_details.arg_span.name_offset]; + var name_slice = arg_with_name[err_details.arg_span.name_offset..]; + if (err_details.arg_span.name_len > 0) name_slice.len = err_details.arg_span.name_len; + const after_name_slice = arg_with_name[err_details.arg_span.name_offset + name_slice.len ..]; + + try writer.writeAll(prefix_slice); + if (before_name_slice.len > 0) { + try config.setColor(writer, .dim); + try writer.writeAll(before_name_slice); + try config.setColor(writer, .reset); + } + try writer.writeAll(name_slice); + if (after_name_slice.len > 0) { + try config.setColor(writer, .dim); + try writer.writeAll(after_name_slice); + try config.setColor(writer, .reset); + } + + var next_arg_len: usize = 0; + if (err_details.arg_span.point_at_next_arg and err_details.arg_index + 1 < args.len) { + const next_arg = args[err_details.arg_index + 1]; + try writer.writeByte(' '); + try writer.writeAll(next_arg); + next_arg_len = next_arg.len; + } + + const last_shown_arg_index = if (err_details.arg_span.point_at_next_arg) err_details.arg_index + 1 else err_details.arg_index; + if (last_shown_arg_index + 1 < args.len) { + // special case for when pointing to a missing value within the same arg + // as the name + if (err_details.arg_span.value_offset >= arg_with_name.len) { + try writer.writeByte(' '); + } + try config.setColor(writer, .dim); + try writer.writeAll(" ..."); + try config.setColor(writer, .reset); + } + try writer.writeByte('\n'); + + try config.setColor(writer, .green); + try writer.writeByteNTimes(' ', prefix.len); + // 
Special case for when the option is *only* a prefix (e.g. invalid option: -) + if (err_details.arg_span.prefix_len == arg_with_name.len) { + try writer.writeByteNTimes('^', err_details.arg_span.prefix_len); + } else { + try writer.writeByteNTimes('~', err_details.arg_span.prefix_len); + try writer.writeByteNTimes(' ', err_details.arg_span.name_offset - err_details.arg_span.prefix_len); + if (!err_details.arg_span.point_at_next_arg and err_details.arg_span.value_offset == 0) { + try writer.writeByte('^'); + try writer.writeByteNTimes('~', name_slice.len - 1); + } else if (err_details.arg_span.value_offset > 0) { + try writer.writeByteNTimes('~', err_details.arg_span.value_offset - err_details.arg_span.name_offset); + try writer.writeByte('^'); + if (err_details.arg_span.value_offset < arg_with_name.len) { + try writer.writeByteNTimes('~', arg_with_name.len - err_details.arg_span.value_offset - 1); + } + } else if (err_details.arg_span.point_at_next_arg) { + try writer.writeByteNTimes('~', arg_with_name.len - err_details.arg_span.name_offset + 1); + try writer.writeByte('^'); + if (next_arg_len > 0) { + try writer.writeByteNTimes('~', next_arg_len - 1); + } + } + } + try writer.writeByte('\n'); + try config.setColor(writer, .reset); +} +
/// Test helper: parses `args` and requires that parsing succeeds while
/// rendering no diagnostics at all. The caller must `deinit` the result.
fn testParse(args: []const []const u8) !Options {
    const parsed = try testParseOutput(args, "");
    return parsed.?;
}

/// Test helper: parses `args` and requires that parsing succeeds while the
/// rendered diagnostics equal `expected_output` exactly. The caller must
/// `deinit` the result.
fn testParseWarning(args: []const []const u8, expected_output: []const u8) !Options {
    const parsed = try testParseOutput(args, expected_output);
    return parsed.?;
}

/// Test helper: parses `args` and requires that parsing fails with
/// `error.ParseError` while the rendered diagnostics equal `expected_output`.
/// Returns `error.TestExpectedError` if parsing unexpectedly succeeds.
fn testParseError(args: []const []const u8, expected_output: []const u8) !void {
    // A null result means parsing failed, which is what this helper wants.
    var options = (try testParseOutput(args, expected_output)) orelse return;
    std.debug.print("expected error, got options: {}\n", .{options});
    options.deinit();
    return error.TestExpectedError;
}

/// Shared driver for the helpers above. Runs `parse` on `args`, renders the
/// collected diagnostics without color, and compares them to `expected_output`.
/// Returns the parsed options, or null when `parse` failed with
/// `error.ParseError`; any other error is propagated unchanged.
fn testParseOutput(args: []const []const u8, expected_output: []const u8) !?Options {
    const gpa = std.testing.allocator;

    var diagnostics = Diagnostics.init(gpa);
    defer diagnostics.deinit();

    var output = std.ArrayList(u8).init(gpa);
    defer output.deinit();

    var parsed: ?Options = parse(gpa, args, &diagnostics) catch |err| switch (err) {
        error.ParseError => null,
        else => |e| return e,
    };
    // If the comparison below fails, successfully parsed options must not leak.
    errdefer if (parsed) |*options| options.deinit();

    try diagnostics.renderToWriter(args, output.writer(), .no_color);
    try std.testing.expectEqualStrings(expected_output, output.items);
    return parsed;
}

+test "parse errors: basic" { + try testParseError(&.{"/"}, + \\<cli>: error: invalid option: / + \\ ... / + \\ ^ + \\<cli>: error: missing input filename + \\ + \\ + ); + try testParseError(&.{"/ln"}, + \\<cli>: error: missing language tag after /ln option + \\ ... /ln + \\ ~~~~^ + \\<cli>: error: missing input filename + \\ + \\ + ); + try testParseError(&.{"-vln"}, + \\<cli>: error: missing language tag after -ln option + \\ ... -vln + \\ ~ ~~~^ + \\<cli>: error: missing input filename + \\ + \\ + ); + try testParseError(&.{"/_not-an-option"}, + \\<cli>: error: invalid option: /_not-an-option + \\ ... /_not-an-option + \\ ~^~~~~~~~~~~~~~ + \\<cli>: error: missing input filename + \\ + \\ + ); + try testParseError(&.{"-_not-an-option"}, + \\<cli>: error: invalid option: -_not-an-option + \\ ... -_not-an-option + \\ ~^~~~~~~~~~~~~~ + \\<cli>: error: missing input filename + \\ + \\ + ); + try testParseError(&.{"--_not-an-option"}, + \\<cli>: error: invalid option: --_not-an-option + \\ ... --_not-an-option + \\ ~~^~~~~~~~~~~~~~ + \\<cli>: error: missing input filename + \\ + \\ + ); + try testParseError(&.{"/v_not-an-option"}, + \\<cli>: error: invalid option: /_not-an-option + \\ ...
/v_not-an-option + \\ ~ ^~~~~~~~~~~~~~ + \\<cli>: error: missing input filename + \\ + \\ + ); + try testParseError(&.{"-v_not-an-option"}, + \\<cli>: error: invalid option: -_not-an-option + \\ ... -v_not-an-option + \\ ~ ^~~~~~~~~~~~~~ + \\<cli>: error: missing input filename + \\ + \\ + ); + try testParseError(&.{"--v_not-an-option"}, + \\<cli>: error: invalid option: --_not-an-option + \\ ... --v_not-an-option + \\ ~~ ^~~~~~~~~~~~~~ + \\<cli>: error: missing input filename + \\ + \\ + ); + try testParseError(&.{"/some/absolute/path/parsed/as/an/option.rc"}, + \\<cli>: error: the /s option is unsupported + \\ ... /some/absolute/path/parsed/as/an/option.rc + \\ ~^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + \\<cli>: error: missing input filename + \\ + \\<cli>: note: if this argument was intended to be the input filename, then -- should be specified in front of it to exclude it from option parsing + \\ ... /some/absolute/path/parsed/as/an/option.rc + \\ ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + \\ + ); +} + +test "parse errors: /ln" { + try testParseError(&.{ "/ln", "invalid", "foo.rc" }, + \\<cli>: error: invalid language tag: invalid + \\ ... /ln invalid ... + \\ ~~~~^~~~~~~ + \\ + ); + try testParseError(&.{ "/lninvalid", "foo.rc" }, + \\<cli>: error: invalid language tag: invalid + \\ ... /lninvalid ... 
+ \\ ~~~^~~~~~~ + \\ + ); +} + +test "parse: options" { + { + var options = try testParse(&.{ "/v", "foo.rc" }); + defer options.deinit(); + + try std.testing.expectEqual(true, options.verbose); + try std.testing.expectEqualStrings("foo.rc", options.input_filename); + try std.testing.expectEqualStrings("foo.res", options.output_filename); + } + { + var options = try testParse(&.{ "/vx", "foo.rc" }); + defer options.deinit(); + + try std.testing.expectEqual(true, options.verbose); + try std.testing.expectEqual(true, options.ignore_include_env_var); + try std.testing.expectEqualStrings("foo.rc", options.input_filename); + try std.testing.expectEqualStrings("foo.res", options.output_filename); + } + { + var options = try testParse(&.{ "/xv", "foo.rc" }); + defer options.deinit(); + + try std.testing.expectEqual(true, options.verbose); + try std.testing.expectEqual(true, options.ignore_include_env_var); + try std.testing.expectEqualStrings("foo.rc", options.input_filename); + try std.testing.expectEqualStrings("foo.res", options.output_filename); + } + { + var options = try testParse(&.{ "/xvFObar.res", "foo.rc" }); + defer options.deinit(); + + try std.testing.expectEqual(true, options.verbose); + try std.testing.expectEqual(true, options.ignore_include_env_var); + try std.testing.expectEqualStrings("foo.rc", options.input_filename); + try std.testing.expectEqualStrings("bar.res", options.output_filename); + } +} + +test "parse: define and undefine" { + { + var options = try testParse(&.{ "/dfoo", "foo.rc" }); + defer options.deinit(); + + const action = options.symbols.get("foo").?; + try std.testing.expectEqualStrings("1", action.define); + } + { + var options = try testParse(&.{ "/dfoo=bar", "/dfoo=baz", "foo.rc" }); + defer options.deinit(); + + const action = options.symbols.get("foo").?; + try std.testing.expectEqualStrings("baz", action.define); + } + { + var options = try testParse(&.{ "/ufoo", "foo.rc" }); + defer options.deinit(); + + const action = 
options.symbols.get("foo").?; + try std.testing.expectEqual(Options.SymbolAction.undefine, action); + } + { + // Once undefined, future defines are ignored + var options = try testParse(&.{ "/ufoo", "/dfoo", "foo.rc" }); + defer options.deinit(); + + const action = options.symbols.get("foo").?; + try std.testing.expectEqual(Options.SymbolAction.undefine, action); + } + { + // Undefined always takes precedence + var options = try testParse(&.{ "/dfoo", "/ufoo", "/dfoo", "foo.rc" }); + defer options.deinit(); + + const action = options.symbols.get("foo").?; + try std.testing.expectEqual(Options.SymbolAction.undefine, action); + } + { + // Warn + ignore invalid identifiers + var options = try testParseWarning( + &.{ "/dfoo bar", "/u", "0leadingdigit", "foo.rc" }, + \\<cli>: warning: symbol "foo bar" is not a valid identifier and therefore cannot be defined + \\ ... /dfoo bar ... + \\ ~~^~~~~~~ + \\<cli>: warning: symbol "0leadingdigit" is not a valid identifier and therefore cannot be undefined + \\ ... /u 0leadingdigit ... + \\ ~~~^~~~~~~~~~~~~ + \\ + , + ); + defer options.deinit(); + + try std.testing.expectEqual(@as(usize, 0), options.symbols.count()); + } +} + +test "parse: /sl" { + try testParseError(&.{ "/sl", "0", "foo.rc" }, + \\<cli>: error: percent out of range: 0 (parsed from '0') + \\ ... /sl 0 ... + \\ ~~~~^ + \\<cli>: note: string length percent must be an integer between 1 and 100 (inclusive) + \\ + \\ + ); + try testParseError(&.{ "/sl", "abcd", "foo.rc" }, + \\<cli>: error: invalid percent format 'abcd' + \\ ... /sl abcd ... 
+ \\ ~~~~^~~~ + \\<cli>: note: string length percent must be an integer between 1 and 100 (inclusive) + \\ + \\ + ); + { + var options = try testParse(&.{"foo.rc"}); + defer options.deinit(); + + try std.testing.expectEqual(@as(u15, lex.default_max_string_literal_codepoints), options.max_string_literal_codepoints); + } + { + var options = try testParse(&.{ "/sl100", "foo.rc" }); + defer options.deinit(); + + try std.testing.expectEqual(@as(u15, max_string_literal_length_100_percent), options.max_string_literal_codepoints); + } + { + var options = try testParse(&.{ "-SL33", "foo.rc" }); + defer options.deinit(); + + try std.testing.expectEqual(@as(u15, 2703), options.max_string_literal_codepoints); + } + { + var options = try testParse(&.{ "/sl15", "foo.rc" }); + defer options.deinit(); + + try std.testing.expectEqual(@as(u15, 1228), options.max_string_literal_codepoints); + } +} + +test "parse: unsupported MUI-related options" { + try testParseError(&.{ "/q", "blah", "/g1", "-G2", "blah", "/fm", "blah", "/g", "blah", "foo.rc" }, + \\<cli>: error: the /q option is unsupported + \\ ... /q ... + \\ ~^ + \\<cli>: error: the /g1 option is unsupported + \\ ... /g1 ... + \\ ~^~ + \\<cli>: error: the -G2 option is unsupported + \\ ... -G2 ... + \\ ~^~ + \\<cli>: error: the /fm option is unsupported + \\ ... /fm ... + \\ ~^~ + \\<cli>: error: the /g option is unsupported + \\ ... /g ... + \\ ~^ + \\ + ); +} + +test "parse: unsupported LCX/LCE-related options" { + try testParseError(&.{ "/t", "/tp:", "/tp:blah", "/tm", "/tc", "/tw", "-TEti", "/ta", "/tn", "blah", "foo.rc" }, + \\<cli>: error: the /t option is unsupported + \\ ... /t ... + \\ ~^ + \\<cli>: error: missing value for /tp: option + \\ ... /tp: ... + \\ ~~~~^ + \\<cli>: error: the /tp: option is unsupported + \\ ... /tp: ... + \\ ~^~~ + \\<cli>: error: the /tp: option is unsupported + \\ ... /tp:blah ... + \\ ~^~~~~~~ + \\<cli>: error: the /tm option is unsupported + \\ ... /tm ... 
+ \\ ~^~ + \\<cli>: error: the /tc option is unsupported + \\ ... /tc ... + \\ ~^~ + \\<cli>: error: the /tw option is unsupported + \\ ... /tw ... + \\ ~^~ + \\<cli>: error: the -TE option is unsupported + \\ ... -TEti ... + \\ ~^~ + \\<cli>: error: the -ti option is unsupported + \\ ... -TEti ... + \\ ~ ^~ + \\<cli>: error: the /ta option is unsupported + \\ ... /ta ... + \\ ~^~ + \\<cli>: error: the /tn option is unsupported + \\ ... /tn ... + \\ ~^~ + \\ + ); +} + +test "maybeAppendRC" { + var tmp = std.testing.tmpDir(.{}); + defer tmp.cleanup(); + + var options = try testParse(&.{"foo"}); + defer options.deinit(); + try std.testing.expectEqualStrings("foo", options.input_filename); + + // Create the file so that it's found. In this scenario, .rc should not get + // appended. + var file = try tmp.dir.createFile("foo", .{}); + file.close(); + try options.maybeAppendRC(tmp.dir); + try std.testing.expectEqualStrings("foo", options.input_filename); + + // Now delete the file and try again. Since the verbatim name is no longer found + // and the input filename does not have an extension, .rc should get appended. + try tmp.dir.deleteFile("foo"); + try options.maybeAppendRC(tmp.dir); + try std.testing.expectEqualStrings("foo.rc", options.input_filename); +} diff --git a/lib/compiler/resinator/code_pages.zig b/lib/compiler/resinator/code_pages.zig @@ -0,0 +1,500 @@ +const std = @import("std"); +const windows1252 = @import("windows1252.zig"); + +// TODO: Parts of this comment block may be more relevant to string/NameOrOrdinal parsing +// than it is to the stuff in this file. 
+// +// ‰ representations for context: +// Win-1252 89 +// UTF-8 E2 80 B0 +// UTF-16 20 30 +// +// With code page 65001: +// ‰ RCDATA { "‰" L"‰" } +// File encoded as Windows-1252: +// ‰ => <U+FFFD REPLACEMENT CHARACTER> as u16 +// "‰" => 0x3F ('?') +// L"‰" => <U+FFFD REPLACEMENT CHARACTER> as u16 +// File encoded as UTF-8: +// ‰ => <U+2030 ‰> as u16 +// "‰" => 0x89 ('‰' encoded as Windows-1252) +// L"‰" => <U+2030 ‰> as u16 +// +// With code page 1252: +// ‰ RCDATA { "‰" L"‰" } +// File encoded as Windows-1252: +// ‰ => <U+2030 ‰> as u16 +// "‰" => 0x89 ('‰' encoded as Windows-1252) +// L"‰" => <U+2030 ‰> as u16 +// File encoded as UTF-8: +// ‰ => 0xE2 as u16, 0x20AC as u16, 0xB0 as u16 +// ^ first byte of utf8 representation +// ^ second byte of UTF-8 representation (0x80), but interpreted as +// Windows-1252 ('€') and then converted to UTF-16 (<U+20AC>) +// ^ third byte of utf8 representation +// "‰" => 0xE2, 0x80, 0xB0 (the bytes of the UTF-8 representation) +// L"‰" => 0xE2 as u16, 0x20AC as u16, 0xB0 as u16 (see '‰ =>' explanation) +// +// With code page 1252: +// <0x90> RCDATA { "<0x90>" L"<0x90>" } +// File encoded as Windows-1252: +// <0x90> => 0x90 as u16 +// "<0x90>" => 0x90 +// L"<0x90>" => 0x90 as u16 +// File encoded as UTF-8: +// <0x90> => 0xC2 as u16, 0x90 as u16 +// "<0x90>" => 0xC2, 0x90 (the bytes of the UTF-8 representation of <U+0090>) +// L"<0x90>" => 0xC2 as u16, 0x90 as u16 +// +// Within a raw data block, file encoded as Windows-1252 (Â is <0xC2>): +// "Âa" L"Âa" "\xC2ad" L"\xC2AD" +// With code page 1252: +// C2 61 C2 00 61 00 C2 61 64 AD C2 +// Â^ a^ Â~~~^ a~~~^ .^ a^ d^ ^~~~~\xC2AD +// \xC2~` +// With code page 65001: +// 3F 61 FD FF 61 00 C2 61 64 AD C2 +// ^. a^ ^~~~. a~~~^ ^. a^ d^ ^~~~~\xC2AD +// `. `. `~\xC2 +// `. `.~<0xC2>a is not well-formed UTF-8 (0xC2 expects a continuation byte after it). +// `. Because 'a' is a valid first byte of a UTF-8 sequence, it is not included in the +// `.
invalid sequence so only the <0xC2> gets converted to <U+FFFD>. +// `~Same as ^ but converted to '?' instead. +// +// Within a raw data block, file encoded as Windows-1252 (ð is <0xF0>, € is <0x80>): +// "ð€a" L"ð€a" +// With code page 1252: +// F0 80 61 F0 00 AC 20 61 00 +// ð^ €^ a^ ð~~~^ €~~~^ a~~~^ +// With code page 65001: +// 3F 61 FD FF 61 00 +// ^. a^ ^~~~. a~~~^ +// `. `. +// `. `.~<0xF0><0x80> is not well-formed UTF-8, and <0x80> is not a valid first byte, so +// `. both bytes are considered an invalid sequence and get converted to '<U+FFFD>' +// `~Same as ^ but converted to '?' instead. + +/// https://learn.microsoft.com/en-us/windows/win32/intl/code-page-identifiers +pub const CodePage = enum(u16) { + // supported + windows1252 = 1252, // windows-1252 ANSI Latin 1; Western European (Windows) + utf8 = 65001, // utf-8 Unicode (UTF-8) + + // unsupported but valid + ibm037 = 37, // IBM037 IBM EBCDIC US-Canada + ibm437 = 437, // IBM437 OEM United States + ibm500 = 500, // IBM500 IBM EBCDIC International + asmo708 = 708, // ASMO-708 Arabic (ASMO 708) + asmo449plus = 709, // Arabic (ASMO-449+, BCON V4) + transparent_arabic = 710, // Arabic - Transparent Arabic + dos720 = 720, // DOS-720 Arabic (Transparent ASMO); Arabic (DOS) + ibm737 = 737, // ibm737 OEM Greek (formerly 437G); Greek (DOS) + ibm775 = 775, // ibm775 OEM Baltic; Baltic (DOS) + ibm850 = 850, // ibm850 OEM Multilingual Latin 1; Western European (DOS) + ibm852 = 852, // ibm852 OEM Latin 2; Central European (DOS) + ibm855 = 855, // IBM855 OEM Cyrillic (primarily Russian) + ibm857 = 857, // ibm857 OEM Turkish; Turkish (DOS) + ibm00858 = 858, // IBM00858 OEM Multilingual Latin 1 + Euro symbol + ibm860 = 860, // IBM860 OEM Portuguese; Portuguese (DOS) + ibm861 = 861, // ibm861 OEM Icelandic; Icelandic (DOS) + dos862 = 862, // DOS-862 OEM Hebrew; Hebrew (DOS) + ibm863 = 863, // IBM863 OEM French Canadian; French Canadian (DOS) + ibm864 = 864, // IBM864 OEM Arabic; Arabic (864) + ibm865 = 865, // IBM865 
OEM Nordic; Nordic (DOS) + cp866 = 866, // cp866 OEM Russian; Cyrillic (DOS) + ibm869 = 869, // ibm869 OEM Modern Greek; Greek, Modern (DOS) + ibm870 = 870, // IBM870 IBM EBCDIC Multilingual/ROECE (Latin 2); IBM EBCDIC Multilingual Latin 2 + windows874 = 874, // windows-874 Thai (Windows) + cp875 = 875, // cp875 IBM EBCDIC Greek Modern + shift_jis = 932, // shift_jis ANSI/OEM Japanese; Japanese (Shift-JIS) + gb2312 = 936, // gb2312 ANSI/OEM Simplified Chinese (PRC, Singapore); Chinese Simplified (GB2312) + ks_c_5601_1987 = 949, // ks_c_5601-1987 ANSI/OEM Korean (Unified Hangul Code) + big5 = 950, // big5 ANSI/OEM Traditional Chinese (Taiwan; Hong Kong SAR, PRC); Chinese Traditional (Big5) + ibm1026 = 1026, // IBM1026 IBM EBCDIC Turkish (Latin 5) + ibm01047 = 1047, // IBM01047 IBM EBCDIC Latin 1/Open System + ibm01140 = 1140, // IBM01140 IBM EBCDIC US-Canada (037 + Euro symbol); IBM EBCDIC (US-Canada-Euro) + ibm01141 = 1141, // IBM01141 IBM EBCDIC Germany (20273 + Euro symbol); IBM EBCDIC (Germany-Euro) + ibm01142 = 1142, // IBM01142 IBM EBCDIC Denmark-Norway (20277 + Euro symbol); IBM EBCDIC (Denmark-Norway-Euro) + ibm01143 = 1143, // IBM01143 IBM EBCDIC Finland-Sweden (20278 + Euro symbol); IBM EBCDIC (Finland-Sweden-Euro) + ibm01144 = 1144, // IBM01144 IBM EBCDIC Italy (20280 + Euro symbol); IBM EBCDIC (Italy-Euro) + ibm01145 = 1145, // IBM01145 IBM EBCDIC Latin America-Spain (20284 + Euro symbol); IBM EBCDIC (Spain-Euro) + ibm01146 = 1146, // IBM01146 IBM EBCDIC United Kingdom (20285 + Euro symbol); IBM EBCDIC (UK-Euro) + ibm01147 = 1147, // IBM01147 IBM EBCDIC France (20297 + Euro symbol); IBM EBCDIC (France-Euro) + ibm01148 = 1148, // IBM01148 IBM EBCDIC International (500 + Euro symbol); IBM EBCDIC (International-Euro) + ibm01149 = 1149, // IBM01149 IBM EBCDIC Icelandic (20871 + Euro symbol); IBM EBCDIC (Icelandic-Euro) + utf16 = 1200, // utf-16 Unicode UTF-16, little endian byte order (BMP of ISO 10646); available only to managed applications + utf16_fffe = 
1201, // unicodeFFFE Unicode UTF-16, big endian byte order; available only to managed applications + windows1250 = 1250, // windows-1250 ANSI Central European; Central European (Windows) + windows1251 = 1251, // windows-1251 ANSI Cyrillic; Cyrillic (Windows) + windows1253 = 1253, // windows-1253 ANSI Greek; Greek (Windows) + windows1254 = 1254, // windows-1254 ANSI Turkish; Turkish (Windows) + windows1255 = 1255, // windows-1255 ANSI Hebrew; Hebrew (Windows) + windows1256 = 1256, // windows-1256 ANSI Arabic; Arabic (Windows) + windows1257 = 1257, // windows-1257 ANSI Baltic; Baltic (Windows) + windows1258 = 1258, // windows-1258 ANSI/OEM Vietnamese; Vietnamese (Windows) + johab = 1361, // Johab Korean (Johab) + macintosh = 10000, // macintosh MAC Roman; Western European (Mac) + x_mac_japanese = 10001, // x-mac-japanese Japanese (Mac) + x_mac_chinesetrad = 10002, // x-mac-chinesetrad MAC Traditional Chinese (Big5); Chinese Traditional (Mac) + x_mac_korean = 10003, // x-mac-korean Korean (Mac) + x_mac_arabic = 10004, // x-mac-arabic Arabic (Mac) + x_mac_hebrew = 10005, // x-mac-hebrew Hebrew (Mac) + x_mac_greek = 10006, // x-mac-greek Greek (Mac) + x_mac_cyrillic = 10007, // x-mac-cyrillic Cyrillic (Mac) + x_mac_chinesesimp = 10008, // x-mac-chinesesimp MAC Simplified Chinese (GB 2312); Chinese Simplified (Mac) + x_mac_romanian = 10010, // x-mac-romanian Romanian (Mac) + x_mac_ukranian = 10017, // x-mac-ukrainian Ukrainian (Mac) + x_mac_thai = 10021, // x-mac-thai Thai (Mac) + x_mac_ce = 10029, // x-mac-ce MAC Latin 2; Central European (Mac) + x_mac_icelandic = 10079, // x-mac-icelandic Icelandic (Mac) + x_mac_turkish = 10081, // x-mac-turkish Turkish (Mac) + x_mac_croatian = 10082, // x-mac-croatian Croatian (Mac) + utf32 = 12000, // utf-32 Unicode UTF-32, little endian byte order; available only to managed applications + utf32_be = 12001, // utf-32BE Unicode UTF-32, big endian byte order; available only to managed applications + x_chinese_cns = 20000, // 
x-Chinese_CNS CNS Taiwan; Chinese Traditional (CNS) + x_cp20001 = 20001, // x-cp20001 TCA Taiwan + x_chinese_eten = 20002, // x_Chinese-Eten Eten Taiwan; Chinese Traditional (Eten) + x_cp20003 = 20003, // x-cp20003 IBM5550 Taiwan + x_cp20004 = 20004, // x-cp20004 TeleText Taiwan + x_cp20005 = 20005, // x-cp20005 Wang Taiwan + x_ia5 = 20105, // x-IA5 IA5 (IRV International Alphabet No. 5, 7-bit); Western European (IA5) + x_ia5_german = 20106, // x-IA5-German IA5 German (7-bit) + x_ia5_swedish = 20107, // x-IA5-Swedish IA5 Swedish (7-bit) + x_ia5_norwegian = 20108, // x-IA5-Norwegian IA5 Norwegian (7-bit) + us_ascii = 20127, // us-ascii US-ASCII (7-bit) + x_cp20261 = 20261, // x-cp20261 T.61 + x_cp20269 = 20269, // x-cp20269 ISO 6937 Non-Spacing Accent + ibm273 = 20273, // IBM273 IBM EBCDIC Germany + ibm277 = 20277, // IBM277 IBM EBCDIC Denmark-Norway + ibm278 = 20278, // IBM278 IBM EBCDIC Finland-Sweden + ibm280 = 20280, // IBM280 IBM EBCDIC Italy + ibm284 = 20284, // IBM284 IBM EBCDIC Latin America-Spain + ibm285 = 20285, // IBM285 IBM EBCDIC United Kingdom + ibm290 = 20290, // IBM290 IBM EBCDIC Japanese Katakana Extended + ibm297 = 20297, // IBM297 IBM EBCDIC France + ibm420 = 20420, // IBM420 IBM EBCDIC Arabic + ibm423 = 20423, // IBM423 IBM EBCDIC Greek + ibm424 = 20424, // IBM424 IBM EBCDIC Hebrew + x_ebcdic_korean_extended = 20833, // x-EBCDIC-KoreanExtended IBM EBCDIC Korean Extended + ibm_thai = 20838, // IBM-Thai IBM EBCDIC Thai + koi8_r = 20866, // koi8-r Russian (KOI8-R); Cyrillic (KOI8-R) + ibm871 = 20871, // IBM871 IBM EBCDIC Icelandic + ibm880 = 20880, // IBM880 IBM EBCDIC Cyrillic Russian + ibm905 = 20905, // IBM905 IBM EBCDIC Turkish + ibm00924 = 20924, // IBM00924 IBM EBCDIC Latin 1/Open System (1047 + Euro symbol) + euc_jp_jis = 20932, // EUC-JP Japanese (JIS 0208-1990 and 0212-1990) + x_cp20936 = 20936, // x-cp20936 Simplified Chinese (GB2312); Chinese Simplified (GB2312-80) + x_cp20949 = 20949, // x-cp20949 Korean Wansung + cp1025 = 21025, // 
cp1025 IBM EBCDIC Cyrillic Serbian-Bulgarian + // = 21027, // (deprecated) + koi8_u = 21866, // koi8-u Ukrainian (KOI8-U); Cyrillic (KOI8-U) + iso8859_1 = 28591, // iso-8859-1 ISO 8859-1 Latin 1; Western European (ISO) + iso8859_2 = 28592, // iso-8859-2 ISO 8859-2 Central European; Central European (ISO) + iso8859_3 = 28593, // iso-8859-3 ISO 8859-3 Latin 3 + iso8859_4 = 28594, // iso-8859-4 ISO 8859-4 Baltic + iso8859_5 = 28595, // iso-8859-5 ISO 8859-5 Cyrillic + iso8859_6 = 28596, // iso-8859-6 ISO 8859-6 Arabic + iso8859_7 = 28597, // iso-8859-7 ISO 8859-7 Greek + iso8859_8 = 28598, // iso-8859-8 ISO 8859-8 Hebrew; Hebrew (ISO-Visual) + iso8859_9 = 28599, // iso-8859-9 ISO 8859-9 Turkish + iso8859_13 = 28603, // iso-8859-13 ISO 8859-13 Estonian + iso8859_15 = 28605, // iso-8859-15 ISO 8859-15 Latin 9 + x_europa = 29001, // x-Europa Europa 3 + is8859_8_i = 38598, // iso-8859-8-i ISO 8859-8 Hebrew; Hebrew (ISO-Logical) + iso2022_jp = 50220, // iso-2022-jp ISO 2022 Japanese with no halfwidth Katakana; Japanese (JIS) + cs_iso2022_jp = 50221, // csISO2022JP ISO 2022 Japanese with halfwidth Katakana; Japanese (JIS-Allow 1 byte Kana) + iso2022_jp_jis_x = 50222, // iso-2022-jp ISO 2022 Japanese JIS X 0201-1989; Japanese (JIS-Allow 1 byte Kana - SO/SI) + iso2022_kr = 50225, // iso-2022-kr ISO 2022 Korean + x_cp50227 = 50227, // x-cp50227 ISO 2022 Simplified Chinese; Chinese Simplified (ISO 2022) + iso2022_chinesetrad = 50229, // ISO 2022 Traditional Chinese + ebcdic_jp_katakana_extended = 50930, // EBCDIC Japanese (Katakana) Extended + ebcdic_us_ca_jp = 50931, // EBCDIC US-Canada and Japanese + ebcdic_kr_extended = 50933, // EBCDIC Korean Extended and Korean + ebcdic_chinesesimp_extended = 50935, // EBCDIC Simplified Chinese Extended and Simplified Chinese + ebcdic_chinesesimp = 50936, // EBCDIC Simplified Chinese + ebcdic_us_ca_chinesetrad = 50937, // EBCDIC US-Canada and Traditional Chinese + ebcdic_jp_latin_extended = 50939, // EBCDIC Japanese (Latin) Extended and 
Japanese + euc_jp = 51932, // euc-jp EUC Japanese + euc_cn = 51936, // EUC-CN EUC Simplified Chinese; Chinese Simplified (EUC) + euc_kr = 51949, // euc-kr EUC Korean + euc_chinesetrad = 51950, // EUC Traditional Chinese + hz_gb2312 = 52936, // hz-gb-2312 HZ-GB2312 Simplified Chinese; Chinese Simplified (HZ) + gb18030 = 54936, // GB18030 Windows XP and later: GB18030 Simplified Chinese (4 byte); Chinese Simplified (GB18030) + x_iscii_de = 57002, // x-iscii-de ISCII Devanagari + x_iscii_be = 57003, // x-iscii-be ISCII Bangla + x_iscii_ta = 57004, // x-iscii-ta ISCII Tamil + x_iscii_te = 57005, // x-iscii-te ISCII Telugu + x_iscii_as = 57006, // x-iscii-as ISCII Assamese + x_iscii_or = 57007, // x-iscii-or ISCII Odia + x_iscii_ka = 57008, // x-iscii-ka ISCII Kannada + x_iscii_ma = 57009, // x-iscii-ma ISCII Malayalam + x_iscii_gu = 57010, // x-iscii-gu ISCII Gujarati + x_iscii_pa = 57011, // x-iscii-pa ISCII Punjabi + utf7 = 65000, // utf-7 Unicode (UTF-7) + + pub fn codepointAt(code_page: CodePage, index: usize, bytes: []const u8) ?Codepoint { + if (index >= bytes.len) return null; + switch (code_page) { + .windows1252 => { + // All byte values have a representation, so just convert the byte + return Codepoint{ + .value = windows1252.toCodepoint(bytes[index]), + .byte_len = 1, + }; + }, + .utf8 => { + return Utf8.WellFormedDecoder.decode(bytes[index..]); + }, + else => unreachable, + } + } + + pub fn isSupported(code_page: CodePage) bool { + return switch (code_page) { + .windows1252, .utf8 => true, + else => false, + }; + } + + pub fn getByIdentifier(identifier: u16) !CodePage { + // There's probably a more efficient way to do this (e.g. ComptimeHashMap?) but + // this should be fine, especially since this function likely won't be called much. 
+ inline for (@typeInfo(CodePage).Enum.fields) |enumField| { + if (identifier == enumField.value) { + return @field(CodePage, enumField.name); + } + } + return error.InvalidCodePage; + } + + pub fn getByIdentifierEnsureSupported(identifier: u16) !CodePage { + const code_page = try getByIdentifier(identifier); + switch (isSupported(code_page)) { + true => return code_page, + false => return error.UnsupportedCodePage, + } + } +}; + +pub const Utf8 = struct { + /// Implements decoding with rejection of ill-formed UTF-8 sequences based on section + /// D92 of Chapter 3 of the Unicode standard (Table 3-7 specifically). + /// + /// Note: This does not match "U+FFFD Substitution of Maximal Subparts", but instead + /// matches the behavior of the Windows RC compiler. + pub const WellFormedDecoder = struct { + /// Like std.unicode.utf8ByteSequenceLength, but: + /// - Rejects non-well-formed first bytes, i.e. C0-C1, F5-FF + /// - Returns an optional value instead of an error union + pub fn sequenceLength(first_byte: u8) ?u3 { + return switch (first_byte) { + 0x00...0x7F => 1, + 0xC2...0xDF => 2, + 0xE0...0xEF => 3, + 0xF0...0xF4 => 4, + else => null, + }; + } + + fn isContinuationByte(byte: u8) bool { + return switch (byte) { + 0x80...0xBF => true, + else => false, + }; + } + + pub fn decode(bytes: []const u8) Codepoint { + std.debug.assert(bytes.len > 0); + const first_byte = bytes[0]; + const expected_len = sequenceLength(first_byte) orelse { + return .{ .value = Codepoint.invalid, .byte_len = 1 }; + }; + if (expected_len == 1) return .{ .value = first_byte, .byte_len = 1 }; + + var value: u21 = first_byte & 0b00011111; + var byte_index: u8 = 1; + while (byte_index < @min(bytes.len, expected_len)) : (byte_index += 1) { + const byte = bytes[byte_index]; + // See Table 3-7 of D92 in Chapter 3 of the Unicode Standard + const valid: bool = switch (byte_index) { + 1 => switch (first_byte) { + 0xE0 => switch (byte) { + 0xA0...0xBF => true, + else => false, + }, + 0xED => switch 
(byte) { + 0x80...0x9F => true, + else => false, + }, + 0xF0 => switch (byte) { + 0x90...0xBF => true, + else => false, + }, + 0xF4 => switch (byte) { + 0x80...0x8F => true, + else => false, + }, + else => switch (byte) { + 0x80...0xBF => true, + else => false, + }, + }, + else => switch (byte) { + 0x80...0xBF => true, + else => false, + }, + }; + + if (!valid) { + var len = byte_index; + // Only include the byte in the invalid sequence if it's in the range + // of a continuation byte. All other values should not be included in the + // invalid sequence. + if (isContinuationByte(byte)) len += 1; + return .{ .value = Codepoint.invalid, .byte_len = len }; + } + + value <<= 6; + value |= byte & 0b00111111; + } + if (byte_index != expected_len) { + return .{ .value = Codepoint.invalid, .byte_len = byte_index }; + } + return .{ .value = value, .byte_len = expected_len }; + } + }; +}; + +test "Utf8.WellFormedDecoder" { + const invalid_utf8 = "\xF0\x80"; + const decoded = Utf8.WellFormedDecoder.decode(invalid_utf8); + try std.testing.expectEqual(Codepoint.invalid, decoded.value); + try std.testing.expectEqual(@as(usize, 2), decoded.byte_len); +} + +test "codepointAt invalid utf8" { + { + const invalid_utf8 = "\xf0\xf0\x80\x80\x80"; + try std.testing.expectEqual(Codepoint{ + .value = Codepoint.invalid, + .byte_len = 1, + }, CodePage.utf8.codepointAt(0, invalid_utf8).?); + try std.testing.expectEqual(Codepoint{ + .value = Codepoint.invalid, + .byte_len = 2, + }, CodePage.utf8.codepointAt(1, invalid_utf8).?); + try std.testing.expectEqual(Codepoint{ + .value = Codepoint.invalid, + .byte_len = 1, + }, CodePage.utf8.codepointAt(3, invalid_utf8).?); + try std.testing.expectEqual(Codepoint{ + .value = Codepoint.invalid, + .byte_len = 1, + }, CodePage.utf8.codepointAt(4, invalid_utf8).?); + try std.testing.expectEqual(@as(?Codepoint, null), CodePage.windows1252.codepointAt(5, invalid_utf8)); + } + + { + const invalid_utf8 = "\xE1\xA0\xC0"; + try std.testing.expectEqual(Codepoint{ 
+ .value = Codepoint.invalid, + .byte_len = 2, + }, CodePage.utf8.codepointAt(0, invalid_utf8).?); + try std.testing.expectEqual(Codepoint{ + .value = Codepoint.invalid, + .byte_len = 1, + }, CodePage.utf8.codepointAt(2, invalid_utf8).?); + try std.testing.expectEqual(@as(?Codepoint, null), CodePage.windows1252.codepointAt(3, invalid_utf8)); + } + + { + const invalid_utf8 = "\xD2"; + try std.testing.expectEqual(Codepoint{ + .value = Codepoint.invalid, + .byte_len = 1, + }, CodePage.utf8.codepointAt(0, invalid_utf8).?); + try std.testing.expectEqual(@as(?Codepoint, null), CodePage.windows1252.codepointAt(1, invalid_utf8)); + } + + { + const invalid_utf8 = "\xE1\xA0"; + try std.testing.expectEqual(Codepoint{ + .value = Codepoint.invalid, + .byte_len = 2, + }, CodePage.utf8.codepointAt(0, invalid_utf8).?); + try std.testing.expectEqual(@as(?Codepoint, null), CodePage.windows1252.codepointAt(2, invalid_utf8)); + } + + { + const invalid_utf8 = "\xC5\xFF"; + try std.testing.expectEqual(Codepoint{ + .value = Codepoint.invalid, + .byte_len = 1, + }, CodePage.utf8.codepointAt(0, invalid_utf8).?); + try std.testing.expectEqual(Codepoint{ + .value = Codepoint.invalid, + .byte_len = 1, + }, CodePage.utf8.codepointAt(1, invalid_utf8).?); + try std.testing.expectEqual(@as(?Codepoint, null), CodePage.windows1252.codepointAt(2, invalid_utf8)); + } + + { + // encoded high surrogate + const invalid_utf8 = "\xED\xA0\xBD"; + try std.testing.expectEqual(Codepoint{ + .value = Codepoint.invalid, + .byte_len = 2, + }, CodePage.utf8.codepointAt(0, invalid_utf8).?); + try std.testing.expectEqual(Codepoint{ + .value = Codepoint.invalid, + .byte_len = 1, + }, CodePage.utf8.codepointAt(2, invalid_utf8).?); + } +} + +test "codepointAt utf8 encoded" { + const utf8_encoded = "²"; + + // with code page utf8 + try std.testing.expectEqual(Codepoint{ + .value = '²', + .byte_len = 2, + }, CodePage.utf8.codepointAt(0, utf8_encoded).?); + try std.testing.expectEqual(@as(?Codepoint, null), 
CodePage.utf8.codepointAt(2, utf8_encoded)); + + // with code page windows1252 + try std.testing.expectEqual(Codepoint{ + .value = '\xC2', + .byte_len = 1, + }, CodePage.windows1252.codepointAt(0, utf8_encoded).?); + try std.testing.expectEqual(Codepoint{ + .value = '\xB2', + .byte_len = 1, + }, CodePage.windows1252.codepointAt(1, utf8_encoded).?); + try std.testing.expectEqual(@as(?Codepoint, null), CodePage.windows1252.codepointAt(2, utf8_encoded)); +} + +test "codepointAt windows1252 encoded" { + const windows1252_encoded = "\xB2"; + + // with code page utf8 + try std.testing.expectEqual(Codepoint{ + .value = Codepoint.invalid, + .byte_len = 1, + }, CodePage.utf8.codepointAt(0, windows1252_encoded).?); + try std.testing.expectEqual(@as(?Codepoint, null), CodePage.utf8.codepointAt(2, windows1252_encoded)); + + // with code page windows1252 + try std.testing.expectEqual(Codepoint{ + .value = '\xB2', + .byte_len = 1, + }, CodePage.windows1252.codepointAt(0, windows1252_encoded).?); + try std.testing.expectEqual(@as(?Codepoint, null), CodePage.windows1252.codepointAt(1, windows1252_encoded)); +} + +pub const Codepoint = struct { + value: u21, + byte_len: usize, + + pub const invalid: u21 = std.math.maxInt(u21); +}; diff --git a/lib/compiler/resinator/comments.zig b/lib/compiler/resinator/comments.zig @@ -0,0 +1,358 @@ +//! Expects to run after a C preprocessor step that preserves comments. +//! +//! `rc` has a peculiar quirk where something like `blah/**/blah` will be +//! transformed into `blahblah` during parsing. However, `clang -E` will +//! transform it into `blah blah`, so in order to match `rc`, we need +//! to remove comments ourselves after the preprocessor runs. +//! Note: Multiline comments that actually span more than one line do +//! get translated to a space character by `rc`. +//! +//! Removing comments before lexing also allows the lexer to not have to +//! deal with comments which would complicate its implementation (this is something +//! 
of a tradeoff, as removing comments in a separate pass means that we'll +//! need to iterate the source twice instead of once, but having to deal with +//! comments when lexing would be a pain). + +const std = @import("std"); +const Allocator = std.mem.Allocator; +const UncheckedSliceWriter = @import("utils.zig").UncheckedSliceWriter; +const SourceMappings = @import("source_mapping.zig").SourceMappings; +const LineHandler = @import("lex.zig").LineHandler; +const formsLineEndingPair = @import("source_mapping.zig").formsLineEndingPair; + +/// `buf` must be at least as long as `source` +/// In-place transformation is supported (i.e. `source` and `buf` can be the same slice) +pub fn removeComments(source: []const u8, buf: []u8, source_mappings: ?*SourceMappings) ![]u8 { + std.debug.assert(buf.len >= source.len); + var result = UncheckedSliceWriter{ .slice = buf }; + const State = enum { + start, + forward_slash, + line_comment, + multiline_comment, + multiline_comment_end, + single_quoted, + single_quoted_escape, + double_quoted, + double_quoted_escape, + }; + var state: State = .start; + var index: usize = 0; + var pending_start: ?usize = null; + var line_handler = LineHandler{ .buffer = source }; + while (index < source.len) : (index += 1) { + const c = source[index]; + // TODO: Disallow \x1A, \x00, \x7F in comments. At least \x1A and \x00 can definitely + // cause errors or parsing weirdness in the Win32 RC compiler. These are disallowed + // in the lexer, but comments are stripped before getting to the lexer. 
+ switch (state) { + .start => switch (c) { + '/' => { + state = .forward_slash; + pending_start = index; + }, + '\r', '\n' => { + _ = line_handler.incrementLineNumber(index); + result.write(c); + }, + else => { + switch (c) { + '"' => state = .double_quoted, + '\'' => state = .single_quoted, + else => {}, + } + result.write(c); + }, + }, + .forward_slash => switch (c) { + '/' => state = .line_comment, + '*' => { + state = .multiline_comment; + }, + else => { + _ = line_handler.maybeIncrementLineNumber(index); + result.writeSlice(source[pending_start.? .. index + 1]); + pending_start = null; + state = .start; + }, + }, + .line_comment => switch (c) { + '\r', '\n' => { + _ = line_handler.incrementLineNumber(index); + result.write(c); + state = .start; + }, + else => {}, + }, + .multiline_comment => switch (c) { + '\r' => try handleMultilineCarriageReturn(source, &line_handler, index, &result, source_mappings), + '\n' => { + _ = line_handler.incrementLineNumber(index); + result.write(c); + }, + '*' => state = .multiline_comment_end, + else => {}, + }, + .multiline_comment_end => switch (c) { + '\r' => { + try handleMultilineCarriageReturn(source, &line_handler, index, &result, source_mappings); + // We only want to treat this as a newline if it's part of a CRLF pair. If it's + // not, then we still want to stay in .multiline_comment_end, so that e.g. `*<\r>/` still + // functions as a `*/` comment ending. Kinda crazy, but that's how the Win32 implementation works. 
+ if (formsLineEndingPair(source, '\r', index + 1)) { + state = .multiline_comment; + } + }, + '\n' => { + _ = line_handler.incrementLineNumber(index); + result.write(c); + state = .multiline_comment; + }, + '/' => { + state = .start; + }, + else => { + state = .multiline_comment; + }, + }, + .single_quoted => switch (c) { + '\r', '\n' => { + _ = line_handler.incrementLineNumber(index); + state = .start; + result.write(c); + }, + '\\' => { + state = .single_quoted_escape; + result.write(c); + }, + '\'' => { + state = .start; + result.write(c); + }, + else => { + result.write(c); + }, + }, + .single_quoted_escape => switch (c) { + '\r', '\n' => { + _ = line_handler.incrementLineNumber(index); + state = .start; + result.write(c); + }, + else => { + state = .single_quoted; + result.write(c); + }, + }, + .double_quoted => switch (c) { + '\r', '\n' => { + _ = line_handler.incrementLineNumber(index); + state = .start; + result.write(c); + }, + '\\' => { + state = .double_quoted_escape; + result.write(c); + }, + '"' => { + state = .start; + result.write(c); + }, + else => { + result.write(c); + }, + }, + .double_quoted_escape => switch (c) { + '\r', '\n' => { + _ = line_handler.incrementLineNumber(index); + state = .start; + result.write(c); + }, + else => { + state = .double_quoted; + result.write(c); + }, + }, + } + } + return result.getWritten(); +} + +inline fn handleMultilineCarriageReturn( + source: []const u8, + line_handler: *LineHandler, + index: usize, + result: *UncheckedSliceWriter, + source_mappings: ?*SourceMappings, +) !void { + // This is a dumb way to go about this, but basically we want to determine + // if this is part of a distinct CRLF or LFCR pair. This function call will detect + // LFCR pairs correctly since the function we're in will only be called on CR, + // but will not detect CRLF pairs since it only looks at the line ending before the + // CR. 
So, we do a second (forward) check if the first fails to detect CRLF that is + // not part of another pair. + const is_lfcr_pair = line_handler.currentIndexFormsLineEndingPair(index); + const is_crlf_pair = !is_lfcr_pair and formsLineEndingPair(source, '\r', index + 1); + // Note: Bare \r within a multiline comment should *not* be treated as a line ending for the + // purposes of removing comments, but *should* be treated as a line ending for the + // purposes of line counting/source mapping + _ = line_handler.incrementLineNumber(index); + // So only write the \r if it's part of a CRLF/LFCR pair + if (is_lfcr_pair or is_crlf_pair) { + result.write('\r'); + } + // And otherwise, we want to collapse the source mapping so that we can still know which + // line came from where. + else { + // Because the line gets collapsed, we need to decrement line number so that + // the next collapse acts on the first of the collapsed line numbers + line_handler.line_number -= 1; + if (source_mappings) |mappings| { + try mappings.collapse(line_handler.line_number, 1); + } + } +} + +pub fn removeCommentsAlloc(allocator: Allocator, source: []const u8, source_mappings: ?*SourceMappings) ![]u8 { + const buf = try allocator.alloc(u8, source.len); + errdefer allocator.free(buf); + const result = try removeComments(source, buf, source_mappings); + return allocator.realloc(buf, result.len); +} + +fn testRemoveComments(expected: []const u8, source: []const u8) !void { + const result = try removeCommentsAlloc(std.testing.allocator, source, null); + defer std.testing.allocator.free(result); + + try std.testing.expectEqualStrings(expected, result); +} + +test "basic" { + try testRemoveComments("", "// comment"); + try testRemoveComments("", "/* comment */"); +} + +test "mixed" { + try testRemoveComments("hello", "hello// comment"); + try testRemoveComments("hello", "hel/* comment */lo"); +} + +test "within a string" { + // escaped " is \" + try testRemoveComments( + 
\\blah"//som\"/*ething*/"BLAH + , + \\blah"//som\"/*ething*/"BLAH + ); +} + +test "line comments retain newlines" { + try testRemoveComments( + \\ + \\ + \\ + , + \\// comment + \\// comment + \\// comment + ); + + try testRemoveComments("\r\n", "//comment\r\n"); +} + +test "unfinished multiline comment" { + try testRemoveComments( + \\unfinished + \\ + , + \\unfinished/* + \\ + ); +} + +test "crazy" { + try testRemoveComments( + \\blah"/*som*/\""BLAH + , + \\blah"/*som*/\""/*ething*/BLAH + ); + + try testRemoveComments( + \\blah"/*som*/"BLAH RCDATA "BEGIN END + \\ + \\ + \\hello + \\" + , + \\blah"/*som*/"/*ething*/BLAH RCDATA "BEGIN END + \\// comment + \\//"blah blah" RCDATA {} + \\hello + \\" + ); +} + +test "multiline comment with newlines" { + // bare \r is not treated as a newline + try testRemoveComments("blahblah", "blah/*some\rthing*/blah"); + + try testRemoveComments( + \\blah + \\blah + , + \\blah/*some + \\thing*/blah + ); + try testRemoveComments( + "blah\r\nblah", + "blah/*some\r\nthing*/blah", + ); + + // handle *<not /> correctly + try testRemoveComments( + \\blah + \\ + \\ + , + \\blah/*some + \\thing* + \\/bl*ah*/ + ); +} + +test "comments appended to a line" { + try testRemoveComments( + \\blah + \\blah + , + \\blah // line comment + \\blah + ); + try testRemoveComments( + "blah \r\nblah", + "blah // line comment\r\nblah", + ); +} + +test "remove comments with mappings" { + const allocator = std.testing.allocator; + var mut_source = "blah/*\rcommented line*\r/blah".*; + var mappings = SourceMappings{}; + _ = try mappings.files.put(allocator, "test.rc"); + try mappings.set(1, 1, 0); + try mappings.set(2, 2, 0); + try mappings.set(3, 3, 0); + defer mappings.deinit(allocator); + + const result = try removeComments(&mut_source, &mut_source, &mappings); + + try std.testing.expectEqualStrings("blahblah", result); + try std.testing.expectEqual(@as(usize, 1), mappings.end_line); + try std.testing.expectEqual(@as(usize, 3), 
mappings.getCorrespondingSpan(1).?.end_line); +} + +test "in place" { + var mut_source = "blah /* comment */ blah".*; + const result = try removeComments(&mut_source, &mut_source, null); + try std.testing.expectEqualStrings("blah blah", result); +} diff --git a/lib/compiler/resinator/compile.zig b/lib/compiler/resinator/compile.zig @@ -0,0 +1,3427 @@ +const std = @import("std"); +const builtin = @import("builtin"); +const Allocator = std.mem.Allocator; +const Node = @import("ast.zig").Node; +const lex = @import("lex.zig"); +const Parser = @import("parse.zig").Parser; +const Resource = @import("rc.zig").Resource; +const Token = @import("lex.zig").Token; +const literals = @import("literals.zig"); +const Number = literals.Number; +const SourceBytes = literals.SourceBytes; +const Diagnostics = @import("errors.zig").Diagnostics; +const ErrorDetails = @import("errors.zig").ErrorDetails; +const MemoryFlags = @import("res.zig").MemoryFlags; +const rc = @import("rc.zig"); +const res = @import("res.zig"); +const ico = @import("ico.zig"); +const ani = @import("ani.zig"); +const bmp = @import("bmp.zig"); +const WORD = std.os.windows.WORD; +const DWORD = std.os.windows.DWORD; +const utils = @import("utils.zig"); +const NameOrOrdinal = res.NameOrOrdinal; +const CodePage = @import("code_pages.zig").CodePage; +const CodePageLookup = @import("ast.zig").CodePageLookup; +const SourceMappings = @import("source_mapping.zig").SourceMappings; +const windows1252 = @import("windows1252.zig"); +const lang = @import("lang.zig"); +const code_pages = @import("code_pages.zig"); +const errors = @import("errors.zig"); +const native_endian = builtin.cpu.arch.endian(); + +pub const CompileOptions = struct { + cwd: std.fs.Dir, + diagnostics: *Diagnostics, + source_mappings: ?*SourceMappings = null, + /// List of paths (absolute or relative to `cwd`) for every file that the resources within the .rc file depend on. 
+ /// Items within the list will be allocated using the allocator of the ArrayList and must be + /// freed by the caller. + /// TODO: Maybe a dedicated struct for this purpose so that it's a bit nicer to work with. + dependencies_list: ?*std.ArrayList([]const u8) = null, + default_code_page: CodePage = .windows1252, + ignore_include_env_var: bool = false, + extra_include_paths: []const []const u8 = &.{}, + /// This is just an API convenience to allow separately passing 'system' (i.e. those + /// that would normally be gotten from the INCLUDE env var) include paths. This is mostly + /// intended for use when setting `ignore_include_env_var = true`. When `ignore_include_env_var` + /// is false, `system_include_paths` will be searched before the paths in the INCLUDE env var. + system_include_paths: []const []const u8 = &.{}, + default_language_id: ?u16 = null, + // TODO: Implement verbose output + verbose: bool = false, + null_terminate_string_table_strings: bool = false, + /// Note: This is a u15 to ensure that the maximum number of UTF-16 code units + /// plus a null-terminator can always fit into a u16. 
+ max_string_literal_codepoints: u15 = lex.default_max_string_literal_codepoints, + silent_duplicate_control_ids: bool = false, + warn_instead_of_error_on_invalid_code_page: bool = false, +}; + +pub fn compile(allocator: Allocator, source: []const u8, writer: anytype, options: CompileOptions) !void { + var lexer = lex.Lexer.init(source, .{ + .default_code_page = options.default_code_page, + .source_mappings = options.source_mappings, + .max_string_literal_codepoints = options.max_string_literal_codepoints, + }); + var parser = Parser.init(&lexer, .{ + .warn_instead_of_error_on_invalid_code_page = options.warn_instead_of_error_on_invalid_code_page, + }); + var tree = try parser.parse(allocator, options.diagnostics); + defer tree.deinit(); + + var search_dirs = std.ArrayList(SearchDir).init(allocator); + defer { + for (search_dirs.items) |*search_dir| { + search_dir.deinit(allocator); + } + search_dirs.deinit(); + } + + if (options.source_mappings) |source_mappings| { + const root_path = source_mappings.files.get(source_mappings.root_filename_offset); + // If dirname returns null, then the root path will be the same as + // the cwd so we don't need to add it as a distinct search path. 
+ if (std.fs.path.dirname(root_path)) |root_dir_path| { + var root_dir = try options.cwd.openDir(root_dir_path, .{}); + errdefer root_dir.close(); + try search_dirs.append(.{ .dir = root_dir, .path = try allocator.dupe(u8, root_dir_path) }); + } + } + // Re-open the passed in cwd since we want to be able to close it (std.fs.cwd() shouldn't be closed) + const cwd_dir = options.cwd.openDir(".", .{}) catch |err| { + try options.diagnostics.append(.{ + .err = .failed_to_open_cwd, + .token = .{ + .id = .invalid, + .start = 0, + .end = 0, + .line_number = 1, + }, + .print_source_line = false, + .extra = .{ .file_open_error = .{ + .err = ErrorDetails.FileOpenError.enumFromError(err), + .filename_string_index = undefined, + } }, + }); + return error.CompileError; + }; + try search_dirs.append(.{ .dir = cwd_dir, .path = null }); + for (options.extra_include_paths) |extra_include_path| { + var dir = openSearchPathDir(options.cwd, extra_include_path) catch { + // TODO: maybe a warning that the search path is skipped? + continue; + }; + errdefer dir.close(); + try search_dirs.append(.{ .dir = dir, .path = try allocator.dupe(u8, extra_include_path) }); + } + for (options.system_include_paths) |system_include_path| { + var dir = openSearchPathDir(options.cwd, system_include_path) catch { + // TODO: maybe a warning that the search path is skipped? + continue; + }; + errdefer dir.close(); + try search_dirs.append(.{ .dir = dir, .path = try allocator.dupe(u8, system_include_path) }); + } + if (!options.ignore_include_env_var) { + const INCLUDE = std.process.getEnvVarOwned(allocator, "INCLUDE") catch ""; + defer allocator.free(INCLUDE); + + // The only precedence here is llvm-rc which also uses the platform-specific + // delimiter. There's no precedence set by `rc.exe` since it's Windows-only. 
+ const delimiter = switch (builtin.os.tag) { + .windows => ';', + else => ':', + }; + var it = std.mem.tokenizeScalar(u8, INCLUDE, delimiter); + while (it.next()) |search_path| { + var dir = openSearchPathDir(options.cwd, search_path) catch continue; + errdefer dir.close(); + try search_dirs.append(.{ .dir = dir, .path = try allocator.dupe(u8, search_path) }); + } + } + + var arena_allocator = std.heap.ArenaAllocator.init(allocator); + defer arena_allocator.deinit(); + const arena = arena_allocator.allocator(); + + var compiler = Compiler{ + .source = source, + .arena = arena, + .allocator = allocator, + .cwd = options.cwd, + .diagnostics = options.diagnostics, + .dependencies_list = options.dependencies_list, + .input_code_pages = &tree.input_code_pages, + .output_code_pages = &tree.output_code_pages, + // This is only safe because we know search_dirs won't be modified past this point + .search_dirs = search_dirs.items, + .null_terminate_string_table_strings = options.null_terminate_string_table_strings, + .silent_duplicate_control_ids = options.silent_duplicate_control_ids, + }; + if (options.default_language_id) |default_language_id| { + compiler.state.language = res.Language.fromInt(default_language_id); + } + + try compiler.writeRoot(tree.root(), writer); +} + +pub const Compiler = struct { + source: []const u8, + arena: Allocator, + allocator: Allocator, + cwd: std.fs.Dir, + state: State = .{}, + diagnostics: *Diagnostics, + dependencies_list: ?*std.ArrayList([]const u8), + input_code_pages: *const CodePageLookup, + output_code_pages: *const CodePageLookup, + search_dirs: []SearchDir, + null_terminate_string_table_strings: bool, + silent_duplicate_control_ids: bool, + + pub const State = struct { + icon_id: u16 = 1, + string_tables: StringTablesByLanguage = .{}, + language: res.Language = .{}, + font_dir: FontDir = .{}, + version: u32 = 0, + characteristics: u32 = 0, + }; + + pub fn writeRoot(self: *Compiler, root: *Node.Root, writer: anytype) !void { + try 
writeEmptyResource(writer); + for (root.body) |node| { + try self.writeNode(node, writer); + } + + // now write the FONTDIR (if it has anything in it) + try self.state.font_dir.writeResData(self, writer); + if (self.state.font_dir.fonts.items.len != 0) { + // The Win32 RC compiler may write a different FONTDIR resource than us, + // due to it sometimes writing a non-zero-length device name/face name + // whereas we *always* write them both as zero-length. + // + // In practical terms, this doesn't matter, since for various reasons the format + // of the FONTDIR cannot be relied on and is seemingly not actually used by anything + // anymore. We still want to emit some sort of diagnostic for the purposes of being able + // to know that our .RES is intentionally not meant to be byte-for-byte identical with + // the rc.exe output. + // + // By using the hint type here, we allow this diagnostic to be detected in code, + // but it will not be printed since the end-user doesn't need to care. + try self.addErrorDetails(.{ + .err = .result_contains_fontdir, + .type = .hint, + .token = undefined, + }); + } + // once we've written every else out, we can write out the finalized STRINGTABLE resources + var string_tables_it = self.state.string_tables.tables.iterator(); + while (string_tables_it.next()) |string_table_entry| { + var string_table_it = string_table_entry.value_ptr.blocks.iterator(); + while (string_table_it.next()) |entry| { + try entry.value_ptr.writeResData(self, string_table_entry.key_ptr.*, entry.key_ptr.*, writer); + } + } + } + + pub fn writeNode(self: *Compiler, node: *Node, writer: anytype) !void { + switch (node.id) { + .root => unreachable, // writeRoot should be called directly instead + .resource_external => try self.writeResourceExternal(@fieldParentPtr(Node.ResourceExternal, "base", node), writer), + .resource_raw_data => try self.writeResourceRawData(@fieldParentPtr(Node.ResourceRawData, "base", node), writer), + .literal => unreachable, // this is 
context dependent and should be handled by its parent + .binary_expression => unreachable, + .grouped_expression => unreachable, + .not_expression => unreachable, + .invalid => {}, // no-op, currently only used for dangling literals at EOF + .accelerators => try self.writeAccelerators(@fieldParentPtr(Node.Accelerators, "base", node), writer), + .accelerator => unreachable, // handled by writeAccelerators + .dialog => try self.writeDialog(@fieldParentPtr(Node.Dialog, "base", node), writer), + .control_statement => unreachable, + .toolbar => try self.writeToolbar(@fieldParentPtr(Node.Toolbar, "base", node), writer), + .menu => try self.writeMenu(@fieldParentPtr(Node.Menu, "base", node), writer), + .menu_item => unreachable, + .menu_item_separator => unreachable, + .menu_item_ex => unreachable, + .popup => unreachable, + .popup_ex => unreachable, + .version_info => try self.writeVersionInfo(@fieldParentPtr(Node.VersionInfo, "base", node), writer), + .version_statement => unreachable, + .block => unreachable, + .block_value => unreachable, + .block_value_value => unreachable, + .string_table => try self.writeStringTable(@fieldParentPtr(Node.StringTable, "base", node)), + .string_table_string => unreachable, // handled by writeStringTable + .language_statement => self.writeLanguageStatement(@fieldParentPtr(Node.LanguageStatement, "base", node)), + .font_statement => unreachable, + .simple_statement => self.writeTopLevelSimpleStatement(@fieldParentPtr(Node.SimpleStatement, "base", node)), + } + } + + /// Returns the filename encoded as UTF-8 (allocated by self.allocator) + pub fn evaluateFilenameExpression(self: *Compiler, expression_node: *Node) ![]u8 { + switch (expression_node.id) { + .literal => { + const literal_node = expression_node.cast(.literal).?; + switch (literal_node.token.id) { + .literal, .number => { + const slice = literal_node.token.slice(self.source); + const code_page = self.input_code_pages.getForToken(literal_node.token); + var buf = try 
std.ArrayList(u8).initCapacity(self.allocator, slice.len); + errdefer buf.deinit(); + + var index: usize = 0; + while (code_page.codepointAt(index, slice)) |codepoint| : (index += codepoint.byte_len) { + const c = codepoint.value; + if (c == code_pages.Codepoint.invalid) { + try buf.appendSlice("�"); + } else { + // Anything that is not returned as an invalid codepoint must be encodable as UTF-8. + const utf8_len = std.unicode.utf8CodepointSequenceLength(c) catch unreachable; + try buf.ensureUnusedCapacity(utf8_len); + _ = std.unicode.utf8Encode(c, buf.unusedCapacitySlice()) catch unreachable; + buf.items.len += utf8_len; + } + } + + return buf.toOwnedSlice(); + }, + .quoted_ascii_string, .quoted_wide_string => { + const slice = literal_node.token.slice(self.source); + const column = literal_node.token.calculateColumn(self.source, 8, null); + const bytes = SourceBytes{ .slice = slice, .code_page = self.input_code_pages.getForToken(literal_node.token) }; + + var buf = std.ArrayList(u8).init(self.allocator); + errdefer buf.deinit(); + + // Filenames are sort-of parsed as if they were wide strings, but the max escape width of + // hex/octal escapes is still determined by the L prefix. Since we want to end up with + // UTF-8, we can parse either string type directly to UTF-8. 
+ var parser = literals.IterativeStringParser.init(bytes, .{ + .start_column = column, + .diagnostics = .{ .diagnostics = self.diagnostics, .token = literal_node.token }, + }); + + while (try parser.nextUnchecked()) |parsed| { + const c = parsed.codepoint; + if (c == code_pages.Codepoint.invalid) { + try buf.appendSlice("�"); + } else { + var codepoint_buf: [4]u8 = undefined; + // If the codepoint cannot be encoded, we fall back to � + if (std.unicode.utf8Encode(c, &codepoint_buf)) |len| { + try buf.appendSlice(codepoint_buf[0..len]); + } else |_| { + try buf.appendSlice("�"); + } + } + } + + return buf.toOwnedSlice(); + }, + else => unreachable, // no other token types should be in a filename literal node + } + }, + .binary_expression => { + const binary_expression_node = expression_node.cast(.binary_expression).?; + return self.evaluateFilenameExpression(binary_expression_node.right); + }, + .grouped_expression => { + const grouped_expression_node = expression_node.cast(.grouped_expression).?; + return self.evaluateFilenameExpression(grouped_expression_node.expression); + }, + else => unreachable, + } + } + + /// https://learn.microsoft.com/en-us/windows/win32/menurc/searching-for-files + /// + /// Searches, in this order: + /// Directory of the 'root' .rc file (if different from CWD) + /// CWD + /// extra_include_paths (resolved relative to CWD) + /// system_include_paths (resolve relative to CWD) + /// INCLUDE environment var paths (only if ignore_include_env_var is false; resolved relative to CWD) + /// + /// Note: The CWD being searched *in addition to* the directory of the 'root' .rc file + /// is also how the Win32 RC compiler preprocessor searches for includes, but that + /// differs from how the clang preprocessor searches for includes. + /// + /// Note: This will always return the first matching file that can be opened. + /// This matches the Win32 RC compiler, which will fail with an error if the first + /// matching file is invalid. 
/// That is, it does not do the `cmd` PATH searching
/// thing of continuing to look for matching files until it finds a valid
/// one if a matching file is invalid.
///
/// On success the caller owns the returned file and must close it.
/// If `dependencies_list` is set, the path of the opened file is appended to it;
/// that string is allocated with the list's allocator.
///
/// If no file can be opened, the error from the first failed open attempt is
/// returned, or `error.FileNotFound` if there were no search dirs to try.
fn searchForFile(self: *Compiler, path: []const u8) !std.fs.File {
    // If the path is absolute, then it is not resolved relative to any search
    // paths, so there's no point in checking them.
    //
    // This behavior was determined/confirmed with the following test:
    // - A `test.rc` file with the contents `1 RCDATA "/test.bin"`
    // - A `test.bin` file at `C:\test.bin`
    // - A `test.bin` file at `inc\test.bin` relative to the .rc file
    // - Invoking `rc` with `rc /i inc test.rc`
    //
    // This results in a .res file with the contents of `C:\test.bin`, not
    // the contents of `inc\test.bin`. Further, if `C:\test.bin` is deleted,
    // then it starts failing to find `/test.bin`, meaning that it does not resolve
    // `/test.bin` relative to include paths and instead only treats it as
    // an absolute path.
    if (std.fs.path.isAbsolute(path)) {
        const file = try utils.openFileNotDir(std.fs.cwd(), path, .{});
        errdefer file.close();

        if (self.dependencies_list) |dependencies_list| {
            const duped_path = try dependencies_list.allocator.dupe(u8, path);
            errdefer dependencies_list.allocator.free(duped_path);
            try dependencies_list.append(duped_path);
        }

        // Return here rather than falling through to the search dir loop:
        // falling through would leave this handle open forever and would
        // re-resolve the absolute path against every search dir.
        return file;
    }

    // Remember only the first failure so that the reported error corresponds
    // to the highest-priority search dir.
    var first_error: ?std.fs.File.OpenError = null;
    for (self.search_dirs) |search_dir| {
        if (utils.openFileNotDir(search_dir.dir, path, .{})) |file| {
            errdefer file.close();

            if (self.dependencies_list) |dependencies_list| {
                // A null `search_dir.path` is joined as an empty component.
                const searched_file_path = try std.fs.path.join(dependencies_list.allocator, &.{
                    search_dir.path orelse "", path,
                });
                errdefer dependencies_list.allocator.free(searched_file_path);
                try dependencies_list.append(searched_file_path);
            }

            return file;
        } else |err| if (first_error == null) {
            first_error = err;
        }
    }
    return first_error orelse error.FileNotFound;
}

pub fn
parseDlgIncludeString(self: *Compiler, token: Token) ![]u8 { + // For the purposes of parsing, we want to strip the L prefix + // if it exists since we want escaped integers to be limited to + // their ascii string range. + // + // We keep track of whether or not there was an L prefix, though, + // since there's more weirdness to come. + var bytes = self.sourceBytesForToken(token); + var was_wide_string = false; + if (bytes.slice[0] == 'L' or bytes.slice[0] == 'l') { + was_wide_string = true; + bytes.slice = bytes.slice[1..]; + } + + var buf = try std.ArrayList(u8).initCapacity(self.allocator, bytes.slice.len); + errdefer buf.deinit(); + + var iterative_parser = literals.IterativeStringParser.init(bytes, .{ + .start_column = token.calculateColumn(self.source, 8, null), + .diagnostics = .{ .diagnostics = self.diagnostics, .token = token }, + }); + + // No real idea what's going on here, but this matches the rc.exe behavior + while (try iterative_parser.next()) |parsed| { + const c = parsed.codepoint; + switch (was_wide_string) { + true => { + switch (c) { + 0...0x7F, 0xA0...0xFF => try buf.append(@intCast(c)), + 0x80...0x9F => { + if (windows1252.bestFitFromCodepoint(c)) |_| { + try buf.append(@intCast(c)); + } else { + try buf.append('?'); + } + }, + else => { + if (windows1252.bestFitFromCodepoint(c)) |best_fit| { + try buf.append(best_fit); + } else if (c < 0x10000 or c == code_pages.Codepoint.invalid) { + try buf.append('?'); + } else { + try buf.appendSlice("??"); + } + }, + } + }, + false => { + if (parsed.from_escaped_integer) { + try buf.append(@truncate(c)); + } else { + if (windows1252.bestFitFromCodepoint(c)) |best_fit| { + try buf.append(best_fit); + } else if (c < 0x10000 or c == code_pages.Codepoint.invalid) { + try buf.append('?'); + } else { + try buf.appendSlice("??"); + } + } + }, + } + } + + return buf.toOwnedSlice(); + } + + pub fn writeResourceExternal(self: *Compiler, node: *Node.ResourceExternal, writer: anytype) !void { + // Init header with 
data size zero for now, will need to fill it in later + var header = try self.resourceHeader(node.id, node.type, .{}); + defer header.deinit(self.allocator); + + const maybe_predefined_type = header.predefinedResourceType(); + + // DLGINCLUDE has special handling that doesn't actually need the file to exist + if (maybe_predefined_type != null and maybe_predefined_type.? == .DLGINCLUDE) { + const filename_token = node.filename.cast(.literal).?.token; + const parsed_filename = try self.parseDlgIncludeString(filename_token); + defer self.allocator.free(parsed_filename); + + // NUL within the parsed string acts as a terminator + const parsed_filename_terminated = std.mem.sliceTo(parsed_filename, 0); + + header.applyMemoryFlags(node.common_resource_attributes, self.source); + header.data_size = @intCast(parsed_filename_terminated.len + 1); + try header.write(writer, .{ .diagnostics = self.diagnostics, .token = node.id }); + try writer.writeAll(parsed_filename_terminated); + try writer.writeByte(0); + try writeDataPadding(writer, header.data_size); + return; + } + + const filename_utf8 = try self.evaluateFilenameExpression(node.filename); + defer self.allocator.free(filename_utf8); + + // TODO: More robust checking of the validity of the filename. + // This currently only checks for NUL bytes, but it should probably also check for + // platform-specific invalid characters like '*', '?', '"', '<', '>', '|' (Windows) + // Related: https://github.com/ziglang/zig/pull/14533#issuecomment-1416888193 + if (std.mem.indexOfScalar(u8, filename_utf8, 0) != null) { + return self.addErrorDetailsAndFail(.{ + .err = .invalid_filename, + .token = node.filename.getFirstToken(), + .token_span_end = node.filename.getLastToken(), + .extra = .{ .number = 0 }, + }); + } + + // Allow plain number literals, but complex number expressions are evaluated strangely + // and almost certainly lead to things not intended by the user (e.g. 
'(1+-1)' evaluates + // to the filename '-1'), so error if the filename node is a grouped/binary expression. + // Note: This is done here instead of during parsing so that we can easily include + // the evaluated filename as part of the error messages. + if (node.filename.id != .literal) { + const filename_string_index = try self.diagnostics.putString(filename_utf8); + try self.addErrorDetails(.{ + .err = .number_expression_as_filename, + .token = node.filename.getFirstToken(), + .token_span_end = node.filename.getLastToken(), + .extra = .{ .number = filename_string_index }, + }); + return self.addErrorDetailsAndFail(.{ + .err = .number_expression_as_filename, + .type = .note, + .token = node.filename.getFirstToken(), + .token_span_end = node.filename.getLastToken(), + .print_source_line = false, + .extra = .{ .number = filename_string_index }, + }); + } + // From here on out, we know that the filename must be comprised of a single token, + // so get it here to simplify future usage. + const filename_token = node.filename.getFirstToken(); + + const file = self.searchForFile(filename_utf8) catch |err| switch (err) { + error.OutOfMemory => |e| return e, + else => |e| { + const filename_string_index = try self.diagnostics.putString(filename_utf8); + return self.addErrorDetailsAndFail(.{ + .err = .file_open_error, + .token = filename_token, + .extra = .{ .file_open_error = .{ + .err = ErrorDetails.FileOpenError.enumFromError(e), + .filename_string_index = filename_string_index, + } }, + }); + }, + }; + defer file.close(); + + if (maybe_predefined_type) |predefined_type| { + switch (predefined_type) { + .GROUP_ICON, .GROUP_CURSOR => { + // Check for animated icon first + if (ani.isAnimatedIcon(file.reader())) { + // Animated icons are just put into the resource unmodified, + // and the resource type changes to ANIICON/ANICURSOR + + const new_predefined_type: res.RT = switch (predefined_type) { + .GROUP_ICON => .ANIICON, + .GROUP_CURSOR => .ANICURSOR, + else => 
unreachable, + }; + header.type_value.ordinal = @intFromEnum(new_predefined_type); + header.memory_flags = MemoryFlags.defaults(new_predefined_type); + header.applyMemoryFlags(node.common_resource_attributes, self.source); + header.data_size = @intCast(try file.getEndPos()); + + try header.write(writer, .{ .diagnostics = self.diagnostics, .token = node.id }); + try file.seekTo(0); + try writeResourceData(writer, file.reader(), header.data_size); + return; + } + + // isAnimatedIcon moved the file cursor so reset to the start + try file.seekTo(0); + + const icon_dir = ico.read(self.allocator, file.reader(), try file.getEndPos()) catch |err| switch (err) { + error.OutOfMemory => |e| return e, + else => |e| { + return self.iconReadError( + e, + filename_utf8, + filename_token, + predefined_type, + ); + }, + }; + defer icon_dir.deinit(); + + // This limit is inherent to the ico format since number of entries is a u16 field. + std.debug.assert(icon_dir.entries.len <= std.math.maxInt(u16)); + + // Note: The Win32 RC compiler will compile the resource as whatever type is + // in the icon_dir regardless of the type of resource specified in the .rc. + // This leads to unusable .res files when the types mismatch, so + // we error instead. 
+ const res_types_match = switch (predefined_type) { + .GROUP_ICON => icon_dir.image_type == .icon, + .GROUP_CURSOR => icon_dir.image_type == .cursor, + else => unreachable, + }; + if (!res_types_match) { + return self.addErrorDetailsAndFail(.{ + .err = .icon_dir_and_resource_type_mismatch, + .token = filename_token, + .extra = .{ .resource = switch (predefined_type) { + .GROUP_ICON => .icon, + .GROUP_CURSOR => .cursor, + else => unreachable, + } }, + }); + } + + // Memory flags affect the RT_ICON and the RT_GROUP_ICON differently + var icon_memory_flags = MemoryFlags.defaults(res.RT.ICON); + applyToMemoryFlags(&icon_memory_flags, node.common_resource_attributes, self.source); + applyToGroupMemoryFlags(&header.memory_flags, node.common_resource_attributes, self.source); + + const first_icon_id = self.state.icon_id; + const entry_type = if (predefined_type == .GROUP_ICON) @intFromEnum(res.RT.ICON) else @intFromEnum(res.RT.CURSOR); + for (icon_dir.entries, 0..) |*entry, entry_i_usize| { + // We know that the entry index must fit within a u16, so + // cast it here to simplify usage sites. 
+ const entry_i: u16 = @intCast(entry_i_usize); + var full_data_size = entry.data_size_in_bytes; + if (icon_dir.image_type == .cursor) { + full_data_size = std.math.add(u32, full_data_size, 4) catch { + return self.addErrorDetailsAndFail(.{ + .err = .resource_data_size_exceeds_max, + .token = node.id, + }); + }; + } + + const image_header = ResourceHeader{ + .type_value = .{ .ordinal = entry_type }, + .name_value = .{ .ordinal = self.state.icon_id }, + .data_size = full_data_size, + .memory_flags = icon_memory_flags, + .language = self.state.language, + .version = self.state.version, + .characteristics = self.state.characteristics, + }; + try image_header.write(writer, .{ .diagnostics = self.diagnostics, .token = node.id }); + + // From https://learn.microsoft.com/en-us/windows/win32/menurc/localheader: + // > The LOCALHEADER structure is the first data written to the RT_CURSOR + // > resource if a RESDIR structure contains information about a cursor. + // where LOCALHEADER is `struct { WORD xHotSpot; WORD yHotSpot; }` + if (icon_dir.image_type == .cursor) { + try writer.writeInt(u16, entry.type_specific_data.cursor.hotspot_x, .little); + try writer.writeInt(u16, entry.type_specific_data.cursor.hotspot_y, .little); + } + + try file.seekTo(entry.data_offset_from_start_of_file); + var header_bytes = file.reader().readBytesNoEof(16) catch { + return self.iconReadError( + error.UnexpectedEOF, + filename_utf8, + filename_token, + predefined_type, + ); + }; + + const image_format = ico.ImageFormat.detect(&header_bytes); + if (!image_format.validate(&header_bytes)) { + return self.iconReadError( + error.InvalidHeader, + filename_utf8, + filename_token, + predefined_type, + ); + } + switch (image_format) { + .riff => switch (icon_dir.image_type) { + .icon => { + // The Win32 RC compiler treats this as an error, but icon dirs + // with RIFF encoded icons within them work ~okay (they work + // in some places but not others, they may not animate, etc) if they are + // allowed 
to be compiled. + try self.addErrorDetails(.{ + .err = .rc_would_error_on_icon_dir, + .type = .warning, + .token = filename_token, + .extra = .{ .icon_dir = .{ .icon_type = .icon, .icon_format = .riff, .index = entry_i } }, + }); + try self.addErrorDetails(.{ + .err = .rc_would_error_on_icon_dir, + .type = .note, + .print_source_line = false, + .token = filename_token, + .extra = .{ .icon_dir = .{ .icon_type = .icon, .icon_format = .riff, .index = entry_i } }, + }); + }, + .cursor => { + // The Win32 RC compiler errors in this case too, but we only error + // here because the cursor would fail to be loaded at runtime if we + // compiled it. + return self.addErrorDetailsAndFail(.{ + .err = .format_not_supported_in_icon_dir, + .token = filename_token, + .extra = .{ .icon_dir = .{ .icon_type = .cursor, .icon_format = .riff, .index = entry_i } }, + }); + }, + }, + .png => switch (icon_dir.image_type) { + .icon => { + // PNG always seems to have 1 for color planes no matter what + entry.type_specific_data.icon.color_planes = 1; + // These seem to be the only values of num_colors that + // get treated specially + entry.type_specific_data.icon.bits_per_pixel = switch (entry.num_colors) { + 2 => 1, + 8 => 3, + 16 => 4, + else => entry.type_specific_data.icon.bits_per_pixel, + }; + }, + .cursor => { + // The Win32 RC compiler treats this as an error, but cursor dirs + // with PNG encoded icons within them work fine if they are + // allowed to be compiled. 
+ try self.addErrorDetails(.{ + .err = .rc_would_error_on_icon_dir, + .type = .warning, + .token = filename_token, + .extra = .{ .icon_dir = .{ .icon_type = .cursor, .icon_format = .png, .index = entry_i } }, + }); + }, + }, + .dib => { + const bitmap_header: *ico.BitmapHeader = @ptrCast(@alignCast(&header_bytes)); + if (native_endian == .big) { + std.mem.byteSwapAllFields(ico.BitmapHeader, bitmap_header); + } + const bitmap_version = ico.BitmapHeader.Version.get(bitmap_header.bcSize); + + // The Win32 RC compiler only allows headers with + // `bcSize == sizeof(BITMAPINFOHEADER)`, but it seems unlikely + // that there's a good reason for that outside of too-old + // bitmap headers. + // TODO: Need to test V4 and V5 bitmaps to check they actually work + if (bitmap_version == .@"win2.0") { + return self.addErrorDetailsAndFail(.{ + .err = .rc_would_error_on_bitmap_version, + .token = filename_token, + .extra = .{ .icon_dir = .{ + .icon_type = if (icon_dir.image_type == .icon) .icon else .cursor, + .icon_format = image_format, + .index = entry_i, + .bitmap_version = bitmap_version, + } }, + }); + } else if (bitmap_version != .@"nt3.1") { + try self.addErrorDetails(.{ + .err = .rc_would_error_on_bitmap_version, + .type = .warning, + .token = filename_token, + .extra = .{ .icon_dir = .{ + .icon_type = if (icon_dir.image_type == .icon) .icon else .cursor, + .icon_format = image_format, + .index = entry_i, + .bitmap_version = bitmap_version, + } }, + }); + } + + switch (icon_dir.image_type) { + .icon => { + // The values in the icon's BITMAPINFOHEADER always take precedence over + // the values in the IconDir, but not in the LOCALHEADER (see above). 
+ entry.type_specific_data.icon.color_planes = bitmap_header.bcPlanes; + entry.type_specific_data.icon.bits_per_pixel = bitmap_header.bcBitCount; + }, + .cursor => { + // Only cursors get the width/height from BITMAPINFOHEADER (icons don't) + entry.width = @intCast(bitmap_header.bcWidth); + entry.height = @intCast(bitmap_header.bcHeight); + entry.type_specific_data.cursor.hotspot_x = bitmap_header.bcPlanes; + entry.type_specific_data.cursor.hotspot_y = bitmap_header.bcBitCount; + }, + } + }, + } + + try file.seekTo(entry.data_offset_from_start_of_file); + try writeResourceDataNoPadding(writer, file.reader(), entry.data_size_in_bytes); + try writeDataPadding(writer, full_data_size); + + if (self.state.icon_id == std.math.maxInt(u16)) { + try self.addErrorDetails(.{ + .err = .max_icon_ids_exhausted, + .print_source_line = false, + .token = filename_token, + .extra = .{ .icon_dir = .{ + .icon_type = if (icon_dir.image_type == .icon) .icon else .cursor, + .icon_format = image_format, + .index = entry_i, + } }, + }); + return self.addErrorDetailsAndFail(.{ + .err = .max_icon_ids_exhausted, + .type = .note, + .token = filename_token, + .extra = .{ .icon_dir = .{ + .icon_type = if (icon_dir.image_type == .icon) .icon else .cursor, + .icon_format = image_format, + .index = entry_i, + } }, + }); + } + self.state.icon_id += 1; + } + + header.data_size = icon_dir.getResDataSize(); + + try header.write(writer, .{ .diagnostics = self.diagnostics, .token = node.id }); + try icon_dir.writeResData(writer, first_icon_id); + try writeDataPadding(writer, header.data_size); + return; + }, + .RCDATA, .HTML, .MANIFEST, .MESSAGETABLE, .DLGINIT, .PLUGPLAY => { + header.applyMemoryFlags(node.common_resource_attributes, self.source); + }, + .BITMAP => { + header.applyMemoryFlags(node.common_resource_attributes, self.source); + const file_size = try file.getEndPos(); + + const bitmap_info = bmp.read(file.reader(), file_size) catch |err| { + const filename_string_index = try 
self.diagnostics.putString(filename_utf8); + return self.addErrorDetailsAndFail(.{ + .err = .bmp_read_error, + .token = filename_token, + .extra = .{ .bmp_read_error = .{ + .err = ErrorDetails.BitmapReadError.enumFromError(err), + .filename_string_index = filename_string_index, + } }, + }); + }; + + if (bitmap_info.getActualPaletteByteLen() > bitmap_info.getExpectedPaletteByteLen()) { + const num_ignored_bytes = bitmap_info.getActualPaletteByteLen() - bitmap_info.getExpectedPaletteByteLen(); + var number_as_bytes: [8]u8 = undefined; + std.mem.writeInt(u64, &number_as_bytes, num_ignored_bytes, native_endian); + const value_string_index = try self.diagnostics.putString(&number_as_bytes); + try self.addErrorDetails(.{ + .err = .bmp_ignored_palette_bytes, + .type = .warning, + .token = filename_token, + .extra = .{ .number = value_string_index }, + }); + } else if (bitmap_info.getActualPaletteByteLen() < bitmap_info.getExpectedPaletteByteLen()) { + const num_padding_bytes = bitmap_info.getExpectedPaletteByteLen() - bitmap_info.getActualPaletteByteLen(); + + // TODO: Make this configurable (command line option) + const max_missing_bytes = 4096; + if (num_padding_bytes > max_missing_bytes) { + var numbers_as_bytes: [16]u8 = undefined; + std.mem.writeInt(u64, numbers_as_bytes[0..8], num_padding_bytes, native_endian); + std.mem.writeInt(u64, numbers_as_bytes[8..16], max_missing_bytes, native_endian); + const values_string_index = try self.diagnostics.putString(&numbers_as_bytes); + try self.addErrorDetails(.{ + .err = .bmp_too_many_missing_palette_bytes, + .token = filename_token, + .extra = .{ .number = values_string_index }, + }); + return self.addErrorDetailsAndFail(.{ + .err = .bmp_too_many_missing_palette_bytes, + .type = .note, + .print_source_line = false, + .token = filename_token, + }); + } + + var number_as_bytes: [8]u8 = undefined; + std.mem.writeInt(u64, &number_as_bytes, num_padding_bytes, native_endian); + const value_string_index = try 
self.diagnostics.putString(&number_as_bytes); + try self.addErrorDetails(.{ + .err = .bmp_missing_palette_bytes, + .type = .warning, + .token = filename_token, + .extra = .{ .number = value_string_index }, + }); + const pixel_data_len = bitmap_info.getPixelDataLen(file_size); + if (pixel_data_len > 0) { + const miscompiled_bytes = @min(pixel_data_len, num_padding_bytes); + std.mem.writeInt(u64, &number_as_bytes, miscompiled_bytes, native_endian); + const miscompiled_bytes_string_index = try self.diagnostics.putString(&number_as_bytes); + try self.addErrorDetails(.{ + .err = .rc_would_miscompile_bmp_palette_padding, + .type = .warning, + .token = filename_token, + .extra = .{ .number = miscompiled_bytes_string_index }, + }); + } + } + + // TODO: It might be possible that the calculation done in this function + // could underflow if the underlying file is modified while reading + // it, but need to think about it more to determine if that's a + // real possibility + const bmp_bytes_to_write: u32 = @intCast(bitmap_info.getExpectedByteLen(file_size)); + + header.data_size = bmp_bytes_to_write; + try header.write(writer, .{ .diagnostics = self.diagnostics, .token = node.id }); + try file.seekTo(bmp.file_header_len); + const file_reader = file.reader(); + try writeResourceDataNoPadding(writer, file_reader, bitmap_info.dib_header_size); + if (bitmap_info.getBitmasksByteLen() > 0) { + try writeResourceDataNoPadding(writer, file_reader, bitmap_info.getBitmasksByteLen()); + } + if (bitmap_info.getExpectedPaletteByteLen() > 0) { + try writeResourceDataNoPadding(writer, file_reader, @intCast(bitmap_info.getActualPaletteByteLen())); + // We know that the number of missing palette bytes is <= 4096 + // (see `bmp_too_many_missing_palette_bytes` error case above) + const padding_bytes: usize = @intCast(bitmap_info.getMissingPaletteByteLen()); + if (padding_bytes > 0) { + try writer.writeByteNTimes(0, padding_bytes); + } + } + try file.seekTo(bitmap_info.pixel_data_offset); + const 
pixel_bytes: u32 = @intCast(file_size - bitmap_info.pixel_data_offset); + try writeResourceDataNoPadding(writer, file_reader, pixel_bytes); + try writeDataPadding(writer, bmp_bytes_to_write); + return; + }, + .FONT => { + if (self.state.font_dir.ids.get(header.name_value.ordinal) != null) { + // Add warning and skip this resource + // Note: The Win32 compiler prints this as an error but it doesn't fail the compilation + // and the duplicate resource is skipped. + try self.addErrorDetails(ErrorDetails{ + .err = .font_id_already_defined, + .token = node.id, + .type = .warning, + .extra = .{ .number = header.name_value.ordinal }, + }); + try self.addErrorDetails(ErrorDetails{ + .err = .font_id_already_defined, + .token = self.state.font_dir.ids.get(header.name_value.ordinal).?, + .type = .note, + .extra = .{ .number = header.name_value.ordinal }, + }); + return; + } + header.applyMemoryFlags(node.common_resource_attributes, self.source); + const file_size = try file.getEndPos(); + if (file_size > std.math.maxInt(u32)) { + return self.addErrorDetailsAndFail(.{ + .err = .resource_data_size_exceeds_max, + .token = node.id, + }); + } + + // We now know that the data size will fit in a u32 + header.data_size = @intCast(file_size); + try header.write(writer, .{ .diagnostics = self.diagnostics, .token = node.id }); + + var header_slurping_reader = headerSlurpingReader(148, file.reader()); + try writeResourceData(writer, header_slurping_reader.reader(), header.data_size); + + try self.state.font_dir.add(self.arena, FontDir.Font{ + .id = header.name_value.ordinal, + .header_bytes = header_slurping_reader.slurped_header, + }, node.id); + return; + }, + .ACCELERATOR, + .ANICURSOR, + .ANIICON, + .CURSOR, + .DIALOG, + .DLGINCLUDE, + .FONTDIR, + .ICON, + .MENU, + .STRING, + .TOOLBAR, + .VERSION, + .VXD, + => unreachable, + _ => unreachable, + } + } else { + header.applyMemoryFlags(node.common_resource_attributes, self.source); + } + + // Fallback to just writing out the entire 
contents of the file + const data_size = try file.getEndPos(); + if (data_size > std.math.maxInt(u32)) { + return self.addErrorDetailsAndFail(.{ + .err = .resource_data_size_exceeds_max, + .token = node.id, + }); + } + // We now know that the data size will fit in a u32 + header.data_size = @intCast(data_size); + try header.write(writer, .{ .diagnostics = self.diagnostics, .token = node.id }); + try writeResourceData(writer, file.reader(), header.data_size); + } + + fn iconReadError( + self: *Compiler, + err: ico.ReadError, + filename: []const u8, + token: Token, + predefined_type: res.RT, + ) error{ CompileError, OutOfMemory } { + const filename_string_index = try self.diagnostics.putString(filename); + return self.addErrorDetailsAndFail(.{ + .err = .icon_read_error, + .token = token, + .extra = .{ .icon_read_error = .{ + .err = ErrorDetails.IconReadError.enumFromError(err), + .icon_type = switch (predefined_type) { + .GROUP_ICON => .icon, + .GROUP_CURSOR => .cursor, + else => unreachable, + }, + .filename_string_index = filename_string_index, + } }, + }); + } + + pub const DataType = enum { + number, + ascii_string, + wide_string, + }; + + pub const Data = union(DataType) { + number: Number, + ascii_string: []const u8, + wide_string: [:0]const u16, + + pub fn deinit(self: Data, allocator: Allocator) void { + switch (self) { + .wide_string => |wide_string| { + allocator.free(wide_string); + }, + .ascii_string => |ascii_string| { + allocator.free(ascii_string); + }, + else => {}, + } + } + + pub fn write(self: Data, writer: anytype) !void { + switch (self) { + .number => |number| switch (number.is_long) { + false => try writer.writeInt(WORD, number.asWord(), .little), + true => try writer.writeInt(DWORD, number.value, .little), + }, + .ascii_string => |ascii_string| { + try writer.writeAll(ascii_string); + }, + .wide_string => |wide_string| { + try writer.writeAll(std.mem.sliceAsBytes(wide_string)); + }, + } + } + }; + + /// Assumes that the node is a number or 
number expression + pub fn evaluateNumberExpression(expression_node: *Node, source: []const u8, code_page_lookup: *const CodePageLookup) Number { + switch (expression_node.id) { + .literal => { + const literal_node = expression_node.cast(.literal).?; + std.debug.assert(literal_node.token.id == .number); + const bytes = SourceBytes{ + .slice = literal_node.token.slice(source), + .code_page = code_page_lookup.getForToken(literal_node.token), + }; + return literals.parseNumberLiteral(bytes); + }, + .binary_expression => { + const binary_expression_node = expression_node.cast(.binary_expression).?; + const lhs = evaluateNumberExpression(binary_expression_node.left, source, code_page_lookup); + const rhs = evaluateNumberExpression(binary_expression_node.right, source, code_page_lookup); + const operator_char = binary_expression_node.operator.slice(source)[0]; + return lhs.evaluateOperator(operator_char, rhs); + }, + .grouped_expression => { + const grouped_expression_node = expression_node.cast(.grouped_expression).?; + return evaluateNumberExpression(grouped_expression_node.expression, source, code_page_lookup); + }, + else => unreachable, + } + } + + const FlagsNumber = struct { + value: u32, + not_mask: u32 = 0xFFFFFFFF, + + pub fn evaluateOperator(lhs: FlagsNumber, operator_char: u8, rhs: FlagsNumber) FlagsNumber { + const result = switch (operator_char) { + '-' => lhs.value -% rhs.value, + '+' => lhs.value +% rhs.value, + '|' => lhs.value | rhs.value, + '&' => lhs.value & rhs.value, + else => unreachable, // invalid operator, this would be a lexer/parser bug + }; + return .{ + .value = result, + .not_mask = lhs.not_mask & rhs.not_mask, + }; + } + + pub fn applyNotMask(self: FlagsNumber) u32 { + return self.value & self.not_mask; + } + }; + + pub fn evaluateFlagsExpressionWithDefault(default: u32, expression_node: *Node, source: []const u8, code_page_lookup: *const CodePageLookup) u32 { + var context = FlagsExpressionContext{ .initial_value = default }; + const 
number = evaluateFlagsExpression(expression_node, source, code_page_lookup, &context); + return number.value; + } + + pub const FlagsExpressionContext = struct { + initial_value: u32 = 0, + initial_value_used: bool = false, + }; + + /// Assumes that the node is a number expression (which can contain not_expressions) + pub fn evaluateFlagsExpression(expression_node: *Node, source: []const u8, code_page_lookup: *const CodePageLookup, context: *FlagsExpressionContext) FlagsNumber { + switch (expression_node.id) { + .literal => { + const literal_node = expression_node.cast(.literal).?; + std.debug.assert(literal_node.token.id == .number); + const bytes = SourceBytes{ + .slice = literal_node.token.slice(source), + .code_page = code_page_lookup.getForToken(literal_node.token), + }; + var value = literals.parseNumberLiteral(bytes).value; + if (!context.initial_value_used) { + context.initial_value_used = true; + value |= context.initial_value; + } + return .{ .value = value }; + }, + .binary_expression => { + const binary_expression_node = expression_node.cast(.binary_expression).?; + const lhs = evaluateFlagsExpression(binary_expression_node.left, source, code_page_lookup, context); + const rhs = evaluateFlagsExpression(binary_expression_node.right, source, code_page_lookup, context); + const operator_char = binary_expression_node.operator.slice(source)[0]; + const result = lhs.evaluateOperator(operator_char, rhs); + return .{ .value = result.applyNotMask() }; + }, + .grouped_expression => { + const grouped_expression_node = expression_node.cast(.grouped_expression).?; + return evaluateFlagsExpression(grouped_expression_node.expression, source, code_page_lookup, context); + }, + .not_expression => { + const not_expression = expression_node.cast(.not_expression).?; + const bytes = SourceBytes{ + .slice = not_expression.number_token.slice(source), + .code_page = code_page_lookup.getForToken(not_expression.number_token), + }; + const not_number = 
literals.parseNumberLiteral(bytes); + if (!context.initial_value_used) { + context.initial_value_used = true; + return .{ .value = context.initial_value & ~not_number.value }; + } + return .{ .value = 0, .not_mask = ~not_number.value }; + }, + else => unreachable, + } + } + + pub fn evaluateDataExpression(self: *Compiler, expression_node: *Node) !Data { + switch (expression_node.id) { + .literal => { + const literal_node = expression_node.cast(.literal).?; + switch (literal_node.token.id) { + .number => { + const number = evaluateNumberExpression(expression_node, self.source, self.input_code_pages); + return .{ .number = number }; + }, + .quoted_ascii_string => { + const column = literal_node.token.calculateColumn(self.source, 8, null); + const bytes = SourceBytes{ + .slice = literal_node.token.slice(self.source), + .code_page = self.input_code_pages.getForToken(literal_node.token), + }; + const parsed = try literals.parseQuotedAsciiString(self.allocator, bytes, .{ + .start_column = column, + .diagnostics = .{ .diagnostics = self.diagnostics, .token = literal_node.token }, + .output_code_page = self.output_code_pages.getForToken(literal_node.token), + }); + errdefer self.allocator.free(parsed); + return .{ .ascii_string = parsed }; + }, + .quoted_wide_string => { + const column = literal_node.token.calculateColumn(self.source, 8, null); + const bytes = SourceBytes{ + .slice = literal_node.token.slice(self.source), + .code_page = self.input_code_pages.getForToken(literal_node.token), + }; + const parsed_string = try literals.parseQuotedWideString(self.allocator, bytes, .{ + .start_column = column, + .diagnostics = .{ .diagnostics = self.diagnostics, .token = literal_node.token }, + }); + errdefer self.allocator.free(parsed_string); + return .{ .wide_string = parsed_string }; + }, + else => unreachable, // no other token types should be in a data literal node + } + }, + .binary_expression, .grouped_expression => { + const result = 
evaluateNumberExpression(expression_node, self.source, self.input_code_pages); + return .{ .number = result }; + }, + .not_expression => unreachable, + else => unreachable, + } + } + + pub fn writeResourceRawData(self: *Compiler, node: *Node.ResourceRawData, writer: anytype) !void { + var data_buffer = std.ArrayList(u8).init(self.allocator); + defer data_buffer.deinit(); + // The header's data length field is a u32 so limit the resource's data size so that + // we know we can always specify the real size. + var limited_writer = limitedWriter(data_buffer.writer(), std.math.maxInt(u32)); + const data_writer = limited_writer.writer(); + + for (node.raw_data) |expression| { + const data = try self.evaluateDataExpression(expression); + defer data.deinit(self.allocator); + data.write(data_writer) catch |err| switch (err) { + error.NoSpaceLeft => { + return self.addErrorDetailsAndFail(.{ + .err = .resource_data_size_exceeds_max, + .token = node.id, + }); + }, + else => |e| return e, + }; + } + + // This intCast can't fail because the limitedWriter above guarantees that + // we will never write more than maxInt(u32) bytes. 
+ const data_len: u32 = @intCast(data_buffer.items.len); + try self.writeResourceHeader(writer, node.id, node.type, data_len, node.common_resource_attributes, self.state.language); + + var data_fbs = std.io.fixedBufferStream(data_buffer.items); + try writeResourceData(writer, data_fbs.reader(), data_len); + } + + pub fn writeResourceHeader(self: *Compiler, writer: anytype, id_token: Token, type_token: Token, data_size: u32, common_resource_attributes: []Token, language: res.Language) !void { + var header = try self.resourceHeader(id_token, type_token, .{ + .language = language, + .data_size = data_size, + }); + defer header.deinit(self.allocator); + + header.applyMemoryFlags(common_resource_attributes, self.source); + + try header.write(writer, .{ .diagnostics = self.diagnostics, .token = id_token }); + } + + pub fn writeResourceDataNoPadding(writer: anytype, data_reader: anytype, data_size: u32) !void { + var limited_reader = std.io.limitedReader(data_reader, data_size); + + const FifoBuffer = std.fifo.LinearFifo(u8, .{ .Static = 4096 }); + var fifo = FifoBuffer.init(); + try fifo.pump(limited_reader.reader(), writer); + } + + pub fn writeResourceData(writer: anytype, data_reader: anytype, data_size: u32) !void { + try writeResourceDataNoPadding(writer, data_reader, data_size); + try writeDataPadding(writer, data_size); + } + + pub fn writeDataPadding(writer: anytype, data_size: u32) !void { + try writer.writeByteNTimes(0, numPaddingBytesNeeded(data_size)); + } + + pub fn numPaddingBytesNeeded(data_size: u32) u2 { + // Result is guaranteed to be between 0 and 3. 
+ return @intCast((4 -% data_size) % 4); + } + + pub fn evaluateAcceleratorKeyExpression(self: *Compiler, node: *Node, is_virt: bool) !u16 { + if (node.isNumberExpression()) { + return evaluateNumberExpression(node, self.source, self.input_code_pages).asWord(); + } else { + std.debug.assert(node.isStringLiteral()); + const literal = @fieldParentPtr(Node.Literal, "base", node); + const bytes = SourceBytes{ + .slice = literal.token.slice(self.source), + .code_page = self.input_code_pages.getForToken(literal.token), + }; + const column = literal.token.calculateColumn(self.source, 8, null); + return res.parseAcceleratorKeyString(bytes, is_virt, .{ + .start_column = column, + .diagnostics = .{ .diagnostics = self.diagnostics, .token = literal.token }, + }); + } + } + + pub fn writeAccelerators(self: *Compiler, node: *Node.Accelerators, writer: anytype) !void { + var data_buffer = std.ArrayList(u8).init(self.allocator); + defer data_buffer.deinit(); + + // The header's data length field is a u32 so limit the resource's data size so that + // we know we can always specify the real size. + var limited_writer = limitedWriter(data_buffer.writer(), std.math.maxInt(u32)); + const data_writer = limited_writer.writer(); + + self.writeAcceleratorsData(node, data_writer) catch |err| switch (err) { + error.NoSpaceLeft => { + return self.addErrorDetailsAndFail(.{ + .err = .resource_data_size_exceeds_max, + .token = node.id, + }); + }, + else => |e| return e, + }; + + // This intCast can't fail because the limitedWriter above guarantees that + // we will never write more than maxInt(u32) bytes. 
+ const data_size: u32 = @intCast(data_buffer.items.len); + var header = try self.resourceHeader(node.id, node.type, .{ + .data_size = data_size, + }); + defer header.deinit(self.allocator); + + header.applyMemoryFlags(node.common_resource_attributes, self.source); + header.applyOptionalStatements(node.optional_statements, self.source, self.input_code_pages); + + try header.write(writer, .{ .diagnostics = self.diagnostics, .token = node.id }); + + var data_fbs = std.io.fixedBufferStream(data_buffer.items); + try writeResourceData(writer, data_fbs.reader(), data_size); + } + + /// Expects `data_writer` to be a LimitedWriter limited to u32, meaning all writes to + /// the writer within this function could return error.NoSpaceLeft + pub fn writeAcceleratorsData(self: *Compiler, node: *Node.Accelerators, data_writer: anytype) !void { + for (node.accelerators, 0..) |accel_node, i| { + const accelerator = @fieldParentPtr(Node.Accelerator, "base", accel_node); + var modifiers = res.AcceleratorModifiers{}; + for (accelerator.type_and_options) |type_or_option| { + const modifier = rc.AcceleratorTypeAndOptions.map.get(type_or_option.slice(self.source)).?; + modifiers.apply(modifier); + } + if (accelerator.event.isNumberExpression() and !modifiers.explicit_ascii_or_virtkey) { + return self.addErrorDetailsAndFail(.{ + .err = .accelerator_type_required, + .token = accelerator.event.getFirstToken(), + .token_span_end = accelerator.event.getLastToken(), + }); + } + const key = self.evaluateAcceleratorKeyExpression(accelerator.event, modifiers.isSet(.virtkey)) catch |err| switch (err) { + error.OutOfMemory => |e| return e, + else => |e| { + return self.addErrorDetailsAndFail(.{ + .err = .invalid_accelerator_key, + .token = accelerator.event.getFirstToken(), + .token_span_end = accelerator.event.getLastToken(), + .extra = .{ .accelerator_error = .{ + .err = ErrorDetails.AcceleratorError.enumFromError(e), + } }, + }); + }, + }; + const cmd_id = 
evaluateNumberExpression(accelerator.idvalue, self.source, self.input_code_pages); + + if (i == node.accelerators.len - 1) { + modifiers.markLast(); + } + + try data_writer.writeByte(modifiers.value); + try data_writer.writeByte(0); // padding + try data_writer.writeInt(u16, key, .little); + try data_writer.writeInt(u16, cmd_id.asWord(), .little); + try data_writer.writeInt(u16, 0, .little); // padding + } + } + + const DialogOptionalStatementValues = struct { + style: u32 = res.WS.SYSMENU | res.WS.BORDER | res.WS.POPUP, + exstyle: u32 = 0, + class: ?NameOrOrdinal = null, + menu: ?NameOrOrdinal = null, + font: ?FontStatementValues = null, + caption: ?Token = null, + }; + + pub fn writeDialog(self: *Compiler, node: *Node.Dialog, writer: anytype) !void { + var data_buffer = std.ArrayList(u8).init(self.allocator); + defer data_buffer.deinit(); + // The header's data length field is a u32 so limit the resource's data size so that + // we know we can always specify the real size. + var limited_writer = limitedWriter(data_buffer.writer(), std.math.maxInt(u32)); + const data_writer = limited_writer.writer(); + + const resource = Resource.fromString(.{ + .slice = node.type.slice(self.source), + .code_page = self.input_code_pages.getForToken(node.type), + }); + std.debug.assert(resource == .dialog or resource == .dialogex); + + var optional_statement_values: DialogOptionalStatementValues = .{}; + defer { + if (optional_statement_values.class) |class| { + class.deinit(self.allocator); + } + if (optional_statement_values.menu) |menu| { + menu.deinit(self.allocator); + } + } + var skipped_menu_or_classes = std.ArrayList(*Node.SimpleStatement).init(self.allocator); + defer skipped_menu_or_classes.deinit(); + var last_menu: *Node.SimpleStatement = undefined; + var last_class: *Node.SimpleStatement = undefined; + var last_menu_would_be_forced_ordinal = false; + var last_menu_has_digit_as_first_char = false; + var last_menu_did_uppercase = false; + var 
last_class_would_be_forced_ordinal = false; + + for (node.optional_statements) |optional_statement| { + switch (optional_statement.id) { + .simple_statement => { + const simple_statement = @fieldParentPtr(Node.SimpleStatement, "base", optional_statement); + const statement_identifier = simple_statement.identifier; + const statement_type = rc.OptionalStatements.dialog_map.get(statement_identifier.slice(self.source)) orelse continue; + switch (statement_type) { + .style, .exstyle => { + const style = evaluateFlagsExpressionWithDefault(0, simple_statement.value, self.source, self.input_code_pages); + if (statement_type == .style) { + optional_statement_values.style = style; + } else { + optional_statement_values.exstyle = style; + } + }, + .caption => { + std.debug.assert(simple_statement.value.id == .literal); + const literal_node = @fieldParentPtr(Node.Literal, "base", simple_statement.value); + optional_statement_values.caption = literal_node.token; + }, + .class => { + const is_duplicate = optional_statement_values.class != null; + if (is_duplicate) { + try skipped_menu_or_classes.append(last_class); + } + const forced_ordinal = is_duplicate and optional_statement_values.class.? == .ordinal; + // In the Win32 RC compiler, if any CLASS values that are interpreted as + // an ordinal exist, it affects all future CLASS statements and forces + // them to be treated as an ordinal no matter what. 
+ if (forced_ordinal) { + last_class_would_be_forced_ordinal = true; + } + // clear out the old one if it exists + if (optional_statement_values.class) |prev| { + prev.deinit(self.allocator); + optional_statement_values.class = null; + } + + if (simple_statement.value.isNumberExpression()) { + const class_ordinal = evaluateNumberExpression(simple_statement.value, self.source, self.input_code_pages); + optional_statement_values.class = NameOrOrdinal{ .ordinal = class_ordinal.asWord() }; + } else { + std.debug.assert(simple_statement.value.isStringLiteral()); + const literal_node = @fieldParentPtr(Node.Literal, "base", simple_statement.value); + const parsed = try self.parseQuotedStringAsWideString(literal_node.token); + optional_statement_values.class = NameOrOrdinal{ .name = parsed }; + } + + last_class = simple_statement; + }, + .menu => { + const is_duplicate = optional_statement_values.menu != null; + if (is_duplicate) { + try skipped_menu_or_classes.append(last_menu); + } + const forced_ordinal = is_duplicate and optional_statement_values.menu.? == .ordinal; + // In the Win32 RC compiler, if any MENU values that are interpreted as + // an ordinal exist, it affects all future MENU statements and forces + // them to be treated as an ordinal no matter what. + if (forced_ordinal) { + last_menu_would_be_forced_ordinal = true; + } + // clear out the old one if it exists + if (optional_statement_values.menu) |prev| { + prev.deinit(self.allocator); + optional_statement_values.menu = null; + } + + std.debug.assert(simple_statement.value.id == .literal); + const literal_node = @fieldParentPtr(Node.Literal, "base", simple_statement.value); + + const token_slice = literal_node.token.slice(self.source); + const bytes = SourceBytes{ + .slice = token_slice, + .code_page = self.input_code_pages.getForToken(literal_node.token), + }; + optional_statement_values.menu = try NameOrOrdinal.fromString(self.allocator, bytes); + + if (optional_statement_values.menu.? 
== .name) { + if (NameOrOrdinal.maybeNonAsciiOrdinalFromString(bytes)) |win32_rc_ordinal| { + try self.addErrorDetails(.{ + .err = .invalid_digit_character_in_ordinal, + .type = .err, + .token = literal_node.token, + }); + return self.addErrorDetailsAndFail(.{ + .err = .win32_non_ascii_ordinal, + .type = .note, + .token = literal_node.token, + .print_source_line = false, + .extra = .{ .number = win32_rc_ordinal.ordinal }, + }); + } + } + + // Need to keep track of some properties of the value + // in order to emit the appropriate warning(s) later on. + // See where the warnings are emitted below (outside this loop) + // for the full explanation. + var did_uppercase = false; + var codepoint_i: usize = 0; + while (bytes.code_page.codepointAt(codepoint_i, bytes.slice)) |codepoint| : (codepoint_i += codepoint.byte_len) { + const c = codepoint.value; + switch (c) { + 'a'...'z' => { + did_uppercase = true; + break; + }, + else => {}, + } + } + last_menu_did_uppercase = did_uppercase; + last_menu_has_digit_as_first_char = std.ascii.isDigit(token_slice[0]); + last_menu = simple_statement; + }, + else => {}, + } + }, + .font_statement => { + const font = @fieldParentPtr(Node.FontStatement, "base", optional_statement); + if (optional_statement_values.font != null) { + optional_statement_values.font.?.node = font; + } else { + optional_statement_values.font = FontStatementValues{ .node = font }; + } + if (font.weight) |weight| { + const value = evaluateNumberExpression(weight, self.source, self.input_code_pages); + optional_statement_values.font.?.weight = value.asWord(); + } + if (font.italic) |italic| { + const value = evaluateNumberExpression(italic, self.source, self.input_code_pages); + optional_statement_values.font.?.italic = value.asWord() != 0; + } + }, + else => {}, + } + } + + for (skipped_menu_or_classes.items) |simple_statement| { + const statement_identifier = simple_statement.identifier; + const statement_type = 
rc.OptionalStatements.dialog_map.get(statement_identifier.slice(self.source)) orelse continue; + try self.addErrorDetails(.{ + .err = .duplicate_menu_or_class_skipped, + .type = .warning, + .token = simple_statement.identifier, + .token_span_start = simple_statement.base.getFirstToken(), + .token_span_end = simple_statement.base.getLastToken(), + .extra = .{ .menu_or_class = switch (statement_type) { + .menu => .menu, + .class => .class, + else => unreachable, + } }, + }); + } + // The Win32 RC compiler miscompiles the value in the following scenario: + // Multiple CLASS parameters are specified and any of them are treated as a number, then + // the last CLASS is always treated as a number no matter what + if (last_class_would_be_forced_ordinal and optional_statement_values.class.? == .name) { + const literal_node = @fieldParentPtr(Node.Literal, "base", last_class.value); + const ordinal_value = res.ForcedOrdinal.fromUtf16Le(optional_statement_values.class.?.name); + + try self.addErrorDetails(.{ + .err = .rc_would_miscompile_dialog_class, + .type = .warning, + .token = literal_node.token, + .extra = .{ .number = ordinal_value }, + }); + try self.addErrorDetails(.{ + .err = .rc_would_miscompile_dialog_class, + .type = .note, + .print_source_line = false, + .token = literal_node.token, + .extra = .{ .number = ordinal_value }, + }); + try self.addErrorDetails(.{ + .err = .rc_would_miscompile_dialog_menu_or_class_id_forced_ordinal, + .type = .note, + .print_source_line = false, + .token = literal_node.token, + .extra = .{ .menu_or_class = .class }, + }); + } + // The Win32 RC compiler miscompiles the id in two different scenarios: + // 1. The first character of the ID is a digit, in which case it is always treated as a number + // no matter what (and therefore does not match how the MENU/MENUEX id is parsed) + // 2. 
Multiple MENU parameters are specified and any of them are treated as a number, then + // the last MENU is always treated as a number no matter what + if ((last_menu_would_be_forced_ordinal or last_menu_has_digit_as_first_char) and optional_statement_values.menu.? == .name) { + const literal_node = @fieldParentPtr(Node.Literal, "base", last_menu.value); + const token_slice = literal_node.token.slice(self.source); + const bytes = SourceBytes{ + .slice = token_slice, + .code_page = self.input_code_pages.getForToken(literal_node.token), + }; + const ordinal_value = res.ForcedOrdinal.fromBytes(bytes); + + try self.addErrorDetails(.{ + .err = .rc_would_miscompile_dialog_menu_id, + .type = .warning, + .token = literal_node.token, + .extra = .{ .number = ordinal_value }, + }); + try self.addErrorDetails(.{ + .err = .rc_would_miscompile_dialog_menu_id, + .type = .note, + .print_source_line = false, + .token = literal_node.token, + .extra = .{ .number = ordinal_value }, + }); + if (last_menu_would_be_forced_ordinal) { + try self.addErrorDetails(.{ + .err = .rc_would_miscompile_dialog_menu_or_class_id_forced_ordinal, + .type = .note, + .print_source_line = false, + .token = literal_node.token, + .extra = .{ .menu_or_class = .menu }, + }); + } else { + try self.addErrorDetails(.{ + .err = .rc_would_miscompile_dialog_menu_id_starts_with_digit, + .type = .note, + .print_source_line = false, + .token = literal_node.token, + }); + } + } + // The MENU id parsing uses the exact same logic as the MENU/MENUEX resource id parsing, + // which means that it will convert ASCII characters to uppercase during the 'name' parsing. + // This turns out not to matter (`LoadMenu` does a case-insensitive lookup anyway), + // but it still makes sense to share the uppercasing logic since the MENU parameter + // here is just a reference to a MENU/MENUEX id within the .exe. 
+ // So, because this is an intentional but inconsequential-to-the-user difference + // between resinator and the Win32 RC compiler, we only emit a hint instead of + // a warning. + if (last_menu_did_uppercase) { + const literal_node = @fieldParentPtr(Node.Literal, "base", last_menu.value); + try self.addErrorDetails(.{ + .err = .dialog_menu_id_was_uppercased, + .type = .hint, + .token = literal_node.token, + }); + } + + const x = evaluateNumberExpression(node.x, self.source, self.input_code_pages); + const y = evaluateNumberExpression(node.y, self.source, self.input_code_pages); + const width = evaluateNumberExpression(node.width, self.source, self.input_code_pages); + const height = evaluateNumberExpression(node.height, self.source, self.input_code_pages); + + // FONT statement requires DS_SETFONT, and if it's not present DS_SETFONT must be unset + if (optional_statement_values.font) |_| { + optional_statement_values.style |= res.DS.SETFONT; + } else { + optional_statement_values.style &= ~res.DS.SETFONT; + } + // CAPTION statement implies WS_CAPTION + if (optional_statement_values.caption) |_| { + optional_statement_values.style |= res.WS.CAPTION; + } + + self.writeDialogHeaderAndStrings( + node, + data_writer, + resource, + &optional_statement_values, + x, + y, + width, + height, + ) catch |err| switch (err) { + // Dialog header and menu/class/title strings can never exceed u32 bytes + // on their own, so this error is unreachable. + error.NoSpaceLeft => unreachable, + else => |e| return e, + }; + + var controls_by_id = std.AutoHashMap(u32, *const Node.ControlStatement).init(self.allocator); + // Number of controls are guaranteed by the parser to be within maxInt(u16). 
+ try controls_by_id.ensureTotalCapacity(@as(u16, @intCast(node.controls.len))); + defer controls_by_id.deinit(); + + for (node.controls) |control_node| { + const control = @fieldParentPtr(Node.ControlStatement, "base", control_node); + + self.writeDialogControl( + control, + data_writer, + resource, + // We know the data_buffer len is limited to u32 max. + @intCast(data_buffer.items.len), + &controls_by_id, + ) catch |err| switch (err) { + error.NoSpaceLeft => { + try self.addErrorDetails(.{ + .err = .resource_data_size_exceeds_max, + .token = node.id, + }); + return self.addErrorDetailsAndFail(.{ + .err = .resource_data_size_exceeds_max, + .type = .note, + .token = control.type, + }); + }, + else => |e| return e, + }; + } + + // We know the data_buffer len is limited to u32 max. + const data_size: u32 = @intCast(data_buffer.items.len); + var header = try self.resourceHeader(node.id, node.type, .{ + .data_size = data_size, + }); + defer header.deinit(self.allocator); + + header.applyMemoryFlags(node.common_resource_attributes, self.source); + header.applyOptionalStatements(node.optional_statements, self.source, self.input_code_pages); + + try header.write(writer, .{ .diagnostics = self.diagnostics, .token = node.id }); + + var data_fbs = std.io.fixedBufferStream(data_buffer.items); + try writeResourceData(writer, data_fbs.reader(), data_size); + } + + fn writeDialogHeaderAndStrings( + self: *Compiler, + node: *Node.Dialog, + data_writer: anytype, + resource: Resource, + optional_statement_values: *const DialogOptionalStatementValues, + x: Number, + y: Number, + width: Number, + height: Number, + ) !void { + // Header + if (resource == .dialogex) { + const help_id: u32 = help_id: { + if (node.help_id == null) break :help_id 0; + break :help_id evaluateNumberExpression(node.help_id.?, self.source, self.input_code_pages).value; + }; + try data_writer.writeInt(u16, 1, .little); // version number, always 1 + try data_writer.writeInt(u16, 0xFFFF, .little); // signature, 
always 0xFFFF + try data_writer.writeInt(u32, help_id, .little); + try data_writer.writeInt(u32, optional_statement_values.exstyle, .little); + try data_writer.writeInt(u32, optional_statement_values.style, .little); + } else { + try data_writer.writeInt(u32, optional_statement_values.style, .little); + try data_writer.writeInt(u32, optional_statement_values.exstyle, .little); + } + // This limit is enforced by the parser, so we know the number of controls + // is within the range of a u16. + try data_writer.writeInt(u16, @as(u16, @intCast(node.controls.len)), .little); + try data_writer.writeInt(u16, x.asWord(), .little); + try data_writer.writeInt(u16, y.asWord(), .little); + try data_writer.writeInt(u16, width.asWord(), .little); + try data_writer.writeInt(u16, height.asWord(), .little); + + // Menu + if (optional_statement_values.menu) |menu| { + try menu.write(data_writer); + } else { + try data_writer.writeInt(u16, 0, .little); + } + // Class + if (optional_statement_values.class) |class| { + try class.write(data_writer); + } else { + try data_writer.writeInt(u16, 0, .little); + } + // Caption + if (optional_statement_values.caption) |caption| { + const parsed = try self.parseQuotedStringAsWideString(caption); + defer self.allocator.free(parsed); + try data_writer.writeAll(std.mem.sliceAsBytes(parsed[0 .. parsed.len + 1])); + } else { + try data_writer.writeInt(u16, 0, .little); + } + // Font + if (optional_statement_values.font) |font| { + try self.writeDialogFont(resource, font, data_writer); + } + } + + fn writeDialogControl( + self: *Compiler, + control: *Node.ControlStatement, + data_writer: anytype, + resource: Resource, + bytes_written_so_far: u32, + controls_by_id: *std.AutoHashMap(u32, *const Node.ControlStatement), + ) !void { + const control_type = rc.Control.map.get(control.type.slice(self.source)).?; + + // Each control must be at a 4-byte boundary. 
However, the Windows RC + // compiler will miscompile controls if their extra data ends on an odd offset. + // We will avoid the miscompilation and emit a warning. + const num_padding = numPaddingBytesNeeded(bytes_written_so_far); + if (num_padding == 1 or num_padding == 3) { + try self.addErrorDetails(.{ + .err = .rc_would_miscompile_control_padding, + .type = .warning, + .token = control.type, + }); + try self.addErrorDetails(.{ + .err = .rc_would_miscompile_control_padding, + .type = .note, + .print_source_line = false, + .token = control.type, + }); + } + try data_writer.writeByteNTimes(0, num_padding); + + const style = if (control.style) |style_expression| + // Certain styles are implied by the control type + evaluateFlagsExpressionWithDefault(res.ControlClass.getImpliedStyle(control_type), style_expression, self.source, self.input_code_pages) + else + res.ControlClass.getImpliedStyle(control_type); + + const exstyle = if (control.exstyle) |exstyle_expression| + evaluateFlagsExpressionWithDefault(0, exstyle_expression, self.source, self.input_code_pages) + else + 0; + + switch (resource) { + .dialog => { + // Note: Reverse order from DIALOGEX + try data_writer.writeInt(u32, style, .little); + try data_writer.writeInt(u32, exstyle, .little); + }, + .dialogex => { + const help_id: u32 = if (control.help_id) |help_id_expression| + evaluateNumberExpression(help_id_expression, self.source, self.input_code_pages).value + else + 0; + try data_writer.writeInt(u32, help_id, .little); + // Note: Reverse order from DIALOG + try data_writer.writeInt(u32, exstyle, .little); + try data_writer.writeInt(u32, style, .little); + }, + else => unreachable, + } + + const control_x = evaluateNumberExpression(control.x, self.source, self.input_code_pages); + const control_y = evaluateNumberExpression(control.y, self.source, self.input_code_pages); + const control_width = evaluateNumberExpression(control.width, self.source, self.input_code_pages); + const control_height = 
evaluateNumberExpression(control.height, self.source, self.input_code_pages); + + try data_writer.writeInt(u16, control_x.asWord(), .little); + try data_writer.writeInt(u16, control_y.asWord(), .little); + try data_writer.writeInt(u16, control_width.asWord(), .little); + try data_writer.writeInt(u16, control_height.asWord(), .little); + + const control_id = evaluateNumberExpression(control.id, self.source, self.input_code_pages); + switch (resource) { + .dialog => try data_writer.writeInt(u16, control_id.asWord(), .little), + .dialogex => try data_writer.writeInt(u32, control_id.value, .little), + else => unreachable, + } + + const control_id_for_map: u32 = switch (resource) { + .dialog => control_id.asWord(), + .dialogex => control_id.value, + else => unreachable, + }; + const result = controls_by_id.getOrPutAssumeCapacity(control_id_for_map); + if (result.found_existing) { + if (!self.silent_duplicate_control_ids) { + try self.addErrorDetails(.{ + .err = .control_id_already_defined, + .type = .warning, + .token = control.id.getFirstToken(), + .token_span_end = control.id.getLastToken(), + .extra = .{ .number = control_id_for_map }, + }); + try self.addErrorDetails(.{ + .err = .control_id_already_defined, + .type = .note, + .token = result.value_ptr.*.id.getFirstToken(), + .token_span_end = result.value_ptr.*.id.getLastToken(), + .extra = .{ .number = control_id_for_map }, + }); + } + } else { + result.value_ptr.* = control; + } + + if (res.ControlClass.fromControl(control_type)) |control_class| { + const ordinal = NameOrOrdinal{ .ordinal = @intFromEnum(control_class) }; + try ordinal.write(data_writer); + } else { + const class_node = control.class.?; + if (class_node.isNumberExpression()) { + const number = evaluateNumberExpression(class_node, self.source, self.input_code_pages); + const ordinal = NameOrOrdinal{ .ordinal = number.asWord() }; + // This is different from how the Windows RC compiles ordinals here, + // but I think that's a miscompilation/bug of the 
Windows implementation. + // The Windows behavior is (where LSB = least significant byte): + // - If the LSB is 0x00 => 0xFFFF0000 + // - If the LSB is < 0x80 => 0x000000<LSB> + // - If the LSB is >= 0x80 => 0x0000FF<LSB> + // + // Because of this, we emit a warning about the potential miscompilation + try self.addErrorDetails(.{ + .err = .rc_would_miscompile_control_class_ordinal, + .type = .warning, + .token = class_node.getFirstToken(), + .token_span_end = class_node.getLastToken(), + }); + try self.addErrorDetails(.{ + .err = .rc_would_miscompile_control_class_ordinal, + .type = .note, + .print_source_line = false, + .token = class_node.getFirstToken(), + .token_span_end = class_node.getLastToken(), + }); + // And then write out the ordinal using a proper a NameOrOrdinal encoding. + try ordinal.write(data_writer); + } else if (class_node.isStringLiteral()) { + const literal_node = @fieldParentPtr(Node.Literal, "base", class_node); + const parsed = try self.parseQuotedStringAsWideString(literal_node.token); + defer self.allocator.free(parsed); + if (rc.ControlClass.fromWideString(parsed)) |control_class| { + const ordinal = NameOrOrdinal{ .ordinal = @intFromEnum(control_class) }; + try ordinal.write(data_writer); + } else { + // NUL acts as a terminator + // TODO: Maybe warn when parsed_terminated.len != parsed.len, since + // it seems unlikely that NUL-termination is something intentional + const parsed_terminated = std.mem.sliceTo(parsed, 0); + const name = NameOrOrdinal{ .name = parsed_terminated }; + try name.write(data_writer); + } + } else { + const literal_node = @fieldParentPtr(Node.Literal, "base", class_node); + const literal_slice = literal_node.token.slice(self.source); + // This succeeding is guaranteed by the parser + const control_class = rc.ControlClass.map.get(literal_slice) orelse unreachable; + const ordinal = NameOrOrdinal{ .ordinal = @intFromEnum(control_class) }; + try ordinal.write(data_writer); + } + } + + if (control.text) |text_token| { 
+ const bytes = SourceBytes{ + .slice = text_token.slice(self.source), + .code_page = self.input_code_pages.getForToken(text_token), + }; + if (text_token.isStringLiteral()) { + const text = try self.parseQuotedStringAsWideString(text_token); + defer self.allocator.free(text); + const name = NameOrOrdinal{ .name = text }; + try name.write(data_writer); + } else { + std.debug.assert(text_token.id == .number); + const number = literals.parseNumberLiteral(bytes); + const ordinal = NameOrOrdinal{ .ordinal = number.asWord() }; + try ordinal.write(data_writer); + } + } else { + try NameOrOrdinal.writeEmpty(data_writer); + } + + var extra_data_buf = std.ArrayList(u8).init(self.allocator); + defer extra_data_buf.deinit(); + // The extra data byte length must be able to fit within a u16. + var limited_extra_data_writer = limitedWriter(extra_data_buf.writer(), std.math.maxInt(u16)); + const extra_data_writer = limited_extra_data_writer.writer(); + for (control.extra_data) |data_expression| { + const data = try self.evaluateDataExpression(data_expression); + defer data.deinit(self.allocator); + data.write(extra_data_writer) catch |err| switch (err) { + error.NoSpaceLeft => { + try self.addErrorDetails(.{ + .err = .control_extra_data_size_exceeds_max, + .token = control.type, + }); + return self.addErrorDetailsAndFail(.{ + .err = .control_extra_data_size_exceeds_max, + .type = .note, + .token = data_expression.getFirstToken(), + .token_span_end = data_expression.getLastToken(), + }); + }, + else => |e| return e, + }; + } + // We know the extra_data_buf size fits within a u16. 
+ const extra_data_size: u16 = @intCast(extra_data_buf.items.len); + try data_writer.writeInt(u16, extra_data_size, .little); + try data_writer.writeAll(extra_data_buf.items); + } + + pub fn writeToolbar(self: *Compiler, node: *Node.Toolbar, writer: anytype) !void { + var data_buffer = std.ArrayList(u8).init(self.allocator); + defer data_buffer.deinit(); + const data_writer = data_buffer.writer(); + + const button_width = evaluateNumberExpression(node.button_width, self.source, self.input_code_pages); + const button_height = evaluateNumberExpression(node.button_height, self.source, self.input_code_pages); + + // I'm assuming this is some sort of version + // TODO: Try to find something mentioning this + try data_writer.writeInt(u16, 1, .little); + try data_writer.writeInt(u16, button_width.asWord(), .little); + try data_writer.writeInt(u16, button_height.asWord(), .little); + // Number of buttons is guaranteed by the parser to be within maxInt(u16). + try data_writer.writeInt(u16, @as(u16, @intCast(node.buttons.len)), .little); + + for (node.buttons) |button_or_sep| { + switch (button_or_sep.id) { + .literal => { // This is always SEPARATOR + std.debug.assert(button_or_sep.cast(.literal).?.token.id == .literal); + try data_writer.writeInt(u16, 0, .little); + }, + .simple_statement => { + const value_node = button_or_sep.cast(.simple_statement).?.value; + const value = evaluateNumberExpression(value_node, self.source, self.input_code_pages); + try data_writer.writeInt(u16, value.asWord(), .little); + }, + else => unreachable, // This is a bug in the parser + } + } + + const data_size: u32 = @intCast(data_buffer.items.len); + var header = try self.resourceHeader(node.id, node.type, .{ + .data_size = data_size, + }); + defer header.deinit(self.allocator); + + header.applyMemoryFlags(node.common_resource_attributes, self.source); + + try header.write(writer, .{ .diagnostics = self.diagnostics, .token = node.id }); + + var data_fbs = 
std.io.fixedBufferStream(data_buffer.items); + try writeResourceData(writer, data_fbs.reader(), data_size); + } + + /// Weight and italic carry over from previous FONT statements within a single resource, + /// so they need to be parsed ahead-of-time and stored + const FontStatementValues = struct { + weight: u16 = 0, + italic: bool = false, + node: *Node.FontStatement, + }; + + pub fn writeDialogFont(self: *Compiler, resource: Resource, values: FontStatementValues, writer: anytype) !void { + const node = values.node; + const point_size = evaluateNumberExpression(node.point_size, self.source, self.input_code_pages); + try writer.writeInt(u16, point_size.asWord(), .little); + + if (resource == .dialogex) { + try writer.writeInt(u16, values.weight, .little); + } + + if (resource == .dialogex) { + try writer.writeInt(u8, @intFromBool(values.italic), .little); + } + + if (node.char_set) |char_set| { + const value = evaluateNumberExpression(char_set, self.source, self.input_code_pages); + try writer.writeInt(u8, @as(u8, @truncate(value.value)), .little); + } else if (resource == .dialogex) { + try writer.writeInt(u8, 1, .little); // DEFAULT_CHARSET + } + + const typeface = try self.parseQuotedStringAsWideString(node.typeface); + defer self.allocator.free(typeface); + try writer.writeAll(std.mem.sliceAsBytes(typeface[0 .. typeface.len + 1])); + } + + pub fn writeMenu(self: *Compiler, node: *Node.Menu, writer: anytype) !void { + var data_buffer = std.ArrayList(u8).init(self.allocator); + defer data_buffer.deinit(); + // The header's data length field is a u32 so limit the resource's data size so that + // we know we can always specify the real size. 
+ var limited_writer = limitedWriter(data_buffer.writer(), std.math.maxInt(u32)); + const data_writer = limited_writer.writer(); + + const type_bytes = SourceBytes{ + .slice = node.type.slice(self.source), + .code_page = self.input_code_pages.getForToken(node.type), + }; + const resource = Resource.fromString(type_bytes); + std.debug.assert(resource == .menu or resource == .menuex); + + self.writeMenuData(node, data_writer, resource) catch |err| switch (err) { + error.NoSpaceLeft => { + return self.addErrorDetailsAndFail(.{ + .err = .resource_data_size_exceeds_max, + .token = node.id, + }); + }, + else => |e| return e, + }; + + // This intCast can't fail because the limitedWriter above guarantees that + // we will never write more than maxInt(u32) bytes. + const data_size: u32 = @intCast(data_buffer.items.len); + var header = try self.resourceHeader(node.id, node.type, .{ + .data_size = data_size, + }); + defer header.deinit(self.allocator); + + header.applyMemoryFlags(node.common_resource_attributes, self.source); + header.applyOptionalStatements(node.optional_statements, self.source, self.input_code_pages); + + try header.write(writer, .{ .diagnostics = self.diagnostics, .token = node.id }); + + var data_fbs = std.io.fixedBufferStream(data_buffer.items); + try writeResourceData(writer, data_fbs.reader(), data_size); + } + + /// Expects `data_writer` to be a LimitedWriter limited to u32, meaning all writes to + /// the writer within this function could return error.NoSpaceLeft + pub fn writeMenuData(self: *Compiler, node: *Node.Menu, data_writer: anytype, resource: Resource) !void { + // menu header + const version: u16 = if (resource == .menu) 0 else 1; + try data_writer.writeInt(u16, version, .little); + const header_size: u16 = if (resource == .menu) 0 else 4; + try data_writer.writeInt(u16, header_size, .little); // cbHeaderSize + // Note: There can be extra bytes at the end of this header (`rgbExtra`), + // but they are always zero-length for us, so we don't 
write anything + // (the length of the rgbExtra field is inferred from the header_size). + // MENU => rgbExtra: [cbHeaderSize]u8 + // MENUEX => rgbExtra: [cbHeaderSize-4]u8 + + if (resource == .menuex) { + if (node.help_id) |help_id_node| { + const help_id = evaluateNumberExpression(help_id_node, self.source, self.input_code_pages); + try data_writer.writeInt(u32, help_id.value, .little); + } else { + try data_writer.writeInt(u32, 0, .little); + } + } + + for (node.items, 0..) |item, i| { + const is_last = i == node.items.len - 1; + try self.writeMenuItem(item, data_writer, is_last); + } + } + + pub fn writeMenuItem(self: *Compiler, node: *Node, writer: anytype, is_last_of_parent: bool) !void { + switch (node.id) { + .menu_item_separator => { + // This is the 'alternate compability form' of the separator, see + // https://devblogs.microsoft.com/oldnewthing/20080710-00/?p=21673 + // + // The 'correct' way is to set the MF_SEPARATOR flag, but the Win32 RC + // compiler still uses this alternate form, so that's what we use too. 
+ var flags = res.MenuItemFlags{}; + if (is_last_of_parent) flags.markLast(); + try writer.writeInt(u16, flags.value, .little); + try writer.writeInt(u16, 0, .little); // id + try writer.writeInt(u16, 0, .little); // null-terminated UTF-16 text + }, + .menu_item => { + const menu_item = @fieldParentPtr(Node.MenuItem, "base", node); + var flags = res.MenuItemFlags{}; + for (menu_item.option_list) |option_token| { + // This failing would be a bug in the parser + const option = rc.MenuItem.Option.map.get(option_token.slice(self.source)) orelse unreachable; + flags.apply(option); + } + if (is_last_of_parent) flags.markLast(); + try writer.writeInt(u16, flags.value, .little); + + var result = evaluateNumberExpression(menu_item.result, self.source, self.input_code_pages); + try writer.writeInt(u16, result.asWord(), .little); + + var text = try self.parseQuotedStringAsWideString(menu_item.text); + defer self.allocator.free(text); + try writer.writeAll(std.mem.sliceAsBytes(text[0 .. text.len + 1])); + }, + .popup => { + const popup = @fieldParentPtr(Node.Popup, "base", node); + var flags = res.MenuItemFlags{ .value = res.MF.POPUP }; + for (popup.option_list) |option_token| { + // This failing would be a bug in the parser + const option = rc.MenuItem.Option.map.get(option_token.slice(self.source)) orelse unreachable; + flags.apply(option); + } + if (is_last_of_parent) flags.markLast(); + try writer.writeInt(u16, flags.value, .little); + + var text = try self.parseQuotedStringAsWideString(popup.text); + defer self.allocator.free(text); + try writer.writeAll(std.mem.sliceAsBytes(text[0 .. text.len + 1])); + + for (popup.items, 0..) 
|item, i| { + const is_last = i == popup.items.len - 1; + try self.writeMenuItem(item, writer, is_last); + } + }, + inline .menu_item_ex, .popup_ex => |node_type| { + const menu_item = @fieldParentPtr(node_type.Type(), "base", node); + + if (menu_item.type) |flags| { + const value = evaluateNumberExpression(flags, self.source, self.input_code_pages); + try writer.writeInt(u32, value.value, .little); + } else { + try writer.writeInt(u32, 0, .little); + } + + if (menu_item.state) |state| { + const value = evaluateNumberExpression(state, self.source, self.input_code_pages); + try writer.writeInt(u32, value.value, .little); + } else { + try writer.writeInt(u32, 0, .little); + } + + if (menu_item.id) |id| { + const value = evaluateNumberExpression(id, self.source, self.input_code_pages); + try writer.writeInt(u32, value.value, .little); + } else { + try writer.writeInt(u32, 0, .little); + } + + var flags: u16 = 0; + if (is_last_of_parent) flags |= comptime @as(u16, @intCast(res.MF.END)); + // This constant doesn't seem to have a named #define, it's different than MF_POPUP + if (node_type == .popup_ex) flags |= 0x01; + try writer.writeInt(u16, flags, .little); + + var text = try self.parseQuotedStringAsWideString(menu_item.text); + defer self.allocator.free(text); + try writer.writeAll(std.mem.sliceAsBytes(text[0 .. text.len + 1])); + + // Only the combination of the flags u16 and the text bytes can cause + // non-DWORD alignment, so we can just use the byte length of those + // two values to realign to DWORD alignment. + const relevant_bytes = 2 + (text.len + 1) * 2; + try writeDataPadding(writer, @intCast(relevant_bytes)); + + if (node_type == .popup_ex) { + if (menu_item.help_id) |help_id_node| { + const help_id = evaluateNumberExpression(help_id_node, self.source, self.input_code_pages); + try writer.writeInt(u32, help_id.value, .little); + } else { + try writer.writeInt(u32, 0, .little); + } + + for (menu_item.items, 0..) 
|item, i| { + const is_last = i == menu_item.items.len - 1; + try self.writeMenuItem(item, writer, is_last); + } + } + }, + else => unreachable, + } + } + + pub fn writeVersionInfo(self: *Compiler, node: *Node.VersionInfo, writer: anytype) !void { + var data_buffer = std.ArrayList(u8).init(self.allocator); + defer data_buffer.deinit(); + // The node's length field (which is inclusive of the length of all of its children) is a u16 + // so limit the node's data size so that we know we can always specify the real size. + var limited_writer = limitedWriter(data_buffer.writer(), std.math.maxInt(u16)); + const data_writer = limited_writer.writer(); + + try data_writer.writeInt(u16, 0, .little); // placeholder size + try data_writer.writeInt(u16, res.FixedFileInfo.byte_len, .little); + try data_writer.writeInt(u16, res.VersionNode.type_binary, .little); + const key_bytes = std.mem.sliceAsBytes(res.FixedFileInfo.key[0 .. res.FixedFileInfo.key.len + 1]); + try data_writer.writeAll(key_bytes); + // The number of bytes written up to this point is always the same, since the name + // of the node is a constant (FixedFileInfo.key). The total number of bytes + // written so far is 38, so we need 2 padding bytes to get back to DWORD alignment + try data_writer.writeInt(u16, 0, .little); + + var fixed_file_info = res.FixedFileInfo{}; + for (node.fixed_info) |fixed_info| { + switch (fixed_info.id) { + .version_statement => { + const version_statement = @fieldParentPtr(Node.VersionStatement, "base", fixed_info); + const version_type = rc.VersionInfo.map.get(version_statement.type.slice(self.source)).?; + + // Ensure that all parts are cleared for each version, to properly account for + // potential duplicate PRODUCTVERSION/FILEVERSION statements + switch (version_type) { + .file_version => @memset(&fixed_file_info.file_version.parts, 0), + .product_version => @memset(&fixed_file_info.product_version.parts, 0), + else => unreachable, + } + + for (version_statement.parts, 0..) 
|part, i| { + const part_value = evaluateNumberExpression(part, self.source, self.input_code_pages); + if (part_value.is_long) { + try self.addErrorDetails(.{ + .err = .rc_would_error_u16_with_l_suffix, + .type = .warning, + .token = part.getFirstToken(), + .token_span_end = part.getLastToken(), + .extra = .{ .statement_with_u16_param = switch (version_type) { + .file_version => .fileversion, + .product_version => .productversion, + else => unreachable, + } }, + }); + try self.addErrorDetails(.{ + .err = .rc_would_error_u16_with_l_suffix, + .print_source_line = false, + .type = .note, + .token = part.getFirstToken(), + .token_span_end = part.getLastToken(), + .extra = .{ .statement_with_u16_param = switch (version_type) { + .file_version => .fileversion, + .product_version => .productversion, + else => unreachable, + } }, + }); + } + switch (version_type) { + .file_version => { + fixed_file_info.file_version.parts[i] = part_value.asWord(); + }, + .product_version => { + fixed_file_info.product_version.parts[i] = part_value.asWord(); + }, + else => unreachable, + } + } + }, + .simple_statement => { + const statement = @fieldParentPtr(Node.SimpleStatement, "base", fixed_info); + const statement_type = rc.VersionInfo.map.get(statement.identifier.slice(self.source)).?; + const value = evaluateNumberExpression(statement.value, self.source, self.input_code_pages); + switch (statement_type) { + .file_flags_mask => fixed_file_info.file_flags_mask = value.value, + .file_flags => fixed_file_info.file_flags = value.value, + .file_os => fixed_file_info.file_os = value.value, + .file_type => fixed_file_info.file_type = value.value, + .file_subtype => fixed_file_info.file_subtype = value.value, + else => unreachable, + } + }, + else => unreachable, + } + } + try fixed_file_info.write(data_writer); + + for (node.block_statements) |statement| { + self.writeVersionNode(statement, data_writer, &data_buffer) catch |err| switch (err) { + error.NoSpaceLeft => { + try 
self.addErrorDetails(.{ + .err = .version_node_size_exceeds_max, + .token = node.id, + }); + return self.addErrorDetailsAndFail(.{ + .err = .version_node_size_exceeds_max, + .type = .note, + .token = statement.getFirstToken(), + .token_span_end = statement.getLastToken(), + }); + }, + else => |e| return e, + }; + } + + // We know that data_buffer.items.len is within the limits of a u16, since we + // limited the writer to maxInt(u16) + const data_size: u16 = @intCast(data_buffer.items.len); + // And now that we know the full size of this node (including its children), set its size + std.mem.writeInt(u16, data_buffer.items[0..2], data_size, .little); + + var header = try self.resourceHeader(node.id, node.versioninfo, .{ + .data_size = data_size, + }); + defer header.deinit(self.allocator); + + header.applyMemoryFlags(node.common_resource_attributes, self.source); + + try header.write(writer, .{ .diagnostics = self.diagnostics, .token = node.id }); + + var data_fbs = std.io.fixedBufferStream(data_buffer.items); + try writeResourceData(writer, data_fbs.reader(), data_size); + } + + /// Expects writer to be a LimitedWriter limited to u16, meaning all writes to + /// the writer within this function could return error.NoSpaceLeft, and that buf.items.len + /// will never be able to exceed maxInt(u16). + pub fn writeVersionNode(self: *Compiler, node: *Node, writer: anytype, buf: *std.ArrayList(u8)) !void { + // We can assume that buf.items.len will never be able to exceed the limits of a u16 + try writeDataPadding(writer, @as(u16, @intCast(buf.items.len))); + + const node_and_children_size_offset = buf.items.len; + try writer.writeInt(u16, 0, .little); // placeholder for size + const data_size_offset = buf.items.len; + try writer.writeInt(u16, 0, .little); // placeholder for data size + const data_type_offset = buf.items.len; + // Data type is string unless the node contains values that are numbers. 
+ try writer.writeInt(u16, res.VersionNode.type_string, .little); + + switch (node.id) { + inline .block, .block_value => |node_type| { + const block_or_value = @fieldParentPtr(node_type.Type(), "base", node); + const parsed_key = try self.parseQuotedStringAsWideString(block_or_value.key); + defer self.allocator.free(parsed_key); + + const parsed_key_to_first_null = std.mem.sliceTo(parsed_key, 0); + try writer.writeAll(std.mem.sliceAsBytes(parsed_key_to_first_null[0 .. parsed_key_to_first_null.len + 1])); + + var has_number_value: bool = false; + for (block_or_value.values) |value_value_node_uncasted| { + const value_value_node = value_value_node_uncasted.cast(.block_value_value).?; + if (value_value_node.expression.isNumberExpression()) { + has_number_value = true; + break; + } + } + // The units used here are dependent on the type. If there are any numbers, then + // this is a byte count. If there are only strings, then this is a count of + // UTF-16 code units. + // + // The Win32 RC compiler miscompiles this count in the case of values that + // have a mix of numbers and strings. This is detected and a warning is emitted + // during parsing, so we can just do the correct thing here. + var values_size: usize = 0; + + try writeDataPadding(writer, @intCast(buf.items.len)); + + for (block_or_value.values, 0..) 
|value_value_node_uncasted, i| { + const value_value_node = value_value_node_uncasted.cast(.block_value_value).?; + const value_node = value_value_node.expression; + if (value_node.isNumberExpression()) { + const number = evaluateNumberExpression(value_node, self.source, self.input_code_pages); + // This is used to write u16 or u32 depending on the number's suffix + const data_wrapper = Data{ .number = number }; + try data_wrapper.write(writer); + // Numbers use byte count + values_size += if (number.is_long) 4 else 2; + } else { + std.debug.assert(value_node.isStringLiteral()); + const literal_node = value_node.cast(.literal).?; + const parsed_value = try self.parseQuotedStringAsWideString(literal_node.token); + defer self.allocator.free(parsed_value); + + const parsed_to_first_null = std.mem.sliceTo(parsed_value, 0); + try writer.writeAll(std.mem.sliceAsBytes(parsed_to_first_null)); + // Strings use UTF-16 code-unit count including the null-terminator, but + // only if there are no number values in the list. + var value_size = parsed_to_first_null.len; + if (has_number_value) value_size *= 2; // 2 bytes per UTF-16 code unit + values_size += value_size; + // The null-terminator is only included if there's a trailing comma + // or this is the last value. If the value evaluates to empty, then + // it never gets a null terminator. If there was an explicit null-terminator + // in the string, we still need to potentially add one since we already + // sliced to the terminator. 
+ const is_last = i == block_or_value.values.len - 1; + const is_empty = parsed_to_first_null.len == 0; + const is_only = block_or_value.values.len == 1; + if ((!is_empty or !is_only) and (is_last or value_value_node.trailing_comma)) { + try writer.writeInt(u16, 0, .little); + values_size += if (has_number_value) 2 else 1; + } + } + } + var data_size_slice = buf.items[data_size_offset..]; + std.mem.writeInt(u16, data_size_slice[0..@sizeOf(u16)], @as(u16, @intCast(values_size)), .little); + + if (has_number_value) { + const data_type_slice = buf.items[data_type_offset..]; + std.mem.writeInt(u16, data_type_slice[0..@sizeOf(u16)], res.VersionNode.type_binary, .little); + } + + if (node_type == .block) { + const block = block_or_value; + for (block.children) |child| { + try self.writeVersionNode(child, writer, buf); + } + } + }, + else => unreachable, + } + + const node_and_children_size = buf.items.len - node_and_children_size_offset; + const node_and_children_size_slice = buf.items[node_and_children_size_offset..]; + std.mem.writeInt(u16, node_and_children_size_slice[0..@sizeOf(u16)], @as(u16, @intCast(node_and_children_size)), .little); + } + + pub fn writeStringTable(self: *Compiler, node: *Node.StringTable) !void { + const language = getLanguageFromOptionalStatements(node.optional_statements, self.source, self.input_code_pages) orelse self.state.language; + + for (node.strings) |string_node| { + const string = @fieldParentPtr(Node.StringTableString, "base", string_node); + const string_id_data = try self.evaluateDataExpression(string.id); + const string_id = string_id_data.number.asWord(); + + self.state.string_tables.set( + self.arena, + language, + string_id, + string.string, + &node.base, + self.source, + self.input_code_pages, + self.state.version, + self.state.characteristics, + ) catch |err| switch (err) { + error.StringAlreadyDefined => { + // It might be nice to have these errors point to the ids rather than the + // string tokens, but that would mean 
storing the id token of each string + // which doesn't seem worth it just for slightly better error messages. + try self.addErrorDetails(ErrorDetails{ + .err = .string_already_defined, + .token = string.string, + .extra = .{ .string_and_language = .{ .id = string_id, .language = language } }, + }); + const existing_def_table = self.state.string_tables.tables.getPtr(language).?; + const existing_definition = existing_def_table.get(string_id).?; + return self.addErrorDetailsAndFail(ErrorDetails{ + .err = .string_already_defined, + .type = .note, + .token = existing_definition, + .extra = .{ .string_and_language = .{ .id = string_id, .language = language } }, + }); + }, + error.OutOfMemory => |e| return e, + }; + } + } + + /// Expects this to be a top-level LANGUAGE statement + pub fn writeLanguageStatement(self: *Compiler, node: *Node.LanguageStatement) void { + const primary = Compiler.evaluateNumberExpression(node.primary_language_id, self.source, self.input_code_pages); + const sublanguage = Compiler.evaluateNumberExpression(node.sublanguage_id, self.source, self.input_code_pages); + self.state.language.primary_language_id = @truncate(primary.value); + self.state.language.sublanguage_id = @truncate(sublanguage.value); + } + + /// Expects this to be a top-level VERSION or CHARACTERISTICS statement + pub fn writeTopLevelSimpleStatement(self: *Compiler, node: *Node.SimpleStatement) void { + const value = Compiler.evaluateNumberExpression(node.value, self.source, self.input_code_pages); + const statement_type = rc.TopLevelKeywords.map.get(node.identifier.slice(self.source)).?; + switch (statement_type) { + .characteristics => self.state.characteristics = value.value, + .version => self.state.version = value.value, + else => unreachable, + } + } + + pub const ResourceHeaderOptions = struct { + language: ?res.Language = null, + data_size: DWORD = 0, + }; + + pub fn resourceHeader(self: *Compiler, id_token: Token, type_token: Token, options: ResourceHeaderOptions) 
!ResourceHeader { + const id_bytes = self.sourceBytesForToken(id_token); + const type_bytes = self.sourceBytesForToken(type_token); + return ResourceHeader.init( + self.allocator, + id_bytes, + type_bytes, + options.data_size, + options.language orelse self.state.language, + self.state.version, + self.state.characteristics, + ) catch |err| switch (err) { + error.OutOfMemory => |e| return e, + error.TypeNonAsciiOrdinal => { + const win32_rc_ordinal = NameOrOrdinal.maybeNonAsciiOrdinalFromString(type_bytes).?; + try self.addErrorDetails(.{ + .err = .invalid_digit_character_in_ordinal, + .type = .err, + .token = type_token, + }); + return self.addErrorDetailsAndFail(.{ + .err = .win32_non_ascii_ordinal, + .type = .note, + .token = type_token, + .print_source_line = false, + .extra = .{ .number = win32_rc_ordinal.ordinal }, + }); + }, + error.IdNonAsciiOrdinal => { + const win32_rc_ordinal = NameOrOrdinal.maybeNonAsciiOrdinalFromString(id_bytes).?; + try self.addErrorDetails(.{ + .err = .invalid_digit_character_in_ordinal, + .type = .err, + .token = id_token, + }); + return self.addErrorDetailsAndFail(.{ + .err = .win32_non_ascii_ordinal, + .type = .note, + .token = id_token, + .print_source_line = false, + .extra = .{ .number = win32_rc_ordinal.ordinal }, + }); + }, + }; + } + + pub const ResourceHeader = struct { + name_value: NameOrOrdinal, + type_value: NameOrOrdinal, + language: res.Language, + memory_flags: MemoryFlags, + data_size: DWORD, + version: DWORD, + characteristics: DWORD, + data_version: DWORD = 0, + + pub const InitError = error{ OutOfMemory, IdNonAsciiOrdinal, TypeNonAsciiOrdinal }; + + pub fn init(allocator: Allocator, id_bytes: SourceBytes, type_bytes: SourceBytes, data_size: DWORD, language: res.Language, version: DWORD, characteristics: DWORD) InitError!ResourceHeader { + const type_value = type: { + const resource_type = Resource.fromString(type_bytes); + if (res.RT.fromResource(resource_type)) |rt_constant| { + break :type NameOrOrdinal{ 
.ordinal = @intFromEnum(rt_constant) }; + } else { + break :type try NameOrOrdinal.fromString(allocator, type_bytes); + } + }; + errdefer type_value.deinit(allocator); + if (type_value == .name) { + if (NameOrOrdinal.maybeNonAsciiOrdinalFromString(type_bytes)) |_| { + return error.TypeNonAsciiOrdinal; + } + } + + const name_value = try NameOrOrdinal.fromString(allocator, id_bytes); + errdefer name_value.deinit(allocator); + if (name_value == .name) { + if (NameOrOrdinal.maybeNonAsciiOrdinalFromString(id_bytes)) |_| { + return error.IdNonAsciiOrdinal; + } + } + + const predefined_resource_type = type_value.predefinedResourceType(); + + return ResourceHeader{ + .name_value = name_value, + .type_value = type_value, + .data_size = data_size, + .memory_flags = MemoryFlags.defaults(predefined_resource_type), + .language = language, + .version = version, + .characteristics = characteristics, + }; + } + + pub fn deinit(self: ResourceHeader, allocator: Allocator) void { + self.name_value.deinit(allocator); + self.type_value.deinit(allocator); + } + + pub const SizeInfo = struct { + bytes: u32, + padding_after_name: u2, + }; + + fn calcSize(self: ResourceHeader) error{Overflow}!SizeInfo { + var header_size: u32 = 8; + header_size = try std.math.add( + u32, + header_size, + std.math.cast(u32, self.name_value.byteLen()) orelse return error.Overflow, + ); + header_size = try std.math.add( + u32, + header_size, + std.math.cast(u32, self.type_value.byteLen()) orelse return error.Overflow, + ); + const padding_after_name = numPaddingBytesNeeded(header_size); + header_size = try std.math.add(u32, header_size, padding_after_name); + header_size = try std.math.add(u32, header_size, 16); + return .{ .bytes = header_size, .padding_after_name = padding_after_name }; + } + + pub fn writeAssertNoOverflow(self: ResourceHeader, writer: anytype) !void { + return self.writeSizeInfo(writer, self.calcSize() catch unreachable); + } + + pub fn write(self: ResourceHeader, writer: anytype, err_ctx: 
errors.DiagnosticsContext) !void { + const size_info = self.calcSize() catch { + try err_ctx.diagnostics.append(.{ + .err = .resource_data_size_exceeds_max, + .token = err_ctx.token, + }); + return error.CompileError; + }; + return self.writeSizeInfo(writer, size_info); + } + + fn writeSizeInfo(self: ResourceHeader, writer: anytype, size_info: SizeInfo) !void { + try writer.writeInt(DWORD, self.data_size, .little); // DataSize + try writer.writeInt(DWORD, size_info.bytes, .little); // HeaderSize + try self.type_value.write(writer); // TYPE + try self.name_value.write(writer); // NAME + try writer.writeByteNTimes(0, size_info.padding_after_name); + + try writer.writeInt(DWORD, self.data_version, .little); // DataVersion + try writer.writeInt(WORD, self.memory_flags.value, .little); // MemoryFlags + try writer.writeInt(WORD, self.language.asInt(), .little); // LanguageId + try writer.writeInt(DWORD, self.version, .little); // Version + try writer.writeInt(DWORD, self.characteristics, .little); // Characteristics + } + + pub fn predefinedResourceType(self: ResourceHeader) ?res.RT { + return self.type_value.predefinedResourceType(); + } + + pub fn applyMemoryFlags(self: *ResourceHeader, tokens: []Token, source: []const u8) void { + applyToMemoryFlags(&self.memory_flags, tokens, source); + } + + pub fn applyOptionalStatements(self: *ResourceHeader, statements: []*Node, source: []const u8, code_page_lookup: *const CodePageLookup) void { + applyToOptionalStatements(&self.language, &self.version, &self.characteristics, statements, source, code_page_lookup); + } + }; + + fn applyToMemoryFlags(flags: *MemoryFlags, tokens: []Token, source: []const u8) void { + for (tokens) |token| { + const attribute = rc.CommonResourceAttributes.map.get(token.slice(source)).?; + flags.set(attribute); + } + } + + /// RT_GROUP_ICON and RT_GROUP_CURSOR have their own special rules for memory flags + fn applyToGroupMemoryFlags(flags: *MemoryFlags, tokens: []Token, source: []const u8) void { + // 
There's probably a cleaner implementation of this, but this will result in the same + // flags as the Win32 RC compiler for all 986,410 K-permutations of memory flags + // for an ICON resource. + // + // This was arrived at by iterating over the permutations and creating a + // list where each line looks something like this: + // MOVEABLE PRELOAD -> 0x1050 (MOVEABLE|PRELOAD|DISCARDABLE) + // + // and then noticing a few things: + + // 1. Any permutation that does not have PRELOAD in it just uses the + // default flags. + const initial_flags = flags.*; + var flags_set = std.enums.EnumSet(rc.CommonResourceAttributes).initEmpty(); + for (tokens) |token| { + const attribute = rc.CommonResourceAttributes.map.get(token.slice(source)).?; + flags_set.insert(attribute); + } + if (!flags_set.contains(.preload)) return; + + // 2. Any permutation of flags where applying only the PRELOAD and LOADONCALL flags + // results in no actual change by the end will just use the default flags. + // For example, `PRELOAD LOADONCALL` will result in default flags, but + // `LOADONCALL PRELOAD` will have PRELOAD set after they are both applied in order. + for (tokens) |token| { + const attribute = rc.CommonResourceAttributes.map.get(token.slice(source)).?; + switch (attribute) { + .preload, .loadoncall => flags.set(attribute), + else => {}, + } + } + if (flags.value == initial_flags.value) return; + + // 3. 
If none of DISCARDABLE, SHARED, or PURE is specified, then PRELOAD + // implies `flags &= ~SHARED` and LOADONCALL implies `flags |= SHARED` + const shared_set = comptime blk: { + var set = std.enums.EnumSet(rc.CommonResourceAttributes).initEmpty(); + set.insert(.discardable); + set.insert(.shared); + set.insert(.pure); + break :blk set; + }; + const discardable_shared_or_pure_specified = flags_set.intersectWith(shared_set).count() != 0; + for (tokens) |token| { + const attribute = rc.CommonResourceAttributes.map.get(token.slice(source)).?; + flags.setGroup(attribute, !discardable_shared_or_pure_specified); + } + } + + /// Only handles the 'base' optional statements that are shared between resource types. + fn applyToOptionalStatements(language: *res.Language, version: *u32, characteristics: *u32, statements: []*Node, source: []const u8, code_page_lookup: *const CodePageLookup) void { + for (statements) |node| switch (node.id) { + .language_statement => { + const language_statement = @fieldParentPtr(Node.LanguageStatement, "base", node); + language.* = languageFromLanguageStatement(language_statement, source, code_page_lookup); + }, + .simple_statement => { + const simple_statement = @fieldParentPtr(Node.SimpleStatement, "base", node); + const statement_type = rc.OptionalStatements.map.get(simple_statement.identifier.slice(source)) orelse continue; + const result = Compiler.evaluateNumberExpression(simple_statement.value, source, code_page_lookup); + switch (statement_type) { + .version => version.* = result.value, + .characteristics => characteristics.* = result.value, + else => unreachable, // only VERSION and CHARACTERISTICS should be in an optional statements list + } + }, + else => {}, + }; + } + + pub fn languageFromLanguageStatement(language_statement: *const Node.LanguageStatement, source: []const u8, code_page_lookup: *const CodePageLookup) res.Language { + const primary = Compiler.evaluateNumberExpression(language_statement.primary_language_id, source, 
code_page_lookup); + const sublanguage = Compiler.evaluateNumberExpression(language_statement.sublanguage_id, source, code_page_lookup); + return .{ + .primary_language_id = @truncate(primary.value), + .sublanguage_id = @truncate(sublanguage.value), + }; + } + + pub fn getLanguageFromOptionalStatements(statements: []*Node, source: []const u8, code_page_lookup: *const CodePageLookup) ?res.Language { + for (statements) |node| switch (node.id) { + .language_statement => { + const language_statement = @fieldParentPtr(Node.LanguageStatement, "base", node); + return languageFromLanguageStatement(language_statement, source, code_page_lookup); + }, + else => continue, + }; + return null; + } + + pub fn writeEmptyResource(writer: anytype) !void { + const header = ResourceHeader{ + .name_value = .{ .ordinal = 0 }, + .type_value = .{ .ordinal = 0 }, + .language = .{ + .primary_language_id = 0, + .sublanguage_id = 0, + }, + .memory_flags = .{ .value = 0 }, + .data_size = 0, + .version = 0, + .characteristics = 0, + }; + try header.writeAssertNoOverflow(writer); + } + + pub fn sourceBytesForToken(self: *Compiler, token: Token) SourceBytes { + return .{ + .slice = token.slice(self.source), + .code_page = self.input_code_pages.getForToken(token), + }; + } + + /// Helper that calls parseQuotedStringAsWideString with the relevant context + /// Resulting slice is allocated by `self.allocator`. 
+ pub fn parseQuotedStringAsWideString(self: *Compiler, token: Token) ![:0]u16 { + return literals.parseQuotedStringAsWideString( + self.allocator, + self.sourceBytesForToken(token), + .{ + .start_column = token.calculateColumn(self.source, 8, null), + .diagnostics = .{ .diagnostics = self.diagnostics, .token = token }, + }, + ); + } + + fn addErrorDetails(self: *Compiler, details: ErrorDetails) Allocator.Error!void { + try self.diagnostics.append(details); + } + + fn addErrorDetailsAndFail(self: *Compiler, details: ErrorDetails) error{ CompileError, OutOfMemory } { + try self.addErrorDetails(details); + return error.CompileError; + } +}; + +pub const OpenSearchPathError = std.fs.Dir.OpenError; + +fn openSearchPathDir(dir: std.fs.Dir, path: []const u8) OpenSearchPathError!std.fs.Dir { + // Validate the search path to avoid possible unreachable on invalid paths, + // see https://github.com/ziglang/zig/issues/15607 for why this is currently necessary. + try validateSearchPath(path); + return dir.openDir(path, .{}); +} + +/// Very crude attempt at validating a path. This is imperfect +/// and AFAIK it is effectively impossible to implement perfect path +/// validation, since it ultimately depends on the underlying filesystem. +/// Note that this function won't be necessary if/when +/// https://github.com/ziglang/zig/issues/15607 +/// is accepted/implemented. +fn validateSearchPath(path: []const u8) error{BadPathName}!void { + switch (builtin.os.tag) { + .windows => { + // This will return error.BadPathName on non-Win32 namespaced paths + // (e.g. the NT \??\ prefix, the device \\.\ prefix, etc). + // Those path types are something of an unavoidable way to + // still hit unreachable during the openDir call. 
+ var component_iterator = try std.fs.path.componentIterator(path); + while (component_iterator.next()) |component| { + // https://learn.microsoft.com/en-us/windows/win32/fileio/naming-a-file + if (std.mem.indexOfAny(u8, component.name, "\x00<>:\"|?*") != null) return error.BadPathName; + } + }, + else => { + if (std.mem.indexOfScalar(u8, path, 0) != null) return error.BadPathName; + }, + } +} + +pub const SearchDir = struct { + dir: std.fs.Dir, + path: ?[]const u8, + + pub fn deinit(self: *SearchDir, allocator: Allocator) void { + self.dir.close(); + if (self.path) |path| { + allocator.free(path); + } + } +}; + +/// Slurps the first `size` bytes read into `slurped_header` +pub fn HeaderSlurpingReader(comptime size: usize, comptime ReaderType: anytype) type { + return struct { + child_reader: ReaderType, + bytes_read: usize = 0, + slurped_header: [size]u8 = [_]u8{0x00} ** size, + + pub const Error = ReaderType.Error; + pub const Reader = std.io.Reader(*@This(), Error, read); + + pub fn read(self: *@This(), buf: []u8) Error!usize { + const amt = try self.child_reader.read(buf); + if (self.bytes_read < size) { + const bytes_to_add = @min(amt, size - self.bytes_read); + const end_index = self.bytes_read + bytes_to_add; + @memcpy(self.slurped_header[self.bytes_read..end_index], buf[0..bytes_to_add]); + } + self.bytes_read +|= amt; + return amt; + } + + pub fn reader(self: *@This()) Reader { + return .{ .context = self }; + } + }; +} + +pub fn headerSlurpingReader(comptime size: usize, reader: anytype) HeaderSlurpingReader(size, @TypeOf(reader)) { + return .{ .child_reader = reader }; +} + +/// Sort of like std.io.LimitedReader, but a Writer. +/// Returns an error if writing the requested number of bytes +/// would ever exceed bytes_left, i.e. it does not always +/// write up to the limit and instead will error if the +/// limit would be breached if the entire slice was written. 
pub fn LimitedWriter(comptime WriterType: type) type {
    return struct {
        inner_writer: WriterType,
        /// Remaining number of bytes that may still be written.
        bytes_left: u64,

        pub const Error = error{NoSpaceLeft} || WriterType.Error;
        pub const Writer = std.io.Writer(*Self, Error, write);

        const Self = @This();

        /// Fails with `error.NoSpaceLeft`, without writing anything, when
        /// `bytes` is longer than `bytes_left`. Otherwise forwards to the
        /// inner writer and deducts the number of bytes it reports as written.
        pub fn write(self: *Self, bytes: []const u8) Error!usize {
            if (bytes.len > self.bytes_left) return error.NoSpaceLeft;
            const amt = try self.inner_writer.write(bytes);
            self.bytes_left -= amt;
            return amt;
        }

        pub fn writer(self: *Self) Writer {
            return .{ .context = self };
        }
    };
}

/// Returns an initialised `LimitedWriter`
/// `bytes_left` is a `u64` to be able to take 64 bit file offsets
pub fn limitedWriter(inner_writer: anytype, bytes_left: u64) LimitedWriter(@TypeOf(inner_writer)) {
    return .{ .inner_writer = inner_writer, .bytes_left = bytes_left };
}

test "limitedWriter basic usage" {
    var buf: [4]u8 = undefined;
    var fbs = std.io.fixedBufferStream(&buf);
    var limited_stream = limitedWriter(fbs.writer(), 4);
    var writer = limited_stream.writer();

    try std.testing.expectEqual(@as(usize, 3), try writer.write("123"));
    try std.testing.expectEqualSlices(u8, "123", buf[0..3]);
    try std.testing.expectError(error.NoSpaceLeft, writer.write("45"));
    try std.testing.expectEqual(@as(usize, 1), try writer.write("4"));
    try std.testing.expectEqualSlices(u8, "1234", buf[0..4]);
    try std.testing.expectError(error.NoSpaceLeft, writer.write("5"));
}

/// Collects the FONT resources seen so far so that a FONTDIR resource can be
/// written from them with `writeResData`.
pub const FontDir = struct {
    fonts: std.ArrayListUnmanaged(Font) = .{},
    /// To keep track of which ids are set and where they were set from
    ids: std.AutoHashMapUnmanaged(u16, Token) = .{},

    pub const Font = struct {
        id: u16,
        /// Written verbatim into the FONTDIR entry for this font.
        header_bytes: [148]u8,
    };

    /// Releases both the font list and the id map.
    pub fn deinit(self: *FontDir, allocator: Allocator) void {
        self.fonts.deinit(allocator);
        // `ids` is owned by this struct as well; without this it is leaked.
        self.ids.deinit(allocator);
    }

    /// Registers `font` and remembers `id_token` as the place its id was set.
    /// Uses `putNoClobber`, so `font.id` must not have been added before.
    pub fn add(self: *FontDir, allocator: Allocator, font: Font, id_token: Token) !void {
        try self.ids.putNoClobber(allocator, font.id, id_token);
        // Keep `ids` and `fonts` in sync if the append below fails.
        errdefer _ = self.ids.remove(font.id);
        try self.fonts.append(allocator, font);
    }

    /// Writes the FONTDIR resource (header, font count, one entry per font,
    /// then data padding) to `writer`. Writes nothing if no fonts were added.
    pub fn writeResData(self: *FontDir, compiler: *Compiler, writer: anytype) !void {
        if (self.fonts.items.len == 0) return;

        // We know the number of fonts is limited to maxInt(u16) because fonts
        // must have a valid and unique u16 ordinal ID (trying to specify a FONT
        // with e.g. id 65537 will wrap around to 1 and be ignored if there's already
        // a font with that ID in the file).
        const num_fonts: u16 = @intCast(self.fonts.items.len);

        // u16 count + [(u16 id + 150 bytes) for each font]
        // Note: This works out to a maximum data_size of 9,961,322.
        // The count is widened first: multiplying the u16 directly would be
        // done in u16 arithmetic and overflow for more than 431 fonts.
        const data_size: u32 = 2 + (2 + 150) * @as(u32, num_fonts);

        var header = Compiler.ResourceHeader{
            .name_value = try NameOrOrdinal.nameFromString(compiler.allocator, .{ .slice = "FONTDIR", .code_page = .windows1252 }),
            .type_value = NameOrOrdinal{ .ordinal = @intFromEnum(res.RT.FONTDIR) },
            .memory_flags = res.MemoryFlags.defaults(res.RT.FONTDIR),
            .language = compiler.state.language,
            .version = compiler.state.version,
            .characteristics = compiler.state.characteristics,
            .data_size = data_size,
        };
        defer header.deinit(compiler.allocator);

        try header.writeAssertNoOverflow(writer);
        try writer.writeInt(u16, num_fonts, .little);
        for (self.fonts.items) |font| {
            // The format of the FONTDIR is a strange beast.
            // Technically, each FONT is seemingly meant to be written as a
            // FONTDIRENTRY with two trailing NUL-terminated strings corresponding to
            // the 'device name' and 'face name' of the .FNT file, but:
            //
            // 1. When dealing with .FNT files, the Win32 implementation
            //    gets the device name and face name from the wrong locations,
            //    so it's basically never going to write the real device/face name
            //    strings.
            // 2. When dealing with files 76-140 bytes long, the Win32 implementation
            //    can just crash (if there are no NUL bytes in the file).
            // 3. The 32-bit Win32 rc.exe uses a 148 byte size for the portion of
            //    the FONTDIRENTRY before the NUL-terminated strings, which
            //    does not match the documented FONTDIRENTRY size that (presumably)
            //    this format is meant to be using, so anything iterating the
            //    FONTDIR according to the available documentation will get bogus results.
            // 4. The FONT resource can be used for non-.FNT types like TTF and OTF,
            //    in which case emulating the Win32 behavior of unconditionally
            //    interpreting the bytes as a .FNT and trying to grab device/face names
            //    from random bytes in the TTF/OTF file can lead to weird behavior
            //    and errors in the Win32 implementation (for example, the device/face
            //    name fields are offsets into the file where the NUL-terminated
            //    string is located, but the Win32 implementation actually treats
            //    them as signed so if they are negative then the Win32 implementation
            //    will error; this happening for TTF fonts would just be a bug
            //    since the TTF could otherwise be valid)
            // 5. The FONTDIR resource doesn't actually seem to be used at all by
            //    anything that I've found, and instead in Windows 3.0 and newer
            //    it seems like the FONT resources are always just iterated/accessed
            //    directly without ever looking at the FONTDIR.
            //
            // All of these combined means that we:
            // - Do not need or want to emulate Win32 behavior here
            // - For maximum simplicity and compatibility, we just write the first
            //   148 bytes of the file without any interpretation (padded with
            //   zeroes to get up to 148 bytes if necessary), and then
            //   unconditionally write two NUL bytes, meaning that we always
            //   write 'device name' and 'face name' as if they were 0-length
            //   strings.
            //
            // This gives us byte-for-byte .RES compatibility in the common case while
            // allowing us to avoid any erroneous errors caused by trying to read
            // the face/device name from a bogus location. Note that the Win32
            // implementation never actually writes the real device/face name here
            // anyway (except in the bizarre case that a .FNT file has the proper
            // device/face name offsets within a reserved section of the .FNT file)
            // so there's no feasible way that anything can actually think that the
            // device name/face name in the FONTDIR is reliable.

            // First, the ID is written, though
            try writer.writeInt(u16, font.id, .little);
            try writer.writeAll(&font.header_bytes);
            try writer.writeByteNTimes(0, 2);
        }
        try Compiler.writeDataPadding(writer, data_size);
    }
};

pub const StringTablesByLanguage = struct {
    /// String tables for each language are written to the .res file in order depending on
    /// when the first STRINGTABLE for the language was defined, and all blocks for a given
    /// language are written contiguously.
    /// Using an ArrayHashMap here gives us this property for free.
    tables: std.AutoArrayHashMapUnmanaged(res.Language, StringTable) = .{},

    pub fn deinit(self: *StringTablesByLanguage, allocator: Allocator) void {
        // NOTE(review): only the outer map is released here; the `StringTable`
        // values it holds are not deinitialized. Confirm that callers either
        // deinit each table themselves or allocate from an arena.
        self.tables.deinit(allocator);
    }

    /// Records the string `id` for `language`, creating an empty `StringTable`
    /// for that language on first use, and forwards the remaining arguments to
    /// `StringTable.set`.
    pub fn set(
        self: *StringTablesByLanguage,
        allocator: Allocator,
        language: res.Language,
        id: u16,
        string_token: Token,
        node: *Node,
        source: []const u8,
        code_page_lookup: *const CodePageLookup,
        version: u32,
        characteristics: u32,
    ) StringTable.SetError!void {
        var get_or_put_result = try self.tables.getOrPut(allocator, language);
        if (!get_or_put_result.found_existing) {
            get_or_put_result.value_ptr.* = StringTable{};
        }
        return get_or_put_result.value_ptr.set(allocator, id, string_token, node, source, code_page_lookup, version, characteristics);
    }
};

pub const StringTable = struct {
    /// Blocks are written to the .res file in order depending on when the first string
    /// was added to the block (i.e.
`STRINGTABLE { 16 "b" 0 "a" }` would then get written + /// with block ID 2 (the one with "b") first and block ID 1 (the one with "a") second). + /// Using an ArrayHashMap here gives us this property for free. + blocks: std.AutoArrayHashMapUnmanaged(u16, Block) = .{}, + + pub const Block = struct { + strings: std.ArrayListUnmanaged(Token) = .{}, + set_indexes: std.bit_set.IntegerBitSet(16) = .{ .mask = 0 }, + memory_flags: MemoryFlags = MemoryFlags.defaults(res.RT.STRING), + characteristics: u32, + version: u32, + + /// Returns the index to insert the string into the `strings` list. + /// Returns null if the string should be appended. + fn getInsertionIndex(self: *Block, index: u8) ?u8 { + std.debug.assert(!self.set_indexes.isSet(index)); + + const first_set = self.set_indexes.findFirstSet() orelse return null; + if (first_set > index) return 0; + + const last_set = 15 - @clz(self.set_indexes.mask); + if (index > last_set) return null; + + var bit = first_set + 1; + var insertion_index: u8 = 1; + while (bit != index) : (bit += 1) { + if (self.set_indexes.isSet(bit)) insertion_index += 1; + } + return insertion_index; + } + + fn getTokenIndex(self: *Block, string_index: u8) ?u8 { + const count = self.strings.items.len; + if (count == 0) return null; + if (count == 1) return 0; + + const first_set = self.set_indexes.findFirstSet() orelse unreachable; + if (first_set == string_index) return 0; + const last_set = 15 - @clz(self.set_indexes.mask); + if (last_set == string_index) return @intCast(count - 1); + + if (first_set == last_set) return null; + + var bit = first_set + 1; + var token_index: u8 = 1; + while (bit < last_set) : (bit += 1) { + if (!self.set_indexes.isSet(bit)) continue; + if (bit == string_index) return token_index; + token_index += 1; + } + return null; + } + + fn dump(self: *Block) void { + var bit_it = self.set_indexes.iterator(.{}); + var string_index: usize = 0; + while (bit_it.next()) |bit_index| { + const token = 
self.strings.items[string_index]; + std.debug.print("{}: [{}] {any}\n", .{ bit_index, string_index, token }); + string_index += 1; + } + } + + pub fn applyAttributes(self: *Block, string_table: *Node.StringTable, source: []const u8, code_page_lookup: *const CodePageLookup) void { + Compiler.applyToMemoryFlags(&self.memory_flags, string_table.common_resource_attributes, source); + var dummy_language: res.Language = undefined; + Compiler.applyToOptionalStatements(&dummy_language, &self.version, &self.characteristics, string_table.optional_statements, source, code_page_lookup); + } + + fn trimToDoubleNUL(comptime T: type, str: []const T) []const T { + var last_was_null = false; + for (str, 0..) |c, i| { + if (c == 0) { + if (last_was_null) return str[0 .. i - 1]; + last_was_null = true; + } else { + last_was_null = false; + } + } + return str; + } + + test "trimToDoubleNUL" { + try std.testing.expectEqualStrings("a\x00b", trimToDoubleNUL(u8, "a\x00b")); + try std.testing.expectEqualStrings("a", trimToDoubleNUL(u8, "a\x00\x00b")); + } + + pub fn writeResData(self: *Block, compiler: *Compiler, language: res.Language, block_id: u16, writer: anytype) !void { + var data_buffer = std.ArrayList(u8).init(compiler.allocator); + defer data_buffer.deinit(); + const data_writer = data_buffer.writer(); + + var i: u8 = 0; + var string_i: u8 = 0; + while (true) : (i += 1) { + if (!self.set_indexes.isSet(i)) { + try data_writer.writeInt(u16, 0, .little); + if (i == 15) break else continue; + } + + const string_token = self.strings.items[string_i]; + const slice = string_token.slice(compiler.source); + const column = string_token.calculateColumn(compiler.source, 8, null); + const code_page = compiler.input_code_pages.getForToken(string_token); + const bytes = SourceBytes{ .slice = slice, .code_page = code_page }; + const utf16_string = try literals.parseQuotedStringAsWideString(compiler.allocator, bytes, .{ + .start_column = column, + .diagnostics = .{ .diagnostics = 
compiler.diagnostics, .token = string_token }, + }); + defer compiler.allocator.free(utf16_string); + + const trimmed_string = trim: { + // Two NUL characters in a row act as a terminator + // Note: This is only the case for STRINGTABLE strings + const trimmed = trimToDoubleNUL(u16, utf16_string); + // We also want to trim any trailing NUL characters + break :trim std.mem.trimRight(u16, trimmed, &[_]u16{0}); + }; + + // String literals are limited to maxInt(u15) codepoints, so these UTF-16 encoded + // strings are limited to maxInt(u15) * 2 = 65,534 code units (since 2 is the + // maximum number of UTF-16 code units per codepoint). + // This leaves room for exactly one NUL terminator. + var string_len_in_utf16_code_units: u16 = @intCast(trimmed_string.len); + // If the option is set, then a NUL terminator is added unconditionally. + // We already trimmed any trailing NULs, so we know it will be a new addition to the string. + if (compiler.null_terminate_string_table_strings) string_len_in_utf16_code_units += 1; + try data_writer.writeInt(u16, string_len_in_utf16_code_units, .little); + try data_writer.writeAll(std.mem.sliceAsBytes(trimmed_string)); + if (compiler.null_terminate_string_table_strings) { + try data_writer.writeInt(u16, 0, .little); + } + + if (i == 15) break; + string_i += 1; + } + + // This intCast will never be able to fail due to the length constraints on string literals. + // + // - STRINGTABLE resource definitions can can only provide one string literal per index. + // - STRINGTABLE strings are limited to maxInt(u16) UTF-16 code units (see 'string_len_in_utf16_code_units' + // above), which means that the maximum number of bytes per string literal is + // 2 * maxInt(u16) = 131,070 (since there are 2 bytes per UTF-16 code unit). 
+ // - Each Block/RT_STRING resource includes exactly 16 strings and each have a 2 byte + // length field, so the maximum number of total bytes in a RT_STRING resource's data is + // 16 * (131,070 + 2) = 2,097,152 which is well within the u32 max. + // + // Note: The string literal maximum length is enforced by the lexer. + const data_size: u32 = @intCast(data_buffer.items.len); + + const header = Compiler.ResourceHeader{ + .name_value = .{ .ordinal = block_id }, + .type_value = .{ .ordinal = @intFromEnum(res.RT.STRING) }, + .memory_flags = self.memory_flags, + .language = language, + .version = self.version, + .characteristics = self.characteristics, + .data_size = data_size, + }; + // The only variable parts of the header are name and type, which in this case + // we fully control and know are numbers, so they have a fixed size. + try header.writeAssertNoOverflow(writer); + + var data_fbs = std.io.fixedBufferStream(data_buffer.items); + try Compiler.writeResourceData(writer, data_fbs.reader(), data_size); + } + }; + + pub fn deinit(self: *StringTable, allocator: Allocator) void { + var it = self.blocks.iterator(); + while (it.next()) |entry| { + entry.value_ptr.strings.deinit(allocator); + } + self.blocks.deinit(allocator); + } + + const SetError = error{StringAlreadyDefined} || Allocator.Error; + + pub fn set( + self: *StringTable, + allocator: Allocator, + id: u16, + string_token: Token, + node: *Node, + source: []const u8, + code_page_lookup: *const CodePageLookup, + version: u32, + characteristics: u32, + ) SetError!void { + const block_id = (id / 16) + 1; + const string_index: u8 = @intCast(id & 0xF); + + var get_or_put_result = try self.blocks.getOrPut(allocator, block_id); + if (!get_or_put_result.found_existing) { + get_or_put_result.value_ptr.* = Block{ .version = version, .characteristics = characteristics }; + get_or_put_result.value_ptr.applyAttributes(node.cast(.string_table).?, source, code_page_lookup); + } else { + if 
(get_or_put_result.value_ptr.set_indexes.isSet(string_index)) { + return error.StringAlreadyDefined; + } + } + + var block = get_or_put_result.value_ptr; + if (block.getInsertionIndex(string_index)) |insertion_index| { + try block.strings.insert(allocator, insertion_index, string_token); + } else { + try block.strings.append(allocator, string_token); + } + block.set_indexes.set(string_index); + } + + pub fn get(self: *StringTable, id: u16) ?Token { + const block_id = (id / 16) + 1; + const string_index: u8 = @intCast(id & 0xF); + + const block = self.blocks.getPtr(block_id) orelse return null; + const token_index = block.getTokenIndex(string_index) orelse return null; + return block.strings.items[token_index]; + } + + pub fn dump(self: *StringTable) !void { + var it = self.iterator(); + while (it.next()) |entry| { + std.debug.print("block: {}\n", .{entry.key_ptr.*}); + entry.value_ptr.dump(); + } + } +}; + +test "StringTable" { + const S = struct { + fn makeDummyToken(id: usize) Token { + return Token{ + .id = .invalid, + .start = id, + .end = id, + .line_number = id, + }; + } + }; + const allocator = std.testing.allocator; + var string_table = StringTable{}; + defer string_table.deinit(allocator); + + var code_page_lookup = CodePageLookup.init(allocator, .windows1252); + defer code_page_lookup.deinit(); + + var dummy_node = Node.StringTable{ + .type = S.makeDummyToken(0), + .common_resource_attributes = &.{}, + .optional_statements = &.{}, + .begin_token = S.makeDummyToken(0), + .strings = &.{}, + .end_token = S.makeDummyToken(0), + }; + + // randomize an array of ids 0-99 + var ids = ids: { + var buf: [100]u16 = undefined; + var i: u16 = 0; + while (i < buf.len) : (i += 1) { + buf[i] = i; + } + break :ids buf; + }; + var prng = std.rand.DefaultPrng.init(0); + var random = prng.random(); + random.shuffle(u16, &ids); + + // set each one in the randomized order + for (ids) |id| { + try string_table.set(allocator, id, S.makeDummyToken(id), &dummy_node.base, "", 
/// Accumulates the diagnostics (`ErrorDetails`) produced while processing a
/// resource script, together with the heap-allocated strings that some
/// diagnostics refer to by index.
pub const Diagnostics = struct {
    /// Every diagnostic appended so far, in the order it was appended.
    errors: std.ArrayListUnmanaged(ErrorDetails) = .{},
    /// Append-only, cannot handle removing strings.
    /// Expects to own all strings within the list.
    strings: std.ArrayListUnmanaged([]const u8) = .{},
    /// Used for both lists and for every string stored in `strings`.
    allocator: std.mem.Allocator,

    /// Creates an empty `Diagnostics` that allocates with `allocator`.
    pub fn init(allocator: std.mem.Allocator) Diagnostics {
        return .{
            .allocator = allocator,
        };
    }

    /// Frees the diagnostics list, every owned string, and the string list.
    pub fn deinit(self: *Diagnostics) void {
        self.errors.deinit(self.allocator);
        for (self.strings.items) |str| {
            self.allocator.free(str);
        }
        self.strings.deinit(self.allocator);
    }

    /// Records one diagnostic. Fails only if growing the list fails.
    pub fn append(self: *Diagnostics, error_details: ErrorDetails) !void {
        try self.errors.append(self.allocator, error_details);
    }

    /// Unsigned integer type as narrow as the narrowest `FilenameStringIndex`
    /// among the `ErrorDetails` payloads listed here, so that an index returned
    /// by `putString` fits in any of them.
    const SmallestStringIndexType = std.meta.Int(.unsigned, @min(
        @bitSizeOf(ErrorDetails.FileOpenError.FilenameStringIndex),
        @min(
            @bitSizeOf(ErrorDetails.IconReadError.FilenameStringIndex),
            @bitSizeOf(ErrorDetails.BitmapReadError.FilenameStringIndex),
        ),
    ));

    /// Returns the index of the added string as the SmallestStringIndexType
    /// in order to avoid needing to `@intCast` it at callsites of putString.
    /// Instead, this function will error if the index would ever exceed the
    /// smallest FilenameStringIndex of an ErrorDetails type.
    ///
    /// `str` is copied; the copy is owned by this `Diagnostics` and is freed
    /// in `deinit`. Returns `error.OutOfMemory` both when allocation fails and
    /// when no string index is available.
    pub fn putString(self: *Diagnostics, str: []const u8) !SmallestStringIndexType {
        // Conservative bound: the largest representable index is never handed out.
        if (self.strings.items.len >= std.math.maxInt(SmallestStringIndexType)) {
            return error.OutOfMemory; // ran out of string indexes
        }
        const dupe = try self.allocator.dupe(u8, str);
        // `strings` only takes ownership once the append below succeeds;
        // release the copy if growing the list fails so it is not leaked.
        errdefer self.allocator.free(dupe);
        const index = self.strings.items.len;
        try self.strings.append(self.allocator, dupe);
        return @intCast(index);
    }

    /// Renders every recorded diagnostic to stderr while holding the stderr
    /// mutex. Rendering is best-effort: the first failure to render stops the
    /// output and is not reported to the caller.
    pub fn renderToStdErr(self: *Diagnostics, cwd: std.fs.Dir, source: []const u8, tty_config: std.io.tty.Config, source_mappings: ?SourceMappings) void {
        std.debug.getStderrMutex().lock();
        defer std.debug.getStderrMutex().unlock();
        const stderr = std.io.getStdErr().writer();
        for (self.errors.items) |err_details| {
            renderErrorMessage(self.allocator, stderr, tty_config, cwd, err_details, source, self.strings.items, source_mappings) catch return;
        }
    }

    /// Same as `renderToStdErr`, with the TTY configuration detected from stderr.
    pub fn renderToStdErrDetectTTY(self: *Diagnostics, cwd: std.fs.Dir, source: []const u8, source_mappings: ?SourceMappings) void {
        const tty_config = std.io.tty.detectConfig(std.io.getStdErr());
        return self.renderToStdErr(cwd, source, tty_config, source_mappings);
    }

    /// Returns true if any recorded diagnostic has the error code `err`.
    /// Only the `err` field is compared; the diagnostic's `type` is ignored.
    pub fn contains(self: *const Diagnostics, err: ErrorDetails.Error) bool {
        for (self.errors.items) |details| {
            if (details.err == err) return true;
        }
        return false;
    }

    /// Returns true if any recorded diagnostic has one of the error codes in
    /// `errors`. An empty `errors` slice always yields false.
    pub fn containsAny(self: *const Diagnostics, errors: []const ErrorDetails.Error) bool {
        for (self.errors.items) |details| {
            for (errors) |err| {
                if (details.err == err) return true;
            }
        }
        return false;
    }
};
+ token_span_end: ?Token = null, + type: Type = .err, + print_source_line: bool = true, + extra: union { + none: void, + expected: Token.Id, + number: u32, + expected_types: ExpectedTypes, + resource: rc.Resource, + string_and_language: StringAndLanguage, + file_open_error: FileOpenError, + icon_read_error: IconReadError, + icon_dir: IconDirContext, + bmp_read_error: BitmapReadError, + accelerator_error: AcceleratorError, + statement_with_u16_param: StatementWithU16Param, + menu_or_class: enum { class, menu }, + } = .{ .none = {} }, + + pub const Type = enum { + /// Fatal error, stops compilation + err, + /// Warning that does not affect compilation result + warning, + /// A note that typically provides further context for a warning/error + note, + /// An invisible diagnostic that is not printed to stderr but can + /// provide information useful when comparing the behavior of different + /// implementations. For example, a hint is emitted when a FONTDIR resource + /// was included in the .RES file which is significant because rc.exe + /// does something different than us, but ultimately it's not important + /// enough to be a warning/note. 
+ hint, + }; + + comptime { + // all fields in the extra union should be 32 bits or less + for (std.meta.fields(std.meta.fieldInfo(ErrorDetails, .extra).type)) |field| { + std.debug.assert(@bitSizeOf(field.type) <= 32); + } + } + + pub const StatementWithU16Param = enum(u32) { + fileversion, + productversion, + language, + }; + + pub const StringAndLanguage = packed struct(u32) { + id: u16, + language: res.Language, + }; + + pub const FileOpenError = packed struct(u32) { + err: FileOpenErrorEnum, + filename_string_index: FilenameStringIndex, + + pub const FilenameStringIndex = std.meta.Int(.unsigned, 32 - @bitSizeOf(FileOpenErrorEnum)); + pub const FileOpenErrorEnum = std.meta.FieldEnum(std.fs.File.OpenError); + + pub fn enumFromError(err: std.fs.File.OpenError) FileOpenErrorEnum { + return switch (err) { + inline else => |e| @field(ErrorDetails.FileOpenError.FileOpenErrorEnum, @errorName(e)), + }; + } + }; + + pub const IconReadError = packed struct(u32) { + err: IconReadErrorEnum, + icon_type: enum(u1) { cursor, icon }, + filename_string_index: FilenameStringIndex, + + pub const FilenameStringIndex = std.meta.Int(.unsigned, 32 - @bitSizeOf(IconReadErrorEnum) - 1); + pub const IconReadErrorEnum = std.meta.FieldEnum(ico.ReadError); + + pub fn enumFromError(err: ico.ReadError) IconReadErrorEnum { + return switch (err) { + inline else => |e| @field(ErrorDetails.IconReadError.IconReadErrorEnum, @errorName(e)), + }; + } + }; + + pub const IconDirContext = packed struct(u32) { + icon_type: enum(u1) { cursor, icon }, + icon_format: ico.ImageFormat, + index: u16, + bitmap_version: ico.BitmapHeader.Version = .unknown, + _: Padding = 0, + + pub const Padding = std.meta.Int(.unsigned, 15 - @bitSizeOf(ico.BitmapHeader.Version) - @bitSizeOf(ico.ImageFormat)); + }; + + pub const BitmapReadError = packed struct(u32) { + err: BitmapReadErrorEnum, + filename_string_index: FilenameStringIndex, + + pub const FilenameStringIndex = std.meta.Int(.unsigned, 32 - 
/// Set of token/value kinds that would have been accepted at some position.
/// Each `bool` field is one kind; `strings` maps a field name to the text
/// shown for it in error messages.
pub const ExpectedTypes = packed struct(u32) {
    number: bool = false,
    number_expression: bool = false,
    string_literal: bool = false,
    accelerator_type_or_option: bool = false,
    control_class: bool = false,
    literal: bool = false,
    // Note: This being 0 instead of undefined is arbitrary and something of a workaround,
    // see https://github.com/ziglang/zig/issues/15395
    _: u26 = 0,

    pub const strings = std.ComptimeStringMap([]const u8, .{
        .{ "number", "number" },
        .{ "number_expression", "number expression" },
        .{ "string_literal", "quoted string literal" },
        .{ "accelerator_type_or_option", "accelerator type or option [ASCII, VIRTKEY, etc]" },
        .{ "control_class", "control class [BUTTON, EDIT, etc]" },
        .{ "literal", "unquoted literal" },
    });

    /// Writes the display text of every set flag, in field declaration order,
    /// as an English list: "a" for one flag, "a or b" for two, and
    /// "a, b, or c" for three or more. Writes nothing when no flag is set.
    pub fn writeCommaSeparated(self: ExpectedTypes, writer: anytype) !void {
        const all_fields = @typeInfo(ExpectedTypes).Struct.fields;

        // Count the set flags; the padding field is not a bool and is skipped.
        var total: usize = 0;
        inline for (all_fields) |flag| {
            if (flag.type == bool) {
                if (@field(self, flag.name)) total += 1;
            }
        }
        if (total == 0) return;

        // Two items are joined by a plain space ("a or b"); longer lists use commas.
        const separator: []const u8 = if (total > 2) ", " else " ";

        var written: usize = 0;
        inline for (all_fields) |flag| {
            if (flag.type == bool) {
                if (@field(self, flag.name)) {
                    try writer.writeAll(strings.get(flag.name).?);
                    written += 1;
                    // Nothing follows the final item.
                    if (written == total) return;
                    try writer.writeAll(separator);
                    // Exactly one item left: introduce it with "or".
                    if (written + 1 == total) try writer.writeAll("or ");
                }
            }
        }
    }
};
+ expected_token, + /// `expected_types` is populated + expected_something_else, + /// `resource` is populated + resource_type_cant_use_raw_data, + /// `resource` is populated + id_must_be_ordinal, + /// `resource` is populated + name_or_id_not_allowed, + string_resource_as_numeric_type, + ascii_character_not_equivalent_to_virtual_key_code, + empty_menu_not_allowed, + rc_would_miscompile_version_value_padding, + rc_would_miscompile_version_value_byte_count, + code_page_pragma_in_included_file, + nested_resource_level_exceeds_max, + too_many_dialog_controls_or_toolbar_buttons, + nested_expression_level_exceeds_max, + close_paren_expression, + unary_plus_expression, + rc_could_miscompile_control_params, + + // Compiler + /// `string_and_language` is populated + string_already_defined, + font_id_already_defined, + /// `file_open_error` is populated + file_open_error, + /// `accelerator_error` is populated + invalid_accelerator_key, + accelerator_type_required, + rc_would_miscompile_control_padding, + rc_would_miscompile_control_class_ordinal, + /// `icon_dir` is populated + rc_would_error_on_icon_dir, + /// `icon_dir` is populated + format_not_supported_in_icon_dir, + /// `resource` is populated and contains the expected type + icon_dir_and_resource_type_mismatch, + /// `icon_read_error` is populated + icon_read_error, + /// `icon_dir` is populated + rc_would_error_on_bitmap_version, + /// `icon_dir` is populated + max_icon_ids_exhausted, + /// `bmp_read_error` is populated + bmp_read_error, + /// `number` is populated and contains a string index for which the string contains + /// the bytes of a `u64` (native endian). The `u64` contains the number of ignored bytes. + bmp_ignored_palette_bytes, + /// `number` is populated and contains a string index for which the string contains + /// the bytes of a `u64` (native endian). The `u64` contains the number of missing bytes. 
+ bmp_missing_palette_bytes, + /// `number` is populated and contains a string index for which the string contains + /// the bytes of a `u64` (native endian). The `u64` contains the number of miscompiled bytes. + rc_would_miscompile_bmp_palette_padding, + /// `number` is populated and contains a string index for which the string contains + /// the bytes of two `u64`s (native endian). The first contains the number of missing + /// palette bytes and the second contains the max number of missing palette bytes. + /// If type is `.note`, then `extra` is `none`. + bmp_too_many_missing_palette_bytes, + resource_header_size_exceeds_max, + resource_data_size_exceeds_max, + control_extra_data_size_exceeds_max, + version_node_size_exceeds_max, + fontdir_size_exceeds_max, + /// `number` is populated and contains a string index for the filename + number_expression_as_filename, + /// `number` is populated and contains the control ID that is a duplicate + control_id_already_defined, + /// `number` is populated and contains the disallowed codepoint + invalid_filename, + /// `statement_with_u16_param` is populated + rc_would_error_u16_with_l_suffix, + result_contains_fontdir, + /// `number` is populated and contains the ordinal value that the id would be miscompiled to + rc_would_miscompile_dialog_menu_id, + /// `number` is populated and contains the ordinal value that the value would be miscompiled to + rc_would_miscompile_dialog_class, + /// `menu_or_class` is populated and contains the type of the parameter statement + rc_would_miscompile_dialog_menu_or_class_id_forced_ordinal, + rc_would_miscompile_dialog_menu_id_starts_with_digit, + dialog_menu_id_was_uppercased, + /// `menu_or_class` is populated and contains the type of the parameter statement + duplicate_menu_or_class_skipped, + invalid_digit_character_in_ordinal, + + // Literals + /// `number` is populated + rc_would_miscompile_codepoint_byte_swap, + /// `number` is populated + rc_would_miscompile_codepoint_skip, + 
tab_converted_to_spaces, + + // General (used in various places) + /// `number` is populated and contains the value that the ordinal would have in the Win32 RC compiler implementation + win32_non_ascii_ordinal, + + // Initialization + /// `file_open_error` is populated, but `filename_string_index` is not + failed_to_open_cwd, + }; + + pub fn render(self: ErrorDetails, writer: anytype, source: []const u8, strings: []const []const u8) !void { + switch (self.err) { + .unfinished_string_literal => { + return writer.print("unfinished string literal at '{s}', expected closing '\"'", .{self.token.nameForErrorDisplay(source)}); + }, + .string_literal_too_long => { + return writer.print("string literal too long (max is currently {} characters)", .{self.extra.number}); + }, + .invalid_number_with_exponent => { + return writer.print("base 10 number literal with exponent is not allowed: {s}", .{self.token.slice(source)}); + }, + .invalid_digit_character_in_number_literal => switch (self.type) { + .err, .warning => return writer.writeAll("non-ASCII digit characters are not allowed in number literals"), + .note => return writer.writeAll("the Win32 RC compiler allows non-ASCII digit characters, but will miscompile them"), + .hint => return, + }, + .illegal_byte => { + return writer.print("character '{s}' is not allowed", .{std.fmt.fmtSliceEscapeUpper(self.token.slice(source))}); + }, + .illegal_byte_outside_string_literals => { + return writer.print("character '{s}' is not allowed outside of string literals", .{std.fmt.fmtSliceEscapeUpper(self.token.slice(source))}); + }, + .illegal_codepoint_outside_string_literals => { + // This is somewhat hacky, but we know that: + // - This error is only possible with codepoints outside of the Windows-1252 character range + // - So, the only supported code page that could generate this error is UTF-8 + // Therefore, we just assume the token bytes are UTF-8 and decode them to get the illegal + // codepoint. 
+ // + // FIXME: Support other code pages if they become relevant + const bytes = self.token.slice(source); + const codepoint = std.unicode.utf8Decode(bytes) catch unreachable; + return writer.print("codepoint <U+{X:0>4}> is not allowed outside of string literals", .{codepoint}); + }, + .illegal_byte_order_mark => { + return writer.writeAll("byte order mark <U+FEFF> is not allowed"); + }, + .illegal_private_use_character => { + return writer.writeAll("private use character <U+E000> is not allowed"); + }, + .found_c_style_escaped_quote => { + return writer.writeAll("escaping quotes with \\\" is not allowed (use \"\" instead)"); + }, + .code_page_pragma_missing_left_paren => { + return writer.writeAll("expected left parenthesis after 'code_page' in #pragma code_page"); + }, + .code_page_pragma_missing_right_paren => { + return writer.writeAll("expected right parenthesis after '<number>' in #pragma code_page"); + }, + .code_page_pragma_invalid_code_page => { + return writer.writeAll("invalid or unknown code page in #pragma code_page"); + }, + .code_page_pragma_not_integer => { + return writer.writeAll("code page is not a valid integer in #pragma code_page"); + }, + .code_page_pragma_overflow => { + return writer.writeAll("code page too large in #pragma code_page"); + }, + .code_page_pragma_unsupported_code_page => { + // We know that the token slice is a well-formed #pragma code_page(N), so + // we can skip to the first ( and then get the number that follows + const token_slice = self.token.slice(source); + var number_start = std.mem.indexOfScalar(u8, token_slice, '(').? 
+ 1; + while (std.ascii.isWhitespace(token_slice[number_start])) { + number_start += 1; + } + var number_slice = token_slice[number_start..number_start]; + while (std.ascii.isDigit(token_slice[number_start + number_slice.len])) { + number_slice.len += 1; + } + const number = std.fmt.parseUnsigned(u16, number_slice, 10) catch unreachable; + const code_page = CodePage.getByIdentifier(number) catch unreachable; + // TODO: Improve or maybe add a note making it more clear that the code page + // is valid and that the code page is unsupported purely due to a limitation + // in this compiler. + return writer.print("unsupported code page '{s} (id={})' in #pragma code_page", .{ @tagName(code_page), number }); + }, + .unfinished_raw_data_block => { + return writer.print("unfinished raw data block at '{s}', expected closing '}}' or 'END'", .{self.token.nameForErrorDisplay(source)}); + }, + .unfinished_string_table_block => { + return writer.print("unfinished STRINGTABLE block at '{s}', expected closing '}}' or 'END'", .{self.token.nameForErrorDisplay(source)}); + }, + .expected_token => { + return writer.print("expected '{s}', got '{s}'", .{ self.extra.expected.nameForErrorDisplay(), self.token.nameForErrorDisplay(source) }); + }, + .expected_something_else => { + try writer.writeAll("expected "); + try self.extra.expected_types.writeCommaSeparated(writer); + return writer.print("; got '{s}'", .{self.token.nameForErrorDisplay(source)}); + }, + .resource_type_cant_use_raw_data => switch (self.type) { + .err, .warning => try writer.print("expected '<filename>', found '{s}' (resource type '{s}' can't use raw data)", .{ self.token.nameForErrorDisplay(source), self.extra.resource.nameForErrorDisplay() }), + .note => try writer.print("if '{s}' is intended to be a filename, it must be specified as a quoted string literal", .{self.token.nameForErrorDisplay(source)}), + .hint => return, + }, + .id_must_be_ordinal => { + try writer.print("id of resource type '{s}' must be an ordinal 
(u16), got '{s}'", .{ self.extra.resource.nameForErrorDisplay(), self.token.nameForErrorDisplay(source) }); + }, + .name_or_id_not_allowed => { + try writer.print("name or id is not allowed for resource type '{s}'", .{self.extra.resource.nameForErrorDisplay()}); + }, + .string_resource_as_numeric_type => switch (self.type) { + .err, .warning => try writer.writeAll("the number 6 (RT_STRING) cannot be used as a resource type"), + .note => try writer.writeAll("using RT_STRING directly likely results in an invalid .res file, use a STRINGTABLE instead"), + .hint => return, + }, + .ascii_character_not_equivalent_to_virtual_key_code => { + // TODO: Better wording? This is what the Win32 RC compiler emits. + // This occurs when VIRTKEY and a control code is specified ("^c", etc) + try writer.writeAll("ASCII character not equivalent to virtual key code"); + }, + .empty_menu_not_allowed => { + try writer.print("empty menu of type '{s}' not allowed", .{self.token.nameForErrorDisplay(source)}); + }, + .rc_would_miscompile_version_value_padding => switch (self.type) { + .err, .warning => return writer.print("the padding before this quoted string value would be miscompiled by the Win32 RC compiler", .{}), + .note => return writer.print("to avoid the potential miscompilation, consider adding a comma between the key and the quoted string", .{}), + .hint => return, + }, + .rc_would_miscompile_version_value_byte_count => switch (self.type) { + .err, .warning => return writer.print("the byte count of this value would be miscompiled by the Win32 RC compiler", .{}), + .note => return writer.print("to avoid the potential miscompilation, do not mix numbers and strings within a value", .{}), + .hint => return, + }, + .code_page_pragma_in_included_file => { + try writer.print("#pragma code_page is not supported in an included resource file", .{}); + }, + .nested_resource_level_exceeds_max => switch (self.type) { + .err, .warning => { + const max = switch (self.extra.resource) { + 
.versioninfo => parse.max_nested_version_level, + .menu, .menuex => parse.max_nested_menu_level, + else => unreachable, + }; + return writer.print("{s} contains too many nested children (max is {})", .{ self.extra.resource.nameForErrorDisplay(), max }); + }, + .note => return writer.print("max {s} nesting level exceeded here", .{self.extra.resource.nameForErrorDisplay()}), + .hint => return, + }, + .too_many_dialog_controls_or_toolbar_buttons => switch (self.type) { + .err, .warning => return writer.print("{s} contains too many {s} (max is {})", .{ self.extra.resource.nameForErrorDisplay(), switch (self.extra.resource) { + .toolbar => "buttons", + else => "controls", + }, std.math.maxInt(u16) }), + .note => return writer.print("maximum number of {s} exceeded here", .{switch (self.extra.resource) { + .toolbar => "buttons", + else => "controls", + }}), + .hint => return, + }, + .nested_expression_level_exceeds_max => switch (self.type) { + .err, .warning => return writer.print("expression contains too many syntax levels (max is {})", .{parse.max_nested_expression_level}), + .note => return writer.print("maximum expression level exceeded here", .{}), + .hint => return, + }, + .close_paren_expression => { + try writer.writeAll("the Win32 RC compiler would accept ')' as a valid expression, but it would be skipped over and potentially lead to unexpected outcomes"); + }, + .unary_plus_expression => { + try writer.writeAll("the Win32 RC compiler may accept '+' as a unary operator here, but it is not supported in this implementation; consider omitting the unary +"); + }, + .rc_could_miscompile_control_params => switch (self.type) { + .err, .warning => return writer.print("this token could be erroneously skipped over by the Win32 RC compiler", .{}), + .note => return writer.print("to avoid the potential miscompilation, consider adding a comma after the style parameter", .{}), + .hint => return, + }, + .string_already_defined => switch (self.type) { + .err, .warning => { + 
const language_id = self.extra.string_and_language.language.asInt(); + const language_name = language_name: { + if (std.meta.intToEnum(lang.LanguageId, language_id)) |lang_enum_val| { + break :language_name @tagName(lang_enum_val); + } else |_| {} + if (language_id == lang.LOCALE_CUSTOM_UNSPECIFIED) { + break :language_name "LOCALE_CUSTOM_UNSPECIFIED"; + } + break :language_name "<UNKNOWN>"; + }; + return writer.print("string with id {d} (0x{X}) already defined for language {s} (0x{X})", .{ self.extra.string_and_language.id, self.extra.string_and_language.id, language_name, language_id }); + }, + .note => return writer.print("previous definition of string with id {d} (0x{X}) here", .{ self.extra.string_and_language.id, self.extra.string_and_language.id }), + .hint => return, + }, + .font_id_already_defined => switch (self.type) { + .err => return writer.print("font with id {d} already defined", .{self.extra.number}), + .warning => return writer.print("skipped duplicate font with id {d}", .{self.extra.number}), + .note => return writer.print("previous definition of font with id {d} here", .{self.extra.number}), + .hint => return, + }, + .file_open_error => { + try writer.print("unable to open file '{s}': {s}", .{ strings[self.extra.file_open_error.filename_string_index], @tagName(self.extra.file_open_error.err) }); + }, + .invalid_accelerator_key => { + try writer.print("invalid accelerator key '{s}': {s}", .{ self.token.nameForErrorDisplay(source), @tagName(self.extra.accelerator_error.err) }); + }, + .accelerator_type_required => { + try writer.print("accelerator type [ASCII or VIRTKEY] required when key is an integer", .{}); + }, + .rc_would_miscompile_control_padding => switch (self.type) { + .err, .warning => return writer.print("the padding before this control would be miscompiled by the Win32 RC compiler (it would insert 2 extra bytes of padding)", .{}), + .note => return writer.print("to avoid the potential miscompilation, consider removing any 'control 
data' blocks from the controls in this dialog", .{}), + .hint => return, + }, + .rc_would_miscompile_control_class_ordinal => switch (self.type) { + .err, .warning => return writer.print("the control class of this CONTROL would be miscompiled by the Win32 RC compiler", .{}), + .note => return writer.print("to avoid the potential miscompilation, consider specifying the control class using a string (BUTTON, EDIT, etc) instead of a number", .{}), + .hint => return, + }, + .rc_would_error_on_icon_dir => switch (self.type) { + .err, .warning => return writer.print("the resource at index {} of this {s} has the format '{s}'; this would be an error in the Win32 RC compiler", .{ self.extra.icon_dir.index, @tagName(self.extra.icon_dir.icon_type), @tagName(self.extra.icon_dir.icon_format) }), + .note => { + // The only note supported is one specific to exactly this combination + if (!(self.extra.icon_dir.icon_type == .icon and self.extra.icon_dir.icon_format == .riff)) unreachable; + try writer.print("animated RIFF icons within resource groups may not be well supported, consider using an animated icon file (.ani) instead", .{}); + }, + .hint => return, + }, + .format_not_supported_in_icon_dir => { + try writer.print("resource with format '{s}' (at index {}) is not allowed in {s} resource groups", .{ @tagName(self.extra.icon_dir.icon_format), self.extra.icon_dir.index, @tagName(self.extra.icon_dir.icon_type) }); + }, + .icon_dir_and_resource_type_mismatch => { + const unexpected_type: rc.Resource = if (self.extra.resource == .icon) .cursor else .icon; + // TODO: Better wording + try writer.print("resource type '{s}' does not match type '{s}' specified in the file", .{ self.extra.resource.nameForErrorDisplay(), unexpected_type.nameForErrorDisplay() }); + }, + .icon_read_error => { + try writer.print("unable to read {s} file '{s}': {s}", .{ @tagName(self.extra.icon_read_error.icon_type), strings[self.extra.icon_read_error.filename_string_index], 
@tagName(self.extra.icon_read_error.err) }); + }, + .rc_would_error_on_bitmap_version => switch (self.type) { + .err => try writer.print("the DIB at index {} of this {s} is of version '{s}'; this version is no longer allowed and should be upgraded to '{s}'", .{ + self.extra.icon_dir.index, + @tagName(self.extra.icon_dir.icon_type), + self.extra.icon_dir.bitmap_version.nameForErrorDisplay(), + ico.BitmapHeader.Version.@"nt3.1".nameForErrorDisplay(), + }), + .warning => try writer.print("the DIB at index {} of this {s} is of version '{s}'; this would be an error in the Win32 RC compiler", .{ + self.extra.icon_dir.index, + @tagName(self.extra.icon_dir.icon_type), + self.extra.icon_dir.bitmap_version.nameForErrorDisplay(), + }), + .note => unreachable, + .hint => return, + }, + .max_icon_ids_exhausted => switch (self.type) { + .err, .warning => try writer.print("maximum global icon/cursor ids exhausted (max is {})", .{std.math.maxInt(u16) - 1}), + .note => try writer.print("maximum icon/cursor id exceeded at index {} of this {s}", .{ self.extra.icon_dir.index, @tagName(self.extra.icon_dir.icon_type) }), + .hint => return, + }, + .bmp_read_error => { + try writer.print("invalid bitmap file '{s}': {s}", .{ strings[self.extra.bmp_read_error.filename_string_index], @tagName(self.extra.bmp_read_error.err) }); + }, + .bmp_ignored_palette_bytes => { + const bytes = strings[self.extra.number]; + const ignored_bytes = std.mem.readInt(u64, bytes[0..8], native_endian); + try writer.print("bitmap has {d} extra bytes preceding the pixel data which will be ignored", .{ignored_bytes}); + }, + .bmp_missing_palette_bytes => { + const bytes = strings[self.extra.number]; + const missing_bytes = std.mem.readInt(u64, bytes[0..8], native_endian); + try writer.print("bitmap has {d} missing color palette bytes which will be padded with zeroes", .{missing_bytes}); + }, + .rc_would_miscompile_bmp_palette_padding => { + const bytes = strings[self.extra.number]; + const miscompiled_bytes = 
std.mem.readInt(u64, bytes[0..8], native_endian); + try writer.print("the missing color palette bytes would be miscompiled by the Win32 RC compiler (the added padding bytes would include {d} bytes of the pixel data)", .{miscompiled_bytes}); + }, + .bmp_too_many_missing_palette_bytes => switch (self.type) { + .err, .warning => { + const bytes = strings[self.extra.number]; + const missing_bytes = std.mem.readInt(u64, bytes[0..8], native_endian); + const max_missing_bytes = std.mem.readInt(u64, bytes[8..16], native_endian); + try writer.print("bitmap has {} missing color palette bytes which exceeds the maximum of {}", .{ missing_bytes, max_missing_bytes }); + }, + // TODO: command line option + .note => try writer.writeAll("the maximum number of missing color palette bytes is configurable via <<TODO command line option>>"), + .hint => return, + }, + .resource_header_size_exceeds_max => { + try writer.print("resource's header length exceeds maximum of {} bytes", .{std.math.maxInt(u32)}); + }, + .resource_data_size_exceeds_max => switch (self.type) { + .err, .warning => return writer.print("resource's data length exceeds maximum of {} bytes", .{std.math.maxInt(u32)}), + .note => return writer.print("maximum data length exceeded here", .{}), + .hint => return, + }, + .control_extra_data_size_exceeds_max => switch (self.type) { + .err, .warning => try writer.print("control data length exceeds maximum of {} bytes", .{std.math.maxInt(u16)}), + .note => return writer.print("maximum control data length exceeded here", .{}), + .hint => return, + }, + .version_node_size_exceeds_max => switch (self.type) { + .err, .warning => return writer.print("version node tree size exceeds maximum of {} bytes", .{std.math.maxInt(u16)}), + .note => return writer.print("maximum tree size exceeded while writing this child", .{}), + .hint => return, + }, + .fontdir_size_exceeds_max => switch (self.type) { + .err, .warning => return writer.print("FONTDIR data length exceeds maximum of {} bytes", 
.{std.math.maxInt(u32)}), + .note => return writer.writeAll("this is likely due to the size of the combined lengths of the device/face names of all FONT resources"), + .hint => return, + }, + .number_expression_as_filename => switch (self.type) { + .err, .warning => return writer.writeAll("filename cannot be specified using a number expression, consider using a quoted string instead"), + .note => return writer.print("the Win32 RC compiler would evaluate this number expression as the filename '{s}'", .{strings[self.extra.number]}), + .hint => return, + }, + .control_id_already_defined => switch (self.type) { + .err, .warning => return writer.print("control with id {d} already defined for this dialog", .{self.extra.number}), + .note => return writer.print("previous definition of control with id {d} here", .{self.extra.number}), + .hint => return, + }, + .invalid_filename => { + const disallowed_codepoint = self.extra.number; + if (disallowed_codepoint < 128 and std.ascii.isPrint(@intCast(disallowed_codepoint))) { + try writer.print("evaluated filename contains a disallowed character: '{c}'", .{@as(u8, @intCast(disallowed_codepoint))}); + } else { + try writer.print("evaluated filename contains a disallowed codepoint: <U+{X:0>4}>", .{disallowed_codepoint}); + } + }, + .rc_would_error_u16_with_l_suffix => switch (self.type) { + .err, .warning => return writer.print("this {s} parameter would be an error in the Win32 RC compiler", .{@tagName(self.extra.statement_with_u16_param)}), + .note => return writer.writeAll("to avoid the error, remove any L suffixes from numbers within the parameter"), + .hint => return, + }, + .result_contains_fontdir => return, + .rc_would_miscompile_dialog_menu_id => switch (self.type) { + .err, .warning => return writer.print("the id of this menu would be miscompiled by the Win32 RC compiler", .{}), + .note => return writer.print("the Win32 RC compiler would evaluate the id as the ordinal/number value {d}", .{self.extra.number}), + .hint => 
return, + }, + .rc_would_miscompile_dialog_class => switch (self.type) { + .err, .warning => return writer.print("this class would be miscompiled by the Win32 RC compiler", .{}), + .note => return writer.print("the Win32 RC compiler would evaluate it as the ordinal/number value {d}", .{self.extra.number}), + .hint => return, + }, + .rc_would_miscompile_dialog_menu_or_class_id_forced_ordinal => switch (self.type) { + .err, .warning => return, + .note => return writer.print("to avoid the potential miscompilation, only specify one {s} per dialog resource", .{@tagName(self.extra.menu_or_class)}), + .hint => return, + }, + .rc_would_miscompile_dialog_menu_id_starts_with_digit => switch (self.type) { + .err, .warning => return, + .note => return writer.writeAll("to avoid the potential miscompilation, the first character of the id should not be a digit"), + .hint => return, + }, + .dialog_menu_id_was_uppercased => return, + .duplicate_menu_or_class_skipped => { + return writer.print("this {s} was ignored; when multiple {s} statements are specified, only the last takes precedence", .{ + @tagName(self.extra.menu_or_class), + @tagName(self.extra.menu_or_class), + }); + }, + .invalid_digit_character_in_ordinal => { + return writer.writeAll("non-ASCII digit characters are not allowed in ordinal (number) values"); + }, + .rc_would_miscompile_codepoint_byte_swap => switch (self.type) { + .err, .warning => return writer.print("codepoint U+{X} within a string literal would be miscompiled by the Win32 RC compiler (the bytes of the UTF-16 code unit would be swapped)", .{self.extra.number}), + .note => return writer.print("to avoid the potential miscompilation, an integer escape sequence in a wide string literal could be used instead: L\"\\x{X}\"", .{self.extra.number}), + .hint => return, + }, + .rc_would_miscompile_codepoint_skip => switch (self.type) { + .err, .warning => return writer.print("codepoint U+{X} within a string literal would be miscompiled by the Win32 RC compiler 
(the codepoint would be missing from the compiled resource)", .{self.extra.number}), + .note => return writer.print("to avoid the potential miscompilation, an integer escape sequence in a wide string literal could be used instead: L\"\\x{X}\"", .{self.extra.number}), + .hint => return, + }, + .tab_converted_to_spaces => switch (self.type) { + .err, .warning => return writer.writeAll("the tab character(s) in this string will be converted into a variable number of spaces (determined by the column of the tab character in the .rc file)"), + .note => return writer.writeAll("to include the tab character itself in a string, the escape sequence \\t should be used"), + .hint => return, + }, + .win32_non_ascii_ordinal => switch (self.type) { + .err, .warning => unreachable, + .note => return writer.print("the Win32 RC compiler would accept this as an ordinal but its value would be {}", .{self.extra.number}), + .hint => return, + }, + .failed_to_open_cwd => { + try writer.print("failed to open CWD for compilation: {s}", .{@tagName(self.extra.file_open_error.err)}); + }, + } + } + + pub const VisualTokenInfo = struct { + before_len: usize, + point_offset: usize, + after_len: usize, + }; + + pub fn visualTokenInfo(self: ErrorDetails, source_line_start: usize, source_line_end: usize) VisualTokenInfo { + // Note: A perfect solution here would involve full grapheme cluster + // awareness, but oh well. This will give incorrect offsets + // if there are any multibyte codepoints within the relevant span, + // and even more inflated for grapheme clusters. + // + // We mitigate this slightly when we know we'll be pointing at + // something that displays as 1 character. + return switch (self.err) { + // These can technically be more than 1 byte depending on encoding, + // but they always refer to one visual character/grapheme. 
+ .illegal_byte, + .illegal_byte_outside_string_literals, + .illegal_codepoint_outside_string_literals, + .illegal_byte_order_mark, + .illegal_private_use_character, + => .{ + .before_len = 0, + .point_offset = self.token.start - source_line_start, + .after_len = 0, + }, + else => .{ + .before_len = before: { + const start = @max(source_line_start, if (self.token_span_start) |span_start| span_start.start else self.token.start); + break :before self.token.start - start; + }, + .point_offset = self.token.start - source_line_start, + .after_len = after: { + const end = @min(source_line_end, if (self.token_span_end) |span_end| span_end.end else self.token.end); + // end may be less than start when pointing to EOF + if (end <= self.token.start) break :after 0; + break :after end - self.token.start - 1; + }, + }, + }; + } +}; + +pub fn renderErrorMessage(allocator: std.mem.Allocator, writer: anytype, tty_config: std.io.tty.Config, cwd: std.fs.Dir, err_details: ErrorDetails, source: []const u8, strings: []const []const u8, source_mappings: ?SourceMappings) !void { + if (err_details.type == .hint) return; + + const source_line_start = err_details.token.getLineStartForErrorDisplay(source); + // Treat tab stops as 1 column wide for error display purposes, + // and add one to get a 1-based column + const column = err_details.token.calculateColumn(source, 1, source_line_start) + 1; + + const corresponding_span: ?SourceMappings.CorrespondingSpan = if (source_mappings) |mappings| + mappings.getCorrespondingSpan(err_details.token.line_number) + else + null; + const corresponding_file: ?[]const u8 = if (source_mappings != null and corresponding_span != null) + source_mappings.?.files.get(corresponding_span.?.filename_offset) + else + null; + + const err_line = if (corresponding_span) |span| span.start_line else err_details.token.line_number; + + try tty_config.setColor(writer, .bold); + if (corresponding_file) |file| { + try writer.writeAll(file); + } else { + try 
tty_config.setColor(writer, .dim); + try writer.writeAll("<after preprocessor>"); + try tty_config.setColor(writer, .reset); + try tty_config.setColor(writer, .bold); + } + try writer.print(":{d}:{d}: ", .{ err_line, column }); + switch (err_details.type) { + .err => { + try tty_config.setColor(writer, .red); + try writer.writeAll("error: "); + }, + .warning => { + try tty_config.setColor(writer, .yellow); + try writer.writeAll("warning: "); + }, + .note => { + try tty_config.setColor(writer, .cyan); + try writer.writeAll("note: "); + }, + .hint => unreachable, + } + try tty_config.setColor(writer, .reset); + try tty_config.setColor(writer, .bold); + try err_details.render(writer, source, strings); + try writer.writeByte('\n'); + try tty_config.setColor(writer, .reset); + + if (!err_details.print_source_line) { + try writer.writeByte('\n'); + return; + } + + const source_line = err_details.token.getLineForErrorDisplay(source, source_line_start); + const visual_info = err_details.visualTokenInfo(source_line_start, source_line_start + source_line.len); + + // Need this to determine if the 'line originated from' note is worth printing + var source_line_for_display_buf = try std.ArrayList(u8).initCapacity(allocator, source_line.len); + defer source_line_for_display_buf.deinit(); + try writeSourceSlice(source_line_for_display_buf.writer(), source_line); + + // TODO: General handling of long lines, not tied to this specific error + if (err_details.err == .string_literal_too_long) { + const before_slice = source_line[0..@min(source_line.len, visual_info.point_offset + 16)]; + try writeSourceSlice(writer, before_slice); + try tty_config.setColor(writer, .dim); + try writer.writeAll("<...truncated...>"); + try tty_config.setColor(writer, .reset); + } else { + try writer.writeAll(source_line_for_display_buf.items); + } + try writer.writeByte('\n'); + + try tty_config.setColor(writer, .green); + const num_spaces = visual_info.point_offset - visual_info.before_len; + try 
writer.writeByteNTimes(' ', num_spaces); + try writer.writeByteNTimes('~', visual_info.before_len); + try writer.writeByte('^'); + if (visual_info.after_len > 0) { + var num_squiggles = visual_info.after_len; + if (err_details.err == .string_literal_too_long) { + num_squiggles = @min(num_squiggles, 15); + } + try writer.writeByteNTimes('~', num_squiggles); + } + try writer.writeByte('\n'); + try tty_config.setColor(writer, .reset); + + if (corresponding_span != null and corresponding_file != null) { + var corresponding_lines = try CorrespondingLines.init(allocator, cwd, err_details, source_line_for_display_buf.items, corresponding_span.?, corresponding_file.?); + defer corresponding_lines.deinit(allocator); + + if (!corresponding_lines.worth_printing_note) return; + + try tty_config.setColor(writer, .bold); + if (corresponding_file) |file| { + try writer.writeAll(file); + } else { + try tty_config.setColor(writer, .dim); + try writer.writeAll("<after preprocessor>"); + try tty_config.setColor(writer, .reset); + try tty_config.setColor(writer, .bold); + } + try writer.print(":{d}:{d}: ", .{ err_line, column }); + try tty_config.setColor(writer, .cyan); + try writer.writeAll("note: "); + try tty_config.setColor(writer, .reset); + try tty_config.setColor(writer, .bold); + try writer.writeAll("this line originated from line"); + if (corresponding_span.?.start_line != corresponding_span.?.end_line) { + try writer.print("s {}-{}", .{ corresponding_span.?.start_line, corresponding_span.?.end_line }); + } else { + try writer.print(" {}", .{corresponding_span.?.start_line}); + } + try writer.print(" of file '{s}'\n", .{corresponding_file.?}); + try tty_config.setColor(writer, .reset); + + if (!corresponding_lines.worth_printing_lines) return; + + if (corresponding_lines.lines_is_error_message) { + try tty_config.setColor(writer, .red); + try writer.writeAll(" | "); + try tty_config.setColor(writer, .reset); + try tty_config.setColor(writer, .dim); + try 
writer.writeAll(corresponding_lines.lines.items);
            try tty_config.setColor(writer, .reset);
            try writer.writeAll("\n\n");
            return;
        }

        try writer.writeAll(corresponding_lines.lines.items);
        try writer.writeAll("\n\n");
    }
}

/// The original (pre-preprocessing) source line(s) that an error's line maps
/// back to, together with flags that tell `renderErrorMessage` how much of the
/// "this line originated from" note is worth printing.
const CorrespondingLines = struct {
    /// When false, the "this line originated from" note is skipped entirely.
    worth_printing_note: bool = true,
    /// When false, the note is printed but the contents of `lines` are not.
    worth_printing_lines: bool = true,
    /// Either the text of the originating line(s), or (when
    /// `lines_is_error_message` is set) a description of why they could not
    /// be read. Freed by `deinit`.
    lines: std.ArrayListUnmanaged(u8) = .{},
    /// True when `lines` holds an error description rather than source text.
    lines_is_error_message: bool = false,

    /// Reads lines `corresponding_span.start_line` through
    /// `corresponding_span.end_line` of `corresponding_file` (opened relative
    /// to `cwd`) and compares them against `lines_for_comparison`.
    ///
    /// Failing to open the file, or not finding the requested lines, is not
    /// an error: the failure description is stored in `lines` and
    /// `lines_is_error_message` is set. Any other failure (e.g. out of memory
    /// or a read error) is returned, and the partially filled `lines` buffer
    /// is released before returning.
    ///
    /// On success the caller owns the result and must call `deinit` with the
    /// same allocator.
    pub fn init(allocator: std.mem.Allocator, cwd: std.fs.Dir, err_details: ErrorDetails, lines_for_comparison: []const u8, corresponding_span: SourceMappings.CorrespondingSpan, corresponding_file: []const u8) !CorrespondingLines {
        var corresponding_lines = CorrespondingLines{};
        // The caller only gets a chance to `deinit` when we return successfully,
        // so anything appended to `lines` must be freed here on the error paths.
        errdefer corresponding_lines.deinit(allocator);

        // We don't do line comparison for this error, so only print the note if the
        // line number is different
        if (err_details.err == .string_literal_too_long and err_details.token.line_number == corresponding_span.start_line) {
            corresponding_lines.worth_printing_note = false;
            return corresponding_lines;
        }

        // Don't print the originating line for this error, we know it's really long
        if (err_details.err == .string_literal_too_long) {
            corresponding_lines.worth_printing_lines = false;
            return corresponding_lines;
        }

        // The writer appends to `corresponding_lines.lines` through a pointer, so the
        // handle itself is never mutated.
        const writer = corresponding_lines.lines.writer(allocator);
        if (utils.openFileNotDir(cwd, corresponding_file, .{})) |file| {
            defer file.close();
            var buffered_reader = std.io.bufferedReader(file.reader());
            writeLinesFromStream(writer, buffered_reader.reader(), corresponding_span.start_line, corresponding_span.end_line) catch |err| switch (err) {
                error.LinesNotFound => {
                    // Discard whatever was written before the stream ran out and
                    // replace it with the failure description.
                    corresponding_lines.lines.clearRetainingCapacity();
                    try writer.print("unable to print line(s) from file: {s}", .{@errorName(err)});
                    corresponding_lines.lines_is_error_message = true;
                    return corresponding_lines;
                },
                else => |e| return e,
            };
        } else |err| {
            corresponding_lines.lines.clearRetainingCapacity();
            try writer.print("unable to print line(s) from file: {s}", .{@errorName(err)});
            corresponding_lines.lines_is_error_message = true;
            return corresponding_lines;
        }

        // If the lines are the same as they were before preprocessing, skip printing the note entirely
        if (std.mem.eql(u8, lines_for_comparison, corresponding_lines.lines.items)) {
            corresponding_lines.worth_printing_note = false;
        }
        return corresponding_lines;
    }

    /// Frees the memory held by `lines`. `allocator` must be the allocator
    /// that was passed to `init`.
    pub fn deinit(self: *CorrespondingLines, allocator: std.mem.Allocator) void {
        self.lines.deinit(allocator);
    }
};

fn writeSourceSlice(writer: anytype, slice: []const u8) !void {
    for (slice) |c| try writeSourceByte(writer, c);
}

inline fn writeSourceByte(writer: anytype, byte: u8) !void {
    switch (byte) {
        '\x00'...'\x08', '\x0E'...'\x1F', '\x7F' => try writer.writeAll("�"),
        // \r is seemingly ignored by the RC compiler so skipping it when printing source lines
        // could help avoid confusing output (e.g. RC\rDATA if printed verbatim would show up
        // in the console as DATA but the compiler reads it as RCDATA)
        //
        // NOTE: This is irrelevant when using the clang preprocessor, because unpaired \r
        //       characters get converted to \n, but may become relevant if another
        //       preprocessor is used instead.
+ '\r' => {}, + '\t', '\x0B', '\x0C' => try writer.writeByte(' '), + else => try writer.writeByte(byte), + } +} + +pub fn writeLinesFromStream(writer: anytype, input: anytype, start_line: usize, end_line: usize) !void { + var line_num: usize = 1; + var last_byte: u8 = 0; + while (try readByteOrEof(input)) |byte| { + switch (byte) { + '\n', '\r' => { + if (!utils.isLineEndingPair(last_byte, byte)) { + if (line_num == end_line) return; + if (line_num >= start_line) try writeSourceByte(writer, byte); + line_num += 1; + } else { + // reset last_byte to a non-line ending so that + // consecutive CRLF pairs don't get treated as one + // long line ending 'pair' + last_byte = 0; + continue; + } + }, + else => { + if (line_num >= start_line) try writeSourceByte(writer, byte); + }, + } + last_byte = byte; + } + if (line_num != end_line) { + return error.LinesNotFound; + } +} + +pub fn readByteOrEof(reader: anytype) !?u8 { + return reader.readByte() catch |err| switch (err) { + error.EndOfStream => return null, + else => |e| return e, + }; +} diff --git a/src/resinator/ico.zig b/lib/compiler/resinator/ico.zig diff --git a/lib/compiler/resinator/lang.zig b/lib/compiler/resinator/lang.zig @@ -0,0 +1,877 @@ +const std = @import("std"); + +/// This function is specific to how the Win32 RC command line interprets +/// language IDs specified as integers. 
/// - Always interpreted as hexadecimal, but explicit 0x prefix is also allowed
/// - Wraps on overflow of u16
/// - Stops parsing on any invalid hexadecimal digits
/// - Errors if a digit is not the first char
/// - `-` (negative) prefix is allowed
/// - Errors on an empty string (there is no first char to inspect)
pub fn parseInt(str: []const u8) error{InvalidLanguageId}!u16 {
    // Guard the `buf[0]` access below; indexing an empty slice is out of bounds.
    if (str.len == 0) return error.InvalidLanguageId;

    var result: u16 = 0;
    const radix: u8 = 16;
    var buf = str;

    const Prefix = enum { none, minus };
    var prefix: Prefix = .none;
    switch (buf[0]) {
        '-' => {
            prefix = .minus;
            buf = buf[1..];
        },
        else => {},
    }
    // NOTE(review): a lone `-` leaves `buf` empty here, so the loop below never
    // runs and the result is 0 -- confirm this matches the Win32 RC behavior.

    // Only strip the prefix when at least one character follows it; a bare
    // "0x" is parsed as the digit `0` followed by an invalid digit.
    if (buf.len > 2 and buf[0] == '0' and buf[1] == 'x') {
        buf = buf[2..];
    }

    for (buf, 0..) |c, i| {
        const digit = switch (c) {
            // On invalid digit for the radix, just stop parsing but don't fail
            'a'...'f', 'A'...'F', '0'...'9' => std.fmt.charToDigit(c, radix) catch break,
            else => {
                // First digit must be valid
                if (i == 0) {
                    return error.InvalidLanguageId;
                }
                break;
            },
        };

        // Wrapping arithmetic: values that don't fit in a u16 wrap around
        // instead of being rejected.
        if (result != 0) {
            result *%= radix;
        }
        result +%= digit;
    }

    switch (prefix) {
        .none => {},
        // Two's complement negation within u16, e.g. `-1` becomes 0xffff
        .minus => result = 0 -% result,
    }

    return result;
}

test parseInt {
    try std.testing.expectEqual(@as(u16, 0x16), try parseInt("16"));
    try std.testing.expectEqual(@as(u16, 0x1a), try parseInt("0x1A"));
    try std.testing.expectEqual(@as(u16, 0x1a), try parseInt("0x1Azzzz"));
    try std.testing.expectEqual(@as(u16, 0xffff), try parseInt("-1"));
    try std.testing.expectEqual(@as(u16, 0xffea), try parseInt("-0x16"));
    try std.testing.expectEqual(@as(u16, 0x0), try parseInt("0o100"));
    try std.testing.expectEqual(@as(u16, 0x1), try parseInt("10001"));
    try std.testing.expectError(error.InvalidLanguageId, parseInt(""));
    try std.testing.expectError(error.InvalidLanguageId, parseInt("--1"));
    try std.testing.expectError(error.InvalidLanguageId, parseInt("0xha"));
    try std.testing.expectError(error.InvalidLanguageId, parseInt("¹"));
    try std.testing.expectError(error.InvalidLanguageId, parseInt("~1"));
}

/// This function is specific to how the Win32 RC command line interprets
/// 
language tags: invalid tags are rejected, but tags that don't have
/// a specific assigned ID but are otherwise valid enough will get
/// converted to an ID of LOCALE_CUSTOM_UNSPECIFIED.
pub fn tagToInt(tag: []const u8) error{InvalidLanguageTag}!u16 {
    const id = (try tagToId(tag)) orelse return LOCALE_CUSTOM_UNSPECIFIED;
    return @intFromEnum(id);
}

/// Looks up the `LanguageId` assigned to a language tag.
///
/// Returns `error.InvalidLanguageTag` when `parse` rejects the tag, and `null`
/// when the tag is well-formed but has no assigned ID. The lookup ignores case
/// and treats `-` and `_` as equivalent; a valid alternate sort order suffix
/// (e.g. `de-de_phoneb`) is dropped before the lookup.
pub fn tagToId(tag: []const u8) error{InvalidLanguageTag}!?LanguageId {
    const parsed = try parse(tag);
    // There are currently no language tags with assigned IDs that have
    // multiple suffixes, so we can skip the lookup.
    if (parsed.multiple_suffixes) return null;

    // Length of the longest field name in `LanguageId`, computed at comptime
    // so it can size the normalization buffer below.
    const max_tag_len = comptime max: {
        var longest = 0;
        for (@typeInfo(LanguageId).Enum.fields) |field| {
            if (field.name.len > longest) longest = field.name.len;
        }
        break :max longest;
    };
    // If the tag is longer than the longest tag that has an assigned ID,
    // then we can skip the lookup.
    if (tag.len > max_tag_len) return null;

    // To allow e.g. `de-de_phoneb` to get looked up as `de-de`, we need to
    // omit the suffix, but only if the tag contains a valid alternate sort order.
    const lookup_source = if (parsed.isSuffixValidSortOrder())
        // `+ 1` also drops the separator in front of the suffix
        tag[0 .. tag.len - (parsed.suffix.?.len + 1)]
    else
        tag;

    var lookup_buf: [max_tag_len]u8 = undefined;
    const lookup_key = normalizeTag(lookup_source, &lookup_buf);

    if (std.meta.stringToEnum(LanguageId, lookup_key)) |id| return id;

    // special case for a tag that has been mapped to the same ID
    // twice.
    return if (std.mem.eql(u8, "ff_latn_ng", lookup_key)) LanguageId.ff_ng else null;
}

test tagToId {
    try std.testing.expectEqual(LanguageId.ar_ae, (try tagToId("ar-ae")).?);
    try std.testing.expectEqual(LanguageId.ar_ae, (try tagToId("AR_AE")).?);
    try std.testing.expectEqual(LanguageId.ff_ng, (try tagToId("ff-ng")).?);
    // Special case
    try std.testing.expectEqual(LanguageId.ff_ng, (try tagToId("ff-Latn-NG")).?);
}

test "exhaustive tagToId" {
    inline for (@typeInfo(LanguageId).Enum.fields) |field| {
        const id = tagToId(field.name) catch |err| {
            std.debug.print("tag: {s}\n", .{field.name});
            return err;
        };
        try std.testing.expectEqual(@field(LanguageId, field.name), id orelse {
            std.debug.print("tag: {s}, got null\n", .{field.name});
            return error.TestExpectedEqual;
        });
    }
    var buf: [32]u8 = undefined;
    inline for (valid_alternate_sorts) |parsed_sort| {
        var fbs = std.io.fixedBufferStream(&buf);
        const writer = fbs.writer();
        writer.writeAll(parsed_sort.language_code) catch unreachable;
        writer.writeAll("-") catch unreachable;
        writer.writeAll(parsed_sort.country_code.?) catch unreachable;
        writer.writeAll("-") catch unreachable;
        writer.writeAll(parsed_sort.suffix.?) catch unreachable;
        const expected_field_name = comptime field: {
            var name_buf: [5]u8 = undefined;
            @memcpy(name_buf[0..parsed_sort.language_code.len], parsed_sort.language_code);
            name_buf[2] = '_';
            @memcpy(name_buf[3..], parsed_sort.country_code.?);
            break :field name_buf;
        };
        const expected = @field(LanguageId, &expected_field_name);
        const id = tagToId(fbs.getWritten()) catch |err| {
            std.debug.print("tag: {s}\n", .{fbs.getWritten()});
            return err;
        };
        try std.testing.expectEqual(expected, id orelse {
            std.debug.print("tag: {s}, expected: {}, got null\n", .{ fbs.getWritten(), expected });
            return error.TestExpectedEqual;
        });
    }
}

/// Writes `tag` into `buf` with every `-` replaced by `_` and every ASCII
/// letter lowercased, and returns the written prefix of `buf`.
/// Asserts that `buf` is at least as long as `tag`.
fn normalizeTag(tag: []const u8, buf: []u8) []u8 {
    std.debug.assert(buf.len >= tag.len);
    const normalized = buf[0..tag.len];
    for (tag, normalized) |c, *dest| {
        dest.* = if (c == '-') '_' else std.ascii.toLower(c);
    }
    return normalized;
}

/// https://winprotocoldoc.blob.core.windows.net/productionwindowsarchives/MS-LCID/%5bMS-LCID%5d.pdf#%5B%7B%22num%22%3A72%2C%22gen%22%3A0%7D%2C%7B%22name%22%3A%22XYZ%22%7D%2C69%2C574%2C0%5D
/// "When an LCID is requested for a locale without a
/// permanent LCID assignment, nor a temporary
/// assignment as above, the protocol will respond
/// with LOCALE_CUSTOM_UNSPECIFIED for all such
/// locales. Because this single value is used for
/// numerous possible locale names, it is impossible to
/// round trip this locale, even temporarily.
/// Applications should discard this value as soon as
/// possible and never persist it. If the system is
/// forced to respond to a request for
/// LCID_CUSTOM_UNSPECIFIED, it will fall back to
/// the current user locale. This is often incorrect but
/// may prevent an application or component from
/// failing. As the meaning of this temporary LCID is
/// unstable, it should never be used for interchange
/// or persisted data. This is a 1-to-many relationship
/// that is very unstable."
pub const LOCALE_CUSTOM_UNSPECIFIED = 0x1000;

pub const LANG_ENGLISH = 0x09;
pub const SUBLANG_ENGLISH_US = 0x01;

/// Combines a primary language ID and a sublanguage ID into a 16-bit
/// language identifier, equivalent to the Win32 `MAKELANGID` macro:
/// the primary language occupies the low 10 bits and the sublanguage
/// the high 6 bits, e.g. `MAKELANGID(LANG_ENGLISH, SUBLANG_ENGLISH_US)`
/// is `0x0409`.
///
/// https://learn.microsoft.com/en-us/windows/win32/intl/language-identifiers
pub fn MAKELANGID(primary: u10, sublang: u6) u16 {
    // Widen before shifting so the sublanguage bits are not shifted out of a u6.
    return (@as(u16, sublang) << 10) | primary;
}

/// Language tag format expressed as a regular expression (rough approximation):
///
/// [a-zA-Z]{1,3}([-_][a-zA-Z]{4})?([-_][a-zA-Z]{2})?([-_][a-zA-Z0-9]{1,8})?
///     lang    |     script      |      country    |       suffix
///
/// Notes:
/// - If lang code is 1 char, it seems to mean that everything afterwards uses suffix
///   parsing rules (e.g. `a-0` and `a-00000000` are allowed).
/// - There can also be any number of trailing suffix parts as long as they each
///   would be a valid suffix part, e.g. `en-us-blah-blah1-blah2-blah3` is allowed.
/// - When doing lookups, trailing suffix parts are taken into account, e.g.
///   `ca-es-valencia` is not considered equivalent to `ca-es-valencia-blah`.
/// - A suffix is only allowed if:
///   + Lang code is 1 char long, or
///   + A country code is present, or
///   + A script tag is not present and:
///      - the suffix is numeric-only and has a length of 3, or
///      - the lang is `qps` and the suffix is `ploca` or `plocm`
pub fn parse(lang_tag: []const u8) error{InvalidLanguageTag}!Parsed {
    var it = std.mem.splitAny(u8, lang_tag, "-_");
    const lang_code = it.first();
    const is_valid_lang_code = lang_code.len >= 1 and lang_code.len <= 3 and isAllAlphabetic(lang_code);
    if (!is_valid_lang_code) return error.InvalidLanguageTag;
    var parsed = Parsed{
        .language_code = lang_code,
    };
    // The second part could be a script tag, a country code, or a suffix
    if (it.next()) |part_str| {
        // The lang code being length 1 behaves strangely, so fully special case it.
+ if (lang_code.len == 1) { + // This is almost certainly not the 'right' way to do this, but I don't have a method + // to determine how exactly these language tags are parsed, and it seems like + // suffix parsing rules apply generally (digits allowed, length of 1 to 8). + // + // However, because we want to be able to lookup `x-iv-mathan` normally without + // `multiple_suffixes` being set to true, we need to make sure to treat two-length + // alphabetic parts as a country code. + if (part_str.len == 2 and isAllAlphabetic(part_str)) { + parsed.country_code = part_str; + } + // Everything else, though, we can just throw into the suffix as long as the normal + // rules apply. + else if (part_str.len > 0 and part_str.len <= 8 and isAllAlphanumeric(part_str)) { + parsed.suffix = part_str; + } else { + return error.InvalidLanguageTag; + } + } else if (part_str.len == 4 and isAllAlphabetic(part_str)) { + parsed.script_tag = part_str; + } else if (part_str.len == 2 and isAllAlphabetic(part_str)) { + parsed.country_code = part_str; + } + // Only a 3-len numeric suffix is allowed as the second part of a tag + else if (part_str.len == 3 and isAllNumeric(part_str)) { + parsed.suffix = part_str; + } + // Special case for qps-ploca and qps-plocm + else if (std.ascii.eqlIgnoreCase(lang_code, "qps") and + (std.ascii.eqlIgnoreCase(part_str, "ploca") or + std.ascii.eqlIgnoreCase(part_str, "plocm"))) + { + parsed.suffix = part_str; + } else { + return error.InvalidLanguageTag; + } + } else { + // If there's no part besides a 1-len lang code, then it is malformed + if (lang_code.len == 1) return error.InvalidLanguageTag; + return parsed; + } + if (parsed.script_tag != null) { + if (it.next()) |part_str| { + if (part_str.len == 2 and isAllAlphabetic(part_str)) { + parsed.country_code = part_str; + } else { + // Suffix is not allowed when a country code is not present. 
+ return error.InvalidLanguageTag; + } + } else { + return parsed; + } + } + // We've now parsed any potential script tag/country codes, so anything remaining + // is a suffix + while (it.next()) |part_str| { + if (part_str.len == 0 or part_str.len > 8 or !isAllAlphanumeric(part_str)) { + return error.InvalidLanguageTag; + } + if (parsed.suffix == null) { + parsed.suffix = part_str; + } else { + // In theory we could return early here but we still want to validate + // that each part is a valid suffix all the way to the end, e.g. + // we should reject `en-us-suffix-a-b-c-!!!` because of the invalid `!!!` + // suffix part. + parsed.multiple_suffixes = true; + } + } + return parsed; +} + +pub const Parsed = struct { + language_code: []const u8, + script_tag: ?[]const u8 = null, + country_code: ?[]const u8 = null, + /// Can be a sort order (e.g. phoneb) or something like valencia, 001, etc + suffix: ?[]const u8 = null, + /// There can be any number of suffixes, but we don't need to care what their + /// values are, we just need to know if any exist so that e.g. `ca-es-valencia-blah` + /// can be seen as different from `ca-es-valencia`. Storing this as a bool + /// allows us to avoid needing either (a) dynamic allocation or (b) a limit to + /// the number of suffixes allowed when parsing. + multiple_suffixes: bool = false, + + pub fn isSuffixValidSortOrder(self: Parsed) bool { + if (self.country_code == null) return false; + if (self.suffix == null) return false; + if (self.script_tag != null) return false; + if (self.multiple_suffixes) return false; + for (valid_alternate_sorts) |valid_sort| { + if (std.ascii.eqlIgnoreCase(valid_sort.language_code, self.language_code) and + std.ascii.eqlIgnoreCase(valid_sort.country_code.?, self.country_code.?) 
and + std.ascii.eqlIgnoreCase(valid_sort.suffix.?, self.suffix.?)) + { + return true; + } + } + return false; + } +}; + +/// https://learn.microsoft.com/en-us/openspecs/windows_protocols/ms-lcid/70feba9f-294e-491e-b6eb-56532684c37f +/// See the table following this text: "Alternate sorts can be selected by using one of the identifiers from the following table." +const valid_alternate_sorts = [_]Parsed{ + // Note: x-IV-mathan is omitted due to how lookups are implemented. + // This table is used to make e.g. `de-de_phoneb` get looked up + // as `de-de` (the suffix is omitted for the lookup), but x-iv-mathan + // instead needs to be looked up with the suffix included because + // `x-iv` is not a tag with an assigned ID. + .{ .language_code = "de", .country_code = "de", .suffix = "phoneb" }, + .{ .language_code = "hu", .country_code = "hu", .suffix = "tchncl" }, + .{ .language_code = "ka", .country_code = "ge", .suffix = "modern" }, + .{ .language_code = "zh", .country_code = "cn", .suffix = "stroke" }, + .{ .language_code = "zh", .country_code = "sg", .suffix = "stroke" }, + .{ .language_code = "zh", .country_code = "mo", .suffix = "stroke" }, + .{ .language_code = "zh", .country_code = "tw", .suffix = "pronun" }, + .{ .language_code = "zh", .country_code = "tw", .suffix = "radstr" }, + .{ .language_code = "ja", .country_code = "jp", .suffix = "radstr" }, + .{ .language_code = "zh", .country_code = "hk", .suffix = "radstr" }, + .{ .language_code = "zh", .country_code = "mo", .suffix = "radstr" }, + .{ .language_code = "zh", .country_code = "cn", .suffix = "phoneb" }, + .{ .language_code = "zh", .country_code = "sg", .suffix = "phoneb" }, +}; + +test "parse" { + try std.testing.expectEqualDeep(Parsed{ + .language_code = "en", + }, try parse("en")); + try std.testing.expectEqualDeep(Parsed{ + .language_code = "en", + .country_code = "us", + }, try parse("en-us")); + try std.testing.expectEqualDeep(Parsed{ + .language_code = "en", + .suffix = "123", + }, try 
parse("en-123")); + try std.testing.expectEqualDeep(Parsed{ + .language_code = "en", + .suffix = "123", + .multiple_suffixes = true, + }, try parse("en-123-blah")); + try std.testing.expectEqualDeep(Parsed{ + .language_code = "en", + .country_code = "us", + .suffix = "123", + .multiple_suffixes = true, + }, try parse("en-us_123-blah")); + try std.testing.expectEqualDeep(Parsed{ + .language_code = "eng", + .script_tag = "Latn", + }, try parse("eng-Latn")); + try std.testing.expectEqualDeep(Parsed{ + .language_code = "eng", + .script_tag = "Latn", + }, try parse("eng-Latn")); + try std.testing.expectEqualDeep(Parsed{ + .language_code = "ff", + .script_tag = "Latn", + .country_code = "NG", + }, try parse("ff-Latn-NG")); + try std.testing.expectEqualDeep(Parsed{ + .language_code = "qps", + .suffix = "Plocm", + }, try parse("qps-Plocm")); + try std.testing.expectEqualDeep(Parsed{ + .language_code = "qps", + .suffix = "ploca", + }, try parse("qps-ploca")); + try std.testing.expectEqualDeep(Parsed{ + .language_code = "x", + .country_code = "IV", + .suffix = "mathan", + }, try parse("x-IV-mathan")); + try std.testing.expectEqualDeep(Parsed{ + .language_code = "a", + .suffix = "a", + }, try parse("a-a")); + try std.testing.expectEqualDeep(Parsed{ + .language_code = "a", + .suffix = "000", + }, try parse("a-000")); + try std.testing.expectEqualDeep(Parsed{ + .language_code = "a", + .suffix = "00000000", + }, try parse("a-00000000")); + // suffix not allowed if script tag is present without country code + try std.testing.expectError(error.InvalidLanguageTag, parse("eng-Latn-suffix")); + // suffix must be 3 numeric digits if neither script tag nor country code is present + try std.testing.expectError(error.InvalidLanguageTag, parse("eng-suffix")); + try std.testing.expectError(error.InvalidLanguageTag, parse("en-plocm")); + // 1-len lang code is not allowed if it's the only part + try std.testing.expectError(error.InvalidLanguageTag, parse("e")); +} + +fn isAllAlphabetic(str: 
[]const u8) bool { + for (str) |c| { + if (!std.ascii.isAlphabetic(c)) return false; + } + return true; +} + +fn isAllAlphanumeric(str: []const u8) bool { + for (str) |c| { + if (!std.ascii.isAlphanumeric(c)) return false; + } + return true; +} + +fn isAllNumeric(str: []const u8) bool { + for (str) |c| { + if (!std.ascii.isDigit(c)) return false; + } + return true; +} + +/// Derived from https://learn.microsoft.com/en-us/openspecs/windows_protocols/ms-lcid/70feba9f-294e-491e-b6eb-56532684c37f +/// - Protocol Revision: 15.0 +/// - Language / Language ID / Language Tag table in Appendix A +/// - Removed all rows that have Language ID 0x1000 (LOCALE_CUSTOM_UNSPECIFIED) +/// - Normalized each language tag (lowercased, replaced all `-` with `_`) +/// - There is one special case where two tags are mapped to the same ID, the following +/// has been omitted and must be special cased during lookup to map to the ID ff_ng / 0x0467. +/// ff_latn_ng = 0x0467, // Fulah (Latin), Nigeria +/// - x_iv_mathan has been added which is not in the table but does appear in the Alternate sorts +/// table as 0x007F (LANG_INVARIANT). 
+pub const LanguageId = enum(u16) { + // Language tag = Language ID, // Language, Location (or type) + af = 0x0036, // Afrikaans + af_za = 0x0436, // Afrikaans, South Africa + sq = 0x001C, // Albanian + sq_al = 0x041C, // Albanian, Albania + gsw = 0x0084, // Alsatian + gsw_fr = 0x0484, // Alsatian, France + am = 0x005E, // Amharic + am_et = 0x045E, // Amharic, Ethiopia + ar = 0x0001, // Arabic + ar_dz = 0x1401, // Arabic, Algeria + ar_bh = 0x3C01, // Arabic, Bahrain + ar_eg = 0x0c01, // Arabic, Egypt + ar_iq = 0x0801, // Arabic, Iraq + ar_jo = 0x2C01, // Arabic, Jordan + ar_kw = 0x3401, // Arabic, Kuwait + ar_lb = 0x3001, // Arabic, Lebanon + ar_ly = 0x1001, // Arabic, Libya + ar_ma = 0x1801, // Arabic, Morocco + ar_om = 0x2001, // Arabic, Oman + ar_qa = 0x4001, // Arabic, Qatar + ar_sa = 0x0401, // Arabic, Saudi Arabia + ar_sy = 0x2801, // Arabic, Syria + ar_tn = 0x1C01, // Arabic, Tunisia + ar_ae = 0x3801, // Arabic, U.A.E. + ar_ye = 0x2401, // Arabic, Yemen + hy = 0x002B, // Armenian + hy_am = 0x042B, // Armenian, Armenia + as = 0x004D, // Assamese + as_in = 0x044D, // Assamese, India + az_cyrl = 0x742C, // Azerbaijani (Cyrillic) + az_cyrl_az = 0x082C, // Azerbaijani (Cyrillic), Azerbaijan + az = 0x002C, // Azerbaijani (Latin) + az_latn = 0x782C, // Azerbaijani (Latin) + az_latn_az = 0x042C, // Azerbaijani (Latin), Azerbaijan + bn = 0x0045, // Bangla + bn_bd = 0x0845, // Bangla, Bangladesh + bn_in = 0x0445, // Bangla, India + ba = 0x006D, // Bashkir + ba_ru = 0x046D, // Bashkir, Russia + eu = 0x002D, // Basque + eu_es = 0x042D, // Basque, Spain + be = 0x0023, // Belarusian + be_by = 0x0423, // Belarusian, Belarus + bs_cyrl = 0x641A, // Bosnian (Cyrillic) + bs_cyrl_ba = 0x201A, // Bosnian (Cyrillic), Bosnia and Herzegovina + bs_latn = 0x681A, // Bosnian (Latin) + bs = 0x781A, // Bosnian (Latin) + bs_latn_ba = 0x141A, // Bosnian (Latin), Bosnia and Herzegovina + br = 0x007E, // Breton + br_fr = 0x047E, // Breton, France + bg = 0x0002, // Bulgarian + bg_bg = 
0x0402, // Bulgarian, Bulgaria + my = 0x0055, // Burmese + my_mm = 0x0455, // Burmese, Myanmar + ca = 0x0003, // Catalan + ca_es = 0x0403, // Catalan, Spain + tzm_arab_ma = 0x045F, // Central Atlas Tamazight (Arabic), Morocco + ku = 0x0092, // Central Kurdish + ku_arab = 0x7c92, // Central Kurdish + ku_arab_iq = 0x0492, // Central Kurdish, Iraq + chr = 0x005C, // Cherokee + chr_cher = 0x7c5C, // Cherokee + chr_cher_us = 0x045C, // Cherokee, United States + zh_hans = 0x0004, // Chinese (Simplified) + zh = 0x7804, // Chinese (Simplified) + zh_cn = 0x0804, // Chinese (Simplified), People's Republic of China + zh_sg = 0x1004, // Chinese (Simplified), Singapore + zh_hant = 0x7C04, // Chinese (Traditional) + zh_hk = 0x0C04, // Chinese (Traditional), Hong Kong S.A.R. + zh_mo = 0x1404, // Chinese (Traditional), Macao S.A.R. + zh_tw = 0x0404, // Chinese (Traditional), Taiwan + co = 0x0083, // Corsican + co_fr = 0x0483, // Corsican, France + hr = 0x001A, // Croatian + hr_hr = 0x041A, // Croatian, Croatia + hr_ba = 0x101A, // Croatian (Latin), Bosnia and Herzegovina + cs = 0x0005, // Czech + cs_cz = 0x0405, // Czech, Czech Republic + da = 0x0006, // Danish + da_dk = 0x0406, // Danish, Denmark + prs = 0x008C, // Dari + prs_af = 0x048C, // Dari, Afghanistan + dv = 0x0065, // Divehi + dv_mv = 0x0465, // Divehi, Maldives + nl = 0x0013, // Dutch + nl_be = 0x0813, // Dutch, Belgium + nl_nl = 0x0413, // Dutch, Netherlands + dz_bt = 0x0C51, // Dzongkha, Bhutan + en = 0x0009, // English + en_au = 0x0C09, // English, Australia + en_bz = 0x2809, // English, Belize + en_ca = 0x1009, // English, Canada + en_029 = 0x2409, // English, Caribbean + en_hk = 0x3C09, // English, Hong Kong + en_in = 0x4009, // English, India + en_ie = 0x1809, // English, Ireland + en_jm = 0x2009, // English, Jamaica + en_my = 0x4409, // English, Malaysia + en_nz = 0x1409, // English, New Zealand + en_ph = 0x3409, // English, Republic of the Philippines + en_sg = 0x4809, // English, Singapore + en_za = 0x1C09, // 
English, South Africa + en_tt = 0x2c09, // English, Trinidad and Tobago + en_ae = 0x4C09, // English, United Arab Emirates + en_gb = 0x0809, // English, United Kingdom + en_us = 0x0409, // English, United States + en_zw = 0x3009, // English, Zimbabwe + et = 0x0025, // Estonian + et_ee = 0x0425, // Estonian, Estonia + fo = 0x0038, // Faroese + fo_fo = 0x0438, // Faroese, Faroe Islands + fil = 0x0064, // Filipino + fil_ph = 0x0464, // Filipino, Philippines + fi = 0x000B, // Finnish + fi_fi = 0x040B, // Finnish, Finland + fr = 0x000C, // French + fr_be = 0x080C, // French, Belgium + fr_cm = 0x2c0C, // French, Cameroon + fr_ca = 0x0c0C, // French, Canada + fr_029 = 0x1C0C, // French, Caribbean + fr_cd = 0x240C, // French, Congo, DRC + fr_ci = 0x300C, // French, Côte d'Ivoire + fr_fr = 0x040C, // French, France + fr_ht = 0x3c0C, // French, Haiti + fr_lu = 0x140C, // French, Luxembourg + fr_ml = 0x340C, // French, Mali + fr_ma = 0x380C, // French, Morocco + fr_mc = 0x180C, // French, Principality of Monaco + fr_re = 0x200C, // French, Reunion + fr_sn = 0x280C, // French, Senegal + fr_ch = 0x100C, // French, Switzerland + fy = 0x0062, // Frisian + fy_nl = 0x0462, // Frisian, Netherlands + ff = 0x0067, // Fulah + ff_latn = 0x7C67, // Fulah (Latin) + ff_ng = 0x0467, // Fulah, Nigeria + ff_latn_sn = 0x0867, // Fulah, Senegal + gl = 0x0056, // Galician + gl_es = 0x0456, // Galician, Spain + ka = 0x0037, // Georgian + ka_ge = 0x0437, // Georgian, Georgia + de = 0x0007, // German + de_at = 0x0C07, // German, Austria + de_de = 0x0407, // German, Germany + de_li = 0x1407, // German, Liechtenstein + de_lu = 0x1007, // German, Luxembourg + de_ch = 0x0807, // German, Switzerland + el = 0x0008, // Greek + el_gr = 0x0408, // Greek, Greece + kl = 0x006F, // Greenlandic + kl_gl = 0x046F, // Greenlandic, Greenland + gn = 0x0074, // Guarani + gn_py = 0x0474, // Guarani, Paraguay + gu = 0x0047, // Gujarati + gu_in = 0x0447, // Gujarati, India + ha = 0x0068, // Hausa (Latin) + ha_latn = 
0x7C68, // Hausa (Latin) + ha_latn_ng = 0x0468, // Hausa (Latin), Nigeria + haw = 0x0075, // Hawaiian + haw_us = 0x0475, // Hawaiian, United States + he = 0x000D, // Hebrew + he_il = 0x040D, // Hebrew, Israel + hi = 0x0039, // Hindi + hi_in = 0x0439, // Hindi, India + hu = 0x000E, // Hungarian + hu_hu = 0x040E, // Hungarian, Hungary + is = 0x000F, // Icelandic + is_is = 0x040F, // Icelandic, Iceland + ig = 0x0070, // Igbo + ig_ng = 0x0470, // Igbo, Nigeria + id = 0x0021, // Indonesian + id_id = 0x0421, // Indonesian, Indonesia + iu = 0x005D, // Inuktitut (Latin) + iu_latn = 0x7C5D, // Inuktitut (Latin) + iu_latn_ca = 0x085D, // Inuktitut (Latin), Canada + iu_cans = 0x785D, // Inuktitut (Syllabics) + iu_cans_ca = 0x045d, // Inuktitut (Syllabics), Canada + ga = 0x003C, // Irish + ga_ie = 0x083C, // Irish, Ireland + it = 0x0010, // Italian + it_it = 0x0410, // Italian, Italy + it_ch = 0x0810, // Italian, Switzerland + ja = 0x0011, // Japanese + ja_jp = 0x0411, // Japanese, Japan + kn = 0x004B, // Kannada + kn_in = 0x044B, // Kannada, India + kr_latn_ng = 0x0471, // Kanuri (Latin), Nigeria + ks = 0x0060, // Kashmiri + ks_arab = 0x0460, // Kashmiri, Perso-Arabic + ks_deva_in = 0x0860, // Kashmiri (Devanagari), India + kk = 0x003F, // Kazakh + kk_kz = 0x043F, // Kazakh, Kazakhstan + km = 0x0053, // Khmer + km_kh = 0x0453, // Khmer, Cambodia + quc = 0x0086, // K'iche + quc_latn_gt = 0x0486, // K'iche, Guatemala + rw = 0x0087, // Kinyarwanda + rw_rw = 0x0487, // Kinyarwanda, Rwanda + sw = 0x0041, // Kiswahili + sw_ke = 0x0441, // Kiswahili, Kenya + kok = 0x0057, // Konkani + kok_in = 0x0457, // Konkani, India + ko = 0x0012, // Korean + ko_kr = 0x0412, // Korean, Korea + ky = 0x0040, // Kyrgyz + ky_kg = 0x0440, // Kyrgyz, Kyrgyzstan + lo = 0x0054, // Lao + lo_la = 0x0454, // Lao, Lao P.D.R. 
+ la_va = 0x0476, // Latin, Vatican City + lv = 0x0026, // Latvian + lv_lv = 0x0426, // Latvian, Latvia + lt = 0x0027, // Lithuanian + lt_lt = 0x0427, // Lithuanian, Lithuania + dsb = 0x7C2E, // Lower Sorbian + dsb_de = 0x082E, // Lower Sorbian, Germany + lb = 0x006E, // Luxembourgish + lb_lu = 0x046E, // Luxembourgish, Luxembourg + mk = 0x002F, // Macedonian + mk_mk = 0x042F, // Macedonian, North Macedonia + ms = 0x003E, // Malay + ms_bn = 0x083E, // Malay, Brunei Darussalam + ms_my = 0x043E, // Malay, Malaysia + ml = 0x004C, // Malayalam + ml_in = 0x044C, // Malayalam, India + mt = 0x003A, // Maltese + mt_mt = 0x043A, // Maltese, Malta + mi = 0x0081, // Maori + mi_nz = 0x0481, // Maori, New Zealand + arn = 0x007A, // Mapudungun + arn_cl = 0x047A, // Mapudungun, Chile + mr = 0x004E, // Marathi + mr_in = 0x044E, // Marathi, India + moh = 0x007C, // Mohawk + moh_ca = 0x047C, // Mohawk, Canada + mn = 0x0050, // Mongolian (Cyrillic) + mn_cyrl = 0x7850, // Mongolian (Cyrillic) + mn_mn = 0x0450, // Mongolian (Cyrillic), Mongolia + mn_mong = 0x7C50, // Mongolian (Traditional Mongolian) + mn_mong_cn = 0x0850, // Mongolian (Traditional Mongolian), People's Republic of China + mn_mong_mn = 0x0C50, // Mongolian (Traditional Mongolian), Mongolia + ne = 0x0061, // Nepali + ne_in = 0x0861, // Nepali, India + ne_np = 0x0461, // Nepali, Nepal + no = 0x0014, // Norwegian (Bokmal) + nb = 0x7C14, // Norwegian (Bokmal) + nb_no = 0x0414, // Norwegian (Bokmal), Norway + nn = 0x7814, // Norwegian (Nynorsk) + nn_no = 0x0814, // Norwegian (Nynorsk), Norway + oc = 0x0082, // Occitan + oc_fr = 0x0482, // Occitan, France + @"or" = 0x0048, // Odia + or_in = 0x0448, // Odia, India + om = 0x0072, // Oromo + om_et = 0x0472, // Oromo, Ethiopia + ps = 0x0063, // Pashto + ps_af = 0x0463, // Pashto, Afghanistan + fa = 0x0029, // Persian + fa_ir = 0x0429, // Persian, Iran + pl = 0x0015, // Polish + pl_pl = 0x0415, // Polish, Poland + pt = 0x0016, // Portuguese + pt_br = 0x0416, // Portuguese, Brazil 
+ pt_pt = 0x0816, // Portuguese, Portugal + qps_ploca = 0x05FE, // Pseudo Language, Pseudo locale for east Asian/complex script localization testing + qps_ploc = 0x0501, // Pseudo Language, Pseudo locale used for localization testing + qps_plocm = 0x09FF, // Pseudo Language, Pseudo locale used for localization testing of mirrored locales + pa = 0x0046, // Punjabi + pa_arab = 0x7C46, // Punjabi + pa_in = 0x0446, // Punjabi, India + pa_arab_pk = 0x0846, // Punjabi, Islamic Republic of Pakistan + quz = 0x006B, // Quechua + quz_bo = 0x046B, // Quechua, Bolivia + quz_ec = 0x086B, // Quechua, Ecuador + quz_pe = 0x0C6B, // Quechua, Peru + ro = 0x0018, // Romanian + ro_md = 0x0818, // Romanian, Moldova + ro_ro = 0x0418, // Romanian, Romania + rm = 0x0017, // Romansh + rm_ch = 0x0417, // Romansh, Switzerland + ru = 0x0019, // Russian + ru_md = 0x0819, // Russian, Moldova + ru_ru = 0x0419, // Russian, Russia + sah = 0x0085, // Sakha + sah_ru = 0x0485, // Sakha, Russia + smn = 0x703B, // Sami (Inari) + smn_fi = 0x243B, // Sami (Inari), Finland + smj = 0x7C3B, // Sami (Lule) + smj_no = 0x103B, // Sami (Lule), Norway + smj_se = 0x143B, // Sami (Lule), Sweden + se = 0x003B, // Sami (Northern) + se_fi = 0x0C3B, // Sami (Northern), Finland + se_no = 0x043B, // Sami (Northern), Norway + se_se = 0x083B, // Sami (Northern), Sweden + sms = 0x743B, // Sami (Skolt) + sms_fi = 0x203B, // Sami (Skolt), Finland + sma = 0x783B, // Sami (Southern) + sma_no = 0x183B, // Sami (Southern), Norway + sma_se = 0x1C3B, // Sami (Southern), Sweden + sa = 0x004F, // Sanskrit + sa_in = 0x044F, // Sanskrit, India + gd = 0x0091, // Scottish Gaelic + gd_gb = 0x0491, // Scottish Gaelic, United Kingdom + sr_cyrl = 0x6C1A, // Serbian (Cyrillic) + sr_cyrl_ba = 0x1C1A, // Serbian (Cyrillic), Bosnia and Herzegovina + sr_cyrl_me = 0x301A, // Serbian (Cyrillic), Montenegro + sr_cyrl_rs = 0x281A, // Serbian (Cyrillic), Serbia + sr_cyrl_cs = 0x0C1A, // Serbian (Cyrillic), Serbia and Montenegro (Former) + sr_latn = 
0x701A, // Serbian (Latin) + sr = 0x7C1A, // Serbian (Latin) + sr_latn_ba = 0x181A, // Serbian (Latin), Bosnia and Herzegovina + sr_latn_me = 0x2c1A, // Serbian (Latin), Montenegro + sr_latn_rs = 0x241A, // Serbian (Latin), Serbia + sr_latn_cs = 0x081A, // Serbian (Latin), Serbia and Montenegro (Former) + nso = 0x006C, // Sesotho sa Leboa + nso_za = 0x046C, // Sesotho sa Leboa, South Africa + tn = 0x0032, // Setswana + tn_bw = 0x0832, // Setswana, Botswana + tn_za = 0x0432, // Setswana, South Africa + sd = 0x0059, // Sindhi + sd_arab = 0x7C59, // Sindhi + sd_arab_pk = 0x0859, // Sindhi, Islamic Republic of Pakistan + si = 0x005B, // Sinhala + si_lk = 0x045B, // Sinhala, Sri Lanka + sk = 0x001B, // Slovak + sk_sk = 0x041B, // Slovak, Slovakia + sl = 0x0024, // Slovenian + sl_si = 0x0424, // Slovenian, Slovenia + so = 0x0077, // Somali + so_so = 0x0477, // Somali, Somalia + st = 0x0030, // Sotho + st_za = 0x0430, // Sotho, South Africa + es = 0x000A, // Spanish + es_ar = 0x2C0A, // Spanish, Argentina + es_ve = 0x200A, // Spanish, Bolivarian Republic of Venezuela + es_bo = 0x400A, // Spanish, Bolivia + es_cl = 0x340A, // Spanish, Chile + es_co = 0x240A, // Spanish, Colombia + es_cr = 0x140A, // Spanish, Costa Rica + es_cu = 0x5c0A, // Spanish, Cuba + es_do = 0x1c0A, // Spanish, Dominican Republic + es_ec = 0x300A, // Spanish, Ecuador + es_sv = 0x440A, // Spanish, El Salvador + es_gt = 0x100A, // Spanish, Guatemala + es_hn = 0x480A, // Spanish, Honduras + es_419 = 0x580A, // Spanish, Latin America + es_mx = 0x080A, // Spanish, Mexico + es_ni = 0x4C0A, // Spanish, Nicaragua + es_pa = 0x180A, // Spanish, Panama + es_py = 0x3C0A, // Spanish, Paraguay + es_pe = 0x280A, // Spanish, Peru + es_pr = 0x500A, // Spanish, Puerto Rico + es_es_tradnl = 0x040A, // Spanish, Spain + es_es = 0x0c0A, // Spanish, Spain + es_us = 0x540A, // Spanish, United States + es_uy = 0x380A, // Spanish, Uruguay + sv = 0x001D, // Swedish + sv_fi = 0x081D, // Swedish, Finland + sv_se = 0x041D, // 
Swedish, Sweden + syr = 0x005A, // Syriac + syr_sy = 0x045A, // Syriac, Syria + tg = 0x0028, // Tajik (Cyrillic) + tg_cyrl = 0x7C28, // Tajik (Cyrillic) + tg_cyrl_tj = 0x0428, // Tajik (Cyrillic), Tajikistan + tzm = 0x005F, // Tamazight (Latin) + tzm_latn = 0x7C5F, // Tamazight (Latin) + tzm_latn_dz = 0x085F, // Tamazight (Latin), Algeria + ta = 0x0049, // Tamil + ta_in = 0x0449, // Tamil, India + ta_lk = 0x0849, // Tamil, Sri Lanka + tt = 0x0044, // Tatar + tt_ru = 0x0444, // Tatar, Russia + te = 0x004A, // Telugu + te_in = 0x044A, // Telugu, India + th = 0x001E, // Thai + th_th = 0x041E, // Thai, Thailand + bo = 0x0051, // Tibetan + bo_cn = 0x0451, // Tibetan, People's Republic of China + ti = 0x0073, // Tigrinya + ti_er = 0x0873, // Tigrinya, Eritrea + ti_et = 0x0473, // Tigrinya, Ethiopia + ts = 0x0031, // Tsonga + ts_za = 0x0431, // Tsonga, South Africa + tr = 0x001F, // Turkish + tr_tr = 0x041F, // Turkish, Turkey + tk = 0x0042, // Turkmen + tk_tm = 0x0442, // Turkmen, Turkmenistan + uk = 0x0022, // Ukrainian + uk_ua = 0x0422, // Ukrainian, Ukraine + hsb = 0x002E, // Upper Sorbian + hsb_de = 0x042E, // Upper Sorbian, Germany + ur = 0x0020, // Urdu + ur_in = 0x0820, // Urdu, India + ur_pk = 0x0420, // Urdu, Islamic Republic of Pakistan + ug = 0x0080, // Uyghur + ug_cn = 0x0480, // Uyghur, People's Republic of China + uz_cyrl = 0x7843, // Uzbek (Cyrillic) + uz_cyrl_uz = 0x0843, // Uzbek (Cyrillic), Uzbekistan + uz = 0x0043, // Uzbek (Latin) + uz_latn = 0x7C43, // Uzbek (Latin) + uz_latn_uz = 0x0443, // Uzbek (Latin), Uzbekistan + ca_es_valencia = 0x0803, // Valencian, Spain + ve = 0x0033, // Venda + ve_za = 0x0433, // Venda, South Africa + vi = 0x002A, // Vietnamese + vi_vn = 0x042A, // Vietnamese, Vietnam + cy = 0x0052, // Welsh + cy_gb = 0x0452, // Welsh, United Kingdom + wo = 0x0088, // Wolof + wo_sn = 0x0488, // Wolof, Senegal + xh = 0x0034, // Xhosa + xh_za = 0x0434, // Xhosa, South Africa + ii = 0x0078, // Yi + ii_cn = 0x0478, // Yi, People's Republic of 
China + yi_001 = 0x043D, // Yiddish, World + yo = 0x006A, // Yoruba + yo_ng = 0x046A, // Yoruba, Nigeria + zu = 0x0035, // Zulu + zu_za = 0x0435, // Zulu, South Africa + + /// Special case + x_iv_mathan = 0x007F, // LANG_INVARIANT, "math alphanumeric sorting" +}; diff --git a/lib/compiler/resinator/lex.zig b/lib/compiler/resinator/lex.zig @@ -0,0 +1,1106 @@ +//! Expects to be run after the C preprocessor and after `removeComments`. +//! This means that the lexer assumes that: +//! - Splices ('\' at the end of a line) have been handled/collapsed. +//! - Preprocessor directives and macros have been expanded (any remaining should be skipped with the exception of `#pragma code_page`). +//! - All comments have been removed. + +const std = @import("std"); +const ErrorDetails = @import("errors.zig").ErrorDetails; +const columnWidth = @import("literals.zig").columnWidth; +const code_pages = @import("code_pages.zig"); +const CodePage = code_pages.CodePage; +const SourceMappings = @import("source_mapping.zig").SourceMappings; +const isNonAsciiDigit = @import("utils.zig").isNonAsciiDigit; + +const dumpTokensDuringTests = false; + +pub const default_max_string_literal_codepoints = 4097; + +pub const Token = struct { + id: Id, + start: usize, + end: usize, + line_number: usize, + + pub const Id = enum { + literal, + number, + quoted_ascii_string, + quoted_wide_string, + operator, + begin, + end, + comma, + open_paren, + close_paren, + /// This Id is only used for errors, the Lexer will never return one + /// of these from a `next` call. 
+ preprocessor_command, + invalid, + eof, + + pub fn nameForErrorDisplay(self: Id) []const u8 { + return switch (self) { + .literal => "<literal>", + .number => "<number>", + .quoted_ascii_string => "<quoted ascii string>", + .quoted_wide_string => "<quoted wide string>", + .operator => "<operator>", + .begin => "<'{' or BEGIN>", + .end => "<'}' or END>", + .comma => ",", + .open_paren => "(", + .close_paren => ")", + .preprocessor_command => "<preprocessor command>", + .invalid => unreachable, + .eof => "<eof>", + }; + } + }; + + pub fn slice(self: Token, buffer: []const u8) []const u8 { + return buffer[self.start..self.end]; + } + + pub fn nameForErrorDisplay(self: Token, buffer: []const u8) []const u8 { + return switch (self.id) { + .eof => self.id.nameForErrorDisplay(), + else => self.slice(buffer), + }; + } + + /// Returns 0-based column + pub fn calculateColumn(token: Token, source: []const u8, tab_columns: usize, maybe_line_start: ?usize) usize { + const line_start = maybe_line_start orelse token.getLineStartForColumnCalc(source); + + var i: usize = line_start; + var column: usize = 0; + while (i < token.start) : (i += 1) { + column += columnWidth(column, source[i], tab_columns); + } + return column; + } + + // TODO: More testing is needed to determine if this can be merged with getLineStartForErrorDisplay + // (the TODO in currentIndexFormsLineEndingPair should be taken into account as well) + pub fn getLineStartForColumnCalc(token: Token, source: []const u8) usize { + const line_start = line_start: { + if (token.start != 0) { + // start checking at the byte before the token + var index = token.start - 1; + while (true) { + if (source[index] == '\n') break :line_start @min(source.len - 1, index + 1); + if (index != 0) index -= 1 else break; + } + } + break :line_start 0; + }; + return line_start; + } + + pub fn getLineStartForErrorDisplay(token: Token, source: []const u8) usize { + const line_start = line_start: { + if (token.start != 0) { + // start 
checking at the byte before the token + var index = token.start - 1; + while (true) { + if (source[index] == '\r' or source[index] == '\n') break :line_start @min(source.len - 1, index + 1); + if (index != 0) index -= 1 else break; + } + } + break :line_start 0; + }; + return line_start; + } + + pub fn getLineForErrorDisplay(token: Token, source: []const u8, maybe_line_start: ?usize) []const u8 { + const line_start = maybe_line_start orelse token.getLineStartForErrorDisplay(source); + + var line_end = line_start; + while (line_end < source.len and source[line_end] != '\r' and source[line_end] != '\n') : (line_end += 1) {} + return source[line_start..line_end]; + } + + pub fn isStringLiteral(token: Token) bool { + return token.id == .quoted_ascii_string or token.id == .quoted_wide_string; + } +}; + +pub const LineHandler = struct { + line_number: usize = 1, + buffer: []const u8, + last_line_ending_index: ?usize = null, + + /// Like incrementLineNumber but checks that the current char is a line ending first. + /// Returns the new line number if it was incremented, null otherwise. + pub fn maybeIncrementLineNumber(self: *LineHandler, cur_index: usize) ?usize { + const c = self.buffer[cur_index]; + if (c == '\r' or c == '\n') { + return self.incrementLineNumber(cur_index); + } + return null; + } + + /// Increments line_number appropriately (handling line ending pairs) + /// and returns the new line number if it was incremented, or null otherwise. 
+ pub fn incrementLineNumber(self: *LineHandler, cur_index: usize) ?usize { + if (self.currentIndexFormsLineEndingPair(cur_index)) { + self.last_line_ending_index = null; + return null; + } else { + self.line_number += 1; + self.last_line_ending_index = cur_index; + return self.line_number; + } + } + + /// \r\n and \n\r pairs are treated as a single line ending (but not \r\r \n\n) + /// expects self.index and last_line_ending_index (if non-null) to contain line endings + /// + /// TODO: This is not really how the Win32 RC compiler handles line endings. Instead, it + /// seems to drop all carriage returns during preprocessing and then replace all + /// remaining line endings with well-formed CRLF pairs (e.g. `<CR>a<CR>b<LF>c` becomes `ab<CR><LF>c`). + /// Handling this the same as the Win32 RC compiler would need control over the preprocessor, + /// since Clang converts unpaired <CR> into unpaired <LF>. + pub fn currentIndexFormsLineEndingPair(self: *const LineHandler, cur_index: usize) bool { + if (self.last_line_ending_index == null) return false; + + // must immediately precede the current index, we know cur_index must + // be >= 1 since last_line_ending_index is non-null (so if the subtraction + // overflows it is a bug at the callsite of this function). + if (self.last_line_ending_index.? 
!= cur_index - 1) return false; + + const cur_line_ending = self.buffer[cur_index]; + const last_line_ending = self.buffer[self.last_line_ending_index.?]; + + // sanity check + std.debug.assert(cur_line_ending == '\r' or cur_line_ending == '\n'); + std.debug.assert(last_line_ending == '\r' or last_line_ending == '\n'); + + // can't be \n\n or \r\r + if (last_line_ending == cur_line_ending) return false; + + return true; + } +}; + +pub const LexError = error{ + UnfinishedStringLiteral, + StringLiteralTooLong, + InvalidNumberWithExponent, + InvalidDigitCharacterInNumberLiteral, + IllegalByte, + IllegalByteOutsideStringLiterals, + IllegalCodepointOutsideStringLiterals, + IllegalByteOrderMark, + IllegalPrivateUseCharacter, + FoundCStyleEscapedQuote, + CodePagePragmaMissingLeftParen, + CodePagePragmaMissingRightParen, + /// Can be caught and ignored + CodePagePragmaInvalidCodePage, + CodePagePragmaNotInteger, + CodePagePragmaOverflow, + CodePagePragmaUnsupportedCodePage, + /// Can be caught and ignored + CodePagePragmaInIncludedFile, +}; + +pub const Lexer = struct { + const Self = @This(); + + buffer: []const u8, + index: usize, + line_handler: LineHandler, + at_start_of_line: bool = true, + error_context_token: ?Token = null, + current_code_page: CodePage, + default_code_page: CodePage, + source_mappings: ?*SourceMappings, + max_string_literal_codepoints: u15, + /// Needed to determine whether or not the output code page should + /// be set in the parser. 
+ seen_pragma_code_pages: u2 = 0, + + pub const Error = LexError; + + pub const LexerOptions = struct { + default_code_page: CodePage = .windows1252, + source_mappings: ?*SourceMappings = null, + max_string_literal_codepoints: u15 = default_max_string_literal_codepoints, + }; + + pub fn init(buffer: []const u8, options: LexerOptions) Self { + return Self{ + .buffer = buffer, + .index = 0, + .current_code_page = options.default_code_page, + .default_code_page = options.default_code_page, + .source_mappings = options.source_mappings, + .max_string_literal_codepoints = options.max_string_literal_codepoints, + .line_handler = .{ .buffer = buffer }, + }; + } + + pub fn dump(self: *Self, token: *const Token) void { + std.debug.print("{s}:{d}: {s}\n", .{ @tagName(token.id), token.line_number, std.fmt.fmtSliceEscapeLower(token.slice(self.buffer)) }); + } + + pub const LexMethod = enum { + whitespace_delimiter_only, + normal, + normal_expect_operator, + }; + + pub fn next(self: *Self, comptime method: LexMethod) LexError!Token { + switch (method) { + .whitespace_delimiter_only => return self.nextWhitespaceDelimeterOnly(), + .normal => return self.nextNormal(), + .normal_expect_operator => return self.nextNormalWithContext(.expect_operator), + } + } + + const StateWhitespaceDelimiterOnly = enum { + start, + literal, + preprocessor, + semicolon, + }; + + pub fn nextWhitespaceDelimeterOnly(self: *Self) LexError!Token { + const start_index = self.index; + var result = Token{ + .id = .eof, + .start = start_index, + .end = undefined, + .line_number = self.line_handler.line_number, + }; + var state = StateWhitespaceDelimiterOnly.start; + + while (self.current_code_page.codepointAt(self.index, self.buffer)) |codepoint| : (self.index += codepoint.byte_len) { + const c = codepoint.value; + try self.checkForIllegalCodepoint(codepoint, false); + switch (state) { + .start => switch (c) { + '\r', '\n' => { + result.start = self.index + 1; + result.line_number = 
self.incrementLineNumber(); + }, + ' ', '\t', '\x05'...'\x08', '\x0B'...'\x0C', '\x0E'...'\x1F' => { + result.start = self.index + 1; + }, + // NBSP only counts as whitespace at the start of a line (but + // can be intermixed with other whitespace). Who knows why. + '\xA0' => if (self.at_start_of_line) { + result.start = self.index + codepoint.byte_len; + } else { + state = .literal; + self.at_start_of_line = false; + }, + '#' => { + if (self.at_start_of_line) { + state = .preprocessor; + } else { + state = .literal; + } + self.at_start_of_line = false; + }, + // Semi-colon acts as a line-terminator, but in this lexing mode + // that's only true if it's at the start of a line. + ';' => { + if (self.at_start_of_line) { + state = .semicolon; + } + self.at_start_of_line = false; + }, + else => { + state = .literal; + self.at_start_of_line = false; + }, + }, + .literal => switch (c) { + '\r', '\n', ' ', '\t', '\x05'...'\x08', '\x0B'...'\x0C', '\x0E'...'\x1F' => { + result.id = .literal; + break; + }, + else => {}, + }, + .preprocessor => switch (c) { + '\r', '\n' => { + try self.evaluatePreprocessorCommand(result.start, self.index); + result.start = self.index + 1; + state = .start; + result.line_number = self.incrementLineNumber(); + }, + else => {}, + }, + .semicolon => switch (c) { + '\r', '\n' => { + result.start = self.index + 1; + state = .start; + result.line_number = self.incrementLineNumber(); + }, + else => {}, + }, + } + } else { // got EOF + switch (state) { + .start, .semicolon => {}, + .literal => { + result.id = .literal; + }, + .preprocessor => { + try self.evaluatePreprocessorCommand(result.start, self.index); + result.start = self.index; + }, + } + } + + result.end = self.index; + return result; + } + + const StateNormal = enum { + start, + literal_or_quoted_wide_string, + quoted_ascii_string, + quoted_wide_string, + quoted_ascii_string_escape, + quoted_wide_string_escape, + quoted_ascii_string_maybe_end, + quoted_wide_string_maybe_end, + literal, + 
number_literal, + preprocessor, + semicolon, + // end + e, + en, + // begin + b, + be, + beg, + begi, + }; + + /// TODO: A not-terrible name + pub fn nextNormal(self: *Self) LexError!Token { + return self.nextNormalWithContext(.any); + } + + pub fn nextNormalWithContext(self: *Self, context: enum { expect_operator, any }) LexError!Token { + const start_index = self.index; + var result = Token{ + .id = .eof, + .start = start_index, + .end = undefined, + .line_number = self.line_handler.line_number, + }; + var state = StateNormal.start; + + // Note: The Windows RC compiler uses a non-standard method of computing + // length for its 'string literal too long' errors; it isn't easily + // explained or intuitive (it's sort-of pre-parsed byte length but with + // a few of exceptions/edge cases). + // + // It also behaves strangely with non-ASCII codepoints, e.g. even though the default + // limit is 4097, you can only have 4094 € codepoints (1 UTF-16 code unit each), + // and 2048 𐐷 codepoints (2 UTF-16 code units each). + // + // TODO: Understand this more, bring it more in line with how the Win32 limits work. + // Alternatively, do something that makes more sense but may be more permissive. + var string_literal_length: usize = 0; + // Keeping track of the string literal column prevents pathological edge cases when + // there are tons of tab stop characters within a string literal. 
+ var string_literal_column: usize = 0; + var string_literal_collapsing_whitespace: bool = false; + var still_could_have_exponent: bool = true; + var exponent_index: ?usize = null; + while (self.current_code_page.codepointAt(self.index, self.buffer)) |codepoint| : (self.index += codepoint.byte_len) { + const c = codepoint.value; + const in_string_literal = switch (state) { + .quoted_ascii_string, + .quoted_wide_string, + .quoted_ascii_string_escape, + .quoted_wide_string_escape, + .quoted_ascii_string_maybe_end, + .quoted_wide_string_maybe_end, + => + // If the current line is not the same line as the start of the string literal, + // then we want to treat the current codepoint as 'not in a string literal' + // for the purposes of detecting illegal codepoints. This means that we will + // error on illegal-outside-string-literal characters that are outside string + // literals from the perspective of a C preprocessor, but that may be + // inside string literals from the perspective of the RC lexer. For example, + // "hello + // @" + // will be treated as a single string literal by the RC lexer but the Win32 + // preprocessor will consider this an unclosed string literal followed by + // the character @ and ", and will therefore error since the Win32 RC preprocessor + // errors on the @ character outside string literals. + // + // By doing this here, we can effectively emulate the Win32 RC preprocessor behavior + // at lex-time, and avoid the need for a separate step that checks for this edge-case + // specifically. 
+ result.line_number == self.line_handler.line_number, + else => false, + }; + try self.checkForIllegalCodepoint(codepoint, in_string_literal); + switch (state) { + .start => switch (c) { + '\r', '\n' => { + result.start = self.index + 1; + result.line_number = self.incrementLineNumber(); + }, + ' ', '\t', '\x05'...'\x08', '\x0B'...'\x0C', '\x0E'...'\x1F' => { + result.start = self.index + 1; + }, + // NBSP only counts as whitespace at the start of a line (but + // can be intermixed with other whitespace). Who knows why. + '\xA0' => if (self.at_start_of_line) { + result.start = self.index + codepoint.byte_len; + } else { + state = .literal; + self.at_start_of_line = false; + }, + 'L', 'l' => { + state = .literal_or_quoted_wide_string; + self.at_start_of_line = false; + }, + 'E', 'e' => { + state = .e; + self.at_start_of_line = false; + }, + 'B', 'b' => { + state = .b; + self.at_start_of_line = false; + }, + '"' => { + state = .quoted_ascii_string; + self.at_start_of_line = false; + string_literal_collapsing_whitespace = false; + string_literal_length = 0; + + var dummy_token = Token{ + .start = self.index, + .end = self.index, + .line_number = self.line_handler.line_number, + .id = .invalid, + }; + string_literal_column = dummy_token.calculateColumn(self.buffer, 8, null); + }, + '+', '&', '|' => { + self.index += 1; + result.id = .operator; + self.at_start_of_line = false; + break; + }, + '-' => { + if (context == .expect_operator) { + self.index += 1; + result.id = .operator; + self.at_start_of_line = false; + break; + } else { + state = .number_literal; + still_could_have_exponent = true; + exponent_index = null; + self.at_start_of_line = false; + } + }, + '0'...'9', '~' => { + state = .number_literal; + still_could_have_exponent = true; + exponent_index = null; + self.at_start_of_line = false; + }, + '#' => { + if (self.at_start_of_line) { + state = .preprocessor; + } else { + state = .literal; + } + self.at_start_of_line = false; + }, + ';' => { + state = 
.semicolon; + self.at_start_of_line = false; + }, + '{', '}' => { + self.index += 1; + result.id = if (c == '{') .begin else .end; + self.at_start_of_line = false; + break; + }, + '(', ')' => { + self.index += 1; + result.id = if (c == '(') .open_paren else .close_paren; + self.at_start_of_line = false; + break; + }, + ',' => { + self.index += 1; + result.id = .comma; + self.at_start_of_line = false; + break; + }, + else => { + if (isNonAsciiDigit(c)) { + self.error_context_token = .{ + .id = .number, + .start = result.start, + .end = self.index + 1, + .line_number = self.line_handler.line_number, + }; + return error.InvalidDigitCharacterInNumberLiteral; + } + state = .literal; + self.at_start_of_line = false; + }, + }, + .preprocessor => switch (c) { + '\r', '\n' => { + try self.evaluatePreprocessorCommand(result.start, self.index); + result.start = self.index + 1; + state = .start; + result.line_number = self.incrementLineNumber(); + }, + else => {}, + }, + // Semi-colon acts as a line-terminator--everything is skipped until + // the next line. + .semicolon => switch (c) { + '\r', '\n' => { + result.start = self.index + 1; + state = .start; + result.line_number = self.incrementLineNumber(); + }, + else => {}, + }, + .number_literal => switch (c) { + // zig fmt: off + ' ', '\t', '\x05'...'\x08', '\x0B'...'\x0C', '\x0E'...'\x1F', + '\r', '\n', '"', ',', '{', '}', '+', '-', '|', '&', '~', '(', ')', + '\'', ';', '=', + => { + // zig fmt: on + result.id = .number; + break; + }, + '0'...'9' => { + if (exponent_index) |exp_i| { + if (self.index - 1 == exp_i) { + // Note: This being an error is a quirk of the preprocessor used by + // the Win32 RC compiler. 
+ self.error_context_token = .{ + .id = .number, + .start = result.start, + .end = self.index + 1, + .line_number = self.line_handler.line_number, + }; + return error.InvalidNumberWithExponent; + } + } + }, + 'e', 'E' => { + if (still_could_have_exponent) { + exponent_index = self.index; + still_could_have_exponent = false; + } + }, + else => { + if (isNonAsciiDigit(c)) { + self.error_context_token = .{ + .id = .number, + .start = result.start, + .end = self.index + 1, + .line_number = self.line_handler.line_number, + }; + return error.InvalidDigitCharacterInNumberLiteral; + } + still_could_have_exponent = false; + }, + }, + .literal_or_quoted_wide_string => switch (c) { + // zig fmt: off + ' ', '\t', '\x05'...'\x08', '\x0B'...'\x0C', '\x0E'...'\x1F', + '\r', '\n', ',', '{', '}', '+', '-', '|', '&', '~', '(', ')', + '\'', ';', '=', + // zig fmt: on + => { + result.id = .literal; + break; + }, + '"' => { + state = .quoted_wide_string; + string_literal_collapsing_whitespace = false; + string_literal_length = 0; + + var dummy_token = Token{ + .start = self.index, + .end = self.index, + .line_number = self.line_handler.line_number, + .id = .invalid, + }; + string_literal_column = dummy_token.calculateColumn(self.buffer, 8, null); + }, + else => { + state = .literal; + }, + }, + .literal => switch (c) { + // zig fmt: off + ' ', '\t', '\x05'...'\x08', '\x0B'...'\x0C', '\x0E'...'\x1F', + '\r', '\n', '"', ',', '{', '}', '+', '-', '|', '&', '~', '(', ')', + '\'', ';', '=', + => { + // zig fmt: on + result.id = .literal; + break; + }, + else => {}, + }, + .e => switch (c) { + 'N', 'n' => { + state = .en; + }, + else => { + state = .literal; + self.index -= 1; + }, + }, + .en => switch (c) { + 'D', 'd' => { + result.id = .end; + self.index += 1; + break; + }, + else => { + state = .literal; + self.index -= 1; + }, + }, + .b => switch (c) { + 'E', 'e' => { + state = .be; + }, + else => { + state = .literal; + self.index -= 1; + }, + }, + .be => switch (c) { + 'G', 'g' => { + 
state = .beg; + }, + else => { + state = .literal; + self.index -= 1; + }, + }, + .beg => switch (c) { + 'I', 'i' => { + state = .begi; + }, + else => { + state = .literal; + self.index -= 1; + }, + }, + .begi => switch (c) { + 'N', 'n' => { + result.id = .begin; + self.index += 1; + break; + }, + else => { + state = .literal; + self.index -= 1; + }, + }, + .quoted_ascii_string, .quoted_wide_string => switch (c) { + '"' => { + string_literal_column += 1; + state = if (state == .quoted_ascii_string) .quoted_ascii_string_maybe_end else .quoted_wide_string_maybe_end; + }, + '\\' => { + string_literal_length += 1; + string_literal_column += 1; + state = if (state == .quoted_ascii_string) .quoted_ascii_string_escape else .quoted_wide_string_escape; + }, + '\r' => { + string_literal_column = 0; + // \r doesn't count towards string literal length + + // Increment line number but don't affect the result token's line number + _ = self.incrementLineNumber(); + }, + '\n' => { + string_literal_column = 0; + // first \n expands to <space><\n> + if (!string_literal_collapsing_whitespace) { + string_literal_length += 2; + string_literal_collapsing_whitespace = true; + } + // the rest are collapsed into the <space><\n> + + // Increment line number but don't affect the result token's line number + _ = self.incrementLineNumber(); + }, + // only \t, space, Vertical Tab, and Form Feed count as whitespace when collapsing + '\t', ' ', '\x0b', '\x0c' => { + if (!string_literal_collapsing_whitespace) { + // Literal tab characters are counted as the number of space characters + // needed to reach the next 8-column tab stop. 
+ const width = columnWidth(string_literal_column, @intCast(c), 8); + string_literal_length += width; + string_literal_column += width; + } + }, + else => { + string_literal_collapsing_whitespace = false; + string_literal_length += 1; + string_literal_column += 1; + }, + }, + .quoted_ascii_string_escape, .quoted_wide_string_escape => switch (c) { + '"' => { + self.error_context_token = .{ + .id = .invalid, + .start = self.index - 1, + .end = self.index + 1, + .line_number = self.line_handler.line_number, + }; + return error.FoundCStyleEscapedQuote; + }, + else => { + string_literal_length += 1; + string_literal_column += 1; + state = if (state == .quoted_ascii_string_escape) .quoted_ascii_string else .quoted_wide_string; + }, + }, + .quoted_ascii_string_maybe_end, .quoted_wide_string_maybe_end => switch (c) { + '"' => { + state = if (state == .quoted_ascii_string_maybe_end) .quoted_ascii_string else .quoted_wide_string; + // Escaped quotes count as 1 char for string literal length checks. 
+ // Since we did not increment on the first " (because it could have been + // the end of the quoted string), we increment here + string_literal_length += 1; + string_literal_column += 1; + }, + else => { + result.id = if (state == .quoted_ascii_string_maybe_end) .quoted_ascii_string else .quoted_wide_string; + break; + }, + }, + } + } else { // got EOF + switch (state) { + .start, .semicolon => {}, + .literal_or_quoted_wide_string, .literal, .e, .en, .b, .be, .beg, .begi => { + result.id = .literal; + }, + .preprocessor => { + try self.evaluatePreprocessorCommand(result.start, self.index); + result.start = self.index; + }, + .number_literal => { + result.id = .number; + }, + .quoted_ascii_string_maybe_end, .quoted_wide_string_maybe_end => { + result.id = if (state == .quoted_ascii_string_maybe_end) .quoted_ascii_string else .quoted_wide_string; + }, + .quoted_ascii_string, + .quoted_wide_string, + .quoted_ascii_string_escape, + .quoted_wide_string_escape, + => { + self.error_context_token = .{ + .id = .eof, + .start = self.index, + .end = self.index, + .line_number = self.line_handler.line_number, + }; + return LexError.UnfinishedStringLiteral; + }, + } + } + + result.end = self.index; + + if (result.id == .quoted_ascii_string or result.id == .quoted_wide_string) { + if (string_literal_length > self.max_string_literal_codepoints) { + self.error_context_token = result; + return LexError.StringLiteralTooLong; + } + } + + return result; + } + + /// Increments line_number appropriately (handling line ending pairs) + /// and returns the new line number. 
+ fn incrementLineNumber(self: *Self) usize { + _ = self.line_handler.incrementLineNumber(self.index); + self.at_start_of_line = true; + return self.line_handler.line_number; + } + + fn checkForIllegalCodepoint(self: *Self, codepoint: code_pages.Codepoint, in_string_literal: bool) LexError!void { + const err = switch (codepoint.value) { + // 0x00 = NUL + // 0x1A = Substitute (treated as EOF) + // NOTE: 0x1A gets treated as EOF by the clang preprocessor so after a .rc file + // is run through the clang preprocessor it will no longer have 0x1A characters in it. + // 0x7F = DEL (treated as a context-specific terminator by the Windows RC compiler) + 0x00, 0x1A, 0x7F => error.IllegalByte, + // 0x01...0x03 result in strange 'macro definition too big' errors when used outside of string literals + // 0x04 is valid but behaves strangely (sort of acts as a 'skip the next character' instruction) + 0x01...0x04 => if (!in_string_literal) error.IllegalByteOutsideStringLiterals else return, + // @ and ` both result in error RC2018: unknown character '0x60' (and subsequently + // fatal error RC1116: RC terminating after preprocessor errors) if they are ever used + // outside of string literals. Not exactly sure why this would be the case, though. + // TODO: Make sure there aren't any exceptions + '@', '`' => if (!in_string_literal) error.IllegalByteOutsideStringLiterals else return, + // The Byte Order Mark is mostly skipped over by the Windows RC compiler, but + // there are edge cases where it leads to cryptic 'compiler limit : macro definition too big' + // errors (e.g. a BOM within a number literal). By making this illegal we avoid having to + // deal with a lot of edge cases and remove the potential footgun of the bytes of a BOM + // being 'missing' when included in a string literal (the Windows RC compiler acts as + // if the codepoint was never part of the string literal). 
+ '\u{FEFF}' => error.IllegalByteOrderMark, + // Similar deal with this private use codepoint, it gets skipped/ignored by the + // RC compiler (but without the cryptic errors). Silently dropping bytes still seems like + // enough of a footgun with no real use-cases that it's still worth erroring instead of + // emulating the RC compiler's behavior, though. + '\u{E000}' => error.IllegalPrivateUseCharacter, + // These codepoints lead to strange errors when used outside of string literals, + // and miscompilations when used within string literals. We avoid the miscompilation + // within string literals and emit a warning, but outside of string literals it makes + // more sense to just disallow these codepoints. + 0x900, 0xA00, 0xA0D, 0x2000, 0xFFFE, 0xD00 => if (!in_string_literal) error.IllegalCodepointOutsideStringLiterals else return, + else => return, + }; + self.error_context_token = .{ + .id = .invalid, + .start = self.index, + .end = self.index + codepoint.byte_len, + .line_number = self.line_handler.line_number, + }; + return err; + } + + fn evaluatePreprocessorCommand(self: *Self, start: usize, end: usize) !void { + const token = Token{ + .id = .preprocessor_command, + .start = start, + .end = end, + .line_number = self.line_handler.line_number, + }; + errdefer self.error_context_token = token; + const full_command = self.buffer[start..end]; + var command = full_command; + + // Anything besides exactly this is ignored by the Windows RC implementation + const expected_directive = "#pragma"; + if (!std.mem.startsWith(u8, command, expected_directive)) return; + command = command[expected_directive.len..]; + + if (command.len == 0 or !std.ascii.isWhitespace(command[0])) return; + while (command.len > 0 and std.ascii.isWhitespace(command[0])) { + command = command[1..]; + } + + // Note: CoDe_PaGeZ is also treated as "code_page" by the Windows RC implementation, + // and it will error with 'Missing left parenthesis in code_page #pragma' + const expected_extension = 
"code_page"; + if (!std.ascii.startsWithIgnoreCase(command, expected_extension)) return; + command = command[expected_extension.len..]; + + while (command.len > 0 and std.ascii.isWhitespace(command[0])) { + command = command[1..]; + } + + if (command.len == 0 or command[0] != '(') { + return error.CodePagePragmaMissingLeftParen; + } + command = command[1..]; + + while (command.len > 0 and std.ascii.isWhitespace(command[0])) { + command = command[1..]; + } + + var num_str: []u8 = command[0..0]; + while (command.len > 0 and (command[0] != ')' and !std.ascii.isWhitespace(command[0]))) { + command = command[1..]; + num_str.len += 1; + } + + if (num_str.len == 0) { + return error.CodePagePragmaNotInteger; + } + + while (command.len > 0 and std.ascii.isWhitespace(command[0])) { + command = command[1..]; + } + + if (command.len == 0 or command[0] != ')') { + return error.CodePagePragmaMissingRightParen; + } + + const code_page = code_page: { + if (std.ascii.eqlIgnoreCase("DEFAULT", num_str)) { + break :code_page self.default_code_page; + } + + // The Win32 compiler behaves fairly strangely around maxInt(u32): + // - If the overflowed u32 wraps and becomes a known code page ID, then + // it will error/warn with "Codepage not valid: ignored" (depending on /w) + // - If the overflowed u32 wraps and does not become a known code page ID, + // then it will error with 'constant too big' and 'Codepage not integer' + // + // Instead of that, we just have a separate error specifically for overflow. + const num = parseCodePageNum(num_str) catch |err| switch (err) { + error.InvalidCharacter => return error.CodePagePragmaNotInteger, + error.Overflow => return error.CodePagePragmaOverflow, + }; + + // Anything that starts with 0 but does not resolve to 0 is treated as invalid, e.g. 01252 + if (num_str[0] == '0' and num != 0) { + return error.CodePagePragmaInvalidCodePage; + } + // Anything that resolves to 0 is treated as 'not an integer' by the Win32 implementation. 
+ else if (num == 0) { + return error.CodePagePragmaNotInteger; + } + // Anything above u16 max is not going to be found since our CodePage enum is backed by a u16. + if (num > std.math.maxInt(u16)) { + return error.CodePagePragmaInvalidCodePage; + } + + break :code_page code_pages.CodePage.getByIdentifierEnsureSupported(@intCast(num)) catch |err| switch (err) { + error.InvalidCodePage => return error.CodePagePragmaInvalidCodePage, + error.UnsupportedCodePage => return error.CodePagePragmaUnsupportedCodePage, + }; + }; + + // https://learn.microsoft.com/en-us/windows/win32/menurc/pragma-directives + // > This pragma is not supported in an included resource file (.rc) + // + // Even though the Win32 behavior is to just ignore such directives silently, + // this is an error in the lexer to allow for emitting warnings/errors when + // such directives are found if that's wanted. The intention is for the lexer + // to still be able to work correctly after this error is returned. + if (self.source_mappings) |source_mappings| { + if (!source_mappings.isRootFile(token.line_number)) { + return error.CodePagePragmaInIncludedFile; + } + } + + self.seen_pragma_code_pages +|= 1; + self.current_code_page = code_page; + } + + fn parseCodePageNum(str: []const u8) !u32 { + var x: u32 = 0; + for (str) |c| { + const digit = try std.fmt.charToDigit(c, 10); + if (x != 0) x = try std.math.mul(u32, x, 10); + x = try std.math.add(u32, x, digit); + } + return x; + } + + pub fn getErrorDetails(self: Self, lex_err: LexError) ErrorDetails { + const err = switch (lex_err) { + error.UnfinishedStringLiteral => ErrorDetails.Error.unfinished_string_literal, + error.StringLiteralTooLong => return .{ + .err = .string_literal_too_long, + .token = self.error_context_token.?, + .extra = .{ .number = self.max_string_literal_codepoints }, + }, + error.InvalidNumberWithExponent => ErrorDetails.Error.invalid_number_with_exponent, + error.InvalidDigitCharacterInNumberLiteral => 
ErrorDetails.Error.invalid_digit_character_in_number_literal, + error.IllegalByte => ErrorDetails.Error.illegal_byte, + error.IllegalByteOutsideStringLiterals => ErrorDetails.Error.illegal_byte_outside_string_literals, + error.IllegalCodepointOutsideStringLiterals => ErrorDetails.Error.illegal_codepoint_outside_string_literals, + error.IllegalByteOrderMark => ErrorDetails.Error.illegal_byte_order_mark, + error.IllegalPrivateUseCharacter => ErrorDetails.Error.illegal_private_use_character, + error.FoundCStyleEscapedQuote => ErrorDetails.Error.found_c_style_escaped_quote, + error.CodePagePragmaMissingLeftParen => ErrorDetails.Error.code_page_pragma_missing_left_paren, + error.CodePagePragmaMissingRightParen => ErrorDetails.Error.code_page_pragma_missing_right_paren, + error.CodePagePragmaInvalidCodePage => ErrorDetails.Error.code_page_pragma_invalid_code_page, + error.CodePagePragmaNotInteger => ErrorDetails.Error.code_page_pragma_not_integer, + error.CodePagePragmaOverflow => ErrorDetails.Error.code_page_pragma_overflow, + error.CodePagePragmaUnsupportedCodePage => ErrorDetails.Error.code_page_pragma_unsupported_code_page, + error.CodePagePragmaInIncludedFile => ErrorDetails.Error.code_page_pragma_in_included_file, + }; + return .{ + .err = err, + .token = self.error_context_token.?, + }; + } +}; + +fn testLexNormal(source: []const u8, expected_tokens: []const Token.Id) !void { + var lexer = Lexer.init(source, .{}); + if (dumpTokensDuringTests) std.debug.print("\n----------------------\n{s}\n----------------------\n", .{lexer.buffer}); + for (expected_tokens) |expected_token_id| { + const token = try lexer.nextNormal(); + if (dumpTokensDuringTests) lexer.dump(&token); + try std.testing.expectEqual(expected_token_id, token.id); + } + const last_token = try lexer.nextNormal(); + try std.testing.expectEqual(Token.Id.eof, last_token.id); +} + +fn expectLexError(expected: LexError, actual: anytype) !void { + try std.testing.expectError(expected, actual); + if 
(dumpTokensDuringTests) std.debug.print("{!}\n", .{actual}); +} + +test "normal: numbers" { + try testLexNormal("1", &.{.number}); + try testLexNormal("-1", &.{.number}); + try testLexNormal("- 1", &.{ .number, .number }); + try testLexNormal("-a", &.{.number}); +} + +test "normal: string literals" { + try testLexNormal("\"\"", &.{.quoted_ascii_string}); + // "" is an escaped " + try testLexNormal("\" \"\" \"", &.{.quoted_ascii_string}); +} + +test "superscript chars and code pages" { + const firstToken = struct { + pub fn firstToken(source: []const u8, default_code_page: CodePage, comptime lex_method: Lexer.LexMethod) LexError!Token { + var lexer = Lexer.init(source, .{ .default_code_page = default_code_page }); + return lexer.next(lex_method); + } + }.firstToken; + const utf8_source = "²"; + const windows1252_source = "\xB2"; + + const windows1252_encoded_as_windows1252 = firstToken(windows1252_source, .windows1252, .normal); + try std.testing.expectError(error.InvalidDigitCharacterInNumberLiteral, windows1252_encoded_as_windows1252); + + const utf8_encoded_as_windows1252 = try firstToken(utf8_source, .windows1252, .normal); + try std.testing.expectEqual(Token{ + .id = .literal, + .start = 0, + .end = 2, + .line_number = 1, + }, utf8_encoded_as_windows1252); + + const utf8_encoded_as_utf8 = firstToken(utf8_source, .utf8, .normal); + try std.testing.expectError(error.InvalidDigitCharacterInNumberLiteral, utf8_encoded_as_utf8); + + const windows1252_encoded_as_utf8 = try firstToken(windows1252_source, .utf8, .normal); + try std.testing.expectEqual(Token{ + .id = .literal, + .start = 0, + .end = 1, + .line_number = 1, + }, windows1252_encoded_as_utf8); +} diff --git a/lib/compiler/resinator/literals.zig b/lib/compiler/resinator/literals.zig @@ -0,0 +1,910 @@ +const std = @import("std"); +const code_pages = @import("code_pages.zig"); +const CodePage = code_pages.CodePage; +const windows1252 = @import("windows1252.zig"); +const ErrorDetails = 
@import("errors.zig").ErrorDetails;
const DiagnosticsContext = @import("errors.zig").DiagnosticsContext;
const Token = @import("lex.zig").Token;

/// Reports whether `str` is accepted as a number literal for data values.
/// rc is as permissive as it gets here: only the first byte is inspected, and it
/// merely has to be a decimal digit, `-`, or `~`. The empty string is rejected.
pub fn isValidNumberDataLiteral(str: []const u8) bool {
    if (str.len == 0) return false;
    return switch (str[0]) {
        '~', '-', '0'...'9' => true,
        else => false,
    };
}

/// A slice of source bytes together with the code page used to decode it.
pub const SourceBytes = struct {
    slice: []const u8,
    code_page: CodePage,
};

pub const StringType = enum { ascii, wide };

/// Valid escapes:
///  "" -> "
///  \a, \A => 0x08 (not 0x07 like in C)
///  \n => 0x0A
///  \r => 0x0D
///  \t, \T => 0x09
///  \\ => \
///  \nnn => byte with numeric value given by nnn interpreted as octal
///          (wraps on overflow, number of digits can be 1-3 for ASCII strings
///          and 1-7 for wide strings)
///  \xhh => byte with numeric value given by hh interpreted as hex
///          (number of digits can be 0-2 for ASCII strings and 0-4 for
///          wide strings)
///  \<\r+> => \
///  \<[\r\n\t ]+> => <nothing>
///
/// Special cases:
///  <\t> => 1-8 spaces, dependent on columns in the source rc file itself
///  <\r> => <nothing>
///  <\n+><\w+?\n?> => <space><\n>
///
/// Special, especially weird case:
///  \"" => "
/// NOTE: This leads to footguns because the preprocessor can start parsing things
///       out-of-sync with the RC compiler, expanding macros within string literals, etc.
///       This parse function handles this case the same as the Windows RC compiler, but
///       \" within a string literal is treated as an error by the lexer, so the relevant
///       branches should never actually be hit during this function.
pub const IterativeStringParser = struct {
    source: []const u8,
    code_page: CodePage,
    /// The type of the string inferred by the prefix (L"" or "")
    /// This is what matters for things like the maximum digits in an
    /// escape sequence, whether or not invalid escape sequences are skipped, etc.
    declared_string_type: StringType,
    pending_codepoint: ?u21 = null,
    num_pending_spaces: u8 = 0,
    index: usize = 0,
    column: usize = 0,
    diagnostics: ?DiagnosticsContext = null,
    seen_tab: bool = false,

    const State = enum {
        normal,
        quote,
        newline,
        escaped,
        escaped_cr,
        escaped_newlines,
        escaped_octal,
        escaped_hex,
    };

    /// Sets up a parser over the contents of the quoted string literal in `bytes`.
    /// `bytes.slice` is expected to include the surrounding quotes and, for wide
    /// strings, the leading `L`/`l`; those delimiters are stripped here and the
    /// starting column is advanced past the ones at the front.
    pub fn init(bytes: SourceBytes, options: StringParseOptions) IterativeStringParser {
        const is_wide = switch (bytes.slice[0]) {
            'L', 'l' => true,
            else => false,
        };
        // Index of the opening quote: it follows the L prefix for wide strings.
        const open_quote_index: usize = if (is_wide) 1 else 0;

        var start_column = options.start_column + 1; // skip the opening "
        if (is_wide) start_column += 1; // skip the L prefix

        return .{
            // Everything strictly between the opening and the closing quote.
            .source = bytes.slice[open_quote_index + 1 .. bytes.slice.len - 1],
            .code_page = bytes.code_page,
            .declared_string_type = if (is_wide) .wide else .ascii,
            .column = start_column,
            .diagnostics = options.diagnostics,
        };
    }

    pub const ParsedCodepoint = struct {
        codepoint: u21,
        /// Note: If this is true, `codepoint` will be a value with a max of maxInt(u16).
        /// This is enforced by using saturating arithmetic, so in e.g. a wide string literal the
        /// octal escape sequence \7777777 (2,097,151) will be parsed into the value 0xFFFF (65,535).
        /// If the value needs to be truncated to a smaller integer (for ASCII string literals), then that
        /// must be done by the caller.
        from_escaped_integer: bool = false,
    };

    /// Returns the next parsed codepoint, or null once the string is exhausted.
    /// When a diagnostics context is present, a warning plus an accompanying note
    /// is appended for each of a handful of specific codepoints that did not come
    /// from an escaped integer; see the error names below for the reason.
    pub fn next(self: *IterativeStringParser) std.mem.Allocator.Error!?ParsedCodepoint {
        const parsed = (try self.nextUnchecked()) orelse return null;
        if (parsed.from_escaped_integer) return parsed;
        const ctx = if (self.diagnostics) |*present| present else return parsed;

        switch (parsed.codepoint) {
            0x900, 0xA00, 0xA0D, 0x2000, 0xFFFE, 0xD00 => {
                const err: ErrorDetails.Error = switch (parsed.codepoint) {
                    0xD00 => .rc_would_miscompile_codepoint_skip,
                    else => .rc_would_miscompile_codepoint_byte_swap,
                };
                try ctx.diagnostics.append(ErrorDetails{
                    .err = err,
                    .type = .warning,
                    .token = ctx.token,
                    .extra = .{ .number = parsed.codepoint },
                });
                try ctx.diagnostics.append(ErrorDetails{
                    .err = err,
                    .type = .note,
                    .token = ctx.token,
                    .print_source_line = false,
                    .extra = .{ .number = parsed.codepoint },
                });
            },
            else => {},
        }
        return parsed;
    }

    pub fn nextUnchecked(self: *IterativeStringParser) std.mem.Allocator.Error!?ParsedCodepoint {
        if (self.num_pending_spaces > 0) {
            // Ensure that we don't get into this predicament so we can ensure that
            // the order of processing any pending stuff doesn't matter
            std.debug.assert(self.pending_codepoint == null);
            self.num_pending_spaces -= 1;
            return .{ .codepoint = ' ' };
        }
        if (self.pending_codepoint) |pending_codepoint| {
            self.pending_codepoint = null;
            return .{ .codepoint = pending_codepoint };
        }
        if (self.index >= self.source.len) return null;

        var state: State = .normal;
        var string_escape_n: u16 = 0;
        var string_escape_i: u8 = 0;
        const max_octal_escape_digits: u8 = switch (self.declared_string_type) {
            .ascii => 3,
            .wide => 7,
        };
        const max_hex_escape_digits: u8 = switch (self.declared_string_type) {
            .ascii => 2,
            .wide => 4,
        };

        var backtrack: bool = undefined;
        while (self.code_page.codepointAt(self.index, self.source)) |codepoint| : ({
            if (!backtrack) self.index += codepoint.byte_len;
        }) {
+ backtrack = false; + const c = codepoint.value; + defer { + if (!backtrack) { + if (c == '\t') { + self.column += columnsUntilTabStop(self.column, 8); + } else { + self.column += codepoint.byte_len; + } + } + } + switch (state) { + .normal => switch (c) { + '\\' => state = .escaped, + '"' => state = .quote, + '\r' => {}, + '\n' => state = .newline, + '\t' => { + // Only warn about a tab getting converted to spaces once per string + if (self.diagnostics != null and !self.seen_tab) { + try self.diagnostics.?.diagnostics.append(ErrorDetails{ + .err = .tab_converted_to_spaces, + .type = .warning, + .token = self.diagnostics.?.token, + }); + try self.diagnostics.?.diagnostics.append(ErrorDetails{ + .err = .tab_converted_to_spaces, + .type = .note, + .token = self.diagnostics.?.token, + .print_source_line = false, + }); + self.seen_tab = true; + } + const cols = columnsUntilTabStop(self.column, 8); + self.num_pending_spaces = @intCast(cols - 1); + self.index += codepoint.byte_len; + return .{ .codepoint = ' ' }; + }, + else => { + self.index += codepoint.byte_len; + return .{ .codepoint = c }; + }, + }, + .quote => switch (c) { + '"' => { + // "" => " + self.index += codepoint.byte_len; + return .{ .codepoint = '"' }; + }, + else => unreachable, // this is a bug in the lexer + }, + .newline => switch (c) { + '\r', ' ', '\t', '\n', '\x0b', '\x0c', '\xa0' => {}, + else => { + // we intentionally avoid incrementing self.index + // to handle the current char in the next call, + // and we set backtrack so column count is handled correctly + backtrack = true; + + // <space><newline> + self.pending_codepoint = '\n'; + return .{ .codepoint = ' ' }; + }, + }, + .escaped => switch (c) { + '\r' => state = .escaped_cr, + '\n' => state = .escaped_newlines, + '0'...'7' => { + string_escape_n = std.fmt.charToDigit(@intCast(c), 8) catch unreachable; + string_escape_i = 1; + state = .escaped_octal; + }, + 'x', 'X' => { + string_escape_n = 0; + string_escape_i = 0; + state = 
.escaped_hex; + }, + else => { + switch (c) { + 'a', 'A' => { + self.index += codepoint.byte_len; + return .{ .codepoint = '\x08' }; + }, // might be a bug in RC, but matches its behavior + 'n' => { + self.index += codepoint.byte_len; + return .{ .codepoint = '\n' }; + }, + 'r' => { + self.index += codepoint.byte_len; + return .{ .codepoint = '\r' }; + }, + 't', 'T' => { + self.index += codepoint.byte_len; + return .{ .codepoint = '\t' }; + }, + '\\' => { + self.index += codepoint.byte_len; + return .{ .codepoint = '\\' }; + }, + '"' => { + // \" is a special case that doesn't get the \ included, + backtrack = true; + }, + else => switch (self.declared_string_type) { + .wide => {}, // invalid escape sequences are skipped in wide strings + .ascii => { + // we intentionally avoid incrementing self.index + // to handle the current char in the next call, + // and we set backtrack so column count is handled correctly + backtrack = true; + return .{ .codepoint = '\\' }; + }, + }, + } + state = .normal; + }, + }, + .escaped_cr => switch (c) { + '\r' => {}, + '\n' => state = .escaped_newlines, + else => { + // we intentionally avoid incrementing self.index + // to handle the current char in the next call, + // and we set backtrack so column count is handled correctly + backtrack = true; + return .{ .codepoint = '\\' }; + }, + }, + .escaped_newlines => switch (c) { + '\r', '\n', '\t', ' ', '\x0b', '\x0c', '\xa0' => {}, + else => { + // backtrack so that we handle the current char properly + backtrack = true; + state = .normal; + }, + }, + .escaped_octal => switch (c) { + '0'...'7' => { + string_escape_n *%= 8; + string_escape_n +%= std.fmt.charToDigit(@intCast(c), 8) catch unreachable; + string_escape_i += 1; + if (string_escape_i == max_octal_escape_digits) { + self.index += codepoint.byte_len; + return .{ .codepoint = string_escape_n, .from_escaped_integer = true }; + } + }, + else => { + // we intentionally avoid incrementing self.index + // to handle the current char in 
the next call, + // and we set backtrack so column count is handled correctly + backtrack = true; + + // write out whatever byte we have parsed so far + return .{ .codepoint = string_escape_n, .from_escaped_integer = true }; + }, + }, + .escaped_hex => switch (c) { + '0'...'9', 'a'...'f', 'A'...'F' => { + string_escape_n *= 16; + string_escape_n += std.fmt.charToDigit(@intCast(c), 16) catch unreachable; + string_escape_i += 1; + if (string_escape_i == max_hex_escape_digits) { + self.index += codepoint.byte_len; + return .{ .codepoint = string_escape_n, .from_escaped_integer = true }; + } + }, + else => { + // we intentionally avoid incrementing self.index + // to handle the current char in the next call, + // and we set backtrack so column count is handled correctly + backtrack = true; + + // write out whatever byte we have parsed so far + // (even with 0 actual digits, \x alone parses to 0) + const escaped_value = string_escape_n; + return .{ .codepoint = escaped_value, .from_escaped_integer = true }; + }, + }, + } + } + + switch (state) { + .normal, .escaped_newlines => {}, + .newline => { + // <space><newline> + self.pending_codepoint = '\n'; + return .{ .codepoint = ' ' }; + }, + .escaped, .escaped_cr => return .{ .codepoint = '\\' }, + .escaped_octal, .escaped_hex => { + return .{ .codepoint = string_escape_n, .from_escaped_integer = true }; + }, + .quote => unreachable, // this is a bug in the lexer + } + + return null; + } +}; + +pub const StringParseOptions = struct { + start_column: usize = 0, + diagnostics: ?DiagnosticsContext = null, + output_code_page: CodePage = .windows1252, +}; + +pub fn parseQuotedString( + comptime literal_type: StringType, + allocator: std.mem.Allocator, + bytes: SourceBytes, + options: StringParseOptions, +) !(switch (literal_type) { + .ascii => []u8, + .wide => [:0]u16, +}) { + const T = if (literal_type == .ascii) u8 else u16; + std.debug.assert(bytes.slice.len >= 2); // must at least have 2 double quote chars + + var buf = try 
std.ArrayList(T).initCapacity(allocator, bytes.slice.len); + errdefer buf.deinit(); + + var iterative_parser = IterativeStringParser.init(bytes, options); + + while (try iterative_parser.next()) |parsed| { + const c = parsed.codepoint; + if (parsed.from_escaped_integer) { + // We truncate here to get the correct behavior for ascii strings + try buf.append(std.mem.nativeToLittle(T, @truncate(c))); + } else { + switch (literal_type) { + .ascii => switch (options.output_code_page) { + .windows1252 => { + if (windows1252.bestFitFromCodepoint(c)) |best_fit| { + try buf.append(best_fit); + } else if (c < 0x10000 or c == code_pages.Codepoint.invalid) { + try buf.append('?'); + } else { + try buf.appendSlice("??"); + } + }, + .utf8 => { + var codepoint_to_encode = c; + if (c == code_pages.Codepoint.invalid) { + codepoint_to_encode = '�'; + } + var utf8_buf: [4]u8 = undefined; + const utf8_len = std.unicode.utf8Encode(codepoint_to_encode, &utf8_buf) catch unreachable; + try buf.appendSlice(utf8_buf[0..utf8_len]); + }, + else => unreachable, // Unsupported code page + }, + .wide => { + if (c == code_pages.Codepoint.invalid) { + try buf.append(std.mem.nativeToLittle(u16, '�')); + } else if (c < 0x10000) { + const short: u16 = @intCast(c); + try buf.append(std.mem.nativeToLittle(u16, short)); + } else { + const high = @as(u16, @intCast((c - 0x10000) >> 10)) + 0xD800; + try buf.append(std.mem.nativeToLittle(u16, high)); + const low = @as(u16, @intCast(c & 0x3FF)) + 0xDC00; + try buf.append(std.mem.nativeToLittle(u16, low)); + } + }, + } + } + } + + if (literal_type == .wide) { + return buf.toOwnedSliceSentinel(0); + } else { + return buf.toOwnedSlice(); + } +} + +pub fn parseQuotedAsciiString(allocator: std.mem.Allocator, bytes: SourceBytes, options: StringParseOptions) ![]u8 { + std.debug.assert(bytes.slice.len >= 2); // "" + return parseQuotedString(.ascii, allocator, bytes, options); +} + +pub fn parseQuotedWideString(allocator: std.mem.Allocator, bytes: SourceBytes, 
options: StringParseOptions) ![:0]u16 { + std.debug.assert(bytes.slice.len >= 3); // L"" + return parseQuotedString(.wide, allocator, bytes, options); +} + +pub fn parseQuotedStringAsWideString(allocator: std.mem.Allocator, bytes: SourceBytes, options: StringParseOptions) ![:0]u16 { + std.debug.assert(bytes.slice.len >= 2); // "" + return parseQuotedString(.wide, allocator, bytes, options); +} + +test "parse quoted ascii string" { + var arena_allocator = std.heap.ArenaAllocator.init(std.testing.allocator); + defer arena_allocator.deinit(); + const arena = arena_allocator.allocator(); + + try std.testing.expectEqualSlices(u8, "hello", try parseQuotedAsciiString(arena, .{ + .slice = + \\"hello" + , + .code_page = .windows1252, + }, .{})); + // hex with 0 digits + try std.testing.expectEqualSlices(u8, "\x00", try parseQuotedAsciiString(arena, .{ + .slice = + \\"\x" + , + .code_page = .windows1252, + }, .{})); + // hex max of 2 digits + try std.testing.expectEqualSlices(u8, "\xFFf", try parseQuotedAsciiString(arena, .{ + .slice = + \\"\XfFf" + , + .code_page = .windows1252, + }, .{})); + // octal with invalid octal digit + try std.testing.expectEqualSlices(u8, "\x019", try parseQuotedAsciiString(arena, .{ + .slice = + \\"\19" + , + .code_page = .windows1252, + }, .{})); + // escaped quotes + try std.testing.expectEqualSlices(u8, " \" ", try parseQuotedAsciiString(arena, .{ + .slice = + \\" "" " + , + .code_page = .windows1252, + }, .{})); + // backslash right before escaped quotes + try std.testing.expectEqualSlices(u8, "\"", try parseQuotedAsciiString(arena, .{ + .slice = + \\"\""" + , + .code_page = .windows1252, + }, .{})); + // octal overflow + try std.testing.expectEqualSlices(u8, "\x01", try parseQuotedAsciiString(arena, .{ + .slice = + \\"\401" + , + .code_page = .windows1252, + }, .{})); + // escapes + try std.testing.expectEqualSlices(u8, "\x08\n\r\t\\", try parseQuotedAsciiString(arena, .{ + .slice = + \\"\a\n\r\t\\" + , + .code_page = .windows1252, + }, 
.{})); + // uppercase escapes + try std.testing.expectEqualSlices(u8, "\x08\\N\\R\t\\", try parseQuotedAsciiString(arena, .{ + .slice = + \\"\A\N\R\T\\" + , + .code_page = .windows1252, + }, .{})); + // backslash on its own + try std.testing.expectEqualSlices(u8, "\\", try parseQuotedAsciiString(arena, .{ + .slice = + \\"\" + , + .code_page = .windows1252, + }, .{})); + // unrecognized escapes + try std.testing.expectEqualSlices(u8, "\\b", try parseQuotedAsciiString(arena, .{ + .slice = + \\"\b" + , + .code_page = .windows1252, + }, .{})); + // escaped carriage returns + try std.testing.expectEqualSlices(u8, "\\", try parseQuotedAsciiString( + arena, + .{ .slice = "\"\\\r\r\r\r\r\"", .code_page = .windows1252 }, + .{}, + )); + // escaped newlines + try std.testing.expectEqualSlices(u8, "", try parseQuotedAsciiString( + arena, + .{ .slice = "\"\\\n\n\n\n\n\"", .code_page = .windows1252 }, + .{}, + )); + // escaped CRLF pairs + try std.testing.expectEqualSlices(u8, "", try parseQuotedAsciiString( + arena, + .{ .slice = "\"\\\r\n\r\n\r\n\r\n\r\n\"", .code_page = .windows1252 }, + .{}, + )); + // escaped newlines with other whitespace + try std.testing.expectEqualSlices(u8, "", try parseQuotedAsciiString( + arena, + .{ .slice = "\"\\\n \t\r\n \r\t\n \t\"", .code_page = .windows1252 }, + .{}, + )); + // literal tab characters get converted to spaces (dependent on source file columns) + try std.testing.expectEqualSlices(u8, " ", try parseQuotedAsciiString( + arena, + .{ .slice = "\"\t\"", .code_page = .windows1252 }, + .{}, + )); + try std.testing.expectEqualSlices(u8, "abc ", try parseQuotedAsciiString( + arena, + .{ .slice = "\"abc\t\"", .code_page = .windows1252 }, + .{}, + )); + try std.testing.expectEqualSlices(u8, "abcdefg ", try parseQuotedAsciiString( + arena, + .{ .slice = "\"abcdefg\t\"", .code_page = .windows1252 }, + .{}, + )); + try std.testing.expectEqualSlices(u8, "\\ ", try parseQuotedAsciiString( + arena, + .{ .slice = "\"\\\t\"", .code_page = 
.windows1252 }, + .{}, + )); + // literal CR's get dropped + try std.testing.expectEqualSlices(u8, "", try parseQuotedAsciiString( + arena, + .{ .slice = "\"\r\r\r\r\r\"", .code_page = .windows1252 }, + .{}, + )); + // contiguous newlines and whitespace get collapsed to <space><newline> + try std.testing.expectEqualSlices(u8, " \n", try parseQuotedAsciiString( + arena, + .{ .slice = "\"\n\r\r \r\n \t \"", .code_page = .windows1252 }, + .{}, + )); +} + +test "parse quoted ascii string with utf8 code page" { + var arena_allocator = std.heap.ArenaAllocator.init(std.testing.allocator); + defer arena_allocator.deinit(); + const arena = arena_allocator.allocator(); + + try std.testing.expectEqualSlices(u8, "", try parseQuotedAsciiString( + arena, + .{ .slice = "\"\"", .code_page = .utf8 }, + .{}, + )); + // Codepoints that don't have a Windows-1252 representation get converted to ? + try std.testing.expectEqualSlices(u8, "?????????", try parseQuotedAsciiString( + arena, + .{ .slice = "\"кириллица\"", .code_page = .utf8 }, + .{}, + )); + // Codepoints that have a best fit mapping get converted accordingly, + // these are box drawing codepoints + try std.testing.expectEqualSlices(u8, "\x2b\x2d\x2b", try parseQuotedAsciiString( + arena, + .{ .slice = "\"┌─┐\"", .code_page = .utf8 }, + .{}, + )); + // Invalid UTF-8 gets converted to ? depending on well-formedness + try std.testing.expectEqualSlices(u8, "????", try parseQuotedAsciiString( + arena, + .{ .slice = "\"\xf0\xf0\x80\x80\x80\"", .code_page = .utf8 }, + .{}, + )); + // Codepoints that would require a UTF-16 surrogate pair get converted to ?? + try std.testing.expectEqualSlices(u8, "??", try parseQuotedAsciiString( + arena, + .{ .slice = "\"\xF2\xAF\xBA\xB4\"", .code_page = .utf8 }, + .{}, + )); + + // Output code page changes how invalid UTF-8 gets converted, since it + // now encodes the result as UTF-8 so it can write replacement characters. 
+ try std.testing.expectEqualSlices(u8, "����", try parseQuotedAsciiString( + arena, + .{ .slice = "\"\xf0\xf0\x80\x80\x80\"", .code_page = .utf8 }, + .{ .output_code_page = .utf8 }, + )); + try std.testing.expectEqualSlices(u8, "\xF2\xAF\xBA\xB4", try parseQuotedAsciiString( + arena, + .{ .slice = "\"\xF2\xAF\xBA\xB4\"", .code_page = .utf8 }, + .{ .output_code_page = .utf8 }, + )); + + // This used to cause integer overflow when reconsuming the 4-byte long codepoint + // after the escaped CRLF pair. + try std.testing.expectEqualSlices(u8, "\u{10348}", try parseQuotedAsciiString( + arena, + .{ .slice = "\"\\\r\n\u{10348}\"", .code_page = .utf8 }, + .{ .output_code_page = .utf8 }, + )); +} + +test "parse quoted wide string" { + var arena_allocator = std.heap.ArenaAllocator.init(std.testing.allocator); + defer arena_allocator.deinit(); + const arena = arena_allocator.allocator(); + + try std.testing.expectEqualSentinel(u16, 0, std.unicode.utf8ToUtf16LeStringLiteral("hello"), try parseQuotedWideString(arena, .{ + .slice = + \\L"hello" + , + .code_page = .windows1252, + }, .{})); + // hex with 0 digits + try std.testing.expectEqualSentinel(u16, 0, &[_:0]u16{0x0}, try parseQuotedWideString(arena, .{ + .slice = + \\L"\x" + , + .code_page = .windows1252, + }, .{})); + // hex max of 4 digits + try std.testing.expectEqualSentinel(u16, 0, &[_:0]u16{ std.mem.nativeToLittle(u16, 0xFFFF), std.mem.nativeToLittle(u16, 'f') }, try parseQuotedWideString(arena, .{ + .slice = + \\L"\XfFfFf" + , + .code_page = .windows1252, + }, .{})); + // octal max of 7 digits + try std.testing.expectEqualSentinel(u16, 0, &[_:0]u16{ std.mem.nativeToLittle(u16, 0x9493), std.mem.nativeToLittle(u16, '3'), std.mem.nativeToLittle(u16, '3') }, try parseQuotedWideString(arena, .{ + .slice = + \\L"\111222333" + , + .code_page = .windows1252, + }, .{})); + // octal overflow + try std.testing.expectEqualSentinel(u16, 0, &[_:0]u16{std.mem.nativeToLittle(u16, 0xFF01)}, try parseQuotedWideString(arena, .{ + 
.slice = + \\L"\777401" + , + .code_page = .windows1252, + }, .{})); + // literal tab characters get converted to spaces (dependent on source file columns) + try std.testing.expectEqualSentinel(u16, 0, std.unicode.utf8ToUtf16LeStringLiteral("abcdefg "), try parseQuotedWideString( + arena, + .{ .slice = "L\"abcdefg\t\"", .code_page = .windows1252 }, + .{}, + )); + // Windows-1252 conversion + try std.testing.expectEqualSentinel(u16, 0, std.unicode.utf8ToUtf16LeStringLiteral("ðð€€€"), try parseQuotedWideString( + arena, + .{ .slice = "L\"\xf0\xf0\x80\x80\x80\"", .code_page = .windows1252 }, + .{}, + )); + // Invalid escape sequences are skipped + try std.testing.expectEqualSentinel(u16, 0, std.unicode.utf8ToUtf16LeStringLiteral(""), try parseQuotedWideString( + arena, + .{ .slice = "L\"\\H\"", .code_page = .windows1252 }, + .{}, + )); +} + +test "parse quoted wide string with utf8 code page" { + var arena_allocator = std.heap.ArenaAllocator.init(std.testing.allocator); + defer arena_allocator.deinit(); + const arena = arena_allocator.allocator(); + + try std.testing.expectEqualSentinel(u16, 0, &[_:0]u16{}, try parseQuotedWideString( + arena, + .{ .slice = "L\"\"", .code_page = .utf8 }, + .{}, + )); + try std.testing.expectEqualSentinel(u16, 0, std.unicode.utf8ToUtf16LeStringLiteral("кириллица"), try parseQuotedWideString( + arena, + .{ .slice = "L\"кириллица\"", .code_page = .utf8 }, + .{}, + )); + // Invalid UTF-8 gets converted to � depending on well-formedness + try std.testing.expectEqualSentinel(u16, 0, std.unicode.utf8ToUtf16LeStringLiteral("����"), try parseQuotedWideString( + arena, + .{ .slice = "L\"\xf0\xf0\x80\x80\x80\"", .code_page = .utf8 }, + .{}, + )); +} + +test "parse quoted ascii string as wide string" { + var arena_allocator = std.heap.ArenaAllocator.init(std.testing.allocator); + defer arena_allocator.deinit(); + const arena = arena_allocator.allocator(); + + try std.testing.expectEqualSentinel(u16, 0, 
std.unicode.utf8ToUtf16LeStringLiteral("кириллица"), try parseQuotedStringAsWideString(
        arena,
        .{ .slice = "\"кириллица\"", .code_page = .utf8 },
        .{},
    ));
    // Whether or not invalid escapes are skipped is still determined by the L prefix
    try std.testing.expectEqualSentinel(u16, 0, std.unicode.utf8ToUtf16LeStringLiteral("\\H"), try parseQuotedStringAsWideString(
        arena,
        .{ .slice = "\"\\H\"", .code_page = .windows1252 },
        .{},
    ));
    try std.testing.expectEqualSentinel(u16, 0, std.unicode.utf8ToUtf16LeStringLiteral(""), try parseQuotedStringAsWideString(
        arena,
        .{ .slice = "L\"\\H\"", .code_page = .windows1252 },
        .{},
    ));
    // Maximum escape sequence value is also determined by the L prefix
    try std.testing.expectEqualSentinel(u16, 0, &[_:0]u16{ std.mem.nativeToLittle(u16, 0x12), std.mem.nativeToLittle(u16, '3'), std.mem.nativeToLittle(u16, '4') }, try parseQuotedStringAsWideString(
        arena,
        .{ .slice = "\"\\x1234\"", .code_page = .windows1252 },
        .{},
    ));
    try std.testing.expectEqualSentinel(u16, 0, &[_:0]u16{std.mem.nativeToLittle(u16, 0x1234)}, try parseQuotedStringAsWideString(
        arena,
        .{ .slice = "L\"\\x1234\"", .code_page = .windows1252 },
        .{},
    ));
}

/// Returns how many columns a tab at `column` advances to reach the next
/// tab stop, where tab stops occur every `tab_columns` columns.
pub fn columnsUntilTabStop(column: usize, tab_columns: usize) usize {
    // With tab_columns = 8:
    //   0 => 8, 1 => 7, 2 => 6, 3 => 5, 4 => 4
    //   5 => 3, 6 => 2, 7 => 1, 8 => 8
    const offset_into_stop = column % tab_columns;
    return tab_columns - offset_into_stop;
}

/// Returns the number of columns occupied by byte `c` when it appears at
/// `cur_column`: a tab runs to the next tab stop, anything else is 1 wide.
pub fn columnWidth(cur_column: usize, c: u8, tab_columns: usize) usize {
    if (c == '\t') return columnsUntilTabStop(cur_column, tab_columns);
    return 1;
}

/// A parsed numeric value together with whether it carried (or inherited)
/// the `L` long suffix.
pub const Number = struct {
    value: u32,
    is_long: bool = false,

    /// Returns the low 16 bits of the value.
    pub fn asWord(self: Number) u16 {
        return @truncate(self.value);
    }

    /// Applies the binary operator `operator_char` (one of `-`, `+`, `|`, `&`)
    /// using wrapping arithmetic. The result is long if either operand is.
    pub fn evaluateOperator(lhs: Number, operator_char: u8, rhs: Number) Number {
        return .{
            .value = switch (operator_char) {
                '-' => lhs.value -% rhs.value,
                '+' => lhs.value +% rhs.value,
                '|' => lhs.value | rhs.value,
                '&' => lhs.value & rhs.value,
                else => unreachable, // invalid operator, this would be a lexer/parser bug
            },
            .is_long = lhs.is_long or rhs.is_long,
        };
    }
};

/// Parses a number literal into a wrapping 32-bit value.
///
/// Accepts an optional leading `-` or `~`, an optional `0x`/`0X` (hex) or
/// lowercase-only `0o` (octal) radix prefix, and digits up to the first
/// character that is not a valid digit for the radix. An `L`/`l` marks the
/// number as long and ends parsing.
///
/// Assumes that number literals normally rejected by RC's preprocessor
/// are similarly rejected before being parsed.
///
/// Relevant RC preprocessor errors:
///  RC2021: expected exponent value, not '<digit>'
///   example that is rejected: 1e1
///   example that is accepted: 1ea
///   (this function will parse the two examples above the same)
pub fn parseNumberLiteral(bytes: SourceBytes) Number {
    std.debug.assert(bytes.slice.len > 0);

    const Prefix = enum { none, minus, complement };
    const prefix: Prefix = switch (bytes.slice[0]) {
        '-' => .minus,
        '~' => .complement,
        else => .none,
    };
    var digits = if (prefix == .none) bytes.slice else bytes.slice[1..];

    var radix: u8 = 10;
    if (digits.len > 2 and digits[0] == '0') {
        const prefixed_radix: ?u8 = switch (digits[1]) {
            'o' => 8, // octal radix prefix is case-sensitive
            'x', 'X' => 16,
            else => null,
        };
        if (prefixed_radix) |r| {
            radix = r;
            digits = digits[2..];
        }
    }

    var value: u32 = 0;
    var is_long = false;
    var offset: usize = 0;
    while (bytes.code_page.codepointAt(offset, digits)) |codepoint| : (offset += codepoint.byte_len) {
        const c = codepoint.value;
        if (c == 'L' or c == 'l') {
            is_long = true;
            break;
        }
        // On invalid digit for the radix, just stop parsing but don't fail
        if (c > 0x7F) break;
        const digit = std.fmt.charToDigit(@intCast(c), radix) catch break;

        if (value != 0) {
            value *%= radix;
        }
        value +%= digit;
    }

    return .{
        .value = switch (prefix) {
            .none => value,
            .minus => 0 -% value,
            .complement => ~value,
        },
        .is_long = is_long,
    };
}

test "parse number literal" {
    try std.testing.expectEqual(Number{ .value = 0, .is_long = false }, parseNumberLiteral(.{ .slice = "0", .code_page = .windows1252 }));
    try
std.testing.expectEqual(Number{ .value = 1, .is_long = false }, parseNumberLiteral(.{ .slice = "1", .code_page = .windows1252 })); + try std.testing.expectEqual(Number{ .value = 1, .is_long = true }, parseNumberLiteral(.{ .slice = "1L", .code_page = .windows1252 })); + try std.testing.expectEqual(Number{ .value = 1, .is_long = true }, parseNumberLiteral(.{ .slice = "1l", .code_page = .windows1252 })); + try std.testing.expectEqual(Number{ .value = 1, .is_long = false }, parseNumberLiteral(.{ .slice = "1garbageL", .code_page = .windows1252 })); + try std.testing.expectEqual(Number{ .value = 4294967295, .is_long = false }, parseNumberLiteral(.{ .slice = "4294967295", .code_page = .windows1252 })); + try std.testing.expectEqual(Number{ .value = 0, .is_long = false }, parseNumberLiteral(.{ .slice = "4294967296", .code_page = .windows1252 })); + try std.testing.expectEqual(Number{ .value = 1, .is_long = true }, parseNumberLiteral(.{ .slice = "4294967297L", .code_page = .windows1252 })); + + // can handle any length of number, wraps on overflow appropriately + const big_overflow = parseNumberLiteral(.{ .slice = "1000000000000000000000000000000000000000000000000000000000000000000000000000000090000000001", .code_page = .windows1252 }); + try std.testing.expectEqual(Number{ .value = 4100654081, .is_long = false }, big_overflow); + try std.testing.expectEqual(@as(u16, 1025), big_overflow.asWord()); + + try std.testing.expectEqual(Number{ .value = 0x20, .is_long = false }, parseNumberLiteral(.{ .slice = "0x20", .code_page = .windows1252 })); + try std.testing.expectEqual(Number{ .value = 0x2A, .is_long = true }, parseNumberLiteral(.{ .slice = "0x2AL", .code_page = .windows1252 })); + try std.testing.expectEqual(Number{ .value = 0x2A, .is_long = true }, parseNumberLiteral(.{ .slice = "0x2aL", .code_page = .windows1252 })); + try std.testing.expectEqual(Number{ .value = 0x2A, .is_long = true }, parseNumberLiteral(.{ .slice = "0x2aL", .code_page = .windows1252 })); + + try 
std.testing.expectEqual(Number{ .value = 0o20, .is_long = false }, parseNumberLiteral(.{ .slice = "0o20", .code_page = .windows1252 })); + try std.testing.expectEqual(Number{ .value = 0o20, .is_long = true }, parseNumberLiteral(.{ .slice = "0o20L", .code_page = .windows1252 })); + try std.testing.expectEqual(Number{ .value = 0o2, .is_long = false }, parseNumberLiteral(.{ .slice = "0o29", .code_page = .windows1252 })); + try std.testing.expectEqual(Number{ .value = 0, .is_long = false }, parseNumberLiteral(.{ .slice = "0O29", .code_page = .windows1252 })); + + try std.testing.expectEqual(Number{ .value = 0xFFFFFFFF, .is_long = false }, parseNumberLiteral(.{ .slice = "-1", .code_page = .windows1252 })); + try std.testing.expectEqual(Number{ .value = 0xFFFFFFFE, .is_long = false }, parseNumberLiteral(.{ .slice = "~1", .code_page = .windows1252 })); + try std.testing.expectEqual(Number{ .value = 0xFFFFFFFF, .is_long = true }, parseNumberLiteral(.{ .slice = "-4294967297L", .code_page = .windows1252 })); + try std.testing.expectEqual(Number{ .value = 0xFFFFFFFE, .is_long = true }, parseNumberLiteral(.{ .slice = "~4294967297L", .code_page = .windows1252 })); + try std.testing.expectEqual(Number{ .value = 0xFFFFFFFD, .is_long = false }, parseNumberLiteral(.{ .slice = "-0X3", .code_page = .windows1252 })); + + // anything after L is ignored + try std.testing.expectEqual(Number{ .value = 0x2A, .is_long = true }, parseNumberLiteral(.{ .slice = "0x2aL5", .code_page = .windows1252 })); +} diff --git a/lib/compiler/resinator/main.zig b/lib/compiler/resinator/main.zig @@ -0,0 +1,719 @@ +const std = @import("std"); +const builtin = @import("builtin"); +const removeComments = @import("comments.zig").removeComments; +const parseAndRemoveLineCommands = @import("source_mapping.zig").parseAndRemoveLineCommands; +const compile = @import("compile.zig").compile; +const Diagnostics = @import("errors.zig").Diagnostics; +const cli = @import("cli.zig"); +const preprocess = 
@import("preprocess.zig");
const renderErrorMessage = @import("utils.zig").renderErrorMessage;
const aro = @import("aro");

/// Entry point of the lazily-compiled `zig rc` subcommand.
///
/// Expected argv layout: `<exe> <zig lib dir> [--zig-integration] <rc args...>`.
/// With `--zig-integration`, errors are sent as error bundles over stdout via
/// `std.zig.Server`; otherwise diagnostics are rendered to stderr.
///
/// Pipeline: parse the CLI options, resolve the system include paths,
/// preprocess the input with aro (unless disabled), strip `#line` commands and
/// comments, compile to the output file, and optionally write a JSON depfile.
/// User-facing failures are reported through the error handler and exit with
/// status 1; allocation and unexpected errors are returned.
pub fn main() !void {
    var gpa = std.heap.GeneralPurposeAllocator(.{}){};
    defer std.debug.assert(gpa.deinit() == .ok);
    const allocator = gpa.allocator();

    var arena_state = std.heap.ArenaAllocator.init(allocator);
    defer arena_state.deinit();
    const arena = arena_state.allocator();

    const stderr = std.io.getStdErr();
    const stderr_config = std.io.tty.detectConfig(stderr);

    const args = try std.process.argsAlloc(allocator);
    defer std.process.argsFree(allocator, args);

    if (args.len < 2) {
        try renderErrorMessage(stderr.writer(), stderr_config, .err, "expected zig lib dir as first argument", .{});
        std.os.exit(1);
    }
    const zig_lib_dir = args[1];
    // Everything after the lib dir (and the optional integration flag) is what
    // the rc command line parser sees; diagnostics index into this slice.
    var cli_args = args[2..];

    var zig_integration = false;
    if (cli_args.len > 0 and std.mem.eql(u8, cli_args[0], "--zig-integration")) {
        zig_integration = true;
        cli_args = args[3..];
    }

    var error_handler: ErrorHandler = switch (zig_integration) {
        true => .{
            .server = .{
                .out = std.io.getStdOut(),
                .in = undefined, // won't be receiving messages
                .receive_fifo = undefined, // won't be receiving messages
            },
        },
        false => .{
            .tty = stderr_config,
        },
    };

    if (zig_integration) {
        // Send progress with a special string to indicate that the building of the
        // resinator binary is finished and we've moved on to actually compiling the .rc file
        try error_handler.server.serveStringMessage(.progress, "<resinator>");
    }

    var options = options: {
        var cli_diagnostics = cli.Diagnostics.init(allocator);
        defer cli_diagnostics.deinit();
        var options = cli.parse(allocator, cli_args, &cli_diagnostics) catch |err| switch (err) {
            error.ParseError => {
                try error_handler.emitCliDiagnostics(allocator, cli_args, &cli_diagnostics);
                std.os.exit(1);
            },
            else => |e| return e,
        };
        try options.maybeAppendRC(std.fs.cwd());

        if (!zig_integration) {
            // print any warnings/notes
            // The diagnostics were produced while parsing `cli_args`, so they
            // must be rendered against that same slice (as the error path above does).
            cli_diagnostics.renderToStdErr(cli_args, stderr_config);
            // If there was something printed, then add an extra newline separator
            // so that there is a clear separation between the cli diagnostics and whatever
            // gets printed after
            if (cli_diagnostics.errors.items.len > 0) {
                try stderr.writeAll("\n");
            }
        }
        break :options options;
    };
    defer options.deinit();

    if (options.print_help_and_exit) {
        try cli.writeUsage(stderr.writer(), "zig rc");
        return;
    }

    // Don't allow verbose when integrating with Zig via stdout, since stdout
    // is used for the server messages in that mode.
    if (zig_integration) {
        options.verbose = false;
    }

    const stdout_writer = std.io.getStdOut().writer();
    if (options.verbose) {
        try options.dumpVerbose(stdout_writer);
        try stdout_writer.writeByte('\n');
    }

    var dependencies_list = std.ArrayList([]const u8).init(allocator);
    defer {
        for (dependencies_list.items) |item| {
            allocator.free(item);
        }
        dependencies_list.deinit();
    }
    // Dependencies are only collected when a depfile was requested.
    const maybe_dependencies_list: ?*std.ArrayList([]const u8) = if (options.depfile_path != null) &dependencies_list else null;

    const include_paths = getIncludePaths(arena, options.auto_includes, zig_lib_dir) catch |err| switch (err) {
        error.OutOfMemory => |e| return e,
        else => |e| {
            switch (e) {
                error.MsvcIncludesNotFound => {
                    try error_handler.emitMessage(allocator, .err, "MSVC include paths could not be automatically detected", .{});
                },
                error.MingwIncludesNotFound => {
                    try error_handler.emitMessage(allocator, .err, "MinGW include paths could not be automatically detected", .{});
                },
            }
            try error_handler.emitMessage(allocator, .note, "to disable auto includes, use the option /:auto-includes none", .{});
            std.os.exit(1);
        },
    };

    const full_input = full_input: {
        if (options.preprocess != .no) {
            var preprocessed_buf = std.ArrayList(u8).init(allocator);
            errdefer preprocessed_buf.deinit();

            // We're going to throw away everything except the final preprocessed output anyway,
            // so we can use a scoped arena for everything else.
            var aro_arena_state = std.heap.ArenaAllocator.init(allocator);
            defer aro_arena_state.deinit();
            const aro_arena = aro_arena_state.allocator();

            var comp = aro.Compilation.init(aro_arena);
            defer comp.deinit();

            var argv = std.ArrayList([]const u8).init(comp.gpa);
            defer argv.deinit();

            try argv.append("arocc"); // dummy command name
            try preprocess.appendAroArgs(aro_arena, &argv, options, include_paths);
            try argv.append(options.input_filename);

            if (options.verbose) {
                try stdout_writer.writeAll("Preprocessor: arocc (built-in)\n");
                for (argv.items[0 .. argv.items.len - 1]) |arg| {
                    try stdout_writer.print("{s} ", .{arg});
                }
                try stdout_writer.print("{s}\n\n", .{argv.items[argv.items.len - 1]});
            }

            preprocess.preprocess(&comp, preprocessed_buf.writer(), argv.items, maybe_dependencies_list) catch |err| switch (err) {
                error.GeneratedSourceError => {
                    try error_handler.emitAroDiagnostics(allocator, "failed during preprocessor setup (this is always a bug):", &comp);
                    std.os.exit(1);
                },
                // ArgError can occur if e.g. the .rc file is not found
                error.ArgError, error.PreprocessError => {
                    try error_handler.emitAroDiagnostics(allocator, "failed during preprocessing:", &comp);
                    std.os.exit(1);
                },
                error.StreamTooLong => {
                    try error_handler.emitMessage(allocator, .err, "failed during preprocessing: maximum file size exceeded", .{});
                    std.os.exit(1);
                },
                error.OutOfMemory => |e| return e,
            };

            break :full_input try preprocessed_buf.toOwnedSlice();
        } else {
            break :full_input std.fs.cwd().readFileAlloc(allocator, options.input_filename, std.math.maxInt(usize)) catch |err| {
                try error_handler.emitMessage(allocator, .err, "unable to read input file path '{s}': {s}", .{ options.input_filename, @errorName(err) });
                std.os.exit(1);
            };
        }
    };
    defer allocator.free(full_input);

    if (options.preprocess == .only) {
        try std.fs.cwd().writeFile(options.output_filename, full_input);
        return;
    }

    // Note: We still want to run this when no-preprocess is set because:
    // 1. We want to print accurate line numbers after removing multiline comments
    // 2. We want to be able to handle an already-preprocessed input with #line commands in it
    var mapping_results = try parseAndRemoveLineCommands(allocator, full_input, full_input, .{ .initial_filename = options.input_filename });
    defer mapping_results.mappings.deinit(allocator);

    const final_input = removeComments(mapping_results.result, mapping_results.result, &mapping_results.mappings) catch |err| switch (err) {
        error.InvalidSourceMappingCollapse => {
            try error_handler.emitMessage(allocator, .err, "failed during comment removal; this is a known bug", .{});
            std.os.exit(1);
        },
        else => |e| return e,
    };

    var output_file = std.fs.cwd().createFile(options.output_filename, .{}) catch |err| {
        try error_handler.emitMessage(allocator, .err, "unable to create output file '{s}': {s}", .{ options.output_filename, @errorName(err) });
        std.os.exit(1);
    };
    // Tracks the explicit close on the compile-error path so the deferred
    // close does not run a second time.
    var output_file_closed = false;
    defer if (!output_file_closed) output_file.close();

    var diagnostics = Diagnostics.init(allocator);
    defer diagnostics.deinit();

    var output_buffered_stream = std.io.bufferedWriter(output_file.writer());

    compile(allocator, final_input, output_buffered_stream.writer(), .{
        .cwd = std.fs.cwd(),
        .diagnostics = &diagnostics,
        .source_mappings = &mapping_results.mappings,
        .dependencies_list = maybe_dependencies_list,
        .ignore_include_env_var = options.ignore_include_env_var,
        .extra_include_paths = options.extra_include_paths.items,
        .system_include_paths = include_paths,
        .default_language_id = options.default_language_id,
        .default_code_page = options.default_code_page orelse .windows1252,
        .verbose = options.verbose,
        .null_terminate_string_table_strings = options.null_terminate_string_table_strings,
        .max_string_literal_codepoints = options.max_string_literal_codepoints,
        .silent_duplicate_control_ids = options.silent_duplicate_control_ids,
        .warn_instead_of_error_on_invalid_code_page = options.warn_instead_of_error_on_invalid_code_page,
    }) catch |err| switch (err) {
        error.ParseError, error.CompileError => {
            try error_handler.emitDiagnostics(allocator, std.fs.cwd(), final_input, &diagnostics, mapping_results.mappings);
            // Delete the output file on error
            output_file.close();
            output_file_closed = true;
            // Failing to delete is not really a big deal, so swallow any errors
            std.fs.cwd().deleteFile(options.output_filename) catch {};
            std.os.exit(1);
        },
        else => |e| return e,
    };

    try output_buffered_stream.flush();

    // print any warnings/notes
    if (!zig_integration) {
        diagnostics.renderToStdErr(std.fs.cwd(), final_input, stderr_config, mapping_results.mappings);
    }

    // write the depfile
    if (options.depfile_path) |depfile_path| {
        var depfile = std.fs.cwd().createFile(depfile_path, .{}) catch |err| {
            try error_handler.emitMessage(allocator, .err, "unable to create depfile '{s}': {s}", .{ depfile_path, @errorName(err) });
            std.os.exit(1);
        };
        defer depfile.close();

        const depfile_writer = depfile.writer();
        var depfile_buffered_writer = std.io.bufferedWriter(depfile_writer);
        switch (options.depfile_fmt) {
            .json => {
                var write_stream = std.json.writeStream(depfile_buffered_writer.writer(), .{ .whitespace = .indent_2 });
                defer write_stream.deinit();

                try write_stream.beginArray();
                for (dependencies_list.items) |dep_path| {
                    try write_stream.write(dep_path);
                }
                try write_stream.endArray();
            },
        }
        try depfile_buffered_writer.flush();
    }
}

/// Resolves the system include directories for the requested auto-includes
/// mode. On non-Windows hosts `.msvc` fails immediately and `.any` is treated
/// as `.gnu`; on Windows `.any` tries MSVC first and falls back to MinGW.
fn getIncludePaths(arena: std.mem.Allocator, auto_includes_option: cli.Options.AutoIncludes, zig_lib_dir: []const u8) ![]const []const u8 {
    var includes = auto_includes_option;
    if (builtin.target.os.tag != .windows) {
        switch (includes) {
            // MSVC can't be found when the host isn't Windows, so short-circuit.
            .msvc => return error.MsvcIncludesNotFound,
            // Skip straight to gnu since we won't be able to detect MSVC on non-Windows hosts.
+ .any => includes = .gnu, + .none, .gnu => {}, + } + } + + while (true) { + switch (includes) { + .none => return &[_][]const u8{}, + .any, .msvc => { + // MSVC is only detectable on Windows targets. This unreachable is to signify + // that .any and .msvc should be dealt with on non-Windows targets before this point, + // since getting MSVC include paths uses Windows-only APIs. + if (builtin.target.os.tag != .windows) unreachable; + + const target_query: std.Target.Query = .{ + .os_tag = .windows, + .abi = .msvc, + }; + const target = std.zig.resolveTargetQueryOrFatal(target_query); + const is_native_abi = target_query.isNativeAbi(); + const detected_libc = std.zig.LibCDirs.detect(arena, zig_lib_dir, target, is_native_abi, true, null) catch { + if (includes == .any) { + // fall back to mingw + includes = .gnu; + continue; + } + return error.MsvcIncludesNotFound; + }; + if (detected_libc.libc_include_dir_list.len == 0) { + if (includes == .any) { + // fall back to mingw + includes = .gnu; + continue; + } + return error.MsvcIncludesNotFound; + } + return detected_libc.libc_include_dir_list; + }, + .gnu => { + const target_query: std.Target.Query = .{ + .os_tag = .windows, + .abi = .gnu, + }; + const target = std.zig.resolveTargetQueryOrFatal(target_query); + const is_native_abi = target_query.isNativeAbi(); + const detected_libc = std.zig.LibCDirs.detect(arena, zig_lib_dir, target, is_native_abi, true, null) catch |err| switch (err) { + error.OutOfMemory => |e| return e, + else => return error.MingwIncludesNotFound, + }; + return detected_libc.libc_include_dir_list; + }, + } + } +} + +const ErrorBundle = std.zig.ErrorBundle; +const SourceMappings = @import("source_mapping.zig").SourceMappings; + +const ErrorHandler = union(enum) { + server: std.zig.Server, + tty: std.io.tty.Config, + + pub fn emitCliDiagnostics( + self: *ErrorHandler, + allocator: std.mem.Allocator, + args: []const []const u8, + diagnostics: *cli.Diagnostics, + ) !void { + switch (self.*) { + 
.server => |*server| { + var error_bundle = try cliDiagnosticsToErrorBundle(allocator, diagnostics); + defer error_bundle.deinit(allocator); + + try server.serveErrorBundle(error_bundle); + }, + .tty => { + diagnostics.renderToStdErr(args, self.tty); + }, + } + } + + pub fn emitAroDiagnostics( + self: *ErrorHandler, + allocator: std.mem.Allocator, + fail_msg: []const u8, + comp: *aro.Compilation, + ) !void { + switch (self.*) { + .server => |*server| { + var error_bundle = try aroDiagnosticsToErrorBundle(allocator, fail_msg, comp); + defer error_bundle.deinit(allocator); + + try server.serveErrorBundle(error_bundle); + }, + .tty => { + // extra newline to separate this line from the aro errors + try renderErrorMessage(std.io.getStdErr().writer(), self.tty, .err, "{s}\n", .{fail_msg}); + aro.Diagnostics.render(comp, self.tty); + }, + } + } + + pub fn emitDiagnostics( + self: *ErrorHandler, + allocator: std.mem.Allocator, + cwd: std.fs.Dir, + source: []const u8, + diagnostics: *Diagnostics, + mappings: SourceMappings, + ) !void { + switch (self.*) { + .server => |*server| { + var error_bundle = try diagnosticsToErrorBundle(allocator, source, diagnostics, mappings); + defer error_bundle.deinit(allocator); + + try server.serveErrorBundle(error_bundle); + }, + .tty => { + diagnostics.renderToStdErr(cwd, source, self.tty, mappings); + }, + } + } + + pub fn emitMessage( + self: *ErrorHandler, + allocator: std.mem.Allocator, + msg_type: @import("utils.zig").ErrorMessageType, + comptime format: []const u8, + args: anytype, + ) !void { + switch (self.*) { + .server => |*server| { + // only emit errors + if (msg_type != .err) return; + + var error_bundle = try errorStringToErrorBundle(allocator, format, args); + defer error_bundle.deinit(allocator); + + try server.serveErrorBundle(error_bundle); + }, + .tty => { + try renderErrorMessage(std.io.getStdErr().writer(), self.tty, msg_type, format, args); + }, + } + } +}; + +fn cliDiagnosticsToErrorBundle( + gpa: std.mem.Allocator, 
+ diagnostics: *cli.Diagnostics, +) !ErrorBundle { + @setCold(true); + + var bundle: ErrorBundle.Wip = undefined; + try bundle.init(gpa); + errdefer bundle.deinit(); + + try bundle.addRootErrorMessage(.{ + .msg = try bundle.addString("invalid command line option(s)"), + }); + + var cur_err: ?ErrorBundle.ErrorMessage = null; + var cur_notes: std.ArrayListUnmanaged(ErrorBundle.ErrorMessage) = .{}; + defer cur_notes.deinit(gpa); + for (diagnostics.errors.items) |err_details| { + switch (err_details.type) { + .err => { + if (cur_err) |err| { + try flushErrorMessageIntoBundle(&bundle, err, cur_notes.items); + } + cur_err = .{ + .msg = try bundle.addString(err_details.msg.items), + }; + cur_notes.clearRetainingCapacity(); + }, + // A warning ends the pending error's run of notes. Flush the pending + // error before clearing it so that it is not dropped from the bundle. + .warning => { + if (cur_err) |err| { + try flushErrorMessageIntoBundle(&bundle, err, cur_notes.items); + } + cur_err = null; + cur_notes.clearRetainingCapacity(); + }, + .note => { + if (cur_err == null) continue; + cur_err.?.notes_len += 1; + try cur_notes.append(gpa, .{ + .msg = try bundle.addString(err_details.msg.items), + }); + }, + } + } + if (cur_err) |err| { + try flushErrorMessageIntoBundle(&bundle, err, cur_notes.items); + } + + return try bundle.toOwnedBundle(""); +} + +/// Converts the errors in `diagnostics` (and the notes that directly follow each +/// error) into an `ErrorBundle`. Warnings and hints are not added to the bundle. +/// The returned bundle must be deinitialized by the caller with `gpa`. +fn diagnosticsToErrorBundle( + gpa: std.mem.Allocator, + source: []const u8, + diagnostics: *Diagnostics, + mappings: SourceMappings, +) !ErrorBundle { + @setCold(true); + + var bundle: ErrorBundle.Wip = undefined; + try bundle.init(gpa); + errdefer bundle.deinit(); + + var msg_buf: std.ArrayListUnmanaged(u8) = .{}; + defer msg_buf.deinit(gpa); + var cur_err: ?ErrorBundle.ErrorMessage = null; + var cur_notes: std.ArrayListUnmanaged(ErrorBundle.ErrorMessage) = .{}; + defer cur_notes.deinit(gpa); + for (diagnostics.errors.items) |err_details| { + switch (err_details.type) { + .hint => continue, + // Flush and clear the current error so that it is not lost and so that + // notes don't bleed into unassociated errors + .warning => { + if (cur_err) |err| { + try flushErrorMessageIntoBundle(&bundle, err, cur_notes.items); + } + cur_err = null; + cur_notes.clearRetainingCapacity(); + continue; + }, + .note => if (cur_err == null) continue, + .err => {}, + } + const corresponding_span = mappings.getCorrespondingSpan(err_details.token.line_number).?; + const err_line = 
corresponding_span.start_line; + const err_filename = mappings.files.get(corresponding_span.filename_offset); + + const source_line_start = err_details.token.getLineStartForErrorDisplay(source); + // Treat tab stops as 1 column wide for error display purposes, + // and add one to get a 1-based column + const column = err_details.token.calculateColumn(source, 1, source_line_start) + 1; + + msg_buf.clearRetainingCapacity(); + try err_details.render(msg_buf.writer(gpa), source, diagnostics.strings.items); + + const src_loc = src_loc: { + var src_loc: ErrorBundle.SourceLocation = .{ + .src_path = try bundle.addString(err_filename), + .line = @intCast(err_line - 1), // 1-based -> 0-based + .column = @intCast(column - 1), // 1-based -> 0-based + .span_start = 0, + .span_main = 0, + .span_end = 0, + }; + if (err_details.print_source_line) { + const source_line = err_details.token.getLineForErrorDisplay(source, source_line_start); + const visual_info = err_details.visualTokenInfo(source_line_start, source_line_start + source_line.len); + src_loc.span_start = @intCast(visual_info.point_offset - visual_info.before_len); + src_loc.span_main = @intCast(visual_info.point_offset); + src_loc.span_end = @intCast(visual_info.point_offset + 1 + visual_info.after_len); + src_loc.source_line = try bundle.addString(source_line); + } + break :src_loc try bundle.addSourceLocation(src_loc); + }; + + switch (err_details.type) { + .err => { + if (cur_err) |err| { + try flushErrorMessageIntoBundle(&bundle, err, cur_notes.items); + } + cur_err = .{ + .msg = try bundle.addString(msg_buf.items), + .src_loc = src_loc, + }; + cur_notes.clearRetainingCapacity(); + }, + .note => { + cur_err.?.notes_len += 1; + try cur_notes.append(gpa, .{ + .msg = try bundle.addString(msg_buf.items), + .src_loc = src_loc, + }); + }, + .warning, .hint => unreachable, + } + } + if (cur_err) |err| { + try flushErrorMessageIntoBundle(&bundle, err, cur_notes.items); + } + + return try bundle.toOwnedBundle(""); +} + +fn 
flushErrorMessageIntoBundle(wip: *ErrorBundle.Wip, msg: ErrorBundle.ErrorMessage, notes: []const ErrorBundle.ErrorMessage) !void { + try wip.addRootErrorMessage(msg); + const notes_start = try wip.reserveNotes(@intCast(notes.len)); + // Fill the note slots reserved above, in order, with the indexes of the added notes + for (notes_start.., notes) |i, note| { + wip.extra.items[i] = @intFromEnum(wip.addErrorMessageAssumeCapacity(note)); + } +} + +/// Builds an `ErrorBundle` containing a single root error message formatted from +/// `format` and `args`. The returned bundle must be deinitialized by the caller +/// with `allocator`. +fn errorStringToErrorBundle(allocator: std.mem.Allocator, comptime format: []const u8, args: anytype) !ErrorBundle { + @setCold(true); + var bundle: ErrorBundle.Wip = undefined; + try bundle.init(allocator); + errdefer bundle.deinit(); + try bundle.addRootErrorMessage(.{ + .msg = try bundle.printString(format, args), + }); + return try bundle.toOwnedBundle(""); +} + +/// Builds an `ErrorBundle` whose first root message is `fail_msg`, followed by +/// the errors in `comp.diagnostics` (with the notes that directly follow each +/// error). Warnings and disabled diagnostics are not added to the bundle. +/// The returned bundle must be deinitialized by the caller with `gpa`. +fn aroDiagnosticsToErrorBundle( + gpa: std.mem.Allocator, + fail_msg: []const u8, + comp: *aro.Compilation, +) !ErrorBundle { + @setCold(true); + + var bundle: ErrorBundle.Wip = undefined; + try bundle.init(gpa); + errdefer bundle.deinit(); + + try bundle.addRootErrorMessage(.{ + .msg = try bundle.addString(fail_msg), + }); + + var msg_writer = MsgWriter.init(gpa); + defer msg_writer.deinit(); + var cur_err: ?ErrorBundle.ErrorMessage = null; + var cur_notes: std.ArrayListUnmanaged(ErrorBundle.ErrorMessage) = .{}; + defer cur_notes.deinit(gpa); + for (comp.diagnostics.list.items) |msg| { + switch (msg.kind) { + // Flush and clear the current error so that it is not lost and so that + // notes don't bleed into unassociated errors + .off, .warning => { + if (cur_err) |err| { + try flushErrorMessageIntoBundle(&bundle, err, cur_notes.items); + } + cur_err = null; + cur_notes.clearRetainingCapacity(); + continue; + }, + .note => if (cur_err == null) continue, + .@"fatal error", .@"error" => {}, + .default => unreachable, + } + msg_writer.resetRetainingCapacity(); + aro.Diagnostics.renderMessage(comp, &msg_writer, msg); + + const src_loc = src_loc: { + if (msg_writer.path) |src_path| { + var src_loc: ErrorBundle.SourceLocation = .{ + .src_path = try bundle.addString(src_path), + .line = msg_writer.line - 1, // 1-based -> 0-based + .column = msg_writer.col - 1, // 1-based -> 0-based + .span_start = 0, + .span_main = 0, + 
.span_end = 0, + }; + if (msg_writer.source_line) |source_line| { + src_loc.span_start = msg_writer.span_main; + src_loc.span_main = msg_writer.span_main; + src_loc.span_end = msg_writer.span_main; + src_loc.source_line = try bundle.addString(source_line); + } + break :src_loc try bundle.addSourceLocation(src_loc); + } + break :src_loc ErrorBundle.SourceLocationIndex.none; + }; + + switch (msg.kind) { + .@"fatal error", .@"error" => { + if (cur_err) |err| { + try flushErrorMessageIntoBundle(&bundle, err, cur_notes.items); + } + cur_err = .{ + .msg = try bundle.addString(msg_writer.buf.items), + .src_loc = src_loc, + }; + cur_notes.clearRetainingCapacity(); + }, + .note => { + cur_err.?.notes_len += 1; + try cur_notes.append(gpa, .{ + .msg = try bundle.addString(msg_writer.buf.items), + .src_loc = src_loc, + }); + }, + .off, .warning, .default => unreachable, + } + } + if (cur_err) |err| { + try flushErrorMessageIntoBundle(&bundle, err, cur_notes.items); + } + + return try bundle.toOwnedBundle(""); +} + +// Similar to aro.Diagnostics.MsgWriter but: +// - Writes to an ArrayList +// - Only prints the message itself (no location, source line, error: prefix, etc) +// - Keeps track of source path/line/col instead +const MsgWriter = struct { + buf: std.ArrayList(u8), + path: ?[]const u8 = null, + // line and col are 1-indexed; they are only set by `location` + line: u32 = undefined, + col: u32 = undefined, + source_line: ?[]const u8 = null, + span_main: u32 = undefined, + + fn init(allocator: std.mem.Allocator) MsgWriter { + return .{ + .buf = std.ArrayList(u8).init(allocator), + }; + } + + fn deinit(m: *MsgWriter) void { + m.buf.deinit(); + } + + // Clears the message buffer (keeping its capacity) and forgets the path and source line + fn resetRetainingCapacity(m: *MsgWriter) void { + m.buf.clearRetainingCapacity(); + m.path = null; + m.source_line = null; + } + + // Errors from the underlying ArrayList writer are discarded; this function returns void + pub fn print(m: *MsgWriter, comptime fmt: []const u8, args: anytype) void { + m.buf.writer().print(fmt, args) catch {}; + } + + // Errors from the underlying ArrayList writer are discarded; this function returns void + pub fn write(m: *MsgWriter, msg: []const u8) void { + m.buf.writer().writeAll(msg) catch {}; + } + + pub fn setColor(m: 
*MsgWriter, color: std.io.tty.Color) void { + _ = m; + _ = color; + } + + pub fn location(m: *MsgWriter, path: []const u8, line: u32, col: u32) void { + m.path = path; + m.line = line; + m.col = col; + } + + pub fn start(m: *MsgWriter, kind: aro.Diagnostics.Kind) void { + _ = m; + _ = kind; + } + + pub fn end(m: *MsgWriter, maybe_line: ?[]const u8, col: u32, end_with_splice: bool) void { + _ = end_with_splice; + m.source_line = maybe_line; + m.span_main = col; + } +}; diff --git a/lib/compiler/resinator/parse.zig b/lib/compiler/resinator/parse.zig @@ -0,0 +1,1897 @@ +const std = @import("std"); +const Lexer = @import("lex.zig").Lexer; +const Token = @import("lex.zig").Token; +const Node = @import("ast.zig").Node; +const Tree = @import("ast.zig").Tree; +const CodePageLookup = @import("ast.zig").CodePageLookup; +const Resource = @import("rc.zig").Resource; +const Allocator = std.mem.Allocator; +const ErrorDetails = @import("errors.zig").ErrorDetails; +const Diagnostics = @import("errors.zig").Diagnostics; +const SourceBytes = @import("literals.zig").SourceBytes; +const Compiler = @import("compile.zig").Compiler; +const rc = @import("rc.zig"); +const res = @import("res.zig"); + +// TODO: Make these configurable? 
+pub const max_nested_menu_level: u32 = 512; +pub const max_nested_version_level: u32 = 512; +pub const max_nested_expression_level: u32 = 200; + +pub const Parser = struct { + const Self = @This(); + + lexer: *Lexer, + /// values that need to be initialized per-parse + state: Parser.State = undefined, + options: Parser.Options, + + pub const Error = error{ParseError} || Allocator.Error; + + pub const Options = struct { + warn_instead_of_error_on_invalid_code_page: bool = false, + }; + + pub fn init(lexer: *Lexer, options: Options) Parser { + return Parser{ + .lexer = lexer, + .options = options, + }; + } + + pub const State = struct { + token: Token, + lookahead_lexer: Lexer, + allocator: Allocator, + arena: Allocator, + diagnostics: *Diagnostics, + input_code_page_lookup: CodePageLookup, + output_code_page_lookup: CodePageLookup, + }; + + pub fn parse(self: *Self, allocator: Allocator, diagnostics: *Diagnostics) Error!*Tree { + var arena = std.heap.ArenaAllocator.init(allocator); + errdefer arena.deinit(); + + self.state = Parser.State{ + .token = undefined, + .lookahead_lexer = undefined, + .allocator = allocator, + .arena = arena.allocator(), + .diagnostics = diagnostics, + .input_code_page_lookup = CodePageLookup.init(arena.allocator(), self.lexer.default_code_page), + .output_code_page_lookup = CodePageLookup.init(arena.allocator(), self.lexer.default_code_page), + }; + + const parsed_root = try self.parseRoot(); + + const tree = try self.state.arena.create(Tree); + tree.* = .{ + .node = parsed_root, + .input_code_pages = self.state.input_code_page_lookup, + .output_code_pages = self.state.output_code_page_lookup, + .source = self.lexer.buffer, + .arena = arena.state, + .allocator = allocator, + }; + return tree; + } + + fn parseRoot(self: *Self) Error!*Node { + var statements = std.ArrayList(*Node).init(self.state.allocator); + defer statements.deinit(); + + try self.parseStatements(&statements); + try self.check(.eof); + + const node = try 
self.state.arena.create(Node.Root); + node.* = .{ + .body = try self.state.arena.dupe(*Node, statements.items), + }; + return &node.base; + } + + fn parseStatements(self: *Self, statements: *std.ArrayList(*Node)) Error!void { + while (true) { + try self.nextToken(.whitespace_delimiter_only); + if (self.state.token.id == .eof) break; + // The Win32 compiler will sometimes try to recover from errors + // and then restart parsing afterwards. We don't ever do this + // because it almost always leads to unhelpful error messages + // (usually it will end up with bogus things like 'file + // not found: {') + const statement = try self.parseStatement(); + try statements.append(statement); + } + } + + /// Expects the current token to be the token before possible common resource attributes. + /// After return, the current token will be the token immediately before the end of the + /// common resource attributes (if any). If there are no common resource attributes, the + /// current token is unchanged. + /// The returned slice is allocated by the parser's arena + fn parseCommonResourceAttributes(self: *Self) ![]Token { + var common_resource_attributes = std.ArrayListUnmanaged(Token){}; + while (true) { + const maybe_common_resource_attribute = try self.lookaheadToken(.normal); + if (maybe_common_resource_attribute.id == .literal and rc.CommonResourceAttributes.map.has(maybe_common_resource_attribute.slice(self.lexer.buffer))) { + try common_resource_attributes.append(self.state.arena, maybe_common_resource_attribute); + self.nextToken(.normal) catch unreachable; + } else { + break; + } + } + return common_resource_attributes.toOwnedSlice(self.state.arena); + } + + /// Expects the current token to have already been dealt with, and that the + /// optional statements will potentially start on the next token. + /// After return, the current token will be the token immediately before the end of the + /// optional statements (if any). 
If there are no optional statements, the + /// current token is unchanged. + /// The returned slice is allocated by the parser's arena + fn parseOptionalStatements(self: *Self, resource: Resource) ![]*Node { + var optional_statements = std.ArrayListUnmanaged(*Node){}; + while (true) { + const lookahead_token = try self.lookaheadToken(.normal); + if (lookahead_token.id != .literal) break; + const slice = lookahead_token.slice(self.lexer.buffer); + const optional_statement_type = rc.OptionalStatements.map.get(slice) orelse switch (resource) { + .dialog, .dialogex => rc.OptionalStatements.dialog_map.get(slice) orelse break, + else => break, + }; + self.nextToken(.normal) catch unreachable; + switch (optional_statement_type) { + .language => { + const language = try self.parseLanguageStatement(); + try optional_statements.append(self.state.arena, language); + }, + // Number only + .version, .characteristics, .style, .exstyle => { + const identifier = self.state.token; + const value = try self.parseExpression(.{ + .can_contain_not_expressions = optional_statement_type == .style or optional_statement_type == .exstyle, + .allowed_types = .{ .number = true }, + }); + const node = try self.state.arena.create(Node.SimpleStatement); + node.* = .{ + .identifier = identifier, + .value = value, + }; + try optional_statements.append(self.state.arena, &node.base); + }, + // String only + .caption => { + const identifier = self.state.token; + try self.nextToken(.normal); + const value = self.state.token; + if (!value.isStringLiteral()) { + return self.addErrorDetailsAndFail(ErrorDetails{ + .err = .expected_something_else, + .token = value, + .extra = .{ .expected_types = .{ + .string_literal = true, + } }, + }); + } + const value_node = try self.state.arena.create(Node.Literal); + value_node.* = .{ + .token = value, + }; + const node = try self.state.arena.create(Node.SimpleStatement); + node.* = .{ + .identifier = identifier, + .value = &value_node.base, + }; + try 
optional_statements.append(self.state.arena, &node.base); + }, + // String or number + .class => { + const identifier = self.state.token; + const value = try self.parseExpression(.{ .allowed_types = .{ .number = true, .string = true } }); + const node = try self.state.arena.create(Node.SimpleStatement); + node.* = .{ + .identifier = identifier, + .value = value, + }; + try optional_statements.append(self.state.arena, &node.base); + }, + // Special case + .menu => { + const identifier = self.state.token; + try self.nextToken(.whitespace_delimiter_only); + try self.check(.literal); + const value_node = try self.state.arena.create(Node.Literal); + value_node.* = .{ + .token = self.state.token, + }; + const node = try self.state.arena.create(Node.SimpleStatement); + node.* = .{ + .identifier = identifier, + .value = &value_node.base, + }; + try optional_statements.append(self.state.arena, &node.base); + }, + .font => { + const identifier = self.state.token; + const point_size = try self.parseExpression(.{ .allowed_types = .{ .number = true } }); + + // The comma between point_size and typeface is both optional and + // there can be any number of them + try self.skipAnyCommas(); + + try self.nextToken(.normal); + const typeface = self.state.token; + if (!typeface.isStringLiteral()) { + return self.addErrorDetailsAndFail(ErrorDetails{ + .err = .expected_something_else, + .token = typeface, + .extra = .{ .expected_types = .{ + .string_literal = true, + } }, + }); + } + + const ExSpecificValues = struct { + weight: ?*Node = null, + italic: ?*Node = null, + char_set: ?*Node = null, + }; + var ex_specific = ExSpecificValues{}; + ex_specific: { + var optional_param_parser = OptionalParamParser{ .parser = self }; + switch (resource) { + .dialogex => { + { + ex_specific.weight = try optional_param_parser.parse(.{}); + if (optional_param_parser.finished) break :ex_specific; + } + { + if (!(try self.parseOptionalToken(.comma))) break :ex_specific; + ex_specific.italic = try 
self.parseExpression(.{ .allowed_types = .{ .number = true } }); + } + { + ex_specific.char_set = try optional_param_parser.parse(.{}); + if (optional_param_parser.finished) break :ex_specific; + } + }, + .dialog => {}, + else => unreachable, // only DIALOG and DIALOGEX have FONT optional-statements + } + } + + const node = try self.state.arena.create(Node.FontStatement); + node.* = .{ + .identifier = identifier, + .point_size = point_size, + .typeface = typeface, + .weight = ex_specific.weight, + .italic = ex_specific.italic, + .char_set = ex_specific.char_set, + }; + try optional_statements.append(self.state.arena, &node.base); + }, + } + } + return optional_statements.toOwnedSlice(self.state.arena); + } + + /// Expects the current token to be the first token of the statement. + fn parseStatement(self: *Self) Error!*Node { + const first_token = self.state.token; + std.debug.assert(first_token.id == .literal); + + if (rc.TopLevelKeywords.map.get(first_token.slice(self.lexer.buffer))) |keyword| switch (keyword) { + .language => { + const language_statement = try self.parseLanguageStatement(); + return language_statement; + }, + .version, .characteristics => { + const identifier = self.state.token; + const value = try self.parseExpression(.{ .allowed_types = .{ .number = true } }); + const node = try self.state.arena.create(Node.SimpleStatement); + node.* = .{ + .identifier = identifier, + .value = value, + }; + return &node.base; + }, + .stringtable => { + // common resource attributes must all be contiguous and come before optional-statements + const common_resource_attributes = try self.parseCommonResourceAttributes(); + const optional_statements = try self.parseOptionalStatements(.stringtable); + + try self.nextToken(.normal); + const begin_token = self.state.token; + try self.check(.begin); + + var strings = std.ArrayList(*Node).init(self.state.allocator); + defer strings.deinit(); + while (true) { + const maybe_end_token = try self.lookaheadToken(.normal); + 
switch (maybe_end_token.id) { + .end => { + self.nextToken(.normal) catch unreachable; + break; + }, + .eof => { + return self.addErrorDetailsAndFail(ErrorDetails{ + .err = .unfinished_string_table_block, + .token = maybe_end_token, + }); + }, + else => {}, + } + const id_expression = try self.parseExpression(.{ .allowed_types = .{ .number = true } }); + + const comma_token: ?Token = if (try self.parseOptionalToken(.comma)) self.state.token else null; + + try self.nextToken(.normal); + if (self.state.token.id != .quoted_ascii_string and self.state.token.id != .quoted_wide_string) { + return self.addErrorDetailsAndFail(ErrorDetails{ + .err = .expected_something_else, + .token = self.state.token, + .extra = .{ .expected_types = .{ .string_literal = true } }, + }); + } + + const string_node = try self.state.arena.create(Node.StringTableString); + string_node.* = .{ + .id = id_expression, + .maybe_comma = comma_token, + .string = self.state.token, + }; + try strings.append(&string_node.base); + } + + if (strings.items.len == 0) { + return self.addErrorDetailsAndFail(ErrorDetails{ + .err = .expected_token, // TODO: probably a more specific error message + .token = self.state.token, + .extra = .{ .expected = .number }, + }); + } + + const end_token = self.state.token; + try self.check(.end); + + const node = try self.state.arena.create(Node.StringTable); + node.* = .{ + .type = first_token, + .common_resource_attributes = common_resource_attributes, + .optional_statements = optional_statements, + .begin_token = begin_token, + .strings = try self.state.arena.dupe(*Node, strings.items), + .end_token = end_token, + }; + return &node.base; + }, + }; + + // The Win32 RC compiler allows for a 'dangling' literal at the end of a file + // (as long as it's not a valid top-level keyword), and there is actually an + // .rc file with a such a dangling literal in the Windows-classic-samples set + // of projects. So, we have special compatibility for this particular case. 
+ const maybe_eof = try self.lookaheadToken(.whitespace_delimiter_only); + if (maybe_eof.id == .eof) { + // TODO: emit warning + var context = try self.state.arena.alloc(Token, 2); + context[0] = first_token; + context[1] = maybe_eof; + const invalid_node = try self.state.arena.create(Node.Invalid); + invalid_node.* = .{ + .context = context, + }; + return &invalid_node.base; + } + + const id_token = first_token; + const id_code_page = self.lexer.current_code_page; + try self.nextToken(.whitespace_delimiter_only); + const resource = try self.checkResource(); + const type_token = self.state.token; + + if (resource == .string_num) { + try self.addErrorDetails(.{ + .err = .string_resource_as_numeric_type, + .token = type_token, + }); + return self.addErrorDetailsAndFail(.{ + .err = .string_resource_as_numeric_type, + .token = type_token, + .type = .note, + .print_source_line = false, + }); + } + + if (resource == .font) { + const id_bytes = SourceBytes{ + .slice = id_token.slice(self.lexer.buffer), + .code_page = id_code_page, + }; + const maybe_ordinal = res.NameOrOrdinal.maybeOrdinalFromString(id_bytes); + if (maybe_ordinal == null) { + const would_be_win32_rc_ordinal = res.NameOrOrdinal.maybeNonAsciiOrdinalFromString(id_bytes); + if (would_be_win32_rc_ordinal) |win32_rc_ordinal| { + try self.addErrorDetails(ErrorDetails{ + .err = .id_must_be_ordinal, + .token = id_token, + .extra = .{ .resource = resource }, + }); + return self.addErrorDetailsAndFail(ErrorDetails{ + .err = .win32_non_ascii_ordinal, + .token = id_token, + .type = .note, + .print_source_line = false, + .extra = .{ .number = win32_rc_ordinal.ordinal }, + }); + } else { + return self.addErrorDetailsAndFail(ErrorDetails{ + .err = .id_must_be_ordinal, + .token = id_token, + .extra = .{ .resource = resource }, + }); + } + } + } + + switch (resource) { + .accelerators => { + // common resource attributes must all be contiguous and come before optional-statements + const common_resource_attributes = try 
self.parseCommonResourceAttributes(); + const optional_statements = try self.parseOptionalStatements(resource); + + try self.nextToken(.normal); + const begin_token = self.state.token; + try self.check(.begin); + + var accelerators = std.ArrayListUnmanaged(*Node){}; + + while (true) { + const lookahead = try self.lookaheadToken(.normal); + switch (lookahead.id) { + .end, .eof => { + self.nextToken(.normal) catch unreachable; + break; + }, + else => {}, + } + const event = try self.parseExpression(.{ .allowed_types = .{ .number = true, .string = true } }); + + try self.nextToken(.normal); + try self.check(.comma); + + const idvalue = try self.parseExpression(.{ .allowed_types = .{ .number = true } }); + + var type_and_options = std.ArrayListUnmanaged(Token){}; + while (true) { + if (!(try self.parseOptionalToken(.comma))) break; + + try self.nextToken(.normal); + if (!rc.AcceleratorTypeAndOptions.map.has(self.tokenSlice())) { + return self.addErrorDetailsAndFail(.{ + .err = .expected_something_else, + .token = self.state.token, + .extra = .{ .expected_types = .{ + .accelerator_type_or_option = true, + } }, + }); + } + try type_and_options.append(self.state.arena, self.state.token); + } + + const node = try self.state.arena.create(Node.Accelerator); + node.* = .{ + .event = event, + .idvalue = idvalue, + .type_and_options = try type_and_options.toOwnedSlice(self.state.arena), + }; + try accelerators.append(self.state.arena, &node.base); + } + + const end_token = self.state.token; + try self.check(.end); + + const node = try self.state.arena.create(Node.Accelerators); + node.* = .{ + .id = id_token, + .type = type_token, + .common_resource_attributes = common_resource_attributes, + .optional_statements = optional_statements, + .begin_token = begin_token, + .accelerators = try accelerators.toOwnedSlice(self.state.arena), + .end_token = end_token, + }; + return &node.base; + }, + .dialog, .dialogex => { + // common resource attributes must all be contiguous and come 
before optional-statements + const common_resource_attributes = try self.parseCommonResourceAttributes(); + + const x = try self.parseExpression(.{ .allowed_types = .{ .number = true } }); + _ = try self.parseOptionalToken(.comma); + + const y = try self.parseExpression(.{ .allowed_types = .{ .number = true } }); + _ = try self.parseOptionalToken(.comma); + + const width = try self.parseExpression(.{ .allowed_types = .{ .number = true } }); + _ = try self.parseOptionalToken(.comma); + + const height = try self.parseExpression(.{ .allowed_types = .{ .number = true } }); + + var optional_param_parser = OptionalParamParser{ .parser = self }; + const help_id: ?*Node = try optional_param_parser.parse(.{}); + + const optional_statements = try self.parseOptionalStatements(resource); + + try self.nextToken(.normal); + const begin_token = self.state.token; + try self.check(.begin); + + var controls = std.ArrayListUnmanaged(*Node){}; + defer controls.deinit(self.state.allocator); + while (try self.parseControlStatement(resource)) |control_node| { + // The number of controls must fit in a u16 in order for it to + // be able to be written into the relevant field in the .res data. 
+ if (controls.items.len >= std.math.maxInt(u16)) { + try self.addErrorDetails(.{ + .err = .too_many_dialog_controls_or_toolbar_buttons, + .token = id_token, + .extra = .{ .resource = resource }, + }); + return self.addErrorDetailsAndFail(.{ + .err = .too_many_dialog_controls_or_toolbar_buttons, + .type = .note, + .token = control_node.getFirstToken(), + .token_span_end = control_node.getLastToken(), + .extra = .{ .resource = resource }, + }); + } + + try controls.append(self.state.allocator, control_node); + } + + try self.nextToken(.normal); + const end_token = self.state.token; + try self.check(.end); + + const node = try self.state.arena.create(Node.Dialog); + node.* = .{ + .id = id_token, + .type = type_token, + .common_resource_attributes = common_resource_attributes, + .x = x, + .y = y, + .width = width, + .height = height, + .help_id = help_id, + .optional_statements = optional_statements, + .begin_token = begin_token, + .controls = try self.state.arena.dupe(*Node, controls.items), + .end_token = end_token, + }; + return &node.base; + }, + .toolbar => { + // common resource attributes must all be contiguous and come before optional-statements + const common_resource_attributes = try self.parseCommonResourceAttributes(); + + const button_width = try self.parseExpression(.{ .allowed_types = .{ .number = true } }); + + try self.nextToken(.normal); + try self.check(.comma); + + const button_height = try self.parseExpression(.{ .allowed_types = .{ .number = true } }); + + try self.nextToken(.normal); + const begin_token = self.state.token; + try self.check(.begin); + + var buttons = std.ArrayListUnmanaged(*Node){}; + defer buttons.deinit(self.state.allocator); + while (try self.parseToolbarButtonStatement()) |button_node| { + // The number of buttons must fit in a u16 in order for it to + // be able to be written into the relevant field in the .res data. 
+ if (buttons.items.len >= std.math.maxInt(u16)) { + try self.addErrorDetails(.{ + .err = .too_many_dialog_controls_or_toolbar_buttons, + .token = id_token, + .extra = .{ .resource = resource }, + }); + return self.addErrorDetailsAndFail(.{ + .err = .too_many_dialog_controls_or_toolbar_buttons, + .type = .note, + .token = button_node.getFirstToken(), + .token_span_end = button_node.getLastToken(), + .extra = .{ .resource = resource }, + }); + } + + try buttons.append(self.state.allocator, button_node); + } + + try self.nextToken(.normal); + const end_token = self.state.token; + try self.check(.end); + + const node = try self.state.arena.create(Node.Toolbar); + node.* = .{ + .id = id_token, + .type = type_token, + .common_resource_attributes = common_resource_attributes, + .button_width = button_width, + .button_height = button_height, + .begin_token = begin_token, + .buttons = try self.state.arena.dupe(*Node, buttons.items), + .end_token = end_token, + }; + return &node.base; + }, + .menu, .menuex => { + // common resource attributes must all be contiguous and come before optional-statements + const common_resource_attributes = try self.parseCommonResourceAttributes(); + // help id is optional but must come between common resource attributes and optional-statements + var help_id: ?*Node = null; + // Note: No comma is allowed before or after help_id of MENUEX and help_id is not + // a possible field of MENU. 
+ if (resource == .menuex and try self.lookaheadCouldBeNumberExpression(.not_disallowed)) { + help_id = try self.parseExpression(.{ + .is_known_to_be_number_expression = true, + }); + } + const optional_statements = try self.parseOptionalStatements(.stringtable); + + try self.nextToken(.normal); + const begin_token = self.state.token; + try self.check(.begin); + + var items = std.ArrayListUnmanaged(*Node){}; + defer items.deinit(self.state.allocator); + while (try self.parseMenuItemStatement(resource, id_token, 1)) |item_node| { + try items.append(self.state.allocator, item_node); + } + + try self.nextToken(.normal); + const end_token = self.state.token; + try self.check(.end); + + if (items.items.len == 0) { + return self.addErrorDetailsAndFail(.{ + .err = .empty_menu_not_allowed, + .token = type_token, + }); + } + + const node = try self.state.arena.create(Node.Menu); + node.* = .{ + .id = id_token, + .type = type_token, + .common_resource_attributes = common_resource_attributes, + .optional_statements = optional_statements, + .help_id = help_id, + .begin_token = begin_token, + .items = try self.state.arena.dupe(*Node, items.items), + .end_token = end_token, + }; + return &node.base; + }, + .versioninfo => { + // common resource attributes must all be contiguous and come before optional-statements + const common_resource_attributes = try self.parseCommonResourceAttributes(); + + var fixed_info = std.ArrayListUnmanaged(*Node){}; + while (try self.parseVersionStatement()) |version_statement| { + try fixed_info.append(self.state.arena, version_statement); + } + + try self.nextToken(.normal); + const begin_token = self.state.token; + try self.check(.begin); + + var block_statements = std.ArrayListUnmanaged(*Node){}; + while (try self.parseVersionBlockOrValue(id_token, 1)) |block_node| { + try block_statements.append(self.state.arena, block_node); + } + + try self.nextToken(.normal); + const end_token = self.state.token; + try self.check(.end); + + const node = try 
self.state.arena.create(Node.VersionInfo); + node.* = .{ + .id = id_token, + .versioninfo = type_token, + .common_resource_attributes = common_resource_attributes, + .fixed_info = try fixed_info.toOwnedSlice(self.state.arena), + .begin_token = begin_token, + .block_statements = try block_statements.toOwnedSlice(self.state.arena), + .end_token = end_token, + }; + return &node.base; + }, + .dlginclude => { + const common_resource_attributes = try self.parseCommonResourceAttributes(); + + const filename_expression = try self.parseExpression(.{ + .allowed_types = .{ .string = true }, + }); + + const node = try self.state.arena.create(Node.ResourceExternal); + node.* = .{ + .id = id_token, + .type = type_token, + .common_resource_attributes = common_resource_attributes, + .filename = filename_expression, + }; + return &node.base; + }, + .stringtable => { + return self.addErrorDetailsAndFail(.{ + .err = .name_or_id_not_allowed, + .token = id_token, + .extra = .{ .resource = resource }, + }); + }, + // Just try everything as a 'generic' resource (raw data or external file) + // TODO: More fine-grained switch cases as necessary + else => { + const common_resource_attributes = try self.parseCommonResourceAttributes(); + + const maybe_begin = try self.lookaheadToken(.normal); + if (maybe_begin.id == .begin) { + self.nextToken(.normal) catch unreachable; + + if (!resource.canUseRawData()) { + try self.addErrorDetails(ErrorDetails{ + .err = .resource_type_cant_use_raw_data, + .token = maybe_begin, + .extra = .{ .resource = resource }, + }); + return self.addErrorDetailsAndFail(ErrorDetails{ + .err = .resource_type_cant_use_raw_data, + .type = .note, + .print_source_line = false, + .token = maybe_begin, + }); + } + + const raw_data = try self.parseRawDataBlock(); + const end_token = self.state.token; + + const node = try self.state.arena.create(Node.ResourceRawData); + node.* = .{ + .id = id_token, + .type = type_token, + .common_resource_attributes = 
/// Parses the expressions of a raw data block.
///
/// Expects the current token to be a begin token.
/// After return, the current token will be the end token.
///
/// Commas between values are skipped, but a comma before the first value is
/// an error. The returned slice is allocated with `self.state.arena`.
fn parseRawDataBlock(self: *Self) Error![]*Node {
    // Scratch list backed by the general allocator; the finished list is
    // copied into the arena right before returning.
    var elements = std.ArrayList(*Node).init(self.state.allocator);
    defer elements.deinit();

    while (true) {
        const peeked = try self.lookaheadToken(.normal);

        if (peeked.id == .eof) {
            return self.addErrorDetailsAndFail(ErrorDetails{
                .err = .unfinished_raw_data_block,
                .token = peeked,
            });
        }
        if (peeked.id == .end) {
            // consume the end token and stop
            self.nextToken(.normal) catch unreachable;
            break;
        }
        if (peeked.id == .comma) {
            // comma as the first token in a raw data block is an error
            if (elements.items.len == 0) {
                return self.addErrorDetailsAndFail(ErrorDetails{
                    .err = .expected_something_else,
                    .token = peeked,
                    .extra = .{ .expected_types = .{
                        .number = true,
                        .number_expression = true,
                        .string_literal = true,
                    } },
                });
            }
            // otherwise just skip over commas
            self.nextToken(.normal) catch unreachable;
            continue;
        }

        const element = try self.parseExpression(.{ .allowed_types = .{ .number = true, .string = true } });
        try elements.append(element);

        // Only number expressions need the trailing close paren check.
        if (!element.isNumberExpression()) continue;

        const after_element = try self.lookaheadToken(.normal);
        if (after_element.id == .close_paren) {
            // <number expression>) is an error
            return self.addErrorDetailsAndFail(ErrorDetails{
                .err = .expected_token,
                .token = after_element,
                .extra = .{ .expected = .operator },
            });
        }
    }

    return try self.state.arena.dupe(*Node, elements.items);
}
/// Parses one control statement of a dialog resource.
///
/// Expects the current token to be handled, and that the control statement will
/// begin on the next token.
/// After return, the current token will be the token immediately before the end of the
/// control statement (or unchanged if the function returns null).
///
/// Returns null when the next token is not a key of `rc.Control.map`.
/// `resource` decides whether the help id parameter and the extra data block
/// are parsed: both are only parsed for `.dialogex`.
fn parseControlStatement(self: *Self, resource: Resource) Error!?*Node {
    const control_token = try self.lookaheadToken(.normal);
    const control = rc.Control.map.get(control_token.slice(self.lexer.buffer)) orelse return null;
    self.nextToken(.normal) catch unreachable;

    try self.skipAnyCommas();

    // Only some control types take a text parameter.
    const text: ?Token = if (control.hasTextParam()) blk: {
        try self.nextToken(.normal);
        const text_token = self.state.token;
        switch (text_token.id) {
            .quoted_ascii_string, .quoted_wide_string, .number => {},
            else => return self.addErrorDetailsAndFail(ErrorDetails{
                .err = .expected_something_else,
                .token = text_token,
                .extra = .{ .expected_types = .{
                    .number = true,
                    .string_literal = true,
                } },
            }),
        }
        try self.skipAnyCommas();
        break :blk text_token;
    } else null;

    const id = try self.parseExpression(.{ .allowed_types = .{ .number = true } });

    try self.skipAnyCommas();

    var class: ?*Node = null;
    var style: ?*Node = null;
    if (control == .control) {
        // The generic CONTROL statement has `class, style` before the coordinates.
        const class_node = try self.parseExpression(.{});
        class = class_node;
        // An unquoted class must be one of the names in rc.ControlClass.map.
        if (class_node.cast(.literal)) |class_literal| {
            const is_invalid_control_class = class_literal.token.id == .literal and
                !rc.ControlClass.map.has(class_literal.token.slice(self.lexer.buffer));
            if (is_invalid_control_class) {
                return self.addErrorDetailsAndFail(.{
                    .err = .expected_something_else,
                    .token = self.state.token,
                    .extra = .{ .expected_types = .{
                        .control_class = true,
                    } },
                });
            }
        }
        try self.skipAnyCommas();
        style = try self.parseExpression(.{
            .can_contain_not_expressions = true,
            .allowed_types = .{ .number = true },
        });
        // If there is no comma after the style parameter, the Win32 RC compiler
        // could misinterpret the statement and end up skipping over at least one token
        // that should have been interpreted as the next parameter (x). For example:
        //   CONTROL "text", 1, BUTTON, 15 30, 1, 2, 3, 4
        // the `15` is the style parameter, but in the Win32 implementation the `30`
        // is completely ignored (i.e. the `1, 2, 3, 4` are `x`, `y`, `w`, `h`).
        // If a comma is added after the `15`, then `30` gets interpreted (correctly)
        // as the `x` value.
        //
        // Instead of emulating this behavior, we just warn about the potential for
        // weird behavior in the Win32 implementation whenever there isn't a comma after
        // the style parameter.
        const lookahead_token = try self.lookaheadToken(.normal);
        if (lookahead_token.id != .comma and lookahead_token.id != .eof) {
            try self.addErrorDetails(.{
                .err = .rc_could_miscompile_control_params,
                .type = .warning,
                .token = lookahead_token,
            });
            try self.addErrorDetails(.{
                .err = .rc_could_miscompile_control_params,
                .type = .note,
                .token = style.?.getFirstToken(),
                .token_span_end = style.?.getLastToken(),
            });
        }
        try self.skipAnyCommas();
    }

    const x = try self.parseExpression(.{ .allowed_types = .{ .number = true } });
    _ = try self.parseOptionalToken(.comma);
    const y = try self.parseExpression(.{ .allowed_types = .{ .number = true } });
    _ = try self.parseOptionalToken(.comma);
    const width = try self.parseExpression(.{ .allowed_types = .{ .number = true } });
    _ = try self.parseOptionalToken(.comma);
    const height = try self.parseExpression(.{ .allowed_types = .{ .number = true } });

    var optional_param_parser = OptionalParamParser{ .parser = self };
    // For every control type other than CONTROL, style is a trailing optional parameter.
    if (control != .control) {
        style = try optional_param_parser.parse(.{ .not_expression_allowed = true });
    }

    const exstyle: ?*Node = try optional_param_parser.parse(.{ .not_expression_allowed = true });
    const help_id: ?*Node = switch (resource) {
        .dialogex => try optional_param_parser.parse(.{}),
        else => null,
    };

    var extra_data: []*Node = &[_]*Node{};
    var extra_data_begin: ?Token = null;
    var extra_data_end: ?Token = null;
    // extra data is DIALOGEX-only
    if (resource == .dialogex and try self.parseOptionalToken(.begin)) {
        extra_data_begin = self.state.token;
        extra_data = try self.parseRawDataBlock();
        extra_data_end = self.state.token;
    }

    const node = try self.state.arena.create(Node.ControlStatement);
    node.* = .{
        .type = control_token,
        .text = text,
        .class = class,
        .id = id,
        .x = x,
        .y = y,
        .width = width,
        .height = height,
        .style = style,
        .exstyle = exstyle,
        .help_id = help_id,
        .extra_data_begin = extra_data_begin,
        .extra_data = extra_data,
        .extra_data_end = extra_data_end,
    };
    return &node.base;
}
/// Parses one statement inside a toolbar resource body.
///
/// Returns null, without consuming anything, when the next token is not a
/// key of `rc.ToolbarButton.map`. Otherwise the keyword is consumed and:
/// - a separator becomes a `Node.Literal` holding the keyword token
/// - a button becomes a `Node.SimpleStatement` holding the keyword token and
///   the number expression that follows it
fn parseToolbarButtonStatement(self: *Self) Error!?*Node {
    const keyword_token = try self.lookaheadToken(.normal);
    const kind = rc.ToolbarButton.map.get(keyword_token.slice(self.lexer.buffer)) orelse return null;
    self.nextToken(.normal) catch unreachable;

    switch (kind) {
        .button => {
            const id_expression = try self.parseExpression(.{ .allowed_types = .{ .number = true } });

            const statement = try self.state.arena.create(Node.SimpleStatement);
            statement.* = .{
                .identifier = keyword_token,
                .value = id_expression,
            };
            return &statement.base;
        },
        .separator => {
            const literal = try self.state.arena.create(Node.Literal);
            literal.* = .{
                .token = keyword_token,
            };
            return &literal.base;
        },
    }
}
/// Collects consecutive tokens that are keys of `rc.MenuItem.Option.map`,
/// skipping any commas after each one. Stops at (and does not consume) the
/// first token that is not such a key.
/// The list is allocated with `self.state.arena`.
fn parseMenuItemOptionTokens(self: *Self) Error!std.ArrayListUnmanaged(Token) {
    var option_tokens = std.ArrayListUnmanaged(Token){};
    while (true) {
        const candidate = try self.lookaheadToken(.normal);
        if (!rc.MenuItem.Option.map.has(candidate.slice(self.lexer.buffer))) break;
        self.nextToken(.normal) catch unreachable;
        try option_tokens.append(self.state.arena, candidate);
        try self.skipAnyCommas();
    }
    return option_tokens;
}

/// Parses one MENUITEM/POPUP statement of a MENU or MENUEX resource,
/// recursing into the body of POPUP statements.
///
/// Expects the current token to be handled, and that the menuitem/popup statement will
/// begin on the next token.
/// After return, the current token will be the token immediately before the end of the
/// menuitem statement (or unchanged if the function returns null).
///
/// Returns null when the next token is not a key of `rc.MenuItem.map`.
/// `resource` must be `.menu` or `.menuex`. `top_level_menu_id_token` is only
/// used for the error reported when `nesting_level` exceeds
/// `max_nested_menu_level`.
fn parseMenuItemStatement(self: *Self, resource: Resource, top_level_menu_id_token: Token, nesting_level: u32) Error!?*Node {
    const keyword_token = try self.lookaheadToken(.normal);
    const keyword = rc.MenuItem.map.get(keyword_token.slice(self.lexer.buffer)) orelse return null;
    self.nextToken(.normal) catch unreachable;

    if (nesting_level > max_nested_menu_level) {
        try self.addErrorDetails(.{
            .err = .nested_resource_level_exceeds_max,
            .token = top_level_menu_id_token,
            .extra = .{ .resource = resource },
        });
        return self.addErrorDetailsAndFail(.{
            .err = .nested_resource_level_exceeds_max,
            .type = .note,
            .token = keyword_token,
            .extra = .{ .resource = resource },
        });
    }

    switch (resource) {
        .menu => {
            try self.nextToken(.normal);
            const first_token = self.state.token;

            // MENUITEM SEPARATOR is the only form without a text parameter.
            if (keyword == .menuitem and rc.MenuItem.isSeparator(first_token.slice(self.lexer.buffer))) {
                // There can be any number of trailing commas after SEPARATOR
                try self.skipAnyCommas();
                const separator = try self.state.arena.create(Node.MenuItemSeparator);
                separator.* = .{
                    .menuitem = keyword_token,
                    .separator = first_token,
                };
                return &separator.base;
            }

            if (!first_token.isStringLiteral()) {
                return self.addErrorDetailsAndFail(ErrorDetails{
                    .err = .expected_something_else,
                    .token = first_token,
                    .extra = .{ .expected_types = .{
                        .string_literal = true,
                    } },
                });
            }
            try self.skipAnyCommas();

            switch (keyword) {
                .menuitem => {
                    const result = try self.parseExpression(.{ .allowed_types = .{ .number = true } });

                    _ = try self.parseOptionalToken(.comma);

                    var option_tokens = try self.parseMenuItemOptionTokens();

                    const item = try self.state.arena.create(Node.MenuItem);
                    item.* = .{
                        .menuitem = keyword_token,
                        .text = first_token,
                        .result = result,
                        .option_list = try option_tokens.toOwnedSlice(self.state.arena),
                    };
                    return &item.base;
                },
                .popup => {
                    var option_tokens = try self.parseMenuItemOptionTokens();

                    try self.nextToken(.normal);
                    const begin_token = self.state.token;
                    try self.check(.begin);

                    var children = std.ArrayListUnmanaged(*Node){};
                    while (try self.parseMenuItemStatement(resource, top_level_menu_id_token, nesting_level + 1)) |child| {
                        try children.append(self.state.arena, child);
                    }

                    try self.nextToken(.normal);
                    const end_token = self.state.token;
                    try self.check(.end);

                    if (children.items.len == 0) {
                        return self.addErrorDetailsAndFail(.{
                            .err = .empty_menu_not_allowed,
                            .token = keyword_token,
                        });
                    }

                    const popup = try self.state.arena.create(Node.Popup);
                    popup.* = .{
                        .popup = keyword_token,
                        .text = first_token,
                        .option_list = try option_tokens.toOwnedSlice(self.state.arena),
                        .begin_token = begin_token,
                        .items = try children.toOwnedSlice(self.state.arena),
                        .end_token = end_token,
                    };
                    return &popup.base;
                },
            }
        },
        .menuex => {
            try self.nextToken(.normal);
            const text_token = self.state.token;
            if (!text_token.isStringLiteral()) {
                return self.addErrorDetailsAndFail(ErrorDetails{
                    .err = .expected_something_else,
                    .token = text_token,
                    .extra = .{ .expected_types = .{
                        .string_literal = true,
                    } },
                });
            }

            // id, type and state are optional for both MENUITEM and POPUP.
            var param_parser = OptionalParamParser{ .parser = self };
            const id = try param_parser.parse(.{});
            const item_type = try param_parser.parse(.{});
            const state = try param_parser.parse(.{});

            switch (keyword) {
                .menuitem => {
                    // trailing comma is allowed, skip it
                    _ = try self.parseOptionalToken(.comma);

                    const item = try self.state.arena.create(Node.MenuItemEx);
                    item.* = .{
                        .menuitem = keyword_token,
                        .text = text_token,
                        .id = id,
                        .type = item_type,
                        .state = state,
                    };
                    return &item.base;
                },
                .popup => {
                    const help_id = try param_parser.parse(.{});

                    // trailing comma is allowed, skip it
                    _ = try self.parseOptionalToken(.comma);

                    try self.nextToken(.normal);
                    const begin_token = self.state.token;
                    try self.check(.begin);

                    var children = std.ArrayListUnmanaged(*Node){};
                    while (try self.parseMenuItemStatement(resource, top_level_menu_id_token, nesting_level + 1)) |child| {
                        try children.append(self.state.arena, child);
                    }

                    try self.nextToken(.normal);
                    const end_token = self.state.token;
                    try self.check(.end);

                    if (children.items.len == 0) {
                        return self.addErrorDetailsAndFail(.{
                            .err = .empty_menu_not_allowed,
                            .token = keyword_token,
                        });
                    }

                    const popup = try self.state.arena.create(Node.PopupEx);
                    popup.* = .{
                        .popup = keyword_token,
                        .text = text_token,
                        .id = id,
                        .type = item_type,
                        .state = state,
                        .help_id = help_id,
                        .begin_token = begin_token,
                        .items = try children.toOwnedSlice(self.state.arena),
                        .end_token = end_token,
                    };
                    return &popup.base;
                },
            }
        },
        else => unreachable,
    }
    @compileError("unreachable");
}
/// Parses a run of comma-separated optional number parameters.
///
/// Each call to `parse` handles one parameter. Once a call finds no comma,
/// the run is considered finished and every later call returns null without
/// touching the token stream.
pub const OptionalParamParser = struct {
    /// Set once a call to `parse` did not find a leading comma.
    finished: bool = false,
    parser: *Self,

    pub const Options = struct {
        /// When true, the parameter may contain NOT expressions.
        not_expression_allowed: bool = false,
    };

    /// Returns the parsed expression, or null when the parameter is absent:
    /// either there was no leading comma, or the comma is followed by
    /// something that cannot start a number expression (an 'empty' value).
    pub fn parse(self: *OptionalParamParser, options: OptionalParamParser.Options) Error!?*Node {
        if (self.finished) return null;

        const saw_comma = try self.parser.parseOptionalToken(.comma);
        if (!saw_comma) {
            self.finished = true;
            return null;
        }

        // If the next lookahead token could be part of a number expression,
        // then parse it. Otherwise, treat it as an 'empty' expression and
        // continue parsing, since 'empty' values are allowed.
        const could_be_number = if (options.not_expression_allowed)
            try self.parser.lookaheadCouldBeNumberExpression(.not_allowed)
        else
            try self.parser.lookaheadCouldBeNumberExpression(.not_disallowed);
        if (!could_be_number) return null;

        return try self.parser.parseExpression(.{
            .allowed_types = .{ .number = true },
            .can_contain_not_expressions = options.not_expression_allowed,
        });
    }
};
/// Parses one fixed-info statement of a VERSIONINFO resource.
///
/// Expects the current token to be handled, and that the version statement will
/// begin on the next token.
/// After return, the current token will be the token immediately before the end of the
/// version statement (or unchanged if the function returns null).
///
/// Returns null when the next token is not a key of `rc.VersionInfo.map`.
/// File and product version statements take up to four number expressions
/// separated by commas; all other statement types take exactly one.
fn parseVersionStatement(self: *Self) Error!?*Node {
    const type_token = try self.lookaheadToken(.normal);
    const statement_type = rc.VersionInfo.map.get(type_token.slice(self.lexer.buffer)) orelse return null;
    self.nextToken(.normal) catch unreachable;

    switch (statement_type) {
        .file_version, .product_version => {
            var parts: [4]*Node = undefined;
            var num_parts: usize = 0;

            while (true) {
                parts[num_parts] = try self.parseExpression(.{ .allowed_types = .{ .number = true } });
                num_parts += 1;

                // Stop when full (without looking for another comma) or
                // when no comma follows the part that was just parsed.
                if (num_parts == parts.len) break;
                if (!(try self.parseOptionalToken(.comma))) break;
            }

            const statement = try self.state.arena.create(Node.VersionStatement);
            statement.* = .{
                .type = type_token,
                .parts = try self.state.arena.dupe(*Node, parts[0..num_parts]),
            };
            return &statement.base;
        },
        else => {
            const value = try self.parseExpression(.{ .allowed_types = .{ .number = true } });

            const statement = try self.state.arena.create(Node.SimpleStatement);
            statement.* = .{
                .identifier = type_token,
                .value = value,
            };
            return &statement.base;
        },
    }
}
/// Parses one BLOCK or VALUE statement inside a VERSIONINFO body, recursing
/// into the body of BLOCK statements.
///
/// Expects the current token to be handled, and that the version BLOCK/VALUE will
/// begin on the next token.
/// After return, the current token will be the token immediately before the end of the
/// version BLOCK/VALUE (or unchanged if the function returns null).
///
/// Returns null when the next token is not a key of `rc.VersionBlock.map`.
/// `top_level_version_id_token` is only used for the error reported when
/// `nesting_level` exceeds `max_nested_version_level`.
fn parseVersionBlockOrValue(self: *Self, top_level_version_id_token: Token, nesting_level: u32) Error!?*Node {
    const keyword_token = try self.lookaheadToken(.normal);
    const keyword = rc.VersionBlock.map.get(keyword_token.slice(self.lexer.buffer)) orelse return null;
    self.nextToken(.normal) catch unreachable;

    if (nesting_level > max_nested_version_level) {
        try self.addErrorDetails(.{
            .err = .nested_resource_level_exceeds_max,
            .token = top_level_version_id_token,
            .extra = .{ .resource = .versioninfo },
        });
        return self.addErrorDetailsAndFail(.{
            .err = .nested_resource_level_exceeds_max,
            .type = .note,
            .token = keyword_token,
            .extra = .{ .resource = .versioninfo },
        });
    }

    // Both BLOCK and VALUE start with a string literal key.
    try self.nextToken(.normal);
    const key_token = self.state.token;
    if (!key_token.isStringLiteral()) {
        return self.addErrorDetailsAndFail(.{
            .err = .expected_something_else,
            .token = key_token,
            .extra = .{ .expected_types = .{
                .string_literal = true,
            } },
        });
    }

    // Need to keep track of this to detect a potential miscompilation when
    // the comma is omitted and the first value is a quoted string.
    const had_comma_before_first_value = try self.parseOptionalToken(.comma);
    try self.skipAnyCommas();

    const values = try self.parseBlockValuesList(had_comma_before_first_value);

    switch (keyword) {
        .value => {
            const value_node = try self.state.arena.create(Node.BlockValue);
            value_node.* = .{
                .identifier = keyword_token,
                .key = key_token,
                .values = values,
            };
            return &value_node.base;
        },
        .block => {
            try self.nextToken(.normal);
            const begin_token = self.state.token;
            try self.check(.begin);

            var nested = std.ArrayListUnmanaged(*Node){};
            while (try self.parseVersionBlockOrValue(top_level_version_id_token, nesting_level + 1)) |nested_node| {
                try nested.append(self.state.arena, nested_node);
            }

            try self.nextToken(.normal);
            const end_token = self.state.token;
            try self.check(.end);

            const block_node = try self.state.arena.create(Node.Block);
            block_node.* = .{
                .identifier = keyword_token,
                .key = key_token,
                .values = values,
                .begin_token = begin_token,
                .children = try nested.toOwnedSlice(self.state.arena),
                .end_token = end_token,
            };
            return &block_node.base;
        },
    }
}
/// Parses the (possibly empty) list of values that follows the key of a
/// version BLOCK/VALUE statement.
///
/// Parsing stops at the first token that cannot start a value. Each value is
/// wrapped in a `Node.BlockValueValue` that records whether a comma followed
/// it. Two warning/note pairs may be added:
/// - when the list mixes number and string values
/// - when the first value is a string literal and
///   `had_comma_before_first_value` is false
///
/// The returned slice is allocated with `self.state.arena`.
fn parseBlockValuesList(self: *Self, had_comma_before_first_value: bool) Error![]*Node {
    var values = std.ArrayListUnmanaged(*Node){};
    var seen_number: bool = false;
    var first_string_value: ?*Node = null;

    while (true) {
        const lookahead_token = try self.lookaheadToken(.normal);
        const can_start_value = switch (lookahead_token.id) {
            .operator,
            .number,
            .open_paren,
            .quoted_ascii_string,
            .quoted_wide_string,
            => true,
            else => false,
        };
        if (!can_start_value) break;

        const expression = try self.parseExpression(.{});

        if (expression.isNumberExpression()) {
            seen_number = true;
        } else if (first_string_value == null) {
            std.debug.assert(expression.isStringLiteral());
            first_string_value = expression;
        }

        const has_trailing_comma = try self.parseOptionalToken(.comma);
        try self.skipAnyCommas();

        const wrapped = try self.state.arena.create(Node.BlockValueValue);
        wrapped.* = .{
            .expression = expression,
            .trailing_comma = has_trailing_comma,
        };
        try values.append(self.state.arena, &wrapped.base);
    }

    if (seen_number) {
        if (first_string_value) |first_string| {
            // The Win32 RC compiler does some strange stuff with the data size:
            // Strings are counted as UTF-16 code units including the null-terminator
            // Numbers are counted as their byte lengths
            // So, when both strings and numbers are within a single value,
            // it incorrectly sets the value's type as binary, but then gives the
            // data length as a mixture of bytes and UTF-16 code units. This means that
            // when the length is read, it will be treated as byte length and will
            // not read the full value. We don't reproduce this behavior, so we warn
            // of the miscompilation here.
            const span_start = values.items[0].getFirstToken();
            const span_end = values.items[values.items.len - 1].getLastToken();
            try self.addErrorDetails(.{
                .err = .rc_would_miscompile_version_value_byte_count,
                .type = .warning,
                .token = first_string.getFirstToken(),
                .token_span_start = span_start,
                .token_span_end = span_end,
            });
            try self.addErrorDetails(.{
                .err = .rc_would_miscompile_version_value_byte_count,
                .type = .note,
                .token = first_string.getFirstToken(),
                .token_span_start = span_start,
                .token_span_end = span_end,
                .print_source_line = false,
            });
        }
    }

    if (!had_comma_before_first_value and values.items.len > 0) {
        const first_expression = values.items[0].cast(.block_value_value).?.expression;
        if (first_expression.isStringLiteral()) {
            const token = first_expression.cast(.literal).?.token;
            try self.addErrorDetails(.{
                .err = .rc_would_miscompile_version_value_padding,
                .type = .warning,
                .token = token,
            });
            try self.addErrorDetails(.{
                .err = .rc_would_miscompile_version_value_padding,
                .type = .note,
                .token = token,
                .print_source_line = false,
            });
        }
    }

    return values.toOwnedSlice(self.state.arena);
}
/// Returns the `is_long` flag of the result of evaluating `expression_node`
/// with `Compiler.evaluateNumberExpression`. Callers in this file use it to
/// detect number expressions containing a number with an L suffix.
fn numberExpressionContainsAnyLSuffixes(expression_node: *Node, source: []const u8, code_page_lookup: *const CodePageLookup) bool {
    // TODO: This could probably be done without evaluating the whole expression
    return Compiler.evaluateNumberExpression(expression_node, source, code_page_lookup).is_long;
}

/// Adds a warning and an accompanying note spanning `param` if the LANGUAGE
/// parameter expression contains any number with an L suffix.
/// Does nothing otherwise.
fn warnIfLanguageParamHasLSuffix(self: *Self, param: *Node) Error!void {
    if (!numberExpressionContainsAnyLSuffixes(param, self.lexer.buffer, &self.state.input_code_page_lookup)) return;

    try self.addErrorDetails(.{
        .err = .rc_would_error_u16_with_l_suffix,
        .type = .warning,
        .token = param.getFirstToken(),
        .token_span_end = param.getLastToken(),
        .extra = .{ .statement_with_u16_param = .language },
    });
    try self.addErrorDetails(.{
        .err = .rc_would_error_u16_with_l_suffix,
        .print_source_line = false,
        .type = .note,
        .token = param.getFirstToken(),
        .token_span_end = param.getLastToken(),
        .extra = .{ .statement_with_u16_param = .language },
    });
}

/// Parses `LANGUAGE <primary>, <sublanguage>`, where both parameters are
/// number expressions and the comma between them is required.
///
/// Expects the current token to be a literal token that contains the string LANGUAGE
fn parseLanguageStatement(self: *Self) Error!*Node {
    const language_token = self.state.token;

    const primary_language = try self.parseExpression(.{ .allowed_types = .{ .number = true } });

    try self.nextToken(.normal);
    try self.check(.comma);

    const sublanguage = try self.parseExpression(.{ .allowed_types = .{ .number = true } });

    // The Win32 RC compiler errors if either parameter contains any number with an L
    // suffix. Instead of that, we want to warn and then let the values get truncated.
    // The warning is done here to allow the compiler logic to not have to deal with this.
    try self.warnIfLanguageParamHasLSuffix(primary_language);
    try self.warnIfLanguageParamHasLSuffix(sublanguage);

    const node = try self.state.arena.create(Node.LanguageStatement);
    node.* = .{
        .language_token = language_token,
        .primary_language_id = primary_language,
        .sublanguage_id = sublanguage,
    };
    return &node.base;
}
/// Options that control what `parseExpression` / `parsePrimary` accept and
/// what is reported as expected when something else is found.
pub const ParseExpressionOptions = struct {
    is_known_to_be_number_expression: bool = false,
    can_contain_not_expressions: bool = false,
    nesting_context: NestingContext = .{},
    allowed_types: AllowedTypes = .{ .literal = true, .number = true, .string = true },
    /// When set, used verbatim as the expected types in `toErrorDetails`
    /// instead of deriving them from `allowed_types`.
    expected_types_override: ?ErrorDetails.ExpectedTypes = null,

    pub const AllowedTypes = struct {
        literal: bool = false,
        number: bool = false,
        string: bool = false,
    };

    pub const NestingContext = struct {
        first_token: ?Token = null,
        last_token: ?Token = null,
        level: u32 = 0,

        /// Returns a new NestingContext with values modified appropriately for an increased nesting level
        fn incremented(ctx: NestingContext, first_token: Token, most_recent_token: Token) NestingContext {
            var deeper = ctx;
            // first_token is only recorded once, by the outermost level.
            if (deeper.first_token == null) deeper.first_token = first_token;
            deeper.last_token = most_recent_token;
            deeper.level += 1;
            return deeper;
        }
    };

    /// Builds the `expected_something_else` error for `token`, listing the
    /// types these options allow (or the override, if one is set).
    pub fn toErrorDetails(options: ParseExpressionOptions, token: Token) ErrorDetails {
        // TODO: expected_types_override interaction with is_known_to_be_number_expression?
        const expected_types: ErrorDetails.ExpectedTypes = if (options.expected_types_override) |override|
            override
        else derived: {
            // Inside a known number expression, strings and literals are never expected.
            const non_numbers_possible = !options.is_known_to_be_number_expression;
            break :derived .{
                .number = options.allowed_types.number,
                .number_expression = options.allowed_types.number,
                .string_literal = options.allowed_types.string and non_numbers_possible,
                .literal = options.allowed_types.literal and non_numbers_possible,
            };
        };
        return .{
            .err = .expected_something_else,
            .token = token,
            .extra = .{ .expected_types = expected_types },
        };
    }
};
/// Returns true if the next lookahead token is a number or could be the start of a number expression.
/// Only useful when looking for empty expressions in optional fields.
///
/// Nothing is consumed. With `.not_allowed`, a literal token equal to `NOT`
/// (compared case-insensitively) also counts as a possible start; with
/// `.not_disallowed`, literal tokens never do.
fn lookaheadCouldBeNumberExpression(self: *Self, not_allowed: enum { not_allowed, not_disallowed }) Error!bool {
    const lookahead_token = try self.lookaheadToken(.normal);
    return switch (lookahead_token.id) {
        .number, .open_paren => true,
        .literal => not_allowed == .not_allowed and
            std.ascii.eqlIgnoreCase("NOT", lookahead_token.slice(self.lexer.buffer)),
        // + can be a unary operator, see parseExpression's handling of unary +
        .operator => lookahead_token.slice(self.lexer.buffer)[0] == '+',
        else => false,
    };
}
self.state.token.slice(self.lexer.buffer))) { + const not_token = self.state.token; + try self.nextToken(.normal); + try self.check(.number); + if (!options.allowed_types.number) return self.addErrorDetailsAndFail(options.toErrorDetails(self.state.token)); + const node = try self.state.arena.create(Node.NotExpression); + node.* = .{ + .not_token = not_token, + .number_token = self.state.token, + }; + return &node.base; + } + if (!options.allowed_types.literal) return self.addErrorDetailsAndFail(options.toErrorDetails(self.state.token)); + const node = try self.state.arena.create(Node.Literal); + node.* = .{ .token = self.state.token }; + return &node.base; + }, + .number => { + if (!options.allowed_types.number) return self.addErrorDetailsAndFail(options.toErrorDetails(self.state.token)); + const node = try self.state.arena.create(Node.Literal); + node.* = .{ .token = self.state.token }; + return &node.base; + }, + .open_paren => { + const open_paren_token = self.state.token; + + const expression = try self.parseExpression(.{ + .is_known_to_be_number_expression = true, + .can_contain_not_expressions = options.can_contain_not_expressions, + .nesting_context = options.nesting_context.incremented(first_token, open_paren_token), + .allowed_types = .{ .number = true }, + }); + + try self.nextToken(.normal); + // TODO: Add context to error about where the open paren is + try self.check(.close_paren); + + if (!options.allowed_types.number) return self.addErrorDetailsAndFail(options.toErrorDetails(open_paren_token)); + const node = try self.state.arena.create(Node.GroupedExpression); + node.* = .{ + .open_token = open_paren_token, + .expression = expression, + .close_token = self.state.token, + }; + return &node.base; + }, + .close_paren => { + // Note: In the Win32 implementation, a single close paren + // counts as a valid "expression", but only when its the first and + // only token in the expression. 
Such an expression is then treated + // as a 'skip this expression' instruction. For example: + // 1 RCDATA { 1, ), ), ), 2 } + // will be evaluated as if it were `1 RCDATA { 1, 2 }` and only + // 0x0001 and 0x0002 will be written to the .res data. + // + // This behavior is not emulated because it almost certainly has + // no valid use cases and only introduces edge cases that are + // not worth the effort to track down and deal with. Instead, + // we error but also add a note about the Win32 RC behavior if + // this edge case is detected. + if (!options.is_known_to_be_number_expression) { + is_close_paren_expression = true; + } + }, + .operator => { + // In the Win32 implementation, something akin to a unary + + // is allowed but it doesn't behave exactly like a unary +. + // Instead of emulating the Win32 behavior, we instead error + // and add a note about unary plus not being allowed. + // + // This is done because unary + only works in some places, + // and there's no real use-case for it since it's so limited + // in how it can be used (e.g. +1 is accepted but (+1) will error) + // + // Even understanding when unary plus is allowed is difficult, so + // we don't do any fancy detection of when the Win32 RC compiler would + // allow a unary + and instead just output the note in all cases. + // + // Some examples of allowed expressions by the Win32 compiler: + // +1 + // 0|+5 + // +1+2 + // +~-5 + // +(1) + // + // Some examples of disallowed expressions by the Win32 compiler: + // (+1) + // ++5 + // + // TODO: Potentially re-evaluate and support the unary plus in a bug-for-bug + // compatible way. 
+ const operator_char = self.state.token.slice(self.lexer.buffer)[0]; + if (operator_char == '+') { + is_unary_plus_expression = true; + } + }, + else => {}, + } + + try self.addErrorDetails(options.toErrorDetails(self.state.token)); + if (is_close_paren_expression) { + try self.addErrorDetails(ErrorDetails{ + .err = .close_paren_expression, + .type = .note, + .token = self.state.token, + .print_source_line = false, + }); + } + if (is_unary_plus_expression) { + try self.addErrorDetails(ErrorDetails{ + .err = .unary_plus_expression, + .type = .note, + .token = self.state.token, + .print_source_line = false, + }); + } + return error.ParseError; + } + + /// Expects the current token to have already been dealt with, and that the + /// expression will start on the next token. + /// After return, the current token will have been dealt with. + fn parseExpression(self: *Self, options: ParseExpressionOptions) Error!*Node { + if (options.nesting_context.level > max_nested_expression_level) { + try self.addErrorDetails(.{ + .err = .nested_expression_level_exceeds_max, + .token = options.nesting_context.first_token.?, + }); + return self.addErrorDetailsAndFail(.{ + .err = .nested_expression_level_exceeds_max, + .type = .note, + .token = options.nesting_context.last_token.?, + }); + } + var expr: *Node = try self.parsePrimary(options); + const first_token = expr.getFirstToken(); + + // Non-number expressions can't have operators, so we can just return + if (!expr.isNumberExpression()) return expr; + + while (try self.parseOptionalTokenAdvanced(.operator, .normal_expect_operator)) { + const operator = self.state.token; + const rhs_node = try self.parsePrimary(.{ + .is_known_to_be_number_expression = true, + .can_contain_not_expressions = options.can_contain_not_expressions, + .nesting_context = options.nesting_context.incremented(first_token, operator), + .allowed_types = options.allowed_types, + }); + + if (!rhs_node.isNumberExpression()) { + return 
self.addErrorDetailsAndFail(ErrorDetails{ + .err = .expected_something_else, + .token = rhs_node.getFirstToken(), + .token_span_end = rhs_node.getLastToken(), + .extra = .{ .expected_types = .{ + .number = true, + .number_expression = true, + } }, + }); + } + + const node = try self.state.arena.create(Node.BinaryExpression); + node.* = .{ + .left = expr, + .operator = operator, + .right = rhs_node, + }; + expr = &node.base; + } + + return expr; + } + + /// Skips any amount of commas (including zero) + /// In other words, it will skip the regex `,*` + /// Assumes the token(s) should be parsed with `.normal` as the method. + fn skipAnyCommas(self: *Self) !void { + while (try self.parseOptionalToken(.comma)) {} + } + + /// Advances the current token only if the token's id matches the specified `id`. + /// Assumes the token should be parsed with `.normal` as the method. + /// Returns true if the token matched, false otherwise. + fn parseOptionalToken(self: *Self, id: Token.Id) Error!bool { + return self.parseOptionalTokenAdvanced(id, .normal); + } + + /// Advances the current token only if the token's id matches the specified `id`. + /// Returns true if the token matched, false otherwise. 
+ fn parseOptionalTokenAdvanced(self: *Self, id: Token.Id, comptime method: Lexer.LexMethod) Error!bool { + const maybe_token = try self.lookaheadToken(method); + if (maybe_token.id != id) return false; + // `lookaheadToken` silently skips `error.CodePagePragmaInIncludedFile`, but `nextToken` + // reports it through `addErrorDetails` and then updates the code page lookups. Those + // steps can fail even though the lookahead succeeded, so the error is propagated + // instead of being asserted away with `catch unreachable`. + try self.nextToken(method); + return true; + } + + fn addErrorDetails(self: *Self, details: ErrorDetails) Allocator.Error!void { + try self.state.diagnostics.append(details); + } + + fn addErrorDetailsAndFail(self: *Self, details: ErrorDetails) Error { + try self.addErrorDetails(details); + return error.ParseError; + } + + fn nextToken(self: *Self, comptime method: Lexer.LexMethod) Error!void { + self.state.token = token: while (true) { + const token = self.lexer.next(method) catch |err| switch (err) { + error.CodePagePragmaInIncludedFile => { + // The Win32 RC compiler silently ignores such `#pragma code_page` directives, + // but we want to both ignore them *and* emit a warning + try self.addErrorDetails(.{ + .err = .code_page_pragma_in_included_file, + .type = .warning, + .token = self.lexer.error_context_token.?, + }); + continue; + }, + error.CodePagePragmaInvalidCodePage => { + var details = self.lexer.getErrorDetails(err); + if (!self.options.warn_instead_of_error_on_invalid_code_page) { + return self.addErrorDetailsAndFail(details); + } + details.type = .warning; + try self.addErrorDetails(details); + continue; + }, + error.InvalidDigitCharacterInNumberLiteral => { + const details = self.lexer.getErrorDetails(err); + try self.addErrorDetails(details); + return self.addErrorDetailsAndFail(.{ + .err = details.err, + .type = .note, + .token = details.token, + .print_source_line = false, + }); + }, + else => return self.addErrorDetailsAndFail(self.lexer.getErrorDetails(err)), + }; + break :token token; + }; + // After every token, set the input code page for its line + try self.state.input_code_page_lookup.setForToken(self.state.token, self.lexer.current_code_page); + // But only set the output code page to the current code page if we are past
the first code_page pragma in the file. + // Otherwise, we want to fill the lookup using the default code page so that lookups still work for lines that + // don't have an explicit output code page set. + const output_code_page = if (self.lexer.seen_pragma_code_pages > 1) self.lexer.current_code_page else self.state.output_code_page_lookup.default_code_page; + try self.state.output_code_page_lookup.setForToken(self.state.token, output_code_page); + } + + fn lookaheadToken(self: *Self, comptime method: Lexer.LexMethod) Error!Token { + self.state.lookahead_lexer = self.lexer.*; + return token: while (true) { + break :token self.state.lookahead_lexer.next(method) catch |err| switch (err) { + // Ignore this error and get the next valid token, we'll deal with this + // properly when getting the token for real + error.CodePagePragmaInIncludedFile => continue, + else => return self.addErrorDetailsAndFail(self.state.lookahead_lexer.getErrorDetails(err)), + }; + }; + } + + fn tokenSlice(self: *Self) []const u8 { + return self.state.token.slice(self.lexer.buffer); + } + + /// Check that the current token is something that can be used as an ID + fn checkId(self: *Self) !void { + switch (self.state.token.id) { + .literal => {}, + else => { + return self.addErrorDetailsAndFail(ErrorDetails{ + .err = .expected_token, + .token = self.state.token, + .extra = .{ .expected = .literal }, + }); + }, + } + } + + fn check(self: *Self, expected_token_id: Token.Id) !void { + if (self.state.token.id != expected_token_id) { + return self.addErrorDetailsAndFail(ErrorDetails{ + .err = .expected_token, + .token = self.state.token, + .extra = .{ .expected = expected_token_id }, + }); + } + } + + fn checkResource(self: *Self) !Resource { + switch (self.state.token.id) { + .literal => return Resource.fromString(.{ + .slice = self.state.token.slice(self.lexer.buffer), + .code_page = self.lexer.current_code_page, + }), + else => { + return self.addErrorDetailsAndFail(ErrorDetails{ + .err = 
.expected_token, + .token = self.state.token, + .extra = .{ .expected = .literal }, + }); + }, + } + } +}; diff --git a/lib/compiler/resinator/preprocess.zig b/lib/compiler/resinator/preprocess.zig @@ -0,0 +1,140 @@ +const std = @import("std"); +const builtin = @import("builtin"); +const Allocator = std.mem.Allocator; +const cli = @import("cli.zig"); +const aro = @import("aro"); + +const PreprocessError = error{ ArgError, GeneratedSourceError, PreprocessError, StreamTooLong, OutOfMemory }; + +pub fn preprocess( + comp: *aro.Compilation, + writer: anytype, + /// Expects argv[0] to be the command name + argv: []const []const u8, + maybe_dependencies_list: ?*std.ArrayList([]const u8), +) PreprocessError!void { + try comp.addDefaultPragmaHandlers(); + + var driver: aro.Driver = .{ .comp = comp, .aro_name = "arocc" }; + defer driver.deinit(); + + var macro_buf = std.ArrayList(u8).init(comp.gpa); + defer macro_buf.deinit(); + + _ = driver.parseArgs(std.io.null_writer, macro_buf.writer(), argv) catch |err| switch (err) { + error.FatalError => return error.ArgError, + error.OutOfMemory => |e| return e, + }; + + if (hasAnyErrors(comp)) return error.ArgError; + + // .include_system_defines gives us things like _WIN32 + const builtin_macros = comp.generateBuiltinMacros(.include_system_defines) catch |err| switch (err) { + error.FatalError => return error.GeneratedSourceError, + else => |e| return e, + }; + const user_macros = comp.addSourceFromBuffer("<command line>", macro_buf.items) catch |err| switch (err) { + error.FatalError => return error.GeneratedSourceError, + else => |e| return e, + }; + const source = driver.inputs.items[0]; + + if (hasAnyErrors(comp)) return error.GeneratedSourceError; + + comp.generated_buf.items.len = 0; + var pp = try aro.Preprocessor.initDefault(comp); + defer pp.deinit(); + + if (comp.langopts.ms_extensions) { + comp.ms_cwd_source_id = source.id; + } + + pp.preserve_whitespace = true; + pp.linemarkers = .line_directives; + + 
pp.preprocessSources(&.{ source, builtin_macros, user_macros }) catch |err| switch (err) { + error.FatalError => return error.PreprocessError, + else => |e| return e, + }; + + if (hasAnyErrors(comp)) return error.PreprocessError; + + try pp.prettyPrintTokens(writer); + + if (maybe_dependencies_list) |dependencies_list| { + for (comp.sources.values()) |comp_source| { + if (comp_source.id == builtin_macros.id or comp_source.id == user_macros.id) continue; + if (comp_source.id == .unused or comp_source.id == .generated) continue; + const duped_path = try dependencies_list.allocator.dupe(u8, comp_source.path); + errdefer dependencies_list.allocator.free(duped_path); + try dependencies_list.append(duped_path); + } + } +} + +fn hasAnyErrors(comp: *aro.Compilation) bool { + // In theory we could just check Diagnostics.errors != 0, but that only + // gets set during rendering of the error messages, see: + // https://github.com/Vexu/arocc/issues/603 + for (comp.diagnostics.list.items) |msg| { + switch (msg.kind) { + .@"fatal error", .@"error" => return true, + else => {}, + } + } + return false; +} + +/// `arena` is used for temporary -D argument strings and the INCLUDE environment variable. +/// The arena should be kept alive at least as long as `argv`. 
+pub fn appendAroArgs(arena: Allocator, argv: *std.ArrayList([]const u8), options: cli.Options, system_include_paths: []const []const u8) !void { + try argv.appendSlice(&.{ + "-E", + "--comments", + "-fuse-line-directives", + "--target=x86_64-windows-msvc", + "--emulate=msvc", + "-nostdinc", + "-DRC_INVOKED", + }); + for (options.extra_include_paths.items) |extra_include_path| { + try argv.append("-I"); + try argv.append(extra_include_path); + } + + for (system_include_paths) |include_path| { + try argv.append("-isystem"); + try argv.append(include_path); + } + + if (!options.ignore_include_env_var) { + const INCLUDE = std.process.getEnvVarOwned(arena, "INCLUDE") catch |err| switch (err) { + // An allocation failure must not be mistaken for "INCLUDE is not set" + error.OutOfMemory => return error.OutOfMemory, + // Any other failure (e.g. the variable is not set) means there are no extra paths + else => "", + }; + + // The only precedent here is llvm-rc which also uses the platform-specific + // delimiter. There's no precedent set by `rc.exe` since it's Windows-only. + const delimiter = switch (builtin.os.tag) { + .windows => ';', + else => ':', + }; + var it = std.mem.tokenizeScalar(u8, INCLUDE, delimiter); + while (it.next()) |include_path| { + try argv.append("-isystem"); + try argv.append(include_path); + } + } + + var symbol_it = options.symbols.iterator(); + while (symbol_it.next()) |entry| { + switch (entry.value_ptr.*) { + .define => |value| { + try argv.append("-D"); + const define_arg = try std.fmt.allocPrint(arena, "{s}={s}", .{ entry.key_ptr.*, value }); + try argv.append(define_arg); + }, + .undefine => { + try argv.append("-U"); + try argv.append(entry.key_ptr.*); + }, + } + } +} diff --git a/src/resinator/rc.zig b/lib/compiler/resinator/rc.zig diff --git a/lib/compiler/resinator/res.zig b/lib/compiler/resinator/res.zig @@ -0,0 +1,1107 @@ +const std = @import("std"); +const rc = @import("rc.zig"); +const Resource = rc.Resource; +const CommonResourceAttributes = rc.CommonResourceAttributes; +const Allocator = std.mem.Allocator; +const windows1252 = @import("windows1252.zig"); +const CodePage = @import("code_pages.zig").CodePage; +const literals = @import("literals.zig"); +const SourceBytes
= literals.SourceBytes; +const Codepoint = @import("code_pages.zig").Codepoint; +const lang = @import("lang.zig"); +const isNonAsciiDigit = @import("utils.zig").isNonAsciiDigit; + +/// https://learn.microsoft.com/en-us/windows/win32/menurc/resource-types +pub const RT = enum(u8) { + ACCELERATOR = 9, + ANICURSOR = 21, + ANIICON = 22, + BITMAP = 2, + CURSOR = 1, + DIALOG = 5, + DLGINCLUDE = 17, + DLGINIT = 240, + FONT = 8, + FONTDIR = 7, + GROUP_CURSOR = 1 + 11, // CURSOR + 11 + GROUP_ICON = 3 + 11, // ICON + 11 + HTML = 23, + ICON = 3, + MANIFEST = 24, + MENU = 4, + MESSAGETABLE = 11, + PLUGPLAY = 19, + RCDATA = 10, + STRING = 6, + TOOLBAR = 241, + VERSION = 16, + VXD = 20, + _, + + /// Returns null if the resource type is user-defined + /// Asserts that the resource is not `stringtable` + pub fn fromResource(resource: Resource) ?RT { + return switch (resource) { + .accelerators => .ACCELERATOR, + .bitmap => .BITMAP, + .cursor => .GROUP_CURSOR, + .dialog => .DIALOG, + .dialogex => .DIALOG, + .dlginclude => .DLGINCLUDE, + .dlginit => .DLGINIT, + .font => .FONT, + .html => .HTML, + .icon => .GROUP_ICON, + .menu => .MENU, + .menuex => .MENU, + .messagetable => .MESSAGETABLE, + .plugplay => .PLUGPLAY, + .rcdata => .RCDATA, + .stringtable => unreachable, + .toolbar => .TOOLBAR, + .user_defined => null, + .versioninfo => .VERSION, + .vxd => .VXD, + + .cursor_num => .CURSOR, + .icon_num => .ICON, + .string_num => .STRING, + .anicursor_num => .ANICURSOR, + .aniicon_num => .ANIICON, + .fontdir_num => .FONTDIR, + .manifest_num => .MANIFEST, + }; + } +}; + +/// https://learn.microsoft.com/en-us/windows/win32/menurc/common-resource-attributes +/// https://learn.microsoft.com/en-us/windows/win32/menurc/resourceheader +pub const MemoryFlags = packed struct(u16) { + value: u16, + + pub const MOVEABLE: u16 = 0x10; + // TODO: SHARED and PURE seem to be the same thing? Testing seems to confirm this but + // would like to find mention of it somewhere. 
+ pub const SHARED: u16 = 0x20; + pub const PURE: u16 = 0x20; + pub const PRELOAD: u16 = 0x40; + pub const DISCARDABLE: u16 = 0x1000; + + /// Note: The defaults can have combinations that are not possible to specify within + /// an .rc file, as the .rc attributes imply other values (i.e. specifying + /// DISCARDABLE always implies MOVEABLE and PURE/SHARED, and yet RT_ICON + /// has a default of only MOVEABLE | DISCARDABLE). + pub fn defaults(predefined_resource_type: ?RT) MemoryFlags { + if (predefined_resource_type == null) { + return MemoryFlags{ .value = MOVEABLE | SHARED }; + } else { + return switch (predefined_resource_type.?) { + // zig fmt: off + .RCDATA, .BITMAP, .HTML, .MANIFEST, + .ACCELERATOR, .VERSION, .MESSAGETABLE, + .DLGINIT, .TOOLBAR, .PLUGPLAY, + .VXD, => MemoryFlags{ .value = MOVEABLE | SHARED }, + + .GROUP_ICON, .GROUP_CURSOR, + .STRING, .FONT, .DIALOG, .MENU, + .DLGINCLUDE, => MemoryFlags{ .value = MOVEABLE | SHARED | DISCARDABLE }, + + .ICON, .CURSOR, .ANIICON, .ANICURSOR => MemoryFlags{ .value = MOVEABLE | DISCARDABLE }, + .FONTDIR => MemoryFlags{ .value = MOVEABLE | PRELOAD }, + // zig fmt: on + // Same as predefined_resource_type == null + _ => return MemoryFlags{ .value = MOVEABLE | SHARED }, + }; + } + } + + pub fn set(self: *MemoryFlags, attribute: CommonResourceAttributes) void { + switch (attribute) { + .preload => self.value |= PRELOAD, + .loadoncall => self.value &= ~PRELOAD, + .moveable => self.value |= MOVEABLE, + .fixed => self.value &= ~(MOVEABLE | DISCARDABLE), + .shared => self.value |= SHARED, + .nonshared => self.value &= ~(SHARED | DISCARDABLE), + .pure => self.value |= PURE, + .impure => self.value &= ~(PURE | DISCARDABLE), + .discardable => self.value |= DISCARDABLE | MOVEABLE | PURE, + } + } + + pub fn setGroup(self: *MemoryFlags, attribute: CommonResourceAttributes, implied_shared_or_pure: bool) void { + switch (attribute) { + .preload => { + self.value |= PRELOAD; + if (implied_shared_or_pure) self.value &= ~SHARED; + 
}, + .loadoncall => { + self.value &= ~PRELOAD; + if (implied_shared_or_pure) self.value |= SHARED; + }, + else => self.set(attribute), + } + } +}; + +/// https://learn.microsoft.com/en-us/windows/win32/intl/language-identifiers +pub const Language = packed struct(u16) { + // Note: This is the default no matter what locale the current system is set to, + // e.g. even if the system's locale is en-GB, en-US will still be the + // default language for resources in the Win32 rc compiler. + primary_language_id: u10 = lang.LANG_ENGLISH, + sublanguage_id: u6 = lang.SUBLANG_ENGLISH_US, + + /// Default language ID as a u16 + pub const default: u16 = (Language{}).asInt(); + + pub fn fromInt(int: u16) Language { + return @bitCast(int); + } + + pub fn asInt(self: Language) u16 { + return @bitCast(self); + } +}; + +/// https://learn.microsoft.com/en-us/windows/win32/api/winuser/ns-winuser-dlgitemtemplate#remarks +pub const ControlClass = enum(u16) { + button = 0x80, + edit = 0x81, + static = 0x82, + listbox = 0x83, + scrollbar = 0x84, + combobox = 0x85, + + pub fn fromControl(control: rc.Control) ?ControlClass { + return switch (control) { + // zig fmt: off + .auto3state, .autocheckbox, .autoradiobutton, + .checkbox, .defpushbutton, .groupbox, .pushbox, + .pushbutton, .radiobutton, .state3, .userbutton => .button, + // zig fmt: on + .combobox => .combobox, + .control => null, + .ctext, .icon, .ltext, .rtext => .static, + .edittext, .hedit, .iedit => .edit, + .listbox => .listbox, + .scrollbar => .scrollbar, + }; + } + + pub fn getImpliedStyle(control: rc.Control) u32 { + var style = WS.CHILD | WS.VISIBLE; + switch (control) { + .auto3state => style |= BS.AUTO3STATE | WS.TABSTOP, + .autocheckbox => style |= BS.AUTOCHECKBOX | WS.TABSTOP, + .autoradiobutton => style |= BS.AUTORADIOBUTTON, + .checkbox => style |= BS.CHECKBOX | WS.TABSTOP, + .combobox => {}, + .control => {}, + .ctext => style |= SS.CENTER | WS.GROUP, + .defpushbutton => style |= BS.DEFPUSHBUTTON | WS.TABSTOP, + 
.edittext, .hedit, .iedit => style |= WS.TABSTOP | WS.BORDER, + .groupbox => style |= BS.GROUPBOX, + .icon => style |= SS.ICON, + .listbox => style |= LBS.NOTIFY | WS.BORDER, + .ltext => style |= WS.GROUP, + .pushbox => style |= BS.PUSHBOX | WS.TABSTOP, + .pushbutton => style |= WS.TABSTOP, + .radiobutton => style |= BS.RADIOBUTTON, + .rtext => style |= SS.RIGHT | WS.GROUP, + .scrollbar => {}, + .state3 => style |= BS.@"3STATE" | WS.TABSTOP, + .userbutton => style |= BS.USERBUTTON | WS.TABSTOP, + } + return style; + } +}; + +pub const NameOrOrdinal = union(enum) { + // UTF-16 LE + name: [:0]const u16, + ordinal: u16, + + pub fn deinit(self: NameOrOrdinal, allocator: Allocator) void { + switch (self) { + .name => |name| { + allocator.free(name); + }, + .ordinal => {}, + } + } + + /// Returns the full length of the amount of bytes that would be written by `write` + /// (e.g. for an ordinal it will return the length including the 0xFFFF indicator) + pub fn byteLen(self: NameOrOrdinal) usize { + switch (self) { + .name => |name| { + // + 1 for 0-terminated + return (name.len + 1) * @sizeOf(u16); + }, + .ordinal => return 4, + } + } + + pub fn write(self: NameOrOrdinal, writer: anytype) !void { + switch (self) { + .name => |name| { + try writer.writeAll(std.mem.sliceAsBytes(name[0 .. 
name.len + 1])); + }, + .ordinal => |ordinal| { + try writer.writeInt(u16, 0xffff, .little); + try writer.writeInt(u16, ordinal, .little); + }, + } + } + + pub fn writeEmpty(writer: anytype) !void { + try writer.writeInt(u16, 0, .little); + } + + pub fn fromString(allocator: Allocator, bytes: SourceBytes) !NameOrOrdinal { + if (maybeOrdinalFromString(bytes)) |ordinal| { + return ordinal; + } + return nameFromString(allocator, bytes); + } + + pub fn nameFromString(allocator: Allocator, bytes: SourceBytes) !NameOrOrdinal { + // Names have a limit of 256 UTF-16 code units + null terminator + var buf = try std.ArrayList(u16).initCapacity(allocator, @min(257, bytes.slice.len)); + errdefer buf.deinit(); + + var i: usize = 0; + while (bytes.code_page.codepointAt(i, bytes.slice)) |codepoint| : (i += codepoint.byte_len) { + if (buf.items.len == 256) break; + + const c = codepoint.value; + if (c == Codepoint.invalid) { + try buf.append(std.mem.nativeToLittle(u16, '�')); + } else if (c < 0x7F) { + // ASCII chars in names are always converted to uppercase + try buf.append(std.mem.nativeToLittle(u16, std.ascii.toUpper(@intCast(c)))); + } else if (c < 0x10000) { + const short: u16 = @intCast(c); + try buf.append(std.mem.nativeToLittle(u16, short)); + } else { + const high = @as(u16, @intCast((c - 0x10000) >> 10)) + 0xD800; + try buf.append(std.mem.nativeToLittle(u16, high)); + + // Note: This can cut-off in the middle of a UTF-16 surrogate pair, + // i.e. it can make the string end with an unpaired high surrogate + if (buf.items.len == 256) break; + + const low = @as(u16, @intCast(c & 0x3FF)) + 0xDC00; + try buf.append(std.mem.nativeToLittle(u16, low)); + } + } + + return NameOrOrdinal{ .name = try buf.toOwnedSliceSentinel(0) }; + } + + /// Returns `null` if the bytes do not form a valid number. + /// Does not allow non-ASCII digits (which the Win32 RC compiler does allow + /// in base 10 numbers, see `maybeNonAsciiOrdinalFromString`). 
+ pub fn maybeOrdinalFromString(bytes: SourceBytes) ?NameOrOrdinal { + var buf = bytes.slice; + var radix: u8 = 10; + if (buf.len > 2 and buf[0] == '0') { + switch (buf[1]) { + '0'...'9' => {}, + 'x', 'X' => { + radix = 16; + buf = buf[2..]; + // only the first 4 hex digits matter, anything else is ignored + // i.e. 0x12345 is treated as if it were 0x1234 + buf.len = @min(buf.len, 4); + }, + else => return null, + } + } + + var i: usize = 0; + var result: u16 = 0; + while (bytes.code_page.codepointAt(i, buf)) |codepoint| : (i += codepoint.byte_len) { + const c = codepoint.value; + const digit: u8 = switch (c) { + 0x00...0x7F => std.fmt.charToDigit(@intCast(c), radix) catch switch (radix) { + 10 => return null, + // non-hex-digits are treated as a terminator rather than invalidating + // the number (note: if there are no valid hex digits then the result + // will be zero which is not treated as a valid number) + 16 => break, + else => unreachable, + }, + else => if (radix == 10) return null else break, + }; + + // Wrapping arithmetic: a value that does not fit in a u16 wraps around + // instead of invalidating the number + if (result != 0) { + result *%= radix; + } + result +%= digit; + } + + // Anything that resolves to zero is not interpreted as a number + if (result == 0) return null; + return NameOrOrdinal{ .ordinal = result }; + } + + /// The Win32 RC compiler uses `iswdigit` for digit detection for base 10 + /// numbers, which means that non-ASCII digits are 'accepted' but handled + /// in a totally unintuitive manner, leading to arbitrary results. + /// + /// This function will return the value that such an ordinal 'would' have + /// if it was run through the Win32 RC compiler. This allows us to disallow + /// non-ASCII digits in number literals but still detect when the Win32 + /// RC compiler would have allowed them, so that a proper warning/error + /// can be emitted.
+ pub fn maybeNonAsciiOrdinalFromString(bytes: SourceBytes) ?NameOrOrdinal { + const buf = bytes.slice; + const radix = 10; + if (buf.len > 2 and buf[0] == '0') { + switch (buf[1]) { + // We only care about base 10 numbers here + 'x', 'X' => return null, + else => {}, + } + } + + var i: usize = 0; + var result: u16 = 0; + while (bytes.code_page.codepointAt(i, buf)) |codepoint| : (i += codepoint.byte_len) { + const c = codepoint.value; + const digit: u16 = digit: { + const is_digit = (c >= '0' and c <= '9') or isNonAsciiDigit(c); + if (!is_digit) return null; + // For a non-ASCII digit this is the codepoint's distance from '0', + // not the numeric value the digit represents + break :digit @intCast(c - '0'); + }; + + if (result != 0) { + result *%= radix; + } + result +%= digit; + } + + // Anything that resolves to zero is not interpreted as a number + if (result == 0) return null; + return NameOrOrdinal{ .ordinal = result }; + } + + pub fn predefinedResourceType(self: NameOrOrdinal) ?RT { + switch (self) { + .ordinal => |ordinal| { + if (ordinal >= 256) return null; + switch (@as(RT, @enumFromInt(ordinal))) { + .ACCELERATOR, + .ANICURSOR, + .ANIICON, + .BITMAP, + .CURSOR, + .DIALOG, + .DLGINCLUDE, + .DLGINIT, + .FONT, + .FONTDIR, + .GROUP_CURSOR, + .GROUP_ICON, + .HTML, + .ICON, + .MANIFEST, + .MENU, + .MESSAGETABLE, + .PLUGPLAY, + .RCDATA, + .STRING, + .TOOLBAR, + .VERSION, + .VXD, + => |rt| return rt, + _ => return null, + } + }, + .name => return null, + } + } +}; + +fn expectNameOrOrdinal(expected: NameOrOrdinal, actual: NameOrOrdinal) !void { + switch (expected) { + .name => { + if (actual != .name) return error.TestExpectedEqual; + try std.testing.expectEqualSlices(u16, expected.name, actual.name); + }, + .ordinal => { + if (actual != .ordinal) return error.TestExpectedEqual; + try std.testing.expectEqual(expected.ordinal, actual.ordinal); + }, + } +} + +test "NameOrOrdinal" { + var arena = std.heap.ArenaAllocator.init(std.testing.allocator); + defer arena.deinit(); + + const allocator = arena.allocator(); + + // zero is treated as a string + try expectNameOrOrdinal( +
NameOrOrdinal{ .name = std.unicode.utf8ToUtf16LeStringLiteral("0") }, + try NameOrOrdinal.fromString(allocator, .{ .slice = "0", .code_page = .windows1252 }), + ); + // any non-digit byte invalidates the number + try expectNameOrOrdinal( + NameOrOrdinal{ .name = std.unicode.utf8ToUtf16LeStringLiteral("1A") }, + try NameOrOrdinal.fromString(allocator, .{ .slice = "1a", .code_page = .windows1252 }), + ); + try expectNameOrOrdinal( + NameOrOrdinal{ .name = std.unicode.utf8ToUtf16LeStringLiteral("1ÿ") }, + try NameOrOrdinal.fromString(allocator, .{ .slice = "1\xff", .code_page = .windows1252 }), + ); + try expectNameOrOrdinal( + NameOrOrdinal{ .name = std.unicode.utf8ToUtf16LeStringLiteral("1€") }, + try NameOrOrdinal.fromString(allocator, .{ .slice = "1€", .code_page = .utf8 }), + ); + try expectNameOrOrdinal( + NameOrOrdinal{ .name = std.unicode.utf8ToUtf16LeStringLiteral("1�") }, + try NameOrOrdinal.fromString(allocator, .{ .slice = "1\x80", .code_page = .utf8 }), + ); + // same with overflow that resolves to 0 + try expectNameOrOrdinal( + NameOrOrdinal{ .name = std.unicode.utf8ToUtf16LeStringLiteral("65536") }, + try NameOrOrdinal.fromString(allocator, .{ .slice = "65536", .code_page = .windows1252 }), + ); + // hex zero is also treated as a string + try expectNameOrOrdinal( + NameOrOrdinal{ .name = std.unicode.utf8ToUtf16LeStringLiteral("0X0") }, + try NameOrOrdinal.fromString(allocator, .{ .slice = "0x0", .code_page = .windows1252 }), + ); + // hex numbers work + try expectNameOrOrdinal( + NameOrOrdinal{ .ordinal = 0x100 }, + try NameOrOrdinal.fromString(allocator, .{ .slice = "0x100", .code_page = .windows1252 }), + ); + // only the first 4 hex digits matter + try expectNameOrOrdinal( + NameOrOrdinal{ .ordinal = 0x1234 }, + try NameOrOrdinal.fromString(allocator, .{ .slice = "0X12345", .code_page = .windows1252 }), + ); + // octal is not supported so it gets treated as a string + try expectNameOrOrdinal( + NameOrOrdinal{ .name = 
std.unicode.utf8ToUtf16LeStringLiteral("0O1234") }, + try NameOrOrdinal.fromString(allocator, .{ .slice = "0o1234", .code_page = .windows1252 }), + ); + // overflow wraps + try expectNameOrOrdinal( + NameOrOrdinal{ .ordinal = @truncate(65635) }, + try NameOrOrdinal.fromString(allocator, .{ .slice = "65635", .code_page = .windows1252 }), + ); + // non-hex-digits in a hex literal are treated as a terminator + try expectNameOrOrdinal( + NameOrOrdinal{ .ordinal = 0x4 }, + try NameOrOrdinal.fromString(allocator, .{ .slice = "0x4n", .code_page = .windows1252 }), + ); + try expectNameOrOrdinal( + NameOrOrdinal{ .ordinal = 0xFA }, + try NameOrOrdinal.fromString(allocator, .{ .slice = "0xFAZ92348", .code_page = .windows1252 }), + ); + // 0 at the start is allowed + try expectNameOrOrdinal( + NameOrOrdinal{ .ordinal = 50 }, + try NameOrOrdinal.fromString(allocator, .{ .slice = "050", .code_page = .windows1252 }), + ); + // limit of 256 UTF-16 code units, can cut off between a surrogate pair + { + var expected = blk: { + // the input before the 𐐷 character, but uppercased + const expected_u8_bytes = "00614982008907933748980730280674788429543776231864944218790698304852300002973622122844631429099469274282385299397783838528QFFL7SHNSIETG0QKLR1UYPBTUV1PMFQRRA0VJDG354GQEDJMUPGPP1W1EXVNTZVEIZ6K3IPQM1AWGEYALMEODYVEZGOD3MFMGEY8FNR4JUETTB1PZDEWSNDRGZUA8SNXP3NGO"; + var buf: [256:0]u16 = undefined; + for (expected_u8_bytes, 0..) 
|byte, i| { + buf[i] = std.mem.nativeToLittle(u16, byte); + } + // surrogate pair that is now orphaned + buf[255] = std.mem.nativeToLittle(u16, 0xD801); + break :blk buf; + }; + try expectNameOrOrdinal( + NameOrOrdinal{ .name = &expected }, + try NameOrOrdinal.fromString(allocator, .{ + .slice = "00614982008907933748980730280674788429543776231864944218790698304852300002973622122844631429099469274282385299397783838528qffL7ShnSIETg0qkLr1UYpbtuv1PMFQRRa0VjDG354GQedJmUPgpp1w1ExVnTzVEiz6K3iPqM1AWGeYALmeODyvEZGOD3MfmGey8fnR4jUeTtB1PzdeWsNDrGzuA8Snxp3NGO𐐷", + .code_page = .utf8, + }), + ); + } +} + +test "NameOrOrdinal code page awareness" { + var arena = std.heap.ArenaAllocator.init(std.testing.allocator); + defer arena.deinit(); + + const allocator = arena.allocator(); + + try expectNameOrOrdinal( + NameOrOrdinal{ .name = std.unicode.utf8ToUtf16LeStringLiteral("��𐐷") }, + try NameOrOrdinal.fromString(allocator, .{ + .slice = "\xF0\x80\x80𐐷", + .code_page = .utf8, + }), + ); + try expectNameOrOrdinal( + // The UTF-8 representation of 𐐷 is 0xF0 0x90 0x90 0xB7. In order to provide valid + // UTF-8 to utf8ToUtf16LeStringLiteral, it uses the UTF-8 representation of the codepoint + // <U+0x90> which is 0xC2 0x90. 
The code units in the expected UTF-16 string are: + // { 0x00F0, 0x20AC, 0x20AC, 0x00F0, 0x0090, 0x0090, 0x00B7 } + NameOrOrdinal{ .name = std.unicode.utf8ToUtf16LeStringLiteral("ð€€ð\xC2\x90\xC2\x90·") }, + try NameOrOrdinal.fromString(allocator, .{ + .slice = "\xF0\x80\x80𐐷", + .code_page = .windows1252, + }), + ); +} + +/// https://learn.microsoft.com/en-us/windows/win32/api/winuser/ns-winuser-accel#members +/// https://devblogs.microsoft.com/oldnewthing/20070316-00/?p=27593 +pub const AcceleratorModifiers = struct { + value: u8 = 0, + explicit_ascii_or_virtkey: bool = false, + + pub const ASCII = 0; + pub const VIRTKEY = 1; + pub const NOINVERT = 1 << 1; + pub const SHIFT = 1 << 2; + pub const CONTROL = 1 << 3; + pub const ALT = 1 << 4; + /// Marker for the last accelerator in an accelerator table + pub const last_accelerator_in_table = 1 << 7; + + pub fn apply(self: *AcceleratorModifiers, modifier: rc.AcceleratorTypeAndOptions) void { + if (modifier == .ascii or modifier == .virtkey) self.explicit_ascii_or_virtkey = true; + self.value |= modifierValue(modifier); + } + + pub fn isSet(self: AcceleratorModifiers, modifier: rc.AcceleratorTypeAndOptions) bool { + // ASCII is set whenever VIRTKEY is not + if (modifier == .ascii) return self.value & modifierValue(.virtkey) == 0; + return self.value & modifierValue(modifier) != 0; + } + + fn modifierValue(modifier: rc.AcceleratorTypeAndOptions) u8 { + return switch (modifier) { + .ascii => ASCII, + .virtkey => VIRTKEY, + .noinvert => NOINVERT, + .shift => SHIFT, + .control => CONTROL, + .alt => ALT, + }; + } + + pub fn markLast(self: *AcceleratorModifiers) void { + self.value |= last_accelerator_in_table; + } +}; + +const AcceleratorKeyCodepointTranslator = struct { + string_type: literals.StringType, + + pub fn translate(self: @This(), maybe_parsed: ?literals.IterativeStringParser.ParsedCodepoint) ?u21 { + const parsed = maybe_parsed orelse return null; + if (parsed.codepoint == Codepoint.invalid) return 0xFFFD; + if 
(parsed.from_escaped_integer and self.string_type == .ascii) { + return windows1252.toCodepoint(@truncate(parsed.codepoint)); + } + return parsed.codepoint; + } +}; + +pub const ParseAcceleratorKeyStringError = error{ EmptyAccelerator, AcceleratorTooLong, InvalidControlCharacter, ControlCharacterOutOfRange }; + +/// Expects bytes to be the full bytes of a string literal token (e.g. including the "" or L""). +pub fn parseAcceleratorKeyString(bytes: SourceBytes, is_virt: bool, options: literals.StringParseOptions) (ParseAcceleratorKeyStringError || Allocator.Error)!u16 { + if (bytes.slice.len == 0) { + return error.EmptyAccelerator; + } + + var parser = literals.IterativeStringParser.init(bytes, options); + var translator = AcceleratorKeyCodepointTranslator{ .string_type = parser.declared_string_type }; + + const first_codepoint = translator.translate(try parser.next()) orelse return error.EmptyAccelerator; + // 0 is treated as a terminator, so this is equivalent to an empty string + if (first_codepoint == 0) return error.EmptyAccelerator; + + if (first_codepoint == '^') { + // Note: Emitting this warning unconditonally whenever ^ is the first character + // matches the Win32 RC behavior, but it's questionable whether or not + // the warning should be emitted for ^^ since that results in the ASCII + // character ^ being written to the .res. + if (is_virt and options.diagnostics != null) { + try options.diagnostics.?.diagnostics.append(.{ + .err = .ascii_character_not_equivalent_to_virtual_key_code, + .type = .warning, + .token = options.diagnostics.?.token, + }); + } + + const c = translator.translate(try parser.next()) orelse return error.InvalidControlCharacter; + switch (c) { + '^' => return '^', // special case + 'a'...'z', 'A'...'Z' => return std.ascii.toUpper(@intCast(c)) - 0x40, + // Note: The Windows RC compiler allows more than just A-Z, but what it allows + // seems to be tied to some sort of Unicode-aware 'is character' function or something. 
+ // The full list of codepoints that trigger an out-of-range error can be found here: + // https://gist.github.com/squeek502/2e9d0a4728a83eed074ad9785a209fd0 + // For codepoints >= 0x80 that don't trigger the error, the Windows RC compiler takes the + // codepoint and does the `- 0x40` transformation as if it were A-Z which couldn't lead + // to anything useable, so there's no point in emulating that behavior--erroring for + // all non-[a-zA-Z] makes much more sense and is what was probably intended by the + // Windows RC compiler. + else => return error.ControlCharacterOutOfRange, + } + @compileError("this should be unreachable"); + } + + const second_codepoint = translator.translate(try parser.next()); + + var result: u32 = initial_value: { + if (first_codepoint >= 0x10000) { + if (second_codepoint != null and second_codepoint.? != 0) return error.AcceleratorTooLong; + // No idea why it works this way, but this seems to match the Windows RC + // behavior for codepoints >= 0x10000 + const low = @as(u16, @intCast(first_codepoint & 0x3FF)) + 0xDC00; + const extra = (first_codepoint - 0x10000) / 0x400; + break :initial_value low + extra * 0x100; + } + break :initial_value first_codepoint; + }; + + // 0 is treated as a terminator + if (second_codepoint != null and second_codepoint.? == 0) return @truncate(result); + + const third_codepoint = translator.translate(try parser.next()); + // 0 is treated as a terminator, so a 0 in the third position is fine but + // anything else is too many codepoints for an accelerator + if (third_codepoint != null and third_codepoint.? 
!= 0) return error.AcceleratorTooLong; + + if (second_codepoint) |c| { + if (c >= 0x10000) return error.AcceleratorTooLong; + result <<= 8; + result += c; + } else if (is_virt) { + switch (result) { + 'a'...'z' => result -= 0x20, // toUpper + else => {}, + } + } + return @truncate(result); +} + +test "accelerator keys" { + try std.testing.expectEqual(@as(u16, 1), try parseAcceleratorKeyString( + .{ .slice = "\"^a\"", .code_page = .windows1252 }, + false, + .{}, + )); + try std.testing.expectEqual(@as(u16, 1), try parseAcceleratorKeyString( + .{ .slice = "\"^A\"", .code_page = .windows1252 }, + false, + .{}, + )); + try std.testing.expectEqual(@as(u16, 26), try parseAcceleratorKeyString( + .{ .slice = "\"^Z\"", .code_page = .windows1252 }, + false, + .{}, + )); + try std.testing.expectEqual(@as(u16, '^'), try parseAcceleratorKeyString( + .{ .slice = "\"^^\"", .code_page = .windows1252 }, + false, + .{}, + )); + + try std.testing.expectEqual(@as(u16, 'a'), try parseAcceleratorKeyString( + .{ .slice = "\"a\"", .code_page = .windows1252 }, + false, + .{}, + )); + try std.testing.expectEqual(@as(u16, 0x6162), try parseAcceleratorKeyString( + .{ .slice = "\"ab\"", .code_page = .windows1252 }, + false, + .{}, + )); + + try std.testing.expectEqual(@as(u16, 'C'), try parseAcceleratorKeyString( + .{ .slice = "\"c\"", .code_page = .windows1252 }, + true, + .{}, + )); + try std.testing.expectEqual(@as(u16, 0x6363), try parseAcceleratorKeyString( + .{ .slice = "\"cc\"", .code_page = .windows1252 }, + true, + .{}, + )); + + // \x00 or any escape that evaluates to zero acts as a terminator, everything past it + // is ignored + try std.testing.expectEqual(@as(u16, 'a'), try parseAcceleratorKeyString( + .{ .slice = "\"a\\0bcdef\"", .code_page = .windows1252 }, + false, + .{}, + )); + + // \x80 is € in Windows-1252, which is Unicode codepoint 20AC + try std.testing.expectEqual(@as(u16, 0x20AC), try parseAcceleratorKeyString( + .{ .slice = "\"\x80\"", .code_page = .windows1252 }, + 
false, + .{}, + )); + // This depends on the code page, though, with codepage 65001, \x80 + // on its own is invalid UTF-8 so it gets converted to the replacement character + try std.testing.expectEqual(@as(u16, 0xFFFD), try parseAcceleratorKeyString( + .{ .slice = "\"\x80\"", .code_page = .utf8 }, + false, + .{}, + )); + try std.testing.expectEqual(@as(u16, 0xCCAC), try parseAcceleratorKeyString( + .{ .slice = "\"\x80\x80\"", .code_page = .windows1252 }, + false, + .{}, + )); + // This also behaves the same with escaped characters + try std.testing.expectEqual(@as(u16, 0x20AC), try parseAcceleratorKeyString( + .{ .slice = "\"\\x80\"", .code_page = .windows1252 }, + false, + .{}, + )); + // Even with utf8 code page + try std.testing.expectEqual(@as(u16, 0x20AC), try parseAcceleratorKeyString( + .{ .slice = "\"\\x80\"", .code_page = .utf8 }, + false, + .{}, + )); + try std.testing.expectEqual(@as(u16, 0xCCAC), try parseAcceleratorKeyString( + .{ .slice = "\"\\x80\\x80\"", .code_page = .windows1252 }, + false, + .{}, + )); + // Wide string with the actual characters behaves like the ASCII string version + try std.testing.expectEqual(@as(u16, 0xCCAC), try parseAcceleratorKeyString( + .{ .slice = "L\"\x80\x80\"", .code_page = .windows1252 }, + false, + .{}, + )); + // But wide string with escapes behaves differently + try std.testing.expectEqual(@as(u16, 0x8080), try parseAcceleratorKeyString( + .{ .slice = "L\"\\x80\\x80\"", .code_page = .windows1252 }, + false, + .{}, + )); + // and invalid escapes within wide strings get skipped + try std.testing.expectEqual(@as(u16, 'z'), try parseAcceleratorKeyString( + .{ .slice = "L\"\\Hz\"", .code_page = .windows1252 }, + false, + .{}, + )); + + // any non-A-Z codepoints are illegal + try std.testing.expectError(error.ControlCharacterOutOfRange, parseAcceleratorKeyString( + .{ .slice = "\"^\x83\"", .code_page = .windows1252 }, + false, + .{}, + )); + try std.testing.expectError(error.ControlCharacterOutOfRange, 
parseAcceleratorKeyString( + .{ .slice = "\"^1\"", .code_page = .windows1252 }, + false, + .{}, + )); + try std.testing.expectError(error.InvalidControlCharacter, parseAcceleratorKeyString( + .{ .slice = "\"^\"", .code_page = .windows1252 }, + false, + .{}, + )); + try std.testing.expectError(error.EmptyAccelerator, parseAcceleratorKeyString( + .{ .slice = "\"\"", .code_page = .windows1252 }, + false, + .{}, + )); + try std.testing.expectError(error.AcceleratorTooLong, parseAcceleratorKeyString( + .{ .slice = "\"hello\"", .code_page = .windows1252 }, + false, + .{}, + )); + try std.testing.expectError(error.ControlCharacterOutOfRange, parseAcceleratorKeyString( + .{ .slice = "\"^\x80\"", .code_page = .windows1252 }, + false, + .{}, + )); + + // Invalid UTF-8 gets converted to 0xFFFD, multiple invalids get shifted and added together + // The behavior is the same for ascii and wide strings + try std.testing.expectEqual(@as(u16, 0xFCFD), try parseAcceleratorKeyString( + .{ .slice = "\"\x80\x80\"", .code_page = .utf8 }, + false, + .{}, + )); + try std.testing.expectEqual(@as(u16, 0xFCFD), try parseAcceleratorKeyString( + .{ .slice = "L\"\x80\x80\"", .code_page = .utf8 }, + false, + .{}, + )); + + // Codepoints >= 0x10000 + try std.testing.expectEqual(@as(u16, 0xDD00), try parseAcceleratorKeyString( + .{ .slice = "\"\xF0\x90\x84\x80\"", .code_page = .utf8 }, + false, + .{}, + )); + try std.testing.expectEqual(@as(u16, 0xDD00), try parseAcceleratorKeyString( + .{ .slice = "L\"\xF0\x90\x84\x80\"", .code_page = .utf8 }, + false, + .{}, + )); + try std.testing.expectEqual(@as(u16, 0x9C01), try parseAcceleratorKeyString( + .{ .slice = "\"\xF4\x80\x80\x81\"", .code_page = .utf8 }, + false, + .{}, + )); + // anything before or after a codepoint >= 0x10000 causes an error + try std.testing.expectError(error.AcceleratorTooLong, parseAcceleratorKeyString( + .{ .slice = "\"a\xF0\x90\x80\x80\"", .code_page = .utf8 }, + false, + .{}, + )); + try 
std.testing.expectError(error.AcceleratorTooLong, parseAcceleratorKeyString( + .{ .slice = "\"\xF0\x90\x80\x80a\"", .code_page = .utf8 }, + false, + .{}, + )); +} + +pub const ForcedOrdinal = struct { + pub fn fromBytes(bytes: SourceBytes) u16 { + var i: usize = 0; + var result: u21 = 0; + while (bytes.code_page.codepointAt(i, bytes.slice)) |codepoint| : (i += codepoint.byte_len) { + const c = switch (codepoint.value) { + // Codepoints that would need a surrogate pair in UTF-16 are + // broken up into their UTF-16 code units and each code unit + // is interpreted as a digit. + 0x10000...0x10FFFF => { + const high = @as(u16, @intCast((codepoint.value - 0x10000) >> 10)) + 0xD800; + if (result != 0) result *%= 10; + result +%= high -% '0'; + + const low = @as(u16, @intCast(codepoint.value & 0x3FF)) + 0xDC00; + if (result != 0) result *%= 10; + result +%= low -% '0'; + continue; + }, + Codepoint.invalid => 0xFFFD, + else => codepoint.value, + }; + if (result != 0) result *%= 10; + result +%= c -% '0'; + } + return @truncate(result); + } + + pub fn fromUtf16Le(utf16: [:0]const u16) u16 { + var result: u16 = 0; + for (utf16) |code_unit| { + if (result != 0) result *%= 10; + result +%= std.mem.littleToNative(u16, code_unit) -% '0'; + } + return result; + } +}; + +test "forced ordinal" { + try std.testing.expectEqual(@as(u16, 3200), ForcedOrdinal.fromBytes(.{ .slice = "3200", .code_page = .windows1252 })); + try std.testing.expectEqual(@as(u16, 0x33), ForcedOrdinal.fromBytes(.{ .slice = "1+1", .code_page = .windows1252 })); + try std.testing.expectEqual(@as(u16, 65531), ForcedOrdinal.fromBytes(.{ .slice = "1!", .code_page = .windows1252 })); + + try std.testing.expectEqual(@as(u16, 0x122), ForcedOrdinal.fromBytes(.{ .slice = "0\x8C", .code_page = .windows1252 })); + try std.testing.expectEqual(@as(u16, 0x122), ForcedOrdinal.fromBytes(.{ .slice = "0Œ", .code_page = .utf8 })); + + // invalid UTF-8 gets converted to 0xFFFD (replacement char) and then interpreted as a digit + 
try std.testing.expectEqual(@as(u16, 0xFFCD), ForcedOrdinal.fromBytes(.{ .slice = "0\x81", .code_page = .utf8 })); + // codepoints >= 0x10000 + try std.testing.expectEqual(@as(u16, 0x49F2), ForcedOrdinal.fromBytes(.{ .slice = "0\u{10002}", .code_page = .utf8 })); + try std.testing.expectEqual(@as(u16, 0x4AF0), ForcedOrdinal.fromBytes(.{ .slice = "0\u{10100}", .code_page = .utf8 })); + + // From UTF-16 + try std.testing.expectEqual(@as(u16, 0x122), ForcedOrdinal.fromUtf16Le(&[_:0]u16{ std.mem.nativeToLittle(u16, '0'), std.mem.nativeToLittle(u16, 'Œ') })); + try std.testing.expectEqual(@as(u16, 0x4AF0), ForcedOrdinal.fromUtf16Le(std.unicode.utf8ToUtf16LeStringLiteral("0\u{10100}"))); +} + +/// https://learn.microsoft.com/en-us/windows/win32/api/verrsrc/ns-verrsrc-vs_fixedfileinfo +pub const FixedFileInfo = struct { + file_version: Version = .{}, + product_version: Version = .{}, + file_flags_mask: u32 = 0, + file_flags: u32 = 0, + file_os: u32 = 0, + file_type: u32 = 0, + file_subtype: u32 = 0, + file_date: Version = .{}, // TODO: I think this is always all zeroes? 
+ + pub const signature = 0xFEEF04BD; + // Note: This corresponds to a version of 1.0 + pub const version = 0x00010000; + + pub const byte_len = 0x34; + pub const key = std.unicode.utf8ToUtf16LeStringLiteral("VS_VERSION_INFO"); + + pub const Version = struct { + parts: [4]u16 = [_]u16{0} ** 4, + + pub fn mostSignificantCombinedParts(self: Version) u32 { + return (@as(u32, self.parts[0]) << 16) + self.parts[1]; + } + + pub fn leastSignificantCombinedParts(self: Version) u32 { + return (@as(u32, self.parts[2]) << 16) + self.parts[3]; + } + }; + + pub fn write(self: FixedFileInfo, writer: anytype) !void { + try writer.writeInt(u32, signature, .little); + try writer.writeInt(u32, version, .little); + try writer.writeInt(u32, self.file_version.mostSignificantCombinedParts(), .little); + try writer.writeInt(u32, self.file_version.leastSignificantCombinedParts(), .little); + try writer.writeInt(u32, self.product_version.mostSignificantCombinedParts(), .little); + try writer.writeInt(u32, self.product_version.leastSignificantCombinedParts(), .little); + try writer.writeInt(u32, self.file_flags_mask, .little); + try writer.writeInt(u32, self.file_flags, .little); + try writer.writeInt(u32, self.file_os, .little); + try writer.writeInt(u32, self.file_type, .little); + try writer.writeInt(u32, self.file_subtype, .little); + try writer.writeInt(u32, self.file_date.mostSignificantCombinedParts(), .little); + try writer.writeInt(u32, self.file_date.leastSignificantCombinedParts(), .little); + } +}; + +test "FixedFileInfo.Version" { + const version = FixedFileInfo.Version{ + .parts = .{ 1, 2, 3, 4 }, + }; + try std.testing.expectEqual(@as(u32, 0x00010002), version.mostSignificantCombinedParts()); + try std.testing.expectEqual(@as(u32, 0x00030004), version.leastSignificantCombinedParts()); +} + +pub const VersionNode = struct { + pub const type_string: u16 = 1; + pub const type_binary: u16 = 0; +}; + +pub const MenuItemFlags = struct { + value: u16 = 0, + + pub fn apply(self: 
*MenuItemFlags, option: rc.MenuItem.Option) void { + self.value |= optionValue(option); + } + + pub fn isSet(self: MenuItemFlags, option: rc.MenuItem.Option) bool { + return self.value & optionValue(option) != 0; + } + + fn optionValue(option: rc.MenuItem.Option) u16 { + return @intCast(switch (option) { + .checked => MF.CHECKED, + .grayed => MF.GRAYED, + .help => MF.HELP, + .inactive => MF.DISABLED, + .menubarbreak => MF.MENUBARBREAK, + .menubreak => MF.MENUBREAK, + }); + } + + pub fn markLast(self: *MenuItemFlags) void { + self.value |= @intCast(MF.END); + } +}; + +/// Menu Flags from WinUser.h +/// This is not complete, it only contains what is needed +pub const MF = struct { + pub const GRAYED: u32 = 0x00000001; + pub const DISABLED: u32 = 0x00000002; + pub const CHECKED: u32 = 0x00000008; + pub const POPUP: u32 = 0x00000010; + pub const MENUBARBREAK: u32 = 0x00000020; + pub const MENUBREAK: u32 = 0x00000040; + pub const HELP: u32 = 0x00004000; + pub const END: u32 = 0x00000080; +}; + +/// Window Styles from WinUser.h +pub const WS = struct { + pub const OVERLAPPED: u32 = 0x00000000; + pub const POPUP: u32 = 0x80000000; + pub const CHILD: u32 = 0x40000000; + pub const MINIMIZE: u32 = 0x20000000; + pub const VISIBLE: u32 = 0x10000000; + pub const DISABLED: u32 = 0x08000000; + pub const CLIPSIBLINGS: u32 = 0x04000000; + pub const CLIPCHILDREN: u32 = 0x02000000; + pub const MAXIMIZE: u32 = 0x01000000; + pub const CAPTION: u32 = BORDER | DLGFRAME; + pub const BORDER: u32 = 0x00800000; + pub const DLGFRAME: u32 = 0x00400000; + pub const VSCROLL: u32 = 0x00200000; + pub const HSCROLL: u32 = 0x00100000; + pub const SYSMENU: u32 = 0x00080000; + pub const THICKFRAME: u32 = 0x00040000; + pub const GROUP: u32 = 0x00020000; + pub const TABSTOP: u32 = 0x00010000; + + pub const MINIMIZEBOX: u32 = 0x00020000; + pub const MAXIMIZEBOX: u32 = 0x00010000; + + pub const TILED: u32 = OVERLAPPED; + pub const ICONIC: u32 = MINIMIZE; + pub const SIZEBOX: u32 = THICKFRAME; + pub const 
TILEDWINDOW: u32 = OVERLAPPEDWINDOW; + + // Common Window Styles + pub const OVERLAPPEDWINDOW: u32 = OVERLAPPED | CAPTION | SYSMENU | THICKFRAME | MINIMIZEBOX | MAXIMIZEBOX; + pub const POPUPWINDOW: u32 = POPUP | BORDER | SYSMENU; + pub const CHILDWINDOW: u32 = CHILD; +}; + +/// Dialog Box Template Styles from WinUser.h +pub const DS = struct { + pub const SETFONT: u32 = 0x40; +}; + +/// Button Control Styles from WinUser.h +/// This is not complete, it only contains what is needed +pub const BS = struct { + pub const PUSHBUTTON: u32 = 0x00000000; + pub const DEFPUSHBUTTON: u32 = 0x00000001; + pub const CHECKBOX: u32 = 0x00000002; + pub const AUTOCHECKBOX: u32 = 0x00000003; + pub const RADIOBUTTON: u32 = 0x00000004; + pub const @"3STATE": u32 = 0x00000005; + pub const AUTO3STATE: u32 = 0x00000006; + pub const GROUPBOX: u32 = 0x00000007; + pub const USERBUTTON: u32 = 0x00000008; + pub const AUTORADIOBUTTON: u32 = 0x00000009; + pub const PUSHBOX: u32 = 0x0000000A; + pub const OWNERDRAW: u32 = 0x0000000B; + pub const TYPEMASK: u32 = 0x0000000F; + pub const LEFTTEXT: u32 = 0x00000020; +}; + +/// Static Control Constants from WinUser.h +/// This is not complete, it only contains what is needed +pub const SS = struct { + pub const LEFT: u32 = 0x00000000; + pub const CENTER: u32 = 0x00000001; + pub const RIGHT: u32 = 0x00000002; + pub const ICON: u32 = 0x00000003; +}; + +/// Listbox Styles from WinUser.h +/// This is not complete, it only contains what is needed +pub const LBS = struct { + pub const NOTIFY: u32 = 0x0001; +}; diff --git a/lib/compiler/resinator/source_mapping.zig b/lib/compiler/resinator/source_mapping.zig @@ -0,0 +1,831 @@ +const std = @import("std"); +const Allocator = std.mem.Allocator; +const utils = @import("utils.zig"); +const UncheckedSliceWriter = utils.UncheckedSliceWriter; + +pub const ParseLineCommandsResult = struct { + result: []u8, + mappings: SourceMappings, +}; + +const CurrentMapping = struct { + line_num: usize = 1, + filename: 
std.ArrayListUnmanaged(u8) = .{}, + pending: bool = true, + ignore_contents: bool = false, +}; + +pub const ParseAndRemoveLineCommandsOptions = struct { + initial_filename: ?[]const u8 = null, +}; + +/// Parses and removes #line commands as well as all source code that is within a file +/// with .c or .h extensions. +/// +/// > RC treats files with the .c and .h extensions in a special manner. It +/// > assumes that a file with one of these extensions does not contain +/// > resources. If a file has the .c or .h file name extension, RC ignores all +/// > lines in the file except the preprocessor directives. Therefore, to +/// > include a file that contains resources in another resource script, give +/// > the file to be included an extension other than .c or .h. +/// from https://learn.microsoft.com/en-us/windows/win32/menurc/preprocessor-directives +/// +/// Returns a slice of `buf` with the aforementioned stuff removed as well as a mapping +/// between the lines and their corresponding lines in their original files. +/// +/// `buf` must be at least as long as `source` +/// In-place transformation is supported (i.e. `source` and `buf` can be the same slice) +/// +/// If `options.initial_filename` is provided, that filename is guaranteed to be +/// within the `mappings.files` table and `root_filename_offset` will be set appropriately. 
pub fn parseAndRemoveLineCommands(allocator: Allocator, source: []const u8, buf: []u8, options: ParseAndRemoveLineCommandsOptions) !ParseLineCommandsResult {
    var parse_result = ParseLineCommandsResult{
        .result = undefined,
        .mappings = .{},
    };
    errdefer parse_result.mappings.deinit(allocator);

    var current_mapping: CurrentMapping = .{};
    defer current_mapping.filename.deinit(allocator);

    if (options.initial_filename) |initial_filename| {
        try current_mapping.filename.appendSlice(allocator, initial_filename);
        parse_result.mappings.root_filename_offset = try parse_result.mappings.files.put(allocator, initial_filename);
    }

    std.debug.assert(buf.len >= source.len);
    var result = UncheckedSliceWriter{ .slice = buf };
    // Per-line state machine:
    // - line_start: only whitespace (or nothing) has been seen on the current line
    // - preprocessor: the first non-whitespace byte of the line was '#'
    // - non_preprocessor: the line started with anything else
    const State = enum {
        line_start,
        preprocessor,
        non_preprocessor,
    };
    var state: State = .line_start;
    var index: usize = 0;
    // Start of the not-yet-written prefix of the current line (leading whitespace
    // and/or the '#'). It is only written once we know the line is being kept.
    var pending_start: ?usize = null;
    var preprocessor_start: usize = 0;
    // Line number within the output (post-removal) buffer.
    var line_number: usize = 1;
    while (index < source.len) : (index += 1) {
        const c = source[index];
        switch (state) {
            .line_start => switch (c) {
                '#' => {
                    preprocessor_start = index;
                    state = .preprocessor;
                    if (pending_start == null) {
                        pending_start = index;
                    }
                },
                '\r', '\n' => {
                    const is_crlf = formsLineEndingPair(source, c, index + 1);
                    if (!current_mapping.ignore_contents) {
                        try handleLineEnd(allocator, line_number, &parse_result.mappings, &current_mapping);

                        // Only the line ending is written; any pending leading
                        // whitespace on this otherwise-empty line is dropped.
                        result.write(c);
                        if (is_crlf) result.write(source[index + 1]);
                        line_number += 1;
                    }
                    // Skip the second byte of a two-byte line ending.
                    if (is_crlf) index += 1;
                    pending_start = null;
                },
                ' ', '\t', '\x0b', '\x0c' => {
                    if (pending_start == null) {
                        pending_start = index;
                    }
                },
                else => {
                    state = .non_preprocessor;
                    if (pending_start != null) {
                        if (!current_mapping.ignore_contents) {
                            // Flush the pending leading whitespace together with `c`.
                            result.writeSlice(source[pending_start.? .. index + 1]);
                        }
                        pending_start = null;
                        continue;
                    }
                    if (!current_mapping.ignore_contents) {
                        result.write(c);
                    }
                },
            },
            .preprocessor => switch (c) {
                '\r', '\n' => {
                    // Now that we have the full line we can decide what to do with it
                    const preprocessor_str = source[preprocessor_start..index];
                    const is_crlf = formsLineEndingPair(source, c, index + 1);
                    if (std.mem.startsWith(u8, preprocessor_str, "#line")) {
                        // #line commands are consumed: nothing is written to the output.
                        try handleLineCommand(allocator, preprocessor_str, &current_mapping);
                    } else {
                        if (!current_mapping.ignore_contents) {
                            try handleLineEnd(allocator, line_number, &parse_result.mappings, &current_mapping);

                            // `pending_start` is always set when entering the preprocessor state.
                            const line_ending_len: usize = if (is_crlf) 2 else 1;
                            result.writeSlice(source[pending_start.? .. index + line_ending_len]);
                            line_number += 1;
                        }
                    }
                    if (is_crlf) index += 1;
                    state = .line_start;
                    pending_start = null;
                },
                else => {},
            },
            .non_preprocessor => switch (c) {
                '\r', '\n' => {
                    const is_crlf = formsLineEndingPair(source, c, index + 1);
                    if (!current_mapping.ignore_contents) {
                        try handleLineEnd(allocator, line_number, &parse_result.mappings, &current_mapping);

                        result.write(c);
                        if (is_crlf) result.write(source[index + 1]);
                        line_number += 1;
                    }
                    if (is_crlf) index += 1;
                    state = .line_start;
                    pending_start = null;
                },
                else => {
                    if (!current_mapping.ignore_contents) {
                        result.write(c);
                    }
                },
            },
        }
    } else {
        // End of input reached without a trailing line ending: finish the last line.
        switch (state) {
            .line_start => {},
            .non_preprocessor => {
                try handleLineEnd(allocator, line_number, &parse_result.mappings, &current_mapping);
            },
            .preprocessor => {
                // Now that we have the full line we can decide what to do with it
                const preprocessor_str = source[preprocessor_start..index];
                if (std.mem.startsWith(u8, preprocessor_str, "#line")) {
                    try handleLineCommand(allocator, preprocessor_str, &current_mapping);
                } else {
                    try handleLineEnd(allocator, line_number, &parse_result.mappings, &current_mapping);
                    if (!current_mapping.ignore_contents) {
                        result.writeSlice(source[pending_start.?..index]);
                    }
                }
            },
        }
    }

    parse_result.result = result.getWritten();

    // Remove whitespace from the end of the result. This avoids issues when the
    // preprocessor adds a newline to the end of the file, since then the
    // post-preprocessed source could have more lines than the corresponding input source and
    // the inserted line can't be mapped to any lines in the original file.
    // There's no way that whitespace at the end of a file can affect the parsing
    // of the RC script so this is okay to do unconditionally.
    // TODO: There might be a better way around this
    while (parse_result.result.len > 0 and std.ascii.isWhitespace(parse_result.result[parse_result.result.len - 1])) {
        parse_result.result.len -= 1;
    }

    // If there have been no line mappings at all, then we're dealing with an empty file.
    // In this case, we want to fake a line mapping just so that we return something
    // that is useable in the same way that a non-empty mapping would be.
    if (parse_result.mappings.sources.root == null) {
        try handleLineEnd(allocator, line_number, &parse_result.mappings, &current_mapping);
    }

    return parse_result;
}

/// Returns true if `line_ending` together with the byte at `source[next_index]` forms a
/// two-byte line ending (as decided by `utils.isLineEndingPair`).
/// Returns false if `next_index` is past the end of `source`.
///
/// Note: This should function the same as lex.LineHandler.currentIndexFormsLineEndingPair
pub fn formsLineEndingPair(source: []const u8, line_ending: u8, next_index: usize) bool {
    if (next_index >= source.len) return false;

    const next_ending = source[next_index];
    return utils.isLineEndingPair(line_ending, next_ending);
}

/// Records that output line `post_processed_line_number` corresponds to line
/// `current_mapping.line_num` of the file named by `current_mapping.filename`, then
/// advances `current_mapping.line_num` by one and clears `current_mapping.pending`.
///
/// The filename is registered in `mapping.files` via `put`, so this can fail with
/// whatever errors `put`/`set` return (presumably allocation failure).
pub fn handleLineEnd(allocator: Allocator, post_processed_line_number: usize, mapping: *SourceMappings, current_mapping: *CurrentMapping) !void {
    const filename_offset = try mapping.files.put(allocator, current_mapping.filename.items);

    try mapping.set(post_processed_line_number, current_mapping.line_num, filename_offset);

    current_mapping.line_num += 1;
    current_mapping.pending = false;
}

// TODO: Might want to provide diagnostics on invalid line commands instead of just returning
/// Parses a `#line <linenum> "<filename>"` command and applies it to `current_mapping`.
///
/// `line_command` is the text of the directive line starting at the `#`. The line number
/// must be an unsigned decimal integer and the filename must be a double-quoted string
/// literal accepted by `parseFilename`. Malformed commands, and filenames that contain a
/// NUL byte, are ignored: the function returns without touching `current_mapping`.
///
/// On success `line_num` and `filename` are replaced, `pending` is set, and
/// `ignore_contents` is set when the filename ends in `.c` or `.h` (case-insensitive).
///
/// The only error that is propagated is `error.OutOfMemory`.
pub fn handleLineCommand(allocator: Allocator, line_command: []const u8, current_mapping: *CurrentMapping) error{OutOfMemory}!void {
    // TODO: Are there other whitespace characters that should be included?
    var tokenizer = std.mem.tokenizeAny(u8, line_command, " \t");
    const line_directive = tokenizer.next() orelse return; // #line
    if (!std.mem.eql(u8, line_directive, "#line")) return;
    const linenum_str = tokenizer.next() orelse return;
    const linenum = std.fmt.parseUnsigned(usize, linenum_str, 10) catch return;

    // Everything after the line number is the filename literal; trim trailing whitespace
    // so that the closing quote is the last byte.
    var filename_literal = tokenizer.rest();
    while (filename_literal.len > 0 and std.ascii.isWhitespace(filename_literal[filename_literal.len - 1])) {
        filename_literal.len -= 1;
    }
    if (filename_literal.len < 2) return;
    const is_quoted = filename_literal[0] == '"' and filename_literal[filename_literal.len - 1] == '"';
    if (!is_quoted) return;
    const filename = parseFilename(allocator, filename_literal[1 .. filename_literal.len - 1]) catch |err| switch (err) {
        error.OutOfMemory => |e| return e,
        // An invalid filename literal means the whole command is ignored.
        else => return,
    };
    defer allocator.free(filename);

    // \x00 bytes in the filename is incompatible with how StringTable works
    if (std.mem.indexOfScalar(u8, filename, '\x00') != null) return;

    current_mapping.line_num = linenum;
    current_mapping.filename.clearRetainingCapacity();
    try current_mapping.filename.appendSlice(allocator, filename);
    current_mapping.pending = true;
    current_mapping.ignore_contents = std.ascii.endsWithIgnoreCase(filename, ".c") or std.ascii.endsWithIgnoreCase(filename, ".h");
}

/// Same as `parseAndRemoveLineCommands`, but allocates the output buffer with `allocator`
/// instead of taking one from the caller.
///
/// On success, `result.result` is memory allocated with `allocator` (resized down to the
/// written length) and `result.mappings` holds the line mappings; the caller is
/// responsible for releasing both. On error, nothing allocated here is left behind.
pub fn parseAndRemoveLineCommandsAlloc(allocator: Allocator, source: []const u8, options: ParseAndRemoveLineCommandsOptions) !ParseLineCommandsResult {
    const buf = try allocator.alloc(u8, source.len);
    errdefer allocator.free(buf);
    var result = try parseAndRemoveLineCommands(allocator, source, buf, options);
    // The mappings are owned by `result` from here on, so they must be released
    // if shrinking the buffer fails.
    errdefer result.mappings.deinit(allocator);
    result.result = try allocator.realloc(buf, result.result.len);
    return result;
}

/// C-style string parsing with a few caveats:
/// - The str cannot contain newlines or carriage returns
/// - Hex and octal escape are limited to u8
/// - No handling/support for L, u, or U prefixed strings
/// - The start and end double quotes should be omitted from the `str`
/// - Other than the above, does not assume any validity of the strings (i.e. there
///   may be unescaped double quotes within the str) and will return error.InvalidString
///   on any problems found.
///
/// The result is a UTF-8 encoded string.
+fn parseFilename(allocator: Allocator, str: []const u8) error{ OutOfMemory, InvalidString }![]u8 { + const State = enum { + string, + escape, + escape_hex, + escape_octal, + escape_u, + }; + + var filename = try std.ArrayList(u8).initCapacity(allocator, str.len); + errdefer filename.deinit(); + var state: State = .string; + var index: usize = 0; + var escape_len: usize = undefined; + var escape_val: u64 = undefined; + var escape_expected_len: u8 = undefined; + while (index < str.len) : (index += 1) { + const c = str[index]; + switch (state) { + .string => switch (c) { + '\\' => state = .escape, + '"' => return error.InvalidString, + else => filename.appendAssumeCapacity(c), + }, + .escape => switch (c) { + '\'', '"', '\\', '?', 'n', 'r', 't', 'a', 'b', 'e', 'f', 'v' => { + const escaped_c = switch (c) { + '\'', '"', '\\', '?' => c, + 'n' => '\n', + 'r' => '\r', + 't' => '\t', + 'a' => '\x07', + 'b' => '\x08', + 'e' => '\x1b', // non-standard + 'f' => '\x0c', + 'v' => '\x0b', + else => unreachable, + }; + filename.appendAssumeCapacity(escaped_c); + state = .string; + }, + 'x' => { + escape_val = 0; + escape_len = 0; + state = .escape_hex; + }, + '0'...'7' => { + escape_val = std.fmt.charToDigit(c, 8) catch unreachable; + escape_len = 1; + state = .escape_octal; + }, + 'u' => { + escape_val = 0; + escape_len = 0; + state = .escape_u; + escape_expected_len = 4; + }, + 'U' => { + escape_val = 0; + escape_len = 0; + state = .escape_u; + escape_expected_len = 8; + }, + else => return error.InvalidString, + }, + .escape_hex => switch (c) { + '0'...'9', 'a'...'f', 'A'...'F' => { + const digit = std.fmt.charToDigit(c, 16) catch unreachable; + if (escape_val != 0) escape_val = std.math.mul(u8, @as(u8, @intCast(escape_val)), 16) catch return error.InvalidString; + escape_val = std.math.add(u8, @as(u8, @intCast(escape_val)), digit) catch return error.InvalidString; + escape_len += 1; + }, + else => { + if (escape_len == 0) return error.InvalidString; + 
filename.appendAssumeCapacity(@intCast(escape_val)); + state = .string; + index -= 1; // reconsume + }, + }, + .escape_octal => switch (c) { + '0'...'7' => { + const digit = std.fmt.charToDigit(c, 8) catch unreachable; + if (escape_val != 0) escape_val = std.math.mul(u8, @as(u8, @intCast(escape_val)), 8) catch return error.InvalidString; + escape_val = std.math.add(u8, @as(u8, @intCast(escape_val)), digit) catch return error.InvalidString; + escape_len += 1; + if (escape_len == 3) { + filename.appendAssumeCapacity(@intCast(escape_val)); + state = .string; + } + }, + else => { + if (escape_len == 0) return error.InvalidString; + filename.appendAssumeCapacity(@intCast(escape_val)); + state = .string; + index -= 1; // reconsume + }, + }, + .escape_u => switch (c) { + '0'...'9', 'a'...'f', 'A'...'F' => { + const digit = std.fmt.charToDigit(c, 16) catch unreachable; + if (escape_val != 0) escape_val = std.math.mul(u21, @as(u21, @intCast(escape_val)), 16) catch return error.InvalidString; + escape_val = std.math.add(u21, @as(u21, @intCast(escape_val)), digit) catch return error.InvalidString; + escape_len += 1; + if (escape_len == escape_expected_len) { + var buf: [4]u8 = undefined; + const utf8_len = std.unicode.utf8Encode(@intCast(escape_val), &buf) catch return error.InvalidString; + filename.appendSliceAssumeCapacity(buf[0..utf8_len]); + state = .string; + } + }, + // Requires escape_expected_len valid hex digits + else => return error.InvalidString, + }, + } + } else { + switch (state) { + .string => {}, + .escape, .escape_u => return error.InvalidString, + .escape_hex => { + if (escape_len == 0) return error.InvalidString; + filename.appendAssumeCapacity(@intCast(escape_val)); + }, + .escape_octal => { + filename.appendAssumeCapacity(@intCast(escape_val)); + }, + } + } + + return filename.toOwnedSlice(); +} + +fn testParseFilename(expected: []const u8, input: []const u8) !void { + const parsed = try parseFilename(std.testing.allocator, input); + defer 
std.testing.allocator.free(parsed); + + return std.testing.expectEqualSlices(u8, expected, parsed); +} + +test parseFilename { + try testParseFilename("'\"?\\\t\n\r\x11", "\\'\\\"\\?\\\\\\t\\n\\r\\x11"); + try testParseFilename("\xABz\x53", "\\xABz\\123"); + try testParseFilename("⚡⚡", "\\u26A1\\U000026A1"); + try std.testing.expectError(error.InvalidString, parseFilename(std.testing.allocator, "\"")); + try std.testing.expectError(error.InvalidString, parseFilename(std.testing.allocator, "\\")); + try std.testing.expectError(error.InvalidString, parseFilename(std.testing.allocator, "\\u")); + try std.testing.expectError(error.InvalidString, parseFilename(std.testing.allocator, "\\U")); + try std.testing.expectError(error.InvalidString, parseFilename(std.testing.allocator, "\\x")); + try std.testing.expectError(error.InvalidString, parseFilename(std.testing.allocator, "\\xZZ")); + try std.testing.expectError(error.InvalidString, parseFilename(std.testing.allocator, "\\xABCDEF")); + try std.testing.expectError(error.InvalidString, parseFilename(std.testing.allocator, "\\777")); +} + +pub const SourceMappings = struct { + sources: Sources = .{}, + files: StringTable = .{}, + /// The default assumes that the first filename added is the root file. + /// The value should be set to the correct offset if that assumption does not hold. 
+ root_filename_offset: u32 = 0, + source_node_pool: std.heap.MemoryPool(Sources.Node) = std.heap.MemoryPool(Sources.Node).init(std.heap.page_allocator), + end_line: usize = 0, + + const sourceCompare = struct { + fn compare(a: Source, b: Source) std.math.Order { + return std.math.order(a.start_line, b.start_line); + } + }.compare; + const Sources = std.Treap(Source, sourceCompare); + + pub const Source = struct { + start_line: usize, + span: usize = 0, + corresponding_start_line: usize, + filename_offset: u32, + }; + + pub fn deinit(self: *SourceMappings, allocator: Allocator) void { + self.files.deinit(allocator); + self.source_node_pool.deinit(); + } + + /// Find the node that 'contains' the `line`, i.e. the closest node whose start_line is + /// <= `line` + fn findNode(self: SourceMappings, line: usize) ?*Sources.Node { + var node = self.sources.root; + var last_gt: ?*Sources.Node = null; + + var search_key: Source = undefined; + search_key.start_line = line; + while (node) |current| { + const order = sourceCompare(search_key, current.key); + if (order == .eq) break; + if (order == .gt) last_gt = current; + + node = current.children[@intFromBool(order == .gt)] orelse { + // Regardless of the current order, last_gt will contain the + // node we want to return. + // + // If search key is > current node's key, then last_gt will be + // current which we now know is the closest node that is <= + // the search key. + // + // + // If the key is < current node's key, we want to jump back to the + // node that the search key was most recently greater than. + // This is necessary for scenarios like (where the search key is 2): + // + // 1 + // \ + // 6 + // / + // 3 + // + // In this example, we'll get down to the '3' node but ultimately want + // to return the '1' node. + // + // Note: If we've never seen a key that the search key is greater than, + // then we know that there's no valid node, so last_gt will be null. 
+ return last_gt; + }; + } + + return node; + } + + /// Note: `line_num` and `corresponding_line_num` start at 1 + pub fn set(self: *SourceMappings, line_num: usize, corresponding_line_num: usize, filename_offset: u32) !void { + const maybe_node = self.findNode(line_num); + + const need_new_node = need_new_node: { + if (maybe_node) |node| { + if (node.key.filename_offset != filename_offset) { + break :need_new_node true; + } + const exist_delta = @as(i64, @intCast(node.key.corresponding_start_line)) - @as(i64, @intCast(node.key.start_line)); + const cur_delta = @as(i64, @intCast(corresponding_line_num)) - @as(i64, @intCast(line_num)); + if (exist_delta != cur_delta) { + break :need_new_node true; + } + break :need_new_node false; + } + break :need_new_node true; + }; + if (need_new_node) { + // spans must not overlap + if (maybe_node) |node| { + std.debug.assert(node.key.start_line != line_num); + } + + const key = Source{ + .start_line = line_num, + .corresponding_start_line = corresponding_line_num, + .filename_offset = filename_offset, + }; + var entry = self.sources.getEntryFor(key); + var new_node = try self.source_node_pool.create(); + new_node.key = key; + entry.set(new_node); + } + if (line_num > self.end_line) { + self.end_line = line_num; + } + } + + /// Note: `line_num` starts at 1 + pub fn get(self: SourceMappings, line_num: usize) ?Source { + const node = self.findNode(line_num) orelse return null; + return node.key; + } + + pub const CorrespondingSpan = struct { + start_line: usize, + end_line: usize, + filename_offset: u32, + }; + + pub fn getCorrespondingSpan(self: SourceMappings, line_num: usize) ?CorrespondingSpan { + const source = self.get(line_num) orelse return null; + const diff = line_num - source.start_line; + const start_line = source.corresponding_start_line + (if (line_num == source.start_line) 0 else source.span + diff); + const end_line = start_line + (if (line_num == source.start_line) source.span else 0); + return CorrespondingSpan{ 
+ .start_line = start_line, + .end_line = end_line, + .filename_offset = source.filename_offset, + }; + } + + pub fn collapse(self: *SourceMappings, line_num: usize, num_following_lines_to_collapse: usize) !void { + std.debug.assert(num_following_lines_to_collapse > 0); + var node = self.findNode(line_num).?; + const span_diff = num_following_lines_to_collapse; + if (node.key.start_line != line_num) { + const offset = line_num - node.key.start_line; + const key = Source{ + .start_line = line_num, + .span = num_following_lines_to_collapse, + .corresponding_start_line = node.key.corresponding_start_line + node.key.span + offset, + .filename_offset = node.key.filename_offset, + }; + var entry = self.sources.getEntryFor(key); + var new_node = try self.source_node_pool.create(); + new_node.key = key; + entry.set(new_node); + node = new_node; + } else { + node.key.span += span_diff; + } + + // now subtract the span diff from the start line number of all of + // the following nodes in order + var it = Sources.InorderIterator{ + .current = node, + .previous = node.children[0], + }; + // skip past current, but store it + var prev = it.next().?; + while (it.next()) |inorder_node| { + inorder_node.key.start_line -= span_diff; + + // This can only really happen if there are #line commands within + // a multiline comment, which in theory should be skipped over. + // However, currently, parseAndRemoveLineCommands is not aware of + // comments at all. + // + // TODO: Make parseAndRemoveLineCommands aware of comments/strings + // and turn this into an assertion + if (prev.key.start_line > inorder_node.key.start_line) { + return error.InvalidSourceMappingCollapse; + } + prev = inorder_node; + } + self.end_line -= span_diff; + } + + /// Returns true if the line is from the main/root file (i.e. not a file that has been + /// `#include`d). 
+ pub fn isRootFile(self: *SourceMappings, line_num: usize) bool { + const source = self.get(line_num) orelse return false; + return source.filename_offset == self.root_filename_offset; + } +}; + +test "SourceMappings collapse" { + const allocator = std.testing.allocator; + + var mappings = SourceMappings{}; + defer mappings.deinit(allocator); + const filename_offset = try mappings.files.put(allocator, "test.rc"); + + try mappings.set(1, 1, filename_offset); + try mappings.set(5, 5, filename_offset); + + try mappings.collapse(2, 2); + + try std.testing.expectEqual(@as(usize, 3), mappings.end_line); + const span_1 = mappings.getCorrespondingSpan(1).?; + try std.testing.expectEqual(@as(usize, 1), span_1.start_line); + try std.testing.expectEqual(@as(usize, 1), span_1.end_line); + const span_2 = mappings.getCorrespondingSpan(2).?; + try std.testing.expectEqual(@as(usize, 2), span_2.start_line); + try std.testing.expectEqual(@as(usize, 4), span_2.end_line); + const span_3 = mappings.getCorrespondingSpan(3).?; + try std.testing.expectEqual(@as(usize, 5), span_3.start_line); + try std.testing.expectEqual(@as(usize, 5), span_3.end_line); +} + +/// Same thing as StringTable in Zig's src/Wasm.zig +pub const StringTable = struct { + data: std.ArrayListUnmanaged(u8) = .{}, + map: std.HashMapUnmanaged(u32, void, std.hash_map.StringIndexContext, std.hash_map.default_max_load_percentage) = .{}, + + pub fn deinit(self: *StringTable, allocator: Allocator) void { + self.data.deinit(allocator); + self.map.deinit(allocator); + } + + pub fn put(self: *StringTable, allocator: Allocator, value: []const u8) !u32 { + const result = try self.map.getOrPutContextAdapted( + allocator, + value, + std.hash_map.StringIndexAdapter{ .bytes = &self.data }, + .{ .bytes = &self.data }, + ); + if (result.found_existing) { + return result.key_ptr.*; + } + + try self.data.ensureUnusedCapacity(allocator, value.len + 1); + const offset: u32 = @intCast(self.data.items.len); + + 
self.data.appendSliceAssumeCapacity(value); + self.data.appendAssumeCapacity(0); + + result.key_ptr.* = offset; + + return offset; + } + + pub fn get(self: StringTable, offset: u32) []const u8 { + std.debug.assert(offset < self.data.items.len); + return std.mem.sliceTo(@as([*:0]const u8, @ptrCast(self.data.items.ptr + offset)), 0); + } + + pub fn getOffset(self: *StringTable, value: []const u8) ?u32 { + return self.map.getKeyAdapted( + value, + std.hash_map.StringIndexAdapter{ .bytes = &self.data }, + ); + } +}; + +const ExpectedSourceSpan = struct { + start_line: usize, + end_line: usize, + filename: []const u8, +}; + +fn testParseAndRemoveLineCommands( + expected: []const u8, + comptime expected_spans: []const ExpectedSourceSpan, + source: []const u8, + options: ParseAndRemoveLineCommandsOptions, +) !void { + var results = try parseAndRemoveLineCommandsAlloc(std.testing.allocator, source, options); + defer std.testing.allocator.free(results.result); + defer results.mappings.deinit(std.testing.allocator); + + try std.testing.expectEqualStrings(expected, results.result); + + expectEqualMappings(expected_spans, results.mappings) catch |err| { + std.debug.print("\nexpected mappings:\n", .{}); + for (expected_spans, 0..) |span, i| { + const line_num = i + 1; + std.debug.print("{}: {s}:{}-{}\n", .{ line_num, span.filename, span.start_line, span.end_line }); + } + std.debug.print("\nactual mappings:\n", .{}); + var i: usize = 1; + while (i <= results.mappings.end_line) : (i += 1) { + const span = results.mappings.getCorrespondingSpan(i).?; + const filename = results.mappings.files.get(span.filename_offset); + std.debug.print("{}: {s}:{}-{}\n", .{ i, filename, span.start_line, span.end_line }); + } + std.debug.print("\n", .{}); + return err; + }; +} + +fn expectEqualMappings(expected_spans: []const ExpectedSourceSpan, mappings: SourceMappings) !void { + try std.testing.expectEqual(expected_spans.len, mappings.end_line); + for (expected_spans, 0..) 
|expected_span, i| { + const line_num = i + 1; + const span = mappings.getCorrespondingSpan(line_num) orelse return error.MissingLineNum; + const filename = mappings.files.get(span.filename_offset); + try std.testing.expectEqual(expected_span.start_line, span.start_line); + try std.testing.expectEqual(expected_span.end_line, span.end_line); + try std.testing.expectEqualStrings(expected_span.filename, filename); + } +} + +test "basic" { + try testParseAndRemoveLineCommands("", &[_]ExpectedSourceSpan{ + .{ .start_line = 1, .end_line = 1, .filename = "blah.rc" }, + }, "#line 1 \"blah.rc\"", .{}); +} + +test "only removes line commands" { + try testParseAndRemoveLineCommands( + \\#pragma code_page(65001) + , &[_]ExpectedSourceSpan{ + .{ .start_line = 1, .end_line = 1, .filename = "blah.rc" }, + }, + \\#line 1 "blah.rc" + \\#pragma code_page(65001) + , .{}); +} + +test "whitespace and line endings" { + try testParseAndRemoveLineCommands("", &[_]ExpectedSourceSpan{ + .{ .start_line = 1, .end_line = 1, .filename = "blah.rc" }, + }, "#line \t 1 \t \"blah.rc\"\r\n", .{}); +} + +test "example" { + try testParseAndRemoveLineCommands( + \\ + \\included RCDATA {"hello"} + , &[_]ExpectedSourceSpan{ + .{ .start_line = 1, .end_line = 1, .filename = "./included.rc" }, + .{ .start_line = 2, .end_line = 2, .filename = "./included.rc" }, + }, + \\#line 1 "rcdata.rc" + \\#line 1 "<built-in>" + \\#line 1 "<built-in>" + \\#line 355 "<built-in>" + \\#line 1 "<command line>" + \\#line 1 "<built-in>" + \\#line 1 "rcdata.rc" + \\#line 1 "./header.h" + \\ + \\ + \\2 RCDATA {"blah"} + \\ + \\ + \\#line 1 "./included.rc" + \\ + \\included RCDATA {"hello"} + \\#line 7 "./header.h" + \\#line 1 "rcdata.rc" + , .{}); +} + +test "CRLF and other line endings" { + try testParseAndRemoveLineCommands( + "hello\r\n#pragma code_page(65001)\r\nworld", + &[_]ExpectedSourceSpan{ + .{ .start_line = 1, .end_line = 1, .filename = "crlf.rc" }, + .{ .start_line = 2, .end_line = 2, .filename = "crlf.rc" }, + .{ 
.start_line = 3, .end_line = 3, .filename = "crlf.rc" }, + }, + "#line 1 \"crlf.rc\"\r\n#line 1 \"<built-in>\"\r#line 1 \"crlf.rc\"\n\rhello\r\n#pragma code_page(65001)\r\nworld\r\n", + .{}, + ); +} + +test "no line commands" { + try testParseAndRemoveLineCommands( + \\1 RCDATA {"blah"} + \\2 RCDATA {"blah"} + , &[_]ExpectedSourceSpan{ + .{ .start_line = 1, .end_line = 1, .filename = "blah.rc" }, + .{ .start_line = 2, .end_line = 2, .filename = "blah.rc" }, + }, + \\1 RCDATA {"blah"} + \\2 RCDATA {"blah"} + , .{ .initial_filename = "blah.rc" }); +} + +test "in place" { + var mut_source = "#line 1 \"blah.rc\"".*; + var result = try parseAndRemoveLineCommands(std.testing.allocator, &mut_source, &mut_source, .{}); + defer result.mappings.deinit(std.testing.allocator); + try std.testing.expectEqualStrings("", result.result); +} + +test "line command within a multiline comment" { + // TODO: Enable once parseAndRemoveLineCommands is comment-aware + if (true) return error.SkipZigTest; + + try testParseAndRemoveLineCommands( + \\/* + \\#line 1 "irrelevant.rc" + \\ + \\ + \\*/ + , &[_]ExpectedSourceSpan{ + .{ .start_line = 1, .end_line = 1, .filename = "blah.rc" }, + .{ .start_line = 2, .end_line = 2, .filename = "blah.rc" }, + .{ .start_line = 3, .end_line = 3, .filename = "blah.rc" }, + .{ .start_line = 4, .end_line = 4, .filename = "blah.rc" }, + .{ .start_line = 5, .end_line = 5, .filename = "blah.rc" }, + }, + \\/* + \\#line 1 "irrelevant.rc" + \\ + \\ + \\*/ + , .{ .initial_filename = "blah.rc" }); +} diff --git a/lib/compiler/resinator/utils.zig b/lib/compiler/resinator/utils.zig @@ -0,0 +1,124 @@ +const std = @import("std"); +const builtin = @import("builtin"); + +/// Like std.io.FixedBufferStream but does no bounds checking +pub const UncheckedSliceWriter = struct { + const Self = @This(); + + pos: usize = 0, + slice: []u8, + + pub fn write(self: *Self, char: u8) void { + self.slice[self.pos] = char; + self.pos += 1; + } + + pub fn writeSlice(self: *Self, slice: 
[]const u8) void { + for (slice) |c| { + self.write(c); + } + } + + pub fn getWritten(self: Self) []u8 { + return self.slice[0..self.pos]; + } +}; + +/// Cross-platform 'std.fs.Dir.openFile' wrapper that will always return IsDir if +/// a directory is attempted to be opened. +/// TODO: Remove once https://github.com/ziglang/zig/issues/5732 is addressed. +pub fn openFileNotDir(cwd: std.fs.Dir, path: []const u8, flags: std.fs.File.OpenFlags) std.fs.File.OpenError!std.fs.File { + const file = try cwd.openFile(path, flags); + errdefer file.close(); + // https://github.com/ziglang/zig/issues/5732 + if (builtin.os.tag != .windows) { + const stat = try file.stat(); + + if (stat.kind == .directory) + return error.IsDir; + } + return file; +} + +/// Emulates the Windows implementation of `iswdigit`, but only returns true +/// for the non-ASCII digits that `iswdigit` on Windows would return true for. +pub fn isNonAsciiDigit(c: u21) bool { + return switch (c) { + '²', + '³', + '¹', + '\u{660}'...'\u{669}', + '\u{6F0}'...'\u{6F9}', + '\u{7C0}'...'\u{7C9}', + '\u{966}'...'\u{96F}', + '\u{9E6}'...'\u{9EF}', + '\u{A66}'...'\u{A6F}', + '\u{AE6}'...'\u{AEF}', + '\u{B66}'...'\u{B6F}', + '\u{BE6}'...'\u{BEF}', + '\u{C66}'...'\u{C6F}', + '\u{CE6}'...'\u{CEF}', + '\u{D66}'...'\u{D6F}', + '\u{E50}'...'\u{E59}', + '\u{ED0}'...'\u{ED9}', + '\u{F20}'...'\u{F29}', + '\u{1040}'...'\u{1049}', + '\u{1090}'...'\u{1099}', + '\u{17E0}'...'\u{17E9}', + '\u{1810}'...'\u{1819}', + '\u{1946}'...'\u{194F}', + '\u{19D0}'...'\u{19D9}', + '\u{1B50}'...'\u{1B59}', + '\u{1BB0}'...'\u{1BB9}', + '\u{1C40}'...'\u{1C49}', + '\u{1C50}'...'\u{1C59}', + '\u{A620}'...'\u{A629}', + '\u{A8D0}'...'\u{A8D9}', + '\u{A900}'...'\u{A909}', + '\u{AA50}'...'\u{AA59}', + '\u{FF10}'...'\u{FF19}', + => true, + else => false, + }; +} + +pub const ErrorMessageType = enum { err, warning, note }; + +/// Used for generic colored errors/warnings/notes, more context-specific error messages +/// are handled elsewhere. 
+pub fn renderErrorMessage(writer: anytype, config: std.io.tty.Config, msg_type: ErrorMessageType, comptime format: []const u8, args: anytype) !void { + switch (msg_type) { + .err => { + try config.setColor(writer, .bold); + try config.setColor(writer, .red); + try writer.writeAll("error: "); + }, + .warning => { + try config.setColor(writer, .bold); + try config.setColor(writer, .yellow); + try writer.writeAll("warning: "); + }, + .note => { + try config.setColor(writer, .reset); + try config.setColor(writer, .cyan); + try writer.writeAll("note: "); + }, + } + try config.setColor(writer, .reset); + if (msg_type == .err) { + try config.setColor(writer, .bold); + } + try writer.print(format, args); + try writer.writeByte('\n'); + try config.setColor(writer, .reset); +} + +pub fn isLineEndingPair(first: u8, second: u8) bool { + if (first != '\r' and first != '\n') return false; + if (second != '\r' and second != '\n') return false; + + // can't be \n\n or \r\r + if (first == second) return false; + + return true; +} diff --git a/src/resinator/windows1252.zig b/lib/compiler/resinator/windows1252.zig diff --git a/lib/std/zig/ErrorBundle.zig b/lib/std/zig/ErrorBundle.zig @@ -433,7 +433,9 @@ pub const Wip = struct { // The ensureUnusedCapacity call above guarantees this. const notes_start = wip.reserveNotes(@intCast(other_list.len)) catch unreachable; for (notes_start.., other_list) |note, message| { - wip.extra.items[note] = @intFromEnum(wip.addOtherMessage(other, message) catch unreachable); + // This line can cause `wip.extra.items` to be resized. + const note_index = @intFromEnum(wip.addOtherMessage(other, message) catch unreachable); + wip.extra.items[note] = note_index; } } @@ -522,7 +524,8 @@ pub const Wip = struct { }; const loc = std.zig.findLineColumn(source, span.main); - eb.extra.items[note_i] = @intFromEnum(try eb.addErrorMessage(.{ + // This line can cause `eb.extra.items` to be resized. 
+ const note_index = @intFromEnum(try eb.addErrorMessage(.{ .msg = try eb.addString(msg), .src_loc = try eb.addSourceLocation(.{ .src_path = try eb.addString(src_path), @@ -538,6 +541,7 @@ pub const Wip = struct { }), .notes_len = 0, // TODO rework this function to be recursive })); + eb.extra.items[note_i] = note_index; } } } @@ -567,6 +571,28 @@ pub const Wip = struct { if (index == .none) return .none; const other_sl = other.getSourceLocation(index); + var ref_traces: std.ArrayListUnmanaged(ReferenceTrace) = .{}; + defer ref_traces.deinit(wip.gpa); + + if (other_sl.reference_trace_len > 0) { + var ref_index = other.extraData(SourceLocation, @intFromEnum(index)).end; + for (0..other_sl.reference_trace_len) |_| { + const other_ref_trace_ed = other.extraData(ReferenceTrace, ref_index); + const other_ref_trace = other_ref_trace_ed.data; + ref_index = other_ref_trace_ed.end; + + const ref_trace: ReferenceTrace = if (other_ref_trace.src_loc == .none) .{ + // sentinel ReferenceTrace does not store a string index in decl_name + .decl_name = other_ref_trace.decl_name, + .src_loc = .none, + } else .{ + .decl_name = try wip.addString(other.nullTerminatedString(other_ref_trace.decl_name)), + .src_loc = try wip.addOtherSourceLocation(other, other_ref_trace.src_loc), + }; + try ref_traces.append(wip.gpa, ref_trace); + } + } + const src_loc = try wip.addSourceLocation(.{ .src_path = try wip.addString(other.nullTerminatedString(other_sl.src_path)), .line = other_sl.line, @@ -581,7 +607,9 @@ pub const Wip = struct { .reference_trace_len = other_sl.reference_trace_len, }); - // TODO: also add the reference trace + for (ref_traces.items) |ref_trace| { + try wip.addReferenceTrace(ref_trace); + } return src_loc; } @@ -615,3 +643,95 @@ pub const Wip = struct { } } }; + +test "addBundleAsRoots" { + var bundle = bundle: { + var wip: ErrorBundle.Wip = undefined; + try wip.init(std.testing.allocator); + errdefer wip.deinit(); + + var ref_traces: [3]ReferenceTrace = undefined; + for 
(&ref_traces, 0..) |*ref_trace, i| { + if (i == ref_traces.len - 1) { + // sentinel reference trace + ref_trace.* = .{ + .decl_name = 3, // signifies 3 hidden references + .src_loc = .none, + }; + } else { + ref_trace.* = .{ + .decl_name = try wip.addString("foo"), + .src_loc = try wip.addSourceLocation(.{ + .src_path = try wip.addString("foo"), + .line = 1, + .column = 2, + .span_start = 3, + .span_main = 4, + .span_end = 5, + .source_line = 0, + }), + }; + } + } + + const src_loc = try wip.addSourceLocation(.{ + .src_path = try wip.addString("foo"), + .line = 1, + .column = 2, + .span_start = 3, + .span_main = 4, + .span_end = 5, + .source_line = try wip.addString("some source code"), + .reference_trace_len = ref_traces.len, + }); + for (&ref_traces) |ref_trace| { + try wip.addReferenceTrace(ref_trace); + } + + try wip.addRootErrorMessage(ErrorMessage{ + .msg = try wip.addString("hello world"), + .src_loc = src_loc, + .notes_len = 1, + }); + const i = try wip.reserveNotes(1); + const note_index = @intFromEnum(wip.addErrorMessageAssumeCapacity(.{ + .msg = try wip.addString("this is a note"), + .src_loc = try wip.addSourceLocation(.{ + .src_path = try wip.addString("bar"), + .line = 1, + .column = 2, + .span_start = 3, + .span_main = 4, + .span_end = 5, + .source_line = try wip.addString("another line of source"), + }), + })); + wip.extra.items[i] = note_index; + + break :bundle try wip.toOwnedBundle(""); + }; + defer bundle.deinit(std.testing.allocator); + + const ttyconf: std.io.tty.Config = .no_color; + + var bundle_buf = std.ArrayList(u8).init(std.testing.allocator); + defer bundle_buf.deinit(); + try bundle.renderToWriter(.{ .ttyconf = ttyconf }, bundle_buf.writer()); + + var copy = copy: { + var wip: ErrorBundle.Wip = undefined; + try wip.init(std.testing.allocator); + errdefer wip.deinit(); + + try wip.addBundleAsRoots(bundle); + + break :copy try wip.toOwnedBundle(""); + }; + defer copy.deinit(std.testing.allocator); + + var copy_buf = 
std.ArrayList(u8).init(std.testing.allocator); + defer copy_buf.deinit(); + try copy.renderToWriter(.{ .ttyconf = ttyconf }, copy_buf.writer()); + + try std.testing.expectEqualStrings(bundle_buf.items, copy_buf.items); +} diff --git a/src/Compilation.zig b/src/Compilation.zig @@ -36,7 +36,6 @@ const Cache = std.Build.Cache; const c_codegen = @import("codegen/c.zig"); const libtsan = @import("libtsan.zig"); const Zir = std.zig.Zir; -const resinator = @import("resinator.zig"); const Builtin = @import("Builtin.zig"); const LlvmObject = @import("codegen/llvm.zig").Object; @@ -174,7 +173,7 @@ local_cache_directory: Directory, global_cache_directory: Directory, libc_include_dir_list: []const []const u8, libc_framework_dir_list: []const []const u8, -rc_include_dir_list: []const []const u8, +rc_includes: RcIncludes, thread_pool: *ThreadPool, /// Populated when we build the libc++ static library. A Job to build this is placed in the queue @@ -1243,68 +1242,6 @@ pub fn create(gpa: Allocator, arena: Allocator, options: CreateOptions) !*Compil options.libc_installation, ); - // The include directories used when preprocessing .rc files are separate from the - // target. Which include directories are used is determined by `options.rc_includes`. - // - // Note: It should be okay that the include directories used when compiling .rc - // files differ from the include directories used when compiling the main - // binary, since the .res format is not dependent on anything ABI-related. The - // only relevant differences would be things like `#define` constants being - // different in the MinGW headers vs the MSVC headers, but any such - // differences would likely be a MinGW bug. 
- const rc_dirs: std.zig.LibCDirs = b: { - // Set the includes to .none here when there are no rc files to compile - var includes = if (options.rc_source_files.len > 0) options.rc_includes else .none; - const target = options.root_mod.resolved_target.result; - if (!options.root_mod.resolved_target.is_native_os or target.os.tag != .windows) { - switch (includes) { - // MSVC can't be found when the host isn't Windows, so short-circuit. - .msvc => return error.WindowsSdkNotFound, - // Skip straight to gnu since we won't be able to detect - // MSVC on non-Windows hosts. - .any => includes = .gnu, - .none, .gnu => {}, - } - } - while (true) switch (includes) { - .any, .msvc => break :b std.zig.LibCDirs.detect( - arena, - options.zig_lib_directory.path.?, - .{ - .cpu = target.cpu, - .os = target.os, - .abi = .msvc, - .ofmt = target.ofmt, - }, - options.root_mod.resolved_target.is_native_abi, - // The .rc preprocessor will need to know the libc include dirs even if we - // are not linking libc, so force 'link_libc' to true - true, - options.libc_installation, - ) catch |err| { - if (includes == .any) { - // fall back to mingw - includes = .gnu; - continue; - } - return err; - }, - .gnu => break :b try std.zig.LibCDirs.detectFromBuilding(arena, options.zig_lib_directory.path.?, .{ - .cpu = target.cpu, - .os = target.os, - .abi = .gnu, - .ofmt = target.ofmt, - }), - .none => break :b .{ - .libc_include_dir_list = &[0][]u8{}, - .libc_installation = null, - .libc_framework_dir_list = &.{}, - .sysroot = null, - .darwin_sdk_layout = null, - }, - }; - }; - const sysroot = options.sysroot orelse libc_dirs.sysroot; const include_compiler_rt = options.want_compiler_rt orelse @@ -1492,7 +1429,7 @@ pub fn create(gpa: Allocator, arena: Allocator, options: CreateOptions) !*Compil .self_exe_path = options.self_exe_path, .libc_include_dir_list = libc_dirs.libc_include_dir_list, .libc_framework_dir_list = libc_dirs.libc_framework_dir_list, - .rc_include_dir_list = 
rc_dirs.libc_include_dir_list, + .rc_includes = options.rc_includes, .thread_pool = options.thread_pool, .clang_passthrough_mode = options.clang_passthrough_mode, .clang_preprocessor_mode = options.clang_preprocessor_mode, @@ -2506,7 +2443,7 @@ fn addNonIncrementalStuffToCacheManifest( man.hash.add(comp.link_eh_frame_hdr); man.hash.add(comp.skip_linker_dependencies); man.hash.add(comp.include_compiler_rt); - man.hash.addListOfBytes(comp.rc_include_dir_list); + man.hash.add(comp.rc_includes); man.hash.addListOfBytes(comp.force_undefined_symbols.keys()); man.hash.addListOfBytes(comp.framework_dirs); try link.hashAddSystemLibs(man, comp.system_libs); @@ -4172,7 +4109,7 @@ pub fn obtainCObjectCacheManifest( pub fn obtainWin32ResourceCacheManifest(comp: *const Compilation) Cache.Manifest { var man = comp.cache_parent.obtain(); - man.hash.addListOfBytes(comp.rc_include_dir_list); + man.hash.add(comp.rc_includes); return man; } @@ -4812,11 +4749,12 @@ fn updateCObject(comp: *Compilation, c_object: *CObject, c_obj_prog_node: *std.P } fn updateWin32Resource(comp: *Compilation, win32_resource: *Win32Resource, win32_resource_prog_node: *std.Progress.Node) !void { - if (!build_options.have_llvm) { - return comp.failWin32Resource(win32_resource, "clang not available: compiler built without LLVM extensions", .{}); + if (!std.process.can_spawn) { + return comp.failWin32Resource(win32_resource, "{s} does not support spawning a child process", .{@tagName(builtin.os.tag)}); } + const self_exe_path = comp.self_exe_path orelse - return comp.failWin32Resource(win32_resource, "clang compilation disabled", .{}); + return comp.failWin32Resource(win32_resource, "unable to find self exe path", .{}); const tracy_trace = trace(@src()); defer tracy_trace.end(); @@ -4856,6 +4794,7 @@ fn updateWin32Resource(comp: *Compilation, win32_resource: *Win32Resource, win32 if (win32_resource.src == .manifest) { _ = try man.addFile(src_path, null); + const rc_basename = try std.fmt.allocPrint(arena, 
"{s}.rc", .{src_basename}); const res_basename = try std.fmt.allocPrint(arena, "{s}.res", .{src_basename}); const digest = if (try man.hit()) man.final() else blk: { @@ -4867,17 +4806,12 @@ fn updateWin32Resource(comp: *Compilation, win32_resource: *Win32Resource, win32 var o_dir = try comp.local_cache_directory.handle.makeOpenPath(o_sub_path, .{}); defer o_dir.close(); - var output_file = o_dir.createFile(res_basename, .{}) catch |err| { - const output_file_path = try comp.local_cache_directory.join(arena, &.{ o_sub_path, res_basename }); - return comp.failWin32Resource(win32_resource, "failed to create output file '{s}': {s}", .{ output_file_path, @errorName(err) }); - }; - var output_file_closed = false; - defer if (!output_file_closed) output_file.close(); - - var diagnostics = resinator.errors.Diagnostics.init(arena); - defer diagnostics.deinit(); - - var output_buffered_stream = std.io.bufferedWriter(output_file.writer()); + const in_rc_path = try comp.local_cache_directory.join(comp.gpa, &.{ + o_sub_path, rc_basename, + }); + const out_res_path = try comp.local_cache_directory.join(comp.gpa, &.{ + o_sub_path, res_basename, + }); // In .rc files, a " within a quoted string is escaped as "" const fmtRcEscape = struct { @@ -4899,28 +4833,24 @@ fn updateWin32Resource(comp: *Compilation, win32_resource: *Win32Resource, win32 // 1 is CREATEPROCESS_MANIFEST_RESOURCE_ID which is the default ID used for RT_MANIFEST resources // 24 is RT_MANIFEST const input = try std.fmt.allocPrint(arena, "1 24 \"{s}\"", .{fmtRcEscape(src_path)}); + try o_dir.writeFile(rc_basename, input); + + var argv = std.ArrayList([]const u8).init(comp.gpa); + defer argv.deinit(); + + try argv.appendSlice(&.{ + self_exe_path, + "rc", + "--zig-integration", + "/:no-preprocess", + "/x", // ignore INCLUDE environment variable + "/c65001", // UTF-8 codepage + "/:auto-includes", + "none", + }); + try argv.appendSlice(&.{ "--", in_rc_path, out_res_path }); - resinator.compile.compile(arena, input, 
output_buffered_stream.writer(), .{ - .cwd = std.fs.cwd(), - .diagnostics = &diagnostics, - .ignore_include_env_var = true, - .default_code_page = .utf8, - }) catch |err| switch (err) { - error.ParseError, error.CompileError => { - // Delete the output file on error - output_file.close(); - output_file_closed = true; - // Failing to delete is not really a big deal, so swallow any errors - o_dir.deleteFile(res_basename) catch { - const output_file_path = try comp.local_cache_directory.join(arena, &.{ o_sub_path, res_basename }); - log.warn("failed to delete '{s}': {s}", .{ output_file_path, @errorName(err) }); - }; - return comp.failWin32ResourceCompile(win32_resource, input, &diagnostics, null); - }, - else => |e| return e, - }; - - try output_buffered_stream.flush(); + try spawnZigRc(comp, win32_resource, src_basename, arena, argv.items, &child_progress_node); break :blk digest; }; @@ -4951,9 +4881,6 @@ fn updateWin32Resource(comp: *Compilation, win32_resource: *Win32Resource, win32 const rc_basename_noext = src_basename[0 .. src_basename.len - std.fs.path.extension(src_basename).len]; const digest = if (try man.hit()) man.final() else blk: { - const rcpp_filename = try std.fmt.allocPrint(arena, "{s}.rcpp", .{rc_basename_noext}); - - const out_rcpp_path = try comp.tmpFilePath(arena, rcpp_filename); var zig_cache_tmp_dir = try comp.local_cache_directory.handle.makeOpenPath("tmp", .{}); defer zig_cache_tmp_dir.close(); @@ -4963,193 +4890,66 @@ fn updateWin32Resource(comp: *Compilation, win32_resource: *Win32Resource, win32 // so we need a temporary filename. 
const out_res_path = try comp.tmpFilePath(arena, res_filename); - var options = options: { - var resinator_args = try std.ArrayListUnmanaged([]const u8).initCapacity(comp.gpa, rc_src.extra_flags.len + 4); - defer resinator_args.deinit(comp.gpa); - - resinator_args.appendAssumeCapacity(""); // dummy 'process name' arg - resinator_args.appendSliceAssumeCapacity(rc_src.extra_flags); - resinator_args.appendSliceAssumeCapacity(&.{ "--", out_rcpp_path, out_res_path }); - - var cli_diagnostics = resinator.cli.Diagnostics.init(comp.gpa); - defer cli_diagnostics.deinit(); - const options = resinator.cli.parse(comp.gpa, resinator_args.items, &cli_diagnostics) catch |err| switch (err) { - error.ParseError => { - return comp.failWin32ResourceCli(win32_resource, &cli_diagnostics); - }, - else => |e| return e, - }; - break :options options; - }; - defer options.deinit(); - - // We never want to read the INCLUDE environment variable, so - // unconditionally set `ignore_include_env_var` to true - options.ignore_include_env_var = true; - - if (options.preprocess != .yes) { - return comp.failWin32Resource(win32_resource, "the '{s}' option is not supported in this context", .{switch (options.preprocess) { - .no => "/:no-preprocess", - .only => "/p", - .yes => unreachable, - }}); - } - var argv = std.ArrayList([]const u8).init(comp.gpa); defer argv.deinit(); - try argv.appendSlice(&[_][]const u8{ self_exe_path, "clang" }); - - try resinator.preprocess.appendClangArgs(arena, &argv, options, .{ - .clang_target = null, // handled by addCCArgs - .system_include_paths = &.{}, // handled by addCCArgs - .needs_gnu_workaround = comp.getTarget().isGnu(), - .nostdinc = false, // handled by addCCArgs + const depfile_filename = try std.fmt.allocPrint(arena, "{s}.d.json", .{rc_basename_noext}); + const out_dep_path = try comp.tmpFilePath(arena, depfile_filename); + try argv.appendSlice(&.{ + self_exe_path, + "rc", + "--zig-integration", + "/:depfile", + out_dep_path, + "/:depfile-fmt", + "json", + 
"/x", // ignore INCLUDE environment variable + "/:auto-includes", + @tagName(comp.rc_includes), }); - - try argv.append(rc_src.src_path); - try argv.appendSlice(&[_][]const u8{ - "-o", - out_rcpp_path, - }); - - const out_dep_path = try std.fmt.allocPrint(arena, "{s}.d", .{out_rcpp_path}); - // Note: addCCArgs will implicitly add _DEBUG/NDEBUG depending on the optimization - // mode. While these defines are not normally present when calling rc.exe directly, + // While these defines are not normally present when calling rc.exe directly, // them being defined matches the behavior of how MSVC calls rc.exe which is the more // relevant behavior in this case. - try comp.addCCArgs(arena, &argv, .rc, out_dep_path, rc_src.owner); - - if (comp.verbose_cc) { - dump_argv(argv.items); + switch (rc_src.owner.optimize_mode) { + .Debug => try argv.append("-D_DEBUG"), + .ReleaseSafe => {}, + .ReleaseFast, .ReleaseSmall => try argv.append("-DNDEBUG"), } + try argv.appendSlice(rc_src.extra_flags); + try argv.appendSlice(&.{ "--", rc_src.src_path, out_res_path }); - if (std.process.can_spawn) { - var child = std.ChildProcess.init(argv.items, arena); - child.stdin_behavior = .Ignore; - child.stdout_behavior = .Ignore; - child.stderr_behavior = .Pipe; - - try child.spawn(); - - const stderr_reader = child.stderr.?.reader(); - - const stderr = try stderr_reader.readAllAlloc(arena, 10 * 1024 * 1024); - - const term = child.wait() catch |err| { - return comp.failWin32Resource(win32_resource, "unable to spawn {s}: {s}", .{ argv.items[0], @errorName(err) }); - }; + try spawnZigRc(comp, win32_resource, src_basename, arena, argv.items, &child_progress_node); - switch (term) { - .Exited => |code| { - if (code != 0) { - // TODO parse clang stderr and turn it into an error message - // and then call failCObjWithOwnedErrorMsg - log.err("clang preprocessor failed with stderr:\n{s}", .{stderr}); - return comp.failWin32Resource(win32_resource, "clang preprocessor exited with code {d}", .{code}); - } 
- }, - else => { - log.err("clang preprocessor terminated with stderr:\n{s}", .{stderr}); - return comp.failWin32Resource(win32_resource, "clang preprocessor terminated unexpectedly", .{}); - }, - } - } else { - const exit_code = try clangMain(arena, argv.items); - if (exit_code != 0) { - return comp.failWin32Resource(win32_resource, "clang preprocessor exited with code {d}", .{exit_code}); - } - } - - const dep_basename = std.fs.path.basename(out_dep_path); - // Add the files depended on to the cache system. - try man.addDepFilePost(zig_cache_tmp_dir, dep_basename); - switch (comp.cache_use) { - .whole => |whole| if (whole.cache_manifest) |whole_cache_manifest| { - whole.cache_manifest_mutex.lock(); - defer whole.cache_manifest_mutex.unlock(); - try whole_cache_manifest.addDepFilePost(zig_cache_tmp_dir, dep_basename); - }, - .incremental => {}, - } - // Just to save disk space, we delete the file because it is never needed again. - zig_cache_tmp_dir.deleteFile(dep_basename) catch |err| { - log.warn("failed to delete '{s}': {s}", .{ out_dep_path, @errorName(err) }); - }; - - const full_input = std.fs.cwd().readFileAlloc(arena, out_rcpp_path, std.math.maxInt(usize)) catch |err| switch (err) { - error.OutOfMemory => return error.OutOfMemory, - else => |e| { - return comp.failWin32Resource(win32_resource, "failed to read preprocessed file '{s}': {s}", .{ out_rcpp_path, @errorName(e) }); - }, - }; - - var mapping_results = try resinator.source_mapping.parseAndRemoveLineCommands(arena, full_input, full_input, .{ .initial_filename = rc_src.src_path }); - defer mapping_results.mappings.deinit(arena); - - const final_input = resinator.comments.removeComments(mapping_results.result, mapping_results.result, &mapping_results.mappings); - - var output_file = zig_cache_tmp_dir.createFile(out_res_path, .{}) catch |err| { - return comp.failWin32Resource(win32_resource, "failed to create output file '{s}': {s}", .{ out_res_path, @errorName(err) }); - }; - var output_file_closed = 
false; - defer if (!output_file_closed) output_file.close(); - - var diagnostics = resinator.errors.Diagnostics.init(arena); - defer diagnostics.deinit(); + // Read depfile and update cache manifest + { + const dep_basename = std.fs.path.basename(out_dep_path); + const dep_file_contents = try zig_cache_tmp_dir.readFileAlloc(arena, dep_basename, 50 * 1024 * 1024); + defer arena.free(dep_file_contents); - var dependencies_list = std.ArrayList([]const u8).init(comp.gpa); - defer { - for (dependencies_list.items) |item| { - comp.gpa.free(item); + const value = try std.json.parseFromSliceLeaky(std.json.Value, arena, dep_file_contents, .{}); + if (value != .array) { + return comp.failWin32Resource(win32_resource, "depfile from zig rc has unexpected format", .{}); } - dependencies_list.deinit(); - } - var output_buffered_stream = std.io.bufferedWriter(output_file.writer()); - - resinator.compile.compile(arena, final_input, output_buffered_stream.writer(), .{ - .cwd = std.fs.cwd(), - .diagnostics = &diagnostics, - .source_mappings = &mapping_results.mappings, - .dependencies_list = &dependencies_list, - .system_include_paths = comp.rc_include_dir_list, - .ignore_include_env_var = true, - // options - .extra_include_paths = options.extra_include_paths.items, - .default_language_id = options.default_language_id, - .default_code_page = options.default_code_page orelse .windows1252, - .verbose = options.verbose, - .null_terminate_string_table_strings = options.null_terminate_string_table_strings, - .max_string_literal_codepoints = options.max_string_literal_codepoints, - .silent_duplicate_control_ids = options.silent_duplicate_control_ids, - .warn_instead_of_error_on_invalid_code_page = options.warn_instead_of_error_on_invalid_code_page, - }) catch |err| switch (err) { - error.ParseError, error.CompileError => { - // Delete the output file on error - output_file.close(); - output_file_closed = true; - // Failing to delete is not really a big deal, so swallow any errors - 
zig_cache_tmp_dir.deleteFile(out_res_path) catch { - log.warn("failed to delete '{s}': {s}", .{ out_res_path, @errorName(err) }); - }; - return comp.failWin32ResourceCompile(win32_resource, final_input, &diagnostics, mapping_results.mappings); - }, - else => |e| return e, - }; - - try output_buffered_stream.flush(); - - for (dependencies_list.items) |dep_file_path| { - try man.addFilePost(dep_file_path); - switch (comp.cache_use) { - .whole => |whole| if (whole.cache_manifest) |whole_cache_manifest| { - whole.cache_manifest_mutex.lock(); - defer whole.cache_manifest_mutex.unlock(); - try whole_cache_manifest.addFilePost(dep_file_path); - }, - .incremental => {}, + for (value.array.items) |element| { + if (element != .string) { + return comp.failWin32Resource(win32_resource, "depfile from zig rc has unexpected format", .{}); + } + const dep_file_path = element.string; + try man.addFilePost(dep_file_path); + switch (comp.cache_use) { + .whole => |whole| if (whole.cache_manifest) |whole_cache_manifest| { + whole.cache_manifest_mutex.lock(); + defer whole.cache_manifest_mutex.unlock(); + try whole_cache_manifest.addFilePost(dep_file_path); + }, + .incremental => {}, + } } + // Just to save disk space, we delete the file because it is never needed again. + zig_cache_tmp_dir.deleteFile(dep_basename) catch |err| { + log.warn("failed to delete '{s}': {s}", .{ out_dep_path, @errorName(err) }); + }; } // Rename into place. 
@@ -5159,8 +4959,6 @@ fn updateWin32Resource(comp: *Compilation, win32_resource: *Win32Resource, win32 defer o_dir.close(); const tmp_basename = std.fs.path.basename(out_res_path); try std.fs.rename(zig_cache_tmp_dir, tmp_basename, o_dir, res_filename); - const tmp_rcpp_basename = std.fs.path.basename(out_rcpp_path); - try std.fs.rename(zig_cache_tmp_dir, tmp_rcpp_basename, o_dir, rcpp_filename); break :blk digest; }; @@ -5186,6 +4984,106 @@ fn updateWin32Resource(comp: *Compilation, win32_resource: *Win32Resource, win32 }; } +fn spawnZigRc( + comp: *Compilation, + win32_resource: *Win32Resource, + src_basename: []const u8, + arena: Allocator, + argv: []const []const u8, + child_progress_node: *std.Progress.Node, +) !void { + var node_name: std.ArrayListUnmanaged(u8) = .{}; + defer node_name.deinit(arena); + + var child = std.ChildProcess.init(argv, arena); + child.stdin_behavior = .Ignore; + child.stdout_behavior = .Pipe; + child.stderr_behavior = .Pipe; + + child.spawn() catch |err| { + return comp.failWin32Resource(win32_resource, "unable to spawn {s} rc: {s}", .{ argv[0], @errorName(err) }); + }; + + var poller = std.io.poll(comp.gpa, enum { stdout }, .{ + .stdout = child.stdout.?, + }); + defer poller.deinit(); + + const stdout = poller.fifo(.stdout); + + poll: while (true) { + while (stdout.readableLength() < @sizeOf(std.zig.Server.Message.Header)) { + if (!(try poller.poll())) break :poll; + } + const header = stdout.reader().readStruct(std.zig.Server.Message.Header) catch unreachable; + while (stdout.readableLength() < header.bytes_len) { + if (!(try poller.poll())) break :poll; + } + const body = stdout.readableSliceOfLen(header.bytes_len); + + switch (header.tag) { + // We expect exactly one ErrorBundle, and if any error_bundle header is + // sent then it's a fatal error. + .error_bundle => { + const EbHdr = std.zig.Server.Message.ErrorBundle; + const eb_hdr = @as(*align(1) const EbHdr, @ptrCast(body)); + const extra_bytes = + body[@sizeOf(EbHdr)..][0 .. 
@sizeOf(u32) * eb_hdr.extra_len]; + const string_bytes = + body[@sizeOf(EbHdr) + extra_bytes.len ..][0..eb_hdr.string_bytes_len]; + const unaligned_extra = std.mem.bytesAsSlice(u32, extra_bytes); + const extra_array = try comp.gpa.alloc(u32, unaligned_extra.len); + @memcpy(extra_array, unaligned_extra); + const error_bundle = std.zig.ErrorBundle{ + .string_bytes = try comp.gpa.dupe(u8, string_bytes), + .extra = extra_array, + }; + return comp.failWin32ResourceWithOwnedBundle(win32_resource, error_bundle); + }, + .progress => { + node_name.clearRetainingCapacity(); + // <resinator> is a special string that indicates that the child + // process has reached resinator's main function + if (std.mem.eql(u8, body, "<resinator>")) { + child_progress_node.setName(src_basename); + } + // Ignore 0-length strings since if multiple zig rc commands + // are executed at the same time, only one will send progress strings + // while the other(s) will send empty strings. + else if (body.len > 0) { + try node_name.appendSlice(arena, "build 'zig rc'... "); + try node_name.appendSlice(arena, body); + child_progress_node.setName(node_name.items); + } + }, + else => {}, // ignore other messages + } + + stdout.discard(body.len); + } + + // Just in case there's a failure that didn't send an ErrorBundle (e.g. 
an error return trace) + const stderr_reader = child.stderr.?.reader(); + const stderr = try stderr_reader.readAllAlloc(arena, 10 * 1024 * 1024); + + const term = child.wait() catch |err| { + return comp.failWin32Resource(win32_resource, "unable to wait for {s} rc: {s}", .{ argv[0], @errorName(err) }); + }; + + switch (term) { + .Exited => |code| { + if (code != 0) { + log.err("zig rc failed with stderr:\n{s}", .{stderr}); + return comp.failWin32Resource(win32_resource, "zig rc exited with code {d}", .{code}); + } + }, + else => { + log.err("zig rc terminated with stderr:\n{s}", .{stderr}); + return comp.failWin32Resource(win32_resource, "zig rc terminated unexpectedly", .{}); + }, + } +} + pub fn tmpFilePath(comp: *Compilation, ally: Allocator, suffix: []const u8) error{OutOfMemory}![]const u8 { const s = std.fs.path.sep_str; const rand_int = std.crypto.random.int(u64); @@ -5352,16 +5250,9 @@ pub fn addCCArgs( try argv.append("-isystem"); try argv.append(c_headers_dir); - if (ext == .rc) { - for (comp.rc_include_dir_list) |include_dir| { - try argv.append("-isystem"); - try argv.append(include_dir); - } - } else { - for (comp.libc_include_dir_list) |include_dir| { - try argv.append("-isystem"); - try argv.append(include_dir); - } + for (comp.libc_include_dir_list) |include_dir| { + try argv.append("-isystem"); + try argv.append(include_dir); } if (target.cpu.model.llvm_name) |llvm_name| { @@ -5726,167 +5617,6 @@ fn failWin32ResourceWithOwnedBundle( return error.AnalysisFail; } -fn failWin32ResourceCli( - comp: *Compilation, - win32_resource: *Win32Resource, - diagnostics: *resinator.cli.Diagnostics, -) SemaError { - @setCold(true); - - var bundle: ErrorBundle.Wip = undefined; - try bundle.init(comp.gpa); - errdefer bundle.deinit(); - - try bundle.addRootErrorMessage(.{ - .msg = try bundle.addString("invalid command line option(s)"), - .src_loc = try bundle.addSourceLocation(.{ - .src_path = try bundle.addString(switch (win32_resource.src) { - .rc => |rc_src| 
rc_src.src_path, - .manifest => |manifest_src| manifest_src, - }), - .line = 0, - .column = 0, - .span_start = 0, - .span_main = 0, - .span_end = 0, - }), - }); - - var cur_err: ?ErrorBundle.ErrorMessage = null; - var cur_notes: std.ArrayListUnmanaged(ErrorBundle.ErrorMessage) = .{}; - defer cur_notes.deinit(comp.gpa); - for (diagnostics.errors.items) |err_details| { - switch (err_details.type) { - .err => { - if (cur_err) |err| { - try win32ResourceFlushErrorMessage(&bundle, err, cur_notes.items); - } - cur_err = .{ - .msg = try bundle.addString(err_details.msg.items), - }; - cur_notes.clearRetainingCapacity(); - }, - .warning => cur_err = null, - .note => { - if (cur_err == null) continue; - cur_err.?.notes_len += 1; - try cur_notes.append(comp.gpa, .{ - .msg = try bundle.addString(err_details.msg.items), - }); - }, - } - } - if (cur_err) |err| { - try win32ResourceFlushErrorMessage(&bundle, err, cur_notes.items); - } - - const finished_bundle = try bundle.toOwnedBundle(""); - return comp.failWin32ResourceWithOwnedBundle(win32_resource, finished_bundle); -} - -fn failWin32ResourceCompile( - comp: *Compilation, - win32_resource: *Win32Resource, - source: []const u8, - diagnostics: *resinator.errors.Diagnostics, - opt_mappings: ?resinator.source_mapping.SourceMappings, -) SemaError { - @setCold(true); - - var bundle: ErrorBundle.Wip = undefined; - try bundle.init(comp.gpa); - errdefer bundle.deinit(); - - var msg_buf: std.ArrayListUnmanaged(u8) = .{}; - defer msg_buf.deinit(comp.gpa); - var cur_err: ?ErrorBundle.ErrorMessage = null; - var cur_notes: std.ArrayListUnmanaged(ErrorBundle.ErrorMessage) = .{}; - defer cur_notes.deinit(comp.gpa); - for (diagnostics.errors.items) |err_details| { - switch (err_details.type) { - .hint => continue, - // Clear the current error so that notes don't bleed into unassociated errors - .warning => { - cur_err = null; - continue; - }, - .note => if (cur_err == null) continue, - .err => {}, - } - const err_line, const err_filename = 
blk: { - if (opt_mappings) |mappings| { - const corresponding_span = mappings.get(err_details.token.line_number); - const corresponding_file = mappings.files.get(corresponding_span.filename_offset); - const err_line = corresponding_span.start_line; - break :blk .{ err_line, corresponding_file }; - } else { - break :blk .{ err_details.token.line_number, "<generated rc>" }; - } - }; - - const source_line_start = err_details.token.getLineStart(source); - const column = err_details.token.calculateColumn(source, 1, source_line_start); - - msg_buf.clearRetainingCapacity(); - try err_details.render(msg_buf.writer(comp.gpa), source, diagnostics.strings.items); - - const src_loc = src_loc: { - var src_loc: ErrorBundle.SourceLocation = .{ - .src_path = try bundle.addString(err_filename), - .line = @intCast(err_line - 1), // 1-based -> 0-based - .column = @intCast(column), - .span_start = 0, - .span_main = 0, - .span_end = 0, - }; - if (err_details.print_source_line) { - const source_line = err_details.token.getLine(source, source_line_start); - const visual_info = err_details.visualTokenInfo(source_line_start, source_line_start + source_line.len); - src_loc.span_start = @intCast(visual_info.point_offset - visual_info.before_len); - src_loc.span_main = @intCast(visual_info.point_offset); - src_loc.span_end = @intCast(visual_info.point_offset + 1 + visual_info.after_len); - src_loc.source_line = try bundle.addString(source_line); - } - break :src_loc try bundle.addSourceLocation(src_loc); - }; - - switch (err_details.type) { - .err => { - if (cur_err) |err| { - try win32ResourceFlushErrorMessage(&bundle, err, cur_notes.items); - } - cur_err = .{ - .msg = try bundle.addString(msg_buf.items), - .src_loc = src_loc, - }; - cur_notes.clearRetainingCapacity(); - }, - .note => { - cur_err.?.notes_len += 1; - try cur_notes.append(comp.gpa, .{ - .msg = try bundle.addString(msg_buf.items), - .src_loc = src_loc, - }); - }, - .warning, .hint => unreachable, - } - } - if (cur_err) |err| { 
- try win32ResourceFlushErrorMessage(&bundle, err, cur_notes.items); - } - - const finished_bundle = try bundle.toOwnedBundle(""); - return comp.failWin32ResourceWithOwnedBundle(win32_resource, finished_bundle); -} - -fn win32ResourceFlushErrorMessage(wip: *ErrorBundle.Wip, msg: ErrorBundle.ErrorMessage, notes: []const ErrorBundle.ErrorMessage) !void { - try wip.addRootErrorMessage(msg); - const notes_start = try wip.reserveNotes(@intCast(notes.len)); - for (notes_start.., notes) |i, note| { - wip.extra.items[i] = @intFromEnum(wip.addErrorMessageAssumeCapacity(note)); - } -} - pub const FileExt = enum { c, cpp, diff --git a/src/main.zig b/src/main.zig @@ -291,7 +291,14 @@ fn mainArgs(gpa: Allocator, arena: Allocator, args: []const []const u8) !void { } else if (mem.eql(u8, cmd, "translate-c")) { return buildOutputType(gpa, arena, args, .translate_c); } else if (mem.eql(u8, cmd, "rc")) { - return cmdRc(gpa, arena, args[1..]); + const use_server = cmd_args.len > 0 and std.mem.eql(u8, cmd_args[0], "--zig-integration"); + return jitCmd(gpa, arena, cmd_args, .{ + .cmd_name = "resinator", + .root_src_path = "resinator/main.zig", + .depend_on_aro = true, + .prepend_zig_lib_dir_path = true, + .server = use_server, + }); } else if (mem.eql(u8, cmd, "fmt")) { return jitCmd(gpa, arena, cmd_args, .{ .cmd_name = "fmt", @@ -4625,276 +4632,6 @@ fn cmdTranslateC(comp: *Compilation, arena: Allocator, fancy_output: ?*Compilati } } -fn cmdRc(gpa: Allocator, arena: Allocator, args: []const []const u8) !void { - const resinator = @import("resinator.zig"); - - const stderr = std.io.getStdErr(); - const stderr_config = std.io.tty.detectConfig(stderr); - - var options = options: { - var cli_diagnostics = resinator.cli.Diagnostics.init(gpa); - defer cli_diagnostics.deinit(); - var options = resinator.cli.parse(gpa, args, &cli_diagnostics) catch |err| switch (err) { - error.ParseError => { - cli_diagnostics.renderToStdErr(args, stderr_config); - process.exit(1); - }, - else => |e| return e, 
- }; - try options.maybeAppendRC(std.fs.cwd()); - - // print any warnings/notes - cli_diagnostics.renderToStdErr(args, stderr_config); - // If there was something printed, then add an extra newline separator - // so that there is a clear separation between the cli diagnostics and whatever - // gets printed after - if (cli_diagnostics.errors.items.len > 0) { - std.debug.print("\n", .{}); - } - break :options options; - }; - defer options.deinit(); - - if (options.print_help_and_exit) { - try resinator.cli.writeUsage(stderr.writer(), "zig rc"); - return; - } - - const stdout_writer = std.io.getStdOut().writer(); - if (options.verbose) { - try options.dumpVerbose(stdout_writer); - try stdout_writer.writeByte('\n'); - } - - const full_input = full_input: { - if (options.preprocess != .no) { - if (!build_options.have_llvm) { - fatal("clang not available: compiler built without LLVM extensions", .{}); - } - - var argv = std.ArrayList([]const u8).init(gpa); - defer argv.deinit(); - - const self_exe_path = try introspect.findZigExePath(arena); - var zig_lib_directory = introspect.findZigLibDirFromSelfExe(arena, self_exe_path) catch |err| { - try resinator.utils.renderErrorMessage(stderr.writer(), stderr_config, .err, "unable to find zig installation directory: {s}", .{@errorName(err)}); - process.exit(1); - }; - defer zig_lib_directory.handle.close(); - - const include_args = detectRcIncludeDirs(arena, zig_lib_directory.path.?, options.auto_includes) catch |err| { - try resinator.utils.renderErrorMessage(stderr.writer(), stderr_config, .err, "unable to detect system include directories: {s}", .{@errorName(err)}); - process.exit(1); - }; - - try argv.appendSlice(&[_][]const u8{ self_exe_path, "clang" }); - - const clang_target = clang_target: { - if (include_args.target_abi) |abi| { - break :clang_target try std.fmt.allocPrint(arena, "x86_64-unknown-windows-{s}", .{abi}); - } - break :clang_target "x86_64-unknown-windows"; - }; - try 
resinator.preprocess.appendClangArgs(arena, &argv, options, .{ - .clang_target = clang_target, - .system_include_paths = include_args.include_paths, - .needs_gnu_workaround = if (include_args.target_abi) |abi| std.mem.eql(u8, abi, "gnu") else false, - .nostdinc = true, - }); - - try argv.append(options.input_filename); - - if (options.verbose) { - try stdout_writer.writeAll("Preprocessor: zig clang\n"); - for (argv.items[0 .. argv.items.len - 1]) |arg| { - try stdout_writer.print("{s} ", .{arg}); - } - try stdout_writer.print("{s}\n\n", .{argv.items[argv.items.len - 1]}); - } - - if (process.can_spawn) { - const result = std.ChildProcess.run(.{ - .allocator = gpa, - .argv = argv.items, - .max_output_bytes = std.math.maxInt(u32), - }) catch |err| { - try resinator.utils.renderErrorMessage(stderr.writer(), stderr_config, .err, "unable to spawn preprocessor child process: {s}", .{@errorName(err)}); - process.exit(1); - }; - errdefer gpa.free(result.stdout); - defer gpa.free(result.stderr); - - switch (result.term) { - .Exited => |code| { - if (code != 0) { - try resinator.utils.renderErrorMessage(stderr.writer(), stderr_config, .err, "the preprocessor failed with exit code {}:", .{code}); - try stderr.writeAll(result.stderr); - try stderr.writeAll("\n"); - process.exit(1); - } - }, - .Signal, .Stopped, .Unknown => { - try resinator.utils.renderErrorMessage(stderr.writer(), stderr_config, .err, "the preprocessor terminated unexpectedly ({s}):", .{@tagName(result.term)}); - try stderr.writeAll(result.stderr); - try stderr.writeAll("\n"); - process.exit(1); - }, - } - - break :full_input result.stdout; - } else { - // need to use an intermediate file - const rand_int = std.crypto.random.int(u64); - const preprocessed_path = try std.fmt.allocPrint(gpa, "resinator{x}.rcpp", .{rand_int}); - defer gpa.free(preprocessed_path); - defer std.fs.cwd().deleteFile(preprocessed_path) catch {}; - - try argv.appendSlice(&.{ "-o", preprocessed_path }); - const exit_code = try 
clangMain(arena, argv.items); - if (exit_code != 0) { - try resinator.utils.renderErrorMessage(stderr.writer(), stderr_config, .err, "the preprocessor failed with exit code {}:", .{exit_code}); - process.exit(1); - } - break :full_input std.fs.cwd().readFileAlloc(gpa, preprocessed_path, std.math.maxInt(usize)) catch |err| { - try resinator.utils.renderErrorMessage(stderr.writer(), stderr_config, .err, "unable to read preprocessed file path '{s}': {s}", .{ preprocessed_path, @errorName(err) }); - process.exit(1); - }; - } - } else { - break :full_input std.fs.cwd().readFileAlloc(gpa, options.input_filename, std.math.maxInt(usize)) catch |err| { - try resinator.utils.renderErrorMessage(stderr.writer(), stderr_config, .err, "unable to read input file path '{s}': {s}", .{ options.input_filename, @errorName(err) }); - process.exit(1); - }; - } - }; - defer gpa.free(full_input); - - if (options.preprocess == .only) { - std.fs.cwd().writeFile(options.output_filename, full_input) catch |err| { - try resinator.utils.renderErrorMessage(stderr.writer(), stderr_config, .err, "unable to write output file '{s}': {s}", .{ options.output_filename, @errorName(err) }); - process.exit(1); - }; - return cleanExit(); - } - - var mapping_results = try resinator.source_mapping.parseAndRemoveLineCommands(gpa, full_input, full_input, .{ .initial_filename = options.input_filename }); - defer mapping_results.mappings.deinit(gpa); - - const final_input = resinator.comments.removeComments(mapping_results.result, mapping_results.result, &mapping_results.mappings); - - var output_file = std.fs.cwd().createFile(options.output_filename, .{}) catch |err| { - try resinator.utils.renderErrorMessage(stderr.writer(), stderr_config, .err, "unable to create output file '{s}': {s}", .{ options.output_filename, @errorName(err) }); - process.exit(1); - }; - var output_file_closed = false; - defer if (!output_file_closed) output_file.close(); - - var diagnostics = resinator.errors.Diagnostics.init(gpa); - 
defer diagnostics.deinit(); - - var output_buffered_stream = std.io.bufferedWriter(output_file.writer()); - - resinator.compile.compile(gpa, final_input, output_buffered_stream.writer(), .{ - .cwd = std.fs.cwd(), - .diagnostics = &diagnostics, - .source_mappings = &mapping_results.mappings, - .dependencies_list = null, - .ignore_include_env_var = options.ignore_include_env_var, - .extra_include_paths = options.extra_include_paths.items, - .default_language_id = options.default_language_id, - .default_code_page = options.default_code_page orelse .windows1252, - .verbose = options.verbose, - .null_terminate_string_table_strings = options.null_terminate_string_table_strings, - .max_string_literal_codepoints = options.max_string_literal_codepoints, - .silent_duplicate_control_ids = options.silent_duplicate_control_ids, - .warn_instead_of_error_on_invalid_code_page = options.warn_instead_of_error_on_invalid_code_page, - }) catch |err| switch (err) { - error.ParseError, error.CompileError => { - diagnostics.renderToStdErr(std.fs.cwd(), final_input, stderr_config, mapping_results.mappings); - // Delete the output file on error - output_file.close(); - output_file_closed = true; - // Failing to delete is not really a big deal, so swallow any errors - std.fs.cwd().deleteFile(options.output_filename) catch {}; - process.exit(1); - }, - else => |e| return e, - }; - - try output_buffered_stream.flush(); - - // print any warnings/notes - diagnostics.renderToStdErr(std.fs.cwd(), final_input, stderr_config, mapping_results.mappings); - - return cleanExit(); -} - -const RcIncludeArgs = struct { - include_paths: []const []const u8 = &.{}, - target_abi: ?[]const u8 = null, -}; - -fn detectRcIncludeDirs(arena: Allocator, zig_lib_dir: []const u8, auto_includes: @import("resinator.zig").cli.Options.AutoIncludes) !RcIncludeArgs { - if (auto_includes == .none) return .{}; - var cur_includes = auto_includes; - if (builtin.target.os.tag != .windows) { - switch (cur_includes) { - // MSVC 
can't be found when the host isn't Windows, so short-circuit. - .msvc => return error.WindowsSdkNotFound, - // Skip straight to gnu since we won't be able to detect MSVC on non-Windows hosts. - .any => cur_includes = .gnu, - .gnu => {}, - .none => unreachable, - } - } - while (true) { - switch (cur_includes) { - .any, .msvc => { - const target_query: std.Target.Query = .{ - .os_tag = .windows, - .abi = .msvc, - }; - const target = std.zig.resolveTargetQueryOrFatal(target_query); - const is_native_abi = target_query.isNativeAbi(); - const detected_libc = std.zig.LibCDirs.detect(arena, zig_lib_dir, target, is_native_abi, true, null) catch |err| { - if (cur_includes == .any) { - // fall back to mingw - cur_includes = .gnu; - continue; - } - return err; - }; - if (detected_libc.libc_include_dir_list.len == 0) { - if (cur_includes == .any) { - // fall back to mingw - cur_includes = .gnu; - continue; - } - return error.WindowsSdkNotFound; - } - return .{ - .include_paths = detected_libc.libc_include_dir_list, - .target_abi = "msvc", - }; - }, - .gnu => { - const target_query: std.Target.Query = .{ - .os_tag = .windows, - .abi = .gnu, - }; - const target = std.zig.resolveTargetQueryOrFatal(target_query); - const is_native_abi = target_query.isNativeAbi(); - const detected_libc = try std.zig.LibCDirs.detect(arena, zig_lib_dir, target, is_native_abi, true, null); - return .{ - .include_paths = detected_libc.libc_include_dir_list, - .target_abi = "gnu", - }; - }, - .none => unreachable, - } - } -} - const usage_init = \\Usage: zig init \\ @@ -5569,6 +5306,8 @@ const JitCmdOptions = struct { prepend_zig_exe_path: bool = false, depend_on_aro: bool = false, capture: ?*[]u8 = null, + /// Send progress and error bundles via std.zig.Server over stdout + server: bool = false, }; fn jitCmd( @@ -5714,10 +5453,52 @@ fn jitCmd( }; defer comp.destroy(); - updateModule(comp, color) catch |err| switch (err) { - error.SemanticAnalyzeFail => process.exit(2), - else => |e| return e, - }; + 
if (options.server and !builtin.single_threaded) { + var reset: std.Thread.ResetEvent = .{}; + var progress: std.Progress = .{ + .terminal = null, + .root = .{ + .context = undefined, + .parent = null, + .name = "", + .unprotected_estimated_total_items = 0, + .unprotected_completed_items = 0, + }, + .columns_written = 0, + .prev_refresh_timestamp = 0, + .timer = null, + .done = false, + }; + const main_progress_node = &progress.root; + main_progress_node.context = &progress; + var server = std.zig.Server{ + .out = std.io.getStdOut(), + .in = undefined, // won't be receiving messages + .receive_fifo = undefined, // won't be receiving messages + }; + + var progress_thread = try std.Thread.spawn(.{}, progressThread, .{ + &progress, &server, &reset, + }); + defer { + reset.set(); + progress_thread.join(); + } + + try comp.update(main_progress_node); + + var error_bundle = try comp.getAllErrorsAlloc(); + defer error_bundle.deinit(comp.gpa); + if (error_bundle.errorMessageCount() > 0) { + try server.serveErrorBundle(error_bundle); + process.exit(2); + } + } else { + updateModule(comp, color) catch |err| switch (err) { + error.SemanticAnalyzeFail => process.exit(2), + else => |e| return e, + }; + } const exe_path = try global_cache_directory.join(arena, &.{comp.cache_use.whole.bin_sub_path.?}); child_argv.appendAssumeCapacity(exe_path); diff --git a/src/resinator.zig b/src/resinator.zig @@ -1,25 +0,0 @@ -comptime { - if (@import("build_options").only_core_functionality) { - @compileError("resinator included in only_core_functionality build"); - } -} - -pub const ani = @import("resinator/ani.zig"); -pub const ast = @import("resinator/ast.zig"); -pub const bmp = @import("resinator/bmp.zig"); -pub const cli = @import("resinator/cli.zig"); -pub const code_pages = @import("resinator/code_pages.zig"); -pub const comments = @import("resinator/comments.zig"); -pub const compile = @import("resinator/compile.zig"); -pub const errors = @import("resinator/errors.zig"); -pub const ico 
= @import("resinator/ico.zig"); -pub const lang = @import("resinator/lang.zig"); -pub const lex = @import("resinator/lex.zig"); -pub const literals = @import("resinator/literals.zig"); -pub const parse = @import("resinator/parse.zig"); -pub const preprocess = @import("resinator/preprocess.zig"); -pub const rc = @import("resinator/rc.zig"); -pub const res = @import("resinator/res.zig"); -pub const source_mapping = @import("resinator/source_mapping.zig"); -pub const utils = @import("resinator/utils.zig"); -pub const windows1252 = @import("resinator/windows1252.zig"); diff --git a/src/resinator/cli.zig b/src/resinator/cli.zig @@ -1,1439 +0,0 @@ -const std = @import("std"); -const CodePage = @import("code_pages.zig").CodePage; -const lang = @import("lang.zig"); -const res = @import("res.zig"); -const Allocator = std.mem.Allocator; -const lex = @import("lex.zig"); - -/// This is what /SL 100 will set the maximum string literal length to -pub const max_string_literal_length_100_percent = 8192; - -pub const usage_string_after_command_name = - \\ [options] [--] <INPUT> [<OUTPUT>] - \\ - \\The sequence -- can be used to signify when to stop parsing options. - \\This is necessary when the input path begins with a forward slash. - \\ - \\Supported Win32 RC Options: - \\ /?, /h Print this help and exit. - \\ /v Verbose (print progress messages). - \\ /d <name>[=<value>] Define a symbol (during preprocessing). - \\ /u <name> Undefine a symbol (during preprocessing). - \\ /fo <value> Specify output file path. - \\ /l <value> Set default language using hexadecimal id (ex: 409). - \\ /ln <value> Set default language using language name (ex: en-us). - \\ /i <value> Add an include path. - \\ /x Ignore INCLUDE environment variable. - \\ /c <value> Set default code page (ex: 65001). - \\ /w Warn on invalid code page in .rc (instead of error). - \\ /y Suppress warnings for duplicate control IDs. - \\ /n Null-terminate all strings in string tables. 
- \\ /sl <value> Specify string literal length limit in percentage (1-100) - \\ where 100 corresponds to a limit of 8192. If the /sl - \\ option is not specified, the default limit is 4097. - \\ /p Only run the preprocessor and output a .rcpp file. - \\ - \\No-op Win32 RC Options: - \\ /nologo, /a, /r Options that are recognized but do nothing. - \\ - \\Unsupported Win32 RC Options: - \\ /fm, /q, /g, /gn, /g1, /g2 Unsupported MUI-related options. - \\ /?c, /hc, /t, /tp:<prefix>, Unsupported LCX/LCE-related options. - \\ /tn, /tm, /tc, /tw, /te, - \\ /ti, /ta - \\ /z Unsupported font-substitution-related option. - \\ /s Unsupported HWB-related option. - \\ - \\Custom Options (resinator-specific): - \\ /:no-preprocess Do not run the preprocessor. - \\ /:debug Output the preprocessed .rc file and the parsed AST. - \\ /:auto-includes <value> Set the automatic include path detection behavior. - \\ any (default) Use MSVC if available, fall back to MinGW - \\ msvc Use MSVC include paths (must be present on the system) - \\ gnu Use MinGW include paths (requires Zig as the preprocessor) - \\ none Do not use any autodetected include paths - \\ - \\Note: For compatibility reasons, all custom options start with : - \\ -; - -pub fn writeUsage(writer: anytype, command_name: []const u8) !void { - try writer.writeAll("Usage: "); - try writer.writeAll(command_name); - try writer.writeAll(usage_string_after_command_name); -} - -pub const Diagnostics = struct { - errors: std.ArrayListUnmanaged(ErrorDetails) = .{}, - allocator: Allocator, - - pub const ErrorDetails = struct { - arg_index: usize, - arg_span: ArgSpan = .{}, - msg: std.ArrayListUnmanaged(u8) = .{}, - type: Type = .err, - print_args: bool = true, - - pub const Type = enum { err, warning, note }; - pub const ArgSpan = struct { - point_at_next_arg: bool = false, - name_offset: usize = 0, - prefix_len: usize = 0, - value_offset: usize = 0, - name_len: usize = 0, - }; - }; - - pub fn init(allocator: Allocator) Diagnostics { - 
return .{ - .allocator = allocator, - }; - } - - pub fn deinit(self: *Diagnostics) void { - for (self.errors.items) |*details| { - details.msg.deinit(self.allocator); - } - self.errors.deinit(self.allocator); - } - - pub fn append(self: *Diagnostics, error_details: ErrorDetails) !void { - try self.errors.append(self.allocator, error_details); - } - - pub fn renderToStdErr(self: *Diagnostics, args: []const []const u8, config: std.io.tty.Config) void { - std.debug.getStderrMutex().lock(); - defer std.debug.getStderrMutex().unlock(); - const stderr = std.io.getStdErr().writer(); - self.renderToWriter(args, stderr, config) catch return; - } - - pub fn renderToWriter(self: *Diagnostics, args: []const []const u8, writer: anytype, config: std.io.tty.Config) !void { - for (self.errors.items) |err_details| { - try renderErrorMessage(writer, config, err_details, args); - } - } - - pub fn hasError(self: *const Diagnostics) bool { - for (self.errors.items) |err| { - if (err.type == .err) return true; - } - return false; - } -}; - -pub const Options = struct { - allocator: Allocator, - input_filename: []const u8 = &[_]u8{}, - output_filename: []const u8 = &[_]u8{}, - extra_include_paths: std.ArrayListUnmanaged([]const u8) = .{}, - ignore_include_env_var: bool = false, - preprocess: Preprocess = .yes, - default_language_id: ?u16 = null, - default_code_page: ?CodePage = null, - verbose: bool = false, - symbols: std.StringArrayHashMapUnmanaged(SymbolValue) = .{}, - null_terminate_string_table_strings: bool = false, - max_string_literal_codepoints: u15 = lex.default_max_string_literal_codepoints, - silent_duplicate_control_ids: bool = false, - warn_instead_of_error_on_invalid_code_page: bool = false, - debug: bool = false, - print_help_and_exit: bool = false, - auto_includes: AutoIncludes = .any, - - pub const AutoIncludes = enum { any, msvc, gnu, none }; - pub const Preprocess = enum { no, yes, only }; - pub const SymbolAction = enum { define, undefine }; - pub const SymbolValue = 
union(SymbolAction) { - define: []const u8, - undefine: void, - - pub fn deinit(self: SymbolValue, allocator: Allocator) void { - switch (self) { - .define => |value| allocator.free(value), - .undefine => {}, - } - } - }; - - /// Does not check that identifier contains only valid characters - pub fn define(self: *Options, identifier: []const u8, value: []const u8) !void { - if (self.symbols.getPtr(identifier)) |val_ptr| { - // If the symbol is undefined, then that always takes precedence so - // we shouldn't change anything. - if (val_ptr.* == .undefine) return; - // Otherwise, the new value takes precedence. - const duped_value = try self.allocator.dupe(u8, value); - errdefer self.allocator.free(duped_value); - val_ptr.deinit(self.allocator); - val_ptr.* = .{ .define = duped_value }; - return; - } - const duped_key = try self.allocator.dupe(u8, identifier); - errdefer self.allocator.free(duped_key); - const duped_value = try self.allocator.dupe(u8, value); - errdefer self.allocator.free(duped_value); - try self.symbols.put(self.allocator, duped_key, .{ .define = duped_value }); - } - - /// Does not check that identifier contains only valid characters - pub fn undefine(self: *Options, identifier: []const u8) !void { - if (self.symbols.getPtr(identifier)) |action| { - action.deinit(self.allocator); - action.* = .{ .undefine = {} }; - return; - } - const duped_key = try self.allocator.dupe(u8, identifier); - errdefer self.allocator.free(duped_key); - try self.symbols.put(self.allocator, duped_key, .{ .undefine = {} }); - } - - /// If the current input filename both: - /// - does not have an extension, and - /// - does not exist in the cwd - /// then this function will append `.rc` to the input filename - /// - /// Note: This behavior is different from the Win32 compiler. - /// It always appends .RC if the filename does not have - /// a `.` in it and it does not even try the verbatim name - /// in that scenario. 
- /// - /// The approach taken here is meant to give us a 'best of both - /// worlds' situation where we'll be compatible with most use-cases - /// of the .rc extension being omitted from the CLI args, but still - /// work fine if the file itself does not have an extension. - pub fn maybeAppendRC(options: *Options, cwd: std.fs.Dir) !void { - if (std.fs.path.extension(options.input_filename).len == 0) { - cwd.access(options.input_filename, .{}) catch |err| switch (err) { - error.FileNotFound => { - var filename_bytes = try options.allocator.alloc(u8, options.input_filename.len + 3); - @memcpy(filename_bytes[0 .. filename_bytes.len - 3], options.input_filename); - @memcpy(filename_bytes[filename_bytes.len - 3 ..], ".rc"); - options.allocator.free(options.input_filename); - options.input_filename = filename_bytes; - }, - else => {}, - }; - } - } - - pub fn deinit(self: *Options) void { - for (self.extra_include_paths.items) |extra_include_path| { - self.allocator.free(extra_include_path); - } - self.extra_include_paths.deinit(self.allocator); - self.allocator.free(self.input_filename); - self.allocator.free(self.output_filename); - var symbol_it = self.symbols.iterator(); - while (symbol_it.next()) |entry| { - self.allocator.free(entry.key_ptr.*); - entry.value_ptr.deinit(self.allocator); - } - self.symbols.deinit(self.allocator); - } - - pub fn dumpVerbose(self: *const Options, writer: anytype) !void { - try writer.print("Input filename: {s}\n", .{self.input_filename}); - try writer.print("Output filename: {s}\n", .{self.output_filename}); - if (self.extra_include_paths.items.len > 0) { - try writer.writeAll(" Extra include paths:\n"); - for (self.extra_include_paths.items) |extra_include_path| { - try writer.print(" \"{s}\"\n", .{extra_include_path}); - } - } - if (self.ignore_include_env_var) { - try writer.writeAll(" The INCLUDE environment variable will be ignored\n"); - } - if (self.preprocess == .no) { - try writer.writeAll(" The preprocessor will not be 
invoked\n"); - } else if (self.preprocess == .only) { - try writer.writeAll(" Only the preprocessor will be invoked\n"); - } - if (self.symbols.count() > 0) { - try writer.writeAll(" Symbols:\n"); - var it = self.symbols.iterator(); - while (it.next()) |symbol| { - try writer.print(" {s} {s}", .{ switch (symbol.value_ptr.*) { - .define => "#define", - .undefine => "#undef", - }, symbol.key_ptr.* }); - if (symbol.value_ptr.* == .define) { - try writer.print(" {s}", .{symbol.value_ptr.define}); - } - try writer.writeAll("\n"); - } - } - if (self.null_terminate_string_table_strings) { - try writer.writeAll(" Strings in string tables will be null-terminated\n"); - } - if (self.max_string_literal_codepoints != lex.default_max_string_literal_codepoints) { - try writer.print(" Max string literal length: {}\n", .{self.max_string_literal_codepoints}); - } - if (self.silent_duplicate_control_ids) { - try writer.writeAll(" Duplicate control IDs will not emit warnings\n"); - } - if (self.silent_duplicate_control_ids) { - try writer.writeAll(" Invalid code page in .rc will produce a warning (instead of an error)\n"); - } - - const language_id = self.default_language_id orelse res.Language.default; - const language_name = language_name: { - if (std.meta.intToEnum(lang.LanguageId, language_id)) |lang_enum_val| { - break :language_name @tagName(lang_enum_val); - } else |_| {} - if (language_id == lang.LOCALE_CUSTOM_UNSPECIFIED) { - break :language_name "LOCALE_CUSTOM_UNSPECIFIED"; - } - break :language_name "<UNKNOWN>"; - }; - try writer.print("Default language: {s} (id=0x{x})\n", .{ language_name, language_id }); - - const code_page = self.default_code_page orelse .windows1252; - try writer.print("Default codepage: {s} (id={})\n", .{ @tagName(code_page), @intFromEnum(code_page) }); - } -}; - -pub const Arg = struct { - prefix: enum { long, short, slash }, - name_offset: usize, - full: []const u8, - - pub fn fromString(str: []const u8) ?@This() { - if (std.mem.startsWith(u8, str, 
"--")) { - return .{ .prefix = .long, .name_offset = 2, .full = str }; - } else if (std.mem.startsWith(u8, str, "-")) { - return .{ .prefix = .short, .name_offset = 1, .full = str }; - } else if (std.mem.startsWith(u8, str, "/")) { - return .{ .prefix = .slash, .name_offset = 1, .full = str }; - } - return null; - } - - pub fn prefixSlice(self: Arg) []const u8 { - return self.full[0..(if (self.prefix == .long) 2 else 1)]; - } - - pub fn name(self: Arg) []const u8 { - return self.full[self.name_offset..]; - } - - pub fn optionWithoutPrefix(self: Arg, option_len: usize) []const u8 { - return self.name()[0..option_len]; - } - - pub fn missingSpan(self: Arg) Diagnostics.ErrorDetails.ArgSpan { - return .{ - .point_at_next_arg = true, - .value_offset = 0, - .name_offset = self.name_offset, - .prefix_len = self.prefixSlice().len, - }; - } - - pub fn optionAndAfterSpan(self: Arg) Diagnostics.ErrorDetails.ArgSpan { - return self.optionSpan(0); - } - - pub fn optionSpan(self: Arg, option_len: usize) Diagnostics.ErrorDetails.ArgSpan { - return .{ - .name_offset = self.name_offset, - .prefix_len = self.prefixSlice().len, - .name_len = option_len, - }; - } - - pub const Value = struct { - slice: []const u8, - index_increment: u2 = 1, - - pub fn argSpan(self: Value, arg: Arg) Diagnostics.ErrorDetails.ArgSpan { - const prefix_len = arg.prefixSlice().len; - switch (self.index_increment) { - 1 => return .{ - .value_offset = @intFromPtr(self.slice.ptr) - @intFromPtr(arg.full.ptr), - .prefix_len = prefix_len, - .name_offset = arg.name_offset, - }, - 2 => return .{ - .point_at_next_arg = true, - .prefix_len = prefix_len, - .name_offset = arg.name_offset, - }, - else => unreachable, - } - } - - pub fn index(self: Value, arg_index: usize) usize { - if (self.index_increment == 2) return arg_index + 1; - return arg_index; - } - }; - - pub fn value(self: Arg, option_len: usize, index: usize, args: []const []const u8) error{MissingValue}!Value { - const rest = self.full[self.name_offset + 
option_len ..]; - if (rest.len > 0) return .{ .slice = rest }; - if (index + 1 >= args.len) return error.MissingValue; - return .{ .slice = args[index + 1], .index_increment = 2 }; - } - - pub const Context = struct { - index: usize, - arg: Arg, - value: Value, - }; -}; - -pub const ParseError = error{ParseError} || Allocator.Error; - -/// Note: Does not run `Options.maybeAppendRC` automatically. If that behavior is desired, -/// it must be called separately. -pub fn parse(allocator: Allocator, args: []const []const u8, diagnostics: *Diagnostics) ParseError!Options { - var options = Options{ .allocator = allocator }; - errdefer options.deinit(); - - var output_filename: ?[]const u8 = null; - var output_filename_context: Arg.Context = undefined; - - var arg_i: usize = 1; // start at 1 to skip past the exe name - next_arg: while (arg_i < args.len) { - var arg = Arg.fromString(args[arg_i]) orelse break; - if (arg.name().len == 0) { - switch (arg.prefix) { - // -- on its own ends arg parsing - .long => { - arg_i += 1; - break; - }, - // - or / on its own is an error - else => { - var err_details = Diagnostics.ErrorDetails{ .arg_index = arg_i, .arg_span = arg.optionAndAfterSpan() }; - var msg_writer = err_details.msg.writer(allocator); - try msg_writer.print("invalid option: {s}", .{arg.prefixSlice()}); - try diagnostics.append(err_details); - arg_i += 1; - continue :next_arg; - }, - } - } - - while (arg.name().len > 0) { - const arg_name = arg.name(); - // Note: These cases should be in order from longest to shortest, since - // shorter options that are a substring of a longer one could make - // the longer option's branch unreachable. 
- if (std.ascii.startsWithIgnoreCase(arg_name, ":no-preprocess")) { - options.preprocess = .no; - arg.name_offset += ":no-preprocess".len; - } else if (std.ascii.startsWithIgnoreCase(arg_name, ":auto-includes")) { - const value = arg.value(":auto-includes".len, arg_i, args) catch { - var err_details = Diagnostics.ErrorDetails{ .arg_index = arg_i, .arg_span = arg.missingSpan() }; - var msg_writer = err_details.msg.writer(allocator); - try msg_writer.print("missing value after {s}{s} option", .{ arg.prefixSlice(), arg.optionWithoutPrefix(":auto-includes".len) }); - try diagnostics.append(err_details); - arg_i += 1; - break :next_arg; - }; - options.auto_includes = std.meta.stringToEnum(Options.AutoIncludes, value.slice) orelse blk: { - var err_details = Diagnostics.ErrorDetails{ .arg_index = arg_i, .arg_span = value.argSpan(arg) }; - var msg_writer = err_details.msg.writer(allocator); - try msg_writer.print("invalid auto includes setting: {s} ", .{value.slice}); - try diagnostics.append(err_details); - break :blk options.auto_includes; - }; - arg_i += value.index_increment; - continue :next_arg; - } else if (std.ascii.startsWithIgnoreCase(arg_name, "nologo")) { - // No-op, we don't display any 'logo' to suppress - arg.name_offset += "nologo".len; - } else if (std.ascii.startsWithIgnoreCase(arg_name, ":debug")) { - options.debug = true; - arg.name_offset += ":debug".len; - } - // Unsupported LCX/LCE options that need a value (within the same arg only) - else if (std.ascii.startsWithIgnoreCase(arg_name, "tp:")) { - const rest = arg.full[arg.name_offset + 3 ..]; - if (rest.len == 0) { - var err_details = Diagnostics.ErrorDetails{ .arg_index = arg_i, .arg_span = .{ - .name_offset = arg.name_offset, - .prefix_len = arg.prefixSlice().len, - .value_offset = arg.name_offset + 3, - } }; - var msg_writer = err_details.msg.writer(allocator); - try msg_writer.print("missing value for {s}{s} option", .{ arg.prefixSlice(), arg.optionWithoutPrefix(3) }); - try 
diagnostics.append(err_details); - } - var err_details = Diagnostics.ErrorDetails{ .type = .err, .arg_index = arg_i, .arg_span = arg.optionAndAfterSpan() }; - var msg_writer = err_details.msg.writer(allocator); - try msg_writer.print("the {s}{s} option is unsupported", .{ arg.prefixSlice(), arg.optionWithoutPrefix(3) }); - try diagnostics.append(err_details); - arg_i += 1; - continue :next_arg; - } - // Unsupported LCX/LCE options that need a value - else if (std.ascii.startsWithIgnoreCase(arg_name, "tn")) { - const value = arg.value(2, arg_i, args) catch no_value: { - var err_details = Diagnostics.ErrorDetails{ .arg_index = arg_i, .arg_span = arg.missingSpan() }; - var msg_writer = err_details.msg.writer(allocator); - try msg_writer.print("missing value after {s}{s} option", .{ arg.prefixSlice(), arg.optionWithoutPrefix(2) }); - try diagnostics.append(err_details); - // dummy zero-length slice starting where the value would have been - const value_start = arg.name_offset + 2; - break :no_value Arg.Value{ .slice = arg.full[value_start..value_start] }; - }; - var err_details = Diagnostics.ErrorDetails{ .type = .err, .arg_index = arg_i, .arg_span = arg.optionAndAfterSpan() }; - var msg_writer = err_details.msg.writer(allocator); - try msg_writer.print("the {s}{s} option is unsupported", .{ arg.prefixSlice(), arg.optionWithoutPrefix(2) }); - try diagnostics.append(err_details); - arg_i += value.index_increment; - continue :next_arg; - } - // Unsupported MUI options that need a value - else if (std.ascii.startsWithIgnoreCase(arg_name, "fm") or - std.ascii.startsWithIgnoreCase(arg_name, "gn") or - std.ascii.startsWithIgnoreCase(arg_name, "g2")) - { - const value = arg.value(2, arg_i, args) catch no_value: { - var err_details = Diagnostics.ErrorDetails{ .arg_index = arg_i, .arg_span = arg.missingSpan() }; - var msg_writer = err_details.msg.writer(allocator); - try msg_writer.print("missing value after {s}{s} option", .{ arg.prefixSlice(), arg.optionWithoutPrefix(2) }); - 
try diagnostics.append(err_details); - // dummy zero-length slice starting where the value would have been - const value_start = arg.name_offset + 2; - break :no_value Arg.Value{ .slice = arg.full[value_start..value_start] }; - }; - var err_details = Diagnostics.ErrorDetails{ .type = .err, .arg_index = arg_i, .arg_span = arg.optionAndAfterSpan() }; - var msg_writer = err_details.msg.writer(allocator); - try msg_writer.print("the {s}{s} option is unsupported", .{ arg.prefixSlice(), arg.optionWithoutPrefix(2) }); - try diagnostics.append(err_details); - arg_i += value.index_increment; - continue :next_arg; - } - // Unsupported MUI options that do not need a value - else if (std.ascii.startsWithIgnoreCase(arg_name, "g1")) { - var err_details = Diagnostics.ErrorDetails{ .type = .err, .arg_index = arg_i, .arg_span = arg.optionSpan(2) }; - var msg_writer = err_details.msg.writer(allocator); - try msg_writer.print("the {s}{s} option is unsupported", .{ arg.prefixSlice(), arg.optionWithoutPrefix(2) }); - try diagnostics.append(err_details); - arg.name_offset += 2; - } - // Unsupported LCX/LCE options that do not need a value - else if (std.ascii.startsWithIgnoreCase(arg_name, "tm") or - std.ascii.startsWithIgnoreCase(arg_name, "tc") or - std.ascii.startsWithIgnoreCase(arg_name, "tw") or - std.ascii.startsWithIgnoreCase(arg_name, "te") or - std.ascii.startsWithIgnoreCase(arg_name, "ti") or - std.ascii.startsWithIgnoreCase(arg_name, "ta")) - { - var err_details = Diagnostics.ErrorDetails{ .type = .err, .arg_index = arg_i, .arg_span = arg.optionSpan(2) }; - var msg_writer = err_details.msg.writer(allocator); - try msg_writer.print("the {s}{s} option is unsupported", .{ arg.prefixSlice(), arg.optionWithoutPrefix(2) }); - try diagnostics.append(err_details); - arg.name_offset += 2; - } else if (std.ascii.startsWithIgnoreCase(arg_name, "fo")) { - const value = arg.value(2, arg_i, args) catch { - var err_details = Diagnostics.ErrorDetails{ .arg_index = arg_i, .arg_span = 
arg.missingSpan() }; - var msg_writer = err_details.msg.writer(allocator); - try msg_writer.print("missing output path after {s}{s} option", .{ arg.prefixSlice(), arg.optionWithoutPrefix(2) }); - try diagnostics.append(err_details); - arg_i += 1; - break :next_arg; - }; - output_filename_context = .{ .index = arg_i, .arg = arg, .value = value }; - output_filename = value.slice; - arg_i += value.index_increment; - continue :next_arg; - } else if (std.ascii.startsWithIgnoreCase(arg_name, "sl")) { - const value = arg.value(2, arg_i, args) catch { - var err_details = Diagnostics.ErrorDetails{ .arg_index = arg_i, .arg_span = arg.missingSpan() }; - var msg_writer = err_details.msg.writer(allocator); - try msg_writer.print("missing language tag after {s}{s} option", .{ arg.prefixSlice(), arg.optionWithoutPrefix(2) }); - try diagnostics.append(err_details); - arg_i += 1; - break :next_arg; - }; - const percent_str = value.slice; - const percent: u32 = parsePercent(percent_str) catch { - var err_details = Diagnostics.ErrorDetails{ .arg_index = arg_i, .arg_span = value.argSpan(arg) }; - var msg_writer = err_details.msg.writer(allocator); - try msg_writer.print("invalid percent format '{s}'", .{percent_str}); - try diagnostics.append(err_details); - var note_details = Diagnostics.ErrorDetails{ .type = .note, .print_args = false, .arg_index = arg_i }; - var note_writer = note_details.msg.writer(allocator); - try note_writer.writeAll("string length percent must be an integer between 1 and 100 (inclusive)"); - try diagnostics.append(note_details); - arg_i += value.index_increment; - continue :next_arg; - }; - if (percent == 0 or percent > 100) { - var err_details = Diagnostics.ErrorDetails{ .arg_index = arg_i, .arg_span = value.argSpan(arg) }; - var msg_writer = err_details.msg.writer(allocator); - try msg_writer.print("percent out of range: {} (parsed from '{s}')", .{ percent, percent_str }); - try diagnostics.append(err_details); - var note_details = Diagnostics.ErrorDetails{ 
.type = .note, .print_args = false, .arg_index = arg_i }; - var note_writer = note_details.msg.writer(allocator); - try note_writer.writeAll("string length percent must be an integer between 1 and 100 (inclusive)"); - try diagnostics.append(note_details); - arg_i += value.index_increment; - continue :next_arg; - } - const percent_float = @as(f32, @floatFromInt(percent)) / 100; - options.max_string_literal_codepoints = @intFromFloat(percent_float * max_string_literal_length_100_percent); - arg_i += value.index_increment; - continue :next_arg; - } else if (std.ascii.startsWithIgnoreCase(arg_name, "ln")) { - const value = arg.value(2, arg_i, args) catch { - var err_details = Diagnostics.ErrorDetails{ .arg_index = arg_i, .arg_span = arg.missingSpan() }; - var msg_writer = err_details.msg.writer(allocator); - try msg_writer.print("missing language tag after {s}{s} option", .{ arg.prefixSlice(), arg.optionWithoutPrefix(2) }); - try diagnostics.append(err_details); - arg_i += 1; - break :next_arg; - }; - const tag = value.slice; - options.default_language_id = lang.tagToInt(tag) catch { - var err_details = Diagnostics.ErrorDetails{ .arg_index = arg_i, .arg_span = value.argSpan(arg) }; - var msg_writer = err_details.msg.writer(allocator); - try msg_writer.print("invalid language tag: {s}", .{tag}); - try diagnostics.append(err_details); - arg_i += value.index_increment; - continue :next_arg; - }; - if (options.default_language_id.? 
== lang.LOCALE_CUSTOM_UNSPECIFIED) { - var err_details = Diagnostics.ErrorDetails{ .type = .warning, .arg_index = arg_i, .arg_span = value.argSpan(arg) }; - var msg_writer = err_details.msg.writer(allocator); - try msg_writer.print("language tag '{s}' does not have an assigned ID so it will be resolved to LOCALE_CUSTOM_UNSPECIFIED (id=0x{x})", .{ tag, lang.LOCALE_CUSTOM_UNSPECIFIED }); - try diagnostics.append(err_details); - } - arg_i += value.index_increment; - continue :next_arg; - } else if (std.ascii.startsWithIgnoreCase(arg_name, "l")) { - const value = arg.value(1, arg_i, args) catch { - var err_details = Diagnostics.ErrorDetails{ .arg_index = arg_i, .arg_span = arg.missingSpan() }; - var msg_writer = err_details.msg.writer(allocator); - try msg_writer.print("missing language ID after {s}{s} option", .{ arg.prefixSlice(), arg.optionWithoutPrefix(1) }); - try diagnostics.append(err_details); - arg_i += 1; - break :next_arg; - }; - const num_str = value.slice; - options.default_language_id = lang.parseInt(num_str) catch { - var err_details = Diagnostics.ErrorDetails{ .arg_index = arg_i, .arg_span = value.argSpan(arg) }; - var msg_writer = err_details.msg.writer(allocator); - try msg_writer.print("invalid language ID: {s}", .{num_str}); - try diagnostics.append(err_details); - arg_i += value.index_increment; - continue :next_arg; - }; - arg_i += value.index_increment; - continue :next_arg; - } else if (std.ascii.startsWithIgnoreCase(arg_name, "h") or std.mem.startsWith(u8, arg_name, "?")) { - options.print_help_and_exit = true; - // If there's been an error to this point, then we still want to fail - if (diagnostics.hasError()) return error.ParseError; - return options; - } - // 1 char unsupported MUI options that need a value - else if (std.ascii.startsWithIgnoreCase(arg_name, "q") or - std.ascii.startsWithIgnoreCase(arg_name, "g")) - { - const value = arg.value(1, arg_i, args) catch no_value: { - var err_details = Diagnostics.ErrorDetails{ .arg_index = arg_i, 
.arg_span = arg.missingSpan() }; - var msg_writer = err_details.msg.writer(allocator); - try msg_writer.print("missing value after {s}{s} option", .{ arg.prefixSlice(), arg.optionWithoutPrefix(1) }); - try diagnostics.append(err_details); - // dummy zero-length slice starting where the value would have been - const value_start = arg.name_offset + 1; - break :no_value Arg.Value{ .slice = arg.full[value_start..value_start] }; - }; - var err_details = Diagnostics.ErrorDetails{ .type = .err, .arg_index = arg_i, .arg_span = arg.optionAndAfterSpan() }; - var msg_writer = err_details.msg.writer(allocator); - try msg_writer.print("the {s}{s} option is unsupported", .{ arg.prefixSlice(), arg.optionWithoutPrefix(1) }); - try diagnostics.append(err_details); - arg_i += value.index_increment; - continue :next_arg; - } - // Undocumented (and unsupported) options that need a value - // /z has to do something with font substitution - // /s has something to do with HWB resources being inserted into the .res - else if (std.ascii.startsWithIgnoreCase(arg_name, "z") or - std.ascii.startsWithIgnoreCase(arg_name, "s")) - { - const value = arg.value(1, arg_i, args) catch no_value: { - var err_details = Diagnostics.ErrorDetails{ .arg_index = arg_i, .arg_span = arg.missingSpan() }; - var msg_writer = err_details.msg.writer(allocator); - try msg_writer.print("missing value after {s}{s} option", .{ arg.prefixSlice(), arg.optionWithoutPrefix(1) }); - try diagnostics.append(err_details); - // dummy zero-length slice starting where the value would have been - const value_start = arg.name_offset + 1; - break :no_value Arg.Value{ .slice = arg.full[value_start..value_start] }; - }; - var err_details = Diagnostics.ErrorDetails{ .type = .err, .arg_index = arg_i, .arg_span = arg.optionAndAfterSpan() }; - var msg_writer = err_details.msg.writer(allocator); - try msg_writer.print("the {s}{s} option is unsupported", .{ arg.prefixSlice(), arg.optionWithoutPrefix(1) }); - try 
diagnostics.append(err_details); - arg_i += value.index_increment; - continue :next_arg; - } - // 1 char unsupported LCX/LCE options that do not need a value - else if (std.ascii.startsWithIgnoreCase(arg_name, "t")) { - var err_details = Diagnostics.ErrorDetails{ .type = .err, .arg_index = arg_i, .arg_span = arg.optionSpan(1) }; - var msg_writer = err_details.msg.writer(allocator); - try msg_writer.print("the {s}{s} option is unsupported", .{ arg.prefixSlice(), arg.optionWithoutPrefix(1) }); - try diagnostics.append(err_details); - arg.name_offset += 1; - } else if (std.ascii.startsWithIgnoreCase(arg_name, "c")) { - const value = arg.value(1, arg_i, args) catch { - var err_details = Diagnostics.ErrorDetails{ .arg_index = arg_i, .arg_span = arg.missingSpan() }; - var msg_writer = err_details.msg.writer(allocator); - try msg_writer.print("missing code page ID after {s}{s} option", .{ arg.prefixSlice(), arg.optionWithoutPrefix(1) }); - try diagnostics.append(err_details); - arg_i += 1; - break :next_arg; - }; - const num_str = value.slice; - const code_page_id = std.fmt.parseUnsigned(u16, num_str, 10) catch { - var err_details = Diagnostics.ErrorDetails{ .arg_index = arg_i, .arg_span = value.argSpan(arg) }; - var msg_writer = err_details.msg.writer(allocator); - try msg_writer.print("invalid code page ID: {s}", .{num_str}); - try diagnostics.append(err_details); - arg_i += value.index_increment; - continue :next_arg; - }; - options.default_code_page = CodePage.getByIdentifierEnsureSupported(code_page_id) catch |err| switch (err) { - error.InvalidCodePage => { - var err_details = Diagnostics.ErrorDetails{ .arg_index = arg_i, .arg_span = value.argSpan(arg) }; - var msg_writer = err_details.msg.writer(allocator); - try msg_writer.print("invalid or unknown code page ID: {}", .{code_page_id}); - try diagnostics.append(err_details); - arg_i += value.index_increment; - continue :next_arg; - }, - error.UnsupportedCodePage => { - var err_details = Diagnostics.ErrorDetails{ 
.arg_index = arg_i, .arg_span = value.argSpan(arg) }; - var msg_writer = err_details.msg.writer(allocator); - try msg_writer.print("unsupported code page: {s} (id={})", .{ - @tagName(CodePage.getByIdentifier(code_page_id) catch unreachable), - code_page_id, - }); - try diagnostics.append(err_details); - arg_i += value.index_increment; - continue :next_arg; - }, - }; - arg_i += value.index_increment; - continue :next_arg; - } else if (std.ascii.startsWithIgnoreCase(arg_name, "v")) { - options.verbose = true; - arg.name_offset += 1; - } else if (std.ascii.startsWithIgnoreCase(arg_name, "x")) { - options.ignore_include_env_var = true; - arg.name_offset += 1; - } else if (std.ascii.startsWithIgnoreCase(arg_name, "p")) { - options.preprocess = .only; - arg.name_offset += 1; - } else if (std.ascii.startsWithIgnoreCase(arg_name, "i")) { - const value = arg.value(1, arg_i, args) catch { - var err_details = Diagnostics.ErrorDetails{ .arg_index = arg_i, .arg_span = arg.missingSpan() }; - var msg_writer = err_details.msg.writer(allocator); - try msg_writer.print("missing include path after {s}{s} option", .{ arg.prefixSlice(), arg.optionWithoutPrefix(1) }); - try diagnostics.append(err_details); - arg_i += 1; - break :next_arg; - }; - const path = value.slice; - const duped = try allocator.dupe(u8, path); - errdefer allocator.free(duped); - try options.extra_include_paths.append(options.allocator, duped); - arg_i += value.index_increment; - continue :next_arg; - } else if (std.ascii.startsWithIgnoreCase(arg_name, "r")) { - // From https://learn.microsoft.com/en-us/windows/win32/menurc/using-rc-the-rc-command-line- - // "Ignored. Provided for compatibility with existing makefiles." 
- arg.name_offset += 1; - } else if (std.ascii.startsWithIgnoreCase(arg_name, "n")) { - options.null_terminate_string_table_strings = true; - arg.name_offset += 1; - } else if (std.ascii.startsWithIgnoreCase(arg_name, "y")) { - options.silent_duplicate_control_ids = true; - arg.name_offset += 1; - } else if (std.ascii.startsWithIgnoreCase(arg_name, "w")) { - options.warn_instead_of_error_on_invalid_code_page = true; - arg.name_offset += 1; - } else if (std.ascii.startsWithIgnoreCase(arg_name, "a")) { - // Undocumented option with unknown function - // TODO: More investigation to figure out what it does (if anything) - var err_details = Diagnostics.ErrorDetails{ .type = .warning, .arg_index = arg_i, .arg_span = arg.optionSpan(1) }; - var msg_writer = err_details.msg.writer(allocator); - try msg_writer.print("option {s}{s} has no effect (it is undocumented and its function is unknown in the Win32 RC compiler)", .{ arg.prefixSlice(), arg.optionWithoutPrefix(1) }); - try diagnostics.append(err_details); - arg.name_offset += 1; - } else if (std.ascii.startsWithIgnoreCase(arg_name, "d")) { - const value = arg.value(1, arg_i, args) catch { - var err_details = Diagnostics.ErrorDetails{ .arg_index = arg_i, .arg_span = arg.missingSpan() }; - var msg_writer = err_details.msg.writer(allocator); - try msg_writer.print("missing symbol to define after {s}{s} option", .{ arg.prefixSlice(), arg.optionWithoutPrefix(1) }); - try diagnostics.append(err_details); - arg_i += 1; - break :next_arg; - }; - var tokenizer = std.mem.tokenize(u8, value.slice, "="); - // guaranteed to exist since an empty value.slice would invoke - // the 'missing symbol to define' branch above - const symbol = tokenizer.next().?; - const symbol_value = tokenizer.next() orelse "1"; - - if (isValidIdentifier(symbol)) { - try options.define(symbol, symbol_value); - } else { - var err_details = Diagnostics.ErrorDetails{ .type = .warning, .arg_index = arg_i, .arg_span = value.argSpan(arg) }; - var msg_writer = 
err_details.msg.writer(allocator); - try msg_writer.print("symbol \"{s}\" is not a valid identifier and therefore cannot be defined", .{symbol}); - try diagnostics.append(err_details); - } - arg_i += value.index_increment; - continue :next_arg; - } else if (std.ascii.startsWithIgnoreCase(arg_name, "u")) { - const value = arg.value(1, arg_i, args) catch { - var err_details = Diagnostics.ErrorDetails{ .arg_index = arg_i, .arg_span = arg.missingSpan() }; - var msg_writer = err_details.msg.writer(allocator); - try msg_writer.print("missing symbol to undefine after {s}{s} option", .{ arg.prefixSlice(), arg.optionWithoutPrefix(1) }); - try diagnostics.append(err_details); - arg_i += 1; - break :next_arg; - }; - const symbol = value.slice; - if (isValidIdentifier(symbol)) { - try options.undefine(symbol); - } else { - var err_details = Diagnostics.ErrorDetails{ .type = .warning, .arg_index = arg_i, .arg_span = value.argSpan(arg) }; - var msg_writer = err_details.msg.writer(allocator); - try msg_writer.print("symbol \"{s}\" is not a valid identifier and therefore cannot be undefined", .{symbol}); - try diagnostics.append(err_details); - } - arg_i += value.index_increment; - continue :next_arg; - } else { - var err_details = Diagnostics.ErrorDetails{ .arg_index = arg_i, .arg_span = arg.optionAndAfterSpan() }; - var msg_writer = err_details.msg.writer(allocator); - try msg_writer.print("invalid option: {s}{s}", .{ arg.prefixSlice(), arg.name() }); - try diagnostics.append(err_details); - arg_i += 1; - continue :next_arg; - } - } else { - // The while loop exited via its conditional, meaning we are done with - // the current arg and can move on the the next - arg_i += 1; - continue; - } - } - - const positionals = args[arg_i..]; - - if (positionals.len < 1) { - var err_details = Diagnostics.ErrorDetails{ .print_args = false, .arg_index = arg_i }; - var msg_writer = err_details.msg.writer(allocator); - try msg_writer.writeAll("missing input filename"); - try 
diagnostics.append(err_details); - - const last_arg = args[args.len - 1]; - if (arg_i > 1 and last_arg.len > 0 and last_arg[0] == '/' and std.ascii.endsWithIgnoreCase(last_arg, ".rc")) { - var note_details = Diagnostics.ErrorDetails{ .type = .note, .print_args = true, .arg_index = arg_i - 1 }; - var note_writer = note_details.msg.writer(allocator); - try note_writer.writeAll("if this argument was intended to be the input filename, then -- should be specified in front of it to exclude it from option parsing"); - try diagnostics.append(note_details); - } - - // This is a fatal enough problem to justify an early return, since - // things after this rely on the value of the input filename. - return error.ParseError; - } - options.input_filename = try allocator.dupe(u8, positionals[0]); - - if (positionals.len > 1) { - if (output_filename != null) { - var err_details = Diagnostics.ErrorDetails{ .arg_index = arg_i + 1 }; - var msg_writer = err_details.msg.writer(allocator); - try msg_writer.writeAll("output filename already specified"); - try diagnostics.append(err_details); - var note_details = Diagnostics.ErrorDetails{ - .type = .note, - .arg_index = output_filename_context.value.index(output_filename_context.index), - .arg_span = output_filename_context.value.argSpan(output_filename_context.arg), - }; - var note_writer = note_details.msg.writer(allocator); - try note_writer.writeAll("output filename previously specified here"); - try diagnostics.append(note_details); - } else { - output_filename = positionals[1]; - } - } - if (output_filename == null) { - var buf = std.ArrayList(u8).init(allocator); - errdefer buf.deinit(); - - if (std.fs.path.dirname(options.input_filename)) |dirname| { - var end_pos = dirname.len; - // We want to ensure that we write a path separator at the end, so if the dirname - // doesn't end with a path sep then include the char after the dirname - // which must be a path sep. 
- if (!std.fs.path.isSep(dirname[dirname.len - 1])) end_pos += 1; - try buf.appendSlice(options.input_filename[0..end_pos]); - } - try buf.appendSlice(std.fs.path.stem(options.input_filename)); - if (options.preprocess == .only) { - try buf.appendSlice(".rcpp"); - } else { - try buf.appendSlice(".res"); - } - - options.output_filename = try buf.toOwnedSlice(); - } else { - options.output_filename = try allocator.dupe(u8, output_filename.?); - } - - if (diagnostics.hasError()) { - return error.ParseError; - } - - return options; -} - -/// Returns true if the str is a valid C identifier for use in a #define/#undef macro -pub fn isValidIdentifier(str: []const u8) bool { - for (str, 0..) |c, i| switch (c) { - '0'...'9' => if (i == 0) return false, - 'a'...'z', 'A'...'Z', '_' => {}, - else => return false, - }; - return true; -} - -/// This function is specific to how the Win32 RC command line interprets -/// max string literal length percent. -/// - Wraps on overflow of u32 -/// - Stops parsing on any invalid hexadecimal digits -/// - Errors if a digit is not the first char -/// - `-` (negative) prefix is allowed -pub fn parsePercent(str: []const u8) error{InvalidFormat}!u32 { - var result: u32 = 0; - const radix: u8 = 10; - var buf = str; - - const Prefix = enum { none, minus }; - var prefix: Prefix = .none; - switch (buf[0]) { - '-' => { - prefix = .minus; - buf = buf[1..]; - }, - else => {}, - } - - for (buf, 0..) 
|c, i| { - const digit = switch (c) { - // On invalid digit for the radix, just stop parsing but don't fail - '0'...'9' => std.fmt.charToDigit(c, radix) catch break, - else => { - // First digit must be valid - if (i == 0) { - return error.InvalidFormat; - } - break; - }, - }; - - if (result != 0) { - result *%= radix; - } - result +%= digit; - } - - switch (prefix) { - .none => {}, - .minus => result = 0 -% result, - } - - return result; -} - -test parsePercent { - try std.testing.expectEqual(@as(u32, 16), try parsePercent("16")); - try std.testing.expectEqual(@as(u32, 0), try parsePercent("0x1A")); - try std.testing.expectEqual(@as(u32, 0x1), try parsePercent("1zzzz")); - try std.testing.expectEqual(@as(u32, 0xffffffff), try parsePercent("-1")); - try std.testing.expectEqual(@as(u32, 0xfffffff0), try parsePercent("-16")); - try std.testing.expectEqual(@as(u32, 1), try parsePercent("4294967297")); - try std.testing.expectError(error.InvalidFormat, parsePercent("--1")); - try std.testing.expectError(error.InvalidFormat, parsePercent("ha")); - try std.testing.expectError(error.InvalidFormat, parsePercent("¹")); - try std.testing.expectError(error.InvalidFormat, parsePercent("~1")); -} - -pub fn renderErrorMessage(writer: anytype, config: std.io.tty.Config, err_details: Diagnostics.ErrorDetails, args: []const []const u8) !void { - try config.setColor(writer, .dim); - try writer.writeAll("<cli>"); - try config.setColor(writer, .reset); - try config.setColor(writer, .bold); - try writer.writeAll(": "); - switch (err_details.type) { - .err => { - try config.setColor(writer, .red); - try writer.writeAll("error: "); - }, - .warning => { - try config.setColor(writer, .yellow); - try writer.writeAll("warning: "); - }, - .note => { - try config.setColor(writer, .cyan); - try writer.writeAll("note: "); - }, - } - try config.setColor(writer, .reset); - try config.setColor(writer, .bold); - try writer.writeAll(err_details.msg.items); - try writer.writeByte('\n'); - try 
config.setColor(writer, .reset); - - if (!err_details.print_args) { - try writer.writeByte('\n'); - return; - } - - try config.setColor(writer, .dim); - const prefix = " ... "; - try writer.writeAll(prefix); - try config.setColor(writer, .reset); - - const arg_with_name = args[err_details.arg_index]; - const prefix_slice = arg_with_name[0..err_details.arg_span.prefix_len]; - const before_name_slice = arg_with_name[err_details.arg_span.prefix_len..err_details.arg_span.name_offset]; - var name_slice = arg_with_name[err_details.arg_span.name_offset..]; - if (err_details.arg_span.name_len > 0) name_slice.len = err_details.arg_span.name_len; - const after_name_slice = arg_with_name[err_details.arg_span.name_offset + name_slice.len ..]; - - try writer.writeAll(prefix_slice); - if (before_name_slice.len > 0) { - try config.setColor(writer, .dim); - try writer.writeAll(before_name_slice); - try config.setColor(writer, .reset); - } - try writer.writeAll(name_slice); - if (after_name_slice.len > 0) { - try config.setColor(writer, .dim); - try writer.writeAll(after_name_slice); - try config.setColor(writer, .reset); - } - - var next_arg_len: usize = 0; - if (err_details.arg_span.point_at_next_arg and err_details.arg_index + 1 < args.len) { - const next_arg = args[err_details.arg_index + 1]; - try writer.writeByte(' '); - try writer.writeAll(next_arg); - next_arg_len = next_arg.len; - } - - const last_shown_arg_index = if (err_details.arg_span.point_at_next_arg) err_details.arg_index + 1 else err_details.arg_index; - if (last_shown_arg_index + 1 < args.len) { - // special case for when pointing to a missing value within the same arg - // as the name - if (err_details.arg_span.value_offset >= arg_with_name.len) { - try writer.writeByte(' '); - } - try config.setColor(writer, .dim); - try writer.writeAll(" ..."); - try config.setColor(writer, .reset); - } - try writer.writeByte('\n'); - - try config.setColor(writer, .green); - try writer.writeByteNTimes(' ', prefix.len); - // 
Special case for when the option is *only* a prefix (e.g. invalid option: -) - if (err_details.arg_span.prefix_len == arg_with_name.len) { - try writer.writeByteNTimes('^', err_details.arg_span.prefix_len); - } else { - try writer.writeByteNTimes('~', err_details.arg_span.prefix_len); - try writer.writeByteNTimes(' ', err_details.arg_span.name_offset - err_details.arg_span.prefix_len); - if (!err_details.arg_span.point_at_next_arg and err_details.arg_span.value_offset == 0) { - try writer.writeByte('^'); - try writer.writeByteNTimes('~', name_slice.len - 1); - } else if (err_details.arg_span.value_offset > 0) { - try writer.writeByteNTimes('~', err_details.arg_span.value_offset - err_details.arg_span.name_offset); - try writer.writeByte('^'); - if (err_details.arg_span.value_offset < arg_with_name.len) { - try writer.writeByteNTimes('~', arg_with_name.len - err_details.arg_span.value_offset - 1); - } - } else if (err_details.arg_span.point_at_next_arg) { - try writer.writeByteNTimes('~', arg_with_name.len - err_details.arg_span.name_offset + 1); - try writer.writeByte('^'); - if (next_arg_len > 0) { - try writer.writeByteNTimes('~', next_arg_len - 1); - } - } - } - try writer.writeByte('\n'); - try config.setColor(writer, .reset); -} - -fn testParse(args: []const []const u8) !Options { - return (try testParseOutput(args, "")).?; -} - -fn testParseWarning(args: []const []const u8, expected_output: []const u8) !Options { - return (try testParseOutput(args, expected_output)).?; -} - -fn testParseError(args: []const []const u8, expected_output: []const u8) !void { - var maybe_options = try testParseOutput(args, expected_output); - if (maybe_options != null) { - std.debug.print("expected error, got options: {}\n", .{maybe_options.?}); - maybe_options.?.deinit(); - return error.TestExpectedError; - } -} - -fn testParseOutput(args: []const []const u8, expected_output: []const u8) !?Options { - var diagnostics = Diagnostics.init(std.testing.allocator); - defer 
diagnostics.deinit(); - - var output = std.ArrayList(u8).init(std.testing.allocator); - defer output.deinit(); - - var options = parse(std.testing.allocator, args, &diagnostics) catch |err| switch (err) { - error.ParseError => { - try diagnostics.renderToWriter(args, output.writer(), .no_color); - try std.testing.expectEqualStrings(expected_output, output.items); - return null; - }, - else => |e| return e, - }; - errdefer options.deinit(); - - try diagnostics.renderToWriter(args, output.writer(), .no_color); - try std.testing.expectEqualStrings(expected_output, output.items); - return options; -} - -test "parse errors: basic" { - try testParseError(&.{ "foo.exe", "/" }, - \\<cli>: error: invalid option: / - \\ ... / - \\ ^ - \\<cli>: error: missing input filename - \\ - \\ - ); - try testParseError(&.{ "foo.exe", "/ln" }, - \\<cli>: error: missing language tag after /ln option - \\ ... /ln - \\ ~~~~^ - \\<cli>: error: missing input filename - \\ - \\ - ); - try testParseError(&.{ "foo.exe", "-vln" }, - \\<cli>: error: missing language tag after -ln option - \\ ... -vln - \\ ~ ~~~^ - \\<cli>: error: missing input filename - \\ - \\ - ); - try testParseError(&.{ "foo.exe", "/_not-an-option" }, - \\<cli>: error: invalid option: /_not-an-option - \\ ... /_not-an-option - \\ ~^~~~~~~~~~~~~~ - \\<cli>: error: missing input filename - \\ - \\ - ); - try testParseError(&.{ "foo.exe", "-_not-an-option" }, - \\<cli>: error: invalid option: -_not-an-option - \\ ... -_not-an-option - \\ ~^~~~~~~~~~~~~~ - \\<cli>: error: missing input filename - \\ - \\ - ); - try testParseError(&.{ "foo.exe", "--_not-an-option" }, - \\<cli>: error: invalid option: --_not-an-option - \\ ... --_not-an-option - \\ ~~^~~~~~~~~~~~~~ - \\<cli>: error: missing input filename - \\ - \\ - ); - try testParseError(&.{ "foo.exe", "/v_not-an-option" }, - \\<cli>: error: invalid option: /_not-an-option - \\ ... 
/v_not-an-option - \\ ~ ^~~~~~~~~~~~~~ - \\<cli>: error: missing input filename - \\ - \\ - ); - try testParseError(&.{ "foo.exe", "-v_not-an-option" }, - \\<cli>: error: invalid option: -_not-an-option - \\ ... -v_not-an-option - \\ ~ ^~~~~~~~~~~~~~ - \\<cli>: error: missing input filename - \\ - \\ - ); - try testParseError(&.{ "foo.exe", "--v_not-an-option" }, - \\<cli>: error: invalid option: --_not-an-option - \\ ... --v_not-an-option - \\ ~~ ^~~~~~~~~~~~~~ - \\<cli>: error: missing input filename - \\ - \\ - ); - try testParseError(&.{ "foo.exe", "/some/absolute/path/parsed/as/an/option.rc" }, - \\<cli>: error: the /s option is unsupported - \\ ... /some/absolute/path/parsed/as/an/option.rc - \\ ~^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - \\<cli>: error: missing input filename - \\ - \\<cli>: note: if this argument was intended to be the input filename, then -- should be specified in front of it to exclude it from option parsing - \\ ... /some/absolute/path/parsed/as/an/option.rc - \\ ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - \\ - ); -} - -test "parse errors: /ln" { - try testParseError(&.{ "foo.exe", "/ln", "invalid", "foo.rc" }, - \\<cli>: error: invalid language tag: invalid - \\ ... /ln invalid ... - \\ ~~~~^~~~~~~ - \\ - ); - try testParseError(&.{ "foo.exe", "/lninvalid", "foo.rc" }, - \\<cli>: error: invalid language tag: invalid - \\ ... /lninvalid ... 
- \\ ~~~^~~~~~~ - \\ - ); -} - -test "parse: options" { - { - var options = try testParse(&.{ "foo.exe", "/v", "foo.rc" }); - defer options.deinit(); - - try std.testing.expectEqual(true, options.verbose); - try std.testing.expectEqualStrings("foo.rc", options.input_filename); - try std.testing.expectEqualStrings("foo.res", options.output_filename); - } - { - var options = try testParse(&.{ "foo.exe", "/vx", "foo.rc" }); - defer options.deinit(); - - try std.testing.expectEqual(true, options.verbose); - try std.testing.expectEqual(true, options.ignore_include_env_var); - try std.testing.expectEqualStrings("foo.rc", options.input_filename); - try std.testing.expectEqualStrings("foo.res", options.output_filename); - } - { - var options = try testParse(&.{ "foo.exe", "/xv", "foo.rc" }); - defer options.deinit(); - - try std.testing.expectEqual(true, options.verbose); - try std.testing.expectEqual(true, options.ignore_include_env_var); - try std.testing.expectEqualStrings("foo.rc", options.input_filename); - try std.testing.expectEqualStrings("foo.res", options.output_filename); - } - { - var options = try testParse(&.{ "foo.exe", "/xvFObar.res", "foo.rc" }); - defer options.deinit(); - - try std.testing.expectEqual(true, options.verbose); - try std.testing.expectEqual(true, options.ignore_include_env_var); - try std.testing.expectEqualStrings("foo.rc", options.input_filename); - try std.testing.expectEqualStrings("bar.res", options.output_filename); - } -} - -test "parse: define and undefine" { - { - var options = try testParse(&.{ "foo.exe", "/dfoo", "foo.rc" }); - defer options.deinit(); - - const action = options.symbols.get("foo").?; - try std.testing.expectEqual(Options.SymbolAction.define, action); - try std.testing.expectEqualStrings("1", action.define); - } - { - var options = try testParse(&.{ "foo.exe", "/dfoo=bar", "/dfoo=baz", "foo.rc" }); - defer options.deinit(); - - const action = options.symbols.get("foo").?; - try 
std.testing.expectEqual(Options.SymbolAction.define, action); - try std.testing.expectEqualStrings("baz", action.define); - } - { - var options = try testParse(&.{ "foo.exe", "/ufoo", "foo.rc" }); - defer options.deinit(); - - const action = options.symbols.get("foo").?; - try std.testing.expectEqual(Options.SymbolAction.undefine, action); - } - { - // Once undefined, future defines are ignored - var options = try testParse(&.{ "foo.exe", "/ufoo", "/dfoo", "foo.rc" }); - defer options.deinit(); - - const action = options.symbols.get("foo").?; - try std.testing.expectEqual(Options.SymbolAction.undefine, action); - } - { - // Undefined always takes precedence - var options = try testParse(&.{ "foo.exe", "/dfoo", "/ufoo", "/dfoo", "foo.rc" }); - defer options.deinit(); - - const action = options.symbols.get("foo").?; - try std.testing.expectEqual(Options.SymbolAction.undefine, action); - } - { - // Warn + ignore invalid identifiers - var options = try testParseWarning( - &.{ "foo.exe", "/dfoo bar", "/u", "0leadingdigit", "foo.rc" }, - \\<cli>: warning: symbol "foo bar" is not a valid identifier and therefore cannot be defined - \\ ... /dfoo bar ... - \\ ~~^~~~~~~ - \\<cli>: warning: symbol "0leadingdigit" is not a valid identifier and therefore cannot be undefined - \\ ... /u 0leadingdigit ... - \\ ~~~^~~~~~~~~~~~~ - \\ - , - ); - defer options.deinit(); - - try std.testing.expectEqual(@as(usize, 0), options.symbols.count()); - } -} - -test "parse: /sl" { - try testParseError(&.{ "foo.exe", "/sl", "0", "foo.rc" }, - \\<cli>: error: percent out of range: 0 (parsed from '0') - \\ ... /sl 0 ... - \\ ~~~~^ - \\<cli>: note: string length percent must be an integer between 1 and 100 (inclusive) - \\ - \\ - ); - try testParseError(&.{ "foo.exe", "/sl", "abcd", "foo.rc" }, - \\<cli>: error: invalid percent format 'abcd' - \\ ... /sl abcd ... 
- \\ ~~~~^~~~ - \\<cli>: note: string length percent must be an integer between 1 and 100 (inclusive) - \\ - \\ - ); - { - var options = try testParse(&.{ "foo.exe", "foo.rc" }); - defer options.deinit(); - - try std.testing.expectEqual(@as(u15, lex.default_max_string_literal_codepoints), options.max_string_literal_codepoints); - } - { - var options = try testParse(&.{ "foo.exe", "/sl100", "foo.rc" }); - defer options.deinit(); - - try std.testing.expectEqual(@as(u15, max_string_literal_length_100_percent), options.max_string_literal_codepoints); - } - { - var options = try testParse(&.{ "foo.exe", "-SL33", "foo.rc" }); - defer options.deinit(); - - try std.testing.expectEqual(@as(u15, 2703), options.max_string_literal_codepoints); - } - { - var options = try testParse(&.{ "foo.exe", "/sl15", "foo.rc" }); - defer options.deinit(); - - try std.testing.expectEqual(@as(u15, 1228), options.max_string_literal_codepoints); - } -} - -test "parse: unsupported MUI-related options" { - try testParseError(&.{ "foo.exe", "/q", "blah", "/g1", "-G2", "blah", "/fm", "blah", "/g", "blah", "foo.rc" }, - \\<cli>: error: the /q option is unsupported - \\ ... /q ... - \\ ~^ - \\<cli>: error: the /g1 option is unsupported - \\ ... /g1 ... - \\ ~^~ - \\<cli>: error: the -G2 option is unsupported - \\ ... -G2 ... - \\ ~^~ - \\<cli>: error: the /fm option is unsupported - \\ ... /fm ... - \\ ~^~ - \\<cli>: error: the /g option is unsupported - \\ ... /g ... - \\ ~^ - \\ - ); -} - -test "parse: unsupported LCX/LCE-related options" { - try testParseError(&.{ "foo.exe", "/t", "/tp:", "/tp:blah", "/tm", "/tc", "/tw", "-TEti", "/ta", "/tn", "blah", "foo.rc" }, - \\<cli>: error: the /t option is unsupported - \\ ... /t ... - \\ ~^ - \\<cli>: error: missing value for /tp: option - \\ ... /tp: ... - \\ ~~~~^ - \\<cli>: error: the /tp: option is unsupported - \\ ... /tp: ... - \\ ~^~~ - \\<cli>: error: the /tp: option is unsupported - \\ ... /tp:blah ... 
- \\ ~^~~~~~~ - \\<cli>: error: the /tm option is unsupported - \\ ... /tm ... - \\ ~^~ - \\<cli>: error: the /tc option is unsupported - \\ ... /tc ... - \\ ~^~ - \\<cli>: error: the /tw option is unsupported - \\ ... /tw ... - \\ ~^~ - \\<cli>: error: the -TE option is unsupported - \\ ... -TEti ... - \\ ~^~ - \\<cli>: error: the -ti option is unsupported - \\ ... -TEti ... - \\ ~ ^~ - \\<cli>: error: the /ta option is unsupported - \\ ... /ta ... - \\ ~^~ - \\<cli>: error: the /tn option is unsupported - \\ ... /tn ... - \\ ~^~ - \\ - ); -} - -test "maybeAppendRC" { - var tmp = std.testing.tmpDir(.{}); - defer tmp.cleanup(); - - var options = try testParse(&.{ "foo.exe", "foo" }); - defer options.deinit(); - try std.testing.expectEqualStrings("foo", options.input_filename); - - // Create the file so that it's found. In this scenario, .rc should not get - // appended. - var file = try tmp.dir.createFile("foo", .{}); - file.close(); - try options.maybeAppendRC(tmp.dir); - try std.testing.expectEqualStrings("foo", options.input_filename); - - // Now delete the file and try again. Since the verbatim name is no longer found - // and the input filename does not have an extension, .rc should get appended. - try tmp.dir.deleteFile("foo"); - try options.maybeAppendRC(tmp.dir); - try std.testing.expectEqualStrings("foo.rc", options.input_filename); -} diff --git a/src/resinator/code_pages.zig b/src/resinator/code_pages.zig @@ -1,487 +0,0 @@ -const std = @import("std"); -const windows1252 = @import("windows1252.zig"); - -// TODO: Parts of this comment block may be more relevant to string/NameOrOrdinal parsing -// than it is to the stuff in this file. 
-// -// ‰ representations for context: -// Win-1252 89 -// UTF-8 E2 80 B0 -// UTF-16 20 30 -// -// With code page 65001: -// ‰ RCDATA { "‰" L"‰" } -// File encoded as Windows-1252: -// ‰ => <U+FFFD REPLACEMENT CHARACTER> as u16 -// "‰" => 0x3F ('?') -// L"‰" => <U+FFFD REPLACEMENT CHARACTER> as u16 -// File encoded as UTF-8: -// ‰ => <U+2030 ‰> as u16 -// "‰" => 0x89 ('‰' encoded as Windows-1252) -// L"‰" => <U+2030 ‰> as u16 -// -// With code page 1252: -// ‰ RCDATA { "‰" L"‰" } -// File encoded as Windows-1252: -// ‰ => <U+2030 ‰> as u16 -// "‰" => 0x89 ('‰' encoded as Windows-1252) -// L"‰" => <U+2030 ‰> as u16 -// File encoded as UTF-8: -// ‰ => 0xE2 as u16, 0x20AC as u16, 0xB0 as u16 -// ^ first byte of utf8 representation -// ^ second byte of UTF-8 representation (0x80), but interpretted as -// Windows-1252 ('€') and then converted to UTF-16 (<U+20AC>) -// ^ third byte of utf8 representation -// "‰" => 0xE2, 0x80, 0xB0 (the bytes of the UTF-8 representation) -// L"‰" => 0xE2 as u16, 0x20AC as u16, 0xB0 as u16 (see '‰ =>' explanation) -// -// With code page 1252: -// <0x90> RCDATA { "<0x90>" L"<0x90>" } -// File encoded as Windows-1252: -// <0x90> => 0x90 as u16 -// "<0x90>" => 0x90 -// L"<0x90>" => 0x90 as u16 -// File encoded as UTF-8: -// <0x90> => 0xC2 as u16, 0x90 as u16 -// "<0x90>" => 0xC2, 0x90 (the bytes of the UTF-8 representation of <U+0090>) -// L"<0x90>" => 0xC2 as u16, 0x90 as u16 -// -// Within a raw data block, file encoded as Windows-1252 ( is <0xC2>): -// "Âa" L"Âa" "\xC2ad" L"\xC2AD" -// With code page 1252: -// C2 61 C2 00 61 00 C2 61 64 AD C2 -// Â^ a^ Â~~~^ a~~~^ .^ a^ d^ ^~~~~\xC2AD -// \xC2~` -// With code page 65001: -// 3F 61 FD FF 61 00 C2 61 64 AD C2 -// ^. a^ ^~~~. a~~~^ ^. a^ d^ ^~~~~\xC2AD -// `. `. `~\xC2 -// `. `.~<0xC2>a is not well-formed UTF-8 (0xC2 expects a continutation byte after it). -// `. Because 'a' is a valid first byte of a UTF-8 sequence, it is not included in the -// `. 
invalid sequence so only the <0xC2> gets converted to <U+FFFD>. -// `~Same as ^ but converted to '?' instead. -// -// Within a raw data block, file encoded as Windows-1252 (ð is <0xF0>, € is <0x80>): -// "ð€a" L"ð€a" -// With code page 1252: -// F0 80 61 F0 00 AC 20 61 00 -// ð^ €^ a^ ð~~~^ €~~~^ a~~~^ -// With code page 65001: -// 3F 61 FD FF 61 00 -// ^. a^ ^~~~. a~~~^ -// `. `. -// `. `.~<0xF0><0x80> is not well-formed UTF-8, and <0x80> is not a valid first byte, so -// `. both bytes are considered an invalid sequence and get converted to '<U+FFFD>' -// `~Same as ^ but converted to '?' instead. - -/// https://learn.microsoft.com/en-us/windows/win32/intl/code-page-identifiers -pub const CodePage = enum(u16) { - // supported - windows1252 = 1252, // windows-1252 ANSI Latin 1; Western European (Windows) - utf8 = 65001, // utf-8 Unicode (UTF-8) - - // unsupported but valid - ibm037 = 37, // IBM037 IBM EBCDIC US-Canada - ibm437 = 437, // IBM437 OEM United States - ibm500 = 500, // IBM500 IBM EBCDIC International - asmo708 = 708, // ASMO-708 Arabic (ASMO 708) - asmo449plus = 709, // Arabic (ASMO-449+, BCON V4) - transparent_arabic = 710, // Arabic - Transparent Arabic - dos720 = 720, // DOS-720 Arabic (Transparent ASMO); Arabic (DOS) - ibm737 = 737, // ibm737 OEM Greek (formerly 437G); Greek (DOS) - ibm775 = 775, // ibm775 OEM Baltic; Baltic (DOS) - ibm850 = 850, // ibm850 OEM Multilingual Latin 1; Western European (DOS) - ibm852 = 852, // ibm852 OEM Latin 2; Central European (DOS) - ibm855 = 855, // IBM855 OEM Cyrillic (primarily Russian) - ibm857 = 857, // ibm857 OEM Turkish; Turkish (DOS) - ibm00858 = 858, // IBM00858 OEM Multilingual Latin 1 + Euro symbol - ibm860 = 860, // IBM860 OEM Portuguese; Portuguese (DOS) - ibm861 = 861, // ibm861 OEM Icelandic; Icelandic (DOS) - dos862 = 862, // DOS-862 OEM Hebrew; Hebrew (DOS) - ibm863 = 863, // IBM863 OEM French Canadian; French Canadian (DOS) - ibm864 = 864, // IBM864 OEM Arabic; Arabic (864) - ibm865 = 865, // IBM865 
OEM Nordic; Nordic (DOS) - cp866 = 866, // cp866 OEM Russian; Cyrillic (DOS) - ibm869 = 869, // ibm869 OEM Modern Greek; Greek, Modern (DOS) - ibm870 = 870, // IBM870 IBM EBCDIC Multilingual/ROECE (Latin 2); IBM EBCDIC Multilingual Latin 2 - windows874 = 874, // windows-874 Thai (Windows) - cp875 = 875, // cp875 IBM EBCDIC Greek Modern - shift_jis = 932, // shift_jis ANSI/OEM Japanese; Japanese (Shift-JIS) - gb2312 = 936, // gb2312 ANSI/OEM Simplified Chinese (PRC, Singapore); Chinese Simplified (GB2312) - ks_c_5601_1987 = 949, // ks_c_5601-1987 ANSI/OEM Korean (Unified Hangul Code) - big5 = 950, // big5 ANSI/OEM Traditional Chinese (Taiwan; Hong Kong SAR, PRC); Chinese Traditional (Big5) - ibm1026 = 1026, // IBM1026 IBM EBCDIC Turkish (Latin 5) - ibm01047 = 1047, // IBM01047 IBM EBCDIC Latin 1/Open System - ibm01140 = 1140, // IBM01140 IBM EBCDIC US-Canada (037 + Euro symbol); IBM EBCDIC (US-Canada-Euro) - ibm01141 = 1141, // IBM01141 IBM EBCDIC Germany (20273 + Euro symbol); IBM EBCDIC (Germany-Euro) - ibm01142 = 1142, // IBM01142 IBM EBCDIC Denmark-Norway (20277 + Euro symbol); IBM EBCDIC (Denmark-Norway-Euro) - ibm01143 = 1143, // IBM01143 IBM EBCDIC Finland-Sweden (20278 + Euro symbol); IBM EBCDIC (Finland-Sweden-Euro) - ibm01144 = 1144, // IBM01144 IBM EBCDIC Italy (20280 + Euro symbol); IBM EBCDIC (Italy-Euro) - ibm01145 = 1145, // IBM01145 IBM EBCDIC Latin America-Spain (20284 + Euro symbol); IBM EBCDIC (Spain-Euro) - ibm01146 = 1146, // IBM01146 IBM EBCDIC United Kingdom (20285 + Euro symbol); IBM EBCDIC (UK-Euro) - ibm01147 = 1147, // IBM01147 IBM EBCDIC France (20297 + Euro symbol); IBM EBCDIC (France-Euro) - ibm01148 = 1148, // IBM01148 IBM EBCDIC International (500 + Euro symbol); IBM EBCDIC (International-Euro) - ibm01149 = 1149, // IBM01149 IBM EBCDIC Icelandic (20871 + Euro symbol); IBM EBCDIC (Icelandic-Euro) - utf16 = 1200, // utf-16 Unicode UTF-16, little endian byte order (BMP of ISO 10646); available only to managed applications - utf16_fffe = 
1201, // unicodeFFFE Unicode UTF-16, big endian byte order; available only to managed applications - windows1250 = 1250, // windows-1250 ANSI Central European; Central European (Windows) - windows1251 = 1251, // windows-1251 ANSI Cyrillic; Cyrillic (Windows) - windows1253 = 1253, // windows-1253 ANSI Greek; Greek (Windows) - windows1254 = 1254, // windows-1254 ANSI Turkish; Turkish (Windows) - windows1255 = 1255, // windows-1255 ANSI Hebrew; Hebrew (Windows) - windows1256 = 1256, // windows-1256 ANSI Arabic; Arabic (Windows) - windows1257 = 1257, // windows-1257 ANSI Baltic; Baltic (Windows) - windows1258 = 1258, // windows-1258 ANSI/OEM Vietnamese; Vietnamese (Windows) - johab = 1361, // Johab Korean (Johab) - macintosh = 10000, // macintosh MAC Roman; Western European (Mac) - x_mac_japanese = 10001, // x-mac-japanese Japanese (Mac) - x_mac_chinesetrad = 10002, // x-mac-chinesetrad MAC Traditional Chinese (Big5); Chinese Traditional (Mac) - x_mac_korean = 10003, // x-mac-korean Korean (Mac) - x_mac_arabic = 10004, // x-mac-arabic Arabic (Mac) - x_mac_hebrew = 10005, // x-mac-hebrew Hebrew (Mac) - x_mac_greek = 10006, // x-mac-greek Greek (Mac) - x_mac_cyrillic = 10007, // x-mac-cyrillic Cyrillic (Mac) - x_mac_chinesesimp = 10008, // x-mac-chinesesimp MAC Simplified Chinese (GB 2312); Chinese Simplified (Mac) - x_mac_romanian = 10010, // x-mac-romanian Romanian (Mac) - x_mac_ukranian = 10017, // x-mac-ukrainian Ukrainian (Mac) - x_mac_thai = 10021, // x-mac-thai Thai (Mac) - x_mac_ce = 10029, // x-mac-ce MAC Latin 2; Central European (Mac) - x_mac_icelandic = 10079, // x-mac-icelandic Icelandic (Mac) - x_mac_turkish = 10081, // x-mac-turkish Turkish (Mac) - x_mac_croatian = 10082, // x-mac-croatian Croatian (Mac) - utf32 = 12000, // utf-32 Unicode UTF-32, little endian byte order; available only to managed applications - utf32_be = 12001, // utf-32BE Unicode UTF-32, big endian byte order; available only to managed applications - x_chinese_cns = 20000, // 
x-Chinese_CNS CNS Taiwan; Chinese Traditional (CNS) - x_cp20001 = 20001, // x-cp20001 TCA Taiwan - x_chinese_eten = 20002, // x_Chinese-Eten Eten Taiwan; Chinese Traditional (Eten) - x_cp20003 = 20003, // x-cp20003 IBM5550 Taiwan - x_cp20004 = 20004, // x-cp20004 TeleText Taiwan - x_cp20005 = 20005, // x-cp20005 Wang Taiwan - x_ia5 = 20105, // x-IA5 IA5 (IRV International Alphabet No. 5, 7-bit); Western European (IA5) - x_ia5_german = 20106, // x-IA5-German IA5 German (7-bit) - x_ia5_swedish = 20107, // x-IA5-Swedish IA5 Swedish (7-bit) - x_ia5_norwegian = 20108, // x-IA5-Norwegian IA5 Norwegian (7-bit) - us_ascii = 20127, // us-ascii US-ASCII (7-bit) - x_cp20261 = 20261, // x-cp20261 T.61 - x_cp20269 = 20269, // x-cp20269 ISO 6937 Non-Spacing Accent - ibm273 = 20273, // IBM273 IBM EBCDIC Germany - ibm277 = 20277, // IBM277 IBM EBCDIC Denmark-Norway - ibm278 = 20278, // IBM278 IBM EBCDIC Finland-Sweden - ibm280 = 20280, // IBM280 IBM EBCDIC Italy - ibm284 = 20284, // IBM284 IBM EBCDIC Latin America-Spain - ibm285 = 20285, // IBM285 IBM EBCDIC United Kingdom - ibm290 = 20290, // IBM290 IBM EBCDIC Japanese Katakana Extended - ibm297 = 20297, // IBM297 IBM EBCDIC France - ibm420 = 20420, // IBM420 IBM EBCDIC Arabic - ibm423 = 20423, // IBM423 IBM EBCDIC Greek - ibm424 = 20424, // IBM424 IBM EBCDIC Hebrew - x_ebcdic_korean_extended = 20833, // x-EBCDIC-KoreanExtended IBM EBCDIC Korean Extended - ibm_thai = 20838, // IBM-Thai IBM EBCDIC Thai - koi8_r = 20866, // koi8-r Russian (KOI8-R); Cyrillic (KOI8-R) - ibm871 = 20871, // IBM871 IBM EBCDIC Icelandic - ibm880 = 20880, // IBM880 IBM EBCDIC Cyrillic Russian - ibm905 = 20905, // IBM905 IBM EBCDIC Turkish - ibm00924 = 20924, // IBM00924 IBM EBCDIC Latin 1/Open System (1047 + Euro symbol) - euc_jp_jis = 20932, // EUC-JP Japanese (JIS 0208-1990 and 0212-1990) - x_cp20936 = 20936, // x-cp20936 Simplified Chinese (GB2312); Chinese Simplified (GB2312-80) - x_cp20949 = 20949, // x-cp20949 Korean Wansung - cp1025 = 21025, // 
cp1025 IBM EBCDIC Cyrillic Serbian-Bulgarian - // = 21027, // (deprecated) - koi8_u = 21866, // koi8-u Ukrainian (KOI8-U); Cyrillic (KOI8-U) - iso8859_1 = 28591, // iso-8859-1 ISO 8859-1 Latin 1; Western European (ISO) - iso8859_2 = 28592, // iso-8859-2 ISO 8859-2 Central European; Central European (ISO) - iso8859_3 = 28593, // iso-8859-3 ISO 8859-3 Latin 3 - iso8859_4 = 28594, // iso-8859-4 ISO 8859-4 Baltic - iso8859_5 = 28595, // iso-8859-5 ISO 8859-5 Cyrillic - iso8859_6 = 28596, // iso-8859-6 ISO 8859-6 Arabic - iso8859_7 = 28597, // iso-8859-7 ISO 8859-7 Greek - iso8859_8 = 28598, // iso-8859-8 ISO 8859-8 Hebrew; Hebrew (ISO-Visual) - iso8859_9 = 28599, // iso-8859-9 ISO 8859-9 Turkish - iso8859_13 = 28603, // iso-8859-13 ISO 8859-13 Estonian - iso8859_15 = 28605, // iso-8859-15 ISO 8859-15 Latin 9 - x_europa = 29001, // x-Europa Europa 3 - is8859_8_i = 38598, // iso-8859-8-i ISO 8859-8 Hebrew; Hebrew (ISO-Logical) - iso2022_jp = 50220, // iso-2022-jp ISO 2022 Japanese with no halfwidth Katakana; Japanese (JIS) - cs_iso2022_jp = 50221, // csISO2022JP ISO 2022 Japanese with halfwidth Katakana; Japanese (JIS-Allow 1 byte Kana) - iso2022_jp_jis_x = 50222, // iso-2022-jp ISO 2022 Japanese JIS X 0201-1989; Japanese (JIS-Allow 1 byte Kana - SO/SI) - iso2022_kr = 50225, // iso-2022-kr ISO 2022 Korean - x_cp50227 = 50227, // x-cp50227 ISO 2022 Simplified Chinese; Chinese Simplified (ISO 2022) - iso2022_chinesetrad = 50229, // ISO 2022 Traditional Chinese - ebcdic_jp_katakana_extended = 50930, // EBCDIC Japanese (Katakana) Extended - ebcdic_us_ca_jp = 50931, // EBCDIC US-Canada and Japanese - ebcdic_kr_extended = 50933, // EBCDIC Korean Extended and Korean - ebcdic_chinesesimp_extended = 50935, // EBCDIC Simplified Chinese Extended and Simplified Chinese - ebcdic_chinesesimp = 50936, // EBCDIC Simplified Chinese - ebcdic_us_ca_chinesetrad = 50937, // EBCDIC US-Canada and Traditional Chinese - ebcdic_jp_latin_extended = 50939, // EBCDIC Japanese (Latin) Extended and 
Japanese - euc_jp = 51932, // euc-jp EUC Japanese - euc_cn = 51936, // EUC-CN EUC Simplified Chinese; Chinese Simplified (EUC) - euc_kr = 51949, // euc-kr EUC Korean - euc_chinesetrad = 51950, // EUC Traditional Chinese - hz_gb2312 = 52936, // hz-gb-2312 HZ-GB2312 Simplified Chinese; Chinese Simplified (HZ) - gb18030 = 54936, // GB18030 Windows XP and later: GB18030 Simplified Chinese (4 byte); Chinese Simplified (GB18030) - x_iscii_de = 57002, // x-iscii-de ISCII Devanagari - x_iscii_be = 57003, // x-iscii-be ISCII Bangla - x_iscii_ta = 57004, // x-iscii-ta ISCII Tamil - x_iscii_te = 57005, // x-iscii-te ISCII Telugu - x_iscii_as = 57006, // x-iscii-as ISCII Assamese - x_iscii_or = 57007, // x-iscii-or ISCII Odia - x_iscii_ka = 57008, // x-iscii-ka ISCII Kannada - x_iscii_ma = 57009, // x-iscii-ma ISCII Malayalam - x_iscii_gu = 57010, // x-iscii-gu ISCII Gujarati - x_iscii_pa = 57011, // x-iscii-pa ISCII Punjabi - utf7 = 65000, // utf-7 Unicode (UTF-7) - - pub fn codepointAt(code_page: CodePage, index: usize, bytes: []const u8) ?Codepoint { - if (index >= bytes.len) return null; - switch (code_page) { - .windows1252 => { - // All byte values have a representation, so just convert the byte - return Codepoint{ - .value = windows1252.toCodepoint(bytes[index]), - .byte_len = 1, - }; - }, - .utf8 => { - return Utf8.WellFormedDecoder.decode(bytes[index..]); - }, - else => unreachable, - } - } - - pub fn isSupported(code_page: CodePage) bool { - return switch (code_page) { - .windows1252, .utf8 => true, - else => false, - }; - } - - pub fn getByIdentifier(identifier: u16) !CodePage { - // There's probably a more efficient way to do this (e.g. ComptimeHashMap?) but - // this should be fine, especially since this function likely won't be called much. 
- inline for (@typeInfo(CodePage).Enum.fields) |enumField| { - if (identifier == enumField.value) { - return @field(CodePage, enumField.name); - } - } - return error.InvalidCodePage; - } - - pub fn getByIdentifierEnsureSupported(identifier: u16) !CodePage { - const code_page = try getByIdentifier(identifier); - switch (isSupported(code_page)) { - true => return code_page, - false => return error.UnsupportedCodePage, - } - } -}; - -pub const Utf8 = struct { - /// Implements decoding with rejection of ill-formed UTF-8 sequences based on section - /// D92 of Chapter 3 of the Unicode standard (Table 3-7 specifically). - pub const WellFormedDecoder = struct { - /// Like std.unicode.utf8ByteSequenceLength, but: - /// - Rejects non-well-formed first bytes, i.e. C0-C1, F5-FF - /// - Returns an optional value instead of an error union - pub fn sequenceLength(first_byte: u8) ?u3 { - return switch (first_byte) { - 0x00...0x7F => 1, - 0xC2...0xDF => 2, - 0xE0...0xEF => 3, - 0xF0...0xF4 => 4, - else => null, - }; - } - - fn isContinuationByte(byte: u8) bool { - return switch (byte) { - 0x80...0xBF => true, - else => false, - }; - } - - pub fn decode(bytes: []const u8) Codepoint { - std.debug.assert(bytes.len > 0); - const first_byte = bytes[0]; - const expected_len = sequenceLength(first_byte) orelse { - return .{ .value = Codepoint.invalid, .byte_len = 1 }; - }; - if (expected_len == 1) return .{ .value = first_byte, .byte_len = 1 }; - - var value: u21 = first_byte & 0b00011111; - var byte_index: u8 = 1; - while (byte_index < @min(bytes.len, expected_len)) : (byte_index += 1) { - const byte = bytes[byte_index]; - // See Table 3-7 of D92 in Chapter 3 of the Unicode Standard - const valid: bool = switch (byte_index) { - 1 => switch (first_byte) { - 0xE0 => switch (byte) { - 0xA0...0xBF => true, - else => false, - }, - 0xED => switch (byte) { - 0x80...0x9F => true, - else => false, - }, - 0xF0 => switch (byte) { - 0x90...0xBF => true, - else => false, - }, - 0xF4 => switch (byte) 
{ - 0x80...0x8F => true, - else => false, - }, - else => switch (byte) { - 0x80...0xBF => true, - else => false, - }, - }, - else => switch (byte) { - 0x80...0xBF => true, - else => false, - }, - }; - - if (!valid) { - var len = byte_index; - // Only include the byte in the invalid sequence if it's in the range - // of a continuation byte. All other values should not be included in the - // invalid sequence. - // - // Note: This is how the Windows RC compiler handles this, this may not - // be the correct-as-according-to-the-Unicode-standard way to do it. - if (isContinuationByte(byte)) len += 1; - return .{ .value = Codepoint.invalid, .byte_len = len }; - } - - value <<= 6; - value |= byte & 0b00111111; - } - if (byte_index != expected_len) { - return .{ .value = Codepoint.invalid, .byte_len = byte_index }; - } - return .{ .value = value, .byte_len = expected_len }; - } - }; -}; - -test "Utf8.WellFormedDecoder" { - const invalid_utf8 = "\xF0\x80"; - const decoded = Utf8.WellFormedDecoder.decode(invalid_utf8); - try std.testing.expectEqual(Codepoint.invalid, decoded.value); - try std.testing.expectEqual(@as(usize, 2), decoded.byte_len); -} - -test "codepointAt invalid utf8" { - { - const invalid_utf8 = "\xf0\xf0\x80\x80\x80"; - try std.testing.expectEqual(Codepoint{ - .value = Codepoint.invalid, - .byte_len = 1, - }, CodePage.utf8.codepointAt(0, invalid_utf8).?); - try std.testing.expectEqual(Codepoint{ - .value = Codepoint.invalid, - .byte_len = 2, - }, CodePage.utf8.codepointAt(1, invalid_utf8).?); - try std.testing.expectEqual(Codepoint{ - .value = Codepoint.invalid, - .byte_len = 1, - }, CodePage.utf8.codepointAt(3, invalid_utf8).?); - try std.testing.expectEqual(Codepoint{ - .value = Codepoint.invalid, - .byte_len = 1, - }, CodePage.utf8.codepointAt(4, invalid_utf8).?); - try std.testing.expectEqual(@as(?Codepoint, null), CodePage.windows1252.codepointAt(5, invalid_utf8)); - } - - { - const invalid_utf8 = "\xE1\xA0\xC0"; - try 
std.testing.expectEqual(Codepoint{ - .value = Codepoint.invalid, - .byte_len = 2, - }, CodePage.utf8.codepointAt(0, invalid_utf8).?); - try std.testing.expectEqual(Codepoint{ - .value = Codepoint.invalid, - .byte_len = 1, - }, CodePage.utf8.codepointAt(2, invalid_utf8).?); - try std.testing.expectEqual(@as(?Codepoint, null), CodePage.windows1252.codepointAt(3, invalid_utf8)); - } - - { - const invalid_utf8 = "\xD2"; - try std.testing.expectEqual(Codepoint{ - .value = Codepoint.invalid, - .byte_len = 1, - }, CodePage.utf8.codepointAt(0, invalid_utf8).?); - try std.testing.expectEqual(@as(?Codepoint, null), CodePage.windows1252.codepointAt(1, invalid_utf8)); - } - - { - const invalid_utf8 = "\xE1\xA0"; - try std.testing.expectEqual(Codepoint{ - .value = Codepoint.invalid, - .byte_len = 2, - }, CodePage.utf8.codepointAt(0, invalid_utf8).?); - try std.testing.expectEqual(@as(?Codepoint, null), CodePage.windows1252.codepointAt(2, invalid_utf8)); - } - - { - const invalid_utf8 = "\xC5\xFF"; - try std.testing.expectEqual(Codepoint{ - .value = Codepoint.invalid, - .byte_len = 1, - }, CodePage.utf8.codepointAt(0, invalid_utf8).?); - try std.testing.expectEqual(Codepoint{ - .value = Codepoint.invalid, - .byte_len = 1, - }, CodePage.utf8.codepointAt(1, invalid_utf8).?); - try std.testing.expectEqual(@as(?Codepoint, null), CodePage.windows1252.codepointAt(2, invalid_utf8)); - } -} - -test "codepointAt utf8 encoded" { - const utf8_encoded = "²"; - - // with code page utf8 - try std.testing.expectEqual(Codepoint{ - .value = '²', - .byte_len = 2, - }, CodePage.utf8.codepointAt(0, utf8_encoded).?); - try std.testing.expectEqual(@as(?Codepoint, null), CodePage.utf8.codepointAt(2, utf8_encoded)); - - // with code page windows1252 - try std.testing.expectEqual(Codepoint{ - .value = '\xC2', - .byte_len = 1, - }, CodePage.windows1252.codepointAt(0, utf8_encoded).?); - try std.testing.expectEqual(Codepoint{ - .value = '\xB2', - .byte_len = 1, - }, CodePage.windows1252.codepointAt(1, 
utf8_encoded).?); - try std.testing.expectEqual(@as(?Codepoint, null), CodePage.windows1252.codepointAt(2, utf8_encoded)); -} - -test "codepointAt windows1252 encoded" { - const windows1252_encoded = "\xB2"; - - // with code page utf8 - try std.testing.expectEqual(Codepoint{ - .value = Codepoint.invalid, - .byte_len = 1, - }, CodePage.utf8.codepointAt(0, windows1252_encoded).?); - try std.testing.expectEqual(@as(?Codepoint, null), CodePage.utf8.codepointAt(2, windows1252_encoded)); - - // with code page windows1252 - try std.testing.expectEqual(Codepoint{ - .value = '\xB2', - .byte_len = 1, - }, CodePage.windows1252.codepointAt(0, windows1252_encoded).?); - try std.testing.expectEqual(@as(?Codepoint, null), CodePage.windows1252.codepointAt(1, windows1252_encoded)); -} - -pub const Codepoint = struct { - value: u21, - byte_len: usize, - - pub const invalid: u21 = std.math.maxInt(u21); -}; diff --git a/src/resinator/comments.zig b/src/resinator/comments.zig @@ -1,340 +0,0 @@ -//! Expects to run after a C preprocessor step that preserves comments. -//! -//! `rc` has a peculiar quirk where something like `blah/**/blah` will be -//! transformed into `blahblah` during parsing. However, `clang -E` will -//! transform it into `blah blah`, so in order to match `rc`, we need -//! to remove comments ourselves after the preprocessor runs. -//! Note: Multiline comments that actually span more than one line do -//! get translated to a space character by `rc`. -//! -//! Removing comments before lexing also allows the lexer to not have to -//! deal with comments which would complicate its implementation (this is something -//! of a tradeoff, as removing comments in a separate pass means that we'll -//! need to iterate the source twice instead of once, but having to deal with -//! comments when lexing would be a pain). 
- -const std = @import("std"); -const Allocator = std.mem.Allocator; -const UncheckedSliceWriter = @import("utils.zig").UncheckedSliceWriter; -const SourceMappings = @import("source_mapping.zig").SourceMappings; -const LineHandler = @import("lex.zig").LineHandler; -const formsLineEndingPair = @import("source_mapping.zig").formsLineEndingPair; - -/// `buf` must be at least as long as `source` -/// In-place transformation is supported (i.e. `source` and `buf` can be the same slice) -pub fn removeComments(source: []const u8, buf: []u8, source_mappings: ?*SourceMappings) []u8 { - std.debug.assert(buf.len >= source.len); - var result = UncheckedSliceWriter{ .slice = buf }; - const State = enum { - start, - forward_slash, - line_comment, - multiline_comment, - multiline_comment_end, - single_quoted, - single_quoted_escape, - double_quoted, - double_quoted_escape, - }; - var state: State = .start; - var index: usize = 0; - var pending_start: ?usize = null; - var line_handler = LineHandler{ .buffer = source }; - while (index < source.len) : (index += 1) { - const c = source[index]; - // TODO: Disallow \x1A, \x00, \x7F in comments. At least \x1A and \x00 can definitely - // cause errors or parsing weirdness in the Win32 RC compiler. These are disallowed - // in the lexer, but comments are stripped before getting to the lexer. - switch (state) { - .start => switch (c) { - '/' => { - state = .forward_slash; - pending_start = index; - }, - '\r', '\n' => { - _ = line_handler.incrementLineNumber(index); - result.write(c); - }, - else => { - switch (c) { - '"' => state = .double_quoted, - '\'' => state = .single_quoted, - else => {}, - } - result.write(c); - }, - }, - .forward_slash => switch (c) { - '/' => state = .line_comment, - '*' => { - state = .multiline_comment; - }, - else => { - _ = line_handler.maybeIncrementLineNumber(index); - result.writeSlice(source[pending_start.? .. 
index + 1]); - pending_start = null; - state = .start; - }, - }, - .line_comment => switch (c) { - '\r', '\n' => { - _ = line_handler.incrementLineNumber(index); - result.write(c); - state = .start; - }, - else => {}, - }, - .multiline_comment => switch (c) { - '\r' => handleMultilineCarriageReturn(source, &line_handler, index, &result, source_mappings), - '\n' => { - _ = line_handler.incrementLineNumber(index); - result.write(c); - }, - '*' => state = .multiline_comment_end, - else => {}, - }, - .multiline_comment_end => switch (c) { - '\r' => { - handleMultilineCarriageReturn(source, &line_handler, index, &result, source_mappings); - // We only want to treat this as a newline if it's part of a CRLF pair. If it's - // not, then we still want to stay in .multiline_comment_end, so that e.g. `*<\r>/` still - // functions as a `*/` comment ending. Kinda crazy, but that's how the Win32 implementation works. - if (formsLineEndingPair(source, '\r', index + 1)) { - state = .multiline_comment; - } - }, - '\n' => { - _ = line_handler.incrementLineNumber(index); - result.write(c); - state = .multiline_comment; - }, - '/' => { - state = .start; - }, - else => { - state = .multiline_comment; - }, - }, - .single_quoted => switch (c) { - '\r', '\n' => { - _ = line_handler.incrementLineNumber(index); - state = .start; - result.write(c); - }, - '\\' => { - state = .single_quoted_escape; - result.write(c); - }, - '\'' => { - state = .start; - result.write(c); - }, - else => { - result.write(c); - }, - }, - .single_quoted_escape => switch (c) { - '\r', '\n' => { - _ = line_handler.incrementLineNumber(index); - state = .start; - result.write(c); - }, - else => { - state = .single_quoted; - result.write(c); - }, - }, - .double_quoted => switch (c) { - '\r', '\n' => { - _ = line_handler.incrementLineNumber(index); - state = .start; - result.write(c); - }, - '\\' => { - state = .double_quoted_escape; - result.write(c); - }, - '"' => { - state = .start; - result.write(c); - }, - else => 
{ - result.write(c); - }, - }, - .double_quoted_escape => switch (c) { - '\r', '\n' => { - _ = line_handler.incrementLineNumber(index); - state = .start; - result.write(c); - }, - else => { - state = .double_quoted; - result.write(c); - }, - }, - } - } - return result.getWritten(); -} - -inline fn handleMultilineCarriageReturn( - source: []const u8, - line_handler: *LineHandler, - index: usize, - result: *UncheckedSliceWriter, - source_mappings: ?*SourceMappings, -) void { - // Note: Bare \r within a multiline comment should *not* be treated as a line ending for the - // purposes of removing comments, but *should* be treated as a line ending for the - // purposes of line counting/source mapping - _ = line_handler.incrementLineNumber(index); - // So only write the \r if it's part of a CRLF pair - if (formsLineEndingPair(source, '\r', index + 1)) { - result.write('\r'); - } - // And otherwise, we want to collapse the source mapping so that we can still know which - // line came from where. 
- else { - // Because the line gets collapsed, we need to decrement line number so that - // the next collapse acts on the first of the collapsed line numbers - line_handler.line_number -= 1; - if (source_mappings) |mappings| { - mappings.collapse(line_handler.line_number, 1); - } - } -} - -pub fn removeCommentsAlloc(allocator: Allocator, source: []const u8, source_mappings: ?*SourceMappings) ![]u8 { - const buf = try allocator.alloc(u8, source.len); - errdefer allocator.free(buf); - const result = removeComments(source, buf, source_mappings); - return allocator.realloc(buf, result.len); -} - -fn testRemoveComments(expected: []const u8, source: []const u8) !void { - const result = try removeCommentsAlloc(std.testing.allocator, source, null); - defer std.testing.allocator.free(result); - - try std.testing.expectEqualStrings(expected, result); -} - -test "basic" { - try testRemoveComments("", "// comment"); - try testRemoveComments("", "/* comment */"); -} - -test "mixed" { - try testRemoveComments("hello", "hello// comment"); - try testRemoveComments("hello", "hel/* comment */lo"); -} - -test "within a string" { - // escaped " is \" - try testRemoveComments( - \\blah"//som\"/*ething*/"BLAH - , - \\blah"//som\"/*ething*/"BLAH - ); -} - -test "line comments retain newlines" { - try testRemoveComments( - \\ - \\ - \\ - , - \\// comment - \\// comment - \\// comment - ); - - try testRemoveComments("\r\n", "//comment\r\n"); -} - -test "crazy" { - try testRemoveComments( - \\blah"/*som*/\""BLAH - , - \\blah"/*som*/\""/*ething*/BLAH - ); - - try testRemoveComments( - \\blah"/*som*/"BLAH RCDATA "BEGIN END - \\ - \\ - \\hello - \\" - , - \\blah"/*som*/"/*ething*/BLAH RCDATA "BEGIN END - \\// comment - \\//"blah blah" RCDATA {} - \\hello - \\" - ); -} - -test "multiline comment with newlines" { - // bare \r is not treated as a newline - try testRemoveComments("blahblah", "blah/*some\rthing*/blah"); - - try testRemoveComments( - \\blah - \\blah - , - \\blah/*some - 
\\thing*/blah - ); - try testRemoveComments( - "blah\r\nblah", - "blah/*some\r\nthing*/blah", - ); - - // handle *<not /> correctly - try testRemoveComments( - \\blah - \\ - \\ - , - \\blah/*some - \\thing* - \\/bl*ah*/ - ); -} - -test "comments appended to a line" { - try testRemoveComments( - \\blah - \\blah - , - \\blah // line comment - \\blah - ); - try testRemoveComments( - "blah \r\nblah", - "blah // line comment\r\nblah", - ); -} - -test "remove comments with mappings" { - const allocator = std.testing.allocator; - var mut_source = "blah/*\rcommented line*\r/blah".*; - var mappings = SourceMappings{}; - _ = try mappings.files.put(allocator, "test.rc"); - try mappings.set(allocator, 1, .{ .start_line = 1, .end_line = 1, .filename_offset = 0 }); - try mappings.set(allocator, 2, .{ .start_line = 2, .end_line = 2, .filename_offset = 0 }); - try mappings.set(allocator, 3, .{ .start_line = 3, .end_line = 3, .filename_offset = 0 }); - defer mappings.deinit(allocator); - - const result = removeComments(&mut_source, &mut_source, &mappings); - - try std.testing.expectEqualStrings("blahblah", result); - try std.testing.expectEqual(@as(usize, 1), mappings.mapping.items.len); - try std.testing.expectEqual(@as(usize, 3), mappings.mapping.items[0].end_line); -} - -test "in place" { - var mut_source = "blah /* comment */ blah".*; - const result = removeComments(&mut_source, &mut_source, null); - try std.testing.expectEqualStrings("blah blah", result); -} diff --git a/src/resinator/compile.zig b/src/resinator/compile.zig @@ -1,3378 +0,0 @@ -const std = @import("std"); -const builtin = @import("builtin"); -const Allocator = std.mem.Allocator; -const Node = @import("ast.zig").Node; -const lex = @import("lex.zig"); -const Parser = @import("parse.zig").Parser; -const Resource = @import("rc.zig").Resource; -const Token = @import("lex.zig").Token; -const literals = @import("literals.zig"); -const Number = literals.Number; -const SourceBytes = literals.SourceBytes; -const 
Diagnostics = @import("errors.zig").Diagnostics; -const ErrorDetails = @import("errors.zig").ErrorDetails; -const MemoryFlags = @import("res.zig").MemoryFlags; -const rc = @import("rc.zig"); -const res = @import("res.zig"); -const ico = @import("ico.zig"); -const ani = @import("ani.zig"); -const bmp = @import("bmp.zig"); -const WORD = std.os.windows.WORD; -const DWORD = std.os.windows.DWORD; -const utils = @import("utils.zig"); -const NameOrOrdinal = res.NameOrOrdinal; -const CodePage = @import("code_pages.zig").CodePage; -const CodePageLookup = @import("ast.zig").CodePageLookup; -const SourceMappings = @import("source_mapping.zig").SourceMappings; -const windows1252 = @import("windows1252.zig"); -const lang = @import("lang.zig"); -const code_pages = @import("code_pages.zig"); -const errors = @import("errors.zig"); -const native_endian = builtin.cpu.arch.endian(); - -pub const CompileOptions = struct { - cwd: std.fs.Dir, - diagnostics: *Diagnostics, - source_mappings: ?*SourceMappings = null, - /// List of paths (absolute or relative to `cwd`) for every file that the resources within the .rc file depend on. - /// Items within the list will be allocated using the allocator of the ArrayList and must be - /// freed by the caller. - /// TODO: Maybe a dedicated struct for this purpose so that it's a bit nicer to work with. - dependencies_list: ?*std.ArrayList([]const u8) = null, - default_code_page: CodePage = .windows1252, - ignore_include_env_var: bool = false, - extra_include_paths: []const []const u8 = &.{}, - /// This is just an API convenience to allow separately passing 'system' (i.e. those - /// that would normally be gotten from the INCLUDE env var) include paths. This is mostly - /// intended for use when setting `ignore_include_env_var = true`. When `ignore_include_env_var` - /// is false, `system_include_paths` will be searched before the paths in the INCLUDE env var. 
- system_include_paths: []const []const u8 = &.{}, - default_language_id: ?u16 = null, - // TODO: Implement verbose output - verbose: bool = false, - null_terminate_string_table_strings: bool = false, - /// Note: This is a u15 to ensure that the maximum number of UTF-16 code units - /// plus a null-terminator can always fit into a u16. - max_string_literal_codepoints: u15 = lex.default_max_string_literal_codepoints, - silent_duplicate_control_ids: bool = false, - warn_instead_of_error_on_invalid_code_page: bool = false, -}; - -pub fn compile(allocator: Allocator, source: []const u8, writer: anytype, options: CompileOptions) !void { - var lexer = lex.Lexer.init(source, .{ - .default_code_page = options.default_code_page, - .source_mappings = options.source_mappings, - .max_string_literal_codepoints = options.max_string_literal_codepoints, - }); - var parser = Parser.init(&lexer, .{ - .warn_instead_of_error_on_invalid_code_page = options.warn_instead_of_error_on_invalid_code_page, - }); - var tree = try parser.parse(allocator, options.diagnostics); - defer tree.deinit(); - - var search_dirs = std.ArrayList(SearchDir).init(allocator); - defer { - for (search_dirs.items) |*search_dir| { - search_dir.deinit(allocator); - } - search_dirs.deinit(); - } - - if (options.source_mappings) |source_mappings| { - const root_path = source_mappings.files.get(source_mappings.root_filename_offset); - // If dirname returns null, then the root path will be the same as - // the cwd so we don't need to add it as a distinct search path. 
- if (std.fs.path.dirname(root_path)) |root_dir_path| { - var root_dir = try options.cwd.openDir(root_dir_path, .{}); - errdefer root_dir.close(); - try search_dirs.append(.{ .dir = root_dir, .path = try allocator.dupe(u8, root_dir_path) }); - } - } - // Re-open the passed in cwd since we want to be able to close it (std.fs.cwd() shouldn't be closed) - const cwd_dir = options.cwd.openDir(".", .{}) catch |err| { - try options.diagnostics.append(.{ - .err = .failed_to_open_cwd, - .token = .{ - .id = .invalid, - .start = 0, - .end = 0, - .line_number = 1, - }, - .print_source_line = false, - .extra = .{ .file_open_error = .{ - .err = ErrorDetails.FileOpenError.enumFromError(err), - .filename_string_index = undefined, - } }, - }); - return error.CompileError; - }; - try search_dirs.append(.{ .dir = cwd_dir, .path = null }); - for (options.extra_include_paths) |extra_include_path| { - var dir = openSearchPathDir(options.cwd, extra_include_path) catch { - // TODO: maybe a warning that the search path is skipped? - continue; - }; - errdefer dir.close(); - try search_dirs.append(.{ .dir = dir, .path = try allocator.dupe(u8, extra_include_path) }); - } - for (options.system_include_paths) |system_include_path| { - var dir = openSearchPathDir(options.cwd, system_include_path) catch { - // TODO: maybe a warning that the search path is skipped? - continue; - }; - errdefer dir.close(); - try search_dirs.append(.{ .dir = dir, .path = try allocator.dupe(u8, system_include_path) }); - } - if (!options.ignore_include_env_var) { - const INCLUDE = std.process.getEnvVarOwned(allocator, "INCLUDE") catch ""; - defer allocator.free(INCLUDE); - - // The only precedence here is llvm-rc which also uses the platform-specific - // delimiter. There's no precedence set by `rc.exe` since it's Windows-only. 
- const delimiter = switch (builtin.os.tag) { - .windows => ';', - else => ':', - }; - var it = std.mem.tokenizeScalar(u8, INCLUDE, delimiter); - while (it.next()) |search_path| { - var dir = openSearchPathDir(options.cwd, search_path) catch continue; - errdefer dir.close(); - try search_dirs.append(.{ .dir = dir, .path = try allocator.dupe(u8, search_path) }); - } - } - - var arena_allocator = std.heap.ArenaAllocator.init(allocator); - defer arena_allocator.deinit(); - const arena = arena_allocator.allocator(); - - var compiler = Compiler{ - .source = source, - .arena = arena, - .allocator = allocator, - .cwd = options.cwd, - .diagnostics = options.diagnostics, - .dependencies_list = options.dependencies_list, - .input_code_pages = &tree.input_code_pages, - .output_code_pages = &tree.output_code_pages, - // This is only safe because we know search_dirs won't be modified past this point - .search_dirs = search_dirs.items, - .null_terminate_string_table_strings = options.null_terminate_string_table_strings, - .silent_duplicate_control_ids = options.silent_duplicate_control_ids, - }; - if (options.default_language_id) |default_language_id| { - compiler.state.language = res.Language.fromInt(default_language_id); - } - - try compiler.writeRoot(tree.root(), writer); -} - -pub const Compiler = struct { - source: []const u8, - arena: Allocator, - allocator: Allocator, - cwd: std.fs.Dir, - state: State = .{}, - diagnostics: *Diagnostics, - dependencies_list: ?*std.ArrayList([]const u8), - input_code_pages: *const CodePageLookup, - output_code_pages: *const CodePageLookup, - search_dirs: []SearchDir, - null_terminate_string_table_strings: bool, - silent_duplicate_control_ids: bool, - - pub const State = struct { - icon_id: u16 = 1, - string_tables: StringTablesByLanguage = .{}, - language: res.Language = .{}, - font_dir: FontDir = .{}, - version: u32 = 0, - characteristics: u32 = 0, - }; - - pub fn writeRoot(self: *Compiler, root: *Node.Root, writer: anytype) !void { - try 
writeEmptyResource(writer); - for (root.body) |node| { - try self.writeNode(node, writer); - } - - // now write the FONTDIR (if it has anything in it) - try self.state.font_dir.writeResData(self, writer); - if (self.state.font_dir.fonts.items.len != 0) { - // The Win32 RC compiler may write a different FONTDIR resource than us, - // due to it sometimes writing a non-zero-length device name/face name - // whereas we *always* write them both as zero-length. - // - // In practical terms, this doesn't matter, since for various reasons the format - // of the FONTDIR cannot be relied on and is seemingly not actually used by anything - // anymore. We still want to emit some sort of diagnostic for the purposes of being able - // to know that our .RES is intentionally not meant to be byte-for-byte identical with - // the rc.exe output. - // - // By using the hint type here, we allow this diagnostic to be detected in code, - // but it will not be printed since the end-user doesn't need to care. - try self.addErrorDetails(.{ - .err = .result_contains_fontdir, - .type = .hint, - .token = undefined, - }); - } - // once we've written every else out, we can write out the finalized STRINGTABLE resources - var string_tables_it = self.state.string_tables.tables.iterator(); - while (string_tables_it.next()) |string_table_entry| { - var string_table_it = string_table_entry.value_ptr.blocks.iterator(); - while (string_table_it.next()) |entry| { - try entry.value_ptr.writeResData(self, string_table_entry.key_ptr.*, entry.key_ptr.*, writer); - } - } - } - - pub fn writeNode(self: *Compiler, node: *Node, writer: anytype) !void { - switch (node.id) { - .root => unreachable, // writeRoot should be called directly instead - .resource_external => try self.writeResourceExternal(@fieldParentPtr(Node.ResourceExternal, "base", node), writer), - .resource_raw_data => try self.writeResourceRawData(@fieldParentPtr(Node.ResourceRawData, "base", node), writer), - .literal => unreachable, // this is 
context dependent and should be handled by its parent - .binary_expression => unreachable, - .grouped_expression => unreachable, - .not_expression => unreachable, - .invalid => {}, // no-op, currently only used for dangling literals at EOF - .accelerators => try self.writeAccelerators(@fieldParentPtr(Node.Accelerators, "base", node), writer), - .accelerator => unreachable, // handled by writeAccelerators - .dialog => try self.writeDialog(@fieldParentPtr(Node.Dialog, "base", node), writer), - .control_statement => unreachable, - .toolbar => try self.writeToolbar(@fieldParentPtr(Node.Toolbar, "base", node), writer), - .menu => try self.writeMenu(@fieldParentPtr(Node.Menu, "base", node), writer), - .menu_item => unreachable, - .menu_item_separator => unreachable, - .menu_item_ex => unreachable, - .popup => unreachable, - .popup_ex => unreachable, - .version_info => try self.writeVersionInfo(@fieldParentPtr(Node.VersionInfo, "base", node), writer), - .version_statement => unreachable, - .block => unreachable, - .block_value => unreachable, - .block_value_value => unreachable, - .string_table => try self.writeStringTable(@fieldParentPtr(Node.StringTable, "base", node)), - .string_table_string => unreachable, // handled by writeStringTable - .language_statement => self.writeLanguageStatement(@fieldParentPtr(Node.LanguageStatement, "base", node)), - .font_statement => unreachable, - .simple_statement => self.writeTopLevelSimpleStatement(@fieldParentPtr(Node.SimpleStatement, "base", node)), - } - } - - /// Returns the filename encoded as UTF-8 (allocated by self.allocator) - pub fn evaluateFilenameExpression(self: *Compiler, expression_node: *Node) ![]u8 { - switch (expression_node.id) { - .literal => { - const literal_node = expression_node.cast(.literal).?; - switch (literal_node.token.id) { - .literal, .number => { - const slice = literal_node.token.slice(self.source); - const code_page = self.input_code_pages.getForToken(literal_node.token); - var buf = try 
/// Returns the filename encoded as UTF-8 (allocated by self.allocator).
///
/// - For a binary expression only the right operand is evaluated; a grouped
///   expression evaluates its inner expression.
/// - Unquoted `.literal`/`.number` tokens are decoded with the token's input
///   code page.
/// - Quoted strings are parsed with `literals.IterativeStringParser`.
///
/// Codepoints reported as invalid are emitted as "�". Any other node or token
/// kind is unreachable.
pub fn evaluateFilenameExpression(self: *Compiler, expression_node: *Node) ![]u8 {
    const replacement_character = "�";

    // Peel off the non-literal cases first; only literals fall through.
    switch (expression_node.id) {
        .binary_expression => {
            const binary = expression_node.cast(.binary_expression).?;
            return self.evaluateFilenameExpression(binary.right);
        },
        .grouped_expression => {
            const grouped = expression_node.cast(.grouped_expression).?;
            return self.evaluateFilenameExpression(grouped.expression);
        },
        .literal => {},
        else => unreachable,
    }

    const token = expression_node.cast(.literal).?.token;
    switch (token.id) {
        .literal, .number => {
            const token_bytes = token.slice(self.source);
            const input_code_page = self.input_code_pages.getForToken(token);
            var utf8 = try std.ArrayList(u8).initCapacity(self.allocator, token_bytes.len);
            errdefer utf8.deinit();

            var offset: usize = 0;
            while (input_code_page.codepointAt(offset, token_bytes)) |decoded| : (offset += decoded.byte_len) {
                if (decoded.value == code_pages.Codepoint.invalid) {
                    try utf8.appendSlice(replacement_character);
                    continue;
                }
                // Anything that is not returned as an invalid codepoint must be encodable as UTF-8.
                var encoded: [4]u8 = undefined;
                const encoded_len = std.unicode.utf8Encode(decoded.value, &encoded) catch unreachable;
                try utf8.appendSlice(encoded[0..encoded_len]);
            }

            return utf8.toOwnedSlice();
        },
        .quoted_ascii_string, .quoted_wide_string => {
            const token_bytes = token.slice(self.source);
            const start_column = token.calculateColumn(self.source, 8, null);
            const source_bytes = SourceBytes{
                .slice = token_bytes,
                .code_page = self.input_code_pages.getForToken(token),
            };

            var utf8 = std.ArrayList(u8).init(self.allocator);
            errdefer utf8.deinit();

            // Filenames are sort-of parsed as if they were wide strings, but the max escape width of
            // hex/octal escapes is still determined by the L prefix. Since we want to end up with
            // UTF-8, we can parse either string type directly to UTF-8.
            var parser = literals.IterativeStringParser.init(source_bytes, .{
                .start_column = start_column,
                .diagnostics = .{ .diagnostics = self.diagnostics, .token = token },
            });

            while (try parser.nextUnchecked()) |parsed| {
                if (parsed.codepoint == code_pages.Codepoint.invalid) {
                    try utf8.appendSlice(replacement_character);
                    continue;
                }
                var scratch: [4]u8 = undefined;
                // If the codepoint cannot be encoded, we fall back to �
                const encoded: []const u8 = if (std.unicode.utf8Encode(parsed.codepoint, &scratch)) |len|
                    scratch[0..len]
                else |_|
                    replacement_character;
                try utf8.appendSlice(encoded);
            }

            return utf8.toOwnedSlice();
        },
        else => {
            std.debug.print("unexpected filename token type: {}\n", .{token});
            unreachable; // no other token types should be in a filename literal node
        },
    }
}
/// https://learn.microsoft.com/en-us/windows/win32/menurc/searching-for-files
///
/// Opens `path` and returns the file handle; the caller is responsible for
/// closing it. If `self.dependencies_list` is set, the path of the opened file
/// is appended to it (allocated with the list's allocator).
///
/// An absolute `path` is opened directly and the search directories are never
/// consulted. Otherwise each entry of `self.search_dirs` is tried in order.
/// The documented search order is:
///  Directory of the 'root' .rc file (if different from CWD)
///  CWD
///  extra_include_paths (resolved relative to CWD)
///  system_include_paths (resolved relative to CWD)
///  INCLUDE environment var paths (only if ignore_include_env_var is false; resolved relative to CWD)
///
/// Note: The CWD being searched *in addition to* the directory of the 'root' .rc file
///       is also how the Win32 RC compiler preprocessor searches for includes, but that
///       differs from how the clang preprocessor searches for includes.
///
/// Note: This will always return the first matching file that can be opened.
///       This matches the Win32 RC compiler, which will fail with an error if the first
///       matching file is invalid. That is, it does not do the `cmd` PATH searching
///       thing of continuing to look for matching files until it finds a valid
///       one if a matching file is invalid.
///
/// If no search directory yields a file, the error from the first failed
/// attempt is returned, or `error.FileNotFound` if there were no attempts.
fn searchForFile(self: *Compiler, path: []const u8) !std.fs.File {
    // If the path is absolute, then it is not resolved relative to any search
    // paths, so there's no point in checking them.
    //
    // This behavior was determined/confirmed with the following test:
    // - A `test.rc` file with the contents `1 RCDATA "/test.bin"`
    // - A `test.bin` file at `C:\test.bin`
    // - A `test.bin` file at `inc\test.bin` relative to the .rc file
    // - Invoking `rc` with `rc /i inc test.rc`
    //
    // This results in a .res file with the contents of `C:\test.bin`, not
    // the contents of `inc\test.bin`. Further, if `C:\test.bin` is deleted,
    // then it starts failing to find `/test.bin`, meaning that it does not resolve
    // `/test.bin` relative to include paths and instead only treats it as
    // an absolute path.
    if (std.fs.path.isAbsolute(path)) {
        const file = try utils.openFileNotDir(std.fs.cwd(), path, .{});
        errdefer file.close();

        if (self.dependencies_list) |dependencies_list| {
            const duped_path = try dependencies_list.allocator.dupe(u8, path);
            errdefer dependencies_list.allocator.free(duped_path);
            try dependencies_list.append(duped_path);
        }

        // Hand the handle to the caller. Without this return the handle would
        // leak and control would fall through to the search-dir loop below.
        return file;
    }

    var first_error: ?std.fs.File.OpenError = null;
    for (self.search_dirs) |search_dir| {
        if (utils.openFileNotDir(search_dir.dir, path, .{})) |file| {
            errdefer file.close();

            if (self.dependencies_list) |dependencies_list| {
                const searched_file_path = try std.fs.path.join(dependencies_list.allocator, &.{
                    search_dir.path orelse "", path,
                });
                errdefer dependencies_list.allocator.free(searched_file_path);
                try dependencies_list.append(searched_file_path);
            }

            return file;
        } else |err| if (first_error == null) {
            // Only the first failure is reported.
            first_error = err;
        }
    }
    return first_error orelse error.FileNotFound;
}
== .DLGINCLUDE) { - const filename_token = node.filename.cast(.literal).?.token; - const parsed_filename = try self.parseQuotedStringAsAsciiString(filename_token); - defer self.allocator.free(parsed_filename); - - header.applyMemoryFlags(node.common_resource_attributes, self.source); - header.data_size = @intCast(parsed_filename.len + 1); - try header.write(writer, .{ .diagnostics = self.diagnostics, .token = node.id }); - try writer.writeAll(parsed_filename); - try writer.writeByte(0); - try writeDataPadding(writer, header.data_size); - return; - } - - const filename_utf8 = try self.evaluateFilenameExpression(node.filename); - defer self.allocator.free(filename_utf8); - - // TODO: More robust checking of the validity of the filename. - // This currently only checks for NUL bytes, but it should probably also check for - // platform-specific invalid characters like '*', '?', '"', '<', '>', '|' (Windows) - // Related: https://github.com/ziglang/zig/pull/14533#issuecomment-1416888193 - if (std.mem.indexOfScalar(u8, filename_utf8, 0) != null) { - return self.addErrorDetailsAndFail(.{ - .err = .invalid_filename, - .token = node.filename.getFirstToken(), - .token_span_end = node.filename.getLastToken(), - .extra = .{ .number = 0 }, - }); - } - - // Allow plain number literals, but complex number expressions are evaluated strangely - // and almost certainly lead to things not intended by the user (e.g. '(1+-1)' evaluates - // to the filename '-1'), so error if the filename node is a grouped/binary expression. - // Note: This is done here instead of during parsing so that we can easily include - // the evaluated filename as part of the error messages. 
- if (node.filename.id != .literal) { - const filename_string_index = try self.diagnostics.putString(filename_utf8); - try self.addErrorDetails(.{ - .err = .number_expression_as_filename, - .token = node.filename.getFirstToken(), - .token_span_end = node.filename.getLastToken(), - .extra = .{ .number = filename_string_index }, - }); - return self.addErrorDetailsAndFail(.{ - .err = .number_expression_as_filename, - .type = .note, - .token = node.filename.getFirstToken(), - .token_span_end = node.filename.getLastToken(), - .print_source_line = false, - .extra = .{ .number = filename_string_index }, - }); - } - // From here on out, we know that the filename must be comprised of a single token, - // so get it here to simplify future usage. - const filename_token = node.filename.getFirstToken(); - - const file = self.searchForFile(filename_utf8) catch |err| switch (err) { - error.OutOfMemory => |e| return e, - else => |e| { - const filename_string_index = try self.diagnostics.putString(filename_utf8); - return self.addErrorDetailsAndFail(.{ - .err = .file_open_error, - .token = filename_token, - .extra = .{ .file_open_error = .{ - .err = ErrorDetails.FileOpenError.enumFromError(e), - .filename_string_index = filename_string_index, - } }, - }); - }, - }; - defer file.close(); - - if (maybe_predefined_type) |predefined_type| { - switch (predefined_type) { - .GROUP_ICON, .GROUP_CURSOR => { - // Check for animated icon first - if (ani.isAnimatedIcon(file.reader())) { - // Animated icons are just put into the resource unmodified, - // and the resource type changes to ANIICON/ANICURSOR - - const new_predefined_type: res.RT = switch (predefined_type) { - .GROUP_ICON => .ANIICON, - .GROUP_CURSOR => .ANICURSOR, - else => unreachable, - }; - header.type_value.ordinal = @intFromEnum(new_predefined_type); - header.memory_flags = MemoryFlags.defaults(new_predefined_type); - header.applyMemoryFlags(node.common_resource_attributes, self.source); - header.data_size = @intCast(try 
file.getEndPos()); - - try header.write(writer, .{ .diagnostics = self.diagnostics, .token = node.id }); - try file.seekTo(0); - try writeResourceData(writer, file.reader(), header.data_size); - return; - } - - // isAnimatedIcon moved the file cursor so reset to the start - try file.seekTo(0); - - const icon_dir = ico.read(self.allocator, file.reader(), try file.getEndPos()) catch |err| switch (err) { - error.OutOfMemory => |e| return e, - else => |e| { - return self.iconReadError( - e, - filename_utf8, - filename_token, - predefined_type, - ); - }, - }; - defer icon_dir.deinit(); - - // This limit is inherent to the ico format since number of entries is a u16 field. - std.debug.assert(icon_dir.entries.len <= std.math.maxInt(u16)); - - // Note: The Win32 RC compiler will compile the resource as whatever type is - // in the icon_dir regardless of the type of resource specified in the .rc. - // This leads to unusable .res files when the types mismatch, so - // we error instead. - const res_types_match = switch (predefined_type) { - .GROUP_ICON => icon_dir.image_type == .icon, - .GROUP_CURSOR => icon_dir.image_type == .cursor, - else => unreachable, - }; - if (!res_types_match) { - return self.addErrorDetailsAndFail(.{ - .err = .icon_dir_and_resource_type_mismatch, - .token = filename_token, - .extra = .{ .resource = switch (predefined_type) { - .GROUP_ICON => .icon, - .GROUP_CURSOR => .cursor, - else => unreachable, - } }, - }); - } - - // Memory flags affect the RT_ICON and the RT_GROUP_ICON differently - var icon_memory_flags = MemoryFlags.defaults(res.RT.ICON); - applyToMemoryFlags(&icon_memory_flags, node.common_resource_attributes, self.source); - applyToGroupMemoryFlags(&header.memory_flags, node.common_resource_attributes, self.source); - - const first_icon_id = self.state.icon_id; - const entry_type = if (predefined_type == .GROUP_ICON) @intFromEnum(res.RT.ICON) else @intFromEnum(res.RT.CURSOR); - for (icon_dir.entries, 0..) 
|*entry, entry_i_usize| { - // We know that the entry index must fit within a u16, so - // cast it here to simplify usage sites. - const entry_i: u16 = @intCast(entry_i_usize); - var full_data_size = entry.data_size_in_bytes; - if (icon_dir.image_type == .cursor) { - full_data_size = std.math.add(u32, full_data_size, 4) catch { - return self.addErrorDetailsAndFail(.{ - .err = .resource_data_size_exceeds_max, - .token = node.id, - }); - }; - } - - const image_header = ResourceHeader{ - .type_value = .{ .ordinal = entry_type }, - .name_value = .{ .ordinal = self.state.icon_id }, - .data_size = full_data_size, - .memory_flags = icon_memory_flags, - .language = self.state.language, - .version = self.state.version, - .characteristics = self.state.characteristics, - }; - try image_header.write(writer, .{ .diagnostics = self.diagnostics, .token = node.id }); - - // From https://learn.microsoft.com/en-us/windows/win32/menurc/localheader: - // > The LOCALHEADER structure is the first data written to the RT_CURSOR - // > resource if a RESDIR structure contains information about a cursor. 
- // where LOCALHEADER is `struct { WORD xHotSpot; WORD yHotSpot; }` - if (icon_dir.image_type == .cursor) { - try writer.writeInt(u16, entry.type_specific_data.cursor.hotspot_x, .little); - try writer.writeInt(u16, entry.type_specific_data.cursor.hotspot_y, .little); - } - - try file.seekTo(entry.data_offset_from_start_of_file); - var header_bytes = file.reader().readBytesNoEof(16) catch { - return self.iconReadError( - error.UnexpectedEOF, - filename_utf8, - filename_token, - predefined_type, - ); - }; - - const image_format = ico.ImageFormat.detect(&header_bytes); - if (!image_format.validate(&header_bytes)) { - return self.iconReadError( - error.InvalidHeader, - filename_utf8, - filename_token, - predefined_type, - ); - } - switch (image_format) { - .riff => switch (icon_dir.image_type) { - .icon => { - // The Win32 RC compiler treats this as an error, but icon dirs - // with RIFF encoded icons within them work ~okay (they work - // in some places but not others, they may not animate, etc) if they are - // allowed to be compiled. - try self.addErrorDetails(.{ - .err = .rc_would_error_on_icon_dir, - .type = .warning, - .token = filename_token, - .extra = .{ .icon_dir = .{ .icon_type = .icon, .icon_format = .riff, .index = entry_i } }, - }); - try self.addErrorDetails(.{ - .err = .rc_would_error_on_icon_dir, - .type = .note, - .print_source_line = false, - .token = filename_token, - .extra = .{ .icon_dir = .{ .icon_type = .icon, .icon_format = .riff, .index = entry_i } }, - }); - }, - .cursor => { - // The Win32 RC compiler errors in this case too, but we only error - // here because the cursor would fail to be loaded at runtime if we - // compiled it. 
- return self.addErrorDetailsAndFail(.{ - .err = .format_not_supported_in_icon_dir, - .token = filename_token, - .extra = .{ .icon_dir = .{ .icon_type = .cursor, .icon_format = .riff, .index = entry_i } }, - }); - }, - }, - .png => switch (icon_dir.image_type) { - .icon => { - // PNG always seems to have 1 for color planes no matter what - entry.type_specific_data.icon.color_planes = 1; - // These seem to be the only values of num_colors that - // get treated specially - entry.type_specific_data.icon.bits_per_pixel = switch (entry.num_colors) { - 2 => 1, - 8 => 3, - 16 => 4, - else => entry.type_specific_data.icon.bits_per_pixel, - }; - }, - .cursor => { - // The Win32 RC compiler treats this as an error, but cursor dirs - // with PNG encoded icons within them work fine if they are - // allowed to be compiled. - try self.addErrorDetails(.{ - .err = .rc_would_error_on_icon_dir, - .type = .warning, - .token = filename_token, - .extra = .{ .icon_dir = .{ .icon_type = .cursor, .icon_format = .png, .index = entry_i } }, - }); - }, - }, - .dib => { - const bitmap_header: *ico.BitmapHeader = @ptrCast(@alignCast(&header_bytes)); - if (native_endian == .big) { - std.mem.byteSwapAllFields(ico.BitmapHeader, bitmap_header); - } - const bitmap_version = ico.BitmapHeader.Version.get(bitmap_header.bcSize); - - // The Win32 RC compiler only allows headers with - // `bcSize == sizeof(BITMAPINFOHEADER)`, but it seems unlikely - // that there's a good reason for that outside of too-old - // bitmap headers. 
- // TODO: Need to test V4 and V5 bitmaps to check they actually work - if (bitmap_version == .@"win2.0") { - return self.addErrorDetailsAndFail(.{ - .err = .rc_would_error_on_bitmap_version, - .token = filename_token, - .extra = .{ .icon_dir = .{ - .icon_type = if (icon_dir.image_type == .icon) .icon else .cursor, - .icon_format = image_format, - .index = entry_i, - .bitmap_version = bitmap_version, - } }, - }); - } else if (bitmap_version != .@"nt3.1") { - try self.addErrorDetails(.{ - .err = .rc_would_error_on_bitmap_version, - .type = .warning, - .token = filename_token, - .extra = .{ .icon_dir = .{ - .icon_type = if (icon_dir.image_type == .icon) .icon else .cursor, - .icon_format = image_format, - .index = entry_i, - .bitmap_version = bitmap_version, - } }, - }); - } - - switch (icon_dir.image_type) { - .icon => { - // The values in the icon's BITMAPINFOHEADER always take precedence over - // the values in the IconDir, but not in the LOCALHEADER (see above). - entry.type_specific_data.icon.color_planes = bitmap_header.bcPlanes; - entry.type_specific_data.icon.bits_per_pixel = bitmap_header.bcBitCount; - }, - .cursor => { - // Only cursors get the width/height from BITMAPINFOHEADER (icons don't) - entry.width = @intCast(bitmap_header.bcWidth); - entry.height = @intCast(bitmap_header.bcHeight); - entry.type_specific_data.cursor.hotspot_x = bitmap_header.bcPlanes; - entry.type_specific_data.cursor.hotspot_y = bitmap_header.bcBitCount; - }, - } - }, - } - - try file.seekTo(entry.data_offset_from_start_of_file); - try writeResourceDataNoPadding(writer, file.reader(), entry.data_size_in_bytes); - try writeDataPadding(writer, full_data_size); - - if (self.state.icon_id == std.math.maxInt(u16)) { - try self.addErrorDetails(.{ - .err = .max_icon_ids_exhausted, - .print_source_line = false, - .token = filename_token, - .extra = .{ .icon_dir = .{ - .icon_type = if (icon_dir.image_type == .icon) .icon else .cursor, - .icon_format = image_format, - .index = entry_i, - } 
}, - }); - return self.addErrorDetailsAndFail(.{ - .err = .max_icon_ids_exhausted, - .type = .note, - .token = filename_token, - .extra = .{ .icon_dir = .{ - .icon_type = if (icon_dir.image_type == .icon) .icon else .cursor, - .icon_format = image_format, - .index = entry_i, - } }, - }); - } - self.state.icon_id += 1; - } - - header.data_size = icon_dir.getResDataSize(); - - try header.write(writer, .{ .diagnostics = self.diagnostics, .token = node.id }); - try icon_dir.writeResData(writer, first_icon_id); - try writeDataPadding(writer, header.data_size); - return; - }, - .RCDATA, .HTML, .MANIFEST, .MESSAGETABLE, .DLGINIT, .PLUGPLAY => { - header.applyMemoryFlags(node.common_resource_attributes, self.source); - }, - .BITMAP => { - header.applyMemoryFlags(node.common_resource_attributes, self.source); - const file_size = try file.getEndPos(); - - const bitmap_info = bmp.read(file.reader(), file_size) catch |err| { - const filename_string_index = try self.diagnostics.putString(filename_utf8); - return self.addErrorDetailsAndFail(.{ - .err = .bmp_read_error, - .token = filename_token, - .extra = .{ .bmp_read_error = .{ - .err = ErrorDetails.BitmapReadError.enumFromError(err), - .filename_string_index = filename_string_index, - } }, - }); - }; - - if (bitmap_info.getActualPaletteByteLen() > bitmap_info.getExpectedPaletteByteLen()) { - const num_ignored_bytes = bitmap_info.getActualPaletteByteLen() - bitmap_info.getExpectedPaletteByteLen(); - var number_as_bytes: [8]u8 = undefined; - std.mem.writeInt(u64, &number_as_bytes, num_ignored_bytes, native_endian); - const value_string_index = try self.diagnostics.putString(&number_as_bytes); - try self.addErrorDetails(.{ - .err = .bmp_ignored_palette_bytes, - .type = .warning, - .token = filename_token, - .extra = .{ .number = value_string_index }, - }); - } else if (bitmap_info.getActualPaletteByteLen() < bitmap_info.getExpectedPaletteByteLen()) { - const num_padding_bytes = bitmap_info.getExpectedPaletteByteLen() - 
bitmap_info.getActualPaletteByteLen(); - - // TODO: Make this configurable (command line option) - const max_missing_bytes = 4096; - if (num_padding_bytes > max_missing_bytes) { - var numbers_as_bytes: [16]u8 = undefined; - std.mem.writeInt(u64, numbers_as_bytes[0..8], num_padding_bytes, native_endian); - std.mem.writeInt(u64, numbers_as_bytes[8..16], max_missing_bytes, native_endian); - const values_string_index = try self.diagnostics.putString(&numbers_as_bytes); - try self.addErrorDetails(.{ - .err = .bmp_too_many_missing_palette_bytes, - .token = filename_token, - .extra = .{ .number = values_string_index }, - }); - return self.addErrorDetailsAndFail(.{ - .err = .bmp_too_many_missing_palette_bytes, - .type = .note, - .print_source_line = false, - .token = filename_token, - }); - } - - var number_as_bytes: [8]u8 = undefined; - std.mem.writeInt(u64, &number_as_bytes, num_padding_bytes, native_endian); - const value_string_index = try self.diagnostics.putString(&number_as_bytes); - try self.addErrorDetails(.{ - .err = .bmp_missing_palette_bytes, - .type = .warning, - .token = filename_token, - .extra = .{ .number = value_string_index }, - }); - const pixel_data_len = bitmap_info.getPixelDataLen(file_size); - if (pixel_data_len > 0) { - const miscompiled_bytes = @min(pixel_data_len, num_padding_bytes); - std.mem.writeInt(u64, &number_as_bytes, miscompiled_bytes, native_endian); - const miscompiled_bytes_string_index = try self.diagnostics.putString(&number_as_bytes); - try self.addErrorDetails(.{ - .err = .rc_would_miscompile_bmp_palette_padding, - .type = .warning, - .token = filename_token, - .extra = .{ .number = miscompiled_bytes_string_index }, - }); - } - } - - // TODO: It might be possible that the calculation done in this function - // could underflow if the underlying file is modified while reading - // it, but need to think about it more to determine if that's a - // real possibility - const bmp_bytes_to_write: u32 = 
@intCast(bitmap_info.getExpectedByteLen(file_size)); - - header.data_size = bmp_bytes_to_write; - try header.write(writer, .{ .diagnostics = self.diagnostics, .token = node.id }); - try file.seekTo(bmp.file_header_len); - const file_reader = file.reader(); - try writeResourceDataNoPadding(writer, file_reader, bitmap_info.dib_header_size); - if (bitmap_info.getBitmasksByteLen() > 0) { - try writeResourceDataNoPadding(writer, file_reader, bitmap_info.getBitmasksByteLen()); - } - if (bitmap_info.getExpectedPaletteByteLen() > 0) { - try writeResourceDataNoPadding(writer, file_reader, @intCast(bitmap_info.getActualPaletteByteLen())); - // We know that the number of missing palette bytes is <= 4096 - // (see `bmp_too_many_missing_palette_bytes` error case above) - const padding_bytes: usize = @intCast(bitmap_info.getMissingPaletteByteLen()); - if (padding_bytes > 0) { - try writer.writeByteNTimes(0, padding_bytes); - } - } - try file.seekTo(bitmap_info.pixel_data_offset); - const pixel_bytes: u32 = @intCast(file_size - bitmap_info.pixel_data_offset); - try writeResourceDataNoPadding(writer, file_reader, pixel_bytes); - try writeDataPadding(writer, bmp_bytes_to_write); - return; - }, - .FONT => { - if (self.state.font_dir.ids.get(header.name_value.ordinal) != null) { - // Add warning and skip this resource - // Note: The Win32 compiler prints this as an error but it doesn't fail the compilation - // and the duplicate resource is skipped. 
- try self.addErrorDetails(ErrorDetails{ - .err = .font_id_already_defined, - .token = node.id, - .type = .warning, - .extra = .{ .number = header.name_value.ordinal }, - }); - try self.addErrorDetails(ErrorDetails{ - .err = .font_id_already_defined, - .token = self.state.font_dir.ids.get(header.name_value.ordinal).?, - .type = .note, - .extra = .{ .number = header.name_value.ordinal }, - }); - return; - } - header.applyMemoryFlags(node.common_resource_attributes, self.source); - const file_size = try file.getEndPos(); - if (file_size > std.math.maxInt(u32)) { - return self.addErrorDetailsAndFail(.{ - .err = .resource_data_size_exceeds_max, - .token = node.id, - }); - } - - // We now know that the data size will fit in a u32 - header.data_size = @intCast(file_size); - try header.write(writer, .{ .diagnostics = self.diagnostics, .token = node.id }); - - var header_slurping_reader = headerSlurpingReader(148, file.reader()); - try writeResourceData(writer, header_slurping_reader.reader(), header.data_size); - - try self.state.font_dir.add(self.arena, FontDir.Font{ - .id = header.name_value.ordinal, - .header_bytes = header_slurping_reader.slurped_header, - }, node.id); - return; - }, - .ACCELERATOR, - .ANICURSOR, - .ANIICON, - .CURSOR, - .DIALOG, - .DLGINCLUDE, - .FONTDIR, - .ICON, - .MENU, - .STRING, - .TOOLBAR, - .VERSION, - .VXD, - => unreachable, - _ => unreachable, - } - } else { - header.applyMemoryFlags(node.common_resource_attributes, self.source); - } - - // Fallback to just writing out the entire contents of the file - const data_size = try file.getEndPos(); - if (data_size > std.math.maxInt(u32)) { - return self.addErrorDetailsAndFail(.{ - .err = .resource_data_size_exceeds_max, - .token = node.id, - }); - } - // We now know that the data size will fit in a u32 - header.data_size = @intCast(data_size); - try header.write(writer, .{ .diagnostics = self.diagnostics, .token = node.id }); - try writeResourceData(writer, file.reader(), header.data_size); - } - - 
/// Stores `filename` in the diagnostics string table, then reports an
/// `icon_read_error` at `token` via `addErrorDetailsAndFail` and returns the
/// resulting error.
///
/// `predefined_type` must be `.GROUP_ICON` or `.GROUP_CURSOR`; anything else
/// is unreachable.
fn iconReadError(
    self: *Compiler,
    err: ico.ReadError,
    filename: []const u8,
    token: Token,
    predefined_type: res.RT,
) error{ CompileError, OutOfMemory } {
    const name_index = try self.diagnostics.putString(filename);
    return self.addErrorDetailsAndFail(.{
        .err = .icon_read_error,
        .token = token,
        .extra = .{ .icon_read_error = .{
            .err = ErrorDetails.IconReadError.enumFromError(err),
            .icon_type = switch (predefined_type) {
                .GROUP_CURSOR => .cursor,
                .GROUP_ICON => .icon,
                else => unreachable,
            },
            .filename_string_index = name_index,
        } },
    });
}

/// Tag type of `Data`.
pub const DataType = enum {
    number,
    ascii_string,
    wide_string,
};

/// One evaluated piece of raw resource data.
pub const Data = union(DataType) {
    number: Number,
    ascii_string: []const u8,
    wide_string: [:0]const u16,

    /// Frees the string payload with `allocator`; numbers own no memory.
    pub fn deinit(self: Data, allocator: Allocator) void {
        switch (self) {
            .number => {},
            .ascii_string => |bytes| allocator.free(bytes),
            .wide_string => |code_units| allocator.free(code_units),
        }
    }

    /// Writes the payload to `writer`: numbers as a little-endian DWORD when
    /// `is_long` is set and as a WORD otherwise; strings as their raw bytes.
    pub fn write(self: Data, writer: anytype) !void {
        switch (self) {
            .number => |number| {
                if (number.is_long) {
                    try writer.writeInt(DWORD, number.value, .little);
                } else {
                    try writer.writeInt(WORD, number.asWord(), .little);
                }
            },
            .ascii_string => |bytes| try writer.writeAll(bytes),
            .wide_string => |code_units| try writer.writeAll(std.mem.sliceAsBytes(code_units)),
        }
    }
};
/// Assumes that the node is a number or number expression.
///
/// Literals are parsed with `literals.parseNumberLiteral`; binary expressions
/// evaluate the left operand, then the right, and combine them with
/// `Number.evaluateOperator` using the first byte of the operator token.
pub fn evaluateNumberExpression(expression_node: *Node, source: []const u8, code_page_lookup: *const CodePageLookup) Number {
    switch (expression_node.id) {
        .grouped_expression => {
            const grouped = expression_node.cast(.grouped_expression).?;
            return evaluateNumberExpression(grouped.expression, source, code_page_lookup);
        },
        .binary_expression => {
            const binary = expression_node.cast(.binary_expression).?;
            const left = evaluateNumberExpression(binary.left, source, code_page_lookup);
            const right = evaluateNumberExpression(binary.right, source, code_page_lookup);
            const operator_char = binary.operator.slice(source)[0];
            return left.evaluateOperator(operator_char, right);
        },
        .literal => {
            const token = expression_node.cast(.literal).?.token;
            std.debug.assert(token.id == .number);
            return literals.parseNumberLiteral(SourceBytes{
                .slice = token.slice(source),
                .code_page = code_page_lookup.getForToken(token),
            });
        },
        else => unreachable,
    }
}

/// Intermediate value used while evaluating a flags expression.
/// `not_mask` holds the bits that survive `applyNotMask`.
const FlagsNumber = struct {
    value: u32,
    not_mask: u32 = 0xFFFFFFFF,

    /// Combines the two values with `operator_char` (`-` and `+` wrap on
    /// overflow) and intersects the two not-masks.
    pub fn evaluateOperator(lhs: FlagsNumber, operator_char: u8, rhs: FlagsNumber) FlagsNumber {
        return .{
            .value = switch (operator_char) {
                '+' => lhs.value +% rhs.value,
                '-' => lhs.value -% rhs.value,
                '&' => lhs.value & rhs.value,
                '|' => lhs.value | rhs.value,
                else => unreachable, // invalid operator, this would be a lexer/parser bug
            },
            .not_mask = lhs.not_mask & rhs.not_mask,
        };
    }

    /// Returns `value` with every bit outside `not_mask` cleared.
    pub fn applyNotMask(self: FlagsNumber) u32 {
        return self.value & self.not_mask;
    }
};

/// Evaluates a flags expression with `default` as the context's initial value
/// and returns the resulting value.
pub fn evaluateFlagsExpressionWithDefault(default: u32, expression_node: *Node, source: []const u8, code_page_lookup: *const CodePageLookup) u32 {
    var context = FlagsExpressionContext{ .initial_value = default };
    return evaluateFlagsExpression(expression_node, source, code_page_lookup, &context).value;
}

/// Mutable state threaded through `evaluateFlagsExpression`.
/// `initial_value` is consumed by the first leaf that is evaluated.
pub const FlagsExpressionContext = struct {
    initial_value: u32 = 0,
    initial_value_used: bool = false,
};
/// Assumes that the node is a number expression (which can contain not_expressions).
///
/// The first leaf that is evaluated (number literal or NOT expression)
/// consumes `context.initial_value`: a literal ORs it in, a NOT expression
/// clears its own bits from it. Any later NOT expression contributes a
/// `not_mask` that the enclosing binary expression applies to its result.
pub fn evaluateFlagsExpression(expression_node: *Node, source: []const u8, code_page_lookup: *const CodePageLookup, context: *FlagsExpressionContext) FlagsNumber {
    switch (expression_node.id) {
        .grouped_expression => {
            const grouped = expression_node.cast(.grouped_expression).?;
            return evaluateFlagsExpression(grouped.expression, source, code_page_lookup, context);
        },
        .binary_expression => {
            const binary = expression_node.cast(.binary_expression).?;
            // Left before right: whichever leaf is reached first uses the initial value.
            const left = evaluateFlagsExpression(binary.left, source, code_page_lookup, context);
            const right = evaluateFlagsExpression(binary.right, source, code_page_lookup, context);
            const operator_char = binary.operator.slice(source)[0];
            const combined = left.evaluateOperator(operator_char, right);
            return .{ .value = combined.applyNotMask() };
        },
        .literal => {
            const token = expression_node.cast(.literal).?.token;
            std.debug.assert(token.id == .number);
            const parsed = literals.parseNumberLiteral(SourceBytes{
                .slice = token.slice(source),
                .code_page = code_page_lookup.getForToken(token),
            }).value;
            if (context.initial_value_used) return .{ .value = parsed };
            context.initial_value_used = true;
            return .{ .value = parsed | context.initial_value };
        },
        .not_expression => {
            const number_token = expression_node.cast(.not_expression).?.number_token;
            const negated = literals.parseNumberLiteral(SourceBytes{
                .slice = number_token.slice(source),
                .code_page = code_page_lookup.getForToken(number_token),
            });
            if (context.initial_value_used) {
                return .{ .value = 0, .not_mask = ~negated.value };
            }
            context.initial_value_used = true;
            return .{ .value = context.initial_value & ~negated.value };
        },
        else => unreachable,
    }
}
/// Evaluates one raw-data expression into a `Data` value.
///
/// Number literals and binary/grouped expressions become `.number`; quoted
/// strings are parsed with `self.allocator` and become `.ascii_string` or
/// `.wide_string`, which the caller releases via `Data.deinit`.
pub fn evaluateDataExpression(self: *Compiler, expression_node: *Node) !Data {
    switch (expression_node.id) {
        .binary_expression, .grouped_expression => {
            return .{ .number = evaluateNumberExpression(expression_node, self.source, self.input_code_pages) };
        },
        .literal => {
            const token = expression_node.cast(.literal).?.token;
            switch (token.id) {
                .number => {
                    return .{ .number = evaluateNumberExpression(expression_node, self.source, self.input_code_pages) };
                },
                .quoted_ascii_string => {
                    const start_column = token.calculateColumn(self.source, 8, null);
                    const source_bytes = SourceBytes{
                        .slice = token.slice(self.source),
                        .code_page = self.input_code_pages.getForToken(token),
                    };
                    const ascii = try literals.parseQuotedAsciiString(self.allocator, source_bytes, .{
                        .start_column = start_column,
                        .diagnostics = .{ .diagnostics = self.diagnostics, .token = token },
                        .output_code_page = self.output_code_pages.getForToken(token),
                    });
                    return .{ .ascii_string = ascii };
                },
                .quoted_wide_string => {
                    const start_column = token.calculateColumn(self.source, 8, null);
                    const source_bytes = SourceBytes{
                        .slice = token.slice(self.source),
                        .code_page = self.input_code_pages.getForToken(token),
                    };
                    const wide = try literals.parseQuotedWideString(self.allocator, source_bytes, .{
                        .start_column = start_column,
                        .diagnostics = .{ .diagnostics = self.diagnostics, .token = token },
                    });
                    return .{ .wide_string = wide };
                },
                else => {
                    std.debug.print("unexpected token in literal node: {}\n", .{token});
                    unreachable; // no other token types should be in a data literal node
                },
            }
        },
        .not_expression => unreachable,
        else => {
            std.debug.print("{}\n", .{expression_node.id});
            @panic("TODO: evaluateDataExpression");
        },
    }
}
std.ArrayList(u8).init(self.allocator); - defer data_buffer.deinit(); - // The header's data length field is a u32 so limit the resource's data size so that - // we know we can always specify the real size. - var limited_writer = limitedWriter(data_buffer.writer(), std.math.maxInt(u32)); - const data_writer = limited_writer.writer(); - - for (node.raw_data) |expression| { - const data = try self.evaluateDataExpression(expression); - defer data.deinit(self.allocator); - data.write(data_writer) catch |err| switch (err) { - error.NoSpaceLeft => { - return self.addErrorDetailsAndFail(.{ - .err = .resource_data_size_exceeds_max, - .token = node.id, - }); - }, - else => |e| return e, - }; - } - - // This intCast can't fail because the limitedWriter above guarantees that - // we will never write more than maxInt(u32) bytes. - const data_len: u32 = @intCast(data_buffer.items.len); - try self.writeResourceHeader(writer, node.id, node.type, data_len, node.common_resource_attributes, self.state.language); - - var data_fbs = std.io.fixedBufferStream(data_buffer.items); - try writeResourceData(writer, data_fbs.reader(), data_len); - } - - pub fn writeResourceHeader(self: *Compiler, writer: anytype, id_token: Token, type_token: Token, data_size: u32, common_resource_attributes: []Token, language: res.Language) !void { - var header = try self.resourceHeader(id_token, type_token, .{ - .language = language, - .data_size = data_size, - }); - defer header.deinit(self.allocator); - - header.applyMemoryFlags(common_resource_attributes, self.source); - - try header.write(writer, .{ .diagnostics = self.diagnostics, .token = id_token }); - } - - pub fn writeResourceDataNoPadding(writer: anytype, data_reader: anytype, data_size: u32) !void { - var limited_reader = std.io.limitedReader(data_reader, data_size); - - const FifoBuffer = std.fifo.LinearFifo(u8, .{ .Static = 4096 }); - var fifo = FifoBuffer.init(); - try fifo.pump(limited_reader.reader(), writer); - } - - pub fn 
writeResourceData(writer: anytype, data_reader: anytype, data_size: u32) !void { - try writeResourceDataNoPadding(writer, data_reader, data_size); - try writeDataPadding(writer, data_size); - } - - pub fn writeDataPadding(writer: anytype, data_size: u32) !void { - try writer.writeByteNTimes(0, numPaddingBytesNeeded(data_size)); - } - - pub fn numPaddingBytesNeeded(data_size: u32) u2 { - // Result is guaranteed to be between 0 and 3. - return @intCast((4 -% data_size) % 4); - } - - pub fn evaluateAcceleratorKeyExpression(self: *Compiler, node: *Node, is_virt: bool) !u16 { - if (node.isNumberExpression()) { - return evaluateNumberExpression(node, self.source, self.input_code_pages).asWord(); - } else { - std.debug.assert(node.isStringLiteral()); - const literal = @fieldParentPtr(Node.Literal, "base", node); - const bytes = SourceBytes{ - .slice = literal.token.slice(self.source), - .code_page = self.input_code_pages.getForToken(literal.token), - }; - const column = literal.token.calculateColumn(self.source, 8, null); - return res.parseAcceleratorKeyString(bytes, is_virt, .{ - .start_column = column, - .diagnostics = .{ .diagnostics = self.diagnostics, .token = literal.token }, - }); - } - } - - pub fn writeAccelerators(self: *Compiler, node: *Node.Accelerators, writer: anytype) !void { - var data_buffer = std.ArrayList(u8).init(self.allocator); - defer data_buffer.deinit(); - - // The header's data length field is a u32 so limit the resource's data size so that - // we know we can always specify the real size. 
- var limited_writer = limitedWriter(data_buffer.writer(), std.math.maxInt(u32)); - const data_writer = limited_writer.writer(); - - self.writeAcceleratorsData(node, data_writer) catch |err| switch (err) { - error.NoSpaceLeft => { - return self.addErrorDetailsAndFail(.{ - .err = .resource_data_size_exceeds_max, - .token = node.id, - }); - }, - else => |e| return e, - }; - - // This intCast can't fail because the limitedWriter above guarantees that - // we will never write more than maxInt(u32) bytes. - const data_size: u32 = @intCast(data_buffer.items.len); - var header = try self.resourceHeader(node.id, node.type, .{ - .data_size = data_size, - }); - defer header.deinit(self.allocator); - - header.applyMemoryFlags(node.common_resource_attributes, self.source); - header.applyOptionalStatements(node.optional_statements, self.source, self.input_code_pages); - - try header.write(writer, .{ .diagnostics = self.diagnostics, .token = node.id }); - - var data_fbs = std.io.fixedBufferStream(data_buffer.items); - try writeResourceData(writer, data_fbs.reader(), data_size); - } - - /// Expects `data_writer` to be a LimitedWriter limited to u32, meaning all writes to - /// the writer within this function could return error.NoSpaceLeft - pub fn writeAcceleratorsData(self: *Compiler, node: *Node.Accelerators, data_writer: anytype) !void { - for (node.accelerators, 0..) 
|accel_node, i| { - const accelerator = @fieldParentPtr(Node.Accelerator, "base", accel_node); - var modifiers = res.AcceleratorModifiers{}; - for (accelerator.type_and_options) |type_or_option| { - const modifier = rc.AcceleratorTypeAndOptions.map.get(type_or_option.slice(self.source)).?; - modifiers.apply(modifier); - } - if (accelerator.event.isNumberExpression() and !modifiers.explicit_ascii_or_virtkey) { - return self.addErrorDetailsAndFail(.{ - .err = .accelerator_type_required, - .token = accelerator.event.getFirstToken(), - .token_span_end = accelerator.event.getLastToken(), - }); - } - const key = self.evaluateAcceleratorKeyExpression(accelerator.event, modifiers.isSet(.virtkey)) catch |err| switch (err) { - error.OutOfMemory => |e| return e, - else => |e| { - return self.addErrorDetailsAndFail(.{ - .err = .invalid_accelerator_key, - .token = accelerator.event.getFirstToken(), - .token_span_end = accelerator.event.getLastToken(), - .extra = .{ .accelerator_error = .{ - .err = ErrorDetails.AcceleratorError.enumFromError(e), - } }, - }); - }, - }; - const cmd_id = evaluateNumberExpression(accelerator.idvalue, self.source, self.input_code_pages); - - if (i == node.accelerators.len - 1) { - modifiers.markLast(); - } - - try data_writer.writeByte(modifiers.value); - try data_writer.writeByte(0); // padding - try data_writer.writeInt(u16, key, .little); - try data_writer.writeInt(u16, cmd_id.asWord(), .little); - try data_writer.writeInt(u16, 0, .little); // padding - } - } - - const DialogOptionalStatementValues = struct { - style: u32 = res.WS.SYSMENU | res.WS.BORDER | res.WS.POPUP, - exstyle: u32 = 0, - class: ?NameOrOrdinal = null, - menu: ?NameOrOrdinal = null, - font: ?FontStatementValues = null, - caption: ?Token = null, - }; - - pub fn writeDialog(self: *Compiler, node: *Node.Dialog, writer: anytype) !void { - var data_buffer = std.ArrayList(u8).init(self.allocator); - defer data_buffer.deinit(); - // The header's data length field is a u32 so limit the 
resource's data size so that - // we know we can always specify the real size. - var limited_writer = limitedWriter(data_buffer.writer(), std.math.maxInt(u32)); - const data_writer = limited_writer.writer(); - - const resource = Resource.fromString(.{ - .slice = node.type.slice(self.source), - .code_page = self.input_code_pages.getForToken(node.type), - }); - std.debug.assert(resource == .dialog or resource == .dialogex); - - var optional_statement_values: DialogOptionalStatementValues = .{}; - defer { - if (optional_statement_values.class) |class| { - class.deinit(self.allocator); - } - if (optional_statement_values.menu) |menu| { - menu.deinit(self.allocator); - } - } - var skipped_menu_or_classes = std.ArrayList(*Node.SimpleStatement).init(self.allocator); - defer skipped_menu_or_classes.deinit(); - var last_menu: *Node.SimpleStatement = undefined; - var last_class: *Node.SimpleStatement = undefined; - var last_menu_would_be_forced_ordinal = false; - var last_menu_has_digit_as_first_char = false; - var last_menu_did_uppercase = false; - var last_class_would_be_forced_ordinal = false; - - for (node.optional_statements) |optional_statement| { - switch (optional_statement.id) { - .simple_statement => { - const simple_statement = @fieldParentPtr(Node.SimpleStatement, "base", optional_statement); - const statement_identifier = simple_statement.identifier; - const statement_type = rc.OptionalStatements.dialog_map.get(statement_identifier.slice(self.source)) orelse continue; - switch (statement_type) { - .style, .exstyle => { - const style = evaluateFlagsExpressionWithDefault(0, simple_statement.value, self.source, self.input_code_pages); - if (statement_type == .style) { - optional_statement_values.style = style; - } else { - optional_statement_values.exstyle = style; - } - }, - .caption => { - std.debug.assert(simple_statement.value.id == .literal); - const literal_node = @fieldParentPtr(Node.Literal, "base", simple_statement.value); - 
optional_statement_values.caption = literal_node.token; - }, - .class => { - const is_duplicate = optional_statement_values.class != null; - if (is_duplicate) { - try skipped_menu_or_classes.append(last_class); - } - const forced_ordinal = is_duplicate and optional_statement_values.class.? == .ordinal; - // In the Win32 RC compiler, if any CLASS values that are interpreted as - // an ordinal exist, it affects all future CLASS statements and forces - // them to be treated as an ordinal no matter what. - if (forced_ordinal) { - last_class_would_be_forced_ordinal = true; - } - // clear out the old one if it exists - if (optional_statement_values.class) |prev| { - prev.deinit(self.allocator); - optional_statement_values.class = null; - } - - if (simple_statement.value.isNumberExpression()) { - const class_ordinal = evaluateNumberExpression(simple_statement.value, self.source, self.input_code_pages); - optional_statement_values.class = NameOrOrdinal{ .ordinal = class_ordinal.asWord() }; - } else { - std.debug.assert(simple_statement.value.isStringLiteral()); - const literal_node = @fieldParentPtr(Node.Literal, "base", simple_statement.value); - const parsed = try self.parseQuotedStringAsWideString(literal_node.token); - optional_statement_values.class = NameOrOrdinal{ .name = parsed }; - } - - last_class = simple_statement; - }, - .menu => { - const is_duplicate = optional_statement_values.menu != null; - if (is_duplicate) { - try skipped_menu_or_classes.append(last_menu); - } - const forced_ordinal = is_duplicate and optional_statement_values.menu.? == .ordinal; - // In the Win32 RC compiler, if any MENU values that are interpreted as - // an ordinal exist, it affects all future MENU statements and forces - // them to be treated as an ordinal no matter what. 
- if (forced_ordinal) { - last_menu_would_be_forced_ordinal = true; - } - // clear out the old one if it exists - if (optional_statement_values.menu) |prev| { - prev.deinit(self.allocator); - optional_statement_values.menu = null; - } - - std.debug.assert(simple_statement.value.id == .literal); - const literal_node = @fieldParentPtr(Node.Literal, "base", simple_statement.value); - - const token_slice = literal_node.token.slice(self.source); - const bytes = SourceBytes{ - .slice = token_slice, - .code_page = self.input_code_pages.getForToken(literal_node.token), - }; - optional_statement_values.menu = try NameOrOrdinal.fromString(self.allocator, bytes); - - if (optional_statement_values.menu.? == .name) { - if (NameOrOrdinal.maybeNonAsciiOrdinalFromString(bytes)) |win32_rc_ordinal| { - try self.addErrorDetails(.{ - .err = .invalid_digit_character_in_ordinal, - .type = .err, - .token = literal_node.token, - }); - return self.addErrorDetailsAndFail(.{ - .err = .win32_non_ascii_ordinal, - .type = .note, - .token = literal_node.token, - .print_source_line = false, - .extra = .{ .number = win32_rc_ordinal.ordinal }, - }); - } - } - - // Need to keep track of some properties of the value - // in order to emit the appropriate warning(s) later on. - // See where the warning are emitted below (outside this loop) - // for the full explanation. 
- var did_uppercase = false; - var codepoint_i: usize = 0; - while (bytes.code_page.codepointAt(codepoint_i, bytes.slice)) |codepoint| : (codepoint_i += codepoint.byte_len) { - const c = codepoint.value; - switch (c) { - 'a'...'z' => { - did_uppercase = true; - break; - }, - else => {}, - } - } - last_menu_did_uppercase = did_uppercase; - last_menu_has_digit_as_first_char = std.ascii.isDigit(token_slice[0]); - last_menu = simple_statement; - }, - else => {}, - } - }, - .font_statement => { - const font = @fieldParentPtr(Node.FontStatement, "base", optional_statement); - if (optional_statement_values.font != null) { - optional_statement_values.font.?.node = font; - } else { - optional_statement_values.font = FontStatementValues{ .node = font }; - } - if (font.weight) |weight| { - const value = evaluateNumberExpression(weight, self.source, self.input_code_pages); - optional_statement_values.font.?.weight = value.asWord(); - } - if (font.italic) |italic| { - const value = evaluateNumberExpression(italic, self.source, self.input_code_pages); - optional_statement_values.font.?.italic = value.asWord() != 0; - } - }, - else => {}, - } - } - - for (skipped_menu_or_classes.items) |simple_statement| { - const statement_identifier = simple_statement.identifier; - const statement_type = rc.OptionalStatements.dialog_map.get(statement_identifier.slice(self.source)) orelse continue; - try self.addErrorDetails(.{ - .err = .duplicate_menu_or_class_skipped, - .type = .warning, - .token = simple_statement.identifier, - .token_span_start = simple_statement.base.getFirstToken(), - .token_span_end = simple_statement.base.getLastToken(), - .extra = .{ .menu_or_class = switch (statement_type) { - .menu => .menu, - .class => .class, - else => unreachable, - } }, - }); - } - // The Win32 RC compiler miscompiles the value in the following scenario: - // Multiple CLASS parameters are specified and any of them are treated as a number, then - // the last CLASS is always treated as a number no 
matter what - if (last_class_would_be_forced_ordinal and optional_statement_values.class.? == .name) { - const literal_node = @fieldParentPtr(Node.Literal, "base", last_class.value); - const ordinal_value = res.ForcedOrdinal.fromUtf16Le(optional_statement_values.class.?.name); - - try self.addErrorDetails(.{ - .err = .rc_would_miscompile_dialog_class, - .type = .warning, - .token = literal_node.token, - .extra = .{ .number = ordinal_value }, - }); - try self.addErrorDetails(.{ - .err = .rc_would_miscompile_dialog_class, - .type = .note, - .print_source_line = false, - .token = literal_node.token, - .extra = .{ .number = ordinal_value }, - }); - try self.addErrorDetails(.{ - .err = .rc_would_miscompile_dialog_menu_or_class_id_forced_ordinal, - .type = .note, - .print_source_line = false, - .token = literal_node.token, - .extra = .{ .menu_or_class = .class }, - }); - } - // The Win32 RC compiler miscompiles the id in two different scenarios: - // 1. The first character of the ID is a digit, in which case it is always treated as a number - // no matter what (and therefore does not match how the MENU/MENUEX id is parsed) - // 2. Multiple MENU parameters are specified and any of them are treated as a number, then - // the last MENU is always treated as a number no matter what - if ((last_menu_would_be_forced_ordinal or last_menu_has_digit_as_first_char) and optional_statement_values.menu.? 
== .name) { - const literal_node = @fieldParentPtr(Node.Literal, "base", last_menu.value); - const token_slice = literal_node.token.slice(self.source); - const bytes = SourceBytes{ - .slice = token_slice, - .code_page = self.input_code_pages.getForToken(literal_node.token), - }; - const ordinal_value = res.ForcedOrdinal.fromBytes(bytes); - - try self.addErrorDetails(.{ - .err = .rc_would_miscompile_dialog_menu_id, - .type = .warning, - .token = literal_node.token, - .extra = .{ .number = ordinal_value }, - }); - try self.addErrorDetails(.{ - .err = .rc_would_miscompile_dialog_menu_id, - .type = .note, - .print_source_line = false, - .token = literal_node.token, - .extra = .{ .number = ordinal_value }, - }); - if (last_menu_would_be_forced_ordinal) { - try self.addErrorDetails(.{ - .err = .rc_would_miscompile_dialog_menu_or_class_id_forced_ordinal, - .type = .note, - .print_source_line = false, - .token = literal_node.token, - .extra = .{ .menu_or_class = .menu }, - }); - } else { - try self.addErrorDetails(.{ - .err = .rc_would_miscompile_dialog_menu_id_starts_with_digit, - .type = .note, - .print_source_line = false, - .token = literal_node.token, - }); - } - } - // The MENU id parsing uses the exact same logic as the MENU/MENUEX resource id parsing, - // which means that it will convert ASCII characters to uppercase during the 'name' parsing. - // This turns out not to matter (`LoadMenu` does a case-insensitive lookup anyway), - // but it still makes sense to share the uppercasing logic since the MENU parameter - // here is just a reference to a MENU/MENUEX id within the .exe. - // So, because this is an intentional but inconsequential-to-the-user difference - // between resinator and the Win32 RC compiler, we only emit a hint instead of - // a warning. 
- if (last_menu_did_uppercase) { - const literal_node = @fieldParentPtr(Node.Literal, "base", last_menu.value); - try self.addErrorDetails(.{ - .err = .dialog_menu_id_was_uppercased, - .type = .hint, - .token = literal_node.token, - }); - } - - const x = evaluateNumberExpression(node.x, self.source, self.input_code_pages); - const y = evaluateNumberExpression(node.y, self.source, self.input_code_pages); - const width = evaluateNumberExpression(node.width, self.source, self.input_code_pages); - const height = evaluateNumberExpression(node.height, self.source, self.input_code_pages); - - // FONT statement requires DS_SETFONT, and if it's not present DS_SETFRONT must be unset - if (optional_statement_values.font) |_| { - optional_statement_values.style |= res.DS.SETFONT; - } else { - optional_statement_values.style &= ~res.DS.SETFONT; - } - // CAPTION statement implies WS_CAPTION - if (optional_statement_values.caption) |_| { - optional_statement_values.style |= res.WS.CAPTION; - } - - self.writeDialogHeaderAndStrings( - node, - data_writer, - resource, - &optional_statement_values, - x, - y, - width, - height, - ) catch |err| switch (err) { - // Dialog header and menu/class/title strings can never exceed u32 bytes - // on their own, so this error is unreachable. - error.NoSpaceLeft => unreachable, - else => |e| return e, - }; - - var controls_by_id = std.AutoHashMap(u32, *const Node.ControlStatement).init(self.allocator); - // Number of controls are guaranteed by the parser to be within maxInt(u16). - try controls_by_id.ensureTotalCapacity(@as(u16, @intCast(node.controls.len))); - defer controls_by_id.deinit(); - - for (node.controls) |control_node| { - const control = @fieldParentPtr(Node.ControlStatement, "base", control_node); - - self.writeDialogControl( - control, - data_writer, - resource, - // We know the data_buffer len is limited to u32 max. 
- @intCast(data_buffer.items.len), - &controls_by_id, - ) catch |err| switch (err) { - error.NoSpaceLeft => { - try self.addErrorDetails(.{ - .err = .resource_data_size_exceeds_max, - .token = node.id, - }); - return self.addErrorDetailsAndFail(.{ - .err = .resource_data_size_exceeds_max, - .type = .note, - .token = control.type, - }); - }, - else => |e| return e, - }; - } - - const data_size: u32 = @intCast(data_buffer.items.len); - var header = try self.resourceHeader(node.id, node.type, .{ - .data_size = data_size, - }); - defer header.deinit(self.allocator); - - header.applyMemoryFlags(node.common_resource_attributes, self.source); - header.applyOptionalStatements(node.optional_statements, self.source, self.input_code_pages); - - try header.write(writer, .{ .diagnostics = self.diagnostics, .token = node.id }); - - var data_fbs = std.io.fixedBufferStream(data_buffer.items); - try writeResourceData(writer, data_fbs.reader(), data_size); - } - - fn writeDialogHeaderAndStrings( - self: *Compiler, - node: *Node.Dialog, - data_writer: anytype, - resource: Resource, - optional_statement_values: *const DialogOptionalStatementValues, - x: Number, - y: Number, - width: Number, - height: Number, - ) !void { - // Header - if (resource == .dialogex) { - const help_id: u32 = help_id: { - if (node.help_id == null) break :help_id 0; - break :help_id evaluateNumberExpression(node.help_id.?, self.source, self.input_code_pages).value; - }; - try data_writer.writeInt(u16, 1, .little); // version number, always 1 - try data_writer.writeInt(u16, 0xFFFF, .little); // signature, always 0xFFFF - try data_writer.writeInt(u32, help_id, .little); - try data_writer.writeInt(u32, optional_statement_values.exstyle, .little); - try data_writer.writeInt(u32, optional_statement_values.style, .little); - } else { - try data_writer.writeInt(u32, optional_statement_values.style, .little); - try data_writer.writeInt(u32, optional_statement_values.exstyle, .little); - } - // This limit is enforced 
by the parser, so we know the number of controls - // is within the range of a u16. - try data_writer.writeInt(u16, @as(u16, @intCast(node.controls.len)), .little); - try data_writer.writeInt(u16, x.asWord(), .little); - try data_writer.writeInt(u16, y.asWord(), .little); - try data_writer.writeInt(u16, width.asWord(), .little); - try data_writer.writeInt(u16, height.asWord(), .little); - - // Menu - if (optional_statement_values.menu) |menu| { - try menu.write(data_writer); - } else { - try data_writer.writeInt(u16, 0, .little); - } - // Class - if (optional_statement_values.class) |class| { - try class.write(data_writer); - } else { - try data_writer.writeInt(u16, 0, .little); - } - // Caption - if (optional_statement_values.caption) |caption| { - const parsed = try self.parseQuotedStringAsWideString(caption); - defer self.allocator.free(parsed); - try data_writer.writeAll(std.mem.sliceAsBytes(parsed[0 .. parsed.len + 1])); - } else { - try data_writer.writeInt(u16, 0, .little); - } - // Font - if (optional_statement_values.font) |font| { - try self.writeDialogFont(resource, font, data_writer); - } - } - - fn writeDialogControl( - self: *Compiler, - control: *Node.ControlStatement, - data_writer: anytype, - resource: Resource, - bytes_written_so_far: u32, - controls_by_id: *std.AutoHashMap(u32, *const Node.ControlStatement), - ) !void { - const control_type = rc.Control.map.get(control.type.slice(self.source)).?; - - // Each control must be at a 4-byte boundary. However, the Windows RC - // compiler will miscompile controls if their extra data ends on an odd offset. - // We will avoid the miscompilation and emit a warning. 
- const num_padding = numPaddingBytesNeeded(bytes_written_so_far); - if (num_padding == 1 or num_padding == 3) { - try self.addErrorDetails(.{ - .err = .rc_would_miscompile_control_padding, - .type = .warning, - .token = control.type, - }); - try self.addErrorDetails(.{ - .err = .rc_would_miscompile_control_padding, - .type = .note, - .print_source_line = false, - .token = control.type, - }); - } - try data_writer.writeByteNTimes(0, num_padding); - - const style = if (control.style) |style_expression| - // Certain styles are implied by the control type - evaluateFlagsExpressionWithDefault(res.ControlClass.getImpliedStyle(control_type), style_expression, self.source, self.input_code_pages) - else - res.ControlClass.getImpliedStyle(control_type); - - const exstyle = if (control.exstyle) |exstyle_expression| - evaluateFlagsExpressionWithDefault(0, exstyle_expression, self.source, self.input_code_pages) - else - 0; - - switch (resource) { - .dialog => { - // Note: Reverse order from DIALOGEX - try data_writer.writeInt(u32, style, .little); - try data_writer.writeInt(u32, exstyle, .little); - }, - .dialogex => { - const help_id: u32 = if (control.help_id) |help_id_expression| - evaluateNumberExpression(help_id_expression, self.source, self.input_code_pages).value - else - 0; - try data_writer.writeInt(u32, help_id, .little); - // Note: Reverse order from DIALOG - try data_writer.writeInt(u32, exstyle, .little); - try data_writer.writeInt(u32, style, .little); - }, - else => unreachable, - } - - const control_x = evaluateNumberExpression(control.x, self.source, self.input_code_pages); - const control_y = evaluateNumberExpression(control.y, self.source, self.input_code_pages); - const control_width = evaluateNumberExpression(control.width, self.source, self.input_code_pages); - const control_height = evaluateNumberExpression(control.height, self.source, self.input_code_pages); - - try data_writer.writeInt(u16, control_x.asWord(), .little); - try data_writer.writeInt(u16, 
control_y.asWord(), .little); - try data_writer.writeInt(u16, control_width.asWord(), .little); - try data_writer.writeInt(u16, control_height.asWord(), .little); - - const control_id = evaluateNumberExpression(control.id, self.source, self.input_code_pages); - switch (resource) { - .dialog => try data_writer.writeInt(u16, control_id.asWord(), .little), - .dialogex => try data_writer.writeInt(u32, control_id.value, .little), - else => unreachable, - } - - const control_id_for_map: u32 = switch (resource) { - .dialog => control_id.asWord(), - .dialogex => control_id.value, - else => unreachable, - }; - const result = controls_by_id.getOrPutAssumeCapacity(control_id_for_map); - if (result.found_existing) { - if (!self.silent_duplicate_control_ids) { - try self.addErrorDetails(.{ - .err = .control_id_already_defined, - .type = .warning, - .token = control.id.getFirstToken(), - .token_span_end = control.id.getLastToken(), - .extra = .{ .number = control_id_for_map }, - }); - try self.addErrorDetails(.{ - .err = .control_id_already_defined, - .type = .note, - .token = result.value_ptr.*.id.getFirstToken(), - .token_span_end = result.value_ptr.*.id.getLastToken(), - .extra = .{ .number = control_id_for_map }, - }); - } - } else { - result.value_ptr.* = control; - } - - if (res.ControlClass.fromControl(control_type)) |control_class| { - const ordinal = NameOrOrdinal{ .ordinal = @intFromEnum(control_class) }; - try ordinal.write(data_writer); - } else { - const class_node = control.class.?; - if (class_node.isNumberExpression()) { - const number = evaluateNumberExpression(class_node, self.source, self.input_code_pages); - const ordinal = NameOrOrdinal{ .ordinal = number.asWord() }; - // This is different from how the Windows RC compiles ordinals here, - // but I think that's a miscompilation/bug of the Windows implementation. 
- // The Windows behavior is (where LSB = least significant byte): - // - If the LSB is 0x00 => 0xFFFF0000 - // - If the LSB is < 0x80 => 0x000000<LSB> - // - If the LSB is >= 0x80 => 0x0000FF<LSB> - // - // Because of this, we emit a warning about the potential miscompilation - try self.addErrorDetails(.{ - .err = .rc_would_miscompile_control_class_ordinal, - .type = .warning, - .token = class_node.getFirstToken(), - .token_span_end = class_node.getLastToken(), - }); - try self.addErrorDetails(.{ - .err = .rc_would_miscompile_control_class_ordinal, - .type = .note, - .print_source_line = false, - .token = class_node.getFirstToken(), - .token_span_end = class_node.getLastToken(), - }); - // And then write out the ordinal using a proper a NameOrOrdinal encoding. - try ordinal.write(data_writer); - } else if (class_node.isStringLiteral()) { - const literal_node = @fieldParentPtr(Node.Literal, "base", class_node); - const parsed = try self.parseQuotedStringAsWideString(literal_node.token); - defer self.allocator.free(parsed); - if (rc.ControlClass.fromWideString(parsed)) |control_class| { - const ordinal = NameOrOrdinal{ .ordinal = @intFromEnum(control_class) }; - try ordinal.write(data_writer); - } else { - // NUL acts as a terminator - // TODO: Maybe warn when parsed_terminated.len != parsed.len, since - // it seems unlikely that NUL-termination is something intentional - const parsed_terminated = std.mem.sliceTo(parsed, 0); - const name = NameOrOrdinal{ .name = parsed_terminated }; - try name.write(data_writer); - } - } else { - const literal_node = @fieldParentPtr(Node.Literal, "base", class_node); - const literal_slice = literal_node.token.slice(self.source); - // This succeeding is guaranteed by the parser - const control_class = rc.ControlClass.map.get(literal_slice) orelse unreachable; - const ordinal = NameOrOrdinal{ .ordinal = @intFromEnum(control_class) }; - try ordinal.write(data_writer); - } - } - - if (control.text) |text_token| { - const bytes = 
SourceBytes{ - .slice = text_token.slice(self.source), - .code_page = self.input_code_pages.getForToken(text_token), - }; - if (text_token.isStringLiteral()) { - const text = try self.parseQuotedStringAsWideString(text_token); - defer self.allocator.free(text); - const name = NameOrOrdinal{ .name = text }; - try name.write(data_writer); - } else { - std.debug.assert(text_token.id == .number); - const number = literals.parseNumberLiteral(bytes); - const ordinal = NameOrOrdinal{ .ordinal = number.asWord() }; - try ordinal.write(data_writer); - } - } else { - try NameOrOrdinal.writeEmpty(data_writer); - } - - var extra_data_buf = std.ArrayList(u8).init(self.allocator); - defer extra_data_buf.deinit(); - // The extra data byte length must be able to fit within a u16. - var limited_extra_data_writer = limitedWriter(extra_data_buf.writer(), std.math.maxInt(u16)); - const extra_data_writer = limited_extra_data_writer.writer(); - for (control.extra_data) |data_expression| { - const data = try self.evaluateDataExpression(data_expression); - defer data.deinit(self.allocator); - data.write(extra_data_writer) catch |err| switch (err) { - error.NoSpaceLeft => { - try self.addErrorDetails(.{ - .err = .control_extra_data_size_exceeds_max, - .token = control.type, - }); - return self.addErrorDetailsAndFail(.{ - .err = .control_extra_data_size_exceeds_max, - .type = .note, - .token = data_expression.getFirstToken(), - .token_span_end = data_expression.getLastToken(), - }); - }, - else => |e| return e, - }; - } - // We know the extra_data_buf size fits within a u16. 
- const extra_data_size: u16 = @intCast(extra_data_buf.items.len); - try data_writer.writeInt(u16, extra_data_size, .little); - try data_writer.writeAll(extra_data_buf.items); - } - - pub fn writeToolbar(self: *Compiler, node: *Node.Toolbar, writer: anytype) !void { - var data_buffer = std.ArrayList(u8).init(self.allocator); - defer data_buffer.deinit(); - const data_writer = data_buffer.writer(); - - const button_width = evaluateNumberExpression(node.button_width, self.source, self.input_code_pages); - const button_height = evaluateNumberExpression(node.button_height, self.source, self.input_code_pages); - - // I'm assuming this is some sort of version - // TODO: Try to find something mentioning this - try data_writer.writeInt(u16, 1, .little); - try data_writer.writeInt(u16, button_width.asWord(), .little); - try data_writer.writeInt(u16, button_height.asWord(), .little); - try data_writer.writeInt(u16, @as(u16, @intCast(node.buttons.len)), .little); - - for (node.buttons) |button_or_sep| { - switch (button_or_sep.id) { - .literal => { // This is always SEPARATOR - std.debug.assert(button_or_sep.cast(.literal).?.token.id == .literal); - try data_writer.writeInt(u16, 0, .little); - }, - .simple_statement => { - const value_node = button_or_sep.cast(.simple_statement).?.value; - const value = evaluateNumberExpression(value_node, self.source, self.input_code_pages); - try data_writer.writeInt(u16, value.asWord(), .little); - }, - else => unreachable, // This is a bug in the parser - } - } - - const data_size: u32 = @intCast(data_buffer.items.len); - var header = try self.resourceHeader(node.id, node.type, .{ - .data_size = data_size, - }); - defer header.deinit(self.allocator); - - header.applyMemoryFlags(node.common_resource_attributes, self.source); - - try header.write(writer, .{ .diagnostics = self.diagnostics, .token = node.id }); - - var data_fbs = std.io.fixedBufferStream(data_buffer.items); - try writeResourceData(writer, data_fbs.reader(), data_size); - } - 
- /// Weight and italic carry over from previous FONT statements within a single resource, - /// so they need to be parsed ahead-of-time and stored - const FontStatementValues = struct { - weight: u16 = 0, - italic: bool = false, - node: *Node.FontStatement, - }; - - pub fn writeDialogFont(self: *Compiler, resource: Resource, values: FontStatementValues, writer: anytype) !void { - const node = values.node; - const point_size = evaluateNumberExpression(node.point_size, self.source, self.input_code_pages); - try writer.writeInt(u16, point_size.asWord(), .little); - - if (resource == .dialogex) { - try writer.writeInt(u16, values.weight, .little); - } - - if (resource == .dialogex) { - try writer.writeInt(u8, @intFromBool(values.italic), .little); - } - - if (node.char_set) |char_set| { - const value = evaluateNumberExpression(char_set, self.source, self.input_code_pages); - try writer.writeInt(u8, @as(u8, @truncate(value.value)), .little); - } else if (resource == .dialogex) { - try writer.writeInt(u8, 1, .little); // DEFAULT_CHARSET - } - - const typeface = try self.parseQuotedStringAsWideString(node.typeface); - defer self.allocator.free(typeface); - try writer.writeAll(std.mem.sliceAsBytes(typeface[0 .. typeface.len + 1])); - } - - pub fn writeMenu(self: *Compiler, node: *Node.Menu, writer: anytype) !void { - var data_buffer = std.ArrayList(u8).init(self.allocator); - defer data_buffer.deinit(); - // The header's data length field is a u32 so limit the resource's data size so that - // we know we can always specify the real size. 
- var limited_writer = limitedWriter(data_buffer.writer(), std.math.maxInt(u32)); - const data_writer = limited_writer.writer(); - - const type_bytes = SourceBytes{ - .slice = node.type.slice(self.source), - .code_page = self.input_code_pages.getForToken(node.type), - }; - const resource = Resource.fromString(type_bytes); - std.debug.assert(resource == .menu or resource == .menuex); - - self.writeMenuData(node, data_writer, resource) catch |err| switch (err) { - error.NoSpaceLeft => { - return self.addErrorDetailsAndFail(.{ - .err = .resource_data_size_exceeds_max, - .token = node.id, - }); - }, - else => |e| return e, - }; - - // This intCast can't fail because the limitedWriter above guarantees that - // we will never write more than maxInt(u32) bytes. - const data_size: u32 = @intCast(data_buffer.items.len); - var header = try self.resourceHeader(node.id, node.type, .{ - .data_size = data_size, - }); - defer header.deinit(self.allocator); - - header.applyMemoryFlags(node.common_resource_attributes, self.source); - header.applyOptionalStatements(node.optional_statements, self.source, self.input_code_pages); - - try header.write(writer, .{ .diagnostics = self.diagnostics, .token = node.id }); - - var data_fbs = std.io.fixedBufferStream(data_buffer.items); - try writeResourceData(writer, data_fbs.reader(), data_size); - } - - /// Expects `data_writer` to be a LimitedWriter limited to u32, meaning all writes to - /// the writer within this function could return error.NoSpaceLeft - pub fn writeMenuData(self: *Compiler, node: *Node.Menu, data_writer: anytype, resource: Resource) !void { - // menu header - const version: u16 = if (resource == .menu) 0 else 1; - try data_writer.writeInt(u16, version, .little); - const header_size: u16 = if (resource == .menu) 0 else 4; - try data_writer.writeInt(u16, header_size, .little); // cbHeaderSize - // Note: There can be extra bytes at the end of this header (`rgbExtra`), - // but they are always zero-length for us, so we don't 
write anything - // (the length of the rgbExtra field is inferred from the header_size). - // MENU => rgbExtra: [cbHeaderSize]u8 - // MENUEX => rgbExtra: [cbHeaderSize-4]u8 - - if (resource == .menuex) { - if (node.help_id) |help_id_node| { - const help_id = evaluateNumberExpression(help_id_node, self.source, self.input_code_pages); - try data_writer.writeInt(u32, help_id.value, .little); - } else { - try data_writer.writeInt(u32, 0, .little); - } - } - - for (node.items, 0..) |item, i| { - const is_last = i == node.items.len - 1; - try self.writeMenuItem(item, data_writer, is_last); - } - } - - pub fn writeMenuItem(self: *Compiler, node: *Node, writer: anytype, is_last_of_parent: bool) !void { - switch (node.id) { - .menu_item_separator => { - // This is the 'alternate compability form' of the separator, see - // https://devblogs.microsoft.com/oldnewthing/20080710-00/?p=21673 - // - // The 'correct' way is to set the MF_SEPARATOR flag, but the Win32 RC - // compiler still uses this alternate form, so that's what we use too. 
- var flags = res.MenuItemFlags{}; - if (is_last_of_parent) flags.markLast(); - try writer.writeInt(u16, flags.value, .little); - try writer.writeInt(u16, 0, .little); // id - try writer.writeInt(u16, 0, .little); // null-terminated UTF-16 text - }, - .menu_item => { - const menu_item = @fieldParentPtr(Node.MenuItem, "base", node); - var flags = res.MenuItemFlags{}; - for (menu_item.option_list) |option_token| { - // This failing would be a bug in the parser - const option = rc.MenuItem.Option.map.get(option_token.slice(self.source)) orelse unreachable; - flags.apply(option); - } - if (is_last_of_parent) flags.markLast(); - try writer.writeInt(u16, flags.value, .little); - - var result = evaluateNumberExpression(menu_item.result, self.source, self.input_code_pages); - try writer.writeInt(u16, result.asWord(), .little); - - var text = try self.parseQuotedStringAsWideString(menu_item.text); - defer self.allocator.free(text); - try writer.writeAll(std.mem.sliceAsBytes(text[0 .. text.len + 1])); - }, - .popup => { - const popup = @fieldParentPtr(Node.Popup, "base", node); - var flags = res.MenuItemFlags{ .value = res.MF.POPUP }; - for (popup.option_list) |option_token| { - // This failing would be a bug in the parser - const option = rc.MenuItem.Option.map.get(option_token.slice(self.source)) orelse unreachable; - flags.apply(option); - } - if (is_last_of_parent) flags.markLast(); - try writer.writeInt(u16, flags.value, .little); - - var text = try self.parseQuotedStringAsWideString(popup.text); - defer self.allocator.free(text); - try writer.writeAll(std.mem.sliceAsBytes(text[0 .. text.len + 1])); - - for (popup.items, 0..) 
|item, i| { - const is_last = i == popup.items.len - 1; - try self.writeMenuItem(item, writer, is_last); - } - }, - inline .menu_item_ex, .popup_ex => |node_type| { - const menu_item = @fieldParentPtr(node_type.Type(), "base", node); - - if (menu_item.type) |flags| { - const value = evaluateNumberExpression(flags, self.source, self.input_code_pages); - try writer.writeInt(u32, value.value, .little); - } else { - try writer.writeInt(u32, 0, .little); - } - - if (menu_item.state) |state| { - const value = evaluateNumberExpression(state, self.source, self.input_code_pages); - try writer.writeInt(u32, value.value, .little); - } else { - try writer.writeInt(u32, 0, .little); - } - - if (menu_item.id) |id| { - const value = evaluateNumberExpression(id, self.source, self.input_code_pages); - try writer.writeInt(u32, value.value, .little); - } else { - try writer.writeInt(u32, 0, .little); - } - - var flags: u16 = 0; - if (is_last_of_parent) flags |= comptime @as(u16, @intCast(res.MF.END)); - // This constant doesn't seem to have a named #define, it's different than MF_POPUP - if (node_type == .popup_ex) flags |= 0x01; - try writer.writeInt(u16, flags, .little); - - var text = try self.parseQuotedStringAsWideString(menu_item.text); - defer self.allocator.free(text); - try writer.writeAll(std.mem.sliceAsBytes(text[0 .. text.len + 1])); - - // Only the combination of the flags u16 and the text bytes can cause - // non-DWORD alignment, so we can just use the byte length of those - // two values to realign to DWORD alignment. - const relevant_bytes = 2 + (text.len + 1) * 2; - try writeDataPadding(writer, @intCast(relevant_bytes)); - - if (node_type == .popup_ex) { - if (menu_item.help_id) |help_id_node| { - const help_id = evaluateNumberExpression(help_id_node, self.source, self.input_code_pages); - try writer.writeInt(u32, help_id.value, .little); - } else { - try writer.writeInt(u32, 0, .little); - } - - for (menu_item.items, 0..) 
|item, i| { - const is_last = i == menu_item.items.len - 1; - try self.writeMenuItem(item, writer, is_last); - } - } - }, - else => unreachable, - } - } - - pub fn writeVersionInfo(self: *Compiler, node: *Node.VersionInfo, writer: anytype) !void { - var data_buffer = std.ArrayList(u8).init(self.allocator); - defer data_buffer.deinit(); - // The node's length field (which is inclusive of the length of all of its children) is a u16 - // so limit the node's data size so that we know we can always specify the real size. - var limited_writer = limitedWriter(data_buffer.writer(), std.math.maxInt(u16)); - const data_writer = limited_writer.writer(); - - try data_writer.writeInt(u16, 0, .little); // placeholder size - try data_writer.writeInt(u16, res.FixedFileInfo.byte_len, .little); - try data_writer.writeInt(u16, res.VersionNode.type_binary, .little); - const key_bytes = std.mem.sliceAsBytes(res.FixedFileInfo.key[0 .. res.FixedFileInfo.key.len + 1]); - try data_writer.writeAll(key_bytes); - // The number of bytes written up to this point is always the same, since the name - // of the node is a constant (FixedFileInfo.key). The total number of bytes - // written so far is 38, so we need 2 padding bytes to get back to DWORD alignment - try data_writer.writeInt(u16, 0, .little); - - var fixed_file_info = res.FixedFileInfo{}; - for (node.fixed_info) |fixed_info| { - switch (fixed_info.id) { - .version_statement => { - const version_statement = @fieldParentPtr(Node.VersionStatement, "base", fixed_info); - const version_type = rc.VersionInfo.map.get(version_statement.type.slice(self.source)).?; - - // Ensure that all parts are cleared for each version, to properly account for - // potential duplicate PRODUCTVERSION/FILEVERSION statements - switch (version_type) { - .file_version => @memset(&fixed_file_info.file_version.parts, 0), - .product_version => @memset(&fixed_file_info.product_version.parts, 0), - else => unreachable, - } - - for (version_statement.parts, 0..) 
|part, i| { - const part_value = evaluateNumberExpression(part, self.source, self.input_code_pages); - if (part_value.is_long) { - try self.addErrorDetails(.{ - .err = .rc_would_error_u16_with_l_suffix, - .type = .warning, - .token = part.getFirstToken(), - .token_span_end = part.getLastToken(), - .extra = .{ .statement_with_u16_param = switch (version_type) { - .file_version => .fileversion, - .product_version => .productversion, - else => unreachable, - } }, - }); - try self.addErrorDetails(.{ - .err = .rc_would_error_u16_with_l_suffix, - .print_source_line = false, - .type = .note, - .token = part.getFirstToken(), - .token_span_end = part.getLastToken(), - .extra = .{ .statement_with_u16_param = switch (version_type) { - .file_version => .fileversion, - .product_version => .productversion, - else => unreachable, - } }, - }); - } - switch (version_type) { - .file_version => { - fixed_file_info.file_version.parts[i] = part_value.asWord(); - }, - .product_version => { - fixed_file_info.product_version.parts[i] = part_value.asWord(); - }, - else => unreachable, - } - } - }, - .simple_statement => { - const statement = @fieldParentPtr(Node.SimpleStatement, "base", fixed_info); - const statement_type = rc.VersionInfo.map.get(statement.identifier.slice(self.source)).?; - const value = evaluateNumberExpression(statement.value, self.source, self.input_code_pages); - switch (statement_type) { - .file_flags_mask => fixed_file_info.file_flags_mask = value.value, - .file_flags => fixed_file_info.file_flags = value.value, - .file_os => fixed_file_info.file_os = value.value, - .file_type => fixed_file_info.file_type = value.value, - .file_subtype => fixed_file_info.file_subtype = value.value, - else => unreachable, - } - }, - else => unreachable, - } - } - try fixed_file_info.write(data_writer); - - for (node.block_statements) |statement| { - self.writeVersionNode(statement, data_writer, &data_buffer) catch |err| switch (err) { - error.NoSpaceLeft => { - try 
self.addErrorDetails(.{ - .err = .version_node_size_exceeds_max, - .token = node.id, - }); - return self.addErrorDetailsAndFail(.{ - .err = .version_node_size_exceeds_max, - .type = .note, - .token = statement.getFirstToken(), - .token_span_end = statement.getLastToken(), - }); - }, - else => |e| return e, - }; - } - - // We know that data_buffer.items.len is within the limits of a u16, since we - // limited the writer to maxInt(u16) - const data_size: u16 = @intCast(data_buffer.items.len); - // And now that we know the full size of this node (including its children), set its size - std.mem.writeInt(u16, data_buffer.items[0..2], data_size, .little); - - var header = try self.resourceHeader(node.id, node.versioninfo, .{ - .data_size = data_size, - }); - defer header.deinit(self.allocator); - - header.applyMemoryFlags(node.common_resource_attributes, self.source); - - try header.write(writer, .{ .diagnostics = self.diagnostics, .token = node.id }); - - var data_fbs = std.io.fixedBufferStream(data_buffer.items); - try writeResourceData(writer, data_fbs.reader(), data_size); - } - - /// Expects writer to be a LimitedWriter limited to u16, meaning all writes to - /// the writer within this function could return error.NoSpaceLeft, and that buf.items.len - /// will never be able to exceed maxInt(u16). - pub fn writeVersionNode(self: *Compiler, node: *Node, writer: anytype, buf: *std.ArrayList(u8)) !void { - // We can assume that buf.items.len will never be able to exceed the limits of a u16 - try writeDataPadding(writer, @as(u16, @intCast(buf.items.len))); - - const node_and_children_size_offset = buf.items.len; - try writer.writeInt(u16, 0, .little); // placeholder for size - const data_size_offset = buf.items.len; - try writer.writeInt(u16, 0, .little); // placeholder for data size - const data_type_offset = buf.items.len; - // Data type is string unless the node contains values that are numbers. 
- try writer.writeInt(u16, res.VersionNode.type_string, .little); - - switch (node.id) { - inline .block, .block_value => |node_type| { - const block_or_value = @fieldParentPtr(node_type.Type(), "base", node); - const parsed_key = try self.parseQuotedStringAsWideString(block_or_value.key); - defer self.allocator.free(parsed_key); - - const parsed_key_to_first_null = std.mem.sliceTo(parsed_key, 0); - try writer.writeAll(std.mem.sliceAsBytes(parsed_key_to_first_null[0 .. parsed_key_to_first_null.len + 1])); - - var has_number_value: bool = false; - for (block_or_value.values) |value_value_node_uncasted| { - const value_value_node = value_value_node_uncasted.cast(.block_value_value).?; - if (value_value_node.expression.isNumberExpression()) { - has_number_value = true; - break; - } - } - // The units used here are dependent on the type. If there are any numbers, then - // this is a byte count. If there are only strings, then this is a count of - // UTF-16 code units. - // - // The Win32 RC compiler miscompiles this count in the case of values that - // have a mix of numbers and strings. This is detected and a warning is emitted - // during parsing, so we can just do the correct thing here. - var values_size: usize = 0; - - try writeDataPadding(writer, @intCast(buf.items.len)); - - for (block_or_value.values, 0..) 
|value_value_node_uncasted, i| { - const value_value_node = value_value_node_uncasted.cast(.block_value_value).?; - const value_node = value_value_node.expression; - if (value_node.isNumberExpression()) { - const number = evaluateNumberExpression(value_node, self.source, self.input_code_pages); - // This is used to write u16 or u32 depending on the number's suffix - const data_wrapper = Data{ .number = number }; - try data_wrapper.write(writer); - // Numbers use byte count - values_size += if (number.is_long) 4 else 2; - } else { - std.debug.assert(value_node.isStringLiteral()); - const literal_node = value_node.cast(.literal).?; - const parsed_value = try self.parseQuotedStringAsWideString(literal_node.token); - defer self.allocator.free(parsed_value); - - const parsed_to_first_null = std.mem.sliceTo(parsed_value, 0); - try writer.writeAll(std.mem.sliceAsBytes(parsed_to_first_null)); - // Strings use UTF-16 code-unit count including the null-terminator, but - // only if there are no number values in the list. - var value_size = parsed_to_first_null.len; - if (has_number_value) value_size *= 2; // 2 bytes per UTF-16 code unit - values_size += value_size; - // The null-terminator is only included if there's a trailing comma - // or this is the last value. If the value evaluates to empty, then - // it never gets a null terminator. If there was an explicit null-terminator - // in the string, we still need to potentially add one since we already - // sliced to the terminator. 
- const is_last = i == block_or_value.values.len - 1; - const is_empty = parsed_to_first_null.len == 0; - const is_only = block_or_value.values.len == 1; - if ((!is_empty or !is_only) and (is_last or value_value_node.trailing_comma)) { - try writer.writeInt(u16, 0, .little); - values_size += if (has_number_value) 2 else 1; - } - } - } - var data_size_slice = buf.items[data_size_offset..]; - std.mem.writeInt(u16, data_size_slice[0..@sizeOf(u16)], @as(u16, @intCast(values_size)), .little); - - if (has_number_value) { - const data_type_slice = buf.items[data_type_offset..]; - std.mem.writeInt(u16, data_type_slice[0..@sizeOf(u16)], res.VersionNode.type_binary, .little); - } - - if (node_type == .block) { - const block = block_or_value; - for (block.children) |child| { - try self.writeVersionNode(child, writer, buf); - } - } - }, - else => unreachable, - } - - const node_and_children_size = buf.items.len - node_and_children_size_offset; - const node_and_children_size_slice = buf.items[node_and_children_size_offset..]; - std.mem.writeInt(u16, node_and_children_size_slice[0..@sizeOf(u16)], @as(u16, @intCast(node_and_children_size)), .little); - } - - pub fn writeStringTable(self: *Compiler, node: *Node.StringTable) !void { - const language = getLanguageFromOptionalStatements(node.optional_statements, self.source, self.input_code_pages) orelse self.state.language; - - for (node.strings) |string_node| { - const string = @fieldParentPtr(Node.StringTableString, "base", string_node); - const string_id_data = try self.evaluateDataExpression(string.id); - const string_id = string_id_data.number.asWord(); - - self.state.string_tables.set( - self.arena, - language, - string_id, - string.string, - &node.base, - self.source, - self.input_code_pages, - self.state.version, - self.state.characteristics, - ) catch |err| switch (err) { - error.StringAlreadyDefined => { - // It might be nice to have these errors point to the ids rather than the - // string tokens, but that would mean 
storing the id token of each string - // which doesn't seem worth it just for slightly better error messages. - try self.addErrorDetails(ErrorDetails{ - .err = .string_already_defined, - .token = string.string, - .extra = .{ .string_and_language = .{ .id = string_id, .language = language } }, - }); - const existing_def_table = self.state.string_tables.tables.getPtr(language).?; - const existing_definition = existing_def_table.get(string_id).?; - return self.addErrorDetailsAndFail(ErrorDetails{ - .err = .string_already_defined, - .type = .note, - .token = existing_definition, - .extra = .{ .string_and_language = .{ .id = string_id, .language = language } }, - }); - }, - error.OutOfMemory => |e| return e, - }; - } - } - - /// Expects this to be a top-level LANGUAGE statement - pub fn writeLanguageStatement(self: *Compiler, node: *Node.LanguageStatement) void { - const primary = Compiler.evaluateNumberExpression(node.primary_language_id, self.source, self.input_code_pages); - const sublanguage = Compiler.evaluateNumberExpression(node.sublanguage_id, self.source, self.input_code_pages); - self.state.language.primary_language_id = @truncate(primary.value); - self.state.language.sublanguage_id = @truncate(sublanguage.value); - } - - /// Expects this to be a top-level VERSION or CHARACTERISTICS statement - pub fn writeTopLevelSimpleStatement(self: *Compiler, node: *Node.SimpleStatement) void { - const value = Compiler.evaluateNumberExpression(node.value, self.source, self.input_code_pages); - const statement_type = rc.TopLevelKeywords.map.get(node.identifier.slice(self.source)).?; - switch (statement_type) { - .characteristics => self.state.characteristics = value.value, - .version => self.state.version = value.value, - else => unreachable, - } - } - - pub const ResourceHeaderOptions = struct { - language: ?res.Language = null, - data_size: DWORD = 0, - }; - - pub fn resourceHeader(self: *Compiler, id_token: Token, type_token: Token, options: ResourceHeaderOptions) 
!ResourceHeader { - const id_bytes = self.sourceBytesForToken(id_token); - const type_bytes = self.sourceBytesForToken(type_token); - return ResourceHeader.init( - self.allocator, - id_bytes, - type_bytes, - options.data_size, - options.language orelse self.state.language, - self.state.version, - self.state.characteristics, - ) catch |err| switch (err) { - error.OutOfMemory => |e| return e, - error.TypeNonAsciiOrdinal => { - const win32_rc_ordinal = NameOrOrdinal.maybeNonAsciiOrdinalFromString(type_bytes).?; - try self.addErrorDetails(.{ - .err = .invalid_digit_character_in_ordinal, - .type = .err, - .token = type_token, - }); - return self.addErrorDetailsAndFail(.{ - .err = .win32_non_ascii_ordinal, - .type = .note, - .token = type_token, - .print_source_line = false, - .extra = .{ .number = win32_rc_ordinal.ordinal }, - }); - }, - error.IdNonAsciiOrdinal => { - const win32_rc_ordinal = NameOrOrdinal.maybeNonAsciiOrdinalFromString(id_bytes).?; - try self.addErrorDetails(.{ - .err = .invalid_digit_character_in_ordinal, - .type = .err, - .token = id_token, - }); - return self.addErrorDetailsAndFail(.{ - .err = .win32_non_ascii_ordinal, - .type = .note, - .token = id_token, - .print_source_line = false, - .extra = .{ .number = win32_rc_ordinal.ordinal }, - }); - }, - }; - } - - pub const ResourceHeader = struct { - name_value: NameOrOrdinal, - type_value: NameOrOrdinal, - language: res.Language, - memory_flags: MemoryFlags, - data_size: DWORD, - version: DWORD, - characteristics: DWORD, - data_version: DWORD = 0, - - pub const InitError = error{ OutOfMemory, IdNonAsciiOrdinal, TypeNonAsciiOrdinal }; - - pub fn init(allocator: Allocator, id_bytes: SourceBytes, type_bytes: SourceBytes, data_size: DWORD, language: res.Language, version: DWORD, characteristics: DWORD) InitError!ResourceHeader { - const type_value = type: { - const resource_type = Resource.fromString(type_bytes); - if (res.RT.fromResource(resource_type)) |rt_constant| { - break :type NameOrOrdinal{ 
.ordinal = @intFromEnum(rt_constant) }; - } else { - break :type try NameOrOrdinal.fromString(allocator, type_bytes); - } - }; - errdefer type_value.deinit(allocator); - if (type_value == .name) { - if (NameOrOrdinal.maybeNonAsciiOrdinalFromString(type_bytes)) |_| { - return error.TypeNonAsciiOrdinal; - } - } - - const name_value = try NameOrOrdinal.fromString(allocator, id_bytes); - errdefer name_value.deinit(allocator); - if (name_value == .name) { - if (NameOrOrdinal.maybeNonAsciiOrdinalFromString(id_bytes)) |_| { - return error.IdNonAsciiOrdinal; - } - } - - const predefined_resource_type = type_value.predefinedResourceType(); - - return ResourceHeader{ - .name_value = name_value, - .type_value = type_value, - .data_size = data_size, - .memory_flags = MemoryFlags.defaults(predefined_resource_type), - .language = language, - .version = version, - .characteristics = characteristics, - }; - } - - pub fn deinit(self: ResourceHeader, allocator: Allocator) void { - self.name_value.deinit(allocator); - self.type_value.deinit(allocator); - } - - pub const SizeInfo = struct { - bytes: u32, - padding_after_name: u2, - }; - - fn calcSize(self: ResourceHeader) error{Overflow}!SizeInfo { - var header_size: u32 = 8; - header_size = try std.math.add( - u32, - header_size, - std.math.cast(u32, self.name_value.byteLen()) orelse return error.Overflow, - ); - header_size = try std.math.add( - u32, - header_size, - std.math.cast(u32, self.type_value.byteLen()) orelse return error.Overflow, - ); - const padding_after_name = numPaddingBytesNeeded(header_size); - header_size = try std.math.add(u32, header_size, padding_after_name); - header_size = try std.math.add(u32, header_size, 16); - return .{ .bytes = header_size, .padding_after_name = padding_after_name }; - } - - pub fn writeAssertNoOverflow(self: ResourceHeader, writer: anytype) !void { - return self.writeSizeInfo(writer, self.calcSize() catch unreachable); - } - - pub fn write(self: ResourceHeader, writer: anytype, err_ctx: 
errors.DiagnosticsContext) !void { - const size_info = self.calcSize() catch { - try err_ctx.diagnostics.append(.{ - .err = .resource_data_size_exceeds_max, - .token = err_ctx.token, - }); - return error.CompileError; - }; - return self.writeSizeInfo(writer, size_info); - } - - fn writeSizeInfo(self: ResourceHeader, writer: anytype, size_info: SizeInfo) !void { - try writer.writeInt(DWORD, self.data_size, .little); // DataSize - try writer.writeInt(DWORD, size_info.bytes, .little); // HeaderSize - try self.type_value.write(writer); // TYPE - try self.name_value.write(writer); // NAME - try writer.writeByteNTimes(0, size_info.padding_after_name); - - try writer.writeInt(DWORD, self.data_version, .little); // DataVersion - try writer.writeInt(WORD, self.memory_flags.value, .little); // MemoryFlags - try writer.writeInt(WORD, self.language.asInt(), .little); // LanguageId - try writer.writeInt(DWORD, self.version, .little); // Version - try writer.writeInt(DWORD, self.characteristics, .little); // Characteristics - } - - pub fn predefinedResourceType(self: ResourceHeader) ?res.RT { - return self.type_value.predefinedResourceType(); - } - - pub fn applyMemoryFlags(self: *ResourceHeader, tokens: []Token, source: []const u8) void { - applyToMemoryFlags(&self.memory_flags, tokens, source); - } - - pub fn applyOptionalStatements(self: *ResourceHeader, statements: []*Node, source: []const u8, code_page_lookup: *const CodePageLookup) void { - applyToOptionalStatements(&self.language, &self.version, &self.characteristics, statements, source, code_page_lookup); - } - }; - - fn applyToMemoryFlags(flags: *MemoryFlags, tokens: []Token, source: []const u8) void { - for (tokens) |token| { - const attribute = rc.CommonResourceAttributes.map.get(token.slice(source)).?; - flags.set(attribute); - } - } - - /// RT_GROUP_ICON and RT_GROUP_CURSOR have their own special rules for memory flags - fn applyToGroupMemoryFlags(flags: *MemoryFlags, tokens: []Token, source: []const u8) void { - // 
There's probably a cleaner implementation of this, but this will result in the same - // flags as the Win32 RC compiler for all 986,410 K-permutations of memory flags - // for an ICON resource. - // - // This was arrived at by iterating over the permutations and creating a - // list where each line looks something like this: - // MOVEABLE PRELOAD -> 0x1050 (MOVEABLE|PRELOAD|DISCARDABLE) - // - // and then noticing a few things: - - // 1. Any permutation that does not have PRELOAD in it just uses the - // default flags. - const initial_flags = flags.*; - var flags_set = std.enums.EnumSet(rc.CommonResourceAttributes).initEmpty(); - for (tokens) |token| { - const attribute = rc.CommonResourceAttributes.map.get(token.slice(source)).?; - flags_set.insert(attribute); - } - if (!flags_set.contains(.preload)) return; - - // 2. Any permutation of flags where applying only the PRELOAD and LOADONCALL flags - // results in no actual change by the end will just use the default flags. - // For example, `PRELOAD LOADONCALL` will result in default flags, but - // `LOADONCALL PRELOAD` will have PRELOAD set after they are both applied in order. - for (tokens) |token| { - const attribute = rc.CommonResourceAttributes.map.get(token.slice(source)).?; - switch (attribute) { - .preload, .loadoncall => flags.set(attribute), - else => {}, - } - } - if (flags.value == initial_flags.value) return; - - // 3. 
If none of DISCARDABLE, SHARED, or PURE is specified, then PRELOAD - // implies `flags &= ~SHARED` and LOADONCALL implies `flags |= SHARED` - const shared_set = comptime blk: { - var set = std.enums.EnumSet(rc.CommonResourceAttributes).initEmpty(); - set.insert(.discardable); - set.insert(.shared); - set.insert(.pure); - break :blk set; - }; - const discardable_shared_or_pure_specified = flags_set.intersectWith(shared_set).count() != 0; - for (tokens) |token| { - const attribute = rc.CommonResourceAttributes.map.get(token.slice(source)).?; - flags.setGroup(attribute, !discardable_shared_or_pure_specified); - } - } - - /// Only handles the 'base' optional statements that are shared between resource types. - fn applyToOptionalStatements(language: *res.Language, version: *u32, characteristics: *u32, statements: []*Node, source: []const u8, code_page_lookup: *const CodePageLookup) void { - for (statements) |node| switch (node.id) { - .language_statement => { - const language_statement = @fieldParentPtr(Node.LanguageStatement, "base", node); - language.* = languageFromLanguageStatement(language_statement, source, code_page_lookup); - }, - .simple_statement => { - const simple_statement = @fieldParentPtr(Node.SimpleStatement, "base", node); - const statement_type = rc.OptionalStatements.map.get(simple_statement.identifier.slice(source)) orelse continue; - const result = Compiler.evaluateNumberExpression(simple_statement.value, source, code_page_lookup); - switch (statement_type) { - .version => version.* = result.value, - .characteristics => characteristics.* = result.value, - else => unreachable, // only VERSION and CHARACTERISTICS should be in an optional statements list - } - }, - else => {}, - }; - } - - pub fn languageFromLanguageStatement(language_statement: *const Node.LanguageStatement, source: []const u8, code_page_lookup: *const CodePageLookup) res.Language { - const primary = Compiler.evaluateNumberExpression(language_statement.primary_language_id, source, 
code_page_lookup); - const sublanguage = Compiler.evaluateNumberExpression(language_statement.sublanguage_id, source, code_page_lookup); - return .{ - .primary_language_id = @truncate(primary.value), - .sublanguage_id = @truncate(sublanguage.value), - }; - } - - pub fn getLanguageFromOptionalStatements(statements: []*Node, source: []const u8, code_page_lookup: *const CodePageLookup) ?res.Language { - for (statements) |node| switch (node.id) { - .language_statement => { - const language_statement = @fieldParentPtr(Node.LanguageStatement, "base", node); - return languageFromLanguageStatement(language_statement, source, code_page_lookup); - }, - else => continue, - }; - return null; - } - - pub fn writeEmptyResource(writer: anytype) !void { - const header = ResourceHeader{ - .name_value = .{ .ordinal = 0 }, - .type_value = .{ .ordinal = 0 }, - .language = .{ - .primary_language_id = 0, - .sublanguage_id = 0, - }, - .memory_flags = .{ .value = 0 }, - .data_size = 0, - .version = 0, - .characteristics = 0, - }; - try header.writeAssertNoOverflow(writer); - } - - pub fn sourceBytesForToken(self: *Compiler, token: Token) SourceBytes { - return .{ - .slice = token.slice(self.source), - .code_page = self.input_code_pages.getForToken(token), - }; - } - - /// Helper that calls parseQuotedStringAsWideString with the relevant context - /// Resulting slice is allocated by `self.allocator`. - pub fn parseQuotedStringAsWideString(self: *Compiler, token: Token) ![:0]u16 { - return literals.parseQuotedStringAsWideString( - self.allocator, - self.sourceBytesForToken(token), - .{ - .start_column = token.calculateColumn(self.source, 8, null), - .diagnostics = .{ .diagnostics = self.diagnostics, .token = token }, - }, - ); - } - - /// Helper that calls parseQuotedStringAsAsciiString with the relevant context - /// Resulting slice is allocated by `self.allocator`. 
- pub fn parseQuotedStringAsAsciiString(self: *Compiler, token: Token) ![]u8 { - return literals.parseQuotedStringAsAsciiString( - self.allocator, - self.sourceBytesForToken(token), - .{ - .start_column = token.calculateColumn(self.source, 8, null), - .diagnostics = .{ .diagnostics = self.diagnostics, .token = token }, - }, - ); - } - - fn addErrorDetails(self: *Compiler, details: ErrorDetails) Allocator.Error!void { - try self.diagnostics.append(details); - } - - fn addErrorDetailsAndFail(self: *Compiler, details: ErrorDetails) error{ CompileError, OutOfMemory } { - try self.addErrorDetails(details); - return error.CompileError; - } -}; - -pub const OpenSearchPathError = std.fs.Dir.OpenError; - -fn openSearchPathDir(dir: std.fs.Dir, path: []const u8) OpenSearchPathError!std.fs.Dir { - // Validate the search path to avoid possible unreachable on invalid paths, - // see https://github.com/ziglang/zig/issues/15607 for why this is currently necessary. - try validateSearchPath(path); - return dir.openDir(path, .{}); -} - -/// Very crude attempt at validating a path. This is imperfect -/// and AFAIK it is effectively impossible to implement perfect path -/// validation, since it ultimately depends on the underlying filesystem. -/// Note that this function won't be necessary if/when -/// https://github.com/ziglang/zig/issues/15607 -/// is accepted/implemented. -fn validateSearchPath(path: []const u8) error{BadPathName}!void { - switch (builtin.os.tag) { - .windows => { - // This will return error.BadPathName on non-Win32 namespaced paths - // (e.g. the NT \??\ prefix, the device \\.\ prefix, etc). - // Those path types are something of an unavoidable way to - // still hit unreachable during the openDir call. 
- var component_iterator = try std.fs.path.componentIterator(path); - while (component_iterator.next()) |component| { - // https://learn.microsoft.com/en-us/windows/win32/fileio/naming-a-file - if (std.mem.indexOfAny(u8, component.name, "\x00<>:\"|?*") != null) return error.BadPathName; - } - }, - else => { - if (std.mem.indexOfScalar(u8, path, 0) != null) return error.BadPathName; - }, - } -} - -pub const SearchDir = struct { - dir: std.fs.Dir, - path: ?[]const u8, - - pub fn deinit(self: *SearchDir, allocator: Allocator) void { - self.dir.close(); - if (self.path) |path| { - allocator.free(path); - } - } -}; - -/// Slurps the first `size` bytes read into `slurped_header` -pub fn HeaderSlurpingReader(comptime size: usize, comptime ReaderType: anytype) type { - return struct { - child_reader: ReaderType, - bytes_read: usize = 0, - slurped_header: [size]u8 = [_]u8{0x00} ** size, - - pub const Error = ReaderType.Error; - pub const Reader = std.io.Reader(*@This(), Error, read); - - pub fn read(self: *@This(), buf: []u8) Error!usize { - const amt = try self.child_reader.read(buf); - if (self.bytes_read < size) { - const bytes_to_add = @min(amt, size - self.bytes_read); - const end_index = self.bytes_read + bytes_to_add; - @memcpy(self.slurped_header[self.bytes_read..end_index], buf[0..bytes_to_add]); - } - self.bytes_read +|= amt; - return amt; - } - - pub fn reader(self: *@This()) Reader { - return .{ .context = self }; - } - }; -} - -pub fn headerSlurpingReader(comptime size: usize, reader: anytype) HeaderSlurpingReader(size, @TypeOf(reader)) { - return .{ .child_reader = reader }; -} - -/// Sort of like std.io.LimitedReader, but a Writer. -/// Returns an error if writing the requested number of bytes -/// would ever exceed bytes_left, i.e. it does not always -/// write up to the limit and instead will error if the -/// limit would be breached if the entire slice was written. 
-pub fn LimitedWriter(comptime WriterType: type) type { - return struct { - inner_writer: WriterType, - bytes_left: u64, - - pub const Error = error{NoSpaceLeft} || WriterType.Error; - pub const Writer = std.io.Writer(*Self, Error, write); - - const Self = @This(); - - pub fn write(self: *Self, bytes: []const u8) Error!usize { - if (bytes.len > self.bytes_left) return error.NoSpaceLeft; - const amt = try self.inner_writer.write(bytes); - self.bytes_left -= amt; - return amt; - } - - pub fn writer(self: *Self) Writer { - return .{ .context = self }; - } - }; -} - -/// Returns an initialised `LimitedWriter` -/// `bytes_left` is a `u64` to be able to take 64 bit file offsets -pub fn limitedWriter(inner_writer: anytype, bytes_left: u64) LimitedWriter(@TypeOf(inner_writer)) { - return .{ .inner_writer = inner_writer, .bytes_left = bytes_left }; -} - -test "limitedWriter basic usage" { - var buf: [4]u8 = undefined; - var fbs = std.io.fixedBufferStream(&buf); - var limited_stream = limitedWriter(fbs.writer(), 4); - var writer = limited_stream.writer(); - - try std.testing.expectEqual(@as(usize, 3), try writer.write("123")); - try std.testing.expectEqualSlices(u8, "123", buf[0..3]); - try std.testing.expectError(error.NoSpaceLeft, writer.write("45")); - try std.testing.expectEqual(@as(usize, 1), try writer.write("4")); - try std.testing.expectEqualSlices(u8, "1234", buf[0..4]); - try std.testing.expectError(error.NoSpaceLeft, writer.write("5")); -} - -pub const FontDir = struct { - fonts: std.ArrayListUnmanaged(Font) = .{}, - /// To keep track of which ids are set and where they were set from - ids: std.AutoHashMapUnmanaged(u16, Token) = .{}, - - pub const Font = struct { - id: u16, - header_bytes: [148]u8, - }; - - pub fn deinit(self: *FontDir, allocator: Allocator) void { - self.fonts.deinit(allocator); - } - - pub fn add(self: *FontDir, allocator: Allocator, font: Font, id_token: Token) !void { - try self.ids.putNoClobber(allocator, font.id, id_token); - try 
self.fonts.append(allocator, font); - } - - pub fn writeResData(self: *FontDir, compiler: *Compiler, writer: anytype) !void { - if (self.fonts.items.len == 0) return; - - // We know the number of fonts is limited to maxInt(u16) because fonts - // must have a valid and unique u16 ordinal ID (trying to specify a FONT - // with e.g. id 65537 will wrap around to 1 and be ignored if there's already - // a font with that ID in the file). - const num_fonts: u16 = @intCast(self.fonts.items.len); - - // u16 count + [(u16 id + 150 bytes) for each font] - // Note: This works out to a maximum data_size of 9,961,322. - const data_size: u32 = 2 + (2 + 150) * num_fonts; - - var header = Compiler.ResourceHeader{ - .name_value = try NameOrOrdinal.nameFromString(compiler.allocator, .{ .slice = "FONTDIR", .code_page = .windows1252 }), - .type_value = NameOrOrdinal{ .ordinal = @intFromEnum(res.RT.FONTDIR) }, - .memory_flags = res.MemoryFlags.defaults(res.RT.FONTDIR), - .language = compiler.state.language, - .version = compiler.state.version, - .characteristics = compiler.state.characteristics, - .data_size = data_size, - }; - defer header.deinit(compiler.allocator); - - try header.writeAssertNoOverflow(writer); - try writer.writeInt(u16, num_fonts, .little); - for (self.fonts.items) |font| { - // The format of the FONTDIR is a strange beast. - // Technically, each FONT is seemingly meant to be written as a - // FONTDIRENTRY with two trailing NUL-terminated strings corresponding to - // the 'device name' and 'face name' of the .FNT file, but: - // - // 1. When dealing with .FNT files, the Win32 implementation - // gets the device name and face name from the wrong locations, - // so it's basically never going to write the real device/face name - // strings. - // 2. When dealing with files 76-140 bytes long, the Win32 implementation - // can just crash (if there are no NUL bytes in the file). - // 3. 
The 32-bit Win32 rc.exe uses a 148 byte size for the portion of - // the FONTDIRENTRY before the NUL-terminated strings, which - // does not match the documented FONTDIRENTRY size that (presumably) - // this format is meant to be using, so anything iterating the - // FONTDIR according to the available documentation will get bogus results. - // 4. The FONT resource can be used for non-.FNT types like TTF and OTF, - // in which case emulating the Win32 behavior of unconditionally - // interpreting the bytes as a .FNT and trying to grab device/face names - // from random bytes in the TTF/OTF file can lead to weird behavior - // and errors in the Win32 implementation (for example, the device/face - // name fields are offsets into the file where the NUL-terminated - // string is located, but the Win32 implementation actually treats - // them as signed so if they are negative then the Win32 implementation - // will error; this happening for TTF fonts would just be a bug - // since the TTF could otherwise be valid) - // 5. The FONTDIR resource doesn't actually seem to be used at all by - // anything that I've found, and instead in Windows 3.0 and newer - // it seems like the FONT resources are always just iterated/accessed - // directly without ever looking at the FONTDIR. - // - // All of these combined means that we: - // - Do not need or want to emulate Win32 behavior here - // - For maximum simplicity and compatibility, we just write the first - // 148 bytes of the file without any interpretation (padded with - // zeroes to get up to 148 bytes if necessary), and then - // unconditionally write two NUL bytes, meaning that we always - // write 'device name' and 'face name' as if they were 0-length - // strings. - // - // This gives us byte-for-byte .RES compatibility in the common case while - // allowing us to avoid any erroneous errors caused by trying to read - // the face/device name from a bogus location. 
Note that the Win32 - // implementation never actually writes the real device/face name here - // anyway (except in the bizarre case that a .FNT file has the proper - // device/face name offsets within a reserved section of the .FNT file) - // so there's no feasible way that anything can actually think that the - // device name/face name in the FONTDIR is reliable. - - // First, the ID is written, though - try writer.writeInt(u16, font.id, .little); - try writer.writeAll(&font.header_bytes); - try writer.writeByteNTimes(0, 2); - } - try Compiler.writeDataPadding(writer, data_size); - } -}; - -pub const StringTablesByLanguage = struct { - /// String tables for each language are written to the .res file in order depending on - /// when the first STRINGTABLE for the language was defined, and all blocks for a given - /// language are written contiguously. - /// Using an ArrayHashMap here gives us this property for free. - tables: std.AutoArrayHashMapUnmanaged(res.Language, StringTable) = .{}, - - pub fn deinit(self: *StringTablesByLanguage, allocator: Allocator) void { - self.tables.deinit(allocator); - } - - pub fn set( - self: *StringTablesByLanguage, - allocator: Allocator, - language: res.Language, - id: u16, - string_token: Token, - node: *Node, - source: []const u8, - code_page_lookup: *const CodePageLookup, - version: u32, - characteristics: u32, - ) StringTable.SetError!void { - var get_or_put_result = try self.tables.getOrPut(allocator, language); - if (!get_or_put_result.found_existing) { - get_or_put_result.value_ptr.* = StringTable{}; - } - return get_or_put_result.value_ptr.set(allocator, id, string_token, node, source, code_page_lookup, version, characteristics); - } -}; - -pub const StringTable = struct { - /// Blocks are written to the .res file in order depending on when the first string - /// was added to the block (i.e. 
`STRINGTABLE { 16 "b" 0 "a" }` would then get written - /// with block ID 2 (the one with "b") first and block ID 1 (the one with "a") second). - /// Using an ArrayHashMap here gives us this property for free. - blocks: std.AutoArrayHashMapUnmanaged(u16, Block) = .{}, - - pub const Block = struct { - strings: std.ArrayListUnmanaged(Token) = .{}, - set_indexes: std.bit_set.IntegerBitSet(16) = .{ .mask = 0 }, - memory_flags: MemoryFlags = MemoryFlags.defaults(res.RT.STRING), - characteristics: u32, - version: u32, - - /// Returns the index to insert the string into the `strings` list. - /// Returns null if the string should be appended. - fn getInsertionIndex(self: *Block, index: u8) ?u8 { - std.debug.assert(!self.set_indexes.isSet(index)); - - const first_set = self.set_indexes.findFirstSet() orelse return null; - if (first_set > index) return 0; - - const last_set = 15 - @clz(self.set_indexes.mask); - if (index > last_set) return null; - - var bit = first_set + 1; - var insertion_index: u8 = 1; - while (bit != index) : (bit += 1) { - if (self.set_indexes.isSet(bit)) insertion_index += 1; - } - return insertion_index; - } - - fn getTokenIndex(self: *Block, string_index: u8) ?u8 { - const count = self.strings.items.len; - if (count == 0) return null; - if (count == 1) return 0; - - const first_set = self.set_indexes.findFirstSet() orelse unreachable; - if (first_set == string_index) return 0; - const last_set = 15 - @clz(self.set_indexes.mask); - if (last_set == string_index) return @intCast(count - 1); - - if (first_set == last_set) return null; - - var bit = first_set + 1; - var token_index: u8 = 1; - while (bit < last_set) : (bit += 1) { - if (!self.set_indexes.isSet(bit)) continue; - if (bit == string_index) return token_index; - token_index += 1; - } - return null; - } - - fn dump(self: *Block) void { - var bit_it = self.set_indexes.iterator(.{}); - var string_index: usize = 0; - while (bit_it.next()) |bit_index| { - const token = 
self.strings.items[string_index]; - std.debug.print("{}: [{}] {any}\n", .{ bit_index, string_index, token }); - string_index += 1; - } - } - - pub fn applyAttributes(self: *Block, string_table: *Node.StringTable, source: []const u8, code_page_lookup: *const CodePageLookup) void { - Compiler.applyToMemoryFlags(&self.memory_flags, string_table.common_resource_attributes, source); - var dummy_language: res.Language = undefined; - Compiler.applyToOptionalStatements(&dummy_language, &self.version, &self.characteristics, string_table.optional_statements, source, code_page_lookup); - } - - fn trimToDoubleNUL(comptime T: type, str: []const T) []const T { - var last_was_null = false; - for (str, 0..) |c, i| { - if (c == 0) { - if (last_was_null) return str[0 .. i - 1]; - last_was_null = true; - } else { - last_was_null = false; - } - } - return str; - } - - test "trimToDoubleNUL" { - try std.testing.expectEqualStrings("a\x00b", trimToDoubleNUL(u8, "a\x00b")); - try std.testing.expectEqualStrings("a", trimToDoubleNUL(u8, "a\x00\x00b")); - } - - pub fn writeResData(self: *Block, compiler: *Compiler, language: res.Language, block_id: u16, writer: anytype) !void { - var data_buffer = std.ArrayList(u8).init(compiler.allocator); - defer data_buffer.deinit(); - const data_writer = data_buffer.writer(); - - var i: u8 = 0; - var string_i: u8 = 0; - while (true) : (i += 1) { - if (!self.set_indexes.isSet(i)) { - try data_writer.writeInt(u16, 0, .little); - if (i == 15) break else continue; - } - - const string_token = self.strings.items[string_i]; - const slice = string_token.slice(compiler.source); - const column = string_token.calculateColumn(compiler.source, 8, null); - const code_page = compiler.input_code_pages.getForToken(string_token); - const bytes = SourceBytes{ .slice = slice, .code_page = code_page }; - const utf16_string = try literals.parseQuotedStringAsWideString(compiler.allocator, bytes, .{ - .start_column = column, - .diagnostics = .{ .diagnostics = 
compiler.diagnostics, .token = string_token }, - }); - defer compiler.allocator.free(utf16_string); - - const trimmed_string = trim: { - // Two NUL characters in a row act as a terminator - // Note: This is only the case for STRINGTABLE strings - const trimmed = trimToDoubleNUL(u16, utf16_string); - // We also want to trim any trailing NUL characters - break :trim std.mem.trimRight(u16, trimmed, &[_]u16{0}); - }; - - // String literals are limited to maxInt(u15) codepoints, so these UTF-16 encoded - // strings are limited to maxInt(u15) * 2 = 65,534 code units (since 2 is the - // maximum number of UTF-16 code units per codepoint). - // This leaves room for exactly one NUL terminator. - var string_len_in_utf16_code_units: u16 = @intCast(trimmed_string.len); - // If the option is set, then a NUL terminator is added unconditionally. - // We already trimmed any trailing NULs, so we know it will be a new addition to the string. - if (compiler.null_terminate_string_table_strings) string_len_in_utf16_code_units += 1; - try data_writer.writeInt(u16, string_len_in_utf16_code_units, .little); - try data_writer.writeAll(std.mem.sliceAsBytes(trimmed_string)); - if (compiler.null_terminate_string_table_strings) { - try data_writer.writeInt(u16, 0, .little); - } - - if (i == 15) break; - string_i += 1; - } - - // This intCast will never be able to fail due to the length constraints on string literals. - // - // - STRINGTABLE resource definitions can can only provide one string literal per index. - // - STRINGTABLE strings are limited to maxInt(u16) UTF-16 code units (see 'string_len_in_utf16_code_units' - // above), which means that the maximum number of bytes per string literal is - // 2 * maxInt(u16) = 131,070 (since there are 2 bytes per UTF-16 code unit). 
- // - Each Block/RT_STRING resource includes exactly 16 strings and each have a 2 byte - // length field, so the maximum number of total bytes in a RT_STRING resource's data is - // 16 * (131,070 + 2) = 2,097,152 which is well within the u32 max. - // - // Note: The string literal maximum length is enforced by the lexer. - const data_size: u32 = @intCast(data_buffer.items.len); - - const header = Compiler.ResourceHeader{ - .name_value = .{ .ordinal = block_id }, - .type_value = .{ .ordinal = @intFromEnum(res.RT.STRING) }, - .memory_flags = self.memory_flags, - .language = language, - .version = self.version, - .characteristics = self.characteristics, - .data_size = data_size, - }; - // The only variable parts of the header are name and type, which in this case - // we fully control and know are numbers, so they have a fixed size. - try header.writeAssertNoOverflow(writer); - - var data_fbs = std.io.fixedBufferStream(data_buffer.items); - try Compiler.writeResourceData(writer, data_fbs.reader(), data_size); - } - }; - - pub fn deinit(self: *StringTable, allocator: Allocator) void { - var it = self.blocks.iterator(); - while (it.next()) |entry| { - entry.value_ptr.strings.deinit(allocator); - } - self.blocks.deinit(allocator); - } - - const SetError = error{StringAlreadyDefined} || Allocator.Error; - - pub fn set( - self: *StringTable, - allocator: Allocator, - id: u16, - string_token: Token, - node: *Node, - source: []const u8, - code_page_lookup: *const CodePageLookup, - version: u32, - characteristics: u32, - ) SetError!void { - const block_id = (id / 16) + 1; - const string_index: u8 = @intCast(id & 0xF); - - var get_or_put_result = try self.blocks.getOrPut(allocator, block_id); - if (!get_or_put_result.found_existing) { - get_or_put_result.value_ptr.* = Block{ .version = version, .characteristics = characteristics }; - get_or_put_result.value_ptr.applyAttributes(node.cast(.string_table).?, source, code_page_lookup); - } else { - if 
(get_or_put_result.value_ptr.set_indexes.isSet(string_index)) { - return error.StringAlreadyDefined; - } - } - - var block = get_or_put_result.value_ptr; - if (block.getInsertionIndex(string_index)) |insertion_index| { - try block.strings.insert(allocator, insertion_index, string_token); - } else { - try block.strings.append(allocator, string_token); - } - block.set_indexes.set(string_index); - } - - pub fn get(self: *StringTable, id: u16) ?Token { - const block_id = (id / 16) + 1; - const string_index: u8 = @intCast(id & 0xF); - - const block = self.blocks.getPtr(block_id) orelse return null; - const token_index = block.getTokenIndex(string_index) orelse return null; - return block.strings.items[token_index]; - } - - pub fn dump(self: *StringTable) !void { - var it = self.iterator(); - while (it.next()) |entry| { - std.debug.print("block: {}\n", .{entry.key_ptr.*}); - entry.value_ptr.dump(); - } - } -}; - -test "StringTable" { - const S = struct { - fn makeDummyToken(id: usize) Token { - return Token{ - .id = .invalid, - .start = id, - .end = id, - .line_number = id, - }; - } - }; - const allocator = std.testing.allocator; - var string_table = StringTable{}; - defer string_table.deinit(allocator); - - var code_page_lookup = CodePageLookup.init(allocator, .windows1252); - defer code_page_lookup.deinit(); - - var dummy_node = Node.StringTable{ - .type = S.makeDummyToken(0), - .common_resource_attributes = &.{}, - .optional_statements = &.{}, - .begin_token = S.makeDummyToken(0), - .strings = &.{}, - .end_token = S.makeDummyToken(0), - }; - - // randomize an array of ids 0-99 - var ids = ids: { - var buf: [100]u16 = undefined; - var i: u16 = 0; - while (i < buf.len) : (i += 1) { - buf[i] = i; - } - break :ids buf; - }; - var prng = std.Random.DefaultPrng.init(0); - var random = prng.random(); - random.shuffle(u16, &ids); - - // set each one in the randomized order - for (ids) |id| { - try string_table.set(allocator, id, S.makeDummyToken(id), &dummy_node.base, "", 
&code_page_lookup, 0, 0); - } - - // make sure each one exists and is the right value when gotten - var id: u16 = 0; - while (id < 100) : (id += 1) { - const dummy = S.makeDummyToken(id); - try std.testing.expectError(error.StringAlreadyDefined, string_table.set(allocator, id, dummy, &dummy_node.base, "", &code_page_lookup, 0, 0)); - try std.testing.expectEqual(dummy, string_table.get(id).?); - } - - // make sure non-existent string ids are not found - try std.testing.expectEqual(@as(?Token, null), string_table.get(100)); -} diff --git a/src/resinator/errors.zig b/src/resinator/errors.zig @@ -1,1060 +0,0 @@ -const std = @import("std"); -const Token = @import("lex.zig").Token; -const SourceMappings = @import("source_mapping.zig").SourceMappings; -const utils = @import("utils.zig"); -const rc = @import("rc.zig"); -const res = @import("res.zig"); -const ico = @import("ico.zig"); -const bmp = @import("bmp.zig"); -const parse = @import("parse.zig"); -const lang = @import("lang.zig"); -const CodePage = @import("code_pages.zig").CodePage; -const builtin = @import("builtin"); -const native_endian = builtin.cpu.arch.endian(); - -pub const Diagnostics = struct { - errors: std.ArrayListUnmanaged(ErrorDetails) = .{}, - /// Append-only, cannot handle removing strings. - /// Expects to own all strings within the list. 
- strings: std.ArrayListUnmanaged([]const u8) = .{}, - allocator: std.mem.Allocator, - - pub fn init(allocator: std.mem.Allocator) Diagnostics { - return .{ - .allocator = allocator, - }; - } - - pub fn deinit(self: *Diagnostics) void { - self.errors.deinit(self.allocator); - for (self.strings.items) |str| { - self.allocator.free(str); - } - self.strings.deinit(self.allocator); - } - - pub fn append(self: *Diagnostics, error_details: ErrorDetails) !void { - try self.errors.append(self.allocator, error_details); - } - - const SmallestStringIndexType = std.meta.Int(.unsigned, @min( - @bitSizeOf(ErrorDetails.FileOpenError.FilenameStringIndex), - @min( - @bitSizeOf(ErrorDetails.IconReadError.FilenameStringIndex), - @bitSizeOf(ErrorDetails.BitmapReadError.FilenameStringIndex), - ), - )); - - /// Returns the index of the added string as the SmallestStringIndexType - /// in order to avoid needing to `@intCast` it at callsites of putString. - /// Instead, this function will error if the index would ever exceed the - /// smallest FilenameStringIndex of an ErrorDetails type. 
- pub fn putString(self: *Diagnostics, str: []const u8) !SmallestStringIndexType { - if (self.strings.items.len >= std.math.maxInt(SmallestStringIndexType)) { - return error.OutOfMemory; // ran out of string indexes - } - const dupe = try self.allocator.dupe(u8, str); - const index = self.strings.items.len; - try self.strings.append(self.allocator, dupe); - return @intCast(index); - } - - pub fn renderToStdErr(self: *Diagnostics, cwd: std.fs.Dir, source: []const u8, tty_config: std.io.tty.Config, source_mappings: ?SourceMappings) void { - std.debug.getStderrMutex().lock(); - defer std.debug.getStderrMutex().unlock(); - const stderr = std.io.getStdErr().writer(); - for (self.errors.items) |err_details| { - renderErrorMessage(self.allocator, stderr, tty_config, cwd, err_details, source, self.strings.items, source_mappings) catch return; - } - } - - pub fn renderToStdErrDetectTTY(self: *Diagnostics, cwd: std.fs.Dir, source: []const u8, source_mappings: ?SourceMappings) void { - const tty_config = std.io.tty.detectConfig(std.io.getStdErr()); - return self.renderToStdErr(cwd, source, tty_config, source_mappings); - } - - pub fn contains(self: *const Diagnostics, err: ErrorDetails.Error) bool { - for (self.errors.items) |details| { - if (details.err == err) return true; - } - return false; - } - - pub fn containsAny(self: *const Diagnostics, errors: []const ErrorDetails.Error) bool { - for (self.errors.items) |details| { - for (errors) |err| { - if (details.err == err) return true; - } - } - return false; - } -}; - -/// Contains enough context to append errors/warnings/notes etc -pub const DiagnosticsContext = struct { - diagnostics: *Diagnostics, - token: Token, -}; - -pub const ErrorDetails = struct { - err: Error, - token: Token, - /// If non-null, should be before `token`. If null, `token` is assumed to be the start. - token_span_start: ?Token = null, - /// If non-null, should be after `token`. If null, `token` is assumed to be the end. 
- token_span_end: ?Token = null, - type: Type = .err, - print_source_line: bool = true, - extra: union { - none: void, - expected: Token.Id, - number: u32, - expected_types: ExpectedTypes, - resource: rc.Resource, - string_and_language: StringAndLanguage, - file_open_error: FileOpenError, - icon_read_error: IconReadError, - icon_dir: IconDirContext, - bmp_read_error: BitmapReadError, - accelerator_error: AcceleratorError, - statement_with_u16_param: StatementWithU16Param, - menu_or_class: enum { class, menu }, - } = .{ .none = {} }, - - pub const Type = enum { - /// Fatal error, stops compilation - err, - /// Warning that does not affect compilation result - warning, - /// A note that typically provides further context for a warning/error - note, - /// An invisible diagnostic that is not printed to stderr but can - /// provide information useful when comparing the behavior of different - /// implementations. For example, a hint is emitted when a FONTDIR resource - /// was included in the .RES file which is significant because rc.exe - /// does something different than us, but ultimately it's not important - /// enough to be a warning/note. 
- hint, - }; - - comptime { - // all fields in the extra union should be 32 bits or less - for (std.meta.fields(std.meta.fieldInfo(ErrorDetails, .extra).type)) |field| { - std.debug.assert(@bitSizeOf(field.type) <= 32); - } - } - - pub const StatementWithU16Param = enum(u32) { - fileversion, - productversion, - language, - }; - - pub const StringAndLanguage = packed struct(u32) { - id: u16, - language: res.Language, - }; - - pub const FileOpenError = packed struct(u32) { - err: FileOpenErrorEnum, - filename_string_index: FilenameStringIndex, - - pub const FilenameStringIndex = std.meta.Int(.unsigned, 32 - @bitSizeOf(FileOpenErrorEnum)); - pub const FileOpenErrorEnum = std.meta.FieldEnum(std.fs.File.OpenError); - - pub fn enumFromError(err: std.fs.File.OpenError) FileOpenErrorEnum { - return switch (err) { - inline else => |e| @field(ErrorDetails.FileOpenError.FileOpenErrorEnum, @errorName(e)), - }; - } - }; - - pub const IconReadError = packed struct(u32) { - err: IconReadErrorEnum, - icon_type: enum(u1) { cursor, icon }, - filename_string_index: FilenameStringIndex, - - pub const FilenameStringIndex = std.meta.Int(.unsigned, 32 - @bitSizeOf(IconReadErrorEnum) - 1); - pub const IconReadErrorEnum = std.meta.FieldEnum(ico.ReadError); - - pub fn enumFromError(err: ico.ReadError) IconReadErrorEnum { - return switch (err) { - inline else => |e| @field(ErrorDetails.IconReadError.IconReadErrorEnum, @errorName(e)), - }; - } - }; - - pub const IconDirContext = packed struct(u32) { - icon_type: enum(u1) { cursor, icon }, - icon_format: ico.ImageFormat, - index: u16, - bitmap_version: ico.BitmapHeader.Version = .unknown, - _: Padding = 0, - - pub const Padding = std.meta.Int(.unsigned, 15 - @bitSizeOf(ico.BitmapHeader.Version) - @bitSizeOf(ico.ImageFormat)); - }; - - pub const BitmapReadError = packed struct(u32) { - err: BitmapReadErrorEnum, - filename_string_index: FilenameStringIndex, - - pub const FilenameStringIndex = std.meta.Int(.unsigned, 32 - 
@bitSizeOf(BitmapReadErrorEnum)); - pub const BitmapReadErrorEnum = std.meta.FieldEnum(bmp.ReadError); - - pub fn enumFromError(err: bmp.ReadError) BitmapReadErrorEnum { - return switch (err) { - inline else => |e| @field(ErrorDetails.BitmapReadError.BitmapReadErrorEnum, @errorName(e)), - }; - } - }; - - pub const BitmapUnsupportedDIB = packed struct(u32) { - dib_version: ico.BitmapHeader.Version, - filename_string_index: FilenameStringIndex, - - pub const FilenameStringIndex = std.meta.Int(.unsigned, 32 - @bitSizeOf(ico.BitmapHeader.Version)); - }; - - pub const AcceleratorError = packed struct(u32) { - err: AcceleratorErrorEnum, - _: Padding = 0, - - pub const Padding = std.meta.Int(.unsigned, 32 - @bitSizeOf(AcceleratorErrorEnum)); - pub const AcceleratorErrorEnum = std.meta.FieldEnum(res.ParseAcceleratorKeyStringError); - - pub fn enumFromError(err: res.ParseAcceleratorKeyStringError) AcceleratorErrorEnum { - return switch (err) { - inline else => |e| @field(ErrorDetails.AcceleratorError.AcceleratorErrorEnum, @errorName(e)), - }; - } - }; - - pub const ExpectedTypes = packed struct(u32) { - number: bool = false, - number_expression: bool = false, - string_literal: bool = false, - accelerator_type_or_option: bool = false, - control_class: bool = false, - literal: bool = false, - // Note: This being 0 instead of undefined is arbitrary and something of a workaround, - // see https://github.com/ziglang/zig/issues/15395 - _: u26 = 0, - - pub const strings = std.ComptimeStringMap([]const u8, .{ - .{ "number", "number" }, - .{ "number_expression", "number expression" }, - .{ "string_literal", "quoted string literal" }, - .{ "accelerator_type_or_option", "accelerator type or option [ASCII, VIRTKEY, etc]" }, - .{ "control_class", "control class [BUTTON, EDIT, etc]" }, - .{ "literal", "unquoted literal" }, - }); - - pub fn writeCommaSeparated(self: ExpectedTypes, writer: anytype) !void { - const struct_info = @typeInfo(ExpectedTypes).Struct; - const num_real_fields = 
struct_info.fields.len - 1; - const num_padding_bits = @bitSizeOf(ExpectedTypes) - num_real_fields; - const mask = std.math.maxInt(struct_info.backing_integer.?) >> num_padding_bits; - const relevant_bits_only = @as(struct_info.backing_integer.?, @bitCast(self)) & mask; - const num_set_bits = @popCount(relevant_bits_only); - - var i: usize = 0; - inline for (struct_info.fields) |field_info| { - if (field_info.type != bool) continue; - if (i == num_set_bits) return; - if (@field(self, field_info.name)) { - try writer.writeAll(strings.get(field_info.name).?); - i += 1; - if (num_set_bits > 2 and i != num_set_bits) { - try writer.writeAll(", "); - } else if (i != num_set_bits) { - try writer.writeByte(' '); - } - if (num_set_bits > 1 and i == num_set_bits - 1) { - try writer.writeAll("or "); - } - } - } - } - }; - - pub const Error = enum { - // Lexer - unfinished_string_literal, - string_literal_too_long, - invalid_number_with_exponent, - invalid_digit_character_in_number_literal, - illegal_byte, - illegal_byte_outside_string_literals, - illegal_codepoint_outside_string_literals, - illegal_byte_order_mark, - illegal_private_use_character, - found_c_style_escaped_quote, - code_page_pragma_missing_left_paren, - code_page_pragma_missing_right_paren, - code_page_pragma_invalid_code_page, - code_page_pragma_not_integer, - code_page_pragma_overflow, - code_page_pragma_unsupported_code_page, - - // Parser - unfinished_raw_data_block, - unfinished_string_table_block, - /// `expected` is populated. 
- expected_token, - /// `expected_types` is populated - expected_something_else, - /// `resource` is populated - resource_type_cant_use_raw_data, - /// `resource` is populated - id_must_be_ordinal, - /// `resource` is populated - name_or_id_not_allowed, - string_resource_as_numeric_type, - ascii_character_not_equivalent_to_virtual_key_code, - empty_menu_not_allowed, - rc_would_miscompile_version_value_padding, - rc_would_miscompile_version_value_byte_count, - code_page_pragma_in_included_file, - nested_resource_level_exceeds_max, - too_many_dialog_controls, - nested_expression_level_exceeds_max, - close_paren_expression, - unary_plus_expression, - rc_could_miscompile_control_params, - - // Compiler - /// `string_and_language` is populated - string_already_defined, - font_id_already_defined, - /// `file_open_error` is populated - file_open_error, - /// `accelerator_error` is populated - invalid_accelerator_key, - accelerator_type_required, - rc_would_miscompile_control_padding, - rc_would_miscompile_control_class_ordinal, - /// `icon_dir` is populated - rc_would_error_on_icon_dir, - /// `icon_dir` is populated - format_not_supported_in_icon_dir, - /// `resource` is populated and contains the expected type - icon_dir_and_resource_type_mismatch, - /// `icon_read_error` is populated - icon_read_error, - /// `icon_dir` is populated - rc_would_error_on_bitmap_version, - /// `icon_dir` is populated - max_icon_ids_exhausted, - /// `bmp_read_error` is populated - bmp_read_error, - /// `number` is populated and contains a string index for which the string contains - /// the bytes of a `u64` (native endian). The `u64` contains the number of ignored bytes. - bmp_ignored_palette_bytes, - /// `number` is populated and contains a string index for which the string contains - /// the bytes of a `u64` (native endian). The `u64` contains the number of missing bytes. 
- bmp_missing_palette_bytes, - /// `number` is populated and contains a string index for which the string contains - /// the bytes of a `u64` (native endian). The `u64` contains the number of miscompiled bytes. - rc_would_miscompile_bmp_palette_padding, - /// `number` is populated and contains a string index for which the string contains - /// the bytes of two `u64`s (native endian). The first contains the number of missing - /// palette bytes and the second contains the max number of missing palette bytes. - /// If type is `.note`, then `extra` is `none`. - bmp_too_many_missing_palette_bytes, - resource_header_size_exceeds_max, - resource_data_size_exceeds_max, - control_extra_data_size_exceeds_max, - version_node_size_exceeds_max, - fontdir_size_exceeds_max, - /// `number` is populated and contains a string index for the filename - number_expression_as_filename, - /// `number` is populated and contains the control ID that is a duplicate - control_id_already_defined, - /// `number` is populated and contains the disallowed codepoint - invalid_filename, - /// `statement_with_u16_param` is populated - rc_would_error_u16_with_l_suffix, - result_contains_fontdir, - /// `number` is populated and contains the ordinal value that the id would be miscompiled to - rc_would_miscompile_dialog_menu_id, - /// `number` is populated and contains the ordinal value that the value would be miscompiled to - rc_would_miscompile_dialog_class, - /// `menu_or_class` is populated and contains the type of the parameter statement - rc_would_miscompile_dialog_menu_or_class_id_forced_ordinal, - rc_would_miscompile_dialog_menu_id_starts_with_digit, - dialog_menu_id_was_uppercased, - /// `menu_or_class` is populated and contains the type of the parameter statement - duplicate_menu_or_class_skipped, - invalid_digit_character_in_ordinal, - - // Literals - /// `number` is populated - rc_would_miscompile_codepoint_byte_swap, - /// `number` is populated - rc_would_miscompile_codepoint_skip, - 
tab_converted_to_spaces, - - // General (used in various places) - /// `number` is populated and contains the value that the ordinal would have in the Win32 RC compiler implementation - win32_non_ascii_ordinal, - - // Initialization - /// `file_open_error` is populated, but `filename_string_index` is not - failed_to_open_cwd, - }; - - pub fn render(self: ErrorDetails, writer: anytype, source: []const u8, strings: []const []const u8) !void { - switch (self.err) { - .unfinished_string_literal => { - return writer.print("unfinished string literal at '{s}', expected closing '\"'", .{self.token.nameForErrorDisplay(source)}); - }, - .string_literal_too_long => { - return writer.print("string literal too long (max is currently {} characters)", .{self.extra.number}); - }, - .invalid_number_with_exponent => { - return writer.print("base 10 number literal with exponent is not allowed: {s}", .{self.token.slice(source)}); - }, - .invalid_digit_character_in_number_literal => switch (self.type) { - .err, .warning => return writer.writeAll("non-ASCII digit characters are not allowed in number literals"), - .note => return writer.writeAll("the Win32 RC compiler allows non-ASCII digit characters, but will miscompile them"), - .hint => return, - }, - .illegal_byte => { - return writer.print("character '{s}' is not allowed", .{std.fmt.fmtSliceEscapeUpper(self.token.slice(source))}); - }, - .illegal_byte_outside_string_literals => { - return writer.print("character '{s}' is not allowed outside of string literals", .{std.fmt.fmtSliceEscapeUpper(self.token.slice(source))}); - }, - .illegal_codepoint_outside_string_literals => { - // This is somewhat hacky, but we know that: - // - This error is only possible with codepoints outside of the Windows-1252 character range - // - So, the only supported code page that could generate this error is UTF-8 - // Therefore, we just assume the token bytes are UTF-8 and decode them to get the illegal - // codepoint. 
- // - // FIXME: Support other code pages if they become relevant - const bytes = self.token.slice(source); - const codepoint = std.unicode.utf8Decode(bytes) catch unreachable; - return writer.print("codepoint <U+{X:0>4}> is not allowed outside of string literals", .{codepoint}); - }, - .illegal_byte_order_mark => { - return writer.writeAll("byte order mark <U+FEFF> is not allowed"); - }, - .illegal_private_use_character => { - return writer.writeAll("private use character <U+E000> is not allowed"); - }, - .found_c_style_escaped_quote => { - return writer.writeAll("escaping quotes with \\\" is not allowed (use \"\" instead)"); - }, - .code_page_pragma_missing_left_paren => { - return writer.writeAll("expected left parenthesis after 'code_page' in #pragma code_page"); - }, - .code_page_pragma_missing_right_paren => { - return writer.writeAll("expected right parenthesis after '<number>' in #pragma code_page"); - }, - .code_page_pragma_invalid_code_page => { - return writer.writeAll("invalid or unknown code page in #pragma code_page"); - }, - .code_page_pragma_not_integer => { - return writer.writeAll("code page is not a valid integer in #pragma code_page"); - }, - .code_page_pragma_overflow => { - return writer.writeAll("code page too large in #pragma code_page"); - }, - .code_page_pragma_unsupported_code_page => { - // We know that the token slice is a well-formed #pragma code_page(N), so - // we can skip to the first ( and then get the number that follows - const token_slice = self.token.slice(source); - var number_start = std.mem.indexOfScalar(u8, token_slice, '(').? 
+ 1; - while (std.ascii.isWhitespace(token_slice[number_start])) { - number_start += 1; - } - var number_slice = token_slice[number_start..number_start]; - while (std.ascii.isDigit(token_slice[number_start + number_slice.len])) { - number_slice.len += 1; - } - const number = std.fmt.parseUnsigned(u16, number_slice, 10) catch unreachable; - const code_page = CodePage.getByIdentifier(number) catch unreachable; - // TODO: Improve or maybe add a note making it more clear that the code page - // is valid and that the code page is unsupported purely due to a limitation - // in this compiler. - return writer.print("unsupported code page '{s} (id={})' in #pragma code_page", .{ @tagName(code_page), number }); - }, - .unfinished_raw_data_block => { - return writer.print("unfinished raw data block at '{s}', expected closing '}}' or 'END'", .{self.token.nameForErrorDisplay(source)}); - }, - .unfinished_string_table_block => { - return writer.print("unfinished STRINGTABLE block at '{s}', expected closing '}}' or 'END'", .{self.token.nameForErrorDisplay(source)}); - }, - .expected_token => { - return writer.print("expected '{s}', got '{s}'", .{ self.extra.expected.nameForErrorDisplay(), self.token.nameForErrorDisplay(source) }); - }, - .expected_something_else => { - try writer.writeAll("expected "); - try self.extra.expected_types.writeCommaSeparated(writer); - return writer.print("; got '{s}'", .{self.token.nameForErrorDisplay(source)}); - }, - .resource_type_cant_use_raw_data => switch (self.type) { - .err, .warning => try writer.print("expected '<filename>', found '{s}' (resource type '{s}' can't use raw data)", .{ self.token.nameForErrorDisplay(source), self.extra.resource.nameForErrorDisplay() }), - .note => try writer.print("if '{s}' is intended to be a filename, it must be specified as a quoted string literal", .{self.token.nameForErrorDisplay(source)}), - .hint => return, - }, - .id_must_be_ordinal => { - try writer.print("id of resource type '{s}' must be an ordinal 
(u16), got '{s}'", .{ self.extra.resource.nameForErrorDisplay(), self.token.nameForErrorDisplay(source) }); - }, - .name_or_id_not_allowed => { - try writer.print("name or id is not allowed for resource type '{s}'", .{self.extra.resource.nameForErrorDisplay()}); - }, - .string_resource_as_numeric_type => switch (self.type) { - .err, .warning => try writer.writeAll("the number 6 (RT_STRING) cannot be used as a resource type"), - .note => try writer.writeAll("using RT_STRING directly likely results in an invalid .res file, use a STRINGTABLE instead"), - .hint => return, - }, - .ascii_character_not_equivalent_to_virtual_key_code => { - // TODO: Better wording? This is what the Win32 RC compiler emits. - // This occurs when VIRTKEY and a control code is specified ("^c", etc) - try writer.writeAll("ASCII character not equivalent to virtual key code"); - }, - .empty_menu_not_allowed => { - try writer.print("empty menu of type '{s}' not allowed", .{self.token.nameForErrorDisplay(source)}); - }, - .rc_would_miscompile_version_value_padding => switch (self.type) { - .err, .warning => return writer.print("the padding before this quoted string value would be miscompiled by the Win32 RC compiler", .{}), - .note => return writer.print("to avoid the potential miscompilation, consider adding a comma between the key and the quoted string", .{}), - .hint => return, - }, - .rc_would_miscompile_version_value_byte_count => switch (self.type) { - .err, .warning => return writer.print("the byte count of this value would be miscompiled by the Win32 RC compiler", .{}), - .note => return writer.print("to avoid the potential miscompilation, do not mix numbers and strings within a value", .{}), - .hint => return, - }, - .code_page_pragma_in_included_file => { - try writer.print("#pragma code_page is not supported in an included resource file", .{}); - }, - .nested_resource_level_exceeds_max => switch (self.type) { - .err, .warning => { - const max = switch (self.extra.resource) { - 
.versioninfo => parse.max_nested_version_level, - .menu, .menuex => parse.max_nested_menu_level, - else => unreachable, - }; - return writer.print("{s} contains too many nested children (max is {})", .{ self.extra.resource.nameForErrorDisplay(), max }); - }, - .note => return writer.print("max {s} nesting level exceeded here", .{self.extra.resource.nameForErrorDisplay()}), - .hint => return, - }, - .too_many_dialog_controls => switch (self.type) { - .err, .warning => return writer.print("{s} contains too many controls (max is {})", .{ self.extra.resource.nameForErrorDisplay(), std.math.maxInt(u16) }), - .note => return writer.writeAll("maximum number of controls exceeded here"), - .hint => return, - }, - .nested_expression_level_exceeds_max => switch (self.type) { - .err, .warning => return writer.print("expression contains too many syntax levels (max is {})", .{parse.max_nested_expression_level}), - .note => return writer.print("maximum expression level exceeded here", .{}), - .hint => return, - }, - .close_paren_expression => { - try writer.writeAll("the Win32 RC compiler would accept ')' as a valid expression, but it would be skipped over and potentially lead to unexpected outcomes"); - }, - .unary_plus_expression => { - try writer.writeAll("the Win32 RC compiler may accept '+' as a unary operator here, but it is not supported in this implementation; consider omitting the unary +"); - }, - .rc_could_miscompile_control_params => switch (self.type) { - .err, .warning => return writer.print("this token could be erroneously skipped over by the Win32 RC compiler", .{}), - .note => return writer.print("to avoid the potential miscompilation, consider adding a comma after the style parameter", .{}), - .hint => return, - }, - .string_already_defined => switch (self.type) { - .err, .warning => { - const language_id = self.extra.string_and_language.language.asInt(); - const language_name = language_name: { - if (std.meta.intToEnum(lang.LanguageId, language_id)) 
|lang_enum_val| { - break :language_name @tagName(lang_enum_val); - } else |_| {} - if (language_id == lang.LOCALE_CUSTOM_UNSPECIFIED) { - break :language_name "LOCALE_CUSTOM_UNSPECIFIED"; - } - break :language_name "<UNKNOWN>"; - }; - return writer.print("string with id {d} (0x{X}) already defined for language {s} (0x{X})", .{ self.extra.string_and_language.id, self.extra.string_and_language.id, language_name, language_id }); - }, - .note => return writer.print("previous definition of string with id {d} (0x{X}) here", .{ self.extra.string_and_language.id, self.extra.string_and_language.id }), - .hint => return, - }, - .font_id_already_defined => switch (self.type) { - .err => return writer.print("font with id {d} already defined", .{self.extra.number}), - .warning => return writer.print("skipped duplicate font with id {d}", .{self.extra.number}), - .note => return writer.print("previous definition of font with id {d} here", .{self.extra.number}), - .hint => return, - }, - .file_open_error => { - try writer.print("unable to open file '{s}': {s}", .{ strings[self.extra.file_open_error.filename_string_index], @tagName(self.extra.file_open_error.err) }); - }, - .invalid_accelerator_key => { - try writer.print("invalid accelerator key '{s}': {s}", .{ self.token.nameForErrorDisplay(source), @tagName(self.extra.accelerator_error.err) }); - }, - .accelerator_type_required => { - try writer.print("accelerator type [ASCII or VIRTKEY] required when key is an integer", .{}); - }, - .rc_would_miscompile_control_padding => switch (self.type) { - .err, .warning => return writer.print("the padding before this control would be miscompiled by the Win32 RC compiler (it would insert 2 extra bytes of padding)", .{}), - .note => return writer.print("to avoid the potential miscompilation, consider removing any 'control data' blocks from the controls in this dialog", .{}), - .hint => return, - }, - .rc_would_miscompile_control_class_ordinal => switch (self.type) { - .err, .warning => 
return writer.print("the control class of this CONTROL would be miscompiled by the Win32 RC compiler", .{}), - .note => return writer.print("to avoid the potential miscompilation, consider specifying the control class using a string (BUTTON, EDIT, etc) instead of a number", .{}), - .hint => return, - }, - .rc_would_error_on_icon_dir => switch (self.type) { - .err, .warning => return writer.print("the resource at index {} of this {s} has the format '{s}'; this would be an error in the Win32 RC compiler", .{ self.extra.icon_dir.index, @tagName(self.extra.icon_dir.icon_type), @tagName(self.extra.icon_dir.icon_format) }), - .note => { - // The only note supported is one specific to exactly this combination - if (!(self.extra.icon_dir.icon_type == .icon and self.extra.icon_dir.icon_format == .riff)) unreachable; - try writer.print("animated RIFF icons within resource groups may not be well supported, consider using an animated icon file (.ani) instead", .{}); - }, - .hint => return, - }, - .format_not_supported_in_icon_dir => { - try writer.print("resource with format '{s}' (at index {}) is not allowed in {s} resource groups", .{ @tagName(self.extra.icon_dir.icon_format), self.extra.icon_dir.index, @tagName(self.extra.icon_dir.icon_type) }); - }, - .icon_dir_and_resource_type_mismatch => { - const unexpected_type: rc.Resource = if (self.extra.resource == .icon) .cursor else .icon; - // TODO: Better wording - try writer.print("resource type '{s}' does not match type '{s}' specified in the file", .{ self.extra.resource.nameForErrorDisplay(), unexpected_type.nameForErrorDisplay() }); - }, - .icon_read_error => { - try writer.print("unable to read {s} file '{s}': {s}", .{ @tagName(self.extra.icon_read_error.icon_type), strings[self.extra.icon_read_error.filename_string_index], @tagName(self.extra.icon_read_error.err) }); - }, - .rc_would_error_on_bitmap_version => switch (self.type) { - .err => try writer.print("the DIB at index {} of this {s} is of version '{s}'; this 
version is no longer allowed and should be upgraded to '{s}'", .{ - self.extra.icon_dir.index, - @tagName(self.extra.icon_dir.icon_type), - self.extra.icon_dir.bitmap_version.nameForErrorDisplay(), - ico.BitmapHeader.Version.@"nt3.1".nameForErrorDisplay(), - }), - .warning => try writer.print("the DIB at index {} of this {s} is of version '{s}'; this would be an error in the Win32 RC compiler", .{ - self.extra.icon_dir.index, - @tagName(self.extra.icon_dir.icon_type), - self.extra.icon_dir.bitmap_version.nameForErrorDisplay(), - }), - .note => unreachable, - .hint => return, - }, - .max_icon_ids_exhausted => switch (self.type) { - .err, .warning => try writer.print("maximum global icon/cursor ids exhausted (max is {})", .{std.math.maxInt(u16) - 1}), - .note => try writer.print("maximum icon/cursor id exceeded at index {} of this {s}", .{ self.extra.icon_dir.index, @tagName(self.extra.icon_dir.icon_type) }), - .hint => return, - }, - .bmp_read_error => { - try writer.print("invalid bitmap file '{s}': {s}", .{ strings[self.extra.bmp_read_error.filename_string_index], @tagName(self.extra.bmp_read_error.err) }); - }, - .bmp_ignored_palette_bytes => { - const bytes = strings[self.extra.number]; - const ignored_bytes = std.mem.readInt(u64, bytes[0..8], native_endian); - try writer.print("bitmap has {d} extra bytes preceding the pixel data which will be ignored", .{ignored_bytes}); - }, - .bmp_missing_palette_bytes => { - const bytes = strings[self.extra.number]; - const missing_bytes = std.mem.readInt(u64, bytes[0..8], native_endian); - try writer.print("bitmap has {d} missing color palette bytes which will be padded with zeroes", .{missing_bytes}); - }, - .rc_would_miscompile_bmp_palette_padding => { - const bytes = strings[self.extra.number]; - const miscompiled_bytes = std.mem.readInt(u64, bytes[0..8], native_endian); - try writer.print("the missing color palette bytes would be miscompiled by the Win32 RC compiler (the added padding bytes would include {d} bytes of 
the pixel data)", .{miscompiled_bytes}); - }, - .bmp_too_many_missing_palette_bytes => switch (self.type) { - .err, .warning => { - const bytes = strings[self.extra.number]; - const missing_bytes = std.mem.readInt(u64, bytes[0..8], native_endian); - const max_missing_bytes = std.mem.readInt(u64, bytes[8..16], native_endian); - try writer.print("bitmap has {} missing color palette bytes which exceeds the maximum of {}", .{ missing_bytes, max_missing_bytes }); - }, - // TODO: command line option - .note => try writer.writeAll("the maximum number of missing color palette bytes is configurable via <<TODO command line option>>"), - .hint => return, - }, - .resource_header_size_exceeds_max => { - try writer.print("resource's header length exceeds maximum of {} bytes", .{std.math.maxInt(u32)}); - }, - .resource_data_size_exceeds_max => switch (self.type) { - .err, .warning => return writer.print("resource's data length exceeds maximum of {} bytes", .{std.math.maxInt(u32)}), - .note => return writer.print("maximum data length exceeded here", .{}), - .hint => return, - }, - .control_extra_data_size_exceeds_max => switch (self.type) { - .err, .warning => try writer.print("control data length exceeds maximum of {} bytes", .{std.math.maxInt(u16)}), - .note => return writer.print("maximum control data length exceeded here", .{}), - .hint => return, - }, - .version_node_size_exceeds_max => switch (self.type) { - .err, .warning => return writer.print("version node tree size exceeds maximum of {} bytes", .{std.math.maxInt(u16)}), - .note => return writer.print("maximum tree size exceeded while writing this child", .{}), - .hint => return, - }, - .fontdir_size_exceeds_max => switch (self.type) { - .err, .warning => return writer.print("FONTDIR data length exceeds maximum of {} bytes", .{std.math.maxInt(u32)}), - .note => return writer.writeAll("this is likely due to the size of the combined lengths of the device/face names of all FONT resources"), - .hint => return, - }, - 
.number_expression_as_filename => switch (self.type) { - .err, .warning => return writer.writeAll("filename cannot be specified using a number expression, consider using a quoted string instead"), - .note => return writer.print("the Win32 RC compiler would evaluate this number expression as the filename '{s}'", .{strings[self.extra.number]}), - .hint => return, - }, - .control_id_already_defined => switch (self.type) { - .err, .warning => return writer.print("control with id {d} already defined for this dialog", .{self.extra.number}), - .note => return writer.print("previous definition of control with id {d} here", .{self.extra.number}), - .hint => return, - }, - .invalid_filename => { - const disallowed_codepoint = self.extra.number; - if (disallowed_codepoint < 128 and std.ascii.isPrint(@intCast(disallowed_codepoint))) { - try writer.print("evaluated filename contains a disallowed character: '{c}'", .{@as(u8, @intCast(disallowed_codepoint))}); - } else { - try writer.print("evaluated filename contains a disallowed codepoint: <U+{X:0>4}>", .{disallowed_codepoint}); - } - }, - .rc_would_error_u16_with_l_suffix => switch (self.type) { - .err, .warning => return writer.print("this {s} parameter would be an error in the Win32 RC compiler", .{@tagName(self.extra.statement_with_u16_param)}), - .note => return writer.writeAll("to avoid the error, remove any L suffixes from numbers within the parameter"), - .hint => return, - }, - .result_contains_fontdir => return, - .rc_would_miscompile_dialog_menu_id => switch (self.type) { - .err, .warning => return writer.print("the id of this menu would be miscompiled by the Win32 RC compiler", .{}), - .note => return writer.print("the Win32 RC compiler would evaluate the id as the ordinal/number value {d}", .{self.extra.number}), - .hint => return, - }, - .rc_would_miscompile_dialog_class => switch (self.type) { - .err, .warning => return writer.print("this class would be miscompiled by the Win32 RC compiler", .{}), - .note => 
return writer.print("the Win32 RC compiler would evaluate it as the ordinal/number value {d}", .{self.extra.number}), - .hint => return, - }, - .rc_would_miscompile_dialog_menu_or_class_id_forced_ordinal => switch (self.type) { - .err, .warning => return, - .note => return writer.print("to avoid the potential miscompilation, only specify one {s} per dialog resource", .{@tagName(self.extra.menu_or_class)}), - .hint => return, - }, - .rc_would_miscompile_dialog_menu_id_starts_with_digit => switch (self.type) { - .err, .warning => return, - .note => return writer.writeAll("to avoid the potential miscompilation, the first character of the id should not be a digit"), - .hint => return, - }, - .dialog_menu_id_was_uppercased => return, - .duplicate_menu_or_class_skipped => { - return writer.print("this {s} was ignored; when multiple {s} statements are specified, only the last takes precedence", .{ - @tagName(self.extra.menu_or_class), - @tagName(self.extra.menu_or_class), - }); - }, - .invalid_digit_character_in_ordinal => { - return writer.writeAll("non-ASCII digit characters are not allowed in ordinal (number) values"); - }, - .rc_would_miscompile_codepoint_byte_swap => switch (self.type) { - .err, .warning => return writer.print("codepoint U+{X} within a string literal would be miscompiled by the Win32 RC compiler (the bytes of the UTF-16 code unit would be swapped)", .{self.extra.number}), - .note => return writer.print("to avoid the potential miscompilation, an integer escape sequence in a wide string literal could be used instead: L\"\\x{X}\"", .{self.extra.number}), - .hint => return, - }, - .rc_would_miscompile_codepoint_skip => switch (self.type) { - .err, .warning => return writer.print("codepoint U+{X} within a string literal would be miscompiled by the Win32 RC compiler (the codepoint would be missing from the compiled resource)", .{self.extra.number}), - .note => return writer.print("to avoid the potential miscompilation, an integer escape sequence in a wide 
string literal could be used instead: L\"\\x{X}\"", .{self.extra.number}), - .hint => return, - }, - .tab_converted_to_spaces => switch (self.type) { - .err, .warning => return writer.writeAll("the tab character(s) in this string will be converted into a variable number of spaces (determined by the column of the tab character in the .rc file)"), - .note => return writer.writeAll("to include the tab character itself in a string, the escape sequence \\t should be used"), - .hint => return, - }, - .win32_non_ascii_ordinal => switch (self.type) { - .err, .warning => unreachable, - .note => return writer.print("the Win32 RC compiler would accept this as an ordinal but its value would be {}", .{self.extra.number}), - .hint => return, - }, - .failed_to_open_cwd => { - try writer.print("failed to open CWD for compilation: {s}", .{@tagName(self.extra.file_open_error.err)}); - }, - } - } - - pub const VisualTokenInfo = struct { - before_len: usize, - point_offset: usize, - after_len: usize, - }; - - pub fn visualTokenInfo(self: ErrorDetails, source_line_start: usize, source_line_end: usize) VisualTokenInfo { - // Note: A perfect solution here would involve full grapheme cluster - // awareness, but oh well. This will give incorrect offsets - // if there are any multibyte codepoints within the relevant span, - // and even more inflated for grapheme clusters. - // - // We mitigate this slightly when we know we'll be pointing at - // something that displays as 1 character. - return switch (self.err) { - // These can technically be more than 1 byte depending on encoding, - // but they always refer to one visual character/grapheme. 
/// Renders one diagnostic (`err_details`) to `writer`.
///
/// The output is, in order:
///  - a `<file>:<line>:<column>: <severity>: <message>` header, where `<file>` falls back to
///    `<after preprocessor>` when no source mapping is known for the token's line;
///  - unless `err_details.print_source_line` is false, the offending source line followed by
///    a marker line (`~` over the surrounding span, `^` at the error position);
///  - when the line maps back to a file, the "this line originated from" note and the
///    originating line(s), subject to the flags computed by `CorrespondingLines.init`.
///
/// Diagnostics of type `.hint` produce no output. `allocator` is only used for temporary
/// buffers, all of which are released before returning.
pub fn renderErrorMessage(allocator: std.mem.Allocator, writer: anytype, tty_config: std.io.tty.Config, cwd: std.fs.Dir, err_details: ErrorDetails, source: []const u8, strings: []const []const u8, source_mappings: ?SourceMappings) !void {
    if (err_details.type == .hint) return;

    const line_number = err_details.token.line_number;
    const source_line_start = err_details.token.getLineStart(source);
    // Tab stops count as one column for display purposes; the +1 makes the column 1-based.
    const column = err_details.token.calculateColumn(source, 1, source_line_start) + 1;

    const corresponding_span: ?SourceMappings.SourceSpan = span: {
        if (source_mappings == null) break :span null;
        if (!source_mappings.?.has(line_number)) break :span null;
        break :span source_mappings.?.get(line_number);
    };
    const corresponding_file: ?[]const u8 = file: {
        if (source_mappings == null or corresponding_span == null) break :file null;
        break :file source_mappings.?.files.get(corresponding_span.?.filename_offset);
    };

    const err_line = if (corresponding_span) |span| span.start_line else line_number;

    // Header: location
    try tty_config.setColor(writer, .bold);
    if (corresponding_file) |file| {
        try writer.writeAll(file);
    } else {
        try tty_config.setColor(writer, .dim);
        try writer.writeAll("<after preprocessor>");
        try tty_config.setColor(writer, .reset);
        try tty_config.setColor(writer, .bold);
    }
    try writer.print(":{d}:{d}: ", .{ err_line, column });

    // Header: severity
    const severity_color: std.io.tty.Color = switch (err_details.type) {
        .err => .red,
        .warning => .yellow,
        .note => .cyan,
        .hint => unreachable, // returned early above
    };
    const severity_label: []const u8 = switch (err_details.type) {
        .err => "error: ",
        .warning => "warning: ",
        .note => "note: ",
        .hint => unreachable, // returned early above
    };
    try tty_config.setColor(writer, severity_color);
    try writer.writeAll(severity_label);

    // Header: message
    try tty_config.setColor(writer, .reset);
    try tty_config.setColor(writer, .bold);
    try err_details.render(writer, source, strings);
    try writer.writeByte('\n');
    try tty_config.setColor(writer, .reset);

    if (!err_details.print_source_line) {
        try writer.writeByte('\n');
        return;
    }

    const source_line = err_details.token.getLine(source, source_line_start);
    const visual_info = err_details.visualTokenInfo(source_line_start, source_line_start + source_line.len);
    const is_too_long_literal = err_details.err == .string_literal_too_long;

    // The display form of the line is kept around because it is later compared against the
    // originating line(s) to decide whether the 'line originated from' note is worth printing.
    var source_line_for_display_buf = try std.ArrayList(u8).initCapacity(allocator, source_line.len);
    defer source_line_for_display_buf.deinit();
    try writeSourceSlice(source_line_for_display_buf.writer(), source_line);

    // TODO: General handling of long lines, not tied to this specific error
    if (is_too_long_literal) {
        const shown_len = @min(source_line.len, visual_info.point_offset + 16);
        try writeSourceSlice(writer, source_line[0..shown_len]);
        try tty_config.setColor(writer, .dim);
        try writer.writeAll("<...truncated...>");
        try tty_config.setColor(writer, .reset);
    } else {
        try writer.writeAll(source_line_for_display_buf.items);
    }
    try writer.writeByte('\n');

    // Marker line: padding, leading squiggles, caret, trailing squiggles
    try tty_config.setColor(writer, .green);
    try writer.writeByteNTimes(' ', visual_info.point_offset - visual_info.before_len);
    try writer.writeByteNTimes('~', visual_info.before_len);
    try writer.writeByte('^');
    const trailing_squiggles = if (is_too_long_literal) @min(visual_info.after_len, 15) else visual_info.after_len;
    if (trailing_squiggles > 0) {
        try writer.writeByteNTimes('~', trailing_squiggles);
    }
    try writer.writeByte('\n');
    try tty_config.setColor(writer, .reset);

    // Everything below concerns the originating file; nothing more to print without one.
    const origin_span = corresponding_span orelse return;
    const origin_file = corresponding_file orelse return;

    var corresponding_lines = try CorrespondingLines.init(allocator, cwd, err_details, source_line_for_display_buf.items, origin_span, origin_file);
    defer corresponding_lines.deinit(allocator);

    if (!corresponding_lines.worth_printing_note) return;

    try tty_config.setColor(writer, .bold);
    try writer.writeAll(origin_file);
    try writer.print(":{d}:{d}: ", .{ err_line, column });
    try tty_config.setColor(writer, .cyan);
    try writer.writeAll("note: ");
    try tty_config.setColor(writer, .reset);
    try tty_config.setColor(writer, .bold);
    try writer.writeAll("this line originated from line");
    if (origin_span.start_line != origin_span.end_line) {
        try writer.print("s {}-{}", .{ origin_span.start_line, origin_span.end_line });
    } else {
        try writer.print(" {}", .{origin_span.start_line});
    }
    try writer.print(" of file '{s}'\n", .{origin_file});
    try tty_config.setColor(writer, .reset);

    if (!corresponding_lines.worth_printing_lines) return;

    // When the lines could not be read, `lines` holds an explanation instead; it is
    // printed dimmed behind a red ` | ` prefix.
    const is_error_message = corresponding_lines.lines_is_error_message;
    if (is_error_message) {
        try tty_config.setColor(writer, .red);
        try writer.writeAll(" | ");
        try tty_config.setColor(writer, .reset);
        try tty_config.setColor(writer, .dim);
    }
    try writer.writeAll(corresponding_lines.lines.items);
    if (is_error_message) {
        try tty_config.setColor(writer, .reset);
    }
    try writer.writeAll("\n\n");
}
/// The original (pre-preprocessing) line(s) that a diagnostic's source line came from,
/// together with flags describing how much of the "this line originated from" note is
/// worth showing.
const CorrespondingLines = struct {
    /// False when the note should be skipped entirely.
    worth_printing_note: bool = true,
    /// False when the note should be printed without echoing the originating line(s).
    worth_printing_lines: bool = true,
    /// The display form of the originating line(s), or an explanatory message when
    /// `lines_is_error_message` is set.
    lines: std.ArrayListUnmanaged(u8) = .{},
    /// True when `lines` holds an "unable to print" message instead of source text.
    lines_is_error_message: bool = false,

    /// Reads lines `corresponding_span.start_line` through `corresponding_span.end_line` of
    /// `corresponding_file` (opened relative to `cwd`) and decides what is worth printing.
    ///
    /// Failing to open the file, or the file not containing the requested lines, is not an
    /// error: the failure is recorded in `lines` and `lines_is_error_message` is set.
    /// Any other failure (e.g. out of memory, read errors) is returned, in which case
    /// nothing is leaked. On success the caller owns the result and must call `deinit`.
    pub fn init(allocator: std.mem.Allocator, cwd: std.fs.Dir, err_details: ErrorDetails, lines_for_comparison: []const u8, corresponding_span: SourceMappings.SourceSpan, corresponding_file: []const u8) !CorrespondingLines {
        var corresponding_lines = CorrespondingLines{};
        // `lines` may already own memory by the time a later step fails.
        errdefer corresponding_lines.deinit(allocator);

        if (err_details.err == .string_literal_too_long) {
            if (err_details.token.line_number == corresponding_span.start_line) {
                // No line comparison is done for this error, so the line number is used
                // instead: if it did not change, the note would add nothing.
                corresponding_lines.worth_printing_note = false;
            } else {
                // Don't print the originating line for this error, we know it's really long
                corresponding_lines.worth_printing_lines = false;
            }
            return corresponding_lines;
        }

        if (utils.openFileNotDir(cwd, corresponding_file, .{})) |file| {
            defer file.close();
            var buffered_reader = std.io.bufferedReader(file.reader());
            const writer = corresponding_lines.lines.writer(allocator);
            writeLinesFromStream(writer, buffered_reader.reader(), corresponding_span.start_line, corresponding_span.end_line) catch |err| switch (err) {
                error.LinesNotFound => {
                    try corresponding_lines.setErrorMessage(allocator, err);
                    return corresponding_lines;
                },
                else => |e| return e,
            };
        } else |err| {
            try corresponding_lines.setErrorMessage(allocator, err);
            return corresponding_lines;
        }

        // If the lines are the same as they were before preprocessing, skip printing the note entirely
        if (std.mem.eql(u8, lines_for_comparison, corresponding_lines.lines.items)) {
            corresponding_lines.worth_printing_note = false;
        }
        return corresponding_lines;
    }

    /// Replaces whatever is in `lines` with a message naming `err` and flags it as such.
    fn setErrorMessage(self: *CorrespondingLines, allocator: std.mem.Allocator, err: anyerror) !void {
        self.lines.clearRetainingCapacity();
        try self.lines.writer(allocator).print("unable to print line(s) from file: {s}", .{@errorName(err)});
        self.lines_is_error_message = true;
    }

    /// Releases the memory held by `lines`.
    pub fn deinit(self: *CorrespondingLines, allocator: std.mem.Allocator) void {
        self.lines.deinit(allocator);
    }
};
/// Writes every byte of `slice` to `writer` in its display form (see `writeSourceByte`).
fn writeSourceSlice(writer: anytype, slice: []const u8) !void {
    for (slice) |byte| {
        try writeSourceByte(writer, byte);
    }
}

/// Writes the display form of a single source byte:
///  - `\r` is dropped,
///  - horizontal tab, vertical tab and form feed become a single space,
///  - the remaining control characters except `\n` (and DEL) become `�`,
///  - everything else (including `\n`) is written unchanged.
inline fn writeSourceByte(writer: anytype, byte: u8) !void {
    switch (byte) {
        // \r is seemingly ignored by the RC compiler, so it is skipped when printing source
        // lines to avoid confusing output (e.g. RC\rDATA printed verbatim would show up in
        // the console as DATA, but the compiler reads it as RCDATA).
        //
        // NOTE: This is irrelevant when using the clang preprocessor, because unpaired \r
        //       characters get converted to \n, but may become relevant if another
        //       preprocessor is used instead.
        '\r' => {},
        '\t', '\x0B', '\x0C' => try writer.writeByte(' '),
        '\x00'...'\x08', '\x0E'...'\x1F', '\x7F' => try writer.writeAll("�"),
        else => try writer.writeByte(byte),
    }
}

/// Copies lines `start_line` through `end_line` (1-based, inclusive) of `input` to `writer`
/// in display form. The newline terminating `end_line` is not written.
///
/// Returns `error.LinesNotFound` if `input` ends before `end_line` is reached.
pub fn writeLinesFromStream(writer: anytype, input: anytype, start_line: usize, end_line: usize) !void {
    var current_line: usize = 1;
    while (try readByteOrEof(input)) |byte| {
        const is_newline = byte == '\n';
        // Reaching the end of the last requested line means we're done.
        if (is_newline and current_line == end_line) return;
        if (current_line >= start_line) try writeSourceByte(writer, byte);
        if (is_newline) current_line += 1;
    }
    if (current_line != end_line) return error.LinesNotFound;
}

/// Reads one byte from `reader`, mapping `error.EndOfStream` to `null`.
/// Any other error is returned unchanged.
pub fn readByteOrEof(reader: anytype) !?u8 {
    const byte = reader.readByte() catch |err| switch (err) {
        error.EndOfStream => return null,
        else => |other| return other,
    };
    return byte;
}
/// Parses a language ID the way the Win32 RC command line interprets language IDs
/// specified as integers:
/// - Always interpreted as hexadecimal, but an explicit `0x` prefix is also allowed
/// - Wraps on overflow of u16
/// - Stops parsing at the first invalid hexadecimal digit
/// - Errors if the first character (after any `-`/`0x` prefix) is not a valid digit
/// - `-` (negative) prefix is allowed; the result is negated with wrapping
///
/// An empty string is rejected with `error.InvalidLanguageId`.
pub fn parseInt(str: []const u8) error{InvalidLanguageId}!u16 {
    // There is no first character to inspect, let alone a first digit.
    if (str.len == 0) return error.InvalidLanguageId;

    const radix: u8 = 16;

    const is_negative = str[0] == '-';
    var digits = if (is_negative) str[1..] else str;
    // The prefix is only stripped when something follows it, so a bare "0x" parses as "0"
    // followed by an invalid (ignored) character.
    if (digits.len > 2 and digits[0] == '0' and digits[1] == 'x') {
        digits = digits[2..];
    }

    var result: u16 = 0;
    for (digits, 0..) |c, i| {
        const digit = std.fmt.charToDigit(c, radix) catch {
            // First digit must be valid
            if (i == 0) return error.InvalidLanguageId;
            // Afterwards, an invalid digit just stops parsing without failing
            break;
        };
        result = (result *% radix) +% digit;
    }

    return if (is_negative) 0 -% result else result;
}

test parseInt {
    try std.testing.expectEqual(@as(u16, 0x16), try parseInt("16"));
    try std.testing.expectEqual(@as(u16, 0x1a), try parseInt("0x1A"));
    try std.testing.expectEqual(@as(u16, 0x1a), try parseInt("0x1Azzzz"));
    try std.testing.expectEqual(@as(u16, 0xffff), try parseInt("-1"));
    try std.testing.expectEqual(@as(u16, 0xffea), try parseInt("-0x16"));
    try std.testing.expectEqual(@as(u16, 0x0), try parseInt("0o100"));
    try std.testing.expectEqual(@as(u16, 0x1), try parseInt("10001"));
    try std.testing.expectError(error.InvalidLanguageId, parseInt(""));
    try std.testing.expectError(error.InvalidLanguageId, parseInt("--1"));
    try std.testing.expectError(error.InvalidLanguageId, parseInt("0xha"));
    try std.testing.expectError(error.InvalidLanguageId, parseInt("¹"));
    try std.testing.expectError(error.InvalidLanguageId, parseInt("~1"));
}
/// Converts a language tag to an integer language ID the way the Win32 RC command line
/// does: invalid tags are rejected, but tags that don't have a specific assigned ID but
/// are otherwise valid enough get converted to LOCALE_CUSTOM_UNSPECIFIED.
pub fn tagToInt(tag: []const u8) error{InvalidLanguageTag}!u16 {
    const id = (try tagToId(tag)) orelse return LOCALE_CUSTOM_UNSPECIFIED;
    return @intFromEnum(id);
}

/// Looks up the `LanguageId` assigned to `tag`.
///
/// Returns `error.InvalidLanguageTag` if `tag` cannot be parsed, and `null` if it is a
/// valid tag without an assigned ID. The lookup ignores case and treats `-` and `_` as
/// equivalent. A trailing valid alternate sort order (e.g. the `phoneb` of `de-de_phoneb`)
/// is ignored for the lookup.
pub fn tagToId(tag: []const u8) error{InvalidLanguageTag}!?LanguageId {
    const parsed = try parse(tag);
    // There are currently no language tags with assigned IDs that have
    // multiple suffixes, so we can skip the lookup.
    if (parsed.multiple_suffixes) return null;

    const longest_known_tag = comptime blk: {
        var len = 0;
        for (@typeInfo(LanguageId).Enum.fields) |field| {
            if (field.name.len > len) len = field.name.len;
        }
        break :blk len;
    };

    // To allow e.g. `de-de_phoneb` to get looked up as `de-de`, we need to
    // omit the suffix, but only if the tag contains a valid alternate sort order.
    const tag_to_normalize = if (parsed.isSuffixValidSortOrder())
        tag[0 .. tag.len - (parsed.suffix.?.len + 1)]
    else
        tag;

    // If what we're going to look up is longer than the longest tag that has an assigned
    // ID, then we can skip the lookup. This is checked after the sort order has been
    // stripped so that the sort order's length can never cause a known tag to be missed.
    if (tag_to_normalize.len > longest_known_tag) return null;

    var normalized_buf: [longest_known_tag]u8 = undefined;
    const normalized_tag = normalizeTag(tag_to_normalize, &normalized_buf);
    return std.meta.stringToEnum(LanguageId, normalized_tag) orelse {
        // special case for a tag that has been mapped to the same ID
        // twice.
        if (std.mem.eql(u8, "ff_latn_ng", normalized_tag)) {
            return LanguageId.ff_ng;
        }
        return null;
    };
}

test tagToId {
    try std.testing.expectEqual(LanguageId.ar_ae, (try tagToId("ar-ae")).?);
    try std.testing.expectEqual(LanguageId.ar_ae, (try tagToId("AR_AE")).?);
    try std.testing.expectEqual(LanguageId.ff_ng, (try tagToId("ff-ng")).?);
    // Special case
    try std.testing.expectEqual(LanguageId.ff_ng, (try tagToId("ff-Latn-NG")).?);
}

test "exhaustive tagToId" {
    // Every enum field name must map back to its own ID
    inline for (@typeInfo(LanguageId).Enum.fields) |field| {
        const id = tagToId(field.name) catch |err| {
            std.debug.print("tag: {s}\n", .{field.name});
            return err;
        };
        try std.testing.expectEqual(@field(LanguageId, field.name), id orelse {
            std.debug.print("tag: {s}, got null\n", .{field.name});
            return error.TestExpectedEqual;
        });
    }

    // Every alternate sort must map to the ID of its `<lang>_<country>` tag
    var buf: [32]u8 = undefined;
    inline for (valid_alternate_sorts) |parsed_sort| {
        const tag = std.fmt.bufPrint(&buf, "{s}-{s}-{s}", .{
            parsed_sort.language_code,
            parsed_sort.country_code.?,
            parsed_sort.suffix.?,
        }) catch unreachable;
        const expected_field_name = comptime parsed_sort.language_code ++ "_" ++ parsed_sort.country_code.?;
        const expected = @field(LanguageId, expected_field_name);
        const id = tagToId(tag) catch |err| {
            std.debug.print("tag: {s}\n", .{tag});
            return err;
        };
        try std.testing.expectEqual(expected, id orelse {
            std.debug.print("tag: {s}, expected: {}, got null\n", .{ tag, expected });
            return error.TestExpectedEqual;
        });
    }
}
/// Writes the normalized form of `tag` into `buf` and returns the written part of `buf`:
/// every `-` becomes `_` and every ASCII letter is lowercased.
/// Asserts that `buf` is at least as long as `tag`.
fn normalizeTag(tag: []const u8, buf: []u8) []u8 {
    std.debug.assert(buf.len >= tag.len);
    const normalized = buf[0..tag.len];
    for (normalized, tag) |*dest, c| {
        dest.* = if (c == '-') '_' else std.ascii.toLower(c);
    }
    return normalized;
}
/// https://winprotocoldoc.blob.core.windows.net/productionwindowsarchives/MS-LCID/%5bMS-LCID%5d.pdf#%5B%7B%22num%22%3A72%2C%22gen%22%3A0%7D%2C%7B%22name%22%3A%22XYZ%22%7D%2C69%2C574%2C0%5D
/// "When an LCID is requested for a locale without a
/// permanent LCID assignment, nor a temporary
/// assignment as above, the protocol will respond
/// with LOCALE_CUSTOM_UNSPECIFIED for all such
/// locales. Because this single value is used for
/// numerous possible locale names, it is impossible to
/// round trip this locale, even temporarily.
/// Applications should discard this value as soon as
/// possible and never persist it. If the system is
/// forced to respond to a request for
/// LCID_CUSTOM_UNSPECIFIED, it will fall back to
/// the current user locale. This is often incorrect but
/// may prevent an application or component from
/// failing. As the meaning of this temporary LCID is
/// unstable, it should never be used for interchange
/// or persisted data. This is a 1-to-many relationship
/// that is very unstable."
pub const LOCALE_CUSTOM_UNSPECIFIED = 0x1000;

/// Primary language ID of English.
pub const LANG_ENGLISH = 0x09;
/// Sublanguage ID of English (United States).
pub const SUBLANG_ENGLISH_US = 0x01;

/// Combines a primary language ID and a sublanguage ID into a language ID.
/// The primary language occupies the low 10 bits and the sublanguage the high 6 bits,
/// e.g. `MAKELANGID(LANG_ENGLISH, SUBLANG_ENGLISH_US)` is 0x0409 (en-US).
///
/// https://learn.microsoft.com/en-us/windows/win32/intl/language-identifiers
pub fn MAKELANGID(primary: u10, sublang: u6) u16 {
    return (@as(u16, sublang) << 10) | primary;
}
/// Splits a language tag into its parts, rejecting malformed tags with
/// `error.InvalidLanguageTag`. The slices in the result point into `lang_tag`.
///
/// Language tag format expressed as a regular expression (rough approximation):
///
/// [a-zA-Z]{1,3}([-_][a-zA-Z]{4})?([-_][a-zA-Z]{2})?([-_][a-zA-Z0-9]{1,8})?
///     lang    |     script      |      country    |       suffix
///
/// Notes:
/// - If lang code is 1 char, it seems to mean that everything afterwards uses suffix
///   parsing rules (e.g. `a-0` and `a-00000000` are allowed).
/// - There can also be any number of trailing suffix parts as long as they each
///   would be a valid suffix part, e.g. `en-us-blah-blah1-blah2-blah3` is allowed.
/// - When doing lookups, trailing suffix parts are taken into account, e.g.
///   `ca-es-valencia` is not considered equivalent to `ca-es-valencia-blah`.
/// - A suffix is only allowed if:
///   + Lang code is 1 char long, or
///   + A country code is present, or
///   + A script tag is not present and:
///     - the suffix is numeric-only and has a length of 3, or
///     - the lang is `qps` and the suffix is `ploca` or `plocm`
pub fn parse(lang_tag: []const u8) error{InvalidLanguageTag}!Parsed {
    var parts = std.mem.splitAny(u8, lang_tag, "-_");

    const lang_code = parts.first();
    if (lang_code.len < 1 or lang_code.len > 3 or !isAllAlphabetic(lang_code)) {
        return error.InvalidLanguageTag;
    }
    var parsed = Parsed{ .language_code = lang_code };

    // The second part could be a script tag, a country code, or a suffix
    const second = parts.next() orelse {
        // If there's no part besides a 1-len lang code, then it is malformed
        if (lang_code.len == 1) return error.InvalidLanguageTag;
        return parsed;
    };

    if (lang_code.len == 1) {
        // The lang code being length 1 behaves strangely, so it is fully special cased.
        //
        // This is almost certainly not the 'right' way to do this, but there is no known
        // method to determine how exactly these language tags are parsed, and it seems like
        // suffix parsing rules apply generally (digits allowed, length of 1 to 8).
        //
        // However, because we want to be able to lookup `x-iv-mathan` normally without
        // `multiple_suffixes` being set to true, two-length alphabetic parts need to be
        // treated as a country code. Everything else can be thrown into the suffix as long
        // as the normal suffix rules apply.
        if (second.len == 2 and isAllAlphabetic(second)) {
            parsed.country_code = second;
        } else if (second.len > 0 and second.len <= 8 and isAllAlphanumeric(second)) {
            parsed.suffix = second;
        } else {
            return error.InvalidLanguageTag;
        }
    } else if (second.len == 4 and isAllAlphabetic(second)) {
        parsed.script_tag = second;
    } else if (second.len == 2 and isAllAlphabetic(second)) {
        parsed.country_code = second;
    } else if (second.len == 3 and isAllNumeric(second)) {
        // Only a 3-len numeric suffix is allowed as the second part of a tag
        parsed.suffix = second;
    } else if (std.ascii.eqlIgnoreCase(lang_code, "qps") and
        (std.ascii.eqlIgnoreCase(second, "ploca") or
        std.ascii.eqlIgnoreCase(second, "plocm")))
    {
        // Special case for qps-ploca and qps-plocm
        parsed.suffix = second;
    } else {
        return error.InvalidLanguageTag;
    }

    if (parsed.script_tag != null) {
        const third = parts.next() orelse return parsed;
        // Suffix is not allowed when a country code is not present.
        if (third.len != 2 or !isAllAlphabetic(third)) return error.InvalidLanguageTag;
        parsed.country_code = third;
    }

    // We've now parsed any potential script tag/country codes, so anything remaining
    // is a suffix
    while (parts.next()) |part| {
        if (part.len == 0 or part.len > 8 or !isAllAlphanumeric(part)) {
            return error.InvalidLanguageTag;
        }
        if (parsed.suffix != null) {
            // In theory we could return early here but we still want to validate
            // that each part is a valid suffix all the way to the end, e.g.
            // we should reject `en-us-suffix-a-b-c-!!!` because of the invalid `!!!`
            // suffix part.
            parsed.multiple_suffixes = true;
        } else {
            parsed.suffix = part;
        }
    }
    return parsed;
}
/// The parts of a language tag.
pub const Parsed = struct {
    language_code: []const u8,
    script_tag: ?[]const u8 = null,
    country_code: ?[]const u8 = null,
    /// Can be a sort order (e.g. phoneb) or something like valencia, 001, etc
    suffix: ?[]const u8 = null,
    /// There can be any number of suffixes, but we don't need to care what their
    /// values are, we just need to know if any exist so that e.g. `ca-es-valencia-blah`
    /// can be seen as different from `ca-es-valencia`. Storing this as a bool
    /// allows us to avoid needing either (a) dynamic allocation or (b) a limit to
    /// the number of suffixes allowed when parsing.
    multiple_suffixes: bool = false,

    /// Returns true if this tag consists of exactly a language code, a country code and a
    /// suffix, and that combination is listed (ignoring case) in `valid_alternate_sorts`.
    pub fn isSuffixValidSortOrder(self: Parsed) bool {
        if (self.script_tag != null or self.multiple_suffixes) return false;
        const country_code = self.country_code orelse return false;
        const suffix = self.suffix orelse return false;

        for (valid_alternate_sorts) |valid_sort| {
            const is_match = std.ascii.eqlIgnoreCase(valid_sort.language_code, self.language_code) and
                std.ascii.eqlIgnoreCase(valid_sort.country_code.?, country_code) and
                std.ascii.eqlIgnoreCase(valid_sort.suffix.?, suffix);
            if (is_match) return true;
        }
        return false;
    }
};

/// https://learn.microsoft.com/en-us/openspecs/windows_protocols/ms-lcid/70feba9f-294e-491e-b6eb-56532684c37f
/// See the table following this text: "Alternate sorts can be selected by using one of the identifiers from the following table."
const valid_alternate_sorts = [_]Parsed{
    // Note: x-IV-mathan is omitted due to how lookups are implemented.
    //       This table is used to make e.g. `de-de_phoneb` get looked up
    //       as `de-de` (the suffix is omitted for the lookup), but x-iv-mathan
    //       instead needs to be looked up with the suffix included because
    //       `x-iv` is not a tag with an assigned ID.
    .{ .language_code = "de", .country_code = "de", .suffix = "phoneb" },
    .{ .language_code = "hu", .country_code = "hu", .suffix = "tchncl" },
    .{ .language_code = "ka", .country_code = "ge", .suffix = "modern" },
    .{ .language_code = "zh", .country_code = "cn", .suffix = "stroke" },
    .{ .language_code = "zh", .country_code = "sg", .suffix = "stroke" },
    .{ .language_code = "zh", .country_code = "mo", .suffix = "stroke" },
    .{ .language_code = "zh", .country_code = "tw", .suffix = "pronun" },
    .{ .language_code = "zh", .country_code = "tw", .suffix = "radstr" },
    .{ .language_code = "ja", .country_code = "jp", .suffix = "radstr" },
    .{ .language_code = "zh", .country_code = "hk", .suffix = "radstr" },
    .{ .language_code = "zh", .country_code = "mo", .suffix = "radstr" },
    .{ .language_code = "zh", .country_code = "cn", .suffix = "phoneb" },
    .{ .language_code = "zh", .country_code = "sg", .suffix = "phoneb" },
};
test "parse" {
    const ValidCase = struct {
        tag: []const u8,
        expected: Parsed,
    };
    const valid_cases = [_]ValidCase{
        .{ .tag = "en", .expected = .{ .language_code = "en" } },
        .{ .tag = "en-us", .expected = .{ .language_code = "en", .country_code = "us" } },
        .{ .tag = "en-123", .expected = .{ .language_code = "en", .suffix = "123" } },
        .{ .tag = "en-123-blah", .expected = .{
            .language_code = "en",
            .suffix = "123",
            .multiple_suffixes = true,
        } },
        .{ .tag = "en-us_123-blah", .expected = .{
            .language_code = "en",
            .country_code = "us",
            .suffix = "123",
            .multiple_suffixes = true,
        } },
        .{ .tag = "eng-Latn", .expected = .{ .language_code = "eng", .script_tag = "Latn" } },
        .{ .tag = "ff-Latn-NG", .expected = .{
            .language_code = "ff",
            .script_tag = "Latn",
            .country_code = "NG",
        } },
        .{ .tag = "qps-Plocm", .expected = .{ .language_code = "qps", .suffix = "Plocm" } },
        .{ .tag = "qps-ploca", .expected = .{ .language_code = "qps", .suffix = "ploca" } },
        .{ .tag = "x-IV-mathan", .expected = .{
            .language_code = "x",
            .country_code = "IV",
            .suffix = "mathan",
        } },
        .{ .tag = "a-a", .expected = .{ .language_code = "a", .suffix = "a" } },
        .{ .tag = "a-000", .expected = .{ .language_code = "a", .suffix = "000" } },
        .{ .tag = "a-00000000", .expected = .{ .language_code = "a", .suffix = "00000000" } },
    };
    for (valid_cases) |case| {
        try std.testing.expectEqualDeep(case.expected, try parse(case.tag));
    }

    const invalid_tags = [_][]const u8{
        // suffix not allowed if script tag is present without country code
        "eng-Latn-suffix",
        // suffix must be 3 numeric digits if neither script tag nor country code is present
        "eng-suffix",
        "en-plocm",
        // 1-len lang code is not allowed if it's the only part
        "e",
    };
    for (invalid_tags) |tag| {
        try std.testing.expectError(error.InvalidLanguageTag, parse(tag));
    }
}
/// Returns true if every byte of `str` is an ASCII letter (vacuously true for "").
fn isAllAlphabetic(str: []const u8) bool {
    return allBytesSatisfy(str, std.ascii.isAlphabetic);
}

/// Returns true if every byte of `str` is an ASCII letter or digit (vacuously true for "").
fn isAllAlphanumeric(str: []const u8) bool {
    return allBytesSatisfy(str, std.ascii.isAlphanumeric);
}

/// Returns true if every byte of `str` is an ASCII digit (vacuously true for "").
fn isAllNumeric(str: []const u8) bool {
    return allBytesSatisfy(str, std.ascii.isDigit);
}

/// Returns false as soon as a byte of `str` fails `predicate`, true otherwise.
fn allBytesSatisfy(str: []const u8, comptime predicate: fn (u8) bool) bool {
    for (str) |c| {
        if (!predicate(c)) return false;
    }
    return true;
}
-pub const LanguageId = enum(u16) { - // Language tag = Language ID, // Language, Location (or type) - af = 0x0036, // Afrikaans - af_za = 0x0436, // Afrikaans, South Africa - sq = 0x001C, // Albanian - sq_al = 0x041C, // Albanian, Albania - gsw = 0x0084, // Alsatian - gsw_fr = 0x0484, // Alsatian, France - am = 0x005E, // Amharic - am_et = 0x045E, // Amharic, Ethiopia - ar = 0x0001, // Arabic - ar_dz = 0x1401, // Arabic, Algeria - ar_bh = 0x3C01, // Arabic, Bahrain - ar_eg = 0x0c01, // Arabic, Egypt - ar_iq = 0x0801, // Arabic, Iraq - ar_jo = 0x2C01, // Arabic, Jordan - ar_kw = 0x3401, // Arabic, Kuwait - ar_lb = 0x3001, // Arabic, Lebanon - ar_ly = 0x1001, // Arabic, Libya - ar_ma = 0x1801, // Arabic, Morocco - ar_om = 0x2001, // Arabic, Oman - ar_qa = 0x4001, // Arabic, Qatar - ar_sa = 0x0401, // Arabic, Saudi Arabia - ar_sy = 0x2801, // Arabic, Syria - ar_tn = 0x1C01, // Arabic, Tunisia - ar_ae = 0x3801, // Arabic, U.A.E. - ar_ye = 0x2401, // Arabic, Yemen - hy = 0x002B, // Armenian - hy_am = 0x042B, // Armenian, Armenia - as = 0x004D, // Assamese - as_in = 0x044D, // Assamese, India - az_cyrl = 0x742C, // Azerbaijani (Cyrillic) - az_cyrl_az = 0x082C, // Azerbaijani (Cyrillic), Azerbaijan - az = 0x002C, // Azerbaijani (Latin) - az_latn = 0x782C, // Azerbaijani (Latin) - az_latn_az = 0x042C, // Azerbaijani (Latin), Azerbaijan - bn = 0x0045, // Bangla - bn_bd = 0x0845, // Bangla, Bangladesh - bn_in = 0x0445, // Bangla, India - ba = 0x006D, // Bashkir - ba_ru = 0x046D, // Bashkir, Russia - eu = 0x002D, // Basque - eu_es = 0x042D, // Basque, Spain - be = 0x0023, // Belarusian - be_by = 0x0423, // Belarusian, Belarus - bs_cyrl = 0x641A, // Bosnian (Cyrillic) - bs_cyrl_ba = 0x201A, // Bosnian (Cyrillic), Bosnia and Herzegovina - bs_latn = 0x681A, // Bosnian (Latin) - bs = 0x781A, // Bosnian (Latin) - bs_latn_ba = 0x141A, // Bosnian (Latin), Bosnia and Herzegovina - br = 0x007E, // Breton - br_fr = 0x047E, // Breton, France - bg = 0x0002, // Bulgarian - bg_bg = 
0x0402, // Bulgarian, Bulgaria - my = 0x0055, // Burmese - my_mm = 0x0455, // Burmese, Myanmar - ca = 0x0003, // Catalan - ca_es = 0x0403, // Catalan, Spain - tzm_arab_ma = 0x045F, // Central Atlas Tamazight (Arabic), Morocco - ku = 0x0092, // Central Kurdish - ku_arab = 0x7c92, // Central Kurdish - ku_arab_iq = 0x0492, // Central Kurdish, Iraq - chr = 0x005C, // Cherokee - chr_cher = 0x7c5C, // Cherokee - chr_cher_us = 0x045C, // Cherokee, United States - zh_hans = 0x0004, // Chinese (Simplified) - zh = 0x7804, // Chinese (Simplified) - zh_cn = 0x0804, // Chinese (Simplified), People's Republic of China - zh_sg = 0x1004, // Chinese (Simplified), Singapore - zh_hant = 0x7C04, // Chinese (Traditional) - zh_hk = 0x0C04, // Chinese (Traditional), Hong Kong S.A.R. - zh_mo = 0x1404, // Chinese (Traditional), Macao S.A.R. - zh_tw = 0x0404, // Chinese (Traditional), Taiwan - co = 0x0083, // Corsican - co_fr = 0x0483, // Corsican, France - hr = 0x001A, // Croatian - hr_hr = 0x041A, // Croatian, Croatia - hr_ba = 0x101A, // Croatian (Latin), Bosnia and Herzegovina - cs = 0x0005, // Czech - cs_cz = 0x0405, // Czech, Czech Republic - da = 0x0006, // Danish - da_dk = 0x0406, // Danish, Denmark - prs = 0x008C, // Dari - prs_af = 0x048C, // Dari, Afghanistan - dv = 0x0065, // Divehi - dv_mv = 0x0465, // Divehi, Maldives - nl = 0x0013, // Dutch - nl_be = 0x0813, // Dutch, Belgium - nl_nl = 0x0413, // Dutch, Netherlands - dz_bt = 0x0C51, // Dzongkha, Bhutan - en = 0x0009, // English - en_au = 0x0C09, // English, Australia - en_bz = 0x2809, // English, Belize - en_ca = 0x1009, // English, Canada - en_029 = 0x2409, // English, Caribbean - en_hk = 0x3C09, // English, Hong Kong - en_in = 0x4009, // English, India - en_ie = 0x1809, // English, Ireland - en_jm = 0x2009, // English, Jamaica - en_my = 0x4409, // English, Malaysia - en_nz = 0x1409, // English, New Zealand - en_ph = 0x3409, // English, Republic of the Philippines - en_sg = 0x4809, // English, Singapore - en_za = 0x1C09, // 
English, South Africa - en_tt = 0x2c09, // English, Trinidad and Tobago - en_ae = 0x4C09, // English, United Arab Emirates - en_gb = 0x0809, // English, United Kingdom - en_us = 0x0409, // English, United States - en_zw = 0x3009, // English, Zimbabwe - et = 0x0025, // Estonian - et_ee = 0x0425, // Estonian, Estonia - fo = 0x0038, // Faroese - fo_fo = 0x0438, // Faroese, Faroe Islands - fil = 0x0064, // Filipino - fil_ph = 0x0464, // Filipino, Philippines - fi = 0x000B, // Finnish - fi_fi = 0x040B, // Finnish, Finland - fr = 0x000C, // French - fr_be = 0x080C, // French, Belgium - fr_cm = 0x2c0C, // French, Cameroon - fr_ca = 0x0c0C, // French, Canada - fr_029 = 0x1C0C, // French, Caribbean - fr_cd = 0x240C, // French, Congo, DRC - fr_ci = 0x300C, // French, Côte d'Ivoire - fr_fr = 0x040C, // French, France - fr_ht = 0x3c0C, // French, Haiti - fr_lu = 0x140C, // French, Luxembourg - fr_ml = 0x340C, // French, Mali - fr_ma = 0x380C, // French, Morocco - fr_mc = 0x180C, // French, Principality of Monaco - fr_re = 0x200C, // French, Reunion - fr_sn = 0x280C, // French, Senegal - fr_ch = 0x100C, // French, Switzerland - fy = 0x0062, // Frisian - fy_nl = 0x0462, // Frisian, Netherlands - ff = 0x0067, // Fulah - ff_latn = 0x7C67, // Fulah (Latin) - ff_ng = 0x0467, // Fulah, Nigeria - ff_latn_sn = 0x0867, // Fulah, Senegal - gl = 0x0056, // Galician - gl_es = 0x0456, // Galician, Spain - ka = 0x0037, // Georgian - ka_ge = 0x0437, // Georgian, Georgia - de = 0x0007, // German - de_at = 0x0C07, // German, Austria - de_de = 0x0407, // German, Germany - de_li = 0x1407, // German, Liechtenstein - de_lu = 0x1007, // German, Luxembourg - de_ch = 0x0807, // German, Switzerland - el = 0x0008, // Greek - el_gr = 0x0408, // Greek, Greece - kl = 0x006F, // Greenlandic - kl_gl = 0x046F, // Greenlandic, Greenland - gn = 0x0074, // Guarani - gn_py = 0x0474, // Guarani, Paraguay - gu = 0x0047, // Gujarati - gu_in = 0x0447, // Gujarati, India - ha = 0x0068, // Hausa (Latin) - ha_latn = 
0x7C68, // Hausa (Latin) - ha_latn_ng = 0x0468, // Hausa (Latin), Nigeria - haw = 0x0075, // Hawaiian - haw_us = 0x0475, // Hawaiian, United States - he = 0x000D, // Hebrew - he_il = 0x040D, // Hebrew, Israel - hi = 0x0039, // Hindi - hi_in = 0x0439, // Hindi, India - hu = 0x000E, // Hungarian - hu_hu = 0x040E, // Hungarian, Hungary - is = 0x000F, // Icelandic - is_is = 0x040F, // Icelandic, Iceland - ig = 0x0070, // Igbo - ig_ng = 0x0470, // Igbo, Nigeria - id = 0x0021, // Indonesian - id_id = 0x0421, // Indonesian, Indonesia - iu = 0x005D, // Inuktitut (Latin) - iu_latn = 0x7C5D, // Inuktitut (Latin) - iu_latn_ca = 0x085D, // Inuktitut (Latin), Canada - iu_cans = 0x785D, // Inuktitut (Syllabics) - iu_cans_ca = 0x045d, // Inuktitut (Syllabics), Canada - ga = 0x003C, // Irish - ga_ie = 0x083C, // Irish, Ireland - it = 0x0010, // Italian - it_it = 0x0410, // Italian, Italy - it_ch = 0x0810, // Italian, Switzerland - ja = 0x0011, // Japanese - ja_jp = 0x0411, // Japanese, Japan - kn = 0x004B, // Kannada - kn_in = 0x044B, // Kannada, India - kr_latn_ng = 0x0471, // Kanuri (Latin), Nigeria - ks = 0x0060, // Kashmiri - ks_arab = 0x0460, // Kashmiri, Perso-Arabic - ks_deva_in = 0x0860, // Kashmiri (Devanagari), India - kk = 0x003F, // Kazakh - kk_kz = 0x043F, // Kazakh, Kazakhstan - km = 0x0053, // Khmer - km_kh = 0x0453, // Khmer, Cambodia - quc = 0x0086, // K'iche - quc_latn_gt = 0x0486, // K'iche, Guatemala - rw = 0x0087, // Kinyarwanda - rw_rw = 0x0487, // Kinyarwanda, Rwanda - sw = 0x0041, // Kiswahili - sw_ke = 0x0441, // Kiswahili, Kenya - kok = 0x0057, // Konkani - kok_in = 0x0457, // Konkani, India - ko = 0x0012, // Korean - ko_kr = 0x0412, // Korean, Korea - ky = 0x0040, // Kyrgyz - ky_kg = 0x0440, // Kyrgyz, Kyrgyzstan - lo = 0x0054, // Lao - lo_la = 0x0454, // Lao, Lao P.D.R. 
- la_va = 0x0476, // Latin, Vatican City - lv = 0x0026, // Latvian - lv_lv = 0x0426, // Latvian, Latvia - lt = 0x0027, // Lithuanian - lt_lt = 0x0427, // Lithuanian, Lithuania - dsb = 0x7C2E, // Lower Sorbian - dsb_de = 0x082E, // Lower Sorbian, Germany - lb = 0x006E, // Luxembourgish - lb_lu = 0x046E, // Luxembourgish, Luxembourg - mk = 0x002F, // Macedonian - mk_mk = 0x042F, // Macedonian, North Macedonia - ms = 0x003E, // Malay - ms_bn = 0x083E, // Malay, Brunei Darussalam - ms_my = 0x043E, // Malay, Malaysia - ml = 0x004C, // Malayalam - ml_in = 0x044C, // Malayalam, India - mt = 0x003A, // Maltese - mt_mt = 0x043A, // Maltese, Malta - mi = 0x0081, // Maori - mi_nz = 0x0481, // Maori, New Zealand - arn = 0x007A, // Mapudungun - arn_cl = 0x047A, // Mapudungun, Chile - mr = 0x004E, // Marathi - mr_in = 0x044E, // Marathi, India - moh = 0x007C, // Mohawk - moh_ca = 0x047C, // Mohawk, Canada - mn = 0x0050, // Mongolian (Cyrillic) - mn_cyrl = 0x7850, // Mongolian (Cyrillic) - mn_mn = 0x0450, // Mongolian (Cyrillic), Mongolia - mn_mong = 0x7C50, // Mongolian (Traditional Mongolian) - mn_mong_cn = 0x0850, // Mongolian (Traditional Mongolian), People's Republic of China - mn_mong_mn = 0x0C50, // Mongolian (Traditional Mongolian), Mongolia - ne = 0x0061, // Nepali - ne_in = 0x0861, // Nepali, India - ne_np = 0x0461, // Nepali, Nepal - no = 0x0014, // Norwegian (Bokmal) - nb = 0x7C14, // Norwegian (Bokmal) - nb_no = 0x0414, // Norwegian (Bokmal), Norway - nn = 0x7814, // Norwegian (Nynorsk) - nn_no = 0x0814, // Norwegian (Nynorsk), Norway - oc = 0x0082, // Occitan - oc_fr = 0x0482, // Occitan, France - @"or" = 0x0048, // Odia - or_in = 0x0448, // Odia, India - om = 0x0072, // Oromo - om_et = 0x0472, // Oromo, Ethiopia - ps = 0x0063, // Pashto - ps_af = 0x0463, // Pashto, Afghanistan - fa = 0x0029, // Persian - fa_ir = 0x0429, // Persian, Iran - pl = 0x0015, // Polish - pl_pl = 0x0415, // Polish, Poland - pt = 0x0016, // Portuguese - pt_br = 0x0416, // Portuguese, Brazil 
- pt_pt = 0x0816, // Portuguese, Portugal - qps_ploca = 0x05FE, // Pseudo Language, Pseudo locale for east Asian/complex script localization testing - qps_ploc = 0x0501, // Pseudo Language, Pseudo locale used for localization testing - qps_plocm = 0x09FF, // Pseudo Language, Pseudo locale used for localization testing of mirrored locales - pa = 0x0046, // Punjabi - pa_arab = 0x7C46, // Punjabi - pa_in = 0x0446, // Punjabi, India - pa_arab_pk = 0x0846, // Punjabi, Islamic Republic of Pakistan - quz = 0x006B, // Quechua - quz_bo = 0x046B, // Quechua, Bolivia - quz_ec = 0x086B, // Quechua, Ecuador - quz_pe = 0x0C6B, // Quechua, Peru - ro = 0x0018, // Romanian - ro_md = 0x0818, // Romanian, Moldova - ro_ro = 0x0418, // Romanian, Romania - rm = 0x0017, // Romansh - rm_ch = 0x0417, // Romansh, Switzerland - ru = 0x0019, // Russian - ru_md = 0x0819, // Russian, Moldova - ru_ru = 0x0419, // Russian, Russia - sah = 0x0085, // Sakha - sah_ru = 0x0485, // Sakha, Russia - smn = 0x703B, // Sami (Inari) - smn_fi = 0x243B, // Sami (Inari), Finland - smj = 0x7C3B, // Sami (Lule) - smj_no = 0x103B, // Sami (Lule), Norway - smj_se = 0x143B, // Sami (Lule), Sweden - se = 0x003B, // Sami (Northern) - se_fi = 0x0C3B, // Sami (Northern), Finland - se_no = 0x043B, // Sami (Northern), Norway - se_se = 0x083B, // Sami (Northern), Sweden - sms = 0x743B, // Sami (Skolt) - sms_fi = 0x203B, // Sami (Skolt), Finland - sma = 0x783B, // Sami (Southern) - sma_no = 0x183B, // Sami (Southern), Norway - sma_se = 0x1C3B, // Sami (Southern), Sweden - sa = 0x004F, // Sanskrit - sa_in = 0x044F, // Sanskrit, India - gd = 0x0091, // Scottish Gaelic - gd_gb = 0x0491, // Scottish Gaelic, United Kingdom - sr_cyrl = 0x6C1A, // Serbian (Cyrillic) - sr_cyrl_ba = 0x1C1A, // Serbian (Cyrillic), Bosnia and Herzegovina - sr_cyrl_me = 0x301A, // Serbian (Cyrillic), Montenegro - sr_cyrl_rs = 0x281A, // Serbian (Cyrillic), Serbia - sr_cyrl_cs = 0x0C1A, // Serbian (Cyrillic), Serbia and Montenegro (Former) - sr_latn = 
0x701A, // Serbian (Latin) - sr = 0x7C1A, // Serbian (Latin) - sr_latn_ba = 0x181A, // Serbian (Latin), Bosnia and Herzegovina - sr_latn_me = 0x2c1A, // Serbian (Latin), Montenegro - sr_latn_rs = 0x241A, // Serbian (Latin), Serbia - sr_latn_cs = 0x081A, // Serbian (Latin), Serbia and Montenegro (Former) - nso = 0x006C, // Sesotho sa Leboa - nso_za = 0x046C, // Sesotho sa Leboa, South Africa - tn = 0x0032, // Setswana - tn_bw = 0x0832, // Setswana, Botswana - tn_za = 0x0432, // Setswana, South Africa - sd = 0x0059, // Sindhi - sd_arab = 0x7C59, // Sindhi - sd_arab_pk = 0x0859, // Sindhi, Islamic Republic of Pakistan - si = 0x005B, // Sinhala - si_lk = 0x045B, // Sinhala, Sri Lanka - sk = 0x001B, // Slovak - sk_sk = 0x041B, // Slovak, Slovakia - sl = 0x0024, // Slovenian - sl_si = 0x0424, // Slovenian, Slovenia - so = 0x0077, // Somali - so_so = 0x0477, // Somali, Somalia - st = 0x0030, // Sotho - st_za = 0x0430, // Sotho, South Africa - es = 0x000A, // Spanish - es_ar = 0x2C0A, // Spanish, Argentina - es_ve = 0x200A, // Spanish, Bolivarian Republic of Venezuela - es_bo = 0x400A, // Spanish, Bolivia - es_cl = 0x340A, // Spanish, Chile - es_co = 0x240A, // Spanish, Colombia - es_cr = 0x140A, // Spanish, Costa Rica - es_cu = 0x5c0A, // Spanish, Cuba - es_do = 0x1c0A, // Spanish, Dominican Republic - es_ec = 0x300A, // Spanish, Ecuador - es_sv = 0x440A, // Spanish, El Salvador - es_gt = 0x100A, // Spanish, Guatemala - es_hn = 0x480A, // Spanish, Honduras - es_419 = 0x580A, // Spanish, Latin America - es_mx = 0x080A, // Spanish, Mexico - es_ni = 0x4C0A, // Spanish, Nicaragua - es_pa = 0x180A, // Spanish, Panama - es_py = 0x3C0A, // Spanish, Paraguay - es_pe = 0x280A, // Spanish, Peru - es_pr = 0x500A, // Spanish, Puerto Rico - es_es_tradnl = 0x040A, // Spanish, Spain - es_es = 0x0c0A, // Spanish, Spain - es_us = 0x540A, // Spanish, United States - es_uy = 0x380A, // Spanish, Uruguay - sv = 0x001D, // Swedish - sv_fi = 0x081D, // Swedish, Finland - sv_se = 0x041D, // 
Swedish, Sweden - syr = 0x005A, // Syriac - syr_sy = 0x045A, // Syriac, Syria - tg = 0x0028, // Tajik (Cyrillic) - tg_cyrl = 0x7C28, // Tajik (Cyrillic) - tg_cyrl_tj = 0x0428, // Tajik (Cyrillic), Tajikistan - tzm = 0x005F, // Tamazight (Latin) - tzm_latn = 0x7C5F, // Tamazight (Latin) - tzm_latn_dz = 0x085F, // Tamazight (Latin), Algeria - ta = 0x0049, // Tamil - ta_in = 0x0449, // Tamil, India - ta_lk = 0x0849, // Tamil, Sri Lanka - tt = 0x0044, // Tatar - tt_ru = 0x0444, // Tatar, Russia - te = 0x004A, // Telugu - te_in = 0x044A, // Telugu, India - th = 0x001E, // Thai - th_th = 0x041E, // Thai, Thailand - bo = 0x0051, // Tibetan - bo_cn = 0x0451, // Tibetan, People's Republic of China - ti = 0x0073, // Tigrinya - ti_er = 0x0873, // Tigrinya, Eritrea - ti_et = 0x0473, // Tigrinya, Ethiopia - ts = 0x0031, // Tsonga - ts_za = 0x0431, // Tsonga, South Africa - tr = 0x001F, // Turkish - tr_tr = 0x041F, // Turkish, Turkey - tk = 0x0042, // Turkmen - tk_tm = 0x0442, // Turkmen, Turkmenistan - uk = 0x0022, // Ukrainian - uk_ua = 0x0422, // Ukrainian, Ukraine - hsb = 0x002E, // Upper Sorbian - hsb_de = 0x042E, // Upper Sorbian, Germany - ur = 0x0020, // Urdu - ur_in = 0x0820, // Urdu, India - ur_pk = 0x0420, // Urdu, Islamic Republic of Pakistan - ug = 0x0080, // Uyghur - ug_cn = 0x0480, // Uyghur, People's Republic of China - uz_cyrl = 0x7843, // Uzbek (Cyrillic) - uz_cyrl_uz = 0x0843, // Uzbek (Cyrillic), Uzbekistan - uz = 0x0043, // Uzbek (Latin) - uz_latn = 0x7C43, // Uzbek (Latin) - uz_latn_uz = 0x0443, // Uzbek (Latin), Uzbekistan - ca_es_valencia = 0x0803, // Valencian, Spain - ve = 0x0033, // Venda - ve_za = 0x0433, // Venda, South Africa - vi = 0x002A, // Vietnamese - vi_vn = 0x042A, // Vietnamese, Vietnam - cy = 0x0052, // Welsh - cy_gb = 0x0452, // Welsh, United Kingdom - wo = 0x0088, // Wolof - wo_sn = 0x0488, // Wolof, Senegal - xh = 0x0034, // Xhosa - xh_za = 0x0434, // Xhosa, South Africa - ii = 0x0078, // Yi - ii_cn = 0x0478, // Yi, People's Republic of 
China - yi_001 = 0x043D, // Yiddish, World - yo = 0x006A, // Yoruba - yo_ng = 0x046A, // Yoruba, Nigeria - zu = 0x0035, // Zulu - zu_za = 0x0435, // Zulu, South Africa - - /// Special case - x_iv_mathan = 0x007F, // LANG_INVARIANT, "math alphanumeric sorting" -}; diff --git a/src/resinator/lex.zig b/src/resinator/lex.zig @@ -1,1098 +0,0 @@
//! Expects to be run after the C preprocessor and after `removeComments`.
//! This means that the lexer assumes that:
//! - Splices ('\' at the end of a line) have been handled/collapsed.
//! - Preprocessor directives and macros have been expanded (any remaining should be skipped with the exception of `#pragma code_page`).
//! - All comments have been removed.

const std = @import("std");
const ErrorDetails = @import("errors.zig").ErrorDetails;
const columnWidth = @import("literals.zig").columnWidth;
const code_pages = @import("code_pages.zig");
const CodePage = code_pages.CodePage;
const SourceMappings = @import("source_mapping.zig").SourceMappings;
const isNonAsciiDigit = @import("utils.zig").isNonAsciiDigit;

const dumpTokensDuringTests = false;

pub const default_max_string_literal_codepoints = 4097;

/// A lexed token, stored as a half-open byte range (`start..end`) into the
/// source buffer together with the line number it was found on.
pub const Token = struct {
    id: Id,
    start: usize,
    end: usize,
    line_number: usize,

    pub const Id = enum {
        literal,
        number,
        quoted_ascii_string,
        quoted_wide_string,
        operator,
        begin,
        end,
        comma,
        open_paren,
        close_paren,
        /// This Id is only used for errors, the Lexer will never return one
        /// of these from a `next` call.
        preprocessor_command,
        invalid,
        eof,

        /// Returns a human-readable placeholder for this kind of token.
        /// Must not be called with `.invalid`.
        pub fn nameForErrorDisplay(self: Id) []const u8 {
            return switch (self) {
                .literal => "<literal>",
                .number => "<number>",
                .quoted_ascii_string => "<quoted ascii string>",
                .quoted_wide_string => "<quoted wide string>",
                .operator => "<operator>",
                .begin => "<'{' or BEGIN>",
                .end => "<'}' or END>",
                .comma => ",",
                .open_paren => "(",
                .close_paren => ")",
                .preprocessor_command => "<preprocessor command>",
                .invalid => unreachable,
                .eof => "<eof>",
            };
        }
    };

    /// Returns the bytes of `buffer` covered by this token.
    pub fn slice(self: Token, buffer: []const u8) []const u8 {
        return buffer[self.start..self.end];
    }

    /// Returns the token's own text, except for `.eof` which has no text
    /// and is displayed using its placeholder name instead.
    pub fn nameForErrorDisplay(self: Token, buffer: []const u8) []const u8 {
        return switch (self.id) {
            .eof => self.id.nameForErrorDisplay(),
            else => self.slice(buffer),
        };
    }

    /// Returns 0-based column
    ///
    /// The column is accumulated byte-by-byte from the start of the line
    /// using `columnWidth`, with tab stops every `tab_columns` columns.
    /// If `maybe_line_start` is null the line start is found via `getLineStart`.
    pub fn calculateColumn(token: Token, source: []const u8, tab_columns: usize, maybe_line_start: ?usize) usize {
        const line_start = maybe_line_start orelse token.getLineStart(source);

        var i: usize = line_start;
        var column: usize = 0;
        while (i < token.start) : (i += 1) {
            column += columnWidth(column, source[i], tab_columns);
        }
        return column;
    }

    // TODO: This doesn't necessarily match up with how we count line numbers, but where a line starts
    //       has a knock-on effect on calculateColumn. More testing is needed to determine what needs
    //       to be changed to make this both (1) match how line numbers are counted and (2) match how
    //       the Win32 RC compiler counts tab columns.
    //
    //       (the TODO in currentIndexFormsLineEndingPair should be taken into account as well)
    /// Returns the index of the first byte of the line containing the token:
    /// the byte after the closest preceding '\n' (clamped to the last valid
    /// index of `source`), or 0 if there is no preceding '\n'.
    pub fn getLineStart(token: Token, source: []const u8) usize {
        // Walk backwards starting at the byte before the token.
        var index = token.start;
        while (index > 0) {
            index -= 1;
            if (source[index] == '\n') return @min(source.len - 1, index + 1);
        }
        return 0;
    }

    /// Returns the line containing the token, without its line ending
    /// (neither the '\n' nor any '\r' bytes directly before it).
    /// If `maybe_line_start` is null the line start is found via `getLineStart`.
    pub fn getLine(token: Token, source: []const u8, maybe_line_start: ?usize) []const u8 {
        const line_start = maybe_line_start orelse token.getLineStart(source);

        // Start scanning at the first byte of the line itself. Starting one byte
        // later would drop single-character lines and would make an empty line
        // (line_start pointing at '\n') run on into the following line.
        var line_end = line_start;
        while (line_end < source.len and source[line_end] != '\n') : (line_end += 1) {}
        // Never trim past the start of the line, so the slice bounds stay ordered.
        while (line_end > line_start and source[line_end - 1] == '\r') : (line_end -= 1) {}

        return source[line_start..line_end];
    }

    pub fn isStringLiteral(token: Token) bool {
        return token.id == .quoted_ascii_string or token.id == .quoted_wide_string;
    }
};

/// Tracks the current line number while walking over `buffer`, treating
/// `\r\n` and `\n\r` pairs as a single line ending.
pub const LineHandler = struct {
    line_number: usize = 1,
    buffer: []const u8,
    last_line_ending_index: ?usize = null,

    /// Like incrementLineNumber but checks that the current char is a line ending first.
    /// Returns the new line number if it was incremented, null otherwise.
    pub fn maybeIncrementLineNumber(self: *LineHandler, cur_index: usize) ?usize {
        return switch (self.buffer[cur_index]) {
            '\r', '\n' => self.incrementLineNumber(cur_index),
            else => null,
        };
    }

    /// Increments line_number appropriately (handling line ending pairs)
    /// and returns the new line number if it was incremented, or null otherwise.
    pub fn incrementLineNumber(self: *LineHandler, cur_index: usize) ?usize {
        if (self.currentIndexFormsLineEndingPair(cur_index)) {
            // Second half of a pair: the line was already counted. Forget the
            // previous index so a third line ending starts a new line.
            self.last_line_ending_index = null;
            return null;
        }
        self.line_number += 1;
        self.last_line_ending_index = cur_index;
        return self.line_number;
    }

    /// \r\n and \n\r pairs are treated as a single line ending (but not \r\r \n\n)
    /// expects self.index and last_line_ending_index (if non-null) to contain line endings
    ///
    /// TODO: This is not really how the Win32 RC compiler handles line endings. Instead, it
    ///       seems to drop all carriage returns during preprocessing and then replace all
    ///       remaining line endings with well-formed CRLF pairs (e.g. `<CR>a<CR>b<LF>c` becomes `ab<CR><LF>c`).
    ///       Handling this the same as the Win32 RC compiler would need control over the preprocessor,
    ///       since Clang converts unpaired <CR> into unpaired <LF>.
    pub fn currentIndexFormsLineEndingPair(self: *const LineHandler, cur_index: usize) bool {
        const last_index = self.last_line_ending_index orelse return false;

        // must immediately precede the current index, we know cur_index must
        // be >= 1 since last_line_ending_index is non-null (so if the subtraction
        // overflows it is a bug at the callsite of this function).
        if (last_index != cur_index - 1) return false;

        const cur_line_ending = self.buffer[cur_index];
        const last_line_ending = self.buffer[last_index];

        // sanity check
        std.debug.assert(cur_line_ending == '\r' or cur_line_ending == '\n');
        std.debug.assert(last_line_ending == '\r' or last_line_ending == '\n');

        // can't be \n\n or \r\r
        return last_line_ending != cur_line_ending;
    }
};

pub const LexError = error{
    UnfinishedStringLiteral,
    StringLiteralTooLong,
    InvalidNumberWithExponent,
    InvalidDigitCharacterInNumberLiteral,
    IllegalByte,
    IllegalByteOutsideStringLiterals,
    IllegalCodepointOutsideStringLiterals,
    IllegalByteOrderMark,
    IllegalPrivateUseCharacter,
    FoundCStyleEscapedQuote,
    CodePagePragmaMissingLeftParen,
    CodePagePragmaMissingRightParen,
    /// Can be caught and ignored
    CodePagePragmaInvalidCodePage,
    CodePagePragmaNotInteger,
    CodePagePragmaOverflow,
    CodePagePragmaUnsupportedCodePage,
    /// Can be caught and ignored
    CodePagePragmaInIncludedFile,
};

-pub const Lexer = struct { - const Self = @This(); - - buffer: []const u8, - index: usize, - line_handler: LineHandler, - at_start_of_line: bool = true, - error_context_token: ?Token = null, - current_code_page: CodePage, - default_code_page: CodePage, - source_mappings: ?*SourceMappings, - max_string_literal_codepoints: u15, - /// Needed to determine whether or not the output code page should - /// be set in the parser. 
- seen_pragma_code_pages: u2 = 0,

    pub const Error = LexError;

    /// Settings accepted by `init`.
    pub const LexerOptions = struct {
        default_code_page: CodePage = .windows1252,
        source_mappings: ?*SourceMappings = null,
        max_string_literal_codepoints: u15 = default_max_string_literal_codepoints,
    };

    /// Creates a lexer positioned at the start of `buffer`. The current code
    /// page starts out equal to the default code page from `options`.
    pub fn init(buffer: []const u8, options: LexerOptions) Self {
        const code_page = options.default_code_page;
        return .{
            .buffer = buffer,
            .index = 0,
            .line_handler = .{ .buffer = buffer },
            .current_code_page = code_page,
            .default_code_page = code_page,
            .source_mappings = options.source_mappings,
            .max_string_literal_codepoints = options.max_string_literal_codepoints,
        };
    }

    /// Prints the token's tag name, line number and escaped text via `std.debug.print`.
    pub fn dump(self: *Self, token: *const Token) void {
        const text = token.slice(self.buffer);
        std.debug.print("{s}:{d}: {s}\n", .{ @tagName(token.id), token.line_number, std.fmt.fmtSliceEscapeLower(text) });
    }

    pub const LexMethod = enum {
        whitespace_delimiter_only,
        normal,
        normal_expect_operator,
    };

    /// Lexes the next token using the comptime-selected `method`.
    pub fn next(self: *Self, comptime method: LexMethod) LexError!Token {
        return switch (method) {
            .whitespace_delimiter_only => self.nextWhitespaceDelimeterOnly(),
            .normal => self.nextNormal(),
            .normal_expect_operator => self.nextNormalWithContext(.expect_operator),
        };
    }

    const StateWhitespaceDelimiterOnly = enum {
        start,
        literal,
        preprocessor,
        semicolon,
    };

    /// Lexes the next token in the mode where only whitespace ends a literal.
    /// Returns either a `.literal` token or an `.eof` token; preprocessor lines
    /// are evaluated and skipped, and lines starting with ';' are skipped.
    pub fn nextWhitespaceDelimeterOnly(self: *Self) LexError!Token {
        var token = Token{
            .id = .eof,
            .start = self.index,
            .end = undefined,
            .line_number = self.line_handler.line_number,
        };
        var state: StateWhitespaceDelimiterOnly = .start;

        while (self.current_code_page.codepointAt(self.index, self.buffer)) |codepoint| : (self.index += codepoint.byte_len) {
            const c = codepoint.value;
            try self.checkForIllegalCodepoint(codepoint, false);
            switch (state) {
                .start => switch (c) {
                    '\r', '\n' => {
                        token.start = self.index + 1;
                        token.line_number = self.incrementLineNumber();
                    },
                    ' ', '\t', '\x05'...'\x08', '\x0B'...'\x0C', '\x0E'...'\x1F' => {
                        token.start = self.index + 1;
                    },
                    // NBSP is whitespace only while we are still at the start of a
                    // line (it may be mixed with other whitespace there); anywhere
                    // else it begins a literal.
                    '\xA0' => {
                        if (self.at_start_of_line) {
                            token.start = self.index + codepoint.byte_len;
                        } else {
                            state = .literal;
                            self.at_start_of_line = false;
                        }
                    },
                    '#' => {
                        state = if (self.at_start_of_line) .preprocessor else .literal;
                        self.at_start_of_line = false;
                    },
                    // A semicolon terminates the line in this mode only when it is
                    // the first thing on the line; otherwise the state is left as-is.
                    ';' => {
                        if (self.at_start_of_line) state = .semicolon;
                        self.at_start_of_line = false;
                    },
                    else => {
                        state = .literal;
                        self.at_start_of_line = false;
                    },
                },
                .literal => switch (c) {
                    '\r', '\n', ' ', '\t', '\x05'...'\x08', '\x0B'...'\x0C', '\x0E'...'\x1F' => {
                        token.id = .literal;
                        break;
                    },
                    else => {},
                },
                .preprocessor => switch (c) {
                    '\r', '\n' => {
                        // Evaluate the command using the range collected so far
                        // before moving the token start past the line ending.
                        try self.evaluatePreprocessorCommand(token.start, self.index);
                        token.start = self.index + 1;
                        state = .start;
                        token.line_number = self.incrementLineNumber();
                    },
                    else => {},
                },
                .semicolon => switch (c) {
                    '\r', '\n' => {
                        token.start = self.index + 1;
                        state = .start;
                        token.line_number = self.incrementLineNumber();
                    },
                    else => {},
                },
            }
        } else {
            // Ran out of input without hitting a delimiter.
            switch (state) {
                .start, .semicolon => {},
                .literal => token.id = .literal,
                .preprocessor => {
                    try self.evaluatePreprocessorCommand(token.start, self.index);
                    token.start = self.index;
                },
            }
        }

        token.end = self.index;
        return token;
    }

- const StateNormal = enum { - start, - literal_or_quoted_wide_string, - quoted_ascii_string, - quoted_wide_string, - quoted_ascii_string_escape, - quoted_wide_string_escape, - quoted_ascii_string_maybe_end, - quoted_wide_string_maybe_end, - literal, - 
number_literal, - preprocessor, - semicolon, - // end - e, - en, - // begin - b, - be, - beg, - begi, - }; - - 
    /// TODO: A not-terrible name
    pub fn nextNormal(self: *Self) LexError!Token {
        return self.nextNormalWithContext(.any);
    }

    /// Lexes the next token in the normal mode. `context` only affects a
    /// leading '-': with `.expect_operator` it is returned as an operator
    /// token, with `.any` it starts a number literal.
    /// On error, `error_context_token` is set to the offending range.
    pub fn nextNormalWithContext(self: *Self, context: enum { expect_operator, any }) LexError!Token {
        var token = Token{
            .id = .eof,
            .start = self.index,
            .end = undefined,
            .line_number = self.line_handler.line_number,
        };
        var state: StateNormal = .start;

        // Note: The Windows RC compiler uses a non-standard method of computing
        //       length for its 'string literal too long' errors; it isn't easily
        //       explained or intuitive (it's sort-of pre-parsed byte length but with
        //       a few exceptions/edge cases).
        //
        // It also behaves strangely with non-ASCII codepoints, e.g. even though the default
        // limit is 4097, you can only have 4094 € codepoints (1 UTF-16 code unit each),
        // and 2048 𐐷 codepoints (2 UTF-16 code units each).
        //
        // TODO: Understand this more, bring it more in line with how the Win32 limits work.
        //       Alternatively, do something that makes more sense but may be more permissive.
        var literal_len: usize = 0;
        // The column is tracked alongside the length so that string literals full of
        // tab characters don't lead to pathological behavior.
        var literal_column: usize = 0;
        var collapsing_whitespace: bool = false;
        var may_have_exponent: bool = true;
        var exponent_index: ?usize = null;

        while (self.current_code_page.codepointAt(self.index, self.buffer)) |codepoint| : (self.index += codepoint.byte_len) {
            const c = codepoint.value;
            const in_string_literal = switch (state) {
                .quoted_ascii_string,
                .quoted_wide_string,
                .quoted_ascii_string_escape,
                .quoted_wide_string_escape,
                .quoted_ascii_string_maybe_end,
                .quoted_wide_string_maybe_end,
                =>
                // Only codepoints on the same line as the opening quote count as
                // 'inside a string literal' for illegal-codepoint detection. A C
                // preprocessor sees
                //   "hello
                //   @"
                // as an unclosed string literal followed by `@` and `"`, and the
                // Win32 RC preprocessor errors on `@` outside string literals, even
                // though the RC lexer itself treats the whole thing as one string
                // literal. Checking the line number here emulates that preprocessor
                // behavior at lex-time without a separate pass.
                token.line_number == self.line_handler.line_number,
                else => false,
            };
            try self.checkForIllegalCodepoint(codepoint, in_string_literal);

            switch (state) {
                .start => switch (c) {
                    '\r', '\n' => {
                        token.start = self.index + 1;
                        token.line_number = self.incrementLineNumber();
                    },
                    ' ', '\t', '\x05'...'\x08', '\x0B'...'\x0C', '\x0E'...'\x1F' => {
                        token.start = self.index + 1;
                    },
                    // NBSP is whitespace only while we are still at the start of a
                    // line (it may be mixed with other whitespace there).
                    '\xA0' => {
                        if (self.at_start_of_line) {
                            token.start = self.index + codepoint.byte_len;
                        } else {
                            state = .literal;
                            self.at_start_of_line = false;
                        }
                    },
                    'L', 'l' => {
                        state = .literal_or_quoted_wide_string;
                        self.at_start_of_line = false;
                    },
                    'E', 'e' => {
                        state = .e;
                        self.at_start_of_line = false;
                    },
                    'B', 'b' => {
                        state = .b;
                        self.at_start_of_line = false;
                    },
                    '"' => {
                        state = .quoted_ascii_string;
                        self.at_start_of_line = false;
                        collapsing_whitespace = false;
                        literal_len = 0;

                        // Zero-width token at the opening quote, used only to
                        // compute the column the string literal starts at.
                        const column_probe = Token{
                            .start = self.index,
                            .end = self.index,
                            .line_number = self.line_handler.line_number,
                            .id = .invalid,
                        };
                        literal_column = column_probe.calculateColumn(self.buffer, 8, null);
                    },
                    '+', '&', '|' => {
                        self.index += 1;
                        token.id = .operator;
                        self.at_start_of_line = false;
                        break;
                    },
                    '-' => {
                        self.at_start_of_line = false;
                        if (context == .expect_operator) {
                            self.index += 1;
                            token.id = .operator;
                            break;
                        }
                        state = .number_literal;
                        may_have_exponent = true;
                        exponent_index = null;
                    },
                    '0'...'9', '~' => {
                        state = .number_literal;
                        may_have_exponent = true;
                        exponent_index = null;
                        self.at_start_of_line = false;
                    },
                    '#' => {
                        state = if (self.at_start_of_line) .preprocessor else .literal;
                        self.at_start_of_line = false;
                    },
                    ';' => {
                        state = .semicolon;
                        self.at_start_of_line = false;
                    },
                    '{', '}' => {
                        self.index += 1;
                        token.id = if (c == '{') .begin else .end;
                        self.at_start_of_line = false;
                        break;
                    },
                    '(', ')' => {
                        self.index += 1;
                        token.id = if (c == '(') .open_paren else .close_paren;
                        self.at_start_of_line = false;
                        break;
                    },
                    ',' => {
                        self.index += 1;
                        token.id = .comma;
                        self.at_start_of_line = false;
                        break;
                    },
                    else => {
                        if (isNonAsciiDigit(c)) {
                            self.error_context_token = .{
                                .id = .number,
                                .start = token.start,
                                .end = self.index + 1,
                                .line_number = self.line_handler.line_number,
                            };
                            return error.InvalidDigitCharacterInNumberLiteral;
                        }
                        state = .literal;
                        self.at_start_of_line = false;
                    },
                },
                .preprocessor => switch (c) {
                    '\r', '\n' => {
                        // Evaluate using the range collected so far before moving
                        // the token start past the line ending.
                        try self.evaluatePreprocessorCommand(token.start, self.index);
                        token.start = self.index + 1;
                        state = .start;
                        token.line_number = self.incrementLineNumber();
                    },
                    else => {},
                },
                // A semicolon terminates the line: everything up to the next
                // line ending is skipped.
                .semicolon => switch (c) {
                    '\r', '\n' => {
                        token.start = self.index + 1;
                        state = .start;
                        token.line_number = self.incrementLineNumber();
                    },
                    else => {},
                },
                .number_literal => switch (c) {
                    // zig fmt: off
                    ' ', '\t', '\x05'...'\x08', '\x0B'...'\x0C', '\x0E'...'\x1F',
                    '\r', '\n', '"', ',', '{', '}', '+', '-', '|', '&', '~', '(', ')',
                    '\'', ';', '=',
                    => {
                    // zig fmt: on
                        token.id = .number;
                        break;
                    },
                    '0'...'9' => {
                        if (exponent_index) |exp_i| {
                            if (self.index - 1 == exp_i) {
                                // Note: A digit directly after the exponent character being an
                                //       error is a quirk of the preprocessor used by the Win32
                                //       RC compiler.
                                self.error_context_token = .{
                                    .id = .number,
                                    .start = token.start,
                                    .end = self.index + 1,
                                    .line_number = self.line_handler.line_number,
                                };
                                return error.InvalidNumberWithExponent;
                            }
                        }
                    },
                    'e', 'E' => {
                        // Only the first e/E seen while an exponent is still possible
                        // is remembered as the exponent position.
                        if (may_have_exponent) {
                            exponent_index = self.index;
                            may_have_exponent = false;
                        }
                    },
                    else => {
                        if (isNonAsciiDigit(c)) {
                            self.error_context_token = .{
                                .id = .number,
                                .start = token.start,
                                .end = self.index + 1,
                                .line_number = self.line_handler.line_number,
                            };
                            return error.InvalidDigitCharacterInNumberLiteral;
                        }
                        may_have_exponent = false;
                    },
                },
                .literal_or_quoted_wide_string => switch (c) {
                    // zig fmt: off
                    ' ', '\t', '\x05'...'\x08', '\x0B'...'\x0C', '\x0E'...'\x1F',
                    '\r', '\n', ',', '{', '}', '+', '-', '|', '&', '~', '(', ')',
                    '\'', ';', '=',
                    // zig fmt: on
                    => {
                        token.id = .literal;
                        break;
                    },
                    '"' => {
                        state = .quoted_wide_string;
                        collapsing_whitespace = false;
                        literal_len = 0;

                        // Zero-width token at the opening quote, used only to
                        // compute the column the string literal starts at.
                        const column_probe = Token{
                            .start = self.index,
                            .end = self.index,
                            .line_number = self.line_handler.line_number,
                            .id = .invalid,
                        };
                        literal_column = column_probe.calculateColumn(self.buffer, 8, null);
                    },
                    else => {
                        state = .literal;
                    },
                },
                .literal => switch (c) {
                    // zig fmt: off
                    ' ', '\t', '\x05'...'\x08', '\x0B'...'\x0C', '\x0E'...'\x1F',
                    '\r', '\n', '"', ',', '{', '}', '+', '-', '|', '&', '~', '(', ')',
                    '\'', ';', '=',
                    => {
                    // zig fmt: on
                        token.id = .literal;
                        break;
                    },
                    else => {},
                },
                // Keyword recognition for END. Whenever the keyword does not
                // continue, fall back to `.literal` and step the index back by one
                // before the loop's continue expression advances it again.
                .e => switch (c) {
                    'N', 'n' => {
                        state = .en;
                    },
                    else => {
                        state = .literal;
                        self.index -= 1;
                    },
                },
                .en => switch (c) {
                    'D', 'd' => {
                        token.id = .end;
                        self.index += 1;
                        break;
                    },
                    else => {
                        state = .literal;
                        self.index -= 1;
                    },
                },
                // Keyword recognition for BEGIN, same fallback as above.
                .b => switch (c) {
                    'E', 'e' => {
                        state = .be;
                    },
                    else => {
                        state = .literal;
                        self.index -= 1;
                    },
                },
                .be => switch (c) {
                    'G', 'g' => {
                        state = .beg;
                    },
                    else => {
                        state = .literal;
                        self.index -= 1;
                    },
                },
                .beg => switch (c) {
                    'I', 'i' => {
                        state = .begi;
                    },
                    else => {
                        state = .literal;
                        self.index -= 1;
                    },
                },
                .begi => switch (c) {
                    'N', 'n' => {
                        token.id = .begin;
                        self.index += 1;
                        break;
                    },
                    else => {
                        state = .literal;
                        self.index -= 1;
                    },
                },
                .quoted_ascii_string, .quoted_wide_string => switch (c) {
                    '"' => {
                        // Not counted towards the length yet: this may be the
                        // closing quote.
                        literal_column += 1;
                        state = if (state == .quoted_ascii_string) .quoted_ascii_string_maybe_end else .quoted_wide_string_maybe_end;
                    },
                    '\\' => {
                        literal_len += 1;
                        literal_column += 1;
                        state = if (state == .quoted_ascii_string) .quoted_ascii_string_escape else .quoted_wide_string_escape;
                    },
                    '\r' => {
                        literal_column = 0;
                        // \r doesn't count towards string literal length

                        // The lexer's line number advances, but the token keeps the
                        // line number it started on.
                        _ = self.incrementLineNumber();
                    },
                    '\n' => {
                        literal_column = 0;
                        // The first \n expands to <space><\n>; any further ones are
                        // collapsed into that.
                        if (!collapsing_whitespace) {
                            literal_len += 2;
                            collapsing_whitespace = true;
                        }

                        // The lexer's line number advances, but the token keeps the
                        // line number it started on.
                        _ = self.incrementLineNumber();
                    },
                    // only \t, space, Vertical Tab, and Form Feed count as whitespace when collapsing
                    '\t', ' ', '\x0b', '\x0c' => {
                        if (!collapsing_whitespace) {
                            // Literal tab characters are counted as the number of space
                            // characters needed to reach the next 8-column tab stop.
                            const width = columnWidth(literal_column, @intCast(c), 8);
                            literal_len += width;
                            literal_column += width;
                        }
                    },
                    else => {
                        collapsing_whitespace = false;
                        literal_len += 1;
                        literal_column += 1;
                    },
                },
                .quoted_ascii_string_escape, .quoted_wide_string_escape => switch (c) {
                    '"' => {
                        // The error range covers the backslash and the quote.
                        self.error_context_token = .{
                            .id = .invalid,
                            .start = self.index - 1,
                            .end = self.index + 1,
                            .line_number = self.line_handler.line_number,
                        };
                        return error.FoundCStyleEscapedQuote;
                    },
                    else => {
                        literal_len += 1;
                        literal_column += 1;
                        state = if (state == .quoted_ascii_string_escape) .quoted_ascii_string else .quoted_wide_string;
                    },
                },
                .quoted_ascii_string_maybe_end, .quoted_wide_string_maybe_end => switch (c) {
                    '"' => {
                        state = if (state == .quoted_ascii_string_maybe_end) .quoted_ascii_string else .quoted_wide_string;
                        // A doubled quote is an escaped quote and counts as 1 char for
                        // the length check. The first " was not counted (it could have
                        // ended the string), so it is counted here.
                        literal_len += 1;
                        literal_column += 1;
                    },
                    else => {
                        token.id = if (state == .quoted_ascii_string_maybe_end) .quoted_ascii_string else .quoted_wide_string;
                        break;
                    },
                },
            }
        } else {
            // Ran out of input: finish whatever token was in progress.
            switch (state) {
                .start, .semicolon => {},
                .literal_or_quoted_wide_string, .literal, .e, .en, .b, .be, .beg, .begi => {
                    token.id = .literal;
                },
                .preprocessor => {
                    try self.evaluatePreprocessorCommand(token.start, self.index);
                    token.start = self.index;
                },
                .number_literal => {
                    token.id = .number;
                },
                .quoted_ascii_string_maybe_end, .quoted_wide_string_maybe_end => {
                    token.id = if (state == .quoted_ascii_string_maybe_end) .quoted_ascii_string else .quoted_wide_string;
                },
                .quoted_ascii_string,
                .quoted_wide_string,
                .quoted_ascii_string_escape,
                .quoted_wide_string_escape,
                => {
                    self.error_context_token = .{
                        .id = .eof,
                        .start = self.index,
                        .end = self.index,
                        .line_number = self.line_handler.line_number,
                    };
                    return LexError.UnfinishedStringLiteral;
                },
            }
        }

        token.end = self.index;

        if (token.isStringLiteral() and literal_len > self.max_string_literal_codepoints) {
            self.error_context_token = token;
            return LexError.StringLiteralTooLong;
        }

        return token;
    }

- /// Increments line_number appropriately (handling line ending pairs) - /// and returns the new line number. 
fn incrementLineNumber(self: *Self) usize {
    // The line handler's own return value is discarded here; this wrapper
    // always reports the handler's current line number instead.
    _ = self.line_handler.incrementLineNumber(self.index);
    self.at_start_of_line = true;
    return self.line_handler.line_number;
}

/// Returns an error if `codepoint` is one the lexer refuses to accept, otherwise returns
/// normally. Some codepoints are only rejected when `in_string_literal` is false.
/// Before an error is returned, `error_context_token` is set to an `.invalid` token
/// covering the bytes of the offending codepoint at the current index.
fn checkForIllegalCodepoint(self: *Self, codepoint: code_pages.Codepoint, in_string_literal: bool) LexError!void {
    const err = switch (codepoint.value) {
        // 0x00 = NUL
        // 0x1A = Substitute (treated as EOF)
        // NOTE: 0x1A gets treated as EOF by the clang preprocessor so after a .rc file
        //       is run through the clang preprocessor it will no longer have 0x1A characters in it.
        // 0x7F = DEL (treated as a context-specific terminator by the Windows RC compiler)
        0x00, 0x1A, 0x7F => error.IllegalByte,
        // 0x01...0x03 result in strange 'macro definition too big' errors when used outside of string literals
        // 0x04 is valid but behaves strangely (sort of acts as a 'skip the next character' instruction)
        0x01...0x04 => if (!in_string_literal) error.IllegalByteOutsideStringLiterals else return,
        // @ and ` both result in error RC2018: unknown character '0x60' (and subsequently
        // fatal error RC1116: RC terminating after preprocessor errors) if they are ever used
        // outside of string literals. Not exactly sure why this would be the case, though.
        // TODO: Make sure there aren't any exceptions
        '@', '`' => if (!in_string_literal) error.IllegalByteOutsideStringLiterals else return,
        // The Byte Order Mark is mostly skipped over by the Windows RC compiler, but
        // there are edge cases where it leads to cryptic 'compiler limit : macro definition too big'
        // errors (e.g. a BOM within a number literal). By making this illegal we avoid having to
        // deal with a lot of edge cases and remove the potential footgun of the bytes of a BOM
        // being 'missing' when included in a string literal (the Windows RC compiler acts as
        // if the codepoint was never part of the string literal).
        '\u{FEFF}' => error.IllegalByteOrderMark,
        // Similar deal with this private use codepoint, it gets skipped/ignored by the
        // RC compiler (but without the cryptic errors). Silently dropping bytes still seems like
        // enough of a footgun with no real use-cases that it's still worth erroring instead of
        // emulating the RC compiler's behavior, though.
        '\u{E000}' => error.IllegalPrivateUseCharacter,
        // These codepoints lead to strange errors when used outside of string literals,
        // and miscompilations when used within string literals. We avoid the miscompilation
        // within string literals and emit a warning, but outside of string literals it makes
        // more sense to just disallow these codepoints.
        0x900, 0xA00, 0xA0D, 0x2000, 0xFFFE, 0xD00 => if (!in_string_literal) error.IllegalCodepointOutsideStringLiterals else return,
        else => return,
    };
    self.error_context_token = .{
        .id = .invalid,
        .start = self.index,
        .end = self.index + codepoint.byte_len,
        .line_number = self.line_handler.line_number,
    };
    return err;
}

/// Interprets `self.buffer[start..end]` as a preprocessor command.
///
/// Only `#pragma code_page(<id>)` has an effect (the `code_page` part is matched
/// case-insensitively, `<id>` is either `DEFAULT` or a decimal code page identifier);
/// anything that is not `#pragma` followed by whitespace and `code_page` is silently ignored.
/// On success `current_code_page` is updated and `seen_pragma_code_pages` is incremented
/// (saturating). On any error, `error_context_token` is set to a `.preprocessor_command`
/// token spanning `start..end`.
fn evaluatePreprocessorCommand(self: *Self, start: usize, end: usize) !void {
    const token = Token{
        .id = .preprocessor_command,
        .start = start,
        .end = end,
        .line_number = self.line_handler.line_number,
    };
    errdefer self.error_context_token = token;
    var command = self.buffer[start..end];

    // Anything besides exactly this is ignored by the Windows RC implementation
    const expected_directive = "#pragma";
    if (!std.mem.startsWith(u8, command, expected_directive)) return;
    command = command[expected_directive.len..];

    // At least one whitespace character must separate `#pragma` from what follows.
    if (command.len == 0 or !std.ascii.isWhitespace(command[0])) return;
    while (command.len > 0 and std.ascii.isWhitespace(command[0])) {
        command = command[1..];
    }

    // Note: CoDe_PaGeZ is also treated as "code_page" by the Windows RC implementation,
    //       and it will error with 'Missing left parenthesis in code_page #pragma'
    const expected_extension = "code_page";
    if (!std.ascii.startsWithIgnoreCase(command, expected_extension)) return;
    command = command[expected_extension.len..];

    while (command.len > 0 and std.ascii.isWhitespace(command[0])) {
        command = command[1..];
    }

    if (command.len == 0 or command[0] != '(') {
        return error.CodePagePragmaMissingLeftParen;
    }
    command = command[1..];

    while (command.len > 0 and std.ascii.isWhitespace(command[0])) {
        command = command[1..];
    }

    // The argument runs until the first ')' or whitespace. It is taken as a read-only
    // sub-slice of `command` rather than by growing the length of a mutable slice.
    var num_len: usize = 0;
    while (num_len < command.len and command[num_len] != ')' and !std.ascii.isWhitespace(command[num_len])) {
        num_len += 1;
    }
    const num_str: []const u8 = command[0..num_len];
    command = command[num_len..];

    if (num_str.len == 0) {
        return error.CodePagePragmaNotInteger;
    }

    while (command.len > 0 and std.ascii.isWhitespace(command[0])) {
        command = command[1..];
    }

    if (command.len == 0 or command[0] != ')') {
        return error.CodePagePragmaMissingRightParen;
    }

    const code_page = code_page: {
        if (std.ascii.eqlIgnoreCase("DEFAULT", num_str)) {
            break :code_page self.default_code_page;
        }

        // The Win32 compiler behaves fairly strangely around maxInt(u32):
        // - If the overflowed u32 wraps and becomes a known code page ID, then
        //   it will error/warn with "Codepage not valid: ignored" (depending on /w)
        // - If the overflowed u32 wraps and does not become a known code page ID,
        //   then it will error with 'constant too big' and 'Codepage not integer'
        //
        // Instead of that, we just have a separate error specifically for overflow.
        const num = parseCodePageNum(num_str) catch |err| switch (err) {
            error.InvalidCharacter => return error.CodePagePragmaNotInteger,
            error.Overflow => return error.CodePagePragmaOverflow,
        };

        // Anything that starts with 0 but does not resolve to 0 is treated as invalid, e.g. 01252
        if (num_str[0] == '0' and num != 0) {
            return error.CodePagePragmaInvalidCodePage;
        }
        // Anything that resolves to 0 is treated as 'not an integer' by the Win32 implementation.
        else if (num == 0) {
            return error.CodePagePragmaNotInteger;
        }
        // Anything above u16 max is not going to be found since our CodePage enum is backed by a u16.
        if (num > std.math.maxInt(u16)) {
            return error.CodePagePragmaInvalidCodePage;
        }

        break :code_page code_pages.CodePage.getByIdentifierEnsureSupported(@intCast(num)) catch |err| switch (err) {
            error.InvalidCodePage => return error.CodePagePragmaInvalidCodePage,
            error.UnsupportedCodePage => return error.CodePagePragmaUnsupportedCodePage,
        };
    };

    // https://learn.microsoft.com/en-us/windows/win32/menurc/pragma-directives
    // > This pragma is not supported in an included resource file (.rc)
    //
    // Even though the Win32 behavior is to just ignore such directives silently,
    // this is an error in the lexer to allow for emitting warnings/errors when
    // such directives are found if that's wanted. The intention is for the lexer
    // to still be able to work correctly after this error is returned.
    if (self.source_mappings) |source_mappings| {
        if (!source_mappings.isRootFile(token.line_number)) {
            return error.CodePagePragmaInIncludedFile;
        }
    }

    self.seen_pragma_code_pages +|= 1;
    self.current_code_page = code_page;
}

/// Parses `str` as an unsigned base-10 integer.
/// Returns `error.InvalidCharacter` on the first byte that is not a decimal digit and
/// `error.Overflow` if the value does not fit in a `u32`. An empty `str` yields 0.
fn parseCodePageNum(str: []const u8) !u32 {
    var x: u32 = 0;
    for (str) |c| {
        const digit = try std.fmt.charToDigit(c, 10);
        x = try std.math.mul(u32, x, 10);
        x = try std.math.add(u32, x, digit);
    }
    return x;
}
ErrorDetails.Error.invalid_digit_character_in_number_literal, - error.IllegalByte => ErrorDetails.Error.illegal_byte, - error.IllegalByteOutsideStringLiterals => ErrorDetails.Error.illegal_byte_outside_string_literals, - error.IllegalCodepointOutsideStringLiterals => ErrorDetails.Error.illegal_codepoint_outside_string_literals, - error.IllegalByteOrderMark => ErrorDetails.Error.illegal_byte_order_mark, - error.IllegalPrivateUseCharacter => ErrorDetails.Error.illegal_private_use_character, - error.FoundCStyleEscapedQuote => ErrorDetails.Error.found_c_style_escaped_quote, - error.CodePagePragmaMissingLeftParen => ErrorDetails.Error.code_page_pragma_missing_left_paren, - error.CodePagePragmaMissingRightParen => ErrorDetails.Error.code_page_pragma_missing_right_paren, - error.CodePagePragmaInvalidCodePage => ErrorDetails.Error.code_page_pragma_invalid_code_page, - error.CodePagePragmaNotInteger => ErrorDetails.Error.code_page_pragma_not_integer, - error.CodePagePragmaOverflow => ErrorDetails.Error.code_page_pragma_overflow, - error.CodePagePragmaUnsupportedCodePage => ErrorDetails.Error.code_page_pragma_unsupported_code_page, - error.CodePagePragmaInIncludedFile => ErrorDetails.Error.code_page_pragma_in_included_file, - }; - return .{ - .err = err, - .token = self.error_context_token.?, - }; - } -}; - -fn testLexNormal(source: []const u8, expected_tokens: []const Token.Id) !void { - var lexer = Lexer.init(source, .{}); - if (dumpTokensDuringTests) std.debug.print("\n----------------------\n{s}\n----------------------\n", .{lexer.buffer}); - for (expected_tokens) |expected_token_id| { - const token = try lexer.nextNormal(); - if (dumpTokensDuringTests) lexer.dump(&token); - try std.testing.expectEqual(expected_token_id, token.id); - } - const last_token = try lexer.nextNormal(); - try std.testing.expectEqual(Token.Id.eof, last_token.id); -} - -fn expectLexError(expected: LexError, actual: anytype) !void { - try std.testing.expectError(expected, actual); - if 
(dumpTokensDuringTests) std.debug.print("{!}\n", .{actual}); -} - -test "normal: numbers" { - try testLexNormal("1", &.{.number}); - try testLexNormal("-1", &.{.number}); - try testLexNormal("- 1", &.{ .number, .number }); - try testLexNormal("-a", &.{.number}); -} - -test "normal: string literals" { - try testLexNormal("\"\"", &.{.quoted_ascii_string}); - // "" is an escaped " - try testLexNormal("\" \"\" \"", &.{.quoted_ascii_string}); -} - -test "superscript chars and code pages" { - const firstToken = struct { - pub fn firstToken(source: []const u8, default_code_page: CodePage, comptime lex_method: Lexer.LexMethod) LexError!Token { - var lexer = Lexer.init(source, .{ .default_code_page = default_code_page }); - return lexer.next(lex_method); - } - }.firstToken; - const utf8_source = "²"; - const windows1252_source = "\xB2"; - - const windows1252_encoded_as_windows1252 = firstToken(windows1252_source, .windows1252, .normal); - try std.testing.expectError(error.InvalidDigitCharacterInNumberLiteral, windows1252_encoded_as_windows1252); - - const utf8_encoded_as_windows1252 = try firstToken(utf8_source, .windows1252, .normal); - try std.testing.expectEqual(Token{ - .id = .literal, - .start = 0, - .end = 2, - .line_number = 1, - }, utf8_encoded_as_windows1252); - - const utf8_encoded_as_utf8 = firstToken(utf8_source, .utf8, .normal); - try std.testing.expectError(error.InvalidDigitCharacterInNumberLiteral, utf8_encoded_as_utf8); - - const windows1252_encoded_as_utf8 = try firstToken(windows1252_source, .utf8, .normal); - try std.testing.expectEqual(Token{ - .id = .literal, - .start = 0, - .end = 1, - .line_number = 1, - }, windows1252_encoded_as_utf8); -} diff --git a/src/resinator/literals.zig b/src/resinator/literals.zig @@ -1,911 +0,0 @@ -const std = @import("std"); -const code_pages = @import("code_pages.zig"); -const CodePage = code_pages.CodePage; -const windows1252 = @import("windows1252.zig"); -const ErrorDetails = @import("errors.zig").ErrorDetails; -const 
DiagnosticsContext = @import("errors.zig").DiagnosticsContext; -const Token = @import("lex.zig").Token; - -/// rc is maximally liberal in terms of what it accepts as a number literal -/// for data values. As long as it starts with a number or - or ~, that's good enough. -pub fn isValidNumberDataLiteral(str: []const u8) bool { - if (str.len == 0) return false; - switch (str[0]) { - '~', '-', '0'...'9' => return true, - else => return false, - } -} - -pub const SourceBytes = struct { - slice: []const u8, - code_page: CodePage, -}; - -pub const StringType = enum { ascii, wide }; - -/// Valid escapes: -/// "" -> " -/// \a, \A => 0x08 (not 0x07 like in C) -/// \n => 0x0A -/// \r => 0x0D -/// \t, \T => 0x09 -/// \\ => \ -/// \nnn => byte with numeric value given by nnn interpreted as octal -/// (wraps on overflow, number of digits can be 1-3 for ASCII strings -/// and 1-7 for wide strings) -/// \xhh => byte with numeric value given by hh interpreted as hex -/// (number of digits can be 0-2 for ASCII strings and 0-4 for -/// wide strings) -/// \<\r+> => \ -/// \<[\r\n\t ]+> => <nothing> -/// -/// Special cases: -/// <\t> => 1-8 spaces, dependent on columns in the source rc file itself -/// <\r> => <nothing> -/// <\n+><\w+?\n?> => <space><\n> -/// -/// Special, especially weird case: -/// \"" => " -/// NOTE: This leads to footguns because the preprocessor can start parsing things -/// out-of-sync with the RC compiler, expanding macros within string literals, etc. -/// This parse function handles this case the same as the Windows RC compiler, but -/// \" within a string literal is treated as an error by the lexer, so the relevant -/// branches should never actually be hit during this function. 
pub const IterativeStringParser = struct {
    /// The contents of the string literal with the `L` prefix (if any) and the
    /// surrounding double quotes removed.
    source: []const u8,
    /// Code page used to decode `source` into codepoints.
    code_page: CodePage,
    /// The type of the string inferred by the prefix (L"" or "")
    /// This is what matters for things like the maximum digits in an
    /// escape sequence, whether or not invalid escape sequences are skipped, etc.
    declared_string_type: StringType,
    /// A codepoint that must be returned by the next call before parsing continues.
    pending_codepoint: ?u21 = null,
    /// Number of ' ' codepoints still owed from an expanded tab character.
    num_pending_spaces: u8 = 0,
    /// Byte offset into `source` of the next codepoint to decode.
    index: usize = 0,
    /// Column tracked while parsing; only used to compute tab stops.
    column: usize = 0,
    diagnostics: ?DiagnosticsContext = null,
    /// Set once the tab-converted-to-spaces warning has been emitted.
    seen_tab: bool = false,

    const State = enum {
        normal,
        quote,
        newline,
        escaped,
        escaped_cr,
        escaped_newlines,
        escaped_octal,
        escaped_hex,
    };

    /// `bytes.slice` must be a complete quoted string literal including its quotes
    /// (and its `L`/`l` prefix for wide strings); it is indexed at 0 without a length check.
    pub fn init(bytes: SourceBytes, options: StringParseOptions) IterativeStringParser {
        const declared_string_type: StringType = switch (bytes.slice[0]) {
            'L', 'l' => .wide,
            else => .ascii,
        };
        // Drop the first byte (the opening " or, for wide strings, the L prefix)
        // and the closing ".
        var source = bytes.slice[1 .. bytes.slice.len - 1];
        var column = options.start_column + 1; // for the removed first byte
        if (declared_string_type == .wide) {
            source = source[1..]; // drop the opening " that followed the L prefix
            column += 1; // for the removed "
        }
        return .{
            .source = source,
            .code_page = bytes.code_page,
            .declared_string_type = declared_string_type,
            .column = column,
            .diagnostics = options.diagnostics,
        };
    }

    pub const ParsedCodepoint = struct {
        codepoint: u21,
        /// True when the value came from an octal or hex escape sequence.
        from_escaped_integer: bool = false,
    };

    /// Returns the next parsed codepoint, or null at the end of the string.
    /// When diagnostics are enabled, a warning plus a note is appended for the codepoints
    /// that the Windows RC compiler would miscompile (unless the value came from a
    /// numeric escape sequence).
    pub fn next(self: *IterativeStringParser) std.mem.Allocator.Error!?ParsedCodepoint {
        const result = try self.nextUnchecked();
        const parsed = result orelse return result;
        if (parsed.from_escaped_integer) return result;
        const ctx = if (self.diagnostics) |*d| d else return result;

        switch (parsed.codepoint) {
            0x900, 0xA00, 0xA0D, 0x2000, 0xFFFE, 0xD00 => {
                const err: ErrorDetails.Error = if (parsed.codepoint == 0xD00)
                    .rc_would_miscompile_codepoint_skip
                else
                    .rc_would_miscompile_codepoint_byte_swap;
                try ctx.diagnostics.append(ErrorDetails{
                    .err = err,
                    .type = .warning,
                    .token = ctx.token,
                    .extra = .{ .number = parsed.codepoint },
                });
                try ctx.diagnostics.append(ErrorDetails{
                    .err = err,
                    .type = .note,
                    .token = ctx.token,
                    .print_source_line = false,
                    .extra = .{ .number = parsed.codepoint },
                });
            },
            else => {},
        }
        return result;
    }

    /// Narrows the accumulated value of a numeric escape sequence to what the declared
    /// string type holds: the low 8 bits for ASCII strings, all 16 bits for wide strings.
    fn escapeValue(self: *const IterativeStringParser, accumulated: u16) u16 {
        return switch (self.declared_string_type) {
            .ascii => @as(u8, @truncate(accumulated)),
            .wide => accumulated,
        };
    }

    /// Same as `next` but never emits the 'would miscompile' diagnostics.
    /// (The tab-converted-to-spaces warning is still emitted from here.)
    pub fn nextUnchecked(self: *IterativeStringParser) std.mem.Allocator.Error!?ParsedCodepoint {
        if (self.num_pending_spaces > 0) {
            // Ensure that we don't get into this predicament so we can ensure that
            // the order of processing any pending stuff doesn't matter
            std.debug.assert(self.pending_codepoint == null);
            self.num_pending_spaces -= 1;
            return .{ .codepoint = ' ' };
        }
        if (self.pending_codepoint) |pending_codepoint| {
            self.pending_codepoint = null;
            return .{ .codepoint = pending_codepoint };
        }
        if (self.index >= self.source.len) return null;

        var state: State = .normal;
        var string_escape_n: u16 = 0;
        var string_escape_i: u8 = 0;
        const max_octal_escape_digits: u8 = switch (self.declared_string_type) {
            .ascii => 3,
            .wide => 7,
        };
        const max_hex_escape_digits: u8 = switch (self.declared_string_type) {
            .ascii => 2,
            .wide => 4,
        };

        while (self.code_page.codepointAt(self.index, self.source)) |codepoint| : (self.index += codepoint.byte_len) {
            const c = codepoint.value;
            var backtrack = false;
            // Runs on every exit from this iteration, including early returns.
            // Branches that return after consuming the codepoint advance `index`
            // themselves; a backtracking branch gets that advance undone here so the
            // same codepoint is seen again by the next state / next call.
            defer {
                if (backtrack) {
                    self.index -= codepoint.byte_len;
                } else {
                    if (c == '\t') {
                        self.column += columnsUntilTabStop(self.column, 8);
                    } else {
                        self.column += codepoint.byte_len;
                    }
                }
            }
            switch (state) {
                .normal => switch (c) {
                    '\\' => state = .escaped,
                    '"' => state = .quote,
                    '\r' => {},
                    '\n' => state = .newline,
                    '\t' => {
                        // Only warn about a tab getting converted to spaces once per string
                        if (!self.seen_tab) {
                            if (self.diagnostics) |*ctx| {
                                try ctx.diagnostics.append(ErrorDetails{
                                    .err = .tab_converted_to_spaces,
                                    .type = .warning,
                                    .token = ctx.token,
                                });
                                try ctx.diagnostics.append(ErrorDetails{
                                    .err = .tab_converted_to_spaces,
                                    .type = .note,
                                    .token = ctx.token,
                                    .print_source_line = false,
                                });
                                self.seen_tab = true;
                            }
                        }
                        // One space is returned now, the rest are handed out by later calls.
                        const cols = columnsUntilTabStop(self.column, 8);
                        self.num_pending_spaces = @intCast(cols - 1);
                        self.index += codepoint.byte_len;
                        return .{ .codepoint = ' ' };
                    },
                    else => {
                        self.index += codepoint.byte_len;
                        return .{ .codepoint = c };
                    },
                },
                .quote => switch (c) {
                    '"' => {
                        // "" => "
                        self.index += codepoint.byte_len;
                        return .{ .codepoint = '"' };
                    },
                    else => unreachable, // this is a bug in the lexer
                },
                .newline => switch (c) {
                    '\r', ' ', '\t', '\n', '\x0b', '\x0c', '\xa0' => {},
                    else => {
                        // backtrack so that we handle the current char properly
                        backtrack = true;
                        // <space><newline>
                        self.index += codepoint.byte_len;
                        self.pending_codepoint = '\n';
                        return .{ .codepoint = ' ' };
                    },
                },
                .escaped => switch (c) {
                    '\r' => state = .escaped_cr,
                    '\n' => state = .escaped_newlines,
                    '0'...'7' => {
                        string_escape_n = std.fmt.charToDigit(@intCast(c), 8) catch unreachable;
                        string_escape_i = 1;
                        state = .escaped_octal;
                    },
                    'x', 'X' => {
                        string_escape_n = 0;
                        string_escape_i = 0;
                        state = .escaped_hex;
                    },
                    else => {
                        switch (c) {
                            'a', 'A' => {
                                self.index += codepoint.byte_len;
                                return .{ .codepoint = '\x08' };
                            }, // might be a bug in RC, but matches its behavior
                            'n' => {
                                self.index += codepoint.byte_len;
                                return .{ .codepoint = '\n' };
                            },
                            'r' => {
                                self.index += codepoint.byte_len;
                                return .{ .codepoint = '\r' };
                            },
                            't', 'T' => {
                                self.index += codepoint.byte_len;
                                return .{ .codepoint = '\t' };
                            },
                            '\\' => {
                                self.index += codepoint.byte_len;
                                return .{ .codepoint = '\\' };
                            },
                            '"' => {
                                // \" is a special case that doesn't get the \ included,
                                backtrack = true;
                            },
                            else => switch (self.declared_string_type) {
                                .wide => {}, // invalid escape sequences are skipped in wide strings
                                .ascii => {
                                    // backtrack so that we handle the current char properly
                                    backtrack = true;
                                    self.index += codepoint.byte_len;
                                    return .{ .codepoint = '\\' };
                                },
                            },
                        }
                        state = .normal;
                    },
                },
                .escaped_cr => switch (c) {
                    '\r' => {},
                    '\n' => state = .escaped_newlines,
                    else => {
                        // backtrack so that we handle the current char properly
                        backtrack = true;
                        self.index += codepoint.byte_len;
                        return .{ .codepoint = '\\' };
                    },
                },
                .escaped_newlines => switch (c) {
                    '\r', '\n', '\t', ' ', '\x0b', '\x0c', '\xa0' => {},
                    else => {
                        // backtrack so that we handle the current char properly
                        backtrack = true;
                        state = .normal;
                    },
                },
                .escaped_octal => switch (c) {
                    '0'...'7' => {
                        // wraps on overflow
                        string_escape_n *%= 8;
                        string_escape_n +%= std.fmt.charToDigit(@intCast(c), 8) catch unreachable;
                        string_escape_i += 1;
                        if (string_escape_i == max_octal_escape_digits) {
                            self.index += codepoint.byte_len;
                            return .{ .codepoint = self.escapeValue(string_escape_n), .from_escaped_integer = true };
                        }
                    },
                    else => {
                        // backtrack so that we handle the current char properly
                        backtrack = true;
                        // write out whatever byte we have parsed so far
                        self.index += codepoint.byte_len;
                        return .{ .codepoint = self.escapeValue(string_escape_n), .from_escaped_integer = true };
                    },
                },
                .escaped_hex => switch (c) {
                    '0'...'9', 'a'...'f', 'A'...'F' => {
                        // Cannot overflow: at most 4 hex digits are accumulated into a u16.
                        string_escape_n *= 16;
                        string_escape_n += std.fmt.charToDigit(@intCast(c), 16) catch unreachable;
                        string_escape_i += 1;
                        if (string_escape_i == max_hex_escape_digits) {
                            self.index += codepoint.byte_len;
                            return .{ .codepoint = self.escapeValue(string_escape_n), .from_escaped_integer = true };
                        }
                    },
                    else => {
                        // backtrack so that we handle the current char properly
                        backtrack = true;
                        // write out whatever byte we have parsed so far
                        // (even with 0 actual digits, \x alone parses to 0)
                        self.index += codepoint.byte_len;
                        return .{ .codepoint = self.escapeValue(string_escape_n), .from_escaped_integer = true };
                    },
                },
            }
        }

        // Ran out of input: flush whatever the current state still owes.
        switch (state) {
            .normal, .escaped_newlines => {},
            .newline => {
                // <space><newline>
                self.pending_codepoint = '\n';
                return .{ .codepoint = ' ' };
            },
            .escaped, .escaped_cr => return .{ .codepoint = '\\' },
            .escaped_octal, .escaped_hex => {
                return .{ .codepoint = self.escapeValue(string_escape_n), .from_escaped_integer = true };
            },
            .quote => unreachable, // this is a bug in the lexer
        }

        return null;
    }
};

pub const StringParseOptions = struct {
    /// Column of the first byte of the string literal in its source line.
    start_column: usize = 0,
    diagnostics: ?DiagnosticsContext = null,
    /// Only consulted when producing an ASCII (`[]u8`) result.
    output_code_page: CodePage = .windows1252,
};

/// Parses the quoted string literal in `bytes` (quotes and optional `L` prefix included)
/// into either bytes (`.ascii`) or little-endian UTF-16 code units with a 0 sentinel (`.wide`).
/// `literal_type` selects the output form only; escape-sequence rules follow the prefix
/// actually present in `bytes`. Caller owns the returned slice.
pub fn parseQuotedString(
    comptime literal_type: StringType,
    allocator: std.mem.Allocator,
    bytes: SourceBytes,
    options: StringParseOptions,
) !(switch (literal_type) {
    .ascii => []u8,
    .wide => [:0]u16,
}) {
    const T = if (literal_type == .ascii) u8 else u16;
    std.debug.assert(bytes.slice.len >= 2); // must at least have 2 double quote chars

    var buf = try std.ArrayList(T).initCapacity(allocator, bytes.slice.len);
    errdefer buf.deinit();

    var iterative_parser = IterativeStringParser.init(bytes, options);

    while (try iterative_parser.next()) |parsed| {
        const c = parsed.codepoint;
        if (parsed.from_escaped_integer) {
            // Values from numeric escapes are written as-is, with no code page conversion.
            try buf.append(std.mem.nativeToLittle(T, @intCast(c)));
            continue;
        }
        switch (literal_type) {
            .ascii => switch (options.output_code_page) {
                .windows1252 => {
                    if (windows1252.bestFitFromCodepoint(c)) |best_fit| {
                        try buf.append(best_fit);
                    } else if (c < 0x10000 or c == code_pages.Codepoint.invalid) {
                        try buf.append('?');
                    } else {
                        // Codepoints that would need a UTF-16 surrogate pair become two '?'
                        try buf.appendSlice("??");
                    }
                },
                .utf8 => {
                    const codepoint_to_encode: u21 = if (c == code_pages.Codepoint.invalid) '\u{FFFD}' else c;
                    var utf8_buf: [4]u8 = undefined;
                    const utf8_len = std.unicode.utf8Encode(codepoint_to_encode, &utf8_buf) catch unreachable;
                    try buf.appendSlice(utf8_buf[0..utf8_len]);
                },
                else => unreachable, // Unsupported code page
            },
            .wide => {
                if (c == code_pages.Codepoint.invalid) {
                    try buf.append(std.mem.nativeToLittle(u16, '\u{FFFD}'));
                } else if (c < 0x10000) {
                    const short: u16 = @intCast(c);
                    try buf.append(std.mem.nativeToLittle(u16, short));
                } else {
                    // Encode as a UTF-16 surrogate pair.
                    const high = @as(u16, @intCast((c - 0x10000) >> 10)) + 0xD800;
                    try buf.append(std.mem.nativeToLittle(u16, high));
                    const low = @as(u16, @intCast(c & 0x3FF)) + 0xDC00;
                    try buf.append(std.mem.nativeToLittle(u16, low));
                }
            },
        }
    }

    if (literal_type == .wide) {
        return buf.toOwnedSliceSentinel(0);
    } else {
        return buf.toOwnedSlice();
    }
}

/// Parses a `"..."` literal into bytes. Caller owns the returned slice.
pub fn parseQuotedAsciiString(allocator: std.mem.Allocator, bytes: SourceBytes, options: StringParseOptions) ![]u8 {
    std.debug.assert(bytes.slice.len >= 2); // ""
    return parseQuotedString(.ascii, allocator, bytes, options);
}

/// Parses an `L"..."` literal into UTF-16 code units. Caller owns the returned slice.
pub fn parseQuotedWideString(allocator: std.mem.Allocator, bytes: SourceBytes, options: StringParseOptions) ![:0]u16 {
    std.debug.assert(bytes.slice.len >= 3); // L""
    return parseQuotedString(.wide, allocator, bytes, options);
}

/// Parses a literal with or without the `L` prefix into UTF-16 code units.
/// Caller owns the returned slice.
pub fn parseQuotedStringAsWideString(allocator: std.mem.Allocator, bytes: SourceBytes, options: StringParseOptions) ![:0]u16 {
    std.debug.assert(bytes.slice.len >= 2); // ""
    return parseQuotedString(.wide, allocator, bytes, options);
}
parseQuotedStringAsAsciiString(allocator: std.mem.Allocator, bytes: SourceBytes, options: StringParseOptions) ![]u8 { - std.debug.assert(bytes.slice.len >= 2); // "" - return parseQuotedString(.ascii, allocator, bytes, options); -} - -test "parse quoted ascii string" { - var arena_allocator = std.heap.ArenaAllocator.init(std.testing.allocator); - defer arena_allocator.deinit(); - const arena = arena_allocator.allocator(); - - try std.testing.expectEqualSlices(u8, "hello", try parseQuotedAsciiString(arena, .{ - .slice = - \\"hello" - , - .code_page = .windows1252, - }, .{})); - // hex with 0 digits - try std.testing.expectEqualSlices(u8, "\x00", try parseQuotedAsciiString(arena, .{ - .slice = - \\"\x" - , - .code_page = .windows1252, - }, .{})); - // hex max of 2 digits - try std.testing.expectEqualSlices(u8, "\xFFf", try parseQuotedAsciiString(arena, .{ - .slice = - \\"\XfFf" - , - .code_page = .windows1252, - }, .{})); - // octal with invalid octal digit - try std.testing.expectEqualSlices(u8, "\x019", try parseQuotedAsciiString(arena, .{ - .slice = - \\"\19" - , - .code_page = .windows1252, - }, .{})); - // escaped quotes - try std.testing.expectEqualSlices(u8, " \" ", try parseQuotedAsciiString(arena, .{ - .slice = - \\" "" " - , - .code_page = .windows1252, - }, .{})); - // backslash right before escaped quotes - try std.testing.expectEqualSlices(u8, "\"", try parseQuotedAsciiString(arena, .{ - .slice = - \\"\""" - , - .code_page = .windows1252, - }, .{})); - // octal overflow - try std.testing.expectEqualSlices(u8, "\x01", try parseQuotedAsciiString(arena, .{ - .slice = - \\"\401" - , - .code_page = .windows1252, - }, .{})); - // escapes - try std.testing.expectEqualSlices(u8, "\x08\n\r\t\\", try parseQuotedAsciiString(arena, .{ - .slice = - \\"\a\n\r\t\\" - , - .code_page = .windows1252, - }, .{})); - // uppercase escapes - try std.testing.expectEqualSlices(u8, "\x08\\N\\R\t\\", try parseQuotedAsciiString(arena, .{ - .slice = - \\"\A\N\R\T\\" - , - .code_page 
= .windows1252, - }, .{})); - // backslash on its own - try std.testing.expectEqualSlices(u8, "\\", try parseQuotedAsciiString(arena, .{ - .slice = - \\"\" - , - .code_page = .windows1252, - }, .{})); - // unrecognized escapes - try std.testing.expectEqualSlices(u8, "\\b", try parseQuotedAsciiString(arena, .{ - .slice = - \\"\b" - , - .code_page = .windows1252, - }, .{})); - // escaped carriage returns - try std.testing.expectEqualSlices(u8, "\\", try parseQuotedAsciiString( - arena, - .{ .slice = "\"\\\r\r\r\r\r\"", .code_page = .windows1252 }, - .{}, - )); - // escaped newlines - try std.testing.expectEqualSlices(u8, "", try parseQuotedAsciiString( - arena, - .{ .slice = "\"\\\n\n\n\n\n\"", .code_page = .windows1252 }, - .{}, - )); - // escaped CRLF pairs - try std.testing.expectEqualSlices(u8, "", try parseQuotedAsciiString( - arena, - .{ .slice = "\"\\\r\n\r\n\r\n\r\n\r\n\"", .code_page = .windows1252 }, - .{}, - )); - // escaped newlines with other whitespace - try std.testing.expectEqualSlices(u8, "", try parseQuotedAsciiString( - arena, - .{ .slice = "\"\\\n \t\r\n \r\t\n \t\"", .code_page = .windows1252 }, - .{}, - )); - // literal tab characters get converted to spaces (dependent on source file columns) - try std.testing.expectEqualSlices(u8, " ", try parseQuotedAsciiString( - arena, - .{ .slice = "\"\t\"", .code_page = .windows1252 }, - .{}, - )); - try std.testing.expectEqualSlices(u8, "abc ", try parseQuotedAsciiString( - arena, - .{ .slice = "\"abc\t\"", .code_page = .windows1252 }, - .{}, - )); - try std.testing.expectEqualSlices(u8, "abcdefg ", try parseQuotedAsciiString( - arena, - .{ .slice = "\"abcdefg\t\"", .code_page = .windows1252 }, - .{}, - )); - try std.testing.expectEqualSlices(u8, "\\ ", try parseQuotedAsciiString( - arena, - .{ .slice = "\"\\\t\"", .code_page = .windows1252 }, - .{}, - )); - // literal CR's get dropped - try std.testing.expectEqualSlices(u8, "", try parseQuotedAsciiString( - arena, - .{ .slice = "\"\r\r\r\r\r\"", 
.code_page = .windows1252 }, - .{}, - )); - // contiguous newlines and whitespace get collapsed to <space><newline> - try std.testing.expectEqualSlices(u8, " \n", try parseQuotedAsciiString( - arena, - .{ .slice = "\"\n\r\r \r\n \t \"", .code_page = .windows1252 }, - .{}, - )); -} - -test "parse quoted ascii string with utf8 code page" { - var arena_allocator = std.heap.ArenaAllocator.init(std.testing.allocator); - defer arena_allocator.deinit(); - const arena = arena_allocator.allocator(); - - try std.testing.expectEqualSlices(u8, "", try parseQuotedAsciiString( - arena, - .{ .slice = "\"\"", .code_page = .utf8 }, - .{}, - )); - // Codepoints that don't have a Windows-1252 representation get converted to ? - try std.testing.expectEqualSlices(u8, "?????????", try parseQuotedAsciiString( - arena, - .{ .slice = "\"кириллица\"", .code_page = .utf8 }, - .{}, - )); - // Codepoints that have a best fit mapping get converted accordingly, - // these are box drawing codepoints - try std.testing.expectEqualSlices(u8, "\x2b\x2d\x2b", try parseQuotedAsciiString( - arena, - .{ .slice = "\"┌─┐\"", .code_page = .utf8 }, - .{}, - )); - // Invalid UTF-8 gets converted to ? depending on well-formedness - try std.testing.expectEqualSlices(u8, "????", try parseQuotedAsciiString( - arena, - .{ .slice = "\"\xf0\xf0\x80\x80\x80\"", .code_page = .utf8 }, - .{}, - )); - // Codepoints that would require a UTF-16 surrogate pair get converted to ?? - try std.testing.expectEqualSlices(u8, "??", try parseQuotedAsciiString( - arena, - .{ .slice = "\"\xF2\xAF\xBA\xB4\"", .code_page = .utf8 }, - .{}, - )); - - // Output code page changes how invalid UTF-8 gets converted, since it - // now encodes the result as UTF-8 so it can write replacement characters. 
- try std.testing.expectEqualSlices(u8, "����", try parseQuotedAsciiString( - arena, - .{ .slice = "\"\xf0\xf0\x80\x80\x80\"", .code_page = .utf8 }, - .{ .output_code_page = .utf8 }, - )); - try std.testing.expectEqualSlices(u8, "\xF2\xAF\xBA\xB4", try parseQuotedAsciiString( - arena, - .{ .slice = "\"\xF2\xAF\xBA\xB4\"", .code_page = .utf8 }, - .{ .output_code_page = .utf8 }, - )); -} - -test "parse quoted wide string" { - var arena_allocator = std.heap.ArenaAllocator.init(std.testing.allocator); - defer arena_allocator.deinit(); - const arena = arena_allocator.allocator(); - - try std.testing.expectEqualSentinel(u16, 0, std.unicode.utf8ToUtf16LeStringLiteral("hello"), try parseQuotedWideString(arena, .{ - .slice = - \\L"hello" - , - .code_page = .windows1252, - }, .{})); - // hex with 0 digits - try std.testing.expectEqualSentinel(u16, 0, &[_:0]u16{0x0}, try parseQuotedWideString(arena, .{ - .slice = - \\L"\x" - , - .code_page = .windows1252, - }, .{})); - // hex max of 4 digits - try std.testing.expectEqualSentinel(u16, 0, &[_:0]u16{ std.mem.nativeToLittle(u16, 0xFFFF), std.mem.nativeToLittle(u16, 'f') }, try parseQuotedWideString(arena, .{ - .slice = - \\L"\XfFfFf" - , - .code_page = .windows1252, - }, .{})); - // octal max of 7 digits - try std.testing.expectEqualSentinel(u16, 0, &[_:0]u16{ std.mem.nativeToLittle(u16, 0x9493), std.mem.nativeToLittle(u16, '3'), std.mem.nativeToLittle(u16, '3') }, try parseQuotedWideString(arena, .{ - .slice = - \\L"\111222333" - , - .code_page = .windows1252, - }, .{})); - // octal overflow - try std.testing.expectEqualSentinel(u16, 0, &[_:0]u16{std.mem.nativeToLittle(u16, 0xFF01)}, try parseQuotedWideString(arena, .{ - .slice = - \\L"\777401" - , - .code_page = .windows1252, - }, .{})); - // literal tab characters get converted to spaces (dependent on source file columns) - try std.testing.expectEqualSentinel(u16, 0, std.unicode.utf8ToUtf16LeStringLiteral("abcdefg "), try parseQuotedWideString( - arena, - .{ .slice = 
"L\"abcdefg\t\"", .code_page = .windows1252 }, - .{}, - )); - // Windows-1252 conversion - try std.testing.expectEqualSentinel(u16, 0, std.unicode.utf8ToUtf16LeStringLiteral("ðð€€€"), try parseQuotedWideString( - arena, - .{ .slice = "L\"\xf0\xf0\x80\x80\x80\"", .code_page = .windows1252 }, - .{}, - )); - // Invalid escape sequences are skipped - try std.testing.expectEqualSentinel(u16, 0, std.unicode.utf8ToUtf16LeStringLiteral(""), try parseQuotedWideString( - arena, - .{ .slice = "L\"\\H\"", .code_page = .windows1252 }, - .{}, - )); -} - -test "parse quoted wide string with utf8 code page" { - var arena_allocator = std.heap.ArenaAllocator.init(std.testing.allocator); - defer arena_allocator.deinit(); - const arena = arena_allocator.allocator(); - - try std.testing.expectEqualSentinel(u16, 0, &[_:0]u16{}, try parseQuotedWideString( - arena, - .{ .slice = "L\"\"", .code_page = .utf8 }, - .{}, - )); - try std.testing.expectEqualSentinel(u16, 0, std.unicode.utf8ToUtf16LeStringLiteral("кириллица"), try parseQuotedWideString( - arena, - .{ .slice = "L\"кириллица\"", .code_page = .utf8 }, - .{}, - )); - // Invalid UTF-8 gets converted to � depending on well-formedness - try std.testing.expectEqualSentinel(u16, 0, std.unicode.utf8ToUtf16LeStringLiteral("����"), try parseQuotedWideString( - arena, - .{ .slice = "L\"\xf0\xf0\x80\x80\x80\"", .code_page = .utf8 }, - .{}, - )); -} - -test "parse quoted ascii string as wide string" { - var arena_allocator = std.heap.ArenaAllocator.init(std.testing.allocator); - defer arena_allocator.deinit(); - const arena = arena_allocator.allocator(); - - try std.testing.expectEqualSentinel(u16, 0, std.unicode.utf8ToUtf16LeStringLiteral("кириллица"), try parseQuotedStringAsWideString( - arena, - .{ .slice = "\"кириллица\"", .code_page = .utf8 }, - .{}, - )); - // Whether or not invalid escapes are skipped is still determined by the L prefix - try std.testing.expectEqualSentinel(u16, 0, std.unicode.utf8ToUtf16LeStringLiteral("\\H"), try 
parseQuotedStringAsWideString( - arena, - .{ .slice = "\"\\H\"", .code_page = .windows1252 }, - .{}, - )); - try std.testing.expectEqualSentinel(u16, 0, std.unicode.utf8ToUtf16LeStringLiteral(""), try parseQuotedStringAsWideString( - arena, - .{ .slice = "L\"\\H\"", .code_page = .windows1252 }, - .{}, - )); - // Maximum escape sequence value is also determined by the L prefix - try std.testing.expectEqualSentinel(u16, 0, &[_:0]u16{ std.mem.nativeToLittle(u16, 0x12), std.mem.nativeToLittle(u16, '3'), std.mem.nativeToLittle(u16, '4') }, try parseQuotedStringAsWideString( - arena, - .{ .slice = "\"\\x1234\"", .code_page = .windows1252 }, - .{}, - )); - try std.testing.expectEqualSentinel(u16, 0, &[_:0]u16{std.mem.nativeToLittle(u16, 0x1234)}, try parseQuotedStringAsWideString( - arena, - .{ .slice = "L\"\\x1234\"", .code_page = .windows1252 }, - .{}, - )); -} - -pub fn columnsUntilTabStop(column: usize, tab_columns: usize) usize { - // 0 => 8, 1 => 7, 2 => 6, 3 => 5, 4 => 4 - // 5 => 3, 6 => 2, 7 => 1, 8 => 8 - return tab_columns - (column % tab_columns); -} - -pub fn columnWidth(cur_column: usize, c: u8, tab_columns: usize) usize { - return switch (c) { - '\t' => columnsUntilTabStop(cur_column, tab_columns), - else => 1, - }; -} - -pub const Number = struct { - value: u32, - is_long: bool = false, - - pub fn asWord(self: Number) u16 { - return @truncate(self.value); - } - - pub fn evaluateOperator(lhs: Number, operator_char: u8, rhs: Number) Number { - const result = switch (operator_char) { - '-' => lhs.value -% rhs.value, - '+' => lhs.value +% rhs.value, - '|' => lhs.value | rhs.value, - '&' => lhs.value & rhs.value, - else => unreachable, // invalid operator, this would be a lexer/parser bug - }; - return .{ - .value = result, - .is_long = lhs.is_long or rhs.is_long, - }; - } -}; - -/// Assumes that number literals normally rejected by RC's preprocessor -/// are similarly rejected before being parsed. 
-/// -/// Relevant RC preprocessor errors: -/// RC2021: expected exponent value, not '<digit>' -/// example that is rejected: 1e1 -/// example that is accepted: 1ea -/// (this function will parse the two examples above the same) -pub fn parseNumberLiteral(bytes: SourceBytes) Number { - std.debug.assert(bytes.slice.len > 0); - var result = Number{ .value = 0, .is_long = false }; - var radix: u8 = 10; - var buf = bytes.slice; - - const Prefix = enum { none, minus, complement }; - var prefix: Prefix = .none; - switch (buf[0]) { - '-' => { - prefix = .minus; - buf = buf[1..]; - }, - '~' => { - prefix = .complement; - buf = buf[1..]; - }, - else => {}, - } - - if (buf.len > 2 and buf[0] == '0') { - switch (buf[1]) { - 'o' => { // octal radix prefix is case-sensitive - radix = 8; - buf = buf[2..]; - }, - 'x', 'X' => { - radix = 16; - buf = buf[2..]; - }, - else => {}, - } - } - - var i: usize = 0; - while (bytes.code_page.codepointAt(i, buf)) |codepoint| : (i += codepoint.byte_len) { - const c = codepoint.value; - if (c == 'L' or c == 'l') { - result.is_long = true; - break; - } - const digit = switch (c) { - // On invalid digit for the radix, just stop parsing but don't fail - 0x00...0x7F => std.fmt.charToDigit(@intCast(c), radix) catch break, - else => break, - }; - - if (result.value != 0) { - result.value *%= radix; - } - result.value +%= digit; - } - - switch (prefix) { - .none => {}, - .minus => result.value = 0 -% result.value, - .complement => result.value = ~result.value, - } - - return result; -} - -test "parse number literal" { - try std.testing.expectEqual(Number{ .value = 0, .is_long = false }, parseNumberLiteral(.{ .slice = "0", .code_page = .windows1252 })); - try std.testing.expectEqual(Number{ .value = 1, .is_long = false }, parseNumberLiteral(.{ .slice = "1", .code_page = .windows1252 })); - try std.testing.expectEqual(Number{ .value = 1, .is_long = true }, parseNumberLiteral(.{ .slice = "1L", .code_page = .windows1252 })); - try 
std.testing.expectEqual(Number{ .value = 1, .is_long = true }, parseNumberLiteral(.{ .slice = "1l", .code_page = .windows1252 })); - try std.testing.expectEqual(Number{ .value = 1, .is_long = false }, parseNumberLiteral(.{ .slice = "1garbageL", .code_page = .windows1252 })); - try std.testing.expectEqual(Number{ .value = 4294967295, .is_long = false }, parseNumberLiteral(.{ .slice = "4294967295", .code_page = .windows1252 })); - try std.testing.expectEqual(Number{ .value = 0, .is_long = false }, parseNumberLiteral(.{ .slice = "4294967296", .code_page = .windows1252 })); - try std.testing.expectEqual(Number{ .value = 1, .is_long = true }, parseNumberLiteral(.{ .slice = "4294967297L", .code_page = .windows1252 })); - - // can handle any length of number, wraps on overflow appropriately - const big_overflow = parseNumberLiteral(.{ .slice = "1000000000000000000000000000000000000000000000000000000000000000000000000000000090000000001", .code_page = .windows1252 }); - try std.testing.expectEqual(Number{ .value = 4100654081, .is_long = false }, big_overflow); - try std.testing.expectEqual(@as(u16, 1025), big_overflow.asWord()); - - try std.testing.expectEqual(Number{ .value = 0x20, .is_long = false }, parseNumberLiteral(.{ .slice = "0x20", .code_page = .windows1252 })); - try std.testing.expectEqual(Number{ .value = 0x2A, .is_long = true }, parseNumberLiteral(.{ .slice = "0x2AL", .code_page = .windows1252 })); - try std.testing.expectEqual(Number{ .value = 0x2A, .is_long = true }, parseNumberLiteral(.{ .slice = "0x2aL", .code_page = .windows1252 })); - try std.testing.expectEqual(Number{ .value = 0x2A, .is_long = true }, parseNumberLiteral(.{ .slice = "0x2aL", .code_page = .windows1252 })); - - try std.testing.expectEqual(Number{ .value = 0o20, .is_long = false }, parseNumberLiteral(.{ .slice = "0o20", .code_page = .windows1252 })); - try std.testing.expectEqual(Number{ .value = 0o20, .is_long = true }, parseNumberLiteral(.{ .slice = "0o20L", .code_page = .windows1252 })); 
- try std.testing.expectEqual(Number{ .value = 0o2, .is_long = false }, parseNumberLiteral(.{ .slice = "0o29", .code_page = .windows1252 })); - try std.testing.expectEqual(Number{ .value = 0, .is_long = false }, parseNumberLiteral(.{ .slice = "0O29", .code_page = .windows1252 })); - - try std.testing.expectEqual(Number{ .value = 0xFFFFFFFF, .is_long = false }, parseNumberLiteral(.{ .slice = "-1", .code_page = .windows1252 })); - try std.testing.expectEqual(Number{ .value = 0xFFFFFFFE, .is_long = false }, parseNumberLiteral(.{ .slice = "~1", .code_page = .windows1252 })); - try std.testing.expectEqual(Number{ .value = 0xFFFFFFFF, .is_long = true }, parseNumberLiteral(.{ .slice = "-4294967297L", .code_page = .windows1252 })); - try std.testing.expectEqual(Number{ .value = 0xFFFFFFFE, .is_long = true }, parseNumberLiteral(.{ .slice = "~4294967297L", .code_page = .windows1252 })); - try std.testing.expectEqual(Number{ .value = 0xFFFFFFFD, .is_long = false }, parseNumberLiteral(.{ .slice = "-0X3", .code_page = .windows1252 })); - - // anything after L is ignored - try std.testing.expectEqual(Number{ .value = 0x2A, .is_long = true }, parseNumberLiteral(.{ .slice = "0x2aL5", .code_page = .windows1252 })); -} diff --git a/src/resinator/parse.zig b/src/resinator/parse.zig @@ -1,1883 +0,0 @@ -const std = @import("std"); -const Lexer = @import("lex.zig").Lexer; -const Token = @import("lex.zig").Token; -const Node = @import("ast.zig").Node; -const Tree = @import("ast.zig").Tree; -const CodePageLookup = @import("ast.zig").CodePageLookup; -const Resource = @import("rc.zig").Resource; -const Allocator = std.mem.Allocator; -const ErrorDetails = @import("errors.zig").ErrorDetails; -const Diagnostics = @import("errors.zig").Diagnostics; -const SourceBytes = @import("literals.zig").SourceBytes; -const Compiler = @import("compile.zig").Compiler; -const rc = @import("rc.zig"); -const res = @import("res.zig"); - -// TODO: Make these configurable? 
-pub const max_nested_menu_level: u32 = 512; -pub const max_nested_version_level: u32 = 512; -pub const max_nested_expression_level: u32 = 200; - -pub const Parser = struct { - const Self = @This(); - - lexer: *Lexer, - /// values that need to be initialized per-parse - state: Parser.State = undefined, - options: Parser.Options, - - pub const Error = error{ParseError} || Allocator.Error; - - pub const Options = struct { - warn_instead_of_error_on_invalid_code_page: bool = false, - }; - - pub fn init(lexer: *Lexer, options: Options) Parser { - return Parser{ - .lexer = lexer, - .options = options, - }; - } - - pub const State = struct { - token: Token, - lookahead_lexer: Lexer, - allocator: Allocator, - arena: Allocator, - diagnostics: *Diagnostics, - input_code_page_lookup: CodePageLookup, - output_code_page_lookup: CodePageLookup, - }; - - pub fn parse(self: *Self, allocator: Allocator, diagnostics: *Diagnostics) Error!*Tree { - var arena = std.heap.ArenaAllocator.init(allocator); - errdefer arena.deinit(); - - self.state = Parser.State{ - .token = undefined, - .lookahead_lexer = undefined, - .allocator = allocator, - .arena = arena.allocator(), - .diagnostics = diagnostics, - .input_code_page_lookup = CodePageLookup.init(arena.allocator(), self.lexer.default_code_page), - .output_code_page_lookup = CodePageLookup.init(arena.allocator(), self.lexer.default_code_page), - }; - - const parsed_root = try self.parseRoot(); - - const tree = try self.state.arena.create(Tree); - tree.* = .{ - .node = parsed_root, - .input_code_pages = self.state.input_code_page_lookup, - .output_code_pages = self.state.output_code_page_lookup, - .source = self.lexer.buffer, - .arena = arena.state, - .allocator = allocator, - }; - return tree; - } - - fn parseRoot(self: *Self) Error!*Node { - var statements = std.ArrayList(*Node).init(self.state.allocator); - defer statements.deinit(); - - try self.parseStatements(&statements); - try self.check(.eof); - - const node = try 
self.state.arena.create(Node.Root); - node.* = .{ - .body = try self.state.arena.dupe(*Node, statements.items), - }; - return &node.base; - } - - fn parseStatements(self: *Self, statements: *std.ArrayList(*Node)) Error!void { - while (true) { - try self.nextToken(.whitespace_delimiter_only); - if (self.state.token.id == .eof) break; - // The Win32 compiler will sometimes try to recover from errors - // and then restart parsing afterwards. We don't ever do this - // because it almost always leads to unhelpful error messages - // (usually it will end up with bogus things like 'file - // not found: {') - const statement = try self.parseStatement(); - try statements.append(statement); - } - } - - /// Expects the current token to be the token before possible common resource attributes. - /// After return, the current token will be the token immediately before the end of the - /// common resource attributes (if any). If there are no common resource attributes, the - /// current token is unchanged. - /// The returned slice is allocated by the parser's arena - fn parseCommonResourceAttributes(self: *Self) ![]Token { - var common_resource_attributes = std.ArrayListUnmanaged(Token){}; - while (true) { - const maybe_common_resource_attribute = try self.lookaheadToken(.normal); - if (maybe_common_resource_attribute.id == .literal and rc.CommonResourceAttributes.map.has(maybe_common_resource_attribute.slice(self.lexer.buffer))) { - try common_resource_attributes.append(self.state.arena, maybe_common_resource_attribute); - self.nextToken(.normal) catch unreachable; - } else { - break; - } - } - return common_resource_attributes.toOwnedSlice(self.state.arena); - } - - /// Expects the current token to have already been dealt with, and that the - /// optional statements will potentially start on the next token. - /// After return, the current token will be the token immediately before the end of the - /// optional statements (if any). 
If there are no optional statements, the - /// current token is unchanged. - /// The returned slice is allocated by the parser's arena - fn parseOptionalStatements(self: *Self, resource: Resource) ![]*Node { - var optional_statements = std.ArrayListUnmanaged(*Node){}; - while (true) { - const lookahead_token = try self.lookaheadToken(.normal); - if (lookahead_token.id != .literal) break; - const slice = lookahead_token.slice(self.lexer.buffer); - const optional_statement_type = rc.OptionalStatements.map.get(slice) orelse switch (resource) { - .dialog, .dialogex => rc.OptionalStatements.dialog_map.get(slice) orelse break, - else => break, - }; - self.nextToken(.normal) catch unreachable; - switch (optional_statement_type) { - .language => { - const language = try self.parseLanguageStatement(); - try optional_statements.append(self.state.arena, language); - }, - // Number only - .version, .characteristics, .style, .exstyle => { - const identifier = self.state.token; - const value = try self.parseExpression(.{ - .can_contain_not_expressions = optional_statement_type == .style or optional_statement_type == .exstyle, - .allowed_types = .{ .number = true }, - }); - const node = try self.state.arena.create(Node.SimpleStatement); - node.* = .{ - .identifier = identifier, - .value = value, - }; - try optional_statements.append(self.state.arena, &node.base); - }, - // String only - .caption => { - const identifier = self.state.token; - try self.nextToken(.normal); - const value = self.state.token; - if (!value.isStringLiteral()) { - return self.addErrorDetailsAndFail(ErrorDetails{ - .err = .expected_something_else, - .token = value, - .extra = .{ .expected_types = .{ - .string_literal = true, - } }, - }); - } - // TODO: Wrapping this in a Node.Literal is superfluous but necessary - // to put it in a SimpleStatement - const value_node = try self.state.arena.create(Node.Literal); - value_node.* = .{ - .token = value, - }; - const node = try 
self.state.arena.create(Node.SimpleStatement); - node.* = .{ - .identifier = identifier, - .value = &value_node.base, - }; - try optional_statements.append(self.state.arena, &node.base); - }, - // String or number - .class => { - const identifier = self.state.token; - const value = try self.parseExpression(.{ .allowed_types = .{ .number = true, .string = true } }); - const node = try self.state.arena.create(Node.SimpleStatement); - node.* = .{ - .identifier = identifier, - .value = value, - }; - try optional_statements.append(self.state.arena, &node.base); - }, - // Special case - .menu => { - const identifier = self.state.token; - try self.nextToken(.whitespace_delimiter_only); - try self.check(.literal); - // TODO: Wrapping this in a Node.Literal is superfluous but necessary - // to put it in a SimpleStatement - const value_node = try self.state.arena.create(Node.Literal); - value_node.* = .{ - .token = self.state.token, - }; - const node = try self.state.arena.create(Node.SimpleStatement); - node.* = .{ - .identifier = identifier, - .value = &value_node.base, - }; - try optional_statements.append(self.state.arena, &node.base); - }, - .font => { - const identifier = self.state.token; - const point_size = try self.parseExpression(.{ .allowed_types = .{ .number = true } }); - - // The comma between point_size and typeface is both optional and - // there can be any number of them - try self.skipAnyCommas(); - - try self.nextToken(.normal); - const typeface = self.state.token; - if (!typeface.isStringLiteral()) { - return self.addErrorDetailsAndFail(ErrorDetails{ - .err = .expected_something_else, - .token = typeface, - .extra = .{ .expected_types = .{ - .string_literal = true, - } }, - }); - } - - const ExSpecificValues = struct { - weight: ?*Node = null, - italic: ?*Node = null, - char_set: ?*Node = null, - }; - var ex_specific = ExSpecificValues{}; - ex_specific: { - var optional_param_parser = OptionalParamParser{ .parser = self }; - switch (resource) { - 
.dialogex => { - { - ex_specific.weight = try optional_param_parser.parse(.{}); - if (optional_param_parser.finished) break :ex_specific; - } - { - if (!(try self.parseOptionalToken(.comma))) break :ex_specific; - ex_specific.italic = try self.parseExpression(.{ .allowed_types = .{ .number = true } }); - } - { - ex_specific.char_set = try optional_param_parser.parse(.{}); - if (optional_param_parser.finished) break :ex_specific; - } - }, - .dialog => {}, - else => unreachable, // only DIALOG and DIALOGEX have FONT optional-statements - } - } - - const node = try self.state.arena.create(Node.FontStatement); - node.* = .{ - .identifier = identifier, - .point_size = point_size, - .typeface = typeface, - .weight = ex_specific.weight, - .italic = ex_specific.italic, - .char_set = ex_specific.char_set, - }; - try optional_statements.append(self.state.arena, &node.base); - }, - } - } - return optional_statements.toOwnedSlice(self.state.arena); - } - - /// Expects the current token to be the first token of the statement. 
- fn parseStatement(self: *Self) Error!*Node { - const first_token = self.state.token; - std.debug.assert(first_token.id == .literal); - - if (rc.TopLevelKeywords.map.get(first_token.slice(self.lexer.buffer))) |keyword| switch (keyword) { - .language => { - const language_statement = try self.parseLanguageStatement(); - return language_statement; - }, - .version, .characteristics => { - const identifier = self.state.token; - const value = try self.parseExpression(.{ .allowed_types = .{ .number = true } }); - const node = try self.state.arena.create(Node.SimpleStatement); - node.* = .{ - .identifier = identifier, - .value = value, - }; - return &node.base; - }, - .stringtable => { - // common resource attributes must all be contiguous and come before optional-statements - const common_resource_attributes = try self.parseCommonResourceAttributes(); - const optional_statements = try self.parseOptionalStatements(.stringtable); - - try self.nextToken(.normal); - const begin_token = self.state.token; - try self.check(.begin); - - var strings = std.ArrayList(*Node).init(self.state.allocator); - defer strings.deinit(); - while (true) { - const maybe_end_token = try self.lookaheadToken(.normal); - switch (maybe_end_token.id) { - .end => { - self.nextToken(.normal) catch unreachable; - break; - }, - .eof => { - return self.addErrorDetailsAndFail(ErrorDetails{ - .err = .unfinished_string_table_block, - .token = maybe_end_token, - }); - }, - else => {}, - } - const id_expression = try self.parseExpression(.{ .allowed_types = .{ .number = true } }); - - const comma_token: ?Token = if (try self.parseOptionalToken(.comma)) self.state.token else null; - - try self.nextToken(.normal); - if (self.state.token.id != .quoted_ascii_string and self.state.token.id != .quoted_wide_string) { - return self.addErrorDetailsAndFail(ErrorDetails{ - .err = .expected_something_else, - .token = self.state.token, - .extra = .{ .expected_types = .{ .string_literal = true } }, - }); - } - - const 
string_node = try self.state.arena.create(Node.StringTableString); - string_node.* = .{ - .id = id_expression, - .maybe_comma = comma_token, - .string = self.state.token, - }; - try strings.append(&string_node.base); - } - - if (strings.items.len == 0) { - return self.addErrorDetailsAndFail(ErrorDetails{ - .err = .expected_token, // TODO: probably a more specific error message - .token = self.state.token, - .extra = .{ .expected = .number }, - }); - } - - const end_token = self.state.token; - try self.check(.end); - - const node = try self.state.arena.create(Node.StringTable); - node.* = .{ - .type = first_token, - .common_resource_attributes = common_resource_attributes, - .optional_statements = optional_statements, - .begin_token = begin_token, - .strings = try self.state.arena.dupe(*Node, strings.items), - .end_token = end_token, - }; - return &node.base; - }, - }; - - // The Win32 RC compiler allows for a 'dangling' literal at the end of a file - // (as long as it's not a valid top-level keyword), and there is actually an - // .rc file with a such a dangling literal in the Windows-classic-samples set - // of projects. So, we have special compatibility for this particular case. 
- const maybe_eof = try self.lookaheadToken(.whitespace_delimiter_only); - if (maybe_eof.id == .eof) { - // TODO: emit warning - var context = try self.state.arena.alloc(Token, 2); - context[0] = first_token; - context[1] = maybe_eof; - const invalid_node = try self.state.arena.create(Node.Invalid); - invalid_node.* = .{ - .context = context, - }; - return &invalid_node.base; - } - - const id_token = first_token; - const id_code_page = self.lexer.current_code_page; - try self.nextToken(.whitespace_delimiter_only); - const resource = try self.checkResource(); - const type_token = self.state.token; - - if (resource == .string_num) { - try self.addErrorDetails(.{ - .err = .string_resource_as_numeric_type, - .token = type_token, - }); - return self.addErrorDetailsAndFail(.{ - .err = .string_resource_as_numeric_type, - .token = type_token, - .type = .note, - .print_source_line = false, - }); - } - - if (resource == .font) { - const id_bytes = SourceBytes{ - .slice = id_token.slice(self.lexer.buffer), - .code_page = id_code_page, - }; - const maybe_ordinal = res.NameOrOrdinal.maybeOrdinalFromString(id_bytes); - if (maybe_ordinal == null) { - const would_be_win32_rc_ordinal = res.NameOrOrdinal.maybeNonAsciiOrdinalFromString(id_bytes); - if (would_be_win32_rc_ordinal) |win32_rc_ordinal| { - try self.addErrorDetails(ErrorDetails{ - .err = .id_must_be_ordinal, - .token = id_token, - .extra = .{ .resource = resource }, - }); - return self.addErrorDetailsAndFail(ErrorDetails{ - .err = .win32_non_ascii_ordinal, - .token = id_token, - .type = .note, - .print_source_line = false, - .extra = .{ .number = win32_rc_ordinal.ordinal }, - }); - } else { - return self.addErrorDetailsAndFail(ErrorDetails{ - .err = .id_must_be_ordinal, - .token = id_token, - .extra = .{ .resource = resource }, - }); - } - } - } - - switch (resource) { - .accelerators => { - // common resource attributes must all be contiguous and come before optional-statements - const common_resource_attributes = try 
self.parseCommonResourceAttributes(); - const optional_statements = try self.parseOptionalStatements(resource); - - try self.nextToken(.normal); - const begin_token = self.state.token; - try self.check(.begin); - - var accelerators = std.ArrayListUnmanaged(*Node){}; - - while (true) { - const lookahead = try self.lookaheadToken(.normal); - switch (lookahead.id) { - .end, .eof => { - self.nextToken(.normal) catch unreachable; - break; - }, - else => {}, - } - const event = try self.parseExpression(.{ .allowed_types = .{ .number = true, .string = true } }); - - try self.nextToken(.normal); - try self.check(.comma); - - const idvalue = try self.parseExpression(.{ .allowed_types = .{ .number = true } }); - - var type_and_options = std.ArrayListUnmanaged(Token){}; - while (true) { - if (!(try self.parseOptionalToken(.comma))) break; - - try self.nextToken(.normal); - if (!rc.AcceleratorTypeAndOptions.map.has(self.tokenSlice())) { - return self.addErrorDetailsAndFail(.{ - .err = .expected_something_else, - .token = self.state.token, - .extra = .{ .expected_types = .{ - .accelerator_type_or_option = true, - } }, - }); - } - try type_and_options.append(self.state.arena, self.state.token); - } - - const node = try self.state.arena.create(Node.Accelerator); - node.* = .{ - .event = event, - .idvalue = idvalue, - .type_and_options = try type_and_options.toOwnedSlice(self.state.arena), - }; - try accelerators.append(self.state.arena, &node.base); - } - - const end_token = self.state.token; - try self.check(.end); - - const node = try self.state.arena.create(Node.Accelerators); - node.* = .{ - .id = id_token, - .type = type_token, - .common_resource_attributes = common_resource_attributes, - .optional_statements = optional_statements, - .begin_token = begin_token, - .accelerators = try accelerators.toOwnedSlice(self.state.arena), - .end_token = end_token, - }; - return &node.base; - }, - .dialog, .dialogex => { - // common resource attributes must all be contiguous and come 
before optional-statements - const common_resource_attributes = try self.parseCommonResourceAttributes(); - - const x = try self.parseExpression(.{ .allowed_types = .{ .number = true } }); - _ = try self.parseOptionalToken(.comma); - - const y = try self.parseExpression(.{ .allowed_types = .{ .number = true } }); - _ = try self.parseOptionalToken(.comma); - - const width = try self.parseExpression(.{ .allowed_types = .{ .number = true } }); - _ = try self.parseOptionalToken(.comma); - - const height = try self.parseExpression(.{ .allowed_types = .{ .number = true } }); - - var optional_param_parser = OptionalParamParser{ .parser = self }; - const help_id: ?*Node = try optional_param_parser.parse(.{}); - - const optional_statements = try self.parseOptionalStatements(resource); - - try self.nextToken(.normal); - const begin_token = self.state.token; - try self.check(.begin); - - var controls = std.ArrayListUnmanaged(*Node){}; - defer controls.deinit(self.state.allocator); - while (try self.parseControlStatement(resource)) |control_node| { - // The number of controls must fit in a u16 in order for it to - // be able to be written into the relevant field in the .res data. 
- if (controls.items.len >= std.math.maxInt(u16)) { - try self.addErrorDetails(.{ - .err = .too_many_dialog_controls, - .token = id_token, - .extra = .{ .resource = resource }, - }); - return self.addErrorDetailsAndFail(.{ - .err = .too_many_dialog_controls, - .type = .note, - .token = control_node.getFirstToken(), - .token_span_end = control_node.getLastToken(), - .extra = .{ .resource = resource }, - }); - } - - try controls.append(self.state.allocator, control_node); - } - - try self.nextToken(.normal); - const end_token = self.state.token; - try self.check(.end); - - const node = try self.state.arena.create(Node.Dialog); - node.* = .{ - .id = id_token, - .type = type_token, - .common_resource_attributes = common_resource_attributes, - .x = x, - .y = y, - .width = width, - .height = height, - .help_id = help_id, - .optional_statements = optional_statements, - .begin_token = begin_token, - .controls = try self.state.arena.dupe(*Node, controls.items), - .end_token = end_token, - }; - return &node.base; - }, - .toolbar => { - // common resource attributes must all be contiguous and come before optional-statements - const common_resource_attributes = try self.parseCommonResourceAttributes(); - - const button_width = try self.parseExpression(.{ .allowed_types = .{ .number = true } }); - - try self.nextToken(.normal); - try self.check(.comma); - - const button_height = try self.parseExpression(.{ .allowed_types = .{ .number = true } }); - - try self.nextToken(.normal); - const begin_token = self.state.token; - try self.check(.begin); - - var buttons = std.ArrayListUnmanaged(*Node){}; - while (try self.parseToolbarButtonStatement()) |button_node| { - try buttons.append(self.state.arena, button_node); - } - - try self.nextToken(.normal); - const end_token = self.state.token; - try self.check(.end); - - const node = try self.state.arena.create(Node.Toolbar); - node.* = .{ - .id = id_token, - .type = type_token, - .common_resource_attributes = common_resource_attributes, 
- .button_width = button_width, - .button_height = button_height, - .begin_token = begin_token, - .buttons = try buttons.toOwnedSlice(self.state.arena), - .end_token = end_token, - }; - return &node.base; - }, - .menu, .menuex => { - // common resource attributes must all be contiguous and come before optional-statements - const common_resource_attributes = try self.parseCommonResourceAttributes(); - // help id is optional but must come between common resource attributes and optional-statements - var help_id: ?*Node = null; - // Note: No comma is allowed before or after help_id of MENUEX and help_id is not - // a possible field of MENU. - if (resource == .menuex and try self.lookaheadCouldBeNumberExpression(.not_disallowed)) { - help_id = try self.parseExpression(.{ - .is_known_to_be_number_expression = true, - }); - } - const optional_statements = try self.parseOptionalStatements(.stringtable); - - try self.nextToken(.normal); - const begin_token = self.state.token; - try self.check(.begin); - - var items = std.ArrayListUnmanaged(*Node){}; - defer items.deinit(self.state.allocator); - while (try self.parseMenuItemStatement(resource, id_token, 1)) |item_node| { - try items.append(self.state.allocator, item_node); - } - - try self.nextToken(.normal); - const end_token = self.state.token; - try self.check(.end); - - if (items.items.len == 0) { - return self.addErrorDetailsAndFail(.{ - .err = .empty_menu_not_allowed, - .token = type_token, - }); - } - - const node = try self.state.arena.create(Node.Menu); - node.* = .{ - .id = id_token, - .type = type_token, - .common_resource_attributes = common_resource_attributes, - .optional_statements = optional_statements, - .help_id = help_id, - .begin_token = begin_token, - .items = try self.state.arena.dupe(*Node, items.items), - .end_token = end_token, - }; - return &node.base; - }, - .versioninfo => { - // common resource attributes must all be contiguous and come before optional-statements - const 
common_resource_attributes = try self.parseCommonResourceAttributes(); - - var fixed_info = std.ArrayListUnmanaged(*Node){}; - while (try self.parseVersionStatement()) |version_statement| { - try fixed_info.append(self.state.arena, version_statement); - } - - try self.nextToken(.normal); - const begin_token = self.state.token; - try self.check(.begin); - - var block_statements = std.ArrayListUnmanaged(*Node){}; - while (try self.parseVersionBlockOrValue(id_token, 1)) |block_node| { - try block_statements.append(self.state.arena, block_node); - } - - try self.nextToken(.normal); - const end_token = self.state.token; - try self.check(.end); - - const node = try self.state.arena.create(Node.VersionInfo); - node.* = .{ - .id = id_token, - .versioninfo = type_token, - .common_resource_attributes = common_resource_attributes, - .fixed_info = try fixed_info.toOwnedSlice(self.state.arena), - .begin_token = begin_token, - .block_statements = try block_statements.toOwnedSlice(self.state.arena), - .end_token = end_token, - }; - return &node.base; - }, - .dlginclude => { - const common_resource_attributes = try self.parseCommonResourceAttributes(); - - const filename_expression = try self.parseExpression(.{ - .allowed_types = .{ .string = true }, - }); - - const node = try self.state.arena.create(Node.ResourceExternal); - node.* = .{ - .id = id_token, - .type = type_token, - .common_resource_attributes = common_resource_attributes, - .filename = filename_expression, - }; - return &node.base; - }, - .stringtable => { - return self.addErrorDetailsAndFail(.{ - .err = .name_or_id_not_allowed, - .token = id_token, - .extra = .{ .resource = resource }, - }); - }, - // Just try everything as a 'generic' resource (raw data or external file) - // TODO: More fine-grained switch cases as necessary - else => { - const common_resource_attributes = try self.parseCommonResourceAttributes(); - - const maybe_begin = try self.lookaheadToken(.normal); - if (maybe_begin.id == .begin) { - 
self.nextToken(.normal) catch unreachable; - - if (!resource.canUseRawData()) { - try self.addErrorDetails(ErrorDetails{ - .err = .resource_type_cant_use_raw_data, - .token = maybe_begin, - .extra = .{ .resource = resource }, - }); - return self.addErrorDetailsAndFail(ErrorDetails{ - .err = .resource_type_cant_use_raw_data, - .type = .note, - .print_source_line = false, - .token = maybe_begin, - }); - } - - const raw_data = try self.parseRawDataBlock(); - const end_token = self.state.token; - - const node = try self.state.arena.create(Node.ResourceRawData); - node.* = .{ - .id = id_token, - .type = type_token, - .common_resource_attributes = common_resource_attributes, - .begin_token = maybe_begin, - .raw_data = raw_data, - .end_token = end_token, - }; - return &node.base; - } - - const filename_expression = try self.parseExpression(.{ - // Don't tell the user that numbers are accepted since we error on - // number expressions and regular number literals are treated as unquoted - // literals rather than numbers, so from the users perspective - // numbers aren't really allowed. - .expected_types_override = .{ - .literal = true, - .string_literal = true, - }, - }); - - const node = try self.state.arena.create(Node.ResourceExternal); - node.* = .{ - .id = id_token, - .type = type_token, - .common_resource_attributes = common_resource_attributes, - .filename = filename_expression, - }; - return &node.base; - }, - } - } - - /// Expects the current token to be a begin token. - /// After return, the current token will be the end token. 
/// Every value between the begin and end tokens is parsed as a number or string
/// expression. Commas are skipped, except that a comma appearing before any value
/// has been parsed is reported as an error.
/// The returned slice is allocated with the parser's arena.
fn parseRawDataBlock(self: *Self) Error![]*Node {
    var values = std.ArrayList(*Node).init(self.state.allocator);
    defer values.deinit();

    while (true) {
        const peeked = try self.lookaheadToken(.normal);

        if (peeked.id == .end) {
            // Consume the end token so that it is the current token on return.
            self.nextToken(.normal) catch unreachable;
            break;
        }

        if (peeked.id == .eof) {
            return self.addErrorDetailsAndFail(ErrorDetails{
                .err = .unfinished_raw_data_block,
                .token = peeked,
            });
        }

        if (peeked.id == .comma) {
            // A comma is only an error when nothing has been parsed yet;
            // everywhere else it is a separator that gets skipped.
            if (values.items.len == 0) {
                return self.addErrorDetailsAndFail(ErrorDetails{
                    .err = .expected_something_else,
                    .token = peeked,
                    .extra = .{ .expected_types = .{
                        .number = true,
                        .number_expression = true,
                        .string_literal = true,
                    } },
                });
            }
            self.nextToken(.normal) catch unreachable;
            continue;
        }

        const value = try self.parseExpression(.{
            .allowed_types = .{ .number = true, .string = true },
        });
        try values.append(value);

        if (!value.isNumberExpression()) continue;

        // `<number expression>)` is an error
        const after_value = try self.lookaheadToken(.normal);
        if (after_value.id == .close_paren) {
            return self.addErrorDetailsAndFail(ErrorDetails{
                .err = .expected_token,
                .token = after_value,
                .extra = .{ .expected = .operator },
            });
        }
    }

    return try self.state.arena.dupe(*Node, values.items);
}

/// Expects the current token to be handled, and that the control statement will
/// begin on the next token.
/// After return, the current token will be the token immediately before the end of the
/// control statement (or unchanged if the function returns null).
/// Returns null when the next token is not a key of `rc.Control.map`.
/// `resource` decides whether the DIALOGEX-only parts (help id, extra data block)
/// are parsed.
fn parseControlStatement(self: *Self, resource: Resource) Error!?*Node {
    const keyword = try self.lookaheadToken(.normal);
    const control = rc.Control.map.get(keyword.slice(self.lexer.buffer)) orelse return null;
    self.nextToken(.normal) catch unreachable;

    try self.skipAnyCommas();

    // Only some controls take a text parameter; when present it must be a
    // quoted string or a number token.
    const text: ?Token = if (control.hasTextParam()) blk: {
        try self.nextToken(.normal);
        const candidate = self.state.token;
        switch (candidate.id) {
            .quoted_ascii_string, .quoted_wide_string, .number => {},
            else => return self.addErrorDetailsAndFail(ErrorDetails{
                .err = .expected_something_else,
                .token = candidate,
                .extra = .{ .expected_types = .{
                    .number = true,
                    .string_literal = true,
                } },
            }),
        }
        try self.skipAnyCommas();
        break :blk candidate;
    } else null;

    const number_only: ParseExpressionOptions = .{ .allowed_types = .{ .number = true } };

    const id = try self.parseExpression(number_only);

    try self.skipAnyCommas();

    const is_generic_control = control == .control;

    var class: ?*Node = null;
    var style: ?*Node = null;
    if (is_generic_control) {
        const class_node = try self.parseExpression(.{});
        class = class_node;
        if (class_node.id == .literal) {
            const literal = @fieldParentPtr(Node.Literal, "base", class_node);
            const literal_token = literal.token;
            // An unquoted literal is only acceptable if it names a known control class
            if (literal_token.id == .literal and
                !rc.ControlClass.map.has(literal_token.slice(self.lexer.buffer)))
            {
                return self.addErrorDetailsAndFail(.{
                    .err = .expected_something_else,
                    .token = self.state.token,
                    .extra = .{ .expected_types = .{
                        .control_class = true,
                    } },
                });
            }
        }
        try self.skipAnyCommas();

        const style_node = try self.parseExpression(.{
            .can_contain_not_expressions = true,
            .allowed_types = .{ .number = true },
        });
        style = style_node;

        // Without a comma after the style parameter, the Win32 RC compiler can
        // misinterpret the statement and skip at least one token that should have
        // been interpreted as the next parameter (x). For example:
        //   CONTROL "text", 1, BUTTON, 15 30, 1, 2, 3, 4
        // Here `15` is the style parameter, but the Win32 implementation ignores
        // the `30` entirely (i.e. `1, 2, 3, 4` become `x`, `y`, `w`, `h`).
        // With a comma after the `15`, the `30` is (correctly) taken as `x`.
        //
        // That behavior is not emulated; instead a warning about the potential
        // for weird Win32 behavior is emitted whenever the comma is missing.
        const after_style = try self.lookaheadToken(.normal);
        if (after_style.id != .comma and after_style.id != .eof) {
            try self.addErrorDetails(.{
                .err = .rc_could_miscompile_control_params,
                .type = .warning,
                .token = after_style,
            });
            try self.addErrorDetails(.{
                .err = .rc_could_miscompile_control_params,
                .type = .note,
                .token = style_node.getFirstToken(),
                .token_span_end = style_node.getLastToken(),
            });
        }
        try self.skipAnyCommas();
    }

    // x, y, width, height: each may be followed by at most one comma here
    const x = try self.parseExpression(number_only);
    _ = try self.parseOptionalToken(.comma);
    const y = try self.parseExpression(number_only);
    _ = try self.parseOptionalToken(.comma);
    const width = try self.parseExpression(number_only);
    _ = try self.parseOptionalToken(.comma);
    const height = try self.parseExpression(number_only);

    var trailing_params = OptionalParamParser{ .parser = self };
    if (!is_generic_control) {
        // For everything except CONTROL, style is a trailing optional parameter
        style = try trailing_params.parse(.{ .not_expression_allowed = true });
    }

    const exstyle: ?*Node = try trailing_params.parse(.{ .not_expression_allowed = true });

    const is_dialogex = resource == .dialogex;
    const help_id: ?*Node = if (is_dialogex) try trailing_params.parse(.{}) else null;

    var extra_data: []*Node = &[_]*Node{};
    var extra_data_begin: ?Token = null;
    var extra_data_end: ?Token = null;
    // extra data is DIALOGEX-only
    if (is_dialogex and try self.parseOptionalToken(.begin)) {
        extra_data_begin = self.state.token;
        extra_data = try self.parseRawDataBlock();
        extra_data_end = self.state.token;
    }

    const control_node = try self.state.arena.create(Node.ControlStatement);
    control_node.* = .{
        .type = keyword,
        .text = text,
        .class = class,
        .id = id,
        .x = x,
        .y = y,
        .width = width,
        .height = height,
        .style = style,
        .exstyle = exstyle,
        .help_id = help_id,
        .extra_data_begin = extra_data_begin,
        .extra_data = extra_data,
        .extra_data_end = extra_data_end,
    };
    return &control_node.base;
}

/// Parses one toolbar statement if the next token is a key of `rc.ToolbarButton.map`;
/// otherwise returns null.
/// A separator becomes a `Node.Literal` holding the keyword token; a button becomes a
/// `Node.SimpleStatement` whose value is the numeric button id expression.
fn parseToolbarButtonStatement(self: *Self) Error!?*Node {
    const keyword = try self.lookaheadToken(.normal);
    const kind = rc.ToolbarButton.map.get(keyword.slice(self.lexer.buffer)) orelse return null;
    self.nextToken(.normal) catch unreachable;

    switch (kind) {
        .button => {
            const button_id = try self.parseExpression(.{ .allowed_types = .{ .number = true } });

            const statement = try self.state.arena.create(Node.SimpleStatement);
            statement.* = .{
                .identifier = keyword,
                .value = button_id,
            };
            return &statement.base;
        },
        .separator => {
            const literal = try self.state.arena.create(Node.Literal);
            literal.* = .{
                .token = keyword,
            };
            return &literal.base;
        },
    }
}

/// Expects the current token to be handled, and that the menuitem/popup statement will
/// begin on the next token.
/// After return, the current token will be the token immediately before the end of the
/// menuitem statement (or unchanged if the function returns null).
/// `top_level_menu_id_token` is only used for the error reported when `nesting_level`
/// exceeds `max_nested_menu_level`. `resource` must be `.menu` or `.menuex`.
fn parseMenuItemStatement(self: *Self, resource: Resource, top_level_menu_id_token: Token, nesting_level: u32) Error!?*Node {
    const keyword = try self.lookaheadToken(.normal);
    const kind = rc.MenuItem.map.get(keyword.slice(self.lexer.buffer)) orelse return null;
    self.nextToken(.normal) catch unreachable;

    if (nesting_level > max_nested_menu_level) {
        // The error points at the top-level menu, the note at the offending item
        try self.addErrorDetails(.{
            .err = .nested_resource_level_exceeds_max,
            .token = top_level_menu_id_token,
            .extra = .{ .resource = resource },
        });
        return self.addErrorDetailsAndFail(.{
            .err = .nested_resource_level_exceeds_max,
            .type = .note,
            .token = keyword,
            .extra = .{ .resource = resource },
        });
    }

    switch (resource) {
        .menu => switch (kind) {
            .menuitem => {
                try self.nextToken(.normal);
                const first = self.state.token;

                if (rc.MenuItem.isSeparator(first.slice(self.lexer.buffer))) {
                    // SEPARATOR may be followed by any number of commas
                    try self.skipAnyCommas();
                    const separator_node = try self.state.arena.create(Node.MenuItemSeparator);
                    separator_node.* = .{
                        .menuitem = keyword,
                        .separator = first,
                    };
                    return &separator_node.base;
                }

                // Not a separator, so the token has to be the item's text
                if (!first.isStringLiteral()) {
                    return self.addErrorDetailsAndFail(ErrorDetails{
                        .err = .expected_something_else,
                        .token = first,
                        .extra = .{ .expected_types = .{
                            .string_literal = true,
                        } },
                    });
                }
                try self.skipAnyCommas();

                const result = try self.parseExpression(.{ .allowed_types = .{ .number = true } });

                _ = try self.parseOptionalToken(.comma);

                var option_tokens = std.ArrayListUnmanaged(Token){};
                while (true) {
                    const candidate = try self.lookaheadToken(.normal);
                    const is_option = rc.MenuItem.Option.map.has(candidate.slice(self.lexer.buffer));
                    if (!is_option) break;
                    self.nextToken(.normal) catch unreachable;
                    try option_tokens.append(self.state.arena, candidate);
                    try self.skipAnyCommas();
                }

                const item_node = try self.state.arena.create(Node.MenuItem);
                item_node.* = .{
                    .menuitem = keyword,
                    .text = first,
                    .result = result,
                    .option_list = try option_tokens.toOwnedSlice(self.state.arena),
                };
                return &item_node.base;
            },
            .popup => {
                try self.nextToken(.normal);
                const text = self.state.token;
                if (!text.isStringLiteral()) {
                    return self.addErrorDetailsAndFail(ErrorDetails{
                        .err = .expected_something_else,
                        .token = text,
                        .extra = .{ .expected_types = .{
                            .string_literal = true,
                        } },
                    });
                }
                try self.skipAnyCommas();

                var option_tokens = std.ArrayListUnmanaged(Token){};
                while (true) {
                    const candidate = try self.lookaheadToken(.normal);
                    const is_option = rc.MenuItem.Option.map.has(candidate.slice(self.lexer.buffer));
                    if (!is_option) break;
                    self.nextToken(.normal) catch unreachable;
                    try option_tokens.append(self.state.arena, candidate);
                    try self.skipAnyCommas();
                }

                try self.nextToken(.normal);
                const begin_token = self.state.token;
                try self.check(.begin);

                var children = std.ArrayListUnmanaged(*Node){};
                while (try self.parseMenuItemStatement(resource, top_level_menu_id_token, nesting_level + 1)) |child| {
                    try children.append(self.state.arena, child);
                }

                try self.nextToken(.normal);
                const end_token = self.state.token;
                try self.check(.end);

                if (children.items.len == 0) {
                    return self.addErrorDetailsAndFail(.{
                        .err = .empty_menu_not_allowed,
                        .token = keyword,
                    });
                }

                const popup_node = try self.state.arena.create(Node.Popup);
                popup_node.* = .{
                    .popup = keyword,
                    .text = text,
                    .option_list = try option_tokens.toOwnedSlice(self.state.arena),
                    .begin_token = begin_token,
                    .items = try children.toOwnedSlice(self.state.arena),
                    .end_token = end_token,
                };
                return &popup_node.base;
            },
        },
        .menuex => {
            try self.nextToken(.normal);
            const text = self.state.token;
            if (!text.isStringLiteral()) {
                return self.addErrorDetailsAndFail(ErrorDetails{
                    .err = .expected_something_else,
                    .token = text,
                    .extra = .{ .expected_types = .{
                        .string_literal = true,
                    } },
                });
            }

            // id, type and state are shared by MENUITEM and POPUP
            var params = OptionalParamParser{ .parser = self };
            const id = try params.parse(.{});
            const item_type = try params.parse(.{});
            const state = try params.parse(.{});

            if (kind == .menuitem) {
                // a trailing comma is permitted; consume it if present
                _ = try self.parseOptionalToken(.comma);

                const item_node = try self.state.arena.create(Node.MenuItemEx);
                item_node.* = .{
                    .menuitem = keyword,
                    .text = text,
                    .id = id,
                    .type = item_type,
                    .state = state,
                };
                return &item_node.base;
            }

            // Only POPUP has a help id parameter
            const help_id = try params.parse(.{});

            // a trailing comma is permitted; consume it if present
            _ = try self.parseOptionalToken(.comma);

            try self.nextToken(.normal);
            const begin_token = self.state.token;
            try self.check(.begin);

            var children = std.ArrayListUnmanaged(*Node){};
            while (try self.parseMenuItemStatement(resource, top_level_menu_id_token, nesting_level + 1)) |child| {
                try children.append(self.state.arena, child);
            }

            try self.nextToken(.normal);
            const end_token = self.state.token;
            try self.check(.end);

            if (children.items.len == 0) {
                return self.addErrorDetailsAndFail(.{
                    .err = .empty_menu_not_allowed,
                    .token = keyword,
                });
            }

            const popup_node = try self.state.arena.create(Node.PopupEx);
            popup_node.* = .{
                .popup = keyword,
                .text = text,
                .id = id,
                .type = item_type,
                .state = state,
                .help_id = help_id,
                .begin_token = begin_token,
                .items = try children.toOwnedSlice(self.state.arena),
                .end_token = end_token,
            };
            return &popup_node.base;
        },
        else => unreachable,
    }
    @compileError("unreachable");
}

/// Parses a run of comma-prefixed optional number parameters.
/// Once a call finds no leading comma, that call and every later call return null.
pub const OptionalParamParser = struct {
    finished: bool = false,
    parser: *Self,

    pub const Options = struct {
        not_expression_allowed: bool = false,
    };

    /// Returns the parsed parameter, or null if the parameter list has ended or the
    /// parameter after the comma is empty.
    pub fn parse(self: *OptionalParamParser, options: OptionalParamParser.Options) Error!?*Node {
        if (self.finished) return null;

        const had_comma = try self.parser.parseOptionalToken(.comma);
        if (!had_comma) {
            self.finished = true;
            return null;
        }

        // 'Empty' values are allowed, so a value is only parsed when the next
        // lookahead token could be part of a number expression.
        const has_value = try self.parser.lookaheadCouldBeNumberExpression(switch (options.not_expression_allowed) {
            true => .not_allowed,
            false => .not_disallowed,
        });
        if (!has_value) return null;

        return try self.parser.parseExpression(.{
            .allowed_types = .{ .number = true },
            .can_contain_not_expressions = options.not_expression_allowed,
        });
    }
};

/// Expects the current token to be handled, and that the version statement will
/// begin on the next token.
/// After return, the current token will be the token immediately before the end of the
/// version statement (or unchanged if the function returns null).
fn parseVersionStatement(self: *Self) Error!?*Node {
    const keyword = try self.lookaheadToken(.normal);
    const statement_kind = rc.VersionInfo.map.get(keyword.slice(self.lexer.buffer)) orelse return null;
    self.nextToken(.normal) catch unreachable;

    switch (statement_kind) {
        .file_version, .product_version => {
            // Between one and four comma-separated number expressions
            var parts: [4]*Node = undefined;
            var count: usize = 0;
            while (true) {
                parts[count] = try self.parseExpression(.{ .allowed_types = .{ .number = true } });
                count += 1;

                if (count == parts.len) break;
                if (!(try self.parseOptionalToken(.comma))) break;
            }

            const version_node = try self.state.arena.create(Node.VersionStatement);
            version_node.* = .{
                .type = keyword,
                .parts = try self.state.arena.dupe(*Node, parts[0..count]),
            };
            return &version_node.base;
        },
        else => {
            const value = try self.parseExpression(.{ .allowed_types = .{ .number = true } });

            const statement = try self.state.arena.create(Node.SimpleStatement);
            statement.* = .{
                .identifier = keyword,
                .value = value,
            };
            return &statement.base;
        },
    }
}

/// Expects the current token to be handled, and that the version BLOCK/VALUE will
/// begin on the next token.
/// After return, the current token will be the token immediately before the end of the
/// version BLOCK/VALUE (or unchanged if the function returns null).
fn parseVersionBlockOrValue(self: *Self, top_level_version_id_token: Token, nesting_level: u32) Error!?*Node {
    const keyword_token = try self.lookaheadToken(.normal);
    const keyword = rc.VersionBlock.map.get(keyword_token.slice(self.lexer.buffer)) orelse return null;
    self.nextToken(.normal) catch unreachable;

    if (nesting_level > max_nested_version_level) {
        // The error points at the top-level resource, the note at the offending keyword
        try self.addErrorDetails(.{
            .err = .nested_resource_level_exceeds_max,
            .token = top_level_version_id_token,
            .extra = .{ .resource = .versioninfo },
        });
        return self.addErrorDetailsAndFail(.{
            .err = .nested_resource_level_exceeds_max,
            .type = .note,
            .token = keyword_token,
            .extra = .{ .resource = .versioninfo },
        });
    }

    try self.nextToken(.normal);
    const key = self.state.token;
    if (!key.isStringLiteral()) {
        return self.addErrorDetailsAndFail(.{
            .err = .expected_something_else,
            .token = key,
            .extra = .{ .expected_types = .{
                .string_literal = true,
            } },
        });
    }

    // Whether a comma directly follows the key is remembered so that a potential
    // miscompilation (comma omitted and the first value is a quoted string) can
    // be detected.
    const had_comma_before_first_value = try self.parseOptionalToken(.comma);
    try self.skipAnyCommas();

    const values = try self.parseBlockValuesList(had_comma_before_first_value);

    switch (keyword) {
        .value => {
            const value_node = try self.state.arena.create(Node.BlockValue);
            value_node.* = .{
                .identifier = keyword_token,
                .key = key,
                .values = values,
            };
            return &value_node.base;
        },
        .block => {
            try self.nextToken(.normal);
            const begin_token = self.state.token;
            try self.check(.begin);

            var children = std.ArrayListUnmanaged(*Node){};
            while (try self.parseVersionBlockOrValue(top_level_version_id_token, nesting_level + 1)) |child| {
                try children.append(self.state.arena, child);
            }

            try self.nextToken(.normal);
            const end_token = self.state.token;
            try self.check(.end);

            const block_node = try self.state.arena.create(Node.Block);
            block_node.* = .{
                .identifier = keyword_token,
                .key = key,
                .values = values,
                .begin_token = begin_token,
                .children = try children.toOwnedSlice(self.state.arena),
                .end_token = end_token,
            };
            return &block_node.base;
        },
    }
}

/// Parses the values that follow the key of a version BLOCK/VALUE, wrapping each
/// one in a `Node.BlockValueValue`, and emits warnings for the value lists that are
/// noted below as being miscompiled by the Win32 RC compiler.
fn parseBlockValuesList(self: *Self, had_comma_before_first_value: bool) Error![]*Node {
    var values = std.ArrayListUnmanaged(*Node){};
    var saw_number = false;
    var first_string: ?*Node = null;

    while (true) {
        const peeked = try self.lookaheadToken(.normal);
        const starts_value = switch (peeked.id) {
            .operator,
            .number,
            .open_paren,
            .quoted_ascii_string,
            .quoted_wide_string,
            => true,
            else => false,
        };
        if (!starts_value) break;

        const expression = try self.parseExpression(.{});

        if (expression.isNumberExpression()) {
            saw_number = true;
        } else if (first_string == null) {
            std.debug.assert(expression.isStringLiteral());
            first_string = expression;
        }

        const has_trailing_comma = try self.parseOptionalToken(.comma);
        try self.skipAnyCommas();

        const wrapper = try self.state.arena.create(Node.BlockValueValue);
        wrapper.* = .{
            .expression = expression,
            .trailing_comma = has_trailing_comma,
        };
        try values.append(self.state.arena, &wrapper.base);
    }

    if (saw_number) {
        if (first_string) |string_value| {
            // The Win32 RC compiler computes the data size strangely:
            //   - strings count as UTF-16 code units including the null-terminator
            //   - numbers count as their byte lengths
            // When a single value mixes strings and numbers, it incorrectly marks
            // the value's type as binary but gives the data length as a mixture of
            // bytes and UTF-16 code units. Reading that length as a byte length
            // does not cover the full value. This behavior is not reproduced, so
            // the miscompilation is warned about here.
            const string_token = string_value.getFirstToken();
            const span_start = values.items[0].getFirstToken();
            const span_end = values.items[values.items.len - 1].getLastToken();
            try self.addErrorDetails(.{
                .err = .rc_would_miscompile_version_value_byte_count,
                .type = .warning,
                .token = string_token,
                .token_span_start = span_start,
                .token_span_end = span_end,
            });
            try self.addErrorDetails(.{
                .err = .rc_would_miscompile_version_value_byte_count,
                .type = .note,
                .token = string_token,
                .token_span_start = span_start,
                .token_span_end = span_end,
                .print_source_line = false,
            });
        }
    }

    if (!had_comma_before_first_value and values.items.len > 0) {
        const first_expression = values.items[0].cast(.block_value_value).?.expression;
        if (first_expression.isStringLiteral()) {
            const token = first_expression.cast(.literal).?.token;
            try self.addErrorDetails(.{
                .err = .rc_would_miscompile_version_value_padding,
                .type = .warning,
                .token = token,
            });
            try self.addErrorDetails(.{
                .err = .rc_would_miscompile_version_value_padding,
                .type = .note,
                .token = token,
                .print_source_line = false,
            });
        }
    }

    return values.toOwnedSlice(self.state.arena);
}

/// Reports the `is_long` flag of the evaluated number expression.
fn numberExpressionContainsAnyLSuffixes(expression_node: *Node, source: []const u8, code_page_lookup: *const CodePageLookup) bool {
    // TODO: This could probably be done without evaluating the whole expression
    const evaluated = Compiler.evaluateNumberExpression(expression_node, source, code_page_lookup);
    return evaluated.is_long;
}

/// Expects the current token to be a literal token that contains the string LANGUAGE
fn parseLanguageStatement(self: *Self) Error!*Node {
    const language_token = self.state.token;

    const primary_language = try self.parseExpression(.{ .allowed_types = .{ .number = true } });

    try self.nextToken(.normal);
    try self.check(.comma);

    const sublanguage = try self.parseExpression(.{ .allowed_types = .{ .number = true } });

    // The Win32 RC compiler errors when either parameter contains a number with
    // an L suffix. Here a warning is emitted instead and the values are left to be
    // truncated. Warning at parse time keeps this concern out of the compiler logic.
    const params = [_]*Node{ primary_language, sublanguage };
    for (params) |param| {
        if (!numberExpressionContainsAnyLSuffixes(param, self.lexer.buffer, &self.state.input_code_page_lookup)) continue;

        const span_first = param.getFirstToken();
        const span_last = param.getLastToken();
        try self.addErrorDetails(.{
            .err = .rc_would_error_u16_with_l_suffix,
            .type = .warning,
            .token = span_first,
            .token_span_end = span_last,
            .extra = .{ .statement_with_u16_param = .language },
        });
        try self.addErrorDetails(.{
            .err = .rc_would_error_u16_with_l_suffix,
            .print_source_line = false,
            .type = .note,
            .token = span_first,
            .token_span_end = span_last,
            .extra = .{ .statement_with_u16_param = .language },
        });
    }

    const statement = try self.state.arena.create(Node.LanguageStatement);
    statement.* = .{
        .language_token = language_token,
        .primary_language_id = primary_language,
        .sublanguage_id = sublanguage,
    };
    return &statement.base;
}

/// Options controlling which kinds of expression `parsePrimary`/`parseExpression`
/// accept and how a rejected token is reported.
pub const ParseExpressionOptions = struct {
    is_known_to_be_number_expression: bool = false,
    can_contain_not_expressions: bool = false,
    nesting_context: NestingContext = .{},
    allowed_types: AllowedTypes = .{ .literal = true, .number = true, .string = true },
    expected_types_override: ?ErrorDetails.ExpectedTypes = null,

    pub const AllowedTypes = struct {
        literal: bool = false,
        number: bool = false,
        string: bool = false,
    };

    pub const NestingContext = struct {
        first_token: ?Token = null,
        last_token: ?Token = null,
        level: u32 = 0,

        /// Returns a copy one level deeper: the already-recorded first token is kept
        /// (falling back to `first_token`), and `most_recent_token` becomes the last token.
        fn incremented(ctx: NestingContext, first_token: Token, most_recent_token: Token) NestingContext {
            const outermost_first = ctx.first_token orelse first_token;
            return .{
                .first_token = outermost_first,
                .last_token = most_recent_token,
                .level = ctx.level + 1,
            };
        }
    };

    /// Builds the `expected_something_else` error for `token`, using
    /// `expected_types_override` when it is set and otherwise deriving the expected
    /// types from `allowed_types`.
    pub fn toErrorDetails(options: ParseExpressionOptions, token: Token) ErrorDetails {
        // TODO: expected_types_override interaction with is_known_to_be_number_expression?
        const allowed = options.allowed_types;
        const may_be_non_number = !options.is_known_to_be_number_expression;
        const derived = ErrorDetails.ExpectedTypes{
            .number = allowed.number,
            .number_expression = allowed.number,
            .string_literal = allowed.string and may_be_non_number,
            .literal = allowed.literal and may_be_non_number,
        };
        return ErrorDetails{
            .err = .expected_something_else,
            .token = token,
            .extra = .{ .expected_types = options.expected_types_override orelse derived },
        };
    }
};

/// Returns true if the next lookahead token is a number or could be the start of a number expression.
/// Only useful when looking for empty expressions in optional fields.
fn lookaheadCouldBeNumberExpression(self: *Self, not_allowed: enum { not_allowed, not_disallowed }) Error!bool {
    const peeked = try self.lookaheadToken(.normal);
    return switch (peeked.id) {
        // A literal only qualifies when it is the NOT keyword and NOT is allowed
        .literal => not_allowed == .not_allowed and
            std.ascii.eqlIgnoreCase("NOT", peeked.slice(self.lexer.buffer)),
        .number, .open_paren => true,
        // + can be a unary operator, see parseExpression's handling of unary +
        .operator => peeked.slice(self.lexer.buffer)[0] == '+',
        else => false,
    };
}

/// Advances to the next token and parses it as a single operand: a string, a literal
/// (or `NOT <number>` when allowed), a number, or a parenthesized number expression.
/// Any other token is reported as an error.
fn parsePrimary(self: *Self, options: ParseExpressionOptions) Error!*Node {
    try self.nextToken(.normal);
    const first_token = self.state.token;
    var is_close_paren_expression = false;
    var is_unary_plus_expression = false;

    switch (first_token.id) {
        .quoted_ascii_string, .quoted_wide_string => {
            if (!options.allowed_types.string) return self.addErrorDetailsAndFail(options.toErrorDetails(first_token));
            const literal = try self.state.arena.create(Node.Literal);
            literal.* = .{ .token = first_token };
            return &literal.base;
        },
        .literal => {
            const is_not_keyword = options.can_contain_not_expressions and
                std.ascii.eqlIgnoreCase("NOT", first_token.slice(self.lexer.buffer));
            if (is_not_keyword) {
                // NOT must be immediately followed by a number token
                try self.nextToken(.normal);
                try self.check(.number);
                if (!options.allowed_types.number) return self.addErrorDetailsAndFail(options.toErrorDetails(self.state.token));
                const not_node = try self.state.arena.create(Node.NotExpression);
                not_node.* = .{
                    .not_token = first_token,
                    .number_token = self.state.token,
                };
                return &not_node.base;
            }
            if (!options.allowed_types.literal) return self.addErrorDetailsAndFail(options.toErrorDetails(first_token));
            const literal = try self.state.arena.create(Node.Literal);
            literal.* = .{ .token = first_token };
            return &literal.base;
        },
        .number => {
            if (!options.allowed_types.number) return self.addErrorDetailsAndFail(options.toErrorDetails(first_token));
            const literal = try self.state.arena.create(Node.Literal);
            literal.* = .{ .token = first_token };
            return &literal.base;
        },
        .open_paren => {
            const open_paren_token = first_token;

            // Everything inside parentheses has to be a number expression
            const inner = try self.parseExpression(.{
                .is_known_to_be_number_expression = true,
                .can_contain_not_expressions = options.can_contain_not_expressions,
                .nesting_context = options.nesting_context.incremented(first_token, open_paren_token),
                .allowed_types = .{ .number = true },
            });

            try self.nextToken(.normal);
            // TODO: Add context to error about where the open paren is
            try self.check(.close_paren);

            if (!options.allowed_types.number) return self.addErrorDetailsAndFail(options.toErrorDetails(open_paren_token));
            const grouped = try self.state.arena.create(Node.GroupedExpression);
            grouped.* = .{
                .open_token = open_paren_token,
                .expression = inner,
                .close_token = self.state.token,
            };
            return &grouped.base;
        },
        .close_paren => {
            // Note: The Win32 implementation accepts a lone close paren as a valid
            // "expression", but only when it is the first and only token of the
            // expression. It then acts as a 'skip this expression' instruction.
            // For example:
            //   1 RCDATA { 1, ), ), ), 2 }
            // is evaluated as if it were `1 RCDATA { 1, 2 }`, and only 0x0001 and
            // 0x0002 are written to the .res data.
            //
            // This is not emulated since it almost certainly has no valid use cases
            // and only introduces edge cases that are not worth tracking down.
            // Instead, the token is an error, with an added note about the Win32 RC
            // behavior when this edge case is detected.
            is_close_paren_expression = !options.is_known_to_be_number_expression;
        },
        .operator => {
            // The Win32 implementation allows something akin to a unary +, but it
            // does not behave exactly like a unary +. Rather than emulating that,
            // the token is an error with a note that unary plus is not allowed.
            //
            // Unary + only works in some places and is so limited in how it can
            // be used that there is no real use-case for it (e.g. +1 is accepted
            // but (+1) will error).
            //
            // Even working out when unary plus is allowed is difficult, so no
            // attempt is made to detect when the Win32 RC compiler would accept it;
            // the note is output in all cases.
            //
            // Some examples of allowed expressions by the Win32 compiler:
            //   +1
            //   0|+5
            //   +1+2
            //   +~-5
            //   +(1)
            //
            // Some examples of disallowed expressions by the Win32 compiler:
            //   (+1)
            //   ++5
            //
            // TODO: Potentially re-evaluate and support the unary plus in a bug-for-bug
            //       compatible way.
            is_unary_plus_expression = first_token.slice(self.lexer.buffer)[0] == '+';
        },
        else => {},
    }

    // Only reached for tokens that cannot start an expression
    try self.addErrorDetails(options.toErrorDetails(self.state.token));
    if (is_close_paren_expression) {
        try self.addErrorDetails(ErrorDetails{
            .err = .close_paren_expression,
            .type = .note,
            .token = self.state.token,
            .print_source_line = false,
        });
    }
    if (is_unary_plus_expression) {
        try self.addErrorDetails(ErrorDetails{
            .err = .unary_plus_expression,
            .type = .note,
            .token = self.state.token,
            .print_source_line = false,
        });
    }
    return error.ParseError;
}

/// Expects the current token to have already been dealt with, and that the
/// expression will start on the next token.
/// After return, the current token will have been dealt with.
fn parseExpression(self: *Self, options: ParseExpressionOptions) Error!*Node {
    const nesting = options.nesting_context;
    if (nesting.level > max_nested_expression_level) {
        try self.addErrorDetails(.{
            .err = .nested_expression_level_exceeds_max,
            .token = nesting.first_token.?,
        });
        return self.addErrorDetailsAndFail(.{
            .err = .nested_expression_level_exceeds_max,
            .type = .note,
            .token = nesting.last_token.?,
        });
    }

    var lhs: *Node = try self.parsePrimary(options);
    const first_token = lhs.getFirstToken();

    // Operators can only follow number expressions, so anything else is complete
    if (!lhs.isNumberExpression()) return lhs;

    // Fold `<lhs> <operator> <rhs>` repeatedly, always nesting on the left
    while (try self.parseOptionalTokenAdvanced(.operator, .normal_expect_operator)) {
        const operator = self.state.token;
        const rhs = try self.parsePrimary(.{
            .is_known_to_be_number_expression = true,
            .can_contain_not_expressions = options.can_contain_not_expressions,
            .nesting_context = nesting.incremented(first_token, operator),
            .allowed_types = options.allowed_types,
        });

        if (!rhs.isNumberExpression()) {
            return self.addErrorDetailsAndFail(ErrorDetails{
                .err = .expected_something_else,
                .token = rhs.getFirstToken(),
                .token_span_end = rhs.getLastToken(),
                .extra = .{ .expected_types = .{
                    .number = true,
                    .number_expression = true,
                } },
            });
        }

        const binary = try self.state.arena.create(Node.BinaryExpression);
        binary.* = .{
            .left = lhs,
            .operator = operator,
            .right = rhs,
        };
        lhs = &binary.base;
    }

    return lhs;
}

/// Skips any amount of commas (including zero)
/// In other words, it will skip the regex `,*`
/// Assumes the token(s) should be parsed with `.normal` as the method.
fn skipAnyCommas(self: *Self) !void {
    while (true) {
        const skipped = try self.parseOptionalToken(.comma);
        if (!skipped) break;
    }
}

/// Advances the current token only if the token's id matches the specified `id`.
/// Assumes the token should be parsed with `.normal` as the method.
/// Returns true if the token matched, false otherwise.
fn parseOptionalToken(self: *Self, id: Token.Id) Error!bool {
    const matched = try self.parseOptionalTokenAdvanced(id, .normal);
    return matched;
}

/// Advances the current token only if the token's id matches the specified `id`.
/// Returns true if the token matched, false otherwise.
- fn parseOptionalTokenAdvanced(self: *Self, id: Token.Id, comptime method: Lexer.LexMethod) Error!bool { - const maybe_token = try self.lookaheadToken(method); - if (maybe_token.id != id) return false; - self.nextToken(method) catch unreachable; - return true; - } - - fn addErrorDetails(self: *Self, details: ErrorDetails) Allocator.Error!void { - try self.state.diagnostics.append(details); - } - - fn addErrorDetailsAndFail(self: *Self, details: ErrorDetails) Error { - try self.addErrorDetails(details); - return error.ParseError; - } - - fn nextToken(self: *Self, comptime method: Lexer.LexMethod) Error!void { - self.state.token = token: while (true) { - const token = self.lexer.next(method) catch |err| switch (err) { - error.CodePagePragmaInIncludedFile => { - // The Win32 RC compiler silently ignores such `#pragma code_point` directives, - // but we want to both ignore them *and* emit a warning - try self.addErrorDetails(.{ - .err = .code_page_pragma_in_included_file, - .type = .warning, - .token = self.lexer.error_context_token.?, - }); - continue; - }, - error.CodePagePragmaInvalidCodePage => { - var details = self.lexer.getErrorDetails(err); - if (!self.options.warn_instead_of_error_on_invalid_code_page) { - return self.addErrorDetailsAndFail(details); - } - details.type = .warning; - try self.addErrorDetails(details); - continue; - }, - error.InvalidDigitCharacterInNumberLiteral => { - const details = self.lexer.getErrorDetails(err); - try self.addErrorDetails(details); - return self.addErrorDetailsAndFail(.{ - .err = details.err, - .type = .note, - .token = details.token, - .print_source_line = false, - }); - }, - else => return self.addErrorDetailsAndFail(self.lexer.getErrorDetails(err)), - }; - break :token token; - }; - // After every token, set the input code page for its line - try self.state.input_code_page_lookup.setForToken(self.state.token, self.lexer.current_code_page); - // But only set the output code page to the current code page if we are past 
the first code_page pragma in the file. - // Otherwise, we want to fill the lookup using the default code page so that lookups still work for lines that - // don't have an explicit output code page set. - const output_code_page = if (self.lexer.seen_pragma_code_pages > 1) self.lexer.current_code_page else self.state.output_code_page_lookup.default_code_page; - try self.state.output_code_page_lookup.setForToken(self.state.token, output_code_page); - } - - fn lookaheadToken(self: *Self, comptime method: Lexer.LexMethod) Error!Token { - self.state.lookahead_lexer = self.lexer.*; - return token: while (true) { - break :token self.state.lookahead_lexer.next(method) catch |err| switch (err) { - // Ignore this error and get the next valid token, we'll deal with this - // properly when getting the token for real - error.CodePagePragmaInIncludedFile => continue, - else => return self.addErrorDetailsAndFail(self.state.lookahead_lexer.getErrorDetails(err)), - }; - }; - } - - fn tokenSlice(self: *Self) []const u8 { - return self.state.token.slice(self.lexer.buffer); - } - - /// Check that the current token is something that can be used as an ID - fn checkId(self: *Self) !void { - switch (self.state.token.id) { - .literal => {}, - else => { - return self.addErrorDetailsAndFail(ErrorDetails{ - .err = .expected_token, - .token = self.state.token, - .extra = .{ .expected = .literal }, - }); - }, - } - } - - fn check(self: *Self, expected_token_id: Token.Id) !void { - if (self.state.token.id != expected_token_id) { - return self.addErrorDetailsAndFail(ErrorDetails{ - .err = .expected_token, - .token = self.state.token, - .extra = .{ .expected = expected_token_id }, - }); - } - } - - fn checkResource(self: *Self) !Resource { - switch (self.state.token.id) { - .literal => return Resource.fromString(.{ - .slice = self.state.token.slice(self.lexer.buffer), - .code_page = self.lexer.current_code_page, - }), - else => { - return self.addErrorDetailsAndFail(ErrorDetails{ - .err = 
.expected_token, - .token = self.state.token, - .extra = .{ .expected = .literal }, - }); - }, - } - } -}; diff --git a/src/resinator/preprocess.zig b/src/resinator/preprocess.zig @@ -1,100 +0,0 @@ -const std = @import("std"); -const builtin = @import("builtin"); -const Allocator = std.mem.Allocator; -const cli = @import("cli.zig"); - -pub const IncludeArgs = struct { - clang_target: ?[]const u8 = null, - system_include_paths: []const []const u8, - /// Should be set to `true` when -target has the GNU abi - /// (either because `clang_target` has `-gnu` or `-target` - /// is appended via other means and it has `-gnu`) - needs_gnu_workaround: bool = false, - nostdinc: bool = false, - - pub const IncludeAbi = enum { - msvc, - gnu, - }; -}; - -/// `arena` is used for temporary -D argument strings and the INCLUDE environment variable. -/// The arena should be kept alive at least as long as `argv`. -pub fn appendClangArgs(arena: Allocator, argv: *std.ArrayList([]const u8), options: cli.Options, include_args: IncludeArgs) !void { - try argv.appendSlice(&[_][]const u8{ - "-E", // preprocessor only - "--comments", - "-fuse-line-directives", // #line <num> instead of # <num> - // TODO: could use --trace-includes to give info about what's included from where - "-xc", // output c - // TODO: Turn this off, check the warnings, and convert the spaces back to NUL - "-Werror=null-character", // error on null characters instead of converting them to spaces - // TODO: could remove -Werror=null-character and instead parse warnings looking for 'warning: null character ignored' - // since the only real problem is when clang doesn't preserve null characters - //"-Werror=invalid-pp-token", // will error on unfinished string literals - // TODO: could use -Werror instead - "-fms-compatibility", // Allow things like "header.h" to be resolved relative to the 'root' .rc file, among other things - // https://learn.microsoft.com/en-us/windows/win32/menurc/predefined-macros - "-DRC_INVOKED", - }); 
- for (options.extra_include_paths.items) |extra_include_path| { - try argv.append("-I"); - try argv.append(extra_include_path); - } - - if (include_args.nostdinc) { - try argv.append("-nostdinc"); - } - for (include_args.system_include_paths) |include_path| { - try argv.append("-isystem"); - try argv.append(include_path); - } - if (include_args.clang_target) |target| { - try argv.append("-target"); - try argv.append(target); - } - // Using -fms-compatibility and targeting the GNU abi interact in a strange way: - // - Targeting the GNU abi stops _MSC_VER from being defined - // - Passing -fms-compatibility stops __GNUC__ from being defined - // Neither being defined is a problem for things like MinGW's vadefs.h, - // which will fail during preprocessing if neither are defined. - // So, when targeting the GNU abi, we need to force __GNUC__ to be defined. - // - // TODO: This is a workaround that should be removed if possible. - if (include_args.needs_gnu_workaround) { - // This is the same default gnuc version that Clang uses: - // https://github.com/llvm/llvm-project/blob/4b5366c9512aa273a5272af1d833961e1ed156e7/clang/lib/Driver/ToolChains/Clang.cpp#L6738 - try argv.append("-fgnuc-version=4.2.1"); - } - - if (!options.ignore_include_env_var) { - const INCLUDE = std.process.getEnvVarOwned(arena, "INCLUDE") catch ""; - - // The only precedence here is llvm-rc which also uses the platform-specific - // delimiter. There's no precedence set by `rc.exe` since it's Windows-only. 
- const delimiter = switch (builtin.os.tag) { - .windows => ';', - else => ':', - }; - var it = std.mem.tokenizeScalar(u8, INCLUDE, delimiter); - while (it.next()) |include_path| { - try argv.append("-isystem"); - try argv.append(include_path); - } - } - - var symbol_it = options.symbols.iterator(); - while (symbol_it.next()) |entry| { - switch (entry.value_ptr.*) { - .define => |value| { - try argv.append("-D"); - const define_arg = try std.fmt.allocPrint(arena, "{s}={s}", .{ entry.key_ptr.*, value }); - try argv.append(define_arg); - }, - .undefine => { - try argv.append("-U"); - try argv.append(entry.key_ptr.*); - }, - } - } -} diff --git a/src/resinator/res.zig b/src/resinator/res.zig @@ -1,1107 +0,0 @@ -const std = @import("std"); -const rc = @import("rc.zig"); -const Resource = rc.Resource; -const CommonResourceAttributes = rc.CommonResourceAttributes; -const Allocator = std.mem.Allocator; -const windows1252 = @import("windows1252.zig"); -const CodePage = @import("code_pages.zig").CodePage; -const literals = @import("literals.zig"); -const SourceBytes = literals.SourceBytes; -const Codepoint = @import("code_pages.zig").Codepoint; -const lang = @import("lang.zig"); -const isNonAsciiDigit = @import("utils.zig").isNonAsciiDigit; - -/// https://learn.microsoft.com/en-us/windows/win32/menurc/resource-types -pub const RT = enum(u8) { - ACCELERATOR = 9, - ANICURSOR = 21, - ANIICON = 22, - BITMAP = 2, - CURSOR = 1, - DIALOG = 5, - DLGINCLUDE = 17, - DLGINIT = 240, - FONT = 8, - FONTDIR = 7, - GROUP_CURSOR = 1 + 11, // CURSOR + 11 - GROUP_ICON = 3 + 11, // ICON + 11 - HTML = 23, - ICON = 3, - MANIFEST = 24, - MENU = 4, - MESSAGETABLE = 11, - PLUGPLAY = 19, - RCDATA = 10, - STRING = 6, - TOOLBAR = 241, - VERSION = 16, - VXD = 20, - _, - - /// Returns null if the resource type is user-defined - /// Asserts that the resource is not `stringtable` - pub fn fromResource(resource: Resource) ?RT { - return switch (resource) { - .accelerators => .ACCELERATOR, - .bitmap => 
.BITMAP, - .cursor => .GROUP_CURSOR, - .dialog => .DIALOG, - .dialogex => .DIALOG, - .dlginclude => .DLGINCLUDE, - .dlginit => .DLGINIT, - .font => .FONT, - .html => .HTML, - .icon => .GROUP_ICON, - .menu => .MENU, - .menuex => .MENU, - .messagetable => .MESSAGETABLE, - .plugplay => .PLUGPLAY, - .rcdata => .RCDATA, - .stringtable => unreachable, - .toolbar => .TOOLBAR, - .user_defined => null, - .versioninfo => .VERSION, - .vxd => .VXD, - - .cursor_num => .CURSOR, - .icon_num => .ICON, - .string_num => .STRING, - .anicursor_num => .ANICURSOR, - .aniicon_num => .ANIICON, - .fontdir_num => .FONTDIR, - .manifest_num => .MANIFEST, - }; - } -}; - -/// https://learn.microsoft.com/en-us/windows/win32/menurc/common-resource-attributes -/// https://learn.microsoft.com/en-us/windows/win32/menurc/resourceheader -pub const MemoryFlags = packed struct(u16) { - value: u16, - - pub const MOVEABLE: u16 = 0x10; - // TODO: SHARED and PURE seem to be the same thing? Testing seems to confirm this but - // would like to find mention of it somewhere. - pub const SHARED: u16 = 0x20; - pub const PURE: u16 = 0x20; - pub const PRELOAD: u16 = 0x40; - pub const DISCARDABLE: u16 = 0x1000; - - /// Note: The defaults can have combinations that are not possible to specify within - /// an .rc file, as the .rc attributes imply other values (i.e. specifying - /// DISCARDABLE always implies MOVEABLE and PURE/SHARED, and yet RT_ICON - /// has a default of only MOVEABLE | DISCARDABLE). - pub fn defaults(predefined_resource_type: ?RT) MemoryFlags { - if (predefined_resource_type == null) { - return MemoryFlags{ .value = MOVEABLE | SHARED }; - } else { - return switch (predefined_resource_type.?) 
{ - // zig fmt: off - .RCDATA, .BITMAP, .HTML, .MANIFEST, - .ACCELERATOR, .VERSION, .MESSAGETABLE, - .DLGINIT, .TOOLBAR, .PLUGPLAY, - .VXD, => MemoryFlags{ .value = MOVEABLE | SHARED }, - - .GROUP_ICON, .GROUP_CURSOR, - .STRING, .FONT, .DIALOG, .MENU, - .DLGINCLUDE, => MemoryFlags{ .value = MOVEABLE | SHARED | DISCARDABLE }, - - .ICON, .CURSOR, .ANIICON, .ANICURSOR => MemoryFlags{ .value = MOVEABLE | DISCARDABLE }, - .FONTDIR => MemoryFlags{ .value = MOVEABLE | PRELOAD }, - // zig fmt: on - // Same as predefined_resource_type == null - _ => return MemoryFlags{ .value = MOVEABLE | SHARED }, - }; - } - } - - pub fn set(self: *MemoryFlags, attribute: CommonResourceAttributes) void { - switch (attribute) { - .preload => self.value |= PRELOAD, - .loadoncall => self.value &= ~PRELOAD, - .moveable => self.value |= MOVEABLE, - .fixed => self.value &= ~(MOVEABLE | DISCARDABLE), - .shared => self.value |= SHARED, - .nonshared => self.value &= ~(SHARED | DISCARDABLE), - .pure => self.value |= PURE, - .impure => self.value &= ~(PURE | DISCARDABLE), - .discardable => self.value |= DISCARDABLE | MOVEABLE | PURE, - } - } - - pub fn setGroup(self: *MemoryFlags, attribute: CommonResourceAttributes, implied_shared_or_pure: bool) void { - switch (attribute) { - .preload => { - self.value |= PRELOAD; - if (implied_shared_or_pure) self.value &= ~SHARED; - }, - .loadoncall => { - self.value &= ~PRELOAD; - if (implied_shared_or_pure) self.value |= SHARED; - }, - else => self.set(attribute), - } - } -}; - -/// https://learn.microsoft.com/en-us/windows/win32/intl/language-identifiers -pub const Language = packed struct(u16) { - // Note: This is the default no matter what locale the current system is set to, - // e.g. even if the system's locale is en-GB, en-US will still be the - // default language for resources in the Win32 rc compiler. 
- primary_language_id: u10 = lang.LANG_ENGLISH, - sublanguage_id: u6 = lang.SUBLANG_ENGLISH_US, - - /// Default language ID as a u16 - pub const default: u16 = (Language{}).asInt(); - - pub fn fromInt(int: u16) Language { - return @bitCast(int); - } - - pub fn asInt(self: Language) u16 { - return @bitCast(self); - } -}; - -/// https://learn.microsoft.com/en-us/windows/win32/api/winuser/ns-winuser-dlgitemtemplate#remarks -pub const ControlClass = enum(u16) { - button = 0x80, - edit = 0x81, - static = 0x82, - listbox = 0x83, - scrollbar = 0x84, - combobox = 0x85, - - pub fn fromControl(control: rc.Control) ?ControlClass { - return switch (control) { - // zig fmt: off - .auto3state, .autocheckbox, .autoradiobutton, - .checkbox, .defpushbutton, .groupbox, .pushbox, - .pushbutton, .radiobutton, .state3, .userbutton => .button, - // zig fmt: on - .combobox => .combobox, - .control => null, - .ctext, .icon, .ltext, .rtext => .static, - .edittext, .hedit, .iedit => .edit, - .listbox => .listbox, - .scrollbar => .scrollbar, - }; - } - - pub fn getImpliedStyle(control: rc.Control) u32 { - var style = WS.CHILD | WS.VISIBLE; - switch (control) { - .auto3state => style |= BS.AUTO3STATE | WS.TABSTOP, - .autocheckbox => style |= BS.AUTOCHECKBOX | WS.TABSTOP, - .autoradiobutton => style |= BS.AUTORADIOBUTTON, - .checkbox => style |= BS.CHECKBOX | WS.TABSTOP, - .combobox => {}, - .control => {}, - .ctext => style |= SS.CENTER | WS.GROUP, - .defpushbutton => style |= BS.DEFPUSHBUTTON | WS.TABSTOP, - .edittext, .hedit, .iedit => style |= WS.TABSTOP | WS.BORDER, - .groupbox => style |= BS.GROUPBOX, - .icon => style |= SS.ICON, - .listbox => style |= LBS.NOTIFY | WS.BORDER, - .ltext => style |= WS.GROUP, - .pushbox => style |= BS.PUSHBOX | WS.TABSTOP, - .pushbutton => style |= WS.TABSTOP, - .radiobutton => style |= BS.RADIOBUTTON, - .rtext => style |= SS.RIGHT | WS.GROUP, - .scrollbar => {}, - .state3 => style |= BS.@"3STATE" | WS.TABSTOP, - .userbutton => style |= BS.USERBUTTON | 
WS.TABSTOP, - } - return style; - } -}; - -pub const NameOrOrdinal = union(enum) { - // UTF-16 LE - name: [:0]const u16, - ordinal: u16, - - pub fn deinit(self: NameOrOrdinal, allocator: Allocator) void { - switch (self) { - .name => |name| { - allocator.free(name); - }, - .ordinal => {}, - } - } - - /// Returns the full length of the amount of bytes that would be written by `write` - /// (e.g. for an ordinal it will return the length including the 0xFFFF indicator) - pub fn byteLen(self: NameOrOrdinal) usize { - switch (self) { - .name => |name| { - // + 1 for 0-terminated - return (name.len + 1) * @sizeOf(u16); - }, - .ordinal => return 4, - } - } - - pub fn write(self: NameOrOrdinal, writer: anytype) !void { - switch (self) { - .name => |name| { - try writer.writeAll(std.mem.sliceAsBytes(name[0 .. name.len + 1])); - }, - .ordinal => |ordinal| { - try writer.writeInt(u16, 0xffff, .little); - try writer.writeInt(u16, ordinal, .little); - }, - } - } - - pub fn writeEmpty(writer: anytype) !void { - try writer.writeInt(u16, 0, .little); - } - - pub fn fromString(allocator: Allocator, bytes: SourceBytes) !NameOrOrdinal { - if (maybeOrdinalFromString(bytes)) |ordinal| { - return ordinal; - } - return nameFromString(allocator, bytes); - } - - pub fn nameFromString(allocator: Allocator, bytes: SourceBytes) !NameOrOrdinal { - // Names have a limit of 256 UTF-16 code units + null terminator - var buf = try std.ArrayList(u16).initCapacity(allocator, @min(257, bytes.slice.len)); - errdefer buf.deinit(); - - var i: usize = 0; - while (bytes.code_page.codepointAt(i, bytes.slice)) |codepoint| : (i += codepoint.byte_len) { - if (buf.items.len == 256) break; - - const c = codepoint.value; - if (c == Codepoint.invalid) { - try buf.append(std.mem.nativeToLittle(u16, '�')); - } else if (c < 0x7F) { - // ASCII chars in names are always converted to uppercase - try buf.append(std.mem.nativeToLittle(u16, std.ascii.toUpper(@intCast(c)))); - } else if (c < 0x10000) { - const short: u16 = 
@intCast(c); - try buf.append(std.mem.nativeToLittle(u16, short)); - } else { - const high = @as(u16, @intCast((c - 0x10000) >> 10)) + 0xD800; - try buf.append(std.mem.nativeToLittle(u16, high)); - - // Note: This can cut-off in the middle of a UTF-16 surrogate pair, - // i.e. it can make the string end with an unpaired high surrogate - if (buf.items.len == 256) break; - - const low = @as(u16, @intCast(c & 0x3FF)) + 0xDC00; - try buf.append(std.mem.nativeToLittle(u16, low)); - } - } - - return NameOrOrdinal{ .name = try buf.toOwnedSliceSentinel(0) }; - } - - /// Returns `null` if the bytes do not form a valid number. - /// Does not allow non-ASCII digits (which the Win32 RC compiler does allow - /// in base 10 numbers, see `maybeNonAsciiOrdinalFromString`). - pub fn maybeOrdinalFromString(bytes: SourceBytes) ?NameOrOrdinal { - var buf = bytes.slice; - var radix: u8 = 10; - if (buf.len > 2 and buf[0] == '0') { - switch (buf[1]) { - '0'...'9' => {}, - 'x', 'X' => { - radix = 16; - buf = buf[2..]; - // only the first 4 hex digits matter, anything else is ignored - // i.e. 
0x12345 is treated as if it were 0x1234 - buf.len = @min(buf.len, 4); - }, - else => return null, - } - } - - var i: usize = 0; - var result: u16 = 0; - while (bytes.code_page.codepointAt(i, buf)) |codepoint| : (i += codepoint.byte_len) { - const c = codepoint.value; - const digit: u8 = switch (c) { - 0x00...0x7F => std.fmt.charToDigit(@intCast(c), radix) catch switch (radix) { - 10 => return null, - // non-hex-digits are treated as a terminator rather than invalidating - // the number (note: if there are no valid hex digits then the result - // will be zero which is not treated as a valid number) - 16 => break, - else => unreachable, - }, - else => if (radix == 10) return null else break, - }; - - if (result != 0) { - result *%= radix; - } - result +%= digit; - } - - // Anything that resolves to zero is not interpretted as a number - if (result == 0) return null; - return NameOrOrdinal{ .ordinal = result }; - } - - /// The Win32 RC compiler uses `iswdigit` for digit detection for base 10 - /// numbers, which means that non-ASCII digits are 'accepted' but handled - /// in a totally unintuitive manner, leading to arbitrary results. - /// - /// This function will return the value that such an ordinal 'would' have - /// if it was run through the Win32 RC compiler. This allows us to disallow - /// non-ASCII digits in number literals but still detect when the Win32 - /// RC compiler would have allowed them, so that a proper warning/error - /// can be emitted. 
- pub fn maybeNonAsciiOrdinalFromString(bytes: SourceBytes) ?NameOrOrdinal { - const buf = bytes.slice; - const radix = 10; - if (buf.len > 2 and buf[0] == '0') { - switch (buf[1]) { - // We only care about base 10 numbers here - 'x', 'X' => return null, - else => {}, - } - } - - var i: usize = 0; - var result: u16 = 0; - while (bytes.code_page.codepointAt(i, buf)) |codepoint| : (i += codepoint.byte_len) { - const c = codepoint.value; - const digit: u16 = digit: { - const is_digit = (c >= '0' and c <= '9') or isNonAsciiDigit(c); - if (!is_digit) return null; - break :digit @intCast(c - '0'); - }; - - if (result != 0) { - result *%= radix; - } - result +%= digit; - } - - // Anything that resolves to zero is not interpretted as a number - if (result == 0) return null; - return NameOrOrdinal{ .ordinal = result }; - } - - pub fn predefinedResourceType(self: NameOrOrdinal) ?RT { - switch (self) { - .ordinal => |ordinal| { - if (ordinal >= 256) return null; - switch (@as(RT, @enumFromInt(ordinal))) { - .ACCELERATOR, - .ANICURSOR, - .ANIICON, - .BITMAP, - .CURSOR, - .DIALOG, - .DLGINCLUDE, - .DLGINIT, - .FONT, - .FONTDIR, - .GROUP_CURSOR, - .GROUP_ICON, - .HTML, - .ICON, - .MANIFEST, - .MENU, - .MESSAGETABLE, - .PLUGPLAY, - .RCDATA, - .STRING, - .TOOLBAR, - .VERSION, - .VXD, - => |rt| return rt, - _ => return null, - } - }, - .name => return null, - } - } -}; - -fn expectNameOrOrdinal(expected: NameOrOrdinal, actual: NameOrOrdinal) !void { - switch (expected) { - .name => { - if (actual != .name) return error.TestExpectedEqual; - try std.testing.expectEqualSlices(u16, expected.name, actual.name); - }, - .ordinal => { - if (actual != .ordinal) return error.TestExpectedEqual; - try std.testing.expectEqual(expected.ordinal, actual.ordinal); - }, - } -} - -test "NameOrOrdinal" { - var arena = std.heap.ArenaAllocator.init(std.testing.allocator); - defer arena.deinit(); - - const allocator = arena.allocator(); - - // zero is treated as a string - try expectNameOrOrdinal( - 
NameOrOrdinal{ .name = std.unicode.utf8ToUtf16LeStringLiteral("0") }, - try NameOrOrdinal.fromString(allocator, .{ .slice = "0", .code_page = .windows1252 }), - ); - // any non-digit byte invalidates the number - try expectNameOrOrdinal( - NameOrOrdinal{ .name = std.unicode.utf8ToUtf16LeStringLiteral("1A") }, - try NameOrOrdinal.fromString(allocator, .{ .slice = "1a", .code_page = .windows1252 }), - ); - try expectNameOrOrdinal( - NameOrOrdinal{ .name = std.unicode.utf8ToUtf16LeStringLiteral("1ÿ") }, - try NameOrOrdinal.fromString(allocator, .{ .slice = "1\xff", .code_page = .windows1252 }), - ); - try expectNameOrOrdinal( - NameOrOrdinal{ .name = std.unicode.utf8ToUtf16LeStringLiteral("1€") }, - try NameOrOrdinal.fromString(allocator, .{ .slice = "1€", .code_page = .utf8 }), - ); - try expectNameOrOrdinal( - NameOrOrdinal{ .name = std.unicode.utf8ToUtf16LeStringLiteral("1�") }, - try NameOrOrdinal.fromString(allocator, .{ .slice = "1\x80", .code_page = .utf8 }), - ); - // same with overflow that resolves to 0 - try expectNameOrOrdinal( - NameOrOrdinal{ .name = std.unicode.utf8ToUtf16LeStringLiteral("65536") }, - try NameOrOrdinal.fromString(allocator, .{ .slice = "65536", .code_page = .windows1252 }), - ); - // hex zero is also treated as a string - try expectNameOrOrdinal( - NameOrOrdinal{ .name = std.unicode.utf8ToUtf16LeStringLiteral("0X0") }, - try NameOrOrdinal.fromString(allocator, .{ .slice = "0x0", .code_page = .windows1252 }), - ); - // hex numbers work - try expectNameOrOrdinal( - NameOrOrdinal{ .ordinal = 0x100 }, - try NameOrOrdinal.fromString(allocator, .{ .slice = "0x100", .code_page = .windows1252 }), - ); - // only the first 4 hex digits matter - try expectNameOrOrdinal( - NameOrOrdinal{ .ordinal = 0x1234 }, - try NameOrOrdinal.fromString(allocator, .{ .slice = "0X12345", .code_page = .windows1252 }), - ); - // octal is not supported so it gets treated as a string - try expectNameOrOrdinal( - NameOrOrdinal{ .name = 
std.unicode.utf8ToUtf16LeStringLiteral("0O1234") }, - try NameOrOrdinal.fromString(allocator, .{ .slice = "0o1234", .code_page = .windows1252 }), - ); - // overflow wraps - try expectNameOrOrdinal( - NameOrOrdinal{ .ordinal = @truncate(65635) }, - try NameOrOrdinal.fromString(allocator, .{ .slice = "65635", .code_page = .windows1252 }), - ); - // non-hex-digits in a hex literal are treated as a terminator - try expectNameOrOrdinal( - NameOrOrdinal{ .ordinal = 0x4 }, - try NameOrOrdinal.fromString(allocator, .{ .slice = "0x4n", .code_page = .windows1252 }), - ); - try expectNameOrOrdinal( - NameOrOrdinal{ .ordinal = 0xFA }, - try NameOrOrdinal.fromString(allocator, .{ .slice = "0xFAZ92348", .code_page = .windows1252 }), - ); - // 0 at the start is allowed - try expectNameOrOrdinal( - NameOrOrdinal{ .ordinal = 50 }, - try NameOrOrdinal.fromString(allocator, .{ .slice = "050", .code_page = .windows1252 }), - ); - // limit of 256 UTF-16 code units, can cut off between a surrogate pair - { - var expected = blk: { - // the input before the 𐐷 character, but uppercased - const expected_u8_bytes = "00614982008907933748980730280674788429543776231864944218790698304852300002973622122844631429099469274282385299397783838528QFFL7SHNSIETG0QKLR1UYPBTUV1PMFQRRA0VJDG354GQEDJMUPGPP1W1EXVNTZVEIZ6K3IPQM1AWGEYALMEODYVEZGOD3MFMGEY8FNR4JUETTB1PZDEWSNDRGZUA8SNXP3NGO"; - var buf: [256:0]u16 = undefined; - for (expected_u8_bytes, 0..) 
|byte, i| { - buf[i] = std.mem.nativeToLittle(u16, byte); - } - // surrogate pair that is now orphaned - buf[255] = std.mem.nativeToLittle(u16, 0xD801); - break :blk buf; - }; - try expectNameOrOrdinal( - NameOrOrdinal{ .name = &expected }, - try NameOrOrdinal.fromString(allocator, .{ - .slice = "00614982008907933748980730280674788429543776231864944218790698304852300002973622122844631429099469274282385299397783838528qffL7ShnSIETg0qkLr1UYpbtuv1PMFQRRa0VjDG354GQedJmUPgpp1w1ExVnTzVEiz6K3iPqM1AWGeYALmeODyvEZGOD3MfmGey8fnR4jUeTtB1PzdeWsNDrGzuA8Snxp3NGO𐐷", - .code_page = .utf8, - }), - ); - } -} - -test "NameOrOrdinal code page awareness" { - var arena = std.heap.ArenaAllocator.init(std.testing.allocator); - defer arena.deinit(); - - const allocator = arena.allocator(); - - try expectNameOrOrdinal( - NameOrOrdinal{ .name = std.unicode.utf8ToUtf16LeStringLiteral("��𐐷") }, - try NameOrOrdinal.fromString(allocator, .{ - .slice = "\xF0\x80\x80𐐷", - .code_page = .utf8, - }), - ); - try expectNameOrOrdinal( - // The UTF-8 representation of 𐐷 is 0xF0 0x90 0x90 0xB7. In order to provide valid - // UTF-8 to utf8ToUtf16LeStringLiteral, it uses the UTF-8 representation of the codepoint - // <U+0x90> which is 0xC2 0x90. 
The code units in the expected UTF-16 string are: - // { 0x00F0, 0x20AC, 0x20AC, 0x00F0, 0x0090, 0x0090, 0x00B7 } - NameOrOrdinal{ .name = std.unicode.utf8ToUtf16LeStringLiteral("ð€€ð\xC2\x90\xC2\x90·") }, - try NameOrOrdinal.fromString(allocator, .{ - .slice = "\xF0\x80\x80𐐷", - .code_page = .windows1252, - }), - ); -} - -/// https://learn.microsoft.com/en-us/windows/win32/api/winuser/ns-winuser-accel#members -/// https://devblogs.microsoft.com/oldnewthing/20070316-00/?p=27593 -pub const AcceleratorModifiers = struct { - value: u8 = 0, - explicit_ascii_or_virtkey: bool = false, - - pub const ASCII = 0; - pub const VIRTKEY = 1; - pub const NOINVERT = 1 << 1; - pub const SHIFT = 1 << 2; - pub const CONTROL = 1 << 3; - pub const ALT = 1 << 4; - /// Marker for the last accelerator in an accelerator table - pub const last_accelerator_in_table = 1 << 7; - - pub fn apply(self: *AcceleratorModifiers, modifier: rc.AcceleratorTypeAndOptions) void { - if (modifier == .ascii or modifier == .virtkey) self.explicit_ascii_or_virtkey = true; - self.value |= modifierValue(modifier); - } - - pub fn isSet(self: AcceleratorModifiers, modifier: rc.AcceleratorTypeAndOptions) bool { - // ASCII is set whenever VIRTKEY is not - if (modifier == .ascii) return self.value & modifierValue(.virtkey) == 0; - return self.value & modifierValue(modifier) != 0; - } - - fn modifierValue(modifier: rc.AcceleratorTypeAndOptions) u8 { - return switch (modifier) { - .ascii => ASCII, - .virtkey => VIRTKEY, - .noinvert => NOINVERT, - .shift => SHIFT, - .control => CONTROL, - .alt => ALT, - }; - } - - pub fn markLast(self: *AcceleratorModifiers) void { - self.value |= last_accelerator_in_table; - } -}; - -const AcceleratorKeyCodepointTranslator = struct { - string_type: literals.StringType, - - pub fn translate(self: @This(), maybe_parsed: ?literals.IterativeStringParser.ParsedCodepoint) ?u21 { - const parsed = maybe_parsed orelse return null; - if (parsed.codepoint == Codepoint.invalid) return 0xFFFD; - if 
(parsed.from_escaped_integer and self.string_type == .ascii) { - return windows1252.toCodepoint(@intCast(parsed.codepoint)); - } - return parsed.codepoint; - } -}; - -pub const ParseAcceleratorKeyStringError = error{ EmptyAccelerator, AcceleratorTooLong, InvalidControlCharacter, ControlCharacterOutOfRange }; - -/// Expects bytes to be the full bytes of a string literal token (e.g. including the "" or L""). -pub fn parseAcceleratorKeyString(bytes: SourceBytes, is_virt: bool, options: literals.StringParseOptions) (ParseAcceleratorKeyStringError || Allocator.Error)!u16 { - if (bytes.slice.len == 0) { - return error.EmptyAccelerator; - } - - var parser = literals.IterativeStringParser.init(bytes, options); - var translator = AcceleratorKeyCodepointTranslator{ .string_type = parser.declared_string_type }; - - const first_codepoint = translator.translate(try parser.next()) orelse return error.EmptyAccelerator; - // 0 is treated as a terminator, so this is equivalent to an empty string - if (first_codepoint == 0) return error.EmptyAccelerator; - - if (first_codepoint == '^') { - // Note: Emitting this warning unconditonally whenever ^ is the first character - // matches the Win32 RC behavior, but it's questionable whether or not - // the warning should be emitted for ^^ since that results in the ASCII - // character ^ being written to the .res. - if (is_virt and options.diagnostics != null) { - try options.diagnostics.?.diagnostics.append(.{ - .err = .ascii_character_not_equivalent_to_virtual_key_code, - .type = .warning, - .token = options.diagnostics.?.token, - }); - } - - const c = translator.translate(try parser.next()) orelse return error.InvalidControlCharacter; - switch (c) { - '^' => return '^', // special case - 'a'...'z', 'A'...'Z' => return std.ascii.toUpper(@intCast(c)) - 0x40, - // Note: The Windows RC compiler allows more than just A-Z, but what it allows - // seems to be tied to some sort of Unicode-aware 'is character' function or something. 
- // The full list of codepoints that trigger an out-of-range error can be found here: - // https://gist.github.com/squeek502/2e9d0a4728a83eed074ad9785a209fd0 - // For codepoints >= 0x80 that don't trigger the error, the Windows RC compiler takes the - // codepoint and does the `- 0x40` transformation as if it were A-Z which couldn't lead - // to anything useable, so there's no point in emulating that behavior--erroring for - // all non-[a-zA-Z] makes much more sense and is what was probably intended by the - // Windows RC compiler. - else => return error.ControlCharacterOutOfRange, - } - @compileError("this should be unreachable"); - } - - const second_codepoint = translator.translate(try parser.next()); - - var result: u32 = initial_value: { - if (first_codepoint >= 0x10000) { - if (second_codepoint != null and second_codepoint.? != 0) return error.AcceleratorTooLong; - // No idea why it works this way, but this seems to match the Windows RC - // behavior for codepoints >= 0x10000 - const low = @as(u16, @intCast(first_codepoint & 0x3FF)) + 0xDC00; - const extra = (first_codepoint - 0x10000) / 0x400; - break :initial_value low + extra * 0x100; - } - break :initial_value first_codepoint; - }; - - // 0 is treated as a terminator - if (second_codepoint != null and second_codepoint.? == 0) return @truncate(result); - - const third_codepoint = translator.translate(try parser.next()); - // 0 is treated as a terminator, so a 0 in the third position is fine but - // anything else is too many codepoints for an accelerator - if (third_codepoint != null and third_codepoint.? 
!= 0) return error.AcceleratorTooLong; - - if (second_codepoint) |c| { - if (c >= 0x10000) return error.AcceleratorTooLong; - result <<= 8; - result += c; - } else if (is_virt) { - switch (result) { - 'a'...'z' => result -= 0x20, // toUpper - else => {}, - } - } - return @truncate(result); -} - -test "accelerator keys" { - try std.testing.expectEqual(@as(u16, 1), try parseAcceleratorKeyString( - .{ .slice = "\"^a\"", .code_page = .windows1252 }, - false, - .{}, - )); - try std.testing.expectEqual(@as(u16, 1), try parseAcceleratorKeyString( - .{ .slice = "\"^A\"", .code_page = .windows1252 }, - false, - .{}, - )); - try std.testing.expectEqual(@as(u16, 26), try parseAcceleratorKeyString( - .{ .slice = "\"^Z\"", .code_page = .windows1252 }, - false, - .{}, - )); - try std.testing.expectEqual(@as(u16, '^'), try parseAcceleratorKeyString( - .{ .slice = "\"^^\"", .code_page = .windows1252 }, - false, - .{}, - )); - - try std.testing.expectEqual(@as(u16, 'a'), try parseAcceleratorKeyString( - .{ .slice = "\"a\"", .code_page = .windows1252 }, - false, - .{}, - )); - try std.testing.expectEqual(@as(u16, 0x6162), try parseAcceleratorKeyString( - .{ .slice = "\"ab\"", .code_page = .windows1252 }, - false, - .{}, - )); - - try std.testing.expectEqual(@as(u16, 'C'), try parseAcceleratorKeyString( - .{ .slice = "\"c\"", .code_page = .windows1252 }, - true, - .{}, - )); - try std.testing.expectEqual(@as(u16, 0x6363), try parseAcceleratorKeyString( - .{ .slice = "\"cc\"", .code_page = .windows1252 }, - true, - .{}, - )); - - // \x00 or any escape that evaluates to zero acts as a terminator, everything past it - // is ignored - try std.testing.expectEqual(@as(u16, 'a'), try parseAcceleratorKeyString( - .{ .slice = "\"a\\0bcdef\"", .code_page = .windows1252 }, - false, - .{}, - )); - - // \x80 is € in Windows-1252, which is Unicode codepoint 20AC - try std.testing.expectEqual(@as(u16, 0x20AC), try parseAcceleratorKeyString( - .{ .slice = "\"\x80\"", .code_page = .windows1252 }, - 
false, - .{}, - )); - // This depends on the code page, though, with codepage 65001, \x80 - // on its own is invalid UTF-8 so it gets converted to the replacement character - try std.testing.expectEqual(@as(u16, 0xFFFD), try parseAcceleratorKeyString( - .{ .slice = "\"\x80\"", .code_page = .utf8 }, - false, - .{}, - )); - try std.testing.expectEqual(@as(u16, 0xCCAC), try parseAcceleratorKeyString( - .{ .slice = "\"\x80\x80\"", .code_page = .windows1252 }, - false, - .{}, - )); - // This also behaves the same with escaped characters - try std.testing.expectEqual(@as(u16, 0x20AC), try parseAcceleratorKeyString( - .{ .slice = "\"\\x80\"", .code_page = .windows1252 }, - false, - .{}, - )); - // Even with utf8 code page - try std.testing.expectEqual(@as(u16, 0x20AC), try parseAcceleratorKeyString( - .{ .slice = "\"\\x80\"", .code_page = .utf8 }, - false, - .{}, - )); - try std.testing.expectEqual(@as(u16, 0xCCAC), try parseAcceleratorKeyString( - .{ .slice = "\"\\x80\\x80\"", .code_page = .windows1252 }, - false, - .{}, - )); - // Wide string with the actual characters behaves like the ASCII string version - try std.testing.expectEqual(@as(u16, 0xCCAC), try parseAcceleratorKeyString( - .{ .slice = "L\"\x80\x80\"", .code_page = .windows1252 }, - false, - .{}, - )); - // But wide string with escapes behaves differently - try std.testing.expectEqual(@as(u16, 0x8080), try parseAcceleratorKeyString( - .{ .slice = "L\"\\x80\\x80\"", .code_page = .windows1252 }, - false, - .{}, - )); - // and invalid escapes within wide strings get skipped - try std.testing.expectEqual(@as(u16, 'z'), try parseAcceleratorKeyString( - .{ .slice = "L\"\\Hz\"", .code_page = .windows1252 }, - false, - .{}, - )); - - // any non-A-Z codepoints are illegal - try std.testing.expectError(error.ControlCharacterOutOfRange, parseAcceleratorKeyString( - .{ .slice = "\"^\x83\"", .code_page = .windows1252 }, - false, - .{}, - )); - try std.testing.expectError(error.ControlCharacterOutOfRange, 
parseAcceleratorKeyString( - .{ .slice = "\"^1\"", .code_page = .windows1252 }, - false, - .{}, - )); - try std.testing.expectError(error.InvalidControlCharacter, parseAcceleratorKeyString( - .{ .slice = "\"^\"", .code_page = .windows1252 }, - false, - .{}, - )); - try std.testing.expectError(error.EmptyAccelerator, parseAcceleratorKeyString( - .{ .slice = "\"\"", .code_page = .windows1252 }, - false, - .{}, - )); - try std.testing.expectError(error.AcceleratorTooLong, parseAcceleratorKeyString( - .{ .slice = "\"hello\"", .code_page = .windows1252 }, - false, - .{}, - )); - try std.testing.expectError(error.ControlCharacterOutOfRange, parseAcceleratorKeyString( - .{ .slice = "\"^\x80\"", .code_page = .windows1252 }, - false, - .{}, - )); - - // Invalid UTF-8 gets converted to 0xFFFD, multiple invalids get shifted and added together - // The behavior is the same for ascii and wide strings - try std.testing.expectEqual(@as(u16, 0xFCFD), try parseAcceleratorKeyString( - .{ .slice = "\"\x80\x80\"", .code_page = .utf8 }, - false, - .{}, - )); - try std.testing.expectEqual(@as(u16, 0xFCFD), try parseAcceleratorKeyString( - .{ .slice = "L\"\x80\x80\"", .code_page = .utf8 }, - false, - .{}, - )); - - // Codepoints >= 0x10000 - try std.testing.expectEqual(@as(u16, 0xDD00), try parseAcceleratorKeyString( - .{ .slice = "\"\xF0\x90\x84\x80\"", .code_page = .utf8 }, - false, - .{}, - )); - try std.testing.expectEqual(@as(u16, 0xDD00), try parseAcceleratorKeyString( - .{ .slice = "L\"\xF0\x90\x84\x80\"", .code_page = .utf8 }, - false, - .{}, - )); - try std.testing.expectEqual(@as(u16, 0x9C01), try parseAcceleratorKeyString( - .{ .slice = "\"\xF4\x80\x80\x81\"", .code_page = .utf8 }, - false, - .{}, - )); - // anything before or after a codepoint >= 0x10000 causes an error - try std.testing.expectError(error.AcceleratorTooLong, parseAcceleratorKeyString( - .{ .slice = "\"a\xF0\x90\x80\x80\"", .code_page = .utf8 }, - false, - .{}, - )); - try 
std.testing.expectError(error.AcceleratorTooLong, parseAcceleratorKeyString( - .{ .slice = "\"\xF0\x90\x80\x80a\"", .code_page = .utf8 }, - false, - .{}, - )); -} - -pub const ForcedOrdinal = struct { - pub fn fromBytes(bytes: SourceBytes) u16 { - var i: usize = 0; - var result: u21 = 0; - while (bytes.code_page.codepointAt(i, bytes.slice)) |codepoint| : (i += codepoint.byte_len) { - const c = switch (codepoint.value) { - // Codepoints that would need a surrogate pair in UTF-16 are - // broken up into their UTF-16 code units and each code unit - // is interpreted as a digit. - 0x10000...0x10FFFF => { - const high = @as(u16, @intCast((codepoint.value - 0x10000) >> 10)) + 0xD800; - if (result != 0) result *%= 10; - result +%= high -% '0'; - - const low = @as(u16, @intCast(codepoint.value & 0x3FF)) + 0xDC00; - if (result != 0) result *%= 10; - result +%= low -% '0'; - continue; - }, - Codepoint.invalid => 0xFFFD, - else => codepoint.value, - }; - if (result != 0) result *%= 10; - result +%= c -% '0'; - } - return @truncate(result); - } - - pub fn fromUtf16Le(utf16: [:0]const u16) u16 { - var result: u16 = 0; - for (utf16) |code_unit| { - if (result != 0) result *%= 10; - result +%= std.mem.littleToNative(u16, code_unit) -% '0'; - } - return result; - } -}; - -test "forced ordinal" { - try std.testing.expectEqual(@as(u16, 3200), ForcedOrdinal.fromBytes(.{ .slice = "3200", .code_page = .windows1252 })); - try std.testing.expectEqual(@as(u16, 0x33), ForcedOrdinal.fromBytes(.{ .slice = "1+1", .code_page = .windows1252 })); - try std.testing.expectEqual(@as(u16, 65531), ForcedOrdinal.fromBytes(.{ .slice = "1!", .code_page = .windows1252 })); - - try std.testing.expectEqual(@as(u16, 0x122), ForcedOrdinal.fromBytes(.{ .slice = "0\x8C", .code_page = .windows1252 })); - try std.testing.expectEqual(@as(u16, 0x122), ForcedOrdinal.fromBytes(.{ .slice = "0Œ", .code_page = .utf8 })); - - // invalid UTF-8 gets converted to 0xFFFD (replacement char) and then interpreted as a digit - 
try std.testing.expectEqual(@as(u16, 0xFFCD), ForcedOrdinal.fromBytes(.{ .slice = "0\x81", .code_page = .utf8 })); - // codepoints >= 0x10000 - try std.testing.expectEqual(@as(u16, 0x49F2), ForcedOrdinal.fromBytes(.{ .slice = "0\u{10002}", .code_page = .utf8 })); - try std.testing.expectEqual(@as(u16, 0x4AF0), ForcedOrdinal.fromBytes(.{ .slice = "0\u{10100}", .code_page = .utf8 })); - - // From UTF-16 - try std.testing.expectEqual(@as(u16, 0x122), ForcedOrdinal.fromUtf16Le(&[_:0]u16{ std.mem.nativeToLittle(u16, '0'), std.mem.nativeToLittle(u16, 'Œ') })); - try std.testing.expectEqual(@as(u16, 0x4AF0), ForcedOrdinal.fromUtf16Le(std.unicode.utf8ToUtf16LeStringLiteral("0\u{10100}"))); -} - -/// https://learn.microsoft.com/en-us/windows/win32/api/verrsrc/ns-verrsrc-vs_fixedfileinfo -pub const FixedFileInfo = struct { - file_version: Version = .{}, - product_version: Version = .{}, - file_flags_mask: u32 = 0, - file_flags: u32 = 0, - file_os: u32 = 0, - file_type: u32 = 0, - file_subtype: u32 = 0, - file_date: Version = .{}, // TODO: I think this is always all zeroes? 
- - pub const signature = 0xFEEF04BD; - // Note: This corresponds to a version of 1.0 - pub const version = 0x00010000; - - pub const byte_len = 0x34; - pub const key = std.unicode.utf8ToUtf16LeStringLiteral("VS_VERSION_INFO"); - - pub const Version = struct { - parts: [4]u16 = [_]u16{0} ** 4, - - pub fn mostSignificantCombinedParts(self: Version) u32 { - return (@as(u32, self.parts[0]) << 16) + self.parts[1]; - } - - pub fn leastSignificantCombinedParts(self: Version) u32 { - return (@as(u32, self.parts[2]) << 16) + self.parts[3]; - } - }; - - pub fn write(self: FixedFileInfo, writer: anytype) !void { - try writer.writeInt(u32, signature, .little); - try writer.writeInt(u32, version, .little); - try writer.writeInt(u32, self.file_version.mostSignificantCombinedParts(), .little); - try writer.writeInt(u32, self.file_version.leastSignificantCombinedParts(), .little); - try writer.writeInt(u32, self.product_version.mostSignificantCombinedParts(), .little); - try writer.writeInt(u32, self.product_version.leastSignificantCombinedParts(), .little); - try writer.writeInt(u32, self.file_flags_mask, .little); - try writer.writeInt(u32, self.file_flags, .little); - try writer.writeInt(u32, self.file_os, .little); - try writer.writeInt(u32, self.file_type, .little); - try writer.writeInt(u32, self.file_subtype, .little); - try writer.writeInt(u32, self.file_date.mostSignificantCombinedParts(), .little); - try writer.writeInt(u32, self.file_date.leastSignificantCombinedParts(), .little); - } -}; - -test "FixedFileInfo.Version" { - const version = FixedFileInfo.Version{ - .parts = .{ 1, 2, 3, 4 }, - }; - try std.testing.expectEqual(@as(u32, 0x00010002), version.mostSignificantCombinedParts()); - try std.testing.expectEqual(@as(u32, 0x00030004), version.leastSignificantCombinedParts()); -} - -pub const VersionNode = struct { - pub const type_string: u16 = 1; - pub const type_binary: u16 = 0; -}; - -pub const MenuItemFlags = struct { - value: u16 = 0, - - pub fn apply(self: 
*MenuItemFlags, option: rc.MenuItem.Option) void { - self.value |= optionValue(option); - } - - pub fn isSet(self: MenuItemFlags, option: rc.MenuItem.Option) bool { - return self.value & optionValue(option) != 0; - } - - fn optionValue(option: rc.MenuItem.Option) u16 { - return @intCast(switch (option) { - .checked => MF.CHECKED, - .grayed => MF.GRAYED, - .help => MF.HELP, - .inactive => MF.DISABLED, - .menubarbreak => MF.MENUBARBREAK, - .menubreak => MF.MENUBREAK, - }); - } - - pub fn markLast(self: *MenuItemFlags) void { - self.value |= @intCast(MF.END); - } -}; - -/// Menu Flags from WinUser.h -/// This is not complete, it only contains what is needed -pub const MF = struct { - pub const GRAYED: u32 = 0x00000001; - pub const DISABLED: u32 = 0x00000002; - pub const CHECKED: u32 = 0x00000008; - pub const POPUP: u32 = 0x00000010; - pub const MENUBARBREAK: u32 = 0x00000020; - pub const MENUBREAK: u32 = 0x00000040; - pub const HELP: u32 = 0x00004000; - pub const END: u32 = 0x00000080; -}; - -/// Window Styles from WinUser.h -pub const WS = struct { - pub const OVERLAPPED: u32 = 0x00000000; - pub const POPUP: u32 = 0x80000000; - pub const CHILD: u32 = 0x40000000; - pub const MINIMIZE: u32 = 0x20000000; - pub const VISIBLE: u32 = 0x10000000; - pub const DISABLED: u32 = 0x08000000; - pub const CLIPSIBLINGS: u32 = 0x04000000; - pub const CLIPCHILDREN: u32 = 0x02000000; - pub const MAXIMIZE: u32 = 0x01000000; - pub const CAPTION: u32 = BORDER | DLGFRAME; - pub const BORDER: u32 = 0x00800000; - pub const DLGFRAME: u32 = 0x00400000; - pub const VSCROLL: u32 = 0x00200000; - pub const HSCROLL: u32 = 0x00100000; - pub const SYSMENU: u32 = 0x00080000; - pub const THICKFRAME: u32 = 0x00040000; - pub const GROUP: u32 = 0x00020000; - pub const TABSTOP: u32 = 0x00010000; - - pub const MINIMIZEBOX: u32 = 0x00020000; - pub const MAXIMIZEBOX: u32 = 0x00010000; - - pub const TILED: u32 = OVERLAPPED; - pub const ICONIC: u32 = MINIMIZE; - pub const SIZEBOX: u32 = THICKFRAME; - pub const 
TILEDWINDOW: u32 = OVERLAPPEDWINDOW; - - // Common Window Styles - pub const OVERLAPPEDWINDOW: u32 = OVERLAPPED | CAPTION | SYSMENU | THICKFRAME | MINIMIZEBOX | MAXIMIZEBOX; - pub const POPUPWINDOW: u32 = POPUP | BORDER | SYSMENU; - pub const CHILDWINDOW: u32 = CHILD; -}; - -/// Dialog Box Template Styles from WinUser.h -pub const DS = struct { - pub const SETFONT: u32 = 0x40; -}; - -/// Button Control Styles from WinUser.h -/// This is not complete, it only contains what is needed -pub const BS = struct { - pub const PUSHBUTTON: u32 = 0x00000000; - pub const DEFPUSHBUTTON: u32 = 0x00000001; - pub const CHECKBOX: u32 = 0x00000002; - pub const AUTOCHECKBOX: u32 = 0x00000003; - pub const RADIOBUTTON: u32 = 0x00000004; - pub const @"3STATE": u32 = 0x00000005; - pub const AUTO3STATE: u32 = 0x00000006; - pub const GROUPBOX: u32 = 0x00000007; - pub const USERBUTTON: u32 = 0x00000008; - pub const AUTORADIOBUTTON: u32 = 0x00000009; - pub const PUSHBOX: u32 = 0x0000000A; - pub const OWNERDRAW: u32 = 0x0000000B; - pub const TYPEMASK: u32 = 0x0000000F; - pub const LEFTTEXT: u32 = 0x00000020; -}; - -/// Static Control Constants from WinUser.h -/// This is not complete, it only contains what is needed -pub const SS = struct { - pub const LEFT: u32 = 0x00000000; - pub const CENTER: u32 = 0x00000001; - pub const RIGHT: u32 = 0x00000002; - pub const ICON: u32 = 0x00000003; -}; - -/// Listbox Styles from WinUser.h -/// This is not complete, it only contains what is needed -pub const LBS = struct { - pub const NOTIFY: u32 = 0x0001; -}; diff --git a/src/resinator/source_mapping.zig b/src/resinator/source_mapping.zig @@ -1,687 +0,0 @@ -const std = @import("std"); -const Allocator = std.mem.Allocator; -const UncheckedSliceWriter = @import("utils.zig").UncheckedSliceWriter; -const parseQuotedAsciiString = @import("literals.zig").parseQuotedAsciiString; -const lex = @import("lex.zig"); - -pub const ParseLineCommandsResult = struct { - result: []u8, - mappings: SourceMappings, -}; - 
-const CurrentMapping = struct { - line_num: usize = 1, - filename: std.ArrayListUnmanaged(u8) = .{}, - pending: bool = true, - ignore_contents: bool = false, -}; - -pub const ParseAndRemoveLineCommandsOptions = struct { - initial_filename: ?[]const u8 = null, -}; - -/// Parses and removes #line commands as well as all source code that is within a file -/// with .c or .h extensions. -/// -/// > RC treats files with the .c and .h extensions in a special manner. It -/// > assumes that a file with one of these extensions does not contain -/// > resources. If a file has the .c or .h file name extension, RC ignores all -/// > lines in the file except the preprocessor directives. Therefore, to -/// > include a file that contains resources in another resource script, give -/// > the file to be included an extension other than .c or .h. -/// from https://learn.microsoft.com/en-us/windows/win32/menurc/preprocessor-directives -/// -/// Returns a slice of `buf` with the aforementioned stuff removed as well as a mapping -/// between the lines and their corresponding lines in their original files. -/// -/// `buf` must be at least as long as `source` -/// In-place transformation is supported (i.e. `source` and `buf` can be the same slice) -/// -/// If `options.initial_filename` is provided, that filename is guaranteed to be -/// within the `mappings.files` table and `root_filename_offset` will be set appropriately. 
-pub fn parseAndRemoveLineCommands(allocator: Allocator, source: []const u8, buf: []u8, options: ParseAndRemoveLineCommandsOptions) !ParseLineCommandsResult { - var parse_result = ParseLineCommandsResult{ - .result = undefined, - .mappings = .{}, - }; - errdefer parse_result.mappings.deinit(allocator); - - var current_mapping: CurrentMapping = .{}; - defer current_mapping.filename.deinit(allocator); - - if (options.initial_filename) |initial_filename| { - try current_mapping.filename.appendSlice(allocator, initial_filename); - parse_result.mappings.root_filename_offset = try parse_result.mappings.files.put(allocator, initial_filename); - } - - std.debug.assert(buf.len >= source.len); - var result = UncheckedSliceWriter{ .slice = buf }; - const State = enum { - line_start, - preprocessor, - non_preprocessor, - }; - var state: State = .line_start; - var index: usize = 0; - var pending_start: ?usize = null; - var preprocessor_start: usize = 0; - var line_number: usize = 1; - while (index < source.len) : (index += 1) { - const c = source[index]; - switch (state) { - .line_start => switch (c) { - '#' => { - preprocessor_start = index; - state = .preprocessor; - if (pending_start == null) { - pending_start = index; - } - }, - '\r', '\n' => { - const is_crlf = formsLineEndingPair(source, c, index + 1); - try handleLineEnd(allocator, line_number, &parse_result.mappings, &current_mapping); - if (!current_mapping.ignore_contents) { - result.write(c); - if (is_crlf) result.write(source[index + 1]); - line_number += 1; - } - if (is_crlf) index += 1; - pending_start = null; - }, - ' ', '\t', '\x0b', '\x0c' => { - if (pending_start == null) { - pending_start = index; - } - }, - else => { - state = .non_preprocessor; - if (pending_start != null) { - if (!current_mapping.ignore_contents) { - result.writeSlice(source[pending_start.? .. 
index + 1]); - } - pending_start = null; - continue; - } - if (!current_mapping.ignore_contents) { - result.write(c); - } - }, - }, - .preprocessor => switch (c) { - '\r', '\n' => { - // Now that we have the full line we can decide what to do with it - const preprocessor_str = source[preprocessor_start..index]; - const is_crlf = formsLineEndingPair(source, c, index + 1); - if (std.mem.startsWith(u8, preprocessor_str, "#line")) { - try handleLineCommand(allocator, preprocessor_str, &current_mapping); - } else { - try handleLineEnd(allocator, line_number, &parse_result.mappings, &current_mapping); - if (!current_mapping.ignore_contents) { - const line_ending_len: usize = if (is_crlf) 2 else 1; - result.writeSlice(source[pending_start.? .. index + line_ending_len]); - line_number += 1; - } - } - if (is_crlf) index += 1; - state = .line_start; - pending_start = null; - }, - else => {}, - }, - .non_preprocessor => switch (c) { - '\r', '\n' => { - const is_crlf = formsLineEndingPair(source, c, index + 1); - try handleLineEnd(allocator, line_number, &parse_result.mappings, &current_mapping); - if (!current_mapping.ignore_contents) { - result.write(c); - if (is_crlf) result.write(source[index + 1]); - line_number += 1; - } - if (is_crlf) index += 1; - state = .line_start; - pending_start = null; - }, - else => { - if (!current_mapping.ignore_contents) { - result.write(c); - } - }, - }, - } - } else { - switch (state) { - .line_start => {}, - .non_preprocessor => { - try handleLineEnd(allocator, line_number, &parse_result.mappings, &current_mapping); - }, - .preprocessor => { - // Now that we have the full line we can decide what to do with it - const preprocessor_str = source[preprocessor_start..index]; - if (std.mem.startsWith(u8, preprocessor_str, "#line")) { - try handleLineCommand(allocator, preprocessor_str, &current_mapping); - } else { - try handleLineEnd(allocator, line_number, &parse_result.mappings, &current_mapping); - if (!current_mapping.ignore_contents) { - 
result.writeSlice(source[pending_start.?..index]); - } - } - }, - } - } - - parse_result.result = result.getWritten(); - - // Remove whitespace from the end of the result. This avoids issues when the - // preprocessor adds a newline to the end of the file, since then the - // post-preprocessed source could have more lines than the corresponding input source and - // the inserted line can't be mapped to any lines in the original file. - // There's no way that whitespace at the end of a file can affect the parsing - // of the RC script so this is okay to do unconditionally. - // TODO: There might be a better way around this - while (parse_result.result.len > 0 and std.ascii.isWhitespace(parse_result.result[parse_result.result.len - 1])) { - parse_result.result.len -= 1; - } - - // If there have been no line mappings at all, then we're dealing with an empty file. - // In this case, we want to fake a line mapping just so that we return something - // that is useable in the same way that a non-empty mapping would be. 
- if (parse_result.mappings.mapping.items.len == 0) { - try handleLineEnd(allocator, line_number, &parse_result.mappings, &current_mapping); - } - - return parse_result; -} - -/// Note: This should function the same as lex.LineHandler.currentIndexFormsLineEndingPair -pub fn formsLineEndingPair(source: []const u8, line_ending: u8, next_index: usize) bool { - if (next_index >= source.len) return false; - - const next_ending = source[next_index]; - if (next_ending != '\r' and next_ending != '\n') return false; - - // can't be \n\n or \r\r - if (line_ending == next_ending) return false; - - return true; -} - -pub fn handleLineEnd(allocator: Allocator, post_processed_line_number: usize, mapping: *SourceMappings, current_mapping: *CurrentMapping) !void { - const filename_offset = try mapping.files.put(allocator, current_mapping.filename.items); - - try mapping.set(allocator, post_processed_line_number, .{ - .start_line = current_mapping.line_num, - .end_line = current_mapping.line_num, - .filename_offset = filename_offset, - }); - - current_mapping.line_num += 1; - current_mapping.pending = false; -} - -// TODO: Might want to provide diagnostics on invalid line commands instead of just returning -pub fn handleLineCommand(allocator: Allocator, line_command: []const u8, current_mapping: *CurrentMapping) error{OutOfMemory}!void { - // TODO: Are there other whitespace characters that should be included? 
- var tokenizer = std.mem.tokenize(u8, line_command, " \t"); - const line_directive = tokenizer.next() orelse return; // #line - if (!std.mem.eql(u8, line_directive, "#line")) return; - const linenum_str = tokenizer.next() orelse return; - const linenum = std.fmt.parseUnsigned(usize, linenum_str, 10) catch return; - - var filename_literal = tokenizer.rest(); - while (filename_literal.len > 0 and std.ascii.isWhitespace(filename_literal[filename_literal.len - 1])) { - filename_literal.len -= 1; - } - if (filename_literal.len < 2) return; - const is_quoted = filename_literal[0] == '"' and filename_literal[filename_literal.len - 1] == '"'; - if (!is_quoted) return; - const filename = parseFilename(allocator, filename_literal[1 .. filename_literal.len - 1]) catch |err| switch (err) { - error.OutOfMemory => |e| return e, - else => return, - }; - defer allocator.free(filename); - - // \x00 bytes in the filename is incompatible with how StringTable works - if (std.mem.indexOfScalar(u8, filename, '\x00') != null) return; - - current_mapping.line_num = linenum; - current_mapping.filename.clearRetainingCapacity(); - try current_mapping.filename.appendSlice(allocator, filename); - current_mapping.pending = true; - current_mapping.ignore_contents = std.ascii.endsWithIgnoreCase(filename, ".c") or std.ascii.endsWithIgnoreCase(filename, ".h"); -} - -pub fn parseAndRemoveLineCommandsAlloc(allocator: Allocator, source: []const u8, options: ParseAndRemoveLineCommandsOptions) !ParseLineCommandsResult { - const buf = try allocator.alloc(u8, source.len); - errdefer allocator.free(buf); - var result = try parseAndRemoveLineCommands(allocator, source, buf, options); - result.result = try allocator.realloc(buf, result.result.len); - return result; -} - -/// C-style string parsing with a few caveats: -/// - The str cannot contain newlines or carriage returns -/// - Hex and octal escape are limited to u8 -/// - No handling/support for L, u, or U prefixed strings -/// - The start and end 
double quotes should be omitted from the `str` -/// - Other than the above, does not assume any validity of the strings (i.e. there -/// may be unescaped double quotes within the str) and will return error.InvalidString -/// on any problems found. -/// -/// The result is a UTF-8 encoded string. -fn parseFilename(allocator: Allocator, str: []const u8) error{ OutOfMemory, InvalidString }![]u8 { - const State = enum { - string, - escape, - escape_hex, - escape_octal, - escape_u, - }; - - var filename = try std.ArrayList(u8).initCapacity(allocator, str.len); - errdefer filename.deinit(); - var state: State = .string; - var index: usize = 0; - var escape_len: usize = undefined; - var escape_val: u64 = undefined; - var escape_expected_len: u8 = undefined; - while (index < str.len) : (index += 1) { - const c = str[index]; - switch (state) { - .string => switch (c) { - '\\' => state = .escape, - '"' => return error.InvalidString, - else => filename.appendAssumeCapacity(c), - }, - .escape => switch (c) { - '\'', '"', '\\', '?', 'n', 'r', 't', 'a', 'b', 'e', 'f', 'v' => { - const escaped_c = switch (c) { - '\'', '"', '\\', '?' 
=> c, - 'n' => '\n', - 'r' => '\r', - 't' => '\t', - 'a' => '\x07', - 'b' => '\x08', - 'e' => '\x1b', // non-standard - 'f' => '\x0c', - 'v' => '\x0b', - else => unreachable, - }; - filename.appendAssumeCapacity(escaped_c); - state = .string; - }, - 'x' => { - escape_val = 0; - escape_len = 0; - state = .escape_hex; - }, - '0'...'7' => { - escape_val = std.fmt.charToDigit(c, 8) catch unreachable; - escape_len = 1; - state = .escape_octal; - }, - 'u' => { - escape_val = 0; - escape_len = 0; - state = .escape_u; - escape_expected_len = 4; - }, - 'U' => { - escape_val = 0; - escape_len = 0; - state = .escape_u; - escape_expected_len = 8; - }, - else => return error.InvalidString, - }, - .escape_hex => switch (c) { - '0'...'9', 'a'...'f', 'A'...'F' => { - const digit = std.fmt.charToDigit(c, 16) catch unreachable; - if (escape_val != 0) escape_val = std.math.mul(u8, @as(u8, @intCast(escape_val)), 16) catch return error.InvalidString; - escape_val = std.math.add(u8, @as(u8, @intCast(escape_val)), digit) catch return error.InvalidString; - escape_len += 1; - }, - else => { - if (escape_len == 0) return error.InvalidString; - filename.appendAssumeCapacity(@intCast(escape_val)); - state = .string; - index -= 1; // reconsume - }, - }, - .escape_octal => switch (c) { - '0'...'7' => { - const digit = std.fmt.charToDigit(c, 8) catch unreachable; - if (escape_val != 0) escape_val = std.math.mul(u8, @as(u8, @intCast(escape_val)), 8) catch return error.InvalidString; - escape_val = std.math.add(u8, @as(u8, @intCast(escape_val)), digit) catch return error.InvalidString; - escape_len += 1; - if (escape_len == 3) { - filename.appendAssumeCapacity(@intCast(escape_val)); - state = .string; - } - }, - else => { - if (escape_len == 0) return error.InvalidString; - filename.appendAssumeCapacity(@intCast(escape_val)); - state = .string; - index -= 1; // reconsume - }, - }, - .escape_u => switch (c) { - '0'...'9', 'a'...'f', 'A'...'F' => { - const digit = std.fmt.charToDigit(c, 16) catch 
unreachable; - if (escape_val != 0) escape_val = std.math.mul(u21, @as(u21, @intCast(escape_val)), 16) catch return error.InvalidString; - escape_val = std.math.add(u21, @as(u21, @intCast(escape_val)), digit) catch return error.InvalidString; - escape_len += 1; - if (escape_len == escape_expected_len) { - var buf: [4]u8 = undefined; - const utf8_len = std.unicode.utf8Encode(@intCast(escape_val), &buf) catch return error.InvalidString; - filename.appendSliceAssumeCapacity(buf[0..utf8_len]); - state = .string; - } - }, - // Requires escape_expected_len valid hex digits - else => return error.InvalidString, - }, - } - } else { - switch (state) { - .string => {}, - .escape, .escape_u => return error.InvalidString, - .escape_hex => { - if (escape_len == 0) return error.InvalidString; - filename.appendAssumeCapacity(@intCast(escape_val)); - }, - .escape_octal => { - filename.appendAssumeCapacity(@intCast(escape_val)); - }, - } - } - - return filename.toOwnedSlice(); -} - -fn testParseFilename(expected: []const u8, input: []const u8) !void { - const parsed = try parseFilename(std.testing.allocator, input); - defer std.testing.allocator.free(parsed); - - return std.testing.expectEqualSlices(u8, expected, parsed); -} - -test parseFilename { - try testParseFilename("'\"?\\\t\n\r\x11", "\\'\\\"\\?\\\\\\t\\n\\r\\x11"); - try testParseFilename("\xABz\x53", "\\xABz\\123"); - try testParseFilename("⚡⚡", "\\u26A1\\U000026A1"); - try std.testing.expectError(error.InvalidString, parseFilename(std.testing.allocator, "\"")); - try std.testing.expectError(error.InvalidString, parseFilename(std.testing.allocator, "\\")); - try std.testing.expectError(error.InvalidString, parseFilename(std.testing.allocator, "\\u")); - try std.testing.expectError(error.InvalidString, parseFilename(std.testing.allocator, "\\U")); - try std.testing.expectError(error.InvalidString, parseFilename(std.testing.allocator, "\\x")); - try std.testing.expectError(error.InvalidString, 
parseFilename(std.testing.allocator, "\\xZZ")); - try std.testing.expectError(error.InvalidString, parseFilename(std.testing.allocator, "\\xABCDEF")); - try std.testing.expectError(error.InvalidString, parseFilename(std.testing.allocator, "\\777")); -} - -pub const SourceMappings = struct { - /// line number -> span where the index is (line number - 1) - mapping: std.ArrayListUnmanaged(SourceSpan) = .{}, - files: StringTable = .{}, - /// The default assumes that the first filename added is the root file. - /// The value should be set to the correct offset if that assumption does not hold. - root_filename_offset: u32 = 0, - - pub const SourceSpan = struct { - start_line: usize, - end_line: usize, - filename_offset: u32, - }; - - pub fn deinit(self: *SourceMappings, allocator: Allocator) void { - self.files.deinit(allocator); - self.mapping.deinit(allocator); - } - - pub fn set(self: *SourceMappings, allocator: Allocator, line_num: usize, span: SourceSpan) !void { - const ptr = try self.expandAndGet(allocator, line_num); - ptr.* = span; - } - - pub fn has(self: SourceMappings, line_num: usize) bool { - return self.mapping.items.len >= line_num; - } - - /// Note: `line_num` is 1-indexed - pub fn get(self: SourceMappings, line_num: usize) SourceSpan { - return self.mapping.items[line_num - 1]; - } - - pub fn getPtr(self: SourceMappings, line_num: usize) *SourceSpan { - return &self.mapping.items[line_num - 1]; - } - - /// Expands the number of lines in the mapping to include the requested - /// line number (if necessary) and returns a pointer to the value at that - /// line number. 
- /// - /// Note: `line_num` is 1-indexed - pub fn expandAndGet(self: *SourceMappings, allocator: Allocator, line_num: usize) !*SourceSpan { - try self.mapping.resize(allocator, line_num); - return &self.mapping.items[line_num - 1]; - } - - pub fn collapse(self: *SourceMappings, line_num: usize, num_following_lines_to_collapse: usize) void { - std.debug.assert(num_following_lines_to_collapse > 0); - - var span_to_collapse_into = self.getPtr(line_num); - const last_collapsed_span = self.get(line_num + num_following_lines_to_collapse); - span_to_collapse_into.end_line = last_collapsed_span.end_line; - - const after_collapsed_start = line_num + num_following_lines_to_collapse; - const new_num_lines = self.mapping.items.len - num_following_lines_to_collapse; - std.mem.copyForwards(SourceSpan, self.mapping.items[line_num..new_num_lines], self.mapping.items[after_collapsed_start..]); - - self.mapping.items.len = new_num_lines; - } - - /// Returns true if the line is from the main/root file (i.e. not a file that has been - /// `#include`d). 
- pub fn isRootFile(self: *SourceMappings, line_num: usize) bool { - const line_mapping = self.get(line_num); - if (line_mapping.filename_offset == self.root_filename_offset) return true; - return false; - } -}; - -test "SourceMappings collapse" { - const allocator = std.testing.allocator; - - var mappings = SourceMappings{}; - defer mappings.deinit(allocator); - const filename_offset = try mappings.files.put(allocator, "test.rc"); - - try mappings.set(allocator, 1, .{ .start_line = 1, .end_line = 1, .filename_offset = filename_offset }); - try mappings.set(allocator, 2, .{ .start_line = 2, .end_line = 3, .filename_offset = filename_offset }); - try mappings.set(allocator, 3, .{ .start_line = 4, .end_line = 4, .filename_offset = filename_offset }); - try mappings.set(allocator, 4, .{ .start_line = 5, .end_line = 5, .filename_offset = filename_offset }); - - mappings.collapse(1, 2); - - try std.testing.expectEqual(@as(usize, 2), mappings.mapping.items.len); - try std.testing.expectEqual(@as(usize, 4), mappings.mapping.items[0].end_line); - try std.testing.expectEqual(@as(usize, 5), mappings.mapping.items[1].end_line); -} - -/// Same thing as StringTable in Zig's src/Wasm.zig -pub const StringTable = struct { - data: std.ArrayListUnmanaged(u8) = .{}, - map: std.HashMapUnmanaged(u32, void, std.hash_map.StringIndexContext, std.hash_map.default_max_load_percentage) = .{}, - - pub fn deinit(self: *StringTable, allocator: Allocator) void { - self.data.deinit(allocator); - self.map.deinit(allocator); - } - - pub fn put(self: *StringTable, allocator: Allocator, value: []const u8) !u32 { - const result = try self.map.getOrPutContextAdapted( - allocator, - value, - std.hash_map.StringIndexAdapter{ .bytes = &self.data }, - .{ .bytes = &self.data }, - ); - if (result.found_existing) { - return result.key_ptr.*; - } - - try self.data.ensureUnusedCapacity(allocator, value.len + 1); - const offset: u32 = @intCast(self.data.items.len); - - 
self.data.appendSliceAssumeCapacity(value); - self.data.appendAssumeCapacity(0); - - result.key_ptr.* = offset; - - return offset; - } - - pub fn get(self: StringTable, offset: u32) []const u8 { - std.debug.assert(offset < self.data.items.len); - return std.mem.sliceTo(@as([*:0]const u8, @ptrCast(self.data.items.ptr + offset)), 0); - } - - pub fn getOffset(self: *StringTable, value: []const u8) ?u32 { - return self.map.getKeyAdapted( - value, - std.hash_map.StringIndexAdapter{ .bytes = &self.data }, - ); - } -}; - -const ExpectedSourceSpan = struct { - start_line: usize, - end_line: usize, - filename: []const u8, -}; - -fn testParseAndRemoveLineCommands( - expected: []const u8, - comptime expected_spans: []const ExpectedSourceSpan, - source: []const u8, - options: ParseAndRemoveLineCommandsOptions, -) !void { - var results = try parseAndRemoveLineCommandsAlloc(std.testing.allocator, source, options); - defer std.testing.allocator.free(results.result); - defer results.mappings.deinit(std.testing.allocator); - - try std.testing.expectEqualStrings(expected, results.result); - - expectEqualMappings(expected_spans, results.mappings) catch |err| { - std.debug.print("\nexpected mappings:\n", .{}); - for (expected_spans, 0..) |span, i| { - const line_num = i + 1; - std.debug.print("{}: {s}:{}-{}\n", .{ line_num, span.filename, span.start_line, span.end_line }); - } - std.debug.print("\nactual mappings:\n", .{}); - for (results.mappings.mapping.items, 0..) |span, i| { - const line_num = i + 1; - const filename = results.mappings.files.get(span.filename_offset); - std.debug.print("{}: {s}:{}-{}\n", .{ line_num, filename, span.start_line, span.end_line }); - } - std.debug.print("\n", .{}); - return err; - }; -} - -fn expectEqualMappings(expected_spans: []const ExpectedSourceSpan, mappings: SourceMappings) !void { - try std.testing.expectEqual(expected_spans.len, mappings.mapping.items.len); - for (expected_spans, 0..) 
|expected_span, i| { - const line_num = i + 1; - const span = mappings.get(line_num); - const filename = mappings.files.get(span.filename_offset); - try std.testing.expectEqual(expected_span.start_line, span.start_line); - try std.testing.expectEqual(expected_span.end_line, span.end_line); - try std.testing.expectEqualStrings(expected_span.filename, filename); - } -} - -test "basic" { - try testParseAndRemoveLineCommands("", &[_]ExpectedSourceSpan{ - .{ .start_line = 1, .end_line = 1, .filename = "blah.rc" }, - }, "#line 1 \"blah.rc\"", .{}); -} - -test "only removes line commands" { - try testParseAndRemoveLineCommands( - \\#pragma code_page(65001) - , &[_]ExpectedSourceSpan{ - .{ .start_line = 1, .end_line = 1, .filename = "blah.rc" }, - }, - \\#line 1 "blah.rc" - \\#pragma code_page(65001) - , .{}); -} - -test "whitespace and line endings" { - try testParseAndRemoveLineCommands("", &[_]ExpectedSourceSpan{ - .{ .start_line = 1, .end_line = 1, .filename = "blah.rc" }, - }, "#line \t 1 \t \"blah.rc\"\r\n", .{}); -} - -test "example" { - try testParseAndRemoveLineCommands( - \\ - \\included RCDATA {"hello"} - , &[_]ExpectedSourceSpan{ - .{ .start_line = 1, .end_line = 1, .filename = "./included.rc" }, - .{ .start_line = 2, .end_line = 2, .filename = "./included.rc" }, - }, - \\#line 1 "rcdata.rc" - \\#line 1 "<built-in>" - \\#line 1 "<built-in>" - \\#line 355 "<built-in>" - \\#line 1 "<command line>" - \\#line 1 "<built-in>" - \\#line 1 "rcdata.rc" - \\#line 1 "./header.h" - \\ - \\ - \\2 RCDATA {"blah"} - \\ - \\ - \\#line 1 "./included.rc" - \\ - \\included RCDATA {"hello"} - \\#line 7 "./header.h" - \\#line 1 "rcdata.rc" - , .{}); -} - -test "CRLF and other line endings" { - try testParseAndRemoveLineCommands( - "hello\r\n#pragma code_page(65001)\r\nworld", - &[_]ExpectedSourceSpan{ - .{ .start_line = 1, .end_line = 1, .filename = "crlf.rc" }, - .{ .start_line = 2, .end_line = 2, .filename = "crlf.rc" }, - .{ .start_line = 3, .end_line = 3, .filename = "crlf.rc" 
}, - }, - "#line 1 \"crlf.rc\"\r\n#line 1 \"<built-in>\"\r#line 1 \"crlf.rc\"\n\rhello\r\n#pragma code_page(65001)\r\nworld\r\n", - .{}, - ); -} - -test "no line commands" { - try testParseAndRemoveLineCommands( - \\1 RCDATA {"blah"} - \\2 RCDATA {"blah"} - , &[_]ExpectedSourceSpan{ - .{ .start_line = 1, .end_line = 1, .filename = "blah.rc" }, - .{ .start_line = 2, .end_line = 2, .filename = "blah.rc" }, - }, - \\1 RCDATA {"blah"} - \\2 RCDATA {"blah"} - , .{ .initial_filename = "blah.rc" }); -} - -test "in place" { - var mut_source = "#line 1 \"blah.rc\"".*; - var result = try parseAndRemoveLineCommands(std.testing.allocator, &mut_source, &mut_source, .{}); - defer result.mappings.deinit(std.testing.allocator); - try std.testing.expectEqualStrings("", result.result); -} diff --git a/src/resinator/utils.zig b/src/resinator/utils.zig @@ -1,112 +0,0 @@ -const std = @import("std"); -const builtin = @import("builtin"); - -/// Like std.io.FixedBufferStream but does no bounds checking -pub const UncheckedSliceWriter = struct { - const Self = @This(); - - pos: usize = 0, - slice: []u8, - - pub fn write(self: *Self, char: u8) void { - self.slice[self.pos] = char; - self.pos += 1; - } - - pub fn writeSlice(self: *Self, slice: []const u8) void { - for (slice) |c| { - self.write(c); - } - } - - pub fn getWritten(self: Self) []u8 { - return self.slice[0..self.pos]; - } -}; - -/// Cross-platform 'std.fs.Dir.openFile' wrapper that will always return IsDir if -/// a directory is attempted to be opened. -/// TODO: Remove once https://github.com/ziglang/zig/issues/5732 is addressed. 
-pub fn openFileNotDir(cwd: std.fs.Dir, path: []const u8, flags: std.fs.File.OpenFlags) std.fs.File.OpenError!std.fs.File { - const file = try cwd.openFile(path, flags); - errdefer file.close(); - // https://github.com/ziglang/zig/issues/5732 - if (builtin.os.tag != .windows) { - const stat = try file.stat(); - - if (stat.kind == .directory) - return error.IsDir; - } - return file; -} - -/// Emulates the Windows implementation of `iswdigit`, but only returns true -/// for the non-ASCII digits that `iswdigit` on Windows would return true for. -pub fn isNonAsciiDigit(c: u21) bool { - return switch (c) { - '²', - '³', - '¹', - '\u{660}'...'\u{669}', - '\u{6F0}'...'\u{6F9}', - '\u{7C0}'...'\u{7C9}', - '\u{966}'...'\u{96F}', - '\u{9E6}'...'\u{9EF}', - '\u{A66}'...'\u{A6F}', - '\u{AE6}'...'\u{AEF}', - '\u{B66}'...'\u{B6F}', - '\u{BE6}'...'\u{BEF}', - '\u{C66}'...'\u{C6F}', - '\u{CE6}'...'\u{CEF}', - '\u{D66}'...'\u{D6F}', - '\u{E50}'...'\u{E59}', - '\u{ED0}'...'\u{ED9}', - '\u{F20}'...'\u{F29}', - '\u{1040}'...'\u{1049}', - '\u{1090}'...'\u{1099}', - '\u{17E0}'...'\u{17E9}', - '\u{1810}'...'\u{1819}', - '\u{1946}'...'\u{194F}', - '\u{19D0}'...'\u{19D9}', - '\u{1B50}'...'\u{1B59}', - '\u{1BB0}'...'\u{1BB9}', - '\u{1C40}'...'\u{1C49}', - '\u{1C50}'...'\u{1C59}', - '\u{A620}'...'\u{A629}', - '\u{A8D0}'...'\u{A8D9}', - '\u{A900}'...'\u{A909}', - '\u{AA50}'...'\u{AA59}', - '\u{FF10}'...'\u{FF19}', - => true, - else => false, - }; -} - -/// Used for generic colored errors/warnings/notes, more context-specific error messages -/// are handled elsewhere. 
-pub fn renderErrorMessage(writer: anytype, config: std.io.tty.Config, msg_type: enum { err, warning, note }, comptime format: []const u8, args: anytype) !void { - switch (msg_type) { - .err => { - try config.setColor(writer, .bold); - try config.setColor(writer, .red); - try writer.writeAll("error: "); - }, - .warning => { - try config.setColor(writer, .bold); - try config.setColor(writer, .yellow); - try writer.writeAll("warning: "); - }, - .note => { - try config.setColor(writer, .reset); - try config.setColor(writer, .cyan); - try writer.writeAll("note: "); - }, - } - try config.setColor(writer, .reset); - if (msg_type == .err) { - try config.setColor(writer, .bold); - } - try writer.print(format, args); - try writer.writeByte('\n'); - try config.setColor(writer, .reset); -}